Use non-central t-distribution and averaging based on data of both estimated effect and estimated standard error for `retrodesign`

andytimm commented 1 year ago

These are suggested improvements from @martijnweterings over email, making issues to track everything I want in a 0.2.0 version of the package.

Martijn provided the attached reproducible example of both features which gives slightly improved performance in the simulation case (i.e, in retrodesign). It's a pretty small difference, but worth adding in since it's not hard to implement.

library(retrodesign)
layout(matrix(1:2,1))

### some settings
set.seed(2)
n = 5
nu = 2*n-2
alpha = 0.01
d = seq(0,3,0.05)
m = 10^4

####
### computations with retrodesign
####

retrodesign <- function(A, s, alpha=.05, df=Inf, n.sims=10000){
  z <- qt(1-alpha/2, df)
  p.hi <- 1 - pt(z-A/s, df)
  p.lo <- pt(-z-A/s, df)
  power <- p.hi + p.lo
  typeS <- p.lo/power
  estimate <- A + s*rt(n.sims,df)
  significant <- abs(estimate) > s*z
  exaggeration <- mean(abs(estimate)[significant])/A
  return(list(power=power, typeS=typeS, exaggeration=exaggeration))
}

nl <- length(d)
a_power <- rep(NA, nl)
a_typeS <- rep(NA, nl)
a_exaggeration <- rep(NA, nl)
for (i in 1:nl){
  a <- retrodesign(d[i], s=sqrt(2/n), alpha = alpha, df = nu, n.sims = m)
  a_power[i] <- a$power
  a_typeS[i] <- a$typeS
  a_exaggeration[i] <- a$exaggeration
}

#### 
### Alternative computations
####

### boundary for alpha level t-test
tc = qt(1-alpha/2, df = nu)

### power 
b_power = pt(-tc, df = nu, ncp=d/sqrt(2/n)) + 
  1-pt( tc, df = nu, ncp=d/sqrt(2/n))

### s-error rate
b_typeS = pt(-tc, df = nu, ncp=d/sqrt(2/n))/b_power

### simulate experiments for m-error
x0 = rnorm(m,0,sqrt(2/n))
s = sqrt(rchisq(m,nu)/nu) * sqrt(2/n)

### m-error
b_exaggeration = sapply(d, FUN = function(di) {
  x = x0+di
  significant = abs(x/s)>tc
  return(mean(abs(x[significant == 1]))/di)  
})

####
### Verify with a straightforward/simple/foolproof simulation
### this uses simulations of two samples and the t.test() function
####

### some values to be tested
test_effects = c(0.25,0.5,0.75,1)
lt = length(test_effects)

test_power = rep(NA,lt)
test_typeS = rep(NA,lt)
test_exaggeration= rep(NA,4)

for (j in 1:lt) {
  test_effect = test_effects[j]

  ### repeat m simulations in a loop
  ### compute the effect size and the significance
  ### store the results
  effects =     rep(NA,m)
  significant = rep(NA,m)
  for (i in 1:m) {  
    x = rnorm(n,0,1)
    y = rnorm(n,test_effect,1)
    mod = t.test(x,y, var.equal = TRUE)
    significant[i] = (mod$p.value<alpha)
    effects[i] = mean(y)-mean(x)
  }

  ### compute the statistics and save them in the vectors
  test_power[j] = mean(significant)
  test_exaggeration[j] = mean(abs(effects  )[significant == 1])/test_effect
  test_typeS[j] =        mean(   (effects<0)[significant == 1])
}

####
### plot everything
####

### plot of type S error versus power
plot(b_power,b_typeS, 
     type = "l", log = "xy", ylim = 10^c(-3,0) ,xlim = c(alpha,1),
     xlab = "power", ylab = "type S error",
     yaxt = "n")
axis(2, at = 10^c(-3:0), labels = c(expression(10^-3),expression(10^-2),expression(10^-1),expression(1)), las = 2)
lines(a_power,a_typeS, col = 2)
points(test_power, test_typeS, pch = 21, bg = 0)
legend(0.10, 1, 
      c("shifted t-distribution", "non-central t-distribution", "simulations"),
      lty = c(1,1,NA), pch = c(NA,NA,21), col = c(2,1,1), pt.bg = c(NA,NA,0), cex = 0.7)

### plot of type M error versus power
plot(b_power,b_exaggeration, 
     type = "l", log = "xy", ylim = c(1,10), xlim = c(alpha,1),
     xlab = "power", ylab = "type M error")
     )
lines(a_power,a_exaggeration, col = 2)
points(test_power, test_exaggeration, pch = 21, bg = 0)

title(main="type S and M error versus power for two sample t-test with n=5", outer=TRUE, line=-2)

andytimm commented 1 year ago

@martijnweterings, is it alright if I work from your provided code to implement this? Happy to add you as a contributor in that case if you want.

martijnweterings commented 1 year ago

Sure, you can use that code.

Also check out the following code where I made the previous code more streamlined to help generating the following image:

The code is mostly doing thew same as the previous code but this time enclosed in a separate function. An important difference is that the simulations to compute the type-m error are only done once for multiple different levels of power. This saves some computation time.


library(retrodesign) ### for shifted t-distribution significance tests

### compute power and error rates for two sample t-test
retropower = function(d, n, alpha = 0.05, n.sim = 10^4) {
  nu = 2*n-2

  ### boundary for alpha level t-test
  tc = qt(1-alpha/2, df = nu)

  ### power 
  power = pt(-tc, df = nu, ncp=d/sqrt(2/n)) + 
        1-pt( tc, df = nu, ncp=d/sqrt(2/n))

  ### s-error rate
  type_s = pt(-tc, df = nu, ncp=d/sqrt(2/n))/power

  ### simulate experiments
  x0 = rnorm(n.sim,0)
  s = sqrt(rchisq(n.sim,nu)/nu) 

  ### m-error
  type_m = sapply(d, FUN = function(di) {
    x = abs(x0+di*sqrt(n/2))
    significant = x/s>tc
    return(mean(x[significant == 1]/sqrt(n/2))/di)  
  })
  return(list(power = power, type_s = type_s, type_m = type_m))
}

### some settings
set.seed(1)
d = seq(0,3,0.05)

####
### creating plots for image 4
####

layout(matrix(1:2,1, byrow = TRUE))
par(mgp =c(2,1,0), mar = c(4,4,3,1))

plot(2,2, log = "xy", type = "l", xlim = c(0.05,1), ylim = c(0.0001,1),
     xlab = "power", ylab = "type-s error")
m = c(5,20,100)
i = 0
for (mi in m) {
   i = i+1
   r = retropower(d/sqrt(mi/5),mi)
   q = retrodesign(A = as.list(d/sqrt(mi/5)), s = sqrt(2/mi), df = 2*mi - 2)

   lines(r$power,r$type_s, col = i)
   lines(q$power,q$type_s, col = i, lty = 2)
}

plot(2,2, log = "xy", type = "l", xlim = c(0.05,1), ylim = c(1,20),
     xlab = "power", ylab = "type-m error")

m = c(5,20,100)
i = 0
for (mi in m) {
  i = i+1
  r = retropower(d/sqrt(mi/5),mi)
  q = retrodesign(A = as.list(d/sqrt(mi/5)), s = sqrt(2/mi), df = 2*mi - 2)

  lines(r$power,r$type_m, col = i)
  lines(q$power,q$type_m, col = i, lty = 2)

}

title(main="type S/M errors versus power for different type of tests", outer=TRUE, line=-1, cex = 1)
legend(0.08,18, c("n=5","n=20","n=100"), col = c(1,2,3), lty = 1, cex = 0.7, title = "non-central\n t-distribution", box.lwd = 0)
legend(0.3,18, c("n=5","n=20","n=100"), col = c(1,2,3), lty = 2, cex = 0.7, title = "shifted\n t-distribution", box.lwd = 0)

####
## creating plots for image 3
###

d = seq(0,5,0.025)
r = retropower(d,5)

layout(matrix(1:3,3))
par(mgp =c(2,1,0), mar = c(4,4,2,1))

plot(d,r$power, type ="l", xlab = "effect size in terms of sigma", ylab = "power", main = "power for two sample test with n = 5" , ylim = c(0,1))
plot(d,r$type_s, type ="l", xlab = "effect size in terms of sigma", ylab = "error rate", main = "S-type error for two sample test with n = 5" , ylim = c(0,0.5))
plot(d,r$type_m, type ="l", xlab = "effect size in terms of sigma", ylab = "magnification", main = "M-type error for two sample test with n = 5" , ylim = c(0,30))

andytimm / retrodesign

Use non-central t-distribution and averaging based on data of both estimated effect and estimated standard error for `retrodesign` #3