--- title: "Lab 3" author: "J. Lucas McKay" date: "10/4/2021" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE, collapse = T, warning = F, message = F) ``` *** #### 1. A worked example An example from the PASS documentation: In studying deaths from SIDS (Sudden Infant Death Syndrome), one hypothesis put forward is that infants dying of SIDS weigh less than normal at birth. Suppose the average birth weight of infants is 3300±663 grams. Using alpha = 0.05 and beta = 0.20, how large a sample of a SIDS infants will be needed to detect a drop in average weight of 25% ? ```{r} # convert expected difference to effect size PercentToCohen = function(pct) (pct*3300)/663 CohenToPercent = function(delta) 663*delta/3300 ``` ```{r} # ask for the required sample size given desired effect size pwr.res = power.t.test(power = 0.8, delta = PercentToCohen(0.25)) ?pwr::pwr.t.test ``` ```{r} # ask for the resolvable effect size given feasible sample size power.t.test(power = 0.8, n=6, sig.level = 0.05)$delta |> CohenToPercent() ``` *** #### 2. Plotting the relationship between power, effect size, and N ```{r} library(tidyverse) n_per_group = 17 power = 0.8 alpha = 0.05 effect_size = 1 # we can use power.t.test to identify the required N given the hypothesized effect size power.t.test(delta = effect_size, power = power, sig.level = alpha) # or the expected power given effect size and N power.t.test(n = n_per_group, delta = effect_size, sig.level = alpha) # or the effect size that can be resolved given everything else power.t.test(n = n_per_group, power = power, sig.level = alpha) # in general, during study design we will want to look at a series of scenarios and trade-off between power and budget. power_sweep = tibble(N=seq(6,100)) |> group_by(N) |> mutate(Power = power.t.test(n=N, delta = effect_size)$power) |> mutate(`Effect Size` = power.t.test(n=N, power = power)$delta) ``` ```{r} # plot power vs. N power_sweep |> ggplot(aes(N,Power)) + geom_hline(yintercept = power) + geom_vline(xintercept = power.t.test(n = n_per_group, delta = effect_size, sig.level = alpha)$n) + geom_line(size=1) + expand_limits(y = 0) + theme_minimal() + labs(title = "Power of a t-test vs. N, Effect Size = 1.0") ggsave("t-test-power.png") ``` ```{r} # plot effect size vs. N power_sweep |> ggplot(aes(N,`Effect Size`)) + geom_line(size=1) + expand_limits(y = 0) + theme_minimal() + labs(title = "Effect size that can be resolved with 80% power vs. N") ggsave("t-test-effect-size.png") ``` *** #### 3. Planning a simple clinical trial Let's say you have an off-label drug that has been shown in case reports to reduce blood pressure. Your clinical colleagues ran a pilot study on N=6 patients and found that 3/6 had a clinically meaningful improvement. You expect based on historical data that if you had a placebo group about 1/10 would improve. ```{r} # we do not have any control subjects, but based on historical controls you expect the following type of table: improvements = c(3,0,3,6) |> matrix(nrow=2, byrow = T) improvements |> fisher.test() |> broom::tidy() ``` ```{r} # estimate N per group power.prop.test(p1=3/6,p2=1/10,power=0.8) ``` ```{r} # we can also do this through simulation - typically I find the simulation results more conservative. OR22 = function(x) x[1,1]*x[2,2]/(x[1,2]*x[2,1]) SimulateTrial = function(n=20,p1=3/6,p2=1/10){ # successes in group 1 successes_1 = rbinom(1,n,p1) # successes in group 2 successes_2 = rbinom(1,n,p2) # 2x2 table patient_results = c(successes_1, n-successes_1, successes_2, n-successes_2) |> matrix(nrow = 2) OR = OR22(patient_results) p.value = chisq.test(patient_results,simulate.p.value = T)$p.value # return a tibble with the relevant information tibble(n,p1,p2,OR,p.value) } set.seed(404) res = replicate(100,SimulateTrial(n=20),simplify=F) |> bind_rows() sum(res$p.value<0.05)/nrow(res) set.seed(404) res = replicate(100,SimulateTrial(n=22),simplify=F) |> bind_rows() sum(res$p.value<0.05)/nrow(res) ```