---
title: "Lab 3"
author: "J. Lucas McKay"
date: "10/4/2021"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, collapse = T, warning = F, message = F)
```

***
#### 1. A worked example
An example from the PASS documentation: In studying deaths from SIDS (Sudden Infant Death Syndrome), one hypothesis put forward is that infants dying of SIDS weigh less than normal at birth.
Suppose the average birth weight of infants is 3300±663 grams. Using alpha = 0.05 and beta = 0.20, how large a sample of a SIDS infants will be needed to detect a drop in average weight of 25% ?

```{r}
# convert expected difference to effect size
PercentToCohen = function(pct) (pct*3300)/663
CohenToPercent = function(delta) 663*delta/3300
```

```{r}
# ask for the required sample size given desired effect size
pwr.res = power.t.test(power = 0.8, delta = PercentToCohen(0.25))

?pwr::pwr.t.test
```

```{r}
# ask for the resolvable effect size given feasible sample size
power.t.test(power = 0.8, n=6, sig.level = 0.05)$delta |> CohenToPercent()
```

***
#### 2. Plotting the relationship between power, effect size, and N

```{r}
library(tidyverse)

n_per_group = 17
power = 0.8
alpha = 0.05
effect_size = 1

# we can use power.t.test to identify the required N given the hypothesized effect size
power.t.test(delta = effect_size, power = power, sig.level = alpha)

# or the expected power given effect size and N
power.t.test(n = n_per_group, delta = effect_size, sig.level = alpha)

# or the effect size that can be resolved given everything else
power.t.test(n = n_per_group, power = power, sig.level = alpha)

# in general, during study design we will want to look at a series of scenarios and trade-off between power and budget.
power_sweep = tibble(N=seq(6,100)) |>
  group_by(N) |>
  mutate(Power = power.t.test(n=N, delta = effect_size)$power) |> 
  mutate(`Effect Size` = power.t.test(n=N, power = power)$delta)
```

```{r}
# plot power vs. N
power_sweep |> 
  ggplot(aes(N,Power)) +
  geom_hline(yintercept = power) +
  geom_vline(xintercept = power.t.test(n = n_per_group, delta = effect_size, sig.level = alpha)$n) +
  geom_line(size=1) +
  expand_limits(y = 0) +
  theme_minimal() +
  labs(title = "Power of a t-test vs. N, Effect Size = 1.0")
ggsave("t-test-power.png")
```

```{r}
# plot effect size vs. N
power_sweep |> 
  ggplot(aes(N,`Effect Size`)) +
  geom_line(size=1) +
  expand_limits(y = 0) +
  theme_minimal() +
  labs(title = "Effect size that can be resolved with 80% power vs. N")
ggsave("t-test-effect-size.png")
```


***
#### 3. Planning a simple clinical trial

Let's say you have an off-label drug that has been shown in case reports to reduce blood pressure.
Your clinical colleagues ran a pilot study on N=6 patients and found that 3/6 had a clinically
meaningful improvement.
You expect based on historical data that if you had a placebo group about 1/10 would improve.

```{r}
# we do not have any control subjects, but based on historical controls you expect the following type of table:
improvements = c(3,0,3,6) |> matrix(nrow=2, byrow = T)
improvements |> fisher.test() |> broom::tidy()
```


```{r}
# estimate N per group
power.prop.test(p1=3/6,p2=1/10,power=0.8)
```

```{r}
# we can also do this through simulation - typically I find the simulation results more conservative.
OR22 = function(x) x[1,1]*x[2,2]/(x[1,2]*x[2,1])

SimulateTrial = function(n=20,p1=3/6,p2=1/10){
  # successes in group 1
  successes_1 = rbinom(1,n,p1)
  # successes in group 2
  successes_2 = rbinom(1,n,p2)
  # 2x2 table
  patient_results = c(successes_1, n-successes_1, successes_2, n-successes_2) |> matrix(nrow = 2)
  OR = OR22(patient_results)
  p.value = chisq.test(patient_results,simulate.p.value = T)$p.value
  # return a tibble with the relevant information
  tibble(n,p1,p2,OR,p.value)
}

set.seed(404)
res = replicate(100,SimulateTrial(n=20),simplify=F) |> bind_rows()
sum(res$p.value<0.05)/nrow(res)

set.seed(404)
res = replicate(100,SimulateTrial(n=22),simplify=F) |> bind_rows()
sum(res$p.value<0.05)/nrow(res)
```