--- title: "Multiple Comparisons" author: "J. Lucas McKay" date: "2023-03-02" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = F, collapse = T, warning = F, message = F) ``` *** #### 1. Using `p.adjust` ```{r} library(tidyverse) # Let's say we have performed a series of statistical tests and obtained the following p-values: p.vals = seq(0.0025,0.025,0.0025) p.vals # We could give them names, to identify the results of each test. names(p.vals) = paste("Test",LETTERS[1:10]) p.vals # Which ones are statistically-significant? p.vals[p.vals<0.05] length(p.vals[p.vals<0.05]) ``` ```{r} # To control for Type I error, we have a few options, all of which are built in to p.adjust(). # We can calculate the adjusted p-values p.adjust(p.vals, method = "holm") # And compare to the original statistical significance level p.vals[p.adjust(p.vals, method = "holm")<0.05] # Note that as we get less conservative, the number of significant tests increases p.vals[p.adjust(p.vals, method = "bonferroni")<0.05] # same thing p.vals[p.vals<0.05/10] # Note that as we get less conservative, the number of significant tests increases p.vals[p.adjust(p.vals, method = "bonferroni")<0.05] # 1 significant result p.vals[p.adjust(p.vals, method = "holm")<0.05] # 2 significant result p.vals[p.adjust(p.vals, method = "fdr")<0.05] ``` ```{r} # What is the expected number of false positives when we use FDR? ``` *** #### 2. Reporting P values ```{r} # load biomarker data; compare group A and C library(tidyverse) csf = read_csv("https://jlucasmckay.bmi.emory.edu/global/bmi510/Labs-Materials/csf.csv") |> filter(Group %in% c("A","C")) # Extract group G = csf$Group # Extract numerical values vals = csf |> select(-Group) |> as.matrix() # Calculate p-values. We can use "apply" for this: apply(vals,2,mean) nanmean = function(x) mean(x,na.rm = T) |> round(2) vals |> apply(2,nanmean) p.values = vals |> apply(2,function(x) t.test(x[G=="A"],x[G=="C"])$p.value) sort(p.values) # We can directly adjust with FDR: sort(p.adjust(p.values, "fdr")) # But with Holm, remember that no adjustment is made for the higher p-values; sometimes this leads to curious repeated numbers, etc. sort(p.adjust(p.values, "holm")) ``` ```{r} # Often what we'd like to do is something like the following. report.values = tibble(Biomarker = names(p.values), P = as.numeric(p.values)) report.values$padj = p.adjust(report.values$P,"fdr") report.values = report.values |> mutate( `FDR` = case_when( padj<0.001 ~ "***", padj<0.01 ~ "**", padj<0.05 ~ "*", TRUE ~ "" )) report.values |> select(-padj) |> mutate(P = round(P,3)) |> arrange(P) |> knitr::kable() ```