--- title: "Lab-Mixed Models" author: "J. Lucas McKay" date: "2024-04-10" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = F, collapse = T, warning = F, message = F) ``` *** #### 1. Linear mixed models - deviance study. ```{r} library(tidyverse) tolerance = read_csv("https://jlucasmckay.bmi.emory.edu/global/bmi510/Labs-Materials/tolerance.csv") |> mutate(id = factor(id)) |> mutate(male = factor(male)) ``` ```{r} # plot of raw data # a few variables to make the plots behave nicely y_breaks = seq(3) fit_color = "black" fit_size = 0.5 tolerance |> ggplot(aes(age,tolerance,color = male)) + geom_point() + facet_wrap(~id,ncol = 4) + theme_minimal() + labs(title = "Tolerance of deviant behavior", subtitle = "Raw data") + scale_y_continuous(breaks = y_breaks) ``` ```{r} # by the way, I ran across a really nice geom: ggbswarm::quasirandom() tolerance |> ggplot(aes(x = male, y=tolerance,color = male)) + ggbeeswarm::geom_quasirandom(width=0.1) + theme_minimal() ``` ```{r} # model with assumptions violated m0 = lm(tolerance~age+male, data = tolerance) summary(m0) # what is the fit? demean = function(x) scale(x,scale=F) ss = function(x) sum(demean(x)^2) r2 = function(pred,truth){ sstot = ss(truth) sserr = ss(truth-pred) 1-sserr/sstot } # check vs lm r2(predict(m0),tolerance$tolerance) summary(m0) ``` ```{r} # model with only subject-specific intercepts m1 = lmerTest::lmer(tolerance~age+male+(1|id), data = tolerance) summary(m1) r2(predict(m1),tolerance$tolerance) ``` ```{r} # model with subject-specific intercepts AND slopes m2 = lmerTest::lmer(tolerance~age+male+(1+age|id), data = tolerance) summary(m2) r2(predict(m2),tolerance$tolerance) ``` ```{r} # plot - bundle the predictions in the original dataset tolerance$t0 = predict(m0) tolerance$t1 = predict(m1) tolerance$t2 = predict(m2) ``` ```{r} tolerance |> ggplot(aes(age,tolerance,color = male)) + geom_point() + facet_wrap(~id,ncol = 4) + theme_minimal() + labs(title = "Tolerance of deviant behavior", subtitle = "Linear effects only") + geom_line(mapping = aes(age,t0), color = fit_color, size = fit_size, show.legend = F) + scale_y_continuous(breaks = y_breaks) ``` ```{r} tolerance |> ggplot(aes(age,tolerance,color = male)) + geom_point() + facet_wrap(~id,ncol = 4) + theme_minimal() + labs(title = "Tolerance of deviant behavior", subtitle = "Random intercept") + geom_line(mapping = aes(age,t1), color = fit_color, size = fit_size, show.legend = F) + scale_y_continuous(breaks = y_breaks) ``` ```{r} tolerance |> ggplot(aes(age,tolerance,color = male)) + geom_point() + facet_wrap(~id,ncol = 4) + theme_minimal() + labs(title = "Tolerance of deviant behavior", subtitle = "Random intercept and random slope") + geom_line(mapping = aes(age,t2), color = fit_color, size = fit_size, show.legend = F) + scale_y_continuous(breaks = y_breaks) ``` ```{r} # note what we are doing when we calculate a subject-specific intercept - # we are essentially zeroing out each subject's baseline value. # this is a common operation that you might have done before in analyses. m0 m1 tolerance = tolerance |> arrange(id,age) |> group_by(id) |> mutate(tzero = tolerance - tolerance[1]) |> ungroup() mz = lm(tzero~age+male,data = tolerance) tolerance$tz = predict(mz) tolerance |> ggplot(aes(age,tzero,color = male)) + geom_point() + facet_wrap(~id,ncol = 4) + theme_minimal() + geom_line(mapping = aes(age,tz), color = fit_color, size = fit_size, show.legend = F) ``` ```{r} # compare age and sex slopes for the first two models - are we potentially overfitting? m1 m2 ``` ```{r} # allow an interaction between sex and age m3 = lmerTest::lmer(tolerance~age*male+(1+age|id), data = tolerance) tolerance$t3 = predict(m3) ``` ```{r} tolerance |> ggplot(aes(age,tolerance,color = male)) + geom_point() + facet_wrap(~id,ncol = 4) + theme_minimal() + labs(title = "Tolerance of deviant behavior", subtitle = "Age by sex interaction and random intercept") + geom_line(mapping = aes(age,t3), size = fit_size, show.legend = F) + scale_y_continuous(breaks = y_breaks) # to me, this looks pretty good, but the increase in r2 is such that I would probably leave it out in the interest of parsimony. r2(tolerance$t2,tolerance$tolerance) r2(tolerance$t3,tolerance$tolerance) ``` *** #### 2. Linear mixed models - altering SS type and df estimation ```{r} # the df estimation method can be altered in the summary statement: m2 |> summary() m2 |> summary(ddf="Kenward-Roger") # the ss type can be altered in the anova statement: m2 |> anova() m2 |> anova(type=1) m2 |> anova(type=2) m2 |> anova(type=3) ```