---
title: "Lab-Mixed Models"
author: "J. Lucas McKay"
date: "2024-04-10"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = F, collapse = T, warning = F, message = F)
```

***
#### 1. Linear mixed models - deviance study.

```{r}
library(tidyverse)
tolerance = read_csv("https://jlucasmckay.bmi.emory.edu/global/bmi510/Labs-Materials/tolerance.csv") |> 
  mutate(id = factor(id)) |> 
  mutate(male = factor(male))
```

```{r}
# plot of raw data
# a few variables to make the plots behave nicely
y_breaks = seq(3)
fit_color = "black"
fit_size = 0.5

tolerance |> ggplot(aes(age,tolerance,color = male)) +
  geom_point() +
  facet_wrap(~id,ncol = 4) +
  theme_minimal() +
  labs(title = "Tolerance of deviant behavior", subtitle = "Raw data") +
  scale_y_continuous(breaks = y_breaks)
```

```{r}
# by the way, I ran across a really nice geom: ggbswarm::quasirandom()
tolerance |>
  ggplot(aes(x = male, y=tolerance,color = male)) +
  ggbeeswarm::geom_quasirandom(width=0.1) +
  theme_minimal()
```


```{r}
# model with assumptions violated
m0 = lm(tolerance~age+male, data = tolerance)
summary(m0)

# what is the fit?
demean = function(x) scale(x,scale=F)
ss = function(x) sum(demean(x)^2)
r2 = function(pred,truth){
  sstot = ss(truth)
  sserr = ss(truth-pred)
  1-sserr/sstot
} 

# check vs lm
r2(predict(m0),tolerance$tolerance)
summary(m0)
```


```{r}
# model with only subject-specific intercepts
m1 = lmerTest::lmer(tolerance~age+male+(1|id), data = tolerance)
summary(m1)
r2(predict(m1),tolerance$tolerance)
```

```{r}
# model with subject-specific intercepts AND slopes
m2 = lmerTest::lmer(tolerance~age+male+(1+age|id), data = tolerance)
summary(m2)
r2(predict(m2),tolerance$tolerance)
```

```{r}
# plot - bundle the predictions in the original dataset
tolerance$t0 = predict(m0)
tolerance$t1 = predict(m1)
tolerance$t2 = predict(m2)
```

```{r}
tolerance |>
  ggplot(aes(age,tolerance,color = male)) +
  geom_point() +
  facet_wrap(~id,ncol = 4) +
  theme_minimal() +
  labs(title = "Tolerance of deviant behavior", subtitle = "Linear effects only") +
  geom_line(mapping = aes(age,t0), color = fit_color, size = fit_size, show.legend = F) +
  scale_y_continuous(breaks = y_breaks)
```

```{r}
tolerance |>
  ggplot(aes(age,tolerance,color = male)) +
  geom_point() +
  facet_wrap(~id,ncol = 4) +
  theme_minimal() +
  labs(title = "Tolerance of deviant behavior", subtitle = "Random intercept") +
  geom_line(mapping = aes(age,t1), color = fit_color, size = fit_size, show.legend = F) +
  scale_y_continuous(breaks = y_breaks)
```

```{r}
tolerance |>
  ggplot(aes(age,tolerance,color = male)) +
  geom_point() +
  facet_wrap(~id,ncol = 4) +
  theme_minimal() +
  labs(title = "Tolerance of deviant behavior", subtitle = "Random intercept and random slope") +
  geom_line(mapping = aes(age,t2), color = fit_color, size = fit_size, show.legend = F) +
  scale_y_continuous(breaks = y_breaks)
```

```{r}
# note what we are doing when we calculate a subject-specific intercept -
# we are essentially zeroing out each subject's baseline value.
# this is a common operation that you might have done before in analyses.
m0
m1
tolerance = tolerance |>
  arrange(id,age) |>
  group_by(id) |> 
  mutate(tzero = tolerance - tolerance[1]) |> 
  ungroup()

mz = lm(tzero~age+male,data = tolerance)
tolerance$tz = predict(mz)

tolerance |> ggplot(aes(age,tzero,color = male)) +
  geom_point() +
  facet_wrap(~id,ncol = 4) +
  theme_minimal() +
    geom_line(mapping = aes(age,tz), color = fit_color, size = fit_size, show.legend = F)
```


```{r}
# compare age and sex slopes for the first two models - are we potentially overfitting?
m1
m2
```

```{r}
# allow an interaction between sex and age
m3 = lmerTest::lmer(tolerance~age*male+(1+age|id), data = tolerance)
tolerance$t3 = predict(m3)
```

```{r}
tolerance |>
  ggplot(aes(age,tolerance,color = male)) +
  geom_point() +
  facet_wrap(~id,ncol = 4) +
  theme_minimal() +
  labs(title = "Tolerance of deviant behavior", subtitle = "Age by sex interaction and random intercept") +
  geom_line(mapping = aes(age,t3), size = fit_size, show.legend = F) +
  scale_y_continuous(breaks = y_breaks)

# to me, this looks pretty good, but the increase in r2 is such that I would probably leave it out in the interest of parsimony.
r2(tolerance$t2,tolerance$tolerance)
r2(tolerance$t3,tolerance$tolerance)
```


***
#### 2. Linear mixed models - altering SS type and df estimation

```{r}
# the df estimation method can be altered in the summary statement:
m2 |> summary()
m2 |> summary(ddf="Kenward-Roger")

# the ss type can be altered in the anova statement:
m2 |> anova()
m2 |> anova(type=1)
m2 |> anova(type=2)
m2 |> anova(type=3)
```