From 6310b44b1b5b94123512f8b5c875afc5d182912e Mon Sep 17 00:00:00 2001 From: reubenthomas Date: Mon, 24 Apr 2023 16:58:45 -0700 Subject: [PATCH] R code for the different models --- intro-linear-mixed-effects-models/LMM.Rmd | 196 +++++ intro-linear-mixed-effects-models/LMM.html | 853 +++++++++++++++++++++ 2 files changed, 1049 insertions(+) create mode 100644 intro-linear-mixed-effects-models/LMM.Rmd create mode 100644 intro-linear-mixed-effects-models/LMM.html diff --git a/intro-linear-mixed-effects-models/LMM.Rmd b/intro-linear-mixed-effects-models/LMM.Rmd new file mode 100644 index 0000000..b543435 --- /dev/null +++ b/intro-linear-mixed-effects-models/LMM.Rmd @@ -0,0 +1,196 @@ +--- +title: "Linear_Mixed_Effects_Models: example datasets & models" +author: "Reuben Thomas" +date: "2023-04-24" +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` +## Load the necessary libraries for this markdown +If you don't already have these packages installed then please do so before implementing the code in the rest of this file. +```{r} +require(lattice) +require(lme4) +require(ggplot2) +require(dplyr) +require(magrittr) + +``` +## Sleepstudy +Example of a data set modeled by random intercept and random slope mixed effects model + +```{r} +require(lattice) +require(lme4) +require(ggplot2) +require(dplyr) +require(magrittr) + +##what is the sleepstudy +?sleepstudy +##load the data +data("sleepstudy") + +##best fit line plot the data ignoring individual subjects +ggplot(sleepstudy, aes(x=Days, y=Reaction)) + + geom_point() + + geom_smooth(method = "lm", se=FALSE) + + ylab("reaction time in ms") + + theme_classic() + + theme(text = element_text(size = 16)) + + +##best fit line plots per individual subject +ggplot(sleepstudy, aes(x=Days, y=Reaction, color=Subject)) + + geom_point() + + geom_smooth(method = "lm", se=FALSE) + + ylab("reaction time in ms") + + theme_classic() + + theme(text = element_text(size = 16)) + + +##as boxplot per subject +ggplot(sleepstudy, aes(x=Subject, y=Reaction)) + + geom_boxplot() + + ylab("reaction time in ms") + + theme_classic() + + theme(text = element_text(size = 16)) + +##as boxplot per subject coloring the days +ggplot(sleepstudy, aes(x=Subject, y=Reaction)) + + geom_boxplot() + + geom_point(aes(color=Days)) + + ylab("reaction time in ms") + + theme_classic() + + theme(text = element_text(size = 16)) + +##xy plot +xyplot(Reaction ~ Days | Subject, sleepstudy, type = c("g","p","r"), + index = function(x,y) coef(lm(y ~ x))[1], + xlab = "Days of sleep deprivation", + ylab = "Average reaction time (ms)", aspect = "xy") + +##correlated random intercept and random slope +fm1 <- lme4::lmer(Reaction ~ Days + (Days | Subject), sleepstudy) +summary(fm1) + +##uncorrelated random intercept and random slope +fm2 <- lme4::lmer(Reaction ~ Days + (Days || Subject), sleepstudy) +summary(fm2) + +##ignoring the repeated measures within a subject - linear model +lm1 <- lm(Reaction ~ Days, data = sleepstudy) +summary(lm1) +``` + +## Dye stuff + +Example of a situation where pseudo-bulking the repeated measures per subject works + +```{r} +?Dyestuff +data("Dyestuff") + +#boxplot of yield by batch +ggplot(Dyestuff, aes(x=Batch, y=Yield)) + + geom_boxplot() + + geom_point() + + theme_classic() + + theme(text = element_text(size = 16)) + +#simple random intercept model +fm1 <- lmer(Yield ~ 1|Batch, Dyestuff) +summary(fm1) + +#ignore the repeated measures within a batch: linear model +lm1 <- lm(Yield ~ 1, Dyestuff) +summary(lm1) + +#pseudo-bulk yield by batch +summ_Dyestuff <- Dyestuff %>% + group_by(Batch) %>% + summarise(mYield = mean(Yield)) + +#linear model on the pseudo-bulked data +lm2 <- lm(mYield ~ 1, summ_Dyestuff) +summary(lm2) + +``` + +## Penicillin +Example illustrated crossed random designs +```{r} +data("Penicillin") +?Penicillin + +#visualize the data as a boxplot +ggplot(Penicillin, aes(x=plate, y=diameter)) + + geom_boxplot() + + geom_point(aes(color=sample)) + + theme(text = element_text(size = 16)) + + +#mixed effects model with two random effects +fm1 <- lme4::lmer(diameter ~ (1|plate) + (1|sample), Penicillin) +summary(fm1) + +#mixed effects model with one of them +fm2 <- lme4::lmer(diameter ~ (1|sample), Penicillin) +summary(fm2) + +#significance of the plate effect +anova(fm1, fm2) + +#ignoring the repeated measures associated with plate and sample +lm1 <- lm(diameter ~ 1, Penicillin) +summary(lm1) + +#pseudo-bulk by plate +p_summ_Penicillin <- Penicillin %>% + group_by(plate) %>% + summarise(m_diameter = mean(diameter)) + +#linear model with plate pseudo-bulked data +lm2 <- lm(m_diameter ~ 1, p_summ_Penicillin) +summary(lm2) + +#pseudo-bulk by sample +s_summ_Penicillin <- Penicillin %>% + group_by(sample) %>% + summarise(m_diameter = mean(diameter)) + +#linear model with sample pseudo-bulked data +lm3 <- lm(m_diameter ~ 1, s_summ_Penicillin) +summary(lm3) + + +``` + + +## Pastes data +Hierarchical or nested random effects +```{r} +?Pastes +data(Pastes) + +##dotplot visualization of the data +dotplot(cask ~ strength | reorder(batch, strength), Pastes, + strip = FALSE, strip.left = TRUE, layout = c(1, 10), + ylab = "Cask within batch", + xlab = "Paste strength", jitter.y = TRUE) + +##linear mixed effects model implemented nested/hierarchical design +fm1 <- lme4::lmer(strength ~ (1|batch/cask), Pastes) +summary(fm1) + +##ignoring the repeated measures with a linear model +lm1 <- lm(strength ~ 1, Pastes) +summary(lm1) + +##incorrectly modeling the nested random effects as crossed ones +fm2 <- lme4::lmer(strength ~ (1|batch) + (1|cask), Pastes) +summary(fm2) + +``` \ No newline at end of file diff --git a/intro-linear-mixed-effects-models/LMM.html b/intro-linear-mixed-effects-models/LMM.html new file mode 100644 index 0000000..77ca9b2 --- /dev/null +++ b/intro-linear-mixed-effects-models/LMM.html @@ -0,0 +1,853 @@ + + + + + + + + + + + + + + + +Linear_Mixed_Effects_Models: example datasets & models + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
+

Load the necessary libraries for this markdown

+

If you don’t already have these packages installed then please do so +before implementing the code in the rest of this file.

+
require(lattice)
+
## Loading required package: lattice
+
require(lme4)
+
## Loading required package: lme4
+
## Loading required package: Matrix
+
require(ggplot2)
+
## Loading required package: ggplot2
+
require(dplyr)
+
## Loading required package: dplyr
+
## 
+## Attaching package: 'dplyr'
+
## The following objects are masked from 'package:stats':
+## 
+##     filter, lag
+
## The following objects are masked from 'package:base':
+## 
+##     intersect, setdiff, setequal, union
+
require(magrittr)
+
## Loading required package: magrittr
+
+
+

Sleepstudy

+

Example of a data set modeled by random intercept and random slope +mixed effects model

+
require(lattice)
+require(lme4)
+require(ggplot2)
+require(dplyr)
+require(magrittr)
+
+##what is the sleepstudy
+?sleepstudy
+##load the data
+data("sleepstudy")
+
+##best fit line plot the data ignoring individual subjects
+ggplot(sleepstudy, aes(x=Days, y=Reaction)) +
+  geom_point() +
+  geom_smooth(method = "lm", se=FALSE) +
+  ylab("reaction time in ms") +
+  theme_classic() +
+  theme(text = element_text(size = 16)) 
+
## `geom_smooth()` using formula = 'y ~ x'
+

+
##best fit line plots per individual subject
+ggplot(sleepstudy, aes(x=Days, y=Reaction, color=Subject)) +
+  geom_point() +
+  geom_smooth(method = "lm", se=FALSE) +
+  ylab("reaction time in ms") +
+  theme_classic() +
+  theme(text = element_text(size = 16)) 
+
## `geom_smooth()` using formula = 'y ~ x'
+

+
##as boxplot per subject
+ggplot(sleepstudy, aes(x=Subject, y=Reaction)) +
+  geom_boxplot() +
+  ylab("reaction time in ms") +
+  theme_classic() +
+  theme(text = element_text(size = 16)) 
+

+
##as boxplot per subject coloring the days
+ggplot(sleepstudy, aes(x=Subject, y=Reaction)) +
+  geom_boxplot() +
+  geom_point(aes(color=Days)) +
+  ylab("reaction time in ms") +
+  theme_classic() +
+  theme(text = element_text(size = 16)) 
+

+
##xy plot
+xyplot(Reaction ~ Days | Subject, sleepstudy, type = c("g","p","r"),
+       index = function(x,y) coef(lm(y ~ x))[1],
+       xlab = "Days of sleep deprivation",
+       ylab = "Average reaction time (ms)", aspect = "xy")
+

+
##correlated random intercept and random slope
+fm1 <- lme4::lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
+summary(fm1)
+
## Linear mixed model fit by REML ['lmerMod']
+## Formula: Reaction ~ Days + (Days | Subject)
+##    Data: sleepstudy
+## 
+## REML criterion at convergence: 1743.6
+## 
+## Scaled residuals: 
+##     Min      1Q  Median      3Q     Max 
+## -3.9536 -0.4634  0.0231  0.4634  5.1793 
+## 
+## Random effects:
+##  Groups   Name        Variance Std.Dev. Corr
+##  Subject  (Intercept) 612.10   24.741       
+##           Days         35.07    5.922   0.07
+##  Residual             654.94   25.592       
+## Number of obs: 180, groups:  Subject, 18
+## 
+## Fixed effects:
+##             Estimate Std. Error t value
+## (Intercept)  251.405      6.825  36.838
+## Days          10.467      1.546   6.771
+## 
+## Correlation of Fixed Effects:
+##      (Intr)
+## Days -0.138
+
##uncorrelated random intercept and random slope
+fm2 <- lme4::lmer(Reaction ~ Days + (Days || Subject), sleepstudy)
+summary(fm2)
+
## Linear mixed model fit by REML ['lmerMod']
+## Formula: Reaction ~ Days + ((1 | Subject) + (0 + Days | Subject))
+##    Data: sleepstudy
+## 
+## REML criterion at convergence: 1743.7
+## 
+## Scaled residuals: 
+##     Min      1Q  Median      3Q     Max 
+## -3.9626 -0.4625  0.0204  0.4653  5.1860 
+## 
+## Random effects:
+##  Groups    Name        Variance Std.Dev.
+##  Subject   (Intercept) 627.57   25.051  
+##  Subject.1 Days         35.86    5.988  
+##  Residual              653.58   25.565  
+## Number of obs: 180, groups:  Subject, 18
+## 
+## Fixed effects:
+##             Estimate Std. Error t value
+## (Intercept)  251.405      6.885  36.513
+## Days          10.467      1.560   6.712
+## 
+## Correlation of Fixed Effects:
+##      (Intr)
+## Days -0.184
+
##ignoring the repeated measures within a subject - linear model
+lm1 <- lm(Reaction ~ Days, data = sleepstudy)
+summary(lm1)
+
## 
+## Call:
+## lm(formula = Reaction ~ Days, data = sleepstudy)
+## 
+## Residuals:
+##      Min       1Q   Median       3Q      Max 
+## -110.848  -27.483    1.546   26.142  139.953 
+## 
+## Coefficients:
+##             Estimate Std. Error t value Pr(>|t|)    
+## (Intercept)  251.405      6.610  38.033  < 2e-16 ***
+## Days          10.467      1.238   8.454 9.89e-15 ***
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+## 
+## Residual standard error: 47.71 on 178 degrees of freedom
+## Multiple R-squared:  0.2865, Adjusted R-squared:  0.2825 
+## F-statistic: 71.46 on 1 and 178 DF,  p-value: 9.894e-15
+
+
+

Dye stuff

+

Example of a situation where pseudo-bulking the repeated measures per +subject works

+
?Dyestuff
+data("Dyestuff")
+
+#boxplot of yield by batch
+ggplot(Dyestuff, aes(x=Batch, y=Yield)) +
+  geom_boxplot() +
+  geom_point() +
+  theme_classic() +
+  theme(text = element_text(size = 16)) 
+

+
#simple random intercept model
+fm1 <- lmer(Yield ~ 1|Batch, Dyestuff)
+summary(fm1)
+
## Linear mixed model fit by REML ['lmerMod']
+## Formula: Yield ~ 1 | Batch
+##    Data: Dyestuff
+## 
+## REML criterion at convergence: 319.7
+## 
+## Scaled residuals: 
+##     Min      1Q  Median      3Q     Max 
+## -1.4117 -0.7634  0.1418  0.7792  1.8296 
+## 
+## Random effects:
+##  Groups   Name        Variance Std.Dev.
+##  Batch    (Intercept) 1764     42.00   
+##  Residual             2451     49.51   
+## Number of obs: 30, groups:  Batch, 6
+## 
+## Fixed effects:
+##             Estimate Std. Error t value
+## (Intercept)  1527.50      19.38    78.8
+
#ignore the repeated measures within a batch: linear model
+lm1 <- lm(Yield ~ 1, Dyestuff)
+summary(lm1)
+
## 
+## Call:
+## lm(formula = Yield ~ 1, data = Dyestuff)
+## 
+## Residuals:
+##    Min     1Q Median     3Q    Max 
+## -87.50 -58.75   2.50  47.50 107.50 
+## 
+## Coefficients:
+##             Estimate Std. Error t value Pr(>|t|)    
+## (Intercept)  1527.50      11.51   132.8   <2e-16 ***
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+## 
+## Residual standard error: 63.02 on 29 degrees of freedom
+
#pseudo-bulk yield by batch
+summ_Dyestuff <- Dyestuff %>% 
+  group_by(Batch) %>% 
+  summarise(mYield = mean(Yield))
+
+#linear model on the pseudo-bulked data
+lm2 <- lm(mYield ~ 1, summ_Dyestuff)
+summary(lm2)
+
## 
+## Call:
+## lm(formula = mYield ~ 1, data = summ_Dyestuff)
+## 
+## Residuals:
+##     1     2     3     4     5     6 
+## -22.5   0.5  36.5 -29.5  72.5 -57.5 
+## 
+## Coefficients:
+##             Estimate Std. Error t value Pr(>|t|)    
+## (Intercept)  1527.50      19.38    78.8 6.23e-09 ***
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+## 
+## Residual standard error: 47.48 on 5 degrees of freedom
+
+
+

Penicillin

+

Example illustrated crossed random designs

+
data("Penicillin")
+?Penicillin
+
+#visualize the data as a boxplot
+ggplot(Penicillin, aes(x=plate, y=diameter)) +
+  geom_boxplot() +
+  geom_point(aes(color=sample)) +
+  theme(text = element_text(size = 16)) 
+

+
#mixed effects model with two random effects
+fm1 <- lme4::lmer(diameter ~ (1|plate) + (1|sample), Penicillin)
+summary(fm1)
+
## Linear mixed model fit by REML ['lmerMod']
+## Formula: diameter ~ (1 | plate) + (1 | sample)
+##    Data: Penicillin
+## 
+## REML criterion at convergence: 330.9
+## 
+## Scaled residuals: 
+##      Min       1Q   Median       3Q      Max 
+## -2.07923 -0.67140  0.06292  0.58377  2.97959 
+## 
+## Random effects:
+##  Groups   Name        Variance Std.Dev.
+##  plate    (Intercept) 0.7169   0.8467  
+##  sample   (Intercept) 3.7311   1.9316  
+##  Residual             0.3024   0.5499  
+## Number of obs: 144, groups:  plate, 24; sample, 6
+## 
+## Fixed effects:
+##             Estimate Std. Error t value
+## (Intercept)  22.9722     0.8086   28.41
+
#mixed effects model with one of them
+fm2 <- lme4::lmer(diameter ~  (1|sample), Penicillin)
+summary(fm2)
+
## Linear mixed model fit by REML ['lmerMod']
+## Formula: diameter ~ (1 | sample)
+##    Data: Penicillin
+## 
+## REML criterion at convergence: 435.9
+## 
+## Scaled residuals: 
+##      Min       1Q   Median       3Q      Max 
+## -1.97355 -0.90190  0.02988  1.00350  2.02207 
+## 
+## Random effects:
+##  Groups   Name        Variance Std.Dev.
+##  sample   (Intercept) 3.701    1.924   
+##  Residual             1.019    1.010   
+## Number of obs: 144, groups:  sample, 6
+## 
+## Fixed effects:
+##             Estimate Std. Error t value
+## (Intercept)  22.9722     0.7899   29.08
+
#significance of the plate effect
+anova(fm1, fm2)
+
## refitting model(s) with ML (instead of REML)
+
## Data: Penicillin
+## Models:
+## fm2: diameter ~ (1 | sample)
+## fm1: diameter ~ (1 | plate) + (1 | sample)
+##     npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)    
+## fm2    3 443.19 452.10 -218.59   437.19                        
+## fm1    4 340.19 352.07 -166.09   332.19   105  1  < 2.2e-16 ***
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
#ignoring the repeated measures associated with plate and sample
+lm1 <- lm(diameter ~ 1, Penicillin)
+summary(lm1)
+
## 
+## Call:
+## lm(formula = diameter ~ 1, data = Penicillin)
+## 
+## Residuals:
+##     Min      1Q  Median      3Q     Max 
+## -4.9722 -0.9722  0.0278  1.0278  4.0278 
+## 
+## Coefficients:
+##             Estimate Std. Error t value Pr(>|t|)    
+## (Intercept)  22.9722     0.1693   135.7   <2e-16 ***
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+## 
+## Residual standard error: 2.031 on 143 degrees of freedom
+
#pseudo-bulk by plate
+p_summ_Penicillin <- Penicillin %>% 
+  group_by(plate) %>%
+  summarise(m_diameter = mean(diameter))
+
+#linear model with plate pseudo-bulked data
+lm2 <- lm(m_diameter ~ 1, p_summ_Penicillin)
+summary(lm2)
+
## 
+## Call:
+## lm(formula = m_diameter ~ 1, data = p_summ_Penicillin)
+## 
+## Residuals:
+##      Min       1Q   Median       3Q      Max 
+## -1.47222 -0.68056  0.02778  0.86111  1.52778 
+## 
+## Coefficients:
+##             Estimate Std. Error t value Pr(>|t|)    
+## (Intercept)  22.9722     0.1788   128.5   <2e-16 ***
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+## 
+## Residual standard error: 0.876 on 23 degrees of freedom
+
#pseudo-bulk by sample
+s_summ_Penicillin <- Penicillin %>% 
+  group_by(sample) %>%
+  summarise(m_diameter = mean(diameter))
+
+#linear model with sample pseudo-bulked data
+lm3 <- lm(m_diameter ~ 1, s_summ_Penicillin)
+summary(lm3)
+
## 
+## Call:
+## lm(formula = m_diameter ~ 1, data = s_summ_Penicillin)
+## 
+## Residuals:
+##        1        2        3        4        5        6 
+##  2.19444 -1.01389  1.94444 -0.09722 -0.01389 -3.01389 
+## 
+## Coefficients:
+##             Estimate Std. Error t value Pr(>|t|)    
+## (Intercept)  22.9722     0.7899   29.08 9.01e-07 ***
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+## 
+## Residual standard error: 1.935 on 5 degrees of freedom
+
+
+

Pastes data

+

Hierarchical or nested random effects

+
?Pastes
+data(Pastes)
+
+##dotplot visualization of the data
+dotplot(cask ~ strength | reorder(batch, strength), Pastes,
+        strip = FALSE, strip.left = TRUE, layout = c(1, 10),
+        ylab = "Cask within batch",
+        xlab = "Paste strength", jitter.y = TRUE)
+

+
##linear mixed effects model implemented nested/hierarchical design
+fm1 <- lme4::lmer(strength ~ (1|batch/cask), Pastes)
+summary(fm1)
+
## Linear mixed model fit by REML ['lmerMod']
+## Formula: strength ~ (1 | batch/cask)
+##    Data: Pastes
+## 
+## REML criterion at convergence: 247
+## 
+## Scaled residuals: 
+##     Min      1Q  Median      3Q     Max 
+## -1.4798 -0.5156  0.0095  0.4720  1.3897 
+## 
+## Random effects:
+##  Groups     Name        Variance Std.Dev.
+##  cask:batch (Intercept) 8.434    2.9041  
+##  batch      (Intercept) 1.657    1.2874  
+##  Residual               0.678    0.8234  
+## Number of obs: 60, groups:  cask:batch, 30; batch, 10
+## 
+## Fixed effects:
+##             Estimate Std. Error t value
+## (Intercept)  60.0533     0.6769   88.72
+
##ignoring the repeated measures with a linear model
+lm1 <- lm(strength ~ 1, Pastes)
+summary(lm1)
+
## 
+## Call:
+## lm(formula = strength ~ 1, data = Pastes)
+## 
+## Residuals:
+##     Min      1Q  Median      3Q     Max 
+## -5.8533 -2.5533 -0.7533  2.8217  5.9467 
+## 
+## Coefficients:
+##             Estimate Std. Error t value Pr(>|t|)    
+## (Intercept)   60.053      0.418   143.7   <2e-16 ***
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+## 
+## Residual standard error: 3.238 on 59 degrees of freedom
+
##incorrectly modeling the nested random effects as crossed ones
+fm2 <- lme4::lmer(strength ~ (1|batch) + (1|cask), Pastes)
+summary(fm2)
+
## Linear mixed model fit by REML ['lmerMod']
+## Formula: strength ~ (1 | batch) + (1 | cask)
+##    Data: Pastes
+## 
+## REML criterion at convergence: 301.5
+## 
+## Scaled residuals: 
+##      Min       1Q   Median       3Q      Max 
+## -1.49025 -0.90096 -0.01247  0.62911  1.82246 
+## 
+## Random effects:
+##  Groups   Name        Variance Std.Dev.
+##  batch    (Intercept) 3.3639   1.8341  
+##  cask     (Intercept) 0.1487   0.3856  
+##  Residual             7.3060   2.7030  
+## Number of obs: 60, groups:  batch, 10; cask, 3
+## 
+## Fixed effects:
+##             Estimate Std. Error t value
+## (Intercept)  60.0533     0.7125   84.28
+
+ + + + +
+ + + + + + + + + + + + + + +