Linear_Mixed_Effects_Models: example datasets & models

Load the necessary libraries for this markdown

If you don’t already have these packages installed then please do so before implementing the code in the rest of this file.

require(lattice)

## Loading required package: lattice

require(lme4)

## Loading required package: lme4

## Loading required package: Matrix

require(ggplot2)

## Loading required package: ggplot2

require(dplyr)

## Loading required package: dplyr

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

require(magrittr)

## Loading required package: magrittr

Sleepstudy

Example of a data set modeled by random intercept and random slope mixed effects model

require(lattice)
require(lme4)
require(ggplot2)
require(dplyr)
require(magrittr)

##what is the sleepstudy
?sleepstudy
##load the data
data("sleepstudy")

##best fit line plot the data ignoring individual subjects
ggplot(sleepstudy, aes(x=Days, y=Reaction)) +
  geom_point() +
  geom_smooth(method = "lm", se=FALSE) +
  ylab("reaction time in ms") +
  theme_classic() +
  theme(text = element_text(size = 16))

## `geom_smooth()` using formula = 'y ~ x'

##best fit line plots per individual subject
ggplot(sleepstudy, aes(x=Days, y=Reaction, color=Subject)) +
  geom_point() +
  geom_smooth(method = "lm", se=FALSE) +
  ylab("reaction time in ms") +
  theme_classic() +
  theme(text = element_text(size = 16))

## `geom_smooth()` using formula = 'y ~ x'

##as boxplot per subject
ggplot(sleepstudy, aes(x=Subject, y=Reaction)) +
  geom_boxplot() +
  ylab("reaction time in ms") +
  theme_classic() +
  theme(text = element_text(size = 16))

##as boxplot per subject coloring the days
ggplot(sleepstudy, aes(x=Subject, y=Reaction)) +
  geom_boxplot() +
  geom_point(aes(color=Days)) +
  ylab("reaction time in ms") +
  theme_classic() +
  theme(text = element_text(size = 16))

##xy plot
xyplot(Reaction ~ Days | Subject, sleepstudy, type = c("g","p","r"),
       index = function(x,y) coef(lm(y ~ x))[1],
       xlab = "Days of sleep deprivation",
       ylab = "Average reaction time (ms)", aspect = "xy")

##correlated random intercept and random slope
fm1 <- lme4::lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
summary(fm1)

## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ Days + (Days | Subject)
##    Data: sleepstudy
## 
## REML criterion at convergence: 1743.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9536 -0.4634  0.0231  0.4634  5.1793 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  Subject  (Intercept) 612.10   24.741       
##           Days         35.07    5.922   0.07
##  Residual             654.94   25.592       
## Number of obs: 180, groups:  Subject, 18
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  251.405      6.825  36.838
## Days          10.467      1.546   6.771
## 
## Correlation of Fixed Effects:
##      (Intr)
## Days -0.138

##uncorrelated random intercept and random slope
fm2 <- lme4::lmer(Reaction ~ Days + (Days || Subject), sleepstudy)
summary(fm2)

## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ Days + ((1 | Subject) + (0 + Days | Subject))
##    Data: sleepstudy
## 
## REML criterion at convergence: 1743.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9626 -0.4625  0.0204  0.4653  5.1860 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  Subject   (Intercept) 627.57   25.051  
##  Subject.1 Days         35.86    5.988  
##  Residual              653.58   25.565  
## Number of obs: 180, groups:  Subject, 18
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  251.405      6.885  36.513
## Days          10.467      1.560   6.712
## 
## Correlation of Fixed Effects:
##      (Intr)
## Days -0.184

##ignoring the repeated measures within a subject - linear model
lm1 <- lm(Reaction ~ Days, data = sleepstudy)
summary(lm1)

## 
## Call:
## lm(formula = Reaction ~ Days, data = sleepstudy)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -110.848  -27.483    1.546   26.142  139.953 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  251.405      6.610  38.033  < 2e-16 ***
## Days          10.467      1.238   8.454 9.89e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47.71 on 178 degrees of freedom
## Multiple R-squared:  0.2865, Adjusted R-squared:  0.2825 
## F-statistic: 71.46 on 1 and 178 DF,  p-value: 9.894e-15

Dye stuff

Example of a situation where pseudo-bulking the repeated measures per subject works

?Dyestuff
data("Dyestuff")

#boxplot of yield by batch
ggplot(Dyestuff, aes(x=Batch, y=Yield)) +
  geom_boxplot() +
  geom_point() +
  theme_classic() +
  theme(text = element_text(size = 16))

#simple random intercept model
fm1 <- lmer(Yield ~ 1|Batch, Dyestuff)
summary(fm1)

## Linear mixed model fit by REML ['lmerMod']
## Formula: Yield ~ 1 | Batch
##    Data: Dyestuff
## 
## REML criterion at convergence: 319.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4117 -0.7634  0.1418  0.7792  1.8296 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Batch    (Intercept) 1764     42.00   
##  Residual             2451     49.51   
## Number of obs: 30, groups:  Batch, 6
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  1527.50      19.38    78.8

#ignore the repeated measures within a batch: linear model
lm1 <- lm(Yield ~ 1, Dyestuff)
summary(lm1)

## 
## Call:
## lm(formula = Yield ~ 1, data = Dyestuff)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -87.50 -58.75   2.50  47.50 107.50 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1527.50      11.51   132.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 63.02 on 29 degrees of freedom

#pseudo-bulk yield by batch
summ_Dyestuff <- Dyestuff %>% 
  group_by(Batch) %>% 
  summarise(mYield = mean(Yield))

#linear model on the pseudo-bulked data
lm2 <- lm(mYield ~ 1, summ_Dyestuff)
summary(lm2)

## 
## Call:
## lm(formula = mYield ~ 1, data = summ_Dyestuff)
## 
## Residuals:
##     1     2     3     4     5     6 
## -22.5   0.5  36.5 -29.5  72.5 -57.5 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1527.50      19.38    78.8 6.23e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47.48 on 5 degrees of freedom

Penicillin

Example illustrated crossed random designs

data("Penicillin")
?Penicillin

#visualize the data as a boxplot
ggplot(Penicillin, aes(x=plate, y=diameter)) +
  geom_boxplot() +
  geom_point(aes(color=sample)) +
  theme(text = element_text(size = 16))

#mixed effects model with two random effects
fm1 <- lme4::lmer(diameter ~ (1|plate) + (1|sample), Penicillin)
summary(fm1)

## Linear mixed model fit by REML ['lmerMod']
## Formula: diameter ~ (1 | plate) + (1 | sample)
##    Data: Penicillin
## 
## REML criterion at convergence: 330.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.07923 -0.67140  0.06292  0.58377  2.97959 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  plate    (Intercept) 0.7169   0.8467  
##  sample   (Intercept) 3.7311   1.9316  
##  Residual             0.3024   0.5499  
## Number of obs: 144, groups:  plate, 24; sample, 6
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  22.9722     0.8086   28.41

#mixed effects model with one of them
fm2 <- lme4::lmer(diameter ~  (1|sample), Penicillin)
summary(fm2)

## Linear mixed model fit by REML ['lmerMod']
## Formula: diameter ~ (1 | sample)
##    Data: Penicillin
## 
## REML criterion at convergence: 435.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.97355 -0.90190  0.02988  1.00350  2.02207 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  sample   (Intercept) 3.701    1.924   
##  Residual             1.019    1.010   
## Number of obs: 144, groups:  sample, 6
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  22.9722     0.7899   29.08

#significance of the plate effect
anova(fm1, fm2)

## refitting model(s) with ML (instead of REML)

## Data: Penicillin
## Models:
## fm2: diameter ~ (1 | sample)
## fm1: diameter ~ (1 | plate) + (1 | sample)
##     npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)    
## fm2    3 443.19 452.10 -218.59   437.19                        
## fm1    4 340.19 352.07 -166.09   332.19   105  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#ignoring the repeated measures associated with plate and sample
lm1 <- lm(diameter ~ 1, Penicillin)
summary(lm1)

## 
## Call:
## lm(formula = diameter ~ 1, data = Penicillin)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9722 -0.9722  0.0278  1.0278  4.0278 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  22.9722     0.1693   135.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.031 on 143 degrees of freedom

#pseudo-bulk by plate
p_summ_Penicillin <- Penicillin %>% 
  group_by(plate) %>%
  summarise(m_diameter = mean(diameter))

#linear model with plate pseudo-bulked data
lm2 <- lm(m_diameter ~ 1, p_summ_Penicillin)
summary(lm2)

## 
## Call:
## lm(formula = m_diameter ~ 1, data = p_summ_Penicillin)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.47222 -0.68056  0.02778  0.86111  1.52778 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  22.9722     0.1788   128.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.876 on 23 degrees of freedom

#pseudo-bulk by sample
s_summ_Penicillin <- Penicillin %>% 
  group_by(sample) %>%
  summarise(m_diameter = mean(diameter))

#linear model with sample pseudo-bulked data
lm3 <- lm(m_diameter ~ 1, s_summ_Penicillin)
summary(lm3)

## 
## Call:
## lm(formula = m_diameter ~ 1, data = s_summ_Penicillin)
## 
## Residuals:
##        1        2        3        4        5        6 
##  2.19444 -1.01389  1.94444 -0.09722 -0.01389 -3.01389 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  22.9722     0.7899   29.08 9.01e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.935 on 5 degrees of freedom

Pastes data

Hierarchical or nested random effects

?Pastes
data(Pastes)

##dotplot visualization of the data
dotplot(cask ~ strength | reorder(batch, strength), Pastes,
        strip = FALSE, strip.left = TRUE, layout = c(1, 10),
        ylab = "Cask within batch",
        xlab = "Paste strength", jitter.y = TRUE)

##linear mixed effects model implemented nested/hierarchical design
fm1 <- lme4::lmer(strength ~ (1|batch/cask), Pastes)
summary(fm1)

## Linear mixed model fit by REML ['lmerMod']
## Formula: strength ~ (1 | batch/cask)
##    Data: Pastes
## 
## REML criterion at convergence: 247
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4798 -0.5156  0.0095  0.4720  1.3897 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  cask:batch (Intercept) 8.434    2.9041  
##  batch      (Intercept) 1.657    1.2874  
##  Residual               0.678    0.8234  
## Number of obs: 60, groups:  cask:batch, 30; batch, 10
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  60.0533     0.6769   88.72

##ignoring the repeated measures with a linear model
lm1 <- lm(strength ~ 1, Pastes)
summary(lm1)

## 
## Call:
## lm(formula = strength ~ 1, data = Pastes)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.8533 -2.5533 -0.7533  2.8217  5.9467 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   60.053      0.418   143.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.238 on 59 degrees of freedom

##incorrectly modeling the nested random effects as crossed ones
fm2 <- lme4::lmer(strength ~ (1|batch) + (1|cask), Pastes)
summary(fm2)

## Linear mixed model fit by REML ['lmerMod']
## Formula: strength ~ (1 | batch) + (1 | cask)
##    Data: Pastes
## 
## REML criterion at convergence: 301.5
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.49025 -0.90096 -0.01247  0.62911  1.82246 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  batch    (Intercept) 3.3639   1.8341  
##  cask     (Intercept) 0.1487   0.3856  
##  Residual             7.3060   2.7030  
## Number of obs: 60, groups:  batch, 10; cask, 3
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  60.0533     0.7125   84.28