Load the necessary libraries for this markdown

If you don’t already have these packages installed then please do so before implementing the code in the rest of this file.

require(lattice)
## Loading required package: lattice
require(lme4)
## Loading required package: lme4
## Loading required package: Matrix
require(ggplot2)
## Loading required package: ggplot2
require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
require(magrittr)
## Loading required package: magrittr

Sleepstudy

Example of a data set modeled by random intercept and random slope mixed effects model

require(lattice)
require(lme4)
require(ggplot2)
require(dplyr)
require(magrittr)

##what is the sleepstudy
?sleepstudy
##load the data
data("sleepstudy")

##best fit line plot the data ignoring individual subjects
ggplot(sleepstudy, aes(x=Days, y=Reaction)) +
  geom_point() +
  geom_smooth(method = "lm", se=FALSE) +
  ylab("reaction time in ms") +
  theme_classic() +
  theme(text = element_text(size = 16)) 
## `geom_smooth()` using formula = 'y ~ x'

##best fit line plots per individual subject
ggplot(sleepstudy, aes(x=Days, y=Reaction, color=Subject)) +
  geom_point() +
  geom_smooth(method = "lm", se=FALSE) +
  ylab("reaction time in ms") +
  theme_classic() +
  theme(text = element_text(size = 16)) 
## `geom_smooth()` using formula = 'y ~ x'

##as boxplot per subject
ggplot(sleepstudy, aes(x=Subject, y=Reaction)) +
  geom_boxplot() +
  ylab("reaction time in ms") +
  theme_classic() +
  theme(text = element_text(size = 16)) 

##as boxplot per subject coloring the days
ggplot(sleepstudy, aes(x=Subject, y=Reaction)) +
  geom_boxplot() +
  geom_point(aes(color=Days)) +
  ylab("reaction time in ms") +
  theme_classic() +
  theme(text = element_text(size = 16)) 

##xy plot
xyplot(Reaction ~ Days | Subject, sleepstudy, type = c("g","p","r"),
       index = function(x,y) coef(lm(y ~ x))[1],
       xlab = "Days of sleep deprivation",
       ylab = "Average reaction time (ms)", aspect = "xy")

##correlated random intercept and random slope
fm1 <- lme4::lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
summary(fm1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ Days + (Days | Subject)
##    Data: sleepstudy
## 
## REML criterion at convergence: 1743.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9536 -0.4634  0.0231  0.4634  5.1793 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  Subject  (Intercept) 612.10   24.741       
##           Days         35.07    5.922   0.07
##  Residual             654.94   25.592       
## Number of obs: 180, groups:  Subject, 18
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  251.405      6.825  36.838
## Days          10.467      1.546   6.771
## 
## Correlation of Fixed Effects:
##      (Intr)
## Days -0.138
##uncorrelated random intercept and random slope
fm2 <- lme4::lmer(Reaction ~ Days + (Days || Subject), sleepstudy)
summary(fm2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ Days + ((1 | Subject) + (0 + Days | Subject))
##    Data: sleepstudy
## 
## REML criterion at convergence: 1743.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9626 -0.4625  0.0204  0.4653  5.1860 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  Subject   (Intercept) 627.57   25.051  
##  Subject.1 Days         35.86    5.988  
##  Residual              653.58   25.565  
## Number of obs: 180, groups:  Subject, 18
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  251.405      6.885  36.513
## Days          10.467      1.560   6.712
## 
## Correlation of Fixed Effects:
##      (Intr)
## Days -0.184
##ignoring the repeated measures within a subject - linear model
lm1 <- lm(Reaction ~ Days, data = sleepstudy)
summary(lm1)
## 
## Call:
## lm(formula = Reaction ~ Days, data = sleepstudy)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -110.848  -27.483    1.546   26.142  139.953 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  251.405      6.610  38.033  < 2e-16 ***
## Days          10.467      1.238   8.454 9.89e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47.71 on 178 degrees of freedom
## Multiple R-squared:  0.2865, Adjusted R-squared:  0.2825 
## F-statistic: 71.46 on 1 and 178 DF,  p-value: 9.894e-15

Dye stuff

Example of a situation where pseudo-bulking the repeated measures per subject works

?Dyestuff
data("Dyestuff")

#boxplot of yield by batch
ggplot(Dyestuff, aes(x=Batch, y=Yield)) +
  geom_boxplot() +
  geom_point() +
  theme_classic() +
  theme(text = element_text(size = 16)) 

#simple random intercept model
fm1 <- lmer(Yield ~ 1|Batch, Dyestuff)
summary(fm1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Yield ~ 1 | Batch
##    Data: Dyestuff
## 
## REML criterion at convergence: 319.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4117 -0.7634  0.1418  0.7792  1.8296 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Batch    (Intercept) 1764     42.00   
##  Residual             2451     49.51   
## Number of obs: 30, groups:  Batch, 6
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  1527.50      19.38    78.8
#ignore the repeated measures within a batch: linear model
lm1 <- lm(Yield ~ 1, Dyestuff)
summary(lm1)
## 
## Call:
## lm(formula = Yield ~ 1, data = Dyestuff)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -87.50 -58.75   2.50  47.50 107.50 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1527.50      11.51   132.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 63.02 on 29 degrees of freedom
#pseudo-bulk yield by batch
summ_Dyestuff <- Dyestuff %>% 
  group_by(Batch) %>% 
  summarise(mYield = mean(Yield))

#linear model on the pseudo-bulked data
lm2 <- lm(mYield ~ 1, summ_Dyestuff)
summary(lm2)
## 
## Call:
## lm(formula = mYield ~ 1, data = summ_Dyestuff)
## 
## Residuals:
##     1     2     3     4     5     6 
## -22.5   0.5  36.5 -29.5  72.5 -57.5 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1527.50      19.38    78.8 6.23e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47.48 on 5 degrees of freedom

Penicillin

Example illustrated crossed random designs

data("Penicillin")
?Penicillin

#visualize the data as a boxplot
ggplot(Penicillin, aes(x=plate, y=diameter)) +
  geom_boxplot() +
  geom_point(aes(color=sample)) +
  theme(text = element_text(size = 16)) 

#mixed effects model with two random effects
fm1 <- lme4::lmer(diameter ~ (1|plate) + (1|sample), Penicillin)
summary(fm1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: diameter ~ (1 | plate) + (1 | sample)
##    Data: Penicillin
## 
## REML criterion at convergence: 330.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.07923 -0.67140  0.06292  0.58377  2.97959 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  plate    (Intercept) 0.7169   0.8467  
##  sample   (Intercept) 3.7311   1.9316  
##  Residual             0.3024   0.5499  
## Number of obs: 144, groups:  plate, 24; sample, 6
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  22.9722     0.8086   28.41
#mixed effects model with one of them
fm2 <- lme4::lmer(diameter ~  (1|sample), Penicillin)
summary(fm2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: diameter ~ (1 | sample)
##    Data: Penicillin
## 
## REML criterion at convergence: 435.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.97355 -0.90190  0.02988  1.00350  2.02207 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  sample   (Intercept) 3.701    1.924   
##  Residual             1.019    1.010   
## Number of obs: 144, groups:  sample, 6
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  22.9722     0.7899   29.08
#significance of the plate effect
anova(fm1, fm2)
## refitting model(s) with ML (instead of REML)
## Data: Penicillin
## Models:
## fm2: diameter ~ (1 | sample)
## fm1: diameter ~ (1 | plate) + (1 | sample)
##     npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)    
## fm2    3 443.19 452.10 -218.59   437.19                        
## fm1    4 340.19 352.07 -166.09   332.19   105  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#ignoring the repeated measures associated with plate and sample
lm1 <- lm(diameter ~ 1, Penicillin)
summary(lm1)
## 
## Call:
## lm(formula = diameter ~ 1, data = Penicillin)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9722 -0.9722  0.0278  1.0278  4.0278 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  22.9722     0.1693   135.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.031 on 143 degrees of freedom
#pseudo-bulk by plate
p_summ_Penicillin <- Penicillin %>% 
  group_by(plate) %>%
  summarise(m_diameter = mean(diameter))

#linear model with plate pseudo-bulked data
lm2 <- lm(m_diameter ~ 1, p_summ_Penicillin)
summary(lm2)
## 
## Call:
## lm(formula = m_diameter ~ 1, data = p_summ_Penicillin)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.47222 -0.68056  0.02778  0.86111  1.52778 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  22.9722     0.1788   128.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.876 on 23 degrees of freedom
#pseudo-bulk by sample
s_summ_Penicillin <- Penicillin %>% 
  group_by(sample) %>%
  summarise(m_diameter = mean(diameter))

#linear model with sample pseudo-bulked data
lm3 <- lm(m_diameter ~ 1, s_summ_Penicillin)
summary(lm3)
## 
## Call:
## lm(formula = m_diameter ~ 1, data = s_summ_Penicillin)
## 
## Residuals:
##        1        2        3        4        5        6 
##  2.19444 -1.01389  1.94444 -0.09722 -0.01389 -3.01389 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  22.9722     0.7899   29.08 9.01e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.935 on 5 degrees of freedom

Pastes data

Hierarchical or nested random effects

?Pastes
data(Pastes)

##dotplot visualization of the data
dotplot(cask ~ strength | reorder(batch, strength), Pastes,
        strip = FALSE, strip.left = TRUE, layout = c(1, 10),
        ylab = "Cask within batch",
        xlab = "Paste strength", jitter.y = TRUE)

##linear mixed effects model implemented nested/hierarchical design
fm1 <- lme4::lmer(strength ~ (1|batch/cask), Pastes)
summary(fm1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: strength ~ (1 | batch/cask)
##    Data: Pastes
## 
## REML criterion at convergence: 247
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4798 -0.5156  0.0095  0.4720  1.3897 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  cask:batch (Intercept) 8.434    2.9041  
##  batch      (Intercept) 1.657    1.2874  
##  Residual               0.678    0.8234  
## Number of obs: 60, groups:  cask:batch, 30; batch, 10
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  60.0533     0.6769   88.72
##ignoring the repeated measures with a linear model
lm1 <- lm(strength ~ 1, Pastes)
summary(lm1)
## 
## Call:
## lm(formula = strength ~ 1, data = Pastes)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.8533 -2.5533 -0.7533  2.8217  5.9467 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   60.053      0.418   143.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.238 on 59 degrees of freedom
##incorrectly modeling the nested random effects as crossed ones
fm2 <- lme4::lmer(strength ~ (1|batch) + (1|cask), Pastes)
summary(fm2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: strength ~ (1 | batch) + (1 | cask)
##    Data: Pastes
## 
## REML criterion at convergence: 301.5
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.49025 -0.90096 -0.01247  0.62911  1.82246 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  batch    (Intercept) 3.3639   1.8341  
##  cask     (Intercept) 0.1487   0.3856  
##  Residual             7.3060   2.7030  
## Number of obs: 60, groups:  batch, 10; cask, 3
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  60.0533     0.7125   84.28