If you don’t already have these packages installed then please do so before implementing the code in the rest of this file.
require(lattice)
## Loading required package: lattice
require(lme4)
## Loading required package: lme4
## Loading required package: Matrix
require(ggplot2)
## Loading required package: ggplot2
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(magrittr)
## Loading required package: magrittr
Example of a data set modeled by random intercept and random slope mixed effects model
require(lattice)
require(lme4)
require(ggplot2)
require(dplyr)
require(magrittr)
##what is the sleepstudy
?sleepstudy
##load the data
data("sleepstudy")
##best fit line plot the data ignoring individual subjects
ggplot(sleepstudy, aes(x=Days, y=Reaction)) +
geom_point() +
geom_smooth(method = "lm", se=FALSE) +
ylab("reaction time in ms") +
theme_classic() +
theme(text = element_text(size = 16))
## `geom_smooth()` using formula = 'y ~ x'
##best fit line plots per individual subject
ggplot(sleepstudy, aes(x=Days, y=Reaction, color=Subject)) +
geom_point() +
geom_smooth(method = "lm", se=FALSE) +
ylab("reaction time in ms") +
theme_classic() +
theme(text = element_text(size = 16))
## `geom_smooth()` using formula = 'y ~ x'
##as boxplot per subject
ggplot(sleepstudy, aes(x=Subject, y=Reaction)) +
geom_boxplot() +
ylab("reaction time in ms") +
theme_classic() +
theme(text = element_text(size = 16))
##as boxplot per subject coloring the days
ggplot(sleepstudy, aes(x=Subject, y=Reaction)) +
geom_boxplot() +
geom_point(aes(color=Days)) +
ylab("reaction time in ms") +
theme_classic() +
theme(text = element_text(size = 16))
##xy plot
xyplot(Reaction ~ Days | Subject, sleepstudy, type = c("g","p","r"),
index = function(x,y) coef(lm(y ~ x))[1],
xlab = "Days of sleep deprivation",
ylab = "Average reaction time (ms)", aspect = "xy")
##correlated random intercept and random slope
fm1 <- lme4::lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
summary(fm1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ Days + (Days | Subject)
## Data: sleepstudy
##
## REML criterion at convergence: 1743.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9536 -0.4634 0.0231 0.4634 5.1793
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Subject (Intercept) 612.10 24.741
## Days 35.07 5.922 0.07
## Residual 654.94 25.592
## Number of obs: 180, groups: Subject, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 251.405 6.825 36.838
## Days 10.467 1.546 6.771
##
## Correlation of Fixed Effects:
## (Intr)
## Days -0.138
##uncorrelated random intercept and random slope
fm2 <- lme4::lmer(Reaction ~ Days + (Days || Subject), sleepstudy)
summary(fm2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ Days + ((1 | Subject) + (0 + Days | Subject))
## Data: sleepstudy
##
## REML criterion at convergence: 1743.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9626 -0.4625 0.0204 0.4653 5.1860
##
## Random effects:
## Groups Name Variance Std.Dev.
## Subject (Intercept) 627.57 25.051
## Subject.1 Days 35.86 5.988
## Residual 653.58 25.565
## Number of obs: 180, groups: Subject, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 251.405 6.885 36.513
## Days 10.467 1.560 6.712
##
## Correlation of Fixed Effects:
## (Intr)
## Days -0.184
##ignoring the repeated measures within a subject - linear model
lm1 <- lm(Reaction ~ Days, data = sleepstudy)
summary(lm1)
##
## Call:
## lm(formula = Reaction ~ Days, data = sleepstudy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -110.848 -27.483 1.546 26.142 139.953
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 251.405 6.610 38.033 < 2e-16 ***
## Days 10.467 1.238 8.454 9.89e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 47.71 on 178 degrees of freedom
## Multiple R-squared: 0.2865, Adjusted R-squared: 0.2825
## F-statistic: 71.46 on 1 and 178 DF, p-value: 9.894e-15
Example of a situation where pseudo-bulking the repeated measures per subject works
?Dyestuff
data("Dyestuff")
#boxplot of yield by batch
ggplot(Dyestuff, aes(x=Batch, y=Yield)) +
geom_boxplot() +
geom_point() +
theme_classic() +
theme(text = element_text(size = 16))
#simple random intercept model
fm1 <- lmer(Yield ~ 1|Batch, Dyestuff)
summary(fm1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Yield ~ 1 | Batch
## Data: Dyestuff
##
## REML criterion at convergence: 319.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4117 -0.7634 0.1418 0.7792 1.8296
##
## Random effects:
## Groups Name Variance Std.Dev.
## Batch (Intercept) 1764 42.00
## Residual 2451 49.51
## Number of obs: 30, groups: Batch, 6
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1527.50 19.38 78.8
#ignore the repeated measures within a batch: linear model
lm1 <- lm(Yield ~ 1, Dyestuff)
summary(lm1)
##
## Call:
## lm(formula = Yield ~ 1, data = Dyestuff)
##
## Residuals:
## Min 1Q Median 3Q Max
## -87.50 -58.75 2.50 47.50 107.50
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1527.50 11.51 132.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 63.02 on 29 degrees of freedom
#pseudo-bulk yield by batch
summ_Dyestuff <- Dyestuff %>%
group_by(Batch) %>%
summarise(mYield = mean(Yield))
#linear model on the pseudo-bulked data
lm2 <- lm(mYield ~ 1, summ_Dyestuff)
summary(lm2)
##
## Call:
## lm(formula = mYield ~ 1, data = summ_Dyestuff)
##
## Residuals:
## 1 2 3 4 5 6
## -22.5 0.5 36.5 -29.5 72.5 -57.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1527.50 19.38 78.8 6.23e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 47.48 on 5 degrees of freedom
Example illustrated crossed random designs
data("Penicillin")
?Penicillin
#visualize the data as a boxplot
ggplot(Penicillin, aes(x=plate, y=diameter)) +
geom_boxplot() +
geom_point(aes(color=sample)) +
theme(text = element_text(size = 16))
#mixed effects model with two random effects
fm1 <- lme4::lmer(diameter ~ (1|plate) + (1|sample), Penicillin)
summary(fm1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: diameter ~ (1 | plate) + (1 | sample)
## Data: Penicillin
##
## REML criterion at convergence: 330.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.07923 -0.67140 0.06292 0.58377 2.97959
##
## Random effects:
## Groups Name Variance Std.Dev.
## plate (Intercept) 0.7169 0.8467
## sample (Intercept) 3.7311 1.9316
## Residual 0.3024 0.5499
## Number of obs: 144, groups: plate, 24; sample, 6
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 22.9722 0.8086 28.41
#mixed effects model with one of them
fm2 <- lme4::lmer(diameter ~ (1|sample), Penicillin)
summary(fm2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: diameter ~ (1 | sample)
## Data: Penicillin
##
## REML criterion at convergence: 435.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.97355 -0.90190 0.02988 1.00350 2.02207
##
## Random effects:
## Groups Name Variance Std.Dev.
## sample (Intercept) 3.701 1.924
## Residual 1.019 1.010
## Number of obs: 144, groups: sample, 6
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 22.9722 0.7899 29.08
#significance of the plate effect
anova(fm1, fm2)
## refitting model(s) with ML (instead of REML)
## Data: Penicillin
## Models:
## fm2: diameter ~ (1 | sample)
## fm1: diameter ~ (1 | plate) + (1 | sample)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## fm2 3 443.19 452.10 -218.59 437.19
## fm1 4 340.19 352.07 -166.09 332.19 105 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#ignoring the repeated measures associated with plate and sample
lm1 <- lm(diameter ~ 1, Penicillin)
summary(lm1)
##
## Call:
## lm(formula = diameter ~ 1, data = Penicillin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.9722 -0.9722 0.0278 1.0278 4.0278
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.9722 0.1693 135.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.031 on 143 degrees of freedom
#pseudo-bulk by plate
p_summ_Penicillin <- Penicillin %>%
group_by(plate) %>%
summarise(m_diameter = mean(diameter))
#linear model with plate pseudo-bulked data
lm2 <- lm(m_diameter ~ 1, p_summ_Penicillin)
summary(lm2)
##
## Call:
## lm(formula = m_diameter ~ 1, data = p_summ_Penicillin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.47222 -0.68056 0.02778 0.86111 1.52778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.9722 0.1788 128.5 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.876 on 23 degrees of freedom
#pseudo-bulk by sample
s_summ_Penicillin <- Penicillin %>%
group_by(sample) %>%
summarise(m_diameter = mean(diameter))
#linear model with sample pseudo-bulked data
lm3 <- lm(m_diameter ~ 1, s_summ_Penicillin)
summary(lm3)
##
## Call:
## lm(formula = m_diameter ~ 1, data = s_summ_Penicillin)
##
## Residuals:
## 1 2 3 4 5 6
## 2.19444 -1.01389 1.94444 -0.09722 -0.01389 -3.01389
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.9722 0.7899 29.08 9.01e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.935 on 5 degrees of freedom
Hierarchical or nested random effects
?Pastes
data(Pastes)
##dotplot visualization of the data
dotplot(cask ~ strength | reorder(batch, strength), Pastes,
strip = FALSE, strip.left = TRUE, layout = c(1, 10),
ylab = "Cask within batch",
xlab = "Paste strength", jitter.y = TRUE)
##linear mixed effects model implemented nested/hierarchical design
fm1 <- lme4::lmer(strength ~ (1|batch/cask), Pastes)
summary(fm1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: strength ~ (1 | batch/cask)
## Data: Pastes
##
## REML criterion at convergence: 247
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4798 -0.5156 0.0095 0.4720 1.3897
##
## Random effects:
## Groups Name Variance Std.Dev.
## cask:batch (Intercept) 8.434 2.9041
## batch (Intercept) 1.657 1.2874
## Residual 0.678 0.8234
## Number of obs: 60, groups: cask:batch, 30; batch, 10
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 60.0533 0.6769 88.72
##ignoring the repeated measures with a linear model
lm1 <- lm(strength ~ 1, Pastes)
summary(lm1)
##
## Call:
## lm(formula = strength ~ 1, data = Pastes)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.8533 -2.5533 -0.7533 2.8217 5.9467
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 60.053 0.418 143.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.238 on 59 degrees of freedom
##incorrectly modeling the nested random effects as crossed ones
fm2 <- lme4::lmer(strength ~ (1|batch) + (1|cask), Pastes)
summary(fm2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: strength ~ (1 | batch) + (1 | cask)
## Data: Pastes
##
## REML criterion at convergence: 301.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.49025 -0.90096 -0.01247 0.62911 1.82246
##
## Random effects:
## Groups Name Variance Std.Dev.
## batch (Intercept) 3.3639 1.8341
## cask (Intercept) 0.1487 0.3856
## Residual 7.3060 2.7030
## Number of obs: 60, groups: batch, 10; cask, 3
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 60.0533 0.7125 84.28