mirror of
https://github.com/gladstone-institutes/Bioinformatics-Workshops.git
synced 2025-11-30 09:45:43 -08:00
201 lines
5.4 KiB
R
201 lines
5.4 KiB
R
rm(list = ls())
|
|
|
|
require(dplyr)
|
|
require(tidyr)
|
|
require(magrittr)
|
|
require(ggplot2)
|
|
require(lme4)
|
|
|
|
set.seed(1234)
|
|
input_param <- list(m = 10, sigma_a = 3, sigma_e = 1, n=10, b0=10)
|
|
naive_var_of_mean <- (input_param$sigma_a^2 + input_param$sigma_e^2)/(input_param$m*input_param$n)
|
|
true_var_of_mean <- (input_param$n*input_param$sigma_a^2 + input_param$sigma_e^2)/(input_param$m*input_param$n)
|
|
|
|
Nsim <- 100
|
|
|
|
get_variance_estimates <- function(s, input_param) {
|
|
|
|
m = input_param$m
|
|
n = rpois(m, input_param$n)
|
|
sigma_a = input_param$sigma_a
|
|
sigma_e = input_param$sigma_e
|
|
b0 = input_param$b0
|
|
|
|
subject_effect <- rnorm(m, mean = 0, sd = sigma_a)
|
|
Y <- vector(mode = "numeric")
|
|
subject_ID <- NULL
|
|
for(i in 1:m) {
|
|
Y %<>% append(., b0 + subject_effect[i] + rnorm(n[i], mean = 0, sd = sigma_e))
|
|
subject_ID %<>% append(., rep(paste0("subjectID_",i), n[i]))
|
|
}
|
|
|
|
data <- data.frame(subject_ID, Y)
|
|
|
|
ggplot(data, aes(x=subject_ID, y=Y)) + geom_boxplot()
|
|
|
|
mean_data <- data %>%
|
|
group_by(subject_ID) %>%
|
|
summarise(mean_i = mean(Y))
|
|
|
|
sample_n <- data.frame(subject_ID=paste0("subjectID_",1:m), n=n)
|
|
|
|
mean_data %<>% merge(., sample_n)
|
|
lm_fit <- lm(mean_i ~ 1, data = mean_data)
|
|
slm_fit <- summary(lm_fit)
|
|
mean(slm_fit$residuals^2)
|
|
summary(slm_fit$residuals^2)
|
|
wt_lm_fit <- lm(mean_i ~ 1, data = mean_data, weights = n)
|
|
swlm_fit <- summary(wt_lm_fit)
|
|
mean(swlm_fit$residuals^2)
|
|
summary(swlm_fit$residuals^2)
|
|
simple_obs_var_of_mean <- mean_data %>%
|
|
.$mean_i %>%
|
|
var() %>%
|
|
divide_by(m)
|
|
|
|
lmer_fit <- lmer(Y ~ (1|subject_ID), data = data)
|
|
slmer_fit <- summary(lmer_fit)
|
|
|
|
lmer_obs_var_of_mean <- slmer_fit$coefficients[2]^2
|
|
return(c(simple_obs_var_of_mean, lmer_obs_var_of_mean))
|
|
}
|
|
|
|
sim_res <- lapply(1:Nsim, get_variance_estimates, input_param) %>%
|
|
do.call("rbind", .) %>%
|
|
as.matrix() %>%
|
|
as.data.frame()
|
|
|
|
colnames(sim_res) <- c("simple_obs_var_of_mean", "lmer_obs_var_of_mean")
|
|
col_means <- sim_res %>%
|
|
colMeans()
|
|
|
|
|
|
sim_res %>%
|
|
ggplot(., aes(x=simple_obs_var_of_mean, y=lmer_obs_var_of_mean)) +
|
|
geom_point() +
|
|
geom_abline(slope = 1, intercept = 0) +
|
|
geom_hline(yintercept = true_var_of_mean, lty=2, col="blue") +
|
|
geom_hline(yintercept = naive_var_of_mean, lty=3, col="red") +
|
|
geom_hline(yintercept = col_means[2], lty=2, col="darkblue")
|
|
|
|
long_sim_res <- sim_res %>%
|
|
pivot_longer(everything(),names_to = "estimate_type", values_to = "variance")
|
|
|
|
long_sim_res %>%
|
|
ggplot(., aes(x=estimate_type, y=variance)) +
|
|
geom_boxplot() +
|
|
geom_hline(yintercept = true_var_of_mean, lty=2, col="blue") +
|
|
geom_hline(yintercept = naive_var_of_mean, lty=3, col="red") +
|
|
geom_hline(yintercept = col_means[2], lty=2, col="darkblue")
|
|
|
|
|
|
##DATASETS
|
|
setwd("~/Dropbox (Gladstone)/scripts/Bioinformatics-Workshops/intro-linear-mixed-effects-models")
|
|
##simple random effect
|
|
str(Dyestuff)
|
|
p <- ggplot(Dyestuff, aes(x=Batch, y=Yield)) +
|
|
geom_boxplot() +
|
|
theme_classic() +
|
|
theme(text = element_text(size = 16))
|
|
|
|
p %>% ggsave(plot = .,
|
|
filename = "Dyestuff.pdf",
|
|
width = 7,
|
|
height = 7)
|
|
fm1 <- lmer(Yield ~ 1|Batch, Dyestuff)
|
|
summary(fm1)
|
|
|
|
lm1 <- lm(Yield ~ 1, Dyestuff)
|
|
summary(lm1)
|
|
|
|
summ_Dyestuff <- Dyestuff %>%
|
|
group_by(Batch) %>%
|
|
summarise(mYield = mean(Yield))
|
|
|
|
lm2 <- lm(mYield ~ 1, summ_Dyestuff)
|
|
summary(lm2)
|
|
|
|
##very small inter-batch variability
|
|
str(Dyestuff2)
|
|
##multiple independent random effects
|
|
str(Penicillin)
|
|
|
|
p <- ggplot(Penicillin, aes(x=plate, y=diameter)) +
|
|
geom_boxplot() +
|
|
geom_point(aes(color=sample)) +
|
|
theme(text = element_text(size = 16))
|
|
|
|
p %>% ggsave(plot = .,
|
|
filename = "Penicillin.pdf",
|
|
width = 10,
|
|
height = 7)
|
|
|
|
|
|
fm1 <- lmer(diameter ~ (1|plate) + (1|sample), Penicillin)
|
|
summary(fm1)
|
|
|
|
lm1 <- lm(diameter ~ 1, Penicillin)
|
|
summary(lm1)
|
|
|
|
##hierarchy
|
|
str(Pastes)
|
|
##random slope and intercept
|
|
str(sleepstudy)
|
|
|
|
require(lattice)
|
|
require(lmerTest)
|
|
require(ggplot2)
|
|
p <- ggplot(sleepstudy, aes(x=Days, y=Reaction)) +
|
|
geom_point() +
|
|
geom_smooth(method = "lm", se=FALSE) +
|
|
ylab("reaction time in ms") +
|
|
theme_classic() +
|
|
theme(text = element_text(size = 16))
|
|
|
|
p %>% ggsave(plot = .,
|
|
filename = "sleepstudy1.pdf",
|
|
width = 7,
|
|
height = 7)
|
|
|
|
p <- ggplot(sleepstudy, aes(x=Days, y=Reaction, color=Subject)) +
|
|
geom_point() +
|
|
geom_smooth(method = "lm", se=FALSE) +
|
|
ylab("reaction time in ms") +
|
|
theme_classic() +
|
|
theme(text = element_text(size = 16))
|
|
|
|
p %>% ggsave(plot = .,
|
|
filename = "sleepstudy2.pdf",
|
|
width = 10,
|
|
height = 7)
|
|
|
|
p <- ggplot(sleepstudy, aes(x=Subject, y=Reaction)) +
|
|
geom_boxplot() +
|
|
ylab("reaction time in ms") +
|
|
theme_classic() +
|
|
theme(text = element_text(size = 16))
|
|
|
|
p <- ggplot(sleepstudy, aes(x=Subject, y=Reaction)) +
|
|
geom_boxplot() +
|
|
geom_point(aes(color=Days)) +
|
|
ylab("reaction time in ms") +
|
|
theme_classic() +
|
|
theme(text = element_text(size = 16))
|
|
p %>% ggsave(plot = .,
|
|
filename = "sleepstudy3.pdf",
|
|
width = 10,
|
|
height = 7)
|
|
|
|
xyplot(Reaction ~ Days | Subject, sleepstudy, type = c("g","p","r"),
|
|
index = function(x,y) coef(lm(y ~ x))[1],
|
|
xlab = "Days of sleep deprivation",
|
|
ylab = "Average reaction time (ms)", aspect = "xy")
|
|
|
|
fm1 <- lme4::lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
|
|
summary(fm1)
|
|
|
|
fm2 <- lmerTest::lmer(Reaction ~ Days + (Days || Subject), sleepstudy)
|
|
summary(fm2)
|
|
|
|
lm1 <- lm(Reaction ~ Days, data = sleepstudy)
|
|
summary(lm1)
|