ylab("Residual Value") + xlab("Classification")
colnames(residualed)
colnames(residualed)=gsub("_","-",colnames(residualed))
ggplot(residualed,                ### The data frame to use.
aes(x = condition_column,
y = residual)) +
geom_errorbar(aes(ymin = Trad.lower,
ymax = Trad.upper),
width = 0.05,
size  = 0.5) +
geom_point(shape = 15,
size  = 4) +
theme_bw() +
theme(axis.title   = element_text(face  = "bold")) +
ylab("Residual Value") + xlab("Classification")
colnames(residualed)
colnames(residualed)=gsub("-","",colnames(residualed))
ggplot(residualed,                ### The data frame to use.
aes(x = condition_column,
y = residual)) +
geom_errorbar(aes(ymin = Trad.lower,
ymax = Trad.upper),
width = 0.05,
size  = 0.5) +
geom_point(shape = 15,
size  = 4) +
theme_bw() +
theme(axis.title   = element_text(face  = "bold")) +
ylab("Residual Value") + xlab("Classification")
colnames(residualed)
residualed=get_residuals(data = original_data, condition_column = "classif", experimental_columns = c("experiment"), response_column = "perim_logTransformed_noOutlier",
condition_is_categorical = TRUE,  response_is_categorical=FALSE, family=NULL)
colnames(residualed)
residualed=residualed[,c("residual", "response_column", "condition_column")]
ggplot(residualed,                ### The data frame to use.
aes(x = condition_column,
y = residual)) +
geom_errorbar(aes(ymin = Trad.lower,
ymax = Trad.upper),
width = 0.05,
size  = 0.5) +
geom_point(shape = 15,
size  = 4) +
theme_bw() +
theme(axis.title   = element_text(face  = "bold")) +
ylab("Residual Value") + xlab("Classification")
library(rcompanion)
Sum = groupwiseMean(residual ~ condition_column,
data   = residualed,
conf   = 0.95,
digits = 3)
install.packages("rcompanion")
Sum = groupwiseMean(residual ~ condition_column,
data   = residualed,
conf   = 0.95,
digits = 3)
library(rcompanion)
Sum = groupwiseMean(residual ~ condition_column,
data   = residualed,
conf   = 0.95,
digits = 3)
Sum
ggplot(Sum,                ### The data frame to use.
aes(x = condition_column,
y = Mean)) +
geom_errorbar(aes(ymin = Trad.lower,
ymax = Trad.upper),
width = 0.05,
size  = 0.5) +
geom_point(shape = 15,
size  = 4) +
theme_bw() +
theme(axis.title   = element_text(face  = "bold")) +
ylab("Residual Value") + xlab("Classification")
ggplot(Sum,                ### The data frame to use.
aes(x = as.factor(condition_column),
y = Mean)) +
geom_errorbar(aes(ymin = Trad.lower,
ymax = Trad.upper),
width = 0.05,
size  = 0.5) +
geom_point(shape = 15,
size  = 4) +
theme_bw() +
theme(axis.title   = element_text(face  = "bold")) +
ylab("Residual Value") + xlab("Classification")
ggplot(Sum,                ### The data frame to use.
aes(x = as.factor(condition_column),
y = Mean)) +
geom_errorbar(aes(ymin = Trad.lower,
ymax = Trad.upper),
width = 0.05,
size  = 0.5) +
geom_point(shape = 15,
size  = 4) +
theme_bw() +
theme(axis.title   = element_text(face  = "bold")) +
ylab("Mean Residual Value") + xlab("Classification")
file.edit("/Users/mingyoungshin/Dropbox (Gladstone)/calcPower f1000_manuscript/Final_paper_materials/Supplementary_Data_v2.Rmd")
lms=calculate_lmer_estimates(data=original_data, condition_column="classif", experimental_columns=c("experiment", "line"),
response_column="perim_logTransformed_noOutlier",
condition_is_categorical=TRUE, response_is_categorical=FALSE, family=NULL)
file.edit("/Users/mingyoungshin/Dropbox (Gladstone)/calcPower f1000_manuscript/Final_paper_materials/Supplementary_Data_v3.Rmd")
colnames(RMeDPower_data3_transformed)
colnames(original_data)
detach("package:RMeDPower", unload = TRUE)
original_data=read.csv("/Users/mingyoungshin/Dropbox (Gladstone)/calcPower f1000_manuscript/Old_Parts/power_test_allexps_alllines_KREVNA5671012131415.csv")
original_data=original_data[,c("experiment","line","classif","perim")]
original_data$classif[original_data$classif==2]=1
table(original_data$classif)
original_data=transform_data(data=original_data, condition_column="classif", experimental_columns=c("experiment", "line"), response_column="perim", condition_is_categorical=TRUE,
response_is_categorical=FALSE, alpha=0.05)
library(RMeDPower)
original_data=read.csv("/Users/mingyoungshin/Dropbox (Gladstone)/calcPower f1000_manuscript/Old_Parts/power_test_allexps_alllines_KREVNA5671012131415.csv")
original_data=original_data[,c("experiment","line","classif","perim")]
original_data$classif[original_data$classif==2]=1
table(original_data$classif)
original_data=transform_data(data=original_data, condition_column="classif", experimental_columns=c("experiment", "line"), response_column="perim", condition_is_categorical=TRUE,
response_is_categorical=FALSE, alpha=0.05)
RMeDPower_data3_2=RMeDPower_data3
original_data=read.csv("/Users/mingyoungshin/Dropbox (Gladstone)/calcPower f1000_manuscript/Old_Parts/power_test_allexps_alllines_KREVNA5671012131415.csv")
original_data=original_data[,c("experiment","line","classif","perim")]
original_data$classif[original_data$classif==2]=1
table(original_data$classif)
original_data=transform_data(data=original_data, condition_column="classif", experimental_columns=c("experiment", "line"), response_column="perim", condition_is_categorical=TRUE,
response_is_categorical=FALSE, alpha=0.05)
lms=calculate_lmer_estimates(data=original_data, condition_column="classif", experimental_columns=c("experiment", "line"),
response_column="perim_logTransformed_noOutlier",
condition_is_categorical=TRUE, response_is_categorical=FALSE, family=NULL)
file.edit("/Users/mingyoungshin/git/RMeDPower/R/get_residuals.R")
original_data=read.csv("/Users/mingyoungshin/Dropbox (Gladstone)/calcPower f1000_manuscript/Old_Parts/power_test_allexps_alllines_KREVNA5671012131415.csv")
original_data=original_data[,c("experiment","line","classif","perim")]
original_data$classif[original_data$classif==2]=1
table(original_data$classif)
original_data=transform_data(data=original_data, condition_column="classif", experimental_columns=c("experiment", "line"), response_column="perim", condition_is_categorical=TRUE,
response_is_categorical=FALSE, alpha=0.05)
residualed=get_residuals(data = original_data, condition_column = "classif", experimental_columns = c("experiment"), response_column = "perim_logTransformed_noOutlier",
condition_is_categorical = TRUE,  response_is_categorical=FALSE, family=NULL, repeatable_columns = "line")
Sum = groupwiseMean(residual ~ condition_column,
data   = residualed,
conf   = 0.95,
digits = 3)
ggplot(Sum,                ### The data frame to use.
aes(x = as.factor(condition_column),
y = Mean)) +
geom_errorbar(aes(ymin = Trad.lower,
ymax = Trad.upper),
width = 0.05,
size  = 0.5) +
geom_point(shape = 15,
size  = 4) +
theme_bw() +
theme(axis.title   = element_text(face  = "bold")) +
ylab("Mean Residual Value") + xlab("Classification")
lms
lms=calculate_lmer_estimates(data=original_data, condition_column="classif", experimental_columns=c("experiment", "line"),
response_column="perim_logTransformed_noOutlier",
condition_is_categorical=TRUE, response_is_categorical=FALSE, family=NULL, repeatable_columns = "line")
residualed=get_residuals(data = original_data, condition_column = "classif", experimental_columns = c("experiment"), response_column = "perim_logTransformed_noOutlier",
condition_is_categorical = TRUE,  response_is_categorical=FALSE, family=NULL)
Sum = groupwiseMean(residual ~ condition_column,
data   = residualed,
conf   = 0.95,
digits = 3)
ggplot(Sum,                ### The data frame to use.
aes(x = as.factor(condition_column),
y = Mean)) +
geom_errorbar(aes(ymin = Trad.lower,
ymax = Trad.upper),
width = 0.05,
size  = 0.5) +
geom_point(shape = 15,
size  = 4) +
theme_bw() +
theme(axis.title   = element_text(face  = "bold")) +
ylab("Mean Residual Value") + xlab("Classification")
Sum
length(table(original_data$line))
colnames(residualed)
ggplot(residualed,                ### The data frame to use.
aes(x = as.factor(line),
y = residual), col=as.factor(condition_column)) +
geom_boxplot() +
geom_point(shape = 15,
size  = 4) +
theme_bw() +
theme(axis.title   = element_text(face  = "bold"))
ggplot(residualed,                ### The data frame to use.
aes(x = as.factor(line),
y = residual), color=as.factor(condition_column)) +
geom_boxplot() +
theme_bw() +
theme(axis.title   = element_text(face  = "bold"))
ggplot(residualed,                ### The data frame to use.
aes(x = as.factor(line),
y = residual), fill=as.factor(condition_column)) +
geom_boxplot() +
theme_bw() +
theme(axis.title   = element_text(face  = "bold"))
ggplot(residualed,                ### The data frame to use.
aes(x = as.factor(line),
y = residual), group=as.factor(condition_column)) +
geom_boxplot() +
theme_bw() +
theme(axis.title   = element_text(face  = "bold"))
ggplot(residualed,                ### The data frame to use.
aes(x = as.factor(line),
y = residual), color=as.factor(condition_column)) +
geom_boxplot()
ggplot(residualed,                ### The data frame to use.
aes(x = as.factor(line),
y = residual), fill=as.factor(condition_column)) +
geom_boxplot()
ggplot(residualed,                ### The data frame to use.
aes(x = as.factor(line),
y = residual)) +
geom_boxplot(fill=as.factor(condition_column))
ggplot(residualed,                ### The data frame to use.
aes(x = as.factor(line),
y = residual, fill=as.factor(condition_column))) +
geom_boxplot()
library(dplyr)
library(magrittr)
residualed%<>%group_by(line)%<>%summarise(mean = mean(line))
residualed
residualed=get_residuals(data = original_data, condition_column = "classif", experimental_columns = c("experiment"), response_column = "perim_logTransformed_noOutlier",
condition_is_categorical = TRUE,  response_is_categorical=FALSE, family=NULL)
residualed2=residualed
colnames(residualed2)
residualed2%<>%group_by(line)%<>%summarise(mean = mean(residual))
residualed2
residualed2=residualed
residualed2%<>%group_by(line, condition_column)%<>%summarise(mean = mean(residual))
condition_column
residualed2
residualed2=residualed
residualed2%<>%group_by(line, condition_column)%<>%summarise(mean_residual = mean(residual))
#new version
Sum = groupwiseMean(mean_residual ~ condition_column,
data   = residualed2,
conf   = 0.95,
digits = 3)
Sum
ggplot(Sum,                ### The data frame to use.
aes(x = as.factor(condition_column),
y = Mean)) +
geom_errorbar(aes(ymin = Trad.lower,
ymax = Trad.upper),
width = 0.05,
size  = 0.5) +
geom_point(shape = 15,
size  = 4) +
theme_bw() +
theme(axis.title   = element_text(face  = "bold")) +
ylab("Mean Residual Value") + xlab("Classification")
setwd("/Users/mingyoungshin/Dropbox (Gladstone)/ML_workshop/2022_fall_machine_learning")
#load iris data
data(iris)
before_suffling=head(iris)
View(iris)
library(ggplot2)
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width))+
geom_point(aes(color = Species, shape = Species))+
scale_color_manual(values = c("#00AFBB", "#E7B800", "#FC4E07"))
ggplot(iris, aes(x = Petal.Length, y = Petal.Width))+
geom_point(aes(color = Species, shape = Species))+
scale_color_manual(values = c("#00AFBB", "#E7B800", "#FC4E07"))
#shuffle iris data
iris=iris[sample(1:nrow(iris),nrow(iris)),]
head(iris)
before_suffling
#assign training data and test data
nrow(iris)
train_index=1:round( nrow(iris) *0.7)
train_data=iris[train_index,1:4]
nrow(train_data)
train_label=as.character(iris[train_index,5])
test_data=iris[-train_index,1:4]
nrow(test_data)
test_label=as.character(iris[-train_index,5])
#find the best k using cross validation
caret_fit <- train(train_data, train_label, method = "knn", trControl = trainControl(method="cv",number = 10))
train_data=iris[train_index,1:4]
nrow(train_data)
train_label=as.character(iris[train_index,5])
#plot accuracy and k
plot(caret_fit)
library(class)
library(caret)
#plot accuracy and k
plot(caret_fit)
caret_fit$bestTune
#find the best k using cross validation
caret_fit <- train(train_data, train_label, method = "knn", trControl = trainControl(method="cv",number = 10))
#plot accuracy and k
plot(caret_fit)
#find the best k using cross validation
caret_fit <- train(train_data, train_label, method = "knn", trControl = trainControl(method="cv",number = 10))
#plot accuracy and k
plot(caret_fit)
set.seed(123)
#load iris data
data(iris)
before_suffling=head(iris)
View(iris)
table(iris$Species)
#visualize iris data
library(ggplot2)
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width))+
geom_point(aes(color = Species, shape = Species))+
scale_color_manual(values = c("#00AFBB", "#E7B800", "#FC4E07"))
ggplot(iris, aes(x = Petal.Length, y = Petal.Width))+
geom_point(aes(color = Species, shape = Species))+
scale_color_manual(values = c("#00AFBB", "#E7B800", "#FC4E07"))
#shuffle iris data
iris=iris[sample(1:nrow(iris),nrow(iris)),]
head(iris)
before_suffling
#assign training data and test data
nrow(iris)
train_index=1:round( nrow(iris) *0.7)
train_data=iris[train_index,1:4]
nrow(train_data)
train_label=as.character(iris[train_index,5])
test_data=iris[-train_index,1:4]
nrow(test_data)
test_label=as.character(iris[-train_index,5])
#scale data
#train_data=scale(train_data)
#test_data=scale(test_data)
#find the best k using cross validation
caret_fit <- train(train_data, train_label, method = "knn", trControl = trainControl(method="cv",number = 10))
#plot accuracy and k
plot(caret_fit)
caret_fit$bestTune
#predict labels
prediction<- predict(caret_fit, newdata = test_data)
prediction
#check the prediction result
summary(prediction)
#tabularize prediction vs observed value
tb <- table(prediction,test_label)
tb
#calculate accuracy
accuracy <- function(x){sum(diag(x)/(sum(rowSums(x)))) * 100}
accuracy(tb)
library(cluster)
library(ggplot2)
#load iris data
data(iris)
head(iris)
#choose k
library(factoextra)
fviz.p <-fviz_nbclust(x = data,FUNcluster = kmeans, method = 'wss' )
#retrieve variables
data=as.data.frame(iris[,1:4])
fviz.p <-fviz_nbclust(x = data,FUNcluster = kmeans, method = 'wss' )
fviz.p #save ggplot output for methods below
?kmeans
#train kmeans
fit <- kmeans(data, centers= 3, nstart=100, algorithm ="Lloyd", iter.max=100)
fit$cluster
#train kmeans
fit <- kmeans(data, centers= 3, nstart=50, algorithm ="Lloyd", iter.max=100)
fit$cluster
#visualize clusters
fviz_cluster(fit, data = data, palette = c("#00AFBB","#2E9FDF", "#E7B800", "#FC4E07"), ggtheme = theme_minimal())
data[[1]]
#1 kneepointDetection function
BiocManager::install("SamSPECTRAL")
library(SamSPECTRAL)
#1 kneepointDetection function
BiocManager::install("SamSPECTRAL")
#1 kneepointDetection function
BiocManager::install("SamSPECTRAL")
#1 kneepointDetection function
BiocManager::install("SamSPECTRAL")
BiocManager::install("SamSPECTRAL")
library(rpart)
library(partykit)
library(rpart.plot)
#load titanic data
data(ptitanic)
View(ptitanic)
#shuffle titanic data
titanic=ptitanic[sample(1:nrow(ptitanic),nrow(ptitanic)),]
#split training data and test data
train_index=1:round(nrow(ptitanic)*0.7)
train_data=ptitanic[train_index,]
test_data=ptitanic[-train_index,]
#train a decision tree
survived <- rpart(survived ~ ., data = train_data)
#visualize the decision tree
new_tree <- as.party(survived)
plot(new_tree)
#check the splitting rules
rpart.rules(survived, cover = TRUE)
?rpart.rules
# print Complexity parameter of the current tree
printcp(survived) # result of rpart
# prune the decision tree based on the best cp
survived_pruned<- prune(survived, cp= survived$cptable[which.min(survived$cptable[,"xerror"]),"CP"])
#visualize the decision tree
new_tree <- as.party(survived_pruned)
plot(new_tree)
#prediction
prediction_pruned <-predict(survived_pruned, test_data, type = 'class')
?predict
#tabulate prediction vs observed labels
table_mat_pruned <- table(test_data$survived, prediction_pruned)
#tabularize prediction vs observed labels
table_mat_pruned <- table(test_data$survived, prediction_pruned)
table_mat_pruned
#calculate accuracy
accuracy <- function(x){sum(diag(x)/(sum(rowSums(x)))) * 100}
accuracy(table_mat_pruned)
getwd()
library(class)
library(caret)
set.seed(123)
#load iris data
data(iris)
before_suffling=head(iris)
View(iris)
table(iris$Species)
library(ggplot2)
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width))+
geom_point(aes(color = Species, shape = Species))+
scale_color_manual(values = c("#00AFBB", "#E7B800", "#FC4E07"))
ggplot(iris, aes(x = Petal.Length, y = Petal.Width))+
geom_point(aes(color = Species, shape = Species))+
scale_color_manual(values = c("#00AFBB", "#E7B800", "#FC4E07"))
#shuffle iris data
iris=iris[sample(1:nrow(iris),nrow(iris)),]
head(iris)
before_suffling
#assign training data and test data
nrow(iris)
train_index=1:round( nrow(iris) *0.7)
train_data=iris[train_index,1:4]
nrow(train_data)
train_label=as.character(iris[train_index,5])
test_data=iris[-train_index,1:4]
nrow(test_data)
test_label=as.character(iris[-train_index,5])
#find the best k using cross validation
caret_fit <- train(train_data, train_label, method = "knn", trControl = trainControl(method="cv",number = 10))
#plot accuracy and k
plot(caret_fit)
caret_fit$bestTune
#predict labels
prediction<- predict(caret_fit, newdata = test_data)
prediction
#check the prediction result
summary(prediction)
#tabularize prediction vs observed value
tb <- table(prediction,test_label)
tb
#calculate accuracy
accuracy <- function(x){sum(diag(x)/(sum(rowSums(x)))) * 100}
accuracy(tb)
library(cluster)
library(ggplot2)
#load iris data
data(iris)
head(iris)
#retrieve variables
data=as.data.frame(iris[,1:4])
#choose k
library(factoextra)
fviz.p <-fviz_nbclust(x = data,FUNcluster = kmeans, method = 'wss' )
fviz.p #save ggplot output for methods below
#train kmeans
fit <- kmeans(data, centers= 3, nstart=50, algorithm ="Lloyd", iter.max=100)
fit$cluster
#visualize clusters
fviz_cluster(fit, data = data, palette = c("#00AFBB","#2E9FDF", "#E7B800", "#FC4E07"), ggtheme = theme_minimal())
library(rpart)
library(partykit)
library(rpart.plot)
#load titanic data
data(ptitanic)
View(ptitanic)
#shuffle titanic data
titanic=ptitanic[sample(1:nrow(ptitanic),nrow(ptitanic)),]
#split training data and test data
train_index=1:round(nrow(ptitanic)*0.7)
train_data=ptitanic[train_index,]
test_data=ptitanic[-train_index,]
#train a decision tree
survived <- rpart(survived ~ ., data = train_data)
#visualize the decision tree
new_tree <- as.party(survived)
plot(new_tree)
#check the splitting rules
rpart.rules(survived, cover = TRUE)
# print Complexity parameter of the current tree
printcp(survived) # result of rpart
# prune the decision tree based on the best cp
survived_pruned<- prune(survived, cp= survived$cptable[which.min(survived$cptable[,"xerror"]),"CP"])
#visualize the decision tree
new_tree <- as.party(survived_pruned)
plot(new_tree)
#prediction
prediction_pruned <-predict(survived_pruned, test_data, type = 'class')
prediction_pruned
#tabularize prediction vs observed labels
table_mat_pruned <- table(test_data$survived, prediction_pruned)
table_mat_pruned
#calculate accuracy
accuracy <- function(x){sum(diag(x)/(sum(rowSums(x)))) * 100}
accuracy(table_mat_pruned)
