library(Seurat)
library(dplyr)
library(ggplot2)
#set all directory paths
basedir <- paste0("~/Dropbox (Gladstone)/GB-LZ-1266/results")
basedir <- paste0("~/Dropbox (Gladstone)/GB-LZ-1266/results")
indir <- paste0(basedir,
"/03_cellranger_count_custom_reference_genome_mRatBN7_chr2_yfp_prv/")
path_postfix_filtered_matrix <- "/filtered_feature_bc_matrix"
#get the list of all samples
samples_list <- list.dirs(indir,full.names=FALSE,recursive=FALSE)
#create a data frame to record cell count before and after filtering per sample
cells_per_sample <- data.frame(sample=character(),
filter_stage=character(),
number_of_cells=numeric(),
nFeature_RNA_min_cutoff=numeric(),
nFeature_RNA_max_cutoff=numeric(),
percent.mt_cutoff=numeric(),
stringsAsFactors = FALSE)
samples_list
i=1
obj.name <- samples_list[i]
datadir <- paste0(indir,samples_list[i],path_postfix_filtered_matrix)
obj.data <- Read10X(data.dir = datadir)
this.obj <- CreateSeuratObject(counts = obj.data,
# Don't accept data with fewer than 3 cells
#min.cells=3,
# Don't accept data with fewer than 200 genes
#min.features=200
)
this.obj
x <- as.data.frame(this.obj@assays$RNA@data)
dim(x)
x <- x[c("UL15","UL18","UL19","UL20","UL25"),]
View(x)
rowSums(x)
x <- as.data.frame(this.obj@assays$RNA@data)
x <- x[c("UL15","UL18","UL19","UL20","UL25","mRFP1","YFP","ChR2(H134R)"),]
rowSums(x)
#read in data for each sample and perform QC
for(i in 1:length(samples_list)){
obj.name <- samples_list[i]
datadir <- paste0(indir,samples_list[i],path_postfix_filtered_matrix)
obj.data <- Read10X(data.dir = datadir)
this.obj <- CreateSeuratObject(counts = obj.data,
# Don't accept data with fewer than 3 cells
#min.cells=3,
# Don't accept data with fewer than 200 genes
#min.features=200
)
x <- as.data.frame(this.obj@assays$RNA@data)
x <- x[c("UL15","UL18","UL19","UL20","UL25"),]
print(rowSums(x))
}
samples_list <- list.dirs(indir,full.names=FALSE,recursive=FALSE)
#create a data frame to record cell count before and after filtering per sample
cells_per_sample <- data.frame(sample=character(),
filter_stage=character(),
number_of_cells=numeric(),
nFeature_RNA_min_cutoff=numeric(),
nFeature_RNA_max_cutoff=numeric(),
percent.mt_cutoff=numeric(),
stringsAsFactors = FALSE)
#read in data for each sample and perform QC
for(i in 1:length(samples_list)){
obj.name <- samples_list[i]
datadir <- paste0(indir,samples_list[i],path_postfix_filtered_matrix)
obj.data <- Read10X(data.dir = datadir)
this.obj <- CreateSeuratObject(counts = obj.data,
# Don't accept data with fewer than 3 cells
#min.cells=3,
# Don't accept data with fewer than 200 genes
#min.features=200
)
x <- as.data.frame(this.obj@assays$RNA@data)
x <- x[c("UL15","UL18","UL19","UL20","UL25","mRFP1","YFP","ChR2(H134R)"),]
print(rowSums(x))
}
library(Seurat)
library(dplyr)
library(ggplot2)
#set all directory paths
basedir <- paste0("~/Dropbox (Gladstone)/GB-LZ-1266/results")
indir <- paste0(basedir,
"/03_cellranger_count_custom_reference_genome_",
"mRatBN7_chr2_yfp_prv/")
path_postfix_filtered_matrix <- "/filtered_feature_bc_matrix"
#get the list of all samples
samples_list <- list.dirs(indir,full.names=FALSE,recursive=FALSE)
samples_list
#read in data for each sample and perform QC
for(i in 1:length(samples_list)){
obj.name <- samples_list[i]
datadir <- paste0(indir,samples_list[i],path_postfix_filtered_matrix)
obj.data <- Read10X(data.dir = datadir)
this.obj <- CreateSeuratObject(counts = obj.data,
# Don't accept data with fewer than 3 cells
#min.cells=3,
# Don't accept data with fewer than 200 genes
#min.features=200
)
x <- as.data.frame(this.obj@assays$RNA@data)
x <- x[c("UL15","UL18","UL19","UL20","UL25","mRFP1","YFP","ChR2(H134R)"),]
print(paste0("Sample ",samples_list[i], " :"))
print(rowSums(x))
}
library(Seurat)
library(dplyr)
library(ggplot2)
#set all directory paths
basedir <- paste0("~/Dropbox (Gladstone)/GB-LZ-1266/results")
indir <- paste0(basedir,
"/03_cellranger_count_custom_reference_genome_",
"mRatBN7_chr2_yfp_prv/")
path_postfix_filtered_matrix <- "raw_feature_bc_matrix" #"/filtered_feature_bc_matrix"
#get the list of all samples
samples_list <- list.dirs(indir,full.names=FALSE,recursive=FALSE)
samples_list
#read in data for each sample and perform QC
for(i in 1:length(samples_list)){
obj.name <- samples_list[i]
datadir <- paste0(indir,samples_list[i],path_postfix_filtered_matrix)
obj.data <- Read10X(data.dir = datadir)
this.obj <- CreateSeuratObject(counts = obj.data,
# Don't accept data with fewer than 3 cells
#min.cells=3,
# Don't accept data with fewer than 200 genes
#min.features=200
)
x <- as.data.frame(this.obj@assays$RNA@data)
x <- x[c("UL15","UL18","UL19","UL20","UL25","mRFP1","YFP","ChR2(H134R)"),]
print(paste0("Sample ",samples_list[i], " :"))
print(rowSums(x))
}
path_postfix_filtered_matrix <- "/raw_feature_bc_matrix" #"/filtered_feature_bc_matrix"
#read in data for each sample and perform QC
for(i in 1:length(samples_list)){
obj.name <- samples_list[i]
datadir <- paste0(indir,samples_list[i],path_postfix_filtered_matrix)
obj.data <- Read10X(data.dir = datadir)
this.obj <- CreateSeuratObject(counts = obj.data,
# Don't accept data with fewer than 3 cells
#min.cells=3,
# Don't accept data with fewer than 200 genes
#min.features=200
)
x <- as.data.frame(this.obj@assays$RNA@data)
x <- x[c("UL15","UL18","UL19","UL20","UL25","mRFP1","YFP","ChR2(H134R)"),]
print(paste0("Sample ",samples_list[i], " :"))
print(rowSums(x))
}
library(rGREAT)
library(dplyr)
library(ggplot2)
#list all comparison folders
list_comparisons <- list.dirs(full.names = FALSE, recursive = FALSE)
list_comparisons <- list_comparisons[!grepl("scripts", list_comparisons)]
version(rGreat)
sessionInfo()
BiocManager::install("rGREAT")
library(rGREAT)
library(dplyr)
library(ggplot2)
#list all comparison folders
list_comparisons <- list.dirs(full.names = FALSE, recursive = FALSE)
list_comparisons <- list_comparisons[!grepl("scripts", list_comparisons)]
sessionInfo()
c <- seq(1,15)
c
c[!(c %in% c(1,3,5))]
clusters_for_de <- c(1,3,5)
c[!(c %in% clusters_for_de)]
library(Seurat)
x <- readRDS("~/Dropbox (Gladstone)/YH_NK01-Results/11_seurat_analysis_filter_percent_mt_noGFAPCre/06_analysis_of_cluster_of_interest/sub_cluster_astrocytes_cluster_12/astrocyte_data_post_cluster_noGFAPCre_noS2_pcadims_15_res_0.9.rds")
colnames(x[[]])
unique(x$seurat_clusters)
all_clusters <- unique(x$seurat_clusters)
all_clusters[!(all)]
all_clusters[!(all_clusters %in% clusters_for_de)]
as.numeric(all_clusters[!(all_clusters %in% clusters_for_de)])
as.character(all_clusters[!(all_clusters %in% clusters_for_de)])
as.numeric(as.character(all_clusters[!(all_clusters %in% clusters_for_de)]))
library(Seurat)
library(dplyr)
library(ggplot2)
library(grid)
library(gridExtra)
library(cowplot)
#set all directory paths
basedir <- paste0("~/Dropbox (Gladstone)/YH_NK01-Results/",
"11_seurat_analysis_filter_percent_mt_noGFAPCre/")
indir <- paste0(basedir,
"04_clustering_noS2/")
outdir <- paste0("~/Dropbox (Gladstone)/YH_NK01-Results/",
"12_paper_figures_noGFAP/")
#load the Seurat object
pca_dim <- 15
cluster_res <- 0.7
dat <- readRDS(paste0(indir,
"sct_data_noGFAPCre_noS2_post_cluster_pcadims_",
pca_dim,
"_res_",
cluster_res,
".rds"))
##the number of cells in each mouse in each cluster
all_metadata <- dat[[]]
#make a table of the required data
avg_qc_features_per_cluster <- all_metadata %>% group_by(seurat_clusters) %>%
summarise(total_cells = n(),
avg_ngene = mean(nFeature_RNA),
sd_ngene = sd(nFeature_RNA),
se_ngene = sd(nFeature_RNA) / sqrt(length(nFeature_RNA)),
avg_nUMI = mean(nCount_RNA),
sd_nUMI = sd(nCount_RNA),
se_nUMI = sd(nCount_RNA) / sqrt(length(nCount_RNA)),
avg_mt =  mean(percent.mt),
sd_mt =  sd(percent.mt),
se_mt = sd(percent.mt) / sqrt(length(percent.mt))) %>%
as.data.frame()
##########
#fig a
##########
# Create plot with legend
ggp1_legend <- ggplot(avg_qc_features_per_cluster,
aes(x=seurat_clusters, y=total_cells,
fill=seurat_clusters)) +
geom_bar(position=position_dodge(), stat="identity") +
guides(fill=guide_legend(ncol=4)) +
scale_fill_discrete(name = "Cluster Identity",
labels = c("1 - Oligodendrocyte",
"2 - Oligodendrocyte",
"3 - In Neuron",
"4 - Ex Neuron",
"5 - Ex Neuron",
"6 - Ex Neuron",
"7 - Ex Neuron",
"8 - In Neuron",
"9 - Ex Neuron",
"10 - Ex Neuron",
"11 - In Neuron",
"12 - Astrocyte",
"13 - In Neuron",
"14 - Microglia",
"15 - Oligodendrocyte",
"16 - OPC",
"17 - Ex Neuron",
"18 - Ex Neuron",
"19 - Ex Neuron",
"20 - Ex Neuron",
"21 - Ex Neuron",
"22 - Ex Neuron",
"23 - Ex Neuron",
"24 - In Neuron",
"25 - Microglia",
"26 - Ex Neuron",
"27 - In Neuron",
"28 - Ex Neuron",
"29 - Microglia",
"30 - Ex Neuron",
"31 - In Neuron",
"32 - Ex Neuron",
"33 - Unknown",
"34 - Unknown")) +
theme(text = element_text(size = 26))
# Create user-defined function, which extracts legends from ggplots
extract_legend <- function(my_ggp) {
step1 <- ggplot_gtable(ggplot_build(my_ggp))
step2 <- which(sapply(step1$grobs, function(x) x$name) == "guide-box")
step3 <- step1$grobs[[step2]]
return(step3)
}
# Apply user-defined function to extract legend
shared_legend <- extract_legend(ggp1_legend)
##########
#fig b
##########
p1 <- ggplot(avg_qc_features_per_cluster,
aes(x=seurat_clusters, y=total_cells, fill=seurat_clusters)) +
geom_bar(position=position_dodge(), stat="identity") +
scale_x_discrete(expand = c(0.04,0.04))+
theme_classic() + theme(
panel.border = element_rect(colour = "black", fill=NA, size=2),
text = element_text(size = 20),
legend.position = "none") +
ggtitle("Number of Cells per Cluster") +
xlab("Cell Cluster") +
ylab("Number of Cells")
##########
#fig c
##########
p2 <- ggplot(avg_qc_features_per_cluster,
aes(x=seurat_clusters, y=avg_ngene, fill=seurat_clusters)) +
geom_bar(stat="identity", position=position_dodge()) +
geom_errorbar(aes(ymin=avg_ngene-se_ngene, ymax=avg_ngene+se_ngene),
width=.2,
position=position_dodge(.9)) +
scale_x_discrete(expand = c(0.04,0.04))+
theme_classic() + theme(
panel.border = element_rect(colour = "black", fill=NA, size=2),
text = element_text(size = 20),
legend.position = "none") +
ggtitle("Average Genes per Cells by Cluster") +
xlab("Cell Cluster") +
ylab("Average Number of Genes")
##########
#fig d
##########
p3 <- ggplot(avg_qc_features_per_cluster,
aes(x=seurat_clusters, y=avg_nUMI, fill=seurat_clusters)) +
geom_bar(stat="identity", position=position_dodge()) +
geom_errorbar(aes(ymin=avg_nUMI-se_nUMI, ymax=avg_nUMI+se_nUMI),
width=.2,
position=position_dodge(.9)) +
scale_x_discrete(expand = c(0.04,0.04))+
theme_classic() + theme(
panel.border = element_rect(colour = "black", fill=NA, size=2),
text = element_text(size = 20),
legend.position = "none") +
ggtitle("Average nUMI per Cells by Cluster") +
xlab("Cell Cluster") +
ylab("Average nUMI")
##########
#fig e
##########
p4 <- ggplot(avg_qc_features_per_cluster,
aes(x=seurat_clusters, y=avg_mt, fill=seurat_clusters)) +
geom_bar(stat="identity", position=position_dodge()) +
geom_errorbar(aes(ymin=avg_mt-se_mt, ymax=avg_mt+se_mt),
width=.2,
position=position_dodge(.9)) +
scale_x_discrete(expand = c(0.04,0.04))+
theme_classic() + theme(
panel.border = element_rect(colour = "black", fill=NA, size=2),
text = element_text(size = 20),
legend.position = "none") +
ggtitle("Average % Mitochondrial Genes per Cells by Cluster") +
xlab("Cell Cluster") +
ylab("Average % Mito Genes")
# Draw plots with shared legend
#with standard error
pdf(paste0(outdir, "quality_control_supplemental_figure_with_SE_bars.pdf"),
width = 25, height = 26)
grid.arrange(shared_legend,
arrangeGrob(p1, p2, p3,p4, ncol = 2),
heights=c(2, 10))
dev.off()
library(org.Mm.eg.db)
AnnotationDbi::select(org.Mm.eg.db,
keys = "Syt1",
columns = c("ENTREZID", "SYMBOL", "GENENAME"),
keytype = "SYMBOL")
x <- c(1,2,3,4,5)
y <- 0
save.image("~/Downloads/tesrtR.RData")
rm(x)
x <- c(1,2,3,4,5)
x <- c(1,2,3,4,5,6)
load("~/Downloads/tesrtR.RData")
x <- c(1,2,3,4,5,6)
setwd("~/Downloads/Intro_to_R_materials/session2")
#Reading data file.
dat <- read.table("GSE60450_Lactation-GenewiseCounts.txt")
View(dat)
DAT[1,]
dat[1,]
#Information about samples is in another file.
phenotype_info <- read.table("targets.csv",
header = TRUE,
sep = ",")
View(phenotype_info)
dat <- read.table("GSE60450_Lactation-GenewiseCounts.txt")
#Let us examine how our data looks.
View(dat)
#Seems like all data points are there. Can we improve appearance?
#Examine the details of read.table command.
?read.table
#Looks like we can inform read.table about separator type and presence of header.
dat <- read.table("GSE60450_Lactation-GenewiseCounts.txt",
header= TRUE, sep = "\t")
#Let us examine how data looks now.
View(dat)
#What are the observations represented in our data?
#colnames gives the names of columns of data.
colnames(dat)
#To check the number of rows and columns in table.
dim(dat)
#To check first few rows of table.
head(dat)
#To check last few rows of table.
tail(dat)
#To check basic stats for each column.
summary(dat)
#Let us extract a column of data.
#For example, EntrezGeneID.
geneIds <- dat$EntrezGeneID
#Check what kind of variable geneIds is.
class(geneIds)
#geneIds should be string type.
dat$EntrezGeneID <- as.character(dat$EntrezGeneID)
#Check the class of gene ids again
class(dat$EntrezGeneID)
#Information about samples is in another file.
phenotype_info <- read.table("targets.csv",
header = TRUE,
sep = ",")
#Looks like we can inform read.table about separator type and presence of header.
dat <- read.table("GSE60450_Lactation-GenewiseCounts.txt",
header= TRUE, sep = "\t")
#What are the observations represented in our data?
#colnames gives the names of columns of data.
colnames(dat)
#Reading data file.
dat <- read.table("GSE60450_Lactation-GenewiseCounts.txt")
dat[1,]
