mirror of
https://github.com/haniffalab/scRNA-seq_analysis.git
synced 2024-10-23 08:29:24 -07:00
scRNA-seq_analysis
This commit is contained in:
commit
82cc2d191e
188 changed files with 146184 additions and 0 deletions
179
pipelines/09_gene_heatmap_and_spotplot/gene_heatmap_and_spotplot.R
Executable file
179
pipelines/09_gene_heatmap_and_spotplot/gene_heatmap_and_spotplot.R
Executable file
|
|
@ -0,0 +1,179 @@
|
|||
# Gather the trailing command-line arguments, glue them into one string
# (no separator) and cut that string on ";" to get the individual parameters.
args <- commandArgs(trailingOnly = TRUE)
args <- strsplit(paste(args, collapse = ""), ";")[[1]]

# The pipeline needs exactly 8 parameters; abort with the usage text otherwise.
if (length(args) != 8) {
  stop('This pipeline requires 8 parameters: seurat.addr\n set.ident \n genes.to.plot (name of file containing genes to plot)\n cell.types (name of file containing cell types to plot or all)\n cluster.genes (boolean)\n diagonalize (boolean indicating to compute the order of the genes in such a way as to make the appearance of a diagonal on the spotplot - will override the cluster.genes boolean)\n plot.dims (4 tuple for plots dimenssion)\n save.gene.order (boolean indicate to save the genes in the order computed by clustering and/or diaganolization - can be NA or a file name)')
}
|
||||
|
||||
# Variables that the command-line parameters must define, in positional order:
# the n-th parameter is R code that is expected to create the n-th variable.
expected.variables <- c(
  "seurat.addr",
  "set.ident",
  "genes.to.plot",
  "cell.types",
  "cluster.genes",
  "diagonalize",
  "plot.dims",
  "save.gene.order"
)

for (n in seq_along(expected.variables)) {
  variable.name <- expected.variables[n]
  # NOTE(review): the parameter text is evaluated verbatim as R code, so this
  # script must only be run with trusted command-line input.
  eval(parse(text = args[n]))
  # inherits = FALSE: only a variable created in this environment counts, not
  # an unrelated object of the same name found further along the search path.
  if (!exists(variable.name, inherits = FALSE)) {
    stop(sprintf("Argument %s not passed. Stopping ... ", variable.name))
  }
}
|
||||
|
||||
# create required folders for output and work material
# The run timestamp (yymmddHHMMSS) is produced with an explicit format string.
# Stripping characters from the implicit text form of Sys.time() is fragile:
# that text can contain fractional seconds, which would leak into the name.
c.time <- format(Sys.time(), "%y%m%d%H%M%S")
output_folder <- paste("09_gene_expression_heatmap_and_spotplot", seurat.addr, c.time, sep = "_")
output_folder <- file.path("../../output", output_folder)
# recursive = TRUE so the folder is still created when ../../output is missing
dir.create(output_folder, recursive = TRUE)

# turn the bare file names passed as parameters into paths relative to this folder
seurat.addr <- file.path("../../data", seurat.addr)
genes.to.plot <- file.path("../../resources", genes.to.plot)
|
||||
|
||||
library(Seurat)
|
||||
library(dplyr)
|
||||
library(plyr)
|
||||
library(reshape)
|
||||
|
||||
#######################################################################################################
|
||||
|
||||
# read the serialized object stored at seurat.addr
print("loading data ...")
seurat.obj <- readRDS(file = seurat.addr)
print("Data loaded.")

# switch the object's identities to the ones named by set.ident
seurat.obj <- SetAllIdent(seurat.obj, id = set.ident)
|
||||
|
||||
# read the requested genes; readLines is given the path directly, so it opens
# and closes the file itself
genes <- readLines(genes.to.plot, warn = FALSE)
genes <- unlist(strsplit(genes, "\n"))

# keep only the genes that exist in the dataset and report the others
known.genes <- rownames(seurat.obj@data)
is.known <- genes %in% known.genes
if (!all(is.known)) {
  not.found <- genes[!is.known]
  print(sprintf("The following genes were not found in the data: %s", paste(not.found, collapse = ", ")))
  genes <- genes[is.known]
}

# report genes listed more than once, then keep a single copy of each
if (anyDuplicated(genes) > 0) {
  gene.counts <- table(genes)
  duplicates <- paste(names(gene.counts[gene.counts > 1]), collapse = ", ")
  print(sprintf("Duplicates found: %s", duplicates))
  print("This will not affect the workflow, but be aware the heat map will have fewer genes than expected.")
  genes <- unique(genes)
}
|
||||
|
||||
# resolve the cell types to plot; their order here is the order used on the plots
available.cell.types <- as.vector(unique(seurat.obj@ident))
if (cell.types != "all") {
  # cell.types names a file in the resources folder listing the types to plot
  cell.types <- file.path("../../resources", cell.types)
  cell.types <- readLines(cell.types, warn = FALSE)
  cell.types <- unlist(strsplit(cell.types, ", "))
  # a type listed twice would be a duplicated factor level when plotting
  cell.types <- unique(cell.types)
  print(cell.types)
  print("All cell types in data set:")
  in.data <- cell.types %in% available.cell.types
  print(table(in.data))
  if (!all(in.data)) {
    # requested types that are absent from the data would later be used as
    # row names for indexing and yield NA rows, so they are dropped here
    print(sprintf("The following cell types were not found in the data and will be skipped: %s",
                  paste(cell.types[!in.data], collapse = ", ")))
    cell.types <- cell.types[in.data]
  }
  if (length(cell.types) == 0) {
    stop("None of the requested cell types are present in the data.")
  }
} else {
  cell.types <- sort(available.cell.types)
}
|
||||
|
||||
# subset expression data matrix to the selected genes and the cells whose
# identity is one of the requested cell types
keep.cell.names <- names(seurat.obj@ident)[seurat.obj@ident %in% cell.types]
# drop = FALSE keeps the genes x cells layout even when only one gene (or one
# cell) is selected; without it the result collapses to a plain vector
expression.data <- data.matrix(seurat.obj@data[genes, keep.cell.names, drop = FALSE])

# create a table with the mean expression of each gene by cell type:
# one row per cell type (row names), one column per gene
cell.labels <- as.vector(seurat.obj@ident[keep.cell.names])
expression.data <- as.data.frame(t(expression.data))
expression.data <- aggregate(expression.data, by = list(CellType = cell.labels), FUN = mean)
rownames(expression.data) <- expression.data$CellType
# remove the grouping column, again without collapsing a single gene column
expression.data <- expression.data[, -1, drop = FALSE]
|
||||
|
||||
# optionally reorder the gene columns by hierarchical clustering
if (cluster.genes) {
  # genes are columns, so the table is transposed to measure gene-to-gene distances
  expression.distance <- dist(t(expression.data), method = "euclidian")
  gene.tree <- hclust(expression.distance, method = "ward.D")
  gene.order <- gene.tree$order
  expression.data <- expression.data[, gene.order]
}
|
||||
|
||||
# optionally order the genes by where along the cell-type axis their expression
# is concentrated, so that high values line up along a diagonal
if (diagonalize) {
  # expression-weighted mean position (1-based index) of the values in vecn
  computer.vector.weight.center <- function(vecn) {
    indices <- seq_along(vecn)
    sum(indices * vecn) / sum(vecn)
  }
  # The plots order cell types by `cell.types`, so the centres are computed on
  # rows arranged in that same order rather than in the order they are stored.
  plotted.rows <- cell.types[cell.types %in% rownames(expression.data)]
  centers <- apply(X = as.matrix(expression.data[plotted.rows, , drop = FALSE]),
                   MARGIN = 2, FUN = computer.vector.weight.center)
  centers <- order(centers)
  print(length(centers))
  # drop = FALSE keeps a table even when there is a single gene column
  expression.data <- expression.data[, centers, drop = FALSE]
}
|
||||
|
||||
# heatmap: reshape the cell type x gene table to long form, one row per tile
expression.melt <- reshape::melt(data = as.matrix(expression.data))
colnames(expression.melt) <- c("CellTypes", "Genes", "Values")
expression.melt$CellTypes <- factor(as.vector(expression.melt$CellTypes), levels = cell.types)

# genes on the x axis in column order, cell types on the y axis in reversed order
heatmap.plot <- ggplot(expression.melt, aes(factor(Genes, levels = colnames(expression.data)), factor(CellTypes, levels = rev(cell.types)))) +
  geom_tile(aes(fill = Values), color = "black") +
  scale_fill_gradient(low = "lightblue", high = "darkred") +
  theme(axis.title.x = element_blank(), axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(fill = 'Expression')

# write the heatmap to a pdf sized by the first two plot dimensions
heatmap.fname <- file.path(output_folder, "./heatmap.pdf")
pdf(heatmap.fname, width = plot.dims[1], height = plot.dims[2])
print(heatmap.plot)
dev.off()
|
||||
|
||||
# spot plot: same table drawn as dots whose size and colour encode expression
# rows and columns are both reversed before reshaping to long form
expression.data.r <- expression.data[rev(cell.types), rev(colnames(expression.data))]
expression.melt <- reshape::melt(data = as.matrix(expression.data.r))
colnames(expression.melt) <- c("CellTypes", "Genes", "Expression")

# numeric plotting coordinates: X indexes the gene, Y the cell type
n.genes <- length(unique(expression.melt$Genes))
n.cell.types <- length(unique(expression.melt$CellTypes))
expression.melt$X <- rep(1:n.genes, each = nrow(expression.data.r))
expression.melt$Y <- rep(n.cell.types:1, times = ncol(expression.data.r))

# upper limit of the legends: the next whole number above the largest value
max.expression <- floor(max(expression.melt$Expression)) + 1

spot.plot <- ggplot(expression.melt, aes(x = Y, y = X))
spot.plot <- spot.plot + geom_point(aes(size = Expression, color = Expression))
spot.plot <- spot.plot + scale_color_gradient(limits = c(0, max.expression), breaks = seq(0, max.expression, by = 1), low = "lightsteelblue1", high = "darkred")
spot.plot <- spot.plot + guides(color = guide_legend(), size = guide_legend())
spot.plot <- spot.plot + scale_size_continuous(limits = c(0, max.expression), breaks = seq(0, max.expression, by = 1))
spot.plot <- spot.plot + scale_y_discrete(name = "", limits = colnames(expression.data.r))
spot.plot <- spot.plot + scale_x_discrete(name = "", limits = rev(rownames(expression.data.r)))
spot.plot <- spot.plot + theme(axis.title.x = element_blank(), axis.title.y = element_blank(),
                               axis.text.x = element_text(angle = 45, hjust = 1))

# write the spot plot to a pdf sized by the last two plot dimensions
splotplot.fname <- file.path(output_folder, "./spotplot.pdf")
pdf(splotplot.fname, width = plot.dims[3], height = plot.dims[4])
print(spot.plot)
dev.off()
|
||||
|
||||
# when a file name was given, write the final gene order (one gene per line)
# into the resources folder; writeLines opens and closes the file itself
if (!is.na(save.gene.order)) {
  save.gene.order <- file.path("../../resources", save.gene.order)
  writeLines(colnames(expression.data), con = save.gene.order)
}

print("Ended beautifully ... ")
|
||||
16
pipelines/09_gene_heatmap_and_spotplot/gene_heatmap_and_spotplot.sh
Executable file
16
pipelines/09_gene_heatmap_and_spotplot/gene_heatmap_and_spotplot.sh
Executable file
|
|
@ -0,0 +1,16 @@
|
|||
#!/bin/bash

#$ -cwd
#$ -N gene_heatmap_and_spotplot
#$ -V
#$ -l h_rt=47:59:59
#$ -l h_vmem=200G

# Runs gene_heatmap_and_spotplot.R with the single argument string given to
# this script. Exactly one argument is required.
if [ "$#" -ne 1 ]; then
    echo "Illegal number of parameters"
    exit 1
fi

# Quote the argument so the shell passes it through unchanged: unquoted, it
# would be split on whitespace and any glob characters would be expanded.
Rscript gene_heatmap_and_spotplot.R "$1"

echo "End on $(date)"
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
HSC/MPP
|
||||
Pre pro B cell
|
||||
pro-B cell
|
||||
pre-B cell
|
||||
B cell
|
||||
ILC progenitor
|
||||
Early lymphoid/T lymphocyte
|
||||
NK
|
||||
Neutrophil-myeloid progenitor
|
||||
Monocyte-DC precursor
|
||||
pDC precursor
|
||||
DC1
|
||||
DC2
|
||||
Monocyte
|
||||
Mono-Mac
|
||||
Mono-NK
|
||||
Kupffer Cell
|
||||
VCAM1+ EI macrophage
|
||||
EI macrophage
|
||||
MEMP
|
||||
Mast cell
|
||||
Megakaryocyte
|
||||
Early Erythroid
|
||||
Mid Erythroid
|
||||
Late Erythroid
|
||||
Endothelial cell
|
||||
Fibroblast
|
||||
Hepatocyte
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
PTPRC
|
||||
IL3RA
|
||||
CD7
|
||||
EPCAM
|
||||
FCGR3A
|
||||
CD4
|
||||
HLA-DRA
|
||||
MS4A1
|
||||
VCAM1
|
||||
CD38
|
||||
NCAM1
|
||||
CLEC9A
|
||||
CD14
|
||||
KIT
|
||||
ESAM
|
||||
CD3E
|
||||
CD8A
|
||||
CD1C
|
||||
CD34
|
||||
GYPA
|
||||
CD79B
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
HLA-DRA
|
||||
CD34
|
||||
SPINK2
|
||||
JCHAIN
|
||||
IGLL1
|
||||
CD79B
|
||||
TCL1A
|
||||
IGKC
|
||||
MS4A1
|
||||
LTB
|
||||
PTPRC
|
||||
CD3E
|
||||
CD7
|
||||
IL32
|
||||
CD8A
|
||||
NKG7
|
||||
XCL2
|
||||
NCAM1
|
||||
MPO
|
||||
LYZ
|
||||
PLAC8
|
||||
IL3RA
|
||||
CLEC9A
|
||||
CD1C
|
||||
S100A9
|
||||
CCL4
|
||||
CD14
|
||||
FCGR3A
|
||||
CD4
|
||||
C1QA
|
||||
VCAM1
|
||||
GYPA
|
||||
SERPINB1
|
||||
TPSAB1
|
||||
KIT
|
||||
PF4
|
||||
ESAM
|
||||
UBE2C
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
HLA-DRA
|
||||
CD34
|
||||
SPINK2
|
||||
JCHAIN
|
||||
IGLL1
|
||||
CD79B
|
||||
TCL1A
|
||||
IGKC
|
||||
MS4A1
|
||||
CD19
|
||||
LTB
|
||||
KLRB1
|
||||
PTPRC
|
||||
CD3E
|
||||
CD7
|
||||
IL32
|
||||
CD8A
|
||||
KLRD1
|
||||
NKG7
|
||||
XCL2
|
||||
NCAM1
|
||||
MPO
|
||||
LYZ
|
||||
PLAC8
|
||||
IL3RA
|
||||
CLEC9A
|
||||
CD1C
|
||||
S100A9
|
||||
CCL4
|
||||
CD14
|
||||
FCGR3A
|
||||
CD4
|
||||
C1QA
|
||||
VCAM1
|
||||
GYPA
|
||||
SERPINB1
|
||||
TPSAB1
|
||||
KIT
|
||||
PF4
|
||||
ITGA2B
|
||||
UBE2C
|
||||
GATA1
|
||||
KLF1
|
||||
ALAS2
|
||||
HBA1
|
||||
ESAM
|
||||
ECM1
|
||||
APOA1
|
||||
|
|
@ -0,0 +1 @@
|
|||
HSC
pro B cell early
pro B cell
pre B cell
B cell
ILC progenitor
NK Progenitor
NK
Neut-myeloid progenitor
Monocyte-DC progenitor
pDC progenitor
DC1
DC2
Monocyte
Mono-Mac
Mono-4 like
Kupffer Cell
VCAM1+ Erythroid Macrophage
Erythroid Macrophage
MEP
Mast cell
Megakaryocyte
Early Erythroid
Mid Erythroid
Late Erythroid
Endothelial cell
Fibroblast
Hepatocyte
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
VCAM1
|
||||
FCGR3A
|
||||
CD14
|
||||
GYPA
|
||||
CD1C
|
||||
LYZ
|
||||
NKG7
|
||||
CD3D
|
||||
CTSW
|
||||
ESAM
|
||||
CD34
|
||||
MYC
|
||||
GATA2
|
||||
CLEC9A
|
||||
IL3RA
|
||||
SPIB
|
||||
IRF8
|
||||
TPSAB1
|
||||
CPA3
|
||||
PF4
|
||||
ITGA2B
|
||||
MKI67
|
||||
MS4A1
|
||||
CD79B
|
||||
EBF1
|
||||
DNTT
|
||||
SPINK2
|
||||
IGLL1
|
||||
CD7
|
||||
XCL2
|
||||
IFNG
|
||||
RORC
|
||||
MPO
|
||||
GATA1
|
||||
KLF1
|
||||
APOA1
|
||||
AHSG
|
||||
IGKC
|
||||
IGLC2
|
||||
IGLC3
|
||||
HLA-DQB1
|
||||
HLA-DPB1
|
||||
HLA-DPA1
|
||||
HLA-DRA
|
||||
CNRIP1
|
||||
DNASE1L3
|
||||
AHSP
|
||||
HBM
|
||||
HBZ
|
||||
HBA1
|
||||
HBA2
|
||||
HBG1
|
||||
APOA2
|
||||
ALB
|
||||
C1QTNF4
|
||||
IL7R
|
||||
LTB
|
||||
CD52
|
||||
C1QC
|
||||
C1QA
|
||||
C1QB
|
||||
TPSB2
|
||||
HBD
|
||||
PPBP
|
||||
UBE2C
|
||||
PRSS57
|
||||
SERPINB1
|
||||
KLRB1
|
||||
CCL4
|
||||
CCL3
|
||||
HLA-DRB1
|
||||
S100A9
|
||||
S100A8
|
||||
LGALS1
|
||||
AZU1
|
||||
PRTN3
|
||||
GZMA
|
||||
IL32
|
||||
JCHAIN
|
||||
PLAC8
|
||||
IGHM
|
||||
TCL1A
|
||||
VPREB3
|
||||
HBB
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
SPINK2
|
||||
CD34
|
||||
C1QTNF4
|
||||
IGLL1
|
||||
EBF1
|
||||
DNTT
|
||||
LTB
|
||||
CD52
|
||||
CD79B
|
||||
VPREB3
|
||||
IGHM
|
||||
JCHAIN
|
||||
IGLC2
|
||||
TCL1A
|
||||
SPIB
|
||||
IGKC
|
||||
MS4A1
|
||||
IGLC3
|
||||
RORC
|
||||
IL7R
|
||||
KLRB1
|
||||
CD3D
|
||||
IL32
|
||||
CD7
|
||||
CTSW
|
||||
GZMA
|
||||
XCL2
|
||||
IFNG
|
||||
CCL4
|
||||
CCL3
|
||||
NKG7
|
||||
PRSS57
|
||||
SERPINB1
|
||||
APOA2
|
||||
ALB
|
||||
MPO
|
||||
AZU1
|
||||
PRTN3
|
||||
APOA1
|
||||
AHSG
|
||||
HLA-DPB1
|
||||
HLA-DPA1
|
||||
HLA-DRA
|
||||
HLA-DRB1
|
||||
CD1C
|
||||
PLAC8
|
||||
IRF8
|
||||
IL3RA
|
||||
DNASE1L3
|
||||
CLEC9A
|
||||
HLA-DQB1
|
||||
LGALS1
|
||||
LYZ
|
||||
S100A9
|
||||
S100A8
|
||||
C1QC
|
||||
C1QA
|
||||
C1QB
|
||||
HBA1
|
||||
HBA2
|
||||
HBG1
|
||||
HBB
|
||||
AHSP
|
||||
HBM
|
||||
FCGR3A
|
||||
CD14
|
||||
VCAM1
|
||||
GYPA
|
||||
HBZ
|
||||
KLF1
|
||||
MYC
|
||||
CPA3
|
||||
TPSAB1
|
||||
TPSB2
|
||||
GATA2
|
||||
CNRIP1
|
||||
ITGA2B
|
||||
PPBP
|
||||
PF4
|
||||
HBD
|
||||
ESAM
|
||||
GATA1
|
||||
MKI67
|
||||
UBE2C
|
||||
|
|
@ -0,0 +1 @@
|
|||
HSC
pro B cell early
pro B cell
pre B cell
B cell
ILC progenitor
NK Progenitor
NK
NK - proliferating
Neut-myeloid progenitor
Monocyte-DC progenitor
pDC progenitor
DC1
DC2
Monocyte
Mono-Mac
Mono-4 like
Kupffer Cell
VCAM1+ Erythroid Macrophage
Erythroid Macrophage
MEP
Mast cell
Megakaryocyte
Megakaryocyte - proliferating
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
HLA-DRA
|
||||
CD34
|
||||
SPINK2
|
||||
JCHAIN
|
||||
IGLL1
|
||||
CD79B
|
||||
TCL1A
|
||||
IGKC
|
||||
MS4A1
|
||||
CD19
|
||||
LTB
|
||||
KLRB1
|
||||
PTPRC
|
||||
CD3E
|
||||
CD7
|
||||
IL32
|
||||
CD8A
|
||||
KLRD1
|
||||
NKG7
|
||||
XCL2
|
||||
NCAM1
|
||||
MPO
|
||||
LYZ
|
||||
PLAC8
|
||||
IL3RA
|
||||
CLEC9A
|
||||
CD1C
|
||||
S100A9
|
||||
CCL4
|
||||
CD14
|
||||
FCGR3A
|
||||
CD4
|
||||
C1QA
|
||||
VCAM1
|
||||
GYPA
|
||||
SERPINB1
|
||||
TPSAB1
|
||||
KIT
|
||||
PF4
|
||||
ITGA2B
|
||||
UBE2C
|
||||
GATA1
|
||||
KLF1
|
||||
ALAS2
|
||||
HBA1
|
||||
ESAM
|
||||
ECM1
|
||||
APOA1
|
||||
Loading…
Add table
Add a link
Reference in a new issue