scRNA-seq_analysis

This commit is contained in:
veghp 2019-07-08 12:22:01 +01:00
commit 82cc2d191e
188 changed files with 146184 additions and 0 deletions

View file

@ -0,0 +1,179 @@
# Parse the command line.
# All trailing arguments are glued together (no separator) and then split on
# ";", so the caller is expected to pass eight ";"-separated R statements, each
# one assigning one of the variables listed in `expected.arguments` below.
args <- commandArgs(trailingOnly = TRUE)
args <- paste(args, collapse = "")
args <- unlist(strsplit(args, ";"))
if (length(args) != 8) {
  stop('This pipeline requires 8 parameters: seurat.addr\n set.ident \n genes.to.plot (name of file containing genes to plot)\n cell.types (name of file containing cell types to plot or all)\n cluster.genes (boolean)\n diagonalize (boolean indicating to compute the order of the genes in such a way as to make the appearance of a diagonal on the spotplot - will override the cluster.genes boolean)\n plot.dims (4 tuple for plots dimenssion)\n save.gene.order (boolean indicate to save the genes in the order computed by clustering and/or diaganolization - can be NA or a file name)')
}

# Raw text of each argument, kept under the `<name>.arg` variables.
seurat.addr.arg <- args[1]
set.ident.arg <- args[2]
genes.to.plot.arg <- args[3]
cell.types.arg <- args[4]
cluster.genes.arg <- args[5]
diagonalize.arg <- args[6]
plot.dims.arg <- args[7]
save.gene.order.arg <- args[8]

# Variables the argument statements must define, in positional order.
expected.arguments <- c(
  "seurat.addr", "set.ident", "genes.to.plot", "cell.types",
  "cluster.genes", "diagonalize", "plot.dims", "save.gene.order"
)
for (n in seq_along(expected.arguments)) {
  variable.name <- expected.arguments[n]
  # NOTE: the argument text is executed as R code. That is how this pipeline
  # receives its parameters, so the command line must come from a trusted
  # source.
  eval(parse(text = args[n]))
  # The statement at position n has to create the variable expected there.
  if (!exists(variable.name)) {
    stop(sprintf("Argument %s not passed. Stopping ... ", variable.name))
  }
}
# create required folders for output and work material
# The folder name uses the bare data file name, so it has to be built before
# `seurat.addr` is turned into a path further down.
output_folder <- paste("09_gene_expression_heatmap_and_spotplot", seurat.addr, sep = "_")
# Time stamp as yymmddHHMMSS. An explicit format keeps the folder name free of
# time-zone suffixes and fractional seconds regardless of how the running R
# version prints a date-time by default.
c.time <- format(Sys.time(), "%y%m%d%H%M%S")
output_folder <- paste(output_folder, c.time, sep = "_")
output_folder <- file.path("../../output", output_folder)
# recursive = TRUE also creates "../../output" when it does not exist yet.
if (!dir.exists(output_folder)) {
  dir.create(output_folder, recursive = TRUE)
}
# Resolve the input files relative to the data and resources folders.
seurat.addr <- file.path("../../data", seurat.addr)
genes.to.plot <- file.path("../../resources", genes.to.plot)
library(Seurat)
library(dplyr)
library(plyr)
library(reshape)
#######################################################################################################
# load seurat object
print("loading data ...")
seurat.obj <- readRDS(seurat.addr)
print("Data loaded.")
# set identities
seurat.obj <- SetAllIdent(object = seurat.obj, id = set.ident)
# load genes: one gene name per line
genes <- readLines(genes.to.plot, warn = FALSE)
# Strip surrounding whitespace (e.g. trailing blanks or carriage returns) so
# such lines still match the row names, and ignore empty lines.
genes <- trimws(genes)
genes <- genes[genes != ""]
# check that all genes are in the dataset
genes.in.data <- genes %in% rownames(seurat.obj@data)
if (!all(genes.in.data)) {
  not.found <- genes[!genes.in.data]
  print(sprintf("The following genes were not found in the data: %s", paste(not.found, collapse = ", ")))
  genes <- genes[genes.in.data]
}
# Nothing can be plotted without genes; fail here with a clear message instead
# of further down in the aggregation.
if (length(genes) == 0) {
  stop("None of the requested genes were found in the data. Stopping ... ")
}
# check for duplicates
if (length(genes) != length(unique(genes))) {
  gene.counts <- table(genes)
  duplicates <- names(gene.counts)[gene.counts > 1]
  duplicates <- paste(duplicates, collapse = ", ")
  print(sprintf("Duplicates found: %s", duplicates))
  print("This will not affect the workflow, but be aware the heat map will have fewer genes than expected.")
  genes <- unique(genes)
}
# Decide which cell types are plotted and in which order.
if (cell.types == "all") {
  # every identity present in the object, sorted
  cell.types <- sort(as.vector(unique(seurat.obj@ident)))
} else {
  # otherwise `cell.types` names a file in the resources folder; its lines are
  # split on ", " to obtain the individual cell type names
  cell.types <- file.path("../../resources", cell.types)
  cell_types_file <- file(cell.types, "r")
  cell.types <- unlist(strsplit(readLines(cell_types_file, warn = FALSE), ", "))
  close(cell_types_file)
  print(cell.types)
  # report how many of the requested cell types exist among the identities
  print("All cell types in data set:")
  known.identities <- as.vector(unique(seurat.obj@ident))
  print(table(cell.types %in% known.identities))
}
# subset expression data matrix
keep.cell.names <- names(seurat.obj@ident)[seurat.obj@ident %in% cell.types]
if (length(keep.cell.names) == 0) {
  stop("No cells belong to the requested cell types. Stopping ... ")
}
# drop = FALSE keeps the genes x cells matrix shape even when only one gene
# (or one cell) is selected.
expression.data <- data.matrix(seurat.obj@data[genes, keep.cell.names, drop = FALSE])
# create a data matrix with mean expression of each marker by cell type
cell.labels <- as.vector(seurat.obj@ident[keep.cell.names])
expression.data <- as.data.frame(t(expression.data))
# aggregate() returns the grouping values in a first column named "Group.1",
# followed by one column of means per gene.
expression.data <- aggregate(expression.data, list(cell.labels), mean)
rownames(expression.data) <- expression.data$Group.1
# Remove the label column; drop = FALSE keeps a data frame for a single gene.
expression.data <- expression.data[, -1, drop = FALSE]
# cluster the genes and reorder the expression matrix
# (hierarchical clustering needs at least two genes)
if (cluster.genes && ncol(expression.data) > 1) {
  # dist() matches the method name as a prefix of its known methods, so the
  # name has to be spelled "euclidean".
  expression.distance <- dist(x = t(expression.data), method = "euclidean")
  gene.order <- hclust(d = expression.distance, method = "ward.D")$order
  expression.data <- expression.data[, gene.order, drop = FALSE]
}
if (diagonalize) {
  # Expression-weighted mean of the row positions for one gene column.
  computer.vector.weight.center <- function(vecn) {
    indices <- seq_along(vecn)
    sum(indices * vecn) / sum(vecn)
  }
  # NOTE(review): the positions are the current row order of expression.data,
  # which may differ from the order of `cell.types` used on the plots - confirm
  # this is the intended reference order.
  centers <- apply(X = expression.data, MARGIN = 2, FUN = computer.vector.weight.center)
  # Genes sorted by increasing weighted centre.
  centers <- order(centers)
  print(length(centers))
  expression.data <- expression.data[, centers, drop = FALSE]
}
# Heat map of the mean expression: genes on the x axis, cell types on the y axis.
# Long format first: one row per (cell type, gene) pair.
expression.melt <- reshape::melt(data = as.matrix(expression.data))
names(expression.melt) <- c("CellTypes", "Genes", "Values")
expression.melt$CellTypes <- factor(as.vector(expression.melt$CellTypes), levels = cell.types)
heatmap.plot <- ggplot(
  expression.melt,
  aes(
    factor(Genes, levels = colnames(expression.data)),
    factor(CellTypes, levels = rev(cell.types))
  )
) +
  geom_tile(aes(fill = Values), color = "black") +
  scale_fill_gradient(low = "lightblue", high = "darkred") +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  labs(fill = "Expression")
# The first two entries of plot.dims are the width and height of this PDF.
heatmap.fname <- file.path(output_folder, "./heatmap.pdf")
pdf(heatmap.fname, width = plot.dims[1], height = plot.dims[2])
print(heatmap.plot)
dev.off()
# plot diag matrix as dot plot (spot plot)
# Only cell types that actually have a row in expression.data are used;
# selecting a missing row name would add an all-NA row and break the scale
# limits computed below.
present.cell.types <- cell.types[cell.types %in% rownames(expression.data)]
# Rows and columns are reversed; drop = FALSE keeps a data frame even when a
# single gene or cell type is left.
expression.data.r <- expression.data[rev(present.cell.types), rev(colnames(expression.data)), drop = FALSE]
expression.melt <- reshape::melt(data = as.matrix(expression.data.r))
colnames(expression.melt) <- c("CellTypes", "Genes", "Expression")
# Numeric plot coordinates. The positions are repeated so that they line up
# with the melted rows: X numbers the genes in the order of the columns of
# expression.data.r, Y numbers the cell types counting down along its rows.
expression.melt$X <- rep(seq_len(ncol(expression.data.r)), each = nrow(expression.data.r))
expression.melt$Y <- rep(rev(seq_len(nrow(expression.data.r))), times = ncol(expression.data.r))
# Upper limit of the legends: the next integer above the largest mean.
max.expression <- floor(max(expression.melt$Expression)) + 1
spot.plot <- ggplot(expression.melt, aes(x = Y, y = X)) +
  geom_point(aes(size = Expression, color = Expression)) +
  scale_color_gradient(limits = c(0, max.expression), breaks = seq(0, max.expression, by = 1), low = "lightsteelblue1", high = "darkred") +
  guides(color = guide_legend(), size = guide_legend()) +
  scale_size_continuous(limits = c(0, max.expression), breaks = seq(0, max.expression, by = 1)) +
  scale_y_discrete(name = "", limits = colnames(expression.data.r)) +
  scale_x_discrete(name = "", limits = rev(rownames(expression.data.r))) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# The last two entries of plot.dims are the width and height of this PDF.
splotplot.fname <- file.path(output_folder, "./spotplot.pdf")
pdf(splotplot.fname, width = plot.dims[3], height = plot.dims[4])
print(spot.plot)
dev.off()
# Optionally write the final gene order (the column names of expression.data,
# one per line) to a file in the resources folder; skipped when
# save.gene.order is NA.
if (!is.na(save.gene.order)) {
  gene.order.path <- file.path("../../resources", save.gene.order)
  writeLines(colnames(expression.data), gene.order.path)
}
print("Ended beautifully ... ")

View file

@ -0,0 +1,16 @@
#!/bin/bash
#$ -cwd
#$ -N gene_heatmap_and_spotplot
#$ -V
#$ -l h_rt=47:59:59
#$ -l h_vmem=200G

# Job wrapper: expects exactly one argument, which is handed unchanged to
# gene_heatmap_and_spotplot.R.
if [ "$#" -ne 1 ]; then
    echo "Illegal number of parameters" >&2
    exit 1
fi

# Quote the argument so the shell neither splits it into words nor expands
# glob characters inside it before Rscript receives it.
Rscript gene_heatmap_and_spotplot.R "$1"
echo "End on $(date)"

View file

@ -0,0 +1,28 @@
HSC/MPP
Pre pro B cell
pro-B cell
pre-B cell
B cell
ILC progenitor
Early lymphoid/T lymphocyte
NK
Neutrophil-myeloid progenitor
Monocyte-DC precursor
pDC precursor
DC1
DC2
Monocyte
Mono-Mac
Mono-NK
Kupffer Cell
VCAM1+ EI macrophage
EI macrophage
MEMP
Mast cell
Megakaryocyte
Early Erythroid
Mid Erythroid
Late Erythroid
Endothelial cell
Fibroblast
Hepatocyte

View file

@ -0,0 +1,21 @@
PTPRC
IL3RA
CD7
EPCAM
FCGR3A
CD4
HLA-DRA
MS4A1
VCAM1
CD38
NCAM1
CLEC9A
CD14
KIT
ESAM
CD3E
CD8A
CD1C
CD34
GYPA
CD79B

View file

@ -0,0 +1,38 @@
HLA-DRA
CD34
SPINK2
JCHAIN
IGLL1
CD79B
TCL1A
IGKC
MS4A1
LTB
PTPRC
CD3E
CD7
IL32
CD8A
NKG7
XCL2
NCAM1
MPO
LYZ
PLAC8
IL3RA
CLEC9A
CD1C
S100A9
CCL4
CD14
FCGR3A
CD4
C1QA
VCAM1
GYPA
SERPINB1
TPSAB1
KIT
PF4
ESAM
UBE2C

View file

@ -0,0 +1,48 @@
HLA-DRA
CD34
SPINK2
JCHAIN
IGLL1
CD79B
TCL1A
IGKC
MS4A1
CD19
LTB
KLRB1
PTPRC
CD3E
CD7
IL32
CD8A
KLRD1
NKG7
XCL2
NCAM1
MPO
LYZ
PLAC8
IL3RA
CLEC9A
CD1C
S100A9
CCL4
CD14
FCGR3A
CD4
C1QA
VCAM1
GYPA
SERPINB1
TPSAB1
KIT
PF4
ITGA2B
UBE2C
GATA1
KLF1
ALAS2
HBA1
ESAM
ECM1
APOA1

View file

@ -0,0 +1 @@
HSC pro B cell early pro B cell pre B cell B cell ILC progenitor NK Progenitor NK Neut-myeloid progenitor Monocyte-DC progenitor pDC progenitor DC1 DC2 Monocyte Mono-Mac Mono-4 like Kupffer Cell VCAM1+ Erythroid Macrophage Erythroid Macrophage MEP Mast cell Megakaryocyte Early Erythroid Mid Erythroid Late Erythroid Endothelial cell Fibroblast Hepatocyte

View file

@ -0,0 +1,84 @@
VCAM1
FCGR3A
CD14
GYPA
CD1C
LYZ
NKG7
CD3D
CTSW
ESAM
CD34
MYC
GATA2
CLEC9A
IL3RA
SPIB
IRF8
TPSAB1
CPA3
PF4
ITGA2B
MKI67
MS4A1
CD79B
EBF1
DNTT
SPINK2
IGLL1
CD7
XCL2
IFNG
RORC
MPO
GATA1
KLF1
APOA1
AHSG
IGKC
IGLC2
IGLC3
HLA-DQB1
HLA-DPB1
HLA-DPA1
HLA-DRA
CNRIP1
DNASE1L3
AHSP
HBM
HBZ
HBA1
HBA2
HBG1
APOA2
ALB
C1QTNF4
IL7R
LTB
CD52
C1QC
C1QA
C1QB
TPSB2
HBD
PPBP
UBE2C
PRSS57
SERPINB1
KLRB1
CCL4
CCL3
HLA-DRB1
S100A9
S100A8
LGALS1
AZU1
PRTN3
GZMA
IL32
JCHAIN
PLAC8
IGHM
TCL1A
VPREB3
HBB

View file

@ -0,0 +1,84 @@
SPINK2
CD34
C1QTNF4
IGLL1
EBF1
DNTT
LTB
CD52
CD79B
VPREB3
IGHM
JCHAIN
IGLC2
TCL1A
SPIB
IGKC
MS4A1
IGLC3
RORC
IL7R
KLRB1
CD3D
IL32
CD7
CTSW
GZMA
XCL2
IFNG
CCL4
CCL3
NKG7
PRSS57
SERPINB1
APOA2
ALB
MPO
AZU1
PRTN3
APOA1
AHSG
HLA-DPB1
HLA-DPA1
HLA-DRA
HLA-DRB1
CD1C
PLAC8
IRF8
IL3RA
DNASE1L3
CLEC9A
HLA-DQB1
LGALS1
LYZ
S100A9
S100A8
C1QC
C1QA
C1QB
HBA1
HBA2
HBG1
HBB
AHSP
HBM
FCGR3A
CD14
VCAM1
GYPA
HBZ
KLF1
MYC
CPA3
TPSAB1
TPSB2
GATA2
CNRIP1
ITGA2B
PPBP
PF4
HBD
ESAM
GATA1
MKI67
UBE2C

View file

@ -0,0 +1 @@
HSC pro B cell early pro B cell pre B cell B cell ILC progenitor NK Progenitor NK NK - proliferating Neut-myeloid progenitor Monocyte-DC progenitor pDC progenitor DC1 DC2 Monocyte Mono-Mac Mono-4 like Kupffer Cell VCAM1+ Erythroid Macrophage Erythroid Macrophage MEP Mast cell Megakaryocyte Megakaryocyte - proliferating

View file

@ -0,0 +1,48 @@
HLA-DRA
CD34
SPINK2
JCHAIN
IGLL1
CD79B
TCL1A
IGKC
MS4A1
CD19
LTB
KLRB1
PTPRC
CD3E
CD7
IL32
CD8A
KLRD1
NKG7
XCL2
NCAM1
MPO
LYZ
PLAC8
IL3RA
CLEC9A
CD1C
S100A9
CCL4
CD14
FCGR3A
CD4
C1QA
VCAM1
GYPA
SERPINB1
TPSAB1
KIT
PF4
ITGA2B
UBE2C
GATA1
KLF1
ALAS2
HBA1
ESAM
ECM1
APOA1