# Mirror of https://github.com/haniffalab/scRNA-seq_analysis.git
# Synced 2024-10-23 08:29:24 -07:00
# 148 lines, 5.3 KiB, R, executable file
# ---- Command-line argument handling ----------------------------------------
# All trailing command-line arguments are concatenated and then split on ";".
# Each resulting piece is expected to be an R assignment (for example
# "seurat.addr = 'object.RDS'") that is evaluated further down to create the
# corresponding variable in this script's environment.
args <- commandArgs(trailingOnly = TRUE)
args <- paste(args, collapse = "")
args <- unlist(strsplit(args, ";"))

# One line per expected argument; "<name>.arg" receives the raw text of the
# n-th command-line piece.
arguments.list <- "
seurat.addr.arg = args[1]
set.ident.arg = args[2]
"

expected_arguments <- unlist(strsplit(arguments.list, "\n"))
expected_arguments <- expected_arguments[nzchar(trimws(expected_arguments))]

if (length(args) != length(expected_arguments)) {
  # Strip the ".arg = args[n]" tail so only the user-facing names are listed.
  expected_names <- sub("\\.arg.*$", "", trimws(expected_arguments))
  error.msg <- sprintf(
    "This pipeline requires %s parameters: ",
    as.character(length(expected_arguments))
  )
  stop(paste0(error.msg, "\n", paste(expected_names, collapse = "\n")),
       call. = FALSE)
}

# Defines seurat.addr.arg and set.ident.arg from the split command line.
eval(parse(text = arguments.list))

for (n in seq_along(expected_arguments)) {
  argument <- gsub(pattern = "\\s", replacement = "", x = expected_arguments[n])
  argument.name <- unlist(strsplit(argument, "="))[1]
  # Escaped and anchored so only a literal trailing ".arg" is removed.
  variable.name <- sub(pattern = "\\.arg$", replacement = "", x = argument.name)
  argument.content <- get(argument.name)
  # NOTE(review): this executes text taken verbatim from the command line.
  # It is how the pipeline receives its parameters, so it is kept, but the
  # script must only be launched with trusted arguments.
  eval(parse(text = argument.content))
  if (!exists(variable.name)) {
    stop(sprintf("Argument %s not passed. Stopping ... ", variable.name))
  }
}
|
|
|
|
# create required folders for output and work material
# The folder name is "<working dir name without numeric prefix>_<seurat file>_<timestamp>".
output_folder <- gsub(pattern = "^\\d+_", replacement = "", x = basename(getwd()))
output_folder <- paste(output_folder, seurat.addr, sep = "_")

# Timestamp as yymmddHHMMSS. format() is used instead of stripping characters
# from the coerced Sys.time() value, because as.character() on a POSIXct can
# include fractional seconds (R >= 4.3), which would leak into the folder name.
c.time <- format(Sys.time(), "%y%m%d%H%M%S")
output_folder <- paste(output_folder, c.time, sep = "_")
output_folder <- file.path("../../output", output_folder)

# Create the output tree; fail loudly here rather than later inside pdf().
for (folder in c(output_folder,
                 file.path(output_folder, "gene_word_clouds"),
                 file.path(output_folder, "celltype_word_clouds"))) {
  if (!dir.exists(folder) && !dir.create(folder, recursive = TRUE)) {
    stop(sprintf("Could not create output folder %s", folder), call. = FALSE)
  }
}

# From here on seurat.addr is the path of the input object under ../../data.
seurat.addr <- file.path("../../data", seurat.addr)
|
|
|
|
source("../../tools/bunddle_utils.R")
|
|
|
|
library(Seurat)
|
|
library(RColorBrewer)
|
|
library(plyr)
|
|
library(dplyr)
|
|
library(wordcloud)
|
|
|
|
|
|
# Convert per-gene expression into weighted per-population scores.
#
# Reads two objects from the calling environment:
#   gene_to_pop        - table whose column V1 holds gene names and whose
#                        column V2 holds the ", "-separated population flags
#                        associated with each gene.
#   populations.weight - named numeric vector indexed by population flag.
#
# Args:
#   gene.expr: named numeric vector of expression values (names are genes).
#
# Returns:
#   A numeric vector named by population flag. Each gene listed in
#   gene_to_pop and present in gene.expr contributes
#   100 * expression / populations.weight[flag] to every flag it carries.
#   The vector is empty when no gene of gene.expr appears in gene_to_pop.
gene_to_weighted_cell_mention <- function(gene.expr) {
  known.genes <- as.character(gene_to_pop$V1)
  known.flags <- as.character(gene_to_pop$V2)

  # Keep only the genes that have population annotations, in table order.
  gene.expr <- gene.expr[known.genes[known.genes %in% names(gene.expr)]]

  pop.expr <- numeric(0)
  # seq_along() (not 1:length()) so that zero matching genes means zero
  # iterations instead of iterating over c(1, 0).
  for (k in seq_along(gene.expr)) {
    gene.name <- names(gene.expr)[k]
    gene.value <- unname(gene.expr[k])
    pop.flags <- unlist(strsplit(known.flags[known.genes == gene.name], ", "))
    for (pop.flag in pop.flags) {
      gene.v <- 100 * gene.value / unname(populations.weight[pop.flag])
      if (pop.flag %in% names(pop.expr)) {
        pop.expr[pop.flag] <- pop.expr[pop.flag] + gene.v
      } else {
        # Assigning by a new name appends a named element.
        pop.expr[pop.flag] <- gene.v
      }
    }
  }
  pop.expr
}
|
|
|
|
#######################################################################################################
|
|
|
|
# Read the serialized Seurat object from disk, reporting progress on stdout.
print("loading data ... ")
seurat.obj <- readRDS(file = seurat.addr)
print("Data loaded.")

# Switch the active identity of every cell to the metadata column named by
# the set.ident argument.
seurat.obj <- SetAllIdent(seurat.obj, id = set.ident)
|
|
|
|
# ---- Gene word clouds --------------------------------------------------------
# For every identity class, average the expression of each gene over the
# cells of that class and draw a word cloud of gene names sized by that mean.
print('Making gene clouds')
idents <- as.vector(unique(seurat.obj@ident))
cloud.palette <- brewer.pal(8, "Dark2")  # loop-invariant, computed once

for (i in seq_along(idents)) {
  # Cell names belonging to the current identity class.
  ident <- names(seurat.obj@ident)[seurat.obj@ident == idents[i]]

  # drop = FALSE keeps a matrix even when the class contains a single cell.
  expression.data <- as.matrix(seurat.obj@data[, ident, drop = FALSE])
  expression.data <- rowMeans(expression.data)

  # Exclude genes whose name starts with "MT-".
  genes <- names(expression.data)
  genes <- genes[!grepl(pattern = "^MT-", x = genes)]
  expression.data <- expression.data[genes]

  # "/" in an identity name would be read as a path separator.
  fname <- file.path(
    output_folder, "gene_word_clouds",
    paste0(gsub(pattern = "/", replacement = "-", x = idents[i]), ".pdf")
  )
  freq.weight <- round(expression.data * 100)

  pdf(fname, width = 15, height = 15)
  # finally = dev.off() closes the PDF device even if wordcloud() fails, so a
  # failure does not leave an open device behind.
  tryCatch(
    wordcloud(words = names(expression.data), freq.weight, min.freq = 1,
              max.words = 500, random.order = FALSE, rot.per = 0.0,
              colors = cloud.palette, order.color = TRUE),
    finally = dev.off()
  )
}
|
|
|
|
# ---- Cell type flag word clouds ----------------------------------------------
# For every identity class, turn mean gene expression into per-population
# scores with gene_to_weighted_cell_mention() and draw them as a word cloud.
print('Making cell type flags clouds')

# Gene -> population annotation table: column V1 is read as the gene name and
# column V2 as a ", "-separated list of population flags.
gene_to_pop <- read.csv("./gene_to_pop.tsv", sep = "\t", header = FALSE,
                        stringsAsFactors = FALSE)

# Weight of a population = number of times its flag occurs across all genes.
populations <- paste(as.vector(gene_to_pop$V2), collapse = ", ")
populations <- unlist(strsplit(populations, ", "))
populations.table <- table(populations)
populations.weight <- as.vector(populations.table)
names(populations.weight) <- names(populations.table)

idents <- as.vector(unique(seurat.obj@ident))
cloud.palette <- brewer.pal(8, "Dark2")  # loop-invariant, computed once

for (i in seq_along(idents)) {
  print(idents[i])
  # Cell names belonging to the current identity class.
  ident <- names(seurat.obj@ident)[seurat.obj@ident == idents[i]]

  # drop = FALSE keeps a matrix even when the class contains a single cell.
  expression.data <- as.matrix(seurat.obj@data[, ident, drop = FALSE])
  expression.data <- rowMeans(expression.data)

  # Exclude genes whose name starts with "MT-".
  genes <- names(expression.data)
  genes <- genes[!grepl(pattern = "^MT-", x = genes)]
  expression.data <- expression.data[genes]

  pop.expr <- gene_to_weighted_cell_mention(expression.data)
  clouder <- round(100 * pop.expr)
  if (length(clouder) == 0) {
    # Nothing to draw: no gene of this class is listed in gene_to_pop.
    print(sprintf("No annotated genes found for %s, skipping.", idents[i]))
    next
  }

  # "/" in an identity name would be read as a path separator.
  fname <- sprintf('%s.pdf', idents[i])
  fname <- gsub(pattern = "/", replacement = "-", x = fname)
  fname <- file.path(output_folder, "celltype_word_clouds", fname)

  pdf(fname, width = 10, height = 10)
  # finally = dev.off() closes the PDF device even if wordcloud() fails.
  tryCatch(
    wordcloud(words = names(clouder), clouder, min.freq = 1, max.words = 500,
              random.order = FALSE, rot.per = 0.0, colors = cloud.palette,
              order.color = TRUE),
    finally = dev.off()
  )
}

print("Ended beautifully ... ")
|