mirror of
https://github.com/haniffalab/scRNA-seq_analysis.git
synced 2024-10-23 08:29:24 -07:00
scRNA-seq_analysis
This commit is contained in:
commit
82cc2d191e
188 changed files with 146184 additions and 0 deletions
118
pipelines/93_gene_grouping/gene_grouping.R
Executable file
118
pipelines/93_gene_grouping/gene_grouping.R
Executable file
|
|
@ -0,0 +1,118 @@
|
|||
args = commandArgs(trailingOnly=T)
|
||||
args = paste(args, collapse = "")
|
||||
args = unlist(strsplit(args, ";"))
|
||||
|
||||
arguments.list = "
|
||||
seurat.addr.arg = args[1]
|
||||
no_clusters.arg = args[2]
|
||||
"
|
||||
|
||||
python.addr = "python"
|
||||
|
||||
expected_arguments = unlist(strsplit(arguments.list, "\n"))
|
||||
expected_arguments = expected_arguments[!(expected_arguments == "")]
|
||||
|
||||
if(length(args) != length(expected_arguments)){
|
||||
error.msg = sprintf('This pipeline requires %s parameters', as.character(length(expected_arguments)))
|
||||
expected_arguments = paste(unlist(lapply(strsplit(expected_arguments, ".arg"), "[", 1)), collapse = "\n")
|
||||
stop(sprintf('This pipeline requires %s parameters: '))
|
||||
}
|
||||
|
||||
eval(parse(text = arguments.list))
|
||||
|
||||
for(n in 1:length(expected_arguments)){
|
||||
argument = expected_arguments[n]
|
||||
argument = gsub(pattern=" ", replacement="", x=argument)
|
||||
argument.name = unlist(strsplit(argument, "="))[1]
|
||||
variable.name = gsub(pattern=".arg", replacement="", argument.name)
|
||||
argument.content = eval(parse(text = argument.name))
|
||||
eval(parse(text = argument.content))
|
||||
if (!exists(variable.name)){
|
||||
stop(sprintf("Argument %s not passed. Stopping ... ", variable.name))
|
||||
}
|
||||
}
|
||||
|
||||
# create required folders for output and work material
|
||||
output_folder = gsub(pattern="^\\d+_", replacement="", x=basename(getwd()))
|
||||
output_folder = paste(output_folder, seurat.addr, sep = "_")
|
||||
c.time = Sys.time()
|
||||
c.time = gsub(pattern=" BST", replacement="", x=c.time)
|
||||
c.time = gsub(pattern=":", replacement="", x=c.time)
|
||||
c.time = gsub(pattern=" ", replacement="", x=c.time)
|
||||
c.time = gsub(pattern="-", replacement="", x=c.time)
|
||||
c.time = substr(x=c.time, start=3, stop=nchar(c.time))
|
||||
output_folder = paste(output_folder, c.time, sep = "_")
|
||||
output_folder = file.path("../../output", output_folder)
|
||||
dir.create(output_folder)
|
||||
|
||||
library(Seurat)
|
||||
library(RColorBrewer)
|
||||
library(dplyr)
|
||||
library(plyr)
|
||||
|
||||
#######################################################################################################
|
||||
|
||||
# load data
|
||||
print("loading data ... ")
|
||||
seurat.obj = readRDS(seurat.addr)
|
||||
print("Data loaded.")
|
||||
|
||||
# check if LouvainClustering is present
|
||||
if ("LouvainClustering" %in% colnames(seurat.obj@meta.data)){
|
||||
print("Identifying gene outliers but first need to aggregate gene expression by clusters")
|
||||
seurat.obj = SetAllIdent(object=seurat.obj, id="LouvainClustering")
|
||||
no.genes = nrow(seurat.obj@data)
|
||||
start_index = 1
|
||||
while (start_index < no.genes){
|
||||
end_index = start_index + 999
|
||||
end_index = min(end_index, no.genes)
|
||||
expression.data_ = data.matrix(seurat.obj@data[start_index:end_index, ])
|
||||
expression.data_ = t(expression.data_)
|
||||
expression.data_ = as.data.frame(expression.data_)
|
||||
expression.data_ = cbind(data.frame(CellLabels = as.vector(seurat.obj@ident)), expression.data_)
|
||||
expression.data_ = aggregate(expression.data_[2:dim(expression.data_)[2]], list(expression.data_$CellLabels), mean)
|
||||
expression.data_ = cbind(data.frame(CellType = expression.data_$Group.1), expression.data_[, 2:dim(expression.data_)[2]])
|
||||
rownames(expression.data_) = expression.data_$CellType
|
||||
expression.data_ = expression.data_[, 2:ncol(expression.data_)]
|
||||
print(start_index)
|
||||
if (start_index == 1){
|
||||
expression.data = expression.data_
|
||||
}else{
|
||||
expression.data = cbind(expression.data, expression.data_)
|
||||
}
|
||||
start_index = start_index + 1000
|
||||
}
|
||||
# saving the expression matrix
|
||||
write.csv(expression.data, file.path(output_folder, "expression.csv"))
|
||||
# run python script to identify outliers
|
||||
command = sprintf("%s clustering.py %s %s", python.addr, output_folder, no_clusters)
|
||||
system(command, wait = T)
|
||||
# remove the expression csv file
|
||||
file.remove(file.path(output_folder, "expression.csv"))
|
||||
# load gene clustering
|
||||
gene_clustering = read.csv(file.path(output_folder, "clustering.csv"), row.names = 1)
|
||||
# save feature plots
|
||||
gene_names = as.vector(unique(gene_clustering$GeneNames))
|
||||
features_folder = file.path(output_folder, "features")
|
||||
dir.create(features_folder)
|
||||
dr_coordinates = seurat.obj@dr$umap@cell.embeddings
|
||||
for (i in seq_along(gene_names)){
|
||||
gene_name = gene_names[i]
|
||||
png_name = paste(file.path(features_folder, gene_name), "png", sep = ".")
|
||||
dframe = data.frame(X = dr_coordinates[, 1], Y = dr_coordinates[, 2], Expression = seurat.obj@data[gene_name, ])
|
||||
plot.obj = ggplot(dframe, aes(x = X, y = Y, color = Expression))
|
||||
plot.obj = plot.obj + geom_point(size = .5)
|
||||
plot.obj = plot.obj + theme_void() + theme(panel.background = element_rect(fill = 'black', colour = 'black'))
|
||||
plot.obj = plot.obj + scale_colour_gradient(low = "blue", high = "red")
|
||||
png(png_name, width = 500, height = 500)
|
||||
print(plot.obj)
|
||||
dev.off()
|
||||
if (i %% 10 == 0){
|
||||
print(sprintf("%s / %s", i, length(gene_names)))
|
||||
}
|
||||
}
|
||||
}else{
|
||||
print("Data needs to be clustered first")
|
||||
}
|
||||
|
||||
print("Ended beautifully ... ")
|
||||
Loading…
Add table
Add a link
Reference in a new issue