scRNA-seq_analysis

This commit is contained in:
veghp 2019-07-08 12:22:01 +01:00
commit 82cc2d191e
188 changed files with 146184 additions and 0 deletions

64
pipelines/11_plot_dr/dm.py Executable file
View file

@ -0,0 +1,64 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 14 15:01:36 2018
@author: doru
"""
import sys
args = sys.argv
working_folder = args[1]
import matplotlib; matplotlib.use('Agg');
import scanpy.api as sc;
import pandas as pd
sc.settings.verbosity = 3

# exchange files located in the working folder given on the command line
counts_fname = "{CW}/raw_data.mtx".format(CW = working_folder)
genes_fname = "{CW}/genenames.csv".format(CW = working_folder)
cells_fname = "{CW}/cellnames.csv".format(CW = working_folder)
labels_fname = "{CW}/cell_labels.csv".format(CW = working_folder)
dm_fname = "{CW}/dm.csv".format(CW = working_folder)

# read the count matrix; it is transposed right after loading
adata = sc.read(counts_fname, cache = False).T
# gene and cell names are taken from the second column of their csv files
adata.var_names = pd.read_csv(genes_fname).iloc[:, 1]
adata.obs_names = pd.read_csv(cells_fname).iloc[:, 1]
# cell labels are indexed by the first column of their csv file
adata.obs["cell_labels"] = pd.read_csv(labels_fname, index_col = 0)

# drop genes detected in fewer than 3 cells
sc.pp.filter_genes(adata, min_cells = 3)
# keep a log-transformed copy in .raw, then normalise counts per cell
adata.raw = sc.pp.log1p(adata, copy = True)
sc.pp.normalize_per_cell(adata, counts_per_cell_after = 1e4)
# select genes by dispersion and keep only those
variable_genes = sc.pp.filter_genes_dispersion(
    adata.X, min_mean = 0.0125, max_mean = 3, min_disp = 0.5)
adata = adata[:, variable_genes.gene_subset]
# log-transform the normalised values of the selected genes, then scale
sc.pp.log1p(adata)
sc.pp.scale(adata, max_value = 10)

# PCA -> neighbourhood graph -> diffusion map
sc.tl.pca(adata)
sc.pp.neighbors(adata, n_neighbors = 15, n_pcs = 20, knn = True, random_state = 10, method = "gauss")
sc.tl.diffmap(adata, n_comps = 20)

# write the diffusion components without a header row (the row index is kept)
diffusion_components = pd.DataFrame(adata.obsm["X_diffmap"])
diffusion_components.to_csv(dm_fname, header = None)

View file

@ -0,0 +1,90 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 22 11:03:12 2018
@author: doru
"""
# Build the interactive AGA linkage page.
#
# Usage: make_AGA_app.py <output_folder> <category>
#
# Reads connectivities.csv, coordinates.csv and (optionally) colours.csv from
# <output_folder>/AGA_folder, renders them as JavaScript assignments and
# injects them into template_for_AGA_app.html (looked up in the current
# working directory). The page is saved as
# <output_folder>/AGAlinkage_map_<category>.html.
import random
import sys
from os.path import join

import pandas as pd

# Coordinate extremes are multiplied by this factor before normalising
# (presumably to leave a margin around the bubbles -- TODO confirm).
SCALE_SCALE = 1.4
TEMPLATE_FNAME = 'template_for_AGA_app.html'
DATA_PLACEHOLDER = '// insert data here'


def random_colours(cell_types):
    """Return a DataFrame indexed by 'CellTypes' with a random hex colour
    ('Colours' column) for every entry of *cell_types*."""
    rows = []
    for cell_type in cell_types:
        red, green, blue = (random.randint(0, 255) for _ in range(3))
        rows.append({'CellTypes': cell_type,
                     'Colours': '#%02X%02X%02X' % (red, green, blue)})
    # explicit columns keep set_index working when cell_types is empty
    colours = pd.DataFrame(rows, columns = ['CellTypes', 'Colours'])
    return colours.set_index('CellTypes')


def load_colours(colors_fname, cell_types):
    """Read the colours table from *colors_fname*; when the file does not
    exist fall back to random colours for *cell_types*."""
    try:
        return pd.read_csv(colors_fname, index_col = 0, header = 0)
    except FileNotFoundError:
        return random_colours(cell_types)


def build_data(coordinates, connectivities, colors):
    """Render the JavaScript data section inserted into the template.

    coordinates    -- DataFrame indexed by cell name with columns X, Y, Size
    connectivities -- square DataFrame indexed and labelled by cell name
    colors         -- DataFrame indexed by cell name with a 'Colours' column

    Returns one string holding, in this order, a `data_coordinates[...]`
    line per population, a `data_edges[...]` line per population and a
    final `cell_names = [...]` line.
    """
    # the extremes are looked up by label, consistent with the per-row
    # access below (the previous positional lookup relied on column order)
    min_x = coordinates['X'].min() * SCALE_SCALE
    min_y = coordinates['Y'].min() * SCALE_SCALE
    max_x = coordinates['X'].max() * SCALE_SCALE
    max_y = coordinates['Y'].max() * SCALE_SCALE

    # order populations by decreasing size so that, during drawing, smaller
    # populations are not covered by bigger bubbles
    cell_names = list(coordinates.index)
    cell_sizes = coordinates.Size.tolist()
    cell_names = [cell_name for _, cell_name
                  in sorted(zip(cell_sizes, cell_names), reverse = True)]

    coordinate_lines = []
    for cell_name in cell_names:
        row_data = coordinates.loc[cell_name]
        x = (row_data.X - min_x) / (max_x - min_x)
        y = (row_data.Y - min_y) / (max_y - min_y)
        color = colors.loc[cell_name].Colours
        coordinate_lines.append(
            'data_coordinates["{cell_name}"] = [{X}, {Y}, {R}, "{C}"]'.format(
                cell_name = cell_name, X = x, Y = y, R = row_data.Size, C = color))

    # edge values, rearranged to follow the same population order
    edge_lines = []
    for cell_name in cell_names:
        values = connectivities[cell_name][cell_names].tolist()
        edge_lines.append('data_edges["{cell_name}"] = [{indata}]'.format(
            cell_name = cell_name, indata = ','.join(str(value) for value in values)))

    quoted_names = ','.join('"{cell_name}"'.format(cell_name = cell_name)
                            for cell_name in cell_names)
    names_line = 'cell_names = [{cell_names}]'.format(cell_names = quoted_names)

    return '\n'.join(['\n'.join(coordinate_lines), '\n'.join(edge_lines), names_line])


def main(argv):
    """Entry point; *argv* is the full argument vector (sys.argv)."""
    if len(argv) < 3:
        sys.exit('usage: make_AGA_app.py <output_folder> <category>')
    output_folder, category = argv[1], argv[2]

    # file names
    material_folder = join(output_folder, "AGA_folder")
    save_to = join(output_folder, 'AGAlinkage_map_{cat}.html'.format(cat = category))
    colors_fname = join(material_folder, 'colours.csv')
    connectivities_fname = join(material_folder, 'connectivities.csv')
    coordinates_fname = join(material_folder, 'coordinates.csv')

    # read data from files in csv format
    connectivities = pd.read_csv(connectivities_fname, index_col = 0, header = 0)
    coordinates = pd.read_csv(coordinates_fname, index_col = 0, header = 0)
    colors = load_colours(colors_fname, list(connectivities.columns))

    data = build_data(coordinates, connectivities, colors)

    # insert data in template
    with open(TEMPLATE_FNAME, 'r') as template_fobj:
        template = template_fobj.read()
    template = template.replace(DATA_PLACEHOLDER, data)

    # save interactive page
    with open(save_to, 'w') as save_fobj:
        save_fobj.write(template)


if __name__ == "__main__":
    main(sys.argv)

330
pipelines/11_plot_dr/plot_dr.R Executable file
View file

@ -0,0 +1,330 @@
# Parse the command line: all trailing arguments are glued together and split
# on ";" so that each piece is one parameter, written as an R assignment that
# creates the variable named in arguments.list (without the ".arg" suffix).
args = commandArgs(trailingOnly=T)
args = paste(args, collapse = "")
args = unlist(strsplit(args, ";"))

# one line per expected parameter; evaluating this text binds the n-th piece
# of the command line to the matching `<name>.arg` variable
arguments.list = "
seurat.addr.arg = args[1]
plot.by.arg = args[2]
type.to.colours.arg = args[3]
runDiffusionMap.arg = args[4]
runAGA.arg = args[5]
overlay.data.arg = args[6]
overlay.data.ordered.arg = args[7]
"

# width and height passed to pdf() for the dimension reduction plots
plotW = 8
plotH = 8

expected_arguments = unlist(strsplit(arguments.list, "\n"))
expected_arguments = expected_arguments[!(expected_arguments == "")]
if(length(args) != length(expected_arguments)){
  # report how many parameters are required and list their names; the
  # previous sprintf call had a %s without a value and failed before the
  # message could be shown
  expected_names = unlist(lapply(strsplit(expected_arguments, ".arg", fixed = TRUE), "[", 1))
  stop(sprintf('This pipeline requires %s parameters:\n%s',
               as.character(length(expected_arguments)),
               paste(expected_names, collapse = "\n")))
}

eval(parse(text = arguments.list))
for(n in seq_along(expected_arguments)){
  argument = expected_arguments[n]
  argument = gsub(pattern=" ", replacement="", x=argument)
  argument.name = unlist(strsplit(argument, "="))[1]
  variable.name = sub(pattern="\\.arg$", replacement="", x=argument.name)
  argument.content = eval(parse(text = argument.name))
  # NOTE(review): the command line text is executed as R code here; only run
  # this pipeline with trusted arguments
  eval(parse(text = argument.content))
  if (!exists(variable.name)){
    stop(sprintf("Argument %s not passed. Stopping ... ", variable.name))
  }
}
# build a time stamp from the current time by stripping " BST", ":", " " and
# "-" (in that order) and dropping the first two characters
c.time = Reduce(
  function(stamp, junk) gsub(pattern = junk, replacement = "", x = stamp),
  c(" BST", ":", " ", "-"),
  as.character(Sys.time())
)
c.time = substr(x = c.time, start = 3, stop = nchar(c.time))

# output folder: ../../output/11_plot_dr_<seurat.addr>_<time stamp>
output_folder = file.path("../../output", paste("11_plot_dr", seurat.addr, c.time, sep = "_"))
dir.create(output_folder)

# folder for intermediate work material
output_folder_material = file.path(output_folder, "material")
dir.create(output_folder_material)

# from here on seurat.addr is the path of the input file in the data folder
seurat.addr = file.path("../../data", seurat.addr)
source("../../tools/bunddle_utils.R")
library(Seurat)
library(RColorBrewer)
library(plyr)
library(dplyr)
library(destiny)
#######################################################################################################
# load data
print("loading data ... ")
seurat.obj = readRDS(seurat.addr)
print("Data loaded.")

# raw data restricted to the genes and cells present in the @data slot
raw_data = seurat.obj@raw.data
raw_data = raw_data[rownames(seurat.obj@data), colnames(seurat.obj@data)]

# save gene names
gene_names = rownames(raw_data)
write.csv(data.frame(Genes = gene_names), file.path(output_folder_material, "genenames.csv"))
# save cell names
cell_names = colnames(raw_data)
write.csv(data.frame(Cells = cell_names), file.path(output_folder_material, "cellnames.csv"))
# write cell labels to disk
write.csv(data.frame(Cells = names(seurat.obj@ident), Labels = seurat.obj@ident), file.path(output_folder_material, "cell_labels.csv"), row.names = F)

# run the diffusion map
if(runDiffusionMap){
  print("Writing .mtx file for diffusion map ...")
  writeMM(raw_data, file.path(output_folder_material, "raw_data.mtx"))
  print("Running the diffusion map ... ")
  # the folder path is quoted so the shell passes it as a single argument
  command = sprintf("%s ./dm.py %s", python.addr, shQuote(output_folder_material))
  exit.status = system(command, wait = T)
  # fail here with a clear message instead of at the read.csv call below
  if(exit.status != 0){
    stop(sprintf("dm.py exited with status %s; the diffusion map could not be computed", exit.status))
  }
  # load dm from disk
  dm = read.csv(file.path(output_folder_material, "dm.csv"), row.names = 1, header = F)
  #dm = DiffusionMap(seurat.obj@dr$pca@cell.embeddings, k = 100, density_norm=F, n_eigs = 20)
  #dm = data.frame(DC1 = dm$DC1, DC2 = dm$DC2, DC2 = dm$DC3)
}

# axis limits for the FDG plots: 1st and 99th percentiles of each FDG
# coordinate, multiplied by 1.15
print("Computing FDG limits ...")
fdg.x = seurat.obj@dr$fdg@cell.embeddings[, 1]
fdg.y = seurat.obj@dr$fdg@cell.embeddings[, 2]
fdg.limits = 1.15 * c(quantile(fdg.x, c(.01)), quantile(fdg.x, c(.99)), quantile(fdg.y, c(.01)), quantile(fdg.y, c(.99)))
print("Making the plots ...")
# one pass per metadata column listed in plot.by
for (index in seq_along(plot.by)){
  caty = plot.by[index]
  seurat.obj = SetAllIdent(object=seurat.obj, id=caty)

  # choose labels and colours: from the matching csv in ../../resources when
  # one is given, otherwise a shuffled palette over the sorted labels
  if (!is.na(type.to.colours[index])){
    type.to.colour = read.csv(file.path("../../resources", type.to.colours[index]))
    filter.key = type.to.colour$CellTypes %in% as.vector(unique(seurat.obj@ident))
    cell.labels = as.vector(type.to.colour$CellTypes[filter.key])
    cell.colours = as.vector(type.to.colour$Colours[filter.key])
    # labels present in the data but absent from the colour table ...
    all_celltypes_missing <- levels(factor(seurat.obj@ident))[!levels(factor(seurat.obj@ident)) %in% levels(factor(type.to.colour$CellTypes))]
    # ... and colour table entries absent from the data
    all_colours_missing <- levels(factor(type.to.colour$CellTypes))[!levels(factor(type.to.colour$CellTypes)) %in% levels(factor(seurat.obj@ident))]
    if (length(all_celltypes_missing)>0){
      cat(all_celltypes_missing, "have not been found in your type.to.cols .csv \n", sep="\n")
    }
    if (length(all_colours_missing)>0){
      cat(all_colours_missing, "have not been found in your selected metadata column", sep="\n")
    }
    # report a match only when BOTH lists are empty (the condition used to
    # test all_colours_missing twice)
    if (length(all_celltypes_missing)==0 && length(all_colours_missing)==0){
      print("All colours and annotations match")
    }
  }else{
    cell.labels = sort(as.vector(unique(seurat.obj@ident)))
    cell.colours = sample(colorRampPalette(brewer.pal(12, "Paired"))(length(cell.labels)))
  }
  # dots in the category name are replaced by underscores for file names
  caty = gsub(pattern="\\.", replacement="_", caty)

  # file paths for annotated graphs
  tsne.file.name = file.path(output_folder, paste("tsne", paste(caty, "pdf", sep = "."), sep = "_"))
  umap.file.name = file.path(output_folder, paste("umap", paste(caty, "pdf", sep = "."), sep = "_"))
  fdg.file.name = file.path(output_folder, paste("fdg", paste(caty, "pdf", sep = "."), sep = "_"))
  legend.file.name = file.path(output_folder, paste("legend", paste(caty, "pdf", sep = "."), sep = "_"))
  AGA.file.name = file.path(output_folder, paste("AGA", paste(caty, "pdf", sep = "."), sep = "_"))
  # file paths for unannotated plots
  tsne.file.name_unlabeled = file.path(output_folder, paste("tsne_unlabeled", paste(caty, "pdf", sep = "."), sep = "_"))
  umap.file.name_unlabeled = file.path(output_folder, paste("umap_unlabeled", paste(caty, "pdf", sep = "."), sep = "_"))
  fdg.file.name_unlabeled = file.path(output_folder, paste("fdg_unlabeled", paste(caty, "pdf", sep = "."), sep = "_"))

  # preparing data frame; the overlay column is appended as the 8th column
  # when overlay.data is given
  if(!is.na(overlay.data)){
    df = data.frame(CellType = as.vector(seurat.obj@ident),
                    tSNEx = seurat.obj@dr$tsne@cell.embeddings[, 1],
                    tSNEy = seurat.obj@dr$tsne@cell.embeddings[, 2],
                    UMAPx = seurat.obj@dr$umap@cell.embeddings[, 1],
                    UMAPy = seurat.obj@dr$umap@cell.embeddings[, 2],
                    FDGx = seurat.obj@dr$fdg@cell.embeddings[, 1],
                    FDGy = seurat.obj@dr$fdg@cell.embeddings[, 2],
                    overlay.data = seurat.obj@meta.data[overlay.data])
    colnames(df)[8] <- "overlay.data"
  } else {
    df = data.frame(CellType = as.vector(seurat.obj@ident),
                    tSNEx = seurat.obj@dr$tsne@cell.embeddings[, 1],
                    tSNEy = seurat.obj@dr$tsne@cell.embeddings[, 2],
                    UMAPx = seurat.obj@dr$umap@cell.embeddings[, 1],
                    UMAPy = seurat.obj@dr$umap@cell.embeddings[, 2],
                    FDGx = seurat.obj@dr$fdg@cell.embeddings[, 1],
                    FDGy = seurat.obj@dr$fdg@cell.embeddings[, 2])
  }
  print("printing header of df made at beginning od r script")
  print(head(df))

  # data frame shared by the three interactive pages; X and Y are swapped
  # between tSNE, UMAP and FDG coordinates
  interactive_plot_df = data.frame(X = seurat.obj@dr$tsne@cell.embeddings[, 1],
                                   Y = seurat.obj@dr$tsne@cell.embeddings[, 2])
  interactive_plot_df$Labels = factor(seurat.obj@ident, levels = cell.labels)
  interactive_plot_df$Colours = mapvalues(x = interactive_plot_df$Labels, from = cell.labels, to = cell.colours)
  # make interactive tSNE
  interactive_tsne_filename = file.path(output_folder, paste(paste("Interactive_tSNE", caty, sep = "_"), "html", sep = "."))
  make_2D_interactive_page(data_frame_2D=interactive_plot_df, tool_addr=tool_addr, python.addr=python.addr, save.to=interactive_tsne_filename)
  # make interactive UMAP
  interactive_plot_df$X = seurat.obj@dr$umap@cell.embeddings[, 1]
  interactive_plot_df$Y = seurat.obj@dr$umap@cell.embeddings[, 2]
  interactive_umap_filename = file.path(output_folder, paste(paste("Interactive_UMAP", caty, sep = "_"), "html", sep = "."))
  make_2D_interactive_page(data_frame_2D=interactive_plot_df, tool_addr=tool_addr, python.addr=python.addr, save.to=interactive_umap_filename)
  # make interactive FDG
  interactive_plot_df$X = seurat.obj@dr$fdg@cell.embeddings[, 1]
  interactive_plot_df$Y = seurat.obj@dr$fdg@cell.embeddings[, 2]
  interactive_fdg_filename = file.path(output_folder, paste(paste("Interactive_FDG", caty, sep = "_"), "html", sep = "."))
  make_2D_interactive_page(data_frame_2D=interactive_plot_df, tool_addr=tool_addr, python.addr=python.addr, save.to=interactive_fdg_filename)

  # legend layout: at most two columns
  n.cols = min(2, length(cell.labels))
  n.rows = ceiling(length(cell.labels) / n.cols)

  # making the plots
  print("making the plots")
  # annotated plots
  print("making annotated plots")
  plot.tsne = dr.plot(point.labels=df$CellType, dr1=df$tSNEx, dr2=df$tSNEy, dr1.name="tSNE-x", dr2.name="tSNE-y", no.legend=T, plt.lb.sz=7, txt.lb.size=4, use.cols=cell.colours, use.labels=cell.labels)
  plot.umap = dr.plot(point.labels=df$CellType, dr1=df$UMAPx, dr2=df$UMAPy, dr1.name="UMAP-x", dr2.name="UMAP-y", no.legend=T, plt.lb.sz=7, txt.lb.size=4, use.cols=cell.colours, use.labels=cell.labels)
  plot.fdg = dr.plot(point.labels=df$CellType, dr1=df$FDGx, dr2=df$FDGy, dr1.name="FDG-x", dr2.name="FDG-y", no.legend=T, plt.lb.sz=7, txt.lb.size=4, use.cols=cell.colours, limits=fdg.limits, use.labels=cell.labels)
  plot.legend = plot.indexed.legend(label.vector=cell.labels, color.vector=cell.colours, ncols=n.cols, left.limit=.2, symbol.size=10, text.size=6, padH=.6, padV=.6)
  # unannotated plots
  print("making un-annotated plots")
  plot.tsne_unlabeled = dr.plot(point.labels=df$CellType, dr1=df$tSNEx, dr2=df$tSNEy, dr1.name="tSNE-x", dr2.name="tSNE-y", no.legend=T, plt.lb.sz=7, txt.lb.size=4, use.cols=cell.colours, use.labels=cell.labels, annotate.plot = F)
  plot.umap_unlabeled = dr.plot(point.labels=df$CellType, dr1=df$UMAPx, dr2=df$UMAPy, dr1.name="UMAP-x", dr2.name="UMAP-y", no.legend=T, plt.lb.sz=7, txt.lb.size=4, use.cols=cell.colours, use.labels=cell.labels, annotate.plot = F)
  plot.fdg_unlabeled = dr.plot(point.labels=df$CellType, dr1=df$FDGx, dr2=df$FDGy, dr1.name="FDG-x", dr2.name="FDG-y", no.legend=T, plt.lb.sz=7, txt.lb.size=4, use.cols=cell.colours, limits=fdg.limits, use.labels=cell.labels, annotate.plot = F, pt.size=1)

  # print the annotated plots
  print("printing annotated plots")
  pdf(tsne.file.name, width = plotW, height = plotH)
  print(plot.tsne)
  dev.off()
  pdf(umap.file.name, width = plotW, height = plotH)
  print(plot.umap)
  dev.off()
  pdf(fdg.file.name, width = plotW, height = plotH)
  print(plot.fdg)
  dev.off()
  # legend page size grows with the longest label and the number of rows
  pdf(legend.file.name, width = 1.5 + .15 * n.cols * max(unlist(lapply(cell.labels, nchar))), height = .5 + n.rows * .35)
  print(plot.legend)
  dev.off()
  # print the unannotated plots
  print("printing un-annotated plots")
  pdf(tsne.file.name_unlabeled, width = plotW, height = plotH)
  print(plot.tsne_unlabeled)
  dev.off()
  pdf(umap.file.name_unlabeled, width = plotW, height = plotH)
  print(plot.umap_unlabeled)
  dev.off()
  pdf(fdg.file.name_unlabeled, width = plotW, height = plotH)
  print(plot.fdg_unlabeled)
  dev.off()

  # 3D diffusion map page from the first three columns of dm
  if(runDiffusionMap){
    df = as.data.frame(dm[, 1:3])
    df$Labels = factor(seurat.obj@ident, levels = cell.labels)
    df$Colours = mapvalues(x = df$Labels, from = cell.labels, to = cell.colours)
    dm.file.name = file.path(output_folder_material, paste(paste("dm_data", caty, sep="_"), "csv", sep = "."))
    write.csv(df, dm.file.name, row.names = F)
    dm.file.name = file.path(output_folder, paste(paste("DiffusionMap_3D", caty, sep = "_"), "html", sep = "."))
    make_3D_interactive_page(data_frame_3D=df, tool_addr=tool_addr, python.addr=python.addr, save.to=dm.file.name)
  }

  if(runAGA){
    # the .mtx file was already written if the diffusion map was run
    if(!runDiffusionMap){
      print("Writing .mtx file for AGA map ...")
      writeMM(raw_data, file.path(output_folder_material, "raw_data.mtx"))
    }
    print("running AGA ...")
    AGA_folder = file.path(output_folder, "AGA_folder")
    dir.create(AGA_folder)
    # write cell labels to disk (identities now follow the current category)
    write.csv(data.frame(Cells = names(seurat.obj@ident), Labels = seurat.obj@ident), file.path(output_folder_material, "cell_labels.csv"), row.names = F)
    # running AGA
    command =file.path(tool_addr, "AGA/AGA_from_Seurat.py")
    command = paste(paste(python.addr, command, sep = " "), output_folder, sep = " ")
    command = paste(command, caty, sep =" ")
    system(command, wait = T)
    # read the AGA output
    coordinates = read.csv(file.path(AGA_folder, "coordinates.csv"), row.names = 1)
    connectivities = read.csv(file.path(AGA_folder, "connectivities.csv"), row.names = 1)
    # plot AGA: rows and columns are put in the order of cell.labels
    coordinates = coordinates[cell.labels, ]
    coordinates$Colours = cell.colours
    label.order = match(cell.labels, rownames(connectivities))
    connectivities = connectivities[label.order, label.order]
    plot.obj = ggplot(data=coordinates, aes(x = X, y = Y))
    plot.obj = plot.obj + theme_void() + theme(legend.position="none")
    # collect one segment per positive entry on or above the diagonal
    xi = c(); xf = c(); yi = c(); yf = c(); vs = c();
    for(i in 1:dim(connectivities)[1]){
      for(j in i:dim(connectivities)[2]){
        v = connectivities[i, j]
        if(v > 0){
          xi = c(xi, coordinates$X[i])
          xf = c(xf, coordinates$X[j])
          yi = c(yi, coordinates$Y[i])
          yf = c(yf, coordinates$Y[j])
          vs = c(vs, v)
        }
      }
    }
    lineDF = data.frame(Xi = xi, Yi = yi, Xf = xf, Yf = yf, Vs = vs)
    plot.obj = plot.obj + geom_segment(data = lineDF, aes(x = Xi, y = Yi, xend = Xf, yend = Yf), color = "black", size = 3 * lineDF$Vs)
    plot.obj = plot.obj + geom_point(size = 2 * log(coordinates$Size), color = coordinates$Colours)
    plot.obj = plot.obj + annotate("text", x=coordinates$X, y=coordinates$Y, label = 1:dim(coordinates)[1])
    pdf(AGA.file.name, width = 10, height = 10)
    print(plot.obj)
    dev.off()
    ######## now make the interactive AGA app
    #########################################
    print("Making the AGA app ... ")
    # save colours
    colours.df = data.frame(CellTypes = cell.labels, Colours = cell.colours)
    write.csv(colours.df, file.path(AGA_folder, "colours.csv"), row.names = F)
    # run python to build the AGA app
    command = sprintf("%s make_AGA_app.py %s %s", python.addr, output_folder, caty)
    system(command, wait = T)
  }
}
# remove the work material folder, and after an AGA run also the AGA folder
# and the 'figures' folder inside the output folder
unlink(output_folder_material, recursive = TRUE, force = TRUE)
if (runAGA){
  for (leftover in c(AGA_folder, file.path(output_folder, 'figures'))){
    unlink(leftover, recursive = TRUE, force = TRUE)
  }
}
print("Ended beautifully ... ")

16
pipelines/11_plot_dr/plot_dr.sh Executable file
View file

@ -0,0 +1,16 @@
#!/bin/bash
#$ -cwd
#$ -N plot_dr
#$ -V
#$ -l h_rt=47:59:59
#$ -l h_vmem=200G

# Submit wrapper for plot_dr.R. Takes exactly one argument: the
# ";"-separated parameter string that is handed to the R script.
if [ "$#" -ne 1 ]; then
    echo "Illegal number of parameters"
    exit 1
fi

# quote the argument so the shell neither splits it on spaces nor expands
# characters such as * or [ ] inside it
Rscript plot_dr.R "$1"
status=$?

echo "End on $(date)"
# report the exit status of the R script rather than that of echo
exit "$status"

View file

@ -0,0 +1,289 @@
# Parse the command line: all trailing arguments are glued together and split
# on ";" so that each piece is one parameter, written as an R assignment that
# creates the variable named in arguments.list (without the ".arg" suffix).
args = commandArgs(trailingOnly=T)
args = paste(args, collapse = "")
args = unlist(strsplit(args, ";"))

# one line per expected parameter; evaluating this text binds the n-th piece
# of the command line to the matching `<name>.arg` variable
arguments.list = "
seurat.addr.arg = args[1]
plot.by.arg = args[2]
type.to.colours.arg = args[3]
runDiffusionMap.arg = args[4]
runAGA.arg = args[5]
"

# width and height passed to pdf() for the dimension reduction plots
plotW = 8
plotH = 8

expected_arguments = unlist(strsplit(arguments.list, "\n"))
expected_arguments = expected_arguments[!(expected_arguments == "")]
if(length(args) != length(expected_arguments)){
  # report how many parameters are required and list their names; the
  # previous sprintf call had a %s without a value and failed before the
  # message could be shown
  expected_names = unlist(lapply(strsplit(expected_arguments, ".arg", fixed = TRUE), "[", 1))
  stop(sprintf('This pipeline requires %s parameters:\n%s',
               as.character(length(expected_arguments)),
               paste(expected_names, collapse = "\n")))
}

eval(parse(text = arguments.list))
for(n in seq_along(expected_arguments)){
  argument = expected_arguments[n]
  argument = gsub(pattern=" ", replacement="", x=argument)
  argument.name = unlist(strsplit(argument, "="))[1]
  variable.name = sub(pattern="\\.arg$", replacement="", x=argument.name)
  argument.content = eval(parse(text = argument.name))
  # NOTE(review): the command line text is executed as R code here; only run
  # this pipeline with trusted arguments
  eval(parse(text = argument.content))
  if (!exists(variable.name)){
    stop(sprintf("Argument %s not passed. Stopping ... ", variable.name))
  }
}
# build a time stamp from the current time by stripping " BST", ":", " " and
# "-" (in that order) and dropping the first two characters
c.time = Reduce(
  function(stamp, junk) gsub(pattern = junk, replacement = "", x = stamp),
  c(" BST", ":", " ", "-"),
  as.character(Sys.time())
)
c.time = substr(x = c.time, start = 3, stop = nchar(c.time))

# output folder: ../../output/11_plot_dr_<seurat.addr>_<time stamp>
output_folder = file.path("../../output", paste("11_plot_dr", seurat.addr, c.time, sep = "_"))
dir.create(output_folder)

# folder for intermediate work material
output_folder_material = file.path(output_folder, "material")
dir.create(output_folder_material)

# from here on seurat.addr is the path of the input file in the data folder
seurat.addr = file.path("../../data", seurat.addr)
source("../../tools/bunddle_utils.R")
library(Seurat)
library(RColorBrewer)
library(plyr)
library(dplyr)
library(destiny)
#######################################################################################################
# load data
print("loading data ... ")
seurat.obj = readRDS(seurat.addr)
print("Data loaded.")

# raw data restricted to the genes and cells present in the @data slot
raw_data = seurat.obj@raw.data
raw_data = raw_data[rownames(seurat.obj@data), colnames(seurat.obj@data)]

# save gene names; dm.py reads genenames.csv from the material folder, and
# this file was previously never written by this script
gene_names = rownames(raw_data)
write.csv(data.frame(Genes = gene_names), file.path(output_folder_material, "genenames.csv"))
# save cell names
cell_names = colnames(raw_data)
write.csv(data.frame(Cells = cell_names), file.path(output_folder_material, "cellnames.csv"))
# write cell labels to disk
write.csv(data.frame(Cells = names(seurat.obj@ident), Labels = seurat.obj@ident), file.path(output_folder_material, "cell_labels.csv"), row.names = F)

# run the diffusion map
if(runDiffusionMap){
  print("Writing .mtx file for diffusion map ...")
  writeMM(raw_data, file.path(output_folder_material, "raw_data.mtx"))
  print("Running the diffusion map ... ")
  # the folder path is quoted so the shell passes it as a single argument
  command = sprintf("%s ./dm.py %s", python.addr, shQuote(output_folder_material))
  exit.status = system(command, wait = T)
  # fail here with a clear message instead of at the read.csv call below
  if(exit.status != 0){
    stop(sprintf("dm.py exited with status %s; the diffusion map could not be computed", exit.status))
  }
  # load dm from disk
  dm = read.csv(file.path(output_folder_material, "dm.csv"), row.names = 1, header = F)
  #dm = DiffusionMap(seurat.obj@dr$pca@cell.embeddings, k = 100, density_norm=F, n_eigs = 20)
  #dm = data.frame(DC1 = dm$DC1, DC2 = dm$DC2, DC2 = dm$DC3)
}

# axis limits for the FDG plots: 1st and 99th percentiles of each FDG
# coordinate, multiplied by 1.15
print("Computing FDG limits ...")
fdg.x = seurat.obj@dr$fdg@cell.embeddings[, 1]
fdg.y = seurat.obj@dr$fdg@cell.embeddings[, 2]
fdg.limits = 1.15 * c(quantile(fdg.x, c(.01)), quantile(fdg.x, c(.99)), quantile(fdg.y, c(.01)), quantile(fdg.y, c(.99)))
print("Making the plots ...")
# One pass per metadata column listed in plot.by. Each pass writes static pdf
# plots and interactive html pages for tSNE, UMAP and FDG, and optionally a
# 3D diffusion map page and the AGA plot/app.
for (index in 1:length(plot.by)){
caty = plot.by[index]
seurat.obj = SetAllIdent(object=seurat.obj, id=caty)
# labels and colours: read from the matching csv in ../../resources when one
# is given, otherwise a shuffled palette over the sorted labels
if (!is.na(type.to.colours[index])){
type.to.colour = read.csv(file.path("../../resources", type.to.colours[index]))
filter.key = type.to.colour$CellTypes %in% as.vector(unique(seurat.obj@ident))
cell.labels = as.vector(type.to.colour$CellTypes[filter.key])
cell.colours = as.vector(type.to.colour$Colours[filter.key])
}else{
cell.labels = sort(as.vector(unique(seurat.obj@ident)))
cell.colours = sample(colorRampPalette(brewer.pal(12, "Paired"))(length(cell.labels)))
}
# dots in the category name are replaced by underscores for file names
caty = gsub(pattern="\\.", replacement="_", caty)
# file paths for annotated graphs
tsne.file.name = file.path(output_folder, paste("tsne", paste(caty, "pdf", sep = "."), sep = "_"))
umap.file.name = file.path(output_folder, paste("umap", paste(caty, "pdf", sep = "."), sep = "_"))
fdg.file.name = file.path(output_folder, paste("fdg", paste(caty, "pdf", sep = "."), sep = "_"))
legend.file.name = file.path(output_folder, paste("legend", paste(caty, "pdf", sep = "."), sep = "_"))
AGA.file.name = file.path(output_folder, paste("AGA", paste(caty, "pdf", sep = "."), sep = "_"))
# file paths for unannotated plots
tsne.file.name_unlabeled = file.path(output_folder, paste("tsne_unlabeled", paste(caty, "pdf", sep = "."), sep = "_"))
umap.file.name_unlabeled = file.path(output_folder, paste("umap_unlabeled", paste(caty, "pdf", sep = "."), sep = "_"))
fdg.file.name_unlabeled = file.path(output_folder, paste("fdg_unlabeled", paste(caty, "pdf", sep = "."), sep = "_"))
# preparing data frame: identity plus the first two tSNE, UMAP and FDG coordinates
df = data.frame(CellType = as.vector(seurat.obj@ident),
tSNEx = seurat.obj@dr$tsne@cell.embeddings[, 1],
tSNEy = seurat.obj@dr$tsne@cell.embeddings[, 2],
UMAPx = seurat.obj@dr$umap@cell.embeddings[, 1],
UMAPy = seurat.obj@dr$umap@cell.embeddings[, 2],
FDGx = seurat.obj@dr$fdg@cell.embeddings[, 1],
FDGy = seurat.obj@dr$fdg@cell.embeddings[, 2])
# data frame shared by the three interactive pages; X and Y are swapped
# between tSNE, UMAP and FDG coordinates below
interactive_plot_df = data.frame(X = seurat.obj@dr$tsne@cell.embeddings[, 1],
Y = seurat.obj@dr$tsne@cell.embeddings[, 2])
interactive_plot_df$Labels = factor(seurat.obj@ident, levels = cell.labels)
interactive_plot_df$Colours = mapvalues(x = interactive_plot_df$Labels, from = cell.labels, to = cell.colours)
# make interactive tSNE
interactive_tsne_filename = file.path(output_folder, paste(paste("Interactive_tSNE", caty, sep = "_"), "html", sep = "."))
make_2D_interactive_page(data_frame_2D=interactive_plot_df, tool_addr=tool_addr, python.addr=python.addr, save.to=interactive_tsne_filename)
# make interactive UMAP
interactive_plot_df$X = seurat.obj@dr$umap@cell.embeddings[, 1]
interactive_plot_df$Y = seurat.obj@dr$umap@cell.embeddings[, 2]
interactive_umap_filename = file.path(output_folder, paste(paste("Interactive_UMAP", caty, sep = "_"), "html", sep = "."))
make_2D_interactive_page(data_frame_2D=interactive_plot_df, tool_addr=tool_addr, python.addr=python.addr, save.to=interactive_umap_filename)
# make interactive FDG
interactive_plot_df$X = seurat.obj@dr$fdg@cell.embeddings[, 1]
interactive_plot_df$Y = seurat.obj@dr$fdg@cell.embeddings[, 2]
interactive_fdg_filename = file.path(output_folder, paste(paste("Interactive_FDG", caty, sep = "_"), "html", sep = "."))
make_2D_interactive_page(data_frame_2D=interactive_plot_df, tool_addr=tool_addr, python.addr=python.addr, save.to=interactive_fdg_filename)
# legend layout (at most two columns); only used by the legend code that is
# commented out below
n.cols = min(2, length(cell.labels))
n.rows = ceiling(length(cell.labels) / n.cols)
# making the plots
print("making the plots")
# annotated plots
plot.tsne = dr.plot.numerical(point.labels=df$CellType, dr1=df$tSNEx, dr2=df$tSNEy, dr1.name="tSNE-x", dr2.name="tSNE-y", no.legend=T, plt.lb.sz=7, txt.lb.size=4, use.cols=cell.colours, use.labels=cell.labels)
plot.umap = dr.plot.numerical(point.labels=df$CellType, dr1=df$UMAPx, dr2=df$UMAPy, dr1.name="UMAP-x", dr2.name="UMAP-y", no.legend=T, plt.lb.sz=7, txt.lb.size=4, use.cols=cell.colours, use.labels=cell.labels)
plot.fdg = dr.plot.numerical(point.labels=df$CellType, dr1=df$FDGx, dr2=df$FDGy, dr1.name="FDG-x", dr2.name="FDG-y", no.legend=T, plt.lb.sz=7, txt.lb.size=4, use.cols=cell.colours, limits=fdg.limits, use.labels=cell.labels)
#plot.legend = plot.indexed.legend(label.vector=cell.labels, color.vector=cell.colours, ncols=n.cols, left.limit=.2, symbol.size=10, text.size=6, padH=.6, padV=.6)
# unannotated plots
plot.tsne_unlabeled = dr.plot.numerical(point.labels=df$CellType, dr1=df$tSNEx, dr2=df$tSNEy, dr1.name="tSNE-x", dr2.name="tSNE-y", no.legend=T, plt.lb.sz=7, txt.lb.size=4, use.cols=cell.colours, use.labels=cell.labels, annotate.plot = F)
plot.umap_unlabeled = dr.plot.numerical(point.labels=df$CellType, dr1=df$UMAPx, dr2=df$UMAPy, dr1.name="UMAP-x", dr2.name="UMAP-y", no.legend=T, plt.lb.sz=7, txt.lb.size=4, use.cols=cell.colours, use.labels=cell.labels, annotate.plot = F)
plot.fdg_unlabeled = dr.plot.numerical(point.labels=df$CellType, dr1=df$FDGx, dr2=df$FDGy, dr1.name="FDG-x", dr2.name="FDG-y", no.legend=T, plt.lb.sz=7, txt.lb.size=4, use.cols=cell.colours, limits=fdg.limits, use.labels=cell.labels, annotate.plot = F)
# print the annotated plots
pdf(tsne.file.name, width = plotW, height = plotH)
print(plot.tsne)
dev.off()
pdf(umap.file.name, width = plotW, height = plotH)
print(plot.umap)
dev.off()
pdf(fdg.file.name, width = plotW, height = plotH)
print(plot.fdg)
dev.off()
# the legend pdf is not produced in this script (code left commented out)
#pdf(legend.file.name, width = 1.5 + .15 * n.cols * max(unlist(lapply(cell.labels, nchar))), height = .5 + n.rows * .35)
#print(plot.legend)
#dev.off()
# print the unannotated plots
pdf(tsne.file.name_unlabeled, width = plotW, height = plotH)
print(plot.tsne_unlabeled)
dev.off()
pdf(umap.file.name_unlabeled, width = plotW, height = plotH)
print(plot.umap_unlabeled)
dev.off()
pdf(fdg.file.name_unlabeled, width = plotW, height = plotH)
print(plot.fdg_unlabeled)
dev.off()
# 3D diffusion map page built from the first three columns of dm
if(runDiffusionMap){
df = as.data.frame(dm[, 1:3])
df$Labels = factor(seurat.obj@ident, levels = cell.labels)
df$Colours = mapvalues(x = df$Labels, from = cell.labels, to = cell.colours)
dm.file.name = file.path(output_folder_material, paste(paste("dm_data", caty, sep="_"), "csv", sep = "."))
write.csv(df, dm.file.name, row.names = F)
dm.file.name = file.path(output_folder, paste(paste("DiffusionMap_3D", caty, sep = "_"), "html", sep = "."))
make_3D_interactive_page(data_frame_3D=df, tool_addr=tool_addr, python.addr=python.addr, save.to=dm.file.name)
}
if(runAGA){
# the .mtx file was already written if the diffusion map was run
if(runDiffusionMap==F){
print("Writing .mtx file for AGA map ...")
writeMM(raw_data, file.path(output_folder_material, "raw_data.mtx"))
}
print("running AGA ...")
AGA_folder = file.path(output_folder, "AGA_folder")
dir.create(AGA_folder)
# write cell labels to disk (identities now follow the current category)
write.csv(data.frame(Cells = names(seurat.obj@ident), Labels = seurat.obj@ident), file.path(output_folder_material, "cell_labels.csv"), row.names = F)
# running AGA
command =file.path(tool_addr, "AGA/AGA_from_Seurat.py")
command = paste(paste(python.addr, command, sep = " "), output_folder, sep = " ")
command = paste(command, caty, sep =" ")
system(command, wait = T)
# read the AGA output
coordinates = read.csv(file.path(AGA_folder, "coordinates.csv"), row.names = 1)
connectivities = read.csv(file.path(AGA_folder, "connectivities.csv"), row.names = 1)
# plot AGA: rows and columns are put in the order of cell.labels
coordinates = coordinates[cell.labels, ]
coordinates$Colours = cell.colours
label.order = match(cell.labels, rownames(connectivities))
connectivities = connectivities[label.order, label.order]
plot.obj = ggplot(data=coordinates, aes(x = X, y = Y))
plot.obj = plot.obj + theme_void() + theme(legend.position="none")
# collect one segment per positive entry on or above the diagonal
xi = c(); xf = c(); yi = c(); yf = c(); vs = c();
for(i in 1:dim(connectivities)[1]){
for(j in i:dim(connectivities)[2]){
v = connectivities[i, j]
if(v > 0){
xi = c(xi, coordinates$X[i])
xf = c(xf, coordinates$X[j])
yi = c(yi, coordinates$Y[i])
yf = c(yf, coordinates$Y[j])
vs = c(vs, v)
}
}
}
lineDF = data.frame(Xi = xi, Yi = yi, Xf = xf, Yf = yf, Vs = vs)
# segment width is 3 * Vs, point size is 2 * log(Size); points are labelled
# with their row number in coordinates
plot.obj = plot.obj + geom_segment(data = lineDF, aes(x = Xi, y = Yi, xend = Xf, yend = Yf), color = "black", size = 3 * lineDF$Vs)
plot.obj = plot.obj + geom_point(size = 2 * log(coordinates$Size), color = coordinates$Colours)
plot.obj = plot.obj + annotate("text", x=coordinates$X, y=coordinates$Y, label = 1:dim(coordinates)[1])
pdf(AGA.file.name, width = 10, height = 10)
print(plot.obj)
dev.off()
######## now make the interactive AGA app
#########################################
print("Making the AGA app ... ")
# save colours
colours.df = data.frame(CellTypes = cell.labels, Colours = cell.colours)
write.csv(colours.df, file.path(AGA_folder, "colours.csv"), row.names = F)
# run python to build the AGA app
command = sprintf("%s make_AGA_app.py %s %s", python.addr, output_folder, caty)
system(command, wait = T)
}
}
# remove the work material folder, and after an AGA run also the AGA folder
# and the 'figures' folder inside the output folder
unlink(output_folder_material, recursive = TRUE, force = TRUE)
if (runAGA){
  for (leftover in c(AGA_folder, file.path(output_folder, 'figures'))){
    unlink(leftover, recursive = TRUE, force = TRUE)
  }
}
print("Ended beautifully ... ")

View file

@ -0,0 +1,16 @@
#!/bin/bash
#$ -cwd
#$ -N plot_dr
#$ -V
#$ -l h_rt=47:59:59
#$ -l h_vmem=200G

# Submit wrapper for plot_dr_numerical.R. Takes exactly one argument: the
# ";"-separated parameter string that is handed to the R script.
if [ "$#" -ne 1 ]; then
    echo "Illegal number of parameters"
    exit 1
fi

# quote the argument so the shell neither splits it on spaces nor expands
# characters such as * or [ ] inside it
Rscript plot_dr_numerical.R "$1"
status=$?

echo "End on $(date)"
# report the exit status of the R script rather than that of echo
exit "$status"

View file

@ -0,0 +1,215 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Interactive linkage plot</title>
<meta name="description" content="An interactive plot for the linkage map">
<meta name="author" content="Dorin-Mirel Popescu">
</head>
<body>
<ul>
<li>Bubble size reflects population size; Edge thickness reflects connectivity scores;</li>
<li>Use the sliders to set plotting parameters;</li>
<li>Click the canvas area to select a cell population and reposition it by dragging;</li>
<li>The plot can be saved by right-clicking the canvas area and choosing 'Save as'; for a higher image resolution, increase the canvas size, font size and scales before saving;</li>
</ul>
<table>
<!-- header row: one label per slider -->
<tr>
<td>Canvas width</td><td>Canvas height</td><td>Size scale</td><td>Edge scale</td><td>Edge threshold</td><td>Font size</td>
</tr>
<!-- control row: each slider passes its value to the matching setter when it changes -->
<tr>
<td><input type = 'range' min = '100' max = '3000' value = '500' onchange = 'setWidth(this.value)' /></td>
<td><input type = 'range' min = '100' max = '3000' value = '500' onchange = 'setHeight(this.value)' /></td>
<td><input type = 'range' min = '0' max = '300' value = '10' onchange = 'setSizeScale(this.value)' /></td>
<td><input type = 'range' min = '.1' max = '15' step = '.1' value = '5' onchange = 'setEdgeScale(this.value)' /></td>
<td><input type = 'range' min = '0' max = '1' step = '.001' value = '0' onchange = 'setEdgeThreshold(this.value)'/></td>
<td><input type = 'range' min = '5' max = '80' value = '10' step = '1' onchange = 'setFontSize(this.value)' /></td>
</tr>
</table>
<canvas id = 'canvas' width = '500' height = '500'></canvas>
<script type = 'text/javascript'>
// global parameters (shared by the slider callbacks, draw() and the mouse handlers)
var canvas = document.getElementById('canvas'),
canvasW = 500, // canvas width; x coordinates are multiplied by it when drawing
canvasH = 500, // canvas height; y coordinates are flipped (1 - y) and multiplied by it when drawing
sizeScale = .1, // multiplies the size value of each population to give the bubble area
edgeScale = 5, // multiplies each edge value to give the line width
edgeT = 0, // only edges with a value above this threshold are drawn
fontSize = 10, // font size (px) of the labels
context = canvas.getContext('2d'),
mouseX = 0, // cursor position recorded at mousedown
mouseY = 0,
currentX = 0, // cursor position recorded while dragging
currentY = 0,
selectedX = 0, // coordinates the selected population had at mousedown
selectedY = 0,
selectedPopulation = null; // key of the population being dragged, null when nothing is selected
// data placeholders
var data_coordinates = [], // for each cell name: x coordinate, y coordinate, size value and colour (in that order, as read by draw())
data_edges = [], // for each cell name: an array of edge values, indexed like cell_names
data_composition = []; // for each cell name include 8 numbers (first 4 for male gender, last 4 for female gender); not referenced by the code visible here
// insert data here
// Slider callback: apply a new canvas width and repaint.
// value: the slider value, parsed as a float.
function setWidth(value){
    var newWidth = parseFloat(value)
    canvasW = newWidth
    canvas.width = newWidth
    // fetch the 2d context again after the resize, then redraw everything
    context = canvas.getContext('2d')
    draw()
}
// Slider callback: apply a new canvas height and repaint.
// value: the slider value, parsed as a float.
function setHeight(value){
    var newHeight = parseFloat(value)
    canvasH = newHeight
    canvas.height = newHeight
    // fetch the 2d context again after the resize, then redraw everything
    context = canvas.getContext('2d')
    draw()
}
// Slider callback: update the bubble size scale and repaint.
// value: the slider value; it is divided by 100 before being stored in sizeScale.
function setSizeScale(value){
    var sliderValue = parseFloat(value)
    sizeScale = sliderValue / 100
    draw()
}
// Slider callback: update the factor applied to edge values (line width) and repaint.
// value: the slider value, parsed as a float.
function setEdgeScale(value){
    var newScale = parseFloat(value)
    edgeScale = newScale
    draw()
}
// Slider callback: update the edge threshold and repaint.
// Edges whose value is not greater than the threshold are not drawn.
// value: the slider value, parsed as a float.
function setEdgeThreshold(value){
    var newThreshold = parseFloat(value)
    edgeT = newThreshold
    draw()
}
// Slider callback: update the font size of the cell name labels and repaint.
// value: the slider value, parsed as an integer.
function setFontSize(value){
    var newSize = parseInt(value)
    fontSize = newSize
    draw()
}
// Repaint the whole canvas: background first, then all edges, then all bubbles
// with their labels (so bubbles and labels sit on top of the edges).
// Reads the plotting globals (canvasW, canvasH, sizeScale, edgeScale, edgeT, fontSize)
// and the data globals (data_coordinates, data_edges, cell_names).
function draw(){
    // declared locally so the loops do not create or overwrite a global `key`
    var key, bubble_data, bubbleX, bubbleY, bubbleR;
    // clear canvas by drawing a rectangle
    context.fillStyle = '#efefef'
    context.fillRect(0, 0, canvas.width, canvas.height)
    // first pass: for every cell name draw the edges whose value exceeds the edge threshold
    context.strokeStyle = '#888888'
    for (key in data_coordinates){
        bubble_data = data_coordinates[key]
        bubbleX = canvasW * bubble_data[0]
        // y is flipped: the canvas origin is the top-left corner
        bubbleY = canvasH * (1 - bubble_data[1])
        data_edges[key].forEach(function(edgeVal, i){
            if (edgeVal > edgeT){
                // the i-th edge value connects to the i-th entry of cell_names
                var connectingBubble = data_coordinates[cell_names[i]],
                    endX = canvasW * connectingBubble[0],
                    endY = canvasH * (1 - connectingBubble[1]);
                context.lineWidth = edgeVal * edgeScale
                context.beginPath()
                context.moveTo(bubbleX, bubbleY)
                context.lineTo(endX, endY)
                context.stroke()
            }
        })
    }
    // second pass: draw each bubble and write the cell name above it
    context.font = parseInt(fontSize) + 'px arial'
    context.textAlign = 'center'
    // 'alphabetic' is the valid spelling; an unrecognised value is ignored by the canvas
    context.textBaseline = 'alphabetic'
    for (key in data_coordinates){
        bubble_data = data_coordinates[key]
        bubbleX = canvasW * bubble_data[0]
        bubbleY = canvasH * (1 - bubble_data[1])
        // the scaled size value is treated as an area, hence the square root for the radius
        bubbleR = Math.sqrt(sizeScale * bubble_data[2])
        // draw bubble
        context.fillStyle = bubble_data[3]
        context.beginPath()
        context.arc(bubbleX, bubbleY, bubbleR, 0, 2 * Math.PI, false)
        context.fill()
        // write cell name
        context.fillStyle = 'black'
        context.fillText(key, bubbleX, bubbleY - bubbleR - 2)
    }
}
// Convert a mouse event into coordinates relative to the canvas.
// event: an event carrying clientX / clientY.
// Returns [X, Y], the cursor position measured from the top-left corner of the
// canvas bounding rectangle.
function getEventCoordinates(event){
    // left/top hold the same values as x/y for a bounding rectangle but are
    // available in browsers that do not implement DOMRect.x / DOMRect.y
    var canvasRect = canvas.getBoundingClientRect(),
        X = event.clientX - canvasRect.left,
        Y = event.clientY - canvasRect.top;
    return [X, Y]
}
// End the drag of the selected cell population.
// event: the event that ends the drag; its position becomes the drop location.
function stopDraging(event){
    // place the bubble at the position carried by this event
    dragDataPoint(event)
    // dragging is over: detach both drag-related handlers from the canvas
    canvas.removeEventListener('mouseup', stopDraging)
    canvas.removeEventListener('mousemove', dragDataPoint)
}
// Move the selected bubble so that it follows the cursor, then repaint.
// event: the mousemove event during a drag, or the event handed over by stopDraging().
// The new position is the position the bubble had at mousedown (selectedX, selectedY)
// shifted by the cursor displacement since mousedown, expressed as a fraction of the canvas size.
function dragDataPoint(event){
    // A mouse button released outside the canvas never reaches the canvas 'mouseup'
    // handler, which leaves this handler attached. Detect that case (a move with
    // no button pressed) and end the drag here instead of moving the bubble.
    if (event.type === 'mousemove' && event.buttons === 0){
        canvas.removeEventListener('mousemove', dragDataPoint)
        canvas.removeEventListener('mouseup', stopDraging)
        return
    }
    // nothing is selected: there is no data point to move
    if (selectedPopulation === null){
        return
    }
    var XY = getEventCoordinates(event)
    currentX = XY[0];
    currentY = XY[1];
    var dx = (mouseX - currentX) / canvasW,
        dy = (mouseY - currentY) / canvasH;
    // reset coordinates of selected data point (y grows upwards in the data, downwards on the canvas)
    data_coordinates[selectedPopulation][0] = selectedX - dx;
    data_coordinates[selectedPopulation][1] = selectedY + dy;
    // then draw
    draw()
}
// Draw the canvas and install the mousedown handler once the entire document is loaded.
// The mousedown handler records the cursor position, looks for a bubble under the cursor
// and, when one is found, starts a drag by attaching the mousemove / mouseup handlers.
window.onload = function(){
    draw()
    canvas.addEventListener('mousedown', function(event){
        // all loop variables are local so that no global `key` is created or overwritten
        var XY = getEventCoordinates(event),
            hit = false,
            key, bubble_data, bubbleX, bubbleY, bubbleR, distance;
        mouseX = XY[0];
        mouseY = XY[1];
        // loop through all the data points and check for a hit; the loop is not stopped
        // at the first hit, so with overlapping bubbles the last one in iteration order
        // (the one draw() paints last) is selected
        for (key in data_coordinates){
            bubble_data = data_coordinates[key]
            bubbleX = canvasW * bubble_data[0]
            bubbleY = canvasH * (1 - bubble_data[1])
            bubbleR = Math.sqrt(sizeScale * bubble_data[2])
            distance = Math.sqrt(Math.pow(mouseX - bubbleX, 2) + Math.pow(mouseY - bubbleY, 2))
            if (distance < bubbleR){
                hit = true;
                selectedPopulation = key;
                // remember where the bubble was when the drag started
                selectedX = bubble_data[0]
                selectedY = bubble_data[1]
            }
        }
        if (hit){
            canvas.addEventListener('mousemove', dragDataPoint)
            canvas.addEventListener('mouseup', stopDraging)
        }else{
            selectedPopulation = null
            // a previous drag whose button release happened outside the canvas leaves
            // its handlers attached; detach them so they cannot run without a selection
            canvas.removeEventListener('mousemove', dragDataPoint)
            canvas.removeEventListener('mouseup', stopDraging)
        }
    })
}
</script>
</body>
</html>