scRNA-seq_analysis

This commit is contained in:
veghp 2019-07-08 12:22:01 +01:00
commit 82cc2d191e
188 changed files with 146184 additions and 0 deletions

View file

@ -0,0 +1,83 @@
from pptx import Presentation
from pptx.util import Inches, Pt
from os import listdir
from os.path import isfile
import pandas as pd
# Build 'annotation_template.pptx' from the images found in ./graphs: two
# overview slides followed by one slide per cluster that holds the cluster's
# embedding plot, its tally table and a text skeleton for the annotator.

# Optional earlier annotations; the csv is read with its first column as index
# and is looked up by cluster number further down.
previousAnnotation = isfile("./graphs/previous_clusters.csv")
if previousAnnotation:
    previousAnnData = pd.read_csv("./graphs/previous_clusters.csv", index_col=0)

# The number of clusters is taken to be the number of files in ./graphs whose
# name starts with "cluster".
clusters = sum(1 for graph_name in listdir("./graphs") if graph_name.startswith("cluster"))

# initiate presentation (built once; a second, redundant Presentation() call was dropped)
prs = Presentation()
black_slide_layout = prs.slide_layouts[6]


def _add_overview_slide(img_path):
    """Append a slide that shows only the picture stored at img_path."""
    overview = prs.slides.add_slide(black_slide_layout)
    overview.shapes.add_picture(img_path, Inches(.1), Inches(.5), height=Inches(5.7))


def _write_paragraph(text_frame, text, size, bold=False, level=None, first=False):
    """Fill one paragraph of text_frame.

    With first=True the frame's existing first paragraph is used, otherwise a
    new paragraph is appended. bold and level are only set when requested.
    """
    paragraph = text_frame.paragraphs[0] if first else text_frame.add_paragraph()
    paragraph.text = text
    if level is not None:
        paragraph.level = level
    if bold:
        paragraph.font.bold = True
    paragraph.font.size = Pt(size)
    return paragraph


# tsne and umap plots by cluster on the first slide, by sample on the second
_add_overview_slide("./graphs/dr.png")
_add_overview_slide("./graphs/dr_sample.png")

# for each cluster counted, import its dr plot and tally plot and insert them on a slide
for cluster in range(clusters):
    # insert dr plot
    img_path = "./graphs/cluster_dr_{cluster}.png".format(cluster=cluster)
    slide = prs.slides.add_slide(black_slide_layout)
    slide.shapes.add_picture(img_path, Inches(7), Inches(.7), height=Inches(6.7))
    # insert tally plot
    img_path = "./graphs/tally_{cluster}.png".format(cluster=cluster)
    slide.shapes.add_picture(img_path, Inches(2.2), Inches(0), height=Inches(.77))
    # insert the text skeleton
    txtBox = slide.shapes.add_textbox(Inches(0), Inches(0), Inches(6), Inches(3))
    tf = txtBox.text_frame
    tf.clear()
    _write_paragraph(tf, "Cluster {cluster}: ...".format(cluster=int(cluster)), 24, bold=True, first=True)
    _write_paragraph(tf, "Defining markers:", 12)
    _write_paragraph(tf, "...", 10, level=1)
    # NOTE(review): "Indentity" looks like a typo for "Identity"; left untouched
    # in case the filled-in slides are parsed by label elsewhere - confirm.
    _write_paragraph(tf, "Indentity: ...", 12, bold=True)
    _write_paragraph(tf, "Justification: ...", 12, bold=True)
    if previousAnnotation:
        # second text box listing the earlier annotation, one "; "-separated item per line
        txtBox = slide.shapes.add_textbox(Inches(4), Inches(1), Inches(2), Inches(3))
        tf = txtBox.text_frame
        tf.clear()
        previous_text = "\n".join(previousAnnData.loc[cluster].values[0].split("; "))
        _write_paragraph(tf, previous_text, 12, first=True)

prs.save('annotation_template.pptx')

Binary file not shown.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,106 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 7 11:42:51 2018
@author: doru
"""
import sys

import numpy as np
import pandas as pd

# Command line: <output html path> <expression csv path> <number of category columns>.
# Extra trailing arguments are ignored, as before; too few now give a usage
# message instead of a bare IndexError.
args = sys.argv
if len(args) < 4:
    sys.exit("usage: {prog} <save_to> <expression_data_csv> <no_of_categories>".format(prog=args[0]))
save_to = args[1]
expression_data_fname = args[2]
no_of_categories = int(args[3])

# index_col=None keeps every csv column as data.
data = pd.read_csv(expression_data_fname, index_col=None)
# convert colours to r, g, b values, then to floats <= 1.0
def hexdec_to_1floats(hexdec):
    """Return three floats for a colour string such as '#RRGGBB'.

    The first character is skipped, the next six are read as three
    two-digit hexadecimal numbers and each is divided by 255.
    """
    digits = hexdec[1:]
    channels = []
    for start in (0, 2, 4):
        channels.append(int(digits[start:start + 2], 16))
    return np.array(channels) / 255.0
# Columns after the two coordinate columns, the category columns and the same
# number of colour columns are read as one expression column per gene.
first_gene_column = 2 + 2 * no_of_categories
gene_names = list(data.columns[first_gene_column:])
raw_expression = data.values[:, first_gene_column:]


def _short_number(value, width=4):
    """Return str(value) cut to at most `width` characters.

    Floats that print in scientific notation are first rewritten in
    fixed-point form: cutting '1e-05' to '1e-0' would otherwise turn a tiny
    value into 1 once the text is read back as a number.
    NOTE(review): values with more than `width` integer digits are still cut
    short, exactly as before.
    """
    text = str(value)
    if isinstance(value, (float, np.floating)) and "e" in text:
        text = "{:f}".format(float(value))
    return text[:width]


# One maximum per gene (column). The previous axis=1 produced one value per
# row and was then indexed with the gene index. A row-wise division that was
# computed here but never stored has been removed.
max_expression = raw_expression.max(axis=0)

gene_options = []
gene_expression_colour_coded = []
max_expression_string = []
for index, gene_name in enumerate(gene_names):
    gene_expression = ",".join(_short_number(value) for value in raw_expression[:, index])
    gene_expression_colour_coded.append("gene_expression['{gn}'] = [{ge}]".format(gn=gene_name, ge=gene_expression))
    gene_options.append("<option value='{gn}'>{gn}</option>".format(gn=gene_name))
    max_expression_string.append("max_expression['{gene}'] = {val}".format(gene=gene_name, val=max_expression[index]))
gene_options = "".join(gene_options)
gene_expression_colour_coded = ";".join(gene_expression_colour_coded)
max_expression_string = ";".join(max_expression_string)
# make coordinates data string
coordinates = data.values[:, 0:2].astype('float32')
# Centre the points on the midpoint of 1.2 x the (1st, 98th) percentiles of
# each axis, then rescale so that the largest of those percentile bounds has
# magnitude 1 after centring.
Xrange = np.percentile(coordinates[:, 0], q=[1, 98]) * 1.2
Yrange = np.percentile(coordinates[:, 1], q=[1, 98]) * 1.2
center = np.array((np.mean(Xrange), np.mean(Yrange)))
coordinates = coordinates - np.tile(center, (coordinates.shape[0], 1))
ratio = max(np.abs(np.percentile(coordinates[:, 0], q=[1, 98]) * 1.2))
ratio = max(ratio, max(np.abs(np.percentile(coordinates[:, 1], q=[1, 98]) * 1.2)))
ratio = 1.0 / ratio
coordinates = coordinates * ratio


def _short_coordinate(value, width=6):
    """Return str(value) cut to `width` characters.

    Values printed in scientific notation are rewritten in fixed-point form
    first; cutting '3.2e-05' to '3.2e-0' would otherwise move a point that
    sits next to the origin far away from it.
    """
    text = str(value)
    if "e" in text:
        text = "{:f}".format(float(value))
    return text[:width]


coordinates = ",".join(_short_coordinate(value) for value in coordinates.ravel())
# Category columns start at position 2; the colour column of category k sits
# no_of_categories positions after the category column itself.
category_columns = data.columns[2:(2 + no_of_categories)]
categories = [str(column).replace(".", " ") for column in category_columns]
categories_options = "".join(
    "<option value='{cat}'>{cat}</option>".format(cat=cat) for cat in categories
)

colour_statements = []
index_statements = []
for cat_index in range(no_of_categories):
    category_name = data.columns[2 + cat_index].replace(".", " ")

    # flat r,g,b list: three values per row, each cut to four characters
    hex_colours = data.values[:, 2 + cat_index + no_of_categories]
    rgb_text = ",".join(
        ",".join(str(channel)[:4] for channel in hexdec_to_1floats(hex_colour))
        for hex_colour in hex_colours
    )
    colour_statements.append(
        "categories_colours['{cn}'] = [{cc}]".format(cn=category_name, cc=rgb_text)
    )

    # for every distinct label, the row positions that carry it
    labels = data.values[:, 2 + cat_index]
    index_statements.append("categories_indices['{cn}'] = []".format(cn=category_name))
    per_label = []
    for t_name in np.unique(labels):
        positions = np.where(labels == t_name)[0]
        per_label.append(
            "categories_indices['{cn}']['{tn}'] = [{ind}]".format(
                cn=category_name, tn=t_name, ind=",".join(str(position) for position in positions)
            )
        )
    index_statements.append("\n".join(per_label))

categories_indices = "\n".join(index_statements)
categories_colours = "\n".join(colour_statements)
# Read the gene families file. Its raw text is pasted into the template, and
# the text between the first pair of single quotes on the left of '=' on each
# non-empty line becomes one <option> of the family selector.
with open("./gene_families.txt", "r") as gene_families_file:
    gene_families = gene_families_file.read()
geneFams = "".join(
    "<option value='{cat}'>{cat}</option>".format(cat=line.split("=")[0].split("\'")[1])
    for line in gene_families.split("\n")
    if line != ""
)

with open('template.html', "r") as f:
    template_str = f.read()

# Placeholders are substituted one after another in this order.
replacements = [
    ('gene_options_here', gene_options),
    ('gene_expression_colour_coded', gene_expression_colour_coded),
    ('coordinates_data_here', coordinates),
    ('category_options_here', categories_options),
    ('categories_colours_data_here', categories_colours),
    ('categories_indices_data_here', categories_indices),
    ('gene_families_options_here', gene_families),
    ('feature_family_option_here', geneFams),
    ('max_expression_here', max_expression_string),
]
for placeholder, value in replacements:
    template_str = template_str.replace(placeholder, value)

with open(save_to, 'w') as result:
    result.write(template_str)

View file

@ -0,0 +1,339 @@
# Command-line handling. All trailing arguments are glued together and split on
# ';'. Each piece is evaluated as R code below and is expected to create the
# variable named after it (e.g. a piece like  seurat.addr='x.RDS').
args = commandArgs(trailingOnly=T)
args = paste(args, collapse = "")
args = unlist(strsplit(args, ";"))

# One line per expected argument: <name>.arg receives the matching raw piece.
arguments.list = "
seurat.addr.arg = args[1]
clustering.res.arg = args[2]
DE.downsample.arg = args[3]
sample.arg = args[4]
"

expected_arguments = unlist(strsplit(arguments.list, "\n"))
expected_arguments = expected_arguments[!(expected_arguments == "")]

if(length(args) != length(expected_arguments)){
  # the previous stop() called sprintf('%s') without a value and so failed
  # with an unrelated error instead of showing this message
  expected.names = vapply(strsplit(expected_arguments, ".arg", fixed = TRUE), "[", character(1), 1)
  stop(sprintf("This pipeline requires %d parameters:\n%s",
               length(expected_arguments), paste(expected.names, collapse = "\n")))
}

eval(parse(text = arguments.list))
for(n in seq_along(expected_arguments)){
  argument = gsub(pattern=" ", replacement="", x=expected_arguments[n], fixed = TRUE)
  argument.name = unlist(strsplit(argument, "=", fixed = TRUE))[1]
  variable.name = gsub(pattern=".arg", replacement="", x=argument.name, fixed = TRUE)
  argument.content = eval(parse(text = argument.name))
  # NOTE(review): this executes text taken from the command line as R code.
  eval(parse(text = argument.content))
  # inherits = FALSE: otherwise a variable such as 'sample' always "exists"
  # because a base function of the same name is found instead
  if (!exists(variable.name, inherits = FALSE)){
    stop(sprintf("Argument %s not passed. Stopping ... ", variable.name))
  }
}
# create required folders for output and work material
# Folder name: <working dir name without a leading "<digits>_">_<seurat.addr>_<yymmddHHMMSS>
output_folder = gsub(pattern="^\\d+_", replacement="", x=basename(getwd()))
output_folder = paste(output_folder, seurat.addr, sep = "_")
# Same digits as before, but taken from an explicit format instead of stripping
# characters from the printed time (which may carry fractional seconds or a
# time zone suffix other than " BST").
c.time = format(Sys.time(), "%y%m%d%H%M%S")
output_folder = paste(output_folder, c.time, sep = "_")
output_folder = file.path("../../output", output_folder)
# recursive: also works when ../../output itself does not exist yet
dir.create(output_folder, recursive = TRUE)
output_folder_material = file.path(output_folder, "material")
dir.create(output_folder_material)

# from here on seurat.addr is the path of the input object inside ../../data
seurat.addr = file.path("../../data", seurat.addr)

source("../../tools/bunddle_utils.R")

library(Seurat)
library(plyr)
library(dplyr)
library(reshape2)
library(RColorBrewer)
library(gridExtra)
library(grid)
library(BiocParallel)
dr.plot.indexed.clusters <- function(point.labels, dr1, dr2, dr1.name, dr2.name, no.legend = F, plt.lb.sz = 5, txt.lb.size = 3, pt.size = .2, random_state = 2){
  # Scatter plot of dr1 against dr2 coloured by point.labels, with density
  # contours and, at the per-label median position, a marker plus the label text.
  #   dr1.name, dr2.name : axis titles
  #   no.legend          : TRUE hides the legend
  #   plt.lb.sz          : size of the median markers
  #   txt.lb.size        : size of the label text
  #   pt.size            : size of the data points
  #   random_state       : seed used when shuffling the colour palette
  df.dr <- data.frame("Cell Labels" = point.labels, DR1 = dr1, DR2 = dr2)
  # labels go through as.numeric() before sorting
  p.labels <- as.character(sort(as.numeric(unique(as.vector(point.labels)))))
  p.labels.medians <- aggregate(df.dr[, 2:3], list(df.dr$Cell.Labels), median)

  set.seed(random_state)
  plt.colours <- sample(colorRampPalette(brewer.pal(12, "Paired"))(length(p.labels)))

  legend.layer <- if (no.legend) {
    theme(legend.position="none")
  } else {
    guides(color = guide_legend(override.aes = list(size=5)))
  }

  plot.obj <- ggplot(data = df.dr, aes(x = DR1, y = DR2, color = Cell.Labels)) +
    geom_point(size = pt.size) +
    scale_color_manual(values=plt.colours) +
    stat_density2d(geom="density2d", aes(x=DR1, y=DR2,alpha=5/10), size=.2, contour=TRUE,bins=7,h=1.5) +
    geom_point(data=p.labels.medians,aes(x = DR1, y = DR2), colour = "gray", size = plt.lb.sz, fill = plt.colours, alpha = .5, pch = 21) +
    annotate("text", x=p.labels.medians$DR1, y = p.labels.medians$DR2, label = as.vector(p.labels.medians$Group.1), size = txt.lb.size) +
    legend.layer +
    xlab(dr1.name) + ylab(dr2.name)
  return(plot.obj)
}
dr.plot <- function(point.labels, dr1, dr2, dr1.name, dr2.name, no.legend = F, plt.lb.sz = 5, txt.lb.size = 3, pt.size = .2, random_state = 2, use.cols = FALSE, index.map = c()){
  # Scatter plot of dr1 against dr2 coloured by point.labels. Legend entries
  # read "<position> <label>"; a gray marker and the text in index.map are
  # drawn at each label's median position.
  #   use.cols  : a logical value means "shuffled Paired palette, numbered
  #               1..n"; anything else is used as the colour vector and
  #               index.map is then taken from the caller
  #   index.map : text drawn at the medians when use.cols is not logical
  df.dr <- data.frame("Cell Labels" = point.labels, DR1 = dr1, DR2 = dr2)
  p.labels <- sort(unique(as.vector(point.labels)))
  df.dr$Cell.Labels <- factor(df.dr$Cell.Labels, levels=p.labels)
  # medians are computed before the labels receive their numeric prefix
  p.labels.medians <- aggregate(df.dr[, 2:3], list(df.dr$Cell.Labels), median)
  numbered.labels <- paste(1:length(p.labels), p.labels, sep = " ")
  df.dr$Cell.Labels <- mapvalues(x = df.dr$Cell.Labels, from = p.labels, to = numbered.labels)

  if(!is.logical(use.cols)){
    plt.colours <- use.cols
  }else{
    set.seed(random_state)
    plt.colours <- sample(colorRampPalette(brewer.pal(12, "Paired"))(length(p.labels)))
    index.map <- 1:length(p.labels)
  }

  legend.layer <- if (no.legend) {
    theme(legend.position="none")
  } else {
    guides(color = guide_legend(override.aes = list(size=5)))
  }

  plot.obj <- ggplot(data = df.dr, aes(x = DR1, y = DR2, color = Cell.Labels)) +
    geom_point(size = pt.size) +
    scale_color_manual(values=plt.colours) +
    geom_point(data=p.labels.medians,aes(x = DR1, y = DR2), colour = "gray", size = plt.lb.sz, fill = "gray", alpha = .5, pch = 21) +
    annotate("text", x=p.labels.medians$DR1, y = p.labels.medians$DR2, label = index.map, size = txt.lb.size) +
    legend.layer +
    xlab(dr1.name) + ylab(dr2.name)
  return(plot.obj)
}
dr.plot.group <- function(point.labels, dr1, dr2, dr1.name, dr2.name, group.name, pt.size = .4){
  # Scatter plot of dr1 against dr2 in which the points labelled group.name
  # are drawn in "#0D7D75" and all other labels in "#bae1ff". No legend; the
  # plot title is group.name.
  df.dr <- data.frame("Cell Labels" = point.labels, DR1 = dr1, DR2 = dr2)
  p.labels <- sort(unique(as.vector(point.labels)))
  df.dr$Cell.Labels <- factor(df.dr$Cell.Labels, levels=p.labels)

  # one colour per factor level, in level order
  plt.colours <- rep("#bae1ff", length(p.labels))
  plt.colours[which(p.labels == group.name)] <- "#0D7D75"

  plot.obj <- ggplot(data = df.dr, aes(x = DR1, y = DR2, color = Cell.Labels)) +
    geom_point(size = pt.size) +
    scale_color_manual(values=plt.colours) +
    theme(legend.position="none") +
    xlab(dr1.name) + ylab(dr2.name) + ggtitle(group.name)
  return(plot.obj)
}
tabulate.seurat.by.cluster <- function(seurat.obj, slot1, slot2, save.at, width, height, saveas.pdf = F){
  # For every level of seurat.obj@ident, cross-tabulate the cells of that
  # cluster by two meta.data columns, add row/column totals, print the table
  # and draw it with grid.table() into <save.at>/tally_<cluster>.png (or .pdf).
  #   slot1, slot2  : names of columns in seurat.obj@meta.data
  #   save.at       : folder receiving the files
  #   width, height : passed unchanged to png() / pdf()
  #   saveas.pdf    : TRUE writes pdf files instead of png files
  # The columns are read directly from the object; building the expression as
  # text from the caller's argument only worked when the argument was a plain
  # variable name.
  all.cells <- colnames(seurat.obj@data)
  for (cluster in levels(seurat.obj@ident)){
    cells.cluster <- all.cells[seurat.obj@ident == cluster]
    cells.indices <- match(cells.cluster, all.cells)
    labels1 <- seurat.obj@meta.data[[slot1]][cells.indices]
    labels2 <- seurat.obj@meta.data[[slot2]][cells.indices]
    # deparse.level = 0 keeps the table dimensions unnamed
    tally <- table(labels1, labels2, deparse.level = 0)

    tally.rez <- cbind(tally, rowSums(tally))
    colnames(tally.rez)[ncol(tally.rez)] <- sprintf("Total by %s", slot1)
    tally.rez <- rbind(tally.rez, c(colSums(tally), length(cells.cluster)))
    rownames(tally.rez)[nrow(tally.rez)] <- sprintf("Total by %s", slot2)
    print(tally.rez)

    if (saveas.pdf){
      filename <- file.path(save.at, paste0("tally_", cluster, ".pdf"))
      pdf(filename, width = width, height = height)
    }else{
      filename <- file.path(save.at, paste0("tally_", cluster, ".png"))
      png(filename, width = width, height = height)
    }
    grid.table(tally.rez)
    dev.off()
  }
}
FindMarker.wrapper <- function(markers.for){
  # Run FindMarkers for the identity markers.for and return its table with two
  # extra columns: cluster (= markers.for) and gene (= the row names).
  # Reads globals: DE.downsample selects seurat.obj_d over seurat.obj.
  target.obj <- if (DE.downsample) seurat.obj_d else seurat.obj
  markers = FindMarkers(target.obj, ident.1=markers.for, only.pos = F, min.pct=0.25,
                        genes.use=rownames(target.obj@data), thresh.use = 0.25,
                        test.use = "wilcox", random.seed = 42, print.bar=T, do.print=T)
  markers$cluster = markers.for
  markers$gene = rownames(markers)
  markers
}
# load data
print("loading data ... ")
seurat.obj = readRDS(seurat.addr)
print("Data loaded.")

# Only normalisation is recomputed here. Clustering below asks for the "pca"
# reduction, so that is presumably already stored in the loaded object. A
# "Computing variable genes" message with no computation behind it was removed.
print("Normalizing data ... ")
seurat.obj <- NormalizeData(object = seurat.obj, normalization.method = "LogNormalize", scale.factor = 10000)

# find all clusters
print("Clustering data ... ")
seurat.obj <- FindClusters(object = seurat.obj, reduction.type = "pca",
                           dims.use = 1:20, resolution = clustering.res, save.SNN = T, algorithm=1)
# the count used to be wrapped in a second print() and was therefore shown twice
print(paste("Number of clusters: ", length(levels(seurat.obj@ident)), sep = ""))
seurat.obj@meta.data$LouvainClustering = as.vector(seurat.obj@ident)
print(table(seurat.obj@meta.data$LouvainClustering))

# NOTE(review): this overwrites the input file with the re-clustered object.
print("Saving Seurat object")
saveRDS(seurat.obj, seurat.addr)
print('Seurat object saved')
# writing marker genes to disk
if (DE.downsample){
  # keep at most 300 randomly chosen cells per cluster
  cluster.ids <- unique(as.vector(seurat.obj@ident))
  cells.to.keep <- c()
  for (cluster.id in cluster.ids){
    cell.ids <- which(seurat.obj@ident == cluster.id)
    # Draw positions with sample.int(): sample(x = cell.ids, ...) treats a
    # single index k as "sample from 1:k", so a one-cell cluster could pull in
    # a cell from a different cluster.
    picked <- sample.int(length(cell.ids), size = min(300, length(cell.ids)), replace = FALSE)
    cells.to.keep <- c(cell.ids[picked], cells.to.keep)
  }
  seurat.obj_d <- SubsetData(object=seurat.obj, cells.use=names(seurat.obj@ident)[cells.to.keep])
  seurat.obj_d <- NormalizeData(object = seurat.obj_d, normalization.method = "LogNormalize", scale.factor = 10000)
  marker.clusters <- sort(as.vector(unique(seurat.obj_d@meta.data$LouvainClustering)))
}else{
  marker.clusters <- sort(as.vector(unique(seurat.obj@meta.data$LouvainClustering)))
}
print("Calculating marker genes: finished subseting, currently actually calculating the markers ... ")
# FindMarker.wrapper picks the (down-sampled) object itself via DE.downsample
Markers <- bplapply(marker.clusters, FindMarker.wrapper, BPPARAM=MulticoreParam(5))

marker.genes = Reduce(f=rbind, x=Markers)
print("Saving marker genes ... ")
write.csv(marker.genes, file.path(output_folder, "all_markers.csv"))
print('Creating and saving to disk annotation marker genes')

# gene annotation table, addressable by gene symbol through its row names
gene_db <- read.csv('./gene_info.csv')
row.names(gene_db) <- as.vector(gene_db$gene.symbol)

# per cluster: top_n(50) of the markers ordered by avg_logFC
marker.genes.top <- top_n(group_by(marker.genes, cluster), 50, avg_logFC)

# two-column table without header: gene, population
gene_to_pop <- read.csv("./gene_to_pop.tsv", sep = '\t', header = F)
names(gene_to_pop) <- c('Gene', 'Population')

marker.genes.unique <- unique(as.vector(marker.genes.top$gene))
gene_info <- gene_db[marker.genes.unique, ]

# get gene name
gene.name <- mapvalues(x = as.vector(marker.genes.top$gene),
                       from = as.vector(gene_info$gene.symbol),
                       to = as.vector(gene_info$gene.name))
marker.genes.top <- cbind(as.data.frame(marker.genes.top), data.frame(GeneName = gene.name))
# get also present in
# For one gene symbol: the clusters of all marker.genes.top rows carrying that
# gene, comma separated, or '' when there is at most one such row.
also_present_in <- function(gene.sym){
  is.gene <- as.vector(marker.genes.top$gene) == gene.sym
  part <- as.vector(marker.genes.top[is.gene, ]$cluster)
  if (length(part) <= 1){
    return('')
  }
  paste(part, collapse = ', ')
}
present_in <- unlist(lapply(marker.genes.unique, also_present_in))
# expand from one value per unique gene to one value per table row
present_in <- mapvalues(x = as.vector(marker.genes.top$gene),
                        from = as.vector(marker.genes.unique),
                        to = as.vector(present_in))
marker.genes.top <- cbind(marker.genes.top, data.frame(AlsoPresentInClusters = present_in))
# get cell type flag
# Exactly one value per marker row: the population(s) listed for the gene in
# gene_to_pop, joined with ', ' when there are several, '' when there are none.
# Appending every match separately made the vector longer than the table as
# soon as a gene was listed for more than one population.
known.genes = as.vector(gene_to_pop$Gene)
known.pops = as.vector(gene_to_pop$Population)
cell_type_flag = vapply(as.vector(marker.genes.top$gene), function(gene.sym){
  paste(known.pops[which(known.genes == gene.sym)], collapse = ', ')
}, character(1), USE.NAMES = FALSE)
marker.genes.top = cbind(marker.genes.top, data.frame(CellTypeFlag = cell_type_flag))
# The three columns below are all looked up by gene symbol in gene_info.
marker.symbols <- as.vector(marker.genes.top$gene)
info.symbols <- as.vector(gene_info$gene.symbol)

# get gene summary
gene.summary <- mapvalues(x = marker.symbols, from = info.symbols, to = as.vector(gene_info$gene.summary))
marker.genes.top <- cbind(as.data.frame(marker.genes.top), data.frame(Summary = gene.summary))

# get reactome pathway
reactome.pathway <- mapvalues(x = marker.symbols, from = info.symbols, to = as.vector(gene_info$reactome.pathway))
marker.genes.top <- cbind(as.data.frame(marker.genes.top), data.frame(Reactom = reactome.pathway))

# get gene family
gene.family <- mapvalues(x = marker.symbols, from = info.symbols, to = as.vector(gene_info$gene.family))
marker.genes.top <- cbind(as.data.frame(marker.genes.top), data.frame(GeneFamily = gene.family))

write.csv(marker.genes.top, file.path(output_folder, "annotation_markers.csv"))

# one row per cluster with the identity preset to "None"
cluster.names <- unique(seurat.obj@ident)
update.template <- data.frame(Cluster = sort(as.vector(cluster.names)),
                              Identity = rep("None", length(cluster.names)))
write.csv(update.template, file.path(output_folder, "update_template.csv"), row.names = F)
print("compiling the template")

# per-cell table used by all plots below: cluster, first two tSNE and UMAP
# embedding columns, and the meta.data column named by the 'sample' argument
tsne.embedding <- seurat.obj@dr$tsne@cell.embeddings
umap.embedding <- seurat.obj@dr$umap@cell.embeddings
df <- data.frame(CellNames = names(seurat.obj@ident),
                 ClusterIndex = as.vector(seurat.obj@ident),
                 tSNEx = tsne.embedding[, 1],
                 tSNEy = tsne.embedding[, 2],
                 UMAPx = umap.embedding[, 1],
                 UMAPy = umap.embedding[, 2],
                 Sample = seurat.obj@meta.data[, sample])

# transfer the compile_template.py file to the output
file.copy(from='compile_template.py', to=file.path(output_folder, 'compile_template.py'))

# work inside the output folder from here on; CURDIR is restored at the end
CURDIR = getwd()
setwd(output_folder)
dir.create("graphs")
# make tsne and umap plots by clusters
plot.tsne <- dr.plot.indexed.clusters(point.labels=df$ClusterIndex, dr1=df$tSNEx, dr2=df$tSNEy, dr1.name="tSNE-x", dr2.name="tSNE-y", no.legend = T, plt.lb.sz = 5, txt.lb.size = 3, pt.size = .2, random_state = 2)
plot.umap <- dr.plot.indexed.clusters(point.labels=df$ClusterIndex, dr1=df$UMAPx, dr2=df$UMAPy, dr1.name="UMAP-x", dr2.name="UMAP-y", no.legend = T, plt.lb.sz = 5, txt.lb.size = 3, pt.size = .2, random_state = 2)
png("./graphs/dr.png", width = 1200, height = 700)
# explicit print(), as in the per-cluster loop: the figure is then also drawn
# when this file is source()d rather than relying on top-level auto-printing
print(plot_grid(plot.tsne, plot.umap))
dev.off()

# make tsne and umap plots by sample
plot.tsne <- dr.plot(point.labels=df$Sample, dr1=df$tSNEx, dr2=df$tSNEy, dr1.name="tSNE-x", dr2.name="tSNE-y", no.legend = F, plt.lb.sz = 5, txt.lb.size = 3, pt.size = 1)
plot.umap <- dr.plot(point.labels=df$Sample, dr1=df$UMAPx, dr2=df$UMAPy, dr1.name="UMAP-x", dr2.name="UMAP-y", no.legend = F, plt.lb.sz = 5, txt.lb.size = 3, pt.size = 1)
png("./graphs/dr_sample.png", width = 1200, height = 700)
print(plot_grid(plot.tsne, plot.umap))
dev.off()
# per cluster: tSNE and UMAP plots with that cluster highlighted, stacked in
# one png named graphs/cluster_dr_<cluster>.png
no.clusters <- length(levels(seurat.obj@ident))
for (i in 1:no.clusters){
  cluster.name <- levels(seurat.obj@ident)[i]
  plot.tsne <- dr.plot.group(point.labels = df$ClusterIndex,
                             dr1 = df$tSNEx, dr2 = df$tSNEy,
                             dr1.name = "tSNE1", dr2.name = "tSNE2",
                             group.name = cluster.name, pt.size = .4)
  plot.umap <- dr.plot.group(point.labels = df$ClusterIndex,
                             dr1 = df$UMAPx, dr2 = df$UMAPy,
                             dr1.name = "UMAP1", dr2.name = "UMAP2",
                             group.name = cluster.name, pt.size = .4)
  graph.addr <- file.path("graphs", paste0("cluster_dr_", cluster.name, ".png"))
  png(graph.addr, width = 400, height = 900)
  print(plot_grid(plot.tsne, plot.umap, nrow = 2))
  dev.off()
  # progress output
  print(cluster.name)
}
# tally tables for all the clusters
# NOTE(review): both tabulation columns are hard-coded to "tissue", while the
# table built earlier uses the column named by the 'sample' argument - confirm
# which one is intended.
tabulate.seurat.by.cluster(seurat.obj, "tissue", "tissue",
                           save.at = "./graphs", width = 1110, height = 110,
                           saveas.pdf = F)

# compile template annotation powerpoint (python.addr is not defined in this file)
compile.command <- paste(python.addr, "compile_template.py", sep = " ")
system(compile.command, wait = T)

# back to the folder the script was started from
setwd(CURDIR)
print('Finished.')

View file

@ -0,0 +1,17 @@
#!/bin/bash
#$ -cwd
#$ -N make_annotation_template
#$ -V
#$ -l h_rt=47:59:59
#$ -l h_vmem=200G
#$ -pe smp 6

# Runs make_annotation_template.R with the single argument given to this
# script, then prints a completion time stamp.

if [ "$#" -ne 1 ]; then
    echo "Illegal number of parameters" >&2
    exit 1
fi

# Quoted so the argument reaches Rscript as one word even when it contains
# spaces or shell metacharacters.
Rscript make_annotation_template.R "$1"

echo "End on $(date)"

View file

@ -0,0 +1,577 @@
<!doctype html>
<html lang='en'>
<head>
<meta charset='utf-8'>
<title>3D viewer</title>
<meta name='description' content='The HTML5 Herald'>
<meta name='author' content='Dorin-Mirel Popescu'>
</head>
<body>
<table>
    <tr>
        <td align='left'>
            <!-- Rendering controls: each input hands its current value to a handler function.
                 Every input now carries an id equal to the "for" of its label, so the
                 labels are actually bound to their controls. -->
            <form>
                <fieldset>
                    <legend><b>Visualisation options</b></legend>
                    <label for='particleSizeBar'>Particle size: </label>
                    <input type='range' name='particleSizeBar' id='particleSizeBar' min='1' max='14' step='0.1' oninput='setParticleSize(value)' value='2' /><br />
                    <label for='alphaInput'>Transparency: </label>
                    <input type='range' name='alphaInput' id='alphaInput' min='0' max='1000' oninput='setAlpha(value)' value='1000' /><br />
                    <label for='canvasSizeInput'>Canvas size: </label>
                    <input type='range' name='canvasSizeInput' id='canvasSizeInput' min='200' max='2000' oninput='setCanvasSize(value)' value='500' /><br />
                    <label for='bgInputDark'>Dark background: </label>
                    <input type='radio' name='bgInput' id='bgInputDark' oninput='setBackground(value)' value='dark' />
                    <label for='bgInputWhite'>White background: </label>
                    <input type='radio' name='bgInput' id='bgInputWhite' oninput='setBackground(value)' value='white' checked />
                    <br />
                </fieldset>
            </form>
        </td>
        <td style='vertical-align: top' rowspan='2'>
            <!-- Colouring controls: the option lists of the three selectors are
                 inserted when the page is generated. -->
            <form>
                <fieldset>
                    <legend><b>Colour by:</b></legend>
                    <table>
                        <tr>
                            <td>
                                Choose gene family:
                            </td>
                            <td>
                                <label for='familyGeneSelector'><select name='familyGeneSelector' id='familyGeneSelector' onchange='selectFeatureFamily()'>feature_family_option_here</select></label>
                            </td>
                        </tr>
                        <tr>
                            <td>
                                <label for='colourByGene'><input type='radio' name='colourType' id='colourByGene' onchange='setColourBy(value)' value='gene_expression' />Gene expression: </label>
                            </td>
                            <td>
                                <label for='geneSelector'><select name='geneSelector' id='geneSelector' onchange='selectFeature()'>gene_options_here</select></label>
                            </td>
                        </tr>
                        <tr>
                            <td colspan='2' align='center'>
                                <canvas id='canvasColorScale' width='200' height='40'></canvas>
                            </td>
                        </tr>
                        <tr>
                            <td>
                                <label for='colourByCategory'><input type='radio' name='colourType' id='colourByCategory' checked onchange='setColourBy(value)' value='category' />Category:</label>
                            </td>
                            <td>
                                <label for='categorySelector'><select name='categorySelector' id='categorySelector' onchange='setCategory()'>category_options_here</select></label>
                            </td>
                        </tr>
                    </table>
                </fieldset>
            </form>
            <br />
            <div>
                <fieldset>
                    <legend><b>Cell types:</b></legend>
                    <label for='toggleRadio'><input type='checkbox' name='toggleRadio' id='toggleRadio' onchange='toggleAllTypes()' checked />Show all:</label>
                    <!-- filled by script with one checkbox per type of the selected category -->
                    <form id='typesControlPanel'>
                    </form>
                </fieldset>
            </div>
        </td>
    </tr>
    <tr>
        <td style='vertical-align: text-top' >
            <!-- drawing surface for the points -->
            <canvas id='canvas' width='600' height='600'></canvas>
        </td>
    </tr>
</table>
<script id='vertex-shader' type='x-shader/x-fragment'>
// Per-point inputs: position and a three-component colour attribute.
attribute vec4 a_Position;
attribute vec3 a_Color;
// Point size, opacity and colouring mode shared by all points.
uniform float u_basePointSize;
uniform float u_Alpha;
uniform int u_PaintFeatureScale;
// Colour handed on to the fragment shader.
varying vec4 v_Color;

void main() {
    gl_Position = a_Position;
    gl_PointSize = u_basePointSize;
    if (u_PaintFeatureScale != 0) {
        // Scale mode: only the first colour component is read. 0.0 gives
        // blue, 0.5 gives green, 1.0 gives red.
        float level = a_Color.r;
        float red = max(0.0, 2.0 * level - 1.0);
        float blue = max(0.0, 2.0 * (1.0 - level) - 1.0);
        float green = 1.0 - 2.0 * abs(level - 0.5);
        v_Color = vec4(red, green, blue, u_Alpha);
    } else {
        // Direct mode: the attribute already is the rgb colour.
        v_Color = vec4(a_Color, u_Alpha);
    }
}
</script>
<script id ='fragment-shader' type='x-shader/x-fragment'>
precision mediump float;
varying vec4 v_Color;

void main() {
    // Map the point sprite coordinate to [-1, 1] on both axes and drop every
    // fragment outside the unit circle, which leaves a round point.
    vec2 centred = 2.0 * gl_PointCoord - 1.0;
    float squared_radius = dot(centred, centred);
    if (squared_radius > 1.0) {
        discard;
    }
    gl_FragColor = v_Color;
}
</script>
<script type = 'text/javascript'>
// 4x4 matrix held in `elements`, a Float32Array of 16 numbers.
// Without an argument the matrix starts as identity; given an object that
// owns an `elements` property, its first 16 values are copied.
var Matrix4 = function(opt_src) {
    var hasElements = opt_src && typeof opt_src === 'object' && opt_src.hasOwnProperty('elements');
    if (!hasElements) {
        this.elements = new Float32Array([1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1]);
        return;
    }
    var source = opt_src.elements;
    var copy = new Float32Array(16);
    for (var idx = 0; idx < 16; ++idx) {
        copy[idx] = source[idx];
    }
    this.elements = copy;
};

// Overwrite this matrix with a translation by (x, y, z); returns this.
Matrix4.prototype.setTranslate = function(x, y, z) {
    var values = [1, 0, 0, 0,
                  0, 1, 0, 0,
                  0, 0, 1, 0,
                  x, y, z, 1];
    var e = this.elements;
    for (var idx = 0; idx < 16; ++idx) {
        e[idx] = values[idx];
    }
    return this;
};

// Overwrite this matrix with a view matrix for an eye at (eyeX, eyeY, eyeZ)
// looking at (centerX, centerY, centerZ) with the given up vector; returns this.
Matrix4.prototype.setLookAt = function(eyeX, eyeY, eyeZ, centerX, centerY, centerZ, upX, upY, upZ) {
    // Unit vector from the eye towards the centre.
    var fx = centerX - eyeX,
        fy = centerY - eyeY,
        fz = centerZ - eyeZ;
    var rlf = 1 / Math.sqrt(fx*fx + fy*fy + fz*fz);
    fx *= rlf;
    fy *= rlf;
    fz *= rlf;

    // s = f x up, normalised.
    var sx = fy * upZ - fz * upY,
        sy = fz * upX - fx * upZ,
        sz = fx * upY - fy * upX;
    var rls = 1 / Math.sqrt(sx*sx + sy*sy + sz*sz);
    sx *= rls;
    sy *= rls;
    sz *= rls;

    // u = s x f.
    var ux = sy * fz - sz * fy,
        uy = sz * fx - sx * fz,
        uz = sx * fy - sy * fx;

    var values = [sx, ux, -fx, 0,
                  sy, uy, -fy, 0,
                  sz, uz, -fz, 0,
                  0,  0,  0,   1];
    var e = this.elements;
    for (var idx = 0; idx < 16; ++idx) {
        e[idx] = values[idx];
    }

    // Finish by moving the eye to the origin.
    return this.translate(-eyeX, -eyeY, -eyeZ);
};

// Multiply this matrix from the right by a translation by (x, y, z); returns this.
Matrix4.prototype.translate = function(x, y, z) {
    var e = this.elements;
    for (var row = 0; row < 4; ++row) {
        e[12 + row] += e[row] * x + e[4 + row] * y + e[8 + row] * z;
    }
    return this;
};

// Overwrite this matrix with a perspective projection; returns this.
//   fovy   : vertical field of view in degrees
//   aspect : width / height
//   near, far : clipping distances, both must be > 0 and different
// Throws the strings 'null frustum', 'near <= 0' or 'far <= 0' on bad input.
Matrix4.prototype.setPerspective = function(fovy, aspect, near, far) {
    if (near === far || aspect === 0) {
        throw 'null frustum';
    }
    if (near <= 0) {
        throw 'near <= 0';
    }
    if (far <= 0) {
        throw 'far <= 0';
    }

    // half of the field of view, in radians
    var halfAngle = Math.PI * fovy / 180 / 2;
    var s = Math.sin(halfAngle);
    if (s === 0) {
        throw 'null frustum';
    }

    var rd = 1 / (far - near);
    var ct = Math.cos(halfAngle) / s;

    var values = [ct / aspect, 0,  0,                      0,
                  0,           ct, 0,                      0,
                  0,           0,  -(far + near) * rd,     -1,
                  0,           0,  -2 * near * far * rd,   0];
    var e = this.elements;
    for (var idx = 0; idx < 16; ++idx) {
        e[idx] = values[idx];
    }
    return this;
};
</script>
<script type='text/javascript'>
// Rebuild the "Cell types" panel for the category currently chosen in the
// category selector: one checked checkbox per type, on a background made from
// the colour of the first cell listed for that type.
// Sets the globals category_type and current_indices; all other names are
// local now (the loop variable used to be the implicit global `name`, which
// in a browser is window.name).
function buildCategoryRadioButtons(){
    category_type = categorySelector.options[categorySelector.selectedIndex].value;
    current_indices = indices_all;
    // create checkbox markup from categories
    typesControlPanel.innerHTML = "";
    var radio_commands_HTML = "";
    var types = categories_indices[category_type];
    for (var type_name in types) {
        var f_index = types[type_name][0];
        var cols = categories_colours[category_type].slice(3 * f_index, 3 * f_index + 3);
        // three floats in [0, 1] -> "#rrggbb"
        var col_label = "#";
        for (var k = 0; k < cols.length; k++) {
            col_label += Math.round(255 * cols[k]).toString(16).padStart(2, '0');
        }
        // NOTE(review): type names go into the markup unescaped; a name
        // containing a quote or '<' would break it.
        radio_commands_HTML += "<div style='background-color:" + col_label + "'>" +
            "<input style='float:left' type='checkbox' id='" + type_name +
            "' checked onchange='toggleCategoryAction()' /><label style='float:left' for='" + type_name + "'" +
            ">" + type_name + ": </label><br/></div>";
    }
    typesControlPanel.innerHTML = radio_commands_HTML;
}
// Handler of the per-type checkboxes: rebuild the point buffer and redraw.
function toggleCategoryAction(){
    updateBuffer();
    draw();
}
// Handler of the category selector: rebuild the type checkboxes, then the
// point buffer, then redraw.
function setCategory(){
    buildCategoryRadioButtons();
    updateBuffer();
    draw();
}
// Handler of the "colour by" radio buttons. Stores the mode in colour_by,
// sets the shader flag (0 for 'category', 1 for anything else) and redraws.
function setColourBy(value){
    colour_by = value;
    PaintFeatureScale = (colour_by == 'category') ? 0 : 1;
    gl_context.uniform1i(u_PaintFeatureScale, PaintFeatureScale);
    updateBuffer();
    draw();
}
// Handler of the "Show all" checkbox: give every checkbox of the types panel
// the same checked state, then rebuild the buffer and redraw.
function toggleAllTypes(){
    controlRadios = typesControlPanel.elements;
    // loop counter is local now instead of an implicit global
    for (var i = 0; i < controlRadios.length; i++) {
        controlRadios[i].checked = toggleRadio.checked;
    }
    updateBuffer();
    draw();
}
// Handler of the gene selector: remember the chosen gene in `feature`,
// redraw the points and redraw the colour scale with that gene's maximum.
function selectFeature(){
    feature = geneSelector.value;
    updateBuffer();
    draw();
    drawScale(max_expression[feature]);
    console.log('selected features');
}
// Clear the canvas to white or black depending on bg_color, upload the
// current buffer and draw its n points.
function draw(){
    var shade = (bg_color == "white") ? 1 : 0;
    gl_context.clearColor(shade, shade, shade, 1);
    gl_context.clear(gl_context.COLOR_BUFFER_BIT);
    gl_context.bufferData(gl_context.ARRAY_BUFFER, buffer_data_array, gl_context.STATIC_DRAW);
    gl_context.drawArrays(gl_context.POINTS, 0, n);
}
// Rebuild buffer_data_array (five floats per point: x, y and three colour
// values) and the point count n for the cells whose type checkbox is ticked.
// Updates the globals controlRadios, current_indices, buffer_data_array and n;
// the loop counter and the checkbox id are local now instead of implicit
// globals.
function updateBuffer(){
    var buffer_data = [];
    // first update indices to be used - for this read the category control panel checkboxes
    controlRadios = typesControlPanel.elements;
    current_indices = [];
    for (var i = 0; i < controlRadios.length; i++) {
        if (controlRadios[i].checked) {
            var radio_type = controlRadios[i].id;
            current_indices = current_indices.concat(categories_indices[category_type][radio_type]);
        }
    }
    // now just populate the buffer_data
    if (colour_by == 'gene_expression') {
        // the expression value fills all three colour slots
        var expression = gene_expression[feature];
        current_indices.forEach(function(index){
            var level = expression[index];
            buffer_data.push(coordinates_data[2 * index], coordinates_data[2 * index + 1],
                             level, level, level);
        });
    } else {
        var colours = categories_colours[category_type];
        current_indices.forEach(function(index){
            buffer_data.push(coordinates_data[2 * index], coordinates_data[2 * index + 1],
                             colours[3 * index], colours[3 * index + 1], colours[3 * index + 2]);
        });
    }
    buffer_data_array = new Float32Array(buffer_data);
    n = buffer_data_array.length / 5;
}
// Updates the point size.
//   value - new size; parsed with parseInt and stored in the global
//           `particleSize`, then sent to the u_basePointSize uniform.
function setParticleSize(value) {
    var parsedSize = parseInt(value);
    particleSize = parsedSize;
    gl_context.uniform1f(u_basePointSize, particleSize);
    updateBuffer();
    draw();
}
// Updates the point opacity.
//   value - integer-like input that is divided by 1000 to obtain the alpha
//           stored in the global `alphaValue` and sent to u_Alpha.
function setAlpha(value) {
    var rawAlpha = parseInt(value);
    alphaValue = rawAlpha / 1000;
    gl_context.uniform1f(u_Alpha, alphaValue);
    updateBuffer();
    draw();
}
// Resizes the (square) canvas and rebuilds the rendering context for it.
//   value - new side length; parsed with parseInt.
function setCanvasSize(value) {
    var side = parseInt(value);
    canvas.width = side;
    canvas.height = side;
    // Re-create the program and re-apply buffers/uniforms for the new size.
    gl_context = getContext(canvas);
    gl_context = initContext(gl_context);
    gl_context.viewport(0, 0, canvas.width, canvas.height);
    updateBuffer();
    draw();
}
// Stores the requested background in the global `bg_color` and repaints.
//   value - background name; draw() paints white for "white" and black for
//           anything else.
function setBackground(value) {
    bg_color = value;
    draw();
}
// Compiles a shader whose source is stored in a DOM element.
//   gl   - WebGL rendering context
//   ID   - id of the element holding the shader source
//   type - shader type constant (e.g. gl.VERTEX_SHADER)
// Returns the shader object. A compile failure is reported on the console
// and the shader is still returned, as before.
// Throws an Error when no element with the given id exists.
function shadersFromScriptElement(gl, ID, type) {
    // Local variable: the element is no longer leaked as a global.
    var scriptElement = document.getElementById(ID);
    if (!scriptElement) {
        throw new Error("Shader script element not found: " + ID);
    }
    // Concatenate the text nodes (nodeType 3) among the direct children.
    var source = '';
    for (var node = scriptElement.firstChild; node; node = node.nextSibling) {
        if (node.nodeType == 3) {
            source += node.textContent;
        }
    }
    var shader = gl.createShader(type);
    gl.shaderSource(shader, source);
    gl.compileShader(shader);
    // Surface compile errors instead of failing silently at link time.
    if (!gl.getShaderParameter(shader, gl.COMPILE_STATUS)) {
        console.error("Shader '" + ID + "' failed to compile: " + gl.getShaderInfoLog(shader));
    }
    return shader;
}
// Obtains a WebGL context for the canvas and installs the shader program
// built from the 'vertex-shader' and 'fragment-shader' elements.
//   canvasWidget - the canvas element
// Returns the context, with the linked program also stored on gl.program.
// Throws an Error when none of the known context names yields a context.
function getContext(canvasWidget) {
    var names = ['webgl', 'experimental-webgl', 'webkit-3d', 'moz-webgl'],
        gl = null;
    // Try each context name until one succeeds.
    for (var i = 0; i < names.length && !gl; i++) {
        try {
            gl = canvasWidget.getContext(names[i]);
        } catch (e) {
            // An unsupported name may throw; fall through to the next one.
            gl = null;
        }
    }
    if (!gl) {
        // Previously this surfaced as a TypeError on an undefined `gl`.
        throw new Error("WebGL is not available: no rendering context could be created");
    }
    var vshader = shadersFromScriptElement(gl, 'vertex-shader', gl.VERTEX_SHADER),
        fshader = shadersFromScriptElement(gl, 'fragment-shader', gl.FRAGMENT_SHADER);
    // NOTE(review): `program` is assigned without `var`, i.e. as a global,
    // exactly as before; code outside this function may read it - confirm
    // before making it local.
    program = gl.createProgram();
    gl.attachShader(program, vshader);
    gl.attachShader(program, fshader);
    gl.linkProgram(program);
    // Report link errors instead of continuing silently.
    if (!gl.getProgramParameter(program, gl.LINK_STATUS)) {
        console.error("Shader program failed to link: " + gl.getProgramInfoLog(program));
    }
    gl.useProgram(program);
    gl.program = program;
    return gl;
}
// Configures a freshly created context: binds a vertex buffer, describes
// the interleaved layout (2 position floats + 3 colour floats per point),
// uploads the current uniform values and sets blending / clear state.
//   gl - context returned by getContext(); gl.program must be set.
// Returns the same context.
function initContext(gl) {
    var FLOATS_PER_VERTEX = 5;
    n = buffer_data_array.length / FLOATS_PER_VERTEX;

    var vertexBuffer = gl.createBuffer();
    gl.bindBuffer(gl.ARRAY_BUFFER, vertexBuffer);

    var floatBytes = buffer_data_array.BYTES_PER_ELEMENT,
        stride = floatBytes * FLOATS_PER_VERTEX;

    // Position: first two floats of each vertex.
    var positionLocation = gl.getAttribLocation(gl.program, 'a_Position');
    gl.vertexAttribPointer(positionLocation, 2, gl.FLOAT, false, stride, 0);
    gl.enableVertexAttribArray(positionLocation);

    // Colour: the three floats that follow the position.
    var colourLocation = gl.getAttribLocation(gl.program, 'a_Color');
    gl.vertexAttribPointer(colourLocation, 3, gl.FLOAT, false, stride, 2 * floatBytes);
    gl.enableVertexAttribArray(colourLocation);

    // Uniform locations are kept in globals because the setters reuse them.
    u_basePointSize = gl.getUniformLocation(gl.program, 'u_basePointSize');
    gl.uniform1f(u_basePointSize, particleSize);
    u_Alpha = gl.getUniformLocation(gl.program, "u_Alpha");
    gl.uniform1f(u_Alpha, alphaValue);
    u_PaintFeatureScale = gl.getUniformLocation(gl.program, 'u_PaintFeatureScale');
    gl.uniform1i(u_PaintFeatureScale, PaintFeatureScale);

    // Black background only for "dark"; white otherwise.
    if (bg_color == "dark") {
        gl.clearColor(0, 0, 0, 1);
    } else {
        gl.clearColor(1, 1, 1, 1);
    }

    gl.disable(gl.DEPTH_TEST);
    gl.enable(gl.BLEND);
    gl.blendFunc(gl.SRC_ALPHA, gl.ONE_MINUS_SRC_ALPHA);
    gl.clear(gl.COLOR_BUFFER_BIT);
    return gl;
}
// Handles to the page controls used by the functions above.
var categorySelector = document.getElementById('categorySelector'),
geneSelector = document.getElementById('geneSelector'),
typesControlPanel = document.getElementById('typesControlPanel'),
toggleRadio = document.getElementById('toggleRadio'),
familyGeneSelector = document.getElementById("familyGeneSelector")
// Rendering state shared by the functions above.
// NOTE(review): particleSize is initialised twice in this declaration; the
// later value (2) is the one in effect.
var canvas = document.getElementById('canvas'),
particleSize = 5,
alphaValue = 1.0,
bg_color = "white",
n = 0,
particleSize = 2,
PaintFeatureScale = 0,
currentMaxExpression = 0;
// Data section. The bare identifiers below are not defined in the visible
// code; presumably they are placeholders that the generating script
// replaces with literal data before the page is served - TODO confirm.
// Keep these lines byte-identical so the substitution keeps working.
// coordinates_data is read as a flat array of x, y pairs (see updateBuffer).
coordinates_data = [coordinates_data_here]
gene_expression = []; gene_expression_colour_coded;
// Per category type: flat array read three colour components at a time.
categories_colours = []
categories_colours_data_here
// Per category type and control id: list of point indices.
categories_indices = []
categories_indices_data_here
// Per gene family: list of gene names (see selectFeatureFamily).
var gene_families = []
gene_families_options_here
// Per gene: value passed to drawScale as the upper label of the scale.
var max_expression=[]
max_expression_here
// Repopulates the gene selector with the genes of the family currently
// chosen in `familyGeneSelector`, then activates the selected gene.
//   value - unused; the family is read from the selector itself. Kept so
//           existing callers that pass an argument continue to work.
function selectFeatureFamily(value) {
    var genes = gene_families[familyGeneSelector.value],
        fragment = document.createDocumentFragment();
    // Build real option nodes instead of concatenating HTML, so gene names
    // containing quotes or markup characters cannot break the list. The
    // per-iteration debug logging has been removed.
    for (var g = 0; g < genes.length; g++) {
        fragment.appendChild(new Option(genes[g], genes[g]));
    }
    geneSelector.innerHTML = "";
    geneSelector.appendChild(fragment);
    selectFeature();
}
// Page start-up sequence. Statement order matters: the category and feature
// must be chosen before the buffer is built, and the buffer must exist
// before the rendering context is initialised.
// initialize flags
// when toggling between gene expression and category, do not slice data i.e. do not recompute index data
// when choosing a category always re-initiate index data
var colour_by = 'category', // the other option is 'gene_expression'
category_types = [],
category_type = '',
features = [],
feature = '';
// set category: collect every category type and start with the first one
// NOTE(review): `name` is not declared in the visible code; if it is an
// implicit global, these loops overwrite window.name - confirm.
for(name in categories_colours){category_types.push(name)}
category_type = category_types[0]
// set feature: collect every gene and start with the first one
for(name in gene_expression){features.push(name)}
feature = features[0];
// create global data holders
var indices_all = [],
current_indices = [],
current_colours = [],
buffer_data_array = [];
// one index per point (the colour array holds three entries per point);
// `j` is not declared here and therefore becomes a global
for(j=0;j<categories_colours[category_type].length/3;j++){indices_all.push(j)}
// build the categories buttons for the first time
buildCategoryRadioButtons()
updateBuffer()
// create the renderer
var gl_context = getContext(canvas);
gl_context = initContext(gl_context)
// now draw
draw()
// draw the scale: 2D canvas with a horizontal gradient from x=0 to x=200
var canvasColorScale = document.getElementById('canvasColorScale'),
canvas_ctx = canvasColorScale.getContext('2d'),
scale_gradient = canvas_ctx.createLinearGradient(0, 0, 200, 0);
// Paints the colour-scale legend: a white background, the gradient bar
// below y=20, and the labels '0' and the maximum value above it.
//   maxVal - value shown at the right end, truncated to one decimal place.
function drawScale(maxVal) {
    var width = canvasColorScale.width,
        height = canvasColorScale.height;
    canvas_ctx.fillStyle = 'white';
    canvas_ctx.fillRect(0, 0, width, height);
    canvas_ctx.fillStyle = scale_gradient;
    canvas_ctx.fillRect(0, 20, width, height);
    canvas_ctx.fillStyle = 'black';
    canvas_ctx.fillText('0', 10, 10);
    // Math.trunc instead of parseInt: parseInt converts the number to a
    // string first, so values printed in exponent notation (e.g. 5e-7)
    // were truncated to their leading digit.
    canvas_ctx.fillText(Math.trunc(10 * maxVal) / 10, 180, 10);
}
// Colour stops of the legend gradient, applied in order, followed by the
// first feature selection (which also draws the legend).
[[0, 'blue'], [0.5, 'green'], [1, 'red']].forEach(function (stop) {
    scale_gradient.addColorStop(stop[0], stop[1]);
});
selectFeature();
</script>
</body>
</html>