scRNA-seq_analysis

This commit is contained in:
veghp 2019-07-08 12:22:01 +01:00
commit 82cc2d191e
188 changed files with 146184 additions and 0 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,106 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 7 11:42:51 2018
@author: doru
"""
import sys

import numpy as np
import pandas as pd

# Command line: <output html> <expression csv> <number of category columns>.
# The rest of the script indexes the CSV positionally: columns 0-1 are the
# 2D coordinates, followed by `no_of_categories` label columns, the same
# number of colour columns, and finally one column per gene.
args = sys.argv
if len(args) < 4:
    # Fail with a usage line instead of a bare IndexError traceback.
    sys.exit(
        "usage: {prog} <save_to.html> <expression_data.csv> "
        "<no_of_categories>".format(prog=args[0])
    )
save_to = args[1]
expression_data_fname = args[2]
no_of_categories = int(args[3])

data = pd.read_csv(expression_data_fname, index_col=None)
# convert colours to r, g, b values, then to floats in [0.0, 1.0]
def hexdec_to_1floats(hexdec):
    """Convert a hex colour string into three floats between 0 and 1.

    Args:
        hexdec: Colour such as ``"#1F78B4"``. The leading ``#`` is optional
            and any digits after the first six (e.g. an alpha channel) are
            ignored.

    Returns:
        A numpy array ``[r, g, b]`` with each channel divided by 255.

    Raises:
        ValueError: If fewer than six hex digits are supplied or a digit is
            not valid hexadecimal.
    """
    # Only strip '#' when present; blindly dropping the first character
    # would shift every channel for colours written without it.
    digits = hexdec.lstrip("#")
    if len(digits) < 6:
        raise ValueError(
            "expected a colour with 6 hex digits, got {!r}".format(hexdec)
        )
    return np.array([int(digits[i:i + 2], 16) for i in (0, 2, 4)]) / 255.0
# Gene columns start after the 2 coordinate columns and the 2 columns
# (label + colour) stored for every category.
first_gene_column = 2 + 2 * no_of_categories
gene_names = list(data.columns[first_gene_column:])
raw_expression = data.values[:, first_gene_column:].astype(float)

# Maximum per gene (one value per column). The values below are keyed by
# gene, so the maximum has to be taken down the rows, not across them.
max_expression = raw_expression.max(axis=0)

# Scale every gene to [0, 1]; the viewer's shader maps that range onto the
# blue -> green -> red ramp. Genes whose maximum is not positive are left
# unscaled to avoid dividing by zero.
scaling = max_expression.copy()
scaling[~(scaling > 0)] = 1.0
scaled_expression = raw_expression / scaling

gene_options = []
gene_expression_colour_coded = []
max_expression_string = []
for index, gene_name in enumerate(gene_names):
    # Fixed-point formatting: slicing str(value) corrupts numbers that
    # Python prints in scientific notation (e.g. '1e-05'[:4] == '1e-0').
    gene_expression = ",".join(
        "{:.2f}".format(value) for value in scaled_expression[:, index]
    )
    gene_expression_colour_coded.append(
        "gene_expression['{gn}'] = [{ge}]".format(gn=gene_name, ge=gene_expression)
    )
    gene_options.append("<option value='{gn}'>{gn}</option>".format(gn=gene_name))
    max_expression_string.append(
        "max_expression['{gene}'] = {val}".format(gene=gene_name, val=max_expression[index])
    )
gene_options = "".join(gene_options)
gene_expression_colour_coded = ";".join(gene_expression_colour_coded)
max_expression_string = ";".join(max_expression_string)
# make coordinates data string
coordinates = data.values[:, 0:2].astype('float32')
# Centre the embedding on the midpoint of its 1st-98th percentile range and
# rescale it so that this range, padded by 20 %, fits inside [-1, 1] on both
# axes. Percentiles are used instead of min/max so that a few outlying
# points do not shrink the whole plot.
Xrange = np.percentile(coordinates[:, 0], q=[1, 98]) * 1.2
Yrange = np.percentile(coordinates[:, 1], q=[1, 98]) * 1.2
center = np.array((np.mean(Xrange), np.mean(Yrange)))
coordinates = coordinates - center
ratio = max(np.abs(np.percentile(coordinates[:, 0], q=[1, 98]) * 1.2))
ratio = max(ratio, max(np.abs(np.percentile(coordinates[:, 1], q=[1, 98]) * 1.2)))
ratio = 1.0 / ratio
coordinates = coordinates * ratio
# Fixed-point formatting: slicing str(value) breaks on values printed in
# scientific notation ('-1.2e-05'[:6] == '-1.2e-', which is not a number).
coordinates = ",".join("{:.4f}".format(value) for value in coordinates.ravel())
# Build the category <option> list plus, for every category, the JavaScript
# statements holding per-cell colours and the cell indices of each type.
# Take the array view once instead of re-evaluating `data.values` for every
# category and every type inside the loops.
table = data.values

categories = [str(value).replace(".", " ") for value in data.columns[2:(2 + no_of_categories)]]
categories_options = "".join(
    "<option value='{cat}'>{cat}</option>".format(cat=cat) for cat in categories
)

categories_colours = []
categories_indices = []
for cat_index in range(no_of_categories):
    category_name = data.columns[2 + cat_index].replace(".", " ")
    labels = table[:, 2 + cat_index]
    # The colour column of a category sits `no_of_categories` columns after
    # its label column.
    hex_colours = table[:, 2 + cat_index + no_of_categories]

    # Flat r,g,b,r,g,b,... list with one triplet per cell.
    category_colours = ",".join(
        ",".join(str(value)[:4] for value in hexdec_to_1floats(colour))
        for colour in hex_colours
    )
    categories_colours.append(
        "categories_colours['{cn}'] = [{cc}]".format(cn=category_name, cc=category_colours)
    )

    # One index list per distinct label of this category.
    categories_indices.append("categories_indices['{cn}'] = []".format(cn=category_name))
    cat_indices = []
    for t_name in np.unique(labels):
        indices = ",".join(str(value) for value in np.where(labels == t_name)[0])
        cat_indices.append(
            "categories_indices['{cn}']['{tn}'] = [{ind}]".format(
                cn=category_name, tn=t_name, ind=indices
            )
        )
    categories_indices.append("\n".join(cat_indices))
categories_indices = "\n".join(categories_indices)
categories_colours = "\n".join(categories_colours)
# Read the gene family definitions. The raw text is later pasted into the
# page as JavaScript; here only the family names are extracted for the
# drop-down.
with open("./gene_families.txt", "r") as gene_families_file:
    gene_families = gene_families_file.read()

# The family name is the first single-quoted token left of the '=' on each
# non-empty line.
geneFams = [
    line.split("=")[0].split("'")[1]
    for line in gene_families.split("\n")
    if line != ""
]
geneFams = "".join(
    "<option value='{cat}'>{cat}</option>".format(cat=cat) for cat in geneFams
)
# Fill the HTML template: every placeholder token is replaced by the string
# generated above, then the finished page is written to `save_to`.
with open('template.html', "r") as template_file:
    template_str = template_file.read()

# Applied in this order, exactly once each.
replacements = (
    ('gene_options_here', gene_options),
    ('gene_expression_colour_coded', gene_expression_colour_coded),
    ('coordinates_data_here', coordinates),
    ('category_options_here', categories_options),
    ('categories_colours_data_here', categories_colours),
    ('categories_indices_data_here', categories_indices),
    ('gene_families_options_here', gene_families),
    ('feature_family_option_here', geneFams),
    ('max_expression_here', max_expression_string),
)
for placeholder, content in replacements:
    template_str = template_str.replace(placeholder, content)

with open(save_to, 'w') as result:
    result.write(template_str)

View file

@ -0,0 +1,577 @@
<!doctype html>
<!--
  Viewer page template. The bare marker words inside the three <select>
  elements are substituted with generated <option> lists when the page is
  built; they must stay exactly as written.
  Every control carries an id matching its label's "for" attribute: a label
  whose "for" names no existing id is not associated with any control.
-->
<html lang='en'>
<head>
  <meta charset='utf-8'>
  <title>3D viewer</title>
  <meta name='description' content='The HTML5 Herald'>
  <meta name='author' content='Dorin-Mirel Popescu'>
</head>
<body>
<table>
  <tr>
    <td align='left'>
      <!-- Rendering options: each control calls the matching setter in the main script. -->
      <form>
        <fieldset>
          <legend><b>Visualisation options</b></legend>
          <label for='particleSizeBar'>Particle size: </label>
          <input type='range' name='particleSizeBar' id='particleSizeBar' min=1 max=14 step=0.1 oninput='setParticleSize(value)' value=2 /><br />
          <label for='alphaInput'>Transparency: </label>
          <input type='range' name='alphaInput' id='alphaInput' min=0 max=1000 oninput='setAlpha(value)' value=1000 /><br />
          <label for='canvasSizeInput'>Canvas size: </label>
          <!-- Starts at 600 to match the initial size of the drawing canvas. -->
          <input type='range' name='canvasSizeInput' id='canvasSizeInput' min=200 max=2000 oninput='setCanvasSize(value)' value=600 /><br />
          <label for='bgDark'>Dark background: </label>
          <input type='radio' name='bgInput' id='bgDark' onchange='setBackground(value)' value='dark' />
          <label for='bgWhite'>White background: </label>
          <input type='radio' name='bgInput' id='bgWhite' onchange='setBackground(value)' value='white' checked />
          <br />
        </fieldset>
      </form>
    </td>
    <td style='vertical-align: top' rowspan='2'>
      <!-- Colouring mode: by expression of one gene, or by a categorical annotation. -->
      <form>
        <fieldset>
          <legend><b>Colour by:</b></legend>
          <table>
            <tr>
              <td>
                Choose gene family:
              </td>
              <td>
                <label for='familyGeneSelector'><select name='familyGeneSelector' id='familyGeneSelector' onchange='selectFeatureFamily()'>feature_family_option_here</select></label>
              </td>
            </tr>
            <tr>
              <td>
                <label for='colourByGene'><input type='radio' name='colourType' id='colourByGene' onchange='setColourBy(value)' value='gene_expression' />Gene expression: </label>
              </td>
              <td>
                <label for='geneSelector'><select name='geneSelector' id='geneSelector' onchange='selectFeature()'>gene_options_here</select></label>
              </td>
            </tr>
            <tr>
              <td colspan='2' align='center'>
                <!-- Legend for the expression colour ramp. -->
                <canvas id='canvasColorScale' width=200 height=40></canvas>
              </td>
            </tr>
            <tr>
              <td>
                <label for='colourByCategory'><input type='radio' name='colourType' id='colourByCategory' checked onchange='setColourBy(value)' value='category' />Category:</label>
              </td>
              <td>
                <label for='categorySelector'><select name='categorySelector' id='categorySelector' onchange='setCategory()'>category_options_here</select></label>
              </td>
            </tr>
          </table>
        </fieldset>
      </form>
      <br />
      <div>
        <fieldset>
          <legend><b>Cell types:</b></legend>
          <label for='toggleRadio'><input type='checkbox' name='toggleRadio' id='toggleRadio' onchange='toggleAllTypes()' checked />Show all:</label>
          <!-- Filled by the main script with one checkbox per type of the selected category. -->
          <form id='typesControlPanel'>
          </form>
        </fieldset>
      </div>
    </td>
  </tr>
  <tr>
    <td style='vertical-align: text-top'>
      <!-- WebGL drawing surface. -->
      <canvas id='canvas' width=600 height=600></canvas>
    </td>
  </tr>
</table>
<script id='vertex-shader' type='x-shader/x-fragment'>
  // Per-point vertex stage: passes the position through unchanged, sets the
  // sprite size and decides the colour handed to the fragment stage.
  attribute vec4 a_Position;
  attribute vec3 a_Color;
  uniform float u_basePointSize;
  uniform float u_Alpha;
  uniform int u_PaintFeatureScale;
  varying vec4 v_Color;
  void main() {
    gl_Position = a_Position;
    gl_PointSize = u_basePointSize;
    if (u_PaintFeatureScale != 0) {
      // Scale mode: only the red channel carries data; it is mapped onto a
      // blue (0.0) -> green (0.5) -> red (1.0) ramp.
      float red = max(0.0, 2.0 * a_Color.r - 1.0);
      float blue = max(0.0, 2.0 * (1.0 - a_Color.r) - 1.0);
      float green = 1.0 - 2.0 * abs(a_Color.r - 0.5);
      v_Color = vec4(red, green, blue, u_Alpha);
    } else {
      // Plain mode: use the supplied RGB colour as is.
      v_Color = vec4(a_Color, u_Alpha);
    }
  }
</script>
<script id ='fragment-shader' type='x-shader/x-fragment'>
  // Fragment stage: draws each point sprite as a filled disc.
  precision mediump float;
  varying vec4 v_Color;
  void main() {
    // Remap the sprite coordinate from [0, 1] to [-1, 1] around its centre.
    vec2 offset = 2.0 * gl_PointCoord - 1.0;
    float squaredRadius = dot(offset, offset);
    // Drop fragments outside the unit circle.
    if (squaredRadius > 1.0) {
      discard;
    }
    gl_FragColor = v_Color;
  }
</script>
<script type = 'text/javascript'>
/**
 * 4x4 matrix stored in a 16-element Float32Array (`elements`), with element
 * [row + 4 * column] layout as used by the setters below.
 * Called without arguments it is the identity; called with an object owning
 * an `elements` property it copies the first 16 entries of that property.
 */
var Matrix4 = function(opt_src) {
  var copyable = opt_src && typeof opt_src === 'object' && opt_src.hasOwnProperty('elements');
  if (!copyable) {
    this.elements = new Float32Array([1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1]);
    return;
  }
  var source = opt_src.elements,
      copy = new Float32Array(16),
      idx = 0;
  while (idx < 16) {
    copy[idx] = source[idx];
    idx += 1;
  }
  this.elements = copy;
};

/** Overwrite this matrix with a pure translation by (x, y, z). Returns this. */
Matrix4.prototype.setTranslate = function(x, y, z) {
  var values = [
        1, 0, 0, 0,
        0, 1, 0, 0,
        0, 0, 1, 0,
        x, y, z, 1
      ],
      target = this.elements,
      idx;
  for (idx = 0; idx < 16; idx++) {
    target[idx] = values[idx];
  }
  return this;
};

/**
 * Overwrite this matrix with a viewing transform for an eye at (eyeX, eyeY,
 * eyeZ) looking at (centerX, centerY, centerZ) with the given up direction.
 * Returns this.
 */
Matrix4.prototype.setLookAt = function(eyeX, eyeY, eyeZ, centerX, centerY, centerZ, upX, upY, upZ) {
  // Unit vector from the eye towards the target.
  var dirX = centerX - eyeX,
      dirY = centerY - eyeY,
      dirZ = centerZ - eyeZ,
      invDirLen = 1 / Math.sqrt(dirX*dirX + dirY*dirY + dirZ*dirZ);
  dirX *= invDirLen;
  dirY *= invDirLen;
  dirZ *= invDirLen;

  // Unit side vector: direction x up.
  var sideX = dirY * upZ - dirZ * upY,
      sideY = dirZ * upX - dirX * upZ,
      sideZ = dirX * upY - dirY * upX,
      invSideLen = 1 / Math.sqrt(sideX*sideX + sideY*sideY + sideZ*sideZ);
  sideX *= invSideLen;
  sideY *= invSideLen;
  sideZ *= invSideLen;

  // Recomputed up vector: side x direction.
  var topX = sideY * dirZ - sideZ * dirY,
      topY = sideZ * dirX - sideX * dirZ,
      topZ = sideX * dirY - sideY * dirX;

  var values = [
        sideX, topX, -dirX, 0,
        sideY, topY, -dirY, 0,
        sideZ, topZ, -dirZ, 0,
        0,     0,    0,     1
      ],
      target = this.elements,
      idx;
  for (idx = 0; idx < 16; idx++) {
    target[idx] = values[idx];
  }

  // Finally move the eye position to the origin.
  return this.translate(-eyeX, -eyeY, -eyeZ);
};

/** Post-multiply this matrix by a translation of (x, y, z). Returns this. */
Matrix4.prototype.translate = function(x, y, z) {
  var m = this.elements,
      row;
  for (row = 0; row < 4; row++) {
    m[12 + row] += m[row] * x + m[4 + row] * y + m[8 + row] * z;
  }
  return this;
};

/**
 * Overwrite this matrix with a perspective projection.
 * fovy is the vertical field of view in degrees. Throws a string when the
 * frustum is degenerate or when near/far are not positive. Returns this.
 */
Matrix4.prototype.setPerspective = function(fovy, aspect, near, far) {
  if (near === far || aspect === 0) {
    throw 'null frustum';
  }
  if (near <= 0) {
    throw 'near <= 0';
  }
  if (far <= 0) {
    throw 'far <= 0';
  }

  // Half the field of view, in radians.
  fovy = Math.PI * fovy / 180 / 2;
  var sine = Math.sin(fovy);
  if (sine === 0) {
    throw 'null frustum';
  }

  var invDepth = 1 / (far - near),
      cotangent = Math.cos(fovy) / sine,
      values = [
        cotangent / aspect, 0,         0,                           0,
        0,                  cotangent, 0,                           0,
        0,                  0,         -(far + near) * invDepth,    -1,
        0,                  0,         -2 * near * far * invDepth,  0
      ],
      target = this.elements,
      idx;
  for (idx = 0; idx < 16; idx++) {
    target[idx] = values[idx];
  }
  return this;
};
</script>
<script type='text/javascript'>
/**
 * Rebuild the "Cell types" panel for the category currently chosen in the
 * category drop-down: one checked checkbox per type, on a background of
 * that type's colour. Updates the globals category_type and
 * current_indices.
 */
function buildCategoryRadioButtons(){
  // All working variables are local; the loop variable used to be the
  // undeclared identifier `name`, which silently overwrote window.name.
  var typeIndices, typeName, firstCell, rgb, colourLabel, k, panelHTML = "";

  category_type = categorySelector.options[categorySelector.selectedIndex].value;
  current_indices = indices_all;
  typeIndices = categories_indices[category_type];

  for (typeName in typeIndices) {
    // The colour of the first cell of a type stands for the whole type.
    firstCell = typeIndices[typeName][0];
    rgb = categories_colours[category_type].slice(3 * firstCell, 3 * firstCell + 3);
    colourLabel = "#";
    for (k = 0; k < rgb.length; k++) {
      colourLabel += Math.round(255 * rgb[k]).toString(16).padStart(2, '0');
    }
    panelHTML += "<div style='background-color:" + colourLabel + "'>" +
      "<input style='float:left' type='checkbox' id='" + typeName +
      "' checked onchange='toggleCategoryAction()' /><label style='float:left' for='" + typeName + "'" +
      ">" + typeName + ": </label><br/></div>";
  }
  typesControlPanel.innerHTML = panelHTML;
}
/** Checkbox handler of the cell-type panel: rebuild the vertex data and repaint. */
function toggleCategoryAction(){
  updateBuffer();
  draw();
}
/** Category drop-down handler: rebuild the type checkboxes, then the vertex data, then repaint. */
function setCategory(){
  buildCategoryRadioButtons();
  updateBuffer();
  draw();
}
/**
 * Switch between colouring by category and colouring by gene expression.
 * The shader flag is 0 for 'category' and 1 for any other value.
 */
function setColourBy(value){
  colour_by = value;
  PaintFeatureScale = (colour_by == 'category') ? 0 : 1;
  gl_context.uniform1i(u_PaintFeatureScale, PaintFeatureScale);
  updateBuffer();
  draw();
}
/** "Show all" handler: copy the master checkbox state onto every type checkbox and repaint. */
function toggleAllTypes(){
  // Declared locally so the loop does not create or overwrite globals.
  var typeBoxes = typesControlPanel.elements,
      idx;
  for (idx = 0; idx < typeBoxes.length; idx++) {
    typeBoxes[idx].checked = toggleRadio.checked;
  }
  updateBuffer();
  draw();
}
/**
 * Gene drop-down handler: make the chosen gene the active feature, repaint
 * the points and redraw the colour legend with that gene's maximum.
 */
function selectFeature(){
  feature = geneSelector.value;
  updateBuffer();
  draw();
  drawScale(max_expression[feature]);
  console.log('selected features');
}
/** Clear the canvas to the chosen background, upload the current vertex data and draw n points. */
function draw(){
  // White background unless bg_color says otherwise (then black).
  var shade = (bg_color == "white") ? 1 : 0;
  gl_context.clearColor(shade, shade, shade, 1);
  gl_context.clear(gl_context.COLOR_BUFFER_BIT);
  gl_context.bufferData(gl_context.ARRAY_BUFFER, buffer_data_array, gl_context.STATIC_DRAW);
  gl_context.drawArrays(gl_context.POINTS, 0, n);
}
/**
 * Rebuild the interleaved vertex data (x, y, r, g, b per point) for the
 * cells of all checked types, and store it in the globals
 * buffer_data_array (Float32Array), n (point count) and current_indices.
 */
function updateBuffer(){
  // Working variables are local so that they no longer leak into, or
  // collide with, the global scope.
  var typeBoxes = typesControlPanel.elements,
      typeIndices = categories_indices[category_type],
      byExpression = (colour_by == 'gene_expression'),
      values = byExpression ? gene_expression[feature] : categories_colours[category_type],
      buffer_data = [],
      idx;

  // Collect the cell indices of every checked type; checkbox ids are the
  // type names.
  current_indices = [];
  for (idx = 0; idx < typeBoxes.length; idx++) {
    if (typeBoxes[idx].checked) {
      current_indices = current_indices.concat(typeIndices[typeBoxes[idx].id]);
    }
  }

  current_indices.forEach(function(index){
    buffer_data.push(coordinates_data[2 * index], coordinates_data[2 * index + 1]);
    if (byExpression) {
      // One value per cell, repeated over the three colour slots.
      buffer_data.push(values[index], values[index], values[index]);
    } else {
      // Three consecutive values (r, g, b) per cell.
      buffer_data.push(values[3 * index], values[3 * index + 1], values[3 * index + 2]);
    }
  });

  buffer_data_array = new Float32Array(buffer_data);
  n = buffer_data_array.length / 5;
}
/**
 * Particle-size slider handler.
 * The slider moves in steps of 0.1, so the value is parsed as a float;
 * parseInt would discard the fractional part and make most slider positions
 * indistinguishable.
 */
function setParticleSize(value){
  particleSize = parseFloat(value);
  gl_context.uniform1f(u_basePointSize, particleSize);
  updateBuffer();
  draw();
}
/** Transparency slider handler: the slider value (0-1000) is divided by 1000 to give the point alpha. */
function setAlpha(value){
  var sliderPosition = parseInt(value);
  alphaValue = sliderPosition / 1000;
  gl_context.uniform1f(u_Alpha, alphaValue);
  updateBuffer();
  draw();
}
/**
 * Canvas-size slider handler: make the canvas a square of the given side,
 * set the rendering context up again for it and repaint.
 */
function setCanvasSize(value){
  var side = parseInt(value);
  canvas.width = side;
  canvas.height = side;
  gl_context = initContext(getContext(canvas));
  gl_context.viewport(0, 0, canvas.width, canvas.height);
  updateBuffer();
  draw();
}
/** Background radio handler: remember the chosen background ('white' or 'dark') and repaint. */
function setBackground(value){
  bg_color = value;
  draw();
}
/**
 * Compile a shader whose source is the text content of a script element.
 *
 * @param gl   WebGL rendering context.
 * @param ID   id of the script element holding the GLSL source.
 * @param type gl.VERTEX_SHADER or gl.FRAGMENT_SHADER.
 * @returns the shader object; compile problems are reported on the console.
 */
function shadersFromScriptElement(gl, ID, type){
  // Local: this used to be an undeclared (global) variable.
  var shaderScript = document.getElementById(ID),
      source = '',
      node = shaderScript.firstChild,
      shader;
  // Concatenate the text nodes (nodeType 3) of the script element.
  while (node) {
    if (node.nodeType == 3) {
      source += node.textContent;
    }
    node = node.nextSibling;
  }
  shader = gl.createShader(type);
  gl.shaderSource(shader, source);
  gl.compileShader(shader);
  // Without this check a GLSL error only shows up later as an empty canvas.
  if (!gl.getShaderParameter(shader, gl.COMPILE_STATUS)) {
    console.error("Shader '" + ID + "' failed to compile: " + gl.getShaderInfoLog(shader));
  }
  return shader;
}
/**
 * Obtain a WebGL context for the canvas, build the shader program from the
 * two shader script elements of the page and make it current.
 *
 * @param canvasWidget canvas element to render into.
 * @returns the context, with the linked program stored as gl.program.
 * @throws Error when no WebGL context can be created.
 */
function getContext(canvasWidget){
  var names = ['webgl', 'experimental-webgl', 'webkit-3d', 'moz-webgl'],
      gl = null,
      i, vshader, fshader, program;

  // Try the context names in order and stop at the first that works.
  for (i = 0; i < names.length && !gl; i++) {
    try {
      gl = canvasWidget.getContext(names[i]);
    } catch (e) {
      gl = null;
    }
  }
  if (!gl) {
    throw new Error('WebGL is not available in this browser');
  }

  vshader = shadersFromScriptElement(gl, 'vertex-shader', gl.VERTEX_SHADER);
  fshader = shadersFromScriptElement(gl, 'fragment-shader', gl.FRAGMENT_SHADER);
  // Local: a missing comma used to turn `program` into an implicit global.
  program = gl.createProgram();
  gl.attachShader(program, vshader);
  gl.attachShader(program, fshader);
  gl.linkProgram(program);
  if (!gl.getProgramParameter(program, gl.LINK_STATUS)) {
    console.error('Shader program failed to link: ' + gl.getProgramInfoLog(program));
  }
  gl.useProgram(program);
  gl.program = program;
  return gl;
}
// Configure a context returned by getContext(): bind a vertex buffer,
// describe the vertex layout, push the current uniform values and set the
// clear colour and blending. Returns the same context.
// Note: u_basePointSize, u_Alpha, u_PaintFeatureScale and n are assigned
// without `var`, i.e. as globals; the slider handlers use these uniform
// locations.
function initContext(gl){
// Five floats per point: 2 for the position followed by 3 for the colour.
n = buffer_data_array.length / 5
var vertexColourBuffer = gl.createBuffer()
gl.bindBuffer(gl.ARRAY_BUFFER, vertexColourBuffer)
// Size in bytes of one element of the vertex array.
var FSIZE = buffer_data_array.BYTES_PER_ELEMENT;
// Position: 2 components at the start of each 5-element record.
var a_Position = gl.getAttribLocation(gl.program, 'a_Position')
gl.vertexAttribPointer(a_Position, 2, gl.FLOAT, false, FSIZE * 5, 0)
gl.enableVertexAttribArray(a_Position)
// Colour: 3 components starting 2 elements into each record.
var a_Color = gl.getAttribLocation(gl.program, 'a_Color')
gl.vertexAttribPointer(a_Color, 3, gl.FLOAT, false, FSIZE * 5, 2 * FSIZE)
gl.enableVertexAttribArray(a_Color)
// Look up the uniforms and send their current values.
u_basePointSize = gl.getUniformLocation(gl.program, 'u_basePointSize')
gl.uniform1f(u_basePointSize, particleSize)
u_Alpha = gl.getUniformLocation(gl.program, "u_Alpha")
gl.uniform1f(u_Alpha, alphaValue)
u_PaintFeatureScale = gl.getUniformLocation(gl.program, 'u_PaintFeatureScale')
gl.uniform1i(u_PaintFeatureScale, PaintFeatureScale)
// White clear colour by default, black when the dark background is selected.
gl.clearColor(1, 1, 1, 1);
if(bg_color == "dark"){
gl.clearColor(0, 0, 0, 1)
}
// No depth test; points are alpha-blended over what is already drawn.
gl.disable(gl.DEPTH_TEST)
gl.enable(gl.BLEND)
gl.blendFunc(gl.SRC_ALPHA, gl.ONE_MINUS_SRC_ALPHA)
gl.clear(gl.COLOR_BUFFER_BIT);
return gl
}
// Handles to the page controls used throughout this script.
var categorySelector = document.getElementById('categorySelector'),
    geneSelector = document.getElementById('geneSelector'),
    typesControlPanel = document.getElementById('typesControlPanel'),
    toggleRadio = document.getElementById('toggleRadio'),
    familyGeneSelector = document.getElementById("familyGeneSelector");

// Rendering state shared by the handlers. particleSize was initialised
// twice in this statement (5, then 2); only the effective value is kept.
var canvas = document.getElementById('canvas'),
    alphaValue = 1.0,
    bg_color = "white",
    n = 0,
    particleSize = 2,
    PaintFeatureScale = 0,
    currentMaxExpression = 0;

// Data section. Each bare marker word below is swapped for generated
// JavaScript when the page is built, so these lines must keep their exact
// spelling and each marker must occur only once.
var coordinates_data = [coordinates_data_here];
var gene_expression = []; gene_expression_colour_coded;
var categories_colours = [];
categories_colours_data_here
var categories_indices = [];
categories_indices_data_here
var gene_families = [];
gene_families_options_here
var max_expression = [];
max_expression_here
/**
 * Gene-family drop-down handler: list only the genes of the chosen family
 * in the gene drop-down, then select the first of them.
 * The `value` parameter is unused; the family is read from the drop-down.
 */
function selectFeatureFamily(value){
  var genes = gene_families[familyGeneSelector.value],
      options = [],
      i;
  // Unknown or empty family: keep the current gene list instead of failing
  // on `genes.length` or leaving the drop-down empty.
  if (!genes || genes.length === 0) {
    return;
  }
  for (i = 0; i < genes.length; i++) {
    options.push("<option value='" + genes[i] + "'>" + genes[i] + "</option>");
  }
  geneSelector.innerHTML = options.join("");
  selectFeature();
}
// Start-up sequence. The order matters: the state variables must exist
// before the type checkboxes are built, and the vertex data must exist
// before the rendering context is configured.
// initialize flags
// when toggling between gene expression and category, do not slice data i.e. do not recompute index data
// when choosing a category always re-initiate index data
var colour_by = 'category', // the other possible value is 'gene_expression'
category_types = [],
category_type = '',
features = [],
feature = '';
// set category: start with the first category that has colour data
for(name in categories_colours){category_types.push(name)}
category_type = category_types[0]
// set feature: start with the first gene that has expression data
for(name in gene_expression){features.push(name)}
feature = features[0];
// create global data holders
var indices_all = [],
current_indices = [],
current_colours = [],
buffer_data_array = [];
// one index per cell (the colour array stores three values per cell)
for(j=0;j<categories_colours[category_type].length/3;j++){indices_all.push(j)}
// build the categories buttons for the first time
buildCategoryRadioButtons()
updateBuffer()
// create the renderer
var gl_context = getContext(canvas);
gl_context = initContext(gl_context)
// now draw
draw()
// draw the scale
var canvasColorScale = document.getElementById('canvasColorScale'),
    canvas_ctx = canvasColorScale.getContext('2d'),
    scale_gradient = canvas_ctx.createLinearGradient(0, 0, canvasColorScale.width, 0);

// Same ramp as the vertex shader: blue at 0, pure green at the midpoint,
// red at 1. The midpoint must be 'lime' (#00ff00); the CSS colour 'green'
// is the darker #008000 and does not match the colour of the points.
scale_gradient.addColorStop(0, 'blue');
scale_gradient.addColorStop(0.5, 'lime');
scale_gradient.addColorStop(1, 'red');

/**
 * Redraw the colour legend: labels '0' and the given maximum (cut to one
 * decimal) above a horizontal gradient bar.
 */
function drawScale(maxVal){
  canvas_ctx.fillStyle = 'white';
  canvas_ctx.fillRect(0, 0, canvasColorScale.width, canvasColorScale.height);
  canvas_ctx.fillStyle = scale_gradient;
  canvas_ctx.fillRect(0, 20, canvasColorScale.width, canvasColorScale.height);
  canvas_ctx.fillStyle = 'black';
  canvas_ctx.fillText('0', 10, 10);
  canvas_ctx.fillText(parseInt(10 * maxVal) / 10, 180, 10);
}

// Show the initially selected gene and its legend.
selectFeature()
</script>
</body>
</html>

View file

@ -0,0 +1,434 @@
# Command-line handling. All trailing arguments are glued together and then
# split on ';', so the script expects one ';'-separated string in which each
# piece is an R assignment that defines one of the variables listed below.
args = commandArgs(trailingOnly=T)
args = paste(args, collapse = "")
args = unlist(strsplit(args, ";"))

arguments.list = "
seurat.addr.arg = args[1]
make.app = args[2]
update.file = args[3]
"

expected_arguments = unlist(strsplit(arguments.list, "\n"))
expected_arguments = expected_arguments[!(expected_arguments == "")]

if(length(args) != length(expected_arguments)){
  # Name the expected parameters in the message. The previous call was
  # sprintf('...%s...') with no value for %s, so the intended message was
  # never produced.
  expected.names = trimws(sub(pattern = "=.*$", replacement = "", x = expected_arguments))
  expected.names = sub(pattern = "\\.arg$", replacement = "", x = expected.names)
  stop(sprintf('This pipeline requires %s parameters: %s',
               length(expected_arguments), paste(expected.names, collapse = ", ")))
}

# First pass: store the raw text of each piece under the listed name.
eval(parse(text = arguments.list))

# Second pass: evaluate each piece so that it defines the actual variable.
# NOTE(review): this evaluates command-line text as R code; only run the
# script with trusted arguments.
for(n in seq_along(expected_arguments)){
  argument = expected_arguments[n]
  argument = gsub(pattern=" ", replacement="", x=argument)
  argument.name = unlist(strsplit(argument, "="))[1]
  variable.name = sub(pattern="\\.arg$", replacement="", x=argument.name)
  argument.content = eval(parse(text = argument.name))
  eval(parse(text = argument.content))
  if (!exists(variable.name)){
    stop(sprintf("Argument %s not passed. Stopping ... ", variable.name))
  }
}
# create required folders for output and work material
# Folder name: <working dir name without its numeric prefix>_<input file>_<timestamp>
output_folder = gsub(pattern="^\\d+_", replacement="", x=basename(getwd()))
output_folder = paste(output_folder, seurat.addr, sep = "_")
# yymmddHHMMSS, the same stamp the former chain of gsub() calls produced from
# "YYYY-MM-DD HH:MM:SS", but independent of how Sys.time() is printed.
c.time = format(Sys.time(), "%y%m%d%H%M%S")
output_folder = paste(output_folder, c.time, sep = "_")
output_folder = file.path("../../output", output_folder)
# recursive: also create ../../output when it does not exist yet
dir.create(output_folder, recursive = TRUE)

seurat.addr = file.path("../../data", seurat.addr)

source("../../tools/bunddle_utils.R")

library(Seurat)
library(plyr)
library(dplyr)
library(reshape2)
library(RColorBrewer)
library(wordcloud)
# Convert a named vector of gene expression values into a named vector of
# scores per population.
#
# Relies on two globals: `gene_to_pop` (column V1 = gene, column V2 = the
# populations mentioning that gene, separated by ", ") and
# `populations.weight` (named vector, one weight per population).
# Every gene present in both `gene.expr` and `gene_to_pop` adds
# 100 * expression / weight to each of its populations.
#
# gene.expr: named numeric vector, names are gene symbols.
# Returns a named numeric vector; empty when no gene matches.
gene_to_weighted_cell_mention = function(gene.expr){
  known.genes = as.vector(gene_to_pop$V1)
  known.pops = as.vector(gene_to_pop$V2)
  idx = which(known.genes %in% names(gene.expr))
  gene.expr = gene.expr[known.genes[idx]]
  pop.expr = numeric(0)
  # seq_along: `1:length(idx)` would run over c(1, 0) when nothing matches
  for (k in seq_along(idx)){
    gene.name = names(gene.expr)[k]
    gene.value = gene.expr[k]
    pop.flags = unlist(strsplit(known.pops[known.genes == gene.name], ", "))
    for (pop.flag in pop.flags){
      gene.v = 100 * gene.value / populations.weight[pop.flag]
      if (pop.flag %in% names(pop.expr)){
        pop.expr[pop.flag] = pop.expr[pop.flag] + gene.v
      }else{
        pop.expr[pop.flag] = gene.v
      }
    }
  }
  pop.expr
}
# Read the Seurat object from disk.
print("loading data ... ")
seurat.obj = readRDS(seurat.addr)
print("Data loaded.")

# Read the annotation table: tab-separated first, and comma-separated if the
# tab attempt produced a single column.
update.template = read.csv(update.file, stringsAsFactors = F, sep = '\t')
if(ncol(update.template) == 1){
  update.template = read.csv(update.file, stringsAsFactors = F, sep = ',')
}

# Translate every cell's cluster id into its identity label.
cluster.per.cell = as.vector(seurat.obj@meta.data$LouvainClustering)
seurat.obj@meta.data$cell.labels = mapvalues(cluster.per.cell,
                                             from = update.template$Cluster,
                                             to = update.template$Identity)

# Write the relabelled object back over the input file.
print("Saving seurat object")
saveRDS(seurat.obj, seurat.addr)
# Optional outputs: an interactive HTML viewer of the marker genes and one
# word-cloud PDF per cell label.
if (make.app){
  print('Making the interactive app')
  marker.genes.top = read.csv("annotation_markers.csv", stringsAsFactors = F)
  # update cluster names in marker.genes.top
  marker.genes.top$cluster = mapvalues(x=as.vector(marker.genes.top$cluster), from = update.template$Cluster, to = update.template$Identity)

  # ---- data table for the interactive map ----
  gene_sym_to_marker = marker.genes.top[, c('gene', 'cluster')]
  categories = c("LouvainClustering", "fetal.ids", "sort.ids", "lanes", "stages", "gender", "doublets", "cell.labels")
  genes = as.vector(unique(gene_sym_to_marker$gene))
  expression.data = as.data.frame(as.matrix(t(seurat.obj@data[genes, names(seurat.obj@ident)])))
  # One optional colour-scheme CSV per category; NA means random colours.
  categories.colours = rep(NA, length(categories))
  categories.data = as.data.frame(seurat.obj@meta.data[names(seurat.obj@ident), categories])
  for(j in seq_along(categories)){
    category = categories[j]
    category.colour.scheme = categories.colours[j]
    if (!is.na(category.colour.scheme)){
      category.colour.scheme = read.csv(category.colour.scheme)
      category.colour.scheme = mapvalues(x=categories.data[, category], from=as.vector(unique(category.colour.scheme$CellTypes)), to=as.vector(unique(category.colour.scheme$Colours)))
    }else{
      category.colour.scheme = sample(colorRampPalette(brewer.pal(12, "Paired"))(length(as.vector(unique(categories.data[, category])))))
      category.colour.scheme = mapvalues(x=categories.data[, category], from=as.vector(unique(categories.data[, category])), to=category.colour.scheme)
    }
    # the colours go into an extra column named <category>_colours
    category = paste(category, "colours", sep = "_")
    categories.data[, category] = category.colour.scheme
  }
  # Column order read by the viewer script: 2 coordinates, the category
  # columns, their colour columns, then one column per gene.
  dim.data = seurat.obj@dr$umap@cell.embeddings[, 1:2]
  expression.data = cbind(dim.data, categories.data, expression.data)
  write.csv(expression.data, "./expression_data.csv", row.names = F)

  # ---- gene families: the marker genes of each cluster ----
  # Single quotes are removed from the names because each name is written
  # inside a single-quoted JavaScript string.
  gene_sym_to_marker$ClusterName = gsub(pattern="'", replacement="", x=as.character(gene_sym_to_marker$cluster), fixed=TRUE)
  gene.families = as.vector(unique(gene_sym_to_marker$ClusterName))
  gene.to.family = c()
  for(i in seq_along(gene.families)){
    gene.family = gene.families[i]
    # Exact comparison. The former grep() treated the name as a regular
    # expression and matched substrings, so one family could pick up the
    # genes of any cluster whose name contained its own, and names holding
    # characters such as '+' or '(' were misread as patterns.
    members = as.vector(gene_sym_to_marker$gene[gene_sym_to_marker$ClusterName == gene.family])
    inline = sprintf("gene_families['%s']=", gene.family)
    members = paste("'", members, "'", sep = "")
    members = paste(members, collapse = ",")
    members = paste("[", members, "]", sep = "")
    inline = paste(inline, members)
    gene.to.family = c(gene.to.family, inline)
  }
  all.genes = as.vector(unique(gene_sym_to_marker$gene))
  all.genes = paste("'", all.genes, "'", sep = "")
  all.genes = paste(all.genes, collapse = ",")
  all.genes = paste("gene_families['ALL']=[", all.genes, "]", sep = "")
  gene.to.family = c(gene.to.family, all.genes)
  gene.to.family = sort(gene.to.family)
  gene.families.file = file('gene_families.txt', "w")
  writeLines(gene.to.family, gene.families.file)
  close(gene.families.file)

  # ---- build the HTML page with the Python helper ----
  save.to = file.path(output_folder, 'interactive_markers.html')
  n_categories = length(categories)
  # shQuote: the output path contains the input file name, which may hold
  # characters that the shell would otherwise interpret.
  command = sprintf('%s html_2D_gene_expression_viewer_by_gene_family.py %s %s %s', python.addr,
                    shQuote(save.to), 'expression_data.csv', n_categories)
  status = system(command, wait = T)
  if (status != 0){
    warning(sprintf("Building the interactive viewer failed with exit status %s", status))
  }
  file.remove(c('./expression_data.csv', './gene_families.txt'))

  # ---- annotation word clouds, one per cell label ----
  # (A mitochondrial filter on the full matrix used to sit here. Its result
  # was never used, and `x[-integer(0), ]` selects zero rows when no MT gene
  # exists; the per-label filter inside the loop is the one that matters.)
  seurat.obj = SetAllIdent(object=seurat.obj, id='cell.labels')
  gene_to_pop = read.csv("./gene_to_pop.tsv", sep = '\t', header = F)
  populations = paste(as.vector(gene_to_pop$V2), collapse = ", ")
  populations = unlist(strsplit(populations, ", "))
  # weight of a population = number of genes mentioning it
  populations.table = table(populations)
  populations.weight = as.vector(populations.table)
  names(populations.weight) = names(populations.table)
  idents = as.vector(unique(seurat.obj@ident))
  for (i in seq_along(idents)){
    ident = idents[i]
    print(ident)
    # mean expression of every non-mitochondrial gene over the label's cells
    ident = names(seurat.obj@ident)[seurat.obj@ident == ident]
    expression.data = as.matrix(seurat.obj@data[,ident])
    expression.data = rowMeans(expression.data)
    genes = names(expression.data)
    genes = genes[!grepl(pattern='^MT-', x=genes)]
    expression.data = expression.data[genes]
    pop.expr = gene_to_weighted_cell_mention(expression.data)
    clouder = round(100 * pop.expr)
    fname = sprintf('%s.pdf', idents[i])
    fname = gsub(pattern="/", replacement="-", x=fname)
    fname = file.path(output_folder, fname)
    pdf(fname, width = 10, height = 10)
    # finally: close the PDF device even when wordcloud() fails, so that a
    # failure does not leave a device open for the following labels
    tryCatch({
      wordcloud(words=names(clouder), clouder, min.freq = 1, max.words=500,
                random.order=FALSE, rot.per=0.0, colors=brewer.pal(8, "Dark2"),
                order.color = T)
    }, finally = dev.off())
  }
}
print("Ended beautifully ... ")

View file

@ -0,0 +1,16 @@
#!/bin/bash
#$ -cwd
#$ -N update_annotation
#$ -V
#$ -l h_rt=47:59:59
#$ -l h_vmem=200G

# Job wrapper around update_annotation.R.
# Takes exactly one argument, which is handed to the R script unchanged.
# NOTE(review): presumably this is the ';'-separated parameter string the R
# script parses - confirm against update_annotation.R.

if [ "$#" -ne 1 ]; then
    echo "Illegal number of parameters" >&2
    exit 1
fi

# Quoted so that spaces, quotes and glob characters inside the argument
# reach R exactly as written instead of being re-split by the shell.
Rscript update_annotation.R "$1"
status=$?

echo "End on $(date)"
# Report the R exit status to the scheduler rather than that of `echo`.
exit "$status"

View file

@ -0,0 +1,24 @@
"Cluster","Identity"
"0","A"
"1","B"
"10","C"
"11","D"
"12","E"
"13","F"
"14","G"
"15","H"
"16","I"
"17","J"
"18","K"
"19","L"
"2","M"
"20","N"
"21","O"
"22","P"
"3","Q"
"4","R"
"5","S"
"6","T"
"7","U"
"8","V"
"9","W"
1 Cluster Identity
2 0 A
3 1 B
4 10 C
5 11 D
6 12 E
7 13 F
8 14 G
9 15 H
10 16 I
11 17 J
12 18 K
13 19 L
14 2 M
15 20 N
16 21 O
17 22 P
18 3 Q
19 4 R
20 5 S
21 6 T
22 7 U
23 8 V
24 9 W