scRNA-seq_analysis

This commit is contained in:
veghp 2019-07-08 12:22:01 +01:00
commit 82cc2d191e
188 changed files with 146184 additions and 0 deletions

Binary file not shown.

View file

@ -0,0 +1,62 @@
# Prepare a smaller pseudotime heatmap, using the following genes:
# (one gene per line in the text file; lines starting with "#" are ignored)
selected.gene.list <- scan("selected.genes.std.txt", what = character(), sep = "\n",
                           blank.lines.skip = TRUE, comment.char = "#") # or character vector c("")
# Duplicated entries would make factor() fail on duplicated levels further down.
selected.gene.list <- unique(selected.gene.list)
path <- "." # directory that contains 'ploting_material.RDS' (the file name is spelled this way)
library("ggplot2")
###############################################################################
plottingmat <- readRDS(file.path(path, "ploting_material.RDS"))
# str(plottingmat)
# str(plottingmat$beautiful_result_norm)
# View(plottingmat$beautiful_result_norm)
# Keep only the rows belonging to the selected genes.
expression.long <- plottingmat$beautiful_result_norm
subsetplotmat <- expression.long[expression.long$GeneNames %in% selected.gene.list, ]
# factor() re-levels character and factor input alike and discards levels that
# are not listed, so a separate droplevels() call (which has no method for
# character vectors) is not needed.
subsetplotmat$GeneNames <- factor(subsetplotmat$GeneNames, levels = rev(selected.gene.list)) # Orders the heatmap
# The following section is adapted from: https://github.com/haniffalab/Single-cell-RNAseq-data-analysis-bundle/blob/master/pipelines/13_pseudotime/pseudotime.R#L270 commit b86d20dc87d35820daac178a93e46badf99216ab
plot.genes <- ggplot(data = subsetplotmat, aes(x = Pseudotime, y = GeneNames))
# Tiles are drawn marginally larger than 1 x 1.
plot.genes <- plot.genes + geom_tile(aes(fill = ExpressionValue),
                                     width = 1.001, height = 1.001)
plot.genes <- plot.genes + scale_fill_gradient2(low = "deepskyblue",
                                                high = "firebrick3",
                                                mid = "darkolivegreen3",
                                                midpoint = 0.5,
                                                name = "Minmax normalized gene expression")
plot.genes <- plot.genes + theme(legend.position = "bottom",
                                 legend.text = element_text(size = 25, angle = 90),
                                 legend.title = element_text(size = 25),
                                 legend.key.width = unit(2, "cm"),
                                 axis.text.x = element_blank(), axis.title.x = element_blank(),
                                 axis.ticks.x = element_blank(),
                                 axis.title.y = element_text(size = 0), axis.text.y = element_text(size = 8))
# Explicit print(): auto-printing only happens at top level, so a bare
# `plot.genes` draws nothing when this script is run through source().
print(plot.genes)
height <- 6; width <- 3
# Open `device` on `file`, draw the heatmap into it and always close the device
# again, even if drawing fails, so no half-written device stays open.
save.heatmap <- function(device, file, ...) {
  device(file, ...)
  tryCatch(print(plot.genes), finally = dev.off())
  invisible(file)
}
save.heatmap(pdf, "dpt_heatmap.pdf", height = height, width = width)
save.heatmap(svg, "dpt_heatmap.svg", height = height, width = width)
save.heatmap(postscript, "dpt_heatmap.ps", height = height, width = width)
# png() measures in pixels by default, hence the separate dimensions.
save.heatmap(png, "dpt_heatmap.png", height = 600, width = 300)
###############################################################################
# Alternative formats for density plots: the same pseudotime-by-label data
# drawn four different ways.
pdt_exp <- read.csv(file.path(path, "pdt_and_expression.csv"))
#~ str(pdt_exp)
# All four variants share this data set and aesthetic mapping.
density.base <- ggplot(data = pdt_exp, mapping = aes(x = Pseudotime, color = Labels, fill = Labels))
# Standard (alpha for transparency):
density.base + geom_density(alpha = .7)
# Stacked:
density.base + geom_density(position = "stack")
# Relative:
density.base + geom_density(adjust = 1.5, position = "fill")
# Histogram:
density.base + geom_histogram(binwidth = 0.01)

View file

@ -0,0 +1,819 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 23 glorious 2018
@author: Dorin-Mirel Popescu
"""
import sys

# Command line: <script> SAVE_TO LOAD_FROM
#   SAVE_TO   - path the generated HTML page is written to
#   LOAD_FROM - path of the CSV file the page is built from
# Fail with a usage hint instead of a bare IndexError when arguments are missing.
if len(sys.argv) < 3:
    sys.exit("usage: {prog} <output.html> <input.csv>".format(prog = sys.argv[0]))
args = sys.argv
save_to = args[1]
load_from = args[2]
template = """
<!doctype html>
<html lang='en'>
<head>
<meta charset='utf-8'>
<title>3D viewer</title>
<meta name='description' content='The HTML5 Herald'>
<meta name='author' content='Dorin-Mirel Popescu'>
</head>
<body>
<table>
<tr>
<td align='left'>
<form>
<fieldset>
<legend><b>Visualisation options</b></legend>
<label for = 'particleSizeBar'>Particle size: </label>
<input type='range' name = 'particleSizeBar' min = 10 max = 300 onchange='setParticleSize(value)' value = 150 /><br />
<label for = 'alphaInput'>Transparency: </label>
<input type='range' name = 'alphaInput' min = 0 max = 1000 onchange='setAlpha(value)' value = 1000 /><br />
<label for = 'canvasSizeInput'>Canvas size: </label>
<input type='range' name = 'canvasSizeInput' min = 200 max = 2000 onchange='setCanvasSize(value)' value = 500 /><br />
<label for = "zoom">Zoom: </label>
<input type='range' name = 'zoom' min = 100 max = 1000 onchange='setZoom(value)' value = 400 /><br />
<label for = 'bgInput'>Dark background: </label>
<input type='radio' name = 'bgInput' onchange='setBackground(value)' value = 'dark' />
<label for = 'bgInput'>White background: </label>
<input type='radio' name = 'bgInput' onchange='setBackground(value)' value = 'white' checked />
<br />
<label for='sliderX'>Slide X: </label>
<input type='range' name='sliderX' min='-100' max='100' onchange='slideOnX(value)' value='0' />
<label for='sliderY'>Slide Y: </label>
<input type='range' name='sliderY' min='-100' max='100' onchange='slideOnY(value)' value='0' />
<br />
</fieldset>
</form>
</td>
<td style='vertical-align: top' rowspan='2'>
<form>
<fieldset>
<legend><b>Colour by:</b></legend>
<label for='colourType'><input type='radio' name=colourType onchange='setColourByType(value)' value='celltype' checked />Cell type</label><br />
<label for='colourType'><input type='radio' name=colourType onchange='setColourByType(value)' value='pseudotime' />Pseudotime</label><br />
<label for='colourType'><input type='radio' name=colourType onchange='setColourByType(value)' value='gene' />Gene</label>
</fieldset>
</form>
<br/>
<form>
<fieldset>
<legend><b>Gene expression options</b></legend>
<label for='geneselector'>Chose gene by ID: </label>
<select id='geneselector' onchange='colourByType()'>
gene_options_here
</select>
<br/>
Gene expression as:<br/>
<label><input type='radio' name='expressionType' value='nsnn' onchange='setExpressionType(value)' checked />Non-smooth non-norm</label><br/>
<label><input type='radio' name='expressionType' value='snn' onchange='setExpressionType(value)' />Smoothed non-norm</label><br/>
<label><input type='radio' name='expressionType' value='sn' onchange='setExpressionType(value)' />Smoothed minmax norm</label><br/>
</fieldset>
</form>
<br />
<div>
<fieldset>
<legend><b>Cell types:</b></legend>
<label for='toggleRadio'><input type='checkbox' name = 'toggleRadio' id='toggleRadio' onchange='toggleShowTypes()' checked />Show all:</label>
<form id = 'ControlPanel'>
radiocommands
</form>
</fieldset>
</div>
</td>
</tr>
<tr>
<td style='vertical-align: text-top' >
<canvas id='canvas' width=600 height=600></canvas>
</td>
</tr>
</table>
<script id='vertex-shader' type='x-shader/x-fragment'>
attribute vec4 a_Position;
attribute vec3 a_Color;
uniform mat4 u_ModelMatrix;
uniform mat4 u_ViewMatrix;
uniform mat4 u_ProjMatrix;
uniform float u_basePointSize;
uniform float u_Alpha;
varying vec4 v_Color;
void main() {
vec4 cubePos = u_ProjMatrix * u_ModelMatrix * u_ViewMatrix * a_Position;
float currentWidth = 0.0;
currentWidth = 3.0 + (u_basePointSize - 3.0) * (1.0 - cubePos.z / cubePos.w) / 2.0;
gl_Position = cubePos;
gl_PointSize = currentWidth;
v_Color = vec4(a_Color, u_Alpha);
}
</script>
<script id ='fragment-shader' type='x-shader/x-fragment'>
precision mediump float;
varying vec4 v_Color;
void main() {
float r = 0.0;
vec2 cxy = 2.0 * gl_PointCoord - 1.0;
r = dot(cxy, cxy);
if (r > 1.0){
discard;
}
vec2 D = vec2(0.0, 0.0), centers = vec2(.65, .35);
float light = 0.0;
light = length(centers - gl_PointCoord);
light = .1 + .9 * (pow(50.0, -light));
gl_FragColor = v_Color * light + (1.0 - light) * vec4(0.0, 0.0, 0.0, 1.0);
}
</script>
<script type = 'text/javascript'>
var Matrix4 = function(opt_src) {
var i, s, d;
if (opt_src && typeof opt_src === 'object' && opt_src.hasOwnProperty('elements')) {
s = opt_src.elements;
d = new Float32Array(16);
for (i = 0; i < 16; ++i) {
d[i] = s[i];
}
this.elements = d;
} else {
this.elements = new Float32Array([1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1]);
}
};
Matrix4.prototype.setTranslate = function(x, y, z) {
var e = this.elements;
e[0] = 1; e[4] = 0; e[8] = 0; e[12] = x;
e[1] = 0; e[5] = 1; e[9] = 0; e[13] = y;
e[2] = 0; e[6] = 0; e[10] = 1; e[14] = z;
e[3] = 0; e[7] = 0; e[11] = 0; e[15] = 1;
return this;
};
Matrix4.prototype.setLookAt = function(eyeX, eyeY, eyeZ, centerX, centerY, centerZ, upX, upY, upZ) {
var e, fx, fy, fz, rlf, sx, sy, sz, rls, ux, uy, uz;
fx = centerX - eyeX;
fy = centerY - eyeY;
fz = centerZ - eyeZ;
// Normalize f.
rlf = 1 / Math.sqrt(fx*fx + fy*fy + fz*fz);
fx *= rlf;
fy *= rlf;
fz *= rlf;
// Calculate cross product of f and up.
sx = fy * upZ - fz * upY;
sy = fz * upX - fx * upZ;
sz = fx * upY - fy * upX;
// Normalize s.
rls = 1 / Math.sqrt(sx*sx + sy*sy + sz*sz);
sx *= rls;
sy *= rls;
sz *= rls;
// Calculate cross product of s and f.
ux = sy * fz - sz * fy;
uy = sz * fx - sx * fz;
uz = sx * fy - sy * fx;
// Set to this.
e = this.elements;
e[0] = sx;
e[1] = ux;
e[2] = -fx;
e[3] = 0;
e[4] = sy;
e[5] = uy;
e[6] = -fy;
e[7] = 0;
e[8] = sz;
e[9] = uz;
e[10] = -fz;
e[11] = 0;
e[12] = 0;
e[13] = 0;
e[14] = 0;
e[15] = 1;
// Translate.
return this.translate(-eyeX, -eyeY, -eyeZ);
};
Matrix4.prototype.translate = function(x, y, z) {
var e = this.elements;
e[12] += e[0] * x + e[4] * y + e[8] * z;
e[13] += e[1] * x + e[5] * y + e[9] * z;
e[14] += e[2] * x + e[6] * y + e[10] * z;
e[15] += e[3] * x + e[7] * y + e[11] * z;
return this;
};
Matrix4.prototype.setPerspective = function(fovy, aspect, near, far) {
var e, rd, s, ct;
if (near === far || aspect === 0) {
throw 'null frustum';
}
if (near <= 0) {
throw 'near <= 0';
}
if (far <= 0) {
throw 'far <= 0';
}
fovy = Math.PI * fovy / 180 / 2;
s = Math.sin(fovy);
if (s === 0) {
throw 'null frustum';
}
rd = 1 / (far - near);
ct = Math.cos(fovy) / s;
e = this.elements;
e[0] = ct / aspect;
e[1] = 0;
e[2] = 0;
e[3] = 0;
e[4] = 0;
e[5] = ct;
e[6] = 0;
e[7] = 0;
e[8] = 0;
e[9] = 0;
e[10] = -(far + near) * rd;
e[11] = -1;
e[12] = 0;
e[13] = 0;
e[14] = -2 * near * far * rd;
e[15] = 0;
return this;
};
</script>
<script type='text/javascript'>
function slideOnX(value){
Xshift = parseInt(value);
modelMatrix.setTranslate(Xshift, Yshift, 0);
gl_context.uniformMatrix4fv(u_ModelMatrix, false, modelMatrix.elements);
gl_context.clear(gl_context.COLOR_BUFFER_BIT)
gl_context.drawArrays(gl_context.POINTS, 0, n)
}
function slideOnY(value){
Yshift = parseInt(value)
modelMatrix.setTranslate(Xshift, Yshift, 0);
gl_context.uniformMatrix4fv(u_ModelMatrix, false, modelMatrix.elements);
gl_context.clear(gl_context.COLOR_BUFFER_BIT)
gl_context.drawArrays(gl_context.POINTS, 0, n)
}
function setColourByType(value){
colourKey = value;
colourByType()
}
function colourByType(){
if(colourKey == 'celltype'){
colourByCellType()
}else if(colourKey == 'pseudotime'){
colourByPseudotime()
}else{
colourByGene()
}
}
function colourByCellType(){
loadBuffer(selectData(), data_buffer)
drawBuffers()
}
function colourByPseudotime(){
current_pseudotime_buffer = new Float32Array(data_buffer.length)
current_pseudotime_buffer.set(data_buffer)
points_n = data_buffer.length / 6
for (i=0;i<points_n;i++){
current_pseudotime_buffer[6 * i + 3] = pseudotime_buffer[3*i]
current_pseudotime_buffer[6 * i + 4] = pseudotime_buffer[3*i + 1]
current_pseudotime_buffer[6 * i + 5] = pseudotime_buffer[3*i + 2]
}
loadBuffer(selectData(), current_pseudotime_buffer)
drawBuffers()
}
function setExpressionType(value){
expressionType = value
colourByType()
}
function colourByGene(){
current_gene = geneselector.value;
if(expressionType == 'nsnn'){
// check if colours have been already computed for this gene
if (gene_raw_colours[current_gene].length == 0){
gene_raw_colours[current_gene] = valuesToColours(gene_raw_expression[current_gene], 0, maxRawExpression)
}
var gene_colors = gene_raw_colours[current_gene]
}else if(expressionType == 'snn'){
if(gene_smooth_colours[current_gene].length == 0){
var vector = adaptiveMovingAverage(gene_raw_expression[current_gene])
gene_smooth_colours[current_gene] = valuesToColours(vector, 0, 6)
}
var gene_colors = gene_smooth_colours[current_gene]
}else{
if(gene_diff_colours[current_gene].length == 0){
var vector = adaptiveMovingAverage(gene_raw_expression[current_gene])
vector = minMaxNormalization(vector)
gene_diff_colours[current_gene] = valuesToColours(vector, 0, 1)
}
var gene_colors = gene_diff_colours[current_gene]
}
genecolor_buffer = new Float32Array(data_buffer.length)
genecolor_buffer.set(data_buffer)
points_n = data_buffer.length / 6
for (i=0;i<points_n;i++){
genecolor_buffer[6 * i + 3] = gene_colors[3*i]
genecolor_buffer[6 * i + 4] = gene_colors[3*i + 1]
genecolor_buffer[6 * i + 5] = gene_colors[3*i + 2]
}
loadBuffer(selectData(), genecolor_buffer)
drawBuffers()
}
function valuesToColours(vector, minimum, maximum){
    // Maps every value of vector (plain array or typed array) to an (r, g, b)
    // triplet and returns all triplets as one flat Float32Array of length
    // 3 * vector.length.
    // The ramp runs blue -> green -> red: a value at minimum gives (0, 0, 1),
    // the midpoint gives (0, 1, 0) and a value at maximum gives (1, 0, 0).
    // Red and blue are clamped at 0; green is not clamped, so values outside
    // [minimum, maximum] produce a negative green channel.
    // All working variables are declared locally (they used to leak as globals)
    // and the output is preallocated instead of being rebuilt with concat for
    // every element, which was quadratic in the number of cells.
    var count = vector.length,
        span = maximum - minimum,
        centre = (maximum + minimum) / 2.0,
        rgb = new Float32Array(3 * count);
    for (var k = 0; k < count; k++){
        var val = vector[k];
        rgb[3 * k] = Math.max(0, 2 * (val - minimum) / span - 1)
        rgb[3 * k + 1] = 1.0 - 2 * Math.abs(val - centre) / span
        rgb[3 * k + 2] = Math.max(0, 2 * (maximum - val) / span - 1)
    }
    return rgb;
}
function adaptiveMovingAverage(vector){
    // Smooths vector with a moving average whose window adapts to the local
    // spread: the flatter the neighbourhood, the wider the averaging window.
    // Returns a plain array with one smoothed value per input element.
    //
    // For each position i:
    //   1. take the fixed window [i - kernel, i + kernel) clipped to the vector,
    //   2. compute its spread (square root of the summed squared deviations
    //      from the window mean; it is not divided by the window size),
    //   3. widen the window to minim_kernel + round(range_factor / (spread + .1)),
    //   4. average the values inside the widened window.
    //
    // slice() is zero-based, so the lower clip must be 0. It used to be 1,
    // which kept element 0 out of every window and produced NaN for a
    // one-element vector.
    var smoothed = [],
        kernel = 10,
        minim_kernel = 10,
        range_factor = 5,
        add = function(sum, val){return sum + val};
    for (var i = 0; i < vector.length; i++){
        var start_index = Math.max(0, i - kernel),
            stop_index = Math.min(vector.length, i + kernel),
            local_window = vector.slice(start_index, stop_index),
            local_mean = local_window.reduce(add, 0) / local_window.length,
            sqDiffs = local_window.map(function(value){var diff = value - local_mean; return diff * diff}),
            local_sd = Math.sqrt(sqDiffs.reduce(add, 0)),
            local_kernel = minim_kernel + Math.round(range_factor / (local_sd + .1));
        start_index = Math.max(0, i - local_kernel)
        stop_index = Math.min(vector.length, i + local_kernel)
        var local_v = vector.slice(start_index, stop_index);
        smoothed.push(local_v.reduce(add, 0) / local_v.length)
    }
    return smoothed
}
function minMaxNormalization(vector){
    // Rescales vector linearly so that its smallest value becomes 0 and its
    // largest becomes 1, and returns the result as a new array.
    // Edge cases:
    //   - an empty vector returns an empty array (reduce without an initial
    //     value would throw on it),
    //   - a constant vector returns all zeros, because dividing by a zero
    //     span would give NaN for every element.
    if (vector.length === 0){
        return []
    }
    var minim = vector.reduce(function(a, b){return Math.min(a, b)}),
        maxim = vector.reduce(function(a, b){return Math.max(a, b)}),
        span = maxim - minim;
    if (span === 0){
        return vector.map(function(){return 0})
    }
    return vector.map(function(value){return (value - minim) / span})
}
function selectData(){
controlPanel = document.getElementById('ControlPanel')
controlRadios = controlPanel.elements
values = []
for(i=0;i<controlRadios.length;i++){
if(controlRadios[i].checked){
values = values.concat(index_table[controlRadios[i].id])
}
}
new_indices = []
for (i=0;i<values.length;i++){
v = values[i]
new_indices.push(6*v)
new_indices.push(6*v+1)
new_indices.push(6*v+2)
new_indices.push(6*v+3)
new_indices.push(6*v+4)
new_indices.push(6*v+5)
}
return new_indices
}
function loadBuffer(new_indices, data_buffer_from){
current_data_buffer = []
new_indices.forEach(function(val, i){current_data_buffer.push(data_buffer_from[val])})
current_data_buffer = new Float32Array(current_data_buffer)
gl_context.bufferData(gl_context.ARRAY_BUFFER, current_data_buffer, gl_context.STATIC_DRAW); // load data to buffer
n = current_data_buffer.length / 6
}
function drawBuffers(){
gl_context.clear(gl_context.COLOR_BUFFER_BIT)
gl_context.drawArrays(gl_context.POINTS, 0, n)
}
function toggleShowTypes(){
toggleRadio = document.getElementById('toggleRadio')
controlPanel = document.getElementById('ControlPanel')
controlRadios = controlPanel.elements
for(i=0;i<controlRadios.length;i++){
controlRadios[i].checked = toggleRadio.checked
}
colourByType()
}
function setParticleSize(value){
particleSize = parseInt(value)
gl_context.uniform1f(u_basePointSize, particleSize)
colourByType()
}
function setAlpha(value){
alphaValue = parseInt(value) / 1000
gl_context.uniform1f(u_Alpha, alphaValue)
colourByType()
}
function setCanvasSize(value){
value = parseInt(value)
canvas.width = value
canvas.height = value
gl_context = getContext(canvas)
gl_context = initContext(gl_context)
gl_context.viewport(0, 0, canvas.width, canvas.height)
if(bg_color == "white"){
gl_context.clearColor(1, 1, 1, 1)
}else{
gl_context.clearColor(0, 0, 0, 1)
}
gl_context.clear(gl_context.COLOR_BUFFER_BIT)
gl_context.drawArrays(gl_context.POINTS, 0, n)
}
function setZoom(value){
eyeVN = parseInt(value)
farField = eyeVN + 100;
rotateData(0, 0)
}
function setBackground(value){
if(value == "dark"){
gl_context.clearColor(0, 0, 0, 1)
bg_color = "dark"
}else{
gl_context.clearColor(1, 1, 1, 1)
bg_color = "white"
}
gl_context.clear(gl_context.COLOR_BUFFER_BIT)
gl_context.drawArrays(gl_context.POINTS, 0, n)
}
function shadersFromScriptElement(gl, ID, type){
    // Builds and compiles a shader from the text of the script element whose
    // id is ID.
    //   gl   - WebGL rendering context
    //   ID   - id of the script element holding the GLSL source
    //   type - shader type constant handed to gl.createShader
    // Returns the compiled shader object. Throws an Error when the element
    // does not exist or when compilation fails; the compiler log is included
    // in the message so a broken shader is reported where it happens.
    var shaderScript = document.getElementById(ID)
    if (!shaderScript){
        throw new Error('Shader script element not found: ' + ID)
    }
    // Concatenate the text nodes (nodeType 3) that are direct children
    var source = '',
        node = shaderScript.firstChild;
    while (node){
        if (node.nodeType == 3){
            source += node.textContent;
        }
        node = node.nextSibling
    }
    var shader = gl.createShader(type)
    gl.shaderSource(shader, source)
    gl.compileShader(shader)
    if (!gl.getShaderParameter(shader, gl.COMPILE_STATUS)){
        var log = gl.getShaderInfoLog(shader)
        gl.deleteShader(shader)
        throw new Error('Failed to compile shader ' + ID + ': ' + log)
    }
    return shader
}
function getContext(canvasWidget){
    // Obtains a WebGL context from canvasWidget, builds the shader program
    // from the 'vertex-shader' and 'fragment-shader' script elements, makes it
    // the active program and stores it on the context as gl.program.
    // Returns the context. Throws an Error when no context can be obtained or
    // when the program fails to link.
    var names = ['webgl', 'experimental-webgl', 'webkit-3d', 'moz-webgl'],
        gl = null;
    // Try each context name in turn and stop at the first one that works
    for (var i = 0; i < names.length && !gl; i++){
        try{
            gl = canvasWidget.getContext(names[i])
        }catch(e){
            gl = null
        }
    }
    if (!gl){
        throw new Error('Unable to obtain a WebGL rendering context from the canvas')
    }
    var vshader = shadersFromScriptElement(gl, 'vertex-shader', gl.VERTEX_SHADER),
        fshader = shadersFromScriptElement(gl, 'fragment-shader', gl.FRAGMENT_SHADER),
        program = gl.createProgram();
    gl.attachShader(program, vshader)
    gl.attachShader(program, fshader)
    gl.linkProgram(program)
    if (!gl.getProgramParameter(program, gl.LINK_STATUS)){
        throw new Error('Failed to link shader program: ' + gl.getProgramInfoLog(program))
    }
    gl.useProgram(program)
    gl.program = program
    return gl
}
function initContext(gl){
n = current_data_buffer.length / 6
var vertexColourBuffer = gl.createBuffer()
gl.bindBuffer(gl.ARRAY_BUFFER, vertexColourBuffer)
gl.bufferData(gl.ARRAY_BUFFER, current_data_buffer, gl.STATIC_DRAW)
var FSIZE = data_buffer.BYTES_PER_ELEMENT;
var a_Position = gl.getAttribLocation(gl.program, 'a_Position')
gl.vertexAttribPointer(a_Position, 3, gl.FLOAT, false, FSIZE * 6, 0)
gl.enableVertexAttribArray(a_Position)
var a_Color = gl.getAttribLocation(gl.program, 'a_Color')
gl.vertexAttribPointer(a_Color, 3, gl.FLOAT, false, FSIZE * 6, 3 * FSIZE)
gl.enableVertexAttribArray(a_Color)
u_basePointSize = gl.getUniformLocation(gl.program, 'u_basePointSize')
gl.uniform1f(u_basePointSize, particleSize)
u_Alpha = gl.getUniformLocation(gl.program, "u_Alpha")
gl.uniform1f(u_Alpha, alphaValue)
u_ModelMatrix = gl.getUniformLocation(gl.program, 'u_ModelMatrix');
u_ViewMatrix = gl.getUniformLocation(gl.program, 'u_ViewMatrix');
u_ProjMatrix = gl.getUniformLocation(gl.program, 'u_ProjMatrix');
modelMatrix = new Matrix4(); // The model matrix
viewMatrix = new Matrix4(); // The view matrix
projMatrix = new Matrix4(); // The projection matrix
modelMatrix.setTranslate(0, 0, 0); //
viewMatrix.setLookAt(eyeX, eyeY, eyeZ, 0, 0, 0, upX, upY, upZ); // eyeX, eyeY, eyeZ, camX, camY, camZ, upX, upY, upY
projMatrix.setPerspective(30, canvas.width/canvas.height, 100, farField); // fov, ratio, near, far
// Pass the model, view, and projection matrix to the uniform variable respectively
gl.uniformMatrix4fv(u_ModelMatrix, false, modelMatrix.elements);
gl.uniformMatrix4fv(u_ViewMatrix, false, viewMatrix.elements);
gl.uniformMatrix4fv(u_ProjMatrix, false, projMatrix.elements);
gl.clearColor(1, 1, 1, 1); // add ternary conditional
gl.enable(gl.DEPTH_TEST)
gl.enable(gl.BLEND)
gl.blendFunc(gl.SRC_ALPHA, gl.ONE_MINUS_SRC_ALPHA)
//gl.blendFunc(gl.ONE, gl.ONE_MINUS_SRC_ALPHA)
gl.clear(gl.COLOR_BUFFER_BIT);
return gl
}
var canvas = document.getElementById('canvas'),
particleSize = 150,
alphaValue = 1.0,
bg_color = "white",
eyeX = 0.0,
eyeY = 0.0,
eyeZ = 400.0,
upX = 0.0,
upY = 1.0,
upZ = 0.0,
eyeVN = 400.0,
farField = 500.0,
previousX = null,
previousY = null,
currentX = null,
currentY = null,
Xshift = 0,
Yshift = 0,
colourKey = 'celltype',
expressionType = 'nsnn',
geneselector = document.getElementById('geneselector');
data_buffer = new Float32Array([
datahere
])
pseudotime_buffer = new Float32Array([
pseudotime_here
])
pseudotime_buffer = valuesToColours(pseudotime_buffer, 0.0, 1.0)
gene_raw_expression = []
gene_raw_expression_write_here
gene_raw_colours = []
gene_raw_colours_here
gene_smooth_colours = []
gene_smooth_colours_here
gene_diff_colours = []
gene_diff_colours_here
current_gene_here
var maxRawExpression = maxRawExpression_here
index_table = []
indiceshere
current_data_buffer = data_buffer
gl_context = getContext(canvas)
gl_context = initContext(gl_context)
gl_context.drawArrays(gl_context.POINTS, 0, n)
function negCrossProduct(vecA, vecB){
crossproduct = [ - vecA[1] * vecB[2] + vecA[2] * vecB[1],
- vecA[2] * vecB[0] + vecA[0] * vecB[2],
- vecA[0] * vecB[1] + vecA[1] * vecB[0]
]
return(crossproduct)
}
function vectNorm(vector){
return(Math.sqrt((vector[0] * vector[0]) + (vector[1] * vector[1]) + (vector[2] * vector[2])))
}
function rotateData(hAngle, vAngle){
// change vector for very small angles is approximately the cross product of the eye vector and up vector
change = negCrossProduct([eyeX, eyeY, eyeZ], [upX, upY, upZ])
// normalize the change vector
normChange = vectNorm(change)
// scale the change vector by the horizontal angle
change = [hAngle * change[0]/normChange, hAngle * change[1]/normChange, hAngle * change[2]/normChange]
// update the eye vector by adding the change vector
eyeX = eyeX - change[0]
eyeY = eyeY - change[1]
eyeZ = eyeZ - change[2]
// renormalize the eye vector, other it will increase with each change (due to approx error)
normEye = vectNorm([eyeX, eyeY, eyeZ])
eyeX = eyeVN * eyeX / normEye
eyeY = eyeVN * eyeY / normEye
eyeZ = eyeVN * eyeZ / normEye
// get the (eye, up) plane normal
planeInvNormal = negCrossProduct([eyeX, eyeY, eyeZ], [upX, upY, upZ])
// in the case of vertical angle, the up vector is already the change vector
normChange = vectNorm([upX, upY, upZ])
change = [vAngle * upX / normChange, vAngle * upY / normChange, vAngle * upZ / normChange]
// update the eye vector by adding the change vector
eyeX = eyeX + change[0]
eyeY = eyeY + change[1]
eyeZ = eyeZ + change[2]
// renormalize the eye vector, other it will increase with each change (due to approx error)
normEye = Math.sqrt((eyeX * eyeX)+(eyeY * eyeY)+(eyeZ * eyeZ))
eyeX = eyeVN * eyeX / normEye
eyeY = eyeVN * eyeY / normEye
eyeZ = eyeVN * eyeZ / normEye
// but the up vector needs changing as well
newUp = negCrossProduct([eyeX, eyeY, eyeZ], planeInvNormal)
newUpNormal = vectNorm(newUp)
upX = -newUp[0] / newUpNormal
upY = -newUp[1] / newUpNormal
upZ = -newUp[2] / newUpNormal
gl_context.clear(gl_context.COLOR_BUFFER_BIT);
viewMatrix.setLookAt(eyeX, eyeY, eyeZ, 0, 0, 0, upX, upY, upZ);
projMatrix.setPerspective(30, canvas.width/canvas.height, 100, farField);
gl_context.uniformMatrix4fv(u_ViewMatrix, false, viewMatrix.elements);
gl_context.uniformMatrix4fv(u_ProjMatrix, false, projMatrix.elements);
gl_context.drawArrays(gl_context.POINTS, 0, n);
}
function startRotating(ev){
previousX = ev.clientX
previousY = ev.clientY
canvas.addEventListener('mousemove', rotateEvent)
canvas.addEventListener('mouseup', stopRotation)
canvas.addEventListener('mouseout', stopRotation)
}
function stopRotation(ev){
canvas.removeEventListener('mousemove', rotateEvent)
canvas.removeEventListener('mouseup', stopRotation)
canvas.removeEventListener('mouseout', stopRotation)
}
function rotateEvent(ev){
currentX = ev.clientX
currentY = ev.clientY
var dX = currentX - previousX,
dY = currentY - previousY;
rotateData(2.0 * dX, 2.0 * dY)
previousX = currentX;
previousY = currentY;
}
canvas.addEventListener('mousedown', startRotating)
</script>
</body>
</html>
"""
import pandas as pd
import numpy as np
data = pd.read_csv(load_from, index_col = None)
# convert Colours to r, g, b values, then to floats in [0.0, 1.0]
def hexdec_to_1floats(hexdec):
    """Convert a hexadecimal colour string into three floats in [0.0, 1.0].

    Parameters
    ----------
    hexdec : str
        Colour written as 'RRGGBB', with or without a leading '#'. Characters
        after the first six hex digits (e.g. an alpha pair) are ignored.

    Returns
    -------
    numpy.ndarray
        Array of shape (3,) holding red, green and blue, each divided by 255.

    Raises
    ------
    ValueError
        If fewer than six hex digits are supplied or a digit is not hexadecimal.
    """
    # Only drop the first character when it really is the '#' marker; stripping
    # it unconditionally silently shifted the digits of '#'-less input.
    digits = hexdec[1:] if hexdec[:1] == '#' else hexdec
    if len(digits) < 6:
        raise ValueError("expected a colour like '#RRGGBB', got {value!r}".format(value = hexdec))
    return np.array([int(digits[i:(i + 2)], 16) for i in (0, 2, 4)]) / 255.0
# map Labels to colours
# For every label two snippets are generated:
#   - a JavaScript statement storing the row numbers of the cells carrying that label
#     in index_table under the label,
#   - an HTML checkbox (id = label) on a background of the label's colour.
# Labels and colours come from the input file, so they are escaped for the context
# they are written into; a quote or '<' in a label would otherwise break the page.
import html
import json

labels = sorted(list(data.Labels.unique()))
index_table = []
radio_commands = []
for index, label in enumerate(labels):
    member_rows = np.flatnonzero((data.Labels == label).values)
    indices = "[{indices}]".format(indices = ','.join([str(i) for i in member_rows]))
    # json.dumps produces a quoted, fully escaped JavaScript string literal
    index_table.append("index_table[{key}] = {indices}".format(key = json.dumps(str(label)), indices = indices))
    # the first row of a label decides the colour of the whole label
    colour = data.Colours[data.Labels == label].values[0]
    # the browser decodes the HTML entities again, so the element id still equals
    # the raw label, which is the key used in index_table
    safe_label = html.escape(str(label), quote = True)
    safe_colour = html.escape(str(colour), quote = True)
    radio_command = "<div style='background-color:{colour}'><input style='float:left' type='checkbox' id='{label}' checked onchange='colourByType()' /><label style='float:left' for='{label}'>{label}: </label><br /></div>".format(colour = safe_colour, label = safe_label)
    radio_commands.append(radio_command)
index_table = ';\n '.join(index_table)
radio_commands = '\n '.join(radio_commands)
# make data string
# the first three columns of the table are used as x, y, z
coordinates = data.values[:, 0:3].astype('float32')
# The next few steps move the point cloud so that it is centred at (0, 0, 0) and scale it
# so that the padded 1st-99th percentile span of the widest axis covers 180 units.
# Percentiles are used instead of min/max so that a few outliers do not set the scale.
Xlimits = np.percentile(coordinates[:, 0], q = [1, 99])
Ylimits = np.percentile(coordinates[:, 1], q = [1, 99])
Zlimits = np.percentile(coordinates[:, 2], q = [1, 99])
# The centre is the midpoint of the unpadded percentile interval. The padding factor used
# to be applied to the percentiles before averaging, which moved the centre 20% further
# from zero and left the cloud off-centre.
center = np.array([np.mean(Xlimits), np.mean(Ylimits), np.mean(Zlimits)])
coordinates = coordinates - center
# widen each span by 20%
padding = 1.2
Xrange = (Xlimits[1] - Xlimits[0]) * padding
Yrange = (Ylimits[1] - Ylimits[0]) * padding
Zrange = (Zlimits[1] - Zlimits[0]) * padding
# one common factor for all axes keeps the proportions of the cloud
maxRange = max((Xrange, Yrange, Zrange))
ratio = 180.0 / maxRange
coordinates = coordinates * ratio
# Serialise the vertex data: every cell contributes six comma-separated numbers
# (x, y, z followed by r, g, b) and all cells are joined into one long string.
colours = data.values[:, 4]
vertex_strings = []
for xyz, hex_colour in zip(coordinates, colours):
    rgb = hexdec_to_1floats(hex_colour).astype('float32')
    vertex_strings.append(",".join(map(str, list(xyz) + list(rgb))))
buffer_data = ",".join(vertex_strings)
# The pseudotime column is serialised the same way, one number per cell.
pseudotime = data.values[:, 5]
pseudotime_buffer = ",".join(map(str, pseudotime))
# Every column from the seventh onwards is treated as one gene.
raw_expression = data.values[:, 6:]
gene_names = data.columns[6:]
# One JavaScript statement per gene holding its expression values ...
gene_raw_expression = ";\n".join(
    "gene_raw_expression['{gn}']=[{ge}]".format(
        gn = gene, ge = ",".join(map(str, raw_expression[:, column])))
    for column, gene in enumerate(gene_names)
)
# ... three statements creating the (initially empty) colour arrays ...
gene_raw_colours = ";\n".join(
    "gene_raw_colours['{gn}'] = []".format(gn = gene) for gene in gene_names)
gene_smooth_colours = ";\n".join(
    "gene_smooth_colours['{gn}'] = []".format(gn = gene) for gene in gene_names)
gene_diff_colours = ";\n".join(
    "gene_diff_colours['{gn}'] = []".format(gn = gene) for gene in gene_names)
# ... and one <option> element for the gene drop-down.
gene_options = "".join(
    "<option value='{gn}'>{gn}</option>".format(gn = gene) for gene in gene_names)
# largest raw expression value over all genes and cells
maxRawExpression = raw_expression.max()
# Fill the placeholders of the HTML template with the generated snippets.
# The replacements are applied in this order, one after another.
substitutions = [
    ('datahere', buffer_data),
    ('indiceshere', index_table),
    ('radiocommands', radio_commands),
    ('pseudotime_here', pseudotime_buffer),
    ('gene_raw_expression_write_here', gene_raw_expression),
    ('maxRawExpression_here', str(maxRawExpression)),
    ('gene_raw_colours_here', gene_raw_colours),
    ('gene_smooth_colours_here', gene_smooth_colours),
    ('gene_diff_colours_here', gene_diff_colours),
    ('gene_options_here', gene_options),
    # the first gene column is the one selected initially
    ('current_gene_here', "var current_gene = '{gn}'".format(gn = str(gene_names[0]))),
]
template_str = template
for placeholder, content in substitutions:
    template_str = template_str.replace(placeholder, content)
# The page declares charset utf-8, so write it as utf-8 instead of relying on the
# platform default encoding.
with open(save_to, 'w', encoding = 'utf-8') as result:
    result.write(template_str)

View file

@ -0,0 +1,5 @@
../../seurat_data/liver_all.RDS
HSC, Neut-myeloid progenitor, Monocyte-DC progenitor, DC2, DC1
HSC
../../constant_inputs/liver_cell_type_colours.csv
Fig5b

View file

@ -0,0 +1,5 @@
../../seurat_data/liver_all.RDS
HSC, Neut-myeloid progenitor, Monocyte-DC progenitor, DC1
HSC
../../constant_inputs/liver_cell_type_colours.csv
Fig5b_1

View file

@ -0,0 +1,5 @@
../../seurat_data/liver_all.RDS
HSC, Neut-myeloid progenitor, Monocyte-DC progenitor, DC2
HSC
../../constant_inputs/liver_cell_type_colours.csv
Fig5b_2

View file

@ -0,0 +1,5 @@
../../seurat_data/liver_all.RDS
HSC, Neut-myeloid progenitor, Monocyte-DC progenitor, DC2, DC1, Monocyte
HSC
../../constant_inputs/liver_cell_type_colours.csv
Fig5b_3

View file

@ -0,0 +1,5 @@
../../seurat_data/liver_all.RDS
HSC, MEP, Early Erythroid, Mid Erythroid, Late Erythroid
HSC
../../constant_inputs/liver_cell_type_colours.csv
Fig3d_e

View file

@ -0,0 +1,5 @@
../../seurat_data/liver_all.RDS
HSC, MEP, Mast cell
HSC
../../constant_inputs/liver_cell_type_colours.csv
Fig3d_m

View file

@ -0,0 +1,5 @@
../../seurat_data/liver_all.RDS
HSC, Neut-myeloid progenitor, Monocyte-DC progenitor, Monocyte, Mono-Mac, Kupffer Cell
HSC
../../constant_inputs/liver_cell_type_colours.csv
Fig5c

View file

@ -0,0 +1,5 @@
../../seurat_data/liver_all.RDS
HSC, Neut-myeloid progenitor, Monocyte-DC progenitor, Monocyte
HSC
../../constant_inputs/liver_cell_type_colours.csv
Fig5c_1

View file

@ -0,0 +1,5 @@
../../seurat_data/liver_all.RDS
HSC, Neut-myeloid progenitor, Monocyte-DC progenitor, Mono-Mac, Kupffer Cell
HSC
../../constant_inputs/liver_cell_type_colours.csv
Fig5c_2

View file

@ -0,0 +1,6 @@
HSC_LI
HSC_TH
NK Progenitor_LI
T_DN_TH
T_DP_TH
T_mature_TH

View file

@ -0,0 +1,5 @@
../../seurat_data/liver_all.RDS
HSC, MEP, Megakaryocyte
HSC
../../constant_inputs/liver_cell_type_colours.csv
Fig3d_t

View file

@ -0,0 +1,6 @@
HSC
pro B cell early
pro B cell
pre B cell
B cell

View file

@ -0,0 +1,90 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 14 15:01:36 2018
@author: doru
"""
print("starting .py script")
import sys
# Command line: <script> ROOT_CELL_TYPE CWD
#   ROOT_CELL_TYPE - cell label used to pick the root cell of the pseudotime
#   CWD            - folder containing the 'material' directory that is read and written
# Fail with a usage hint instead of a bare IndexError when arguments are missing.
if len(sys.argv) < 3:
    sys.exit("usage: {prog} <root_cell_type> <working_dir>".format(prog = sys.argv[0]))
args = sys.argv
root_cell_type = args[1]
CWD = args[2]
print("printing args")
print(args)
# use the args below if you have a root cell type containing spaces and @@'s
#root_cell_type = args[1] + " " + args[2]
#CWD = args[3]
# select the non-interactive Agg backend
import matplotlib; matplotlib.use('Agg');
# NOTE(review): scanpy.api is a legacy namespace - confirm it exists in the scanpy version in use
import scanpy.api as sc;
import pandas as pd
import numpy as np
print("printing root_cell_type")
print(root_cell_type)
print("printing CWD")
print(CWD)
# Preprocessing for the pseudotime computation: load the exported matrix and its
# annotations, filter and normalise the genes, then build PCA, the neighbourhood graph
# and the diffusion map.
sc.settings.verbosity = 3
# read the count matrix and transpose it (presumably stored as genes x cells - TODO confirm)
scObj = sc.read("{CWD}/material/raw_data.mtx".format(CWD=CWD), cache = False).T
# load gene names (second column of the csv file)
scObj.var_names = pd.read_csv("{CWD}/material/genenames.csv".format(CWD=CWD)).iloc[:, 1]
# load cell names (second column of the csv file)
scObj.obs_names = pd.read_csv("{CWD}/material/cellnames.csv".format(CWD=CWD)).iloc[:, 1]
# add cell labels; the first csv column becomes the index
cell_labels = pd.read_csv("{CWD}/material/cell_labels.csv".format(CWD=CWD), index_col = 0)
scObj.obs["cell_labels"] = cell_labels
# filter out genes present in less than 3 cells
sc.pp.filter_genes(scObj, min_cells=3)
# keep a log1p-transformed copy in .raw, then normalise every cell to 1e4 counts
scObj.raw = sc.pp.log1p(scObj, copy=True)
sc.pp.normalize_per_cell(scObj, counts_per_cell_after=1e4)
# variable genes
filter_result = sc.pp.filter_genes_dispersion(
scObj.X, min_mean=0.0125, max_mean=3, min_disp=0.5)
# subset data on variable genes
scObj = scObj[:, filter_result.gene_subset]
# log-transform the normalised counts of the retained genes
# (the copy stored in .raw above was log-transformed before normalisation)
sc.pp.log1p(scObj)
# scale the data
sc.pp.scale(scObj, max_value=10)
# run pca
sc.tl.pca(scObj)
# compute neighborhood graph
sc.pp.neighbors(scObj, n_neighbors = 15, n_pcs = 20, knn = True, random_state = 10, method = "gauss")
# compute diffusion map
sc.tl.diffmap(scObj, n_comps = 20)
# set root: the first cell carrying the root label becomes the origin of the pseudotime
root_candidates = np.flatnonzero(scObj.obs['cell_labels'] == root_cell_type)
if root_candidates.size == 0:
    # without this check an unknown label only surfaced as a bare IndexError
    raise ValueError("root cell type '{root}' was not found among the cell labels".format(root = root_cell_type))
scObj.uns['iroot'] = root_candidates[0]
# compute dpt
print("computing sc.tl.dpt")
sc.tl.dpt(scObj, n_dcs = 20)
# pdt is at scObj.obs["dpt_pseudotime"]
print("displaying pdt table stored in scObj")
pdt = scObj.obs["dpt_pseudotime"]
print(pdt)
# save the pseudotime
pdt.to_csv("{CWD}/material/pseudotime.csv".format(CWD=CWD))
# save the diffusion map coordinates (no header row)
dm = scObj.obsm["X_diffmap"]
dm = pd.DataFrame(data = dm, index = None, columns = None)
dm.to_csv("{CWD}/material/dm.csv".format(CWD=CWD), columns = None, header = None)

View file

@ -0,0 +1,399 @@
# Parse the command line into the pipeline's named arguments.
# All trailing arguments are glued together, split on ';', and '@@' is turned
# back into a space. Each resulting piece is then evaluated as R code and is
# expected to define the variable named in `arguments.list` (the name without
# the ".arg" suffix), e.g. the first piece must define `seurat.addr`.
args = commandArgs(trailingOnly = TRUE)
args = paste(args, collapse = "")
args = unlist(strsplit(args, ";"))
args = gsub(pattern = '@@', replacement = ' ', x = args)
arguments.list = "
seurat.addr.arg = args[1]
set.ident.arg = args[2]
cell.types.arg = args[3]
root_cell_type.arg = args[4]
var.genes.arg = args[5]
type.to.colours.arg = args[6]
"
expected_arguments = unlist(strsplit(arguments.list, "\n"))
expected_arguments = expected_arguments[!(expected_arguments == "")]
# names of the variables the arguments must define (text before ".arg")
expected_variables = sub(pattern = "\\.arg.*$", replacement = "", x = expected_arguments)
expected_variables = gsub(pattern = " ", replacement = "", x = expected_variables)
if (length(args) != length(expected_arguments)) {
  # report the real count and list the expected parameter names
  stop(sprintf("This pipeline requires %s parameters:\n%s",
               length(expected_arguments),
               paste(expected_variables, collapse = "\n")))
}
# bind seurat.addr.arg, set.ident.arg, ... to the raw argument strings
eval(parse(text = arguments.list))
for (n in seq_along(expected_arguments)) {
  argument.name = unlist(strsplit(expected_arguments[n], "="))[1]
  argument.name = gsub(pattern = " ", replacement = "", x = argument.name)
  argument.content = eval(parse(text = argument.name))
  # NOTE(review): this evaluates text taken from the command line as R code;
  # only run the pipeline with trusted arguments.
  eval(parse(text = argument.content))
}
# check the variables themselves (not the *.arg holders, which always exist);
# done after all pieces are evaluated so the order of the pieces does not matter
for (variable.name in expected_variables) {
  if (!exists(variable.name)) {
    stop(sprintf("Argument %s not passed. Stopping ... ", variable.name))
  }
}
# create required folders for output and work material
# folder name: <working dir name without leading "<digits>_">_<seurat.addr>_<yymmddHHMMSS>
output_folder = gsub(pattern="^\\d+_", replacement="", x=basename(getwd()))
output_folder = paste(output_folder, seurat.addr, sep = "_")
# format the timestamp directly instead of stripping characters from the
# printed time, so the result does not depend on how the time is printed
# (time zone suffix, fractional seconds)
c.time = format(Sys.time(), "%y%m%d%H%M%S")
output_folder = paste(output_folder, c.time, sep = "_")
output_folder = file.path("../../output", output_folder)
# recursive: also create ../../output itself when it does not exist yet
dir.create(output_folder, recursive = TRUE)
output_folder_material = file.path(output_folder, "material")
dir.create(output_folder_material)
# from here on seurat.addr is the path of the input file under ../../data
seurat.addr = file.path("../../data", seurat.addr)
source("../../tools/bunddle_utils.R")
library(Seurat)
library(ggplot2)
library(RColorBrewer)
library(plyr)
library(monocle)
library(dplyr)
library(reshape2)
#######################################################################################################
###########
# echo the requested cell types and the root cell type to the log
print("printing cell.types")
print(cell.types)
print("printing root_cell_type")
print(root_cell_type)
# Centered moving average.
#   arr    : numeric vector (names, if any, are kept on the result)
#   kernel : half-width of the window; element i is replaced by the mean of
#            arr[(i - kernel):(i + kernel)], with the window clipped at both
#            ends of the vector
# Returns a vector of the same length as arr (empty input gives empty output).
ma = function(arr, kernel = 50){
  res = arr
  # seq_along() so that an empty vector is returned unchanged
  for (i in seq_along(arr)) {
    start_index = max(1, i - kernel)
    stop_index = min(length(arr), i + kernel)
    res[i] = mean(arr[start_index:stop_index])
  }
  res
}
# Moving average whose window widens where the signal is locally flat.
#   arr          : numeric vector (names, if any, are kept on the result)
#   kernel       : half-width of the window used to measure the local sd
#   minim_kernel : smallest half-width of the averaging window
#   range.factor : numerator of the widening term; the averaging half-width is
#                  minim_kernel + round(range.factor / (local_sd + 0.1))
# Both windows are clipped at the ends of the vector.
# Returns a vector of the same length as arr (empty input gives empty output).
adaptive.moving_average = function(arr, kernel = 10, minim_kernel = 10, range.factor = 5){
  res = arr
  # seq_along() so that an empty vector is returned unchanged
  for (i in seq_along(arr)) {
    start_index = max(1, i - kernel)
    stop_index = min(length(arr), i + kernel)
    if (stop_index == start_index) {
      # sd() of a single value is NA, which would make the window bounds NA;
      # a one-point window has no spread
      local_sd = 0
    } else {
      local_sd = sd(arr[start_index:stop_index])
    }
    local_kernel = minim_kernel + round(range.factor / (local_sd + .1))
    start_index = max(1, i - local_kernel)
    stop_index = min(length(arr), i + local_kernel)
    res[i] = mean(arr[start_index:stop_index])
  }
  res
}
###########
#######################################################################################################
# Load the Seurat object, keep the requested cell types and write the raw
# counts plus gene/cell names and cell labels into the "material" folder.
print("Loading data ...")
seurat.obj = readRDS(seurat.addr)
seurat.obj = SetAllIdent(object=seurat.obj, id=set.ident)
print("Data loaded.")
print("Subseting data on singlets and required cell populations")
# "all" selects every identity in the object; the length test keeps a vector of
# several cell types from reaching if() as a multi-element condition
if (length(cell.types) == 1 && cell.types == "all") {
  cell.types = as.vector(unique(seurat.obj@ident))
}
print(table(seurat.obj@ident))
print("Subseting data ...")
to.keep = names(seurat.obj@ident)[as.vector(seurat.obj@ident) %in% cell.types]
seurat.obj = SubsetData(object=seurat.obj, cells.use=to.keep)
# factor levels follow the order in which the cell types were requested
seurat.obj@ident = factor(seurat.obj@ident, levels = cell.types)
print(table(seurat.obj@ident))
print("Writing data to disk ...")
# save raw data to disk, restricted to the genes and cells present in @data
raw_data = seurat.obj@raw.data
raw_data = raw_data[rownames(seurat.obj@data), colnames(seurat.obj@data)]
# drop the genes listed in fca_cellcycle_genes.RDS (read from the working
# directory); comment out the next four lines to keep every gene
to_exclude = readRDS('fca_cellcycle_genes.RDS')
genes_to_keep = rownames(raw_data)
genes_to_keep = genes_to_keep[!(genes_to_keep %in% to_exclude)]
raw_data = raw_data[genes_to_keep, colnames(seurat.obj@data)]
writeMM(raw_data, file.path(output_folder_material, "raw_data.mtx"))
# save gene names (row names are written too, so the names are the 2nd column)
gene_names = rownames(raw_data)
write.csv(data.frame(Genes = gene_names), file.path(output_folder_material, "genenames.csv"))
# save cell names (same layout as the gene names)
cell_names = colnames(raw_data)
write.csv(data.frame(Cells = cell_names), file.path(output_folder_material, "cellnames.csv"))
# write cell labels to disk (no row names: first column is the cell name)
write.csv(data.frame(Cells = names(seurat.obj@ident), Labels = seurat.obj@ident), file.path(output_folder_material, "cell_labels.csv"), row.names = FALSE)
print("Computing pseudotime using pdt.scanpy.py...")
# compute pseudotime in python scanpy
# NOTE(review): root_cell_type is pasted into the command unquoted, so a value
# containing spaces reaches the script as several arguments -- confirm that
# pdt_scanpy.py expects this before quoting it.
command = sprintf("%s pdt_scanpy.py %s %s", python.addr, root_cell_type, output_folder)
exit.status = system(command, wait = TRUE)
# the rest of the pipeline reads files written by the python script, so stop
# here with a clear message if it did not finish successfully
if (exit.status != 0) {
  stop(sprintf("pdt_scanpy.py failed with exit status %s", exit.status))
}
print("finished running .py")
# Resolve the label -> colour mapping used by every plot below.
# Without a mapping file (type.to.colours is NA) the labels are the sorted
# identities and the colours are a shuffled ramp over the "Paired" palette;
# otherwise both come from the CSV in ../../resources, restricted to the
# identities present in the object and kept in file order.
if (is.na(type.to.colours)) {
  cell.labels = sort(as.vector(unique(seurat.obj@ident)))
  paired.ramp = colorRampPalette(brewer.pal(12, "Paired"))
  cell.colours = sample(paired.ramp(length(cell.labels)))
} else {
  type.to.colours = file.path("../../resources", type.to.colours)
  type.to.colour = read.csv(type.to.colours)
  print("printing type.to.colour after it is loaded in")
  print(type.to.colour)
  print("printing seurat obj idents which the typetocol arg will be compared against in next lines")
  present.idents = as.vector(unique(seurat.obj@ident))
  print(present.idents)
  filter.key = type.to.colour$CellTypes %in% present.idents
  cell.labels = as.vector(type.to.colour$CellTypes[filter.key])
  cell.colours = as.vector(type.to.colour$Colours[filter.key])
}
print("printing cell.labels")
print(cell.labels)
print("printing cell.colours")
print(cell.colours)
# Read the pseudotime written by the python step and attach cell type/colour.
print('reading pseudotime values')
pseudotime.path = file.path(output_folder_material, "pseudotime.csv")
pseudotime = read.csv(pseudotime.path, row.names = 1, header = F)
# the order is only reported here, it is not enforced
print("Are the cells in the same order in both pseudotime and seurat object? ")
same.order = all(rownames(pseudotime) == names(seurat.obj@ident))
print(same.order)
pseudotime$CellTypes = seurat.obj@ident
colnames(pseudotime) = c("Pseudotime", "CellType")
# colour per cell, as a factor whose levels follow cell.colours
mapped.colours = mapvalues(x=pseudotime$CellType, from=cell.labels, to=cell.colours)
pseudotime$Color = factor(as.vector(mapped.colours), levels = cell.colours)
pseudotime$CellType = factor(as.vector(pseudotime$CellType), levels = cell.labels)
colnames(pseudotime) = c("Pseudotime", "Cell Type", "Color")
# making sure that there are no inf values in pdt column
#pseudotime["Pseudotime"][pseudotime["Pseudotime"] == "Inf"] <- 1
# density of each cell type along pseudotime; x axis on top, y axis hidden
plot.density = ggplot(data = pseudotime, aes(x = Pseudotime, color = `Cell Type`, fill = `Cell Type`)) +
  geom_density(alpha = .7) +
  scale_x_continuous(position = "top", limits = c(.0, 1.0), expand = c(0.0, .0)) +
  scale_color_manual(values = cell.colours) +
  scale_fill_manual(values = cell.colours) +
  theme(axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line.y = element_blank(),
        axis.title.x = element_text(size = 25),
        legend.position = c(0, 1),
        legend.justification = c(0, 1))
print("printing cell.colours which is used for scale_colour_manual for plot.density plot")
print(cell.colours)
# compute diff genes
# Builds `var.genes.total`: either the genes that vary most along pseudotime
# (when var.genes is NA) or the gene list read from a file in ../../resources.
print("Computing var genes by cell type...")
cds = newCellDataSet(cellData = as.matrix(raw_data), phenoData=NULL, featureData=NULL, expressionFamily = negbinomial.size())
print("printing cds made using newCellDataSet function")
print(cds)
pData(cds)$Cluster = as.vector(seurat.obj@ident)
print("printing cds after adding cluster to pdata")
print(cds)
print("running estimatesizefactors for cds")
cds = estimateSizeFactors(cds)
pData(cds)$Pseudotime = pseudotime$Pseudotime

# Test every gene of `cds.subset` against pseudotime and return the ids of the
# `n.top` genes with the smallest q-value. head() returns fewer ids when fewer
# genes were tested, instead of padding the result with NA.
top.pseudotime.genes = function(cds.subset, n.top = 100) {
  test.result = differentialGeneTest(cds.subset, fullModelFormulaStr = "~sm.ns(Pseudotime)")
  test.result = cbind(test.result, data.frame(gene_id = rownames(test.result)))
  test.result = test.result %>% arrange(qval)
  head(as.vector(test.result$gene_id), n.top)
}

if (is.na(var.genes)){
  var.genes.total = c()
  print('Computing variable genes ... ')
  # top genes within each cell type ...
  for (j in seq_along(cell.labels)){
    print(sprintf("Choice %s out of %s ... ", as.character(j), as.character(length(cell.labels))))
    choices = pseudotime$`Cell Type` == cell.labels[j]
    var.genes.total = union(var.genes.total, top.pseudotime.genes(cds[, choices]))
  }
  # ... plus the top genes across all cells
  print("Computing var genes globally...")
  var.genes.total = union(var.genes.total, top.pseudotime.genes(cds))
  # drop genes whose name starts with "MT-" (case-insensitive)
  MT_genes = var.genes.total[grep("^MT-", x=var.genes.total, ignore.case=TRUE)]
  var.genes.total = setdiff(var.genes.total, MT_genes)
}else{
  # one gene per line; duplicates and empty lines are dropped
  var.genes.file = file.path('../../resources', var.genes)
  var.genes.total = readLines(var.genes.file)
  var.genes.total = as.vector(unique(var.genes.total))
  var.genes.total = var.genes.total[var.genes.total != '']
}
# Order the selected genes by hierarchical clustering of their smoothed,
# min-max normalized expression along pseudotime. Result: `genes.in.order`.
print("Heavy computing finished. Next saving to output...")
print("calculating var_gene_expression")
# cluster genes based on their min-max normalized values
# rows = selected genes, columns = cells sorted by increasing pseudotime
var_gene_expression = as.matrix(seurat.obj@data[var.genes.total, order(pseudotime$Pseudotime)])
# smooth every gene (row) along pseudotime; apply() returns the rows as columns, hence t()
var_gene_expression = t(apply(var_gene_expression, 1, adaptive.moving_average, kernel = 15, minim_kernel = 1, range.factor=15))
# min-max normalization
# the per-row vectors have one entry per row, so they recycle down the columns
# and each row is shifted/scaled by its own min/max
var_gene_min = apply(var_gene_expression, 1, min)
var_gene_expression = var_gene_expression - var_gene_min
var_gene_genes_max = apply(var_gene_expression, 1, max)
var_gene_expression = var_gene_expression / var_gene_genes_max
print("clustering genes by level of expression")
# actual clustering of genes
# distance between two genes = 1 - Spearman correlation of their profiles
d_matrix = as.dist(1.0 - cor(t(as.matrix(var_gene_expression)), method="spearman"))
genes_clust = hclust(d=d_matrix, method="ward.D2")
# genes in dendrogram leaf order
genes.in.order = var.genes.total[genes_clust$order]
# plot min-max normalized expression
###################################################################################################
# rows = clustered genes (reversed order), columns = cells sorted by pseudotime
raw_data_genes = as.matrix(seurat.obj@data[rev(genes.in.order), order(pseudotime$Pseudotime)])
raw_data_genes = t(apply(raw_data_genes, 1, adaptive.moving_average, kernel = 15, minim_kernel = 1, range.factor=15))
# min-max normalization (per-row vectors recycle down the columns)
raw_data_genes_min = apply(raw_data_genes, 1, min)
raw_data_genes = raw_data_genes - raw_data_genes_min
raw_data_genes_max = apply(raw_data_genes, 1, max)
raw_data_genes = raw_data_genes / raw_data_genes_max
print("group genes by pdt")
# group by pdt: sample every gene at 100 evenly spaced pseudotime points,
# taking for each point the cell whose pseudotime is closest to it
pdt = range(pseudotime$Pseudotime)
pdt = seq(pdt[1], pdt[2], length.out=100)
sorted_pdt = pseudotime$Pseudotime[order(pseudotime$Pseudotime)]
# which.min() yields exactly one cell per grid point even when several cells
# are equally close (tied pseudotime values), so the matrix always has one
# column per grid point; the lookup is the same for every gene, so it is done
# once and used to pick the columns directly
pdt_index = vapply(pdt, function(local_pdt) which.min(abs(sorted_pdt - local_pdt)), integer(1))
pdt_data = raw_data_genes[, pdt_index, drop = FALSE]
print("printing nrow pdt_data")
nrow(pdt_data)
print("printing ncol pdt_data")
ncol(pdt_data)
rownames(pdt_data) = rownames(raw_data_genes)
colnames(pdt_data) = paste0("PDT", seq_along(pdt))
# smooth a bit the pdt_data matrix, then min-max normalize each gene again
pdt_data = t(apply(pdt_data, 1, ma, kernel = 7))
pdt_data = pdt_data - apply(pdt_data, 1, min)
pdt_data = pdt_data / apply(pdt_data, 1, max)
# long format: one row per (gene, pseudotime point)
beautiful_result_norm = reshape2::melt(data=pdt_data)
colnames(beautiful_result_norm) = c("GeneNames", "Pseudotime", "ExpressionValue")
print("preparing to plot genes by expression level")
plot.genes = ggplot(data = beautiful_result_norm, aes(x = Pseudotime, y = GeneNames))
# tiles slightly larger than 1 so that neighbouring tiles overlap
plot.genes = plot.genes + geom_tile(aes(fill = ExpressionValue), width=1.001, height=1.001)
plot.genes = plot.genes + scale_fill_gradient2(low = "deepskyblue", high = "firebrick3", mid = "darkolivegreen3", midpoint = 0.5, name = "Minmax normalized gene expression")
plot.genes = plot.genes + theme(legend.position = "bottom", legend.text = element_text(size = 25, angle = 90),
                                legend.title = element_text(size = 25),
                                legend.key.width = unit(2, "cm"),
                                axis.text.x = element_blank(), axis.title.x = element_blank(),
                                axis.ticks.x = element_blank(),
                                axis.title.y = element_text(size = 0), axis.text.y = element_text(size = 8))
# density plot on top (1/9 of the height), heatmap below
pdf(file.path(output_folder, "expression_vs_norm_expression.pdf"), width = 13, height = 35)
plot_grid(plot.density, plot.genes, nrow = 2, align = "v", rel_heights = c(1/9, 8/9))
dev.off()
print("plotted expression_vs_norm_expression.pdf in output folder")
# plot non-normalized expression
###################################################################################################
# rows = clustered genes (reversed order), columns = cells sorted by pseudotime
raw_data_genes = as.matrix(seurat.obj@data[rev(genes.in.order), order(pseudotime$Pseudotime)])
print("made raw_data_genes matrix")
raw_data_genes = t(apply(raw_data_genes, 1, adaptive.moving_average, kernel = 15, minim_kernel = 1, range.factor=15))
print("raw_data_genes matrix has applied apply function and t")
# group by pdt: sample every gene at 100 evenly spaced pseudotime points,
# taking for each point the cell whose pseudotime is closest to it
pdt = range(pseudotime$Pseudotime)
pdt = seq(pdt[1], pdt[2], length.out=100)
sorted_pdt = pseudotime$Pseudotime[order(pseudotime$Pseudotime)]
# which.min() yields exactly one cell per grid point even when several cells
# are equally close (tied pseudotime values), so the matrix always has one
# column per grid point; the lookup is the same for every gene, so it is done
# once and used to pick the columns directly
pdt_index = vapply(pdt, function(local_pdt) which.min(abs(sorted_pdt - local_pdt)), integer(1))
pdt_data = raw_data_genes[, pdt_index, drop = FALSE]
rownames(pdt_data) = rownames(raw_data_genes)
colnames(pdt_data) = paste0("PDT", seq_along(pdt))
# smooth a bit the pdt_data matrix (no min-max normalization in this plot)
pdt_data = t(apply(pdt_data, 1, ma, kernel = 7))
# long format: one row per (gene, pseudotime point)
beautiful_result_nonnorm = reshape2::melt(data=pdt_data)
colnames(beautiful_result_nonnorm) = c("GeneNames", "Pseudotime", "ExpressionValue")
plot.genes = ggplot(data = beautiful_result_nonnorm, aes(x = Pseudotime, y = GeneNames))
# tiles slightly larger than 1 so that neighbouring tiles overlap
plot.genes = plot.genes + geom_tile(aes(fill = ExpressionValue), width=1.001, height=1.001)
# colour scale centred on the middle of the observed value range
plot.genes = plot.genes + scale_fill_gradient2(low = "deepskyblue", high = "firebrick3", mid = "darkolivegreen3", midpoint = mean(range(pdt_data)), name = "Gene expression")
plot.genes = plot.genes + theme(legend.position = "bottom", legend.text = element_text(size = 25, angle = 90),
                                legend.title = element_text(size = 25),
                                legend.key.width = unit(2, "cm"),
                                axis.text.x = element_blank(), axis.title.x = element_blank(),
                                axis.ticks.x = element_blank(),
                                axis.title.y = element_text(size = 0), axis.text.y = element_text(size = 8))
# density plot on top (1/9 of the height), heatmap below
pdf(file.path(output_folder, "expression_vs_nonnorm_expression.pdf"), width = 13, height = 35)
plot_grid(plot.density, plot.genes, nrow = 2, align = "v", rel_heights = c(1/9, 8/9))
dev.off()
print("plotted genes by expression_vs_nonnorm_expression.pdf")
# save diffusion map coordinates and expression data for found genes
# Everything written below is ordered by increasing pseudotime.
by.pdt.order = order(pseudotime$Pseudotime)
# dm.csv has no header; its first column is used as row names
dm.df = read.csv(file.path(output_folder_material, "dm.csv"), row.names = 1, header = F)
# keep the first three remaining columns (named DM1..DM3 below)
dm.df = as.data.frame(dm.df[, 1:3])
dm.df$Labels = factor(seurat.obj@ident, levels = cell.labels)
dm.df$Colours = mapvalues(x = dm.df$Labels, from = cell.labels, to = cell.colours)
dm.df = dm.df[by.pdt.order, ]
colnames(dm.df) = c("DM1", "DM2", "DM3", "Labels", "Colours")
print("writing pdt_and_expression.csv")
# cells in rows, clustered genes (reversed order) in columns
expression_data_and_pdt = as.data.frame(t(as.matrix(seurat.obj@data[rev(genes.in.order), by.pdt.order])))
# column 1 of `pseudotime` is the pseudotime value
pdt.data = data.frame(Pseudotime = pseudotime[by.pdt.order, c(1)])
# one table: coordinates + labels/colours, pseudotime, then one column per gene
pdt.data = cbind(dm.df, pdt.data, expression_data_and_pdt)
pdt.data.fp = file.path(output_folder, "pdt_and_expression.csv")
write.csv(pdt.data, pdt.data.fp, row.names = F)
# make interactive diffusion map
# arguments passed to the viewer script: output html path, then the csv written above
command = sprintf("%s html_3D_viewer_and_plotter.py %s %s", python.addr, file.path(output_folder, "Interactive_Pseudotime.html"), pdt.data.fp)
system(command, wait = T)
# save the plotting material, just in case
# (the pseudotime table and both long-format heatmap tables)
plot.data.objects = list(pseudotime = pseudotime, beautiful_result_norm = beautiful_result_norm, beautiful_result_nonnorm = beautiful_result_nonnorm)
saveRDS(plot.data.objects, file.path(output_folder, "ploting_material.RDS"))
# remove the temporary "material" folder and everything in it
unlink(output_folder_material, recursive=T, force=T)
print("Ended beautifully ... ")

View file

@ -0,0 +1,16 @@
#!/bin/bash
#$ -cwd
#$ -N pseudotime
#$ -V
#$ -l h_rt=47:59:59
#$ -l h_vmem=400G
# Exactly one argument is expected; it is handed on to pseudotime.R.
if [[ $# -ne 1 ]]; then
    echo "Illegal number of parameters"
    exit 1
fi

# $1 is left unquoted on purpose so that the shell's word splitting of the
# argument stays exactly as before.
Rscript pseudotime.R $1
echo "End on $(date)"