scRNA-seq_analysis

This commit is contained in:
veghp 2019-07-08 12:22:01 +01:00
commit 82cc2d191e
188 changed files with 146184 additions and 0 deletions

View file

@ -0,0 +1,90 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Build the interactive AGA linkage-map page from the AGA output tables.

Created on Thu Nov 22 11:03:12 2018
@author: doru

Usage:
    <this script> <output_folder> <category>

Reads ``connectivities.csv``, ``coordinates.csv`` and, when present,
``colours.csv`` from ``<output_folder>/AGA_folder``, renders them as
JavaScript assignments, substitutes them for the data placeholder comment of
``template_for_AGA_app.html`` (looked up in the current working directory)
and writes ``<output_folder>/AGAlinkage_map_<category>.html``.
"""
import json
import random
import sys
from os.path import join

import pandas as pd

TEMPLATE_FNAME = 'template_for_AGA_app.html'
# comment line of the template that gets replaced by the generated data
DATA_PLACEHOLDER = '// insert data here'
# Factor applied to the coordinate extremes before normalisation.
# NOTE(review): multiplying the extremes only adds a margin when the minimum
# is negative and the maximum is positive - confirm the coordinates written
# by the AGA step are always centred around zero.
SCALE_SCALE = 1.4


def js_string(text):
    """Return *text* as a double-quoted JavaScript string literal.

    Quotes, backslashes and control characters are escaped so that unusual
    cell names cannot break the generated script; ordinary names are emitted
    exactly as ``"name"``.
    """
    return json.dumps(str(text), ensure_ascii=False)


def random_colours(cell_types):
    """Return a colour table with one random ``#RRGGBB`` value per cell type.

    The result is a DataFrame indexed by ``CellTypes`` with a single
    ``Colours`` column, i.e. the layout obtained by reading ``colours.csv``
    with its first column as index.
    """
    rows = [
        {'CellTypes': cell_type,
         'Colours': '#%02X%02X%02X' % tuple(random.randint(0, 255)
                                            for _ in range(3))}
        for cell_type in cell_types
    ]
    table = pd.DataFrame(rows, columns=['CellTypes', 'Colours'])
    return table.set_index('CellTypes')


def load_colours(colors_fname, cell_types):
    """Read the colour table, falling back to random colours if it is absent."""
    try:
        return pd.read_csv(colors_fname, index_col=0, header=0)
    except FileNotFoundError:
        return random_colours(cell_types)


def normalise(value, low, high):
    """Map *value* from the range [low, high] onto [0, 1].

    An empty range (single population, or all populations at the same
    coordinate) would be a 0/0 division; the point is centred at 0.5 instead.
    """
    span = high - low
    if span == 0:
        return 0.5
    return (value - low) / span


def build_data_block(coordinates, connectivities, colors):
    """Return the JavaScript data assignments inserted into the template.

    coordinates    -- DataFrame indexed by cell name with X, Y, Size columns
    connectivities -- square DataFrame whose rows and columns are cell names
    colors         -- DataFrame indexed by cell name with a Colours column

    The text holds one ``data_coordinates[...]`` line and one
    ``data_edges[...]`` line per cell name, followed by the ``cell_names``
    array, all ordered by decreasing population size.
    """
    # bigger populations first, so that during drawing smaller populations
    # are not covered by bigger bubbles
    sizes = coordinates['Size'].tolist()
    cell_names = [name for _, name in
                  sorted(zip(sizes, list(coordinates.index)), reverse=True)]

    # use the named columns rather than positional Series indexing
    min_x = coordinates['X'].min() * SCALE_SCALE
    max_x = coordinates['X'].max() * SCALE_SCALE
    min_y = coordinates['Y'].min() * SCALE_SCALE
    max_y = coordinates['Y'].max() * SCALE_SCALE

    coordinate_lines = []
    for cell_name in cell_names:
        row = coordinates.loc[cell_name]
        coordinate_lines.append(
            'data_coordinates[{name}] = [{X}, {Y}, {R}, {C}]'.format(
                name=js_string(cell_name),
                X=normalise(row.X, min_x, max_x),
                Y=normalise(row.Y, min_y, max_y),
                R=row.Size,
                C=js_string(colors.loc[cell_name].Colours)))

    # edge weights of each population, rearranged to the order of cell_names
    edge_lines = []
    for cell_name in cell_names:
        weights = connectivities[cell_name][cell_names].tolist()
        edge_lines.append('data_edges[{name}] = [{weights}]'.format(
            name=js_string(cell_name),
            weights=','.join(str(weight) for weight in weights)))

    names_line = 'cell_names = [{names}]'.format(
        names=','.join(js_string(name) for name in cell_names))
    return '\n'.join(coordinate_lines + edge_lines + [names_line])


def main(argv=None):
    """Generate the linkage-map page for the folder and category in *argv*."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        sys.exit('usage: {prog} <output_folder> <category>'.format(
            prog=argv[0] if argv else 'make_AGA_app.py'))
    output_folder, category = argv[1], argv[2]

    # file names
    material_folder = join(output_folder, 'AGA_folder')
    save_to = join(output_folder,
                   'AGAlinkage_map_{cat}.html'.format(cat=category))
    colors_fname = join(material_folder, 'colours.csv')
    connectivities_fname = join(material_folder, 'connectivities.csv')
    coordinates_fname = join(material_folder, 'coordinates.csv')

    # read data from files in csv format
    connectivities = pd.read_csv(connectivities_fname, index_col=0, header=0)
    coordinates = pd.read_csv(coordinates_fname, index_col=0, header=0)
    colors = load_colours(colors_fname, list(connectivities.columns))

    data = build_data_block(coordinates, connectivities, colors)

    # the template declares a utf-8 charset, so read and write it as utf-8
    with open(TEMPLATE_FNAME, 'r', encoding='utf-8') as template_fobj:
        template = template_fobj.read()
    if DATA_PLACEHOLDER not in template:
        sys.exit('{fname} does not contain the placeholder line "{mark}"'.format(
            fname=TEMPLATE_FNAME, mark=DATA_PLACEHOLDER))

    # insert data in template and save the interactive page
    with open(save_to, 'w', encoding='utf-8') as save_fobj:
        save_fobj.write(template.replace(DATA_PLACEHOLDER, data))


if __name__ == '__main__':
    main()

View file

@ -0,0 +1,134 @@
# Usage: Rscript <this script> <option_file>
# The option file lists one or more blocks starting with a "name: " line.
args = commandArgs(trailingOnly=TRUE)
if(length(args) < 1){
  stop("Usage: Rscript <this script> <option_file>")
}
option.file = args[1]

# create required folders for output and work material
# the output folder is named after the working directory (minus a leading
# "<digits>_" prefix) followed by a yymmddHHMMSS time stamp
output_folder = gsub(pattern="^\\d+_", replacement="", x=basename(getwd()))
# format() gives the same digits the original gsub chain produced, without
# depending on how Sys.time() is coerced to text (time zone suffix, fractional seconds)
c.time = format(Sys.time(), "%y%m%d%H%M%S")
output_folder = paste(output_folder, c.time, sep = "_")
output_folder = file.path("../../output", output_folder)
# recursive so that a missing "../../output" parent is created as well
dir.create(output_folder, recursive=TRUE)

source("../../tools/bunddle_utils.R")

library(Seurat)
library(RColorBrewer)
library(plyr)
library(dplyr)
library(magrittr)

#######################################################################################################
# parse options
option_lines = readLines(option.file)

# remove comments
# grepl() is used instead of x[-grep(...)]: with no comment lines grep() returns
# integer(0) and the negative index would silently drop every option line
option_lines = option_lines[!grepl(pattern="^#", x=option_lines)]

# split options into blocks
# lines are glued with "@@@" so that each block can be cut at "name: " and split back into lines later
option_lines = paste(option_lines, collapse = "@@@")
option_blocks = option_lines %>% strsplit(split="name: ") %>% unlist
option_blocks = option_blocks[option_blocks != ""]
# run a shell command and stop the script when it reports a failure
run.command = function(command){
  status = system(command, wait = TRUE)
  if(status != 0){
    stop(sprintf("Command failed with exit status %s: %s", status, command))
  }
  invisible(status)
}

# loop through each block of options, load the data sets and make the AGAs
# NOTE(review): tool_addr, python.addr, runFDG and make_2D_interactive_page are not defined
# in this script - presumably they come from the sourced bunddle_utils.R; confirm
for(k in seq_along(option_blocks)){
  option_block = option_blocks[k] %>% strsplit(split="@@@") %>% unlist
  # the first line of a block is the text that followed "name: "
  AGA_name = option_block[1]
  print(sprintf("Making AGA for %s", AGA_name))

  AGA_save_to = file.path(output_folder, AGA_name)
  dir.create(AGA_save_to)
  output_folder_material = file.path(AGA_save_to, "material")
  AGA_folder = file.path(AGA_save_to, "AGA_folder")
  dir.create(output_folder_material)
  dir.create(AGA_folder)

  # load data sets
  data_files = option_block[grep(pattern="^data", x=option_block)] %>% gsub(pattern="^data.: ", replacement="")
  set.idents = option_block[grep(pattern="^set.ident", x=option_block)] %>% gsub(pattern="^set.ident..: ", replacement="")
  label.tags = option_block[grep(pattern="^tag", x=option_block)] %>% gsub(pattern="^tag.: ", replacement="")
  categories = option_block[grep(pattern="^categories", x=option_block)]

  # the four option kinds are matched by position, so they must come in equal numbers
  option_counts = c(length(data_files), length(set.idents), length(label.tags), length(categories))
  if(option_counts[1] == 0 || length(unique(option_counts)) != 1){
    stop(sprintf("Option block '%s' needs one data, set.ident, tag and categories line per data set", AGA_name))
  }

  data.list = list()
  for(i in seq_along(data_files)){
    data.file = file.path("../../data", data_files[i])
    print(sprintf("Loading %s", data.file))
    seurat.obj = readRDS(data.file)
    seurat.obj %<>% SetAllIdent(id=set.idents[i])
    # "categoriesN: a, b, c" -> c("a", "b", "c")
    cell.labels = categories[i] %>% strsplit(split=": ") %>% unlist %>% (function(x)x[2]) %>% strsplit(split=", ") %>% unlist
    seurat.obj %<>% SubsetData(ident.use=cell.labels)
    # prefix each label with the data set tag, e.g. "<tag>::<label>"
    seurat.obj@meta.data$AGA_labels = paste(paste(label.tags[i], "::", sep=""), as.vector(seurat.obj@ident), sep="")
    # plain list assignment instead of eval(parse(...))
    data.list[[sprintf("data%s", i)]] = seurat.obj
  }

  print("Subsetting and merging datasets ...")
  seurat.obj = Reduce(f=MergeSeurat, x=data.list)
  seurat.obj %<>% SetAllIdent(id="AGA_labels")

  # save raw data to disk
  raw_data = seurat.obj@raw.data
  raw_data = raw_data[rownames(seurat.obj@data), colnames(seurat.obj@data)]
  writeMM(raw_data, file.path(output_folder_material, "raw_data.mtx"))
  # save gene names
  gene_names = rownames(raw_data)
  write.csv(data.frame(Genes = gene_names), file.path(output_folder_material, "genenames.csv"))
  # save cell names
  cell_names = colnames(raw_data)
  write.csv(data.frame(Cells = cell_names), file.path(output_folder_material, "cellnames.csv"))
  # write cell labels to disk (the original wrote this same file twice)
  write.csv(data.frame(Cells = names(seurat.obj@ident), Labels = seurat.obj@ident), file.path(output_folder_material, "cell_labels.csv"), row.names = FALSE)

  # running AGA
  # the folder and block name are shell-quoted so that spaces in them do not split the arguments
  command = paste(python.addr, file.path(tool_addr, "AGA/AGA_from_Seurat.py"), shQuote(AGA_save_to), shQuote(AGA_name), sep = " ")
  run.command(command)

  # read the AGA output
  coordinates = read.csv(file.path(AGA_folder, "coordinates.csv"), row.names = 1)
  connectivities = read.csv(file.path(AGA_folder, "connectivities.csv"), row.names = 1)
  # take the column names from the row names of the same table
  colnames(connectivities) = rownames(connectivities)
  cell.labels = rownames(coordinates)
  cell.colours = sample(colorRampPalette(brewer.pal(12, "Paired"))(length(cell.labels)))

  ######## now make the interactive AGA app
  #########################################
  print("Making the AGA app ... ")
  # save colours
  colours.df = data.frame(CellTypes = cell.labels, Colours = cell.colours)
  write.csv(colours.df, file.path(AGA_folder, "colours.csv"), row.names = FALSE)
  # run python to build the AGA app
  command = sprintf("%s make_AGA_app.py %s %s", python.addr, shQuote(AGA_save_to), shQuote(AGA_name))
  run.command(command)
  # move the page out of the per-block folder, which is deleted at the end of the iteration
  AGA.file = file.path(AGA_save_to, paste(c("AGAlinkage_map_", AGA_name, ".html"), collapse = ""))
  AGA.final.destination = file.path(output_folder, paste(c("AGAlinkage_map_", AGA_name, ".html"), collapse = ""))
  if(!file.rename(from=AGA.file, to=AGA.final.destination)){
    warning(sprintf("Could not move %s to %s", AGA.file, AGA.final.destination))
  }

  # make FDG
  print("Making force directed graph interactive app ...")
  seurat.obj = FindVariableGenes(object = seurat.obj, mean.function = ExpMean,
                                 dispersion.function = LogVMR, x.low.cutoff = .0125,
                                 x.high.cutoff = 3, y.cutoff = .625, do.plot=FALSE)
  seurat.obj = ScaleData(object=seurat.obj)
  seurat.obj = RunPCA(object = seurat.obj, pc.genes = seurat.obj@var.genes, do.print = FALSE)
  seurat.obj = BuildSNN(object=seurat.obj, reduction.type="pca", dims.use=1:20, plot.SNN=FALSE, force.recalc=TRUE)
  fdg_coordinates = runFDG(pca.df=seurat.obj@dr$pca@cell.embeddings, snn=seurat.obj@snn, iterations=2000, tool_addr=tool_addr, python.addr=python.addr)
  seurat.obj = SetDimReduction(object=seurat.obj, reduction.type="fdg", slot="cell.embeddings", new.data=as.matrix(fdg_coordinates))
  seurat.obj = SetDimReduction(object=seurat.obj, reduction.type="fdg", slot = "key", new.data = "fdg")

  interactive_plot_df = data.frame(X = seurat.obj@dr$fdg@cell.embeddings[, 1],
                                   Y = seurat.obj@dr$fdg@cell.embeddings[, 2])
  interactive_plot_df$Labels = factor(seurat.obj@ident, levels = cell.labels)
  # same label-to-colour mapping as the one saved to colours.csv
  interactive_plot_df$Colours = mapvalues(x = interactive_plot_df$Labels, from = cell.labels, to = cell.colours)
  interactive_fdg_filename = file.path(output_folder, paste(paste("Interactive_FDG", AGA_name, sep = "_"), "html", sep = "."))
  make_2D_interactive_page(data_frame_2D=interactive_plot_df, tool_addr=tool_addr, python.addr=python.addr, save.to=interactive_fdg_filename)

  # remove the per-block work folder (material and AGA_folder)
  unlink(AGA_save_to, recursive=TRUE, force=TRUE)
}

print("Ended beautifully ... ")

View file

@ -0,0 +1,11 @@
#!/bin/bash
#$ -cwd
#$ -N multiple_AGAs
#$ -V
#$ -l h_rt=23:59:59
#$ -l h_vmem=100G

# Grid Engine job script: runs multiple_AGAs.R on the option file given as
# first argument.
if [ "$#" -lt 1 ]; then
    echo "Usage: $0 <option_file>" >&2
    exit 1
fi

# quoted so that an option file path containing spaces stays one argument
Rscript multiple_AGAs.R "$1"

echo "End on $(date)"

View file

@ -0,0 +1,29 @@
################################################################################
# making AGA on HSC and lymphoid in bone marrow and liver
# pDCs are also included
name: lymphoids_bm_lv
data1: bonemarrow_reference_annotation_10X.RDS
data2: liver_F21F22F23_reference_annotation_10X.RDS
set.ident.1: cell.labels
set.ident.2: cell.labels
tag1: BM
tag2: LV
categories1: HSPC, pro-B, pre-B, T cell, naïve B cell, pDC, pre pDC
categories2: pro-B cell, B cell, pre-B cell, NK, Pre pro B cell, HSC/MPP, pDC precursor, ILC progenitor
################################################################################
# making AGA on HSC and a few myeloids in bone marrow, liver and spleen
# pDCs are also included
name: myeloids_bm_liver
data1: bonemarrow_reference_annotation_10X.RDS
data2: liver_F21F22F23_reference_annotation_10X.RDS
data3: spleen_reference_annotation_10X.RDS
set.ident.1: cell.labels
set.ident.2: cell.labels
set.ident.3: cell.labels
tag1: BM
tag2: LV
tag3: SP
categories1: myeloid precursor, HSPC, tissue macrophage, pDC, DC2
categories2: Mono-Mac, DC2, Monocyte, HSC/MPP
categories3: cDC2, HSC, Macrophage, Neut_Myeloid
################################################################################

View file

@ -0,0 +1,215 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Interactive linkage plot</title>
<meta name="description" content="An interactive plot for the linkage map">
<meta name="author" content="Dorin-Mirel Popescu">
</head>
<body>
<ul>
<li>Bubble size reflects population size; Edge thickness reflects connectivity scores;</li>
<li>Use the sliders to set plotting parameters;</li>
<li>Click the canvas area to select a cell population and reposition it by dragging;</li>
<li>The plot can be saved by right-clicking the canvas area and choosing 'Save as'; for a higher image resolution, increase the canvas area, font size and scales before saving;</li>
</ul>
<!-- plotting controls: a header row of labels and a row with one slider per label -->
<table>
<tr>
<td>Canvas width</td><td>Canvas height</td><td>Size scale</td><td>Edge scale</td><td>Edge threshold</td><td>Font size</td>
</tr>
<tr>
<td><input type = 'range' min = '100' max = '3000' value = '500' onchange = 'setWidth(this.value)' /></td>
<td><input type = 'range' min = '100' max = '3000' value = '500' onchange = 'setHeight(this.value)' /></td>
<td><input type = 'range' min = '0' max = '300' value = '10' onchange = 'setSizeScale(this.value)' /></td>
<td><input type = 'range' min = '.1' max = '15' step = '.1' value = '5' onchange = 'setEdgeScale(this.value)' /></td>
<td><input type = 'range' min = '0' max = '1' step = '.001' value = '0' onchange = 'setEdgeThreshold(this.value)'/></td>
<td><input type = 'range' min = '5' max = '80' value = '10' step = '1' onchange = 'setFontSize(this.value)' /></td>
</tr>
</table>
<canvas id = 'canvas' width = '500' height = '500'></canvas>
<script type = 'text/javascript'>
// global parameters
var canvas = document.getElementById('canvas'),
    canvasW = 500,              // canvas width in pixels, same as the initial 'Canvas width' slider value
    canvasH = 500,              // canvas height in pixels, same as the initial 'Canvas height' slider value
    sizeScale = .1,             // multiplies the population size to get the bubble area
    edgeScale = 5,              // multiplies the connectivity score to get the line width
    edgeT = 0,                  // edges with a score not greater than this are not drawn
    fontSize = 10,              // label font size in pixels
    context = canvas.getContext('2d'),
    mouseX = 0,                 // cursor position (canvas pixels) at the last mousedown
    mouseY = 0,
    currentX = 0,               // cursor position (canvas pixels) during dragging
    currentY = 0,
    selectedX = 0,              // coordinates of the selected bubble at the moment it was selected
    selectedY = 0,
    selectedPopulation = null;  // cell name of the selected bubble, null when the click hit nothing
// data placeholders
// the first two are keyed by cell name, so they are plain objects: using arrays as maps
// breaks for names that collide with array properties such as "length"
var data_coordinates = {},      // for each cell name include x coordinate, y coordinate, radius value and colour
    data_edges = {},            // for each cell name include an array of edge values, ordered like cell_names
    data_composition = [],      // for each cell name include 8 numbers (first 4 for male gender, last 4 for female gender); not read by the code in this page
    cell_names = [];            // cell names in drawing order; assigned by the generated data
// the marker comment below is replaced by the generated data assignments and must stay unchanged
// insert data here
// slider callback: resize the canvas to the requested width (in pixels) and redraw
function setWidth(value){
    var requestedWidth = parseFloat(value);
    canvasW = requestedWidth;
    canvas.width = requestedWidth;
    // fetch the drawing context again after the canvas was resized
    context = canvas.getContext('2d');
    draw();
}
// slider callback: resize the canvas to the requested height (in pixels) and redraw
function setHeight(value){
    var requestedHeight = parseFloat(value);
    canvasH = requestedHeight;
    canvas.height = requestedHeight;
    // fetch the drawing context again after the canvas was resized
    context = canvas.getContext('2d');
    draw();
}
// slider callback: update the bubble size scale and redraw
// the slider value is a percentage, so it is divided by 100
function setSizeScale(value){
    var percent = parseFloat(value);
    sizeScale = percent / 100;
    draw();
}
// slider callback: update the factor applied to edge values when computing line width, then redraw
function setEdgeScale(value){
    var requestedScale = parseFloat(value);
    edgeScale = requestedScale;
    draw();
}
// slider callback: update the edge threshold and redraw
// only edges with a value greater than the threshold are drawn
function setEdgeThreshold(value){
    var requestedThreshold = parseFloat(value);
    edgeT = requestedThreshold;
    draw();
}
// slider callback: update the font size (in pixels) of the cell name labels and redraw
function setFontSize(value){
    var requestedSize = parseInt(value);
    fontSize = requestedSize;
    draw();
}
// function to draw the canvas: background first, then every edge, then every bubble with its label
// edges are drawn in a separate first pass so that bubbles and labels always end up on top of them
function draw(){
    // declared locally; the original loops leaked 'key' as an implicit global
    var key, bubble_data, bubbleX, bubbleY, bubbleR;

    // clear canvas by drawing a rectangle
    context.fillStyle = '#efefef';
    context.fillRect(0, 0, canvas.width, canvas.height);

    // first pass: loop through all the cell names and draw the edges whose value is greater than the edge threshold
    context.strokeStyle = '#888888';
    for (key in data_coordinates){
        bubble_data = data_coordinates[key];
        // coordinates are stored as fractions of the canvas size; y is flipped because canvas y grows downwards
        bubbleX = canvasW * bubble_data[0];
        bubbleY = canvasH * (1 - bubble_data[1]);
        // a population without edge data simply gets no edges
        (data_edges[key] || []).forEach(function(edgeVal, i){
            // position i of an edge array refers to the cell name at position i of cell_names
            var connectingBubble = data_coordinates[cell_names[i]];
            if (edgeVal > edgeT && connectingBubble){
                context.lineWidth = edgeVal * edgeScale;
                context.beginPath();
                context.moveTo(bubbleX, bubbleY);
                context.lineTo(canvasW * connectingBubble[0], canvasH * (1 - connectingBubble[1]));
                context.stroke();
            }
        });
    }

    // second pass: draw each bubble (area reflects population size) and write the cell name above it
    context.font = parseInt(fontSize, 10) + 'px arial';
    context.textAlign = 'center';
    // valid values are lower case; the original 'Alphabetical' was ignored, leaving this same default
    context.textBaseline = 'alphabetic';
    for (key in data_coordinates){
        bubble_data = data_coordinates[key];
        bubbleX = canvasW * bubble_data[0];
        bubbleY = canvasH * (1 - bubble_data[1]);
        // the scaled size is used as an area, hence the square root for the radius
        bubbleR = Math.sqrt(sizeScale * bubble_data[2]);
        // draw bubble
        context.fillStyle = bubble_data[3];
        context.beginPath();
        context.arc(bubbleX, bubbleY, bubbleR, 0, 2 * Math.PI, false);
        context.fill();
        // write cell name
        context.fillStyle = 'black';
        context.fillText(key, bubbleX, bubbleY - bubbleR - 2);
    }
}
// function that takes a mouse event as input and returns the [x, y] position of the cursor
// relative to the top-left corner of the canvas
function getEventCoordinates(event){
    // left/top are used instead of x/y because they are available on every
    // bounding rectangle implementation, while x/y are missing in older browsers
    var canvasRect = canvas.getBoundingClientRect(),
        X = event.clientX - canvasRect.left,
        Y = event.clientY - canvasRect.top;
    return [X, Y];
}
// function that ends the dragging of the selected cell population
function stopDraging(event){
    // place the data point at the location where it was dropped
    dragDataPoint(event);
    // detach the drag handlers from the canvas: first the move handler, then this function itself
    [['mousemove', dragDataPoint], ['mouseup', stopDraging]].forEach(function(binding){
        canvas.removeEventListener(binding[0], binding[1]);
    });
}
// function that moves the selected bubble so that it follows the cursor
function dragDataPoint(event){
    var pointer = getEventCoordinates(event),
        selected = data_coordinates[selectedPopulation];
    currentX = pointer[0];
    currentY = pointer[1];
    // cursor displacement since mousedown, as a fraction of the canvas size, applied to the
    // coordinates the bubble had when selected; the sign differs for y because the
    // data y axis points up while the canvas y axis points down
    selected[0] = selectedX - (mouseX - currentX) / canvasW;
    selected[1] = selectedY + (mouseY - currentY) / canvasH;
    // then draw
    draw();
}
// draw the canvas and add the event listeners only when the entire document is loaded
window.onload = function(){
    draw();

    // ends a drag on mouse release and also when the cursor leaves the canvas; without the
    // second case a button released outside the canvas would leave the bubble attached to the cursor
    function endDrag(event){
        canvas.removeEventListener('mouseup', endDrag);
        canvas.removeEventListener('mouseleave', endDrag);
        stopDraging(event);
    }

    canvas.addEventListener('mousedown', function(event){
        // 'key' is declared locally; the original loop leaked it as an implicit global
        var XY = getEventCoordinates(event),
            hit = false,
            key, bubble_data, bubbleX, bubbleY, bubbleR, dx, dy;
        mouseX = XY[0];
        mouseY = XY[1];
        // loop through all the data points and check for hit
        // the loop is not stopped at the first hit: when bubbles overlap, the last matching
        // one in iteration order is selected
        for (key in data_coordinates){
            bubble_data = data_coordinates[key];
            bubbleX = canvasW * bubble_data[0];
            bubbleY = canvasH * (1 - bubble_data[1]);
            bubbleR = Math.sqrt(sizeScale * bubble_data[2]);
            dx = mouseX - bubbleX;
            dy = mouseY - bubbleY;
            if (Math.sqrt(dx * dx + dy * dy) < bubbleR){
                hit = true;
                selectedPopulation = key;
                // remember where the bubble was when the drag started
                selectedX = bubble_data[0];
                selectedY = bubble_data[1];
            }
        }
        if (hit){
            canvas.addEventListener('mousemove', dragDataPoint);
            canvas.addEventListener('mouseup', endDrag);
            canvas.addEventListener('mouseleave', endDrag);
        }else{
            selectedPopulation = null;
        }
    });
};
</script>
</body>
</html>