From eaebd3f0b05a7d7b72a008b5e403a32b063a66b5 Mon Sep 17 00:00:00 2001 From: veghp Date: Fri, 13 Sep 2019 16:33:04 +0100 Subject: [PATCH] Added pipeline 91b --- README.md | 6 + .../compile_app.py | 52 ++ .../interactive_heatmap_dotplot.R | 25 + .../interactive_heatmap_dotplot.sh | 16 + .../options/options_csv.txt | 4 + .../91b_interactive_heatmap_csv/template.html | 775 ++++++++++++++++++ 6 files changed, 878 insertions(+) create mode 100755 pipelines/91b_interactive_heatmap_csv/compile_app.py create mode 100755 pipelines/91b_interactive_heatmap_csv/interactive_heatmap_dotplot.R create mode 100755 pipelines/91b_interactive_heatmap_csv/interactive_heatmap_dotplot.sh create mode 100755 pipelines/91b_interactive_heatmap_csv/options/options_csv.txt create mode 100755 pipelines/91b_interactive_heatmap_csv/template.html diff --git a/README.md b/README.md index 5c44b57..62c963d 100755 --- a/README.md +++ b/README.md @@ -589,6 +589,12 @@ This repository contains tools for making web portals and interactive tools used * line 5: a short line describing the data which will be included in the interactive page * IMPORTANT NOTICE: if the vector partitioning the data (i.e. meta data column) is using integer indices (e.g. Louvain clustering which assigns integer identifies to clusters) it is highly recommended to pre-append the tag "Cluster\_" to all indices (e.g. "1" and "103" becomes "Cluster\_1" and "Cluster\_103" respectively). Failure to do so will not raise any errors, but the resulting interactive heatmap/dot plot will have glitches. +* A version of this script (91b) creates an HTML file from a CSV file containing expression levels (columns: genes, rows: cell types; the first data cell is empty). 
For this, the option file format is: + * line 1: expressions csv data filepath + * line 2: name of output folder + * line 3: name of interactive html page + * line 4: a short line describing the data + ### pseudotime_webportal.sh * see an example [here](https://developmentcellatlas.ncl.ac.uk/datasets/pseudotime_liver_blin/) * this creates a web portal useful for exploring the results of a trajectory analysis diff --git a/pipelines/91b_interactive_heatmap_csv/compile_app.py b/pipelines/91b_interactive_heatmap_csv/compile_app.py new file mode 100755 index 0000000..b100311 --- /dev/null +++ b/pipelines/91b_interactive_heatmap_csv/compile_app.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +import sys +args = sys.argv +options_file = args[1] + +with open(options_file, "r") as options: + options_fields = options.readlines() + csv_filename = options_fields[0].strip() + output_dir = options_fields[1].strip() + save_to = options_fields[2].strip() + data_name = options_fields[3].strip() + +# function to round values to 2 decimal places - for memory efficiency +def truncateFloat(val): + return round(val, 2) + +# join save_to to output_dir to get the path of the output html file +from os.path import join +save_to = join(output_dir, save_to) + +# open the required csv file +import pandas as pd +expression_data = pd.read_csv(csv_filename, index_col = 0, header = 0) +expression_data = expression_data.apply(truncateFloat) + +# populate data: one 'expression_data["<gene>"] = [...]' line per gene, values in cell_names order +gene_names = expression_data.columns.values +cell_names = expression_data.index.values +data = [] +for gene_name in gene_names: + indata = expression_data[gene_name][cell_names].values + indata = [str(d) for d in indata] + indata = ','.join(indata) + indata = 'expression_data["{gene_name}"] = [{values}]'.format(gene_name = gene_name, values = indata) + data.append(indata) +data = '\n'.join(data) +cell_names = ['"{cell_name}"'.format(cell_name = cell_name) for cell_name in cell_names] +cell_names = ','.join(cell_names) +cell_names = 
'cell_names = [{values}]'.format(values = cell_names) + +data_name_var = "dataset_name = '{dataname}'".format(dataname = data_name) +data = '\n'.join([data, cell_names, data_name_var]) + +# read template.html from the current working directory and insert data at its '// insert data here' marker +template_addr = 'template.html' +template_fobj = open(template_addr, 'r') +template = template_fobj.read() +template_fobj.close() + +with open(save_to, 'w') as save_to_fobj: + template = template.replace('// insert data here', data) + save_to_fobj.write(template) diff --git a/pipelines/91b_interactive_heatmap_csv/interactive_heatmap_dotplot.R b/pipelines/91b_interactive_heatmap_csv/interactive_heatmap_dotplot.R new file mode 100755 index 0000000..1cadf3d --- /dev/null +++ b/pipelines/91b_interactive_heatmap_csv/interactive_heatmap_dotplot.R @@ -0,0 +1,25 @@ +library(Seurat) +library(methods) + +python.addr = 'python' + +args = commandArgs(trailingOnly=T) +options_file = args[1] + +options_fobj = file(options_file, 'r') +options_fields = readLines(options_fobj) +close(options_fobj) + +file_name = options_fields[1] +output_folder = options_fields[2] +save_to = options_fields[3] +data_name = options_fields[4] + +dir.create(output_folder) + +# start the python script +command = sprintf('%s compile_app.py %s', python.addr, options_file) +system(command, wait = T) + +# end +print('Ended beautifully') diff --git a/pipelines/91b_interactive_heatmap_csv/interactive_heatmap_dotplot.sh b/pipelines/91b_interactive_heatmap_csv/interactive_heatmap_dotplot.sh new file mode 100755 index 0000000..7f32e03 --- /dev/null +++ b/pipelines/91b_interactive_heatmap_csv/interactive_heatmap_dotplot.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +#$ -cwd +#$ -N interactive_heatmap_dotplot +#$ -V +#$ -l h_rt=47:59:59 +#$ -l h_vmem=100G + +if [ "$#" -ne 1 ]; then + echo "Illegal number of parameters" + exit 1 +fi + +Rscript interactive_heatmap_dotplot.R $1 + +echo "End on `date`" diff --git a/pipelines/91b_interactive_heatmap_csv/options/options_csv.txt 
b/pipelines/91b_interactive_heatmap_csv/options/options_csv.txt new file mode 100755 index 0000000..fba5404 --- /dev/null +++ b/pipelines/91b_interactive_heatmap_csv/options/options_csv.txt @@ -0,0 +1,4 @@ +../../data/expression.csv +interactive_gene_expression_heatmap_from_csv +interactive_gene_expression_heatmap.html +Test data \ No newline at end of file diff --git a/pipelines/91b_interactive_heatmap_csv/template.html b/pipelines/91b_interactive_heatmap_csv/template.html new file mode 100755 index 0000000..c47269b --- /dev/null +++ b/pipelines/91b_interactive_heatmap_csv/template.html @@ -0,0 +1,775 @@ + + + + + + + Interactive heatmap + + + + + + +
+
Data set:
+
Description: An interactive environment for exploring the expression of multiple genes in a data set using dot plots and heatmaps.
+
+ Instructions: +
    +
  • Switch between cell types on x axis and gene names on x axis using the drop-down menu at "Layout"
  • +
  • Switch between dot plot and heatmap using the drop-down menu at "Plot type"
  • +
  • The "Cell type selection menu" allows switching on/off of particular cell types. Changes will be reflected in the plots immediately. If there are too many cell types, use the horizontal scrollbar to navigate the list of cell types.
  • +
  • In the "Cell type selection menu" it is also possible to type or copy/paste the list of desired cell types
  • +
  • The "Gene selection menu" contains functionalities for editing the list of genes that are plotted
  • +
  • The "Add new gene" button adds a randomly chosen gene to the list. The plots are redrawn to include the additional gene
  • +
  • Alternatively, a list of genes can be pasted in the text box to the right of the button mentioned above. The list can have the gene names separated by comma, space, semicolon or tab
  • +
  • Each gene being plotted has a dedicated field which indicates its name and a "Remove" button
  • +
  • If the gene name in any of these boxes is changed to a valid gene name, the plots will be updated to reflect this change
  • +
  • Finally, the user can click to choose any gene or cell type directly on the plot and reorder them by dragging
  • +
  • Plots can be saved as png files by right-clicking on the plot area and choosing "Save as"
  • +
  • At the bottom of the page there is a list of genes and a list of cell types used in the plot. These lists can be copy/pasted and saved for easy restoring of the plots in a future session.
  • +
+
+
Applications: FACS sorting panel design; data annotation; cell type validation; exploring groups of genes relevant to a function (e.g. cycling genes)
+

+
+
Layout: + Plot type: +
+ + +
+
+
List of cell types: +
+
+
List of genes: +
+
+
+ + +