mirror of
https://github.com/haniffalab/scRNA-seq_analysis.git
synced 2024-10-23 08:29:24 -07:00
scRNA-seq_analysis
This commit is contained in:
commit
82cc2d191e
188 changed files with 146184 additions and 0 deletions
90
pipelines/13_pseudotime/pdt_scanpy.py
Executable file
90
pipelines/13_pseudotime/pdt_scanpy.py
Executable file
|
|
@ -0,0 +1,90 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Created on Tue Aug 14 15:01:36 2018
|
||||
|
||||
@author: doru
|
||||
"""
|
||||
print("starting .py script")

import sys

# Expected invocation: pdt_scanpy.py <root_cell_type> <CWD>
#   root_cell_type: cell label whose first matching cell becomes the
#                   pseudotime root (compared against the "cell_labels"
#                   column further down in this script)
#   CWD:            directory that contains the "material" folder with the
#                   input files; the outputs are written there as well
args = sys.argv
if len(args) < 3:
    # Fail with a readable message instead of an IndexError on args[2].
    sys.exit("usage: {script} <root_cell_type> <CWD>".format(script=args[0]))

root_cell_type = args[1]
CWD = args[2]
print("printing args")
print(args)
# use the args below if you have a root cell type containing spaces and @@'s
#root_cell_type = args[1] + " " + args[2]
#CWD = args[3]
|
||||
|
||||
import matplotlib; matplotlib.use('Agg');
|
||||
import scanpy.api as sc;
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
# echo the two command-line inputs so they show up in the job log
print("printing root_cell_type")
print(root_cell_type)
print("printing CWD")
print(CWD)

sc.settings.verbosity = 3

# locations of the input files inside the working directory
counts_path = "{CWD}/material/raw_data.mtx".format(CWD=CWD)
genes_path = "{CWD}/material/genenames.csv".format(CWD=CWD)
cells_path = "{CWD}/material/cellnames.csv".format(CWD=CWD)
labels_path = "{CWD}/material/cell_labels.csv".format(CWD=CWD)

# read the count matrix (no on-disk cache) and transpose it; after the
# transpose the variables receive the gene names and the observations the
# cell names assigned below
scObj = sc.read(counts_path, cache=False).T

# gene names: second column of genenames.csv
gene_table = pd.read_csv(genes_path)
scObj.var_names = gene_table.iloc[:, 1]

# cell names: second column of cellnames.csv
cell_table = pd.read_csv(cells_path)
scObj.obs_names = cell_table.iloc[:, 1]

# cell labels: first csv column is used as the index of the table
cell_labels = pd.read_csv(labels_path, index_col=0)
scObj.obs["cell_labels"] = cell_labels
|
||||
|
||||
# filter out genes present in less than 3 cells
sc.pp.filter_genes(scObj, min_cells=3)

# keep a log1p-transformed copy of the (gene-filtered, un-normalized) data
# in .raw before the working matrix is modified any further
scObj.raw = sc.pp.log1p(scObj, copy=True)
# normalize every cell to a total of 1e4 counts
sc.pp.normalize_per_cell(scObj, counts_per_cell_after=1e4)

# variable genes (selected on the normalized, not yet log-transformed values)
filter_result = sc.pp.filter_genes_dispersion(
    scObj.X, min_mean=0.0125, max_mean=3, min_disp=0.5)
# subset data on variable genes; take an explicit copy so the in-place
# transformations below operate on a real object rather than on a view of
# the full dataset
scObj = scObj[:, filter_result.gene_subset].copy()
# log-transform the normalized counts of the selected genes (the earlier
# log1p call only produced the copy stored in .raw, scObj.X was untouched)
sc.pp.log1p(scObj)

# scale the data; values above 10 after scaling are clipped to 10
sc.pp.scale(scObj, max_value=10)
|
||||
|
||||
# run pca with the library's default settings
sc.tl.pca(scObj)

# compute neighborhood graph
neighbor_params = dict(
    n_neighbors=15,
    n_pcs=20,
    knn=True,
    random_state=10,
    method="gauss",
)
sc.pp.neighbors(scObj, **neighbor_params)

# compute diffusion map
n_diffusion_components = 20
sc.tl.diffmap(scObj, n_comps=n_diffusion_components)
|
||||
|
||||
# set root: the first cell (in observation order) whose label equals the
# requested root cell type
root_candidates = np.flatnonzero(scObj.obs['cell_labels'] == root_cell_type)
if root_candidates.size == 0:
    # Without this check a missing/misspelled label surfaces as an opaque
    # "index 0 is out of bounds" IndexError.
    raise ValueError(
        "root cell type {!r} not found in cell_labels".format(root_cell_type))
scObj.uns['iroot'] = root_candidates[0]

# compute dpt
print("computing sc.tl.dpt")
sc.tl.dpt(scObj, n_dcs = 20)

# pdt is at scObj.obs["dpt_pseudotime"]
print("displaying pdt table stored in scObj")
print(scObj.obs["dpt_pseudotime"])
# write the pseudotime to disk; to_csv returns None when given a path, so
# the result is not bound to a name
# NOTE(review): the default of `header` for Series.to_csv differs between
# pandas versions - confirm what the downstream reader expects
scObj.obs["dpt_pseudotime"].to_csv("{CWD}/material/pseudotime.csv".format(CWD=CWD))
|
||||
|
||||
# save the diffusion map coordinates computed by sc.tl.diffmap
dm = scObj.obsm["X_diffmap"]
dm = pd.DataFrame(data=dm)
# header=False: no header row is written; the default integer row index is
# still written as the first column
dm.to_csv("{CWD}/material/dm.csv".format(CWD=CWD), header=False)
|
||||
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue