#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 1 15:18:12 2018

@author: doru

Run scanpy's PAGA (`sc.tl.paga`) on a raw count matrix and export the
resulting graph as CSV files.

Usage:
    python <this_script> <working_directory>

Inputs, read relative to <working_directory>:
    ./material/raw_data.mtx     expression matrix (transposed after loading)
    ./material/genenames.csv    second column is used as the gene names
    ./material/cellnames.csv    second column is used as the cell names
    ./material/cell_labels.csv  first column is the index; must contain a
                                "Labels" column

Outputs, written relative to <working_directory>:
    AGA_folder/connectivities.csv  thresholded PAGA connectivities between
                                   label groups
    AGA_folder/coordinates.csv     X/Y position and population size of each
                                   label group
    a scanpy PAGA plot saved via ``save="_ugly_scanpy_plot.pdf"``
"""

import sys

args = sys.argv
# Fail with a readable message instead of a bare IndexError when the working
# directory argument is missing.
if len(args) < 2:
    sys.exit("usage: {} <working_directory>".format(args[0]))
CWD = args[1]

from os import chdir, makedirs

# All input and output paths below are relative to this directory.
chdir(CWD)

# The Agg backend is selected before scanpy is imported, as in the original
# script (Agg is a non-interactive backend, so no display is needed).
import matplotlib
matplotlib.use('Agg')
import scanpy.api as sc
import pandas as pd
from scipy.sparse import csr_matrix
import numpy as np

# Connectivities strictly below this value are zeroed before export.
# (A cutoff of .1 was left commented out in the original script.)
MIN_CONNECTIVITY = .05

sc.settings.verbosity = 3

# Load the matrix and swap its axes.
# NOTE(review): presumably the file is stored genes x cells - confirm.
scObj = sc.read("./material/raw_data.mtx", cache = False).T

# load gene names
scObj.var_names = pd.read_csv("./material/genenames.csv").iloc[:, 1]

# load cell names
scObj.obs_names = pd.read_csv("./material/cellnames.csv").iloc[:, 1]

# filter out genes present in less than 3 cells
sc.pp.filter_genes(scObj, min_cells=3)

# Keep a log1p-transformed copy of the (not yet normalized) data as .raw,
# then normalize the working matrix per cell.
scObj.raw = sc.pp.log1p(scObj, copy=True)
sc.pp.normalize_per_cell(scObj, counts_per_cell_after=1e4)

# variable genes
filter_result = sc.pp.filter_genes_dispersion(
    scObj.X, min_mean=0.0125, max_mean=3, min_disp=0.5)

# Subset data on variable genes. Slicing an AnnData yields a view; take an
# explicit copy because the following steps modify the object in place.
scObj = scObj[:, filter_result.gene_subset].copy()

# Log-transform the normalized matrix before scaling.
# NOTE(review): the original author marked this step as "not sure?".
sc.pp.log1p(scObj)

# scale the data
sc.pp.scale(scObj, max_value=10)

# run pca
sc.tl.pca(scObj)

# compute neighborhood graph
sc.pp.neighbors(scObj, n_neighbors = 15, n_pcs = 20, knn = True,
                random_state = 10, method = "gauss")

# Add cell labels. The "Labels" column is assigned explicitly (it is the same
# column used for the population sizes below) rather than the whole frame.
cell_labels = pd.read_csv("./material/cell_labels.csv", index_col = 0)
scObj.obs["cell_labels"] = cell_labels["Labels"]

# run paga
sc.tl.paga(scObj, groups = "cell_labels")

# Save scanpy's own PAGA plot to disk for comparison to the plot generated by
# ggplot - for troubleshooting.
# NOTE: a per-category variant (a `split_cat` value from argv[2] prefixed to
# the plot file name) was sketched in the original script but is disabled.
sc.pl.paga(scObj, save = "_ugly_scanpy_plot.pdf", show = True,
           edge_width_scale = .4, solid_edges = "connectivities", layout = "fa")

# prepare the output and save it to disk
# Make sure the output directory exists so the exports cannot fail at the very
# end of the run just because the folder is missing.
makedirs("AGA_folder", exist_ok = True)

cell_cats = list(scObj.obs["cell_labels"].cat.categories)

# Number of cells per label, ordered like the PAGA groups.
population_size = cell_labels["Labels"].value_counts()
population_size = population_size[cell_cats].values

# Densify into a fresh float64 array; thresholding it in place therefore
# leaves scObj.uns["paga"] untouched.
connectivities = np.asarray(
    csr_matrix(scObj.uns["paga"]["connectivities"]).toarray(),
    dtype = "float64")
connectivities[connectivities < MIN_CONNECTIVITY] = 0.0
connectivities = pd.DataFrame(connectivities, columns = cell_cats, index = cell_cats)
connectivities.to_csv("./AGA_folder/connectivities.csv", index = True, header = True)

coordinates = pd.DataFrame(scObj.uns["paga"]["pos"], columns = ["X", "Y"], index = cell_cats)
coordinates["Size"] = pd.Series(population_size, index = coordinates.index)
coordinates.to_csv("AGA_folder/coordinates.csv", index = True, header = True)