mirror of
https://github.com/haniffalab/scRNA-seq_analysis.git
synced 2024-10-23 08:29:24 -07:00
scRNA-seq_analysis
This commit is contained in:
commit
82cc2d191e
188 changed files with 146184 additions and 0 deletions
31
pipelines/93_gene_grouping/clustering.py
Executable file
31
pipelines/93_gene_grouping/clustering.py
Executable file
|
|
@ -0,0 +1,31 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Created on Tue Jan 15 20:52:21 2019
|
||||
|
||||
@author: doru
|
||||
"""
|
||||
|
||||
import sys
|
||||
from os.path import join
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
def parse_arguments(argv):
    """Return ``(output_folder, no_clusters)`` parsed from *argv*.

    *argv* has the same layout as ``sys.argv``: the program name, then the
    output folder, then the number of clusters. Extra trailing arguments are
    ignored. The process exits with a usage message when an argument is
    missing or the cluster count is not an integer.
    """
    usage = "Usage: clustering.py <output_folder> <no_clusters>"
    if len(argv) < 3:
        sys.exit(usage)
    try:
        no_clusters = int(argv[2])
    except ValueError:
        sys.exit("no_clusters must be an integer\n" + usage)
    return argv[1], no_clusters


def build_cluster_table(gene_names, labels):
    """Return a DataFrame with a ``GeneNames`` and a ``Cluster`` column."""
    return pd.DataFrame({"GeneNames": list(gene_names), "Cluster": list(labels)})


def main(argv):
    """Cluster the columns of ``expression.csv`` and write ``clustering.csv``.

    ``expression.csv`` is read from the output folder with its first column
    as the index. The table is transposed so that every column of the file
    becomes one sample for the Gaussian mixture model; the column headers are
    written out as ``GeneNames`` next to the predicted component.
    """
    # imported here so the helpers above stay usable without scikit-learn
    from sklearn.mixture import GaussianMixture

    output_folder, no_clusters = parse_arguments(argv)

    expression_file = join(output_folder, "expression.csv")
    expression_df = pd.read_csv(expression_file, index_col = 0)
    expression = np.transpose(expression_df.values)

    # fixed random_state keeps the assignment reproducible between runs
    model = GaussianMixture(n_components = no_clusters, random_state = 19).fit(expression)
    labels = model.predict(expression)

    # save the output
    table = build_cluster_table(expression_df.columns, labels)
    table.to_csv(join(output_folder, "clustering.csv"))


if __name__ == "__main__":
    main(sys.argv)
|
||||
118
pipelines/93_gene_grouping/gene_grouping.R
Executable file
118
pipelines/93_gene_grouping/gene_grouping.R
Executable file
|
|
@ -0,0 +1,118 @@
|
|||
# Parse the command line.
# All trailing arguments are glued together and split on ";", so the pipeline
# expects one "name = value" R expression per parameter, separated by ";".
args = commandArgs(trailingOnly=T)
args = paste(args, collapse = "")
args = unlist(strsplit(args, ";"))

# One line per expected parameter; evaluating this text binds each raw
# argument string to <name>.arg.
arguments.list = "
seurat.addr.arg = args[1]
no_clusters.arg = args[2]
"

python.addr = "python"

expected_arguments = unlist(strsplit(arguments.list, "\n"))
expected_arguments = expected_arguments[!(expected_arguments == "")]

if(length(args) != length(expected_arguments)){
  # list the parameter names so the user can see what is missing
  expected_names = unlist(lapply(strsplit(expected_arguments, ".arg", fixed = TRUE), "[", 1))
  expected_names = paste(expected_names, collapse = "\n")
  stop(sprintf('This pipeline requires %s parameters: \n%s', as.character(length(expected_arguments)), expected_names))
}

eval(parse(text = arguments.list))

for(n in seq_along(expected_arguments)){
  argument = expected_arguments[n]
  argument = gsub(pattern=" ", replacement="", x=argument)
  argument.name = unlist(strsplit(argument, "="))[1]
  # strip the literal ".arg" suffix to get the variable the user must define
  variable.name = sub(pattern="\\.arg$", replacement="", x=argument.name)
  argument.content = eval(parse(text = argument.name))
  # NOTE(review): the argument text is evaluated as R code; only run this
  # pipeline with trusted command lines.
  eval(parse(text = argument.content))
  if (!exists(variable.name)){
    stop(sprintf("Argument %s not passed. Stopping ... ", variable.name))
  }
}
|
||||
|
||||
# create required folders for output and work material
# Folder name: <working dir name without its numeric prefix>_<seurat.addr>_<yymmddHHMMSS>
output_folder = gsub(pattern="^\\d+_", replacement="", x=basename(getwd()))
output_folder = paste(output_folder, seurat.addr, sep = "_")
# format the timestamp explicitly instead of stripping characters from the
# implicit character conversion of Sys.time()
c.time = format(Sys.time(), "%y%m%d%H%M%S")
output_folder = paste(output_folder, c.time, sep = "_")
output_folder = file.path("../../output", output_folder)
# recursive so that a missing ../../output parent is created as well
dir.create(output_folder, recursive = TRUE)
|
||||
|
||||
library(Seurat)
|
||||
library(RColorBrewer)
|
||||
library(dplyr)
|
||||
library(plyr)
|
||||
|
||||
#######################################################################################################
|
||||
|
||||
# load data
print("loading data ... ")
seurat.obj = readRDS(seurat.addr)
print("Data loaded.")

# check if LouvainClustering is present
if ("LouvainClustering" %in% colnames(seurat.obj@meta.data)){
  print("Identifying gene outliers but first need to aggregate gene expression by clusters")
  seurat.obj = SetAllIdent(object=seurat.obj, id="LouvainClustering")
  no.genes = nrow(seurat.obj@data)
  cell.labels = as.vector(seurat.obj@ident)

  # Average the expression of every gene per cell label, processing the genes
  # in chunks of chunk.size rows.
  chunk.size = 1000
  expression.data = NULL
  start_index = 1
  # "<=" so that a final chunk holding a single gene is not skipped
  while (start_index <= no.genes){
    end_index = min(start_index + chunk.size - 1, no.genes)
    # drop = FALSE keeps a one-gene chunk as a matrix
    expression.data_ = data.matrix(seurat.obj@data[start_index:end_index, , drop = FALSE])
    expression.data_ = as.data.frame(t(expression.data_))
    expression.data_ = aggregate(expression.data_, list(CellType = cell.labels), mean)
    # first column holds the group labels; move it to the row names
    rownames(expression.data_) = expression.data_[[1]]
    expression.data_ = expression.data_[, -1, drop = FALSE]
    print(start_index)
    if (is.null(expression.data)){
      expression.data = expression.data_
    }else{
      expression.data = cbind(expression.data, expression.data_)
    }
    start_index = start_index + chunk.size
  }
  if (is.null(expression.data)){
    stop("The data set contains no genes to cluster")
  }

  # saving the expression matrix
  write.csv(expression.data, file.path(output_folder, "expression.csv"))

  # run python script to identify outliers
  command = sprintf("%s clustering.py %s %s", python.addr, shQuote(output_folder), no_clusters)
  status = system(command, wait = T)

  # remove the expression csv file
  file.remove(file.path(output_folder, "expression.csv"))
  if (status != 0){
    stop(sprintf("clustering.py failed with exit status %s", status))
  }

  # load gene clustering
  gene_clustering = read.csv(file.path(output_folder, "clustering.csv"), row.names = 1)

  # save feature plots: one png per gene, cells placed at their UMAP
  # coordinates and coloured by expression
  gene_names = as.vector(unique(gene_clustering$GeneNames))
  features_folder = file.path(output_folder, "features")
  dir.create(features_folder)
  dr_coordinates = seurat.obj@dr$umap@cell.embeddings
  for (i in seq_along(gene_names)){
    gene_name = gene_names[i]
    png_name = paste(file.path(features_folder, gene_name), "png", sep = ".")
    dframe = data.frame(X = dr_coordinates[, 1], Y = dr_coordinates[, 2], Expression = seurat.obj@data[gene_name, ])
    plot.obj = ggplot(dframe, aes(x = X, y = Y, color = Expression))
    plot.obj = plot.obj + geom_point(size = .5)
    plot.obj = plot.obj + theme_void() + theme(panel.background = element_rect(fill = 'black', colour = 'black'))
    plot.obj = plot.obj + scale_colour_gradient(low = "blue", high = "red")
    png(png_name, width = 500, height = 500)
    print(plot.obj)
    dev.off()
    # progress report every 10 genes
    if (i %% 10 == 0){
      print(sprintf("%s / %s", i, length(gene_names)))
    }
  }
}else{
  print("Data needs to be clustered first")
}

print("Ended beautifully ... ")
|
||||
16
pipelines/93_gene_grouping/gene_grouping.sh
Executable file
16
pipelines/93_gene_grouping/gene_grouping.sh
Executable file
|
|
@ -0,0 +1,16 @@
|
|||
#!/bin/bash

# Job options for the batch scheduler ("#$" directive lines).
#$ -cwd
#$ -N gene_grouping
#$ -V
#$ -l h_rt=23:59:59
#$ -l h_vmem=100G

# Exactly one argument is required; it is forwarded unchanged to the R script.
if [ "$#" -ne 1 ]; then
    echo "Illegal number of parameters"
    exit 1
fi

# Quoted so the shell neither word-splits nor glob-expands the argument.
Rscript gene_grouping.R "$1"

echo "End on $(date)"
|
||||
491
pipelines/93_gene_grouping/gene_viewer.py
Executable file
491
pipelines/93_gene_grouping/gene_viewer.py
Executable file
|
|
@ -0,0 +1,491 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Created on Wed Jan 16 19:39:59 2019
|
||||
|
||||
@author: doru
|
||||
"""
|
||||
|
||||
from os.path import join, exists
|
||||
from os import listdir, mkdir
|
||||
from shutil import rmtree
|
||||
import cv2
|
||||
import numpy as np
|
||||
import tkinter as tk
|
||||
from PIL import Image, ImageTk
|
||||
import pandas as pd
|
||||
|
||||
class GeneViewer(object):
    """Tk window for browsing gene feature plots organised into gene groups.

    The clustering table (columns ``GeneNames`` and ``Cluster``) decides which
    genes belong to which group. Per-gene images are read from
    ``features/<gene>.png`` and free-text group descriptions are kept in
    ``group_descriptions/Cluster_<label>``. Groups are always addressed by
    their cluster label; list box positions are translated to labels through
    ``self.clusters`` because labels are not guaranteed to be 0..k-1.

    ``self.data`` maps ``"Cluster_<label>"`` to ``[gene_names, description]``.
    """

    DESCRIPTIONS_DIR = 'group_descriptions'
    FEATURES_DIR = 'features'

    def __init__(self, feature_addrs, clustering_addr, gene_info_addr):
        """Load the tables, build the window and enter the Tk main loop.

        feature_addrs   -- list of feature image paths (stored, not otherwise used here)
        clustering_addr -- csv file with the GeneNames / Cluster table
        gene_info_addr  -- csv file with the gene annotation table
        """
        self.feature_addrs = feature_addrs
        self.clustering_addr = clustering_addr
        self.gene_info_addr = gene_info_addr

        self.clustering = self._read_clustering(self.clustering_addr)
        self.gene_info = pd.read_csv(self.gene_info_addr)

        self.clusters = np.sort(self.clustering.Cluster.unique())

        self.data = {}
        for cluster in self.clusters:
            cluster_name = self._cluster_key(cluster)
            self.data[cluster_name] = [self._genes_in(cluster), self._load_description(cluster_name)]

        self.current_cluster_index = 0
        self.current_cluster = int(self.clusters[self.current_cluster_index])
        self.current_gene_index = 0
        self.current_gene = self._current_genes()[self.current_gene_index]

        self.root = tk.Tk()
        self._build_widgets()

        self.load_image()

        self.update_genes()
        self.update_groups()

        self._bind_events()

        self.root.title("Gene expression with grouped genes")
        self.root.mainloop()

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _cluster_key(cluster):
        """Return the ``self.data`` key / description file name of a cluster label."""
        return "Cluster_{number}".format(number = cluster)

    @staticmethod
    def _read_clustering(clustering_addr):
        """Read the clustering csv, discarding unnamed (saved index) columns.

        The table is saved with its row index, which comes back as an
        ``Unnamed: ...`` column on the next read; dropping those columns stops
        them from piling up over repeated save/load cycles.
        """
        clustering = pd.read_csv(clustering_addr)
        keep = [column for column in clustering.columns if not str(column).startswith("Unnamed")]
        return clustering[keep]

    def _load_description(self, cluster_name):
        """Return the stored description of a group, creating an empty file if none exists."""
        if not exists(self.DESCRIPTIONS_DIR):
            mkdir(self.DESCRIPTIONS_DIR)
        description_addr = join(self.DESCRIPTIONS_DIR, cluster_name)
        if not exists(description_addr):
            with open(description_addr, 'w'):
                pass
            return ""
        with open(description_addr, 'r') as fobj:
            return fobj.read().strip()

    def _genes_in(self, cluster):
        """Return the gene names assigned to the given cluster label."""
        return list(self.clustering.GeneNames[self.clustering.Cluster == cluster])

    def _current_genes(self):
        """Return the gene list of the currently shown group."""
        return self.data[self._cluster_key(self.current_cluster)][0]

    def _label_at(self, index):
        """Translate a list box position into the cluster label shown there."""
        return self.clusters[index]

    def _description_at(self, index):
        """Return the description of the group shown at a list box position."""
        return self.data[self._cluster_key(self._label_at(index))][1]

    def _build_widgets(self):
        """Create and lay out every widget of the main window."""
        self.groups_frame = tk.Frame(self.root)
        self.genes_frame = tk.Frame(self.root)
        self.panel_frame = tk.Frame(self.root)
        self.description_frame = tk.Frame(self.root)

        self.groups_frame.grid(row = 0, column = 0)
        self.genes_frame.grid(row = 0, column = 1)
        self.description_frame.grid(row = 0, column = 2, sticky = tk.N)
        self.panel_frame.grid(row = 0, column = 3)

        self.buttons_frame = tk.Frame(self.panel_frame)
        self.gene_name_label = tk.Label(self.panel_frame)
        self.canvas = tk.Canvas(self.panel_frame, width = 500, height = 500)

        self.buttons_frame.grid(row = 0, column = 0)
        self.gene_name_label.grid(row = 1, column = 0)
        self.canvas.grid(row = 2, column = 0)

        self.save_button = tk.Button(self.buttons_frame, text = 'Save changes')
        self.changeGroupAssignment_button = tk.Button(self.buttons_frame, text = 'Change group assignment')
        self.merge_groups_button = tk.Button(self.buttons_frame, text = 'Merge with')

        self.save_button.grid(row = 0, column = 0)
        self.changeGroupAssignment_button.grid(row = 0, column = 1)
        self.merge_groups_button.grid(row = 0, column = 2)

        self.group_label = tk.Label(self.groups_frame)
        self.group_list_frame = tk.Frame(self.groups_frame)
        self.group_label.grid(row = 0, column = 0)
        self.group_list_frame.grid(row = 1, column = 0)

        self.group_list = tk.Listbox(self.group_list_frame, height = 30, exportselection = 0)
        self.group_list.config(selectmode = tk.SINGLE)
        self.group_list_scroll = tk.Scrollbar(self.group_list_frame)

        self.group_list_scroll.pack(side = tk.RIGHT, fill = tk.Y)
        self.group_list.pack()
        self.group_list.config(yscrollcommand = self.group_list_scroll.set)
        self.group_list_scroll.config(command = self.group_list.yview)

        self.group_label['text'] = "Gene groups"

        self.gene_label = tk.Label(self.genes_frame)
        self.gene_list_frame = tk.Frame(self.genes_frame)
        self.gene_label.grid(row = 0, column = 0)
        self.gene_list_frame.grid(row = 1, column = 0)

        self.gene_list = tk.Listbox(self.gene_list_frame, height = 30, exportselection = 0)
        self.gene_list.config(selectmode = tk.SINGLE)
        self.gene_list_scroll = tk.Scrollbar(self.gene_list_frame)

        self.gene_list_scroll.pack(side = tk.RIGHT, fill = tk.Y)
        self.gene_list.pack()
        self.gene_list.config(yscrollcommand = self.gene_list_scroll.set)
        self.gene_list_scroll.config(command = self.gene_list.yview)

        self.query_frame = tk.Frame(self.description_frame)
        self.group_description_label = tk.Label(self.description_frame)
        self.group_description_content = tk.Text(self.description_frame, width = 50, height = 10, borderwidth=2, relief="solid")
        self.gene_description_label = tk.Label(self.description_frame)
        self.gene_description_content = tk.Text(self.description_frame, width = 50, height = 20, borderwidth=2, relief="solid")

        self.query_frame.grid(row = 0, column = 0, sticky = tk.W)
        self.group_description_label.grid(row = 1, column = 0, sticky = tk.W)
        self.group_description_content.grid(row = 2, column = 0, sticky = tk.W)
        self.gene_description_label.grid(row = 3, column = 0, sticky = tk.W)
        self.gene_description_content.grid(row = 4, column = 0, sticky = tk.W)

        self.query_label = tk.Label(self.query_frame)
        self.query_enter = tk.Entry(self.query_frame, width = 15)

        self.query_label.grid(row = 0, column = 0)
        self.query_enter.grid(row = 0, column = 1)
        self.query_label['text'] = 'Enter gene name: '

        self.group_description_label['text'] = "Group description: "
        self.gene_description_label['text'] = "Gene summary: "

    def _bind_events(self):
        """Connect mouse and keyboard events of the main window to their handlers."""
        self.gene_list.bind("<ButtonRelease-1>", self.select_gene)
        self.group_list.bind("<ButtonRelease-1>", self.select_group)
        self.save_button.bind("<ButtonRelease-1>", self.save_changes)
        self.changeGroupAssignment_button.bind("<ButtonRelease-1>", self.changeGroupAssignment)
        self.merge_groups_button.bind("<ButtonRelease-1>", self.merge_groups)
        self.query_enter.bind("<Return>", self.query_gene_name)

        self.root.bind("<Left>", self.go_to_previous)
        self.root.bind("<Right>", self.go_to_next)

        self.root.bind("<Up>", self.previous_group)
        self.root.bind("<Down>", self.next_group)

        self.group_description_content.bind("<KeyRelease>", self.update_group_description)

    def _show_cluster(self, cluster_index):
        """Make the group at ``cluster_index`` current and show its first gene."""
        self.current_cluster_index = cluster_index
        self.current_cluster = self._label_at(cluster_index)
        self.current_gene_index = 0
        self.current_gene = self._current_genes()[self.current_gene_index]
        self.group_list.selection_clear(0, tk.END)
        self.group_list.select_set(first = cluster_index)
        self.group_list.see(cluster_index)
        self.update_genes()
        self.load_image()

    def _show_gene(self, gene_index):
        """Make the gene at ``gene_index`` of the current group current and show it."""
        self.current_gene_index = gene_index
        self.current_gene = self._current_genes()[gene_index]
        self.gene_list.selection_clear(0, tk.END)
        self.gene_list.select_set(first = gene_index)
        self.gene_list.see(gene_index)
        self.load_image()

    def _close_dialog(self):
        """Release the input grab, then destroy the dialog window."""
        # the grab must be released while the window still exists
        self.new_window.grab_release()
        self.new_window.destroy()

    # ------------------------------------------------------------------
    # main window
    # ------------------------------------------------------------------

    def load_image(self):
        """Show the feature plot and the descriptions of the current gene."""
        # clean the canvas before loading
        self.canvas.delete("all")
        img_addr = join(self.FEATURES_DIR, "{gene_name}.png".format(gene_name = self.current_gene))
        self.img_data = cv2.imread(img_addr)
        if self.img_data is None:
            # cv2.imread signals an unreadable or missing file with None
            self.bg_img = None
            self.photo = None
            self.canvas.create_text(250, 250, text = "No feature plot found for {gene_name}".format(gene_name = self.current_gene))
        else:
            self.img_data = cv2.cvtColor(self.img_data, cv2.COLOR_BGR2RGB)
            self.bg_img = Image.fromarray(self.img_data)
            # keep a reference on self so the image is not garbage collected
            self.photo = ImageTk.PhotoImage(image=self.bg_img)
            self.canvas.create_image(0, 0, image=self.photo, anchor=tk.NW)

        self.gene_name_label['text'] = self.current_gene

        self.write_gene_description()
        self.write_group_description()

        self.query_enter.delete(0, tk.END)

    def previous_group(self, event):
        """Show the previous group, wrapping around from the first to the last."""
        self._show_cluster((self.current_cluster_index - 1) % len(self.clusters))

    def next_group(self, event):
        """Show the next group, wrapping around from the last to the first."""
        self._show_cluster((self.current_cluster_index + 1) % len(self.clusters))

    def go_to_previous(self, event):
        """Show the previous gene of the current group, wrapping around."""
        self._show_gene((self.current_gene_index - 1) % len(self._current_genes()))

    def go_to_next(self, event):
        """Show the next gene of the current group, wrapping around."""
        self._show_gene((self.current_gene_index + 1) % len(self._current_genes()))

    def update_groups(self):
        """Refill the group list box from ``self.clusters`` and select the first entry."""
        self.group_list.delete(0, tk.END)
        for cluster in self.clusters:
            self.group_list.insert(tk.END, "Group_{number}".format(number = cluster))
        self.group_list.select_set(0)

    def update_genes(self):
        """Refill the gene list box with the genes of the current group."""
        genes = self._current_genes()
        self.gene_list.delete(0, tk.END)
        for gene_name in genes:
            self.gene_list.insert(tk.END, gene_name)
        self.gene_list.select_set(0)
        self.gene_label['text'] = "Gene names ({numbers})".format(numbers = len(genes))

    def select_gene(self, event):
        """Show the gene clicked in the gene list box."""
        selection = self.gene_list.curselection()
        if not selection:
            return
        self._show_gene(selection[0])

    def select_group(self, event):
        """Show the group clicked in the group list box."""
        selection = self.group_list.curselection()
        if not selection:
            return
        self._show_cluster(selection[0])

    def write_gene_description(self):
        """Fill the gene summary box from the gene annotation table."""
        gene_field = self.gene_info.GeneSymbol == self.current_gene
        gene_symbol = "Gene symbol: {sym}\n".format(sym = self.current_gene)
        if not gene_field.any():
            # no annotation row carries this gene symbol
            gene_name = "Gene name: Not available\n"
            gene_family = "Gene family: Not available\n"
            reactom_pathway = "Reactom pathway: Not available\n"
            gene_summary = "Gene summary: Not available\n"
        else:
            gene_name = "Gene name: {x_factor}\n".format(x_factor = self.gene_info.GeneName[gene_field].values[0])
            gene_family = "Gene family: {x_factor}\n".format(x_factor = self.gene_info.GeneFamily[gene_field].values[0])
            reactom_pathway = "Reactom pathway: {x_factor}\n".format(x_factor = self.gene_info.ReactomPathway[gene_field].values[0])
            gene_summary = "Gene summary: {x_factor}\n".format(x_factor = self.gene_info.GeneSummary[gene_field].values[0])
        gene_text = "\n".join([gene_symbol, gene_name, gene_family, reactom_pathway, gene_summary])
        self.gene_description_content.delete('1.0', tk.END)
        self.gene_description_content.insert(tk.END, gene_text)

    def write_group_description(self):
        """Show the description of the current group in its text box."""
        text_info = self.data[self._cluster_key(self.current_cluster)][1]
        self.group_description_content.delete('1.0', tk.END)
        self.group_description_content.insert('1.0', text_info)

    def query_gene_name(self, event):
        """Jump to the gene typed in the query box, or report that it is unknown."""
        query_entry = self.query_enter.get()
        if query_entry in self.clustering.GeneNames.values:
            self.current_gene = query_entry
            self.current_cluster = self.clustering.Cluster[self.clustering.GeneNames == self.current_gene].values[0]
            self.current_cluster_index = int(np.where(self.clusters == self.current_cluster)[0][0])
            # look the gene up under its cluster label, not its list position
            self.current_gene_index = self._current_genes().index(self.current_gene)
            self.update_genes()
            self.gene_list.selection_clear(0, tk.END)
            self.gene_list.select_set(first = self.current_gene_index)
            self.gene_list.see(self.current_gene_index)
            self.group_list.selection_clear(0, tk.END)
            self.group_list.select_set(first = self.current_cluster_index)
            self.group_list.see(self.current_cluster_index)
            self.load_image()
        else:
            self.query_enter.delete(0, tk.END)
            self.query_enter.insert(0, "Gene not found: {entrquery}".format(entrquery = query_entry))

    def update_group_description(self, event):
        """Store the edited description text for the current group."""
        # 'end-1c' leaves out the newline the Text widget appends on its own
        text_info = self.group_description_content.get('1.0', 'end-1c')
        self.data[self._cluster_key(self.current_cluster)][1] = text_info

    def save_changes(self, event):
        """Rewrite all group description files and save the clustering table."""
        if exists(self.DESCRIPTIONS_DIR):
            rmtree(self.DESCRIPTIONS_DIR)
        mkdir(self.DESCRIPTIONS_DIR)
        for cluster in self.clusters:
            cluster_name = self._cluster_key(cluster)
            with open(join(self.DESCRIPTIONS_DIR, cluster_name), 'w') as fobj:
                fobj.write(self.data[cluster_name][1])
        self.clustering.to_csv(self.clustering_addr)

    # ------------------------------------------------------------------
    # "change group assignment" dialog
    # ------------------------------------------------------------------

    def changeGroupAssignment(self, event):
        """Open the dialog for moving the current gene to another group."""
        self.new_window = tk.Toplevel(self.root)

        self.new_window.grab_set()

        self.options_frame = tk.Frame(self.new_window)
        self.actions_frame = tk.Frame(self.new_window)

        self.options = tk.Listbox(self.options_frame, exportselection = 0)
        self.options.config(selectmode = tk.SINGLE)
        self.options.pack()

        self.change_assignment_button = tk.Button(self.actions_frame, text = 'Change assignment')
        self.show_selected_description = tk.Text(self.actions_frame, borderwidth=1, relief="solid", width = 28, height = 10)

        self.change_assignment_button.grid(row = 0, column = 0, sticky = tk.N)
        self.show_selected_description.grid(row = 2, column = 0, sticky = tk.N)

        for cluster in self.clusters:
            self.options.insert(tk.END, self._cluster_key(cluster))
        self.options.select_set(first = 0)
        self.options.see(0)

        self.show_selected_description.delete('1.0', tk.END)
        self.show_selected_description.insert('1.0', self._description_at(0))

        self.options_frame.grid(row = 0, column = 0, sticky = tk.N)
        self.actions_frame.grid(row = 0, column = 1, sticky = tk.N)

        self.new_window.bind("<Up>", self.assignment_up)
        self.new_window.bind("<Down>", self.assignment_down)
        self.change_assignment_button.bind("<ButtonRelease-1>", self.change_assignment)

    def _move_assignment(self, step):
        """Move the selection of the assignment dialog by ``step`` entries, wrapping around."""
        selection = self.options.curselection()
        asgn_index = ((selection[0] if selection else 0) + step) % len(self.clusters)
        self.options.selection_clear(0, tk.END)
        self.options.select_set(first = asgn_index)
        self.options.see(asgn_index)
        self.show_selected_description.delete('1.0', tk.END)
        self.show_selected_description.insert('1.0', self._description_at(asgn_index))

    def assignment_up(self, event):
        """Select the previous group in the assignment dialog."""
        self._move_assignment(-1)

    def assignment_down(self, event):
        """Select the next group in the assignment dialog."""
        self._move_assignment(1)

    def change_assignment(self, event):
        """Move the current gene to the group selected in the dialog and close it."""
        selection = self.options.curselection()
        if selection:
            new_cluster = self._label_at(selection[0])
            self.clustering.loc[self.clustering.GeneNames == self.current_gene, "Cluster"] = new_cluster
            # refresh the gene lists only; descriptions stay as edited in memory
            for cluster in self.clusters:
                self.data[self._cluster_key(cluster)][0] = self._genes_in(cluster)
            self.query_enter.delete(0, tk.END)
            self.query_enter.insert(0, self.current_gene)
            self.query_gene_name(event)
        self._close_dialog()

    # ------------------------------------------------------------------
    # "merge with" dialog
    # ------------------------------------------------------------------

    def merge_groups(self, event):
        """Open the dialog for merging two groups."""
        self.new_window = tk.Toplevel(self.root)

        self.new_window.grab_set()

        self.up_frame = tk.Frame(self.new_window)
        self.down_frame = tk.Frame(self.new_window)

        self.up_frame.grid(row = 0, column = 0)
        self.down_frame.grid(row = 1, column = 0)

        self.confirm_merging_button = tk.Button(self.up_frame, text = 'Confirm merging')
        self.confirm_merging_button.grid(row = 0, column = 0)

        self.set1_listbox = tk.Listbox(self.down_frame, exportselection = 0)
        self.set1_description = tk.Text(self.down_frame, width = 28, height = 10)
        self.set2_listbox = tk.Listbox(self.down_frame, exportselection = 0)
        self.set2_description = tk.Text(self.down_frame, width = 28, height = 10)

        self.set1_listbox.config(selectmode = tk.SINGLE)
        self.set2_listbox.config(selectmode = tk.SINGLE)

        self.set1_listbox.grid(row = 0, column = 0)
        self.set1_description.grid(row = 0, column = 1)
        self.set2_listbox.grid(row = 0, column = 2)
        self.set2_description.grid(row = 0, column = 3)

        for cluster in self.clusters:
            cluster_name = self._cluster_key(cluster)
            self.set1_listbox.insert(tk.END, cluster_name)
            self.set2_listbox.insert(tk.END, cluster_name)

        self.set1_listbox.select_set(first = self.current_cluster_index)
        self.set1_listbox.see(self.current_cluster_index)

        self.set2_listbox.select_set(first = 0)
        self.set2_listbox.see(0)

        self.set1_description.delete('1.0', tk.END)
        self.set1_description.insert('1.0', self._description_at(self.current_cluster_index))

        self.set2_description.delete('1.0', tk.END)
        self.set2_description.insert('1.0', self._description_at(0))

        self.set1_listbox.bind("<ButtonRelease-1>", self.setListbox1)
        self.set2_listbox.bind("<ButtonRelease-1>", self.setListbox2)
        self.confirm_merging_button.bind("<ButtonRelease-1>", self.confirm_merging)

    def setListbox1(self, event):
        """Show the description of the group selected in the first merge list."""
        selection = self.set1_listbox.curselection()
        if not selection:
            return
        self.set1_description.delete('1.0', tk.END)
        self.set1_description.insert('1.0', self._description_at(selection[0]))

    def setListbox2(self, event):
        """Show the description of the group selected in the second merge list."""
        selection = self.set2_listbox.curselection()
        if not selection:
            return
        self.set2_description.delete('1.0', tk.END)
        self.set2_description.insert('1.0', self._description_at(selection[0]))

    def _merge_clusters(self, keep_index, erase_index):
        """Fold the group at ``erase_index`` into the one at ``keep_index``.

        Genes and descriptions are concatenated, labels above the erased one
        are shifted down by one, and the clustering table is rebuilt from
        ``self.data``.
        """
        keep_label = int(self._label_at(keep_index))
        erase_label = int(self._label_at(erase_index))
        kept = self.data[self._cluster_key(keep_label)]
        erased = self.data[self._cluster_key(erase_label)]
        kept[0].extend(erased[0])
        kept[1] = "{a}\n{b}".format(a = kept[1], b = erased[1])

        new_data = {}
        for cluster_name, content in self.data.items():
            label = int(cluster_name.split('_')[1])
            if label == erase_label:
                continue
            if label > erase_label:
                label -= 1
            new_data[self._cluster_key(label)] = content
        self.data = new_data

        cluster_col, gene_col = [], []
        for cluster_name, content in self.data.items():
            label = int(cluster_name.split('_')[1])
            cluster_col.extend(len(content[0]) * [label])
            gene_col.extend(content[0])
        self.clustering = pd.DataFrame({"Cluster": cluster_col, "GeneNames": gene_col})
        self.clustering['Cluster'] = pd.to_numeric(self.clustering['Cluster'])

        self.clusters = np.sort(self.clustering.Cluster.unique())

    def confirm_merging(self, event):
        """Merge the two groups selected in the dialog and close it.

        The group with the higher list position is folded into the lower one.
        Nothing is merged when the same group is selected on both sides.
        """
        selection_1 = self.set1_listbox.curselection()
        selection_2 = self.set2_listbox.curselection()
        if selection_1 and selection_2 and selection_1[0] != selection_2[0]:
            to_keep = min(selection_1[0], selection_2[0])
            to_erase = max(selection_1[0], selection_2[0])
            self._merge_clusters(to_keep, to_erase)

            self.update_groups()
            self.group_list.selection_clear(0, tk.END)
            self.group_list.select_set(first = to_keep)
            self.group_list.see(to_keep)
            self.current_cluster_index = int(to_keep)

            # re-locate the gene on display; its group label may have shifted
            self.query_enter.delete(0, tk.END)
            self.query_enter.insert(0, self.current_gene)
            self.query_gene_name(event)

        self._close_dialog()
|
||||
|
||||
# Start the viewer only when this file is run as a script, so importing the
# module does not open a window. Paths are relative to the working directory.
if __name__ == "__main__":
    feature_addrs = [join("features", addr) for addr in listdir('features') if addr[-3:] == "png"]
    clus_addr = "clustering.csv"
    gene_info_addr = "gene_info.csv"

    GeneViewer(feature_addrs, clus_addr, gene_info_addr)
|
||||
|
||||
|
||||
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue