scRNA-seq_analysis

This commit is contained in:
veghp 2019-07-08 12:22:01 +01:00
commit 82cc2d191e
188 changed files with 146184 additions and 0 deletions

View file

@ -0,0 +1,31 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 15 20:52:21 2019
@author: doru
"""
import sys
from os.path import join
import pandas as pd
import numpy as np
def build_cluster_table(gene_names, labels):
    """Return a table with one row per gene and its assigned cluster label.

    Parameters
    ----------
    gene_names : sequence of str
        Gene identifiers, in the same order as ``labels``.
    labels : sequence of int
        Cluster label predicted for each gene.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``GeneNames`` and ``Cluster``, in that order.
    """
    return pd.DataFrame.from_dict({"GeneNames": gene_names, "Cluster": labels})


def main(argv):
    """Cluster genes with a Gaussian mixture model and write ``clustering.csv``.

    ``argv[1]`` is the folder that holds ``expression.csv`` and receives
    ``clustering.csv``; ``argv[2]`` is the number of mixture components.
    Exits with a usage message when either argument is missing.
    """
    if len(argv) < 3:
        sys.exit("Usage: clustering.py <output_folder> <no_clusters>")
    # Imported here so the module can be imported without scikit-learn installed.
    from sklearn.mixture import GaussianMixture

    output_folder = argv[1]
    no_clusters = int(argv[2])
    expression_file = join(output_folder, "expression.csv")
    expression_df = pd.read_csv(expression_file, index_col=0)
    # The columns of the CSV are the items to cluster (their names are saved as
    # GeneNames below), so transpose to get one sample per column.
    expression = np.transpose(expression_df.values)
    # Fixed random_state keeps the clustering reproducible between runs.
    model = GaussianMixture(n_components=no_clusters, random_state=19).fit(expression)
    labels = model.predict(expression)
    # save the output
    gene_names = list(expression_df.columns)
    table = build_cluster_table(gene_names, labels)
    table.to_csv(join(output_folder, "clustering.csv"))


if __name__ == "__main__":
    main(sys.argv)

View file

@ -0,0 +1,118 @@
# Parse the command line. All trailing arguments are concatenated and then split
# on ";", giving one piece per expected argument. Each piece is later evaluated
# as R code and must define the matching variable (seurat.addr, no_clusters).
args = commandArgs(trailingOnly=T)
args = paste(args, collapse = "")
args = unlist(strsplit(args, ";"))
# One line per expected argument: "<variable>.arg = args[<position>]".
arguments.list = "
seurat.addr.arg = args[1]
no_clusters.arg = args[2]
"
python.addr = "python"
expected_arguments = unlist(strsplit(arguments.list, "\n"))
expected_arguments = expected_arguments[!(expected_arguments == "")]
if(length(args) != length(expected_arguments)){
  # List the expected variable names so the user can see what is missing.
  expected_names = sub(pattern="\\.arg.*$", replacement="", x=expected_arguments)
  error.msg = sprintf('This pipeline requires %s parameters:\n%s',
                      as.character(length(expected_arguments)),
                      paste(expected_names, collapse = "\n"))
  stop(error.msg)
}
# Bind seurat.addr.arg / no_clusters.arg to the raw command-line pieces.
eval(parse(text = arguments.list))
for(n in seq_along(expected_arguments)){
  argument = gsub(pattern=" ", replacement="", x=expected_arguments[n])
  argument.name = unlist(strsplit(argument, "="))[1]
  # Strip only the literal ".arg" suffix; an unescaped "." would also remove
  # any "<char>arg" inside the variable name.
  variable.name = sub(pattern="\\.arg$", replacement="", x=argument.name)
  argument.content = eval(parse(text = argument.name))
  # NOTE(review): this evaluates command-line text as R code. Only run the
  # script with trusted arguments.
  eval(parse(text = argument.content))
  if (!exists(variable.name)){
    stop(sprintf("Argument %s not passed. Stopping ... ", variable.name))
  }
}
# Build the output folder name as <working dir without numeric prefix>_<seurat.addr>_<timestamp>
# and create it under ../../output.
run.label = sub(pattern="^\\d+_", replacement="", x=basename(getwd()))
# Timestamp: current time as text with " BST", colons, spaces and dashes removed,
# then the first two characters dropped.
time.stamp = as.character(Sys.time())
time.stamp = gsub(pattern=" BST|[: -]", replacement="", x=time.stamp)
time.stamp = substr(x=time.stamp, start=3, stop=nchar(time.stamp))
output_folder = file.path("../../output", paste(run.label, seurat.addr, time.stamp, sep = "_"))
dir.create(output_folder)
library(Seurat)
library(RColorBrewer)
library(dplyr)
library(plyr)
# ggplot(), aes() and the theme/scale helpers used for the feature plots come
# from ggplot2; attach it explicitly rather than rely on another package.
library(ggplot2)
#######################################################################################################
# load data
print("loading data ... ")
seurat.obj = readRDS(seurat.addr)
print("Data loaded.")
# check if LouvainClustering is present
if ("LouvainClustering" %in% colnames(seurat.obj@meta.data)){
  print("Identifying gene outliers but first need to aggregate gene expression by clusters")
  seurat.obj = SetAllIdent(object=seurat.obj, id="LouvainClustering")
  no.genes = nrow(seurat.obj@data)
  cell.labels = as.vector(seurat.obj@ident)
  # Average expression per cell label, 1000 genes at a time so that only one
  # chunk is converted to a dense matrix at once.
  chunk.size = 1000
  expression.data = NULL
  start_index = 1
  # "<=" so the final gene is not skipped when no.genes is a multiple of
  # chunk.size plus one.
  while (start_index <= no.genes){
    end_index = min(start_index + chunk.size - 1, no.genes)
    # drop = FALSE keeps a one-gene chunk as a matrix / data frame.
    expression.data_ = data.matrix(seurat.obj@data[start_index:end_index, , drop = FALSE])
    expression.data_ = as.data.frame(t(expression.data_))
    expression.data_ = aggregate(expression.data_, list(cell.labels), mean)
    rownames(expression.data_) = as.character(expression.data_$Group.1)
    expression.data_ = expression.data_[, -1, drop = FALSE]
    print(start_index)
    if (is.null(expression.data)){
      expression.data = expression.data_
    }else{
      expression.data = cbind(expression.data, expression.data_)
    }
    start_index = start_index + chunk.size
  }
  # saving the expression matrix
  write.csv(expression.data, file.path(output_folder, "expression.csv"))
  # run python script to identify outliers
  # shQuote protects folder names that contain spaces or shell metacharacters
  command = sprintf("%s clustering.py %s %s", python.addr, shQuote(output_folder), no_clusters)
  exit.status = system(command, wait = T)
  # remove the expression csv file
  file.remove(file.path(output_folder, "expression.csv"))
  if (exit.status != 0){
    stop(sprintf("clustering.py failed with exit status %s", exit.status))
  }
  # load gene clustering
  gene_clustering = read.csv(file.path(output_folder, "clustering.csv"), row.names = 1)
  # save feature plots
  gene_names = as.vector(unique(gene_clustering$GeneNames))
  features_folder = file.path(output_folder, "features")
  dir.create(features_folder)
  dr_coordinates = seurat.obj@dr$umap@cell.embeddings
  for (i in seq_along(gene_names)){
    gene_name = gene_names[i]
    png_name = paste(file.path(features_folder, gene_name), "png", sep = ".")
    dframe = data.frame(X = dr_coordinates[, 1], Y = dr_coordinates[, 2], Expression = seurat.obj@data[gene_name, ])
    plot.obj = ggplot(dframe, aes(x = X, y = Y, color = Expression))
    plot.obj = plot.obj + geom_point(size = .5)
    plot.obj = plot.obj + theme_void() + theme(panel.background = element_rect(fill = 'black', colour = 'black'))
    plot.obj = plot.obj + scale_colour_gradient(low = "blue", high = "red")
    png(png_name, width = 500, height = 500)
    print(plot.obj)
    dev.off()
    # progress report every 10 genes
    if (i %% 10 == 0){
      print(sprintf("%s / %s", i, length(gene_names)))
    }
  }
}else{
  print("Data needs to be clustered first")
}
print("Ended beautifully ... ")

View file

@ -0,0 +1,16 @@
#!/bin/bash
#$ -cwd
#$ -N gene_grouping
#$ -V
#$ -l h_rt=23:59:59
#$ -l h_vmem=100G
# Job wrapper for gene_grouping.R. Takes exactly one argument, which is passed
# unchanged to the R script. The "#$" lines above look like Grid Engine
# submission options; confirm against the scheduler in use.
if [ "$#" -ne 1 ]; then
    echo "Illegal number of parameters" >&2
    exit 1
fi
# Quote the argument so the shell does not word-split or glob-expand it.
Rscript gene_grouping.R "$1"
status=$?
echo "End on $(date)"
# Report the R script's exit status rather than that of the echo above.
exit "$status"

View file

@ -0,0 +1,491 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 16 19:39:59 2019
@author: doru
"""
from os.path import join, exists
from os import listdir, mkdir
from shutil import rmtree
import cv2
import numpy as np
import tkinter as tk
from PIL import Image, ImageTk
import pandas as pd
class GeneViewer(object):
    """Tkinter browser for per-gene feature plots grouped by cluster.

    The window shows a list of gene groups, the genes of the selected group,
    an editable group description, a gene summary taken from the gene-info
    table and the feature plot ``features/<gene>.png`` of the current gene.
    Genes can be moved between groups and groups can be merged; "Save
    changes" writes the descriptions to ``group_descriptions/`` and the
    clustering table back to ``clustering_addr``.

    ``self.data`` maps ``"Cluster_<id>"`` to ``[gene_names, description]``.
    ``self.clusters`` is the sorted array of cluster ids; list boxes are
    filled in that order, so a list-box index ``i`` always refers to the
    cluster id ``self.clusters[i]`` (ids need not be contiguous).
    """

    DESCRIPTIONS_DIR = 'group_descriptions'
    FEATURES_DIR = 'features'

    def __init__(self, feature_addrs, clustering_addr, gene_info_addr):
        """Load the tables, build the window and run the Tk main loop.

        Parameters
        ----------
        feature_addrs : list of str
            Paths of the feature images (stored, not otherwise used here).
        clustering_addr : str
            CSV with ``GeneNames`` and ``Cluster`` columns.
        gene_info_addr : str
            CSV with ``GeneSymbol``, ``GeneName``, ``GeneFamily``,
            ``ReactomPathway`` and ``GeneSummary`` columns.

        The call blocks until the window is closed.
        """
        self.feature_addrs = feature_addrs
        self.clustering_addr = clustering_addr
        self.gene_info_addr = gene_info_addr
        self.clustering = self._load_clustering(self.clustering_addr)
        self.gene_info = pd.read_csv(self.gene_info_addr)
        self.clusters = np.sort(self.clustering.Cluster.unique())
        # The description files live in this folder; create it on first use.
        if not exists(self.DESCRIPTIONS_DIR):
            mkdir(self.DESCRIPTIONS_DIR)
        self.data = {}
        for cluster in self.clusters:
            cluster_name = self._cluster_key(cluster)
            self.data[cluster_name] = [self._genes_in_cluster(cluster),
                                       self._load_description(cluster_name)]
        self.current_cluster_index = 0
        self.current_cluster = int(self.clusters[self.current_cluster_index])
        self.current_gene_index = 0
        self.current_gene = self._current_genes()[self.current_gene_index]
        self._build_widgets()
        self.load_image()
        self.update_genes()
        self.update_groups()
        self._bind_events()
        self.root.title("Gene expression with grouped genes")
        self.root.mainloop()

    # ------------------------------------------------------------------
    # data helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _cluster_key(cluster):
        """Return the ``self.data`` key / description file name of a cluster id."""
        return "Cluster_{number}".format(number=cluster)

    @staticmethod
    def _load_clustering(clustering_addr):
        """Read the clustering CSV, dropping saved index columns.

        The table is written with its index, which pandas reads back as an
        ``Unnamed: ...`` column; dropping those keeps them from piling up
        on every save.
        """
        table = pd.read_csv(clustering_addr)
        keep = [col for col in table.columns if not str(col).startswith("Unnamed")]
        return table[keep].copy()

    @classmethod
    def _merge_cluster_data(cls, data, to_keep, to_erase):
        """Return a new data dict with cluster ``to_erase`` merged into ``to_keep``.

        Genes of the erased cluster are appended to the kept one and the two
        descriptions are joined with a newline. Clusters with an id above
        ``to_erase`` are renumbered down by one. Expects ``to_keep < to_erase``.
        """
        erased_genes, erased_description = data[cls._cluster_key(to_erase)]
        merged = {}
        for key, (genes, description) in data.items():
            number = int(key.split('_')[1])
            if number == to_erase:
                continue
            genes = list(genes)
            if number == to_keep:
                genes = genes + list(erased_genes)
                description = "{a}\n{b}".format(a=description, b=erased_description)
            if number > to_erase:
                number -= 1
            merged[cls._cluster_key(number)] = [genes, description]
        return merged

    @staticmethod
    def _clustering_from_data(data):
        """Rebuild the ``Cluster`` / ``GeneNames`` table from a data dict."""
        cluster_col, gene_col = [], []
        for key, (genes, _description) in data.items():
            cluster_col.extend(len(genes) * [int(key.split('_')[1])])
            gene_col.extend(genes)
        return pd.DataFrame.from_dict({"Cluster": cluster_col, "GeneNames": gene_col})

    def _genes_in_cluster(self, cluster):
        """Return the gene names currently assigned to ``cluster`` as a list."""
        return list(self.clustering.GeneNames[self.clustering.Cluster == cluster])

    def _load_description(self, cluster_name):
        """Return the stored description of a cluster, creating an empty file if none exists."""
        addr = join(self.DESCRIPTIONS_DIR, cluster_name)
        if not exists(addr):
            with open(addr, 'w'):
                pass
            return ""
        with open(addr, 'r') as fobj:
            return fobj.read().strip()

    def _current_genes(self):
        """Return the gene list of the currently selected cluster."""
        return self.data[self._cluster_key(self.current_cluster)][0]

    def _description_at(self, list_index):
        """Return the description of the cluster shown at ``list_index`` in a cluster list."""
        return self.data[self._cluster_key(self.clusters[list_index])][1]

    def _gene_description_text(self):
        """Return the summary text for the current gene.

        Fields read "Not available" when the gene-info table has no row
        whose ``GeneSymbol`` equals the current gene.
        """
        matches = self.gene_info[self.gene_info.GeneSymbol == self.current_gene]
        fields = (("Gene name", "GeneName"),
                  ("Gene family", "GeneFamily"),
                  ("Reactom pathway", "ReactomPathway"),
                  ("Gene summary", "GeneSummary"))
        lines = ["Gene symbol: {sym}\n".format(sym=self.current_gene)]
        for label, column in fields:
            value = "Not available" if matches.empty else matches[column].values[0]
            lines.append("{label}: {x_factor}\n".format(label=label, x_factor=value))
        return "\n".join(lines)

    # ------------------------------------------------------------------
    # widget helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _selected_index(listbox):
        """Return the selected row of a list box as int, or None when nothing is selected."""
        selection = listbox.curselection()
        return int(selection[0]) if selection else None

    @staticmethod
    def _select_in_listbox(listbox, index):
        """Make ``index`` the only selected row of ``listbox`` and scroll it into view."""
        listbox.selection_clear(0, tk.END)
        listbox.select_set(first=index)
        listbox.see(index)

    @staticmethod
    def _show_text(widget, text):
        """Replace the whole content of a Text widget with ``text``."""
        widget.delete('1.0', tk.END)
        widget.insert('1.0', text)

    @staticmethod
    def _make_scrolled_listbox(parent):
        """Create a single-selection list box with a vertical scroll bar inside ``parent``."""
        listbox = tk.Listbox(parent, height=30, exportselection=0)
        listbox.config(selectmode=tk.SINGLE)
        scroll = tk.Scrollbar(parent)
        scroll.pack(side=tk.RIGHT, fill=tk.Y)
        listbox.pack()
        listbox.config(yscrollcommand=scroll.set)
        scroll.config(command=listbox.yview)
        return listbox, scroll

    def _close_dialog(self):
        """Close the modal dialog; the grab must be released before the window is destroyed."""
        self.new_window.grab_release()
        self.new_window.destroy()

    def _build_widgets(self):
        """Create and lay out all widgets of the main window."""
        self.root = tk.Tk()
        self.groups_frame = tk.Frame(self.root)
        self.genes_frame = tk.Frame(self.root)
        self.panel_frame = tk.Frame(self.root)
        self.description_frame = tk.Frame(self.root)
        self.groups_frame.grid(row=0, column=0)
        self.genes_frame.grid(row=0, column=1)
        self.description_frame.grid(row=0, column=2, sticky=tk.N)
        self.panel_frame.grid(row=0, column=3)
        # image panel: action buttons, gene name, feature plot
        self.buttons_frame = tk.Frame(self.panel_frame)
        self.gene_name_label = tk.Label(self.panel_frame)
        self.canvas = tk.Canvas(self.panel_frame, width=500, height=500)
        self.buttons_frame.grid(row=0, column=0)
        self.gene_name_label.grid(row=1, column=0)
        self.canvas.grid(row=2, column=0)
        self.save_button = tk.Button(self.buttons_frame, text='Save changes')
        self.changeGroupAssignment_button = tk.Button(self.buttons_frame, text='Change group assignment')
        self.merge_groups_button = tk.Button(self.buttons_frame, text='Merge with')
        self.save_button.grid(row=0, column=0)
        self.changeGroupAssignment_button.grid(row=0, column=1)
        self.merge_groups_button.grid(row=0, column=2)
        # list of gene groups
        self.group_label = tk.Label(self.groups_frame)
        self.group_list_frame = tk.Frame(self.groups_frame)
        self.group_label.grid(row=0, column=0)
        self.group_list_frame.grid(row=1, column=0)
        self.group_list, self.group_list_scroll = self._make_scrolled_listbox(self.group_list_frame)
        self.group_label['text'] = "Gene groups"
        # list of genes of the selected group
        self.gene_label = tk.Label(self.genes_frame)
        self.gene_list_frame = tk.Frame(self.genes_frame)
        self.gene_label.grid(row=0, column=0)
        self.gene_list_frame.grid(row=1, column=0)
        self.gene_list, self.gene_list_scroll = self._make_scrolled_listbox(self.gene_list_frame)
        # query box, group description and gene summary
        self.query_frame = tk.Frame(self.description_frame)
        self.group_description_label = tk.Label(self.description_frame)
        self.group_description_content = tk.Text(self.description_frame, width=50, height=10,
                                                 borderwidth=2, relief="solid")
        self.gene_description_label = tk.Label(self.description_frame)
        self.gene_description_content = tk.Text(self.description_frame, width=50, height=20,
                                                borderwidth=2, relief="solid")
        self.query_frame.grid(row=0, column=0, sticky=tk.W)
        self.group_description_label.grid(row=1, column=0, sticky=tk.W)
        self.group_description_content.grid(row=2, column=0, sticky=tk.W)
        self.gene_description_label.grid(row=3, column=0, sticky=tk.W)
        self.gene_description_content.grid(row=4, column=0, sticky=tk.W)
        self.query_label = tk.Label(self.query_frame)
        self.query_enter = tk.Entry(self.query_frame, width=15)
        self.query_label.grid(row=0, column=0)
        self.query_enter.grid(row=0, column=1)
        self.query_label['text'] = 'Enter gene name: '
        self.group_description_label['text'] = "Group description: "
        self.gene_description_label['text'] = "Gene summary: "

    def _bind_events(self):
        """Connect mouse and keyboard events to their handlers."""
        self.gene_list.bind("<ButtonRelease-1>", self.select_gene)
        self.group_list.bind("<ButtonRelease-1>", self.select_group)
        self.save_button.bind("<ButtonRelease-1>", self.save_changes)
        self.changeGroupAssignment_button.bind("<ButtonRelease-1>", self.changeGroupAssignment)
        self.merge_groups_button.bind("<ButtonRelease-1>", self.merge_groups)
        self.query_enter.bind("<Return>", self.query_gene_name)
        self.root.bind("<Left>", self.go_to_previous)
        self.root.bind("<Right>", self.go_to_next)
        self.root.bind("<Up>", self.previous_group)
        self.root.bind("<Down>", self.next_group)
        self.group_description_content.bind("<KeyRelease>", self.update_group_description)

    # ------------------------------------------------------------------
    # display
    # ------------------------------------------------------------------
    def load_image(self):
        """Show the current gene: feature plot, name, gene summary and group description."""
        img_addr = join(self.FEATURES_DIR, "{gene_name}.png".format(gene_name=self.current_gene))
        # clean the canvas before loading, otherwise every plot ever shown
        # stays stacked on it
        self.canvas.delete("all")
        self.img_data = cv2.imread(img_addr)
        if self.img_data is None:
            # imread returns None when the file is missing or unreadable
            self.bg_img = None
            self.photo = None
            self.canvas.create_text(250, 250, text="Image not available: {addr}".format(addr=img_addr))
        else:
            self.img_data = cv2.cvtColor(self.img_data, cv2.COLOR_BGR2RGB)
            self.bg_img = Image.fromarray(self.img_data)
            # keep a reference on self so the image is not garbage collected
            self.photo = ImageTk.PhotoImage(image=self.bg_img)
            self.canvas.create_image(0, 0, image=self.photo, anchor=tk.NW)
        self.gene_name_label['text'] = self.current_gene
        self.write_gene_description()
        self.write_group_description()
        self.query_enter.delete(0, tk.END)

    def _show_cluster(self, cluster_index):
        """Make the cluster at ``cluster_index`` current and show its first gene."""
        self.current_cluster_index = int(cluster_index)
        self.current_cluster = self.clusters[self.current_cluster_index]
        self.current_gene_index = 0
        self.current_gene = self._current_genes()[self.current_gene_index]
        self._select_in_listbox(self.group_list, self.current_cluster_index)
        self.update_genes()
        self.load_image()

    def _show_gene(self, gene_index):
        """Make the gene at ``gene_index`` (wrapping around) of the current cluster current."""
        genes = self._current_genes()
        if not genes:
            return
        self.current_gene_index = gene_index % len(genes)
        self.current_gene = genes[self.current_gene_index]
        self.gene_name_label['text'] = self.current_gene
        self._select_in_listbox(self.gene_list, self.current_gene_index)
        self.load_image()

    def previous_group(self, event):
        """Go to the previous group, wrapping from the first to the last."""
        self._show_cluster((self.current_cluster_index - 1) % len(self.clusters))

    def next_group(self, event):
        """Go to the next group, wrapping from the last to the first."""
        self._show_cluster((self.current_cluster_index + 1) % len(self.clusters))

    def go_to_previous(self, event):
        """Go to the previous gene of the current group, wrapping around."""
        self._show_gene(self.current_gene_index - 1)

    def go_to_next(self, event):
        """Go to the next gene of the current group, wrapping around."""
        self._show_gene(self.current_gene_index + 1)

    def update_groups(self):
        """Fill the group list with one entry per cluster and select the first."""
        for cluster in self.clusters:
            self.group_list.insert(tk.END, "Group_{number}".format(number=cluster))
        self.group_list.select_set(0)

    def update_genes(self):
        """Refill the gene list with the genes of the current cluster and select the first."""
        genes = self._current_genes()
        self.gene_list.delete(0, tk.END)
        for gene_name in genes:
            self.gene_list.insert(tk.END, gene_name)
        self.gene_list.select_set(0)
        self.gene_label['text'] = "Gene names ({numbers})".format(numbers=len(genes))

    def select_gene(self, event):
        """Show the gene clicked in the gene list."""
        selected = self._selected_index(self.gene_list)
        if selected is None:
            return
        self.current_gene_index = selected
        self.current_gene = self._current_genes()[self.current_gene_index]
        self.load_image()

    def select_group(self, event):
        """Show the first gene of the group clicked in the group list."""
        selected = self._selected_index(self.group_list)
        if selected is None:
            return
        self._show_cluster(selected)

    def write_gene_description(self):
        """Show the summary of the current gene in the gene description box."""
        gene_field = self._gene_description_text()
        self.gene_description_content.delete('1.0', tk.END)
        self.gene_description_content.insert(tk.END, gene_field)

    def write_group_description(self):
        """Show the description of the current cluster in the group description box."""
        text_info = self.data[self._cluster_key(self.current_cluster)][1]
        self._show_text(self.group_description_content, text_info)

    def query_gene_name(self, event):
        """Jump to the gene typed in the query box, or report that it was not found."""
        query_entry = self.query_enter.get()
        if query_entry in self.clustering.GeneNames.values:
            self.current_gene = query_entry
            self.current_cluster = self.clustering.Cluster[self.clustering.GeneNames == self.current_gene].values[0]
            self.current_cluster_index = int(np.where(self.clusters == self.current_cluster)[0][0])
            # look the gene up under the cluster id, not the list position
            self.current_gene_index = self._current_genes().index(self.current_gene)
            self.update_genes()
            self._select_in_listbox(self.gene_list, self.current_gene_index)
            self._select_in_listbox(self.group_list, self.current_cluster_index)
            self.load_image()
        else:
            self.query_enter.delete(0, tk.END)
            self.query_enter.insert(0, "Gene not found: {entrquery}".format(entrquery=query_entry))

    def update_group_description(self, event):
        """Store the edited group description of the current cluster in memory."""
        # 'end-1c' leaves out the newline Tk always keeps at the end of a Text
        # widget; including it would add one newline on every refresh.
        text_info = self.group_description_content.get('1.0', 'end-1c')
        self.data[self._cluster_key(self.current_cluster)][1] = text_info

    def save_changes(self, event):
        """Rewrite all group description files and save the clustering table."""
        # Recreate the folder so files of clusters removed by merging disappear.
        if exists(self.DESCRIPTIONS_DIR):
            rmtree(self.DESCRIPTIONS_DIR)
        mkdir(self.DESCRIPTIONS_DIR)
        for cluster in self.clusters:
            cluster_name = self._cluster_key(cluster)
            with open(join(self.DESCRIPTIONS_DIR, cluster_name), 'w') as fobj:
                fobj.write(self.data[cluster_name][1])
        self.clustering.to_csv(self.clustering_addr)

    # ------------------------------------------------------------------
    # change group assignment dialog
    # ------------------------------------------------------------------
    def changeGroupAssignment(self, event):
        """Open the modal dialog used to move the current gene to another cluster."""
        self.new_window = tk.Toplevel(self.root)
        self.new_window.grab_set()
        self.options_frame = tk.Frame(self.new_window)
        self.actions_frame = tk.Frame(self.new_window)
        self.options = tk.Listbox(self.options_frame, exportselection=0)
        self.options.config(selectmode=tk.SINGLE)
        self.options.pack()
        self.change_assignment_button = tk.Button(self.actions_frame, text='Change assignment')
        self.show_selected_description = tk.Text(self.actions_frame, borderwidth=1, relief="solid",
                                                 width=28, height=10)
        self.change_assignment_button.grid(row=0, column=0, sticky=tk.N)
        self.show_selected_description.grid(row=2, column=0, sticky=tk.N)
        for cluster in self.clusters:
            self.options.insert(tk.END, self._cluster_key(cluster))
        self.options.select_set(first=0)
        self.options.see(0)
        self._show_text(self.show_selected_description, self._description_at(0))
        self.options_frame.grid(row=0, column=0, sticky=tk.N)
        self.actions_frame.grid(row=0, column=1, sticky=tk.N)
        self.new_window.bind("<Up>", self.assignment_up)
        self.new_window.bind("<Down>", self.assignment_down)
        self.change_assignment_button.bind("<ButtonRelease-1>", self.change_assignment)

    def _move_assignment(self, step):
        """Move the selection in the assignment dialog by ``step`` rows, wrapping around."""
        selected = self._selected_index(self.options)
        if selected is None:
            asgn_index = 0
        else:
            asgn_index = (selected + step) % len(self.clusters)
        self._select_in_listbox(self.options, asgn_index)
        self._show_text(self.show_selected_description, self._description_at(asgn_index))

    def assignment_up(self, event):
        """Select the previous cluster in the assignment dialog."""
        self._move_assignment(-1)

    def assignment_down(self, event):
        """Select the next cluster in the assignment dialog."""
        self._move_assignment(1)

    def change_assignment(self, event):
        """Assign the current gene to the cluster selected in the dialog and close it."""
        asgn_index = self._selected_index(self.options)
        if asgn_index is None:
            return
        target_cluster = self.clusters[asgn_index]
        # .loc writes into the table itself; chained indexing may write to a copy
        self.clustering.loc[self.clustering.GeneNames == self.current_gene, "Cluster"] = target_cluster
        # Refresh only the gene lists so unsaved description edits are kept.
        for cluster in self.clusters:
            self.data[self._cluster_key(cluster)][0] = self._genes_in_cluster(cluster)
        self.query_enter.delete(0, tk.END)
        self.query_enter.insert(0, self.current_gene)
        self.query_gene_name(event)
        self._close_dialog()

    # ------------------------------------------------------------------
    # merge groups dialog
    # ------------------------------------------------------------------
    def merge_groups(self, event):
        """Open the modal dialog used to merge two clusters."""
        self.new_window = tk.Toplevel(self.root)
        self.new_window.grab_set()
        self.up_frame = tk.Frame(self.new_window)
        self.down_frame = tk.Frame(self.new_window)
        self.up_frame.grid(row=0, column=0)
        self.down_frame.grid(row=1, column=0)
        self.confirm_merging_button = tk.Button(self.up_frame, text='Confirm merging')
        self.confirm_merging_button.grid(row=0, column=0)
        self.set1_listbox = tk.Listbox(self.down_frame, exportselection=0)
        self.set1_description = tk.Text(self.down_frame, width=28, height=10)
        self.set2_listbox = tk.Listbox(self.down_frame, exportselection=0)
        self.set2_description = tk.Text(self.down_frame, width=28, height=10)
        self.set1_listbox.config(selectmode=tk.SINGLE)
        self.set2_listbox.config(selectmode=tk.SINGLE)
        self.set1_listbox.grid(row=0, column=0)
        self.set1_description.grid(row=0, column=1)
        self.set2_listbox.grid(row=0, column=2)
        self.set2_description.grid(row=0, column=3)
        for cluster in self.clusters:
            cluster_name = self._cluster_key(cluster)
            self.set1_listbox.insert(tk.END, cluster_name)
            self.set2_listbox.insert(tk.END, cluster_name)
        # left list starts on the current cluster, right list on the first one
        self.set1_listbox.select_set(first=self.current_cluster_index)
        self.set1_listbox.see(self.current_cluster_index)
        self.set2_listbox.select_set(first=0)
        self.set2_listbox.see(0)
        self._show_text(self.set1_description, self._description_at(self.current_cluster_index))
        self._show_text(self.set2_description, self._description_at(0))
        self.set1_listbox.bind("<ButtonRelease-1>", self.setListbox1)
        self.set2_listbox.bind("<ButtonRelease-1>", self.setListbox2)
        self.confirm_merging_button.bind("<ButtonRelease-1>", self.confirm_merging)

    def setListbox1(self, event):
        """Show the description of the cluster selected in the left merge list."""
        sel_idx = self._selected_index(self.set1_listbox)
        if sel_idx is not None:
            self._show_text(self.set1_description, self._description_at(sel_idx))

    def setListbox2(self, event):
        """Show the description of the cluster selected in the right merge list."""
        sel_idx = self._selected_index(self.set2_listbox)
        if sel_idx is not None:
            self._show_text(self.set2_description, self._description_at(sel_idx))

    def confirm_merging(self, event):
        """Merge the two selected clusters into the one with the lower id and close the dialog.

        Higher cluster ids are renumbered down by one. Nothing happens, and
        the dialog stays open, when the same cluster is selected twice or
        a list has no selection.
        """
        member_1 = self._selected_index(self.set1_listbox)
        member_2 = self._selected_index(self.set2_listbox)
        if member_1 is None or member_2 is None or member_1 == member_2:
            # merging a cluster with itself would erase it
            return
        cluster_1 = int(self.clusters[member_1])
        cluster_2 = int(self.clusters[member_2])
        to_erase = max(cluster_1, cluster_2)
        to_keep = min(cluster_1, cluster_2)
        self.data = self._merge_cluster_data(self.data, to_keep, to_erase)
        self.clustering = self._clustering_from_data(self.data)
        self.clusters = np.array([int(a) for a in self.clustering.Cluster.unique()])
        self.clusters.sort()
        self.group_list.delete(0, tk.END)
        for cluster in self.clusters:
            self.group_list.insert(tk.END, "Group_{number}".format(number=cluster))
        # Re-locate the current gene; this also refreshes both list selections.
        self.query_enter.delete(0, tk.END)
        self.query_enter.insert(0, self.current_gene)
        self.query_gene_name(event)
        self._close_dialog()
# Script entry point: collect the feature plot images from the "features" folder
# and open the viewer (the constructor runs the Tk main loop).
clus_addr = "clustering.csv"
gene_info_addr = "gene_info.csv"
feature_addrs = []
for addr in listdir('features'):
    if addr.endswith("png"):
        feature_addrs.append(join("features", addr))
GeneViewer(feature_addrs, clus_addr, gene_info_addr)