scRNA-seq_analysis

2024-10-23 08:29:24 -07:00 · 2019-07-08 12:22:01 +01:00 · 2019-07-08 12:22:01 +01:00 · 82cc2d191e
commit 82cc2d191e
188 changed files with 146184 additions and 0 deletions
--- a/pipelines/15_train_classifier/svm.py
+++ b/pipelines/15_train_classifier/svm.py
@ -0,0 +1,58 @@
+import pandas as pd
+from sklearn.metrics import classification_report
+from sklearn.metrics import confusion_matrix
+from sklearn.svm import SVC
+from sklearn.model_selection import GridSearchCV
+from sklearn.model_selection import train_test_split
+import pickle
+
+import sys
+args = sys.argv
+material_dir = args[1]
+output_dir = args[2]
+
+from os.path import join
+
+print("Loading data ...")
+X = pd.read_csv(join(material_dir, 'data.csv'), sep = ",", index_col = 0).values
+Y = pd.read_csv(join(material_dir, 'labels.csv')).values[:, 0].reshape(-1, 1).ravel()
+
+from sklearn.decomposition import PCA
+pca = PCA(n_components = .8)
+X = pca.fit_transform(X)
+
+modelFile = open(join(output_dir, "pca.pickle"), "wb")
+print(modelFile)
+modelFile.write(pickle.dumps(pca))
+modelFile.close()
+
+print("Splitting into training and test sets...")
+(X_train, X_test, y_train, y_test) = train_test_split(X, Y, test_size = .3, random_state = 42)
+
+params = {"C":[1e-6, 1e-3, .1, 1, 10, 100, 1000],
+          "gamma": [1e-6, 1e-3, .1, 1]}
+
+# established as the best paramaters in some other work
+params = {"C":[10], "gamma": [1e-3]}
+
+print("Creating the model and fitting the data ...")
+model = GridSearchCV(SVC(probability = False, kernel = "rbf"), params, cv=5)
+model.fit(X_train, y_train)
+
+print("Testing ...")
+pred = model.predict(X_test)
+cls_report = classification_report(y_test, pred, target_names = model.classes_)
+print(cls_report)
+with open(join(output_dir, 'classification_report.txt'), "w")  as cl_f:
+    cl_f.write(cls_report)
+
+print("Saving model and confusion matrix to disk ...")
+cnf_matrix = confusion_matrix(y_test, pred)
+df = pd.DataFrame(cnf_matrix)
+df.columns = model.classes_
+df.to_csv(join(output_dir, 'confusion_matrix.csv'))
+
+modelFile = open(join(output_dir, 'model.pickle'), "wb")
+modelFile.write(pickle.dumps(model))
+modelFile.close()
+