Merge remote-tracking branch 'remotes/origin/PBS_Torque_support' into…

… hisat2
proost · Jul 26, 2017 · 7bb3580 · 7bb3580
2 parents e7f8ddc + 3446452
commit 7bb3580
Show file tree

Hide file tree

Showing 2 changed files with 94 additions and 4 deletions.
diff --git a/cluster/__init__.py b/cluster/__init__.py
@@ -1,21 +1,61 @@
 import re
+import sys
 
-from subprocess import check_output
+from subprocess import check_output, DEVNULL
 from time import sleep
 
 
+def detect_cluster_system():
+    """
+    Detects the installed cluster manager by looking up its executable with "which" (sge_qmaster first, then pbs_sched).
+    Returns "SGE" for Sun/Oracle Grid Engine, "PBS" for PBS/Torque based systems and otherwise "other"
+
+    :return: string "SGE", "PBS" or "other"
+    """
+    try:
+        which_output = check_output(["which", "sge_qmaster"], stderr=DEVNULL).decode("utf-8")
+
+        if "/sge_qmaster" in which_output:
+            return "SGE"
+    except Exception as _:  # any failure of the lookup is treated as "SGE not installed"
+        pass
+
+    try:
+        which_output = check_output(["which", "pbs_sched"], stderr=DEVNULL).decode("utf-8")
+
+        if "/pbs_sched" in which_output:
+            return "PBS"
+    except Exception as _:  # any failure of the lookup is treated as "PBS not installed"
+        pass
+
+    return "other"
+
+
 def job_running(job_name):
     """
     Checks if a specific job is still running on a cluster using the qstat command
 
     :param job_name: name of the submitted script/jobname
     :return: boolean true if the job is still running or in the queue
     """
-    qstat = check_output(["qstat", "-r"]).decode("utf-8")
 
-    pattern = "Full jobname:\s*" + job_name
+    running_jobs = []
+    c_system = detect_cluster_system()
 
-    running_jobs = re.findall(pattern, qstat)
+    if c_system == "SGE":
+        # Sun/Oracle Grid engine detected
+        qstat = check_output(["qstat", "-r"]).decode("utf-8")
+
+        pattern = "Full jobname:\s*" + job_name
+
+        running_jobs = re.findall(pattern, qstat)
+    elif c_system == "PBS":
+        # PBS/Torque detected
+        qstat = check_output(["qstat", "-f"]).decode("utf-8")
+        pattern = "Job_Name = \s*" + job_name
+        running_jobs = re.findall(pattern, qstat)
+    else:
+        print("Unsupported System", file=sys.stderr)
 
     if len(running_jobs) > 0:
         print('Still %d jobs running.' % len(running_jobs), end='\r')

diff --git a/helper/pca_plot.py b/helper/pca_plot.py
@@ -0,0 +1,50 @@
+"""
+centers and scales the data from expression matrix and then plots the PCA result
+input: expression matrix, file with RunIDs, SRAIDs and description eg. tissues
+output: plot with the points colored by the tissues that were taken for the given experiment
+"""
+
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.decomposition import PCA as sklearnPCA
+from sklearn import preprocessing
+import seaborn as sns
+
+import argparse
+
+
+def run_pca(expression):
+    # Load the expression table: first row is the header, first column is the index
+    df = pd.read_table(expression, header=0, index_col=0)
+    run_ids = list(df.columns.values)
+    dataMatrix = np.transpose(np.array(df))  # transposed so that each row corresponds to one column (run) of the table
+
+    run_ids = [s.split('_')[0] for s in run_ids]  # keep only the part of each column name before the first underscore
+
+    # Scale the data with maxabs_scale (axis=0), then project it onto the first two principal components
+    sklearn_pca = sklearnPCA(n_components=2)
+    sklearn_transf = sklearn_pca.fit_transform(preprocessing.maxabs_scale(dataMatrix, axis=0))
+
+    with sns.axes_style("whitegrid", {"grid.linestyle": None}):
+        for run, pca_data in zip(run_ids, sklearn_transf):  # one gray, semi-transparent point per run, labelled with its ID
+            plt.plot(pca_data[0], pca_data[1], 'o',
+                     markersize=7,
+                     alpha=0.5,
+                     color='gray')
+            plt.text(pca_data[0], pca_data[1], run)
+
+        plt.xlabel('PC 1 (%0.2f %%)' % (sklearn_pca.explained_variance_ratio_[0]*100))  # axis labels show explained variance in percent
+        plt.ylabel('PC 2 (%0.2f %%)' % (sklearn_pca.explained_variance_ratio_[1]*100))
+
+        plt.show()
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(prog="./pca_plot.py")
+
+    parser.add_argument('expression', help='path to expression matrix')
+
+    # Parse the command-line arguments and run the PCA plot on the given expression matrix
+    args = parser.parse_args()
+
+    run_pca(args.expression)