import re
import shutil
import sys

from subprocess import check_output, DEVNULL
from time import sleep


def detect_cluster_system():
    """
    Detect which cluster manager is installed on the system.

    The scheduler daemons are looked up on the executable search path: ``sge_qmaster`` identifies Sun/Oracle Grid
    Engine and ``pbs_sched`` identifies PBS/Torque based systems. Grid Engine is probed first, so it wins when both
    executables are present.

    :return: string "SGE", "PBS" or "other"
    """
    # Probes are checked in order; the first executable found decides the result.
    probes = (
        ("sge_qmaster", "SGE"),
        ("pbs_sched", "PBS"),
    )

    for executable, system in probes:
        # shutil.which returns None for a missing executable instead of raising, so no external `which` process
        # has to be spawned and no exception has to be swallowed.
        if shutil.which(executable) is not None:
            return system

    return "other"
"""
Scales the data from an expression matrix and plots the result of a two component PCA.
input: path to the expression matrix (first row is the header, first column is the index)
output: interactive plot with one gray point per column of the matrix, labelled with the column name up to its
        first underscore
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA as sklearnPCA
from sklearn import preprocessing
import seaborn as sns

import argparse


def run_pca(expression):
    """
    Project the columns of an expression matrix onto two principal components and show them as a labelled plot.

    :param expression: path to the expression matrix, read with pandas.read_table using the first row as header
                       and the first column as index
    """
    # Load Expression data
    table = pd.read_table(expression, header=0, index_col=0)

    # Only the part of a column name in front of the first underscore is used as point label
    labels = [name.split('_')[0] for name in list(table.columns.values)]

    # Transpose so that every column of the input file becomes one row (one observation) for the PCA
    samples = np.transpose(np.array(table))
    scaled = preprocessing.maxabs_scale(samples, axis=0)

    # Run PCA
    pca = sklearnPCA(n_components=2)
    projected = pca.fit_transform(scaled)

    # NOTE(review): the source of this block was whitespace-collapsed; the label and show calls are assumed to
    # sit inside the style context - confirm against the original file.
    with sns.axes_style("whitegrid", {"grid.linestyle": None}):
        for label, (pc1, pc2) in zip(labels, projected):
            plt.plot(pc1, pc2, 'o', markersize=7, alpha=0.5, color='gray')
            plt.text(pc1, pc2, label)

        # Axis labels report the share of variance explained by each component in percent
        plt.xlabel('PC 1 (%0.2f %%)' % (pca.explained_variance_ratio_[0]*100))
        plt.ylabel('PC 2 (%0.2f %%)' % (pca.explained_variance_ratio_[1]*100))

        plt.show()


if __name__ == "__main__":
    cli = argparse.ArgumentParser(prog="./pca_plot.py")
    cli.add_argument('expression', help='path to expression matrix')

    # Parse arguments and start script
    run_pca(cli.parse_args().expression)