From f5d0a39359afc5fd8ad8e6fa5557099588341775 Mon Sep 17 00:00:00 2001
From: sepro
Date: Thu, 27 Apr 2017 14:07:18 +0200
Subject: [PATCH] initial commit

---
 .gitignore          |  95 +++++++++++++++++++++++++++++++++
 blast_on_cluster.py | 127 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 222 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 blast_on_cluster.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0646b06
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,95 @@
+# Created by .ignore support plugin (hsz.mobi)
+### Python template
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+.venv/
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
+
+# pycharm/intellij
+.idea/
\ No newline at end of file
diff --git a/blast_on_cluster.py b/blast_on_cluster.py
new file mode 100644
index 0000000..f8154a6
--- /dev/null
+++ b/blast_on_cluster.py
@@ -0,0 +1,127 @@
+"""
+Submit the blastp commands reported by OrthoFinder as jobs to a cluster.
+
+Lines are read from STDIN; every line that is a blastp command is turned into
+a qsub call. By default the qsub call is only printed, pass --submit on the
+command line to actually execute it.
+"""
+import sys
+from datetime import datetime
+from math import ceil
+import shlex
+import subprocess
+
+# Submission script written by write_template(). ${q}, ${db}, ${out} and ${t}
+# are environment variables handed to the job through "qsub -v".
+cluster_template = """#!/bin/bash
+#
+
+#$ -N blast_on_cluster
+#$ -cwd
+#$ -j y
+#$ -S /bin/bash
+#$ -o OUT_$JOB_NAME.$JOB_ID
+#$ -e ERR_$JOB_NAME.$JOB_ID
+
+module load biotools/ncbi-blast-2.3.0+
+date
+hostname
+blastp -outfmt 6 -evalue 0.001 -query "${q}" -db "${db}" -out "${out}" -num_threads "${t}"
+date
+"""
+
+
+def check_line(line):
+    """
+    Checks if a line is a blastp command
+
+    :param line: string to check
+    :return: True if the line is a blastp command
+    """
+    return line.startswith("blastp -outfmt")
+
+
+def write_template(filename=None):
+    """
+    Writes the submission script (cluster_template) to disk
+
+    :param filename: path to write to, when None a name containing the current timestamp is generated
+    :return: path of the file that was written
+    """
+    if filename is None:
+        # datetime.now() (not utcnow()): timestamp() treats naive datetimes as local time
+        filename = "blast_on_cluster" + str(ceil(datetime.now().timestamp())) + ".sh"
+
+    with open(filename, "w") as t:
+        # write() instead of print() so no extra trailing newline is appended
+        t.write(cluster_template)
+
+    return filename
+
+
+def get_option(cmd_args, flag):
+    """
+    Gets the value following a flag in a tokenized command
+
+    :param cmd_args: list of tokens (as returned by shlex.split)
+    :param flag: flag to look for (e.g. "-query")
+    :return: the token following the flag
+    :raises ValueError: if the flag is missing or has no value
+    """
+    try:
+        return cmd_args[cmd_args.index(flag) + 1]
+    except (ValueError, IndexError):
+        raise ValueError("No value for %s found in command: %s" % (flag, " ".join(cmd_args))) from None
+
+
+def execute_command_on_cluster(submission_script, cmd, threads=1, dry_run=True):
+    """
+    Parses blastp command reported by OrthoFinder and submits an equivalent job to the cluster
+
+    :param submission_script: path to the submission script
+    :param cmd: command reported by OrthoFinder
+    :param threads: number of desired thread for each BLAST
+    :param dry_run: if True (default) the qsub command is only printed, not executed
+    :return: the qsub command as a list of arguments
+    :raises ValueError: if -query, -db or -out is missing from cmd
+    """
+    cmd_args = shlex.split(cmd)
+
+    # look the options up by name rather than by position
+    query = get_option(cmd_args, "-query")
+    db = get_option(cmd_args, "-db")
+    output = get_option(cmd_args, "-out")
+
+    # the variable names passed with -v have to match the ones used in cluster_template
+    qsub_cmd = ['qsub',
+                '-pe', 'cores', str(threads),
+                '-v', 'q=%s,db=%s,out=%s,t=%d' % (query, db, output, threads),
+                submission_script]
+
+    print(qsub_cmd)
+    if not dry_run:
+        subprocess.call(qsub_cmd)
+
+    return qsub_cmd
+
+
+def run(dry_run=True):
+    """
+    Main loop, will get STDIN, parse the line and submit the job using qsub
+
+    :param dry_run: if True (default) the qsub commands are only printed, not executed
+    """
+    print("Writing template")
+    submission_script = write_template()
+
+    print("Waiting for command to execute...")
+
+    # sys.stdin is a file object, iterate over it directly (it is not callable)
+    for line in sys.stdin:
+        if check_line(line):
+            print("\n\tCommand found! Executing")
+            execute_command_on_cluster(submission_script, line.strip(), dry_run=dry_run)
+
+
+if __name__ == "__main__":
+    run(dry_run="--submit" not in sys.argv[1:])