Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
LSTrAP/run.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
executable file
145 lines (114 sloc)
6.31 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
import sys | |
from pipeline.check.sanity import check_sanity_config, check_sanity_data | |
from pipeline.interpro import InterProPipeline | |
from pipeline.transcriptome import TranscriptomePipeline | |
from pipeline.orthology import OrthologyPipeline | |
def run_pipeline(args): | |
""" | |
Runs pipeline based on settings in args. | |
:param args: Parsed arguments from argparse | |
""" | |
if check_sanity_config(args.config) and check_sanity_data(args.data): | |
if args.transcriptomics: | |
tp = TranscriptomePipeline(args.config, | |
args.data, | |
enable_log=args.enable_log, | |
use_hisat2=args.use_hisat2) | |
if args.indexing: | |
tp.prepare_genome() | |
else: | |
print("Skipping Indexing", file=sys.stderr) | |
if args.trim_fastq: | |
tp.trim_fastq() | |
else: | |
print("Skipping Trimmomatic", file=sys.stderr) | |
if args.alignment: | |
tp.run_alignment(keep_previous=args.keep_intermediate) | |
else: | |
print("Skipping Alignment", file=sys.stderr) | |
if args.htseq: | |
tp.run_htseq_count(keep_previous=args.keep_intermediate) | |
else: | |
print("Skipping htseq-counts", file=sys.stderr) | |
if args.qc: | |
tp.check_quality() | |
else: | |
print("Skipping quality control", file=sys.stderr) | |
if args.exp_matrix: | |
tp.htseq_to_matrix() | |
tp.normalize_rpkm() | |
tp.normalize_tpm() | |
else: | |
print("Skipping expression matrix", file=sys.stderr) | |
if args.pcc: | |
tp.run_pcc() | |
else: | |
print("Skipping PCC calculations", file=sys.stderr) | |
if args.mcl: | |
tp.cluster_pcc() | |
else: | |
print("Skipping MCL clustering of PCC values", file=sys.stderr) | |
else: | |
print("Skipping transcriptomics", file=sys.stderr) | |
# Run InterPro Section | |
if args.interpro: | |
ip = InterProPipeline(args.config, args.data) | |
ip.run_interproscan() | |
else: | |
print("Skipping Interpro", file=sys.stderr) | |
# Run Orthology Section | |
if args.orthology: | |
op = OrthologyPipeline(args.config, args.data) | |
if args.orthofinder: | |
op.run_orthofinder() | |
if args.mcl_families: | |
op.run_mcl() | |
else: | |
print("Skipping Orthology", file=sys.stderr) | |
else: | |
print("Sanity check failed, cannot start pipeline", file=sys.stderr) | |
if __name__ == "__main__": | |
parser = argparse.ArgumentParser(prog="./run.py") | |
parser.add_argument('config', help='path to config.ini') | |
parser.add_argument('data', help='path to data.ini') | |
# Optional arguments | |
parser.add_argument('--skip-transcriptomics', dest='transcriptomics', action='store_false', help='add --skip-transcriptomics to skip the entire transcriptome step') | |
parser.add_argument('--enable-interpro', dest='interpro', action='store_true', help='Runs InterProScan to detect protein domains and provide functional annotation') | |
parser.add_argument('--enable-orthology', dest='orthology', action='store_true', help='Runs OrhtoFinder and MCL to detect orthogroups, gene families and orthologs') | |
parser.add_argument('--use-hisat2', dest='use_hisat2', action='store_true', help='Use HISAT2 to build the index and align reads instead of BowTie2 and TopHat2') | |
parser.add_argument('--skip-indexing', dest='indexing', action='store_false', help='add --skip-indexing to skip building an index (for read alignment) on the genome (BowTie2 or HISAT2)') | |
parser.add_argument('--skip-trim-fastq', dest='trim_fastq', action='store_false', help='add --skip-trim-fastq to skip trimming fastq files using trimmomatic') | |
parser.add_argument('--skip-alignment', dest='alignment', action='store_false', help='add --skip-alignment to skip the read alignment step (TopHat 2 or HISAT2)') | |
parser.add_argument('--skip-htseq', dest='htseq', action='store_false', help='add --skip-htseq to skip counting reads per gene with htseq-count') | |
parser.add_argument('--skip-qc', dest='qc', action='store_false', help='add --skip-qc to skip quality control of tophat and htseq output') | |
parser.add_argument('--skip-exp-matrix', dest='exp_matrix', action='store_false', help='add --skip-exp-matrix to skip converting htseq files to an expression matrix') | |
parser.add_argument('--skip-pcc', dest='pcc', action='store_false', help='add --skip-pcc to skip calculating PCC values') | |
parser.add_argument('--skip-mcl', dest='mcl', action='store_false', help='add --skip-mcl to skip clustering PCC values using MCL') | |
parser.add_argument('--skip-orthofinder', dest='orthofinder', action='store_false', help='add --skip-orthofinder to skip the orthology detection') | |
parser.add_argument('--skip-mcl-families', dest='mcl_families', action='store_false', help='add --skip-mcl to skip clustering blast with MCL') | |
parser.add_argument('--remove-intermediate', dest='keep_intermediate', action='store_false', help='add --remove-intermediate to clear trimmomatic and tophat files after completing those steps') | |
parser.add_argument('--disable-log', dest='enable_log', action='store_false', | |
help='add --disable-log to disable writing additional statistics.') | |
# Flags for the big sections of the pipeline | |
parser.set_defaults(transcriptomics=True) | |
parser.set_defaults(interpro=False) | |
parser.set_defaults(orthology=False) | |
parser.set_defaults(use_hisat2=False) | |
# Flags for individual tools for transcriptomics | |
parser.set_defaults(indexing=True) | |
parser.set_defaults(trim_fastq=True) | |
parser.set_defaults(alignment=True) | |
parser.set_defaults(htseq=True) | |
parser.set_defaults(qc=True) | |
parser.set_defaults(exp_matrix=True) | |
parser.set_defaults(pcc=True) | |
parser.set_defaults(mcl=True) | |
parser.set_defaults(orthofinder=True) | |
parser.set_defaults(mcl_families=True) | |
parser.set_defaults(keep_intermediate=True) | |
parser.set_defaults(enable_log=True) | |
# Parse arguments and start pipeline | |
args = parser.parse_args() | |
run_pipeline(args) |