Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 145 lines (114 sloc) 6.31 KB
#!/usr/bin/env python3
import argparse
import sys
from pipeline.check.sanity import check_sanity_config, check_sanity_data
from pipeline.interpro import InterProPipeline
from pipeline.transcriptome import TranscriptomePipeline
from pipeline.orthology import OrthologyPipeline
def run_pipeline(args):
    """
    Run every pipeline section that is enabled in the parsed arguments.

    The config sanity check and then the data sanity check must both pass;
    if either fails a message is written to stderr and nothing else runs.
    Disabled top-level sections and disabled transcriptome steps announce
    the skip on stderr. The two orthology sub-steps (OrthoFinder, MCL
    families) are skipped without a message.

    :param args: Parsed arguments from argparse
    """
    sanity_ok = check_sanity_config(args.config) and check_sanity_data(args.data)
    if not sanity_ok:
        print("Sanity check failed, cannot start pipeline", file=sys.stderr)
        return

    # Run Transcriptome Section
    if args.transcriptomics:
        tp = TranscriptomePipeline(
            args.config,
            args.data,
            enable_log=args.enable_log,
            use_hisat2=args.use_hisat2,
        )

        def build_expression_matrix():
            # Raw count matrix first, then both normalisations of it.
            tp.htseq_to_matrix()
            tp.normalize_rpkm()
            tp.normalize_tpm()

        # (flag on args, action to run, message when the step is disabled).
        # Actions are wrapped in callables so that nothing on `tp` or `args`
        # is touched before the step's own flag has been checked.
        steps = (
            ("indexing",
             lambda: tp.prepare_genome(),
             "Skipping Indexing"),
            ("trim_fastq",
             lambda: tp.trim_fastq(),
             "Skipping Trimmomatic"),
            ("alignment",
             lambda: tp.run_alignment(keep_previous=args.keep_intermediate),
             "Skipping Alignment"),
            ("htseq",
             lambda: tp.run_htseq_count(keep_previous=args.keep_intermediate),
             "Skipping htseq-counts"),
            ("qc",
             lambda: tp.check_quality(),
             "Skipping quality control"),
            ("exp_matrix",
             build_expression_matrix,
             "Skipping expression matrix"),
            ("pcc",
             lambda: tp.run_pcc(),
             "Skipping PCC calculations"),
            ("mcl",
             lambda: tp.cluster_pcc(),
             "Skipping MCL clustering of PCC values"),
        )

        for flag_name, action, skip_message in steps:
            if getattr(args, flag_name):
                action()
            else:
                print(skip_message, file=sys.stderr)
    else:
        print("Skipping transcriptomics", file=sys.stderr)

    # Run InterPro Section
    if args.interpro:
        InterProPipeline(args.config, args.data).run_interproscan()
    else:
        print("Skipping Interpro", file=sys.stderr)

    # Run Orthology Section
    if not args.orthology:
        print("Skipping Orthology", file=sys.stderr)
        return

    op = OrthologyPipeline(args.config, args.data)
    if args.orthofinder:
        op.run_orthofinder()
    if args.mcl_families:
        op.run_mcl()
def build_parser():
    """
    Build the command-line parser for the pipeline runner.

    Two positional arguments (``config`` and ``data``) are followed by
    boolean switches. ``--enable-*`` / ``--use-*`` switches use
    ``store_true`` and therefore default to False; ``--skip-*``,
    ``--remove-intermediate`` and ``--disable-log`` use ``store_false`` and
    therefore default to True, so no separate ``set_defaults`` is required.

    :return: the configured argparse.ArgumentParser
    """
    parser = argparse.ArgumentParser(prog="./run.py")
    parser.add_argument('config', help='path to config.ini')
    parser.add_argument('data', help='path to data.ini')

    # Optional arguments: (flag, dest, action, help)
    switches = (
        # Flags for the big sections of the pipeline
        ('--skip-transcriptomics', 'transcriptomics', 'store_false',
         'add --skip-transcriptomics to skip the entire transcriptome step'),
        ('--enable-interpro', 'interpro', 'store_true',
         'Runs InterProScan to detect protein domains and provide functional annotation'),
        ('--enable-orthology', 'orthology', 'store_true',
         'Runs OrthoFinder and MCL to detect orthogroups, gene families and orthologs'),
        ('--use-hisat2', 'use_hisat2', 'store_true',
         'Use HISAT2 to build the index and align reads instead of BowTie2 and TopHat2'),
        # Flags for individual tools for transcriptomics
        ('--skip-indexing', 'indexing', 'store_false',
         'add --skip-indexing to skip building an index (for read alignment) on the genome (BowTie2 or HISAT2)'),
        ('--skip-trim-fastq', 'trim_fastq', 'store_false',
         'add --skip-trim-fastq to skip trimming fastq files using trimmomatic'),
        ('--skip-alignment', 'alignment', 'store_false',
         'add --skip-alignment to skip the read alignment step (TopHat 2 or HISAT2)'),
        ('--skip-htseq', 'htseq', 'store_false',
         'add --skip-htseq to skip counting reads per gene with htseq-count'),
        ('--skip-qc', 'qc', 'store_false',
         'add --skip-qc to skip quality control of tophat and htseq output'),
        ('--skip-exp-matrix', 'exp_matrix', 'store_false',
         'add --skip-exp-matrix to skip converting htseq files to an expression matrix'),
        ('--skip-pcc', 'pcc', 'store_false',
         'add --skip-pcc to skip calculating PCC values'),
        ('--skip-mcl', 'mcl', 'store_false',
         'add --skip-mcl to skip clustering PCC values using MCL'),
        # Flags for individual tools for orthology
        ('--skip-orthofinder', 'orthofinder', 'store_false',
         'add --skip-orthofinder to skip the orthology detection'),
        ('--skip-mcl-families', 'mcl_families', 'store_false',
         'add --skip-mcl-families to skip clustering blast with MCL'),
        # Housekeeping
        ('--remove-intermediate', 'keep_intermediate', 'store_false',
         'add --remove-intermediate to clear trimmomatic and tophat files after completing those steps'),
        ('--disable-log', 'enable_log', 'store_false',
         'add --disable-log to disable writing additional statistics.'),
    )
    for flag, dest, action, help_text in switches:
        parser.add_argument(flag, dest=dest, action=action, help=help_text)

    return parser


if __name__ == "__main__":
    # Parse arguments and start pipeline
    args = build_parser().parse_args()
    run_pipeline(args)