Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
more gruntwork to support HISAT2
  • Loading branch information
proost committed Jul 21, 2017
1 parent 79de4d2 commit b8923d6
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 10 deletions.
3 changes: 3 additions & 0 deletions config.template.ini
Expand Up @@ -26,6 +26,9 @@ trimmomatic_pe_command=java -jar ${jar} PE -threads 1 ${ina} ${inb} ${outap} ${
tophat_se_cmd=tophat -p 3 -o ${out} ${genome} ${fq}
tophat_pe_cmd=tophat -p 3 -o ${out} ${genome} ${forward},${reverse}

hisat2_se_cmd=hisat2 -p 3 -x ${genome} -U ${fq} -S ${out} 2> ${stats}
hisat2_pe_cmd=hisat2 -p 3 -x ${genome} -1 ${forward} -2 ${reverse} -S ${out} 2> ${stats}

htseq_count_cmd=htseq-count -s no -f ${itype} -t ${feature} -i ${field} ${bam} ${gff} > ${out}

interproscan_cmd=interproscan.sh -i ${in_dir}/${in_prefix}${SGE_TASK_ID} -o ${out_dir}/${out_prefix}${SGE_TASK_ID} -f tsv -dp -iprlookup -goterms --tempdir /tmp
Expand Down
10 changes: 8 additions & 2 deletions pipeline/base.py
Expand Up @@ -33,18 +33,24 @@ def __init__(self, config, data, enable_log=False, use_hisat2=False):

self.bowtie_build_cmd = self.cp['TOOLS']['bowtie_cmd']
self.hisat2_build_cmd = self.cp['TOOLS']['hisat2_build_cmd']

self.trimmomatic_se_cmd = self.cp['TOOLS']['trimmomatic_se_command']
self.trimmomatic_pe_cmd = self.cp['TOOLS']['trimmomatic_pe_command']

self.tophat_se_cmd = self.cp['TOOLS']['tophat_se_cmd']
self.tophat_pe_cmd = self.cp['TOOLS']['tophat_pe_cmd']
self.hisat2_se_cmd = self.cp['TOOLS']['hisat2_se_cmd']
self.hisat2_pe_cmd = self.cp['TOOLS']['hisat2_pe_cmd']

self.htseq_count_cmd = self.cp['TOOLS']['htseq_count_cmd']
self.interproscan_cmd = self.cp['TOOLS']['interproscan_cmd']
self.orthofinder_cmd = self.cp['TOOLS']['orthofinder_cmd']

self.pcc_cmd = self.cp['TOOLS']['pcc_cmd']
self.mcl_cmd = self.cp['TOOLS']['mcl_cmd']
self.mcxdeblast_cmd = self.cp['TOOLS']['mcxdeblast_cmd']

self.interproscan_cmd = self.cp['TOOLS']['interproscan_cmd']
self.orthofinder_cmd = self.cp['TOOLS']['orthofinder_cmd']

self.qsub_indexing = shlex.split(self.cp['TOOLS']['qsub_indexing'].strip('\''))
self.qsub_trimmomatic = shlex.split(self.cp['TOOLS']['qsub_trimmomatic'].strip('\''))
self.qsub_tophat = shlex.split(self.cp['TOOLS']['qsub_tophat'].strip('\''))
Expand Down
3 changes: 2 additions & 1 deletion pipeline/check/sanity.py
Expand Up @@ -70,7 +70,8 @@ def check_sanity_config(filename):
'trimmomatic_pe_command', 'tophat_se_cmd', 'tophat_pe_cmd', 'htseq_count_cmd',
'interproscan_cmd', 'pcc_cmd', 'mcl_cmd', 'orthofinder_cmd', 'mcxdeblast_cmd',
'trimmomatic_path', 'qsub_indexing', 'qsub_trimmomatic', 'qsub_tophat', 'qsub_htseq_count',
'qsub_interproscan', 'qsub_pcc', 'qsub_mcl', 'qsub_orthofinder', 'qsub_mcxdeblast']
'qsub_interproscan', 'qsub_pcc', 'qsub_mcl', 'qsub_orthofinder', 'qsub_mcxdeblast',
'hisat2_se_cmd', 'hisat2_pe_cmd']
required_paths = ['trimmomatic_path']

if 'TOOLS' in cp:
Expand Down
44 changes: 42 additions & 2 deletions pipeline/transcriptome.py
Expand Up @@ -141,7 +141,7 @@ def trim_fastq(self, overwrite=False):

print("Done\n\n")

def run_tophat(self, overwrite=False, keep_previous=False):
def __run_tophat(self, overwrite=False, keep_previous=False):
"""
Maps the reads from the trimmed fastq files to the bowtie-indexed genome
Expand All @@ -162,7 +162,7 @@ def run_tophat(self, overwrite=False, keep_previous=False):

for g in self.genomes:
tophat_output = self.dp[g]['tophat_output']
bowtie_output = self.dp[g]['bowtie_output']
bowtie_output = self.dp[g]['indexing_output']
trimmed_fastq_dir = self.dp[g]['trimmomatic_output']
os.makedirs(tophat_output, exist_ok=True)

Expand Down Expand Up @@ -221,6 +221,46 @@ def run_tophat(self, overwrite=False, keep_previous=False):
# remove OUT_ files
PipelineBase.clean_out_files(jobname)

def __run_hisat2(self, overwrite=False, keep_previous=False):
    """
    Prepare (and clean up after) mapping the trimmed reads with HISAT2

    Two submission scripts are written, one built from the single-end command (hisat2_se_cmd) and one from
    the paired-end command (hisat2_pe_cmd). Once done, both scripts are removed again and the OUT_ files
    belonging to the job name are cleaned up.

    NOTE: no mapping job is submitted here yet; the scripts are only generated and removed.

    :param overwrite: currently unused by this method; accepted so the signature matches the tophat method
    :param keep_previous: currently unused by this method; accepted so the signature matches the tophat method
    """
    # Scripts are registered as soon as they exist, so they get removed even if a later step fails
    scripts = []

    try:
        filename_se, jobname = self.write_submission_script("hisat2_%d",
                                                            self.hisat2_module,
                                                            self.hisat2_se_cmd,
                                                            "hisat2_se_%d.sh")
        scripts.append(filename_se)

        # jobname is intentionally overwritten: the last returned name is the one used for the OUT_ cleanup
        filename_pe, jobname = self.write_submission_script("hisat2_%d",
                                                            self.hisat2_module,
                                                            self.hisat2_pe_cmd,
                                                            "hisat2_pe_%d.sh")
        scripts.append(filename_pe)

        print('Mapping reads with HISAT2...')
    finally:
        # remove the submission script(s)
        for script in scripts:
            os.remove(script)

    # remove OUT_ files
    PipelineBase.clean_out_files(jobname)

def run_alignment(self, overwrite=False, keep_previous=False):
    """
    Align the reads to the indexed genome with the aligner selected for this pipeline

    HISAT2 is used when self.use_hisat2 is set, TopHat otherwise. Both arguments are handed to the selected
    aligner unchanged.

    :param overwrite: forwarded to the aligner; when True existing data should be overwritten, otherwise
    existing runs should be skipped
    :param keep_previous: forwarded to the aligner; when True the trimmed reads should be kept upon
    completion, otherwise they should be deleted
    """
    # Pick the bound method first, then call it once with the shared arguments
    aligner = self.__run_hisat2 if self.use_hisat2 else self.__run_tophat
    aligner(overwrite=overwrite, keep_previous=keep_previous)

    print("Done\n\n")

def run_htseq_count(self, keep_previous=False):
Expand Down
6 changes: 1 addition & 5 deletions run.py
Expand Up @@ -32,11 +32,7 @@ def run_pipeline(args):
print("Skipping Trimmomatic", file=sys.stderr)

if args.alignment:
if args.use_hisat2:
print("alignment using hisat2 not implemented yet!", file=sys.argv)
quit()
else:
tp.run_tophat(keep_previous=args.keep_intermediate)
tp.run_alignment(keep_previous=args.keep_intermediate)
else:
print("Skipping Alignment", file=sys.stderr)

Expand Down

0 comments on commit b8923d6

Please sign in to comment.