From b8923d66edb0c336634c0bf17c467a83a95b9a70 Mon Sep 17 00:00:00 2001
From: Sebastian Proost <sebastian.proost@gmail.com>
Date: Fri, 21 Jul 2017 12:04:16 +0200
Subject: [PATCH] more gruntwork to support HISAT2

---
 config.template.ini       |  3 +++
 pipeline/base.py          | 10 +++++++--
 pipeline/check/sanity.py  |  3 ++-
 pipeline/transcriptome.py | 44 +++++++++++++++++++++++++++++++++++++--
 run.py                    |  6 +-----
 5 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/config.template.ini b/config.template.ini
index ead1a8d..955a2ab 100644
--- a/config.template.ini
+++ b/config.template.ini
@@ -26,6 +26,9 @@ trimmomatic_pe_command=java -jar ${jar} PE -threads 1  ${ina} ${inb} ${outap} ${
 tophat_se_cmd=tophat -p 3 -o ${out} ${genome} ${fq}
 tophat_pe_cmd=tophat -p 3 -o ${out} ${genome} ${forward},${reverse}
 
+hisat2_se_cmd=hisat2 -p 3 -x ${genome} -U ${fq} -S ${out} 2> ${stats}
+hisat2_pe_cmd=hisat2 -p 3 -x ${genome} -1 ${forward} -2 ${reverse} -S ${out} 2> ${stats}
+
 htseq_count_cmd=htseq-count -s no -f ${itype} -t ${feature} -i ${field} ${bam} ${gff} > ${out}
 
 interproscan_cmd=interproscan.sh -i ${in_dir}/${in_prefix}${SGE_TASK_ID} -o ${out_dir}/${out_prefix}${SGE_TASK_ID} -f tsv -dp -iprlookup -goterms --tempdir /tmp
diff --git a/pipeline/base.py b/pipeline/base.py
index 5850781..e707d3f 100644
--- a/pipeline/base.py
+++ b/pipeline/base.py
@@ -33,18 +33,24 @@ def __init__(self, config, data, enable_log=False, use_hisat2=False):
 
         self.bowtie_build_cmd = self.cp['TOOLS']['bowtie_cmd']
         self.hisat2_build_cmd = self.cp['TOOLS']['hisat2_build_cmd']
+
         self.trimmomatic_se_cmd = self.cp['TOOLS']['trimmomatic_se_command']
         self.trimmomatic_pe_cmd = self.cp['TOOLS']['trimmomatic_pe_command']
+
         self.tophat_se_cmd = self.cp['TOOLS']['tophat_se_cmd']
         self.tophat_pe_cmd = self.cp['TOOLS']['tophat_pe_cmd']
+        self.hisat2_se_cmd = self.cp['TOOLS']['hisat2_se_cmd']
+        self.hisat2_pe_cmd = self.cp['TOOLS']['hisat2_pe_cmd']
+
         self.htseq_count_cmd = self.cp['TOOLS']['htseq_count_cmd']
-        self.interproscan_cmd = self.cp['TOOLS']['interproscan_cmd']
-        self.orthofinder_cmd = self.cp['TOOLS']['orthofinder_cmd']
 
         self.pcc_cmd = self.cp['TOOLS']['pcc_cmd']
         self.mcl_cmd = self.cp['TOOLS']['mcl_cmd']
         self.mcxdeblast_cmd = self.cp['TOOLS']['mcxdeblast_cmd']
 
+        self.interproscan_cmd = self.cp['TOOLS']['interproscan_cmd']
+        self.orthofinder_cmd = self.cp['TOOLS']['orthofinder_cmd']
+
         self.qsub_indexing = shlex.split(self.cp['TOOLS']['qsub_indexing'].strip('\''))
         self.qsub_trimmomatic = shlex.split(self.cp['TOOLS']['qsub_trimmomatic'].strip('\''))
         self.qsub_tophat = shlex.split(self.cp['TOOLS']['qsub_tophat'].strip('\''))
diff --git a/pipeline/check/sanity.py b/pipeline/check/sanity.py
index 4267dcd..686acdc 100644
--- a/pipeline/check/sanity.py
+++ b/pipeline/check/sanity.py
@@ -70,7 +70,8 @@ def check_sanity_config(filename):
                      'trimmomatic_pe_command', 'tophat_se_cmd', 'tophat_pe_cmd', 'htseq_count_cmd',
                      'interproscan_cmd', 'pcc_cmd', 'mcl_cmd', 'orthofinder_cmd', 'mcxdeblast_cmd',
                      'trimmomatic_path', 'qsub_indexing', 'qsub_trimmomatic', 'qsub_tophat', 'qsub_htseq_count',
-                     'qsub_interproscan', 'qsub_pcc', 'qsub_mcl', 'qsub_orthofinder', 'qsub_mcxdeblast']
+                     'qsub_interproscan', 'qsub_pcc', 'qsub_mcl', 'qsub_orthofinder', 'qsub_mcxdeblast',
+                     'hisat2_se_cmd', 'hisat2_pe_cmd']
     required_paths = ['trimmomatic_path']
 
     if 'TOOLS' in cp:
diff --git a/pipeline/transcriptome.py b/pipeline/transcriptome.py
index 926a447..0134284 100644
--- a/pipeline/transcriptome.py
+++ b/pipeline/transcriptome.py
@@ -141,7 +141,7 @@ def trim_fastq(self, overwrite=False):
 
         print("Done\n\n")
 
-    def run_tophat(self, overwrite=False, keep_previous=False):
+    def __run_tophat(self, overwrite=False, keep_previous=False):
         """
         Maps the reads from the trimmed fastq files to the bowtie-indexed genome
 
@@ -162,7 +162,7 @@ def run_tophat(self, overwrite=False, keep_previous=False):
 
         for g in self.genomes:
             tophat_output = self.dp[g]['tophat_output']
-            bowtie_output = self.dp[g]['bowtie_output']
+            bowtie_output = self.dp[g]['indexing_output']
             trimmed_fastq_dir = self.dp[g]['trimmomatic_output']
             os.makedirs(tophat_output, exist_ok=True)
 
@@ -221,6 +221,46 @@ def run_tophat(self, overwrite=False, keep_previous=False):
         # remove OUT_ files
         PipelineBase.clean_out_files(jobname)
 
+    def __run_hisat2(self, overwrite=False, keep_previous=False):
+        """
+        Maps the reads from the trimmed fastq files to the bowtie-indexed genome
+
+        :param overwrite: when true the pipeline will start tophat even if the output exists
+        :param keep_previous: when true trimmed fastq files will not be removed after tophat completes
+        """
+        filename_se, jobname = self.write_submission_script("hisat2_%d",
+                                                            self.hisat2_module,
+                                                            self.hisat2_se_cmd,
+                                                            "hisat2_se_%d.sh")
+
+        filename_pe, jobname = self.write_submission_script("hisat2_%d",
+                                                            self.hisat2_module,
+                                                            self.hisat2_pe_cmd,
+                                                            "hisat2_pe_%d.sh")
+
+        print('Mapping reads with HISAT2...')
+
+        # remove the submission script
+        os.remove(filename_se)
+        os.remove(filename_pe)
+
+        # remove OUT_ files
+        PipelineBase.clean_out_files(jobname)
+
+    def run_alignment(self, overwrite=False, keep_previous=False):
+        """
+
+        Determine which aligner to use and align reads to indexed genome
+
+        :param overwrite: will overwrite existing data when True, otherwise existing runs will be skipped
+        :param keep_previous: will keep trimmed reads upon completion when true,
+        otherwise the trimmed reads will be deleted
+        """
+        if self.use_hisat2:
+            self.__run_hisat2(overwrite=overwrite, keep_previous=keep_previous)
+        else:
+            self.__run_tophat(overwrite=overwrite, keep_previous=keep_previous)
+
         print("Done\n\n")
 
     def run_htseq_count(self, keep_previous=False):
diff --git a/run.py b/run.py
index 2d229dd..474e040 100755
--- a/run.py
+++ b/run.py
@@ -32,11 +32,7 @@ def run_pipeline(args):
                 print("Skipping Trimmomatic", file=sys.stderr)
 
             if args.alignment:
-                if args.use_hisat2:
-                    print("alignment using hisat2 not implemented yet!", file=sys.argv)
-                    quit()
-                else:
-                    tp.run_tophat(keep_previous=args.keep_intermediate)
+                    tp.run_alignment(keep_previous=args.keep_intermediate)
             else:
                 print("Skipping Alignment", file=sys.stderr)