From 79de4d22204559da697e5d576eb6d63e59dd2b72 Mon Sep 17 00:00:00 2001 From: Sebastian Proost Date: Fri, 21 Jul 2017 11:12:53 +0200 Subject: [PATCH] gruntwork to support HISAT2 --- .gitignore | 1 + config.template.ini | 7 ++++--- data.template.ini | 2 +- pipeline/base.py | 3 ++- pipeline/check/sanity.py | 4 ++-- pipeline/transcriptome.py | 18 ++++++++++++------ run.py | 6 +----- 7 files changed, 23 insertions(+), 18 deletions(-) diff --git a/.gitignore b/.gitignore index 563f048..73a6536 100644 --- a/.gitignore +++ b/.gitignore @@ -58,5 +58,6 @@ target/ .idea/ .data/ +tmp/ config.ini data.ini \ No newline at end of file diff --git a/config.template.ini b/config.template.ini index 824fab0..ead1a8d 100644 --- a/config.template.ini +++ b/config.template.ini @@ -17,6 +17,7 @@ trimmomatic_path=/home/sepro/tools/Trimmomatic-0.36/trimmomatic-0.36.jar ; Note that in some cases hard coded paths were required, adjust these to match the location of these files on ; your system bowtie_cmd=bowtie2-build ${in} ${out} +hisat2_build_cmd=hisat2-build ${in} ${out} ; ADJUST PATHS TO ADAPTERS trimmomatic_se_command=java -jar ${jar} SE -threads 1 ${in} ${out} ILLUMINACLIP:/home/sepro/tools/Trimmomatic-0.36/adapters/TruSeq3-SE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36 @@ -40,7 +41,7 @@ orthofinder_cmd=python /home/sepro/OrthoFinder-0.4/orthofinder.py -f ${fasta_dir ; qsub parameters (OGE) -qsub_bowtie='' +qsub_indexing='' qsub_trimmomatic='' qsub_tophat='-pe cores 4' qsub_htseq_count='' @@ -52,7 +53,7 @@ qsub_mcxdeblast='' ; qsub parameters (PBS/Torque) -; qsub_bowtie='' +; qsub_indexing='' ; qsub_trimmomatic='' ; qsub_tophat='-l nodes=1,ppn=4' ; qsub_htseq_count='' @@ -64,7 +65,7 @@ qsub_mcxdeblast='' ; qsub parameters (PBS/Torque with walltimes) -; qsub_bowtie='-l walltime=00:10:00' +; qsub_indexing='-l walltime=00:10:00' ; qsub_trimmomatic='-l walltime=00:10:00' ; qsub_tophat='-l nodes=1,ppn=4 -l walltime=00:10:00' ; qsub_htseq_count=' -l walltime=00:02:00' diff --git a/data.template.ini b/data.template.ini index 0b27fc6..e053a3d 100644 --- a/data.template.ini +++ b/data.template.ini @@ -23,7 +23,7 @@ fastq_dir=./data/zma/fastq tophat_cutoff=65 htseq_cutoff=40 -bowtie_output=./output/bowtie-build/zma +indexing_output=./output/bowtie-build/zma trimmomatic_output=./output/trimmed_fastq/zma tophat_output=./output/tophat/zma samtools_output=./output/samtools/zma diff --git a/pipeline/base.py b/pipeline/base.py index 89c2926..5850781 100644 --- a/pipeline/base.py +++ b/pipeline/base.py @@ -32,6 +32,7 @@ def __init__(self, config, data, enable_log=False, use_hisat2=False): self.mcl_module = None if self.cp['TOOLS']['mcl_module'] is 'None' else self.cp['TOOLS']['mcl_module'] self.bowtie_build_cmd = self.cp['TOOLS']['bowtie_cmd'] + self.hisat2_build_cmd = self.cp['TOOLS']['hisat2_build_cmd'] self.trimmomatic_se_cmd = self.cp['TOOLS']['trimmomatic_se_command'] self.trimmomatic_pe_cmd = self.cp['TOOLS']['trimmomatic_pe_command'] self.tophat_se_cmd = self.cp['TOOLS']['tophat_se_cmd'] @@ -44,7 +45,7 @@ def __init__(self, config, data, enable_log=False, use_hisat2=False): self.mcl_cmd = self.cp['TOOLS']['mcl_cmd'] self.mcxdeblast_cmd = self.cp['TOOLS']['mcxdeblast_cmd'] - self.qsub_bowtie = shlex.split(self.cp['TOOLS']['qsub_bowtie'].strip('\'')) + self.qsub_indexing = shlex.split(self.cp['TOOLS']['qsub_indexing'].strip('\'')) self.qsub_trimmomatic = shlex.split(self.cp['TOOLS']['qsub_trimmomatic'].strip('\'')) self.qsub_tophat = shlex.split(self.cp['TOOLS']['qsub_tophat'].strip('\'')) self.qsub_htseq_count = shlex.split(self.cp['TOOLS']['qsub_htseq_count'].strip('\'')) diff --git a/pipeline/check/sanity.py b/pipeline/check/sanity.py index 14f7e1f..4267dcd 100644 --- a/pipeline/check/sanity.py +++ b/pipeline/check/sanity.py @@ -21,7 +21,7 @@ def check_sanity_data(filename): genomes = cp['GLOBAL']['genomes'].split(';') # For each genome test that section required_keys = ['cds_fasta', 'protein_fasta', 'genome_fasta', 'gff_file', 'gff_feature', 'gff_id', - 'fastq_dir', 'bowtie_output', 'trimmomatic_output', 'tophat_output', + 'fastq_dir', 'indexing_output', 'trimmomatic_output', 'tophat_output', 'htseq_output', 'exp_matrix_output', 'exp_matrix_tpm_output', 'exp_matrix_rpkm_output', 'interpro_output', 'pcc_output', 'pcc_mcl_output', 'mcl_cluster_output'] required_paths = ['cds_fasta', 'protein_fasta', 'genome_fasta', 'gff_file', 'fastq_dir'] @@ -69,7 +69,7 @@ def check_sanity_config(filename): 'blast_module', 'mcl_module', 'python_module', 'python3_module', 'bowtie_cmd', 'trimmomatic_se_command', 'trimmomatic_pe_command', 'tophat_se_cmd', 'tophat_pe_cmd', 'htseq_count_cmd', 'interproscan_cmd', 'pcc_cmd', 'mcl_cmd', 'orthofinder_cmd', 'mcxdeblast_cmd', - 'trimmomatic_path', 'qsub_bowtie', 'qsub_trimmomatic', 'qsub_tophat', 'qsub_htseq_count', + 'trimmomatic_path', 'qsub_indexing', 'qsub_trimmomatic', 'qsub_tophat', 'qsub_htseq_count', 'qsub_interproscan', 'qsub_pcc', 'qsub_mcl', 'qsub_orthofinder', 'qsub_mcxdeblast'] required_paths = ['trimmomatic_path'] diff --git a/pipeline/transcriptome.py b/pipeline/transcriptome.py index 5a2aa15..926a447 100644 --- a/pipeline/transcriptome.py +++ b/pipeline/transcriptome.py @@ -17,19 +17,25 @@ def prepare_genome(self): """ Runs bowtie-build for each genome on the cluster. All settings are obtained from the settings fasta file """ - filename, jobname = self.write_submission_script("bowtie_build_%d", - self.bowtie_module, - self.bowtie_build_cmd, - "bowtie_build_%d.sh") + if self.use_hisat2: + filename, jobname = self.write_submission_script("build_index_%d", + self.hisat2_module, + self.hisat2_build_cmd, + "build_index_%d.sh") + else: + filename, jobname = self.write_submission_script("build_index_%d", + self.bowtie_module, + self.bowtie_build_cmd, + "build_index_%d.sh") for g in self.genomes: con_file = self.dp[g]['genome_fasta'] - output = self.dp[g]['bowtie_output'] + output = self.dp[g]['indexing_output'] os.makedirs(os.path.dirname(output), exist_ok=True) shutil.copy(con_file, output + '.fa') - command = ["qsub"] + self.qsub_bowtie + ["-v", "in=" + con_file + ",out=" + output, filename] + command = ["qsub"] + self.qsub_indexing + ["-v", "in=" + con_file + ",out=" + output, filename] subprocess.call(command) diff --git a/run.py b/run.py index a5a5407..2d229dd 100755 --- a/run.py +++ b/run.py @@ -22,11 +22,7 @@ def run_pipeline(args): use_hisat2=args.use_hisat2) if args.indexing: - if args.use_hisat2: - print("alignment using hisat2 not implemented yet!", file=sys.argv) - quit() - else: - tp.prepare_genome() + tp.prepare_genome() else: print("Skipping Indexing", file=sys.stderr)