Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
gruntwork to support HISAT2
  • Loading branch information
proost committed Jul 21, 2017
1 parent a6cce89 commit 79de4d2
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 18 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -58,5 +58,6 @@ target/
.idea/
.data/

tmp/
config.ini
data.ini
7 changes: 4 additions & 3 deletions config.template.ini
Expand Up @@ -17,6 +17,7 @@ trimmomatic_path=/home/sepro/tools/Trimmomatic-0.36/trimmomatic-0.36.jar
; Note that in some cases hard coded paths were required, adjust these to match the location of these files on
; your system
bowtie_cmd=bowtie2-build ${in} ${out}
hisat2_build_cmd=hisat2-build ${in} ${out}

; ADJUST PATHS TO ADAPTERS
trimmomatic_se_command=java -jar ${jar} SE -threads 1 ${in} ${out} ILLUMINACLIP:/home/sepro/tools/Trimmomatic-0.36/adapters/TruSeq3-SE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36
Expand All @@ -40,7 +41,7 @@ orthofinder_cmd=python /home/sepro/OrthoFinder-0.4/orthofinder.py -f ${fasta_dir

; qsub parameters (OGE)

qsub_bowtie=''
qsub_indexing=''
qsub_trimmomatic=''
qsub_tophat='-pe cores 4'
qsub_htseq_count=''
Expand All @@ -52,7 +53,7 @@ qsub_mcxdeblast=''

; qsub parameters (PBS/Torque)

; qsub_bowtie=''
; qsub_indexing=''
; qsub_trimmomatic=''
; qsub_tophat='-l nodes=1,ppn=4'
; qsub_htseq_count=''
Expand All @@ -64,7 +65,7 @@ qsub_mcxdeblast=''

; qsub parameters (PBS/Torque with walltimes)

; qsub_bowtie='-l walltime=00:10:00'
; qsub_indexing='-l walltime=00:10:00'
; qsub_trimmomatic='-l walltime=00:10:00'
; qsub_tophat='-l nodes=1,ppn=4 -l walltime=00:10:00'
; qsub_htseq_count=' -l walltime=00:02:00'
Expand Down
2 changes: 1 addition & 1 deletion data.template.ini
Expand Up @@ -23,7 +23,7 @@ fastq_dir=./data/zma/fastq
tophat_cutoff=65
htseq_cutoff=40

bowtie_output=./output/bowtie-build/zma
indexing_output=./output/bowtie-build/zma
trimmomatic_output=./output/trimmed_fastq/zma
tophat_output=./output/tophat/zma
samtools_output=./output/samtools/zma
Expand Down
3 changes: 2 additions & 1 deletion pipeline/base.py
Expand Up @@ -32,6 +32,7 @@ def __init__(self, config, data, enable_log=False, use_hisat2=False):
self.mcl_module = None if self.cp['TOOLS']['mcl_module'] is 'None' else self.cp['TOOLS']['mcl_module']

self.bowtie_build_cmd = self.cp['TOOLS']['bowtie_cmd']
self.hisat2_build_cmd = self.cp['TOOLS']['hisat2_build_cmd']
self.trimmomatic_se_cmd = self.cp['TOOLS']['trimmomatic_se_command']
self.trimmomatic_pe_cmd = self.cp['TOOLS']['trimmomatic_pe_command']
self.tophat_se_cmd = self.cp['TOOLS']['tophat_se_cmd']
Expand All @@ -44,7 +45,7 @@ def __init__(self, config, data, enable_log=False, use_hisat2=False):
self.mcl_cmd = self.cp['TOOLS']['mcl_cmd']
self.mcxdeblast_cmd = self.cp['TOOLS']['mcxdeblast_cmd']

self.qsub_bowtie = shlex.split(self.cp['TOOLS']['qsub_bowtie'].strip('\''))
self.qsub_indexing = shlex.split(self.cp['TOOLS']['qsub_indexing'].strip('\''))
self.qsub_trimmomatic = shlex.split(self.cp['TOOLS']['qsub_trimmomatic'].strip('\''))
self.qsub_tophat = shlex.split(self.cp['TOOLS']['qsub_tophat'].strip('\''))
self.qsub_htseq_count = shlex.split(self.cp['TOOLS']['qsub_htseq_count'].strip('\''))
Expand Down
4 changes: 2 additions & 2 deletions pipeline/check/sanity.py
Expand Up @@ -21,7 +21,7 @@ def check_sanity_data(filename):
genomes = cp['GLOBAL']['genomes'].split(';')
# For each genome test that section
required_keys = ['cds_fasta', 'protein_fasta', 'genome_fasta', 'gff_file', 'gff_feature', 'gff_id',
'fastq_dir', 'bowtie_output', 'trimmomatic_output', 'tophat_output',
'fastq_dir', 'indexing_output', 'trimmomatic_output', 'tophat_output',
'htseq_output', 'exp_matrix_output', 'exp_matrix_tpm_output', 'exp_matrix_rpkm_output',
'interpro_output', 'pcc_output', 'pcc_mcl_output', 'mcl_cluster_output']
required_paths = ['cds_fasta', 'protein_fasta', 'genome_fasta', 'gff_file', 'fastq_dir']
Expand Down Expand Up @@ -69,7 +69,7 @@ def check_sanity_config(filename):
'blast_module', 'mcl_module', 'python_module', 'python3_module', 'bowtie_cmd', 'trimmomatic_se_command',
'trimmomatic_pe_command', 'tophat_se_cmd', 'tophat_pe_cmd', 'htseq_count_cmd',
'interproscan_cmd', 'pcc_cmd', 'mcl_cmd', 'orthofinder_cmd', 'mcxdeblast_cmd',
'trimmomatic_path', 'qsub_bowtie', 'qsub_trimmomatic', 'qsub_tophat', 'qsub_htseq_count',
'trimmomatic_path', 'qsub_indexing', 'qsub_trimmomatic', 'qsub_tophat', 'qsub_htseq_count',
'qsub_interproscan', 'qsub_pcc', 'qsub_mcl', 'qsub_orthofinder', 'qsub_mcxdeblast']
required_paths = ['trimmomatic_path']

Expand Down
18 changes: 12 additions & 6 deletions pipeline/transcriptome.py
Expand Up @@ -17,19 +17,25 @@ def prepare_genome(self):
"""
        Runs bowtie2-build (or hisat2-build when HISAT2 is enabled) for each genome on the cluster. All settings are obtained from the config and data ini files
"""
filename, jobname = self.write_submission_script("bowtie_build_%d",
self.bowtie_module,
self.bowtie_build_cmd,
"bowtie_build_%d.sh")
if self.use_hisat2:
filename, jobname = self.write_submission_script("build_index_%d",
self.hisat2_module,
self.hisat2_build_cmd,
"build_index_%d.sh")
else:
filename, jobname = self.write_submission_script("build_index_%d",
self.bowtie_module,
self.bowtie_build_cmd,
"build_index_%d.sh")

for g in self.genomes:
con_file = self.dp[g]['genome_fasta']
output = self.dp[g]['bowtie_output']
output = self.dp[g]['indexing_output']

os.makedirs(os.path.dirname(output), exist_ok=True)
shutil.copy(con_file, output + '.fa')

command = ["qsub"] + self.qsub_bowtie + ["-v", "in=" + con_file + ",out=" + output, filename]
command = ["qsub"] + self.qsub_indexing + ["-v", "in=" + con_file + ",out=" + output, filename]

subprocess.call(command)

Expand Down
6 changes: 1 addition & 5 deletions run.py
Expand Up @@ -22,11 +22,7 @@ def run_pipeline(args):
use_hisat2=args.use_hisat2)

if args.indexing:
if args.use_hisat2:
print("alignment using hisat2 not implemented yet!", file=sys.argv)
quit()
else:
tp.prepare_genome()
tp.prepare_genome()
else:
print("Skipping Indexing", file=sys.stderr)

Expand Down

0 comments on commit 79de4d2

Please sign in to comment.