Skip to content
This repository has been archived by the owner. It is now read-only.

Commit

Permalink
Exchanged prinseq for HTStream for QC
Browse files Browse the repository at this point in the history
  • Loading branch information
jenzopr committed Apr 5, 2018
1 parent 82e0c73 commit 2c0edc0
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 14 deletions.
21 changes: 15 additions & 6 deletions config/default.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,21 @@ salmon:
quant_flags: '--gcBias'

#
# The PRINSEQ section
#
# flags - flags to add to the PRINSEQ call while cleaning fastq files
#
prinseq:
flags: '-derep 1 -custom_params "A 8"'
# The HTStream section
#
# actions - HTStream programs to run, in order (each name will be prefixed with hts_)
# flags - command-line flags for each action, listed in the same order as actions
#
htstream:
actions:
- PolyATTrim
- QWindowTrim
- SuperDeduper
- Stats
flags:
- '-StOA --min-trim 8 --max-mismatch 3'
- '-StOA --window-size 20 --avg-qual 20 --min-length 50'
- '-StOA --start 1 --length 50'
- '-SFfgA --notes stats_after_qc'

#
# The fastq-multx section
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ channels:
dependencies:
- snakemake
- salmon
- prinseq
#- htstream
- fastq-multx
19 changes: 12 additions & 7 deletions src/clean.snake
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
def construct_htstream_partials(wildcards, log):
    """Render the configured HTStream actions as one shell pipeline string.

    The entries of ``config['htstream']['actions']`` are paired by position
    with the entries of ``config['htstream']['flags']``. Each pair is
    rendered as ``hts_<action> -L <log>/htstream/<sample>.txt <flags>`` and
    the rendered calls are joined with ``' | '``.

    Args:
        wildcards: Object exposing a ``sample`` attribute; the sample name
            is used to build the per-sample log file path.
        log: Base log directory that is prepended to
            ``htstream/<sample>.txt``.

    Returns:
        The pipeline as a single string; empty when no actions are
        configured.

    Raises:
        ValueError: If the number of configured actions differs from the
            number of configured flag strings.
    """
    htstream_config = config['htstream']
    actions = htstream_config['actions']
    flags = htstream_config['flags']

    # zip() stops at the shorter list, which would silently drop pipeline
    # steps (or their flags) when the two config lists get out of sync.
    if len(actions) != len(flags):
        raise ValueError(
            "config['htstream']: {} actions but {} flags entries; "
            "each action needs exactly one flags entry".format(
                len(actions), len(flags)))

    template = 'hts_{prog} -L {log}/htstream/{sample}.txt {flags}'
    return ' | '.join(
        template.format(prog=action, log=log, flags=action_flags,
                        sample=wildcards.sample)
        for action, action_flags in zip(actions, flags)
    )

# NOTE(review): this span appears to be a rendered diff whose +/- markers and
# indentation were stripped, so lines of the removed prinseq rule and of the
# added HTStream rule are interleaved (two rule headers, two message/log
# entries, two shell commands). Presumably the prinseq lines are the removed
# ones -- confirm against the actual src/clean.snake before editing.
rule prinseq_clean_se_gz:
rule htstream_clean_se_gz:
version: "1.0"
input:
'{out}/{sample}.fastq.gz'
output:
clean = '{out}/{sample}.clean.fastq.gz',
grubby = '{out}/{sample}.grubby.fastq.gz',
'{out}/{sample}.clean.fastq.gz'
params:
flags = config['prinseq']['flags'] if 'flags' in config.get('prinseq', {}) else ''
message: 'Cleaning {input} using prinseq-lite.'
log: join(config['dirs']['log'], 'prinseq', 'clean-se.log')
# params.call is a function of the job's wildcards: it renders the configured
# HTStream chain via construct_htstream_partials using the log base directory.
call = lambda wildcards: construct_htstream_partials(wildcards, config['dirs']['log'])
message: 'Cleaning {input} using HTStream.'
log: join(config['dirs']['log'], 'htstream', '{sample}.txt')
threads: 4
# HTStream shell command: hts_Stats is run on {input} and piped into the chain
# from params.call (with -p <out>/<sample> appended to its last command); the
# file <out>/<sample>_SE.fastq.gz is then moved to {output}.
shell:
"""
zcat {input} | prinseq-lite.pl -fastq -out_good {wildcards.out}/{wildcards.sample}.clean -out_bad {wildcards.out}/{wildcards.sample}.grubby -graph_data {log} {params.flags}
hts_Stats -U {input} -L {log} -tO | {params.call} -p {wildcards.out}/{wildcards.sample}
mv {wildcards.out}/{wildcards.sample}_SE.fastq.gz {output}
"""

0 comments on commit 2c0edc0

Please sign in to comment.