diff --git a/config/default.yml b/config/default.yml
index 556c083..6ac6096 100644
--- a/config/default.yml
+++ b/config/default.yml
@@ -59,12 +59,22 @@ salmon:
     quant_flags: '--gcBias'
 
 #
-# The PRINSEQ section
-#
-# flags - flags to add to the PRINSEQ call while cleaning fastq files
-#
-prinseq:
-    flags: '-derep 1 -custom_params "A 8"'
+# The HTStream section
+#
+# actions - HTStream programs to chain, in order (each is prefixed with hts_)
+# flags   - flags for each action, listed in the same order as actions
+#
+htstream:
+    actions:
+        - PolyATTrim
+        - QWindowTrim
+        - SuperDeduper
+        - Stats
+    flags:
+        - '-StOA --min-trim 8 --max-mismatch 3'
+        - '-StOA --window-size 20 --avg-qual 20 --min-length 50'
+        - '-StOA --start 1 --length 50'
+        - '-SFfgA --notes stats_after_qc'
 
 #
 # The fastq-multx section
diff --git a/environment.yml b/environment.yml
index 6b66da8..250c7ca 100644
--- a/environment.yml
+++ b/environment.yml
@@ -7,5 +7,5 @@ channels:
 dependencies:
   - snakemake
   - salmon
-  - prinseq
+  #- htstream
   - fastq-multx
diff --git a/src/clean.snake b/src/clean.snake
index 50c7e7c..d31442d 100644
--- a/src/clean.snake
+++ b/src/clean.snake
@@ -1,16 +1,41 @@
+def construct_htstream_partials(wildcards, log):
+    """Build the piped chain of HTStream calls for one sample.
+
+    Every entry of config['htstream']['actions'] is prefixed with 'hts_' and
+    paired with the entry of config['htstream']['flags'] at the same position.
+
+    wildcards - wildcards of the calling rule; only 'sample' is read
+    log       - log directory; calls log to {log}/htstream/{sample}.txt
+
+    Returns the calls joined by ' | ' for use inside a shell command.
+    Raises ValueError if the numbers of actions and flags differ.
+    """
+    actions = config['htstream']['actions']
+    flags = config['htstream']['flags']
+    # zip() stops at the shorter list and would silently drop steps.
+    if len(actions) != len(flags):
+        raise ValueError('htstream: {} actions but {} flags configured'
+                         .format(len(actions), len(flags)))
+    template = 'hts_{prog} -L {log}/htstream/{sample}.txt {flags}'
+    partials = [template.format(prog=prog, log=log, flags=opts, sample=wildcards.sample)
+                for prog, opts in zip(actions, flags)]
+    return ' | '.join(partials)
 
-rule prinseq_clean_se_gz:
+# hts_Stats opens the pipe and the configured chain follows it; the shell then
+# moves {out}/{sample}_SE.fastq.gz to the declared output.
+rule htstream_clean_se_gz:
     version: "1.0"
     input:
         '{out}/{sample}.fastq.gz'
     output:
-        clean = '{out}/{sample}.clean.fastq.gz',
-        grubby = '{out}/{sample}.grubby.fastq.gz',
+        '{out}/{sample}.clean.fastq.gz'
     params:
-        flags = config['prinseq']['flags'] if 'flags' in config.get('prinseq', {}) else ''
-    message: 'Cleaning {input} using prinseq-lite.'
-    log: join(config['dirs']['log'], 'prinseq', 'clean-se.log')
+        call = lambda wildcards: construct_htstream_partials(wildcards, config['dirs']['log'])
+    message: 'Cleaning {input} using HTStream.'
+    log: join(config['dirs']['log'], 'htstream', '{sample}.txt')
+    threads: 4
     shell:
         """
-        zcat {input} | prinseq-lite.pl -fastq -out_good {wildcards.out}/{wildcards.sample}.clean -out_bad {wildcards.out}/{wildcards.sample}.grubby -graph_data {log} {params.flags}
+        hts_Stats -U {input} -L {log} -tO | {params.call} -p {wildcards.out}/{wildcards.sample}
+        mv {wildcards.out}/{wildcards.sample}_SE.fastq.gz {output}
         """