From 3627958f1aa8b805999578cda9c99c7f3cec2bc6 Mon Sep 17 00:00:00 2001 From: Jens Preussner Date: Thu, 5 Apr 2018 10:30:15 +0200 Subject: [PATCH] Fixed cleaning to take fastq from samplesheet --- sc-preprocess.snake | 9 ++++++++- src/clean.snake | 17 +++++++++++++---- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/sc-preprocess.snake b/sc-preprocess.snake index a44d728..b4f9b3c 100644 --- a/sc-preprocess.snake +++ b/sc-preprocess.snake @@ -18,6 +18,10 @@ REFERENCE_FASTA = join(config['dirs']['ref'], config['reference']['organism'], c #-------------------------------------------------------------------------------# samplesheet = pd.read_table(config['samplesheet']['file'], sep = '\t', index_col = config['samplesheet']['index']) + +if 'URL_r1' not in list(samplesheet): + samplesheet['URL_r1'] = ['{dir}/{sample}.{format}'.format(dir = config['dirs']['fastq'], sample = sample, format = 'fastq.gz') for sample in list(samplesheet.index)] + SAMPLES = samplesheet.to_dict(orient = 'index') SAMPLE_NAMES = sorted(SAMPLES.keys()) @@ -25,7 +29,9 @@ SAMPLE_NAMES = sorted(SAMPLES.keys()) #--------------------------- Generate output files -----------------------------# #-------------------------------------------------------------------------------# -output_files = [] +output_files = [ + #join(config['dirs']['ref'], 'tx2gene', basename(REFERENCE_FASTA).rstrip(".fa")) +] if config["action"]["demultiplex"]: demultiplexed_files = expand('{o}/{s}.fastq.gz', o = config['dirs']['fastq'], s = SAMPLE_NAMES) @@ -41,6 +47,7 @@ if config["action"]["clean"]: include: "src/auxiliary.snake" #include: "src/demultiplex.snake" # not yet implemented +#include: "src/reference.snake" include: "src/clean.snake" if config["debug"]: diff --git a/src/clean.snake b/src/clean.snake index d31442d..0244f1c 100644 --- a/src/clean.snake +++ b/src/clean.snake @@ -1,3 +1,12 @@ +# vim: syntax=python tabstop=4 expandtab +# coding: utf-8 + +''' +@author: jpreuss + +Provides cleaning and QC from the HTStream toolkit +''' + def construct_htstream_partials(wildcards, log): template = 'hts_{prog} -L {log}/htstream/{sample}.txt {flags}' partials = [template.format(prog = k, log = log, flags = v, sample = wildcards.sample) for k, v in list(zip(config['htstream']['actions'], config['htstream']['flags']))] @@ -6,9 +15,9 @@ def construct_htstream_partials(wildcards, log): rule htstream_clean_se_gz: version: "1.0" input: - '{out}/{sample}.fastq.gz' + lambda wildcards: SAMPLES[wildcards.sample]['URL_r1'] output: - '{out}/{sample}.clean.fastq.gz' + config['dirs']['fastq'] + '/{sample}.clean.fastq.gz' params: call = lambda wildcards: construct_htstream_partials(wildcards, config['dirs']['log']) message: 'Cleaning {input} using HTStream.' @@ -16,6 +25,6 @@ rule htstream_clean_se_gz: threads: 4 shell: """ - hts_Stats -U {input} -L {log} -tO | {params.call} -p {wildcards.out}/{wildcards.sample} - mv {wildcards.out}/{wildcards.sample}_SE.fastq.gz {output} + hts_Stats -U {input} -L {log} -tO | {params.call} -p {config[dirs][fastq]}/{wildcards.sample} + mv {config[dirs][fastq]}/{wildcards.sample}_SE.fastq.gz {output} """