Skip to content
This repository has been archived by the owner. It is now read-only.

Commit

Permalink
Fixed cleaning to take fastq from samplesheet
Browse files Browse the repository at this point in the history
  • Loading branch information
jenzopr committed Apr 5, 2018
1 parent 2c0edc0 commit 3627958
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 5 deletions.
9 changes: 8 additions & 1 deletion sc-preprocess.snake
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,20 @@ REFERENCE_FASTA = join(config['dirs']['ref'], config['reference']['organism'], c
#-------------------------------------------------------------------------------#

# Load the tab-separated sample sheet, indexed by the configured column.
samplesheet = pd.read_table(config['samplesheet']['file'], sep = '\t', index_col = config['samplesheet']['index'])

# If the sheet carries no explicit read-1 location, derive one per sample as
# <fastq dir>/<sample>.fastq.gz so downstream rules can always look up 'URL_r1'.
if 'URL_r1' not in samplesheet.columns:
    samplesheet['URL_r1'] = [
        '{dir}/{sample}.{format}'.format(dir = config['dirs']['fastq'], sample = sample, format = 'fastq.gz')
        for sample in samplesheet.index
    ]

# Per-sample row data keyed by index value, and the sample names in sorted order.
SAMPLES = samplesheet.to_dict(orient = 'index')
SAMPLE_NAMES = sorted(SAMPLES)

#-------------------------------------------------------------------------------#
#--------------------------- Generate output files -----------------------------#
#-------------------------------------------------------------------------------#

# Requested output files of the workflow; starts empty.
output_files = [
    # Reference-derived tx2gene target, currently disabled.
    # NOTE(review): rstrip(".fa") strips any trailing '.', 'f' or 'a' characters
    # rather than the ".fa" suffix -- fix before re-enabling this entry.
    #join(config['dirs']['ref'], 'tx2gene', basename(REFERENCE_FASTA).rstrip(".fa"))
]

if config["action"]["demultiplex"]:
demultiplexed_files = expand('{o}/{s}.fastq.gz', o = config['dirs']['fastq'], s = SAMPLE_NAMES)
Expand All @@ -41,6 +47,7 @@ if config["action"]["clean"]:

include: "src/auxiliary.snake"
#include: "src/demultiplex.snake" # not yet implemented
#include: "src/reference.snake"
include: "src/clean.snake"

if config["debug"]:
Expand Down
17 changes: 13 additions & 4 deletions src/clean.snake
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# vim: syntax=python tabstop=4 expandtab
# coding: utf-8

'''
@author: jpreuss
Provides cleaning and QC from the HTStream toolkit
'''

def construct_htstream_partials(wildcards, log):
template = 'hts_{prog} -L {log}/htstream/{sample}.txt {flags}'
partials = [template.format(prog = k, log = log, flags = v, sample = wildcards.sample) for k, v in list(zip(config['htstream']['actions'], config['htstream']['flags']))]
Expand All @@ -6,16 +15,16 @@ def construct_htstream_partials(wildcards, log):
rule htstream_clean_se_gz:
    # Clean one sample's single-end gzipped FASTQ with HTStream.
    #
    # input:  the sample's 'URL_r1' entry from the sample sheet (SAMPLES).
    # output: <fastq dir>/<sample>.clean.fastq.gz
    # params: call -- command fragment built by construct_htstream_partials()
    #         from the configured HTStream actions and flags.
    # log:    <log dir>/htstream/<sample>.txt
    version: "1.0"
    input:
        lambda wildcards: SAMPLES[wildcards.sample]['URL_r1']
    output:
        config['dirs']['fastq'] + '/{sample}.clean.fastq.gz'
    params:
        call = lambda wildcards: construct_htstream_partials(wildcards, config['dirs']['log'])
    message: 'Cleaning {input} using HTStream.'
    log: join(config['dirs']['log'], 'htstream', '{sample}.txt')
    threads: 4
    shell:
        # hts_Stats feeds the configured HTStream steps; the result is written
        # under the prefix <fastq dir>/<sample>. The second command moves the
        # resulting <prefix>_SE.fastq.gz file to the declared output path.
        """
        hts_Stats -U {input} -L {log} -tO | {params.call} -p {config[dirs][fastq]}/{wildcards.sample}
        mv {config[dirs][fastq]}/{wildcards.sample}_SE.fastq.gz {output}
        """

0 comments on commit 3627958

Please sign in to comment.