From 0a23d6afceafb0b79b3a5d34db2844f1393c0ba1 Mon Sep 17 00:00:00 2001
From: Jens Preussner
Date: Thu, 5 Apr 2018 10:46:31 +0200
Subject: [PATCH] Added reference related logic.

Enable the reference rules (download, optional spike-in concatenation and
tx2gene table creation) and request the tx2gene table as a workflow target.

The tx2gene file name is derived with splitext() instead of
str.rstrip(".fa"): rstrip() strips any trailing '.', 'f' or 'a' characters
rather than the literal suffix, so e.g. 'alpha.fa' would become 'alph'.
---
NOTE: the blob ids in the 'index' lines below were not recomputed after the
splitext() change. 'from os.path import splitext' is added next to its first
use because the import block of sc-preprocess.snake is not part of this
patch's context; it is harmless if the name is already imported there.

 config/default.yml  |  2 +-
 sc-preprocess.snake |  5 +++--
 src/reference.snake | 59 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+), 3 deletions(-)
 create mode 100644 src/reference.snake

diff --git a/config/default.yml b/config/default.yml
index 6ac6096..7cc8e55 100644
--- a/config/default.yml
+++ b/config/default.yml
@@ -46,7 +46,7 @@ reference:
   file:
     - pc_transcripts
     - lncRNA_transcripts
-  spikeIns: 'ref/humanDux.fasta'
+  #spikeIns: 'path/to/spikeIns.fa'
 
 #
 # The salmon section
diff --git a/sc-preprocess.snake b/sc-preprocess.snake
index b4f9b3c..353a15c 100644
--- a/sc-preprocess.snake
+++ b/sc-preprocess.snake
@@ -30,7 +30,8 @@ SAMPLE_NAMES = sorted(SAMPLES.keys())
 #-------------------------------------------------------------------------------#
 
+from os.path import splitext
 output_files = [
-    #join(config['dirs']['ref'], 'tx2gene', basename(REFERENCE_FASTA).rstrip(".fa"))
+    join(config['dirs']['ref'], 'tx2gene', splitext(basename(REFERENCE_FASTA))[0])
 ]
 
 if config["action"]["demultiplex"]:
@@ -47,7 +48,7 @@ if config["action"]["clean"]:
 
 include: "src/auxiliary.snake"
 #include: "src/demultiplex.snake" # not yet implemented
-#include: "src/reference.snake"
+include: "src/reference.snake"
 include: "src/clean.snake"
 
 if config["debug"]:
diff --git a/src/reference.snake b/src/reference.snake
new file mode 100644
index 0000000..a2e826a
--- /dev/null
+++ b/src/reference.snake
@@ -0,0 +1,59 @@
+# vim: syntax=python tabstop=4 expandtab
+# coding: utf-8
+
+'''
+@author: jpreuss
+
+Provides rules for reference related modifications
+'''
+
+from snakemake.remote.FTP import RemoteProvider as FTPRemoteProvider
+FTP = FTPRemoteProvider()
+
+# Fetch the gzipped reference file(s) named by config['reference']['URL'] via
+# FTP and write them, decompressed and concatenated, to GENCODE_FASTA.
+rule reference_download:
+    input:
+        FTP.remote(expand(config['reference']['URL'], **config['reference']), keep_local = True)
+    output:
+        GENCODE_FASTA
+    threads: 1
+    message:
+        'Downloading gencode transcriptome reference.'
+    shell:
+        """
+        zcat {input} > {output}
+        """
+
+# Only defined when the configuration names a spike-in FASTA; the rule appends
+# that file to the downloaded reference.
+if 'spikeIns' in config.get('reference', {}):
+    rule reference_addSpikes:
+        input:
+            ref = GENCODE_FASTA,
+            spike = config['reference']['spikeIns']
+        output:
+            join(config['dirs']['ref'], config['reference']['organism'], config['reference']['release'] + '-withSpikeIns.fa')
+        threads: 1
+        message:
+            'Adding spike-in sequences to gencode transcriptome reference'
+        shell:
+            """
+            cat {input.ref} {input.spike} > {output}
+            """
+
+# Write fields 1 and 2 of every '|'-separated FASTA header as a tab-separated
+# table, dropping a trailing '.<digits>' version suffix at the end of each line.
+# The output name uses splitext(): rstrip(".fa") strips characters, not a suffix.
+rule tx2gene_from_fasta:
+    input:
+        REFERENCE_FASTA
+    output:
+        join(config['dirs']['ref'], 'tx2gene', splitext(basename(REFERENCE_FASTA))[0])
+    threads: 1
+    message:
+        'Creating tx2gene table from reference {input}.'
+    shell:
+        """
+        grep ">" {input} | tr -d '>' | cut -d'|' --output-delimiter=$'\t' -f1,2 | sed -e 's/\([A-Z]\+[[:digit:]]\+\)\(\.[[:digit:]]\+\)$/\\1/g' > {output}
+        """