Skip to content
This repository has been archived by the owner. It is now read-only.

Commit

Permalink
Added reference related logic.
Browse files Browse the repository at this point in the history
  • Loading branch information
jenzopr committed Apr 5, 2018
1 parent 3627958 commit 0a23d6a
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 3 deletions.
2 changes: 1 addition & 1 deletion config/default.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ reference:
file:
- pc_transcripts
- lncRNA_transcripts
spikeIns: 'ref/humanDux.fasta'
#spikeIns: 'path/to/spikeIns.fa'

#
# The salmon section
Expand Down
4 changes: 2 additions & 2 deletions sc-preprocess.snake
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ SAMPLE_NAMES = sorted(SAMPLES.keys())
#-------------------------------------------------------------------------------#

output_files = [
#join(config['dirs']['ref'], 'tx2gene', basename(REFERENCE_FASTA).rstrip(".fa"))
join(config['dirs']['ref'], 'tx2gene', basename(REFERENCE_FASTA).rstrip(".fa"))
]

if config["action"]["demultiplex"]:
Expand All @@ -47,7 +47,7 @@ if config["action"]["clean"]:

include: "src/auxiliary.snake"
#include: "src/demultiplex.snake" # not yet implemented
#include: "src/reference.snake"
include: "src/reference.snake"
include: "src/clean.snake"

if config["debug"]:
Expand Down
52 changes: 52 additions & 0 deletions src/reference.snake
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# vim: syntax=python tabstop=4 expandtab
# coding: utf-8

'''
@author: jpreuss
Provides rules for reference-related modifications.
'''

from snakemake.remote.FTP import RemoteProvider as FTPRemoteProvider
# Provider object used below to declare FTP-hosted files as rule inputs.
FTP = FTPRemoteProvider()

# Fetch the remote reference file(s) named by the config and write their
# decompressed content to GENCODE_FASTA.
rule reference_download:
    message:
        'Downloading gencode transcriptome reference.'
    threads: 1
    input:
        # The URL template from the config is expanded with the entries of the
        # 'reference' config section.
        FTP.remote(
            expand(config['reference']['URL'], **config['reference']),
            keep_local=True
        )
    output:
        GENCODE_FASTA
    shell:
        """
        zcat {input} > {output}
        """

# The spike-in rule only exists when the 'reference' config section contains
# a 'spikeIns' entry.
if 'spikeIns' in config.get('reference', {}):
    # Concatenate the downloaded reference and the spike-in file into a single
    # '<release>-withSpikeIns.fa' file below the reference directory.
    rule reference_addSpikes:
        message:
            'Adding spike-in sequences to gencode transcriptome reference'
        threads: 1
        input:
            ref = GENCODE_FASTA,
            spike = config['reference']['spikeIns']
        output:
            join(
                config['dirs']['ref'],
                config['reference']['organism'],
                config['reference']['release'] + '-withSpikeIns.fa'
            )
        shell:
            """
            cat {input.ref} {input.spike} > {output}
            """

# Build a two-column, tab-separated table from the header lines of
# REFERENCE_FASTA.
#
# Shell pipeline, step by step:
#   1. grep keeps the lines containing '>' (the FASTA headers),
#   2. tr removes the '>' characters,
#   3. cut keeps the '|'-separated fields 1 and 2 and joins them with a tab
#      (presumably the transcript and gene identifiers -- confirm against the
#      header layout of the reference in use),
#   4. sed removes a trailing '.<digits>' suffix from an identifier of the
#      form <uppercase letters><digits> at the end of each line.
#
# The command is a raw string so that the sed/bash backslashes reach the
# shell unchanged instead of passing through Python escape processing.
#
# NOTE(review): str.rstrip(".fa") strips any trailing '.', 'f' and 'a'
# characters rather than the literal '.fa' suffix. The same expression defines
# the target in output_files of sc-preprocess.snake, so both places must be
# changed together if this is ever replaced by a real suffix removal.
rule tx2gene_from_fasta:
    input:
        REFERENCE_FASTA
    output:
        join(config['dirs']['ref'], 'tx2gene', basename(REFERENCE_FASTA).rstrip(".fa"))
    threads: 1
    message:
        'Creating tx2gene table from reference {input}.'
    shell:
        r"""
        grep ">" {input} | tr -d '>' | cut -d'|' --output-delimiter=$'\t' -f1,2 | sed -e 's/\([A-Z]\+[[:digit:]]\+\)\(\.[[:digit:]]\+\)$/\1/g' > {output}
        """

0 comments on commit 0a23d6a

Please sign in to comment.