Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
02184e58f1
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
75 lines (58 sloc) 3.28 KB
import pandas as pd
from os.path import join, basename, dirname
# Select the configuration file: use the override recorded on the workflow
# object when one is set, otherwise fall back to the repository default.
# NOTE(review): `workflow.overwrite_configfile` is presumably populated from
# the `--configfile` command-line option, and the attribute name may differ
# between Snakemake versions -- confirm against the pinned version.
if workflow.overwrite_configfile is not None:
    configfile: str(workflow.overwrite_configfile)
else:
    configfile: 'config/default.yml'
#-------------------------------------------------------------------------------#
#-------------- Handle reference related paths and file names ------------------#
#-------------------------------------------------------------------------------#
# Both FASTA paths live in <ref dir>/<organism>/ and are named after the release.
_reference_dir = join(config['dirs']['ref'], config['reference']['organism'])
_release = config['reference']['release']

# Plain reference FASTA: <release>.fa
GENCODE_FASTA = join(_reference_dir, _release + '.fa')

# When the config's reference section has a 'spikeIns' entry, use the
# "<release>-withSpikeIns.fa" file; otherwise reuse the plain FASTA.
if 'spikeIns' in config.get('reference', {}):
    REFERENCE_FASTA = join(_reference_dir, _release + '-withSpikeIns.fa')
else:
    REFERENCE_FASTA = GENCODE_FASTA
#-------------------------------------------------------------------------------#
#------------- Read samplesheet and provide necessary variables ----------------#
#-------------------------------------------------------------------------------#
# Load the tab-separated sample sheet, indexed by the column named in the config.
samplesheet = pd.read_table(
    config['samplesheet']['file'],
    sep='\t',
    index_col=config['samplesheet']['index'],
)

# If the sheet carries no 'URL_r1' column, fill it with
# <fastq dir>/<sample>.fastq.gz for every sample in the index.
if 'URL_r1' not in samplesheet.columns:
    samplesheet['URL_r1'] = [
        '{dir}/{sample}.{format}'.format(
            dir=config['dirs']['fastq'],
            sample=sample_name,
            format='fastq.gz',
        )
        for sample_name in samplesheet.index
    ]

# Per-sample row data keyed by index value, and the index values in sorted order.
SAMPLES = samplesheet.to_dict(orient='index')
SAMPLE_NAMES = sorted(SAMPLES)
#-------------------------------------------------------------------------------#
#--------------------------- Generate output files -----------------------------#
#-------------------------------------------------------------------------------#
# Collect every file the workflow is asked to produce; `rule all` uses this
# list as its input.
# NOTE(review): block nesting below was reconstructed from a copy without
# indentation -- confirm that the summary-table lines belong inside their
# respective branches.

# Base name of the reference FASTA without its '.fa' extension.
# str.rstrip('.fa') must not be used for this: it strips any run of trailing
# '.', 'f' and 'a' characters, so a name such as 'alpha.fa' would become 'alph'.
# NOTE(review): if a rule in src/reference.snake derives the tx2gene name the
# same way, it needs the same suffix handling to stay in sync.
reference_name = basename(REFERENCE_FASTA)
if reference_name.endswith('.fa'):
    reference_name = reference_name[:-len('.fa')]

# The tx2gene mapping for the reference is always requested.
output_files = [
    join(config['dirs']['ref'], 'tx2gene', reference_name)
]

# One FASTQ per sample in the fastq directory.
if config['action']['demultiplex']:
    demultiplexed_files = expand('{o}/{s}.fastq.gz', o = config['dirs']['fastq'], s = SAMPLE_NAMES)
    output_files.extend(demultiplexed_files)

# One cleaned FASTQ per sample, plus the htstream summary table.
if config['action']['clean']:
    clean_files = expand('{o}/{s}.clean.fastq.gz', o = config['dirs']['fastq'], s = SAMPLE_NAMES)
    output_files.extend(clean_files)
    output_files.append(join(config['dirs']['tables'], 'htstream.txt'))

# Quantification outputs depend on the configured tool; any value other than
# 'salmon' or 'kallisto' adds nothing.
if config['action']['quantification'] == 'salmon':
    # Per-sample quant.sf files, plus the salmon summary table.
    quant_files = expand('{o}/{s}/quant.sf', o = config['dirs']['quant'], s = SAMPLE_NAMES)
    output_files.extend(quant_files)
    output_files.append(join(config['dirs']['tables'], 'salmon.txt'))
elif config['action']['quantification'] == 'kallisto':
    # A single matrix.{ec,tsv,cells} set, plus run_info.json.
    quant_files = expand('{o}/matrix.{ending}', o = config['dirs']['quant'], ending = ['ec', 'tsv', 'cells'])
    output_files.extend(quant_files)
    output_files.append(join(config['dirs']['quant'], 'run_info.json'))

# Serialized R object written to the R directory.
if config['action']['container']:
    output_files.append(join(config['dirs']['R'], 'scData.rds'))
#-------------------------------------------------------------------------------#
#---------------------------------- RUN :-) ------------------------------------#
#-------------------------------------------------------------------------------#
# Pull in the rule files that make up the workflow. All of them are included
# unconditionally, whatever the 'action' settings in the config say; the
# demultiplexing rules are the only ones left out.
# NOTE(review): print_debug(), called after these includes, is presumably
# defined in one of these files -- keep the includes ahead of that call.
include: "src/auxiliary.snake"
#include: "src/demultiplex.snake" # not yet implemented
include: "src/reference.snake"
include: "src/clean.snake"
include: "src/salmon.snake"
include: "src/kallisto.snake"
include: "src/container.snake"
# Call the debug helper only when the 'debug' config entry is truthy.
# The key is read with [] on purpose: a config without 'debug' raises KeyError.
if config["debug"]:
    print_debug()
# Default target: requests every file collected in `output_files`.
rule all:
    input: output_files
    message: "Done."