-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
4 changed files
with
187 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
|
||
# | ||
# The action section | ||
# | ||
# clean - whether or not to perform the prinseq cleaning step (see below). | ||
# snv - whether or not to perform SNV analysis. | ||
# cnv - whether or not to perform CNV analysis. | ||
# container - whether or not to create an RDS container. | ||
# | ||
action: | ||
clean: True | ||
snv: True | ||
cnv: True | ||
container: True | ||
|
||
# | ||
# The samplesheet section | ||
# | ||
# file - The path to the SampleSheet text file. Set to None if no samplesheet is available | ||
# index - The name of the Column that should be used as index | ||
# case - The name of the Column containing the case identifier to which the index belongs | ||
# | ||
samplesheet: | ||
file: 'SampleSheet.txt' | ||
index: 'Sample' | ||
case: 'Case' | ||
|
||
# | ||
# The data section | ||
# | ||
# Contains named dictionaries with input sequencing files: | ||
# r1 - The fastq file containing R1. | ||
# r2 - The fastq file containing R2. | ||
# | ||
data: | ||
sample1: | ||
r1: '' | ||
r2: '' | ||
|
||
# | ||
# The reference section | ||
# | ||
# annotation_URL - The URL from where the reference annotation can be downloaded. Wildcards are allowed in curly brackets | ||
# genome_URL - The URL from where the reference genome can be downloaded. Wildcards are allowed in curly brackets | ||
# organism - Wildcard for the reference organism | ||
# release - Wildcard for the reference release | ||
# assembly - Wildcard for the reference genome assembly | ||
# file - Wildcard(s) for the annotation files that should be merged into the final reference annotation | ||
# target - The path to a bed file with coordinates of the target regions | ||
# | ||
reference:
  # The annotation pattern must resolve to a GENCODE GTF file, e.g.
  # gencode.vM17.basic.annotation.gtf.gz for release M17 with file
  # 'basic.annotation'. Both URLs carry an explicit ftp:// scheme.
  annotation_URL: 'ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_{organism}/release_{release}/gencode.v{release}.{file}.gtf.gz'
  genome_URL: 'ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_{organism}/release_{release}/{assembly}.primary_assembly.genome.fa.gz'
  organism: 'mouse'
  release: 'M17'
  assembly: 'GRCm38'
  # Every entry is substituted for {file} in annotation_URL.
  file:
    - basic.annotation
  target: 'path/to/target_regions.bed'
|
||
# | ||
# The directory section | ||
# | ||
# ref - directory name for reference files | ||
# fastq - directory name for fastq files | ||
# bam - directory name for STAR alignments | ||
# cnv - directory name for Copy Number Variation analysis | ||
# snv - directory name for Single Nucleotide Variation analysis | ||
# tables - directory name for aggregated tables | ||
# log - directory name for program logs | ||
# R - directory name for Rdata objects | ||
# | ||
dirs: | ||
ref: 'ref' | ||
fastq: 'fastq' | ||
bam: 'bam' | ||
cnv: 'cnv' | ||
snv: 'snv' | ||
tables: 'tables' | ||
log: 'log' | ||
R: 'rds' | ||
|
||
# | ||
# The debug section | ||
# | ||
debug: False |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
name: target-dnaseq | ||
|
||
channels: | ||
- bioconda | ||
- conda-forge | ||
|
||
dependencies: | ||
- python=3.5 | ||
- snakemake | ||
- trimmomatic | ||
- star | ||
- samtools | ||
- biobambam | ||
- qualimap | ||
- varscan | ||
- vcfanno | ||
- vcflib | ||
- control-freec |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# vim: syntax=python tabstop=4 expandtab | ||
# coding: utf-8 | ||
|
||
''' | ||
@author: jpreuss | ||
Provides rules for reference related modifications | ||
''' | ||
|
||
# Remote provider through which the download rules fetch their inputs via FTP.
from snakemake.remote.FTP import RemoteProvider as FTPRemoteProvider

FTP = FTPRemoteProvider()
|
||
# Fetch the reference genome via FTP and write its decompressed content to
# GENOME. The URL template `genome_URL` is filled in with the values of the
# `reference` config section; `zcat -f` also passes through input that is
# not gzip-compressed.
rule genome_download:
    input:
        FTP.remote(
            expand(config['reference']['genome_URL'], **config['reference']),
            keep_local=True
        )
    output:
        GENOME
    message:
        'Downloading gencode genome reference.'
    threads: 1
    shell:
        """
        zcat -f {input} > {output}
        """
|
||
# Fetch the reference annotation via FTP and write its decompressed content
# to ANNOTATION. The URL template `annotation_URL` is filled in with the
# values of the `reference` config section; when `file` lists several
# entries, one URL per entry is requested and all downloads are
# concatenated into the single output.
rule annotation_download:
    input:
        FTP.remote(
            expand(config['reference']['annotation_URL'], **config['reference']),
            keep_local=True
        )
    output:
        ANNOTATION
    message:
        'Downloading gencode annotation reference.'
    threads: 1
    shell:
        """
        zcat -f {input} > {output}
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import pandas as pd | ||
from os.path import join, basename, dirname | ||
|
||
# Load the configuration: prefer a config file handed over on the command
# line (exposed by Snakemake as workflow.overwrite_configfile), otherwise
# fall back to 'config/default.yml'.
# None is a singleton, so it is tested by identity rather than with `!=`.
if workflow.overwrite_configfile is not None:
    configfile: str(workflow.overwrite_configfile)
else:
    configfile: 'config/default.yml'
|
||
#-------------------------------------------------------------------------------# | ||
#-------------- Handle reference related paths and file names ------------------# | ||
#-------------------------------------------------------------------------------# | ||
|
||
# Local target paths of the reference files: <ref dir>/<organism>/<name>.
# The download rules write decompressed content (`zcat -f ... > output`), so
# the file names must not carry a '.gz' suffix -- otherwise the extension
# would claim gzip compression for plain-text files.
GENOME = join(
    config['dirs']['ref'],
    config['reference']['organism'],
    config['reference']['assembly'] + '.fa',
)
ANNOTATION = join(
    config['dirs']['ref'],
    config['reference']['organism'],
    config['reference']['release'] + '.gtf',
)
|
||
#-------------------------------------------------------------------------------# | ||
#------------- Read samplesheet and provide necessary variables ----------------# | ||
#-------------------------------------------------------------------------------# | ||
|
||
# Load the tab-separated samplesheet, indexed by the configured column.
# NOTE(review): the config comments say `file` may be set to None when no
# samplesheet is available, but that case is not handled here -- confirm the
# intended fallback.
samplesheet = pd.read_table(
    config['samplesheet']['file'],
    sep='\t',
    index_col=config['samplesheet']['index'],
)

# Without an explicit 'URL_r1' column, derive one entry per sample of the
# form <fastq dir>/<sample>.fastq.gz.
if 'URL_r1' not in samplesheet.columns:
    samplesheet['URL_r1'] = [
        '{}/{}.{}'.format(config['dirs']['fastq'], name, 'fastq.gz')
        for name in samplesheet.index
    ]

# Row-wise dictionary (index value -> {column: value}) and the sorted list of
# its keys.
SAMPLES = samplesheet.to_dict(orient='index')
SAMPLE_NAMES = sorted(SAMPLES)
|
||
#-------------------------------------------------------------------------------# | ||
#--------------------------- Generate output files -----------------------------# | ||
#-------------------------------------------------------------------------------# | ||
|
||
# Files requested by the `all` rule: the downloaded genome and annotation.
output_files = [GENOME, ANNOTATION]
|
||
#-------------------------------------------------------------------------------# | ||
#---------------------------------- RUN :-) ------------------------------------# | ||
#-------------------------------------------------------------------------------# | ||
|
||
# Pull in the rules that download the reference genome and annotation.
include: 'src/reference.snake'

# NOTE(review): print_debug is not defined in this Snakefile nor in
# src/reference.snake, so enabling `debug` looks like it raises a NameError
# -- confirm where the helper is supposed to come from.
if config['debug']:
    print_debug()

# Aggregate rule that requests every file collected in output_files.
rule all:
    input: output_files
    message: 'Done.'