# config/default.yml


#
# The action section
#
# One boolean switch per step; values are written as canonical lowercase
# YAML booleans (true / false).
#
# clean - whether or not to skip the prinseq cleaning step (see below).
#         NOTE(review): the other switches are phrased "whether or not to
#         perform"; confirm whether true runs or skips the cleaning step.
# snv - whether or not to perform SNV analysis.
# cnv - whether or not to perform CNV analysis.
# container - whether or not to create an RDS container.
#
action:
  clean: true
  snv: true
  cnv: true
  container: true

#
# The samplesheet section
#
# file - The path to the SampleSheet text file. Set to None if no samplesheet is available.
#        NOTE(review): a bare None is not the YAML null value (null / ~) and is
#        loaded as a string by standard parsers; confirm which spelling the
#        consumer expects.
# index - The name of the column that should be used as index.
# case - The name of the column containing the case identifier to which the index belongs.
#
samplesheet:
  file: 'SampleSheet.txt'
  index: 'Sample'
  case: 'Case'

#
# The data section
#
# Contains named dictionaries with input sequencing files. Each key below
# `data` (e.g. sample1) names one dictionary - presumably one per sample;
# verify against the consumer.
# r1 - The fastq file containing R1.
# r2 - The fastq file containing R2.
#
# The default values are empty strings, i.e. placeholders to be filled in.
#
data:
  sample1:
    r1: ''
    r2: ''

#
# The reference section
#
# annotation_URL - The URL from where the reference files listed under `file` can be downloaded.
#                  Wildcards ({organism}, {release}, {file}) are allowed in curly brackets.
# genome_URL - The URL from where the reference genome can be downloaded.
#              Wildcards ({organism}, {release}, {assembly}) are allowed in curly brackets.
# organism - Wildcard for the reference organism
# release - Wildcard for the reference release
# assembly - Wildcard for the genome assembly (used in genome_URL)
# file - Wildcard(s) for the reference files that should be merged into the final reference transcriptome
# target - The path to a bed file with coordinates of the target regions
#
reference:
  # NOTE(review): unlike genome_URL, this value has no 'ftp://' scheme prefix -
  # confirm that the difference is intentional.
  annotation_URL: 'ftp.ebi.ac.uk/pub/databases/gencode/Gencode_{organism}/release_{release}/gencode.v{release}.{file}.fa.gz'
  genome_URL: 'ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_{organism}/release_{release}/{assembly}.primary_assembly.genome.fa.gz'
  organism: 'mouse'
  release: 'M17'
  assembly: 'GRCm38'
  # One list entry per value substituted for the {file} wildcard.
  file:
    - basic_annotation
  # Placeholder path; replace with the actual bed file.
  target: 'path/to/target_regions.bed'

#
# The directory section (key: dirs)
#
# ref - directory name for reference files
# fastq - directory name for fastq files
# bam - directory name for STAR alignments
# cnv - directory name for Copy Number Variation analysis
# snv - directory name for Single Nucleotide Variation analysis
# tables - directory name for aggregated tables
# log - directory name for program logs
# R - directory name for Rdata objects
#
dirs:
  ref: 'ref'
  fastq: 'fastq'
  bam: 'bam'
  cnv: 'cnv'
  snv: 'snv'
  tables: 'tables'
  log: 'log'
  R: 'rds'

#
# The debug section
#
# debug - boolean flag, off by default.
#         NOTE(review): what the flag enables is not visible in this file;
#         confirm against the code that reads it.
#
debug: false