<?xml version="1.0"?>
<?xml-stylesheet type="text/css" href="http://deep.mpi-inf.mpg.de/DAC/files/style/deep_process_style.css"?>
<process>
<!-- Identity of this process description: short name and version number -->
<name>BAL</name>
<version>1</version>
<!-- Author of this process description -->
<author>
  <name>Charles Imbusch</name>
  <email>c.imbusch@dkfz.de</email>
</author>
<!-- Summary of what the process does, which outputs it generates and which statistics it computes -->
<description>
* trimming, in silico conversion of reads, mapping, re-converting reads, flagstats, QC after mapping
</description>
<!-- Input files: the samples to be analysed by this process -->
<inputs>
  <!-- R1 FASTQ file of the sample -->
  <filetype>
    <identifier>SampleID_R1</identifier>
    <format>FASTQ</format>
    <quantity>single</quantity>
    <comment>raw input file, pre-filtered for Illumina chastity filter failed reads</comment>
  </filetype>
  <!-- R2 FASTQ file of the sample -->
  <filetype>
    <identifier>SampleID_R2</identifier>
    <format>FASTQ</format>
    <quantity>single</quantity>
    <comment>raw input file, pre-filtered for Illumina chastity filter failed reads</comment>
  </filetype>
</inputs>
<!-- Reference files used by this process (e.g. reference genomes) -->
<references>
  <!-- Genome the converted reads are aligned against -->
  <filetype>
    <identifier>reference_genome</identifier>
    <format>FASTA</format>
    <quantity>single</quantity>
    <comment>The in silico bisulfite converted reference genome file</comment>
  </filetype>
  <!-- Cytosine position index consumed by the methylation calling step -->
  <filetype>
    <identifier>reference_genome.pos</identifier>
    <format>text</format>
    <quantity>single</quantity>
    <comment>CG/CH positions in the reference genome created by 'methylCtools fapos'</comment>
  </filetype>
</references>
<!-- Output files of this process (alignments, intermediate files, metrics, methylation calls) -->
<outputs>
  <!-- Final alignment and its index -->
  <filetype>
    <identifier>DEEPID.PROC.DATE.bam</identifier>
    <format>BAM</format>
    <quantity>single</quantity>
    <comment>Final Bam file with reconverted Cs</comment>
  </filetype>
  <filetype>
    <identifier>DEEPID.PROC.DATE.bai</identifier>
    <format>BAI</format>
    <quantity>single</quantity>
    <comment>Corresponding BAM index file</comment>
  </filetype>
  <!-- Intermediate bwa alignment files, one per read file -->
  <filetype>
    <identifier>DEEPID.PROC.DATE.R1.aln</identifier>
    <format>SAI</format>
    <quantity>single</quantity>
    <comment>aligned reads, output from bwa</comment>
  </filetype>
  <filetype>
    <identifier>DEEPID.PROC.DATE.R2.aln</identifier>
    <format>SAI</format>
    <quantity>single</quantity>
    <comment>aligned reads, output from bwa</comment>
  </filetype>
  <!-- Adaptor-trimmed reads -->
  <filetype>
    <identifier>DEEPID.PROC.DATE.R1.trim</identifier>
    <format>FASTQ</format>
    <quantity>single</quantity>
    <comment>Adaptor trimmed fastq file</comment>
  </filetype>
  <filetype>
    <identifier>DEEPID.PROC.DATE.R2.trim</identifier>
    <format>FASTQ</format>
    <quantity>single</quantity>
    <comment>Adaptor trimmed fastq file</comment>
  </filetype>
  <!-- In silico converted reads -->
  <filetype>
    <identifier>DEEPID.PROC.DATE.R1.conv</identifier>
    <format>FASTQ</format>
    <quantity>single</quantity>
    <comment>In silico converted fastq file</comment>
  </filetype>
  <filetype>
    <identifier>DEEPID.PROC.DATE.R2.conv</identifier>
    <format>FASTQ</format>
    <quantity>single</quantity>
    <comment>In silico converted fastq file</comment>
  </filetype>
  <!-- Quality control and metrics files -->
  <filetype>
    <identifier>DEEPID.PROC.DATE.flagstats</identifier>
    <format>text</format>
    <quantity>single</quantity>
    <comment>samtools flagstat</comment>
  </filetype>
  <filetype>
    <identifier>DEEPID.PROC.DATE.PicardMarkDupmetrics</identifier>
    <format>text</format>
    <quantity>single</quantity>
    <comment>Duplication metrics file written by Picard MarkDuplicates</comment>
  </filetype>
  <filetype>
    <identifier>DEEPID.PROC.DATE.PicardInsertSizemetrics</identifier>
    <format>text</format>
    <quantity>single</quantity>
    <comment>Output name given to Picard CollectMultipleMetrics (alignment summary, insert size, quality score distribution and mean quality by cycle metrics)</comment>
  </filetype>
  <!-- Per-chromosome methylation calling results -->
  <filetype>
    <identifier>DEEPID.PROC.DATE.CHROM_CG_CH.mcall</identifier>
    <format>methylation calls</format>
    <quantity>collection</quantity>
    <comment>The methylation calls separated per chromosome</comment>
  </filetype>
  <filetype>
    <identifier>DEEPID.PROC.DATE.CHROM_CG_CH.mcall.tbi</identifier>
    <format>tabix index</format>
    <quantity>collection</quantity>
    <comment>The tabix index for the methylation calls of each chromosome</comment>
  </filetype>
  <filetype>
    <identifier>DEEPID.PROC.DATE.CHROM_CG_CH.mcall.metrics</identifier>
    <format>text file</format>
    <quantity>collection</quantity>
    <comment>Metrics for methylation calls of each chromosome</comment>
  </filetype>
  <!-- Metrics of the re-conversion step -->
  <filetype>
    <identifier>DEEPID.PROC.DATE.reconversion.metrics</identifier>
    <format>text</format>
    <quantity>single</quantity>
    <comment>Metrics on re-conversion</comment>
  </filetype>
</outputs>
<software>
<!-- Step: SeqPrep reads the two raw FASTQ inputs and writes the R1/R2 .trim files -->
<tool>
  <name>seqprep</name>
  <version>0.4</version>
  <command_line><![CDATA[ SeqPrep AGATCGGAAGAGCGGTTCAG -f {SampleID_R1} -r {SampleID_R2} -1 {DEEPID.PROC.DATE.R1.trim} -2 {DEEPID.PROC.DATE.R2.trim} ]]></command_line>
  <loop>SampleID_R*</loop>
  <comment>trim reads by default adaptor</comment>
</tool>
<!-- Step: methylCtools fqconv turns each .trim FASTQ file into the matching .conv file -->
<tool>
  <name>methylCtools</name>
  <version>0.9.2</version>
  <command_line><![CDATA[ methylCtools fqconv {DEEPID.PROC.DATE.R*.trim} {DEEPID.PROC.DATE.R*.conv} ]]></command_line>
  <loop>DEEPID.PROC.DATE.R*.trim</loop>
  <comment>bisulfite convert reads in silico</comment>
</tool>
<!-- Step: cnybwa aln aligns each .conv FASTQ file against the reference genome and writes the matching .aln file -->
<tool>
  <name>bwa</name>
  <version>cnybwa-0.6.2</version>
  <command_line><![CDATA[ cnybwa-0.6.2 aln -q 20 -t 8 -I {reference_genome} {DEEPID.PROC.DATE.R*.conv} > {DEEPID.PROC.DATE.R*.aln} ]]></command_line>
  <loop>DEEPID.PROC.DATE.R*.conv</loop>
  <comment>produce two intermediate .sai files per lane, performed on convey machines</comment>
</tool>
<!-- Step: bwa sampe combines the R1/R2 .aln files with their .conv FASTQ files into paired alignments.
     The read group line is built from the ID, SM and LB variables, with the platform fixed to ILLUMINA.
     NOTE(review): the comment element says the output is piped to the next step, while the command
     line redirects into sampe_output; presumably sampe_output stands for that pipe - confirm. -->
<tool>
<name>bwa</name>
<version>0.6.2-tpx</version>
<command_line>
<![CDATA[
bwa sampe -t 8 -T -s -P -n 0 -N 0 -r "@RG\tID:${ID}\tSM:${SM}\tLB:${LB}\tPL:ILLUMINA"
{reference_genome} {DEEPID.PROC.DATE.R1.aln} {DEEPID.PROC.DATE.R2.aln} {DEEPID.PROC.DATE.R1.conv}
{DEEPID.PROC.DATE.R2.conv} > sampe_output
]]>
</command_line>
<loop>DEEPID.PROC.DATE.R*.aln</loop>
<comment>pairing of reads to SAM format; output is piped to next step</comment>
</tool>
<!-- Step: methylCtools bconv reads sampe_output, writes methylCtools_reconverted.bam and the reconversion metrics file -->
<tool>
  <name>methylCtools</name>
  <version>0.9.2</version>
  <command_line><![CDATA[ methylCtools bconv --metrics {DEEPID.PROC.DATE.reconversion.metrics} sampe_output methylCtools_reconverted.bam ]]></command_line>
  <loop>sampe_output</loop>
  <comment>Input piped from previous step; reconversion step of methylCtools</comment>
</tool>
<!-- Step: samtools view output of the reconverted BAM is piped into samtools sort to produce sorted_lane_bamfile -->
<tool>
  <name>samtools</name>
  <version>0.1.19</version>
  <command_line><![CDATA[ samtools view -Sbu methylCtools_reconverted.bam | samtools sort -o - sorted_lane_bamfile ]]></command_line>
  <loop>no looping</loop>
  <comment>Sorting BAM by coordinate</comment>
</tool>
<!-- Step: Picard MarkDuplicates takes the sorted lane BAM file(s) and writes the final BAM file.
     REMOVE_DUPLICATES=FALSE is set, CREATE_INDEX=TRUE requests an index alongside the BAM,
     and METRICS_FILE names the duplication metrics output.
     As the comment element states, the command line is simplified: the real invocation
     passes one I=bamfile argument per lane BAM file. -->
<tool>
<name>Picard</name>
<version>1.61</version>
<command_line>
<![CDATA[
java -Xmx50G picard-1.61.jar MarkDuplicates I=sorted_lane_bamfile OUTPUT={DEEPID.PROC.DATE.bam}
TMP_DIR={TMP_DIR} VALIDATION_STRINGENCY=SILENT REMOVE_DUPLICATES=FALSE ASSUME_SORTED=TRUE CREATE_INDEX=TRUE
MAX_RECORDS_IN_RAM=12500000 METRICS_FILE={DEEPID.PROC.DATE.PicardMarkDupmetrics}
]]>
</command_line>
<loop>no looping</loop>
<comment>Merging lanes, marking duplicates and index creation. The Picard commandline gets I=bamfile for each bam file as input which is simplified above in the command line.</comment>
</tool>
<!-- Step: samtools flagstat is run on the final BAM file and its report is redirected into the .flagstats output -->
<tool>
  <name>samtools</name>
  <version>0.1.19</version>
  <command_line><![CDATA[ samtools flagstat {DEEPID.PROC.DATE.bam} > {DEEPID.PROC.DATE.flagstats} ]]></command_line>
  <loop>no looping</loop>
  <comment>Alignment statistics of the final BAM file computed with samtools flagstat</comment>
</tool>
<!-- Step: Picard CollectMultipleMetrics is run on the final BAM file with four programs enabled:
     CollectAlignmentSummaryMetrics, CollectInsertSizeMetrics, QualityScoreDistribution and
     MeanQualityByCycle. The OUTPUT value is the PicardInsertSizemetrics identifier; per the
     comment element, several output files are created. -->
<tool>
<name>Picard</name>
<version>1.61</version>
<command_line>
<![CDATA[
java -Xmx4G picard-1.61.jar CollectMultipleMetrics INPUT={DEEPID.PROC.DATE.bam}
REFERENCE_SEQUENCE={reference_genome} ASSUME_SORTED=true VALIDATION_STRINGENCY=SILENT
OUTPUT={DEEPID.PROC.DATE.PicardInsertSizemetrics} PROGRAM=CollectAlignmentSummaryMetrics
PROGRAM=CollectInsertSizeMetrics PROGRAM=QualityScoreDistribution PROGRAM=MeanQualityByCycle
]]>
</command_line>
<loop>no looping</loop>
<comment>creates several output files</comment>
</tool>
<!-- Step: bcall.py is run once per chromosome (loop over CHROM) on the final BAM file, using the
     reference position file; it writes the per-chromosome .mcall file and, via the m option,
     the matching .mcall.metrics file.
     NOTE(review): the outputs section also lists a .mcall.tbi tabix index per chromosome, but no
     tool entry shows how it is produced - confirm whether bcall.py creates it or a tabix step
     is missing from this description. -->
<tool>
<name>methylCtools</name>
<version>0.9.2</version>
<command_line>
<![CDATA[
python bcall.py -r {CHROM} --trimPE --snv --zero -e 5 {reference_genome.pos}
-m {DEEPID.PROC.DATE.CHROM_CG_CH.mcall.metrics} {DEEPID.PROC.DATE.bam}
{DEEPID.PROC.DATE.CHROM_CG_CH.mcall}
]]>
</command_line>
<loop>CHROM</loop>
<comment>Creates methylation calls for each chromosome</comment>
</tool>
</software>
</process>