Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
comp-metadata/docs/alignment/bisulfite/BALv1.xml
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
237 lines (234 sloc)
8.83 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0"?> | |
<?xml-stylesheet type="text/css" href="http://deep.mpi-inf.mpg.de/DAC/files/style/deep_process_style.css"?> | |
<process> | |
<name>BAL</name> | |
<version>1</version> | |
<author> | |
<name>Charles Imbusch</name> | |
<email>c.imbusch@dkfz.de</email> | |
</author> | |
<!-- Precise description of what this process does, what output is generated and what statistics are computed --> | |
<description> | |
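* adaptor trimming (SeqPrep), in silico bisulfite conversion of reads (methylCtools fqconv), mapping (bwa aln/sampe), re-conversion of reads (methylCtools bconv), coordinate sorting (samtools), lane merging and duplicate marking (Picard), flagstats and Picard QC metrics after mapping, per-chromosome methylation calling (methylCtools bcall.py) | |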
</description> | |
<!-- Following section: list input files [samples to be analysed and similar] --> | |
<inputs> | |
<filetype> | |
<identifier>SampleID_R1</identifier> | |
<format>FASTQ</format> | |
<quantity>single</quantity> | |
<comment>raw input file, pre-filtered for Illumina chastity filter failed reads</comment> | |
</filetype> | |
<filetype> | |
<identifier>SampleID_R2</identifier> | |
<format>FASTQ</format> | |
<quantity>single</quantity> | |
<comment>raw input file, pre-filtered for Illumina chastity filter failed reads</comment> | |
</filetype> | |
</inputs> | |
<!-- Following section: list reference files [e.g. reference genomes] used in this process --> | |
<references> | |
<filetype> | |
<identifier>reference_genome</identifier> | |
<format>FASTA</format> | |
<quantity>single</quantity> | |
<comment>The in silico bisulfite converted reference genome file</comment> | |
</filetype> | |
<filetype> | |
<identifier>reference_genome.pos</identifier> | |
<format>text</format> | |
<quantity>single</quantity> | |
<comment>CG/CH positions in the reference genome created by 'methylCtools fapos'</comment> | |
</filetype> | |
</references> | |
<!-- Following section: list output files of process [e.g. bed files, wiggle tracks] --> | |
<outputs> | |
<filetype> | |
<identifier>DEEPID.PROC.DATE.bam</identifier> | |
<format>BAM</format> | |
<quantity>single</quantity> | |
<comment>Final BAM file with reconverted Cs; lanes merged and duplicates marked (not removed)</comment> | |
</filetype> | |
<filetype> | |
<identifier>DEEPID.PROC.DATE.bai</identifier> | |
<format>BAI</format> | |
<quantity>single</quantity> | |
<comment>Corresponding BAM index file</comment> | |
</filetype> | |
<filetype> | |
<identifier>DEEPID.PROC.DATE.R1.aln</identifier> | |
<format>SAI</format> | |
<quantity>single</quantity> | |
<comment>intermediate alignment file for read 1 (R1), output from bwa aln</comment> | |
</filetype> | |
<filetype> | |
<identifier>DEEPID.PROC.DATE.R2.aln</identifier> | |
<format>SAI</format> | |
<quantity>single</quantity> | |
<comment>intermediate alignment file for read 2 (R2), output from bwa aln</comment> | |
</filetype> | |
<filetype> | |
<identifier>DEEPID.PROC.DATE.R1.trim</identifier> | |
<format>FASTQ</format> | |
<quantity>single</quantity> | |
<comment>Adaptor trimmed fastq file</comment> | |
</filetype> | |
<filetype> | |
<identifier>DEEPID.PROC.DATE.R2.trim</identifier> | |
<format>FASTQ</format> | |
<quantity>single</quantity> | |
<comment>Adaptor trimmed fastq file</comment> | |
</filetype> | |
<filetype> | |
<identifier>DEEPID.PROC.DATE.R1.conv</identifier> | |
<format>FASTQ</format> | |
<quantity>single</quantity> | |
<comment>In silico converted fastq file</comment> | |
</filetype> | |
<filetype> | |
<identifier>DEEPID.PROC.DATE.R2.conv</identifier> | |
<format>FASTQ</format> | |
<quantity>single</quantity> | |
<comment>In silico converted fastq file</comment> | |
</filetype> | |
<filetype> | |
<identifier>DEEPID.PROC.DATE.flagstats</identifier> | |
<format>text</format> | |
<quantity>single</quantity> | |
<comment>samtools flagstat</comment> | |
</filetype> | |
<filetype> | |
<identifier>DEEPID.PROC.DATE.PicardMarkDupmetrics</identifier> | |
<format>text</format> | |
<quantity>single</quantity> | |
<comment>Duplication metrics written by Picard MarkDuplicates (METRICS_FILE)</comment> | |
</filetype> | |
<filetype> | |
<identifier>DEEPID.PROC.DATE.PicardInsertSizemetrics</identifier> | |
<format>text</format> | |
<quantity>single</quantity> | |
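<comment>Output of Picard CollectMultipleMetrics (alignment summary, insert size, quality score distribution, mean quality by cycle); several files are created</comment> | |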
</filetype> | |
<filetype> | |
<identifier>DEEPID.PROC.DATE.CHROM_CG_CH.mcall</identifier> | |
<format>methylation calls</format> | |
<quantity>collection</quantity> | |
<comment>The methylation calls separated per chromosome</comment> | |
</filetype> | |
<filetype> | |
<identifier>DEEPID.PROC.DATE.CHROM_CG_CH.mcall.tbi</identifier> | |
<format>tabix index</format> | |
<quantity>collection</quantity> | |
<comment>The tabix index for the methylation calls of each chromosome</comment> | |
</filetype> | |
<filetype> | |
<identifier>DEEPID.PROC.DATE.CHROM_CG_CH.mcall.metrics</identifier> | |
<format>text</format> | |
<quantity>collection</quantity> | |
<comment>Metrics for methylation calls of each chromosome</comment> | |
</filetype> | |
<filetype> | |
<identifier>DEEPID.PROC.DATE.reconversion.metrics</identifier> | |
<format>text</format> | |
<quantity>single</quantity> | |
<comment>Metrics on re-conversion</comment> | |
</filetype> | |
</outputs> | |
<software> | |
<tool> | |
<name>seqprep</name> | |
<version>0.4</version> | |
<command_line><![CDATA[ SeqPrep AGATCGGAAGAGCGGTTCAG -f {SampleID_R1} -r {SampleID_R2} -1 {DEEPID.PROC.DATE.R1.trim} -2 {DEEPID.PROC.DATE.R2.trim} ]]></command_line> | |
<loop>SampleID_R*</loop> | |
<comment>trim reads by default adaptor</comment> | |
</tool> | |
<tool> | |
<name>methylCtools</name> | |
<version>0.9.2</version> | |
<command_line><![CDATA[ methylCtools fqconv {DEEPID.PROC.DATE.R*.trim} {DEEPID.PROC.DATE.R*.conv} ]]></command_line> | |
<loop>DEEPID.PROC.DATE.R*.trim</loop> | |
<comment>bisulfite convert reads in silico</comment> | |
</tool> | |
<tool> | |
<name>bwa</name> | |
<version>cnybwa-0.6.2</version> | |
<command_line><![CDATA[ cnybwa-0.6.2 aln -q 20 -t 8 -I {reference_genome} {DEEPID.PROC.DATE.R*.conv} > {DEEPID.PROC.DATE.R*.aln} ]]></command_line> | |
<loop>DEEPID.PROC.DATE.R*.conv</loop> | |
<comment>Produces two intermediate .sai files per lane (one per read); performed on Convey machines</comment> | |
</tool> | |
<tool> | |
<name>bwa</name> | |
<version>0.6.2-tpx</version> | |
<command_line> | |
<![CDATA[ | |
bwa sampe -t 8 -T -s -P -n 0 -N 0 -r "@RG\tID:${ID}\tSM:${SM}\tLB:${LB}\tPL:ILLUMINA" | |
{reference_genome} {DEEPID.PROC.DATE.R1.aln} {DEEPID.PROC.DATE.R2.aln} {DEEPID.PROC.DATE.R1.conv} | |
{DEEPID.PROC.DATE.R2.conv} > sampe_output | |
]]> | |
</command_line> | |
<loop>DEEPID.PROC.DATE.R*.aln</loop> | |
<comment>pairing of reads to SAM format; output is piped to next step</comment> | |
</tool> | |
<tool> | |
<name>methylCtools</name> | |
<version>0.9.2</version> | |
<command_line><![CDATA[ methylCtools bconv --metrics {DEEPID.PROC.DATE.reconversion.metrics} sampe_output methylCtools_reconverted.bam ]]></command_line> | |
<loop>sampe_output</loop> | |
<comment>Input piped from previous step; reconversion step of methylCtools</comment> | |
</tool> | |
<tool> | |
<name>samtools</name> | |
<version>0.1.19</version> | |
<command_line><![CDATA[ samtools view -Sbu methylCtools_reconverted.bam | samtools sort -o - sorted_lane_bamfile ]]></command_line> | |
<loop>no looping</loop> | |
<comment>Sorting BAM by coordinate</comment> | |
</tool> | |
<tool> | |
<name>Picard</name> | |
<version>1.61</version> | |
<command_line> | |
<![CDATA[ | |
java -Xmx50G picard-1.61.jar MarkDuplicates I=sorted_lane_bamfile OUTPUT={DEEPID.PROC.DATE.bam} | |
TMP_DIR={TMP_DIR} VALIDATION_STRINGENCY=SILENT REMOVE_DUPLICATES=FALSE ASSUME_SORTED=TRUE CREATE_INDEX=TRUE | |
MAX_RECORDS_IN_RAM=12500000 METRICS_FILE={DEEPID.PROC.DATE.PicardMarkDupmetrics} | |
]]> | |
</command_line> | |
<loop>no looping</loop> | |
<comment>Merging lanes, marking duplicates and creating the index. The actual Picard command line receives one I=bamfile argument per lane BAM file; this is simplified in the command line above.</comment> | |
</tool> | |
<tool> | |
<name>samtools</name> | |
<version>0.1.19</version> | |
<command_line><![CDATA[ samtools flagstat {DEEPID.PROC.DATE.bam} > {DEEPID.PROC.DATE.flagstats} ]]></command_line> | |
<loop>no looping</loop> | |
<comment>Alignment flag statistics (samtools flagstat) of the final BAM file</comment> | |
</tool> | |
<tool> | |
<name>Picard</name> | |
<version>1.61</version> | |
<command_line> | |
<![CDATA[ | |
java -Xmx4G picard-1.61.jar CollectMultipleMetrics INPUT={DEEPID.PROC.DATE.bam} | |
REFERENCE_SEQUENCE={reference_genome} ASSUME_SORTED=true VALIDATION_STRINGENCY=SILENT | |
OUTPUT={DEEPID.PROC.DATE.PicardInsertSizemetrics} PROGRAM=CollectAlignmentSummaryMetrics | |
PROGRAM=CollectInsertSizeMetrics PROGRAM=QualityScoreDistribution PROGRAM=MeanQualityByCycle | |
]]> | |
</command_line> | |
<loop>no looping</loop> | |
<comment>creates several output files</comment> | |
</tool> | |
<tool> | |
<name>methylCtools</name> | |
<version>0.9.2</version> | |
<command_line> | |
<![CDATA[ | |
python bcall.py -r {CHROM} --trimPE --snv --zero -e 5 {reference_genome.pos} | |
-m {DEEPID.PROC.DATE.CHROM_CG_CH.mcall.metrics} {DEEPID.PROC.DATE.bam} | |
{DEEPID.PROC.DATE.CHROM_CG_CH.mcall} | |
]]> | |
</command_line> | |
<loop>CHROM</loop> | |
<comment>Creates methylation calls for each chromosome</comment> | |
</tool> | |
</software> | |
</process> |