<?xml version="1.0"?>
<?xml-stylesheet type="text/css" href="http://deep.mpi-inf.mpg.de/DAC/files/style/deep_process_style.css"?>
<process>
    <!-- Process identity: name and version appear combined as "CHPv1" in the description text of this file. -->
    <name>CHP</name>
	<version>1</version>
	<!-- NOTE(review): author names and e-mail addresses are comma-separated lists; presumably they pair up by position (confirm). -->
	<author>
		<name>Andreas Richter, Peter Ebert</name>
		<email>arichter@ie-freiburg.mpg.de, pebert@mpi-inf.mpg.de</email>
	</author>
    <description>
		Process CHPv1 has been used to analyse the first set of DEEP pilot data.
		* {genomesize}: effective genome size (used by deepTools and MACS2); for deeply sequenced samples and a random mapping strategy it is approximated by (genome size) - (number of N bases)
		    * human hs37d5 (1k genomes): 2.9e9
		    * mouse mm10: 2.65e9
		* {fragment_length}: individual median fragment length/insert size of each sample, derived from the BAM file (based on alignment statistics from HD [see email from Barbara Hutter, 16 October 2013] or computed by PE_fragment_size); the command lines below use the placeholders {_median_fraglen} and {all_median_fraglen} for fragment lengths
		* use the most recent deepTools version from GitHub and the most recent MACS2 version
	</description>
	<!--
		Input files of the process.
		The histone BAMs (collection) are the ChIP signal samples; the single input BAM
		is passed as control to MACS2 (option -c) and as second file to bamCompare.
		The format fields are filled in from the file extensions of the identifiers.
	-->
	<inputs>
		<filetype>
			<identifier>ALNvX_histone.bam</identifier>
			<format>BAM</format>
			<quantity>collection</quantity>
			<comment></comment>
		</filetype>
		<filetype>
			<identifier>ALNvX_input.bam</identifier>
			<format>BAM</format>
			<quantity>single</quantity>
			<comment></comment>
		</filetype>
		<filetype>
			<identifier>ALNvX.bai</identifier>
			<format>BAI</format>
			<quantity>collection</quantity>
			<comment>Index files are renamed internally to .bam.bai since deepTools is expecting index naming like this</comment>
		</filetype>
	</inputs>
	<!--
		Reference files shared by all samples. Usage in the tool command lines of this file:
		  filtered_regions  : regions argument of region_filter.py
		  reference_genome  : genome argument of computeGCBias
		  plot_regions      : regions argument of signal_plotter.py
	-->
	<references>
		<filetype>
			<identifier>filtered_regions</identifier>
			<format>BED</format>
			<quantity>single</quantity>
			<comment>ENCODE blacklist extended by A. Richter (FB); see DCC/download/results/references/annotations</comment>
		</filetype>
		<filetype>
			<identifier>reference_genome</identifier>
			<format>2bit</format>
			<quantity>single</quantity>
			<comment>The reference genome file; see DCC/download/results/references/genomes</comment>
		</filetype>
		<filetype>
			<identifier>plot_regions</identifier>
			<format>BED</format>
			<quantity>single</quantity>
			<comment>Control regions obtained from A. Richter (FB) for quality control of ChIPseq samples; see DCC/download/results/references/annotations</comment>
		</filetype>
	</references>
	<!--
		Output files of the process. Identifiers are referenced as {placeholders}
		in the tool command lines, so they must be kept in sync with the software section.
		NOTE(review): the sample prefix is spelled three ways (samplesID, sampleID, sampleIDs);
		presumably the plural forms mark outputs built from more than one BAM (confirm).
	-->
	<outputs>
		<!-- Correlation plot; written by bamCorrelate (plotFile argument). -->
		<filetype>
			<identifier>samplesID.PROCESS.DATE.corplot.cormethod</identifier>
			<format>deepTools graphics PNG</format>
			<quantity>single</quantity>
			<comment></comment>
		</filetype>
		<filetype>
			<identifier>samplesID.PROCESS.DATE.fgprplot</identifier>
			<format>deepTools graphics PNG</format>
			<quantity>single</quantity>
			<comment></comment>
		</filetype>
		<!-- GC bias plot and frequency table; both written by computeGCBias. -->
		<filetype>
			<identifier>sampleID.PROCESS.DATE.gcbplot</identifier>
			<format>deepTools graphics PNG</format>
			<quantity>collection</quantity>
			<comment></comment>
		</filetype>
		<filetype>
			<identifier>sampleID.PROCESS.DATE.gcbfreq</identifier>
			<format>tab-separated text file</format>
			<quantity>collection</quantity>
			<comment></comment>
		</filetype>
		<!-- Signal-vs-input track; written by bamCompare. -->
		<filetype>
			<identifier>sampleIDs.PROCESS.DATE.bamcomp.scalemethod</identifier>
			<format>bigwig</format>
			<quantity>collection</quantity>
			<comment>Always compare a signal vs the input</comment>
		</filetype>
		<!-- Coverage track; written by bamCoverage and read again by signal_plotter.py. -->
		<filetype>
			<identifier>sampleID.PROCESS.DATE.bamcov.seqDepthNorm</identifier>
			<format>bigwig</format>
			<quantity>collection</quantity>
			<comment></comment>
		</filetype>
		<!-- Control-region plot; written by signal_plotter.py. -->
		<filetype>
			<identifier>sampleID.PROCESS.DATE.ctrlreg</identifier>
			<format>graphics PNG</format>
			<quantity>collection</quantity>
			<comment></comment>
		</filetype>
	</outputs>
	<software>
		<!--
			Step 1: remove reads in {filtered_regions} from the input BAMs.
			NOTE(review): this step writes BAM_filtered.tmp while bamCorrelate reads
			BAMs_filtered.tmp; presumably the latter denotes the set of all filtered files (confirm).
		-->
		<tool>
			<name>region_filter.py</name>
			<version>0.1</version>
			<command_line><![CDATA[ region_filter.py --bamfile {ALNvX_*.bam} --regions {filtered_regions} --output BAM_filtered.tmp ]]></command_line>
			<loop></loop>
			<comment>Script to generate a temporary BAM file with ENCODE blacklist regions excluded, only relevant for bamCorrelate tool. The filtered BAM file is discarded at the end of this process</comment>
		</tool>
		<!--
			Correlation of the blacklist-filtered BAMs in 1 kb bins; writes the corplot output.
			The correlation method is fixed to pearson in the command line, although the
			output identifier carries a generic "cormethod" suffix.
		-->
		<tool>
			<name>bamCorrelate (deepTools)</name>
			<version>1.5.7-5-gcbab7b3</version>
			<command_line><![CDATA[ bamCorrelate bins -p {numproc} --bamfiles BAMs_filtered.tmp --plotFile {samplesID.PROCESS.DATE.corplot.cormethod} --corMethod pearson --labels {labels} --binSize 1000 --numberOfSamples 1000000 --fragmentLength {all_median_fraglen} ]]></command_line>
			<loop></loop>
			<comment>Window/bin size of 1kb since multiple narrow signals will be merged with default value (10kb), 1m samples</comment>
		</tool>
		<!--
			Fingerprint plot over all (unfiltered) input BAMs.
			The plotFile placeholder uses the identifier declared in the outputs section
			(suffix "fgprplot"), as every other command line in this file does.
		-->
		<tool>
			<name>bamFingerprint (deepTools)</name>
			<version>1.5.7-5-gcbab7b3</version>
			<command_line><![CDATA[ bamFingerprint -p {numproc} --bamfiles {ALNvX_*.bam} --plotFile {samplesID.PROCESS.DATE.fgprplot} --labels {labels} --fragmentLength {all_median_fraglen} --numberOfSamples 500000 ]]></command_line>
			<loop></loop>
			<comment></comment>
		</tool>
		<!--
			GC bias estimation against {reference_genome}; writes the gcbfreq table
			and the gcbplot image for the given BAM file.
		-->
		<tool>
			<name>computeGCBias (deepTools)</name>
			<version>1.5.7-5-gcbab7b3</version>
			<command_line><![CDATA[ computeGCBias -p {numproc} --bamfile {ALNvX_*.bam} --effectiveGenomeSize {genomesize} --genome {reference_genome} --fragmentLength {_median_fraglen} --sampleSize 50000000 --GCbiasFrequenciesFile {sampleID.PROCESS.DATE.gcbfreq} --biasPlot {sampleID.PROCESS.DATE.gcbplot} ]]></command_line>
			<loop></loop>
			<comment></comment>
		</tool>
		<!--
			Peak calling of each histone BAM against the input control, without model
			building (fixed extension size = median fragment length).
			The {broad} placeholder expands to the broad-peak switch for the marks named
			in the comment element below.
			NOTE(review): no MACS2 peak files are declared in the outputs section; confirm
			whether peak calls are meant to be outputs of this process.
		-->
		<tool>
			<name>MACS2</name>
			<version>2.0.10.20131216 (tag:beta)</version>
			<command_line><![CDATA[ macs2 callpeak -t {ALNvX_histone.bam} -c {ALNvX_input.bam} -f BAM --gsize {genomesize} --keep-dup all --name {_name_prefix} --nomodel --extsize {_median_fraglen} --qvalue 0.05 {broad} ]]></command_line>
			<loop></loop>
			<comment>parameter &quot;--broad&quot; for samples H3K4me1/H3K27me3/H3K36me3/H3K9me3; default q-value cutoff of 0.05 is recommended by the author at least for broad marks and approved by A. Richter for all marks</comment>
		</tool>
		<!--
			log2 ratio track of a histone BAM over the input BAM; writes the bamcomp bigwig.
			{_scaling_method} is chosen per histone mark as described in the comment element below.
		-->
		<tool>
			<name>bamCompare (deepTools)</name>
			<version>1.5.7-5-gcbab7b3</version>
			<command_line><![CDATA[ bamCompare -p {numproc} --bamfile1 {ALNvX_histone.bam} --bamfile2 {ALNvX_input.bam} --outFileName {sampleIDs.PROCESS.DATE.bamcomp.scalemethod} --outFileFormat bigwig --scaleFactorsMethod {_scaling_method} --ratio log2 --fragmentLength {_median_fraglen} ]]></command_line>
			<loop></loop>
			<comment>scaling_method: "readCount" for samples H3K27me3/H3K9me3, "SES" else</comment>
		</tool>
		<!--
			1x-normalized coverage track per BAM file; writes the bamcov bigwig.
			The input placeholder names the declared input BAMs (histone and input),
			since signal_plotter.py consumes a coverage track for both.
		-->
		<tool>
			<name>bamCoverage (deepTools)</name>
			<version>1.5.7-5-gcbab7b3</version>
			<command_line><![CDATA[ bamCoverage -p {numproc} --bam {ALNvX_*.bam} --outFileName {sampleID.PROCESS.DATE.bamcov.seqDepthNorm} --outFileFormat bigwig --normalizeTo1x {genomesize} --fragmentLength {_median_fraglen} ]]></command_line>
			<loop></loop>
			<comment>report read coverage normalized to 1x sequencing depth</comment>
		</tool>
		<!--
			Plots the bamCoverage tracks of a histone sample and of the input sample
			over {plot_regions}; writes the ctrlreg image.
		-->
		<tool>
			<name>signal_plotter.py</name>
			<version>0.1</version>
			<command_line><![CDATA[ signal_plotter.py --signal {sampleID_histone.PROCESS.DATE.bamcov.seqDepthNorm} --input {sampleID_input.PROCESS.DATE.bamcov.seqDepthNorm} --regions {plot_regions} --outfile {sampleID.PROCESS.DATE.ctrlreg} ]]></command_line>
			<loop></loop>
			<comment></comment>
		</tool>
	</software>
</process>