supp_file_5_CHPv2.xml

<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type="text/css" href="supp_file_3_style.css"?>
<process>
	<name>CHP</name>
	<version>2</version>
	<!-- Process authors; the name and email elements each hold a comma-separated list, given in the same order. -->
	<author>
		<name>Andreas Richter, Peter Ebert</name>
		<email>arichter@ie-freiburg.mpg.de, pebert@mpi-inf.mpg.de</email>
	</author>
	<description>
		Process CHPv2 has been created to correct a couple of mistakes in the v1 process description and - more importantly - since new software versions have been installed on the DEEP cluster at DAC/MPI-Inf. This process takes as input aligned reads coming from the DCC/DKFZ and creates individual and comparative signal tracks as well as peak files for the different histone marks. Note that before the correlation among all files is computed, a couple of known problematic regions are removed that usually show a spurious read distribution that would subsequently lead to an inaccurate correlation among the files. The last step of this process plots the coverage of the histone signal (and, if available, of the input control) in a few selected control regions (for details, contact Andreas Richter). Note that these plots are by no means suited to interpret the data or judge the quality of the entire dataset - the plot of the control regions just shows regions with expected high or low signal compared to the input; the scaling of the values is performed for layout reasons and independently for each region, i.e. plots of different regions cannot be compared directly.
	</description>
	<!-- Files consumed by the process: the histone BAMs (a collection), a single input BAM, and the BAM index files. -->
	<inputs>
		<filetype>
			<identifier>GALvX_histone</identifier>
			<format>BAM</format>
			<quantity>collection</quantity>
			<comment></comment>
		</filetype>
		<filetype>
			<identifier>GALvX_input</identifier>
			<format>BAM</format>
			<quantity>single</quantity>
			<comment></comment>
		</filetype>
		<filetype>
			<identifier>GALvX_index</identifier>
			<format>BAM index</format>
			<quantity>collection</quantity>
			<comment>Index files are renamed internally to .bam.bai since deepTools is expecting index naming like this</comment>
		</filetype>
	</inputs>
	<!-- Static reference files: filtered_regions is passed to region_filter.py, reference_genome to computeGCBias, and plot_regions to signal_plotter.py (see the command lines in the software section). -->
	<references>
		<filetype>
			<identifier>filtered_regions</identifier>
			<format>BED</format>
			<quantity>single</quantity>
			<comment>ENCODE blacklist extended by A. Richter (FB); see DCC/download/results/references/annotations</comment>
		</filetype>
		<filetype>
			<identifier>reference_genome</identifier>
			<format>2bit</format>
			<quantity>single</quantity>
			<comment>The reference genome file; see DCC/download/results/references/genomes</comment>
		</filetype>
		<filetype>
			<identifier>plot_regions</identifier>
			<format>BED</format>
			<quantity>single</quantity>
			<comment>Control regions obtained from A. Richter (FB) for quality control of ChIPseq samples; see DCC/download/results/references/annotations</comment>
		</filetype>
	</references>
	<!-- Files produced by the process. Identifiers are referenced as {placeholders} in the command lines of the software section. -->
	<outputs>
		<!-- Plot file of bamCorrelate -->
		<filetype>
			<identifier>samplesID.PROCESS.DATE.corplot.cormethod</identifier>
			<format>deepTools graphics PNG</format>
			<quantity>single</quantity>
			<comment></comment>
		</filetype>
		<!-- Plot file of bamFingerprint -->
		<filetype>
			<identifier>samplesID.PROCESS.DATE.fgprplot</identifier>
			<format>deepTools graphics PNG</format>
			<quantity>single</quantity>
			<comment></comment>
		</filetype>
		<!-- Bias plot and frequencies file of computeGCBias -->
		<filetype>
			<identifier>sampleID.PROCESS.DATE.gcbplot</identifier>
			<format>deepTools graphics PNG</format>
			<quantity>collection</quantity>
			<comment></comment>
		</filetype>
		<filetype>
			<identifier>sampleID.PROCESS.DATE.gcbfreq</identifier>
			<format>tab-separated text file</format>
			<quantity>collection</quantity>
			<comment></comment>
		</filetype>
		<!-- MACS2 peak-calling outputs -->
		<filetype>
			<identifier>sampleID.PROCESS.DATE._peaks.xls</identifier>
			<format>XLS table</format>
			<quantity>collection</quantity>
			<comment>Standard MACS2 output XLS table for broad and narrow marks</comment>
		</filetype>
		<filetype>
			<identifier>sampleID.PROCESS.DATE._peaks.broadPeak</identifier>
			<format>broadPeak</format>
			<quantity>collection</quantity>
			<comment>Standard MACS2 output in ENCODE's broadPeak format for broad marks, this file is usually used for subsequent analyses</comment>
		</filetype>
		<filetype>
			<identifier>sampleID.PROCESS.DATE._peaks.gappedPeak</identifier>
			<format>gappedPeak</format>
			<quantity>collection</quantity>
			<comment>Standard MACS2 output in ENCODE's gappedPeak format for broad marks</comment>
		</filetype>
		<filetype>
			<identifier>sampleID.PROCESS.DATE._summits.bed</identifier>
			<format>BED</format>
			<quantity>collection</quantity>
			<comment>Standard MACS2 output for narrow marks</comment>
		</filetype>
		<filetype>
			<identifier>sampleID.PROCESS.DATE._peaks.narrowPeak</identifier>
			<format>narrowPeak</format>
			<quantity>collection</quantity>
			<comment>Standard MACS2 output for narrow marks, this file is usually used for subsequent analyses</comment>
		</filetype>
		<!-- Output file of bamCompare -->
		<filetype>
			<identifier>sampleIDs.PROCESS.DATE.bamcomp.scalemethod</identifier>
			<format>bigwig</format>
			<quantity>collection</quantity>
			<comment>Input-normalized histone signal tracks</comment>
		</filetype>
		<!-- Output file of bamCoverage; also read back by signal_plotter.py -->
		<filetype>
			<identifier>sampleID.PROCESS.DATE.bamcov.seqDepthNorm</identifier>
			<format>bigwig</format>
			<quantity>collection</quantity>
			<comment>Sequencing-depth normalized signal coverage tracks</comment>
		</filetype>
		<!-- Output file of signal_plotter.py -->
		<filetype>
			<identifier>sampleID.PROCESS.DATE.ctrlreg</identifier>
			<format>graphics PNG</format>
			<quantity>collection</quantity>
			<comment>A plot of a set of control regions for each histone mark (histone and input signal). Attention: this plot can only be used for a rough quality assessment (experiment fail or success), you cannot base any interpretation on this plot.</comment>
		</filetype>
	</outputs>
	<software>
		<!-- Blacklist filtering ahead of bamCorrelate: per the loop element, run for the histone BAMs and for the input BAM. {GALvX_*} presumably stands for the current loop item (confirm with the pipeline driver). -->
		<tool>
			<name>region_filter.py</name>
			<version>0.1</version>
			<command_line><![CDATA[ region_filter.py --bamfile {GALvX_*} --regions {filtered_regions} --output BAM_filtered.tmp ]]></command_line>
			<loop>GALvX_histone, GALvX_input</loop>
			<comment>Script to generate a temporary BAM file with ENCODE blacklist regions excluded, only relevant for bamCorrelate tool. The filtered BAM files are discarded at the end of this process. One temporary file per histone mark plus the input is generated.</comment>
		</tool>
		<!-- Pearson correlation plot computed on the blacklist-filtered temporary BAMs with bin size 1000; the loop element is empty, so no per-file iteration is declared for this step. -->
		<tool>
			<name>bamCorrelate (deepTools)</name>
			<version>1.5.8.1</version>
			<command_line><![CDATA[ bamCorrelate bins -p {numproc} --bamfiles BAM_filtered.tmp --plotFile {samplesID.PROCESS.DATE.corplot.cormethod} --corMethod pearson --labels {labels} --binSize 1000 --distanceBetweenBins 2000 --fragmentLength {all_median_fraglen} ]]></command_line>
			<loop></loop>
			<comment>Window/bin size of 1kb since multiple narrow signals will be merged with default value (10kb), 1m samples</comment>
		</tool>
		<!-- Fingerprint plot over the original {GALvX_*} BAMs rather than the blacklist-filtered temporaries; the loop element is empty, so no per-file iteration is declared for this step. -->
		<tool>
			<name>bamFingerprint (deepTools)</name>
			<version>1.5.8.1</version>
			<command_line><![CDATA[ bamFingerprint -p {numproc} --bamfiles {GALvX_*} --plotFile {samplesID.PROCESS.DATE.fgprplot} --labels {labels} --fragmentLength {all_median_fraglen} --numberOfSamples 500000 ]]></command_line>
			<loop></loop>
			<comment></comment>
		</tool>
		<!-- GC-bias frequencies file and bias plot, produced for each file named in the loop element (histone BAMs and the input BAM). -->
		<tool>
			<name>computeGCBias (deepTools)</name>
			<version>1.5.8.1</version>
			<command_line><![CDATA[ computeGCBias -p {numproc} --bamfile {GALvX_*} --effectiveGenomeSize {genomesize} --genome {reference_genome} --fragmentLength {*_median_fraglen} --sampleSize 50000000 --GCbiasFrequenciesFile {sampleID.PROCESS.DATE.gcbfreq} --biasPlot {sampleID.PROCESS.DATE.gcbplot} ]]></command_line>
			<loop>GALvX_histone, GALvX_input</loop>
			<comment></comment>
		</tool>
		<!-- Peak calling for each histone BAM against the input BAM. {broad} presumably expands to the broad-peak switch for the marks listed in the comment element and to nothing otherwise (confirm with the pipeline driver). -->
		<tool>
			<name>MACS2</name>
			<version>macs2 2.1.0.20140616</version>
			<command_line><![CDATA[ macs2 callpeak -t {GALvX_histone} -c {GALvX_input} -f BAM --gsize {genomesize} --keep-dup all --name {*_name_prefix} --nomodel --extsize {*_median_fraglen} --qvalue 0.05 {broad} ]]></command_line>
			<loop>GALvX_histone</loop>
			<comment>parameter "--broad" for samples H3K4me1/H3K27me3/H3K36me3/H3K9me3; default q-value cutoff of 0.05 is recommended by the author at least for broad marks and approved by A. Richter for all marks</comment>
		</tool>
		<!-- log2 ratio track of each histone BAM over the input BAM, written as bigwig; the scaling method is chosen per mark as listed in the comment element. -->
		<tool>
			<name>bamCompare (deepTools)</name>
			<version>1.5.8.1</version>
			<command_line><![CDATA[ bamCompare -p {numproc} --bamfile1 {GALvX_histone} --bamfile2 {GALvX_input} --outFileName {sampleIDs.PROCESS.DATE.bamcomp.scalemethod} --outFileFormat bigwig --scaleFactorsMethod {*_scaling_method} --ratio log2 --fragmentLength {*_median_fraglen} ]]></command_line>
			<loop>GALvX_histone</loop>
			<comment>scaling_method: "readCount" for samples H3K27me3/H3K9me3, "SES" else</comment>
		</tool>
		<!-- Coverage track normalized to 1x, written as bigwig, for each file named in the loop element (histone BAMs and the input BAM). -->
		<tool>
			<name>bamCoverage (deepTools)</name>
			<version>1.5.8.1</version>
			<command_line><![CDATA[ bamCoverage -p {numproc} --bam {GALvX_*} --outFileName {sampleID.PROCESS.DATE.bamcov.seqDepthNorm} --outFileFormat bigwig --normalizeTo1x {genomesize} --fragmentLength {*_median_fraglen} ]]></command_line>
			<loop>GALvX_histone, GALvX_input</loop>
			<comment>report read coverage normalized to 1x sequencing depth</comment>
		</tool>
		<!-- Control-region plot per histone mark. Both the signal and the input option name the same bamCoverage output identifier; presumably the histone track and the input track of that collection are meant (confirm with the script's usage). -->
		<tool>
			<name>signal_plotter.py</name>
			<version>0.1</version>
			<command_line><![CDATA[ signal_plotter.py --signal {sampleID.PROCESS.DATE.bamcov.seqDepthNorm} --input {sampleID.PROCESS.DATE.bamcov.seqDepthNorm} --regions {plot_regions} --outfile {sampleID.PROCESS.DATE.ctrlreg} ]]></command_line>
			<loop>only for histone marks: sampleID.PROCESS.DATE.bamcov.seqDepthNorm</loop>
			<comment></comment>
		</tool>
	</software>
</process>
	<?xml version="1.0" encoding="utf-8"?>
	<?xml-stylesheet type="text/css" href="supp_file_3_style.css"?>
	<process>
	<name>CHP</name>
	<version>2</version>
	<!-- Process authors; the name and email elements each hold a comma-separated list, given in the same order. -->
	<author>
		<name>Andreas Richter, Peter Ebert</name>
		<email>arichter@ie-freiburg.mpg.de, pebert@mpi-inf.mpg.de</email>
	</author>
	<description>
	Process CHPv2 has been created to correct a couple of mistakes in the v1 process description and - more importantly - since new software versions have been installed on the DEEP cluster at DAC/MPI-Inf. This process takes as input aligned reads coming from the DCC/DKFZ and creates individual and comparative signal tracks as well as peak files for the different histone marks. Note that before the correlation among all files is computed, a couple of known problematic regions are removed that usually show a spurious read distribution that would subsequently lead to an inaccurate correlation among the files. The last step of this process plots the coverage of the histone signal (and, if available, of the input control) in a few selected control regions (for details, contact Andreas Richter). Note that these plots are by no means suited to interpret the data or judge the quality of the entire dataset - the plot of the control regions just shows regions with expected high or low signal compared to the input; the scaling of the values is performed for layout reasons and independently for each region, i.e. plots of different regions cannot be compared directly.
	</description>
	<!-- Files consumed by the process: the histone BAMs (a collection), a single input BAM, and the BAM index files. -->
	<inputs>
		<filetype>
			<identifier>GALvX_histone</identifier>
			<format>BAM</format>
			<quantity>collection</quantity>
			<comment></comment>
		</filetype>
		<filetype>
			<identifier>GALvX_input</identifier>
			<format>BAM</format>
			<quantity>single</quantity>
			<comment></comment>
		</filetype>
		<filetype>
			<identifier>GALvX_index</identifier>
			<format>BAM index</format>
			<quantity>collection</quantity>
			<comment>Index files are renamed internally to .bam.bai since deepTools is expecting index naming like this</comment>
		</filetype>
	</inputs>
	<!-- Static reference files: filtered_regions is passed to region_filter.py, reference_genome to computeGCBias, and plot_regions to signal_plotter.py (see the command lines in the software section). -->
	<references>
		<filetype>
			<identifier>filtered_regions</identifier>
			<format>BED</format>
			<quantity>single</quantity>
			<comment>ENCODE blacklist extended by A. Richter (FB); see DCC/download/results/references/annotations</comment>
		</filetype>
		<filetype>
			<identifier>reference_genome</identifier>
			<format>2bit</format>
			<quantity>single</quantity>
			<comment>The reference genome file; see DCC/download/results/references/genomes</comment>
		</filetype>
		<filetype>
			<identifier>plot_regions</identifier>
			<format>BED</format>
			<quantity>single</quantity>
			<comment>Control regions obtained from A. Richter (FB) for quality control of ChIPseq samples; see DCC/download/results/references/annotations</comment>
		</filetype>
	</references>
	<!-- Files produced by the process. Identifiers are referenced as {placeholders} in the command lines of the software section. -->
	<outputs>
		<!-- Plot file of bamCorrelate -->
		<filetype>
			<identifier>samplesID.PROCESS.DATE.corplot.cormethod</identifier>
			<format>deepTools graphics PNG</format>
			<quantity>single</quantity>
			<comment></comment>
		</filetype>
		<!-- Plot file of bamFingerprint -->
		<filetype>
			<identifier>samplesID.PROCESS.DATE.fgprplot</identifier>
			<format>deepTools graphics PNG</format>
			<quantity>single</quantity>
			<comment></comment>
		</filetype>
		<!-- Bias plot and frequencies file of computeGCBias -->
		<filetype>
			<identifier>sampleID.PROCESS.DATE.gcbplot</identifier>
			<format>deepTools graphics PNG</format>
			<quantity>collection</quantity>
			<comment></comment>
		</filetype>
		<filetype>
			<identifier>sampleID.PROCESS.DATE.gcbfreq</identifier>
			<format>tab-separated text file</format>
			<quantity>collection</quantity>
			<comment></comment>
		</filetype>
		<!-- MACS2 peak-calling outputs -->
		<filetype>
			<identifier>sampleID.PROCESS.DATE._peaks.xls</identifier>
			<format>XLS table</format>
			<quantity>collection</quantity>
			<comment>Standard MACS2 output XLS table for broad and narrow marks</comment>
		</filetype>
		<filetype>
			<identifier>sampleID.PROCESS.DATE._peaks.broadPeak</identifier>
			<format>broadPeak</format>
			<quantity>collection</quantity>
			<comment>Standard MACS2 output in ENCODE's broadPeak format for broad marks, this file is usually used for subsequent analyses</comment>
		</filetype>
		<filetype>
			<identifier>sampleID.PROCESS.DATE._peaks.gappedPeak</identifier>
			<format>gappedPeak</format>
			<quantity>collection</quantity>
			<comment>Standard MACS2 output in ENCODE's gappedPeak format for broad marks</comment>
		</filetype>
		<filetype>
			<identifier>sampleID.PROCESS.DATE._summits.bed</identifier>
			<format>BED</format>
			<quantity>collection</quantity>
			<comment>Standard MACS2 output for narrow marks</comment>
		</filetype>
		<filetype>
			<identifier>sampleID.PROCESS.DATE._peaks.narrowPeak</identifier>
			<format>narrowPeak</format>
			<quantity>collection</quantity>
			<comment>Standard MACS2 output for narrow marks, this file is usually used for subsequent analyses</comment>
		</filetype>
		<!-- Output file of bamCompare -->
		<filetype>
			<identifier>sampleIDs.PROCESS.DATE.bamcomp.scalemethod</identifier>
			<format>bigwig</format>
			<quantity>collection</quantity>
			<comment>Input-normalized histone signal tracks</comment>
		</filetype>
		<!-- Output file of bamCoverage; also read back by signal_plotter.py -->
		<filetype>
			<identifier>sampleID.PROCESS.DATE.bamcov.seqDepthNorm</identifier>
			<format>bigwig</format>
			<quantity>collection</quantity>
			<comment>Sequencing-depth normalized signal coverage tracks</comment>
		</filetype>
		<!-- Output file of signal_plotter.py -->
		<filetype>
			<identifier>sampleID.PROCESS.DATE.ctrlreg</identifier>
			<format>graphics PNG</format>
			<quantity>collection</quantity>
			<comment>A plot of a set of control regions for each histone mark (histone and input signal). Attention: this plot can only be used for a rough quality assessment (experiment fail or success), you cannot base any interpretation on this plot.</comment>
		</filetype>
	</outputs>
	<software>
	<!-- Blacklist filtering ahead of bamCorrelate: per the loop element, run for the histone BAMs and for the input BAM. {GALvX_*} presumably stands for the current loop item (confirm with the pipeline driver). -->
	<tool>
		<name>region_filter.py</name>
		<version>0.1</version>
		<command_line><![CDATA[ region_filter.py --bamfile {GALvX_*} --regions {filtered_regions} --output BAM_filtered.tmp ]]></command_line>
		<loop>GALvX_histone, GALvX_input</loop>
		<comment>Script to generate a temporary BAM file with ENCODE blacklist regions excluded, only relevant for bamCorrelate tool. The filtered BAM files are discarded at the end of this process. One temporary file per histone mark plus the input is generated.</comment>
	</tool>
	<!-- Pearson correlation plot computed on the blacklist-filtered temporary BAMs with bin size 1000; the loop element is empty, so no per-file iteration is declared for this step. -->
	<tool>
		<name>bamCorrelate (deepTools)</name>
		<version>1.5.8.1</version>
		<command_line><![CDATA[ bamCorrelate bins -p {numproc} --bamfiles BAM_filtered.tmp --plotFile {samplesID.PROCESS.DATE.corplot.cormethod} --corMethod pearson --labels {labels} --binSize 1000 --distanceBetweenBins 2000 --fragmentLength {all_median_fraglen} ]]></command_line>
		<loop></loop>
		<comment>Window/bin size of 1kb since multiple narrow signals will be merged with default value (10kb), 1m samples</comment>
	</tool>
	<!-- Fingerprint plot over the original {GALvX_*} BAMs rather than the blacklist-filtered temporaries; the loop element is empty, so no per-file iteration is declared for this step. -->
	<tool>
		<name>bamFingerprint (deepTools)</name>
		<version>1.5.8.1</version>
		<command_line><![CDATA[ bamFingerprint -p {numproc} --bamfiles {GALvX_*} --plotFile {samplesID.PROCESS.DATE.fgprplot} --labels {labels} --fragmentLength {all_median_fraglen} --numberOfSamples 500000 ]]></command_line>
		<loop></loop>
		<comment></comment>
	</tool>
	<!-- GC-bias frequencies file and bias plot, produced for each file named in the loop element (histone BAMs and the input BAM). The {GALvX_*} and {*_median_fraglen} placeholders carry a wildcard because the step runs over more than one input identifier. -->
	<tool>
		<name>computeGCBias (deepTools)</name>
		<version>1.5.8.1</version>
		<command_line><![CDATA[ computeGCBias -p {numproc} --bamfile {GALvX_*} --effectiveGenomeSize {genomesize} --genome {reference_genome} --fragmentLength {*_median_fraglen} --sampleSize 50000000 --GCbiasFrequenciesFile {sampleID.PROCESS.DATE.gcbfreq} --biasPlot {sampleID.PROCESS.DATE.gcbplot} ]]></command_line>
		<loop>GALvX_histone, GALvX_input</loop>
		<comment></comment>
	</tool>
	<!-- Peak calling for each histone BAM against the input BAM. {broad} presumably expands to the broad-peak switch for the marks listed in the comment element and to nothing otherwise (confirm with the pipeline driver). -->
	<tool>
		<name>MACS2</name>
		<version>macs2 2.1.0.20140616</version>
		<command_line><![CDATA[ macs2 callpeak -t {GALvX_histone} -c {GALvX_input} -f BAM --gsize {genomesize} --keep-dup all --name {*_name_prefix} --nomodel --extsize {*_median_fraglen} --qvalue 0.05 {broad} ]]></command_line>
		<loop>GALvX_histone</loop>
		<comment>parameter "--broad" for samples H3K4me1/H3K27me3/H3K36me3/H3K9me3; default q-value cutoff of 0.05 is recommended by the author at least for broad marks and approved by A. Richter for all marks</comment>
	</tool>
	<!-- log2 ratio track of each histone BAM over the input BAM, written as bigwig; the scaling method is chosen per mark as listed in the comment element. -->
	<tool>
		<name>bamCompare (deepTools)</name>
		<version>1.5.8.1</version>
		<command_line><![CDATA[ bamCompare -p {numproc} --bamfile1 {GALvX_histone} --bamfile2 {GALvX_input} --outFileName {sampleIDs.PROCESS.DATE.bamcomp.scalemethod} --outFileFormat bigwig --scaleFactorsMethod {*_scaling_method} --ratio log2 --fragmentLength {*_median_fraglen} ]]></command_line>
		<loop>GALvX_histone</loop>
		<comment>scaling_method: "readCount" for samples H3K27me3/H3K9me3, "SES" else</comment>
	</tool>
	<!-- Coverage track normalized to 1x, written as bigwig, for each file named in the loop element (histone BAMs and the input BAM). -->
	<tool>
		<name>bamCoverage (deepTools)</name>
		<version>1.5.8.1</version>
		<command_line><![CDATA[ bamCoverage -p {numproc} --bam {GALvX_*} --outFileName {sampleID.PROCESS.DATE.bamcov.seqDepthNorm} --outFileFormat bigwig --normalizeTo1x {genomesize} --fragmentLength {*_median_fraglen} ]]></command_line>
		<loop>GALvX_histone, GALvX_input</loop>
		<comment>report read coverage normalized to 1x sequencing depth</comment>
	</tool>
	<!-- Control-region plot per histone mark. Both the signal and the input option name the same bamCoverage output identifier; presumably the histone track and the input track of that collection are meant (confirm with the script's usage). -->
	<tool>
		<name>signal_plotter.py</name>
		<version>0.1</version>
		<command_line><![CDATA[ signal_plotter.py --signal {sampleID.PROCESS.DATE.bamcov.seqDepthNorm} --input {sampleID.PROCESS.DATE.bamcov.seqDepthNorm} --regions {plot_regions} --outfile {sampleID.PROCESS.DATE.ctrlreg} ]]></command_line>
		<loop>only for histone marks: sampleID.PROCESS.DATE.bamcov.seqDepthNorm</loop>
		<comment></comment>
	</tool>
	</software>
	</process>