docs/misc/THBv1.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/css" href="http://deep.mpi-inf.mpg.de/DAC/files/style/deep_process_style.css"?>
<process>
	<name>THB</name>
	<version>1</version>
	<author>
		<!-- Author recorded for this process description, with a contact e-mail address. -->
		<name>Peter Ebert</name>
		<email>pebert@mpi-inf.mpg.de</email>
	</author>
	<description>
		The trackhub_conv.py Python3 script adds the 'chr' prefix to the chromosome names and filters
		for the chromosomes 1-22 (human) / 1-19 (mouse) and X, Y for reasons of compatibility of genomic coordinates between assemblies.
		Note that the script just reads the folder contents and converts every file in the folder that appears
		to be output of a DEEP process and to be a peak or bigwig file (based on file naming).
		The converted files are put in the same folder.
		Important: MACS2 outputs narrowPeak/broadPeak files that are not fully compliant with ENCODE standards:
		the score column (index 5) has to be between 0 and 1000, so the conversion script rescales these values.
		Please note that the peak name still refers to the original (unconverted) file.
		Approximately 1 out of 10 files is chosen at random and checked for consistency by reversing the conversion
		(except for scaling of the score column in case of peak files) and computing the MD5 checksum,
		which is then compared to the MD5 checksum of the original file after filtering
		for the appropriate chromosomes as explained above.
	</description>
	<inputs>
		<!--
			File collections that the conversion accepts as input.
			Two entries share the identifier CHP_peaks and differ only in their
			format (narrowPeak vs. broadPeak); the third entry covers bigwig files.
			The identifiers declared here (CHP_peaks, DEEP_bigwig) are the ones
			listed in the loop element of the tool definition.
		-->
		<filetype>
			<identifier>CHP_peaks</identifier>
			<format>narrowPeak</format>
			<quantity>collection</quantity>
			<comment>Standard output of MACS2 in ENCODE narrowPeak format</comment>
		</filetype>
		<filetype>
			<identifier>CHP_peaks</identifier>
			<format>broadPeak</format>
			<quantity>collection</quantity>
			<comment>Standard output of MACS2 in ENCODE broadPeak format</comment>
		</filetype>
		<filetype>
			<identifier>DEEP_bigwig</identifier>
			<format>bigwig</format>
			<quantity>collection</quantity>
			<comment>Any bigwig output of a standardized DEEP process</comment>
		</filetype>
	</inputs>
	<references>
		<!--
			Auxiliary files needed by the conversion in addition to the inputs:
			a single chromosome size table and a folder of AutoSQL field
			definitions used when turning peak files into bigbed.
			NOTE(review): these entries presumably correspond to the chrom-table
			and field-names options on the tool's command line; confirm against
			the script.
		-->
		<filetype>
			<identifier>chrom_sizes</identifier>
			<format>table</format>
			<quantity>single</quantity>
			<comment>File holding information on chromosome sizes for UCSC assembly (i.e. hg19, mm10)</comment>
		</filetype>
		<filetype>
			<identifier>field_names</identifier>
			<format>AutoSQL</format>
			<quantity>collection</quantity>
			<comment>Field_names is a folder containing files in AutoSQL format necessary for conversion of narrowPeak and broadPeak format into bigbed</comment>
		</filetype>
	</references>
	<outputs>
		<!--
			Files produced by the conversion: converted peak files are declared
			in bigbed format (THB_peaks), converted bigwig files remain in
			bigwig format (THB_bigwig). Both are collections.
		-->
		<filetype>
			<identifier>THB_peaks</identifier>
			<format>bigbed</format>
			<quantity>collection</quantity>
			<comment>Converted peak files</comment>
		</filetype>
		<filetype>
			<identifier>THB_bigwig</identifier>
			<format>bigwig</format>
			<quantity>collection</quantity>
			<comment>Converted bigwig files</comment>
		</filetype>
	</outputs>
	<software>
		<!--
			Single tool invocation that performs the batch conversion.
			Placeholders in curly braces inside command_line are spelled exactly
			like the identifiers declared under references (chrom_sizes,
			field_names), in the same way that the loop element lists the input
			identifiers (CHP_peaks, DEEP_bigwig) verbatim.
			NOTE(review): placeholders are assumed to be resolved by identifier;
			confirm with whatever consumes this description.
		-->
		<tool>
			<name>trackhub_conv.py</name>
			<version>0.1</version>
			<command_line><![CDATA[ trackhub_conv.py --folder $PWD --process THBv1 --chrom-table {chrom_sizes} --field-names {field_names} ]]></command_line>
			<loop>CHP_peaks, DEEP_bigwig</loop>
			<comment>Simple Python3 script to handle the batch conversion of files</comment>
		</tool>
	</software>
</process>