SALv1.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/css" href="http://deep.mpi-inf.mpg.de/DAC/files/style/deep_process_style.css"?>
<process>
    <name>SAL</name>
	<version>1</version>
	<author>
		<name>Filippos Klironomos</name>
		<email>filippos.klironomos@mdc-berlin.de</email>
	</author>
    <!-- Plain-text outline of the analysis steps. The numbered items are the
         top-level steps; the "*)" items are the scoring criteria of step 4. -->
    <description>
    1) map short reads to genome and keep those uniquely mapped
    2) extract bracketing DNA of the uniquely mapped reads
    3) RNAfold extracted sequences and keep those that form unbifurcated hairpins
    4) score putative precursors:
       *) expect greater number of reads mapping to either the -5p or -3p strand and very little to the hairpin
       *) short 3&apos; duplex overhang characteristic of Drosha/Dicer processing adds to the score
       *) relative and absolute stabilities contribute to the score
       *) if 5&apos; end of mature sequence is identical to that of known mature sequence it adds to the score
    5) randomly permute read signatures with putative precursor sequences in order to determine the FPR
    </description>
	<!-- Declared input of the process: a single tab-separated configuration file.
	     NOTE(review): the generate_config tool in this file writes "config" from
	     {SampleID.fastq}; confirm whether the FASTQ library should be declared too. -->
	<inputs>
		<filetype>
			<identifier>config</identifier>
			<format>TSV</format>
			<quantity>single</quantity>
			<comment>this is the configuration file that miRDeep2 uses to locate the FASTQ library and assign a unique 3-letter ID to it</comment>
		</filetype>
	</inputs>
	<!-- Reference data read by the tools. Each identifier appears as a
	     {placeholder} in the command lines of the software section. -->
	<references>
		<!-- Genome sequence; passed to miRDeep2.pl as {genome}. -->
		<filetype>
			<identifier>genome</identifier>
			<format>fasta</format>
			<quantity>single</quantity>
			<comment>UCSC genome</comment>
		</filetype>
		<!-- Bowtie index; passed to mapper.pl through -p {genome_index}.
		     NOTE(review): the comment names hg19 while miRDeep2.pl is run with
		     -t {Species}; confirm whether the index is meant to be hg19 only. -->
		<filetype>
			<identifier>genome_index</identifier>
			<format>bowtie-index</format>
			<quantity>collection</quantity>
			<comment>bowtie index of UCSC hg19</comment>
		</filetype>
		<!-- Mature miRNA sequences; passed to miRDeep2.pl as {miRBase_mature}. -->
		<filetype>
			<identifier>miRBase_mature</identifier>
			<format>fasta</format>
			<quantity>single</quantity>
			<comment>mature miRBase sequences</comment>
		</filetype>
		<!-- Precursor sequences; passed to miRDeep2.pl as {miRBase_hairpin}. -->
		<filetype>
			<identifier>miRBase_hairpin</identifier>
			<format>fasta</format>
			<quantity>single</quantity>
			<comment>precursor (hairpin) miRBase sequences</comment>
		</filetype>
	</references>
	<!-- Files delivered by the process; the software section copies the miRDeep2
	     results to these names. -->
	<outputs>
		<filetype>
			<identifier>SampleID.SALv1.DATE.csv</identifier>
			<format>csv</format>
			<quantity>single</quantity>
			<comment>expression of known miRNAs</comment>
		</filetype>
		<filetype>
			<identifier>SampleID.SALv1.DATE.bed</identifier>
			<format>bed</format>
			<quantity>single</quantity>
			<comment>bed track of expression of known miRNAs</comment>
		</filetype>
	</outputs>
	<software>
	    <tool>
			<!-- Writes the one-line, tab-separated "config" file that the later
			     mapper.pl call takes as its first argument. -->
			<name>generate_config</name>
			<version>n/a</version>
			<command_line>
			<![CDATA[
			echo -ne "{SampleID.fastq}\tSA1\n" > config
			]]>
			</command_line>
			<loop/>
			<comment>this command creates the configuration file for miRDeep2 to use in order to locate the FASTQ library {SampleID.fastq} and assign
				a 3-letter internal ID to it, in this case SA1
			</comment>
		</tool>
		<!-- Read pre-processing and genome mapping. The redirection at the end of
		     the command sends both stdout and stderr of mapper.pl to
		     mapper_summary.log. Switches not explained in the comment element
		     below are described in the miRDeep2 mapper.pl usage text. -->
		<tool>
			<name>mapper.pl</name>
			<version>miRDeep2.0.0.4</version>
			<command_line>
			<![CDATA[
			mapper.pl config -d -e -h -j -k {Adaptor} -l 18 -m -p {genome_index} -s reads_collapsed.fa -t reads_vs_genome.arf -v -o 12  &> mapper_summary.log
			]]>
			</command_line>
			<loop/>
			<comment>use the configuration file to locate the library; remove adaptor provided by {Adaptor};
				collapse the reads to the file &quot;reads_collapsed.fa&quot;;
				map using the bowtie-index and output the alignments in the file &quot;reads_vs_genome.arf&quot;;
				print out summary in &quot;mapper_summary.log&quot;
			</comment>
		</tool>
		<!-- Core miRDeep2 run on the collapsed reads and their genome alignments.
		     The literal "none" fills the positional slot between {miRBase_mature}
		     and {miRBase_hairpin}; stderr is redirected to miRDeep2.report.log. -->
		<tool>
			<name>miRDeep2</name>
			<version>miRDeep2.0.0.4</version>
			<command_line>
			<![CDATA[
			miRDeep2.pl reads_collapsed.fa {genome} reads_vs_genome.arf {miRBase_mature} none {miRBase_hairpin} -t {Species} -P 2> miRDeep2.report.log
			]]>
			</command_line>
			<loop/>
			<comment>quantify known miRNAs and predict putative novel miRNAs across samples</comment>
		</tool>
		<!-- Copies the expression table to the declared CSV output name. DATE and
		     TIME in the source file name are placeholders for the run time stamp
		     (presumably set by miRDeep2; confirm). -->
		<tool>
			<name>rename_according_to_metadata_standards</name>
			<version>n/a</version>
			<command_line>
			<![CDATA[
			cp miRNAs_expressed_all_samples_DATE_t_TIME.csv {SampleID.SALv1.DATE.csv}
			]]>
			</command_line>
			<loop/>
			<comment>rename output data file to conform to metadata naming conventions</comment>
		</tool>
		<!-- Builds the BED track and copies it to the declared BED output. The
		     destination is written as {SampleID.SALv1.DATE.bed}, i.e. the output
		     identifier in braces, matching how the CSV output is referenced. -->
		<tool>
			<name>get_mirdeep2_precursor_coverage.pl</name>
			<version>miRDeep2.0.0.4</version>
			<command_line>
			<![CDATA[
			get_mirdeep2_precursor_coverage.pl -r result_DATE_t_TIME.csv -p -T {SampleID} && cp known_pres_DATE_t_TIME_score-50_to_na.bed {SampleID.SALv1.DATE.bed}
			]]>
			</command_line>
			<loop/>
			<comment>generate BED tracks from the quantification data of known and novel miRNAs and rename them according to metadata standards</comment>
		</tool>
		<!-- Derives a bedGraph file from the BED output. gawk appends to
		     FILENAME"Graph", so the result sits next to the input with the
		     extension .bedGraph: input line 1 is copied unchanged, line 2 is
		     replaced by a bedGraph track line that reuses its second field, and
		     every later line is reduced to fields 1, 2, 3 and 5. Because the
		     output is opened in append mode, remove a stale .bedGraph first. -->
		<tool>
			<name>bed_to_bedGraph</name>
			<version>n/a</version>
			<command_line>
			<![CDATA[
			gawk 'NR==1 {print >> FILENAME"Graph"} NR == 2 {print "track type=bedGraph "$2" description=\"miRDeep2 known miRNAs\" visibility=2 color=0,0,255 altColor=255,0,0" >> FILENAME"Graph"} NR>2 {print $1,$2,$3,$5 >> FILENAME"Graph"}' {SampleID.SALv1.DATE.bed}
			]]>
			</command_line>
			<loop/>
			<comment>convert BED tracks to bedGraph</comment>
		</tool>
	</software>
</process>
	<?xml version="1.0"?>
	<?xml-stylesheet type="text/css" href="http://deep.mpi-inf.mpg.de/DAC/files/style/deep_process_style.css"?>
	<process>
	<name>SAL</name>
	<version>1</version>
	<author>
	<name>Filippos Klironomos</name>
	<email>filippos.klironomos@mdc-berlin.de</email>
	</author>
	<!-- Plain-text outline of the analysis steps; the "*)" lines list the scoring
	     criteria that belong to step 4. -->
	<description>
	1) map short reads to genome and keep those uniquely mapped
	2) extract bracketing DNA of the uniquely mapped reads
	3) RNAfold extracted sequences and keep those that form unbifurcated hairpins
	4) score putative precursors:
	*) expect greater number of reads mapping to either the -5p or -3p strand and very little to the hairpin
	*) short 3' duplex overhang characteristic of Drosha/Dicer processing adds to the score
	*) relative and absolute stabilities contribute to the score
	*) if 5' end of mature sequence is identical to that of known mature sequence it adds to the score
	5) randomly permute read signatures with putative precursor sequences in order to determine the FPR
	</description>
	<!-- Declared input of the process: a single tab-separated configuration file.
	     NOTE(review): the generate_config tool in this file writes "config" from
	     {SampleID.fastq}; confirm whether the FASTQ library should be declared too. -->
	<inputs>
		<filetype>
			<identifier>config</identifier>
			<format>TSV</format>
			<quantity>single</quantity>
			<comment>this is the configuration file that miRDeep2 uses to locate the FASTQ library and assign a unique 3-letter ID to it</comment>
		</filetype>
	</inputs>
	<!-- Reference data read by the tools. Each identifier appears as a
	     {placeholder} in the command lines of the software section. -->
	<references>
		<!-- Genome sequence; passed to miRDeep2.pl as {genome}. -->
		<filetype>
			<identifier>genome</identifier>
			<format>fasta</format>
			<quantity>single</quantity>
			<comment>UCSC genome</comment>
		</filetype>
		<!-- Bowtie index; passed to mapper.pl through -p {genome_index}.
		     NOTE(review): the comment names hg19 while miRDeep2.pl is run with
		     -t {Species}; confirm whether the index is meant to be hg19 only. -->
		<filetype>
			<identifier>genome_index</identifier>
			<format>bowtie-index</format>
			<quantity>collection</quantity>
			<comment>bowtie index of UCSC hg19</comment>
		</filetype>
		<!-- Mature miRNA sequences; passed to miRDeep2.pl as {miRBase_mature}. -->
		<filetype>
			<identifier>miRBase_mature</identifier>
			<format>fasta</format>
			<quantity>single</quantity>
			<comment>mature miRBase sequences</comment>
		</filetype>
		<!-- Precursor sequences; passed to miRDeep2.pl as {miRBase_hairpin}. -->
		<filetype>
			<identifier>miRBase_hairpin</identifier>
			<format>fasta</format>
			<quantity>single</quantity>
			<comment>precursor (hairpin) miRBase sequences</comment>
		</filetype>
	</references>
	<!-- Files delivered by the process; the software section copies the miRDeep2
	     results to these names. -->
	<outputs>
		<filetype>
			<identifier>SampleID.SALv1.DATE.csv</identifier>
			<format>csv</format>
			<quantity>single</quantity>
			<comment>expression of known miRNAs</comment>
		</filetype>
		<filetype>
			<identifier>SampleID.SALv1.DATE.bed</identifier>
			<format>bed</format>
			<quantity>single</quantity>
			<comment>bed track of expression of known miRNAs</comment>
		</filetype>
	</outputs>
	<software>
	<!-- Writes the one-line, tab-separated "config" file that the later
	     mapper.pl call takes as its first argument. -->
	<tool>
		<name>generate_config</name>
		<version>n/a</version>
		<command_line>
		<![CDATA[
		echo -ne "{SampleID.fastq}\tSA1\n" > config
		]]>
		</command_line>
		<loop/>
		<comment>this command creates the configuration file for miRDeep2 to use in order to locate the FASTQ library {SampleID.fastq} and assign
			a 3-letter internal ID to it, in this case SA1
		</comment>
	</tool>
	<!-- Read pre-processing and genome mapping. The redirection at the end of
	     the command sends both stdout and stderr of mapper.pl to
	     mapper_summary.log. Switches not explained in the comment element
	     below are described in the miRDeep2 mapper.pl usage text. -->
	<tool>
		<name>mapper.pl</name>
		<version>miRDeep2.0.0.4</version>
		<command_line>
		<![CDATA[
		mapper.pl config -d -e -h -j -k {Adaptor} -l 18 -m -p {genome_index} -s reads_collapsed.fa -t reads_vs_genome.arf -v -o 12 &> mapper_summary.log
		]]>
		</command_line>
		<loop/>
		<comment>use the configuration file to locate the library; remove adaptor provided by {Adaptor};
			collapse the reads to the file "reads_collapsed.fa";
			map using the bowtie-index and output the alignments in the file "reads_vs_genome.arf";
			print out summary in "mapper_summary.log"
		</comment>
	</tool>
	<!-- Core miRDeep2 run on the collapsed reads and their genome alignments.
	     The literal "none" fills the positional slot between {miRBase_mature}
	     and {miRBase_hairpin}; stderr is redirected to miRDeep2.report.log. -->
	<tool>
		<name>miRDeep2</name>
		<version>miRDeep2.0.0.4</version>
		<command_line>
		<![CDATA[
		miRDeep2.pl reads_collapsed.fa {genome} reads_vs_genome.arf {miRBase_mature} none {miRBase_hairpin} -t {Species} -P 2> miRDeep2.report.log
		]]>
		</command_line>
		<loop/>
		<comment>quantify known miRNAs and predict putative novel miRNAs across samples</comment>
	</tool>
	<!-- Copies the expression table to the declared CSV output name. DATE and
	     TIME in the source file name are placeholders for the run time stamp
	     (presumably set by miRDeep2; confirm). -->
	<tool>
		<name>rename_according_to_metadata_standards</name>
		<version>n/a</version>
		<command_line>
		<![CDATA[
		cp miRNAs_expressed_all_samples_DATE_t_TIME.csv {SampleID.SALv1.DATE.csv}
		]]>
		</command_line>
		<loop/>
		<comment>rename output data file to conform to metadata naming conventions</comment>
	</tool>
	<!-- Builds the BED track and copies it to the declared BED output. The
	     destination is written as {SampleID.SALv1.DATE.bed}, i.e. the output
	     identifier in braces, matching how the CSV output is referenced. -->
	<tool>
		<name>get_mirdeep2_precursor_coverage.pl</name>
		<version>miRDeep2.0.0.4</version>
		<command_line>
		<![CDATA[
		get_mirdeep2_precursor_coverage.pl -r result_DATE_t_TIME.csv -p -T {SampleID} && cp known_pres_DATE_t_TIME_score-50_to_na.bed {SampleID.SALv1.DATE.bed}
		]]>
		</command_line>
		<loop/>
		<comment>generate BED tracks from the quantification data of known and novel miRNAs and rename them according to metadata standards</comment>
	</tool>
	<!-- Derives a bedGraph file from the BED output. gawk appends to
	     FILENAME"Graph", so the result sits next to the input with the
	     extension .bedGraph: input line 1 is copied unchanged, line 2 is
	     replaced by a bedGraph track line that reuses its second field, and
	     every later line is reduced to fields 1, 2, 3 and 5. Because the
	     output is opened in append mode, remove a stale .bedGraph first. -->
	<tool>
		<name>bed_to_bedGraph</name>
		<version>n/a</version>
		<command_line>
		<![CDATA[
		gawk 'NR==1 {print >> FILENAME"Graph"} NR == 2 {print "track type=bedGraph "$2" description=\"miRDeep2 known miRNAs\" visibility=2 color=0,0,255 altColor=255,0,0" >> FILENAME"Graph"} NR>2 {print $1,$2,$3,$5 >> FILENAME"Graph"}' {SampleID.SALv1.DATE.bed}
		]]>
		</command_line>
		<loop/>
		<comment>convert BED tracks to bedGraph</comment>
	</tool>
	</software>
	</process>