Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
comp-metadata/docs/alignment/transcriptome/SALv1.xml
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
143 lines (143 sloc)
5.52 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0"?>
<?xml-stylesheet type="text/css" href="http://deep.mpi-inf.mpg.de/DAC/files/style/deep_process_style.css"?>
<process>
<name>SAL</name>
<version>1</version>
<author>
<name>Filippos Klironomos</name>
<email>filippos.klironomos@mdc-berlin.de</email>
</author>
<description>
1) map short reads to the genome and keep those that are uniquely mapped
2) extract the DNA bracketing the uniquely mapped reads
3) fold the extracted sequences with RNAfold and keep those that form unbifurcated hairpins
4) score putative precursors:
*) expect a greater number of reads mapping to either the -5p or -3p strand and very few to the hairpin
*) a short 3' duplex overhang characteristic of Drosha/Dicer processing adds to the score
*) relative and absolute stabilities contribute to the score
*) if the 5' end of the mature sequence is identical to that of a known mature sequence, it adds to the score
5) randomly permute read signatures with putative precursor sequences in order to determine the false positive rate (FPR)
</description>
<inputs>
<filetype>
<identifier>config</identifier>
<format>TSV</format>
<quantity>single</quantity>
<comment>this is the configuration file that miRDeep2 uses to locate the FASTQ library and assign a unique 3-letter ID to it</comment>
</filetype>
</inputs>
<references>
<filetype>
<identifier>genome</identifier>
<format>fasta</format>
<quantity>single</quantity>
<comment>UCSC genome</comment>
</filetype>
<filetype>
<identifier>genome_index</identifier>
<format>bowtie-index</format>
<quantity>collection</quantity>
<comment>bowtie index of UCSC hg19</comment>
</filetype>
<filetype>
<identifier>miRBase_mature</identifier>
<format>fasta</format>
<quantity>single</quantity>
<comment>mature miRBase sequences</comment>
</filetype>
<filetype>
<identifier>miRBase_hairpin</identifier>
<format>fasta</format>
<quantity>single</quantity>
<comment>precursor (hairpin) miRBase sequences</comment>
</filetype>
</references>
<outputs>
<filetype>
<identifier>SampleID.SALv1.DATE.csv</identifier>
<format>csv</format>
<quantity>single</quantity>
<comment>expression of known miRNAs</comment>
</filetype>
<filetype>
<identifier>SampleID.SALv1.DATE.bed</identifier>
<format>bed</format>
<quantity>single</quantity>
<comment>bed track of expression of known miRNAs</comment>
</filetype>
</outputs>
<software>
<tool>
<!-- Writes the one-line, tab-separated sample sheet "config": the FASTQ path {SampleID.fastq}, a tab, then the ID SA1. -->
<!-- ">" overwrites any existing "config". The -n and -e switches are echo options (no extra trailing newline, expand \t and \n);
     this assumes an echo that honours them, such as the bash builtin. TODO confirm the shell used to run it. -->
<name>generate_config</name>
<version>n/a</version>
<command_line>
<![CDATA[
echo -ne "{SampleID.fastq}\tSA1\n" > config
]]>
</command_line>
<loop></loop>
<comment>this command creates the configuration file for miRDeep2 to use in order to locate the FASTQ library {SampleID.fastq} and assign
a 3-letter internal ID to it, in this case SA1
</comment>
</tool>
<tool>
<!-- Runs mapper.pl on the sample sheet "config": preprocesses the reads and aligns them against {genome_index}. -->
<!-- Option meanings as given in the miRDeep2 mapper.pl usage text (confirm against the installed version):
     -d input is a config file, -e reads are FASTQ, -h convert to FASTA, -j drop reads with non-canonical letters,
     -k clip the 3' adaptor {Adaptor}, -l 18 discard reads shorter than 18 nt, -m collapse identical reads,
     -p bowtie index to map against, -s processed-reads output, -t ARF alignment output, -v progress report, -o 12 threads. -->
<!-- "&>" is bash syntax that sends both stdout and stderr to mapper_summary.log. -->
<name>mapper.pl</name>
<version>miRDeep2.0.0.4</version>
<command_line>
<![CDATA[
mapper.pl config -d -e -h -j -k {Adaptor} -l 18 -m -p {genome_index} -s reads_collapsed.fa -t reads_vs_genome.arf -v -o 12 &> mapper_summary.log
]]>
</command_line>
<loop></loop>
<comment>use the configuration file to locate the library; remove adaptor provided by {Adaptor};
collapse the reads to the file "reads_collapsed.fa";
map using the bowtie-index and output the alignments in the file "reads_vs_genome.arf";
print out summary in "mapper_summary.log"
</comment>
</tool>
<tool>
<!-- Runs miRDeep2.pl on reads_collapsed.fa and reads_vs_genome.arf, the two files that the mapper.pl command line names as its -s and -t outputs. -->
<!-- Positional arguments in order: reads, {genome}, alignments, {miRBase_mature}, the literal "none", {miRBase_hairpin}.
     Per the miRDeep2 usage text, slots 4 to 6 are: known mature miRNAs of this species, mature miRNAs of related species
     ("none" = not supplied), known precursors. -t names the species and -P declares miRBase v18+ style 5p/3p identifiers.
     TODO confirm against the installed version. -->
<!-- "2>" sends stderr only to miRDeep2.report.log; stdout is not redirected. -->
<name>miRDeep2</name>
<version>miRDeep2.0.0.4</version>
<command_line>
<![CDATA[
miRDeep2.pl reads_collapsed.fa {genome} reads_vs_genome.arf {miRBase_mature} none {miRBase_hairpin} -t {Species} -P 2> miRDeep2.report.log
]]>
</command_line>
<loop></loop>
<comment>quantify known miRNAs and predict putative novel miRNAs across samples</comment>
</tool>
<tool>
<!-- Copies miRNAs_expressed_all_samples_DATE_t_TIME.csv to the declared output name SampleID.SALv1.DATE.csv. -->
<!-- cp is a copy, not a move: the source file stays in place next to the renamed copy. -->
<!-- NOTE(review): DATE and TIME in the source name look like stand-ins for the run timestamp, and the source file is presumably
     written by the preceding miRDeep2 step; confirm both before automating this line. -->
<name>rename_according_to_metadata_standards</name>
<version>n/a</version>
<command_line>
<![CDATA[
cp miRNAs_expressed_all_samples_DATE_t_TIME.csv {SampleID.SALv1.DATE.csv}
]]>
</command_line>
<loop></loop>
<comment>rename output data file to conform to metadata naming conventions</comment>
</tool>
<tool>
<!-- Runs get_mirdeep2_precursor_coverage.pl on result_DATE_t_TIME.csv and, only if that succeeds ("&&"),
     copies known_pres_DATE_t_TIME_score-50_to_na.bed to the declared BED output. -->
<!-- The output placeholder is written {SampleID.SALv1.DATE.bed}: the whole declared identifier inside the braces,
     the same form the .csv copy step uses for {SampleID.SALv1.DATE.csv}. -->
<!-- NOTE(review): only the known_pres BED is copied, although the comment element below also mentions novel miRNAs;
     confirm whether a novel-miRNA track should be kept as well. -->
<name>get_mirdeep2_precursor_coverage.pl</name>
<version>miRDeep2.0.0.4</version>
<command_line>
<![CDATA[
get_mirdeep2_precursor_coverage.pl -r result_DATE_t_TIME.csv -p -T {SampleID} && cp known_pres_DATE_t_TIME_score-50_to_na.bed {SampleID.SALv1.DATE.bed}
]]>
</command_line>
<loop></loop>
<comment>generate BED tracks from the quantification data of known and novel miRNAs and rename them according to metadata standards</comment>
</tool>
<tool>
<!-- Converts the BED track into a bedGraph file whose name is the input name with "Graph" appended
     (FILENAME is awk's current input file, so x.bed becomes x.bedGraph). -->
<!-- Line 1 is copied unchanged; line 2 is replaced by a bedGraph track line that reuses its 2nd field;
     every later line keeps fields 1, 2, 3 and 5, separated by single spaces. -->
<!-- Output uses ">" rather than appending: awk truncates the file once when it is first opened and then keeps writing to it,
     so a rerun replaces an old bedGraph instead of stacking a second copy of the track onto it.
     The target is parenthesised because an unparenthesised concatenation after a redirection is not portable across awk implementations. -->
<name>bed_to_bedGraph</name>
<version>n/a</version>
<command_line>
<![CDATA[
gawk 'NR==1 {print > (FILENAME "Graph")} NR == 2 {print "track type=bedGraph "$2" description=\"miRDeep2 known miRNAs\" visibility=2 color=0,0,255 altColor=255,0,0" > (FILENAME "Graph")} NR>2 {print $1,$2,$3,$5 > (FILENAME "Graph")}' {SampleID.SALv1.DATE.bed}
]]>
</command_line>
<loop></loop>
<comment>convert BED tracks to bedGraph</comment>
</tool>
</software>
</process>