LimiTT.xml

<tool id="LimiTT" name="LimiTT" version="0.1">
    <!-- Packages the tool declares as run-time requirements, followed by the
         short description of the tool. -->
    <requirements>
        <requirement type="package">anaconda</requirement>
        <requirement type="package">r</requirement>
    </requirements>
    <description> MTI identification </description>
    <!-- Cheetah template that assembles the LimiTT.py command line from the form.
         The boolean switches are tested by truthiness, so the checks do not depend
         on how Galaxy renders them as text (their truevalue/falsevalue are
         "yes"/"no", which never equal "True"/"False"). -->
    <command interpreter="python">
        ## Multi-select values are comma-separated once cast to str; they are passed space-separated.
        #set $p_base = str($p_base).replace(',',' ')
        ## NOTE(review): -idb is an absolute, installation-specific path; confirm it exists on the target server.
        LimiTT.py -idb /home/mirna/galaxy-dist/tools/limitt/files/
        -ot $o_info -om $o_matrix -ol $o_rank -ob $o_bar -oh $o_hmap -oe $o_escore -og $o_genes
        -base $p_base
        -occ $p_occ

        ## Annotation file: data set, accession column, extra-info column, optional quoted description.
        #if $c_afile.p_afile:
            #for $ainput in $c_afile.r_afile:
                #set $p_info_str = '"'+str($ainput.p_ainfo)+'"'
                ## An empty description is dropped rather than passed as an empty quoted argument.
                #if $p_info_str == '""':
                    #set $p_info_str = ""
                #end if
                -ia $ainput.p_adata $ainput.p_acol1 $ainput.p_acol2 $p_info_str
            #end for
        #end if

        ## miRNA file: data set, extra-info column, optional quoted description.
        #if $c_mfile.p_mfile:
            #for $minput in $c_mfile.r_mfile:
                #set $p_minfo_str = '"'+str($minput.p_minfo)+'"'
                #if $p_minfo_str == '""':
                    #set $p_minfo_str = ""
                #end if
                -im $minput.p_mdata $minput.p_mcol2 $p_minfo_str
            #end for
        #else
            ## The clustering switch only exists in the branch without a miRNA file.
            #if $c_mfile.p_com:
                -cl
            #end if
        #end if

        ## Ranking file: data set, weighting and number of permutations.
        #if $c_efile.p_efile:
            #for $rinput in $c_efile.r_efile:
                -ir $rinput.p_edata
                -p $rinput.p_pval
                -perm $rinput.p_perm
            #end for
        #end if

        ## Species: either ignored, or an explicit selection unless it is exactly "all".
        #if str($c_spec.p_igspec) == "ignore":
            -spec i
        #elif str($c_spec.p_spec) != "all":
            #set $spec_l = str($c_spec.p_spec).replace(',',' ')
            -spec $spec_l
        #end if

        ## Experimental methods: __sq__ is turned back into the single quote carried by the option values.
        #if not str($p_exp) == "all":
            #set $p_exp = str($p_exp).replace(',',' ').replace("__sq__","'")
            -exp $p_exp
        #end if

        ## The stringency is only passed when database 4 (starBase) is selected.
        #if "4" in str($p_base):
            -str $p_stri
        #end if

    </command>
    <inputs>
        <!-- Annotation file switch. When enabled, the single repeat entry
             (min = max = 1) supplies the values the command template passes after -ia. -->
        <conditional name="c_afile">
            <param name="p_afile" type="boolean" truevalue="yes" falsevalue="no"
                   label="Add Annotation File"
                   help="Link MTIs to genes/proteins existent in this file."/>
            <when value="no"/>
            <when value="yes">
                <repeat name="r_afile" title="Annotation File" min="1" max="1">
                    <param name="p_adata" type="data" format="txt"
                           label="File"
                           help="The tab delimited file needs to consist of genes or proteins mapped onto UniProt Accessions (e.g. BLASTed against UniProtKB). See Help for more details."/>
                    <param name="p_acol1" type="integer" value="3"
                           label="Column of UniProt Accessions"
                           help="The column number (count from 1) with the UniProt Accessions within the file."/>
                    <param name="p_acol2" type="integer" value="1" optional="true"
                           label="Column of additional information"
                           help="The (optional) column number of additional information. Information will be added to the 'MTI_Info' output."/>
                    <param name="p_ainfo" type="text" value="Transcript" size="20" optional="true"
                           label="Description of additional information"
                           help="The (optional) description of the additional information."/>
                </repeat>
            </when>
        </conditional>

        <!-- miRNA file switch. Without a file only the clustering flag (-cl) is
             offered; with a file the repeat entry supplies the values passed after -im. -->
        <conditional name="c_mfile">
            <param name="p_mfile" type="boolean" truevalue="yes" falsevalue="no"
                   label="Add miRNA File"
                   help="Link MTIs to miRNAs existent in this file."/>
            <when value="no">
                <param name="p_com" type="boolean" checked="true" truevalue="True" falsevalue="False"
                       label="Cluster miRNAs"
                       help="If no miRNA file is used, cluster miRNAs by ignoring species and hairpin arm information (hsa-miR-123a-5p, mmu-miR-123a -> miR-123a). Otherwise miRNAs are distinguished by their complete identifiers."/>
            </when>
            <when value="yes">
                <repeat name="r_mfile" title="miRNA File" min="1" max="1">
                    <param name="p_mdata" type="data" format="txt"
                           label="File"
                           help="The tab delimited miRNA file needs to consist of one miRNA per line in the first column. See Help for more details."/>
                    <param name="p_mcol2" type="integer" value="5" optional="true"
                           label="Column of additional information"
                           help="The (optional) column number of additional information. Information will be added to the 'MTI_Info' output."/>
                    <param name="p_minfo" type="text" value="miRNA sequence" size="20" optional="true"
                           label="Description of additional information"
                           help="The (optional) description of the additional information."/>
                </repeat>
            </when>
        </conditional>

        <!-- Ranking file switch. When enabled, the repeat entry supplies the values
             passed after -ir, -p and -perm; the two enrichment outputs filter on it. -->
        <conditional name="c_efile">
            <param name="p_efile" type="boolean" truevalue="yes" falsevalue="no"
                   label="Add Ranking File"
                   help="Start a MTI Set Enrichment Analysis based on the ranked genes/proteins in this file."/>
            <when value="no"/>
            <when value="yes">
                <repeat name="r_efile" title="Ranking File" min="1" max="1">
                    <param name="p_edata" type="data" format="txt"
                           label="File"
                           help="The tab delimited ranking file needs to consist of genes/proteins mapped onto UniProt Accessions. LimiTT will sort the list by the values in descending order. See Help for more details."/>
                    <param name="p_pval" type="select"
                           label="Weighting"
                           help="The weighting of the ranking values to calculate the Enrichment Score (ES) per MTI Set.">
                        <option value="0">No weighting</option>
                        <option value="1" selected="true">Normal</option>
                        <option value="2">Square weighting</option>
                    </param>
                    <param name="p_perm" type="integer" min="1" value="1000"
                           label="Number of permutations"
                           help="The number of permutations to calculate the Normalized Enrichment Score (NES) per MTI Set."/>
                </repeat>
            </when>
        </conditional>

        <!-- Database selection, passed after -base. Value 4 (starBase) also decides
             whether the stringency below is passed to the script. -->
        <param name="p_base" type="select" display="checkboxes" multiple="true"
               label="MTI Databases"
               help="Select the MTI databases the MTIs are retrieved from.">
            <option value="1" selected="true">TarBase</option>
            <option value="2" selected="true">miRTarBase</option>
            <option value="3" selected="true">miRecords</option>
            <option value="4" selected="true">starBase</option>
        </param>

        <!-- Minimum number of databases an MTI has to occur in, passed after -occ. -->
        <param name="p_occ" type="integer" min="1" max="4" value="2"
               label="Occurrence of MTIs over Databases"
               help="If more than one database (DB) was selected, the occurrence parameter can be used to define the minimum number of DBs the MTIs have to occur in. If the manually set value is higher than the number of selected DBs it is automatically changed to the number of DBs. "/>

        <!-- Species handling, passed after -spec: "ignore" sends the literal i,
             otherwise the selected codes are sent unless the selection is exactly "all". -->
        <conditional name="c_spec">
            <param name="p_igspec" type="select"
                   label="Species"
                   help="Choose MTIs of your species or categories of interest or ignore species to expand the MTI search with predicted homologous MTIs.">
                <option value="choose" selected="true">Choose</option>
                <option value="ignore">Ignore</option>
            </param>
            <when value="ignore"/>
            <when value="choose">
                <param name="p_spec" type="select" multiple="true"
                       label="Choose Species"
                       help="Behind each category in brackets is the number of species. Scroll down to get to single species. Multiple selection is possible.">
                    <option value="all" selected="true">All</option>
                    <!-- The two entries with an empty value are visual separators. -->
                    <option value="">--CATEGORIES--</option>
                    <option value="a">Animals (14)</option>
                    <option value="pl">Plants (6)</option>
                    <option value="v">Viruses (4)</option>
                    <option value="f">Fungi (1)</option>
                    <option value="pr">Protozoa (1)</option>
                    <option value="">--SPECIES--</option>
                    <option value="ath">Arabidopsis thaliana</option>
                    <option value="bmo">Bombyx mori</option>
                    <option value="bta">Bos taurus</option>
                    <option value="cel">Caenorhabditis elegans</option>
                    <option value="cgr">Candida glabrata</option>
                    <option value="cin">Ciona intestinalis</option>
                    <option value="der">Danio rerio</option>
                    <option value="dme">Drosophila melanogaster</option>
                    <option value="ebv">Epstein Barr virus</option>
                    <option value="gga">Gallus gallus</option>
                    <option value="gma">Glycine max</option>
                    <option value="hsa">Homo sapiens</option>
                    <option value="hcmv">Human cytomegalovirus</option>
                    <option value="kshv">Kaposi sarcoma-associated herpesvirus</option>
                    <option value="mdv1">Mareks disease virus</option>
                    <option value="mtr">Medicago truncatula</option>
                    <option value="mmu">Mus musculus</option>
                    <option value="osa">Oryza sativa</option>
                    <option value="ola">Oryzias latipes</option>
                    <option value="oar">Ovis aries</option>
                    <option value="ppt">Physcomitrella patens</option>
                    <option value="rno">Rattus norvegicus</option>
                    <option value="tva">Trichomonas vaginalis</option>
                    <option value="vvi">Vitis vinifera</option>
                    <option value="xla">Xenopus laevis</option>
                </param>
            </when>
        </conditional>

        <!-- Experimental methods, passed after -exp unless the selection is exactly
             "all". Multi-word values carry their own single quotes. -->
        <param name="p_exp" type="select" multiple="true"
               label="Experimental Methods"
               help="Select the experimental methods the MTIs were validated with. Multiple selection is possible.">
            <option value="all" selected="true">All</option>
            <option value="'Western blot'">Western blot</option>
            <option value="'Reporter assay'">Reporter assay</option>
            <option value="qPCR">qPCR</option>
            <option value="Microarray">Microarray</option>
            <option value="NGS">NGS</option>
            <option value="Other">Other</option>
        </param>

        <!-- starBase stringency, passed after -str only when database 4 is selected. -->
        <param name="p_stri" type="select"
               label="StarBase Stringency"
               help="Number of CLIP-Seq Experiments supporting MTIs. Just for starBase.">
            <option value="1" selected="true">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
            <option value="5">5</option>
        </param>


    </inputs>
    <outputs>
        <!-- Outputs without a filter. -->
        <data name="o_info" format="tabular" label="MTI_Info"/>
        <data name="o_matrix" format="tabular" label="MTI_Matrix"/>
        <data name="o_rank" format="tabular" label="MTI_Sets_ranked"/>
        <data name="o_bar" format="pdf" label="BarGraphs"/>
        <data name="o_hmap" format="pdf" label="MTI_Overlap_HM"/>
        <!-- Outputs kept only when the "Add Ranking File" switch is set. -->
        <data name="o_escore" format="pdf" label="EnrichmentScore_Plots">
            <filter>c_efile['p_efile'] is True</filter>
        </data>
        <data name="o_genes" format="tabular" label="MTI_Set_Genes">
            <filter>c_efile['p_efile'] is True</filter>
        </data>
    </outputs>

    <help>


.. class:: infomark

**See LimiTT in action**

Browse to the shared data library to load example files for LimiTT into galaxy.
The given annotation file was used to benchmark LimiTT and it contains PH-relevant genes from Bertero et al. (2014).
To reproduce the benchmarking, choose the LimiTT tool, click on "Add Annotation File" and use the LimiTT_annotation.txt as input into the pipeline.
Next, change the "Description of additional information" field from "Transcript" to "Functional Pathway".
Change the "Occurrence of MTIs over Databases" value to 1. Select "Homo sapiens" as species. Hit "Execute" to see the tool in action.

------------

.. class:: infomark

**Input Files:**

**Annotation File**

*File type:* Tab delimited

*Header:* No

*Required Content:* UniProt accessions per line or separated by comma.

*Allowed Content:* Several columns; empty content; accessions with attached information (for example, the underlying database) delimited by a pipe ( | ) symbol (e.g. sp|Q9XS59|S6A15_BOVIN). In this case, only the identifier that follows the first pipe symbol is saved (e.g. sp|Q9XS59|S6A15_BOVIN > Q9XS59). Identifiers from other databases are ignored.

*Example "Required":*

+--------------------+
|A2A6A1              |
+--------------------+
|O88898,P54763,Q3UHC0|
+--------------------+
|G5E870              |
+--------------------+
|Q6A037              |
+--------------------+

|

*Example "Allowed":*

+-------------------+-------+-----------------------------------------+
|comp1000309_c0_seq1|slc15a3|Q8IY34,O75618                            |
+-------------------+-------+-----------------------------------------+
|comp1000318_c0_seq1|       |                                         |
+-------------------+-------+-----------------------------------------+
|comp1000627_c0_seq1|slc6a15|sp|Q9XS59|S6A15_BOVIN                    |
+-------------------+-------+-----------------------------------------+
|comp1000899_c0_seq1|       |gb|CX212397.1,dbj|DB530926.2,gi|154363325|
+-------------------+-------+-----------------------------------------+

|

**miRNA File**

*File type:* Tab delimited

*Header:* No

*Required Content:* One mature miRNA identifier (e.g. hsa-miR-17a-5p) per line in column 1.

*Allowed Content:* Several columns and shortened miRNA identifiers. Shortened miRNA identifiers have to consist at least of the prefix miR, lin or let, the identification number and, if existent, the lettered suffix showing sequence similarity (e.g miR-17a).

*Example "Required":*

+-------+
|miR-93b|
+-------+
|miR-36f|
+-------+
|miR-29d|
+-------+
|miR-29c|
+-------+

|

*Example "Allowed":*

The example is a part of an original output of the MIRPIPE pipeline, which the parameters are adjusted to.

+-------+------+----+---+----------------------+------+
|miR-93b|52    |1.00|170|CAAGTGCTGTTCGTGCAGGTAG|33    |
+-------+------+----+---+----------------------+------+
|miR-36f|211   |0.00|171|ATTGAGCTATCTGTGTAG    |211   |
+-------+------+----+---+----------------------+------+
|miR-29d|141233|0.02|172|TAGCACCATATGAAATCAGTGT|133582|
+-------+------+----+---+----------------------+------+
|miR-29c|55690 |1.00|172|TAGCACCATTTGAAATCGGTTA|44200 |
+-------+------+----+---+----------------------+------+

|

**Ranking File**

*File type:* Tab delimited

*Header:* No

*Required Content:* UniProt accessions in column one, corresponding ranking value in column two.

*Allowed Content:* The content does not need to be sorted by the ranking values; LimiTT sorts the list itself.

*Example:*

+------+-----------+
|A2A6A1|0.152108244|
+------+-----------+
|P54763|0.640846805|
+------+-----------+
|Q3UHC0|0.931454837|
+------+-----------+
|O88898|0.240325584|
+------+-----------+
|G5E870|0.47554716 |
+------+-----------+
|Q6A037|0.495874819|
+------+-----------+

------------

.. class:: infomark

**Output Files:**

**BarGraphs**

The bar graphs provide an overview of the number of miRNAs and MTIs after the different processing steps of LimiTT. Specifically, miRNAs and MTIs are counted after searching the MTI databases, after filtering by their occurrence over the DBs, after mapping MTIs onto UniProtAccs and after mapping the remaining MTI targets onto the annotated UniProtAccs. Thus, the last number within the bars is the final result.

**MTI Matrix**

The matrix contains all identified MTIs, arranged with targets as UniProtAccs (rows) and miRNAs (columns). If an interaction between miRNA and target was identified, a binary string represents the occurrence of the interaction over the chosen MTI DBs. The order of the DBs for the binary string can be found in the first row.

*Example:*

+--------------------------------------------------------+
|Database order: TarBase, miRTarBase, miRecords, starBase|
+------+-----+-------+------+-------+------+-------------+
|      |miR-9|miR-15a|miR-17|miR-19b|miR-24|miR-26a      |
+------+-----+-------+------+-------+------+-------------+
|A0AVK6|0001 |       |      |       |0110  |             |
+------+-----+-------+------+-------+------+-------------+
|A2A6A1|     |       |      |1110   |      |             |
+------+-----+-------+------+-------+------+-------------+
|A2AAY5|     |       |1110  |       |      | 1001        |
+------+-----+-------+------+-------+------+-------------+
|A2AHG0|     |0001   |0101  |       |      |             |
+------+-----+-------+------+-------+------+-------------+

|

**MTI Info**

The MTI information file is a list of all identified target UniProt Accessions together with the interacting miRNAs and further information which was collected during the process. If in the beginning of the process additional information from the annotation file and/or the miRNA list was specified, this information will also be part of the MTI information file.

Standard columns::

    UniProt Accessions      UniProt Accession of the identified miRNA target
    miRNA Target            Target symbol from the MTI database(s)
    miRNA                   miRNA(s) identified to interact with the target
    Review status           Review status of UniProtKB entry
    Organism                The MTI's organism
    Gene synonyms           Synonyms of the target gene
    Protein names           Name(s) of the influenced protein
    EC number               Enzyme Commission number
    Existence               Evidence for protein existence
    GO-IDs                  Gene Ontology identifier(s)

**MTI Overlap HM**

Based on the idea that each identified miRNA interacts with a set of target genes, the Heatmap (HM) depicts the ratio of overlapping UniProtAcc targets between each of these MTI sets. If the MTI set enrichment analysis was used, the Heatmap output will depict for each MTI set the ratio of overlapping target genes which are part of the leading edge sets of the corresponding MTI sets.

**MTI Sets ranked**

If a ranking file was passed to LimiTT, a reduced version of the Gene Set Enrichment Analysis tool is started, analysing the enrichment of the identified MTI sets based on the ranked UniProtAccs.
With a running sum statistic, a weighted Enrichment Score (**ES**) is calculated for each gene set based on position-dependent gene matches between the ranked list and the set.
The **Leading Edge** analysis additionally identifies and analyses the core genes of the gene set which mainly affect the ES.
The Leading Edge analysis proceeds as follows: Depending on whether the ES of a MTI set is positive or negative, the set of Leading Edge targets either consists of the MTI set targets before or after the peak in the running sum calculation.
Based on this, three statistics are calculated: **Tags** represents the ratio of leading edge targets to all targets in the given set. **List** calculates the ratio of UniProtAccs from the ranked dataset before/after the ES of the current set, to all UniProtAccs in the submitted file. **Signal** is a combination of the two previous calculations, describing the distribution of the MTI set targets over the ranked dataset, resulting in 100% or more, if all targets can be found at the beginning of the ranked list.
To take the set sizes into account, MTI set enrichment analysis calculates in the next step the Normalized Enrichment Score (**NES**) for each gene set by using permutations of the dataset. Additionally, the False Discovery Rate (**FDR**) q-value is calculated, representing the estimated probability of a false positive result for each set with a given NES.

Aside from the ES, NES, FDR q-value and Leading Edge analysis, the file consists of the size of each MTI set, which is the number of overlapping UniProtAccs between the MTI set and the ranked list, and the index within the ranked gene file at which the running sum statistic calculated the maximal ES.

*Example:*

+--------+----+----+----+---------+-----------+------------------------------+
|MTI Set |Size|ES  |NES |FDR q-val|Rank at Max|Leading Edge                  |
+========+====+====+====+=========+===========+==============================+
|miR-149 | 6  |0.65|1.55|0.290    |16         |tags=67%, list=29%, signal=53%|
+--------+----+----+----+---------+-----------+------------------------------+
|miR-301b| 4  |0.61|1.29|0.790    |1          |tags=25%, list=2%, signal=26% |
+--------+----+----+----+---------+-----------+------------------------------+

|

**EnrichmentScore Plots**

Enrichment plots depict for each MTI set the running enrichment score over all UniProtAccs in the ranked dataset (blue line), the position of targets of the current MTI set within the ranked list (black dashes) and the maximum ES, either positive or negative (red dash).
Enrichment plots are created only if a MTI set enrichment analysis was started.

**MTI Set Genes**

The MTI set gene file output of LimiTT is more or less a written version of all enrichment plots and is thus only produced if an enrichment analysis was initiated.
The file lists for each MTI set the targets which overlap with the ranked list of UniProtAccs, the index of each of these targets within the ranked list, the running ES for this target and whether it is a member of the leading edge set or not.

*Example:*

+--------+------+--------------------+----------+---------+
|MTI set |Target|Index in Ranked List|Running ES|LE Member|
+========+======+====================+==========+=========+
|miR-149 |Q9WV91| 10                 |0.06      |Yes      |
+--------+------+--------------------+----------+---------+
|miR-149 |Q80SW1| 13                 |0.24      |Yes      |
+--------+------+--------------------+----------+---------+
|miR-149 |Q71B07| 15                 |0.44      |Yes      |
+--------+------+--------------------+----------+---------+
|miR-181a|Q56A04| 29                 |-0.16     |Yes      |
+--------+------+--------------------+----------+---------+
|miR-181a|A2AJK6| 36                 |0.38      |Yes      |
+--------+------+--------------------+----------+---------+
|miR-190a|A3KGB4| 14                 |0.76      |Yes      |
+--------+------+--------------------+----------+---------+

|

    </help>
    <!-- Publications associated with this tool, listed by DOI. -->
    <citations>
        <citation type="doi">10.1093/database/bar009</citation>
        <citation type="doi">10.1073/pnas.0506580102</citation>
        <citation type="doi">10.1093/nar/gkr1161</citation>
        <citation type="doi">10.1093/nar/gkt1266</citation>
        <citation type="doi">10.1093/nar/gkn851</citation>
        <citation type="doi">10.1093/nar/gkq1056</citation>
        <citation type="doi">10.1093/bioinformatics/btu573</citation>
    </citations>
</tool>
	<tool id="LimiTT" name="LimiTT" version="0.1">
	<!-- Packages the tool declares as run-time requirements, followed by the
	     short description of the tool. -->
	<requirements>
	<requirement type="package">anaconda</requirement>
	<requirement type="package">r</requirement>
	</requirements>
	<description> MTI identification </description>
	<!-- Cheetah template that assembles the LimiTT.py command line from the form.
	     The boolean switches are tested by truthiness, so the checks do not depend
	     on how Galaxy renders them as text (their truevalue/falsevalue are
	     "yes"/"no", which never equal "True"/"False"). -->
	<command interpreter="python">
		## Multi-select values are comma-separated once cast to str; they are passed space-separated.
		#set $p_base = str($p_base).replace(',',' ')
		## NOTE(review): -idb is an absolute, installation-specific path; confirm it exists on the target server.
		LimiTT.py -idb /home/mirna/galaxy-dist/tools/limitt/files/
		-ot $o_info -om $o_matrix -ol $o_rank -ob $o_bar -oh $o_hmap -oe $o_escore -og $o_genes
		-base $p_base
		-occ $p_occ

		## Annotation file: data set, accession column, extra-info column, optional quoted description.
		#if $c_afile.p_afile:
			#for $ainput in $c_afile.r_afile:
				#set $p_info_str = '"'+str($ainput.p_ainfo)+'"'
				## An empty description is dropped rather than passed as an empty quoted argument.
				#if $p_info_str == '""':
					#set $p_info_str = ""
				#end if
				-ia $ainput.p_adata $ainput.p_acol1 $ainput.p_acol2 $p_info_str
			#end for
		#end if

		## miRNA file: data set, extra-info column, optional quoted description.
		#if $c_mfile.p_mfile:
			#for $minput in $c_mfile.r_mfile:
				#set $p_minfo_str = '"'+str($minput.p_minfo)+'"'
				#if $p_minfo_str == '""':
					#set $p_minfo_str = ""
				#end if
				-im $minput.p_mdata $minput.p_mcol2 $p_minfo_str
			#end for
		#else
			## The clustering switch only exists in the branch without a miRNA file.
			#if $c_mfile.p_com:
				-cl
			#end if
		#end if

		## Ranking file: data set, weighting and number of permutations.
		#if $c_efile.p_efile:
			#for $rinput in $c_efile.r_efile:
				-ir $rinput.p_edata
				-p $rinput.p_pval
				-perm $rinput.p_perm
			#end for
		#end if

		## Species: either ignored, or an explicit selection unless it is exactly "all".
		#if str($c_spec.p_igspec) == "ignore":
			-spec i
		#elif str($c_spec.p_spec) != "all":
			#set $spec_l = str($c_spec.p_spec).replace(',',' ')
			-spec $spec_l
		#end if

		## Experimental methods: __sq__ is turned back into the single quote carried by the option values.
		#if not str($p_exp) == "all":
			#set $p_exp = str($p_exp).replace(',',' ').replace("__sq__","'")
			-exp $p_exp
		#end if

		## The stringency is only passed when database 4 (starBase) is selected.
		#if "4" in str($p_base):
			-str $p_stri
		#end if

	</command>
	<inputs>
		<!-- Annotation file switch. When enabled, the single repeat entry
		     (min = max = 1) supplies the values the command template passes after -ia. -->
		<conditional name="c_afile">
			<param name="p_afile" type="boolean" truevalue="yes" falsevalue="no" label="Add Annotation File"
				help="Link MTIs to genes/proteins existent in this file."/>
			<when value="no"/>
			<when value="yes">
				<repeat name="r_afile" title="Annotation File" min="1" max="1">
					<param name="p_adata" type="data" format="txt" label="File"
						help="The tab delimited file needs to consist of genes or proteins mapped onto UniProt Accessions (e.g. BLASTed against UniProtKB). See Help for more details."/>
					<param name="p_acol1" type="integer" value="3" label="Column of UniProt Accessions"
						help="The column number (count from 1) with the UniProt Accessions within the file."/>
					<param name="p_acol2" type="integer" value="1" optional="true" label="Column of additional information"
						help="The (optional) column number of additional information. Information will be added to the 'MTI_Info' output."/>
					<param name="p_ainfo" type="text" value="Transcript" size="20" optional="true" label="Description of additional information"
						help="The (optional) description of the additional information."/>
				</repeat>
			</when>
		</conditional>

		<!-- miRNA file switch. Without a file only the clustering flag (-cl) is
		     offered; with a file the repeat entry supplies the values passed after -im. -->
		<conditional name="c_mfile">
			<param name="p_mfile" type="boolean" truevalue="yes" falsevalue="no" label="Add miRNA File"
				help="Link MTIs to miRNAs existent in this file."/>
			<when value="no">
				<param name="p_com" type="boolean" label="Cluster miRNAs" checked="true" truevalue="True" falsevalue="False"
					help="If no miRNA file is used, cluster miRNAs by ignoring species and hairpin arm information (hsa-miR-123a-5p, mmu-miR-123a -> miR-123a). Otherwise miRNAs are distinguished by their complete identifiers."/>
			</when>
			<when value="yes">
				<repeat name="r_mfile" title="miRNA File" min="1" max="1">
					<param name="p_mdata" type="data" format="txt" label="File" help="The tab delimited miRNA file needs to consist of one miRNA per line in the first column. See Help for more details."/>
					<param name="p_mcol2" type="integer" value="5" optional="true" label="Column of additional information" help="The (optional) column number of additional information. Information will be added to the 'MTI_Info' output."/>
					<param name="p_minfo" type="text" value="miRNA sequence" size="20" optional="true" label="Description of additional information" help="The (optional) description of the additional information."/>
				</repeat>
			</when>
		</conditional>

		<!-- Ranking file switch. When enabled, the repeat entry supplies the values
		     passed after -ir, -p and -perm; the two enrichment outputs filter on it. -->
		<conditional name="c_efile">
			<param name="p_efile" type="boolean" truevalue="yes" falsevalue="no" label="Add Ranking File" help="Start a MTI Set Enrichment Analysis based on the ranked genes/proteins in this file."/>
			<when value="no"/>
			<when value="yes">
				<repeat name="r_efile" title="Ranking File" min="1" max="1">
					<param name="p_edata" type="data" format="txt" label="File" help="The tab delimited ranking file needs to consist of genes/proteins mapped onto UniProt Accessions. LimiTT will sort the list by the values in descending order. See Help for more details."/>
					<param name="p_pval" type="select" label="Weighting" help="The weighting of the ranking values to calculate the Enrichment Score (ES) per MTI Set.">
						<option value="0">No weighting</option>
						<option value="1" selected="true">Normal</option>
						<option value="2">Square weighting</option>
					</param>
					<param name="p_perm" type="integer" min="1" value="1000" label="Number of permutations" help="The number of permutations to calculate the Normalized Enrichment Score (NES) per MTI Set."/>
				</repeat>
			</when>
		</conditional>

		<!-- Database selection, passed after -base. Value 4 (starBase) also decides
		     whether the stringency below is passed to the script. -->
		<param name="p_base" type="select" label="MTI Databases" display="checkboxes" multiple="true" help="Select the MTI databases the MTIs are retrieved from.">
			<option value="1" selected="true">TarBase</option>
			<option value="2" selected="true">miRTarBase</option>
			<option value="3" selected="true">miRecords</option>
			<option value="4" selected="true">starBase</option>
		</param>

		<!-- Minimum number of databases an MTI has to occur in, passed after -occ. -->
		<param name="p_occ" type="integer" min="1" max="4" value="2" label="Occurrence of MTIs over Databases" help="If more than one database (DB) was selected, the occurrence parameter can be used to define the minimum number of DBs the MTIs have to occur in. If the manually set value is higher than the number of selected DBs it is automatically changed to the number of DBs. "/>

		<!-- Species handling, passed after -spec: "ignore" sends the literal i,
		     otherwise the selected codes are sent unless the selection is exactly "all". -->
		<conditional name="c_spec">
			<param name="p_igspec" type="select" label="Species" help="Choose MTIs of your species or categories of interest or ignore species to expand the MTI search with predicted homologous MTIs.">
				<option value="choose" selected="true">Choose</option>
				<option value="ignore" >Ignore</option>
			</param>
			<when value="ignore"/>
			<when value="choose">
				<param name="p_spec" type="select" label="Choose Species" help="Behind each category in brackets is the number of species. Scroll down to get to single species. Multiple selection is possible." multiple="true">
					<option value="all" selected="true">All</option>
					<!-- The two entries with an empty value are visual separators. -->
					<option value="">--CATEGORIES--</option>
					<option value="a">Animals (14)</option>
					<option value="pl">Plants (6)</option>
					<option value="v">Viruses (4)</option>
					<option value="f">Fungi (1)</option>
					<option value="pr">Protozoa (1)</option>
					<option value="">--SPECIES--</option>
					<option value="ath">Arabidopsis thaliana</option>
					<option value="bmo">Bombyx mori</option>
					<option value="bta">Bos taurus</option>
					<option value="cel">Caenorhabditis elegans</option>
					<option value="cgr">Candida glabrata</option>
					<option value="cin">Ciona intestinalis</option>
					<option value="der">Danio rerio</option>
					<option value="dme">Drosophila melanogaster</option>
					<option value="ebv">Epstein Barr virus</option>
					<option value="gga">Gallus gallus</option>
					<option value="gma">Glycine max</option>
					<option value="hsa">Homo sapiens</option>
					<option value="hcmv">Human cytomegalovirus</option>
					<option value="kshv">Kaposi sarcoma-associated herpesvirus</option>
					<option value="mdv1">Mareks disease virus</option>
					<option value="mtr">Medicago truncatula</option>
					<option value="mmu">Mus musculus</option>
					<option value="osa">Oryza sativa</option>
					<option value="ola">Oryzias latipes</option>
					<option value="oar">Ovis aries</option>
					<option value="ppt">Physcomitrella patens</option>
					<option value="rno">Rattus norvegicus</option>
					<option value="tva">Trichomonas vaginalis</option>
					<option value="vvi">Vitis vinifera</option>
					<option value="xla">Xenopus laevis</option>
				</param>
			</when>
		</conditional>

		<!-- Experimental methods, passed after -exp unless the selection is exactly
		     "all". Multi-word values carry their own single quotes. -->
		<param name="p_exp" type="select" label="Experimental Methods" help="Select the experimental methods the MTIs were validated with. Multiple selection is possible." multiple="true">
			<option value="all" selected="true">All</option>
			<option value="'Western blot'">Western blot</option>
			<option value="'Reporter assay'">Reporter assay</option>
			<option value="qPCR">qPCR</option>
			<option value="Microarray">Microarray</option>
			<option value="NGS">NGS</option>
			<option value="Other">Other</option>
		</param>

		<!-- starBase stringency, passed after -str only when database 4 is selected. -->
		<param name="p_stri" type="select" label="StarBase Stringency" help="Number of CLIP-Seq Experiments supporting MTIs. Just for starBase.">
			<option value="1" selected="true">1</option>
			<option value="2">2</option>
			<option value="3">3</option>
			<option value="5">5</option>
		</param>


	</inputs>
	<outputs>
		<!-- Outputs without a filter. -->
		<data name="o_info" format="tabular" label="MTI_Info"/>
		<data name="o_matrix" format="tabular" label="MTI_Matrix"/>
		<data name="o_rank" format="tabular" label="MTI_Sets_ranked"/>
		<data name="o_bar" format="pdf" label="BarGraphs"/>
		<data name="o_hmap" format="pdf" label="MTI_Overlap_HM"/>
		<!-- Outputs kept only when the "Add Ranking File" switch is set. -->
		<data name="o_escore" format="pdf" label="EnrichmentScore_Plots">
			<filter>c_efile['p_efile'] is True</filter>
		</data>
		<data name="o_genes" format="tabular" label="MTI_Set_Genes">
			<filter>c_efile['p_efile'] is True</filter>
		</data>
	</outputs>

	<help>



	.. class:: infomark

	See LimiTT in action

	Browse to the shared data library to load example files for LimiTT into galaxy.
	The given annotation file was used to benchmark LimiTT and it contains PH-relevant genes from Bertero et al. (2014).
	To reproduce the benchmarking, choose the LimiTT tool, click on "Add annotation file" and use the LimiTT_annotation.txt as input into the pipeline.
	Next, change the "Description of additional information" field from "Transcript" to "Functional Pathway".
	Change the "Occurrence of MTIs over DBs" value to 1. Select "Homo sapiens" as species. Hit "Execute" to see the tool in action.

	------------

	.. class:: infomark

	Input Files:

	Annotation File

	File type: Tab delimited

	Header: No

	Required Content: UniProt accessions per line or separated by comma.

	Allowed Content: Several columns; empty content; accessions with attached information (for example, the underlying database) delimited by a pipe ( \| ) symbol (e.g. sp\|Q9XS59\|S6A15_BOVIN). In this case, only the identifier that occurs directly after the first pipe symbol is saved (e.g. sp\|Q9XS59\|S6A15_BOVIN > Q9XS59). Identifiers from other databases are ignored.

	Example "Required":

	+--------------------+
	|A2A6A1              |
	+--------------------+
	|O88898,P54763,Q3UHC0|
	+--------------------+
	|G5E870              |
	+--------------------+
	|Q6A037              |
	+--------------------+

	|

	Example "Allowed":

	+-------------------+-------+-----------------------------------------+
	|comp1000309_c0_seq1|slc15a3|Q8IY34,O75618                            |
	+-------------------+-------+-----------------------------------------+
	|comp1000318_c0_seq1|       |                                         |
	+-------------------+-------+-----------------------------------------+
	|comp1000627_c0_seq1|slc6a15|sp|Q9XS59|S6A15_BOVIN                    |
	+-------------------+-------+-----------------------------------------+
	|comp1000899_c0_seq1|       |gb|CX212397.1,dbj|DB530926.2,gi|154363325|
	+-------------------+-------+-----------------------------------------+

	|

	miRNA File

	File type: Tab delimited

	Header: No

	Required Content: One mature miRNA identifier (e.g. hsa-miR-17a-5p) per line in column 1.

	Allowed Content: Several columns and shortened miRNA identifiers. Shortened miRNA identifiers must consist of at least the prefix miR, lin or let, the identification number and, if present, the lettered suffix indicating sequence similarity (e.g. miR-17a).

	Example "Required":

	+-------+
	|miR-93b|
	+-------+
	|miR-36f|
	+-------+
	|miR-29d|
	+-------+
	|miR-29c|
	+-------+

	|

	Example "Allowed":

	The example is part of an original output of the MIRPIPE pipeline, to which the parameters are adjusted.

	+-------+------+----+---+----------------------+------+
	|miR-93b|52    |1.00|170|CAAGTGCTGTTCGTGCAGGTAG|33    |
	+-------+------+----+---+----------------------+------+
	|miR-36f|211   |0.00|171|ATTGAGCTATCTGTGTAG    |211   |
	+-------+------+----+---+----------------------+------+
	|miR-29d|141233|0.02|172|TAGCACCATATGAAATCAGTGT|133582|
	+-------+------+----+---+----------------------+------+
	|miR-29c|55690 |1.00|172|TAGCACCATTTGAAATCGGTTA|44200 |
	+-------+------+----+---+----------------------+------+

	|

	Ranking File

	File type: Tab delimited

	Header: No

	Required Content: UniProt accessions in column one, corresponding ranking value in column two.

	Allowed Content: The content does not need to be sorted by the ranking values.

	Example:

	+------+-----------+
	|A2A6A1|0.152108244|
	+------+-----------+
	|P54763|0.640846805|
	+------+-----------+
	|Q3UHC0|0.931454837|
	+------+-----------+
	|O88898|0.240325584|
	+------+-----------+
	|G5E870|0.47554716 |
	+------+-----------+
	|Q6A037|0.495874819|
	+------+-----------+

	------------

	.. class:: infomark

	Output Files:

	BarGraphs

	The bar graphs provide an overview of the number of miRNAs and MTIs after the different processing steps of LimiTT. Specifically, miRNAs and MTIs are counted after searching the MTI databases, after filtering by their occurrence over the DBs, after mapping MTIs onto UniProtAccs and after mapping the remaining MTI targets onto the annotated UniProtAccs. Thus, the last number within the bars is the final result.

	MTI Matrix

	The matrix contains all identified MTIs, arranged by targets as UniProtAccs (rows) and miRNAs (columns). If an interaction between a miRNA and a target was identified, a binary string represents the occurrence of the interaction over the chosen MTI DBs. The order of the DBs for the binary string can be found in the first row.

	Example:

	+--------------------------------------------------------+
	|Database order: TarBase, miRTarBase, miRecords, starBase|
	+------+-----+-------+------+-------+------+-------------+
	|      |miR-9|miR-15a|miR-17|miR-19b|miR-24|miR-26a      |
	+------+-----+-------+------+-------+------+-------------+
	|A0AVK6|0001 |       |      |       |0110  |             |
	+------+-----+-------+------+-------+------+-------------+
	|A2A6A1|     |       |      |1110   |      |             |
	+------+-----+-------+------+-------+------+-------------+
	|A2AAY5|     |       |1110  |       |      |1001         |
	+------+-----+-------+------+-------+------+-------------+
	|A2AHG0|     |0001   |0101  |       |      |             |
	+------+-----+-------+------+-------+------+-------------+

	|

	MTI Info

	The MTI information file is a list of all identified target UniProt Accessions together with the interacting miRNAs and further information which was collected during the process. If in the beginning of the process additional information from the annotation file and/or the miRNA list was specified, this information will also be part of the MTI information file.

	Standard columns::

		UniProt Accessions  UniProt Accession of the identified miRNA target
		miRNA Target        Target symbol from the MTI database(s)
		miRNA               miRNA(s) identified to interact with the target
		Review status       Review status of UniProtKB entry
		Organism            The MTI's organism
		Gene synonyms       Synonyms of the target gene
		Protein names       Name(s) of the influenced protein
		EC number           Enzyme Commission number
		Existence           Evidence for protein existence
		GO-IDs              Gene Ontology identifier(s)

	MTI Overlap HM

	Based on the idea that each identified miRNA interacts with a set of target genes, the Heatmap (HM) depicts the ratio of overlapping UniProtAcc targets between each of these MTI sets. If the MTI set enrichment analysis was used, the Heatmap output will depict for each MTI set the ratio of overlapping target genes which are part of the leading edge sets of the corresponding MTI sets.

	MTI Sets ranked

	If a ranking file was passed to LimiTT, a reduced version of the Gene Set Enrichment Analysis tool is started, analysing the enrichment of the identified MTI sets based on the ranked UniProtAccs.
	With a running sum statistic, a weighted Enrichment Score (ES) is calculated for each gene set based on position-dependent gene matches between the ranked list and the set.
	The Leading Edge analysis additionally identifies and analyses the core genes of the gene set which mainly affect the ES.
	The Leading Edge analysis proceeds as follows: Depending on whether the ES of an MTI set is positive or negative, the set of Leading Edge targets either consists of the MTI set targets before or after the peak in the running sum calculation.
	Based on this, three statistics are calculated: Tags represents the ratio of leading edge targets to all targets in the given set. List calculates the ratio of UniProtAccs from the ranked dataset before/after the ES of the current set, to all UniProtAccs in the submitted file. Signal is a combination of the two previous calculations, describing the distribution of the MTI set targets over the ranked dataset, resulting in 100% or more, if all targets can be found at the beginning of the ranked list.
	To take the set sizes into account, MTI set enrichment analysis calculates in the next step the Normalized Enrichment Score (NES) for each gene set by using permutations of the dataset. Additionally, the False Discovery Rate (FDR) q-value is calculated, representing the estimated probability of a false positive result for each set with a given NES.

	Aside from the ES, NES, FDR q-value and Leading Edge analysis, the file contains the size of each MTI set, which is the number of overlapping UniProtAccs between the MTI set and the ranked list, and the index within the ranked gene file at which the running sum statistic calculated the maximal ES.

	Example:

	+--------+----+----+----+---------+-----------+------------------------------+
	|MTI Set |Size|ES  |NES |FDR q-val|Rank at Max|Leading Edge                  |
	+========+====+====+====+=========+===========+==============================+
	|miR-149 |6   |0.65|1.55|0.290    |16         |tags=67%, list=29%, signal=53%|
	+--------+----+----+----+---------+-----------+------------------------------+
	|miR-301b|4   |0.61|1.29|0.790    |1          |tags=25%, list=2%, signal=26% |
	+--------+----+----+----+---------+-----------+------------------------------+

	|

	EnrichmentScore Plots

	Enrichment plots depict, for each MTI set, the running enrichment score over all UniProtAccs in the ranked dataset (blue line), the position of targets of the current MTI set within the ranked list (black dashes) and the maximum ES, either positive or negative (red dash).
	Enrichment plots are created only if an MTI set enrichment analysis was started.

	MTI Set Genes

	The MTI set gene file output of LimiTT is essentially a written version of all enrichment plots and is thus only produced if an enrichment analysis was initiated.
	The file lists, for each MTI set, the targets which overlap with the ranked list of UniProtAccs, the index of each of these targets within the ranked list, the running ES for this target and whether it is a member of the leading edge set or not.

	Example:

	+--------+------+--------------------+----------+---------+
	|MTI set |Target|Index in Ranked List|Running ES|LE Member|
	+========+======+====================+==========+=========+
	|miR-149 |Q9WV91|10                  |0.06      |Yes      |
	+--------+------+--------------------+----------+---------+
	|miR-149 |Q80SW1|13                  |0.24      |Yes      |
	+--------+------+--------------------+----------+---------+
	|miR-149 |Q71B07|15                  |0.44      |Yes      |
	+--------+------+--------------------+----------+---------+
	|miR-181a|Q56A04|29                  |-0.16     |Yes      |
	+--------+------+--------------------+----------+---------+
	|miR-181a|A2AJK6|36                  |0.38      |Yes      |
	+--------+------+--------------------+----------+---------+
	|miR-190a|A3KGB4|14                  |0.76      |Yes      |
	+--------+------+--------------------+----------+---------+

	|

	</help>
	<!-- Publications cited for this tool, each identified by its DOI. -->
	<citations>
	<citation type="doi">10.1093/database/bar009</citation>
	<citation type="doi">10.1073/pnas.0506580102</citation>
	<citation type="doi">10.1093/nar/gkr1161</citation>
	<citation type="doi">10.1093/nar/gkt1266</citation>
	<citation type="doi">10.1093/nar/gkn851</citation>
	<citation type="doi">10.1093/nar/gkq1056</citation>
	<citation type="doi">10.1093/bioinformatics/btu573</citation>
	</citations>
	</tool>