admire.xml

<tool id="admire" name="admire">
	<!-- Software packages this tool declares as requirements for its job environment. -->
	<requirements>
		<requirement type="package">anaconda</requirement>
		<requirement type="package">bedtools</requirement>
		<requirement type="package">r</requirement>
		<requirement type="package">comb-p</requirement>
	</requirements>
	<!-- Short description of the tool. -->
	<description>methylation analysis</description>
	<!--
		Cheetah template that assembles the admire command line.
		* Select parameters are compared via str(...) throughout, matching the checks on c_def.i_def and i_report.
		* The data table lookups wrap filter(...) in list(...), because filter() returns an iterator that cannot be indexed on Python 3.
		* User and dataset paths are single-quoted so that names containing spaces reach admire as one argument.
		Standard output of the run is redirected into the o_log dataset.
	-->
	<command>
	/home/galaxy/galaxy-dist/tools/admire/src/admire
	## Input mode: IDAT archive from the FTP directory of the user plus a sample sheet, or a sample definition file.
	#if str($c_def.i_def) == "nodef"
	-z '/home/galaxy/galaxy-dist/database/files/ftp/$__user_email__/$c_def.idat'
	-c '$c_def.sheet'
	#elif str($c_def.i_def) == "usedef"
	-s '$c_def.deffile'
	#end if
	-n $c_norm.normalization
	-q $fdr
	-p $det_p
	-t $s_thresh
	## The quality control report is only requested when the i_report checkbox yields its truevalue.
	#if str($i_report) == "create"
	-e '$o_qc'
	#end if
	## Method specific switches: each select value is either empty or the literal flag to pass on.
	#set $norm = str($c_norm.normalization)
	#if $norm == "fn"
	#echo $c_norm.bgnoob#
	#echo $c_norm.dyenoob#
	#elif $norm == "noob"
	#echo $c_norm.dyenoob#
	#elif $norm == "quantile"
	#echo $c_norm.fixoutliers#
	#echo $c_norm.removesamples#
	-l $c_norm.samplecutoff
	#end if
	## Predefined regions: take the last column of the matching admire_regions data table row.
	#if str($regions) != "None"
	#set $r=str($regions).split(",")
	#for $region in $r
	## The lambda cannot see template locals, so the loop value is published as a global first.
	#set global $reg=$region
	-r ${ list( filter( lambda x: str( x[0] ) == str( $reg ), $__app__.tool_data_tables[ 'admire_regions' ].get_fields() ) )[0][-1] }
	#end for
	#end if
	#for $custom in $customs
	-r '$custom.cr'
	#end for
	## Predefined gene sets: same lookup against the admire_genesets data table.
	#if str($genesets) != "None"
	#set $g=str($genesets).split(",")
	#for $geneset in $g
	#set global $gs=$geneset
	-g ${ list( filter( lambda x: str( x[0] ) == str( $gs ), $__app__.tool_data_tables[ 'admire_genesets' ].get_fields() ) )[0][-1] }
	#end for
	#end if
	#for $cugs in $cu_genesets
	-g '$cugs.cu'
	#end for
	-i $n_images
	-o '$o_compressed' > '$o_log'
	</command>
	<inputs>
		<!-- Input mode switch: a compressed IDAT archive plus SampleSheet.csv ("nodef") or a tabular sample definition file ("usedef"). -->
		<conditional name="c_def">
			<param name="i_def" type="select" label="Choose mode for input files" help="ADMIRE can operate on a SampleSheet.csv together with compressed IDAT files or on a tabular sample definition file containing paths to single, uncompressed IDAT files.">
				<option value="nodef" selected="true">Operate on compressed IDAT files and a SampleSheet.csv</option>
				<option value="usedef">Operate on a tabular sample definition file (see documentation)</option>
			</param>
			<when value="nodef">
				<param name="idat" type="text" label="Filename of compressed IDAT files" help="Upload compressed IDAT scanner files into your private FTP directory at ftp://bioinformatics.mpi-bn.mpg.de and provide the filename here. ADMIRE can handle .zip, .tar.gz, .tgz, .tar.bz2, .tbz2 and .tar.bzip2 compressed files.">
					<!-- The command template appends this value to the FTP directory of the user, so directory separators are rejected to keep the resulting path inside that directory. -->
					<validator type="regex" message="Please provide a plain file name without directory components.">^[^/]+$</validator>
				</param>
				<param name="sheet" type="data" format="txt,csv" label="Sample Sheet from Illumina iScan/HiScan system" help="Provide the CSV file called SampleSheet.csv from the Illumina Scanner."/>
			</when>
			<when value="usedef">
				<param name="deffile" type="data" format="tabular" label="ADMIRE example tabular definition file" help="Use a tabular file with the following columns: sample_id, file, channel, sample_group. See ADMIRE documentation for further information on how to try the examples." />
			</when>
		</conditional>
		<!-- Checkbox whose truevalue "create" makes the command template pass the quality control report option. -->
		<param name="i_report" type="boolean" checked="true" truevalue="create" falsevalue="no" label="Include quality control report in output" help="Quality control might help to identify failed samples." />
		<!-- Normalization method plus its method specific switches. The option values of the switches are the literal command line flags (or empty) that the command template echoes. -->
		<conditional name="c_norm">
			<param name="normalization" type="select" label="Select a method for input normalization" help="Normalization helps to reduce technical variation between arrays by taking internal controls into account.">
				<option value="fn" selected="true">Functional normalization</option>
				<option value="noob">Noob normalization</option>
				<option value="swan">SWAN normalization</option>
				<option value="quantile">Quantile normalization</option>
				<option value="illumina">Illumina Genome Studio normalization</option>
				<option value="raw">No normalization - use raw values</option>
			</param>
			<when value="fn">
				<param name="bgnoob" type="select" label="Use noob background correction prior to functional normalization">
					<option value="" selected="true">Yes</option>
					<option value="-b">No</option>
				</param>
				<param name="dyenoob" type="select" label="Use noob dye correction for functional normalization">
					<option value="" selected="true">Yes</option>
					<option value="-d">No</option>
				</param>
			</when>
			<when value="noob">
				<param name="dyenoob" type="select" label="Use dye correction for noob normalization">
					<option value="" selected="true">Yes</option>
					<option value="-d">No</option>
				</param>
			</when>
			<!-- No additional settings; declared explicitly so that every select option has a matching case. -->
			<when value="swan" />
			<when value="quantile">
				<param name="fixoutliers" type="select" label="Fix low signal outliers">
					<option value="" selected="true">Yes</option>
					<option value="-f">No</option>
				</param>
				<param name="samplecutoff" type="float" min="0" value="10.5" label="Bad sample cutoff" help="Label samples as bad if their median signals are below the given value." />
				<param name="removesamples" type="select" label="Remove samples labelled as bad">
					<option value="" selected="true">No</option>
					<option value="-m">Yes</option>
				</param>
			</when>
			<!-- No additional settings for the remaining two methods either. -->
			<when value="illumina" />
			<when value="raw" />
		</conditional>
		<!-- Numeric thresholds handed to admire: det_p as the p option, s_thresh as the t option, fdr as the q option and n_images as the i option. -->
		<param name="det_p" label="Detection p-value threshold for failed probe identification" type="float" min="0" max="1" value="0.01" help="Mark a probe as failed if it has a detection p-value higher than the given value. Failed probes can be excluded from subsequent analysis using the failed sample threshold (see below)." />
		<param name="s_thresh" label="Failed sample threshold" type="float" min="0" max="1" value="0.4" help="Probes are excluded from subsequent analysis if the proportion of failed probes across all samples is higher than the given value." />
		<param name="fdr" label="Q-value cutoff for multiple testing" type="float" min="0" max="1" value="0.05" help="Define a false discovery rate to limit results after multiple testing correction." />
		<param name="n_images" label="Number of additional plots for n best regions" type="integer" value="20" max="50" min="0" help="Prints n bubble plots for the best (i.e. most significant) regions into the visualization directory." />
		<!-- Predefined regions come from the admire_regions data table; additional BED files can be added through the repeat below. -->
		<param name="regions" type="select" label="Select genomic regions to test" display="checkboxes" multiple="true" help="Regions will be overlapped with methylation probes and significantly differentially methylated regions will be reported.">
			<options from_data_table="admire_regions" />
		</param>
		<repeat name="customs" title="custom genomic region">
			<param name="cr" type="data" format="bed" label="Select region" help="Please provide a bed file with hg19 coordinates"/>
		</repeat>
		<!-- Predefined gene sets come from the admire_genesets data table; custom gene set files can be added through the repeat below. -->
		<param name="genesets" type="select" label="Choose gene sets" help="Select (multiple) gene sets that should be used in gene set enrichment analysis" multiple="true">
			<options from_data_table="admire_genesets" />
		</param>
		<repeat name="cu_genesets" title="custom gene set">
			<param name="cu" type="data" format="txt" label="Select gene set" help="Please provide a file with a custom gene set"/>
		</repeat>
	</inputs>
	<!-- Output datasets referenced by the command template. -->
	<outputs>
		<!-- Receives the redirected standard output of the admire run. -->
		<data name="o_log" format="txt" label="admire output" />
		<!-- Archive path handed to admire through its o option. -->
		<data name="o_compressed" format="tgz" label="compressed output" />
		<!-- Quality control PDF; the filter keeps this output only when the i_report checkbox is enabled. -->
		<data name="o_qc" format="pdf" label="Quality control report">
			<filter>i_report==True</filter>
		</data>
	</outputs>

	<help>
.. class:: infomark

**Extensive documentation**

We have an extensive documentation, together with step by step instructions on how to use the web service available at http://admire.readthedocs.org/

----

.. class:: infomark

**Input parameters**:

**Select a method for input normalization**:

Normalization is used to remove unwanted variation and normalize between arrays. You can choose from five different normalization methods:

1. Functional normalization
2. Noob normalization
3. SWAN normalization
4. Quantile normalization
5. Illumina Genome Studio normalization

Additionally, you can skip the normalization step by selecting *No normalization - use raw values*.

**Detection p-value threshold for failed probe identification**:

Every probe on the array has a detection p-value assigned, which indicates the confidence of the scanner that the detection was correct. A probe will be marked as *failed* in a sample if its detection p-value is higher than the given value.
Together with the failed sample threshold (see below), these marks are used to exclude unreliable probes from subsequent analysis.

**Failed sample threshold**:

You can subsequently exclude probes that have a certain proportion of failed marks across all samples.

**Q-value cutoff for multiple testing**:

Multiple testing is done automatically by ADMIRE and corrects the test statistic for multiple performed tests. If a certain region remains with a Q-value or FDR below the given value, it will be retained for subsequent analysis, like the gene set enrichment analysis or visualizations.

**Number of additional plots for n best regions**:

The number given here will determine how many visualizations are plotted for significant regions. Importantly, if the number of samples is higher than 100, a heatmap is created. Otherwise, non-proportional bubble plots are plotted.

**Select genomic regions to test**:

Regions selected here will be overlapped with methylation probes and significantly differentially methylated regions will be reported.

By uploading bed files to the work space (right panel), users can give custom regions by adding them to the **custom genomic regions** list.

**Choose gene sets**:

If a gene set is given (either by selecting pre-defined gene sets or uploading a custom gene set), a gene set enrichment analysis is performed by taking all significantly differentially methylated regions that are annotated with a gene name and testing them for enrichment in a gene set.
Regions with annotated gene names are *Promoter Regions (2kB)* and *Exons*.

----

.. class:: infomark

**Output files**:

**compressed output**:

The complete output of admire compressed into a tar.gz file. This file is intended for download and extraction on your local hard drive. When extracted, it contains

 * Files in the excel subdirectory: This subdirectory contains csv files for each combination of sample group comparison (e.g. case-vs-control) and genomic region (e.g. promoters), with information about the genomic feature, its genomic location as well as p- and q-values of the sample groups. In addition to a file with all regions passing the q-value threshold, a file containing all regions is also present.
 * Files in the visualization subdirectory: This subdirectory contains files for visualization with IGV. General files, like the genomic location of all Illumina probes, as well as the genomic regions used during analysis, are located in the `annotation-tracks` subfolder. Data specific files are located in the `data-tracks` folder. Here, you can find information per sample-group comparison (e.g. case-vs-control), information on significantly altered probe methylation (`control-case.igv`), as well as significantly altered genomic regions in BED format_. Additionally, publication-ready images are stored in region-specific subdirectories.

**Quality control report**

A PDF file that can be used to check the per sample quality. This file is generated by the R package *minfi*'s quality control routines.

**admire output**:

This file gives information on the admire run. In case of errors, you can find them here.

----

.. class:: infomark

**Use the example data sets provided by ADMIRE**

Browse to the shared data library_ to load an example sample definition file into galaxy. The sample definition file contains file paths for the IDAT files from the GEO experiment E-GEOD-62727 that is stored on our server. Next, choose the ADMIRE tool and use the experiment_design.txt as input into the pipeline. Choose some genomic regions (e.g. 5kbp and 10kbp tiling regions to reproduce our findings from the paper) and hit Execute to see the tool in action.

**Step by step instructions**

Graphical step by step instructions on how to use the example data sets can be found at http://admire.readthedocs.org/en/latest/galaxy-example/

.. _format: http://genome.ucsc.edu/FAQ/FAQformat.html#format1
.. _library: https://bioinformatics.mpi-bn.mpg.de/library
	</help>
</tool>
	<tool id="admire" name="admire">
	<!-- Software packages this tool declares as requirements for its job environment. -->
	<requirements>
	<requirement type="package">anaconda</requirement>
	<requirement type="package">bedtools</requirement>
	<requirement type="package">r</requirement>
	<requirement type="package">comb-p</requirement>
	</requirements>
	<!-- Short description of the tool. -->
	<description>methylation analysis</description>
	<!--
		Cheetah template that assembles the admire command line.
		* Select parameters are compared via str(...) throughout, matching the checks on c_def.i_def and i_report.
		* The data table lookups wrap filter(...) in list(...), because filter() returns an iterator that cannot be indexed on Python 3.
		* User and dataset paths are single-quoted so that names containing spaces reach admire as one argument.
		Standard output of the run is redirected into the o_log dataset.
	-->
	<command>
	/home/galaxy/galaxy-dist/tools/admire/src/admire
	## Input mode: IDAT archive from the FTP directory of the user plus a sample sheet, or a sample definition file.
	#if str($c_def.i_def) == "nodef"
	-z '/home/galaxy/galaxy-dist/database/files/ftp/$__user_email__/$c_def.idat'
	-c '$c_def.sheet'
	#elif str($c_def.i_def) == "usedef"
	-s '$c_def.deffile'
	#end if
	-n $c_norm.normalization
	-q $fdr
	-p $det_p
	-t $s_thresh
	## The quality control report is only requested when the i_report checkbox yields its truevalue.
	#if str($i_report) == "create"
	-e '$o_qc'
	#end if
	## Method specific switches: each select value is either empty or the literal flag to pass on.
	#set $norm = str($c_norm.normalization)
	#if $norm == "fn"
	#echo $c_norm.bgnoob#
	#echo $c_norm.dyenoob#
	#elif $norm == "noob"
	#echo $c_norm.dyenoob#
	#elif $norm == "quantile"
	#echo $c_norm.fixoutliers#
	#echo $c_norm.removesamples#
	-l $c_norm.samplecutoff
	#end if
	## Predefined regions: take the last column of the matching admire_regions data table row.
	#if str($regions) != "None"
	#set $r=str($regions).split(",")
	#for $region in $r
	## The lambda cannot see template locals, so the loop value is published as a global first.
	#set global $reg=$region
	-r ${ list( filter( lambda x: str( x[0] ) == str( $reg ), $__app__.tool_data_tables[ 'admire_regions' ].get_fields() ) )[0][-1] }
	#end for
	#end if
	#for $custom in $customs
	-r '$custom.cr'
	#end for
	## Predefined gene sets: same lookup against the admire_genesets data table.
	#if str($genesets) != "None"
	#set $g=str($genesets).split(",")
	#for $geneset in $g
	#set global $gs=$geneset
	-g ${ list( filter( lambda x: str( x[0] ) == str( $gs ), $__app__.tool_data_tables[ 'admire_genesets' ].get_fields() ) )[0][-1] }
	#end for
	#end if
	#for $cugs in $cu_genesets
	-g '$cugs.cu'
	#end for
	-i $n_images
	-o '$o_compressed' > '$o_log'
	</command>
	<inputs>
	<!-- Input mode switch: a compressed IDAT archive plus SampleSheet.csv ("nodef") or a tabular sample definition file ("usedef"). -->
	<conditional name="c_def">
		<param name="i_def" type="select" label="Choose mode for input files" help="ADMIRE can operate on a SampleSheet.csv together with compressed IDAT files or on a tabular sample definition file containing paths to single, uncompressed IDAT files.">
			<option value="nodef" selected="true">Operate on compressed IDAT files and a SampleSheet.csv</option>
			<option value="usedef">Operate on a tabular sample definition file (see documentation)</option>
		</param>
		<when value="nodef">
			<param name="idat" type="text" label="Filename of compressed IDAT files" help="Upload compressed IDAT scanner files into your private FTP directory at ftp://bioinformatics.mpi-bn.mpg.de and provide the filename here. ADMIRE can handle .zip, .tar.gz, .tgz, .tar.bz2, .tbz2 and .tar.bzip2 compressed files.">
				<!-- The command template appends this value to the FTP directory of the user, so directory separators are rejected to keep the resulting path inside that directory. -->
				<validator type="regex" message="Please provide a plain file name without directory components.">^[^/]+$</validator>
			</param>
			<param name="sheet" type="data" format="txt,csv" label="Sample Sheet from Illumina iScan/HiScan system" help="Provide the CSV file called SampleSheet.csv from the Illumina Scanner."/>
		</when>
		<when value="usedef">
			<param name="deffile" type="data" format="tabular" label="ADMIRE example tabular definition file" help="Use a tabular file with the following columns: sample_id, file, channel, sample_group. See ADMIRE documentation for further information on how to try the examples." />
		</when>
	</conditional>
	<!-- Checkbox whose truevalue "create" makes the command template pass the quality control report option. -->
	<param name="i_report" type="boolean" checked="true" truevalue="create" falsevalue="no" label="Include quality control report in output" help="Quality control might help to identify failed samples." />
	<!-- Normalization method plus its method specific switches. The option values of the switches are the literal command line flags (or empty) that the command template echoes. -->
	<conditional name="c_norm">
		<param name="normalization" type="select" label="Select a method for input normalization" help="Normalization helps to reduce technical variation between arrays by taking internal controls into account.">
			<option value="fn" selected="true">Functional normalization</option>
			<option value="noob">Noob normalization</option>
			<option value="swan">SWAN normalization</option>
			<option value="quantile">Quantile normalization</option>
			<option value="illumina">Illumina Genome Studio normalization</option>
			<option value="raw">No normalization - use raw values</option>
		</param>
		<when value="fn">
			<param name="bgnoob" type="select" label="Use noob background correction prior to functional normalization">
				<option value="" selected="true">Yes</option>
				<option value="-b">No</option>
			</param>
			<param name="dyenoob" type="select" label="Use noob dye correction for functional normalization">
				<option value="" selected="true">Yes</option>
				<option value="-d">No</option>
			</param>
		</when>
		<when value="noob">
			<param name="dyenoob" type="select" label="Use dye correction for noob normalization">
				<option value="" selected="true">Yes</option>
				<option value="-d">No</option>
			</param>
		</when>
		<!-- No additional settings; declared explicitly so that every select option has a matching case. -->
		<when value="swan" />
		<when value="quantile">
			<param name="fixoutliers" type="select" label="Fix low signal outliers">
				<option value="" selected="true">Yes</option>
				<option value="-f">No</option>
			</param>
			<param name="samplecutoff" type="float" min="0" value="10.5" label="Bad sample cutoff" help="Label samples as bad if their median signals are below the given value." />
			<param name="removesamples" type="select" label="Remove samples labelled as bad">
				<option value="" selected="true">No</option>
				<option value="-m">Yes</option>
			</param>
		</when>
		<!-- No additional settings for the remaining two methods either. -->
		<when value="illumina" />
		<when value="raw" />
	</conditional>
	<!-- Numeric thresholds handed to admire: det_p as the p option, s_thresh as the t option, fdr as the q option and n_images as the i option. -->
	<param name="det_p" label="Detection p-value threshold for failed probe identification" type="float" min="0" max="1" value="0.01" help="Mark a probe as failed if it has a detection p-value higher than the given value. Failed probes can be excluded from subsequent analysis using the failed sample threshold (see below)." />
	<param name="s_thresh" label="Failed sample threshold" type="float" min="0" max="1" value="0.4" help="Probes are excluded from subsequent analysis if the proportion of failed probes across all samples is higher than the given value." />
	<param name="fdr" label="Q-value cutoff for multiple testing" type="float" min="0" max="1" value="0.05" help="Define a false discovery rate to limit results after multiple testing correction." />
	<param name="n_images" label="Number of additional plots for n best regions" type="integer" value="20" max="50" min="0" help="Prints n bubble plots for the best (i.e. most significant) regions into the visualization directory." />
	<!-- Predefined regions come from the admire_regions data table; additional BED files can be added through the repeat below. -->
	<param name="regions" type="select" label="Select genomic regions to test" display="checkboxes" multiple="true" help="Regions will be overlapped with methylation probes and significantly differentially methylated regions will be reported.">
		<options from_data_table="admire_regions" />
	</param>
	<repeat name="customs" title="custom genomic region">
		<param name="cr" type="data" format="bed" label="Select region" help="Please provide a bed file with hg19 coordinates"/>
	</repeat>
	<!-- Predefined gene sets come from the admire_genesets data table; custom gene set files can be added through the repeat below. -->
	<param name="genesets" type="select" label="Choose gene sets" help="Select (multiple) gene sets that should be used in gene set enrichment analysis" multiple="true">
		<options from_data_table="admire_genesets" />
	</param>
	<repeat name="cu_genesets" title="custom gene set">
		<param name="cu" type="data" format="txt" label="Select gene set" help="Please provide a file with a custom gene set"/>
	</repeat>
	</inputs>
	<!-- Output datasets referenced by the command template. -->
	<outputs>
	<!-- Receives the redirected standard output of the admire run. -->
	<data name="o_log" format="txt" label="admire output" />
	<!-- Archive path handed to admire through its o option. -->
	<data name="o_compressed" format="tgz" label="compressed output" />
	<!-- Quality control PDF; the filter keeps this output only when the i_report checkbox is enabled. -->
	<data name="o_qc" format="pdf" label="Quality control report">
	<filter>i_report==True</filter>
	</data>
	</outputs>

	<help>
	.. class:: infomark

	Extensive documentation

	We have an extensive documentation, together with step by step instructions on how to use the web service available at http://admire.readthedocs.org/

	----

	.. class:: infomark

	Input parameters:

	Select a method for input normalization:

	Normalization is used to remove unwanted variation and normalize between arrays. You can choose from five different normalization methods:

	1. Functional normalization
	2. Noob normalization
	3. SWAN normalization
	4. Quantile normalization
	5. Illumina Genome Studio normalization

	Additionally, you can skip the normalization step by selecting No normalization - use raw values.

	Detection p-value threshold for failed probe identification:

	Every probe on the array has a detection p-value assigned, which indicates the confidence of the scanner that the detection was correct. A probe will be marked as failed in a sample if its detection p-value is higher than the given value.
	Together with the failed sample threshold (see below), these marks are used to exclude unreliable probes from subsequent analysis.

	Failed sample threshold:

	You can subsequently exclude probes that have a certain proportion of failed marks across all samples.

	Q-value cutoff for multiple testing:

	Multiple testing is done automatically by ADMIRE and corrects the test statistic for multiple performed tests. If a certain region remains with a Q-value or FDR below the given value, it will be retained for subsequent analysis, like the gene set enrichment analysis or visualizations.

	Number of additional plots for n best regions:

	The number given here will determine how many visualizations are plotted for significant regions. Importantly, if the number of samples is higher than 100, a heatmap is created. Otherwise, non-proportional bubble plots are plotted.

	Select genomic regions to test:

	Regions selected here will be overlapped with methylation probes and significantly differentially methylated regions will be reported.

	By uploading bed files to the work space (right panel), users can give custom regions by adding them to the custom genomic regions list.

	Choose gene sets:

	If a gene set is given (either by selecting pre-defined gene sets or uploading a custom gene set), a gene set enrichment analysis is performed by taking all significantly differentially methylated regions that are annotated with a gene name and testing them for enrichment in a gene set.
	Regions with annotated gene names are Promoter Regions (2kB) and Exons.

	----

	.. class:: infomark

	Output files:

	compressed output:

	The complete output of admire compressed into a tar.gz file. This file is intended for download and extraction on your local hard drive. When extracted, it contains

	* Files in the excel subdirectory: This subdirectory contains csv files for each combination of sample group comparison (e.g. case-vs-control) and genomic region (e.g. promoters), with information about the genomic feature, its genomic location as well as p- and q-values of the sample groups. In addition to a file with all regions passing the q-value threshold, a file containing all regions is also present.
	* Files in the visualization subdirectory: This subdirectory contains files for visualization with IGV. General files, like the genomic location of all Illumina probes, as well as the genomic regions used during analysis, are located in the `annotation-tracks` subfolder. Data specific files are located in the `data-tracks` folder. Here, you can find information per sample-group comparison (e.g. case-vs-control), information on significantly altered probe methylation (`control-case.igv`), as well as significantly altered genomic regions in BED format_. Additionally, publication-ready images are stored in region-specific subdirectories.

	Quality control report

	A PDF file that can be used to check the per sample quality. This file is generated by the R package minfi's quality control routines.

	admire output:

	This file gives information on the admire run. In case of errors, you can find them here.

	----

	.. class:: infomark

	Use the example data sets provided by ADMIRE

	Browse to the shared data library_ to load an example sample definition file into galaxy. The sample definition file contains file paths for the IDAT files from the GEO experiment E-GEOD-62727 that is stored on our server. Next, choose the ADMIRE tool and use the experiment_design.txt as input into the pipeline. Choose some genomic regions (e.g. 5kbp and 10kbp tiling regions to reproduce our findings from the paper) and hit Execute to see the tool in action.

	Step by step instructions

	Graphical step by step instructions on how to use the example data sets can be found at http://admire.readthedocs.org/en/latest/galaxy-example/

	.. _format: http://genome.ucsc.edu/FAQ/FAQformat.html#format1
	.. _library: https://bioinformatics.mpi-bn.mpg.de/library
	</help>
	</tool>