<!-- admire.xml -->

<tool id="admire" name="admire">
	<!-- Software packages the admire wrapper declares as dependencies.
	     NOTE(review): no version attributes are given, so whichever version the
	     dependency resolver finds will be used; confirm this is intended. -->
	<requirements>
		<requirement type="package">anaconda</requirement>
		<requirement type="package">bedtools</requirement>
		<requirement type="package">r</requirement>
		<requirement type="package">comb-p</requirement>
	</requirements>
	<!-- Short tagline for the tool (presumably displayed next to the tool name). -->
	<description>methylation analysis</description>
	<!--
		Cheetah template that assembles the admire command line from the tool inputs.
		Dataset and FTP paths are single-quoted so that values containing spaces reach
		the script as one argument. The built-in region lookup walks the admire_regions
		data table with plain Cheetah loops instead of indexing the result of filter(),
		which is an iterator (not subscriptable) under Python 3.
		NOTE(review): the executable and the FTP upload directory are absolute paths of
		one specific installation; adjust them if Galaxy lives elsewhere.
	-->
	<command><![CDATA[
	/home/galaxy/galaxy-dist/tools/admire/src/admire
	## Input mode "nodef": compressed IDAT archive from the user's FTP directory plus a sample sheet.
	#if str($c_def.i_def) == "nodef"
	-z '/home/galaxy/galaxy-dist/database/files/ftp/$__user_email__/$c_def.idat'
	-c '$c_def.sheet'
	#end if
	## Input mode "usedef": tabular sample definition file.
	#if str($c_def.i_def) == "usedef"
	-s '$c_def.deffile'
	#end if
	-n $c_norm.normalization
	-q $fdr
	-p $det_p
	-t $s_thresh
	## Optional quality control report (boolean i_report, truevalue "create").
	#if str($i_report) == "create"
	-e '$o_qc'
	#end if
	## Method-specific switches. Each select yields either an empty string or a flag.
	#if str($c_norm.normalization) == "fn"
	$c_norm.bgnoob
	$c_norm.dyenoob
	#elif str($c_norm.normalization) == "noob"
	$c_norm.dyenoob
	#elif str($c_norm.normalization) == "quantile"
	$c_norm.fixoutliers
	$c_norm.removesamples
	-l $c_norm.samplecutoff
	#end if
	## Built-in regions: for every selected value, emit the last column of the first
	## admire_regions row whose first column equals that value. A value without a
	## matching row is skipped.
	#if str($regions) != "None"
	#for $region in str($regions).split(",")
	#for $row in $__app__.tool_data_tables['admire_regions'].get_fields()
	#if str($row[0]) == str($region)
	-r '${row[-1]}'
	#break
	#end if
	#end for
	#end for
	#end if
	## User-supplied region files from the history.
	#for $custom in $customs
	-r '$custom.cr'
	#end for
	-o '$o_compressed' > '$o_log'
	]]></command>
	<inputs>
		<!-- Input mode: a compressed IDAT archive plus SampleSheet.csv, or a tabular sample definition file. -->
		<conditional name="c_def">
			<param name="i_def" type="select" label="Choose mode for input files" help="ADMIRE can operate on a SampleSheet.csv together with compressed IDAT files or on a tabular sample definition file containing paths to single, uncompressed IDAT files.">
				<option value="nodef" selected="true">Operate on compressed IDAT files and a SampleSheet.csv</option>
				<option value="usedef">Operate on a tabular sample definition file (see documentation)</option>
			</param>
			<when value="nodef">
				<param name="idat" type="text" label="Filename of compressed IDAT files" help="Upload compressed IDAT scanner files into your private FTP directory at ftp://bioinformatics.mpi-bn.mpg.de and provide the filename here." />
				<param name="sheet" type="data" format="txt,csv" label="Sample Sheet from Illumina iScan/HiScan system" help="Provide the CSV file called SampleSheet.csv from the Illumina Scanner."/>
			</when>
			<when value="usedef">
				<param name="deffile" type="data" format="tabular" label="Tabular sample definition file" help="Use a tabular file with the following columns: sample_id, file, channel, sample_group. See ADMIRE documentation for further information." />
			</when>
		</conditional>
		<!-- Checkbox that controls whether the quality control PDF is produced. -->
		<param name="i_report" type="boolean" checked="true" truevalue="create" falsevalue="no" label="Include quality control report in output" help="Quality control might help to identify failed samples." />
		<!-- Normalization method; some methods expose extra switches whose option values are the literal command-line flags. -->
		<conditional name="c_norm">
			<param name="normalization" type="select" label="Select a method for input normalization" help="Normalization helps to reduce technical variation between arrays by taking internal controls into account.">
				<option value="fn" selected="true">Functional normalization</option>
				<option value="noob">Noob normalization</option>
				<option value="swan">SWAN normalization</option>
				<option value="quantile">Quantile normalization</option>
				<option value="illumina">Illumina Genome Studio normalization</option>
				<option value="raw">No normalization - use raw values</option>
			</param>
			<when value="fn">
				<param name="bgnoob" type="select" label="Use noob background correction prior to functional normalization">
					<option value="" selected="true">Yes</option>
					<option value="-b">No</option>
				</param>
				<param name="dyenoob" type="select" label="Use noob dye correction for functional normalization">
					<option value="" selected="true">Yes</option>
					<option value="-d">No</option>
				</param>
			</when>
			<when value="noob">
				<param name="dyenoob" type="select" label="Use dye correction for noob normalization">
					<option value="" selected="true">Yes</option>
					<option value="-d">No</option>
				</param>
			</when>
			<when value="quantile">
				<param name="fixoutliers" type="select" label="Fix low signal outliers">
					<option value="" selected="true">Yes</option>
					<option value="-f">No</option>
				</param>
				<param name="samplecutoff" type="float" min="0" value="10.5" label="Bad sample cutoff" help="Label samples as bad if their median signals are below the given value." />
				<param name="removesamples" type="select" label="Remove samples labelled as bad">
					<option value="" selected="true">No</option>
					<option value="-m">Yes</option>
				</param>
			</when>
			<!-- These methods have no additional options; the empty cases make every select option explicit. -->
			<when value="swan" />
			<when value="illumina" />
			<when value="raw" />
		</conditional>
		<param name="det_p" label="Detection p-value threshold for failed probe identification" type="float" min="0" max="1" value="0.01" help="Mark a probe as failed if it has a detection p-value higher than the given value. Failed probes can be excluded from subsequent analysis using the failed sample threshold (see below)." />
		<param name="s_thresh" label="Failed sample threshold" type="float" min="0" max="1" value="0.4" help="Probes are excluded from subsequent analysis if the proportion of failed probes across all samples is higher than the given value." />
		<param name="fdr" label="Q-value cutoff for multiple testing" type="float" min="0" max="1" value="0.05" help="Define a false discovery rate to limit results after multiple testing correction." />
		<!-- Built-in region sets, populated from the admire_regions data table. -->
		<param name="regions" type="select" label="Select genomic regions to test" display="checkboxes" multiple="true" help="Regions will be overlapped with GC probes and significantly differentially methylated regions will be reported.">
			<options from_data_table="admire_regions" />
		</param>
		<repeat name="customs" title="custom genomic region">
			<param name="cr" type="data" format="bed" label="Select region" help="Please provide a bed file with hg19 coordinates"/>
		</repeat>
		<!-- NOTE(review): genesets and cu_genesets are collected here but are not referenced
		     anywhere in this tool's command template; confirm whether they should be passed to admire. -->
		<param name="genesets" type="select" label="Choose gene sets" help="Select (multiple) gene sets that should be used in gene set enrichment analysis" multiple="true">
			<options from_data_table="admire_genesets" />
		</param>
		<repeat name="cu_genesets" title="custom gene set">
			<param name="cu" type="data" format="txt" label="Select gene set" help="Please provide a file with a custom gene set"/>
		</repeat>
	</inputs>
	<outputs>
		<!-- Receives the standard output of the admire run (the command redirects stdout to it). -->
		<data name="o_log" format="txt" label="admire output" />
		<!-- Archive that the command passes to admire via the -o option. -->
		<data name="o_compressed" format="tgz" label="compressed output" />
		<!-- Quality control PDF; only part of the job when the i_report checkbox is ticked. -->
		<data name="o_qc" format="pdf" label="Quality control report">
			<filter>i_report == True</filter>
		</data>
	</outputs>

	<help>
.. class:: infomark

**Helpful tips**

* Use FTP server at ftp://bioinformatics.mpi-bn.mpg.de to upload your compressed IDAT files.
* Use the Upload Tool to upload the SampleSheet.csv file to your history.
* Use the built-in genomic region files to work on.

----

.. class:: infomark

**Output files**:

**compressed output**:

The complete output of admire compressed into a tar.gz file. This file is intended for download and extraction on your local hard drive. When extracted, it contains

* an **excel subdirectory**: This subdirectory contains a csv file for each combination of sample group comparison (e.g. case-vs-control) and genomic region (e.g. promoters), with information about the genomic feature, its genomic location as well as p- and q-values of the sample groups.
* a **visualization subdirectory**: This subdirectory contains files for visualization with IGV. General files, like the genomic location of all Illumina probes, as well as the genomic regions used during analysis, are located in the *annotation-tracks* subfolder. Data-specific files are located in the *data-tracks* folder. Here you can find, per sample-group comparison (e.g. case-vs-control), information on significantly altered probe methylation (*control-case.igv*), as well as significantly altered genomic regions in BED format_.


**Quality control report**

A PDF file that can be used to check the per sample quality. This file is generated by the R package *minfi*.

**admire output**:

This file gives information on the admire run. In case of errors, you can find them here.

----

.. class:: infomark

Browse to the shared data library_ to load an example sample definition file into Galaxy. The sample definition file contains file paths for the IDAT files from the GEO experiment E-GEOD-62727, which are stored on our server. Next, choose the ADMIRE tool and use the experiment_design.txt as input into the pipeline. Choose some genomic regions (e.g. 5kbp and 10kbp tiling regions to reproduce our findings from the paper) and hit Execute to see the tool in action.

.. _format: http://genome.ucsc.edu/FAQ/FAQformat.html#format1
.. _library: https://bioinformatics.mpi-bn.mpg.de/library
	</help>
</tool>
	<tool id="admire" name="admire">
	<!-- Software packages the admire wrapper declares as dependencies.
	     NOTE(review): no version attributes are given; confirm this is intended. -->
	<requirements>
		<requirement type="package">anaconda</requirement>
		<requirement type="package">bedtools</requirement>
		<requirement type="package">r</requirement>
		<requirement type="package">comb-p</requirement>
	</requirements>
	<!-- Short tagline for the tool (presumably displayed next to the tool name). -->
	<description>methylation analysis</description>
	<!--
		Cheetah template that assembles the admire command line from the tool inputs.
		Dataset and FTP paths are single-quoted so that values containing spaces reach
		the script as one argument. The built-in region lookup walks the admire_regions
		data table with plain Cheetah loops instead of indexing the result of filter(),
		which is an iterator (not subscriptable) under Python 3.
		NOTE(review): the executable and the FTP upload directory are absolute paths of
		one specific installation; adjust them if Galaxy lives elsewhere.
	-->
	<command><![CDATA[
	/home/galaxy/galaxy-dist/tools/admire/src/admire
	## Input mode "nodef": compressed IDAT archive from the user's FTP directory plus a sample sheet.
	#if str($c_def.i_def) == "nodef"
	-z '/home/galaxy/galaxy-dist/database/files/ftp/$__user_email__/$c_def.idat'
	-c '$c_def.sheet'
	#end if
	## Input mode "usedef": tabular sample definition file.
	#if str($c_def.i_def) == "usedef"
	-s '$c_def.deffile'
	#end if
	-n $c_norm.normalization
	-q $fdr
	-p $det_p
	-t $s_thresh
	## Optional quality control report (boolean i_report, truevalue "create").
	#if str($i_report) == "create"
	-e '$o_qc'
	#end if
	## Method-specific switches. Each select yields either an empty string or a flag.
	#if str($c_norm.normalization) == "fn"
	$c_norm.bgnoob
	$c_norm.dyenoob
	#elif str($c_norm.normalization) == "noob"
	$c_norm.dyenoob
	#elif str($c_norm.normalization) == "quantile"
	$c_norm.fixoutliers
	$c_norm.removesamples
	-l $c_norm.samplecutoff
	#end if
	## Built-in regions: for every selected value, emit the last column of the first
	## admire_regions row whose first column equals that value. A value without a
	## matching row is skipped.
	#if str($regions) != "None"
	#for $region in str($regions).split(",")
	#for $row in $__app__.tool_data_tables['admire_regions'].get_fields()
	#if str($row[0]) == str($region)
	-r '${row[-1]}'
	#break
	#end if
	#end for
	#end for
	#end if
	## User-supplied region files from the history.
	#for $custom in $customs
	-r '$custom.cr'
	#end for
	-o '$o_compressed' > '$o_log'
	]]></command>
	<inputs>
		<!-- Input mode: a compressed IDAT archive plus SampleSheet.csv, or a tabular sample definition file. -->
		<conditional name="c_def">
			<param name="i_def" type="select" label="Choose mode for input files" help="ADMIRE can operate on a SampleSheet.csv together with compressed IDAT files or on a tabular sample definition file containing paths to single, uncompressed IDAT files.">
				<option value="nodef" selected="true">Operate on compressed IDAT files and a SampleSheet.csv</option>
				<option value="usedef">Operate on a tabular sample definition file (see documentation)</option>
			</param>
			<when value="nodef">
				<param name="idat" type="text" label="Filename of compressed IDAT files" help="Upload compressed IDAT scanner files into your private FTP directory at ftp://bioinformatics.mpi-bn.mpg.de and provide the filename here." />
				<param name="sheet" type="data" format="txt,csv" label="Sample Sheet from Illumina iScan/HiScan system" help="Provide the CSV file called SampleSheet.csv from the Illumina Scanner."/>
			</when>
			<when value="usedef">
				<param name="deffile" type="data" format="tabular" label="Tabular sample definition file" help="Use a tabular file with the following columns: sample_id, file, channel, sample_group. See ADMIRE documentation for further information." />
			</when>
		</conditional>
		<!-- Checkbox that controls whether the quality control PDF is produced. -->
		<param name="i_report" type="boolean" checked="true" truevalue="create" falsevalue="no" label="Include quality control report in output" help="Quality control might help to identify failed samples." />
		<!-- Normalization method; some methods expose extra switches whose option values are the literal command-line flags. -->
		<conditional name="c_norm">
			<param name="normalization" type="select" label="Select a method for input normalization" help="Normalization helps to reduce technical variation between arrays by taking internal controls into account.">
				<option value="fn" selected="true">Functional normalization</option>
				<option value="noob">Noob normalization</option>
				<option value="swan">SWAN normalization</option>
				<option value="quantile">Quantile normalization</option>
				<option value="illumina">Illumina Genome Studio normalization</option>
				<option value="raw">No normalization - use raw values</option>
			</param>
			<when value="fn">
				<param name="bgnoob" type="select" label="Use noob background correction prior to functional normalization">
					<option value="" selected="true">Yes</option>
					<option value="-b">No</option>
				</param>
				<param name="dyenoob" type="select" label="Use noob dye correction for functional normalization">
					<option value="" selected="true">Yes</option>
					<option value="-d">No</option>
				</param>
			</when>
			<when value="noob">
				<param name="dyenoob" type="select" label="Use dye correction for noob normalization">
					<option value="" selected="true">Yes</option>
					<option value="-d">No</option>
				</param>
			</when>
			<when value="quantile">
				<param name="fixoutliers" type="select" label="Fix low signal outliers">
					<option value="" selected="true">Yes</option>
					<option value="-f">No</option>
				</param>
				<param name="samplecutoff" type="float" min="0" value="10.5" label="Bad sample cutoff" help="Label samples as bad if their median signals are below the given value." />
				<param name="removesamples" type="select" label="Remove samples labelled as bad">
					<option value="" selected="true">No</option>
					<option value="-m">Yes</option>
				</param>
			</when>
			<!-- These methods have no additional options; the empty cases make every select option explicit. -->
			<when value="swan" />
			<when value="illumina" />
			<when value="raw" />
		</conditional>
		<param name="det_p" label="Detection p-value threshold for failed probe identification" type="float" min="0" max="1" value="0.01" help="Mark a probe as failed if it has a detection p-value higher than the given value. Failed probes can be excluded from subsequent analysis using the failed sample threshold (see below)." />
		<param name="s_thresh" label="Failed sample threshold" type="float" min="0" max="1" value="0.4" help="Probes are excluded from subsequent analysis if the proportion of failed probes across all samples is higher than the given value." />
		<param name="fdr" label="Q-value cutoff for multiple testing" type="float" min="0" max="1" value="0.05" help="Define a false discovery rate to limit results after multiple testing correction." />
		<!-- Built-in region sets, populated from the admire_regions data table. -->
		<param name="regions" type="select" label="Select genomic regions to test" display="checkboxes" multiple="true" help="Regions will be overlapped with GC probes and significantly differentially methylated regions will be reported.">
			<options from_data_table="admire_regions" />
		</param>
		<repeat name="customs" title="custom genomic region">
			<param name="cr" type="data" format="bed" label="Select region" help="Please provide a bed file with hg19 coordinates"/>
		</repeat>
		<!-- NOTE(review): genesets and cu_genesets are collected here but are not referenced
		     anywhere in this tool's command template; confirm whether they should be passed to admire. -->
		<param name="genesets" type="select" label="Choose gene sets" help="Select (multiple) gene sets that should be used in gene set enrichment analysis" multiple="true">
			<options from_data_table="admire_genesets" />
		</param>
		<repeat name="cu_genesets" title="custom gene set">
			<param name="cu" type="data" format="txt" label="Select gene set" help="Please provide a file with a custom gene set"/>
		</repeat>
	</inputs>
	<outputs>
		<!-- Receives the standard output of the admire run (the command redirects stdout to it). -->
		<data name="o_log" format="txt" label="admire output" />
		<!-- Archive that the command passes to admire via the -o option. -->
		<data name="o_compressed" format="tgz" label="compressed output" />
		<!-- Quality control PDF; only part of the job when the i_report checkbox is ticked. -->
		<data name="o_qc" format="pdf" label="Quality control report">
			<filter>i_report == True</filter>
		</data>
	</outputs>

	<help>
	.. class:: infomark

	Helpful tips

	* Use FTP server at ftp://bioinformatics.mpi-bn.mpg.de to upload your compressed IDAT files.
	* Use the Upload Tool to upload the SampleSheet.csv file to your history.
	* Use the built-in genomic region files to work on.

	----

	.. class:: infomark

	Output files:

	compressed output:

	The complete output of admire compressed into a tar.gz file. This file is intended for download and extraction on your local hard drive. When extracted, it contains

	* an excel subdirectory: This subdirectory contains a csv file for each combination of sample group comparison (e.g. case-vs-control) and genomic region (e.g. promoters), with information about the genomic feature, its genomic location as well as p- and q-values of the sample groups.
	* a visualization subdirectory: This subdirectory contains files for visualization with IGV. General files, like the genomic location of all Illumina probes, as well as the genomic regions used during analysis, are located in the annotation-tracks subfolder. Data-specific files are located in the `data-tracks` folder. Here you can find, per sample-group comparison (e.g. case-vs-control), information on significantly altered probe methylation (control-case.igv), as well as significantly altered genomic regions in BED format_.


	Quality control report

	A PDF file that can be used to check the per sample quality. This file is generated by the R package minfi.

	admire output:

	This file gives information on the admire run. In case of errors, you can find them here.

	----

	.. class:: infomark

	Browse to the shared data library_ to load an example sample definition file into Galaxy. The sample definition file contains file paths for the IDAT files from the GEO experiment E-GEOD-62727, which are stored on our server. Next, choose the ADMIRE tool and use the experiment_design.txt as input into the pipeline. Choose some genomic regions (e.g. 5kbp and 10kbp tiling regions to reproduce our findings from the paper) and hit Execute to see the tool in action.

	.. _format: http://genome.ucsc.edu/FAQ/FAQformat.html#format1
	.. _library: https://bioinformatics.mpi-bn.mpg.de/library
	</help>
	</tool>