Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Added scaffold for GSEA implementation
  • Loading branch information
jenzopr committed Jul 10, 2015
1 parent e5f7991 commit ff8c15d
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 12 deletions.
8 changes: 7 additions & 1 deletion admire.xml
Expand Up @@ -108,6 +108,12 @@
<repeat name="customs" title="custom genomic region">
<param name="cr" type="data" format="bed" label="Select region" help="Please provide a bed file with hg19 coordinates"/>
</repeat>
<param name="genesets" type="select" label="Choose gene sets" help="Select (multiple) gene sets that should be used in gene set enrichment analysis" multiple="true">
<options from_data_table="admire_genesets" />
</param>
<repeat name="cu_genesets" title="custom gene set">
<param name="cu" type="data" format="txt" label="Select gene set" help="Please provide a file with a custom gene set"/>
</repeat>
</inputs>
<outputs>
<data name="o_log" format="txt" label="admire output" />
Expand All @@ -124,7 +130,7 @@

* Use FTP server at ftp://bioinformatics.mpi-bn.mpg.de to upload your compressed IDAT files.
* Use the Upload Tool to upload the SampleSheet.csv file to your history.
* Use built-in genomic region files to work on.
* Use the built-in genomic region files to work on.

----

Expand Down
53 changes: 42 additions & 11 deletions src/admire
Expand Up @@ -56,11 +56,14 @@ O=output.tar.gz
# NUMBER OF ADDITIONAL IMAGES
I=25

# GENE SETS for GSEA
G=()

PVAL_COLS="7 8"
STEPSIZE=100
CHECK_Z=false

while getopts ":hvbdflm:q:s:c:r:p:t:n:z:o:e:i:" opt; do
while getopts ":hvbdflm:q:s:c:r:p:t:n:z:o:e:i:g:" opt; do
case $opt in
s) S=$OPTARG
;;
Expand Down Expand Up @@ -90,17 +93,21 @@ Available options:
-m In case of quantile normalization, remove bad samples
-q Q-value cutoff for multiple testing correction (0.05)
-i Render advanced plots for the best i regions (20)
-g Gene set file for enrichment analysis
Use multiple -g parameters to calculate enrichment across many gene sets
-o tar-gz compress output into file given
-h shows this help message.
-v shows version information.
Options -c and -s are mutually exclusive.
Dependencies: Rscript, bedtools, comb-p
Dependencies: Rscript, bedtools, comb-p, python
EOL
exit 1
;;
r) R+=("$OPTARG")
;;
g) G+=("$OPTARG")
;;
n)
if [[ "$OPTARG" == "fn" || "$OPTARG" == "noob" || "$OPTARG" == "illumina" || "$OPTARG" == "raw" || "$OPTARG" == "swan" || "$OPTARG" == "quantile" ]]; then
N=$OPTARG
Expand Down Expand Up @@ -142,7 +149,7 @@ fi
done
shift $(expr $OPTIND - 1 )

echo "Step 1 of 8: Evaluating provided files..."
echo "Step 1 of 9: Evaluating provided files..."

for d in $DEPENDENCIES; do
command -v $d >/dev/null 2>&1 || { echo "Dependency error: $d required, but it's not installed. Use admire -h to show help."; exit 1; }
Expand Down Expand Up @@ -174,7 +181,7 @@ done
# PART 2: PROVIDE NECESSARY ENVIRONMENT
#
#----------------------------------------------------
echo "Step 2 of 8: Providing neccessary environment..."
echo "Step 2 of 9: Providing neccessary environment..."
# SANITY CHECK FOR compressed input
if [[ -f $Z && -f $C ]]; then
CSVUSE=true
Expand Down Expand Up @@ -333,7 +340,7 @@ done
# PART 3: NORMALIZE AND ANALYSE SINGLE PROBES
#
#----------------------------------------------------
echo "Step 3 of 8: Normalizing and analysing single probes..."
echo "Step 3 of 9: Normalizing and analysing single probes..."
# To speed up local usage, try to avoid normalization if possible
RESULTS=results
if [[ ! -d $RESULTS/$N ]]; then
Expand All @@ -348,7 +355,7 @@ fi
# PART 4: COMBINE INTO REGIONS
#
#----------------------------------------------------
echo "Step 4 of 8: Combining probes into regions..."
echo "Step 4 of 9: Combining probes into regions..."
COMBP=comb-p
mkdir -p $RESULTS/$N/$COMBP
for reg in "${R[@]}"; do
Expand Down Expand Up @@ -376,7 +383,7 @@ done
# PART 5: CREATE VISUALIZATIONS
#
#----------------------------------------------------
echo "Step 5 of 8: Creating visualizations..."
echo "Step 5 of 9: Creating visualizations..."
VISUALIZATION=visualization/data-tracks
set -- ${groups[@]}
for a; do
Expand Down Expand Up @@ -412,7 +419,7 @@ done
# PART 6: CREATE TABLES FOR EXCEL
#
#----------------------------------------------------
echo "Step 6 of 8: Providing tables for Excel..."
echo "Step 6 of 9: Providing tables for Excel..."
EXCEL=excel
mkdir -p $EXCEL
set -- ${groups[@]}
Expand All @@ -439,7 +446,9 @@ if($8 < $10){higherin=c[1];pval=$7;qval=$8}else{higherin=c[2];pval=$9;qval=$10};
print id,name,gid,type,$1,$2,$3,$6,$7,$8,$9,$10,higherin,qval,$14,$11,$12,$13;}' $RESULTS/$N/$r/$g.pvals.passed_fdr_$Q.abs.bed > $EXCEL/$g-$r.csv

# Begin preparation of additional image rendering
(head -n 1 $WORKDIR/normalized/betaValues_fn.txt && tail -n +2 $WORKDIR/normalized/betaValues_fn.txt | sort) > $WORKDIR/normalized/betaValues_fn.sorted.txt
if [[ ! -s $WORKDIR/normalized/betaValues_fn.sorted.txt ]]; then
(head -n 1 $WORKDIR/normalized/betaValues_fn.txt && tail -n +2 $WORKDIR/normalized/betaValues_fn.txt | sort) > $WORKDIR/normalized/betaValues_fn.sorted.txt
fi
sort -t',' -k14,14n $EXCEL/$g-$r.csv | cut -d',' -f5,6,7 | sed -e 's/,/\t/g' | tail -n +2 | head -n $I > $VISUALIZATION/$g/$r-target.bed
split -l 1 $VISUALIZATION/$g/$r-target.bed $VISUALIZATION/$g/$r-target-
for f in $VISUALIZATION/$g/$r-target-*; do
Expand All @@ -463,15 +472,37 @@ rm -rf $RESULTS/$N/*.sorted.bed
# PART 7: CREATE ADDITIONAL IMAGES
#
#----------------------------------------------------
echo "Step 7 of 8: Rendering additional images..."
echo "Step 7 of 9: Rendering additional images..."
ln -s $DIR/meth_plot.R meth_plot.R
Rscript $DIR/renderAdditionalImages.R >/dev/null 2>&1

#----------------------------------------------------
#
# PART 8: GENE SET ENRICHMENT ANALYSIS
#
#----------------------------------------------------
echo "Step 8 of 9: Calculating enrichment across gene sets..."
# Output directory for the gene set enrichment step; created up front.
GSEA=geneset_enrichment
mkdir -p $GSEA
# Load the group names as positional parameters so the nested loops below
# can walk every unordered pair (a, b) of groups. This replaces whatever
# positional parameters were set before.
set -- ${groups[@]}
for a; do
# Drop the current group so the inner loop only sees the groups after it.
shift
for b; do
# Comparison identifier: "<first group>-<second group>".
g=$a-$b
for reg in "${R[@]}"; do
# Region name: file name of the region file without its .bed suffix.
r=`basename $reg .bed`

# NOTE(review): scaffold only -- nothing is computed per comparison/region
# yet, and the gene set files collected in G are not read here.
done
done
done


#----------------------------------------------------
#
# PART 9: WRAP-UP, PROVIDE RESULTS
#
#----------------------------------------------------
echo "Step 8 of 8: Wrapping up results..."
echo "Step 9 of 9: Wrapping up results..."
tar -zcvf $O visualization excel >/dev/null 2>&1

echo "Done. Your results are compressed into $O."

0 comments on commit ff8c15d

Please sign in to comment.