diff --git a/admire.xml b/admire.xml
index e4bb81c..839064e 100644
--- a/admire.xml
+++ b/admire.xml
@@ -108,6 +108,12 @@
+
+
+
+
+
+
@@ -124,7 +130,7 @@
* Use FTP server at ftp://bioinformatics.mpi-bn.mpg.de to upload your compressed IDAT files.
* Use the Upload Tool to upload the SampleSheet.csv file to your history.
-* Use built-in genomic region files to work on.
+* Use the built-in genomic region files to work on.
----
diff --git a/src/admire b/src/admire
index 1eced15..001eb7f 100755
--- a/src/admire
+++ b/src/admire
@@ -56,11 +56,14 @@ O=output.tar.gz
# NUMBER OF ADDITIONAL IMAGES
I=25
+# GENE SET FILES FOR ENRICHMENT ANALYSIS (ONE ENTRY PER -g OPTION)
+G=()
+
PVAL_COLS="7 8"
STEPSIZE=100
CHECK_Z=false
-while getopts ":hvbdflm:q:s:c:r:p:t:n:z:o:e:i:" opt; do
+while getopts ":hvbdflm:q:s:c:r:p:t:n:z:o:e:i:g:" opt; do
case $opt in
s) S=$OPTARG
;;
@@ -90,17 +93,21 @@ Available options:
-m In case of quantile normalization, remove bad samples
-q Q-value cutoff for multiple testing correction (0.05)
-i Render advanced plots for the best i regions (20)
+ -g Gene set file for enrichment analysis
+ Use multiple -g parameters to calculate enrichment across many gene sets
-o tar-gz compress output into file given
-h shows this help message.
-v shows version information.
Options -c and -s are mutually exclusive.
-Dependencies: Rscript, bedtools, comb-p
+Dependencies: Rscript, bedtools, comb-p, python
EOL
exit 1
;;
r) R+=("$OPTARG")
;;
+ g) G+=("$OPTARG")
+ ;;
n)
if [[ "$OPTARG" == "fn" || "$OPTARG" == "noob" || "$OPTARG" == "illumina" || "$OPTARG" == "raw" || "$OPTARG" == "swan" || "$OPTARG" == "quantile" ]]; then
N=$OPTARG
@@ -142,7 +149,7 @@ fi
done
shift $(expr $OPTIND - 1 )
-echo "Step 1 of 8: Evaluating provided files..."
+echo "Step 1 of 9: Evaluating provided files..."
for d in $DEPENDENCIES; do
command -v $d >/dev/null 2>&1 || { echo "Dependency error: $d required, but it's not installed. Use admire -h to show help."; exit 1; }
@@ -174,7 +181,7 @@ done
# PART 2: PROVIDE NECCESSARY ENVIRONMENT
#
#----------------------------------------------------
-echo "Step 2 of 8: Providing neccessary environment..."
+echo "Step 2 of 9: Providing neccessary environment..."
# SANITY CHECK FOR compressed input
if [[ -f $Z && -f $C ]]; then
CSVUSE=true
@@ -333,7 +340,7 @@ done
# PART 3: NORMALIZE AND ANALYSE SINGLE PROBES
#
#----------------------------------------------------
-echo "Step 3 of 8: Normalizing and analysing single probes..."
+echo "Step 3 of 9: Normalizing and analysing single probes..."
# To speed up local usage, try to avoid normalization if possible
RESULTS=results
if [[ ! -d $RESULTS/$N ]]; then
@@ -348,7 +355,7 @@ fi
# PART 4: COMBINE INTO REGIONS
#
#----------------------------------------------------
-echo "Step 4 of 8: Combining probes into regions..."
+echo "Step 4 of 9: Combining probes into regions..."
COMBP=comb-p
mkdir -p $RESULTS/$N/$COMBP
for reg in "${R[@]}"; do
@@ -376,7 +383,7 @@ done
# PART 5: CREATE VISUALIZATIONS
#
#----------------------------------------------------
-echo "Step 5 of 8: Creating visualizations..."
+echo "Step 5 of 9: Creating visualizations..."
VISUALIZATION=visualization/data-tracks
set -- ${groups[@]}
for a; do
@@ -412,7 +419,7 @@ done
# PART 6: CREATE TABLES FOR EXCEL
#
#----------------------------------------------------
-echo "Step 6 of 8: Providing tables for Excel..."
+echo "Step 6 of 9: Providing tables for Excel..."
EXCEL=excel
mkdir -p $EXCEL
set -- ${groups[@]}
@@ -439,7 +446,9 @@ if($8 < $10){higherin=c[1];pval=$7;qval=$8}else{higherin=c[2];pval=$9;qval=$10};
print id,name,gid,type,$1,$2,$3,$6,$7,$8,$9,$10,higherin,qval,$14,$11,$12,$13;}' $RESULTS/$N/$r/$g.pvals.passed_fdr_$Q.abs.bed > $EXCEL/$g-$r.csv
# Begin preparation of additional image rendering
- (head -n 1 $WORKDIR/normalized/betaValues_fn.txt && tail -n +2 $WORKDIR/normalized/betaValues_fn.txt | sort) > $WORKDIR/normalized/betaValues_fn.sorted.txt
+ if [[ ! -s $WORKDIR/normalized/betaValues_fn.sorted.txt ]]; then
+ (head -n 1 $WORKDIR/normalized/betaValues_fn.txt && tail -n +2 $WORKDIR/normalized/betaValues_fn.txt | sort) > $WORKDIR/normalized/betaValues_fn.sorted.txt
+ fi
sort -t',' -k14,14n $EXCEL/$g-$r.csv | cut -d',' -f5,6,7 | sed -e 's/,/\t/g' | tail -n +2 | head -n $I > $VISUALIZATION/$g/$r-target.bed
split -l 1 $VISUALIZATION/$g/$r-target.bed $VISUALIZATION/$g/$r-target-
for f in $VISUALIZATION/$g/$r-target-*; do
@@ -463,15 +472,37 @@ rm -rf $RESULTS/$N/*.sorted.bed
# PART 7: CREATE ADDITIONAL IMAGES
#
#----------------------------------------------------
-echo "Step 7 of 8: Rendering additional images..."
+echo "Step 7 of 9: Rendering additional images..."
ln -s $DIR/meth_plot.R meth_plot.R
Rscript $DIR/renderAdditionalImages.R >/dev/null 2>&1
+
+#----------------------------------------------------
+#
+# PART 8: CALCULATE ENRICHMENT ACROSS GENE SETS
+#
+#----------------------------------------------------
+echo "Step 8 of 9: Calculating enrichment across gene sets..."
+GSEA=geneset_enrichment
+mkdir -p $GSEA
+set -- ${groups[@]}
+for a; do
+ shift
+ for b; do
+ g=$a-$b
+ for reg in "${R[@]}"; do
+ r=`basename $reg .bed`
+
+ done
+ done
+done
+
+
#----------------------------------------------------
#
-# PART 8: WRAP-UP, PROVIDE RESULTS
+# PART 9: WRAP-UP, PROVIDE RESULTS
#
#----------------------------------------------------
-echo "Step 8 of 8: Wrapping up results..."
+echo "Step 9 of 9: Wrapping up results..."
tar -zcvf $O visualization excel >/dev/null 2>&1
echo "Done. Your results are compressed into $O."