From ff8c15d18be5a5310875d21ddd592ddb50c727c0 Mon Sep 17 00:00:00 2001 From: jenzopr Date: Fri, 10 Jul 2015 14:18:12 +0200 Subject: [PATCH] Added scaffold for GSEA implementation --- admire.xml | 8 +++++++- src/admire | 53 ++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 49 insertions(+), 12 deletions(-) diff --git a/admire.xml b/admire.xml index e4bb81c..839064e 100644 --- a/admire.xml +++ b/admire.xml @@ -108,6 +108,12 @@ + + + + + + @@ -124,7 +130,7 @@ * Use FTP server at ftp://bioinformatics.mpi-bn.mpg.de to upload your compressed IDAT files. * Use the Upload Tool to upload the SampleSheet.csv file to your history. -* Use built-in genomic region files to work on. +* Use the built-in genomic region files to work on. ---- diff --git a/src/admire b/src/admire index 1eced15..001eb7f 100755 --- a/src/admire +++ b/src/admire @@ -56,11 +56,14 @@ O=output.tar.gz # NUMBER OF ADDITIONAL IMAGES I=25 +# GENE SETS for GSEA +G=() + PVAL_COLS="7 8" STEPSIZE=100 CHECK_Z=false -while getopts ":hvbdflm:q:s:c:r:p:t:n:z:o:e:i:" opt; do +while getopts ":hvbdflm:q:s:c:r:p:t:n:z:o:e:i:g:" opt; do case $opt in s) S=$OPTARG ;; @@ -90,17 +93,21 @@ Available options: -m In case of quantile normalization, remove bad samples -q Q-value cutoff for multiple testing correction (0.05) -i Render advanced plots for the best i regions (20) + -g Gene set file for enrichment analysis + Use multiple -g parameters to calculate enrichment across many gene sets -o tar-gz compress output into file given -h shows this help message. -v shows version information. Options -c and -s are mutually exclusive. 
-Dependencies: Rscript, bedtools, comb-p +Dependencies: Rscript, bedtools, comb-p, python EOL exit 1 ;; r) R+=("$OPTARG") ;; + g) G+=("$OPTARG") + ;; n) if [[ "$OPTARG" == "fn" || "$OPTARG" == "noob" || "$OPTARG" == "illumina" || "$OPTARG" == "raw" || "$OPTARG" == "swan" || "$OPTARG" == "quantile" ]]; then N=$OPTARG @@ -142,7 +149,7 @@ fi done shift $(expr $OPTIND - 1 ) -echo "Step 1 of 8: Evaluating provided files..." +echo "Step 1 of 9: Evaluating provided files..." for d in $DEPENDENCIES; do command -v $d >/dev/null 2>&1 || { echo "Dependency error: $d required, but it's not installed. Use admire -h to show help."; exit 1; } @@ -174,7 +181,7 @@ done # PART 2: PROVIDE NECCESSARY ENVIRONMENT # #---------------------------------------------------- -echo "Step 2 of 8: Providing neccessary environment..." +echo "Step 2 of 9: Providing neccessary environment..." # SANITY CHECK FOR compressed input if [[ -f $Z && -f $C ]]; then CSVUSE=true @@ -333,7 +340,7 @@ done # PART 3: NORMALIZE AND ANALYSE SINGLE PROBES # #---------------------------------------------------- -echo "Step 3 of 8: Normalizing and analysing single probes..." +echo "Step 3 of 9: Normalizing and analysing single probes..." # To speed up local usage, try to avoid normalization if possible RESULTS=results if [[ ! -d $RESULTS/$N ]]; then @@ -348,7 +355,7 @@ fi # PART 4: COMBINE INTO REGIONS # #---------------------------------------------------- -echo "Step 4 of 8: Combining probes into regions..." +echo "Step 4 of 9: Combining probes into regions..." COMBP=comb-p mkdir -p $RESULTS/$N/$COMBP for reg in "${R[@]}"; do @@ -376,7 +383,7 @@ done # PART 5: CREATE VISUALIZATIONS # #---------------------------------------------------- -echo "Step 5 of 8: Creating visualizations..." +echo "Step 5 of 9: Creating visualizations..." 
VISUALIZATION=visualization/data-tracks set -- ${groups[@]} for a; do @@ -412,7 +419,7 @@ done # PART 6: CREATE TABLES FOR EXCEL # #---------------------------------------------------- -echo "Step 6 of 8: Providing tables for Excel..." +echo "Step 6 of 9: Providing tables for Excel..." EXCEL=excel mkdir -p $EXCEL set -- ${groups[@]} @@ -439,7 +446,9 @@ if($8 < $10){higherin=c[1];pval=$7;qval=$8}else{higherin=c[2];pval=$9;qval=$10}; print id,name,gid,type,$1,$2,$3,$6,$7,$8,$9,$10,higherin,qval,$14,$11,$12,$13;}' $RESULTS/$N/$r/$g.pvals.passed_fdr_$Q.abs.bed > $EXCEL/$g-$r.csv # Begin preparation of additional image rendering - (head -n 1 $WORKDIR/normalized/betaValues_fn.txt && tail -n +2 $WORKDIR/normalized/betaValues_fn.txt | sort) > $WORKDIR/normalized/betaValues_fn.sorted.txt + if [[ ! -s $WORKDIR/normalized/betaValues_fn.sorted.txt ]]; then + (head -n 1 $WORKDIR/normalized/betaValues_fn.txt && tail -n +2 $WORKDIR/normalized/betaValues_fn.txt | sort) > $WORKDIR/normalized/betaValues_fn.sorted.txt + fi sort -t',' -k14,14n $EXCEL/$g-$r.csv | cut -d',' -f5,6,7 | sed -e 's/,/\t/g' | tail -n +2 | head -n $I > $VISUALIZATION/$g/$r-target.bed split -l 1 $VISUALIZATION/$g/$r-target.bed $VISUALIZATION/$g/$r-target- for f in $VISUALIZATION/$g/$r-target-*; do @@ -463,15 +472,37 @@ rm -rf $RESULTS/$N/*.sorted.bed # PART 7: CREATE ADDITIONAL IMAGES # #---------------------------------------------------- -echo "Step 7 of 8: Rendering additional images..." +echo "Step 7 of 9: Rendering additional images..." ln -s $DIR/meth_plot.R meth_plot.R Rscript $DIR/renderAdditionalImages.R >/dev/null 2>&1 + +#---------------------------------------------------- +# +# PART 8: CALCULATE GENE SET ENRICHMENT +# +#---------------------------------------------------- +echo "Step 8 of 9: Calculating enrichment across gene sets..." 
+GSEA=geneset_enrichment +mkdir -p $GSEA +set -- ${groups[@]} +for a; do + shift + for b; do + g=$a-$b + for reg in "${R[@]}"; do + r=`basename $reg .bed` + + done + done +done + + #---------------------------------------------------- # # PART 8: WRAP-UP, PROVIDE RESULTS # #---------------------------------------------------- -echo "Step 8 of 8: Wrapping up results..." +echo "Step 9 of 9: Wrapping up results..." tar -zcvf $O visualization excel >/dev/null 2>&1 echo "Done. Your results are compressed into $O."