From 151c535ca86d3fc86553e2f4ad6a2013d7e98f83 Mon Sep 17 00:00:00 2001 From: mkondili Date: Sun, 29 Jan 2017 19:04:57 +0100 Subject: [PATCH 01/27] htslib and python-Anaconda instruct. analytically --- README.md | 49 ++++++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index a2951c0..513ba5a 100644 --- a/README.md +++ b/README.md @@ -35,8 +35,18 @@ Installation and Command-line usage Make sure all prerequisites are met: - [R/Rscript](http://www.r-project.org/) (v3.3.0 or higher, follow instructions on url) -- [Python](http://continuum.io/downloads) (v2.7.8-anaconda-2.1.0, install with ```bash Anaconda2-4.2.0-Linux-x86_64.sh``` and ```PATH=dir/to/python_anaconda:$PATH```) -- [htslib](http://www.htslib.org/download/) (1.3.2 or higher, follow instructions on url) + +- [Python](https://repo.continuum.io/archive/) (v2.7.8-anaconda-2.1.0) + * Choose Anaconda-2.1.0-Linux-x86 or -x86_64, + * Install with ```bash Anaconda2-4.2.0-Linux-x86_64.sh```, + * Follow instructions and type 'yes' to prepend Anaconda to PATH in your .bashrc file, + * or type after installation: ``` $export PATH=/home//anaconda/bin:$PATH ``` + +- [htslib](https://github.com/samtools/htslib/blob/develop/INSTALL) (1.3.2 or higher, follow instructions on url) + * ```cd htslib-1.3.2/``` + * ```./configure``` + * ```make``` + * ```make install ``` or ```sudo make prefix=/home//htslib install``` Install required packages for R: @@ -44,42 +54,43 @@ Install required packages for R: install.packages(c("ggplot2", "devtools", "gplots", "gridExtra", "jsonlite", "VennDiagram")) source("https://bioconductor.org/biocLite.R") biocLite(c("RBGL","graph")) -# to install the last required package, devtools has to be loaded to use the install from github function + +# To install the last required package-Vennerable, devtools has to be loaded to use the install from github function # if you copy this and want to use it in once, # make sure the library loading is not interrupted by the question if present packages should be updated + library(devtools) install_github("jenzopr/Vennerable") ``` - -### Install UROPA locally +Install UROPA locally by running: ```bash git clone https://github.molgen.mpg.de/loosolab/UROPA.git -export PATH=$PATH:dir/to/uropa +export PATH=$PATH:dir/to/uropa/uropa.py ``` -```bash -Usage: uropa [options] +```python +Usage: uropa.py [options] Available options: - -h, --help print this help message and further details on the configuration file - -i, --input filename of configuration file - -o, --output directory for results and prefix of the output file name - -r, --reformat create an additional compact and line-reduced table as result file - -s, --summary filename of additional visualisation of results in graphical format - -t n, --threads n multiprocessed run: n = number of threads to run annotation process - -no-comment do not show comment lines in output files - -l, --log log file name for messages and warnings - -d, --debug print verbose messages (for debugging purposes) - -v, --version print the version and exit + - h, --help print this help message and further details on the configuration file + - i, --input filename of configuration file + - o, --output directory for results and prefix of the output file name + - r, --reformat create an additional compact and line-reduced table as result file + - s, --summary filename of additional visualisation of results in graphical format + - t n, --threads n multiprocessed run: n = number of threads to run annotation process + --no-comment do not show comment lines in output files + - l, --log log file name for messages and warnings + - d, --debug print verbose messages (for debugging purposes) + - v, --version print the version and exit ``` How to cite ----------- -Kondili M, Fust A, Preussner J, Kuenne C, Braun T, and Looso M, UROPA: a tool for Universal RObust Peak Annotation. *(in preparation)* +Kondili M, Fust A, Preussner J, Kuenne C and Looso M. UROPA: a tool for Universal RObust Peak Annotation. *(in preparation)* Contribute ---------- From fe230b489bfa6038cb3312ff03b27b194bef8458 Mon Sep 17 00:00:00 2001 From: mkondili Date: Sun, 29 Jan 2017 19:11:27 +0100 Subject: [PATCH 02/27] Added logger to parse_parameters call --- uropa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uropa.py b/uropa.py index 9c82d06..051ca02 100755 --- a/uropa.py +++ b/uropa.py @@ -143,7 +143,7 @@ logger.error("File %s contains malformed JSON. %s", config, e) sys.exit() - parameters = cfg.parse_parameters(cfg_dict, log=None) # ,logger + parameters = cfg.parse_parameters(cfg_dict, logger) priority = parameters["priority"] annot_gtf = parameters["gtf"] peaks_bed = parameters["bed"] From 00a17c8e2ee640c20345faccf82e13cc831a3cf1 Mon Sep 17 00:00:00 2001 From: mkondili Date: Sun, 29 Jan 2017 19:13:19 +0100 Subject: [PATCH 03/27] logpath=cwd when no dir given --- uropa.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/uropa.py b/uropa.py index 051ca02..224c53e 100755 --- a/uropa.py +++ b/uropa.py @@ -117,6 +117,8 @@ if args.log is not None: logpath = os.path.dirname(args.log) + # when no dir given, consider current dir to save it + logpath = os.getcwd() if logpath =="" or logpath=="." else logpath if not os.path.exists(logpath): try: os.makedirs(logpath) From 5bbd2688809bc95a1460d7b469fe35acaadc996a Mon Sep 17 00:00:00 2001 From: mkondili Date: Sun, 29 Jan 2017 19:17:42 +0100 Subject: [PATCH 04/27] if name =main() :in end of script to call def "main" --- uropa.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/uropa.py b/uropa.py index 224c53e..df26e5a 100755 --- a/uropa.py +++ b/uropa.py @@ -29,7 +29,7 @@ import uropa.overlaps as ovls import uropa.annotation as ant -if __name__ == "__main__": +def main(): parser = argparse.ArgumentParser( prog="uropa.py", @@ -288,9 +288,9 @@ logger.warning( "Split command not available. Falling back to one thread.") -# -# Processing peaks -# + # + # Processing peaks + # input_args = [outdir, gtf_index, query_attributes, queries, max_distance, pr, gtf_has_chr] # Output FileNames common for any thread option @@ -458,3 +458,8 @@ os.remove(gtf_cut_file + ".sorted") logger.info("End time: %s", datetime.datetime.now().strftime("%d.%m.%Y %H:%M")) + + +#Call main to run uropa: +if __name__ == '__main__': + main() From 8cfeb5e1caf876bf25289a380a4b038bb6c7dd6f Mon Sep 17 00:00:00 2001 From: mkondili Date: Sun, 29 Jan 2017 19:24:13 +0100 Subject: [PATCH 05/27] Tabix*f*ile(not F) & anno.fetch without "multiple_iterators" --- uropa/annotation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/uropa/annotation.py b/uropa/annotation.py index 9469cac..923d228 100644 --- a/uropa/annotation.py +++ b/uropa/annotation.py @@ -12,7 +12,7 @@ def annotation_process(input_args, peak_file, log=None): priority, gtf_has_chr] = input_args try: - anno = pysam.TabixFile(gtf_index) + anno = pysam.Tabixfile(gtf_index) except IOError: log.error("Unable to open tabix index {}.".format(gtf_index)) except ValueError: @@ -64,7 +64,7 @@ def annotation_process(input_args, peak_file, log=None): str(peak['estart']) + '-' + str(peak['eend']) try: - hits = anno.fetch(tabix_query, multiple_iterators=True) + hits = anno.fetch(tabix_query) except ValueError: hits = list() if log is not None: From 2c757db7a30acc55c8c0496375241e66f93cf3b4 Mon Sep 17 00:00:00 2001 From: mkondili Date: Sun, 29 Jan 2017 19:37:15 +0100 Subject: [PATCH 06/27] remove log.debug msgs,shrink comments. --- uropa/annotation.py | 58 +++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/uropa/annotation.py b/uropa/annotation.py index 923d228..8969bec 100644 --- a/uropa/annotation.py +++ b/uropa/annotation.py @@ -131,11 +131,10 @@ def annotation_process(input_args, peak_file, log=None): if not valid_queries: has_hits.append(False) - # ----- All hits parsed. Check only hits with valid-queries now ----- # - #log.debug("\n---All hits parsed for the peak.Totally, the Peak has {} hits from which {} are Valid ---".format( len(has_hits), has_hits.count(True) )) - # 9 cols= feat, f.start, f.end, f.strand, dist, min_pos, genom_loc, - # feat-to-peak-ovl , peak-to-feat-ovl (no Q) + # All hits parsed. Check only hits with valid-queries now.. nas_len = len(attrib_k) + 9 + # 9 cols= feat, f.start, f.end, f.strand, dist, min_pos, genom_loc, ++ # feat-to-peak-ovl , peak-to-feat-ovl (no Q) NAsList_q = list(np.repeat("NA", nas_len)) # Search hits with only Prior or Secondary Queries @@ -148,18 +147,18 @@ def annotation_process(input_args, peak_file, log=None): if queries[j]["direction"] != ['any_direction']: correct_dir = ovls.define_direction(queries[j]["direction"], int(hit[3]), int( hit[4]), strand, peak['length'], int(peak['start']), int(peak['end']), peak['center']) - #logger.debug("\nFor the hit {}-{}, direction is correct : {}".format(hit[3],hit[4] ,correct_dir)) + if correct_dir: # IF direction of hit is the desired, then filter - # for Distance + # for Distance (Dhit) min_pos, Dhit = ovls.distance_to_peak_center(peak['center'], int( hit[3]), int(hit[4]), strand, queries[j]["feature.anchor"]) - #logger.debug("\nClosest Distance from the peak center : {} ".format(Dhit)) + if not correct_dir and Best_hits_tab[j][peak['id']] == "" and All_hits_tab[j][peak['id']] == "": # hit with Incorrect Dir: move to new peak, fill in NAs if peak is empty - #log.debug("\nThe hit doesn't have the correct direction -> NAs in All & Best hits table -> Continue to New hit.") + #The hit doesn't have the correct direction,All & Best hits = NA All_hits_tab[j][peak['id']] = "\t".join(np.hstack([peak['name'], peak['chr'], peak[ 'start'], peak['center'], peak['end'], NAsList_q, str(j) + "\n"])) Best_hits_tab[j][peak['id']] = "\t".join(np.hstack([peak['name'], peak['chr'], peak[ @@ -173,7 +172,7 @@ def annotation_process(input_args, peak_file, log=None): elif queries[j]['direction'] == ['any_direction']: min_pos, Dhit = ovls.distance_to_peak_center(peak['center'], int( hit[3]), int(hit[4]), strand, queries[j]["feature.anchor"]) - #log.debug("\nClosest Distance(Dhit) when No Direction defined :{} ".format(Dhit)) + # > Check OVERLAPS of peak-hit, define genomic_location genomic_location = "not.specified" @@ -194,13 +193,13 @@ def annotation_process(input_args, peak_file, log=None): if len(queries[j]["distance"]) == 1: d_is_best = ovls.get_distance_by_dir([min_dist[j][peak['id']][0], min_dist[j][ peak['id']][0]], genomic_location, internals_location, Dhit) - - # print "Best distance found :{}".format(d_is_best) - if d_is_best and not same_gene: # Dhit < Dbest - #log.debug("\nDistance of Hit from Peak Center = {}, INFERIOR to current Min Distance = {} ".format(Dhit, min_dist[j][peak['id']][0])) + #Best distance found = d_is_best + + # Dhit < Dbest + if d_is_best and not same_gene: + #> Update the Min_distance min_dist[j][peak['id']] = ovls.get_besthit(j, len(queries[j]["distance"]), peak[ 'id'], hit, attrib_k, Dhit, min_dist) - #print("Minimum distance updated: {}".format(min_dist[j][peak['id']][0])) Best_res = ovls.create_table(peak['name'], peak['chr'], peak['start'], peak['end'], str( peak['center']), min_dist[j][peak['id']], attrib_k, min_pos, genomic_location, ovl_pk, ovl_feat, j) Best_hits_tab[j][peak['id']] = Best_res @@ -208,7 +207,7 @@ def annotation_process(input_args, peak_file, log=None): # Dhit > Dbest # only if empty fill in with NAs elif not d_is_best and not same_gene and Best_hits_tab[j][peak['id']] == "": - #log.debug("\nDistance of Hit from Peak Center = {}, LARGER THAN current Min Distance = {}\n".format(Dhit, min_dist[j][peak['id']][0])) + Best_hits_tab[j][peak['id']] = "\t".join(np.hstack([peak['name'], peak['chr'], peak[ 'start'], str(peak['center']), peak['end'], NAsList_q, str(j) + "\n"])) @@ -222,27 +221,25 @@ def annotation_process(input_args, peak_file, log=None): "distance"][0]], genomic_location, internals_location, Dhit) if Dhit_smaller and not same_gene: - #log.debug("Distance of hit SMALLER than Config Distance -> Hit written to All_Hits.") + All_hits_tab[j][peak['id']] = ovls.write_hit_to_All(All_hits_tab, peak['id'], attrib_k, Dhit, hit, peak['name'], peak['chr'], peak['start'], peak['end'], str(peak['center']), min_pos, genomic_location, ovl_pk, ovl_feat, j) # Hit not fitting in any of the distance values if not Dhit_smaller and queries[j]["internals"] != ["True"] and (All_hits_tab[j][peak['id']] == ''): - #log.debug("\nDhit= {} is LARGER than config Distance:{}.".format(Dhit, queries[j]["distance"])) - # Write over empty or NA the new NA, for new hit,so - # each hit is parsed. + + # Write over empty or NA the new NA, for new hit,so each hit is parsed. All_hits_tab[j][peak['id']] = "\t".join(np.hstack([peak['name'], peak['chr'], peak[ 'start'], str(peak['center']), peak['end'], NAsList_q, str(j) + "\n"])) # Hits with further Distance -> Check for Internals if not Dhit_smaller and queries[j]["internals"] == ["True"] and not same_gene: if "Inside" in genomic_location: - #log.debug("\n-> Hit is 'Internal' with distance LARGER than config.distance.It will be recorded to the All_hits table.") All_hits_tab[j][peak['id']] = ovls.write_hit_to_All(All_hits_tab, peak['id'], attrib_k, Dhit, hit, peak['name'], peak['chr'], peak['start'], peak['end'], str(peak['center']), min_pos, genomic_location, ovl_pk, ovl_feat, j) if not Dhit_smaller and All_hits_tab[j][peak['id']] != '': - #log.debug("Hit has larger distance than allowed ") + #Hit has larger distance than allowed continue # > Add to Best Hits the Best Internal features(when internals=True, D>config."distance"), @@ -266,7 +263,7 @@ def annotation_process(input_args, peak_file, log=None): if internal[mv_pos] in ["PeakInsideFeature", "FeatureInsidePeak"] and min_val != []: Best_hits_tab[j][peak['id']] = hit_line[ mv_pos] + "\n" - #logger.debug("\nBest Internal hit to be recorded: {}\n".format(hit_line[mv_pos]+"\n")) + else: # if All_hits_tab[j][peak['id']] == "" : continue @@ -275,15 +272,15 @@ def annotation_process(input_args, peak_file, log=None): # Get them filled with NAs for a in range(len(queries)): if All_hits_tab[a][peak['id']] == "": - #log.debug("\nQuery {} didn't validate any hit ! All_hits will be filled in with NAs".format(a)) + #Query didn't validate any hit.All_hits will be filled in with NAs All_hits_tab[a][peak['id']] = "\t".join(np.hstack([peak['name'], peak['chr'], peak[ 'start'], str(peak['center']), peak['end'], NAsList_q, str(a) + "\n"])) if Best_hits_tab[a][peak['id']] == "": - #log.debug("\nQuery {} didn't validate any hit !Best hits will be filled in with NAs".format(a)) + #Query didn't validate any hit !Best hits will be filled in with NAs Best_hits_tab[a][peak['id']] = "\t".join(np.hstack([peak['name'], peak['chr'], peak[ 'start'], str(peak['center']), peak['end'], NAsList_q, str(a) + "\n"])) - # end.for_hit + #--- end.for_hit --- if not has_hits or True not in has_hits: # Write the Non Overlaps in the first dict,so it is saved only once. # log.debug ("\nPeak has no hits ! All queries Tables will be @@ -323,10 +320,10 @@ def annotation_process(input_args, peak_file, log=None): # Queries with NA: map(lambda qna : # All_hits_tab[qna][peak['id']], Q_NA) repl_q = min(QnotNA) # The query to be used instead of qO. - ##log.debug ("Priority Query hasn't given any hit, so it will be replaced by secondary Query's Hit: {} ".format (repl_q)) + ## When Priority_Query hasn't given any hit,it will be replaced by secondary Query's Hit :repl_q All_hits_tab[0][peak['id']] = Best_hits_tab[ 0][peak['id']] = "" - ##log.debug ("\nHits from Secondary Q to replace priority query: \n{} ".format(All_hits_tab[repl_q][peak['id']] )) + ## Hits from Secondary Q to replace priority query: All_hits_tab[repl_q][peak['id']] rest_QnotNA = [qn for qn in QnotNA if qn != repl_q] if any(rest_QnotNA): for qr in rest_QnotNA: @@ -337,11 +334,11 @@ def annotation_process(input_args, peak_file, log=None): qna][peak['id']] = "" if all(isNA): - #logg.debug("\nNo query has any Hit.No replacement of Priority Query possible-> NAs will be filled in the Output.") + # No query with Hit.No replacement of Priority Query possible. Output with NAs. TabInList_p = map(lambda l: All_hits_tab[l][ peak['id']], range(len(queries))) - ##log.debug("Hit lines for all queries are : {}".format(TabInList_p )) - # If all_hits_tab doesn't have all queries will have "" + # Hit lines for all queries are :TabInList_p + # If 'all_hits_tab' doesn't have any hit, all queries will have "". TabInList_p = [Tab for Tab in TabInList_p if Tab != ""] for j in enumerate(TabInList_p): @@ -353,7 +350,6 @@ def annotation_process(input_args, peak_file, log=None): peak['id']] = ovls.merge_queries(TabInList_p) Best_hits_tab[0][ peak['id']] = ovls.merge_queries(TabInList_p) - ##log.debug("\nQueries hits for this peak to be merged in the Q.col:{}".format(ovls.merge_queries(TabInList_p)) ) for r in rest_q: All_hits_tab[r][peak['id']] = Best_hits_tab[ r][peak['id']] = "" From 11f08031ff60efd49a4d4ab268e595890d7c3157 Mon Sep 17 00:00:00 2001 From: mkondili Date: Sun, 29 Jan 2017 19:39:25 +0100 Subject: [PATCH 07/27] not mention bigwig as optional key-not available --- uropa/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uropa/config.py b/uropa/config.py index f44f295..f1cb082 100644 --- a/uropa/config.py +++ b/uropa/config.py @@ -34,7 +34,7 @@ def howtoconfig(): "gtf": "/path/to/annotation/file.gtf" } - Optionally, the priority and bigwig keys can be used to fine tune UROPAs behaviour: + Optionally, the priority key can be used to fine tune UROPAs behaviour: { "queries": [ From 47db5cb67937e321cc8607d405a1bce3839df5fa Mon Sep 17 00:00:00 2001 From: mkondili Date: Sun, 29 Jan 2017 19:42:18 +0100 Subject: [PATCH 08/27] Add '-p gff' in run_tabix bash command was giving error : "-p not given" and file format unknown --- uropa/overlaps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uropa/overlaps.py b/uropa/overlaps.py index 3543b68..9dfd3bb 100644 --- a/uropa/overlaps.py +++ b/uropa/overlaps.py @@ -51,7 +51,7 @@ def tabix_index(annot_gtf): os.system('bgzip -c -f ' + annot_gtf + '.sorted ' + ' > ' + out_zipped) - run_tabix = 'tabix -f ' + out_zipped + run_tabix = 'tabix -f -p gff ' + out_zipped idx = subprocess.Popen(shlex.split(run_tabix), stdout=subprocess.PIPE) idx.wait() From 2bc87c42bfb499e21886198d73d0b134bd365243 Mon Sep 17 00:00:00 2001 From: mkondili Date: Sun, 29 Jan 2017 19:44:07 +0100 Subject: [PATCH 09/27] fix typo error --- uropa/overlaps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uropa/overlaps.py b/uropa/overlaps.py index 9dfd3bb..d95f29c 100644 --- a/uropa/overlaps.py +++ b/uropa/overlaps.py @@ -82,7 +82,7 @@ def valid_strand(hit_str, p_str, q_str): def valid_attribute(attr_filter_key, attr_filter_val, hit): - """Validates the hit accoring to a filter attribute.""" + """Validates the hit according to a filter attribute.""" if (attr_filter_key != "None") and (attr_filter_val != "None"): try: # If key for filtering is not correct or doesn't exist-> error From 548068fd4b34245c52772dc512b26647874f997a Mon Sep 17 00:00:00 2001 From: mkondili Date: Sun, 29 Jan 2017 19:50:15 +0100 Subject: [PATCH 10/27] added my small fixes post-Review --- CHANGES | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES b/CHANGES index 160d151..55cf490 100644 --- a/CHANGES +++ b/CHANGES @@ -1,5 +1,7 @@ ## 1.0.1 (2017-01-27) +- Edit pysam.TabixFile (Error :attribute doesn't exist) -> pysam.Tabixfile +- add flag "-p gff" in run_tabix call (overlaps.py) - updated summary script for new gridExtra requirements (rows=NULL instead of show.rownames=FALSE) - fixed log file bug - fixed internals feature to work for distances larger than default distance From c7b3e7abd7559dd9b09c0424dd42045e30ffc29c Mon Sep 17 00:00:00 2001 From: mkondili Date: Mon, 30 Jan 2017 13:05:42 +0100 Subject: [PATCH 11/27] correct Anaconda version to 4.2.0 --- README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 513ba5a..37ee7be 100644 --- a/README.md +++ b/README.md @@ -37,17 +37,18 @@ Make sure all prerequisites are met: - [R/Rscript](http://www.r-project.org/) (v3.3.0 or higher, follow instructions on url) - [Python](https://repo.continuum.io/archive/) (v2.7.8-anaconda-2.1.0) - * Choose Anaconda-2.1.0-Linux-x86 or -x86_64, + * Choose Anaconda2-4.2.0-Linux-x86 (for 32bit) or -x86_64 (for 64bit), * Install with ```bash Anaconda2-4.2.0-Linux-x86_64.sh```, * Follow instructions and type 'yes' to prepend Anaconda to PATH in your .bashrc file, - * or type after installation: ``` $export PATH=/home//anaconda/bin:$PATH ``` + * or type after installation: ``` $export PATH=/home//anaconda2/bin:$PATH ``` -- [htslib](https://github.com/samtools/htslib/blob/develop/INSTALL) (1.3.2 or higher, follow instructions on url) +- [htslib](http://www.htslib.org/download/) (1.3.2 or higher) + Follow instructions : * ```cd htslib-1.3.2/``` * ```./configure``` * ```make``` * ```make install ``` or ```sudo make prefix=/home//htslib install``` - + Further information given in [url](https://github.com/samtools/htslib/blob/develop/INSTALL) Install required packages for R: ```bash From bae47f5ab392d451e39068ce1f41c865f0dbf9e4 Mon Sep 17 00:00:00 2001 From: mkondili Date: Mon, 30 Jan 2017 14:12:55 +0100 Subject: [PATCH 12/27] add sudo to make install @htslib --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 37ee7be..805d692 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,8 @@ Make sure all prerequisites are met: * ```cd htslib-1.3.2/``` * ```./configure``` * ```make``` - * ```make install ``` or ```sudo make prefix=/home//htslib install``` + * ```sudo make install ``` (will be installed to /usr/local/bin/) + * or ```sudo make prefix=/home//htslib install``` Further information given in [url](https://github.com/samtools/htslib/blob/develop/INSTALL) Install required packages for R: From 535c3511064a7426b7d01a2cca192cf04f9aa7a2 Mon Sep 17 00:00:00 2001 From: mkondili Date: Wed, 1 Feb 2017 13:15:10 +0100 Subject: [PATCH 13/27] change output filenames-depnd on config name --- docs_rst/output.rst | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/docs_rst/output.rst b/docs_rst/output.rst index 5d78fef..601a2ac 100644 --- a/docs_rst/output.rst +++ b/docs_rst/output.rst @@ -1,26 +1,26 @@ Output tables ============= -UROPA provides multiple output files, each providing valuable information in either a more extended or a more condense way in order to cover the needs of a user. +UROPA provides multiple output files, each providing valuable information in either a more extended or a more condense way in order to cover the needs of a user. Details are given below: File overview ------------- -- **Uropa_AllHits**: Basic output table, reports for each peak all valid annotations and additionally NA rows for peaks without valid annotation. +- **Allhits.txt**: Basic output table, reports for each peak all valid annotations and additionally NA rows for peaks without valid annotation. -- **Uropa_FinalHits**: Filtered output table, it reports the best (closest) feature according to the config criteria for each peak. If multiple queries are given, it reports the best annotation taking multiple queries into account. +- **Finalhits.txt**: Filtered output table, it reports the best (closest) feature according to the config criteria for each peak. If multiple queries are given, it reports the best annotation taking multiple queries into account. -- **Uropa_BestperQuery_Hits**: This table is only produced if more than one query is given. It reports the best annotation per query for each peak. +- **Besthits.txt**: This table is only produced if more than one query is given. It reports the best annotation per query for each peak. -- **Uropa_Reformatted_HitsperPeak**: Another format of the BestperQuery Hits produced by using the ``-r`` parameter. A compact table with best per query annotation for each peak in one row is presented. +- **Reformatted_HitsperPeak.txt**: Another format of the BestperQuery Hits produced by using the ``-r`` parameter. A compact table with best per query annotation for each peak in one row is presented. -- **Uropa_Summary**: Statistical summary of the UROPA annotation by using the ``-s`` parameter. +- **Uropa_Summary.pdf**: Statistical summary of the UROPA annotation by using the ``-s`` parameter. .. note:: - The output files will be named additionally by the output directory name. - For example if uropa is used like this: ``uropa.py -i config.json -o ChIPanno`` + The output files will be named additionally by the basename of the configuration file used to run UROPA. + For example if uropa is used like this: ``uropa.py -i config_gene1.json `` - The result would contain: ChIPannot/Uropa_AllHits_ChIPannot.txt + The result would contain: ChIPannot/config_gene1_AllHits.txt , ChIPannot/config_gene1_FinalHits.txt Output columns explanation -------------------------- @@ -50,6 +50,10 @@ The four output tables mentioned above contain informative columns about the per - **query**: Valid query ID for the current annotation (0-based). +.. note :: + -The columns terminology can be included in the header of theoutput files if '--add-comments' option is used when executing UROPA. + + Output files (one query) ------------------------ UROPA annotation with one query results in two output tables. Those are the Uropa_AllHits and Uropa_FinalHits. From dadbc79e26e657e53e047dd147d2e0aff25b043a Mon Sep 17 00:00:00 2001 From: mkondili Date: Wed, 1 Feb 2017 13:28:55 +0100 Subject: [PATCH 14/27] reformatted -> compact.txt --- docs_rst/output.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs_rst/output.rst b/docs_rst/output.rst index 601a2ac..2ec27db 100644 --- a/docs_rst/output.rst +++ b/docs_rst/output.rst @@ -12,7 +12,7 @@ File overview - **Besthits.txt**: This table is only produced if more than one query is given. It reports the best annotation per query for each peak. -- **Reformatted_HitsperPeak.txt**: Another format of the BestperQuery Hits produced by using the ``-r`` parameter. A compact table with best per query annotation for each peak in one row is presented. +- **Besthits_compact.txt**: Another format of the BestperQuery Hits produced by using the ``-r`` parameteR. A compact reformatted table with best per query annotation for each peak in one row is presented. - **Uropa_Summary.pdf**: Statistical summary of the UROPA annotation by using the ``-s`` parameter. From fa0ec14f109e292d0289098b9f05b82333e42d2c Mon Sep 17 00:00:00 2001 From: mkondili Date: Tue, 7 Feb 2017 10:55:24 +0100 Subject: [PATCH 15/27] take in annikas changes(3/02) --- docs_rst/output.rst | 88 ++++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 46 deletions(-) diff --git a/docs_rst/output.rst b/docs_rst/output.rst index 2ec27db..1753e3b 100644 --- a/docs_rst/output.rst +++ b/docs_rst/output.rst @@ -1,27 +1,28 @@ -Output tables -============= -UROPA provides multiple output files, each providing valuable information in either a more extended or a more condense way in order to cover the needs of a user. +Output files +============ +UROPA provides multiple output files, each providing valuable information in either a more extended or a more condense way in order to cover the needs of a user. Details are given below: File overview ------------- -- **Allhits.txt**: Basic output table, reports for each peak all valid annotations and additionally NA rows for peaks without valid annotation. +- **allhits.txt**: Basic output table, reports for each peak all valid annotations and additionally NA rows for peaks without valid annotation. -- **Finalhits.txt**: Filtered output table, it reports the best (closest) feature according to the config criteria for each peak. If multiple queries are given, it reports the best annotation taking multiple queries into account. +- **finalhits.txt**: Filtered output table, it reports the best (closest) feature according to the config criteria for each peak. If multiple queries are given, it reports the best annotation taking multiple queries into account. -- **Besthits.txt**: This table is only produced if more than one query is given. It reports the best annotation per query for each peak. +- **besthits.txt**: This table is only produced if more than one query is given. It reports the best annotation per query for each peak. -- **Besthits_compact.txt**: Another format of the BestperQuery Hits produced by using the ``-r`` parameteR. A compact reformatted table with best per query annotation for each peak in one row is presented. +- **besthits_compact.txt**: Another format of the besthits produced by using the ``-r`` parameter. A compact table with best per query annotation for each peak in one row is presented. -- **Uropa_Summary.pdf**: Statistical summary of the UROPA annotation by using the ``-s`` parameter. +- **summary.pdf**: Statistical summary of the UROPA annotation generated by using the ``-s`` parameter. .. note:: - The output files will be named additionally by the basename of the configuration file used to run UROPA. - For example if uropa is used like this: ``uropa.py -i config_gene1.json `` + If no prefix is specified, the result files will be stored in call dir with the basename of the config file. + E.g. ``uropa.py -i histonMarkPeaks.json`` The allhits file would be called histonMarkPeaks_allhits.txt + With specified prefix, the result files will be stored in the call dir with the prefix as part of the names. + E.g. ``uropa.py -i histonMarkPeaks.json -p abc`` the allhits file would be called abc_allhits.txt, + or with ``-p xy/abc`` all results will be stored in folder xy, which will be created if it does not exist already. - The result would contain: ChIPannot/config_gene1_AllHits.txt , ChIPannot/config_gene1_FinalHits.txt - Output columns explanation -------------------------- @@ -50,23 +51,19 @@ The four output tables mentioned above contain informative columns about the per - **query**: Valid query ID for the current annotation (0-based). -.. note :: - -The columns terminology can be included in the header of theoutput files if '--add-comments' option is used when executing UROPA. - - Output files (one query) ------------------------ -UROPA annotation with one query results in two output tables. Those are the Uropa_AllHits and Uropa_FinalHits. -For a configuration as given below, the AllHits is given in Table 1, and the FinalHits is displayed in Table 2. +UROPA annotation with one query results in two output tables. Those are the allhits and finalhits. +For a configuration as given below, the allhits is given in Table 1, and the finalhits is displayed in Table 2. Peak and annotation files are further described in the :doc:`/uropa-example` section. The UROPA annotation process for one query can run into three cases for each peak: -- **Case 1**: No query gives any feature for annotating the peak, this leads to no valid annotation at all -> NA row in AllHits and FinalHits. +- **Case 1**: No query gives any feature for annotating the peak, this leads to no valid annotation at all -> NA row in allhits and finalhits. -- **Case 2**: There is one valid annotation for the specified query -> annotation will be given in AllHits and FinalHits. +- **Case 2**: There is one valid annotation for the specified query -> annotation will be given in allhits and finalhits. -- **Case 3**: There are multiple valid annotations for the specified query -> all valid annotations will be given in the AllHits, the best annotation (smallest distance) will be presented in the FinalHits. +- **Case 3**: There are multiple valid annotations for the specified query -> all valid annotations will be given in the allhits, the best annotation (smallest distance) will be presented in the finalhits. .. code:: json @@ -106,7 +103,7 @@ The UROPA annotation process for one query can run into three cases for each pea +----------+----------+------------+-------------+------------+-------------+---------+------------+-------------+-------------+-------------+----------+-------------------+---------------+---------------+-----------+----------------+-------+ -**Table 5.1:** Excerpt of table AllHits for one query as described in the configuration above. +**Table 5.1:** Excerpt of table allhits for one query as described in the configuration above. +----------+----------+------------+-------------+------------+-------------+---------+------------+-------------+-------------+-------------+----------+-------------------+---------------+---------------+-----------+----------------+-------+ | peak_id | peak_chr | peak_start | peak_center | peak_end | peak_strand | feature | feat_start | feature_end | feat_strand | feat_anchor | distance | genomic_location | feat_ovl_peak | peak_ovl_feat | gene_name | gene_type | query | @@ -129,22 +126,22 @@ The UROPA annotation process for one query can run into three cases for each pea +----------+----------+------------+-------------+------------+-------------+---------+------------+-------------+-------------+-------------+----------+-------------------+---------------+---------------+-----------+----------------+-------+ -**Table 5.2:** Excerpt of table FinalHits for one query as described in the configuration above. +**Table 5.2:** Excerpt of table finalhits for one query as described in the configuration above. As displayed in Table 1 and Table 2, peak 355 is a representative of Case 1. There is no valid annotation at all, thus there is an NA row in both output tables. -The peaks 356 and 765 belong to Case 2, there is one valid annotation for them, their annotation is displayed in the same way in AllHits and FinalHits. +The peaks 356 and 765 belong to Case 2, there is one valid annotation for them, their annotation is displayed in the same way in allhits and finalhits. -Peak 769 has three valid annotations for the specified query (Case 3). All of them are displayed in the AllHits output. -In the FinalHits only the best annotation, the one for gene NOP16 with the minimal distance of 937, is represented. +Peak 769 has three valid annotations for the specified query (Case 3). All of them are displayed in the allhits output. +In the finalhits only the best annotation, the one for gene NOP16 with the minimal distance of 937, is represented. Output files (multiple queries) -------------------------------- UROPA annotation with multiple queries and default priority results in at least three output tables. -Those are the Uropa_AllHits, Uropa_FinalHits, and Uropa_BestperQuery_Hits. -If the ``-r`` parameter is added in the command line call, there will the additional output Uropa_Reformatted_HitsperPeak file. -Furthermore, if the ``-s`` parameter is also added, the Uropa_Summary file is generated. +Those are the allhits, finalhits, and besthits. +If the ``-r`` parameter is added in the command line call, there will the additional output compact file. +Furthermore, if the ``-s`` parameter is also added, the summary file is generated. With a configuration as given below, the generated output files are generated as presented in Tables 3 to 6 and Figure 1. Peak and annotation files are further described in the :doc:`/uropa-example` section. @@ -153,8 +150,7 @@ The UROPA annotation process for multiple queries allows an additional case in r - **Case 1 to 3** as described for one query -- **Case 4**: There are valid annotations for one peak for multiple queries -> all valid annotations will be given in the AllHits, the best annotation (smallest distance across all queries) will be presented in the FinalHits. -Additionally, the best annotation per query will be displayed in the BestperQuery_Hits output. +- **Case 4**: There are valid annotations for one peak for multiple queries -> all valid annotations will be given in the allhits, the best annotation (smallest distance across all queries) will be presented in the finalhits. Additionally, the best annotation per query will be displayed in the besthits output. .. code:: json @@ -214,7 +210,7 @@ Additionally, the best annotation per query will be displayed in the BestperQuer | … | | | | | | | | | | | | | | | | | | +----------+----------+------------+-------------+------------+-------------+---------+------------+-------------+-------------+-------------+----------+-------------------+---------------+---------------+---------------+----------------+-------+ -**Table 5.3:** Excerpt of table AllHits for three queries as described in the configuration above. +**Table 5.3:** Excerpt of table allhits for three queries as described in the configuration above. +----------+----------+------------+-------------+------------+-------------+---------+------------+-------------+-------------+-------------+----------+-------------------+---------------+---------------+---------------+----------------+-------+ | peak_id | peak_chr | peak_start | peak_center | peak_end | peak_strand | feature | feat_start | feature_end | feat_strand | feat_anchor | distance | genomic_location | feat_ovl_peak | peak_ovl_feat | gene_name | gene_type | query | @@ -236,7 +232,7 @@ Additionally, the best annotation per query will be displayed in the BestperQuer | … | | | | | | | | | | | | | | | | | | +----------+----------+------------+-------------+------------+-------------+---------+------------+-------------+-------------+-------------+----------+-------------------+---------------+---------------+---------------+----------------+-------+ -**Table 5.4:** Excerpt of table FinalHits for three queries as described in the configuration above. +**Table 5.4:** Excerpt of table finalhits for three queries as described in the configuration above. +----------+----------+------------+-------------+------------+-------------+---------+------------+-------------+-------------+-------------+----------+-------------------+---------------+---------------+---------------+----------------+-------+ | peak_id | peak_chr | peak_start | peak_center | peak_end | peak_strand | feature | feat_start | feature_end | feat_strand | feat_anchor | distance | genomic_location | feat_ovl_peak | peak_ovl_feat | gene_name | gene_type | query | @@ -274,23 +270,23 @@ Additionally, the best annotation per query will be displayed in the BestperQuer | … | | | | | | | | | | | | | | | | | | +----------+----------+------------+-------------+------------+-------------+---------+------------+-------------+-------------+-------------+----------+-------------------+---------------+---------------+---------------+----------------+-------+ -**Table 5.5:** Excerpt of table BestperQuery_Hits for three queries as described in the configuration above. +**Table 5.5:** Excerpt of table besthits for three queries as described in the configuration above. .. note:: - The BestperQuery_Hits is only generated if multiple queries are specified and the priority flag is set to FALSE! If this flag is TRUE, there will be only one valid query. There can be multiple valid annotations for one peak, but all based on one query. In this case only the AllHits and FinalHits are produced. + The besthits is only generated if multiple queries are specified and the priority flag is set to FALSE! If this flag is TRUE, there will be only one valid query. There can be multiple valid annotations for one peak, but all based on one query. In this case only the allhits and finalhits are produced. Same as in the first example with one query, peak_355 has no valid annotation and is represented as NA row (correspond to Case 1). -In the AllHits (Table 3) and BestperQuery_Hits (Table 5) there will be one NA row for each query. In the FinalHits (Table 4) there will be only one NA row for all queries. +In the allhits (Table 3) and besthits (Table 5) there will be one NA row for each query. In the finalhits (Table 4) there will be only one NA row for all queries. -The peak_356 has only for one query a valid annotation, as given in AllHits, FinalHits, and BestperQuery_Hits (Case 2). In AllHits and BestperQuery_Hits there are additional NA rows for this peak for the other queries without a hit. +The peak_356 has only for one query a valid annotation, as given in allhits, finalhits, and besthits (Case 2). In allhits and besthits there are additional NA rows for this peak for the other queries without a hit. -For peak_765 there are valid annotations for all given queries as displayed in the AllHits, (Case 4). The best of these is the annotation for the lincRNA, therefore this annotation is displayed in the FinalHits. -Because there is only one valid annotation for each query, they will be displayed in the same way in the BestperQuery_Hits. +For peak_765 there are valid annotations for all given queries as displayed in the allhits, (Case 4). The best of these is the annotation for the lincRNA, therefore this annotation is displayed in the finalhits. +Because there is only one valid annotation for each query, they will be displayed in the same way in the besthits. -This is different for peak_769. As described above this peaks equates to Case 3. With multiple queries, there will be additional NA rows for the invalid queries in the AllHits and BestperQuery_Hits. +This is different for peak_769. As described above this peaks equates to Case 3. With multiple queries, there will be additional NA rows for the invalid queries in the allhits and besthits. -With multiple queries UROPA provides an option to reformat the BestperQuery_Hits table in order to condense the best per query annotations for each peak in one row. -A reformatted example for the BestperQuery_Hits of Table 5 is presented in Tables 6. +With multiple queries UROPA provides an option to reformat the besthits table in order to condense the best per query annotations for each peak in one row. +A reformatted example for the besthits of Table 5 is presented in Tables 6. The Reformatted_HitsperPeak represents all information for each peak in one row. Within this format the information for query 0 is always given at the first position, for query 1 at second positon etc. To receive this output format, the parameter ``-r`` has to be added to the command line call. @@ -332,7 +328,7 @@ Detailed information about the different plots and how they are dedicated to the - Specified queries, and value of the priority flag (Figure 1A) - Number of peaks per query -**Graphs based on the 'FinalHits' output:** +**Graphs based on the 'finalhits' output:** - Density plot displaying the distance per feature across all queries (Figure 1B) - Pie chart illustrating the genomic locations of the peaks per annotated feature (Figure 1C) @@ -340,7 +336,7 @@ Detailed information about the different plots and how they are dedicated to the *Figure 1A-C would be the summary for the first example UROPA run given above with only one query* -**Graphs based on the 'BestperQuery_Hits' output:** +**Graphs based on the 'besthits' output:** - Distribution of the distances per feature per query is displayed in a histogram (Figure 1D) - Pie chart illustrating the genomic locations of the peaks per annotated feature (not illustrated) @@ -350,5 +346,5 @@ Detailed information about the different plots and how they are dedicated to the .. figure:: img/output-formats-summary.png Figure 1: Summary file example for queries as described above: (A) Summary of specified queries, used annotation and peak files, and how many peaks were present and annotated, - (B) Distance density for all features based on FinalHits, (C) Pie Chart representing genomic location for each feature across FinalHits, - (D) Distance per query per feature across BestperQuery_Hits, (E) Pairwise comparison across all queries displayed in Venn diagrams, (F) Chow Ruskey plot to compare all queries. + (B) Distance density for all features based on finalhits, (C) Pie Chart representing genomic location for each feature across finalhits, + (D) Distance per query per feature across besthits, (E) Pairwise comparison across all queries displayed in Venn diagrams, (F) Chow Ruskey plot to compare all queries. From 4d6bbe77002fc8aef428d77d9f9d418617fad0ea Mon Sep 17 00:00:00 2001 From: mkondili Date: Tue, 7 Feb 2017 10:57:31 +0100 Subject: [PATCH 16/27] add "why summary empty" --- docs_rst/help.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs_rst/help.rst b/docs_rst/help.rst index 5f7deaf..ee5c91d 100644 --- a/docs_rst/help.rst +++ b/docs_rst/help.rst @@ -16,5 +16,6 @@ They will be addressed as soon as possible. * **How to annotate with default values?** In order to activate default values, the key itself shouldn't be present in the config file. If you want to annotate with default values only, do not remove the queries key, but leave it empty like "queries":[] - +* **Why is the summary empty?** + It is a problem about gridExtra and ggplot2, the summary starts from page 2 From 88d64d8932d7725b13afecb21cc8a44a41ed9cf3 Mon Sep 17 00:00:00 2001 From: mkondili Date: Tue, 7 Feb 2017 10:59:20 +0100 Subject: [PATCH 17/27] update accrd to output names --- docs_rst/index.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs_rst/index.rst b/docs_rst/index.rst index 945898f..564a7d6 100644 --- a/docs_rst/index.rst +++ b/docs_rst/index.rst @@ -17,7 +17,7 @@ annotation. - Utilization of any available GTF files as annotation reference - Multiple queries can be processed in a single run - Graduated annotation due to prioritization -- Multiple output tables (AllHits, FinalHits, BestperQuery\_Hits) +- Multiple output tables (allhits, finalhits, besthits) - Visual summary for annotation evaluation - Preparation of custom annotation files with the UROPA to GTF utility @@ -55,4 +55,4 @@ The project is licensed under the MIT license (see :doc:`/license`) custom license help - \ No newline at end of file + From adf7104e2e8ea211260c410f1c8962cd1f2fe6b0 Mon Sep 17 00:00:00 2001 From: mkondili Date: Tue, 7 Feb 2017 11:52:06 +0100 Subject: [PATCH 18/27] update with conda-pysam/no htslib anymore --- docs_rst/install.rst | 61 ++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 36 deletions(-) diff --git a/docs_rst/install.rst b/docs_rst/install.rst index 525b218..d264ed2 100644 --- a/docs_rst/install.rst +++ b/docs_rst/install.rst @@ -5,43 +5,31 @@ Prerequisites ----------------- For running UROPA locally, the following prerequisites have to be met: -- `Python`_, v2.7 (packages: numpy, sampy) -- `R/Rscript`_, v3.3.0 or higher (packages: devtools, ggplot2, gplots, gridExtra, jsonlite, RBGL, graph, VennDiagram, Vennerable, snow) -- `htslib`_ 1.3.2 or higher - -Instructions ------------------ - -Python -~~~~~~~~~~ -As numpy and sampy packages can be taxing to install in Python, we recommend the usage of the `Anaconda`_ distribution instead (v2.7.8-anaconda-2.1.0 or higher). Install with eg. ``bash Anaconda2-4.2.0-Linux-x86_64.sh`` and include the path in the environment variable ``export PATH=dir/to/python_anaconda:$PATH``. - -Using standard Python 2.7, the packages can be installed with ``pip install pysam numpy``. - -R -~~~~~ -The following packages are hosted by CRAN and can be installed from the R console with the syntax ``install.packages("packagename")``. - -- `devtools`_ -- `ggplot2`_ -- `gplots`_ -- `gridExtra`_ -- `jsonlite`_ -- `VennDiagram`_ -- `snow`_ - -The RBGL and graph packages are hosted by BioConductor. To install those start R and type ``source("https://bioconductor.org/biocLite.R")`` and ``biocLite(c("RBGL","graph"))``. - -Vennerable has to be installed with ``library("devtools")`` followed by ``install_github("jenzopr/Vennerable")``. - -HTSlib -~~~~~ -The HTSlib library is necessary for the indexing of the reference features using Tabix. For installation instructions please refer to the following url: `htslib`_. - +- `Python`_, v2.7 + - download Anaconda for Linux version Python 2.7 to direction where python should be installed + - run ``bash Anaconda2-4.3.0-Linux-x86_64.sh`` + - Answer the question "Do you wish the installer to prepend the Anaconda2 install location to PATH in your /home/.../.bashrc ?" with yes OR do ``PATH=dir/to/python_anaconda:$PATH`` after the installation process has finished + - run ``conda install -c bioconda pysam`` + - if you are NOT using the anaconda version of python 2, the packages `pysam`_ and `numpy`_ can be installed with ``pip install pysam numpy`` +- `R/Rscript`_, v3.3.0 or higher (follow the instructions on url) + Install packages: + + - ``install.packages(c("`ggplot2`_", "`devtools`_", "`gplots`_", "`gridExtra`_", "`jsonlite`_", "`VennDiagram`_"))`` + + ## choose mirror + + - ``source("https://bioconductor.org/biocLite.R")`` + + - ``biocLite(c("RBGL","graph"))`` + + - ``library(devtools)`` + + - ``install_github("jenzopr/Vennerable")`` + +- `git`_ with ``bash sudo apt-get install git`` UROPA -~~~~~ - +----- UROPA itself can be installed by simply cloning the Github library and adding the target folder to the system environment variable. .. code:: bash @@ -54,12 +42,13 @@ UROPA itself can be installed by simply cloning the Github library and adding th .. _R/Rscript: http://www.r-project.org/ .. _Python: http://continuum.io/downloads .. _Anaconda: http://continuum.io/downloads -.. _htslib: http://www.htslib.org/download/ +.. _git: https://git-scm.com/ .. _numpy: http://www.numpy.org .. _pysam: https://pysam.readthedocs.io/en/latest/index.html .. _ggplot2: https://cran.r-project.org/web/packages/ggplot2/index.html .. _gplots: https://cran.r-project.org/web/packages/gplots/index.html .. _gridExtra: https://cran.r-project.org/web/packages/gridExtra/index.html +.. _gridExtra: https://cran.r-project.org/web/packages/gridExtra/index.html .. _jsonlite: https://cran.r-project.org/web/packages/jsonlite/index.html .. _VennDiagram: https://cran.r-project.org/web/packages/VennDiagram/index.html .. _snow: https://cran.r-project.org/web/packages/snow/index.html From 81107e2975d9a2fb1e3b2894043f590aa24c3e15 Mon Sep 17 00:00:00 2001 From: mkondili Date: Tue, 7 Feb 2017 11:53:42 +0100 Subject: [PATCH 19/27] load R --- docs_rst/install.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs_rst/install.rst b/docs_rst/install.rst index d264ed2..caf4368 100644 --- a/docs_rst/install.rst +++ b/docs_rst/install.rst @@ -12,7 +12,8 @@ For running UROPA locally, the following prerequisites have to be met: - run ``conda install -c bioconda pysam`` - if you are NOT using the anaconda version of python 2, the packages `pysam`_ and `numpy`_ can be installed with ``pip install pysam numpy`` - `R/Rscript`_, v3.3.0 or higher (follow the instructions on url) - Install packages: + + Load R and install packages: - ``install.packages(c("`ggplot2`_", "`devtools`_", "`gplots`_", "`gridExtra`_", "`jsonlite`_", "`VennDiagram`_"))`` From 0f2647d4f2e073878921b8666957cb384dcc2034 Mon Sep 17 00:00:00 2001 From: mkondili Date: Tue, 7 Feb 2017 11:54:18 +0100 Subject: [PATCH 20/27] indent fix --- docs_rst/install.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs_rst/install.rst b/docs_rst/install.rst index caf4368..0defee4 100644 --- a/docs_rst/install.rst +++ b/docs_rst/install.rst @@ -13,7 +13,7 @@ For running UROPA locally, the following prerequisites have to be met: - if you are NOT using the anaconda version of python 2, the packages `pysam`_ and `numpy`_ can be installed with ``pip install pysam numpy`` - `R/Rscript`_, v3.3.0 or higher (follow the instructions on url) - Load R and install packages: +Load R and install packages: - ``install.packages(c("`ggplot2`_", "`devtools`_", "`gplots`_", "`gridExtra`_", "`jsonlite`_", "`VennDiagram`_"))`` From a2647cb6c5ef8f727953443f327068bfb7639f25 Mon Sep 17 00:00:00 2001 From: mkondili Date: Tue, 7 Feb 2017 11:57:49 +0100 Subject: [PATCH 21/27] how to call uropa.py by "alias" --- docs_rst/install.rst | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/docs_rst/install.rst b/docs_rst/install.rst index 0defee4..fc3d938 100644 --- a/docs_rst/install.rst +++ b/docs_rst/install.rst @@ -15,17 +15,18 @@ For running UROPA locally, the following prerequisites have to be met: Load R and install packages: - - ``install.packages(c("`ggplot2`_", "`devtools`_", "`gplots`_", "`gridExtra`_", "`jsonlite`_", "`VennDiagram`_"))`` +- ``install.packages(c("ggplot2", "devtools", "gplots", "gridExtra", "jsonlite", "VennDiagram"))`` - ## choose mirror +## choose mirror - - ``source("https://bioconductor.org/biocLite.R")`` +- ``source("https://bioconductor.org/biocLite.R")`` - - ``biocLite(c("RBGL","graph"))`` +- ``biocLite(c("RBGL","graph"))`` - - ``library(devtools)`` +- ``library(devtools)`` + +- ``install_github("jenzopr/Vennerable")`` - - ``install_github("jenzopr/Vennerable")`` - `git`_ with ``bash sudo apt-get install git`` @@ -37,7 +38,8 @@ UROPA itself can be installed by simply cloning the Github library and adding th git clone https://github.molgen.mpg.de/loosolab/UROPA export PATH=$PATH:dir/to/uropa - + -> Open ``.bashrc`` file and create alias name to call ``uropa``: + ``alias uropa= /dir/to/UROPA/uropa.py`` .. _R/Rscript: http://www.r-project.org/ From a6eacf13828712d5fc2c31f53eb00e1badd59d5b Mon Sep 17 00:00:00 2001 From: mkondili Date: Tue, 7 Feb 2017 12:02:15 +0100 Subject: [PATCH 22/27] write in bashrc --- docs_rst/install.rst | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/docs_rst/install.rst b/docs_rst/install.rst index fc3d938..9d10266 100644 --- a/docs_rst/install.rst +++ b/docs_rst/install.rst @@ -15,17 +15,17 @@ For running UROPA locally, the following prerequisites have to be met: Load R and install packages: -- ``install.packages(c("ggplot2", "devtools", "gplots", "gridExtra", "jsonlite", "VennDiagram"))`` + - ``install.packages(c("ggplot2", "devtools", "gplots", "gridExtra", "jsonlite", "VennDiagram"))`` ## choose mirror -- ``source("https://bioconductor.org/biocLite.R")`` + - ``source("https://bioconductor.org/biocLite.R")`` -- ``biocLite(c("RBGL","graph"))`` + - ``biocLite(c("RBGL","graph"))`` -- ``library(devtools)`` + - ``library(devtools)`` -- ``install_github("jenzopr/Vennerable")`` + - ``install_github("jenzopr/Vennerable")`` - `git`_ with ``bash sudo apt-get install git`` @@ -37,9 +37,8 @@ UROPA itself can be installed by simply cloning the Github library and adding th .. code:: bash git clone https://github.molgen.mpg.de/loosolab/UROPA - export PATH=$PATH:dir/to/uropa - -> Open ``.bashrc`` file and create alias name to call ``uropa``: - ``alias uropa= /dir/to/UROPA/uropa.py`` + ## Open ``.bashrc`` file and create alias name to call ``uropa`` in each session easily : + ``alias uropa='/dir/to/UROPA/uropa.py' `` .. _R/Rscript: http://www.r-project.org/ From 4992b89c7b6d08c281e45f7a304407fb16720464 Mon Sep 17 00:00:00 2001 From: mkondili Date: Tue, 7 Feb 2017 12:03:26 +0100 Subject: [PATCH 23/27] remove code-quotes --- docs_rst/install.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs_rst/install.rst b/docs_rst/install.rst index 9d10266..3af5096 100644 --- a/docs_rst/install.rst +++ b/docs_rst/install.rst @@ -37,7 +37,7 @@ UROPA itself can be installed by simply cloning the Github library and adding th .. code:: bash git clone https://github.molgen.mpg.de/loosolab/UROPA - ## Open ``.bashrc`` file and create alias name to call ``uropa`` in each session easily : + ## Open .bashrc file and create alias name to call uropa in each session easily : ``alias uropa='/dir/to/UROPA/uropa.py' `` From ad9e28357213e44e5a77a45b3e2749e93a532706 Mon Sep 17 00:00:00 2001 From: mkondili Date: Tue, 7 Feb 2017 12:06:10 +0100 Subject: [PATCH 24/27] change indents --- docs_rst/install.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs_rst/install.rst b/docs_rst/install.rst index 3af5096..2f611a2 100644 --- a/docs_rst/install.rst +++ b/docs_rst/install.rst @@ -15,17 +15,17 @@ For running UROPA locally, the following prerequisites have to be met: Load R and install packages: - - ``install.packages(c("ggplot2", "devtools", "gplots", "gridExtra", "jsonlite", "VennDiagram"))`` +- ``install.packages(c("ggplot2", "devtools", "gplots", "gridExtra", "jsonlite", "VennDiagram"))`` -## choose mirror + Choose CRAN mirror. - - ``source("https://bioconductor.org/biocLite.R")`` +- ``source("https://bioconductor.org/biocLite.R")`` - - ``biocLite(c("RBGL","graph"))`` +- ``biocLite(c("RBGL","graph"))`` - - ``library(devtools)`` +- ``library(devtools)`` - - ``install_github("jenzopr/Vennerable")`` +- ``install_github("jenzopr/Vennerable")`` - `git`_ with ``bash sudo apt-get install git`` @@ -38,7 +38,7 @@ UROPA itself can be installed by simply cloning the Github library and adding th git clone https://github.molgen.mpg.de/loosolab/UROPA ## Open .bashrc file and create alias name to call uropa in each session easily : - ``alias uropa='/dir/to/UROPA/uropa.py' `` + alias uropa='/dir/to/UROPA/uropa.py' .. _R/Rscript: http://www.r-project.org/ From 603726d70c6230089a3220cba3c1681c92fbea80 Mon Sep 17 00:00:00 2001 From: mkondili Date: Tue, 7 Feb 2017 12:07:28 +0100 Subject: [PATCH 25/27] change indents of R packages --- docs_rst/install.rst | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/docs_rst/install.rst b/docs_rst/install.rst index 2f611a2..b4a25d3 100644 --- a/docs_rst/install.rst +++ b/docs_rst/install.rst @@ -15,18 +15,12 @@ For running UROPA locally, the following prerequisites have to be met: Load R and install packages: -- ``install.packages(c("ggplot2", "devtools", "gplots", "gridExtra", "jsonlite", "VennDiagram"))`` - + - ``install.packages(c("ggplot2", "devtools", "gplots", "gridExtra", "jsonlite", "VennDiagram"))`` Choose CRAN mirror. - -- ``source("https://bioconductor.org/biocLite.R")`` - -- ``biocLite(c("RBGL","graph"))`` - -- ``library(devtools)`` - -- ``install_github("jenzopr/Vennerable")`` - + - ``source("https://bioconductor.org/biocLite.R")`` + - ``biocLite(c("RBGL","graph"))`` + - ``library(devtools)`` + - ``install_github("jenzopr/Vennerable")`` - `git`_ with ``bash sudo apt-get install git`` From ed02d96249def0f7861419f4f193aed2de54453c Mon Sep 17 00:00:00 2001 From: mkondili Date: Tue, 7 Feb 2017 12:10:12 +0100 Subject: [PATCH 26/27] change indents-3 --- docs_rst/install.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs_rst/install.rst b/docs_rst/install.rst index b4a25d3..f37a156 100644 --- a/docs_rst/install.rst +++ b/docs_rst/install.rst @@ -16,10 +16,14 @@ For running UROPA locally, the following prerequisites have to be met: Load R and install packages: - ``install.packages(c("ggplot2", "devtools", "gplots", "gridExtra", "jsonlite", "VennDiagram"))`` + Choose CRAN mirror. - ``source("https://bioconductor.org/biocLite.R")`` + - ``biocLite(c("RBGL","graph"))`` + - ``library(devtools)`` + - ``install_github("jenzopr/Vennerable")`` - `git`_ with ``bash sudo apt-get install git`` From 6ea4423fc0f3f6c3ed6771c4c278bf96e35277f2 Mon Sep 17 00:00:00 2001 From: mkondili Date: Tue, 7 Feb 2017 12:10:56 +0100 Subject: [PATCH 27/27] add lines to make dotted list in Python section --- docs_rst/install.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs_rst/install.rst b/docs_rst/install.rst index f37a156..14a3449 100644 --- a/docs_rst/install.rst +++ b/docs_rst/install.rst @@ -7,10 +7,15 @@ For running UROPA locally, the following prerequisites have to be met: - `Python`_, v2.7 - download Anaconda for Linux version Python 2.7 to direction where python should be installed + - run ``bash Anaconda2-4.3.0-Linux-x86_64.sh`` + - Answer the question "Do you wish the installer to prepend the Anaconda2 install location to PATH in your /home/.../.bashrc ?" with yes OR do ``PATH=dir/to/python_anaconda:$PATH`` after the installation process has finished + - run ``conda install -c bioconda pysam`` + - if you are NOT using the anaconda version of python 2, the packages `pysam`_ and `numpy`_ can be installed with ``pip install pysam numpy`` + - `R/Rscript`_, v3.3.0 or higher (follow the instructions on url) Load R and install packages: