diff --git a/uropa.py b/uropa.py old mode 100644 new mode 100755 index 7418d32..9c82d06 --- a/uropa.py +++ b/uropa.py @@ -104,7 +104,7 @@ # Configure logging logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) - + streamHandle = logging.StreamHandler() loggerFormat = logging.Formatter('[%(levelname)s] - %(message)s') streamHandle.setFormatter(loggerFormat) @@ -114,20 +114,21 @@ else: streamHandle.setLevel(logging.WARNING) logger.addHandler(streamHandle) - + if args.log is not None: logpath = os.path.dirname(args.log) if not os.path.exists(logpath): try: os.makedirs(logpath) - fileHandle = logging.FileHandler(args.log, mode='w') - fileHandle.setLevel(logging.DEBUG) - fileHandle.setFormatter(loggerFormat) - logger.addHandler(fileHandle) except OSError: logger.error("Could not create directory for log file {}".format(logpath)) - except IOError: - logger.error("Could not create log file {}".format(args.log)) + try: + fileHandle = logging.FileHandler(args.log, mode='w') + fileHandle.setLevel(logging.DEBUG) + fileHandle.setFormatter(loggerFormat) + logger.addHandler(fileHandle) + except IOError: + logger.error("Could not create log file {}".format(args.log)) logger.info("Start time: %s", datetime.datetime.now().strftime("%d.%m.%Y %H:%M")) @@ -181,7 +182,7 @@ query_features = list(chain.from_iterable([q["feature"] for q in queries])) feat_not_valid = [f for f in query_features if f not in gtf_feat] feat_valid = [f for f in query_features if f not in feat_not_valid] - + if feat_not_valid: logger.warning("Invalid features present (found in a query but not in the GTF): %s", ','.join(feat_not_valid)) @@ -200,7 +201,7 @@ summ_dict["gtf"] = annot_gtf summ_dict["bed"] = peaks_bed - with open("summary_config.json", "w") as fj: + with open(outdir+"summary_config.json", "w") as fj: json.dump(summ_dict, fj, indent=4) distances = reduce(list.__add__, map(lambda q: q["distance"], queries)) @@ -226,9 +227,9 @@ # Table-columns for All queries query_attributes_none = list([q["show.attributes"] for q in queries]) query_attributes = [a for a in np.unique(list(chain.from_iterable(query_attributes_none))) if a is not None and a != 'None'] - + logger.info("Additional attributes included in output: %s", ','.join(query_attributes)) - + # query included, when there is no hit from tabix nas_len = len(query_attributes) + 10 NAsList = list(np.repeat("NA", nas_len)) @@ -253,7 +254,7 @@ "genomic_location", "feat_ovl_peak", "peak_ovl_feat"] - + header = "\t".join(header_base + query_attributes + ["query"]) outdir_len = len(outdir.split("/")) @@ -321,7 +322,7 @@ allhits_partials = glob.glob(outdir + "AllHits_Partial*") besthits_partials = glob.glob(outdir + "BestHits_Partial*") - + ovls.write_final_file( args.threads, outdir, @@ -441,11 +442,11 @@ logger.warning("Visualized summary output could not be created from %s.", ' '.join(call)) except OSError: logger.warning("Rscript command not available for summary output.") - if os.path.exists(summary_output): - os.remove("summary_config.json") -# Remove all other un-necessary files : + # Remove all other un-necessary files : if outputs_ready: + if os.path.exists(outdir+"summary_config.json"): + os.remove(outdir+"summary_config.json") os.remove(gtf_index) # .gz os.remove(gtf_index + ".tbi") if not args.no_comments: diff --git a/uropa/overlaps.py b/uropa/overlaps.py index cb69ce5..3543b68 100644 --- a/uropa/overlaps.py +++ b/uropa/overlaps.py @@ -222,19 +222,14 @@ def get_distance_by_dir(inputD, genom_loc, intern_loc, Dhit): """Limits the hit by a distance window depending on peak's position relative-to-feature """ [D_upstr, D_downstr] = inputD - # D_upstr = D_downstr when len(input_D) = 1 if genom_loc == "upstream" or genom_loc == "overlapStart": return Dhit <= D_upstr if genom_loc == "downstream" or genom_loc == "overlapEnd": return Dhit <= D_downstr - if any(intern_loc): - best_dist = map(lambda l: Dhit <= D_upstr if l == - "upstream" else Dhit <= D_downstr, intern_loc) - # when 3 positions given: any Dhit inside limit-> hit accepted // if 1 - # given any(True) =True - return any(best_dist) + # Rescue if internal peak + return(any(intern_loc)) def detect_internals(pstart, pend, hit_start, hit_end): @@ -297,8 +292,7 @@ def define_genom_loc(current_loc, pstart, p_center, pend, hit_start, hit_end, hi def overlap_peak_feature(genom_loc, pstart, p_center, pend, peakL, featL, hit_start, hit_end, hit_strand): """Gives the collective information of the location and overlap of a peak to a feature """ - -# A/ Find internal features/peaks + # A/ Find internal features/peaks # hit_start, hit_end : as given in gtf/no inversion for '-'strand. genom_loc = detect_internals(pstart, pend, hit_start, hit_end) # B/ Calculate overlap Ratio peak/feat and feat/peak