From 64189fd12d90a056360917d5e60aa51b89c2f64c Mon Sep 17 00:00:00 2001 From: afust Date: Thu, 10 Jan 2019 15:18:01 +0100 Subject: [PATCH] multi feature bug fixed --- uropa/annotation.py | 7 ++++--- uropa/overlaps.py | 4 +++- uropa/uropa.py | 5 +++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/uropa/annotation.py b/uropa/annotation.py index feb15bf..01c3532 100644 --- a/uropa/annotation.py +++ b/uropa/annotation.py @@ -50,6 +50,7 @@ def annotation_process(input_args, peak_file, log=None): # 'name'] is not None else peak['id'] # Re-initialise table records for each peak for q in enumerate(queries): + All_hits_tab[q[0]][peak['id']] = "" Best_hits_tab[q[0]][peak['id']] = "" @@ -99,6 +100,7 @@ def annotation_process(input_args, peak_file, log=None): # queries valid_dist = any([dist_from_peak <= int( max(q["distance"])) for q in queries]) + # Keep hit if internals are required in any query i_want_internals = any([q["internals"] in [ ['T'], ['True'], ['TRUE'], ['Yes'], ['YES'], ['Y'], ['yes'], ['F'], ['False'], ['FALSE'], ['No'], ['NO'], ['N'], ['no']] for q in queries]) @@ -116,13 +118,12 @@ def annotation_process(input_args, peak_file, log=None): # Find hits with valid values for the queries (Search all # queries ,Not only PRIORITY) v_fsa = [ovls.valid_fsa(h, hit, q, peak['strand']) for q in queries] - # Pair the Valid query values(fsb) with valid_distance and # valid strand for each query vsd = [[x, valid_dist] for x in v_fsa] valid_queries = [i for i, v in enumerate(vsd) if all(v)] - hitj = "\t".join(hit) + # Create dictionary of hit with its valid query per peak if valid_queries: has_hits.append(True) @@ -133,7 +134,7 @@ def annotation_process(input_args, peak_file, log=None): has_hits.append(False) # ----- All hits parsed. Check only hits with valid-queries now ----- # - #log.debug("\n---All hits parsed for the peak.Totally, the Peak has {} hits from which {} are Valid ---".format( len(has_hits), has_hits.count(True) )) + # log.debug("\n---All hits parsed for the peak. Totally, the Peak has {} hits from which {} are Valid ---".format( len(has_hits), has_hits.count(True) )) # 9 cols= feat, f.start, f.end, f.strand, dist, min_pos, genom_loc, # feat-to-peak-ovl , peak-to-feat-ovl (no Q) nas_len = len(attrib_k) + 9 diff --git a/uropa/overlaps.py b/uropa/overlaps.py index 7cae930..3f65d63 100644 --- a/uropa/overlaps.py +++ b/uropa/overlaps.py @@ -61,7 +61,9 @@ def tabix_index(annot_gtf): def valid_fsa(h, hit, q, pstrand): """Returns if a hit h is valid for query q in their basic common keys.""" - vf = (h["feature"] == q["feature"][0]) + # not just the first feature should be hit but any! + #vf = (h["feature"] == q["feature"][0]) + vf = (h["feature"] in q["feature"]) v_str = valid_strand(h["strand"], pstrand, q["strand"][0]) va = valid_attribute(q["filter.attribute"][0], q["attribute.value"][0], hit) return all([vf, v_str, va]) diff --git a/uropa/uropa.py b/uropa/uropa.py index 2a21b33..4c29e58 100644 --- a/uropa/uropa.py +++ b/uropa/uropa.py @@ -95,7 +95,7 @@ def main(): "--version", help="prints the version and exits", action="version", - version="%(prog)s 2.0.2") + version="%(prog)s 2.0.3") args = parser.parse_args() config = args.input @@ -173,7 +173,7 @@ def main(): logger.warning("File %s is not a proper GTF file!", annot_gtf) gtf_feat = cfg.column_from_file(annot_gtf, 3, logger) - + if len(gtf_feat) < 1: logger.error("No features found in file {} for annotation.".format(annot_gtf)) sys.exit() @@ -207,6 +207,7 @@ def main(): if len(gtf_feat) > 1: gtf_cut_file = cfg.cut_gtf_perFeat(annot_gtf, feat_valid, outdir) mygtf = gtf_cut_file + else: mygtf = annot_gtf