From 9a7c46e41fc290251e20ac34f920914099f06da5 Mon Sep 17 00:00:00 2001 From: msbentsen Date: Wed, 17 Apr 2019 17:43:20 +0200 Subject: [PATCH] 3.2.0: show_attributes=all will show all available attributes in output --- CHANGES | 3 +++ uropa/__init__.py | 2 +- uropa/uropa.py | 36 ++++++++++++++++++++++-------------- uropa/utils.py | 2 +- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/CHANGES b/CHANGES index 653d7c5..6d8ca66 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,6 @@ +## 3.2.0 (2019-04-17) +- added functionality to view all available gtf-attributes in output files. Set by show_attributes = all. + ## 3.1.4 (2019-04-13) - fixed error due to insufficient handling of "chr"-prefix of both bed and gtf diff --git a/uropa/__init__.py b/uropa/__init__.py index 1fe90f6..1173108 100644 --- a/uropa/__init__.py +++ b/uropa/__init__.py @@ -1 +1 @@ -__version__ = "3.1.4" +__version__ = "3.2.0" diff --git a/uropa/uropa.py b/uropa/uropa.py index c4510ac..4d4140d 100644 --- a/uropa/uropa.py +++ b/uropa/uropa.py @@ -408,20 +408,7 @@ def main(): logger.info("Processing annotated peaks") - #Add attribute columns to output - #The keys are different internally vs. the output columns - attribute_columns = cfg_dict.get("show_attributes", []) - main = ["peak_chr", "peak_start", "peak_end", "peak_id", "peak_score", "peak_strand", "feature", "feat_start", "feat_end", "feat_strand", "feat_anchor", "distance", "relative_location", "feat_ovl_peak", "peak_ovl_feat"] - header_internal = main + ["attribute_" + col for col in attribute_columns] + ["query_name"] - header_output = main + attribute_columns + ["name"] - - logger.debug("Adding attribute columns") - for annotation in all_annotations: - attributes_dict = annotation.get("feat_attributes", {}) - for key in attribute_columns: - annotation["attribute_" + key] = attributes_dict.get(key, "NA") - - #Check if no annotations were found + ##### Check if no annotations were found ##### all_NA = 0 for anno in all_annotations: if "feature" in anno: @@ -429,6 +416,27 @@ def main(): if all_NA == 0: #This is 0 coming out of the loop if no features were found logger.warning("No annotations were found for input regions (all hits are NA). If this is unexpected, please check the configuration of your input queries.") + #Add attribute columns to output + logger.debug("Adding attribute columns") + all_possible_attributes = {} + for annotation in all_annotations: + attributes_dict = annotation.get("feat_attributes", {}) + for key in attributes_dict: + annotation["attribute_" + key] = attributes_dict[key] + all_possible_attributes[key] = "" + + #Set output attribute columns + attribute_columns = cfg_dict.get("show_attributes", []) + + #If "all" was set in show_attributes, set attributes_columns to total set of attributes + if "all" in [str(att).lower() for att in attribute_columns]: + attribute_columns = sorted(list(all_possible_attributes.keys())) + logger.info("Config key show_attributes was set to \'all\'. All possible attributes are shown in output ({0})".format(attribute_columns)) + + #Set output columns (the keys are different internally vs. the output columns) + main = ["peak_chr", "peak_start", "peak_end", "peak_id", "peak_score", "peak_strand", "feature", "feat_start", "feat_end", "feat_strand", "feat_anchor", "distance", "relative_location", "feat_ovl_peak", "peak_ovl_feat"] + header_internal = main + ["attribute_" + col for col in attribute_columns] + ["query_name"] + header_output = main + attribute_columns + ["name"] ##### Write output files ##### logger.info("Writing output files") diff --git a/uropa/utils.py b/uropa/utils.py index f0826d0..7ef6813 100644 --- a/uropa/utils.py +++ b/uropa/utils.py @@ -290,7 +290,7 @@ def parse_bedfile(bedfile, gtf_has_chr): gtf_chr = "chr" + chrom if not chrom.startswith("chr") else chrom #add chr to match gtf if needed else: gtf_chr = chrom.replace("chr", "") #gtf chrom should not have chr-prefix - + peak_dict = {"gtf_chr": gtf_chr, "peak_chr":chrom, "peak_start":start, "peak_end":end, "peak_id":name, "peak_score":score, "peak_strand":strand, "internal_peak_id": i+1} peak_dict.update(dict(zip(additional_header, additional))) peaks.append(peak_dict)