Skip to content
This repository has been archived by the owner. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
3.2.0: show_attributes=all will show all available attributes in output
  • Loading branch information
msbentsen committed Apr 17, 2019
1 parent bb56d53 commit 9a7c46e
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 16 deletions.
3 changes: 3 additions & 0 deletions CHANGES
@@ -1,3 +1,6 @@
## 3.2.0 (2019-04-17)
- added functionality to view all available gtf-attributes in output files. This is enabled by setting show_attributes = all.

## 3.1.4 (2019-04-13)
- fixed error due to insufficient handling of the "chr"-prefix in both bed and gtf files

Expand Down
2 changes: 1 addition & 1 deletion uropa/__init__.py
@@ -1 +1 @@
__version__ = "3.1.4"
__version__ = "3.2.0"
36 changes: 22 additions & 14 deletions uropa/uropa.py
Expand Up @@ -408,27 +408,35 @@ def main():

logger.info("Processing annotated peaks")

#Add attribute columns to output
#The keys are different internally vs. the output columns
attribute_columns = cfg_dict.get("show_attributes", [])
main = ["peak_chr", "peak_start", "peak_end", "peak_id", "peak_score", "peak_strand", "feature", "feat_start", "feat_end", "feat_strand", "feat_anchor", "distance", "relative_location", "feat_ovl_peak", "peak_ovl_feat"]
header_internal = main + ["attribute_" + col for col in attribute_columns] + ["query_name"]
header_output = main + attribute_columns + ["name"]

logger.debug("Adding attribute columns")
for annotation in all_annotations:
attributes_dict = annotation.get("feat_attributes", {})
for key in attribute_columns:
annotation["attribute_" + key] = attributes_dict.get(key, "NA")

#Check if no annotations were found
##### Check if no annotations were found #####
all_NA = 0
for anno in all_annotations:
if "feature" in anno:
all_NA = 1
if all_NA == 0: #This is 0 coming out of the loop if no features were found
logger.warning("No annotations were found for input regions (all hits are NA). If this is unexpected, please check the configuration of your input queries.")

#Add attribute columns to output
logger.debug("Adding attribute columns")
all_possible_attributes = {}
for annotation in all_annotations:
attributes_dict = annotation.get("feat_attributes", {})
for key in attributes_dict:
annotation["attribute_" + key] = attributes_dict[key]
all_possible_attributes[key] = ""

#Set output attribute columns
attribute_columns = cfg_dict.get("show_attributes", [])

#If "all" was set in show_attributes, set attributes_columns to total set of attributes
if "all" in [str(att).lower() for att in attribute_columns]:
attribute_columns = sorted(list(all_possible_attributes.keys()))
logger.info("Config key show_attributes was set to \'all\'. All possible attributes are shown in output ({0})".format(attribute_columns))

#Set output columns (the keys are different internally vs. the output columns)
main = ["peak_chr", "peak_start", "peak_end", "peak_id", "peak_score", "peak_strand", "feature", "feat_start", "feat_end", "feat_strand", "feat_anchor", "distance", "relative_location", "feat_ovl_peak", "peak_ovl_feat"]
header_internal = main + ["attribute_" + col for col in attribute_columns] + ["query_name"]
header_output = main + attribute_columns + ["name"]

##### Write output files #####
logger.info("Writing output files")
Expand Down
2 changes: 1 addition & 1 deletion uropa/utils.py
Expand Up @@ -290,7 +290,7 @@ def parse_bedfile(bedfile, gtf_has_chr):
gtf_chr = "chr" + chrom if not chrom.startswith("chr") else chrom #add chr to match gtf if needed
else:
gtf_chr = chrom.replace("chr", "") #gtf chrom should not have chr-prefix

peak_dict = {"gtf_chr": gtf_chr, "peak_chr":chrom, "peak_start":start, "peak_end":end, "peak_id":name, "peak_score":score, "peak_strand":strand, "internal_peak_id": i+1}
peak_dict.update(dict(zip(additional_header, additional)))
peaks.append(peak_dict)
Expand Down

0 comments on commit 9a7c46e

Please sign in to comment.