3.2.0: show_attributes=all will show all available attributes in output

loosolab · Apr 17, 2019 · 9a7c46e · 9a7c46e
1 parent bb56d53
commit 9a7c46e
Show file tree

Hide file tree

Showing 4 changed files with 27 additions and 16 deletions.
diff --git a/CHANGES b/CHANGES
@@ -1,3 +1,6 @@
+## 3.2.0 (2019-04-17)
+- added functionality to view all available gtf-attributes in output files. This is enabled by setting show_attributes = all.
+
 ## 3.1.4 (2019-04-13)
 - fixed error due to insufficient handling of "chr"-prefix of both bed and gtf
 

diff --git a/uropa/__init__.py b/uropa/__init__.py
@@ -1 +1 @@
-__version__ = "3.1.4"
+__version__ = "3.2.0"
diff --git a/uropa/uropa.py b/uropa/uropa.py
@@ -408,27 +408,35 @@ def main():
 
 	logger.info("Processing annotated peaks")
 
-	#Add attribute columns to output
-	#The keys are different internally vs. the output columns
-	attribute_columns = cfg_dict.get("show_attributes", [])
-	main = ["peak_chr", "peak_start", "peak_end", "peak_id", "peak_score", "peak_strand", "feature", "feat_start", "feat_end", "feat_strand", "feat_anchor", "distance", "relative_location", "feat_ovl_peak", "peak_ovl_feat"]
-	header_internal = main + ["attribute_" + col for col in attribute_columns]  + ["query_name"]
-	header_output = main + attribute_columns + ["name"]
-
-	logger.debug("Adding attribute columns")
-	for annotation in all_annotations:
-		attributes_dict = annotation.get("feat_attributes", {})
-		for key in attribute_columns:
-			annotation["attribute_" + key] = attributes_dict.get(key, "NA")
-
-	#Check if no annotations were found
+	##### Check if no annotations were found #####
 	all_NA = 0
 	for anno in all_annotations:
 		if "feature" in anno:
 			all_NA = 1
 	if all_NA == 0:	#This is 0 coming out of the loop if no features were found
 		logger.warning("No annotations were found for input regions (all hits are NA). If this is unexpected, please check the configuration of your input queries.")
 
+	#Add attribute columns to output
+	logger.debug("Adding attribute columns")
+	all_possible_attributes = {}
+	for annotation in all_annotations:
+		attributes_dict = annotation.get("feat_attributes", {})
+		for key in attributes_dict:
+			annotation["attribute_" + key] = attributes_dict[key]
+			all_possible_attributes[key] = ""
+
+	#Set output attribute columns
+	attribute_columns = cfg_dict.get("show_attributes", [])
+
+	#If "all" was set in show_attributes, set attribute_columns to the total set of attributes
+	if "all" in [str(att).lower() for att in attribute_columns]:
+		attribute_columns = sorted(list(all_possible_attributes.keys()))
+		logger.info("Config key show_attributes was set to \'all\'. All possible attributes are shown in output ({0})".format(attribute_columns))
+
+	#Set output columns (the keys are different internally vs. the output columns)
+	main = ["peak_chr", "peak_start", "peak_end", "peak_id", "peak_score", "peak_strand", "feature", "feat_start", "feat_end", "feat_strand", "feat_anchor", "distance", "relative_location", "feat_ovl_peak", "peak_ovl_feat"]
+	header_internal = main + ["attribute_" + col for col in attribute_columns]  + ["query_name"]
+	header_output = main + attribute_columns + ["name"]
 
 	##### Write output files #####
 	logger.info("Writing output files")

diff --git a/uropa/utils.py b/uropa/utils.py
@@ -290,7 +290,7 @@ def parse_bedfile(bedfile, gtf_has_chr):
 				gtf_chr = "chr" + chrom if not chrom.startswith("chr") else chrom		#add chr to match gtf if needed
 			else:
 				gtf_chr = chrom.replace("chr", "") #gtf chrom should not have chr-prefix
-
+			
 			peak_dict = {"gtf_chr": gtf_chr, "peak_chr":chrom, "peak_start":start, "peak_end":end, "peak_id":name, "peak_score":score, "peak_strand":strand, "internal_peak_id": i+1}
 			peak_dict.update(dict(zip(additional_header, additional)))
 			peaks.append(peak_dict)