From a9331ac17ba1ebbf4ebb27e5bc5a5f3d045b3d56 Mon Sep 17 00:00:00 2001
From: anastasiia <anastasiia.petrova@mpi-bn.mpg.de>
Date: Sat, 12 Jan 2019 16:08:36 +0100
Subject: [PATCH] making the list out of the dictionary in the merge function
 to ensure the sorting before check for merging

---
 .../footprints_extraction.py                  | 32 +++++++++++++++----
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/bin/1.1_footprint_extraction/footprints_extraction.py b/bin/1.1_footprint_extraction/footprints_extraction.py
index f9f5321..044be21 100644
--- a/bin/1.1_footprint_extraction/footprints_extraction.py
+++ b/bin/1.1_footprint_extraction/footprints_extraction.py
@@ -118,7 +118,7 @@ def make_bed_dictionary(bed_file):
 #an array containing scores (signals) from the bigwig file; the footprints already saved for the particular peak; information for the bed file:
 #chromosom, start and end position, as well as additional information from the original bed file
 #the function returns the count for footprints, as well as footprints for the current peak
-def save_footprint(footprint_count, footprint_scores, peak_footprints, chromosom, footprint_start, footprint_end, bonus_info_from_bed):
+def save_footprint(footprint_count, footprint_scores, peak_footprints, chromosom, footprint_start, footprint_end, bonus_info_from_bed): 
 
 	save_current_footprint = False
 
@@ -282,11 +282,21 @@ def search_in_window(peak_footprints, footprint_count, chromosom, peak_start, pe
 #the input parameter are: dictionary with footprints within one peak, and the max number of bp allowed to be in between the footprints
 #the output is the renewed dictionary containing only the best footprints for the output file
 def check_and_merge(peak_footprints, max_bp_between):
+	#to ensure the merging works well, sort the footprints first by start and then by end position
+	#a dictionary cannot be sorted in place, so we build a sorted list of (name, features) items from peak_footprints
+	peak_footprints_list = sorted(peak_footprints.items(), key = lambda x : (x[1]['start'], x[1]['end']), reverse = False)
+
 	peak_footprints_new = {}
 	merged_footprints = {}
 
+	for footprint in peak_footprints_list:
+		print(footprint[0]) #name of footprint
+		print(footprint[1]['start']) #the features of saved footprint
+
 	#we need to check each footprint within this peak with the other footprints for possible merging
-	for footprint_to_check in peak_footprints.keys():
+	#for footprint_to_check in peak_footprints.keys():
+	for footprint in peak_footprints_list:
+		footprint_to_check = footprint[0] #save the name of the footprint which we are working with now
 		start_to_check = peak_footprints[footprint_to_check]['start']
 		end_to_check = peak_footprints[footprint_to_check]['end']
 
@@ -296,6 +306,9 @@ def check_and_merge(peak_footprints, max_bp_between):
 
 			if start_to_check > peak_footprints[compared_footprint]['start'] and start_to_check - peak_footprints[compared_footprint]['end'] < max_bp_between:
 				#make compared_footprint longer: compared_footprint + footprint_to_check
+				print()
+				print(start_to_check, peak_footprints[compared_footprint]['end'])
+				print("merge right", footprint_to_check, compared_footprint)
 				merge_footprints_left = False
 				break
 			elif end_to_check < peak_footprints[compared_footprint]['end'] and peak_footprints[compared_footprint]['start'] - end_to_check < max_bp_between:
@@ -344,12 +357,16 @@ def check_and_merge(peak_footprints, max_bp_between):
 						peak_footprints_new[footprint_to_check] = footprint_update(peak_footprints_new[footprint_to_check], peak_footprints_new[footprint_to_check]['start'], peak_footprints[compared_footprint]['end'], peak_footprints[compared_footprint]['score'])
 		#the right merging is enabled, start and end of compared footprint should be bigger than the start of the footprint_to_check
 		elif merge_footprints_left == False: 
+			print("entered right merging")
 			if end_to_check > peak_footprints[compared_footprint]['end']:
-				if compared_footprint not in peak_footprints_new.keys():					
+				print(end_to_check, peak_footprints[compared_footprint]['end'])
+				if compared_footprint not in peak_footprints_new.keys():
+					print("compared_footprint", compared_footprint, "not in peak_footprints_new.keys")					
 					if any(compared_footprint in merged_footprints[x] for x in merged_footprints.keys()):
 						for k, v in merged_footprints.items():
 							if compared_footprint in v:
 								main_footprint = k
+						print(main_footprint)
 						#make merging using the information from the merged_footprints and peak_footprints_new
 						#UPDATE
 						peak_footprints_new[main_footprint] = footprint_update(peak_footprints_new[main_footprint], peak_footprints[main_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score'])
@@ -358,7 +375,9 @@ def check_and_merge(peak_footprints, max_bp_between):
 						merged_array.append(footprint_to_check)
 						merged_footprints[main_footprint] = merged_array
 					else:
-						#"make normal update, using data from peak footprints
+						print("make normal update")
+						print("save", compared_footprint)
+						#make normal update, using data from peak footprints
 						merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, [])
 						merged_footprints[compared_footprint] = [footprint_to_check]
 
@@ -366,6 +385,7 @@ def check_and_merge(peak_footprints, max_bp_between):
 						peak_footprints_new[compared_footprint] = peak_footprints[compared_footprint]
 						#UPDATE
 						peak_footprints_new[compared_footprint] = footprint_update(peak_footprints_new[compared_footprint], peak_footprints_new[compared_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score'])
+						print(peak_footprints_new[compared_footprint])
 				else:
 					if compared_footprint in merged_footprints.keys():
 						#compared_footprint was as main for merging already
@@ -388,8 +408,8 @@ def check_and_merge(peak_footprints, max_bp_between):
 			peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check]
 
 	#print(len(peak_footprints_new))
-	#for footprint in peak_footprints_new:
-	#	print(footprint)
+	for footprint in peak_footprints_new:
+		print(footprint, peak_footprints_new[footprint])
 	#sys.exit()
 	return peak_footprints_new