diff --git a/bin/1.1_footprint_extraction/footprints_extraction.py b/bin/1.1_footprint_extraction/footprints_extraction.py index f9f5321..044be21 100644 --- a/bin/1.1_footprint_extraction/footprints_extraction.py +++ b/bin/1.1_footprint_extraction/footprints_extraction.py @@ -118,7 +118,7 @@ def make_bed_dictionary(bed_file): #an array containing scores (signals) from the bigwig file; the footprints already saved for the particular peak; information for the bed file: #chromosom, start and end position, as well as additional information from the original bed file #the function returns the count for footprints, as well as footprints for the current peak -def save_footprint(footprint_count, footprint_scores, peak_footprints, chromosom, footprint_start, footprint_end, bonus_info_from_bed): +def save_footprint(footprint_count, footprint_scores, peak_footprints, chromosom, footprint_start, footprint_end, bonus_info_from_bed): save_current_footprint = False @@ -282,11 +282,21 @@ def search_in_window(peak_footprints, footprint_count, chromosom, peak_start, pe #the input parameter are: dictionary with footprints within one peak, and the max number of bp allowed to be in between the footprints #the output is the renewed dictionary containing only the best footprints for the output file def check_and_merge(peak_footprints, max_bp_between): + #to ensure the merging works well, sort the footprints first by start and end positions + #the sort cannot be applied to a dictionary, so we make a list out of the peak_footprints dictionary + peak_footprints_list = sorted(peak_footprints.items(), key = lambda x : (x[1]['start'], x[1]['end']), reverse = False) + peak_footprints_new = {} merged_footprints = {} + for footprint in peak_footprints_list: + print(footprint[0]) #name of footprint + print(footprint[1]['start']) #the features of saved footprint + #we need to check each footprint within this peak with the other footprints for possible merging - for footprint_to_check in
peak_footprints.keys(): + #for footprint_to_check in peak_footprints.keys(): + for footprint in peak_footprints_list: + footprint_to_check = footprint[0] #save the name of the footprint which we are working with now start_to_check = peak_footprints[footprint_to_check]['start'] end_to_check = peak_footprints[footprint_to_check]['end'] @@ -296,6 +306,9 @@ def check_and_merge(peak_footprints, max_bp_between): if start_to_check > peak_footprints[compared_footprint]['start'] and start_to_check - peak_footprints[compared_footprint]['end'] < max_bp_between: #make compared_footprint longer: compared_footprint + footprint_to_check + print() + print(start_to_check, peak_footprints[compared_footprint]['end']) + print("merge right", footprint_to_check, compared_footprint) merge_footprints_left = False break elif end_to_check < peak_footprints[compared_footprint]['end'] and peak_footprints[compared_footprint]['start'] - end_to_check < max_bp_between: @@ -344,12 +357,16 @@ def check_and_merge(peak_footprints, max_bp_between): peak_footprints_new[footprint_to_check] = footprint_update(peak_footprints_new[footprint_to_check], peak_footprints_new[footprint_to_check]['start'], peak_footprints[compared_footprint]['end'], peak_footprints[compared_footprint]['score']) #the right merging is enabled, start and end of compared footprint should be bigger than the start of the footprint_to_check elif merge_footprints_left == False: + print("entered right merging") if end_to_check > peak_footprints[compared_footprint]['end']: - if compared_footprint not in peak_footprints_new.keys(): + print(end_to_check, peak_footprints[compared_footprint]['end']) + if compared_footprint not in peak_footprints_new.keys(): + print("compared_footprint", compared_footprint, "not in peak_footprints_new.keys") if any(compared_footprint in merged_footprints[x] for x in merged_footprints.keys()): for k, v in merged_footprints.items(): if compared_footprint in v: main_footprint = k + print(main_footprint) #make 
merging using the information from the merged_footprints and peak_footprints_new #UPDATE peak_footprints_new[main_footprint] = footprint_update(peak_footprints_new[main_footprint], peak_footprints[main_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) @@ -358,7 +375,9 @@ def check_and_merge(peak_footprints, max_bp_between): merged_array.append(footprint_to_check) merged_footprints[main_footprint] = merged_array else: - #"make normal update, using data from peak footprints + print("make normal update") + print("save", compared_footprint) + #make normal update, using data from peak footprints merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, []) merged_footprints[compared_footprint] = [footprint_to_check] @@ -366,6 +385,7 @@ def check_and_merge(peak_footprints, max_bp_between): peak_footprints_new[compared_footprint] = peak_footprints[compared_footprint] #UPDATE peak_footprints_new[compared_footprint] = footprint_update(peak_footprints_new[compared_footprint], peak_footprints_new[compared_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) + print(peak_footprints_new[compared_footprint]) else: if compared_footprint in merged_footprints.keys(): #compared_footprint was as main for merging already @@ -388,8 +408,8 @@ def check_and_merge(peak_footprints, max_bp_between): peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check] #print(len(peak_footprints_new)) - #for footprint in peak_footprints_new: - # print(footprint) + for footprint in peak_footprints_new: + print(footprint, peak_footprints_new[footprint]) #sys.exit() return peak_footprints_new