diff --git a/bin/1.1_footprint_extraction/footprints_extraction.py b/bin/1.1_footprint_extraction/footprints_extraction.py index b340dc7..f9f5321 100644 --- a/bin/1.1_footprint_extraction/footprints_extraction.py +++ b/bin/1.1_footprint_extraction/footprints_extraction.py @@ -285,8 +285,7 @@ def check_and_merge(peak_footprints, max_bp_between): peak_footprints_new = {} merged_footprints = {} - #print(len(peak_footprints)) - + #we need to check each footprint within this peak with the other footprints for possible merging for footprint_to_check in peak_footprints.keys(): start_to_check = peak_footprints[footprint_to_check]['start'] end_to_check = peak_footprints[footprint_to_check]['end'] @@ -296,142 +295,93 @@ def check_and_merge(peak_footprints, max_bp_between): for compared_footprint in peak_footprints.keys(): if start_to_check > peak_footprints[compared_footprint]['start'] and start_to_check - peak_footprints[compared_footprint]['end'] < max_bp_between: - #make compared_footprint longer - #print() - #print("footprint_to_check", footprint_to_check) - #print(compared_footprint, " + ", footprint_to_check) + #make compared_footprint longer: compared_footprint + footprint_to_check merge_footprints_left = False break elif end_to_check < peak_footprints[compared_footprint]['end'] and peak_footprints[compared_footprint]['start'] - end_to_check < max_bp_between: - #make footprint_to_check longer - #print() - #print("footprint_to_check", footprint_to_check) - #print(footprint_to_check, " + ", compared_footprint) + #make footprint_to_check longer: footprint_to_check + compared footprint merge_footprints_left = True break if merge_footprints_left: #the left merging is enabled, start and end of compared_footprint should be smaller than the start of the footprint_to_check - #print("entered left merging") - #print(start_to_check, end_to_check) - #print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end']) if start_to_check < peak_footprints[compared_footprint]['start']: if footprint_to_check not in peak_footprints_new.keys(): - #print("left, footprint to check ", footprint_to_check, " not in peak_footprints_new") - #even if it is not in keys, it could be merged already, so check first, if there are some footprints merged with this one - #print(merged_footprints) if any(footprint_to_check in merged_footprints[x] for x in merged_footprints.keys()): #true if footprint_to_check was already merged with someone - #print("footprint_to_check ", footprint_to_check, " was a part of some merging") for k, v in merged_footprints.items(): if footprint_to_check in v: main_footprint = k - #print("make merging using the information from the merged_footprints and peak_footprints_new") + #make merging using the information from the merged_footprints and peak_footprints_new #UPDATE - #print("update") - #print(peak_footprints_new[main_footprint]) peak_footprints_new[main_footprint] = footprint_update(peak_footprints_new[main_footprint], peak_footprints[compared_footprint]['start'], peak_footprints[main_footprint]['end'], peak_footprints[compared_footprint]['score']) - #print(peak_footprints_new[main_footprint]) merged_array = merged_footprints[main_footprint] merged_array.append(compared_footprint) merged_footprints[main_footprint] = merged_array - #print(merged_footprints) #there are no merged footprints with the footprint_to_check yet, so make a new one else: - #print("make normal update, using data from peak_footprints") + #add the compared footprint and footprint_to_check to the merged_footprints merged_footprints[footprint_to_check] = merged_footprints.get(footprint_to_check, []) merged_footprints[footprint_to_check] = [compared_footprint] - #print(merged_footprints) - #print("saving ", footprint_to_check) peak_footprints_new[footprint_to_check] = peak_footprints.get(footprint_to_check, {}) peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check] #<-- update #UPDATE - #print("update") - #print(peak_footprints_new[footprint_to_check]) peak_footprints_new[footprint_to_check] = footprint_update(peak_footprints_new[footprint_to_check], peak_footprints_new[footprint_to_check]['start'], peak_footprints[compared_footprint]['end'], peak_footprints[compared_footprint]['score']) - #print(peak_footprints_new[footprint_to_check]) else: #the footprint_to_check is in peak_footprints_new already #the footprint_to_check can only be the main part of merging before, check it if footprint_to_check in merged_footprints.keys(): - #print("footprint_to_check ", footprint_to_check, " was as main for merging already") + #footprint_to_check was as main for merging already #UPDATE - #print("update") - #print(peak_footprints_new[footprint_to_check]) peak_footprints_new[footprint_to_check] = footprint_update(peak_footprints_new[footprint_to_check], peak_footprints_new[footprint_to_check]['start'], peak_footprints[compared_footprint]['end'], peak_footprints[compared_footprint]['score']) - #print(peak_footprints_new[footprint_to_check]) - #merged_footprints[footprint_to_check].append(compared_footprint) + #add it to the merged_footprints as well merged_array = merged_footprints[footprint_to_check] merged_array.append(compared_footprint) merged_footprints[footprint_to_check] = merged_array - #print(merged_footprints) - else: - #print("merge now and add footprint_to_check to the merged_footprints") + #the footprint_to check was not merged with anything yet merged_footprints[footprint_to_check] = merged_footprints.get(footprint_to_check, []) merged_footprints[footprint_to_check] = [compared_footprint] #UPDATE - #print("update") - #print(peak_footprints_new[footprint_to_check]) peak_footprints_new[footprint_to_check] = footprint_update(peak_footprints_new[footprint_to_check], peak_footprints_new[footprint_to_check]['start'], peak_footprints[compared_footprint]['end'], peak_footprints[compared_footprint]['score']) - #print(peak_footprints_new[footprint_to_check]) - elif merge_footprints_left == False: #the right merging is enabled, start and end of compared footprint should be bigger than the start of the footprint_to_check - #print("entered right merging") - #print(start_to_check, end_to_check) - #print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end']) + #the right merging is enabled, start and end of compared footprint should be bigger than the start of the footprint_to_check + elif merge_footprints_left == False: if end_to_check > peak_footprints[compared_footprint]['end']: - if compared_footprint not in peak_footprints_new.keys(): - #print("right, compared_footprint ", compared_footprint, " not in peak_footprints_new") + if compared_footprint not in peak_footprints_new.keys(): if any(compared_footprint in merged_footprints[x] for x in merged_footprints.keys()): - #print("compared_footprint ", compared_footprint, " was a part of some merging") for k, v in merged_footprints.items(): if compared_footprint in v: main_footprint = k - #print("make merging using the information from the merged_footprints and peak_footprints_new") + #make merging using the information from the merged_footprints and peak_footprints_new #UPDATE - #print("update") - #print(merged_footprints) peak_footprints_new[main_footprint] = footprint_update(peak_footprints_new[main_footprint], peak_footprints[main_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) - #print(merged_footprints) - #merged_footprints[main_footprint] = merged_footprints[main_footprint].append(footprint_to_check) + #add to the merged_footprints merged_array = merged_footprints[main_footprint] merged_array.append(footprint_to_check) merged_footprints[main_footprint] = merged_array else: - #print("make normal update, using data from peak footprints") + #"make normal update, using data from peak footprints merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, []) merged_footprints[compared_footprint] = [footprint_to_check] - #print(merged_footprints) - #print("saving ", compared_footprint) peak_footprints_new[compared_footprint] = peak_footprints.get(compared_footprint, {}) peak_footprints_new[compared_footprint] = peak_footprints[compared_footprint] #UPDATE - #print("update") - #print(peak_footprints_new[compared_footprint]) peak_footprints_new[compared_footprint] = footprint_update(peak_footprints_new[compared_footprint], peak_footprints_new[compared_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) - #print(peak_footprints_new[compared_footprint]) else: if compared_footprint in merged_footprints.keys(): - #print("compared_footprint ", compared_footprint, " was as main for merging already") + #compared_footprint was as main for merging already #UPDATE - #print("update") - #print(peak_footprints_new[compared_footprint]) peak_footprints_new[compared_footprint] = footprint_update(peak_footprints_new[compared_footprint], peak_footprints_new[compared_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) - #print(peak_footprints_new[compared_footprint]) - #merged_footprints[compared_footprint] = merged_footprints[compared_footprint].append(footprint_to_check) + merged_array = merged_footprints[compared_footprint] merged_array.append(footprint_to_check) merged_footprints[compared_footprint] = merged_array - #print(merged_footprints) + else: - #print("merge now and add compared_footprint to the merged_footprints") + #merge now and add compared_footprint to the merged_footprints merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, []) merged_footprints[compared_footprint] = [compared_footprint] - #print(merged_footprints) #UPDATE - #print("update") - #print(peak_footprints_new[compared_footprint]) peak_footprints_new[compared_footprint] = footprint_update(peak_footprints_new[compared_footprint], peak_footprints_new[compared_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) - #print(peak_footprints_new[compared_footprint]) else: #save the current footprint, as it should not be merged peak_footprints_new[footprint_to_check] = peak_footprints_new.get(footprint_to_check, [])