From 31315e697fa4051240e87bd1bf4592641192140d Mon Sep 17 00:00:00 2001 From: anastasiia Date: Wed, 9 Jan 2019 15:39:45 +0100 Subject: [PATCH] the structure is ready, this need to be adjusted to the data the footprint_extraction parses --- .../footprints_extraction.py | 116 +++++++++++++++--- 1 file changed, 96 insertions(+), 20 deletions(-) diff --git a/bin/1.1_footprint_extraction/footprints_extraction.py b/bin/1.1_footprint_extraction/footprints_extraction.py index d1b0f8f..36a2342 100644 --- a/bin/1.1_footprint_extraction/footprints_extraction.py +++ b/bin/1.1_footprint_extraction/footprints_extraction.py @@ -280,6 +280,9 @@ def search_in_window(peak_footprints, footprint_count, chromosom, peak_start, pe #the output is the renewed dictionary containing only the best footprints for the output file def check_and_merge(peak_footprints, max_bp_between): peak_footprints_new = {} + merged_footprints = {} + + print(len(peak_footprints)) for footprint_to_check in peak_footprints.keys(): start_to_check = peak_footprints[footprint_to_check]['start'] @@ -291,38 +294,111 @@ def check_and_merge(peak_footprints, max_bp_between): if start_to_check > peak_footprints[compared_footprint]['start'] and start_to_check - peak_footprints[compared_footprint]['end'] < max_bp_between: #make compared_footprint longer + print() + print("footprint_to_check", footprint_to_check) + print(compared_footprint, " + ", footprint_to_check) merge_footprints_left = False break elif end_to_check < peak_footprints[compared_footprint]['end'] and peak_footprints[compared_footprint]['start'] - end_to_check < max_bp_between: #make footprint_to_check longer + print() + print("footprint_to_check", footprint_to_check) + print(footprint_to_check, " + ", compared_footprint) merge_footprints_left = True break - if merge_footprints_left: #if the merging left is enabled - #check if this footprint can be merged with the compared_footprint - #if compared footprint is not in peak_footprint_new.keys(), the next loop will check for this footprint. There is no need for doulbe check now - if start_to_check < peak_footprints[compared_footprint]['start'] and compared_footprint in peak_footprints_new.keys(): - #update the start position - peak_footprints_new[compared_footprint]['start'] = start_to_check - #update the length - peak_footprints_new[compared_footprint]['len'] = peak_footprints[compared_footprint]['end'] - start_to_check - #update the score - peak_footprints_new[compared_footprint]['score'] = (peak_footprints[footprint_to_check]['score'] + peak_footprints[compared_footprint]['score']) / 2 - - elif merge_footprints_left == False: #otherwise merge right - #check if the merging is possible - if end_to_check > peak_footprints[compared_footprint]['end'] and compared_footprint in peak_footprints_new.keys(): - #update the end position - peak_footprints_new[compared_footprint]['end'] = end_to_check - #update the length - peak_footprints_new[compared_footprint]['len'] = end_to_check - peak_footprints[compared_footprint]['end'] - #update the score - peak_footprints_new[compared_footprint]['score'] = (peak_footprints[footprint_to_check]['score'] + peak_footprints[compared_footprint]['score']) / 2 + if merge_footprints_left: #the left merging is enabled, start and end of compared_footprint should be smaller than the start of the footprint_to_check + print("entered left merging") + print(start_to_check, end_to_check) + print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end']) + if start_to_check < peak_footprints[compared_footprint]['start']: + if footprint_to_check not in peak_footprints_new.keys(): + print("left, footprint to check ", footprint_to_check, " not in peak_footprints_new") + #UPDATE + if any(footprint_to_check in merged_footprints[x] for x in merged_footprints.keys()): #true if footprint_to_check was already merged with someone + #print("footprint_to_check ", footprint_to_check, " was a part of some merging") + for k, v in merged_footprints.items(): + if footprint_to_check in v: + main_footprint = k + print("make merging using the information from the merged_footprints and peak_footprints_new") + #UPDATE + else: + print("make normal update, using data from peak_footprints") + merged_footprints[footprint_to_check] = merged_footprints.get(footprint_to_check, []) + merged_footprints[footprint_to_check] = [compared_footprint] + print(merged_footprints) + + print("saving ", footprint_to_check) + peak_footprints_new[footprint_to_check] = peak_footprints.get(footprint_to_check, {}) + peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check] #<-- update + #UPDATE + + else: #the footprint_to_check is in peak_footprints_new already + if footprint_to_check in merged_footprints.keys(): + print("footprint_to_check ", footprint_to_check, " was as main for merging already") + #UPDATE + elif any(footprint_to_check in merged_footprints[x] for x in merged_footprints.keys()): + print("footprint_to_check ", footprint_to_check, " was a part of some merging") + for k, v in merged_footprints.items(): + if footprint_to_check in v: + main_footprint = k + #UPDATE + else: + print("merge now and add footprint_to_check to the merged_footprints") + merged_footprints[footprint_to_check] = merged_footprints.get(footprint_to_check, []) + merged_footprints[footprint_to_check] = [compared_footprint] + #UPDATE + + elif merge_footprints_left == False: #the right merging is enabled, start and end of compared footprint should be bigger than the start of the footprint_to_check + print("entered right merging") + print(start_to_check, end_to_check) + print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end']) + if end_to_check > peak_footprints[compared_footprint]['end']: + if compared_footprint not in peak_footprints_new.keys(): + print("right, compared_footprint ", compared_footprint, " not in peak_footprints_new") + if any(compared_footprint in merged_footprints[x] for x in merged_footprints.keys()): + print("compared_footprint ", compared_footprint, " was a part of some merging") + for k, v in merged_footprints.items(): + if compared_footprint in v: + main_footprint = k + print("make merging using the information from the merged_footprints and peak_footprints_new") + #UPDATE + else: + print("make normal update, using data from peak footprints") + merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, []) + merged_footprints[compared_footprint] = [footprint_to_check] + print(merged_footprints) + + print("saving ", compared_footprint) + peak_footprints_new[compared_footprint] = peak_footprints.get(compared_footprint, {}) + peak_footprints_new[compared_footprint] = peak_footprints[compared_footprint] + #UPDATE + else: + if compared_footprint in merged_footprints.keys(): + print("compared_footprint ", compared_footprint, " was as main for merging already") + #UPDATE + elif any(compared_footprint in merged_footprints[x] for x in merged_footprints.keys()): + #print("compared_footprint ", compared_footprint, " was a part of some merging") + for k, v in merged_footprints.items(): + if compared_footprint in v: + main_footprint = k + #UPDATE + else: + print("merge now and add compared_footprint to the merged_footprints") + merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, []) + merged_footprints[compared_footprint] = [compared_footprint] + print(merged_footprints) + #UPDATE + else: #save the current footprint, as it should not be merged peak_footprints_new[footprint_to_check] = peak_footprints_new.get(footprint_to_check, []) peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check] + print(len(peak_footprints_new)) + for footprint in peak_footprints_new: + print(footprint) + sys.exit() return peak_footprints_new #this function uses the information provided from the .bed file to look for footprints within the peaks of interest