From 31315e697fa4051240e87bd1bf4592641192140d Mon Sep 17 00:00:00 2001 From: anastasiia Date: Wed, 9 Jan 2019 15:39:45 +0100 Subject: [PATCH 1/9] the structure is ready, this need to be adjusted to the data the footprint_extraction parses --- .../footprints_extraction.py | 116 +++++++++++++++--- 1 file changed, 96 insertions(+), 20 deletions(-) diff --git a/bin/1.1_footprint_extraction/footprints_extraction.py b/bin/1.1_footprint_extraction/footprints_extraction.py index d1b0f8f..36a2342 100644 --- a/bin/1.1_footprint_extraction/footprints_extraction.py +++ b/bin/1.1_footprint_extraction/footprints_extraction.py @@ -280,6 +280,9 @@ def search_in_window(peak_footprints, footprint_count, chromosom, peak_start, pe #the output is the renewed dictionary containing only the best footprints for the output file def check_and_merge(peak_footprints, max_bp_between): peak_footprints_new = {} + merged_footprints = {} + + print(len(peak_footprints)) for footprint_to_check in peak_footprints.keys(): start_to_check = peak_footprints[footprint_to_check]['start'] @@ -291,38 +294,111 @@ def check_and_merge(peak_footprints, max_bp_between): if start_to_check > peak_footprints[compared_footprint]['start'] and start_to_check - peak_footprints[compared_footprint]['end'] < max_bp_between: #make compared_footprint longer + print() + print("footprint_to_check", footprint_to_check) + print(compared_footprint, " + ", footprint_to_check) merge_footprints_left = False break elif end_to_check < peak_footprints[compared_footprint]['end'] and peak_footprints[compared_footprint]['start'] - end_to_check < max_bp_between: #make footprint_to_check longer + print() + print("footprint_to_check", footprint_to_check) + print(footprint_to_check, " + ", compared_footprint) merge_footprints_left = True break - if merge_footprints_left: #if the merging left is enabled - #check if this footprint can be merged with the compared_footprint - #if compared footprint is not in peak_footprint_new.keys(), the next loop will check for this footprint. There is no need for doulbe check now - if start_to_check < peak_footprints[compared_footprint]['start'] and compared_footprint in peak_footprints_new.keys(): - #update the start position - peak_footprints_new[compared_footprint]['start'] = start_to_check - #update the length - peak_footprints_new[compared_footprint]['len'] = peak_footprints[compared_footprint]['end'] - start_to_check - #update the score - peak_footprints_new[compared_footprint]['score'] = (peak_footprints[footprint_to_check]['score'] + peak_footprints[compared_footprint]['score']) / 2 - - elif merge_footprints_left == False: #otherwise merge right - #check if the merging is possible - if end_to_check > peak_footprints[compared_footprint]['end'] and compared_footprint in peak_footprints_new.keys(): - #update the end position - peak_footprints_new[compared_footprint]['end'] = end_to_check - #update the length - peak_footprints_new[compared_footprint]['len'] = end_to_check - peak_footprints[compared_footprint]['end'] - #update the score - peak_footprints_new[compared_footprint]['score'] = (peak_footprints[footprint_to_check]['score'] + peak_footprints[compared_footprint]['score']) / 2 + if merge_footprints_left: #the left merging is enabled, start and end of compared_footprint should be smaller than the start of the footprint_to_check + print("entered left merging") + print(start_to_check, end_to_check) + print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end']) + if start_to_check < peak_footprints[compared_footprint]['start']: + if footprint_to_check not in peak_footprints_new.keys(): + print("left, footprint to check ", footprint_to_check, " not in peak_footprints_new") + #UPDATE + if any(footprint_to_check in merged_footprints[x] for x in merged_footprints.keys()): #true if footprint_to_check was already merged with someone + #print("footprint_to_check ", footprint_to_check, " was a part of some merging") + for k, v in merged_footprints.items(): + if footprint_to_check in v: + main_footprint = k + print("make merging using the information from the merged_footprints and peak_footprints_new") + #UPDATE + else: + print("make normal update, using data from peak_footprints") + merged_footprints[footprint_to_check] = merged_footprints.get(footprint_to_check, []) + merged_footprints[footprint_to_check] = [compared_footprint] + print(merged_footprints) + + print("saving ", footprint_to_check) + peak_footprints_new[footprint_to_check] = peak_footprints.get(footprint_to_check, {}) + peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check] #<-- update + #UPDATE + + else: #the footprint_to_check is in peak_footprints_new already + if footprint_to_check in merged_footprints.keys(): + print("footprint_to_check ", footprint_to_check, " was as main for merging already") + #UPDATE + elif any(footprint_to_check in merged_footprints[x] for x in merged_footprints.keys()): + print("footprint_to_check ", footprint_to_check, " was a part of some merging") + for k, v in merged_footprints.items(): + if footprint_to_check in v: + main_footprint = k + #UPDATE + else: + print("merge now and add footprint_to_check to the merged_footprints") + merged_footprints[footprint_to_check] = merged_footprints.get(footprint_to_check, []) + merged_footprints[footprint_to_check] = [compared_footprint] + #UPDATE + + elif merge_footprints_left == False: #the right merging is enabled, start and end of compared footprint should be bigger than the start of the footprint_to_check + print("entered right merging") + print(start_to_check, end_to_check) + print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end']) + if end_to_check > peak_footprints[compared_footprint]['end']: + if compared_footprint not in peak_footprints_new.keys(): + print("right, compared_footprint ", compared_footprint, " not in peak_footprints_new") + if any(compared_footprint in merged_footprints[x] for x in merged_footprints.keys()): + print("compared_footprint ", compared_footprint, " was a part of some merging") + for k, v in merged_footprints.items(): + if compared_footprint in v: + main_footprint = k + print("make merging using the information from the merged_footprints and peak_footprints_new") + #UPDATE + else: + print("make normal update, using data from peak footprints") + merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, []) + merged_footprints[compared_footprint] = [footprint_to_check] + print(merged_footprints) + + print("saving ", compared_footprint) + peak_footprints_new[compared_footprint] = peak_footprints.get(compared_footprint, {}) + peak_footprints_new[compared_footprint] = peak_footprints[compared_footprint] + #UPDATE + else: + if compared_footprint in merged_footprints.keys(): + print("compared_footprint ", compared_footprint, " was as main for merging already") + #UPDATE + elif any(compared_footprint in merged_footprints[x] for x in merged_footprints.keys()): + #print("compared_footprint ", compared_footprint, " was a part of some merging") + for k, v in merged_footprints.items(): + if compared_footprint in v: + main_footprint = k + #UPDATE + else: + print("merge now and add compared_footprint to the merged_footprints") + merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, []) + merged_footprints[compared_footprint] = [compared_footprint] + print(merged_footprints) + #UPDATE + else: #save the current footprint, as it should not be merged peak_footprints_new[footprint_to_check] = peak_footprints_new.get(footprint_to_check, []) peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check] + print(len(peak_footprints_new)) + for footprint in peak_footprints_new: + print(footprint) + sys.exit() return peak_footprints_new #this function uses the information provided from the .bed file to look for footprints within the peaks of interest From d4b1245d1541f6336900309ca46d6ce1352b94b7 Mon Sep 17 00:00:00 2001 From: anastasiia Date: Wed, 9 Jan 2019 22:25:00 +0100 Subject: [PATCH 2/9] fixing the bug with merging. The code will be edited soon to look nice and easy to understand --- .../footprints_extraction.py | 151 ++++++++++++------ 1 file changed, 102 insertions(+), 49 deletions(-) diff --git a/bin/1.1_footprint_extraction/footprints_extraction.py b/bin/1.1_footprint_extraction/footprints_extraction.py index 36a2342..00d90ee 100644 --- a/bin/1.1_footprint_extraction/footprints_extraction.py +++ b/bin/1.1_footprint_extraction/footprints_extraction.py @@ -282,7 +282,7 @@ def check_and_merge(peak_footprints, max_bp_between): peak_footprints_new = {} merged_footprints = {} - print(len(peak_footprints)) + #print(len(peak_footprints)) for footprint_to_check in peak_footprints.keys(): start_to_check = peak_footprints[footprint_to_check]['start'] @@ -294,113 +294,166 @@ def check_and_merge(peak_footprints, max_bp_between): if start_to_check > peak_footprints[compared_footprint]['start'] and start_to_check - peak_footprints[compared_footprint]['end'] < max_bp_between: #make compared_footprint longer - print() - print("footprint_to_check", footprint_to_check) - print(compared_footprint, " + ", footprint_to_check) + #print() + #print("footprint_to_check", footprint_to_check) + #print(compared_footprint, " + ", footprint_to_check) merge_footprints_left = False break elif end_to_check < peak_footprints[compared_footprint]['end'] and peak_footprints[compared_footprint]['start'] - end_to_check < max_bp_between: #make footprint_to_check longer - print() - print("footprint_to_check", footprint_to_check) - print(footprint_to_check, " + ", compared_footprint) + #print() + #print("footprint_to_check", footprint_to_check) + #print(footprint_to_check, " + ", compared_footprint) merge_footprints_left = True break if merge_footprints_left: #the left merging is enabled, start and end of compared_footprint should be smaller than the start of the footprint_to_check - print("entered left merging") - print(start_to_check, end_to_check) - print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end']) + #print("entered left merging") + #print(start_to_check, end_to_check) + #print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end']) if start_to_check < peak_footprints[compared_footprint]['start']: if footprint_to_check not in peak_footprints_new.keys(): - print("left, footprint to check ", footprint_to_check, " not in peak_footprints_new") - #UPDATE + #print("left, footprint to check ", footprint_to_check, " not in peak_footprints_new") + #even if it is not in keys, it could be merged already, so check first, if there are some footprints merged with this one + #print(merged_footprints) if any(footprint_to_check in merged_footprints[x] for x in merged_footprints.keys()): #true if footprint_to_check was already merged with someone #print("footprint_to_check ", footprint_to_check, " was a part of some merging") for k, v in merged_footprints.items(): if footprint_to_check in v: main_footprint = k - print("make merging using the information from the merged_footprints and peak_footprints_new") + #print("make merging using the information from the merged_footprints and peak_footprints_new") #UPDATE + #print("update") + #print(peak_footrpints_new[main_footprint]) + peak_footrpints_new[main_footprint] = footprint_update(peak_footprints_new[main_footprint], peak_footprints[compared_footprint]['start'], peak_footprints[main_footprint]['end'], peak_footprints[compared_footprint]['score']) + #print(peak_footrpints_new[main_footprint]) + merged_array = merged_footprints[main_footprint] + merged_array.append(compared_footprint) + merged_footprints[main_footprint] = merged_array + #print(merged_footprints) + #there are no merged footprints with the footprint_to_check yet, so make a new one else: - print("make normal update, using data from peak_footprints") + #print("make normal update, using data from peak_footprints") merged_footprints[footprint_to_check] = merged_footprints.get(footprint_to_check, []) merged_footprints[footprint_to_check] = [compared_footprint] - print(merged_footprints) + #print(merged_footprints) - print("saving ", footprint_to_check) + #print("saving ", footprint_to_check) peak_footprints_new[footprint_to_check] = peak_footprints.get(footprint_to_check, {}) peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check] #<-- update #UPDATE - + #print("update") + #print(peak_footprints_new[footprint_to_check]) + peak_footprints_new[footprint_to_check] = footprint_update(peak_footprints_new[footprint_to_check], peak_footprints_new[footprint_to_check]['start'], peak_footprints[compared_footprint]['end'], peak_footprints[compared_footprint]['score']) + #print(peak_footprints_new[footprint_to_check]) else: #the footprint_to_check is in peak_footprints_new already + #the footprint_to_check can only be the main part of merging before, check it if footprint_to_check in merged_footprints.keys(): - print("footprint_to_check ", footprint_to_check, " was as main for merging already") - #UPDATE - elif any(footprint_to_check in merged_footprints[x] for x in merged_footprints.keys()): - print("footprint_to_check ", footprint_to_check, " was a part of some merging") - for k, v in merged_footprints.items(): - if footprint_to_check in v: - main_footprint = k + #print("footprint_to_check ", footprint_to_check, " was as main for merging already") #UPDATE + #print("update") + #print(peak_footprints_new[footprint_to_check]) + peak_footprints_new[footprint_to_check] = footprint_update(peak_footprints_new[footprint_to_check], peak_footprints_new[footprint_to_check]['start'], peak_footprints[compared_footprint]['end'], peak_footprints[compared_footprint]['score']) + #print(peak_footprints_new[footprint_to_check]) + #merged_footprints[footprint_to_check].append(compared_footprint) + merged_array = merged_footprints[footprint_to_check] + merged_array.append(compared_footprint) + merged_footprints[footprint_to_check] = merged_array + #print(merged_footprints) + else: - print("merge now and add footprint_to_check to the merged_footprints") + #print("merge now and add footprint_to_check to the merged_footprints") merged_footprints[footprint_to_check] = merged_footprints.get(footprint_to_check, []) merged_footprints[footprint_to_check] = [compared_footprint] #UPDATE - + #print("update") + #print(peak_footprints_new[footprint_to_check]) + peak_footprints_new[footprint_to_check] = footprint_update(peak_footprints_new[footprint_to_check], peak_footprints_new[footprint_to_check]['start'], peak_footprints[compared_footprint]['end'], peak_footprints[compared_footprint]['score']) + #print(peak_footprints_new[footprint_to_check]) elif merge_footprints_left == False: #the right merging is enabled, start and end of compared footprint should be bigger than the start of the footprint_to_check - print("entered right merging") - print(start_to_check, end_to_check) - print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end']) + #print("entered right merging") + #print(start_to_check, end_to_check) + #print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end']) if end_to_check > peak_footprints[compared_footprint]['end']: if compared_footprint not in peak_footprints_new.keys(): - print("right, compared_footprint ", compared_footprint, " not in peak_footprints_new") + #print("right, compared_footprint ", compared_footprint, " not in peak_footprints_new") if any(compared_footprint in merged_footprints[x] for x in merged_footprints.keys()): - print("compared_footprint ", compared_footprint, " was a part of some merging") + #print("compared_footprint ", compared_footprint, " was a part of some merging") for k, v in merged_footprints.items(): if compared_footprint in v: main_footprint = k - print("make merging using the information from the merged_footprints and peak_footprints_new") + #print("make merging using the information from the merged_footprints and peak_footprints_new") #UPDATE + #print("update") + #print(merged_footprints) + peak_footprints_new[main_footprint] = footprint_update(peak_footprints_new[main_footprint], peak_footprints[main_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) + #print(merged_footprints) + #merged_footprints[main_footprint] = merged_footprints[main_footprint].append(footprint_to_check) + merged_array = merged_footprints[main_footprint] + merged_array.append(footprint_to_check) + merged_footprints[main_footprint] = merged_array else: - print("make normal update, using data from peak footprints") + #print("make normal update, using data from peak footprints") merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, []) merged_footprints[compared_footprint] = [footprint_to_check] - print(merged_footprints) + #print(merged_footprints) - print("saving ", compared_footprint) + #print("saving ", compared_footprint) peak_footprints_new[compared_footprint] = peak_footprints.get(compared_footprint, {}) peak_footprints_new[compared_footprint] = peak_footprints[compared_footprint] #UPDATE + #print("update") + #print(peak_footprints_new[compared_footprint]) + peak_footprints_new[compared_footprint] = footprint_update(peak_footprints_new[compared_footprint], peak_footprints_new[compared_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) + #print(peak_footprints_new[compared_footprint]) else: if compared_footprint in merged_footprints.keys(): - print("compared_footprint ", compared_footprint, " was as main for merging already") - #UPDATE - elif any(compared_footprint in merged_footprints[x] for x in merged_footprints.keys()): - #print("compared_footprint ", compared_footprint, " was a part of some merging") - for k, v in merged_footprints.items(): - if compared_footprint in v: - main_footprint = k + #print("compared_footprint ", compared_footprint, " was as main for merging already") #UPDATE + #print("update") + #print(peak_footprints_new[compared_footprint]) + peak_footprints_new[compared_footprint] = footprint_update(peak_footprints_new[compared_footprint], peak_footprints_new[compared_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) + #print(peak_footprints_new[compared_footprint]) + #merged_footprints[compared_footprint] = merged_footprints[compared_footprint].append(footprint_to_check) + merged_array = merged_footprints[compared_footprint] + merged_array.append(footprint_to_check) + merged_footprints[compared_footprint] = merged_array + #print(merged_footprints) else: - print("merge now and add compared_footprint to the merged_footprints") + #print("merge now and add compared_footprint to the merged_footprints") merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, []) merged_footprints[compared_footprint] = [compared_footprint] - print(merged_footprints) + #print(merged_footprints) #UPDATE - + #print("update") + #print(peak_footprints_new[compared_footprint]) + peak_footprints_new[compared_footprint] = footprint_update(peak_footprints_new[compared_footprint], peak_footprints_new[compared_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) + #print(peak_footprints_new[compared_footprint]) else: #save the current footprint, as it should not be merged peak_footprints_new[footprint_to_check] = peak_footprints_new.get(footprint_to_check, []) peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check] - print(len(peak_footprints_new)) - for footprint in peak_footprints_new: - print(footprint) - sys.exit() + #print(len(peak_footprints_new)) + #for footprint in peak_footprints_new: + # print(footprint) + #sys.exit() return peak_footprints_new +#this function is used to update the footprint that should to be merged with another one +#as input the footprint, needed the update, as well as new start, new end and the score of the merged footprint are passed +#the output of this function is a dictionary containing the new information about the footprint +def footprint_update(footprint, start, end, score): + new_len = end - start + new_score = (footprint['score'] + score) / 2 + + footprint['start'] = start + footprint['end'] = end + footprint['score'] = new_score + footprint['len'] = new_len + + return footprint + #this function uses the information provided from the .bed file to look for footprints within the peaks of interest #as input the information from the original bed file, as well as bigwig file is needed #the optional parameters window_length, step and percentage are needed as well to use the sliding window algorithm and work with the "background" score From b10273724b243764aa5722adb27fd0001c007ad8 Mon Sep 17 00:00:00 2001 From: anastasiia Date: Wed, 9 Jan 2019 22:54:13 +0100 Subject: [PATCH 3/9] fixing the typo footprints and not footrpints --- bin/1.1_footprint_extraction/footprints_extraction.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/1.1_footprint_extraction/footprints_extraction.py b/bin/1.1_footprint_extraction/footprints_extraction.py index 72b8b59..ab294d2 100644 --- a/bin/1.1_footprint_extraction/footprints_extraction.py +++ b/bin/1.1_footprint_extraction/footprints_extraction.py @@ -325,9 +325,9 @@ def check_and_merge(peak_footprints, max_bp_between): #print("make merging using the information from the merged_footprints and peak_footprints_new") #UPDATE #print("update") - #print(peak_footrpints_new[main_footprint]) - peak_footrpints_new[main_footprint] = footprint_update(peak_footprints_new[main_footprint], peak_footprints[compared_footprint]['start'], peak_footprints[main_footprint]['end'], peak_footprints[compared_footprint]['score']) - #print(peak_footrpints_new[main_footprint]) + #print(peak_footprints_new[main_footprint]) + peak_footprints_new[main_footprint] = footprint_update(peak_footprints_new[main_footprint], peak_footprints[compared_footprint]['start'], peak_footprints[main_footprint]['end'], peak_footprints[compared_footprint]['score']) + #print(peak_footprints_new[main_footprint]) merged_array = merged_footprints[main_footprint] merged_array.append(compared_footprint) merged_footprints[main_footprint] = merged_array From b2f37731d5864d15145816a49a410a1aab0dd9af Mon Sep 17 00:00:00 2001 From: anastasiia Date: Thu, 10 Jan 2019 12:13:35 +0100 Subject: [PATCH 4/9] the max_pos is the index, and so add always 1 after calculating it. Otherwise it could be 0 as index --- bin/1.1_footprint_extraction/footprints_extraction.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bin/1.1_footprint_extraction/footprints_extraction.py b/bin/1.1_footprint_extraction/footprints_extraction.py index ab294d2..f8fea5a 100644 --- a/bin/1.1_footprint_extraction/footprints_extraction.py +++ b/bin/1.1_footprint_extraction/footprints_extraction.py @@ -143,6 +143,8 @@ def save_footprint(footprint_count, footprint_scores, peak_footprints, chromosom else: max_pos = first_max_pos + max_pos = max_pos + 1 #as the index of an array starts with 0 + #calculate the score for the current footprint as mean of all scores from the bigwig file footprint_score = np.mean(footprint_scores) @@ -527,6 +529,10 @@ def write_to_bed_file(all_footprints, sorted_output_file_name): #write each footprint line for line to the output file for footprint in all_footprints: + #if footprint[1]['start'] >= footprint[1]['end']: + # print(footprint) + #if footprint[1]['max_pos'] == 0: + # print(footprint) output_file.write('\t'.join([footprint[1]['chromosom'], str(footprint[1]['start']), str(footprint[1]['end']), footprint[0], str(round(footprint[1]['score'], 6)), '.', str(footprint[1]['len']), str(footprint[1]['max_pos']), ';'.join(footprint[1]['bonus'])]) + '\n') output_file.close() From 2fe66fde83df9739a8e5ec4fce4045d849658172 Mon Sep 17 00:00:00 2001 From: anastasiia Date: Thu, 10 Jan 2019 13:51:30 +0100 Subject: [PATCH 5/9] adding a validation while printing to the output file. If there are problems with start/end positions or max_pos, the footprint will not be written to the output file --- .../footprints_extraction.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/bin/1.1_footprint_extraction/footprints_extraction.py b/bin/1.1_footprint_extraction/footprints_extraction.py index f8fea5a..b340dc7 100644 --- a/bin/1.1_footprint_extraction/footprints_extraction.py +++ b/bin/1.1_footprint_extraction/footprints_extraction.py @@ -529,11 +529,18 @@ def write_to_bed_file(all_footprints, sorted_output_file_name): #write each footprint line for line to the output file for footprint in all_footprints: - #if footprint[1]['start'] >= footprint[1]['end']: - # print(footprint) - #if footprint[1]['max_pos'] == 0: - # print(footprint) - output_file.write('\t'.join([footprint[1]['chromosom'], str(footprint[1]['start']), str(footprint[1]['end']), footprint[0], str(round(footprint[1]['score'], 6)), '.', str(footprint[1]['len']), str(footprint[1]['max_pos']), ';'.join(footprint[1]['bonus'])]) + '\n') + #validation of the footprints, if there is a problem with some of them, write which one it is + #first check the start and end positions + if footprint[1]['start'] >= footprint[1]['end']: + logger.info("The problem occured with start and end positions. This footprint will not be printed to the output file:") + logger.info(footprint) + #then check the max_pos + elif footprint[1]['max_pos'] == 0: + logger.info("The problem occured with max_pos of the footprint. This footprint will not be printed to the output file:") + logger.info(footprint) + #otherwise everything is fine, write to the output + else: + output_file.write('\t'.join([footprint[1]['chromosom'], str(footprint[1]['start']), str(footprint[1]['end']), footprint[0], str(round(footprint[1]['score'], 6)), '.', str(footprint[1]['len']), str(footprint[1]['max_pos']), ';'.join(footprint[1]['bonus'])]) + '\n') output_file.close() From 1523096fb18babca08a9adb8bbe1d11f611975f5 Mon Sep 17 00:00:00 2001 From: anastasiia Date: Thu, 10 Jan 2019 14:39:36 +0100 Subject: [PATCH 6/9] deleting the print statements, adding some comments --- .../footprints_extraction.py | 86 ++++--------------- 1 file changed, 18 insertions(+), 68 deletions(-) diff --git a/bin/1.1_footprint_extraction/footprints_extraction.py b/bin/1.1_footprint_extraction/footprints_extraction.py index b340dc7..f9f5321 100644 --- a/bin/1.1_footprint_extraction/footprints_extraction.py +++ b/bin/1.1_footprint_extraction/footprints_extraction.py @@ -285,8 +285,7 @@ def check_and_merge(peak_footprints, max_bp_between): peak_footprints_new = {} merged_footprints = {} - #print(len(peak_footprints)) - + #we need to check each footprint within this peak with the other footprints for possible merging for footprint_to_check in peak_footprints.keys(): start_to_check = peak_footprints[footprint_to_check]['start'] end_to_check = peak_footprints[footprint_to_check]['end'] @@ -296,142 +295,93 @@ def check_and_merge(peak_footprints, max_bp_between): for compared_footprint in peak_footprints.keys(): if start_to_check > peak_footprints[compared_footprint]['start'] and start_to_check - peak_footprints[compared_footprint]['end'] < max_bp_between: - #make compared_footprint longer - #print() - #print("footprint_to_check", footprint_to_check) - #print(compared_footprint, " + ", footprint_to_check) + #make compared_footprint longer: compared_footprint + footprint_to_check merge_footprints_left = False break elif end_to_check < peak_footprints[compared_footprint]['end'] and peak_footprints[compared_footprint]['start'] - end_to_check < max_bp_between: - #make footprint_to_check longer - #print() - #print("footprint_to_check", footprint_to_check) - #print(footprint_to_check, " + ", compared_footprint) + #make footprint_to_check longer: footprint_to_check + compared footprint merge_footprints_left = True break if merge_footprints_left: #the left merging is enabled, start and end of compared_footprint should be smaller than the start of the footprint_to_check - #print("entered left merging") - #print(start_to_check, end_to_check) - #print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end']) if start_to_check < peak_footprints[compared_footprint]['start']: if footprint_to_check not in peak_footprints_new.keys(): - #print("left, footprint to check ", footprint_to_check, " not in peak_footprints_new") - #even if it is not in keys, it could be merged already, so check first, if there are some footprints merged with this one - #print(merged_footprints) if any(footprint_to_check in merged_footprints[x] for x in merged_footprints.keys()): #true if footprint_to_check was already merged with someone - #print("footprint_to_check ", footprint_to_check, " was a part of some merging") for k, v in merged_footprints.items(): if footprint_to_check in v: main_footprint = k - #print("make merging using the information from the merged_footprints and peak_footprints_new") + #make merging using the information from the merged_footprints and peak_footprints_new #UPDATE - #print("update") - #print(peak_footprints_new[main_footprint]) peak_footprints_new[main_footprint] = footprint_update(peak_footprints_new[main_footprint], peak_footprints[compared_footprint]['start'], peak_footprints[main_footprint]['end'], peak_footprints[compared_footprint]['score']) - #print(peak_footprints_new[main_footprint]) merged_array = merged_footprints[main_footprint] merged_array.append(compared_footprint) merged_footprints[main_footprint] = merged_array - #print(merged_footprints) #there are no merged footprints with the footprint_to_check yet, so make a new one else: - #print("make normal update, using data from peak_footprints") + #add the compared footprint and footprint_to_check to the merged_footprints merged_footprints[footprint_to_check] = merged_footprints.get(footprint_to_check, []) merged_footprints[footprint_to_check] = [compared_footprint] - #print(merged_footprints) - #print("saving ", footprint_to_check) peak_footprints_new[footprint_to_check] = peak_footprints.get(footprint_to_check, {}) peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check] #<-- update #UPDATE - #print("update") - #print(peak_footprints_new[footprint_to_check]) peak_footprints_new[footprint_to_check] = footprint_update(peak_footprints_new[footprint_to_check], peak_footprints_new[footprint_to_check]['start'], peak_footprints[compared_footprint]['end'], peak_footprints[compared_footprint]['score']) - #print(peak_footprints_new[footprint_to_check]) else: #the footprint_to_check is in peak_footprints_new already #the footprint_to_check can only be the main part of merging before, check it if footprint_to_check in merged_footprints.keys(): - #print("footprint_to_check ", footprint_to_check, " was as main for merging already") + #footprint_to_check was as main for merging already #UPDATE - #print("update") - #print(peak_footprints_new[footprint_to_check]) peak_footprints_new[footprint_to_check] = footprint_update(peak_footprints_new[footprint_to_check], peak_footprints_new[footprint_to_check]['start'], peak_footprints[compared_footprint]['end'], peak_footprints[compared_footprint]['score']) - #print(peak_footprints_new[footprint_to_check]) - #merged_footprints[footprint_to_check].append(compared_footprint) + #add it to the merged_footprints as well merged_array = merged_footprints[footprint_to_check] merged_array.append(compared_footprint) merged_footprints[footprint_to_check] = merged_array - #print(merged_footprints) - else: - #print("merge now and add footprint_to_check to the merged_footprints") + #the footprint_to check was not merged with anything yet merged_footprints[footprint_to_check] = merged_footprints.get(footprint_to_check, []) merged_footprints[footprint_to_check] = [compared_footprint] #UPDATE - #print("update") - #print(peak_footprints_new[footprint_to_check]) peak_footprints_new[footprint_to_check] = footprint_update(peak_footprints_new[footprint_to_check], peak_footprints_new[footprint_to_check]['start'], peak_footprints[compared_footprint]['end'], peak_footprints[compared_footprint]['score']) - #print(peak_footprints_new[footprint_to_check]) - elif merge_footprints_left == False: #the right merging is enabled, start and end of compared footprint should be bigger than the start of the footprint_to_check - #print("entered right merging") - #print(start_to_check, end_to_check) - #print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end']) + #the right merging is enabled, start and end of compared footprint should be bigger than the start of the footprint_to_check + elif merge_footprints_left == False: if end_to_check > peak_footprints[compared_footprint]['end']: - if compared_footprint not in peak_footprints_new.keys(): - #print("right, compared_footprint ", compared_footprint, " not in peak_footprints_new") + if compared_footprint not in peak_footprints_new.keys(): if any(compared_footprint in merged_footprints[x] for x in merged_footprints.keys()): - #print("compared_footprint ", compared_footprint, " was a part of some merging") for k, v in merged_footprints.items(): if compared_footprint in v: main_footprint = k - #print("make merging using the information from the merged_footprints and peak_footprints_new") + #make merging using the information from the merged_footprints and peak_footprints_new #UPDATE - #print("update") - #print(merged_footprints) peak_footprints_new[main_footprint] = footprint_update(peak_footprints_new[main_footprint], peak_footprints[main_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) - #print(merged_footprints) - #merged_footprints[main_footprint] = merged_footprints[main_footprint].append(footprint_to_check) + #add to the merged_footprints merged_array = merged_footprints[main_footprint] merged_array.append(footprint_to_check) merged_footprints[main_footprint] = merged_array else: - #print("make normal update, using data from peak footprints") + #"make normal update, using data from peak footprints merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, []) merged_footprints[compared_footprint] = [footprint_to_check] - #print(merged_footprints) - #print("saving ", compared_footprint) peak_footprints_new[compared_footprint] = peak_footprints.get(compared_footprint, {}) peak_footprints_new[compared_footprint] = peak_footprints[compared_footprint] #UPDATE - #print("update") - #print(peak_footprints_new[compared_footprint]) peak_footprints_new[compared_footprint] = footprint_update(peak_footprints_new[compared_footprint], peak_footprints_new[compared_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) - #print(peak_footprints_new[compared_footprint]) else: if compared_footprint in merged_footprints.keys(): - #print("compared_footprint ", compared_footprint, " was as main for merging already") + #compared_footprint was as main for merging already #UPDATE - #print("update") - #print(peak_footprints_new[compared_footprint]) peak_footprints_new[compared_footprint] = footprint_update(peak_footprints_new[compared_footprint], peak_footprints_new[compared_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) - #print(peak_footprints_new[compared_footprint]) - #merged_footprints[compared_footprint] = merged_footprints[compared_footprint].append(footprint_to_check) + merged_array = merged_footprints[compared_footprint] merged_array.append(footprint_to_check) merged_footprints[compared_footprint] = merged_array - #print(merged_footprints) + else: - #print("merge now and add compared_footprint to the merged_footprints") + #merge now and add compared_footprint to the merged_footprints merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, []) merged_footprints[compared_footprint] = [compared_footprint] - #print(merged_footprints) #UPDATE - #print("update") - #print(peak_footprints_new[compared_footprint]) peak_footprints_new[compared_footprint] = footprint_update(peak_footprints_new[compared_footprint], peak_footprints_new[compared_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) - #print(peak_footprints_new[compared_footprint]) else: #save the current footprint, as it should not be merged peak_footprints_new[footprint_to_check] = peak_footprints_new.get(footprint_to_check, []) From a9331ac17ba1ebbf4ebb27e5bc5a5f3d045b3d56 Mon Sep 17 00:00:00 2001 From: anastasiia Date: Sat, 12 Jan 2019 16:08:36 +0100 Subject: [PATCH 7/9] making the list out of the dictionary in the merge function to ensure the sorting before check for merging --- .../footprints_extraction.py | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/bin/1.1_footprint_extraction/footprints_extraction.py b/bin/1.1_footprint_extraction/footprints_extraction.py index f9f5321..044be21 100644 --- a/bin/1.1_footprint_extraction/footprints_extraction.py +++ b/bin/1.1_footprint_extraction/footprints_extraction.py @@ -118,7 +118,7 @@ def make_bed_dictionary(bed_file): #an array containing scores (signals) from the bigwig file; the footprints already saved for the particular peak; information for the bed file: #chromosom, start and end position, as well as additional information from the original bed file #the function returns the count for footprints, as well as footprints for the current peak -def save_footprint(footprint_count, footprint_scores, peak_footprints, chromosom, footprint_start, footprint_end, bonus_info_from_bed): +def save_footprint(footprint_count, footprint_scores, peak_footprints, chromosom, footprint_start, footprint_end, bonus_info_from_bed): save_current_footprint = False @@ -282,11 +282,21 @@ def search_in_window(peak_footprints, footprint_count, chromosom, peak_start, pe #the input parameter are: dictionary with footprints within one peak, and the max number of bp allowed to be in between the footprints #the output is the renewed dictionary containing only the best footprints for the output file def check_and_merge(peak_footprints, max_bp_between): + #to ensure the merging works well, sort the footprints first arter start and end positions + #the sort can not be applied to a dictionary, we are making a list out of peak_footprints_dict + peak_footprints_list = sorted(peak_footprints.items(), key = lambda x : (x[1]['start'], x[1]['end']), reverse = False) + peak_footprints_new = {} merged_footprints = {} + for footprint in peak_footprints_list: + print(footprint[0]) #name of footprint + print(footprint[1]['start']) #the features of saved footprint + #we need to check each footprint within this peak with the other footprints for possible merging - for footprint_to_check in peak_footprints.keys(): + #for footprint_to_check in peak_footprints.keys(): + for footprint in peak_footprints_list: + footprint_to_check = footprint[0] #save the name of the footprint which we are working with now start_to_check = peak_footprints[footprint_to_check]['start'] end_to_check = peak_footprints[footprint_to_check]['end'] @@ -296,6 +306,9 @@ def check_and_merge(peak_footprints, max_bp_between): if start_to_check > peak_footprints[compared_footprint]['start'] and start_to_check - peak_footprints[compared_footprint]['end'] < max_bp_between: #make compared_footprint longer: compared_footprint + footprint_to_check + print() + print(start_to_check, peak_footprints[compared_footprint]['end']) + print("merge right", footprint_to_check, compared_footprint) merge_footprints_left = False break elif end_to_check < peak_footprints[compared_footprint]['end'] and peak_footprints[compared_footprint]['start'] - end_to_check < max_bp_between: @@ -344,12 +357,16 @@ def check_and_merge(peak_footprints, max_bp_between): peak_footprints_new[footprint_to_check] = footprint_update(peak_footprints_new[footprint_to_check], peak_footprints_new[footprint_to_check]['start'], peak_footprints[compared_footprint]['end'], peak_footprints[compared_footprint]['score']) #the right merging is enabled, start and end of compared footprint should be bigger than the start of the footprint_to_check elif merge_footprints_left == False: + print("entered right merging") if end_to_check > peak_footprints[compared_footprint]['end']: - if compared_footprint not in peak_footprints_new.keys(): + print(end_to_check, peak_footprints[compared_footprint]['end']) + if compared_footprint not in peak_footprints_new.keys(): + print("compared_footprint", compared_footprint, "not in peak_footprints_new.keys") if any(compared_footprint in merged_footprints[x] for x in merged_footprints.keys()): for k, v in merged_footprints.items(): if compared_footprint in v: main_footprint = k + print(main_footprint) #make merging using the information from the merged_footprints and peak_footprints_new #UPDATE peak_footprints_new[main_footprint] = footprint_update(peak_footprints_new[main_footprint], peak_footprints[main_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) @@ -358,7 +375,9 @@ def check_and_merge(peak_footprints, max_bp_between): merged_array.append(footprint_to_check) merged_footprints[main_footprint] = merged_array else: - #"make normal update, using data from peak footprints + print("make normal update") + print("save", compared_footprint) + #make normal update, using data from peak footprints merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, []) merged_footprints[compared_footprint] = [footprint_to_check] @@ -366,6 +385,7 @@ def check_and_merge(peak_footprints, max_bp_between): peak_footprints_new[compared_footprint] = peak_footprints[compared_footprint] #UPDATE peak_footprints_new[compared_footprint] = footprint_update(peak_footprints_new[compared_footprint], peak_footprints_new[compared_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) + print(peak_footprints_new[compared_footprint]) else: if compared_footprint in merged_footprints.keys(): #compared_footprint was as main for merging already @@ -388,8 +408,8 @@ def check_and_merge(peak_footprints, max_bp_between): peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check] #print(len(peak_footprints_new)) - #for footprint in peak_footprints_new: - # print(footprint) + for footprint in peak_footprints_new: + print(footprint, peak_footprints_new[footprint]) #sys.exit() return peak_footprints_new From 40088b2757e056c01321a20d825592fabc5579fa Mon Sep 17 00:00:00 2001 From: anastasiia Date: Sat, 12 Jan 2019 17:00:04 +0100 Subject: [PATCH 8/9] deleting print statements --- .../footprints_extraction.py | 22 ++----------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/bin/1.1_footprint_extraction/footprints_extraction.py b/bin/1.1_footprint_extraction/footprints_extraction.py index 044be21..2ac4862 100644 --- a/bin/1.1_footprint_extraction/footprints_extraction.py +++ b/bin/1.1_footprint_extraction/footprints_extraction.py @@ -289,13 +289,9 @@ def check_and_merge(peak_footprints, max_bp_between): peak_footprints_new = {} merged_footprints = {} - for footprint in peak_footprints_list: - print(footprint[0]) #name of footprint - print(footprint[1]['start']) #the features of saved footprint - #we need to check each footprint within this peak with the other footprints for possible merging #for footprint_to_check in peak_footprints.keys(): - for footprint in peak_footprints_list: + for footprint in peak_footprints_list: #work with sorted footprints footprint_to_check = footprint[0] #save the name of the footprint which we are working with now start_to_check = peak_footprints[footprint_to_check]['start'] end_to_check = peak_footprints[footprint_to_check]['end'] @@ -306,9 +302,6 @@ def check_and_merge(peak_footprints, max_bp_between): if start_to_check > peak_footprints[compared_footprint]['start'] and start_to_check - peak_footprints[compared_footprint]['end'] < max_bp_between: #make compared_footprint longer: compared_footprint + footprint_to_check - print() - print(start_to_check, peak_footprints[compared_footprint]['end']) - print("merge right", footprint_to_check, compared_footprint) merge_footprints_left = False break elif end_to_check < peak_footprints[compared_footprint]['end'] and peak_footprints[compared_footprint]['start'] - end_to_check < max_bp_between: @@ -357,16 +350,12 @@ def check_and_merge(peak_footprints, max_bp_between): peak_footprints_new[footprint_to_check] = footprint_update(peak_footprints_new[footprint_to_check], peak_footprints_new[footprint_to_check]['start'], peak_footprints[compared_footprint]['end'], peak_footprints[compared_footprint]['score']) #the right merging is enabled, start and end of compared footprint should be bigger than the start of the footprint_to_check elif merge_footprints_left == False: - print("entered right merging") if end_to_check > peak_footprints[compared_footprint]['end']: - print(end_to_check, peak_footprints[compared_footprint]['end']) - if compared_footprint not in peak_footprints_new.keys(): - print("compared_footprint", compared_footprint, "not in peak_footprints_new.keys") + if compared_footprint not in peak_footprints_new.keys(): if any(compared_footprint in merged_footprints[x] for x in merged_footprints.keys()): for k, v in merged_footprints.items(): if compared_footprint in v: main_footprint = k - print(main_footprint) #make merging using the information from the merged_footprints and peak_footprints_new #UPDATE peak_footprints_new[main_footprint] = footprint_update(peak_footprints_new[main_footprint], peak_footprints[main_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) @@ -375,8 +364,6 @@ def check_and_merge(peak_footprints, max_bp_between): merged_array.append(footprint_to_check) merged_footprints[main_footprint] = merged_array else: - print("make normal update") - print("save", compared_footprint) #make normal update, using data from peak footprints merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, []) merged_footprints[compared_footprint] = [footprint_to_check] @@ -385,7 +372,6 @@ def check_and_merge(peak_footprints, max_bp_between): peak_footprints_new[compared_footprint] = peak_footprints[compared_footprint] #UPDATE peak_footprints_new[compared_footprint] = footprint_update(peak_footprints_new[compared_footprint], peak_footprints_new[compared_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score']) - print(peak_footprints_new[compared_footprint]) else: if compared_footprint in merged_footprints.keys(): #compared_footprint was as main for merging already @@ -407,10 +393,6 @@ def check_and_merge(peak_footprints, max_bp_between): peak_footprints_new[footprint_to_check] = peak_footprints_new.get(footprint_to_check, []) peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check] - #print(len(peak_footprints_new)) - for footprint in peak_footprints_new: - print(footprint, peak_footprints_new[footprint]) - #sys.exit() return peak_footprints_new #this function is used to update the footprint that should to be merged with another one From 220a5082b166517139b406cb5613a80c7264560e Mon Sep 17 00:00:00 2001 From: anastasiia Date: Sat, 12 Jan 2019 17:03:07 +0100 Subject: [PATCH 9/9] changing the name of max_bp_between to min_gap --- .../footprints_extraction.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bin/1.1_footprint_extraction/footprints_extraction.py b/bin/1.1_footprint_extraction/footprints_extraction.py index 2ac4862..c4f4723 100644 --- a/bin/1.1_footprint_extraction/footprints_extraction.py +++ b/bin/1.1_footprint_extraction/footprints_extraction.py @@ -44,7 +44,7 @@ def parse_args(): parser.add_argument('--window_length', default=200, type=int, help='Please enter the length for a window, by defauld 200 bp.') parser.add_argument('--step', default=100, type=int, help='Please enter a step to move the window, by default 100 bp.') parser.add_argument('--percentage', default=0, type=int, help='Please enter a percentage to be added to background while searching for footprints, by default 0%%.') - parser.add_argument('--max_bp_between', default=6, type=int, help='Please enter the number of bp allowed to be in between two footprints, by default 6 bp.') + parser.add_argument('--min_gap', default=6, type=int, help='Please enter the number of bp allowed to be in between two footprints, by default 6 bp.') parser.add_argument('--silent', action='store_true', help='While working with data write the information only into ./footprints_extraction.log.') args = parser.parse_args() @@ -281,7 +281,7 @@ def search_in_window(peak_footprints, footprint_count, chromosom, peak_start, pe #if at this point there are still overlaps of footprints, this function will delete them #the input parameter are: dictionary with footprints within one peak, and the max number of bp allowed to be in between the footprints #the output is the renewed dictionary containing only the best footprints for the output file -def check_and_merge(peak_footprints, max_bp_between): +def check_and_merge(peak_footprints, min_gap): #to ensure the merging works well, sort the footprints first arter start and end positions #the sort can not be applied to a dictionary, we are making a list out of peak_footprints_dict peak_footprints_list = sorted(peak_footprints.items(), key = lambda x : (x[1]['start'], x[1]['end']), reverse = False) @@ -300,11 +300,11 @@ def check_and_merge(peak_footprints, max_bp_between): for compared_footprint in peak_footprints.keys(): - if start_to_check > peak_footprints[compared_footprint]['start'] and start_to_check - peak_footprints[compared_footprint]['end'] < max_bp_between: + if start_to_check > peak_footprints[compared_footprint]['start'] and start_to_check - peak_footprints[compared_footprint]['end'] < min_gap: #make compared_footprint longer: compared_footprint + footprint_to_check merge_footprints_left = False break - elif end_to_check < peak_footprints[compared_footprint]['end'] and peak_footprints[compared_footprint]['start'] - end_to_check < max_bp_between: + elif end_to_check < peak_footprints[compared_footprint]['end'] and peak_footprints[compared_footprint]['start'] - end_to_check < min_gap: #make footprint_to_check longer: footprint_to_check + compared footprint merge_footprints_left = True break @@ -413,7 +413,7 @@ def footprint_update(footprint, start, end, score): #as input the information from the original bed file, as well as bigwig file is needed #the optional parameters window_length, step and percentage are needed as well to use the sliding window algorithm and work with the "background" score #the output of this function is a dictionary contains all the found footprints ready to write to the output file -def find_peaks_from_bw(bed_dictionary, bw_file, window_length, step, percentage, max_bp_between): +def find_peaks_from_bw(bed_dictionary, bw_file, window_length, step, percentage, min_gap): logger.info('Looking for footprints within peaks...') @@ -437,7 +437,7 @@ def find_peaks_from_bw(bed_dictionary, bw_file, window_length, step, percentage, peak_footprints, footprint_count = search_in_window(peak_footprints, footprint_count, chromosom, peak_start, peak_end, scores_in_peak, window_length, bed_dictionary[header], step, percentage) #double check for overlaps and possibly merging of footprints having up to 5 bp in between - peak_footprints = check_and_merge(peak_footprints, max_bp_between) + peak_footprints = check_and_merge(peak_footprints, min_gap) for footprint_name in peak_footprints.keys(): all_footprints[footprint_name] = all_footprints.get(footprint_name, {}) @@ -533,7 +533,7 @@ def main(): logger.info("The script footprints_extraction.py was called using these parameters: " + str(vars(args))) bed_dictionary = make_bed_dictionary(args.bed) - all_footprints = find_peaks_from_bw(bed_dictionary, args.bigwig, args.window_length, args.step, args.percentage, args.max_bp_between) + all_footprints = find_peaks_from_bw(bed_dictionary, args.bigwig, args.window_length, args.step, args.percentage, args.min_gap) write_to_bed_file(all_footprints, args.output_file) logger.info("the number of peaks: " + str(len(bed_dictionary)))