Skip to content

Commit

Permalink
fixing the bug with overlaps + adding a merging function if the footp…
Browse files Browse the repository at this point in the history
…rints have small ammount of bp in between
  • Loading branch information
anastasiia committed Jan 7, 2019
1 parent aa1e9f0 commit c452934
Showing 1 changed file with 53 additions and 4 deletions.
57 changes: 53 additions & 4 deletions bin/1.1_footprint_extraction/footprints_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,10 +273,59 @@ def search_in_window(peak_footprints, footprint_count, chromosom, peak_start, pe

return peak_footprints, footprint_count

def check_for_overlap_and_merge(peak_footprints):
def check_and_merge(peak_footprints):
peak_footprints_new = {}

for footprint_to_check in peak_footprints.keys():
print(footprint_to_check, peak_footprints[footprint_to_check])
return peak_footprints
start_to_check = peak_footprints[footprint_to_check]['start']
end_to_check = peak_footprints[footprint_to_check]['end']

#if not peak_footprints_new: #the dictionary is empty, save the first footprint
# peak_footprints_new[footprint_to_check] = peak_footprints_new.get(footprint_to_check, [])
# peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check]
#print(peak_footprints_new.keys())

merge_footprints_left = None

for compared_footprint in peak_footprints.keys():

if start_to_check > peak_footprints[compared_footprint]['start'] and start_to_check - peak_footprints[compared_footprint]['end'] <= 5: #5 is hardcoded !!!!!! :(
#make compared_footprint longer
print("MERGE", footprint_to_check, compared_footprint)
merge_footprints_left = False
break
elif end_to_check < peak_footprints[compared_footprint]['end'] and peak_footprints[compared_footprint]['start'] - end_to_check <= 5:
#make footprint_to_check longer
print("MERGE", footprint_to_check, compared_footprint)
merge_footprints_left = True
break

if merge_footprints_left: #if it is true
print("merging left")
if start_to_check < peak_footprints[compared_footprint]['start'] and compared_footprint in peak_footprints_new.keys():
print("make longer")
print("before merging" + str(peak_footprints_new[compared_footprint]))
peak_footprints_new[compared_footprint]['start'] = start_to_check
peak_footprints_new[compared_footprint]['len'] = peak_footprints[compared_footprint]['end'] - start_to_check
peak_footprints_new[compared_footprint]['score'] = (peak_footprints[footprint_to_check]['score'] + peak_footprints[compared_footprint]['score']) / 2
print(peak_footprints_new[compared_footprint])

elif merge_footprints_left == False:
print("merging right")
if end_to_check > peak_footprints[compared_footprint]['end'] and compared_footprint in peak_footprints_new.keys():
print("make longer")
print("before merging" + str(peak_footprints_new[compared_footprint]))
peak_footprints_new[compared_footprint]['end'] = end_to_check
peak_footprints_new[compared_footprint]['len'] = end_to_check - peak_footprints[compared_footprint]['end']
peak_footprints_new[compared_footprint]['score'] = (peak_footprints[footprint_to_check]['score'] + peak_footprints[compared_footprint]['score']) / 2
print(peak_footprints_new[compared_footprint])
else:
print("saving " + footprint_to_check)
peak_footprints_new[footprint_to_check] = peak_footprints_new.get(footprint_to_check, [])
peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check]

#sys.exit()
return peak_footprints_new

#this function uses the information provided from the .bed file to look for footprints within the peaks of interest
#as input the information from the original bed file, as well as bigwig file is needed
Expand Down Expand Up @@ -307,7 +356,7 @@ def find_peaks_from_bw(bed_dictionary, bw_file, window_length, step, percentage)
peak_footprints, footprint_count = search_in_window(peak_footprints, footprint_count, chromosom, peak_start, peak_end, scores_in_peak, window_length, bed_dictionary[header], step, percentage)

#double check for overlaps and possibly merging of footprints having up to 5 bp in between
peak_footprints = check_for_overlap_and_merge(peak_footprints)
peak_footprints = check_and_merge(peak_footprints)

for footprint_name in peak_footprints.keys():
all_footprints[footprint_name] = all_footprints.get(footprint_name, {})
Expand Down

0 comments on commit c452934

Please sign in to comment.