Skip to content

Commit

Permalink
fixing the bug with footprint length
Browse files Browse the repository at this point in the history
  • Loading branch information
anastasiia committed Jan 7, 2019
1 parent 18a4fb5 commit aa1e9f0
Showing 1 changed file with 13 additions and 2 deletions.
15 changes: 13 additions & 2 deletions bin/1.1_footprint_extraction/footprints_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def save_footprint(footprint_count, footprint_scores, peak_footprints, chromosom
old_start = peak_footprints[existing_footprint_name]['start']
old_end = peak_footprints[existing_footprint_name]['end']
old_score = peak_footprints[existing_footprint_name]['score']
old_length = peak_footprints[existing_footprint_name]['len']

if footprint_start >= old_start and footprint_start <= old_end: #the start of the new footprint is between the start and end of an old footprint
if footprint_end > old_end: #the new footprint is not completely inside the old one
Expand All @@ -163,19 +164,21 @@ def save_footprint(footprint_count, footprint_scores, peak_footprints, chromosom

peak_footprints[existing_footprint_name]['end'] = footprint_end
peak_footprints[existing_footprint_name]['score'] = footprint_score
peak_footprints[existing_footprint_name]['len'] = footprint_end - old_start
#we can not update the max_pos as we do not have the information about scores array of the existing footprint
#else: the new footprint is completely inside the old one, do nothing
save_current_footprint = False
break

elif footprint_end >= old_start and footprint_end <= old_end: #the end of the new footprint is between the start and end of an old footprint
if footprint_start < old_start: #the new footprint is not completely inside the old one
if footprint_start < old_start: #the new footprint is not completely inside the old one
#update the information about the existing footprint
#find the average of both scores
footprint_score = (peak_footprints[existing_footprint_name]['score'] + footprint_score) / 2

peak_footprints[existing_footprint_name]['start'] = footprint_start
peak_footprints[existing_footprint_name]['score'] = footprint_score
peak_footprints[existing_footprint_name]['len'] = old_end - footprint_start
#else do nothing
save_current_footprint = False
break
Expand Down Expand Up @@ -270,6 +273,11 @@ def search_in_window(peak_footprints, footprint_count, chromosom, peak_start, pe

return peak_footprints, footprint_count

def check_for_overlap_and_merge(peak_footprints):
    """Print every footprint stored for the current peak and return the mapping.

    Each entry of peak_footprints is written to stdout as the footprint name
    followed by the record saved under that name. No overlap detection or
    merging is performed here yet: the dictionary is handed back unmodified.

    peak_footprints -- dictionary mapping a footprint name to its record
    Returns the very same peak_footprints object that was passed in.
    """
    #report each footprint name together with the record stored for it
    for footprint_name, footprint_record in peak_footprints.items():
        print(footprint_name, footprint_record)

    return peak_footprints

#this function uses the information provided in the .bed file to look for footprints within the peaks of interest
#as input, the information from the original .bed file as well as the bigwig file is needed
#the optional parameters window_length, step and percentage are also needed to run the sliding window algorithm and to work with the "background" score
Expand All @@ -295,9 +303,12 @@ def find_peaks_from_bw(bed_dictionary, bw_file, window_length, step, percentage)
peak_end = int(positions[1])

scores_in_peak = np.nan_to_num(np.array(list(bw_open.values(chromosom, peak_start, peak_end)))) #save the scores to an array

print()
peak_footprints, footprint_count = search_in_window(peak_footprints, footprint_count, chromosom, peak_start, peak_end, scores_in_peak, window_length, bed_dictionary[header], step, percentage)

#double check for overlaps and possibly merging of footprints having up to 5 bp in between
peak_footprints = check_for_overlap_and_merge(peak_footprints)

for footprint_name in peak_footprints.keys():
all_footprints[footprint_name] = all_footprints.get(footprint_name, {})
all_footprints[footprint_name] = peak_footprints[footprint_name]
Expand Down

0 comments on commit aa1e9f0

Please sign in to comment.