Skip to content

Commit

Permalink
fixing the bug with merging. The code will be edited soon to look nic…
Browse files Browse the repository at this point in the history
…e and easy to understand
  • Loading branch information
anastasiia committed Jan 9, 2019
1 parent 31315e6 commit d4b1245
Showing 1 changed file with 102 additions and 49 deletions.
151 changes: 102 additions & 49 deletions bin/1.1_footprint_extraction/footprints_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def check_and_merge(peak_footprints, max_bp_between):
peak_footprints_new = {}
merged_footprints = {}

print(len(peak_footprints))
#print(len(peak_footprints))

for footprint_to_check in peak_footprints.keys():
start_to_check = peak_footprints[footprint_to_check]['start']
Expand All @@ -294,113 +294,166 @@ def check_and_merge(peak_footprints, max_bp_between):

if start_to_check > peak_footprints[compared_footprint]['start'] and start_to_check - peak_footprints[compared_footprint]['end'] < max_bp_between:
#make compared_footprint longer
print()
print("footprint_to_check", footprint_to_check)
print(compared_footprint, " + ", footprint_to_check)
#print()
#print("footprint_to_check", footprint_to_check)
#print(compared_footprint, " + ", footprint_to_check)
merge_footprints_left = False
break
elif end_to_check < peak_footprints[compared_footprint]['end'] and peak_footprints[compared_footprint]['start'] - end_to_check < max_bp_between:
#make footprint_to_check longer
print()
print("footprint_to_check", footprint_to_check)
print(footprint_to_check, " + ", compared_footprint)
#print()
#print("footprint_to_check", footprint_to_check)
#print(footprint_to_check, " + ", compared_footprint)
merge_footprints_left = True
break

if merge_footprints_left: #the left merging is enabled, start and end of compared_footprint should be smaller than the start of the footprint_to_check
print("entered left merging")
print(start_to_check, end_to_check)
print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end'])
#print("entered left merging")
#print(start_to_check, end_to_check)
#print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end'])
if start_to_check < peak_footprints[compared_footprint]['start']:
if footprint_to_check not in peak_footprints_new.keys():
print("left, footprint to check ", footprint_to_check, " not in peak_footprints_new")
#UPDATE
#print("left, footprint to check ", footprint_to_check, " not in peak_footprints_new")
#even if it is not in keys, it could be merged already, so check first, if there are some footprints merged with this one
#print(merged_footprints)
if any(footprint_to_check in merged_footprints[x] for x in merged_footprints.keys()): #true if footprint_to_check was already merged with someone
#print("footprint_to_check ", footprint_to_check, " was a part of some merging")
for k, v in merged_footprints.items():
if footprint_to_check in v:
main_footprint = k
print("make merging using the information from the merged_footprints and peak_footprints_new")
#print("make merging using the information from the merged_footprints and peak_footprints_new")
#UPDATE
#print("update")
#print(peak_footrpints_new[main_footprint])
peak_footrpints_new[main_footprint] = footprint_update(peak_footprints_new[main_footprint], peak_footprints[compared_footprint]['start'], peak_footprints[main_footprint]['end'], peak_footprints[compared_footprint]['score'])
#print(peak_footrpints_new[main_footprint])
merged_array = merged_footprints[main_footprint]
merged_array.append(compared_footprint)
merged_footprints[main_footprint] = merged_array
#print(merged_footprints)
#there are no merged footprints with the footprint_to_check yet, so make a new one
else:
print("make normal update, using data from peak_footprints")
#print("make normal update, using data from peak_footprints")
merged_footprints[footprint_to_check] = merged_footprints.get(footprint_to_check, [])
merged_footprints[footprint_to_check] = [compared_footprint]
print(merged_footprints)
#print(merged_footprints)

print("saving ", footprint_to_check)
#print("saving ", footprint_to_check)
peak_footprints_new[footprint_to_check] = peak_footprints.get(footprint_to_check, {})
peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check] #<-- update
#UPDATE

#print("update")
#print(peak_footprints_new[footprint_to_check])
peak_footprints_new[footprint_to_check] = footprint_update(peak_footprints_new[footprint_to_check], peak_footprints_new[footprint_to_check]['start'], peak_footprints[compared_footprint]['end'], peak_footprints[compared_footprint]['score'])
#print(peak_footprints_new[footprint_to_check])
else: #the footprint_to_check is in peak_footprints_new already
#the footprint_to_check can only be the main part of merging before, check it
if footprint_to_check in merged_footprints.keys():
print("footprint_to_check ", footprint_to_check, " was as main for merging already")
#UPDATE
elif any(footprint_to_check in merged_footprints[x] for x in merged_footprints.keys()):
print("footprint_to_check ", footprint_to_check, " was a part of some merging")
for k, v in merged_footprints.items():
if footprint_to_check in v:
main_footprint = k
#print("footprint_to_check ", footprint_to_check, " was as main for merging already")
#UPDATE
#print("update")
#print(peak_footprints_new[footprint_to_check])
peak_footprints_new[footprint_to_check] = footprint_update(peak_footprints_new[footprint_to_check], peak_footprints_new[footprint_to_check]['start'], peak_footprints[compared_footprint]['end'], peak_footprints[compared_footprint]['score'])
#print(peak_footprints_new[footprint_to_check])
#merged_footprints[footprint_to_check].append(compared_footprint)
merged_array = merged_footprints[footprint_to_check]
merged_array.append(compared_footprint)
merged_footprints[footprint_to_check] = merged_array
#print(merged_footprints)

else:
print("merge now and add footprint_to_check to the merged_footprints")
#print("merge now and add footprint_to_check to the merged_footprints")
merged_footprints[footprint_to_check] = merged_footprints.get(footprint_to_check, [])
merged_footprints[footprint_to_check] = [compared_footprint]
#UPDATE

#print("update")
#print(peak_footprints_new[footprint_to_check])
peak_footprints_new[footprint_to_check] = footprint_update(peak_footprints_new[footprint_to_check], peak_footprints_new[footprint_to_check]['start'], peak_footprints[compared_footprint]['end'], peak_footprints[compared_footprint]['score'])
#print(peak_footprints_new[footprint_to_check])
elif merge_footprints_left == False: #the right merging is enabled, start and end of compared footprint should be bigger than the start of the footprint_to_check
print("entered right merging")
print(start_to_check, end_to_check)
print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end'])
#print("entered right merging")
#print(start_to_check, end_to_check)
#print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end'])
if end_to_check > peak_footprints[compared_footprint]['end']:
if compared_footprint not in peak_footprints_new.keys():
print("right, compared_footprint ", compared_footprint, " not in peak_footprints_new")
#print("right, compared_footprint ", compared_footprint, " not in peak_footprints_new")
if any(compared_footprint in merged_footprints[x] for x in merged_footprints.keys()):
print("compared_footprint ", compared_footprint, " was a part of some merging")
#print("compared_footprint ", compared_footprint, " was a part of some merging")
for k, v in merged_footprints.items():
if compared_footprint in v:
main_footprint = k
print("make merging using the information from the merged_footprints and peak_footprints_new")
#print("make merging using the information from the merged_footprints and peak_footprints_new")
#UPDATE
#print("update")
#print(merged_footprints)
peak_footprints_new[main_footprint] = footprint_update(peak_footprints_new[main_footprint], peak_footprints[main_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score'])
#print(merged_footprints)
#merged_footprints[main_footprint] = merged_footprints[main_footprint].append(footprint_to_check)
merged_array = merged_footprints[main_footprint]
merged_array.append(footprint_to_check)
merged_footprints[main_footprint] = merged_array
else:
print("make normal update, using data from peak footprints")
#print("make normal update, using data from peak footprints")
merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, [])
merged_footprints[compared_footprint] = [footprint_to_check]
print(merged_footprints)
#print(merged_footprints)

print("saving ", compared_footprint)
#print("saving ", compared_footprint)
peak_footprints_new[compared_footprint] = peak_footprints.get(compared_footprint, {})
peak_footprints_new[compared_footprint] = peak_footprints[compared_footprint]
#UPDATE
#print("update")
#print(peak_footprints_new[compared_footprint])
peak_footprints_new[compared_footprint] = footprint_update(peak_footprints_new[compared_footprint], peak_footprints_new[compared_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score'])
#print(peak_footprints_new[compared_footprint])
else:
if compared_footprint in merged_footprints.keys():
print("compared_footprint ", compared_footprint, " was as main for merging already")
#UPDATE
elif any(compared_footprint in merged_footprints[x] for x in merged_footprints.keys()):
#print("compared_footprint ", compared_footprint, " was a part of some merging")
for k, v in merged_footprints.items():
if compared_footprint in v:
main_footprint = k
#print("compared_footprint ", compared_footprint, " was as main for merging already")
#UPDATE
#print("update")
#print(peak_footprints_new[compared_footprint])
peak_footprints_new[compared_footprint] = footprint_update(peak_footprints_new[compared_footprint], peak_footprints_new[compared_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score'])
#print(peak_footprints_new[compared_footprint])
#merged_footprints[compared_footprint] = merged_footprints[compared_footprint].append(footprint_to_check)
merged_array = merged_footprints[compared_footprint]
merged_array.append(footprint_to_check)
merged_footprints[compared_footprint] = merged_array
#print(merged_footprints)
else:
print("merge now and add compared_footprint to the merged_footprints")
#print("merge now and add compared_footprint to the merged_footprints")
merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, [])
merged_footprints[compared_footprint] = [compared_footprint]
print(merged_footprints)
#print(merged_footprints)
#UPDATE

#print("update")
#print(peak_footprints_new[compared_footprint])
peak_footprints_new[compared_footprint] = footprint_update(peak_footprints_new[compared_footprint], peak_footprints_new[compared_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score'])
#print(peak_footprints_new[compared_footprint])

else: #save the current footprint, as it should not be merged
peak_footprints_new[footprint_to_check] = peak_footprints_new.get(footprint_to_check, [])
peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check]

print(len(peak_footprints_new))
for footprint in peak_footprints_new:
print(footprint)
sys.exit()
#print(len(peak_footprints_new))
#for footprint in peak_footprints_new:
# print(footprint)
#sys.exit()
return peak_footprints_new

#this function updates a footprint that is to be merged with another one
#input: the footprint that needs the update, the new start, the new end and the score of the footprint being merged in
#output: a dictionary containing the updated information about the footprint
def footprint_update(footprint, start, end, score):
    """Rewrite a footprint record in place after merging it with another one.

    footprint -- record to update; its current 'score' entry is read
    start     -- new start position to store
    end       -- new end position to store
    score     -- score of the footprint being merged in

    The stored 'score' becomes the mean of the previous score and ``score``,
    and the stored 'len' becomes ``end - start``.

    Returns the very same object that was passed in (mutated), not a copy.
    """
    # compute both derived values before touching the record, so a failure
    # here leaves the footprint unchanged
    merged_len = end - start
    averaged_score = (footprint['score'] + score) / 2

    # keys are written in the order start, end, score, len
    replacements = (
        ('start', start),
        ('end', end),
        ('score', averaged_score),
        ('len', merged_len),
    )
    for field, value in replacements:
        footprint[field] = value

    return footprint

#this function uses the information provided from the .bed file to look for footprints within the peaks of interest
#as input, the information from the original bed file as well as the bigwig file is needed
#the optional parameters window_length, step and percentage are also needed to use the sliding window algorithm and work with the "background" score
Expand Down

0 comments on commit d4b1245

Please sign in to comment.