Skip to content

Commit

Permalink
the structure is ready, this needs to be adjusted to the data the foot…
Browse files Browse the repository at this point in the history
…print_extraction parses
  • Loading branch information
anastasiia committed Jan 9, 2019
1 parent 0308a0c commit 31315e6
Showing 1 changed file with 96 additions and 20 deletions.
116 changes: 96 additions & 20 deletions bin/1.1_footprint_extraction/footprints_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,9 @@ def search_in_window(peak_footprints, footprint_count, chromosom, peak_start, pe
#the output is the updated dictionary containing only the best footprints for the output file
def check_and_merge(peak_footprints, max_bp_between):
peak_footprints_new = {}
merged_footprints = {}

print(len(peak_footprints))

for footprint_to_check in peak_footprints.keys():
start_to_check = peak_footprints[footprint_to_check]['start']
Expand All @@ -291,38 +294,111 @@ def check_and_merge(peak_footprints, max_bp_between):

if start_to_check > peak_footprints[compared_footprint]['start'] and start_to_check - peak_footprints[compared_footprint]['end'] < max_bp_between:
#make compared_footprint longer
print()
print("footprint_to_check", footprint_to_check)
print(compared_footprint, " + ", footprint_to_check)
merge_footprints_left = False
break
elif end_to_check < peak_footprints[compared_footprint]['end'] and peak_footprints[compared_footprint]['start'] - end_to_check < max_bp_between:
#make footprint_to_check longer
print()
print("footprint_to_check", footprint_to_check)
print(footprint_to_check, " + ", compared_footprint)
merge_footprints_left = True
break

if merge_footprints_left: #if the merging left is enabled
#check if this footprint can be merged with the compared_footprint
#if compared_footprint is not in peak_footprints_new.keys(), the next loop iteration will check this footprint, so there is no need for a double check now
if start_to_check < peak_footprints[compared_footprint]['start'] and compared_footprint in peak_footprints_new.keys():
#update the start position
peak_footprints_new[compared_footprint]['start'] = start_to_check
#update the length
peak_footprints_new[compared_footprint]['len'] = peak_footprints[compared_footprint]['end'] - start_to_check
#update the score
peak_footprints_new[compared_footprint]['score'] = (peak_footprints[footprint_to_check]['score'] + peak_footprints[compared_footprint]['score']) / 2

elif merge_footprints_left == False: #otherwise merge right
#check if the merging is possible
if end_to_check > peak_footprints[compared_footprint]['end'] and compared_footprint in peak_footprints_new.keys():
#update the end position
peak_footprints_new[compared_footprint]['end'] = end_to_check
#update the length
peak_footprints_new[compared_footprint]['len'] = end_to_check - peak_footprints[compared_footprint]['end']
#update the score
peak_footprints_new[compared_footprint]['score'] = (peak_footprints[footprint_to_check]['score'] + peak_footprints[compared_footprint]['score']) / 2
if merge_footprints_left: #the left merging is enabled, start and end of compared_footprint should be smaller than the start of the footprint_to_check
print("entered left merging")
print(start_to_check, end_to_check)
print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end'])
if start_to_check < peak_footprints[compared_footprint]['start']:
if footprint_to_check not in peak_footprints_new.keys():
print("left, footprint to check ", footprint_to_check, " not in peak_footprints_new")
#UPDATE
if any(footprint_to_check in merged_footprints[x] for x in merged_footprints.keys()): #true if footprint_to_check was already merged with someone
#print("footprint_to_check ", footprint_to_check, " was a part of some merging")
for k, v in merged_footprints.items():
if footprint_to_check in v:
main_footprint = k
print("make merging using the information from the merged_footprints and peak_footprints_new")
#UPDATE
else:
print("make normal update, using data from peak_footprints")
merged_footprints[footprint_to_check] = merged_footprints.get(footprint_to_check, [])
merged_footprints[footprint_to_check] = [compared_footprint]
print(merged_footprints)

print("saving ", footprint_to_check)
peak_footprints_new[footprint_to_check] = peak_footprints.get(footprint_to_check, {})
peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check] #<-- update
#UPDATE

else: #the footprint_to_check is in peak_footprints_new already
if footprint_to_check in merged_footprints.keys():
print("footprint_to_check ", footprint_to_check, " was as main for merging already")
#UPDATE
elif any(footprint_to_check in merged_footprints[x] for x in merged_footprints.keys()):
print("footprint_to_check ", footprint_to_check, " was a part of some merging")
for k, v in merged_footprints.items():
if footprint_to_check in v:
main_footprint = k
#UPDATE
else:
print("merge now and add footprint_to_check to the merged_footprints")
merged_footprints[footprint_to_check] = merged_footprints.get(footprint_to_check, [])
merged_footprints[footprint_to_check] = [compared_footprint]
#UPDATE

elif merge_footprints_left == False: #the right merging is enabled, start and end of compared footprint should be bigger than the start of the footprint_to_check
print("entered right merging")
print(start_to_check, end_to_check)
print(peak_footprints[compared_footprint]['start'], peak_footprints[compared_footprint]['end'])
if end_to_check > peak_footprints[compared_footprint]['end']:
if compared_footprint not in peak_footprints_new.keys():
print("right, compared_footprint ", compared_footprint, " not in peak_footprints_new")
if any(compared_footprint in merged_footprints[x] for x in merged_footprints.keys()):
print("compared_footprint ", compared_footprint, " was a part of some merging")
for k, v in merged_footprints.items():
if compared_footprint in v:
main_footprint = k
print("make merging using the information from the merged_footprints and peak_footprints_new")
#UPDATE
else:
print("make normal update, using data from peak footprints")
merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, [])
merged_footprints[compared_footprint] = [footprint_to_check]
print(merged_footprints)

print("saving ", compared_footprint)
peak_footprints_new[compared_footprint] = peak_footprints.get(compared_footprint, {})
peak_footprints_new[compared_footprint] = peak_footprints[compared_footprint]
#UPDATE
else:
if compared_footprint in merged_footprints.keys():
print("compared_footprint ", compared_footprint, " was as main for merging already")
#UPDATE
elif any(compared_footprint in merged_footprints[x] for x in merged_footprints.keys()):
#print("compared_footprint ", compared_footprint, " was a part of some merging")
for k, v in merged_footprints.items():
if compared_footprint in v:
main_footprint = k
#UPDATE
else:
print("merge now and add compared_footprint to the merged_footprints")
merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, [])
merged_footprints[compared_footprint] = [compared_footprint]
print(merged_footprints)
#UPDATE


else: #save the current footprint, as it should not be merged
peak_footprints_new[footprint_to_check] = peak_footprints_new.get(footprint_to_check, [])
peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check]

print(len(peak_footprints_new))
for footprint in peak_footprints_new:
print(footprint)
sys.exit()
return peak_footprints_new

#this function uses the information provided from the .bed file to look for footprints within the peaks of interest
Expand Down

0 comments on commit 31315e6

Please sign in to comment.