Skip to content

Commit

Permalink
making the list out of the dictionary in the merge function to ensure…
Browse files Browse the repository at this point in the history
… the sorting before check for merging
  • Loading branch information
anastasiia committed Jan 12, 2019
1 parent 1523096 commit a9331ac
Showing 1 changed file with 26 additions and 6 deletions.
32 changes: 26 additions & 6 deletions bin/1.1_footprint_extraction/footprints_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def make_bed_dictionary(bed_file):
#an array containing scores (signals) from the bigwig file; the footprints already saved for the particular peak; information for the bed file:
#chromosom, start and end position, as well as additional information from the original bed file
#the function returns the count for footprints, as well as footprints for the current peak
def save_footprint(footprint_count, footprint_scores, peak_footprints, chromosom, footprint_start, footprint_end, bonus_info_from_bed):
def save_footprint(footprint_count, footprint_scores, peak_footprints, chromosom, footprint_start, footprint_end, bonus_info_from_bed):

save_current_footprint = False

Expand Down Expand Up @@ -282,11 +282,21 @@ def search_in_window(peak_footprints, footprint_count, chromosom, peak_start, pe
#the input parameters are: a dictionary with the footprints within one peak, and the max number of bp allowed between two footprints
#the output is the renewed dictionary containing only the best footprints for the output file
def check_and_merge(peak_footprints, max_bp_between):
#to ensure the merging works well, sort the footprints first by start and then by end position
#a dictionary can not be sorted directly, so we make a sorted list of (name, footprint) pairs out of peak_footprints
peak_footprints_list = sorted(peak_footprints.items(), key = lambda x : (x[1]['start'], x[1]['end']), reverse = False)

peak_footprints_new = {}
merged_footprints = {}

for footprint in peak_footprints_list:
print(footprint[0]) #name of footprint
print(footprint[1]['start']) #the features of saved footprint

#we need to check each footprint within this peak with the other footprints for possible merging
for footprint_to_check in peak_footprints.keys():
#for footprint_to_check in peak_footprints.keys():
for footprint in peak_footprints_list:
footprint_to_check = footprint[0] #save the name of the footprint which we are working with now
start_to_check = peak_footprints[footprint_to_check]['start']
end_to_check = peak_footprints[footprint_to_check]['end']

Expand All @@ -296,6 +306,9 @@ def check_and_merge(peak_footprints, max_bp_between):

if start_to_check > peak_footprints[compared_footprint]['start'] and start_to_check - peak_footprints[compared_footprint]['end'] < max_bp_between:
#make compared_footprint longer: compared_footprint + footprint_to_check
print()
print(start_to_check, peak_footprints[compared_footprint]['end'])
print("merge right", footprint_to_check, compared_footprint)
merge_footprints_left = False
break
elif end_to_check < peak_footprints[compared_footprint]['end'] and peak_footprints[compared_footprint]['start'] - end_to_check < max_bp_between:
Expand Down Expand Up @@ -344,12 +357,16 @@ def check_and_merge(peak_footprints, max_bp_between):
peak_footprints_new[footprint_to_check] = footprint_update(peak_footprints_new[footprint_to_check], peak_footprints_new[footprint_to_check]['start'], peak_footprints[compared_footprint]['end'], peak_footprints[compared_footprint]['score'])
#the right merging is enabled, start and end of compared footprint should be bigger than the start of the footprint_to_check
elif merge_footprints_left == False:
print("entered right merging")
if end_to_check > peak_footprints[compared_footprint]['end']:
if compared_footprint not in peak_footprints_new.keys():
print(end_to_check, peak_footprints[compared_footprint]['end'])
if compared_footprint not in peak_footprints_new.keys():
print("compared_footprint", compared_footprint, "not in peak_footprints_new.keys")
if any(compared_footprint in merged_footprints[x] for x in merged_footprints.keys()):
for k, v in merged_footprints.items():
if compared_footprint in v:
main_footprint = k
print(main_footprint)
#make merging using the information from the merged_footprints and peak_footprints_new
#UPDATE
peak_footprints_new[main_footprint] = footprint_update(peak_footprints_new[main_footprint], peak_footprints[main_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score'])
Expand All @@ -358,14 +375,17 @@ def check_and_merge(peak_footprints, max_bp_between):
merged_array.append(footprint_to_check)
merged_footprints[main_footprint] = merged_array
else:
#"make normal update, using data from peak footprints
print("make normal update")
print("save", compared_footprint)
#make normal update, using data from peak footprints
merged_footprints[compared_footprint] = merged_footprints.get(compared_footprint, [])
merged_footprints[compared_footprint] = [footprint_to_check]

peak_footprints_new[compared_footprint] = peak_footprints.get(compared_footprint, {})
peak_footprints_new[compared_footprint] = peak_footprints[compared_footprint]
#UPDATE
peak_footprints_new[compared_footprint] = footprint_update(peak_footprints_new[compared_footprint], peak_footprints_new[compared_footprint]['start'], peak_footprints[footprint_to_check]['end'], peak_footprints[footprint_to_check]['score'])
print(peak_footprints_new[compared_footprint])
else:
if compared_footprint in merged_footprints.keys():
#compared_footprint has already been used as the main footprint for a merge
Expand All @@ -388,8 +408,8 @@ def check_and_merge(peak_footprints, max_bp_between):
peak_footprints_new[footprint_to_check] = peak_footprints[footprint_to_check]

#print(len(peak_footprints_new))
#for footprint in peak_footprints_new:
# print(footprint)
for footprint in peak_footprints_new:
print(footprint, peak_footprints_new[footprint])
#sys.exit()
return peak_footprints_new

Expand Down

0 comments on commit a9331ac

Please sign in to comment.