From 220a5082b166517139b406cb5613a80c7264560e Mon Sep 17 00:00:00 2001 From: anastasiia Date: Sat, 12 Jan 2019 17:03:07 +0100 Subject: [PATCH] changing the name of max_bp_between to min_gap --- .../footprints_extraction.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bin/1.1_footprint_extraction/footprints_extraction.py b/bin/1.1_footprint_extraction/footprints_extraction.py index 2ac4862..c4f4723 100644 --- a/bin/1.1_footprint_extraction/footprints_extraction.py +++ b/bin/1.1_footprint_extraction/footprints_extraction.py @@ -44,7 +44,7 @@ def parse_args(): parser.add_argument('--window_length', default=200, type=int, help='Please enter the length for a window, by defauld 200 bp.') parser.add_argument('--step', default=100, type=int, help='Please enter a step to move the window, by default 100 bp.') parser.add_argument('--percentage', default=0, type=int, help='Please enter a percentage to be added to background while searching for footprints, by default 0%%.') - parser.add_argument('--max_bp_between', default=6, type=int, help='Please enter the number of bp allowed to be in between two footprints, by default 6 bp.') + parser.add_argument('--min_gap', default=6, type=int, help='Please enter the number of bp allowed to be in between two footprints, by default 6 bp.') parser.add_argument('--silent', action='store_true', help='While working with data write the information only into ./footprints_extraction.log.') args = parser.parse_args() @@ -281,7 +281,7 @@ def search_in_window(peak_footprints, footprint_count, chromosom, peak_start, pe #if at this point there are still overlaps of footprints, this function will delete them #the input parameter are: dictionary with footprints within one peak, and the max number of bp allowed to be in between the footprints #the output is the renewed dictionary containing only the best footprints for the output file -def check_and_merge(peak_footprints, max_bp_between): +def check_and_merge(peak_footprints, min_gap): #to ensure the merging works well, sort the footprints first arter start and end positions #the sort can not be applied to a dictionary, we are making a list out of peak_footprints_dict peak_footprints_list = sorted(peak_footprints.items(), key = lambda x : (x[1]['start'], x[1]['end']), reverse = False) @@ -300,11 +300,11 @@ def check_and_merge(peak_footprints, max_bp_between): for compared_footprint in peak_footprints.keys(): - if start_to_check > peak_footprints[compared_footprint]['start'] and start_to_check - peak_footprints[compared_footprint]['end'] < max_bp_between: + if start_to_check > peak_footprints[compared_footprint]['start'] and start_to_check - peak_footprints[compared_footprint]['end'] < min_gap: #make compared_footprint longer: compared_footprint + footprint_to_check merge_footprints_left = False break - elif end_to_check < peak_footprints[compared_footprint]['end'] and peak_footprints[compared_footprint]['start'] - end_to_check < max_bp_between: + elif end_to_check < peak_footprints[compared_footprint]['end'] and peak_footprints[compared_footprint]['start'] - end_to_check < min_gap: #make footprint_to_check longer: footprint_to_check + compared footprint merge_footprints_left = True break @@ -413,7 +413,7 @@ def footprint_update(footprint, start, end, score): #as input the information from the original bed file, as well as bigwig file is needed #the optional parameters window_length, step and percentage are needed as well to use the sliding window algorithm and work with the "background" score #the output of this function is a dictionary contains all the found footprints ready to write to the output file -def find_peaks_from_bw(bed_dictionary, bw_file, window_length, step, percentage, max_bp_between): +def find_peaks_from_bw(bed_dictionary, bw_file, window_length, step, percentage, min_gap): logger.info('Looking for footprints within peaks...') @@ -437,7 +437,7 @@ def find_peaks_from_bw(bed_dictionary, bw_file, window_length, step, percentage, peak_footprints, footprint_count = search_in_window(peak_footprints, footprint_count, chromosom, peak_start, peak_end, scores_in_peak, window_length, bed_dictionary[header], step, percentage) #double check for overlaps and possibly merging of footprints having up to 5 bp in between - peak_footprints = check_and_merge(peak_footprints, max_bp_between) + peak_footprints = check_and_merge(peak_footprints, min_gap) for footprint_name in peak_footprints.keys(): all_footprints[footprint_name] = all_footprints.get(footprint_name, {}) @@ -533,7 +533,7 @@ def main(): logger.info("The script footprints_extraction.py was called using these parameters: " + str(vars(args))) bed_dictionary = make_bed_dictionary(args.bed) - all_footprints = find_peaks_from_bw(bed_dictionary, args.bigwig, args.window_length, args.step, args.percentage, args.max_bp_between) + all_footprints = find_peaks_from_bw(bed_dictionary, args.bigwig, args.window_length, args.step, args.percentage, args.min_gap) write_to_bed_file(all_footprints, args.output_file) logger.info("the number of peaks: " + str(len(bed_dictionary)))