From 3c4f733eb627dce9c2e8566f617dbe7dbf58a1fd Mon Sep 17 00:00:00 2001 From: renewiegandt Date: Wed, 19 Dec 2018 14:40:59 +0100 Subject: [PATCH] get_best_motif.py: fixed bug which caused to print motif header as last line + improved documentation --- bin/get_best_motif.py | 54 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/bin/get_best_motif.py b/bin/get_best_motif.py index cc24949..37eccc5 100644 --- a/bin/get_best_motif.py +++ b/bin/get_best_motif.py @@ -1,26 +1,64 @@ -# parses arguments using argparse -# @return args list of all parameters +''' +parses arguments using argparse +@return args list of all parameters +''' def parse_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument("meme", help="Path to meme file") + parser = argparse.ArgumentParser(description='A script to convert from GLAM2 output to MEME-format and parsing only the [num] first motifs from file to the output.') + parser.add_argument("meme", help="Path to 'meme' file generated by GLAM2") parser.add_argument("output", help="Output file") parser.add_argument("num", help="Number of motifs parsed from file") args = parser.parse_args() return args -# write lines of file till certain line (MOTIF + [num]) +''' +The script has two functions: + 1. Writing lines of file till certain line (MOTIF + [num]) + 2. 
Converting GLAM2 output to minimal meme-format +@params meme STRING Path to 'meme' file generated from Meme suite +@params output STRING Output file +@params num INT Number of motifs parsed from file + +@author René Wiegandt +@contact rene.wiegandt(at)mpi-bn.mpg.de +''' def main(): + args = parse_arguments() out = open(args.output, "w+") + + ''' + Create pattern where script should stop writing + For Example: + If num == 3, which means that you want the first/best 3 Motifs, the script + should stop writing lines to output if loop reaches line 'MOTIF 4' + ''' number = int(args.num) + 1 - motif = "MOTIF " + str(number) + break_header = "MOTIF " + str(number) + + # Pattern for motif header + pattern = re.compile("^MOTIF\s{2}(\d)+") + # Init count + count = 0 + with open(args.meme) as f: for line in f: - if motif in line: + ## do not write [count] lines after each header -> needed for meme-format + if count > 0: + count-=1 + continue + if pattern.match(line): + # if line is a motif header + count = 2 + ## + + if break_header in line: + # line matches break_header, e.g. 'MOTIF 4' break - out.write(line) + else: + out.write(line) if __name__ == "__main__": import argparse + import re main()