Skip to content

Commit

Permalink
get_best_motif.py: fixed bug which caused to print motif header as la…
Browse files Browse the repository at this point in the history
…st line + improved documentation
  • Loading branch information
renewiegandt committed Dec 19, 2018
1 parent ce52871 commit 3c4f733
Showing 1 changed file with 46 additions and 8 deletions.
54 changes: 46 additions & 8 deletions bin/get_best_motif.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,64 @@
# parses arguments using argparse
# @return args list of all parameters
'''
parses arguments using argparse
@return args list of all parameters
'''
def parse_arguments():
parser = argparse.ArgumentParser()
parser.add_argument("meme", help="Path to meme file")
parser = argparse.ArgumentParser(description='A script to convert from GLAM2 output to MEME-format and parsing only the [num] first motifs from file to the output.')
parser.add_argument("meme", help="Path to 'meme' file generated by GLAM2")
parser.add_argument("output", help="Output file")
parser.add_argument("num", help="Number of motifs parsed from file")
args = parser.parse_args()
return args

# write lines of file till certain line (MOTIF + [num])
'''
The script has two functions:
1. Writing lines of file till certain line (MOTIF + [num])
2. Converting GLAM2 output to minimal meme-format
@params meme STRING Path to 'meme' file generated from Meme suite
@params output STRING Output file
@params num INT Number of motifs parsed from file
@author René Wiegandt
@contact rene.wiegandt(at)mpi-bn.mpg.de
'''
def main():

args = parse_arguments()
out = open(args.output, "w+")

'''
Create the pattern at which the script should stop writing.
For example:
If num == 3, meaning that you want the first/best 3 motifs, the script
stops writing lines to the output once the loop reaches the line 'MOTIF 4'.
'''
number = int(args.num) + 1
motif = "MOTIF " + str(number)
break_header = "MOTIF " + str(number)

# Pattern for motif header
pattern = re.compile("^MOTIF\s{2}(\d)+")
# Init count
count = 0

with open(args.meme) as f:
for line in f:
if motif in line:
## skip the next [count] lines after each motif header -> needed for meme-format
if count > 0:
count-=1
continue
if pattern.match(line):
# if line is a motif header
count = 2
##

if break_header in line:
# line contains break_header, e.g. 'MOTIF 4'
break
out.write(line)
else:
out.write(line)


if __name__ == "__main__":
import argparse
import re
main()

0 comments on commit 3c4f733

Please sign in to comment.