Skip to content
Permalink
b70a38b8c3
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
71 lines (61 sloc) 2.27 KB
#!/usr/bin/env python
'''
parses arguments using argparse
@return args list of all parameters
'''
def parse_arguments():
    '''
    Build the command-line parser of the script and parse the arguments.
    All four arguments are positional and are returned as strings
    (no type conversion is configured on the parser).
    @return args argparse.Namespace with the attributes meme, output, num and id
    '''
    cli = argparse.ArgumentParser(description='A script to convert from GLAM2 output to MEME-format and parsing only the [num] first motifs from file to the output.')
    # (argument name, help text) in the order they are expected on the command line
    positionals = (
        ("meme", "Path to 'meme' file generated by GLAM2"),
        ("output", "Output file"),
        ("num", "Number of motifs parsed from file"),
        ("id", "Cluster ID"),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help)
    return cli.parse_args()
'''
The script has two functions:
1. Writing lines of file till certain line (MOTIF + [num])
2. Converting GLAM2 output to minimal meme-format
@params meme STRING Path to 'meme' file generated from Meme suite
@params output STRING Output file
@params num INT Number of motifs parsed from file
@params id STRING Cluster ID appended to each motif header
@author René Wiegandt
@contact rene.wiegandt(at)mpi-bn.mpg.de
'''
def main():
    '''
    Copy the first [num] motifs of a GLAM2 'meme' file to the output file
    in minimal meme-format.

    The arguments are read from the command line via parse_arguments().
    The input file (args.meme) is copied line by line to args.output, except:
    - each motif header is rewritten to 'MOTIF <n> Cluster_<id>'
      (double spaces collapsed to a single one, cluster id appended),
    - the two lines following each motif header are dropped,
    - copying stops at the first motif header whose number is greater than num.
    For Example:
    If num == 3, which means that you want the first/best 3 Motifs, the script
    stops writing lines to output as soon as the loop reaches the header of motif 4.
    @return None
    '''
    args = parse_arguments()

    # Number of motifs to keep; raises ValueError if num is not an integer
    max_motifs = int(args.num)
    cluster_id = args.id

    # Motif header in the input: 'MOTIF', two whitespace characters, motif number.
    # Raw string so that \s and \d are regex escapes and not (invalid) string escapes.
    header_pattern = re.compile(r"^MOTIF\s{2}(\d+)")

    # Number of lines that still have to be dropped after the last written header
    skip = 0

    # Both files are closed (and the output flushed) even if an error occurs.
    # The output is opened first, as before, so it is created/truncated in any case.
    with open(args.output, "w+") as out, open(args.meme) as f:
        for line in f:
            header = header_pattern.match(line)

            # Stop at the first header beyond the requested number of motifs.
            # The number is compared numerically using the same pattern that
            # recognises headers, so the stop condition cannot disagree with
            # the header format and gaps in the numbering are handled as well.
            if header and int(header.group(1)) > max_motifs:
                break

            # do not write [skip] lines after each header -> needed for meme-format
            if skip > 0:
                skip -= 1
                continue

            if header:
                skip = 2
                # 'MOTIF  <n>' -> 'MOTIF <n> Cluster_<id>'
                out.write(line.rstrip('\n').replace("  ", " ") + " Cluster_" + cluster_id + '\n')
            else:
                out.write(line)
# The imports live at module level (not inside the guard below) so that
# parse_arguments() and main() also work when this file is imported by another
# module instead of being executed as a script.
import argparse
import re

# Run the conversion only when the file is executed as a script
if __name__ == "__main__":
    main()