Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
master_project_JLU2018/bin/2.2_motif_estimation/get_best_motif.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
71 lines (61 sloc)
2.27 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
def parse_arguments():
    '''
    Parses the command line arguments using argparse.

    'num' is converted to int by argparse itself, so a non-numeric value is
    rejected with a usage message before any file is touched.

    @return args  argparse.Namespace with the attributes
                  meme    STRING  Path to 'meme' file generated by GLAM2
                  output  STRING  Output file
                  num     INT     Number of motifs parsed from file
                  id      STRING  Cluster ID
    '''
    parser = argparse.ArgumentParser(description='A script to convert from GLAM2 output to MEME-format and parsing only the [num] first motifs from file to the output.')
    parser.add_argument("meme", help="Path to 'meme' file generated by GLAM2")
    parser.add_argument("output", help="Output file")
    parser.add_argument("num", type=int, help="Number of motifs parsed from file")
    parser.add_argument("id", help="Cluster ID")
    return parser.parse_args()
def main():
    '''
    Copies the first [num] motifs of a 'meme' file to the output file.

    The script has two functions:
    1. Writing lines of the file until the header of motif [num] + 1 is reached
    2. Converting GLAM2 output to minimal meme-format: every motif header is
       rewritten with single spaces and ' Cluster_[id]' appended, and the two
       lines following each header are not written

    For example: if num == 3, which means that you want the first/best 3
    motifs, the script stops writing as soon as it reaches the header of
    motif 4.

    @params meme    STRING  Path to 'meme' file generated from Meme suite
    @params output  STRING  Output file
    @params num     INT     Number of motifs parsed from file
    @params id      STRING  Cluster ID
    @author René Wiegandt
    @contact rene.wiegandt(at)mpi-bn.mpg.de
    '''
    args = parse_arguments()

    # Convert 'num' before the output file is opened (and truncated), so an
    # invalid value cannot leave an empty output file behind.
    stop_number = int(args.num) + 1
    cluster_id = args.id

    # Motif header: 'MOTIF', any run of whitespace, then the motif number.
    # The same pattern drives both header detection and the stop condition,
    # so the two can never disagree about what a header looks like.
    header = re.compile(r"^MOTIF\s+(\d+)")

    # Number of lines still to be dropped after the last motif header.
    skip = 0

    # Input is opened first: a missing input file no longer truncates the
    # output. Both files are closed on every exit path.
    with open(args.meme) as meme_file, open(args.output, "w") as out:
        for line in meme_file:
            found = header.match(line)

            if found and int(found.group(1)) == stop_number:
                # reached the header of the first motif that is not wanted
                break

            # do not write [skip] lines after each header -> needed for meme-format
            if skip > 0:
                skip -= 1
                continue

            if found:
                skip = 2
                # collapse whitespace runs so the header reads 'MOTIF <n> Cluster_<id>'
                out.write(" ".join(line.split()) + " Cluster_" + cluster_id + '\n')
            else:
                out.write(line)
# Imported at module level rather than inside the guard below, so that
# parse_arguments() and main() also find 'argparse' and 're' when this file
# is imported as a module instead of being run as a script.
import argparse
import re

if __name__ == "__main__":
    main()