Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
TOuCAN/bin/create_matrix.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
141 lines (109 sloc)
3.76 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env python | |
import sys | |
import getopt | |
import re | |
def alignment_iterator(instream=sys.stdin): | |
"""Iterate over the lines in the input file.""" | |
for line in instream: | |
line = line.rstrip() | |
# no content or header | |
if len(line) == 0 or line[0] == "@": | |
continue | |
# split on tab | |
fields = line.split("\t") | |
yield fields | |
def get_tag(iterable, tagname="fr"): | |
"""Get the value for tag.""" | |
for value in iterable: | |
if value.startswith(tagname): | |
return value | |
return None | |
def parse_region(word): | |
"""Parse the region from a string.""" | |
fields = word.split(":") | |
startend = fields[3].split("-") | |
start = int(startend[0]) | |
end = int(startend[1]) | |
return fields[2], start, end, fields[4] | |
def swap(a, b): | |
"""Swap 2 variables.""" | |
return b, a | |
def main(instream=sys.stdin, outstream=sys.stdout, tagname="fr"): | |
"""The main program loop.""" | |
previous = None | |
for current in alignment_iterator(instream): | |
if previous is None or previous[0] != current[0]: | |
previous = current | |
continue | |
# same read names | |
fragment_a = get_tag(previous, tagname=tagname) | |
fragment_b = get_tag(current, tagname=tagname) | |
# skip if no fragments are annotated | |
if fragment_a is None or fragment_b is None: | |
previous = current | |
continue | |
# get the info | |
chrom_a, start_a, end_a, name_a = parse_region(fragment_a) | |
chrom_b, start_b, end_b, name_b = parse_region(fragment_b) | |
# always report interactions from the first to the second location | |
if chrom_a > chrom_b: | |
chrom_a, chrom_b = swap(chrom_a, chrom_b) | |
start_a, start_b = swap(start_a, start_b) | |
end_a, end_b = swap(end_a, end_b) | |
name_a, name_b = swap(name_a, name_b) | |
elif chrom_a == chrom_b and start_a > start_b: | |
start_a, start_b = swap(start_a, start_b) | |
end_a, end_b = swap(end_a, end_b) | |
name_a, name_b = swap(name_a, name_b) | |
# write the bedpe file | |
outstream.write( | |
"%s\t%d\t%d\t%s\t%d\t%d\t%s\t1\n" % ( | |
chrom_a, start_a, end_a, | |
chrom_b, start_b, end_b, | |
name_a + ", " + name_b | |
)) | |
# | |
previous = current | |
if __name__ == "__main__": | |
def usage(message="", error=None): | |
"""Print the usage information.""" | |
sys.stdout.write(""" | |
Message %(message)s | |
Called as %(commandline)s | |
Usage: | |
python %(tool)s -i [input file] -o [output file] | |
Options: | |
-i/--input [file] the input SAM file, default stdin | |
-o/--output [file] the output matrix file | |
-h/--help [] print this message | |
""" % { | |
"message": message, | |
"tool": sys.argv[0], | |
"commandline": " ".join(sys.argv) | |
}) | |
if error is not None: | |
sys.exit(error) | |
# end of usage | |
# main variables | |
fstream_input = sys.stdin | |
fstream_output = sys.stdout | |
# option parsing | |
shortopt = "i:o:h" | |
longopt = ["input=", "output=", "help"] | |
try: | |
opts, args = getopt.getopt(sys.argv[1:], shortopt, longopt) | |
for o, a in opts: | |
if o in ("-i", "--input"): | |
fstream_input = open(a, "r") | |
elif o in ("-o", "--output"): | |
fstream_output = open(a, "w") | |
elif o in ('-h', '--help'): | |
usage("Help was asked", error=0) | |
except getopt.GetoptError as err: | |
usage(str(err), error=2) | |
# run the program loop | |
main(fstream_input, fstream_output) | |
if not fstream_input.closed and fstream_input is not sys.stdin: | |
fstream_input.close() | |
if not fstream_output.closed and fstream_output is not sys.stdout: | |
fstream_output.close() |