Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#!/bin/env python
import sys
import getopt
import re
def alignment_iterator(instream=sys.stdin):
"""Iterate over the lines in the input file."""
for line in instream:
line = line.rstrip()
# no content or header
if len(line) == 0 or line[0] == "@":
continue
# split on tab
fields = line.split("\t")
yield fields
def get_tag(iterable, tagname="fr"):
"""Get the value for tag."""
for value in iterable:
if value.startswith(tagname):
return value
return None
def parse_region(word):
"""Parse the region from a string."""
fields = word.split(":")
startend = fields[3].split("-")
start = int(startend[0])
end = int(startend[1])
return fields[2], start, end, fields[4]
def swap(a, b):
"""Swap 2 variables."""
return b, a
def main(instream=sys.stdin, outstream=sys.stdout, tagname="fr"):
"""The main program loop."""
previous = None
for current in alignment_iterator(instream):
if previous is None or previous[0] != current[0]:
previous = current
continue
# same read names
fragment_a = get_tag(previous, tagname=tagname)
fragment_b = get_tag(current, tagname=tagname)
# skip if no fragments are annotated
if fragment_a is None or fragment_b is None:
previous = current
continue
# get the info
chrom_a, start_a, end_a, name_a = parse_region(fragment_a)
chrom_b, start_b, end_b, name_b = parse_region(fragment_b)
# always report interactions from the first to the second location
if chrom_a > chrom_b:
chrom_a, chrom_b = swap(chrom_a, chrom_b)
start_a, start_b = swap(start_a, start_b)
end_a, end_b = swap(end_a, end_b)
name_a, name_b = swap(name_a, name_b)
elif chrom_a == chrom_b and start_a > start_b:
start_a, start_b = swap(start_a, start_b)
end_a, end_b = swap(end_a, end_b)
name_a, name_b = swap(name_a, name_b)
# write the bedpe file
outstream.write(
"%s\t%d\t%d\t%s\t%d\t%d\t%s\t1\n" % (
chrom_a, start_a, end_a,
chrom_b, start_b, end_b,
name_a + ", " + name_b
))
#
previous = current
if __name__ == "__main__":
def usage(message="", error=None):
"""Print the usage information."""
sys.stdout.write("""
Message %(message)s
Called as %(commandline)s
Usage:
python %(tool)s -i [input file] -o [output file]
Options:
-i/--input [file] the input SAM file, default stdin
-o/--output [file] the output matrix file
-h/--help [] print this message
""" % {
"message": message,
"tool": sys.argv[0],
"commandline": " ".join(sys.argv)
})
if error is not None:
sys.exit(error)
# end of usage
# main variables
fstream_input = sys.stdin
fstream_output = sys.stdout
# option parsing
shortopt = "i:o:h"
longopt = ["input=", "output=", "help"]
try:
opts, args = getopt.getopt(sys.argv[1:], shortopt, longopt)
for o, a in opts:
if o in ("-i", "--input"):
fstream_input = open(a, "r")
elif o in ("-o", "--output"):
fstream_output = open(a, "w")
elif o in ('-h', '--help'):
usage("Help was asked", error=0)
except getopt.GetoptError as err:
usage(str(err), error=2)
# run the program loop
main(fstream_input, fstream_output)
if not fstream_input.closed and fstream_input is not sys.stdin:
fstream_input.close()
if not fstream_output.closed and fstream_output is not sys.stdout:
fstream_output.close()