"""
Helpers for working with genomic regions: the Region class (chromosome, start
and end position) and get_conflict_position, which derives the Region of a
conflict from its JSON, CSV or string description.
"""
from enum import Enum
from pyfaidx import Fasta
class ConfMode(Enum):
    """
    The input format of a conflict description, as understood by
    get_conflict_position.
    """
    # comma separated string; the first three fields are chrom, start, end
    CSV = 0
    # iterable of '_'-separated SV identifier strings
    JSON = 1
    # region string such as 'chr12:12314-12412'
    STRING = 2
class Region:
    """
    A class that saves the chromosome, start and end positions of a genomic region.

    A Region is built either from its three parts, e.g. Region("chr9", 123, 456),
    or from a single region string, e.g. Region("chr9:123-456").
    """

    def __init__(self, s: str, start: int = -1, end: int = -1) -> None:
        """
        s is the chromosome name if 'start' is given. If 'start' is left at its
        default of -1, s is parsed as a region string instead (see from_str)
        and 'end' is ignored.
        """
        if start == -1:
            self.chrom, self.start, self.end = Region._parse(s)
        else:
            self.chrom = s
            self.start = int(start)
            self.end = int(end)

    def vals(self):
        """ Returns all three information points as tuple. """
        return self.chrom, self.start, self.end

    def coordinates(self):
        """ Returns the start and end points as tuple. """
        return (self.start, self.end)

    def get_reference_seq(self, ref: "Fasta"):
        """
        Returns the sequence inside the Region of a given reference,
        i.e. ref[chrom][start:end].
        """
        return ref[self.chrom][self.start:self.end]

    def with_padding(self, padding=-1):
        """
        Returns a Region object with a padding to both sides of self.
        If the padding is not chosen, it is set to a fifth of the region's size.
        The padded start is never smaller than 0.
        """
        if padding == -1:
            # make the padding depending on the region; __len__ is called
            # directly because len() refuses negative results
            padding = self.__len__() // 5
        return Region(self.chrom, max(self.start - padding, 0), self.end + padding)

    def __len__(self):
        """ Returns the distance between start and end of the Region. """
        return self.end - self.start

    def __str__(self) -> str:
        """
        format region info as string
        If one of the values is None, returns 'INVALID REGION'
        """
        if None in [self.chrom, self.start, self.end]:
            return "INVALID REGION"
        return self.__format__()

    def __format__(self, f: str = "") -> str:
        """
        Format the Region object as string.
        If the format string 'f' is of length 1, it is put between chrom, start
        and end.
        If it is of length 2, the first character of f is put between chrom and start
        and f[1] is put between start and end.
        An empty format string is treated like ":-". Any longer format string
        is reported on stdout and the result of str(self) is returned.
        Examples:
        __format__("_") --> chr9_123_456
        __format__(":-") --> chr9:123-456
        """
        if f == "":
            f = ":-"
        if len(f) == 1:
            return f"{self.chrom}{f}{self.start}{f}{self.end}"
        elif len(f) == 2:
            return f"{self.chrom}{f[0]}{self.start}{f[1]}{self.end}"
        else:
            print("Format string not viable!")
            return str(self)

    @staticmethod
    def _parse_coordinates(span):
        """
        Turns 'start-end' into a (start, end) tuple of ints. Commas inside the
        numbers are ignored. Raises ValueError if span does not consist of
        exactly two integers separated by '-'.
        """
        start, end = (int(x.replace(',', '')) for x in span.split("-"))
        return start, end

    @staticmethod
    def _parse(s):
        """
        Splits a region string into a (chrom, start, end) tuple.
        Raises IndexError if s contains no ':' and ValueError if the
        coordinates cannot be parsed.
        """
        info = s.split(":")
        try:
            # usual case: everything before the first ':' is the chromosome
            return (info[0], *Region._parse_coordinates(info[1]))
        except ValueError:
            if len(info) < 3:
                raise
            # The chromosome name may contain colons itself
            # (e.g. 'HLA-A*01:01:01:01'), so retry with a split at the LAST colon.
            chrom, _, span = s.rpartition(":")
            return (chrom, *Region._parse_coordinates(span))

    @staticmethod
    def from_str(s):
        """
        splits a region string into chromosome string and start and end ints.
        The coordinates can be comma separated and still be parsed.
        Chromosome names that contain ':' themselves are supported as long as
        the coordinates follow the last ':'.
        Example: 'chr12:12314-12412' --> ('chr12', 12314, 12412)
        """
        chrom, start, end = Region._parse(s)
        return Region(chrom, start, end)

    def copy(self):
        """ Returns a copy of self. """
        return Region(self.chrom, self.start, self.end)
def get_conflict_position(info, conf_mode=ConfMode.JSON) -> Region:
    """
    Returns the Region of a given conflict. The processing depends on the
    conflict mode used:

    ConfMode.JSON:   info is an iterable of '_'-separated SV strings. The
                     Region spans from the smallest start to the largest end
                     of all SVs; the chromosome is taken from the first SV.
    ConfMode.CSV:    info is a comma separated string whose first three fields
                     are chromosome, start and end.
    any other mode:  info is a region string like 'chr12:12314-12412'.

    Raises ValueError if info contains no SVs in JSON mode.
    """
    if conf_mode == ConfMode.JSON:
        # an SV looks like "svim_DEL_-133_232424_323242_chr18..." when split at '_':
        # ["svim", "DEL", "-133", "232424", "323242", "chr18" ...]
        # thus, we need field 3 and 4 for start and stop, and field 5 for chromosome
        start_field, end_field, chrom_field = 3, 4, 5
        starts = []
        ends = []
        chrom = None
        for sv in info:
            # separate name, so the 'info' parameter is not overwritten while iterating
            fields = sv.split("_")
            starts.append(int(fields[start_field]))
            ends.append(int(fields[end_field]))
            if chrom is None:
                chrom = fields[chrom_field]
        if not starts:
            raise ValueError("Cannot determine a conflict position without any SVs")
        return Region(chrom, min(starts), max(ends))
    elif conf_mode == ConfMode.CSV:
        chrom, start, end = info.split(",")[:3]
        return Region(chrom, int(start), int(end))
    else:  # conf_mode == ConfMode.STRING:
        return Region.from_str(info)