Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
PASSAnnotator/lib/Record/Bed.pm
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
216 lines (174 sloc)
5.04 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package Bed; | |
use warnings; | |
use strict; | |
use Tree::Interval; | |
sub new | |
{ | |
my $class = shift; | |
my $bed_line = shift; | |
my $self = {}; | |
bless ($self, $class); | |
# split bed line | |
my @bed_vals = split("\t", $bed_line, 12); | |
# properties | |
$self->{chrom} = $bed_vals[0]; | |
$self->{chromStart} = $bed_vals[1]; | |
$self->{chromEnd} = $bed_vals[2]; | |
$self->{name} = $bed_vals[3]; | |
$self->{score} = $bed_vals[4]; | |
$self->{strand} = $bed_vals[5]; | |
$self->{thickStart} = $bed_vals[6]; | |
$self->{thickEnd} = $bed_vals[7]; | |
$self->{itemRgb} = $bed_vals[8]; | |
$self->{blockCount} = $bed_vals[9]; | |
$self->{blockSizes} = $bed_vals[10]; | |
$self->{blockStarts} = $bed_vals[11]; | |
$self->{exons} = (); | |
$self->{introns} = (); | |
$self->{tree} = (); | |
$self->{thickStartNode} = (); | |
$self->{thickEndNode} = (); | |
# assign features | |
$self->getExons(); | |
$self->getIntrons(); | |
$self->getFeaturesTree(); | |
$self->getTickNodes(); | |
return $self; | |
} | |
### get methods | |
sub strand | |
{ | |
my $self = shift; | |
return ($self->{strand} eq '+') ? 1 : 0; | |
} | |
# GETEXONS | |
# | |
# extract BED12 record exons | |
sub getExons | |
{ | |
my $self = shift; | |
# split blocks | |
my @blockSizes = split(",", $self->{blockSizes}); | |
my @blockStarts = split(",", $self->{blockStarts}); | |
# keep exons to list | |
my @exons = (); | |
my $exonId = 0; | |
for (my $ex = 0; $ex < $self->{blockCount}; $ex++) | |
{ | |
my $exonStart = $self->{chromStart} + $blockStarts[$ex]; | |
my $exonEnd = $exonStart + $blockSizes[$ex]; | |
# merge exons | |
if (($exonId > 0) && ($exonStart <= $exons[$exonId-1][1])) | |
{ | |
$exons[$exonId-1][1] = $exonEnd; | |
} | |
else | |
{ | |
$exonId++; | |
push(@exons, [$exonStart, $exonEnd, 0]); | |
} | |
} | |
# sort exons | |
@exons = sort {$a->[0] <=> $b->[0]} @exons; | |
# update exon id | |
my $exonCount = scalar(@exons); | |
for (my $ex = 0; $ex < $exonCount; $ex++) | |
{ | |
$exons[$ex][2] = ($self->strand()) ? ($ex + 1) : ($exonCount - $ex); | |
} | |
# return reference | |
$self->{exons} = \@exons; | |
} | |
# GETINTRONS | |
# | |
# extract BED12 record introns | |
sub getIntrons | |
{ | |
my $self = shift; | |
# get exons | |
my $exons = $self->getExons(); | |
my $blockCount = scalar(@{$exons}) - 1; | |
# generate introns list | |
my @introns = (); | |
for(my $in = 0; $in < $blockCount; $in++) | |
{ | |
my $intronStart = $exons->[$in][1] + 1; | |
my $intronEnd = $exons->[$in+1][0] - 1; | |
if (($intronEnd - $intronStart) > 0) | |
{ | |
push(@introns,[$intronStart, $intronEnd, 0]); | |
} | |
} | |
# sort introns | |
@introns = sort {$a->[0] <=> $b->[0]} @introns; | |
# update intron id | |
my $intronCount = scalar(@introns); | |
for (my $in = 0; $in < $intronCount; $in++) | |
{ | |
$introns[$in][2] = ($self->strand()) ? ($in + 1) : ($intronCount - $in); | |
} | |
# return reference | |
$self->{introns} = \@introns; | |
} | |
# Red-Black Features Tree | |
sub getFeaturesTree | |
{ | |
my $self = shift; | |
# allocate tree | |
$self->{tree} = Tree::Interval->new(); | |
# insert exons | |
my $exons_cnt = scalar(@{$self->{exons}}); | |
for(my $ex = 0; $ex < $exons_cnt; $ex++) | |
{ | |
$self->{tree}->insert($self->{exons}->[$ex][0], $self->{exons}->[$ex][1], ['exon', $self->{exons}->[$ex][2], $exons_cnt]); | |
} | |
# insert introns | |
my $introns_cnt = scalar(@{$self->{introns}}); | |
for (my $in = 0; $in < $introns_cnt; $in++) | |
{ | |
$self->{tree}->insert($self->{introns}->[$in][0], $self->{introns}->[$in][1], ['intron', $self->{introns}->[$in][2], $introns_cnt]); | |
} | |
} | |
# Nodes for thicks Start and End | |
sub getTickNodes | |
{ | |
my $self = shift; | |
$self->{thickStartNode} = $self->{tree}->find($self->{thickStart}); | |
$self->{thickEndNode} = $self->{tree}->find($self->{thickEnd}); | |
} | |
sub closest | |
{ | |
my $self = shift; | |
my $base = shift; | |
# check base | |
my $baseNode = $self->{tree}->find($base); | |
# allocate feature | |
my $feature = '<unknown>'; | |
# root condition | |
my $cond_hit = defined($baseNode) ? $baseNode->[0] : 'intergenic'; | |
# decision tree | |
if ($cond_hit eq 'exon') | |
{ | |
$feature = '5pUTR' if ($base < $self->{thickStart}); | |
$feature = 'CDS' if (($self->{thickStart} <= $base) && ($base <= $self->{thickEnd})); | |
$feature = '3pUTR' if ($self->{thickEnd} < $base); | |
} | |
elsif ($cond_hit eq 'intron') | |
{ | |
$feature = '5pUTR_intron' if ($base < $self->{thickStart}); | |
$feature = 'intron' if (($self->{thickStart} <= $base) && ($base <= $self->{thickEnd})); | |
$feature = '3pUTR_intron' if ($self->{thickEnd} < $base); | |
} | |
elsif ($cond_hit eq 'intergenic') | |
{ | |
$feature = '5pUTR_extended' if($base < $self->{chromStart}); | |
$feature = '3pUTR_extended' if($base > $self->{chromEnd}); | |
} | |
# correct for strand | |
if ($self->strand() == 0) | |
{ | |
$feature =~ tr/53/35/; | |
} | |
return ($baseNode, $feature); | |
} | |
1; # return true |