Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
PASSAnnotator/lib/Tests/PassParser.pm
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
171 lines (133 sloc)
3.92 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package PassParser; | |
use warnings; | |
use strict; | |
use IO::Zlib; | |
use Bio::DB::HTS::Tabix; | |
use Benchmark; | |
use lib "$FindBin::Bin/lib"; | |
use Record::Pass; | |
use Record::Bed; | |
# constructor | |
sub new | |
{ | |
my $class = shift; | |
my $filename = shift; | |
my $fileann = shift; | |
my $self = {}; | |
bless ($self, $class); | |
# open file handle | |
my $fileHandle = (); | |
if ($filename =~ m/\.gz$/) | |
{ | |
$fileHandle = IO::Zlib->new($filename, "rb"); | |
if (!defined($fileHandle)) | |
{ | |
$fileHandle->close(); | |
die $!; | |
} | |
} | |
else | |
{ | |
open($fileHandle, "<", $filename) or die $!; | |
} | |
# open tabix index of annotation | |
$self->{annotation} = Bio::DB::HTS::Tabix->new(filename => $fileann); | |
$self->{handle} = $fileHandle; | |
return $self; | |
} | |
# destructor | |
sub DESTROY { | |
my $self = shift; | |
$self->{handle}->close() if($self->{handle}); | |
$self->{annotation}->close() if($self->{annotation}); | |
} | |
# read | |
sub read | |
{ | |
my $self = shift; | |
my $line = readline($self->{handle}); | |
return $line; | |
} | |
# parse | |
sub parse | |
{ | |
my $self = shift; | |
my $count = 0; | |
my $total = 0; | |
my $notail = 0; | |
my $mito = 0; | |
my $window_upstream = 20000; | |
my %featureScores = ( | |
'notail' => 11, | |
'mitochondria' => 9, | |
'intergenic' => 10, | |
'5pUTR_extended' => 8, | |
'5pUTR_intron' => 7, | |
'5pUTR' => 6, | |
'CDS' => 5, | |
'intron' => 4, | |
'3pUTR' => 1, | |
'3pUTR_intron' => 3, | |
'3pUTR_extended' => 2, | |
); | |
my $t0 = Benchmark->new(); | |
my %stats = (); | |
while(my $line = $self->read()) | |
{ | |
chomp($line); | |
$count++; | |
# create pass object | |
my $pass = Pass->new($line); | |
# count no tail | |
$stats{'pass'}{'total'}++; | |
$stats{'reads'}{'total'} += $pass->{readsSumCoverage}; | |
# allocate counters | |
my $feature = 'intergenic'; | |
if ($pass->ispoly() == 0) | |
{ | |
$feature = 'notail'; | |
} | |
elsif ($pass->ischrom('chrM')) | |
{ | |
$feature = 'mito'; | |
} | |
else | |
{ | |
my $iter = $self->{annotation}->query($pass->window($window_upstream)); | |
my $prevFeatureScore = $featureScores{'notail'}; | |
my $hits = 0; | |
while (my $line = $iter->next) | |
{ | |
# current bed record | |
my $bed = Bed->new($line); | |
# skip different strand | |
next if($bed->{strand} ne $pass->{strand}); | |
# process bed hit | |
(my $bnode, $feature) = $bed->closest($pass->{tailsBestBase}); | |
# export features | |
$pass->regions($bed, $bnode, $feature); | |
# increment hits | |
$hits++; | |
# get scores | |
my $currFeatureScore = $featureScores{$feature}; | |
print $pass->{name},"\t",$bed->{name},"\t",$feature,"\t",$currFeatureScore,"\n"; | |
# update score | |
$prevFeatureScore = $currFeatureScore; | |
} | |
last if($hits > 1); | |
} | |
$stats{'reads'}{$feature} += $pass->{readsSumCoverage}; | |
$stats{'pass'}{$feature}++; | |
} | |
my $t1 = Benchmark->new(); | |
my $td = timediff($t1, $t0); | |
print "# benchmark :: ",timestr($td),"\n"; | |
=head | |
print "# feature\tpass\treads\n"; | |
foreach my $key (sort keys %{$stats{'reads'}}) | |
{ | |
print $key,"\t",$stats{'pass'}{$key},"\t",sprintf("%.2f",100*$stats{'pass'}{$key}/$stats{'pass'}{'total'}),"\t",$stats{'reads'}{$key},"\t",sprintf("%.2f",100*$stats{'reads'}{$key}/$stats{'reads'}{"total"}),"\n"; | |
} | |
=cut | |
} | |
1; # return true value |