Skip to content
Permalink
48f388f94a
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
171 lines (133 sloc) 3.92 KB
package PassParser;
use warnings;
use strict;
use IO::Zlib;
use Bio::DB::HTS::Tabix;
use Benchmark;
use lib "$FindBin::Bin/lib";
use Record::Pass;
use Record::Bed;
# constructor
sub new
{
my $class = shift;
my $filename = shift;
my $fileann = shift;
my $self = {};
bless ($self, $class);
# open file handle
my $fileHandle = ();
if ($filename =~ m/\.gz$/)
{
$fileHandle = IO::Zlib->new($filename, "rb");
if (!defined($fileHandle))
{
$fileHandle->close();
die $!;
}
}
else
{
open($fileHandle, "<", $filename) or die $!;
}
# open tabix index of annotation
$self->{annotation} = Bio::DB::HTS::Tabix->new(filename => $fileann);
$self->{handle} = $fileHandle;
return $self;
}
# destructor
sub DESTROY {
my $self = shift;
$self->{handle}->close() if($self->{handle});
$self->{annotation}->close() if($self->{annotation});
}
# read
sub read
{
my $self = shift;
my $line = readline($self->{handle});
return $line;
}
# parse
sub parse
{
my $self = shift;
my $count = 0;
my $total = 0;
my $notail = 0;
my $mito = 0;
my $window_upstream = 20000;
my %featureScores = (
'notail' => 11,
'mitochondria' => 9,
'intergenic' => 10,
'5pUTR_extended' => 8,
'5pUTR_intron' => 7,
'5pUTR' => 6,
'CDS' => 5,
'intron' => 4,
'3pUTR' => 1,
'3pUTR_intron' => 3,
'3pUTR_extended' => 2,
);
my $t0 = Benchmark->new();
my %stats = ();
while(my $line = $self->read())
{
chomp($line);
$count++;
# create pass object
my $pass = Pass->new($line);
# count no tail
$stats{'pass'}{'total'}++;
$stats{'reads'}{'total'} += $pass->{readsSumCoverage};
# allocate counters
my $feature = 'intergenic';
if ($pass->ispoly() == 0)
{
$feature = 'notail';
}
elsif ($pass->ischrom('chrM'))
{
$feature = 'mito';
}
else
{
my $iter = $self->{annotation}->query($pass->window($window_upstream));
my $prevFeatureScore = $featureScores{'notail'};
my $hits = 0;
while (my $line = $iter->next)
{
# current bed record
my $bed = Bed->new($line);
# skip different strand
next if($bed->{strand} ne $pass->{strand});
# process bed hit
(my $bnode, $feature) = $bed->closest($pass->{tailsBestBase});
# export features
$pass->regions($bed, $bnode, $feature);
# increment hits
$hits++;
# get scores
my $currFeatureScore = $featureScores{$feature};
print $pass->{name},"\t",$bed->{name},"\t",$feature,"\t",$currFeatureScore,"\n";
# update score
$prevFeatureScore = $currFeatureScore;
}
last if($hits > 1);
}
$stats{'reads'}{$feature} += $pass->{readsSumCoverage};
$stats{'pass'}{$feature}++;
}
my $t1 = Benchmark->new();
my $td = timediff($t1, $t0);
print "# benchmark :: ",timestr($td),"\n";
=head
print "# feature\tpass\treads\n";
foreach my $key (sort keys %{$stats{'reads'}})
{
print $key,"\t",$stats{'pass'}{$key},"\t",sprintf("%.2f",100*$stats{'pass'}{$key}/$stats{'pass'}{'total'}),"\t",$stats{'reads'}{$key},"\t",sprintf("%.2f",100*$stats{'reads'}{$key}/$stats{'reads'}{"total"}),"\n";
}
=cut
}
1; # return true value