diff --git a/lib/Tests/PassParser.pm b/lib/Tests/PassParser.pm index 16b1351..45d0b87 100644 --- a/lib/Tests/PassParser.pm +++ b/lib/Tests/PassParser.pm @@ -8,6 +8,7 @@ use Benchmark; use lib "$FindBin::Bin/lib"; use Record::Pass; +use Record::Bed; # constructor sub new @@ -65,38 +66,101 @@ sub parse { my $self = shift; my $count = 0; + my $total = 0; + my $notail = 0; + my $mito = 0; + my $window_upstream = 20000; + my %featureScores = ( + 'notail' => 11, + 'mitochondria' => 9, + 'intergenic' => 10, + '5pUTR_extended' => 8, + '5pUTR_intron' => 7, + '5pUTR' => 6, + 'CDS' => 5, + 'intron' => 4, + '3pUTR' => 1, + '3pUTR_intron' => 3, + '3pUTR_extended' => 2, + ); + my $t0 = Benchmark->new(); + my %stats = (); + while(my $line = $self->read()) { chomp($line); + $count++; # create pass object my $pass = Pass->new($line); # count no tail + $stats{'pass'}{'total'}++; + $stats{'reads'}{'total'} += $pass->{readsSumCoverage}; + + # allocate counters + my $feature = 'intergenic'; + + if ($pass->ispoly() == 0) { - $notail += $pass->{readsSumCoverage}; + $feature = 'notail'; } elsif ($pass->ischrom('chrM')) { - $mito += $pass->{readsSumCoverage}; + $feature = 'mito'; + } + else + { + my $iter = $self->{annotation}->query($pass->region($window_upstream)); + my $prevFeatureScore = $featureScores{'notail'}; + my $hits = 0; + + while (my $line = $iter->next) + { + # current bed record + my $bed = Bed->new($line); + + # skip different strand + next if($bed->{strand} ne $pass->{strand}); + + # process bed hit + (my $tmp, $feature) = $bed->closest($pass->{tailsBestBase}); + + # increment hits + $hits++; + + # get scores + my $currFeatureScore = $featureScores{$feature}; + print $pass->{name},"\t",$bed->{name},"\t",$feature,"\t",$currFeatureScore,"\n"; + + # update score + $prevFeatureScore = $currFeatureScore; + + } + + last if($hits > 1); + } - - print $pass->{name},"\n"; - last; + $stats{'reads'}{$feature} += $pass->{readsSumCoverage}; + $stats{'pass'}{$feature}++; - $count++; } my $t1 = Benchmark->new(); my $td = timediff($t1, $t0); - print "benchmark :: ",timestr($td),"\n"; - print $count,"\n"; - + print "# benchmark :: ",timestr($td),"\n"; +=head + print "# feature\tpass\treads\n"; + foreach my $key (sort keys %{$stats{'reads'}}) + { + print $key,"\t",$stats{'pass'}{$key},"\t",sprintf("%.2f",100*$stats{'pass'}{$key}/$stats{'pass'}{'total'}),"\t",$stats{'reads'}{$key},"\t",sprintf("%.2f",100*$stats{'reads'}{$key}/$stats{'reads'}{"total"}),"\n"; + } +=cut } 1; # return true value \ No newline at end of file