diff --git a/PASSAnnotator.pl b/PASSAnnotator.pl index 77eda67..99194f4 100644 --- a/PASSAnnotator.pl +++ b/PASSAnnotator.pl @@ -7,22 +7,34 @@ #use Bio::DB::HTS::Tabix; use FindBin; use File::Spec; +use Benchmark qw(:all); use lib "$FindBin::Bin/lib"; use Tests::BedParser; +use Tests::PassParser; -my $queryFile = File::Spec->catfile($FindBin::Bin, 'data', 'refSeq_rn5_Annotated_May2017.bed.gz'); +sub parseLineOld(); +sub parseLineNew(); +sub mysplit($$$); -my $obj = BedParser->new($queryFile); +my $queryBedFile = File::Spec->catfile($FindBin::Bin, 'data', 'refSeq_rn5_Annotated_May2017.bed.gz'); +my $queryPassFile = File::Spec->catfile($FindBin::Bin, 'data', 'passData_ClustersRaw_May2017.bed.gz'); + + + +# my $obj = BedParser->new($queryFile); +# $obj->parse(); + + +my $obj = PassParser->new($queryPassFile, $queryBedFile); $obj->parse(); -#my $label = '3pUTR_extended'; -#$label =~ tr/53/35/; -#print $label,"\n"; + + diff --git a/PASSAnnotator.xcodeproj/project.pbxproj b/PASSAnnotator.xcodeproj/project.pbxproj index c7825f0..be8f606 100644 --- a/PASSAnnotator.xcodeproj/project.pbxproj +++ b/PASSAnnotator.xcodeproj/project.pbxproj @@ -7,6 +7,7 @@ objects = { /* Begin PBXFileReference section */ + B16A7F0C1ECF1D9E004F1B12 /* PassParser.pm */ = {isa = PBXFileReference; lastKnownFileType = text.script.perl; name = PassParser.pm; path = lib/Tests/PassParser.pm; sourceTree = ""; }; B179B9361EC676A1006FA542 /* PASSAnnotator.pl */ = {isa = PBXFileReference; lastKnownFileType = text.script.perl; path = PASSAnnotator.pl; sourceTree = ""; }; B1BE38C01ECDA7FF0017F94D /* Bed.pm */ = {isa = PBXFileReference; lastKnownFileType = text.script.perl; name = Bed.pm; path = lib/Record/Bed.pm; sourceTree = ""; }; B1BE38C11ECDA7FF0017F94D /* Pass.pm */ = {isa = PBXFileReference; lastKnownFileType = text.script.perl; name = Pass.pm; path = lib/Record/Pass.pm; sourceTree = ""; }; @@ -38,6 +39,7 @@ children = ( B1BE38C71ECDC2B20017F94D /* BedTree.pm */, B1BE38C81ECDC5380017F94D /* BedParser.pm */, + B16A7F0C1ECF1D9E004F1B12 /* PassParser.pm */, ); name = Tests; sourceTree = ""; diff --git a/lib/Record/Bed.pm b/lib/Record/Bed.pm index 26baf3c..7b41eeb 100644 --- a/lib/Record/Bed.pm +++ b/lib/Record/Bed.pm @@ -181,7 +181,9 @@ sub closest } elsif ($cond_hit eq 'intron') { - $feature = 'intron'; + $feature = '5pUTR_intron' if ($base < $self->{thickStart}); + $feature = 'intron' if (($self->{thickStart} <= $base) && ($base <= $self->{thickEnd})); + $feature = '3pUTR_intron' if ($self->{thickEnd} < $base); } elsif ($cond_hit eq 'intergenic') { @@ -201,4 +203,5 @@ sub closest + 1; # return true \ No newline at end of file diff --git a/lib/Record/Pass.pm b/lib/Record/Pass.pm index 8d4b0a0..da4902e 100644 --- a/lib/Record/Pass.pm +++ b/lib/Record/Pass.pm @@ -11,7 +11,8 @@ sub new my $self = {}; bless ($self, $class); - # split bed line + + # parse line my @pass_vals = split("\t", $pass_line, 15); # properties @@ -30,10 +31,12 @@ sub new $self->{tailsSumCoverage} = $pass_vals[12]; $self->{tailsMaxCoverage} = $pass_vals[13]; $self->{tailsBestBase} = $pass_vals[14]; - + return $self; } + + sub ispoly { my $self = shift; diff --git a/lib/Tests/BedParser.pm b/lib/Tests/BedParser.pm index eebe79e..ea720bf 100644 --- a/lib/Tests/BedParser.pm +++ b/lib/Tests/BedParser.pm @@ -78,7 +78,7 @@ sub parse # process bed hit my ($hit, $feature) = $bed->closest($randomBase); - + #print $bed->{name},"\t",$feature,"\n"; $count++; diff --git a/lib/Tests/LineSplit.pm b/lib/Tests/LineSplit.pm new file mode 100644 index 0000000..5e1f4a7 --- /dev/null +++ b/lib/Tests/LineSplit.pm @@ -0,0 +1,92 @@ + +my $line = 'chr1 388172 401149 NM_001099460;Vom2r3 0 + 388172 401149 0 6 206,283,804,225,124,884, 0,2845,3556,5212,10804,12093,'; + + +# ... or use subroutine references. +=head + cmpthese(1000000, { + 'Split' => '&parseLineOld($line)', + 'Substr' => '&parseLineNew($line)', + }); + =cut + + +my $res = mysplit($line, "\t", 12); +my $ex = mysplit($res->[-1], ",",$res->[9]); +print join(";", @{$res}),"\n"; +print join(";", @{$ex}),"\n"; + + + +sub parseLineOld() +{ + my $line = 'chr1 388172 401149 NM_001099460;Vom2r3 0 + 388172 401149 0 6 206,283,804,225,124,884, 0,2845,3556,5212,10804,12093,'; + my @res = split('\t', $line, 12); + return \@res; +} + + +sub parseLineNew() +{ + my $line = 'chr1 388172 401149 NM_001099460;Vom2r3 0 + 388172 401149 0 6 206,283,804,225,124,884, 0,2845,3556,5212,10804,12093,'; + my @res = (0) x 12; + my $pos_prev = -1; + my $pos_next = 0; + my $line_siz = length($line); + my $i = 0; + while($pos_next < $line_siz) + { + $pos_next = index($line,"\t", $pos_prev + 1); + $pos_next = $line_siz if($pos_next == -1); + my $txt = substr($line, $pos_prev + 1, $pos_next - $pos_prev - 1); + $res[$i] = $txt; + + #print $i,"\t",$txt,"\n"; + + $i++; + $pos_prev = $pos_next; + } + #print $i,"\t",join(";",@res),"\n"; + + return \@res; +} + +sub mysplit($$$) +{ + my $line = $_[0]; + my $delim = $_[1]; + my $count = $_[2]; + + # allocate result array + my @vals = (0) x $count; + my $char_prev = -1; + my $char_next = -1; + my $line_siz = length($line); + my $idx = 0; + while ($char_next < $line_siz) + { + # find delimiter + $char_next = index($line, $delim, $char_prev + 1); + $char_next = $line_siz if($char_next == - 1); + + # splice line + my $str = substr($line, $char_prev + 1, $char_next - $char_prev - 1); + + # fill array + $vals[$idx] = $str; + + # update counters + $idx++; + $char_prev = $char_next; + + } + + return \@vals; +} + + + + +#print $line,"\n",index($line,"\t"),"\n"; +#print $cnt,"\n"; + diff --git a/lib/Tests/PassParser.pm b/lib/Tests/PassParser.pm new file mode 100644 index 0000000..16b1351 --- /dev/null +++ b/lib/Tests/PassParser.pm @@ -0,0 +1,102 @@ +package PassParser; + +use warnings; +use strict; +use IO::Zlib; +use Bio::DB::HTS::Tabix; +use Benchmark; + +use lib "$FindBin::Bin/lib"; +use Record::Pass; + +# constructor +sub new +{ + my $class = shift; + my $filename = shift; + my $fileann = shift; + + my $self = {}; + bless ($self, $class); + + # open file handle + my $fileHandle = (); + if ($filename =~ m/\.gz$/) + { + $fileHandle = IO::Zlib->new($filename, "rb"); + if (!defined($fileHandle)) + { + $fileHandle->close(); + die $!; + } + + } + else + { + open($fileHandle, "<", $filename) or die $!; + } + + # open tabix index of annotation + $self->{annotation} = Bio::DB::HTS::Tabix->new(filename => $fileann); + $self->{handle} = $fileHandle; + + return $self; +} + +# destructor +sub DESTROY { + my $self = shift; + $self->{handle}->close() if($self->{handle}); + $self->{annotation}->close() if($self->{annotation}); +} + +# read +sub read +{ + my $self = shift; + + my $line = readline($self->{handle}); + + return $line; +} + +# parse +sub parse +{ + my $self = shift; + my $count = 0; + + my $t0 = Benchmark->new(); + + while(my $line = $self->read()) + { + chomp($line); + + # create pass object + my $pass = Pass->new($line); + + # count no tail + if ($pass->ispoly() == 0) + { + $notail += $pass->{readsSumCoverage}; + } + elsif ($pass->ischrom('chrM')) + { + $mito += $pass->{readsSumCoverage}; + } + + + print $pass->{name},"\n"; + last; + + + $count++; + } + my $t1 = Benchmark->new(); + my $td = timediff($t1, $t0); + print "benchmark :: ",timestr($td),"\n"; + print $count,"\n"; + +} + +1; # return true value \ No newline at end of file