Skip to content

Commit

Permalink
explore line split
Browse files Browse the repository at this point in the history
  • Loading branch information
MPIBR-tushevg committed May 19, 2017
1 parent 192847e commit a4b27e3
Show file tree
Hide file tree
Showing 7 changed files with 223 additions and 9 deletions.
22 changes: 17 additions & 5 deletions PASSAnnotator.pl
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,34 @@
#use Bio::DB::HTS::Tabix;
use FindBin;
use File::Spec;
use Benchmark qw(:all);

use lib "$FindBin::Bin/lib";
use Tests::BedParser;
use Tests::PassParser;


my $queryFile = File::Spec->catfile($FindBin::Bin, 'data', 'refSeq_rn5_Annotated_May2017.bed.gz');
sub parseLineOld();
sub parseLineNew();
sub mysplit($$$);

my $obj = BedParser->new($queryFile);

my $queryBedFile = File::Spec->catfile($FindBin::Bin, 'data', 'refSeq_rn5_Annotated_May2017.bed.gz');
my $queryPassFile = File::Spec->catfile($FindBin::Bin, 'data', 'passData_ClustersRaw_May2017.bed.gz');



# my $obj = BedParser->new($queryFile);
# $obj->parse();


my $obj = PassParser->new($queryPassFile, $queryBedFile);
$obj->parse();

#my $label = '3pUTR_extended';

#$label =~ tr/53/35/;

#print $label,"\n";





Expand Down
2 changes: 2 additions & 0 deletions PASSAnnotator.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
objects = {

/* Begin PBXFileReference section */
B16A7F0C1ECF1D9E004F1B12 /* PassParser.pm */ = {isa = PBXFileReference; lastKnownFileType = text.script.perl; name = PassParser.pm; path = lib/Tests/PassParser.pm; sourceTree = "<group>"; };
B179B9361EC676A1006FA542 /* PASSAnnotator.pl */ = {isa = PBXFileReference; lastKnownFileType = text.script.perl; path = PASSAnnotator.pl; sourceTree = "<group>"; };
B1BE38C01ECDA7FF0017F94D /* Bed.pm */ = {isa = PBXFileReference; lastKnownFileType = text.script.perl; name = Bed.pm; path = lib/Record/Bed.pm; sourceTree = "<group>"; };
B1BE38C11ECDA7FF0017F94D /* Pass.pm */ = {isa = PBXFileReference; lastKnownFileType = text.script.perl; name = Pass.pm; path = lib/Record/Pass.pm; sourceTree = "<group>"; };
Expand Down Expand Up @@ -38,6 +39,7 @@
children = (
B1BE38C71ECDC2B20017F94D /* BedTree.pm */,
B1BE38C81ECDC5380017F94D /* BedParser.pm */,
B16A7F0C1ECF1D9E004F1B12 /* PassParser.pm */,
);
name = Tests;
sourceTree = "<group>";
Expand Down
5 changes: 4 additions & 1 deletion lib/Record/Bed.pm
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,9 @@ sub closest
}
elsif ($cond_hit eq 'intron')
{
$feature = 'intron';
$feature = '5pUTR_intron' if ($base < $self->{thickStart});
$feature = 'intron' if (($self->{thickStart} <= $base) && ($base <= $self->{thickEnd}));
$feature = '3pUTR_intron' if ($self->{thickEnd} < $base);
}
elsif ($cond_hit eq 'intergenic')
{
Expand All @@ -201,4 +203,5 @@ sub closest




1; # return true
7 changes: 5 additions & 2 deletions lib/Record/Pass.pm
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ sub new
my $self = {};
bless ($self, $class);

# split bed line

# parse line
my @pass_vals = split("\t", $pass_line, 15);

# properties
Expand All @@ -30,10 +31,12 @@ sub new
$self->{tailsSumCoverage} = $pass_vals[12];
$self->{tailsMaxCoverage} = $pass_vals[13];
$self->{tailsBestBase} = $pass_vals[14];

return $self;
}



sub ispoly
{
my $self = shift;
Expand Down
2 changes: 1 addition & 1 deletion lib/Tests/BedParser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ sub parse
# process bed hit
my ($hit, $feature) = $bed->closest($randomBase);


#print $bed->{name},"\t",$feature,"\n";


$count++;
Expand Down
92 changes: 92 additions & 0 deletions lib/Tests/LineSplit.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@

my $line = 'chr1 388172 401149 NM_001099460;Vom2r3 0 + 388172 401149 0 6 206,283,804,225,124,884, 0,2845,3556,5212,10804,12093,';


# ... or use subroutine references.
=head
cmpthese(1000000, {
'Split' => '&parseLineOld($line)',
'Substr' => '&parseLineNew($line)',
});
=cut
my $res = mysplit($line, "\t", 12);
my $ex = mysplit($res->[-1], ",",$res->[9]);
print join(";", @{$res}),"\n";
print join(";", @{$ex}),"\n";
sub parseLineOld()
{
my $line = 'chr1 388172 401149 NM_001099460;Vom2r3 0 + 388172 401149 0 6 206,283,804,225,124,884, 0,2845,3556,5212,10804,12093,';
my @res = split('\t', $line, 12);
return \@res;
}
sub parseLineNew()
{
my $line = 'chr1 388172 401149 NM_001099460;Vom2r3 0 + 388172 401149 0 6 206,283,804,225,124,884, 0,2845,3556,5212,10804,12093,';
my @res = (0) x 12;
my $pos_prev = -1;
my $pos_next = 0;
my $line_siz = length($line);
my $i = 0;
while($pos_next < $line_siz)
{
$pos_next = index($line,"\t", $pos_prev + 1);
$pos_next = $line_siz if($pos_next == -1);
my $txt = substr($line, $pos_prev + 1, $pos_next - $pos_prev - 1);
$res[$i] = $txt;
#print $i,"\t",$txt,"\n";
$i++;
$pos_prev = $pos_next;
}
#print $i,"\t",join(";",@res),"\n";
return \@res;
}
sub mysplit($$$)
{
my $line = $_[0];
my $delim = $_[1];
my $count = $_[2];
# allocate result array
my @vals = (0) x $count;
my $char_prev = -1;
my $char_next = -1;
my $line_siz = length($line);
my $idx = 0;
while ($char_next < $line_siz)
{
# find delimiter
$char_next = index($line, $delim, $char_prev + 1);
$char_next = $line_siz if($char_next == - 1);
# splice line
my $str = substr($line, $char_prev + 1, $char_next - $char_prev - 1);
# fill array
$vals[$idx] = $str;
# update counters
$idx++;
$char_prev = $char_next;
}
return \@vals;
}
#print $line,"\n",index($line,"\t"),"\n";
#print $cnt,"\n";
102 changes: 102 additions & 0 deletions lib/Tests/PassParser.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package PassParser;

use warnings;
use strict;
use IO::Zlib;
use Bio::DB::HTS::Tabix;
use Benchmark;

use lib "$FindBin::Bin/lib";
use Record::Pass;

# constructor
sub new
{
my $class = shift;
my $filename = shift;
my $fileann = shift;

my $self = {};
bless ($self, $class);

# open file handle
my $fileHandle = ();
if ($filename =~ m/\.gz$/)
{
$fileHandle = IO::Zlib->new($filename, "rb");
if (!defined($fileHandle))
{
$fileHandle->close();
die $!;
}

}
else
{
open($fileHandle, "<", $filename) or die $!;
}

# open tabix index of annotation
$self->{annotation} = Bio::DB::HTS::Tabix->new(filename => $fileann);
$self->{handle} = $fileHandle;

return $self;
}

# destructor
sub DESTROY {
my $self = shift;
$self->{handle}->close() if($self->{handle});
$self->{annotation}->close() if($self->{annotation});
}

# read
sub read
{
my $self = shift;

my $line = readline($self->{handle});

return $line;
}

# parse
sub parse
{
my $self = shift;
my $count = 0;

my $t0 = Benchmark->new();

while(my $line = $self->read())
{
chomp($line);

# create pass object
my $pass = Pass->new($line);

# count no tail
if ($pass->ispoly() == 0)
{
$notail += $pass->{readsSumCoverage};
}
elsif ($pass->ischrom('chrM'))
{
$mito += $pass->{readsSumCoverage};
}


print $pass->{name},"\n";
last;


$count++;
}
my $t1 = Benchmark->new();
my $td = timediff($t1, $t0);
print "benchmark :: ",timestr($td),"\n";
print $count,"\n";

}

1; # return true value

0 comments on commit a4b27e3

Please sign in to comment.