#!/usr/bin/perl use warnings; use strict; use Bio::DB::HTS::Tabix; sub parseChromSizes($); MAIN: { my $file_annotation = shift; my $file_clusters = shift; my $file_chromSizes = shift; my $window_fp = 5000; my $window_tp = 20000; # parse chrom sizes my $chromSizes = parseChromSizes($file_chromSizes); my $obj = Bio::DB::HTS::Tabix->new( filename => $file_clusters); my $total = 0; my %test_hash = (); open(my $fh, "<", $file_annotation) or die $!; while(<$fh>) { # remove new line chomp($_); # split bed line my @bed_line = split("\t", $_, 12); # extract clusters my $regionLeft = ($bed_line[5] eq "+") ? ($bed_line[1] - $window_fp) : ($bed_line[1] - $window_tp); $regionLeft = 0 if($regionLeft < 0); my $regionRight = ($bed_line[5] eq "+") ? ($bed_line[2] + $window_tp) : ($bed_line[2] + $window_fp); $regionLeft = $chromSizes->{$bed_line[0]} if($regionLeft > $chromSizes->{$bed_line[0]}); my $region = $bed_line[0] . ":" . $regionLeft . "-" . $regionRight; my $value = 0; my $iter = $obj->query($region); while (my $pass = $iter->next) { my @line = split("\t", $pass); ### check for strand and tail containing pass if(($line[5] eq $bed_line[5]) && ($line[11] > 0)) { if (~exists($test_hash{$line[3]})) { $value += $line[8]; $test_hash{$line[3]}++; } } } print $bed_line[3],"\t",$value,"\t",$region,"\n"; $total+= $value; } close($fh); $obj->close; print $total,"\n"; my $test = 0; foreach my $key (keys %test_hash) { $test++ if($test_hash{$key} > 1); } print $test,"\t",scalar(keys %test_hash),"\n" } sub parseChromSizes($) { my $file_chromSizes = $_[0]; my %hash_chromSizes = (); open(my $fh, "<", $file_chromSizes) or die $!; while(<$fh>) { chomp($_); my ($chrom, $span) = split("\t", $_, 2); $hash_chromSizes{$chrom} = $span; } close($fh); return \%hash_chromSizes; }