#!/usr/bin/perl


use warnings;
use strict;
use Bio::DB::HTS::Tabix;

sub parseChromSizes($);

MAIN:
{
    my $file_annotation = shift;
    my $file_clusters = shift;
    my $file_chromSizes = shift;
    
    my $window_fp = 5000;
    my $window_tp = 20000;
    
    # parse chrom sizes
    my $chromSizes = parseChromSizes($file_chromSizes);
    
    
    my $obj = Bio::DB::HTS::Tabix->new( filename => $file_clusters);
    my $total = 0;
    
    
    my %test_hash = ();
    open(my $fh, "<", $file_annotation) or die $!;
    while(<$fh>)
    {
        # remove new line
        chomp($_);
        
        # split bed line
        my @bed_line = split("\t", $_, 12);
        
        # extract clusters
        
        my $regionLeft = ($bed_line[5] eq "+") ? ($bed_line[1] - $window_fp) : ($bed_line[1] - $window_tp);
        $regionLeft = 0 if($regionLeft < 0);
        
        my $regionRight = ($bed_line[5] eq "+") ? ($bed_line[2] + $window_tp) : ($bed_line[2] + $window_fp);
        $regionLeft = $chromSizes->{$bed_line[0]} if($regionLeft > $chromSizes->{$bed_line[0]});
        
        
        my $region = $bed_line[0] . ":" . $regionLeft . "-" . $regionRight;
        my $value = 0;
        

        my $iter = $obj->query($region);
        while (my $pass = $iter->next)
        {
            my @line = split("\t", $pass);
            
            ### check for strand and tail containing pass
            if(($line[5] eq $bed_line[5]) && ($line[11] > 0))
            {
                if (~exists($test_hash{$line[3]}))
                {
                    $value += $line[8];
                    $test_hash{$line[3]}++;
                }
            }
            
        }

        print $bed_line[3],"\t",$value,"\t",$region,"\n";
        $total+= $value;
        
    }
    close($fh);
    $obj->close;
    
    print $total,"\n";
    
    my $test = 0;
    foreach my $key (keys %test_hash)
    {
        $test++ if($test_hash{$key} > 1);
    }
    print $test,"\t",scalar(keys %test_hash),"\n"
}


sub parseChromSizes($)
{
    my $file_chromSizes = $_[0];
    my %hash_chromSizes = ();
    
    open(my $fh, "<", $file_chromSizes) or die $!;
    while(<$fh>)
    {
        chomp($_);
        my ($chrom, $span) = split("\t", $_, 2);
        $hash_chromSizes{$chrom} = $span;
    }
    close($fh);
    return \%hash_chromSizes;
}