Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#!/usr/bin/perl
use warnings;
use strict;
use Getopt::Long();
use File::Basename;
use List::Util qw(max);
use POSIX;
use BedLine;
sub usage($);
sub GetBedLine($$);
sub BedToTracks($$$$);
sub updateAccumulator($$$$$);
sub writeOutTrack($$$);
MAIN:
{
my $query;
my $file_bed;
my @files_gbed;
my $reps = 1;
my $help;
my $version = "Version 1.0";
Getopt::Long::GetOptions(
"q|query=s" => \$query,
"b|bed=s" => \$file_bed,
"g|gbed=s{1,}" => \@files_gbed,
"r|reps=i" => \$reps,
"h|help" => \$help
) or usage("MetageneView::Error invalid comand line option\n");
usage("MetageneView $version\n") if(defined($help));
usage("MetageneView::Error annotation file is required\n") unless(defined($file_bed));
usage("MetageneView::Error alignment file is required\n") unless(@files_gbed);
usage("MetageneView::Error query gene symbol is required\n") unless(defined($query));
# parse query bed
my $bed = GetBedLine($file_bed, $query);
my $strand = $bed->strand;
print "browser position " . $bed->chrom . ":" . ($bed->chromStart - 100) . "-" . ($bed->chromStart + $bed->geneSpan + 100) . "\n";
print "browser hide all\n";
print "track type=bed name=RefSeq description=\"linear transcript, strand $strand\" visibility=full color=0,0,0\n";
print $bed->chrom,"\t",
$bed->chromStart,"\t",
($bed->chromStart+$bed->geneSpan),"\t",
$bed->name,"\t",
$bed->score,"\t",
$bed->strand,"\t",
($bed->chromStart + $bed->cdsStart),"\t",
($bed->chromStart + $bed->cdsEnd),"\n";
# read tracks
my %tracks = ();
BedToTracks(\%tracks, \@files_gbed, $reps, $bed);
# find the max
my $maxDepth = 0;
foreach my $group (sort keys %tracks)
{
my $maxPerTrack = ceil(max(@{$tracks{$group}}));
$maxDepth = ($maxDepth <= $maxPerTrack) ? $maxPerTrack : $maxDepth;
}
# print out tracks
my @colors = ("30,144,255",
"148,0,211",
"112,128,144",
"34,139,34",
"255,69,0",
"220,20,60");
my $c = 0;
foreach my $group (sort keys %tracks)
{
print "track type=bedGraph name=$group visibility=full color=$colors[$c] viewLimits=0:$maxDepth autoScale=off\n";
writeOutTrack($tracks{$group}, $bed->chrom, $bed->chromStart);
$c++;
$c = 0 if ($c >= $#colors);
}
}
sub BedToTracks($$$$)
{
my $tracks = $_[0];
my $files_gbed = $_[1];
my $reps = $_[2];
my $bed = $_[3];
my $query = $bed->chrom . ":" . $bed->chromStart . "-" . $bed->chromEnd;
my $k = 0;
my @k_track = (0) x $bed->geneSpan;
my $r = 0;
foreach my $file (@{$files_gbed})
{
my $name = fileparse($file, ".gbed.gz");
my $group = ($name =~ m/^([_A-Za-z0-9]+)/) ? $1 : "unknown";
$group =~ s/[_0-9]+$//g;
$r++;
open (my $fh, "tabix $file $query|") or die $!;
while (<$fh>)
{
chomp($_);
my ($chrom, $regStart, $regEnd, $depth) = split("\t", $_, 4);
my $overlap = $bed->find($regStart, $regEnd);
foreach (@{$overlap})
{
updateAccumulator(\@k_track, $_->[0], $_->[1], $depth, (1/$r));
}
}
close($fh);
# rewind replica
if(($r % $reps) == 0)
{
$r = 0;
my @c_track = @k_track;
$tracks->{$group} = \@c_track;
@k_track = (0) x $bed->geneSpan;
#last;
}
}
}
sub GetBedLine($$)
{
my $file_bed = $_[0];
my $query = $_[1];
my $fh;
if ($file_bed =~ m/\.gz$/)
{
open($fh, "gunzip -c $file_bed|grep \"$query\"|") or die $!;
}
else
{
open($fh, "grep \"$query\" $file_bed|") or die $!;
}
my $bedObj = BedLine->new();
while (<$fh>)
{
chomp($_);
$bedObj->parse($_);
last;
}
close($fh);
if (!defined($bedObj->name))
{
die("MetageneView::Error query symbol $query is not annotated.\n");
}
return $bedObj;
}
sub usage($)
{
my $message = $_[0];
if (defined $message && length $message)
{
$message .= "\n" unless($message =~ /\n$/);
}
my $command = $0;
$command =~ s#^.*/##;
print STDERR (
$message,
"usage: $command --bed annotation.bed -bams runA.bam runB.bam --query Gene\n" .
"description: MetageneView creates a BedGraph track file with linear gene representation (no introns).\n" .
"parameters:\n" .
"-q|--query\n" .
"\tquery gene name\n" .
"-r|--reps\n" .
"\tnumber of replica, to accumulate coverage in consecutive files\n" .
"-b|--bed\n" .
"\tvalid annotation file in BED12 format\n" .
"-g|--gbed\n" .
"\tsingle or a space-separated list of GraphBed files.\n" .
"\tThe GraphBed files need to be block compressed with BGZIP.\n" .
"\tThe script uses Tabix to query the coverage files.\n" .
"-help\n" .
"\tdefine usage\n"
);
die("\n");
}
sub updateAccumulator($$$$$)
{
my $track = $_[0];
my $idxStart = $_[1];
my $idxEnd = $_[2];
my $value = $_[3];
my $scale = $_[4];
for (my $k = $idxStart; $k < $idxEnd; $k++)
{
$track->[$k] = $track->[$k] + $scale * ($value - $track->[$k]);
}
}
sub writeOutTrack($$$)
{
my $track = $_[0];
my $chrom = $_[1];
my $offset = $_[2];
my $lastStart = -1;
my $lastDepth = -1;
my $trackLength = scalar(@{$track});
for(my $k = 0; $k < $trackLength; $k++)
{
my $depth = $track->[$k];
if ($depth != $lastDepth)
{
if (($lastDepth != -1) && ($lastDepth > 0))
{
print $chrom, "\t",
($lastStart + $offset), "\t",
($k + 1 + $offset), "\t",
sprintf("%.5f",$lastDepth),"\n";
}
$lastDepth = $depth;
$lastStart = $k;
}
}
print $chrom, "\t",
($lastStart + $offset), "\t",
($trackLength + $offset), "\t",
sprintf("%.5f",$lastDepth),"\n";
}