Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
MetaGeneView/MetaGeneView.pl
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
255 lines (208 sloc)
6.25 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use warnings; | |
use strict; | |
use Getopt::Long(); | |
use File::Basename; | |
use List::Util qw(max); | |
use POSIX; | |
use BedLine; | |
sub usage($); | |
sub GetBedLine($$); | |
sub BedToTracks($$$$); | |
sub updateAccumulator($$$$$); | |
sub writeOutTrack($$$); | |
MAIN: | |
{ | |
my $query; | |
my $file_bed; | |
my @files_gbed; | |
my $reps = 1; | |
my $help; | |
my $version = "Version 1.0"; | |
Getopt::Long::GetOptions( | |
"q|query=s" => \$query, | |
"b|bed=s" => \$file_bed, | |
"g|gbed=s{1,}" => \@files_gbed, | |
"r|reps=i" => \$reps, | |
"h|help" => \$help | |
) or usage("MetageneView::Error invalid comand line option\n"); | |
usage("MetageneView $version\n") if(defined($help)); | |
usage("MetageneView::Error annotation file is required\n") unless(defined($file_bed)); | |
usage("MetageneView::Error alignment file is required\n") unless(@files_gbed); | |
usage("MetageneView::Error query gene symbol is required\n") unless(defined($query)); | |
# parse query bed | |
my $bed = GetBedLine($file_bed, $query); | |
my $strand = $bed->strand; | |
print "browser position " . $bed->chrom . ":" . ($bed->chromStart - 100) . "-" . ($bed->chromStart + $bed->geneSpan + 100) . "\n"; | |
print "browser hide all\n"; | |
print "track type=bed name=RefSeq description=\"linear transcript, strand $strand\" visibility=full color=0,0,0\n"; | |
print $bed->chrom,"\t", | |
$bed->chromStart,"\t", | |
($bed->chromStart+$bed->geneSpan),"\t", | |
$bed->name,"\t", | |
$bed->score,"\t", | |
$bed->strand,"\t", | |
($bed->chromStart + $bed->cdsStart),"\t", | |
($bed->chromStart + $bed->cdsEnd),"\n"; | |
# read tracks | |
my %tracks = (); | |
BedToTracks(\%tracks, \@files_gbed, $reps, $bed); | |
# find the max | |
my $maxDepth = 0; | |
foreach my $group (sort keys %tracks) | |
{ | |
my $maxPerTrack = ceil(max(@{$tracks{$group}})); | |
$maxDepth = ($maxDepth <= $maxPerTrack) ? $maxPerTrack : $maxDepth; | |
} | |
# print out tracks | |
my @colors = ("30,144,255", | |
"148,0,211", | |
"112,128,144", | |
"34,139,34", | |
"255,69,0", | |
"220,20,60"); | |
my $c = 0; | |
foreach my $group (sort keys %tracks) | |
{ | |
print "track type=bedGraph name=$group visibility=full color=$colors[$c] viewLimits=0:$maxDepth autoScale=off\n"; | |
writeOutTrack($tracks{$group}, $bed->chrom, $bed->chromStart); | |
$c++; | |
$c = 0 if ($c >= $#colors); | |
} | |
} | |
sub BedToTracks($$$$) | |
{ | |
my $tracks = $_[0]; | |
my $files_gbed = $_[1]; | |
my $reps = $_[2]; | |
my $bed = $_[3]; | |
my $query = $bed->chrom . ":" . $bed->chromStart . "-" . $bed->chromEnd; | |
my $k = 0; | |
my @k_track = (0) x $bed->geneSpan; | |
my $r = 0; | |
foreach my $file (@{$files_gbed}) | |
{ | |
my $name = fileparse($file, ".gbed.gz"); | |
my $group = ($name =~ m/^([_A-Za-z0-9]+)/) ? $1 : "unknown"; | |
$group =~ s/[_0-9]+$//g; | |
$r++; | |
open (my $fh, "tabix $file $query|") or die $!; | |
while (<$fh>) | |
{ | |
chomp($_); | |
my ($chrom, $regStart, $regEnd, $depth) = split("\t", $_, 4); | |
my $overlap = $bed->find($regStart, $regEnd); | |
foreach (@{$overlap}) | |
{ | |
updateAccumulator(\@k_track, $_->[0], $_->[1], $depth, (1/$r)); | |
} | |
} | |
close($fh); | |
# rewind replica | |
if(($r % $reps) == 0) | |
{ | |
$r = 0; | |
my @c_track = @k_track; | |
$tracks->{$group} = \@c_track; | |
@k_track = (0) x $bed->geneSpan; | |
#last; | |
} | |
} | |
} | |
sub GetBedLine($$) | |
{ | |
my $file_bed = $_[0]; | |
my $query = $_[1]; | |
my $fh; | |
if ($file_bed =~ m/\.gz$/) | |
{ | |
open($fh, "gunzip -c $file_bed|grep \"$query\"|") or die $!; | |
} | |
else | |
{ | |
open($fh, "grep \"$query\" $file_bed|") or die $!; | |
} | |
my $bedObj = BedLine->new(); | |
while (<$fh>) | |
{ | |
chomp($_); | |
$bedObj->parse($_); | |
last; | |
} | |
close($fh); | |
if (!defined($bedObj->name)) | |
{ | |
die("MetageneView::Error query symbol $query is not annotated.\n"); | |
} | |
return $bedObj; | |
} | |
sub usage($) | |
{ | |
my $message = $_[0]; | |
if (defined $message && length $message) | |
{ | |
$message .= "\n" unless($message =~ /\n$/); | |
} | |
my $command = $0; | |
$command =~ s#^.*/##; | |
print STDERR ( | |
$message, | |
"usage: $command --bed annotation.bed -bams runA.bam runB.bam --query Gene\n" . | |
"description: MetageneView creates a BedGraph track file with linear gene representation (no introns).\n" . | |
"parameters:\n" . | |
"-q|--query\n" . | |
"\tquery gene name\n" . | |
"-r|--reps\n" . | |
"\tnumber of replica, to accumulate coverage in consecutive files\n" . | |
"-b|--bed\n" . | |
"\tvalid annotation file in BED12 format\n" . | |
"-g|--gbed\n" . | |
"\tsingle or a space-separated list of GraphBed files.\n" . | |
"\tThe GraphBed files need to be block compressed with BGZIP.\n" . | |
"\tThe script uses Tabix to query the coverage files.\n" . | |
"-help\n" . | |
"\tdefine usage\n" | |
); | |
die("\n"); | |
} | |
sub updateAccumulator($$$$$) | |
{ | |
my $track = $_[0]; | |
my $idxStart = $_[1]; | |
my $idxEnd = $_[2]; | |
my $value = $_[3]; | |
my $scale = $_[4]; | |
for (my $k = $idxStart; $k < $idxEnd; $k++) | |
{ | |
$track->[$k] = $track->[$k] + $scale * ($value - $track->[$k]); | |
} | |
} | |
sub writeOutTrack($$$) | |
{ | |
my $track = $_[0]; | |
my $chrom = $_[1]; | |
my $offset = $_[2]; | |
my $lastStart = -1; | |
my $lastDepth = -1; | |
my $trackLength = scalar(@{$track}); | |
for(my $k = 0; $k < $trackLength; $k++) | |
{ | |
my $depth = $track->[$k]; | |
if ($depth != $lastDepth) | |
{ | |
if (($lastDepth != -1) && ($lastDepth > 0)) | |
{ | |
print $chrom, "\t", | |
($lastStart + $offset), "\t", | |
($k + 1 + $offset), "\t", | |
sprintf("%.5f",$lastDepth),"\n"; | |
} | |
$lastDepth = $depth; | |
$lastStart = $k; | |
} | |
} | |
print $chrom, "\t", | |
($lastStart + $offset), "\t", | |
($trackLength + $offset), "\t", | |
sprintf("%.5f",$lastDepth),"\n"; | |
} |