Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
5059372a14
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
186 lines (155 sloc) 4.18 KB
#!/usr/bin/perl
# PASSCluster
# parse result from PASSFinder
# cluster Poly A supported bases in 3'UTR sites
#
# Input
# -gbed : graphBed file of last base coverage
# -window_next : sliding window to define next cluster
# -window_best : sliding window to split best base inside cluster
# -sort : use unix sort
# -help : prints a help message
#
# Output
# BED file with 9 columns defining cluster size,
# PASS position and cluster expression
#
# Version 3.0
# Date Nov 2016
# Georgi Tushev
# Scientific Computing Facility
# Max-Planck Institute for Brain Research
# send bug reports to sciclist@brain.mpg.de
#
use warnings;
use strict;
use Getopt::Long();
use PASSRecord;
use PASSGroup;
sub usage($);
sub cluster($$$$$$$);
sub test($);
# main body
MAIN:{
# define input
my $gbed_file;
my $window_next = 0;
my $window_best = 100;
my $sort;
my $help;
# create objects
my $base = PASSRecord->new();
my $prevpos_base = PASSRecord->new();
my $prevpos_tail = PASSRecord->new();
my $prevneg_base = PASSRecord->new();
my $prevneg_tail = PASSRecord->new();
my $group_pos = PASSGroup->new();
my $group_neg = PASSGroup->new();
# counters
my $lines = 0;
my $idx_group_pos = 0;
my $idx_group_neg = 0;
# set-up input parameters
Getopt::Long::GetOptions(
"gbed=s" => \$gbed_file,
"window_next=i" => \$window_next,
"window_best=i" => \$window_best,
"sort" => \$sort,
"help" => \$help,
) or usage("Error :: invalid command line options.");
# default help output
usage("version 1.0, Oct 2016") if($help);
# check if gbed file is provided
usage("Error :: gz compressed GBed file is required") unless defined($gbed_file);
# open stream
my $fh;
if ($sort)
{
open ($fh, "gunzip -c $gbed_file |sort -k1,1 -k3,3 -k2,2n |") or die $!;
}
else
{
open ($fh, "gunzip -c $gbed_file |") or die $!;
}
# parse stream
while (<$fh>)
{
# read current record
chomp($_);
$base->parse($_);
$lines++;
if ($base->isreverse())
{
cluster($base, $group_neg, $prevneg_base, $prevneg_tail, \$idx_group_neg, $window_next, $window_best);
}
else
{
cluster($base, $group_pos, $prevpos_base, $prevpos_tail, \$idx_group_pos, $window_next, $window_best);
}
}
# close stream
close($fh);
$group_pos->report();
$group_neg->report();
}
### --- Subroutines --- ###
# cluster :: cluster sorted base
sub cluster($$$$$$$)
{
my $base = $_[0];
my $group = $_[1];
my $prev_base = $_[2];
my $prev_tail = $_[3];
my $idx_group_ref = $_[4];
my $window_next = $_[5];
my $window_best = $_[6];
# group logic
if (($base->isoverlap($prev_base, $window_next) == 0) || (($group->hastail() == 1) && ($base->isoverlap($prev_tail, $window_best) == 0)))
{
# report old group
$group->report();
# reset new group
${$idx_group_ref}++;
$group->create($base, ${$idx_group_ref});
}
else
{
# extend group
$group->extend($base);
}
# cluster on reads
$prev_base->update($base);
# cluster on tails
if ($base->ispass() == 1)
{
$prev_tail->update($base);
}
}
# usage :: define user arguments
sub usage($)
{
my $message = $_[0];
if (defined $message && length $message)
{
$message .= "\n" unless($message =~ /\n$/);
}
my $command = $0;
$command =~ s#^.*/##;
print STDERR (
$message,
"usage: $command -gbed graphBed_file.gbed.gz\n" .
"description: cluster last base coverage into PASS\n" .
"parameters:\n" .
"-gbed\n" .
"\tGZIP compressed GraphBed file as a result from PASSFinder\n" .
"-window_next\n" .
"\tsliding window defining a PASS cluster [default is 1]\n" .
"-window_best\n" .
"\tsliding window for granularity of cluster [default is 100]\n" .
"-sort\n" .
"\t apply unix sort -k1,1 -k3,3 -k2,2n [default is 0]\n" .
"-help\n" .
"\tdefine usage\n"
);
die("\n");
}