This repository has been archived by the owner. It is now read-only.
Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
ReptilePallium/Extension_3UTR/scripts/PASSCluster.pl
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
186 lines (155 sloc)
4.18 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
# PASSCluster | |
# parse result from PASSFinder | |
# cluster Poly A supported bases in 3'UTR sites | |
# | |
# Input | |
# -gbed : graphBed file of last base coverage | |
# -window_next : sliding window to define next cluster | |
# -window_best : sliding window to split best base inside cluster | |
# -sort : use unix sort | |
# -help : prints a help message | |
# | |
# Output | |
# BED file with 9 columns defining cluster size, | |
# PASS position and cluster expression | |
# | |
# Version 3.0 | |
# Date Nov 2016 | |
# Georgi Tushev | |
# Scientific Computing Facility | |
# Max-Planck Institute for Brain Research | |
# send bug reports to sciclist@brain.mpg.de | |
# | |
use warnings; | |
use strict; | |
use Getopt::Long(); | |
use PASSRecord; | |
use PASSGroup; | |
sub usage($); | |
sub cluster($$$$$$$); | |
sub test($); | |
# main body | |
MAIN:{ | |
# define input | |
my $gbed_file; | |
my $window_next = 0; | |
my $window_best = 100; | |
my $sort; | |
my $help; | |
# create objects | |
my $base = PASSRecord->new(); | |
my $prevpos_base = PASSRecord->new(); | |
my $prevpos_tail = PASSRecord->new(); | |
my $prevneg_base = PASSRecord->new(); | |
my $prevneg_tail = PASSRecord->new(); | |
my $group_pos = PASSGroup->new(); | |
my $group_neg = PASSGroup->new(); | |
# counters | |
my $lines = 0; | |
my $idx_group_pos = 0; | |
my $idx_group_neg = 0; | |
# set-up input parameters | |
Getopt::Long::GetOptions( | |
"gbed=s" => \$gbed_file, | |
"window_next=i" => \$window_next, | |
"window_best=i" => \$window_best, | |
"sort" => \$sort, | |
"help" => \$help, | |
) or usage("Error :: invalid command line options."); | |
# default help output | |
usage("version 1.0, Oct 2016") if($help); | |
# check if gbed file is provided | |
usage("Error :: gz compressed GBed file is required") unless defined($gbed_file); | |
# open stream | |
my $fh; | |
if ($sort) | |
{ | |
open ($fh, "gunzip -c $gbed_file |sort -k1,1 -k3,3 -k2,2n |") or die $!; | |
} | |
else | |
{ | |
open ($fh, "gunzip -c $gbed_file |") or die $!; | |
} | |
# parse stream | |
while (<$fh>) | |
{ | |
# read current record | |
chomp($_); | |
$base->parse($_); | |
$lines++; | |
if ($base->isreverse()) | |
{ | |
cluster($base, $group_neg, $prevneg_base, $prevneg_tail, \$idx_group_neg, $window_next, $window_best); | |
} | |
else | |
{ | |
cluster($base, $group_pos, $prevpos_base, $prevpos_tail, \$idx_group_pos, $window_next, $window_best); | |
} | |
} | |
# close stream | |
close($fh); | |
$group_pos->report(); | |
$group_neg->report(); | |
} | |
### --- Subroutines --- ### | |
# cluster :: cluster sorted base | |
sub cluster($$$$$$$) | |
{ | |
my $base = $_[0]; | |
my $group = $_[1]; | |
my $prev_base = $_[2]; | |
my $prev_tail = $_[3]; | |
my $idx_group_ref = $_[4]; | |
my $window_next = $_[5]; | |
my $window_best = $_[6]; | |
# group logic | |
if (($base->isoverlap($prev_base, $window_next) == 0) || (($group->hastail() == 1) && ($base->isoverlap($prev_tail, $window_best) == 0))) | |
{ | |
# report old group | |
$group->report(); | |
# reset new group | |
${$idx_group_ref}++; | |
$group->create($base, ${$idx_group_ref}); | |
} | |
else | |
{ | |
# extend group | |
$group->extend($base); | |
} | |
# cluster on reads | |
$prev_base->update($base); | |
# cluster on tails | |
if ($base->ispass() == 1) | |
{ | |
$prev_tail->update($base); | |
} | |
} | |
# usage :: define user arguments | |
sub usage($) | |
{ | |
my $message = $_[0]; | |
if (defined $message && length $message) | |
{ | |
$message .= "\n" unless($message =~ /\n$/); | |
} | |
my $command = $0; | |
$command =~ s#^.*/##; | |
print STDERR ( | |
$message, | |
"usage: $command -gbed graphBed_file.gbed.gz\n" . | |
"description: cluster last base coverage into PASS\n" . | |
"parameters:\n" . | |
"-gbed\n" . | |
"\tGZIP compressed GraphBed file as a result from PASSFinder\n" . | |
"-window_next\n" . | |
"\tsliding window defining a PASS cluster [default is 1]\n" . | |
"-window_best\n" . | |
"\tsliding window for granularity of cluster [default is 100]\n" . | |
"-sort\n" . | |
"\t apply unix sort -k1,1 -k3,3 -k2,2n [default is 0]\n" . | |
"-help\n" . | |
"\tdefine usage\n" | |
); | |
die("\n"); | |
} | |