This repository has been archived by the owner. It is now read-only.
Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
ReptilePallium/Extension_3UTR/scripts/BED12Split.pl
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
executable file
108 lines (85 sloc)
3.66 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
# BED12Split
#
# split BED12 file into BED6 file
# assign feature label after the name field
#
# Input
# $1 BED12 annotation file
#
# Version 1.0
# Date Nov 2016
# Georgi Tushev
# Scientific Computing Facility
# Max-Planck Institute for Brain Research
# send bug reports to sciclist@brain.mpg.de
#
use warnings;
use strict;
use List::Util qw(min max);

# Label the parts of one exon relative to the thick (coding) interval.
#
# Arguments: exon start, exon end, thick start, thick end, label for the
#            part left of the thick interval, label for the part right of it.
# Returns:   a list of [start, end, label] triples in left-to-right order.
#            Empty intervals (start >= end) are never returned.
sub _exon_features
{
    my ($exon_start, $exon_end, $thick_start, $thick_end, $left_utr, $right_utr) = @_;

    my @candidates;
    if ($thick_start == $thick_end)
    {
        # No thick interval: report the exon exactly once, undivided.
        # NOTE(review): the "CDS" label is the one this script has always
        # emitted for thickStart == thickEnd; confirm whether a dedicated
        # label is wanted for such transcripts.
        @candidates = ([$exon_start, $exon_end, "CDS"]);
    }
    else
    {
        # Clip the exon against the thick interval. A part that does not
        # exist for this exon comes out with start >= end and is dropped by
        # the filter below, so no per-case branching is needed.
        @candidates = (
            [$exon_start, min($exon_end, $thick_start), $left_utr],
            [max($exon_start, $thick_start), min($exon_end, $thick_end), "CDS"],
            [max($exon_start, $thick_end), $exon_end, $right_utr],
        );
    }

    return grep { $_->[0] < $_->[1] } @candidates;
}

# Split one BED12 record into BED6 feature records.
#
# Argument: one BED12 line without its line terminator.
# Returns:  a list of tab-joined BED6 strings (no trailing newline) with the
#           columns chrom, start, end, "name;label", number, strand, where
#           label is 5pUTR, 3pUTR, CDS or intron. For every block the exon
#           parts come first, followed by the intron that separates the
#           block from the previous one.
# Dies:     with a newline-terminated message when the line has fewer than
#           12 fields, a coordinate is not a non-negative integer, thickStart
#           exceeds thickEnd, or the block lists are shorter than blockCount.
sub split_bed12_record
{
    my ($line) = @_;

    my @fields = split(/\t/, $line);
    die "expected 12 tab-separated fields, found " . scalar(@fields) . "\n" if (@fields < 12);

    my ($chrom, $chrom_start, $name, $strand, $thick_start, $thick_end, $blocks) = @fields[0, 1, 3, 5, 6, 7, 9];
    for my $value ($chrom_start, $thick_start, $thick_end, $blocks)
    {
        die "'$value' is not a non-negative integer\n" if ($value !~ /\A\d+\z/);
    }
    die "thickStart $thick_start exceeds thickEnd $thick_end\n" if ($thick_start > $thick_end);

    my @block_sizes = split(/,/, $fields[10]);
    my @block_starts = split(/,/, $fields[11]);
    if ((@block_sizes < $blocks) || (@block_starts < $blocks))
    {
        die "blockCount is $blocks but only " . scalar(@block_sizes) . " sizes and " . scalar(@block_starts) . " starts are listed\n";
    }
    for my $value (@block_sizes[0 .. $blocks - 1], @block_starts[0 .. $blocks - 1])
    {
        die "block value '$value' is not a non-negative integer\n" if ($value !~ /\A\d+\z/);
    }

    # The part left of the thick interval is the 5' UTR only on "+".
    my $left_utr = ($strand eq "+") ? "5pUTR" : "3pUTR";
    my $right_utr = ($strand eq "+") ? "3pUTR" : "5pUTR";

    my @records;
    my $exon_end_last = 0;
    for my $e (0 .. $blocks - 1)
    {
        my $exon_start = $chrom_start + $block_starts[$e];
        my $exon_end = $exon_start + $block_sizes[$e];

        # On "-" blocks are numbered 1..N from left to right, otherwise
        # N..1, so the rightmost block of a "+" record is number 1.
        my $exon_id = ($strand eq "-") ? ($e + 1) : ($blocks - $e);

        for my $feature (_exon_features($exon_start, $exon_end, $thick_start, $thick_end, $left_utr, $right_utr))
        {
            my ($start, $end, $label) = @{$feature};
            push(@records, join("\t", $chrom, $start, $end, "$name;$label", $exon_id, $strand));
        }

        # Intron between the previous block and this one. It takes the
        # smaller of the two neighbouring exon numbers, which gives 1..N-1
        # on either strand ($exon_id - 1 is only right when numbers ascend).
        if (($e > 0) && ($exon_end_last < $exon_start))
        {
            my $intron_id = ($strand eq "-") ? ($exon_id - 1) : $exon_id;
            push(@records, join("\t", $chrom, $exon_end_last, $exon_start, "$name;intron", $intron_id, $strand));
        }

        $exon_end_last = $exon_end;
    }

    return @records;
}

# Read the BED12 file named by the first argument and print the BED6 split
# of every record to STDOUT. Blank lines and "#", "track" and "browser"
# header lines are ignored; malformed records are reported on STDERR with
# their line number and skipped.
sub main
{
    my ($bed_file) = @_;
    die "Usage: $0 <annotation.bed12>\n" if (!defined($bed_file));

    open(my $fh, "<", $bed_file) or die "Cannot open '$bed_file': $!\n";
    while (my $line = <$fh>)
    {
        # chomp() would leave the "\r" of CRLF files inside the last field
        $line =~ s/\r?\n\z//;
        next if ($line =~ /\A\s*\z/);
        next if ($line =~ /\A(?:#|track\s|browser\s)/);

        my @records = eval { split_bed12_record($line) };
        if (my $error = $@)
        {
            warn "$bed_file line $.: record skipped: $error";
            next;
        }
        print $_, "\n" for (@records);
    }
    close($fh);

    return;
}

# Run only when executed as a script, so the subs above can also be loaded
# (e.g. with require) without side effects.
main(@ARGV) unless caller;