Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
GOAnalysis/GORankedList.pl
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
231 lines (180 sloc)
5.84 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
# GORankedList | |
# do GO analysis of single ranked list | |
# | |
# Input | |
# -obo : Gene Ontology terms annotation in | |
# open biological and biomedical ontology format | |
# -ann : Gene annotation to GO terms | |
# -target : target list of ranked genes (1st column tab delimited file) | |
# -threshold : p-value threshold (default 0.001) | |
# | |
# Output | |
# tab-delimited table of represented terms | |
# | |
# Version 1.0 | |
# Date Apr 2017 | |
# Georgi Tushev | |
# Scientific Computing Facility | |
# Max-Planck Institute for Brain Research | |
# send bug reports to sciclist@brain.mpg.de | |
use warnings; | |
use strict; | |
use Getopt::Long(); | |
sub usage($); | |
sub ParseGOGraph($); | |
sub ParseGOAnnotation($); | |
sub ParseRankedList($$$); | |
MAIN: | |
{ | |
# define input selection | |
my $file_obo; | |
my $file_ann; | |
my $file_tgt; | |
my $threshold = 0.01; | |
my $help; | |
# parse input parameters | |
Getopt::Long::GetOptions( | |
"obo=s" => \$file_obo, | |
"ann=s" => \$file_ann, | |
"target=s" => \$file_tgt, | |
"threshold=f" => \$threshold, | |
"help" => \$help | |
) or usage("Error :: invalid command line options"); | |
# define help output | |
usage("version 1.0, Apr 2017") if($help); | |
# parse GO Graph terms and relations | |
my $go_graph = ParseGOGraph($file_obo); | |
print STDERR "### parsed GO_Graph\n"; | |
print STDERR "terms :: ",scalar(keys %{$go_graph}),"\n"; | |
print STDERR "\n"; | |
# parse GO Annotations | |
my ($termToGene, $geneToTerm) = ParseGOAnnotation($file_ann); | |
print STDERR "### parsed GO_Annotation\n"; | |
print STDERR "terms :: ",scalar(keys %{$termToGene}),"\n"; | |
print STDERR "genes :: ",scalar(keys %{$geneToTerm}),"\n"; | |
print STDERR "\n"; | |
# parse target list | |
my $list_tgt = ParseRankedList($file_tgt, $geneToTerm, $go_graph); | |
print STDERR "### parsed Target list\n"; | |
print STDERR "genes :: ",scalar(keys %{$list_tgt}),"\n"; | |
} | |
# parse Ranked List | |
sub ParseRankedList($$$) | |
{ | |
my $file_tgt = $_[0]; | |
my $geneToTerm = $_[1]; | |
my $go_graph = $_[2]; | |
my %list_tgt = (); | |
### add live download for annotation | |
usage("Error :: Ranked list is not specified") unless defined ($file_tgt); | |
### parse file | |
open(my $fh, "<", $file_tgt) or die $!; | |
while(<$fh>) | |
{ | |
chomp($_); | |
my ($symbol, $rest) = split('\t', $_, 2); | |
$list_tgt{$symbol}++; | |
# print each gene - term pair | |
if (exists($geneToTerm->{$symbol})) | |
{ | |
foreach my $term (sort keys %{$geneToTerm->{$symbol}}) | |
{ | |
my $goid = $term; | |
if (exists($go_graph->{$term})) | |
{ | |
my $namespace = $go_graph->{$term}{"namespace"}; | |
my $name = $go_graph->{$term}{"name"}; | |
print $_,"\t",$goid,"\t",$namespace,"\t",$name,"\n"; | |
} | |
} | |
} | |
} | |
close($fh); | |
return \%list_tgt; | |
} | |
# parse GO Annotations | |
sub ParseGOAnnotation($) | |
{ | |
my $file_ann = $_[0]; | |
my %termToGene = (); | |
my %geneToTerm = (); | |
### add live download for annotation | |
usage("Error :: GO annotation file is not specified") unless defined ($file_ann); | |
# parse annotation file | |
open (my $fh, "gzcat $file_ann |") or die $!; | |
while (<$fh>) | |
{ | |
chomp($_); | |
# skip header | |
next if ($_ =~ m/^\!/); | |
# split line | |
my @line = split('\t', $_, 15); | |
$termToGene{$line[4]}{$line[2]}++; | |
$geneToTerm{$line[2]}{$line[4]}++; | |
} | |
close($fh); | |
return (\%termToGene, \%geneToTerm); | |
} | |
# parse GO Graph terms and relations from Obo file | |
sub ParseGOGraph($) | |
{ | |
my $obo_file = $_[0]; | |
my %go_graph = (); | |
### add live download for obo terms | |
usage("Error :: GO Graph OBO file is not specified") unless defined ($obo_file); | |
# parse obo file | |
local $/ = ''; | |
open (my $fh, "<", $obo_file) or die $!; | |
while (<$fh>) | |
{ | |
# remove new line at the end | |
chomp($_); | |
### check if term | |
my $isterm = ($_ =~ m/\[Term\]/) ? 1 : 0; | |
my $isvalid = ($_ !~ m/is_obsolete\: true/) ? 1 : 0; | |
my $goid = ($_ =~ m/id\: (GO\:[0-9]+)/) ? $1 : "<unknown>"; | |
my $name = ($_ =~ m/name\: (.*)/) ? $1 : "<no_name>"; | |
my $namespace = ($_ =~ m/namespace\: (.*)/) ? $1 : "<no_category>"; | |
my @is_a = ($_ =~ m/is_a\: (GO\:[0-9]+)/g); | |
my @part_of = ($_ =~ m/part_of (GO\:[0-9]+)/g); | |
#my $def = ($_ =~ m/def\: \"(.*)\"/) ? $1 : "<no_definition>"; | |
if ($isterm && $isvalid && ($goid ne "<unknown>")) | |
{ | |
$go_graph{$goid}{"name"} = $name; | |
$go_graph{$goid}{"namespace"} = $namespace; | |
$go_graph{$goid}{"is_a"} = \@is_a; | |
$go_graph{$goid}{"part_of"} = \@part_of; | |
} | |
} | |
close($fh); | |
return \%go_graph; | |
} | |
# define user usage | |
sub usage($) | |
{ | |
my $message = $_[0]; | |
if (defined $message && length $message) | |
{ | |
$message .= "\n" unless($message =~ /\n$/); | |
} | |
my $command = $0; | |
$command =~ s#^.*/##; | |
print STDERR ( | |
$message, | |
"usage: $command -target target_ranked_list.txt\n" . | |
"description: GORankedList performs gene ontology analysis of single rancked list\n" . | |
"parameters:\n" . | |
"-obo\n" . | |
"\tGO Terms annotations in OBO file format http://geneontology.org/page/download-ontology\n" . | |
"-ann\n" . | |
"\tGenes to terms annotation in tabulated file format http://geneontology.org/page/download-annotations\n" . | |
"-target\n" . | |
"\tlist of target genes in tab-delimited format, 1st column of official gene symbols is used\n" . | |
"-threshold\n" . | |
"\tp-value threshold for significance, default 0.01\n" . | |
"-help\n" . | |
"\tdefine usage\n" | |
); | |
die("\n"); | |
} | |