Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#!/usr/bin/perl
# GORankedList
# do GO analysis of single ranked list
#
# Input
# -obo : Gene Ontology terms annotation in
# open biological and biomedical ontology format
# -ann : Gene annotation to GO terms
# -target : target list of ranked genes (1st column tab delimited file)
# -threshold : p-value threshold (default 0.001)
#
# Output
# tab-delimited table of represented terms
#
# Version 1.0
# Date Apr 2017
# Georgi Tushev
# Scientific Computing Facility
# Max-Planck Institute for Brain Research
# send bug reports to sciclist@brain.mpg.de
use warnings;
use strict;
use Getopt::Long();
sub usage($);
sub ParseGOGraph($);
sub ParseGOAnnotation($);
sub ParseRankedList($$$);
MAIN:
{
# define input selection
my $file_obo;
my $file_ann;
my $file_tgt;
my $threshold = 0.01;
my $help;
# parse input parameters
Getopt::Long::GetOptions(
"obo=s" => \$file_obo,
"ann=s" => \$file_ann,
"target=s" => \$file_tgt,
"threshold=f" => \$threshold,
"help" => \$help
) or usage("Error :: invalid command line options");
# define help output
usage("version 1.0, Apr 2017") if($help);
# parse GO Graph terms and relations
my $go_graph = ParseGOGraph($file_obo);
print STDERR "### parsed GO_Graph\n";
print STDERR "terms :: ",scalar(keys %{$go_graph}),"\n";
print STDERR "\n";
# parse GO Annotations
my ($termToGene, $geneToTerm) = ParseGOAnnotation($file_ann);
print STDERR "### parsed GO_Annotation\n";
print STDERR "terms :: ",scalar(keys %{$termToGene}),"\n";
print STDERR "genes :: ",scalar(keys %{$geneToTerm}),"\n";
print STDERR "\n";
# parse target list
my $list_tgt = ParseRankedList($file_tgt, $geneToTerm, $go_graph);
print STDERR "### parsed Target list\n";
print STDERR "genes :: ",scalar(keys %{$list_tgt}),"\n";
}
# parse Ranked List
sub ParseRankedList($$$)
{
my $file_tgt = $_[0];
my $geneToTerm = $_[1];
my $go_graph = $_[2];
my %list_tgt = ();
### add live download for annotation
usage("Error :: Ranked list is not specified") unless defined ($file_tgt);
### parse file
open(my $fh, "<", $file_tgt) or die $!;
while(<$fh>)
{
chomp($_);
my ($symbol, $rest) = split('\t', $_, 2);
$list_tgt{$symbol}++;
# print each gene - term pair
if (exists($geneToTerm->{$symbol}))
{
foreach my $term (sort keys %{$geneToTerm->{$symbol}})
{
my $goid = $term;
if (exists($go_graph->{$term}))
{
my $namespace = $go_graph->{$term}{"namespace"};
my $name = $go_graph->{$term}{"name"};
print $_,"\t",$goid,"\t",$namespace,"\t",$name,"\n";
}
}
}
}
close($fh);
return \%list_tgt;
}
# parse GO Annotations
sub ParseGOAnnotation($)
{
my $file_ann = $_[0];
my %termToGene = ();
my %geneToTerm = ();
### add live download for annotation
usage("Error :: GO annotation file is not specified") unless defined ($file_ann);
# parse annotation file
open (my $fh, "gzcat $file_ann |") or die $!;
while (<$fh>)
{
chomp($_);
# skip header
next if ($_ =~ m/^\!/);
# split line
my @line = split('\t', $_, 15);
$termToGene{$line[4]}{$line[2]}++;
$geneToTerm{$line[2]}{$line[4]}++;
}
close($fh);
return (\%termToGene, \%geneToTerm);
}
# parse GO Graph terms and relations from Obo file
sub ParseGOGraph($)
{
my $obo_file = $_[0];
my %go_graph = ();
### add live download for obo terms
usage("Error :: GO Graph OBO file is not specified") unless defined ($obo_file);
# parse obo file
local $/ = '';
open (my $fh, "<", $obo_file) or die $!;
while (<$fh>)
{
# remove new line at the end
chomp($_);
### check if term
my $isterm = ($_ =~ m/\[Term\]/) ? 1 : 0;
my $isvalid = ($_ !~ m/is_obsolete\: true/) ? 1 : 0;
my $goid = ($_ =~ m/id\: (GO\:[0-9]+)/) ? $1 : "<unknown>";
my $name = ($_ =~ m/name\: (.*)/) ? $1 : "<no_name>";
my $namespace = ($_ =~ m/namespace\: (.*)/) ? $1 : "<no_category>";
my @is_a = ($_ =~ m/is_a\: (GO\:[0-9]+)/g);
my @part_of = ($_ =~ m/part_of (GO\:[0-9]+)/g);
#my $def = ($_ =~ m/def\: \"(.*)\"/) ? $1 : "<no_definition>";
if ($isterm && $isvalid && ($goid ne "<unknown>"))
{
$go_graph{$goid}{"name"} = $name;
$go_graph{$goid}{"namespace"} = $namespace;
$go_graph{$goid}{"is_a"} = \@is_a;
$go_graph{$goid}{"part_of"} = \@part_of;
}
}
close($fh);
return \%go_graph;
}
# define user usage
sub usage($)
{
my $message = $_[0];
if (defined $message && length $message)
{
$message .= "\n" unless($message =~ /\n$/);
}
my $command = $0;
$command =~ s#^.*/##;
print STDERR (
$message,
"usage: $command -target target_ranked_list.txt\n" .
"description: GORankedList performs gene ontology analysis of single rancked list\n" .
"parameters:\n" .
"-obo\n" .
"\tGO Terms annotations in OBO file format http://geneontology.org/page/download-ontology\n" .
"-ann\n" .
"\tGenes to terms annotation in tabulated file format http://geneontology.org/page/download-annotations\n" .
"-target\n" .
"\tlist of target genes in tab-delimited format, 1st column of official gene symbols is used\n" .
"-threshold\n" .
"\tp-value threshold for significance, default 0.01\n" .
"-help\n" .
"\tdefine usage\n"
);
die("\n");
}