Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#!/usr/bin/perl
# GOGetTerm
# do GO analysis of two unrancked lists
#
# Input
# -obo : Gene Ontology terms annotation in
# open biological and biomedical ontology format
# -ann : Gene annotation to GO terms
# -target : target list of genes (1st column tab delimited file)
# -background : background list of genes (1st column tab delimited file)
# -threshold : p-value threshold (default 0.001)
# -term : term of interest
#
# Output
# tab-delimited table of represented terms
#
# Version 1.0
# Date Mar 2017
# Georgi Tushev
# Scientific Computing Facility
# Max-Planck Institute for Brain Research
# send bug reports to sciclist@brain.mpg.de
use warnings;
use strict;
use Getopt::Long();
sub usage($);
sub ParseQueryList($$$);
sub ParseGOAnnotation($$$);
sub ParseTermsAnnotation($$$);
MAIN:
{
# define input selection
my $obo_file;
my $annotation_file;
my $target_file;
my $background_file;
my $threshold = 0.05;
my $term;
my $help;
my %hash = ();
# set-up parameters
Getopt::Long::GetOptions(
"obo=s" => \$obo_file,
"ann=s" => \$annotation_file,
"target=s" => \$target_file,
"background=s" => \$background_file,
"threshold=f" => \$threshold,
"term=s" => \$term,
"help" => \$help
) or usage("Error :: invalid command line options");
# define help output
usage("version 1.0, Sep 2016") if($help);
# parse background list
ParseQueryList(\%hash, $background_file, 1);
# parse target list
ParseQueryList(\%hash, $target_file, 2);
# parse go annotation
ParseGOAnnotation(\%hash, $annotation_file, 0);
# parse term annotation
ParseTermsAnnotation(\%hash, $obo_file, 0);
# count per GO term
print $term,"\n";
if (exists($hash{"go2gene"}{$term}{"namespace"}) && exists($hash{"go2gene"}{$term}{"name"}))
{
if (exists($hash{"go2gene"}{$term}{"genes"}))
{
foreach my $gene (sort keys %{$hash{"go2gene"}{$term}{"genes"}})
{
my $ref = ($hash{"list"}{$gene} & 4) ? 1 : 0;
my $bkg = ($hash{"list"}{$gene} & 1) ? 1 : 0;
my $trg = ($hash{"list"}{$gene} & 2) ? 1 : 0;
print $gene,"\t",$ref,"\t",$bkg,"\t",$trg,"\n";
}
}
}
}
### subroutines ###
sub ParseTermsAnnotation($$$)
{
my $hash_ref = $_[0];
my $obo_file = $_[1];
my $live = $_[2];
### add live download for obo terms
usage("Error :: OBO file is not specified") unless defined ($obo_file);
# parse obo file
local $/ = '';
open (my $fh, "<", $obo_file) or die $!;
while (<$fh>)
{
chomp($_);
### check if term
my $isterm = ($_ =~ m/\[Term\]/) ? "term" : "not-term";
my $isobsolete = ($_ =~ m/is_obsolete\: true/) ? "obsolete" : "valid";
my $goid = ($_ =~ m/id\: (GO\:[0-9]+)/) ? $1 : "<unknown>";
my $name = ($_ =~ m/name\: (.*)/) ? $1 : "<no_name>";
my $namespace = ($_ =~ m/namespace\: (.*)/) ? $1 : "<no_category>";
#my $def = ($_ =~ m/def\: \"(.*)\"/) ? $1 : "<no_definition>";
if (($isterm eq "term") && ($isobsolete eq "valid") && ($goid ne "<unknown>"))
{
if (exists($hash_ref->{"go2gene"}{$goid}))
{
$hash_ref->{"go2gene"}{$goid}{"name"} = $name;
$hash_ref->{"go2gene"}{$goid}{"namespace"} = $namespace;
#$hash_ref->{"go2gene"}{$goid}{"def"} = $def;
}
}
}
close($fh);
}
sub ParseGOAnnotation($$$)
{
my $hash_ref = $_[0];
my $ann_file = $_[1];
my $live = $_[2];
### add live download for annotation
usage("Error :: GO annotation file is not specified") unless defined ($ann_file);
# parse annotation file
open (my $fh, "gzcat $ann_file |") or die $!;
while (<$fh>)
{
chomp($_);
# skip header
next if ($_ =~ m/^\!/);
# skip
my @line = split('\t', $_, 15);
if(exists($hash_ref->{"list"}{$line[2]}))
{
$hash_ref->{"list"}{$line[2]} |= 4;
$hash_ref->{"gene2go"}{$line[2]}{$line[4]}++;
$hash_ref->{"go2gene"}{$line[4]}{"genes"}{$line[2]}++;
}
else
{
next;
}
}
close($fh);
}
sub ParseQueryList($$$)
{
my $hash_ref = $_[0];
my $query_file = $_[1];
my $bit_mask = $_[2];
open(my $fh, "<", $query_file) or die $!;
while(<$fh>)
{
chomp($_);
my ($symbol, $rest) = split('\t', $_, 2);
$hash_ref->{"list"}{$symbol} |= $bit_mask;
}
close($fh);
}
sub usage($)
{
my $message = $_[0];
if (defined $message && length $message)
{
$message .= "\n" unless($message =~ /\n$/);
}
my $command = $0;
$command =~ s#^.*/##;
print STDERR (
$message,
"usage: $command -live -target target_list.txt -background background_list.txt\n" .
"description: GOAnalysis performs gene ontology analysis of two unrancked lists\n" .
"parameters:\n" .
"-obo\n" .
"\tGO Terms annotations in OBO file format http://geneontology.org/page/download-ontology\n" .
"-ann\n" .
"\tGenes to terms annotation in tabulated file format http://geneontology.org/page/download-annotations\n" .
"-target\n" .
"\tlist of target genes in tab-delimited format, 1st column of official gene symbols is used\n" .
"-background\n" .
"\tlist pf background genes in tab-delimited format, 1st column of offical gene symbols is used\n" .
"-term\n" .
"\tGOid for term of interest\n".
"-help\n" .
"\tdefine usage\n"
);
die("\n");
}