Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
adding test data to unit test build procedure
  • Loading branch information
proost committed Nov 30, 2016
1 parent b171575 commit 47abaf0
Show file tree
Hide file tree
Showing 13 changed files with 358 additions and 3 deletions.
7 changes: 4 additions & 3 deletions run_unittest.py
@@ -1,14 +1,15 @@
#!/usr/bin/env python3
from coverage import coverage

cov = coverage(branch=True, omit=['virtualenv/*', 'tests/*'])
cov = coverage(branch=True, omit=['virtualenv/*', 'tests/*', 'config.py'])
cov.start()

import unittest
import os

from tests.website import WebsiteTest
from tests.utils import UtilsTest
# from tests.website import WebsiteTest
# from tests.utils import UtilsTest
from tests.build import BuildTest

if __name__ == '__main__':
try:
Expand Down
64 changes: 64 additions & 0 deletions tests/build.py
@@ -0,0 +1,64 @@
#!/usr/bin/env python3
from planet import create_app, db

from flask_testing import TestCase


class BuildTest(TestCase):
    """
    TestCase to check that the database build procedure is functional.

    * an app is spawned with the testing config ('tests.config')
    * all tables are created before each test and dropped again afterwards

    DO NOT run this against a database that is in use, the DB will be cleared !
    """

    def create_app(self):
        """
        Creates the app using the tests config (tests/config.py)

        :return: flask app with settings from tests/config.py
        """
        return create_app('tests.config')

    def setUp(self):
        """
        Creates all database tables. The dummy data itself is loaded by the individual tests.
        """
        db.create_all()

    def tearDown(self):
        """
        Removes the test database again, so the next test can start with a clean slate
        """
        db.session.remove()
        db.drop_all()

    def test_build(self):
        """
        Loads the dummy species, sequences, xrefs and GO data from tests/data and checks
        that the records can be read back through the models.
        """
        # Kept as function-level imports, as in the original.
        # NOTE(review): presumably the models should only be imported once the app exists - confirm.
        from planet.models.species import Species
        from planet.models.sequences import Sequence

        from planet.models.xrefs import XRef
        from planet.models.go import GO

        Species.add('mmu', 'Marek mutwiliana')
        s = Species.query.first()
        self.assertIsNotNone(s, 'species was not added')

        Sequence.add_from_fasta('./tests/data/mamut.cds.fasta', s.id)
        XRef.add_xref_genes_from_file(s.id, './tests/data/mamut.xref.txt')
        GO.add_from_obo('./tests/data/test_go.obo')
        GO.add_go_from_tab('./tests/data/functional_data/mamut.go.txt', s.id, source="Fake UnitTest Data")

        test_sequences = Sequence.query.all()
        self.assertEqual(len(test_sequences), 3)  # Check if all genes are added

        test_sequence = Sequence.query.filter_by(name='Gene01').first()
        # Fail with a clear message instead of an AttributeError on the lookups below
        self.assertIsNotNone(test_sequence, 'Gene01 was not added')

        test_xref = test_sequence.xrefs[0]
        test_go = test_sequence.go_labels.first()
        # NOTE(review): associations without evidence are presumably the parent terms added
        # while importing the GO annotation - confirm against GO.add_go_from_tab
        test_go_association = test_sequence.go_associations.filter_by(evidence=None).first()

        self.assertEqual(test_sequence.aliases, 'BRCA2')  # Check if alias is added and correct
        self.assertIn('www.ensembl.org', test_xref.url)  # Check if url is added

        self.assertIsNotNone(test_go, 'no GO label found for Gene01')
        self.assertEqual(test_go.label, 'GO:0000003')  # Check if go is added

        self.assertIsNotNone(test_go_association, 'no GO association without evidence found for Gene01')
        self.assertEqual(test_go_association.go.label, 'GO:0000001')  # Check if go parent is added
10 changes: 10 additions & 0 deletions tests/data/expression/mamut.expression_annotation.txt
@@ -0,0 +1,10 @@
SampleID ConditionDescription
A Tissue 01
B Tissue 01
C Tissue 01
D Tissue 02
E Tissue 02
F Tissue 02
G Tissue 03
H Tissue 03
I Tissue 03
3 changes: 3 additions & 0 deletions tests/data/expression/mamut.expression_order_color.txt
@@ -0,0 +1,3 @@
Tissue 01 rgba(225, 158, 54, 0.5)
Tissue 02 rgba(104, 29, 255, 0.5)
Tissue 03 rgba(2, 204, 34, 0.5)
1 change: 1 addition & 0 deletions tests/data/expression/mamut.mcl_clusters.txt
@@ -0,0 +1 @@
Gene01 Gene02
2 changes: 2 additions & 0 deletions tests/data/expression/mamut.pcc.txt
@@ -0,0 +1,2 @@
Gene01: Gene02(0.71)
Gene02: Gene01(0.71)
4 changes: 4 additions & 0 deletions tests/data/expression/mamut.tmp.matrix.txt
@@ -0,0 +1,4 @@
gene A.htseq B.htseq C.htseq D.htseq E.htseq F.htseq G.htseq H.htseq I.htseq
Gene01 5.065123 4.394866 5.163173 6.5252372 2.8849534 4.7091107 5.573157 4.718033 5.799339
Gene02 5.120176 7.337115 3.370472 6.9694873 7.2767113 8.9042121 7.954998 7.470095 8.745305
Gene03 2.081211 1.125116 1.287691 0.8902363 0.1989783 0.9289251 20.034411 20.334816 21.149522
3 changes: 3 additions & 0 deletions tests/data/functional_data/mamut.go.txt
@@ -0,0 +1,3 @@
Gene01 GO:0000003 ISS
Gene01 GO:0000002 ISS
Gene02 GO:0000003 IEA
206 changes: 206 additions & 0 deletions tests/data/mamut.cds.fasta
@@ -0,0 +1,206 @@
>Gene01
ATGCCTATTGGATCCAAAGAGAGGCCAACATTTTTTGAAATTTTTAAGACACGCTGCAAC
AAAGCAGATTTAGGACCAATAAGTCTTAATTGGTTTGAAGAACTTTCTTCAGAAGCTCCA
CCCTATAATTCTGAACCTGCAGAAGAATCTGAACATAAAAACAACAATTACGAACCAAAC
CTATTTAAAACTCCACAAAGGAAACCATCTTATAATCAGCTGGCTTCAACTCCAATAATA
TTCAAAGAGCAAGGGCTGACTCTGCCGCTGTACCAATCTCCTGTAAAAGAATTAGATAAA
TTCAAATTAGACTTAGGAAGGAATGTTCCCAATAGTAGACATAAAAGTCTTCGCACAGTG
AAAACTAAAATGGATCAAGCAGATGATGTTTCCTGTCCACTTCTAAATTCTTGTCTTAGT
GAAAGTCCTGTTGTTCTACAATGTACACATGTAACACCACAAAGAGATAAGTCAGTGGTA
TGTGGGAGTTTGTTTCATACACCAAAGTTTGTGAAGGGTCGTCAGACACCAAAACATATT
TCTGAAAGTCTAGGAGCTGAGGTGGATCCTGATATGTCTTGGTCAAGTTCTTTAGCTACA
CCACCCACCCTTAGTTCTACTGTGCTCATAGTCAGAAATGAAGAAGCATCTGAAACTGTA
TTTCCTCATGATACTACTGCTAATGTGAAAAGCTATTTTTCCAATCATGATGAAAGTCTG
AAGAAAAATGATAGATTTATCGCTTCTGTGACAGACAGTGAAAACACAAATCAAAGAGAA
GCTGCAAGTCATGGATTTGGAAAAACATCAGGGAATTCATTTAAAGTAAATAGCTGCAAA
GACCACATTGGAAAGTCAATGCCAAATGTCCTAGAAGATGAAGTATATGAAACAGTTGTA
GATACCTCTGAAGAAGATAGTTTTTCATTATGTTTTTCTAAATGTAGAACAAAAAATCTA
CAAAAAGTAAGAACTAGCAAGACTAGGAAAAAAATTTTCCATGAAGCAAACGCTGATGAA
TGTGAAAAATCTAAAAACCAAGTGAAAGAAAAATACTCATTTGTATCTGAAGTGGAACCA
AATGATACTGATCCATTAGATTCAAATGTAGCAAATCAGAAGCCCTTTGAGAGTGGAAGT
GACAAAATCTCCAAGGAAGTTGTACCGTCTTTGGCCTGTGAATGGTCTCAACTAACCCTT
TCAGGTCTAAATGGAGCCCAGATGGAGAAAATACCCCTATTGCATATTTCTTCATGTGAC
CAAAATATTTCAGAAAAAGACCTATTAGACACAGAGAACAAAAGAAAGAAAGATTTTCTT
ACTTCAGAGAATTCTTTGCCACGTATTTCTAGCCTACCAAAATCAGAGAAGCCATTAAAT
GAGGAAACAGTGGTAAATAAGAGAGATGAAGAGCAGCATCTTGAATCTCATACAGACTGC
ATTCTTGCAGTAAAGCAGGCAATATCTGGAACTTCTCCAGTGGCTTCTTCATTTCAGGGT
ATCAAAAAGTCTATATTCAGAATAAGAGAATCACCTAAAGAGACTTTCAATGCAAGTTTT
TCAGGTCATATGACTGATCCAAACTTTAAAAAAGAAACTGAAGCCTCTGAAAGTGGACTG
GAAATACATACTGTTTGCTCACAGAAGGAGGACTCCTTATGTCCAAATTTAATTGATAAT
GGAAGCTGGCCAGCCACCACCACACAGAATTCTGTAGCTTTGAAGAATGCAGGTTTAATA
TCCACTTTGAAAAAGAAAACAAATAAGTTTATTTATGCTATACATGATGAAACATCTTAT
AAAGGAAAAAAAATACCGAAAGACCAAAAATCAGAACTAATTAACTGTTCAGCCCAGTTT
GAAGCAAATGCTTTTGAAGCACCACTTACATTTGCAAATGCTGATTCAGGTTTATTGCAT
TCTTCTGTGAAAAGAAGCTGTTCACAGAATGATTCTGAAGAACCAACTTTGTCCTTAACT
AGCTCTTTTGGGACAATTCTGAGGAAATGTTCTAGAAATGAAACATGTTCTAATAATACA
GTAATCTCTCAGGATCTTGATTATAAAGAAGCAAAATGTAATAAGGAAAAACTACAGTTA
TTTATTACCCCAGAAGCTGATTCTCTGTCATGCCTGCAGGAAGGACAGTGTGAAAATGAT
CCAAAAAGCAAAAAAGTTTCAGATATAAAAGAAGAGGTCTTGGCTGCAGCATGTCACCCA
GTACAACATTCAAAAGTGGAATACAGTGATACTGACTTTCAATCCCAGAAAAGTCTTTTA
TATGATCATGAAAATGCCAGCACTCTTATTTTAACTCCTACTTCCAAGGATGTTCTGTCA
AACCTAGTCATGATTTCTAGAGGCAAAGAATCATACAAAATGTCAGACAAGCTCAAAGGT
AACAATTATGAATCTGATGTTGAATTAACCAAAAATATTCCCATGGAAAAGAATCAAGAT
GTATGTGCTTTAAATGAAAATTATAAAAACGTTGAGCTGTTGCCACCTGAAAAATACATG
AGAGTAGCATCACCTTCAAGAAAGGTACAATTCAACCAAAACACAAATCTAAGAGTAATC
CAAAAAAATCAAGAAGAAACTACTTCAATTTCAAAAATAACTGTCAATCCAGACTCTGAA
GAACTTTTCTCAGACAATGAGAATAATTTTGTCTTCCAAGTAGCTAATGAAAGGAATAAT
CTTGCTTTAGGAAATACTAAGGAACTTCATGAAACAGACTTGACTTGTGTAAACGAACCC
ATTTTCAAGAACTCTACCATGGTTTTATATGGAGACACAGGTGATAAACAAGCAACCCAA
GTGTCAATTAAAAAAGATTTGGTTTATGTTCTTGCAGAGGAGAACAAAAATAGTGTAAAG
CAGCATATAAAAATGACTCTAGGTCAAGATTTAAAATCGGACATCTCCTTGAATATAGAT
AAAATACCAGAAAAAAATAATGATTACATGAACAAATGGGCAGGACTCTTAGGTCCAATT
TCAAATCACAGTTTTGGAGGTAGCTTCAGAACAGCTTCAAATAAGGAAATCAAGCTCTCT
GAACATAACATTAAGAAGAGCAAAATGTTCTTCAAAGATATTGAAGAACAATATCCTACT
AGTTTAGCTTGTGTTGAAATTGTAAATACCTTGGCATTAGATAATCAAAAGAAACTGAGC
AAGCCTCAGTCAATTAATACTGTATCTGCACATTTACAGAGTAGTGTAGTTGTTTCTGAT
TGTAAAAATAGTCATATAACCCCTCAGATGTTATTTTCCAAGCAGGATTTTAATTCAAAC
CATAATTTAACACCTAGCCAAAAGGCAGAAATTACAGAACTTTCTACTATATTAGAAGAA
TCAGGAAGTCAGTTTGAATTTACTCAGTTTAGAAAACCAAGCTACATATTGCAGAAGAGT
ACATTTGAAGTGCCTGAAAACCAGATGACTATCTTAAAGACCACTTCTGAGGAATGCAGA
GATGCTGATCTTCATGTCATAATGAATGCCCCATCGATTGGTCAGGTAGACAGCAGCAAG
CAATTTGAAGGTACAGTTGAAATTAAACGGAAGTTTGCTGGCCTGTTGAAAAATGACTGT
AACAAAAGTGCTTCTGGTTATTTAACAGATGAAAATGAAGTGGGGTTTAGGGGCTTTTAT
TCTGCTCATGGCACAAAACTGAATGTTTCTACTGAAGCTCTGCAAAAAGCTGTGAAACTG
TTTAGTGATATTGAGAATATTAGTGAGGAAACTTCTGCAGAGGTACATCCAATAAGTTTA
TCTTCAAGTAAATGTCATGATTCTGTTGTTTCAATGTTTAAGATAGAAAATCATAATGAT
AAAACTGTAAGTGAAAAAAATAATAAATGCCAACTGATATTACAAAATAATATTGAAATG
ACTACTGGCACTTTTGTTGAAGAAATTACTGAAAATTACAAGAGAAATACTGAAAATGAA
GATAACAAATATACTGCTGCCAGTAGAAATTCTCATAACTTAGAATTTGATGGCAGTGAT
TCAAGTAAAAATGATACTGTTTGTATTCATAAAGATGAAACGGACTTGCTATTTACTGAT
CAGCACAACATATGTCTTAAATTATCTGGCCAGTTTATGAAGGAGGGAAACACTCAGATT
AAAGAAGATTTGTCAGATTTAACTTTTTTGGAAGTTGCGAAAGCTCAAGAAGCATGTCAT
GGTAATACTTCAAATAAAGAACAGTTAACTGCTACTAAAACGGAGCAAAATATAAAAGAT
TTTGAGACTTCTGATACATTTTTTCAGACTGCAAGTGGGAAAAATATTAGTGTCGCCAAA
GAGTCATTTAATAAAATTGTAAATTTCTTTGATCAGAAACCAGAAGAATTGCATAACTTT
TCCTTAAATTCTGAATTACATTCTGACATAAGAAAGAACAAAATGGACATTCTAAGTTAT
GAGGAAACAGACATAGTTAAACACAAAATACTGAAAGAAAGTGTCCCAGTTGGTACTGGA
AATCAACTAGTGACCTTCCAGGGACAACCCGAACGTGATGAAAAGATCAAAGAACCTACT
CTGTTGGGTTTTCATACAGCTAGCGGGAAAAAAGTTAAAATTGCAAAGGAATCTTTGGAC
AAAGTGAAAAACCTTTTTGATGAAAAAGAGCAAGGTACTAGTGAAATCACCAGTTTTAGC
CATCAATGGGCAAAGACCCTAAAGTACAGAGAGGCCTGTAAAGACCTTGAATTAGCATGT
GAGACCATTGAGATCACAGCTGCCCCAAAGTGTAAAGAAATGCAGAATTCTCTCAATAAT
GATAAAAACCTTGTTTCTATTGAGACTGTGGTGCCACCTAAGCTCTTAAGTGATAATTTA
TGTAGACAAACTGAAAATCTCAAAACATCAAAAAGTATCTTTTTGAAAGTTAAAGTACAT
GAAAATGTAGAAAAAGAAACAGCAAAAAGTCCTGCAACTTGTTACACAAATCAGTCCCCT
TATTCAGTCATTGAAAATTCAGCCTTAGCTTTTTACACAAGTTGTAGTAGAAAAACTTCT
GTGAGTCAGACTTCATTACTTGAAGCAAAAAAATGGCTTAGAGAAGGAATATTTGATGGT
CAACCAGAAAGAATAAATACTGCAGATTATGTAGGAAATTATTTGTATGAAAATAATTCA
AACAGTACTATAGCTGAAAATGACAAAAATCATCTCTCCGAAAAACAAGATACTTATTTA
AGTAACAGTAGCATGTCTAACAGCTATTCCTACCATTCTGATGAGGTATATAATGATTCA
GGATATCTCTCAAAAAATAAACTTGATTCTGGTATTGAGCCAGTATTGAAGAATGTTGAA
GATCAAAAAAACACTAGTTTTTCCAAAGTAATATCCAATGTAAAAGATGCAAATGCATAC
CCACAAACTGTAAATGAAGATATTTGCGTTGAGGAACTTGTGACTAGCTCTTCACCCTGC
AAAAATAAAAATGCAGCCATTAAATTGTCCATATCTAATAGTAATAATTTTGAGGTAGGG
CCACCTGCATTTAGGATAGCCAGTGGTAAAATCGTTTGTGTTTCACATGAAACAATTAAA
AAAGTGAAAGACATATTTACAGACAGTTTCAGTAAAGTAATTAAGGAAAACAACGAGAAT
AAATCAAAAATTTGCCAAACGAAAATTATGGCAGGTTGTTACGAGGCATTGGATGATTCA
GAGGATATTCTTCATAACTCTCTAGATAATGATGAATGTAGCACGCATTCACATAAGGTT
TTTGCTGACATTCAGAGTGAAGAAATTTTACAACATAACCAAAATATGTCTGGATTGGAG
AAAGTTTCTAAAATATCACCTTGTGATGTTAGTTTGGAAACTTCAGATATATGTAAATGT
AGTATAGGGAAGCTTCATAAGTCAGTCTCATCTGCAAATACTTGTGGGATTTTTAGCACA
GCAAGTGGAAAATCTGTCCAGGTATCAGATGCTTCATTACAAAACGCAAGACAAGTGTTT
TCTGAAATAGAAGATAGTACCAAGCAAGTCTTTTCCAAAGTATTGTTTAAAAGTAACGAA
CATTCAGACCAGCTCACAAGAGAAGAAAATACTGCTATACGTACTCCAGAACATTTAATA
TCCCAAAAAGGCTTTTCATATAATGTGGTAAATTCATCTGCTTTCTCTGGATTTAGTACA
GCAAGTGGAAAGCAAGTTTCCATTTTAGAAAGTTCCTTACACAAAGTTAAGGGAGTGTTA
GAGGAATTTGATTTAATCAGAACTGAGCATAGTCTTCACTATTCACCTACGTCTAGACAA
AATGTATCAAAAATACTTCCTCGTGTTGATAAGAGAAACCCAGAGCACTGTGTAAACTCA
GAAATGGAAAAAACCTGCAGTAAAGAATTTAAATTATCAAATAACTTAAATGTTGAAGGT
GGTTCTTCAGAAAATAATCACTCTATTAAAGTTTCTCCATATCTCTCTCAATTTCAACAA
GACAAACAACAGTTGGTATTAGGAACCAAAGTCTCACTTGTTGAGAACATTCATGTTTTG
GGAAAAGAACAGGCTTCACCTAAAAACGTAAAAATGGAAATTGGTAAAACTGAAACTTTT
TCTGATGTTCCTGTGAAAACAAATATAGAAGTTTGTTCTACTTACTCCAAAGATTCAGAA
AACTACTTTGAAACAGAAGCAGTAGAAATTGCTAAAGCTTTTATGGAAGATGATGAACTG
ACAGATTCTAAACTGCCAAGTCATGCCACACATTCTCTTTTTACATGTCCCGAAAATGAG
GAAATGGTTTTGTCAAATTCAAGAATTGGAAAAAGAAGAGGAGAGCCCCTTATCTTAGTG
GGAGAACCCTCAATCAAAAGAAACTTATTAAATGAATTTGACAGGATAATAGAAAATCAA
GAAAAATCCTTAAAGGCTTCAAAAAGCACTCCAGATGGCACAATAAAAGATCGAAGATTG
TTTATGCATCATGTTTCTTTAGAGCCGATTACCTGTGTACCCTTTCGCACAACTAAGGAA
CGTCAAGAGATACAGAATCCAAATTTTACCGCACCTGGTCAAGAATTTCTGTCTAAATCT
CATTTGTATGAACATCTGACTTTGGAAAAATCTTCAAGCAATTTAGCAGTTTCAGGACAT
CCATTTTATCAAGTTTCTGCTACAAGAAATGAAAAAATGAGACACTTGATTACTACAGGC
AGACCAACCAAAGTCTTTGTTCCACCTTTTAAAACTAAATCACATTTTCACAGAGTTGAA
CAGTGTGTTAGGAATATTAACTTGGAGGAAAACAGACAAAAGCAAAACATTGATGGACAT
GGCTCTGATGATAGTAAAAATAAGATTAATGACAATGAGATTCATCAGTTTAACAAAAAC
AACTCCAATCAAGCAGCAGCTGTAACTTTCACAAAGTGTGAAGAAGAACCTTTAGATTTA
ATTACAAGTCTTCAGAATGCCAGAGATATACAGGATATGCGAATTAAGAAGAAACAAAGG
CAACGCGTCTTTCCACAGCCAGGCAGTCTGTATCTTGCAAAAACATCCACTCTGCCTCGA
ATCTCTCTGAAAGCAGCAGTAGGAGGCCAAGTTCCCTCTGCGTGTTCTCATAAACAGCTG
TATACGTATGGCGTTTCTAAACATTGCATAAAAATTAACAGCAAAAATGCAGAGTCTTTT
CAGTTTCACACTGAAGATTATTTTGGTAAGGAAAGTTTATGGACTGGAAAAGGAATACAG
TTGGCTGATGGTGGATGGCTCATACCCTCCAATGATGGAAAGGCTGGAAAAGAAGAATTT
TATAGGGCTCTGTGTGACACTCCAGGTGTGGATCCAAAGCTTATTTCTAGAATTTGGGTT
TATAATCACTATAGATGGATCATATGGAAACTGGCAGCTATGGAATGTGCCTTTCCTAAG
GAATTTGCTAATAGATGCCTAAGCCCAGAAAGGGTGCTTCTTCAACTAAAATACAGATAT
GATACGGAAATTGATAGAAGCAGAAGATCGGCTATAAAAAAGATAATGGAAAGGGATGAC
ACAGCTGCAAAAACACTTGTTCTCTGTGTTTCTGACATAATTTCATTGAGCGCAAATATA
TCTGAAACTTCTAGCAATAAAACTAGTAGTGCAGATACCCAAAAAGTGGCCATTATTGAA
CTTACAGATGGGTGGTATGCTGTTAAGGCCCAGTTAGATCCTCCCCTCTTAGCTGTCTTA
AAGAATGGCAGACTGACAGTTGGTCAGAAGATTATTCTTCATGGAGCAGAACTGGTGGGC
TCTCCTGATGCCTGTACACCTCTTGAAGCCCCAGAATCTCTTATGTTAAAGATTTCTGCT
AACAGTACTCGGCCTGCTCGCTGGTATACCAAACTTGGATTCTTTCCTGACCCTAGACCT
TTTCCTCTGCCCTTATCATCGCTTTTCAGTGATGGAGGAAATGTTGGTTGTGTTGATGTA
ATTATTCAAAGAGCATACCCTATACAGTGGATGGAGAAGACATCATCTGGATTATACATA
TTTCGCAATGAAAGAGAGGAAGAAAAGGAAGCAGCAAAATATGTGGAGGCCCAACAAAAG
AGACTAGAAGCCTTATTCACTAAAATTCAGGAGGAATTTGAAGAACATGAAGAAAACACA
ACAAAACCATATTTACCATCACGTGCACTAACAAGACAGCAAGTTCGTGCTTTGCAAGAT
GGTGCAGAGCTTTATGAAGCAGTGAAGAATGCAGCAGACCCAGCTTACCTTGAGGGTTAT
TTCAGTGAAGAGCAGTTAAGAGCCTTGAATAATCACAGGCAAATGTTGAATGATAAGAAA
CAAGCTCAGATCCAGTTGGAAATTAGGAAGGCCATGGAATCTGCTGAACAAAAGGAACAA
GGTTTATCAAGGGATGTCACAACCGTGTGGAAGTTGCGTATTGTAAGCTATTCAAAAAAA
GAAAAAGATTCAGTTATACTGAGTATTTGGCGTCCATCATCAGATTTATATTCTCTGTTA
ACAGAAGGAAAGAGATACAGAATTTATCATCTTGCAACTTCAAAATCTAAAAGTAAATCT
GAAAGAGCTAACATACAGTTAGCAGCGACAAAAAAAACTCAGTATCAACAACTACCGGTT
TCAGATGAAATTTTATTTCAGATTTACCAGCCACGGGAGCCCCTTCACTTCAGCAAATTT
TTAGATCCAGACTTTCAGCCATCTTGTTCTGAGGTGGACCTAATAGGATTTGTCGTTTCT
GTTGTGAAAAAAACAGGACTTGCCCCTTTCGTCTATTTGTCAGACGAATGTTACAATTTA
CTGGCAATAAAGTTTTGGATAGACCTTAATGAGGACATTATTAAGCCTCATATGTTAATT
GCTGCAAGCAACCTCCAGTGGCGACCAGAATCCAAATCAGGCCTTCTTACTTTATTTGCT
GGAGATTTTTCTGTGTTTTCTGCTAGTCCAAAAGAGGGCCACTTTCAAGAGACATTCAAC
AAAATGAAAAATACTGTTGAGAATATTGACATACTTTGCAATGAAGCAGAAAACAAGCTT
ATGCATATACTGCATGCAAATGATCCCAAGTGGTCCACCCCAACTAAAGACTGTACTTCA
GGGCCGTACACTGCTCAAATCATTCCTGGTACAGGAAACAAGCTTCTGATGTCTTCTCCT
AATTGTGAGATATATTATCAAAGTCCTTTATCACTTTGTATGGCCAAAAGGAAGTCTGTT
TCCACACCTGTCTCAGCCCAGATGACTTCAAAGTCTTGTAAAGGGGAGAAAGAGATTGAT
GACCAAAAGAACTGCAAAAAGAGAAGAGCCTTGGATTTCTTGAGTAGACTGCCTTTACCT
CCACCTGTTAGTCCCATTTGTACATTTGTTTCTCCGGCTGCACAGAAGGCATTTCAGCCA
CCAAGGAGTTGTGGCACCAAATACGAAACACCCATAAAGAAAAAAGAACTGAATTCTCCT
CAGATGACTCCATTTAAAAAATTCAATGAAATTTCTCTTTTGGAAAGTAATTCAATAGCT
GACGAAGAACTTGCATTGATAAATACCCAAGCTCTTTTGTCTGGTTCAACAGGAGAAAAA
CAATTTATATCTGTCAGTGAATCCACTAGGACTGCTCCCACCAGTTCAGAAGATTATCTC
AGACTGAAACGACGTTGTACTACATCTCTGATCAAAGAACAGGAGAGTTCCCAGGCCAGT
ACGGAAGAATGTGAGAAAAATAAGCAGGACACAATTACAACTAAAAAATATATCTAA
>Gene02
ATGCTGCTGCTGGCGAGATGTCTGCTGCTAGTCCTCGTCTCCTCGCTGCTGGTATGCTCGGGACTGGCGT
GCGGACCGGGCAGGGGGTTCGGGAAGAGGAGGCACCCCAAAAAGCTGACCCCTTTAGCCTACAAGCAGTT
TATCCCCAATGTGGCCGAGAAGACCCTAGGCGCCAGCGGAAGGTATGAAGGGAAGATCTCCAGAAACTCC
GAGCGATTTAAGGAACTCACCCCCAATTACAACCCCGACATCATATTTAAGGATGAAGAAAACACCGGAG
CGGACAGGCTGATGACTCAGAGGTGTAAGGACAAGTTGAACGCTTTGGCCATCTCGGTGATGAACCAGTG
GCCAGGAGTGAAACTGCGGGTGACCGAGGGCTGGGACGAAGATGGCCACCACTCAGAGGAGTCTCTGCAC
TACGAGGGCCGCGCAGTGGACATCACCACGTCTGACCGCGACCGCAGCAAGTACGGCATGCTGGCCCGCC
TGGCGGTGGAGGCCGGCTTCGACTGGGTGTACTACGAGTCCAAGGCACATATCCACTGCTCGGTGAAAGC
AGAGAACTCGGTGGCGGCCAAATCGGGAGGCTGCTTCCCGGGCTCGGCCACGGTGCACCTGGAGCAGGGC
GGCACCAAGCTGGTGAAGGACCTGAGCCCCGGGGACCGCGTGCTGGCGGCGGACGACCAGGGCCGGCTGC
TCTACAGCGACTTCCTCACTTTCCTGGACCGCGACGACGGCGCCAAGAAGGTCTTCTACGTGATCGAGAC
GCGGGAGCCGCGCGAGCGCCTGCTGCTCACCGCCGCGCACCTGCTCTTTGTGGCGCCGCACAACGACTCG
GCCACCGGGGAGCCCGAGGCGTCCTCGGGCTCGGGGCCGCCTTCCGGGGGCGCACTGGGGCCTCGGGCGC
TGTTCGCCAGCCGCGTGCGCCCGGGCCAGCGCGTGTACGTGGTGGCCGAGCGTGACGGGGACCGCCGGCT
CCTGCCCGCCGCTGTGCACAGCGTGACCCTAAGCGAGGAGGCCGCGGGCGCCTACGCGCCGCTCACGGCC
CAGGGCACCATTCTCATCAACCGGGTGCTGGCCTCGTGCTACGCGGTCATCGAGGAGCACAGCTGGGCGC
ACCGGGCCTTCGCGCCCTTCCGCCTGGCGCACGCGCTCCTGGCTGCACTGGCGCCCGCGCGCACGGACCG
CGGCGGGGACAGCGGCGGCGGGGACCGCGGGGGCGGCGGCGGCAGAGTAGCCCTAACCGCTCCAGGTGCT
GCCGACGCTCCGGGTGCGGGGGCCACCGCGGGCATCCACTGGTACTCGCAGCTGCTCTACCAAATAGGCA
CCTGGCTCCTGGACAGCGAGGCCCTGCACCCGCTGGGCATGGCGGTCAAGTCCAGCTGA
>Gene03
ATGGAGAAAAAGGGTTTGGTGAGCAACAGCTTGAAAGATCATGCTGGTATTGTCACCAGA
GCCATTAGGCGCAGCAAGACTACCTATGAAGAAGACGATTACAATTTTGATGGGTTTGCA
TGGCCTCCAAGATCTTACACGTGTAGCTTCTGCAAAAGGGAATTTAGATCTGCTCAAGCT
CTTGGCGGCCACATGAATGTTCACCGAAAAGACCGAGCCAGGCTCAAAGGCTCACCCCCA
AGAGACAGTCAATACACGAGTACTATTCTTAACCTTAACCTCAACAAAGTGCCAAACCCT
AACCCTAATTTCTCATCAACATCATCAGCATCCTCACCATCCTCACCATCCTCCTGGATA
TCACCAATTAGTAGCACATTACCCTCTTTGATATCACCACCAGCTCCTCCACCTGTTTTT
TTGGTGCCGTCTAGCGAAAATATGAAATGGGTTGTGGGAGACACCCTCTTTAATCACCCT
CTAAACTTCAAAGCCTCGGATTTTGGTACTACAGTAAAGAAGAACGCAGAATCTTTTTGT
GGAGTAGGAGATCGTCAATGTGATGGTTTCATTGGAGAAGAACATGGGTGCATAAAGACT
GTCAAGGCAGCTGATCATCCTCATCATCCGATTGTTAGGTTGGACTTGGAGATCGGTATG
CTTGGTGACTCAAATAAGGAAGACTTAGATTTGGAACTTCGATTGGGATACTCTTAG
3 changes: 3 additions & 0 deletions tests/data/mamut.descriptions.txt
@@ -0,0 +1,3 @@
Gene01 Breast Cancer 2 gene, fingers crossed you've got good alleles for this one!
Gene02 SonicHedgehog, the gene not the adorable blue video game mascot
Gene03 Superman class transcription factor
7 changes: 7 additions & 0 deletions tests/data/mamut.xref.txt
@@ -0,0 +1,7 @@
Gene01 BRCA2 Ensembl http://www.ensembl.org/Homo_sapiens/Gene/Summary?g=ENSG00000139618;r=13:32315474-32400266
Gene02 SonicHedgehog Ensembl http://www.ensembl.org/Homo_sapiens/Gene/Summary?g=ENSG00000164690;r=7:155799986-155812273
Gene03 Superman Plaza http://bioinformatics.psb.ugent.be/plaza/versions/plaza_v3_dicots/genes/view/MD13G022660
Gene01 BRCA2 token
Gene02 SHH token
Gene02 SonicHedgehog token
Gene03 Superman token
50 changes: 50 additions & 0 deletions tests/data/test_go.obo
@@ -0,0 +1,50 @@
format-version: 1.2
data-version: releases/2014-10-14
date: 13:10:2014 08:33
saved-by: kmv
auto-generated-by: TermGenie 1.0
subsetdef: Cross_product_review "Involved_in"
subsetdef: goantislim_grouping "Grouping classes that can be excluded"
subsetdef: gocheck_do_not_annotate "Term not to be used for direct annotation"
subsetdef: gocheck_do_not_manually_annotate "Term not to be used for direct manual annotation"
subsetdef: goslim_aspergillus "Aspergillus GO slim"
subsetdef: goslim_candida "Candida GO slim"
subsetdef: goslim_generic "Generic GO slim"
subsetdef: goslim_goa "GOA and proteome slim"
subsetdef: goslim_metagenomics "Metagenomics GO slim"
subsetdef: goslim_pir "PIR GO slim"
subsetdef: goslim_plant "Plant GO slim"
subsetdef: goslim_pombe "Fission yeast GO slim"
subsetdef: goslim_virus "Viral GO slim"
subsetdef: goslim_yeast "Yeast GO slim"
subsetdef: gosubset_prok "Prokaryotic GO subset"
subsetdef: mf_needs_review "Catalytic activity terms in need of attention"
subsetdef: termgenie_unvetted "Terms created by TermGenie that do not follow a template and require additional vetting by editors"
subsetdef: virus_checked "Viral overhaul terms"
synonymtypedef: systematic_synonym "Systematic synonym" EXACT
default-namespace: gene_ontology
remark: cvs version: $Revision: 21018 $
remark: Includes Ontology(OntologyID(OntologyIRI(<http://purl.obolibrary.org/obo/go/never_in_taxon.owl>))) [Axioms: 18 Logical Axioms: 0]
ontology: go
property_value: propformat-version "1.2" xsd:string

[Term]
id: GO:0000001
name: top_label
namespace: biological_process
def: "Top GO LABEL"
synonym: "top_level" EXACT []

[Term]
id: GO:0000002
name: second_label
namespace: biological_process
def: "Second label"
is_a: GO:0000001 ! top_label

[Term]
id: GO:0000003
name: third_label
namespace: biological_process
def: "Third label"
is_a: GO:0000001 ! top_label
1 change: 1 addition & 0 deletions utils/entropy.py
@@ -1,6 +1,7 @@
from math import log2
from bisect import bisect


def entropy(dist):
"""
Calculates the entropy for a given distribution (!)
Expand Down

0 comments on commit 47abaf0

Please sign in to comment.