From 47abaf09ed9a767a28104b51c2ee9fa08a4c7be5 Mon Sep 17 00:00:00 2001 From: sepro Date: Wed, 30 Nov 2016 14:10:59 +0100 Subject: [PATCH] adding test data to unit test build procedure --- run_unittest.py | 7 +- tests/build.py | 64 ++++++ .../mamut.expression_annotation.txt | 10 + .../mamut.expression_order_color.txt | 3 + tests/data/expression/mamut.mcl_clusters.txt | 1 + tests/data/expression/mamut.pcc.txt | 2 + tests/data/expression/mamut.tmp.matrix.txt | 4 + tests/data/functional_data/mamut.go.txt | 3 + tests/data/mamut.cds.fasta | 206 ++++++++++++++++++ tests/data/mamut.descriptions.txt | 3 + tests/data/mamut.xref.txt | 7 + tests/data/test_go.obo | 50 +++++ utils/entropy.py | 1 + 13 files changed, 358 insertions(+), 3 deletions(-) create mode 100644 tests/build.py create mode 100644 tests/data/expression/mamut.expression_annotation.txt create mode 100644 tests/data/expression/mamut.expression_order_color.txt create mode 100644 tests/data/expression/mamut.mcl_clusters.txt create mode 100644 tests/data/expression/mamut.pcc.txt create mode 100644 tests/data/expression/mamut.tmp.matrix.txt create mode 100644 tests/data/functional_data/mamut.go.txt create mode 100644 tests/data/mamut.cds.fasta create mode 100644 tests/data/mamut.descriptions.txt create mode 100644 tests/data/mamut.xref.txt create mode 100644 tests/data/test_go.obo diff --git a/run_unittest.py b/run_unittest.py index b7d53f6..a89aea5 100644 --- a/run_unittest.py +++ b/run_unittest.py @@ -1,14 +1,15 @@ #!/usr/bin/env python3 from coverage import coverage -cov = coverage(branch=True, omit=['virtualenv/*', 'tests/*']) +cov = coverage(branch=True, omit=['virtualenv/*', 'tests/*', 'config.py']) cov.start() import unittest import os -from tests.website import WebsiteTest -from tests.utils import UtilsTest +# from tests.website import WebsiteTest +# from tests.utils import UtilsTest +from tests.build import BuildTest if __name__ == '__main__': try: diff --git a/tests/build.py b/tests/build.py new file mode 100644 index 0000000..407809c --- /dev/null +++ b/tests/build.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +from planet import create_app, db + +from flask_testing import TestCase + + +class BuildTest(TestCase): + """ + TestCase to check if the website is functional + * a DB will be created and filled with dummy data + * an app will be spawned with the testing config, DO NOT run this against a database that is in use !! + * the DB will be cleared ! + """ + + def create_app(self): + """ + Creates the app using the tests config (tests/config.py) + + :return: flask app with settings from tests/config.py + """ + app = create_app('tests.config') + return app + + def setUp(self): + """ + Creates a database and fills it with sufficient dummy data to run the tests. + """ + db.create_all() + + def tearDown(self): + """ + Removes test database again, so the next test can start with a clean slate + """ + db.session.remove() + db.drop_all() + + def test_build(self): + from planet.models.species import Species + from planet.models.sequences import Sequence + + from planet.models.xrefs import XRef + from planet.models.go import GO + + Species.add('mmu', 'Marek mutwiliana') + s = Species.query.first() + + Sequence.add_from_fasta('./tests/data/mamut.cds.fasta', s.id) + XRef.add_xref_genes_from_file(s.id, './tests/data/mamut.xref.txt') + GO.add_from_obo('./tests/data/test_go.obo') + GO.add_go_from_tab('./tests/data/functional_data/mamut.go.txt', s.id, source="Fake UnitTest Data") + + test_sequences = Sequence.query.all() + test_sequence = Sequence.query.filter_by(name='Gene01').first() + test_xref = test_sequence.xrefs[0] + test_go = test_sequence.go_labels.first() + test_go_association = test_sequence.go_associations.filter_by(evidence=None).first() + + self.assertTrue(len(test_sequences) == 3) # Check if all genes are added + + self.assertTrue(test_sequence.aliases == 'BRCA2') # Check if alias is added and correct + self.assertTrue('www.ensembl.org' in test_xref.url) # Check if url is added + + self.assertTrue(test_go.label == 'GO:0000003') # Check if go is added + self.assertTrue(test_go_association.go.label == 'GO:0000001') # Check if go parent is added diff --git a/tests/data/expression/mamut.expression_annotation.txt b/tests/data/expression/mamut.expression_annotation.txt new file mode 100644 index 0000000..285d486 --- /dev/null +++ b/tests/data/expression/mamut.expression_annotation.txt @@ -0,0 +1,10 @@ +SampleID ConditionDescription +A Tissue 01 +B Tissue 01 +C Tissue 01 +D Tissue 02 +E Tissue 02 +F Tissue 02 +G Tissue 03 +H Tissue 03 +I Tissue 03 \ No newline at end of file diff --git a/tests/data/expression/mamut.expression_order_color.txt b/tests/data/expression/mamut.expression_order_color.txt new file mode 100644 index 0000000..c618e9f --- /dev/null +++ b/tests/data/expression/mamut.expression_order_color.txt @@ -0,0 +1,3 @@ +Tissue 01 rgba(225, 158, 54, 0.5) +Tissue 02 rgba(104, 29, 255, 0.5) +Tissue 03 rgba(2, 204, 34, 0.5) diff --git a/tests/data/expression/mamut.mcl_clusters.txt b/tests/data/expression/mamut.mcl_clusters.txt new file mode 100644 index 0000000..7455aeb --- /dev/null +++ b/tests/data/expression/mamut.mcl_clusters.txt @@ -0,0 +1 @@ +Gene01 Gene02 \ No newline at end of file diff --git a/tests/data/expression/mamut.pcc.txt b/tests/data/expression/mamut.pcc.txt new file mode 100644 index 0000000..a7b24fc --- /dev/null +++ b/tests/data/expression/mamut.pcc.txt @@ -0,0 +1,2 @@ +Gene01: Gene02(0.71) +Gene02: Gene01(0.71) \ No newline at end of file diff --git a/tests/data/expression/mamut.tmp.matrix.txt b/tests/data/expression/mamut.tmp.matrix.txt new file mode 100644 index 0000000..a085835 --- /dev/null +++ b/tests/data/expression/mamut.tmp.matrix.txt @@ -0,0 +1,4 @@ +gene A.htseq B.htseq C.htseq D.htseq E.htseq F.htseq G.htseq H.htseq I.htseq +Gene01 5.065123 4.394866 5.163173 6.5252372 2.8849534 4.7091107 5.573157 4.718033 5.799339 +Gene02 5.120176 7.337115 3.370472 6.9694873 7.2767113 8.9042121 7.954998 7.470095 8.745305 +Gene03 2.081211 1.125116 1.287691 0.8902363 0.1989783 0.9289251 20.034411 20.334816 21.149522 \ No newline at end of file diff --git a/tests/data/functional_data/mamut.go.txt b/tests/data/functional_data/mamut.go.txt new file mode 100644 index 0000000..76212f8 --- /dev/null +++ b/tests/data/functional_data/mamut.go.txt @@ -0,0 +1,3 @@ +Gene01 GO:0000003 ISS +Gene01 GO:0000002 ISS +Gene02 GO:0000003 IEA \ No newline at end of file diff --git a/tests/data/mamut.cds.fasta b/tests/data/mamut.cds.fasta new file mode 100644 index 0000000..06f92bc --- /dev/null +++ b/tests/data/mamut.cds.fasta @@ -0,0 +1,206 @@ +>Gene01 +ATGCCTATTGGATCCAAAGAGAGGCCAACATTTTTTGAAATTTTTAAGACACGCTGCAAC +AAAGCAGATTTAGGACCAATAAGTCTTAATTGGTTTGAAGAACTTTCTTCAGAAGCTCCA +CCCTATAATTCTGAACCTGCAGAAGAATCTGAACATAAAAACAACAATTACGAACCAAAC +CTATTTAAAACTCCACAAAGGAAACCATCTTATAATCAGCTGGCTTCAACTCCAATAATA +TTCAAAGAGCAAGGGCTGACTCTGCCGCTGTACCAATCTCCTGTAAAAGAATTAGATAAA +TTCAAATTAGACTTAGGAAGGAATGTTCCCAATAGTAGACATAAAAGTCTTCGCACAGTG +AAAACTAAAATGGATCAAGCAGATGATGTTTCCTGTCCACTTCTAAATTCTTGTCTTAGT +GAAAGTCCTGTTGTTCTACAATGTACACATGTAACACCACAAAGAGATAAGTCAGTGGTA +TGTGGGAGTTTGTTTCATACACCAAAGTTTGTGAAGGGTCGTCAGACACCAAAACATATT +TCTGAAAGTCTAGGAGCTGAGGTGGATCCTGATATGTCTTGGTCAAGTTCTTTAGCTACA +CCACCCACCCTTAGTTCTACTGTGCTCATAGTCAGAAATGAAGAAGCATCTGAAACTGTA +TTTCCTCATGATACTACTGCTAATGTGAAAAGCTATTTTTCCAATCATGATGAAAGTCTG +AAGAAAAATGATAGATTTATCGCTTCTGTGACAGACAGTGAAAACACAAATCAAAGAGAA +GCTGCAAGTCATGGATTTGGAAAAACATCAGGGAATTCATTTAAAGTAAATAGCTGCAAA +GACCACATTGGAAAGTCAATGCCAAATGTCCTAGAAGATGAAGTATATGAAACAGTTGTA +GATACCTCTGAAGAAGATAGTTTTTCATTATGTTTTTCTAAATGTAGAACAAAAAATCTA +CAAAAAGTAAGAACTAGCAAGACTAGGAAAAAAATTTTCCATGAAGCAAACGCTGATGAA +TGTGAAAAATCTAAAAACCAAGTGAAAGAAAAATACTCATTTGTATCTGAAGTGGAACCA +AATGATACTGATCCATTAGATTCAAATGTAGCAAATCAGAAGCCCTTTGAGAGTGGAAGT +GACAAAATCTCCAAGGAAGTTGTACCGTCTTTGGCCTGTGAATGGTCTCAACTAACCCTT +TCAGGTCTAAATGGAGCCCAGATGGAGAAAATACCCCTATTGCATATTTCTTCATGTGAC +CAAAATATTTCAGAAAAAGACCTATTAGACACAGAGAACAAAAGAAAGAAAGATTTTCTT +ACTTCAGAGAATTCTTTGCCACGTATTTCTAGCCTACCAAAATCAGAGAAGCCATTAAAT +GAGGAAACAGTGGTAAATAAGAGAGATGAAGAGCAGCATCTTGAATCTCATACAGACTGC +ATTCTTGCAGTAAAGCAGGCAATATCTGGAACTTCTCCAGTGGCTTCTTCATTTCAGGGT +ATCAAAAAGTCTATATTCAGAATAAGAGAATCACCTAAAGAGACTTTCAATGCAAGTTTT +TCAGGTCATATGACTGATCCAAACTTTAAAAAAGAAACTGAAGCCTCTGAAAGTGGACTG +GAAATACATACTGTTTGCTCACAGAAGGAGGACTCCTTATGTCCAAATTTAATTGATAAT +GGAAGCTGGCCAGCCACCACCACACAGAATTCTGTAGCTTTGAAGAATGCAGGTTTAATA +TCCACTTTGAAAAAGAAAACAAATAAGTTTATTTATGCTATACATGATGAAACATCTTAT +AAAGGAAAAAAAATACCGAAAGACCAAAAATCAGAACTAATTAACTGTTCAGCCCAGTTT +GAAGCAAATGCTTTTGAAGCACCACTTACATTTGCAAATGCTGATTCAGGTTTATTGCAT +TCTTCTGTGAAAAGAAGCTGTTCACAGAATGATTCTGAAGAACCAACTTTGTCCTTAACT +AGCTCTTTTGGGACAATTCTGAGGAAATGTTCTAGAAATGAAACATGTTCTAATAATACA +GTAATCTCTCAGGATCTTGATTATAAAGAAGCAAAATGTAATAAGGAAAAACTACAGTTA +TTTATTACCCCAGAAGCTGATTCTCTGTCATGCCTGCAGGAAGGACAGTGTGAAAATGAT +CCAAAAAGCAAAAAAGTTTCAGATATAAAAGAAGAGGTCTTGGCTGCAGCATGTCACCCA +GTACAACATTCAAAAGTGGAATACAGTGATACTGACTTTCAATCCCAGAAAAGTCTTTTA +TATGATCATGAAAATGCCAGCACTCTTATTTTAACTCCTACTTCCAAGGATGTTCTGTCA +AACCTAGTCATGATTTCTAGAGGCAAAGAATCATACAAAATGTCAGACAAGCTCAAAGGT +AACAATTATGAATCTGATGTTGAATTAACCAAAAATATTCCCATGGAAAAGAATCAAGAT +GTATGTGCTTTAAATGAAAATTATAAAAACGTTGAGCTGTTGCCACCTGAAAAATACATG +AGAGTAGCATCACCTTCAAGAAAGGTACAATTCAACCAAAACACAAATCTAAGAGTAATC +CAAAAAAATCAAGAAGAAACTACTTCAATTTCAAAAATAACTGTCAATCCAGACTCTGAA +GAACTTTTCTCAGACAATGAGAATAATTTTGTCTTCCAAGTAGCTAATGAAAGGAATAAT +CTTGCTTTAGGAAATACTAAGGAACTTCATGAAACAGACTTGACTTGTGTAAACGAACCC +ATTTTCAAGAACTCTACCATGGTTTTATATGGAGACACAGGTGATAAACAAGCAACCCAA +GTGTCAATTAAAAAAGATTTGGTTTATGTTCTTGCAGAGGAGAACAAAAATAGTGTAAAG +CAGCATATAAAAATGACTCTAGGTCAAGATTTAAAATCGGACATCTCCTTGAATATAGAT +AAAATACCAGAAAAAAATAATGATTACATGAACAAATGGGCAGGACTCTTAGGTCCAATT +TCAAATCACAGTTTTGGAGGTAGCTTCAGAACAGCTTCAAATAAGGAAATCAAGCTCTCT +GAACATAACATTAAGAAGAGCAAAATGTTCTTCAAAGATATTGAAGAACAATATCCTACT +AGTTTAGCTTGTGTTGAAATTGTAAATACCTTGGCATTAGATAATCAAAAGAAACTGAGC +AAGCCTCAGTCAATTAATACTGTATCTGCACATTTACAGAGTAGTGTAGTTGTTTCTGAT +TGTAAAAATAGTCATATAACCCCTCAGATGTTATTTTCCAAGCAGGATTTTAATTCAAAC +CATAATTTAACACCTAGCCAAAAGGCAGAAATTACAGAACTTTCTACTATATTAGAAGAA +TCAGGAAGTCAGTTTGAATTTACTCAGTTTAGAAAACCAAGCTACATATTGCAGAAGAGT +ACATTTGAAGTGCCTGAAAACCAGATGACTATCTTAAAGACCACTTCTGAGGAATGCAGA +GATGCTGATCTTCATGTCATAATGAATGCCCCATCGATTGGTCAGGTAGACAGCAGCAAG +CAATTTGAAGGTACAGTTGAAATTAAACGGAAGTTTGCTGGCCTGTTGAAAAATGACTGT +AACAAAAGTGCTTCTGGTTATTTAACAGATGAAAATGAAGTGGGGTTTAGGGGCTTTTAT +TCTGCTCATGGCACAAAACTGAATGTTTCTACTGAAGCTCTGCAAAAAGCTGTGAAACTG +TTTAGTGATATTGAGAATATTAGTGAGGAAACTTCTGCAGAGGTACATCCAATAAGTTTA +TCTTCAAGTAAATGTCATGATTCTGTTGTTTCAATGTTTAAGATAGAAAATCATAATGAT +AAAACTGTAAGTGAAAAAAATAATAAATGCCAACTGATATTACAAAATAATATTGAAATG +ACTACTGGCACTTTTGTTGAAGAAATTACTGAAAATTACAAGAGAAATACTGAAAATGAA +GATAACAAATATACTGCTGCCAGTAGAAATTCTCATAACTTAGAATTTGATGGCAGTGAT +TCAAGTAAAAATGATACTGTTTGTATTCATAAAGATGAAACGGACTTGCTATTTACTGAT +CAGCACAACATATGTCTTAAATTATCTGGCCAGTTTATGAAGGAGGGAAACACTCAGATT +AAAGAAGATTTGTCAGATTTAACTTTTTTGGAAGTTGCGAAAGCTCAAGAAGCATGTCAT +GGTAATACTTCAAATAAAGAACAGTTAACTGCTACTAAAACGGAGCAAAATATAAAAGAT +TTTGAGACTTCTGATACATTTTTTCAGACTGCAAGTGGGAAAAATATTAGTGTCGCCAAA +GAGTCATTTAATAAAATTGTAAATTTCTTTGATCAGAAACCAGAAGAATTGCATAACTTT +TCCTTAAATTCTGAATTACATTCTGACATAAGAAAGAACAAAATGGACATTCTAAGTTAT +GAGGAAACAGACATAGTTAAACACAAAATACTGAAAGAAAGTGTCCCAGTTGGTACTGGA +AATCAACTAGTGACCTTCCAGGGACAACCCGAACGTGATGAAAAGATCAAAGAACCTACT +CTGTTGGGTTTTCATACAGCTAGCGGGAAAAAAGTTAAAATTGCAAAGGAATCTTTGGAC +AAAGTGAAAAACCTTTTTGATGAAAAAGAGCAAGGTACTAGTGAAATCACCAGTTTTAGC +CATCAATGGGCAAAGACCCTAAAGTACAGAGAGGCCTGTAAAGACCTTGAATTAGCATGT +GAGACCATTGAGATCACAGCTGCCCCAAAGTGTAAAGAAATGCAGAATTCTCTCAATAAT +GATAAAAACCTTGTTTCTATTGAGACTGTGGTGCCACCTAAGCTCTTAAGTGATAATTTA +TGTAGACAAACTGAAAATCTCAAAACATCAAAAAGTATCTTTTTGAAAGTTAAAGTACAT +GAAAATGTAGAAAAAGAAACAGCAAAAAGTCCTGCAACTTGTTACACAAATCAGTCCCCT +TATTCAGTCATTGAAAATTCAGCCTTAGCTTTTTACACAAGTTGTAGTAGAAAAACTTCT +GTGAGTCAGACTTCATTACTTGAAGCAAAAAAATGGCTTAGAGAAGGAATATTTGATGGT +CAACCAGAAAGAATAAATACTGCAGATTATGTAGGAAATTATTTGTATGAAAATAATTCA +AACAGTACTATAGCTGAAAATGACAAAAATCATCTCTCCGAAAAACAAGATACTTATTTA +AGTAACAGTAGCATGTCTAACAGCTATTCCTACCATTCTGATGAGGTATATAATGATTCA +GGATATCTCTCAAAAAATAAACTTGATTCTGGTATTGAGCCAGTATTGAAGAATGTTGAA +GATCAAAAAAACACTAGTTTTTCCAAAGTAATATCCAATGTAAAAGATGCAAATGCATAC +CCACAAACTGTAAATGAAGATATTTGCGTTGAGGAACTTGTGACTAGCTCTTCACCCTGC +AAAAATAAAAATGCAGCCATTAAATTGTCCATATCTAATAGTAATAATTTTGAGGTAGGG +CCACCTGCATTTAGGATAGCCAGTGGTAAAATCGTTTGTGTTTCACATGAAACAATTAAA +AAAGTGAAAGACATATTTACAGACAGTTTCAGTAAAGTAATTAAGGAAAACAACGAGAAT +AAATCAAAAATTTGCCAAACGAAAATTATGGCAGGTTGTTACGAGGCATTGGATGATTCA +GAGGATATTCTTCATAACTCTCTAGATAATGATGAATGTAGCACGCATTCACATAAGGTT +TTTGCTGACATTCAGAGTGAAGAAATTTTACAACATAACCAAAATATGTCTGGATTGGAG +AAAGTTTCTAAAATATCACCTTGTGATGTTAGTTTGGAAACTTCAGATATATGTAAATGT +AGTATAGGGAAGCTTCATAAGTCAGTCTCATCTGCAAATACTTGTGGGATTTTTAGCACA +GCAAGTGGAAAATCTGTCCAGGTATCAGATGCTTCATTACAAAACGCAAGACAAGTGTTT +TCTGAAATAGAAGATAGTACCAAGCAAGTCTTTTCCAAAGTATTGTTTAAAAGTAACGAA +CATTCAGACCAGCTCACAAGAGAAGAAAATACTGCTATACGTACTCCAGAACATTTAATA +TCCCAAAAAGGCTTTTCATATAATGTGGTAAATTCATCTGCTTTCTCTGGATTTAGTACA +GCAAGTGGAAAGCAAGTTTCCATTTTAGAAAGTTCCTTACACAAAGTTAAGGGAGTGTTA +GAGGAATTTGATTTAATCAGAACTGAGCATAGTCTTCACTATTCACCTACGTCTAGACAA +AATGTATCAAAAATACTTCCTCGTGTTGATAAGAGAAACCCAGAGCACTGTGTAAACTCA +GAAATGGAAAAAACCTGCAGTAAAGAATTTAAATTATCAAATAACTTAAATGTTGAAGGT +GGTTCTTCAGAAAATAATCACTCTATTAAAGTTTCTCCATATCTCTCTCAATTTCAACAA +GACAAACAACAGTTGGTATTAGGAACCAAAGTCTCACTTGTTGAGAACATTCATGTTTTG +GGAAAAGAACAGGCTTCACCTAAAAACGTAAAAATGGAAATTGGTAAAACTGAAACTTTT +TCTGATGTTCCTGTGAAAACAAATATAGAAGTTTGTTCTACTTACTCCAAAGATTCAGAA +AACTACTTTGAAACAGAAGCAGTAGAAATTGCTAAAGCTTTTATGGAAGATGATGAACTG +ACAGATTCTAAACTGCCAAGTCATGCCACACATTCTCTTTTTACATGTCCCGAAAATGAG +GAAATGGTTTTGTCAAATTCAAGAATTGGAAAAAGAAGAGGAGAGCCCCTTATCTTAGTG +GGAGAACCCTCAATCAAAAGAAACTTATTAAATGAATTTGACAGGATAATAGAAAATCAA +GAAAAATCCTTAAAGGCTTCAAAAAGCACTCCAGATGGCACAATAAAAGATCGAAGATTG +TTTATGCATCATGTTTCTTTAGAGCCGATTACCTGTGTACCCTTTCGCACAACTAAGGAA +CGTCAAGAGATACAGAATCCAAATTTTACCGCACCTGGTCAAGAATTTCTGTCTAAATCT +CATTTGTATGAACATCTGACTTTGGAAAAATCTTCAAGCAATTTAGCAGTTTCAGGACAT +CCATTTTATCAAGTTTCTGCTACAAGAAATGAAAAAATGAGACACTTGATTACTACAGGC +AGACCAACCAAAGTCTTTGTTCCACCTTTTAAAACTAAATCACATTTTCACAGAGTTGAA +CAGTGTGTTAGGAATATTAACTTGGAGGAAAACAGACAAAAGCAAAACATTGATGGACAT +GGCTCTGATGATAGTAAAAATAAGATTAATGACAATGAGATTCATCAGTTTAACAAAAAC +AACTCCAATCAAGCAGCAGCTGTAACTTTCACAAAGTGTGAAGAAGAACCTTTAGATTTA +ATTACAAGTCTTCAGAATGCCAGAGATATACAGGATATGCGAATTAAGAAGAAACAAAGG +CAACGCGTCTTTCCACAGCCAGGCAGTCTGTATCTTGCAAAAACATCCACTCTGCCTCGA +ATCTCTCTGAAAGCAGCAGTAGGAGGCCAAGTTCCCTCTGCGTGTTCTCATAAACAGCTG +TATACGTATGGCGTTTCTAAACATTGCATAAAAATTAACAGCAAAAATGCAGAGTCTTTT +CAGTTTCACACTGAAGATTATTTTGGTAAGGAAAGTTTATGGACTGGAAAAGGAATACAG +TTGGCTGATGGTGGATGGCTCATACCCTCCAATGATGGAAAGGCTGGAAAAGAAGAATTT +TATAGGGCTCTGTGTGACACTCCAGGTGTGGATCCAAAGCTTATTTCTAGAATTTGGGTT +TATAATCACTATAGATGGATCATATGGAAACTGGCAGCTATGGAATGTGCCTTTCCTAAG +GAATTTGCTAATAGATGCCTAAGCCCAGAAAGGGTGCTTCTTCAACTAAAATACAGATAT +GATACGGAAATTGATAGAAGCAGAAGATCGGCTATAAAAAAGATAATGGAAAGGGATGAC +ACAGCTGCAAAAACACTTGTTCTCTGTGTTTCTGACATAATTTCATTGAGCGCAAATATA +TCTGAAACTTCTAGCAATAAAACTAGTAGTGCAGATACCCAAAAAGTGGCCATTATTGAA +CTTACAGATGGGTGGTATGCTGTTAAGGCCCAGTTAGATCCTCCCCTCTTAGCTGTCTTA +AAGAATGGCAGACTGACAGTTGGTCAGAAGATTATTCTTCATGGAGCAGAACTGGTGGGC +TCTCCTGATGCCTGTACACCTCTTGAAGCCCCAGAATCTCTTATGTTAAAGATTTCTGCT +AACAGTACTCGGCCTGCTCGCTGGTATACCAAACTTGGATTCTTTCCTGACCCTAGACCT +TTTCCTCTGCCCTTATCATCGCTTTTCAGTGATGGAGGAAATGTTGGTTGTGTTGATGTA +ATTATTCAAAGAGCATACCCTATACAGTGGATGGAGAAGACATCATCTGGATTATACATA +TTTCGCAATGAAAGAGAGGAAGAAAAGGAAGCAGCAAAATATGTGGAGGCCCAACAAAAG +AGACTAGAAGCCTTATTCACTAAAATTCAGGAGGAATTTGAAGAACATGAAGAAAACACA +ACAAAACCATATTTACCATCACGTGCACTAACAAGACAGCAAGTTCGTGCTTTGCAAGAT +GGTGCAGAGCTTTATGAAGCAGTGAAGAATGCAGCAGACCCAGCTTACCTTGAGGGTTAT +TTCAGTGAAGAGCAGTTAAGAGCCTTGAATAATCACAGGCAAATGTTGAATGATAAGAAA +CAAGCTCAGATCCAGTTGGAAATTAGGAAGGCCATGGAATCTGCTGAACAAAAGGAACAA +GGTTTATCAAGGGATGTCACAACCGTGTGGAAGTTGCGTATTGTAAGCTATTCAAAAAAA +GAAAAAGATTCAGTTATACTGAGTATTTGGCGTCCATCATCAGATTTATATTCTCTGTTA +ACAGAAGGAAAGAGATACAGAATTTATCATCTTGCAACTTCAAAATCTAAAAGTAAATCT +GAAAGAGCTAACATACAGTTAGCAGCGACAAAAAAAACTCAGTATCAACAACTACCGGTT +TCAGATGAAATTTTATTTCAGATTTACCAGCCACGGGAGCCCCTTCACTTCAGCAAATTT +TTAGATCCAGACTTTCAGCCATCTTGTTCTGAGGTGGACCTAATAGGATTTGTCGTTTCT +GTTGTGAAAAAAACAGGACTTGCCCCTTTCGTCTATTTGTCAGACGAATGTTACAATTTA +CTGGCAATAAAGTTTTGGATAGACCTTAATGAGGACATTATTAAGCCTCATATGTTAATT +GCTGCAAGCAACCTCCAGTGGCGACCAGAATCCAAATCAGGCCTTCTTACTTTATTTGCT +GGAGATTTTTCTGTGTTTTCTGCTAGTCCAAAAGAGGGCCACTTTCAAGAGACATTCAAC +AAAATGAAAAATACTGTTGAGAATATTGACATACTTTGCAATGAAGCAGAAAACAAGCTT +ATGCATATACTGCATGCAAATGATCCCAAGTGGTCCACCCCAACTAAAGACTGTACTTCA +GGGCCGTACACTGCTCAAATCATTCCTGGTACAGGAAACAAGCTTCTGATGTCTTCTCCT +AATTGTGAGATATATTATCAAAGTCCTTTATCACTTTGTATGGCCAAAAGGAAGTCTGTT +TCCACACCTGTCTCAGCCCAGATGACTTCAAAGTCTTGTAAAGGGGAGAAAGAGATTGAT +GACCAAAAGAACTGCAAAAAGAGAAGAGCCTTGGATTTCTTGAGTAGACTGCCTTTACCT +CCACCTGTTAGTCCCATTTGTACATTTGTTTCTCCGGCTGCACAGAAGGCATTTCAGCCA +CCAAGGAGTTGTGGCACCAAATACGAAACACCCATAAAGAAAAAAGAACTGAATTCTCCT +CAGATGACTCCATTTAAAAAATTCAATGAAATTTCTCTTTTGGAAAGTAATTCAATAGCT +GACGAAGAACTTGCATTGATAAATACCCAAGCTCTTTTGTCTGGTTCAACAGGAGAAAAA +CAATTTATATCTGTCAGTGAATCCACTAGGACTGCTCCCACCAGTTCAGAAGATTATCTC +AGACTGAAACGACGTTGTACTACATCTCTGATCAAAGAACAGGAGAGTTCCCAGGCCAGT +ACGGAAGAATGTGAGAAAAATAAGCAGGACACAATTACAACTAAAAAATATATCTAA +>Gene02 +ATGCTGCTGCTGGCGAGATGTCTGCTGCTAGTCCTCGTCTCCTCGCTGCTGGTATGCTCGGGACTGGCGT +GCGGACCGGGCAGGGGGTTCGGGAAGAGGAGGCACCCCAAAAAGCTGACCCCTTTAGCCTACAAGCAGTT +TATCCCCAATGTGGCCGAGAAGACCCTAGGCGCCAGCGGAAGGTATGAAGGGAAGATCTCCAGAAACTCC +GAGCGATTTAAGGAACTCACCCCCAATTACAACCCCGACATCATATTTAAGGATGAAGAAAACACCGGAG +CGGACAGGCTGATGACTCAGAGGTGTAAGGACAAGTTGAACGCTTTGGCCATCTCGGTGATGAACCAGTG +GCCAGGAGTGAAACTGCGGGTGACCGAGGGCTGGGACGAAGATGGCCACCACTCAGAGGAGTCTCTGCAC +TACGAGGGCCGCGCAGTGGACATCACCACGTCTGACCGCGACCGCAGCAAGTACGGCATGCTGGCCCGCC +TGGCGGTGGAGGCCGGCTTCGACTGGGTGTACTACGAGTCCAAGGCACATATCCACTGCTCGGTGAAAGC +AGAGAACTCGGTGGCGGCCAAATCGGGAGGCTGCTTCCCGGGCTCGGCCACGGTGCACCTGGAGCAGGGC +GGCACCAAGCTGGTGAAGGACCTGAGCCCCGGGGACCGCGTGCTGGCGGCGGACGACCAGGGCCGGCTGC +TCTACAGCGACTTCCTCACTTTCCTGGACCGCGACGACGGCGCCAAGAAGGTCTTCTACGTGATCGAGAC +GCGGGAGCCGCGCGAGCGCCTGCTGCTCACCGCCGCGCACCTGCTCTTTGTGGCGCCGCACAACGACTCG +GCCACCGGGGAGCCCGAGGCGTCCTCGGGCTCGGGGCCGCCTTCCGGGGGCGCACTGGGGCCTCGGGCGC +TGTTCGCCAGCCGCGTGCGCCCGGGCCAGCGCGTGTACGTGGTGGCCGAGCGTGACGGGGACCGCCGGCT +CCTGCCCGCCGCTGTGCACAGCGTGACCCTAAGCGAGGAGGCCGCGGGCGCCTACGCGCCGCTCACGGCC +CAGGGCACCATTCTCATCAACCGGGTGCTGGCCTCGTGCTACGCGGTCATCGAGGAGCACAGCTGGGCGC +ACCGGGCCTTCGCGCCCTTCCGCCTGGCGCACGCGCTCCTGGCTGCACTGGCGCCCGCGCGCACGGACCG +CGGCGGGGACAGCGGCGGCGGGGACCGCGGGGGCGGCGGCGGCAGAGTAGCCCTAACCGCTCCAGGTGCT +GCCGACGCTCCGGGTGCGGGGGCCACCGCGGGCATCCACTGGTACTCGCAGCTGCTCTACCAAATAGGCA +CCTGGCTCCTGGACAGCGAGGCCCTGCACCCGCTGGGCATGGCGGTCAAGTCCAGCTGA +>Gene03 +ATGGAGAAAAAGGGTTTGGTGAGCAACAGCTTGAAAGATCATGCTGGTATTGTCACCAGA +GCCATTAGGCGCAGCAAGACTACCTATGAAGAAGACGATTACAATTTTGATGGGTTTGCA +TGGCCTCCAAGATCTTACACGTGTAGCTTCTGCAAAAGGGAATTTAGATCTGCTCAAGCT +CTTGGCGGCCACATGAATGTTCACCGAAAAGACCGAGCCAGGCTCAAAGGCTCACCCCCA +AGAGACAGTCAATACACGAGTACTATTCTTAACCTTAACCTCAACAAAGTGCCAAACCCT +AACCCTAATTTCTCATCAACATCATCAGCATCCTCACCATCCTCACCATCCTCCTGGATA +TCACCAATTAGTAGCACATTACCCTCTTTGATATCACCACCAGCTCCTCCACCTGTTTTT +TTGGTGCCGTCTAGCGAAAATATGAAATGGGTTGTGGGAGACACCCTCTTTAATCACCCT +CTAAACTTCAAAGCCTCGGATTTTGGTACTACAGTAAAGAAGAACGCAGAATCTTTTTGT +GGAGTAGGAGATCGTCAATGTGATGGTTTCATTGGAGAAGAACATGGGTGCATAAAGACT +GTCAAGGCAGCTGATCATCCTCATCATCCGATTGTTAGGTTGGACTTGGAGATCGGTATG +CTTGGTGACTCAAATAAGGAAGACTTAGATTTGGAACTTCGATTGGGATACTCTTAG diff --git a/tests/data/mamut.descriptions.txt b/tests/data/mamut.descriptions.txt new file mode 100644 index 0000000..191d6d4 --- /dev/null +++ b/tests/data/mamut.descriptions.txt @@ -0,0 +1,3 @@ +Gene01 Breast Cancer 2 gene, fingers crossed you've got good alleles for this one! +Gene02 SonicHedgehog, the gene not the adorable blue video game mascot +Gene03 Superman class transcription factor \ No newline at end of file diff --git a/tests/data/mamut.xref.txt b/tests/data/mamut.xref.txt new file mode 100644 index 0000000..9c4286a --- /dev/null +++ b/tests/data/mamut.xref.txt @@ -0,0 +1,7 @@ +Gene01 BRCA2 Ensembl http://www.ensembl.org/Homo_sapiens/Gene/Summary?g=ENSG00000139618;r=13:32315474-32400266 +Gene02 SonicHedgehog Ensembl http://www.ensembl.org/Homo_sapiens/Gene/Summary?g=ENSG00000164690;r=7:155799986-155812273 +Gene03 Superman Plaza http://bioinformatics.psb.ugent.be/plaza/versions/plaza_v3_dicots/genes/view/MD13G022660 +Gene01 BRCA2 token +Gene02 SHH token +Gene02 SonicHedgehog token +Gene03 Superman token \ No newline at end of file diff --git a/tests/data/test_go.obo b/tests/data/test_go.obo new file mode 100644 index 0000000..39617d5 --- /dev/null +++ b/tests/data/test_go.obo @@ -0,0 +1,50 @@ +format-version: 1.2 +data-version: releases/2014-10-14 +date: 13:10:2014 08:33 +saved-by: kmv +auto-generated-by: TermGenie 1.0 +subsetdef: Cross_product_review "Involved_in" +subsetdef: goantislim_grouping "Grouping classes that can be excluded" +subsetdef: gocheck_do_not_annotate "Term not to be used for direct annotation" +subsetdef: gocheck_do_not_manually_annotate "Term not to be used for direct manual annotation" +subsetdef: goslim_aspergillus "Aspergillus GO slim" +subsetdef: goslim_candida "Candida GO slim" +subsetdef: goslim_generic "Generic GO slim" +subsetdef: goslim_goa "GOA and proteome slim" +subsetdef: goslim_metagenomics "Metagenomics GO slim" +subsetdef: goslim_pir "PIR GO slim" +subsetdef: goslim_plant "Plant GO slim" +subsetdef: goslim_pombe "Fission yeast GO slim" +subsetdef: goslim_virus "Viral GO slim" +subsetdef: goslim_yeast "Yeast GO slim" +subsetdef: gosubset_prok "Prokaryotic GO subset" +subsetdef: mf_needs_review "Catalytic activity terms in need of attention" +subsetdef: termgenie_unvetted "Terms created by TermGenie that do not follow a template and require additional vetting by editors" +subsetdef: virus_checked "Viral overhaul terms" +synonymtypedef: systematic_synonym "Systematic synonym" EXACT +default-namespace: gene_ontology +remark: cvs version: $Revision: 21018 $ +remark: Includes Ontology(OntologyID(OntologyIRI())) [Axioms: 18 Logical Axioms: 0] +ontology: go +property_value: propformat-version "1.2" xsd:string + +[Term] +id: GO:0000001 +name: top_label +namespace: biological_process +def: "Top GO LABEL" +synonym: "top_level" EXACT [] + +[Term] +id: GO:0000002 +name: second_label +namespace: biological_process +def: "Second label" +is_a: GO:0000001 ! top_label + +[Term] +id: GO:0000003 +name: third_label +namespace: biological_process +def: "Third label" +is_a: GO:0000001 ! top_label diff --git a/utils/entropy.py b/utils/entropy.py index c23d756..39d1450 100644 --- a/utils/entropy.py +++ b/utils/entropy.py @@ -1,6 +1,7 @@ from math import log2 from bisect import bisect + def entropy(dist): """ Calculates the entropy for a given distribution (!)