From ad7c3cc84b7ac3b140731af66604cde6d4680262 Mon Sep 17 00:00:00 2001 From: sepro Date: Thu, 1 Dec 2016 16:47:15 +0100 Subject: [PATCH] importing mcl families supported testing gene families + counts --- planet/controllers/admin/controls.py | 3 + planet/models/gene_families.py | 64 +++++++++++++++++++ tests/build.py | 51 +++++++++++---- .../comparative_data/mamut.families.mcl.txt | 2 + 4 files changed, 108 insertions(+), 12 deletions(-) create mode 100644 tests/data/comparative_data/mamut.families.mcl.txt diff --git a/planet/controllers/admin/controls.py b/planet/controllers/admin/controls.py index 0bc4433..cbdf81c 100644 --- a/planet/controllers/admin/controls.py +++ b/planet/controllers/admin/controls.py @@ -384,6 +384,9 @@ def add_family(): if source == 'plaza': GeneFamily.add_families_from_plaza(temp_path, method_description) flash('Added Gene families from file %s' % form.file.name, 'success') + elif source == 'mcl': + GeneFamily.add_families_from_mcl(temp_path, method_description) + flash('Added Gene families from file %s' % form.file.name, 'success') else: flash('Method not implemented yet', 'danger') os.close(fd) diff --git a/planet/models/gene_families.py b/planet/models/gene_families.py index b37c3ad..8a680e7 100644 --- a/planet/models/gene_families.py +++ b/planet/models/gene_families.py @@ -41,6 +41,19 @@ def update_count(): db.session.rollback() print(e) + @staticmethod + def add(description): + new_method = GeneFamilyMethod(description) + + try: + db.session.add(new_method) + db.session.commit() + except Exception as e: + db.session.rollback() + raise e + + return new_method + class GeneFamily(db.Model): __tablename__ = 'gene_families' @@ -104,8 +117,59 @@ def ecc_associations(self): return output + @staticmethod + def add_families_from_mcl(filename, description, handle_isoforms=True, prefix='mcl'): + """ + Add gene families directly from MCL output (one line with all genes from one family) + + :param filename: The file to load + :param description: Description of the method to store in the database + :param handle_isoforms: should isofroms (indicated by .1 at the end) be handled + :return the new methods internal ID + """ + # Create new method for these families + method = GeneFamilyMethod.add(description) + + gene_hash = {} + all_sequences = Sequence.query.all() + + for sequence in all_sequences: + gene_hash[sequence.name.lower()] = sequence + + if handle_isoforms: + gene_id = re.sub('\.\d+$', '', sequence.name.lower()) + gene_hash[gene_id] = sequence + + with open(filename, "r") as f_in: + for i, line in enumerate(f_in, start=1): + parts = line.strip().split() + + new_family = GeneFamily('%s_%02d_%08d' % (prefix, method.id, i)) + new_family.method_id = method.id + + for p in parts: + if p.lower() in gene_hash.keys(): + new_family.sequences.append(gene_hash[p.lower()]) + + try: + db.session.add(new_family) + db.session.commit() + except Exception as e: + db.session.rollback() + quit() + + return method.id + @staticmethod def add_families_from_tab(filename, description, handle_isoforms=True): + """ + DEPRICATED IMPORT FROM MCL + + :param filename: + :param description: + :param handle_isoforms: + :return: + """ # Create new method for these families method = GeneFamilyMethod(description) diff --git a/tests/build.py b/tests/build.py index 2fa361a..11091d9 100644 --- a/tests/build.py +++ b/tests/build.py @@ -2,12 +2,13 @@ from planet import create_app, db from flask_testing import TestCase + import json class BuildTest(TestCase): """ - TestCase to check if the website is functional + BuildCase to check if the build functions work as planned * a DB will be created and filled with dummy data * an app will be spawned with the testing config, DO NOT run this against a database that is in use !! * the DB will be cleared ! @@ -28,14 +29,6 @@ def setUp(self): """ db.create_all() - def tearDown(self): - """ - Removes test database again, so the next test can start with a clean slate - """ - db.session.remove() - db.drop_all() - - def test_build(self): from planet.models.species import Species from planet.models.sequences import Sequence @@ -45,6 +38,9 @@ def test_build(self): from planet.models.expression_profiles import ExpressionProfile from planet.models.expression_networks import ExpressionNetwork, ExpressionNetworkMethod from planet.models.coexpression_clusters import CoexpressionClusteringMethod + from planet.models.expression_specificity import ExpressionSpecificityMethod + from planet.models.gene_families import GeneFamily, GeneFamilyMethod + from planet.models.clades import Clade Species.add('mmu', 'Marek mutwiliana') s = Species.query.first() @@ -73,7 +69,29 @@ def test_build(self): test_network.id, min_size=1) - test_sequences = Sequence.query.all() + ExpressionSpecificityMethod.calculate_specificities(s.id, s.name + " condition specific profiles", False) + + GeneFamily.add_families_from_mcl('./tests/data/comparative_data/mamut.families.mcl.txt', 'Fake Families') + + GeneFamilyMethod.update_count() + + Clade.add_clades_from_json({"Marek mutwiliana": {"species": ["mmu"], "tree": None}}) + Clade.update_clades() + Clade.update_clades_interpro() + + def tearDown(self): + """ + Removes test database again, so the next test can start with a clean slate + """ + db.session.remove() + db.drop_all() + + def test_build(self): + from planet.models.sequences import Sequence + from planet.models.species import Species + + s = Species.query.first() + test_sequence = Sequence.query.filter_by(name='Gene01').first() test_xref = test_sequence.xrefs[0] @@ -88,9 +106,12 @@ def test_build(self): test_network_nodes = test_sequence.network_nodes.first() test_network_data = json.loads(test_network_nodes.network) - test_cluster = test_sequence.coexpression_clusters.first() #TODO make test for this + test_cluster = test_sequence.coexpression_clusters.first() + cluster_sequence = test_cluster.sequences.filter_by(name='Gene01').first() + + test_family = test_sequence.families.first() - self.assertEqual(len(test_sequences), 3) # Check if all genes are added + self.assertEqual(len(s.sequences.all()), 3) # Check if all genes are added self.assertEqual(test_sequence.name, 'Gene01') self.assertEqual(test_sequence.species_id, s.id) @@ -119,5 +140,11 @@ def test_build(self): self.assertEqual(test_network_data[0]["link_pcc"], 0.71) # Check if network contains required fields self.assertEqual(test_network_data[0]["link_score"], 0) # Check if network contains required fields + self.assertNotEqual(cluster_sequence, None) # Check if gene is in cluster + self.assertEqual(test_profile.specificities.first().condition, 'Tissue 03') # Check if SPM worked + self.assertAlmostEqual(test_profile.specificities.first().score, 0.62, places=2) # Check if SPM score is correct + self.assertAlmostEqual(test_profile.specificities.first().entropy, 1.58, places=2) # Check if entropy is correct + self.assertAlmostEqual(test_profile.specificities.first().tau, 0.11, places=2) # Check if tau is correct + self.assertEqual(len(test_family.sequences.all()), 2) # Check if gene family contains 2 genes diff --git a/tests/data/comparative_data/mamut.families.mcl.txt b/tests/data/comparative_data/mamut.families.mcl.txt new file mode 100644 index 0000000..1fa90a3 --- /dev/null +++ b/tests/data/comparative_data/mamut.families.mcl.txt @@ -0,0 +1,2 @@ +Gene01 Gene02 +Gene03 \ No newline at end of file