Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
importing mcl families supported
testing gene families + counts
  • Loading branch information
proost committed Dec 1, 2016
1 parent eb387ba commit ad7c3cc
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 12 deletions.
3 changes: 3 additions & 0 deletions planet/controllers/admin/controls.py
Expand Up @@ -384,6 +384,9 @@ def add_family():
if source == 'plaza':
GeneFamily.add_families_from_plaza(temp_path, method_description)
flash('Added Gene families from file %s' % form.file.name, 'success')
elif source == 'mcl':
GeneFamily.add_families_from_mcl(temp_path, method_description)
flash('Added Gene families from file %s' % form.file.name, 'success')
else:
flash('Method not implemented yet', 'danger')
os.close(fd)
Expand Down
64 changes: 64 additions & 0 deletions planet/models/gene_families.py
Expand Up @@ -41,6 +41,19 @@ def update_count():
db.session.rollback()
print(e)

@staticmethod
def add(description):
    """
    Create a gene family method and store it in the database.

    :param description: value handed to the GeneFamilyMethod constructor
    :return: the newly created GeneFamilyMethod, once it has been committed
    :raises Exception: the error raised while adding/committing; the session is rolled back before it
    is passed on to the caller
    """
    method = GeneFamilyMethod(description)

    try:
        db.session.add(method)
        db.session.commit()
    except Exception:
        # Undo the failed transaction so the session stays usable, then let the caller see the error
        db.session.rollback()
        raise
    else:
        return method


class GeneFamily(db.Model):
__tablename__ = 'gene_families'
Expand Down Expand Up @@ -104,8 +117,59 @@ def ecc_associations(self):

return output

@staticmethod
def add_families_from_mcl(filename, description, handle_isoforms=True, prefix='mcl'):
    """
    Add gene families directly from MCL output (one line with all genes from one family)

    Every non-blank line becomes one GeneFamily named '<prefix>_<method id>_<line number>'. Gene
    identifiers are matched case-insensitively against the names of all sequences in the database;
    identifiers without a matching sequence are ignored.

    :param filename: The file to load
    :param description: Description of the method to store in the database
    :param handle_isoforms: should isoforms (indicated by .1 at the end) be handled; when True a sequence
    can also be found through its name without the trailing '.<digits>'
    :param prefix: first part of the generated family names
    :return: the new method's internal ID
    :raises Exception: the error raised while storing a family; the session is rolled back first.
    Families committed before the failure remain in the database.
    """
    # Create new method for these families
    method = GeneFamilyMethod.add(description)

    # Raw string avoids the invalid-escape warning; compiled once instead of once per sequence
    isoform_suffix = re.compile(r'\.\d+$')

    exact_names = {}
    stripped_names = {}

    for sequence in Sequence.query.all():
        name = sequence.name.lower()
        exact_names[name] = sequence

        if handle_isoforms:
            stripped_names[isoform_suffix.sub('', name)] = sequence

    # Exact names are applied last so a stripped isoform name can never shadow
    # another sequence that carries exactly that name
    gene_hash = dict(stripped_names)
    gene_hash.update(exact_names)

    with open(filename, "r") as f_in:
        for i, line in enumerate(f_in, start=1):
            parts = line.split()

            if not parts:
                # Blank line: there is no family to create (the line still counts for numbering)
                continue

            new_family = GeneFamily('%s_%02d_%08d' % (prefix, method.id, i))
            new_family.method_id = method.id

            for p in parts:
                sequence = gene_hash.get(p.lower())
                if sequence is not None:
                    new_family.sequences.append(sequence)

            try:
                db.session.add(new_family)
                db.session.commit()
            except Exception:
                # Roll back and propagate instead of calling quit(), which would terminate the
                # whole process and hide the actual database error
                db.session.rollback()
                raise

    return method.id

@staticmethod
def add_families_from_tab(filename, description, handle_isoforms=True):
"""
DEPRECATED IMPORT FROM MCL
:param filename:
:param description:
:param handle_isoforms:
:return:
"""

# Create new method for these families
method = GeneFamilyMethod(description)
Expand Down
51 changes: 39 additions & 12 deletions tests/build.py
Expand Up @@ -2,12 +2,13 @@
from planet import create_app, db

from flask_testing import TestCase

import json


class BuildTest(TestCase):
"""
TestCase to check if the website is functional
BuildCase to check if the build functions work as planned
* a DB will be created and filled with dummy data
* an app will be spawned with the testing config, DO NOT run this against a database that is in use !!
* the DB will be cleared !
Expand All @@ -28,14 +29,6 @@ def setUp(self):
"""
db.create_all()

def tearDown(self):
"""
Removes test database again, so the next test can start with a clean slate
"""
db.session.remove()
db.drop_all()

def test_build(self):
from planet.models.species import Species
from planet.models.sequences import Sequence

Expand All @@ -45,6 +38,9 @@ def test_build(self):
from planet.models.expression_profiles import ExpressionProfile
from planet.models.expression_networks import ExpressionNetwork, ExpressionNetworkMethod
from planet.models.coexpression_clusters import CoexpressionClusteringMethod
from planet.models.expression_specificity import ExpressionSpecificityMethod
from planet.models.gene_families import GeneFamily, GeneFamilyMethod
from planet.models.clades import Clade

Species.add('mmu', 'Marek mutwiliana')
s = Species.query.first()
Expand Down Expand Up @@ -73,7 +69,29 @@ def test_build(self):
test_network.id,
min_size=1)

test_sequences = Sequence.query.all()
ExpressionSpecificityMethod.calculate_specificities(s.id, s.name + " condition specific profiles", False)

GeneFamily.add_families_from_mcl('./tests/data/comparative_data/mamut.families.mcl.txt', 'Fake Families')

GeneFamilyMethod.update_count()

Clade.add_clades_from_json({"Marek mutwiliana": {"species": ["mmu"], "tree": None}})
Clade.update_clades()
Clade.update_clades_interpro()

def tearDown(self):
"""
Removes test database again, so the next test can start with a clean slate

Runs after every test: the session is removed first, then all tables are dropped.
"""
# Discard the current session before touching the schema
db.session.remove()
# Drop every table; setUp calls db.create_all() again before the next test
db.drop_all()

def test_build(self):
from planet.models.sequences import Sequence
from planet.models.species import Species

s = Species.query.first()

test_sequence = Sequence.query.filter_by(name='Gene01').first()

test_xref = test_sequence.xrefs[0]
Expand All @@ -88,9 +106,12 @@ def test_build(self):
test_network_nodes = test_sequence.network_nodes.first()
test_network_data = json.loads(test_network_nodes.network)

test_cluster = test_sequence.coexpression_clusters.first() #TODO make test for this
test_cluster = test_sequence.coexpression_clusters.first()
cluster_sequence = test_cluster.sequences.filter_by(name='Gene01').first()

test_family = test_sequence.families.first()

self.assertEqual(len(test_sequences), 3) # Check if all genes are added
self.assertEqual(len(s.sequences.all()), 3) # Check if all genes are added

self.assertEqual(test_sequence.name, 'Gene01')
self.assertEqual(test_sequence.species_id, s.id)
Expand Down Expand Up @@ -119,5 +140,11 @@ def test_build(self):
self.assertEqual(test_network_data[0]["link_pcc"], 0.71) # Check if network contains required fields
self.assertEqual(test_network_data[0]["link_score"], 0) # Check if network contains required fields

self.assertNotEqual(cluster_sequence, None) # Check if gene is in cluster

self.assertEqual(test_profile.specificities.first().condition, 'Tissue 03') # Check if SPM worked
self.assertAlmostEqual(test_profile.specificities.first().score, 0.62, places=2) # Check if SPM score is correct
self.assertAlmostEqual(test_profile.specificities.first().entropy, 1.58, places=2) # Check if entropy is correct
self.assertAlmostEqual(test_profile.specificities.first().tau, 0.11, places=2) # Check if tau is correct

self.assertEqual(len(test_family.sequences.all()), 2) # Check if gene family contains 2 genes
2 changes: 2 additions & 0 deletions tests/data/comparative_data/mamut.families.mcl.txt
@@ -0,0 +1,2 @@
Gene01 Gene02
Gene03

0 comments on commit ad7c3cc

Please sign in to comment.