Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Updated code to find clades for InterPro domains ...
  • Loading branch information
proost committed Jun 13, 2017
1 parent 8e1e169 commit 4320104
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 40 deletions.
50 changes: 17 additions & 33 deletions planet/models/clades.py
Expand Up @@ -2,6 +2,8 @@
from planet.models.gene_families import GeneFamily
from planet.models.interpro import Interpro

from utils.phylo import get_clade

import json

SQL_COLLATION = 'NOCASE' if db.engine.name == 'sqlite' else ''
Expand Down Expand Up @@ -57,6 +59,9 @@ def update_clades():
clades = Clade.query.all()
families = GeneFamily.query.all()

clade_to_species = {c.name: json.loads(c.species) for c in clades}
clade_to_id = {c.name: c.id for c in clades}

for f in families:
family_species = f.species_codes

Expand All @@ -66,23 +71,11 @@ def update_clades():
continue

# find the clade with the fewest species that contains all the codes
selected_clade = None
for c in clades:
clade_species = json.loads(c.species)

overlap = set(family_species).intersection(clade_species)

if len(overlap) == len(family_species):
if selected_clade is None:
selected_clade = c
else:
if selected_clade.species_count > c.species_count:
selected_clade = c
selected_clade, _ = get_clade(family_species, clade_to_species)
if selected_clade is None:
f.clade_id = None
else:
if selected_clade is None:
print("An error occurred, no clades found, check the clades in the database!")
else:
f.clade_id = selected_clade.id
f.clade_id = clade_to_id[selected_clade]

try:
db.session.commit()
Expand All @@ -98,6 +91,9 @@ def update_clades_interpro():
clades = Clade.query.all()
interpro= Interpro.query.all()

clade_to_species = {c.name: json.loads(c.species) for c in clades}
clade_to_id = {c.name: c.id for c in clades}

for i in interpro:
interpro_species = i.species_codes

Expand All @@ -107,26 +103,14 @@ def update_clades_interpro():
continue

# find the clade with the fewest species that contains all the codes
selected_clade = None
for c in clades:
clade_species = json.loads(c.species)

overlap = set(interpro_species).intersection(clade_species)

if len(overlap) == len(interpro_species):
if selected_clade is None:
selected_clade = c
else:
if selected_clade.species_count > c.species_count:
selected_clade = c
selected_clade, _ = get_clade(interpro_species, clade_to_species)
if selected_clade is None:
i.clade_id = None
else:
if selected_clade is None:
print("An error occurred, no clades found, check the clades in the database!")
else:
i.clade_id = selected_clade.id
i.clade_id = clade_to_id[selected_clade]

try:
db.session.commit()
except Exception as e:
db.session.rollback()
print(e)
print(e)
11 changes: 4 additions & 7 deletions planet/models/trees.py
Expand Up @@ -25,8 +25,6 @@ class TreeMethod(db.Model):
cascade="all, delete-orphan",
passive_deletes=True)



def reconcile_trees(self):
# Fetch required data from the database
sequences = Sequence.query.all()
Expand All @@ -40,12 +38,11 @@ def reconcile_trees(self):
tree = newick.loads(t.data_newick)[0]

for node in tree.walk():
if not node.is_binary:
print("[%d, %s] Skipping node... Can only reconcile binary nodes ..." % (tree.id, tree.label))
continue

if len(node.descendants) != 2:
# no need to reconcile leaf nodes
if not node.is_binary:
# Print warning in case there is a non-binary node
print("[%d, %s] Skipping node... Can only reconcile binary nodes ..." % (tree.id, tree.label))
# Otherwise it is a leaf node and can be skipped
continue

branch_one_seq = [l.name for l in node.descendants[0].get_leaves()]
Expand Down

0 comments on commit 4320104

Please sign in to comment.