Skip to content

Commit

Permalink
put phylo code in utils library
Browse files Browse the repository at this point in the history
  • Loading branch information
proost committed Jun 13, 2017
1 parent 6ccde2f commit 8e1e169
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 38 deletions.
47 changes: 9 additions & 38 deletions planet/models/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,14 @@
from planet.models.sequences import Sequence
from planet.models.clades import Clade

import utils.phylo as phylo

import newick
import json

# Collation name chosen once, when this module is imported: 'NOCASE' when the
# database engine is SQLite, an empty string for every other engine.
SQL_COLLATION = 'NOCASE' if db.engine.name == 'sqlite' else ''


def __get_clade(species, clades_to_species):
    """
    Print every clade with its species, ordered from the smallest clade to the largest.

    This is a debugging stub: it does not select a clade and always returns None.

    :param species: list of species for which the best clade needs to be determined (currently unused)
    :param clades_to_species: dict with clade names (keys) and lists of species (values)
    :return: None
    """
    # items() yields (clade, species) pairs, so the sort key must measure the
    # species list inside the pair; using the whole pair as a dict key raises
    # KeyError.
    for c, s in sorted(clades_to_species.items(), key=lambda item: len(item[1])):
        print(c, s)


class TreeMethod(db.Model):
__tablename__ = 'tree_methods'
id = db.Column(db.Integer, primary_key=True)
Expand All @@ -36,29 +25,7 @@ class TreeMethod(db.Model):
cascade="all, delete-orphan",
passive_deletes=True)

@staticmethod
def __get_clade(species, clades_to_species):
    """
    Checks for a list of species which clade matches best (fewest other species in the clade).

    Clades are tried from the one with the fewest species to the one with the most; the first
    clade that contains every requested species is returned. An empty species list therefore
    matches the smallest clade.

    :param species: list of species for which the best clade needs to be determined
    :param clades_to_species: dict with clade names (keys) and lists of species (values)
    :return: tuple (clade name, list of species in that clade), or (None, []) if no clade
             contains all species
    """
    for c in sorted(clades_to_species, key=lambda k: len(clades_to_species[k])):
        cs = clades_to_species[c]
        if all(s in cs for s in species):
            return c, cs

    # Only give up after every clade has been examined; this also covers an
    # empty clades_to_species dict, so callers can always unpack the result.
    return None, []

@staticmethod
def __duplication(set_one, set_two, clades_to_species):
    """
    Tell whether the best-matching clades of two groups of species have a species in common.

    :param set_one: first collection of species
    :param set_two: second collection of species
    :param clades_to_species: dict with clade names (keys) and lists of species (values)
    :return: True if at least one species of the clade selected for set_one also occurs in the
             clade selected for set_two, False otherwise
    """
    _, members_one = TreeMethod.__get_clade(set_one, clades_to_species)
    _, members_two = TreeMethod.__get_clade(set_two, clades_to_species)

    # Compare the species of the two selected clades, not the input sets themselves.
    overlap = [member in members_two for member in members_one]
    return any(overlap)

def reconcile_trees(self):
# Fetch required data from the database
Expand Down Expand Up @@ -89,9 +56,13 @@ def reconcile_trees(self):

all_species = branch_one_species.union(branch_two_species)

c, s = TreeMethod.__get_clade(all_species, clade_to_species)
duplication = TreeMethod.__duplication(branch_one_species, branch_two_species, clade_to_species)
print(t.id, c, s, "Dup" if duplication else "Spe", branch_one_species, branch_two_species)
c, s = phylo.get_clade(all_species, clade_to_species)
duplication = phylo.is_duplication(branch_one_species, branch_two_species, clade_to_species)

if c is not None:
node.name = "%s_%s" % (c, "D" if duplication else "S")

print(newick.dumps([tree]))


class Tree(db.Model):
Expand Down
31 changes: 31 additions & 0 deletions utils/phylo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@

def get_clade(species, clades_to_species):
    """
    Checks for a list of species which clade matches best (fewest other species in the clade).

    Clades are tried from the one with the fewest species to the one with the most; the first
    clade that contains every requested species is returned. An empty species list therefore
    matches the smallest clade.

    :param species: list of species for which the best clade needs to be determined
    :param clades_to_species: dict with clade names (keys) and lists of species (values)
    :return: tuple (clade name, list of species in that clade), or (None, []) if no clade
             contains all species
    """
    for c in sorted(clades_to_species, key=lambda k: len(clades_to_species[k])):
        cs = clades_to_species[c]
        if all(s in cs for s in species):
            return c, cs

    # Only give up after every clade has been examined; this also covers an
    # empty clades_to_species dict, so callers can always unpack the result.
    return None, []


def is_duplication(set_one, set_two, clades_to_species):
    """
    Tell whether the best-matching clades of two groups of species have a species in common.

    :param set_one: first collection of species
    :param set_two: second collection of species
    :param clades_to_species: dict with clade names (keys) and lists of species (values)
    :return: True if at least one species of the clade selected for set_one also occurs in the
             clade selected for set_two, False otherwise
    """
    _, members_one = get_clade(set_one, clades_to_species)
    _, members_two = get_clade(set_two, clades_to_species)

    # Compare the species of the two selected clades, not the input sets themselves.
    overlap = [member in members_two for member in members_one]
    return any(overlap)

0 comments on commit 8e1e169

Please sign in to comment.