Skip to content
Permalink
0ee5743cf1
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
135 lines (101 sloc) 5.3 KB
from planet import db
from planet.models.sequences import Sequence
from planet.models.clades import Clade
from planet.models.relationships.sequence_sequence_clade import SequenceSequenceCladeAssociation
import utils.phylo as phylo
import newick
import json
# Collation name handed to string columns declared in this module (see Tree.label):
# 'NOCASE' when the database engine is SQLite, an empty string for any other engine.
# NOTE(review): evaluated at import time, so db.engine has to be resolvable when this module is imported - confirm.
SQL_COLLATION = 'NOCASE' if db.engine.name == 'sqlite' else ''
class TreeMethod(db.Model):
    """
    Method used to build a set of trees, linked to one gene family method.

    The trees created with this method are available through the dynamic ``trees`` relationship; each Tree gets a
    ``method`` backref pointing here.
    """
    __tablename__ = 'tree_methods'
    id = db.Column(db.Integer, primary_key=True)
    description = db.Column(db.Text)
    gene_family_method_id = db.Column(db.Integer,
                                      db.ForeignKey('gene_family_methods.id', ondelete='CASCADE'), index=True)

    trees = db.relationship('Tree',
                            backref=db.backref('method', lazy='joined'),
                            lazy='dynamic',
                            passive_deletes=True)

    def reconcile_trees(self):
        """
        Reconciles all trees of this method and stores the resulting sequence-sequence-clade associations.

        Every node with exactly two descendants is processed: the leaves below both branches are mapped to their
        species, phylo.get_clade selects the clade for the combined species and phylo.is_duplication flags the node
        (with a consistency score from phylo.duplication_consistency when it is a duplication). If a clade is found,
        each pair of leaves from opposite branches is stored in both directions.

        Leaves whose name does not match a sequence in the database are ignored. Nodes that are neither leaves nor
        binary are skipped with a printed warning. Rows are inserted in batches to limit memory usage.

        :return: None
        """
        # Fetch required data from the database
        sequences = Sequence.query.all()
        clades = Clade.query.all()

        seq_to_species = {s.name: s.species.code for s in sequences}
        seq_to_id = {s.name: s.id for s in sequences}
        clade_to_species = {c.name: json.loads(c.species) for c in clades}
        clade_to_id = {c.name: c.id for c in clades}

        insert_statement = SequenceSequenceCladeAssociation.__table__.insert()
        new_associations = []

        for t in self.trees:
            # Load tree from Newick string and start reconciliating
            tree = newick.loads(t.data_newick)[0]

            for node in tree.walk():
                if len(node.descendants) != 2:
                    if not node.is_binary:
                        # Print warning in case there is a non-binary node. The id and label live on the
                        # database row (t), not on the parsed newick node.
                        print("[%d, %s] Skipping node... Can only reconcile binary nodes ..." % (t.id, t.label))
                    # Otherwise it is a leaf node and can be skipped
                    continue

                # Only keep leaves that are known sequences: unknown names have no species and no id to store
                branch_one_seq = [leaf.name for leaf in node.descendants[0].get_leaves() if leaf.name in seq_to_id]
                branch_two_seq = [leaf.name for leaf in node.descendants[1].get_leaves() if leaf.name in seq_to_id]

                branch_one_species = {seq_to_species[s] for s in branch_one_seq}
                branch_two_species = {seq_to_species[s] for s in branch_two_seq}

                all_species = branch_one_species.union(branch_two_species)

                clade, _ = phylo.get_clade(all_species, clade_to_species)
                duplication = phylo.is_duplication(branch_one_species, branch_two_species, clade_to_species)

                duplication_consistency = None
                if duplication:
                    duplication_consistency = phylo.duplication_consistency(branch_one_species, branch_two_species)

                if clade is None:
                    continue

                # These values are the same for every pair below this node
                clade_id = clade_to_id[clade]
                duplication_flag = 1 if duplication else 0

                for seq_one in branch_one_seq:
                    for seq_two in branch_two_seq:
                        # Store the pair in both directions
                        for first, second in ((seq_one, seq_two), (seq_two, seq_one)):
                            new_associations.append({
                                'sequence_one_id': seq_to_id[first],
                                'sequence_two_id': seq_to_id[second],
                                'tree_id': t.id,
                                'clade_id': clade_id,
                                'duplication': duplication_flag,
                                'duplication_consistency_score': duplication_consistency
                            })

                # Write to the database in batches
                if len(new_associations) > 400:
                    db.engine.execute(insert_statement, new_associations)
                    new_associations = []

        # Add the remaining associations; executing an insert with an empty parameter list is avoided on purpose
        if new_associations:
            db.engine.execute(insert_statement, new_associations)
class Tree(db.Model):
    """
    A single tree, stored as a Newick string (``data_newick``) next to a ``data_phyloxml`` text column.

    Each tree refers to a gene family (``gf_id``) and to the tree method it was built with (``method_id``).
    All properties below parse ``data_newick`` again on every access.
    """
    __tablename__ = 'trees'
    id = db.Column(db.Integer, primary_key=True)
    label = db.Column(db.String(50, collation=SQL_COLLATION), index=True)
    data_newick = db.Column(db.Text)
    data_phyloxml = db.Column(db.Text)
    gf_id = db.Column(db.Integer, db.ForeignKey('gene_families.id', ondelete='CASCADE'), index=True)
    method_id = db.Column(db.Integer, db.ForeignKey('tree_methods.id', ondelete='CASCADE'), index=True)

    def _parse_newick(self):
        """
        Parses the stored Newick string.

        :return: first tree (root node) found in data_newick
        """
        return newick.loads(self.data_newick)[0]

    @property
    def ascii_art(self):
        """
        Returns an ascii representation of the tree. Useful for quick visualizations

        :return: string with ascii representation of the tree
        """
        return self._parse_newick().ascii_art()

    @property
    def count(self):
        """
        Counts the leaves in the tree.

        :return: number of leaf nodes
        """
        return len(self._parse_newick().get_leaves())

    @property
    def sequences(self):
        """
        Looks up the sequences whose name matches a leaf of the tree.

        :return: Sequence query filtered on the leaf names (not yet executed)
        """
        leaf_names = [leaf.name for leaf in self._parse_newick().get_leaves()]

        return Sequence.query.filter(Sequence.name.in_(leaf_names))

    @property
    def tree_stripped(self):
        """
        Returns the tree in Newick format after removing the lengths from the parsed tree.

        :return: Newick string of the tree without lengths
        """
        tree = self._parse_newick()
        tree.remove_lengths()

        # Serialize once and return it; a property should not write to stdout
        return newick.dumps([tree])