# NOTE(review): the URL variable segments of both rules were not legible in the
# reviewed patch; they are reconstructed from the view's signature
# (clade_id, page=1) -- confirm against the sibling routes in this module.
@clade.route('/associations/<int:clade_id>/')
@clade.route('/associations/<int:clade_id>/<int:page>')
@cache.cached()
def clade_associations(clade_id, page=1):
    """
    Renders one page of the sequence-sequence associations linked to a clade

    :param clade_id: internal ID of the clade (404 if it does not exist)
    :param page: page number to render, defaults to the first page
    :return: rendered 'pagination/clade_relations.html' fragment
    """
    current_clade = Clade.query.get_or_404(clade_id)

    # Keyword arguments keep this call valid on Flask-SQLAlchemy releases
    # where paginate() no longer accepts positional arguments.
    # error_out=False: an out-of-range page yields an empty list, not a 404.
    associations = current_clade.sequence_sequence_clade_associations.paginate(
        page=page,
        per_page=g.page_items,
        error_out=False
    ).items

    return render_template('pagination/clade_relations.html', relations=associations)
class SequenceSequenceCladeAssociation(db.Model):
    """
    Pairwise association between two sequences, linked to a clade and a tree

    Each row stores whether the pair is flagged as a duplication and, if so,
    the duplication consistency score.
    """
    __tablename__ = 'sequence_sequence_clade'
    # NOTE(review): presumably needed because the same table name is also
    # declared as a plain db.Table elsewhere in the package -- confirm.
    __table_args__ = {'extend_existing': True}

    id = db.Column(db.Integer, primary_key=True)

    # The two sequences of the pair
    sequence_one_id = db.Column(db.Integer, db.ForeignKey('sequences.id', ondelete='CASCADE'))
    sequence_two_id = db.Column(db.Integer, db.ForeignKey('sequences.id', ondelete='CASCADE'))

    clade_id = db.Column(db.Integer, db.ForeignKey('clades.id', ondelete='CASCADE'), index=True)
    tree_id = db.Column(db.Integer, db.ForeignKey('trees.id', ondelete='CASCADE'), index=True)

    duplication = db.Column(db.Boolean)
    # Nullable: may be absent even when duplication is set
    duplication_consistency_score = db.Column(db.Float)

    tree = db.relationship('Tree', lazy='joined',
                           backref=db.backref('sequence_sequence_clade_associations',
                                              lazy='dynamic',
                                              passive_deletes=True)
                           )

    clade = db.relationship('Clade', lazy='joined',
                            backref=db.backref('sequence_sequence_clade_associations',
                                               lazy='dynamic',
                                               passive_deletes=True)
                            )

    def __str__(self):
        # str() instead of "%d" so an instance without an id yet (None)
        # can still be printed instead of raising TypeError.
        return str(self.id)

    @property
    def readable_type(self):
        """
        :return: "Duplication" when the duplication flag is set, "Speciation" otherwise
        """
        return "Duplication" if self.duplication else "Speciation"

    @property
    def readable_score(self):
        """
        :return: the duplication consistency score with three decimals, or
                 "Not available" when the pair is not a duplication or no
                 score was stored
        """
        score = self.duplication_consistency_score

        # Guard against a missing score: "%.3f" % None raises TypeError.
        if not self.duplication or score is None:
            return "Not available"

        return "%.3f" % score
delete-orphan",passive_deletes=True) - expression_specificities = db.relationship('ExpressionSpecificityMethod', backref='species', lazy='dynamic', cascade="all, delete-orphan",passive_deletes=True) - condition_tissues = db.relationship('ConditionTissue', backref='species', lazy='dynamic', cascade="all, delete-orphan",passive_deletes=True) + sequences = db.relationship('Sequence', backref='species', lazy='dynamic', cascade="all, delete-orphan", passive_deletes=True) + networks = db.relationship('ExpressionNetworkMethod', backref='species', lazy='dynamic', cascade="all, delete-orphan", passive_deletes=True) + profiles = db.relationship('ExpressionProfile', backref='species', lazy='dynamic', cascade="all, delete-orphan", passive_deletes=True) + expression_specificities = db.relationship('ExpressionSpecificityMethod', backref='species', lazy='dynamic', cascade="all, delete-orphan", passive_deletes=True) + condition_tissues = db.relationship('ConditionTissue', backref='species', lazy='dynamic', cascade="all, delete-orphan", passive_deletes=True) def __init__(self, code, name, data_type='genome', color="#C7C7C7", highlight="#DEDEDE", description=None): diff --git a/planet/models/trees.py b/planet/models/trees.py index 4c4be32..db9badf 100644 --- a/planet/models/trees.py +++ b/planet/models/trees.py @@ -1,6 +1,7 @@ from planet import db from planet.models.sequences import Sequence from planet.models.clades import Clade +from planet.models.relationships.sequence_sequence_clade import SequenceSequenceCladeAssociation import utils.phylo as phylo @@ -22,7 +23,6 @@ class TreeMethod(db.Model): trees = db.relationship('Tree', backref=db.backref('method', lazy='joined'), lazy='dynamic', - cascade="all, delete-orphan", passive_deletes=True) def reconcile_trees(self): @@ -31,7 +31,11 @@ def reconcile_trees(self): clades = Clade.query.all() seq_to_species = {s.name: s.species.code for s in sequences} + seq_to_id = {s.name: s.id for s in sequences} clade_to_species = {c.name: 
json.loads(c.species) for c in clades} + clade_to_id = {c.name: c.id for c in clades} + + new_associations = [] for t in self.trees: # Load tree from Newick string and start reconciliating @@ -53,13 +57,38 @@ def reconcile_trees(self): all_species = branch_one_species.union(branch_two_species) - c, s = phylo.get_clade(all_species, clade_to_species) + clade, _ = phylo.get_clade(all_species, clade_to_species) duplication = phylo.is_duplication(branch_one_species, branch_two_species, clade_to_species) - if c is not None: - node.name = "%s_%s" % (c, "D" if duplication else "S") - - print(newick.dumps([tree])) + duplication_consistency = None + if duplication: + duplication_consistency = phylo.duplication_consistency(branch_one_species, branch_two_species) + + if clade is not None: + for seq_one in branch_one_seq: + for seq_two in branch_two_seq: + new_associations.append({ + 'sequence_one_id': seq_to_id[seq_one], + 'sequence_two_id': seq_to_id[seq_two], + 'tree_id': t.id, + 'clade_id': clade_to_id[clade], + 'duplication': 1 if duplication else 0, + 'duplication_consistency_score': duplication_consistency + }) + new_associations.append({ + 'sequence_one_id': seq_to_id[seq_two], + 'sequence_two_id': seq_to_id[seq_one], + 'tree_id': t.id, + 'clade_id': clade_to_id[clade], + 'duplication': 1 if duplication else 0, + 'duplication_consistency_score': duplication_consistency + }) + + if len(new_associations) > 400: + db.engine.execute(SequenceSequenceCladeAssociation.__table__.insert(), new_associations) + new_associations = [] + + db.engine.execute(SequenceSequenceCladeAssociation.__table__.insert(), new_associations) class Tree(db.Model): diff --git a/planet/templates/clade.html b/planet/templates/clade.html index b252c1a..a7535ae 100644 --- a/planet/templates/clade.html +++ b/planet/templates/clade.html @@ -36,6 +36,7 @@ {{ macro.pagination('Families', url_for('clade.clade_families', clade_id=clade.id), families_count, url_for('clade.clade_families_table', 
clade_id=clade.id), 'family') }} {{ macro.pagination('Interpro domains', url_for('clade.clade_interpro', clade_id=clade.id), interpro_count, url_for('clade.clade_interpro_table', clade_id=clade.id), 'interpro') }} + {{ macro.pagination('Associations', url_for('clade.clade_associations', clade_id=clade.id), association_count, None, 'association') }} {% endif %} diff --git a/planet/templates/pagination/clade_relations.html b/planet/templates/pagination/clade_relations.html new file mode 100644 index 0000000..af4e8d6 --- /dev/null +++ b/planet/templates/pagination/clade_relations.html @@ -0,0 +1,20 @@ +
+ + + {% for r in relations %} + + + + + + + + + {% else %} + + + + {% endfor %} + +
{{ r.sequence_one.name }}{{ r.sequence_two.name }}{{ r.clade.name }}{{ r.readable_type }}{{ r.readable_score }}{{ r.tree.label }}
No sequence-sequence clade associations found
+
def duplication_consistency(set_one, set_two):
    """
    Calculates the duplication consistency score for two sets of species

    The score is the number of species present in both collections divided by
    the number of species present in at least one of them.

    :param set_one: set/list of species
    :param set_two: set/list of species
    :return: float with duplication consistency score, 0.0 if both inputs are empty
    """
    species_one = set(set_one)
    species_two = set(set_two)

    union_size = len(species_one | species_two)

    # Two empty inputs share nothing; report 0.0 instead of dividing by zero.
    if union_size == 0:
        return 0.0

    return len(species_one & species_two) / union_size