Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
updated code to add clade information to interpro domains
  • Loading branch information
proost committed Dec 23, 2015
1 parent e872b90 commit 4be1893
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 1 deletion.
45 changes: 44 additions & 1 deletion planet/models/clades.py
@@ -1,5 +1,6 @@
from planet import db
from planet.models.gene_families import GeneFamily
from planet.models.interpro import Interpro
from config import SQL_COLLATION

import json
Expand All @@ -13,6 +14,7 @@ class Clade(db.Model):
species_count = db.Column(db.Integer)

families = db.relationship('GeneFamily', backref='clade', lazy='dynamic')
interpro = db.relationship('Interpro', backref='clade', lazy='dynamic')

def __init__(self, name, species):
self.name = name
Expand Down Expand Up @@ -69,7 +71,7 @@ def update_clades():
selected_clade = c
else:
if selected_clade is None:
print("An error occurred, no clade found, check the clades in the database!")
print("An error occurred, no clades found, check the clades in the database!")
else:
f.clade_id = selected_clade.id

Expand All @@ -78,3 +80,44 @@ def update_clades():
except Exception as e:
db.session.rollback()
print(e)

@staticmethod
def update_clades_interpro():
    """
    Assign every InterPro domain to the smallest clade covering its species

    For each Interpro record the species codes of its sequences are compared
    with the species list of every clade (stored as JSON in Clade.species).
    Of the clades that contain all of those codes, the one with the lowest
    species_count is written to the domain's clade_id; on a tie the clade
    that the query returned first is kept. Domains without any species get
    their clade_id reset to None. When no clade covers a domain a message is
    printed and its clade_id is left untouched.

    All changes are committed together; if the commit fails the session is
    rolled back and the exception is printed.
    """
    # Decode each clade's JSON species list once, not once per domain.
    clade_species = [(c, set(json.loads(c.species)))
                     for c in Clade.query.all()]

    for domain in Interpro.query.all():
        # species_codes issues a query, so read it a single time per domain
        domain_species = set(domain.species_codes)

        # skip domains without members
        if not domain_species:
            domain.clade_id = None
            continue

        # clades whose species list contains every code of this domain
        candidates = [c for c, species in clade_species
                      if domain_species <= species]

        if not candidates:
            print("An error occurred, no clades found, check the clades in the database!")
            continue

        # min() returns the first minimal item, so ties resolve to the
        # clade that came first in the query result
        domain.clade_id = min(candidates, key=lambda c: c.species_count).id

    try:
        db.session.commit()
    except Exception as e:
        db.session.rollback()
        print(e)
40 changes: 40 additions & 0 deletions planet/models/interpro.py
Expand Up @@ -2,12 +2,16 @@
from planet.models.relationships import sequence_interpro
from config import SQL_COLLATION

from sqlalchemy.orm import joinedload

class Interpro(db.Model):
__tablename__ = 'interpro'
id = db.Column(db.Integer, primary_key=True)
label = db.Column(db.String(50, collation=SQL_COLLATION), unique=True, index=True)
description = db.Column(db.Text)

clade_id = db.Column(db.Integer, db.ForeignKey('clades.id'), index=True)

sequences = db.relationship('Sequence', secondary=sequence_interpro, lazy='dynamic')
sequence_associations = db.relationship('SequenceInterproAssociation',
backref=db.backref('interpro', lazy='joined'),
Expand All @@ -16,3 +20,39 @@ class Interpro(db.Model):
def __init__(self, label, description):
self.label = label
self.description = description

@property
def species_codes(self):
    """
    Finds all species that have sequences linked to this InterPro domain

    :return: a list of unique species codes, in order of first occurrence
    """

    # joinedload asks SQLAlchemy to fetch each sequence's species together
    # with the sequences themselves
    sequences = self.sequences.options(joinedload('species')).all()

    # The set gives constant-time membership checks; the list keeps the
    # first-seen order that callers receive.
    seen = set()
    output = []

    for s in sequences:
        code = s.species.code
        if code not in seen:
            seen.add(code)
            output.append(code)

    return output

@property
def species_counts(self):
    """
    Counts, per species, the sequences linked to this InterPro domain

    :return: a dict with species codes as keys and sequence counts as values
    """

    tally = {}

    for sequence in self.sequences.options(joinedload('species')).all():
        code = sequence.species.code
        # a code seen for the first time starts from zero
        tally[code] = tally.get(code, 0) + 1

    return tally

0 comments on commit 4be1893

Please sign in to comment.