Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
started work on cluster similarity
  • Loading branch information
proost committed Dec 10, 2015
1 parent cfd84bb commit cda6833
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 23 deletions.
38 changes: 38 additions & 0 deletions planet/models/coexpression_clusters.py
Expand Up @@ -2,11 +2,14 @@

from planet import db
from planet.models.expression_networks import ExpressionNetwork
from planet.models.gene_families import GeneFamily
from planet.models.relationships import sequence_coexpression_cluster, SequenceGOAssociation, ClusterGOEnrichment
from planet.models.relationships import CoexpressionClusterSimilarity, SequenceCoexpressionClusterAssociation, SequenceFamilyAssociation

from utils.enrichment import hypergeo_sf, fdr_correction
from utils.benchmark import benchmark

from sqlalchemy import join
from sqlalchemy.orm import joinedload, load_only

import json
Expand Down Expand Up @@ -189,3 +192,38 @@ def calculate_enrichment(empty=True):
for i, cluster in enumerate(clusters):
print(i, "\t cluster: ", cluster.method_id, cluster.name)
cluster.__calculate_enrichment()

@staticmethod
@benchmark
def calculate_similarities(gene_family_method_id=1):

# sqlalchemy to fetch cluster associations
fields = [SequenceCoexpressionClusterAssociation.__table__.c.sequence_id,
SequenceCoexpressionClusterAssociation.__table__.c.coexpression_cluster_id]
condition = SequenceCoexpressionClusterAssociation.__table__.c.sequence_id is not None
cluster_associations = db.engine.execute(db.select(fields).where(condition)).fetchall()

# sqlalchemy to fetch sequence family associations
fields = [SequenceFamilyAssociation.__table__.c.sequence_id, SequenceFamilyAssociation.__table__.c.gene_family_id, GeneFamily.__table__.c.method_id]
condition = GeneFamily.__table__.c.method_id == gene_family_method_id
table = join(SequenceFamilyAssociation.__table__, GeneFamily.__table__, SequenceFamilyAssociation.__table__.c.gene_family_id == GeneFamily.__table__.c.id)
sequence_families = db.engine.execute(db.select(fields).select_from(table).where(condition)).fetchall()

# convert sqlachemy results into dictionary
sequence_to_family = {seq_id: fam_id for seq_id, fam_id, method_id in sequence_families}

cluster_to_sequences = {}
cluster_to_families = {}

for seq_id, cluster_id in cluster_associations:
if cluster_id not in cluster_to_sequences.keys():
cluster_to_sequences[cluster_id] = []
cluster_to_sequences[cluster_id].append(seq_id)

for cluster_id, sequences in cluster_to_sequences.items():
families = list(set([sequence_to_family[s] for s in sequences if s in sequence_to_family.keys()]))
if len(families) > 0:
cluster_to_families[cluster_id] = families

for c, f in cluster_to_families.items():
print(c,f)
69 changes: 46 additions & 23 deletions planet/models/relationships.py
Expand Up @@ -26,6 +26,13 @@
db.Column('coexpression_cluster_id', db.Integer, db.ForeignKey('coexpression_clusters.id'), index=True)
)

coexpression_cluster_similarity = \
db.Table('coexpression_cluster_similarity',
db.Column('id', db.Integer, primary_key=True),
db.Column('source_id', db.Integer, db.ForeignKey('coexpression_clusters.id'), index=True),
db.Column('target_id', db.Integer, db.ForeignKey('coexpression_clusters.id'), index=True)
)

sequence_xref = db.Table('sequence_xref',
db.Column('id', db.Integer, primary_key=True),
db.Column('sequence_id', db.Integer, db.ForeignKey('sequences.id'), index=True),
Expand Down Expand Up @@ -55,6 +62,22 @@ class SequenceCoexpressionClusterAssociation(db.Model):
coexpression_cluster_id = db.Column(db.Integer, db.ForeignKey('coexpression_clusters.id'))


class CoexpressionClusterSimilarity(db.Model):
__tablename__ = 'coexpression_cluster_similarity'
__table_args__ = {'extend_existing': True}

id = db.Column(db.Integer, primary_key=True)
source_id = db.Column(db.Integer, db.ForeignKey('coexpression_clusters.id'))
target_id = db.Column(db.Integer, db.ForeignKey('coexpression_clusters.id'))

gene_family_method_id = db.Column('gene_family_method_id', db.Integer, db.ForeignKey('gene_family_methods.id'),
index=True)

jaccard_index = db.Column(db.Float, index=True)
p_value = db.Column(db.Float, index=True)
corrected_p_value = db.Column(db.Float, index=True)


class SequenceFamilyAssociation(db.Model):
__tablename__ = 'sequence_family'
__table_args__ = {'extend_existing': True}
Expand Down Expand Up @@ -128,26 +151,26 @@ def genome_percentage(self):
return self.go_count*100/self.go_size


class ProbeGOEnrichment(db.Model):
__tablename__ = 'probe_go_enrichment'

id = db.Column(db.Integer, primary_key=True)
probe_id = db.Column(db.Integer, db.ForeignKey('expression_network.id'), index=True)
go_id = db.Column(db.Integer, db.ForeignKey('go.id'), index=True)

"""
Counts required to calculate the enrichment,
store here for quick access
"""
cluster_count = db.Column(db.Integer)
cluster_size = db.Column(db.Integer)
go_count = db.Column(db.Integer)
go_size = db.Column(db.Integer)

"""
Enrichment score (log-transformed), p-value and corrected p-value. Calculated using the hypergeometric
distribution and applying FDR correction (aka. BH)
"""
enrichment = db.Column(db.Float)
p_value = db.Column(db.Float)
corrected_p_value = db.Column(db.Float)
# class ProbeGOEnrichment(db.Model):
# __tablename__ = 'probe_go_enrichment'
#
# id = db.Column(db.Integer, primary_key=True)
# probe_id = db.Column(db.Integer, db.ForeignKey('expression_network.id'), index=True)
# go_id = db.Column(db.Integer, db.ForeignKey('go.id'), index=True)
#
# """
# Counts required to calculate the enrichment,
# store here for quick access
# """
# cluster_count = db.Column(db.Integer)
# cluster_size = db.Column(db.Integer)
# go_count = db.Column(db.Integer)
# go_size = db.Column(db.Integer)
#
# """
# Enrichment score (log-transformed), p-value and corrected p-value. Calculated using the hypergeometric
# distribution and applying FDR correction (aka. BH)
# """
# enrichment = db.Column(db.Float)
# p_value = db.Column(db.Float)
# corrected_p_value = db.Column(db.Float)

0 comments on commit cda6833

Please sign in to comment.