Skip to content

Commit

Permalink
adding additional documentation to the code
Browse files Browse the repository at this point in the history
  • Loading branch information
proost committed Dec 1, 2017
1 parent 46af928 commit 6c92b29
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 1 deletion.
25 changes: 25 additions & 0 deletions conekt/models/expression/coexpression_clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,16 @@ def calculate_enrichment(empty=True):
cluster.__calculate_enrichment()

def __calculate_clade_enrichment(self, background, gf_method_id):
"""
Calculates the clade enrichment for a co-expression cluster (i.e. if genes which originated in a certain clade
are overrepresented). A background is required (how many genes there are per clade in the organism) and the
gene family method those clades are based on.
Calculations will be immediately committed to the DB.
:param background: dict with background
:param gf_method_id: internal ID of gene family method
"""
species_gene_count = self.method.network_method.species.sequence_count
species_id = self.method.network_method.species_id

Expand Down Expand Up @@ -685,18 +695,33 @@ def profiles(self):

@property
def interpro_stats(self):
    """
    InterPro statistics for the sequences attached to this object.

    Collects the internal id of every entry returned by ``self.sequences.all()`` and hands that list to
    ``Interpro.sequence_stats``.

    :return: whatever ``Interpro.sequence_stats`` returns for those sequence ids
    """
    members = self.sequences.all()

    return Interpro.sequence_stats([member.id for member in members])

@property
def go_stats(self):
    """
    GO statistics for the sequences attached to this object.

    Collects the internal id of every entry returned by ``self.sequences.all()`` and hands that list to
    ``GO.sequence_stats``.

    :return: whatever ``GO.sequence_stats`` returns for those sequence ids
    """
    members = self.sequences.all()

    return GO.sequence_stats([member.id for member in members])

@property
def family_stats(self):
    """
    Gene family statistics for the sequences attached to this object.

    Collects the internal id of every entry returned by ``self.sequences.all()`` and hands that list to
    ``GeneFamily.sequence_stats``.

    :return: whatever ``GeneFamily.sequence_stats`` returns for those sequence ids
    """
    members = self.sequences.all()

    return GeneFamily.sequence_stats([member.id for member in members])
18 changes: 17 additions & 1 deletion conekt/models/expression/cross_species_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@


class CrossSpeciesExpressionProfile:

def __init__(self):
"""
Function that gets required data, checks for which species there is a comparative profile available and stores
details to convert the profiles for that species.
"""
self.condition_tissue = ConditionTissue.query.filter(ConditionTissue.in_tree == 1).all()

# Way to merge various (potentially incomplete) lists and preserve the order (as good as possible)
Expand All @@ -27,6 +30,12 @@ def __init__(self):
self.species_to_condition = {ct.species_id: ct for ct in self.condition_tissue}

def get_data(self, *sequence_ids):
"""
Gets comparative profiles for a set of sequences (where available)
:param sequence_ids: list of sequence ids to get data for
:return: list of dicts with available profiles.
"""
profiles = ExpressionProfile.query.filter(ExpressionProfile.sequence_id.in_(list(sequence_ids))).\
options(undefer('profile')).all()

Expand Down Expand Up @@ -77,6 +86,13 @@ def get_data(self, *sequence_ids):
return converted_profiles

def get_heatmap(self, *sequence_ids, option='raw'):
"""
Gets comparative data and converts it into a dict compatible with our heatmap visualization
:param sequence_ids: sequences
:param option: normalization method 'raw' -> no normalization, 'row' -> row based normalization
:return: list with dict entries compatible with our heatmap visualization
"""
data = self.get_data(*sequence_ids)

output = {
Expand Down
38 changes: 38 additions & 0 deletions conekt/models/expression/networks.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,13 @@ def calculate_ecc(network_method_ids, gene_family_method_id, max_size=100):

@staticmethod
def __neighborhoods_overlap(neighborhood_a, neighborhood_b):
"""
Checks if two genes have overlapping networks
:param neighborhood_a: neighborhood for first gene (string as stored in database)
:param neighborhood_b: neighborhood for second gene (string as stored in database)
:return: Bool, true if networks overlap
"""
genes_a = set([n['gene_id'] for n in json.loads(neighborhood_a) if n['gene_id'] is not None])
genes_b = set([n['gene_id'] for n in json.loads(neighborhood_b) if n['gene_id'] is not None])

Expand Down Expand Up @@ -285,12 +292,22 @@ def __init__(self, probe, sequence_id, network, method_id):

@property
def neighbors_count(self):
    """
    Returns the number of neighbors the current gene has.

    The count is the length of the JSON document stored in ``self.network`` once it has been decoded.

    :return: int, number of neighbors
    """
    return len(json.loads(self.network))

@property
def neighbors_table(self):
"""
Returns a tab delimited representation of the current gene's neighbors
:return:
"""
data = json.loads(self.network)
output = [["Sequence", "Description", "Alias", "PCC", "hrr"]]

Expand Down Expand Up @@ -430,6 +447,14 @@ def get_neighborhood(probe, depth=0):

@staticmethod
def get_custom_network(method_id, probes):
"""
Return a network dict for a certain set of probes/sequences. Only returns the selected nodes and connections
between them (if any)
:param method_id: network method to extract information from
:param probes: list of probe/sequence names
:return: network dict
"""
nodes = []
edges = []

Expand Down Expand Up @@ -502,6 +527,19 @@ def __process_link(linked_probe, depth):
@staticmethod
def read_expression_network_lstrap(network_file, species_id, description, score_type="rank",
pcc_cutoff=0.7, limit=30, enable_second_level=False):
"""
Reads a network from disk, generated using LSTrAP, determining hrr scores for each pair and storing the results in
the DB.
:param network_file: path to input file
:param species_id: species the data is from
:param description: description to add to the db for this network
:param score_type: which scores are used, default = "rank"
:param pcc_cutoff: pcc threshold, pairs with a score below this will be ignored
:param limit: hrr score threshold, pairs with a score above this will be ignored
:param enable_second_level: include second level neighborhood in the database (only to be used for sparse networks)
:return: internal ID of the new network
"""
# build conversion table for sequences
sequences = Sequence.query.filter_by(species_id=species_id).all()

Expand Down
9 changes: 9 additions & 0 deletions conekt/models/expression/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,15 @@ def low_abundance(self, cutoff=10):

@staticmethod
def convert_profile(condition_to_tissue, profile_data, use_means=True):
"""
Convert a full, detailed profile into a more general summarized one using conversion table stored in the
database
:param condition_to_tissue: dict with conversion instructions
:param profile_data: profile to convert
:param use_means: use means of detailed condition if True otherwise use samples independently. Default True
:return: New profile
"""
tissues = list(set(condition_to_tissue['conversion'].values()))

output = {}
Expand Down
5 changes: 5 additions & 0 deletions conekt/models/relationships/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
"""
Tables to be used to define many-to-many relations. In case additional parameters are defined on the relationship, an
additional model needs to be created that extends these.
"""

from conekt import db

sequence_go = db.Table('sequence_go',
Expand Down
3 changes: 3 additions & 0 deletions conekt/models/relationships/cluster_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,7 @@ class CoexpressionClusterSimilarity(db.Model):

@staticmethod
def empty_table():
    """
    Delete every row from this table. Use with care!

    Issues a bulk delete through the model's query object; no commit is performed inside this function.
    """
    all_rows = CoexpressionClusterSimilarity.query
    all_rows.delete()
9 changes: 9 additions & 0 deletions conekt/models/relationships/sequence_sequence_clade.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,18 @@ def __str__(self):

@property
def readable_type(self):
    """
    Returns the type of this relation (duplication or speciation) in a human-readable format.

    :return: string "Duplication" when ``self.duplication`` is truthy, "Speciation" otherwise
    """
    if self.duplication:
        return "Duplication"

    return "Speciation"

@property
def readable_score(self):
    """
    Returns the duplication consistency score in a nicer format.

    :return: string with the duplication consistency score formatted with three decimals ("%.3f"), or
        "Not available" when ``self.duplication`` is falsy (i.e. for speciations)
    """
    if not self.duplication:
        return "Not available"

    return "%.3f" % self.duplication_consistency_score

0 comments on commit 6c92b29

Please sign in to comment.