diff --git a/conekt/models/expression/coexpression_clusters.py b/conekt/models/expression/coexpression_clusters.py index cc70c3d..3fb6a88 100644 --- a/conekt/models/expression/coexpression_clusters.py +++ b/conekt/models/expression/coexpression_clusters.py @@ -476,6 +476,16 @@ def calculate_enrichment(empty=True): cluster.__calculate_enrichment() def __calculate_clade_enrichment(self, background, gf_method_id): + """ + Calculates the clade enrichment for a co-expression cluster (i.e. if genes which originated in a certain clade + are overrepresented). A background is required (how many genes there are per clade in the organism) and the + gene family method those clades are based on. + + Calculations will be immediately committed to the DB. + + :param background: dict with background + :param gf_method_id: internal ID of gene family method + """ species_gene_count = self.method.network_method.species.sequence_count species_id = self.method.network_method.species_id @@ -685,18 +695,33 @@ def profiles(self): @property def interpro_stats(self): + """ + Get InterPro statistics for the current cluster + + :return: Interpro statistics + """ sequence_ids = [s.id for s in self.sequences.all()] return Interpro.sequence_stats(sequence_ids) @property def go_stats(self): + """ + Get GO statistics for the current cluster + + :return: GO statistics + """ sequence_ids = [s.id for s in self.sequences.all()] return GO.sequence_stats(sequence_ids) @property def family_stats(self): + """ + Get gene family statistics for the current cluster + + :return: gene family statistics + """ sequence_ids = [s.id for s in self.sequences.all()] return GeneFamily.sequence_stats(sequence_ids) diff --git a/conekt/models/expression/cross_species_profile.py b/conekt/models/expression/cross_species_profile.py index 496a687..0fee27a 100644 --- a/conekt/models/expression/cross_species_profile.py +++ b/conekt/models/expression/cross_species_profile.py @@ -10,8 +10,11 @@ class CrossSpeciesExpressionProfile: 
- def __init__(self): + """ + Function that gets required data, checks for which species there is a comparative profile available and stores + details to convert the profiles for that species. + """ self.condition_tissue = ConditionTissue.query.filter(ConditionTissue.in_tree == 1).all() # Way to merge various (potentially incomplete) lists and preserve the order (as good as possible) @@ -27,6 +30,12 @@ def __init__(self): self.species_to_condition = {ct.species_id: ct for ct in self.condition_tissue} def get_data(self, *sequence_ids): + """ + Gets comparative profiles for a set of sequences (where available) + + :param sequence_ids: list of sequence ids to get data for + :return: list of dicts with available profiles. + """ profiles = ExpressionProfile.query.filter(ExpressionProfile.sequence_id.in_(list(sequence_ids))).\ options(undefer('profile')).all() @@ -77,6 +86,13 @@ def get_data(self, *sequence_ids): return converted_profiles def get_heatmap(self, *sequence_ids, option='raw'): + """ + Gets comparative data and converts it into a dict compatible with our heatmap visualization + + :param sequence_ids: sequences + :param option: normalization method 'raw' -> no normalization, 'row' -> row based normalization + :return: list with dict entries compatible with our heatmap visualization + """ data = self.get_data(*sequence_ids) output = { diff --git a/conekt/models/expression/networks.py b/conekt/models/expression/networks.py index 0c028c8..d68daee 100644 --- a/conekt/models/expression/networks.py +++ b/conekt/models/expression/networks.py @@ -188,6 +188,13 @@ def calculate_ecc(network_method_ids, gene_family_method_id, max_size=100): @staticmethod def __neighborhoods_overlap(neighborhood_a, neighborhood_b): + """ + Checks if two genes have overlapping networks + + :param neighborhood_a: neighborhood for first gene (string as stored in database) + :param neighborhood_b: neighborhood for second gene (string as stored in database) + :return: Bool, true if networks 
overlap + """ genes_a = set([n['gene_id'] for n in json.loads(neighborhood_a) if n['gene_id'] is not None]) genes_b = set([n['gene_id'] for n in json.loads(neighborhood_b) if n['gene_id'] is not None]) @@ -285,12 +292,22 @@ def __init__(self, probe, sequence_id, network, method_id): @property def neighbors_count(self): + """ + Returns the number of neighbors the current gene has + + :return: int, number of neighbors + """ data = json.loads(self.network) return len(data) @property def neighbors_table(self): + """ + Returns a tab delimited representation of the current gene's neighbors + + :return: + """ data = json.loads(self.network) output = [["Sequence", "Description", "Alias", "PCC", "hrr"]] @@ -430,6 +447,14 @@ def get_neighborhood(probe, depth=0): @staticmethod def get_custom_network(method_id, probes): + """ + Return a network dict for a certain set of probes/sequences. Only returns the selected nodes and connections + between them (if any) + + :param method_id: network method to extract information from + :param probes: list of probe/sequence names + :return: network dict + """ nodes = [] edges = [] @@ -502,6 +527,19 @@ def __process_link(linked_probe, depth): @staticmethod def read_expression_network_lstrap(network_file, species_id, description, score_type="rank", pcc_cutoff=0.7, limit=30, enable_second_level=False): + """ + Reads a network from disk, generated using LSTrAP, determining hrr scores for each pair and stores things in the + DB. 
+ + :param network_file: path to input file + :param species_id: species the data is from + :param description: description to add to the db for this network + :param score_type: which scores are used, default = "rank" + :param pcc_cutoff: pcc threshold, pairs with a score below this will be ignored + :param limit: hrr score threshold, pairs with a score above this will be ignored + :param enable_second_level: include second level neighborhood in the database (only to be used for sparse networks) + :return: internal ID of the new network + """ # build conversion table for sequences sequences = Sequence.query.filter_by(species_id=species_id).all() diff --git a/conekt/models/expression/profiles.py b/conekt/models/expression/profiles.py index 40a306c..f34c981 100644 --- a/conekt/models/expression/profiles.py +++ b/conekt/models/expression/profiles.py @@ -96,6 +96,15 @@ def low_abundance(self, cutoff=10): @staticmethod def convert_profile(condition_to_tissue, profile_data, use_means=True): + """ + Convert a full, detailed profile into a more general summarized one using conversion table stored in the + database + + :param condition_to_tissue: dict with conversion instructions + :param profile_data: profile to convert + :param use_means: use means of detailed condition if True otherwise use samples independently. Default True + :return: New profile + """ tissues = list(set(condition_to_tissue['conversion'].values())) output = {} diff --git a/conekt/models/relationships/__init__.py b/conekt/models/relationships/__init__.py index 72febe7..929cba3 100644 --- a/conekt/models/relationships/__init__.py +++ b/conekt/models/relationships/__init__.py @@ -1,3 +1,8 @@ +""" +Tables to be used to define many-to-many relations. In case additional parameters are defined on the relationship, an +additional model needs to be created that extends these. 
+""" + from conekt import db sequence_go = db.Table('sequence_go', diff --git a/conekt/models/relationships/cluster_similarity.py b/conekt/models/relationships/cluster_similarity.py index d9f71e1..0c4027d 100644 --- a/conekt/models/relationships/cluster_similarity.py +++ b/conekt/models/relationships/cluster_similarity.py @@ -32,4 +32,7 @@ class CoexpressionClusterSimilarity(db.Model): @staticmethod def empty_table(): + """ + Delete all content from this table. Use carefully! + """ CoexpressionClusterSimilarity.query.delete() diff --git a/conekt/models/relationships/sequence_sequence_clade.py b/conekt/models/relationships/sequence_sequence_clade.py index d32cdd9..3ba71a0 100644 --- a/conekt/models/relationships/sequence_sequence_clade.py +++ b/conekt/models/relationships/sequence_sequence_clade.py @@ -33,9 +33,18 @@ def __str__(self): @property def readable_type(self): + """ + Returns type (duplication or speciation) in a human-readable format + + :return: string Duplication or Speciation + """ return "Duplication" if self.duplication else "Speciation" @property def readable_score(self): + """ + Returns the duplication consistency score in a nicer format + :return: string with dup. consistency score in %.3f format. Or "Not available" for speciations. + """ return "%.3f" % self.duplication_consistency_score if self.duplication else "Not available" \ No newline at end of file