Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
CoNekT/conekt/helpers/cytoscape.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
578 lines (461 sloc)
25.4 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter
from copy import deepcopy
from itertools import combinations

from flask import url_for
from sqlalchemy.orm import joinedload

from conekt.models.clades import Clade
from conekt.models.expression.profiles import ExpressionProfile
from conekt.models.expression.specificity import ExpressionSpecificity
from conekt.models.relationships.sequence_cluster import SequenceCoexpressionClusterAssociation
from conekt.models.relationships.sequence_family import SequenceFamilyAssociation
from conekt.models.relationships.sequence_interpro import SequenceInterproAssociation
from conekt.models.sequences import Sequence
from conekt.models.species import Species
from utils.color import family_to_shape_and_color, index_to_shape_and_color
class CytoscapeHelper:
    """
    Static helpers that build and decorate Cytoscape.js compatible networks.

    A network is a dict ``{"nodes": [...], "edges": [...]}`` in which every node and every edge is a dict holding
    its attributes under the ``"data"`` key. Every helper leaves the network it receives untouched and returns a
    new, decorated network.
    """

    # Palette indexed by the "depth" attribute, shared by add_depth_data_nodes and add_depth_data_edges
    _DEPTH_COLORS = ["#3CE500", "#B7D800", "#CB7300", "#BF0003"]

    @staticmethod
    def parse_network(network):
        """
        Wraps a plain network (as generated by the ExpressionNetwork and CoexpressionCluster model) into the
        Cytoscape.js structure and adds links, a default color and a default shape.

        :param network: dict with a "nodes" and an "edges" list; each item is the attribute dict of one element
        :return: Network fully compatible with Cytoscape.js
        """
        # Copy the attribute dicts: decorating the output must not alter the network owned by the caller
        output = {
            "nodes": [{"data": dict(n)} for n in network["nodes"]],
            "edges": [{"data": dict(e)} for e in network["edges"]],
        }

        # add basic colors and shapes to nodes and url to gene pages
        for node in output["nodes"]:
            data = node["data"]

            if data.get("gene_id") is not None:
                data["gene_link"] = url_for("sequence.sequence_view", sequence_id=data["gene_id"])

            # The profile link is only added when the node id differs from the gene name
            if data["id"] != data["gene_name"]:
                data["profile_link"] = url_for("expression_profile.expression_profile_find", probe=data["id"])

            data["color"] = "#CCC"
            data["shape"] = "ellipse"

        for edge in output["edges"]:
            edge["data"]["color"] = "#888"

        return output

    @staticmethod
    def add_family_data_nodes(network, family_method_id):
        """
        Adds family, clade and interpro information to a cytoscape compatible network (dict)

        Nodes without a "data" entry are skipped. Nodes without family or interpro data receive ``None`` for the
        family fields and the grey rectangle defaults.

        :param network: dict containing the network
        :param family_method_id: desired type/method used to construct the families
        :return: Cytoscape.js compatible network with family, clade and interpro information included
        """
        completed_network = deepcopy(network)

        sequence_ids = [
            node["data"]["gene_id"]
            for node in completed_network["nodes"]
            if "data" in node and "gene_id" in node["data"]
        ]

        sequence_families = SequenceFamilyAssociation.query.\
            filter(SequenceFamilyAssociation.sequence_id.in_(sequence_ids)).\
            options(joinedload('family.clade')).\
            filter(SequenceFamilyAssociation.family.has(method_id=family_method_id)).all()

        sequence_interpro = SequenceInterproAssociation.query.\
            filter(SequenceInterproAssociation.sequence_id.in_(sequence_ids)).all()

        # Position of each clade when ordered by species_count; that position selects the clade's color and shape.
        # setdefault keeps the first position if a name would occur twice, exactly like list.index would.
        clade_positions = {}
        for position, clade in enumerate(Clade.query.order_by(Clade.species_count).all()):
            clade_positions.setdefault(clade.name, position)

        data = {}

        for s in sequence_families:
            entry = {
                "name": s.family.name,
                "id": s.gene_family_id,
                "url": url_for('family.family_view', family_id=s.gene_family_id),
            }

            clade = s.family.clade
            if clade is not None:
                color, shape = index_to_shape_and_color(clade_positions[clade.name])
                entry["clade_color"] = color
                entry["clade_shape"] = shape
                entry["clade"] = clade.name
                entry["clade_count"] = clade.species_count
            else:
                entry["clade_color"] = "#CCC"
                entry["clade_shape"] = "rectangle"
                entry["clade"] = "None"
                entry["clade_count"] = 0

            # A later association for the same sequence replaces an earlier one
            data[s.sequence_id] = entry

        for i in sequence_interpro:
            if i.sequence_id not in data:
                # Sequence with domains but without family: no clade color/shape, the node loop adds defaults
                data[i.sequence_id] = {"name": None, "id": None, "url": None, "clade": "None", "clade_count": 0}

            data[i.sequence_id].setdefault("interpro", []).append(i.domain.description)

        clade_keys = ("clade", "clade_count", "clade_shape", "clade_color")

        for node in completed_network["nodes"]:
            node_data = node.get("data")
            if node_data is None:
                continue

            entry = data.get(node_data["gene_id"]) if "gene_id" in node_data else None

            if entry is not None:
                if "interpro" in entry:
                    node_data["interpro"] = entry["interpro"]

                node_data["family_name"] = entry["name"]
                node_data["family_id"] = entry["id"]
                node_data["family_url"] = entry["url"]

                if all(key in entry for key in clade_keys):
                    node_data["family_clade"] = entry["clade"]
                    node_data["family_clade_color"] = entry["clade_color"]
                    node_data["family_clade_shape"] = entry["clade_shape"]
                    node_data["family_clade_count"] = entry["clade_count"]
                else:
                    node_data["family_clade_color"] = "#CCC"
                    node_data["family_clade_shape"] = "rectangle"
                    node_data["family_clade"] = "None"
                    node_data["family_clade_count"] = 1
            else:
                node_data["family_name"] = None
                node_data["family_id"] = None
                node_data["family_url"] = None
                node_data["family_color"] = "#CCC"
                node_data["family_shape"] = "rectangle"
                node_data["family_clade_color"] = "#CCC"
                node_data["family_clade_shape"] = "rectangle"
                node_data["family_clade"] = "None"
                node_data["family_clade_count"] = 1

        return completed_network

    @staticmethod
    def add_lc_data_nodes(network):
        """
        Colors a network based on family information and label co-occurrences.

        Only nodes with a "gene_id" are considered. The family name alone drives "family_color" and
        "family_shape"; the family name combined with the interpro labels drives the "lc_*" fields.

        :param network: dict containing the network
        :return: Cytoscape.js compatible network with colors and shapes based on gene families and label
        co-occurrences
        """
        completed_network = deepcopy(network)

        gene_family_only, gene_both = {}, {}

        for node in completed_network["nodes"]:
            node_data = node.get("data", {})
            if "gene_id" not in node_data:
                continue

            family_labels = [node_data["family_name"]] if "family_name" in node_data else []
            all_labels = family_labels + list(node_data.get("interpro", []))

            gene_family_only[node_data["gene_id"]] = set(family_labels)
            gene_both[node_data["gene_id"]] = set(all_labels)

        fam_to_shape_and_color = family_to_shape_and_color(gene_family_only)
        both_to_shape_and_color = family_to_shape_and_color(gene_both)

        for node in completed_network["nodes"]:
            node_data = node.get("data", {})
            if "gene_id" not in node_data:
                continue

            gene_id = node_data["gene_id"]

            # Each value is indexed as [0] shape, [1] color, [2] label
            if gene_id in fam_to_shape_and_color:
                node_data["family_color"] = fam_to_shape_and_color[gene_id][1]
                node_data["family_shape"] = fam_to_shape_and_color[gene_id][0]

            if gene_id in both_to_shape_and_color:
                node_data["lc_label"] = both_to_shape_and_color[gene_id][2]
                node_data["lc_color"] = both_to_shape_and_color[gene_id][1]
                node_data["lc_shape"] = both_to_shape_and_color[gene_id][0]

        return completed_network

    @staticmethod
    def add_descriptions_nodes(network):
        """
        Adds the description to nodes (if available), the best name to display and alternative names (aka gene
        tokens) to a cytoscape.js network

        Nodes whose gene is not found get ``None`` as description and tokens and their "gene_name" as best name.

        :param network: Cytoscape.js compatible network object
        :return: Network with descriptions and tokens added
        """
        completed_network = deepcopy(network)

        sequence_ids = [
            node["data"]["gene_id"]
            for node in completed_network["nodes"]
            if "data" in node and "gene_id" in node["data"]
        ]

        sequences = Sequence.query.filter(Sequence.id.in_(sequence_ids)).all()

        # sequence id -> (description, best name, tokens); an empty token string is stored as None
        details = {
            s.id: (
                s.description,
                s.best_name,
                ", ".join(x.name for x in s.xrefs if x.platform == 'token') or None,
            )
            for s in sequences
        }

        for node in completed_network["nodes"]:
            node_data = node.get("data", {})
            if "gene_id" not in node_data:
                continue

            if node_data["gene_id"] in details:
                description, best_name, tokens = details[node_data["gene_id"]]
            else:
                description, best_name, tokens = None, node_data["gene_name"], None

            node_data["description"] = description
            node_data["best_name"] = best_name
            node_data["tokens"] = tokens

        return completed_network

    @staticmethod
    def add_depth_data_nodes(network):
        """
        Colors the nodes of a cytoscape compatible network (dict) based on their depth

        This function is no longer used as it has been replaced by a mapper in the cycss

        :param network: dict containing the network
        :return: Cytoscape.js compatible network with depth information for nodes added
        """
        colored_network = deepcopy(network)

        for node in colored_network["nodes"]:
            if "depth" in node.get("data", {}):
                # depth is used as a list index; a depth beyond the palette raises IndexError
                node["data"]["depth_color"] = CytoscapeHelper._DEPTH_COLORS[node["data"]["depth"]]

        return colored_network

    @staticmethod
    def connect_homologs(network):
        """
        Connects homologous (or orthologous) genes using a homology edge. Requires a cytoscape.js compatible
        network as input and will return a network with homologs connected. Note that gene families need to be
        present in the network *before* applying this function. (e.g. using add_family_data_nodes in this class)

        Nodes without a "family_id" (or with ``None``) are never connected.

        :param network: Cytoscape.js compatible network whose nodes carry a "family_id"
        :return: copy of the network with one extra edge for every pair of nodes sharing a family
        """
        connected_network = deepcopy(network)

        # combinations yields every unordered pair once, in the order nested index loops would produce
        for first, second in combinations(connected_network['nodes'], 2):
            family_id = first['data'].get('family_id')

            if family_id is not None and family_id == second['data'].get('family_id'):
                connected_network['edges'].append({
                    'data': {'source': first['data']['id'],
                             'target': second['data']['id'],
                             'color': "#33D",
                             'homology_color': "#33D",
                             'edge_type': 'homology',
                             'ecc_pair_color': "#33D",
                             'homology': True}
                })

        return connected_network

    @staticmethod
    def add_connection_data_nodes(network):
        """
        Adds to each node of a cytoscape compatible network the number of edges that node possesses

        The value stored under "neighbors" is the number of edges touching the node, so parallel edges are
        counted separately while a self-loop is counted once.

        :param network: dict containing the network
        :return: Cytoscape.js compatible network with connectivity information for nodes added
        """
        colored_network = deepcopy(network)

        # Single pass over the edges instead of rescanning all edges for every node
        edge_counts = Counter()
        for edge in colored_network["edges"]:
            edge_data = edge.get("data", {})
            if "source" in edge_data and "target" in edge_data:
                # A set, so an edge from a node to itself adds one to that node, not two
                edge_counts.update({edge_data["source"], edge_data["target"]})

        for node in colored_network["nodes"]:
            if "id" in node.get("data", {}):
                node["data"]["neighbors"] = edge_counts[node["data"]["id"]]

        return colored_network

    @staticmethod
    def add_species_data_nodes(network):
        """
        Colors nodes in a cytoscape compatible network (dict) based on species

        :param network: dict containing the network
        :return: Cytoscape.js compatible network with a "species_color" added to nodes with a "species_id"
        """
        colors = {s.id: s.color for s in Species.query.all()}

        colored_network = deepcopy(network)

        for node in colored_network["nodes"]:
            if "species_id" in node.get("data", {}):
                # A species_id that is not in the database raises KeyError
                node["data"]["species_color"] = colors[node["data"]["species_id"]]

        return colored_network

    @staticmethod
    def add_cluster_data_nodes(network, cluster_method_id):
        """
        Adds co-expression cluster information to a cytoscape compatible network (dict)

        :param network: dict containing the network
        :param cluster_method_id: internal id for the clustering method to use
        :return: Network dict completed with cluster info
        """
        colored_network = deepcopy(network)

        probes = [node['data']['id'] for node in colored_network['nodes'] if 'id' in node['data']]

        sequence_cluster_ass = SequenceCoexpressionClusterAssociation.query \
            .filter(SequenceCoexpressionClusterAssociation.probe.in_(probes)) \
            .filter(SequenceCoexpressionClusterAssociation.coexpression_cluster.has(method_id=cluster_method_id)) \
            .all()

        data = {
            sca.probe: {'cluster_id': sca.coexpression_cluster_id, 'cluster_name': sca.coexpression_cluster.name}
            for sca in sequence_cluster_ass
        }

        color_shapes = family_to_shape_and_color({p: [v['cluster_name']] for p, v in data.items()})

        for node in colored_network["nodes"]:
            # Nodes without an id were left out of the query, so they cannot match anything below
            probe = node['data'].get('id')

            if probe in data:
                node['data']['cluster_id'] = data[probe]['cluster_id']
                node['data']['cluster_name'] = data[probe]['cluster_name']
                node['data']['cluster_url'] = url_for('expression_cluster.expression_cluster_view',
                                                      cluster_id=data[probe]['cluster_id'])

            if probe in color_shapes:
                node['data']['cluster_color'] = color_shapes[probe][1]
                node['data']['cluster_shape'] = color_shapes[probe][0]

        return colored_network

    @staticmethod
    def add_specificity_data_nodes(network, specificity_method_id):
        """
        Adds profile specificity information to a cytoscape compatible network (dict)

        When a probe has several specificity records, the one with the highest score is used.

        :param network: dict containing the network
        :param specificity_method_id: specificity method which should be used
        :return: Network dict completed with specificity info
        """
        colored_network = deepcopy(network)

        probes = [node['data']['id'] for node in colored_network['nodes'] if 'id' in node['data']]

        spm = ExpressionSpecificity.query \
            .filter(ExpressionSpecificity.method_id == specificity_method_id) \
            .filter(ExpressionSpecificity.profile.has(ExpressionProfile.probe.in_(probes))) \
            .all()

        data = {}

        for s in spm:
            probe = s.profile.probe
            best = data.get(probe)

            # Keep the first record on ties: only a strictly higher score replaces the stored one
            if best is None or s.score > best['score']:
                data[probe] = {'score': s.score, 'condition': s.condition}

        color_shapes = family_to_shape_and_color({p: [v['condition']] for p, v in data.items()})

        for node in colored_network["nodes"]:
            probe = node['data'].get('id')

            if probe in data:
                node['data']['spm_score'] = data[probe]['score']
                node['data']['spm_condition'] = data[probe]['condition']

            if probe in color_shapes:
                node['data']['spm_condition_color'] = color_shapes[probe][1]
                node['data']['spm_condition_shape'] = color_shapes[probe][0]

        return colored_network

    @staticmethod
    def add_depth_data_edges(network):
        """
        Colors the edges of a cytoscape compatible network (dict) based on their depth

        This function is no longer used as it has been replaced by a mapper in the cycss

        :param network: dict containing the network
        :return: Cytoscape.js compatible network with depth information for edges added
        """
        colored_network = deepcopy(network)

        for edge in colored_network["edges"]:
            if "depth" in edge.get("data", {}):
                # depth is used as a list index; a depth beyond the palette raises IndexError
                edge["data"]["depth_color"] = CytoscapeHelper._DEPTH_COLORS[edge["data"]["depth"]]

        return colored_network

    @staticmethod
    def prune_unique_lc(network):
        """
        Remove genes from network that have a unique label (label co-occ.). Requires a Cytoscape.js compatible
        network and will return a pruned copy in the same format. Note that label co-occ. need to be present in
        the network *before* applying this function. (e.g. using add_lc_data_nodes in this class)

        Nodes without an "lc_label" are always kept. Edges are kept when both endpoints match the "name" of a
        kept node (presumably a node's "name" equals its "id" -- verify against the code building the nodes).

        :param network: dict containing the network
        :return: Cytoscape.js compatible network with the pruned network
        """
        lc_counter = Counter(node['data']['lc_label'] for node in network['nodes'] if 'lc_label' in node['data'])

        pruned_network = {'nodes': [], 'edges': []}
        good_nodes = set()

        for node in network['nodes']:
            node_data = node['data']

            # Drop the node when it is the only one carrying its label
            if 'lc_label' in node_data and lc_counter[node_data['lc_label']] <= 1:
                continue

            good_nodes.add(node_data.get('name'))
            pruned_network['nodes'].append(deepcopy(node))

        for edge in network['edges']:
            if edge['data']['source'] in good_nodes and edge['data']['target'] in good_nodes:
                pruned_network['edges'].append(deepcopy(edge))

        return pruned_network

    @staticmethod
    def tag_ecc_singles(network):
        """
        When comparing ECC pairs, genes without a homolog in the graph could be hidden, to this end these genes
        need to be tagged so javascript can handle this.

        Every node gets a "tag": 'always_show' for query nodes, for nodes in the neighborhood of more than one
        query and for both ends of a homology edge that touches more than one neighborhood; 'hideable' otherwise.

        :param network: input network
        :return: network with singles tagged
        """
        output_network = deepcopy(network)

        # Find Query genes, add hideable tag to everything except queries
        queries = set()

        for n in output_network['nodes']:
            if n['data'].get('node_type') == 'query':
                queries.add(n['data'].get('name'))
                n['data']['tag'] = 'always_show'
            else:
                n['data']['tag'] = 'hideable'

        # Store neighborhoods: for each query, the non-query nodes directly connected to it
        neighborhoods = {q: set() for q in queries}

        for e in output_network['edges']:
            source, target = e['data']['source'], e['data']['target']

            if source in queries and target not in queries:
                neighborhoods[source].add(target)
            elif target in queries and source not in queries:
                neighborhoods[target].add(source)

        # adjust tags on genes that should be shown (shared neighborhood)
        # Check for genes present in both neighborhoods (intra species comparisons)
        for n in output_network['nodes']:
            name = n['data'].get('name')

            if sum(1 for members in neighborhoods.values() if name in members) > 1:
                n['data']['tag'] = 'always_show'

        # Check homology edges
        genes_to_show = set()

        for e in output_network['edges']:
            if e['data'].get('homology'):
                source, target = e['data']['source'], e['data']['target']
                touched = sum(1 for members in neighborhoods.values() if source in members or target in members)

                if touched > 1:
                    genes_to_show.update((source, target))

        for n in output_network['nodes']:
            if n['data'].get('name') in genes_to_show:
                n['data']['tag'] = 'always_show'

        return output_network

    @staticmethod
    def merge_networks(network_one, network_two, prune=True):
        """
        Function to merge two networks. A compound/parent node is created for each network and based on the
        family_id, edges between homologous/orthologous genes are added.

        Note that label co-occurrences need to be (re-)calculated on the merged network

        :param network_one: Dictionary (cytoscape.js structure) of the first network
        :param network_two: Dictionary (cytoscape.js structure) of the second network
        :param prune: if True, will only retain nodes with a homolog in the other network (and the compound nodes)
        :return: Cytoscape.js compatible network with both networks merged and homologs/orthologs connected
        """
        # Work on copies: the "parent" attribute must not leak into the networks owned by the caller
        nodes_one = deepcopy(network_one["nodes"])
        nodes_two = deepcopy(network_two["nodes"])
        edges = deepcopy(network_one['edges']) + deepcopy(network_two['edges'])

        for node in nodes_one:
            node["data"]["parent"] = "compound_node_one"

        for node in nodes_two:
            node["data"]["parent"] = "compound_node_two"

        nodes = [
            {"data": {"id": "compound_node_one", "compound": True, "color": "#BEF"}},
            {"data": {"id": "compound_node_two", "compound": True, "color": "#BEF"}},
        ]
        nodes.extend(nodes_one)
        nodes.extend(nodes_two)

        # Index the second network by family so each node of the first network finds its homologs directly.
        # The lists keep the node order, so the homology edges come out in the order a nested loop would give.
        two_by_family = {}
        for node in nodes_two:
            family_id = node["data"].get("family_id")
            if family_id is not None:
                two_by_family.setdefault(family_id, []).append(node["data"]["id"])

        # draw edges between nodes from different networks
        nodes_to_keep = {"compound_node_one", "compound_node_two"}  # Nodes to keep when prune is enabled

        for node in nodes_one:
            # None is never a key of the index, so nodes without a family simply find no homologs
            for homolog_id in two_by_family.get(node["data"].get("family_id"), ()):
                nodes_to_keep.add(node["data"]["id"])
                nodes_to_keep.add(homolog_id)

                edges.append({'data': {'source': node["data"]["id"],
                                       'target': homolog_id,
                                       'color': "#33D",
                                       'homology': True}})

        if not prune:
            return {'nodes': nodes, 'edges': edges}

        # Prune is enabled, only return nodes which have a homolog in the other network (and compound nodes)
        return {'nodes': [n for n in nodes if n["data"]["id"] in nodes_to_keep],
                'edges': [e for e in edges
                          if e["data"]["source"] in nodes_to_keep and e["data"]["target"] in nodes_to_keep]}

    @staticmethod
    def get_families(network):
        """
        Extracts gene families from a cytoscape.js compatible network object

        :param network: network to extract families from
        :return: List with the family name of every node that has one (a family occurs once per node)
        """
        return [node["data"]["family_name"] for node in network["nodes"]
                if node.get("data", {}).get("family_name") is not None]