Skip to content
Permalink
20fce0df6e
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
567 lines (451 sloc) 24.6 KB
from copy import deepcopy
from collections import Counter
from flask import url_for
from sqlalchemy.orm import joinedload
from conekt.models.expression.specificity import ExpressionSpecificity
from conekt.models.expression.profiles import ExpressionProfile
from conekt.models.relationships.sequence_cluster import SequenceCoexpressionClusterAssociation
from conekt.models.relationships.sequence_family import SequenceFamilyAssociation
from conekt.models.relationships.sequence_interpro import SequenceInterproAssociation
from conekt.models.sequences import Sequence
from conekt.models.species import Species
from conekt.models.clades import Clade
from utils.color import family_to_shape_and_color, index_to_shape_and_color
class CytoscapeHelper:
    """
    Static helpers to convert networks into Cytoscape.js compatible dicts and to decorate their nodes and edges.

    A Cytoscape.js compatible network, as used throughout this class, is a dict with a "nodes" and an "edges"
    list in which every element keeps its attributes in a dict stored under the "data" key.
    """

    # Colors assigned by add_depth_data_nodes and add_depth_data_edges, indexed by depth
    _DEPTH_COLORS = ["#3CE500", "#B7D800", "#CB7300", "#BF0003"]

    @staticmethod
    def parse_network(network):
        """
        Parses a network generated by the ExpressionNetwork and CoexpressionCluster model, adding basic information
        and exporting the whole thing to a cytoscape.js compatible structure

        :param network: dict with a "nodes" and an "edges" list; each node is a dict that must provide the keys
                        "id", "gene_id" and "gene_name"
        :return: Network fully compatible with Cytoscape.js
        """
        # Shallow-copy every record so the attributes added below do not leak into the caller's network
        output = {
            "nodes": [{"data": dict(n)} for n in network["nodes"]],
            "edges": [{"data": dict(e)} for e in network["edges"]],
        }

        # add basic colors and shapes to nodes and url to gene pages
        for node in output["nodes"]:
            node_data = node["data"]

            if node_data["gene_id"] is not None:
                node_data["gene_link"] = url_for("sequence.sequence_view", sequence_id=node_data["gene_id"])

            # only nodes whose id differs from the gene name get a link to look up the profile by probe
            if node_data["id"] != node_data["gene_name"]:
                node_data["profile_link"] = url_for("expression_profile.expression_profile_find",
                                                    probe=node_data["id"])

            node_data["color"] = "#CCC"
            node_data["shape"] = "ellipse"

        for edge in output["edges"]:
            edge["data"]["color"] = "#888"

        return output

    @staticmethod
    def add_family_data_nodes(network, family_method_id):
        """
        Adds family, clade and interpro information to a cytoscape compatible network (dict)

        Nodes for which neither a family nor an interpro domain is found get default (grey, rectangle) values.

        :param network: dict containing the network
        :param family_method_id: desired type/method used to construct the families
        :return: Cytoscape.js compatible network with family, clade and interpro information included
        """
        completed_network = deepcopy(network)

        sequence_ids = [node["data"]["gene_id"] for node in completed_network["nodes"]
                        if "data" in node and "gene_id" in node["data"]]

        sequence_families = SequenceFamilyAssociation.query.\
            filter(SequenceFamilyAssociation.sequence_id.in_(sequence_ids)).\
            options(joinedload('family.clade')).\
            filter(SequenceFamilyAssociation.family.has(method_id=family_method_id)).all()

        sequence_interpro = SequenceInterproAssociation.query.\
            filter(SequenceInterproAssociation.sequence_id.in_(sequence_ids)).all()

        # sequence id -> family/clade/interpro details
        data = {}

        # the position of a clade in this list (ordered by species count) selects its color and shape
        clades = Clade.query.order_by(Clade.species_count).all()
        clade_list = [c.name for c in clades]

        for s in sequence_families:
            entry = {
                "name": s.family.name,
                "id": s.gene_family_id,
                "url": url_for('family.family_view', family_id=s.gene_family_id),
            }

            clade = s.family.clade
            if clade is not None:
                color, shape = index_to_shape_and_color(clade_list.index(clade.name))
                entry["clade_color"] = color
                entry["clade_shape"] = shape
                entry["clade"] = clade.name
                entry["clade_count"] = clade.species_count
            else:
                entry["clade_color"] = "#CCC"
                entry["clade_shape"] = "rectangle"
                entry["clade"] = "None"
                entry["clade_count"] = 0

            # a later association for the same sequence replaces an earlier one
            data[s.sequence_id] = entry

        for i in sequence_interpro:
            # sequences with domains but without a family get a placeholder entry (no clade color/shape)
            entry = data.setdefault(i.sequence_id, {
                "name": None,
                "id": None,
                "url": None,
                "clade": "None",
                "clade_count": 0,
            })
            entry.setdefault("interpro", []).append(i.domain.label)

        clade_keys = ("clade", "clade_count", "clade_shape", "clade_color")

        for node in completed_network["nodes"]:
            info = None
            if "data" in node and "gene_id" in node["data"]:
                info = data.get(node["data"]["gene_id"])

            node_data = node["data"]

            if info is None:
                # nothing known about this node: fall back to defaults
                node_data["family_name"] = None
                node_data["family_id"] = None
                node_data["family_url"] = None
                node_data["family_color"] = "#CCC"
                node_data["family_shape"] = "rectangle"
                node_data["family_clade_color"] = "#CCC"
                node_data["family_clade_shape"] = "rectangle"
                node_data["family_clade"] = "None"
                node_data["family_clade_count"] = 1
                continue

            if "interpro" in info:
                node_data["interpro"] = info["interpro"]

            node_data["family_name"] = info["name"]
            node_data["family_id"] = info["id"]
            node_data["family_url"] = info["url"]

            if all(key in info for key in clade_keys):
                node_data["family_clade"] = info["clade"]
                node_data["family_clade_color"] = info["clade_color"]
                node_data["family_clade_shape"] = info["clade_shape"]
                node_data["family_clade_count"] = info["clade_count"]
            else:
                node_data["family_clade_color"] = "#CCC"
                node_data["family_clade_shape"] = "rectangle"
                node_data["family_clade"] = "None"
                node_data["family_clade_count"] = 1

        return completed_network

    @staticmethod
    def add_lc_data_nodes(network):
        """
        Colors a network based on family information and label co-occurrences.

        :param network: dict containing the network
        :return: Cytoscape.js compatible network with colors and shapes based on gene families and label
                 co-occurrences
        """
        completed_network = deepcopy(network)

        # gene id -> set of labels, once with the family only and once with family + interpro domains
        gene_family_only, gene_both = {}, {}

        for node in completed_network["nodes"]:
            if "data" not in node or "gene_id" not in node["data"]:
                continue

            node_data = node["data"]
            fam_only, both = [], []

            if "family_name" in node_data:
                fam_only.append(node_data["family_name"])
                both.append(node_data["family_name"])

            if "interpro" in node_data:
                both += node_data["interpro"]

            gene_family_only[node_data["gene_id"]] = set(fam_only)
            gene_both[node_data["gene_id"]] = set(both)

        fam_to_shape_and_color = family_to_shape_and_color(gene_family_only)
        both_to_shape_and_color = family_to_shape_and_color(gene_both)

        for node in completed_network["nodes"]:
            if "data" not in node or "gene_id" not in node["data"]:
                continue

            node_data = node["data"]
            gene_id = node_data["gene_id"]

            # index 0 is used as shape, index 1 as color and index 2 as label
            if gene_id in fam_to_shape_and_color:
                node_data["family_color"] = fam_to_shape_and_color[gene_id][1]
                node_data["family_shape"] = fam_to_shape_and_color[gene_id][0]

            if gene_id in both_to_shape_and_color:
                node_data["lc_label"] = both_to_shape_and_color[gene_id][2]
                node_data["lc_color"] = both_to_shape_and_color[gene_id][1]
                node_data["lc_shape"] = both_to_shape_and_color[gene_id][0]

        return completed_network

    @staticmethod
    def add_descriptions_nodes(network):
        """
        Adds the description to nodes (if available), the best name to display and alternative names (aka gene tokens)
        to a cytoscape.js network

        :param network: Cytoscape.js compatible network object
        :return: Network with descriptions and tokens added
        """
        completed_network = deepcopy(network)

        sequence_ids = [node["data"]["gene_id"] for node in completed_network["nodes"]
                        if "data" in node and "gene_id" in node["data"]]

        sequences = Sequence.query.filter(Sequence.id.in_(sequence_ids)).all()

        descriptions = {s.id: s.description for s in sequences}
        best_names = {s.id: s.best_name for s in sequences}

        # comma separated token names; sequences without tokens are stored as None instead of ""
        tokens = {s.id: ", ".join([x.name for x in s.xrefs if x.platform == 'token']) or None
                  for s in sequences}

        for node in completed_network["nodes"]:
            if "data" not in node or "gene_id" not in node["data"]:
                continue

            node_data = node["data"]
            gene_id = node_data["gene_id"]

            node_data["description"] = descriptions.get(gene_id)

            # fall back on the gene name if the sequence could not be found
            if gene_id in best_names:
                node_data["best_name"] = best_names[gene_id]
            else:
                node_data["best_name"] = node_data["gene_name"]

            node_data["tokens"] = tokens.get(gene_id)

        return completed_network

    @staticmethod
    def add_depth_data_nodes(network):
        """
        Colors a cytoscape compatible network (dict) based on node depth

        This function is no longer used as it has been replaced by a mapper in the cycss

        :param network: dict containing the network
        :return: Cytoscape.js compatible network with depth information for nodes added
        """
        colored_network = deepcopy(network)

        for node in colored_network["nodes"]:
            if "data" in node and "depth" in node["data"]:
                # depth is used as an index, only four colors are defined
                node["data"]["depth_color"] = CytoscapeHelper._DEPTH_COLORS[node["data"]["depth"]]

        return colored_network

    @staticmethod
    def connect_homologs(network):
        """
        Connects homologous (or orthologous) genes using a dashed edge. Requires a cytoscape.js compatible network as
        input and will return a network with homologs connected. Note that gene families need to be present in the
        network *before* applying this function. (e.g. using add_family_data_nodes in this class)

        Nodes without a family_id (or with family_id None) are never connected.

        :param network: dict containing the network
        :return: copy of the network with an extra edge for each pair of nodes that share a family_id
        """
        connected_network = deepcopy(network)
        nodes = connected_network['nodes']

        # compare each node with all nodes that come after it, this way each pair is considered once
        for position, first in enumerate(nodes):
            family_id = first['data'].get('family_id')
            if family_id is None:
                continue

            for second in nodes[position + 1:]:
                if second['data'].get('family_id') != family_id:
                    continue

                connected_network['edges'].append({
                    'data': {'source': first['data']['id'],
                             'target': second['data']['id'],
                             'color': "#33D",
                             'homology_color': "#33D",
                             'edge_type': 'homology',
                             'ecc_pair_color': "#33D",
                             'homology': True}
                })

        return connected_network

    @staticmethod
    def add_connection_data_nodes(network):
        """
        Adds data to a cytoscape compatible network's nodes based on the number of edges that node possesses

        Edges that lack "data", "source" or "target" are ignored; an edge that connects a node with itself is
        counted once.

        :param network: dict containing the network
        :return: Cytoscape.js compatible network with connectivity information for nodes added
        """
        colored_network = deepcopy(network)

        # count the edges per node id in a single pass over the edges
        edge_counts = Counter()
        for edge in colored_network["edges"]:
            if "data" not in edge:
                continue

            edge_data = edge["data"]
            if "source" in edge_data and "target" in edge_data:
                # a set makes sure a self-loop contributes only once
                edge_counts.update({edge_data["source"], edge_data["target"]})

        for node in colored_network["nodes"]:
            if "data" in node and "id" in node["data"]:
                node["data"]["neighbors"] = edge_counts[node["data"]["id"]]

        return colored_network

    @staticmethod
    def add_species_data_nodes(network):
        """
        Colors nodes in a cytoscape compatible network (dict) based on species

        :param network: dict containing the network
        :return: Cytoscape.js compatible network with the species color added to nodes with a species_id
        """
        colors = {s.id: s.color for s in Species.query.all()}

        colored_network = deepcopy(network)

        for node in colored_network["nodes"]:
            if "data" in node and "species_id" in node["data"]:
                node["data"]["species_color"] = colors[node["data"]["species_id"]]

        return colored_network

    @staticmethod
    def add_cluster_data_nodes(network, cluster_method_id):
        """
        Adds co-expression cluster information to a cytoscape compatible network (dict)

        :param network: dict containing the network
        :param cluster_method_id: internal id for the clustering method to use
        :return: Network dict completed with cluster info
        """
        colored_network = deepcopy(network)

        probes = [node['data']['id'] for node in colored_network['nodes'] if 'id' in node['data']]

        sequence_cluster_ass = SequenceCoexpressionClusterAssociation.query.\
            filter(SequenceCoexpressionClusterAssociation.probe.in_(probes)).\
            filter(SequenceCoexpressionClusterAssociation.coexpression_cluster.has(method_id=cluster_method_id)).all()

        # probe -> cluster details; a later association for the same probe replaces an earlier one
        data = {sca.probe: {'cluster_id': sca.coexpression_cluster_id,
                            'cluster_name': sca.coexpression_cluster.name}
                for sca in sequence_cluster_ass}

        color_shapes = family_to_shape_and_color({p: [v['cluster_name']] for p, v in data.items()})

        for node in colored_network["nodes"]:
            node_data = node['data']

            # nodes without an id were not part of the query either
            if 'id' not in node_data:
                continue

            probe = node_data['id']

            if probe in data:
                node_data['cluster_id'] = data[probe]['cluster_id']
                node_data['cluster_name'] = data[probe]['cluster_name']
                node_data['cluster_url'] = url_for('expression_cluster.expression_cluster_view',
                                                   cluster_id=node_data['cluster_id'])

            if probe in color_shapes:
                node_data['cluster_color'] = color_shapes[probe][1]
                node_data['cluster_shape'] = color_shapes[probe][0]

        return colored_network

    @staticmethod
    def add_specificity_data_nodes(network, specificity_method_id):
        """
        Adds profile specificity information to a cytoscape compatible network (dict)

        For each probe the condition with the highest score is kept.

        :param network: dict containing the network
        :param specificity_method_id: specificity method which should be used
        :return: Network dict completed with specificity (score and condition) info
        """
        colored_network = deepcopy(network)

        probes = [node['data']['id'] for node in colored_network['nodes'] if 'id' in node['data']]

        spm = ExpressionSpecificity.query.\
            filter(ExpressionSpecificity.method_id == specificity_method_id).\
            filter(ExpressionSpecificity.profile.has(ExpressionProfile.probe.in_(probes))).all()

        # probe -> best scoring condition
        data = {}

        for s in spm:
            probe = s.profile.probe
            best = data.get(probe)

            # on a tie the condition encountered first is kept
            if best is None or s.score > best['score']:
                data[probe] = {'score': s.score, 'condition': s.condition}

        color_shapes = family_to_shape_and_color({p: [v['condition']] for p, v in data.items()})

        for node in colored_network["nodes"]:
            node_data = node['data']

            # nodes without an id were not part of the query either
            if 'id' not in node_data:
                continue

            probe = node_data['id']

            if probe in data:
                node_data['spm_score'] = data[probe]['score']
                node_data['spm_condition'] = data[probe]['condition']

            if probe in color_shapes:
                node_data['spm_condition_color'] = color_shapes[probe][1]
                node_data['spm_condition_shape'] = color_shapes[probe][0]

        return colored_network

    @staticmethod
    def add_depth_data_edges(network):
        """
        Colors a cytoscape compatible network (dict) based on edge depth

        This function is no longer used as it has been replaced by a mapper in the cycss

        :param network: dict containing the network
        :return: Cytoscape.js compatible network with depth information for edges added
        """
        colored_network = deepcopy(network)

        for edge in colored_network["edges"]:
            if "data" in edge and "depth" in edge["data"]:
                # depth is used as an index, only four colors are defined
                edge["data"]["depth_color"] = CytoscapeHelper._DEPTH_COLORS[edge["data"]["depth"]]

        return colored_network

    @staticmethod
    def prune_unique_lc(network):
        """
        Remove genes from network that have a unique label (label co-occ.). Requires a Cytoscape.js compatible network
        and will return a pruned copy in the same format. Note that label co-occ. need to be present in the
        network *before* applying this function. (e.g. using add_lc_data_nodes in this class)

        Nodes without an lc_label are always kept. Edges are kept if both their source and target match the
        name of a node that is kept.

        :param network: dict containing the network
        :return: Cytoscape.js compatible network with the pruned network
        """
        lc_counter = Counter(node['data']['lc_label'] for node in network["nodes"]
                             if 'lc_label' in node['data'])

        pruned_network = {'nodes': [], 'edges': []}
        good_nodes = set()

        for node in network['nodes']:
            node_data = node['data']

            # drop nodes whose label occurs only once in the network
            if 'lc_label' in node_data and lc_counter[node_data['lc_label']] <= 1:
                continue

            good_nodes.add(node_data['name'])
            pruned_network['nodes'].append(deepcopy(node))

        for edge in network['edges']:
            if edge['data']['source'] in good_nodes and edge['data']['target'] in good_nodes:
                pruned_network['edges'].append(deepcopy(edge))

        return pruned_network

    @staticmethod
    def tag_ecc_singles(network):
        """
        When comparing ECC pairs, genes without a homolog in the graph could be hidden, to this end these genes need
        to be tagged so javascript can handle this.

        Every node gets a "tag" that is either 'always_show' or 'hideable'.

        :param network: input network
        :return: network with singles tagged
        """
        output_network = deepcopy(network)

        # Find Query genes, add hideable tag to everything except queries
        queries = []
        for node in output_network['nodes']:
            node_data = node['data']
            if node_data['node_type'] == 'query' and node_data['name'] not in queries:
                queries.append(node_data['name'])
                node_data['tag'] = 'always_show'
            else:
                node_data['tag'] = 'hideable'

        # Store neighborhoods (non-query genes directly connected with each query)
        neighborhoods = {q: set() for q in queries}
        for edge in output_network['edges']:
            source, target = edge['data']['source'], edge['data']['target']
            if source in neighborhoods:
                if target not in neighborhoods:
                    neighborhoods[source].add(target)
            elif target in neighborhoods:
                # the source cannot be a query here, that case is handled by the branch above
                neighborhoods[target].add(source)

        # adjust tags on genes that should be shown (shared neighborhood)
        # Check for genes present in both neighborhoods (intra species comparisons)
        for node in output_network['nodes']:
            name = node['data']['name']
            hits = sum(1 for members in neighborhoods.values() if name in members)
            if hits > 1:
                node['data']['tag'] = 'always_show'

        # Check homology edges, show both ends if the edge touches the neighborhood of more than one query
        genes_to_show = set()
        for edge in output_network['edges']:
            if not edge['data'].get('homology'):
                continue

            source, target = edge['data']['source'], edge['data']['target']
            hits = sum(1 for members in neighborhoods.values() if source in members or target in members)
            if hits > 1:
                genes_to_show.update((source, target))

        for node in output_network['nodes']:
            if node['data']['name'] in genes_to_show:
                node['data']['tag'] = 'always_show'

        return output_network

    @staticmethod
    def merge_networks(network_one, network_two):
        """
        Function to merge two networks. A compound/parent node is created for each network and based on the family_id,
        edges between homologous/orthologous genes are added.

        Note that label co-occurrences need to be (re-)calculated on the merged network

        The input networks are not modified. Nodes without a family_id (or with family_id None) do not get
        homology edges.

        :param network_one: Dictionary (cytoscape.js structure) of the first network
        :param network_two: Dictionary (cytoscape.js structure) of the second network
        :return: Cytoscape.js compatible network with both networks merged and homologs/orthologs connected
        """
        # work on copies, like the other methods in this class, so the caller's networks stay untouched
        nodes_one = deepcopy(network_one["nodes"])
        nodes_two = deepcopy(network_two["nodes"])
        edges = deepcopy(network_one['edges']) + deepcopy(network_two['edges'])

        nodes = [
            {"data": {"id": "compound_node_one", "compound": True, "color": "#BEF"}},
            {"data": {"id": "compound_node_two", "compound": True, "color": "#BEF"}},
        ]

        for node in nodes_one:
            node["data"]["parent"] = "compound_node_one"
            nodes.append(node)

        for node in nodes_two:
            node["data"]["parent"] = "compound_node_two"
            nodes.append(node)

        # index the second network by family so homologs can be looked up without a nested loop
        family_members = {}
        for node in nodes_two:
            family_id = node["data"].get("family_id")
            if family_id is not None:
                family_members.setdefault(family_id, []).append(node)

        # draw edges between nodes from different networks that are from the same family
        for node_one in nodes_one:
            family_id = node_one["data"].get("family_id")
            if family_id is None:
                continue

            for node_two in family_members.get(family_id, ()):
                edges.append({'data': {'source': node_one["data"]["id"],
                                       'target': node_two["data"]["id"],
                                       'color': "#33D",
                                       'homology': True}})

        return {'nodes': nodes, 'edges': edges}

    @staticmethod
    def get_families(network):
        """
        Extracts gene families from a cytoscape.js compatible network object

        :param network: network to extract families from
        :return: List with the family name of each node that has one (a family shared by several nodes occurs
                 multiple times)
        """
        return [node["data"]["family_name"] for node in network["nodes"]
                if 'data' in node
                and 'family_name' in node["data"]
                and node["data"]["family_name"] is not None]