Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
4fad16beb9
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
61 lines (50 sloc) 1.89 KB
import copy
import sys
from Graph import Graph
import find_components
import functions
def _percentage(part, whole):
    """Return ``part`` as a percentage of ``whole``, or 0.0 when ``whole`` is zero."""
    # An empty graph (or one whose corrected total is zero) would otherwise
    # abort the report with ZeroDivisionError.
    if not whole:
        return 0.0
    return (part * 100) / whole


# Driver script: read a GFA graph, find bubbles on a copy, drop single-node
# connected components from the original, find bubbles on the original and
# print summary statistics about the resulting bubble chains.
if len(sys.argv) != 3:
    print("You need to give the following arguments <input_gfa> <k>")
    # A usage error is a failure; exit non-zero so calling shells notice.
    sys.exit(1)

# Validate <k> before touching the graph so a bad value fails immediately
# with a readable message instead of a traceback after the file was read.
try:
    k = int(sys.argv[2])
except ValueError:
    print("You need to give the following arguments <input_gfa> <k>")
    print("<k> must be an integer, got {!r}".format(sys.argv[2]))
    sys.exit(1)

print("reading files")
new_graph = Graph()
new_graph.read_gfa(sys.argv[1])

print("deep copying")
new_graph.k = k
# Work on an independent copy so the first bubble pass does not modify the
# graph that is analysed at the end.
new_graph1 = copy.deepcopy(new_graph)

# hacky stuff I need to remove later, just have it for testing
print("finding bubbles")
new_graph1.find_bubbles()

print("removing chains then finding components")
functions.remove_chains(new_graph1)
# NOTE(review): components are computed on new_graph (the untouched original),
# not on new_graph1 from which the chains were just removed. As written, the
# work done on new_graph1 only influences the print below - confirm whether
# new_graph1 was intended here.
con_comp1 = find_components.connected_components(new_graph)
new_graph1.nodes_reset()
# Keep the sole member of every component that contains exactly one entry.
nodes_to_remove_from_original = [cc[0] for cc in con_comp1 if len(cc) == 1]
print(new_graph1)

print("removing singular CCs from original graph")
functions.remove_nodes(new_graph, nodes_to_remove_from_original)
new_graph.nodes_reset()

print("finding bubbles in original graph")
new_graph.find_bubbles()
print(new_graph)
print("Done finding bubbles")

# Both results are indexed as (key into new_graph.bubble_chains, value to report).
longest = new_graph.max_chain()
longest_chain = new_graph.longest_chain()
print("Longest chain bubble-wise has {} bubbles and length"
      " of {}\n".format(longest[1], new_graph.bubble_chains[longest[0]].chain_length()))
print("Longest chain sequence-wise has {} bp and"
      " {} bubbles\n".format(longest_chain[1], new_graph.bubble_chains[longest_chain[0]].length()))
# print(new_graph.bubble_chains[longest].list_chain())

print("The bubble chains covered {}% nodes in the graph".format(
    _percentage(new_graph.nodes_in_chains(), len(new_graph.nodes))
))

total_seq = sum(node.seq_len for node in new_graph.nodes.values())

# k is subtracted once per node on both sides of the ratio - presumably to
# discount the k-length overlap between neighbouring nodes; TODO confirm.
covered_seq = new_graph.chains_coverage() - (new_graph.nodes_in_chains() * float(new_graph.k))
corrected_total = float(total_seq) - len(new_graph.nodes) * float(new_graph.k)
print("The percentage of sequences covered by chains is {}%".format(
    _percentage(covered_seq, corrected_total)
))