This repository has been archived by the owner. It is now read-only.
Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
thesis_tm/remove_singular_components.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
61 lines (50 sloc)
1.89 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Remove single-node connected components from a graph read from a GFA file,
# then find bubbles in the cleaned graph and print bubble-chain statistics.
#
# Command-line arguments: <input_gfa> <k>
#   input_gfa  path handed to Graph.read_gfa
#   k          integer stored on the graph as ``k`` and subtracted once per
#              node in the sequence-coverage computation at the end

import copy
import sys

from Graph import Graph
import find_components
import functions

if len(sys.argv) != 3:
    # Passing a string to sys.exit writes it to stderr and terminates with
    # status 1, so a wrong invocation is reported as a failure, not success.
    sys.exit("You need to give the following arguments <input_gfa> <k>")

print("reading files")
new_graph = Graph()
new_graph.read_gfa(sys.argv[1])
print("deep copying")
new_graph.k = int(sys.argv[2])
# Work on a copy so that removing chains does not touch the original graph.
new_graph1 = copy.deepcopy(new_graph)

# hacky stuff I need to remove later, just have it for testing
print("finding bubbles")
new_graph1.find_bubbles()
print("removing chains then finding components")
functions.remove_chains(new_graph1)
# NOTE(review): the components are computed on the ORIGINAL graph, not on the
# chain-stripped copy ``new_graph1`` prepared just above. Kept as-is because
# the intent cannot be confirmed from this file -- verify whether
# ``new_graph1`` was meant here.
con_comp1 = find_components.connected_components(new_graph)
new_graph1.nodes_reset()

# A component made of exactly one node is "singular"; collect those nodes.
nodes_to_remove_from_original = [cc[0] for cc in con_comp1 if len(cc) == 1]
print(new_graph1)

print("removing singular CCs from original graph")
functions.remove_nodes(new_graph, nodes_to_remove_from_original)
new_graph.nodes_reset()

print("finding bubbles in original graph")
new_graph.find_bubbles()
print(new_graph)
print("Done finding bubbles")

# Both calls return a pair: index 0 is used as a key into
# ``new_graph.bubble_chains`` and index 1 is the value reported below.
longest = new_graph.max_chain()
longest_chain = new_graph.longest_chain()
print("Longest chain bubble-wise has {} bubbles and length"
      " of {}\n".format(longest[1],
                        new_graph.bubble_chains[longest[0]].chain_length()))
print("Longest chain sequence-wise has {} bp and"
      " {} bubbles\n".format(longest_chain[1],
                             new_graph.bubble_chains[longest_chain[0]].length()))

# Share of nodes that belong to some bubble chain. Removing the singular
# components can leave the graph empty, so guard the division.
node_count = len(new_graph.nodes)
if node_count:
    node_percentage = (new_graph.nodes_in_chains() * 100) / node_count
else:
    node_percentage = 0
print("The bubble chains covered {}% nodes in the graph".format(node_percentage))

# Share of sequence covered by chains, with ``k`` subtracted once per node on
# both the covered side and the total side.
total_seq = sum(n.seq_len for n in new_graph.nodes.values())
k_value = float(new_graph.k)
uncorrected_total = float(total_seq) - node_count * k_value
if uncorrected_total:
    covered = new_graph.chains_coverage() - (new_graph.nodes_in_chains() * k_value)
    seq_percentage = (covered * 100) / uncorrected_total
else:
    # Nothing to cover (empty graph, or every node is exactly k long).
    seq_percentage = 0
print("The percentage of sequences covered by chains is {}%".format(seq_percentage))