From 51540f57a89d12174c93f214886ebdffe73a3be3 Mon Sep 17 00:00:00 2001 From: anastasiia Date: Tue, 18 Dec 2018 13:24:13 +0100 Subject: [PATCH] Fix the bug: all gene names are now converted to lowercase to avoid problems with input files --- visualize.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/visualize.py b/visualize.py index 6b6c973..32ddc02 100644 --- a/visualize.py +++ b/visualize.py @@ -156,10 +156,13 @@ def read_correlation(correlation_file, groups, variable): for gene_name in genes_of_interest: - genes_to_show[gene_name] = genes_to_show.get(gene_name, []) #get a place in the dictionary for this gene + orig_gene_name = gene_name.lower() + + genes_to_show[orig_gene_name] = genes_to_show.get(orig_gene_name, []) #get a place in the dictionary for this gene #first look for the genes in the gtf_genes file genes_family = subprocess.getoutput("grep -i " + gene_name + " " + correlation_file) + print(genes_family) if len(genes_family) != 0: logger.info("working with the gene " + gene_name) @@ -168,16 +171,20 @@ def read_correlation(correlation_file, groups, variable): for genes_line in re.split(r'\n', genes_family): genes_array = re.split(r'\t', genes_line) - if genes_array[0] == gene_name: + + check_gene_name = genes_array[0].lower() + + print(genes_array) + if check_gene_name == orig_gene_name: #this is a perfect match <3 - genes_to_show[gene_name].append(float(genes_array[3])) #append score to the array - elif genes_array[0].startswith(gene_name) and len(genes_array[0]) <= len(gene_name) + variable: #the found gene is by default 1 char longer than the gene name - if genes_array[0] in genes_to_show.keys(): - genes_to_show[genes_array[0]].append(float(genes_array[3])) + genes_to_show[check_gene_name].append(float(genes_array[3])) #append score to the array + elif check_gene_name.startswith(orig_gene_name) and len(genes_array[0]) <= len(gene_name) + variable: #the found gene is by default 1 char longer 
than the gene name + if check_gene_name in genes_to_show.keys(): + genes_to_show[check_gene_name].append(float(genes_array[3])) else: #this gene is not yet saved #make a new place in the dictionary for this gene - genes_to_show[genes_array[0]] = genes_to_show.get(genes_array[0], []) - genes_to_show[genes_array[0]].append(float(genes_array[3])) + genes_to_show[check_gene_name] = genes_to_show.get(check_gene_name, []) + genes_to_show[check_gene_name].append(float(genes_array[3])) else: logger.info("no similar genes to " + gene_name + " were found") @@ -186,6 +193,7 @@ def read_correlation(correlation_file, groups, variable): genes_to_show = dict([(k, v) for k, v in genes_to_show.items() if len(v) > 0]) genes_to_show_groups.append(genes_to_show) + print(genes_to_show_groups) return genes_to_show_groups def main():