Skip to content

Commit

Permalink
Fixing the bug. Now all the genes will be converted to lowercase to exclude the problems with input files.
Browse files Browse the repository at this point in the history
  • Loading branch information
anastasiia committed Dec 18, 2018
1 parent 94f32ca commit 51540f5
Showing 1 changed file with 16 additions and 8 deletions.
24 changes: 16 additions & 8 deletions visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,10 +156,13 @@ def read_correlation(correlation_file, groups, variable):

for gene_name in genes_of_interest:

genes_to_show[gene_name] = genes_to_show.get(gene_name, []) #get a place in the dictionary for this gene
orig_gene_name = gene_name.lower()

genes_to_show[orig_gene_name] = genes_to_show.get(orig_gene_name, []) #get a place in the dictionary for this gene

#first look for the genes in the gtf_genes file
genes_family = subprocess.getoutput("grep -i " + gene_name + " " + correlation_file)
print(genes_family)

if len(genes_family) != 0:
logger.info("working with the gene " + gene_name)
Expand All @@ -168,16 +171,20 @@ def read_correlation(correlation_file, groups, variable):

for genes_line in re.split(r'\n', genes_family):
genes_array = re.split(r'\t', genes_line)
if genes_array[0] == gene_name:

check_gene_name = genes_array[0].lower()

print(genes_array)
if check_gene_name == orig_gene_name:
#this is a perfect match <3
genes_to_show[gene_name].append(float(genes_array[3])) #append score to the array
elif genes_array[0].startswith(gene_name) and len(genes_array[0]) <= len(gene_name) + variable: #the found gene is by default 1 char longer than the gene name
if genes_array[0] in genes_to_show.keys():
genes_to_show[genes_array[0]].append(float(genes_array[3]))
genes_to_show[check_gene_name].append(float(genes_array[3])) #append score to the array
elif check_gene_name.startswith(orig_gene_name) and len(genes_array[0]) <= len(gene_name) + variable: #the found gene is by default 1 char longer than the gene name
if check_gene_name in genes_to_show.keys():
genes_to_show[check_gene_name].append(float(genes_array[3]))
else: #this gene is not yet saved
#make a new place in the dictionary for this gene
genes_to_show[genes_array[0]] = genes_to_show.get(genes_array[0], [])
genes_to_show[genes_array[0]].append(float(genes_array[3]))
genes_to_show[check_gene_name] = genes_to_show.get(check_gene_name, [])
genes_to_show[check_gene_name].append(float(genes_array[3]))

else:
logger.info("no similar genes to " + gene_name + " were found")
Expand All @@ -186,6 +193,7 @@ def read_correlation(correlation_file, groups, variable):
genes_to_show = dict([(k, v) for k, v in genes_to_show.items() if len(v) > 0])
genes_to_show_groups.append(genes_to_show)

print(genes_to_show_groups)
return genes_to_show_groups

def main():
Expand Down

0 comments on commit 51540f5

Please sign in to comment.