From ad02c52dd3210554a24d26cf67c25122cfd8d6c7 Mon Sep 17 00:00:00 2001 From: anastasiia Date: Tue, 27 Nov 2018 10:41:15 +0100 Subject: [PATCH] to not lose some of the genes while looking for transcripts, we are saving the combination of gene and transcript name --- find_exons.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/find_exons.py b/find_exons.py index 4c5da26..2a71d1a 100644 --- a/find_exons.py +++ b/find_exons.py @@ -312,8 +312,9 @@ def plot_and_output(all_genes, output_directory, gene_groups): if sorted_exons[i][5] != transcript_name or i == len(sorted_exons) - 1: #this is a new transcript or the last one #save the previous transcript_name score = "%.2f" % round((sum_length * 100)/all_genes[gene]['gene_length'], 2) - genes_with_transcript_names[transcript_name] = genes_with_transcript_names.get(transcript_name, {}) - genes_with_transcript_names[transcript_name] = {'gene': gene, 'sum_length': sum_length, 'gene_length': all_genes[gene]['gene_length'], 'score': score} + gene_transcript_name = gene + "_" + transcript_name + genes_with_transcript_names[gene_transcript_name] = genes_with_transcript_names.get(transcript_name, {}) + genes_with_transcript_names[gene_transcript_name] = {'gene': gene, 'sum_length': sum_length, 'gene_length': all_genes[gene]['gene_length'], 'score': score} transcript_name = sorted_exons[i][5] sum_length = sorted_exons[i][0] else: #this is still the same transcript name @@ -369,7 +370,7 @@ def plot_and_output(all_genes, output_directory, gene_groups): #print("i am saving this gene ", all_genes[gene_to_save]) """ #sorted_dict = sorted(dict_motifs_p_values.items(), key = lambda x : (x[1]['adjusted_p_value']), reverse = False) - + genes_with_transcript_names = sorted(genes_with_transcript_names.items(), key = lambda x: (x[1]['gene'], x[0])) logger.info("writing the output file") @@ -385,7 +386,7 @@ def plot_and_output(all_genes, output_directory, gene_groups): """ for transcript in 
genes_with_transcript_names: - output_file.write('\t'.join([transcript[1]['gene'], transcript[0], str(transcript[1]['score']),str(transcript[1]['gene_length']), str(transcript[1]['sum_length'])]) + '\n') + output_file.write('\t'.join([transcript[1]['gene'], transcript[0].replace(transcript[1]['gene'] + '_', ''), str(transcript[1]['score']),str(transcript[1]['gene_length']), str(transcript[1]['sum_length'])]) + '\n') output_file.close()