Skip to content

Commit

Permalink
writing the output file
Browse files Browse the repository at this point in the history
  • Loading branch information
anastasiia committed Nov 15, 2018
1 parent e7f7d5d commit bd040a2
Showing 1 changed file with 34 additions and 10 deletions.
44 changes: 34 additions & 10 deletions find_exons.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ def get_name_from_path(full_path):

def check_existing_input_files(args):

print('checking the input files')

if not os.path.isfile(args.gtf_genes):
print('please make sure the .gtf file with genes exists')
sys.exit()
Expand Down Expand Up @@ -106,6 +108,8 @@ def check_existing_input_files(args):

def procede_gtf_files(gtf_genes, gtf_exons, genes_of_interest):

logger.info("looking for genes of interest in gtf files")

all_genes = {}

for gene_name in genes_of_interest:
Expand Down Expand Up @@ -151,11 +155,6 @@ def procede_gtf_files(gtf_genes, gtf_exons, genes_of_interest):
if gene_full_name in all_genes.keys():
all_genes[gene_full_name]['exons'].append([exon_length, exon_number, tsl])

"""
for gene in all_genes:
print(gene, all_genes[gene])
print()
"""
return all_genes

def make_plot(x_array, y_array, gene_names, output_directory, figure_name, color_name):
Expand All @@ -179,6 +178,13 @@ def make_plot(x_array, y_array, gene_names, output_directory, figure_name, color

def plot_and_output(all_genes, output_directory):

logger.info("preparing for plotting")

gene_names = []
gene_lengths = []
exons_lengths = []
scores = []

#find the sum length of exons for each gene
for gene in all_genes:
#first sort the exons array
Expand All @@ -187,17 +193,35 @@ def plot_and_output(all_genes, output_directory):

check_exon = 1
sum_length = 0
last_exon = sorted_exons[-1][1]
last_exon = sorted_exons[-1][1]

while check_exon <= last_exon:
if sorted_exons[0][1] == check_exon:
print(check_exon)
sum_length = sum_length + sorted_exons[0][0]
print(sum_length)
sorted_exons = [x for x in sorted_exons if x[1] != check_exon]
print(sorted_exons)
sorted_exons = [x for x in sorted_exons if x[1] != check_exon] #cut the current sorted_exons array so that there are no occurrences of the current exon there
check_exon = check_exon + 1

score = "%.2f" % round((sum_length * 100)/all_genes[gene]['gene_length'], 2)

gene_names.append(gene)
gene_lengths.append(all_genes[gene]['gene_length'])
exons_lengths.append(sum_length)
scores.append(score)

logger.info("writing the output file")

#now write an output file containing the names of the genes, their lengths, the summed length of their exons, and the percentage of the gene length covered by exons (the score)
output_file = open(os.path.join(output_directory, "genes_exons_correlation.txt"), 'w')
header = ["gene_name", "exons_len_percentage", "gene_length", "exons_sum_length"]
output_file.write('\t'.join(header) + '\n') #write the header

for i in range(len(gene_names)):
output_file.write('\t'.join([gene_names[i], str(scores[i]), str(gene_lengths[i]), str(exons_lengths[i])]) + '\n')

output_file.close()




def main():

Expand Down

0 comments on commit bd040a2

Please sign in to comment.