Skip to content

Commit

Permalink
write each transcript with the corresponding gene in the output file
Browse files Browse the repository at this point in the history
  • Loading branch information
anastasiia committed Nov 27, 2018
1 parent 750209a commit 9e940f8
Showing 1 changed file with 26 additions and 26 deletions.
52 changes: 26 additions & 26 deletions find_exons.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def parse_args():
required_arguments = parser.add_argument_group('required arguments')
required_arguments.add_argument('--gtf_genes', help='a .gtf file with genes', required=True)
required_arguments.add_argument('--gtf_exons', help='a .gtf file with exons', required=True)
required_arguments.add_argument('--genes_of_interest', nargs='*', dest='genes', help='enter a list of genes of interest or type file and enter a path to the file with genes of interest with the optional argument --genes_file', default=['file'], required=True)
required_arguments.add_argument('--genes_of_interest', nargs='*', dest='genes_of_interest', help='a .txt file or a list with genes one is interested in', required=True)

#all other arguments are optional
parser.add_argument('--genes_file', help='names of genes of interest')
Expand Down Expand Up @@ -81,32 +81,32 @@ def check_existing_input_files(args):
print('please make sure the .gtf file with exons exists')
sys.exit()

if len(args.genes) == 1 and args.genes[0] == "file":
if args.genes_file != None:
if not os.path.isfile(args.genes_file):
print('please make sure the file with genes of interest exists')
sys.exit()
else:
#there is a file with list of genes. Extract them and return in an array
genes = []
with open(args.genes_file) as genes_file:
for line in genes_file:
line_array = re.split(r'\t', line.rstrip('\n'))
for gene in line_array:
if ' ' in gene:
print('please make sure that genes in the file with genes of interest are separated with tabs or are written each on a new line')
sys.exit()
else:
genes.append(gene)

genes_file.close()
args.genes = genes

if len(args.genes_of_interest) == 1:
if not os.path.isfile(args.genes_of_interest[0]):
print('working with one gene of interest')
genes_of_interest = args.genes_of_interest
else:
print('please provide a path to a file with genes of interest')
sys.exit()
print('working with file of genes')
#there is a file with list of genes. Extract them and return in an array
genes = []
with open(args.genes_of_interest[0]) as genes_file:
for line in genes_file:
line_array = re.split(r'\t', line.rstrip('\n'))
for gene in line_array:
if ' ' in gene:
print('please make sure that genes in the file with genes of interest are separated with tabs or are written each on a new line')
sys.exit()
else:
genes.append(gene)

genes_file.close()
genes_of_interest = genes

else: #there is a list of genes of interest
print('working with a list of genes')
genes_of_interest = args.genes_of_interest

return args.genes
return genes_of_interest

def procede_gtf_files(gtf_genes, gtf_exons, genes_of_interest, exact):

Expand Down Expand Up @@ -314,7 +314,7 @@ def plot_and_output(all_genes, output_directory, gene_groups):
score = "%.2f" % round((sum_length * 100)/all_genes[gene]['gene_length'], 2)
gene_transcript_name = gene + "_" + transcript_name
genes_with_transcript_names[gene_transcript_name] = genes_with_transcript_names.get(transcript_name, {})
genes_with_transcript_names[gene_transcript_name] = {'gene': gene, 'sum_length': sum_length, 'gene_length': all_genes[gene]['gene_length'], 'score': score}
genes_with_transcript_names[gene_transcript_name] = {'gene': gene, 'sum_length': sum_length, 'gene_length': all_genes[gene]['gene_length'], 'score': score} #number of exons
transcript_name = sorted_exons[i][5]
sum_length = sorted_exons[i][0]
else: #this is still the same transcript name
Expand Down

0 comments on commit 9e940f8

Please sign in to comment.