diff --git a/find_exons.py b/find_exons.py index 2a71d1a..2c33246 100644 --- a/find_exons.py +++ b/find_exons.py @@ -42,7 +42,7 @@ def parse_args(): required_arguments = parser.add_argument_group('required arguments') required_arguments.add_argument('--gtf_genes', help='a .gtf file with genes', required=True) required_arguments.add_argument('--gtf_exons', help='a .gtf file with exons', required=True) - required_arguments.add_argument('--genes_of_interest', nargs='*', dest='genes', help='enter a list of genes of interest or type file and enter a path to the file with genes of interest with the optional argument --genes_file', default=['file'], required=True) + required_arguments.add_argument('--genes_of_interest', nargs='*', dest='genes_of_interest', help='a .txt file or a list with genes one is interested in', required=True) #all other arguments are optional parser.add_argument('--genes_file', help='names of genes of interest') @@ -81,32 +81,32 @@ def check_existing_input_files(args): print('please make sure the .gtf file with exons exists') sys.exit() - if len(args.genes) == 1 and args.genes[0] == "file": - if args.genes_file != None: - if not os.path.isfile(args.genes_file): - print('please make sure the file with genes of interest exists') - sys.exit() - else: - #there is a file with list of genes. 
Extract them and return in an array - genes = [] - with open(args.genes_file) as genes_file: - for line in genes_file: - line_array = re.split(r'\t', line.rstrip('\n')) - for gene in line_array: - if ' ' in gene: - print('please make sure that genes in the file with genes of interest are separated with tabs or are written each on a new line') - sys.exit() - else: - genes.append(gene) - - genes_file.close() - args.genes = genes - + if len(args.genes_of_interest) == 1: + if not os.path.isfile(args.genes_of_interest[0]): + print('working with one gene of interest') + genes_of_interest = args.genes_of_interest else: - print('please provide a path to a file with genes of interest') - sys.exit() + print('working with file of genes') + #there is a file with list of genes. Extract them and return in an array + genes = [] + with open(args.genes_of_interest[0]) as genes_file: + for line in genes_file: + line_array = re.split(r'\t', line.rstrip('\n')) + for gene in line_array: + if ' ' in gene: + print('please make sure that genes in the file with genes of interest are separated with tabs or are written each on a new line') + sys.exit() + else: + genes.append(gene) + + genes_file.close() + genes_of_interest = genes + + else: #there is a list of genes of interest + print('working with a list of genes') + genes_of_interest = args.genes_of_interest - return args.genes + return genes_of_interest def procede_gtf_files(gtf_genes, gtf_exons, genes_of_interest, exact): @@ -314,7 +314,7 @@ def plot_and_output(all_genes, output_directory, gene_groups): score = "%.2f" % round((sum_length * 100)/all_genes[gene]['gene_length'], 2) gene_transcript_name = gene + "_" + transcript_name genes_with_transcript_names[gene_transcript_name] = genes_with_transcript_names.get(transcript_name, {}) - genes_with_transcript_names[gene_transcript_name] = {'gene': gene, 'sum_length': sum_length, 'gene_length': all_genes[gene]['gene_length'], 'score': score} + 
genes_with_transcript_names[gene_transcript_name] = {'gene': gene, 'sum_length': sum_length, 'gene_length': all_genes[gene]['gene_length'], 'score': score} #number of exons transcript_name = sorted_exons[i][5] sum_length = sorted_exons[i][0] else: #this is still the same transcript name