diff --git a/find_exons.py b/find_exons.py
index ece8f8f..dd87554 100644
--- a/find_exons.py
+++ b/find_exons.py
@@ -39,9 +39,10 @@ def parse_args():
     required_arguments = parser.add_argument_group('required arguments')
     required_arguments.add_argument('--gtf_genes', help='a .gtf file with genes', required=True)
     required_arguments.add_argument('--gtf_exons', help='a .gtf file with exons', required=True)
-    required_arguments.add_argument('--gene_list', help='names of genes of interest', required=True)
+    required_arguments.add_argument('--genes_of_interest', nargs='*', dest='genes', help='enter a list of genes of interest or type file and enter a path to the file with genes of interest with the optional argument --genes_file', default=['file'], required=True)
 
     #all other arguments are optional
+    parser.add_argument('--genes_file', help='names of genes of interest')
     parser.add_argument('--output_directory', default='output', const='output', nargs='?', help='output directory, by default ./output/')
     parser.add_argument('--silent', action='store_true', help='while working with data write the information only into ./call_peaks_log.txt')
     args = parser.parse_args()
@@ -67,18 +68,49 @@ def get_name_from_path(full_path):
 
 def check_existing_input_files(args):
+    """Validate the input paths and resolve the genes of interest.
+
+    Prints a message and calls sys.exit() when a .gtf file is missing, when
+    the genes file is needed but was not given or does not exist, or when a
+    gene name read from that file contains a space.
+
+    Returns the list of genes: args.genes as given on the command line, or
+    the names read from args.genes_file (tab-separated and/or one per
+    line), which are also stored back into args.genes.
+    """
     if not os.path.isfile(args.gtf_genes):
-        #logger.info('please make sure the both files with conditions to compare exist')
         print('please make sure the .gtf file with genes exists')
         sys.exit()
 
     if not os.path.isfile(args.gtf_exons):
-        #logger.info('please make sure the both files with conditions to compare exist')
         print('please make sure the .gtf file with exons exists')
         sys.exit()
 
-    if not os.path.isfile(args.gene_list):
-        print('please make sure the file with genes of interest exists')
-        sys.exit()
+    #genes were passed directly on the command line, nothing to read
+    if len(args.genes) != 1 or args.genes[0] != "file":
+        return args.genes
+
+    #the placeholder "file" means the genes are listed in --genes_file
+    if args.genes_file is None:
+        print('please provide a path to a file with genes of interest')
+        sys.exit()
+
+    if not os.path.isfile(args.genes_file):
+        print('please make sure the file with genes of interest exists')
+        sys.exit()
+
+    genes = []
+    with open(args.genes_file) as genes_file:
+        for line in genes_file:
+            for gene in line.rstrip('\r\n').split('\t'):
+                if ' ' in gene:
+                    print('please make sure that genes in the file with genes of interest are separated with tabs or are written each on a new line')
+                    sys.exit()
+                #blank lines and stray tabs would otherwise add empty gene names
+                if gene:
+                    genes.append(gene)
+
+    args.genes = genes
+    return args.genes
 
 
 def main():
@@ -86,7 +118,7 @@ def main():
 
     args = parse_args()
 
-    check_existing_input_files(args)
+    args.genes = check_existing_input_files(args)
 
     #check if there is an existing directory that user gave as input, otherwise create this directory from the path provided from the user
     check_directory(args.output_directory)