Skip to content

Commit

Permalink
Starting script
Browse files Browse the repository at this point in the history
  • Loading branch information
anastasiia authored Nov 14, 2018
1 parent 8c5d090 commit bc76b7a
Showing 1 changed file with 118 additions and 0 deletions.
118 changes: 118 additions & 0 deletions find_exons.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@

"""
call_peaks uses the uncontinuous score from a bigWig file to estimate peaks
@author: Anastasiia Petrova
@contact: anastasiia.petrova(at)mpi-bn.mpg.de
"""

import argparse
import sys
import os
import re
import time
import multiprocessing
import logging
import subprocess
from Bio import SeqIO
import Bio.SeqIO.FastaIO as bio
import numpy as np
from collections import defaultdict
from scipy import stats
import pyBigWig
from statsmodels.sandbox.stats.multicomp import multipletests #for bonfferoni
import matplotlib.pyplot as plt
import random
import textwrap

logger = logging.getLogger('find_exons')
logger.setLevel(logging.INFO)

formatter = logging.Formatter("%(asctime)s : %(message)s", "%Y-%m-%d %H:%M")

def parse_args():

parser = argparse.ArgumentParser(prog = '', description = textwrap.dedent('''
This script produces plots and a tsv-file to show the number of exons for the list of genes of interest.
'''), epilog='That is what you need to make this script work for you. Enjoy it')

required_arguments = parser.add_argument_group('required arguments')
required_arguments.add_argument('--gtf_genes', help='a .gtf file with genes', required=True)
required_arguments.add_argument('--gtf_exons', help='a .gtf file with exons', required=True)
required_arguments.add_argument('--gene_list', help='names of genes of interest', required=True)

#all other arguments are optional
parser.add_argument('--output_directory', default='output', const='output', nargs='?', help='output directory, by default ./output/')
parser.add_argument('--silent', action='store_true', help='while working with data write the information only into ./call_peaks_log.txt')
args = parser.parse_args()

return args

def check_directory(directory):
if not os.path.exists(directory):
os.makedirs(directory)
#logger.info('a new directory ' + directory + ' was created')
print('a new directory ' + directory + ' was created')

def remove_file(file):
if os.path.isfile(file):
os.remove(file)

def make_name_from_path(full_path, output_directory, ending):
return os.path.join(output_directory, get_name_from_path(full_path) + ending)

def get_name_from_path(full_path):
return os.path.splitext(os.path.basename(full_path))[0]

def check_existing_input_files(args):

if not os.path.isfile(args.gtf_genes):
#logger.info('please make sure the both files with conditions to compare exist')
print('please make sure the .gtf file with genes exists')
sys.exit()

if not os.path.isfile(args.gtf_exons):
#logger.info('please make sure the both files with conditions to compare exist')
print('please make sure the .gtf file with exons exists')
sys.exit()

if not os.path.isfile(args.gene_list):
print('please make sure the file with genes of interest exists')
sys.exit()

def main():

start = time.time()

args = parse_args()

check_existing_input_files(args)

#check if there is an existing directory that user gave as input, otherwise create this directory from the path provided from the user
check_directory(args.output_directory)

fh = logging.FileHandler(os.path.join(args.output_directory, "find_exons_log.txt"))
fh.setLevel(logging.INFO)
fh.setFormatter(formatter)
logger.addHandler(fh)

ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)

#if user do not want to see the information about the status of jobs, remove the handler, that writes to the terminal
if args.silent:
logger.removeHandler(ch)

logger.info("find_exons.py was called using these parameters:")
logger.info(vars(args))

#logger.info("find_exons needed %s seconds to generate the output" % (time.time() - start))

for handler in logger.handlers:
handler.close()
logger.removeFilter(handler)

if __name__ == "__main__":
main()

0 comments on commit bc76b7a

Please sign in to comment.