Skip to content
This repository has been archived by the owner. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
0.3.0: Getting ready for pypi release
  • Loading branch information
msbentsen committed Mar 17, 2019
1 parent eecc24b commit dc0d9bd
Show file tree
Hide file tree
Showing 18 changed files with 387 additions and 439 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -1,2 +1,3 @@
include README.md
include LICENSE
recursive-include tobias *.c
123 changes: 79 additions & 44 deletions setup.py
@@ -1,49 +1,84 @@
import os
import sys
import re
from setuptools import setup, Extension
import numpy as np

def readme():
    """Return the full text of README.md (used as the package long description).

    The path is relative to the current working directory. The file is
    decoded explicitly as UTF-8 so the result does not depend on the
    locale's default encoding.
    """
    with open('README.md', encoding='utf-8') as f:
        return f.read()
#Test if numpy is installed; abort with an installation hint if it cannot be imported.
#Only ImportError is caught so that Ctrl-C and unrelated errors are not masked by this message.
try:
    import numpy as np
except ImportError:
    sys.exit("ERROR: Numpy needed for TOBIAS installation. Numpy can be installed using the command \"pip install numpy\"")

cmdclass = {}

#Build the extension modules from the .pyx sources when cython can be imported,
#otherwise from the .c sources with the same base names
try:
    from Cython.Distutils import build_ext
    use_cython = True
except ImportError:
    use_cython = False

source_suffix = ".pyx" if use_cython else ".c"
ext_modules = [
    Extension("tobias.utils." + module, ["tobias/utils/" + module + source_suffix], include_dirs=[np.get_include()])
    for module in ("ngs", "sequences", "signals")
]

#Cython's build_ext is only registered when cython is available
if use_cython:
    cmdclass.update({'build_ext': build_ext})

ext_modules = [Extension("tobias.utils.ngs", ["tobias/utils/ngs.pyx"], include_dirs=[np.get_include()]),
Extension("tobias.utils.sequences", ["tobias/utils/sequences.pyx"], include_dirs=[np.get_include()]),
Extension("tobias.utils.signals", ["tobias/utils/signals.pyx"], include_dirs=[np.get_include()])]
#Path of setup file to establish version
setupdir = os.path.abspath(os.path.dirname(__file__))

def find_version(init_file):
    """Return the version string declared in a Python source file.

    Parameters:
        init_file: path to a file containing a line of the form
            ``__version__ = "x.y.z"`` (single or double quotes).

    Returns:
        The text between the quotes of the first matching line.

    Raises:
        RuntimeError: if no ``__version__ = ...`` line is found.
    """
    #Use a context manager so the file handle is closed, and decode as UTF-8
    #independently of the locale
    with open(init_file, encoding="utf-8") as handle:
        version_file = handle.read()

    version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M)
    if version_match is None:
        raise RuntimeError("Unable to find version string.")
    return version_match.group(1)

def readme():
    """Return the full text of README.md (used as the package long description).

    The path is relative to the current working directory. The file is
    decoded explicitly as UTF-8 so the result does not depend on the
    locale's default encoding.
    """
    with open('README.md', encoding='utf-8') as f:
        return f.read()

setup(name='tobias',
version='0.2',
description='Transcription factor Occupancy prediction By Investigation of ATAC-seq Signal',
long_description=readme(),
url='https://github.molgen.mpg.de/loosolab/TOBIAS',
author='Mette Bentsen',
author_email='mette.bentsen@mpi-bn.mpg.de',
license='MIT',
packages=['tobias', 'tobias.footprinting', 'tobias.plotting', 'tobias.motifs', 'tobias.misc', 'tobias.utils'],
entry_points = {
'console_scripts': ['TOBIAS=tobias.TOBIAS:main']
},
install_requires=[
'setuptools_cython',
'numpy',
'scipy',
'pyBigWig',
'pysam',
'pybedtools',
'matplotlib>=2',
'scikit-learn',
'pandas',
'pypdf2',
'xlsxwriter',
'adjustText',
],
#dependency_links=['https://github.com/jhkorhonen/MOODS/tarball/master'],
classifiers = [
'License :: OSI Approved :: MIT License',
'Intended Audience :: Science/Research',
'Topic :: Scientific/Engineering :: Bio-Informatics',
'Programming Language :: Python :: 3'
],
zip_safe=False,
include_package_data=True,
ext_modules = ext_modules,
scripts=["tobias/utils/peak_annotation.sh"]
)
version=find_version(os.path.join(setupdir, "tobias", "__init__.py")), #get version from __init__.py
description='Transcription factor Occupancy prediction By Investigation of ATAC-seq Signal',
long_description=readme(),
url='https://github.molgen.mpg.de/loosolab/TOBIAS',
author='Mette Bentsen',
author_email='mette.bentsen@mpi-bn.mpg.de',
license='MIT',
packages=['tobias', 'tobias.footprinting', 'tobias.plotting', 'tobias.motifs', 'tobias.misc', 'tobias.utils'],
entry_points={
'console_scripts': ['TOBIAS=tobias.TOBIAS:main']
},
ext_modules=ext_modules,
cmdclass = cmdclass,
#dependency_links=['https://github.com/jhkorhonen/MOODS/releases/download/v1.9.3/MOODS-python-1.9.3.tar.gz#egg=MOODS-python-1.9.3'],
install_requires=[
'numpy',
'scipy',
'pysam',
'pybedtools',
'matplotlib>=2',
'scikit-learn',
'pandas',
'pypdf2',
'xlsxwriter',
'adjustText',
'pyBigWig',
],

classifiers=[
'License :: OSI Approved :: MIT License',
'Intended Audience :: Science/Research',
'Topic :: Scientific/Engineering :: Bio-Informatics',
'Programming Language :: Python :: 3'
],
zip_safe=True,
)
21 changes: 16 additions & 5 deletions tobias/TOBIAS.py
Expand Up @@ -9,6 +9,18 @@
"""

import sys

#Import extra dependencies
#Abort with installation instructions if MOODS cannot be imported.
#Only ImportError is caught so that Ctrl-C and unrelated errors are not masked by this message.
try:
    import MOODS
except ImportError:
    sys.exit("ERROR: Package MOODS is not installed and is needed by TOBIAS. You can install it using:\n" +
             "$ wget https://github.com/jhkorhonen/MOODS/releases/download/v1.9.3/MOODS-python-1.9.3.tar.gz\n" +
             "$ tar xzvf MOODS-python-1.9.3.tar.gz\n" +
             "$ cd MOODS-python-1.9.3\n" +
             "$ python setup.py install")

#Import general
import argparse
from argparse import SUPPRESS
import textwrap
Expand All @@ -23,7 +35,7 @@

from tobias.motifs.tfbscan import *
from tobias.motifs.format_motifs import *
from tobias.motifs.cluster_tfbs import *
#from tobias.motifs.cluster_tfbs import *
from tobias.motifs.score_bed import *

from tobias.misc.subsample_bam import *
Expand All @@ -32,8 +44,7 @@
#from tobias.misc.create_network import *
from tobias.misc.log2table import *


TOBIAS_VERSION = "0.2" #<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Change here :-)
from tobias import __version__ as TOBIAS_VERSION

def main():

Expand All @@ -43,14 +54,14 @@ def main():
{
"ATACorrect":{"help":"Correct reads with regards to Tn5 sequence bias", "add_arguments": add_atacorrect_arguments, "function":run_atacorrect},
"FootprintScores":{"help":"Calculate footprint scores from cutsites", "add_arguments": add_footprint_arguments, "function":run_footprinting, "space":"\t"},
"BINDetect":{"help":"Detect TF binding from footprints", "add_arguments": add_bindetect_arguments, "function":run_bindetect},
"BINDetect":{"help":"Detect TF binding from footprints and motifs", "add_arguments": add_bindetect_arguments, "function":run_bindetect},
},

"Tools for working with motifs/TFBS":
{
"TFBScan": {"help":"Identify positions of TFBS given sequence and motifs", "add_arguments": add_tfbscan_arguments, "function": run_tfbscan},
"FormatMotifs": {"help": "Utility to deal with motif files", "add_arguments": add_formatmotifs_arguments, "function": run_formatmotifs},
"ClusterTFBS": {"help": "Cluster TFs based on overlap of sites", "add_arguments": add_clustering_arguments, "function": run_clustering},
#"ClusterTFBS": {"help": "Cluster TFs based on overlap of sites", "add_arguments": add_clustering_arguments, "function": run_clustering},
"ScoreBed": {"help":"Score .bed-file with signal from .bigwig-file(s)", "add_arguments": add_scorebed_arguments, "function": run_scorebed},
},

Expand Down
1 change: 1 addition & 0 deletions tobias/__init__.py
@@ -0,0 +1 @@
__version__ = "0.3.0"
31 changes: 2 additions & 29 deletions tobias/footprinting/ATACorrect.py
Expand Up @@ -30,7 +30,6 @@
from collections import OrderedDict
import logging
import itertools
from scipy.optimize import curve_fit
from matplotlib.backends.backend_pdf import PdfPages

#Bio-specific packages
Expand Down Expand Up @@ -116,11 +115,6 @@ def run_atacorrect(args):
if args.peaks == None:
sys.exit("Error: No .peaks-file given")

#Adjust input files to full path
#args.bam = os.path.abspath(args.bam)
#args.genome = os.path.abspath(args.genome)
#args.peaks = os.path.abspath(args.peaks) if args.peaks != None else None

#Adjust some parameters depending on input
args.prefix = os.path.splitext(os.path.basename(args.bam))[0] if args.prefix == None else args.prefix
args.outdir = os.path.abspath(args.outdir) if args.outdir != None else os.path.abspath(os.getcwd())
Expand Down Expand Up @@ -168,31 +162,12 @@ def run_atacorrect(args):

logger.info("----- Processing input data -----")

#Todo: use TOBIAS functions

#Input test
logger.debug("Testing input file availability")
file_list = [args.bam, args.genome, args.peaks]
file_list = [file for file in file_list if file != None] #some files can be None depending on choice
for path in file_list:
if not os.path.exists(path):
logger.error("\nError: {0} does not exist.".format(path))
sys.exit(1)
check_files([args.bam, args.genome, args.peaks], "r")

logger.debug("Testing output directory/file writeability")
make_directory(args.outdir)
if not os.access(args.outdir, os.W_OK):
logger.error("Error: {0} does not exist or is not writeable.".format(args.outdir))
sys.exit(1)

#Output write test
for path in output_files[:-1]: #Do not include log-file as this is managed by logger
if path == None:
continue
if os.path.exists(path):
if not os.access(path, os.W_OK):
logger.error("Error: {0} could not be opened for writing.".format(path))
sys.exit(1)
check_files(output_files, "w")

#Open pdf for figures
figure_pdf = PdfPages(figures_f, keep_empty=True)
Expand Down Expand Up @@ -368,10 +343,8 @@ def run_atacorrect(args):
logger.info("Finalizing bias motif for scoring")
for strand in strands:
bias_obj.bias[strand].prepare_mat()

figure_pdf.savefig(plot_pssm(bias_obj.bias[strand].pssm, "Tn5 insertion bias of reads ({0})".format(strand)))


#----------------------------------------------------------------------------------------------------#
# Correct read bias and write to bigwig
#----------------------------------------------------------------------------------------------------#
Expand Down
3 changes: 1 addition & 2 deletions tobias/footprinting/ATACorrect_functions.py
Expand Up @@ -16,7 +16,6 @@
import multiprocessing as mp
import time
from datetime import datetime

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from scipy.optimize import curve_fit
Expand Down Expand Up @@ -135,7 +134,7 @@ def bias_estimation(regions_list, params):
for read in read_lst_strand[strand]:
if read.cigartuples is not None:
first_tuple = read.cigartuples[-1] if read.is_reverse else read.cigartuples[0]
if first_tuple[0] == 0 and first_tuple[1] > params.k_flank + max(np.abs(params.read_shift)):
if first_tuple[0] == 0 and first_tuple[1] > params.k_flank + max(np.abs(params.read_shift)): #Only include non-clipped reads
read_per_pos[read.cutsite] = read_per_pos.get(read.cutsite, []) + [read]

for cutsite in read_per_pos:
Expand Down

0 comments on commit dc0d9bd

Please sign in to comment.