diff --git a/setup.py b/setup.py index 1875429..2dc9383 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ def readme(): return f.read() setup(name='uropa', - version='3.0.0', + version='3.0.1', description='UROPA is a command line based tool, intended for genomic region annotation', long_description=readme(), url='https://github.molgen.mpg.de/loosolab/UROPA', diff --git a/uropa/annotation.py b/uropa/annotation.py index aa9b4fb..57a70bb 100644 --- a/uropa/annotation.py +++ b/uropa/annotation.py @@ -142,7 +142,8 @@ def annotate_peaks(peaks, gtf_gz, gtf_index, cfg_dict, logger=None): try: hits = tabix_obj.fetch(peak["peak_chr"], extend_start, extend_end, parser=pysam.asGTF()) except ValueError: - logger.error() + print("Could not fetch any hits for tabix {0}:{1}-{2}. Continueing.".format(peak["gtf_chr"], extend_start, extend_end)) + continue #Go through each hit from tabix and establish whether they are valid for queries valid_annotations = [] diff --git a/uropa/uropa.py b/uropa/uropa.py index 15a96da..f83e450 100644 --- a/uropa/uropa.py +++ b/uropa/uropa.py @@ -76,11 +76,11 @@ def main(): additional.add_argument("-o", "--outdir", metavar="", help="Output directory for output files (default: current dir)", default=".") #additional.add_argument("-r","--reformat", help="create an additional compact and line-reduced table as result file", action="store_true") additional.add_argument("-s","--summary", help="Filename of additional visualisation of results in graphical format", action="store_true") - additional.add_argument("-t","--threads", help="Multiprocessed run: n = number of threads to run annotation process", type=int, action="store",metavar="n",default=1) + additional.add_argument("-t","--threads", help="Multiprocessed run: n = number of threads to run annotation process", type=int, action="store", metavar="n", default=1) #additional.add_argument("--add-comments",help="add comment lines to output files", action="store_true") additional.add_argument("-l","--log", help="Log file name for messages and warnings (default: log is written to stdout)", action="store", metavar="uropa.log") additional.add_argument("-d","--debug",help="Print verbose messages (for debugging)", action="store_true") - additional.add_argument("-v","--version",help="Prints the version and exits", action="version",version="%(prog)s 3.0.0") + additional.add_argument("-v","--version",help="Prints the version and exits", action="version",version="%(prog)s 3.0.1") args = parser.parse_args() #Write help if no input was given @@ -264,12 +264,18 @@ def main(): gtf_feat_count = {} with open(cfg_dict["gtf"]) as f: for line in f: - columns = line.rstrip().split("\t") - feature = columns[2] - - if feature not in gtf_feat_count: - gtf_feat_count[feature] = 0 - gtf_feat_count[feature] += 1 + if not line.startswith("#"): + columns = line.rstrip().split("\t") + + if len(columns) < 9: + logger.error("Input GTF ({0}) has less than 9 columns - please check that the file has the correct GTF format.".format(cfg_dict["gtf"])) + sys.exit() + + feature = columns[2] + + if feature not in gtf_feat_count: + gtf_feat_count[feature] = 0 + gtf_feat_count[feature] += 1 gtf_feat = list(gtf_feat_count.keys()) logger.debug("Features in gtf: {0}".format(gtf_feat_count)) @@ -304,7 +310,6 @@ def main(): #Compress and index using gzip/tabix logger.debug("Tabix compress") - anno_gtf_gz = output_prefix + ".gtf.gz" anno_gtf_index = anno_gtf_gz + ".tbi" pysam.tabix_compress(anno_gtf, anno_gtf_gz, force=True)