Skip to content
This repository has been archived by the owner. It is now read-only.

Added fixes for two issues #5

Merged
merged 2 commits into from
Jan 27, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 18 additions & 17 deletions uropa.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@
# Configure logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

streamHandle = logging.StreamHandler()
loggerFormat = logging.Formatter('[%(levelname)s] - %(message)s')
streamHandle.setFormatter(loggerFormat)
Expand All @@ -114,20 +114,21 @@
else:
streamHandle.setLevel(logging.WARNING)
logger.addHandler(streamHandle)

if args.log is not None:
logpath = os.path.dirname(args.log)
if not os.path.exists(logpath):
try:
os.makedirs(logpath)
fileHandle = logging.FileHandler(args.log, mode='w')
fileHandle.setLevel(logging.DEBUG)
fileHandle.setFormatter(loggerFormat)
logger.addHandler(fileHandle)
except OSError:
logger.error("Could not create directory for log file {}".format(logpath))
except IOError:
logger.error("Could not create log file {}".format(args.log))
try:
fileHandle = logging.FileHandler(args.log, mode='w')
fileHandle.setLevel(logging.DEBUG)
fileHandle.setFormatter(loggerFormat)
logger.addHandler(fileHandle)
except IOError:
logger.error("Could not create log file {}".format(args.log))

logger.info("Start time: %s", datetime.datetime.now().strftime("%d.%m.%Y %H:%M"))

Expand Down Expand Up @@ -181,7 +182,7 @@
query_features = list(chain.from_iterable([q["feature"] for q in queries]))
feat_not_valid = [f for f in query_features if f not in gtf_feat]
feat_valid = [f for f in query_features if f not in feat_not_valid]

if feat_not_valid:
logger.warning("Invalid features present (found in a query but not in the GTF): %s", ','.join(feat_not_valid))

Expand All @@ -200,7 +201,7 @@
summ_dict["gtf"] = annot_gtf
summ_dict["bed"] = peaks_bed

with open("summary_config.json", "w") as fj:
with open(outdir+"summary_config.json", "w") as fj:
json.dump(summ_dict, fj, indent=4)

distances = reduce(list.__add__, map(lambda q: q["distance"], queries))
Expand All @@ -226,9 +227,9 @@
# Table-columns for All queries
query_attributes_none = list([q["show.attributes"] for q in queries])
query_attributes = [a for a in np.unique(list(chain.from_iterable(query_attributes_none))) if a is not None and a != 'None']

logger.info("Additional attributes included in output: %s", ','.join(query_attributes))

# query included, when there is no hit from tabix
nas_len = len(query_attributes) + 10
NAsList = list(np.repeat("NA", nas_len))
Expand All @@ -253,7 +254,7 @@
"genomic_location",
"feat_ovl_peak",
"peak_ovl_feat"]

header = "\t".join(header_base + query_attributes + ["query"])

outdir_len = len(outdir.split("/"))
Expand Down Expand Up @@ -321,7 +322,7 @@
allhits_partials = glob.glob(outdir + "AllHits_Partial*")
besthits_partials = glob.glob(outdir + "BestHits_Partial*")


ovls.write_final_file(
args.threads,
outdir,
Expand Down Expand Up @@ -441,11 +442,11 @@
logger.warning("Visualized summary output could not be created from %s.", ' '.join(call))
except OSError:
logger.warning("Rscript command not available for summary output.")
if os.path.exists(summary_output):
os.remove("summary_config.json")

# Remove all other un-necessary files :
# Remove all other un-necessary files :
if outputs_ready:
if os.path.exists(outdir+"summary_config.json"):
os.remove(outdir+"summary_config.json")
os.remove(gtf_index) # .gz
os.remove(gtf_index + ".tbi")
if not args.no_comments:
Expand Down
12 changes: 3 additions & 9 deletions uropa/overlaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,19 +222,14 @@ def get_distance_by_dir(inputD, genom_loc, intern_loc, Dhit):
"""Limits the hit by a distance window depending on peak's position relative-to-feature """

[D_upstr, D_downstr] = inputD
# D_upstr = D_downstr when len(input_D) = 1

if genom_loc == "upstream" or genom_loc == "overlapStart":
return Dhit <= D_upstr
if genom_loc == "downstream" or genom_loc == "overlapEnd":
return Dhit <= D_downstr

if any(intern_loc):
best_dist = map(lambda l: Dhit <= D_upstr if l ==
"upstream" else Dhit <= D_downstr, intern_loc)
# when 3 positions given: any Dhit inside limit-> hit accepted // if 1
# given any(True) =True
return any(best_dist)
# Rescue if internal peak
return(any(intern_loc))


def detect_internals(pstart, pend, hit_start, hit_end):
Expand Down Expand Up @@ -297,8 +292,7 @@ def define_genom_loc(current_loc, pstart, p_center, pend, hit_start, hit_end, hi

def overlap_peak_feature(genom_loc, pstart, p_center, pend, peakL, featL, hit_start, hit_end, hit_strand):
"""Gives the collective information of the location and overlap of a peak to a feature """

# A/ Find internal features/peaks
# A/ Find internal features/peaks
# hit_start, hit_end : as given in gtf/no inversion for '-'strand.
genom_loc = detect_internals(pstart, pend, hit_start, hit_end)
# B/ Calculate overlap Ratio peak/feat and feat/peak
Expand Down