Skip to content
This repository has been archived by the owner. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
Updated UROPA to v1.1.2
  • Loading branch information
jenzopr committed Sep 4, 2017
1 parent 6bfae67 commit 11c0cb4
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 68 deletions.
6 changes: 6 additions & 0 deletions CHANGES
@@ -1,3 +1,9 @@
## 1.1.2 (2017-09-04)

- Added proper help for auxillary R scripts (exit code 0)
- Made call to reformat_output.R multi-threaded
- Cleaned some code, added citation

## 1.1.1 (2017-03-08)

- Bug fix for possible older versions of GNU sort, the order of results may have changed now
Expand Down
118 changes: 69 additions & 49 deletions summary.R
Expand Up @@ -15,16 +15,36 @@ suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(jsonlite))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(grid))
suppressPackageStartupMessages(library(getopt))

options = matrix(c(
'finalhits', 'f', 1, 'character', 'file containing the final hits from UROPA.',
'config', 'c', 1, 'character', 'file containing the json formatted configuration from the UROPA run.',
'output', 'o', 2, 'character', 'file name of output file [summary.pdf].',
'besthits', 'b', 2, 'character', 'file containing the best hits from UROPA.',
'help', 'h', 0, 'logical','Provides command line help.'
), byrow=TRUE, ncol=5)
opt = getopt(options)

# End if help is requested
if (!is.null(opt$help)) {
cat(getopt(options, usage=TRUE), file = stdout()); q(status=0)
}

# script gets arguments
args <- commandArgs(TRUE)
# basic information independent if there are 3 or 4 input arguments, used otherwhere as .basic.summary
# Also exit if the mandatory parameters are not given
if (is.null(opt$finalhits) | is.null(opt$config)) {
cat(getopt(options, usage=TRUE), file = stdout()); q(status=1)
}

# Default values for options
if (is.null(opt$output)) {
opt$output <- "summary.pdf"
}

# basic information independent if there are 3 or 4 input arguments, used otherwhere as .basic.summary
num.features <- 0
features <- c()



# reformat every row of the config.query file to string for cover page
.print.query <- function(row){
r <- paste(unname(as.vector(unlist(row))), collapse=" ")
Expand All @@ -40,11 +60,12 @@ features <- c()
viewport(layout.pos.row = 1, layout.pos.col = 1)
} else {
viewport(layout.pos.row = 2, layout.pos.col = 1)
}
}
}
}

# helper for plot 2 and 5 (counts the occurence of each loci for pie charts)
.plot.genomic.location.per.feature.helper <- function(f, df.uropa, pie.basic){

feat <- features[f]
df.feature <- subset(df.uropa, df.uropa$feature==feat)
unique.loci <- sort(unique(df.feature$genomic_location))
Expand All @@ -70,7 +91,7 @@ features <- c()
blank_theme <- theme_minimal()+ theme(axis.title.x = element_blank(), axis.title.y = element_blank(), panel.border = element_blank(), panel.grid=element_blank(), axis.ticks = element_blank(), plot.title=element_text(size=14, face="bold"))
# title
main <- paste0("Genomic location of '",feat, "' across ",pie.basic)
pie <- ggplot(df.pie, aes(x="", y=value, fill=location)) + geom_bar(width = 1, stat = "identity") + coord_polar("y", start=0) + blank_theme + theme(axis.text.x=element_blank()) + geom_text(aes(y = value/length(value) + c(0, cumsum(value)[-length(value)]), label = rep("",nrow(df.pie))), size=2, nudge_x = 0.7, nudge_y = 0.7) + ggtitle(main) + theme(plot.title = element_text(size = 10, face = "bold", , vjust=-10))
pie <- ggplot(df.pie, aes(x="", y=value, fill=location)) + geom_bar(width = 1, stat = "identity") + coord_polar("y", start=0) + blank_theme + theme(axis.text.x=element_blank()) + geom_text(aes(y = value/length(value) + c(0, cumsum(value)[-length(value)]), label = rep("",nrow(df.pie))), size=2, nudge_x = 0.7, nudge_y = 0.7) + ggtitle(main) + theme(plot.title = element_text(size = 10, face = "bold", , vjust=-10))
print(pie, vp=.define_region(f))
}

Expand All @@ -79,9 +100,10 @@ features <- c()
# layout of plot
# if there is more than one feature, there should two columns and the according number of rows
for(f in 1:num.features){
.plot.genomic.location.per.feature.helper(f, df.uropa, pie.basic)
.plot.genomic.location.per.feature.helper(f, df.uropa, pie.basic)
}
}

# plot 3 and 6
.plot.feature.distribution <- function(df.uropa, header){
if(num.features>1){
Expand All @@ -91,7 +113,7 @@ features <- c()
o <- nrow(subset(df.uropa, df.uropa$feature==feat))
occurence.features <- c(occurence.features,o)
}
names(occurence.features) <- features
names(occurence.features) <- features
barplot(occurence.features, ylab="occurence",main=header)
}
}
Expand All @@ -110,7 +132,7 @@ features <- c()
# get infos from config for overview page
config <- fromJSON(conf)
config.query <- as.data.frame(config$queries)

config.cols <- colnames(config.query)
priority <- config$priority

Expand All @@ -124,31 +146,31 @@ features <- c()
if(grepl("T",priority) && num.queries>1){
priority <- paste0(priority, "\nNote: No pairwise comparisons and Chow Ruskey plot (no overlaps)")
}
# add query to query data frame
# add query to query data frame
config.query$query <- paste0("query",sprintf("%02d",0:(nrow(config.query)-1)))
config.query <- config.query[,c("query", "feature", "distance", "feature.anchor","internals", "strand","direction","filter.attribute", "attribute.value","show.attributes")]
# replaye "start,center,end" position by "any_pos"
config.query <- config.query[,c("query", "feature", "distance", "feature.anchor", "internals", "direction", "filter.attribute", "attribute.value", "show.attributes")]
# replaye "start,center,end" position by "any_pos"
config.query$feature.anchor <- sapply(config.query$feature.anchor, function(x) if(length(x)==3){return("any_pos")}else{return(x)})
mtext("UROPA summary", side=3, line=0,outer=FALSE, cex=2)
mtext(paste0("There were ", num.peaks, " peaks in the input bed file,\nUROPA annotated ", anno.peaks, " peaks\n"),side=3, line=-3,outer=FALSE, cex=.7)
if(num.queries != nrow(config.query)){
mtext(paste("Only queries", paste(queries, collapse=","), "are represented in the FinalHits", sep=" "), side=3, line=-5,outer=FALSE, cex=.7)
mtext(paste("Only queries", paste(queries, collapse=","), "are represented in the FinalHits", sep=" "), side=3, line=-5,outer=FALSE, cex=.7)
}



mytheme <- ttheme_default(core = list(fg_params=list(cex = 0.5)),colhead = list(fg_params=list(cex = 0.5)),rowhead = list(fg_params=list(cex = 0.5)))
config.query <- data.frame(lapply(config.query, as.character), stringsAsFactors=FALSE)
config.query <- data.frame(lapply(config.query, as.character), stringsAsFactors=FALSE)
g <- tableGrob(format(config.query), theme=mytheme,rows=NULL)
grid.draw(g)

mtext(paste0("priority: ", priority), cex=.7,side=1, line=-2)
input <- paste("Input:",unlist(config$bed),collapse=" ")
anno <- paste("Anno:",unlist(config$gtf),collapse=" ")
input.anno <- paste(input,anno,sep="\n")
mtext(input.anno,cex=.6, side=1, line=0)

# plot 1
# plot 1
# description
plot1 <- paste0("1. Distances of annotated peaks in finalhits:",
"\n\nThe following density plot displays the distance of anntotated peaks\nto its feature(s) based on the finalhits.\n\n",
Expand All @@ -168,9 +190,9 @@ features <- c()
} else {
y.lim <- median(df.uropa.final[,"distance"]) + 5000
}

}

density <- subset(df.uropa.final[,c("distance","feature")], (df.uropa.final[,"distance"]<y.lim))
dpq <- qplot(distance,data=density, geom="density", color=feature, xlab = "Distance to feature", ylab = "Relative count")
print(dpq + ggtitle("Distance to features across finalhits"))
Expand Down Expand Up @@ -200,7 +222,7 @@ features <- c()
mtext(plot3, cex=1, adj=0, padj=1)
.plot.feature.distribution(df.uropa=df.uropa.final,header="Feature distribution across finalhits")
}

return(df.uropa.final)
}

Expand All @@ -209,18 +231,18 @@ features <- c()
# arg 3 = output pdf
# arg 4 = besthits

if(length(args)==3){
## basic summary -- only one query definded
df.uropa.final <- .basic.summary(args[1],args[2], args[3])
dev.off()
} else if(length(args)==4){
## basic summary -- only one query definded
if (is.null(opt$besthits)) {
df.uropa.final <- .basic.summary(opt$finalhits, opt$config, opt$output)
dev.off()
} else
{
suppressPackageStartupMessages(library(VennDiagram))
suppressPackageStartupMessages(library(gplots))


## increased summary -- there is more than one query definded
df.uropa.final <- .basic.summary(args[1],args[2], args[3])
df.uropa.best.per.query <- read.table(args[4], header=TRUE, sep="\t",stringsAsFactors = FALSE)
## increased summary -- there is more than one query definded
df.uropa.final <- .basic.summary(opt$finalhits, opt$config, opt$output)
df.uropa.best.per.query <- read.table(opt$besthits, header=TRUE, sep="\t",stringsAsFactors = FALSE)
num.peaks <- length(unique(df.uropa.final$peak_id))
df.uropa.best.per.query[,"distance"] <- as.numeric(df.uropa.best.per.query[,"distance"])
df.uropa.best.per.query <- df.uropa.best.per.query[complete.cases(df.uropa.best.per.query),]
Expand Down Expand Up @@ -248,12 +270,12 @@ if(length(args)==3){
considered.distance <- median.uropa.best + round(max.uropa.best/15)
if(considered.distance > max.uropa.best || considered.distance>10000){
if(median.uropa.best + 5000 > max.uropa.best){
considered.distance <- median.uropa.best
considered.distance <- median.uropa.best
} else {
considered.distance <- median.uropa.best + 5000
}
}

}
df.distance.query <- subset(df.uropa.best.per.query[,c("feature","distance","query")], (df.uropa.best.per.query[,"distance"] < considered.distance))
max.distance.query <- round(max(as.numeric(df.distance.query[,"distance"])))
bin.width <- round(max.distance.query/20)
Expand Down Expand Up @@ -305,7 +327,7 @@ if(length(args)==3){
for(q in 1:(num.queries-1)){
initial.query <- peaks.per.query[[paste0("query",queries[q])]]
if(tmp.num.query < num.queries){
for(c in num.queries:tmp.num.query){
for(c in num.queries:tmp.num.query){
if(queries[q] != queries[c]){
compare.query <- peaks.per.query[[paste0("query",queries[c])]]
venn.object <- venn(list(initial.query, compare.query))
Expand All @@ -319,16 +341,16 @@ if(length(args)==3){
dist <- 0.02
} else if(anno.compare.query == matches || anno.initial.query ==matches){
dist <- c(0.02, -0.02, -0.02)
}
venn.plot <- draw.triple.venn(num.peaks, anno.initial.query, anno.compare.query, anno.initial.query, matches, anno.compare.query, matches, category =c("all",paste0("query",queries[q]),paste0("query",queries[c])),
lwd = 2, lty ="solid", col = c("black", "red","blue"), fill = c("white","red","blue"), cex=1, cat.pos = c(0,0,0), cat.dist = dist, reverse = FALSE,
}
venn.plot <- draw.triple.venn(num.peaks, anno.initial.query, anno.compare.query, anno.initial.query, matches, anno.compare.query, matches, category =c("all",paste0("query",queries[q]),paste0("query",queries[c])),
lwd = 2, lty ="solid", col = c("black", "red","blue"), fill = c("white","red","blue"), cex=1, cat.pos = c(0,0,0), cat.dist = dist, reverse = FALSE,
cat.cex =1, cat.default.pos= "outer", alpha = c(0,.4,.4), euler.d=TRUE, scaled=TRUE)
grid.draw(venn.plot)
mtext(paste0("Peak based pairwise compare of ", paste0("query",queries[q]), " and ", paste0("query",queries[c])), side=3, line=0,outer=FALSE, cex=1)
popViewport()
}
}
}
}
}
tmp.num.query <- tmp.num.query+1
}

Expand All @@ -344,25 +366,25 @@ if(length(args)==3){
"\n\nAdditional Info:\nIt is evalueated whether peaks are annotated,\nbut not whether they are annotated for the same feature")
grid.newpage()
mtext(plot8, cex=1, adj=0, padj=1)

num <- venn(peaks.per.query)[,"num"]
if(num[length(num)] == 0){
num <- num+1
}
# compute distribution
v <- Venn(SetNames=paste0("query",queries), Weight=num)
cv <- compute.Venn(v, type="ChowRuskey", doWeights=TRUE)

# change FaceText size and line thickness
gp <- VennThemes(cv, colourAlgorithm = "signature")
gp[["FaceText"]] <- lapply(gp[["FaceText"]],function(gps){gps$fontsize<-10;gps})
gp[["Set"]] <- lapply(gp[["Set"]],function(gps){gps$lwd<-.8;gps})

#change label positions
SetLabels <- VennGetSetLabels(cv)
SetLabels <- VennGetSetLabels(cv)
max.x <- max(VennGetUniverseRange(cv)[,"x"])
max.y <- max(VennGetUniverseRange(cv)[,"y"])
for(i in 1:nrow(SetLabels)){
for(i in 1:nrow(SetLabels)){
SetLabels[i,"x"] <- max.x+2
SetLabels[i,"hjust"] <- "right"
if(i==1){
Expand All @@ -373,7 +395,7 @@ if(length(args)==3){
}
cv <- VennSetSetLabels(cv,SetLabels)

# change face label positions - not yet implemented in original package
# change face label positions - not yet implemented in original package
facelabel <- as.data.frame(VennGetFaceLabels(cv))
face.labels <- nrow(facelabel)
for(i in 1:(face.labels-2)){
Expand All @@ -384,7 +406,7 @@ if(length(args)==3){

# draw plot
pushViewport(viewport(x=.5, y=0.5, width=.8, height=.7))
grid.rect(gp=gpar(fill="white",lty = "blank"),width = unit(1.5, "npc"), height = unit(1.5, "npc"))
grid.rect(gp=gpar(fill="white",lty = "blank"),width = unit(1.5, "npc"), height = unit(1.5, "npc"))
venn.draw <- plot(cv, gp=gp)
grid.draw(venn.draw)
mtext("Chow Ruskey comparison of all peaks annotated with UROPA", side=3, line=0,outer=FALSE, cex=1)
Expand All @@ -393,6 +415,4 @@ if(length(args)==3){
})
}
dev.off()
} else {
stop("Usage: Rscript summary.R finalhits.txt summary_config.json summary.pdf besthits.txt")
}
38 changes: 19 additions & 19 deletions uropa.py
Expand Up @@ -4,7 +4,7 @@
@authors: Maria Kondili, Jens Preussner and Annika Fust
@license: MIT
@version: 1.1.0
@version: 1.1.2
@maintainer: Mario Looso
@email: mario.looso@mpi-bn.mpg.de
"""
Expand Down Expand Up @@ -95,7 +95,7 @@
"--version",
help="prints the version and exits",
action="version",
version="%(prog)s 1.1.1")
version="%(prog)s 1.1.2")
args = parser.parse_args()

config = args.input
Expand Down Expand Up @@ -338,41 +338,33 @@
logger.debug("Filenames for output files are: {}, {}". format(allhits_outfile, besthits_outfile))

ovls.finalize_file(allhits_outfile, allhits_partials, header, comments, log=logger)

#
# Reformat output
#
if args.reformat and len(queries) > 1 and not pr:
logger.info("Reformatting output...")
R_reform_Best = [
'reformat_output.R',
'-i',
besthits_outfile,
'-k',
'peak_id',
'-c',
'1:5',
',']
reformatBest_out = outdir + "Reformatted_" + \
os.path.basename(besthits_outfile)
'-d',
',',
'-t',
str(args.threads)]
try:
# creates output of Best and gives name "Reformatted_"
logger.debug('Reformat output call is {}'.format(R_reform_Best))
pr_R = sp.check_output(R_reform_Best)
except sp.CalledProcessError:
logger.warning("Reformatted output could not be created with call %s", ' '.join(R_reform_Best))
except OSError:
logger.warning("Rscript command not available for summary output.")

# Add header and write to new file:
reformatted_outfile = "Uropa_Reformatted_" + \
os.path.basename(besthits_outfile)
if os.path.exists(outdir + reformatBest_out):
ovls.write_out_file(
outdir +
reformatted_outfile,
reformatBest_out,
header_comments,
header)
if os.path.exists(outdir + reformatted_outfile):
logger.info("Reformatted BestHits output: %s", reformatted_outfile)
os.remove(outdir + reformatBest_out)

#
# Visual summary
#
Expand All @@ -384,17 +376,25 @@
if len(queries) > 1 and not pr and os.path.exists(merged_outfile):
call = [
summary_script,
"-f",
merged_outfile,
"-c",
outdir + "summary_config.json",
"-o",
summary_output,
"-b",
besthits_outfile]
else:
call = [
summary_script,
"-f",
besthits_outfile,
"-c",
outdir + "summary_config.json",
"-o",
summary_output]
try:
logger.debug('Summary output call is {}'.format(call))
sum_pr = sp.check_output(call)
except sp.CalledProcessError:
logger.warning("Visualized summary output could not be created from %s.", ' '.join(call))
Expand Down

0 comments on commit 11c0cb4

Please sign in to comment.