Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
library(optparse)
library(reshape2)
library(rjson)
library(ggplot2)
library(ggpubr)
# Helper for reading the pan-cancer XGBoost survival prediction results
# (the "CI" entry of each result file) for up to n model replications.
#
# Arguments:
#   result_path   - directory containing the per-replication JSON files.
#   cohorts       - character vector of cohort names to extract.
#   n_replication - number of replications to look for (1..n_replication).
#   result_file   - sprintf() template of the file name; it receives
#                   result_path and the replication number, in that order.
#
# Returns a named list with one numeric vector per cohort. Each vector holds
# the values of the replications that could be read, in replication order.
# Missing replications are skipped with a warning and leave no NA
# placeholders, so element i belongs to the i-th *available* replication.
# Cohorts for which no value was found are absent from the list.
load_pancancer_json_results <- function(result_path, cohorts, n_replication,
                                        result_file = "%s/xgb_measure_CI_replication_%d_result.json") {
  # seq_len() gives an empty sequence for n_replication == 0 (1:0 would
  # iterate over 1 and 0).
  replications <- seq_len(n_replication)
  result_files <- sprintf(result_file, result_path, replications)
  is_available <- file.exists(result_files)

  for (replication in replications[!is_available]) {
    warning(sprintf("Results for replication %d are missing.", replication),
            call. = FALSE, immediate. = TRUE)
  }

  # Parse every available file exactly once and keep only its CI section.
  ci_per_replication <- lapply(result_files[is_available], function(path) {
    fromJSON(file = path)$CI
  })
  message(sprintf("Read %d replications.", length(ci_per_replication)))

  results <- list()
  for (cohort in cohorts) {
    per_replication <- lapply(ci_per_replication, function(ci) {
      # Guard the lookup so a cohort missing from one file is skipped
      # instead of aborting the whole import.
      if (cohort %in% names(ci)) ci[[cohort]] else NULL
    })
    # unlist() drops the NULL entries of replications lacking this cohort.
    values <- unlist(per_replication, use.names = FALSE)
    if (length(values) < length(ci_per_replication)) {
      warning(sprintf("Cohort %s is missing in %d of %d replications read.",
                      cohort,
                      length(ci_per_replication) - length(values),
                      length(ci_per_replication)),
              call. = FALSE, immediate. = TRUE)
    }
    if (length(values) > 0) {
      results[[cohort]] <- values
    }
  }
  results
}
# Plot pan-cancer results for the different cohorts.
# Default cohorts; used unless --cohort is given on the command line.
cohorts <- c("TCGA-ACC", "TCGA-BLCA", "TCGA-BRCA", "TCGA-CESC", "TCGA-COAD",
             "TCGA-ESCA", "TCGA-GBM", "TCGA-HNSC", "TCGA-KIRC", "TCGA-KIRP",
             "TCGA-LAML", "TCGA-LGG", "TCGA-LIHC", "TCGA-LUAD", "TCGA-LUSC",
             "TCGA-MESO", "TCGA-OV", "TCGA-PAAD", "TCGA-READ", "TCGA-SARC",
             "TCGA-SKCM", "TCGA-STAD", "TCGA-UCEC", "TCGA-UCS", "TCGA-UVM")

# Command line options: output file, result directory, number of
# replications and an optional cohort selection.
option_list <- list(
  make_option(c("-o", "--output_file"), type = "character",
              default = "model_performance_xgb_pancancer.pdf",
              help = "Filename (and path) where plot should be saved to [default = %default]",
              metavar = "character"),
  make_option(c("-r", "--result_path"), type = "character", default = "results/",
              help = "Path to the result directory of model training [default = %default]",
              metavar = "character"),
  make_option(c("-n", "--num_replications"), type = "numeric", default = 100,
              help = "Number of model replications [default = %default]",
              metavar = "numeric"),
  make_option(c("-c", "--cohort"), type = "character", default = NULL,
              help = "Prepare data only for specified cohort(s) [default = all cohorts]. Either a single cohort (e.g. 'TCGA-BRCA') or a comma separated list of cohorts (e.g. 'TCGA-BRCA', 'TCGA-COAD', 'TCGA-LUAD')",
              metavar = "character")
)
opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

output_file <- opt$output_file
result_path <- opt$result_path
num_replications <- opt$num_replications

if (!is.null(opt$cohort)) {
  # strsplit() returns a list with one element per input string; the cohort
  # names are the character vector stored in its first element.
  requested_cohorts <- strsplit(opt$cohort, ",", fixed = TRUE)[[1]]
  # The help text shows names separated by ", " and wrapped in quotes, so
  # strip surrounding whitespace and quote characters from every name.
  requested_cohorts <- gsub("^[[:space:]'\"]+|[[:space:]'\"]+$", "",
                            requested_cohorts)
  requested_cohorts <- requested_cohorts[nzchar(requested_cohorts)]
  if (length(requested_cohorts) == 0) {
    stop("Option --cohort was given but contains no cohort name.",
         call. = FALSE)
  }
  cohorts <- requested_cohorts
}
# Load the pan-cancer results (one vector of CI values per cohort).
pancancer_results <- load_pancancer_json_results(result_path, cohorts,
                                                 num_replications)
if (length(pancancer_results) == 0) {
  # Without this guard the column renaming below fails with an obscure error.
  stop(sprintf("No results could be loaded from '%s'; nothing to plot.",
               result_path),
       call. = FALSE)
}

# Prepare the result data frame for plotting: melt() on a named list yields
# one row per value plus a column holding the list element name.
result_df <- melt(pancancer_results)
colnames(result_df) <- c("CI", "cohort")

# pdf() expects inches; the figure size is specified in millimetres.
mm <- 1 / 25.4

# One box plot per cohort with a dashed reference line at a C-index of 0.5.
p <- ggplot(data = result_df, aes(y = CI)) +
  geom_boxplot(fill = "#619CFF") +
  labs(y = "C-Index") +
  facet_wrap(~ cohort, ncol = 5, scales = "free", strip.position = "bottom") +
  geom_hline(yintercept = 0.5, linetype = "dashed", color = "green") +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title.x = element_blank()) +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.1)))

pdf(output_file, width = 174 * mm, height = 174 * mm)
# Print explicitly: ggplot objects are only drawn by auto-printing at top
# level, which does not happen when the script is run through source().
# The finally clause closes the PDF device even if rendering fails.
tryCatch(print(p), finally = invisible(dev.off()))