# plotPredictionPerformance.R

library(optparse)
library(reshape2)
library(rjson)
library(ggplot2)
library(ggpubr)

# Helper for reading the pan-cancer XGBoost survival prediction results
# (C-index per cohort) of n model replications.
#
# Args:
#   result_path:   directory that contains the per-replication JSON files.
#   cohorts:       character vector (or list) of cohort names to extract.
#   n_replication: number of replications; the files for replications
#                  1..n_replication are looked up.
#   result_file:   sprintf() template of the file name, filled with
#                  result_path (%s) and the replication number (%d).
#
# Returns:
#   A named list with one vector per cohort, indexed by replication number.
#   Positions stay NA for replications whose file is missing (if a later
#   replication was read) or whose JSON has no CI entry for the cohort.
#   An empty list is returned when no result file was found at all.
load_pancancer_json_results <- function(result_path, cohorts, n_replication, result_file="%s/xgb_measure_CI_replication_%d_result.json") {
    # also accept a list of names, e.g. the raw return value of strsplit()
    cohorts <- unlist(cohorts, use.names = FALSE)
    results <- list()
    num_reps <- 0
    # seq_len() gives an empty sequence for n_replication = 0, whereas
    # 1:n_replication would iterate over c(1, 0)
    for (replication in seq_len(n_replication)) {
        json_file <- sprintf(result_file, result_path, replication)
        if (!file.exists(json_file)) {
            print(sprintf("WARNING: Results for replication %d are missing.", replication))
            next
        }
        num_reps <- num_reps + 1
        result <- fromJSON(file = json_file)
        for (cohort in cohorts) {
            ci <- result$CI[[cohort]]
            # assigning NULL into a vector position is an error, so a cohort
            # that is absent from this result file is recorded as NA instead
            results[[cohort]][replication] <- if (is.null(ci)) NA_real_ else ci
        }
    }
    print(sprintf("Read %d replications.", num_reps))
    return(results)
}

# plot pan-cancer results for the different cohorts

# Default set of TCGA cohorts to plot; replaced by the --cohort option if given.
cohorts <- c("TCGA-ACC", "TCGA-BLCA", "TCGA-BRCA", "TCGA-CESC", "TCGA-COAD",
            "TCGA-ESCA", "TCGA-GBM", "TCGA-HNSC", "TCGA-KIRC", "TCGA-KIRP",
            "TCGA-LAML", "TCGA-LGG", "TCGA-LIHC", "TCGA-LUAD", "TCGA-LUSC",
            "TCGA-MESO", "TCGA-OV", "TCGA-PAAD", "TCGA-READ", "TCGA-SARC",
            "TCGA-SKCM", "TCGA-STAD", "TCGA-UCEC", "TCGA-UCS", "TCGA-UVM")

# Command line options of the script.
option_list <- list(
    make_option(c("-o", "--output_file"), type="character", default="model_performance_xgb_pancancer.pdf",
                help="Filename (and path) where plot should be saved to [default = %default]", metavar="character"),
    make_option(c("-r", "--result_path"), type="character", default="results/",
                help="Path to the result directory of model training [default = %default]", metavar="character"),
    make_option(c("-n", "--num_replications"), type="numeric", default=100,
                help="Number of model replications [default = %default]", metavar="numeric"),
    make_option(c("-c", "--cohort"), type="character", default=NULL,
                help="Prepare data only for specified cohort(s) [default = all cohorts]. Either a single cohort (e.g. 'TCGA-BRCA') or a comma separated list of cohorts (e.g. 'TCGA-BRCA', 'TCGA-COAD', 'TCGA-LUAD')", metavar="character")
)

opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

output_file <- opt$output_file
result_path <- opt$result_path
num_replications <- opt$num_replications

if (!is.null(opt$cohort)) {
    # strsplit() returns a list with one element per input string; take the
    # first element to get a plain character vector. Whitespace around the
    # separators is stripped because the help text shows ", " separated lists.
    cohorts <- trimws(strsplit(opt$cohort, ",", fixed = TRUE)[[1]])
    cohorts <- cohorts[nzchar(cohorts)]
}

# load the pan-cancer results
pancancer_results <- load_pancancer_json_results(result_path, cohorts, num_replications)
if (length(pancancer_results) == 0) {
    # fail with a clear message instead of a cryptic error while renaming
    # the columns of an empty melt() result
    stop(sprintf("No replication results found in '%s'.", result_path), call. = FALSE)
}

# prepare result data frame for plotting: one row per (replication, cohort)
result_df <- melt(pancancer_results)
colnames(result_df) <- c("CI", "cohort")

# pdf() expects its size in inches; mm converts millimetres to inches
mm <- 1 / 25.4
pdf(output_file, width = 174 * mm, height = 174 * mm)
p <- ggplot(data = result_df, aes(y = CI)) +
    geom_boxplot(fill = "#619CFF") +
    labs(y = "C-Index") +
    facet_wrap(~ cohort, ncol = 5, scales = "free", strip.position = "bottom") +
    # dashed reference line at a C-index of 0.5
    geom_hline(yintercept = 0.5, linetype = "dashed", color = "green") +
    theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank()) +
    scale_y_continuous(expand = expansion(mult = c(0.05, 0.1)))
# print explicitly so the plot is also drawn when the file is run via
# source(), which does not auto-print top-level values by default
print(p)
dev.off()
	library(optparse)
	library(reshape2)
	library(rjson)
	library(ggplot2)
	library(ggpubr)

	# Helper for reading the pan-cancer XGBoost survival prediction results
	# (C-index per cohort) of n model replications.
	# NOTE(review): this definition repeats the one earlier in this file.
	#
	# Args:
	#   result_path:   directory that contains the per-replication JSON files.
	#   cohorts:       character vector (or list) of cohort names to extract.
	#   n_replication: number of replications; the files for replications
	#                  1..n_replication are looked up.
	#   result_file:   sprintf() template of the file name, filled with
	#                  result_path (%s) and the replication number (%d).
	#
	# Returns:
	#   A named list with one vector per cohort, indexed by replication number.
	#   Positions stay NA for replications whose file is missing (if a later
	#   replication was read) or whose JSON has no CI entry for the cohort.
	#   An empty list is returned when no result file was found at all.
	load_pancancer_json_results <- function(result_path, cohorts, n_replication, result_file="%s/xgb_measure_CI_replication_%d_result.json") {
	    # also accept a list of names, e.g. the raw return value of strsplit()
	    cohorts <- unlist(cohorts, use.names = FALSE)
	    results <- list()
	    num_reps <- 0
	    # seq_len() gives an empty sequence for n_replication = 0, whereas
	    # 1:n_replication would iterate over c(1, 0)
	    for (replication in seq_len(n_replication)) {
	        json_file <- sprintf(result_file, result_path, replication)
	        if (!file.exists(json_file)) {
	            print(sprintf("WARNING: Results for replication %d are missing.", replication))
	            next
	        }
	        num_reps <- num_reps + 1
	        result <- fromJSON(file = json_file)
	        for (cohort in cohorts) {
	            ci <- result$CI[[cohort]]
	            # assigning NULL into a vector position is an error, so a cohort
	            # that is absent from this result file is recorded as NA instead
	            results[[cohort]][replication] <- if (is.null(ci)) NA_real_ else ci
	        }
	    }
	    print(sprintf("Read %d replications.", num_reps))
	    return(results)
	}

	# plot pan-cancer results for the different cohorts

	# Default set of TCGA cohorts to plot; replaced by the --cohort option if given.
	# NOTE(review): this script section repeats the one earlier in this file.
	cohorts <- c("TCGA-ACC", "TCGA-BLCA", "TCGA-BRCA", "TCGA-CESC", "TCGA-COAD",
	            "TCGA-ESCA", "TCGA-GBM", "TCGA-HNSC", "TCGA-KIRC", "TCGA-KIRP",
	            "TCGA-LAML", "TCGA-LGG", "TCGA-LIHC", "TCGA-LUAD", "TCGA-LUSC",
	            "TCGA-MESO", "TCGA-OV", "TCGA-PAAD", "TCGA-READ", "TCGA-SARC",
	            "TCGA-SKCM", "TCGA-STAD", "TCGA-UCEC", "TCGA-UCS", "TCGA-UVM")

	# Command line options of the script.
	option_list <- list(
	    make_option(c("-o", "--output_file"), type="character", default="model_performance_xgb_pancancer.pdf",
	                help="Filename (and path) where plot should be saved to [default = %default]", metavar="character"),
	    make_option(c("-r", "--result_path"), type="character", default="results/",
	                help="Path to the result directory of model training [default = %default]", metavar="character"),
	    make_option(c("-n", "--num_replications"), type="numeric", default=100,
	                help="Number of model replications [default = %default]", metavar="numeric"),
	    make_option(c("-c", "--cohort"), type="character", default=NULL,
	                help="Prepare data only for specified cohort(s) [default = all cohorts]. Either a single cohort (e.g. 'TCGA-BRCA') or a comma separated list of cohorts (e.g. 'TCGA-BRCA', 'TCGA-COAD', 'TCGA-LUAD')", metavar="character")
	)

	opt_parser <- OptionParser(option_list = option_list)
	opt <- parse_args(opt_parser)

	output_file <- opt$output_file
	result_path <- opt$result_path
	num_replications <- opt$num_replications

	if (!is.null(opt$cohort)) {
	    # strsplit() returns a list with one element per input string; take the
	    # first element to get a plain character vector. Whitespace around the
	    # separators is stripped because the help text shows ", " separated lists.
	    cohorts <- trimws(strsplit(opt$cohort, ",", fixed = TRUE)[[1]])
	    cohorts <- cohorts[nzchar(cohorts)]
	}

	# load the pan-cancer results
	pancancer_results <- load_pancancer_json_results(result_path, cohorts, num_replications)
	if (length(pancancer_results) == 0) {
	    # fail with a clear message instead of a cryptic error while renaming
	    # the columns of an empty melt() result
	    stop(sprintf("No replication results found in '%s'.", result_path), call. = FALSE)
	}

	# prepare result data frame for plotting: one row per (replication, cohort)
	result_df <- melt(pancancer_results)
	colnames(result_df) <- c("CI", "cohort")

	# pdf() expects its size in inches; mm converts millimetres to inches
	mm <- 1 / 25.4
	# the multiplication must be explicit: "174mm" is not valid R syntax
	pdf(output_file, width = 174 * mm, height = 174 * mm)
	p <- ggplot(data = result_df, aes(y = CI)) +
	    geom_boxplot(fill = "#619CFF") +
	    labs(y = "C-Index") +
	    facet_wrap(~ cohort, ncol = 5, scales = "free", strip.position = "bottom") +
	    # dashed reference line at a C-index of 0.5
	    geom_hline(yintercept = 0.5, linetype = "dashed", color = "green") +
	    theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank()) +
	    scale_y_continuous(expand = expansion(mult = c(0.05, 0.1)))
	# print explicitly so the plot is also drawn when the file is run via
	# source(), which does not auto-print top-level values by default
	print(p)
	dev.off()