Skip to content

Commit

Permalink
Merge pull request loosolab#25 from loosolab/mqparser_tests
Browse files Browse the repository at this point in the history
Added tests for parse_MaxQuant
  • Loading branch information
HendrikSchultheis authored Jul 16, 2018
2 parents 09b40ca + 4548826 commit 1b98804
Show file tree
Hide file tree
Showing 9 changed files with 675 additions and 6 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,6 @@ Imports: shiny,
RoxygenNote: 6.0.1
biocViews:
Suggests: knitr,
rmarkdown
rmarkdown,
testthat
VignetteBuilder: knitr
28 changes: 23 additions & 5 deletions R/parser.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
#' @param version pre-header information about version (optional)
#' @param experiment_id pre-header information about experiment id (optional)
#'
#' @return TRUE on success
#'
#' @export
parse_MaxQuant <- function(proteinGroups_in, summary_in, outfile, outfile_reduced, config = system.file("extdata", "parser_MaxQuant_config.json", package = "wilson"), delimiter = ";", format = NULL, version = NULL, experiment_id = NULL){
if (missing(proteinGroups_in)) {
Expand Down Expand Up @@ -85,7 +87,7 @@ parse_MaxQuant <- function(proteinGroups_in, summary_in, outfile, outfile_reduce
# @return String level of given column
get_sample_level <- function(col_head, isSample, full_list) {
# Get the level of all 'sample' columns.
# Default: level <- "sample"
# Default: level is "sample"
if (grepl("Ratio", col_head, perl = TRUE)) {
if (grepl("type", col_head, perl = TRUE)) return("feature")
return("contrast")
Expand Down Expand Up @@ -165,7 +167,7 @@ parse_MaxQuant <- function(proteinGroups_in, summary_in, outfile, outfile_reduce
# @param version version number
# @param exp_id experiment id
# @param pGroups data table protein groups file
write_clarion_file <- function(meta, out, format, version, exp_id, pGroups, delimiter){
write_clarion_file <- function(meta, out, format, version, exp_id, pGroups, delimiter) {
to_append <- FALSE
if (!missing(format)) {
write(paste0("!format=", format), file = out, append = to_append)
Expand All @@ -188,10 +190,21 @@ parse_MaxQuant <- function(proteinGroups_in, summary_in, outfile, outfile_reduce
# reading files in data tables
proteinGroups <- data.table::fread(proteinGroups_in, header = TRUE, quote = "")
summary_file <- data.table::fread(summary_in, header = TRUE)
meta_config <- rjson::fromJSON(file = config)

meta_config <- tryCatch({
rjson::fromJSON(file = config)
}, error = function(cond) {
stop("Could not read config file")
}, warning = function(w) {
stop("Could not read config file")
})

# getting experiment names
exp_names <- (unique(summary_file[Experiment != "", Experiment]))
if ("Experiment" %in% colnames(summary_file)) {
exp_names <- unique(summary_file[Experiment != "", Experiment])
} else {
stop("wrong format on summary file: column \'Experiment\' misssing")
}

meta <- get_meta_from_config(meta_config = meta_config)

Expand All @@ -203,6 +216,9 @@ parse_MaxQuant <- function(proteinGroups_in, summary_in, outfile, outfile_reduce
sample_ary <- meta_config$type_array
reduced_list <- meta_config$reduced_list
full_sample_list <- c(sample_scores, sample_ratios, sample_probability, sample_category, sample_ary)
if (is.null(reduced_list)) {
stop("reduced_list is missing in config file")
}

# get column names
col_names <- colnames(proteinGroups)
Expand All @@ -214,7 +230,7 @@ parse_MaxQuant <- function(proteinGroups_in, summary_in, outfile, outfile_reduce
# append rows to data table with metadata
samples_list <- lapply(col_names, function(col_head) {

unlist(lapply(exp_names, function(name){
unlist(lapply(exp_names, function(name) {
name_brackets <- paste0("\\Q", name)
exp_regex <- paste0("\\Q ", name)
sample_description <- strsplit(col_head, exp_regex)
Expand Down Expand Up @@ -286,6 +302,8 @@ parse_MaxQuant <- function(proteinGroups_in, summary_in, outfile, outfile_reduce
# writing reduced CLARION file
write_clarion_file(meta = meta_reduced, out = outfile_reduced, format = format,
version = version, exp_id = experiment_id, pGroups = proteinGroups, delimiter = delimiter)

return(TRUE)
}

#' Method to parse input file.
Expand Down
4 changes: 4 additions & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Test runner script for the package's testthat suite.
# Attach the test framework and the package under test.
library(testthat)
library(wilson)

# Run the testthat tests for the "wilson" package.
test_check("wilson")
291 changes: 291 additions & 0 deletions tests/testthat/fail_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
{
"meta": [
{
"col_name": "Protein IDs",
"level": "feature",
"type": "array",
"label": "IDs",
"sublabel": "proteins"
},
{
"col_name": "Majority protein IDs",
"level": "feature",
"type": "array",
"label": "IDs",
"sublabel": "majority protein"
},
{
"col_name": "Protein names",
"level": "feature",
"type": "array",
"label": "protein names",
"sublabel": ""
},
{
"col_name": "Gene names",
"level": "feature",
"type": "array",
"label": "gene names",
"sublabel": ""
},
{
"col_name": "Fasta headers",
"level": "feature",
"type": "array",
"label": "fasta headers",
"sublabel": ""
},
{
"col_name": "id",
"level": "feature",
"type": "unique_id",
"label": "unique identifier",
"sublabel": ""
},
{
"col_name": "Peptide IDs",
"level": "feature",
"type": "array",
"label": "IDs",
"sublabel": "peptide"
},
{
"col_name": "Mod. peptide IDs",
"level": "feature",
"type": "array",
"label": "IDs",
"sublabel": "mod. peptide"
},
{
"col_name": "Evidence IDs",
"level": "feature",
"type": "array",
"label": "IDs",
"sublabel": "evidence"
},
{
"col_name": "MS/MS IDs",
"level": "feature",
"type": "array",
"label": "IDs",
"sublabel": "MS/MS"
},
{
"col_name": "Mol. weight [kDa]",
"level": "feature",
"type": "category",
"label": "Mol. weight [kDa]",
"sublabel": ""
},
{
"col_name": "Sequence length",
"level": "feature",
"type": "category",
"label": "length",
"sublabel": "Sequence"
},
{
"col_name": "Sequence lengths",
"level": "feature",
"type": "array",
"label": "lengths",
"sublabel": "Sequence"
},
{
"col_name": "Reverse",
"level": "feature",
"type": "category",
"label": "Reverse",
"sublabel": ""
},
{
"col_name": "Potential contaminant",
"level": "feature",
"type": "category",
"label": "Potential contaminant",
"sublabel": ""
},
{
"col_name": "Oxidation (M) site IDs",
"level": "feature",
"type": "array",
"label": "IDs",
"sublabel": "Oxidation (M) site"
},
{
"col_name": "Oxidation (M) site positions",
"level": "feature",
"type": "array",
"label": "positions",
"sublabel": "Oxidation (M) site"
},
{
"col_name": "Phospho (STY) site IDs",
"level": "feature",
"type": "array",
"label": "IDs",
"sublabel": "Phospho (STY) site"
},
{
"col_name": "Phospho (STY) site positions",
"level": "feature",
"type": "array",
"label": "positions",
"sublabel": "Phospho (STY) site"
},
{
"col_name": "Peptide counts (all)",
"level": "condition",
"type": "array",
"label": "counts",
"sublabel": "all peptide"
},
{
"col_name": "Peptide counts (razor+unique)",
"level": "condition",
"type": "array",
"label": "counts",
"sublabel": "razor+unique peptides"
},
{
"col_name": "Peptide counts (unique)",
"level": "condition",
"type": "array",
"label": "counts",
"sublabel": "unique peptides"
},
{
"col_name": "Number of proteins",
"level": "condition",
"type": "score",
"label": "count",
"sublabel": "proteins"
},
{
"col_name": "Peptides",
"level": "condition",
"type": "score",
"label": "count",
"sublabel": "Peptides"
},
{
"col_name": "Razor + unique peptides",
"level": "condition",
"type": "score",
"label": "count",
"sublabel": "Razor + unique peptides"
},
{
"col_name": "Unique peptides",
"level": "condition",
"type": "score",
"label": "count",
"sublabel": "Unique peptides"
},
{
"col_name": "MS/MS count",
"level": "condition",
"type": "score",
"label": "count",
"sublabel": "MS/MS"
},
{
"col_name": "Fraction average",
"level": "condition",
"type": "score",
"label": "fraction",
"sublabel": "average"
},
{
"col_name": "Best MS/MS",
"level": "condition",
"type": "array",
"label": "MS/MS",
"sublabel": "best"
},
{
"col_name": "Intensity",
"level": "condition",
"type": "score",
"label": "Intensity",
"sublabel": ""
},
{
"col_name": "Intensity L",
"level": "condition",
"type": "score",
"label": "Intensity",
"sublabel": "L"
},
{
"col_name": "Intensity M",
"level": "condition",
"type": "score",
"label": "Intensity",
"sublabel": "M"
},
{
"col_name": "Intensity H",
"level": "condition",
"type": "score",
"label": "Intensity",
"sublabel": "H"
},
{
"col_name": "Q-value",
"level": "condition",
"type": "probability",
"label": "q-value",
"sublabel": ""
},
{
"col_name": "Score",
"level": "condition",
"type": "probability",
"label": "score",
"sublabel": ""
},
{
"col_name": "Unique sequence coverage [%]",
"level": "condition",
"type": "ratio",
"label": "sequence coverage",
"sublabel": "Unique"
},
{
"col_name": "Unique + razor sequence coverage [%]",
"level": "condition",
"type": "ratio",
"label": "sequence coverage",
"sublabel": "Unique + razor"
},
{
"col_name": "Sequence coverage [%]",
"level": "condition",
"type": "ratio",
"label": "sequence coverage",
"sublabel": ""
},
{
"col_name": "Only identified by site",
"level": "feature",
"type": "category",
"label": "identified by site",
"sublabel": ""
},
{
"col_name": "Peptide is razor",
"level": "condition",
"type": "array",
"label": "peptide",
"sublabel": "is razor"
}
],

"type_scores":
["Peptides", "Razor + unique peptides", "Unique peptides", "Ratio M/L count", "Ratio M/L iso-count", "Ratio H/L count", "Ratio H/L iso-count" ,
"Ratio H/M count", "Ratio H/M iso-count", "Intensity", "Intensity L", "Intensity M", "Intensity H", "MS/MS count", "LFQ intensity", "Reporter intensity count",
"Reporter intensity corrected", "Reporter intensity", "Fraction", "iBAQ"]

}

2 changes: 2 additions & 0 deletions tests/testthat/proteinGroups_test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Protein IDs Majority protein IDs Peptide counts (all) Peptide counts (razor+unique) Peptide counts (unique) Protein names Gene names Fasta headers Number of proteins Peptides Razor + unique peptides Unique peptides Peptides Exp1 Peptides Exp2 Razor + unique peptides Exp1 Razor + unique peptides Exp2 Unique peptides Exp1 Unique peptides Exp2 Sequence coverage [%] Unique + razor sequence coverage [%] Unique sequence coverage [%] Mol. weight [kDa] Sequence length Sequence lengths Fraction average Fraction 1 Fraction 2 Fraction 3 Fraction 4 Fraction 5 Fraction 6 Fraction 7 Fraction 8 Q-value Score Reporter intensity corrected 0 Reporter intensity corrected 1 Reporter intensity corrected 2 Reporter intensity corrected 3 Reporter intensity corrected 4 Reporter intensity corrected 5 Reporter intensity 0 Reporter intensity 1 Reporter intensity 2 Reporter intensity 3 Reporter intensity 4 Reporter intensity 5 Reporter intensity count 0 Reporter intensity count 1 Reporter intensity count 2 Reporter intensity count 3 Reporter intensity count 4 Reporter intensity count 5 Reporter intensity corrected 0 Exp1 Reporter intensity corrected 1 Exp1 Reporter intensity corrected 2 Exp1 Reporter intensity corrected 3 Exp1 Reporter intensity corrected 4 Exp1 Reporter intensity corrected 5 Exp1 Reporter intensity corrected 0 Exp2 Reporter intensity corrected 1 Exp2 Reporter intensity corrected 2 Exp2 Reporter intensity corrected 3 Exp2 Reporter intensity corrected 4 Exp2 Reporter intensity corrected 5 Exp2 Reporter intensity 0 Exp1 Reporter intensity 1 Exp1 Reporter intensity 2 Exp1 Reporter intensity 3 Exp1 Reporter intensity 4 Exp1 Reporter intensity 5 Exp1 Reporter intensity 0 Exp2 Reporter intensity 1 Exp2 Reporter intensity 2 Exp2 Reporter intensity 3 Exp2 Reporter intensity 4 Exp2 Reporter intensity 5 Exp2 Reporter intensity count 0 Exp1 Reporter intensity count 1 Exp1 Reporter intensity count 2 Exp1 Reporter intensity count 3 Exp1 Reporter intensity count 4 Exp1 Reporter intensity 
count 5 Exp1 Reporter intensity count 0 Exp2 Reporter intensity count 1 Exp2 Reporter intensity count 2 Exp2 Reporter intensity count 3 Exp2 Reporter intensity count 4 Exp2 Reporter intensity count 5 Exp2 Sequence coverage Exp1 [%] Sequence coverage Exp2 [%] Intensity Intensity Exp1 Intensity Exp2 MS/MS count Only identified by site Reverse Potential contaminant id Peptide IDs Peptide is razor Mod. peptide IDs Evidence IDs MS/MS IDs Best MS/MS Oxidation (M) site IDs Oxidation (M) site positions
A0A068BEQ2;P50171;P50171-2;G3UX44 A0A068BEQ2;P50171;P50171-2;G3UX44 11;11;11;10 11;11;11;10 11;11;11;10 Estradiol 17-beta-dehydrogenase 8 H2-Ke6;Hsd17b8 tr|A0A068BEQ2|A0A068BEQ2_MOUSE H2-K region expressed gene 6, isoform CRA_a OS=Mus musculus GN=H2-Ke6 PE=2 SV=1;sp|P50171|DHB8_MOUSE Estradiol 17-beta-dehydrogenase 8 OS=Mus musculus GN=Hsd17b8 PE=1 SV=2;sp|P50171-2|DHB8_MOUSE Isoform Long of Estradiol 17-b 4 11 11 11 9 9 9 9 9 9 61.4 61.4 61.4 26.587 259 259;259;274;234 6.32 2 1 1 3 4 4 5 17 0 309.9 422810 381560 394980 374200 414580 428320 405880 379740 401170 384810 418890 414670 29 29 29 29 29 29 159850 141420 149170 147480 162360 166190 262960 240140 245810 226710 252220 262130 153390 140950 151450 151320 164040 160930 252490 238790 249720 233490 254850 253730 12 12 12 12 12 12 17 17 17 17 17 17 46.3 51 8493600000 3090200000 5403400000 42 0 1707;5068;5754;17624;17838;18452;24153;26755;37315;45123;45334 True;True;True;True;True;True;True;True;True;True;True 1818;5449;5450;6176;18892;19121;19782;25924;28715;40406;48814;49037 5720;5721;5722;5723;5724;16176;16177;18100;56337;56338;56339;56340;57035;57036;57037;57038;57039;57040;59065;59066;59067;59068;77499;77500;85797;119780;119781;119782;119783;145479;145480;145481;145482;145483;146163;146164;146165 6359;6360;6361;6362;6363;6364;6365;6366;17918;17919;20019;62610;62611;62612;62613;63369;63370;63371;63372;63373;63374;65658;65659;65660;65661;86004;86005;86006;86007;95195;133113;133114;133115;133116;161672;161673;161674;161675;161676;162411;162412;162413 6366;17919;20019;62610;63370;65660;86006;95195;133113;161676;162411 0;1 113;204
Loading

0 comments on commit 1b98804

Please sign in to comment.