Skip to content
This repository has been archived by the owner. It is now read-only.

Added tests for parse_MaxQuant #25

Merged
merged 13 commits into from
Jul 16, 2018
19 changes: 17 additions & 2 deletions R/parser.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
#' @param version pre-header information about version (optional)
#' @param experiment_id pre-header information about experiment id (optional)
#'
#' @return TRUE on success
#'
#' @export
parse_MaxQuant <- function(proteinGroups_in, summary_in, outfile, outfile_reduced, config = system.file("extdata", "parser_MaxQuant_config.json", package = "wilson"), delimiter = ";", format = NULL, version = NULL, experiment_id = NULL){
if (missing(proteinGroups_in)) {
Expand Down Expand Up @@ -188,13 +190,21 @@ parse_MaxQuant <- function(proteinGroups_in, summary_in, outfile, outfile_reduce
# reading files in data tables
proteinGroups <- data.table::fread(proteinGroups_in, header = TRUE, quote = "")
summary_file <- data.table::fread(summary_in, header = TRUE)
meta_config <- rjson::fromJSON(file = config)
meta_config <- tryCatch({rjson::fromJSON(file = config)},
error=function(cond){
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add whitespace around '=' and before '{'.

stop("Could not read config file")
})

# getting experiment names
exp_names <- (unique(summary_file[Experiment != "", Experiment]))
if("Experiment" %in% colnames(summary_file)){
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add whitespace after 'if' and before '{'. Like this:
if ("Experiment" %in% colnames(summary_file)) {

exp_names <- (unique(summary_file[Experiment != "", Experiment]))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The outer parentheses are redundant. Write it like this:
exp_names <- unique(summary_file[Experiment != "", Experiment])

} else {
stop("wrong format on summary file: column \'Experiment\' misssing")
}

meta <- get_meta_from_config(meta_config = meta_config)


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't follow an empty line with another empty line.

sample_scores <- meta_config$type_scores
sample_ratios <- meta_config$type_ratios
sample_probability <- meta_config$type_probability
Expand All @@ -203,6 +213,9 @@ parse_MaxQuant <- function(proteinGroups_in, summary_in, outfile, outfile_reduce
sample_ary <- meta_config$type_array
reduced_list <- meta_config$reduced_list
full_sample_list <- c(sample_scores, sample_ratios, sample_probability, sample_category, sample_ary)
if(is.null(reduced_list)){
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add whitespace after 'if' and before '{'. Like this:
if (is.null(reduced_list)) {

stop("reduced_list is missing in config file")
}

# get column names
col_names <- colnames(proteinGroups)
Expand Down Expand Up @@ -286,6 +299,8 @@ parse_MaxQuant <- function(proteinGroups_in, summary_in, outfile, outfile_reduce
# writing reduced CLARION file
write_clarion_file(meta = meta_reduced, out = outfile_reduced, format = format,
version = version, exp_id = experiment_id, pGroups = proteinGroups, delimiter = delimiter)

return(TRUE)
}

#' Method to parse input file.
Expand Down
4 changes: 4 additions & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Test runner script: attaches testthat and the wilson package, then runs
# the package's test suite via test_check().
library(testthat)
library(wilson)

test_check("wilson")
291 changes: 291 additions & 0 deletions tests/testthat/fail_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
{
"meta": [
{
"col_name": "Protein IDs",
"level": "feature",
"type": "array",
"label": "IDs",
"sublabel": "proteins"
},
{
"col_name": "Majority protein IDs",
"level": "feature",
"type": "array",
"label": "IDs",
"sublabel": "majority protein"
},
{
"col_name": "Protein names",
"level": "feature",
"type": "array",
"label": "protein names",
"sublabel": ""
},
{
"col_name": "Gene names",
"level": "feature",
"type": "array",
"label": "gene names",
"sublabel": ""
},
{
"col_name": "Fasta headers",
"level": "feature",
"type": "array",
"label": "fasta headers",
"sublabel": ""
},
{
"col_name": "id",
"level": "feature",
"type": "unique_id",
"label": "unique identifier",
"sublabel": ""
},
{
"col_name": "Peptide IDs",
"level": "feature",
"type": "array",
"label": "IDs",
"sublabel": "peptide"
},
{
"col_name": "Mod. peptide IDs",
"level": "feature",
"type": "array",
"label": "IDs",
"sublabel": "mod. peptide"
},
{
"col_name": "Evidence IDs",
"level": "feature",
"type": "array",
"label": "IDs",
"sublabel": "evidence"
},
{
"col_name": "MS/MS IDs",
"level": "feature",
"type": "array",
"label": "IDs",
"sublabel": "MS/MS"
},
{
"col_name": "Mol. weight [kDa]",
"level": "feature",
"type": "category",
"label": "Mol. weight [kDa]",
"sublabel": ""
},
{
"col_name": "Sequence length",
"level": "feature",
"type": "category",
"label": "length",
"sublabel": "Sequence"
},
{
"col_name": "Sequence lengths",
"level": "feature",
"type": "array",
"label": "lengths",
"sublabel": "Sequence"
},
{
"col_name": "Reverse",
"level": "feature",
"type": "category",
"label": "Reverse",
"sublabel": ""
},
{
"col_name": "Potential contaminant",
"level": "feature",
"type": "category",
"label": "Potential contaminant",
"sublabel": ""
},
{
"col_name": "Oxidation (M) site IDs",
"level": "feature",
"type": "array",
"label": "IDs",
"sublabel": "Oxidation (M) site"
},
{
"col_name": "Oxidation (M) site positions",
"level": "feature",
"type": "array",
"label": "positions",
"sublabel": "Oxidation (M) site"
},
{
"col_name": "Phospho (STY) site IDs",
"level": "feature",
"type": "array",
"label": "IDs",
"sublabel": "Phospho (STY) site"
},
{
"col_name": "Phospho (STY) site positions",
"level": "feature",
"type": "array",
"label": "positions",
"sublabel": "Phospho (STY) site"
},
{
"col_name": "Peptide counts (all)",
"level": "condition",
"type": "array",
"label": "counts",
"sublabel": "all peptide"
},
{
"col_name": "Peptide counts (razor+unique)",
"level": "condition",
"type": "array",
"label": "counts",
"sublabel": "razor+unique peptides"
},
{
"col_name": "Peptide counts (unique)",
"level": "condition",
"type": "array",
"label": "counts",
"sublabel": "unique peptides"
},
{
"col_name": "Number of proteins",
"level": "condition",
"type": "score",
"label": "count",
"sublabel": "proteins"
},
{
"col_name": "Peptides",
"level": "condition",
"type": "score",
"label": "count",
"sublabel": "Peptides"
},
{
"col_name": "Razor + unique peptides",
"level": "condition",
"type": "score",
"label": "count",
"sublabel": "Razor + unique peptides"
},
{
"col_name": "Unique peptides",
"level": "condition",
"type": "score",
"label": "count",
"sublabel": "Unique peptides"
},
{
"col_name": "MS/MS count",
"level": "condition",
"type": "score",
"label": "count",
"sublabel": "MS/MS"
},
{
"col_name": "Fraction average",
"level": "condition",
"type": "score",
"label": "fraction",
"sublabel": "average"
},
{
"col_name": "Best MS/MS",
"level": "condition",
"type": "array",
"label": "MS/MS",
"sublabel": "best"
},
{
"col_name": "Intensity",
"level": "condition",
"type": "score",
"label": "Intensity",
"sublabel": ""
},
{
"col_name": "Intensity L",
"level": "condition",
"type": "score",
"label": "Intensity",
"sublabel": "L"
},
{
"col_name": "Intensity M",
"level": "condition",
"type": "score",
"label": "Intensity",
"sublabel": "M"
},
{
"col_name": "Intensity H",
"level": "condition",
"type": "score",
"label": "Intensity",
"sublabel": "H"
},
{
"col_name": "Q-value",
"level": "condition",
"type": "probability",
"label": "q-value",
"sublabel": ""
},
{
"col_name": "Score",
"level": "condition",
"type": "probability",
"label": "score",
"sublabel": ""
},
{
"col_name": "Unique sequence coverage [%]",
"level": "condition",
"type": "ratio",
"label": "sequence coverage",
"sublabel": "Unique"
},
{
"col_name": "Unique + razor sequence coverage [%]",
"level": "condition",
"type": "ratio",
"label": "sequence coverage",
"sublabel": "Unique + razor"
},
{
"col_name": "Sequence coverage [%]",
"level": "condition",
"type": "ratio",
"label": "sequence coverage",
"sublabel": ""
},
{
"col_name": "Only identified by site",
"level": "feature",
"type": "category",
"label": "identified by site",
"sublabel": ""
},
{
"col_name": "Peptide is razor",
"level": "condition",
"type": "array",
"label": "peptide",
"sublabel": "is razor"
}
],

"type_scores":
["Peptides", "Razor + unique peptides", "Unique peptides", "Ratio M/L count", "Ratio M/L iso-count", "Ratio H/L count", "Ratio H/L iso-count" ,
"Ratio H/M count", "Ratio H/M iso-count", "Intensity", "Intensity L", "Intensity M", "Intensity H", "MS/MS count", "LFQ intensity", "Reporter intensity count",
"Reporter intensity corrected", "Reporter intensity", "Fraction", "iBAQ"]

}

2 changes: 2 additions & 0 deletions tests/testthat/proteinGroups_test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Protein IDs Majority protein IDs Peptide counts (all) Peptide counts (razor+unique) Peptide counts (unique) Protein names Gene names Fasta headers Number of proteins Peptides Razor + unique peptides Unique peptides Peptides Exp1 Peptides Exp2 Razor + unique peptides Exp1 Razor + unique peptides Exp2 Unique peptides Exp1 Unique peptides Exp2 Sequence coverage [%] Unique + razor sequence coverage [%] Unique sequence coverage [%] Mol. weight [kDa] Sequence length Sequence lengths Fraction average Fraction 1 Fraction 2 Fraction 3 Fraction 4 Fraction 5 Fraction 6 Fraction 7 Fraction 8 Q-value Score Reporter intensity corrected 0 Reporter intensity corrected 1 Reporter intensity corrected 2 Reporter intensity corrected 3 Reporter intensity corrected 4 Reporter intensity corrected 5 Reporter intensity 0 Reporter intensity 1 Reporter intensity 2 Reporter intensity 3 Reporter intensity 4 Reporter intensity 5 Reporter intensity count 0 Reporter intensity count 1 Reporter intensity count 2 Reporter intensity count 3 Reporter intensity count 4 Reporter intensity count 5 Reporter intensity corrected 0 Exp1 Reporter intensity corrected 1 Exp1 Reporter intensity corrected 2 Exp1 Reporter intensity corrected 3 Exp1 Reporter intensity corrected 4 Exp1 Reporter intensity corrected 5 Exp1 Reporter intensity corrected 0 Exp2 Reporter intensity corrected 1 Exp2 Reporter intensity corrected 2 Exp2 Reporter intensity corrected 3 Exp2 Reporter intensity corrected 4 Exp2 Reporter intensity corrected 5 Exp2 Reporter intensity 0 Exp1 Reporter intensity 1 Exp1 Reporter intensity 2 Exp1 Reporter intensity 3 Exp1 Reporter intensity 4 Exp1 Reporter intensity 5 Exp1 Reporter intensity 0 Exp2 Reporter intensity 1 Exp2 Reporter intensity 2 Exp2 Reporter intensity 3 Exp2 Reporter intensity 4 Exp2 Reporter intensity 5 Exp2 Reporter intensity count 0 Exp1 Reporter intensity count 1 Exp1 Reporter intensity count 2 Exp1 Reporter intensity count 3 Exp1 Reporter intensity count 4 Exp1 Reporter intensity 
count 5 Exp1 Reporter intensity count 0 Exp2 Reporter intensity count 1 Exp2 Reporter intensity count 2 Exp2 Reporter intensity count 3 Exp2 Reporter intensity count 4 Exp2 Reporter intensity count 5 Exp2 Sequence coverage Exp1 [%] Sequence coverage Exp2 [%] Intensity Intensity Exp1 Intensity Exp2 MS/MS count Only identified by site Reverse Potential contaminant id Peptide IDs Peptide is razor Mod. peptide IDs Evidence IDs MS/MS IDs Best MS/MS Oxidation (M) site IDs Oxidation (M) site positions
A0A068BEQ2;P50171;P50171-2;G3UX44 A0A068BEQ2;P50171;P50171-2;G3UX44 11;11;11;10 11;11;11;10 11;11;11;10 Estradiol 17-beta-dehydrogenase 8 H2-Ke6;Hsd17b8 tr|A0A068BEQ2|A0A068BEQ2_MOUSE H2-K region expressed gene 6, isoform CRA_a OS=Mus musculus GN=H2-Ke6 PE=2 SV=1;sp|P50171|DHB8_MOUSE Estradiol 17-beta-dehydrogenase 8 OS=Mus musculus GN=Hsd17b8 PE=1 SV=2;sp|P50171-2|DHB8_MOUSE Isoform Long of Estradiol 17-b 4 11 11 11 9 9 9 9 9 9 61.4 61.4 61.4 26.587 259 259;259;274;234 6.32 2 1 1 3 4 4 5 17 0 309.9 422810 381560 394980 374200 414580 428320 405880 379740 401170 384810 418890 414670 29 29 29 29 29 29 159850 141420 149170 147480 162360 166190 262960 240140 245810 226710 252220 262130 153390 140950 151450 151320 164040 160930 252490 238790 249720 233490 254850 253730 12 12 12 12 12 12 17 17 17 17 17 17 46.3 51 8493600000 3090200000 5403400000 42 0 1707;5068;5754;17624;17838;18452;24153;26755;37315;45123;45334 True;True;True;True;True;True;True;True;True;True;True 1818;5449;5450;6176;18892;19121;19782;25924;28715;40406;48814;49037 5720;5721;5722;5723;5724;16176;16177;18100;56337;56338;56339;56340;57035;57036;57037;57038;57039;57040;59065;59066;59067;59068;77499;77500;85797;119780;119781;119782;119783;145479;145480;145481;145482;145483;146163;146164;146165 6359;6360;6361;6362;6363;6364;6365;6366;17918;17919;20019;62610;62611;62612;62613;63369;63370;63371;63372;63373;63374;65658;65659;65660;65661;86004;86005;86006;86007;95195;133113;133114;133115;133116;161672;161673;161674;161675;161676;162411;162412;162413 6366;17919;20019;62610;63370;65660;86006;95195;133113;161676;162411 0;1 113;204
3 changes: 3 additions & 0 deletions tests/testthat/summary_test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Raw file Experiment
raw_file1 Exp1
raw_file2 Exp2
3 changes: 3 additions & 0 deletions tests/testthat/summary_test_2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Raw file False
raw_file1 Exp1
raw_file2 Exp2
30 changes: 30 additions & 0 deletions tests/testthat/test_mqparser.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
context("MaxQuant parser")

testthat::test_that("all needed input parameteres are given", {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `::` notation is not needed for wilson and testthat; both packages are already loaded before the tests run.

expect_error(wilson::parse_MaxQuant(),"The proteinGroups file was not given")
expect_error(wilson::parse_MaxQuant(proteinGroups_in = "/path/path/"),"The summary file was not given")
expect_error(wilson::parse_MaxQuant(proteinGroups_in = "/path/path/",summary_in = "/path/path/"),
"The output file was not given")
expect_error(wilson::parse_MaxQuant(proteinGroups_in = "/path/path/",summary_in = "/path/path/",
outfile = "/path/path/"),"The output_reduced file was not given")
})

# Exercises parse_MaxQuant() on the small fixture files stored next to this
# test file: the three input-validation errors (summary file without an
# 'Experiment' column, unreadable config, config without 'reduced_list') and
# one successful run.
testthat::test_that("mq_parser", {
  # Resolve fixtures relative to tests/testthat. system.file() only looks
  # inside the *installed* package, where tests/ does not exist, so it would
  # return "" and the parser would fail for the wrong reason.
  protein_groups <- testthat::test_path("proteinGroups_test.txt")
  summary_ok <- testthat::test_path("summary_test.txt")
  summary_bad <- testthat::test_path("summary_test_2.txt")
  config_bad <- testthat::test_path("fail_config.json")

  # Write results to temporary files so the test directory stays clean.
  out_full <- tempfile("out")
  out_reduced <- tempfile("outres")

  # The expected text (including its spelling) mirrors the message raised by
  # parse_MaxQuant() when the 'Experiment' column is absent.
  expect_error(
    wilson::parse_MaxQuant(
      proteinGroups_in = protein_groups,
      summary_in = summary_bad,
      outfile = out_full,
      outfile_reduced = out_reduced
    ),
    "wrong format on summary file: column \'Experiment\' misssing"
  )

  # parse_MaxQuant() returns TRUE on success; comparing against the number 1
  # fails because a logical is not equal to a double.
  expect_true(
    wilson::parse_MaxQuant(
      proteinGroups_in = protein_groups,
      summary_in = summary_ok,
      outfile = out_full,
      outfile_reduced = out_reduced
    )
  )

  # An empty path cannot be read as a JSON config.
  expect_error(
    wilson::parse_MaxQuant(
      proteinGroups_in = protein_groups,
      summary_in = summary_ok,
      outfile = out_full,
      outfile_reduced = out_reduced,
      config = ""
    ),
    "Could not read config file"
  )

  # fail_config.json is valid JSON but has no 'reduced_list' entry.
  expect_error(
    wilson::parse_MaxQuant(
      proteinGroups_in = protein_groups,
      summary_in = summary_ok,
      outfile = out_full,
      outfile_reduced = out_reduced,
      config = config_bad
    ),
    "reduced_list is missing in config file"
  )

  # Remove whatever the successful run wrote.
  unlink(c(out_full, out_reduced))
})