-
Notifications
You must be signed in to change notification settings - Fork 2
Added tests for parse_MaxQuant #25
Changes from 1 commit
d37339d
f35d7c9
ddc89ce
cf8a9e2
9d265d6
12f7a19
ec3d416
586ed7e
85bc805
02c5163
6111a8a
961742d
4548826
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,8 @@ | |
#' @param version pre-header information about version (optional) | ||
#' @param experiment_id pre-header information about experiment id (optional) | ||
#' | ||
#' @return 1 on success | ||
#' | ||
#' @export | ||
parse_MaxQuant <- function(proteinGroups_in, summary_in, outfile, outfile_reduced, config = system.file("extdata", "parser_MaxQuant_config.json", package = "wilson"), delimiter = ";", format = NULL, version = NULL, experiment_id = NULL){ | ||
if (missing(proteinGroups_in)) { | ||
|
@@ -188,13 +190,21 @@ parse_MaxQuant <- function(proteinGroups_in, summary_in, outfile, outfile_reduce | |
# reading files in data tables | ||
proteinGroups <- data.table::fread(proteinGroups_in, header = TRUE, quote = "") | ||
summary_file <- data.table::fread(summary_in, header = TRUE) | ||
meta_config <- rjson::fromJSON(file = config) | ||
meta_config <- tryCatch({rjson::fromJSON(file = config)}, | ||
error=function(cond){ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add whitespace around '=' and before '{'. |
||
stop("Could not read config file") | ||
}) | ||
|
||
# getting experiment names | ||
exp_names <- (unique(summary_file[Experiment != "", Experiment])) | ||
if("Experiment" %in% colnames(summary_file)){ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add whitespace after 'if' and before '{'. Like this: `if ("Experiment" %in% colnames(summary_file)) {` |
||
exp_names <- (unique(summary_file[Experiment != "", Experiment])) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The outer parentheses around `unique(...)` are redundant. |
||
} else { | ||
stop("wrong format on summary file: column \'Experiment\' misssing") | ||
} | ||
|
||
meta <- get_meta_from_config(meta_config = meta_config) | ||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Don't follow an empty line with another empty line. |
||
sample_scores <- meta_config$type_scores | ||
sample_ratios <- meta_config$type_ratios | ||
sample_probability <- meta_config$type_probability | ||
|
@@ -203,6 +213,9 @@ parse_MaxQuant <- function(proteinGroups_in, summary_in, outfile, outfile_reduce | |
sample_ary <- meta_config$type_array | ||
reduced_list <- meta_config$reduced_list | ||
full_sample_list <- c(sample_scores, sample_ratios, sample_probability, sample_category, sample_ary) | ||
if(is.null(reduced_list)){ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add whitespace after 'if' and before '{'. |
||
stop("reduced_list is missing in config file") | ||
} | ||
|
||
# get column names | ||
col_names <- colnames(proteinGroups) | ||
|
@@ -286,6 +299,8 @@ parse_MaxQuant <- function(proteinGroups_in, summary_in, outfile, outfile_reduce | |
# writing reduced CLARION file | ||
write_clarion_file(meta = meta_reduced, out = outfile_reduced, format = format, | ||
version = version, exp_id = experiment_id, pGroups = proteinGroups, delimiter = delimiter) | ||
|
||
return(1) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should not return `1`. |
||
} | ||
|
||
#' Method to parse input file. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
library(testthat) | ||
library(wilson) | ||
|
||
test_check("wilson") |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,291 @@ | ||
{ | ||
"meta": [ | ||
{ | ||
"col_name": "Protein IDs", | ||
"level": "feature", | ||
"type": "array", | ||
"label": "IDs", | ||
"sublabel": "proteins" | ||
}, | ||
{ | ||
"col_name": "Majority protein IDs", | ||
"level": "feature", | ||
"type": "array", | ||
"label": "IDs", | ||
"sublabel": "majority protein" | ||
}, | ||
{ | ||
"col_name": "Protein names", | ||
"level": "feature", | ||
"type": "array", | ||
"label": "protein names", | ||
"sublabel": "" | ||
}, | ||
{ | ||
"col_name": "Gene names", | ||
"level": "feature", | ||
"type": "array", | ||
"label": "gene names", | ||
"sublabel": "" | ||
}, | ||
{ | ||
"col_name": "Fasta headers", | ||
"level": "feature", | ||
"type": "array", | ||
"label": "fasta headers", | ||
"sublabel": "" | ||
}, | ||
{ | ||
"col_name": "id", | ||
"level": "feature", | ||
"type": "unique_id", | ||
"label": "unique identifier", | ||
"sublabel": "" | ||
}, | ||
{ | ||
"col_name": "Peptide IDs", | ||
"level": "feature", | ||
"type": "array", | ||
"label": "IDs", | ||
"sublabel": "peptide" | ||
}, | ||
{ | ||
"col_name": "Mod. peptide IDs", | ||
"level": "feature", | ||
"type": "array", | ||
"label": "IDs", | ||
"sublabel": "mod. peptide" | ||
}, | ||
{ | ||
"col_name": "Evidence IDs", | ||
"level": "feature", | ||
"type": "array", | ||
"label": "IDs", | ||
"sublabel": "evidence" | ||
}, | ||
{ | ||
"col_name": "MS/MS IDs", | ||
"level": "feature", | ||
"type": "array", | ||
"label": "IDs", | ||
"sublabel": "MS/MS" | ||
}, | ||
{ | ||
"col_name": "Mol. weight [kDa]", | ||
"level": "feature", | ||
"type": "category", | ||
"label": "Mol. weight [kDa]", | ||
"sublabel": "" | ||
}, | ||
{ | ||
"col_name": "Sequence length", | ||
"level": "feature", | ||
"type": "category", | ||
"label": "length", | ||
"sublabel": "Sequence" | ||
}, | ||
{ | ||
"col_name": "Sequence lengths", | ||
"level": "feature", | ||
"type": "array", | ||
"label": "lengths", | ||
"sublabel": "Sequence" | ||
}, | ||
{ | ||
"col_name": "Reverse", | ||
"level": "feature", | ||
"type": "category", | ||
"label": "Reverse", | ||
"sublabel": "" | ||
}, | ||
{ | ||
"col_name": "Potential contaminant", | ||
"level": "feature", | ||
"type": "category", | ||
"label": "Potential contaminant", | ||
"sublabel": "" | ||
}, | ||
{ | ||
"col_name": "Oxidation (M) site IDs", | ||
"level": "feature", | ||
"type": "array", | ||
"label": "IDs", | ||
"sublabel": "Oxidation (M) site" | ||
}, | ||
{ | ||
"col_name": "Oxidation (M) site positions", | ||
"level": "feature", | ||
"type": "array", | ||
"label": "positions", | ||
"sublabel": "Oxidation (M) site" | ||
}, | ||
{ | ||
"col_name": "Phospho (STY) site IDs", | ||
"level": "feature", | ||
"type": "array", | ||
"label": "IDs", | ||
"sublabel": "Phospho (STY) site" | ||
}, | ||
{ | ||
"col_name": "Phospho (STY) site positions", | ||
"level": "feature", | ||
"type": "array", | ||
"label": "positions", | ||
"sublabel": "Phospho (STY) site" | ||
}, | ||
{ | ||
"col_name": "Peptide counts (all)", | ||
"level": "condition", | ||
"type": "array", | ||
"label": "counts", | ||
"sublabel": "all peptide" | ||
}, | ||
{ | ||
"col_name": "Peptide counts (razor+unique)", | ||
"level": "condition", | ||
"type": "array", | ||
"label": "counts", | ||
"sublabel": "razor+unique peptides" | ||
}, | ||
{ | ||
"col_name": "Peptide counts (unique)", | ||
"level": "condition", | ||
"type": "array", | ||
"label": "counts", | ||
"sublabel": "unique peptides" | ||
}, | ||
{ | ||
"col_name": "Number of proteins", | ||
"level": "condition", | ||
"type": "score", | ||
"label": "count", | ||
"sublabel": "proteins" | ||
}, | ||
{ | ||
"col_name": "Peptides", | ||
"level": "condition", | ||
"type": "score", | ||
"label": "count", | ||
"sublabel": "Peptides" | ||
}, | ||
{ | ||
"col_name": "Razor + unique peptides", | ||
"level": "condition", | ||
"type": "score", | ||
"label": "count", | ||
"sublabel": "Razor + unique peptides" | ||
}, | ||
{ | ||
"col_name": "Unique peptides", | ||
"level": "condition", | ||
"type": "score", | ||
"label": "count", | ||
"sublabel": "Unique peptides" | ||
}, | ||
{ | ||
"col_name": "MS/MS count", | ||
"level": "condition", | ||
"type": "score", | ||
"label": "count", | ||
"sublabel": "MS/MS" | ||
}, | ||
{ | ||
"col_name": "Fraction average", | ||
"level": "condition", | ||
"type": "score", | ||
"label": "fraction", | ||
"sublabel": "average" | ||
}, | ||
{ | ||
"col_name": "Best MS/MS", | ||
"level": "condition", | ||
"type": "array", | ||
"label": "MS/MS", | ||
"sublabel": "best" | ||
}, | ||
{ | ||
"col_name": "Intensity", | ||
"level": "condition", | ||
"type": "score", | ||
"label": "Intensity", | ||
"sublabel": "" | ||
}, | ||
{ | ||
"col_name": "Intensity L", | ||
"level": "condition", | ||
"type": "score", | ||
"label": "Intensity", | ||
"sublabel": "L" | ||
}, | ||
{ | ||
"col_name": "Intensity M", | ||
"level": "condition", | ||
"type": "score", | ||
"label": "Intensity", | ||
"sublabel": "M" | ||
}, | ||
{ | ||
"col_name": "Intensity H", | ||
"level": "condition", | ||
"type": "score", | ||
"label": "Intensity", | ||
"sublabel": "H" | ||
}, | ||
{ | ||
"col_name": "Q-value", | ||
"level": "condition", | ||
"type": "probability", | ||
"label": "q-value", | ||
"sublabel": "" | ||
}, | ||
{ | ||
"col_name": "Score", | ||
"level": "condition", | ||
"type": "probability", | ||
"label": "score", | ||
"sublabel": "" | ||
}, | ||
{ | ||
"col_name": "Unique sequence coverage [%]", | ||
"level": "condition", | ||
"type": "ratio", | ||
"label": "sequence coverage", | ||
"sublabel": "Unique" | ||
}, | ||
{ | ||
"col_name": "Unique + razor sequence coverage [%]", | ||
"level": "condition", | ||
"type": "ratio", | ||
"label": "sequence coverage", | ||
"sublabel": "Unique + razor" | ||
}, | ||
{ | ||
"col_name": "Sequence coverage [%]", | ||
"level": "condition", | ||
"type": "ratio", | ||
"label": "sequence coverage", | ||
"sublabel": "" | ||
}, | ||
{ | ||
"col_name": "Only identified by site", | ||
"level": "feature", | ||
"type": "category", | ||
"label": "identified by site", | ||
"sublabel": "" | ||
}, | ||
{ | ||
"col_name": "Peptide is razor", | ||
"level": "condition", | ||
"type": "array", | ||
"label": "peptide", | ||
"sublabel": "is razor" | ||
} | ||
], | ||
|
||
"type_scores": | ||
["Peptides", "Razor + unique peptides", "Unique peptides", "Ratio M/L count", "Ratio M/L iso-count", "Ratio H/L count", "Ratio H/L iso-count" , | ||
"Ratio H/M count", "Ratio H/M iso-count", "Intensity", "Intensity L", "Intensity M", "Intensity H", "MS/MS count", "LFQ intensity", "Reporter intensity count", | ||
"Reporter intensity corrected", "Reporter intensity", "Fraction", "iBAQ"] | ||
|
||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Protein IDs Majority protein IDs Peptide counts (all) Peptide counts (razor+unique) Peptide counts (unique) Protein names Gene names Fasta headers Number of proteins Peptides Razor + unique peptides Unique peptides Peptides Exp1 Peptides Exp2 Razor + unique peptides Exp1 Razor + unique peptides Exp2 Unique peptides Exp1 Unique peptides Exp2 Sequence coverage [%] Unique + razor sequence coverage [%] Unique sequence coverage [%] Mol. weight [kDa] Sequence length Sequence lengths Fraction average Fraction 1 Fraction 2 Fraction 3 Fraction 4 Fraction 5 Fraction 6 Fraction 7 Fraction 8 Q-value Score Reporter intensity corrected 0 Reporter intensity corrected 1 Reporter intensity corrected 2 Reporter intensity corrected 3 Reporter intensity corrected 4 Reporter intensity corrected 5 Reporter intensity 0 Reporter intensity 1 Reporter intensity 2 Reporter intensity 3 Reporter intensity 4 Reporter intensity 5 Reporter intensity count 0 Reporter intensity count 1 Reporter intensity count 2 Reporter intensity count 3 Reporter intensity count 4 Reporter intensity count 5 Reporter intensity corrected 0 Exp1 Reporter intensity corrected 1 Exp1 Reporter intensity corrected 2 Exp1 Reporter intensity corrected 3 Exp1 Reporter intensity corrected 4 Exp1 Reporter intensity corrected 5 Exp1 Reporter intensity corrected 0 Exp2 Reporter intensity corrected 1 Exp2 Reporter intensity corrected 2 Exp2 Reporter intensity corrected 3 Exp2 Reporter intensity corrected 4 Exp2 Reporter intensity corrected 5 Exp2 Reporter intensity 0 Exp1 Reporter intensity 1 Exp1 Reporter intensity 2 Exp1 Reporter intensity 3 Exp1 Reporter intensity 4 Exp1 Reporter intensity 5 Exp1 Reporter intensity 0 Exp2 Reporter intensity 1 Exp2 Reporter intensity 2 Exp2 Reporter intensity 3 Exp2 Reporter intensity 4 Exp2 Reporter intensity 5 Exp2 Reporter intensity count 0 Exp1 Reporter intensity count 1 Exp1 Reporter intensity count 2 Exp1 Reporter intensity count 3 Exp1 Reporter intensity count 4 Exp1 Reporter intensity 
count 5 Exp1 Reporter intensity count 0 Exp2 Reporter intensity count 1 Exp2 Reporter intensity count 2 Exp2 Reporter intensity count 3 Exp2 Reporter intensity count 4 Exp2 Reporter intensity count 5 Exp2 Sequence coverage Exp1 [%] Sequence coverage Exp2 [%] Intensity Intensity Exp1 Intensity Exp2 MS/MS count Only identified by site Reverse Potential contaminant id Peptide IDs Peptide is razor Mod. peptide IDs Evidence IDs MS/MS IDs Best MS/MS Oxidation (M) site IDs Oxidation (M) site positions | ||
A0A068BEQ2;P50171;P50171-2;G3UX44 A0A068BEQ2;P50171;P50171-2;G3UX44 11;11;11;10 11;11;11;10 11;11;11;10 Estradiol 17-beta-dehydrogenase 8 H2-Ke6;Hsd17b8 tr|A0A068BEQ2|A0A068BEQ2_MOUSE H2-K region expressed gene 6, isoform CRA_a OS=Mus musculus GN=H2-Ke6 PE=2 SV=1;sp|P50171|DHB8_MOUSE Estradiol 17-beta-dehydrogenase 8 OS=Mus musculus GN=Hsd17b8 PE=1 SV=2;sp|P50171-2|DHB8_MOUSE Isoform Long of Estradiol 17-b 4 11 11 11 9 9 9 9 9 9 61.4 61.4 61.4 26.587 259 259;259;274;234 6.32 2 1 1 3 4 4 5 17 0 309.9 422810 381560 394980 374200 414580 428320 405880 379740 401170 384810 418890 414670 29 29 29 29 29 29 159850 141420 149170 147480 162360 166190 262960 240140 245810 226710 252220 262130 153390 140950 151450 151320 164040 160930 252490 238790 249720 233490 254850 253730 12 12 12 12 12 12 17 17 17 17 17 17 46.3 51 8493600000 3090200000 5403400000 42 0 1707;5068;5754;17624;17838;18452;24153;26755;37315;45123;45334 True;True;True;True;True;True;True;True;True;True;True 1818;5449;5450;6176;18892;19121;19782;25924;28715;40406;48814;49037 5720;5721;5722;5723;5724;16176;16177;18100;56337;56338;56339;56340;57035;57036;57037;57038;57039;57040;59065;59066;59067;59068;77499;77500;85797;119780;119781;119782;119783;145479;145480;145481;145482;145483;146163;146164;146165 6359;6360;6361;6362;6363;6364;6365;6366;17918;17919;20019;62610;62611;62612;62613;63369;63370;63371;63372;63373;63374;65658;65659;65660;65661;86004;86005;86006;86007;95195;133113;133114;133115;133116;161672;161673;161674;161675;161676;162411;162412;162413 6366;17919;20019;62610;63370;65660;86006;95195;133113;161676;162411 0;1 113;204 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Raw file Experiment | ||
Bernd baut bemüht bienen bei blumenbergen Exp1 | ||
Dieter drückt daknbar den dünnen Daniel durch die dicke dauerbelstete Dachrinne Exp2 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. WTF 🤣 — There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oops, that was the wrong test file :D |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Raw file False | ||
Bernd baut bemüht bienen bei blumenbergen Exp1 | ||
Dieter drückt daknbar den dünnen Daniel durch die dicke dauerbelastete Dachrinne Exp2 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. **dankbar |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
context("MaxQuant parser") | ||
|
||
testthat::test_that("all needed input parameteres are given", { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The `::` notation is not needed for wilson and testthat; both packages are loaded before testing. |
||
expect_error(wilson::parse_MaxQuant(),"The proteinGroups file was not given") | ||
expect_error(wilson::parse_MaxQuant(proteinGroups_in = "/path/path/"),"The summary file was not given") | ||
expect_error(wilson::parse_MaxQuant(proteinGroups_in = "/path/path/",summary_in = "/path/path/"), | ||
"The output file was not given") | ||
expect_error(wilson::parse_MaxQuant(proteinGroups_in = "/path/path/",summary_in = "/path/path/", | ||
outfile = "/path/path/"),"The output_reduced file was not given") | ||
}) | ||
|
||
testthat::test_that("mq_parser",{ | ||
|
||
expect_error(wilson::parse_MaxQuant(proteinGroups_in = system.file("/tests/testthat", "proteinGroups_test.txt", package = "wilson"), | ||
summary_in = system.file("/tests/testthat", "summary_test_2.txt", package = "wilson"), | ||
outfile = "./out", outfile_reduced = "./outres" ), | ||
"wrong format on summary file: column \'Experiment\' misssing") | ||
expect_equal(wilson::parse_MaxQuant(proteinGroups_in = system.file("/tests/testthat", "proteinGroups_test.txt", package = "wilson"), | ||
summary_in = system.file("/tests/testthat", "summary_test.txt", package = "wilson"), | ||
outfile = "./out", outfile_reduced = "./outres" ), | ||
1) | ||
expect_error(wilson::parse_MaxQuant(proteinGroups_in = system.file("/tests/testthat", "proteinGroups_test.txt", package = "wilson"), | ||
summary_in = system.file("/tests/testthat", "summary_test.txt", package = "wilson"), | ||
outfile = "./out", outfile_reduced = "./outres", config = "" ), | ||
"Could not read config file") | ||
expect_error(wilson::parse_MaxQuant(proteinGroups_in = system.file("/tests/testthat", "proteinGroups_test.txt", package = "wilson"), | ||
summary_in = system.file("/tests/testthat", "summary_test.txt", package = "wilson"), | ||
outfile = "./out", outfile_reduced = "./outres", config = system.file("/tests/testthat", "fail_config.json", package = "wilson") ), | ||
"reduced_list is missing in config file") | ||
}) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Return `TRUE` instead.