diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5b6a065 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..b1b6a61 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,13 @@ +Package: mmRmeta +Title: What the Package Does (One Line, Title Case) +Version: 0.0.0.9000 +Authors@R: + person(given = "First", + family = "Last", + role = c("aut", "cre"), + email = "first.last@example.com") +Description: What the package does (one paragraph). +License: What license it uses +Encoding: UTF-8 +LazyData: true +RoxygenNote: 6.1.1 diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..b734f3c --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,6 @@ +# Generated by roxygen2: do not edit by hand + +export(make.kruskal.frame) +importFrom(dplyr,group_by_) +importFrom(dplyr,summarize) +importFrom(magrittr,"%>%") diff --git a/R/add.expression.R b/R/add.expression.R index 7a7234a..2bd282d 100644 --- a/R/add.expression.R +++ b/R/add.expression.R @@ -3,7 +3,7 @@ #' @param cancer_data A large list created by multimodalR #' @param nested_metadata A list of data frames made by >make.nested.metadata< #' @param key Character or integer - Column name or index of unique identifier of nested_metadata, default = "case_id" -#' @importFrom magittr %>% +#' @importFrom magrittr %>% #' @examples #' lungMetaExpression <- add.expression(lungXY, lungMetaExpression) #' @return Returns the same list but with data franes containing the expression values in a new colummn. 
diff --git a/R/count.column.R b/R/count.column.R index a493189..1d1be15 100644 --- a/R/count.column.R +++ b/R/count.column.R @@ -3,7 +3,7 @@ #' @param df Data frame, in this case meta data of a corresponding gene #' @param factor_one Character - first grouping factor, a column name of the data frame #' @param factor_two Character - second grouping factor, a column name of the data frame -#' @importFrom magittr %>% +#' @importFrom magrittr %>% #' @examples #' df <- data.frame(case_id = c(1,2,3,4,5,6), group = c("one","one","two","one","two","three"), primary_diagnosis = c("Squamous cell carcinoma, NOS", "Adenocarcinoma, NOS", "Adenocarcinoma with mixed subtypes", "Squamous cell carcinoma, NOS", "Squamous cell carcinoma, NOS", "Squamous cell carcinoma, NOS")) #' diagnosis_counts <- count.column(df, factor_one = "group", factor_two = "primary_diagnosis") diff --git a/R/counts.per.group.R b/R/counts.per.group.R index 4884a8b..160c270 100644 --- a/R/counts.per.group.R +++ b/R/counts.per.group.R @@ -4,7 +4,7 @@ #' @param list_df Either a list of data frames or a single data frame #' @param first_grouping Character - first grouping variable | Default "case_id" #' @param second_grouping Character - second grouping variable | Default "group"" -#' @importFrom magittr %>% +#' @importFrom magrittr %>% #' @importFrom dplyr group_by_ summarize #' @examples #' Using a Data Frame diff --git a/R/reorder.column.R b/R/reorder.column.R index 880b56f..d2b4ad0 100644 --- a/R/reorder.column.R +++ b/R/reorder.column.R @@ -4,6 +4,7 @@ #' @param cancer_data A large list created prior by multimodalR. #' @param column Character - column name #' @param remove Integer - threshold of counts; any factor level with equal or less counts are discarded +#' @importFrom magrittr %>% #' @examples #' lungMeta <- reorder.colums(lungMeta, "primary_diagnosis", 10) #keeps factor levels with >= 10 counts. #' @return Returns the same large list with changed column names of the expressionmatrix. 
diff --git a/R/subset.metadata.R b/R/subset.metadata.R index 2e5c573..47bd3f9 100644 --- a/R/subset.metadata.R +++ b/R/subset.metadata.R @@ -5,20 +5,20 @@ #' @param key character - Unique identifier, default = "case_id" #' @param additional_key needs to be added #' @param additional_value needs to be added -#' @importFrom magittr %>% +#' @importFrom magrittr %>% #' @examples #' lungMetaData <- subset.metadata(metadata, lungXY) #' @return Returns a data table of the metadata for a primary cancer site (organ). subset.metadata <- function(meta_data, cancer_data, key = "case_id", additional_key = NULL, additional_value = NULL) { - if(!is.character(meta_data[[key]]) { - meta_data[[key]] <- as.character(meta_data[[key]]) #transform factor to character + if(!is.character(meta_data[[key]])) { + meta_data[[key]] <- as.character(meta_data[[key]]) #transform factor to character } correctRows <- numeric() #empty vector to be filled later caseNames <- names(cancer_data$Expressionmatrix) #vector of case_id from expressionmatrix for (cases in caseNames) { #match case_id of the vector with case_id of metadata - matchedRow <- grep(cases, meta_data[[key]] - correctRows <- c(correctRows, matchedRow) + matchedRow <- grep(cases, meta_data[[key]]) + correctRows <- c(correctRows, matchedRow) } meta_data <- meta_data[c(correctRows),] #subset by machted case_id if(!is.null(additional_key)) { diff --git a/R/summary.plots.R b/R/summary.plots.R index 91c4dea..7be363f 100644 --- a/R/summary.plots.R +++ b/R/summary.plots.R @@ -1,6 +1,12 @@ summary.plots <- function(meta_data, file_name = "summary", environment = FALSE, width = 12, height =8, ... ) { #first Graph + data.summary <- function(x) { #function to calculate mean +/- sd (y, ymin, ymax) for violin plots + mu <- mean(x) + sigma1 <- mu-sd(x) + sigma2 <- mu+sd(x) + return(c(y=mu, ymin=sigma1, ymax=sigma2)) +} ### 1. 
###################### Boxplots(gender, age) ~ status firstGraph <- ggplot(organ_metadata, aes(x = gender, y = age/365)) + geom_boxplot(data = organ_metadata, aes(color = status)) + diff --git a/R/use.prop.test.R b/R/use.prop.test.R index 771480d..28f9d09 100644 --- a/R/use.prop.test.R +++ b/R/use.prop.test.R @@ -3,7 +3,10 @@ #' @param df_count A data frame, made by >count.column< #' @param eval_first_column Boolean, should the first column be evaluated as well? #' @examples -#' df <- data.frame(case_id = c(1,2,3,4,5,6), group = c("one","one","two","one","two","three"), primary_diagnosis = c("Squamous cell carcinoma, NOS", "Adenocarcinoma, NOS", "Adenocarcinoma with mixed subtypes", "Squamous cell carcinoma, NOS", "Squamous cell carcinoma, NOS", "Squamous cell carcinoma, NOS")) +#' df <- data.frame(case_id = c(1,2,3,4,5,6), +#' group = c("one","one","two","one","two","three"), +#' primary_diagnosis = c("Squamous cell carcinoma, NOS", "Adenocarcinoma, NOS", "Adenocarcinoma with mixed subtypes", +#' "Squamous cell carcinoma, NOS", "Squamous cell carcinoma, NOS", "Squamous cell carcinoma, NOS")) #' diagnosis_counts <- count.column(df, factor_one = "group", factor_two = "primary_diagnosis") #' pValues <- use.prop.test(diagnosis_counts, FALSE) #' @return Returns a vector of p.values obtained by the prop.test diff --git a/preprocessing.R b/example/preprocessing.R similarity index 100% rename from preprocessing.R rename to example/preprocessing.R diff --git a/example/statistical.test.R b/example/statistical.test.R index 729d391..ad1a85d 100644 --- a/example/statistical.test.R +++ b/example/statistical.test.R @@ -12,6 +12,17 @@ x1 <- pairwise.wilcox.test(lungMetaExpression[[x[1]]]$expression, g = lungMetaEx x2 <- x1 %>% tibble::rownames_to_column('name') %>% filter_all(. , any_vars(. < 0.05)) %>% tibble::column_to_rownames('name') #filter and keep rownames x3 <- data.frame(t(x2)) %>% tibble::rownames_to_column('name') %>% filter_all(., any_vars(. 
< 0.05)) %>% tibble::column_to_rownames('name') #transpose to filter out rows without significant values, cant filter out colums that easy -wilcox_test(Value ~ factor(Group), data=data[data$Group==1|data$Group==2,], distribution="exact") +wilcox_test(Value ~ factor(Group), data=data[data$Group==1|data$Group==2,]) +xWil <- wilcox.test(lungMetaExpression[[x[1]]]$expression ~ lungMetaExpression[[x[1]]]$primary_diagnosis == c("Adenocarcinoma, NOS" , "Squamous cell carninoma, NOS")) +#for every row: do wilcox test if p.value is <0.05 and not the same name ... furthermore try coin::wilcox_test -#for every row: do wilcox test if p.value is <0.05 and not the same name \ No newline at end of file +rownames(x3) + +#what if i subset expression values by diagnosis factor +primaryDSpread <- lungMetaExpression$AL671277.1 %>% select(. , c(case_id, primary_diagnosis, expression)) %>% tidyr::spread(., primary_diagnosis, expression) +wilcox.test(primaryD$`Papillary adenocarcinoma, NOS`, primaryD$`Papillary adenocarcinoma, NOS`) +primaryD <- lungMetaExpression$AL671277.1 %>% select(. 
, primary_diagnosis, expression) +firstTest <- filter(primaryD, primary_diagnosis %in% c("Adenocarcinoma, NOS", "Squamous cell carcinoma, NOS")) #filter if they are named like this + +coin::wilcox_test(formula = expression ~ primary_diagnosis, data = firstTest) +# WORKS \ No newline at end of file diff --git a/man/add.expression.Rd b/man/add.expression.Rd new file mode 100644 index 0000000..918847d --- /dev/null +++ b/man/add.expression.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add.expression.R +\name{add.expression} +\alias{add.expression} +\title{Add Gene Expression} +\usage{ +add.expression(cancer_data, nested_metadata, key = "case_id") +} +\arguments{ +\item{cancer_data}{A large list created by multimodalR} + +\item{nested_metadata}{A list of data frames made by >make.nested.metadata<} + +\item{key}{Character or integer - Column name or index of unique identifier of nested_metadata, default = "case_id"} +} +\value{ +Returns the same list but with data franes containing the expression values in a new colummn. +} +\description{ +This function adds the gene expression values to each "case_id" of a gene in the data frame. It uses the expressionmatrix from organ_data. 
+} +\examples{ +lungMetaExpression <- add.expression(lungXY, lungMetaExpression) +} diff --git a/man/add.stage.simple.Rd b/man/add.stage.simple.Rd new file mode 100644 index 0000000..13256dc --- /dev/null +++ b/man/add.stage.simple.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add.stage.simple.R +\name{add.stage.simple} +\alias{add.stage.simple} +\title{Adds column with simplified tumor stages without subtypes} +\usage{ +add.stage.simple(meta_data, tumor_stage = "tumor_stage", + new_name = "stage") +} +\arguments{ +\item{meta_data}{A data frame} + +\item{tumor_stage}{Character or Integer - Column name or index of tumor stage} + +\item{new_name}{Character - Name of new column} +} +\value{ +Returns the data frame with an additional column. +} +\description{ +This function adds a new column "stage" to the meta data containing the simplified tumor stages (i.e. without subtypes). It uses the function stages.to.number. +As for now 4 subtypes are accepted: a,b,c and 0 - it doesnt compute "stage i/ii NOS", "stage 0" and "stage x" +} +\examples{ +lungMeta <- add.stage.simple(lungmeta, "tumor_stage") +} diff --git a/man/count.column.Rd b/man/count.column.Rd new file mode 100644 index 0000000..326e3d4 --- /dev/null +++ b/man/count.column.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/count.column.R +\name{count.column} +\alias{count.column} +\title{Counts by 2 grouping varaibles} +\usage{ +count.column(df, factor_one = "group", + factor_two = "primary_diagnosis") +} +\arguments{ +\item{df}{Data frame, in this case meta data of a corresponding gene} + +\item{factor_one}{Character - first grouping factor, a column name of the data frame} + +\item{factor_two}{Character - second grouping factor, a column name of the data frame} +} +\value{ +Returns a data frame +} +\description{ +This function simply groups by two factors and returns the frequency. 
Can be passed to "use.prop.test" to test proportions. +} +\examples{ +df <- data.frame(case_id = c(1,2,3,4,5,6), group = c("one","one","two","one","two","three"), primary_diagnosis = c("Squamous cell carcinoma, NOS", "Adenocarcinoma, NOS", "Adenocarcinoma with mixed subtypes", "Squamous cell carcinoma, NOS", "Squamous cell carcinoma, NOS", "Squamous cell carcinoma, NOS")) +diagnosis_counts <- count.column(df, factor_one = "group", factor_two = "primary_diagnosis") +} diff --git a/man/counts.per.group.Rd b/man/counts.per.group.Rd new file mode 100644 index 0000000..c433975 --- /dev/null +++ b/man/counts.per.group.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/counts.per.group.R +\name{counts.per.group} +\alias{counts.per.group} +\title{Group by and summarize counts} +\usage{ +counts.per.group(list_df, first_grouping = "case_id", + second_grouping = "group") +} +\arguments{ +\item{list_df}{Either a list of data frames or a single data frame} + +\item{first_grouping}{Character - first grouping variable | Default "case_id"} + +\item{second_grouping}{Character - second grouping variable | Default "group""} +} +\value{ +Returns a data frame +} +\description{ +This functions groups by 2 variables/columns with the counts of the combination. It accepts a list of data frames or a single data frame. When passing a single data frame this function will simply return a data frame with the counts. When using a list as input one might wants to know how often a patient(case_id) +appears in a group. If youre using a single data frame you need to change the grouping variables. Using ".id" as first_grouping and lists the counts of groups for the genes. 
+} +\examples{ +Using a Data Frame +df <- data.frame() +df_counts <- counts.per.group(df, "firstColumn", "secondColumn") +} diff --git a/man/drop.unused.levels.Rd b/man/drop.unused.levels.Rd new file mode 100644 index 0000000..7ddeb37 --- /dev/null +++ b/man/drop.unused.levels.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/drop.unused.levels.R +\name{drop.unused.levels} +\alias{drop.unused.levels} +\title{Drop unused factor levels} +\usage{ +drop.unused.levels(df) +} +\arguments{ +\item{df}{Data frame} +} +\value{ +Returns the altered data frame. +} +\description{ +This function drops all unused levels from all factors in a data frame. +} diff --git a/man/filter.columns.as.na.Rd b/man/filter.columns.as.na.Rd new file mode 100644 index 0000000..7875d7c --- /dev/null +++ b/man/filter.columns.as.na.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/filter.columns.as.na.R +\name{filter.columns.as.na} +\alias{filter.columns.as.na} +\title{Delete Colums with only NA} +\usage{ +filter.columns.as.na(df, value = "not reported") +} +\arguments{ +\item{df}{Data frame} + +\item{value}{A value or character that should be converted to NA.} +} +\value{ +Returns the filtered data frame +} +\description{ +Sometimes actual NA values are indicated as other strings (e.g. "not reported"). This function converts these strings to NA and ultimately deletes any column consisting only of NA values. 
+} +\examples{ +df <- data.frame(num1 = c(1,2,3,4), num2= c(5,6,7, "not reported")) +df <- filter.column.as.na(df, "no value") +} diff --git a/man/make.kruskal.frame.Rd b/man/make.kruskal.frame.Rd new file mode 100644 index 0000000..7c10076 --- /dev/null +++ b/man/make.kruskal.frame.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/make.kruskal.frame.R +\name{make.kruskal.frame} +\alias{make.kruskal.frame} +\title{Kruskal-Wallice Test on nested List} +\usage{ +make.kruskal.frame(df_list, column, factor, name = "pValue", + p_adjust = NULL) +} +\arguments{ +\item{df_list}{List of data frames, meta data with expression values and modality groups} + +\item{column}{Character or integer - column name or index (argument x in kruskal.test)} + +\item{factor}{Character or integer - column name or index to group by (argument g in kruskal.test)} + +\item{name}{Name of the created column containing the p-values} + +\item{p_adjust}{Boolean - Conduct p-value adjustement} +} +\description{ +The Kruskal-Wallice test calculates if the medians of two/or more groups are significantly different. It is therefore independent from +normal distribution. This function applies the non parametric test on all data frames in the list, using >use.kruskal.test<. 
+} +\examples{ +lungAgeKruskal <- make.kruskal.frame(lungMetaExpression, "age_at_diagnosis", "group", "age.kruskal") +} +\keyword{kruskal} +\keyword{test} diff --git a/man/make.nested.metadata.Rd b/man/make.nested.metadata.Rd new file mode 100644 index 0000000..711396a --- /dev/null +++ b/man/make.nested.metadata.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/make.nested.metadata.R +\name{make.nested.metadata} +\alias{make.nested.metadata} +\title{Make nested data frames} +\usage{ +make.nested.metadata(cancer_metadata, cancer_data, key = "case_id") +} +\arguments{ +\item{cancer_metadata}{Meta data provided by TCGA} + +\item{cancer_data}{A large list created prior by multimodalR} + +\item{key}{Character or integer - Column name or index of unique identifier, default = "case_id"} +} +\value{ +Returns a list of data frames for every gene +} +\description{ +Create a data frame of metadata for every gene. The case_id are taken from cancer_data and are matched against cancer_metadata. An additional +column "group" is appended to the data table. The column "group" represents the modality of the gene expression. +} +\examples{ +lungMetaExpression <- nested.metadata(lungMetaData, lungXY) +} diff --git a/man/make.overview.metadata.Rd b/man/make.overview.metadata.Rd new file mode 100644 index 0000000..cb8a3f2 --- /dev/null +++ b/man/make.overview.metadata.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/make.overview.metadata.R +\name{make.overview.metadata} +\alias{make.overview.metadata} +\title{Calculuate specifig meta data DEPRECATED} +\usage{ +make.overview.metadata(df_list, vertical = TRUE) +} +\arguments{ +\item{df_list}{List of data frames} + +\item{vertical}{Boolean - Specify if the data is presented in verticaly} +} +\value{ +Returns a vector for each group in a gene which are stored in a list. So if a gene has 3 groups. 
There will be 3 vectors filled with the calculations. +} +\description{ +This function applies >overview.metadata< to every data frame in the list. +} +\examples{ +brainCalculatedMetadata <- calculate.metadata(metadata, brainRows) +} diff --git a/man/make.prop.frame.Rd b/man/make.prop.frame.Rd new file mode 100644 index 0000000..1217858 --- /dev/null +++ b/man/make.prop.frame.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/make.prop.frame.R +\name{make.prop.frame} +\alias{make.prop.frame} +\title{Test of Proportion} +\usage{ +make.prop.frame(df_list, factor_one = "group", factor_two) +} +\arguments{ +\item{df_list}{A nested list of meta data obtained by >make.nested.list<} + +\item{factor_one}{First grouping factor for function >count.column<} + +\item{factor_two}{Second grouping factor for function >count.column<} +} +\value{ +Returns a data frame containing all pvalues of the proportion test of all genes. +} +\description{ +This function applies >count.column< and >use.prop.test< on the whole list of data frames. 
+} +\examples{ +df <- data.frame(case_id = c(1,2,3,4,5,6), group = c("one","one","two","one","two","three"), primary_diagnosis = c("Squamous cell carcinoma, NOS", "Adenocarcinoma, NOS", "Adenocarcinoma with mixed subtypes", "Squamous cell carcinoma, NOS", "Squamous cell carcinoma, NOS", "Squamous cell carcinoma, NOS")) +diagnosis_counts <- count.column(df, factor_one = "group", factor_two = "primary_diagnosis") +pValues <- use.prop.test(diagnosis_counts, FALSE) +} diff --git a/man/overview.metadata.Rd b/man/overview.metadata.Rd new file mode 100644 index 0000000..e8551c3 --- /dev/null +++ b/man/overview.metadata.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/overview.metadata.R +\name{overview.metadata} +\alias{overview.metadata} +\title{Calculuate specifig meta data DEPRECATED} +\usage{ +overview.metadata(df) +} +\arguments{ +\item{single_data_frame}{A data frame containing meta data, that was made by >create.data.tables<.} +} +\value{ +Returns a vector for each group in a gene which are stored in a list. So if a gene has 3 groups. There will be 3 vectors filled with the calculations. +} +\description{ +This function groups the data by specific values. N +} +\examples{ +brainCalculatedMetadata <- calculate.metadata(metadata, brainRows) +} diff --git a/man/proptest.template.Rd b/man/proptest.template.Rd new file mode 100644 index 0000000..1fd7643 --- /dev/null +++ b/man/proptest.template.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/proptest.template.R +\name{proptest.template} +\alias{proptest.template} +\title{Test of Proportion on meta data overview} +\usage{ +proptest.template(calculated_metadata, col_counts, col_total, name, + p_adjust) +} +\arguments{ +\item{calculated_metadata}{Data frame made by >make.overview.metadata<} + +\item{col_counts}{Character or integer - column name or index of counts of the observation (e.g. 
sum of dead people in both groups)} + +\item{col_total}{Character or integer - column name or index of total counts of the observation (e.g. total count of patients in both groups)} + +\item{name}{Name of the created column containing the p-values} + +\item{p_adjust}{Boolean - Conduct p-value adjustement} +} +\value{ +Returns the p-values as a data frame for every gene. +} +\description{ +This function applies prop.test on specified columns on a list of data frames. Specially made for data frames made by >make.overview.metadata<. +} +\examples{ +lungDeadProp <- proptest.template(lungCalculatedMeta, col_counts = 4 , col_total = 2, col_name = "dead.proportion") +} diff --git a/man/remove.x.Rd b/man/remove.x.Rd new file mode 100644 index 0000000..5e47220 --- /dev/null +++ b/man/remove.x.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/remove.x.R +\name{remove.x} +\alias{remove.x} +\title{Remove 'X' infront of caseID} +\usage{ +remove.x(cancer_data) +} +\arguments{ +\item{cancer_data}{A large list created prior by multimodalR.} +} +\value{ +Returns the same large list with changed column names of the expressionmatrix. +} +\description{ +This function removes the 'X' in the column names (case_id) of cancer_data$Expressionmatrix that occures in R, when the start of the string is a number. +It's necessary to remove them so downstream functions work propperly. +} +\examples{ +lungXY <- remove.x(lungXY) +} diff --git a/man/rename.columns.Rd b/man/rename.columns.Rd new file mode 100644 index 0000000..63ba989 --- /dev/null +++ b/man/rename.columns.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rename.columns.R +\name{rename.columns} +\alias{rename.columns} +\title{Shorten column names} +\usage{ +rename.columns(df) +} +\arguments{ +\item{df}{A Data frame} +} +\value{ +Returns the same df with shortened column names. 
+} +\description{ +This function shortenes the column names using regex. When flattening a list from .json the resulting colums are named like "diagnoses.treatment.updated_datetime". Each period indicating a level of the list. +For better readability everything in front of the last period is deleted. +} +\examples{ +df <- data.frame(a.long.column.name_1= c(1,2,3,4), another.long2.column.name_2= c(5,6,7, "not reported")) +df <- rename.columns(df) +} diff --git a/man/reorder.column.Rd b/man/reorder.column.Rd new file mode 100644 index 0000000..ea872f7 --- /dev/null +++ b/man/reorder.column.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reorder.column.R +\name{reorder.column} +\alias{reorder.column} +\title{Remove small factor levels and reorder} +\usage{ +\method{reorder}{column}(cancer_metadata, column, remove = 0) +} +\arguments{ +\item{column}{Character - column name} + +\item{remove}{Integer - threshold of counts; any factor level with equal or less counts are discarded} + +\item{cancer_data}{A large list created prior by multimodalR.} +} +\value{ +Returns the same large list with changed column names of the expressionmatrix. +} +\description{ +This function removes factor levels with counts less than a set value. Additionally, it reorders the factor levels depending on the counts. +This may be necessary later on to get ordered labels when plotting. +} +\examples{ +lungMeta <- reorder.colums(lungMeta, "primary_diagnosis", 10) #keeps factor levels with >= 10 counts. 
+} diff --git a/man/reorder.column.all.Rd b/man/reorder.column.all.Rd new file mode 100644 index 0000000..6f89bbd --- /dev/null +++ b/man/reorder.column.all.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reorder.all.R +\name{reorder.column.all} +\alias{reorder.column.all} +\title{Uses reorder.column on all eligible columns} +\usage{ +\method{reorder}{column.all}(cancer_metadata, exception = NULL, + remove = 0) +} +\arguments{ +\item{exception}{Character - One or more column names that should not be processed by this function} + +\item{remove}{Integer - remove >= counts of the factor level} + +\item{cancer_data}{A large list created prior by multimodalR} +} +\value{ +Returns the data frame with filtered/ordered columns. +} +\description{ +This function removes factor levels with less counts than a set value of every column. Additionally, it reorders the factor levels depending on the counts. +} +\examples{ +lungMeta <- reorder.column.all(lungMeta, c("tumor_stage", "race"), 10) +} diff --git a/man/stage.to.numeral.Rd b/man/stage.to.numeral.Rd new file mode 100644 index 0000000..6dc086d --- /dev/null +++ b/man/stage.to.numeral.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stage.to.numeral.R +\name{stage.to.numeral} +\alias{stage.to.numeral} +\title{Change roman to arabic numerals} +\usage{ +stage.to.numeral(stage_values) +} +\arguments{ +\item{stage_values}{A vector of tumor stages with roman numerals and subtypes} +} +\value{ +Returns a vetor of tumor stages with arabic numerals and subtypes. +} +\description{ +This function changes roman numerals to arabic numerals and keeps the subtypes. 
As for now 4 subtypes are accepted: a,b,c and 0 - it doesnt compute "stage i/ii NOS", "stage 0" and "stage x" +} +\examples{ +stages <- stage.to.numeral(metadata$tumor_stage) +} diff --git a/man/subset.metadata.Rd b/man/subset.metadata.Rd new file mode 100644 index 0000000..56d16f5 --- /dev/null +++ b/man/subset.metadata.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/subset.metadata.R +\name{subset.metadata} +\alias{subset.metadata} +\title{Filter Metadata for Primary Cancer Site (Organ)} +\usage{ +\method{subset}{metadata}(meta_data, cancer_data, key = "case_id", + additional_key = NULL, additional_value = NULL) +} +\arguments{ +\item{meta_data}{Meta data provided by TCGA.} + +\item{cancer_data}{A large list created prior by multimodalR for a cancer site.} + +\item{key}{character - Unique identifier, default = "case_id"} + +\item{additional_key}{needs to be added} + +\item{additional_value}{needs to be added} +} +\value{ +Returns a data table of the metadata for a primary cancer site (organ). +} +\description{ +This function subsets the metadata for an organ. It works for meta data directly downloaded from TCGA or already filtered meta data. The subset is done by matching the "key" with the column names (patient ID or case_id) of the expression matrix. 
+} +\examples{ +lungMetaData <- subset.metadata(metadata, lungXY) +} diff --git a/man/use.kruskal.test.Rd b/man/use.kruskal.test.Rd new file mode 100644 index 0000000..8a06475 --- /dev/null +++ b/man/use.kruskal.test.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/use.kruskal.test.R +\name{use.kruskal.test} +\alias{use.kruskal.test} +\title{Kruskal-Wallis Test} +\usage{ +use.kruskal.test(df, column, factor) +} +\arguments{ +\item{column}{A column of the data frame (argument x in kruskal.test)} + +\item{factor}{A column of the data frame to group by (argumentg in kruskal.test)} +} +\value{ +Returns the p-value of the kruskal-wallis test. +} +\description{ +This function applies kruskal.test on a data frame. +} +\examples{ +ageKruskal <- use.kruskal.test(lungMeta, "age_at_diagnosis", "gender") #Is there a difference regarding the age of the two genders? +} diff --git a/man/use.prop.test.Rd b/man/use.prop.test.Rd new file mode 100644 index 0000000..35f0499 --- /dev/null +++ b/man/use.prop.test.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/use.prop.test.R +\name{use.prop.test} +\alias{use.prop.test} +\title{Test of Proportion} +\usage{ +use.prop.test(df_count, eval_first_column = FALSE) +} +\arguments{ +\item{df_count}{A data frame, made by >count.column<} + +\item{eval_first_column}{Boolean, should the first column be evaluated as well?} +} +\value{ +Returns a vector of p.values obtained by the prop.test +} +\description{ +This function applies prop.test on a data frame. Thi When using >count.column< the first column represents the first grouping factor. Applying the prop.test on the whole data frame would lead to an error. Therefore, eval_first_column is set to FALSE. Throws warnings if the group size is too small (n<=5). 
+} +\examples{ +df <- data.frame(case_id = c(1,2,3,4,5,6), + group = c("one","one","two","one","two","three"), + primary_diagnosis = c("Squamous cell carcinoma, NOS", "Adenocarcinoma, NOS", "Adenocarcinoma with mixed subtypes", + "Squamous cell carcinoma, NOS", "Squamous cell carcinoma, NOS", "Squamous cell carcinoma, NOS")) +diagnosis_counts <- count.column(df, factor_one = "group", factor_two = "primary_diagnosis") +pValues <- use.prop.test(diagnosis_counts, FALSE) +} diff --git a/mmRmeta.Rproj b/mmRmeta.Rproj new file mode 100644 index 0000000..eaa6b81 --- /dev/null +++ b/mmRmeta.Rproj @@ -0,0 +1,18 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace