# use.prop.test.R

#' Test of Proportion
#'
#' @description Runs \code{prop.test} once for every count column of a table
#'   such as the one produced by \code{count.column}. Each row of the table is
#'   one group: the counts in the column under test are the successes and the
#'   row sums over all evaluated columns are the trials. When using
#'   \code{count.column} the first column holds the first grouping factor
#'   rather than counts, so it is skipped unless \code{eval_first_column} is
#'   \code{TRUE}. \code{prop.test} may warn that the chi-squared approximation
#'   is unreliable when counts are small.
#' @param df_count A data frame or tibble of counts, made by
#'   \code{count.column}.
#' @param eval_first_column Boolean, should the first column be evaluated as
#'   well? Only sensible when every column, including the first, is numeric.
#' @examples
#' df <- data.frame(case_id = c(1,2,3,4,5,6),
#'                  group = c("one","one","two","one","two","three"),
#'                  primary_diagnosis = c("Squamous cell carcinoma, NOS", "Adenocarcinoma, NOS", "Adenocarcinoma with mixed subtypes",
#'                                        "Squamous cell carcinoma, NOS", "Squamous cell carcinoma, NOS", "Squamous cell carcinoma, NOS"))
#' diagnosis_counts <- count.column(df, factor_one = "group", factor_two = "primary_diagnosis")
#' pValues <- use.prop.test(diagnosis_counts, FALSE)
#' @return A one-row data frame holding the p-values obtained by
#'   \code{prop.test}, with one column per evaluated column of
#'   \code{df_count}, named after that column.
use.prop.test <- function(df_count, eval_first_column = FALSE) {
  # NOTE(review): this file defines use.prop.test twice; the later definition
  # is the one in effect after sourcing. Consider removing the duplicate.

  # Column 1 normally carries group labels, so start at column 2 by default.
  first_col <- if (eval_first_column) 1L else 2L
  if (ncol(df_count) < first_col) {
    stop("`df_count` has no count columns to evaluate.", call. = FALSE)
  }
  cols <- seq.int(first_col, ncol(df_count))

  # Trials per group are identical for every column, so compute them once.
  # drop = FALSE keeps a single remaining column two-dimensional for rowSums.
  trials <- rowSums(df_count[, cols, drop = FALSE])

  # `[[` extracts the column as a plain vector for data frames and tibbles.
  pvalues <- vapply(
    cols,
    function(i) prop.test(x = df_count[[i]], n = trials)$p.value,
    numeric(1)
  )

  # One row, one column per tested column. Names are assigned afterwards so
  # labels containing spaces or commas are kept verbatim.
  result <- as.data.frame(t(pvalues))
  colnames(result) <- colnames(df_count)[cols]
  result
}
	#' Test of Proportion
	#'
	#' @description Runs \code{prop.test} once for every count column of a table
	#'   such as the one produced by \code{count.column}. Each row of the table is
	#'   one group: the counts in the column under test are the successes and the
	#'   row sums over all evaluated columns are the trials. When using
	#'   \code{count.column} the first column holds the first grouping factor
	#'   rather than counts, so it is skipped unless \code{eval_first_column} is
	#'   \code{TRUE}. \code{prop.test} may warn that the chi-squared approximation
	#'   is unreliable when counts are small.
	#' @param df_count A data frame or tibble of counts, made by
	#'   \code{count.column}.
	#' @param eval_first_column Boolean, should the first column be evaluated as
	#'   well? Only sensible when every column, including the first, is numeric.
	#' @examples
	#' df <- data.frame(case_id = c(1,2,3,4,5,6),
	#'                  group = c("one","one","two","one","two","three"),
	#'                  primary_diagnosis = c("Squamous cell carcinoma, NOS", "Adenocarcinoma, NOS", "Adenocarcinoma with mixed subtypes",
	#'                                        "Squamous cell carcinoma, NOS", "Squamous cell carcinoma, NOS", "Squamous cell carcinoma, NOS"))
	#' diagnosis_counts <- count.column(df, factor_one = "group", factor_two = "primary_diagnosis")
	#' pValues <- use.prop.test(diagnosis_counts, FALSE)
	#' @return A one-row data frame holding the p-values obtained by
	#'   \code{prop.test}, with one column per evaluated column of
	#'   \code{df_count}, named after that column.
	use.prop.test <- function(df_count, eval_first_column = FALSE) {
	  # NOTE(review): this file defines use.prop.test twice; this later
	  # definition is the one in effect after sourcing. Consider removing the
	  # duplicate.

	  # Column 1 normally carries group labels, so start at column 2 by default.
	  first_col <- if (eval_first_column) 1L else 2L
	  if (ncol(df_count) < first_col) {
	    stop("`df_count` has no count columns to evaluate.", call. = FALSE)
	  }
	  cols <- seq.int(first_col, ncol(df_count))

	  # Trials per group are identical for every column, so compute them once.
	  # drop = FALSE keeps a single remaining column two-dimensional for rowSums.
	  trials <- rowSums(df_count[, cols, drop = FALSE])

	  # `[[` extracts the column as a plain vector for data frames and tibbles.
	  pvalues <- vapply(
	    cols,
	    function(i) prop.test(x = df_count[[i]], n = trials)$p.value,
	    numeric(1)
	  )

	  # One row, one column per tested column. Names are assigned afterwards so
	  # labels containing spaces or commas are kept verbatim.
	  result <- as.data.frame(t(pvalues))
	  colnames(result) <- colnames(df_count)[cols]
	  result
	}