# counts.per.group.R

# NOTE(review): this file defines counts.per.group twice; when the file is
# sourced, the later definition replaces the earlier one. Consider keeping
# only one copy.
#' Group by and summarize counts
#'
#' @description Counts how often each combination of two columns occurs and
#' returns the counts in wide form: one row per value of `first_grouping`, one
#' column per value of `second_grouping`, and `0` where a combination does not
#' occur. The input is either a single data frame or a list of data frames. A
#' list is first stacked into one table; with the default arguments the result
#' then shows how often a patient (`case_id`) appears in each group. For a
#' named list, `first_grouping = ".id"` gives the counts per list element
#' instead.
#' @param list_df Either a list of data frames or a single data frame
#' @param first_grouping Character - name of the first grouping column | Default "case_id"
#' @param second_grouping Character - name of the second grouping column, whose values become the output columns | Default "group"
#' @importFrom magrittr %>%
#' @importFrom dplyr group_by_ summarize
#' @examples
#' # Using a data frame
#' df <- data.frame(
#'   case_id = c("a", "a", "b"),
#'   group = c("x", "y", "x")
#' )
#' df_counts <- counts.per.group(df, "case_id", "group")
#' @return A data frame (grouped by `first_grouping`) with one row per value of
#'   `first_grouping` and one count column per value of `second_grouping`.
counts.per.group <- function(list_df, first_grouping = "case_id", second_grouping = "group") {
  stopifnot(
    is.character(first_grouping), length(first_grouping) == 1L,
    is.character(second_grouping), length(second_grouping) == 1L
  )
  # inherits() instead of `class(x) == "list"`: class() may have length > 1
  # (e.g. for tibbles), which is not a valid `if` condition.
  if (inherits(list_df, "list")) {
    # Stack all list elements into one table. For a named list plyr::ldply
    # records the element names in an ".id" column.
    list_df <- plyr::ldply(list_df, data.table::data.table)
  }
  # Select the grouping columns by their names (given as strings).
  grouped <- dplyr::group_by(
    list_df,
    dplyr::across(dplyr::all_of(c(first_grouping, second_grouping)))
  )
  # One row per combination with its frequency in column `n`; the result stays
  # grouped by `first_grouping`.
  counts <- dplyr::summarize(grouped, n = dplyr::n(), .groups = "drop_last")
  # `!!` passes the column *name* stored in `second_grouping`, so a data column
  # that happens to be called "second_grouping" cannot be picked by accident.
  tidyr::spread(counts, key = !!second_grouping, value = "n", fill = 0)
}
	# NOTE(review): this file defines counts.per.group twice; when the file is
	# sourced, the later definition replaces the earlier one. Consider keeping
	# only one copy.
	#' Group by and summarize counts
	#'
	#' @description Counts how often each combination of two columns occurs and
	#' returns the counts in wide form: one row per value of `first_grouping`, one
	#' column per value of `second_grouping`, and `0` where a combination does not
	#' occur. The input is either a single data frame or a list of data frames. A
	#' list is first stacked into one table; with the default arguments the result
	#' then shows how often a patient (`case_id`) appears in each group. For a
	#' named list, `first_grouping = ".id"` gives the counts per list element
	#' instead.
	#' @param list_df Either a list of data frames or a single data frame
	#' @param first_grouping Character - name of the first grouping column \| Default "case_id"
	#' @param second_grouping Character - name of the second grouping column, whose values become the output columns \| Default "group"
	#' @importFrom magrittr %>%
	#' @importFrom dplyr group_by_ summarize
	#' @examples
	#' # Using a data frame
	#' df <- data.frame(
	#'   case_id = c("a", "a", "b"),
	#'   group = c("x", "y", "x")
	#' )
	#' df_counts <- counts.per.group(df, "case_id", "group")
	#' @return A data frame (grouped by `first_grouping`) with one row per value of
	#'   `first_grouping` and one count column per value of `second_grouping`.
	counts.per.group <- function(list_df, first_grouping = "case_id", second_grouping = "group") {
	  stopifnot(
	    is.character(first_grouping), length(first_grouping) == 1L,
	    is.character(second_grouping), length(second_grouping) == 1L
	  )
	  # inherits() instead of `class(x) == "list"`: class() may have length > 1
	  # (e.g. for tibbles), which is not a valid `if` condition.
	  if (inherits(list_df, "list")) {
	    # Stack all list elements into one table. For a named list plyr::ldply
	    # records the element names in an ".id" column.
	    list_df <- plyr::ldply(list_df, data.table::data.table)
	  }
	  # Select the grouping columns by their names (given as strings).
	  grouped <- dplyr::group_by(
	    list_df,
	    dplyr::across(dplyr::all_of(c(first_grouping, second_grouping)))
	  )
	  # One row per combination with its frequency in column `n`; the result stays
	  # grouped by `first_grouping`.
	  counts <- dplyr::summarize(grouped, n = dplyr::n(), .groups = "drop_last")
	  # `!!` passes the column *name* stored in `second_grouping`, so a data column
	  # that happens to be called "second_grouping" cannot be picked by accident.
	  tidyr::spread(counts, key = !!second_grouping, value = "n", fill = 0)
	}