Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
mmRmeta/R/reorder.column.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
28 lines (25 sloc)
1.71 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Remove rare factor levels and reorder the remaining ones by frequency
#'
#' @description Drops every row of `cancer_metadata` whose value in `column`
#' belongs to a factor level that occurs fewer than `remove` times, and
#' reorders the surviving levels by descending count. Ordered levels give
#' ordered labels when plotting later on.
#'
#' @details
#' * Levels with the same count keep their previous relative order.
#' * Levels that do not occur in the data are always dropped.
#' * Rows with `NA` in `column` are counted as one group: they are kept only
#'   if the number of `NA` rows is at least `remove`. `NA` never becomes a
#'   level.
#' * If `column` is not a factor (or is not present), a warning is raised and
#'   the input is returned unchanged.
#'
#' @param cancer_metadata Data frame (or tibble) that contains `column`.
#' @param column Character - name of the factor column to clean up.
#' @param remove Integer - minimum count; factor levels that occur fewer than
#'   `remove` times are discarded (levels with exactly `remove` rows are kept).
#' @importFrom magrittr %>%
#' @examples
#' # keeps factor levels with >= 10 counts
#' lungMeta <- reorder.column(lungMeta, "primary_diagnosis", 10)
#' @return `cancer_metadata` without the rows of the discarded levels and with
#'   `column` re-levelled by descending frequency.
reorder.column <- function(cancer_metadata, column, remove = 0) {
  values <- cancer_metadata[[column]]
  if (!is.factor(values)) {
    warning(column, " is no factor, no changes applied")
    return(cancer_metadata)
  }

  # Work on the labels so an explicit NA level is treated like a missing value.
  labels <- as.character(values)
  candidate_levels <- levels(values)
  candidate_levels <- candidate_levels[!is.na(candidate_levels)]
  # tabulate() ignores the NA produced by match() for missing labels.
  counts <- tabulate(
    match(labels, candidate_levels),
    nbins = length(candidate_levels)
  )

  # order() leaves ties in their original position, so sorting the negated
  # counts yields descending frequency with ties in the existing level order.
  ranked <- order(-counts)
  ranked <- ranked[counts[ranked] > 0 & counts[ranked] >= remove]
  kept_levels <- candidate_levels[ranked]

  # Missing values form their own group and are subject to the same threshold.
  is_missing <- is.na(labels)
  keep_missing <- sum(is_missing) >= remove
  keep_row <- labels %in% kept_levels | (is_missing & keep_missing)

  had_automatic_row_names <- .row_names_info(cancer_metadata) < 0L
  cancer_metadata <- cancer_metadata[keep_row, , drop = FALSE]
  if (had_automatic_row_names) {
    # Renumber 1..n instead of exposing the positions of the original rows.
    attr(cancer_metadata, "row.names") <- .set_row_names(sum(keep_row))
  }

  cancer_metadata[[column]] <- factor(
    labels[keep_row],
    levels = kept_levels,
    ordered = is.ordered(values)
  )
  cancer_metadata
}