This repository has been archived by the owner. It is now read-only.
Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
ReporteR.scRNAseq/inst/content/04-feature-selection-B-dropout.Rmd
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
104 lines (83 sloc)
6.11 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
```{r parameters-and-defaults, include = FALSE} | |
module <- "scRNAseq" | |
section <- "feature_selection" | |
``` | |
```{r parameter-merge, include = FALSE} | |
local_params <- module %>% | |
options() %>% | |
magrittr::extract2(module) %>% | |
magrittr::extract2(section) %>% | |
ReporteR.base::validate_params(parameters_and_defaults) | |
``` | |
### Gene dropouts | |
Dropout-based feature selection is conceptually similar to highly-variable gene-based feature selection. In both cases it is assumed that genes expressed at a constant level will follow some distribution due to technical noise, and that genes responding to a biological perturbation will follow a different distribution. Highly variable gene detection characterizes these distributions using the relationship between the mean and the variance, whereas dropout-based feature selection uses the relationship between the mean and the number of zeros. Since single-cell RNASeq data contains a large number of zeros, with dropout rates often spanning the full range from 0 to 1, this is effective in characterizing the expression distributions. An important property of the mean-dropouts relation is that differential expression across distinct populations of cells or through pseudotime increases the observed dropout-rate due to the nonlinearity of the relationship. The advantage of using the dropout-rate over variance is that the former can be estimated more accurately due to much lower sampling noise. *From: [@andrews_dropout_2018]*. | |
Figure \@ref(fig:scRNAseq-feature-selection-B-dropout-figure) clearly depicts the dependency of gene dropout (measured as percentage of cells with *Zero* expression, y-axis) on the average gene expression (x-axis). The gene dropout value (color bar) of each gene is normalized by conditioning on its mean expression. | |
```{r scRNAseq-feature-selection-B-dropout-processing, include=FALSE, echo=FALSE} | |
object_filtered %<>% | |
singlecellutils::add_heterogeneity(exprs_values = local_params$assay, | |
column = ".heterogeneity_dropout", | |
statistic = "dropout", | |
order_by = means$all, | |
normalization = "windows", | |
window = 200) | |
if (length(setdiff(names(celltypes), "all")) > 0) { | |
het <- sapply(setdiff(names(celltypes), "all"), function(t) { | |
i <- celltypes[[t]] | |
obj <- object_filtered[, i] | |
singlecellutils::heterogeneity(data = SummarizedExperiment::assay(obj, local_params$assay), | |
statistic = "dropout", | |
order_by = means[[t]], | |
normalization = "windows", | |
window = 200) | |
}) | |
colnames(het) <- paste0(".heterogeneity_dropout_", colnames(het)) | |
SummarizedExperiment::rowData(object_filtered) <- cbind(SummarizedExperiment::rowData(object_filtered), het) | |
} | |
``` | |
```{r scRNAseq-feature-selection-B-dropout-figure-params, message=FALSE, warning=FALSE, echo=FALSE} | |
fig_height <- ReporteR.base::estimate_figure_height( | |
height_in_panels = ceiling(length(celltypes)/2), | |
panel_height_in_in = params$formatting_defaults$figures$panel_height_in, | |
axis_space_in_in = params$formatting_defaults$figures$axis_space_in, | |
mpf_row_space = as.numeric(grid::convertUnit(grid::unit(5, 'mm'), 'in')), | |
max_height_in_in = params$formatting_defaults$figures$max_height_in) | |
sup_fig_cap <- "." | |
if (length(setdiff(names(celltypes), "all")) > 0) { | |
tmp <- sapply(1:length(setdiff(names(celltypes), "all")), function(i) { | |
paste0("(", LETTERS[i+1], ") ", setdiff(names(celltypes), "all")[i], " cells") | |
}) | |
sup_fig_cap <- paste0(", ", ReporteR.base::itemize(tmp, sort = FALSE), sup_fig_cap) | |
} | |
fig_cap <- paste0("Gene dropouts and its dependency on the mean expression in (A) all cells", sup_fig_cap) | |
color_function <- circlize::colorRamp2(seq(from = -4, to = 4, length.out = 7), colors = scales::brewer_pal("div", palette = "RdBu", -1)(7)) | |
``` | |
```{r scRNAseq-feature-selection-B-dropout-figure, message=FALSE, warning=FALSE, echo=FALSE, fig.height = fig_height$global, fig.cap=fig_cap} | |
figure_feature_selection_dropout <- multipanelfigure::multi_panel_figure(height = fig_height$sub, columns = min(length(celltypes), 2), rows = ceiling(length(celltypes)/2), unit = "in") | |
plot_data <- data.frame(mean = means$all, dropouts = dropouts$all, heterogeneity = SummarizedExperiment::rowData(object_filtered)[, ".heterogeneity_dropout"], col = color_function(SummarizedExperiment::rowData(object_filtered)[, ".heterogeneity_dropout"])) | |
plot_feature_selection_cv2_all <- ggplot2::ggplot(plot_data, ggplot2::aes_string(x = "mean", y = "dropouts", color = "col")) + | |
ggplot2::geom_point(size = 0.2, ggplot2::aes(alpha = 0.3)) + | |
ggplot2::scale_color_identity() + | |
#scale_color_manual(name = "", values = het_colors) + | |
ggplot2::ggtitle("") + | |
theme_feature_selection_scatter + | |
ggplot2::guides(alpha = FALSE, size = FALSE) + | |
ggplot2::xlab("Mean gene expression") + | |
ggplot2::ylab("Fraction of dropouts") | |
figure_feature_selection_dropout <- multipanelfigure::fill_panel(figure_feature_selection_dropout, plot_feature_selection_cv2_all) | |
if (length(setdiff(names(celltypes), "all")) > 0) { | |
for(t in setdiff(names(celltypes), "all")) { | |
tmp_data <- data.frame(mean = means[[t]], dropouts = dropouts[[t]], heterogeneity = SummarizedExperiment::rowData(object_filtered)[, paste0(".heterogeneity_dropout_", t)], col = color_function(SummarizedExperiment::rowData(object_filtered)[, paste0(".heterogeneity_dropout_", t)])) | |
tmp_plot <- ggplot2::ggplot(tmp_data, ggplot2::aes_string(x = "mean", y = "dropouts", color = "col")) + | |
ggplot2::geom_point(size = 0.2, ggplot2::aes(alpha = 0.3)) + | |
#scale_color_manual(name = "", values = het_colors) + | |
ggplot2::scale_color_identity() + | |
ggplot2::ggtitle("") + | |
theme_feature_selection_scatter + | |
ggplot2::guides(alpha = FALSE, size = FALSE) + | |
ggplot2::xlab("Mean gene expression") + | |
ggplot2::ylab("Fraction of dropouts") | |
figure_feature_selection_dropout <- multipanelfigure::fill_panel(figure_feature_selection_dropout, tmp_plot) | |
} | |
} | |
figure_feature_selection_dropout | |
``` |