This repository has been archived by the owner. It is now read-only.
Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
ReporteR.scRNAseq/inst/content/04-feature-selection-A-cv2.Rmd
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
105 lines (83 sloc)
6.07 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
```{r parameters-and-defaults, include = FALSE} | |
# Lookup keys for this report section: the parameter-merge chunk indexes
# options() by `module` first and then by `section`.
section <- "feature_selection"
module <- "scRNAseq"
``` | |
```{r parameter-merge, include = FALSE} | |
# Fetch the option entry stored under the module name, select this section's
# element from it, and hand that to validate_params() together with
# `parameters_and_defaults`. The result is kept as `local_params`.
local_params <- ReporteR.base::validate_params(
  options(module)[[module]][[section]],
  parameters_and_defaults
)
``` | |
### Coefficient of variation | |
Observed variation in gene expression from single-cell experiments can be attributed to two main components: a) *true* biological variation, e.g. through different cell-cycle stages, and b) technical variation (noise) that is inevitably introduced by low input amounts of RNA. Here, we try to find genes that exhibit much more variation than expected. The coefficient of variation is a measure of dispersion (variation) and is defined as the ratio of the standard deviation to the mean ($cv = \frac{\sigma}{\mu}$). Since technical variation associated with gene expression is elevated when lowering the starting amount of RNA down to picograms [@ramskold_noise_2012], and this is true in particular for less abundantly expressed transcripts, we will judge a gene's dispersion estimate by taking its expression into account.
Figure \@ref(fig:scRNAseq-feature-selection-A-cv2-figure) clearly depicts the dependency of dispersion (measured as $cv^2$, y-axis) on the average gene expression (x-axis). The dispersion value (color bar) of each gene is normalized by conditioning on its mean expression, which means that lowly abundant genes have to show a much greater dispersion compared to highly abundant genes in order to be assigned the same dispersion value.
```{r scRNAseq-feature-selection-A-cv2-processing, include=FALSE, echo=FALSE} | |
# Dispersion across all cells: add_heterogeneity() is asked to write its
# result under the ".heterogeneity_cv2" column, ordering genes by their
# overall mean expression.
object_filtered <- singlecellutils::add_heterogeneity(
  object_filtered,
  exprs_values = local_params$assay,
  column = ".heterogeneity_cv2",
  statistic = "cv",
  order_by = means$all,
  normalization = "windows",
  window = 200
)

# Every entry of `celltypes` other than "all" gets its own dispersion column.
subtype_names <- setdiff(names(celltypes), "all")
if (length(subtype_names) > 0) {
  het <- sapply(subtype_names, function(celltype) {
    # Restrict the assay to the cells selected for this cell type.
    subset_assay <- SummarizedExperiment::assay(
      object_filtered[, celltypes[[celltype]]],
      local_params$assay
    )
    singlecellutils::heterogeneity(
      data = subset_assay,
      statistic = "cv",
      order_by = means[[celltype]],
      normalization = "windows",
      window = 200
    )
  })

  # Prefix the columns so they sit next to the global ".heterogeneity_cv2".
  colnames(het) <- paste0(".heterogeneity_cv2_", colnames(het))
  SummarizedExperiment::rowData(object_filtered) <- cbind(
    SummarizedExperiment::rowData(object_filtered),
    het
  )
}
``` | |
```{r scRNAseq-feature-selection-A-cv2-figure-params, message=FALSE, warning=FALSE, echo=FALSE} | |
# Figure height: panels are laid out two per row, so the number of panel rows
# is half the number of cell-type groups, rounded up.
figure_defaults <- params$formatting_defaults$figures
fig_height <- ReporteR.base::estimate_figure_height(
  height_in_panels = ceiling(length(celltypes) / 2),
  panel_height_in_in = figure_defaults$panel_height_in,
  axis_space_in_in = figure_defaults$axis_space_in,
  mpf_row_space = as.numeric(grid::convertUnit(grid::unit(5, 'mm'), 'in')),
  max_height_in_in = figure_defaults$max_height_in
)

# Caption suffix: panel (A) is always "all cells"; each further cell type is
# labelled (B), (C), ... in the order it appears in `celltypes`.
subtype_names <- setdiff(names(celltypes), "all")
sup_fig_cap <- "."
if (length(subtype_names) > 0) {
  tmp <- paste0(
    "(", LETTERS[seq_along(subtype_names) + 1], ") ",
    subtype_names, " cells"
  )
  sup_fig_cap <- paste0(
    ", ",
    ReporteR.base::itemize(tmp, sort = FALSE),
    sup_fig_cap
  )
}
fig_cap <- paste0(
  "Gene dispersion and the dependency of the mean expression and coefficient of variation in (A) all cells",
  sup_fig_cap
)

# Diverging colour ramp over dispersion values from -4 to 4 (seven breaks,
# reversed "RdBu" palette).
color_function <- circlize::colorRamp2(
  seq(from = -4, to = 4, length.out = 7),
  colors = scales::brewer_pal("div", palette = "RdBu", -1)(7)
)
``` | |
```{r scRNAseq-feature-selection-A-cv2-figure, message=FALSE, warning=FALSE, echo=FALSE, fig.height = fig_height$global, fig.cap=fig_cap} | |
# Build one scatter panel of mean expression (x) against coefficient of
# variation (y), with each gene coloured by its dispersion value.
#
# mean_expr, cv_values, dispersion: per-gene vectors of equal length.
# Returns a ggplot object.
#
# Relies on `color_function` and `theme_feature_selection_scatter` being
# defined before this chunk runs.
build_cv2_scatter <- function(mean_expr, cv_values, dispersion) {
  panel_data <- data.frame(
    mean = mean_expr,
    cv = cv_values,
    dispersion = dispersion,
    col = color_function(dispersion)
  )
  ggplot2::ggplot(panel_data, ggplot2::aes(x = mean, y = cv, color = col)) +
    # alpha is a fixed setting, not a data mapping: keeping it outside aes()
    # applies 0.3 opacity directly and creates no alpha legend, so no
    # guides() call is needed to hide one.
    ggplot2::geom_point(size = 0.2, alpha = 0.3) +
    # `col` already holds literal colours, so they are used as-is.
    ggplot2::scale_color_identity() +
    ggplot2::ggtitle("") +
    theme_feature_selection_scatter +
    ggplot2::xlab("Mean gene expression") +
    # NOTE(review): the label says "squared"; confirm that `cvs` holds cv^2.
    ggplot2::ylab("Squared coefficient of variation")
}

# Grid with two panels per row: "all" first, then one panel per cell type.
figure_feature_selection_cv2 <- multipanelfigure::multi_panel_figure(
  height = fig_height$sub,
  columns = min(length(celltypes), 2),
  rows = ceiling(length(celltypes) / 2),
  unit = "in"
)

gene_annotation <- SummarizedExperiment::rowData(object_filtered)

plot_feature_selection_cv2_all <- build_cv2_scatter(
  mean_expr = means$all,
  cv_values = cvs$all,
  dispersion = gene_annotation[, ".heterogeneity_cv2"]
)
figure_feature_selection_cv2 <- multipanelfigure::fill_panel(
  figure_feature_selection_cv2,
  plot_feature_selection_cv2_all
)

# An empty setdiff() simply skips the loop, so no length guard is required.
for (celltype in setdiff(names(celltypes), "all")) {
  celltype_plot <- build_cv2_scatter(
    mean_expr = means[[celltype]],
    cv_values = cvs[[celltype]],
    dispersion = gene_annotation[, paste0(".heterogeneity_cv2_", celltype)]
  )
  figure_feature_selection_cv2 <- multipanelfigure::fill_panel(
    figure_feature_selection_cv2,
    celltype_plot
  )
}

figure_feature_selection_cv2
``` |