Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
64fce7794e
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
105 lines (83 sloc) 6.07 KB
```{r parameters-and-defaults, include = FALSE}
# Keys identifying this child document within the report configuration: the
# parameter-merge chunk looks up the R option named by `module` and then the
# entry named by `section` inside it.
# NOTE(review): `parameters_and_defaults`, which the parameter-merge chunk
# passes to the validator, is not assigned in this chunk despite the chunk's
# name -- confirm it is defined upstream.
module <- "scRNAseq"
section <- "feature_selection"
```
```{r parameter-merge, include = FALSE}
# Fetch the settings stored for this section under the module's R option and
# hand them, together with `parameters_and_defaults`, to the validator. The
# result is the parameter list used by the remaining chunks (e.g.
# `local_params$assay`).
# NOTE(review): presumably validate_params() fills in missing entries from the
# defaults -- confirm against ReporteR.base.
local_params <- ReporteR.base::validate_params(
  getOption(module)[[section]],
  parameters_and_defaults
)
```
### Coefficient of variation
Observed variation in gene expression from single-cell experiments can be attributed to two main components: a) *true* biological variation, e.g. through different cell-cycle stages, and b) technical variation (noise) that is inevitably introduced by low input amounts of RNA. Here, we try to find genes that exhibit much more variation than expected. The coefficient of variation is a measure of dispersion (variation) and is defined as the ratio of the standard deviation to the mean ($cv = \frac{\sigma}{\mu}$). Since technical variation associated with gene expression is elevated when lowering the starting amount of RNA down to picograms [@ramskold_noise_2012], and this is true in particular for less abundantly expressed transcripts, we will judge a gene's dispersion estimate by taking its expression into account.
Figure \@ref(fig:scRNAseq-feature-selection-A-cv2-figure) clearly depicts the dependency of dispersion (measured as $cv^2$, y-axis) on the average gene expression (x-axis). The dispersion value (color bar) of each gene is normalized by conditioning on its mean expression, which means that lowly abundant genes have to show a much greater dispersion compared to highly abundant genes in order to be assigned the same dispersion value.
```{r scRNAseq-feature-selection-A-cv2-processing, include=FALSE, echo=FALSE}
# Heterogeneity across all cells: computed on the configured assay with the
# "cv" statistic and "windows" normalization (window = 200), ordered by the
# overall mean expression, and stored in the ".heterogeneity_cv2" column.
object_filtered <- singlecellutils::add_heterogeneity(
  object_filtered,
  exprs_values = local_params$assay,
  column = ".heterogeneity_cv2",
  statistic = "cv",
  order_by = means$all,
  normalization = "windows",
  window = 200
)

# Every entry of `celltypes` other than "all" gets its own heterogeneity
# estimate, computed on the columns selected by that entry and ordered by that
# subset's mean expression.
celltype_subsets <- setdiff(names(celltypes), "all")
if (length(celltype_subsets) > 0) {
  het <- sapply(celltype_subsets, function(celltype) {
    subset_object <- object_filtered[, celltypes[[celltype]]]
    subset_assay <- SummarizedExperiment::assay(subset_object, local_params$assay)
    singlecellutils::heterogeneity(
      data = subset_assay,
      statistic = "cv",
      order_by = means[[celltype]],
      normalization = "windows",
      window = 200
    )
  })

  # One column per subset, named ".heterogeneity_cv2_<subset>", appended to
  # the existing row data.
  colnames(het) <- paste0(".heterogeneity_cv2_", colnames(het))
  existing_row_data <- SummarizedExperiment::rowData(object_filtered)
  SummarizedExperiment::rowData(object_filtered) <- cbind(existing_row_data, het)
}
```
```{r scRNAseq-feature-selection-A-cv2-figure-params, message=FALSE, warning=FALSE, echo=FALSE}
# Figure height: panels are laid out two per row, so the number of panel rows
# is half the number of entries in `celltypes`, rounded up. `fig_height` is
# read by the figure chunk (`$global` for the chunk height, `$sub` for the
# multi-panel figure itself).
n_panel_rows <- ceiling(length(celltypes) / 2)
row_gap_in <- as.numeric(grid::convertUnit(grid::unit(5, 'mm'), 'in'))
figure_defaults <- params$formatting_defaults$figures
fig_height <- ReporteR.base::estimate_figure_height(
  height_in_panels = n_panel_rows,
  panel_height_in_in = figure_defaults$panel_height_in,
  axis_space_in_in = figure_defaults$axis_space_in,
  mpf_row_space = row_gap_in,
  max_height_in_in = figure_defaults$max_height_in
)

# Caption: panel (A) is always "all cells"; every other entry of `celltypes`
# is labelled (B), (C), ... in the order it appears.
celltype_subsets <- setdiff(names(celltypes), "all")
if (length(celltype_subsets) > 0) {
  panel_labels <- paste0(
    "(", LETTERS[seq_along(celltype_subsets) + 1], ") ",
    celltype_subsets, " cells"
  )
  sup_fig_cap <- paste0(
    ", ",
    ReporteR.base::itemize(panel_labels, sort = FALSE),
    "."
  )
} else {
  sup_fig_cap <- "."
}
fig_cap <- paste0(
  "Gene dispersion and the dependency of the mean expression and coefficient of variation in (A) all cells",
  sup_fig_cap
)

# Diverging colour ramp: seven breakpoints evenly spaced on [-4, 4], coloured
# with the reversed "RdBu" Brewer palette.
dispersion_breaks <- seq(from = -4, to = 4, length.out = 7)
dispersion_colours <- scales::brewer_pal(
  type = "div",
  palette = "RdBu",
  direction = -1
)(7)
color_function <- circlize::colorRamp2(dispersion_breaks, colors = dispersion_colours)
```
```{r scRNAseq-feature-selection-A-cv2-figure, message=FALSE, warning=FALSE, echo=FALSE, fig.height = fig_height$global, fig.cap=fig_cap}
# Assemble the per-gene plotting table for one panel.
#   mean_expression, cv_values: per-gene values plotted on the x and y axes.
#   dispersion: per-gene heterogeneity values; converted to point colours with
#     `color_function` (defined in the figure-params chunk).
# stringsAsFactors = FALSE keeps the colour column as plain character strings
# on R versions older than 4.0 as well.
make_cv2_plot_data <- function(mean_expression, cv_values, dispersion) {
  data.frame(
    mean = mean_expression,
    cv = cv_values,
    dispersion = dispersion,
    col = color_function(dispersion),
    stringsAsFactors = FALSE
  )
}

# Draw one mean-vs-cv scatter panel from a table built by make_cv2_plot_data().
make_cv2_scatter <- function(panel_data) {
  ggplot2::ggplot(panel_data, ggplot2::aes(x = mean, y = cv, colour = col)) +
    # alpha is *set* to 0.3 here. The previous `aes(alpha = 0.3)` mapped a
    # constant through the alpha scale, so the drawn opacity was chosen by the
    # scale rather than being 0.3, and a legend had to be suppressed
    # afterwards with guides().
    ggplot2::geom_point(size = 0.2, alpha = 0.3) +
    # Colours are precomputed per point, so they are used verbatim.
    ggplot2::scale_color_identity() +
    ggplot2::ggtitle("") +
    theme_feature_selection_scatter +
    ggplot2::xlab("Mean gene expression") +
    ggplot2::ylab("Squared coefficient of variation")
}

# Two panels per row; a single column when there is only one entry.
figure_feature_selection_cv2 <- multipanelfigure::multi_panel_figure(
  height = fig_height$sub,
  columns = min(length(celltypes), 2),
  rows = ceiling(length(celltypes) / 2),
  unit = "in"
)

feature_row_data <- SummarizedExperiment::rowData(object_filtered)

# First panel: all cells.
plot_data <- make_cv2_plot_data(
  mean_expression = means$all,
  cv_values = cvs$all,
  dispersion = feature_row_data[[".heterogeneity_cv2"]]
)
plot_feature_selection_cv2_all <- make_cv2_scatter(plot_data)
figure_feature_selection_cv2 <- multipanelfigure::fill_panel(
  figure_feature_selection_cv2,
  plot_feature_selection_cv2_all
)

# Remaining panels: one per entry of `celltypes` other than "all", in order.
# The loop body simply does not run when there are no such entries.
for (celltype in setdiff(names(celltypes), "all")) {
  celltype_data <- make_cv2_plot_data(
    mean_expression = means[[celltype]],
    cv_values = cvs[[celltype]],
    dispersion = feature_row_data[[paste0(".heterogeneity_cv2_", celltype)]]
  )
  figure_feature_selection_cv2 <- multipanelfigure::fill_panel(
    figure_feature_selection_cv2,
    make_cv2_scatter(celltype_data)
  )
}

figure_feature_selection_cv2
```