Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
shinyFilter/meta_analysis.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
322 lines (211 sloc)
7.21 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cutoff_list <- | |
list( | |
cv = c(0.1, 0.2, 0.25, .3, .35, .4, .5, 1, 2) * (-1), | |
water_ratio = log2(c(1, 2, 3, 4, 5, 10, 1e2, 1e4, 1e5, 1e6, 1e7)), | |
coverage = c(0.1, .25, .50, .75, .9, 1), | |
calib_r2 = c(0, 0.1, .25, .5, .75, .9), | |
calib_count = c(1, 2, 3 , 4, 5), | |
calib_curvature = c(0, 0.1, .25, .5, .75, .9) | |
) | |
t1 <- my_filter$table %>% mutate(cv = -cv) | |
for (i in seq_along(cutoff_list)) { | |
nm = names(cutoff_list[i]) | |
for (v in cutoff_list[[i]]) { | |
nm2 <- sprintf("%s_%.2f_pass", nm, v) %>% sub("-", "", .) | |
t1 %<>% | |
mutate(!!as.name(nm2) := !!as.name(nm) >= v) | |
} | |
} | |
t2 <- t1 %>% | |
select(ends_with("pass")) | |
nm3 <- names(t2) %>% sub("_pass", "", .) | |
shortname_list <- | |
c( | |
cv = "cv", | |
water_ratio = "wr", | |
coverage = "cov", | |
calib_r2 = "r2", | |
calib_count = "cp", | |
calib_curvature = "curve" | |
) | |
for (i in seq_along(shortname_list)) { | |
nm3 %<>% sub(names(shortname_list[i]), shortname_list[i], .) | |
} | |
t3 <- t2 %>% set_names(nm3) | |
pca <- t3 %>% t() %>% prcomp() | |
# plot(pca$x, type = "n") | |
# text(pca$x, labels = rownames(pca$x)) | |
pca_data <- | |
pca$x %>% | |
as_tibble(rownames = "Name") %>% | |
select(Name, matches("PC[1-5]$")) %>% | |
mutate( | |
param = map_chr(strsplit(Name, "_"), first), | |
val = map_chr(strsplit(Name, "_"), last) %>% as.numeric() | |
) %>% | |
mutate( | |
val = ifelse(param == "wr", round(2^val) %>% signif(1), val) | |
) | |
plt <- ggplot( | |
data = pca_data, | |
mapping = aes(x=PC1, y = PC2, colour = param, label = val) | |
) + | |
geom_path(linewidth = 2) + | |
geom_point(colour = "black") + | |
geom_text(colour = "black", nudge_y = 1.5) | |
pval_table <- my_filter$table$p_value_fdr %>% cut(c(0,0.01,0.05, 0.1, 0.2, 0.5, 1)) %>% table() | |
# cs_table <- t2 %>% colSums() %>% | |
# {tibble(nm = names(.), retention = unname(.) / nrow(t2))} | |
cs_table <- t2 %>% colSums() %>% | |
{tibble(nm = names(.), retention_gross = unname(.) )} | |
t1_pv1 <- my_filter$table %>% mutate(cv = -cv) | |
for (i in seq_along(cutoff_list)) { | |
nm = names(cutoff_list[i]) | |
for (v in cutoff_list[[i]]) { | |
nm2 <- sprintf("%s_%.2f_pass", nm, v) %>% sub("-", "", .) | |
t1_pv1 %<>% | |
mutate(!!as.name(nm2) := !!as.name(nm) >= v & p_value_fdr <= 0.05) | |
} | |
} | |
n0.05 <- my_filter$table %>% filter(p_value_fdr <= 0.05) %>% nrow() | |
pv1_table <- t1_pv1 %>% | |
select(ends_with("pass")) %>% | |
colSums() %>% | |
{tibble(nm = names(.), retention_p_0.05 = unname(.) )} %>% | |
left_join(cs_table, by = "nm") %>% | |
mutate( | |
sensitivity_p_0.05 = retention_p_0.05 / n0.05, | |
specificity_p_0.05 = retention_p_0.05 / retention_gross | |
) | |
t1_pv2 <- my_filter$table %>% mutate(cv = -cv) | |
for (i in seq_along(cutoff_list)) { | |
nm = names(cutoff_list[i]) | |
for (v in cutoff_list[[i]]) { | |
nm2 <- sprintf("%s_%.2f_pass", nm, v) %>% sub("-", "", .) | |
t1_pv2 %<>% | |
mutate(!!as.name(nm2) := !!as.name(nm) >= v & p_value_fdr <= 0.2) | |
} | |
} | |
n0.2 <- my_filter$table %>% filter(p_value_fdr <= 0.2) %>% nrow() | |
pv2_table <- t1_pv2 %>% | |
select(ends_with("pass")) %>% | |
colSums() %>% | |
{tibble(nm = names(.), retention_p_0.2 = unname(.) )} %>% | |
left_join(cs_table, by = "nm") %>% | |
mutate( | |
sensitivity_p_0.2 = retention_p_0.2 / n0.2, | |
specificity_p_0.2 = retention_p_0.2 / retention_gross | |
) | |
roc_table <- | |
pv1_table %>% | |
left_join(pv2_table %>% select(-retention_gross), by = "nm") %>% | |
bind_cols(pca_data) | |
ggplot( | |
data = roc_table, | |
mapping = aes(x = specificity_p_0.05, y = sensitivity_p_0.05, colour = param, label = val) | |
) + | |
geom_path(linewidth = 1) + | |
geom_point(colour = "black") + | |
geom_text(colour = "black", nudge_y = 0.01) + | |
# geom_abline(slope = -1, intercept = 1) + | |
lims(x = c(0,NA), y = c(0,1.1)) + | |
geom_hline(yintercept = c(0,1)) + | |
geom_vline(xintercept = c(0)) | |
plt3 <- ggplot( | |
data = roc_table, | |
mapping = aes(x = specificity_p_0.2, y = sensitivity_p_0.2, colour = param, label = val) | |
) + | |
geom_path(linewidth = 1) + | |
geom_point(colour = "black") + | |
geom_text(colour = "black", nudge_y = 0.01) + | |
# geom_abline(slope = -1, intercept = 1) + | |
lims(x = c(0,NA), y = c(0,1.1)) + | |
geom_hline(yintercept = c(0,1)) + | |
geom_vline(xintercept = c(0)) | |
wr3_gross <- my_filter$table %>% mutate(cv = -cv) | |
for (v in seq(0.1, 0.5, 0.05)) { | |
nm = "calib_curvature" | |
nm2 <- sprintf("%s_%.2f_pass", nm, v) %>% sub("-", "", .) | |
wr3_gross %<>% | |
mutate(!!as.name(nm2) := !!as.name(nm) >= v & water_ratio >= 3) | |
} | |
wr3_gross %<>% | |
select(ends_with("pass")) %>% | |
colSums() %>% | |
{tibble(nm = names(.), retention_gross = unname(.) )} | |
wr3_p_0.05 <- my_filter$table %>% mutate(cv = -cv) | |
for (v in seq(0.1, 0.5, 0.05)) { | |
nm = "calib_curvature" | |
nm2 <- sprintf("%s_%.2f_pass", nm, v) %>% sub("-", "", .) | |
wr3_p_0.05 %<>% | |
mutate(!!as.name(nm2) := !!as.name(nm) >= v & water_ratio >= 3 & p_value_fdr <= 0.05) | |
} | |
wr3_p_0.05 %<>% | |
select(ends_with("pass")) %>% | |
colSums() %>% | |
{tibble(nm = names(.), retention_p_0.05 = unname(.) )} | |
wr3 <- | |
left_join(wr3_gross, wr3_p_0.05, by = "nm") %>% | |
mutate( | |
sensitivity_p_0.05 = retention_p_0.05 / n0.05, | |
specificity_p_0.05= retention_p_0.05 / retention_gross | |
) | |
#### named species #### | |
named1 <- my_filter$table %>% mutate(cv = -cv) %>% left_join(my_raw_data$feat) | |
named2 <- my_filter$table %>% mutate(cv = -cv) %>% left_join(my_raw_data$feat) | |
n_named1 <- | |
named1 %>% filter(!is.na(NAME)) %>% | |
nrow() | |
n_named2 <- | |
named2 %>% filter(!is.na(NAME2)) %>% | |
nrow() | |
for (i in seq_along(cutoff_list)) { | |
nm = names(cutoff_list[i]) | |
for (v in cutoff_list[[i]]) { | |
nm2 <- sprintf("%s_%.2f_pass", nm, v) %>% sub("-", "", .) | |
named1 %<>% | |
mutate(!!as.name(nm2) := !!as.name(nm) >= v & !is.na(NAME)) | |
named2 %<>% | |
mutate(!!as.name(nm2) := !!as.name(nm) >= v & !is.na(NAME2)) | |
} | |
} | |
named1 %<>% | |
select(ends_with("pass")) %>% | |
colSums() %>% | |
{tibble(nm = names(.), retention_named1 = unname(.) )} | |
named2 %<>% | |
select(ends_with("pass")) %>% | |
colSums() %>% | |
{tibble(nm = names(.), retention_named2 = unname(.) )} | |
roc_table %<>% left_join(named1, by = "nm") %>% left_join(named2, by = "nm") | |
roc_table %<>% | |
mutate( | |
sensitivity_named1 = retention_named1 / n_named1, | |
specificity_named1 = retention_named1 / retention_gross | |
) | |
plt4 <- ggplot( | |
data = roc_table, | |
mapping = aes(y = sensitivity_named1, x = specificity_named1, colour = param, label = val) | |
) + | |
geom_path(linewidth = 1) + | |
geom_point(colour = "black") + | |
geom_text(colour = "black", nudge_y = 0.01) + | |
# geom_abline(slope = -1, intercept = 1) + | |
lims(x = c(0,NA), y = c(0,1.1)) + | |
geom_hline(yintercept = c(0,1)) + | |
geom_vline(xintercept = c(0)) | |
#### #### | |
df1 <- left_join(t1, my_raw_data$feat, by = "FEATURE_ID") | |
df2 <- | |
df1 %>% | |
mutate(CL = sapply(strsplit(NAME, " "), first)) %>% | |
group_by(water_ratio_1.58_pass, CL) %>% summarise(count = length(CL)) %>% | |
ungroup() %>% | |
pivot_wider(id_cols = CL, names_from = 1, values_from = count) | |
df2 %>% | |
set_names(c("CL", "Rejected", "Pass")) %>% | |
mutate(across(c(Rejected, Pass), ~replace(.x, is.na(.x), 0))) %>% | |
mutate(Pass_ratio = Pass / (Pass + Rejected)) %>% | |
arrange(Pass_ratio) %>% | |
print(n = 27) | |
df2 %>% summarise(across(2:3, sum, na.rm = T)) | |