Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
cutoff_list <-
list(
cv = c(0.1, 0.2, 0.25, .3, .35, .4, .5, 1, 2) * (-1),
water_ratio = log2(c(1, 2, 3, 4, 5, 10, 1e2, 1e4, 1e5, 1e6, 1e7)),
coverage = c(0.1, .25, .50, .75, .9, 1),
calib_r2 = c(0, 0.1, .25, .5, .75, .9),
calib_count = c(1, 2, 3 , 4, 5),
calib_curvature = c(0, 0.1, .25, .5, .75, .9)
)
t1 <- my_filter$table %>% mutate(cv = -cv)
for (i in seq_along(cutoff_list)) {
nm = names(cutoff_list[i])
for (v in cutoff_list[[i]]) {
nm2 <- sprintf("%s_%.2f_pass", nm, v) %>% sub("-", "", .)
t1 %<>%
mutate(!!as.name(nm2) := !!as.name(nm) >= v)
}
}
t2 <- t1 %>%
select(ends_with("pass"))
nm3 <- names(t2) %>% sub("_pass", "", .)
shortname_list <-
c(
cv = "cv",
water_ratio = "wr",
coverage = "cov",
calib_r2 = "r2",
calib_count = "cp",
calib_curvature = "curve"
)
for (i in seq_along(shortname_list)) {
nm3 %<>% sub(names(shortname_list[i]), shortname_list[i], .)
}
t3 <- t2 %>% set_names(nm3)
pca <- t3 %>% t() %>% prcomp()
# plot(pca$x, type = "n")
# text(pca$x, labels = rownames(pca$x))
pca_data <-
pca$x %>%
as_tibble(rownames = "Name") %>%
select(Name, matches("PC[1-5]$")) %>%
mutate(
param = map_chr(strsplit(Name, "_"), first),
val = map_chr(strsplit(Name, "_"), last) %>% as.numeric()
) %>%
mutate(
val = ifelse(param == "wr", round(2^val) %>% signif(1), val)
)
plt <- ggplot(
data = pca_data,
mapping = aes(x=PC1, y = PC2, colour = param, label = val)
) +
geom_path(linewidth = 2) +
geom_point(colour = "black") +
geom_text(colour = "black", nudge_y = 1.5)
pval_table <- my_filter$table$p_value_fdr %>% cut(c(0,0.01,0.05, 0.1, 0.2, 0.5, 1)) %>% table()
# cs_table <- t2 %>% colSums() %>%
# {tibble(nm = names(.), retention = unname(.) / nrow(t2))}
cs_table <- t2 %>% colSums() %>%
{tibble(nm = names(.), retention_gross = unname(.) )}
t1_pv1 <- my_filter$table %>% mutate(cv = -cv)
for (i in seq_along(cutoff_list)) {
nm = names(cutoff_list[i])
for (v in cutoff_list[[i]]) {
nm2 <- sprintf("%s_%.2f_pass", nm, v) %>% sub("-", "", .)
t1_pv1 %<>%
mutate(!!as.name(nm2) := !!as.name(nm) >= v & p_value_fdr <= 0.05)
}
}
n0.05 <- my_filter$table %>% filter(p_value_fdr <= 0.05) %>% nrow()
pv1_table <- t1_pv1 %>%
select(ends_with("pass")) %>%
colSums() %>%
{tibble(nm = names(.), retention_p_0.05 = unname(.) )} %>%
left_join(cs_table, by = "nm") %>%
mutate(
sensitivity_p_0.05 = retention_p_0.05 / n0.05,
specificity_p_0.05 = retention_p_0.05 / retention_gross
)
t1_pv2 <- my_filter$table %>% mutate(cv = -cv)
for (i in seq_along(cutoff_list)) {
nm = names(cutoff_list[i])
for (v in cutoff_list[[i]]) {
nm2 <- sprintf("%s_%.2f_pass", nm, v) %>% sub("-", "", .)
t1_pv2 %<>%
mutate(!!as.name(nm2) := !!as.name(nm) >= v & p_value_fdr <= 0.2)
}
}
n0.2 <- my_filter$table %>% filter(p_value_fdr <= 0.2) %>% nrow()
pv2_table <- t1_pv2 %>%
select(ends_with("pass")) %>%
colSums() %>%
{tibble(nm = names(.), retention_p_0.2 = unname(.) )} %>%
left_join(cs_table, by = "nm") %>%
mutate(
sensitivity_p_0.2 = retention_p_0.2 / n0.2,
specificity_p_0.2 = retention_p_0.2 / retention_gross
)
roc_table <-
pv1_table %>%
left_join(pv2_table %>% select(-retention_gross), by = "nm") %>%
bind_cols(pca_data)
ggplot(
data = roc_table,
mapping = aes(x = specificity_p_0.05, y = sensitivity_p_0.05, colour = param, label = val)
) +
geom_path(linewidth = 1) +
geom_point(colour = "black") +
geom_text(colour = "black", nudge_y = 0.01) +
# geom_abline(slope = -1, intercept = 1) +
lims(x = c(0,NA), y = c(0,1.1)) +
geom_hline(yintercept = c(0,1)) +
geom_vline(xintercept = c(0))
plt3 <- ggplot(
data = roc_table,
mapping = aes(x = specificity_p_0.2, y = sensitivity_p_0.2, colour = param, label = val)
) +
geom_path(linewidth = 1) +
geom_point(colour = "black") +
geom_text(colour = "black", nudge_y = 0.01) +
# geom_abline(slope = -1, intercept = 1) +
lims(x = c(0,NA), y = c(0,1.1)) +
geom_hline(yintercept = c(0,1)) +
geom_vline(xintercept = c(0))
wr3_gross <- my_filter$table %>% mutate(cv = -cv)
for (v in seq(0.1, 0.5, 0.05)) {
nm = "calib_curvature"
nm2 <- sprintf("%s_%.2f_pass", nm, v) %>% sub("-", "", .)
wr3_gross %<>%
mutate(!!as.name(nm2) := !!as.name(nm) >= v & water_ratio >= 3)
}
wr3_gross %<>%
select(ends_with("pass")) %>%
colSums() %>%
{tibble(nm = names(.), retention_gross = unname(.) )}
wr3_p_0.05 <- my_filter$table %>% mutate(cv = -cv)
for (v in seq(0.1, 0.5, 0.05)) {
nm = "calib_curvature"
nm2 <- sprintf("%s_%.2f_pass", nm, v) %>% sub("-", "", .)
wr3_p_0.05 %<>%
mutate(!!as.name(nm2) := !!as.name(nm) >= v & water_ratio >= 3 & p_value_fdr <= 0.05)
}
wr3_p_0.05 %<>%
select(ends_with("pass")) %>%
colSums() %>%
{tibble(nm = names(.), retention_p_0.05 = unname(.) )}
wr3 <-
left_join(wr3_gross, wr3_p_0.05, by = "nm") %>%
mutate(
sensitivity_p_0.05 = retention_p_0.05 / n0.05,
specificity_p_0.05= retention_p_0.05 / retention_gross
)
#### named species ####
named1 <- my_filter$table %>% mutate(cv = -cv) %>% left_join(my_raw_data$feat)
named2 <- my_filter$table %>% mutate(cv = -cv) %>% left_join(my_raw_data$feat)
n_named1 <-
named1 %>% filter(!is.na(NAME)) %>%
nrow()
n_named2 <-
named2 %>% filter(!is.na(NAME2)) %>%
nrow()
for (i in seq_along(cutoff_list)) {
nm = names(cutoff_list[i])
for (v in cutoff_list[[i]]) {
nm2 <- sprintf("%s_%.2f_pass", nm, v) %>% sub("-", "", .)
named1 %<>%
mutate(!!as.name(nm2) := !!as.name(nm) >= v & !is.na(NAME))
named2 %<>%
mutate(!!as.name(nm2) := !!as.name(nm) >= v & !is.na(NAME2))
}
}
named1 %<>%
select(ends_with("pass")) %>%
colSums() %>%
{tibble(nm = names(.), retention_named1 = unname(.) )}
named2 %<>%
select(ends_with("pass")) %>%
colSums() %>%
{tibble(nm = names(.), retention_named2 = unname(.) )}
roc_table %<>% left_join(named1, by = "nm") %>% left_join(named2, by = "nm")
roc_table %<>%
mutate(
sensitivity_named1 = retention_named1 / n_named1,
specificity_named1 = retention_named1 / retention_gross
)
plt4 <- ggplot(
data = roc_table,
mapping = aes(y = sensitivity_named1, x = specificity_named1, colour = param, label = val)
) +
geom_path(linewidth = 1) +
geom_point(colour = "black") +
geom_text(colour = "black", nudge_y = 0.01) +
# geom_abline(slope = -1, intercept = 1) +
lims(x = c(0,NA), y = c(0,1.1)) +
geom_hline(yintercept = c(0,1)) +
geom_vline(xintercept = c(0))
#### ####
df1 <- left_join(t1, my_raw_data$feat, by = "FEATURE_ID")
df2 <-
df1 %>%
mutate(CL = sapply(strsplit(NAME, " "), first)) %>%
group_by(water_ratio_1.58_pass, CL) %>% summarise(count = length(CL)) %>%
ungroup() %>%
pivot_wider(id_cols = CL, names_from = 1, values_from = count)
df2 %>%
set_names(c("CL", "Rejected", "Pass")) %>%
mutate(across(c(Rejected, Pass), ~replace(.x, is.na(.x), 0))) %>%
mutate(Pass_ratio = Pass / (Pass + Rejected)) %>%
arrange(Pass_ratio) %>%
print(n = 27)
df2 %>% summarise(across(2:3, sum, na.rm = T))