Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
CEdlich committed Apr 7, 2022
1 parent 870c3bc commit f449455
Show file tree
Hide file tree
Showing 10 changed files with 740 additions and 0 deletions.
Binary file added .DS_Store
Binary file not shown.
88 changes: 88 additions & 0 deletions Scripts/delta_list.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
to_matrix <- function(df) {

nm <- df[[1]]

df %>% select(-1) %>% as.matrix() %>% set_rownames(nm)

}

my_quick_mass <- function(x) {
check_chemform(isotopes = isotopes, x)$monoisotopic_mass
}

K = my_quick_mass("K")
Na = my_quick_mass("Na")
H = my_quick_mass("H")
C13 = 1.003355

NH3 = my_quick_mass("NH3")
H2O = my_quick_mass("H2O")


delta_list =
c(`Na-H` = Na - H,
`K-H` = K-H,
NH3 = NH3,
`Na-NH4` = Na - NH3 - H,
`K-NH4` = K - NH3 - H,
H2O = H2O,
C13 = C13,
C1H2O2 = my_quick_mass("C1H2O2"),
C2H7N1 = my_quick_mass("C2H7N1"),
C8H6O3 = my_quick_mass("C8H6O3"),
CO2 = my_quick_mass("C1O2"),
Bz = my_quick_mass("C7H5O1"),
Acetonitrile = my_quick_mass("C2H3N1"),
DMSO = my_quick_mass("C2H6O1S1")
)


d1 <- tibble(
adduct = c("H", "Na", "K", "NH4"),
delta1 = c(0, Na-H, K-H, NH3)
)



d2 <- tibble(
gain_loss = c("None", "+DMSO", "+Acetonitrile", "+C2H7N1", "+H2O", "-H2O", "-CO2", "-C1H2O2", "-C8H6O3", "-Bz", "-NH3"),
delta2 = c(0, delta_list[c("DMSO", "Acetonitrile", "C2H7N1", "H2O")], -delta_list[c("H2O", "CO2", "C1H2O2", "C8H6O3", "Bz", "NH3")])
)

d3 <- tibble(
isotope = c("M", "M+1", "M+2", "M+3"),
delta3 = C13 * c(0:3)
)


format_species <- function(a, gl, iso) {

case_when(
gl == "None" & iso == "M" ~ sprintf("[%s + %s]", iso, a),
gl == "None" & iso != "M" ~ sprintf("[(%s) + %s]", iso, a),
gl != "None" & iso == "M" ~ sprintf("[%s + %s] %s", iso, a, gl),
TRUE ~ sprintf("[(%s) + %s] %s", iso, a, gl)
)

}

delta_list2 <-
expand_grid(
adduct = c("H", "Na", "K", "NH4"),
gain_loss = c("None", "+DMSO", "+Acetonitrile", "+C2H7N1", "+H2O", "-H2O", "-CO2", "-C1H2O2", "-C8H6O3", "-Bz", "-NH3"),
isotope = c("M", "M+1", "M+2", "M+3")
) %>%
left_join(
d1, by = "adduct"
) %>%
left_join(
d2, by = "gain_loss"
) %>%
left_join(
d3, by = "isotope"
) %>%
filter(!(gain_loss == "-NH3" & adduct == "NH4")) %>%
mutate(
name2 = format_species(adduct, gain_loss, isotope),
delta = delta1 + delta2 + delta3
)
45 changes: 45 additions & 0 deletions Scripts/get_network_layout.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
get_network_layout <- function(obj, clust) {

# node_cat, edge_cat, node_size, edge_size

node <- obj$node %>%
filter(clust_id == clust)

edge <- obj$edge %>%
filter(FEATURE_ID_1 %in% node$FEATURE_ID & FEATURE_ID_2 %in% node$FEATURE_ID)

coord <- get_layout(edge)

node %<>%
left_join(coord, by = "FEATURE_ID")

edge %<>%
left_join(coord, by = c("FEATURE_ID_1" = "FEATURE_ID")) %>%
left_join(coord, by = c("FEATURE_ID_2" = "FEATURE_ID"), suffix = c("", "end"))

deg <- c(edge$FEATURE_ID_1, edge$FEATURE_ID_2) %>% table() %>% {tibble(FEATURE_ID = names(.), Degree = unname(.))}

node %<>%
left_join(deg, by = "FEATURE_ID")

list(
edge = edge,
node = node
)

}















71 changes: 71 additions & 0 deletions Scripts/network_plotter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@

gg_network_plotter <- function(
obj,
node_size1 = 10,
node_colour_var = "adduct",
show_text = F,
text_colour_var = "name2",
text_size = 8,
text_colour = "cyan",
edge_colour_var = "Signature",
edge_size = 1
)

{

node <- obj$node
edge <- obj$edge

save(list = ls(), file = "bip.RData")

gg <- ggplot() +
geom_segment(
mapping = aes(x=x, y=y, xend=xend, yend=yend, colour = !!as.name(edge_colour_var)),
data = edge,
size = edge_size
) +

geom_point(
data = node,
mapping=aes(x=x, y=y, fill = !!as.name(node_colour_var)),
shape = 21,
size = node_size1,
) +

theme_void() +
labs(
title = "COMPOUND_ID %s MZ %.4f RT %.2f" %>% sprintf(node$COMPOUND_ID[1], node$MZ[1], node$RT[1])
)

if(show_text) {

gg <- gg +
geom_text(
data = node,
mapping=aes(x=x, y=y, label = !!as.name(text_colour_var)),
size = text_size,
colour = text_colour,
)
}


return(gg)

}


plot_cluster <- function(obj, clust) {

obj %>%
get_network_layout(clust) %>%
gg_network_plotter(show_text = T)

}








128 changes: 128 additions & 0 deletions Scripts/propagate_label.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
delta_converter_list <- read_csv("delta_converter_list.csv", show_col_types = FALSE) %>% to_matrix()

my_label_propagation2 <- function(obj) {

node <- obj$node %>%
arrange(-mean)

edge <- obj$edge

# anchor_node: character[1] (FEATURE_ID)
anchor_node <- node$FEATURE_ID %>% first()

# initiliase tibbles
nodelist <- anchor_node
done <- character(0)


# matrix
node_assign <-
bind_cols(
node %>% select(FEATURE_ID),
map_df(as_tibble(delta_converter_list), ~ rep(0L, nrow(node)))
) %>%
to_matrix()

# assume principal node is M+H
node_assign[anchor_node, "H"] <- 1

# neighborhood tibble
N <- get_adjacency_tibble(edge)


while (length(nodelist) > 0) {

new_nodelist <- character(0)

# i: character[1]
for (i in nodelist) {

# print(paste("i", i))

# j character[1]
for(j in N %>% filter(n1==i) %$% n2 %>% setdiff(done)) {

# print(paste("j", j))

node_assign[j,] <- node_assign[i,] + convert_delta_code(N %>% filter(n1==i, n2==j) %$% code)

new_nodelist <- c(new_nodelist, j)
}

done <- c(done, i)
}

done %<>% unique()

nodelist <- new_nodelist %>% unique()



}

node_assign %<>%
as_tibble(rownames = "FEATURE_ID") %>%
get_species()

list(
edge = edge,
node = node %>% left_join(node_assign %>% select(FEATURE_ID, M, adduct, gain_loss, name2), by = "FEATURE_ID"),
decomposition_table = nodelist,
anchor_node = anchor_node

)

}

get_species <- function(x) {


x %>%
mutate(
adduct = sprintf("_Na_%d__K_%d__H_%d_", Na, K, H) %>%
gsub("_[A-Za-z0-9]+_0_", "", .) %>%
gsub("(_1)|(^_)|(_$)", "", .) %>%
# sub("H__NH3", "NH4", .) %>%
sub("__H_-([1-9])", "-\\1H", .) %>%
sub("-1H", "-H", .) %>%
gsub("__", "+", .),

M = if_else(C13 == 0, "M", sprintf("(M+%d)", C13)),

gain_loss = sprintf("_NH3_%d__H2O_%d__FA_%d__C2H7N1_%d__C8H6O3_%d__CO2_%d__Bz_%d__ACN_%d__DMSO_%d_", NH3, H2O, C1H2O2, C2H7N1, C8H6O3, CO2, Bz, Acetonitrile, DMSO) %>%

gsub("_[A-Za-z0-9]+_0_", "", .) %>%
gsub("_([A-Za-z0-9]+)_-([1-9])", "-\\2.\\1", .) %>%
gsub("-1\\.", "-", .) %>%
gsub("(_1)|(^_)|(_$)", "", .) %>%
gsub("__", "+", .) %>%
gsub("_-", "-", .),

name2 = sprintf("[%s + %s] +%s", M, adduct, gain_loss) %>%
gsub("\\+-", "-", .) %>%
gsub("\\+$", "", .) %>%
trimws()


)

}



get_adjacency_tibble <- function(edge) {

adj <- get_adjacency_matrix_delta(edge) %>% t()

N <- lapply(seq(nrow(adj)), function(i) adj[i,] %>% keep(.!=0))

tibble(
n1 = rownames(adj),
n2 = map(N, ~names(.x)),
code = map(N, ~unname(.x))
) %>%
unnest(c(n2, code))

}


Binary file added cor_cluster_mix.RDS
Binary file not shown.
Binary file added data20.RDS
Binary file not shown.
15 changes: 15 additions & 0 deletions delta_converter_list.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Name,Na,K,H,NH3,H2O,C13,C1H2O2,C2H7N1,C8H6O3,CO2,Bz,Acetonitrile,DMSO
Na-H,1,0,-1,0,0,0,0,0,0,0,0,0,0
K-H,0,1,-1,0,0,0,0,0,0,0,0,0,0
NH3,0,0,0,1,0,0,0,0,0,0,0,0,0
Na-NH4,1,0,-1,-1,0,0,0,0,0,0,0,0,0
K-NH4,0,1,-1,-1,0,0,0,0,0,0,0,0,0
H2O,0,0,0,0,1,0,0,0,0,0,0,0,0
C13,0,0,0,0,0,1,0,0,0,0,0,0,0
C1H2O2,0,0,0,0,0,0,1,0,0,0,0,0,0
C2H7N1,0,0,0,0,0,0,0,1,0,0,0,0,0
C8H6O3,0,0,0,0,0,0,0,0,1,0,0,0,0
CO2,0,0,0,0,0,0,0,0,0,1,0,0,0
Bz,0,0,0,0,0,0,0,0,0,0,1,0,0
Acetonitrile,0,0,0,0,0,0,0,0,0,0,0,1,0
DMSO,0,0,0,0,0,0,0,0,0,0,0,0,1
7 changes: 7 additions & 0 deletions make_data20.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@

cor_cluster_mix <- readRDS("cor_cluster_mix.RDS")

cl <- cor_cluster_mix$node %>% arrange(-clust_size) %$% clust_id %>% unique() %>% head(100)

purrr::map(cl %>% head(20), ~get_network_layout(cor_cluster_mix, .x) %>% my_label_propagation2()) %>%
saveRDS("data20.RDS")
Loading

0 comments on commit f449455

Please sign in to comment.