Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
##################################################
## Project: DexStim Mouse Brain
## Date: 03.12.2020
## Author: Nathalie
##################################################
# Decide on a beta cutoff for single region funcoup networks
library(data.table)
library(dplyr)
library(ggplot2)
library(igraph)
library(eulerr)
basepath <- "~/Documents/ownCloud/DexStim_RNAseq_Mouse/"
region <- "PFC"
padj_cutoff <- 0.01
rsquared_cutoff <- 0.1
### FUNCTIONS -------------------------------------
# function to read all files from list
readFiles_concat <- function(file_list){
# initialize empty data frame
dataset <- data.frame()
# read each file from list and append to data frame
for (i in 1:length(file_list)){
temp_data <- fread(file_list[i])
dataset <- rbindlist(list(dataset, temp_data), use.names = T)
}
return(dataset)
}
# Z-score (z_ij) for the differential analysis between gene i and j
z_score <- function(beta_t, beta_c, se_t, se_c){
z <- (beta_t - beta_c)/
sqrt((se_t)^2 + (se_c)^2)
}
# Plot changes in ranks of nodes with highest betweenness
plotRanks <- function(a, b, labels = TRUE, labels.offset=0.1, arrow.len=0.1)
{
old.par <- par(mar=c(1,1,1,1))
# Find the length of the vectors
len.1 <- length(a)
len.2 <- length(b)
# Plot two columns of equidistant points
plot(rep(1, len.1), 1:len.1, pch=20, cex=0.8,
xlim=c(0, 3), ylim=c(0, max(len.1, len.2)),
axes=F, xlab="", ylab="") # Remove axes and labels
points(rep(2, len.2), 1:len.2, pch=20, cex=0.8)
# Put labels next to each observation
if (labels){
text(rep(1-labels.offset, len.1), 1:len.1, a)
text(rep(2+labels.offset, len.2), 1:len.2, b)
}
# Now we need to map where the elements of a are in b
# We use the match function for this job
a.to.b <- match(a, b)
# Now we can draw arrows from the first column to the second
arrows(rep(1.02, len.1), 1:len.1, rep(1.98, len.2), a.to.b,
length=arrow.len, angle=20)
par(old.par)
}
### ANALYSIS ---------------------------------------
# 1. Read data
# 1a. Read co expression networks
dex0_files <- list.files(path = file.path(basepath, "tables/coExpression_kimono"),
pattern = paste0("04\\_singleRegion\\_",region,"\\_dex0\\_funcoup\\_parallel\\_.*\\.csv"),
full.names = TRUE)
dex1_files <- list.files(path = file.path(basepath, "tables/coExpression_kimono"),
pattern = paste0("04\\_singleRegion\\_",region,"\\_dex1\\_funcoup\\_parallel\\_.*\\.csv"),
full.names = TRUE)
data_dex0 <- readFiles_concat(dex0_files)
data_dex1 <- readFiles_concat(dex1_files)
# 2. Join Base and Dex data frame
data <- inner_join(data_dex0, data_dex1,
by = c("target", "predictor"),
suffix = c(".base", ".dex"))
dex_notBase <- anti_join(data_dex1, data_dex0, by = c("target", "predictor"))
head(data)
# Plot beta values
beta_data <- as.data.frame(rbind(cbind(rep("base", times = nrow(data)), data$beta_mean.base, data$relation.base),
cbind(rep("dex", times = nrow(data)), data$beta_mean.dex, data$relation.dex)))
# ggplot(data = beta_data, aes(x = V1, y = V2, col = V3)) +
# geom_boxplot() +
# xlab("network") +
# ylab("beta") +
# theme(text = element_text(size=20))
# hist(data$beta_mean.base, breaks = 500) # histogram of beta values in base network
# png(filename = paste0(basepath, "figures/02_CoExp_Kimono/03_AnalysisFuncoup/singleRegions/",
# region,"_funcoup_differential_betahistogramZoom.png"),
# height = 400, width = 600)
# hist(data$beta_mean.base, breaks = 5000, xlim = c(-0.05,0.05),
# xlab = "Beta", main = "", cex.lab=1.5, cex.axis = 1.2)
# dev.off()
# hist(data$beta_mean.dex, breaks = 500) # histogram of beta values in dex network
# hist(data$beta_mean.dex, breaks = 5000, xlim = c(-0.05,0.05))
# 3. Remove interactions with very low r squared values & intercept & SVs
data <- data %>%
filter(overall_rsq.base >= rsquared_cutoff, overall_rsq.dex >= rsquared_cutoff) %>%
filter(predictor != '(Intercept)')
data <- data[!startsWith(data$predictor, "SV"),]
# 4. Calculate z scores for interactions that are left
data <- mutate(data, z = z_score(beta_mean.dex,beta_mean.base, beta_stderr.dex, beta_stderr.base))
hist(data$beta_mean.base)
# 5. Cutoff optimization
poss_cutoff <- c(0.000001, 0.00001, 0.0001, 0.001, 0.005, 0.01, 0.1)
nodebetweenness_list <- list()
for(beta_cutoff in poss_cutoff) {
print(beta_cutoff)
# 5a. Keep only interactions that have at least in one network a beta value > 0.05
data <- data %>%
mutate(diff = (abs(beta_mean.base) > beta_cutoff | abs(beta_mean.dex) > beta_cutoff))
data_diff1 <- data %>%
filter(diff)
data_diff1$p_diff <- 2*pnorm(-abs(data_diff1$z))
data_diff1$p_adj <- p.adjust(data_diff1$p_diff, method = "fdr")
# 5b. Create network corresponding to beta cutoff
data_diff <- data_diff1 %>%
filter(p_adj <= padj_cutoff)
head(data_diff[,c("beta_mean.base", "beta_stderr.base", "beta_mean.dex", "beta_stderr.dex", "z", "p_adj")], 20)
# Nodes in network
node_vec <- unique(c(data_diff$target, data_diff$predictor))
# Find modules
relations <- data.frame(from=data_diff$target,
to=data_diff$predictor,
value=data_diff$z,
performance=data_diff$p_adj)
g_diff <- graph_from_data_frame(relations, directed=FALSE, vertices=node_vec)
# Calculate nodedegree
nodedegree <- igraph::degree(g_diff)
nodedegree <- sort(nodedegree, decreasing = TRUE)
# Calculate nodebetweenness
nodebetweenness <- betweenness(g_diff, directed = FALSE) # node betweenness: number of shortest paths going through a node
nodebetweenness <- sort(nodebetweenness, decreasing = TRUE)
nodebetweenness_list[[as.character(beta_cutoff)]] <- nodebetweenness
# hist(nodebetweenness)
}
# Compare top genes between different cutoffs
for (i in 1:(length(nodebetweenness_list)-1)){
intersect_top <- intersect(names(nodebetweenness_list[[i]][1:100]), names(nodebetweenness_list[[i+1]][1:100]))
print(length(intersect_top))
match_pos <- match(names(nodebetweenness_list[[i]])[names(nodebetweenness_list[[i]]) %in% intersect_top],
names(nodebetweenness_list[[i+1]])[names(nodebetweenness_list[[i+1]]) %in% intersect_top])
print(match_pos)
# Rank plot to compare ranks of top genes
# Comparison between i and i+1
png(filename = paste0(basepath, "figures/02_CoExp_Kimono/03_AnalysisFuncoup/singleRegions/",
region,"_funcoup_cutoffSelection_rankPlot_betacutoffs",poss_cutoff[i],"-",poss_cutoff[i+1],".png"),
width = 700, height = 700)
plotRanks(names(nodebetweenness_list[[i]][1:100]), names(nodebetweenness_list[[i+1]][1:100]),
labels = FALSE)
dev.off()
# Comparison between 1 and i
if (i > 1){
png(filename = paste0(basepath, "figures/02_CoExp_Kimono/03_AnalysisFuncoup/singleRegions/",
region,"_funcoup_cutoffSelection_rankPlot_betacutoffs",poss_cutoff[1],"-",poss_cutoff[i],".png"),
width = 700, height = 700)
plotRanks(names(nodebetweenness_list[[1]][1:100]), names(nodebetweenness_list[[i]][1:100]),
labels = FALSE)
dev.off()
}
# Venn diagram (euler plot)
# Comparison between i and i+1
list1 <- list(names(nodebetweenness_list[[i]][1:100]),
names(nodebetweenness_list[[i+1]][1:100]))
names(list1) <- c(paste("Beta cutoff", poss_cutoff[i]),
paste("Beta cutoff", poss_cutoff[i+1]))
png(filename = paste0(basepath, "figures/02_CoExp_Kimono/03_AnalysisFuncoup/singleRegions/",
region,"_funcoup_cutoffSelection_vennEuler_betacutoffs",poss_cutoff[i],"-",poss_cutoff[i+1],".png"),
width = 700, height = 700)
print(plot(euler(list1, shape = "ellipse"),
labels = list(cex = 1.5), quantities = list(cex = 1.5)))
dev.off()
# Comparison between i and 1
if(i > 1){
list1 <- list(names(nodebetweenness_list[[1]][1:100]),
names(nodebetweenness_list[[i]][1:100]))
names(list1) <- c(paste("Beta cutoff", poss_cutoff[1]),
paste("Beta cutoff", poss_cutoff[i]))
png(filename = paste0(basepath, "figures/02_CoExp_Kimono/03_AnalysisFuncoup/singleRegions/",
region,"_funcoup_cutoffSelection_vennEuler_betacutoffs",poss_cutoff[1],"-",poss_cutoff[i],".png"),
width = 700, height = 700)
print(plot(euler(list1, shape = "ellipse"),
labels = list(cex = 1.5), quantities = list(cex = 1.5)))
dev.off()
}
}
# I WOULD DECIDE ON 0.001 from plots, but top genes have too many connections with 0.001
# --> probably 0.01