Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
ilian_functions/ilian_functions_MD.r
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
829 lines (613 sloc)
30.5 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
####################### | |
####################### Functions | |
### | |
imputator <- function(x, seed = 5531, sd = 0.3, down_shift = 1.8){ # the imputator function substitutes missing values from normal disriution with mean = 0 and sd = sd and down shift of down_shift | |
set.seed(seed = seed) | |
x[is.na(x)] <- rnorm(n = length(x[is.na(x)]), mean = 0, sd = sd)- down_shift | |
return(x) | |
} | |
scale.reversor <- function(x){ # backtransform scaled data; from here: https://stackoverflow.com/questions/10287545/backtransform-scale-for-plotting | |
y <- x * attr(x, 'scaled:scale') + attr(x, 'scaled:center') | |
return(y) | |
} | |
### | |
inf.to.NA <- function(x){ # change the +/-inf calues to NAs | |
y <- ifelse(is.finite(x),x,NA) | |
return(y) | |
} | |
### | |
LFQ_data_imputator <- function(data.frame.for.imputation){ | |
data.frame.for.imputation %>% # the data frame | |
mutate_at(vars(-contains("Protein IDs")),.funs = funs(log2)) %>% # log2 transform all columns, except for Protein IDs | |
mutate_at(vars(-contains("Protein IDs")),.funs =funs(inf.to.NA)) %>% # replace +inf values with NAs, except for Protein IDs | |
mutate_at(vars(-contains("Protein IDs")),.funs =funs(scale)) %>% # scale the columns, except for Protein IDs | |
mutate_at(vars(-contains("Protein IDs")),.funs =funs(imputator)) %>% # substitute the missing values in the columns from normal distribution, except for Protein IDs | |
mutate_at(vars(-contains("Protein IDs")),.funs = funs(scale.reversor)) -> LFQ.intensity.imputed | |
return(LFQ.intensity.imputed) | |
} | |
### | |
imputed_values_annotator <- function(non.imputed.data,imputed.data){ # compares two data frames with before and after imputation and | |
# marks with "+" the imputed valeus | |
non.imputed.data %>% | |
gather(`Experiment`,value,-`Protein IDs`) %>% | |
mutate(value = log2(value))-> non.imputed.data.long | |
imputed.data %>% | |
gather(`Experiment`,value,-`Protein IDs`) -> imputed.data.long | |
warning(paste0("All Protein IDs are the same order: ", all(non.imputed.data.long[,"Experiment"] == imputed.data.long[,"Experiment"]))) | |
imputed.data.long %>% | |
mutate(`Imputed values` = ifelse(value == non.imputed.data.long$value,"","+")) -> imputed.data.long.imputation.indicated | |
return(imputed.data.long.imputation.indicated) | |
} | |
#### imputation of LFQ data and imputation | |
LFQ_data_imputator_annotator <- function(data.frame.for.imputation){ | |
data.frame.for.imputation %>% | |
LFQ_data_imputator() -> data.frame.for.imputation.imputed | |
data.frame.for.imputation %>% | |
imputed_values_annotator(imputed.data = data.frame.for.imputation.imputed) -> data.frame.for.imputation.imputed.long.annotated | |
df.list <- list(data.frame.for.imputation.imputed, | |
data.frame.for.imputation.imputed.long.annotated) | |
names(df.list) <- c("imputed.df","imputed.df.imputation.annotated") | |
return(df.list) | |
} | |
### add Uniprot priority ID | |
add_uniprot_prior_prot_id <- function(protein.data, protein.annotations){ | |
protein.data %>% | |
unnest(`Peptide count (all)` = strsplit(`Peptide counts (all)`,";"),`Priority protein ID` = strsplit(`Protein IDs`,";")) %>% | |
mutate(`Peptide count (all)` = as.numeric(`Peptide count (all)`)) %>% | |
mutate(`Protein ID for merge` = gsub("-.*","",`Priority protein ID`)) %>% | |
left_join(protein.annotations[,c("Entry","Status","Date of creation")], by = c("Protein ID for merge" = "Entry")) %>% | |
group_by(id) %>% | |
arrange(desc(`Peptide count (all)`), | |
Status, | |
`Date of creation`, | |
`Priority protein ID`) %>% | |
filter(row_number() %in% 1) %>% | |
ungroup() %>% | |
select(-Status,-`Date of creation`, -`Protein ID for merge`, -id, -`Peptide count (all)`) | |
} | |
### add mouse mitocarta annotations | |
add_mcarta2_mouse_annot <- function(protein.data.annotated,mitocarta.data){ | |
mitocarta.data %>% | |
select(Symbol,Description,MitoCarta2_List) -> mitocarta.symbol | |
mitocarta.data %>% | |
select(Synonyms,Description,MitoCarta2_List) %>% | |
unnest(Synonyms = strsplit(Synonyms,"\\|"))-> mitocarta.synonym | |
# Gene names (primary ) -> Symbol | |
protein.data.annotated %>% | |
left_join(mitocarta.symbol[,c("Symbol","Description","MitoCarta2_List")], by = c("Gene names (primary )" = "Symbol")) %>% filter(!is.na(MitoCarta2_List)) -> | |
protein.data.annotated.mito.0 | |
protein.data.annotated %>% | |
left_join(mitocarta.symbol[,c("Symbol","Description","MitoCarta2_List")], by = c("Gene names (primary )" = "Symbol")) %>% filter(is.na(MitoCarta2_List)) -> | |
protein.data.annotated.mito.missing | |
# Gene names (primary ) -> Synonym | |
protein.data.annotated.mito.missing %>% | |
select(-MitoCarta2_List,-Description) %>% | |
left_join(mitocarta.synonym[,c("Synonyms","Description","MitoCarta2_List")], by = c("Gene names (primary )" = "Synonyms")) %>% | |
filter(!is.na(MitoCarta2_List)) -> protein.data.annotated.mito.1 | |
protein.data.annotated.mito.missing %>% | |
select(-MitoCarta2_List,-Description) %>% | |
left_join(mitocarta.synonym[,c("Synonyms","Description","MitoCarta2_List")], by = c("Gene names (primary )" = "Synonyms")) %>% | |
filter(is.na(MitoCarta2_List)) -> protein.data.annotated.mito.missing | |
# Gene names (synonym ) -> Symbol | |
protein.data.annotated.mito.missing %>% | |
select(-MitoCarta2_List,-Description) %>% | |
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>% | |
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>% | |
unnest(`Gene names (synonym )`) %>% | |
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>% | |
left_join(mitocarta.symbol[,c("Symbol","Description","MitoCarta2_List")], by = c("Gene names (synonym )" = "Symbol")) %>% | |
group_by(`Gene names (primary )`,`Priority protein ID`) %>% | |
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>% | |
unique() %>% | |
filter(!is.na(MitoCarta2_List)) %>% | |
ungroup() -> protein.data.annotated.mito.2 | |
protein.data.annotated.mito.missing %>% | |
select(-MitoCarta2_List,-Description) %>% | |
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>% | |
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>% | |
unnest(`Gene names (synonym )`) %>% | |
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>% | |
left_join(mitocarta.symbol[,c("Symbol","Description","MitoCarta2_List")], by = c("Gene names (synonym )" = "Symbol")) %>% | |
group_by(`Gene names (primary )`,`Priority protein ID`) %>% | |
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>% | |
unique() %>% | |
filter(all(is.na(MitoCarta2_List))) %>% | |
ungroup() -> protein.data.annotated.mito.missing | |
# Gene names (synonym ) -> Synonym | |
protein.data.annotated.mito.missing %>% | |
select(-MitoCarta2_List,-Description) %>% | |
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>% | |
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>% | |
unnest(`Gene names (synonym )`) %>% | |
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>% | |
left_join(mitocarta.synonym[,c("Synonyms","Description","MitoCarta2_List")], by = c("Gene names (synonym )" = "Synonyms")) %>% | |
group_by(`Gene names (primary )`,`Priority protein ID`) %>% | |
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>% | |
unique() %>% | |
filter(!is.na(MitoCarta2_List)) %>% | |
ungroup() -> protein.data.annotated.mito.3 | |
protein.data.annotated.mito.missing %>% | |
select(-MitoCarta2_List,-Description) %>% | |
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>% | |
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>% | |
unnest(`Gene names (synonym )`) %>% | |
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>% | |
left_join(mitocarta.synonym[,c("Synonyms","Description","MitoCarta2_List")], by = c("Gene names (synonym )" = "Synonyms")) %>% | |
group_by(`Gene names (primary )`,`Priority protein ID`) %>% | |
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>% | |
unique() %>% | |
filter(all(is.na(MitoCarta2_List))) %>% | |
ungroup()-> protein.data.annotated.mito.missing | |
protein.data.annotated.mito.0 %>% | |
rbind(protein.data.annotated.mito.1) %>% | |
rbind(protein.data.annotated.mito.2) %>% | |
rbind(protein.data.annotated.mito.3) %>% | |
rbind(protein.data.annotated.mito.missing) %>% | |
unique() %>% | |
select(`Gene names (primary )`) %>% | |
table() %>% | |
as_tibble() %>% | |
filter(`n` > 1) -> duplicated.mitoannotations | |
protein.data.annotated.mito.0 %>% | |
filter(!is.na(MitoCarta2_List)) %>% | |
rbind(protein.data.annotated.mito.1) %>% | |
rbind(protein.data.annotated.mito.2) %>% | |
rbind(protein.data.annotated.mito.3) %>% | |
rbind(protein.data.annotated.mito.missing) %>% | |
unique() %>% | |
select(-`Description`) %>% | |
unique() | |
} | |
### add human mitocarta annotations | |
add_mcarta2_human_annot <- function(protein.data.annotated,mitocarta.data){ | |
mitocarta.data %>% | |
select(Symbol,Description,MCARTA2_LIST) -> mitocarta.symbol | |
mitocarta.data %>% | |
select(Synonyms,Description,MCARTA2_LIST) %>% | |
unnest(Synonyms = strsplit(Synonyms,"\\|"))-> mitocarta.synonym | |
# Gene names (primary ) -> Symbol | |
protein.data.annotated %>% | |
left_join(mitocarta.symbol[,c("Symbol","Description","MCARTA2_LIST")], by = c("Gene names (primary )" = "Symbol")) %>% filter(!is.na(MCARTA2_LIST)) -> | |
protein.data.annotated.mito.0 | |
protein.data.annotated %>% | |
left_join(mitocarta.symbol[,c("Symbol","Description","MCARTA2_LIST")], by = c("Gene names (primary )" = "Symbol")) %>% filter(is.na(MCARTA2_LIST)) -> | |
protein.data.annotated.mito.missing | |
# Gene names (primary ) -> Synonym | |
protein.data.annotated.mito.missing %>% | |
select(-MCARTA2_LIST,-Description) %>% | |
left_join(mitocarta.synonym[,c("Synonyms","Description","MCARTA2_LIST")], by = c("Gene names (primary )" = "Synonyms")) %>% | |
filter(!is.na(MCARTA2_LIST)) -> protein.data.annotated.mito.1 | |
protein.data.annotated.mito.missing %>% | |
select(-MCARTA2_LIST,-Description) %>% | |
left_join(mitocarta.synonym[,c("Synonyms","Description","MCARTA2_LIST")], by = c("Gene names (primary )" = "Synonyms")) %>% | |
filter(is.na(MCARTA2_LIST)) -> protein.data.annotated.mito.missing | |
# Gene names (synonym ) -> Symbol | |
protein.data.annotated.mito.missing %>% | |
select(-MCARTA2_LIST,-Description) %>% | |
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>% | |
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>% | |
unnest(`Gene names (synonym )`) %>% | |
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>% | |
left_join(mitocarta.symbol[,c("Symbol","Description","MCARTA2_LIST")], by = c("Gene names (synonym )" = "Symbol")) %>% | |
group_by(`Gene names (primary )`,`Priority protein ID`) %>% | |
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>% | |
unique() %>% | |
filter(!is.na(MCARTA2_LIST)) %>% | |
ungroup() -> protein.data.annotated.mito.2 | |
protein.data.annotated.mito.missing %>% | |
select(-MCARTA2_LIST,-Description) %>% | |
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>% | |
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>% | |
unnest(`Gene names (synonym )`) %>% | |
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>% | |
left_join(mitocarta.symbol[,c("Symbol","Description","MCARTA2_LIST")], by = c("Gene names (synonym )" = "Symbol")) %>% | |
group_by(`Gene names (primary )`,`Priority protein ID`) %>% | |
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>% | |
unique() %>% | |
filter(all(is.na(MCARTA2_LIST))) %>% | |
ungroup() -> protein.data.annotated.mito.missing | |
# Gene names (synonym ) -> Synonym | |
protein.data.annotated.mito.missing %>% | |
select(-MCARTA2_LIST,-Description) %>% | |
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>% | |
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>% | |
unnest(`Gene names (synonym )`) %>% | |
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>% | |
left_join(mitocarta.synonym[,c("Synonyms","Description","MCARTA2_LIST")], by = c("Gene names (synonym )" = "Synonyms")) %>% | |
group_by(`Gene names (primary )`,`Priority protein ID`) %>% | |
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>% | |
unique() %>% | |
filter(!is.na(MCARTA2_LIST)) %>% | |
ungroup() -> protein.data.annotated.mito.3 | |
protein.data.annotated.mito.missing %>% | |
select(-MCARTA2_LIST,-Description) %>% | |
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>% | |
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>% | |
unnest(`Gene names (synonym )`) %>% | |
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>% | |
left_join(mitocarta.synonym[,c("Synonyms","Description","MCARTA2_LIST")], by = c("Gene names (synonym )" = "Synonyms")) %>% | |
group_by(`Gene names (primary )`,`Priority protein ID`) %>% | |
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>% | |
unique() %>% | |
filter(all(is.na(MCARTA2_LIST))) %>% | |
ungroup()-> protein.data.annotated.mito.missing | |
protein.data.annotated.mito.0 %>% | |
rbind(protein.data.annotated.mito.1) %>% | |
rbind(protein.data.annotated.mito.2) %>% | |
rbind(protein.data.annotated.mito.3) %>% | |
rbind(protein.data.annotated.mito.missing) %>% | |
unique() %>% | |
select(`Gene names (primary )`) %>% | |
table() %>% | |
as_tibble() %>% | |
filter(`n` > 1) -> duplicated.mitoannotations | |
protein.data.annotated.mito.0 %>% | |
filter(!is.na(MCARTA2_LIST)) %>% | |
rbind(protein.data.annotated.mito.1) %>% | |
rbind(protein.data.annotated.mito.2) %>% | |
rbind(protein.data.annotated.mito.3) %>% | |
rbind(protein.data.annotated.mito.missing) %>% | |
unique() %>% | |
select(-`Description`) %>% | |
unique() | |
} | |
### Check duplicated mitoannotations | |
mitoannot_duplications <- function(protein.data.annotated){ | |
group_by(`Protein IDs`, `Gene names (primary )`) %>% | |
table() %>% | |
as_tibble() %>% | |
filter(`n` > 1) | |
} | |
#### Numbers of quantified proteins LFQ | |
plot_num_lfq_quant_prot <- function(df, ylim = c(0,2000)){ | |
df %>% | |
select(-matches("Protein IDs")) %>% | |
gather(Experiment, value) %>% | |
group_by(Experiment) %>% | |
summarise(`Quantified proteins` = sum(value>0, na.rm = TRUE)) %>% | |
ggplot(aes(x = Experiment, y = `Quantified proteins`)) + | |
geom_bar(stat = "identity", fill = "#a6cee3", color = "black")+ | |
theme_bw()+ | |
geom_text(aes(label = `Quantified proteins`), color = "red", vjust = -0.5)+ | |
coord_cartesian(ylim = ylim)+ | |
theme(axis.text.x = element_text(angle = 90, hjust = 1)) | |
} | |
plot_num_tmt_quant_prot <- function(x, Experimental_design = Experimental.design, ylim = c(0,2000)){ | |
x %>% | |
select(matches("Reporter intensity corrected")) %>% | |
gather(`TMT reporter intensity experiment`, value) %>% | |
left_join(Experimental_design[,c("TMT reporter intensity experiment","Experiment")]) %>% | |
group_by(Experiment) %>% | |
summarise(`Quantified proteins` = sum(value>0, na.rm = TRUE)) %>% | |
mutate(Experiment = factor(Experiment, levels = rev(Experimental_design$Experiment))) %>% | |
ggplot(aes(x = Experiment, y = `Quantified proteins`)) + | |
geom_bar(stat = "identity", fill = "#a6cee3", color = "black")+ | |
theme_bw()+ | |
theme(axis.text.x = element_text(angle = 90,size = 12), | |
axis.text.y = element_text(size = 12), | |
axis.title.x = element_text(size = 14), | |
axis.title.y = element_text(size = 14))+ | |
geom_text(aes(label = `Quantified proteins`), color = "red", hjust = -0.5)+ | |
coord_flip(ylim = ylim) | |
} | |
#### Numbers of quantified proteins SILAC | |
plot_num_SILAC_quant_prot <- function(df, Experimental_design = Experimental.design, ylim = c(0,2000)){ | |
df %>% | |
select(`Protein IDs`,matches("Ratio H/L normalized ")) %>% | |
gather(`SILAC ratio experiment`, value, -`Protein IDs`) %>% | |
left_join(Experimental_design[,c("SILAC ratio experiment","Experiment")]) %>% | |
group_by(Experiment) %>% | |
summarise(`Quantified proteins` = sum(value>-Inf, na.rm = TRUE)) %>% | |
ggplot(aes(x = Experiment, y = `Quantified proteins`)) + | |
geom_bar(stat = "identity", fill = "#a6cee3", color = "black")+ | |
theme_bw()+ | |
theme(axis.text.x = element_text(angle = 90,size = 12), | |
axis.text.y = element_text(size = 12), | |
axis.title.x = element_text(size = 14), | |
axis.title.y = element_text(size = 14))+ | |
geom_text(aes(label = `Quantified proteins`), color = "red", hjust = -0.5)+ | |
coord_flip(ylim = ylim) | |
} | |
### limma results | |
limma_results_select_ProtIDs_logFC_CI_AveExpr_PVal_adjPVal <- function(x){ | |
x %>% | |
as.data.frame() %>% | |
as_tibble() %>% | |
rownames_to_column("Protein IDs") %>% | |
select(`Protein IDs`,logFC, CI.L, CI.R, AveExpr, P.Value, adj.P.Val) | |
} | |
### rename all but Protein IDs | |
limma_rename_all_but_proteinIDs <- function(x, chr_to_add){ | |
if(!is.character(chr_to_add)){"Stop, chr_to_add has to be a character vector"} | |
x %>% | |
rename_at(vars(-matches("Protein IDs")), funs(paste0(.,chr_to_add))) | |
} | |
### Significance according to adj.P.Value | |
limma_significant_adjPVal <- function(x, cutoff = 0.05, indicator = "+"){ | |
x %>% | |
mutate(Significant = ifelse(adj.P.Val < cutoff, indicator,"")) | |
} | |
### Significant enrichment according to adj.P.Value and logFC | |
limma_signif_enriched_adjPVal <- function(x,indicator = "+"){ | |
x %>% | |
mutate(Enriched = ifelse(adj.P.Val < 0.05 & logFC > 0,indicator,"")) | |
} | |
### Read uniprot annotation | |
get_uniprot_annotations_url <- function(x = "parameters.txt"){ | |
parameters <- read.delim(paste0("data/",x),stringsAsFactors = FALSE) | |
fasta.file <- subset(parameters,grepl("_Mus_musculus|_Homo_sapiens_|_Saccharomyces_cerevisiae_|_Caenorhabditis_elegans_|_Drosophila_melanogaster_",Value)) | |
fasta.file %>% | |
select(Value) %>% | |
unnest(Value = strsplit(Value,";")) %>% | |
filter(grepl("_Mus_musculus|_Homo_sapiens_|_Saccharomyces_cerevisiae_|_Caenorhabditis_elegans_|_Drosophila_melanogaster_",Value)) %>% | |
mutate(Value = gsub("^.*\\\\","",Value)) %>% | |
mutate(Value = gsub("\\.fasta","",Value)) -> match.key | |
return(match.key) | |
} | |
#### histogram of LFQ imputed values | |
plot_lfq_imputation_histograms <- function(imputed.df, | |
ncol = 3, | |
show.default.arguments = "no"){ | |
# numbers of non-imputed proteins | |
imputed.df$imputed.df.imputation.annotated %>% | |
group_by(Experiment,`Imputed values`) %>% | |
summarise(proteins = n()) %>% | |
ungroup() %>% | |
filter(`Imputed values` == "") %>% | |
mutate(value = +Inf) -> number.non.imputed.proteins | |
# numbers of imputed proteins | |
imputed.df$imputed.df.imputation.annotated %>% | |
group_by(Experiment,`Imputed values`) %>% | |
summarise(proteins = n()) %>% | |
ungroup() %>% | |
filter(`Imputed values` == "+") %>% | |
mutate(value = -Inf) -> number.imputed.proteins | |
imputed.df$imputed.df.imputation.annotated %>% | |
ggplot(aes(x = value, fill = `Imputed values`),alpha = 0.5)+ | |
theme_bw()+ | |
geom_histogram()+ | |
scale_fill_manual(values = RColorBrewer::brewer.pal(3,"Dark2")[1:2])+ | |
facet_wrap(~ Experiment,ncol = ncol)+ | |
geom_text(y = +Inf, aes(x = value, label = proteins), data = number.non.imputed.proteins,color = "#1B9E77",hjust = 1.2,vjust = 1.5)+ | |
geom_text(y = +Inf, aes(x = value, label = proteins), data = number.imputed.proteins, color = "#D95F02",hjust = -0.2,vjust = 1.5) | |
} | |
### multiscatter | |
plot_multiscatter_lfq <- function(imputed.df, alpha = 0.25, order_vector = NULL){ | |
if(is.null(order_vector)){ | |
imputed.df %>% | |
gather(Experiment, value,-`Protein IDs`) %>% | |
spread(Experiment, value) %>% | |
select(-`Protein IDs`) %>% | |
ggpairs(upper = list(continuous = wrap("points", alpha = alpha)), | |
lower = list(continuous = wrap("cor", size = 5)))+ | |
theme_bw() -> multiscatter | |
return(multiscatter) | |
} | |
if(!is.null(order_vector)){ | |
imputed.df %>% | |
gather(Experiment, value,-`Protein IDs`) %>% | |
mutate(Experiment = factor(Experiment, levels = order_vector)) %>% | |
spread(Experiment, value) %>% | |
select(-`Protein IDs`) %>% | |
ggpairs(upper = list(continuous = wrap("points", alpha = alpha)), | |
lower = list(continuous = wrap("cor", size = 5)))+ | |
theme_bw()-> multiscatter | |
return(multiscatter) | |
} | |
} | |
### dendrogram | |
cluster_dendrogram <- function(imputed.df){ | |
imputed.df %>% | |
remove_rownames() %>% | |
column_to_rownames(var = "Protein IDs") %>% | |
as.data.frame() %>% | |
as.matrix() %>% | |
t() %>% | |
dist() %>% | |
hclust(.,method = "average") %>% | |
as.dendrogram() | |
} | |
### PCA | |
plot_PCA <- function(imputed.df, Experimental_design = Experimental.design, | |
filename = "PCA.pdf", | |
width = 20, | |
height = 22.5, | |
col.ind = NULL ){ | |
if(!(is.null(col.ind)|is.character(col.ind))){stop("order.by should be NULL or a column name from Experimental_design")} | |
if(!is.null(col.ind)){ | |
Experimental_design %>% | |
arrange_("Experiment") -> Experimental.design.temp | |
imputed.df %>% | |
gather(`LFQ intensity experiment`,value,-`Protein IDs`) %>% | |
left_join(Experimental.design.temp[,c("LFQ intensity experiment","Experiment")]) %>% | |
filter(!is.na(Experiment)) %>% | |
select(-`LFQ intensity experiment`) %>% | |
spread(key = Experiment, value = value) %>% | |
remove_rownames() %>% | |
column_to_rownames(var = "Protein IDs") %>% | |
t() %>% | |
as.data.frame() %>% | |
as_tibble() %>% | |
PCA(graph = FALSE) %>% | |
fviz_pca_ind(repel = TRUE,col.ind = factor(unlist(Experimental.design.temp[,col.ind])),mean.point = FALSE) | |
} else { | |
protein.data.imputed$imputed.df %>% | |
#imputed.df %>% | |
gather(`LFQ intensity experiment`,value,-`Protein IDs`) %>% | |
left_join(Experimental.design.temp[,c("LFQ intensity experiment","Experiment")]) %>% | |
filter(!is.na(Experiment)) %>% | |
select(-`LFQ intensity experiment`) %>% | |
spread(key = Experiment, value = value) %>% | |
remove_rownames() %>% | |
column_to_rownames(var = "Protein IDs") %>% | |
t() %>% | |
as.data.frame() %>% | |
as_tibble() %>% | |
PCA(graph = FALSE) %>% | |
fviz_pca_ind(repel = TRUE,mean.point = FALSE,palette = "RdBu") | |
} | |
} | |
### volcanoplot; significance | |
### add Inge and Maria's annotations | |
add_mito_dysf_annot <- function(protein.data, | |
mito.dysf.annot.Prot.ID = mito.dysf.annot.Prot.ID, | |
mito.dysf.annot.Gene.name = mito.dysf.annot.Gene.name){ | |
protein.data %>% | |
left_join(mito.dysf.annot.Gene.name[,c("Gene name","Annotations; Mitochondrial dysfunction, PMID:29132502; Gene name")], | |
by = c("Gene names (primary )" = "Gene name")) %>% | |
left_join(mito.dysf.annot.Prot.ID[,c("Protein ID","Annotations; Mitochondrial dysfunction, PMID:29132502; Protein ID")], | |
by = c("Priority protein ID" = "Protein ID")) %>% | |
mutate(`Annotations; Mitochondrial dysfunction, PMID:29132502` = `Annotations; Mitochondrial dysfunction, PMID:29132502; Gene name`) %>% | |
mutate(`Annotations; Mitochondrial dysfunction, PMID:29132502` = ifelse(is.na(`Annotations; Mitochondrial dysfunction, PMID:29132502`),`Annotations; Mitochondrial dysfunction, PMID:29132502; Protein ID`,`Annotations; Mitochondrial dysfunction, PMID:29132502`)) %>% | |
select(-`Annotations; Mitochondrial dysfunction, PMID:29132502; Gene name`, | |
-`Annotations; Mitochondrial dysfunction, PMID:29132502; Protein ID`) | |
} | |
### Calculate enrichment based on logFC; calculating the proportion of logFC <0 from the logFC >0 | |
limma_calculate_enrichment_fdr <- function(limma.result){ | |
### the function assumes that this is a lima resunts and that it is ordered by increasing adj.P.Val | |
limma.result %>% | |
mutate(`Enrichment FDR` = NA) %>% | |
mutate(`Enrichment FDR` = as.numeric(`Enrichment FDR`)) -> temp.df | |
for(i in 1:nrow(temp.df)){ | |
Signif.enriched <- numeric(length = 1) | |
Signif.deenriched <- numeric(length = 1) | |
temp.df[1:i,] -> temp.df.2 | |
Signif.enriched <- nrow(temp.df.2[temp.df.2$logFC > 0,]) | |
Signif.deenriched <- nrow(temp.df.2[temp.df.2$logFC < 0,]) | |
temp.df$`Enrichment FDR`[i] <- Signif.deenriched/Signif.enriched | |
} | |
temp.df$`Enrichment FDR` <- ifelse(is.infinite(temp.df$`Enrichment FDR`),1,temp.df$`Enrichment FDR`) | |
temp.df | |
} | |
#### | |
### annotate enrichment FDR | |
limma_annotate_enriched <- function(limma.result, fdr_cutoff = 0.05){ | |
if(nrow(subset(limma.result,`Enrichment FDR` < fdr_cutoff)) > 0){ | |
highest.adj.P.val <- max(unlist(limma.result$adj.P.Val[limma.result$`Enrichment FDR` <= fdr_cutoff])) | |
limma.result %>% | |
mutate(`Enriched` = ifelse(logFC>0 & adj.P.Val <= highest.adj.P.val,"+","")) | |
} else { | |
limma.result %>% | |
mutate(`Enriched` = "") | |
} | |
} | |
#### | |
limma_annotate_enriched_P_value <- function(limma.result, fdr_cutoff = 0.05){ | |
enriched_variable_name <- paste0("Enriched","_",fdr_cutoff) | |
if(nrow(subset(limma.result,`Enrichment FDR` < fdr_cutoff)) > 0){ | |
highest.P.Value <- max(unlist(limma.result$P.Value[limma.result$`Enrichment FDR` <= fdr_cutoff])) | |
limma.result %>% | |
mutate(!! enriched_variable_name := ifelse(logFC>0 & P.Value <= highest.P.Value,"+","")) | |
} else { | |
limma.result %>% | |
mutate(!! enriched_variable_name := "") | |
} | |
} | |
#### | |
limma_regulation_adjPVal <- function(x){ | |
x %>% | |
mutate(regulation = ifelse(adj.P.Val < 0.05 & logFC > 0,"upregulation", | |
ifelse(adj.P.Val < 0.05 & logFC < 0,"downregulation",""))) | |
} | |
#### | |
substract_global_median <- function(x){ | |
global.median <- median(x, na.rm = TRUE) | |
y <- x-global.median | |
y | |
} | |
### PCA | |
plot_PCA_res <- function(PCA.result, color_var = NULL, shape_var = NULL, label = TRUE, palette = "Dark2"){ | |
if(label){ | |
color_var <- enexpr(color_var) | |
shape_var <- enexpr(shape_var) | |
PCA.result$data %>% | |
ggplot(aes(x = x, y = y, color = !!color_var, shape = !!shape_var))+ | |
geom_hline(yintercept = 0, linetype = "dashed")+ | |
geom_vline(xintercept = 0, linetype = "dashed")+ | |
geom_point()+ | |
scale_color_brewer(palette = palette)+ | |
theme_bw()+ | |
theme(panel.border = element_blank())+ | |
labs(x = PCA.result$labels$x, y = PCA.result$labels$y)+ | |
geom_text_repel(aes(label = name)) | |
} | |
if(!label){ | |
color_var <- enexpr(color_var) | |
shape_var <- enexpr(shape_var) | |
PCA.result$data %>% | |
ggplot(aes(x = x, y = y, color = !!color_var, shape = !!shape_var))+ | |
geom_hline(yintercept = 0, linetype = "dashed")+ | |
geom_vline(xintercept = 0, linetype = "dashed")+ | |
geom_point()+ | |
scale_color_brewer(palette = palette)+ | |
theme_bw()+ | |
theme(panel.border = element_blank())+ | |
labs(x = PCA.result$labels$x, y = PCA.result$labels$y) | |
} | |
} | |
## limma contrast | |
rs.extractLimmaContrasts = function( limmaobject ) { | |
outlist = list() | |
for (i in 1:ncol(limmaobject$contrasts)) { | |
outlist[colnames(limmaobject$contrasts)[i]] = list( topTable(limmaobject, coef=i, number=Inf, confint = T ) ) | |
} | |
return(outlist) | |
} | |
### | |
my_scatterplot_char_var <- function(df, xvar, yvar, colorvar = NULL, axis_lim = NULL, alpha = 0.5, text_annot = NULL){ | |
if(is.null(text_annot)){ | |
ggplot(df, aes_string(xvar, yvar, color = colorvar))+ | |
geom_point(alpha = alpha)+ | |
geom_abline(slope = 1, intercept = 0, color = "grey",linetype = "dashed")+ | |
geom_abline(slope = 1, intercept = 2, color = "red",linetype = "dashed")+ | |
geom_abline(slope = 1, intercept = -2, color = "red",linetype = "dashed")+ | |
geom_hline(yintercept = 0)+ | |
geom_vline(xintercept = 0)+ | |
scale_color_viridis(discrete = TRUE)+ | |
theme_bw()+ | |
coord_fixed(xlim = axis_lim, ylim = axis_lim) | |
} | |
if(!is.null(text_annot)){ | |
df %>% | |
filter(`Gene names (primary )` %in% text_annot) -> temp_df | |
ggplot(df, aes_string(xvar, yvar, color = colorvar))+ | |
geom_point(alpha = alpha)+ | |
geom_abline(slope = 1, intercept = 0, color = "grey",linetype = "dashed")+ | |
geom_abline(slope = 1, intercept = 2, color = "red",linetype = "dashed")+ | |
geom_abline(slope = 1, intercept = -2, color = "red",linetype = "dashed")+ | |
geom_hline(yintercept = 0)+ | |
geom_vline(xintercept = 0)+ | |
scale_color_viridis(discrete = TRUE)+ | |
theme_bw()+ | |
coord_fixed(xlim = axis_lim, ylim = axis_lim)+ | |
geom_point(data = temp_df, color = "red")+ | |
geom_text(data = temp_df, aes(label = `Gene names (primary )`), color = "red") | |
} | |
} | |
## ---- scatterplot_char_var ---- | |
scatterplot_logFC <- function(df, xvar, yvar, colorvar = NULL, axis_lim = NULL, alpha = 0.5, text_annot = NULL){ | |
if(is.null(text_annot)){ | |
ggplot(df, aes_string(xvar, yvar, color = colorvar))+ | |
geom_point(alpha = alpha)+ | |
geom_abline(slope = 1, intercept = 0, color = "grey",linetype = "dashed")+ | |
geom_hline(yintercept = 0)+ | |
geom_vline(xintercept = 0)+ | |
scale_color_viridis(discrete = TRUE)+ | |
theme_bw()+ | |
coord_fixed(xlim = axis_lim, ylim = axis_lim) | |
} | |
if(!is.null()){ | |
df %>% | |
filter(`Gene names (primary )` %in% text_annot) -> temp_df | |
ggplot(df, aes_string(xvar, yvar, color = colorvar))+ | |
geom_point(alpha = alpha)+ | |
geom_abline(slope = 1, intercept = 0, color = "grey",linetype = "dashed")+ | |
geom_hline(yintercept = 0)+ | |
geom_vline(xintercept = 0)+ | |
scale_color_viridis(discrete = TRUE)+ | |
theme_bw()+ | |
coord_fixed(xlim = axis_lim, ylim = axis_lim)+ | |
geom_point(temp_df, color = "red")+ | |
geom_text(temp_df, aes(label = `Gene names (primary )`)) | |
} | |
} |