Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#######################
####################### Functions
###
imputator <- function(x, seed = 5531, sd = 0.3, down_shift = 1.8){ # the imputator function substitutes missing values from normal disriution with mean = 0 and sd = sd and down shift of down_shift
set.seed(seed = seed)
x[is.na(x)] <- rnorm(n = length(x[is.na(x)]), mean = 0, sd = sd)- down_shift
return(x)
}
scale.reversor <- function(x){ # backtransform scaled data; from here: https://stackoverflow.com/questions/10287545/backtransform-scale-for-plotting
y <- x * attr(x, 'scaled:scale') + attr(x, 'scaled:center')
return(y)
}
###
inf.to.NA <- function(x){ # change the +/-inf calues to NAs
y <- ifelse(is.finite(x),x,NA)
return(y)
}
###
LFQ_data_imputator <- function(data.frame.for.imputation){
data.frame.for.imputation %>% # the data frame
mutate_at(vars(-contains("Protein IDs")),.funs = funs(log2)) %>% # log2 transform all columns, except for Protein IDs
mutate_at(vars(-contains("Protein IDs")),.funs =funs(inf.to.NA)) %>% # replace +inf values with NAs, except for Protein IDs
mutate_at(vars(-contains("Protein IDs")),.funs =funs(scale)) %>% # scale the columns, except for Protein IDs
mutate_at(vars(-contains("Protein IDs")),.funs =funs(imputator)) %>% # substitute the missing values in the columns from normal distribution, except for Protein IDs
mutate_at(vars(-contains("Protein IDs")),.funs = funs(scale.reversor)) -> LFQ.intensity.imputed
return(LFQ.intensity.imputed)
}
###
imputed_values_annotator <- function(non.imputed.data,imputed.data){ # compares two data frames with before and after imputation and
# marks with "+" the imputed valeus
non.imputed.data %>%
gather(`Experiment`,value,-`Protein IDs`) %>%
mutate(value = log2(value))-> non.imputed.data.long
imputed.data %>%
gather(`Experiment`,value,-`Protein IDs`) -> imputed.data.long
warning(paste0("All Protein IDs are the same order: ", all(non.imputed.data.long[,"Experiment"] == imputed.data.long[,"Experiment"])))
imputed.data.long %>%
mutate(`Imputed values` = ifelse(value == non.imputed.data.long$value,"","+")) -> imputed.data.long.imputation.indicated
return(imputed.data.long.imputation.indicated)
}
#### imputation of LFQ data and imputation
LFQ_data_imputator_annotator <- function(data.frame.for.imputation){
data.frame.for.imputation %>%
LFQ_data_imputator() -> data.frame.for.imputation.imputed
data.frame.for.imputation %>%
imputed_values_annotator(imputed.data = data.frame.for.imputation.imputed) -> data.frame.for.imputation.imputed.long.annotated
df.list <- list(data.frame.for.imputation.imputed,
data.frame.for.imputation.imputed.long.annotated)
names(df.list) <- c("imputed.df","imputed.df.imputation.annotated")
return(df.list)
}
### add Uniprot priority ID
add_uniprot_prior_prot_id <- function(protein.data, protein.annotations){
protein.data %>%
unnest(`Peptide count (all)` = strsplit(`Peptide counts (all)`,";"),`Priority protein ID` = strsplit(`Protein IDs`,";")) %>%
mutate(`Peptide count (all)` = as.numeric(`Peptide count (all)`)) %>%
mutate(`Protein ID for merge` = gsub("-.*","",`Priority protein ID`)) %>%
left_join(protein.annotations[,c("Entry","Status","Date of creation")], by = c("Protein ID for merge" = "Entry")) %>%
group_by(id) %>%
arrange(desc(`Peptide count (all)`),
Status,
`Date of creation`,
`Priority protein ID`) %>%
filter(row_number() %in% 1) %>%
ungroup() %>%
select(-Status,-`Date of creation`, -`Protein ID for merge`, -id, -`Peptide count (all)`)
}
### add mouse mitocarta annotations
add_mcarta2_mouse_annot <- function(protein.data.annotated,mitocarta.data){
mitocarta.data %>%
select(Symbol,Description,MitoCarta2_List) -> mitocarta.symbol
mitocarta.data %>%
select(Synonyms,Description,MitoCarta2_List) %>%
unnest(Synonyms = strsplit(Synonyms,"\\|"))-> mitocarta.synonym
# Gene names (primary ) -> Symbol
protein.data.annotated %>%
left_join(mitocarta.symbol[,c("Symbol","Description","MitoCarta2_List")], by = c("Gene names (primary )" = "Symbol")) %>% filter(!is.na(MitoCarta2_List)) ->
protein.data.annotated.mito.0
protein.data.annotated %>%
left_join(mitocarta.symbol[,c("Symbol","Description","MitoCarta2_List")], by = c("Gene names (primary )" = "Symbol")) %>% filter(is.na(MitoCarta2_List)) ->
protein.data.annotated.mito.missing
# Gene names (primary ) -> Synonym
protein.data.annotated.mito.missing %>%
select(-MitoCarta2_List,-Description) %>%
left_join(mitocarta.synonym[,c("Synonyms","Description","MitoCarta2_List")], by = c("Gene names (primary )" = "Synonyms")) %>%
filter(!is.na(MitoCarta2_List)) -> protein.data.annotated.mito.1
protein.data.annotated.mito.missing %>%
select(-MitoCarta2_List,-Description) %>%
left_join(mitocarta.synonym[,c("Synonyms","Description","MitoCarta2_List")], by = c("Gene names (primary )" = "Synonyms")) %>%
filter(is.na(MitoCarta2_List)) -> protein.data.annotated.mito.missing
# Gene names (synonym ) -> Symbol
protein.data.annotated.mito.missing %>%
select(-MitoCarta2_List,-Description) %>%
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>%
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>%
unnest(`Gene names (synonym )`) %>%
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>%
left_join(mitocarta.symbol[,c("Symbol","Description","MitoCarta2_List")], by = c("Gene names (synonym )" = "Symbol")) %>%
group_by(`Gene names (primary )`,`Priority protein ID`) %>%
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>%
unique() %>%
filter(!is.na(MitoCarta2_List)) %>%
ungroup() -> protein.data.annotated.mito.2
protein.data.annotated.mito.missing %>%
select(-MitoCarta2_List,-Description) %>%
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>%
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>%
unnest(`Gene names (synonym )`) %>%
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>%
left_join(mitocarta.symbol[,c("Symbol","Description","MitoCarta2_List")], by = c("Gene names (synonym )" = "Symbol")) %>%
group_by(`Gene names (primary )`,`Priority protein ID`) %>%
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>%
unique() %>%
filter(all(is.na(MitoCarta2_List))) %>%
ungroup() -> protein.data.annotated.mito.missing
# Gene names (synonym ) -> Synonym
protein.data.annotated.mito.missing %>%
select(-MitoCarta2_List,-Description) %>%
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>%
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>%
unnest(`Gene names (synonym )`) %>%
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>%
left_join(mitocarta.synonym[,c("Synonyms","Description","MitoCarta2_List")], by = c("Gene names (synonym )" = "Synonyms")) %>%
group_by(`Gene names (primary )`,`Priority protein ID`) %>%
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>%
unique() %>%
filter(!is.na(MitoCarta2_List)) %>%
ungroup() -> protein.data.annotated.mito.3
protein.data.annotated.mito.missing %>%
select(-MitoCarta2_List,-Description) %>%
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>%
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>%
unnest(`Gene names (synonym )`) %>%
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>%
left_join(mitocarta.synonym[,c("Synonyms","Description","MitoCarta2_List")], by = c("Gene names (synonym )" = "Synonyms")) %>%
group_by(`Gene names (primary )`,`Priority protein ID`) %>%
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>%
unique() %>%
filter(all(is.na(MitoCarta2_List))) %>%
ungroup()-> protein.data.annotated.mito.missing
protein.data.annotated.mito.0 %>%
rbind(protein.data.annotated.mito.1) %>%
rbind(protein.data.annotated.mito.2) %>%
rbind(protein.data.annotated.mito.3) %>%
rbind(protein.data.annotated.mito.missing) %>%
unique() %>%
select(`Gene names (primary )`) %>%
table() %>%
as_tibble() %>%
filter(`n` > 1) -> duplicated.mitoannotations
protein.data.annotated.mito.0 %>%
filter(!is.na(MitoCarta2_List)) %>%
rbind(protein.data.annotated.mito.1) %>%
rbind(protein.data.annotated.mito.2) %>%
rbind(protein.data.annotated.mito.3) %>%
rbind(protein.data.annotated.mito.missing) %>%
unique() %>%
select(-`Description`) %>%
unique()
}
### add human mitocarta annotations
add_mcarta2_human_annot <- function(protein.data.annotated,mitocarta.data){
mitocarta.data %>%
select(Symbol,Description,MCARTA2_LIST) -> mitocarta.symbol
mitocarta.data %>%
select(Synonyms,Description,MCARTA2_LIST) %>%
unnest(Synonyms = strsplit(Synonyms,"\\|"))-> mitocarta.synonym
# Gene names (primary ) -> Symbol
protein.data.annotated %>%
left_join(mitocarta.symbol[,c("Symbol","Description","MCARTA2_LIST")], by = c("Gene names (primary )" = "Symbol")) %>% filter(!is.na(MCARTA2_LIST)) ->
protein.data.annotated.mito.0
protein.data.annotated %>%
left_join(mitocarta.symbol[,c("Symbol","Description","MCARTA2_LIST")], by = c("Gene names (primary )" = "Symbol")) %>% filter(is.na(MCARTA2_LIST)) ->
protein.data.annotated.mito.missing
# Gene names (primary ) -> Synonym
protein.data.annotated.mito.missing %>%
select(-MCARTA2_LIST,-Description) %>%
left_join(mitocarta.synonym[,c("Synonyms","Description","MCARTA2_LIST")], by = c("Gene names (primary )" = "Synonyms")) %>%
filter(!is.na(MCARTA2_LIST)) -> protein.data.annotated.mito.1
protein.data.annotated.mito.missing %>%
select(-MCARTA2_LIST,-Description) %>%
left_join(mitocarta.synonym[,c("Synonyms","Description","MCARTA2_LIST")], by = c("Gene names (primary )" = "Synonyms")) %>%
filter(is.na(MCARTA2_LIST)) -> protein.data.annotated.mito.missing
# Gene names (synonym ) -> Symbol
protein.data.annotated.mito.missing %>%
select(-MCARTA2_LIST,-Description) %>%
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>%
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>%
unnest(`Gene names (synonym )`) %>%
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>%
left_join(mitocarta.symbol[,c("Symbol","Description","MCARTA2_LIST")], by = c("Gene names (synonym )" = "Symbol")) %>%
group_by(`Gene names (primary )`,`Priority protein ID`) %>%
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>%
unique() %>%
filter(!is.na(MCARTA2_LIST)) %>%
ungroup() -> protein.data.annotated.mito.2
protein.data.annotated.mito.missing %>%
select(-MCARTA2_LIST,-Description) %>%
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>%
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>%
unnest(`Gene names (synonym )`) %>%
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>%
left_join(mitocarta.symbol[,c("Symbol","Description","MCARTA2_LIST")], by = c("Gene names (synonym )" = "Symbol")) %>%
group_by(`Gene names (primary )`,`Priority protein ID`) %>%
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>%
unique() %>%
filter(all(is.na(MCARTA2_LIST))) %>%
ungroup() -> protein.data.annotated.mito.missing
# Gene names (synonym ) -> Synonym
protein.data.annotated.mito.missing %>%
select(-MCARTA2_LIST,-Description) %>%
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>%
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>%
unnest(`Gene names (synonym )`) %>%
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>%
left_join(mitocarta.synonym[,c("Synonyms","Description","MCARTA2_LIST")], by = c("Gene names (synonym )" = "Synonyms")) %>%
group_by(`Gene names (primary )`,`Priority protein ID`) %>%
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>%
unique() %>%
filter(!is.na(MCARTA2_LIST)) %>%
ungroup() -> protein.data.annotated.mito.3
protein.data.annotated.mito.missing %>%
select(-MCARTA2_LIST,-Description) %>%
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>%
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>%
unnest(`Gene names (synonym )`) %>%
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>%
left_join(mitocarta.synonym[,c("Synonyms","Description","MCARTA2_LIST")], by = c("Gene names (synonym )" = "Synonyms")) %>%
group_by(`Gene names (primary )`,`Priority protein ID`) %>%
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>%
unique() %>%
filter(all(is.na(MCARTA2_LIST))) %>%
ungroup()-> protein.data.annotated.mito.missing
protein.data.annotated.mito.0 %>%
rbind(protein.data.annotated.mito.1) %>%
rbind(protein.data.annotated.mito.2) %>%
rbind(protein.data.annotated.mito.3) %>%
rbind(protein.data.annotated.mito.missing) %>%
unique() %>%
select(`Gene names (primary )`) %>%
table() %>%
as_tibble() %>%
filter(`n` > 1) -> duplicated.mitoannotations
protein.data.annotated.mito.0 %>%
filter(!is.na(MCARTA2_LIST)) %>%
rbind(protein.data.annotated.mito.1) %>%
rbind(protein.data.annotated.mito.2) %>%
rbind(protein.data.annotated.mito.3) %>%
rbind(protein.data.annotated.mito.missing) %>%
unique() %>%
select(-`Description`) %>%
unique()
}
### Check duplicated mitoannotations
mitoannot_duplications <- function(protein.data.annotated){
group_by(`Protein IDs`, `Gene names (primary )`) %>%
table() %>%
as_tibble() %>%
filter(`n` > 1)
}
#### Numbers of quantified proteins LFQ
plot_num_lfq_quant_prot <- function(df, ylim = c(0,2000)){
df %>%
select(-matches("Protein IDs")) %>%
gather(Experiment, value) %>%
group_by(Experiment) %>%
summarise(`Quantified proteins` = sum(value>0, na.rm = TRUE)) %>%
ggplot(aes(x = Experiment, y = `Quantified proteins`)) +
geom_bar(stat = "identity", fill = "#a6cee3", color = "black")+
theme_bw()+
geom_text(aes(label = `Quantified proteins`), color = "red", vjust = -0.5)+
coord_cartesian(ylim = ylim)+
theme(axis.text.x = element_text(angle = 90, hjust = 1))
}
plot_num_tmt_quant_prot <- function(x, Experimental_design = Experimental.design, ylim = c(0,2000)){
x %>%
select(matches("Reporter intensity corrected")) %>%
gather(`TMT reporter intensity experiment`, value) %>%
left_join(Experimental_design[,c("TMT reporter intensity experiment","Experiment")]) %>%
group_by(Experiment) %>%
summarise(`Quantified proteins` = sum(value>0, na.rm = TRUE)) %>%
mutate(Experiment = factor(Experiment, levels = rev(Experimental_design$Experiment))) %>%
ggplot(aes(x = Experiment, y = `Quantified proteins`)) +
geom_bar(stat = "identity", fill = "#a6cee3", color = "black")+
theme_bw()+
theme(axis.text.x = element_text(angle = 90,size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14))+
geom_text(aes(label = `Quantified proteins`), color = "red", hjust = -0.5)+
coord_flip(ylim = ylim)
}
#### Numbers of quantified proteins SILAC
plot_num_SILAC_quant_prot <- function(df, Experimental_design = Experimental.design, ylim = c(0,2000)){
df %>%
select(`Protein IDs`,matches("Ratio H/L normalized ")) %>%
gather(`SILAC ratio experiment`, value, -`Protein IDs`) %>%
left_join(Experimental_design[,c("SILAC ratio experiment","Experiment")]) %>%
group_by(Experiment) %>%
summarise(`Quantified proteins` = sum(value>-Inf, na.rm = TRUE)) %>%
ggplot(aes(x = Experiment, y = `Quantified proteins`)) +
geom_bar(stat = "identity", fill = "#a6cee3", color = "black")+
theme_bw()+
theme(axis.text.x = element_text(angle = 90,size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14))+
geom_text(aes(label = `Quantified proteins`), color = "red", hjust = -0.5)+
coord_flip(ylim = ylim)
}
### limma results
limma_results_select_ProtIDs_logFC_CI_AveExpr_PVal_adjPVal <- function(x){
x %>%
as.data.frame() %>%
as_tibble() %>%
rownames_to_column("Protein IDs") %>%
select(`Protein IDs`,logFC, CI.L, CI.R, AveExpr, P.Value, adj.P.Val)
}
### rename all but Protein IDs
limma_rename_all_but_proteinIDs <- function(x, chr_to_add){
if(!is.character(chr_to_add)){"Stop, chr_to_add has to be a character vector"}
x %>%
rename_at(vars(-matches("Protein IDs")), funs(paste0(.,chr_to_add)))
}
### Significance according to adj.P.Value
limma_significant_adjPVal <- function(x, cutoff = 0.05, indicator = "+"){
x %>%
mutate(Significant = ifelse(adj.P.Val < cutoff, indicator,""))
}
### Significant enrichment according to adj.P.Value and logFC
limma_signif_enriched_adjPVal <- function(x,indicator = "+"){
x %>%
mutate(Enriched = ifelse(adj.P.Val < 0.05 & logFC > 0,indicator,""))
}
### Read uniprot annotation
get_uniprot_annotations_url <- function(x = "parameters.txt"){
parameters <- read.delim(paste0("data/",x),stringsAsFactors = FALSE)
fasta.file <- subset(parameters,grepl("_Mus_musculus|_Homo_sapiens_|_Saccharomyces_cerevisiae_|_Caenorhabditis_elegans_|_Drosophila_melanogaster_",Value))
fasta.file %>%
select(Value) %>%
unnest(Value = strsplit(Value,";")) %>%
filter(grepl("_Mus_musculus|_Homo_sapiens_|_Saccharomyces_cerevisiae_|_Caenorhabditis_elegans_|_Drosophila_melanogaster_",Value)) %>%
mutate(Value = gsub("^.*\\\\","",Value)) %>%
mutate(Value = gsub("\\.fasta","",Value)) -> match.key
return(match.key)
}
#### histogram of LFQ imputed values
plot_lfq_imputation_histograms <- function(imputed.df,
ncol = 3,
show.default.arguments = "no"){
# numbers of non-imputed proteins
imputed.df$imputed.df.imputation.annotated %>%
group_by(Experiment,`Imputed values`) %>%
summarise(proteins = n()) %>%
ungroup() %>%
filter(`Imputed values` == "") %>%
mutate(value = +Inf) -> number.non.imputed.proteins
# numbers of imputed proteins
imputed.df$imputed.df.imputation.annotated %>%
group_by(Experiment,`Imputed values`) %>%
summarise(proteins = n()) %>%
ungroup() %>%
filter(`Imputed values` == "+") %>%
mutate(value = -Inf) -> number.imputed.proteins
imputed.df$imputed.df.imputation.annotated %>%
ggplot(aes(x = value, fill = `Imputed values`),alpha = 0.5)+
theme_bw()+
geom_histogram()+
scale_fill_manual(values = RColorBrewer::brewer.pal(3,"Dark2")[1:2])+
facet_wrap(~ Experiment,ncol = ncol)+
geom_text(y = +Inf, aes(x = value, label = proteins), data = number.non.imputed.proteins,color = "#1B9E77",hjust = 1.2,vjust = 1.5)+
geom_text(y = +Inf, aes(x = value, label = proteins), data = number.imputed.proteins, color = "#D95F02",hjust = -0.2,vjust = 1.5)
}
### multiscatter
plot_multiscatter_lfq <- function(imputed.df, alpha = 0.25, order_vector = NULL){
if(is.null(order_vector)){
imputed.df %>%
gather(Experiment, value,-`Protein IDs`) %>%
spread(Experiment, value) %>%
select(-`Protein IDs`) %>%
ggpairs(upper = list(continuous = wrap("points", alpha = alpha)),
lower = list(continuous = wrap("cor", size = 5)))+
theme_bw() -> multiscatter
return(multiscatter)
}
if(!is.null(order_vector)){
imputed.df %>%
gather(Experiment, value,-`Protein IDs`) %>%
mutate(Experiment = factor(Experiment, levels = order_vector)) %>%
spread(Experiment, value) %>%
select(-`Protein IDs`) %>%
ggpairs(upper = list(continuous = wrap("points", alpha = alpha)),
lower = list(continuous = wrap("cor", size = 5)))+
theme_bw()-> multiscatter
return(multiscatter)
}
}
### dendrogram
cluster_dendrogram <- function(imputed.df){
imputed.df %>%
remove_rownames() %>%
column_to_rownames(var = "Protein IDs") %>%
as.data.frame() %>%
as.matrix() %>%
t() %>%
dist() %>%
hclust(.,method = "average") %>%
as.dendrogram()
}
### PCA
plot_PCA <- function(imputed.df, Experimental_design = Experimental.design,
filename = "PCA.pdf",
width = 20,
height = 22.5,
col.ind = NULL ){
if(!(is.null(col.ind)|is.character(col.ind))){stop("order.by should be NULL or a column name from Experimental_design")}
if(!is.null(col.ind)){
Experimental_design %>%
arrange_("Experiment") -> Experimental.design.temp
imputed.df %>%
gather(`LFQ intensity experiment`,value,-`Protein IDs`) %>%
left_join(Experimental.design.temp[,c("LFQ intensity experiment","Experiment")]) %>%
filter(!is.na(Experiment)) %>%
select(-`LFQ intensity experiment`) %>%
spread(key = Experiment, value = value) %>%
remove_rownames() %>%
column_to_rownames(var = "Protein IDs") %>%
t() %>%
as.data.frame() %>%
as_tibble() %>%
PCA(graph = FALSE) %>%
fviz_pca_ind(repel = TRUE,col.ind = factor(unlist(Experimental.design.temp[,col.ind])),mean.point = FALSE)
} else {
protein.data.imputed$imputed.df %>%
#imputed.df %>%
gather(`LFQ intensity experiment`,value,-`Protein IDs`) %>%
left_join(Experimental.design.temp[,c("LFQ intensity experiment","Experiment")]) %>%
filter(!is.na(Experiment)) %>%
select(-`LFQ intensity experiment`) %>%
spread(key = Experiment, value = value) %>%
remove_rownames() %>%
column_to_rownames(var = "Protein IDs") %>%
t() %>%
as.data.frame() %>%
as_tibble() %>%
PCA(graph = FALSE) %>%
fviz_pca_ind(repel = TRUE,mean.point = FALSE,palette = "RdBu")
}
}
### volcanoplot; significance
### add Inge and Maria's annotations
add_mito_dysf_annot <- function(protein.data,
mito.dysf.annot.Prot.ID = mito.dysf.annot.Prot.ID,
mito.dysf.annot.Gene.name = mito.dysf.annot.Gene.name){
protein.data %>%
left_join(mito.dysf.annot.Gene.name[,c("Gene name","Annotations; Mitochondrial dysfunction, PMID:29132502; Gene name")],
by = c("Gene names (primary )" = "Gene name")) %>%
left_join(mito.dysf.annot.Prot.ID[,c("Protein ID","Annotations; Mitochondrial dysfunction, PMID:29132502; Protein ID")],
by = c("Priority protein ID" = "Protein ID")) %>%
mutate(`Annotations; Mitochondrial dysfunction, PMID:29132502` = `Annotations; Mitochondrial dysfunction, PMID:29132502; Gene name`) %>%
mutate(`Annotations; Mitochondrial dysfunction, PMID:29132502` = ifelse(is.na(`Annotations; Mitochondrial dysfunction, PMID:29132502`),`Annotations; Mitochondrial dysfunction, PMID:29132502; Protein ID`,`Annotations; Mitochondrial dysfunction, PMID:29132502`)) %>%
select(-`Annotations; Mitochondrial dysfunction, PMID:29132502; Gene name`,
-`Annotations; Mitochondrial dysfunction, PMID:29132502; Protein ID`)
}
### Calculate enrichment based on logFC; calculating the proportion of logFC <0 from the logFC >0
limma_calculate_enrichment_fdr <- function(limma.result){
### the function assumes that this is a lima resunts and that it is ordered by increasing adj.P.Val
limma.result %>%
mutate(`Enrichment FDR` = NA) %>%
mutate(`Enrichment FDR` = as.numeric(`Enrichment FDR`)) -> temp.df
for(i in 1:nrow(temp.df)){
Signif.enriched <- numeric(length = 1)
Signif.deenriched <- numeric(length = 1)
temp.df[1:i,] -> temp.df.2
Signif.enriched <- nrow(temp.df.2[temp.df.2$logFC > 0,])
Signif.deenriched <- nrow(temp.df.2[temp.df.2$logFC < 0,])
temp.df$`Enrichment FDR`[i] <- Signif.deenriched/Signif.enriched
}
temp.df$`Enrichment FDR` <- ifelse(is.infinite(temp.df$`Enrichment FDR`),1,temp.df$`Enrichment FDR`)
temp.df
}
####
### annotate enrichment FDR
limma_annotate_enriched <- function(limma.result, fdr_cutoff = 0.05){
if(nrow(subset(limma.result,`Enrichment FDR` < fdr_cutoff)) > 0){
highest.adj.P.val <- max(unlist(limma.result$adj.P.Val[limma.result$`Enrichment FDR` <= fdr_cutoff]))
limma.result %>%
mutate(`Enriched` = ifelse(logFC>0 & adj.P.Val <= highest.adj.P.val,"+",""))
} else {
limma.result %>%
mutate(`Enriched` = "")
}
}
####
limma_annotate_enriched_P_value <- function(limma.result, fdr_cutoff = 0.05){
enriched_variable_name <- paste0("Enriched","_",fdr_cutoff)
if(nrow(subset(limma.result,`Enrichment FDR` < fdr_cutoff)) > 0){
highest.P.Value <- max(unlist(limma.result$P.Value[limma.result$`Enrichment FDR` <= fdr_cutoff]))
limma.result %>%
mutate(!! enriched_variable_name := ifelse(logFC>0 & P.Value <= highest.P.Value,"+",""))
} else {
limma.result %>%
mutate(!! enriched_variable_name := "")
}
}
####
limma_regulation_adjPVal <- function(x){
x %>%
mutate(regulation = ifelse(adj.P.Val < 0.05 & logFC > 0,"upregulation",
ifelse(adj.P.Val < 0.05 & logFC < 0,"downregulation","")))
}
####
substract_global_median <- function(x){
global.median <- median(x, na.rm = TRUE)
y <- x-global.median
y
}
### PCA
plot_PCA_res <- function(PCA.result, color_var = NULL, shape_var = NULL, label = TRUE, palette = "Dark2"){
if(label){
color_var <- enexpr(color_var)
shape_var <- enexpr(shape_var)
PCA.result$data %>%
ggplot(aes(x = x, y = y, color = !!color_var, shape = !!shape_var))+
geom_hline(yintercept = 0, linetype = "dashed")+
geom_vline(xintercept = 0, linetype = "dashed")+
geom_point()+
scale_color_brewer(palette = palette)+
theme_bw()+
theme(panel.border = element_blank())+
labs(x = PCA.result$labels$x, y = PCA.result$labels$y)+
geom_text_repel(aes(label = name))
}
if(!label){
color_var <- enexpr(color_var)
shape_var <- enexpr(shape_var)
PCA.result$data %>%
ggplot(aes(x = x, y = y, color = !!color_var, shape = !!shape_var))+
geom_hline(yintercept = 0, linetype = "dashed")+
geom_vline(xintercept = 0, linetype = "dashed")+
geom_point()+
scale_color_brewer(palette = palette)+
theme_bw()+
theme(panel.border = element_blank())+
labs(x = PCA.result$labels$x, y = PCA.result$labels$y)
}
}
## limma contrast
rs.extractLimmaContrasts = function( limmaobject ) {
outlist = list()
for (i in 1:ncol(limmaobject$contrasts)) {
outlist[colnames(limmaobject$contrasts)[i]] = list( topTable(limmaobject, coef=i, number=Inf, confint = T ) )
}
return(outlist)
}
###
my_scatterplot_char_var <- function(df, xvar, yvar, colorvar = NULL, axis_lim = NULL, alpha = 0.5, text_annot = NULL){
if(is.null(text_annot)){
ggplot(df, aes_string(xvar, yvar, color = colorvar))+
geom_point(alpha = alpha)+
geom_abline(slope = 1, intercept = 0, color = "grey",linetype = "dashed")+
geom_abline(slope = 1, intercept = 2, color = "red",linetype = "dashed")+
geom_abline(slope = 1, intercept = -2, color = "red",linetype = "dashed")+
geom_hline(yintercept = 0)+
geom_vline(xintercept = 0)+
scale_color_viridis(discrete = TRUE)+
theme_bw()+
coord_fixed(xlim = axis_lim, ylim = axis_lim)
}
if(!is.null(text_annot)){
df %>%
filter(`Gene names (primary )` %in% text_annot) -> temp_df
ggplot(df, aes_string(xvar, yvar, color = colorvar))+
geom_point(alpha = alpha)+
geom_abline(slope = 1, intercept = 0, color = "grey",linetype = "dashed")+
geom_abline(slope = 1, intercept = 2, color = "red",linetype = "dashed")+
geom_abline(slope = 1, intercept = -2, color = "red",linetype = "dashed")+
geom_hline(yintercept = 0)+
geom_vline(xintercept = 0)+
scale_color_viridis(discrete = TRUE)+
theme_bw()+
coord_fixed(xlim = axis_lim, ylim = axis_lim)+
geom_point(data = temp_df, color = "red")+
geom_text(data = temp_df, aes(label = `Gene names (primary )`), color = "red")
}
}
## ---- scatterplot_char_var ----
scatterplot_logFC <- function(df, xvar, yvar, colorvar = NULL, axis_lim = NULL, alpha = 0.5, text_annot = NULL){
if(is.null(text_annot)){
ggplot(df, aes_string(xvar, yvar, color = colorvar))+
geom_point(alpha = alpha)+
geom_abline(slope = 1, intercept = 0, color = "grey",linetype = "dashed")+
geom_hline(yintercept = 0)+
geom_vline(xintercept = 0)+
scale_color_viridis(discrete = TRUE)+
theme_bw()+
coord_fixed(xlim = axis_lim, ylim = axis_lim)
}
if(!is.null()){
df %>%
filter(`Gene names (primary )` %in% text_annot) -> temp_df
ggplot(df, aes_string(xvar, yvar, color = colorvar))+
geom_point(alpha = alpha)+
geom_abline(slope = 1, intercept = 0, color = "grey",linetype = "dashed")+
geom_hline(yintercept = 0)+
geom_vline(xintercept = 0)+
scale_color_viridis(discrete = TRUE)+
theme_bw()+
coord_fixed(xlim = axis_lim, ylim = axis_lim)+
geom_point(temp_df, color = "red")+
geom_text(temp_df, aes(label = `Gene names (primary )`))
}
}