Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#######################
####################### Functions
###
imputator <- function(x, seed = 5531, sd = 0.3, down_shift = 1.8){ # the imputator function substitutes missing values from normal disriution with mean = 0 and sd = sd and down shift of down_shift
set.seed(seed = seed)
x[is.na(x)] <- rnorm(n = length(x[is.na(x)]), mean = 0, sd = sd)- down_shift
return(x)
}
scale.reversor <- function(x){ # backtransform scaled data; from here: https://stackoverflow.com/questions/10287545/backtransform-scale-for-plotting
y <- x * attr(x, 'scaled:scale') + attr(x, 'scaled:center')
return(y)
}
###
inf.to.NA <- function(x){ # change the +/-inf calues to NAs
y <- ifelse(is.finite(x),x,NA)
return(y)
}
###
LFQ_data_imputator <- function(data.frame.for.imputation){
data.frame.for.imputation %>% # the data frame
mutate_at(vars(-contains("Protein IDs")),.funs = funs(log2)) %>% # log2 transform all columns, except for Protein IDs
mutate_at(vars(-contains("Protein IDs")),.funs =funs(inf.to.NA)) %>% # replace +inf values with NAs, except for Protein IDs
mutate_at(vars(-contains("Protein IDs")),.funs =funs(scale)) %>% # scale the columns, except for Protein IDs
mutate_at(vars(-contains("Protein IDs")),.funs =funs(imputator)) %>% # substitute the missing values in the columns from normal distribution, except for Protein IDs
mutate_at(vars(-contains("Protein IDs")),.funs = funs(scale.reversor)) -> LFQ.intensity.imputed
return(LFQ.intensity.imputed)
}
###
imputed_values_annotator <- function(non.imputed.data,imputed.data){ # compares two data frames with before and after imputation and
# marks with "+" the imputed valeus
non.imputed.data %>%
gather(`Experiment`,value,-`Protein IDs`) %>%
mutate(value = log2(value))-> non.imputed.data.long
imputed.data %>%
gather(`Experiment`,value,-`Protein IDs`) -> imputed.data.long
warning(paste0("All Protein IDs are the same order: ", all(non.imputed.data.long[,"Experiment"] == imputed.data.long[,"Experiment"])))
imputed.data.long %>%
mutate(`Imputed values` = ifelse(value == non.imputed.data.long$value,"","+")) -> imputed.data.long.imputation.indicated
return(imputed.data.long.imputation.indicated)
}
### add Uniprot priority ID
add_uniprot_prior_prot_id <- function(protein.data, protein.annotations){
protein.data %>%
unnest(`Peptide count (all)` = strsplit(`Peptide counts (all)`,";"),`Priority protein ID` = strsplit(`Protein IDs`,";")) %>%
mutate(`Peptide count (all)` = as.numeric(`Peptide count (all)`)) %>%
mutate(`Protein ID for merge` = gsub("-.*","",`Priority protein ID`)) %>%
left_join(protein.annotations[,c("Entry","Status","Date of creation")], by = c("Protein ID for merge" = "Entry")) %>%
group_by(id) %>%
arrange(desc(`Peptide count (all)`),
Status,
`Date of creation`,
`Priority protein ID`) %>%
filter(row_number() %in% 1) %>%
ungroup() %>%
select(-Status,-`Date of creation`, -`Protein ID for merge`, -id, -`Peptide count (all)`)
}
### add gene names (primary)
add_uniprot_gene_name_primary <- function(protein.data, protein.annotations){
protein.data %>%
mutate(`Protein ID for merge` = gsub("-.*","",`Priority protein ID`)) %>%
left_join(protein.annotations[,c("Entry","Gene names (primary )")], by = c("Protein ID for merge" = "Entry")) %>%
select(-`Protein ID for merge`)
}
### add gene names (primary)
add_uniprot_gene_names_synonym <- function(protein.data, protein.annotations){
protein.data %>%
mutate(`Protein ID for merge` = gsub("-.*","",`Priority protein ID`)) %>%
left_join(protein.annotations[,c("Entry","Gene names (synonym )")], by = c("Protein ID for merge" = "Entry")) %>%
select(-`Protein ID for merge`)
}
### add gene names (ORF)
add_uniprot_gene_names_orf <- function(protein.data, protein.annotations){
protein.data %>%
mutate(`Protein ID for merge` = gsub("-.*","",`Priority protein ID`)) %>%
left_join(protein.annotations[,c("Entry","Gene names (ORF )")], by = c("Protein ID for merge" = "Entry")) %>%
select(-`Protein ID for merge`)
}
### add ordered locus
add_uniprot_ordered_locus <- function(protein.data, protein.annotations){
protein.data %>%
mutate(`Protein ID for merge` = gsub("-.*","",`Priority protein ID`)) %>%
left_join(protein.annotations[,c("Entry","Gene names (ordered locus )")], by = c("Protein ID for merge" = "Entry")) %>%
select(-`Protein ID for merge`)
}
### add cross-ref Flybase
add_uniprot_flybase <- function(protein.data, protein.annotations){
protein.data %>%
mutate(`Protein ID for merge` = gsub("-.*","",`Priority protein ID`)) %>%
left_join(protein.annotations[,c("Entry","Cross-reference (Flybase)")], by = c("Protein ID for merge" = "Entry")) %>%
select(-`Protein ID for merge`)
}
### add uniprot protein names from fasta
add_uniprot_protein_name_from_fasta <- function(protein.data, protein.names){
protein.data %>%
left_join(protein.names[,c("Uniprot ID","Protein name")], by = c("Priority protein ID" = "Uniprot ID"))
}
### add mitocarta annotations
add_mcarta2_mouse_annot <- function(protein.data.annotated,mitocarta.data){
mitocarta.data %>%
select(Symbol,Description,MitoCarta2_List) -> mitocarta.symbol
mitocarta.data %>%
select(Synonyms,Description,MitoCarta2_List) %>%
unnest(Synonyms = strsplit(Synonyms,"\\|"))-> mitocarta.synonym
# Gene names (primary ) -> Symbol
protein.data.annotated %>%
left_join(mitocarta.symbol[,c("Symbol","Description","MitoCarta2_List")], by = c("Gene names (primary )" = "Symbol")) %>% filter(!is.na(MitoCarta2_List)) ->
protein.data.annotated.mito.0
protein.data.annotated %>%
left_join(mitocarta.symbol[,c("Symbol","Description","MitoCarta2_List")], by = c("Gene names (primary )" = "Symbol")) %>% filter(is.na(MitoCarta2_List)) ->
protein.data.annotated.mito.missing
# Gene names (primary ) -> Synonym
protein.data.annotated.mito.missing %>%
select(-MitoCarta2_List,-Description) %>%
left_join(mitocarta.synonym[,c("Synonyms","Description","MitoCarta2_List")], by = c("Gene names (primary )" = "Synonyms")) %>%
filter(!is.na(MitoCarta2_List)) -> protein.data.annotated.mito.1
protein.data.annotated.mito.missing %>%
select(-MitoCarta2_List,-Description) %>%
left_join(mitocarta.synonym[,c("Synonyms","Description","MitoCarta2_List")], by = c("Gene names (primary )" = "Synonyms")) %>%
filter(is.na(MitoCarta2_List)) -> protein.data.annotated.mito.missing
# Gene names (synonym ) -> Symbol
protein.data.annotated.mito.missing %>%
select(-MitoCarta2_List,-Description) %>%
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>%
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>%
unnest(`Gene names (synonym )`) %>%
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>%
left_join(mitocarta.symbol[,c("Symbol","Description","MitoCarta2_List")], by = c("Gene names (synonym )" = "Symbol")) %>%
group_by(`Gene names (primary )`,`Priority protein ID`) %>%
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>%
unique() %>%
filter(!is.na(MitoCarta2_List)) %>%
ungroup() -> protein.data.annotated.mito.2
protein.data.annotated.mito.missing %>%
select(-MitoCarta2_List,-Description) %>%
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>%
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>%
unnest(`Gene names (synonym )`) %>%
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>%
left_join(mitocarta.symbol[,c("Symbol","Description","MitoCarta2_List")], by = c("Gene names (synonym )" = "Symbol")) %>%
group_by(`Gene names (primary )`,`Priority protein ID`) %>%
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>%
unique() %>%
filter(all(is.na(MitoCarta2_List))) %>%
ungroup() -> protein.data.annotated.mito.missing
# Gene names (synonym ) -> Synonym
protein.data.annotated.mito.missing %>%
select(-MitoCarta2_List,-Description) %>%
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>%
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>%
unnest(`Gene names (synonym )`) %>%
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>%
left_join(mitocarta.synonym[,c("Synonyms","Description","MitoCarta2_List")], by = c("Gene names (synonym )" = "Synonyms")) %>%
group_by(`Gene names (primary )`,`Priority protein ID`) %>%
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>%
unique() %>%
filter(!is.na(MitoCarta2_List)) %>%
ungroup() -> protein.data.annotated.mito.3
protein.data.annotated.mito.missing %>%
select(-MitoCarta2_List,-Description) %>%
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>%
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>%
unnest(`Gene names (synonym )`) %>%
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>%
left_join(mitocarta.synonym[,c("Synonyms","Description","MitoCarta2_List")], by = c("Gene names (synonym )" = "Synonyms")) %>%
group_by(`Gene names (primary )`,`Priority protein ID`) %>%
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>%
unique() %>%
filter(all(is.na(MitoCarta2_List))) %>%
ungroup()-> protein.data.annotated.mito.missing
protein.data.annotated.mito.0 %>%
rbind(protein.data.annotated.mito.1) %>%
rbind(protein.data.annotated.mito.2) %>%
rbind(protein.data.annotated.mito.3) %>%
rbind(protein.data.annotated.mito.missing) %>%
unique() %>%
select(`Gene names (primary )`) %>%
table() %>%
as_tibble() %>%
filter(`n` > 1) -> duplicated.mitoannotations
protein.data.annotated.mito.0 %>%
filter(!is.na(MitoCarta2_List)) %>%
rbind(protein.data.annotated.mito.1) %>%
rbind(protein.data.annotated.mito.2) %>%
rbind(protein.data.annotated.mito.3) %>%
rbind(protein.data.annotated.mito.missing) %>%
unique() %>%
select(-`Description`) %>%
unique()
}
### add Cross-refe (GeneID)
add_uniprot_GeneID <- function(protein.data, protein.annotations){
protein.data %>%
mutate(`Protein ID for merge` = gsub("-.*","",`Priority protein ID`)) %>%
left_join(protein.annotations[,c("Entry","Cross-reference (GeneID)")], by = c("Protein ID for merge" = "Entry")) %>%
select(-`Protein ID for merge`)
}
add_mcarta2_human_annot <- function(protein.data.annotated,mitocarta.data){
mitocarta.data %>%
select(Symbol,Description,MCARTA2_LIST) -> mitocarta.symbol
mitocarta.data %>%
select(Synonyms,Description,MCARTA2_LIST) %>%
unnest(Synonyms = strsplit(Synonyms,"\\|"))-> mitocarta.synonym
# Gene names (primary ) -> Symbol
protein.data.annotated %>%
left_join(mitocarta.symbol[,c("Symbol","Description","MCARTA2_LIST")], by = c("Gene names (primary )" = "Symbol")) %>% filter(!is.na(MCARTA2_LIST)) ->
protein.data.annotated.mito.0
protein.data.annotated %>%
left_join(mitocarta.symbol[,c("Symbol","Description","MCARTA2_LIST")], by = c("Gene names (primary )" = "Symbol")) %>% filter(is.na(MCARTA2_LIST)) ->
protein.data.annotated.mito.missing
# Gene names (primary ) -> Synonym
protein.data.annotated.mito.missing %>%
select(-MCARTA2_LIST,-Description) %>%
left_join(mitocarta.synonym[,c("Synonyms","Description","MCARTA2_LIST")], by = c("Gene names (primary )" = "Synonyms")) %>%
filter(!is.na(MCARTA2_LIST)) -> protein.data.annotated.mito.1
protein.data.annotated.mito.missing %>%
select(-MCARTA2_LIST,-Description) %>%
left_join(mitocarta.synonym[,c("Synonyms","Description","MCARTA2_LIST")], by = c("Gene names (primary )" = "Synonyms")) %>%
filter(is.na(MCARTA2_LIST)) -> protein.data.annotated.mito.missing
# Gene names (synonym ) -> Symbol
protein.data.annotated.mito.missing %>%
select(-MCARTA2_LIST,-Description) %>%
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>%
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>%
unnest(`Gene names (synonym )`) %>%
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>%
left_join(mitocarta.symbol[,c("Symbol","Description","MCARTA2_LIST")], by = c("Gene names (synonym )" = "Symbol")) %>%
group_by(`Gene names (primary )`,`Priority protein ID`) %>%
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>%
unique() %>%
filter(!is.na(MCARTA2_LIST)) %>%
ungroup() -> protein.data.annotated.mito.2
protein.data.annotated.mito.missing %>%
select(-MCARTA2_LIST,-Description) %>%
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>%
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>%
unnest(`Gene names (synonym )`) %>%
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>%
left_join(mitocarta.symbol[,c("Symbol","Description","MCARTA2_LIST")], by = c("Gene names (synonym )" = "Symbol")) %>%
group_by(`Gene names (primary )`,`Priority protein ID`) %>%
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>%
unique() %>%
filter(all(is.na(MCARTA2_LIST))) %>%
ungroup() -> protein.data.annotated.mito.missing
# Gene names (synonym ) -> Synonym
protein.data.annotated.mito.missing %>%
select(-MCARTA2_LIST,-Description) %>%
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>%
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>%
unnest(`Gene names (synonym )`) %>%
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>%
left_join(mitocarta.synonym[,c("Synonyms","Description","MCARTA2_LIST")], by = c("Gene names (synonym )" = "Synonyms")) %>%
group_by(`Gene names (primary )`,`Priority protein ID`) %>%
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>%
unique() %>%
filter(!is.na(MCARTA2_LIST)) %>%
ungroup() -> protein.data.annotated.mito.3
protein.data.annotated.mito.missing %>%
select(-MCARTA2_LIST,-Description) %>%
mutate(`Gene names (synonym )` = strsplit(`Gene names (synonym )`, " ")) %>%
mutate(`Gene names (synonym )` = ifelse(`Gene names (synonym )` %in% "character(0)","",`Gene names (synonym )`)) %>%
unnest(`Gene names (synonym )`) %>%
mutate(`Gene names (synonym )` = gsub(";","",`Gene names (synonym )`)) %>%
left_join(mitocarta.synonym[,c("Synonyms","Description","MCARTA2_LIST")], by = c("Gene names (synonym )" = "Synonyms")) %>%
group_by(`Gene names (primary )`,`Priority protein ID`) %>%
mutate(`Gene names (synonym )` = paste(`Gene names (synonym )`, collapse = " ")) %>%
unique() %>%
filter(all(is.na(MCARTA2_LIST))) %>%
ungroup()-> protein.data.annotated.mito.missing
protein.data.annotated.mito.0 %>%
rbind(protein.data.annotated.mito.1) %>%
rbind(protein.data.annotated.mito.2) %>%
rbind(protein.data.annotated.mito.3) %>%
rbind(protein.data.annotated.mito.missing) %>%
unique() %>%
select(`Gene names (primary )`) %>%
table() %>%
as_tibble() %>%
filter(`n` > 1) -> duplicated.mitoannotations
protein.data.annotated.mito.0 %>%
filter(!is.na(MCARTA2_LIST)) %>%
rbind(protein.data.annotated.mito.1) %>%
rbind(protein.data.annotated.mito.2) %>%
rbind(protein.data.annotated.mito.3) %>%
rbind(protein.data.annotated.mito.missing) %>%
unique() %>%
select(-`Description`) %>%
unique()
}
### Check duplicated mitoannotations
mitoannot_duplications <- function(protein.data.annotated){
group_by(`Protein IDs`, `Gene names (primary )`) %>%
table() %>%
as_tibble() %>%
filter(`n` > 1)
}
#### Numbers of quantified proteins LFQ
plot_num_lfq_quant_prot <- function(df, ylim = c(0,2000), filename = "QC_Quantified_proteins.pdf", order_vector = NULL){
if(is.null(order_vector)){
df %>%
select(-matches("Protein IDs")) %>%
gather(Experiment, value) %>%
group_by(Experiment) %>%
summarise(`Quantified proteins` = sum(value>0, na.rm = TRUE)) %>%
ggplot(aes(x = Experiment, y = `Quantified proteins`)) +
geom_bar(stat = "identity", fill = "#a6cee3", color = "black")+
theme_bw()+
theme(axis.text.x = element_text(angle = 90,size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14))+
geom_text(aes(label = `Quantified proteins`), color = "red", hjust = -0.5)+
coord_flip(ylim = ylim)+
ggsave(filename = filename, path = "results", width = 105, height = 148, units = "mm")
}
if(!is.null(order_vector)){
df %>%
select(-matches("Protein IDs")) %>%
gather(Experiment, value) %>%
group_by(Experiment) %>%
summarise(`Quantified proteins` = sum(value>0, na.rm = TRUE)) %>%
mutate(Experiment = factor(Experiment, levels = order_vector)) %>%
ggplot(aes(x = Experiment, y = `Quantified proteins`)) +
geom_bar(stat = "identity", fill = "#a6cee3", color = "black")+
theme_bw()+
theme(axis.text.x = element_text(angle = 90,size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14))+
geom_text(aes(label = `Quantified proteins`), color = "red", hjust = -0.5)+
coord_flip(ylim = ylim)+
ggsave(filename = filename, path = "results", width = 105, height = 148, units = "mm")
}
}
plot_num_tmt_quant_prot <- function(x, Experimental_design = Experimental.design, ylim = c(0,2000)){
x %>%
select(matches("Reporter intensity corrected")) %>%
gather(`TMT reporter intensity experiment`, value) %>%
left_join(Experimental_design[,c("TMT reporter intensity experiment","Experiment")]) %>%
group_by(Experiment) %>%
summarise(`Quantified proteins` = sum(value>0, na.rm = TRUE)) %>%
mutate(Experiment = factor(Experiment, levels = rev(Experimental_design$Experiment))) %>%
ggplot(aes(x = Experiment, y = `Quantified proteins`)) +
geom_bar(stat = "identity", fill = "#a6cee3", color = "black")+
theme_bw()+
theme(axis.text.x = element_text(angle = 90,size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14))+
geom_text(aes(label = `Quantified proteins`), color = "red", hjust = -0.5)+
coord_flip(ylim = ylim)+
ggsave(filename = "Quantified_proteins.pdf", path = "results", width = 105, height = 148, units = "mm")
}
#### Numbers of quantified proteins SILAC
plot_num_SILAC_quant_prot <- function(x, Experimental_design = Experimental.design, ylim = c(0,2000), order.by = NULL, filename = "Quantified_proteins.pdf"){
if(!(is.null(order.by)|is.character(order.by))){stop("order.by should be NULL or a column name from Experimental_design")}
if(!is.null(order.by)){
Experimental_design %>%
arrange_(.dots = order.by) -> Experimental.design.temp
x %>%
select(`Protein IDs`,matches("Ratio H/L normalized ")) %>%
gather(`SILAC ratio experiment`, value, -`Protein IDs`) %>%
left_join(Experimental_design[,c("SILAC ratio experiment","Experiment")]) %>%
mutate(`Experiment` = factor(`Experiment`, levels = rev(Experimental.design.temp$`Experiment`))) %>%
group_by(Experiment) %>%
summarise(`Quantified proteins` = sum(value>-Inf, na.rm = TRUE)) %>%
ggplot(aes(x = Experiment, y = `Quantified proteins`)) +
geom_bar(stat = "identity", fill = "#a6cee3", color = "black")+
theme_bw()+
theme(axis.text.x = element_text(angle = 90,size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14))+
geom_text(aes(label = `Quantified proteins`), color = "red", hjust = -0.5)+
coord_flip(ylim = ylim)+
ggsave(filename = filename, path = "results", width = 105, height = 148, units = "mm")
} else {
x %>%
select(`Protein IDs`,matches("Ratio H/L normalized ")) %>%
gather(`SILAC ratio experiment`, value, -`Protein IDs`) %>%
left_join(Experimental_design[,c("SILAC ratio experiment","Experiment")]) %>%
group_by(Experiment) %>%
summarise(`Quantified proteins` = sum(value>-Inf, na.rm = TRUE)) %>%
mutate(Experiment = factor(Experiment, levels = rev(Experimental_design$Experiment))) %>%
ggplot(aes(x = Experiment, y = `Quantified proteins`)) +
geom_bar(stat = "identity", fill = "#a6cee3", color = "black")+
theme_bw()+
theme(axis.text.x = element_text(angle = 90,size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14))+
geom_text(aes(label = `Quantified proteins`), color = "red", hjust = -0.5)+
coord_flip(ylim = ylim)+
ggsave(filename = filename, path = "results", width = 105, height = 148, units = "mm")
}
}
### MA plot
plot_ma <- function(x, filename = "MA_plot.pdf"){
x %>%
as_tibble() %>%
mutate(Significant = ifelse(adj.P.Val < 0.05,"+","")) -> temp.df
temp.df %>%
ggplot(aes(x = AveExpr, y = logFC, color = Significant))+
scale_color_manual(values = c("grey","red"))+
theme_bw()+
coord_cartesian(ylim = c(-ceiling(max(abs(x$logFC),na.rm = TRUE)),ceiling(max(abs(x$logFC),na.rm = TRUE))))+
geom_point(data = subset(temp.df, Significant %in% ""),alpha = 0.5)+
geom_point(data = subset(temp.df, Significant %in% "+"),alpha = 0.5)+
ggsave(path = "results",filename = filename, width = 20, height = 15, units = "cm")
}
### add row names as Protein IDs
row_names_to_protein_IDs <- function(x){
x %>%
as.data.frame() %>%
as_tibble() %>%
rownames_to_column(var = "Protein IDs")
}
### row names to Proteins and extract Protein IDs, logFC, P.Value and adj.P.Val
limma_results_select_ProtIDs_logFC_PVal_adjPVal <- function(x){
x %>%
as.data.frame() %>%
as_tibble() %>%
rownames_to_column("Protein IDs") %>%
select(`Protein IDs`,logFC,P.Value,adj.P.Val)
}
limma_results_select_ProtIDs_logFC_CI_PVal_adjPVal <- function(x){
x %>%
as.data.frame() %>%
as_tibble() %>%
rownames_to_column("Protein IDs") %>%
select(`Protein IDs`,logFC, CI.L, CI.R, P.Value, adj.P.Val)
}
limma_results_select_ProtIDs_logFC_CI_AveExpr_PVal_adjPVal <- function(x){
x %>%
as.data.frame() %>%
as_tibble() %>%
rownames_to_column("Protein IDs") %>%
select(`Protein IDs`,logFC, CI.L, CI.R, AveExpr, P.Value, adj.P.Val)
}
### row names to Proteins and extract Protein IDs, logFC, P.Value and adj.P.Val
limma_results_select_GeneID_logFC_PVal_adjPVal <- function(x){
x %>%
as.data.frame() %>%
as_tibble() %>%
rownames_to_column("Cross-reference (GeneID)") %>%
select(`Cross-reference (GeneID)`,logFC,P.Value,adj.P.Val)
}
### row names to Proteins and extract Protein IDs, logFC, P.Value and adj.P.Val
limma_results_select_ProtIDs_logFC_adjPVal <- function(x){
x %>%
as.data.frame() %>%
as_tibble() %>%
rownames_to_column("Protein IDs") %>%
select(`Protein IDs`,logFC,adj.P.Val)
}
### rename all but Protein IDs
limma_rename_all_but_proteinIDs <- function(x, chr_to_add){
if(!is.character(chr_to_add)){"Stop, chr_to_add has to be a character vector"}
x %>%
rename_at(vars(-matches("Protein IDs")), funs(paste0(.,chr_to_add)))
}
limma_rename_all_but_GeneIDs <- function(x, chr_to_add){
if(!is.character(chr_to_add)){"Stop, chr_to_add has to be a character vector"}
x %>%
rename_at(vars(-matches("Cross-reference \\(GeneID\\)")), funs(paste0(.,chr_to_add)))
}
### Significance according to adj.P.Value
limma_significant_adjPVal <- function(x, cutoff = 0.05, indicator = "+"){
x %>%
mutate(Significant = ifelse(adj.P.Val < cutoff, indicator,""))
}
### Significant enrichment according to adj.P.Value and logFC
limma_signif_enriched_adjPVal <- function(x,indicator = "+"){
x %>%
mutate(Enriched = ifelse(adj.P.Val < 0.05 & logFC > 0,indicator,""))
}
### Read uniprot annotation
get_uniprot_annotations_url <- function(x = "parameters.txt"){
parameters <- read.delim(paste0("data/",x),stringsAsFactors = FALSE)
fasta.file <- subset(parameters,grepl("_Mus_musculus|_Homo_sapiens_|_Saccharomyces_cerevisiae_|_Caenorhabditis_elegans_|_Drosophila_melanogaster_",Value))
fasta.file %>%
select(Value) %>%
unnest(Value = strsplit(Value,";")) %>%
filter(grepl("_Mus_musculus|_Homo_sapiens_|_Saccharomyces_cerevisiae_|_Caenorhabditis_elegans_|_Drosophila_melanogaster_",Value)) %>%
mutate(Value = gsub("^.*\\\\","",Value)) %>%
mutate(Value = gsub("\\.fasta","",Value)) -> match.key
return(match.key)
}
#### imputation of LFQ data and imputation
LFQ_data_imputator_annotator <- function(data.frame.for.imputation){
data.frame.for.imputation %>%
LFQ_data_imputator() -> data.frame.for.imputation.imputed
data.frame.for.imputation %>%
imputed_values_annotator(imputed.data = data.frame.for.imputation.imputed) -> data.frame.for.imputation.imputed.long.annotated
df.list <- list(data.frame.for.imputation.imputed,
data.frame.for.imputation.imputed.long.annotated)
names(df.list) <- c("imputed.df","imputed.df.imputation.annotated")
return(df.list)
}
#### histogram of LFQ imputed values
plot_lfq_imputation_histograms <- function(imputed.df,
filename = "QC_histogram_imputation.pdf",
height = 20,
width = 28.7,
ncol = 4,
order_vector = NULL,
show.default.arguments = "no"){
# numbers of non-imputed proteins
imputed.df$imputed.df.imputation.annotated %>%
group_by(Experiment,`Imputed values`) %>%
summarise(proteins = n()) %>%
ungroup() %>%
filter(`Imputed values` == "") %>%
mutate(value = +Inf) -> number.non.imputed.proteins
# numbers of imputed proteins
imputed.df$imputed.df.imputation.annotated %>%
group_by(Experiment,`Imputed values`) %>%
summarise(proteins = n()) %>%
ungroup() %>%
filter(`Imputed values` == "+") %>%
mutate(value = -Inf) -> number.imputed.proteins
if(is.null(order_vector)){
imputed.df$imputed.df.imputation.annotated %>%
ggplot(aes(x = value, fill = `Imputed values`),alpha = 0.5)+
theme_bw()+
geom_histogram()+
scale_fill_manual(values = RColorBrewer::brewer.pal(3,"Dark2")[1:2])+
facet_wrap(~ Experiment,ncol = ncol)+
geom_text(y = +Inf, aes(x = value, label = proteins), data = number.non.imputed.proteins,color = "#1B9E77",hjust = 1.2,vjust = 1.5)+
geom_text(y = +Inf, aes(x = value, label = proteins), data = number.imputed.proteins, color = "#D95F02",hjust = -0.2,vjust = 1.5)+
ggsave(filename = filename, path = "results", height = height, width = width, units = "cm", limitsize = FALSE)
}
if(!is.null(order_vector)){
number.non.imputed.proteins %>%
mutate(Experiment = factor(Experiment,levels = order_vector)) -> number.non.imputed.proteins
number.imputed.proteins %>%
mutate(Experiment = factor(Experiment,levels = order_vector)) -> number.imputed.proteins
imputed.df$imputed.df.imputation.annotated %>%
mutate(Experiment = factor(Experiment,levels = order_vector)) %>%
ggplot(aes(x = value, fill = `Imputed values`),alpha = 0.5)+
theme_bw()+
geom_histogram()+
scale_fill_manual(values = RColorBrewer::brewer.pal(3,"Dark2")[1:2])+
facet_wrap(~ Experiment,ncol = ncol)+
geom_text(y = +Inf, aes(x = value, label = proteins), data = number.non.imputed.proteins,color = "#1B9E77",hjust = 1.2,vjust = 1.5)+
geom_text(y = +Inf, aes(x = value, label = proteins), data = number.imputed.proteins, color = "#D95F02",hjust = -0.2,vjust = 1.5)+
ggsave(filename = filename, path = "results", height = height, width = width, units = "cm", limitsize = FALSE)
}
}
### multiscatter
plot_multiscatter_lfq <- function(imputed.df, alpha = 0.25, order_vector = NULL){
if(is.null(order_vector)){
imputed.df %>%
gather(Experiment, value,-`Protein IDs`) %>%
spread(Experiment, value) %>%
select(-`Protein IDs`) %>%
ggpairs(upper = list(continuous = wrap("points", alpha = alpha)),
lower = list(continuous = wrap("cor", size = 5)))+
theme_bw() -> multiscatter
return(multiscatter)
}
if(!is.null(order_vector)){
imputed.df %>%
gather(Experiment, value,-`Protein IDs`) %>%
mutate(Experiment = factor(Experiment, levels = order_vector)) %>%
spread(Experiment, value) %>%
select(-`Protein IDs`) %>%
ggpairs(upper = list(continuous = wrap("points", alpha = alpha)),
lower = list(continuous = wrap("cor", size = 5)))+
theme_bw()-> multiscatter
return(multiscatter)
}
}
### dendrogram
cluster_dendrogram <- function(imputed.df){
imputed.df %>%
remove_rownames() %>%
column_to_rownames(var = "Protein IDs") %>%
as.data.frame() %>%
as.matrix() %>%
t() %>%
dist() %>%
hclust(.,method = "average") %>%
as.dendrogram()
}
### PCA
plot_PCA <- function(imputed.df, Experimental_design = Experimental.design,
filename = "PCA.pdf",
width = 20,
height = 22.5,
col.ind = NULL ){
if(!(is.null(col.ind)|is.character(col.ind))){stop("order.by should be NULL or a column name from Experimental_design")}
if(!is.null(col.ind)){
Experimental_design %>%
arrange_("Experiment") -> Experimental.design.temp
imputed.df %>%
gather(`LFQ intensity experiment`,value,-`Protein IDs`) %>%
left_join(Experimental.design.temp[,c("LFQ intensity experiment","Experiment")]) %>%
filter(!is.na(Experiment)) %>%
select(-`LFQ intensity experiment`) %>%
spread(key = Experiment, value = value) %>%
remove_rownames() %>%
column_to_rownames(var = "Protein IDs") %>%
t() %>%
as.data.frame() %>%
as_tibble() %>%
PCA(graph = FALSE) %>%
fviz_pca_ind(repel = TRUE,col.ind = factor(unlist(Experimental.design.temp[,col.ind])),mean.point = FALSE)
ggsave(path = "results",filename = filename, width = width, height = height, units = "cm")
} else {
protein.data.imputed$imputed.df %>%
#imputed.df %>%
gather(`LFQ intensity experiment`,value,-`Protein IDs`) %>%
left_join(Experimental.design.temp[,c("LFQ intensity experiment","Experiment")]) %>%
filter(!is.na(Experiment)) %>%
select(-`LFQ intensity experiment`) %>%
spread(key = Experiment, value = value) %>%
remove_rownames() %>%
column_to_rownames(var = "Protein IDs") %>%
t() %>%
as.data.frame() %>%
as_tibble() %>%
PCA(graph = FALSE) %>%
fviz_pca_ind(repel = TRUE,mean.point = FALSE,palette = "RdBu")
ggsave(path = "results",filename = filename, width = width, height = height, units = "cm")
}
}
### volcanoplot; significance
plot_volcanoplot_significant <- function(limma.results ,path = "results",filename = "volcanoplot.pdf", xaxis.lab = "logFC"){
if(!(is.null(xaxis.lab)|is.character(xaxis.lab))){stop("renamelogFC has to be NULL or a character vector of length 1")}
if(!("Gene names (primary )" %in% names(limma.results))){stop("Gene names (primary ) is missing")}
limma.results %>%
ggplot(aes(x = `logFC`, y = -log10(adj.P.Val), color = Significant))+
geom_point(alpha = 0.5)+
scale_color_manual(values = c("grey","red"))+
coord_cartesian(xlim = c(-ceiling(max(abs(limma.results$logFC),na.rm = TRUE)),ceiling(max(abs(limma.results$logFC),na.rm = TRUE))))+
theme_bw()+
geom_text_repel(data = subset(limma.results, Significant %in% "+"),
aes(label = `Gene names (primary )`),
color = "black")+
labs(x = xaxis.lab)+
ggsave(path = path,filename = filename, width = 30, height = 20, units = "cm")
}
### volcanoplot; significance cutoff
plot_volcanoplot_adjpval.cutoff <- function(limma.results, path = "results", filename = "volcanoplot.pdf", xaxis.lab = "logFC", adj.P.Val_cutoff = 0.05){
if(!(is.null(xaxis.lab)|is.character(xaxis.lab))){stop("renamelogFC has to be NULL or a character vector of length 1")}
if(!("Gene names (primary )" %in% names(limma.results))){stop("Gene names (primary ) is missing")}
limma.results %>%
ggplot(aes(x = `logFC`, y = -log10(adj.P.Val), color = Significant))+
geom_point(alpha = 0.5)+
scale_color_manual(values = c("grey","red"))+
coord_cartesian(xlim = c(-ceiling(max(abs(limma.results$logFC),na.rm = TRUE)),ceiling(max(abs(limma.results$logFC),na.rm = TRUE))))+
theme_bw()+
geom_text_repel(data = subset(limma.results, adj.P.Val < adj.P.Val_cutoff),
aes(label = `Gene names (primary )`),
color = "black")+
labs(x = xaxis.lab)+
ggsave(path = path, filename = filename, width = 30, height = 20, units = "cm")
}
plot_volcanoplot_pval.cutoff <- function(limma.results, path = "results", filename = "volcanoplot.pdf", xaxis.lab = "logFC", P.Val_cutoff = 0.05, xlim = NULL){
if(!(is.null(xaxis.lab)|is.character(xaxis.lab))){stop("renamelogFC has to be NULL or a character vector of length 1")}
if(!("Gene names (primary )" %in% names(limma.results))){stop("Gene names (primary ) is missing")}
if(is.null(xlim)){
limma.results %>%
ggplot(aes(x = `logFC`, y = -log10(P.Value), color = Significant))+
geom_point(alpha = 0.5)+
scale_color_manual(values = c("grey","red"))+
coord_cartesian(xlim = c(-ceiling(max(abs(limma.results$logFC),na.rm = TRUE)),ceiling(max(abs(limma.results$logFC),na.rm = TRUE))))+
theme_bw()+
geom_text_repel(data = subset(limma.results, P.Value < P.Val_cutoff),
aes(label = `Gene names (primary )`),
color = "black")+
labs(x = xaxis.lab)+
ggsave(path = path, filename = filename, width = 30, height = 20, units = "cm")
} else {
limma.results %>%
ggplot(aes(x = `logFC`, y = -log10(P.Value), color = Significant))+
geom_point(alpha = 0.5)+
scale_color_manual(values = c("grey","red"))+
coord_cartesian(xlim = xlim)+
theme_bw()+
geom_text_repel(data = subset(limma.results, P.Value < P.Val_cutoff),
aes(label = `Gene names (primary )`),
color = "black")+
labs(x = xaxis.lab)+
ggsave(path = path, filename = filename, width = 30, height = 20, units = "cm")
}
}
### add Inge and Maria's annotations
add_mito_dysf_annot <- function(protein.data,
mito.dysf.annot.Prot.ID = mito.dysf.annot.Prot.ID,
mito.dysf.annot.Gene.name = mito.dysf.annot.Gene.name){
protein.data %>%
left_join(mito.dysf.annot.Gene.name[,c("Gene name","Annotations; Mitochondrial dysfunction, PMID:29132502; Gene name")],
by = c("Gene names (primary )" = "Gene name")) %>%
left_join(mito.dysf.annot.Prot.ID[,c("Protein ID","Annotations; Mitochondrial dysfunction, PMID:29132502; Protein ID")],
by = c("Priority protein ID" = "Protein ID")) %>%
mutate(`Annotations; Mitochondrial dysfunction, PMID:29132502` = `Annotations; Mitochondrial dysfunction, PMID:29132502; Gene name`) %>%
mutate(`Annotations; Mitochondrial dysfunction, PMID:29132502` = ifelse(is.na(`Annotations; Mitochondrial dysfunction, PMID:29132502`),`Annotations; Mitochondrial dysfunction, PMID:29132502; Protein ID`,`Annotations; Mitochondrial dysfunction, PMID:29132502`)) %>%
select(-`Annotations; Mitochondrial dysfunction, PMID:29132502; Gene name`,
-`Annotations; Mitochondrial dysfunction, PMID:29132502; Protein ID`)
}
### Calculate enrichment based on logFC; calculating the proportion of logFC <0 from the logFC >0
limma_calculate_enrichment_fdr <- function(limma.result){
### the function assumes that this is a lima resunts and that it is ordered by increasing adj.P.Val
limma.result %>%
mutate(`Enrichment FDR` = NA) %>%
mutate(`Enrichment FDR` = as.numeric(`Enrichment FDR`)) -> temp.df
for(i in 1:nrow(temp.df)){
Signif.enriched <- numeric(length = 1)
Signif.deenriched <- numeric(length = 1)
temp.df[1:i,] -> temp.df.2
Signif.enriched <- nrow(temp.df.2[temp.df.2$logFC > 0,])
Signif.deenriched <- nrow(temp.df.2[temp.df.2$logFC < 0,])
temp.df$`Enrichment FDR`[i] <- Signif.deenriched/Signif.enriched
}
temp.df$`Enrichment FDR` <- ifelse(is.infinite(temp.df$`Enrichment FDR`),1,temp.df$`Enrichment FDR`)
temp.df
}
####
### annotate enrichment FDR
limma_annotate_enriched <- function(limma.result, fdr_cutoff = 0.05){
if(nrow(subset(limma.result,`Enrichment FDR` < fdr_cutoff)) > 0){
highest.adj.P.val <- max(unlist(limma.result$adj.P.Val[limma.result$`Enrichment FDR` <= fdr_cutoff]))
limma.result %>%
mutate(`Enriched` = ifelse(logFC>0 & adj.P.Val <= highest.adj.P.val,"+",""))
} else {
limma.result %>%
mutate(`Enriched` = "")
}
}
####
limma_annotate_enriched_P_value <- function(limma.result, fdr_cutoff = 0.05){
enriched_variable_name <- paste0("Enriched","_",fdr_cutoff)
if(nrow(subset(limma.result,`Enrichment FDR` < fdr_cutoff)) > 0){
highest.P.Value <- max(unlist(limma.result$P.Value[limma.result$`Enrichment FDR` <= fdr_cutoff]))
limma.result %>%
mutate(!! enriched_variable_name := ifelse(logFC>0 & P.Value <= highest.P.Value,"+",""))
} else {
limma.result %>%
mutate(!! enriched_variable_name := "")
}
}
####
limma_regulation_adjPVal <- function(x){
x %>%
mutate(Regulation = ifelse(adj.P.Val < 0.05 & logFC > 0,"Up-regulated",
ifelse(adj.P.Val < 0.05 & logFC < 0,"Down-regulated","")))
}
####
substract_global_median <- function(x){
global.median <- median(x, na.rm = TRUE)
y <- x-global.median
y
}
### limma row names to Protein IDs
limma_rownames_proteinIDs <- function(x){
x %>%
as.data.frame() %>%
as_tibble() %>%
rownames_to_column("Protein IDs")
}
plot_PCA_res <- function(PCA.result, color_var = NULL, shape_var = NULL, label = TRUE, palette = "Dark2",filename = "QC_PCA_plot.pdf"){
if(label){
color_var <- enexpr(color_var)
shape_var <- enexpr(shape_var)
PCA.result$data %>%
left_join(Experimental_design, by = c("name" = "Experiment")) %>%
ggplot(aes(x = x, y = y, color = !!color_var, shape = !!shape_var))+
geom_hline(yintercept = 0, linetype = "dashed")+
geom_vline(xintercept = 0, linetype = "dashed")+
geom_point()+
scale_color_brewer(palette = palette)+
theme_bw()+
theme(panel.border = element_blank())+
labs(x = PCA.result$labels$x, y = PCA.result$labels$y)+
geom_text_repel(aes(label = name))+
ggsave(filename, path = "results", width = 22.5, height = 20, units = "cm",useDingbats = FALSE)
}
if(!label){
color_var <- enexpr(color_var)
shape_var <- enexpr(shape_var)
PCA.result$data %>%
left_join(Experimental_design, by = c("name" = "Experiment")) %>%
ggplot(aes(x = x, y = y, color = !!color_var, shape = !!shape_var))+
geom_hline(yintercept = 0, linetype = "dashed")+
geom_vline(xintercept = 0, linetype = "dashed")+
geom_point()+
scale_color_brewer(palette = palette)+
theme_bw()+
theme(panel.border = element_blank())+
labs(x = PCA.result$labels$x, y = PCA.result$labels$y)+
ggsave(filename, path = "results", width = 22.5, height = 20, units = "cm",useDingbats = FALSE)
}
}
rs.extractLimmaContrasts = function( limmaobject ) {
outlist = list()
for (i in 1:ncol(limmaobject$contrasts)) {
outlist[colnames(limmaobject$contrasts)[i]] = list( topTable(limmaobject, coef=i, number=Inf, confint = T ) )
}
return(outlist)
}
###
my_scatterplot_char_var <- function(df, xvar, yvar, colorvar = NULL, axis_lim = NULL, alpha = 0.5, text_annot = NULL){
if(is.null(text_annot)){
ggplot(df, aes_string(xvar, yvar, color = colorvar))+
geom_point(alpha = alpha)+
geom_abline(slope = 1, intercept = 0, color = "grey",linetype = "dashed")+
geom_abline(slope = 1, intercept = 2, color = "red",linetype = "dashed")+
geom_abline(slope = 1, intercept = -2, color = "red",linetype = "dashed")+
geom_hline(yintercept = 0)+
geom_vline(xintercept = 0)+
scale_color_viridis(discrete = TRUE)+
theme_bw()+
coord_fixed(xlim = axis_lim, ylim = axis_lim)+
ggsave(filename = paste0("Scatter_",xvar,"__","vs","__",yvar,".pdf"),path = "results",width = 22.5, height = 20, units = "cm", useDingbats = FALSE)
}
if(!is.null(text_annot)){
df %>%
filter(`Gene names (primary )` %in% text_annot) -> temp_df
ggplot(df, aes_string(xvar, yvar, color = colorvar))+
geom_point(alpha = alpha)+
geom_abline(slope = 1, intercept = 0, color = "grey",linetype = "dashed")+
geom_abline(slope = 1, intercept = 2, color = "red",linetype = "dashed")+
geom_abline(slope = 1, intercept = -2, color = "red",linetype = "dashed")+
geom_hline(yintercept = 0)+
geom_vline(xintercept = 0)+
scale_color_viridis(discrete = TRUE)+
theme_bw()+
coord_fixed(xlim = axis_lim, ylim = axis_lim)+
geom_point(data = temp_df, color = "red")+
geom_text(data = temp_df, aes(label = `Gene names (primary )`), color = "red") +
ggsave(filename = paste0("Scatter_",xvar,"__","vs","__",yvar,".pdf"),path = "results",width = 22.5, height = 20, units = "cm", useDingbats = FALSE)
}
}
## ---- scatterplot_char_var ----
scatterplot_logFC <- function(df, xvar, yvar, colorvar = NULL, axis_lim = NULL, alpha = 0.5, text_annot = NULL){
if(is.null(text_annot)){
ggplot(df, aes_string(xvar, yvar, color = colorvar))+
geom_point(alpha = alpha)+
geom_abline(slope = 1, intercept = 0, color = "grey",linetype = "dashed")+
geom_hline(yintercept = 0)+
geom_vline(xintercept = 0)+
scale_color_viridis(discrete = TRUE)+
theme_bw()+
coord_fixed(xlim = axis_lim, ylim = axis_lim)+
ggsave(filename = paste0("Scatter_",xvar,"__","vs","__",yvar,".pdf"),path = "results",width = 22.5, height = 20, units = "cm", useDingbats = FALSE)
}
if(!is.null()){
df %>%
filter(`Gene names (primary )` %in% text_annot) -> temp_df
ggplot(df, aes_string(xvar, yvar, color = colorvar))+
geom_point(alpha = alpha)+
geom_abline(slope = 1, intercept = 0, color = "grey",linetype = "dashed")+
geom_hline(yintercept = 0)+
geom_vline(xintercept = 0)+
scale_color_viridis(discrete = TRUE)+
theme_bw()+
coord_fixed(xlim = axis_lim, ylim = axis_lim)+
geom_point(temp_df, color = "red")+
geom_text(temp_df, aes(label = `Gene names (primary )`)) %>%
ggsave(filename = paste0("Scatter_",xvar,"__","vs","__",yvar,".pdf"),path = "results",width = 22.5, height = 20, units = "cm", useDingbats = FALSE)
}
}