ilian_functions.r

#######################
####################### Functions
###
imputator <- function(x, seed = 5531, sd = 0.3, down_shift = 1.8){ # the imputator function substitutes missing values from normal disriution with mean = 0 and sd = sd and down shift of down_shift
  set.seed(seed = seed)
  x[is.na(x)] <-  rnorm(n = length(x[is.na(x)]), mean = 0, sd = sd)- down_shift
  return(x)
}


scale.reversor <- function(x){ # backtransform scaled data; from here: https://stackoverflow.com/questions/10287545/backtransform-scale-for-plotting

  y <- x * attr(x, 'scaled:scale') + attr(x, 'scaled:center')
  return(y)
}

###
inf.to.NA <- function(x){ # change the +/-inf calues to NAs

  y <- ifelse(is.finite(x),x,NA)
  return(y)
}

###
LFQ_data_imputator <- function(data.frame.for.imputation){

  data.frame.for.imputation %>%                       # the data frame
    mutate_at(vars(-contains("Protein IDs")),.funs = funs(log2)) %>%          # log2 transform all columns, except for Protein IDs
    mutate_at(vars(-contains("Protein IDs")),.funs =funs(inf.to.NA)) %>%     # replace +inf values with NAs, except for Protein IDs
    mutate_at(vars(-contains("Protein IDs")),.funs =funs(scale)) %>%         # scale the columns, except for Protein IDs
    mutate_at(vars(-contains("Protein IDs")),.funs =funs(imputator)) %>%     # substitute the missing values in the columns from normal distribution, except for Protein IDs
    mutate_at(vars(-contains("Protein IDs")),.funs = funs(scale.reversor)) -> LFQ.intensity.imputed
  return(LFQ.intensity.imputed)
}

###
imputed_values_annotator <- function(non.imputed.data,imputed.data){ # compares two data frames with before and after imputation and
  # marks with "+" the imputed valeus

  non.imputed.data %>%
    gather(`LFQ intensity experiment`,value,-`Protein IDs`) %>%
    mutate(value = log2(value))-> non.imputed.data.long

  imputed.data %>%
    gather(`LFQ intensity experiment`,value,-`Protein IDs`) -> imputed.data.long

  warning(paste0("All Protein IDs are the same order: ", all(non.imputed.data.long[,"LFQ intensity experiment"] == imputed.data.long[,"LFQ intensity experiment"])))

  imputed.data.long %>%
    mutate(`Imputed values` = ifelse(value == non.imputed.data.long$value,"","+")) -> imputed.data.long.imputation.indicated
  return(imputed.data.long.imputation.indicated)

}


### add Uniprot priority ID

add_uniprot_prior_prot_id <- function(protein.data, protein.annotations){
protein.data %>%
    unnest(`Peptide count (all)` = strsplit(`Peptide counts (all)`,";"),`Priority protein ID` = strsplit(`Protein IDs`,";")) %>%
    mutate(`Peptide count (all)` = as.numeric(`Peptide count (all)`)) %>%
    mutate(`Protein ID for merge` = gsub("-.*","",`Priority protein ID`)) %>%
    left_join(protein.annotations.df[,c("Entry","Status","Date of creation")], by = c("Protein ID for merge" = "Entry")) %>%
    group_by(id) %>%
    arrange(desc(`Peptide count (all)`),
            Status,
            `Date of creation`,
            `Priority protein ID`) %>%
    filter(row_number() %in% 1) %>%
    ungroup() %>%
    select(-Status,-`Date of creation`, -`Protein ID for merge`)

}

### add gene names (primary)
add_uniprot_gene_name_primary <- function(protein.data, protein.annotations){

  protein.data %>%
    mutate(`Protein ID for merge` = gsub("-.*","",`Priority protein ID`)) %>%
    left_join(protein.annotations[,c("Entry","Gene names  (primary )")], by = c("Protein ID for merge" = "Entry")) %>%
    select(-`Protein ID for merge`)


}

### add gene names (primary)
add_uniprot_gene_names_synonym <- function(protein.data, protein.annotations){

  protein.data %>%
    mutate(`Protein ID for merge` = gsub("-.*","",`Priority protein ID`)) %>%
    left_join(protein.annotations[,c("Entry","Gene names  (synonym )")], by = c("Protein ID for merge" = "Entry")) %>%
    select(-`Protein ID for merge`)


}

### add gene names (ORF)
add_uniprot_gene_names_orf <- function(protein.data, protein.annotations){

  protein.data %>%
    mutate(`Protein ID for merge` = gsub("-.*","",`Priority protein ID`)) %>%
    left_join(protein.annotations[,c("Entry","Gene names  (ORF )")], by = c("Protein ID for merge" = "Entry")) %>%
    select(-`Protein ID for merge`)


}

### add ordered locus
add_uniprot_ordered_locus <- function(protein.data, protein.annotations){

  protein.data %>%
    mutate(`Protein ID for merge` = gsub("-.*","",`Priority protein ID`)) %>%
    left_join(protein.annotations[,c("Entry","Gene names  (ordered locus )")], by = c("Protein ID for merge" = "Entry")) %>%
    select(-`Protein ID for merge`)


}

### add cross-ref Flybase
add_uniprot_flybase <- function(protein.data, protein.annotations){

  protein.data %>%
    mutate(`Protein ID for merge` = gsub("-.*","",`Priority protein ID`)) %>%
    left_join(protein.annotations[,c("Entry","Cross-reference (Flybase)")], by = c("Protein ID for merge" = "Entry")) %>%
    select(-`Protein ID for merge`)


}

### add uniprot protein names from fasta

add_uniprot_protein_name_from_fasta <- function(protein.data, protein.names){

  protein.data %>%
    left_join(protein.names[,c("Uniprot ID","Protein name")], by = c("Priority protein ID" = "Uniprot ID"))

}


### add mitocarta annotations

add_mcarta2_annot <- function(protein.data.annotated,mitocarta.data){

  mitocarta.data %>%
    select(Symbol,Description,MitoCarta2_List) -> mitocarta.symbol

  mitocarta.data %>%
    select(Synonyms,Description,MitoCarta2_List) %>%
    unnest(Synonyms = strsplit(Synonyms,"\\|"))-> mitocarta.synonym

  # Gene names  (primary ) -> Symbol
  protein.data.annotated %>%
    left_join(mitocarta.symbol[,c("Symbol","Description","MitoCarta2_List")], by = c("Gene names  (primary )" = "Symbol")) %>%     filter(!is.na(MitoCarta2_List)) ->
    protein.data.annotated.mito.0

  protein.data.annotated %>%
    left_join(mitocarta.symbol[,c("Symbol","Description","MitoCarta2_List")], by = c("Gene names  (primary )" = "Symbol")) %>%     filter(is.na(MitoCarta2_List)) ->
    protein.data.annotated.mito.missing

  # Gene names  (primary ) -> Synonym
  protein.data.annotated.mito.missing %>%
    select(-MitoCarta2_List,-Description) %>%
    left_join(mitocarta.synonym[,c("Synonyms","Description","MitoCarta2_List")], by =  c("Gene names  (primary )" = "Synonyms")) %>%
    filter(!is.na(MitoCarta2_List)) -> protein.data.annotated.mito.1

  protein.data.annotated.mito.missing %>%
    select(-MitoCarta2_List,-Description) %>%
    left_join(mitocarta.synonym[,c("Synonyms","Description","MitoCarta2_List")], by =  c("Gene names  (primary )" = "Synonyms")) %>%
    filter(is.na(MitoCarta2_List)) -> protein.data.annotated.mito.missing

  # Gene names  (synonym ) -> Symbol
  protein.data.annotated.mito.missing %>%
    select(-MitoCarta2_List,-Description) %>%
    mutate(`Gene names  (synonym )` = strsplit(`Gene names  (synonym )`, " ")) %>%
    mutate(`Gene names  (synonym )` = ifelse(`Gene names  (synonym )` %in% "character(0)","",`Gene names  (synonym )`)) %>%
    unnest(`Gene names  (synonym )`) %>%
    mutate(`Gene names  (synonym )` = gsub(";","",`Gene names  (synonym )`)) %>%
    left_join(mitocarta.symbol[,c("Symbol","Description","MitoCarta2_List")], by = c("Gene names  (synonym )" = "Symbol")) %>%
    group_by(`Gene names  (primary )`,`Priority protein ID`) %>%
    mutate(`Gene names  (synonym )` = paste(`Gene names  (synonym )`, collapse = " ")) %>%
    unique() %>%
    filter(!is.na(MitoCarta2_List)) %>%
    ungroup() -> protein.data.annotated.mito.2

  protein.data.annotated.mito.missing %>%
    select(-MitoCarta2_List,-Description) %>%
    mutate(`Gene names  (synonym )` = strsplit(`Gene names  (synonym )`, " ")) %>%
    mutate(`Gene names  (synonym )` = ifelse(`Gene names  (synonym )` %in% "character(0)","",`Gene names  (synonym )`)) %>%
    unnest(`Gene names  (synonym )`) %>%
    mutate(`Gene names  (synonym )` = gsub(";","",`Gene names  (synonym )`)) %>%
    left_join(mitocarta.symbol[,c("Symbol","Description","MitoCarta2_List")], by = c("Gene names  (synonym )" = "Symbol")) %>%
    group_by(`Gene names  (primary )`,`Priority protein ID`) %>%
    mutate(`Gene names  (synonym )` = paste(`Gene names  (synonym )`, collapse = " ")) %>%
    unique() %>%
    filter(all(is.na(MitoCarta2_List))) %>%
    ungroup() -> protein.data.annotated.mito.missing

  # Gene names  (synonym ) -> Synonym
  protein.data.annotated.mito.missing %>%
    select(-MitoCarta2_List,-Description) %>%
    mutate(`Gene names  (synonym )` = strsplit(`Gene names  (synonym )`, " ")) %>%
    mutate(`Gene names  (synonym )` = ifelse(`Gene names  (synonym )` %in% "character(0)","",`Gene names  (synonym )`)) %>%
    unnest(`Gene names  (synonym )`) %>%
    mutate(`Gene names  (synonym )` = gsub(";","",`Gene names  (synonym )`)) %>%
    left_join(mitocarta.synonym[,c("Synonyms","Description","MitoCarta2_List")], by = c("Gene names  (synonym )" = "Synonyms")) %>%
    group_by(`Gene names  (primary )`,`Priority protein ID`) %>%
    mutate(`Gene names  (synonym )` = paste(`Gene names  (synonym )`, collapse = " ")) %>%
    unique() %>%
    filter(!is.na(MitoCarta2_List)) %>%
    ungroup()  -> protein.data.annotated.mito.3

  protein.data.annotated.mito.missing %>%
    select(-MitoCarta2_List,-Description) %>%
    mutate(`Gene names  (synonym )` = strsplit(`Gene names  (synonym )`, " ")) %>%
    mutate(`Gene names  (synonym )` = ifelse(`Gene names  (synonym )` %in% "character(0)","",`Gene names  (synonym )`)) %>%
    unnest(`Gene names  (synonym )`) %>%
    mutate(`Gene names  (synonym )` = gsub(";","",`Gene names  (synonym )`)) %>%
    left_join(mitocarta.synonym[,c("Synonyms","Description","MitoCarta2_List")], by = c("Gene names  (synonym )" = "Synonyms")) %>%
    group_by(`Gene names  (primary )`,`Priority protein ID`) %>%
    mutate(`Gene names  (synonym )` = paste(`Gene names  (synonym )`, collapse = " ")) %>%
    unique() %>%
    filter(all(is.na(MitoCarta2_List))) %>%
    ungroup()-> protein.data.annotated.mito.missing


  protein.data.annotated.mito.0 %>%
    rbind(protein.data.annotated.mito.1) %>%
    rbind(protein.data.annotated.mito.2) %>%
    rbind(protein.data.annotated.mito.3) %>%
    rbind(protein.data.annotated.mito.missing) %>%
    unique() %>%
    select(`Gene names  (primary )`) %>%
    table() %>%
    as_tibble() %>%
    filter(`n` > 1) -> duplicated.mitoannotations

  protein.data.annotated.mito.0 %>%
    filter(!is.na(MitoCarta2_List)) %>%
    rbind(protein.data.annotated.mito.1) %>%
    rbind(protein.data.annotated.mito.2) %>%
    rbind(protein.data.annotated.mito.3) %>%
    rbind(protein.data.annotated.mito.missing) %>%
    unique() %>%
    select(-`Description`) %>%
    unique()


}

### add Cross-refe (GeneID)
add_uniprot_GeneID <- function(protein.data, protein.annotations){

  protein.data %>%
    mutate(`Protein ID for merge` = gsub("-.*","",`Priority protein ID`)) %>%
    left_join(protein.annotations[,c("Entry","Cross-reference (GeneID)")], by = c("Protein ID for merge" = "Entry")) %>%
    select(-`Protein ID for merge`)


}

add_mcarta2_human_annot <- function(protein.data.annotated,mitocarta.data){

  mitocarta.data %>%
    select(Symbol,Description,MCARTA2_LIST) -> mitocarta.symbol

  mitocarta.data %>%
    select(Synonyms,Description,MCARTA2_LIST) %>%
    unnest(Synonyms = strsplit(Synonyms,"\\|"))-> mitocarta.synonym

  # Gene names  (primary ) -> Symbol
  protein.data.annotated %>%
    left_join(mitocarta.symbol[,c("Symbol","Description","MCARTA2_LIST")], by = c("Gene names  (primary )" = "Symbol")) %>%     filter(!is.na(MCARTA2_LIST)) ->
    protein.data.annotated.mito.0

  protein.data.annotated %>%
    left_join(mitocarta.symbol[,c("Symbol","Description","MCARTA2_LIST")], by = c("Gene names  (primary )" = "Symbol")) %>%     filter(is.na(MCARTA2_LIST)) ->
    protein.data.annotated.mito.missing

  # Gene names  (primary ) -> Synonym
  protein.data.annotated.mito.missing %>%
    select(-MCARTA2_LIST,-Description) %>%
    left_join(mitocarta.synonym[,c("Synonyms","Description","MCARTA2_LIST")], by =  c("Gene names  (primary )" = "Synonyms")) %>%
    filter(!is.na(MCARTA2_LIST)) -> protein.data.annotated.mito.1

  protein.data.annotated.mito.missing %>%
    select(-MCARTA2_LIST,-Description) %>%
    left_join(mitocarta.synonym[,c("Synonyms","Description","MCARTA2_LIST")], by =  c("Gene names  (primary )" = "Synonyms")) %>%
    filter(is.na(MCARTA2_LIST)) -> protein.data.annotated.mito.missing

  # Gene names  (synonym ) -> Symbol
  protein.data.annotated.mito.missing %>%
    select(-MCARTA2_LIST,-Description) %>%
    mutate(`Gene names  (synonym )` = strsplit(`Gene names  (synonym )`, " ")) %>%
    mutate(`Gene names  (synonym )` = ifelse(`Gene names  (synonym )` %in% "character(0)","",`Gene names  (synonym )`)) %>%
    unnest(`Gene names  (synonym )`) %>%
    mutate(`Gene names  (synonym )` = gsub(";","",`Gene names  (synonym )`)) %>%
    left_join(mitocarta.symbol[,c("Symbol","Description","MCARTA2_LIST")], by = c("Gene names  (synonym )" = "Symbol")) %>%
    group_by(`Gene names  (primary )`,`Priority protein ID`) %>%
    mutate(`Gene names  (synonym )` = paste(`Gene names  (synonym )`, collapse = " ")) %>%
    unique() %>%
    filter(!is.na(MCARTA2_LIST)) %>%
    ungroup() -> protein.data.annotated.mito.2

  protein.data.annotated.mito.missing %>%
    select(-MCARTA2_LIST,-Description) %>%
    mutate(`Gene names  (synonym )` = strsplit(`Gene names  (synonym )`, " ")) %>%
    mutate(`Gene names  (synonym )` = ifelse(`Gene names  (synonym )` %in% "character(0)","",`Gene names  (synonym )`)) %>%
    unnest(`Gene names  (synonym )`) %>%
    mutate(`Gene names  (synonym )` = gsub(";","",`Gene names  (synonym )`)) %>%
    left_join(mitocarta.symbol[,c("Symbol","Description","MCARTA2_LIST")], by = c("Gene names  (synonym )" = "Symbol")) %>%
    group_by(`Gene names  (primary )`,`Priority protein ID`) %>%
    mutate(`Gene names  (synonym )` = paste(`Gene names  (synonym )`, collapse = " ")) %>%
    unique() %>%
    filter(all(is.na(MCARTA2_LIST))) %>%
    ungroup() -> protein.data.annotated.mito.missing

  # Gene names  (synonym ) -> Synonym
  protein.data.annotated.mito.missing %>%
    select(-MCARTA2_LIST,-Description) %>%
    mutate(`Gene names  (synonym )` = strsplit(`Gene names  (synonym )`, " ")) %>%
    mutate(`Gene names  (synonym )` = ifelse(`Gene names  (synonym )` %in% "character(0)","",`Gene names  (synonym )`)) %>%
    unnest(`Gene names  (synonym )`) %>%
    mutate(`Gene names  (synonym )` = gsub(";","",`Gene names  (synonym )`)) %>%
    left_join(mitocarta.synonym[,c("Synonyms","Description","MCARTA2_LIST")], by = c("Gene names  (synonym )" = "Synonyms")) %>%
    group_by(`Gene names  (primary )`,`Priority protein ID`) %>%
    mutate(`Gene names  (synonym )` = paste(`Gene names  (synonym )`, collapse = " ")) %>%
    unique() %>%
    filter(!is.na(MCARTA2_LIST)) %>%
    ungroup()  -> protein.data.annotated.mito.3

  protein.data.annotated.mito.missing %>%
    select(-MCARTA2_LIST,-Description) %>%
    mutate(`Gene names  (synonym )` = strsplit(`Gene names  (synonym )`, " ")) %>%
    mutate(`Gene names  (synonym )` = ifelse(`Gene names  (synonym )` %in% "character(0)","",`Gene names  (synonym )`)) %>%
    unnest(`Gene names  (synonym )`) %>%
    mutate(`Gene names  (synonym )` = gsub(";","",`Gene names  (synonym )`)) %>%
    left_join(mitocarta.synonym[,c("Synonyms","Description","MCARTA2_LIST")], by = c("Gene names  (synonym )" = "Synonyms")) %>%
    group_by(`Gene names  (primary )`,`Priority protein ID`) %>%
    mutate(`Gene names  (synonym )` = paste(`Gene names  (synonym )`, collapse = " ")) %>%
    unique() %>%
    filter(all(is.na(MCARTA2_LIST))) %>%
    ungroup()-> protein.data.annotated.mito.missing


  protein.data.annotated.mito.0 %>%
    rbind(protein.data.annotated.mito.1) %>%
    rbind(protein.data.annotated.mito.2) %>%
    rbind(protein.data.annotated.mito.3) %>%
    rbind(protein.data.annotated.mito.missing) %>%
    unique() %>%
    select(`Gene names  (primary )`) %>%
    table() %>%
    as_tibble() %>%
    filter(`n` > 1) -> duplicated.mitoannotations

  protein.data.annotated.mito.0 %>%
    filter(!is.na(MCARTA2_LIST)) %>%
    rbind(protein.data.annotated.mito.1) %>%
    rbind(protein.data.annotated.mito.2) %>%
    rbind(protein.data.annotated.mito.3) %>%
    rbind(protein.data.annotated.mito.missing) %>%
    unique() %>%
    select(-`Description`) %>%
    unique()


}

### Check duplicated mitoannotations

mitoannot_duplications <- function(protein.data.annotated){

  group_by(`Protein IDs`, `Gene names  (primary )`) %>%
    table() %>%
    as_tibble() %>%
    filter(`n` > 1)

}

#### Numbers of quantified proteins LFQ

plot_num_lfq_quant_prot <- function(x, Experimental_design = Experimental.design, ylim = c(0,2000), order.by = NULL, filename = "Quantified_proteins.pdf"){

  if(!(is.null(order.by)|is.character(order.by))){stop("order.by should be NULL or a column name from Experimental_design")}

  if(!is.null(order.by)){

    Experimental_design %>%

      arrange_(.dots = order.by) -> Experimental.design.temp

    x %>%
      select(matches("LFQ.")) %>%
      gather(`LFQ intensity experiment`, value) %>%
      left_join(Experimental_design[,c("LFQ intensity experiment","Experiment")]) %>%
      mutate(`Experiment` = factor(`Experiment`, levels = rev(Experimental.design.temp$`Experiment`))) %>%
      group_by(Experiment) %>%
      summarise(`Quantified proteins` = sum(value>0, na.rm = TRUE)) %>%
      ggplot(aes(x = Experiment, y = `Quantified proteins`)) +
      geom_bar(stat = "identity", fill = "#a6cee3", color = "black")+
      theme_bw()+
      theme(axis.text.x = element_text(angle = 90,size = 12),
            axis.text.y = element_text(size = 12),
            axis.title.x = element_text(size = 14),
            axis.title.y = element_text(size = 14))+
      geom_text(aes(label = `Quantified proteins`), color = "red", hjust = -0.5)+
      coord_flip(ylim = ylim)+
      ggsave(filename = filename, path = "QC", width = 105, height = 148, units = "mm")

  } else {

    x %>%
      select(matches("LFQ.")) %>%
      gather(`LFQ intensity experiment`, value) %>%
      left_join(Experimental_design[,c("LFQ intensity experiment","Experiment")]) %>%
      group_by(Experiment) %>%
      summarise(`Quantified proteins` = sum(value>0, na.rm = TRUE)) %>%
      mutate(Experiment = factor(Experiment, levels = rev(Experimental_design$Experiment))) %>%
      ggplot(aes(x = Experiment, y = `Quantified proteins`)) +
      geom_bar(stat = "identity", fill = "#a6cee3", color = "black")+
      theme_bw()+
      theme(axis.text.x = element_text(angle = 90,size = 12),
            axis.text.y = element_text(size = 12),
            axis.title.x = element_text(size = 14),
            axis.title.y = element_text(size = 14))+
      geom_text(aes(label = `Quantified proteins`), color = "red", hjust = -0.5)+
      coord_flip(ylim = ylim)+
      ggsave(filename = filename, path = "QC", width = 105, height = 148, units = "mm")

  }

}


plot_num_tmt_quant_prot <- function(x, Experimental_design = Experimental.design, ylim = c(0,2000)){

  x %>%
  select(matches("Reporter intensity corrected")) %>%
    gather(`TMT reporter intensity experiment`, value) %>%
    left_join(Experimental_design[,c("TMT reporter intensity experiment","Experiment")]) %>%
    group_by(Experiment) %>%
    summarise(`Quantified proteins` = sum(value>0, na.rm = TRUE)) %>%
    mutate(Experiment = factor(Experiment, levels = rev(Experimental_design$Experiment))) %>%
    ggplot(aes(x = Experiment, y = `Quantified proteins`)) +
    geom_bar(stat = "identity", fill = "#a6cee3", color = "black")+
    theme_bw()+
    theme(axis.text.x = element_text(angle = 90,size = 12),
          axis.text.y = element_text(size = 12),
          axis.title.x = element_text(size = 14),
          axis.title.y = element_text(size = 14))+
    geom_text(aes(label = `Quantified proteins`), color = "red", hjust = -0.5)+
    coord_flip(ylim = ylim)+
    ggsave(filename = "Quantified_proteins.pdf", path = "QC", width = 105, height = 148, units = "mm")

}


#### Numbers of quantified proteins SILAC

plot_num_SILAC_quant_prot <- function(x, Experimental_design = Experimental.design, ylim = c(0,2000), order.by = NULL, filename = "Quantified_proteins.pdf"){

  if(!(is.null(order.by)|is.character(order.by))){stop("order.by should be NULL or a column name from Experimental_design")}

  if(!is.null(order.by)){

    Experimental_design %>%

      arrange_(.dots = order.by) -> Experimental.design.temp

    x %>%
      select(matches("Ratio H/L normalized ")) %>%
      gather(`SILAC ratio experiment`, value) %>%
      left_join(Experimental_design[,c("SILAC ratio experiment","Experiment")]) %>%
      mutate(`Experiment` = factor(`Experiment`, levels = rev(Experimental.design.temp$`Experiment`))) %>%
      group_by(Experiment) %>%
      summarise(`Quantified proteins` = sum(value>-Inf, na.rm = TRUE)) %>%
      ggplot(aes(x = Experiment, y = `Quantified proteins`)) +
      geom_bar(stat = "identity", fill = "#a6cee3", color = "black")+
      theme_bw()+
      theme(axis.text.x = element_text(angle = 90,size = 12),
            axis.text.y = element_text(size = 12),
            axis.title.x = element_text(size = 14),
            axis.title.y = element_text(size = 14))+
      geom_text(aes(label = `Quantified proteins`), color = "red", hjust = -0.5)+
      coord_flip(ylim = ylim)+
      ggsave(filename = filename, path = "QC", width = 105, height = 148, units = "mm")

  } else {

    x %>%
      select(matches("Ratio H/L normalized ")) %>%
      gather(`SILAC ratio experiment`, value) %>%
      left_join(Experimental_design[,c("SILAC ratio experiment","Experiment")]) %>%
      group_by(Experiment) %>%
      summarise(`Quantified proteins` = sum(value>-Inf, na.rm = TRUE)) %>%
      mutate(Experiment = factor(Experiment, levels = rev(Experimental_design$Experiment))) %>%
      ggplot(aes(x = Experiment, y = `Quantified proteins`)) +
      geom_bar(stat = "identity", fill = "#a6cee3", color = "black")+
      theme_bw()+
      theme(axis.text.x = element_text(angle = 90,size = 12),
            axis.text.y = element_text(size = 12),
            axis.title.x = element_text(size = 14),
            axis.title.y = element_text(size = 14))+
      geom_text(aes(label = `Quantified proteins`), color = "red", hjust = -0.5)+
      coord_flip(ylim = ylim)+
      ggsave(filename = filename, path = "QC", width = 105, height = 148, units = "mm")

  }

}


### MA plot

plot_ma <- function(x, filename = "MA_plot.pdf"){

  x %>%
    as_tibble() %>%
    mutate(Significant = ifelse(adj.P.Val < 0.05,"+","")) -> temp.df

  temp.df %>%
  ggplot(aes(x = AveExpr, y = logFC, color = Significant))+
    scale_color_manual(values = c("grey","red"))+
    theme_bw()+
    coord_cartesian(ylim = c(-ceiling(max(abs(x$logFC),na.rm = TRUE)),ceiling(max(abs(x$logFC),na.rm = TRUE))))+
    geom_point(data = subset(temp.df, Significant %in% ""),alpha = 0.5)+
    geom_point(data = subset(temp.df, Significant %in% "+"),alpha = 0.5)+
    ggsave(path = "QC",filename = filename, width = 20, height = 15, units = "cm")

}

### add row names as Protein IDs

row_names_to_protein_IDs <- function(x){

  x %>%
    as.data.frame() %>%
    as_tibble() %>%
    rownames_to_column(var = "Protein IDs")

}

### row names to Proteins and extract Protein IDs, logFC, P.Value and adj.P.Val
limma_results_select_ProtIDs_logFC_PVal_adjPVal <- function(x){
    x %>%
    as.data.frame() %>%
    as_tibble() %>%
    rownames_to_column("Protein IDs") %>%
    select(`Protein IDs`,logFC,P.Value,adj.P.Val)
}

limma_results_select_ProtIDs_logFC_CI_PVal_adjPVal <- function(x){
  x %>%
    as.data.frame() %>%
    as_tibble() %>%
    rownames_to_column("Protein IDs") %>%
    select(`Protein IDs`,logFC, CI.L, CI.R, P.Value, adj.P.Val)
}

limma_results_select_ProtIDs_logFC_CI_AveExpr_PVal_adjPVal <- function(x){
  x %>%
    as.data.frame() %>%
    as_tibble() %>%
    rownames_to_column("Protein IDs") %>%
    select(`Protein IDs`,logFC, CI.L, CI.R, AveExpr, P.Value, adj.P.Val)
}

### row names to Proteins and extract Protein IDs, logFC, P.Value and adj.P.Val
limma_results_select_GeneID_logFC_PVal_adjPVal <- function(x){
  x %>%
    as.data.frame() %>%
    as_tibble() %>%
    rownames_to_column("Cross-reference (GeneID)") %>%
    select(`Cross-reference (GeneID)`,logFC,P.Value,adj.P.Val)
}

### row names to Proteins and extract Protein IDs, logFC, P.Value and adj.P.Val
limma_results_select_ProtIDs_logFC_adjPVal <- function(x){
  x %>%
    as.data.frame() %>%
    as_tibble() %>%
    rownames_to_column("Protein IDs") %>%
    select(`Protein IDs`,logFC,adj.P.Val)
}


### rename all but Protein IDs

limma_rename_all_but_proteinIDs <- function(x, chr_to_add){

  if(!is.character(chr_to_add)){"Stop, chr_to_add has to be a character vector"}

  x %>%
    rename_at(vars(-matches("Protein IDs")), funs(paste0(.,chr_to_add)))

}

limma_rename_all_but_GeneIDs <- function(x, chr_to_add){

  if(!is.character(chr_to_add)){"Stop, chr_to_add has to be a character vector"}

  x %>%
    rename_at(vars(-matches("Cross-reference \\(GeneID\\)")), funs(paste0(.,chr_to_add)))

}


### Significance according to adj.P.Value

limma_significant_adjPVal <- function(x,indicator = "+"){

  x %>%
    mutate(Significant = ifelse(adj.P.Val < 0.05,indicator,""))


}

### Significant enrichment according to adj.P.Value and logFC

limma_signif_enriched_adjPVal <- function(x,indicator = "+"){

  x %>%
    mutate(Enriched = ifelse(adj.P.Val < 0.05 & logFC > 0,indicator,""))


}

### Read uniprot annotation

get_uniprot_annotations_url <- function(x = "parameters.txt"){

  parameters <- read.delim(paste0("data/",x),stringsAsFactors = FALSE)

  fasta.file <- subset(parameters,grepl("_Mus_musculus|_Homo_sapiens_|_Saccharomyces_cerevisiae_|_Caenorhabditis_elegans_|_Drosophila_melanogaster_",Value))

  fasta.file %>%
    select(Value) %>%
    unnest(Value = strsplit(Value,";")) %>%
    filter(grepl("_Mus_musculus|_Homo_sapiens_|_Saccharomyces_cerevisiae_|_Caenorhabditis_elegans_|_Drosophila_melanogaster_",Value)) %>%
    mutate(Value = gsub("^.*\\\\","",Value)) %>%
    mutate(Value = gsub("\\.fasta","",Value)) -> match.key

  return(match.key)
}

#### imputation of LFQ data and imputation

LFQ_data_imputator_annotator <- function(data.frame.for.imputation){

  data.frame.for.imputation %>%
    LFQ_data_imputator() -> data.frame.for.imputation.imputed

  data.frame.for.imputation %>%
    imputed_values_annotator(imputed.data = data.frame.for.imputation.imputed) -> data.frame.for.imputation.imputed.long.annotated

  df.list <- list(data.frame.for.imputation.imputed,
                  data.frame.for.imputation.imputed.long.annotated)

  names(df.list) <- c("imputed.df","imputed.df.imputation.annotated")

  return(df.list)
}


#### histogram of LFQ imputed values
plot_lfq_imputation_histograms <- function(imputed.df,
                                           Experimental_design = Experimental.design,
                                           filename = "Hist_LFQ_intensity_imputation.pdf",
                                           height = 20,
                                           width = 28.7,
                                           ncol = 4,
                                           order.by = NULL,
                                           show.default.arguments = "no"){

  if(show.default.arguments %in% "yes"){ stop('Default arguments:\nimputed.df\nfilename = "Hist_LFQ_intensity_imputation.pdf"\nheight = 200\nwidth = 287\nshow.default.arguments = "no"')}

  if(!(is.null(order.by)|is.character(order.by))){stop("order.by should be NULL or a column name from Experimental_design")}

  if(!is.null(order.by)){

    Experimental_design %>%

      arrange_(.dots = order.by) -> Experimental.design.temp

    imputed.df$imputed.df.imputation.annotated %>%
      left_join(.,Experimental.design.temp[,c("LFQ intensity experiment","Experiment")]) %>%
      mutate(Experiment = factor(Experiment, levels = Experimental.design.temp$Experiment)) %>%
      group_by(Experiment,`Imputed values`) %>%
      summarise(proteins = n()) %>%
      ungroup() %>%
      filter(`Imputed values` == "") %>%
      mutate(value = +Inf) -> number.non.imputed.proteins

    # numbers of imputed proteins
    imputed.df$imputed.df.imputation.annotated %>%
      left_join(.,Experimental.design.temp[,c("LFQ intensity experiment","Experiment")]) %>%
      mutate(Experiment = factor(Experiment, levels = Experimental.design.temp$Experiment)) %>%
      group_by(Experiment,`Imputed values`) %>%
      summarise(proteins = n()) %>%
      ungroup() %>%
      filter(`Imputed values` == "+") %>%
      mutate(value = -Inf) -> number.imputed.proteins

    imputed.df$imputed.df.imputation.annotated %>%
      left_join(.,Experimental.design.temp[,c("LFQ intensity experiment","Experiment")]) %>%
      mutate(Experiment = factor(Experiment, levels = Experimental.design.temp$Experiment)) %>%
      ggplot(aes(x = value, fill = `Imputed values`),alpha = 0.5)+
      theme_bw()+
      geom_histogram()+
      scale_fill_manual(values = RColorBrewer::brewer.pal(3,"Dark2")[1:2])+
      facet_wrap(~ Experiment,ncol = ncol)+
      geom_text(y = +Inf, aes(x = value, label = proteins), data = number.non.imputed.proteins,color = "#1B9E77",hjust = 1.2,vjust = 1.5)+
      geom_text(y = +Inf, aes(x = value, label = proteins), data = number.imputed.proteins, color = "#D95F02",hjust = -0.2,vjust = 1.5)+
      ggsave(filename = filename, path = "QC", height = height, width = width, units = "cm", limitsize = FALSE)

  } else {

  # numbers of non imputed proteins
  imputed.df$imputed.df.imputation.annotated %>%
    left_join(.,Experimental_design[,c("LFQ intensity experiment","Experiment")]) %>%
    mutate(Experiment = factor(Experiment, levels = Experimental_design$Experiment)) %>%
    group_by(Experiment,`Imputed values`) %>%
    summarise(proteins = n()) %>%
    ungroup() %>%
    filter(`Imputed values` == "") %>%
    mutate(value = +Inf) -> number.non.imputed.proteins

  # numbers of imputed proteins
  imputed.df$imputed.df.imputation.annotated %>%
    left_join(.,Experimental_design[,c("LFQ intensity experiment","Experiment")]) %>%
    mutate(Experiment = factor(Experiment, levels = Experimental_design$Experiment)) %>%
    group_by(Experiment,`Imputed values`) %>%
    summarise(proteins = n()) %>%
    ungroup() %>%
    filter(`Imputed values` == "+") %>%
    mutate(value = -Inf) -> number.imputed.proteins

  imputed.df$imputed.df.imputation.annotated %>%
    left_join(.,Experimental_design[,c("LFQ intensity experiment","Experiment")]) %>%
    mutate(Experiment = factor(Experiment, levels = Experimental_design$Experiment)) %>%
    ggplot(aes(x = value, fill = `Imputed values`),alpha = 0.5)+
    theme_bw()+
    geom_histogram()+
    scale_fill_manual(values = RColorBrewer::brewer.pal(3,"Dark2")[1:2])+
    facet_wrap(~ Experiment,ncol = ncol)+
    geom_text(y = +Inf, aes(x = value, label = proteins), data = number.non.imputed.proteins,color = "#1B9E77",hjust = 1.2,vjust = 1.5)+
    geom_text(y = +Inf, aes(x = value, label = proteins), data = number.imputed.proteins, color = "#D95F02",hjust = -0.2,vjust = 1.5)+
    ggsave(filename = filename, path = "QC", height = height, width = width, units = "cm", limitsize = FALSE)

  }

}

### multiscatter

plot_multiscatter_lfq <- function(imputed.df, Experimental_design = Experimental.design, alpha = 0.25){

  imputed.df %>%
    gather(`LFQ intensity experiment`,value , -`Protein IDs`) %>%
    left_join(Experimental_design[,c("LFQ intensity experiment","Experiment")]) %>%
    filter(!is.na(Experiment)) %>%
    select(-`LFQ intensity experiment`) %>%
    spread(key = Experiment, value = value) %>%
    select(-`Protein IDs`) %>%
    ggpairs(upper = list(continuous = wrap("points", alpha = alpha)), lower = list(continuous = wrap("cor", size = 5)))+
    theme_bw()
}

### dendrogram

cluster_dendrogram <- function(imputed.df, Experimental_design = Experimental.design){

  imputed.df %>%
    gather(`LFQ intensity experiment`,value,-`Protein IDs`) %>%
    left_join(Experimental_design[,c("LFQ intensity experiment","Experiment")]) %>%
    select(-`LFQ intensity experiment`) %>%
    mutate(Experiment = factor(Experiment, levels = unique(unlist(Experimental_design$Experiment)))) %>%
    spread(key = Experiment, value = value) %>%
    remove_rownames() %>%
    column_to_rownames(var = "Protein IDs") %>%
    as.data.frame() %>%
    as.matrix() %>%
    t() %>%
    dist() %>%
    hclust(.,method = "average") %>%
    as.dendrogram()

}

### PCA

plot_PCA <- function(imputed.df, Experimental_design = Experimental.design,
                     filename = "PCA.pdf",
                     width = 20,
                     height = 22.5,
                     col.ind = NULL ){

  if(!(is.null(col.ind)|is.character(col.ind))){stop("order.by should be NULL or a column name from Experimental_design")}

  if(!is.null(col.ind)){

    Experimental_design %>%

      arrange_("Experiment") -> Experimental.design.temp

    imputed.df %>%
      gather(`LFQ intensity experiment`,value,-`Protein IDs`) %>%
      left_join(Experimental.design.temp[,c("LFQ intensity experiment","Experiment")]) %>%
      filter(!is.na(Experiment)) %>%
      select(-`LFQ intensity experiment`) %>%
      spread(key = Experiment, value = value) %>%
      remove_rownames() %>%
      column_to_rownames(var = "Protein IDs") %>%
      t() %>%
      as.data.frame() %>%
      as_tibble() %>%
      PCA(graph = FALSE) %>%
      fviz_pca_ind(repel = TRUE,col.ind = factor(unlist(Experimental.design.temp[,col.ind])),mean.point = FALSE)
    ggsave(path = "QC",filename = filename, width = width, height = height, units = "cm")


  } else {

    protein.data.imputed$imputed.df %>%
      #imputed.df %>%
      gather(`LFQ intensity experiment`,value,-`Protein IDs`) %>%
      left_join(Experimental.design.temp[,c("LFQ intensity experiment","Experiment")]) %>%
      filter(!is.na(Experiment)) %>%
      select(-`LFQ intensity experiment`) %>%
      spread(key = Experiment, value = value) %>%
      remove_rownames() %>%
      column_to_rownames(var = "Protein IDs") %>%
      t() %>%
      as.data.frame() %>%
      as_tibble() %>%
      PCA(graph = FALSE) %>%
      fviz_pca_ind(repel = TRUE,mean.point = FALSE,palette = "RdBu")
    ggsave(path = "QC",filename = filename, width = width, height = height, units = "cm")

  }

}

### volcanoplot; significance

plot_volcanoplot_significant <- function(limma.results ,path = "results",filename = "volcanoplot.pdf", xaxis.lab = "logFC"){

  if(!(is.null(xaxis.lab)|is.character(xaxis.lab))){stop("renamelogFC has to be NULL or a character vector of length 1")}
  if(!("Gene names  (primary )" %in% names(limma.results))){stop("Gene names  (primary ) is missing")}


  limma.results %>%
    ggplot(aes(x = `logFC`, y = -log10(adj.P.Val), color = Significant))+
    geom_point(alpha = 0.5)+
    scale_color_manual(values = c("grey","red"))+
    coord_cartesian(xlim = c(-ceiling(max(abs(limma.results$logFC),na.rm = TRUE)),ceiling(max(abs(limma.results$logFC),na.rm = TRUE))))+
    theme_bw()+
    geom_text_repel(data = subset(limma.results, Significant %in% "+"),
                    aes(label = `Gene names  (primary )`),
                    color = "black")+
    labs(x = xaxis.lab)+
    ggsave(path = path,filename = filename, width = 30, height = 20, units = "cm")

}

### volcanoplot; significance cutoff

plot_volcanoplot_adjpval.cutoff <- function(limma.results, path = "results", filename = "volcanoplot.pdf", xaxis.lab = "logFC", adj.P.Val_cutoff = 0.05){

  if(!(is.null(xaxis.lab)|is.character(xaxis.lab))){stop("renamelogFC has to be NULL or a character vector of length 1")}
  if(!("Gene names  (primary )" %in% names(limma.results))){stop("Gene names  (primary ) is missing")}

  limma.results %>%
    ggplot(aes(x = `logFC`, y = -log10(adj.P.Val), color = Significant))+
    geom_point(alpha = 0.5)+
    scale_color_manual(values = c("grey","red"))+
    coord_cartesian(xlim = c(-ceiling(max(abs(limma.results$logFC),na.rm = TRUE)),ceiling(max(abs(limma.results$logFC),na.rm = TRUE))))+
    theme_bw()+
    geom_text_repel(data = subset(limma.results, adj.P.Val < adj.P.Val_cutoff),
                    aes(label = `Gene names  (primary )`),
                    color = "black")+
    labs(x = xaxis.lab)+
    ggsave(path = path, filename = filename, width = 30, height = 20, units = "cm")

}

plot_volcanoplot_pval.cutoff <- function(limma.results, path = "results", filename = "volcanoplot.pdf", xaxis.lab = "logFC", P.Val_cutoff = 0.05, xlim = NULL){

  if(!(is.null(xaxis.lab)|is.character(xaxis.lab))){stop("renamelogFC has to be NULL or a character vector of length 1")}
  if(!("Gene names  (primary )" %in% names(limma.results))){stop("Gene names  (primary ) is missing")}

  if(is.null(xlim)){
  limma.results %>%
    ggplot(aes(x = `logFC`, y = -log10(P.Value), color = Significant))+
    geom_point(alpha = 0.5)+
    scale_color_manual(values = c("grey","red"))+
    coord_cartesian(xlim = c(-ceiling(max(abs(limma.results$logFC),na.rm = TRUE)),ceiling(max(abs(limma.results$logFC),na.rm = TRUE))))+
    theme_bw()+
    geom_text_repel(data = subset(limma.results, P.Value < P.Val_cutoff),
                    aes(label = `Gene names  (primary )`),
                    color = "black")+
    labs(x = xaxis.lab)+
    ggsave(path = path, filename = filename, width = 30, height = 20, units = "cm")
  } else {
    limma.results %>%
      ggplot(aes(x = `logFC`, y = -log10(P.Value), color = Significant))+
      geom_point(alpha = 0.5)+
      scale_color_manual(values = c("grey","red"))+
      coord_cartesian(xlim = xlim)+
      theme_bw()+
      geom_text_repel(data = subset(limma.results, P.Value < P.Val_cutoff),
                      aes(label = `Gene names  (primary )`),
                      color = "black")+
      labs(x = xaxis.lab)+
      ggsave(path = path, filename = filename, width = 30, height = 20, units = "cm")
  }

}

### add Inge and Maria's annotations

add_mito_dysf_annot <- function(protein.data,
                                mito.dysf.annot.Prot.ID = mito.dysf.annot.Prot.ID,
                                mito.dysf.annot.Gene.name = mito.dysf.annot.Gene.name){

  protein.data %>%
    left_join(mito.dysf.annot.Gene.name[,c("Gene name","Annotations; Mitochondrial dysfunction, PMID:29132502; Gene name")],
              by = c("Gene names  (primary )" = "Gene name")) %>%
    left_join(mito.dysf.annot.Prot.ID[,c("Protein ID","Annotations; Mitochondrial dysfunction, PMID:29132502; Protein ID")],
              by = c("Priority protein ID" = "Protein ID")) %>%
    mutate(`Annotations; Mitochondrial dysfunction, PMID:29132502` = `Annotations; Mitochondrial dysfunction, PMID:29132502; Gene name`) %>%
    mutate(`Annotations; Mitochondrial dysfunction, PMID:29132502` = ifelse(is.na(`Annotations; Mitochondrial dysfunction, PMID:29132502`),`Annotations; Mitochondrial dysfunction, PMID:29132502; Protein ID`,`Annotations; Mitochondrial dysfunction, PMID:29132502`)) %>%
    select(-`Annotations; Mitochondrial dysfunction, PMID:29132502; Gene name`,
           -`Annotations; Mitochondrial dysfunction, PMID:29132502; Protein ID`)


}


### Calculate enrichment based on logFC; calculating the proportion of logFC <0 from the logFC >0
limma_calculate_enrichment_fdr <- function(limma.result){

  ### the function assumes that this is a lima resunts and that it is ordered by increasing adj.P.Val

  limma.result %>%
    mutate(`Enrichment FDR` = NA) %>%
    mutate(`Enrichment FDR` = as.numeric(`Enrichment FDR`)) -> temp.df

  for(i in 1:nrow(temp.df)){

    Signif.enriched <- numeric(length = 1)

    Signif.deenriched <- numeric(length = 1)

    temp.df[1:i,] -> temp.df.2

    Signif.enriched <- nrow(temp.df.2[temp.df.2$logFC > 0,])

    Signif.deenriched <- nrow(temp.df.2[temp.df.2$logFC < 0,])

    temp.df$`Enrichment FDR`[i] <- Signif.deenriched/Signif.enriched


  }

  temp.df$`Enrichment FDR` <- ifelse(is.infinite(temp.df$`Enrichment FDR`),1,temp.df$`Enrichment FDR`)

  temp.df

}

####


### annotate enrichment FDR
limma_annotate_enriched <- function(limma.result, fdr_cutoff = 0.05){

  if(nrow(subset(limma.result,`Enrichment FDR` < fdr_cutoff)) > 0){

    highest.adj.P.val <- max(unlist(limma.result$adj.P.Val[limma.result$`Enrichment FDR` <= fdr_cutoff]))

    limma.result %>%
    mutate(`Enriched` = ifelse(logFC>0 & adj.P.Val <= highest.adj.P.val,"+",""))

    } else {

      limma.result %>%
        mutate(`Enriched` = "")

  }
}

####
limma_annotate_enriched_P_value <- function(limma.result, fdr_cutoff = 0.05){

  if(nrow(subset(limma.result,`Enrichment FDR` < fdr_cutoff)) > 0){

    highest.P.Value <- max(unlist(limma.result$P.Value[limma.result$`Enrichment FDR` <= fdr_cutoff]))

    limma.result %>%
      mutate(`Enriched` = ifelse(logFC>0 & P.Value <= highest.P.Value,"+",""))

  } else {

    limma.result %>%
      mutate(`Enriched` = "")

  }
}
####

limma_regulation_adjPVal <- function(x){

  x %>%
    mutate(Regulation = ifelse(adj.P.Val < 0.05 & logFC > 0,"Up-regulated",
                               ifelse(adj.P.Val < 0.05 & logFC < 0,"Down-regulated","")))
}


####
substract_global_median <- function(x){

  global.median <- median(x, na.rm = TRUE)
  y <- x-global.median
  y
}

### limma row names to Protein IDs

limma_rownames_proteinIDs <- function(x){

  x %>%
    as.data.frame() %>%
    as_tibble() %>%
    rownames_to_column("Protein IDs")
}