Skip to content

Commit

Permalink
adapting conseauences of filtering before normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
NatanYusupov committed Apr 12, 2024
1 parent b25f141 commit e2e8455
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 17 deletions.
1 change: 1 addition & 0 deletions scripts/2_import_raw_DNAm_data.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ saveRDS(summary_table_preprocessing,
RGSet_original
Mset_original
RatioSet_original
gRatioSet_original
```

## User report
Expand Down
3 changes: 3 additions & 0 deletions scripts/3_quality_control.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ knitr::opts_chunk$set(echo = FALSE)
```{r load data, include=FALSE}
load(paste0(user_choices$project_name, "/processed_data/RGSet_original.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/Betas_original.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/gRatioSet_original.Rdata"))
phenotype_data <- read.csv(paste0(user_choices$project_name, "/processed_data/phenotype_data.csv"), sep = ",")
summary_table_preprocessing <- readRDS(paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds"))
```
Expand Down Expand Up @@ -97,6 +98,8 @@ keep <- colMeans(detP_original) < user_choices$detP_threshold
RGSet_quality <- RGSet_original[,keep]
save(RGSet_quality, file = paste0(user_choices$project_name, "/processed_data/RGSet_quality.Rdata"))
gRatioSet_quality <- gRatioSet_original[,keep]
save(gRatioSet_quality, file = paste0(user_choices$project_name, "/processed_data/gRatioSet_quality.Rdata"))
Betas_quality <- Betas_original[,keep]
save(Betas_quality, file = paste0(user_choices$project_name, "/processed_data/Betas_quality.Rdata"))
detP_quality <- detP_original[,keep]
Expand Down
6 changes: 3 additions & 3 deletions scripts/4_check_matching_of_epigenetic_sex.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@ knitr::opts_chunk$set(echo = FALSE)
```

```{r load data, include=FALSE}
load(paste0(user_choices$project_name, "/processed_data/gRatioSet_original.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/gRatioSet_quality.Rdata"))
phenotype_data <- read.csv(paste0(user_choices$project_name, "/processed_data/phenotype_data.csv"), sep = ",")
```

# Description

This script:

* predicts sex with DNA methylation data (gRatioSet_original, chip-wide medians
* predicts sex with DNA methylation data (gRatioSet_quality, chip-wide medians
of measurements on sex chromosomes)
* compares epigenetically predicted sex with phenotype information
* reports samples with mismatches. These will be excluded in script 5
Expand All @@ -50,7 +50,7 @@ fail the sex match check
## Prediction of sex with DNA methylation data

```{r, predict sex with DNAm data, include= FALSE}
predictedSex <- getSex(gRatioSet_original, cutoff=-2)
predictedSex <- getSex(gRatioSet_quality, cutoff=-2)
sex_df <- phenotype_data %>%
select(personid, arrayid, reported_sex = sex) %>%
Expand Down
4 changes: 4 additions & 0 deletions scripts/5_samples_exclusion.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ detP_mean_fails_df <- readRDS(paste0(user_choices$project_name, "/reports/detP_m
sex_df <- readRDS(paste0(user_choices$project_name, "/reports/predicted_sex.rds"))
summary_table_preprocessing <- readRDS(paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds"))
load(paste0(user_choices$project_name, "/processed_data/RGSet_quality.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/gRatioSet_quality.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/Betas_quality.Rdata"))
load(paste0(user_choices$project_name, "/reports/detP_quality.Rdata"))
```
Expand Down Expand Up @@ -93,16 +94,19 @@ saveRDS(exclusion_before_norm_vector, paste0(user_choices$project_name, "/report
# subset objects
if (!identical(exclusion_arrayids, NULL)){
RGSet_clean <- RGSet_quality[ , !(colnames(RGSet_quality) %in% exclusion_arrayids)]
gRatioSet_clean <- gRatioSet_quality[ , !(colnames(gRatioSet_quality) %in% exclusion_arrayids)]
Betas_clean <- Betas_quality[ , !(colnames(Betas_quality) %in% exclusion_arrayids)]
detP_clean <- detP_quality[, !(colnames(detP_quality) %in% exclusion_arrayids)]
} else {
RGSet_clean <- RGSet_quality
gRatioSet_clean <- gRatioSet_quality
Betas_clean <- Betas_quality
detP_clean <- detP_quality
}
PhenoData_clean <- pData(RGSet_clean)
# save objects
save(RGSet_clean, file = paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata"))
save(gRatioSet_clean, file = paste0(user_choices$project_name, "/processed_data/gRatioSet_clean.Rdata"))
save(Betas_clean, file = paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata"))
save(detP_clean, file = paste0(user_choices$project_name, "/processed_data/detP_clean.Rdata"))
save(PhenoData_clean, file = paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata"))
Expand Down
15 changes: 7 additions & 8 deletions scripts/6_filtering_cpgs.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ knitr::opts_chunk$set(echo = FALSE)
```{r load data, include=FALSE}
summary_table_preprocessing <- readRDS(paste0(user_choices$project_name, "/reports/summary_table_preprocessing.rds"))
load(paste0(user_choices$project_name, "/processed_data/RGSet_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/gRatioSet_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/Betas_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/PhenoData_clean.Rdata"))
load(paste0(user_choices$project_name, "/processed_data/detP_clean.Rdata"))
Expand Down Expand Up @@ -62,10 +63,10 @@ detP_clean_filtered <- detP_clean[match(rownames(Betas_clean),rownames(detP_clea
keep_betas <- rowSums(detP_clean_filtered < 0.01) == ncol(Betas_clean)
Betas_clean_filtered <- Betas_clean[keep_betas,]
# ensure probes are in the same order in cleaned RGChannelSet and detP objects, then subset
detP_clean_filtered2 <- detP_clean[match(featureNames(RGSet_clean),rownames(detP_clean)),]
keep_rgset <- rowSums(detP_clean_filtered2 < 0.01) == ncol(RGSet_clean)
RGSet_clean_filtered <- RGSet_clean[keep_rgset,]
# ensure probes are in the same order in cleaned gRatioSet (RGSet with annotations) and detP objects, then subset
detP_clean_filtered2 <- detP_clean[match(featureNames(gRatioSet_clean),rownames(detP_clean)),]
keep_rgset <- rowSums(detP_clean_filtered < 0.01) == ncol(gRatioSet_clean)
RGSet_clean_filtered <- gRatioSet_clean[keep_rgset,]
```

```{r, info output for user failed probes, results='asis'}
Expand Down Expand Up @@ -146,8 +147,7 @@ summary_table_preprocessing <- bind_rows(summary_table_preprocessing, table_prep
```{r filter out probes affected by snps, include=FALSE}
if (user_choices$array_type == "v1") {
RGSet_clean_filtered <- dropLociWithSnps(RGSet_clean_filtered) # function requires a GenomicRatioSet
Betas_clean_filtered <- Betas_clean_filtered[rownames(Betas_clean_filtered) %in% featureNames(RGSet_clean_filtered),]
Betas_clean_filtered <- Betas_clean_filtered[rownames(Betas_clean_filtered) %in% featureNames(RGSet_clean_filtered),]
}
```

Expand Down Expand Up @@ -284,13 +284,12 @@ saveRDS(summary_table_preprocessing, paste0(user_choices$project_name, "/reports
```

```{r, save data, include=FALSE}
save(Betas_clean_filtered, file = paste0(user_choices$project_name, "/processed_data/Betas_clean_filtered.Rdata"))
save(RGSet_clean_filtered, file = paste0(user_choices$project_name, "/processed_data/RGSet_clean_filtered.Rdata"))
Betas_clean_filtered <- getBeta(RGSet_clean_filtered) # beta values
save(Betas_clean_filtered, file = paste0(user_choices$project_name, "/processed_data/Betas_clean_filtered.Rdata"))
Ms_clean_filtered <- getM(RGSet_clean_filtered)
Ms_clean_filtered <- getM(RGSet_clean_filtered) # M values
save(Ms_clean_filtered, file = paste0(user_choices$project_name, "/processed_data/Ms_clean_filtered.Rdata"))
```

Expand Down
18 changes: 12 additions & 6 deletions scripts/7_normalization.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -73,16 +73,22 @@ save(annotations_clean_filtered, file = paste0(user_choices$project_name, "/repo

```{r normalize probes, include=FALSE}
# Normalization of filtered data
# Matrix of probes removed in all prior filtering steps
exclusion_matrix <- Betas_clean[!rownames(Betas_clean) %in% rownames(Betas_clean_filtered), ]
# Exclude all probes filtered script 5 from steps before to improve preprocessing
RGSet_clean_filtered <- subsetByLoci(RGSet_clean, excludeLoci = rownames(exclusion_matrix))
RGSet_clean_filtered_quantile <- preprocessQuantile(RGSet_clean_filtered)
save(RGSet_clean_filtered_quantile, file = paste0(user_choices$project_name,
"/processed_data/RGSet_clean_filtered_quantile.Rdata")) # output: GenomicRatioSet
Betas_clean_filtered_quantile <- getBeta(RGSet_clean_filtered_quantile)
save(Betas_clean_filtered_quantile, file = paste0(user_choices$project_name,
"/processed_data/Betas_clean_filtered_quantile.Rdata"))
Mset_clean_filtered_qunatile <- getM(RGSet_clean_filtered_quantile)
save(Mset_clean_filtered_qunatile, file = paste0(user_choices$project_name,
"/processed_data/Mset_clean_filtered_qunatile.Rdata"))
Ms_clean_filtered_qunatile <- getM(RGSet_clean_filtered_quantile)
save(Ms_clean_filtered_qunatile, file = paste0(user_choices$project_name,
"/processed_data/Ms_clean_filtered_qunatile.Rdata"))
# further normalization with BMIQ:
probeType <- as.data.frame(annotations_clean_filtered[rownames(Betas_clean_filtered_quantile),c("Name","Type")])
probeType$probeType = ifelse(probeType$Type %in% "I", 1, 2)
Expand All @@ -99,9 +105,9 @@ save(RGSet_clean_unfiltered_quantile, file = paste0(user_choices$project_name,
Betas_clean_unfiltered_quantile <- getBeta(RGSet_clean_unfiltered_quantile)
save(Betas_clean_unfiltered_quantile, file = paste0(user_choices$project_name,
"/processed_data/Betas_clean_unfiltered_quantile.Rdata"))
Mset_clean_unfiltered_qunatile <- getM(RGSet_clean_unfiltered_quantile)
save(Mset_clean_unfiltered_qunatile, file = paste0(user_choices$project_name,
"/processed_data/Mset_clean_unfiltered_qunatile.Rdata"))
Ms_clean_unfiltered_qunatile <- getM(RGSet_clean_unfiltered_quantile)
save(Ms_clean_unfiltered_qunatile, file = paste0(user_choices$project_name,
"/processed_data/Ms_clean_unfiltered_qunatile.Rdata"))
# further normalization with BMIQ:
probeType <- as.data.frame(annotations_clean[rownames(Betas_clean_unfiltered_quantile),c("Name","Type")])
probeType$probeType = ifelse(probeType$Type %in% "I", 1, 2)
Expand Down

0 comments on commit e2e8455

Please sign in to comment.