Skip to content

Commit

Permalink
latest R workshop material
Browse files Browse the repository at this point in the history
  • Loading branch information
FranziMe committed Oct 5, 2021
1 parent 078602f commit 7ec077f
Show file tree
Hide file tree
Showing 10 changed files with 1,068 additions and 5 deletions.
7 changes: 5 additions & 2 deletions R_workshop.2.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
title: "R Workshop -- Day 2"
author: "Franziska Metge"
output:
pdf_document: default
html_document: default
pdf_document: default
---


Expand Down Expand Up @@ -705,6 +705,7 @@ install.packages("pheatmap")
install.packages("RColorBrewer")
install.packages("PoiClaClu")
install.packages("ggbeeswarm")
install.packages("dbplyr")
library("magrittr")
library("dplyr")
Expand Down Expand Up @@ -735,6 +736,7 @@ BiocManager::install("Gviz")
BiocManager::install("sva")
BiocManager::install("RUVSeq")
BiocManager::install("fission")
BiocManager::install("tximeta")
library("airway")
library("GenomicFeatures")
Expand All @@ -746,12 +748,13 @@ library("vsn")
library("apeglm")
library("genefilter")
library("AnnotationDbi")
library("org.Hs.eg.,m db")
library("org.Hs.eg.db")
library("ReportingTools")
library("Gviz")
library("sva")
library("RUVSeq")
library("fission")
library("tximeta")
```


2 changes: 1 addition & 1 deletion R_workshop.Rmd
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
---
title: "R Workshop -- Introduction to R"
output:
pdf_document: default
html_document: default
pdf_document: default
fontsize: 8pt
---

Expand Down
4 changes: 2 additions & 2 deletions R_workshop.html
Original file line number Diff line number Diff line change
Expand Up @@ -692,11 +692,11 @@ <h3>Repeat</h3>
<h3>Random numbers</h3>
<pre class="r"><code># random numbers from a normal distribution
rnorm(n = 5, mean = 5, sd = 1)
## [1] 7.494026 4.626585 6.750520 4.668170 4.534657
## [1] 3.073082 5.903837 5.941621 5.891309 4.580985

# random numbers from a uniform distribution
runif(n = 5, min = 1, max = 5)
## [1] 1.631906 3.175063 1.543790 2.199235 2.108228</code></pre>
## [1] 4.910746 3.200908 2.756291 4.979401 3.739086</code></pre>
<p>Other distributions in R include:</p>
<ul>
<li>Normal: <code>rnorm(n, mean, sd)</code></li>
Expand Down
291 changes: 291 additions & 0 deletions scripts/day2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
for(i in 1:nrow(Experiment_DEG)){
ttest = t.test(Experiment_DEG[i, 2:5], Experiment_DEG[i, 6:9])
Experiment_DEG[i, 'pvals'] = as.numeric(ttest$p.value)
}

sig_genes = subset(Experiment_DEG, pvals < 0.05)

head(sig_genes)
nrow(sig_genes)



Square <- function(x){

result = x * x

return(result)

}

exponent_2_and_3 <- function(x = 3){
sq = x*x
cb = x ^ 3

return(c(sq, cb))
}

ex23 = exponent_2_and_3(4)
ex23[2]



exponent_2_and_3 <- function(x = 3){
sq = x*x
cb = x ^ 3

return(list( square = sq, cube = cb))
}

ex23 = exponent_2_and_3(4)



my_log2fc <- function(x, y){

result = log2(y/x)

return(result)
}

my_log2fc(3, 6)


my_bigger <- function(x,y) {

if( x > y ){
return(x)
} else if ( x == y){
return( c(x,y))
} else if (x < y){
return(y)
}
}



y <- numeric(0)
y = append(y, 8)



myVolcanoPlot <- function(D, group1, group2, alpha ){
# Input:
# D > dataframe to be analyzed
# group1 > column indeces for group1
# group2 > column indeces for group2
# alpha > p.value cut off for significant genes

# this function calculates the pvalue using a t.test
# the group means and the log2fc and makes a plot

# calculate the pvalue for each row using apply
for(i in 1:nrow(D)){
# calculate p.value
D[i,'p.value'] = as.numeric(t.test(D[i,group1], D[i,group2])$p.value)

# calculate mean for group 1
D[i,'mean.g1'] = mean(as.numeric(D[i,group1]), na.rm = TRUE)

# calculate mean for group 2
D[i,'mean.g2'] = mean(as.numeric(D[i,group2]), na.rm = TRUE)

# calculate log2 fold change
D[i,'log2fc'] = log2(D[i,'mean.g2']/D[i,'mean.g1'])
}

D[, 'q.value'] = p.adjust(D[,'p.value'], method = 'fdr')
plot(D[,'log2fc'], -log10(D[,'q.value']), pch = 19,
xlab = 'log2fc', ylab = '-log10(q.value)', cex.axis = 1.3, cex.lab = 1.3,
col = ifelse(D[,'q.value'] <= alpha,
ifelse(D[, 'log2fc'] < 0, '#2166ac50', '#b2182b50'),
'#bdbdbd30')) # nested ifelse call to make three colors
# add legend
legend('bottomleft', c('n.s', 'sig down', 'sig up'), col = c('#bdbdbd30', '#2166ac50', '#b2182b50'), pch = 19, cex = 0.8, bty = 'n')

return(D)
}

head(Experiment_DEG)
AD = myVolcanoPlot(Experiment_DEG, 2:5, 6:9, 0.05)


# plotting
x = runif(100, 20, 80)
y = rnorm(100, mean = x, sd = 10)

plot(x, y, main = 'Simple Plot', xlab = 'WT', ylab = 'Treat1',
cex.main = 1, cex.axis = 1, cex.lab = 1, cex = 2, pch = '1',
col = '#bebaff50')


plot(x, y, main = 'Simple Plot', xlab = 'WT', ylab = 'Treat1',
cex.main = 2, cex.axis = 1.5, cex.lab = 1.5,
pch = ifelse(abs(x - mean(x)) > sd(x), 6, 17),
col = ifelse(y > x, 'firebrick2', 'dodgerblue2')
)

legend('topleft', legend = c('outside sd', 'inside sd'), pch = c(6,17), cex = .8)
legend('bottomright', legend = c('y > x', 'y < x'),
col = c('firebrick2', 'dodgerblue2'), pch = 19, bty = 'n',
cex = .8, title = 'color')


abline(a = 0 , b =1)


airway.D = read.delim('data/airway_gene_expression.tsv', header = T, as.is = T)
airway.I = read.delim('data/airway_sample_info.tsv', header = T, as.is = T)

RM = rowMeans(airway.D[, 2:9])

airway.expressed = subset(airway.D, RM > 1)

# calculate p.value
for(row in 1:nrow(airway.expressed)){
airway.expressed[row, 'pvalue'] = t.test(airway.expressed[row, c(2,4,6,8)],
airway.expressed[row, c(3,5,7,9)])$p.value

}

airway.expressed[ ,'mean.before'] = rowMeans(airway.expressed[airway.I[airway.I$Intervention == "before",]$SampleName])
airway.expressed[, 'mean.after'] = rowMeans(airway.expressed[airway.I[airway.I$Intervention == "after",]$SampleName])

# caclulate log2fc
airway.expressed[, 'log2fc'] = log2(airway.expressed[, 'mean.after']/airway.expressed[ ,'mean.before'])


# plot log2fc and -log10(pvalue)
plot(x = airway.expressed[, 'log2fc'], y = -log10(airway.expressed$pvalue), pch = 19,
col = ifelse(airway.expressed$pvalue <= 0.05, 'red', 'gray'))

abline(h = -log10(0.01), lty = 3 )
abline(v= 0, lwd = 3)

# plot Volcano plot
mean_groupA = rowMeans(Experiment_DEG[,2:5])
mean_groupB = rowMeans(Experiment_DEG[,6:9])
log2fc = log2(mean_groupB/mean_groupA)

plot(x = log2fc, y = -log10(Experiment_DEG$pvals),
col = ifelse(Experiment_DEG$pvals <= 0.05, 'red', 'gray'),
main = 'Volcano Plot', pch = 19, ylab = '-log10(pvalue)')
legend('bottomright', legend = c('significant', 'n.s.'), col = c('red', 'gray'), pch = 19)


# PCA
wine <- read.csv('data/wine.csv')


winePCA <- prcomp(scale(wine[, -1]))

summary(winePCA)
names(winePCA)


head(winePCA$x)
plot(x = winePCA$x[,1], y = winePCA$x[,'PC2'],
col = ifelse(wine$Cvs == 1, 'green', ifelse(wine$Cvs == 2, 'blue', 'red')),
pch = 19,
xlab = 'PC 1 [36%]',
ylab = paste('PC2 [', round(summary(winePCA)$importance[2,2] * 100, 1), '%]'))


# calculate the PCA for airway.expressed
pca = prcomp(t(airway.expressed[,2:9]))

plot(x = pca$x[, 'PC1'], y = pca$x[, 'PC2'],
col = ifelse(airway.I$Intervention == 'before', 'red', 'black'),
pch = rep(c(8, 11, 16, 18), each = 2))




x = rnorm(1000, mean = 20, sd = 80)

x.hist = hist(x, breaks = 20, freq = TRUE)

abline(v = 20)



hist(x, xlim = c(-350,350), ylim = c(0, 500) )


x = runif(100, 20, 80)
y = rnorm(100, x, 10)

hist.x = hist(y, xlim = c(0,100), col = 'gray')
hist(x, col = '#00FFFF50', add = T)

'#RRGGBBXX'


# heatmap

example_file <- system.file ("extra/TagSeqExample.tab", package="DESeq")
data <- read.delim(example_file, header=T, row.names="gene")

data_subset <- as.matrix(data[rowSums(data)>50000,])

pheatmap(data_subset)


# plot a pheatmap with our data
pheatmap(as.matrix(airway.expressed[1:300, 2:9]), scale = 'row')

# plot only significant genes
airway.significant = subset(airway.expressed, pvalue <= 0.05)


pheatmap(as.matrix(airway.significant[, 2:9]), scale = 'row')

# Venndiagrams
genes1 = paste('gene', seq(1, 30))
genes2 = paste('gene', seq(1, 50, 2))


venn.diagram(list(WT = genes1, group2 = genes2),
filename = 'venn1.tiff', col = 'gray', fill = c('blue', 'red'))

genes.overlap = calculate.overlap(list(WT = genes1, group2 = genes2))

M = matrix(c(12, 13, 11, 9, 5, 7 ), byrow = T, ncol = 3)

par(mar = c(10, 5, 4, 4))
barplot(colMeans(airway.D[, 2:9]), las = 2)

barplot(colMeans(airway.D[, 2:9]))

dev.off()


x = rnorm(100, 10, 2)
y = rnorm(100, 15, 2)
boxplot(list(x = x,y = y), names = c('wt', 'ko'), col = 'cornflowerblue')

x.and.y = c(x, y)


groups.x.and.y = c(rep('treatment 1', 50), rep('treatment 2', 80), rep( 'treatment 3', 70))
boxplot(x.and.y ~ groups.x.and.y)

pdf('boxplot.pdf', width = 20, height = 10)
boxplot(log2(airway.expressed[,2:9]), las = 2)
dev.off()

png('some_name.png')
boxplot(log2(airway.expressed[,2:9]), las = 2)
dev.off()



M = matrix(c(12, 13, 11, 9, 5, 7 ), byrow = T, ncol = 3)
bp = barplot(M, beside = T, main = 'Barplot, beside = T',
names.arg = c('A', 'B', 'C'), ylim = c(0, 15))


text(x = colMeans(bp), y = c(12, 13, 11), labels = c('**', '***', 'n.s'),
cex = 1.5, pos = 0)
mtext('Samples', 1, 3)
mtext('Samples', 2, 2)
42 changes: 42 additions & 0 deletions scripts/deseq2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
library("airway")
dir <- system.file("extdata", package="airway", mustWork=TRUE)

csvfile <- file.path(dir, "sample_table.csv")
coldata <- read.csv(csvfile, row.names=1, stringsAsFactors=FALSE)
coldata

coldata <- coldata[1:2,]
coldata$names <- coldata$Run
coldata$files <- file.path(dir, "quants", coldata$names, "quant.sf.gz")
file.exists(coldata$files)

install.packages("dbplyr")
BiocManager::install("tximeta")

library("tximeta")
se <- tximeta(coldata)

dim(se)


# skip first part and continue with part 2.5
library("airway")

data(gse)
assayNames(gse)
head(assay(gse), 3)
colData(gse)

# for section 3.1
BiocManager::install("DESeq2")
library(DESeq2)

# for section 4.2
install.packages('hexbin')

# for section 4.5
install.packages('glmpca')

# for section 6.5
BiocManager::install("IHW")

Loading

0 comments on commit 7ec077f

Please sign in to comment.