Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
stabilizeCounts/stabilizecounts.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
116 lines (99 sloc)
4.38 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(Rcpp) | |
sourceCpp("~/software/R/stabilizeCounts/stabilizecounts.cpp") | |
#regions: genomic ranges file with the regions of interest | |
#if they are not multiples of binsize base pairs the last bin in each | |
#resulting signal will correspond to less than binsize base pairs. | |
#bampath: path to a bam file | |
#binsize: size of each bin in base pairs | |
#...: the options 'shift', 'paired.end' and 'paired.end.midpoint' are | |
#supported. To see what they do type ?bamProfile | |
#details: it splits calls to bamProfile into chromosomes | |
stabilizeCounts <- function(regions, bampath, binsize, ...){ | |
require(bamsignals) | |
require(GenomicRanges) | |
if (binsize < 1){ | |
stop("provide a binsize greater or equal to 1") | |
} else if (binsize > 1 && any((width(regions) %% binsize) != 0)){ | |
warning("some ranges' widths are not a multiple of the selected | |
binsize, some bins will correspond to less than binsize basepairs") | |
} | |
pu <- bamProfile(bampath=bampath, gr=regions, binsize=1, ...) | |
lapply(pu, stabilizeBins, binsize=binsize) | |
} | |
### example code | |
if (F){ | |
#install the bamsignals package | |
source("http://bioconductor.org/biocLite.R") | |
biocLite("bamsignals") | |
library(bamsignals) | |
#load a toy dataset | |
library(GenomicRanges) | |
bampath <- system.file("extdata", "randomBam.bam", package="bamsignals") | |
genes <- | |
get(load(system.file("extdata", "randomAnnot.Rdata", package="bamsignals"))) | |
stabCounts <- stabilizeCounts(genes, bampath, binsize=200) | |
# construct genome regions | |
require(BSgenome.Hsapiens.UCSC.hg19) | |
genome <- cbind(names(seqlengths(Hsapiens)), seqlengths(Hsapiens)) | |
genome <- genome[1:25,] | |
gr <- GRanges(genome[,1], IRanges(start = 1, end = as.numeric(genome[,2]))) | |
#compute the stabilized counts for bigger dataset | |
k562.bam <- "/project/epigenome/K562/H3K4me3.bam" | |
h1.bam <- "/project/epigenome/H1/H3K4me3.bam" | |
k562.counts <- bamProfile(bampath=k562.bam, gr=gr, 500, 20) | |
k562.stabCounts <- | |
unlist(stabilizeCounts(bampath=k562.bam, regions=gr, binsize=500, mapq=20)) | |
h1.counts <- bamProfile(bampath=h1.bam, gr=gr, 500, 20) | |
h1.stabCounts <- | |
unlist(stabilizeCounts(bampath=h1.bam, regions=gr,binsize=500, mapq=20)) | |
#plot something | |
reducedSmoothScatter <- function(x, y, quant=.99999, | |
colramp=colorRampPalette(c("white", "black")), | |
part=.1, x.min=NA, y.min=NA, ...) { | |
if ( length(x) != length(y) ) | |
return | |
part = sample( 1:length(x), length(x)*part) | |
x.sub = x[part] | |
y.sub = y[part] | |
selected = which(x.sub < quantile(x.sub, quant, na.rm=T) & | |
y.sub < quantile(y.sub, quant, na.rm=T) ) | |
if (is.na(x.min)) | |
x.min = min(x.sub[ selected ][ !is.infinite(x.sub[ selected ]) ],na.rm=T) | |
if (is.na(y.min)) | |
y.min = min(y.sub[ selected ][ !is.infinite(y.sub[ selected ]) ],na.rm=T) | |
xlim = c(x.min, max(x.sub[ selected ], na.rm=T)) | |
ylim = c(y.min, max(y.sub[ selected ], na.rm=T)) | |
smoothScatter(x.sub[ selected ], y.sub[ selected ], colramp=colramp, | |
xlim=xlim, ylim=ylim, ...) | |
} | |
png("StabilizeCounts_H1vsK562.png", width=1400, height=1400) | |
par(mfrow=c(3,3), oma=c(1,1,3,1), cex=1.2) | |
#1st row H1 | |
reducedSmoothScatter(unlist(h1.counts@signals), unlist(h1.stabCounts), | |
xlab="H1 tags", ylab="H1 stabilized") | |
abline(0,1) | |
plot(density(log10(unlist(h1.counts@signals) + 1), na.rm=T), | |
xlab="log10(H1 tags + 1)", main="H1") | |
plot(density(log10(h1.stabCounts + 1), na.rm=T), | |
xlab="log10(H1 stabilized + 1)", main="") | |
#2nd row K562 | |
reducedSmoothScatter(unlist(k562.counts@signals), k562.stabCounts, | |
xlab="K562 tags", ylab="K562 stabilized") | |
abline(0,1) | |
plot(density(log10(unlist(k562.counts@signals) + 1), na.rm=T), | |
xlab="log10(K562 tags + 1)", main="K562") | |
plot(density(log10(k562.stabCounts + 1), na.rm=T), | |
xlab="log10(K562 stabilized + 1)", main="") | |
#3rd row H1 vs K562 | |
frame() | |
reducedSmoothScatter(unlist(k562.counts@signals), unlist(h1.counts@signals), | |
xlab="K562 tags", ylab="H1 tags", | |
main="K562 tags vs H1 tags") | |
abline(0,1) | |
reducedSmoothScatter(unlist(k562.stabCounts), unlist(h1.stabCounts), | |
xlab="K562 stabilized", ylab="H1 stabilized", | |
main="K562 stabilized vs H1 stabilized") | |
abline(0,1) | |
title("H1 and K562 ENCODE Data stabilized, MAPQ >=20", outer=T) | |
dev.off() | |
} |