Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
expr.dat
factors.expr <- factors
for (expr.dat in expres.list[1:2,1]){#
gene.expr <- read.delim(file.path("expression/H1-hESC",expr.dat),header=TRUE,as.is=TRUE)#
# define the gene id without the numbers after . to be matched with the matrix2gene ids#
gene.expr$gene_id_simpl <- gsub("\\..*","",gene.expr$gene_id)#
# remove genes with weird ENSEMBL IDs#
#gene.expr <- gene.expr[grep("^ENSG",gene.expr$gene_id),]#
factors.expr <- merge(x=factors, y=gene.expr[,c("gene_id_simpl","TPM")], by.x="ensembl_id", by.y="gene_id_simpl",all.x=TRUE,suffixes=c("",sub(".tsv","",expr.dat)))#
}
head(factors.expr)
expr.dat=expres.list[5,1]
gene.expr <- read.delim(file.path("expression/H1-hESC",expr.dat),header=TRUE,as.is=TRUE)#
# define the gene id without the numbers after . to be matched with the matrix2gene ids#
gene.expr$gene_id_simpl <- gsub("\\..*","",gene.expr$gene_id)#
# remove genes with weird ENSEMBL IDs#
#gene.expr <- gene.expr[grep("^ENSG",gene.expr$gene_id),]#
factors.expr <- merge(x=factors.expr, y=gene.expr[,c("gene_id_simpl","TPM")], by.x="ensembl_id", by.y="gene_id_simpl",all.x=TRUE,suffixes=c("",sub(".tsv","",expr.dat)))
head(factors.expr)
dim(factors.expr)
dim(factors)
paste0(".",sub(".tsv","",expr.dat))
factors.expr <- factors#
for (expr.dat in expres.list[,1]){#
gene.expr <- read.delim(file.path("expression/H1-hESC",expr.dat),header=TRUE,as.is=TRUE)#
# define the gene id without the numbers after . to be matched with the matrix2gene ids#
gene.expr$gene_id_simpl <- gsub("\\..*","",gene.expr$gene_id)#
# remove genes with weird ENSEMBL IDs#
#gene.expr <- gene.expr[grep("^ENSG",gene.expr$gene_id),]#
factors.expr <- merge(x=factors.expr, y=gene.expr[,c("gene_id_simpl","TPM")], by.x="ensembl_id", by.y="gene_id_simpl", all.x=TRUE, suffixes=c("",paste0(".",sub(".tsv","",expr.dat))))#
}
dim(factors.expr)
head(factors.expr)
gene.expr=NULL
write.table("candidates/encode/ESC_expression_matrices_1e-10.dat",sep="\t",col.names=TRUE,row.names=FALSE,quote=FALSE)
write.table(factors.expr,"candidates/encode/ESC_expression_matrices_1e-10.dat",sep="\t",col.names=TRUE,row.names=FALSE,quote=FALSE)
head(cand)
rownames(factors.expr) <- factors.expr$x
f1 <- cand[i,TF1]
i=1
f1 <- cand[i,TF1]
f1 <- cand[i,"TF1"]
f2 <- cand[i,"TF2"]
match(f1,factors.expr$x)
f1
f1 <- as.character(cand[i,"TF1"])
match(f1,factors.expr$x)
f1
i=3
f1 <- as.character(cand[i,"TF1"])
match(f1,factors.expr$x)
factors.expr[301,]
id.f1 <- match(f1,factors.expr$x)#
id.f2 <- match(f2,factors.expr$x)
id.f1
id.f2
j1=id.f1
j2=id.f2
factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]
cor(t(factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]))
plot(t(factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]))
cor(t(factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]))[1,2]
cand$cor <- c()
for (i in 1:nrow(cand)){#
f1 <- as.character(cand[i,"TF1"])#
f2 <- as.character(cand[i,"TF2"])#
id.f1 <- match(f1,factors.expr$x)#
id.f2 <- match(f2,factors.expr$x)#
if (!is.na(id.f1)&!is.na(id.f2)){#
cr <- c()#
for (j1 in 1:length(id.f1)){#
for (j2 in 1:length(id.f2)){#
cr <- c(cr,cor(t(factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]))[1,2])#
}#
}#
cand[i,"cor"] <- mean(cr,na.rm=TRUE) #
}#
}
cand$cor
i=2
f1 <- as.character(cand[i,"TF1"])#
f2 <- as.character(cand[i,"TF2"])#
id.f1 <- match(f1,factors.expr$x)#
id.f2 <- match(f2,factors.expr$x)
f1
f2
id.f1
id.f2
(!is.na(id.f1)&!is.na(id.f2))
cr <- c()
cr <- c()#
for (j1 in 1:length(id.f1)){#
for (j2 in 1:length(id.f2)){#
cr <- c(cr,cor(t(factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]))[1,2])#
print(cr)#
}#
}
cor(t(factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]))[1,2]
cor(t(factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]))
factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]
j1
cr <- c()#
for (j1 in (id.f1)){#
for (j2 in (id.f2)){#
cr <- c(cr,cor(t(factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]))[1,2])#
print(cr)#
}#
}
cand$cor <- c()#
for (i in 1:nrow(cand)){#
f1 <- as.character(cand[i,"TF1"])#
f2 <- as.character(cand[i,"TF2"])#
id.f1 <- match(f1,factors.expr$x)#
id.f2 <- match(f2,factors.expr$x)#
if (!is.na(id.f1)&!is.na(id.f2)){#
cr <- c()#
for (j1 in (id.f1)){#
for (j2 in (id.f2)){#
cr <- c(cr,cor(t(factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]))[1,2])#
print(cr)#
}#
}#
cand[i,"cor"] <- mean(cr,na.rm=TRUE) #
}#
}
warnings()
cand$cor
i=127
f1 <- as.character(cand[i,"TF1"])#
f2 <- as.character(cand[i,"TF2"])#
id.f1 <- match(f1,factors.expr$x)#
id.f2 <- match(f2,factors.expr$x)
f1
f2
id.f1
id.f2
cr <- c()
j1=id.f1
j2=id.f2
factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]
hist(cand$cor)
cand[cand$cor<(-0.5),]
subset(cand,(!is.na(cand$cor)&(cand$cor<(-0.5))]
subset(cand,(!is.na(cand$cor)&(cand$cor<(-0.5))
)
)
i=8
f1 <- as.character(cand[i,"TF1"])#
f2 <- as.character(cand[i,"TF2"])#
id.f1 <- match(f1,factors.expr$x)#
id.f2 <- match(f2,factors.expr$x)
f1
f2
id.f1
id.f2
j1=id.f1
j2=id.f2
factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]
plot(t(factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]),pch=16)
factors.expr[c(j1,j2)]
factors.expr[c(j1,j2),]
i=537
f1 <- as.character(cand[i,"TF1"])#
f2 <- as.character(cand[i,"TF2"])#
id.f1 <- match(f1,factors.expr$x)#
id.f2 <- match(f2,factors.expr$x)
id.f1
id.f2
j1=id.f1
j2=id.f2
factors.expr[c(j1,j2),
]
plot(t(factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]))
plot(t(log2(factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))])))
j1
plot(t(log2(factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))])), pch=16, xlab=factors.expr[j1,"factor_name"],ylab=factors.expr[j2,"factor_name"])
1+factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]
log2(1+factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))])
sel.dat <- t(log2(1+factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]))#
crs <- c(crs,cor(sel.dat))#
plot(sel.dat, pch=16, xlab=factors.expr[j1,"factor_name"], ylab=factors.expr[j2,"factor_name"], main="tis")#
text(x=mean(sel.dat[,1]),y=max(sel.dat[,2]-0.1),round(cr,3))
crs <- c()
sel.dat <- t(log2(1+factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]))#
crs <- c(crs,cor(sel.dat)[1,2])
plot(sel.dat, pch=16, xlab=factors.expr[j1,"factor_name"], ylab=factors.expr[j2,"factor_name"], main="tis")#
text(x=mean(sel.dat[,1]),y=max(sel.dat[,2]-0.1),round(cor(sel.dat)[1,2],3))
plot(sel.dat, pch=16, xlab=factors.expr[j1,"factor_name"], ylab=factors.expr[j2,"factor_name"], main="tis")#
text(x=mean(sel.dat[,1]),y=max(sel.dat[,2]-0.1), paste("cor =",round(cor(sel.dat)[1,2],3)))
plot(sel.dat, pch=16, col="ryalblue", xlab=factors.expr[j1,"factor_name"], ylab=factors.expr[j2,"factor_name"], main="tis")#
text(x=mean(sel.dat[,1]),y=max(sel.dat[,2]-0.1), paste("cor =",round(cor(sel.dat)[1,2],3)))
plot(sel.dat, pch=16, col="royalblue", xlab=factors.expr[j1,"factor_name"], ylab=factors.expr[j2,"factor_name"], main="tis")#
text(x=mean(sel.dat[,1]),y=max(sel.dat[,2]-0.1), paste("cor =",round(cor(sel.dat)[1,2],3)))
paste0(tis,factors.expr[c(j1,j2),"factor_name"] )
tis="ESC"
paste(factors.expr[c(j1,j2),"factor_name"],sep=" and " )
plot(sel.dat, pch=16, col="royalblue", xlab=factors.expr[j1,"factor_name"], ylab=factors.expr[j2,"factor_name"], main=paste(factors.expr[c(j1,j2),"factor_name"]),sub=tis)#
text(x=mean(sel.dat[,1]),y=max(sel.dat[,2]-0.1), paste("cor =",round(cor(sel.dat)[1,2],3)))
paste0("candidates/encode/expression/tis_",factors.expr[j1,"factor_name"],"_",factors.expr[j2,"factor_name"],".pdf")
paste0("candidates/encode/expression/", tis, "_", factors.expr[j1,"factor_name"], "_", factors.expr[j2,"factor_name"], ".pdf")
pdf(paste0("candidates/encode/expression/", tis, "_", factors.expr[j1,"factor_name"], "_", factors.expr[j2,"factor_name"], ".pdf"))#
plot(sel.dat, pch=16, col="royalblue", xlab=factors.expr[j1,"factor_name"], ylab=factors.expr[j2,"factor_name"], main=paste(factors.expr[c(j1,j2),"factor_name"]),sub=tis)#
text(x=mean(sel.dat[,1]),y=max(sel.dat[,2]-0.1), paste("cor =",round(cor(sel.dat)[1,2],3))) #
dev.off()
cand$cor <- c()#
for (i in 1:nrow(cand)){#
f1 <- as.character(cand[i,"TF1"])#
f2 <- as.character(cand[i,"TF2"])#
id.f1 <- match(f1,factors.expr$x)#
id.f2 <- match(f2,factors.expr$x)#
if (!is.na(id.f1)&!is.na(id.f2)){#
crs <- c()#
for (j1 in (id.f1)){#
for (j2 in (id.f2)){#
sel.dat <- t(log2(1+factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]))#
crs <- c(crs,cor(sel.dat)[1,2])#
# pdf(paste0("candidates/encode/expression/", tis, "_", factors.expr[j1,"factor_name"], "_", factors.expr[j2,"factor_name"], ".pdf"))#
# plot(sel.dat, pch=16, col="royalblue", xlab=factors.expr[j1,"factor_name"], ylab=factors.expr[j2,"factor_name"], main=paste(factors.expr[c(j1,j2),"factor_name"]),sub=tis)#
# text(x=mean(sel.dat[,1]),y=max(sel.dat[,2]-0.1), paste("cor =",round(cor(sel.dat)[1,2],3))) #
# dev.off() #
}#
}#
cand[i,"cor"] <- mean(crs,na.rm=TRUE) #
}#
}
cand$cor
write.csv(cand,"candidates/encode/ESC_matrices_1e-10_expr.csv",row.names=FALSE,quote=FALSE)
subset(cand, !is.na(cand$cor)&(cand$cor<-0.2))
subset(cand, !is.na(cand$cor)&(cand$cor<(-0.2)))
dim(matrix2gene)
dim(factors)
head(matrix2gene)
ids <- sample(1:nrow(matrix2gene),200)#
random.factors <- matrix2gene[ids,]
factors.expr <- random.factors
for (expr.dat in expres.list[,1]){#
gene.expr <- read.delim(file.path("expression/H1-hESC",expr.dat),header=TRUE,as.is=TRUE)#
# define the gene id without the numbers after . to be matched with the matrix2gene ids#
gene.expr$gene_id_simpl <- gsub("\\..*","",gene.expr$gene_id)#
# remove genes with weird ENSEMBL IDs#
#gene.expr <- gene.expr[grep("^ENSG",gene.expr$gene_id),]#
factors.expr <- merge(x=factors.expr, y=gene.expr[,c("gene_id_simpl","TPM")], by.x="ensembl_id", by.y="gene_id_simpl", all.x=TRUE, suffixes=c("",paste0(".",sub(".tsv","",expr.dat))))#
}
write.table(factors.expr,"candidates/encode/ESC_expression_randommatrices.dat",sep="\t",col.names=TRUE,row.names=FALSE,quote=FALSE)
head(factors.expr)
j1=1
j2=3
sel.dat <- t(log2(1+factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]))
cr <- cor(sel.dat)[1,2]
data.frame(factors.expr[j1,c("matrix_id", "factor_name")],factors.expr[j2,c("matrix_id", "factor_name")],cr)
random.cors <- data.frame()
random.cors <- rbind(random.cors,data.frame(factors.expr[j1,c("matrix_id", "factor_name")],factors.expr[j2,c("matrix_id", "factor_name")],cr))
random.cors
random.cors <- data.frame()#
for (j1 in 1:(nrow(factors.expr)-1)){#
for (j2 in (j1+1):nrow(factors.expr)){ #
sel.dat <- t(log2(1+factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]))#
cr <- cor(sel.dat)[1,2]#
random.cors <- rbind(random.cors,data.frame(factors.expr[j1,c("matrix_id", "factor_name")],factors.expr[j2,c("matrix_id", "factor_name")],cr)) #
}#
}
random.cors
setwd("/Volumes/AJAS_DATA/Documents/TF_analysis/")
write.csv(random.cors,"candidates/encode/random_correlations_expr.csv", row.names=FALSE, quote=FALSE))
write.csv(random.cors,"candidates/encode/random_correlations_expr.csv", row.names=FALSE, quote=FALSE)
head(random.cors)
head(cand)
colnames(random.cor)<-c("TF1","TF.name1","TF2","TF.name2","cor")
colnames(random.cors)<-c("TF1","TF.name1","TF2","TF.name2","cor")
write.csv(random.cors,"candidates/encode/random_correlations_expr.csv", row.names=FALSE, quote=FALSE)
dat <- rbind(cand[,c("TF1","TF.name1","TF2","TF.name2","cor")],random.cors)
dat$type <- c(rep(tis,nrow(cand)),rep("random",nrow(random.cors)))
dat$type <- as.factor(dat$type)
ggplot(dat, aes(x=cor, color=type, fill=type)) + #
geom_histogram(aes(y=..density..), alpha=0.5, #
position="identity")+#
geom_density(alpha=.2)
library(ggplot2)
ggplot(dat, aes(x=cor, color=type, fill=type)) + #
geom_histogram(aes(y=..density..), alpha=0.5, #
position="identity")+#
geom_density(alpha=.2)
dim(dat)
nrow(cand)
nrow(random.cors)
200*199/2
ggplot(dat, aes(x=cor, color=type, fill=type)) +
geom_density(alpha=.2)
factors.expr
dim(factors)
head(factors)
head(factors.expr)
gene.expr<-NULL
factors.expr <- read.delim("candidates/encode/ESC_expression_matrices_1e-10.dat", sep="\t", header=TRUE,row.names=FALSE,as.is=TRUE)
factors.expr <- read.delim("candidates/encode/ESC_expression_matrices_1e-10.dat", sep="\t", header=TRUE,as.is=TRUE)
head(factors.expr)
dim(factors.expr)
random.cors <- data.frame()#
for (j1 in 1:(nrow(factors.expr)-1)){#
for (j2 in (j1+1):nrow(factors.expr)){ #
sel.dat <- t(log2(1+factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]))#
cr <- cor(sel.dat)[1,2]#
random.cors <- rbind(random.cors,data.frame(factors.expr[j1,c("matrix_id", "factor_name")],factors.expr[j2,c("matrix_id", "factor_name")],cr)) #
}#
} #
#
colnames(random.cors)<-c("TF1","TF.name1","TF2","TF.name2","cor")#
write.csv(random.cors,"candidates/encode/random_correlations_expr.csv", row.names=FALSE, quote=FALSE)
random.cors <- data.frame()#
for (j1 in 1:(nrow(factors.expr)-1)){#
for (j2 in (j1+1):nrow(factors.expr)){ #
sel.dat <- t(log2(1+factors.expr[c(j1,j2),grep("^TPM",colnames(factors.expr))]))#
cr <- cor(sel.dat)[1,2]#
random.cors <- rbind(random.cors,data.frame(factors.expr[j1,c("x", "factor_name")],factors.expr[j2,c("x", "factor_name")],cr)) #
}#
} #
#
colnames(random.cors)<-c("TF1","TF.name1","TF2","TF.name2","cor")#
write.csv(random.cors,"candidates/encode/random_correlations_expr.csv", row.names=FALSE, quote=FALSE)
subset(cand, !is.na(cand$cor)&(cand$cor<(-0.2)))
subset(cand, !is.na(cand$cor)&(cand$cor<(-0.5)))
mat1 ="OCT4_01" mat2="FOXD3_01"
mat1 ="OCT4_01"
mat2="FOXD3_01"
dat <- rbind(cand[,c("TF1","TF.name1","TF2","TF.name2","cor")],random.cors)#
dat$type <- c(rep(tis,nrow(cand)),rep("random",nrow(random.cors)))#
dat$type <- as.factor(dat$type)
ggplot(dat, aes(x=cor, color=type, fill=type)) + #
geom_histogram(aes(y=..density..), alpha=0.5, #
position="identity")+#
geom_density(alpha=.2)
ggplot(dat, aes(x=cor, color=type, fill=type)) + #
#geom_histogram(aes(y=..density..), alpha=0.5, position="identity")+#
geom_density(alpha=.2)
ggplot(dat, aes(x=cor, fill=type)) + #
#geom_histogram(aes(y=..density..), alpha=0.5, position="identity")+#
geom_density(alpha=.2)
ggplot(dat, aes(x=cor, color=type, fill=type)) + #
#geom_histogram(aes(y=..density..), alpha=0.5, position="identity")+#
geom_density(alpha=.2) +#
ggtitle("Gene expression correlations between TF pairs") +#
theme_light()
dat$type <- c(rep(paste0(tis,"-pairs"),nrow(cand)),rep("all pairs",nrow(random.cors)))
dat$type <- as.factor(dat$type)
ggplot(dat, aes(x=cor, color=type, fill=type)) + #
#geom_histogram(aes(y=..density..), alpha=0.5, position="identity")+#
geom_density(alpha=.2) +#
ggtitle("Gene expression correlations between TF pairs") +#
theme_light()
gp <- ggplot(dat, aes(x=cor, color=type, fill=type)) + #
#geom_histogram(aes(y=..density..), alpha=0.5, position="identity")+#
geom_density(alpha=.2) +#
ggtitle("Gene expression correlations between TF pairs") +#
theme_light()
pdf("candidates/encode/ESC_geneexpr_correlation_hist.pdf")#
plot(gp)#
dev.off()
setwd("~/Documents/GitHub/coTRaCTE/")
source("scripts/DHS.functions.github.R")
if (!require(devtools)) install.packages("devtools",repos="http://cran.rstudio.com/", quiet = TRUE)#
library(devtools, quietly = TRUE)#
#
#if (!require(tRap)) install_github("matthuska/tRap", quiet = TRUE)#
#library(tRap, quietly = TRUE) # TRAP library for counting binding affinities#
#
if (!require(gplots)) install.packages("gplots",repos="http://cran.rstudio.com/", quiet = TRUE)#
library(gplots, quietly = TRUE)#
#
if (!require(optparse)) install.packages("optparse", repos = "http://cran.rstudio.com/", quiet = TRUE)#
library(optparse, quietly = TRUE)
option_list = list(#
make_option(c("-l", "--lof.matrices"), type="character", default= "data/list_matrices.dat", #
help = "list of all matrices for which the enrichment should be calculated", metavar="character"),#
make_option(c("-a", "--affinity.dir"), type="character", default="data/affinity", #
help="directory where the affinity files (for each matrix separately) are saved, each tissue in separate directory"),#
make_option(c("-t", "--tissues"), type="character", default = "data/cell_types.dat", #
help="textfile with all tissues analyzed", metavar="character"),#
make_option(c("-k", "--top.ranked"), type="integer", default=1000, #
help="top ranked DHSs considered to be bound by the TF"),#
make_option(c("-n", "--top.dhs"), type="integer", default=5000, #
help="top most cell-type specific CTS-DHSs and global DHSs considered"),#
make_option(c("-o", "--out.dir"), type="character", default="results/tf.pairs/", #
help="directory where to save the results ")#
); #
#
opt_parser = OptionParser(option_list=option_list);#
opt = parse_args(opt_parser);
opt$tissues
tissues <- read.delim(opt$tissues,as.is=TRUE,header=FALSE)#
tissues <- tissues[,1]#
tissues <- tissues[tissues!="global"] # in case global in the tissue list, remove it from calculations
matrices.names <- read.delim(opt$lof.matrices, header=TRUE, as.is=TRUE)#
matrices <- matrices.names[,1]
matrices
grep(mat1,matrices,value=TRUE)
mat2 <- grep(mat2,matrices,value=TRUE)
mat2
mat1 <- grep(mat1,matrices,value=TRUE)
k <- opt$top.ranked#
n <- opt$top.dhs
k
n
mat1 ="OCT4_01" #
mat2="FOXD3_01"
matrix1 <- grep(mat1,matrices,value=TRUE)#
matrix2 <- grep(mat2,matrices,value=TRUE)
aff.dir=opt$affinity.dir
aff1 <- select.dhs(n=n, tis=tis, aff.dir=aff.dir, matrix=matrix1)
matrix1 ="OCT4_01" #
matrix2="FOXD3_01"
aff1 <- select.dhs(n=n, tis=tis, aff.dir=aff.dir, matrix=matrix1)
aff2 <- select.dhs(n=n, tis=tis, aff.dir=aff.dir, matrix=matrix2)
head(aff1)
aff.merged = merge(aff1,aff2,by="dhs",suffixes=c("1","2"),sort=FALSE,all=TRUE)
head(aff.merged)
aff.sel <- subset(aff.merged, (aff.merged$rank1<=k)&(aff.merged$rank2<=k))
aff.sel
dim(aff.sel)
aff.sel <- subset(aff.sel, aff.sel$tissue1==tis)
dim(aff.sel)
bed1 <- strsplit(aff.sel$dhs,split=":")
head(bed1)
unlist(bed1)
seq(1,nrow(bed1),by=2)
bed1 <- unlist(bed1)
bed1[seq(1,length(bed1),by=2)]
bed2 <- strsplit(bed1[seq(2,length(bed1),by=2)],split="-")
bed2
bed2 <- strsplit(bed1[seq(2,length(bed1),by=2)],split="-")#
bed2 <- unlist(bed2)
bed.file <- data.frame(chr=bed1[seq(1,length(bed1),by=2)], start=as.numeric(bed2[seq(1,length(bed2),by=2)]), as.numeric(bed2[seq(2,length(bed2),by=2)]))
head(bed.file)
bed.file <- data.frame(chr=bed1[seq(1,length(bed1),by=2)], start=as.numeric(bed2[seq(1,length(bed2),by=2)]),end= as.numeric(bed2[seq(2,length(bed2),by=2)]))
head(bed.file)
write.table(bed.file,"expression/H1-hESC/OCT4_FOXD3_shareddhs_ESC.bed", sep="\t", quote=FALSE, col.names=FALSE, row.names=FALSE)
write.table(bed.file,"/Volumes/AJAS_DATA/TF_analysis/expression/H1-hESC/OCT4_FOXD3_shareddhs_ESC.bed", sep="\t", quote=FALSE, col.names=FALSE, row.names=FALSE)
write.table(bed.file,"/Volumes/AJAS_DATA/Documents/TF_analysis/expression/H1-hESC/OCT4_FOXD3_shareddhs_ESC.bed", sep="\t", quote=FALSE, col.names=FALSE, row.names=FALSE)
bed.file <- data.frame(chr=bed1[seq(1,length(bed1),by=2)], start=as.numeric(bed2[seq(1,length(bed2),by=2)])-1,end= as.numeric(bed2[seq(2,length(bed2),by=2)]))
head(bed.file)
write.table(bed.file,"/Volumes/AJAS_DATA/Documents/TF_analysis/expression/H1-hESC/OCT4_FOXD3_shareddhs_ESC.bed", sep="\t", quote=FALSE, col.names=FALSE, row.names=FALSE)
all.int = read.csv(paste("/Volumes/AJAS_DATA/Documents/TF_analysis/candidates/encode/candidates_all_",l1,"_",l2,"tresh_",tr.const,".csv",sep=""),as.is=TRUE)
dim(all.int)
head(all.int)
general.int <- all.int[all.int$freq>30,]
vyber.list.names
write.csv(general.int,file=paste0("/Volumes/AJAS_DATA/Documents/TF_analysis/candidates/encode/general.candidates_",l1,"_",l2,"tresh_",tr.const,".csv",sep=""),quote=FALSE,col.names=TRUE,row.names=FALSE)
head(matrix2gene)
factors <- unique(c(general.int$TF.name1,general.int$TF.name2))
length(factors)
factors
gen.factors <- unique(c(general.int$TF.name1,general.int$TF.name2))#
length(gen.factors)
gen.factors <- merge(gen.factors,matrix2gene,by.x=1,by.y="factor_name")
dim(gen.factors)
gen.factors
tf1 <- general.int$TF.name1[i]#
tf2 <- general.int$TF.name2[i]#
unique(gen.factors[gen.factors$x==tf1,"ensembl_id"])#
unique(gen.factors[gen.factors$x==tf2,"ensembl_id"])#
id.f1 <- match(tf1,gen.factors$x)#
id.f2 <- match(tf2,gen.factors$x)