# using_rnaseq.R

library(data.table)
library(stringr)
library(stringi)

# ---- Load input tables ----
# All three inputs are tab-separated text files with a header row, read from
# paths relative to the current working directory.
# View() opens a data viewer and needs an interactive session; it is guarded
# so the script also runs under Rscript / on machines without a display.

# Gene/exon table.
genes_exons_correlation <- "./genes_exons_correlation.txt"

genes_exons_table <- fread(genes_exons_correlation, header = TRUE, sep = "\t")
if (interactive()) {
  View(genes_exons_table)
}

# RNA-seq matrix.
rnaseq_file <- "./bigmatrix_norm.txt"

# The file is read with fread(); fill = TRUE lets rows of unequal length be
# read by filling the missing fields.
# NOTE(review): the previous comment here recommended read.delim over
# read.table ("EOF within quoted string" warning) - presumably left over from
# an earlier version of this step.
rnaseq_table <- fread(rnaseq_file, header = TRUE, sep = "\t", fill = TRUE)
if (interactive()) {
  View(rnaseq_table)
}

# TOBIAS results table (bindetect_results.txt).
tobias_results_file <- "./bindetect_results.txt"
tobias_results <- fread(
  tobias_results_file,
  header = TRUE, sep = "\t", fill = TRUE
)
if (interactive()) {
  View(tobias_results)
}

# Sort the TOBIAS results by the column mDuxNeg_mDuxPos_change
# (ascending, which is order()'s default).
tobias_results_sorted <- tobias_results[
  order(tobias_results$mDuxNeg_mDuxPos_change),
]
# View() needs an interactive session, so skip it in batch runs.
if (interactive()) {
  View(tobias_results_sorted)
}

# Testing around: look up one known entry in the sorted table.
# fixed = TRUE matches the name literally; treated as a regular expression,
# the "." in "MA0836.1" would match any character.
i <- grep(
  "CEBPD_MA0836.1", tobias_results_sorted$TF_name,
  fixed = TRUE
) # row number(s) of the match
tobias_results_sorted[i, ] # print this row

# Number of names in the "top" set and in the random comparison set.
sample_size <- 10
n_results <- nrow(tobias_results_sorted)

# The top set uses the first sample_size rows and the random set is drawn
# from the rows after them, so at least 2 * sample_size rows are required.
if (n_results < 2 * sample_size) {
  stop(
    "tobias_results_sorted has ", n_results, " rows; at least ",
    2 * sample_size, " are needed to draw both samples",
    call. = FALSE
  )
}

# Take the first sample_size rows of the sorted results and keep only the
# names from the TF_name column.
top_10 <- tobias_results_sorted[seq_len(sample_size), ]$TF_name
if (interactive()) {
  View(top_10) # View() needs an interactive session
}
c_top_10 <- unlist(top_10, use.names = FALSE)

# Draw a random subset of the same size from the remaining rows;
# replace = FALSE prevents picking the same row twice.
# The parentheses matter: ":" binds tighter than "+", so the previous
# `sample_size + 1:nrow(...)` evaluated to (sample_size + 1):(nrow + sample_size)
# and could index past the end of the table, yielding NA names.
remaining_rows <- (sample_size + 1):n_results
random_10 <- tobias_results_sorted[
  sample(remaining_rows, sample_size, replace = FALSE),
]$TF_name
if (interactive()) {
  View(random_10)
}
c_random_10 <- unlist(random_10, use.names = FALSE)

# Persist both name vectors as plain-text files in the working directory,
# one name per line.

# top_10.txt: names held in c_top_10
file_top_10 <- file("top_10.txt", open = "wt")
writeLines(text = c_top_10, con = file_top_10, sep = "\n")
close(con = file_top_10)

# random_10.txt: names held in c_random_10
file_random_10 <- file("random_10.txt", open = "wt")
writeLines(text = c_random_10, con = file_random_10, sep = "\n")
close(con = file_random_10)


#apply(genes_exons_table, 1, function(r) any(r %in% c_top_10)) #bebe

#look_for <- grepl("Cphx", genes_exons_table$gene_name)
#look_for

#genes_exons_table[look_for]

#look_for2 <- stri_subset(genes_exons_table$gene_name, regex = c_top_10)

#look_for3 <- apply(outer(genes_exons_table$gene_name, c_top_10, stri_detect), 1, all)
#genes_exons_table[look_for3]

#stri_detect(str = genes_exons_table$gene_name, regex = c_top_10)

# For every entry of c_top_10, used as a regular expression, test each value
# of genes_exons_table$gene_name with stri_detect(). The result is a list of
# single-column data.tables holding TRUE/FALSE (NA where no decision is
# possible), one per pattern.
# NOTE(review): the entries are used as regex patterns unescaped, so
# characters such as "." act as metacharacters - confirm this is intended.
small_list <- lapply(c_top_10, function(x) {
  as.data.table(stri_detect(str = genes_exons_table$gene_name, regex = x))
}) # list of data.tables

# Bind the per-pattern columns side by side: one row per gene_name entry,
# one logical column per pattern. (A stray interactive `?do.call` help lookup
# was removed here; it has no place in a script run.)
new_table <- do.call(cbind, small_list) # containing rows with true/false

# Row-wise "any": OR the pattern columns together. Reduce() with `|` works
# column by column (vectorised) instead of looping over every row with
# apply(), and gives the same TRUE/FALSE/NA result per row as any().
new_vector <- Reduce(`|`, new_table)

# TRUE if at least one gene_name matched at least one pattern.
any(new_vector)


# Same matching as with stri_detect(), but using base grepl(): every entry of
# c_top_10 is used as a case-insensitive, Perl-compatible regular expression
# against genes_exons_table$gene_name. One single-column data.table of
# TRUE/FALSE per pattern.
small_list2 <- lapply(c_top_10, function(y) {
  as.data.table(
    grepl(
      x = genes_exons_table$gene_name, pattern = y,
      ignore.case = TRUE, perl = TRUE
    )
  )
})

# One row per gene_name entry, one logical column per pattern.
new_table2 <- do.call(cbind, small_list2) # containing rows with true/false

# Row-wise "any": OR the pattern columns together (vectorised per column,
# instead of a row-by-row apply() over the data.table).
new_vector2 <- Reduce(`|`, new_table2)

any(new_vector2) # false
# NOTE(review): TF_name values elsewhere in this script look like
# "CEBPD_MA0836.1" (symbol plus a motif-ID suffix). If gene_name holds bare
# symbols, such patterns can never match, which would explain the FALSE
# above while the bare symbol below matches - confirm, and strip the suffix
# before matching if so.
any(
  grepl(
    x = genes_exons_table$gene_name, pattern = "CPHX",
    ignore.case = TRUE, perl = TRUE
  )
) # true

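#------------------
# NOTE(review): the tab-indented section below repeats the same steps as the
# script above (same inputs, same variable names); consider keeping only one.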
	library(data.table)
	library(stringr)
	library(stringi)

	# ---- Load input tables ----
	# All three inputs are tab-separated text files with a header row, read from
	# paths relative to the current working directory.
	# View() opens a data viewer and needs an interactive session; it is guarded
	# so the script also runs under Rscript / on machines without a display.

	# Gene/exon table.
	genes_exons_correlation <- "./genes_exons_correlation.txt"

	genes_exons_table <- fread(genes_exons_correlation, header = TRUE, sep = "\t")
	if (interactive()) {
	  View(genes_exons_table)
	}

	# RNA-seq matrix.
	rnaseq_file <- "./bigmatrix_norm.txt"

	# The file is read with fread(); fill = TRUE lets rows of unequal length be
	# read by filling the missing fields.
	# NOTE(review): the previous comment here recommended read.delim over
	# read.table ("EOF within quoted string" warning) - presumably left over from
	# an earlier version of this step.
	rnaseq_table <- fread(rnaseq_file, header = TRUE, sep = "\t", fill = TRUE)
	if (interactive()) {
	  View(rnaseq_table)
	}

	# TOBIAS results table (bindetect_results.txt).
	tobias_results_file <- "./bindetect_results.txt"
	tobias_results <- fread(
	  tobias_results_file,
	  header = TRUE, sep = "\t", fill = TRUE
	)
	if (interactive()) {
	  View(tobias_results)
	}

	# Sort the TOBIAS results by the column mDuxNeg_mDuxPos_change
	# (ascending, which is order()'s default).
	tobias_results_sorted <- tobias_results[
	  order(tobias_results$mDuxNeg_mDuxPos_change),
	]
	# View() needs an interactive session, so skip it in batch runs.
	if (interactive()) {
	  View(tobias_results_sorted)
	}

	# Testing around: look up one known entry in the sorted table.
	# fixed = TRUE matches the name literally; treated as a regular expression,
	# the "." in "MA0836.1" would match any character.
	i <- grep(
	  "CEBPD_MA0836.1", tobias_results_sorted$TF_name,
	  fixed = TRUE
	) # row number(s) of the match
	tobias_results_sorted[i, ] # print this row

	# Number of names in the "top" set and in the random comparison set.
	sample_size <- 10
	n_results <- nrow(tobias_results_sorted)

	# The top set uses the first sample_size rows and the random set is drawn
	# from the rows after them, so at least 2 * sample_size rows are required.
	if (n_results < 2 * sample_size) {
	  stop(
	    "tobias_results_sorted has ", n_results, " rows; at least ",
	    2 * sample_size, " are needed to draw both samples",
	    call. = FALSE
	  )
	}

	# Take the first sample_size rows of the sorted results and keep only the
	# names from the TF_name column.
	top_10 <- tobias_results_sorted[seq_len(sample_size), ]$TF_name
	if (interactive()) {
	  View(top_10) # View() needs an interactive session
	}
	c_top_10 <- unlist(top_10, use.names = FALSE)

	# Draw a random subset of the same size from the remaining rows;
	# replace = FALSE prevents picking the same row twice.
	# The parentheses matter: ":" binds tighter than "+", so the previous
	# `sample_size + 1:nrow(...)` evaluated to (sample_size + 1):(nrow + sample_size)
	# and could index past the end of the table, yielding NA names.
	remaining_rows <- (sample_size + 1):n_results
	random_10 <- tobias_results_sorted[
	  sample(remaining_rows, sample_size, replace = FALSE),
	]$TF_name
	if (interactive()) {
	  View(random_10)
	}
	c_random_10 <- unlist(random_10, use.names = FALSE)

	# Persist both name vectors as plain-text files in the working directory,
	# one name per line.

	# top_10.txt: names held in c_top_10
	file_top_10 <- file("top_10.txt", open = "wt")
	writeLines(text = c_top_10, con = file_top_10, sep = "\n")
	close(con = file_top_10)

	# random_10.txt: names held in c_random_10
	file_random_10 <- file("random_10.txt", open = "wt")
	writeLines(text = c_random_10, con = file_random_10, sep = "\n")
	close(con = file_random_10)



	#apply(genes_exons_table, 1, function(r) any(r %in% c_top_10)) #bebe

	#look_for <- grepl("Cphx", genes_exons_table$gene_name)
	#look_for

	#genes_exons_table[look_for]

	#look_for2 <- stri_subset(genes_exons_table$gene_name, regex = c_top_10)

	#look_for3 <- apply(outer(genes_exons_table$gene_name, c_top_10, stri_detect), 1, all)
	#genes_exons_table[look_for3]

	#stri_detect(str = genes_exons_table$gene_name, regex = c_top_10)

	# For every entry of c_top_10, used as a regular expression, test each value
	# of genes_exons_table$gene_name with stri_detect(). The result is a list of
	# single-column data.tables holding TRUE/FALSE (NA where no decision is
	# possible), one per pattern.
	# NOTE(review): the entries are used as regex patterns unescaped, so
	# characters such as "." act as metacharacters - confirm this is intended.
	small_list <- lapply(c_top_10, function(x) {
	  as.data.table(stri_detect(str = genes_exons_table$gene_name, regex = x))
	}) # list of data.tables

	# Bind the per-pattern columns side by side: one row per gene_name entry,
	# one logical column per pattern. (A stray interactive `?do.call` help lookup
	# was removed here; it has no place in a script run.)
	new_table <- do.call(cbind, small_list) # containing rows with true/false

	# Row-wise "any": OR the pattern columns together. Reduce() with `|` works
	# column by column (vectorised) instead of looping over every row with
	# apply(), and gives the same TRUE/FALSE/NA result per row as any().
	new_vector <- Reduce(`|`, new_table)

	# TRUE if at least one gene_name matched at least one pattern.
	any(new_vector)



	# Same matching as with stri_detect(), but using base grepl(): every entry of
	# c_top_10 is used as a case-insensitive, Perl-compatible regular expression
	# against genes_exons_table$gene_name. One single-column data.table of
	# TRUE/FALSE per pattern.
	small_list2 <- lapply(c_top_10, function(y) {
	  as.data.table(
	    grepl(
	      x = genes_exons_table$gene_name, pattern = y,
	      ignore.case = TRUE, perl = TRUE
	    )
	  )
	})

	# One row per gene_name entry, one logical column per pattern.
	new_table2 <- do.call(cbind, small_list2) # containing rows with true/false

	# Row-wise "any": OR the pattern columns together (vectorised per column,
	# instead of a row-by-row apply() over the data.table).
	new_vector2 <- Reduce(`|`, new_table2)

	any(new_vector2) # false
	# NOTE(review): TF_name values elsewhere in this script look like
	# "CEBPD_MA0836.1" (symbol plus a motif-ID suffix). If gene_name holds bare
	# symbols, such patterns can never match, which would explain the FALSE
	# above while the bare symbol below matches - confirm, and strip the suffix
	# before matching if so.
	any(
	  grepl(
	    x = genes_exons_table$gene_name, pattern = "CPHX",
	    ignore.case = TRUE, perl = TRUE
	  )
	) # true

	#------------------