diff --git a/merge.R b/merge.R
deleted file mode 100644
index cfa5f03..0000000
--- a/merge.R
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/home/jhamp/.conda/envs/tfbs/bin/Rscript
-#library(data.table)
-#args=commandArgs(TRUE)
-#min=as.numeric(args[1])
-#max=as.numeric(args[2])
-#folder=args[3]
-#splitted = fread(paste(folder, "/pass2Tr.bed", sep=''), header=FALSE)
-#colnames(splitted) = c("chromosome", "start", "stop", "id", "score", "length", "maxpos", "bonus_info")
-#p1 = fread(paste(folder, "/pass1Tr.bed", sep=''), header=TRUE)
-#colnames(p1) = c("chromosome", "start", "stop", "id", "score", "length", "maxpos", "bonus_info")
-#p1$maxpos = p1$start + p1$maxpos
-
-#splitted=rbind(splitted, p1)
-
-#splitted=splitted[which(splitted$stop - splitted$start >= min),]
-#splitted=splitted[which(splitted$stop - splitted$start <= max),]
-#splitted$id=make.unique(as.character(splitted$id))
-#splitted$length=splitted$stop - splitted$start
-
-#splitted=cbind(splitted, containsMaxpos=0)
-#splitted$containsMaxpos[intersect(which(splitted$start <= splitted$maxpos), which(splitted$stop > splitted$maxpos))] = 1
-#splitted$maxpos = splitted$maxpos - splitted$start
-#fwrite(splitted, paste(folder, "/merged.bed", sep=''), row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t')
-
-#!/bin/Rscript
-library(data.table)
-args=commandArgs(TRUE)
-min=as.numeric(args[1])
-max=as.numeric(args[2])
-folder=args[3]
-
-splitted = fread(paste(folder, "/pass2Tr.bed", sep=''))
-colnames(splitted) = c("chromosome", "start", "stop", "id", "score", "length", "maxpos", "info")
-p1 = fread(paste(folder, "/pass1Tr.bed", sep=''))
-colnames(p1) = c("chromosome", "start", "stop", "id", "score", "length", "maxpos", "info")
-
-p1$maxpos = p1$start + p1$maxpos
-
-splitted=rbind(splitted, p1)
-
-splitted=splitted[which(splitted$stop - splitted$start >= min),]
-splitted=splitted[which(splitted$stop - splitted$start <= max),]
-splitted$id=make.unique(as.character(splitted$id))
-splitted$length=splitted$stop - splitted$start
-
-splitted=cbind(splitted, containsMaxpos=0)
-splitted$containsMaxpos[intersect(which(splitted$start <= splitted$maxpos), which(splitted$stop > splitted$maxpos))] = 1
-splitted$maxpos = splitted$maxpos - splitted$start
-data.table::fwrite(splitted, paste(folder, "/merged.bed", sep=''), row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t')
-
-before = fread(args[4], header=FALSE)
-
-sumb=sum(before$V3-before$V2)
-suma=sum(splitted$length)
-difference = formatC(sumb/suma, digits=4)
-loss = formatC(1 - suma/sumb, digits=2)
-lengthb = formatC(mean(before$V3-before$V2), digits=4)
-lengtha = formatC(mean(splitted$length), digits=4)
-stats=data.frame(sum_nt_input=sumb, sum_nt_filtered=suma, factor=difference, loss=loss, mean_length_input=lengthb, mean_length_filtered=lengtha)
-write.table(stats, "./FilterMotifs.stats", row.names=FALSE, quote=FALSE, sep='\t')