#!/usr/bin/env Rscript

# Split a BED file by cluster and write the sequences of each cluster to a
# separate FASTA file.
#
# Usage: bed_to_fasta.R <bedInput> <out> <prefix>
#
# @parameter bedInput Tab-separated BED file without header. Column 2 is used
#                     as the sequence name, column 3 as the sequence and
#                     column 4 (V4) as the cluster id.
# @parameter out      Output directory. It is concatenated with the file name
#                     as-is, so it has to end with a path separator.
# @parameter prefix   Prefix for the output file names.
#
# One file named <out><prefix>_cluster_<i> is written per cluster, where <i>
# is the position of the cluster in the list returned by split().
# NOTE(review): <i> is a positional index, not the cluster id itself; the two
# only coincide when the ids are exactly 1..n - confirm this is intended.

args <- commandArgs(trailingOnly = TRUE)

# Fail early instead of silently pasting NA into the output file names.
if (length(args) < 3) {
  stop("Usage: bed_to_fasta.R <bedInput> <out> <prefix>", call. = FALSE)
}

bedInput <- args[1]
out <- args[2]     # e.g. "results/"
prefix <- args[3]  # e.g. "Fasta"

bed <- data.table::fread(bedInput, header = FALSE, sep = "\t")

# Give a clear message when the cluster column is missing.
if (nrow(bed) > 0 && ncol(bed) < 4) {
  stop(
    "Expected at least 4 columns (name, sequence and cluster id in ",
    "columns 2-4) in ", bedInput, ", found ", ncol(bed), ".",
    call. = FALSE
  )
}

# Column 4 holds the cluster id.
clusters <- split(bed, bed$V4, sorted = TRUE, flatten = FALSE)

# seq_along() yields an empty sequence when there are no clusters, whereas
# 1:length(clusters) would iterate over c(1, 0).
for (i in seq_along(clusters)) {
  cluster <- clusters[[i]]
  outfile <- paste0(out, prefix, "_cluster_", i)
  seqinr::write.fasta(
    sequences = as.list(cluster[[3]]),  # column 3: sequence
    names = cluster[[2]],               # column 2: sequence name
    file.out = outfile,
    as.string = TRUE
  )
}