diff --git a/bin/bed_to_fasta.R b/bin/bed_to_fasta.R index f63b06f..6767ab5 100644 --- a/bin/bed_to_fasta.R +++ b/bin/bed_to_fasta.R @@ -1,25 +1,21 @@ -bed <- data.table::data.table(1:10,1:10,c("AAAAAAAAA","CTGAGA","CCCTAGC","GC","AA","ACGTACGTGTCA","GGGCCGCTA","GCA","TTTTTGCA","AAAATCGACGT"),c(1,2,3,1,1,1,2,2,3,3)) - #!/usr/bin/env Rscript # Splitting BED-files depending on their cluster. # The Sequences of each cluster are writen as an FASTA-file. -# @parameter bedInput BED-file with sequences and cluster-id as column -# @parameter out output directory +# @parameter bedInput BED-file with sequences and cluster-id as column"TEs # @parameter prefix prefix for filenames args = commandArgs(trailingOnly = TRUE) - bedInput <- args[1] -out <- args[2] # "G://Rene.Wiegandt/10_Master/" -prefix <- args[3] # "Fasta" +prefix <- args[2] # "Fasta" bed <- data.table::fread(bedInput, header = FALSE, sep = "\t") -clusters <- split(bed, bed$V4, sorted = TRUE, flatten = FALSE) # <---- Spalte mit Cluster +clusters <- split(bed, bed$V3, sorted = TRUE, flatten = FALSE) # <---- Spalte mit Cluster discard <- lapply(1:length(clusters), function(i){ - sequences <- as.list(as.data.frame(clusters[i])[[3]]) # <---- Splate mit Sequenz - outfile <- paste0(out,prefix,"_cluster_",i) - seqinr::write.fasta(sequences = sequences, names = as.data.frame(clusters[i])[[2]], file.out = outfile, as.string = TRUE) # <---- Spalte mit Name + sequences <- as.list(as.data.frame(clusters[i])[[2]]) # <---- Splate mit Sequenz + outfile <- paste0(prefix,"_cluster_",i,".FASTA") + seqinr::write.fasta(sequences = sequences, names = as.data.frame(clusters[i])[[1]] + , file.out = outfile, as.string = TRUE) # <---- Spalte mit Name })