Skip to content

Commit

Permalink
bed_to_fasta update
Browse files Browse the repository at this point in the history
  • Loading branch information
renewiegandt committed Nov 5, 2018
1 parent 89eb998 commit b4b972f
Showing 1 changed file with 7 additions and 11 deletions.
18 changes: 7 additions & 11 deletions bin/bed_to_fasta.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,21 @@
# Hard-coded ten-row sample table: two integer columns (1:10), a column of
# nucleotide strings and a column of small integers (1-3).
# NOTE(review): `bed` is reassigned by the fread() call further down, so this
# table is never used as input — presumably leftover test data; confirm and remove.
bed <- data.table::data.table(1:10,1:10,c("AAAAAAAAA","CTGAGA","CCCTAGC","GC","AA","ACGTACGTGTCA","GGGCCGCTA","GCA","TTTTTGCA","AAAATCGACGT"),c(1,2,3,1,1,1,2,2,3,3))

#!/usr/bin/env Rscript

# Splits a BED file into groups according to the cluster column.
# The sequences of each cluster are written to a FASTA file.
# @parameter bedInput <string> BED-file with sequences and cluster-id as column
# @parameter out <string> output directory
# @parameter bedInput <string> BED-file with sequences and cluster-id as columns
# @parameter prefix <string> prefix for filenames

# Positional command-line arguments (only those after the script name).
args = commandArgs(trailingOnly = TRUE)


# First argument: path of the tab-separated input file.
bedInput <- args[1]
out <- args[2] # e.g. "G://Rene.Wiegandt/10_Master/"
prefix <- args[3] # e.g. "Fasta"
# NOTE(review): `prefix` is reassigned on the next line, so the args[3] value is
# never used and `out` and `prefix` both end up holding args[2]. This looks like
# the old and new revision of the same statement — confirm which one is intended.
prefix <- args[2] # e.g. "Fasta"

# Read the input without a header row; the code below addresses columns as V3/V4.
bed <- data.table::fread(bedInput, header = FALSE, sep = "\t")

# Group the rows by the cluster column.
clusters <- split(bed, bed$V4, sorted = TRUE, flatten = FALSE) # <---- column holding the cluster id
# NOTE(review): `clusters` is overwritten immediately, so only the split on V3
# takes effect — confirm which column actually holds the cluster id.
clusters <- split(bed, bed$V3, sorted = TRUE, flatten = FALSE) # <---- column holding the cluster id
# Iterate over the groups by position; the result is stored in `discard` and
# not used afterwards in the visible code.
# NOTE(review): 1:length(clusters) yields c(1, 0) when `clusters` is empty;
# seq_along(clusters) would be the safe form.
discard <- lapply(1:length(clusters), function(i){
# First write: sequences from the 3rd column, names from the 2nd column,
# file name built as <out><prefix>_cluster_<i> (no separator, no extension).
sequences <- as.list(as.data.frame(clusters[i])[[3]]) # <---- column holding the sequence
outfile <- paste0(out,prefix,"_cluster_",i)
seqinr::write.fasta(sequences = sequences, names = as.data.frame(clusters[i])[[2]], file.out = outfile, as.string = TRUE) # <---- column holding the name
# Second write: sequences from the 2nd column, names from the 1st column,
# file name built as <prefix>_cluster_<i>.FASTA.
# NOTE(review): `sequences` and `outfile` are reassigned here, so every
# iteration writes two files with different column choices — looks like both
# revisions of these statements are present; confirm and keep only one.
# Note that <i> is the position of the group in `clusters`, not the cluster value.
sequences <- as.list(as.data.frame(clusters[i])[[2]]) # <---- column holding the sequence
outfile <- paste0(prefix,"_cluster_",i,".FASTA")
seqinr::write.fasta(sequences = sequences, names = as.data.frame(clusters[i])[[1]]
, file.out = outfile, as.string = TRUE) # <---- column holding the name
})

0 comments on commit b4b972f

Please sign in to comment.