Skip to content

Commit

Permalink
added filter for small clusters
Browse files Browse the repository at this point in the history
  • Loading branch information
renewiegandt committed Nov 29, 2018
1 parent 80ffd2a commit a5e902f
Showing 1 changed file with 13 additions and 6 deletions.
19 changes: 13 additions & 6 deletions bin/bed_to_fasta.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,25 @@
# The sequences of each cluster are written to a FASTA file.
# @parameter bedInput <string> BED-file with sequences and cluster-id as columns
# @parameter prefix <string> prefix for filenames
# @parameter min_seq <INT> min. number of sequences per cluster

# Script body: read a BED-like table, split its rows by a cluster column and
# write one FASTA file per cluster via seqinr::write.fasta.
# NOTE(review): this view appears to be a diff rendering without +/- markers,
# so some statements below occur twice (an older and a newer variant).
# Confirm which lines are live before changing any code.

# Positional command-line arguments (only those after the script name).
args = commandArgs(trailingOnly = TRUE)

bedInput <- args[1]
# NOTE(review): `prefix` is assigned twice with the same value; the first line
# looks like the superseded variant (its trailing comment shows an example value).
prefix <- args[2] # "Fasta"
prefix <- args[2]
# Kept as the raw argument string here; converted with as.numeric() at the
# comparison inside the loop below.
# NOTE(review): if the third argument is omitted, args[3] is NA and the `if`
# condition below would be NA — confirm the caller always passes it.
min_seq <- args[3]

# Read the tab-separated input without a header row; columns are then
# addressed by their default names (V3, V8) and by position below.
bed <- data.table::fread(bedInput, header = FALSE, sep = "\t")

# Group the rows by cluster id.
# NOTE(review): `clusters` is assigned twice; the second split (by V8)
# overwrites the first (by V3), which looks like the superseded variant.
clusters <- split(bed, bed$V3, sorted = TRUE, flatten = FALSE) # <---- column with cluster
clusters <- split(bed, bed$V8, sorted = TRUE, flatten = FALSE) # <---- column with cluster
# Visit every cluster by index; the result list is only kept in `discard`
# and not used further in the visible code.
# NOTE(review): 1:length(clusters) yields c(1, 0) when `clusters` is empty;
# seq_along(clusters) would avoid that.
discard <- lapply(1:length(clusters), function(i){
# NOTE(review): the next four lines write the FASTA file unconditionally using
# columns 2 (sequence) and 1 (name); they look like the superseded variant of
# the filtered write further down, which targets the same output file name.
sequences <- as.list(as.data.frame(clusters[i])[[2]]) # <---- column with sequence
outfile <- paste0(prefix,"_cluster_",i,".FASTA")
seqinr::write.fasta(sequences = sequences, names = as.data.frame(clusters[i])[[1]]
, file.out = outfile, as.string = TRUE) # <---- column with name
# Convert the one-element list for cluster i into a data.frame so that its
# columns can be picked by position.
clust <- as.data.frame(clusters[i])
# Print the number of rows (sequences) in this cluster.
print(nrow(clust))
# Only clusters with at least `min_seq` rows are written to disk.
if (nrow(clust) >= as.numeric(min_seq) ) {
sequences <- as.list(clust[[7]]) # <---- column with sequence
# Output file name: <prefix>_cluster_<index>.FASTA
outfile <- paste0(prefix,"_cluster_",i,".FASTA")
seqinr::write.fasta(sequences = sequences, names = clust[[4]], file.out = outfile, as.string = TRUE) # <---- column with name
} else {
# Cluster has fewer rows than min_seq: report it and write nothing here.
print(paste0("Cluster: ",i," is to small"))
}
})

0 comments on commit a5e902f

Please sign in to comment.