diff --git a/bin/2.1_clustering/cdhit_wrapper.R b/bin/2.1_clustering/cdhit_wrapper.R index ebf7667..6225e70 100644 --- a/bin/2.1_clustering/cdhit_wrapper.R +++ b/bin/2.1_clustering/cdhit_wrapper.R @@ -199,6 +199,11 @@ cdhitest <- function(input, identity = 0.8, coverage = 8, output = "cluster.bed" data.table::fwrite(x = cluster_table, file = summary, append = TRUE, sep = "\t", col.names = TRUE) } + + # cast start and end column to integer64 to prevent scientific notation e.g. 1e+10 + # start and end are assumed to be at position 2 and 3 + result[, c(2, 3) := lapply(.SD, bit64::as.integer64), .SDcols = c(2, 3)] + data.table::fwrite(x = result, file = output, sep = "\t", col.names = keep_col_names) } diff --git a/bin/2.1_clustering/reduce_sequence.R b/bin/2.1_clustering/reduce_sequence.R index 15d99ae..b8ebf09 100644 --- a/bin/2.1_clustering/reduce_sequence.R +++ b/bin/2.1_clustering/reduce_sequence.R @@ -231,6 +231,10 @@ reduce_sequence <- function(input, kmer = 10, motif = 10, output = "reduced.bed" names(merged) <- col_names } + # cast start and end column to integer64 to prevent scientific notation e.g. 1e+10 + # start and end are assumed to be at position 2 and 3 + merged[, c(2, 3) := lapply(.SD, bit64::as.integer64), .SDcols = c(2, 3)] + data.table::fwrite(merged, file = output, sep = "\t", col.names = keep_col_names) } diff --git a/masterenv.yml b/masterenv.yml index 751964e..d4f1cb0 100644 --- a/masterenv.yml +++ b/masterenv.yml @@ -21,3 +21,4 @@ dependencies: - matplotlib - seaborn - crossmap + - r-bit64