From b15c2034109bb67a0d9bb8ca258f55f2ce03477b Mon Sep 17 00:00:00 2001 From: Schultheis Date: Sat, 12 Jan 2019 13:58:29 +0100 Subject: [PATCH 1/4] add r package bit64 to environment --- masterenv.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/masterenv.yml b/masterenv.yml index 751964e..d4f1cb0 100644 --- a/masterenv.yml +++ b/masterenv.yml @@ -21,3 +21,4 @@ dependencies: - matplotlib - seaborn - crossmap + - r-bit64 From b65dc90a8cf897045e8014154e94e87fdf35e0d9 Mon Sep 17 00:00:00 2001 From: Schultheis Date: Sat, 12 Jan 2019 14:00:42 +0100 Subject: [PATCH 2/4] cast start and end column to integer64 to prevent scientific notation --- bin/2.1_clustering/reduce_sequence.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bin/2.1_clustering/reduce_sequence.R b/bin/2.1_clustering/reduce_sequence.R index 15d99ae..2b1b64e 100644 --- a/bin/2.1_clustering/reduce_sequence.R +++ b/bin/2.1_clustering/reduce_sequence.R @@ -231,6 +231,10 @@ reduce_sequence <- function(input, kmer = 10, motif = 10, output = "reduced.bed" names(merged) <- col_names } + # cast start and end column to integer64 to prevent scientific notation e.g. 1e+10 + # start and end are assumed to be at position 2 and 3 + merged[, c(2, 3) := lapply(.SD, bit64::as.integer64), SDcols = c(2, 3)] + data.table::fwrite(merged, file = output, sep = "\t", col.names = keep_col_names) } From 7c4185626bbf584f578015e4c53ddeaa7832f2f3 Mon Sep 17 00:00:00 2001 From: Schultheis Date: Sat, 12 Jan 2019 14:02:27 +0100 Subject: [PATCH 3/4] cast start and end column to integer64 to prevent scientific notation --- bin/2.1_clustering/cdhit_wrapper.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bin/2.1_clustering/cdhit_wrapper.R b/bin/2.1_clustering/cdhit_wrapper.R index ebf7667..d0618b8 100644 --- a/bin/2.1_clustering/cdhit_wrapper.R +++ b/bin/2.1_clustering/cdhit_wrapper.R @@ -199,6 +199,11 @@ cdhitest <- function(input, identity = 0.8, coverage = 8, output = "cluster.bed" data.table::fwrite(x = cluster_table, file = summary, append = TRUE, sep = "\t", col.names = TRUE) } + + # cast start and end column to integer64 to prevent scientific notation e.g. 1e+10 + # start and end are assumed to be at position 2 and 3 + result[, c(2, 3) := lapply(.SD, bit64::as.integer64), SDcols = c(2, 3)] + data.table::fwrite(x = result, file = output, sep = "\t", col.names = keep_col_names) } From 5acffac0c684e4689745427c20daa6583de218e5 Mon Sep 17 00:00:00 2001 From: Schultheis Date: Sat, 12 Jan 2019 15:19:47 +0100 Subject: [PATCH 4/4] fixed missing point --- bin/2.1_clustering/cdhit_wrapper.R | 2 +- bin/2.1_clustering/reduce_sequence.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/2.1_clustering/cdhit_wrapper.R b/bin/2.1_clustering/cdhit_wrapper.R index d0618b8..6225e70 100644 --- a/bin/2.1_clustering/cdhit_wrapper.R +++ b/bin/2.1_clustering/cdhit_wrapper.R @@ -202,7 +202,7 @@ cdhitest <- function(input, identity = 0.8, coverage = 8, output = "cluster.bed" # cast start and end column to integer64 to prevent scientific notation e.g. 1e+10 # start and end are assumed to be at position 2 and 3 - result[, c(2, 3) := lapply(.SD, bit64::as.integer64), SDcols = c(2, 3)] + result[, c(2, 3) := lapply(.SD, bit64::as.integer64), .SDcols = c(2, 3)] data.table::fwrite(x = result, file = output, sep = "\t", col.names = keep_col_names) } diff --git a/bin/2.1_clustering/reduce_sequence.R b/bin/2.1_clustering/reduce_sequence.R index 2b1b64e..b8ebf09 100644 --- a/bin/2.1_clustering/reduce_sequence.R +++ b/bin/2.1_clustering/reduce_sequence.R @@ -233,7 +233,7 @@ reduce_sequence <- function(input, kmer = 10, motif = 10, output = "reduced.bed" # cast start and end column to integer64 to prevent scientific notation e.g. 1e+10 # start and end are assumed to be at position 2 and 3 - merged[, c(2, 3) := lapply(.SD, bit64::as.integer64), SDcols = c(2, 3)] + merged[, c(2, 3) := lapply(.SD, bit64::as.integer64), .SDcols = c(2, 3)] data.table::fwrite(merged, file = output, sep = "\t", col.names = keep_col_names) }