From b15c2034109bb67a0d9bb8ca258f55f2ce03477b Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Sat, 12 Jan 2019 13:58:29 +0100
Subject: [PATCH 1/4] add r package bit64 to environment

---
 masterenv.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/masterenv.yml b/masterenv.yml
index 751964e..d4f1cb0 100644
--- a/masterenv.yml
+++ b/masterenv.yml
@@ -21,3 +21,4 @@ dependencies:
   - matplotlib
   - seaborn
   - crossmap
+  - r-bit64

From b65dc90a8cf897045e8014154e94e87fdf35e0d9 Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Sat, 12 Jan 2019 14:00:42 +0100
Subject: [PATCH 2/4] cast start and end column to integer64 to prevent
 scientific notation

---
 bin/2.1_clustering/reduce_sequence.R | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/bin/2.1_clustering/reduce_sequence.R b/bin/2.1_clustering/reduce_sequence.R
index 15d99ae..2b1b64e 100644
--- a/bin/2.1_clustering/reduce_sequence.R
+++ b/bin/2.1_clustering/reduce_sequence.R
@@ -231,6 +231,10 @@ reduce_sequence <- function(input, kmer = 10, motif = 10, output = "reduced.bed"
     names(merged) <- col_names
   }
   
+  # cast start and end column to integer64 to prevent scientific notation e.g. 1e+10
+  # start and end are assumed to be at position 2 and 3
+  merged[, c(2, 3) := lapply(.SD, bit64::as.integer64), SDcols = c(2, 3)]
+  
   data.table::fwrite(merged, file = output, sep = "\t", col.names = keep_col_names)
 }
 

From 7c4185626bbf584f578015e4c53ddeaa7832f2f3 Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Sat, 12 Jan 2019 14:02:27 +0100
Subject: [PATCH 3/4] cast start and end column to integer64 to prevent
 scientific notation

---
 bin/2.1_clustering/cdhit_wrapper.R | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/bin/2.1_clustering/cdhit_wrapper.R b/bin/2.1_clustering/cdhit_wrapper.R
index ebf7667..d0618b8 100644
--- a/bin/2.1_clustering/cdhit_wrapper.R
+++ b/bin/2.1_clustering/cdhit_wrapper.R
@@ -199,6 +199,11 @@ cdhitest <- function(input, identity = 0.8, coverage = 8, output = "cluster.bed"
     data.table::fwrite(x = cluster_table, file = summary, append = TRUE, sep = "\t", col.names = TRUE)
   }
   
+  
+  # cast start and end column to integer64 to prevent scientific notation e.g. 1e+10
+  # start and end are assumed to be at position 2 and 3
+  result[, c(2, 3) := lapply(.SD, bit64::as.integer64), SDcols = c(2, 3)]
+  
   data.table::fwrite(x = result, file = output, sep = "\t", col.names = keep_col_names)
 }
 

From 5acffac0c684e4689745427c20daa6583de218e5 Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Sat, 12 Jan 2019 15:19:47 +0100
Subject: [PATCH 4/4] fix missing leading dot in .SDcols argument

---
 bin/2.1_clustering/cdhit_wrapper.R   | 2 +-
 bin/2.1_clustering/reduce_sequence.R | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bin/2.1_clustering/cdhit_wrapper.R b/bin/2.1_clustering/cdhit_wrapper.R
index d0618b8..6225e70 100644
--- a/bin/2.1_clustering/cdhit_wrapper.R
+++ b/bin/2.1_clustering/cdhit_wrapper.R
@@ -202,7 +202,7 @@ cdhitest <- function(input, identity = 0.8, coverage = 8, output = "cluster.bed"
   
   # cast start and end column to integer64 to prevent scientific notation e.g. 1e+10
   # start and end are assumed to be at position 2 and 3
-  result[, c(2, 3) := lapply(.SD, bit64::as.integer64), SDcols = c(2, 3)]
+  result[, c(2, 3) := lapply(.SD, bit64::as.integer64), .SDcols = c(2, 3)]
   
   data.table::fwrite(x = result, file = output, sep = "\t", col.names = keep_col_names)
 }
diff --git a/bin/2.1_clustering/reduce_sequence.R b/bin/2.1_clustering/reduce_sequence.R
index 2b1b64e..b8ebf09 100644
--- a/bin/2.1_clustering/reduce_sequence.R
+++ b/bin/2.1_clustering/reduce_sequence.R
@@ -233,7 +233,7 @@ reduce_sequence <- function(input, kmer = 10, motif = 10, output = "reduced.bed"
   
   # cast start and end column to integer64 to prevent scientific notation e.g. 1e+10
   # start and end are assumed to be at position 2 and 3
-  merged[, c(2, 3) := lapply(.SD, bit64::as.integer64), SDcols = c(2, 3)]
+  merged[, c(2, 3) := lapply(.SD, bit64::as.integer64), .SDcols = c(2, 3)]
   
   data.table::fwrite(merged, file = output, sep = "\t", col.names = keep_col_names)
 }