diff --git a/README.md b/README.md index 4d3e1b8..8be705c 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,9 @@ alternative_haplotype_vcf: # str # path to liftover directory (explained below) liftover_root: # str, defaults to 'resources/references/liftover' +clustering: + eps: 0.6 # float; higher values allow larger clusters + # Settings for the pairwise alignment pw_alignment: match: # positive integer diff --git a/config/config.yaml b/config/config.yaml index a525b53..cdb29cf 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -11,6 +11,9 @@ pw_alignment: gap: -1 mismatch: -1 +clustering: + eps: 0.6 # float; higher values allow larger clusters + samples: ['17_08'] # display names and file names of the used references diff --git a/workflow/Snakefile b/workflow/Snakefile index 8faf366..4ba2308 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -108,9 +108,11 @@ rule cluster: sequences = "results/{sample}/{region}/sequences.fa" output: clustering = "results/{sample}/{region}/clustering.txt" + params: + eps = config['clustering']['eps'] shell: """ - python workflow/scripts/read_cluster.py cluster {input.sequences} {input.dists} -o {output.clustering} + python workflow/scripts/read_cluster.py cluster {input.sequences} {input.dists} --eps {params.eps} -o {output.clustering} """ rule pca_plot: diff --git a/workflow/scripts/read_cluster.py b/workflow/scripts/read_cluster.py index 484779f..80711fd 100644 --- a/workflow/scripts/read_cluster.py +++ b/workflow/scripts/read_cluster.py @@ -213,8 +213,8 @@ def parse_args(): cluster_parser = subparsers.add_parser('cluster', help="Cluster reads by distance.") cluster_parser.add_argument("sequences", type=Path) cluster_parser.add_argument("distances", type=Path) - cluster_parser.add_argument("--eps", type=float, default=0.15) cluster_parser.add_argument("--min_samples", type=int, default=5) + cluster_parser.add_argument("--eps", type=float, default=0.3) cluster_parser.add_argument("--output", "-o", 
type=Path, default=None) plot_parser = subparsers.add_parser('plot', help="Plot the clustering in a PCA plot (2D).")