Skip to content

Commit

Permalink
replace param min_samples with dynamic calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
kriese committed Jun 3, 2022
1 parent c3739cf commit 371d209
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions workflow/scripts/read_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,6 @@ def parse_args():
cluster_parser = subparsers.add_parser('cluster', help="Cluster reads by distance.")
cluster_parser.add_argument("sequences", type=Path)
cluster_parser.add_argument("distances", type=Path)
cluster_parser.add_argument("--min_samples", type=int, default=5)
cluster_parser.add_argument("--eps", type=float, default=0.3)
cluster_parser.add_argument("--output", "-o", type=Path, default=None)

Expand All @@ -229,13 +228,15 @@ def parse_args():
if __name__ == '__main__':
args = parse_args()
seq_coll = SequenceCollection.from_fasta(args.sequences)
n_reads = seq_coll.n_reads()
n_excluded = len(seq_coll.get_labels())
if args.command == "cluster":
# Inputs are: distances, output, eps (min_samples is now derived from the number of reads)
# produces a file (output) that contains the cluster labels for all given sequences
if args.distances is not None:
matrix = read_dist_matrix(args.distances)
clustering = cluster_on_distances(matrix, args.eps, args.min_samples, n_excluded)
clustering = cluster_on_distances(matrix, args.eps,
min_samples=min(n_reads//3+1, 5), exclude_last_n=n_excluded)
write_clustering(clustering, args.output)
elif args.sequences is None:
print("You have to either pass pairwise distances or sequences to be clustered.")
Expand Down

0 comments on commit 371d209

Please sign in to comment.