From f94d54208f25dc93f3bcae9e725a582380a503b1 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Mon, 23 Sep 2024 16:07:49 +0100 Subject: [PATCH] afs: Fix possible infinite loop with unresponsive servers A return code of 0 from afs_wait_for_one_fs_probe is an indication that the endpoint state attached to the operation is stale and has been superseded. In that case the iteration needs to be restarted so that the newer probe result state gets used. Failure to do so can result in an tight infinite loop around the iterate_address label, where all addresses are thought to be responsive and have been tried, with nothing to refresh the endpoint state. Fixes: 495f2ae9e355 ("afs: Fix fileserver rotation") Reported-by: Markus Suvanto Link: https://lists.infradead.org/pipermail/linux-afs/2024-July/008628.html cc: linux-afs@lists.infradead.org Signed-off-by: Marc Dionne Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240906134019.131553-1-marc.dionne@auristor.com/ Link: https://lore.kernel.org/r/20240923150756.902363-6-dhowells@redhat.com Signed-off-by: Christian Brauner --- fs/afs/fs_probe.c | 4 ++-- fs/afs/rotate.c | 11 ++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index 580de4adaaf65..b516d05b0fefc 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -506,10 +506,10 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_sta finish_wait(&server->probe_wq, &wait); dont_wait: - if (estate->responsive_set & ~exclude) - return 1; if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags)) return 0; + if (estate->responsive_set & ~exclude) + return 1; if (is_intr && signal_pending(current)) return -ERESTARTSYS; if (timo == 0) diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index ed09d4d4c2112..d612983d6f38e 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -632,8 +632,10 @@ bool afs_select_fileserver(struct afs_operation *op) wait_for_more_probe_results: error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried, !(op->flags & AFS_OPERATION_UNINTR)); - if (!error) + if (error == 1) goto iterate_address; + if (!error) + goto restart_from_beginning; /* We've now had a failure to respond on all of a server's addresses - * immediately probe them again and consider retrying the server. @@ -644,10 +646,13 @@ bool afs_select_fileserver(struct afs_operation *op) error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried, !(op->flags & AFS_OPERATION_UNINTR)); switch (error) { - case 0: + case 1: op->flags &= ~AFS_OPERATION_RETRY_SERVER; - trace_afs_rotate(op, afs_rotate_trace_retry_server, 0); + trace_afs_rotate(op, afs_rotate_trace_retry_server, 1); goto retry_server; + case 0: + trace_afs_rotate(op, afs_rotate_trace_retry_server, 0); + goto restart_from_beginning; case -ERESTARTSYS: afs_op_set_error(op, error); goto failed;