Skip to content

Commit

Permalink
Merge patch series "netfs: Miscellaneous fixes"
Browse files Browse the repository at this point in the history
David Howells <dhowells@redhat.com> says:

Here are some miscellaneous fixes and changes for netfslib:

 (1) Fix a number of read-retry hangs, including:

     (a) Incorrect getting/putting of references on subreqs as we retry
     	 them.

     (b) Failure to track whether a last old subrequest in a retried set is
     	 superfluous.

     (c) Inconsistency in the usage of wait queues used for subrequests
     	 (ie. using clear_and_wake_up_bit() whilst waiting on a private
     	 waitqueue).

     	 (Note that waitqueue consistency also needs looking at for
     	 netfs_io_request structs.)

 (2) Add stats counters for retries and publish in /proc/fs/netfs/stats.
     This is not a fix per se, but is useful in debugging and shouldn't
     otherwise change the operation of the code.

 (3) Fix the ordering of queuing subrequests with respect to setting the
     request flag that says we've now queued them all.

* patches from https://lore.kernel.org/r/20250212222402.3618494-1-dhowells@redhat.com:
  netfs: Fix setting NETFS_RREQ_ALL_QUEUED to be after all subreqs queued
  netfs: Add retry stat counters
  netfs: Fix a number of read-retry hangs

Link: https://lore.kernel.org/r/20250212222402.3618494-1-dhowells@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
  • Loading branch information
Christian Brauner committed Feb 13, 2025
2 parents 2401892 + 5de0219 commit a33f725
Show file tree
Hide file tree
Showing 9 changed files with 70 additions and 20 deletions.
19 changes: 13 additions & 6 deletions fs/netfs/buffered_read.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,9 @@ static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
netfs_cache_read_terminated, subreq);
}

static void netfs_issue_read(struct netfs_io_request *rreq,
struct netfs_io_subrequest *subreq)
static void netfs_queue_read(struct netfs_io_request *rreq,
struct netfs_io_subrequest *subreq,
bool last_subreq)
{
struct netfs_io_stream *stream = &rreq->io_streams[0];

Expand All @@ -177,8 +178,17 @@ static void netfs_issue_read(struct netfs_io_request *rreq,
}
}

if (last_subreq) {
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
}

spin_unlock(&rreq->lock);
}

static void netfs_issue_read(struct netfs_io_request *rreq,
struct netfs_io_subrequest *subreq)
{
switch (subreq->source) {
case NETFS_DOWNLOAD_FROM_SERVER:
rreq->netfs_ops->issue_read(subreq);
Expand Down Expand Up @@ -293,11 +303,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
}
size -= slice;
start += slice;
if (size <= 0) {
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
}

netfs_queue_read(rreq, subreq, size <= 0);
netfs_issue_read(rreq, subreq);
cond_resched();
} while (size > 0);
Expand Down
4 changes: 4 additions & 0 deletions fs/netfs/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ extern atomic_t netfs_n_rh_write_begin;
extern atomic_t netfs_n_rh_write_done;
extern atomic_t netfs_n_rh_write_failed;
extern atomic_t netfs_n_rh_write_zskip;
extern atomic_t netfs_n_rh_retry_read_req;
extern atomic_t netfs_n_rh_retry_read_subreq;
extern atomic_t netfs_n_wh_buffered_write;
extern atomic_t netfs_n_wh_writethrough;
extern atomic_t netfs_n_wh_dio_write;
Expand All @@ -147,6 +149,8 @@ extern atomic_t netfs_n_wh_upload_failed;
extern atomic_t netfs_n_wh_write;
extern atomic_t netfs_n_wh_write_done;
extern atomic_t netfs_n_wh_write_failed;
extern atomic_t netfs_n_wh_retry_write_req;
extern atomic_t netfs_n_wh_retry_write_subreq;
extern atomic_t netfs_n_wb_lock_skip;
extern atomic_t netfs_n_wb_lock_wait;
extern atomic_t netfs_n_folioq;
Expand Down
6 changes: 4 additions & 2 deletions fs/netfs/read_collect.c
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,8 @@ void netfs_read_collection_worker(struct work_struct *work)
*/
void netfs_wake_read_collector(struct netfs_io_request *rreq)
{
if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) &&
!test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) {
if (!work_pending(&rreq->work)) {
netfs_get_request(rreq, netfs_rreq_trace_get_work);
if (!queue_work(system_unbound_wq, &rreq->work))
Expand Down Expand Up @@ -586,7 +587,8 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */

/* If we are at the head of the queue, wake up the collector. */
if (list_is_first(&subreq->rreq_link, &stream->subrequests))
if (list_is_first(&subreq->rreq_link, &stream->subrequests) ||
test_bit(NETFS_RREQ_RETRYING, &rreq->flags))
netfs_wake_read_collector(rreq);

netfs_put_subrequest(subreq, true, netfs_sreq_trace_put_terminated);
Expand Down
43 changes: 33 additions & 10 deletions fs/netfs/read_retry.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ static void netfs_reissue_read(struct netfs_io_request *rreq,
{
__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
netfs_stat(&netfs_n_rh_retry_read_subreq);
subreq->rreq->netfs_ops->issue_read(subreq);
}

Expand Down Expand Up @@ -48,6 +48,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
subreq->retry_count++;
netfs_reset_iter(subreq);
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
netfs_reissue_read(rreq, subreq);
}
}
Expand Down Expand Up @@ -75,7 +76,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
struct iov_iter source;
unsigned long long start, len;
size_t part;
bool boundary = false;
bool boundary = false, subreq_superfluous = false;

/* Go through the subreqs and find the next span of contiguous
* buffer that we then rejig (cifs, for example, needs the
Expand Down Expand Up @@ -116,8 +117,10 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
/* Work through the sublist. */
subreq = from;
list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
if (!len)
if (!len) {
subreq_superfluous = true;
break;
}
subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
subreq->start = start - subreq->transferred;
subreq->len = len + subreq->transferred;
Expand Down Expand Up @@ -154,19 +157,21 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)

netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
netfs_reissue_read(rreq, subreq);
if (subreq == to)
if (subreq == to) {
subreq_superfluous = false;
break;
}
}

/* If we managed to use fewer subreqs, we can discard the
* excess; if we used the same number, then we're done.
*/
if (!len) {
if (subreq == to)
if (!subreq_superfluous)
continue;
list_for_each_entry_safe_from(subreq, tmp,
&stream->subrequests, rreq_link) {
trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
trace_netfs_sreq(subreq, netfs_sreq_trace_superfluous);
list_del(&subreq->rreq_link);
netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
if (subreq == to)
Expand All @@ -187,14 +192,12 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
subreq->start = start;
subreq->len = len;
subreq->debug_index = atomic_inc_return(&rreq->subreq_counter);
subreq->stream_nr = stream->stream_nr;
subreq->retry_count = 1;

trace_netfs_sreq_ref(rreq->debug_id, subreq->debug_index,
refcount_read(&subreq->ref),
netfs_sreq_trace_new);
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);

list_add(&subreq->rreq_link, &to->rreq_link);
to = list_next_entry(to, rreq_link);
Expand Down Expand Up @@ -256,14 +259,34 @@ void netfs_retry_reads(struct netfs_io_request *rreq)
{
struct netfs_io_subrequest *subreq;
struct netfs_io_stream *stream = &rreq->io_streams[0];
DEFINE_WAIT(myself);

netfs_stat(&netfs_n_rh_retry_read_req);

set_bit(NETFS_RREQ_RETRYING, &rreq->flags);

/* Wait for all outstanding I/O to quiesce before performing retries as
* we may need to renegotiate the I/O sizes.
*/
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
TASK_UNINTERRUPTIBLE);
if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
continue;

trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
for (;;) {
prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);

if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
break;

trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for);
schedule();
trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
}

finish_wait(&rreq->waitq, &myself);
}
clear_bit(NETFS_RREQ_RETRYING, &rreq->flags);

trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);
netfs_retry_read_subrequests(rreq);
Expand Down
9 changes: 9 additions & 0 deletions fs/netfs/stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ atomic_t netfs_n_rh_write_begin;
atomic_t netfs_n_rh_write_done;
atomic_t netfs_n_rh_write_failed;
atomic_t netfs_n_rh_write_zskip;
atomic_t netfs_n_rh_retry_read_req;
atomic_t netfs_n_rh_retry_read_subreq;
atomic_t netfs_n_wh_buffered_write;
atomic_t netfs_n_wh_writethrough;
atomic_t netfs_n_wh_dio_write;
Expand All @@ -41,6 +43,8 @@ atomic_t netfs_n_wh_upload_failed;
atomic_t netfs_n_wh_write;
atomic_t netfs_n_wh_write_done;
atomic_t netfs_n_wh_write_failed;
atomic_t netfs_n_wh_retry_write_req;
atomic_t netfs_n_wh_retry_write_subreq;
atomic_t netfs_n_wb_lock_skip;
atomic_t netfs_n_wb_lock_wait;
atomic_t netfs_n_folioq;
Expand Down Expand Up @@ -81,6 +85,11 @@ int netfs_stats_show(struct seq_file *m, void *v)
atomic_read(&netfs_n_wh_write),
atomic_read(&netfs_n_wh_write_done),
atomic_read(&netfs_n_wh_write_failed));
seq_printf(m, "Retries: rq=%u rs=%u wq=%u ws=%u\n",
atomic_read(&netfs_n_rh_retry_read_req),
atomic_read(&netfs_n_rh_retry_read_subreq),
atomic_read(&netfs_n_wh_retry_write_req),
atomic_read(&netfs_n_wh_retry_write_subreq));
seq_printf(m, "Objs : rr=%u sr=%u foq=%u wsc=%u\n",
atomic_read(&netfs_n_rh_rreq),
atomic_read(&netfs_n_rh_sreq),
Expand Down
1 change: 1 addition & 0 deletions fs/netfs/write_issue.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ void netfs_reissue_write(struct netfs_io_stream *stream,
subreq->retry_count++;
__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
netfs_stat(&netfs_n_wh_retry_write_subreq);
netfs_do_issue_write(stream, subreq);
}

Expand Down
2 changes: 2 additions & 0 deletions fs/netfs/write_retry.c
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ void netfs_retry_writes(struct netfs_io_request *wreq)
struct netfs_io_stream *stream;
int s;

netfs_stat(&netfs_n_wh_retry_write_req);

/* Wait for all outstanding I/O to quiesce before performing retries as
* we may need to renegotiate the I/O sizes.
*/
Expand Down
2 changes: 1 addition & 1 deletion include/linux/netfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ struct netfs_io_request {
#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */
#define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */
#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */
#define NETFS_RREQ_NEED_RETRY 14 /* Need to try retrying */
#define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */
#define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark
* write to cache on read */
const struct netfs_request_ops *netfs_ops;
Expand Down
4 changes: 3 additions & 1 deletion include/trace/events/netfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@
EM(netfs_sreq_trace_limited, "LIMIT") \
EM(netfs_sreq_trace_need_clear, "N-CLR") \
EM(netfs_sreq_trace_partial_read, "PARTR") \
EM(netfs_sreq_trace_need_retry, "NRTRY") \
EM(netfs_sreq_trace_need_retry, "ND-RT") \
EM(netfs_sreq_trace_prepare, "PREP ") \
EM(netfs_sreq_trace_prep_failed, "PRPFL") \
EM(netfs_sreq_trace_progress, "PRGRS") \
Expand All @@ -108,7 +108,9 @@
EM(netfs_sreq_trace_short, "SHORT") \
EM(netfs_sreq_trace_split, "SPLIT") \
EM(netfs_sreq_trace_submit, "SUBMT") \
EM(netfs_sreq_trace_superfluous, "SPRFL") \
EM(netfs_sreq_trace_terminated, "TERM ") \
EM(netfs_sreq_trace_wait_for, "_WAIT") \
EM(netfs_sreq_trace_write, "WRITE") \
EM(netfs_sreq_trace_write_skip, "SKIP ") \
E_(netfs_sreq_trace_write_term, "WTERM")
Expand Down

0 comments on commit a33f725

Please sign in to comment.