Skip to content

Commit

Permalink
pNFS: Add tracking to limit the number of pNFS retries
Browse files Browse the repository at this point in the history
When the client is reading or writing using pNFS, and hits an error
on the DS, then it typically sends a LAYOUTERROR and/or LAYOUTRETURN
to the MDS, before redirtying the failed pages, and going for a new
round of reads/writebacks. The problem is that if the server has no
way to fix the DS, this loop can repeat indefinitely, so we need a way
to interrupt it after a set number of attempts have been made.
This patch adds an optional module parameter, io_maxretrans, to the
flexfiles layout driver. It allows the admin to specify how many times
to retry the read/writeback process before failing with a fatal error.
The default behaviour (io_maxretrans = 0) is to retry forever.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
  • Loading branch information
Trond Myklebust authored and Anna Schumaker committed Apr 25, 2019
1 parent 28b1d3f commit 33344e0
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 2 deletions.
7 changes: 7 additions & 0 deletions fs/nfs/direct.c
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
}

list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
/* Bump the transmission count */
req->wb_nio++;
if (!nfs_pageio_add_request(&desc, req)) {
nfs_list_move_request(req, &failed);
spin_lock(&cinfo.inode->i_lock);
Expand Down Expand Up @@ -703,6 +705,11 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
/*
* Despite the reboot, the write was successful,
* so reset wb_nio.
*/
req->wb_nio = 0;
/* Note the rewrite will go through mds */
nfs_mark_request_commit(req, NULL, &cinfo, 0);
} else
Expand Down
8 changes: 8 additions & 0 deletions fs/nfs/flexfilelayout/flexfilelayout.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
#define FF_LAYOUT_POLL_RETRY_MAX (15*HZ)
#define FF_LAYOUTRETURN_MAXERR 20

static unsigned short io_maxretrans;

static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
struct nfs_pgio_header *hdr);
static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
Expand Down Expand Up @@ -925,6 +927,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
pgm = &pgio->pg_mirrors[0];
pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;

pgio->pg_maxretrans = io_maxretrans;
return;
out_nolseg:
if (pgio->pg_error < 0)
Expand Down Expand Up @@ -992,6 +995,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
}

pgio->pg_maxretrans = io_maxretrans;
return;

out_mds:
Expand Down Expand Up @@ -2515,3 +2519,7 @@ MODULE_DESCRIPTION("The NFSv4 flexfile layout driver");

module_init(nfs4flexfilelayout_init);
module_exit(nfs4flexfilelayout_exit);

module_param(io_maxretrans, ushort, 0644);
MODULE_PARM_DESC(io_maxretrans, "The number of times the NFSv4.1 client "
"retries an I/O request before returning an error. ");
14 changes: 13 additions & 1 deletion fs/nfs/pagelist.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
#include <linux/nfs.h>
#include <linux/nfs3.h>
#include <linux/nfs4.h>
#include <linux/nfs_page.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/nfs_mount.h>
#include <linux/export.h>

Expand Down Expand Up @@ -327,6 +327,7 @@ __nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page,
req->wb_bytes = count;
req->wb_context = get_nfs_open_context(ctx);
kref_init(&req->wb_kref);
req->wb_nio = 0;
return req;
}

Expand Down Expand Up @@ -370,6 +371,7 @@ nfs_create_subreq(struct nfs_page *req, struct nfs_page *last,
nfs_lock_request(ret);
ret->wb_index = req->wb_index;
nfs_page_group_init(ret, last);
ret->wb_nio = req->wb_nio;
}
return ret;
}
Expand Down Expand Up @@ -724,6 +726,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_mirrors_dynamic = NULL;
desc->pg_mirrors = desc->pg_mirrors_static;
nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize);
desc->pg_maxretrans = 0;
}

/**
Expand Down Expand Up @@ -983,6 +986,15 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
return 0;
mirror->pg_base = req->wb_pgbase;
}

if (desc->pg_maxretrans && req->wb_nio > desc->pg_maxretrans) {
if (NFS_SERVER(desc->pg_inode)->flags & NFS_MOUNT_SOFTERR)
desc->pg_error = -ETIMEDOUT;
else
desc->pg_error = -EIO;
return 0;
}

if (!nfs_can_coalesce_requests(prev, req, desc))
return 0;
nfs_list_move_request(req, &mirror->pg_list);
Expand Down
5 changes: 5 additions & 0 deletions fs/nfs/write.c
Original file line number Diff line number Diff line change
Expand Up @@ -1009,6 +1009,8 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
goto remove_req;
}
if (nfs_write_need_commit(hdr)) {
/* Reset wb_nio, since the write was successful. */
req->wb_nio = 0;
memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
nfs_mark_request_commit(req, hdr->lseg, &cinfo,
hdr->pgio_mirror_idx);
Expand Down Expand Up @@ -1142,6 +1144,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
req->wb_bytes = end - req->wb_offset;
else
req->wb_bytes = rqend - req->wb_offset;
req->wb_nio = 0;
return req;
out_flushme:
/*
Expand Down Expand Up @@ -1416,6 +1419,8 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
*/
static void nfs_redirty_request(struct nfs_page *req)
{
/* Bump the transmission count */
req->wb_nio++;
nfs_mark_request_dirty(req);
set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
nfs_end_page_writeback(req);
Expand Down
4 changes: 3 additions & 1 deletion include/linux/nfs_page.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ struct nfs_page {
struct nfs_write_verifier wb_verf; /* Commit cookie */
struct nfs_page *wb_this_page; /* list of reqs for this page */
struct nfs_page *wb_head; /* head pointer for req list */
unsigned short wb_nio; /* Number of I/O attempts */
};

struct nfs_pageio_descriptor;
Expand Down Expand Up @@ -87,7 +88,6 @@ struct nfs_pgio_mirror {
};

struct nfs_pageio_descriptor {
unsigned char pg_moreio : 1;
struct inode *pg_inode;
const struct nfs_pageio_ops *pg_ops;
const struct nfs_rw_ops *pg_rw_ops;
Expand All @@ -105,6 +105,8 @@ struct nfs_pageio_descriptor {
struct nfs_pgio_mirror pg_mirrors_static[1];
struct nfs_pgio_mirror *pg_mirrors_dynamic;
u32 pg_mirror_idx; /* current mirror */
unsigned short pg_maxretrans;
unsigned char pg_moreio : 1;
};

/* arbitrarily selected limit to number of mirrors */
Expand Down

0 comments on commit 33344e0

Please sign in to comment.