Skip to content

Commit

Permalink
afs: Fix missing net error handling
Browse files Browse the repository at this point in the history
kAFS can be given certain network errors (EADDRNOTAVAIL, EHOSTDOWN and
ERFKILL) that it doesn't handle in its server/address rotation algorithms.
They cause the probing and rotation to abort immediately rather than
rotating.

Fix this by:

 (1) Abstracting out the error prioritisation from the VL and FS rotation
     algorithms into a common function and expanding its usage into the
     server probing code.

     When multiple errors are available, this code selects the one we'd
     prefer to return.

 (2) Adding handling for EADDRNOTAVAIL, EHOSTDOWN and ERFKILL.

Fixes: 0fafdc9 ("afs: Fix file locking")
Fixes: 0338747d8454 ("afs: Probe multiple fileservers simultaneously")
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
  • Loading branch information
David Howells authored and Al Viro committed Nov 30, 2018
1 parent ae3b736 commit 4584ae9
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 113 deletions.
39 changes: 24 additions & 15 deletions fs/afs/fs_probe.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,11 @@ void afs_fileserver_probe_result(struct afs_call *call)
afs_io_error(call, afs_io_error_fs_probe_fail);
goto out;
case -ECONNRESET: /* Responded, but call expired. */
case -ERFKILL:
case -EADDRNOTAVAIL:
case -ENETUNREACH:
case -EHOSTUNREACH:
case -EHOSTDOWN:
case -ECONNREFUSED:
case -ETIMEDOUT:
case -ETIME:
Expand Down Expand Up @@ -132,12 +135,14 @@ void afs_fileserver_probe_result(struct afs_call *call)
static int afs_do_probe_fileserver(struct afs_net *net,
struct afs_server *server,
struct key *key,
unsigned int server_index)
unsigned int server_index,
struct afs_error *_e)
{
struct afs_addr_cursor ac = {
.index = 0,
};
int ret;
bool in_progress = false;
int err;

_enter("%pU", &server->uuid);

Expand All @@ -151,15 +156,17 @@ static int afs_do_probe_fileserver(struct afs_net *net,
server->probe.rtt = UINT_MAX;

for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
ret = afs_fs_get_capabilities(net, server, &ac, key, server_index,
err = afs_fs_get_capabilities(net, server, &ac, key, server_index,
true);
if (ret != -EINPROGRESS) {
afs_fs_probe_done(server);
return ret;
}
if (err == -EINPROGRESS)
in_progress = true;
else
afs_prioritise_error(_e, err, ac.abort_code);
}

return 0;
if (!in_progress)
afs_fs_probe_done(server);
return in_progress;
}

/*
Expand All @@ -169,21 +176,23 @@ int afs_probe_fileservers(struct afs_net *net, struct key *key,
struct afs_server_list *list)
{
struct afs_server *server;
int i, ret;
struct afs_error e;
bool in_progress = false;
int i;

e.error = 0;
e.responded = false;
for (i = 0; i < list->nr_servers; i++) {
server = list->servers[i].server;
if (test_bit(AFS_SERVER_FL_PROBED, &server->flags))
continue;

if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags)) {
ret = afs_do_probe_fileserver(net, server, key, i);
if (ret)
return ret;
}
if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags) &&
afs_do_probe_fileserver(net, server, key, i, &e))
in_progress = true;
}

return 0;
return in_progress ? 0 : e.error;
}

/*
Expand Down
9 changes: 9 additions & 0 deletions fs/afs/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,14 @@ struct afs_interface {
unsigned mtu; /* MTU of interface */
};

/*
 * Error prioritisation and accumulation.
 *
 * Running state passed to afs_prioritise_error() while a series of results
 * is folded down to the single error that should be reported.  The visible
 * callers start with ->responded false and ->error set to either 0 or
 * -EDESTADDRREQ before feeding results in.
 */
struct afs_error {
short error; /* Accumulated error: the preferred error selected so far */
bool responded; /* T if server responded; set when an abort (-ECONNABORTED) is recorded */
};

/*
* Cursor for iterating over a server's address list.
*/
Expand Down Expand Up @@ -1015,6 +1023,7 @@ static inline void __afs_stat(atomic_t *s)
* misc.c
*/
extern int afs_abort_to_error(u32);
extern void afs_prioritise_error(struct afs_error *, int, u32);

/*
* mntpt.c
Expand Down
52 changes: 52 additions & 0 deletions fs/afs/misc.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,55 @@ int afs_abort_to_error(u32 abort_code)
default: return -EREMOTEIO;
}
}

/*
 * Rank an error for afs_prioritise_error().  A higher rank is preferred over
 * a lower one; any error not listed here shares the lowest rank.
 */
static int afs_error_rank(int error)
{
	switch (error) {
	case -ETIMEDOUT:
	case -ETIME:
		return 1;
	case -ENOMEM:
	case -ENONET:
		return 2;
	case -ERFKILL:
		return 3;
	case -EADDRNOTAVAIL:
		return 4;
	case -ENETUNREACH:
		return 5;
	case -EHOSTUNREACH:
		return 6;
	case -EHOSTDOWN:
		return 7;
	case -ECONNREFUSED:
		return 8;
	case -ECONNRESET: /* Responded, but call expired. */
		return 9;
	default:
		return 0;
	}
}

/*
 * Select the error to report from a set of errors.
 *
 * @e accumulates the preferred error across successive calls; @error is the
 * next result to fold in and @abort_code is only consulted when @error is
 * -ECONNABORTED.
 *
 *  - A zero @error is ignored.
 *  - -ECONNABORTED always wins: the abort code is translated with
 *    afs_abort_to_error(), stored, and @e is marked as responded.
 *  - Once @e is marked as responded, no other kind of error replaces what
 *    is stored.
 *  - Otherwise @error replaces the stored error unless the stored error has
 *    a strictly higher rank; on a tie the newer error is kept.
 */
void afs_prioritise_error(struct afs_error *e, int error, u32 abort_code)
{
	if (error == 0)
		return;

	if (error == -ECONNABORTED) {
		e->responded = true;
		e->error = afs_abort_to_error(abort_code);
		return;
	}

	/* An abort has already been recorded; nothing else may displace it. */
	if (e->responded)
		return;

	/* Keep what we have if it is strictly preferable to the new error. */
	if (afs_error_rank(e->error) > afs_error_rank(error))
		return;

	e->error = error;
}
53 changes: 13 additions & 40 deletions fs/afs/rotate.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
struct afs_addr_list *alist;
struct afs_server *server;
struct afs_vnode *vnode = fc->vnode;
u32 rtt, abort_code;
struct afs_error e;
u32 rtt;
int error = fc->ac.error, i;

_enter("%lx[%d],%lx[%d],%d,%d",
Expand Down Expand Up @@ -306,8 +307,11 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
if (fc->error != -EDESTADDRREQ)
goto iterate_address;
/* Fall through */
case -ERFKILL:
case -EADDRNOTAVAIL:
case -ENETUNREACH:
case -EHOSTUNREACH:
case -EHOSTDOWN:
case -ECONNREFUSED:
_debug("no conn");
fc->error = error;
Expand Down Expand Up @@ -446,50 +450,15 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
if (fc->flags & AFS_FS_CURSOR_VBUSY)
goto restart_from_beginning;

abort_code = 0;
error = -EDESTADDRREQ;
e.error = -EDESTADDRREQ;
e.responded = false;
for (i = 0; i < fc->server_list->nr_servers; i++) {
struct afs_server *s = fc->server_list->servers[i].server;
int probe_error = READ_ONCE(s->probe.error);

switch (probe_error) {
case 0:
continue;
default:
if (error == -ETIMEDOUT ||
error == -ETIME)
continue;
case -ETIMEDOUT:
case -ETIME:
if (error == -ENOMEM ||
error == -ENONET)
continue;
case -ENOMEM:
case -ENONET:
if (error == -ENETUNREACH)
continue;
case -ENETUNREACH:
if (error == -EHOSTUNREACH)
continue;
case -EHOSTUNREACH:
if (error == -ECONNREFUSED)
continue;
case -ECONNREFUSED:
if (error == -ECONNRESET)
continue;
case -ECONNRESET: /* Responded, but call expired. */
if (error == -ECONNABORTED)
continue;
case -ECONNABORTED:
abort_code = s->probe.abort_code;
error = probe_error;
continue;
}
afs_prioritise_error(&e, READ_ONCE(s->probe.error),
s->probe.abort_code);
}

if (error == -ECONNABORTED)
error = afs_abort_to_error(abort_code);

failed_set_error:
fc->error = error;
failed:
Expand Down Expand Up @@ -553,8 +522,11 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
_leave(" = f [abort]");
return false;

case -ERFKILL:
case -EADDRNOTAVAIL:
case -ENETUNREACH:
case -EHOSTUNREACH:
case -EHOSTDOWN:
case -ECONNREFUSED:
case -ETIMEDOUT:
case -ETIME:
Expand Down Expand Up @@ -633,6 +605,7 @@ int afs_end_vnode_operation(struct afs_fs_cursor *fc)
struct afs_net *net = afs_v2net(fc->vnode);

if (fc->error == -EDESTADDRREQ ||
fc->error == -EADDRNOTAVAIL ||
fc->error == -ENETUNREACH ||
fc->error == -EHOSTUNREACH)
afs_dump_edestaddrreq(fc);
Expand Down
45 changes: 27 additions & 18 deletions fs/afs/vl_probe.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,11 @@ void afs_vlserver_probe_result(struct afs_call *call)
afs_io_error(call, afs_io_error_vl_probe_fail);
goto out;
case -ECONNRESET: /* Responded, but call expired. */
case -ERFKILL:
case -EADDRNOTAVAIL:
case -ENETUNREACH:
case -EHOSTUNREACH:
case -EHOSTDOWN:
case -ECONNREFUSED:
case -ETIMEDOUT:
case -ETIME:
Expand Down Expand Up @@ -129,15 +132,17 @@ void afs_vlserver_probe_result(struct afs_call *call)
* Probe all of a vlserver's addresses to find out the best route and to
* query its capabilities.
*/
static int afs_do_probe_vlserver(struct afs_net *net,
struct afs_vlserver *server,
struct key *key,
unsigned int server_index)
static bool afs_do_probe_vlserver(struct afs_net *net,
struct afs_vlserver *server,
struct key *key,
unsigned int server_index,
struct afs_error *_e)
{
struct afs_addr_cursor ac = {
.index = 0,
};
int ret;
bool in_progress = false;
int err;

_enter("%s", server->name);

Expand All @@ -151,15 +156,17 @@ static int afs_do_probe_vlserver(struct afs_net *net,
server->probe.rtt = UINT_MAX;

for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
ret = afs_vl_get_capabilities(net, &ac, key, server,
err = afs_vl_get_capabilities(net, &ac, key, server,
server_index, true);
if (ret != -EINPROGRESS) {
afs_vl_probe_done(server);
return ret;
}
if (err == -EINPROGRESS)
in_progress = true;
else
afs_prioritise_error(_e, err, ac.abort_code);
}

return 0;
if (!in_progress)
afs_vl_probe_done(server);
return in_progress;
}

/*
Expand All @@ -169,21 +176,23 @@ int afs_send_vl_probes(struct afs_net *net, struct key *key,
struct afs_vlserver_list *vllist)
{
struct afs_vlserver *server;
int i, ret;
struct afs_error e;
bool in_progress = false;
int i;

e.error = 0;
e.responded = false;
for (i = 0; i < vllist->nr_servers; i++) {
server = vllist->servers[i].server;
if (test_bit(AFS_VLSERVER_FL_PROBED, &server->flags))
continue;

if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags)) {
ret = afs_do_probe_vlserver(net, server, key, i);
if (ret)
return ret;
}
if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags) &&
afs_do_probe_vlserver(net, server, key, i, &e))
in_progress = true;
}

return 0;
return in_progress ? 0 : e.error;
}

/*
Expand Down
Loading

0 comments on commit 4584ae9

Please sign in to comment.