Skip to content

Commit

Permalink
ceph: resubmit requests on pg mapping change (not just primary change)
Browse files Browse the repository at this point in the history
OSD requests need to be resubmitted on any pg mapping change, not just when
the pg primary changes.  Resending only when the primary changes results in
occasional 'hung' requests during osd cluster recovery or rebalancing.

Signed-off-by: Sage Weil <sage@newdream.net>
  • Loading branch information
Sage Weil committed May 11, 2010
1 parent 04d000e commit d85b705
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 9 deletions.
19 changes: 15 additions & 4 deletions fs/ceph/osd_client.c
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,8 @@ static int __map_osds(struct ceph_osd_client *osdc,
{
struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
struct ceph_pg pgid;
int o = -1;
int acting[CEPH_PG_MAX_SIZE];
int o = -1, num = 0;
int err;

dout("map_osds %p tid %lld\n", req, req->r_tid);
Expand All @@ -576,17 +577,27 @@ static int __map_osds(struct ceph_osd_client *osdc,
pgid = reqhead->layout.ol_pgid;
req->r_pgid = pgid;

o = ceph_calc_pg_primary(osdc->osdmap, pgid);
err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting);
if (err > 0) {
o = acting[0];
num = err;
}

if ((req->r_osd && req->r_osd->o_osd == o &&
req->r_sent >= req->r_osd->o_incarnation) ||
req->r_sent >= req->r_osd->o_incarnation &&
req->r_num_pg_osds == num &&
memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) ||
(req->r_osd == NULL && o == -1))
return 0; /* no change */

dout("map_osds tid %llu pgid %d.%x osd%d (was osd%d)\n",
req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
req->r_osd ? req->r_osd->o_osd : -1);

/* record full pg acting set */
memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num);
req->r_num_pg_osds = num;

if (req->r_osd) {
__cancel_request(req);
list_del_init(&req->r_osd_item);
Expand All @@ -612,7 +623,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
__remove_osd_from_lru(req->r_osd);
list_add(&req->r_osd_item, &req->r_osd->o_requests);
}
err = 1; /* osd changed */
err = 1; /* osd or pg changed */

out:
return err;
Expand Down
2 changes: 2 additions & 0 deletions fs/ceph/osd_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ struct ceph_osd_request {
struct list_head r_osd_item;
struct ceph_osd *r_osd;
struct ceph_pg r_pgid;
int r_pg_osds[CEPH_PG_MAX_SIZE];
int r_num_pg_osds;

struct ceph_connection *r_con_filling_msg;

Expand Down
29 changes: 24 additions & 5 deletions fs/ceph/osdmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -1040,23 +1040,42 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
return osds;
}

/*
* Return acting set for given pgid.
*/
int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
int *acting)
{
int rawosds[CEPH_PG_MAX_SIZE], *osds;
int i, o, num = CEPH_PG_MAX_SIZE;

osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
if (!osds)
return -1;

/* primary is first up osd */
o = 0;
for (i = 0; i < num; i++)
if (ceph_osd_is_up(osdmap, osds[i]))
acting[o++] = osds[i];
return o;
}

/*
* Return primary osd for given pgid, or -1 if none.
*/
int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
{
int rawosds[10], *osds;
int i, num = ARRAY_SIZE(rawosds);
int rawosds[CEPH_PG_MAX_SIZE], *osds;
int i, num = CEPH_PG_MAX_SIZE;

osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
if (!osds)
return -1;

/* primary is first up osd */
for (i = 0; i < num; i++)
if (ceph_osd_is_up(osdmap, osds[i])) {
if (ceph_osd_is_up(osdmap, osds[i]))
return osds[i];
break;
}
return -1;
}
2 changes: 2 additions & 0 deletions fs/ceph/osdmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
const char *oid,
struct ceph_file_layout *fl,
struct ceph_osdmap *osdmap);
extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
int *acting);
extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
struct ceph_pg pgid);

Expand Down
1 change: 1 addition & 0 deletions fs/ceph/rados.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ struct ceph_timespec {
#define CEPH_PG_LAYOUT_LINEAR 2
#define CEPH_PG_LAYOUT_HYBRID 3

#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */

/*
* placement group.
Expand Down

0 comments on commit d85b705

Please sign in to comment.