Skip to content

Commit

Permalink
libceph: implement RECONNECT_SEQ feature
Browse files Browse the repository at this point in the history
This is an old protocol extension that allows the client and server to
avoid resending old messages after a reconnect (following a socket error).
Instead, they exchange their sequence numbers during the handshake.  This
avoids sending a bunch of useless data over the socket.

It has been supported in the server code since v0.22 (Sep 2010).

Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
  • Loading branch information
Sage Weil committed May 2, 2013
1 parent 022f3e2 commit 3a23083
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 5 deletions.
2 changes: 2 additions & 0 deletions include/linux/ceph/ceph_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
*/
#define CEPH_FEATURES_SUPPORTED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \
CEPH_FEATURE_RECONNECT_SEQ | \
CEPH_FEATURE_PGID64 | \
CEPH_FEATURE_PGPOOL3 | \
CEPH_FEATURE_OSDENC | \
Expand All @@ -51,6 +52,7 @@

/*
 * Default "required" feature mask: the bitwise OR of the feature flags
 * listed below, which includes CEPH_FEATURE_RECONNECT_SEQ.
 *
 * NOTE(review): presumably a peer that does not advertise every bit in
 * this mask is refused during the connect handshake -- confirm against
 * the feature check in process_connect().
 */
#define CEPH_FEATURES_REQUIRED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \
CEPH_FEATURE_RECONNECT_SEQ | \
CEPH_FEATURE_PGID64 | \
CEPH_FEATURE_PGPOOL3 | \
CEPH_FEATURE_OSDENC)
Expand Down
1 change: 1 addition & 0 deletions include/linux/ceph/msgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ struct ceph_entity_inst {
#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */
#define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */
#define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */
#define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */


/*
Expand Down
43 changes: 38 additions & 5 deletions net/ceph/messenger.c
Original file line number Diff line number Diff line change
Expand Up @@ -1246,6 +1246,24 @@ static void prepare_write_ack(struct ceph_connection *con)
con_flag_set(con, CON_FLAG_WRITE_PENDING);
}

/*
 * Queue our received-sequence number so it can be sent to the peer as
 * part of the connection handshake.
 *
 * in_seq_acked is brought up to in_seq, the outgoing kvec state is
 * reset, and the little-endian encoding of that sequence number is
 * added as outgoing data.  CON_FLAG_WRITE_PENDING is set on the
 * connection afterwards.
 */
static void prepare_write_seq(struct ceph_connection *con)
{
	dout("prepare_write_seq %p %llu -> %llu\n", con,
	     con->in_seq_acked, con->in_seq);

	/* what we are about to report becomes our acked position */
	con->in_seq_acked = con->in_seq;

	con_out_kvec_reset(con);

	/* stage the value in wire (little-endian) byte order */
	con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
	con_out_kvec_add(con, sizeof(con->out_temp_ack), &con->out_temp_ack);

	con_flag_set(con, CON_FLAG_WRITE_PENDING);
}

/*
* Prepare to write keepalive byte.
*/
Expand Down Expand Up @@ -1582,6 +1600,13 @@ static void prepare_read_ack(struct ceph_connection *con)
con->in_base_pos = 0;
}

/*
 * Set up the read state for the handshake sequence number: mark the
 * incoming tag as CEPH_MSGR_TAG_SEQ and rewind the base read position
 * to zero.
 */
static void prepare_read_seq(struct ceph_connection *con)
{
	dout("prepare_read_seq %p\n", con);

	con->in_tag = CEPH_MSGR_TAG_SEQ;
	con->in_base_pos = 0;
}

static void prepare_read_tag(struct ceph_connection *con)
{
dout("prepare_read_tag %p\n", con);
Expand Down Expand Up @@ -2059,6 +2084,7 @@ static int process_connect(struct ceph_connection *con)
prepare_read_connect(con);
break;

case CEPH_MSGR_TAG_SEQ:
case CEPH_MSGR_TAG_READY:
if (req_feat & ~server_feat) {
pr_err("%s%lld %s protocol feature mismatch,"
Expand Down Expand Up @@ -2089,7 +2115,12 @@ static int process_connect(struct ceph_connection *con)

con->delay = 0; /* reset backoff memory */

prepare_read_tag(con);
if (con->in_reply.tag == CEPH_MSGR_TAG_SEQ) {
prepare_write_seq(con);
prepare_read_seq(con);
} else {
prepare_read_tag(con);
}
break;

case CEPH_MSGR_TAG_WAIT:
Expand Down Expand Up @@ -2123,7 +2154,6 @@ static int read_partial_ack(struct ceph_connection *con)
return read_partial(con, end, size, &con->in_temp_ack);
}


/*
* We can finally discard anything that's been acked.
*/
Expand All @@ -2148,8 +2178,6 @@ static void process_ack(struct ceph_connection *con)
}




static int read_partial_message_section(struct ceph_connection *con,
struct kvec *section,
unsigned int sec_len, u32 *crc)
Expand Down Expand Up @@ -2672,7 +2700,12 @@ static int try_read(struct ceph_connection *con)
prepare_read_tag(con);
goto more;
}
if (con->in_tag == CEPH_MSGR_TAG_ACK) {
if (con->in_tag == CEPH_MSGR_TAG_ACK ||
con->in_tag == CEPH_MSGR_TAG_SEQ) {
/*
* the final handshake seq exchange is semantically
* equivalent to an ACK
*/
ret = read_partial_ack(con);
if (ret <= 0)
goto out;
Expand Down

0 comments on commit 3a23083

Please sign in to comment.