Skip to content

Commit

Permalink
Merge branch 'rds-perf'
Browse files Browse the repository at this point in the history
Sowmini Varadhan says:

====================
RDS: RDS-TCP perf enhancements

A 3-part patchset that (a) improves current RDS-TCP perf
by 2X-3X and (b) refactors earlier robustness code for
better observability/scaling.

Patch 1 is an enhancement of earlier robustness fixes
that had used separate sockets for client and server endpoints to
resolve race conditions. It is possible to have an equivalent
solution that does not use 2 sockets. The benefit of a
single socket solution is that it results in more predictable
and observable behavior for the underlying TCP pipe of an
RDS connection.

Patches 2 and 3 are simple, straightforward perf bug fixes
that align the RDS TCP socket with other parts of the kernel stack.

v2: fix kbuild-test-robot warnings, comments from Sergei Shtylov
    and Santosh Shilimkar.
====================

Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Oct 5, 2015
2 parents 393159e + 76b29ef commit 7e2832f
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 43 deletions.
22 changes: 6 additions & 16 deletions net/rds/connection.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
struct rds_transport *loop_trans;
unsigned long flags;
int ret;
struct rds_transport *otrans = trans;

if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
goto new_conn;
rcu_read_lock();
conn = rds_conn_lookup(net, head, laddr, faddr, trans);
if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
Expand All @@ -147,7 +144,6 @@ static struct rds_connection *__rds_conn_create(struct net *net,
if (conn)
goto out;

new_conn:
conn = kmem_cache_zalloc(rds_conn_slab, gfp);
if (!conn) {
conn = ERR_PTR(-ENOMEM);
Expand Down Expand Up @@ -207,6 +203,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,

atomic_set(&conn->c_state, RDS_CONN_DOWN);
conn->c_send_gen = 0;
conn->c_outgoing = (is_outgoing ? 1 : 0);
conn->c_reconnect_jiffies = 0;
INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
Expand Down Expand Up @@ -243,22 +240,13 @@ static struct rds_connection *__rds_conn_create(struct net *net,
/* Creating normal conn */
struct rds_connection *found;

if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
found = NULL;
else
found = rds_conn_lookup(net, head, laddr, faddr, trans);
found = rds_conn_lookup(net, head, laddr, faddr, trans);
if (found) {
trans->conn_free(conn->c_transport_data);
kmem_cache_free(rds_conn_slab, conn);
conn = found;
} else {
if ((is_outgoing && otrans->t_type == RDS_TRANS_TCP) ||
(otrans->t_type != RDS_TRANS_TCP)) {
/* Only the active side should be added to
* reconnect list for TCP.
*/
hlist_add_head_rcu(&conn->c_hash_node, head);
}
hlist_add_head_rcu(&conn->c_hash_node, head);
rds_cong_add_conn(conn);
rds_conn_count++;
}
Expand Down Expand Up @@ -337,7 +325,9 @@ void rds_conn_shutdown(struct rds_connection *conn)
rcu_read_lock();
if (!hlist_unhashed(&conn->c_hash_node)) {
rcu_read_unlock();
rds_queue_reconnect(conn);
if (conn->c_trans->t_type != RDS_TRANS_TCP ||
conn->c_outgoing == 1)
rds_queue_reconnect(conn);
} else {
rcu_read_unlock();
}
Expand Down
4 changes: 3 additions & 1 deletion net/rds/rds.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ struct rds_connection {
struct hlist_node c_hash_node;
__be32 c_laddr;
__be32 c_faddr;
unsigned int c_loopback:1;
unsigned int c_loopback:1,
c_outgoing:1,
c_pad_to_32:30;
struct rds_connection *c_passive;

struct rds_cong_map *c_lcong;
Expand Down
16 changes: 4 additions & 12 deletions net/rds/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,21 +67,13 @@ void rds_tcp_nonagle(struct socket *sock)
set_fs(oldfs);
}

/* All module specific customizations to the RDS-TCP socket should be done in
* rds_tcp_tune() and applied after socket creation. In general these
* customizations should be tunable via module_param()
*/
void rds_tcp_tune(struct socket *sock)
{
/* The buffer settings below are applied to the struct sock behind @sock. */
struct sock *sk = sock->sk;

/* NOTE(review): presumably turns off Nagle on this socket, going by the
 * helper's name -- confirm against rds_tcp_nonagle().
 */
rds_tcp_nonagle(sock);

/*
* We're trying to saturate gigabit with the default,
* see svc_sock_setbufsize().
*/
/* Both buffer sizes and the userlocks flags are updated while the
 * socket lock is held.
 */
lock_sock(sk);
sk->sk_sndbuf = RDS_TCP_DEFAULT_BUFSIZE;
sk->sk_rcvbuf = RDS_TCP_DEFAULT_BUFSIZE;
/* NOTE(review): the *_LOCK flags presumably mark the sizes as explicitly
 * set so the stack does not resize them -- confirm.
 */
sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
release_sock(sk);
}

u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)
Expand Down
22 changes: 9 additions & 13 deletions net/rds/tcp_listen.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,28 +110,24 @@ int rds_tcp_accept_one(struct socket *sock)
goto out;
}
/* An incoming SYN request came in, and TCP just accepted it.
* We always create a new conn for listen side of TCP, and do not
* add it to the c_hash_list.
*
* If the client reboots, this conn will need to be cleaned up.
* rds_tcp_state_change() will do that cleanup
*/
rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
WARN_ON(!rs_tcp || rs_tcp->t_sock);

/*
* see the comment above rds_queue_delayed_reconnect()
*/
if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
if (rds_conn_state(conn) == RDS_CONN_UP)
rds_tcp_stats_inc(s_tcp_listen_closed_stale);
else
rds_tcp_stats_inc(s_tcp_connect_raced);
rds_conn_drop(conn);
if (rs_tcp->t_sock &&
ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
struct sock *nsk = new_sock->sk;

nsk->sk_user_data = NULL;
nsk->sk_prot->disconnect(nsk, 0);
tcp_done(nsk);
new_sock = NULL;
ret = 0;
goto out;
}

rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
rds_tcp_set_callbacks(new_sock, conn);
rds_connect_complete(conn);
new_sock = NULL;
Expand Down
8 changes: 7 additions & 1 deletion net/rds/tcp_send.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
struct rds_tcp_connection *tc = conn->c_transport_data;
int done = 0;
int ret = 0;
int more;

if (hdr_off == 0) {
/*
Expand Down Expand Up @@ -116,12 +117,15 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
goto out;
}

more = rm->data.op_nents > 1 ? (MSG_MORE | MSG_SENDPAGE_NOTLAST) : 0;
while (sg < rm->data.op_nents) {
int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more;

ret = tc->t_sock->ops->sendpage(tc->t_sock,
sg_page(&rm->data.op_sg[sg]),
rm->data.op_sg[sg].offset + off,
rm->data.op_sg[sg].length - off,
MSG_DONTWAIT|MSG_NOSIGNAL);
flags);
rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.op_sg[sg]),
rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off,
ret);
Expand All @@ -134,6 +138,8 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
off = 0;
sg++;
}
if (sg == rm->data.op_nents - 1)
more = 0;
}

out:
Expand Down

0 comments on commit 7e2832f

Please sign in to comment.