Skip to content

Commit

Permalink
net/smc: Unbind r/w buffer size from clcsock and make them tunable
Browse files Browse the repository at this point in the history
Currently, SMC uses smc->sk.sk_{rcv|snd}buf to size the send buffer and
the RMB, and the values of these buffer sizes are taken from
tcp_{w|r}mem of the clcsock.

The buffer sizes of a TCP socket don't fit SMC well. SMC-R/-D generally
need larger buffers than TCP to get higher performance, because they use
different underlying devices and paths.

So this patch unbinds buffer size from TCP, and introduces two sysctl
knobs to tune them independently. Also, these knobs are per net
namespace and work for containers.

Signed-off-by: Tony Lu <tonylu@linux.alibaba.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
  • Loading branch information
Tony Lu authored and Paolo Abeni committed Sep 22, 2022
1 parent 77eee32 commit 0227f05
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 7 deletions.
18 changes: 18 additions & 0 deletions Documentation/networking/smc-sysctl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,21 @@ smcr_testlink_time - INTEGER
disabling TEST_LINK.

Default: 30 seconds.

wmem - INTEGER
Initial size of send buffer used by SMC sockets.
The default value inherits from net.ipv4.tcp_wmem[1].

The minimum value is 16KiB. There is no hard limit for the maximum value,
but only up to 512KiB is allowed for SMC-R and up to 1MiB for SMC-D.

Default: 16K

rmem - INTEGER
Initial size of receive buffer (RMB) used by SMC sockets.
The default value inherits from net.ipv4.tcp_rmem[1].

The minimum value is 16KiB. There is no hard limit for the maximum value,
but only up to 512KiB is allowed for SMC-R and up to 1MiB for SMC-D.

Default: 128K
2 changes: 2 additions & 0 deletions include/net/netns/smc.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,7 @@ struct netns_smc {
unsigned int sysctl_autocorking_size;
unsigned int sysctl_smcr_buf_type;
int sysctl_smcr_testlink_time;
int sysctl_wmem;
int sysctl_rmem;
};
#endif
5 changes: 2 additions & 3 deletions net/smc/af_smc.c
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct;
sk->sk_protocol = protocol;
WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem));
WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem));
smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_WORK(&smc->connect_work, smc_connect_work);
Expand Down Expand Up @@ -3253,9 +3255,6 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
smc->clcsock = clcsock;
}

smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);

out:
return rc;
}
Expand Down
8 changes: 4 additions & 4 deletions net/smc/smc_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2307,10 +2307,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)

if (is_rmb)
/* use socket recv buffer size (w/o overhead) as start value */
sk_buf_size = smc->sk.sk_rcvbuf / 2;
sk_buf_size = smc->sk.sk_rcvbuf;
else
/* use socket send buffer size (w/o overhead) as start value */
sk_buf_size = smc->sk.sk_sndbuf / 2;
sk_buf_size = smc->sk.sk_sndbuf;

for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
bufsize_short >= 0; bufsize_short--) {
Expand Down Expand Up @@ -2369,15 +2369,15 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (is_rmb) {
conn->rmb_desc = buf_desc;
conn->rmbe_size_short = bufsize_short;
smc->sk.sk_rcvbuf = bufsize * 2;
smc->sk.sk_rcvbuf = bufsize;
atomic_set(&conn->bytes_to_rcv, 0);
conn->rmbe_update_limit =
smc_rmb_wnd_update_limit(buf_desc->len);
if (is_smcd)
smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
} else {
conn->sndbuf_desc = buf_desc;
smc->sk.sk_sndbuf = bufsize * 2;
smc->sk.sk_sndbuf = bufsize;
atomic_set(&conn->sndbuf_space, bufsize);
}
return 0;
Expand Down
21 changes: 21 additions & 0 deletions net/smc/smc_sysctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
#include "smc_llc.h"
#include "smc_sysctl.h"

/* Lower bounds for the "wmem" and "rmem" sysctl entries below; they are
 * passed as .extra1 to proc_dointvec_minmax. Both start at
 * SMC_BUF_MIN_SIZE.
 */
static int min_sndbuf = SMC_BUF_MIN_SIZE;
static int min_rcvbuf = SMC_BUF_MIN_SIZE;

static struct ctl_table smc_table[] = {
{
.procname = "autocorking_size",
Expand All @@ -43,6 +46,22 @@ static struct ctl_table smc_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "wmem",
.data = &init_net.smc.sysctl_wmem,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_sndbuf,
},
{
.procname = "rmem",
.data = &init_net.smc.sysctl_rmem,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_rcvbuf,
},
{ }
};

Expand All @@ -69,6 +88,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));

return 0;

Expand Down

0 comments on commit 0227f05

Please sign in to comment.