From 93b80887668226180ea5f5349cc728ca6dc700ab Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Thu, 14 Dec 2023 15:52:28 +0300 Subject: [PATCH 1/3] virtio/vsock: fix logic which reduces credit update messages Add one more condition for sending credit update during dequeue from stream socket: when number of bytes in the rx queue is smaller than SO_RCVLOWAT value of the socket. This is actual for non-default value of SO_RCVLOWAT (e.g. not 1) - idea is to "kick" peer to continue data transmission, because we need at least SO_RCVLOWAT bytes in our rx queue to wake up user for reading data (in corner case it is also possible to stuck both tx and rx sides, this is why 'Fixes' is used). Fixes: b89d882dc9fc ("vsock/virtio: reduce credit update messages") Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport_common.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 6df246b532606..b35306dfcebed 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -557,6 +557,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, struct virtio_vsock_sock *vvs = vsk->trans; size_t bytes, total = 0; struct sk_buff *skb; + u32 fwd_cnt_delta; + bool low_rx_bytes; int err = -EFAULT; u32 free_space; @@ -600,7 +602,10 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, } } - free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt); + fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt; + free_space = vvs->buf_alloc - fwd_cnt_delta; + low_rx_bytes = (vvs->rx_bytes < + sock_rcvlowat(sk_vsock(vsk), 0, INT_MAX)); spin_unlock_bh(&vvs->rx_lock); @@ -610,9 +615,11 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, * too high causes extra messages. Too low causes transmitter * stalls. As stalls are in theory more expensive than extra * messages, we set the limit to a high value. TODO: experiment - * with different values. + * with different values. Also send credit update message when + * number of bytes in rx queue is not enough to wake up reader. */ - if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) + if (fwd_cnt_delta && + (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE || low_rx_bytes)) virtio_transport_send_credit_update(vsk); return total; From 0fe1798968115488c0c02f4633032a015b1faf97 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Thu, 14 Dec 2023 15:52:29 +0300 Subject: [PATCH 2/3] virtio/vsock: send credit update during setting SO_RCVLOWAT Send credit update message when SO_RCVLOWAT is updated and it is bigger than number of bytes in rx queue. It is needed, because 'poll()' will wait until number of bytes in rx queue will be not smaller than O_RCVLOWAT, so kick sender to send more data. Otherwise mutual hungup for tx/rx is possible: sender waits for free space and receiver is waiting data in 'poll()'. Rename 'set_rcvlowat' callback to 'notify_set_rcvlowat' and set 'sk->sk_rcvlowat' only in one place (i.e. 'vsock_set_rcvlowat'), so the transport doesn't need to do it. Fixes: b89d882dc9fc ("vsock/virtio: reduce credit update messages") Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/vhost/vsock.c | 1 + include/linux/virtio_vsock.h | 1 + include/net/af_vsock.h | 2 +- net/vmw_vsock/af_vsock.c | 9 ++++++-- net/vmw_vsock/hyperv_transport.c | 4 ++-- net/vmw_vsock/virtio_transport.c | 1 + net/vmw_vsock/virtio_transport_common.c | 30 +++++++++++++++++++++++++ net/vmw_vsock/vsock_loopback.c | 1 + 8 files changed, 44 insertions(+), 5 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index f75731396b7ef..ec20ecff85c7f 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -449,6 +449,7 @@ static struct virtio_transport vhost_transport = { .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, .notify_buffer_size = virtio_transport_notify_buffer_size, + .notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat, .read_skb = virtio_transport_read_skb, }, diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index ebb3ce63d64da..c82089dee0c83 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -256,4 +256,5 @@ void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit); void virtio_transport_deliver_tap_pkt(struct sk_buff *skb); int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *list); int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t read_actor); +int virtio_transport_notify_set_rcvlowat(struct vsock_sock *vsk, int val); #endif /* _LINUX_VIRTIO_VSOCK_H */ diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index e302c0e804d0f..535701efc1e5c 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -137,7 +137,6 @@ struct vsock_transport { u64 (*stream_rcvhiwat)(struct vsock_sock *); bool (*stream_is_active)(struct vsock_sock *); bool (*stream_allow)(u32 cid, u32 port); - int (*set_rcvlowat)(struct vsock_sock *vsk, int val); /* SEQ_PACKET. */ ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg, @@ -168,6 +167,7 @@ struct vsock_transport { struct vsock_transport_send_notify_data *); /* sk_lock held by the caller */ void (*notify_buffer_size)(struct vsock_sock *, u64 *); + int (*notify_set_rcvlowat)(struct vsock_sock *vsk, int val); /* Shutdown. */ int (*shutdown)(struct vsock_sock *, int); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 816725af281f3..54ba7316f8085 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -2264,8 +2264,13 @@ static int vsock_set_rcvlowat(struct sock *sk, int val) transport = vsk->transport; - if (transport && transport->set_rcvlowat) - return transport->set_rcvlowat(vsk, val); + if (transport && transport->notify_set_rcvlowat) { + int err; + + err = transport->notify_set_rcvlowat(vsk, val); + if (err) + return err; + } WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); return 0; diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 7cb1a9d2cdb4f..e2157e3872177 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -816,7 +816,7 @@ int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written, } static -int hvs_set_rcvlowat(struct vsock_sock *vsk, int val) +int hvs_notify_set_rcvlowat(struct vsock_sock *vsk, int val) { return -EOPNOTSUPP; } @@ -856,7 +856,7 @@ static struct vsock_transport hvs_transport = { .notify_send_pre_enqueue = hvs_notify_send_pre_enqueue, .notify_send_post_enqueue = hvs_notify_send_post_enqueue, - .set_rcvlowat = hvs_set_rcvlowat + .notify_set_rcvlowat = hvs_notify_set_rcvlowat }; static bool hvs_check_transport(struct vsock_sock *vsk) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index af5bab1acee17..f495b9e5186b2 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -537,6 +537,7 @@ static struct virtio_transport virtio_transport = { .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, .notify_buffer_size = virtio_transport_notify_buffer_size, + .notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat, .read_skb = virtio_transport_read_skb, }, diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index b35306dfcebed..16ff976a86e3e 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1690,6 +1690,36 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto } EXPORT_SYMBOL_GPL(virtio_transport_read_skb); +int virtio_transport_notify_set_rcvlowat(struct vsock_sock *vsk, int val) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + bool send_update; + + spin_lock_bh(&vvs->rx_lock); + + /* If number of available bytes is less than new SO_RCVLOWAT value, + * kick sender to send more data, because sender may sleep in its + * 'send()' syscall waiting for enough space at our side. Also + * don't send credit update when peer already knows actual value - + * such transmission will be useless. + */ + send_update = (vvs->rx_bytes < val) && + (vvs->fwd_cnt != vvs->last_fwd_cnt); + + spin_unlock_bh(&vvs->rx_lock); + + if (send_update) { + int err; + + err = virtio_transport_send_credit_update(vsk); + if (err < 0) + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_set_rcvlowat); + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Asias He"); MODULE_DESCRIPTION("common code for virtio vsock"); diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c index 0486401674117..6dea6119f5b28 100644 --- a/net/vmw_vsock/vsock_loopback.c +++ b/net/vmw_vsock/vsock_loopback.c @@ -96,6 +96,7 @@ static struct virtio_transport loopback_transport = { .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, .notify_buffer_size = virtio_transport_notify_buffer_size, + .notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat, .read_skb = virtio_transport_read_skb, }, From 542e893fbadc51caa38a7c4c2a8f8e822cdba2b1 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Thu, 14 Dec 2023 15:52:30 +0300 Subject: [PATCH 3/3] vsock/test: two tests to check credit update logic Both tests are almost same, only differs in two 'if' conditions, so implemented in a single function. Tests check, that credit update message is sent: 1) During setting SO_RCVLOWAT value of the socket. 2) When number of 'rx_bytes' become smaller than SO_RCVLOWAT value. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- tools/testing/vsock/vsock_test.c | 175 +++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 01fa816868bc4..66246d81d6549 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1232,6 +1232,171 @@ static void test_double_bind_connect_client(const struct test_opts *opts) } } +#define RCVLOWAT_CREDIT_UPD_BUF_SIZE (1024 * 128) +/* This define is the same as in 'include/linux/virtio_vsock.h': + * it is used to decide when to send credit update message during + * reading from rx queue of a socket. Value and its usage in + * kernel is important for this test. + */ +#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) + +static void test_stream_rcvlowat_def_cred_upd_client(const struct test_opts *opts) +{ + size_t buf_size; + void *buf; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + /* Send 1 byte more than peer's buffer size. */ + buf_size = RCVLOWAT_CREDIT_UPD_BUF_SIZE + 1; + + buf = malloc(buf_size); + if (!buf) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + /* Wait until peer sets needed buffer size. */ + recv_byte(fd, 1, 0); + + if (send(fd, buf, buf_size, 0) != buf_size) { + perror("send failed"); + exit(EXIT_FAILURE); + } + + free(buf); + close(fd); +} + +static void test_stream_credit_update_test(const struct test_opts *opts, + bool low_rx_bytes_test) +{ + size_t recv_buf_size; + struct pollfd fds; + size_t buf_size; + void *buf; + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + buf_size = RCVLOWAT_CREDIT_UPD_BUF_SIZE; + + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + &buf_size, sizeof(buf_size))) { + perror("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); + exit(EXIT_FAILURE); + } + + if (low_rx_bytes_test) { + /* Set new SO_RCVLOWAT here. This enables sending credit + * update when number of bytes if our rx queue become < + * SO_RCVLOWAT value. + */ + recv_buf_size = 1 + VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; + + if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, + &recv_buf_size, sizeof(recv_buf_size))) { + perror("setsockopt(SO_RCVLOWAT)"); + exit(EXIT_FAILURE); + } + } + + /* Send one dummy byte here, because 'setsockopt()' above also + * sends special packet which tells sender to update our buffer + * size. This 'send_byte()' will serialize such packet with data + * reads in a loop below. Sender starts transmission only when + * it receives this single byte. + */ + send_byte(fd, 1, 0); + + buf = malloc(buf_size); + if (!buf) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + /* Wait until there will be 128KB of data in rx queue. */ + while (1) { + ssize_t res; + + res = recv(fd, buf, buf_size, MSG_PEEK); + if (res == buf_size) + break; + + if (res <= 0) { + fprintf(stderr, "unexpected 'recv()' return: %zi\n", res); + exit(EXIT_FAILURE); + } + } + + /* There is 128KB of data in the socket's rx queue, dequeue first + * 64KB, credit update is sent if 'low_rx_bytes_test' == true. + * Otherwise, credit update is sent in 'if (!low_rx_bytes_test)'. + */ + recv_buf_size = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; + recv_buf(fd, buf, recv_buf_size, 0, recv_buf_size); + + if (!low_rx_bytes_test) { + recv_buf_size++; + + /* Updating SO_RCVLOWAT will send credit update. */ + if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, + &recv_buf_size, sizeof(recv_buf_size))) { + perror("setsockopt(SO_RCVLOWAT)"); + exit(EXIT_FAILURE); + } + } + + fds.fd = fd; + fds.events = POLLIN | POLLRDNORM | POLLERR | + POLLRDHUP | POLLHUP; + + /* This 'poll()' will return once we receive last byte + * sent by client. + */ + if (poll(&fds, 1, -1) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + if (fds.revents & POLLERR) { + fprintf(stderr, "'poll()' error\n"); + exit(EXIT_FAILURE); + } + + if (fds.revents & (POLLIN | POLLRDNORM)) { + recv_buf(fd, buf, recv_buf_size, MSG_DONTWAIT, recv_buf_size); + } else { + /* These flags must be set, as there is at + * least 64KB of data ready to read. + */ + fprintf(stderr, "POLLIN | POLLRDNORM expected\n"); + exit(EXIT_FAILURE); + } + + free(buf); + close(fd); +} + +static void test_stream_cred_upd_on_low_rx_bytes(const struct test_opts *opts) +{ + test_stream_credit_update_test(opts, true); +} + +static void test_stream_cred_upd_on_set_rcvlowat(const struct test_opts *opts) +{ + test_stream_credit_update_test(opts, false); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1342,6 +1507,16 @@ static struct test_case test_cases[] = { .run_client = test_double_bind_connect_client, .run_server = test_double_bind_connect_server, }, + { + .name = "SOCK_STREAM virtio credit update + SO_RCVLOWAT", + .run_client = test_stream_rcvlowat_def_cred_upd_client, + .run_server = test_stream_cred_upd_on_set_rcvlowat, + }, + { + .name = "SOCK_STREAM virtio credit update + low rx_bytes", + .run_client = test_stream_rcvlowat_def_cred_upd_client, + .run_server = test_stream_cred_upd_on_low_rx_bytes, + }, {}, };