Skip to content

Commit

Permalink
vsock/virtio: MSG_ZEROCOPY flag support
Browse files Browse the repository at this point in the history
This adds handling of MSG_ZEROCOPY flag on transmission path:

1) If this flag is set and zerocopy transmission is possible (enabled
   in socket options and the transport allows zerocopy), then a
   non-linear skb will be created and filled with the pages of the
   user's buffer. The pages of the user's buffer are locked in memory
   by 'get_user_pages()'.
2) Changes how the skb is assigned to its owning socket: instead of
   'skb_set_owner_sk_safe()' it calls 'skb_set_owner_w()'. The reason
   for this change is that '__zerocopy_sg_from_iter()' increments
   'sk_wmem_alloc' of the socket, so to decrease this field correctly,
   the proper skb destructor is needed: 'sock_wfree()'. This destructor
   is set by 'skb_set_owner_w()'.
3) Adds a new callback to 'struct virtio_transport': 'can_msgzerocopy'.
   If this callback is set, then the transport needs an extra check to
   be able to send the provided number of buffers in zerocopy mode.
   Currently, the only transport that needs this callback set is
   virtio, because this transport adds new buffers to the virtio queue
   and we need to check that the number of these buffers does not
   exceed the size of the queue (this is required by the virtio spec).
   The vhost and loopback transports don't need this check.

Signed-off-by: Arseniy Krasnov <avkrasnov@salutedevices.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
  • Loading branch information
Arseniy Krasnov authored and Paolo Abeni committed Sep 21, 2023
1 parent 4b0bf10 commit 581512a
Show file tree
Hide file tree
Showing 4 changed files with 241 additions and 62 deletions.
9 changes: 9 additions & 0 deletions include/linux/virtio_vsock.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,15 @@ struct virtio_transport {

/* Takes ownership of the packet */
int (*send_pkt)(struct sk_buff *skb);

/* Used in MSG_ZEROCOPY mode. Checks that the provided data
* (number of buffers) can be transmitted in zerocopy mode.
* If this callback is not implemented for the current
* transport, it means that the transport doesn't need
* extra checks and can perform zerocopy transmission by
* default.
*/
bool (*can_msgzerocopy)(int bufs_num);
};

ssize_t
Expand Down
12 changes: 8 additions & 4 deletions include/trace/events/vsock_virtio_transport_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,17 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
__u32 len,
__u16 type,
__u16 op,
__u32 flags
__u32 flags,
bool zcopy
),
TP_ARGS(
src_cid, src_port,
dst_cid, dst_port,
len,
type,
op,
flags
flags,
zcopy
),
TP_STRUCT__entry(
__field(__u32, src_cid)
Expand All @@ -62,6 +64,7 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
__field(__u16, type)
__field(__u16, op)
__field(__u32, flags)
__field(bool, zcopy)
),
TP_fast_assign(
__entry->src_cid = src_cid;
Expand All @@ -72,14 +75,15 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
__entry->type = type;
__entry->op = op;
__entry->flags = flags;
__entry->zcopy = zcopy;
),
TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x",
TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x zcopy=%s",
__entry->src_cid, __entry->src_port,
__entry->dst_cid, __entry->dst_port,
__entry->len,
show_type(__entry->type),
show_op(__entry->op),
__entry->flags)
__entry->flags, __entry->zcopy ? "true" : "false")
);

TRACE_EVENT(virtio_transport_recv_pkt,
Expand Down
32 changes: 32 additions & 0 deletions net/vmw_vsock/virtio_transport.c
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,37 @@ static void virtio_vsock_rx_done(struct virtqueue *vq)
queue_work(virtio_vsock_workqueue, &vsock->rx_work);
}

/* Tell whether 'bufs_num' buffers may be sent in MSG_ZEROCOPY mode.
 *
 * Returns true only when a virtio vsock device is currently published
 * through 'the_virtio_vsock' and 'bufs_num' does not exceed the maximum
 * size of its tx virtqueue; returns false otherwise.
 */
static bool virtio_transport_can_msgzerocopy(int bufs_num)
{
	struct virtio_vsock *vsock;
	struct virtqueue *tx_vq;
	bool fits = false;

	rcu_read_lock();

	vsock = rcu_dereference(the_virtio_vsock);
	if (!vsock)
		goto out;

	tx_vq = vsock->vqs[VSOCK_VQ_TX];

	/* Every page of the user's buffer is queued as a separate
	 * buffer in the tx virtqueue, so the page count is compared
	 * with the queue's maximum capacity. An skb that does not
	 * fit into the free space of the queue is put back at the
	 * head of the socket's tx list to be retried later; an skb
	 * larger than the entire queue would therefore be requeued
	 * forever and block all skbs behind it.
	 */
	fits = bufs_num <= tx_vq->num_max;

out:
	rcu_read_unlock();

	return fits;
}

static bool virtio_transport_seqpacket_allow(u32 remote_cid);

static struct virtio_transport virtio_transport = {
Expand Down Expand Up @@ -504,6 +535,7 @@ static struct virtio_transport virtio_transport = {
},

.send_pkt = virtio_transport_send_pkt,
.can_msgzerocopy = virtio_transport_can_msgzerocopy,
};

static bool virtio_transport_seqpacket_allow(u32 remote_cid)
Expand Down
Loading

0 comments on commit 581512a

Please sign in to comment.