Skip to content

Commit

Permalink
Merge branch 'shrink-struct-ubuf_info'
Browse files Browse the repository at this point in the history
Pavel Begunkov says:

====================
shrink struct ubuf_info

struct ubuf_info is large but not all fields are needed for all
cases. We have limited space in io_uring for it and large ubuf_info
prevents some struct embedding, even though we use only a subset
of the fields. It's also not very clean trying to use this typeless
extra space.

Shrink struct ubuf_info to only necessary fields used in generic paths,
namely ->callback, ->refcnt and ->flags, which take only 16 bytes. And
make MSG_ZEROCOPY and some other users to embed it into a larger struct
ubuf_info_msgzc mimicking the former ubuf_info.

Note, xen/vhost may also have some cleaning on top by creating
new structs containing ubuf_info but with proper types.
====================

Link: https://lore.kernel.org/r/cover.1663892211.git.asml.silence@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Sep 29, 2022
2 parents 929a6cd + e7d2b51 commit 578b054
Show file tree
Hide file tree
Showing 9 changed files with 48 additions and 35 deletions.
2 changes: 1 addition & 1 deletion drivers/net/xen-netback/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ struct pending_tx_info {
* ubuf_to_vif is a helper which finds the struct xenvif from a pointer
* to this field.
*/
struct ubuf_info callback_struct;
struct ubuf_info_msgzc callback_struct;
};

#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE)
Expand Down
4 changes: 2 additions & 2 deletions drivers/net/xen-netback/interface.c
Original file line number Diff line number Diff line change
Expand Up @@ -591,8 +591,8 @@ int xenvif_init_queue(struct xenvif_queue *queue)
}

for (i = 0; i < MAX_PENDING_REQS; i++) {
queue->pending_tx_info[i].callback_struct = (struct ubuf_info)
{ .callback = xenvif_zerocopy_callback,
queue->pending_tx_info[i].callback_struct = (struct ubuf_info_msgzc)
{ { .callback = xenvif_zerocopy_callback },
{ { .ctx = NULL,
.desc = i } } };
queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
Expand Down
7 changes: 4 additions & 3 deletions drivers/net/xen-netback/netback.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,

/* Find the containing VIF's structure from a pointer in pending_tx_info array
*/
static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf)
static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info_msgzc *ubuf)
{
u16 pending_idx = ubuf->desc;
struct pending_tx_info *temp =
Expand Down Expand Up @@ -1228,11 +1228,12 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
return work_done;
}

void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf,
void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf_base,
bool zerocopy_success)
{
unsigned long flags;
pending_ring_idx_t index;
struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base);
struct xenvif_queue *queue = ubuf_to_queue(ubuf);

/* This is the only place where we grab this lock, to protect callbacks
Expand All @@ -1241,7 +1242,7 @@ void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf,
spin_lock_irqsave(&queue->callback_lock, flags);
do {
u16 pending_idx = ubuf->desc;
ubuf = (struct ubuf_info *) ubuf->ctx;
ubuf = (struct ubuf_info_msgzc *) ubuf->ctx;
BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
MAX_PENDING_REQS);
index = pending_index(queue->dealloc_prod);
Expand Down
15 changes: 8 additions & 7 deletions drivers/vhost/net.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ struct vhost_net_virtqueue {
/* Number of XDP frames batched */
int batched_xdp;
/* an array of userspace buffers info */
struct ubuf_info *ubuf_info;
struct ubuf_info_msgzc *ubuf_info;
/* Reference counting for outstanding ubufs.
* Protected by vq mutex. Writers must also take device mutex. */
struct vhost_net_ubuf_ref *ubufs;
Expand Down Expand Up @@ -382,8 +382,9 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
}

static void vhost_zerocopy_callback(struct sk_buff *skb,
struct ubuf_info *ubuf, bool success)
struct ubuf_info *ubuf_base, bool success)
{
struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base);
struct vhost_net_ubuf_ref *ubufs = ubuf->ctx;
struct vhost_virtqueue *vq = ubufs->vq;
int cnt;
Expand Down Expand Up @@ -871,7 +872,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
size_t len, total_len = 0;
int err;
struct vhost_net_ubuf_ref *ubufs;
struct ubuf_info *ubuf;
struct ubuf_info_msgzc *ubuf;
bool zcopy_used;
int sent_pkts = 0;

Expand Down Expand Up @@ -907,14 +908,14 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
ubuf = nvq->ubuf_info + nvq->upend_idx;
vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
ubuf->callback = vhost_zerocopy_callback;
ubuf->ctx = nvq->ubufs;
ubuf->desc = nvq->upend_idx;
ubuf->flags = SKBFL_ZEROCOPY_FRAG;
refcount_set(&ubuf->refcnt, 1);
ubuf->ubuf.callback = vhost_zerocopy_callback;
ubuf->ubuf.flags = SKBFL_ZEROCOPY_FRAG;
refcount_set(&ubuf->ubuf.refcnt, 1);
msg.msg_control = &ctl;
ctl.type = TUN_MSG_UBUF;
ctl.ptr = ubuf;
ctl.ptr = &ubuf->ubuf;
msg.msg_controllen = sizeof(ctl);
ubufs = nvq->ubufs;
atomic_inc(&ubufs->refcount);
Expand Down
11 changes: 9 additions & 2 deletions include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,13 @@ enum {
struct ubuf_info {
void (*callback)(struct sk_buff *, struct ubuf_info *,
bool zerocopy_success);
refcount_t refcnt;
u8 flags;
};

struct ubuf_info_msgzc {
struct ubuf_info ubuf;

union {
struct {
unsigned long desc;
Expand All @@ -545,8 +552,6 @@ struct ubuf_info {
u32 bytelen;
};
};
refcount_t refcnt;
u8 flags;

struct mmpin {
struct user_struct *user;
Expand All @@ -555,6 +560,8 @@ struct ubuf_info {
};

#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
#define uarg_to_msgzc(ubuf_ptr) container_of((ubuf_ptr), struct ubuf_info_msgzc, \
ubuf)

int mm_account_pinned_pages(struct mmpin *mmp, size_t size);
void mm_unaccount_pinned_pages(struct mmpin *mmp);
Expand Down
38 changes: 21 additions & 17 deletions net/core/skbuff.c
Original file line number Diff line number Diff line change
Expand Up @@ -1188,7 +1188,7 @@ EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);

static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
{
struct ubuf_info *uarg;
struct ubuf_info_msgzc *uarg;
struct sk_buff *skb;

WARN_ON_ONCE(!in_task());
Expand All @@ -1206,19 +1206,19 @@ static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
return NULL;
}

uarg->callback = msg_zerocopy_callback;
uarg->ubuf.callback = msg_zerocopy_callback;
uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
uarg->len = 1;
uarg->bytelen = size;
uarg->zerocopy = 1;
uarg->flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
refcount_set(&uarg->refcnt, 1);
uarg->ubuf.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
refcount_set(&uarg->ubuf.refcnt, 1);
sock_hold(sk);

return uarg;
return &uarg->ubuf;
}

static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
static inline struct sk_buff *skb_from_uarg(struct ubuf_info_msgzc *uarg)
{
return container_of((void *)uarg, struct sk_buff, cb);
}
Expand All @@ -1227,6 +1227,7 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
struct ubuf_info *uarg)
{
if (uarg) {
struct ubuf_info_msgzc *uarg_zc;
const u32 byte_limit = 1 << 19; /* limit to a few TSO */
u32 bytelen, next;

Expand All @@ -1242,20 +1243,21 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
return NULL;
}

bytelen = uarg->bytelen + size;
if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) {
uarg_zc = uarg_to_msgzc(uarg);
bytelen = uarg_zc->bytelen + size;
if (uarg_zc->len == USHRT_MAX - 1 || bytelen > byte_limit) {
/* TCP can create new skb to attach new uarg */
if (sk->sk_type == SOCK_STREAM)
goto new_alloc;
return NULL;
}

next = (u32)atomic_read(&sk->sk_zckey);
if ((u32)(uarg->id + uarg->len) == next) {
if (mm_account_pinned_pages(&uarg->mmp, size))
if ((u32)(uarg_zc->id + uarg_zc->len) == next) {
if (mm_account_pinned_pages(&uarg_zc->mmp, size))
return NULL;
uarg->len++;
uarg->bytelen = bytelen;
uarg_zc->len++;
uarg_zc->bytelen = bytelen;
atomic_set(&sk->sk_zckey, ++next);

/* no extra ref when appending to datagram (MSG_MORE) */
Expand Down Expand Up @@ -1291,7 +1293,7 @@ static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
return true;
}

static void __msg_zerocopy_callback(struct ubuf_info *uarg)
static void __msg_zerocopy_callback(struct ubuf_info_msgzc *uarg)
{
struct sk_buff *tail, *skb = skb_from_uarg(uarg);
struct sock_exterr_skb *serr;
Expand Down Expand Up @@ -1344,19 +1346,21 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
bool success)
{
uarg->zerocopy = uarg->zerocopy & success;
struct ubuf_info_msgzc *uarg_zc = uarg_to_msgzc(uarg);

uarg_zc->zerocopy = uarg_zc->zerocopy & success;

if (refcount_dec_and_test(&uarg->refcnt))
__msg_zerocopy_callback(uarg);
__msg_zerocopy_callback(uarg_zc);
}
EXPORT_SYMBOL_GPL(msg_zerocopy_callback);

void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
{
struct sock *sk = skb_from_uarg(uarg)->sk;
struct sock *sk = skb_from_uarg(uarg_to_msgzc(uarg))->sk;

atomic_dec(&sk->sk_zckey);
uarg->len--;
uarg_to_msgzc(uarg)->len--;

if (have_uref)
msg_zerocopy_callback(NULL, uarg, true);
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/ip_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -1043,7 +1043,7 @@ static int __ip_append_data(struct sock *sk,
paged = true;
zc = true;
} else {
uarg->zerocopy = 0;
uarg_to_msgzc(uarg)->zerocopy = 0;
skb_zcopy_set(skb, uarg, &extra_uref);
}
}
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1239,7 +1239,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
}
zc = sk->sk_route_caps & NETIF_F_SG;
if (!zc)
uarg->zerocopy = 0;
uarg_to_msgzc(uarg)->zerocopy = 0;
}
}

Expand Down
2 changes: 1 addition & 1 deletion net/ipv6/ip6_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -1567,7 +1567,7 @@ static int __ip6_append_data(struct sock *sk,
paged = true;
zc = true;
} else {
uarg->zerocopy = 0;
uarg_to_msgzc(uarg)->zerocopy = 0;
skb_zcopy_set(skb, uarg, &extra_uref);
}
}
Expand Down

0 comments on commit 578b054

Please sign in to comment.