Skip to content

Commit

Permalink
tcp: RX path for devmem TCP
Browse files Browse the repository at this point in the history
In tcp_recvmsg_locked(), detect if the skb being received by the user
is a devmem skb. In this case - if the user provided the MSG_SOCK_DEVMEM
flag - pass it to tcp_recvmsg_devmem() for custom handling.

tcp_recvmsg_devmem() copies any data in the skb header to the linear
buffer, and returns a cmsg to the user indicating the number of bytes
returned in the linear buffer.

tcp_recvmsg_devmem() then loops over the inaccessible devmem skb frags,
and returns to the user a cmsg carrying a struct dmabuf_cmsg that
indicates the location of the data in the dmabuf device memory.
struct dmabuf_cmsg contains this information:

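1. the offset into the dmabuf where the payload starts. 'frag_offset'.
2. the size of the frag. 'frag_size'.
3. an opaque token 'frag_token' to return to the kernel when the buffer
is to be released.
4. the id of the dmabuf the frag belongs to. 'dmabuf_id'.
5. a 'flags' field, currently unused and reserved for future use.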

The pages awaiting freeing are stored in the newly added
sk->sk_user_frags, and each page passed to userspace is get_page()'d.
This reference is dropped once userspace indicates that it is
done reading this page. All pages are released when the socket is
destroyed.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Kaiyuan Zhang <kaiyuanz@google.com>
Signed-off-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20240910171458.219195-10-almasrymina@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Mina Almasry authored and Jakub Kicinski committed Sep 12, 2024
1 parent 65249fe commit 8f0b3cc
Show file tree
Hide file tree
Showing 12 changed files with 333 additions and 5 deletions.
5 changes: 5 additions & 0 deletions arch/alpha/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,11 @@
#define SO_PASSPIDFD 76
#define SO_PEERPIDFD 77

/* devmem TCP: the SCM_DEVMEM_* names are aliases for the SO_DEVMEM_* values. */
#define SO_DEVMEM_LINEAR 78
#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR
#define SO_DEVMEM_DMABUF 79
#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF

#if !defined(__KERNEL__)

#if __BITS_PER_LONG == 64
Expand Down
5 changes: 5 additions & 0 deletions arch/mips/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@
#define SO_PASSPIDFD 76
#define SO_PEERPIDFD 77

/* devmem TCP: the SCM_DEVMEM_* names are aliases for the SO_DEVMEM_* values. */
#define SO_DEVMEM_LINEAR 78
#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR
#define SO_DEVMEM_DMABUF 79
#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF

#if !defined(__KERNEL__)

#if __BITS_PER_LONG == 64
Expand Down
5 changes: 5 additions & 0 deletions arch/parisc/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,11 @@
#define SO_PASSPIDFD 0x404A
#define SO_PEERPIDFD 0x404B

/*
 * devmem TCP: the SCM_DEVMEM_* names are aliases for the SO_DEVMEM_* values.
 *
 * NOTE(review): these two values are decimal 78/79, whereas the options just
 * above use the 0x40xx range (0x404A, 0x404B) - confirm this numbering is
 * intended for this architecture before relying on it.
 */
#define SO_DEVMEM_LINEAR 78
#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR
#define SO_DEVMEM_DMABUF 79
#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF

#if !defined(__KERNEL__)

#if __BITS_PER_LONG == 64
Expand Down
5 changes: 5 additions & 0 deletions arch/sparc/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@
#define SO_PASSPIDFD 0x0055
#define SO_PEERPIDFD 0x0056

/*
 * devmem TCP: the SCM_DEVMEM_* names are aliases for the SO_DEVMEM_* values.
 * The values continue the hex sequence of the options above (0x0055, 0x0056).
 */
#define SO_DEVMEM_LINEAR 0x0057
#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR
#define SO_DEVMEM_DMABUF 0x0058
#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF

#if !defined(__KERNEL__)


Expand Down
1 change: 1 addition & 0 deletions include/linux/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ struct ucred {
* plain text and require encryption
*/

#define MSG_SOCK_DEVMEM 0x2000000 /* Receive path: report devmem skbs to the user via cmsg */
#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */
#define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */
#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */
Expand Down
2 changes: 2 additions & 0 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ struct sk_filter;
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
* @ns_tracker: tracker for netns reference
* @sk_user_frags: xarray of pages the user is holding a reference on.
*/
struct sock {
/*
Expand Down Expand Up @@ -542,6 +543,7 @@ struct sock {
#endif
struct rcu_head sk_rcu;
netns_tracker ns_tracker;
struct xarray sk_user_frags;
};

struct sock_bh_locked {
Expand Down
5 changes: 5 additions & 0 deletions include/uapi/asm-generic/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@
#define SO_PASSPIDFD 76
#define SO_PEERPIDFD 77

/* devmem TCP: the SCM_DEVMEM_* names are aliases for the SO_DEVMEM_* values. */
#define SO_DEVMEM_LINEAR 78
#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR
#define SO_DEVMEM_DMABUF 79
#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF

#if !defined(__KERNEL__)

#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
Expand Down
13 changes: 13 additions & 0 deletions include/uapi/linux/uio.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,19 @@ struct iovec
__kernel_size_t iov_len; /* Must be size_t (1003.1g) */
};

/*
 * struct dmabuf_cmsg - describes one devmem frag reported to userspace
 * through a control message.
 *
 * The fields locate the frag's data inside a dmabuf and carry the token
 * userspace later hands back to release it. The layout uses fixed-width
 * members only: one __u64 followed by four __u32.
 */
struct dmabuf_cmsg {
__u64 frag_offset; /* offset into the dmabuf where the frag's payload
* starts.
*/
__u32 frag_size; /* size of the frag. */
__u32 frag_token; /* opaque token representing this frag; returned
* to the kernel (DEVMEM_DONTNEED) to release it.
*/
__u32 dmabuf_id; /* id of the dmabuf this frag belongs to. */
__u32 flags; /* Currently unused. Reserved for future
* uses.
*/
};

/*
* UIO_MAXIOV shall be at least 16 1003.1g (5.4.1.1)
*/
Expand Down
22 changes: 22 additions & 0 deletions net/core/devmem.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,19 @@ net_iov_binding(const struct net_iov *niov)
return net_iov_owner(niov)->binding;
}

/*
 * net_iov_virtual_addr - address of @niov relative to its owning chunk.
 *
 * The result is the owner's base_virtual plus the niov's index converted to
 * a byte offset (index << PAGE_SHIFT).
 */
static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
{
	struct dmabuf_genpool_chunk_owner *chunk = net_iov_owner(niov);
	/* Widen the index to unsigned long before it is shifted. */
	unsigned long idx = net_iov_idx(niov);

	return chunk->base_virtual + (idx << PAGE_SHIFT);
}

/*
 * net_iov_binding_id - id of the binding that @niov's owner refers to.
 */
static inline u32 net_iov_binding_id(const struct net_iov *niov)
{
	u32 binding_id = net_iov_owner(niov)->binding->id;

	return binding_id;
}

static inline void
net_devmem_dmabuf_binding_get(struct net_devmem_dmabuf_binding *binding)
{
Expand Down Expand Up @@ -153,6 +166,15 @@ static inline void net_devmem_free_dmabuf(struct net_iov *ppiov)
{
}

/*
 * Stub variant of net_iov_virtual_addr() for the alternate preprocessor
 * branch (the controlling condition is outside this hunk): @niov is not
 * inspected and the result is always 0.
 */
static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
{
	return 0UL;
}

/*
 * Stub variant of net_iov_binding_id() for the alternate preprocessor
 * branch (the controlling condition is outside this hunk): @niov is not
 * inspected and the reported id is always 0.
 */
static inline u32 net_iov_binding_id(const struct net_iov *niov)
{
	return 0U;
}
#endif

#endif /* _NET_DEVMEM_H */
Loading

0 comments on commit 8f0b3cc

Please sign in to comment.