Skip to content

Commit

Permalink
net_tstamp: add SCM_TS_OPT_ID to provide OPT_ID in control message
Browse files Browse the repository at this point in the history
The SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX
timestamps with packets sent via a socket. Unfortunately, there is no way
to reliably predict the socket timestamp ID value when sendmsg returns an
error. For UDP sockets it is impossible because of the lockless nature of
UDP transmit: several threads may send packets in parallel. For RAW
sockets, the MSG_MORE option complicates things. More details are in the
conversation [1].
This patch adds a new control message type to give user-space
software an opportunity to control the mapping between packets and
ID values by providing an ID with each sendmsg for UDP sockets.
The documentation is also added in this patch.

[1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/

Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
Link: https://patch.msgid.link/20241001125716.2832769-2-vadfed@meta.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Vadim Fedorenko authored and Jakub Kicinski committed Oct 4, 2024
1 parent 34ea1df commit 4aecca4
Show file tree
Hide file tree
Showing 11 changed files with 75 additions and 12 deletions.
14 changes: 14 additions & 0 deletions Documentation/networking/timestamping.rst
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,20 @@ SOF_TIMESTAMPING_OPT_ID:
among all possibly concurrently outstanding timestamp requests for
that socket.

The process can optionally override the default generated ID by
passing a specific ID with control message SCM_TS_OPT_ID (not
supported for TCP sockets)::

struct msghdr *msg;
...
cmsg = CMSG_FIRSTHDR(msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_TS_OPT_ID;
cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
*((__u32 *) CMSG_DATA(cmsg)) = opt_id;
err = sendmsg(fd, msg, 0);


SOF_TIMESTAMPING_OPT_ID_TCP:
Pass this modifier along with SOF_TIMESTAMPING_OPT_ID for new TCP
timestamping applications. SOF_TIMESTAMPING_OPT_ID defines how the
Expand Down
2 changes: 2 additions & 0 deletions arch/alpha/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@
#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF
#define SO_DEVMEM_DONTNEED 80

#define SCM_TS_OPT_ID 81

#if !defined(__KERNEL__)

#if __BITS_PER_LONG == 64
Expand Down
2 changes: 2 additions & 0 deletions arch/mips/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@
#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF
#define SO_DEVMEM_DONTNEED 80

#define SCM_TS_OPT_ID 81

#if !defined(__KERNEL__)

#if __BITS_PER_LONG == 64
Expand Down
2 changes: 2 additions & 0 deletions arch/parisc/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@
#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF
#define SO_DEVMEM_DONTNEED 80

#define SCM_TS_OPT_ID 0x404C

#if !defined(__KERNEL__)

#if __BITS_PER_LONG == 64
Expand Down
2 changes: 2 additions & 0 deletions arch/sparc/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@
#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF
#define SO_DEVMEM_DONTNEED 0x0059

#define SCM_TS_OPT_ID 0x005a

#if !defined(__KERNEL__)


Expand Down
4 changes: 3 additions & 1 deletion include/net/inet_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ struct inet_cork {
__s16 tos;
char priority;
__u16 gso_size;
u32 ts_opt_id;
u64 transmit_time;
u32 mark;
};
Expand Down Expand Up @@ -241,7 +242,8 @@ struct inet_sock {
struct inet_cork_full cork;
};

#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */
#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */
#define IPCORK_TS_OPT_ID 2 /* ts_opt_id field is valid, overriding sk_tskey */

enum {
INET_FLAGS_PKTINFO = 0,
Expand Down
7 changes: 7 additions & 0 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -954,6 +954,12 @@ enum sock_flags {
};

#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
/*
* The highest bit of sk_tsflags is reserved for kernel-internal
* SOCKCM_FLAG_TS_OPT_ID. A BUILD_BUG_ON() in net/core/sock.c ensures that
* SOF_TIMESTAMPING* values do not reach this reserved area.
*/
#define SOCKCM_FLAG_TS_OPT_ID BIT(31)

static inline void sock_copy_flags(struct sock *nsk, const struct sock *osk)
{
Expand Down Expand Up @@ -1796,6 +1802,7 @@ struct sockcm_cookie {
u64 transmit_time;
u32 mark;
u32 tsflags;
u32 ts_opt_id;
};

static inline void sockcm_init(struct sockcm_cookie *sockc,
Expand Down
2 changes: 2 additions & 0 deletions include/uapi/asm-generic/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@
#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF
#define SO_DEVMEM_DONTNEED 80

#define SCM_TS_OPT_ID 81

#if !defined(__KERNEL__)

#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
Expand Down
13 changes: 13 additions & 0 deletions net/core/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -2899,6 +2899,8 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
{
u32 tsflags;

BUILD_BUG_ON(SOF_TIMESTAMPING_LAST == (1 << 31));

switch (cmsg->cmsg_type) {
case SO_MARK:
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
Expand Down Expand Up @@ -2927,6 +2929,17 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
return -EINVAL;
sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
break;
case SCM_TS_OPT_ID:
if (sk_is_tcp(sk))
return -EINVAL;
tsflags = READ_ONCE(sk->sk_tsflags);
if (!(tsflags & SOF_TIMESTAMPING_OPT_ID))
return -EINVAL;
if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
return -EINVAL;
sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg);
sockc->tsflags |= SOCKCM_FLAG_TS_OPT_ID;
break;
/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
case SCM_RIGHTS:
case SCM_CREDENTIALS:
Expand Down
19 changes: 14 additions & 5 deletions net/ipv4/ip_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -973,7 +973,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
struct rtable *rt = dst_rtable(cork->dst);
bool paged, hold_tskey, extra_uref = false;
bool paged, hold_tskey = false, extra_uref = false;
unsigned int wmem_alloc_delta = 0;
u32 tskey = 0;

Expand Down Expand Up @@ -1049,10 +1049,15 @@ static int __ip_append_data(struct sock *sk,

cork->length += length;

hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
if (hold_tskey)
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
if (cork->flags & IPCORK_TS_OPT_ID) {
tskey = cork->ts_opt_id;
} else {
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hold_tskey = true;
}
}

/* So, what's going on in the loop below?
*
Expand Down Expand Up @@ -1327,6 +1332,10 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
cork->transmit_time = ipc->sockc.transmit_time;
cork->tx_flags = 0;
sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags);
if (ipc->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) {
cork->flags |= IPCORK_TS_OPT_ID;
cork->ts_opt_id = ipc->sockc.ts_opt_id;
}

return 0;
}
Expand Down
20 changes: 14 additions & 6 deletions net/ipv6/ip6_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -1402,7 +1402,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
cork->base.tx_flags = 0;
cork->base.mark = ipc6->sockc.mark;
sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) {
cork->base.flags |= IPCORK_TS_OPT_ID;
cork->base.ts_opt_id = ipc6->sockc.ts_opt_id;
}
cork->base.length = 0;
cork->base.transmit_time = ipc6->sockc.transmit_time;

Expand Down Expand Up @@ -1433,7 +1436,7 @@ static int __ip6_append_data(struct sock *sk,
bool zc = false;
u32 tskey = 0;
struct rt6_info *rt = dst_rt6_info(cork->dst);
bool paged, hold_tskey, extra_uref = false;
bool paged, hold_tskey = false, extra_uref = false;
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
Expand Down Expand Up @@ -1543,10 +1546,15 @@ static int __ip6_append_data(struct sock *sk,
flags &= ~MSG_SPLICE_PAGES;
}

hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
if (hold_tskey)
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
if (cork->flags & IPCORK_TS_OPT_ID) {
tskey = cork->ts_opt_id;
} else {
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hold_tskey = true;
}
}

/*
* Let's try using as much space as possible.
Expand Down

0 comments on commit 4aecca4

Please sign in to comment.