Skip to content

Commit

Permalink
Merge branch 'net-timestamp'
Browse files Browse the repository at this point in the history
Willem de Bruijn says:

====================
net-timestamp: blinding

Changes
  (v2 -> v3)
  - rebase only: v2 did not make it to patchwork / netdev
  (v1 -> v2)
  - fix capability check in patch 2
      this could be moved into net/core/sock.c as sk_capable_nouser()
  (rfc -> v1)
  - dropped patch 4: timestamp batching
      due to complexity, as discussed
  - dropped patch 5: default mode
      because it does not really cover all use cases, as discussed
  - added documentation
  - minor fix, see patch 2

Two issues were raised during recent timestamping discussions:
1. looping full packets on the error queue exposes packet headers
2. TCP timestamping with retransmissions generates many timestamps

This RFC patchset is an attempt at addressing both without breaking
legacy behavior.

Patch 1 reintroduces the "no payload" timestamp option, which loops
timestamps onto an empty skb. This reduces the pressure on SO_RCVBUF
from looping many timestamps. It does not reduce the number of recv()
calls needed to process them. The timestamp cookie mechanism developed
in http://patchwork.ozlabs.org/patch/427213/ did, but this is
considerably simpler.

Patch 2 then gives administrators the power to block all timestamp
requests by unprivileged users that loop packet data. I proposed this
earlier as a backward compatible workaround in the discussion of

  net-timestamp: pull headers for SOCK_STREAM
  http://patchwork.ozlabs.org/patch/414810/

Patch 3 only updates the txtimestamp example to test this option.
Verified that with option '-n', length is zero in all cases and
option '-I' (PKTINFO) stops working.
====================

Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Feb 3, 2015
2 parents 9766e97 + 2368592 commit 6942241
Show file tree
Hide file tree
Showing 11 changed files with 113 additions and 17 deletions.
21 changes: 21 additions & 0 deletions Documentation/networking/timestamping.txt
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,27 @@ SOF_TIMESTAMPING_OPT_CMSG:
option IP_PKTINFO simultaneously.


SOF_TIMESTAMPING_OPT_TSONLY:

Applies to transmit timestamps only. Makes the kernel return the
timestamp as a cmsg alongside an empty packet, as opposed to
alongside the original packet. This reduces the amount of memory
charged to the socket's receive budget (SO_RCVBUF) and delivers
the timestamp even if sysctl net.core.tstamp_allow_data is 0.
This option disables SOF_TIMESTAMPING_OPT_CMSG.


New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
regardless of the setting of sysctl net.core.tstamp_allow_data.

An exception is when a process needs additional cmsg data, for
instance SOL_IP/IP_PKTINFO to detect the egress network interface.
Then pass option SOF_TIMESTAMPING_OPT_CMSG. This option depends on
having access to the contents of the original packet, so it cannot be
combined with SOF_TIMESTAMPING_OPT_TSONLY.


1.4 Bytestream Timestamps

The SO_TIMESTAMPING interface supports timestamping of bytes in a
Expand Down
28 changes: 24 additions & 4 deletions Documentation/networking/timestamping/txtimestamp.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ static int do_ipv6 = 1;
static int cfg_payload_len = 10;
static bool cfg_show_payload;
static bool cfg_do_pktinfo;
static bool cfg_loop_nodata;
static uint16_t dest_port = 9000;

static struct sockaddr_in daddr;
Expand Down Expand Up @@ -141,6 +142,9 @@ static void print_payload(char *data, int len)
{
int i;

if (!len)
return;

if (len > 70)
len = 70;

Expand Down Expand Up @@ -177,6 +181,7 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
struct sock_extended_err *serr = NULL;
struct scm_timestamping *tss = NULL;
struct cmsghdr *cm;
int batch = 0;

for (cm = CMSG_FIRSTHDR(msg);
cm && cm->cmsg_len;
Expand Down Expand Up @@ -209,10 +214,18 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
} else
fprintf(stderr, "unknown cmsg %d,%d\n",
cm->cmsg_level, cm->cmsg_type);

if (serr && tss) {
print_timestamp(tss, serr->ee_info, serr->ee_data,
payload_len);
serr = NULL;
tss = NULL;
batch++;
}
}

if (serr && tss)
print_timestamp(tss, serr->ee_info, serr->ee_data, payload_len);
if (batch > 1)
fprintf(stderr, "batched %d timestamps\n", batch);
}

static int recv_errmsg(int fd)
Expand Down Expand Up @@ -244,7 +257,7 @@ static int recv_errmsg(int fd)
if (ret == -1 && errno != EAGAIN)
error(1, errno, "recvmsg");

if (ret > 0) {
if (ret >= 0) {
__recv_errmsg_cmsg(&msg, ret);
if (cfg_show_payload)
print_payload(data, cfg_payload_len);
Expand Down Expand Up @@ -309,6 +322,9 @@ static void do_test(int family, unsigned int opt)
opt |= SOF_TIMESTAMPING_SOFTWARE |
SOF_TIMESTAMPING_OPT_CMSG |
SOF_TIMESTAMPING_OPT_ID;
if (cfg_loop_nodata)
opt |= SOF_TIMESTAMPING_OPT_TSONLY;

if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
(char *) &opt, sizeof(opt)))
error(1, 0, "setsockopt timestamping");
Expand Down Expand Up @@ -378,6 +394,7 @@ static void __attribute__((noreturn)) usage(const char *filepath)
" -h: show this message\n"
" -I: request PKTINFO\n"
" -l N: send N bytes at a time\n"
" -n: set no-payload option\n"
" -r: use raw\n"
" -R: use raw (IP_HDRINCL)\n"
" -p N: connect to port N\n"
Expand All @@ -392,7 +409,7 @@ static void parse_opt(int argc, char **argv)
int proto_count = 0;
char c;

while ((c = getopt(argc, argv, "46hIl:p:rRux")) != -1) {
while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) {
switch (c) {
case '4':
do_ipv6 = 0;
Expand All @@ -403,6 +420,9 @@ static void parse_opt(int argc, char **argv)
case 'I':
cfg_do_pktinfo = true;
break;
case 'n':
cfg_loop_nodata = true;
break;
case 'r':
proto_count++;
cfg_proto = SOCK_RAW;
Expand Down
8 changes: 8 additions & 0 deletions Documentation/sysctl/net.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,14 @@ rmem_max

The maximum receive socket buffer size in bytes.

tstamp_allow_data
-----------------
Allow processes to receive tx timestamps looped together with the original
packet contents. If disabled, transmit timestamp requests from unprivileged
processes are dropped unless socket option SOF_TIMESTAMPING_OPT_TSONLY is set.
Default: 1 (on)


wmem_default
------------

Expand Down
1 change: 1 addition & 0 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -2239,6 +2239,7 @@ bool sk_net_capable(const struct sock *sk, int cap);
extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;

extern int sysctl_tstamp_allow_data;
extern int sysctl_optmem_max;

extern __u32 sysctl_wmem_default;
Expand Down
3 changes: 2 additions & 1 deletion include/uapi/linux/net_tstamp.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ enum {
SOF_TIMESTAMPING_TX_SCHED = (1<<8),
SOF_TIMESTAMPING_TX_ACK = (1<<9),
SOF_TIMESTAMPING_OPT_CMSG = (1<<10),
SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),

SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_CMSG,
SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TSONLY,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
SOF_TIMESTAMPING_LAST
};
Expand Down
40 changes: 34 additions & 6 deletions net/core/skbuff.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@
#include <asm/uaccess.h>
#include <trace/events/skb.h>
#include <linux/highmem.h>
#include <linux/capability.h>
#include <linux/user_namespace.h>

struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
Expand Down Expand Up @@ -3690,11 +3692,28 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
kfree_skb(skb);
}

/*
 * Decide whether a transmit timestamp may be delivered for @sk.
 *
 * Returns true right away when sysctl_tstamp_allow_data is non-zero or
 * when @tsonly is set. Otherwise the answer is true only if the socket
 * is still attached to a file and file_ns_capable() reports CAP_NET_RAW
 * for that file in init_user_ns. The socket/file pointers are inspected
 * with sk_callback_lock held for reading.
 */
static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
{
	struct socket *sock;
	bool allowed = false;

	if (likely(sysctl_tstamp_allow_data || tsonly))
		return true;

	read_lock_bh(&sk->sk_callback_lock);
	sock = sk->sk_socket;
	if (sock && sock->file)
		allowed = file_ns_capable(sock->file, &init_user_ns,
					  CAP_NET_RAW);
	read_unlock_bh(&sk->sk_callback_lock);

	return allowed;
}

void skb_complete_tx_timestamp(struct sk_buff *skb,
struct skb_shared_hwtstamps *hwtstamps)
{
struct sock *sk = skb->sk;

if (!skb_may_tx_timestamp(sk, false))
return;

/* take a reference to prevent skb_orphan() from freeing the socket */
sock_hold(sk);

Expand All @@ -3710,19 +3729,28 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
struct sock *sk, int tstype)
{
struct sk_buff *skb;
bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;

if (!sk)
if (!sk || !skb_may_tx_timestamp(sk, tsonly))
return;

if (hwtstamps)
*skb_hwtstamps(orig_skb) = *hwtstamps;
if (tsonly)
skb = alloc_skb(0, GFP_ATOMIC);
else
orig_skb->tstamp = ktime_get_real();

skb = skb_clone(orig_skb, GFP_ATOMIC);
skb = skb_clone(orig_skb, GFP_ATOMIC);
if (!skb)
return;

if (tsonly) {
skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags;
skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
}

if (hwtstamps)
*skb_hwtstamps(skb) = *hwtstamps;
else
skb->tstamp = ktime_get_real();

__skb_complete_tx_timestamp(skb, sk, tstype);
}
EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
Expand Down
3 changes: 3 additions & 0 deletions net/core/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,8 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);

int sysctl_tstamp_allow_data __read_mostly = 1;

struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
EXPORT_SYMBOL_GPL(memalloc_socks);

Expand Down Expand Up @@ -840,6 +842,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
ret = -EINVAL;
break;
}

if (val & SOF_TIMESTAMPING_OPT_ID &&
!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
if (sk->sk_protocol == IPPROTO_TCP) {
Expand Down
9 changes: 9 additions & 0 deletions net/core/sysctl_net_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,15 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tstamp_allow_data",
.data = &sysctl_tstamp_allow_data,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one
},
#ifdef CONFIG_RPS
{
.procname = "rps_sock_flow_entries",
Expand Down
7 changes: 4 additions & 3 deletions net/ipv4/ip_sockglue.c
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)

serr = SKB_EXT_ERR(skb);

if (sin) {
if (sin && skb->len) {
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
serr->addr_offset);
Expand All @@ -496,8 +496,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
sin = &errhdr.offender;
memset(sin, 0, sizeof(*sin));

if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) {
if (skb->len &&
(serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) {
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
if (inet_sk(sk)->cmsg_flags)
Expand Down
5 changes: 2 additions & 3 deletions net/ipv6/datagram.c
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)

serr = SKB_EXT_ERR(skb);

if (sin) {
if (sin && skb->len) {
const unsigned char *nh = skb_network_header(skb);
sin->sin6_family = AF_INET6;
sin->sin6_flowinfo = 0;
Expand All @@ -394,8 +394,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
sin = &errhdr.offender;
memset(sin, 0, sizeof(*sin));

if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL && skb->len) {
sin->sin6_family = AF_INET6;
if (np->rxopt.all) {
if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP &&
Expand Down
5 changes: 5 additions & 0 deletions net/rxrpc/ar-error.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ void rxrpc_UDP_error_report(struct sock *sk)
_leave("UDP socket errqueue empty");
return;
}
if (!skb->len) {
_leave("UDP empty message");
kfree_skb(skb);
return;
}

rxrpc_new_skb(skb);

Expand Down

0 comments on commit 6942241

Please sign in to comment.