Skip to content

Commit

Permalink
Merge branch 'tstamp-next'
Browse files Browse the repository at this point in the history
Willem de Bruijn says:

====================
timestamping updates

The main goal for this patchset is to allow correlating timestamps
with the egress interface. Also introduce a warning, as discussed
previously, and update the tests to verify the new feature.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Dec 9, 2014
2 parents 8d0c469 + cbd3aad commit aae68bc
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 25 deletions.
33 changes: 25 additions & 8 deletions Documentation/networking/timestamping.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,27 +122,44 @@ SOF_TIMESTAMPING_RAW_HARDWARE:

1.3.3 Timestamp Options

The interface supports one option
The interface supports the options

SOF_TIMESTAMPING_OPT_ID:

Generate a unique identifier along with each packet. A process can
have multiple concurrent timestamping requests outstanding. Packets
can be reordered in the transmit path, for instance in the packet
scheduler. In that case timestamps will be queued onto the error
queue out of order from the original send() calls. This option
embeds a counter that is incremented at send() time, to order
timestamps within a flow.
queue out of order from the original send() calls. As a result, it is
not always possible to uniquely match timestamps to the original
send() calls based on timestamp order or payload inspection alone.

This option associates each packet at send() with a unique
identifier and returns that along with the timestamp. The identifier
is derived from a per-socket u32 counter (that wraps). For datagram
sockets, the counter increments with each sent packet. For stream
sockets, it increments with every byte.

The counter starts at zero. It is initialized the first time that
the socket option is enabled. It is reset each time the option is
enabled after having been disabled. Resetting the counter does not
change the identifiers of existing packets in the system.

This option is implemented only for transmit timestamps. There, the
timestamp is always looped along with a struct sock_extended_err.
The option modifies field ee_data to pass an id that is unique
among all possibly concurrently outstanding timestamp requests for
that socket. In practice, it is a monotonically increasing u32
(that wraps).
that socket.


SOF_TIMESTAMPING_OPT_CMSG:

In datagram sockets, the counter increments on each send call. In
stream sockets, it increments with every byte.
Support recv() cmsg for all timestamped packets. Control messages
are already supported unconditionally on all packets with receive
timestamps and on IPv6 packets with transmit timestamp. This option
extends them to IPv4 packets with transmit timestamp. One use case
is to correlate packets with their egress device, by enabling socket
option IP_PKTINFO simultaneously.


1.4 Bytestream Timestamps
Expand Down
90 changes: 78 additions & 12 deletions Documentation/networking/timestamping/txtimestamp.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include <netpacket/packet.h>
#include <poll.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
Expand All @@ -58,13 +59,23 @@
#include <time.h>
#include <unistd.h>

/* ugly hack to work around netinet/in.h and linux/ipv6.h conflicts */
/*
 * Local fallback definition of struct in6_pktinfo.
 * NOTE(review): #ifndef only tests for a preprocessor macro named
 * in6_pktinfo, not for an already declared struct - confirm that none
 * of the included headers also defines this struct.
 */
#ifndef in6_pktinfo
struct in6_pktinfo {
struct in6_addr ipi6_addr; /* IPv6 address */
int ipi6_ifindex; /* interface index */
};
#endif

/* command line parameters */
static int cfg_proto = SOCK_STREAM;
static int cfg_ipproto = IPPROTO_TCP;
static int cfg_num_pkts = 4;
static int do_ipv4 = 1;
static int do_ipv6 = 1;
static int cfg_payload_len = 10;
static bool cfg_show_payload;
static bool cfg_do_pktinfo;
static uint16_t dest_port = 9000;

static struct sockaddr_in daddr;
Expand Down Expand Up @@ -131,6 +142,30 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype,
__print_timestamp(tsname, &tss->ts[0], tskey, payload_len);
}

/* TODO: convert to check_and_print payload once API is stable */

/*
 * Dump the leading bytes of a buffer to stderr as one "payload: " line of
 * space-separated two-digit hex octets. No more than 70 bytes are shown;
 * a zero or negative length prints an empty payload line.
 */
static void print_payload(char *data, int len)
{
	const int shown = len > 70 ? 70 : len;
	int pos = 0;

	fprintf(stderr, "payload: ");
	while (pos < shown) {
		fprintf(stderr, "%02hhx ", data[pos]);
		pos++;
	}
	fprintf(stderr, "\n");
}

/*
 * Print an interface index and a source/destination address pair to
 * stderr on a single " pktinfo: ..." line.
 *
 * @family:  address family passed to inet_ntop() for @saddr and @daddr
 * @ifindex: interface index to report (printed as unsigned)
 * @saddr:   binary source address, or NULL if not available
 * @daddr:   binary destination address, or NULL if not available
 *
 * An address that is NULL, or that inet_ntop() fails to convert (for
 * instance because @family is not supported), is shown as "unknown".
 */
static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr)
{
	char sa[INET6_ADDRSTRLEN], da[INET6_ADDRSTRLEN];
	const char *src = NULL, *dst = NULL;

	if (saddr)
		src = inet_ntop(family, saddr, sa, sizeof(sa));
	if (daddr)
		dst = inet_ntop(family, daddr, da, sizeof(da));

	/* inet_ntop() returns NULL on failure; never hand that to %s */
	fprintf(stderr, " pktinfo: ifindex=%u src=%s dst=%s\n",
		(unsigned int) ifindex,
		src ? src : "unknown",
		dst ? dst : "unknown");
}

static void __poll(int fd)
{
struct pollfd pollfd;
Expand All @@ -156,10 +191,9 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
cm->cmsg_type == SCM_TIMESTAMPING) {
tss = (void *) CMSG_DATA(cm);
} else if ((cm->cmsg_level == SOL_IP &&
cm->cmsg_type == IP_RECVERR) ||
(cm->cmsg_level == SOL_IPV6 &&
cm->cmsg_type == IPV6_RECVERR)) {

cm->cmsg_type == IP_RECVERR) ||
(cm->cmsg_level == SOL_IPV6 &&
cm->cmsg_type == IPV6_RECVERR)) {
serr = (void *) CMSG_DATA(cm);
if (serr->ee_errno != ENOMSG ||
serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) {
Expand All @@ -168,6 +202,16 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
serr->ee_origin);
serr = NULL;
}
} else if (cm->cmsg_level == SOL_IP &&
cm->cmsg_type == IP_PKTINFO) {
struct in_pktinfo *info = (void *) CMSG_DATA(cm);
print_pktinfo(AF_INET, info->ipi_ifindex,
&info->ipi_spec_dst, &info->ipi_addr);
} else if (cm->cmsg_level == SOL_IPV6 &&
cm->cmsg_type == IPV6_PKTINFO) {
struct in6_pktinfo *info6 = (void *) CMSG_DATA(cm);
print_pktinfo(AF_INET6, info6->ipi6_ifindex,
NULL, &info6->ipi6_addr);
} else
fprintf(stderr, "unknown cmsg %d,%d\n",
cm->cmsg_level, cm->cmsg_type);
Expand Down Expand Up @@ -206,7 +250,11 @@ static int recv_errmsg(int fd)
if (ret == -1 && errno != EAGAIN)
error(1, errno, "recvmsg");

__recv_errmsg_cmsg(&msg, ret);
if (ret > 0) {
__recv_errmsg_cmsg(&msg, ret);
if (cfg_show_payload)
print_payload(data, cfg_payload_len);
}

free(data);
return ret == -1;
Expand All @@ -215,9 +263,9 @@ static int recv_errmsg(int fd)
static void do_test(int family, unsigned int opt)
{
char *buf;
int fd, i, val, total_len;
int fd, i, val = 1, total_len;

if (family == IPPROTO_IPV6 && cfg_proto != SOCK_STREAM) {
if (family == AF_INET6 && cfg_proto != SOCK_STREAM) {
/* due to lack of checksum generation code */
fprintf(stderr, "test: skipping datagram over IPv6\n");
return;
Expand All @@ -239,7 +287,6 @@ static void do_test(int family, unsigned int opt)
error(1, errno, "socket");

if (cfg_proto == SOCK_STREAM) {
val = 1;
if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
(char*) &val, sizeof(val)))
error(1, 0, "setsockopt no nagle");
Expand All @@ -253,7 +300,20 @@ static void do_test(int family, unsigned int opt)
}
}

if (cfg_do_pktinfo) {
if (family == AF_INET6) {
if (setsockopt(fd, SOL_IPV6, IPV6_RECVPKTINFO,
&val, sizeof(val)))
error(1, errno, "setsockopt pktinfo ipv6");
} else {
if (setsockopt(fd, SOL_IP, IP_PKTINFO,
&val, sizeof(val)))
error(1, errno, "setsockopt pktinfo ipv4");
}
}

opt |= SOF_TIMESTAMPING_SOFTWARE |
SOF_TIMESTAMPING_OPT_CMSG |
SOF_TIMESTAMPING_OPT_ID;
if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
(char *) &opt, sizeof(opt)))
Expand All @@ -262,8 +322,6 @@ static void do_test(int family, unsigned int opt)
for (i = 0; i < cfg_num_pkts; i++) {
memset(&ts_prev, 0, sizeof(ts_prev));
memset(buf, 'a' + i, total_len);
buf[total_len - 2] = '\n';
buf[total_len - 1] = '\0';

if (cfg_proto == SOCK_RAW) {
struct udphdr *udph;
Expand Down Expand Up @@ -324,11 +382,13 @@ static void __attribute__((noreturn)) usage(const char *filepath)
" -4: only IPv4\n"
" -6: only IPv6\n"
" -h: show this message\n"
" -I: request PKTINFO\n"
" -l N: send N bytes at a time\n"
" -r: use raw\n"
" -R: use raw (IP_HDRINCL)\n"
" -p N: connect to port N\n"
" -u: use udp\n",
" -u: use udp\n"
" -x: show payload (up to 70 bytes)\n",
filepath);
exit(1);
}
Expand All @@ -338,14 +398,17 @@ static void parse_opt(int argc, char **argv)
int proto_count = 0;
char c;

while ((c = getopt(argc, argv, "46hl:p:rRu")) != -1) {
while ((c = getopt(argc, argv, "46hIl:p:rRux")) != -1) {
switch (c) {
case '4':
do_ipv6 = 0;
break;
case '6':
do_ipv4 = 0;
break;
case 'I':
cfg_do_pktinfo = true;
break;
case 'r':
proto_count++;
cfg_proto = SOCK_RAW;
Expand All @@ -367,6 +430,9 @@ static void parse_opt(int argc, char **argv)
case 'p':
dest_port = strtoul(optarg, NULL, 10);
break;
case 'x':
cfg_show_payload = true;
break;
case 'h':
default:
usage(argv[0]);
Expand Down
3 changes: 2 additions & 1 deletion include/uapi/linux/net_tstamp.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ enum {
SOF_TIMESTAMPING_OPT_ID = (1<<7),
SOF_TIMESTAMPING_TX_SCHED = (1<<8),
SOF_TIMESTAMPING_TX_ACK = (1<<9),
SOF_TIMESTAMPING_OPT_CMSG = (1<<10),

SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_ACK,
SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_CMSG,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
SOF_TIMESTAMPING_LAST
};
Expand Down
24 changes: 22 additions & 2 deletions net/ipv4/ip_sockglue.c
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,22 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
kfree_skb(skb);
}

static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk,
const struct sk_buff *skb,
int ee_origin)
{
struct in_pktinfo *info = PKTINFO_SKB_CB(skb);

if ((ee_origin != SO_EE_ORIGIN_TIMESTAMPING) ||
(!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
(!skb->dev))
return false;

info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
info->ipi_ifindex = skb->dev->ifindex;
return true;
}

/*
* Handle MSG_ERRQUEUE
*/
Expand All @@ -414,6 +430,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
int err;
int copied;

WARN_ON_ONCE(sk->sk_family == AF_INET6);

err = -EAGAIN;
skb = sock_dequeue_err_skb(sk);
if (skb == NULL)
Expand Down Expand Up @@ -444,7 +462,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
sin = &errhdr.offender;
sin->sin_family = AF_UNSPEC;
if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) {

if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) {
struct inet_sock *inet = inet_sk(sk);

sin->sin_family = AF_INET;
Expand Down Expand Up @@ -1049,7 +1069,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}

/**
* ipv4_pktinfo_prepare - transfert some info from rtable to skb
* ipv4_pktinfo_prepare - transfer some info from rtable to skb
* @sk: socket
* @skb: buffer
*
Expand Down
21 changes: 19 additions & 2 deletions net/ipv6/datagram.c
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
kfree_skb(skb);
}

/*
 * Store the skb's interface index in its control block: in
 * IP6CB(skb)->iif when skb->protocol is ETH_P_IPV6, otherwise in the
 * in_pktinfo ipi_ifindex field. When the skb has no device, -1 is stored.
 */
static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb)
{
	int ifindex = -1;

	if (skb->dev)
		ifindex = skb->dev->ifindex;

	if (skb->protocol != htons(ETH_P_IPV6)) {
		PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex;
		return;
	}

	IP6CB(skb)->iif = ifindex;
}

/*
* Handle MSG_ERRQUEUE
*/
Expand Down Expand Up @@ -388,8 +398,12 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
sin->sin6_family = AF_INET6;
sin->sin6_flowinfo = 0;
sin->sin6_port = 0;
if (np->rxopt.all)
if (np->rxopt.all) {
if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP &&
serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6)
ip6_datagram_prepare_pktinfo_errqueue(skb);
ip6_datagram_recv_common_ctl(sk, msg, skb);
}
if (skb->protocol == htons(ETH_P_IPV6)) {
sin->sin6_addr = ipv6_hdr(skb)->saddr;
if (np->rxopt.all)
Expand Down Expand Up @@ -491,7 +505,10 @@ void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg,
ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
&src_info.ipi6_addr);
}
put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);

if (src_info.ipi6_ifindex >= 0)
put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO,
sizeof(src_info), &src_info);
}
}

Expand Down

0 comments on commit aae68bc

Please sign in to comment.