Skip to content

Commit

Permalink
ipv4: add IP_RECVFRAGSIZE cmsg
Browse files Browse the repository at this point in the history
The IP stack records the largest fragment of a reassembled packet
in IPCB(skb)->frag_max_size. When reading a datagram or raw packet
that arrived fragmented, expose the value to allow applications to
estimate receive path MTU.

Tested:
  Sent data over a veth pair of which the source has a small mtu.
  Sent data using netcat, received using a dedicated process.

  Verified that the cmsg IP_RECVFRAGSIZE is returned only when
  data arrives fragmented, and in that cases matches the veth mtu.

    ip link add veth0 type veth peer name veth1

    ip netns add from
    ip netns add to

    ip link set dev veth1 netns to
    ip netns exec to ip addr add dev veth1 192.168.10.1/24
    ip netns exec to ip link set dev veth1 up

    ip link set dev veth0 netns from
    ip netns exec from ip addr add dev veth0 192.168.10.2/24
    ip netns exec from ip link set dev veth0 up
    ip netns exec from ip link set dev veth0 mtu 1300
    ip netns exec from ethtool -K veth0 ufo off

    dd if=/dev/zero bs=1 count=1400 2>/dev/null > payload

    ip netns exec to ./recv_cmsg_recvfragsize -4 -u -p 6000 &
    ip netns exec from nc -q 1 -u 192.168.10.1 6000 < payload

  using github.com/wdebruij/kerneltools/blob/master/tests/recvfragsize.c

Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Willem de Bruijn authored and David S. Miller committed Nov 3, 2016
1 parent 1b99e5e commit 70ecc24
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 0 deletions.
1 change: 1 addition & 0 deletions include/net/inet_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ struct inet_sock {
#define IP_CMSG_PASSSEC BIT(5)
#define IP_CMSG_ORIGDSTADDR BIT(6)
#define IP_CMSG_CHECKSUM BIT(7)
#define IP_CMSG_RECVFRAGSIZE BIT(8)

/**
* sk_to_full_sk - Access to a full socket
Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/in.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ struct in_addr {
#define IP_NODEFRAG 22
#define IP_CHECKSUM 23
#define IP_BIND_ADDRESS_NO_PORT 24
#define IP_RECVFRAGSIZE 25

/* IP_MTU_DISCOVER values */
#define IP_PMTUDISC_DONT 0 /* Never send DF frames */
Expand Down
26 changes: 26 additions & 0 deletions net/ipv4/ip_sockglue.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,17 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
}

static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb)
{
int val;

if (IPCB(skb)->frag_max_size == 0)
return;

val = IPCB(skb)->frag_max_size;
put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val);
}

static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
int tlen, int offset)
{
Expand Down Expand Up @@ -218,6 +229,9 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb,

if (flags & IP_CMSG_CHECKSUM)
ip_cmsg_recv_checksum(msg, skb, tlen, offset);

if (flags & IP_CMSG_RECVFRAGSIZE)
ip_cmsg_recv_fragsize(msg, skb);
}
EXPORT_SYMBOL(ip_cmsg_recv_offset);

Expand Down Expand Up @@ -614,6 +628,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
case IP_MULTICAST_LOOP:
case IP_RECVORIGDSTADDR:
case IP_CHECKSUM:
case IP_RECVFRAGSIZE:
if (optlen >= sizeof(int)) {
if (get_user(val, (int __user *) optval))
return -EFAULT;
Expand Down Expand Up @@ -726,6 +741,14 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}
}
break;
case IP_RECVFRAGSIZE:
if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
goto e_inval;
if (val)
inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
else
inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
break;
case IP_TOS: /* This sets both TOS and Precedence */
if (sk->sk_type == SOCK_STREAM) {
val &= ~INET_ECN_MASK;
Expand Down Expand Up @@ -1357,6 +1380,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_CHECKSUM:
val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
break;
case IP_RECVFRAGSIZE:
val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0;
break;
case IP_TOS:
val = inet->tos;
break;
Expand Down

0 comments on commit 70ecc24

Please sign in to comment.