Skip to content

Commit

Permalink
Merge branch 'ip-recvfragsize-cmsg'
Browse files Browse the repository at this point in the history
Willem de Bruijn says:

====================
ip: add RECVFRAGSIZE cmsg

On IP datagrams and raw sockets, when packets arrive fragmented,
expose the largest received fragment size through a new cmsg.

Protocols implemented on top of these sockets may use this, for
instance, to inform peers to lower MSS on platforms that silently
allow send calls to exceed PMTU and cause fragmentation.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Nov 3, 2016
2 parents 1b99e5e + dbd1759 commit a799126
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 3 deletions.
5 changes: 3 additions & 2 deletions include/linux/ipv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,9 @@ struct ipv6_pinfo {
rxflow:1,
rxtclass:1,
rxpmtu:1,
rxorigdstaddr:1;
/* 2 bits hole */
rxorigdstaddr:1,
recvfragsize:1;
/* 1 bit hole */
} bits;
__u16 all;
} rxopt;
Expand Down
1 change: 1 addition & 0 deletions include/net/inet_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ struct inet_sock {
#define IP_CMSG_PASSSEC BIT(5)
#define IP_CMSG_ORIGDSTADDR BIT(6)
#define IP_CMSG_CHECKSUM BIT(7)
/*
 * Set/cleared in inet->cmsg_flags by the IP_RECVFRAGSIZE socket option;
 * when set, ip_cmsg_recv_offset() calls ip_cmsg_recv_fragsize().
 */
#define IP_CMSG_RECVFRAGSIZE BIT(8)

/**
* sk_to_full_sk - Access to a full socket
Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/in.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ struct in_addr {
#define IP_NODEFRAG 22
#define IP_CHECKSUM 23
#define IP_BIND_ADDRESS_NO_PORT 24
/*
 * Boolean option, rejected with EINVAL unless the socket is SOCK_RAW or
 * SOCK_DGRAM. Also the SOL_IP cmsg type used to deliver the skb's
 * frag_max_size to the receiver as an int.
 */
#define IP_RECVFRAGSIZE 25

/* IP_MTU_DISCOVER values */
#define IP_PMTUDISC_DONT 0 /* Never send DF frames */
Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/in6.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ struct in6_flowlabel_req {
#define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR
#define IPV6_TRANSPARENT 75
#define IPV6_UNICAST_IF 76
/*
 * Boolean option stored in np->rxopt.bits.recvfragsize. Also the SOL_IPV6
 * cmsg type used to deliver a non-zero frag_max_size to the receiver as
 * an int.
 */
#define IPV6_RECVFRAGSIZE 77

/*
* Multicast Routing:
Expand Down
26 changes: 26 additions & 0 deletions net/ipv4/ip_sockglue.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,17 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
}

/*
 * Deliver the frag_max_size recorded in the skb's IP control block to the
 * receiver as a SOL_IP / IP_RECVFRAGSIZE control message with an int
 * payload. Per the commit description this is the largest received
 * fragment size.
 *
 * No control message is emitted when frag_max_size is zero.
 */
static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb)
{
	if (IPCB(skb)->frag_max_size != 0) {
		/* Widen to int so the cmsg payload has a fixed, documented size. */
		int fragsize = IPCB(skb)->frag_max_size;

		put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(fragsize),
			 &fragsize);
	}
}

static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
int tlen, int offset)
{
Expand Down Expand Up @@ -218,6 +229,9 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb,

if (flags & IP_CMSG_CHECKSUM)
ip_cmsg_recv_checksum(msg, skb, tlen, offset);

if (flags & IP_CMSG_RECVFRAGSIZE)
ip_cmsg_recv_fragsize(msg, skb);
}
EXPORT_SYMBOL(ip_cmsg_recv_offset);

Expand Down Expand Up @@ -614,6 +628,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
case IP_MULTICAST_LOOP:
case IP_RECVORIGDSTADDR:
case IP_CHECKSUM:
case IP_RECVFRAGSIZE:
if (optlen >= sizeof(int)) {
if (get_user(val, (int __user *) optval))
return -EFAULT;
Expand Down Expand Up @@ -726,6 +741,14 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}
}
break;
case IP_RECVFRAGSIZE:
if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
goto e_inval;
if (val)
inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
else
inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
break;
case IP_TOS: /* This sets both TOS and Precedence */
if (sk->sk_type == SOCK_STREAM) {
val &= ~INET_ECN_MASK;
Expand Down Expand Up @@ -1357,6 +1380,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_CHECKSUM:
val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
break;
case IP_RECVFRAGSIZE:
val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0;
break;
case IP_TOS:
val = inet->tos;
break;
Expand Down
5 changes: 5 additions & 0 deletions net/ipv6/datagram.c
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,11 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
}
}
if (np->rxopt.bits.recvfragsize && opt->frag_max_size) {
int val = opt->frag_max_size;

put_cmsg(msg, SOL_IPV6, IPV6_RECVFRAGSIZE, sizeof(val), &val);
}
}

void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
Expand Down
8 changes: 8 additions & 0 deletions net/ipv6/ipv6_sockglue.c
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
np->autoflowlabel = valbool;
retv = 0;
break;
case IPV6_RECVFRAGSIZE:
np->rxopt.bits.recvfragsize = valbool;
retv = 0;
break;
}

release_sock(sk);
Expand Down Expand Up @@ -1310,6 +1314,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->autoflowlabel;
break;

case IPV6_RECVFRAGSIZE:
val = np->rxopt.bits.recvfragsize;
break;

default:
return -ENOPROTOOPT;
}
Expand Down
7 changes: 6 additions & 1 deletion net/ipv6/reassembly.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
{
struct sk_buff *prev, *next;
struct net_device *dev;
int offset, end;
int offset, end, fragsize;
struct net *net = dev_net(skb_dst(skb)->dev);
u8 ecn;

Expand Down Expand Up @@ -336,6 +336,10 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->ecn |= ecn;
add_frag_mem_limit(fq->q.net, skb->truesize);

fragsize = -skb_network_offset(skb) + skb->len;
if (fragsize > fq->q.max_size)
fq->q.max_size = fragsize;

/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
*/
Expand Down Expand Up @@ -495,6 +499,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
IP6CB(head)->nhoff = nhoff;
IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
IP6CB(head)->frag_max_size = fq->q.max_size;

/* Yes, and fold redundant checksum back. 8) */
skb_postpush_rcsum(head, skb_network_header(head),
Expand Down

0 comments on commit a799126

Please sign in to comment.