From b1cadc1a0949c82ff7fcb15603e3caf2d32ff9f6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2016 21:52:02 -0700 Subject: [PATCH 1/5] ipv6: icmp: add a force_saddr param to icmp6_send() SIT or GRE tunnels might want to translate an IPV4 address into a v4mapped one when translating ICMP to ICMPv6. This patch adds the parameter to icmp6_send() but does not change icmpv6_send() signature. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 3 ++- net/ipv6/icmp.c | 7 +++++-- net/ipv6/ip6_icmp.c | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 630f45335c73f..432611a297fbc 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -14,7 +14,8 @@ static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb) #if IS_ENABLED(CONFIG_IPV6) extern void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info); -typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info); +typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, + const struct in6_addr *force_saddr); extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index e32a72fb9982d..6c57e6e90301c 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -388,7 +388,8 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, /* * Send an ICMP message in response to a packet in error */ -static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) +static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, + const struct in6_addr *force_saddr) { struct net *net = dev_net(skb->dev); struct inet6_dev *idev = NULL; @@ -475,6 +476,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.daddr = hdr->saddr; + if (force_saddr) + saddr = force_saddr; if (saddr) fl6.saddr = *saddr; fl6.flowi6_mark = mark; @@ -551,7 +554,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) */ void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) { - icmp6_send(skb, ICMPV6_PARAMPROB, code, pos); + icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL); kfree_skb(skb); } diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c index 14dacc544c3ef..713676f14a0ee 100644 --- a/net/ipv6/ip6_icmp.c +++ b/net/ipv6/ip6_icmp.c @@ -39,7 +39,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (!send) goto out; - send(skb, type, code, info); + send(skb, type, code, info, NULL); out: rcu_read_unlock(); } From 5fbba8ac9358f1e796c8aedcccc3487364643723 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2016 21:52:03 -0700 Subject: [PATCH 2/5] ip6: move ipip6_err_gen_icmpv6_unreach() We want to use this helper from GRE as well, so this is the time to move it in net/ipv6/icmp.c Also add a @nhs parameter, since SIT and GRE have different values for the header(s) to skip. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 1 + net/ipv6/icmp.c | 39 +++++++++++++++++++++++++++++++++++++++ net/ipv6/sit.c | 38 +------------------------------------- 3 files changed, 41 insertions(+), 37 deletions(-) diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 432611a297fbc..9796481edbdb9 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -18,6 +18,7 @@ typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr); extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); +int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs); #else diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6c57e6e90301c..07bc63c23712a 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -558,6 +558,45 @@ void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) kfree_skb(skb); } +/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH + * if sufficient data bytes are available + * @nhs is the size of the tunnel header(s) : + * Either an IPv4 header for SIT encap + * an IPv4 header + GRE header for GRE encap + */ +int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs) +{ + struct rt6_info *rt; + struct sk_buff *skb2; + + if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8)) + return 1; + + skb2 = skb_clone(skb, GFP_ATOMIC); + + if (!skb2) + return 1; + + skb_dst_drop(skb2); + skb_pull(skb2, nhs); + skb_reset_network_header(skb2); + + rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0); + + if (rt && rt->dst.dev) + skb2->dev = rt->dst.dev; + + icmpv6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); + + if (rt) + ip6_rt_put(rt); + + kfree_skb(skb2); + + return 0; +} +EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach); + static void icmpv6_echo_reply(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d9f2bd6ef72db..78e84d6793eef 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -479,42 +479,6 @@ static void ipip6_tunnel_uninit(struct net_device *dev) dev_put(dev); } -/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH - * if sufficient data bytes are available - */ -static int ipip6_err_gen_icmpv6_unreach(struct sk_buff *skb) -{ - int ihl = ((const struct iphdr *)skb->data)->ihl*4; - struct rt6_info *rt; - struct sk_buff *skb2; - - if (!pskb_may_pull(skb, ihl + sizeof(struct ipv6hdr) + 8)) - return 1; - - skb2 = skb_clone(skb, GFP_ATOMIC); - - if (!skb2) - return 1; - - skb_dst_drop(skb2); - skb_pull(skb2, ihl); - skb_reset_network_header(skb2); - - rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0); - - if (rt && rt->dst.dev) - skb2->dev = rt->dst.dev; - - icmpv6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); - - if (rt) - ip6_rt_put(rt); - - kfree_skb(skb2); - - return 0; -} - static int ipip6_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (const struct iphdr *)skb->data; @@ -575,7 +539,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) goto out; err = 0; - if (!ipip6_err_gen_icmpv6_unreach(skb)) + if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4)) goto out; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) From 2d7a3b276be2d032a6c1a48ced87a474327ee3d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2016 21:52:04 -0700 Subject: [PATCH 3/5] ipv6: translate ICMP_TIME_EXCEEDED to ICMPV6_TIME_EXCEED For better traceroute/mtr support for SIT and GRE tunnels, we translate IPV4 ICMP ICMP_TIME_EXCEEDED to ICMPV6_TIME_EXCEED We also have to translate the IPv4 source IP address of ICMP message to IPv6 v4mapped. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 2 +- net/ipv6/icmp.c | 12 +++++++++--- net/ipv6/sit.c | 6 +++--- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 9796481edbdb9..97ae98071a031 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -18,7 +18,7 @@ typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr); extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); -int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs); +int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type); #else diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 07bc63c23712a..867aebc342488 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -564,8 +564,9 @@ void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) * Either an IPv4 header for SIT encap * an IPv4 header + GRE header for GRE encap */ -int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs) +int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type) { + struct in6_addr temp_saddr; struct rt6_info *rt; struct sk_buff *skb2; @@ -586,8 +587,13 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs) if (rt && rt->dst.dev) skb2->dev = rt->dst.dev; - icmpv6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); - + ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr); + if (type == ICMP_TIME_EXCEEDED) + icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, + 0, &temp_saddr); + else + icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, + 0, &temp_saddr); if (rt) ip6_rt_put(rt); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 78e84d6793eef..d7a36114eb50e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -535,11 +535,11 @@ static int ipip6_err(struct sk_buff *skb, u32 info) goto out; } - if (t->parms.iph.daddr == 0) + err = 0; + if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type)) goto out; - err = 0; - if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4)) + if (t->parms.iph.daddr == 0) goto out; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) From 9b8c6d7bf2e08a7d3eb6660a2bfaf29b8b49c329 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2016 21:52:05 -0700 Subject: [PATCH 4/5] gre: better support for ICMP messages for gre+ipv6 ipgre_err() can call ip6_err_gen_icmpv6_unreach() for proper support of ipv4+gre+icmp+ipv6+... frames, used for example by traceroute/mtr. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 1 + net/ipv4/gre_demux.c | 1 + net/ipv4/ip_gre.c | 6 ++++++ 3 files changed, 8 insertions(+) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 9222678426a14..a5e7035fb93f7 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -157,6 +157,7 @@ struct tnl_ptk_info { __be16 proto; __be32 key; __be32 seq; + int hdr_len; }; #define PACKET_RCVD 0 diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 4c39f4fd332a3..c4c3e439f4249 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -117,6 +117,7 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, if ((*(u8 *)options & 0xF0) != 0x40) hdr_len += 4; } + tpi->hdr_len = hdr_len; return hdr_len; } EXPORT_SYMBOL(gre_parse_header); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0f8ca3fca00a0..ab4cff8e563dd 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -187,6 +187,12 @@ static void ipgre_err(struct sk_buff *skb, u32 info, if (!t) return; +#if IS_ENABLED(CONFIG_IPV6) + if (tpi->proto == htons(ETH_P_IPV6) && + !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, type)) + return; +#endif + if (t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) return; From 20e1954fe238dbe5f8d3a979e593fe352bd703cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2016 21:52:06 -0700 Subject: [PATCH 5/5] ipv6: RFC 4884 partial support for SIT/GRE tunnels When receiving an ICMPv4 message containing extensions as defined in RFC 4884, and translating it to ICMPv6 at SIT or GRE tunnel, we need some extra manipulation in order to properly forward the extensions. This patch only takes care of Time Exceeded messages as they are the ones that typically carry information from various routers in a fabric during a traceroute session. It also avoids complex skb logic if the data_len is not a multiple of 8. RFC states : The "original datagram" field MUST contain at least 128 octets. If the original datagram did not contain 128 octets, the "original datagram" field MUST be zero padded to 128 octets. In practice routers use 128 bytes of original datagram, not more. Initial translation was added in commit ca15a078bd90 ("sit: generate icmpv6 error when receiving icmpv4 error") Signed-off-by: Eric Dumazet Cc: Oussama Ghorbel Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 3 ++- include/uapi/linux/icmp.h | 1 + net/ipv4/ip_gre.c | 5 ++++- net/ipv6/icmp.c | 28 ++++++++++++++++++++++++---- net/ipv6/sit.c | 4 +++- 5 files changed, 34 insertions(+), 7 deletions(-) diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 97ae98071a031..57086e9fc64c6 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -18,7 +18,8 @@ typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr); extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); -int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type); +int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, + unsigned int data_len); #else diff --git a/include/uapi/linux/icmp.h b/include/uapi/linux/icmp.h index 16fff055f734d..fddd9d7362842 100644 --- a/include/uapi/linux/icmp.h +++ b/include/uapi/linux/icmp.h @@ -79,6 +79,7 @@ struct icmphdr { __be16 __unused; __be16 mtu; } frag; + __u8 reserved[4]; } un; }; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ab4cff8e563dd..8eec78f53f9e5 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -144,6 +144,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info, const struct iphdr *iph; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; + unsigned int data_len = 0; struct ip_tunnel *t; switch (type) { @@ -169,6 +170,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info, case ICMP_TIME_EXCEEDED: if (code != ICMP_EXC_TTL) return; + data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ break; case ICMP_REDIRECT: @@ -189,7 +191,8 @@ static void ipgre_err(struct sk_buff *skb, u32 info, #if IS_ENABLED(CONFIG_IPV6) if (tpi->proto == htons(ETH_P_IPV6) && - !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, type)) + !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, + type, data_len)) return; #endif diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 867aebc342488..fd11f5856ce8e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -564,16 +564,22 @@ void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) * Either an IPv4 header for SIT encap * an IPv4 header + GRE header for GRE encap */ -int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type) +int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, + unsigned int data_len) { struct in6_addr temp_saddr; struct rt6_info *rt; struct sk_buff *skb2; + u32 info = 0; if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8)) return 1; - skb2 = skb_clone(skb, GFP_ATOMIC); + /* RFC 4884 (partial) support for ICMP extensions */ + if (data_len < 128 || (data_len & 7) || skb->len < data_len) + data_len = 0; + + skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC); if (!skb2) return 1; @@ -588,12 +594,26 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type) skb2->dev = rt->dst.dev; ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr); + + if (data_len) { + /* RFC 4884 (partial) support : + * insert 0 padding at the end, before the extensions + */ + __skb_push(skb2, nhs); + skb_reset_network_header(skb2); + memmove(skb2->data, skb2->data + nhs, data_len - nhs); + memset(skb2->data + data_len - nhs, 0, nhs); + /* RFC 4884 4.5 : Length is measured in 64-bit words, + * and stored in reserved[0] + */ + info = (data_len/8) << 24; + } if (type == ICMP_TIME_EXCEEDED) icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, - 0, &temp_saddr); + info, &temp_saddr); else icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, - 0, &temp_saddr); + info, &temp_saddr); if (rt) ip6_rt_put(rt); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d7a36114eb50e..cdd714690f95e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -484,6 +484,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) const struct iphdr *iph = (const struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; + unsigned int data_len = 0; struct ip_tunnel *t; int err; @@ -508,6 +509,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) case ICMP_TIME_EXCEEDED: if (code != ICMP_EXC_TTL) return 0; + data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ break; case ICMP_REDIRECT: break; @@ -536,7 +538,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) } err = 0; - if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type)) + if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) goto out; if (t->parms.iph.daddr == 0)