From f26d9ae491c0c6beb09d2cbdae41d3f852cc03ad Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Thu, 7 Jun 2012 02:23:37 +0000 Subject: [PATCH] --- yaml --- r: 314269 b: refs/heads/master c: 185095fb80ce57c0f3db8738e36ad7c02dc34d33 h: refs/heads/master i: 314267: 2d7d0665d5b31b27395549266b417f2456cd4577 v: v3 --- [refs] | 2 +- .../net/ethernet/intel/e1000e/netdev.c | 11 +- .../drivers/net/ethernet/intel/e1000e/param.c | 43 ++++-- trunk/include/net/ip6_route.h | 8 +- trunk/include/net/route.h | 5 +- trunk/net/dcb/dcbnl.c | 7 +- trunk/net/dccp/ipv6.c | 2 - trunk/net/ipv4/ah4.c | 1 - trunk/net/ipv4/esp4.c | 1 - trunk/net/ipv4/ip_gre.c | 14 +- trunk/net/ipv4/ipcomp.c | 1 - trunk/net/ipv4/ipip.c | 15 +- trunk/net/ipv4/ping.c | 1 - trunk/net/ipv4/raw.c | 3 - trunk/net/ipv4/route.c | 28 ---- trunk/net/ipv4/udp.c | 1 - trunk/net/ipv6/ah6.c | 3 +- trunk/net/ipv6/esp6.c | 2 - trunk/net/ipv6/icmp.c | 6 +- trunk/net/ipv6/ipcomp6.c | 2 - trunk/net/ipv6/raw.c | 5 +- trunk/net/ipv6/route.c | 143 ++++++++++++++---- trunk/net/ipv6/sit.c | 15 +- trunk/net/ipv6/tcp_ipv6.c | 2 - trunk/net/ipv6/udp.c | 3 - 25 files changed, 175 insertions(+), 149 deletions(-) diff --git a/[refs] b/[refs] index b935ce36e5df..26ae7e73d6c8 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 81aded24675ebda5de8a68843250ad15584ac38a +refs/heads/master: 185095fb80ce57c0f3db8738e36ad7c02dc34d33 diff --git a/trunk/drivers/net/ethernet/intel/e1000e/netdev.c b/trunk/drivers/net/ethernet/intel/e1000e/netdev.c index 31d37a2b5ba8..ba86b3f8a404 100644 --- a/trunk/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/trunk/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6238,7 +6238,8 @@ static int __devinit e1000_probe(struct pci_dev *pdev, } if (hw->phy.ops.check_reset_block && hw->phy.ops.check_reset_block(hw)) - e_info("PHY reset is blocked due to SOL/IDER session.\n"); + dev_info(&pdev->dev, + "PHY reset is blocked due to SOL/IDER session.\n"); /* Set initial default active device features */ netdev->features = (NETIF_F_SG | @@ -6288,7 +6289,7 @@ static int __devinit e1000_probe(struct pci_dev *pdev, if (e1000_validate_nvm_checksum(&adapter->hw) >= 0) break; if (i == 2) { - e_err("The NVM Checksum Is Not Valid\n"); + dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); err = -EIO; goto err_eeprom; } @@ -6298,13 +6299,15 @@ static int __devinit e1000_probe(struct pci_dev *pdev, /* copy the MAC address */ if (e1000e_read_mac_addr(&adapter->hw)) - e_err("NVM Read Error while reading MAC address\n"); + dev_err(&pdev->dev, + "NVM Read Error while reading MAC address\n"); memcpy(netdev->dev_addr, adapter->hw.mac.addr, netdev->addr_len); memcpy(netdev->perm_addr, adapter->hw.mac.addr, netdev->addr_len); if (!is_valid_ether_addr(netdev->perm_addr)) { - e_err("Invalid MAC Address: %pM\n", netdev->perm_addr); + dev_err(&pdev->dev, "Invalid MAC Address: %pM\n", + netdev->perm_addr); err = -EIO; goto err_eeprom; } diff --git a/trunk/drivers/net/ethernet/intel/e1000e/param.c b/trunk/drivers/net/ethernet/intel/e1000e/param.c index 55cc1565bc2f..dfbfa7fd98c3 100644 --- a/trunk/drivers/net/ethernet/intel/e1000e/param.c +++ b/trunk/drivers/net/ethernet/intel/e1000e/param.c @@ -199,16 +199,19 @@ static int __devinit e1000_validate_option(unsigned int *value, case enable_option: switch (*value) { case OPTION_ENABLED: - e_info("%s Enabled\n", opt->name); + dev_info(&adapter->pdev->dev, "%s Enabled\n", + opt->name); return 0; case OPTION_DISABLED: - e_info("%s Disabled\n", opt->name); + dev_info(&adapter->pdev->dev, "%s Disabled\n", + opt->name); return 0; } break; case range_option: if (*value >= opt->arg.r.min && *value <= opt->arg.r.max) { - e_info("%s set to %i\n", opt->name, *value); + dev_info(&adapter->pdev->dev, "%s set to %i\n", + opt->name, *value); return 0; } break; @@ -220,7 +223,8 @@ static int __devinit e1000_validate_option(unsigned int *value, ent = &opt->arg.l.p[i]; if (*value == ent->i) { if (ent->str[0] != '\0') - e_info("%s\n", ent->str); + dev_info(&adapter->pdev->dev, "%s\n", + ent->str); return 0; } } @@ -230,8 +234,8 @@ static int __devinit e1000_validate_option(unsigned int *value, BUG(); } - e_info("Invalid %s value specified (%i) %s\n", opt->name, *value, - opt->err); + dev_info(&adapter->pdev->dev, "Invalid %s value specified (%i) %s\n", + opt->name, *value, opt->err); *value = opt->def; return -1; } @@ -251,8 +255,10 @@ void __devinit e1000e_check_options(struct e1000_adapter *adapter) int bd = adapter->bd_number; if (bd >= E1000_MAX_NIC) { - e_notice("Warning: no configuration for board #%i\n", bd); - e_notice("Using defaults for all values\n"); + dev_notice(&adapter->pdev->dev, + "Warning: no configuration for board #%i\n", bd); + dev_notice(&adapter->pdev->dev, + "Using defaults for all values\n"); } { /* Transmit Interrupt Delay */ @@ -366,27 +372,32 @@ void __devinit e1000e_check_options(struct e1000_adapter *adapter) * default values */ if (adapter->itr > 4) - e_info("%s set to default %d\n", opt.name, - adapter->itr); + dev_info(&adapter->pdev->dev, + "%s set to default %d\n", opt.name, + adapter->itr); } adapter->itr_setting = adapter->itr; switch (adapter->itr) { case 0: - e_info("%s turned off\n", opt.name); + dev_info(&adapter->pdev->dev, "%s turned off\n", + opt.name); break; case 1: - e_info("%s set to dynamic mode\n", opt.name); + dev_info(&adapter->pdev->dev, + "%s set to dynamic mode\n", opt.name); adapter->itr = 20000; break; case 3: - e_info("%s set to dynamic conservative mode\n", - opt.name); + dev_info(&adapter->pdev->dev, + "%s set to dynamic conservative mode\n", + opt.name); adapter->itr = 20000; break; case 4: - e_info("%s set to simplified (2000-8000 ints) mode\n", - opt.name); + dev_info(&adapter->pdev->dev, + "%s set to simplified (2000-8000 ints) mode\n", + opt.name); break; default: /* diff --git a/trunk/include/net/ip6_route.h b/trunk/include/net/ip6_route.h index 58cb3fc34879..a2cda240ca95 100644 --- a/trunk/include/net/ip6_route.h +++ b/trunk/include/net/ip6_route.h @@ -140,10 +140,10 @@ extern void rt6_redirect(const struct in6_addr *dest, u8 *lladdr, int on_link); -extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, - int oif, u32 mark); -extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, - __be32 mtu); +extern void rt6_pmtu_discovery(const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct net_device *dev, + u32 pmtu); struct netlink_callback; diff --git a/trunk/include/net/route.h b/trunk/include/net/route.h index 47eb25ac1f7f..a36ae429ed5d 100644 --- a/trunk/include/net/route.h +++ b/trunk/include/net/route.h @@ -215,10 +215,7 @@ static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 s return ip_route_input_common(skb, dst, src, tos, devin, true); } -extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, - int oif, u32 mark, u8 protocol, int flow_flags); -extern void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu); -extern void ip_rt_send_redirect(struct sk_buff *skb); +extern void ip_rt_send_redirect(struct sk_buff *skb); extern unsigned int inet_addr_type(struct net *net, __be32 addr); extern unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr); diff --git a/trunk/net/dcb/dcbnl.c b/trunk/net/dcb/dcbnl.c index 0a360072cfec..70bba3eb4ae9 100644 --- a/trunk/net/dcb/dcbnl.c +++ b/trunk/net/dcb/dcbnl.c @@ -208,7 +208,10 @@ static struct sk_buff *dcbnl_newmsg(int type, u8 cmd, u32 port, u32 seq, return NULL; nlh = nlmsg_put(skb, port, seq, type, sizeof(*dcb), flags); - BUG_ON(!nlh); + if (!nlh) { + /* header should always fit, allocation must be buggy */ + BUG(); + } dcb = nlmsg_data(nlh); dcb->dcb_family = AF_UNSPEC; @@ -1661,7 +1664,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) u32 pid = skb ? NETLINK_CB(skb).pid : 0; int ret = -EINVAL; struct sk_buff *reply_skb; - struct nlmsghdr *reply_nlh = NULL; + struct nlmsghdr *reply_nlh; const struct reply_func *fn; if (!net_eq(net, &init_net)) diff --git a/trunk/net/dccp/ipv6.c b/trunk/net/dccp/ipv6.c index 9991be083ad0..fa9512d86f3b 100644 --- a/trunk/net/dccp/ipv6.c +++ b/trunk/net/dccp/ipv6.c @@ -165,8 +165,6 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else dst_hold(dst); - dst->ops->update_pmtu(dst, ntohl(info)); - if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { dccp_sync_mss(sk, dst_mtu(dst)); } /* else let the usual retransmit timer handle it */ diff --git a/trunk/net/ipv4/ah4.c b/trunk/net/ipv4/ah4.c index 916d5ecaf6c6..e8f2617ecd47 100644 --- a/trunk/net/ipv4/ah4.c +++ b/trunk/net/ipv4/ah4.c @@ -408,7 +408,6 @@ static void ah4_err(struct sk_buff *skb, u32 info) return; pr_debug("pmtu discovery on SA AH/%08x/%08x\n", ntohl(ah->spi), ntohl(iph->daddr)); - ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); xfrm_state_put(x); } diff --git a/trunk/net/ipv4/esp4.c b/trunk/net/ipv4/esp4.c index 7b95b49a36ce..cb982a61536f 100644 --- a/trunk/net/ipv4/esp4.c +++ b/trunk/net/ipv4/esp4.c @@ -494,7 +494,6 @@ static void esp4_err(struct sk_buff *skb, u32 info) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", ntohl(esph->spi), ntohl(iph->daddr)); - ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); xfrm_state_put(x); } diff --git a/trunk/net/ipv4/ip_gre.c b/trunk/net/ipv4/ip_gre.c index 594cec35ac4d..f49047b79609 100644 --- a/trunk/net/ipv4/ip_gre.c +++ b/trunk/net/ipv4/ip_gre.c @@ -516,6 +516,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info) case ICMP_PORT_UNREACH: /* Impossible event. */ return; + case ICMP_FRAG_NEEDED: + /* Soft state for pmtu is maintained by IP core. */ + return; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -535,16 +538,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) flags & GRE_KEY ? *(((__be32 *)p) + (grehlen / 4) - 1) : 0, p[1]); - if (t == NULL) - goto out; - - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, IPPROTO_GRE, 0); - goto out; - } - - if (t->parms.iph.daddr == 0 || + if (t == NULL || t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) goto out; diff --git a/trunk/net/ipv4/ipcomp.c b/trunk/net/ipv4/ipcomp.c index b91375482d84..63b64c45a826 100644 --- a/trunk/net/ipv4/ipcomp.c +++ b/trunk/net/ipv4/ipcomp.c @@ -42,7 +42,6 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI4\n", spi, &iph->daddr); - ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); xfrm_state_put(x); } diff --git a/trunk/net/ipv4/ipip.c b/trunk/net/ipv4/ipip.c index 715338a1b205..2d0f99bf61b3 100644 --- a/trunk/net/ipv4/ipip.c +++ b/trunk/net/ipv4/ipip.c @@ -348,6 +348,9 @@ static int ipip_err(struct sk_buff *skb, u32 info) case ICMP_PORT_UNREACH: /* Impossible event. */ return 0; + case ICMP_FRAG_NEEDED: + /* Soft state for pmtu is maintained by IP core. */ + return 0; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -366,17 +369,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) rcu_read_lock(); t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); - if (t == NULL) - goto out; - - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->dev->ifindex, 0, IPPROTO_IPIP, 0); - err = 0; - goto out; - } - - if (t->parms.iph.daddr == 0) + if (t == NULL || t->parms.iph.daddr == 0) goto out; err = 0; diff --git a/trunk/net/ipv4/ping.c b/trunk/net/ipv4/ping.c index 340fcf29a966..2c00e8bf684d 100644 --- a/trunk/net/ipv4/ping.c +++ b/trunk/net/ipv4/ping.c @@ -371,7 +371,6 @@ void ping_err(struct sk_buff *skb, u32 info) break; case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ - ipv4_sk_update_pmtu(skb, sk, info); if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { err = EMSGSIZE; harderr = 1; diff --git a/trunk/net/ipv4/raw.c b/trunk/net/ipv4/raw.c index 659ddfb10947..4032b818f3e4 100644 --- a/trunk/net/ipv4/raw.c +++ b/trunk/net/ipv4/raw.c @@ -216,9 +216,6 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) int err = 0; int harderr = 0; - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) - ipv4_sk_update_pmtu(skb, sk, info); - /* Report error on raw socket, if: 1. User requested ip_recverr. 2. Socket is connected (otherwise the error indication diff --git a/trunk/net/ipv4/route.c b/trunk/net/ipv4/route.c index 41df5297a412..655506af47ca 100644 --- a/trunk/net/ipv4/route.c +++ b/trunk/net/ipv4/route.c @@ -1711,34 +1711,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } } -void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, - int oif, u32 mark, u8 protocol, int flow_flags) -{ - const struct iphdr *iph = (const struct iphdr *)skb->data; - struct flowi4 fl4; - struct rtable *rt; - - flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, - protocol, flow_flags | FLOWI_FLAG_PRECOW_METRICS, - iph->daddr, iph->saddr, 0, 0); - rt = __ip_route_output_key(net, &fl4); - if (!IS_ERR(rt)) { - ip_rt_update_pmtu(&rt->dst, mtu); - ip_rt_put(rt); - } -} -EXPORT_SYMBOL_GPL(ipv4_update_pmtu); - -void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) -{ - const struct inet_sock *inet = inet_sk(sk); - - return ipv4_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark, - inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - inet_sk_flowi_flags(sk)); -} -EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); static void ipv4_validate_peer(struct rtable *rt) { diff --git a/trunk/net/ipv4/udp.c b/trunk/net/ipv4/udp.c index db017efb76ea..eaca73644e79 100644 --- a/trunk/net/ipv4/udp.c +++ b/trunk/net/ipv4/udp.c @@ -615,7 +615,6 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) break; case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ - ipv4_sk_update_pmtu(skb, sk, info); if (inet->pmtudisc != IP_PMTUDISC_DONT) { err = EMSGSIZE; harderr = 1; diff --git a/trunk/net/ipv6/ah6.c b/trunk/net/ipv6/ah6.c index 49d4d26bda88..f1a4a2c28ed3 100644 --- a/trunk/net/ipv6/ah6.c +++ b/trunk/net/ipv6/ah6.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -622,7 +621,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n", ntohl(ah->spi), &iph->daddr); - ip6_update_pmtu(skb, net, info, 0, 0); + xfrm_state_put(x); } diff --git a/trunk/net/ipv6/esp6.c b/trunk/net/ipv6/esp6.c index 89a615ba84f8..db1521fcda5b 100644 --- a/trunk/net/ipv6/esp6.c +++ b/trunk/net/ipv6/esp6.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -443,7 +442,6 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; pr_debug("pmtu discovery on SA ESP/%08x/%pI6\n", ntohl(esph->spi), &iph->daddr); - ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/trunk/net/ipv6/icmp.c b/trunk/net/ipv6/icmp.c index 5247d5c211f9..ed89bba745a1 100644 --- a/trunk/net/ipv6/icmp.c +++ b/trunk/net/ipv6/icmp.c @@ -649,6 +649,7 @@ static int icmpv6_rcv(struct sk_buff *skb) struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(dev); const struct in6_addr *saddr, *daddr; + const struct ipv6hdr *orig_hdr; struct icmp6hdr *hdr; u8 type; @@ -660,7 +661,7 @@ static int icmpv6_rcv(struct sk_buff *skb) XFRM_STATE_ICMP)) goto drop_no_count; - if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr))) + if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr))) goto drop_no_count; nh = skb_network_offset(skb); @@ -721,6 +722,9 @@ static int icmpv6_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto discard_it; hdr = icmp6_hdr(skb); + orig_hdr = (struct ipv6hdr *) (hdr + 1); + rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev, + ntohl(hdr->icmp6_mtu)); /* * Drop through to notify diff --git a/trunk/net/ipv6/ipcomp6.c b/trunk/net/ipv6/ipcomp6.c index 92832385a8ef..5cb75bfe45b1 100644 --- a/trunk/net/ipv6/ipcomp6.c +++ b/trunk/net/ipv6/ipcomp6.c @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include @@ -75,7 +74,6 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, pr_debug("pmtu discovery on SA IPCOMP/%08x/%pI6\n", spi, &iph->daddr); - ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/trunk/net/ipv6/raw.c b/trunk/net/ipv6/raw.c index 43b0042f15f4..93d69836fded 100644 --- a/trunk/net/ipv6/raw.c +++ b/trunk/net/ipv6/raw.c @@ -328,10 +328,9 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, return; harderr = icmpv6_err_convert(type, code, &err); - if (type == ICMPV6_PKT_TOOBIG) { - ip6_sk_update_pmtu(skb, sk, info); + if (type == ICMPV6_PKT_TOOBIG) harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); - } + if (np->recverr) { u8 *payload = skb->data; if (!inet->hdrincl) diff --git a/trunk/net/ipv6/route.c b/trunk/net/ipv6/route.c index 0d41f68daff2..58a3ec23da2f 100644 --- a/trunk/net/ipv6/route.c +++ b/trunk/net/ipv6/route.c @@ -1049,10 +1049,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) { struct rt6_info *rt6 = (struct rt6_info*)dst; - dst_confirm(dst); if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { - struct net *net = dev_net(dst->dev); - rt6->rt6i_flags |= RTF_MODIFIED; if (mtu < IPV6_MIN_MTU) { u32 features = dst_metric(dst, RTAX_FEATURES); @@ -1061,39 +1058,9 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) dst_metric_set(dst, RTAX_FEATURES, features); } dst_metric_set(dst, RTAX_MTU, mtu); - rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); } } -void ip6_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, - int oif, __be32 mark) -{ - const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; - struct dst_entry *dst; - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_oif = oif; - fl6.flowi6_mark = mark; - fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS; - fl6.daddr = iph->daddr; - fl6.saddr = iph->saddr; - fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; - - dst = ip6_route_output(net, NULL, &fl6); - if (!dst->error) - ip6_rt_update_pmtu(dst, ntohl(mtu)); - dst_release(dst); -} -EXPORT_SYMBOL_GPL(ip6_update_pmtu); - -void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) -{ - ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark); -} -EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); - static unsigned int ip6_default_advmss(const struct dst_entry *dst) { struct net_device *dev = dst->dev; @@ -1736,6 +1703,116 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, dst_release(&rt->dst); } +/* + * Handle ICMP "packet too big" messages + * i.e. Path MTU discovery + */ + +static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr, + struct net *net, u32 pmtu, int ifindex) +{ + struct rt6_info *rt, *nrt; + int allfrag = 0; +again: + rt = rt6_lookup(net, daddr, saddr, ifindex, 0); + if (!rt) + return; + + if (rt6_check_expired(rt)) { + ip6_del_rt(rt); + goto again; + } + + if (pmtu >= dst_mtu(&rt->dst)) + goto out; + + if (pmtu < IPV6_MIN_MTU) { + /* + * According to RFC2460, PMTU is set to the IPv6 Minimum Link + * MTU (1280) and a fragment header should always be included + * after a node receiving Too Big message reporting PMTU is + * less than the IPv6 Minimum Link MTU. + */ + pmtu = IPV6_MIN_MTU; + allfrag = 1; + } + + /* New mtu received -> path was valid. + They are sent only in response to data packets, + so that this nexthop apparently is reachable. --ANK + */ + dst_confirm(&rt->dst); + + /* Host route. If it is static, it would be better + not to override it, but add new one, so that + when cache entry will expire old pmtu + would return automatically. + */ + if (rt->rt6i_flags & RTF_CACHE) { + dst_metric_set(&rt->dst, RTAX_MTU, pmtu); + if (allfrag) { + u32 features = dst_metric(&rt->dst, RTAX_FEATURES); + features |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&rt->dst, RTAX_FEATURES, features); + } + rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); + rt->rt6i_flags |= RTF_MODIFIED; + goto out; + } + + /* Network route. + Two cases are possible: + 1. It is connected route. Action: COW + 2. It is gatewayed route or NONEXTHOP route. Action: clone it. + */ + if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) + nrt = rt6_alloc_cow(rt, daddr, saddr); + else + nrt = rt6_alloc_clone(rt, daddr); + + if (nrt) { + dst_metric_set(&nrt->dst, RTAX_MTU, pmtu); + if (allfrag) { + u32 features = dst_metric(&nrt->dst, RTAX_FEATURES); + features |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&nrt->dst, RTAX_FEATURES, features); + } + + /* According to RFC 1981, detecting PMTU increase shouldn't be + * happened within 5 mins, the recommended timer is 10 mins. + * Here this route expiration time is set to ip6_rt_mtu_expires + * which is 10 mins. After 10 mins the decreased pmtu is expired + * and detecting PMTU increase will be automatically happened. + */ + rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires); + nrt->rt6i_flags |= RTF_DYNAMIC; + ip6_ins_rt(nrt); + } +out: + dst_release(&rt->dst); +} + +void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr, + struct net_device *dev, u32 pmtu) +{ + struct net *net = dev_net(dev); + + /* + * RFC 1981 states that a node "MUST reduce the size of the packets it + * is sending along the path" that caused the Packet Too Big message. + * Since it's not possible in the general case to determine which + * interface was used to send the original packet, we update the MTU + * on the interface that will be used to send future packets. We also + * update the MTU on the interface that received the Packet Too Big in + * case the original packet was forced out that interface with + * SO_BINDTODEVICE or similar. This is the next best thing to the + * correct behaviour, which would be to update the MTU on all + * interfaces. + */ + rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0); + rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex); +} + /* * Misc support functions */ diff --git a/trunk/net/ipv6/sit.c b/trunk/net/ipv6/sit.c index 49aea94c9be3..60415711563f 100644 --- a/trunk/net/ipv6/sit.c +++ b/trunk/net/ipv6/sit.c @@ -527,6 +527,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info) case ICMP_PORT_UNREACH: /* Impossible event. */ return 0; + case ICMP_FRAG_NEEDED: + /* Soft state for pmtu is maintained by IP core. */ + return 0; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -548,17 +551,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) skb->dev, iph->daddr, iph->saddr); - if (t == NULL) - goto out; - - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->dev->ifindex, 0, IPPROTO_IPV6, 0); - err = 0; - goto out; - } - - if (t->parms.iph.daddr == 0) + if (t == NULL || t->parms.iph.daddr == 0) goto out; err = 0; diff --git a/trunk/net/ipv6/tcp_ipv6.c b/trunk/net/ipv6/tcp_ipv6.c index 26a88623940b..f91b0bfd12d5 100644 --- a/trunk/net/ipv6/tcp_ipv6.c +++ b/trunk/net/ipv6/tcp_ipv6.c @@ -415,8 +415,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else dst_hold(dst); - dst->ops->update_pmtu(dst, ntohl(info)); - if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); diff --git a/trunk/net/ipv6/udp.c b/trunk/net/ipv6/udp.c index 051ad481973f..f05099fc5901 100644 --- a/trunk/net/ipv6/udp.c +++ b/trunk/net/ipv6/udp.c @@ -479,9 +479,6 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk == NULL) return; - if (type == ICMPV6_PKT_TOOBIG) - ip6_sk_update_pmtu(skb, sk, info); - np = inet6_sk(sk); if (!icmpv6_err_convert(type, code, &err) && !np->recverr)