From 096361b15577a583afcc28179a08c75cf95e9dae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:25 +0000 Subject: [PATCH 01/15] ipv6: add ipv6_devconf_read_txrx cacheline_group IPv6 TX and RX fast path use the following fields: - disable_ipv6 - hop_limit - mtu6 - forwarding - disable_policy - proxy_ndp Place them in a group to increase data locality. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/ipv6.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ef3aa060a289e..383a0ea2ab913 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -3,6 +3,7 @@ #define _IPV6_H #include +#include #define ipv6_optlen(p) (((p)->hdrlen+1) << 3) #define ipv6_authlen(p) (((p)->hdrlen+2) << 2) @@ -10,9 +11,16 @@ * This structure contains configuration options per IPv6 link. */ struct ipv6_devconf { - __s32 forwarding; + /* RX & TX fastpath fields. */ + __cacheline_group_begin(ipv6_devconf_read_txrx); + __s32 disable_ipv6; __s32 hop_limit; __s32 mtu6; + __s32 forwarding; + __s32 disable_policy; + __s32 proxy_ndp; + __cacheline_group_end(ipv6_devconf_read_txrx); + __s32 accept_ra; __s32 accept_redirects; __s32 autoconf; @@ -45,7 +53,6 @@ struct ipv6_devconf { __s32 accept_ra_rt_info_max_plen; #endif #endif - __s32 proxy_ndp; __s32 accept_source_route; __s32 accept_ra_from_local; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -55,7 +62,6 @@ struct ipv6_devconf { #ifdef CONFIG_IPV6_MROUTE atomic_t mc_forwarding; #endif - __s32 disable_ipv6; __s32 drop_unicast_in_l2_multicast; __s32 accept_dad; __s32 force_tllao; @@ -76,7 +82,6 @@ struct ipv6_devconf { #endif __u32 enhanced_dad; __u32 addr_gen_mode; - __s32 disable_policy; __s32 ndisc_tclass; __s32 rpl_seg_enabled; __u32 ioam6_id; From d289ab65b89c1d4d88417cb6c03e923f21f95fae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:26 +0000 Subject: [PATCH 02/15] ipv6: annotate data-races around cnf.disable_ipv6 disable_ipv6 is read locklessly, add appropriate READ_ONCE() and WRITE_ONCE() annotations. v2: do not preload net before rtnl_trylock() in addrconf_disable_ipv6() (Jiri) Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 9 +++++---- net/ipv6/ip6_input.c | 4 ++-- net/ipv6/ip6_output.c | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9e949403c1363..c965400fb9340 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4214,7 +4214,7 @@ static void addrconf_dad_work(struct work_struct *w) if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) && ipv6_addr_equal(&ifp->addr, &addr)) { /* DAD failed for link-local based on MAC */ - idev->cnf.disable_ipv6 = 1; + WRITE_ONCE(idev->cnf.disable_ipv6, 1); pr_info("%s: IPv6 being disabled!\n", ifp->idev->dev->name); @@ -6389,7 +6389,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf) idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.disable_ipv6) ^ (!newf); - idev->cnf.disable_ipv6 = newf; + + WRITE_ONCE(idev->cnf.disable_ipv6, newf); if (changed) dev_disable_change(idev); } @@ -6406,7 +6407,7 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) net = (struct net *)table->extra2; old = *p; - *p = newf; + WRITE_ONCE(*p, newf); if (p == &net->ipv6.devconf_dflt->disable_ipv6) { rtnl_unlock(); @@ -6414,7 +6415,7 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) } if (p == &net->ipv6.devconf_all->disable_ipv6) { - net->ipv6.devconf_dflt->disable_ipv6 = newf; + WRITE_ONCE(net->ipv6.devconf_dflt->disable_ipv6, newf); addrconf_disable_change(net, newf); } else if ((!newf) ^ (!old)) dev_disable_change((struct inet6_dev *)table->extra1); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index b8378814532ce..1ba97933c74fb 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -168,9 +168,9 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, SKB_DR_SET(reason, NOT_SPECIFIED); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || - !idev || unlikely(idev->cnf.disable_ipv6)) { + !idev || unlikely(READ_ONCE(idev->cnf.disable_ipv6))) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); - if (idev && unlikely(idev->cnf.disable_ipv6)) + if (idev && unlikely(READ_ONCE(idev->cnf.disable_ipv6))) SKB_DR_SET(reason, IPV6DISABLED); goto drop; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 31b86fe661aa6..0559bd0005858 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -234,7 +234,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; - if (unlikely(idev->cnf.disable_ipv6)) { + if (unlikely(READ_ONCE(idev->cnf.disable_ipv6))) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); return 0; From 553ced03b227c99f0bf68d86f7b45ed7125563f2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:27 +0000 Subject: [PATCH 03/15] ipv6: addrconf_disable_ipv6() optimization Writing over /proc/sys/net/ipv6/conf/default/disable_ipv6 does not need to hold RTNL. v3: remove a wrong change (Jakub Kicinski feedback) Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c965400fb9340..6975685f55ea9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6399,21 +6399,20 @@ static void addrconf_disable_change(struct net *net, __s32 newf) static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) { - struct net *net; + struct net *net = (struct net *)table->extra2; int old; + if (p == &net->ipv6.devconf_dflt->disable_ipv6) { + WRITE_ONCE(*p, newf); + return 0; + } + if (!rtnl_trylock()) return restart_syscall(); - net = (struct net *)table->extra2; old = *p; WRITE_ONCE(*p, newf); - if (p == &net->ipv6.devconf_dflt->disable_ipv6) { - rtnl_unlock(); - return 0; - } - if (p == &net->ipv6.devconf_all->disable_ipv6) { WRITE_ONCE(net->ipv6.devconf_dflt->disable_ipv6, newf); addrconf_disable_change(net, newf); From e7135f484994494a38071e3653a83d21d305f50c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:28 +0000 Subject: [PATCH 04/15] ipv6: annotate data-races around cnf.mtu6 idev->cnf.mtu6 might be read locklessly, add appropriate READ_ONCE() and WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- net/ipv6/addrconf.c | 4 ++-- net/ipv6/ndisc.c | 4 ++-- net/ipv6/route.c | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 52a51c69aa9de..a30c6aa9e5cf3 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -332,7 +332,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst rcu_read_lock(); idev = __in6_dev_get(dst->dev); if (idev) - mtu = idev->cnf.mtu6; + mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); out: diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6975685f55ea9..1b534edc7ca2c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3671,7 +3671,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (idev) { rt6_mtu_change(dev, dev->mtu); - idev->cnf.mtu6 = dev->mtu; + WRITE_ONCE(idev->cnf.mtu6, dev->mtu); break; } @@ -3763,7 +3763,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) { rt6_mtu_change(dev, dev->mtu); - idev->cnf.mtu6 = dev->mtu; + WRITE_ONCE(idev->cnf.mtu6, dev->mtu); } WRITE_ONCE(idev->tstamp, jiffies); inet6_ifinfo_notify(RTM_NEWLINK, idev); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 8523f0595b018..e96d79cd34d27 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1578,8 +1578,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); - } else if (in6_dev->cnf.mtu6 != mtu) { - in6_dev->cnf.mtu6 = mtu; + } else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) { + WRITE_ONCE(in6_dev->cnf.mtu6, mtu); fib6_metric_set(rt, RTAX_MTU, mtu); rt6_mtu_change(skb->dev, mtu); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 707d65bc9c0e5..66c685b0b6199 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1596,7 +1596,7 @@ static unsigned int fib6_mtu(const struct fib6_result *res) rcu_read_lock(); idev = __in6_dev_get(dev); - mtu = idev->cnf.mtu6; + mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); } @@ -3249,8 +3249,8 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res, mtu = IPV6_MIN_MTU; idev = __in6_dev_get(dev); - if (idev && idev->cnf.mtu6 > mtu) - mtu = idev->cnf.mtu6; + if (idev) + mtu = max_t(u32, mtu, READ_ONCE(idev->cnf.mtu6)); } mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); From e0bb2675fea2783c45bb95d74f00c55156720863 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:29 +0000 Subject: [PATCH 05/15] ipv6: annotate data-races around cnf.hop_limit idev->cnf.hop_limit and net->ipv6.devconf_all->hop_limit might be read locklessly, add appropriate READ_ONCE() and WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Acked-by: Florian Westphal # for netfilter parts Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/action.c | 2 +- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/ndisc.c | 2 +- net/ipv6/netfilter/nf_reject_ipv6.c | 4 ++-- net/ipv6/output_core.c | 4 ++-- net/netfilter/nf_synproxy_core.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 2b383d92d7f57..2c3f629079584 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -460,7 +460,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, set_tun->ttl = ip6_dst_hoplimit(dst); dst_release(dst); } else { - set_tun->ttl = net->ipv6.devconf_all->hop_limit; + set_tun->ttl = READ_ONCE(net->ipv6.devconf_all->hop_limit); } #endif } else { diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 56c3c467f9deb..f61d977ac0528 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1346,7 +1346,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } if (val < 0) - val = sock_net(sk)->ipv6.devconf_all->hop_limit; + val = READ_ONCE(sock_net(sk)->ipv6.devconf_all->hop_limit); break; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index e96d79cd34d27..9c9c31268432e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1423,7 +1423,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) if (in6_dev->cnf.accept_ra_min_hop_limit < 256 && ra_msg->icmph.icmp6_hop_limit) { if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) { - in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; + WRITE_ONCE(in6_dev->cnf.hop_limit, ra_msg->icmph.icmp6_hop_limit); fib6_metric_set(rt, RTAX_HOPLIMIT, ra_msg->icmph.icmp6_hop_limit); } else { diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 196dd4ecb5e21..dedee264b8f6c 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -83,7 +83,7 @@ struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net, skb_reserve(nskb, LL_MAX_HEADER); nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, - net->ipv6.devconf_all->hop_limit); + READ_ONCE(net->ipv6.devconf_all->hop_limit)); nf_reject_ip6_tcphdr_put(nskb, oldskb, oth, otcplen); nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); @@ -124,7 +124,7 @@ struct sk_buff *nf_reject_skb_v6_unreach(struct net *net, skb_reserve(nskb, LL_MAX_HEADER); nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_ICMPV6, - net->ipv6.devconf_all->hop_limit); + READ_ONCE(net->ipv6.devconf_all->hop_limit)); skb_reset_transport_header(nskb); icmp6h = skb_put_zero(nskb, sizeof(struct icmp6hdr)); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index b5205311f372b..806d4b5dd1e60 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -111,9 +111,9 @@ int ip6_dst_hoplimit(struct dst_entry *dst) rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) - hoplimit = idev->cnf.hop_limit; + hoplimit = READ_ONCE(idev->cnf.hop_limit); else - hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; + hoplimit = READ_ONCE(dev_net(dev)->ipv6.devconf_all->hop_limit); rcu_read_unlock(); } return hoplimit; diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index fbbc4fd373495..5b140c12b7dfa 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -800,7 +800,7 @@ synproxy_build_ip_ipv6(struct net *net, struct sk_buff *skb, skb_reset_network_header(skb); iph = skb_put(skb, sizeof(*iph)); ip6_flow_hdr(iph, 0, 0); - iph->hop_limit = net->ipv6.devconf_all->hop_limit; + iph->hop_limit = READ_ONCE(net->ipv6.devconf_all->hop_limit); iph->nexthdr = IPPROTO_TCP; iph->saddr = *saddr; iph->daddr = *daddr; From 32f754176e889cdfe989ef08ece19859427755df Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:30 +0000 Subject: [PATCH 06/15] ipv6: annotate data-races around cnf.forwarding idev->cnf.forwarding and net->ipv6.devconf_all->forwarding might be read locklessly, add appropriate READ_ONCE() and WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/usb/cdc_mbim.c | 2 +- include/net/ipv6.h | 8 +++++--- net/core/filter.c | 2 +- net/ipv6/addrconf.c | 10 ++++++---- net/ipv6/ip6_output.c | 2 +- net/ipv6/ndisc.c | 11 ++++++----- net/ipv6/route.c | 4 ++-- 7 files changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index cd4083e0b3b9e..e13e4920ee9b2 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -339,7 +339,7 @@ static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci) in6_dev = in6_dev_get(netdev); if (!in6_dev) goto out; - is_router = !!in6_dev->cnf.forwarding; + is_router = !!READ_ONCE(in6_dev->cnf.forwarding); in6_dev_put(in6_dev); /* ipv6_stub != NULL if in6_dev_get returned an inet6_dev */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index cf25ea21d770d..88a8e554f7a12 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -534,13 +534,15 @@ static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb) return 0; } -static inline bool ipv6_accept_ra(struct inet6_dev *idev) +static inline bool ipv6_accept_ra(const struct inet6_dev *idev) { + s32 accept_ra = READ_ONCE(idev->cnf.accept_ra); + /* If forwarding is enabled, RA are not accepted unless the special * hybrid mode (accept_ra=2) is enabled. */ - return idev->cnf.forwarding ? idev->cnf.accept_ra == 2 : - idev->cnf.accept_ra; + return READ_ONCE(idev->cnf.forwarding) ? accept_ra == 2 : + accept_ra; } #define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */ diff --git a/net/core/filter.c b/net/core/filter.c index 358870408a51e..58e8e1a70aa75 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5988,7 +5988,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, return -ENODEV; idev = __in6_dev_get_safely(dev); - if (unlikely(!idev || !idev->cnf.forwarding)) + if (unlikely(!idev || !READ_ONCE(idev->cnf.forwarding))) return BPF_FIB_LKUP_RET_FWD_DISABLED; if (flags & BPF_FIB_LOOKUP_OUTPUT) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1b534edc7ca2c..ec99e393f6a91 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -551,7 +551,8 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, goto out; if ((all || type == NETCONFA_FORWARDING) && - nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) + nla_put_s32(skb, NETCONFA_FORWARDING, + READ_ONCE(devconf->forwarding)) < 0) goto nla_put_failure; #ifdef CONFIG_IPV6_MROUTE if ((all || type == NETCONFA_MC_FORWARDING) && @@ -869,7 +870,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf) idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); - idev->cnf.forwarding = newf; + + WRITE_ONCE(idev->cnf.forwarding, newf); if (changed) dev_forward_change(idev); } @@ -886,7 +888,7 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) net = (struct net *)table->extra2; old = *p; - *p = newf; + WRITE_ONCE(*p, newf); if (p == &net->ipv6.devconf_dflt->forwarding) { if ((!newf) ^ (!old)) @@ -901,7 +903,7 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) if (p == &net->ipv6.devconf_all->forwarding) { int old_dflt = net->ipv6.devconf_dflt->forwarding; - net->ipv6.devconf_dflt->forwarding = newf; + WRITE_ONCE(net->ipv6.devconf_dflt->forwarding, newf); if ((!newf) ^ (!old_dflt)) inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0559bd0005858..444be8c84cc57 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -501,7 +501,7 @@ int ip6_forward(struct sk_buff *skb) u32 mtu; idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); - if (net->ipv6.devconf_all->forwarding == 0) + if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0) goto error; if (skb->pkt_type != PACKET_HOST) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 9c9c31268432e..1fb5e37bc78be 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -903,7 +903,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) } if (ipv6_chk_acast_addr(net, dev, &msg->target) || - (idev->cnf.forwarding && + (READ_ONCE(idev->cnf.forwarding) && (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) && (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && @@ -929,7 +929,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) } if (is_router < 0) - is_router = idev->cnf.forwarding; + is_router = READ_ONCE(idev->cnf.forwarding); if (dad) { ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target, @@ -1080,7 +1080,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) * Note that we don't do a (daddr == all-routers-mcast) check. */ new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE; - if (!neigh && lladdr && idev && idev->cnf.forwarding) { + if (!neigh && lladdr && idev && READ_ONCE(idev->cnf.forwarding)) { if (accept_untracked_na(dev, saddr)) { neigh = neigh_create(&nd_tbl, &msg->target, dev); new_state = NUD_STALE; @@ -1100,7 +1100,8 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) * has already sent a NA to us. */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && - net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp && + READ_ONCE(net->ipv6.devconf_all->forwarding) && + net->ipv6.devconf_all->proxy_ndp && pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { /* XXX: idev->cnf.proxy_ndp */ goto out; @@ -1148,7 +1149,7 @@ static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb) } /* Don't accept RS if we're not in router mode */ - if (!idev->cnf.forwarding) + if (!READ_ONCE(idev->cnf.forwarding)) goto out; /* diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 66c685b0b6199..6a2b53de48182 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2220,7 +2220,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, strict |= flags & RT6_LOOKUP_F_IFACE; strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; - if (net->ipv6.devconf_all->forwarding == 0) + if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0) strict |= RT6_LOOKUP_F_REACHABLE; rcu_read_lock(); @@ -4149,7 +4149,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) return; - if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) + if (READ_ONCE(in6_dev->cnf.forwarding) || !in6_dev->cnf.accept_redirects) return; /* RFC2461 8.1: From ddea75d344dd87cf7f5d79efb1aab80b493f8393 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:31 +0000 Subject: [PATCH 07/15] ipv6: annotate data-races in ndisc_router_discovery() Annotate reads from in6_dev->cnf.XXX fields, as they could change concurrently. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 1fb5e37bc78be..f6430db249401 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1319,7 +1319,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) if (old_if_flags != in6_dev->if_flags) send_ifinfo_notify = true; - if (!in6_dev->cnf.accept_ra_defrtr) { + if (!READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) { ND_PRINTK(2, info, "RA: %s, defrtr is false for dev: %s\n", __func__, skb->dev->name); @@ -1327,7 +1327,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) } lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); - if (lifetime != 0 && lifetime < in6_dev->cnf.accept_ra_min_lft) { + if (lifetime != 0 && + lifetime < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) { ND_PRINTK(2, info, "RA: router lifetime (%ds) is too short: %s\n", lifetime, skb->dev->name); @@ -1338,7 +1339,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) * accept_ra_from_local is set to true. */ net = dev_net(in6_dev->dev); - if (!in6_dev->cnf.accept_ra_from_local && + if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { ND_PRINTK(2, info, "RA from local address detected on dev: %s: default router ignored\n", @@ -1350,7 +1351,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) pref = ra_msg->icmph.icmp6_router_pref; /* 10b is handled as if it were 00b (medium) */ if (pref == ICMPV6_ROUTER_PREF_INVALID || - !in6_dev->cnf.accept_ra_rtr_pref) + !READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref)) pref = ICMPV6_ROUTER_PREF_MEDIUM; #endif /* routes added from RAs do not use nexthop objects */ @@ -1421,10 +1422,12 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) spin_unlock_bh(&table->tb6_lock); } - if (in6_dev->cnf.accept_ra_min_hop_limit < 256 && + if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) < 256 && ra_msg->icmph.icmp6_hop_limit) { - if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) { - WRITE_ONCE(in6_dev->cnf.hop_limit, ra_msg->icmph.icmp6_hop_limit); + if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) <= + ra_msg->icmph.icmp6_hop_limit) { + WRITE_ONCE(in6_dev->cnf.hop_limit, + ra_msg->icmph.icmp6_hop_limit); fib6_metric_set(rt, RTAX_HOPLIMIT, ra_msg->icmph.icmp6_hop_limit); } else { @@ -1506,7 +1509,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) } #ifdef CONFIG_IPV6_ROUTE_INFO - if (!in6_dev->cnf.accept_ra_from_local && + if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { ND_PRINTK(2, info, @@ -1515,7 +1518,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) goto skip_routeinfo; } - if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) { + if (READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref) && ndopts.nd_opts_ri) { struct nd_opt_hdr *p; for (p = ndopts.nd_opts_ri; p; @@ -1527,14 +1530,14 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) continue; #endif if (ri->prefix_len == 0 && - !in6_dev->cnf.accept_ra_defrtr) + !READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) continue; if (ri->lifetime != 0 && - ntohl(ri->lifetime) < in6_dev->cnf.accept_ra_min_lft) + ntohl(ri->lifetime) < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) continue; - if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen) + if (ri->prefix_len < READ_ONCE(in6_dev->cnf.accept_ra_rt_info_min_plen)) continue; - if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen) + if (ri->prefix_len > READ_ONCE(in6_dev->cnf.accept_ra_rt_info_max_plen)) continue; rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3, &ipv6_hdr(skb)->saddr); @@ -1554,7 +1557,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) } #endif - if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) { + if (READ_ONCE(in6_dev->cnf.accept_ra_pinfo) && ndopts.nd_opts_pi) { struct nd_opt_hdr *p; for (p = ndopts.nd_opts_pi; p; @@ -1565,7 +1568,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) } } - if (ndopts.nd_opts_mtu && in6_dev->cnf.accept_ra_mtu) { + if (ndopts.nd_opts_mtu && READ_ONCE(in6_dev->cnf.accept_ra_mtu)) { __be32 n; u32 mtu; From fca34cc075996767fbbdb6252be9ddd21c34c920 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:32 +0000 Subject: [PATCH 08/15] ipv6: annotate data-races around idev->cnf.ignore_routes_with_linkdown idev->cnf.ignore_routes_with_linkdown can be used without any locks, add appropriate annotations. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/addrconf.h | 2 +- net/ipv6/addrconf.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 30d6f1e84e465..9d06eb945509e 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -417,7 +417,7 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev) if (unlikely(!idev)) return true; - return !!idev->cnf.ignore_routes_with_linkdown; + return !!READ_ONCE(idev->cnf.ignore_routes_with_linkdown); } void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ec99e393f6a91..82b8d90afb6aa 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -566,7 +566,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, - devconf->ignore_routes_with_linkdown) < 0) + READ_ONCE(devconf->ignore_routes_with_linkdown)) < 0) goto nla_put_failure; out: @@ -935,7 +935,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf) if (idev) { int changed = (!idev->cnf.ignore_routes_with_linkdown) ^ (!newf); - idev->cnf.ignore_routes_with_linkdown = newf; + WRITE_ONCE(idev->cnf.ignore_routes_with_linkdown, newf); if (changed) inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, @@ -956,7 +956,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf) net = (struct net *)table->extra2; old = *p; - *p = newf; + WRITE_ONCE(*p, newf); if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) { if ((!newf) ^ (!old)) @@ -970,7 +970,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf) } if (p == &net->ipv6.devconf_all->ignore_routes_with_linkdown) { - net->ipv6.devconf_dflt->ignore_routes_with_linkdown = newf; + WRITE_ONCE(net->ipv6.devconf_dflt->ignore_routes_with_linkdown, newf); addrconf_linkdown_change(net, newf); if ((!newf) ^ (!old)) inet6_netconf_notify_devconf(net, From e248948a4471c6efca19f7c5e657b30ee67a6485 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:33 +0000 Subject: [PATCH 09/15] ipv6: annotate data-races in rt6_probe() Use READ_ONCE() while reading idev->cnf.rtr_probe_interval while its value could be changed. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/route.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6a2b53de48182..1b897c57c55fe 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -645,14 +645,15 @@ static void rt6_probe(struct fib6_nh *fib6_nh) write_lock_bh(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, - neigh->updated + idev->cnf.rtr_probe_interval)) { + neigh->updated + + READ_ONCE(idev->cnf.rtr_probe_interval))) { work = kmalloc(sizeof(*work), GFP_ATOMIC); if (work) __neigh_set_probe_once(neigh); } write_unlock_bh(&neigh->lock); } else if (time_after(jiffies, last_probe + - idev->cnf.rtr_probe_interval)) { + READ_ONCE(idev->cnf.rtr_probe_interval))) { work = kmalloc(sizeof(*work), GFP_ATOMIC); } From a8fbd4d90720b6c930661ed593d54aba77cec3c2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:34 +0000 Subject: [PATCH 10/15] ipv6: annotate data-races around devconf->proxy_ndp devconf->proxy_ndp can be read and written locklessly, add appropriate annotations. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 ++- net/ipv6/ip6_output.c | 2 +- net/ipv6/ndisc.c | 5 +++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 82b8d90afb6aa..409aa5ddb8569 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -561,7 +561,8 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, goto nla_put_failure; #endif if ((all || type == NETCONFA_PROXY_NEIGH) && - nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) + nla_put_s32(skb, NETCONFA_PROXY_NEIGH, + READ_ONCE(devconf->proxy_ndp)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 444be8c84cc57..f08af3f4e54f5 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -552,7 +552,7 @@ int ip6_forward(struct sk_buff *skb) } /* XXX: idev->cnf.proxy_ndp? */ - if (net->ipv6.devconf_all->proxy_ndp && + if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f6430db249401..4114918f12c88 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -904,7 +904,8 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) if (ipv6_chk_acast_addr(net, dev, &msg->target) || (READ_ONCE(idev->cnf.forwarding) && - (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) && + (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) || + READ_ONCE(idev->cnf.proxy_ndp)) && (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && @@ -1101,7 +1102,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && READ_ONCE(net->ipv6.devconf_all->forwarding) && - net->ipv6.devconf_all->proxy_ndp && + READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { /* XXX: idev->cnf.proxy_ndp */ goto out; From 624d5aec487cf8c2955d9c5880685714f7fe8e6f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:35 +0000 Subject: [PATCH 11/15] ipv6: annotate data-races around devconf->disable_policy idev->cnf.disable_policy and net->ipv6.devconf_all->disable_policy can be read locklessly. Add appropriate annotations on reads and writes. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- net/ipv6/ip6_output.c | 4 ++-- net/ipv6/route.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 409aa5ddb8569..44340ff82da53 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6699,7 +6699,7 @@ int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val) if (!rtnl_trylock()) return restart_syscall(); - *valp = val; + WRITE_ONCE(*valp, val); net = (struct net *)ctl->extra2; if (valp == &net->ipv6.devconf_dflt->disable_policy) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f08af3f4e54f5..b9dd3a66e4236 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -513,8 +513,8 @@ int ip6_forward(struct sk_buff *skb) if (skb_warn_if_lro(skb)) goto drop; - if (!net->ipv6.devconf_all->disable_policy && - (!idev || !idev->cnf.disable_policy) && + if (!READ_ONCE(net->ipv6.devconf_all->disable_policy) && + (!idev || !READ_ONCE(idev->cnf.disable_policy)) && !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1b897c57c55fe..a92fcac902aea 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4584,8 +4584,8 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, f6i->dst_nocount = true; if (!anycast && - (net->ipv6.devconf_all->disable_policy || - idev->cnf.disable_policy)) + (READ_ONCE(net->ipv6.devconf_all->disable_policy) || + READ_ONCE(idev->cnf.disable_policy))) f6i->dst_nopolicy = true; } From 45b90ec9a214ff7fbde8190126301a07387c46e5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:36 +0000 Subject: [PATCH 12/15] ipv6: addrconf_disable_policy() optimization Writing over /proc/sys/net/ipv6/conf/default/disable_policy does not need to hold RTNL. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 44340ff82da53..0e7135a206b09 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6693,20 +6693,19 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val) static int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val) { + struct net *net = (struct net *)ctl->extra2; struct inet6_dev *idev; - struct net *net; + + if (valp == &net->ipv6.devconf_dflt->disable_policy) { + WRITE_ONCE(*valp, val); + return 0; + } if (!rtnl_trylock()) return restart_syscall(); WRITE_ONCE(*valp, val); - net = (struct net *)ctl->extra2; - if (valp == &net->ipv6.devconf_dflt->disable_policy) { - rtnl_unlock(); - return 0; - } - if (valp == &net->ipv6.devconf_all->disable_policy) { struct net_device *dev; From 2aba913f99debce6cd1c7f606d69d094d1202559 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:37 +0000 Subject: [PATCH 13/15] ipv6/addrconf: annotate data-races around devconf fields (I) Annotate lockless reads and writes on following devconf fields: - regen_min_advance - regen_max_retry - dad_transmits - use_tempaddr - max_addresses - max_desync_factor - temp_valid_lft - rtr_solicits - rtr_solicit_max_interval - rtr_solicit_interval - rtr_solicit_delay - enhanced_dad - accept_redirects Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 67 +++++++++++++++++++++++++-------------------- net/ipv6/route.c | 3 +- 2 files changed, 39 insertions(+), 31 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0e7135a206b09..bcfe725f8fbd4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1359,11 +1359,12 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) in6_ifa_put(ifp); } -static unsigned long ipv6_get_regen_advance(struct inet6_dev *idev) +static unsigned long ipv6_get_regen_advance(const struct inet6_dev *idev) { - return idev->cnf.regen_min_advance + idev->cnf.regen_max_retry * - idev->cnf.dad_transmits * - max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ; + return READ_ONCE(idev->cnf.regen_min_advance) + + READ_ONCE(idev->cnf.regen_max_retry) * + READ_ONCE(idev->cnf.dad_transmits) * + max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ; } static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) @@ -1384,7 +1385,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) retry: in6_dev_hold(idev); - if (idev->cnf.use_tempaddr <= 0) { + if (READ_ONCE(idev->cnf.use_tempaddr) <= 0) { write_unlock_bh(&idev->lock); pr_info("%s: use_tempaddr is disabled\n", __func__); in6_dev_put(idev); @@ -1392,8 +1393,8 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) goto out; } spin_lock_bh(&ifp->lock); - if (ifp->regen_count++ >= idev->cnf.regen_max_retry) { - idev->cnf.use_tempaddr = -1; /*XXX*/ + if (ifp->regen_count++ >= READ_ONCE(idev->cnf.regen_max_retry)) { + WRITE_ONCE(idev->cnf.use_tempaddr, -1); /*XXX*/ spin_unlock_bh(&ifp->lock); write_unlock_bh(&idev->lock); pr_warn("%s: regeneration time exceeded - disabled temporary address support\n", @@ -1415,7 +1416,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) */ cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft); max_desync_factor = min_t(long, - idev->cnf.max_desync_factor, + READ_ONCE(idev->cnf.max_desync_factor), cnf_temp_preferred_lft - regen_advance); if (unlikely(idev->desync_factor > max_desync_factor)) { @@ -1432,7 +1433,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) memset(&cfg, 0, sizeof(cfg)); cfg.valid_lft = min_t(__u32, ifp->valid_lft, - idev->cnf.temp_valid_lft + age); + READ_ONCE(idev->cnf.temp_valid_lft) + age); cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor; cfg.preferred_lft = min_t(__u32, if_public_preferred_lft, cfg.preferred_lft); cfg.preferred_lft = min_t(__u32, cfg.valid_lft, cfg.preferred_lft); @@ -1685,7 +1686,7 @@ static int ipv6_get_saddr_eval(struct net *net, */ int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ? !!(dst->prefs & IPV6_PREFER_SRC_TMP) : - score->ifa->idev->cnf.use_tempaddr >= 2; + READ_ONCE(score->ifa->idev->cnf.use_tempaddr) >= 2; ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp; break; } @@ -2168,6 +2169,7 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net *net = dev_net(idev->dev); + int max_addresses; if (addrconf_dad_end(ifp)) { in6_ifa_put(ifp); @@ -2205,9 +2207,9 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp) spin_unlock_bh(&ifp->lock); - if (idev->cnf.max_addresses && - ipv6_count_addresses(idev) >= - idev->cnf.max_addresses) + max_addresses = READ_ONCE(idev->cnf.max_addresses); + if (max_addresses && + ipv6_count_addresses(idev) >= max_addresses) goto lock_errdad; net_info_ratelimited("%s: generating new stable privacy address because of DAD conflict\n", @@ -2604,11 +2606,11 @@ static void manage_tempaddrs(struct inet6_dev *idev, * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), respectively. */ age = (now - ift->cstamp) / HZ; - max_valid = idev->cnf.temp_valid_lft - age; + max_valid = READ_ONCE(idev->cnf.temp_valid_lft) - age; if (max_valid < 0) max_valid = 0; - max_prefered = idev->cnf.temp_prefered_lft - + max_prefered = READ_ONCE(idev->cnf.temp_prefered_lft) - idev->desync_factor - age; if (max_prefered < 0) max_prefered = 0; @@ -2641,7 +2643,7 @@ static void manage_tempaddrs(struct inet6_dev *idev, if (list_empty(&idev->tempaddr_list) && (valid_lft || prefered_lft)) create = true; - if (create && idev->cnf.use_tempaddr > 0) { + if (create && READ_ONCE(idev->cnf.use_tempaddr) > 0) { /* When a new public address is created as described * in [ADDRCONF], also create a new temporary address. */ @@ -2669,7 +2671,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, int create = 0, update_lft = 0; if (!ifp && valid_lft) { - int max_addresses = in6_dev->cnf.max_addresses; + int max_addresses = READ_ONCE(in6_dev->cnf.max_addresses); struct ifa6_config cfg = { .pfx = addr, .plen = pinfo->prefix_len, @@ -4028,6 +4030,7 @@ static void addrconf_rs_timer(struct timer_list *t) struct inet6_dev *idev = from_timer(idev, t, rs_timer); struct net_device *dev = idev->dev; struct in6_addr lladdr; + int rtr_solicits; write_lock(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) @@ -4040,7 +4043,9 @@ static void addrconf_rs_timer(struct timer_list *t) if (idev->if_flags & IF_RA_RCVD) goto out; - if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) { + rtr_solicits = READ_ONCE(idev->cnf.rtr_solicits); + + if (idev->rs_probes++ < rtr_solicits || rtr_solicits < 0) { write_unlock(&idev->lock); if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) ndisc_send_rs(dev, &lladdr, @@ -4050,11 +4055,12 @@ static void addrconf_rs_timer(struct timer_list *t) write_lock(&idev->lock); idev->rs_interval = rfc3315_s14_backoff_update( - idev->rs_interval, idev->cnf.rtr_solicit_max_interval); + idev->rs_interval, + READ_ONCE(idev->cnf.rtr_solicit_max_interval)); /* The wait after the last probe can be shorter */ addrconf_mod_rs_timer(idev, (idev->rs_probes == - idev->cnf.rtr_solicits) ? - idev->cnf.rtr_solicit_delay : + READ_ONCE(idev->cnf.rtr_solicits)) ? + READ_ONCE(idev->cnf.rtr_solicit_delay) : idev->rs_interval); } else { /* @@ -4075,24 +4081,25 @@ static void addrconf_rs_timer(struct timer_list *t) */ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) { - unsigned long rand_num; struct inet6_dev *idev = ifp->idev; + unsigned long rand_num; u64 nonce; if (ifp->flags & IFA_F_OPTIMISTIC) rand_num = 0; else - rand_num = get_random_u32_below(idev->cnf.rtr_solicit_delay ? : 1); + rand_num = get_random_u32_below( + READ_ONCE(idev->cnf.rtr_solicit_delay) ? : 1); nonce = 0; - if (idev->cnf.enhanced_dad || - dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad) { + if (READ_ONCE(idev->cnf.enhanced_dad) || + READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad)) { do get_random_bytes(&nonce, 6); while (nonce == 0); } ifp->dad_nonce = nonce; - ifp->dad_probes = idev->cnf.dad_transmits; + ifp->dad_probes = READ_ONCE(idev->cnf.dad_transmits); addrconf_mod_dad_work(ifp, rand_num); } @@ -4331,7 +4338,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp); send_rs = send_mld && ipv6_accept_ra(ifp->idev) && - ifp->idev->cnf.rtr_solicits != 0 && + READ_ONCE(ifp->idev->cnf.rtr_solicits) != 0 && (dev->flags & IFF_LOOPBACK) == 0 && (dev->type != ARPHRD_TUNNEL) && !netif_is_team_port(dev); @@ -4366,7 +4373,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, write_lock_bh(&ifp->idev->lock); spin_lock(&ifp->lock); ifp->idev->rs_interval = rfc3315_s14_backoff_init( - ifp->idev->cnf.rtr_solicit_interval); + READ_ONCE(ifp->idev->cnf.rtr_solicit_interval)); ifp->idev->rs_probes = 1; ifp->idev->if_flags |= IF_RS_SENT; addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval); @@ -5915,7 +5922,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token, return -EINVAL; } - if (idev->cnf.rtr_solicits == 0) { + if (READ_ONCE(idev->cnf.rtr_solicits) == 0) { NL_SET_ERR_MSG(extack, "Router solicitation is disabled on device"); return -EINVAL; @@ -5948,7 +5955,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token, if (update_rs) { idev->if_flags |= IF_RS_SENT; idev->rs_interval = rfc3315_s14_backoff_init( - idev->cnf.rtr_solicit_interval); + READ_ONCE(idev->cnf.rtr_solicit_interval)); idev->rs_probes = 1; addrconf_mod_rs_timer(idev, idev->rs_interval); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a92fcac902aea..2cecb1c5a58f6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4150,7 +4150,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) return; - if (READ_ONCE(in6_dev->cnf.forwarding) || !in6_dev->cnf.accept_redirects) + if (READ_ONCE(in6_dev->cnf.forwarding) || + !READ_ONCE(in6_dev->cnf.accept_redirects)) return; /* RFC2461 8.1: From 2f0ff05a44302c91af54a5f9efe1b65b7681540e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:38 +0000 Subject: [PATCH 14/15] ipv6/addrconf: annotate data-races around devconf fields (II) Final (?) round of this series. Annotate lockless reads on following devconf fields, because they be changed concurrently from /proc/net/ipv6/conf. - accept_dad - optimistic_dad - use_optimistic - use_oif_addrs_only - ra_honor_pio_life - keep_addr_on_down - ndisc_notify - ndisc_evict_nocarrier - suppress_frag_ndisc - addr_gen_mode - seg6_enabled - ioam6_enabled - ioam6_id - ioam6_id_wide - drop_unicast_in_l2_multicast - mldv[12]_unsolicited_report_interval - force_mld_version - force_tllao - accept_untracked_na - drop_unsolicited_na - accept_source_route Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 49 +++++++++++++++++++++++--------------------- net/ipv6/exthdrs.c | 16 ++++++++------- net/ipv6/ioam6.c | 8 ++++---- net/ipv6/ip6_input.c | 2 +- net/ipv6/mcast.c | 14 ++++++------- net/ipv6/ndisc.c | 18 ++++++++-------- net/ipv6/seg6_hmac.c | 8 +++++--- 7 files changed, 61 insertions(+), 54 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index bcfe725f8fbd4..436d9a6e6a82e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1557,15 +1557,17 @@ static inline int ipv6_saddr_preferred(int type) return 0; } -static bool ipv6_use_optimistic_addr(struct net *net, - struct inet6_dev *idev) +static bool ipv6_use_optimistic_addr(const struct net *net, + const struct inet6_dev *idev) { #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (!idev) return false; - if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad) + if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) && + !READ_ONCE(idev->cnf.optimistic_dad)) return false; - if (!net->ipv6.devconf_all->use_optimistic && !idev->cnf.use_optimistic) + if (!READ_ONCE(net->ipv6.devconf_all->use_optimistic) && + !READ_ONCE(idev->cnf.use_optimistic)) return false; return true; @@ -1574,13 +1576,14 @@ static bool ipv6_use_optimistic_addr(struct net *net, #endif } -static bool ipv6_allow_optimistic_dad(struct net *net, - struct inet6_dev *idev) +static bool ipv6_allow_optimistic_dad(const struct net *net, + const struct inet6_dev *idev) { #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (!idev) return false; - if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad) + if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) && + !READ_ONCE(idev->cnf.optimistic_dad)) return false; return true; @@ -1862,7 +1865,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, idev = __in6_dev_get(dst_dev); if ((dst_type & IPV6_ADDR_MULTICAST) || dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL || - (idev && idev->cnf.use_oif_addrs_only)) { + (idev && READ_ONCE(idev->cnf.use_oif_addrs_only))) { use_oif_addr = true; } } @@ -2683,8 +2686,8 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, }; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - if ((net->ipv6.devconf_all->optimistic_dad || - in6_dev->cnf.optimistic_dad) && + if ((READ_ONCE(net->ipv6.devconf_all->optimistic_dad) || + READ_ONCE(in6_dev->cnf.optimistic_dad)) && !net->ipv6.devconf_all->forwarding && sllao) cfg.ifa_flags |= IFA_F_OPTIMISTIC; #endif @@ -2733,7 +2736,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, */ update_lft = !create && stored_lft; - if (update_lft && !in6_dev->cnf.ra_honor_pio_life) { + if (update_lft && !READ_ONCE(in6_dev->cnf.ra_honor_pio_life)) { const u32 minimum_lft = min_t(u32, stored_lft, MIN_VALID_LIFETIME); valid_lft = max(valid_lft, minimum_lft); @@ -3317,8 +3320,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev, struct inet6_ifaddr *ifp; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad || - idev->cnf.optimistic_dad) && + if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad) || + READ_ONCE(idev->cnf.optimistic_dad)) && !dev_net(idev->dev)->ipv6.devconf_all->forwarding) cfg.ifa_flags |= IFA_F_OPTIMISTIC; #endif @@ -3890,10 +3893,10 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) */ if (!unregister && !idev->cnf.disable_ipv6) { /* aggregate the system setting and interface setting */ - int _keep_addr = net->ipv6.devconf_all->keep_addr_on_down; + int _keep_addr = READ_ONCE(net->ipv6.devconf_all->keep_addr_on_down); if (!_keep_addr) - _keep_addr = idev->cnf.keep_addr_on_down; + _keep_addr = READ_ONCE(idev->cnf.keep_addr_on_down); keep_addr = (_keep_addr > 0); } @@ -4119,8 +4122,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) net = dev_net(dev); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || - (net->ipv6.devconf_all->accept_dad < 1 && - idev->cnf.accept_dad < 1) || + (READ_ONCE(net->ipv6.devconf_all->accept_dad) < 1 && + READ_ONCE(idev->cnf.accept_dad) < 1) || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { bool send_na = false; @@ -4212,8 +4215,8 @@ static void addrconf_dad_work(struct work_struct *w) action = DAD_ABORT; ifp->state = INET6_IFADDR_STATE_POSTDAD; - if ((dev_net(idev->dev)->ipv6.devconf_all->accept_dad > 1 || - idev->cnf.accept_dad > 1) && + if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->accept_dad) > 1 || + READ_ONCE(idev->cnf.accept_dad) > 1) && !idev->cnf.disable_ipv6 && !(ifp->flags & IFA_F_STABLE_PRIVACY)) { struct in6_addr addr; @@ -4352,8 +4355,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, /* send unsolicited NA if enabled */ if (send_na && - (ifp->idev->cnf.ndisc_notify || - dev_net(dev)->ipv6.devconf_all->ndisc_notify)) { + (READ_ONCE(ifp->idev->cnf.ndisc_notify) || + READ_ONCE(dev_net(dev)->ipv6.devconf_all->ndisc_notify))) { ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr, /*router=*/ !!ifp->idev->cnf.forwarding, /*solicited=*/ false, /*override=*/ true, @@ -6541,7 +6544,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, } else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) { struct net_device *dev; - net->ipv6.devconf_dflt->addr_gen_mode = new_val; + WRITE_ONCE(net->ipv6.devconf_dflt->addr_gen_mode, new_val); for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev && @@ -6553,7 +6556,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, } } - *((u32 *)ctl->data) = new_val; + WRITE_ONCE(*((u32 *)ctl->data), new_val); } out: diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 727792907d6cb..26f1e4a5ade05 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -379,9 +379,8 @@ static int ipv6_srh_rcv(struct sk_buff *skb) idev = __in6_dev_get(skb->dev); - accept_seg6 = net->ipv6.devconf_all->seg6_enabled; - if (accept_seg6 > idev->cnf.seg6_enabled) - accept_seg6 = idev->cnf.seg6_enabled; + accept_seg6 = min(READ_ONCE(net->ipv6.devconf_all->seg6_enabled), + READ_ONCE(idev->cnf.seg6_enabled)); if (!accept_seg6) { kfree_skb(skb); @@ -655,10 +654,13 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) struct ipv6_rt_hdr *hdr; struct rt0_hdr *rthdr; struct net *net = dev_net(skb->dev); - int accept_source_route = net->ipv6.devconf_all->accept_source_route; + int accept_source_route; - if (idev && accept_source_route > idev->cnf.accept_source_route) - accept_source_route = idev->cnf.accept_source_route; + accept_source_route = READ_ONCE(net->ipv6.devconf_all->accept_source_route); + + if (idev) + accept_source_route = min(accept_source_route, + READ_ONCE(idev->cnf.accept_source_route)); if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + @@ -919,7 +921,7 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) goto drop; /* Ignore if IOAM is not enabled on ingress */ - if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled) + if (!READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_enabled)) goto ignore; /* Truncated Option header */ diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index 5fa923f066321..08c9295130657 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -727,7 +727,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (!skb->dev) raw16 = IOAM6_U16_UNAVAILABLE; else - raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id; + raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id); *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); @@ -735,7 +735,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) raw16 = IOAM6_U16_UNAVAILABLE; else - raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id; + raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id); *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); @@ -822,7 +822,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (!skb->dev) raw32 = IOAM6_U32_UNAVAILABLE; else - raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide; + raw32 = READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id_wide); *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); @@ -830,7 +830,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) raw32 = IOAM6_U32_UNAVAILABLE; else - raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide; + raw32 = READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide); *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 1ba97933c74fb..133610a49da6b 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -236,7 +236,7 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, if (!ipv6_addr_is_multicast(&hdr->daddr) && (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) && - idev->cnf.drop_unicast_in_l2_multicast) { + READ_ONCE(idev->cnf.drop_unicast_in_l2_multicast)) { SKB_DR_SET(reason, UNICAST_IN_L2_MULTICAST); goto err; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 76ee1615ff2a0..7ba01d8cfbae8 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -159,9 +159,9 @@ static int unsolicited_report_interval(struct inet6_dev *idev) int iv; if (mld_in_v1_mode(idev)) - iv = idev->cnf.mldv1_unsolicited_report_interval; + iv = READ_ONCE(idev->cnf.mldv1_unsolicited_report_interval); else - iv = idev->cnf.mldv2_unsolicited_report_interval; + iv = READ_ONCE(idev->cnf.mldv2_unsolicited_report_interval); return iv > 0 ? iv : 1; } @@ -1202,15 +1202,15 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, static int mld_force_mld_version(const struct inet6_dev *idev) { + const struct net *net = dev_net(idev->dev); + int all_force; + + all_force = READ_ONCE(net->ipv6.devconf_all->force_mld_version); /* Normally, both are 0 here. If enforcement to a particular is * being used, individual device enforcement will have a lower * precedence over 'all' device (.../conf/all/force_mld_version). */ - - if (dev_net(idev->dev)->ipv6.devconf_all->force_mld_version != 0) - return dev_net(idev->dev)->ipv6.devconf_all->force_mld_version; - else - return idev->cnf.force_mld_version; + return all_force ?: READ_ONCE(idev->cnf.force_mld_version); } static bool mld_in_v2_mode_only(const struct inet6_dev *idev) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 4114918f12c88..ae134634c323c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -451,7 +451,7 @@ static void ip6_nd_hdr(struct sk_buff *skb, rcu_read_lock(); idev = __in6_dev_get(skb->dev); - tclass = idev ? idev->cnf.ndisc_tclass : 0; + tclass = idev ? READ_ONCE(idev->cnf.ndisc_tclass) : 0; rcu_read_unlock(); skb_push(skb, sizeof(*hdr)); @@ -535,7 +535,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) override = false; - inc_opt |= ifp->idev->cnf.force_tllao; + inc_opt |= READ_ONCE(ifp->idev->cnf.force_tllao); in6_ifa_put(ifp); } else { if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr, @@ -974,7 +974,7 @@ static int accept_untracked_na(struct net_device *dev, struct in6_addr *saddr) { struct inet6_dev *idev = __in6_dev_get(dev); - switch (idev->cnf.accept_untracked_na) { + switch (READ_ONCE(idev->cnf.accept_untracked_na)) { case 0: /* Don't accept untracked na (absent in neighbor cache) */ return 0; case 1: /* Create new entries from na if currently untracked */ @@ -1025,7 +1025,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) * drop_unsolicited_na takes precedence over accept_untracked_na */ if (!msg->icmph.icmp6_solicited && idev && - idev->cnf.drop_unsolicited_na) + READ_ONCE(idev->cnf.drop_unsolicited_na)) return reason; if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) @@ -1818,7 +1818,7 @@ static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb) if (!idev) return true; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED && - idev->cnf.suppress_frag_ndisc) { + READ_ONCE(idev->cnf.suppress_frag_ndisc)) { net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n"); return true; } @@ -1895,8 +1895,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, idev = in6_dev_get(dev); if (!idev) break; - if (idev->cnf.ndisc_notify || - net->ipv6.devconf_all->ndisc_notify) + if (READ_ONCE(idev->cnf.ndisc_notify) || + READ_ONCE(net->ipv6.devconf_all->ndisc_notify)) ndisc_send_unsol_na(dev); in6_dev_put(idev); break; @@ -1905,8 +1905,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, if (!idev) evict_nocarrier = true; else { - evict_nocarrier = idev->cnf.ndisc_evict_nocarrier && - net->ipv6.devconf_all->ndisc_evict_nocarrier; + evict_nocarrier = READ_ONCE(idev->cnf.ndisc_evict_nocarrier) && + READ_ONCE(net->ipv6.devconf_all->ndisc_evict_nocarrier); in6_dev_put(idev); } diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index d43c50a7310d6..861e0366f549d 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -241,6 +241,7 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb) struct sr6_tlv_hmac *tlv; struct ipv6_sr_hdr *srh; struct inet6_dev *idev; + int require_hmac; idev = __in6_dev_get(skb->dev); @@ -248,16 +249,17 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb) tlv = seg6_get_tlv_hmac(srh); + require_hmac = READ_ONCE(idev->cnf.seg6_require_hmac); /* mandatory check but no tlv */ - if (idev->cnf.seg6_require_hmac > 0 && !tlv) + if (require_hmac > 0 && !tlv) return false; /* no check */ - if (idev->cnf.seg6_require_hmac < 0) + if (require_hmac < 0) return true; /* check only if present */ - if (idev->cnf.seg6_require_hmac == 0 && !tlv) + if (require_hmac == 0 && !tlv) return true; /* now, seg6_require_hmac >= 0 && tlv */ From 2a02f8379bde89472cddb49dc8e55a17e0b68eed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:39 +0000 Subject: [PATCH 15/15] ipv6: use xa_array iterator to implement inet6_netconf_dump_devconf() 1) inet6_netconf_dump_devconf() can run under RCU protection instead of RTNL. 2) properly return 0 at the end of a dump, avoiding an an extra recvmsg() system call. 3) Do not use inet6_base_seq() anymore, for_each_netdev_dump() has nice properties. Restarting a GETDEVCONF dump if a device has been added/removed or if net->ipv6.dev_addr_genid has changed is moot. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 102 +++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 58 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 436d9a6e6a82e..2f84e6ecf19f4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -727,17 +727,18 @@ static u32 inet6_base_seq(const struct net *net) return res; } - static int inet6_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); - int h, s_h; - int idx, s_idx; + struct { + unsigned long ifindex; + unsigned int all_default; + } *ctx = (void *)cb->ctx; struct net_device *dev; struct inet6_dev *idev; - struct hlist_head *head; + int err = 0; if (cb->strict_check) { struct netlink_ext_ack *extack = cb->extack; @@ -754,64 +755,48 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, } } - s_h = cb->args[0]; - s_idx = idx = cb->args[1]; - - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &net->dev_index_head[h]; - rcu_read_lock(); - cb->seq = inet6_base_seq(net); - hlist_for_each_entry_rcu(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - idev = __in6_dev_get(dev); - if (!idev) - goto cont; - - if (inet6_netconf_fill_devconf(skb, dev->ifindex, - &idev->cnf, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, - NLM_F_MULTI, - NETCONFA_ALL) < 0) { - rcu_read_unlock(); - goto done; - } - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -cont: - idx++; - } - rcu_read_unlock(); + rcu_read_lock(); + for_each_netdev_dump(net, dev, ctx->ifindex) { + idev = __in6_dev_get(dev); + if (!idev) + continue; + err = inet6_netconf_fill_devconf(skb, dev->ifindex, + &idev->cnf, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, + NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) + goto done; } - if (h == NETDEV_HASHENTRIES) { - if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, - net->ipv6.devconf_all, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, NLM_F_MULTI, - NETCONFA_ALL) < 0) + if (ctx->all_default == 0) { + err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) goto done; - else - h++; - } - if (h == NETDEV_HASHENTRIES + 1) { - if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, - net->ipv6.devconf_dflt, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, NLM_F_MULTI, - NETCONFA_ALL) < 0) + ctx->all_default++; + } + if (ctx->all_default == 1) { + err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) goto done; - else - h++; + ctx->all_default++; } done: - cb->args[0] = h; - cb->args[1] = idx; - - return skb->len; + if (err < 0 && likely(skb->len)) + err = skb->len; + rcu_read_unlock(); + return err; } #ifdef CONFIG_SYSCTL @@ -7503,7 +7488,8 @@ int __init addrconf_init(void) err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, inet6_netconf_dump_devconf, - RTNL_FLAG_DOIT_UNLOCKED); + RTNL_FLAG_DOIT_UNLOCKED | + RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; err = ipv6_addr_label_rtnl_register();