From f7a6082b5e4c15f34fd766cf0960f7e082009c54 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 15 Jan 2025 17:05:58 +0900 Subject: [PATCH 01/11] ipv6: Add __in6_dev_get_rtnl_net(). We will convert rtnl_lock() with rtnl_net_lock(), and we want to convert __in6_dev_get() too. __in6_dev_get() uses rcu_dereference_rtnl(), but as written in its comment, rtnl_dereference() or rcu_dereference() is preferable. Let's add __in6_dev_get_rtnl_net() that uses rtnl_net_dereference(). We can add the RCU version helper later if needed. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115080608.28127-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/addrconf.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f8f91b2038eaf..9e5e95988b9e5 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -347,6 +347,11 @@ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev) return rcu_dereference_rtnl(dev->ip6_ptr); } +static inline struct inet6_dev *__in6_dev_get_rtnl_net(const struct net_device *dev) +{ + return rtnl_net_dereference(dev_net(dev), dev->ip6_ptr); +} + /** * __in6_dev_stats_get - get inet6_dev pointer for stats * @dev: network device From 93c839e3edbe2cdf59d3bc12d776cce4cdf159ce Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 15 Jan 2025 17:05:59 +0900 Subject: [PATCH 02/11] ipv6: Convert net.ipv6.conf.${DEV}.XXX sysctl to per-netns RTNL. net.ipv6.conf.${DEV}.XXX sysctl are changed under RTNL: * forwarding * ignore_routes_with_linkdown * disable_ipv6 * proxy_ndp * addr_gen_mode * stable_secret * disable_policy Let's use rtnl_net_lock() there. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115080608.28127-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 60 ++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c3729382be3bd..fb0ef98c79b01 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -852,7 +852,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf) struct inet6_dev *idev; for_each_netdev(net, dev) { - idev = __in6_dev_get(dev); + idev = __in6_dev_get_rtnl_net(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); @@ -865,13 +865,12 @@ static void addrconf_forward_change(struct net *net, __s32 newf) static int addrconf_fixup_forwarding(const struct ctl_table *table, int *p, int newf) { - struct net *net; + struct net *net = (struct net *)table->extra2; int old; - if (!rtnl_trylock()) + if (!rtnl_net_trylock(net)) return restart_syscall(); - net = (struct net *)table->extra2; old = *p; WRITE_ONCE(*p, newf); @@ -881,7 +880,7 @@ static int addrconf_fixup_forwarding(const struct ctl_table *table, int *p, int NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); - rtnl_unlock(); + rtnl_net_unlock(net); return 0; } @@ -903,7 +902,7 @@ static int addrconf_fixup_forwarding(const struct ctl_table *table, int *p, int net->ipv6.devconf_all); } else if ((!newf) ^ (!old)) dev_forward_change((struct inet6_dev *)table->extra1); - rtnl_unlock(); + rtnl_net_unlock(net); if (newf) rt6_purge_dflt_routers(net); @@ -916,7 +915,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf) struct inet6_dev *idev; for_each_netdev(net, dev) { - idev = __in6_dev_get(dev); + idev = __in6_dev_get_rtnl_net(dev); if (idev) { int changed = (!idev->cnf.ignore_routes_with_linkdown) ^ (!newf); @@ -933,13 +932,12 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf) static int addrconf_fixup_linkdown(const struct ctl_table *table, int *p, int newf) { - struct net *net; + struct net *net = (struct net *)table->extra2; int old; - if (!rtnl_trylock()) + if (!rtnl_net_trylock(net)) return restart_syscall(); - net = (struct net *)table->extra2; old = *p; WRITE_ONCE(*p, newf); @@ -950,7 +948,7 @@ static int addrconf_fixup_linkdown(const struct ctl_table *table, int *p, int ne NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); - rtnl_unlock(); + rtnl_net_unlock(net); return 0; } @@ -964,7 +962,8 @@ static int addrconf_fixup_linkdown(const struct ctl_table *table, int *p, int ne NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); } - rtnl_unlock(); + + rtnl_net_unlock(net); return 1; } @@ -6370,7 +6369,7 @@ static void addrconf_disable_change(struct net *net, __s32 newf) struct inet6_dev *idev; for_each_netdev(net, dev) { - idev = __in6_dev_get(dev); + idev = __in6_dev_get_rtnl_net(dev); if (idev) { int changed = (!idev->cnf.disable_ipv6) ^ (!newf); @@ -6391,7 +6390,7 @@ static int addrconf_disable_ipv6(const struct ctl_table *table, int *p, int newf return 0; } - if (!rtnl_trylock()) + if (!rtnl_net_trylock(net)) return restart_syscall(); old = *p; @@ -6400,10 +6399,11 @@ static int addrconf_disable_ipv6(const struct ctl_table *table, int *p, int newf if (p == &net->ipv6.devconf_all->disable_ipv6) { WRITE_ONCE(net->ipv6.devconf_dflt->disable_ipv6, newf); addrconf_disable_change(net, newf); - } else if ((!newf) ^ (!old)) + } else if ((!newf) ^ (!old)) { dev_disable_change((struct inet6_dev *)table->extra1); + } - rtnl_unlock(); + rtnl_net_unlock(net); return 0; } @@ -6446,20 +6446,20 @@ static int addrconf_sysctl_proxy_ndp(const struct ctl_table *ctl, int write, if (write && old != new) { struct net *net = ctl->extra2; - if (!rtnl_trylock()) + if (!rtnl_net_trylock(net)) return restart_syscall(); - if (valp == &net->ipv6.devconf_dflt->proxy_ndp) + if (valp == &net->ipv6.devconf_dflt->proxy_ndp) { inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_PROXY_NEIGH, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); - else if (valp == &net->ipv6.devconf_all->proxy_ndp) + } else if (valp == &net->ipv6.devconf_all->proxy_ndp) { inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_PROXY_NEIGH, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); - else { + } else { struct inet6_dev *idev = ctl->extra1; inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, @@ -6467,7 +6467,7 @@ static int addrconf_sysctl_proxy_ndp(const struct ctl_table *ctl, int write, idev->dev->ifindex, &idev->cnf); } - rtnl_unlock(); + rtnl_net_unlock(net); } return ret; @@ -6487,7 +6487,7 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write, .mode = ctl->mode, }; - if (!rtnl_trylock()) + if (!rtnl_net_trylock(net)) return restart_syscall(); new_val = *((u32 *)ctl->data); @@ -6517,7 +6517,7 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write, WRITE_ONCE(net->ipv6.devconf_dflt->addr_gen_mode, new_val); for_each_netdev(net, dev) { - idev = __in6_dev_get(dev); + idev = __in6_dev_get_rtnl_net(dev); if (idev && idev->cnf.addr_gen_mode != new_val) { WRITE_ONCE(idev->cnf.addr_gen_mode, @@ -6531,7 +6531,7 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write, } out: - rtnl_unlock(); + rtnl_net_unlock(net); return ret; } @@ -6553,7 +6553,7 @@ static int addrconf_sysctl_stable_secret(const struct ctl_table *ctl, int write, lctl.maxlen = IPV6_MAX_STRLEN; lctl.data = str; - if (!rtnl_trylock()) + if (!rtnl_net_trylock(net)) return restart_syscall(); if (!write && !secret->initialized) { @@ -6583,7 +6583,7 @@ static int addrconf_sysctl_stable_secret(const struct ctl_table *ctl, int write, struct net_device *dev; for_each_netdev(net, dev) { - struct inet6_dev *idev = __in6_dev_get(dev); + struct inet6_dev *idev = __in6_dev_get_rtnl_net(dev); if (idev) { WRITE_ONCE(idev->cnf.addr_gen_mode, @@ -6598,7 +6598,7 @@ static int addrconf_sysctl_stable_secret(const struct ctl_table *ctl, int write, } out: - rtnl_unlock(); + rtnl_net_unlock(net); return err; } @@ -6682,7 +6682,7 @@ int addrconf_disable_policy(const struct ctl_table *ctl, int *valp, int val) return 0; } - if (!rtnl_trylock()) + if (!rtnl_net_trylock(net)) return restart_syscall(); WRITE_ONCE(*valp, val); @@ -6691,7 +6691,7 @@ int addrconf_disable_policy(const struct ctl_table *ctl, int *valp, int val) struct net_device *dev; for_each_netdev(net, dev) { - idev = __in6_dev_get(dev); + idev = __in6_dev_get_rtnl_net(dev); if (idev) addrconf_disable_policy_idev(idev, val); } @@ -6700,7 +6700,7 @@ int addrconf_disable_policy(const struct ctl_table *ctl, int *valp, int val) addrconf_disable_policy_idev(idev, val); } - rtnl_unlock(); + rtnl_net_unlock(net); return 0; } From 6550ba0863f9e015cf4ca9b668e44b28fc6fe896 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 15 Jan 2025 17:06:00 +0900 Subject: [PATCH 03/11] ipv6: Hold rtnl_net_lock() in addrconf_verify_work(). addrconf_verify_work() is per-netns work to call addrconf_verify_rtnl() under RTNL. Let's use rtnl_net_lock(). Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115080608.28127-4-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fb0ef98c79b01..fe85cb2d49c84 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4751,9 +4751,9 @@ static void addrconf_verify_work(struct work_struct *w) struct net *net = container_of(to_delayed_work(w), struct net, ipv6.addr_chk_work); - rtnl_lock(); + rtnl_net_lock(net); addrconf_verify_rtnl(net); - rtnl_unlock(); + rtnl_net_unlock(net); } static void addrconf_verify(struct net *net) From 02cdd78b4e8d411163327ecfdd54c3208baebc0e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 15 Jan 2025 17:06:01 +0900 Subject: [PATCH 04/11] ipv6: Hold rtnl_net_lock() in addrconf_dad_work(). addrconf_dad_work() is per-address work and holds RTNL internally. We can fetch netns as dev_net(ifp->idev->dev). Let's use rtnl_net_lock(). Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115080608.28127-5-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fe85cb2d49c84..e297cd6f9fd23 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4204,6 +4204,7 @@ static void addrconf_dad_work(struct work_struct *w) struct inet6_dev *idev = ifp->idev; bool bump_id, disable_ipv6 = false; struct in6_addr mcaddr; + struct net *net; enum { DAD_PROCESS, @@ -4211,7 +4212,9 @@ static void addrconf_dad_work(struct work_struct *w) DAD_ABORT, } action = DAD_PROCESS; - rtnl_lock(); + net = dev_net(idev->dev); + + rtnl_net_lock(net); spin_lock_bh(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_PREDAD) { @@ -4221,7 +4224,7 @@ static void addrconf_dad_work(struct work_struct *w) action = DAD_ABORT; ifp->state = INET6_IFADDR_STATE_POSTDAD; - if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->accept_dad) > 1 || + if ((READ_ONCE(net->ipv6.devconf_all->accept_dad) > 1 || READ_ONCE(idev->cnf.accept_dad) > 1) && !idev->cnf.disable_ipv6 && !(ifp->flags & IFA_F_STABLE_PRIVACY)) { @@ -4303,7 +4306,7 @@ static void addrconf_dad_work(struct work_struct *w) ifp->dad_nonce); out: in6_ifa_put(ifp); - rtnl_unlock(); + rtnl_net_unlock(net); } /* ifp->idev must be at least read locked */ From cdc5c1196ee9bcb12979d2599ed00dc187d989f1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 15 Jan 2025 17:06:02 +0900 Subject: [PATCH 05/11] ipv6: Hold rtnl_net_lock() in addrconf_init() and addrconf_cleanup(). addrconf_init() holds RTNL for blackhole_netdev, which is the global device in init_net. addrconf_cleanup() holds RTNL to clean up devices in init_net too. Let's use rtnl_net_lock(&init_net) there. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115080608.28127-6-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e297cd6f9fd23..4d1ec290a259c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7460,9 +7460,9 @@ int __init addrconf_init(void) goto out_nowq; } - rtnl_lock(); + rtnl_net_lock(&init_net); idev = ipv6_add_dev(blackhole_netdev); - rtnl_unlock(); + rtnl_net_unlock(&init_net); if (IS_ERR(idev)) { err = PTR_ERR(idev); goto errlo; @@ -7512,17 +7512,17 @@ void addrconf_cleanup(void) rtnl_af_unregister(&inet6_ops); - rtnl_lock(); + rtnl_net_lock(&init_net); /* clean dev list */ for_each_netdev(&init_net, dev) { - if (__in6_dev_get(dev) == NULL) + if (!__in6_dev_get_rtnl_net(dev)) continue; addrconf_ifdown(dev, true); } addrconf_ifdown(init_net.loopback_dev, true); - rtnl_unlock(); + rtnl_net_unlock(&init_net); destroy_workqueue(addrconf_wq); } From 832128cc4438ab69c801bc6130756203eae1198e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 15 Jan 2025 17:06:03 +0900 Subject: [PATCH 06/11] ipv6: Convert inet6_ioctl() to per-netns RTNL. These functions are called from inet6_ioctl() with a socket's netns and hold RTNL. * SIOCSIFADDR : addrconf_add_ifaddr() * SIOCDIFADDR : addrconf_del_ifaddr() * SIOCSIFDSTADDR : addrconf_set_dstaddr() Let's use rtnl_net_lock(). Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115080608.28127-7-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4d1ec290a259c..9c7257b28a84f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2979,11 +2979,11 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg) if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) return -EFAULT; - rtnl_lock(); + rtnl_net_lock(net); dev = __dev_get_by_index(net, ireq.ifr6_ifindex); if (dev && dev->type == ARPHRD_SIT) err = addrconf_set_sit_dstaddr(net, dev, &ireq); - rtnl_unlock(); + rtnl_net_unlock(net); return err; } @@ -3181,9 +3181,9 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg) cfg.pfx = &ireq.ifr6_addr; cfg.plen = ireq.ifr6_prefixlen; - rtnl_lock(); + rtnl_net_lock(net); err = inet6_addr_add(net, ireq.ifr6_ifindex, &cfg, NULL); - rtnl_unlock(); + rtnl_net_unlock(net); return err; } @@ -3198,10 +3198,10 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg) if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) return -EFAULT; - rtnl_lock(); + rtnl_net_lock(net); err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr, ireq.ifr6_prefixlen, NULL); - rtnl_unlock(); + rtnl_net_unlock(net); return err; } From f7fce98a73df01901c8d67f1e7cd66a05fed148f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 15 Jan 2025 17:06:04 +0900 Subject: [PATCH 07/11] ipv6: Pass dev to inet6_addr_add(). inet6_addr_add() is called from inet6_rtm_newaddr() and addrconf_add_ifaddr(). inet6_addr_add() looks up dev by __dev_get_by_index(), but it's already done in inet6_rtm_newaddr(). Let's move the 2nd lookup to addrconf_add_ifaddr() and pass dev to inet6_addr_add(). Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115080608.28127-8-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9c7257b28a84f..0e7ca74012aaa 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3007,13 +3007,12 @@ static int ipv6_mc_config(struct sock *sk, bool join, /* * Manual configuration of address on an interface */ -static int inet6_addr_add(struct net *net, int ifindex, +static int inet6_addr_add(struct net *net, struct net_device *dev, struct ifa6_config *cfg, struct netlink_ext_ack *extack) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; - struct net_device *dev; unsigned long timeout; clock_t expires; u32 flags; @@ -3036,10 +3035,6 @@ static int inet6_addr_add(struct net *net, int ifindex, return -EINVAL; } - dev = __dev_get_by_index(net, ifindex); - if (!dev) - return -ENODEV; - idev = addrconf_add_dev(dev); if (IS_ERR(idev)) { NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device"); @@ -3048,7 +3043,7 @@ static int inet6_addr_add(struct net *net, int ifindex, if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) { int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk, - true, cfg->pfx, ifindex); + true, cfg->pfx, dev->ifindex); if (ret < 0) { NL_SET_ERR_MSG_MOD(extack, "Multicast auto join failed"); @@ -3103,7 +3098,7 @@ static int inet6_addr_add(struct net *net, int ifindex, return 0; } else if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) { ipv6_mc_config(net->ipv6.mc_autojoin_sk, false, - cfg->pfx, ifindex); + cfg->pfx, dev->ifindex); } return PTR_ERR(ifp); @@ -3169,6 +3164,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg) .preferred_lft = INFINITY_LIFE_TIME, .valid_lft = INFINITY_LIFE_TIME, }; + struct net_device *dev; struct in6_ifreq ireq; int err; @@ -3182,7 +3178,11 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg) cfg.plen = ireq.ifr6_prefixlen; rtnl_net_lock(net); - err = inet6_addr_add(net, ireq.ifr6_ifindex, &cfg, NULL); + dev = __dev_get_by_index(net, ireq.ifr6_ifindex); + if (dev) + err = inet6_addr_add(net, dev, &cfg, NULL); + else + err = -ENODEV; rtnl_net_unlock(net); return err; } @@ -5064,7 +5064,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, * It would be best to check for !NLM_F_CREATE here but * userspace already relies on not having to provide this. */ - return inet6_addr_add(net, ifm->ifa_index, &cfg, extack); + return inet6_addr_add(net, dev, &cfg, extack); } if (nlh->nlmsg_flags & NLM_F_EXCL || From 2f1ace4127fdfc102603f359756b230f3d897411 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 15 Jan 2025 17:06:05 +0900 Subject: [PATCH 08/11] ipv6: Set cfg.ifa_flags before device lookup in inet6_rtm_newaddr(). We will convert inet6_rtm_newaddr() to per-netns RTNL. Except for IFA_F_OPTIMISTIC, cfg.ifa_flags can be set before __dev_get_by_index(). Let's move ifa_flags setup before __dev_get_by_index() so that we can set ifa_flags without RTNL. Also, now it's moved before tb[IFA_CACHEINFO] in preparing for the next patch. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115080608.28127-9-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0e7ca74012aaa..9720ff17f0a1e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5021,6 +5021,13 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[IFA_PROTO]) cfg.ifa_proto = nla_get_u8(tb[IFA_PROTO]); + cfg.ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags); + + /* We ignore other flags so far. */ + cfg.ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | + IFA_F_MANAGETEMPADDR | IFA_F_NOPREFIXROUTE | + IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC; + cfg.valid_lft = INFINITY_LIFE_TIME; cfg.preferred_lft = INFINITY_LIFE_TIME; @@ -5038,13 +5045,6 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENODEV; } - cfg.ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags); - - /* We ignore other flags so far. */ - cfg.ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | - IFA_F_MANAGETEMPADDR | IFA_F_NOPREFIXROUTE | - IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC; - idev = ipv6_find_idev(dev); if (IS_ERR(idev)) return PTR_ERR(idev); From 867b385251eac666685f7f4132315a1e3619baa5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 15 Jan 2025 17:06:06 +0900 Subject: [PATCH 09/11] ipv6: Move lifetime validation to inet6_rtm_newaddr(). inet6_addr_add() and inet6_addr_modify() have the same code to validate IPv6 lifetime that is done under RTNL. Let's factorise it out to inet6_rtm_newaddr() so that we can validate the lifetime without RTNL later. Note that inet6_addr_add() is called from addrconf_add_ifaddr(), but the lifetime is INFINITY_LIFE_TIME in the path, so expires and flags are 0. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115080608.28127-10-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 93 +++++++++++++++++---------------------------- 1 file changed, 35 insertions(+), 58 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9720ff17f0a1e..9ae25a8d16329 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3008,14 +3008,11 @@ static int ipv6_mc_config(struct sock *sk, bool join, * Manual configuration of address on an interface */ static int inet6_addr_add(struct net *net, struct net_device *dev, - struct ifa6_config *cfg, + struct ifa6_config *cfg, clock_t expires, u32 flags, struct netlink_ext_ack *extack) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; - unsigned long timeout; - clock_t expires; - u32 flags; ASSERT_RTNL(); @@ -3024,12 +3021,6 @@ static int inet6_addr_add(struct net *net, struct net_device *dev, return -EINVAL; } - /* check the lifetime */ - if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft) { - NL_SET_ERR_MSG_MOD(extack, "address lifetime invalid"); - return -EINVAL; - } - if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && cfg->plen != 64) { NL_SET_ERR_MSG_MOD(extack, "address with \"mngtmpaddr\" flag must have a prefix length of 64"); return -EINVAL; @@ -3053,24 +3044,6 @@ static int inet6_addr_add(struct net *net, struct net_device *dev, cfg->scope = ipv6_addr_scope(cfg->pfx); - timeout = addrconf_timeout_fixup(cfg->valid_lft, HZ); - if (addrconf_finite_timeout(timeout)) { - expires = jiffies_to_clock_t(timeout * HZ); - cfg->valid_lft = timeout; - flags = RTF_EXPIRES; - } else { - expires = 0; - flags = 0; - cfg->ifa_flags |= IFA_F_PERMANENT; - } - - timeout = addrconf_timeout_fixup(cfg->preferred_lft, HZ); - if (addrconf_finite_timeout(timeout)) { - if (timeout == 0) - cfg->ifa_flags |= IFA_F_DEPRECATED; - cfg->preferred_lft = timeout; - } - ifp = ipv6_add_addr(idev, cfg, true, extack); if (!IS_ERR(ifp)) { if (!(cfg->ifa_flags & IFA_F_NOPREFIXROUTE)) { @@ -3180,7 +3153,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg) rtnl_net_lock(net); dev = __dev_get_by_index(net, ireq.ifr6_ifindex); if (dev) - err = inet6_addr_add(net, dev, &cfg, NULL); + err = inet6_addr_add(net, dev, &cfg, 0, 0, NULL); else err = -ENODEV; rtnl_net_unlock(net); @@ -4869,20 +4842,15 @@ static int modify_prefix_route(struct net *net, struct inet6_ifaddr *ifp, } static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp, - struct ifa6_config *cfg) + struct ifa6_config *cfg, clock_t expires, + u32 flags) { - u32 flags; - clock_t expires; - unsigned long timeout; bool was_managetempaddr; - bool had_prefixroute; bool new_peer = false; + bool had_prefixroute; ASSERT_RTNL(); - if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft) - return -EINVAL; - if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && (ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64)) return -EINVAL; @@ -4890,24 +4858,6 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp, if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED) cfg->ifa_flags &= ~IFA_F_OPTIMISTIC; - timeout = addrconf_timeout_fixup(cfg->valid_lft, HZ); - if (addrconf_finite_timeout(timeout)) { - expires = jiffies_to_clock_t(timeout * HZ); - cfg->valid_lft = timeout; - flags = RTF_EXPIRES; - } else { - expires = 0; - flags = 0; - cfg->ifa_flags |= IFA_F_PERMANENT; - } - - timeout = addrconf_timeout_fixup(cfg->preferred_lft, HZ); - if (addrconf_finite_timeout(timeout)) { - if (timeout == 0) - cfg->ifa_flags |= IFA_F_DEPRECATED; - cfg->preferred_lft = timeout; - } - if (cfg->peer_pfx && memcmp(&ifp->peer_addr, cfg->peer_pfx, sizeof(struct in6_addr))) { if (!ipv6_addr_any(&ifp->peer_addr)) @@ -4992,13 +4942,16 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); - struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *peer_pfx; struct inet6_ifaddr *ifa; struct net_device *dev; struct inet6_dev *idev; struct ifa6_config cfg; + struct ifaddrmsg *ifm; + unsigned long timeout; + clock_t expires; + u32 flags; int err; err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, @@ -5028,8 +4981,11 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, IFA_F_MANAGETEMPADDR | IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC; + cfg.ifa_flags |= IFA_F_PERMANENT; cfg.valid_lft = INFINITY_LIFE_TIME; cfg.preferred_lft = INFINITY_LIFE_TIME; + expires = 0; + flags = 0; if (tb[IFA_CACHEINFO]) { struct ifa_cacheinfo *ci; @@ -5037,6 +4993,27 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, ci = nla_data(tb[IFA_CACHEINFO]); cfg.valid_lft = ci->ifa_valid; cfg.preferred_lft = ci->ifa_prefered; + + if (!cfg.valid_lft || cfg.preferred_lft > cfg.valid_lft) { + NL_SET_ERR_MSG_MOD(extack, "address lifetime invalid"); + return -EINVAL; + } + + timeout = addrconf_timeout_fixup(cfg.valid_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + cfg.ifa_flags &= ~IFA_F_PERMANENT; + cfg.valid_lft = timeout; + expires = jiffies_to_clock_t(timeout * HZ); + flags = RTF_EXPIRES; + } + + timeout = addrconf_timeout_fixup(cfg.preferred_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + cfg.ifa_flags |= IFA_F_DEPRECATED; + + cfg.preferred_lft = timeout; + } } dev = __dev_get_by_index(net, ifm->ifa_index); @@ -5064,7 +5041,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, * It would be best to check for !NLM_F_CREATE here but * userspace already relies on not having to provide this. */ - return inet6_addr_add(net, dev, &cfg, extack); + return inet6_addr_add(net, dev, &cfg, expires, flags, extack); } if (nlh->nlmsg_flags & NLM_F_EXCL || @@ -5072,7 +5049,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, NL_SET_ERR_MSG_MOD(extack, "address already assigned"); err = -EEXIST; } else { - err = inet6_addr_modify(net, ifa, &cfg); + err = inet6_addr_modify(net, ifa, &cfg, expires, flags); } in6_ifa_put(ifa); From 82a1e6aa8f6c181e73117975a9d44eb3b51425a2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 15 Jan 2025 17:06:07 +0900 Subject: [PATCH 10/11] ipv6: Convert inet6_rtm_newaddr() to per-netns RTNL. Let's register inet6_rtm_newaddr() with RTNL_FLAG_DOIT_PERNET and hold rtnl_net_lock() before __dev_get_by_index(). Now that inet6_addr_add() and inet6_addr_modify() are always called under per-netns RTNL. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115080608.28127-11-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9ae25a8d16329..b848e4038d2ef 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3014,7 +3014,7 @@ static int inet6_addr_add(struct net *net, struct net_device *dev, struct inet6_ifaddr *ifp; struct inet6_dev *idev; - ASSERT_RTNL(); + ASSERT_RTNL_NET(net); if (cfg->plen > 128) { NL_SET_ERR_MSG_MOD(extack, "Invalid prefix length"); @@ -4849,7 +4849,7 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp, bool new_peer = false; bool had_prefixroute; - ASSERT_RTNL(); + ASSERT_RTNL_NET(net); if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && (ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64)) @@ -5016,15 +5016,20 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, } } + rtnl_net_lock(net); + dev = __dev_get_by_index(net, ifm->ifa_index); if (!dev) { NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface"); - return -ENODEV; + err = -ENODEV; + goto unlock; } idev = ipv6_find_idev(dev); - if (IS_ERR(idev)) - return PTR_ERR(idev); + if (IS_ERR(idev)) { + err = PTR_ERR(idev); + goto unlock; + } if (!ipv6_allow_optimistic_dad(net, idev)) cfg.ifa_flags &= ~IFA_F_OPTIMISTIC; @@ -5032,7 +5037,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, if (cfg.ifa_flags & IFA_F_NODAD && cfg.ifa_flags & IFA_F_OPTIMISTIC) { NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive"); - return -EINVAL; + err = -EINVAL; + goto unlock; } ifa = ipv6_get_ifaddr(net, cfg.pfx, dev, 1); @@ -5041,7 +5047,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, * It would be best to check for !NLM_F_CREATE here but * userspace already relies on not having to provide this. */ - return inet6_addr_add(net, dev, &cfg, expires, flags, extack); + err = inet6_addr_add(net, dev, &cfg, expires, flags, extack); + goto unlock; } if (nlh->nlmsg_flags & NLM_F_EXCL || @@ -5053,6 +5060,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, } in6_ifa_put(ifa); +unlock: + rtnl_net_unlock(net); return err; } @@ -7393,7 +7402,7 @@ static const struct rtnl_msg_handler addrconf_rtnl_msg_handlers[] __initconst_or {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETLINK, .dumpit = inet6_dump_ifinfo, .flags = RTNL_FLAG_DUMP_UNLOCKED}, {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWADDR, - .doit = inet6_rtm_newaddr}, + .doit = inet6_rtm_newaddr, .flags = RTNL_FLAG_DOIT_PERNET}, {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELADDR, .doit = inet6_rtm_deladdr}, {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETADDR, From 7bcf45ddb8bb3d386072677ce4d86d1ec9896096 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 15 Jan 2025 17:06:08 +0900 Subject: [PATCH 11/11] ipv6: Convert inet6_rtm_deladdr() to per-netns RTNL. Let's register inet6_rtm_deladdr() with RTNL_FLAG_DOIT_PERNET and hold rtnl_net_lock() before inet6_addr_del(). Now that inet6_addr_del() is always called under per-netns RTNL. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115080608.28127-12-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b848e4038d2ef..ac8cc10765360 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3096,7 +3096,7 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, return -ENODEV; } - idev = __in6_dev_get(dev); + idev = __in6_dev_get_rtnl_net(dev); if (!idev) { NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device"); return -ENXIO; @@ -4792,8 +4792,12 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, /* We ignore other flags so far. */ ifa_flags &= IFA_F_MANAGETEMPADDR; - return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx, - ifm->ifa_prefixlen, extack); + rtnl_net_lock(net); + err = inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx, + ifm->ifa_prefixlen, extack); + rtnl_net_unlock(net); + + return err; } static int modify_prefix_route(struct net *net, struct inet6_ifaddr *ifp, @@ -7404,7 +7408,7 @@ static const struct rtnl_msg_handler addrconf_rtnl_msg_handlers[] __initconst_or {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWADDR, .doit = inet6_rtm_newaddr, .flags = RTNL_FLAG_DOIT_PERNET}, {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELADDR, - .doit = inet6_rtm_deladdr}, + .doit = inet6_rtm_deladdr, .flags = RTNL_FLAG_DOIT_PERNET}, {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETADDR, .doit = inet6_rtm_getaddr, .dumpit = inet6_dump_ifaddr, .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},