Skip to content

Commit

Permalink
Merge branch 'ipv4-namespacify-ipv4-address-hash-table'
Browse files Browse the repository at this point in the history
Kuniyuki Iwashima says:

====================
ipv4: Namespacify IPv4 address hash table.

This is preparation for the per-net RTNL conversion of RTM_(NEW|DEL|SET)ADDR.

Currently, each IPv4 address is linked into a single global hash table.
To support per-net RTNL, that table must either be protected by another
global lock or be namespacified.

Adding a global lock would cause a deadlock between the rtnetlink path and the address GC (check_lifetime),

  rtnetlink                      check_lifetime
  |- rtnl_net_lock(net)          |- acquire the global lock
  |- acquire the global lock     |- check ifa's netns
  `- put ifa into hash table     `- rtnl_net_lock(net)

so we need to namespacify the hash table.

The IPv6 address hash table is already namespacified, so let's follow that approach.

v2: https://lore.kernel.org/netdev/20241004195958.64396-1-kuniyu@amazon.com/
v1: https://lore.kernel.org/netdev/20241001024837.96425-1-kuniyu@amazon.com/
====================

Link: https://patch.msgid.link/20241008172906.1326-1-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Oct 10, 2024
2 parents 22ee378 + 99ee348 commit 09cf85e
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 31 deletions.
2 changes: 1 addition & 1 deletion include/linux/inetdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
ARP_EVICT_NOCARRIER)

struct in_ifaddr {
struct hlist_node hash;
struct hlist_node addr_lst;
struct in_ifaddr __rcu *ifa_next;
struct in_device *ifa_dev;
struct rcu_head rcu_head;
Expand Down
2 changes: 2 additions & 0 deletions include/net/netns/ipv4.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,5 +270,7 @@ struct netns_ipv4 {

atomic_t rt_genid;
siphash_key_t ip_id_key;
struct hlist_head *inet_addr_lst;
struct delayed_work addr_chk_work;
};
#endif
69 changes: 39 additions & 30 deletions net/ipv4/devinet.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,6 @@ struct inet_fill_args {
#define IN4_ADDR_HSIZE_SHIFT 8
#define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)

static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];

static u32 inet_addr_hash(const struct net *net, __be32 addr)
{
u32 val = (__force u32) addr ^ net_hash_mix(net);
Expand All @@ -133,13 +131,13 @@ static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
u32 hash = inet_addr_hash(net, ifa->ifa_local);

ASSERT_RTNL();
hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
hlist_add_head_rcu(&ifa->addr_lst, &net->ipv4.inet_addr_lst[hash]);
}

static void inet_hash_remove(struct in_ifaddr *ifa)
{
ASSERT_RTNL();
hlist_del_init_rcu(&ifa->hash);
hlist_del_init_rcu(&ifa->addr_lst);
}

/**
Expand Down Expand Up @@ -186,9 +184,8 @@ struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
u32 hash = inet_addr_hash(net, addr);
struct in_ifaddr *ifa;

hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
if (ifa->ifa_local == addr &&
net_eq(dev_net(ifa->ifa_dev->dev), net))
hlist_for_each_entry_rcu(ifa, &net->ipv4.inet_addr_lst[hash], addr_lst)
if (ifa->ifa_local == addr)
return ifa;

return NULL;
Expand Down Expand Up @@ -227,7 +224,7 @@ static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev)
in_dev_hold(in_dev);
ifa->ifa_dev = in_dev;

INIT_HLIST_NODE(&ifa->hash);
INIT_HLIST_NODE(&ifa->addr_lst);

return ifa;
}
Expand Down Expand Up @@ -484,15 +481,12 @@ static void inet_del_ifa(struct in_device *in_dev,
__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}

static void check_lifetime(struct work_struct *work);

static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);

static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
u32 portid, struct netlink_ext_ack *extack)
{
struct in_ifaddr __rcu **last_primary, **ifap;
struct in_device *in_dev = ifa->ifa_dev;
struct net *net = dev_net(in_dev->dev);
struct in_validator_info ivi;
struct in_ifaddr *ifa1;
int ret;
Expand Down Expand Up @@ -561,8 +555,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,

inet_hash_insert(dev_net(in_dev->dev), ifa);

cancel_delayed_work(&check_lifetime_work);
queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
cancel_delayed_work(&net->ipv4.addr_chk_work);
queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, 0);

/* Send message first, then call notifier.
Notifier will trigger FIB update, so that
Expand Down Expand Up @@ -708,16 +702,19 @@ static void check_lifetime(struct work_struct *work)
unsigned long now, next, next_sec, next_sched;
struct in_ifaddr *ifa;
struct hlist_node *n;
struct net *net;
int i;

net = container_of(to_delayed_work(work), struct net, ipv4.addr_chk_work);
now = jiffies;
next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

for (i = 0; i < IN4_ADDR_HSIZE; i++) {
struct hlist_head *head = &net->ipv4.inet_addr_lst[i];
bool change_needed = false;

rcu_read_lock();
hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
hlist_for_each_entry_rcu(ifa, head, addr_lst) {
unsigned long age, tstamp;
u32 preferred_lft;
u32 valid_lft;
Expand Down Expand Up @@ -755,7 +752,7 @@ static void check_lifetime(struct work_struct *work)
if (!change_needed)
continue;
rtnl_lock();
hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
hlist_for_each_entry_safe(ifa, n, head, addr_lst) {
unsigned long age;

if (ifa->ifa_flags & IFA_F_PERMANENT)
Expand Down Expand Up @@ -804,8 +801,8 @@ static void check_lifetime(struct work_struct *work)
if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
next_sched - now);
queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work,
next_sched - now);
}

static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
Expand Down Expand Up @@ -1002,9 +999,9 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
ifa->ifa_proto = new_proto;

set_ifa_lifetime(ifa, valid_lft, prefered_lft);
cancel_delayed_work(&check_lifetime_work);
cancel_delayed_work(&net->ipv4.addr_chk_work);
queue_delayed_work(system_power_efficient_wq,
&check_lifetime_work, 0);
&net->ipv4.addr_chk_work, 0);
rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
}
return 0;
Expand Down Expand Up @@ -2663,14 +2660,21 @@ static struct ctl_table ctl_forward_entry[] = {

static __net_init int devinet_init_net(struct net *net)
{
int err;
struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
struct ctl_table *tbl;
struct ctl_table_header *forw_hdr;
struct ctl_table *tbl;
#endif
struct ipv4_devconf *all, *dflt;
int err;
int i;

err = -ENOMEM;
net->ipv4.inet_addr_lst = kmalloc_array(IN4_ADDR_HSIZE,
sizeof(struct hlist_head),
GFP_KERNEL);
if (!net->ipv4.inet_addr_lst)
goto err_alloc_hash;

all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
if (!all)
goto err_alloc_all;
Expand Down Expand Up @@ -2731,6 +2735,11 @@ static __net_init int devinet_init_net(struct net *net)
net->ipv4.forw_hdr = forw_hdr;
#endif

for (i = 0; i < IN4_ADDR_HSIZE; i++)
INIT_HLIST_HEAD(&net->ipv4.inet_addr_lst[i]);

INIT_DEFERRABLE_WORK(&net->ipv4.addr_chk_work, check_lifetime);

net->ipv4.devconf_all = all;
net->ipv4.devconf_dflt = dflt;
return 0;
Expand All @@ -2748,14 +2757,20 @@ static __net_init int devinet_init_net(struct net *net)
err_alloc_dflt:
kfree(all);
err_alloc_all:
kfree(net->ipv4.inet_addr_lst);
err_alloc_hash:
return err;
}

static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
const struct ctl_table *tbl;
#endif

cancel_delayed_work_sync(&net->ipv4.addr_chk_work);

#ifdef CONFIG_SYSCTL
tbl = net->ipv4.forw_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv4.forw_hdr);
__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
Expand All @@ -2766,6 +2781,7 @@ static __net_exit void devinet_exit_net(struct net *net)
#endif
kfree(net->ipv4.devconf_dflt);
kfree(net->ipv4.devconf_all);
kfree(net->ipv4.inet_addr_lst);
}

static __net_initdata struct pernet_operations devinet_ops = {
Expand All @@ -2783,16 +2799,9 @@ static struct rtnl_af_ops inet_af_ops __read_mostly = {

void __init devinet_init(void)
{
int i;

for (i = 0; i < IN4_ADDR_HSIZE; i++)
INIT_HLIST_HEAD(&inet_addr_lst[i]);

register_pernet_subsys(&devinet_ops);
register_netdevice_notifier(&ip_netdev_notifier);

queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

rtnl_af_register(&inet_af_ops);

rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
Expand Down

0 comments on commit 09cf85e

Please sign in to comment.