diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f7244ac4fa086..1fa2da22a49ec 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -555,7 +555,6 @@ struct xfrm_policy { u16 family; struct xfrm_sec_ctx *security; struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; - struct hlist_node bydst_inexact_list; struct rcu_head rcu; struct xfrm_dev_offload xdo; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c56c61b0c12ef..b79ac453ea376 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -196,8 +196,6 @@ xfrm_policy_inexact_lookup_rcu(struct net *net, static struct xfrm_policy * xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy, bool excl); -static void xfrm_policy_insert_inexact_list(struct hlist_head *chain, - struct xfrm_policy *policy); static bool xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand, @@ -410,7 +408,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) if (policy) { write_pnet(&policy->xp_net, net); INIT_LIST_HEAD(&policy->walk.all); - INIT_HLIST_NODE(&policy->bydst_inexact_list); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); @@ -1228,26 +1225,31 @@ xfrm_policy_inexact_insert(struct xfrm_policy *policy, u8 dir, int excl) return ERR_PTR(-EEXIST); } - chain = &net->xfrm.policy_inexact[dir]; - xfrm_policy_insert_inexact_list(chain, policy); - if (delpol) __xfrm_policy_inexact_prune_bin(bin, false); return delpol; } +static bool xfrm_policy_is_dead_or_sk(const struct xfrm_policy *policy) +{ + int dir; + + if (policy->walk.dead) + return true; + + dir = xfrm_policy_id2dir(policy->index); + return dir >= XFRM_POLICY_MAX; +} + static void xfrm_hash_rebuild(struct work_struct *work) { struct net *net = container_of(work, struct net, xfrm.policy_hthresh.work); - unsigned int hmask; struct xfrm_policy *pol; struct xfrm_policy *policy; struct hlist_head *chain; - struct hlist_head *odst; struct hlist_node *newpos; - int i; int dir; unsigned seq; u8 lbits4, rbits4, lbits6, rbits6; @@ -1311,23 +1313,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) goto out_unlock; } - /* reset the bydst and inexact table in all directions */ for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - struct hlist_node *n; - - hlist_for_each_entry_safe(policy, n, - &net->xfrm.policy_inexact[dir], - bydst_inexact_list) { - hlist_del_rcu(&policy->bydst); - hlist_del_init(&policy->bydst_inexact_list); - } - - hmask = net->xfrm.policy_bydst[dir].hmask; - odst = net->xfrm.policy_bydst[dir].table; - for (i = hmask; i >= 0; i--) { - hlist_for_each_entry_safe(policy, n, odst + i, bydst) - hlist_del_rcu(&policy->bydst); - } if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) { /* dir out => dst = remote, src = local */ net->xfrm.policy_bydst[dir].dbits4 = rbits4; @@ -1352,6 +1338,9 @@ static void xfrm_hash_rebuild(struct work_struct *work) /* skip socket policies */ continue; } + + hlist_del_rcu(&policy->bydst); + newpos = NULL; chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); @@ -1519,42 +1508,6 @@ static const struct rhashtable_params xfrm_pol_inexact_params = { .automatic_shrinking = true, }; -static void xfrm_policy_insert_inexact_list(struct hlist_head *chain, - struct xfrm_policy *policy) -{ - struct xfrm_policy *pol, *delpol = NULL; - struct hlist_node *newpos = NULL; - int i = 0; - - hlist_for_each_entry(pol, chain, bydst_inexact_list) { - if (pol->type == policy->type && - pol->if_id == policy->if_id && - !selector_cmp(&pol->selector, &policy->selector) && - xfrm_policy_mark_match(&policy->mark, pol) && - xfrm_sec_ctx_match(pol->security, policy->security) && - !WARN_ON(delpol)) { - delpol = pol; - if (policy->priority > pol->priority) - continue; - } else if (policy->priority >= pol->priority) { - newpos = &pol->bydst_inexact_list; - continue; - } - if (delpol) - break; - } - - if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET) - hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos); - else - hlist_add_head_rcu(&policy->bydst_inexact_list, chain); - - hlist_for_each_entry(pol, chain, bydst_inexact_list) { - pol->pos = i; - i++; - } -} - static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy, bool excl) @@ -2294,10 +2247,52 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, return pol; } +static u32 xfrm_gen_pos_slow(struct net *net) +{ + struct xfrm_policy *policy; + u32 i = 0; + + /* oldest entry is last in list */ + list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { + if (!xfrm_policy_is_dead_or_sk(policy)) + policy->pos = ++i; + } + + return i; +} + +static u32 xfrm_gen_pos(struct net *net) +{ + const struct xfrm_policy *policy; + u32 i = 0; + + /* most recently added policy is at the head of the list */ + list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) { + if (xfrm_policy_is_dead_or_sk(policy)) + continue; + + if (policy->pos == UINT_MAX) + return xfrm_gen_pos_slow(net); + + i = policy->pos + 1; + break; + } + + return i; +} + static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { struct net *net = xp_net(pol); + switch (dir) { + case XFRM_POLICY_IN: + case XFRM_POLICY_FWD: + case XFRM_POLICY_OUT: + pol->pos = xfrm_gen_pos(net); + break; + } + list_add(&pol->walk.all, &net->xfrm.policy_all); net->xfrm.policy_count[dir]++; xfrm_pol_hold(pol); @@ -2314,7 +2309,6 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, /* Socket policies are not hashed. */ if (!hlist_unhashed(&pol->bydst)) { hlist_del_rcu(&pol->bydst); - hlist_del_init(&pol->bydst_inexact_list); hlist_del(&pol->byidx); } @@ -4437,63 +4431,50 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete); #endif #ifdef CONFIG_XFRM_MIGRATE -static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, - const struct xfrm_selector *sel_tgt) -{ - if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { - if (sel_tgt->family == sel_cmp->family && - xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, - sel_cmp->family) && - xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, - sel_cmp->family) && - sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && - sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { - return true; - } - } else { - if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) { - return true; - } - } - return false; -} - static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, u8 dir, u8 type, struct net *net, u32 if_id) { struct xfrm_policy *pol, *ret = NULL; - struct hlist_head *chain; - u32 priority = ~0U; + struct flowi fl; - spin_lock_bh(&net->xfrm.xfrm_policy_lock); - chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); - hlist_for_each_entry(pol, chain, bydst) { - if ((if_id == 0 || pol->if_id == if_id) && - xfrm_migrate_selector_match(sel, &pol->selector) && - pol->type == type) { - ret = pol; - priority = ret->priority; - break; - } - } - chain = &net->xfrm.policy_inexact[dir]; - hlist_for_each_entry(pol, chain, bydst_inexact_list) { - if ((pol->priority >= priority) && ret) - break; + memset(&fl, 0, sizeof(fl)); - if ((if_id == 0 || pol->if_id == if_id) && - xfrm_migrate_selector_match(sel, &pol->selector) && - pol->type == type) { - ret = pol; + fl.flowi_proto = sel->proto; + + switch (sel->family) { + case AF_INET: + fl.u.ip4.saddr = sel->saddr.a4; + fl.u.ip4.daddr = sel->daddr.a4; + if (sel->proto == IPSEC_ULPROTO_ANY) break; - } + fl.u.flowi4_oif = sel->ifindex; + fl.u.ip4.fl4_sport = sel->sport; + fl.u.ip4.fl4_dport = sel->dport; + break; + case AF_INET6: + fl.u.ip6.saddr = sel->saddr.in6; + fl.u.ip6.daddr = sel->daddr.in6; + if (sel->proto == IPSEC_ULPROTO_ANY) + break; + fl.u.flowi6_oif = sel->ifindex; + fl.u.ip6.fl4_sport = sel->sport; + fl.u.ip6.fl4_dport = sel->dport; + break; + default: + return ERR_PTR(-EAFNOSUPPORT); } - xfrm_pol_hold(ret); + rcu_read_lock(); - spin_unlock_bh(&net->xfrm.xfrm_policy_lock); + pol = xfrm_policy_lookup_bytype(net, type, &fl, sel->family, dir, if_id); + if (IS_ERR_OR_NULL(pol)) + goto out_unlock; - return ret; + if (!xfrm_pol_hold_rcu(ret)) + pol = NULL; +out_unlock: + rcu_read_unlock(); + return pol; } static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t) @@ -4630,9 +4611,9 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, /* Stage 1 - find policy */ pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id); - if (!pol) { + if (IS_ERR_OR_NULL(pol)) { NL_SET_ERR_MSG(extack, "Target policy not found"); - err = -ENOENT; + err = IS_ERR(pol) ? PTR_ERR(pol) : -ENOENT; goto out; } diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 8eaffd7a641c5..e127a80ff7133 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -56,7 +56,7 @@ TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh TEST_PROGS += netns-sysctl.sh -TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh +TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite diff --git a/tools/testing/selftests/net/xfrm_policy_add_speed.sh b/tools/testing/selftests/net/xfrm_policy_add_speed.sh new file mode 100755 index 0000000000000..2fab29d3cb91b --- /dev/null +++ b/tools/testing/selftests/net/xfrm_policy_add_speed.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +source lib.sh + +timeout=4m +ret=0 +tmp=$(mktemp) +cleanup() { + cleanup_all_ns + rm -f "$tmp" +} + +trap cleanup EXIT + +maxpolicies=100000 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && maxpolicies=10000 + +do_dummies4() { + local dir="$1" + local max="$2" + + local policies + local pfx + pfx=30 + policies=0 + + ip netns exec "$ns" ip xfrm policy flush + + for i in $(seq 1 100);do + local s + local d + for j in $(seq 1 255);do + s=$((i+0)) + d=$((i+100)) + + for a in $(seq 1 8 255); do + policies=$((policies+1)) + [ "$policies" -gt "$max" ] && return + echo xfrm policy add src 10.$s.$j.0/30 dst 10.$d.$j.$a/$pfx dir $dir action block + done + for a in $(seq 1 8 255); do + policies=$((policies+1)) + [ "$policies" -gt "$max" ] && return + echo xfrm policy add src 10.$s.$j.$a/30 dst 10.$d.$j.0/$pfx dir $dir action block + done + done + done +} + +setup_ns ns + +do_bench() +{ + local max="$1" + + start=$(date +%s%3N) + do_dummies4 "out" "$max" > "$tmp" + if ! timeout "$timeout" ip netns exec "$ns" ip -batch "$tmp";then + echo "WARNING: policy insertion cancelled after $timeout" + ret=1 + fi + stop=$(date +%s%3N) + + result=$((stop-start)) + + policies=$(wc -l < "$tmp") + printf "Inserted %-06s policies in $result ms\n" $policies + + have=$(ip netns exec "$ns" ip xfrm policy show | grep "action block" | wc -l) + if [ "$have" -ne "$policies" ]; then + echo "WARNING: mismatch, have $have policies, expected $policies" + ret=1 + fi +} + +p=100 +while [ $p -le "$maxpolicies" ]; do + do_bench "$p" + p="${p}0" +done + +exit $ret