Skip to content

Commit

Permalink
Merge branch 'xfrm: speed up policy insertions'
Browse files Browse the repository at this point in the history
Florian Westphal says:

====================
Policy insertions do not scale well, due to both a linear list walk
to find the insertion spot and another list walk to set the 'pos' value
(a tie-breaker to detect which policy is older when there is ambiguity
as to which one should be matched).

First patch gets rid of the second list walk on insert.
Rest of the patches get rid of the insertion walk.

This list walk was only needed because when I moved the policy db
implementation to rbtree I retained the old insertion method for the
sake of XFRM_MIGRATE.

Switching that to tree-based lookup avoids the need for the full
list search.

After this, insertion of a policy is largely independent of the number
of pre-existing policies as long as they do not share the same source/
destination networks.

Note that this is compile tested only as I did not find any
tests for XFRM_MIGRATE.
====================

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
  • Loading branch information
Steffen Klassert committed Aug 27, 2024
2 parents 54f2f78 + a54ad72 commit 5ce90c8
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 112 deletions.
1 change: 0 additions & 1 deletion include/net/xfrm.h
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,6 @@ struct xfrm_policy {
u16 family;
struct xfrm_sec_ctx *security;
struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH];
struct hlist_node bydst_inexact_list;
struct rcu_head rcu;

struct xfrm_dev_offload xdo;
Expand Down
201 changes: 91 additions & 110 deletions net/xfrm/xfrm_policy.c
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,6 @@ xfrm_policy_inexact_lookup_rcu(struct net *net,
static struct xfrm_policy *
xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy,
bool excl);
static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
struct xfrm_policy *policy);

static bool
xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
Expand Down Expand Up @@ -410,7 +408,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
if (policy) {
write_pnet(&policy->xp_net, net);
INIT_LIST_HEAD(&policy->walk.all);
INIT_HLIST_NODE(&policy->bydst_inexact_list);
INIT_HLIST_NODE(&policy->bydst);
INIT_HLIST_NODE(&policy->byidx);
rwlock_init(&policy->lock);
Expand Down Expand Up @@ -1228,26 +1225,31 @@ xfrm_policy_inexact_insert(struct xfrm_policy *policy, u8 dir, int excl)
return ERR_PTR(-EEXIST);
}

chain = &net->xfrm.policy_inexact[dir];
xfrm_policy_insert_inexact_list(chain, policy);

if (delpol)
__xfrm_policy_inexact_prune_bin(bin, false);

return delpol;
}

/*
 * Report whether @policy should be ignored by callers that only care about
 * live, hashed policies.
 *
 * Returns true when the policy is flagged dead in its walk entry, or when the
 * direction derived from its index is XFRM_POLICY_MAX or above (judging by the
 * function name, those are per-socket policies). Returns false otherwise.
 */
static bool xfrm_policy_is_dead_or_sk(const struct xfrm_policy *policy)
{
	int direction;

	if (!policy->walk.dead) {
		direction = xfrm_policy_id2dir(policy->index);
		if (direction < XFRM_POLICY_MAX)
			return false;
	}

	return true;
}

static void xfrm_hash_rebuild(struct work_struct *work)
{
struct net *net = container_of(work, struct net,
xfrm.policy_hthresh.work);
unsigned int hmask;
struct xfrm_policy *pol;
struct xfrm_policy *policy;
struct hlist_head *chain;
struct hlist_head *odst;
struct hlist_node *newpos;
int i;
int dir;
unsigned seq;
u8 lbits4, rbits4, lbits6, rbits6;
Expand Down Expand Up @@ -1311,23 +1313,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
goto out_unlock;
}

/* reset the bydst and inexact table in all directions */
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
struct hlist_node *n;

hlist_for_each_entry_safe(policy, n,
&net->xfrm.policy_inexact[dir],
bydst_inexact_list) {
hlist_del_rcu(&policy->bydst);
hlist_del_init(&policy->bydst_inexact_list);
}

hmask = net->xfrm.policy_bydst[dir].hmask;
odst = net->xfrm.policy_bydst[dir].table;
for (i = hmask; i >= 0; i--) {
hlist_for_each_entry_safe(policy, n, odst + i, bydst)
hlist_del_rcu(&policy->bydst);
}
if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
/* dir out => dst = remote, src = local */
net->xfrm.policy_bydst[dir].dbits4 = rbits4;
Expand All @@ -1352,6 +1338,9 @@ static void xfrm_hash_rebuild(struct work_struct *work)
/* skip socket policies */
continue;
}

hlist_del_rcu(&policy->bydst);

newpos = NULL;
chain = policy_hash_bysel(net, &policy->selector,
policy->family, dir);
Expand Down Expand Up @@ -1519,42 +1508,6 @@ static const struct rhashtable_params xfrm_pol_inexact_params = {
.automatic_shrinking = true,
};

/*
 * Link @policy into @chain (entries are threaded through their
 * bydst_inexact_list node) at a position chosen by priority, then renumber
 * the 'pos' field of every entry on the chain.
 *
 * The whole chain is walked up to twice per insertion: once to find the
 * insertion point and once to rewrite every 'pos'.
 */
static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
struct xfrm_policy *policy)
{
/* delpol is only used inside this function; it is never returned. */
struct xfrm_policy *pol, *delpol = NULL;
struct hlist_node *newpos = NULL;
int i = 0;

hlist_for_each_entry(pol, chain, bydst_inexact_list) {
/*
 * First branch: an existing entry with the same type, if_id, selector
 * (selector_cmp() presumably returns 0 on equality - confirm), mark and
 * security context. WARN_ON(delpol) fires if such an entry was already
 * recorded; in that case the condition is false and the entry is handled
 * by the else-branch instead.
 */
if (pol->type == policy->type &&
pol->if_id == policy->if_id &&
!selector_cmp(&pol->selector, &policy->selector) &&
xfrm_policy_mark_match(&policy->mark, pol) &&
xfrm_sec_ctx_match(pol->security, policy->security) &&
!WARN_ON(delpol)) {
delpol = pol;
/* Keep scanning while the new policy has the larger priority value. */
if (policy->priority > pol->priority)
continue;
} else if (policy->priority >= pol->priority) {
/* Remember the last entry whose priority value is <= the new one. */
newpos = &pol->bydst_inexact_list;
continue;
}
/*
 * Reached only when neither 'continue' above was taken. The scan stops
 * here only if a matching entry has been seen; otherwise it goes on.
 */
if (delpol)
break;
}

/*
 * Insert behind the remembered entry, unless there is none or the policy
 * uses packet offload (XFRM_DEV_OFFLOAD_PACKET), in which case it goes to
 * the head of the chain.
 */
if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos);
else
hlist_add_head_rcu(&policy->bydst_inexact_list, chain);

/* Renumber every entry so that 'pos' equals its index on the chain (from 0). */
hlist_for_each_entry(pol, chain, bydst_inexact_list) {
pol->pos = i;
i++;
}
}

static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
struct xfrm_policy *policy,
bool excl)
Expand Down Expand Up @@ -2294,10 +2247,52 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
return pol;
}

/*
 * Renumber the 'pos' tie-breaker of every policy on net->xfrm.policy_all that
 * is neither dead nor a socket policy, and return the value the next policy
 * to be linked should use.
 *
 * Policies are numbered from 0 (oldest) upwards, so the returned count is
 * strictly greater than every 'pos' just assigned. Numbering from 1 and
 * returning the last assigned value would hand the caller a 'pos' equal to
 * that of the newest existing policy, defeating the tie-breaker.
 */
static u32 xfrm_gen_pos_slow(struct net *net)
{
	struct xfrm_policy *policy;
	u32 i = 0;

	/* oldest entry is last in list */
	list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
		if (!xfrm_policy_is_dead_or_sk(policy))
			policy->pos = i++;
	}

	return i;
}

/*
 * Pick the 'pos' value for a policy that is about to be linked.
 *
 * Looks at the first entry on net->xfrm.policy_all that is neither dead nor a
 * socket policy (the most recently added one) and returns its 'pos' plus one.
 * If that 'pos' is already UINT_MAX, the result of xfrm_gen_pos_slow() is
 * returned instead. Returns 0 when no such entry exists.
 */
static u32 xfrm_gen_pos(struct net *net)
{
	const struct xfrm_policy *newest;

	/* most recently added policy is at the head of the list */
	list_for_each_entry(newest, &net->xfrm.policy_all, walk.all) {
		if (xfrm_policy_is_dead_or_sk(newest))
			continue;

		if (newest->pos != UINT_MAX)
			return newest->pos + 1;

		/* incrementing would wrap around */
		return xfrm_gen_pos_slow(net);
	}

	return 0;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
struct net *net = xp_net(pol);

switch (dir) {
case XFRM_POLICY_IN:
case XFRM_POLICY_FWD:
case XFRM_POLICY_OUT:
pol->pos = xfrm_gen_pos(net);
break;
}

list_add(&pol->walk.all, &net->xfrm.policy_all);
net->xfrm.policy_count[dir]++;
xfrm_pol_hold(pol);
Expand All @@ -2314,7 +2309,6 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
/* Socket policies are not hashed. */
if (!hlist_unhashed(&pol->bydst)) {
hlist_del_rcu(&pol->bydst);
hlist_del_init(&pol->bydst_inexact_list);
hlist_del(&pol->byidx);
}

Expand Down Expand Up @@ -4437,63 +4431,50 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif

#ifdef CONFIG_XFRM_MIGRATE
/*
 * Decide whether the policy selector @sel_tgt matches the selector @sel_cmp.
 *
 * When @sel_cmp names a specific protocol, both selectors must be bytewise
 * identical. When @sel_cmp->proto is IPSEC_ULPROTO_ANY, only the family, both
 * addresses and both prefix lengths are compared.
 */
static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
					const struct xfrm_selector *sel_tgt)
{
	if (sel_cmp->proto != IPSEC_ULPROTO_ANY)
		return memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0;

	if (sel_tgt->family != sel_cmp->family)
		return false;

	if (!xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, sel_cmp->family))
		return false;

	if (!xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, sel_cmp->family))
		return false;

	return sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
	       sel_tgt->prefixlen_s == sel_cmp->prefixlen_s;
}

/*
 * Find the policy a migrate request refers to.
 *
 * A flow key is built from @sel: protocol and source/destination address
 * always; output interface and ports only when the selector names a specific
 * protocol (anything other than IPSEC_ULPROTO_ANY). The key is then handed to
 * xfrm_policy_lookup_bytype() under rcu_read_lock().
 *
 * Returns:
 *   - ERR_PTR(-EAFNOSUPPORT) if @sel->family is neither AF_INET nor AF_INET6;
 *   - whatever xfrm_policy_lookup_bytype() returned if that is NULL or an
 *     error pointer;
 *   - NULL if a policy was found but a reference could not be taken;
 *   - otherwise the policy, with a reference held for the caller.
 */
static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
						    u8 dir, u8 type, struct net *net, u32 if_id)
{
	struct xfrm_policy *pol;
	struct flowi fl;

	memset(&fl, 0, sizeof(fl));

	fl.flowi_proto = sel->proto;

	switch (sel->family) {
	case AF_INET:
		fl.u.ip4.saddr = sel->saddr.a4;
		fl.u.ip4.daddr = sel->daddr.a4;
		if (sel->proto == IPSEC_ULPROTO_ANY)
			break;
		fl.u.flowi4_oif = sel->ifindex;
		fl.u.ip4.fl4_sport = sel->sport;
		fl.u.ip4.fl4_dport = sel->dport;
		break;
	case AF_INET6:
		fl.u.ip6.saddr = sel->saddr.in6;
		fl.u.ip6.daddr = sel->daddr.in6;
		if (sel->proto == IPSEC_ULPROTO_ANY)
			break;
		fl.u.flowi6_oif = sel->ifindex;
		fl.u.ip6.fl4_sport = sel->sport;
		fl.u.ip6.fl4_dport = sel->dport;
		break;
	default:
		return ERR_PTR(-EAFNOSUPPORT);
	}

	rcu_read_lock();

	pol = xfrm_policy_lookup_bytype(net, type, &fl, sel->family, dir, if_id);
	if (IS_ERR_OR_NULL(pol))
		goto out_unlock;

	/*
	 * Take the reference on the policy that was just looked up; the
	 * reference must be obtained before leaving the RCU read section.
	 */
	if (!xfrm_pol_hold_rcu(pol))
		pol = NULL;
out_unlock:
	rcu_read_unlock();
	return pol;
}

static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
Expand Down Expand Up @@ -4630,9 +4611,9 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,

/* Stage 1 - find policy */
pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id);
if (!pol) {
if (IS_ERR_OR_NULL(pol)) {
NL_SET_ERR_MSG(extack, "Target policy not found");
err = -ENOENT;
err = IS_ERR(pol) ? PTR_ERR(pol) : -ENOENT;
goto out;
}

Expand Down
2 changes: 1 addition & 1 deletion tools/testing/selftests/net/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ TEST_PROGS += ip_local_port_range.sh
TEST_PROGS += rps_default_mask.sh
TEST_PROGS += big_tcp.sh
TEST_PROGS += netns-sysctl.sh
TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh
TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
Expand Down
Loading

0 comments on commit 5ce90c8

Please sign in to comment.