Skip to content

Commit

Permalink
Merge branch 'net-fix-lwtunnel-reentry-loops'
Browse files Browse the repository at this point in the history
Justin Iurman says:

====================
net: fix lwtunnel reentry loops

When the destination is the same after the transformation, we enter a
lwtunnel loop. This is true for most lwt users: ioam6, rpl, seg6,
seg6_local, ila_lwt, and lwt_bpf. It can happen in their input() and
output() handlers, where dst_input() or dst_output(), respectively,
is called at the end. It can also happen in xmit() handlers.

Here is an example for rpl_input():

dump_stack_lvl+0x60/0x80
rpl_input+0x9d/0x320
lwtunnel_input+0x64/0xa0
lwtunnel_input+0x64/0xa0
lwtunnel_input+0x64/0xa0
lwtunnel_input+0x64/0xa0
lwtunnel_input+0x64/0xa0
[...]
lwtunnel_input+0x64/0xa0
lwtunnel_input+0x64/0xa0
lwtunnel_input+0x64/0xa0
lwtunnel_input+0x64/0xa0
lwtunnel_input+0x64/0xa0
ip6_sublist_rcv_finish+0x85/0x90
ip6_sublist_rcv+0x236/0x2f0

... until rpl_do_srh() fails, which means skb_cow_head() failed.

This series provides a fix at the core level of lwtunnel to catch such
loops when they're not caught by the respective lwtunnel users, and
handles the loop case in ioam6, which is one of the users. This series
also comes with a new selftest to detect some dst cache reference loops
in lwtunnel users.
====================

Link: https://patch.msgid.link/20250314120048.12569-1-justin.iurman@uliege.be
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
  • Loading branch information
Paolo Abeni committed Mar 20, 2025
2 parents 47a9b5e + 3ed61b8 commit f31b6fb
Show file tree
Hide file tree
Showing 5 changed files with 306 additions and 16 deletions.
65 changes: 53 additions & 12 deletions net/core/lwtunnel.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
#include <net/ip6_fib.h>
#include <net/rtnh.h>

#include "dev.h"

DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);

Expand Down Expand Up @@ -325,13 +327,23 @@ EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap);

int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
const struct lwtunnel_encap_ops *ops;
struct lwtunnel_state *lwtstate;
int ret = -EINVAL;
struct dst_entry *dst;
int ret;

if (dev_xmit_recursion()) {
net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
__func__);
ret = -ENETDOWN;
goto drop;
}

if (!dst)
dst = skb_dst(skb);
if (!dst) {
ret = -EINVAL;
goto drop;
}
lwtstate = dst->lwtstate;

if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
Expand All @@ -341,8 +353,11 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
ret = -EOPNOTSUPP;
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
if (likely(ops && ops->output))
if (likely(ops && ops->output)) {
dev_xmit_recursion_inc();
ret = ops->output(net, sk, skb);
dev_xmit_recursion_dec();
}
rcu_read_unlock();

if (ret == -EOPNOTSUPP)
Expand All @@ -359,13 +374,23 @@ EXPORT_SYMBOL_GPL(lwtunnel_output);

int lwtunnel_xmit(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
const struct lwtunnel_encap_ops *ops;
struct lwtunnel_state *lwtstate;
int ret = -EINVAL;
struct dst_entry *dst;
int ret;

if (dev_xmit_recursion()) {
net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
__func__);
ret = -ENETDOWN;
goto drop;
}

if (!dst)
dst = skb_dst(skb);
if (!dst) {
ret = -EINVAL;
goto drop;
}

lwtstate = dst->lwtstate;

Expand All @@ -376,8 +401,11 @@ int lwtunnel_xmit(struct sk_buff *skb)
ret = -EOPNOTSUPP;
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
if (likely(ops && ops->xmit))
if (likely(ops && ops->xmit)) {
dev_xmit_recursion_inc();
ret = ops->xmit(skb);
dev_xmit_recursion_dec();
}
rcu_read_unlock();

if (ret == -EOPNOTSUPP)
Expand All @@ -394,13 +422,23 @@ EXPORT_SYMBOL_GPL(lwtunnel_xmit);

int lwtunnel_input(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
const struct lwtunnel_encap_ops *ops;
struct lwtunnel_state *lwtstate;
int ret = -EINVAL;
struct dst_entry *dst;
int ret;

if (!dst)
if (dev_xmit_recursion()) {
net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
__func__);
ret = -ENETDOWN;
goto drop;
}

dst = skb_dst(skb);
if (!dst) {
ret = -EINVAL;
goto drop;
}
lwtstate = dst->lwtstate;

if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
Expand All @@ -410,8 +448,11 @@ int lwtunnel_input(struct sk_buff *skb)
ret = -EOPNOTSUPP;
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
if (likely(ops && ops->input))
if (likely(ops && ops->input)) {
dev_xmit_recursion_inc();
ret = ops->input(skb);
dev_xmit_recursion_dec();
}
rcu_read_unlock();

if (ret == -EOPNOTSUPP)
Expand Down
8 changes: 4 additions & 4 deletions net/ipv6/ioam6_iptunnel.c
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,6 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL;
struct in6_addr orig_daddr;
struct ioam6_lwt *ilwt;
int err = -EINVAL;
u32 pkt_cnt;
Expand All @@ -352,8 +351,6 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (pkt_cnt % ilwt->freq.n >= ilwt->freq.k)
goto out;

orig_daddr = ipv6_hdr(skb)->daddr;

local_bh_disable();
cache_dst = dst_cache_get(&ilwt->cache);
local_bh_enable();
Expand Down Expand Up @@ -422,7 +419,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
goto drop;
}

if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
/* avoid lwtunnel_output() reentry loop when destination is the same
* after transformation (e.g., with the inline mode)
*/
if (dst->lwtstate != cache_dst->lwtstate) {
skb_dst_drop(skb);
skb_dst_set(skb, cache_dst);
return dst_output(net, sk, skb);
Expand Down
1 change: 1 addition & 0 deletions tools/testing/selftests/net/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ TEST_PROGS += vlan_bridge_binding.sh
TEST_PROGS += bpf_offload.py
TEST_PROGS += ipv6_route_update_soft_lockup.sh
TEST_PROGS += busy_poll_test.sh
TEST_PROGS += lwt_dst_cache_ref_loop.sh

# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller netlink-dumps
Expand Down
2 changes: 2 additions & 0 deletions tools/testing/selftests/net/config
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,5 @@ CONFIG_XFRM_INTERFACE=m
CONFIG_XFRM_USER=m
CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
CONFIG_IPV6_ILA=m
CONFIG_IPV6_RPL_LWTUNNEL=y
Loading

0 comments on commit f31b6fb

Please sign in to comment.