Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…l/git/netfilter/nf

Florian Westphal says:

====================
netfilter patches for net

First patch resolves a regression with vlan header matching, this was
broken since 6.5 release.  From myself.

Second patch fixes an ancient problem with sctp connection tracking in
case INIT_ACK packets are delayed.  This comes with a selftest, both
patches from Xin Long.

Patch 4 extends the existing nftables audit selftest, from
Phil Sutter.

Patch 5, also from Phil, avoids a situation where nftables
would emit an audit record twice. This was broken since 5.13 days.

Patch 6, from myself, avoids spurious insertion failure if we encounter an
overlapping but expired range during element insertion with the
'nft_set_rbtree' backend. This problem exists since 6.2.

* tag 'nf-23-10-04' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: nf_tables: nft_set_rbtree: fix spurious insertion failure
  netfilter: nf_tables: Deduplicate nft_register_obj audit logs
  selftests: netfilter: Extend nft_audit.sh
  selftests: netfilter: test for sctp collision processing in nf_conntrack
  netfilter: handle the connecting collision properly in nf_conntrack_proto_sctp
  netfilter: nft_payload: rebuild vlan header on h_proto access
====================

Link: https://lore.kernel.org/r/20231004141405.28749-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Oct 4, 2023
2 parents 513dbc1 + 0873882 commit c56e67f
Show file tree
Hide file tree
Showing 9 changed files with 395 additions and 62 deletions.
1 change: 1 addition & 0 deletions include/linux/netfilter/nf_conntrack_sctp.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ struct ip_ct_sctp {
enum sctp_conntrack state;

__be32 vtag[IP_CT_DIR_MAX];
u8 init[IP_CT_DIR_MAX];
u8 last_dir;
u8 flags;
};
Expand Down
43 changes: 33 additions & 10 deletions net/netfilter/nf_conntrack_proto_sctp.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA},
/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/
/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */
/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */
/* cookie_ack */ {sCL, sCL, sCW, sES, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */
/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL},
/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
Expand All @@ -126,7 +126,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV},
/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV},
/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV},
/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */
/* cookie_echo */ {sIV, sCL, sCE, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */
/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV},
/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV},
/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
Expand Down Expand Up @@ -412,6 +412,9 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
/* (D) vtag must be same as init_vtag as found in INIT_ACK */
if (sh->vtag != ct->proto.sctp.vtag[dir])
goto out_unlock;
} else if (sch->type == SCTP_CID_COOKIE_ACK) {
ct->proto.sctp.init[dir] = 0;
ct->proto.sctp.init[!dir] = 0;
} else if (sch->type == SCTP_CID_HEARTBEAT) {
if (ct->proto.sctp.vtag[dir] == 0) {
pr_debug("Setting %d vtag %x for dir %d\n", sch->type, sh->vtag, dir);
Expand Down Expand Up @@ -461,16 +464,18 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
}

/* If it is an INIT or an INIT ACK note down the vtag */
if (sch->type == SCTP_CID_INIT ||
sch->type == SCTP_CID_INIT_ACK) {
struct sctp_inithdr _inithdr, *ih;
if (sch->type == SCTP_CID_INIT) {
struct sctp_inithdr _ih, *ih;

ih = skb_header_pointer(skb, offset + sizeof(_sch),
sizeof(_inithdr), &_inithdr);
if (ih == NULL)
ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih);
if (!ih)
goto out_unlock;
pr_debug("Setting vtag %x for dir %d\n",
ih->init_tag, !dir);

if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir])
ct->proto.sctp.init[!dir] = 0;
ct->proto.sctp.init[dir] = 1;

pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir);
ct->proto.sctp.vtag[!dir] = ih->init_tag;

/* don't renew timeout on init retransmit so
Expand All @@ -481,6 +486,24 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
old_state == SCTP_CONNTRACK_CLOSED &&
nf_ct_is_confirmed(ct))
ignore = true;
} else if (sch->type == SCTP_CID_INIT_ACK) {
struct sctp_inithdr _ih, *ih;
__be32 vtag;

ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih);
if (!ih)
goto out_unlock;

vtag = ct->proto.sctp.vtag[!dir];
if (!ct->proto.sctp.init[!dir] && vtag && vtag != ih->init_tag)
goto out_unlock;
/* collision */
if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir] &&
vtag != ih->init_tag)
goto out_unlock;

pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir);
ct->proto.sctp.vtag[!dir] = ih->init_tag;
}

ct->proto.sctp.state = new_state;
Expand Down
44 changes: 28 additions & 16 deletions net/netfilter/nf_tables_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -7871,24 +7871,14 @@ static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info,
return nft_delobj(&ctx, obj);
}

void nft_obj_notify(struct net *net, const struct nft_table *table,
struct nft_object *obj, u32 portid, u32 seq, int event,
u16 flags, int family, int report, gfp_t gfp)
static void
__nft_obj_notify(struct net *net, const struct nft_table *table,
struct nft_object *obj, u32 portid, u32 seq, int event,
u16 flags, int family, int report, gfp_t gfp)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct sk_buff *skb;
int err;
char *buf = kasprintf(gfp, "%s:%u",
table->name, nft_net->base_seq);

audit_log_nfcfg(buf,
family,
obj->handle,
event == NFT_MSG_NEWOBJ ?
AUDIT_NFT_OP_OBJ_REGISTER :
AUDIT_NFT_OP_OBJ_UNREGISTER,
gfp);
kfree(buf);

if (!report &&
!nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
Expand All @@ -7911,13 +7901,35 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
err:
nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
}

void nft_obj_notify(struct net *net, const struct nft_table *table,
struct nft_object *obj, u32 portid, u32 seq, int event,
u16 flags, int family, int report, gfp_t gfp)
{
struct nftables_pernet *nft_net = nft_pernet(net);
char *buf = kasprintf(gfp, "%s:%u",
table->name, nft_net->base_seq);

audit_log_nfcfg(buf,
family,
obj->handle,
event == NFT_MSG_NEWOBJ ?
AUDIT_NFT_OP_OBJ_REGISTER :
AUDIT_NFT_OP_OBJ_UNREGISTER,
gfp);
kfree(buf);

__nft_obj_notify(net, table, obj, portid, seq, event,
flags, family, report, gfp);
}
EXPORT_SYMBOL_GPL(nft_obj_notify);

static void nf_tables_obj_notify(const struct nft_ctx *ctx,
struct nft_object *obj, int event)
{
nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event,
ctx->flags, ctx->family, ctx->report, GFP_KERNEL);
__nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid,
ctx->seq, event, ctx->flags, ctx->family,
ctx->report, GFP_KERNEL);
}

/*
Expand Down
13 changes: 12 additions & 1 deletion net/netfilter/nft_payload.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,17 @@ int nft_payload_inner_offset(const struct nft_pktinfo *pkt)
return pkt->inneroff;
}

static bool nft_payload_need_vlan_copy(const struct nft_payload *priv)
{
unsigned int len = priv->offset + priv->len;

/* data past ether src/dst requested, copy needed */
if (len > offsetof(struct ethhdr, h_proto))
return true;

return false;
}

void nft_payload_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
Expand All @@ -172,7 +183,7 @@ void nft_payload_eval(const struct nft_expr *expr,
goto err;

if (skb_vlan_tag_present(skb) &&
priv->offset >= offsetof(struct ethhdr, h_proto)) {
nft_payload_need_vlan_copy(priv)) {
if (!nft_payload_copy_vlan(dest, skb,
priv->offset, priv->len))
goto err;
Expand Down
46 changes: 29 additions & 17 deletions net/netfilter/nft_set_rbtree.c
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,9 @@ static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
rb_erase(&rbe->node, &priv->root);
}

static int nft_rbtree_gc_elem(const struct nft_set *__set,
struct nft_rbtree *priv,
struct nft_rbtree_elem *rbe,
u8 genmask)
static const struct nft_rbtree_elem *
nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
struct nft_rbtree_elem *rbe, u8 genmask)
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
Expand All @@ -246,7 +245,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,

gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
if (!gc)
return -ENOMEM;
return ERR_PTR(-ENOMEM);

/* search for end interval coming before this element.
* end intervals don't carry a timeout extension, they
Expand All @@ -261,6 +260,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
prev = rb_prev(prev);
}

rbe_prev = NULL;
if (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
nft_rbtree_gc_remove(net, set, priv, rbe_prev);
Expand All @@ -272,21 +272,21 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
*/
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
if (WARN_ON_ONCE(!gc))
return -ENOMEM;
return ERR_PTR(-ENOMEM);

nft_trans_gc_elem_add(gc, rbe_prev);
}

nft_rbtree_gc_remove(net, set, priv, rbe);
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
if (WARN_ON_ONCE(!gc))
return -ENOMEM;
return ERR_PTR(-ENOMEM);

nft_trans_gc_elem_add(gc, rbe);

nft_trans_gc_queue_sync_done(gc);

return 0;
return rbe_prev;
}

static bool nft_rbtree_update_first(const struct nft_set *set,
Expand Down Expand Up @@ -314,7 +314,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
u8 cur_genmask = nft_genmask_cur(net);
u8 genmask = nft_genmask_next(net);
int d, err;
int d;

/* Descend the tree to search for an existing element greater than the
* key value to insert that is greater than the new element. This is the
Expand Down Expand Up @@ -363,9 +363,14 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
*/
if (nft_set_elem_expired(&rbe->ext) &&
nft_set_elem_active(&rbe->ext, cur_genmask)) {
err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
if (err < 0)
return err;
const struct nft_rbtree_elem *removed_end;

removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask);
if (IS_ERR(removed_end))
return PTR_ERR(removed_end);

if (removed_end == rbe_le || removed_end == rbe_ge)
return -EAGAIN;

continue;
}
Expand Down Expand Up @@ -486,11 +491,18 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *rbe = elem->priv;
int err;

write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
err = __nft_rbtree_insert(net, set, rbe, ext);
write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
do {
if (fatal_signal_pending(current))
return -EINTR;

cond_resched();

write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
err = __nft_rbtree_insert(net, set, rbe, ext);
write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
} while (err == -EAGAIN);

return err;
}
Expand Down
5 changes: 3 additions & 2 deletions tools/testing/selftests/netfilter/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
nft_concat_range.sh nft_conntrack_helper.sh \
nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh
conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \
conntrack_sctp_collision.sh

HOSTPKG_CONFIG := pkg-config

CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)

TEST_GEN_FILES = nf-queue connect_close audit_logread
TEST_GEN_FILES = nf-queue connect_close audit_logread sctp_collision

include ../lib.mk
89 changes: 89 additions & 0 deletions tools/testing/selftests/netfilter/conntrack_sctp_collision.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Testing For SCTP COLLISION SCENARIO as Below:
#
# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359]
# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187]
# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359]
# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO]
# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK]
# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970]
#
# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS

CLIENT_NS=$(mktemp -u client-XXXXXXXX)
CLIENT_IP="198.51.200.1"
CLIENT_PORT=1234

SERVER_NS=$(mktemp -u server-XXXXXXXX)
SERVER_IP="198.51.100.1"
SERVER_PORT=1234

ROUTER_NS=$(mktemp -u router-XXXXXXXX)
CLIENT_GW="198.51.200.2"
SERVER_GW="198.51.100.2"

# setup the topo
setup() {
ip net add $CLIENT_NS
ip net add $SERVER_NS
ip net add $ROUTER_NS
ip -n $SERVER_NS link add link0 type veth peer name link1 netns $ROUTER_NS
ip -n $CLIENT_NS link add link3 type veth peer name link2 netns $ROUTER_NS

ip -n $SERVER_NS link set link0 up
ip -n $SERVER_NS addr add $SERVER_IP/24 dev link0
ip -n $SERVER_NS route add $CLIENT_IP dev link0 via $SERVER_GW

ip -n $ROUTER_NS link set link1 up
ip -n $ROUTER_NS link set link2 up
ip -n $ROUTER_NS addr add $SERVER_GW/24 dev link1
ip -n $ROUTER_NS addr add $CLIENT_GW/24 dev link2
ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1

ip -n $CLIENT_NS link set link3 up
ip -n $CLIENT_NS addr add $CLIENT_IP/24 dev link3
ip -n $CLIENT_NS route add $SERVER_IP dev link3 via $CLIENT_GW

# simulate the delay on OVS upcall by setting up a delay for INIT_ACK with
# tc on $SERVER_NS side
tc -n $SERVER_NS qdisc add dev link0 root handle 1: htb
tc -n $SERVER_NS class add dev link0 parent 1: classid 1:1 htb rate 100mbit
tc -n $SERVER_NS filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \
0xff match u8 2 0xff at 32 flowid 1:1
tc -n $SERVER_NS qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms

# simulate the ctstate check on OVS nf_conntrack
ip net exec $ROUTER_NS iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP
ip net exec $ROUTER_NS iptables -A INPUT -p sctp -j DROP

# use a smaller number for assoc's max_retrans to reproduce the issue
modprobe sctp
ip net exec $CLIENT_NS sysctl -wq net.sctp.association_max_retrans=3
}

cleanup() {
ip net exec $CLIENT_NS pkill sctp_collision 2>&1 >/dev/null
ip net exec $SERVER_NS pkill sctp_collision 2>&1 >/dev/null
ip net del "$CLIENT_NS"
ip net del "$SERVER_NS"
ip net del "$ROUTER_NS"
}

do_test() {
ip net exec $SERVER_NS ./sctp_collision server \
$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT &
ip net exec $CLIENT_NS ./sctp_collision client \
$CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT
}

# NOTE: one way to work around the issue is set a smaller hb_interval
# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500

# run the test case
trap cleanup EXIT
setup && \
echo "Test for SCTP Collision in nf_conntrack:" && \
do_test && echo "PASS!"
exit $?
Loading

0 comments on commit c56e67f

Please sign in to comment.