Skip to content

Commit

Permalink
Merge tag 'nf-next-25-01-11' of git://git.kernel.org/pub/scm/linux/ke…
Browse files Browse the repository at this point in the history
…rnel/git/netfilter/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains a small batch of Netfilter/IPVS updates
for net-next:

1) Remove unused genmask parameter in nf_tables_addchain()

2) Speed up reads from /proc/net/ip_vs_conn, from Florian Westphal.

3) Skip empty buckets in hashlimit to avoid atomic operations that result
   in false positive reports by syzbot with lockdep enabled, patch from
   Eric Dumazet.

4) Add conntrack event timestamps available via ctnetlink,
   from Florian Westphal.

netfilter pull request 25-01-11

* tag 'nf-next-25-01-11' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  netfilter: conntrack: add conntrack event timestamp
  netfilter: xt_hashlimit: htable_selective_cleanup() optimization
  ipvs: speed up reads from ip_vs_conn proc file
  netfilter: nf_tables: remove the genmask parameter
====================

Link: https://patch.msgid.link/20250111230800.67349-1-pablo@netfilter.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
  • Loading branch information
Paolo Abeni committed Jan 14, 2025
2 parents a8d0066 + 601731f commit 624d7a8
Show file tree
Hide file tree
Showing 7 changed files with 97 additions and 27 deletions.
12 changes: 12 additions & 0 deletions include/net/netfilter/nf_conntrack_ecache.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <asm/local64.h>

enum nf_ct_ecache_state {
NFCT_ECACHE_DESTROY_FAIL, /* tried but failed to send destroy event */
Expand All @@ -20,6 +21,9 @@ enum nf_ct_ecache_state {

struct nf_conntrack_ecache {
unsigned long cache; /* bitops want long */
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
local64_t timestamp; /* event timestamp, in nanoseconds */
#endif
u16 ctmask; /* bitmask of ct events to be delivered */
u16 expmask; /* bitmask of expect events to be delivered */
u32 missed; /* missed events */
Expand Down Expand Up @@ -108,6 +112,14 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
if (e == NULL)
return;

#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
/* renew only if this is the first cached event, so that the
* timestamp reflects the first, not the last, generated event.
*/
if (local64_read(&e->timestamp) && READ_ONCE(e->cache) == 0)
local64_set(&e->timestamp, ktime_get_real_ns());
#endif

set_bit(event, &e->cache);
#endif
}
Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/netfilter/nfnetlink_conntrack.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ enum ctattr_type {
CTA_SYNPROXY,
CTA_FILTER,
CTA_STATUS_MASK,
CTA_TIMESTAMP_EVENT,
__CTA_MAX
};
#define CTA_MAX (__CTA_MAX - 1)
Expand Down
50 changes: 28 additions & 22 deletions net/netfilter/ipvs/ip_vs_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -1046,28 +1046,35 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
#ifdef CONFIG_PROC_FS
/* Iteration state for the connection-table seq_file walk; the iterator
 * functions in this hunk obtain it from seq->private.
 *
 * NOTE(review): this is a rendered diff whose +/- markers were lost, so the
 * field list mixes both sides of the change. `l` appears to be the removed
 * field and `bucket`/`skip_elems` the added ones - confirm against the tree.
 */
struct ip_vs_iter_state {
struct seq_net_private p;
/* hash chain head of the last returned entry (set via iter->l) */
struct hlist_head *l;
/* index into ip_vs_conn_tab at which ip_vs_conn_array() starts scanning */
unsigned int bucket;
/* number of leading entries of that bucket to skip when resuming */
unsigned int skip_elems;
};

/* Find the next connection entry for the seq_file walk.
 *
 * NOTE(review): this is a rendered diff whose +/- markers were lost, so
 * removed and added lines are interleaved (two signatures, two `for`
 * headers, two match conditions). It does not compile as shown. The lines
 * using `seq`/`pos`/`iter->l` appear to be the removed side and the lines
 * using `iter->bucket`/`iter->skip_elems` the added side - confirm against
 * the tree.
 *
 * Reading only the iter->bucket / iter->skip_elems lines: the scan starts
 * at bucket iter->bucket, passes over the first iter->skip_elems entries of
 * that bucket, and returns the next entry after storing its bucket index in
 * iter->bucket. skip_elems is reset to 0 whenever the scan moves on to a
 * following bucket. Returns NULL, with iter->bucket set to the final loop
 * index, once the table is exhausted.
 */
static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
static void *ip_vs_conn_array(struct ip_vs_iter_state *iter)
{
int idx;
struct ip_vs_conn *cp;
struct ip_vs_iter_state *iter = seq->private;

for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
for (idx = iter->bucket; idx < ip_vs_conn_tab_size; idx++) {
/* entries of this bucket passed over so far */
unsigned int skip = 0;

hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
/* __ip_vs_conn_get() is not needed by
 * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show
 */
if (pos-- == 0) {
iter->l = &ip_vs_conn_tab[idx];
if (skip >= iter->skip_elems) {
/* remember the bucket so a later call resumes here */
iter->bucket = idx;
return cp;
}

++skip;
}

/* nothing (more) in this bucket: the skip count only applied to it */
iter->skip_elems = 0;
cond_resched_rcu();
}

iter->bucket = idx;
return NULL;
}

Expand All @@ -1076,38 +1083,37 @@ static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
{
struct ip_vs_iter_state *iter = seq->private;

iter->l = NULL;
rcu_read_lock();
return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
if (*pos == 0) {
iter->skip_elems = 0;
iter->bucket = 0;
return SEQ_START_TOKEN;
}

return ip_vs_conn_array(iter);
}

/* seq_file ->next callback: advance *pos and return the entry following v.
 *
 * NOTE(review): this is a rendered diff whose +/- markers were lost, so
 * removed and added lines are interleaved (duplicate `return` and `if (e)`
 * lines, and an open-coded bucket scan next to the ip_vs_conn_array(iter)
 * calls). It does not compile as shown. The lines using `l`, `idx` and
 * `iter->l` appear to be the removed side - confirm against the tree.
 *
 * Reading only the iter->bucket / iter->skip_elems lines: for
 * SEQ_START_TOKEN the scan is delegated to ip_vs_conn_array(). Otherwise,
 * if the current entry has a successor on the same hash chain, skip_elems
 * is incremented and that successor is returned. If it has none, skip_elems
 * is reset, bucket is advanced by one and ip_vs_conn_array() continues from
 * there.
 */
static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct ip_vs_conn *cp = v;
struct ip_vs_iter_state *iter = seq->private;
struct hlist_node *e;
struct hlist_head *l = iter->l;
int idx;

++*pos;
if (v == SEQ_START_TOKEN)
return ip_vs_conn_array(seq, 0);
return ip_vs_conn_array(iter);

/* more on same hash chain? */
e = rcu_dereference(hlist_next_rcu(&cp->c_list));
if (e)
if (e) {
/* one more entry of the current bucket has been handed out */
iter->skip_elems++;
return hlist_entry(e, struct ip_vs_conn, c_list);

idx = l - ip_vs_conn_tab;
while (++idx < ip_vs_conn_tab_size) {
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
iter->l = &ip_vs_conn_tab[idx];
return cp;
}
cond_resched_rcu();
}
iter->l = NULL;
return NULL;

/* chain exhausted: continue with the first entry of the next bucket */
iter->skip_elems = 0;
iter->bucket++;

return ip_vs_conn_array(iter);
}

static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
Expand Down
23 changes: 23 additions & 0 deletions net/netfilter/nf_conntrack_ecache.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,14 @@ static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
return ret;
}

/* Overwrite the event timestamp in @e with the current real-time clock
 * value in nanoseconds, but only if a non-zero timestamp is already stored.
 * A zero timestamp is left untouched (presumably zero marks timestamping as
 * not in use for this entry - confirm against the code that initialises it).
 *
 * Compiles to an empty function without CONFIG_NF_CONNTRACK_TIMESTAMP.
 */
static void nf_ct_ecache_tstamp_refresh(struct nf_conntrack_ecache *e)
{
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
	if (!local64_read(&e->timestamp))
		return;

	local64_set(&e->timestamp, ktime_get_real_ns());
#endif
}

int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
u32 portid, int report)
{
Expand All @@ -186,6 +194,8 @@ int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
/* This is a resend of a destroy event? If so, skip missed */
missed = e->portid ? 0 : e->missed;

nf_ct_ecache_tstamp_refresh(e);

ret = __nf_conntrack_eventmask_report(e, events, missed, &item);
if (unlikely(ret < 0 && (events & (1 << IPCT_DESTROY)))) {
/* This is a destroy event that has been triggered by a process,
Expand Down Expand Up @@ -297,6 +307,18 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
}
}

/* Set the initial event timestamp of the event cache @e.
 *
 * The current real-time clock value in nanoseconds is stored when @ct
 * carries the NF_CT_EXT_TSTAMP extension; otherwise zero is stored.
 *
 * Compiles to an empty function without CONFIG_NF_CONNTRACK_TIMESTAMP.
 */
static void nf_ct_ecache_tstamp_new(const struct nf_conn *ct, struct nf_conntrack_ecache *e)
{
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
	const u64 initial = nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP) ?
			    ktime_get_real_ns() : 0;

	local64_set(&e->timestamp, initial);
#endif
}

bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
{
struct net *net = nf_ct_net(ct);
Expand Down Expand Up @@ -326,6 +348,7 @@ bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp

e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp);
if (e) {
nf_ct_ecache_tstamp_new(ct, e);
e->ctmask = ctmask;
e->expmask = expmask;
}
Expand Down
25 changes: 25 additions & 0 deletions net/netfilter/nf_conntrack_netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,23 @@ static int ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct)
#endif

#ifdef CONFIG_NF_CONNTRACK_EVENTS
/* Append the event timestamp of @ct to @skb as a CTA_TIMESTAMP_EVENT
 * attribute (64-bit, big endian, padded with CTA_TIMESTAMP_PAD).
 *
 * Nothing is added, and 0 is returned, when @ct has no event cache
 * extension or when the stored timestamp is zero. Otherwise the result of
 * nla_put_be64() is returned unchanged.
 *
 * Always returns 0 without CONFIG_NF_CONNTRACK_TIMESTAMP.
 */
static int
ctnetlink_dump_event_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
{
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
	const struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
	u64 stamp;

	if (!ecache)
		return 0;

	stamp = local64_read(&ecache->timestamp);
	if (stamp)
		return nla_put_be64(skb, CTA_TIMESTAMP_EVENT,
				    cpu_to_be64(stamp), CTA_TIMESTAMP_PAD);
#endif
	return 0;
}

static inline int ctnetlink_label_size(const struct nf_conn *ct)
{
struct nf_conn_labels *labels = nf_ct_labels_find(ct);
Expand Down Expand Up @@ -717,6 +734,9 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct)
#endif
+ ctnetlink_proto_size(ct)
+ ctnetlink_label_size(ct)
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ nla_total_size(sizeof(u64)) /* CTA_TIMESTAMP_EVENT */
#endif
;
}

Expand Down Expand Up @@ -838,6 +858,10 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK)))
goto nla_put_failure;
#endif

if (ctnetlink_dump_event_timestamp(skb, ct))
goto nla_put_failure;

nlmsg_end(skb, nlh);
err = nfnetlink_send(skb, net, item->portid, group, item->report,
GFP_ATOMIC);
Expand Down Expand Up @@ -1557,6 +1581,7 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
.len = NF_CT_LABELS_MAX_SIZE },
[CTA_FILTER] = { .type = NLA_NESTED },
[CTA_STATUS_MASK] = { .type = NLA_U32 },
[CTA_TIMESTAMP_EVENT] = { .type = NLA_REJECT },
};

static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
Expand Down
7 changes: 3 additions & 4 deletions net/netfilter/nf_tables_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -2598,9 +2598,8 @@ int nft_chain_add(struct nft_table *table, struct nft_chain *chain)

static u64 chain_id;

static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
u8 policy, u32 flags,
struct netlink_ext_ack *extack)
static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 policy,
u32 flags, struct netlink_ext_ack *extack)
{
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
Expand Down Expand Up @@ -3038,7 +3037,7 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
extack);
}

return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack);
return nf_tables_addchain(&ctx, family, policy, flags, extack);
}

static int nft_delchain_hook(struct nft_ctx *ctx,
Expand Down
6 changes: 5 additions & 1 deletion net/netfilter/xt_hashlimit.c
Original file line number Diff line number Diff line change
Expand Up @@ -363,11 +363,15 @@ static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, bool select
unsigned int i;

for (i = 0; i < ht->cfg.size; i++) {
struct hlist_head *head = &ht->hash[i];
struct dsthash_ent *dh;
struct hlist_node *n;

if (hlist_empty(head))
continue;

spin_lock_bh(&ht->lock);
hlist_for_each_entry_safe(dh, n, &ht->hash[i], node) {
hlist_for_each_entry_safe(dh, n, head, node) {
if (time_after_eq(jiffies, dh->expires) || select_all)
dsthash_free(ht, dh);
}
Expand Down

0 comments on commit 624d7a8

Please sign in to comment.