From 5ebb335dcbe63470c88c4f80f2d571089543b638 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 21 Mar 2015 15:19:15 +0000 Subject: [PATCH 01/15] netfilter: nf_tables: move struct net pointer to base chain The network namespace is only needed for base chains to get at the gencursor. Also convert to possible_net_t. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++-- net/netfilter/nf_tables_api.c | 2 +- net/netfilter/nf_tables_core.c | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d756af559977d..ace67a549b303 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -449,7 +449,6 @@ enum nft_chain_flags { * * @rules: list of rules in the chain * @list: used internally - * @net: net namespace that this chain belongs to * @table: table that this chain belongs to * @handle: chain handle * @use: number of jump references to this chain @@ -460,7 +459,6 @@ enum nft_chain_flags { struct nft_chain { struct list_head rules; struct list_head list; - struct net *net; struct nft_table *table; u64 handle; u32 use; @@ -512,6 +510,7 @@ struct nft_stats { * struct nft_base_chain - nf_tables base chain * * @ops: netfilter hook ops + * @pnet: net namespace that this chain belongs to * @type: chain type * @policy: default policy * @stats: per-cpu chain stats @@ -519,6 +518,7 @@ struct nft_stats { */ struct nft_base_chain { struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; + possible_net_t pnet; const struct nf_chain_type *type; u8 policy; struct nft_stats __percpu *stats; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 363a39a6c2864..0b969b66cb775 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1354,6 +1354,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, rcu_assign_pointer(basechain->stats, stats); } + write_pnet(&basechain->pnet, net); basechain->type = type; chain = &basechain->chain; @@ -1381,7 +1382,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&chain->rules); chain->handle = nf_tables_alloc_handle(table); - chain->net = net; chain->table = table; nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 77165bf023f36..4c921a302cfd4 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -112,6 +112,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { const struct nft_chain *chain = ops->priv, *basechain = chain; + const struct net *net = read_pnet(&nft_base_chain(basechain)->pnet); const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_data data[NFT_REG_MAX + 1]; @@ -123,7 +124,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) * Cache cursor to avoid problems in case that the cursor is updated * while traversing the ruleset. */ - unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor); + unsigned int gencursor = ACCESS_ONCE(net->nft.gencursor); do_chain: rulenum = 0; From a81b2ce8508b9c57c2eae0b0ab7fc8a5e849a4b4 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 23 Mar 2015 11:50:10 -0700 Subject: [PATCH 02/15] netfilter: Use LOGLEVEL_ defines Use the #defines where appropriate. Miscellanea: Add explicit #include where it was not previously used so that these #defines are a bit more explicitly defined instead of indirectly included via: module.h->moduleparam.h->kernel.h Signed-off-by: Joe Perches Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_log_arp.c | 4 +++- net/ipv4/netfilter/nf_log_ipv4.c | 4 +++- net/ipv6/netfilter/ip6_tables.c | 5 ++++- net/ipv6/netfilter/nf_log_ipv6.c | 4 +++- net/netfilter/nf_tables_core.c | 3 ++- net/netfilter/nft_log.c | 2 +- 6 files changed, 16 insertions(+), 6 deletions(-) diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index d059182c1466f..e7ad950cf9ef9 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -10,8 +10,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -27,7 +29,7 @@ static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 5, + .level = LOGLEVEL_NOTICE, .logflags = NF_LOG_MASK, }, }, diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 75101980eeee1..076aadda04737 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -5,8 +5,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -26,7 +28,7 @@ static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 5, + .level = LOGLEVEL_NOTICE, .logflags = NF_LOG_MASK, }, }, diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index bb00c6f2a8855..83f59dc3cccc1 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -9,7 +9,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include #include #include #include @@ -234,7 +237,7 @@ static struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 4, + .level = LOGLEVEL_WARNING, .logflags = NF_LOG_MASK, }, }, diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index ddf07e6f59d7d..8dd869642f45a 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -5,8 +5,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -27,7 +29,7 @@ static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 5, + .level = LOGLEVEL_NOTICE, .logflags = NF_LOG_MASK, }, }, diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 4c921a302cfd4..763a9d87296d6 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -8,6 +8,7 @@ * Development of this code funded by Astaro AG (http://www.astaro.com/) */ +#include #include #include #include @@ -37,7 +38,7 @@ static struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 4, + .level = LOGLEVEL_WARNING, .logflags = NF_LOG_MASK, }, }, diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index bde05f28cf147..e18af9db2f04e 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -78,7 +78,7 @@ static int nft_log_init(const struct nft_ctx *ctx, li->u.log.level = ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL])); } else { - li->u.log.level = 4; + li->u.log.level = LOGLEVEL_WARNING; } if (tb[NFTA_LOG_FLAGS] != NULL) { li->u.log.logflags = From fce1528ef619bf55bf5e5bca8acaa8a37f0d4202 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 24 Mar 2015 10:55:38 +0100 Subject: [PATCH 03/15] netfilter: nf_tables: restore nf_log_trace() in nf_tables_core.c As described by 4017a7e ("netfilter: restore rule tracing via nfnetlink_log"), this accidentally slipped through during conflict resolution in d5c1d8c. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 763a9d87296d6..4429008fe99db 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -50,10 +50,10 @@ static void __nft_trace_packet(const struct nft_pktinfo *pkt, { struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, - pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", - chain->table->name, chain->name, comments[type], - rulenum); + nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, + pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", + chain->table->name, chain->name, comments[type], + rulenum); } static inline void nft_trace_packet(const struct nft_pktinfo *pkt, From d95797252a9d967d462d9581fb72546d6a92e14b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 08:09:55 +0000 Subject: [PATCH 04/15] netfilter: nf_tables: nft_queue does not depend on x_tables Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 971cd7526f4bb..f70e34a68f702 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -522,7 +522,6 @@ config NFT_NAT typical Network Address Translation (NAT) packet transformations. config NFT_QUEUE - depends on NETFILTER_XTABLES depends on NETFILTER_NETLINK_QUEUE tristate "Netfilter nf_tables queue module" help From 14d14a5d2957a4a047b4dbabb6d5ef28a7a70b33 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 08:09:56 +0000 Subject: [PATCH 05/15] netfilter: nft_meta: use raw_smp_processor_id() Using smp_processor_id() triggers warnings with PREEMPT_RCU. There is no point in disabling preemption since we only collect the numeric value, so use raw_smp_processor_id() instead. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index abe68119a76c5..5197874372ec4 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -153,7 +153,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, } break; case NFT_META_CPU: - dest->data[0] = smp_processor_id(); + dest->data[0] = raw_smp_processor_id(); break; case NFT_META_IIFGROUP: if (in == NULL) From 49f7b33e63fec9d16e7ee62ba8f8ab4159cbdc26 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 13:07:45 +0000 Subject: [PATCH 06/15] rhashtable: provide len to obj_hashfn nftables sets will be converted to use so called setextensions, moving the key to a non-fixed position. To hash it, the obj_hashfn must be used, however it so far doesn't receive the length parameter. Pass the key length to obj_hashfn() and convert existing users. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/rhashtable.h | 6 ++++-- lib/rhashtable.c | 2 +- net/netlink/af_netlink.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 99f2e49a8a078..e23d242d1230f 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -88,7 +88,7 @@ struct rhashtable_compare_arg { }; typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); -typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 seed); +typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed); typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, const void *obj); @@ -242,7 +242,9 @@ static inline unsigned int rht_head_hashfn( const char *ptr = rht_obj(ht, he); return likely(params.obj_hashfn) ? - rht_bucket_index(tbl, params.obj_hashfn(ptr, tbl->hash_rnd)) : + rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?: + ht->p.key_len, + tbl->hash_rnd)) : rht_key_hashfn(ht, tbl, ptr + params.key_offset, params); } diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 4b7b7e672b934..4898442b837fb 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -691,7 +691,7 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) * struct rhash_head node; * }; * - * u32 my_hash_fn(const void *data, u32 seed) + * u32 my_hash_fn(const void *data, u32 len, u32 seed) * { * struct test_obj *obj = data; * diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4caa809dbbe03..19909d0786a2e 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -3127,7 +3127,7 @@ static struct pernet_operations __net_initdata netlink_net_ops = { .exit = netlink_net_exit, }; -static inline u32 netlink_hash(const void *data, u32 seed) +static inline u32 netlink_hash(const void *data, u32 len, u32 seed) { const struct netlink_sock *nlk = data; struct netlink_compare_arg arg; From 745f5450d5190e8bd02301b8d42f06999af3f5f8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 13:07:46 +0000 Subject: [PATCH 07/15] netfilter: nft_hash: restore struct nft_hash Following patches will add new private members, restore struct nft_hash as preparation. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_hash.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index f9ce2195fd63e..a517f84e9a21e 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -23,6 +23,10 @@ /* We target a hash table size of 4, element hint is 75% of final size */ #define NFT_HASH_ELEMENT_HINT 3 +struct nft_hash { + struct rhashtable ht; +}; + struct nft_hash_elem { struct rhash_head node; struct nft_data key; @@ -35,10 +39,10 @@ static bool nft_hash_lookup(const struct nft_set *set, const struct nft_data *key, struct nft_data *data) { - struct rhashtable *priv = nft_set_priv(set); + struct nft_hash *priv = nft_set_priv(set); const struct nft_hash_elem *he; - he = rhashtable_lookup_fast(priv, key, nft_hash_params); + he = rhashtable_lookup_fast(&priv->ht, key, nft_hash_params); if (he && set->flags & NFT_SET_MAP) nft_data_copy(data, he->data); @@ -48,7 +52,7 @@ static bool nft_hash_lookup(const struct nft_set *set, static int nft_hash_insert(const struct nft_set *set, const struct nft_set_elem *elem) { - struct rhashtable *priv = nft_set_priv(set); + struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; unsigned int size; int err; @@ -68,7 +72,7 @@ static int nft_hash_insert(const struct nft_set *set, if (set->flags & NFT_SET_MAP) nft_data_copy(he->data, &elem->data); - err = rhashtable_insert_fast(priv, &he->node, nft_hash_params); + err = rhashtable_insert_fast(&priv->ht, &he->node, nft_hash_params); if (err) kfree(he); @@ -87,19 +91,19 @@ static void nft_hash_elem_destroy(const struct nft_set *set, static void nft_hash_remove(const struct nft_set *set, const struct nft_set_elem *elem) { - struct rhashtable *priv = nft_set_priv(set); + struct nft_hash *priv = nft_set_priv(set); - rhashtable_remove_fast(priv, elem->cookie, nft_hash_params); + rhashtable_remove_fast(&priv->ht, elem->cookie, nft_hash_params); synchronize_rcu(); kfree(elem->cookie); } static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) { - struct rhashtable *priv = nft_set_priv(set); + struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; - he = rhashtable_lookup_fast(priv, &elem->key, nft_hash_params); + he = rhashtable_lookup_fast(&priv->ht, &elem->key, nft_hash_params); if (!he) return -ENOENT; @@ -114,13 +118,13 @@ static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_iter *iter) { - struct rhashtable *priv = nft_set_priv(set); + struct nft_hash *priv = nft_set_priv(set); const struct nft_hash_elem *he; struct rhashtable_iter hti; struct nft_set_elem elem; int err; - err = rhashtable_walk_init(priv, &hti); + err = rhashtable_walk_init(&priv->ht, &hti); iter->err = err; if (err) return; @@ -165,7 +169,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) { - return sizeof(struct rhashtable); + return sizeof(struct nft_hash); } static const struct rhashtable_params nft_hash_params = { @@ -179,13 +183,13 @@ static int nft_hash_init(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const tb[]) { - struct rhashtable *priv = nft_set_priv(set); + struct nft_hash *priv = nft_set_priv(set); struct rhashtable_params params = nft_hash_params; params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT; params.key_len = set->klen; - return rhashtable_init(priv, ¶ms); + return rhashtable_init(&priv->ht, ¶ms); } static void nft_free_element(void *ptr, void *arg) @@ -195,8 +199,9 @@ static void nft_free_element(void *ptr, void *arg) static void nft_hash_destroy(const struct nft_set *set) { - rhashtable_free_and_destroy(nft_set_priv(set), nft_free_element, - (void *)set); + struct nft_hash *priv = nft_set_priv(set); + + rhashtable_free_and_destroy(&priv->ht, nft_free_element, (void *)set); } static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, @@ -209,7 +214,7 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]); if (desc->size) { - est->size = sizeof(struct rhashtable) + + est->size = sizeof(struct nft_hash) + roundup_pow_of_two(desc->size * 4 / 3) * sizeof(struct nft_hash_elem *) + desc->size * esize; From 45d84751fb310fe0063cf005ffd6593b4c2321a8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 13:07:47 +0000 Subject: [PATCH 08/15] netfilter: nft_hash: indent rhashtable parameters Improve readability by indenting the parameter initialization. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_hash.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index a517f84e9a21e..e35f0b2d8e658 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -173,10 +173,10 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) } static const struct rhashtable_params nft_hash_params = { - .head_offset = offsetof(struct nft_hash_elem, node), - .key_offset = offsetof(struct nft_hash_elem, key), - .hashfn = jhash, - .automatic_shrinking = true, + .head_offset = offsetof(struct nft_hash_elem, node), + .key_offset = offsetof(struct nft_hash_elem, key), + .hashfn = jhash, + .automatic_shrinking = true, }; static int nft_hash_init(const struct nft_set *set, @@ -187,7 +187,7 @@ static int nft_hash_init(const struct nft_set *set, struct rhashtable_params params = nft_hash_params; params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT; - params.key_len = set->klen; + params.key_len = set->klen; return rhashtable_init(&priv->ht, ¶ms); } From bfd6e327e118d2fe443047829047862b49012457 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 13:07:48 +0000 Subject: [PATCH 09/15] netfilter: nft_hash: convert to use rhashtable callbacks A following patch will convert sets to use so called set extensions, where the key is not located in a fixed position anymore. This will require rhashtable hashing and comparison callbacks to be used. As preparation, convert nft_hash to use these callbacks without any functional changes. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_hash.c | 54 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index e35f0b2d8e658..dc96a7e94f803 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -33,16 +33,50 @@ struct nft_hash_elem { struct nft_data data[]; }; +struct nft_hash_cmp_arg { + const struct nft_set *set; + const struct nft_data *key; +}; + static const struct rhashtable_params nft_hash_params; +static inline u32 nft_hash_key(const void *data, u32 len, u32 seed) +{ + const struct nft_hash_cmp_arg *arg = data; + + return jhash(arg->key, len, seed); +} + +static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed) +{ + const struct nft_hash_elem *he = data; + + return jhash(&he->key, len, seed); +} + +static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct nft_hash_cmp_arg *x = arg->key; + const struct nft_hash_elem *he = ptr; + + if (nft_data_cmp(&he->key, x->key, x->set->klen)) + return 1; + return 0; +} + static bool nft_hash_lookup(const struct nft_set *set, const struct nft_data *key, struct nft_data *data) { struct nft_hash *priv = nft_set_priv(set); const struct nft_hash_elem *he; + struct nft_hash_cmp_arg arg = { + .set = set, + .key = key, + }; - he = rhashtable_lookup_fast(&priv->ht, key, nft_hash_params); + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); if (he && set->flags & NFT_SET_MAP) nft_data_copy(data, he->data); @@ -54,6 +88,10 @@ static int nft_hash_insert(const struct nft_set *set, { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; + struct nft_hash_cmp_arg arg = { + .set = set, + .key = &elem->key, + }; unsigned int size; int err; @@ -72,7 +110,8 @@ static int nft_hash_insert(const struct nft_set *set, if (set->flags & NFT_SET_MAP) nft_data_copy(he->data, &elem->data); - err = rhashtable_insert_fast(&priv->ht, &he->node, nft_hash_params); + err = rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params); if (err) kfree(he); @@ -102,8 +141,12 @@ static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; + struct nft_hash_cmp_arg arg = { + .set = set, + .key = &elem->key, + }; - he = rhashtable_lookup_fast(&priv->ht, &elem->key, nft_hash_params); + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); if (!he) return -ENOENT; @@ -174,8 +217,9 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) static const struct rhashtable_params nft_hash_params = { .head_offset = offsetof(struct nft_hash_elem, node), - .key_offset = offsetof(struct nft_hash_elem, key), - .hashfn = jhash, + .hashfn = nft_hash_key, + .obj_hashfn = nft_hash_obj, + .obj_cmpfn = nft_hash_cmp, .automatic_shrinking = true, }; From 3ac4c07a24007f0f45d2082b745508768a8e21cf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 13:07:49 +0000 Subject: [PATCH 10/15] netfilter: nf_tables: add set extensions Add simple set extension infrastructure for maintaining variable sized and optional per element data. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 105 ++++++++++++++++++++++++++++++ net/netfilter/nf_tables_api.c | 16 +++++ 2 files changed, 121 insertions(+) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index ace67a549b303..038f8a67ca1fd 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -311,6 +311,111 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); +/** + * enum nft_set_extensions - set extension type IDs + * + * @NFT_SET_EXT_KEY: element key + * @NFT_SET_EXT_DATA: mapping data + * @NFT_SET_EXT_FLAGS: element flags + * @NFT_SET_EXT_NUM: number of extension types + */ +enum nft_set_extensions { + NFT_SET_EXT_KEY, + NFT_SET_EXT_DATA, + NFT_SET_EXT_FLAGS, + NFT_SET_EXT_NUM +}; + +/** + * struct nft_set_ext_type - set extension type + * + * @len: fixed part length of the extension + * @align: alignment requirements of the extension + */ +struct nft_set_ext_type { + u8 len; + u8 align; +}; + +extern const struct nft_set_ext_type nft_set_ext_types[]; + +/** + * struct nft_set_ext_tmpl - set extension template + * + * @len: length of extension area + * @offset: offsets of individual extension types + */ +struct nft_set_ext_tmpl { + u16 len; + u8 offset[NFT_SET_EXT_NUM]; +}; + +/** + * struct nft_set_ext - set extensions + * + * @offset: offsets of individual extension types + * @data: beginning of extension data + */ +struct nft_set_ext { + u8 offset[NFT_SET_EXT_NUM]; + char data[0]; +}; + +static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl) +{ + memset(tmpl, 0, sizeof(*tmpl)); + tmpl->len = sizeof(struct nft_set_ext); +} + +static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, + unsigned int len) +{ + tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align); + BUG_ON(tmpl->len > U8_MAX); + tmpl->offset[id] = tmpl->len; + tmpl->len += nft_set_ext_types[id].len + len; +} + +static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) +{ + nft_set_ext_add_length(tmpl, id, 0); +} + +static inline void nft_set_ext_init(struct nft_set_ext *ext, + const struct nft_set_ext_tmpl *tmpl) +{ + memcpy(ext->offset, tmpl->offset, sizeof(ext->offset)); +} + +static inline bool __nft_set_ext_exists(const struct nft_set_ext *ext, u8 id) +{ + return !!ext->offset[id]; +} + +static inline bool nft_set_ext_exists(const struct nft_set_ext *ext, u8 id) +{ + return ext && __nft_set_ext_exists(ext, id); +} + +static inline void *nft_set_ext(const struct nft_set_ext *ext, u8 id) +{ + return (void *)ext + ext->offset[id]; +} + +static inline struct nft_data *nft_set_ext_key(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_KEY); +} + +static inline struct nft_data *nft_set_ext_data(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_DATA); +} + +static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_FLAGS); +} /** * struct nft_expr_type - nf_tables expression type diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0b969b66cb775..972c47f6e823b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2827,6 +2827,22 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, nf_tables_set_destroy(ctx, set); } +const struct nft_set_ext_type nft_set_ext_types[] = { + [NFT_SET_EXT_KEY] = { + .len = sizeof(struct nft_data), + .align = __alignof__(struct nft_data), + }, + [NFT_SET_EXT_DATA] = { + .len = sizeof(struct nft_data), + .align = __alignof__(struct nft_data), + }, + [NFT_SET_EXT_FLAGS] = { + .len = sizeof(u8), + .align = __alignof__(u8), + }, +}; +EXPORT_SYMBOL_GPL(nft_set_ext_types); + /* * Set elements */ From fe2811ebeb97a7a76de0b2b35f13600169508393 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 13:07:50 +0000 Subject: [PATCH 11/15] netfilter: nf_tables: convert hash and rbtree to set extensions The set implementations' private struct will only contain the elements needed to maintain the search structure, all other elements are moved to the set extensions. Element allocation and initialization is performed centrally by nf_tables_api instead of by the different set implementations' ->insert() functions. A new "elemsize" member in the set ops specifies the amount of memory to reserve for internal usage. Destruction will also be moved out of the set implementations by a following patch. Except for element allocation, the patch is a simple conversion to using data from the extension area. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 14 +++- net/netfilter/nf_tables_api.c | 119 ++++++++++++++++++++++-------- net/netfilter/nft_hash.c | 56 ++++---------- net/netfilter/nft_rbtree.c | 64 +++++----------- 4 files changed, 132 insertions(+), 121 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 038f8a67ca1fd..ef3457c1cb629 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -140,8 +140,7 @@ struct nft_userdata { * * @cookie: implementation specific element cookie * @key: element key - * @data: element data (maps only) - * @flags: element flags (end of interval) + * @priv: element private data and extensions * * The cookie can be used to store a handle to the element for subsequent * removal. @@ -149,8 +148,7 @@ struct nft_userdata { struct nft_set_elem { void *cookie; struct nft_data key; - struct nft_data data; - u32 flags; + void *priv; }; struct nft_set; @@ -214,6 +212,7 @@ struct nft_set_estimate { * @destroy: destroy private data of set instance * @list: nf_tables_set_ops list node * @owner: module reference + * @elemsize: element private size * @features: features supported by the implementation */ struct nft_set_ops { @@ -241,6 +240,7 @@ struct nft_set_ops { struct list_head list; struct module *owner; + unsigned int elemsize; u32 features; }; @@ -417,6 +417,12 @@ static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext) return nft_set_ext(ext, NFT_SET_EXT_FLAGS); } +static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, + void *elem) +{ + return elem + set->ops->elemsize; +} + /** * struct nft_expr_type - nf_tables expression type * diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 972c47f6e823b..99cb884b985f1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2771,10 +2771,11 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, const struct nft_set_iter *iter, const struct nft_set_elem *elem) { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); enum nft_registers dreg; dreg = nft_type_to_reg(set->dtype); - return nft_validate_data_load(ctx, dreg, &elem->data, + return nft_validate_data_load(ctx, dreg, nft_set_ext_data(ext), set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE); } @@ -2889,6 +2890,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, const struct nft_set *set, const struct nft_set_elem *elem) { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -2896,20 +2898,20 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, if (nest == NULL) goto nla_put_failure; - if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE, - set->klen) < 0) + if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext), + NFT_DATA_VALUE, set->klen) < 0) goto nla_put_failure; - if (set->flags & NFT_SET_MAP && - !(elem->flags & NFT_SET_ELEM_INTERVAL_END) && - nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data, + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && + nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext), set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE, set->dlen) < 0) goto nla_put_failure; - if (elem->flags != 0) - if (nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags))) - goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && + nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, + htonl(*nft_set_ext_flags(ext)))) + goto nla_put_failure; nla_nest_end(skb, nest); return 0; @@ -3130,15 +3132,42 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, return trans; } +static void *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const struct nft_data *key, + const struct nft_data *data, + gfp_t gfp) +{ + struct nft_set_ext *ext; + void *elem; + + elem = kzalloc(set->ops->elemsize + tmpl->len, gfp); + if (elem == NULL) + return NULL; + + ext = nft_set_elem_ext(set, elem); + nft_set_ext_init(ext, tmpl); + + memcpy(nft_set_ext_key(ext), key, set->klen); + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + memcpy(nft_set_ext_data(ext), data, set->dlen); + + return elem; +} + static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_data_desc d1, d2; + struct nft_set_ext_tmpl tmpl; + struct nft_set_ext *ext; struct nft_set_elem elem; struct nft_set_binding *binding; + struct nft_data data; enum nft_registers dreg; struct nft_trans *trans; + u32 flags; int err; if (set->size && set->nelems == set->size) @@ -3152,22 +3181,26 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_KEY] == NULL) return -EINVAL; - elem.flags = 0; + nft_set_ext_prepare(&tmpl); + + flags = 0; if (nla[NFTA_SET_ELEM_FLAGS] != NULL) { - elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); - if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END) + flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); + if (flags & ~NFT_SET_ELEM_INTERVAL_END) return -EINVAL; if (!(set->flags & NFT_SET_INTERVAL) && - elem.flags & NFT_SET_ELEM_INTERVAL_END) + flags & NFT_SET_ELEM_INTERVAL_END) return -EINVAL; + if (flags != 0) + nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); } if (set->flags & NFT_SET_MAP) { if (nla[NFTA_SET_ELEM_DATA] == NULL && - !(elem.flags & NFT_SET_ELEM_INTERVAL_END)) + !(flags & NFT_SET_ELEM_INTERVAL_END)) return -EINVAL; if (nla[NFTA_SET_ELEM_DATA] != NULL && - elem.flags & NFT_SET_ELEM_INTERVAL_END) + flags & NFT_SET_ELEM_INTERVAL_END) return -EINVAL; } else { if (nla[NFTA_SET_ELEM_DATA] != NULL) @@ -3185,8 +3218,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (set->ops->get(set, &elem) == 0) goto err2; + nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY); + if (nla[NFTA_SET_ELEM_DATA] != NULL) { - err = nft_data_init(ctx, &elem.data, &d2, nla[NFTA_SET_ELEM_DATA]); + err = nft_data_init(ctx, &data, &d2, nla[NFTA_SET_ELEM_DATA]); if (err < 0) goto err2; @@ -3203,29 +3238,42 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, }; err = nft_validate_data_load(&bind_ctx, dreg, - &elem.data, d2.type); + &data, d2.type); if (err < 0) goto err3; } + + nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA); } + err = -ENOMEM; + elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, GFP_KERNEL); + if (elem.priv == NULL) + goto err3; + + ext = nft_set_elem_ext(set, elem.priv); + if (flags) + *nft_set_ext_flags(ext) = flags; + trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) - goto err3; + goto err4; err = set->ops->insert(set, &elem); if (err < 0) - goto err4; + goto err5; nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; -err4: +err5: kfree(trans); +err4: + kfree(elem.priv); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) - nft_data_uninit(&elem.data, d2.type); + nft_data_uninit(&data, d2.type); err2: nft_data_uninit(&elem.key, d1.type); err1: @@ -3557,6 +3605,7 @@ static int nf_tables_commit(struct sk_buff *skb) struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; struct nft_trans_elem *te; + struct nft_set_ext *ext; /* Bump generation counter, invalidate any dump in progress */ while (++net->nft.base_seq == 0); @@ -3641,14 +3690,16 @@ static int nf_tables_commit(struct sk_buff *skb) break; case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; + ext = nft_set_elem_ext(te->set, te->elem.priv); + nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_DELSETELEM, 0); te->set->ops->get(te->set, &te->elem); nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); - if (te->set->flags & NFT_SET_MAP && - !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_uninit(&te->elem.data, te->set->dtype); + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_uninit(nft_set_ext_data(ext), + te->set->dtype); te->set->ops->remove(te->set, &te->elem); nft_trans_destroy(trans); break; @@ -3691,6 +3742,7 @@ static int nf_tables_abort(struct sk_buff *skb) struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; struct nft_trans_elem *te; + struct nft_set_ext *ext; list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { switch (trans->msg_type) { @@ -3752,11 +3804,13 @@ static int nf_tables_abort(struct sk_buff *skb) case NFT_MSG_NEWSETELEM: nft_trans_elem_set(trans)->nelems--; te = (struct nft_trans_elem *)trans->data; + ext = nft_set_elem_ext(te->set, te->elem.priv); + te->set->ops->get(te->set, &te->elem); nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); - if (te->set->flags & NFT_SET_MAP && - !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_uninit(&te->elem.data, te->set->dtype); + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_uninit(nft_set_ext_data(ext), + te->set->dtype); te->set->ops->remove(te->set, &te->elem); nft_trans_destroy(trans); break; @@ -3836,13 +3890,18 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, const struct nft_set_iter *iter, const struct nft_set_elem *elem) { - if (elem->flags & NFT_SET_ELEM_INTERVAL_END) + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_data *data; + + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && + *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) return 0; - switch (elem->data.verdict) { + data = nft_set_ext_data(ext); + switch (data->verdict) { case NFT_JUMP: case NFT_GOTO: - return nf_tables_check_loops(ctx, elem->data.chain); + return nf_tables_check_loops(ctx, data->chain); default: return 0; } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index dc96a7e94f803..15951a823d1da 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -29,8 +29,7 @@ struct nft_hash { struct nft_hash_elem { struct rhash_head node; - struct nft_data key; - struct nft_data data[]; + struct nft_set_ext ext; }; struct nft_hash_cmp_arg { @@ -51,7 +50,7 @@ static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed) { const struct nft_hash_elem *he = data; - return jhash(&he->key, len, seed); + return jhash(nft_set_ext_key(&he->ext), len, seed); } static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, @@ -60,7 +59,7 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, const struct nft_hash_cmp_arg *x = arg->key; const struct nft_hash_elem *he = ptr; - if (nft_data_cmp(&he->key, x->key, x->set->klen)) + if (nft_data_cmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) return 1; return 0; } @@ -78,7 +77,7 @@ static bool nft_hash_lookup(const struct nft_set *set, he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); if (he && set->flags & NFT_SET_MAP) - nft_data_copy(data, he->data); + nft_data_copy(data, nft_set_ext_data(&he->ext)); return !!he; } @@ -87,43 +86,22 @@ static int nft_hash_insert(const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he; + struct nft_hash_elem *he = elem->priv; struct nft_hash_cmp_arg arg = { .set = set, .key = &elem->key, }; - unsigned int size; - int err; - - if (elem->flags != 0) - return -EINVAL; - - size = sizeof(*he); - if (set->flags & NFT_SET_MAP) - size += sizeof(he->data[0]); - - he = kzalloc(size, GFP_KERNEL); - if (he == NULL) - return -ENOMEM; - - nft_data_copy(&he->key, &elem->key); - if (set->flags & NFT_SET_MAP) - nft_data_copy(he->data, &elem->data); - - err = rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, - nft_hash_params); - if (err) - kfree(he); - return err; + return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params); } static void nft_hash_elem_destroy(const struct nft_set *set, struct nft_hash_elem *he) { - nft_data_uninit(&he->key, NFT_DATA_VALUE); + nft_data_uninit(nft_set_ext_key(&he->ext), NFT_DATA_VALUE); if (set->flags & NFT_SET_MAP) - nft_data_uninit(he->data, set->dtype); + nft_data_uninit(nft_set_ext_data(&he->ext), set->dtype); kfree(he); } @@ -150,10 +128,7 @@ static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) if (!he) return -ENOENT; - elem->cookie = he; - elem->flags = 0; - if (set->flags & NFT_SET_MAP) - nft_data_copy(&elem->data, he->data); + elem->priv = he; return 0; } @@ -162,7 +137,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_iter *iter) { struct nft_hash *priv = nft_set_priv(set); - const struct nft_hash_elem *he; + struct nft_hash_elem *he; struct rhashtable_iter hti; struct nft_set_elem elem; int err; @@ -192,10 +167,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, if (iter->count < iter->skip) goto cont; - memcpy(&elem.key, &he->key, sizeof(elem.key)); - if (set->flags & NFT_SET_MAP) - memcpy(&elem.data, he->data, sizeof(elem.data)); - elem.flags = 0; + elem.priv = he; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) @@ -254,9 +226,6 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, unsigned int esize; esize = sizeof(struct nft_hash_elem); - if (features & NFT_SET_MAP) - esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]); - if (desc->size) { est->size = sizeof(struct nft_hash) + roundup_pow_of_two(desc->size * 4 / 3) * @@ -278,6 +247,7 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, static struct nft_set_ops nft_hash_ops __read_mostly = { .privsize = nft_hash_privsize, + .elemsize = offsetof(struct nft_hash_elem, ext), .estimate = nft_hash_estimate, .init = nft_hash_init, .destroy = nft_hash_destroy, diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index 2c75361077f7e..ebf6e60df41c2 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -26,9 +26,7 @@ struct nft_rbtree { struct nft_rbtree_elem { struct rb_node node; - u16 flags; - struct nft_data key; - struct nft_data data[]; + struct nft_set_ext ext; }; static bool nft_rbtree_lookup(const struct nft_set *set, @@ -45,7 +43,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set, while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(&rbe->key, key, set->klen); + d = nft_data_cmp(nft_set_ext_key(&rbe->ext), key, set->klen); if (d < 0) { parent = parent->rb_left; interval = rbe; @@ -53,10 +51,12 @@ static bool nft_rbtree_lookup(const struct nft_set *set, parent = parent->rb_right; else { found: - if (rbe->flags & NFT_SET_ELEM_INTERVAL_END) + if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) && + *nft_set_ext_flags(&rbe->ext) & + NFT_SET_ELEM_INTERVAL_END) goto out; if (set->flags & NFT_SET_MAP) - nft_data_copy(data, rbe->data); + nft_data_copy(data, nft_set_ext_data(&rbe->ext)); spin_unlock_bh(&nft_rbtree_lock); return true; @@ -75,10 +75,10 @@ static bool nft_rbtree_lookup(const struct nft_set *set, static void nft_rbtree_elem_destroy(const struct nft_set *set, struct nft_rbtree_elem *rbe) { - nft_data_uninit(&rbe->key, NFT_DATA_VALUE); + nft_data_uninit(nft_set_ext_key(&rbe->ext), NFT_DATA_VALUE); if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_uninit(rbe->data, set->dtype); + nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_DATA)) + nft_data_uninit(nft_set_ext_data(&rbe->ext), set->dtype); kfree(rbe); } @@ -96,7 +96,9 @@ static int __nft_rbtree_insert(const struct nft_set *set, while (*p != NULL) { parent = *p; rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(&rbe->key, &new->key, set->klen); + d = nft_data_cmp(nft_set_ext_key(&rbe->ext), + nft_set_ext_key(&new->ext), + set->klen); if (d < 0) p = &parent->rb_left; else if (d > 0) @@ -112,31 +114,13 @@ static int __nft_rbtree_insert(const struct nft_set *set, static int nft_rbtree_insert(const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_rbtree_elem *rbe; - unsigned int size; + struct nft_rbtree_elem *rbe = elem->priv; int err; - size = sizeof(*rbe); - if (set->flags & NFT_SET_MAP && - !(elem->flags & NFT_SET_ELEM_INTERVAL_END)) - size += sizeof(rbe->data[0]); - - rbe = kzalloc(size, GFP_KERNEL); - if (rbe == NULL) - return -ENOMEM; - - rbe->flags = elem->flags; - nft_data_copy(&rbe->key, &elem->key); - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_copy(rbe->data, &elem->data); - spin_lock_bh(&nft_rbtree_lock); err = __nft_rbtree_insert(set, rbe); - if (err < 0) - kfree(rbe); - spin_unlock_bh(&nft_rbtree_lock); + return err; } @@ -162,17 +146,15 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(&rbe->key, &elem->key, set->klen); + d = nft_data_cmp(nft_set_ext_key(&rbe->ext), &elem->key, + set->klen); if (d < 0) parent = parent->rb_left; else if (d > 0) parent = parent->rb_right; else { elem->cookie = rbe; - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_copy(&elem->data, rbe->data); - elem->flags = rbe->flags; + elem->priv = rbe; return 0; } } @@ -184,7 +166,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_set_iter *iter) { const struct nft_rbtree *priv = nft_set_priv(set); - const struct nft_rbtree_elem *rbe; + struct nft_rbtree_elem *rbe; struct nft_set_elem elem; struct rb_node *node; @@ -194,11 +176,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, goto cont; rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_data_copy(&elem.key, &rbe->key); - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_copy(&elem.data, rbe->data); - elem.flags = rbe->flags; + elem.priv = rbe; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) { @@ -245,9 +223,6 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, unsigned int nsize; nsize = sizeof(struct nft_rbtree_elem); - if (features & NFT_SET_MAP) - nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]); - if (desc->size) est->size = sizeof(struct nft_rbtree) + desc->size * nsize; else @@ -260,6 +235,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, static struct nft_set_ops nft_rbtree_ops __read_mostly = { .privsize = nft_rbtree_privsize, + .elemsize = offsetof(struct nft_rbtree_elem, ext), .estimate = nft_rbtree_estimate, .init = nft_rbtree_init, .destroy = nft_rbtree_destroy, From 61edafbb47e9f46fb850035b1f8f062564445704 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 14:08:47 +0000 Subject: [PATCH 12/15] netfilter: nf_tables: consolide set element destruction With the conversion to set extensions, it is now possible to consolidate the different set element destruction functions. The set implementations' ->remove() functions are changed to only take the element out of their internal data structures. Elements will be freed in a batched fashion after the global transaction's completion RCU grace period. This reduces the amount of grace periods required for nft_hash from N to zero additional ones, additionally this guarantees that the set elements' extensions of all implementations can be used under RCU protection. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 34 ++++++++++++++++++------------- net/netfilter/nft_hash.c | 18 ++++------------ net/netfilter/nft_rbtree.c | 14 +------------ 4 files changed, 27 insertions(+), 41 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index ef3457c1cb629..6ac63323afd25 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -423,6 +423,8 @@ static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, return elem + set->ops->elemsize; } +void nft_set_elem_destroy(const struct nft_set *set, void *elem); + /** * struct nft_expr_type - nf_tables expression type * diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 99cb884b985f1..b35512f1934ca 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3155,6 +3155,18 @@ static void *nft_set_elem_init(const struct nft_set *set, return elem; } +void nft_set_elem_destroy(const struct nft_set *set, void *elem) +{ + struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + + nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE); + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_uninit(nft_set_ext_data(ext), set->dtype); + + kfree(elem); +} +EXPORT_SYMBOL_GPL(nft_set_elem_destroy); + static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { @@ -3596,6 +3608,10 @@ static void nf_tables_commit_release(struct nft_trans *trans) case NFT_MSG_DELSET: nft_set_destroy(nft_trans_set(trans)); break; + case NFT_MSG_DELSETELEM: + nft_set_elem_destroy(nft_trans_elem_set(trans), + nft_trans_elem(trans).priv); + break; } kfree(trans); } @@ -3605,7 +3621,6 @@ static int nf_tables_commit(struct sk_buff *skb) struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; struct nft_trans_elem *te; - struct nft_set_ext *ext; /* Bump generation counter, invalidate any dump in progress */ while (++net->nft.base_seq == 0); @@ -3690,18 +3705,12 @@ static int nf_tables_commit(struct sk_buff *skb) break; case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; - ext = nft_set_elem_ext(te->set, te->elem.priv); nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_DELSETELEM, 0); te->set->ops->get(te->set, &te->elem); - nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); - if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) - nft_data_uninit(nft_set_ext_data(ext), - te->set->dtype); te->set->ops->remove(te->set, &te->elem); - nft_trans_destroy(trans); break; } } @@ -3733,6 +3742,10 @@ static void nf_tables_abort_release(struct nft_trans *trans) case NFT_MSG_NEWSET: nft_set_destroy(nft_trans_set(trans)); break; + case NFT_MSG_NEWSETELEM: + nft_set_elem_destroy(nft_trans_elem_set(trans), + nft_trans_elem(trans).priv); + break; } kfree(trans); } @@ -3742,7 +3755,6 @@ static int nf_tables_abort(struct sk_buff *skb) struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; struct nft_trans_elem *te; - struct nft_set_ext *ext; list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { switch (trans->msg_type) { @@ -3804,15 +3816,9 @@ static int nf_tables_abort(struct sk_buff *skb) case NFT_MSG_NEWSETELEM: nft_trans_elem_set(trans)->nelems--; te = (struct nft_trans_elem *)trans->data; - ext = nft_set_elem_ext(te->set, te->elem.priv); te->set->ops->get(te->set, &te->elem); - nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); - if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) - nft_data_uninit(nft_set_ext_data(ext), - te->set->dtype); te->set->ops->remove(te->set, &te->elem); - nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: nft_trans_elem_set(trans)->nelems++; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 15951a823d1da..94bf25def37fd 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -96,23 +96,12 @@ static int nft_hash_insert(const struct nft_set *set, nft_hash_params); } -static void nft_hash_elem_destroy(const struct nft_set *set, - struct nft_hash_elem *he) -{ - nft_data_uninit(nft_set_ext_key(&he->ext), NFT_DATA_VALUE); - if (set->flags & NFT_SET_MAP) - nft_data_uninit(nft_set_ext_data(&he->ext), set->dtype); - kfree(he); -} - static void nft_hash_remove(const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); rhashtable_remove_fast(&priv->ht, elem->cookie, nft_hash_params); - synchronize_rcu(); - kfree(elem->cookie); } static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) @@ -208,16 +197,17 @@ static int nft_hash_init(const struct nft_set *set, return rhashtable_init(&priv->ht, ¶ms); } -static void nft_free_element(void *ptr, void *arg) +static void nft_hash_elem_destroy(void *ptr, void *arg) { - nft_hash_elem_destroy((const struct nft_set *)arg, ptr); + nft_set_elem_destroy((const struct nft_set *)arg, ptr); } static void nft_hash_destroy(const struct nft_set *set) { struct nft_hash *priv = nft_set_priv(set); - rhashtable_free_and_destroy(&priv->ht, nft_free_element, (void *)set); + rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy, + (void *)set); } static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index ebf6e60df41c2..332c6afc77e95 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -72,17 +72,6 @@ static bool nft_rbtree_lookup(const struct nft_set *set, return false; } -static void nft_rbtree_elem_destroy(const struct nft_set *set, - struct nft_rbtree_elem *rbe) -{ - nft_data_uninit(nft_set_ext_key(&rbe->ext), NFT_DATA_VALUE); - if (set->flags & NFT_SET_MAP && - nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_DATA)) - nft_data_uninit(nft_set_ext_data(&rbe->ext), set->dtype); - - kfree(rbe); -} - static int __nft_rbtree_insert(const struct nft_set *set, struct nft_rbtree_elem *new) { @@ -133,7 +122,6 @@ static void nft_rbtree_remove(const struct nft_set *set, spin_lock_bh(&nft_rbtree_lock); rb_erase(&rbe->node, &priv->root); spin_unlock_bh(&nft_rbtree_lock); - kfree(rbe); } static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) @@ -213,7 +201,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_rbtree_elem_destroy(set, rbe); + nft_set_elem_destroy(set, rbe); } } From b2832dd6621bf73eb8ad38389a94bd83a5983886 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 14:08:48 +0000 Subject: [PATCH 13/15] netfilter: nf_tables: return set extensions from ->lookup() Return the extension area from the ->lookup() function to allow to consolidate common actions. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 +++- net/netfilter/nft_hash.c | 6 +++--- net/netfilter/nft_lookup.c | 6 +++++- net/netfilter/nft_rbtree.c | 7 +++---- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 6ac63323afd25..f190d26bda7d7 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -200,6 +200,8 @@ struct nft_set_estimate { enum nft_set_class class; }; +struct nft_set_ext; + /** * struct nft_set_ops - nf_tables set operations * @@ -218,7 +220,7 @@ struct nft_set_estimate { struct nft_set_ops { bool (*lookup)(const struct nft_set *set, const struct nft_data *key, - struct nft_data *data); + const struct nft_set_ext **ext); int (*get)(const struct nft_set *set, struct nft_set_elem *elem); int (*insert)(const struct nft_set *set, diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 94bf25def37fd..5bee82195ef56 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -66,7 +66,7 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, static bool nft_hash_lookup(const struct nft_set *set, const struct nft_data *key, - struct nft_data *data) + const struct nft_set_ext **ext) { struct nft_hash *priv = nft_set_priv(set); const struct nft_hash_elem *he; @@ -76,8 +76,8 @@ static bool nft_hash_lookup(const struct nft_set *set, }; he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); - if (he && set->flags & NFT_SET_MAP) - nft_data_copy(data, nft_set_ext_data(&he->ext)); + if (he != NULL) + *ext = &he->ext; return !!he; } diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 9615b8b9fb37d..a5f30b8760eab 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -31,9 +31,13 @@ static void nft_lookup_eval(const struct nft_expr *expr, { const struct nft_lookup *priv = nft_expr_priv(expr); const struct nft_set *set = priv->set; + const struct nft_set_ext *ext; - if (set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg])) + if (set->ops->lookup(set, &data[priv->sreg], &ext)) { + if (set->flags & NFT_SET_MAP) + nft_data_copy(&data[priv->dreg], nft_set_ext_data(ext)); return; + } data[NFT_REG_VERDICT].verdict = NFT_BREAK; } diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index 332c6afc77e95..cbba755ebebc4 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -31,7 +31,7 @@ struct nft_rbtree_elem { static bool nft_rbtree_lookup(const struct nft_set *set, const struct nft_data *key, - struct nft_data *data) + const struct nft_set_ext **ext) { const struct nft_rbtree *priv = nft_set_priv(set); const struct nft_rbtree_elem *rbe, *interval = NULL; @@ -55,10 +55,9 @@ static bool nft_rbtree_lookup(const struct nft_set *set, *nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) goto out; - if (set->flags & NFT_SET_MAP) - nft_data_copy(data, nft_set_ext_data(&rbe->ext)); - spin_unlock_bh(&nft_rbtree_lock); + + *ext = &rbe->ext; return true; } } From ea4bd995b0f2fc5677ff8085e92a5d2544b9937c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 14:08:49 +0000 Subject: [PATCH 14/15] netfilter: nf_tables: add transaction helper functions Add some helper functions for building the genmask as preparation for set transactions. Also add a little documentation how this stuff actually works. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 28 ++++++++++++++++++++++++++++ net/netfilter/nf_tables_api.c | 17 ++++++----------- net/netfilter/nf_tables_core.c | 6 +----- 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f190d26bda7d7..4c46a325874e0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -720,6 +720,34 @@ void nft_unregister_expr(struct nft_expr_type *); #define MODULE_ALIAS_NFT_SET() \ MODULE_ALIAS("nft-set") +/* + * The gencursor defines two generations, the currently active and the + * next one. Objects contain a bitmask of 2 bits specifying the generations + * they're active in. A set bit means they're inactive in the generation + * represented by that bit. + * + * New objects start out as inactive in the current and active in the + * next generation. When committing the ruleset the bitmask is cleared, + * meaning they're active in all generations. When removing an object, + * it is set inactive in the next generation. After committing the ruleset, + * the objects are removed. + */ +static inline unsigned int nft_gencursor_next(const struct net *net) +{ + return net->nft.gencursor + 1 == 1 ? 1 : 0; +} + +static inline u8 nft_genmask_next(const struct net *net) +{ + return 1 << nft_gencursor_next(net); +} + +static inline u8 nft_genmask_cur(const struct net *net) +{ + /* Use ACCESS_ONCE() to prevent refetching the value for atomicity */ + return 1 << ACCESS_ONCE(net->nft.gencursor); +} + /** * struct nft_trans - nf_tables object update in transaction * diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b35512f1934ca..66fa5e935a55f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -198,36 +198,31 @@ static int nft_delchain(struct nft_ctx *ctx) static inline bool nft_rule_is_active(struct net *net, const struct nft_rule *rule) { - return (rule->genmask & (1 << net->nft.gencursor)) == 0; -} - -static inline int gencursor_next(struct net *net) -{ - return net->nft.gencursor+1 == 1 ? 1 : 0; + return (rule->genmask & nft_genmask_cur(net)) == 0; } static inline int nft_rule_is_active_next(struct net *net, const struct nft_rule *rule) { - return (rule->genmask & (1 << gencursor_next(net))) == 0; + return (rule->genmask & nft_genmask_next(net)) == 0; } static inline void nft_rule_activate_next(struct net *net, struct nft_rule *rule) { /* Now inactive, will be active in the future */ - rule->genmask = (1 << net->nft.gencursor); + rule->genmask = nft_genmask_cur(net); } static inline void nft_rule_deactivate_next(struct net *net, struct nft_rule *rule) { - rule->genmask = (1 << gencursor_next(net)); + rule->genmask = nft_genmask_next(net); } static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) { - rule->genmask &= ~(1 << gencursor_next(net)); + rule->genmask &= ~nft_genmask_next(net); } static int @@ -3626,7 +3621,7 @@ static int nf_tables_commit(struct sk_buff *skb) while (++net->nft.base_seq == 0); /* A new generation has just started */ - net->nft.gencursor = gencursor_next(net); + net->nft.gencursor = nft_gencursor_next(net); /* Make sure all packets have left the previous generation before * purging old rules. diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 4429008fe99db..ef4dfcbaf149f 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -121,11 +121,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; struct nft_stats *stats; int rulenum; - /* - * Cache cursor to avoid problems in case that the cursor is updated - * while traversing the ruleset. - */ - unsigned int gencursor = ACCESS_ONCE(net->nft.gencursor); + unsigned int gencursor = nft_genmask_cur(net); do_chain: rulenum = 0; From cc02e457bb86f7b6ffee3651bab22d104b60effb Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 14:08:50 +0000 Subject: [PATCH 15/15] netfilter: nf_tables: implement set transaction support Set elements are the last object type not supporting transaction support. Implement similar to the existing rule transactions: The global transaction counter keeps track of two generations, current and next. Each element contains a bitmask specifying in which generations it is inactive. New elements start out as inactive in the current generation and active in the next. On commit, the previous next generation becomes the current generation and the element becomes active. The bitmask is then cleared to indicate that the element is active in all future generations. If the transaction is aborted, the element is removed from the set before it becomes active. When removing an element, it gets marked as inactive in the next generation. On commit the next generation becomes active and the therefor the element inactive. It is then taken out of then set and released. On abort, the element is marked as active for the next generation again. Lookups ignore elements not active in the current generation. The current set types (hash/rbtree) both use a field in the extension area to store the generation mask. This (currently) does not require any additional memory since we have some free space in there. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 33 ++++++++++++++++----- net/netfilter/nf_tables_api.c | 33 ++++++++++++--------- net/netfilter/nft_hash.c | 38 +++++++++++++++++------- net/netfilter/nft_rbtree.c | 48 ++++++++++++++++++++++++------- 4 files changed, 112 insertions(+), 40 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4c46a325874e0..b8cd60dcb4e1f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -138,15 +138,10 @@ struct nft_userdata { /** * struct nft_set_elem - generic representation of set elements * - * @cookie: implementation specific element cookie * @key: element key * @priv: element private data and extensions - * - * The cookie can be used to store a handle to the element for subsequent - * removal. */ struct nft_set_elem { - void *cookie; struct nft_data key; void *priv; }; @@ -207,6 +202,8 @@ struct nft_set_ext; * * @lookup: look up an element within the set * @insert: insert new element into set + * @activate: activate new element in the next generation + * @deactivate: deactivate element in the next generation * @remove: remove element from set * @walk: iterate over all set elemeennts * @privsize: function to return size of set private data @@ -221,10 +218,12 @@ struct nft_set_ops { bool (*lookup)(const struct nft_set *set, const struct nft_data *key, const struct nft_set_ext **ext); - int (*get)(const struct nft_set *set, - struct nft_set_elem *elem); int (*insert)(const struct nft_set *set, const struct nft_set_elem *elem); + void (*activate)(const struct nft_set *set, + const struct nft_set_elem *elem); + void * (*deactivate)(const struct nft_set *set, + const struct nft_set_elem *elem); void (*remove)(const struct nft_set *set, const struct nft_set_elem *elem); void (*walk)(const struct nft_ctx *ctx, @@ -261,6 +260,7 @@ void nft_unregister_set(struct nft_set_ops *ops); * @nelems: number of elements * @policy: set parameterization (see enum nft_set_policies) * @ops: set ops + * @pnet: network namespace * @flags: set flags * @klen: key length * @dlen: data length @@ -277,6 +277,7 @@ struct nft_set { u16 policy; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; + possible_net_t pnet; u16 flags; u8 klen; u8 dlen; @@ -355,10 +356,12 @@ struct nft_set_ext_tmpl { /** * struct nft_set_ext - set extensions * + * @genmask: generation mask * @offset: offsets of individual extension types * @data: beginning of extension data */ struct nft_set_ext { + u8 genmask; u8 offset[NFT_SET_EXT_NUM]; char data[0]; }; @@ -748,6 +751,22 @@ static inline u8 nft_genmask_cur(const struct net *net) return 1 << ACCESS_ONCE(net->nft.gencursor); } +/* + * Set element transaction helpers + */ + +static inline bool nft_set_elem_active(const struct nft_set_ext *ext, + u8 genmask) +{ + return !(ext->genmask & genmask); +} + +static inline void nft_set_elem_change_active(const struct nft_set *set, + struct nft_set_ext *ext) +{ + ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet)); +} + /** * struct nft_trans - nf_tables object update in transaction * diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 66fa5e935a55f..5604c2df05d1a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2690,6 +2690,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, goto err2; INIT_LIST_HEAD(&set->bindings); + write_pnet(&set->pnet, net); set->ops = ops; set->ktype = ktype; set->klen = desc.klen; @@ -3221,10 +3222,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (d1.type != NFT_DATA_VALUE || d1.len != set->klen) goto err2; - err = -EEXIST; - if (set->ops->get(set, &elem) == 0) - goto err2; - nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY); if (nla[NFTA_SET_ELEM_DATA] != NULL) { @@ -3266,6 +3263,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (trans == NULL) goto err4; + ext->genmask = nft_genmask_cur(ctx->net); err = set->ops->insert(set, &elem); if (err < 0) goto err5; @@ -3353,19 +3351,24 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) goto err2; - err = set->ops->get(set, &elem); - if (err < 0) - goto err2; - trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); if (trans == NULL) { err = -ENOMEM; goto err2; } + elem.priv = set->ops->deactivate(set, &elem); + if (elem.priv == NULL) { + err = -ENOENT; + goto err3; + } + nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; + +err3: + kfree(trans); err2: nft_data_uninit(&elem.key, desc.type); err1: @@ -3692,9 +3695,11 @@ static int nf_tables_commit(struct sk_buff *skb) NFT_MSG_DELSET, GFP_KERNEL); break; case NFT_MSG_NEWSETELEM: - nf_tables_setelem_notify(&trans->ctx, - nft_trans_elem_set(trans), - &nft_trans_elem(trans), + te = (struct nft_trans_elem *)trans->data; + + te->set->ops->activate(te->set, &te->elem); + nf_tables_setelem_notify(&trans->ctx, te->set, + &te->elem, NFT_MSG_NEWSETELEM, 0); nft_trans_destroy(trans); break; @@ -3704,7 +3709,6 @@ static int nf_tables_commit(struct sk_buff *skb) nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_DELSETELEM, 0); - te->set->ops->get(te->set, &te->elem); te->set->ops->remove(te->set, &te->elem); break; } @@ -3812,11 +3816,14 @@ static int nf_tables_abort(struct sk_buff *skb) nft_trans_elem_set(trans)->nelems--; te = (struct nft_trans_elem *)trans->data; - te->set->ops->get(te->set, &te->elem); te->set->ops->remove(te->set, &te->elem); break; case NFT_MSG_DELSETELEM: + te = (struct nft_trans_elem *)trans->data; + nft_trans_elem_set(trans)->nelems++; + te->set->ops->activate(te->set, &te->elem); + nft_trans_destroy(trans); break; } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 5bee82195ef56..c7e1a9d7d46f5 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -35,6 +35,7 @@ struct nft_hash_elem { struct nft_hash_cmp_arg { const struct nft_set *set; const struct nft_data *key; + u8 genmask; }; static const struct rhashtable_params nft_hash_params; @@ -61,6 +62,8 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, if (nft_data_cmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) return 1; + if (!nft_set_elem_active(&he->ext, x->genmask)) + return 1; return 0; } @@ -71,6 +74,7 @@ static bool nft_hash_lookup(const struct nft_set *set, struct nft_hash *priv = nft_set_priv(set); const struct nft_hash_elem *he; struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_cur(read_pnet(&set->pnet)), .set = set, .key = key, }; @@ -88,6 +92,7 @@ static int nft_hash_insert(const struct nft_set *set, struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he = elem->priv; struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_next(read_pnet(&set->pnet)), .set = set, .key = &elem->key, }; @@ -96,30 +101,39 @@ static int nft_hash_insert(const struct nft_set *set, nft_hash_params); } -static void nft_hash_remove(const struct nft_set *set, - const struct nft_set_elem *elem) +static void nft_hash_activate(const struct nft_set *set, + const struct nft_set_elem *elem) { - struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he = elem->priv; - rhashtable_remove_fast(&priv->ht, elem->cookie, nft_hash_params); + nft_set_elem_change_active(set, &he->ext); } -static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) +static void *nft_hash_deactivate(const struct nft_set *set, + const struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_next(read_pnet(&set->pnet)), .set = set, .key = &elem->key, }; he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); - if (!he) - return -ENOENT; + if (he != NULL) + nft_set_elem_change_active(set, &he->ext); - elem->priv = he; + return he; +} - return 0; +static void nft_hash_remove(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he = elem->priv; + + rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); } static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, @@ -129,6 +143,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_hash_elem *he; struct rhashtable_iter hti; struct nft_set_elem elem; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int err; err = rhashtable_walk_init(&priv->ht, &hti); @@ -155,6 +170,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, if (iter->count < iter->skip) goto cont; + if (!nft_set_elem_active(&he->ext, genmask)) + goto cont; elem.priv = he; @@ -241,8 +258,9 @@ static struct nft_set_ops nft_hash_ops __read_mostly = { .estimate = nft_hash_estimate, .init = nft_hash_init, .destroy = nft_hash_destroy, - .get = nft_hash_get, .insert = nft_hash_insert, + .activate = nft_hash_activate, + .deactivate = nft_hash_deactivate, .remove = nft_hash_remove, .lookup = nft_hash_lookup, .walk = nft_hash_walk, diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index cbba755ebebc4..42d0ca45fb9e9 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -29,6 +29,7 @@ struct nft_rbtree_elem { struct nft_set_ext ext; }; + static bool nft_rbtree_lookup(const struct nft_set *set, const struct nft_data *key, const struct nft_set_ext **ext) @@ -36,6 +37,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set, const struct nft_rbtree *priv = nft_set_priv(set); const struct nft_rbtree_elem *rbe, *interval = NULL; const struct rb_node *parent; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int d; spin_lock_bh(&nft_rbtree_lock); @@ -51,6 +53,10 @@ static bool nft_rbtree_lookup(const struct nft_set *set, parent = parent->rb_right; else { found: + if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = parent->rb_left; + continue; + } if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) && *nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) @@ -77,6 +83,7 @@ static int __nft_rbtree_insert(const struct nft_set *set, struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; struct rb_node *parent, **p; + u8 genmask = nft_genmask_next(read_pnet(&set->pnet)); int d; parent = NULL; @@ -91,8 +98,11 @@ static int __nft_rbtree_insert(const struct nft_set *set, p = &parent->rb_left; else if (d > 0) p = &parent->rb_right; - else - return -EEXIST; + else { + if (nft_set_elem_active(&rbe->ext, genmask)) + return -EEXIST; + p = &parent->rb_left; + } } rb_link_node(&new->node, parent, p); rb_insert_color(&new->node, &priv->root); @@ -116,18 +126,28 @@ static void nft_rbtree_remove(const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_rbtree *priv = nft_set_priv(set); - struct nft_rbtree_elem *rbe = elem->cookie; + struct nft_rbtree_elem *rbe = elem->priv; spin_lock_bh(&nft_rbtree_lock); rb_erase(&rbe->node, &priv->root); spin_unlock_bh(&nft_rbtree_lock); } -static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) +static void nft_rbtree_activate(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_rbtree_elem *rbe = elem->priv; + + nft_set_elem_change_active(set, &rbe->ext); +} + +static void *nft_rbtree_deactivate(const struct nft_set *set, + const struct nft_set_elem *elem) { const struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; struct nft_rbtree_elem *rbe; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int d; while (parent != NULL) { @@ -140,12 +160,15 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) else if (d > 0) parent = parent->rb_right; else { - elem->cookie = rbe; - elem->priv = rbe; - return 0; + if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = parent->rb_left; + continue; + } + nft_set_elem_change_active(set, &rbe->ext); + return rbe; } } - return -ENOENT; + return NULL; } static void nft_rbtree_walk(const struct nft_ctx *ctx, @@ -156,13 +179,17 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_rbtree_elem *rbe; struct nft_set_elem elem; struct rb_node *node; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); spin_lock_bh(&nft_rbtree_lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + rbe = rb_entry(node, struct nft_rbtree_elem, node); + if (iter->count < iter->skip) goto cont; + if (!nft_set_elem_active(&rbe->ext, genmask)) + goto cont; - rbe = rb_entry(node, struct nft_rbtree_elem, node); elem.priv = rbe; iter->err = iter->fn(ctx, set, iter, &elem); @@ -228,7 +255,8 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = { .destroy = nft_rbtree_destroy, .insert = nft_rbtree_insert, .remove = nft_rbtree_remove, - .get = nft_rbtree_get, + .deactivate = nft_rbtree_deactivate, + .activate = nft_rbtree_activate, .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, .features = NFT_SET_INTERVAL | NFT_SET_MAP,