From eed807f626101f6a4227bd53942892c5983b95a7 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Wed, 21 Sep 2022 18:48:25 +0200 Subject: [PATCH 1/3] bpf: Tweak definition of KF_TRUSTED_ARGS Instead of forcing all arguments to be referenced pointers with non-zero reg->ref_obj_id, tweak the definition of KF_TRUSTED_ARGS to mean that only PTR_TO_BTF_ID (and socket types translated to PTR_TO_BTF_ID) have that constraint, and require their offset to be set to 0. The rest of pointer types are also accomodated in this definition of trusted pointers, but with more relaxed rules regarding offsets. The inherent meaning of setting this flag is that all kfunc pointer arguments have a guranteed lifetime, and kernel object pointers (PTR_TO_BTF_ID, PTR_TO_CTX) are passed in their unmodified form (with offset 0). In general, this is not true for PTR_TO_BTF_ID as it can be obtained using pointer walks. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/cdede0043c47ed7a357f0a915d16f9ce06a1d589.1663778601.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- Documentation/bpf/kfuncs.rst | 24 ++++++++++++++++-------- kernel/bpf/btf.c | 18 +++++++++++++----- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 781731749e558..0f858156371de 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -137,14 +137,22 @@ KF_ACQUIRE and KF_RET_NULL flags. -------------------------- The KF_TRUSTED_ARGS flag is used for kfuncs taking pointer arguments. It -indicates that the all pointer arguments will always be refcounted, and have -their offset set to 0. It can be used to enforce that a pointer to a refcounted -object acquired from a kfunc or BPF helper is passed as an argument to this -kfunc without any modifications (e.g. pointer arithmetic) such that it is -trusted and points to the original object. This flag is often used for kfuncs -that operate (change some property, perform some operation) on an object that -was obtained using an acquire kfunc. Such kfuncs need an unchanged pointer to -ensure the integrity of the operation being performed on the expected object. +indicates that the all pointer arguments will always have a guaranteed lifetime, +and pointers to kernel objects are always passed to helpers in their unmodified +form (as obtained from acquire kfuncs). + +It can be used to enforce that a pointer to a refcounted object acquired from a +kfunc or BPF helper is passed as an argument to this kfunc without any +modifications (e.g. pointer arithmetic) such that it is trusted and points to +the original object. + +Meanwhile, it is also allowed pass pointers to normal memory to such kfuncs, +but those can have a non-zero offset. + +This flag is often used for kfuncs that operate (change some property, perform +some operation) on an object that was obtained using an acquire kfunc. Such +kfuncs need an unchanged pointer to ensure the integrity of the operation being +performed on the expected object. 2.4.6 KF_SLEEPABLE flag ----------------------- diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 13faede0f2b48..a44ad4b347ff2 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6227,7 +6227,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, bool processing_call) { enum bpf_prog_type prog_type = resolve_prog_type(env->prog); - bool rel = false, kptr_get = false, trusted_arg = false; + bool rel = false, kptr_get = false, trusted_args = false; bool sleepable = false; struct bpf_verifier_log *log = &env->log; u32 i, nargs, ref_id, ref_obj_id = 0; @@ -6265,7 +6265,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, /* Only kfunc can be release func */ rel = kfunc_meta->flags & KF_RELEASE; kptr_get = kfunc_meta->flags & KF_KPTR_GET; - trusted_arg = kfunc_meta->flags & KF_TRUSTED_ARGS; + trusted_args = kfunc_meta->flags & KF_TRUSTED_ARGS; sleepable = kfunc_meta->flags & KF_SLEEPABLE; } @@ -6276,6 +6276,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, enum bpf_arg_type arg_type = ARG_DONTCARE; u32 regno = i + 1; struct bpf_reg_state *reg = ®s[regno]; + bool obj_ptr = false; t = btf_type_skip_modifiers(btf, args[i].type, NULL); if (btf_type_is_scalar(t)) { @@ -6323,10 +6324,17 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, return -EINVAL; } + /* These register types have special constraints wrt ref_obj_id + * and offset checks. The rest of trusted args don't. + */ + obj_ptr = reg->type == PTR_TO_CTX || reg->type == PTR_TO_BTF_ID || + reg2btf_ids[base_type(reg->type)]; + /* Check if argument must be a referenced pointer, args + i has * been verified to be a pointer (after skipping modifiers). + * PTR_TO_CTX is ok without having non-zero ref_obj_id. */ - if (is_kfunc && trusted_arg && !reg->ref_obj_id) { + if (is_kfunc && trusted_args && (obj_ptr && reg->type != PTR_TO_CTX) && !reg->ref_obj_id) { bpf_log(log, "R%d must be referenced\n", regno); return -EINVAL; } @@ -6335,7 +6343,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, ref_tname = btf_name_by_offset(btf, ref_t->name_off); /* Trusted args have the same offset checks as release arguments */ - if (trusted_arg || (rel && reg->ref_obj_id)) + if ((trusted_args && obj_ptr) || (rel && reg->ref_obj_id)) arg_type |= OBJ_RELEASE; ret = check_func_arg_reg_off(env, reg, regno, arg_type); if (ret < 0) @@ -6435,7 +6443,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, reg_ref_t->name_off); if (!btf_struct_ids_match(log, reg_btf, reg_ref_id, reg->off, btf, ref_id, - trusted_arg || (rel && reg->ref_obj_id))) { + trusted_args || (rel && reg->ref_obj_id))) { bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", func_name, i, btf_type_str(ref_t), ref_tname, From 0fabd2aa199faeb8754aee94658f2c48ccb2c8c3 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 21 Sep 2022 18:48:26 +0200 Subject: [PATCH 2/3] net: netfilter: add bpf_ct_set_nat_info kfunc helper Introduce bpf_ct_set_nat_info kfunc helper in order to set source and destination nat addresses/ports in a new allocated ct entry not inserted in the connection tracking table yet. Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/9567db2fdfa5bebe7b7cc5870f7a34549418b4fc.1663778601.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- net/netfilter/nf_conntrack_bpf.c | 47 +++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index 67df64283aef9..756ea818574e4 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -17,6 +17,7 @@ #include #include #include +#include /* bpf_ct_opts - Options for CT lookup helpers * @@ -137,7 +138,6 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple, memset(&ct->proto, 0, sizeof(ct->proto)); __nf_ct_set_timeout(ct, timeout * HZ); - ct->status |= IPS_CONFIRMED; out: if (opts->netns_id >= 0) @@ -390,6 +390,7 @@ struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i) struct nf_conn *nfct = (struct nf_conn *)nfct_i; int err; + nfct->status |= IPS_CONFIRMED; err = nf_conntrack_hash_check_insert(nfct); if (err < 0) { nf_conntrack_free(nfct); @@ -475,6 +476,49 @@ int bpf_ct_change_status(struct nf_conn *nfct, u32 status) return nf_ct_change_status_common(nfct, status); } +/* bpf_ct_set_nat_info - Set source or destination nat address + * + * Set source or destination nat address of the newly allocated + * nf_conn before insertion. This must be invoked for referenced + * PTR_TO_BTF_ID to nf_conn___init. + * + * Parameters: + * @nfct - Pointer to referenced nf_conn object, obtained using + * bpf_xdp_ct_alloc or bpf_skb_ct_alloc. + * @addr - Nat source/destination address + * @port - Nat source/destination port. Non-positive values are + * interpreted as select a random port. + * @manip - NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST + */ +int bpf_ct_set_nat_info(struct nf_conn___init *nfct, + union nf_inet_addr *addr, int port, + enum nf_nat_manip_type manip) +{ +#if ((IS_MODULE(CONFIG_NF_NAT) && IS_MODULE(CONFIG_NF_CONNTRACK)) || \ + IS_BUILTIN(CONFIG_NF_NAT)) + struct nf_conn *ct = (struct nf_conn *)nfct; + u16 proto = nf_ct_l3num(ct); + struct nf_nat_range2 range; + + if (proto != NFPROTO_IPV4 && proto != NFPROTO_IPV6) + return -EINVAL; + + memset(&range, 0, sizeof(struct nf_nat_range2)); + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = *addr; + range.max_addr = range.min_addr; + if (port > 0) { + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + range.min_proto.all = cpu_to_be16(port); + range.max_proto.all = range.min_proto.all; + } + + return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0; +#else + return -EOPNOTSUPP; +#endif +} + __diag_pop() BTF_SET8_START(nf_ct_kfunc_set) @@ -488,6 +532,7 @@ BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS) BTF_SET8_END(nf_ct_kfunc_set) static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = { From b06b45e82b59b69f5ac6b3916ac5dbd0294efc95 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 21 Sep 2022 18:48:27 +0200 Subject: [PATCH 3/3] selftests/bpf: add tests for bpf_ct_set_nat_info kfunc Introduce self-tests for bpf_ct_set_nat_info kfunc used to set the source or destination nat addresses/ports. Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/803e33294e247744d466943105879414344d3235.1663778601.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/config | 1 + .../testing/selftests/bpf/prog_tests/bpf_nf.c | 10 ++++--- .../testing/selftests/bpf/progs/test_bpf_nf.c | 27 +++++++++++++++++++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 905a9be8d0a2a..9213565c03117 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -63,6 +63,7 @@ CONFIG_NF_CONNTRACK=y CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_DEFRAG_IPV6=y +CONFIG_NF_NAT=y CONFIG_RC_CORE=y CONFIG_SECURITY=y CONFIG_SECURITYFS=y diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index 0677a51694c90..8a838ea8bdf3b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -26,7 +26,10 @@ enum { TEST_TC_BPF, }; -#define TIMEOUT_MS 3000 +#define TIMEOUT_MS 3000 +#define IPS_STATUS_MASK (IPS_CONFIRMED | IPS_SEEN_REPLY | \ + IPS_SRC_NAT_DONE | IPS_DST_NAT_DONE | \ + IPS_SRC_NAT | IPS_DST_NAT) static int connect_to_server(int srv_fd) { @@ -114,10 +117,11 @@ static void test_bpf_nf_ct(int mode) ASSERT_GT(skel->bss->test_delta_timeout, 8, "Test for min ct timeout update"); ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update"); ASSERT_EQ(skel->bss->test_insert_lookup_mark, 77, "Test for insert and lookup mark value"); - ASSERT_EQ(skel->bss->test_status, IPS_CONFIRMED | IPS_SEEN_REPLY, - "Test for ct status update "); + ASSERT_EQ(skel->bss->test_status, IPS_STATUS_MASK, "Test for ct status update "); ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup"); ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark"); + ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting"); + ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting"); end: if (srv_client_fd != -1) close(srv_client_fd); diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c index 88842da86ddc8..227e85e85ddaf 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #define EAFNOSUPPORT 97 #define EPROTO 71 @@ -24,6 +25,8 @@ int test_succ_lookup = -ENOENT; u32 test_delta_timeout = 0; u32 test_status = 0; u32 test_insert_lookup_mark = 0; +int test_snat_addr = -EINVAL; +int test_dnat_addr = -EINVAL; __be32 saddr = 0; __be16 sport = 0; __be32 daddr = 0; @@ -54,6 +57,8 @@ void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym; int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym; int bpf_ct_set_status(struct nf_conn *, u32) __ksym; int bpf_ct_change_status(struct nf_conn *, u32) __ksym; +int bpf_ct_set_nat_info(struct nf_conn *, union nf_inet_addr *, + int port, enum nf_nat_manip_type) __ksym; static __always_inline void nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, @@ -141,11 +146,22 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); if (ct) { + __u16 sport = bpf_get_prandom_u32(); + __u16 dport = bpf_get_prandom_u32(); + union nf_inet_addr saddr = {}; + union nf_inet_addr daddr = {}; struct nf_conn *ct_ins; bpf_ct_set_timeout(ct, 10000); ct->mark = 77; + /* snat */ + saddr.ip = bpf_get_prandom_u32(); + bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC); + /* dnat */ + daddr.ip = bpf_get_prandom_u32(); + bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST); + ct_ins = bpf_ct_insert_entry(ct); if (ct_ins) { struct nf_conn *ct_lk; @@ -153,6 +169,17 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); if (ct_lk) { + struct nf_conntrack_tuple *tuple; + + /* check snat and dnat addresses */ + tuple = &ct_lk->tuplehash[IP_CT_DIR_REPLY].tuple; + if (tuple->dst.u3.ip == saddr.ip && + tuple->dst.u.all == bpf_htons(sport)) + test_snat_addr = 0; + if (tuple->src.u3.ip == daddr.ip && + tuple->src.u.all == bpf_htons(dport)) + test_dnat_addr = 0; + /* update ct entry timeout */ bpf_ct_change_timeout(ct_lk, 10000); test_delta_timeout = ct_lk->timeout - bpf_jiffies64();