Skip to content

Commit

Permalink
bpf: Add skb dynptrs
Browse files Browse the repository at this point in the history
Add skb dynptrs, which are dynptrs whose underlying pointer points
to a skb. The dynptr acts on skb data. skb dynptrs have two main
benefits. One is that they allow operations on sizes that are not
statically known at compile-time (eg variable-sized accesses).
Another is that parsing the packet data through dynptrs (instead of
through direct access of skb->data and skb->data_end) can be more
ergonomic and less brittle (eg does not need manual if checking for
being within bounds of data_end).

For bpf prog types that don't support writes on skb data, the dynptr is
read-only (bpf_dynptr_write() will return an error)

For reads and writes through the bpf_dynptr_read() and bpf_dynptr_write()
interfaces, reading and writing from/to data in the head as well as from/to
non-linear paged buffers is supported. Data slices through the
bpf_dynptr_data API are not supported; instead bpf_dynptr_slice() and
bpf_dynptr_slice_rdwr() (added in subsequent commit) should be used.

For examples of how skb dynptrs can be used, please see the attached
selftests.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Link: https://lore.kernel.org/r/20230301154953.641654-8-joannelkoong@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Joanne Koong authored and Alexei Starovoitov committed Mar 1, 2023
1 parent d96d937 commit b5964b9
Show file tree
Hide file tree
Showing 8 changed files with 261 additions and 19 deletions.
14 changes: 13 additions & 1 deletion include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -607,11 +607,14 @@ enum bpf_type_flag {
*/
NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS),

/* DYNPTR points to sk_buff */
DYNPTR_TYPE_SKB = BIT(15 + BPF_BASE_TYPE_BITS),

__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};

#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF)
#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB)

/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
Expand Down Expand Up @@ -1146,6 +1149,8 @@ enum bpf_dynptr_type {
BPF_DYNPTR_TYPE_LOCAL,
/* Underlying data is a ringbuf record */
BPF_DYNPTR_TYPE_RINGBUF,
/* Underlying data is a sk_buff */
BPF_DYNPTR_TYPE_SKB,
};

int bpf_dynptr_check_size(u32 size);
Expand Down Expand Up @@ -2846,6 +2851,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size);
int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
struct bpf_dynptr_kern *ptr);
#else
static inline bool bpf_sock_common_is_valid_access(int off, int size,
enum bpf_access_type type,
Expand All @@ -2867,6 +2874,11 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
{
return 0;
}
static inline int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
struct bpf_dynptr_kern *ptr)
{
return -EOPNOTSUPP;
}
#endif

#ifdef CONFIG_INET
Expand Down
18 changes: 18 additions & 0 deletions include/linux/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -1542,4 +1542,22 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index
return XDP_REDIRECT;
}

#ifdef CONFIG_NET
int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len);
int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from,
u32 len, u64 flags);
#else /* CONFIG_NET */
static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset,
void *to, u32 len)
{
return -EOPNOTSUPP;
}

static inline int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset,
const void *from, u32 len, u64 flags)
{
return -EOPNOTSUPP;
}
#endif /* CONFIG_NET */

#endif /* __LINUX_FILTER_H__ */
13 changes: 11 additions & 2 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -5325,18 +5325,27 @@ union bpf_attr {
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
* *flags* is currently unused.
*
* *flags* must be 0 except for skb-type dynptrs.
*
* For skb-type dynptrs:
* * For *flags*, please see the flags accepted by
* **bpf_skb_store_bytes**\ ().
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
* is a read-only dynptr or if *flags* is not 0.
* is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
* other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
* Get a pointer to the underlying dynptr data.
*
* *len* must be a statically known value. The returned data slice
* is invalidated whenever the dynptr is invalidated.
*
* skb type dynptrs may not use bpf_dynptr_data. They should
* instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr.
* Return
* Pointer to the underlying dynptr data, NULL if the dynptr is
* read-only, if the dynptr is invalid, or if the offset and length
Expand Down
18 changes: 18 additions & 0 deletions kernel/bpf/btf.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,11 @@ enum btf_kfunc_hook {
BTF_KFUNC_HOOK_TRACING,
BTF_KFUNC_HOOK_SYSCALL,
BTF_KFUNC_HOOK_FMODRET,
BTF_KFUNC_HOOK_CGROUP_SKB,
BTF_KFUNC_HOOK_SCHED_ACT,
BTF_KFUNC_HOOK_SK_SKB,
BTF_KFUNC_HOOK_SOCKET_FILTER,
BTF_KFUNC_HOOK_LWT,
BTF_KFUNC_HOOK_MAX,
};

Expand Down Expand Up @@ -7708,6 +7713,19 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
return BTF_KFUNC_HOOK_TRACING;
case BPF_PROG_TYPE_SYSCALL:
return BTF_KFUNC_HOOK_SYSCALL;
case BPF_PROG_TYPE_CGROUP_SKB:
return BTF_KFUNC_HOOK_CGROUP_SKB;
case BPF_PROG_TYPE_SCHED_ACT:
return BTF_KFUNC_HOOK_SCHED_ACT;
case BPF_PROG_TYPE_SK_SKB:
return BTF_KFUNC_HOOK_SK_SKB;
case BPF_PROG_TYPE_SOCKET_FILTER:
return BTF_KFUNC_HOOK_SOCKET_FILTER;
case BPF_PROG_TYPE_LWT_OUT:
case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_XMIT:
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
return BTF_KFUNC_HOOK_LWT;
default:
return BTF_KFUNC_HOOK_MAX;
}
Expand Down
76 changes: 62 additions & 14 deletions kernel/bpf/helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -1420,11 +1420,21 @@ static bool bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr)
return ptr->size & DYNPTR_RDONLY_BIT;
}

void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
{
ptr->size |= DYNPTR_RDONLY_BIT;
}

static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type)
{
ptr->size |= type << DYNPTR_TYPE_SHIFT;
}

static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr)
{
return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT;
}

u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr)
{
return ptr->size & DYNPTR_SIZE_MASK;
Expand Down Expand Up @@ -1497,6 +1507,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
u32, offset, u64, flags)
{
enum bpf_dynptr_type type;
int err;

if (!src->data || flags)
Expand All @@ -1506,13 +1517,23 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern
if (err)
return err;

/* Source and destination may possibly overlap, hence use memmove to
* copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
* pointing to overlapping PTR_TO_MAP_VALUE regions.
*/
memmove(dst, src->data + src->offset + offset, len);
type = bpf_dynptr_get_type(src);

return 0;
switch (type) {
case BPF_DYNPTR_TYPE_LOCAL:
case BPF_DYNPTR_TYPE_RINGBUF:
/* Source and destination may possibly overlap, hence use memmove to
* copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
* pointing to overlapping PTR_TO_MAP_VALUE regions.
*/
memmove(dst, src->data + src->offset + offset, len);
return 0;
case BPF_DYNPTR_TYPE_SKB:
return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
default:
WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
return -EFAULT;
}
}

static const struct bpf_func_proto bpf_dynptr_read_proto = {
Expand All @@ -1529,22 +1550,36 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
u32, len, u64, flags)
{
enum bpf_dynptr_type type;
int err;

if (!dst->data || flags || bpf_dynptr_is_rdonly(dst))
if (!dst->data || bpf_dynptr_is_rdonly(dst))
return -EINVAL;

err = bpf_dynptr_check_off_len(dst, offset, len);
if (err)
return err;

/* Source and destination may possibly overlap, hence use memmove to
* copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
* pointing to overlapping PTR_TO_MAP_VALUE regions.
*/
memmove(dst->data + dst->offset + offset, src, len);
type = bpf_dynptr_get_type(dst);

return 0;
switch (type) {
case BPF_DYNPTR_TYPE_LOCAL:
case BPF_DYNPTR_TYPE_RINGBUF:
if (flags)
return -EINVAL;
/* Source and destination may possibly overlap, hence use memmove to
* copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
* pointing to overlapping PTR_TO_MAP_VALUE regions.
*/
memmove(dst->data + dst->offset + offset, src, len);
return 0;
case BPF_DYNPTR_TYPE_SKB:
return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len,
flags);
default:
WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
return -EFAULT;
}
}

static const struct bpf_func_proto bpf_dynptr_write_proto = {
Expand All @@ -1560,6 +1595,7 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = {

BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
{
enum bpf_dynptr_type type;
int err;

if (!ptr->data)
Expand All @@ -1572,7 +1608,19 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3
if (bpf_dynptr_is_rdonly(ptr))
return 0;

return (unsigned long)(ptr->data + ptr->offset + offset);
type = bpf_dynptr_get_type(ptr);

switch (type) {
case BPF_DYNPTR_TYPE_LOCAL:
case BPF_DYNPTR_TYPE_RINGBUF:
return (unsigned long)(ptr->data + ptr->offset + offset);
case BPF_DYNPTR_TYPE_SKB:
/* skb dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */
return 0;
default:
WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type);
return 0;
}
}

static const struct bpf_func_proto bpf_dynptr_data_proto = {
Expand Down
61 changes: 61 additions & 0 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -750,6 +750,8 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
return BPF_DYNPTR_TYPE_LOCAL;
case DYNPTR_TYPE_RINGBUF:
return BPF_DYNPTR_TYPE_RINGBUF;
case DYNPTR_TYPE_SKB:
return BPF_DYNPTR_TYPE_SKB;
default:
return BPF_DYNPTR_TYPE_INVALID;
}
Expand Down Expand Up @@ -6295,6 +6297,9 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn
case DYNPTR_TYPE_RINGBUF:
err_extra = "ringbuf";
break;
case DYNPTR_TYPE_SKB:
err_extra = "skb ";
break;
default:
err_extra = "<unknown>";
break;
Expand Down Expand Up @@ -6737,6 +6742,24 @@ static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state
return state->stack[spi].spilled_ptr.ref_obj_id;
}

static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
struct bpf_reg_state *reg)
{
struct bpf_func_state *state = func(env, reg);
int spi;

if (reg->type == CONST_PTR_TO_DYNPTR)
return reg->dynptr.type;

spi = __get_spi(reg->off);
if (spi < 0) {
verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
return BPF_DYNPTR_TYPE_INVALID;
}

return state->stack[spi].spilled_ptr.dynptr.type;
}

static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_call_arg_meta *meta,
const struct bpf_func_proto *fn,
Expand Down Expand Up @@ -8383,6 +8406,27 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn

break;
}
case BPF_FUNC_dynptr_write:
{
enum bpf_dynptr_type dynptr_type;
struct bpf_reg_state *reg;

reg = get_dynptr_arg_reg(env, fn, regs);
if (!reg)
return -EFAULT;

dynptr_type = dynptr_get_type(env, reg);
if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
return -EFAULT;

if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
/* this will trigger clear_all_pkt_pointers(), which will
* invalidate all dynptr slices associated with the skb
*/
changes_data = true;

break;
}
case BPF_FUNC_user_ringbuf_drain:
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_user_ringbuf_callback_state);
Expand Down Expand Up @@ -8898,6 +8942,7 @@ enum special_kfunc_type {
KF_bpf_rbtree_remove,
KF_bpf_rbtree_add,
KF_bpf_rbtree_first,
KF_bpf_dynptr_from_skb,
};

BTF_SET_START(special_kfunc_set)
Expand All @@ -8912,6 +8957,7 @@ BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add)
BTF_ID(func, bpf_rbtree_first)
BTF_ID(func, bpf_dynptr_from_skb)
BTF_SET_END(special_kfunc_set)

BTF_ID_LIST(special_kfunc_list)
Expand All @@ -8928,6 +8974,7 @@ BTF_ID(func, bpf_rcu_read_unlock)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add)
BTF_ID(func, bpf_rbtree_first)
BTF_ID(func, bpf_dynptr_from_skb)

static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
{
Expand Down Expand Up @@ -9682,6 +9729,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (is_kfunc_arg_uninit(btf, &args[i]))
dynptr_arg_type |= MEM_UNINIT;

if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb])
dynptr_arg_type |= DYNPTR_TYPE_SKB;

ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type);
if (ret < 0)
return ret;
Expand Down Expand Up @@ -16356,6 +16406,17 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
*cnt = 1;
} else if (desc->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
bool seen_direct_write = env->seen_direct_write;
bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);

if (is_rdonly)
insn->imm = BPF_CALL_IMM(bpf_dynptr_from_skb_rdonly);

/* restore env->seen_direct_write to its original value, since
* may_access_direct_pkt_data mutates it
*/
env->seen_direct_write = seen_direct_write;
}
return 0;
}
Expand Down
Loading

0 comments on commit b5964b9

Please sign in to comment.