Skip to content

Commit

Permalink
Merge branch 'bpf: allow cgroup progs to export custom retval to user…
Browse files Browse the repository at this point in the history
…space'

YiFei Zhu says:

====================

Right now, most cgroup hooks are best used for permission checks. They
can only reject a syscall with -EPERM, so a cause of a rejection, if
the rejected by eBPF cgroup hooks, is ambiguous to userspace.
Additionally, if the syscalls are implemented in eBPF, all permission
checks and the implementation has to happen within the same filter,
as programs executed later in the series of progs are unaware of the
return values return by the previous progs.

This patch series adds two helpers, bpf_get_retval and bpf_set_retval,
that allows hooks to get/set the return value of syscall to userspace.
This also allows later progs to retrieve retval set by previous progs.

For legacy programs that rejects a syscall without setting the retval,
for backwards compatibility, if a prog rejects without itself or a
prior prog setting retval to an -err, the retval is set by the kernel
to -EPERM.

For getsockopt hooks that has ctx->retval, this variable mirrors that
that accessed by the helpers.

Additionally, the following user-visible behavior for getsockopt
hooks has changed:
  - If a prior filter rejected the syscall, it will be visible
    in ctx->retval.
  - Attempting to change the retval arbitrarily is now allowed and
    will not cause an -EFAULT.
  - If kernel rejects a getsockopt syscall before running the hooks,
    the error will be visible in ctx->retval. Returning 0 from the
    prog will not overwrite the error to -EPERM unless there is an
    explicit call of bpf_set_retval(-EPERM)

Tests have been added in this series to test the behavior of the helper
with cgroup setsockopt getsockopt hooks.

Patch 1 changes the API of macros to prepare for the next patch and
  should be a no-op.
Patch 2 moves ctx->retval to a struct pointed to by current
  task_struct.
Patch 3 implements the helpers.
Patch 4 tests the behaviors of the helpers.
Patch 5 updates a test after the test broke due to the visible changes.

v1 -> v2:
  - errno -> retval
  - split one helper to get & set helpers
  - allow retval to be set arbitrarily in the general case
  - made the helper retval and context retval mirror each other
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Alexei Starovoitov committed Jan 19, 2022
2 parents d81283d + 1080ef5 commit 4e95074
Show file tree
Hide file tree
Showing 11 changed files with 751 additions and 89 deletions.
34 changes: 20 additions & 14 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -1245,6 +1245,7 @@ struct bpf_run_ctx {};
struct bpf_cg_run_ctx {
struct bpf_run_ctx run_ctx;
const struct bpf_prog_array_item *prog_item;
int retval;
};

struct bpf_trace_run_ctx {
Expand Down Expand Up @@ -1277,19 +1278,19 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)

typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);

static __always_inline u32
static __always_inline int
BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
const void *ctx, bpf_prog_run_fn run_prog,
u32 *ret_flags)
int retval, u32 *ret_flags)
{
const struct bpf_prog_array_item *item;
const struct bpf_prog *prog;
const struct bpf_prog_array *array;
struct bpf_run_ctx *old_run_ctx;
struct bpf_cg_run_ctx run_ctx;
u32 ret = 1;
u32 func_ret;

run_ctx.retval = retval;
migrate_disable();
rcu_read_lock();
array = rcu_dereference(array_rcu);
Expand All @@ -1298,41 +1299,44 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
while ((prog = READ_ONCE(item->prog))) {
run_ctx.prog_item = item;
func_ret = run_prog(prog, ctx);
ret &= (func_ret & 1);
if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
run_ctx.retval = -EPERM;
*(ret_flags) |= (func_ret >> 1);
item++;
}
bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock();
migrate_enable();
return ret;
return run_ctx.retval;
}

static __always_inline u32
static __always_inline int
BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
const void *ctx, bpf_prog_run_fn run_prog)
const void *ctx, bpf_prog_run_fn run_prog,
int retval)
{
const struct bpf_prog_array_item *item;
const struct bpf_prog *prog;
const struct bpf_prog_array *array;
struct bpf_run_ctx *old_run_ctx;
struct bpf_cg_run_ctx run_ctx;
u32 ret = 1;

run_ctx.retval = retval;
migrate_disable();
rcu_read_lock();
array = rcu_dereference(array_rcu);
item = &array->items[0];
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
while ((prog = READ_ONCE(item->prog))) {
run_ctx.prog_item = item;
ret &= run_prog(prog, ctx);
if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
run_ctx.retval = -EPERM;
item++;
}
bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock();
migrate_enable();
return ret;
return run_ctx.retval;
}

static __always_inline u32
Expand Down Expand Up @@ -1385,19 +1389,21 @@ BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
* 0: NET_XMIT_SUCCESS skb should be transmitted
* 1: NET_XMIT_DROP skb should be dropped and cn
* 2: NET_XMIT_CN skb should be transmitted and cn
* 3: -EPERM skb should be dropped
* 3: -err skb should be dropped
*/
#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \
({ \
u32 _flags = 0; \
bool _cn; \
u32 _ret; \
_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
_cn = _flags & BPF_RET_SET_CN; \
if (_ret) \
if (_ret && !IS_ERR_VALUE((long)_ret)) \
_ret = -EFAULT; \
if (!_ret) \
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
else \
_ret = (_cn ? NET_XMIT_DROP : -EPERM); \
_ret = (_cn ? NET_XMIT_DROP : _ret); \
_ret; \
})

Expand Down
5 changes: 4 additions & 1 deletion include/linux/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -1356,7 +1356,10 @@ struct bpf_sockopt_kern {
s32 level;
s32 optname;
s32 optlen;
s32 retval;
/* for retval in struct bpf_cg_run_ctx */
struct task_struct *current_task;
/* Temporary "register" for indirect stores to ppos. */
u64 tmp_reg;
};

int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
Expand Down
18 changes: 18 additions & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -5033,6 +5033,22 @@ union bpf_attr {
*
* Return
* The number of arguments of the traced function.
*
* int bpf_get_retval(void)
* Description
* Get the syscall's return value that will be returned to userspace.
*
* This helper is currently supported by cgroup programs only.
* Return
* The syscall's return value.
*
* int bpf_set_retval(int retval)
* Description
* Set the syscall's return value that will be returned to userspace.
*
* This helper is currently supported by cgroup programs only.
* Return
* 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
Expand Down Expand Up @@ -5221,6 +5237,8 @@ union bpf_attr {
FN(get_func_arg), \
FN(get_func_ret), \
FN(get_func_arg_cnt), \
FN(get_retval), \
FN(set_retval), \
/* */

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
Expand Down
Loading

0 comments on commit 4e95074

Please sign in to comment.