Skip to content

Commit

Permalink
Merge branch 'Add bpf_copy_from_user_task helper and sleepable bpf it…
Browse files Browse the repository at this point in the history
…erator programs'

Kenny Yu says:

====================

This patch series makes the following changes:
* Adds a new bpf helper `bpf_copy_from_user_task` to read user space
  memory from a different task.
* Adds the ability to create sleepable bpf iterator programs.

As an example of how this will be used, at Meta we are using bpf task
iterator programs and this new bpf helper to read C++ async stack traces of
a running process for debugging C++ binaries in production.

Changes since v6:
* Split first patch into two patches: first patch to add support
  for bpf iterators to use sleepable helpers, and the second to add
  the new bpf helper.
* Simplify implementation of `bpf_copy_from_user_task` based on feedback.
* Add to docs that the destination buffer will be zero-ed on error.

Changes since v5:
* Rename `bpf_access_process_vm` to `bpf_copy_from_user_task`.
* Change return value to be all-or-nothing. If we get a partial read,
  memset all bytes to 0 and return -EFAULT.
* Add to docs that the helper can only be used by sleepable BPF programs.
* Fix nits in selftests.

Changes since v4:
* Make `flags` into u64.
* Use `user_ptr` arg name to be consistent with `bpf_copy_from_user`.
* Add an extra check in selftests to verify access_process_vm calls
  succeeded.

Changes since v3:
* Check if `flags` is 0 and return -EINVAL if not.
* Rebase on latest bpf-next branch and fix merge conflicts.

Changes since v2:
* Reorder arguments in `bpf_access_process_vm` to match existing related
  bpf helpers (e.g. `bpf_probe_read_kernel`, `bpf_probe_read_user`,
  `bpf_copy_from_user`).
* `flags` argument is provided for future extensibility and is not
  currently used, and we always invoke `access_process_vm` with no flags.
* Merge bpf helper patch and `bpf_iter_run_prog` patch together for better
  bisectability in case of failures.
* Clean up formatting and comments in selftests.

Changes since v1:
* Fixed "Invalid wait context" issue in `bpf_iter_run_prog` by using
  `rcu_read_lock_trace()` for sleepable bpf iterator programs.
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Alexei Starovoitov committed Jan 25, 2022
2 parents caaba96 + 45105c2 commit c45c79e
Show file tree
Hide file tree
Showing 9 changed files with 149 additions and 5 deletions.
1 change: 1 addition & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -2243,6 +2243,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
extern const struct bpf_func_proto bpf_find_vma_proto;
extern const struct bpf_func_proto bpf_loop_proto;
extern const struct bpf_func_proto bpf_strncmp_proto;
extern const struct bpf_func_proto bpf_copy_from_user_task_proto;

const struct bpf_func_proto *tracing_prog_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
Expand Down
11 changes: 11 additions & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -5076,6 +5076,16 @@ union bpf_attr {
* associated to *xdp_md*, at *offset*.
* Return
* 0 on success, or a negative error in case of failure.
*
* long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
* Description
* Read *size* bytes from user space address *user_ptr* in *tsk*'s
* address space, and stores the data in *dst*. *flags* is not
* used yet and is provided for future extensibility. This helper
* can only be used by sleepable programs.
* Return
* 0 on success, or a negative error in case of failure. On error
* *dst* buffer is zeroed out.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
Expand Down Expand Up @@ -5269,6 +5279,7 @@ union bpf_attr {
FN(xdp_get_buff_len), \
FN(xdp_load_bytes), \
FN(xdp_store_bytes), \
FN(copy_from_user_task), \
/* */

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
Expand Down
20 changes: 15 additions & 5 deletions kernel/bpf/bpf_iter.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <linux/anon_inodes.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/rcupdate_trace.h>

struct bpf_iter_target_info {
struct list_head list;
Expand Down Expand Up @@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
{
int ret;

rcu_read_lock();
migrate_disable();
ret = bpf_prog_run(prog, ctx);
migrate_enable();
rcu_read_unlock();
if (prog->aux->sleepable) {
rcu_read_lock_trace();
migrate_disable();
might_fault();
ret = bpf_prog_run(prog, ctx);
migrate_enable();
rcu_read_unlock_trace();
} else {
rcu_read_lock();
migrate_disable();
ret = bpf_prog_run(prog, ctx);
migrate_enable();
rcu_read_unlock();
}

/* bpf program can only return 0 or 1:
* 0 : okay
Expand Down
34 changes: 34 additions & 0 deletions kernel/bpf/helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <linux/pid_namespace.h>
#include <linux/proc_ns.h>
#include <linux/security.h>
#include <linux/btf_ids.h>

#include "../../lib/kstrtox.h"

Expand Down Expand Up @@ -671,6 +672,39 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
.arg3_type = ARG_ANYTHING,
};

BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
const void __user *, user_ptr, struct task_struct *, tsk, u64, flags)
{
int ret;

/* flags is not used yet */
if (unlikely(flags))
return -EINVAL;

if (unlikely(!size))
return 0;

ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
if (ret == size)
return 0;

memset(dst, 0, size);
/* Return -EFAULT for partial read */
return ret < 0 ? ret : -EFAULT;
}

const struct bpf_func_proto bpf_copy_from_user_task_proto = {
.func = bpf_copy_from_user_task,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_BTF_ID,
.arg4_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
.arg5_type = ARG_ANYTHING
};

BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
{
if (cpu >= nr_cpu_ids)
Expand Down
2 changes: 2 additions & 0 deletions kernel/trace/bpf_trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -1235,6 +1235,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_task_stack_proto;
case BPF_FUNC_copy_from_user:
return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
case BPF_FUNC_copy_from_user_task:
return prog->aux->sleepable ? &bpf_copy_from_user_task_proto : NULL;
case BPF_FUNC_snprintf_btf:
return &bpf_snprintf_btf_proto;
case BPF_FUNC_per_cpu_ptr:
Expand Down
11 changes: 11 additions & 0 deletions tools/include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -5076,6 +5076,16 @@ union bpf_attr {
* associated to *xdp_md*, at *offset*.
* Return
* 0 on success, or a negative error in case of failure.
*
* long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
* Description
* Read *size* bytes from user space address *user_ptr* in *tsk*'s
* address space, and stores the data in *dst*. *flags* is not
* used yet and is provided for future extensibility. This helper
* can only be used by sleepable programs.
* Return
* 0 on success, or a negative error in case of failure. On error
* *dst* buffer is zeroed out.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
Expand Down Expand Up @@ -5269,6 +5279,7 @@ union bpf_attr {
FN(xdp_get_buff_len), \
FN(xdp_load_bytes), \
FN(xdp_store_bytes), \
FN(copy_from_user_task), \
/* */

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
Expand Down
1 change: 1 addition & 0 deletions tools/lib/bpf/libbpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -8612,6 +8612,7 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("lsm/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
SEC_DEF("iter.s/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE),
SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
Expand Down
20 changes: 20 additions & 0 deletions tools/testing/selftests/bpf/prog_tests/bpf_iter.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,24 @@ static void test_task(void)
bpf_iter_task__destroy(skel);
}

static void test_task_sleepable(void)
{
struct bpf_iter_task *skel;

skel = bpf_iter_task__open_and_load();
if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load"))
return;

do_dummy_read(skel->progs.dump_task_sleepable);

ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0,
"num_expected_failure_copy_from_user_task");
ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0,
"num_success_copy_from_user_task");

bpf_iter_task__destroy(skel);
}

static void test_task_stack(void)
{
struct bpf_iter_task_stack *skel;
Expand Down Expand Up @@ -1252,6 +1270,8 @@ void test_bpf_iter(void)
test_bpf_map();
if (test__start_subtest("task"))
test_task();
if (test__start_subtest("task_sleepable"))
test_task_sleepable();
if (test__start_subtest("task_stack"))
test_task_stack();
if (test__start_subtest("task_file"))
Expand Down
54 changes: 54 additions & 0 deletions tools/testing/selftests/bpf/progs/bpf_iter_task.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

Expand All @@ -23,3 +24,56 @@ int dump_task(struct bpf_iter__task *ctx)
BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
return 0;
}

int num_expected_failure_copy_from_user_task = 0;
int num_success_copy_from_user_task = 0;

SEC("iter.s/task")
int dump_task_sleepable(struct bpf_iter__task *ctx)
{
struct seq_file *seq = ctx->meta->seq;
struct task_struct *task = ctx->task;
static const char info[] = " === END ===";
struct pt_regs *regs;
void *ptr;
uint32_t user_data = 0;
int ret;

if (task == (void *)0) {
BPF_SEQ_PRINTF(seq, "%s\n", info);
return 0;
}

/* Read an invalid pointer and ensure we get an error */
ptr = NULL;
ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0);
if (ret) {
++num_expected_failure_copy_from_user_task;
} else {
BPF_SEQ_PRINTF(seq, "%s\n", info);
return 0;
}

/* Try to read the contents of the task's instruction pointer from the
* remote task's address space.
*/
regs = (struct pt_regs *)bpf_task_pt_regs(task);
if (regs == (void *)0) {
BPF_SEQ_PRINTF(seq, "%s\n", info);
return 0;
}
ptr = (void *)PT_REGS_IP(regs);

ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0);
if (ret) {
BPF_SEQ_PRINTF(seq, "%s\n", info);
return 0;
}
++num_success_copy_from_user_task;

if (ctx->meta->seq_num == 0)
BPF_SEQ_PRINTF(seq, " tgid gid data\n");

BPF_SEQ_PRINTF(seq, "%8d %8d %8d\n", task->tgid, task->pid, user_data);
return 0;
}

0 comments on commit c45c79e

Please sign in to comment.