Skip to content

Commit

Permalink
Merge branch 'writeable-bpf-tracepoints'
Browse files Browse the repository at this point in the history
Matt Mullins says:

====================
This adds an opt-in interface for tracepoints to expose a writable context to
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE programs that are attached, while
supporting read-only access from existing BPF_PROG_TYPE_RAW_TRACEPOINT
programs, as well as from non-BPF-based tracepoints.

The initial motivation is to support tracing that can be observed from the
remote end of an NBD socket, e.g. by adding flags to the struct nbd_request
header.  Earlier attempts included adding an NBD-specific tracepoint fd, but in
code review, it was recommended that I implement it more generically -- as a
result, this patchset is far simpler than my initial try.

v4->v5:
  * rebased onto bpf-next/master and fixed merge conflicts
  * "tools: sync bpf.h" also syncs comments that have previously changed
    in bpf-next

v3->v4:
  * fixed a silly copy/paste typo in include/trace/events/bpf_test_run.h
    (_TRACE_NBD_H -> _TRACE_BPF_TEST_RUN_H)
  * fixed incorrect/misleading wording in patch 1's commit message,
    since the pointer cannot be directly dereferenced in a
    BPF_PROG_TYPE_RAW_TRACEPOINT
  * cleaned up the error message wording if the prog_tests fail
  * Addressed feedback from Yonghong
    * reject non-pointer-sized accesses to the buffer pointer
    * use sizeof(struct nbd_request) as one-byte-past-the-end in
      raw_tp_writable_reject_nbd_invalid.c
    * use BPF_MOV64_IMM instead of BPF_LD_IMM64

v2->v3:
  * Andrew addressed Josef's comments:
    * C-style commenting in nbd.c
    * Collapsed identical events into a single DECLARE_EVENT_CLASS.
      This saves about 2kB of kernel text

v1->v2:
  * add selftests
    * sync tools/include/uapi/linux/bpf.h
  * reject variable offset into the buffer
  * add string representation of PTR_TO_TP_BUFFER to reg_type_str
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Alexei Starovoitov committed Apr 27, 2019
2 parents 34b8ab0 + e950e84 commit 3745dc2
Show file tree
Hide file tree
Showing 19 changed files with 433 additions and 5 deletions.
1 change: 1 addition & 0 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -10741,6 +10741,7 @@ L: linux-block@vger.kernel.org
L: nbd@other.debian.org
F: Documentation/blockdev/nbd.txt
F: drivers/block/nbd.c
F: include/trace/events/nbd.h
F: include/uapi/linux/nbd.h

NETWORK DROP MONITOR
Expand Down
13 changes: 13 additions & 0 deletions drivers/block/nbd.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@
#include <linux/nbd-netlink.h>
#include <net/genetlink.h>

#define CREATE_TRACE_POINTS
#include <trace/events/nbd.h>

static DEFINE_IDR(nbd_index_idr);
static DEFINE_MUTEX(nbd_index_mutex);
static int nbd_total_devices = 0;
Expand Down Expand Up @@ -510,6 +513,10 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
if (sent) {
if (sent >= sizeof(request)) {
skip = sent - sizeof(request);

/* initialize handle for tracing purposes */
handle = nbd_cmd_handle(cmd);

goto send_pages;
}
iov_iter_advance(&from, sent);
Expand All @@ -526,11 +533,14 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
handle = nbd_cmd_handle(cmd);
memcpy(request.handle, &handle, sizeof(handle));

trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd));

dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
req, nbdcmd_to_ascii(type),
(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
result = sock_xmit(nbd, index, 1, &from,
(type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
trace_nbd_header_sent(req, handle);
if (result <= 0) {
if (was_interrupted(result)) {
/* If we haven't sent anything we can just return BUSY,
Expand Down Expand Up @@ -603,6 +613,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
bio = next;
}
out:
trace_nbd_payload_sent(req, handle);
nsock->pending = NULL;
nsock->sent = 0;
return 0;
Expand Down Expand Up @@ -650,6 +661,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
tag, req);
return ERR_PTR(-ENOENT);
}
trace_nbd_header_received(req, handle);
cmd = blk_mq_rq_to_pdu(req);

mutex_lock(&cmd->lock);
Expand Down Expand Up @@ -703,6 +715,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
}
}
out:
trace_nbd_payload_received(req, handle);
mutex_unlock(&cmd->lock);
return ret ? ERR_PTR(ret) : cmd;
}
Expand Down
2 changes: 2 additions & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ enum bpf_reg_type {
PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
};

/* The information passed from prog-specific *_is_valid_access
Expand Down Expand Up @@ -361,6 +362,7 @@ struct bpf_prog_aux {
u32 used_map_cnt;
u32 max_ctx_offset;
u32 max_pkt_offset;
u32 max_tp_access;
u32 stack_depth;
u32 id;
u32 func_cnt; /* used by non-func prog as the number of func progs */
Expand Down
1 change: 1 addition & 0 deletions include/linux/bpf_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
Expand Down
1 change: 1 addition & 0 deletions include/linux/tracepoint-defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ struct bpf_raw_event_map {
struct tracepoint *tp;
void *bpf_func;
u32 num_args;
u32 writable_size;
} __aligned(32);

#endif
27 changes: 25 additions & 2 deletions include/trace/bpf_probe.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto) \
* to make sure that if the tracepoint handling changes, the
* bpf probe will fail to compile unless it too is updated.
*/
#undef DEFINE_EVENT
#define DEFINE_EVENT(template, call, proto, args) \
#define __DEFINE_EVENT(template, call, proto, args, size) \
static inline void bpf_test_probe_##call(void) \
{ \
check_trace_callback_type_##call(__bpf_trace_##template); \
Expand All @@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = { \
.tp = &__tracepoint_##call, \
.bpf_func = (void *)__bpf_trace_##template, \
.num_args = COUNT_ARGS(args), \
.writable_size = size, \
};

/*
 * Expands to its first argument only and discards the rest.  The expansion
 * is intentionally not parenthesised: it is used below both on a parameter
 * declaration list and on an argument list.
 */
#define FIRST(x, ...) x

/*
 * DEFINE_EVENT_WRITABLE - variant of DEFINE_EVENT that takes an extra @size
 * argument and forwards it to __DEFINE_EVENT().
 *
 * Besides deferring to __DEFINE_EVENT(), it emits a static inline helper,
 * bpf_test_buffer_<call>(), that exists purely for a build-time check:
 * FIRST(proto) declares the first prototype parameter as a local, and
 * BUILD_BUG_ON_ZERO() then requires @size to equal sizeof() of the object
 * that the first argument points to.
 */
#undef DEFINE_EVENT_WRITABLE
#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \
static inline void bpf_test_buffer_##call(void) \
{ \
/* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \
* BUILD_BUG_ON_ZERO() uses a different mechanism that is not \
* dead-code-eliminated. \
*/ \
FIRST(proto); \
(void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args))); \
} \
__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size)

/*
 * Ordinary events go through the same __DEFINE_EVENT() helper as writable
 * ones, passing 0 as the size.
 */
#undef DEFINE_EVENT
#define DEFINE_EVENT(template, call, proto, args) \
__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0)

/*
 * The @print argument is not used in this expansion; the event is defined
 * exactly as DEFINE_EVENT() would define it.
 */
#undef DEFINE_EVENT_PRINT
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))

#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)

#undef DEFINE_EVENT_WRITABLE
#undef __DEFINE_EVENT
#undef FIRST

#endif /* CONFIG_BPF_EVENTS */
50 changes: 50 additions & 0 deletions include/trace/events/bpf_test_run.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM bpf_test_run

#if !defined(_TRACE_BPF_TEST_RUN_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_BPF_TEST_RUN_H

#include <linux/tracepoint.h>

/*
 * Event class for bpf_test_finish.  The tracepoint receives a pointer to an
 * int error code; the trace entry stores the value read through that
 * pointer (not the pointer itself) and prints it as a decimal.
 */
DECLARE_EVENT_CLASS(bpf_test_finish,

TP_PROTO(int *err),

TP_ARGS(err),

TP_STRUCT__entry(
__field(int, err)
),

TP_fast_assign(
__entry->err = *err;
),

TP_printk("bpf_test_finish with err=%d", __entry->err)
);

/*
 * BPF_TEST_RUN_DEFINE_EVENT() picks the event-definition macro for this pass
 * over the header: when DEFINE_EVENT_WRITABLE is not defined it falls back
 * to plain DEFINE_EVENT() and drops the size argument; otherwise it forwards
 * everything, including the size, to DEFINE_EVENT_WRITABLE().
 */
#undef BPF_TEST_RUN_DEFINE_EVENT
#ifndef DEFINE_EVENT_WRITABLE
#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size)	\
	DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args))
#else
#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size)	\
	DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto),		\
			      PARAMS(args), size)
#endif

/*
 * Instantiate the bpf_test_finish event from the class of the same name.
 * The trailing sizeof(int) is the size argument; it is only forwarded when
 * BPF_TEST_RUN_DEFINE_EVENT maps to DEFINE_EVENT_WRITABLE.
 */
BPF_TEST_RUN_DEFINE_EVENT(bpf_test_finish, bpf_test_finish,

TP_PROTO(int *err),

TP_ARGS(err),

sizeof(int)
);

#endif

/* This part must be outside protection */
#include <trace/define_trace.h>
107 changes: 107 additions & 0 deletions include/trace/events/nbd.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM nbd

#if !defined(_TRACE_NBD_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_NBD_H

#include <linux/tracepoint.h>

/*
 * Event class shared by the NBD transport events.  Each event records the
 * struct request pointer and the 64-bit handle supplied by the caller, and
 * prints the handle as 16 zero-padded hex digits.
 */
DECLARE_EVENT_CLASS(nbd_transport_event,

TP_PROTO(struct request *req, u64 handle),

TP_ARGS(req, handle),

TP_STRUCT__entry(
__field(struct request *, req)
__field(u64, handle)
),

TP_fast_assign(
__entry->req = req;
__entry->handle = handle;
),

TP_printk(
"nbd transport event: request %p, handle 0x%016llx",
__entry->req,
__entry->handle
)
);

/*
 * Emitted by nbd_send_cmd() right after the sock_xmit() call that sends the
 * control request, before the result of that call is examined.
 */
DEFINE_EVENT(nbd_transport_event, nbd_header_sent,

TP_PROTO(struct request *req, u64 handle),

TP_ARGS(req, handle)
);

/*
 * Emitted at the "out" label of nbd_send_cmd(), just before the socket's
 * pending/sent state is reset and 0 is returned.
 */
DEFINE_EVENT(nbd_transport_event, nbd_payload_sent,

TP_PROTO(struct request *req, u64 handle),

TP_ARGS(req, handle)
);

/*
 * Emitted by nbd_read_stat() once the request lookup has not bailed out
 * with -ENOENT, before the command's lock is taken.
 */
DEFINE_EVENT(nbd_transport_event, nbd_header_received,

TP_PROTO(struct request *req, u64 handle),

TP_ARGS(req, handle)
);

/*
 * Emitted at the "out" label of nbd_read_stat(), just before the command's
 * lock is released.
 */
DEFINE_EVENT(nbd_transport_event, nbd_payload_received,

TP_PROTO(struct request *req, u64 handle),

TP_ARGS(req, handle)
);

/*
 * Event class for nbd_send_request.  The tracepoint receives the struct
 * nbd_request being sent, the device index and the block-layer request.
 *
 * Only the device index and the request pointer are captured in the trace
 * entry; the entry's nbd_request field is always stored as 0 rather than
 * the pointer that was passed in.
 *
 * NOTE(review): storing 0 instead of the pointer is presumably deliberate
 * (the caller passes the address of a local struct) -- confirm; NULL would
 * be the more conventional spelling for a pointer field.
 * NOTE(review): dev_index is declared u64 but printed with %lld; the output
 * only differs from %llu for values that do not fit in a signed 64-bit int.
 */
DECLARE_EVENT_CLASS(nbd_send_request,

TP_PROTO(struct nbd_request *nbd_request, int index,
struct request *rq),

TP_ARGS(nbd_request, index, rq),

TP_STRUCT__entry(
__field(struct nbd_request *, nbd_request)
__field(u64, dev_index)
__field(struct request *, request)
),

TP_fast_assign(
__entry->nbd_request = 0;
__entry->dev_index = index;
__entry->request = rq;
),

TP_printk("nbd%lld: request %p", __entry->dev_index, __entry->request)
);

/*
 * NBD_DEFINE_EVENT() picks the event-definition macro for this pass over
 * the header: when DEFINE_EVENT_WRITABLE is not defined it falls back to
 * plain DEFINE_EVENT() and drops the size argument; otherwise it forwards
 * everything, including the size, to DEFINE_EVENT_WRITABLE().
 */
#undef NBD_DEFINE_EVENT
#ifndef DEFINE_EVENT_WRITABLE
#define NBD_DEFINE_EVENT(template, call, proto, args, size)		\
	DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args))
#else
#define NBD_DEFINE_EVENT(template, call, proto, args, size)		\
	DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto),		\
			      PARAMS(args), size)
#endif

/*
 * Instantiate the nbd_send_request event from the class of the same name.
 * The trailing sizeof(struct nbd_request) is the size argument; it is only
 * forwarded when NBD_DEFINE_EVENT maps to DEFINE_EVENT_WRITABLE.
 */
NBD_DEFINE_EVENT(nbd_send_request, nbd_send_request,

TP_PROTO(struct nbd_request *nbd_request, int index,
struct request *rq),

TP_ARGS(nbd_request, index, rq),

sizeof(struct nbd_request)
);

#endif

/* This part must be outside protection */
#include <trace/define_trace.h>
1 change: 1 addition & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_REUSEPORT,
BPF_PROG_TYPE_FLOW_DISSECTOR,
BPF_PROG_TYPE_CGROUP_SYSCTL,
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
};

enum bpf_attach_type {
Expand Down
8 changes: 6 additions & 2 deletions kernel/bpf/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -1789,12 +1789,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
}
raw_tp->btp = btp;

prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
BPF_PROG_TYPE_RAW_TRACEPOINT);
prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
if (IS_ERR(prog)) {
err = PTR_ERR(prog);
goto out_free_tp;
}
if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
err = -EINVAL;
goto out_put_prog;
}

err = bpf_probe_register(raw_tp->btp, prog);
if (err)
Expand Down
31 changes: 31 additions & 0 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,7 @@ static const char * const reg_type_str[] = {
[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
[PTR_TO_TCP_SOCK] = "tcp_sock",
[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
[PTR_TO_TP_BUFFER] = "tp_buffer",
};

static char slot_type_char[] = {
Expand Down Expand Up @@ -1993,6 +1994,32 @@ static int check_ctx_reg(struct bpf_verifier_env *env,
return 0;
}

/**
 * check_tp_buffer_access - validate an access through a PTR_TO_TP_BUFFER reg
 * @env:   verifier environment; env->prog->aux->max_tp_access may be raised
 * @reg:   state of the register holding the tracepoint buffer pointer
 * @regno: number of that register, used only in log messages
 * @off:   byte offset of the access
 * @size:  width of the access in bytes
 *
 * Rejects negative offsets and any register whose variable offset is not
 * the constant zero.  For an accepted access, the end of the accessed range
 * (@off + @size) is folded into max_tp_access so that the largest extent
 * touched by the program is remembered (presumably to be compared against
 * the tracepoint's writable size at attach time -- not visible here).
 *
 * Return: 0 if the access is acceptable, -EACCES otherwise.
 */
static int check_tp_buffer_access(struct bpf_verifier_env *env,
				  const struct bpf_reg_state *reg,
				  int regno, int off, int size)
{
	if (off < 0) {
		/* Terminate the message so it does not run into the next log line. */
		verbose(env,
			"R%d invalid tracepoint buffer access: off=%d, size=%d\n",
			regno, off, size);
		return -EACCES;
	}
	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env,
			"R%d invalid variable buffer offset: off=%d, var_off=%s\n",
			regno, off, tn_buf);
		return -EACCES;
	}
	/* Track the furthest byte this program touches in the buffer. */
	if (off + size > env->prog->aux->max_tp_access)
		env->prog->aux->max_tp_access = off + size;

	return 0;
}


/* truncate register to smaller size (in bytes)
* must be called with size < BPF_REG_SIZE
*/
Expand Down Expand Up @@ -2137,6 +2164,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
err = check_sock_access(env, insn_idx, regno, off, size, t);
if (!err && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_TP_BUFFER) {
err = check_tp_buffer_access(env, reg, regno, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else {
verbose(env, "R%d invalid mem access '%s'\n", regno,
reg_type_str[reg->type]);
Expand Down
Loading

0 comments on commit 3745dc2

Please sign in to comment.