Skip to content

Commit

Permalink
Merge branch 'XDP-redirect-tracepoints'
Browse files Browse the repository at this point in the history
Jesper Dangaard Brouer says:

====================
XDP redirect tracepoints

I feel this is as far as I can take the tracepoint infrastructure to
assist XDP monitoring.

Tracepoints come with a base overhead of 25 nanosec for an attached
bpf_prog, and 48 nanosec for using a full perf record. This is
problematic for the XDP use-case, but it is very convenient to use the
existing perf infrastructure.

From a performance perspective, the real solution would be to attach
another bpf_prog (that understands xdp_buff), but I'm not sure we want
to introduce yet another bpf attach API for this.

One thing left is to standardize the possible err return codes, to a
limited set, to allow easier (and faster) mapping into a bpf map.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Aug 29, 2017
2 parents d0fcece + 3ffab54 commit 25d4dae
Show file tree
Hide file tree
Showing 9 changed files with 543 additions and 46 deletions.
100 changes: 82 additions & 18 deletions include/trace/events/xdp.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,55 +31,119 @@ TRACE_EVENT(xdp_exception,
TP_ARGS(dev, xdp, act),

TP_STRUCT__entry(
__array(u8, prog_tag, 8)
__field(int, prog_id)
__field(u32, act)
__field(int, ifindex)
),

TP_fast_assign(
BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag));
memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag));
__entry->prog_id = xdp->aux->id;
__entry->act = act;
__entry->ifindex = dev->ifindex;
),

TP_printk("prog=%s action=%s ifindex=%d",
__print_hex_str(__entry->prog_tag, 8),
TP_printk("prog_id=%d action=%s ifindex=%d",
__entry->prog_id,
__print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
__entry->ifindex)
);

TRACE_EVENT(xdp_redirect,
DECLARE_EVENT_CLASS(xdp_redirect_template,

TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp, u32 act,
int to_index, int err),
const struct bpf_prog *xdp,
int to_ifindex, int err,
const struct bpf_map *map, u32 map_index),

TP_ARGS(dev, xdp, act, to_index, err),
TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),

TP_STRUCT__entry(
__array(u8, prog_tag, 8)
__field(int, prog_id)
__field(u32, act)
__field(int, ifindex)
__field(int, to_index)
__field(int, err)
__field(int, to_ifindex)
__field(u32, map_id)
__field(int, map_index)
),

TP_fast_assign(
BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag));
memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag));
__entry->act = act;
__entry->prog_id = xdp->aux->id;
__entry->act = XDP_REDIRECT;
__entry->ifindex = dev->ifindex;
__entry->to_index = to_index;
__entry->err = err;
__entry->to_ifindex = to_ifindex;
__entry->map_id = map ? map->id : 0;
__entry->map_index = map_index;
),

TP_printk("prog=%s action=%s ifindex=%d to_index=%d err=%d",
__print_hex_str(__entry->prog_tag, 8),
TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d",
__entry->prog_id,
__print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
__entry->ifindex, __entry->to_index,
__entry->ifindex, __entry->to_ifindex,
__entry->err)
);

DEFINE_EVENT(xdp_redirect_template, xdp_redirect,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
int to_ifindex, int err,
const struct bpf_map *map, u32 map_index),
TP_ARGS(dev, xdp, to_ifindex, err, map, map_index)
);

DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
int to_ifindex, int err,
const struct bpf_map *map, u32 map_index),
TP_ARGS(dev, xdp, to_ifindex, err, map, map_index)
);

#define _trace_xdp_redirect(dev, xdp, to) \
trace_xdp_redirect(dev, xdp, to, 0, NULL, 0);

#define _trace_xdp_redirect_err(dev, xdp, to, err) \
trace_xdp_redirect_err(dev, xdp, to, err, NULL, 0);

DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
int to_ifindex, int err,
const struct bpf_map *map, u32 map_index),
TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),
TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
" map_id=%d map_index=%d",
__entry->prog_id,
__print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
__entry->ifindex, __entry->to_ifindex,
__entry->err,
__entry->map_id, __entry->map_index)
);

DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
int to_ifindex, int err,
const struct bpf_map *map, u32 map_index),
TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),
TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
" map_id=%d map_index=%d",
__entry->prog_id,
__print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
__entry->ifindex, __entry->to_ifindex,
__entry->err,
__entry->map_id, __entry->map_index)
);

#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \
trace_xdp_redirect_map(dev, xdp, fwd ? fwd->ifindex : 0, \
0, map, idx);

#define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \
trace_xdp_redirect_map_err(dev, xdp, fwd ? fwd->ifindex : 0, \
err, map, idx);

#endif /* _TRACE_XDP_H */

#include <trace/define_trace.h>
37 changes: 24 additions & 13 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -2515,16 +2515,20 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
fwd = __dev_map_lookup_elem(map, index);
if (!fwd) {
err = -EINVAL;
goto out;
goto err;
}
if (ri->map_to_flush && ri->map_to_flush != map)
xdp_do_flush_map();

err = __bpf_tx_xdp(fwd, map, xdp, index);
if (likely(!err))
ri->map_to_flush = map;
out:
trace_xdp_redirect(dev, xdp_prog, XDP_REDIRECT, index, err);
if (unlikely(err))
goto err;

ri->map_to_flush = map;
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
return 0;
err:
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
return err;
}

Expand All @@ -2543,12 +2547,17 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
ri->ifindex = 0;
if (unlikely(!fwd)) {
err = -EINVAL;
goto out;
goto err;
}

err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
out:
trace_xdp_redirect(dev, xdp_prog, XDP_REDIRECT, index, err);
if (unlikely(err))
goto err;

_trace_xdp_redirect(dev, xdp_prog, index);
return 0;
err:
_trace_xdp_redirect_err(dev, xdp_prog, index, err);
return err;
}
EXPORT_SYMBOL_GPL(xdp_do_redirect);
Expand All @@ -2566,23 +2575,25 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
ri->ifindex = 0;
if (unlikely(!fwd)) {
err = -EINVAL;
goto out;
goto err;
}

if (unlikely(!(fwd->flags & IFF_UP))) {
err = -ENETDOWN;
goto out;
goto err;
}

len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
if (skb->len > len) {
err = -EMSGSIZE;
goto out;
goto err;
}

skb->dev = fwd;
out:
trace_xdp_redirect(dev, xdp_prog, XDP_REDIRECT, index, err);
_trace_xdp_redirect(dev, xdp_prog, index);
return 0;
err:
_trace_xdp_redirect_err(dev, xdp_prog, index, err);
return err;
}
EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
Expand Down
4 changes: 4 additions & 0 deletions samples/bpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ hostprogs-y += per_socket_stats_example
hostprogs-y += load_sock_ops
hostprogs-y += xdp_redirect
hostprogs-y += xdp_redirect_map
hostprogs-y += xdp_monitor
hostprogs-y += syscall_tp

# Libbpf dependencies
Expand Down Expand Up @@ -83,6 +84,7 @@ test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o

# Tell kbuild to always build the programs
Expand Down Expand Up @@ -127,6 +129,7 @@ always += tcp_iw_kern.o
always += tcp_clamp_kern.o
always += xdp_redirect_kern.o
always += xdp_redirect_map_kern.o
always += xdp_monitor_kern.o
always += syscall_tp_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include
Expand Down Expand Up @@ -166,6 +169,7 @@ HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
HOSTLOADLIBES_test_map_in_map += -lelf
HOSTLOADLIBES_xdp_redirect += -lelf
HOSTLOADLIBES_xdp_redirect_map += -lelf
HOSTLOADLIBES_xdp_monitor += -lelf
HOSTLOADLIBES_syscall_tp += -lelf

# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
Expand Down
88 changes: 88 additions & 0 deletions samples/bpf/xdp_monitor_kern.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/* XDP monitor tool, based on tracepoints
*
* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
*/
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

/* Per-cpu counters of redirect outcomes, indexed by the
 * XDP_REDIRECT_SUCCESS / XDP_REDIRECT_ERROR keys used in
 * xdp_redirect_collect_stat() — hence max_entries = 2.
 */
struct bpf_map_def SEC("maps") redirect_err_cnt = {
	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u64),
	.max_entries = 2,
	/* TODO: have entries for all possible errno's */
};

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
 * Code in: kernel/include/trace/events/xdp.h
 *
 * Hand-written mirror of the tracepoint record layout; the offsets noted
 * per field must stay in sync with the kernel "format" file above, or the
 * loads in the handlers below will read the wrong bytes.
 */
struct xdp_redirect_ctx {
	unsigned short common_type;	// offset:0; size:2; signed:0;
	unsigned char common_flags;	// offset:2; size:1; signed:0;
	unsigned char common_preempt_count;// offset:3; size:1; signed:0;
	int common_pid;			// offset:4; size:4; signed:1;

	int prog_id;			// offset:8; size:4; signed:1;
	u32 act;			// offset:12 size:4; signed:0;
	int ifindex;			// offset:16 size:4; signed:1;
	int err;			// offset:20 size:4; signed:1;
	int to_ifindex;			// offset:24 size:4; signed:1;
	u32 map_id;			// offset:28 size:4; signed:0;
	int map_index;			// offset:32 size:4; signed:1;
};					// offset:36

/* Keys into the redirect_err_cnt map: one slot for successful
 * redirects (err == 0), one for failed ones.
 */
enum {
	XDP_REDIRECT_SUCCESS = 0,
	XDP_REDIRECT_ERROR = 1
};

/* Bump the per-cpu success/error counter for one redirect tracepoint hit.
 *
 * @ctx: tracepoint record; only ctx->err is consulted (zero means the
 *       redirect succeeded, non-zero carries the error code).
 *
 * Always returns 0, which tells the tracepoint machinery the event was
 * filtered and needs no further processing.  Returning 1 here would let
 * e.g. a perf-record tracepoint see and record these events, but that
 * works poorly in practice: stopping perf-record also unloads this
 * bpf_prog, and recording adds extra overhead.
 */
static __always_inline
int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
{
	u32 idx = ctx->err ? XDP_REDIRECT_ERROR : XDP_REDIRECT_SUCCESS;
	u64 *counter;

	counter = bpf_map_lookup_elem(&redirect_err_cnt, &idx);
	if (counter)
		*counter += 1;

	return 0;
}

/* Attached to xdp/xdp_redirect_err: fires when a non-map redirect
 * fails (err != 0); counted into the error slot.
 */
SEC("tracepoint/xdp/xdp_redirect_err")
int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}


/* Attached to xdp/xdp_redirect_map_err: fires when a map-based
 * redirect fails (err != 0); counted into the error slot.
 */
SEC("tracepoint/xdp/xdp_redirect_map_err")
int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

/* Likely unloaded when prog starts: the success-path tracepoint is
 * expected to be detached in normal operation to avoid its overhead;
 * handler kept so success events are counted when it is enabled.
 */
SEC("tracepoint/xdp/xdp_redirect")
int trace_xdp_redirect(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

/* Likely unloaded when prog starts: success-path counterpart for
 * map-based redirects; same rationale as trace_xdp_redirect.
 */
SEC("tracepoint/xdp/xdp_redirect_map")
int trace_xdp_redirect_map(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}
Loading

0 comments on commit 25d4dae

Please sign in to comment.