Skip to content

Commit

Permalink
Merge branch 'flow_dissector-input-flags'
Browse files Browse the repository at this point in the history
Stanislav Fomichev says:

====================
C flow dissector supports input flags that tell it to customize parsing
by either stopping early or trying to parse as deep as possible.
BPF flow dissector always parses as deep as possible which is sub-optimal.
Pass input flags to the BPF flow dissector as well so it can make the same
decisions.

Series outline:
* remove unused FLOW_DISSECTOR_F_STOP_AT_L3 flag
* export FLOW_DISSECTOR_F_XXX flags as uapi and pass them to BPF
  flow dissector
* add documentation for the export flags
* support input flags in BPF_PROG_TEST_RUN via ctx_{in,out}
* sync uapi to tools
* support FLOW_DISSECTOR_F_PARSE_1ST_FRAG in selftest
* support FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL in kernel and selftest
* support FLOW_DISSECTOR_F_STOP_AT_ENCAP in selftest

Pros:
* makes BPF flow dissector faster by avoiding burning extra cycles
* existing BPF progs continue to work by ignoring the flags and always
  parsing as deep as possible

Cons:
* new UAPI which we need to support (OTOH, if we need to deprecate some
  flags, we can just stop setting them upon calling BPF programs)

Some numbers (with .repeat = 4000000 in test_flow_dissector):
        test_flow_dissector:PASS:ipv4-frag 35 nsec
        test_flow_dissector:PASS:ipv4-frag 35 nsec
        test_flow_dissector:PASS:ipv4-no-frag 32 nsec
        test_flow_dissector:PASS:ipv4-no-frag 32 nsec

        test_flow_dissector:PASS:ipv6-frag 39 nsec
        test_flow_dissector:PASS:ipv6-frag 39 nsec
        test_flow_dissector:PASS:ipv6-no-frag 36 nsec
        test_flow_dissector:PASS:ipv6-no-frag 36 nsec

        test_flow_dissector:PASS:ipv6-flow-label 36 nsec
        test_flow_dissector:PASS:ipv6-flow-label 36 nsec
        test_flow_dissector:PASS:ipv6-no-flow-label 33 nsec
        test_flow_dissector:PASS:ipv6-no-flow-label 33 nsec

        test_flow_dissector:PASS:ipip-encap 38 nsec
        test_flow_dissector:PASS:ipip-encap 38 nsec
        test_flow_dissector:PASS:ipip-no-encap 32 nsec
        test_flow_dissector:PASS:ipip-no-encap 32 nsec

The improvement is around 10%, but it's in a tight cache-hot
BPF_PROG_TEST_RUN loop.
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Alexei Starovoitov committed Jul 26, 2019
2 parents 03cd1d1 + e853ae7 commit 943e398
Show file tree
Hide file tree
Showing 8 changed files with 368 additions and 14 deletions.
18 changes: 18 additions & 0 deletions Documentation/bpf/prog_flow_dissector.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ The inputs are:
* ``nhoff`` - initial offset of the networking header
* ``thoff`` - initial offset of the transport header, initialized to nhoff
* ``n_proto`` - L3 protocol type, parsed out of L2 header
* ``flags`` - optional flags

Flow dissector BPF program should fill out the rest of the ``struct
bpf_flow_keys`` fields. Input arguments ``nhoff/thoff/n_proto`` should be
Expand Down Expand Up @@ -101,6 +102,23 @@ can be called for both cases and would have to be written carefully to
handle both cases.


Flags
=====

``flow_keys->flags`` might contain optional input flags that work as follows:

* ``BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG`` - tells BPF flow dissector to
continue parsing first fragment; the default expected behavior is that
flow dissector returns as soon as it finds out that the packet is fragmented;
used by ``eth_get_headlen`` to estimate length of all headers for GRO.
* ``BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL`` - tells BPF flow dissector to
stop parsing as soon as it reaches IPv6 flow label; used by
``___skb_get_hash`` and ``__skb_get_hash_symmetric`` to get flow hash.
* ``BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP`` - tells BPF flow dissector to stop
parsing as soon as it reaches encapsulated headers; used by routing
infrastructure.


Reference Implementation
========================

Expand Down
2 changes: 1 addition & 1 deletion include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -1271,7 +1271,7 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)

struct bpf_flow_dissector;
bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
__be16 proto, int nhoff, int hlen);
__be16 proto, int nhoff, int hlen, unsigned int flags);

bool __skb_flow_dissect(const struct net *net,
const struct sk_buff *skb,
Expand Down
6 changes: 6 additions & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -3507,6 +3507,10 @@ enum bpf_task_fd_type {
BPF_FD_TYPE_URETPROBE, /* filename + offset */
};

#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0)
#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1)
#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2)

struct bpf_flow_keys {
__u16 nhoff;
__u16 thoff;
Expand All @@ -3528,6 +3532,8 @@ struct bpf_flow_keys {
__u32 ipv6_dst[4]; /* in6_addr; network order */
};
};
__u32 flags;
__be32 flow_label;
};

struct bpf_func_info {
Expand Down
39 changes: 35 additions & 4 deletions net/bpf/test_run.c
Original file line number Diff line number Diff line change
Expand Up @@ -377,16 +377,34 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
return ret;
}

static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx)
{
/* make sure the fields we don't use are zeroed */
if (!range_is_zero(ctx, 0, offsetof(struct bpf_flow_keys, flags)))
return -EINVAL;

/* flags is allowed */

if (!range_is_zero(ctx, offsetof(struct bpf_flow_keys, flags) +
FIELD_SIZEOF(struct bpf_flow_keys, flags),
sizeof(struct bpf_flow_keys)))
return -EINVAL;

return 0;
}

int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
u32 size = kattr->test.data_size_in;
struct bpf_flow_dissector ctx = {};
u32 repeat = kattr->test.repeat;
struct bpf_flow_keys *user_ctx;
struct bpf_flow_keys flow_keys;
u64 time_start, time_spent = 0;
const struct ethhdr *eth;
unsigned int flags = 0;
u32 retval, duration;
void *data;
int ret;
Expand All @@ -395,9 +413,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
return -EINVAL;

if (kattr->test.ctx_in || kattr->test.ctx_out)
return -EINVAL;

if (size < ETH_HLEN)
return -EINVAL;

Expand All @@ -410,6 +425,18 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
if (!repeat)
repeat = 1;

user_ctx = bpf_ctx_init(kattr, sizeof(struct bpf_flow_keys));
if (IS_ERR(user_ctx)) {
kfree(data);
return PTR_ERR(user_ctx);
}
if (user_ctx) {
ret = verify_user_bpf_flow_keys(user_ctx);
if (ret)
goto out;
flags = user_ctx->flags;
}

ctx.flow_keys = &flow_keys;
ctx.data = data;
ctx.data_end = (__u8 *)data + size;
Expand All @@ -419,7 +446,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
time_start = ktime_get_ns();
for (i = 0; i < repeat; i++) {
retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
size);
size, flags);

if (signal_pending(current)) {
preempt_enable();
Expand Down Expand Up @@ -450,8 +477,12 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,

ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, user_ctx,
sizeof(struct bpf_flow_keys));

out:
kfree(user_ctx);
kfree(data);
return ret;
}
21 changes: 19 additions & 2 deletions net/core/flow_dissector.c
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,7 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
struct flow_dissector_key_ports *key_ports;
struct flow_dissector_key_tags *key_tags;

key_control = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_CONTROL,
Expand Down Expand Up @@ -781,10 +782,18 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
key_ports->src = flow_keys->sport;
key_ports->dst = flow_keys->dport;
}

if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
key_tags = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_FLOW_LABEL,
target_container);
key_tags->flow_label = ntohl(flow_keys->flow_label);
}
}

bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
__be16 proto, int nhoff, int hlen)
__be16 proto, int nhoff, int hlen, unsigned int flags)
{
struct bpf_flow_keys *flow_keys = ctx->flow_keys;
u32 result;
Expand All @@ -795,6 +804,14 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
flow_keys->nhoff = nhoff;
flow_keys->thoff = flow_keys->nhoff;

BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG !=
(int)FLOW_DISSECTOR_F_PARSE_1ST_FRAG);
BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL !=
(int)FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP !=
(int)FLOW_DISSECTOR_F_STOP_AT_ENCAP);
flow_keys->flags = flags;

preempt_disable();
result = BPF_PROG_RUN(prog, ctx);
preempt_enable();
Expand Down Expand Up @@ -914,7 +931,7 @@ bool __skb_flow_dissect(const struct net *net,
}

ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff,
hlen);
hlen, flags);
__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
target_container);
rcu_read_unlock();
Expand Down
6 changes: 6 additions & 0 deletions tools/include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -3504,6 +3504,10 @@ enum bpf_task_fd_type {
BPF_FD_TYPE_URETPROBE, /* filename + offset */
};

#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0)
#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1)
#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2)

struct bpf_flow_keys {
__u16 nhoff;
__u16 thoff;
Expand All @@ -3525,6 +3529,8 @@ struct bpf_flow_keys {
__u32 ipv6_dst[4]; /* in6_addr; network order */
};
};
__u32 flags;
__be32 flow_label;
};

struct bpf_func_info {
Expand Down
Loading

0 comments on commit 943e398

Please sign in to comment.