Skip to content

Commit

Permalink
Merge branch 'bpf-flow-dissector'
Browse files Browse the repository at this point in the history
Petar Penkov says:

====================
This patch series hardens the RX stack by allowing flow dissection in BPF,
as previously discussed [1]. Because of the rigorous checks of the BPF
verifier, this provides significant security guarantees. In particular, the
BPF flow dissector cannot get inside of an infinite loop, as with
CVE-2013-4348, because BPF programs are guaranteed to terminate. It cannot
read outside of packet bounds, because all memory accesses are checked.
Also, with BPF the administrator can decide which protocols to support,
reducing potential attack surface. Rarely encountered protocols can be
excluded from dissection and the program can be updated without kernel
recompile or reboot if a bug is discovered.

Patch 1 adds infrastructure to execute a BPF program in __skb_flow_dissect.
This includes a new BPF program and attach type.

Patch 2 adds the new BPF flow dissector definitions to tools/uapi.

Patch 3 adds support for the new BPF program type to libbpf and bpftool.

Patch 4 adds a flow dissector program in BPF. This parses most protocols in
__skb_flow_dissect in BPF for a subset of flow keys (basic, control, ports,
and address types).

Patch 5 adds a selftest that attaches the BPF program to the flow dissector
and sends traffic with different levels of encapsulation.

Performance Evaluation:
The in-kernel implementation was compared against the demo program from
patch 4 using the test in patch 5 with IPv4/UDP traffic over 10 seconds.
	$perf record -a -C 4 taskset -c 4 ./test_flow_dissector -i 4 -f 8 \
		-t 10

In-kernel Dissector:
	__skb_flow_dissect overhead: 2.12%
	Total Packets: 3,272,597 (from output of ./test_flow_dissector)

BPF Dissector:
	__skb_flow_dissect overhead: 1.63%
	Total Packets: 3,232,356 (from output of ./test_flow_dissector)

No-op BPF Dissector:
	__skb_flow_dissect overhead: 1.52%
	Total Packets: 3,330,635 (from output of ./test_flow_dissector)

Changes since v3:
1/ struct bpf_flow_keys reorganized to remove holes in patch 1 and patch 2.

Changes since v2:
1/ Changes to tools/include/uapi pulled into a separate patch 2
2/ Changes to tools/lib and tools/bpftool pulled into a separate patch 3
3/ Changed flow_keys in __sk_buff from __u32 to struct bpf_flow_keys *
4/ Added nhoff field in struct bpf_flow_keys to pass initial offset
5/ Saving all of the modified control block, rather than just the qdisc
6/ Sample BPF program in patch 4 modified to use the changes above

Changes since v1:
1/ LD_ABS instructions now disallowed for the new BPF prog type
2/ now checks if skb is NULL in __skb_flow_dissect()
3/ fixed incorrect accesses in flow_dissector_is_valid_access()
	- writes to the flow_keys field now disallowed
	- reads/writes to tc_classid and data_meta now disallowed
4/ headers pulled with bpf_skb_load_data if direct access fails

Changes since RFC:
1/ Flow dissector hook changed from global to per-netns
2/ Defined struct bpf_flow_keys to be used in BPF flow dissector
programs instead of exposing the internal flow keys layout. Added a
function to translate from bpf_flow_keys to the internal layout after BPF
dissection is complete. The pointer to this struct is stored in
qdisc_skb_cb rather than inside of the 20 byte control block which
simplifies verification and allows access to all 20 bytes of the cb.
3/ Removed GUE parsing as it relied on a hardcoded port
4/ MPLS parsing now stops at the first label which is consistent
with the in-kernel flow dissector
5/ Refactored to use direct packet access and to write out to
struct bpf_flow_keys

[1] http://vger.kernel.org/netconf2017_files/rx_hardening_and_udp_gso.pdf
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Alexei Starovoitov committed Sep 14, 2018
2 parents 1edb6e0 + 50b3ed5 commit 4a9f42c
Show file tree
Hide file tree
Showing 22 changed files with 1,828 additions and 6 deletions.
1 change: 1 addition & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ enum bpf_reg_type {
PTR_TO_PACKET_META, /* skb->data - meta_len */
PTR_TO_PACKET, /* reg points to skb->data */
PTR_TO_PACKET_END, /* skb->data + headlen */
PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */
};

/* The information passed from prog-specific *_is_valid_access
Expand Down
1 change: 1 addition & 0 deletions include/linux/bpf_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
#ifdef CONFIG_INET
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
#endif
BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector)

BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
Expand Down
7 changes: 7 additions & 0 deletions include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,8 @@ struct scatterlist;
struct pipe_inode_info;
struct iov_iter;
struct napi_struct;
struct bpf_prog;
union bpf_attr;

#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct nf_conntrack {
Expand Down Expand Up @@ -1192,6 +1194,11 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
const struct flow_dissector_key *key,
unsigned int key_count);

int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
struct bpf_prog *prog);

int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr);

bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container,
Expand Down
3 changes: 3 additions & 0 deletions include/net/net_namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ struct ctl_table_header;
struct net_generic;
struct uevent_sock;
struct netns_ipvs;
struct bpf_prog;


#define NETDEV_HASHBITS 8
Expand Down Expand Up @@ -145,6 +146,8 @@ struct net {
#endif
struct net_generic __rcu *gen;

struct bpf_prog __rcu *flow_dissector_prog;

/* Note : following structs are cache line aligned */
#ifdef CONFIG_XFRM
struct netns_xfrm xfrm;
Expand Down
12 changes: 9 additions & 3 deletions include/net/sch_generic.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ struct Qdisc_ops;
struct qdisc_walker;
struct tcf_walker;
struct module;
struct bpf_flow_keys;

typedef int tc_setup_cb_t(enum tc_setup_type type,
void *type_data, void *cb_priv);
Expand Down Expand Up @@ -307,9 +308,14 @@ struct tcf_proto {
};

struct qdisc_skb_cb {
unsigned int pkt_len;
u16 slave_dev_queue_mapping;
u16 tc_classid;
union {
struct {
unsigned int pkt_len;
u16 slave_dev_queue_mapping;
u16 tc_classid;
};
struct bpf_flow_keys *flow_keys;
};
#define QDISC_CB_PRIV_LEN 20
unsigned char data[QDISC_CB_PRIV_LEN];
};
Expand Down
26 changes: 26 additions & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LWT_SEG6LOCAL,
BPF_PROG_TYPE_LIRC_MODE2,
BPF_PROG_TYPE_SK_REUSEPORT,
BPF_PROG_TYPE_FLOW_DISSECTOR,
};

enum bpf_attach_type {
Expand All @@ -172,6 +173,7 @@ enum bpf_attach_type {
BPF_CGROUP_UDP4_SENDMSG,
BPF_CGROUP_UDP6_SENDMSG,
BPF_LIRC_MODE2,
BPF_FLOW_DISSECTOR,
__MAX_BPF_ATTACH_TYPE
};

Expand Down Expand Up @@ -2333,6 +2335,7 @@ struct __sk_buff {
/* ... here. */

__u32 data_meta;
struct bpf_flow_keys *flow_keys;
};

struct bpf_tunnel_key {
Expand Down Expand Up @@ -2778,4 +2781,27 @@ enum bpf_task_fd_type {
BPF_FD_TYPE_URETPROBE, /* filename + offset */
};

struct bpf_flow_keys {
__u16 nhoff;
__u16 thoff;
__u16 addr_proto; /* ETH_P_* of valid addrs */
__u8 is_frag;
__u8 is_first_frag;
__u8 is_encap;
__u8 ip_proto;
__be16 n_proto;
__be16 sport;
__be16 dport;
union {
struct {
__be32 ipv4_src;
__be32 ipv4_dst;
};
struct {
__u32 ipv6_src[4]; /* in6_addr; network order */
__u32 ipv6_dst[4]; /* in6_addr; network order */
};
};
};

#endif /* _UAPI__LINUX_BPF_H__ */
8 changes: 8 additions & 0 deletions kernel/bpf/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -1615,6 +1615,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_LIRC_MODE2:
ptype = BPF_PROG_TYPE_LIRC_MODE2;
break;
case BPF_FLOW_DISSECTOR:
ptype = BPF_PROG_TYPE_FLOW_DISSECTOR;
break;
default:
return -EINVAL;
}
Expand All @@ -1636,6 +1639,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_PROG_TYPE_LIRC_MODE2:
ret = lirc_prog_attach(attr, prog);
break;
case BPF_PROG_TYPE_FLOW_DISSECTOR:
ret = skb_flow_dissector_bpf_prog_attach(attr, prog);
break;
default:
ret = cgroup_bpf_prog_attach(attr, ptype, prog);
}
Expand Down Expand Up @@ -1688,6 +1694,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
case BPF_LIRC_MODE2:
return lirc_prog_detach(attr);
case BPF_FLOW_DISSECTOR:
return skb_flow_dissector_bpf_prog_detach(attr);
default:
return -EINVAL;
}
Expand Down
32 changes: 32 additions & 0 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ static const char * const reg_type_str[] = {
[PTR_TO_PACKET] = "pkt",
[PTR_TO_PACKET_META] = "pkt_meta",
[PTR_TO_PACKET_END] = "pkt_end",
[PTR_TO_FLOW_KEYS] = "flow_keys",
};

static char slot_type_char[] = {
Expand Down Expand Up @@ -965,6 +966,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_PACKET:
case PTR_TO_PACKET_META:
case PTR_TO_PACKET_END:
case PTR_TO_FLOW_KEYS:
case CONST_PTR_TO_MAP:
return true;
default:
Expand Down Expand Up @@ -1238,6 +1240,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
case BPF_PROG_TYPE_LWT_XMIT:
case BPF_PROG_TYPE_SK_SKB:
case BPF_PROG_TYPE_SK_MSG:
case BPF_PROG_TYPE_FLOW_DISSECTOR:
if (meta)
return meta->pkt_access;

Expand Down Expand Up @@ -1321,6 +1324,18 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
return -EACCES;
}

static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
int size)
{
if (size < 0 || off < 0 ||
(u64)off + size > sizeof(struct bpf_flow_keys)) {
verbose(env, "invalid access to flow keys off=%d size=%d\n",
off, size);
return -EACCES;
}
return 0;
}

static bool __is_pointer_value(bool allow_ptr_leaks,
const struct bpf_reg_state *reg)
{
Expand Down Expand Up @@ -1422,6 +1437,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
* right in front, treat it the very same way.
*/
return check_pkt_ptr_alignment(env, reg, off, size, strict);
case PTR_TO_FLOW_KEYS:
pointer_desc = "flow keys ";
break;
case PTR_TO_MAP_VALUE:
pointer_desc = "value ";
break;
Expand Down Expand Up @@ -1692,6 +1710,17 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
err = check_packet_access(env, regno, off, size, false);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_FLOW_KEYS) {
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
verbose(env, "R%d leaks addr into flow keys\n",
value_regno);
return -EACCES;
}

err = check_flow_keys_access(env, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else {
verbose(env, "R%d invalid mem access '%s'\n", regno,
reg_type_str[reg->type]);
Expand Down Expand Up @@ -1839,6 +1868,8 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
case PTR_TO_PACKET_META:
return check_packet_access(env, regno, reg->off, access_size,
zero_size_allowed);
case PTR_TO_FLOW_KEYS:
return check_flow_keys_access(env, reg->off, access_size);
case PTR_TO_MAP_VALUE:
return check_map_access(env, regno, reg->off, access_size,
zero_size_allowed);
Expand Down Expand Up @@ -4366,6 +4397,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
case PTR_TO_CTX:
case CONST_PTR_TO_MAP:
case PTR_TO_PACKET_END:
case PTR_TO_FLOW_KEYS:
/* Only valid matches are exact, which memcmp() above
* would have accepted
*/
Expand Down
70 changes: 70 additions & 0 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -5123,6 +5123,17 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
}

static const struct bpf_func_proto *
flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_load_bytes:
return &bpf_skb_load_bytes_proto;
default:
return bpf_base_func_proto(func_id);
}
}

static const struct bpf_func_proto *
lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
Expand Down Expand Up @@ -5241,6 +5252,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
if (size != size_default)
return false;
break;
case bpf_ctx_range(struct __sk_buff, flow_keys):
if (size != sizeof(struct bpf_flow_keys *))
return false;
break;
default:
/* Only narrow read access allowed for now. */
if (type == BPF_WRITE) {
Expand All @@ -5266,6 +5281,7 @@ static bool sk_filter_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
case bpf_ctx_range(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}
Expand All @@ -5291,6 +5307,7 @@ static bool lwt_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, flow_keys):
return false;
}

Expand Down Expand Up @@ -5501,6 +5518,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
case bpf_ctx_range(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}
Expand Down Expand Up @@ -5702,6 +5720,7 @@ static bool sk_skb_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, flow_keys):
return false;
}

Expand Down Expand Up @@ -5761,6 +5780,39 @@ static bool sk_msg_is_valid_access(int off, int size,
return true;
}

static bool flow_dissector_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (type == BPF_WRITE) {
switch (off) {
case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
break;
default:
return false;
}
}

switch (off) {
case bpf_ctx_range(struct __sk_buff, data):
info->reg_type = PTR_TO_PACKET;
break;
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
case bpf_ctx_range(struct __sk_buff, flow_keys):
info->reg_type = PTR_TO_FLOW_KEYS;
break;
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}

return bpf_skb_is_valid_access(off, size, type, prog, info);
}

static u32 bpf_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
Expand Down Expand Up @@ -6055,6 +6107,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
bpf_target_off(struct sock_common,
skc_num, 2, target_size));
break;

case offsetof(struct __sk_buff, flow_keys):
off = si->off;
off -= offsetof(struct __sk_buff, flow_keys);
off += offsetof(struct sk_buff, cb);
off += offsetof(struct qdisc_skb_cb, flow_keys);
*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
si->src_reg, off);
break;
}

return insn - insn_buf;
Expand Down Expand Up @@ -7018,6 +7079,15 @@ const struct bpf_verifier_ops sk_msg_verifier_ops = {
const struct bpf_prog_ops sk_msg_prog_ops = {
};

const struct bpf_verifier_ops flow_dissector_verifier_ops = {
.get_func_proto = flow_dissector_func_proto,
.is_valid_access = flow_dissector_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};

const struct bpf_prog_ops flow_dissector_prog_ops = {
};

int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
Expand Down
Loading

0 comments on commit 4a9f42c

Please sign in to comment.