Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Browse files Browse the repository at this point in the history
Alexei Starovoitov says:

====================
pull-request: bpf-next 2019-10-14

The following pull-request contains BPF updates for your *net-next* tree.

12 days of development and
85 files changed, 1889 insertions(+), 1020 deletions(-)

The main changes are:

1) auto-generation of bpf_helper_defs.h, from Andrii.

2) split of bpf_helpers.h into bpf_{helpers, helper_defs, endian, tracing}.h
   and move into libbpf, from Andrii.

3) Track contents of read-only maps as scalars in the verifier, from Andrii.

4) small x86 JIT optimization, from Daniel.

5) cross compilation support, from Ivan.

6) bpf flow_dissector enhancements, from Jakub and Stanislav.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Oct 14, 2019
2 parents 7e0d15e + b8fc345 commit a98d62c
Show file tree
Hide file tree
Showing 85 changed files with 1,889 additions and 1,020 deletions.
3 changes: 3 additions & 0 deletions Documentation/bpf/prog_flow_dissector.rst
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,6 @@ BPF flow dissector doesn't support exporting all the metadata that in-kernel
C-based implementation can export. Notable example is single VLAN (802.1Q)
and double VLAN (802.1AD) tags. Please refer to the ``struct bpf_flow_keys``
for a set of information that's currently can be exported from the BPF context.

When BPF flow dissector is attached to the root network namespace (machine-wide
policy), users can't override it in their child network namespaces.
10 changes: 10 additions & 0 deletions arch/x86/net/bpf_jit_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -909,6 +909,16 @@ xadd: if (is_imm8(insn->off))
case BPF_JMP32 | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K:
/* test dst_reg, dst_reg to save one extra byte */
if (imm32 == 0) {
if (BPF_CLASS(insn->code) == BPF_JMP)
EMIT1(add_2mod(0x48, dst_reg, dst_reg));
else if (is_ereg(dst_reg))
EMIT1(add_2mod(0x40, dst_reg, dst_reg));
EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
goto emit_cond_jmp;
}

/* cmp dst_reg, imm8/32 */
if (BPF_CLASS(insn->code) == BPF_JMP)
EMIT1(add_1mod(0x48, dst_reg));
Expand Down
2 changes: 1 addition & 1 deletion include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ struct bpf_prog_stats {
u64 cnt;
u64 nsecs;
struct u64_stats_sync syncp;
};
} __aligned(2 * sizeof(u64));

struct bpf_prog_aux {
atomic_t refcnt;
Expand Down
32 changes: 16 additions & 16 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -794,7 +794,7 @@ union bpf_attr {
* A 64-bit integer containing the current GID and UID, and
* created as such: *current_gid* **<< 32 \|** *current_uid*.
*
* int bpf_get_current_comm(char *buf, u32 size_of_buf)
* int bpf_get_current_comm(void *buf, u32 size_of_buf)
* Description
* Copy the **comm** attribute of the current task into *buf* of
* *size_of_buf*. The **comm** attribute contains the name of
Expand Down Expand Up @@ -1023,7 +1023,7 @@ union bpf_attr {
* The realm of the route for the packet associated to *skb*, or 0
* if none was found.
*
* int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
* int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
* Description
* Write raw *data* blob into a special BPF perf event held by
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
Expand Down Expand Up @@ -1068,7 +1068,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
* int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len)
* Description
* This helper was provided as an easy way to load data from a
* packet. It can be used to load *len* bytes from *offset* from
Expand All @@ -1085,7 +1085,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags)
* int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags)
* Description
* Walk a user or a kernel stack and return its id. To achieve
* this, the helper needs *ctx*, which is a pointer to the context
Expand Down Expand Up @@ -1154,7 +1154,7 @@ union bpf_attr {
* The checksum result, or a negative error code in case of
* failure.
*
* int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
* int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
* Description
* Retrieve tunnel options metadata for the packet associated to
* *skb*, and store the raw tunnel option data to the buffer *opt*
Expand All @@ -1172,7 +1172,7 @@ union bpf_attr {
* Return
* The size of the option data retrieved.
*
* int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
* int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
* Description
* Set tunnel options metadata for the packet associated to *skb*
* to the option data contained in the raw buffer *opt* of *size*.
Expand Down Expand Up @@ -1511,7 +1511,7 @@ union bpf_attr {
* Return
* 0
*
* int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
* int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
* Description
* Emulate a call to **setsockopt()** on the socket associated to
* *bpf_socket*, which must be a full socket. The *level* at
Expand Down Expand Up @@ -1595,7 +1595,7 @@ union bpf_attr {
* Return
* **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
*
* int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
* int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
* Description
* Redirect the packet to the socket referenced by *map* (of type
* **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
Expand Down Expand Up @@ -1715,7 +1715,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
* int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
* Description
* Emulate a call to **getsockopt()** on the socket associated to
* *bpf_socket*, which must be a full socket. The *level* at
Expand Down Expand Up @@ -1947,7 +1947,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
* int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags)
* Description
* Return a user or a kernel stack in bpf program provided buffer.
* To achieve this, the helper needs *ctx*, which is a pointer
Expand Down Expand Up @@ -1980,7 +1980,7 @@ union bpf_attr {
* A non-negative value equal to or less than *size* on success,
* or a negative error in case of failure.
*
* int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
* int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
* Description
* This helper is similar to **bpf_skb_load_bytes**\ () in that
* it provides an easy way to load *len* bytes from *offset*
Expand Down Expand Up @@ -2033,7 +2033,7 @@ union bpf_attr {
* * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
* packet is not forwarded or needs assist from full stack
*
* int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
* int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
* Description
* Add an entry to, or update a sockhash *map* referencing sockets.
* The *skops* is used as a new value for the entry associated to
Expand Down Expand Up @@ -2392,7 +2392,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags)
* int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
* Description
* For socket policies, insert *len* bytes into *msg* at offset
* *start*.
Expand All @@ -2408,9 +2408,9 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags)
* int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
* Description
* Will remove *pop* bytes from a *msg* starting at byte *start*.
* Will remove *len* bytes from a *msg* starting at byte *start*.
* This may result in **ENOMEM** errors under certain situations if
* an allocation and copy are required due to a full ring buffer.
* However, the helper will try to avoid doing the allocation
Expand Down Expand Up @@ -2505,7 +2505,7 @@ union bpf_attr {
* A **struct bpf_tcp_sock** pointer on success, or **NULL** in
* case of failure.
*
* int bpf_skb_ecn_set_ce(struct sk_buf *skb)
* int bpf_skb_ecn_set_ce(struct sk_buff *skb)
* Description
* Set ECN (Explicit Congestion Notification) field of IP header
* to **CE** (Congestion Encountered) if current value is **ECT**
Expand Down
57 changes: 55 additions & 2 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -2739,6 +2739,41 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
reg->smax_value = reg->umax_value;
}

static bool bpf_map_is_rdonly(const struct bpf_map *map)
{
return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
}

static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
{
void *ptr;
u64 addr;
int err;

err = map->ops->map_direct_value_addr(map, &addr, off);
if (err)
return err;
ptr = (void *)(long)addr + off;

switch (size) {
case sizeof(u8):
*val = (u64)*(u8 *)ptr;
break;
case sizeof(u16):
*val = (u64)*(u16 *)ptr;
break;
case sizeof(u32):
*val = (u64)*(u32 *)ptr;
break;
case sizeof(u64):
*val = *(u64 *)ptr;
break;
default:
return -EINVAL;
}
return 0;
}

/* check whether memory at (regno + off) is accessible for t = (read | write)
* if t==write, value_regno is a register which value is stored into memory
* if t==read, value_regno is a register which will receive the value from memory
Expand Down Expand Up @@ -2776,9 +2811,27 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (err)
return err;
err = check_map_access(env, regno, off, size, false);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
if (!err && t == BPF_READ && value_regno >= 0) {
struct bpf_map *map = reg->map_ptr;

/* if map is read-only, track its contents as scalars */
if (tnum_is_const(reg->var_off) &&
bpf_map_is_rdonly(map) &&
map->ops->map_direct_value_addr) {
int map_off = off + reg->var_off.value;
u64 val = 0;

err = bpf_map_direct_read(map, map_off, size,
&val);
if (err)
return err;

regs[value_regno].type = SCALAR_VALUE;
__mark_reg_known(&regs[value_regno], val);
} else {
mark_reg_unknown(env, regs, value_regno);
}
}
} else if (reg->type == PTR_TO_CTX) {
enum bpf_reg_type reg_type = SCALAR_VALUE;

Expand Down
46 changes: 40 additions & 6 deletions net/core/flow_dissector.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,19 +114,50 @@ int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
{
struct bpf_prog *attached;
struct net *net;
int ret = 0;

net = current->nsproxy->net_ns;
mutex_lock(&flow_dissector_mutex);

if (net == &init_net) {
/* BPF flow dissector in the root namespace overrides
* any per-net-namespace one. When attaching to root,
* make sure we don't have any BPF program attached
* to the non-root namespaces.
*/
struct net *ns;

for_each_net(ns) {
if (ns == &init_net)
continue;
if (rcu_access_pointer(ns->flow_dissector_prog)) {
ret = -EEXIST;
goto out;
}
}
} else {
/* Make sure root flow dissector is not attached
* when attaching to the non-root namespace.
*/
if (rcu_access_pointer(init_net.flow_dissector_prog)) {
ret = -EEXIST;
goto out;
}
}

attached = rcu_dereference_protected(net->flow_dissector_prog,
lockdep_is_held(&flow_dissector_mutex));
if (attached) {
/* Only one BPF program can be attached at a time */
mutex_unlock(&flow_dissector_mutex);
return -EEXIST;
if (attached == prog) {
/* The same program cannot be attached twice */
ret = -EINVAL;
goto out;
}
rcu_assign_pointer(net->flow_dissector_prog, prog);
if (attached)
bpf_prog_put(attached);
out:
mutex_unlock(&flow_dissector_mutex);
return 0;
return ret;
}

int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
Expand Down Expand Up @@ -910,7 +941,10 @@ bool __skb_flow_dissect(const struct net *net,
WARN_ON_ONCE(!net);
if (net) {
rcu_read_lock();
attached = rcu_dereference(net->flow_dissector_prog);
attached = rcu_dereference(init_net.flow_dissector_prog);

if (!attached)
attached = rcu_dereference(net->flow_dissector_prog);

if (attached) {
struct bpf_flow_keys flow_keys;
Expand Down
2 changes: 1 addition & 1 deletion net/core/xdp.c
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);

/* XDP RX runs under NAPI protection, and in different delivery error
* scenarios (e.g. queue full), it is possible to return the xdp_frame
* while still leveraging this protection. The @napi_direct boolian
* while still leveraging this protection. The @napi_direct boolean
* is used for those calls sites. Thus, allowing for faster recycling
* of xdp_frames/pages in those cases.
*/
Expand Down
Loading

0 comments on commit a98d62c

Please sign in to comment.