Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:

====================
pull-request: bpf-next 2021-07-15

The following pull-request contains BPF updates for your *net-next* tree.

We've added 45 non-merge commits during the last 15 day(s) which contain
a total of 52 files changed, 3122 insertions(+), 384 deletions(-).

The main changes are:

1) Introduce bpf timers, from Alexei.

2) Add sockmap support for unix datagram socket, from Cong.

3) Fix potential memleak and UAF in the verifier, from He.

4) Add bpf_get_func_ip helper, from Jiri.

5) Improvements to generic XDP mode, from Kumar.

6) Support for passing xdp_md to XDP programs in bpf_prog_run, from Zvi.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
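
For context on item 1 above (bpf timers), the sketch below shows roughly how a BPF program might use the new helpers. It is an illustrative example, not part of this commit: the map, struct, section, and callback names are hypothetical, and it assumes libbpf's SEC()/bpf_helpers.h conveniences and the generated helper declarations from the same kernel tree.

/* Hypothetical usage sketch: arming a bpf timer from a tracing program.
 * All names below are illustrative.
 */
#include <linux/bpf.h>
#include <time.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct map_elem {
	int counter;
	struct bpf_timer timer;	/* struct bpf_timer must be embedded in a map value */
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, struct map_elem);
} timer_map SEC(".maps");

/* Callback runs asynchronously once the timer expires. */
static int timer_cb(void *map, int *key, struct map_elem *val)
{
	val->counter++;
	return 0;
}

SEC("fentry/bpf_fentry_test1")
int BPF_PROG(arm_timer)
{
	struct map_elem *val;
	int key = 0;

	val = bpf_map_lookup_elem(&timer_map, &key);
	if (!val)
		return 0;

	bpf_timer_init(&val->timer, &timer_map, CLOCK_MONOTONIC);
	bpf_timer_set_callback(&val->timer, timer_cb);
	bpf_timer_start(&val->timer, 1000000 /* 1 ms */, 0);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";

The verifier-side pieces visible in the diff below (timer_off tracking in struct bpf_map, ARG_PTR_TO_TIMER, bpf_timer_cancel_and_free, and the async-callback state in bpf_verifier.h) exist to validate exactly this pattern: the timer must live in a map value, and the callback is checked as an asynchronous subprogram.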
David S. Miller committed Jul 16, 2021
2 parents 87117ba + c50524e commit 82a1ffe
Showing 52 changed files with 3,122 additions and 384 deletions.
1 change: 1 addition & 0 deletions MAINTAINERS
@@ -10387,6 +10387,7 @@ F: net/core/skmsg.c
F: net/core/sock_map.c
F: net/ipv4/tcp_bpf.c
F: net/ipv4/udp_bpf.c
F: net/unix/unix_bpf.c

LANDLOCK SECURITY MODULE
M: Mickaël Salaün <mic@digikod.net>
19 changes: 19 additions & 0 deletions arch/x86/net/bpf_jit_comp.c
@@ -1954,6 +1954,9 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (flags & BPF_TRAMP_F_CALL_ORIG)
stack_size += 8; /* room for return value of orig_call */

if (flags & BPF_TRAMP_F_IP_ARG)
stack_size += 8; /* room for IP address argument */

if (flags & BPF_TRAMP_F_SKIP_FRAME)
/* skip patched call instruction and point orig_call to actual
* body of the kernel function.
@@ -1967,6 +1970,22 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
EMIT1(0x53); /* push rbx */

if (flags & BPF_TRAMP_F_IP_ARG) {
/* Store IP address of the traced function:
* mov rax, QWORD PTR [rbp + 8]
* sub rax, X86_PATCH_SIZE
* mov QWORD PTR [rbp - stack_size], rax
*/
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
EMIT4(0x48, 0x83, 0xe8, X86_PATCH_SIZE);
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -stack_size);

/* Continue with stack_size for regs storage, stack will
* be correctly restored with 'leave' instruction.
*/
stack_size -= 8;
}

save_regs(m, &prog, nr_args, stack_size);

if (flags & BPF_TRAMP_F_CALL_ORIG) {
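
The trampoline hunk above stores the traced function's start address on the stack when BPF_TRAMP_F_IP_ARG is set; it backs the new bpf_get_func_ip() helper (item 4 in the pull-request summary). A minimal, hypothetical fentry program using the helper might look like the following; the attach target and includes are illustrative assumptions, not taken from this commit.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("fentry/do_unlinkat")
int BPF_PROG(trace_unlinkat)
{
	/* IP of the traced kernel function, recovered from the trampoline stack */
	__u64 ip = bpf_get_func_ip(ctx);

	bpf_printk("func ip: %lx", ip);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";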
50 changes: 50 additions & 0 deletions include/linux/bitops.h
@@ -4,6 +4,7 @@

#include <asm/types.h>
#include <linux/bits.h>
#include <linux/typecheck.h>

#include <uapi/linux/kernel.h>

@@ -253,6 +254,55 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr,
__clear_bit(nr, addr);
}

/**
* __ptr_set_bit - Set bit in a pointer's value
* @nr: the bit to set
* @addr: the address of the pointer variable
*
* Example:
* void *p = foo();
* __ptr_set_bit(bit, &p);
*/
#define __ptr_set_bit(nr, addr) \
({ \
typecheck_pointer(*(addr)); \
__set_bit(nr, (unsigned long *)(addr)); \
})

/**
* __ptr_clear_bit - Clear bit in a pointer's value
* @nr: the bit to clear
* @addr: the address of the pointer variable
*
* Example:
* void *p = foo();
* __ptr_clear_bit(bit, &p);
*/
#define __ptr_clear_bit(nr, addr) \
({ \
typecheck_pointer(*(addr)); \
__clear_bit(nr, (unsigned long *)(addr)); \
})

/**
* __ptr_test_bit - Test bit in a pointer's value
* @nr: the bit to test
* @addr: the address of the pointer variable
*
* Example:
* void *p = foo();
* if (__ptr_test_bit(bit, &p)) {
* ...
* } else {
* ...
* }
*/
#define __ptr_test_bit(nr, addr) \
({ \
typecheck_pointer(*(addr)); \
test_bit(nr, (unsigned long *)(addr)); \
})

#ifdef __KERNEL__

#ifndef set_mask_bits
100 changes: 69 additions & 31 deletions include/linux/bpf.h
@@ -168,6 +168,7 @@ struct bpf_map {
u32 max_entries;
u32 map_flags;
int spin_lock_off; /* >=0 valid offset, <0 error */
int timer_off; /* >=0 valid offset, <0 error */
u32 id;
int numa_node;
u32 btf_key_type_id;
@@ -197,30 +198,53 @@ static inline bool map_value_has_spin_lock(const struct bpf_map *map)
return map->spin_lock_off >= 0;
}

static inline void check_and_init_map_lock(struct bpf_map *map, void *dst)
static inline bool map_value_has_timer(const struct bpf_map *map)
{
if (likely(!map_value_has_spin_lock(map)))
return;
*(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
(struct bpf_spin_lock){};
return map->timer_off >= 0;
}

/* copy everything but bpf_spin_lock */
static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
{
if (unlikely(map_value_has_spin_lock(map)))
*(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
(struct bpf_spin_lock){};
if (unlikely(map_value_has_timer(map)))
*(struct bpf_timer *)(dst + map->timer_off) =
(struct bpf_timer){};
}

/* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
{
u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0;

if (unlikely(map_value_has_spin_lock(map))) {
u32 off = map->spin_lock_off;
s_off = map->spin_lock_off;
s_sz = sizeof(struct bpf_spin_lock);
} else if (unlikely(map_value_has_timer(map))) {
t_off = map->timer_off;
t_sz = sizeof(struct bpf_timer);
}

memcpy(dst, src, off);
memcpy(dst + off + sizeof(struct bpf_spin_lock),
src + off + sizeof(struct bpf_spin_lock),
map->value_size - off - sizeof(struct bpf_spin_lock));
if (unlikely(s_sz || t_sz)) {
if (s_off < t_off || !s_sz) {
swap(s_off, t_off);
swap(s_sz, t_sz);
}
memcpy(dst, src, t_off);
memcpy(dst + t_off + t_sz,
src + t_off + t_sz,
s_off - t_off - t_sz);
memcpy(dst + s_off + s_sz,
src + s_off + s_sz,
map->value_size - s_off - s_sz);
} else {
memcpy(dst, src, map->value_size);
}
}
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
bool lock_src);
void bpf_timer_cancel_and_free(void *timer);
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);

struct bpf_offload_dev;
@@ -314,6 +338,7 @@ enum bpf_arg_type {
ARG_PTR_TO_FUNC, /* pointer to a bpf program function */
ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */
ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */
ARG_PTR_TO_TIMER, /* pointer to bpf_timer */
__BPF_ARG_TYPE_MAX,
};

@@ -554,6 +579,11 @@ struct btf_func_model {
*/
#define BPF_TRAMP_F_SKIP_FRAME BIT(2)

/* Store IP address of the caller on the trampoline stack,
* so it's available for trampoline's programs.
*/
#define BPF_TRAMP_F_IP_ARG BIT(3)

/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
* bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
*/
@@ -1509,12 +1539,12 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
struct bpf_prog *xdp_prog, struct bpf_map *map,
bool exclude_ingress);
bool dev_map_can_have_prog(struct bpf_map *map);

void __cpu_map_flush(void);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx);
bool cpu_map_prog_allowed(struct bpf_map *map);
int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
struct sk_buff *skb);

/* Return map's numa specified by userspace */
static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
@@ -1711,6 +1741,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
return 0;
}

static inline int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
struct sk_buff *skb)
{
return -EOPNOTSUPP;
}

static inline bool cpu_map_prog_allowed(struct bpf_map *map)
{
return false;
@@ -1852,6 +1888,12 @@ void bpf_map_offload_map_free(struct bpf_map *map);
int bpf_prog_test_run_syscall(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);

int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
#else
static inline int bpf_prog_offload_init(struct bpf_prog *prog,
union bpf_attr *attr)
@@ -1884,24 +1926,6 @@ static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog,
{
return -ENOTSUPP;
}
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */

#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);

void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
void *value);
int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
void *value, u64 map_flags);
#else
static inline void bpf_sk_reuseport_detach(struct sock *sk)
{
}

#ifdef CONFIG_BPF_SYSCALL
static inline int sock_map_get_from_fd(const union bpf_attr *attr,
@@ -1921,7 +1945,21 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
{
return -EOPNOTSUPP;
}
#endif /* CONFIG_BPF_SYSCALL */
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */

#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
void *value);
int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
void *value, u64 map_flags);
#else
static inline void bpf_sk_reuseport_detach(struct sock *sk)
{
}

#ifdef CONFIG_BPF_SYSCALL
static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
void *key, void *value)
{
19 changes: 17 additions & 2 deletions include/linux/bpf_verifier.h
@@ -53,7 +53,14 @@ struct bpf_reg_state {
/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
* PTR_TO_MAP_VALUE_OR_NULL
*/
struct bpf_map *map_ptr;
struct {
struct bpf_map *map_ptr;
/* To distinguish map lookups from outer map
* the map_uid is non-zero for registers
* pointing to inner maps.
*/
u32 map_uid;
};

/* for PTR_TO_BTF_ID */
struct {
@@ -201,12 +208,19 @@ struct bpf_func_state {
* zero == main subprog
*/
u32 subprogno;
/* Every bpf_timer_start will increment async_entry_cnt.
* It's used to distinguish:
* void foo(void) { for(;;); }
* void foo(void) { bpf_timer_set_callback(,foo); }
*/
u32 async_entry_cnt;
bool in_callback_fn;
bool in_async_callback_fn;

/* The following fields should be last. See copy_func_state() */
int acquired_refs;
struct bpf_reference_state *refs;
int allocated_stack;
bool in_callback_fn;
struct bpf_stack_state *stack;
};

@@ -392,6 +406,7 @@ struct bpf_subprog_info {
bool has_tail_call;
bool tail_call_reachable;
bool has_ld_abs;
bool is_async_cb;
};

/* single container for all structs
1 change: 1 addition & 0 deletions include/linux/btf.h
@@ -99,6 +99,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
const struct btf_member *m,
u32 expected_offset, u32 expected_size);
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
int btf_find_timer(const struct btf *btf, const struct btf_type *t);
bool btf_type_is_void(const struct btf_type *t);
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
3 changes: 2 additions & 1 deletion include/linux/filter.h
@@ -559,7 +559,8 @@ struct bpf_prog {
kprobe_override:1, /* Do we override a kprobe? */
has_callchain_buf:1, /* callchain buffer allocated? */
enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
call_get_stack:1; /* Do we call bpf_get_stack() or bpf_get_stackid() */
call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
call_get_func_ip:1; /* Do we call get_func_ip() */
enum bpf_prog_type type; /* Type of BPF program */
enum bpf_attach_type expected_attach_type; /* For some prog types */
u32 len; /* Number of filter blocks */
2 changes: 2 additions & 0 deletions include/linux/netdevice.h
@@ -3984,6 +3984,8 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
__dev_kfree_skb_any(skb, SKB_REASON_CONSUMED);
}

u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog);
void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
int netif_rx(struct sk_buff *skb);
10 changes: 2 additions & 8 deletions include/linux/skbuff.h
@@ -863,8 +863,8 @@ struct sk_buff {
__u8 tc_skip_classify:1;
__u8 tc_at_ingress:1;
#endif
#ifdef CONFIG_NET_REDIRECT
__u8 redirected:1;
#ifdef CONFIG_NET_REDIRECT
__u8 from_ingress:1;
#endif
#ifdef CONFIG_TLS_DEVICE
@@ -4664,17 +4664,13 @@ static inline __wsum lco_csum(struct sk_buff *skb)

static inline bool skb_is_redirected(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_REDIRECT
return skb->redirected;
#else
return false;
#endif
}

static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
{
#ifdef CONFIG_NET_REDIRECT
skb->redirected = 1;
#ifdef CONFIG_NET_REDIRECT
skb->from_ingress = from_ingress;
if (skb->from_ingress)
skb->tstamp = 0;
@@ -4683,9 +4679,7 @@ static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)

static inline void skb_reset_redirect(struct sk_buff *skb)
{
#ifdef CONFIG_NET_REDIRECT
skb->redirected = 0;
#endif
}

static inline bool skb_csum_is_sctp(struct sk_buff *skb)
