Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Browse files Browse the repository at this point in the history
Alexei Starovoitov says:

====================
pull-request: bpf 2020-06-12

The following pull-request contains BPF updates for your *net* tree.

We've added 26 non-merge commits during the last 10 day(s) which contain
a total of 27 files changed, 348 insertions(+), 93 deletions(-).

The main changes are:

1) sock_hash accounting fix, from Andrey.

2) libbpf fix and probe_mem sanitizing, from Andrii.

3) sock_hash fixes, from Jakub.

4) devmap_val fix, from Jesper.

5) load_bytes_relative fix, from YiFei.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Jun 13, 2020
2 parents bf97bac + 29fcb05 commit fa7566a
Showing 27 changed files with 348 additions and 93 deletions.
13 changes: 13 additions & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
@@ -3761,6 +3761,19 @@ struct xdp_md {
__u32 egress_ifindex; /* txq->dev->ifindex */
};

/* DEVMAP map-value layout
*
* The struct data-layout of map-value is a configuration interface.
* New members can only be added to the end of this structure.
*
* bpf_prog is a union: the same storage carries a program file descriptor
* when the value is written to the map, and a program id when the value is
* read back from the map.
*/
struct bpf_devmap_val {
__u32 ifindex; /* device index */
union {
int fd; /* prog fd on map write */
__u32 id; /* prog id on map read */
} bpf_prog;
};

enum sk_action {
SK_DROP = 0,
SK_PASS,
2 changes: 1 addition & 1 deletion kernel/bpf/cgroup.c
Original file line number Diff line number Diff line change
@@ -378,7 +378,7 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
}

list_for_each_entry(pl, progs, node) {
if (prog && pl->prog == prog)
if (prog && pl->prog == prog && prog != replace_prog)
/* disallow attaching the same prog twice */
return ERR_PTR(-EINVAL);
if (link && pl->link == link)
18 changes: 5 additions & 13 deletions kernel/bpf/devmap.c
Original file line number Diff line number Diff line change
@@ -60,15 +60,6 @@ struct xdp_dev_bulk_queue {
unsigned int count;
};

/* DEVMAP values */
struct bpf_devmap_val {
u32 ifindex; /* device index */
union {
int fd; /* prog fd on map write */
u32 id; /* prog id on map read */
} bpf_prog;
};

struct bpf_dtab_netdev {
struct net_device *dev; /* must be first member, due to tracepoint */
struct hlist_node index_hlist;
@@ -479,6 +470,7 @@ static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
struct xdp_txq_info txq = { .dev = dev };
u32 act;

xdp_set_data_meta_invalid(xdp);
xdp->txq = &txq;

act = bpf_prog_run_xdp(xdp_prog, xdp);
@@ -618,7 +610,7 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
if (!dev->dev)
goto err_out;

if (val->bpf_prog.fd >= 0) {
if (val->bpf_prog.fd > 0) {
prog = bpf_prog_get_type_dev(val->bpf_prog.fd,
BPF_PROG_TYPE_XDP, false);
if (IS_ERR(prog))
@@ -652,8 +644,8 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
void *key, void *value, u64 map_flags)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_devmap_val val = { .bpf_prog.fd = -1 };
struct bpf_dtab_netdev *dev, *old_dev;
struct bpf_devmap_val val = {};
u32 i = *(u32 *)key;

if (unlikely(map_flags > BPF_EXIST))
@@ -669,7 +661,7 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
if (!val.ifindex) {
dev = NULL;
/* can not specify fd if ifindex is 0 */
if (val.bpf_prog.fd != -1)
if (val.bpf_prog.fd > 0)
return -EINVAL;
} else {
dev = __dev_map_alloc_node(net, dtab, &val, i);
@@ -699,8 +691,8 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
void *key, void *value, u64 map_flags)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_devmap_val val = { .bpf_prog.fd = -1 };
struct bpf_dtab_netdev *dev, *old_dev;
struct bpf_devmap_val val = {};
u32 idx = *(u32 *)key;
unsigned long flags;
int err = -EEXIST;
17 changes: 12 additions & 5 deletions kernel/bpf/syscall.c
Original file line number Diff line number Diff line change
@@ -3158,6 +3158,7 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
struct bpf_insn *insns;
u32 off, type;
u64 imm;
u8 code;
int i;

insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog),
@@ -3166,21 +3167,27 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
return insns;

for (i = 0; i < prog->len; i++) {
if (insns[i].code == (BPF_JMP | BPF_TAIL_CALL)) {
code = insns[i].code;

if (code == (BPF_JMP | BPF_TAIL_CALL)) {
insns[i].code = BPF_JMP | BPF_CALL;
insns[i].imm = BPF_FUNC_tail_call;
/* fall-through */
}
if (insns[i].code == (BPF_JMP | BPF_CALL) ||
insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) {
if (insns[i].code == (BPF_JMP | BPF_CALL_ARGS))
if (code == (BPF_JMP | BPF_CALL) ||
code == (BPF_JMP | BPF_CALL_ARGS)) {
if (code == (BPF_JMP | BPF_CALL_ARGS))
insns[i].code = BPF_JMP | BPF_CALL;
if (!bpf_dump_raw_ok())
insns[i].imm = 0;
continue;
}
if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) {
insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM;
continue;
}

if (insns[i].code != (BPF_LD | BPF_IMM | BPF_DW))
if (code != (BPF_LD | BPF_IMM | BPF_DW))
continue;

imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
2 changes: 1 addition & 1 deletion kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
@@ -7552,7 +7552,7 @@ static int check_btf_func(struct bpf_verifier_env *env,
const struct btf *btf;
void __user *urecord;
u32 prev_offset = 0;
int ret = 0;
int ret = -ENOMEM;

nfuncs = attr->func_info_cnt;
if (!nfuncs)
2 changes: 1 addition & 1 deletion kernel/trace/trace_kprobe.c
Original file line number Diff line number Diff line change
@@ -1629,7 +1629,7 @@ int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
if (perf_type_tracepoint)
tk = find_trace_kprobe(pevent, group);
else
tk = event->tp_event->data;
tk = trace_kprobe_primary_from_call(event->tp_event);
if (!tk)
return -EINVAL;

2 changes: 1 addition & 1 deletion kernel/trace/trace_uprobe.c
Original file line number Diff line number Diff line change
@@ -1412,7 +1412,7 @@ int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
if (perf_type_tracepoint)
tu = find_probe_event(pevent, group);
else
tu = event->tp_event->data;
tu = trace_uprobe_primary_from_call(event->tp_event);
if (!tu)
return -EINVAL;

19 changes: 9 additions & 10 deletions net/core/filter.c
Original file line number Diff line number Diff line change
@@ -1755,25 +1755,27 @@ BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
u32, offset, void *, to, u32, len, u32, start_header)
{
u8 *end = skb_tail_pointer(skb);
u8 *net = skb_network_header(skb);
u8 *mac = skb_mac_header(skb);
u8 *ptr;
u8 *start, *ptr;

if (unlikely(offset > 0xffff || len > (end - mac)))
if (unlikely(offset > 0xffff))
goto err_clear;

switch (start_header) {
case BPF_HDR_START_MAC:
ptr = mac + offset;
if (unlikely(!skb_mac_header_was_set(skb)))
goto err_clear;
start = skb_mac_header(skb);
break;
case BPF_HDR_START_NET:
ptr = net + offset;
start = skb_network_header(skb);
break;
default:
goto err_clear;
}

if (likely(ptr >= mac && ptr + len <= end)) {
ptr = start + offset;

if (likely(ptr + len <= end)) {
memcpy(to, ptr, len);
return 0;
}
@@ -4340,8 +4342,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
}
break;
case SO_BINDTODEVICE:
ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
optlen = min_t(long, optlen, IFNAMSIZ - 1);
strncpy(devname, optval, optlen);
devname[optlen] = 0;
@@ -4360,7 +4360,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
dev_put(dev);
}
ret = sock_bindtoindex(sk, ifindex, false);
#endif
break;
default:
ret = -EINVAL;
38 changes: 32 additions & 6 deletions net/core/sock_map.c
Original file line number Diff line number Diff line change
@@ -424,10 +424,7 @@ static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next)
return 0;
}

static bool sock_map_redirect_allowed(const struct sock *sk)
{
return sk->sk_state != TCP_LISTEN;
}
static bool sock_map_redirect_allowed(const struct sock *sk);

static int sock_map_update_common(struct bpf_map *map, u32 idx,
struct sock *sk, u64 flags)
@@ -508,6 +505,11 @@ static bool sk_is_udp(const struct sock *sk)
sk->sk_protocol == IPPROTO_UDP;
}

/* Return true only when sk is a TCP socket (as reported by sk_is_tcp())
 * whose sk_state is not TCP_LISTEN; return false for every other socket.
 */
static bool sock_map_redirect_allowed(const struct sock *sk)
{
return sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN;
}

static bool sock_map_sk_is_suitable(const struct sock *sk)
{
return sk_is_tcp(sk) || sk_is_udp(sk);
@@ -989,11 +991,15 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
err = -EINVAL;
goto free_htab;
}
err = bpf_map_charge_init(&htab->map.memory, cost);
if (err)
goto free_htab;

htab->buckets = bpf_map_area_alloc(htab->buckets_num *
sizeof(struct bpf_htab_bucket),
htab->map.numa_node);
if (!htab->buckets) {
bpf_map_charge_finish(&htab->map.memory);
err = -ENOMEM;
goto free_htab;
}
@@ -1013,6 +1019,7 @@ static void sock_hash_free(struct bpf_map *map)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct bpf_htab_bucket *bucket;
struct hlist_head unlink_list;
struct bpf_htab_elem *elem;
struct hlist_node *node;
int i;
@@ -1024,13 +1031,32 @@ static void sock_hash_free(struct bpf_map *map)
synchronize_rcu();
for (i = 0; i < htab->buckets_num; i++) {
bucket = sock_hash_select_bucket(htab, i);
hlist_for_each_entry_safe(elem, node, &bucket->head, node) {
hlist_del_rcu(&elem->node);

/* We are racing with sock_hash_delete_from_link to
* enter the spin-lock critical section. Every socket on
* the list is still linked to sockhash. Since link
* exists, psock exists and holds a ref to socket. That
* lets us grab a socket ref too.
*/
raw_spin_lock_bh(&bucket->lock);
hlist_for_each_entry(elem, &bucket->head, node)
sock_hold(elem->sk);
hlist_move_list(&bucket->head, &unlink_list);
raw_spin_unlock_bh(&bucket->lock);

/* Process removed entries out of atomic context to
* block for socket lock before deleting the psock's
* link to sockhash.
*/
hlist_for_each_entry_safe(elem, node, &unlink_list, node) {
hlist_del(&elem->node);
lock_sock(elem->sk);
rcu_read_lock();
sock_map_unref(elem->sk, elem);
rcu_read_unlock();
release_sock(elem->sk);
sock_put(elem->sk);
sock_hash_free_elem(htab, elem);
}
}

6 changes: 6 additions & 0 deletions net/ipv4/tcp_bpf.c
Original file line number Diff line number Diff line change
@@ -64,6 +64,9 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
} while (i != msg_rx->sg.end);

if (unlikely(peek)) {
if (msg_rx == list_last_entry(&psock->ingress_msg,
struct sk_msg, list))
break;
msg_rx = list_next_entry(msg_rx, list);
continue;
}
@@ -242,6 +245,9 @@ static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
DEFINE_WAIT_FUNC(wait, woken_wake_function);
int ret = 0;

if (sk->sk_shutdown & RCV_SHUTDOWN)
return 1;

if (!timeo)
return ret;

4 changes: 1 addition & 3 deletions net/xdp/xsk.c
Original file line number Diff line number Diff line change
@@ -352,10 +352,8 @@ static int xsk_generic_xmit(struct sock *sk)

len = desc.len;
skb = sock_alloc_send_skb(sk, len, 1, &err);
if (unlikely(!skb)) {
err = -EAGAIN;
if (unlikely(!skb))
goto out;
}

skb_put(skb, len);
addr = desc.addr;
4 changes: 2 additions & 2 deletions scripts/link-vmlinux.sh
Original file line number Diff line number Diff line change
@@ -143,8 +143,8 @@ gen_btf()
fi

pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/')
if [ "${pahole_ver}" -lt "113" ]; then
echo >&2 "BTF: ${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.13"
if [ "${pahole_ver}" -lt "116" ]; then
echo >&2 "BTF: ${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.16"
return 1
fi

1 change: 0 additions & 1 deletion tools/bpf/Makefile
Original file line number Diff line number Diff line change
@@ -3,7 +3,6 @@ include ../scripts/Makefile.include

prefix ?= /usr/local

CC = gcc
LEX = flex
YACC = bison
MAKE = make
11 changes: 6 additions & 5 deletions tools/bpf/bpftool/gen.c
Original file line number Diff line number Diff line change
@@ -200,7 +200,7 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name)
return err;
}

static int codegen(const char *template, ...)
static void codegen(const char *template, ...)
{
const char *src, *end;
int skip_tabs = 0, n;
@@ -211,7 +211,7 @@ static int codegen(const char *template, ...)
n = strlen(template);
s = malloc(n + 1);
if (!s)
return -ENOMEM;
exit(-1);
src = template;
dst = s;

@@ -224,7 +224,8 @@ static int codegen(const char *template, ...)
} else {
p_err("unrecognized character at pos %td in template '%s'",
src - template - 1, template);
return -EINVAL;
free(s);
exit(-1);
}
}

@@ -234,7 +235,8 @@ static int codegen(const char *template, ...)
if (*src != '\t') {
p_err("not enough tabs at pos %td in template '%s'",
src - template - 1, template);
return -EINVAL;
free(s);
exit(-1);
}
}
/* trim trailing whitespace */
@@ -255,7 +257,6 @@ static int codegen(const char *template, ...)
va_end(args);

free(s);
return n;
}

static int do_skeleton(int argc, char **argv)
Loading

0 comments on commit fa7566a

Please sign in to comment.