Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Browse files Browse the repository at this point in the history
Daniel Borkmann says:

====================
pull-request: bpf 2018-08-18

The following pull-request contains BPF updates for your *net* tree.

The main changes are:

1) Fix a BPF selftest failure in test_cgroup_storage due to rlimit
   restrictions, from Yonghong.

2) Fix a suspicious RCU rcu_dereference_check() warning triggered
   from removing a device's XDP memory allocator by using the correct
   rhashtable lookup function, from Tariq.

3) A batch of BPF sockmap and ULP fixes mainly fixing leaks and races
   as well as enforcing module aliases for ULPs. Another fix for BPF
   map redirect to make them work again with tail calls, from Daniel.

4) Fix XDP BPF samples to unload their programs upon SIGTERM, from Jesper.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Aug 18, 2018
2 parents 3fe49d6 + f6069b9 commit 6e3bf9b
Show file tree
Hide file tree
Showing 16 changed files with 123 additions and 130 deletions.
3 changes: 2 additions & 1 deletion include/linux/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,6 @@ struct bpf_redirect_info {
u32 flags;
struct bpf_map *map;
struct bpf_map *map_to_flush;
unsigned long map_owner;
u32 kern_flags;
};

Expand Down Expand Up @@ -781,6 +780,8 @@ static inline bool bpf_dump_raw_ok(void)
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
const struct bpf_insn *patch, u32 len);

void bpf_clear_redirect_map(struct bpf_map *map);

static inline bool xdp_return_frame_no_direct(void)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
Expand Down
4 changes: 4 additions & 0 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -2065,6 +2065,10 @@ int tcp_set_ulp_id(struct sock *sk, const int ulp);
void tcp_get_available_ulp(char *buf, size_t len);
void tcp_cleanup_ulp(struct sock *sk);

#define MODULE_ALIAS_TCP_ULP(name) \
__MODULE_INFO(alias, alias_userspace, name); \
__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)

/* Call BPF_SOCK_OPS program that returns an int. If the return value
* is < 0, then the BPF op failed (for example if the loaded BPF
* program does not support the chosen operation or there is no BPF
Expand Down
5 changes: 2 additions & 3 deletions include/trace/events/xdp.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,8 @@ struct _bpf_dtab_netdev {

#define devmap_ifindex(fwd, map) \
(!fwd ? 0 : \
(!map ? 0 : \
((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \
((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)))
((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \
((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0))

#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \
trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1579,7 +1579,7 @@ static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array,
struct bpf_prog_array_item *item;
int i = 0;

item = rcu_dereference(array)->items;
item = rcu_dereference_check(array, 1)->items;
for (; item->prog; item++) {
if (item->prog == &dummy_bpf_prog.prog)
continue;
Expand Down
2 changes: 2 additions & 0 deletions kernel/bpf/cpumap.c
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,8 @@ static void cpu_map_free(struct bpf_map *map)
* It does __not__ ensure pending flush operations (if any) are
* complete.
*/

bpf_clear_redirect_map(map);
synchronize_rcu();

/* To ensure all pending flush operations have completed wait for flush
Expand Down
1 change: 1 addition & 0 deletions kernel/bpf/devmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ static void dev_map_free(struct bpf_map *map)
list_del_rcu(&dtab->list);
spin_unlock(&dev_map_lock);

bpf_clear_redirect_map(map);
synchronize_rcu();

/* To ensure all pending flush operations have completed wait for flush
Expand Down
120 changes: 68 additions & 52 deletions kernel/bpf/sockmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ struct bpf_stab {
struct bpf_map map;
struct sock **sock_map;
struct bpf_sock_progs progs;
raw_spinlock_t lock;
};

struct bucket {
Expand Down Expand Up @@ -89,9 +90,9 @@ enum smap_psock_state {

struct smap_psock_map_entry {
struct list_head list;
struct bpf_map *map;
struct sock **entry;
struct htab_elem __rcu *hash_link;
struct bpf_htab __rcu *htab;
};

struct smap_psock {
Expand Down Expand Up @@ -343,13 +344,18 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
e = psock_map_pop(sk, psock);
while (e) {
if (e->entry) {
osk = cmpxchg(e->entry, sk, NULL);
struct bpf_stab *stab = container_of(e->map, struct bpf_stab, map);

raw_spin_lock_bh(&stab->lock);
osk = *e->entry;
if (osk == sk) {
*e->entry = NULL;
smap_release_sock(psock, sk);
}
raw_spin_unlock_bh(&stab->lock);
} else {
struct htab_elem *link = rcu_dereference(e->hash_link);
struct bpf_htab *htab = rcu_dereference(e->htab);
struct bpf_htab *htab = container_of(e->map, struct bpf_htab, map);
struct hlist_head *head;
struct htab_elem *l;
struct bucket *b;
Expand All @@ -370,6 +376,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
}
raw_spin_unlock_bh(&b->lock);
}
kfree(e);
e = psock_map_pop(sk, psock);
}
rcu_read_unlock();
Expand Down Expand Up @@ -1641,6 +1648,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);

bpf_map_init_from_attr(&stab->map, attr);
raw_spin_lock_init(&stab->lock);

/* make sure page count doesn't overflow */
cost = (u64) stab->map.max_entries * sizeof(struct sock *);
Expand Down Expand Up @@ -1675,8 +1683,10 @@ static void smap_list_map_remove(struct smap_psock *psock,

spin_lock_bh(&psock->maps_lock);
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
if (e->entry == entry)
if (e->entry == entry) {
list_del(&e->list);
kfree(e);
}
}
spin_unlock_bh(&psock->maps_lock);
}
Expand All @@ -1690,8 +1700,10 @@ static void smap_list_hash_remove(struct smap_psock *psock,
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
struct htab_elem *c = rcu_dereference(e->hash_link);

if (c == hash_link)
if (c == hash_link) {
list_del(&e->list);
kfree(e);
}
}
spin_unlock_bh(&psock->maps_lock);
}
Expand All @@ -1711,14 +1723,15 @@ static void sock_map_free(struct bpf_map *map)
* and a grace period expire to ensure psock is really safe to remove.
*/
rcu_read_lock();
raw_spin_lock_bh(&stab->lock);
for (i = 0; i < stab->map.max_entries; i++) {
struct smap_psock *psock;
struct sock *sock;

sock = xchg(&stab->sock_map[i], NULL);
sock = stab->sock_map[i];
if (!sock)
continue;

stab->sock_map[i] = NULL;
psock = smap_psock_sk(sock);
/* This check handles a racing sock event that can get the
* sk_callback_lock before this case but after xchg happens
Expand All @@ -1730,6 +1743,7 @@ static void sock_map_free(struct bpf_map *map)
smap_release_sock(psock, sock);
}
}
raw_spin_unlock_bh(&stab->lock);
rcu_read_unlock();

sock_map_remove_complete(stab);
Expand Down Expand Up @@ -1773,19 +1787,23 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
if (k >= map->max_entries)
return -EINVAL;

sock = xchg(&stab->sock_map[k], NULL);
raw_spin_lock_bh(&stab->lock);
sock = stab->sock_map[k];
stab->sock_map[k] = NULL;
raw_spin_unlock_bh(&stab->lock);
if (!sock)
return -EINVAL;

psock = smap_psock_sk(sock);
if (!psock)
goto out;

if (psock->bpf_parse)
return 0;
if (psock->bpf_parse) {
write_lock_bh(&sock->sk_callback_lock);
smap_stop_sock(psock, sock);
write_unlock_bh(&sock->sk_callback_lock);
}
smap_list_map_remove(psock, &stab->sock_map[k]);
smap_release_sock(psock, sock);
out:
return 0;
}

Expand Down Expand Up @@ -1821,11 +1839,9 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
static int __sock_map_ctx_update_elem(struct bpf_map *map,
struct bpf_sock_progs *progs,
struct sock *sock,
struct sock **map_link,
void *key)
{
struct bpf_prog *verdict, *parse, *tx_msg;
struct smap_psock_map_entry *e = NULL;
struct smap_psock *psock;
bool new = false;
int err = 0;
Expand Down Expand Up @@ -1898,14 +1914,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
new = true;
}

if (map_link) {
e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
if (!e) {
err = -ENOMEM;
goto out_free;
}
}

/* 3. At this point we have a reference to a valid psock that is
* running. Attach any BPF programs needed.
*/
Expand All @@ -1927,17 +1935,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
write_unlock_bh(&sock->sk_callback_lock);
}

/* 4. Place psock in sockmap for use and stop any programs on
* the old sock assuming its not the same sock we are replacing
* it with. Because we can only have a single set of programs if
* old_sock has a strp we can stop it.
*/
if (map_link) {
e->entry = map_link;
spin_lock_bh(&psock->maps_lock);
list_add_tail(&e->list, &psock->maps);
spin_unlock_bh(&psock->maps_lock);
}
return err;
out_free:
smap_release_sock(psock, sock);
Expand All @@ -1948,7 +1945,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
}
if (tx_msg)
bpf_prog_put(tx_msg);
kfree(e);
return err;
}

Expand All @@ -1958,36 +1954,57 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
struct bpf_sock_progs *progs = &stab->progs;
struct sock *osock, *sock;
struct sock *osock, *sock = skops->sk;
struct smap_psock_map_entry *e;
struct smap_psock *psock;
u32 i = *(u32 *)key;
int err;

if (unlikely(flags > BPF_EXIST))
return -EINVAL;

if (unlikely(i >= stab->map.max_entries))
return -E2BIG;

sock = READ_ONCE(stab->sock_map[i]);
if (flags == BPF_EXIST && !sock)
return -ENOENT;
else if (flags == BPF_NOEXIST && sock)
return -EEXIST;
e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
if (!e)
return -ENOMEM;

sock = skops->sk;
err = __sock_map_ctx_update_elem(map, progs, sock, &stab->sock_map[i],
key);
err = __sock_map_ctx_update_elem(map, progs, sock, key);
if (err)
goto out;

osock = xchg(&stab->sock_map[i], sock);
if (osock) {
struct smap_psock *opsock = smap_psock_sk(osock);
/* psock guaranteed to be present. */
psock = smap_psock_sk(sock);
raw_spin_lock_bh(&stab->lock);
osock = stab->sock_map[i];
if (osock && flags == BPF_NOEXIST) {
err = -EEXIST;
goto out_unlock;
}
if (!osock && flags == BPF_EXIST) {
err = -ENOENT;
goto out_unlock;
}

smap_list_map_remove(opsock, &stab->sock_map[i]);
smap_release_sock(opsock, osock);
e->entry = &stab->sock_map[i];
e->map = map;
spin_lock_bh(&psock->maps_lock);
list_add_tail(&e->list, &psock->maps);
spin_unlock_bh(&psock->maps_lock);

stab->sock_map[i] = sock;
if (osock) {
psock = smap_psock_sk(osock);
smap_list_map_remove(psock, &stab->sock_map[i]);
smap_release_sock(psock, osock);
}
raw_spin_unlock_bh(&stab->lock);
return 0;
out_unlock:
smap_release_sock(psock, sock);
raw_spin_unlock_bh(&stab->lock);
out:
kfree(e);
return err;
}

Expand Down Expand Up @@ -2350,7 +2367,7 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
b = __select_bucket(htab, hash);
head = &b->head;

err = __sock_map_ctx_update_elem(map, progs, sock, NULL, key);
err = __sock_map_ctx_update_elem(map, progs, sock, key);
if (err)
goto err;

Expand All @@ -2376,8 +2393,7 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
}

rcu_assign_pointer(e->hash_link, l_new);
rcu_assign_pointer(e->htab,
container_of(map, struct bpf_htab, map));
e->map = map;
spin_lock_bh(&psock->maps_lock);
list_add_tail(&e->list, &psock->maps);
spin_unlock_bh(&psock->maps_lock);
Expand Down
21 changes: 0 additions & 21 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -5844,27 +5844,6 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
goto patch_call_imm;
}

if (insn->imm == BPF_FUNC_redirect_map) {
/* Note, we cannot use prog directly as imm as subsequent
* rewrites would still change the prog pointer. The only
* stable address we can use is aux, which also works with
* prog clones during blinding.
*/
u64 addr = (unsigned long)prog->aux;
struct bpf_insn r4_ld[] = {
BPF_LD_IMM64(BPF_REG_4, addr),
*insn,
};
cnt = ARRAY_SIZE(r4_ld);

new_prog = bpf_patch_insn_data(env, i + delta, r4_ld, cnt);
if (!new_prog)
return -ENOMEM;

delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
}
patch_call_imm:
fn = env->ops->get_func_proto(insn->imm, env->prog);
/* all functions that have prototype and verifier allowed
Expand Down
1 change: 1 addition & 0 deletions kernel/bpf/xskmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ static void xsk_map_free(struct bpf_map *map)
struct xsk_map *m = container_of(map, struct xsk_map, map);
int i;

bpf_clear_redirect_map(map);
synchronize_net();

for (i = 0; i < map->max_entries; i++) {
Expand Down
Loading

0 comments on commit 6e3bf9b

Please sign in to comment.