Skip to content

Commit

Permalink
Merge branch 'sockmap-ulp-fixes'
Browse files Browse the repository at this point in the history
Daniel Borkmann says:

====================
Batch of various fixes related to BPF sockmap and ULP, including
adding a module alias to restrict module requests, and fixing races
and memory leaks in the sockmap code. For details, please refer to
the individual patches. Thanks!
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Alexei Starovoitov committed Aug 16, 2018
2 parents 965931e + 585f5a6 commit cbb2fb1
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 53 deletions.
4 changes: 4 additions & 0 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -2065,6 +2065,10 @@ int tcp_set_ulp_id(struct sock *sk, const int ulp);
void tcp_get_available_ulp(char *buf, size_t len);
void tcp_cleanup_ulp(struct sock *sk);

/* Declare the module aliases for a TCP upper layer protocol (ULP) module.
 *
 * Expands to two __MODULE_INFO(alias, ...) entries: one carrying @name
 * as given, and one carrying @name prefixed with "tcp-ulp-". The prefixed
 * form matches the "tcp-ulp-%s" string passed to request_module() in the
 * net/ipv4/tcp_ulp.c hunk of this change.
 *
 * @name must be a string literal, since it is pasted next to "tcp-ulp-"
 * by adjacent string-literal concatenation. The expansion has no trailing
 * semicolon; the user supplies it, e.g. MODULE_ALIAS_TCP_ULP("tls");
 */
#define MODULE_ALIAS_TCP_ULP(name) \
__MODULE_INFO(alias, alias_userspace, name); \
__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)

/* Call BPF_SOCK_OPS program that returns an int. If the return value
* is < 0, then the BPF op failed (for example if the loaded BPF
* program does not support the chosen operation or there is no BPF
Expand Down
120 changes: 68 additions & 52 deletions kernel/bpf/sockmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ struct bpf_stab {
struct bpf_map map;
struct sock **sock_map;
struct bpf_sock_progs progs;
raw_spinlock_t lock;
};

struct bucket {
Expand Down Expand Up @@ -89,9 +90,9 @@ enum smap_psock_state {

struct smap_psock_map_entry {
struct list_head list;
struct bpf_map *map;
struct sock **entry;
struct htab_elem __rcu *hash_link;
struct bpf_htab __rcu *htab;
};

struct smap_psock {
Expand Down Expand Up @@ -343,13 +344,18 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
e = psock_map_pop(sk, psock);
while (e) {
if (e->entry) {
osk = cmpxchg(e->entry, sk, NULL);
struct bpf_stab *stab = container_of(e->map, struct bpf_stab, map);

raw_spin_lock_bh(&stab->lock);
osk = *e->entry;
if (osk == sk) {
*e->entry = NULL;
smap_release_sock(psock, sk);
}
raw_spin_unlock_bh(&stab->lock);
} else {
struct htab_elem *link = rcu_dereference(e->hash_link);
struct bpf_htab *htab = rcu_dereference(e->htab);
struct bpf_htab *htab = container_of(e->map, struct bpf_htab, map);
struct hlist_head *head;
struct htab_elem *l;
struct bucket *b;
Expand All @@ -370,6 +376,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
}
raw_spin_unlock_bh(&b->lock);
}
kfree(e);
e = psock_map_pop(sk, psock);
}
rcu_read_unlock();
Expand Down Expand Up @@ -1641,6 +1648,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);

bpf_map_init_from_attr(&stab->map, attr);
raw_spin_lock_init(&stab->lock);

/* make sure page count doesn't overflow */
cost = (u64) stab->map.max_entries * sizeof(struct sock *);
Expand Down Expand Up @@ -1675,8 +1683,10 @@ static void smap_list_map_remove(struct smap_psock *psock,

spin_lock_bh(&psock->maps_lock);
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
if (e->entry == entry)
if (e->entry == entry) {
list_del(&e->list);
kfree(e);
}
}
spin_unlock_bh(&psock->maps_lock);
}
Expand All @@ -1690,8 +1700,10 @@ static void smap_list_hash_remove(struct smap_psock *psock,
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
struct htab_elem *c = rcu_dereference(e->hash_link);

if (c == hash_link)
if (c == hash_link) {
list_del(&e->list);
kfree(e);
}
}
spin_unlock_bh(&psock->maps_lock);
}
Expand All @@ -1711,14 +1723,15 @@ static void sock_map_free(struct bpf_map *map)
* and a grace period expire to ensure psock is really safe to remove.
*/
rcu_read_lock();
raw_spin_lock_bh(&stab->lock);
for (i = 0; i < stab->map.max_entries; i++) {
struct smap_psock *psock;
struct sock *sock;

sock = xchg(&stab->sock_map[i], NULL);
sock = stab->sock_map[i];
if (!sock)
continue;

stab->sock_map[i] = NULL;
psock = smap_psock_sk(sock);
/* This check handles a racing sock event that can get the
* sk_callback_lock before this case but after xchg happens
Expand All @@ -1730,6 +1743,7 @@ static void sock_map_free(struct bpf_map *map)
smap_release_sock(psock, sock);
}
}
raw_spin_unlock_bh(&stab->lock);
rcu_read_unlock();

sock_map_remove_complete(stab);
Expand Down Expand Up @@ -1773,19 +1787,23 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
if (k >= map->max_entries)
return -EINVAL;

sock = xchg(&stab->sock_map[k], NULL);
raw_spin_lock_bh(&stab->lock);
sock = stab->sock_map[k];
stab->sock_map[k] = NULL;
raw_spin_unlock_bh(&stab->lock);
if (!sock)
return -EINVAL;

psock = smap_psock_sk(sock);
if (!psock)
goto out;

if (psock->bpf_parse)
return 0;
if (psock->bpf_parse) {
write_lock_bh(&sock->sk_callback_lock);
smap_stop_sock(psock, sock);
write_unlock_bh(&sock->sk_callback_lock);
}
smap_list_map_remove(psock, &stab->sock_map[k]);
smap_release_sock(psock, sock);
out:
return 0;
}

Expand Down Expand Up @@ -1821,11 +1839,9 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
static int __sock_map_ctx_update_elem(struct bpf_map *map,
struct bpf_sock_progs *progs,
struct sock *sock,
struct sock **map_link,
void *key)
{
struct bpf_prog *verdict, *parse, *tx_msg;
struct smap_psock_map_entry *e = NULL;
struct smap_psock *psock;
bool new = false;
int err = 0;
Expand Down Expand Up @@ -1898,14 +1914,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
new = true;
}

if (map_link) {
e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
if (!e) {
err = -ENOMEM;
goto out_free;
}
}

/* 3. At this point we have a reference to a valid psock that is
* running. Attach any BPF programs needed.
*/
Expand All @@ -1927,17 +1935,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
write_unlock_bh(&sock->sk_callback_lock);
}

/* 4. Place psock in sockmap for use and stop any programs on
* the old sock, assuming it's not the same sock we are replacing
* it with. Because we can only have a single set of programs, if
* old_sock has a strp we can stop it.
*/
if (map_link) {
e->entry = map_link;
spin_lock_bh(&psock->maps_lock);
list_add_tail(&e->list, &psock->maps);
spin_unlock_bh(&psock->maps_lock);
}
return err;
out_free:
smap_release_sock(psock, sock);
Expand All @@ -1948,7 +1945,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
}
if (tx_msg)
bpf_prog_put(tx_msg);
kfree(e);
return err;
}

Expand All @@ -1958,36 +1954,57 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
struct bpf_sock_progs *progs = &stab->progs;
struct sock *osock, *sock;
struct sock *osock, *sock = skops->sk;
struct smap_psock_map_entry *e;
struct smap_psock *psock;
u32 i = *(u32 *)key;
int err;

if (unlikely(flags > BPF_EXIST))
return -EINVAL;

if (unlikely(i >= stab->map.max_entries))
return -E2BIG;

sock = READ_ONCE(stab->sock_map[i]);
if (flags == BPF_EXIST && !sock)
return -ENOENT;
else if (flags == BPF_NOEXIST && sock)
return -EEXIST;
e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
if (!e)
return -ENOMEM;

sock = skops->sk;
err = __sock_map_ctx_update_elem(map, progs, sock, &stab->sock_map[i],
key);
err = __sock_map_ctx_update_elem(map, progs, sock, key);
if (err)
goto out;

osock = xchg(&stab->sock_map[i], sock);
if (osock) {
struct smap_psock *opsock = smap_psock_sk(osock);
/* psock guaranteed to be present. */
psock = smap_psock_sk(sock);
raw_spin_lock_bh(&stab->lock);
osock = stab->sock_map[i];
if (osock && flags == BPF_NOEXIST) {
err = -EEXIST;
goto out_unlock;
}
if (!osock && flags == BPF_EXIST) {
err = -ENOENT;
goto out_unlock;
}

smap_list_map_remove(opsock, &stab->sock_map[i]);
smap_release_sock(opsock, osock);
e->entry = &stab->sock_map[i];
e->map = map;
spin_lock_bh(&psock->maps_lock);
list_add_tail(&e->list, &psock->maps);
spin_unlock_bh(&psock->maps_lock);

stab->sock_map[i] = sock;
if (osock) {
psock = smap_psock_sk(osock);
smap_list_map_remove(psock, &stab->sock_map[i]);
smap_release_sock(psock, osock);
}
raw_spin_unlock_bh(&stab->lock);
return 0;
out_unlock:
smap_release_sock(psock, sock);
raw_spin_unlock_bh(&stab->lock);
out:
kfree(e);
return err;
}

Expand Down Expand Up @@ -2350,7 +2367,7 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
b = __select_bucket(htab, hash);
head = &b->head;

err = __sock_map_ctx_update_elem(map, progs, sock, NULL, key);
err = __sock_map_ctx_update_elem(map, progs, sock, key);
if (err)
goto err;

Expand All @@ -2376,8 +2393,7 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
}

rcu_assign_pointer(e->hash_link, l_new);
rcu_assign_pointer(e->htab,
container_of(map, struct bpf_htab, map));
e->map = map;
spin_lock_bh(&psock->maps_lock);
list_add_tail(&e->list, &psock->maps);
spin_unlock_bh(&psock->maps_lock);
Expand Down
4 changes: 3 additions & 1 deletion net/ipv4/tcp_ulp.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name)
#ifdef CONFIG_MODULES
if (!ulp && capable(CAP_NET_ADMIN)) {
rcu_read_unlock();
request_module("%s", name);
request_module("tcp-ulp-%s", name);
rcu_read_lock();
ulp = tcp_ulp_find(name);
}
Expand Down Expand Up @@ -129,6 +129,8 @@ void tcp_cleanup_ulp(struct sock *sk)
if (icsk->icsk_ulp_ops->release)
icsk->icsk_ulp_ops->release(sk);
module_put(icsk->icsk_ulp_ops->owner);

icsk->icsk_ulp_ops = NULL;
}

/* Change upper layer protocol for socket */
Expand Down
1 change: 1 addition & 0 deletions net/tls/tls_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
MODULE_AUTHOR("Mellanox Technologies");
MODULE_DESCRIPTION("Transport Layer Security Support");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS_TCP_ULP("tls");

enum {
TLSV4,
Expand Down

0 comments on commit cbb2fb1

Please sign in to comment.