Skip to content

Commit

Permalink
Merge branch 'update-sockmap-from-prog'
Browse files Browse the repository at this point in the history
Lorenz Bauer says:

====================
We're currently building a control plane for our BPF socket dispatch
work. As part of that, we have a need to create a copy of an existing
sockhash, to allow us to change the keys. I previously proposed allowing
privileged userspace to look up sockets, which doesn't work due to
security concerns (see [1]).

In follow-up discussions during BPF office hours we identified bpf_iter
as a possible solution: instead of accessing sockets from user space
we can iterate the source sockhash, and insert the values into a new
map. Enabling this requires two pieces: the ability to iterate
sockmap and sockhash, as well as being able to call map_update_elem
from BPF.

This patch set implements the latter: it's now possible to update
sockmap from BPF context. As a next step, we can implement bpf_iter
for sockmap.

===

I've done some more fixups, and audited the safe contexts more
thoroughly. As a result I'm removing CGROUP_SKB, SK_MSG and SK_SKB
for now.

Changes in v3:
- Use CHECK as much as possible (Yonghong)
- Reject ARG_PTR_TO_MAP_VALUE_OR_NULL for sockmap (Yonghong)
- Remove CGROUP_SKB, SK_MSG, SK_SKB from safe contexts
- Test that the verifier rejects update from unsafe context

Changes in v2:
- Fix warning in patch #2 (Jakub K)
- Renamed override_map_arg_type (John)
- Only allow updating sockmap from known safe contexts (John)
- Use __s64 for sockmap updates from user space (Yonghong)
- Various small test fixes around test macros and such (Yonghong)

Thank you for your reviews!

1: https://lore.kernel.org/bpf/20200310174711.7490-1-lmb@cloudflare.com/
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Alexei Starovoitov committed Aug 21, 2020
2 parents dca5612 + bb23c0e commit 3c4a594
Show file tree
Hide file tree
Showing 11 changed files with 302 additions and 94 deletions.
7 changes: 7 additions & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -1648,6 +1648,7 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
struct bpf_prog *old, u32 which);
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
#else
Expand All @@ -1669,6 +1670,12 @@ static inline int sock_map_prog_detach(const union bpf_attr *attr,
{
return -EOPNOTSUPP;
}

/* Fallback stub for sock_map_update_elem_sys() in the #else branch that is
 * closed by the CONFIG_BPF_STREAM_PARSER #endif below: no update is
 * attempted and every call fails with -EOPNOTSUPP. All arguments are
 * ignored.
 */
static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value,
u64 flags)
{
return -EOPNOTSUPP;
}
#endif /* CONFIG_BPF_STREAM_PARSER */

#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
Expand Down
17 changes: 0 additions & 17 deletions include/linux/skmsg.h
Original file line number Diff line number Diff line change
Expand Up @@ -340,23 +340,6 @@ static inline void sk_psock_update_proto(struct sock *sk,
struct sk_psock *psock,
struct proto *ops)
{
/* Initialize saved callbacks and original proto only once, since this
* function may be called multiple times for a psock, e.g. when
* psock->progs.msg_parser is updated.
*
* Since we've not installed the new proto, psock is not yet in use and
* we can initialize it without synchronization.
*/
if (!psock->sk_proto) {
struct proto *orig = READ_ONCE(sk->sk_prot);

psock->saved_unhash = orig->unhash;
psock->saved_close = orig->close;
psock->saved_write_space = sk->sk_write_space;

psock->sk_proto = orig;
}

/* Pairs with lockless read in sk_clone_lock() */
WRITE_ONCE(sk->sk_prot, ops);
}
Expand Down
5 changes: 3 additions & 2 deletions kernel/bpf/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,11 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
if (bpf_map_is_dev_bound(map)) {
return bpf_map_offload_update_elem(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
map->map_type == BPF_MAP_TYPE_SOCKHASH ||
map->map_type == BPF_MAP_TYPE_SOCKMAP ||
map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
return map->ops->map_update_elem(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_SOCKHASH ||
map->map_type == BPF_MAP_TYPE_SOCKMAP) {
return sock_map_update_elem_sys(map, key, value, flags);
} else if (IS_FD_PROG_ARRAY(map)) {
return bpf_fd_array_map_update_elem(map, f.file, key, value,
flags);
Expand Down
73 changes: 71 additions & 2 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -3872,6 +3872,33 @@ static int int_ptr_type_to_size(enum bpf_arg_type type)
return -EINVAL;
}

/* Adjust a map-value argument type according to the map being accessed.
 *
 * For sockmap and sockhash an ARG_PTR_TO_MAP_VALUE argument is rewritten
 * to ARG_PTR_TO_SOCKET; any other incoming arg type is rejected for these
 * maps. Every other map type leaves *arg_type untouched.
 *
 * Returns 0 on success, -EACCES when meta->map_ptr has not been set, and
 * -EINVAL when the arg type cannot be used with sockmap/sockhash.
 */
static int resolve_map_arg_type(struct bpf_verifier_env *env,
				const struct bpf_call_arg_meta *meta,
				enum bpf_arg_type *arg_type)
{
	if (!meta->map_ptr) {
		/* kernel subsystem misconfigured verifier */
		verbose(env, "invalid map_ptr to access map->type\n");
		return -EACCES;
	}

	/* Only sockmap and sockhash need their value argument re-typed. */
	if (meta->map_ptr->map_type != BPF_MAP_TYPE_SOCKMAP &&
	    meta->map_ptr->map_type != BPF_MAP_TYPE_SOCKHASH)
		return 0;

	if (*arg_type != ARG_PTR_TO_MAP_VALUE) {
		verbose(env, "invalid arg_type for sockmap/sockhash\n");
		return -EINVAL;
	}

	*arg_type = ARG_PTR_TO_SOCKET;
	return 0;
}

static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_call_arg_meta *meta,
const struct bpf_func_proto *fn)
Expand Down Expand Up @@ -3904,6 +3931,14 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return -EACCES;
}

if (arg_type == ARG_PTR_TO_MAP_VALUE ||
arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
err = resolve_map_arg_type(env, meta, &arg_type);
if (err)
return err;
}

if (arg_type == ARG_PTR_TO_MAP_KEY ||
arg_type == ARG_PTR_TO_MAP_VALUE ||
arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
Expand Down Expand Up @@ -4143,6 +4178,38 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return -EACCES;
}

/* Decide whether the program being verified may call
 * bpf_map_update_elem() on a sockmap/sockhash.
 *
 * Returns false without logging for any helper other than
 * BPF_FUNC_map_update_elem. For the update helper it returns true only
 * for the program types listed below; otherwise it logs a verifier
 * message and returns false.
 */
static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
{
	if (func_id != BPF_FUNC_map_update_elem)
		return false;

	/* These contexts cannot get access to a locked struct sock, which
	 * is what makes updating from them safe.
	 */
	switch (env->prog->type) {
	case BPF_PROG_TYPE_SOCKET_FILTER:
	case BPF_PROG_TYPE_SCHED_CLS:
	case BPF_PROG_TYPE_SCHED_ACT:
	case BPF_PROG_TYPE_XDP:
	case BPF_PROG_TYPE_SK_REUSEPORT:
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
	case BPF_PROG_TYPE_SK_LOOKUP:
		return true;
	case BPF_PROG_TYPE_TRACING:
		/* Tracing programs qualify only when attached as iterators. */
		if (env->prog->expected_attach_type == BPF_TRACE_ITER)
			return true;
		break;
	default:
		break;
	}

	verbose(env, "cannot update sockmap in this context\n");
	return false;
}

static int check_map_func_compatibility(struct bpf_verifier_env *env,
struct bpf_map *map, int func_id)
{
Expand Down Expand Up @@ -4214,7 +4281,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_msg_redirect_map &&
func_id != BPF_FUNC_sk_select_reuseport &&
func_id != BPF_FUNC_map_lookup_elem)
func_id != BPF_FUNC_map_lookup_elem &&
!may_update_sockmap(env, func_id))
goto error;
break;
case BPF_MAP_TYPE_SOCKHASH:
Expand All @@ -4223,7 +4291,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_msg_redirect_hash &&
func_id != BPF_FUNC_sk_select_reuseport &&
func_id != BPF_FUNC_map_lookup_elem)
func_id != BPF_FUNC_map_lookup_elem &&
!may_update_sockmap(env, func_id))
goto error;
break;
case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
Expand Down
34 changes: 28 additions & 6 deletions net/core/skmsg.c
Original file line number Diff line number Diff line change
Expand Up @@ -494,14 +494,34 @@ static void sk_psock_backlog(struct work_struct *work)

struct sk_psock *sk_psock_init(struct sock *sk, int node)
{
struct sk_psock *psock = kzalloc_node(sizeof(*psock),
GFP_ATOMIC | __GFP_NOWARN,
node);
if (!psock)
return NULL;
struct sk_psock *psock;
struct proto *prot;

write_lock_bh(&sk->sk_callback_lock);

if (inet_csk_has_ulp(sk)) {
psock = ERR_PTR(-EINVAL);
goto out;
}

if (sk->sk_user_data) {
psock = ERR_PTR(-EBUSY);
goto out;
}

psock = kzalloc_node(sizeof(*psock), GFP_ATOMIC | __GFP_NOWARN, node);
if (!psock) {
psock = ERR_PTR(-ENOMEM);
goto out;
}

prot = READ_ONCE(sk->sk_prot);
psock->sk = sk;
psock->eval = __SK_NONE;
psock->eval = __SK_NONE;
psock->sk_proto = prot;
psock->saved_unhash = prot->unhash;
psock->saved_close = prot->close;
psock->saved_write_space = sk->sk_write_space;

INIT_LIST_HEAD(&psock->link);
spin_lock_init(&psock->link_lock);
Expand All @@ -516,6 +536,8 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
rcu_assign_sk_user_data_nocopy(sk, psock);
sock_hold(sk);

out:
write_unlock_bh(&sk->sk_callback_lock);
return psock;
}
EXPORT_SYMBOL_GPL(sk_psock_init);
Expand Down
89 changes: 35 additions & 54 deletions net/core/sock_map.c
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,6 @@ static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)
{
struct proto *prot;

sock_owned_by_me(sk);

switch (sk->sk_type) {
case SOCK_STREAM:
prot = tcp_bpf_get_proto(sk, psock);
Expand Down Expand Up @@ -272,8 +270,8 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
}
} else {
psock = sk_psock_init(sk, map->numa_node);
if (!psock) {
ret = -ENOMEM;
if (IS_ERR(psock)) {
ret = PTR_ERR(psock);
goto out_progs;
}
}
Expand Down Expand Up @@ -322,8 +320,8 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk)

if (!psock) {
psock = sk_psock_init(sk, map->numa_node);
if (!psock)
return -ENOMEM;
if (IS_ERR(psock))
return PTR_ERR(psock);
}

ret = sock_map_init_proto(sk, psock);
Expand Down Expand Up @@ -478,8 +476,6 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
return -EINVAL;
if (unlikely(idx >= map->max_entries))
return -E2BIG;
if (inet_csk_has_ulp(sk))
return -EINVAL;

link = sk_psock_init_link();
if (!link)
Expand Down Expand Up @@ -563,10 +559,12 @@ static bool sock_map_sk_state_allowed(const struct sock *sk)
return false;
}

static int sock_map_update_elem(struct bpf_map *map, void *key,
void *value, u64 flags)
static int sock_hash_update_common(struct bpf_map *map, void *key,
struct sock *sk, u64 flags);

int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value,
u64 flags)
{
u32 idx = *(u32 *)key;
struct socket *sock;
struct sock *sk;
int ret;
Expand Down Expand Up @@ -595,14 +593,38 @@ static int sock_map_update_elem(struct bpf_map *map, void *key,
sock_map_sk_acquire(sk);
if (!sock_map_sk_state_allowed(sk))
ret = -EOPNOTSUPP;
else if (map->map_type == BPF_MAP_TYPE_SOCKMAP)
ret = sock_map_update_common(map, *(u32 *)key, sk, flags);
else
ret = sock_map_update_common(map, idx, sk, flags);
ret = sock_hash_update_common(map, key, sk, flags);
sock_map_sk_release(sk);
out:
fput(sock->file);
return ret;
}

/* map_update_elem callback where @value is used directly as a
 * struct sock pointer rather than as a file descriptor.
 *
 * The socket is locked with bh_lock_sock() while bottom halves are
 * disabled, and the update is dispatched to the sockmap or sockhash
 * implementation depending on map->map_type.
 *
 * Returns -EOPNOTSUPP if the socket is not suitable or not in an allowed
 * state, otherwise the result of the underlying update routine.
 */
static int sock_map_update_elem(struct bpf_map *map, void *key,
				void *value, u64 flags)
{
	struct sock *sk = value;
	int err = -EOPNOTSUPP;

	if (!sock_map_sk_is_suitable(sk))
		return err;

	local_bh_disable();
	bh_lock_sock(sk);
	/* err stays -EOPNOTSUPP when the socket state is not allowed. */
	if (sock_map_sk_state_allowed(sk)) {
		if (map->map_type == BPF_MAP_TYPE_SOCKMAP)
			err = sock_map_update_common(map, *(u32 *)key, sk,
						     flags);
		else
			err = sock_hash_update_common(map, key, sk, flags);
	}
	bh_unlock_sock(sk);
	local_bh_enable();

	return err;
}

BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, sops,
struct bpf_map *, map, void *, key, u64, flags)
{
Expand Down Expand Up @@ -855,8 +877,6 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
WARN_ON_ONCE(!rcu_read_lock_held());
if (unlikely(flags > BPF_EXIST))
return -EINVAL;
if (inet_csk_has_ulp(sk))
return -EINVAL;

link = sk_psock_init_link();
if (!link)
Expand Down Expand Up @@ -915,45 +935,6 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
return ret;
}

/* Update a sockhash entry from a socket file descriptor stored in @value.
 *
 * @value is read as a u64 when map->value_size is sizeof(u64) and as a
 * u32 otherwise. Descriptors above S32_MAX are rejected with -EINVAL.
 * The socket is looked up from the descriptor, checked for suitability
 * and state, and then inserted via sock_hash_update_common() while held
 * through sock_map_sk_acquire()/sock_map_sk_release(). The file reference
 * taken by the lookup is dropped on every path after a successful lookup.
 */
static int sock_hash_update_elem(struct bpf_map *map, void *key,
				 void *value, u64 flags)
{
	u64 ufd = map->value_size == sizeof(u64) ? *(u64 *)value
						 : *(u32 *)value;
	struct socket *sock;
	struct sock *sk;
	int err;

	if (ufd > S32_MAX)
		return -EINVAL;

	sock = sockfd_lookup(ufd, &err);
	if (!sock)
		return err;

	sk = sock->sk;
	if (!sk) {
		err = -EINVAL;
	} else if (!sock_map_sk_is_suitable(sk)) {
		err = -EOPNOTSUPP;
	} else {
		sock_map_sk_acquire(sk);
		if (sock_map_sk_state_allowed(sk))
			err = sock_hash_update_common(map, key, sk, flags);
		else
			err = -EOPNOTSUPP;
		sock_map_sk_release(sk);
	}

	fput(sock->file);
	return err;
}

static int sock_hash_get_next_key(struct bpf_map *map, void *key,
void *key_next)
{
Expand Down Expand Up @@ -1222,7 +1203,7 @@ const struct bpf_map_ops sock_hash_ops = {
.map_alloc = sock_hash_alloc,
.map_free = sock_hash_free,
.map_get_next_key = sock_hash_get_next_key,
.map_update_elem = sock_hash_update_elem,
.map_update_elem = sock_map_update_elem,
.map_delete_elem = sock_hash_delete_elem,
.map_lookup_elem = sock_hash_lookup,
.map_lookup_elem_sys_only = sock_hash_lookup_sys,
Expand Down
Loading

0 comments on commit 3c4a594

Please sign in to comment.