Skip to content

Commit

Permalink
bpf: Support sk lookup in netns with id 0
Browse files Browse the repository at this point in the history
David Ahern and Nicolas Dichtel report that the handling of the netns id
0 is incorrect for the BPF socket lookup helpers: rather than finding
the netns with id 0, it is resolving to the current netns. This renders
the netns_id 0 inaccessible.

To fix this, adjust the API for the netns to treat all negative s32
values as a lookup in the current netns (including u64 values which when
truncated to s32 become negative), while any values with a positive
value in the signed 32-bit integer space would result in a lookup for a
socket in the netns corresponding to that id. As before, if the netns
with that ID does not exist, no socket will be found. Any netns outside
of these ranges will fail to find a corresponding socket, as those
values are reserved for future usage.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Joey Pabalinas <joeypabalinas@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Joe Stringer authored and Alexei Starovoitov committed Dec 1, 2018
1 parent b7df9ad commit f71c614
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 44 deletions.
35 changes: 21 additions & 14 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -2170,7 +2170,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
* struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
* Description
* Look for TCP socket matching *tuple*, optionally in a child
* network namespace *netns*. The return value must be checked,
Expand All @@ -2187,12 +2187,14 @@ union bpf_attr {
* **sizeof**\ (*tuple*\ **->ipv6**)
* Look for an IPv6 socket.
*
* If the *netns* is zero, then the socket lookup table in the
* netns associated with the *ctx* will be used. For the TC hooks,
* this in the netns of the device in the skb. For socket hooks,
* this in the netns of the socket. If *netns* is non-zero, then
* it specifies the ID of the netns relative to the netns
* associated with the *ctx*.
* If the *netns* is a negative signed 32-bit integer, then the
* socket lookup table in the netns associated with the *ctx* will
* will be used. For the TC hooks, this is the netns of the device
* in the skb. For socket hooks, this is the netns of the socket.
* If *netns* is any other signed 32-bit value greater than or
* equal to zero then it specifies the ID of the netns relative to
* the netns associated with the *ctx*. *netns* values beyond the
* range of 32-bit integers are reserved for future use.
*
* All values for *flags* are reserved for future usage, and must
* be left at zero.
Expand All @@ -2202,7 +2204,7 @@ union bpf_attr {
* Return
* Pointer to *struct bpf_sock*, or NULL in case of failure.
*
* struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
* struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
* Description
* Look for UDP socket matching *tuple*, optionally in a child
* network namespace *netns*. The return value must be checked,
Expand All @@ -2219,12 +2221,14 @@ union bpf_attr {
* **sizeof**\ (*tuple*\ **->ipv6**)
* Look for an IPv6 socket.
*
* If the *netns* is zero, then the socket lookup table in the
* netns associated with the *ctx* will be used. For the TC hooks,
* this in the netns of the device in the skb. For socket hooks,
* this in the netns of the socket. If *netns* is non-zero, then
* it specifies the ID of the netns relative to the netns
* associated with the *ctx*.
* If the *netns* is a negative signed 32-bit integer, then the
* socket lookup table in the netns associated with the *ctx* will
* will be used. For the TC hooks, this is the netns of the device
* in the skb. For socket hooks, this is the netns of the socket.
* If *netns* is any other signed 32-bit value greater than or
* equal to zero then it specifies the ID of the netns relative to
* the netns associated with the *ctx*. *netns* values beyond the
* range of 32-bit integers are reserved for future use.
*
* All values for *flags* are reserved for future usage, and must
* be left at zero.
Expand Down Expand Up @@ -2405,6 +2409,9 @@ enum bpf_func_id {
/* BPF_FUNC_perf_event_output for sk_buff input context. */
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)

/* Current network namespace */
#define BPF_F_CURRENT_NETNS (-1L)

/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
Expand Down
11 changes: 6 additions & 5 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -4890,22 +4890,23 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
struct net *net;

family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6;
if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags))
if (unlikely(family == AF_UNSPEC || flags ||
!((s32)netns_id < 0 || netns_id <= S32_MAX)))
goto out;

if (skb->dev)
caller_net = dev_net(skb->dev);
else
caller_net = sock_net(skb->sk);
if (netns_id) {
if ((s32)netns_id < 0) {
net = caller_net;
sk = sk_lookup(net, tuple, skb, family, proto);
} else {
net = get_net_ns_by_id(caller_net, netns_id);
if (unlikely(!net))
goto out;
sk = sk_lookup(net, tuple, skb, family, proto);
put_net(net);
} else {
net = caller_net;
sk = sk_lookup(net, tuple, skb, family, proto);
}

if (sk)
Expand Down
39 changes: 25 additions & 14 deletions tools/include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -2170,7 +2170,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
* struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
* Description
* Look for TCP socket matching *tuple*, optionally in a child
* network namespace *netns*. The return value must be checked,
Expand All @@ -2187,12 +2187,14 @@ union bpf_attr {
* **sizeof**\ (*tuple*\ **->ipv6**)
* Look for an IPv6 socket.
*
* If the *netns* is zero, then the socket lookup table in the
* netns associated with the *ctx* will be used. For the TC hooks,
* this in the netns of the device in the skb. For socket hooks,
* this in the netns of the socket. If *netns* is non-zero, then
* it specifies the ID of the netns relative to the netns
* associated with the *ctx*.
* If the *netns* is a negative signed 32-bit integer, then the
* socket lookup table in the netns associated with the *ctx* will
* will be used. For the TC hooks, this is the netns of the device
* in the skb. For socket hooks, this is the netns of the socket.
* If *netns* is any other signed 32-bit value greater than or
* equal to zero then it specifies the ID of the netns relative to
* the netns associated with the *ctx*. *netns* values beyond the
* range of 32-bit integers are reserved for future use.
*
* All values for *flags* are reserved for future usage, and must
* be left at zero.
Expand All @@ -2201,8 +2203,10 @@ union bpf_attr {
* **CONFIG_NET** configuration option.
* Return
* Pointer to *struct bpf_sock*, or NULL in case of failure.
* For sockets with reuseport option, *struct bpf_sock*
* return is from reuse->socks[] using hash of the packet.
*
* struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
* struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
* Description
* Look for UDP socket matching *tuple*, optionally in a child
* network namespace *netns*. The return value must be checked,
Expand All @@ -2219,12 +2223,14 @@ union bpf_attr {
* **sizeof**\ (*tuple*\ **->ipv6**)
* Look for an IPv6 socket.
*
* If the *netns* is zero, then the socket lookup table in the
* netns associated with the *ctx* will be used. For the TC hooks,
* this in the netns of the device in the skb. For socket hooks,
* this in the netns of the socket. If *netns* is non-zero, then
* it specifies the ID of the netns relative to the netns
* associated with the *ctx*.
* If the *netns* is a negative signed 32-bit integer, then the
* socket lookup table in the netns associated with the *ctx* will
* will be used. For the TC hooks, this is the netns of the device
* in the skb. For socket hooks, this is the netns of the socket.
* If *netns* is any other signed 32-bit value greater than or
* equal to zero then it specifies the ID of the netns relative to
* the netns associated with the *ctx*. *netns* values beyond the
* range of 32-bit integers are reserved for future use.
*
* All values for *flags* are reserved for future usage, and must
* be left at zero.
Expand All @@ -2233,6 +2239,8 @@ union bpf_attr {
* **CONFIG_NET** configuration option.
* Return
* Pointer to *struct bpf_sock*, or NULL in case of failure.
* For sockets with reuseport option, *struct bpf_sock*
* return is from reuse->socks[] using hash of the packet.
*
* int bpf_sk_release(struct bpf_sock *sk)
* Description
Expand Down Expand Up @@ -2405,6 +2413,9 @@ enum bpf_func_id {
/* BPF_FUNC_perf_event_output for sk_buff input context. */
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)

/* Current network namespace */
#define BPF_F_CURRENT_NETNS (-1L)

/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
Expand Down
4 changes: 2 additions & 2 deletions tools/testing/selftests/bpf/bpf_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,12 +154,12 @@ static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
(void *) BPF_FUNC_skb_ancestor_cgroup_id;
static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx,
struct bpf_sock_tuple *tuple,
int size, unsigned int netns_id,
int size, unsigned long long netns_id,
unsigned long long flags) =
(void *) BPF_FUNC_sk_lookup_tcp;
static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx,
struct bpf_sock_tuple *tuple,
int size, unsigned int netns_id,
int size, unsigned long long netns_id,
unsigned long long flags) =
(void *) BPF_FUNC_sk_lookup_udp;
static int (*bpf_sk_release)(struct bpf_sock *sk) =
Expand Down
18 changes: 9 additions & 9 deletions tools/testing/selftests/bpf/test_sk_lookup_kern.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb)
return TC_ACT_SHOT;

tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, 0, 0);
sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
if (sk)
bpf_sk_release(sk);
return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
Expand All @@ -84,7 +84,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb)
struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk;

sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
if (sk)
bpf_sk_release(sk);
return 0;
Expand All @@ -97,7 +97,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb)
struct bpf_sock *sk;
__u32 family = 0;

sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
if (sk) {
bpf_sk_release(sk);
family = sk->family;
Expand All @@ -112,7 +112,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb)
struct bpf_sock *sk;
__u32 family;

sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
if (sk) {
sk += 1;
bpf_sk_release(sk);
Expand All @@ -127,7 +127,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
struct bpf_sock *sk;
__u32 family;

sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
sk += 1;
if (sk)
bpf_sk_release(sk);
Expand All @@ -139,7 +139,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};

bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
return 0;
}

Expand All @@ -149,7 +149,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb)
struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk;

sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
bpf_sk_release(sk);
bpf_sk_release(sk);
return 0;
Expand All @@ -161,15 +161,15 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb)
struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk;

sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
bpf_sk_release(sk);
return 0;
}

void lookup_no_release(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
}

SEC("fail_no_release_subcall")
Expand Down

0 comments on commit f71c614

Please sign in to comment.