Merge branch 'bpf-ancestor-cgroup-id'

Andrey Ignatov says: ==================== This patch set adds new BPF helper bpf_skb_ancestor_cgroup_id that returns id of cgroup v2 that is ancestor of cgroup associated with the skb at the ancestor_level. The helper is useful to implement policies in TC based on cgroups that are upper in hierarchy than immediate cgroup associated with skb. v1->v2: - more reliable check for testing IPv6 to become ready in selftest. ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
mariux64 · Aug 12, 2018 · 2ce3206 · 2ce3206
2 parents e8d2bec + 5ecd8c2
commit 2ce3206
Show file tree

Hide file tree

Showing 9 changed files with 404 additions and 5 deletions.
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
@@ -553,6 +553,36 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
 	return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
 }
 
+/**
+ * cgroup_ancestor - find ancestor of cgroup
+ * @cgrp: cgroup to find ancestor of
+ * @ancestor_level: level of ancestor to find starting from root
+ *
+ * Find ancestor of cgroup at specified level starting from root if it exists
+ * and return pointer to it. Return NULL if @cgrp doesn't have ancestor at
+ * @ancestor_level.
+ *
+ * This function is safe to call as long as @cgrp is accessible.
+ */
+static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp,
+					     int ancestor_level)
+{
+	struct cgroup *ptr;
+
+	if (cgrp->level < ancestor_level)
+		return NULL;
+
+	for (ptr = cgrp;
+	     ptr && ptr->level > ancestor_level;
+	     ptr = cgroup_parent(ptr))
+		;
+
+	if (ptr && ptr->level == ancestor_level)
+		return ptr;
+
+	return NULL;
+}
+
 /**
  * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry
  * @task: the task to be tested

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
@@ -2093,6 +2093,24 @@ union bpf_attr {
  * 	Return
  * 		The id is returned or 0 in case the id could not be retrieved.
  *
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ *	Description
+ *		Return id of cgroup v2 that is ancestor of cgroup associated
+ *		with the *skb* at the *ancestor_level*.  The root cgroup is at
+ *		*ancestor_level* zero and each step down the hierarchy
+ *		increments the level. If *ancestor_level* == level of cgroup
+ *		associated with *skb*, then return value will be same as that
+ *		of **bpf_skb_cgroup_id**\ ().
+ *
+ *		The helper is useful to implement policies based on cgroups
+ *		that are upper in hierarchy than immediate cgroup associated
+ *		with *skb*.
+ *
+ *		The format of returned id and helper limitations are same as in
+ *		**bpf_skb_cgroup_id**\ ().
+ *	Return
+ *		The id is returned or 0 in case the id could not be retrieved.
+ *
  * u64 bpf_get_current_cgroup_id(void)
  * 	Return
  * 		A 64-bit integer containing the current cgroup id based
@@ -2207,7 +2225,8 @@ union bpf_attr {
 	FN(skb_cgroup_id),		\
 	FN(get_current_cgroup_id),	\
 	FN(get_local_storage),		\
-	FN(sk_select_reuseport),
+	FN(sk_select_reuseport),	\
+	FN(skb_ancestor_cgroup_id),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call

diff --git a/net/core/filter.c b/net/core/filter.c
@@ -3778,6 +3778,32 @@ static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
 	.ret_type       = RET_INTEGER,
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
+
+BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
+	   ancestor_level)
+{
+	struct sock *sk = skb_to_full_sk(skb);
+	struct cgroup *ancestor;
+	struct cgroup *cgrp;
+
+	if (!sk || !sk_fullsock(sk))
+		return 0;
+
+	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+	ancestor = cgroup_ancestor(cgrp, ancestor_level);
+	if (!ancestor)
+		return 0;
+
+	return ancestor->kn->id.id;
+}
+
+static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
+	.func           = bpf_skb_ancestor_cgroup_id,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_ANYTHING,
+};
 #endif
 
 static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
@@ -4966,6 +4992,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 #ifdef CONFIG_SOCK_CGROUP_DATA
 	case BPF_FUNC_skb_cgroup_id:
 		return &bpf_skb_cgroup_id_proto;
+	case BPF_FUNC_skb_ancestor_cgroup_id:
+		return &bpf_skb_ancestor_cgroup_id_proto;
 #endif
 	default:
 		return bpf_base_func_proto(func_id);

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
@@ -2093,6 +2093,24 @@ union bpf_attr {
  * 	Return
  * 		The id is returned or 0 in case the id could not be retrieved.
  *
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ *	Description
+ *		Return id of cgroup v2 that is ancestor of cgroup associated
+ *		with the *skb* at the *ancestor_level*.  The root cgroup is at
+ *		*ancestor_level* zero and each step down the hierarchy
+ *		increments the level. If *ancestor_level* == level of cgroup
+ *		associated with *skb*, then return value will be same as that
+ *		of **bpf_skb_cgroup_id**\ ().
+ *
+ *		The helper is useful to implement policies based on cgroups
+ *		that are upper in hierarchy than immediate cgroup associated
+ *		with *skb*.
+ *
+ *		The format of returned id and helper limitations are same as in
+ *		**bpf_skb_cgroup_id**\ ().
+ *	Return
+ *		The id is returned or 0 in case the id could not be retrieved.
+ *
  * u64 bpf_get_current_cgroup_id(void)
  * 	Return
  * 		A 64-bit integer containing the current cgroup id based
@@ -2207,7 +2225,8 @@ union bpf_attr {
 	FN(skb_cgroup_id),		\
 	FN(get_current_cgroup_id),	\
 	FN(get_local_storage),		\
-	FN(sk_select_reuseport),
+	FN(sk_select_reuseport),	\
+	FN(skb_ancestor_cgroup_id),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
@@ -34,7 +34,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
 	test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
 	test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
 	test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
-	get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o
+	get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
+	test_skb_cgroup_id_kern.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -45,10 +46,11 @@ TEST_PROGS := test_kmod.sh \
 	test_sock_addr.sh \
 	test_tunnel.sh \
 	test_lwt_seg6local.sh \
-	test_lirc_mode2.sh
+	test_lirc_mode2.sh \
+	test_skb_cgroup_id.sh
 
 # Compile but not part of 'make run_tests'
-TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr
+TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user
 
 include ../lib.mk
 
@@ -59,6 +61,7 @@ $(TEST_GEN_PROGS): $(BPFOBJ)
 $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
 
 $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
+$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
 $(OUTPUT)/test_sock: cgroup_helpers.c
 $(OUTPUT)/test_sock_addr: cgroup_helpers.c
 $(OUTPUT)/test_socket_cookie: cgroup_helpers.c

diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -139,6 +139,10 @@ static unsigned long long (*bpf_get_current_cgroup_id)(void) =
 	(void *) BPF_FUNC_get_current_cgroup_id;
 static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
 	(void *) BPF_FUNC_get_local_storage;
+static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) =
+	(void *) BPF_FUNC_skb_cgroup_id;
+static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
+	(void *) BPF_FUNC_skb_ancestor_cgroup_id;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions

diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018 Facebook
+
+set -eu
+
+wait_for_ip()
+{
+	local _i
+	echo -n "Wait for testing link-local IP to become available "
+	for _i in $(seq ${MAX_PING_TRIES}); do
+		echo -n "."
+		if ping -6 -q -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then
+			echo " OK"
+			return
+		fi
+		sleep 1
+	done
+	echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
+	exit 1
+}
+
+setup()
+{
+	# Create testing interfaces not to interfere with current environment.
+	ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
+	ip link set ${TEST_IF} up
+	ip link set ${TEST_IF_PEER} up
+
+	wait_for_ip
+
+	tc qdisc add dev ${TEST_IF} clsact
+	tc filter add dev ${TEST_IF} egress bpf obj ${BPF_PROG_OBJ} \
+		sec ${BPF_PROG_SECTION} da
+
+	BPF_PROG_ID=$(tc filter show dev ${TEST_IF} egress | \
+			awk '/ id / {sub(/.* id /, "", $0); print($1)}')
+}
+
+cleanup()
+{
+	ip link del ${TEST_IF} 2>/dev/null || :
+	ip link del ${TEST_IF_PEER} 2>/dev/null || :
+}
+
+main()
+{
+	trap cleanup EXIT 2 3 6 15
+	setup
+	${PROG} ${TEST_IF} ${BPF_PROG_ID}
+}
+
+DIR=$(dirname $0)
+TEST_IF="test_cgid_1"
+TEST_IF_PEER="test_cgid_2"
+MAX_PING_TRIES=5
+BPF_PROG_OBJ="${DIR}/test_skb_cgroup_id_kern.o"
+BPF_PROG_SECTION="cgroup_id_logger"
+BPF_PROG_ID=0
+PROG="${DIR}/test_skb_cgroup_id_user"
+
+main
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+
+#include <string.h>
+
+#include "bpf_helpers.h"
+
+#define NUM_CGROUP_LEVELS	4
+
+struct bpf_map_def SEC("maps") cgroup_ids = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64),
+	.max_entries = NUM_CGROUP_LEVELS,
+};
+
+static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
+{
+	__u64 id;
+
+	/* [1] &level passed to external function that may change it, it's
+	 *     incompatible with loop unroll.
+	 */
+	id = bpf_skb_ancestor_cgroup_id(skb, level);
+	bpf_map_update_elem(&cgroup_ids, &level, &id, 0);
+}
+
+SEC("cgroup_id_logger")
+int log_cgroup_id(struct __sk_buff *skb)
+{
+	/* Loop unroll can't be used here due to [1]. Unrolling manually.
+	 * Number of calls should be in sync with NUM_CGROUP_LEVELS.
+	 */
+	log_nth_level(skb, 0);
+	log_nth_level(skb, 1);
+	log_nth_level(skb, 2);
+	log_nth_level(skb, 3);
+
+	return TC_ACT_OK;
+}
+
+int _version SEC("version") = 1;
+
+char _license[] SEC("license") = "GPL";