From e6c86c513f440bec5f1046539c7e3c6c653842da Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 14 Oct 2022 19:39:43 +0800 Subject: [PATCH 1/4] rcu-tasks: Provide rcu_trace_implies_rcu_gp() As an accident of implementation, an RCU Tasks Trace grace period also acts as an RCU grace period. However, this could change at any time. This commit therefore creates an rcu_trace_implies_rcu_gp() that currently returns true to codify this accident. Code relying on this accident must call this function to verify that this accident is still happening. Reported-by: Hou Tao Signed-off-by: Paul E. McKenney Cc: Alexei Starovoitov Cc: Martin KaFai Lau Link: https://lore.kernel.org/r/20221014113946.965131-2-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- include/linux/rcupdate.h | 12 ++++++++++++ kernel/rcu/tasks.h | 2 ++ 2 files changed, 14 insertions(+) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 08605ce7379d7..8822f06e4b40c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -240,6 +240,18 @@ static inline void exit_tasks_rcu_start(void) { } static inline void exit_tasks_rcu_finish(void) { } #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */ +/** + * rcu_trace_implies_rcu_gp - does an RCU Tasks Trace grace period imply an RCU grace period? + * + * As an accident of implementation, an RCU Tasks Trace grace period also + * acts as an RCU grace period. However, this could change at any time. + * Code relying on this accident must call this function to verify that + * this accident is still happening. + * + * You have been warned! 
+ */ +static inline bool rcu_trace_implies_rcu_gp(void) { return true; } + /** * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU * diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index f5bf6fb430dab..9435e5a7b53e4 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -1535,6 +1535,8 @@ static void rcu_tasks_trace_postscan(struct list_head *hop) { // Wait for late-stage exiting tasks to finish exiting. // These might have passed the call to exit_tasks_rcu_finish(). + + // If you remove the following line, update rcu_trace_implies_rcu_gp()!!! synchronize_rcu(); // Any tasks that exit after this point will set // TRC_NEED_QS_CHECKED in ->trc_reader_special.b.need_qs. From 59be91e5e70a1aa91dfee8088b071f6d05c8a1a3 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 14 Oct 2022 19:39:44 +0800 Subject: [PATCH 2/4] bpf: Use rcu_trace_implies_rcu_gp() in bpf memory allocator The memory free logic in the bpf memory allocator chains an RCU Tasks Trace grace period and a normal RCU grace period one after the other, so it can ensure that both sleepable and non-sleepable programs have finished. With the introduction of rcu_trace_implies_rcu_gp(), __free_rcu_tasks_trace() can check whether or not a normal RCU grace period has also passed after an RCU Tasks Trace grace period has passed. If so, the elements are freed directly; otherwise they are freed through call_rcu().
Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20221014113946.965131-3-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/memalloc.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 5f83be1d20181..2433be58bb852 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -222,9 +222,13 @@ static void __free_rcu(struct rcu_head *head) static void __free_rcu_tasks_trace(struct rcu_head *head) { - struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu); - - call_rcu(&c->rcu, __free_rcu); + /* If RCU Tasks Trace grace period implies RCU grace period, + * there is no need to invoke call_rcu(). + */ + if (rcu_trace_implies_rcu_gp()) + __free_rcu(head); + else + call_rcu(head, __free_rcu); } static void enque_to_free(struct bpf_mem_cache *c, void *obj) @@ -253,8 +257,9 @@ static void do_call_rcu(struct bpf_mem_cache *c) */ __llist_add(llnode, &c->waiting_for_gp); /* Use call_rcu_tasks_trace() to wait for sleepable progs to finish. - * Then use call_rcu() to wait for normal progs to finish - * and finally do free_one() on each element. + * If RCU Tasks Trace grace period implies RCU grace period, free + * these elements directly, else use call_rcu() to wait for normal + * progs to finish and finally do free_one() on each element. */ call_rcu_tasks_trace(&c->rcu, __free_rcu_tasks_trace); } From d39d1445d37747032e2b26732fed6fe25161cd36 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 14 Oct 2022 19:39:45 +0800 Subject: [PATCH 3/4] bpf: Use rcu_trace_implies_rcu_gp() in local storage map The local storage map is accessible to both sleepable and non-sleepable bpf programs, and its memory is freed by using both call_rcu_tasks_trace() and kfree_rcu() to wait for both RCU-tasks-trace grace period and RCU grace period to pass.
With the introduction of rcu_trace_implies_rcu_gp(), both bpf_selem_free_rcu() and bpf_local_storage_free_rcu() can check whether or not a normal RCU grace period has also passed after an RCU-tasks-trace grace period has passed. If it is true, it is safe to call kfree() directly. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20221014113946.965131-4-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 802fc15b0d73c..9dc6de1cf1853 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -88,8 +88,14 @@ void bpf_local_storage_free_rcu(struct rcu_head *rcu) { struct bpf_local_storage *local_storage; + /* If RCU Tasks Trace grace period implies RCU grace period, do + * kfree(), else do kfree_rcu(). + */ local_storage = container_of(rcu, struct bpf_local_storage, rcu); - kfree_rcu(local_storage, rcu); + if (rcu_trace_implies_rcu_gp()) + kfree(local_storage); + else + kfree_rcu(local_storage, rcu); } static void bpf_selem_free_rcu(struct rcu_head *rcu) @@ -97,7 +103,10 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu) struct bpf_local_storage_elem *selem; selem = container_of(rcu, struct bpf_local_storage_elem, rcu); - kfree_rcu(selem, rcu); + if (rcu_trace_implies_rcu_gp()) + kfree(selem); + else + kfree_rcu(selem, rcu); } /* local_storage->lock must be held and selem->local_storage == local_storage. From 4835f9ee980c1867584018e69cbf1f62d7844cb3 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 14 Oct 2022 19:39:46 +0800 Subject: [PATCH 4/4] bpf: Use rcu_trace_implies_rcu_gp() for program array freeing To support both sleepable and normal uprobe bpf programs, the freeing of the trace program array chains an RCU-tasks-trace grace period and a normal RCU grace period one after the other.
With the introduction of rcu_trace_implies_rcu_gp(), __bpf_prog_array_free_sleepable_cb() can check whether or not a normal RCU grace period has also passed after an RCU-tasks-trace grace period has passed. If it is true, it is safe to invoke kfree() directly. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20221014113946.965131-5-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 711fd293b6de4..4bc5f46d7030e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2251,8 +2251,14 @@ static void __bpf_prog_array_free_sleepable_cb(struct rcu_head *rcu) { struct bpf_prog_array *progs; + /* If RCU Tasks Trace grace period implies RCU grace period, there is + * no need to call kfree_rcu(), just call kfree() directly. + */ progs = container_of(rcu, struct bpf_prog_array, rcu); - kfree_rcu(progs, rcu); + if (rcu_trace_implies_rcu_gp()) + kfree(progs); + else + kfree_rcu(progs, rcu); } void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs)