From b6c1a8af5b1eec42aabc13376f94aa90c3d765f1 Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Fri, 10 Feb 2023 15:47:31 +0000
Subject: [PATCH 1/4] mm: memcontrol: add new kernel parameter cgroup.memory=nobpf

Add a new kernel parameter, cgroup.memory=nobpf, to allow users to
disable BPF memory accounting. This is preparation for the follow-up
patches.
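
The option is parsed as one token in the existing comma-separated
cgroup.memory= list, so it composes with the options already
documented there, e.g. booting with:

	cgroup.memory=nokmem,nobpf

Allocation sites then consult the new static key before charging; a
minimal sketch of the intended pattern (the follow-up patches wrap it
in a helper):

	gfp_t flags = GFP_KERNEL;

	/* Charge the memcg only while BPF accounting is enabled. */
	if (memcg_bpf_enabled())
		flags |= __GFP_ACCOUNT;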

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Link: https://lore.kernel.org/r/20230210154734.4416-2-laoar.shao@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../admin-guide/kernel-parameters.txt |  1 +
 include/linux/memcontrol.h            | 11 +++++++++++
 mm/memcontrol.c                       | 18 ++++++++++++++++++
 3 files changed, 30 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6cfa6e3996cf7..29fb41e801ce0 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -557,6 +557,7 @@
 			Format: <string>
 			nosocket -- Disable socket memory accounting.
 			nokmem -- Disable kernel memory accounting.
+			nobpf -- Disable BPF memory accounting.
 
 	checkreqprot=	[SELINUX] Set initial checkreqprot flag value.
 			Format: { "0" | "1" }

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 85dc9b88ea379..1e38e99998c79 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1754,6 +1754,12 @@ struct obj_cgroup *get_obj_cgroup_from_page(struct page *page);
 int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size);
 void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);
 
+extern struct static_key_false memcg_bpf_enabled_key;
+static inline bool memcg_bpf_enabled(void)
+{
+	return static_branch_likely(&memcg_bpf_enabled_key);
+}
+
 extern struct static_key_false memcg_kmem_enabled_key;
 
 static inline bool memcg_kmem_enabled(void)
@@ -1832,6 +1838,11 @@ static inline struct obj_cgroup *get_obj_cgroup_from_page(struct page *page)
 	return NULL;
 }
 
+static inline bool memcg_bpf_enabled(void)
+{
+	return false;
+}
+
 static inline bool memcg_kmem_enabled(void)
 {
 	return false;

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 73afff8062f9b..49f40730e7117 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -88,6 +88,9 @@ static bool cgroup_memory_nosocket __ro_after_init;
 /* Kernel memory accounting disabled? */
 static bool cgroup_memory_nokmem __ro_after_init;
 
+/* BPF memory accounting disabled? */
+static bool cgroup_memory_nobpf __ro_after_init;
+
 #ifdef CONFIG_CGROUP_WRITEBACK
 static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq);
 #endif
@@ -347,6 +350,9 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
  */
 DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key);
 EXPORT_SYMBOL(memcg_kmem_enabled_key);
+
+DEFINE_STATIC_KEY_FALSE(memcg_bpf_enabled_key);
+EXPORT_SYMBOL(memcg_bpf_enabled_key);
 #endif
 
 /**
@@ -5357,6 +5363,11 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
 		static_branch_inc(&memcg_sockets_enabled_key);
 
+#if defined(CONFIG_MEMCG_KMEM)
+	if (!cgroup_memory_nobpf)
+		static_branch_inc(&memcg_bpf_enabled_key);
+#endif
+
 	return &memcg->css;
 }
 
@@ -5441,6 +5452,11 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_active)
 		static_branch_dec(&memcg_sockets_enabled_key);
 
+#if defined(CONFIG_MEMCG_KMEM)
+	if (!cgroup_memory_nobpf)
+		static_branch_dec(&memcg_bpf_enabled_key);
+#endif
+
 	vmpressure_cleanup(&memcg->vmpressure);
 	cancel_work_sync(&memcg->high_work);
 	mem_cgroup_remove_from_trees(memcg);
@@ -7269,6 +7285,8 @@ static int __init cgroup_memory(char *s)
 			cgroup_memory_nosocket = true;
 		if (!strcmp(token, "nokmem"))
 			cgroup_memory_nokmem = true;
+		if (!strcmp(token, "nobpf"))
+			cgroup_memory_nobpf = true;
 	}
 	return 1;
 }

From ddef81b5fd1da4d7c3cc8785d2043b73b72f38ef Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Fri, 10 Feb 2023 15:47:32 +0000
Subject: [PATCH 2/4] bpf: use bpf_map_kvcalloc in bpf_local_storage

Introduce a new helper, bpf_map_kvcalloc(), for the memory allocation
in bpf_local_storage. With it, the allocation is charged to the map's
memcg instead of to current's, though the two are currently the same
thing, as the helper is only used in the map creation path for now.
Charging the map's memory to the memcg through the map itself makes
the accounting clearer.
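
For illustration, the caller side changes from open-coding the memcg
charge to delegating it to the map, as in this sketch drawn from the
bpf_local_storage conversion below:

	/* Charged to the memcg saved in 'smap->map', not to current's.
	 * __GFP_ACCOUNT is added inside the helper, so it is dropped
	 * from the caller's flags.
	 */
	smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
					 nbuckets, GFP_USER | __GFP_NOWARN);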

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Link: https://lore.kernel.org/r/20230210154734.4416-3-laoar.shao@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h            |  8 ++++++++
 kernel/bpf/bpf_local_storage.c |  4 ++--
 kernel/bpf/syscall.c           | 15 +++++++++++++++
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 35c18a98c21a7..fe0bf482fdf89 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1886,6 +1886,8 @@ struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
 			   int node);
 void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags);
+void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
+		       gfp_t flags);
 void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
 				    size_t align, gfp_t flags);
 #else
@@ -1902,6 +1904,12 @@ bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
 	return kzalloc(size, flags);
 }
 
+static inline void *
+bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, gfp_t flags)
+{
+	return kvcalloc(n, size, flags);
+}
+
 static inline void __percpu *
 bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align,
 		     gfp_t flags)

diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 373c3c2c75bc0..35f4138a54dc1 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -568,8 +568,8 @@ static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_att
 	nbuckets = max_t(u32, 2, nbuckets);
 	smap->bucket_log = ilog2(nbuckets);
 
-	smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
-				 GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
+	smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
+					 nbuckets, GFP_USER | __GFP_NOWARN);
 	if (!smap->buckets) {
 		bpf_map_area_free(smap);
 		return ERR_PTR(-ENOMEM);

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bcc97613de768..9d94a35d8b0f9 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -464,6 +464,21 @@ void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
 	return ptr;
 }
 
+void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
+		       gfp_t flags)
+{
+	struct mem_cgroup *memcg, *old_memcg;
+	void *ptr;
+
+	memcg = bpf_map_get_memcg(map);
+	old_memcg = set_active_memcg(memcg);
+	ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT);
+	set_active_memcg(old_memcg);
+	mem_cgroup_put(memcg);
+
+	return ptr;
+}
+
 void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
 				    size_t align, gfp_t flags)
 {

From ee53cbfb1ebf990de0d084a7cd6b67b05fe1f7ac Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Fri, 10 Feb 2023 15:47:33 +0000
Subject: [PATCH 3/4] bpf: allow to disable bpf map memory accounting

We can simply set the root memcg as the map's memcg to disable BPF
memory accounting. bpf_map_area_alloc() is a little special, as it
gets the memcg from current rather than from the map, so we need to
drop __GFP_ACCOUNT specifically for it.
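
The key enabler is bpf_memcg_flags() (added below), which makes
__GFP_ACCOUNT conditional on the static key introduced in the first
patch. A minimal illustration of its effect:

	/* With accounting enabled this yields GFP_KERNEL | __GFP_ACCOUNT
	 * (i.e. GFP_KERNEL_ACCOUNT); with cgroup.memory=nobpf it yields
	 * plain GFP_KERNEL, so nothing is charged to the memcg.
	 */
	gfp_t gfp = bpf_memcg_flags(GFP_KERNEL);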

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Link: https://lore.kernel.org/r/20230210154734.4416-4-laoar.shao@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h   | 8 ++++++++
 kernel/bpf/memalloc.c | 3 ++-
 kernel/bpf/syscall.c  | 5 +++--
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index fe0bf482fdf89..4385418118f66 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include <linux/memcontrol.h>
 
 struct bpf_verifier_env;
 struct bpf_verifier_log;
@@ -2933,4 +2934,11 @@ static inline bool type_is_alloc(u32 type)
 	return type & MEM_ALLOC;
 }
 
+static inline gfp_t bpf_memcg_flags(gfp_t flags)
+{
+	if (memcg_bpf_enabled())
+		return flags | __GFP_ACCOUNT;
+	return flags;
+}
+
 #endif /* _LINUX_BPF_H */

diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 1db156405b68b..490d03a4581aa 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -395,7 +395,8 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 		unit_size = size;
 
 #ifdef CONFIG_MEMCG_KMEM
-		objcg = get_obj_cgroup_from_current();
+		if (memcg_bpf_enabled())
+			objcg = get_obj_cgroup_from_current();
 #endif
 		for_each_possible_cpu(cpu) {
 			c = per_cpu_ptr(pc, cpu);

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 9d94a35d8b0f9..cda8d00f37622 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -309,7 +309,7 @@ static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
 	 * __GFP_RETRY_MAYFAIL to avoid such situations.
 	 */
 
-	const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_ACCOUNT;
+	gfp_t gfp = bpf_memcg_flags(__GFP_NOWARN | __GFP_ZERO);
 	unsigned int flags = 0;
 	unsigned long align = 1;
 	void *area;
@@ -418,7 +418,8 @@ static void bpf_map_save_memcg(struct bpf_map *map)
 	 * So we have to check map->objcg for being NULL each time it's
 	 * being used.
 	 */
-	map->objcg = get_obj_cgroup_from_current();
+	if (memcg_bpf_enabled())
+		map->objcg = get_obj_cgroup_from_current();
 }
 
 static void bpf_map_release_memcg(struct bpf_map *map)

From bf3965082491601bf9cd6d9a0ce2d88cb219168a Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Fri, 10 Feb 2023 15:47:34 +0000
Subject: [PATCH 4/4] bpf: allow to disable bpf prog memory accounting

We can simply disable BPF prog memory accounting by not setting
__GFP_ACCOUNT.
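
Since GFP_KERNEL_ACCOUNT is (GFP_KERNEL | __GFP_ACCOUNT), the
conversions below are behavior-preserving whenever accounting is
enabled; only the nobpf case changes. For example:

	/* Before: always charged to the memcg. */
	gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;

	/* After: charged only when memcg_bpf_enabled(). */
	gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);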

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Link: https://lore.kernel.org/r/20230210154734.4416-5-laoar.shao@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/core.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 16da51093aff8..3390961c4e108 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -35,6 +35,7 @@
 #include
 #include
 #include
+#include <linux/memcontrol.h>
 
 #include
 #include
@@ -87,7 +88,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
 
 struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
 {
-	gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
+	gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
 	struct bpf_prog_aux *aux;
 	struct bpf_prog *fp;
 
@@ -96,12 +97,12 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
 	if (fp == NULL)
 		return NULL;
 
-	aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT | gfp_extra_flags);
+	aux = kzalloc(sizeof(*aux), bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
 	if (aux == NULL) {
 		vfree(fp);
 		return NULL;
 	}
-	fp->active = alloc_percpu_gfp(int, GFP_KERNEL_ACCOUNT | gfp_extra_flags);
+	fp->active = alloc_percpu_gfp(int, bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
 	if (!fp->active) {
 		vfree(fp);
 		kfree(aux);
@@ -126,7 +127,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
 
 struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 {
-	gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
+	gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
 	struct bpf_prog *prog;
 	int cpu;
 
@@ -159,7 +160,7 @@ int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
 
 	prog->aux->jited_linfo = kvcalloc(prog->aux->nr_linfo,
 					  sizeof(*prog->aux->jited_linfo),
-					  GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+					  bpf_memcg_flags(GFP_KERNEL | __GFP_NOWARN));
 	if (!prog->aux->jited_linfo)
 		return -ENOMEM;
 
@@ -234,7 +235,7 @@ void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
 struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 				  gfp_t gfp_extra_flags)
 {
-	gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
+	gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
 	struct bpf_prog *fp;
 	u32 pages;