mm: introduce slabobj_ext to support slab object extensions
Currently, slab pages can store only vectors of obj_cgroup pointers in
page->memcg_data.  Introduce a slabobj_ext structure to allow more data to
be stored for each slab object.  Wrap obj_cgroup into slabobj_ext to
support the current functionality while allowing slabobj_ext to be
extended in the future.

Link: https://lkml.kernel.org/r/20240321163705.3067592-7-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Tested-by: Kees Cook <keescook@chromium.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Alex Gaynor <alex.gaynor@gmail.com>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andreas Hindborg <a.hindborg@samsung.com>
Cc: Benno Lossin <benno.lossin@proton.me>
Cc: "Björn Roy Baron" <bjorn3_gh@protonmail.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Gary Guo <gary@garyguo.net>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Wedson Almeida Filho <wedsonaf@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Suren Baghdasaryan authored and Andrew Morton committed Apr 26, 2024
1 parent a567411 commit 21c690a
Showing 9 changed files with 145 additions and 112 deletions.
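
Background for the diff hunks below: the patch keeps the existing page->memcg_data encoding, in which a single unsigned long holds both the address of a per-slab vector and low flag bits, but the vector elements become struct slabobj_ext instead of bare obj_cgroup pointers. The following is a minimal user-space sketch of that tagged-pointer scheme, not kernel code: slabobj_ext, MEMCG_DATA_OBJEXTS, MEMCG_DATA_FLAGS_MASK and the masking in slab_obj_exts() mirror the hunks further down, while struct fake_slab, calloc() and main() are invented stand-ins for struct slab, kcalloc_node() and the slab allocator itself.

/*
 * User-space sketch of the tagged-pointer scheme this patch builds on:
 * one unsigned long per slab ("obj_exts" here, page->memcg_data in the
 * kernel) stores the address of a vector of struct slabobj_ext, one
 * entry per object, with MEMCG_DATA_OBJEXTS set in its low bits.
 */
#include <stdio.h>
#include <stdlib.h>

struct obj_cgroup;                              /* opaque for this sketch */

struct slabobj_ext {
        struct obj_cgroup *objcg;               /* per-object memcg data */
} __attribute__((aligned(8)));

#define MEMCG_DATA_OBJEXTS      (1UL << 0)
#define MEMCG_DATA_KMEM         (1UL << 1)
#define MEMCG_DATA_FLAGS_MASK   (MEMCG_DATA_OBJEXTS | MEMCG_DATA_KMEM)

struct fake_slab {                              /* stand-in for struct slab */
        unsigned long obj_exts;                 /* pointer + flag bits */
};

/* analogous to slab_obj_exts(): mask off the flags to recover the vector */
static struct slabobj_ext *slab_obj_exts(struct fake_slab *slab)
{
        return (struct slabobj_ext *)(slab->obj_exts & ~MEMCG_DATA_FLAGS_MASK);
}

int main(void)
{
        unsigned int objects = 4;               /* objs_per_slab() in the kernel */
        struct fake_slab slab = { 0 };
        struct slabobj_ext *vec;

        /*
         * One extension slot per object; calloc memory is at least 8-byte
         * aligned, so the low bits of the address are free for the flag.
         */
        vec = calloc(objects, sizeof(*vec));
        if (!vec)
                return 1;

        slab.obj_exts = (unsigned long)vec | MEMCG_DATA_OBJEXTS;

        printf("OBJEXTS flag: %lu, recovered vector: %p, original: %p\n",
               slab.obj_exts & MEMCG_DATA_OBJEXTS,
               (void *)slab_obj_exts(&slab), (void *)vec);

        free(vec);
        return 0;
}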
20 changes: 14 additions & 6 deletions include/linux/memcontrol.h
@@ -349,8 +349,8 @@ struct mem_cgroup {
extern struct mem_cgroup *root_mem_cgroup;

enum page_memcg_data_flags {
/* page->memcg_data is a pointer to an objcgs vector */
MEMCG_DATA_OBJCGS = (1UL << 0),
/* page->memcg_data is a pointer to an slabobj_ext vector */
MEMCG_DATA_OBJEXTS = (1UL << 0),
/* page has been accounted as a non-slab kernel page */
MEMCG_DATA_KMEM = (1UL << 1),
/* the next bit after the last actual flag */
@@ -388,7 +388,7 @@ static inline struct mem_cgroup *__folio_memcg(struct folio *folio)
unsigned long memcg_data = folio->memcg_data;

VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio);
VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJEXTS, folio);
VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_KMEM, folio);

return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
@@ -409,7 +409,7 @@ static inline struct obj_cgroup *__folio_objcg(struct folio *folio)
unsigned long memcg_data = folio->memcg_data;

VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio);
VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJEXTS, folio);
VM_BUG_ON_FOLIO(!(memcg_data & MEMCG_DATA_KMEM), folio);

return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
@@ -506,7 +506,7 @@ static inline struct mem_cgroup *folio_memcg_check(struct folio *folio)
*/
unsigned long memcg_data = READ_ONCE(folio->memcg_data);

if (memcg_data & MEMCG_DATA_OBJCGS)
if (memcg_data & MEMCG_DATA_OBJEXTS)
return NULL;

if (memcg_data & MEMCG_DATA_KMEM) {
@@ -552,7 +552,7 @@ static inline struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *ob
static inline bool folio_memcg_kmem(struct folio *folio)
{
VM_BUG_ON_PGFLAGS(PageTail(&folio->page), &folio->page);
VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJCGS, folio);
VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJEXTS, folio);
return folio->memcg_data & MEMCG_DATA_KMEM;
}

@@ -1633,6 +1633,14 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
}
#endif /* CONFIG_MEMCG */

/*
* Extended information for slab objects stored as an array in page->memcg_data
* if MEMCG_DATA_OBJEXTS is set.
*/
struct slabobj_ext {
struct obj_cgroup *objcg;
} __aligned(8);

static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx)
{
__mod_lruvec_kmem_state(p, idx, 1);
4 changes: 2 additions & 2 deletions include/linux/mm_types.h
@@ -169,7 +169,7 @@ struct page {
/* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */
atomic_t _refcount;

#ifdef CONFIG_MEMCG
#ifdef CONFIG_SLAB_OBJ_EXT
unsigned long memcg_data;
#endif

@@ -331,7 +331,7 @@ struct folio {
};
atomic_t _mapcount;
atomic_t _refcount;
#ifdef CONFIG_MEMCG
#ifdef CONFIG_SLAB_OBJ_EXT
unsigned long memcg_data;
#endif
#if defined(WANT_PAGE_VIRTUAL)
4 changes: 4 additions & 0 deletions init/Kconfig
@@ -929,6 +929,9 @@ config NUMA_BALANCING_DEFAULT_ENABLED
If set, automatic NUMA balancing will be enabled if running on a NUMA
machine.

config SLAB_OBJ_EXT
bool

menuconfig CGROUPS
bool "Control Group support"
select KERNFS
@@ -962,6 +965,7 @@ config MEMCG
bool "Memory controller"
select PAGE_COUNTER
select EVENTFD
select SLAB_OBJ_EXT
help
Provides control over the memory footprint of tasks in a cgroup.

14 changes: 7 additions & 7 deletions mm/kfence/core.c
@@ -595,9 +595,9 @@ static unsigned long kfence_init_pool(void)
continue;

__folio_set_slab(slab_folio(slab));
#ifdef CONFIG_MEMCG
slab->memcg_data = (unsigned long)&kfence_metadata_init[i / 2 - 1].objcg |
MEMCG_DATA_OBJCGS;
#ifdef CONFIG_MEMCG_KMEM
slab->obj_exts = (unsigned long)&kfence_metadata_init[i / 2 - 1].obj_exts |
MEMCG_DATA_OBJEXTS;
#endif
}

@@ -645,8 +645,8 @@ static unsigned long kfence_init_pool(void)

if (!i || (i % 2))
continue;
#ifdef CONFIG_MEMCG
slab->memcg_data = 0;
#ifdef CONFIG_MEMCG_KMEM
slab->obj_exts = 0;
#endif
__folio_clear_slab(slab_folio(slab));
}
@@ -1139,8 +1139,8 @@ void __kfence_free(void *addr)
{
struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);

#ifdef CONFIG_MEMCG
KFENCE_WARN_ON(meta->objcg);
#ifdef CONFIG_MEMCG_KMEM
KFENCE_WARN_ON(meta->obj_exts.objcg);
#endif
/*
* If the objects of the cache are SLAB_TYPESAFE_BY_RCU, defer freeing
4 changes: 2 additions & 2 deletions mm/kfence/kfence.h
@@ -97,8 +97,8 @@ struct kfence_metadata {
struct kfence_track free_track;
/* For updating alloc_covered on frees. */
u32 alloc_stack_hash;
#ifdef CONFIG_MEMCG
struct obj_cgroup *objcg;
#ifdef CONFIG_MEMCG_KMEM
struct slabobj_ext obj_exts;
#endif
};

56 changes: 7 additions & 49 deletions mm/memcontrol.c
@@ -2977,13 +2977,6 @@ void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
}

#ifdef CONFIG_MEMCG_KMEM
/*
* The allocated objcg pointers array is not accounted directly.
* Moreover, it should not come from DMA buffer and is not readily
* reclaimable. So those GFP bits should be masked off.
*/
#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \
__GFP_ACCOUNT | __GFP_NOFAIL)

/*
* mod_objcg_mlstate() may be called with irq enabled, so
@@ -3003,70 +2996,35 @@ static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
rcu_read_unlock();
}

int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
gfp_t gfp, bool new_slab)
{
unsigned int objects = objs_per_slab(s, slab);
unsigned long memcg_data;
void *vec;

gfp &= ~OBJCGS_CLEAR_MASK;
vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp,
slab_nid(slab));
if (!vec)
return -ENOMEM;

memcg_data = (unsigned long) vec | MEMCG_DATA_OBJCGS;
if (new_slab) {
/*
* If the slab is brand new and nobody can yet access its
* memcg_data, no synchronization is required and memcg_data can
* be simply assigned.
*/
slab->memcg_data = memcg_data;
} else if (cmpxchg(&slab->memcg_data, 0, memcg_data)) {
/*
* If the slab is already in use, somebody can allocate and
* assign obj_cgroups in parallel. In this case the existing
* objcg vector should be reused.
*/
kfree(vec);
return 0;
}

kmemleak_not_leak(vec);
return 0;
}

static __always_inline
struct mem_cgroup *mem_cgroup_from_obj_folio(struct folio *folio, void *p)
{
/*
* Slab objects are accounted individually, not per-page.
* Memcg membership data for each individual object is saved in
* slab->memcg_data.
* slab->obj_exts.
*/
if (folio_test_slab(folio)) {
struct obj_cgroup **objcgs;
struct slabobj_ext *obj_exts;
struct slab *slab;
unsigned int off;

slab = folio_slab(folio);
objcgs = slab_objcgs(slab);
if (!objcgs)
obj_exts = slab_obj_exts(slab);
if (!obj_exts)
return NULL;

off = obj_to_index(slab->slab_cache, slab, p);
if (objcgs[off])
return obj_cgroup_memcg(objcgs[off]);
if (obj_exts[off].objcg)
return obj_cgroup_memcg(obj_exts[off].objcg);

return NULL;
}

/*
* folio_memcg_check() is used here, because in theory we can encounter
* a folio where the slab flag has been cleared already, but
* slab->memcg_data has not been freed yet
* slab->obj_exts has not been freed yet
* folio_memcg_check() will guarantee that a proper memory
* cgroup pointer or NULL will be returned.
*/
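
The mm/memcontrol.c hunk above changes the per-object lookup in mem_cgroup_from_obj_folio() from indexing an obj_cgroup * vector to indexing the slabobj_ext vector and reading its objcg member. The short user-space sketch below shows that lookup pattern only; it omits the flag encoding from the earlier sketch, and struct fake_slab, objcg_of() and the simplified obj_to_index() arithmetic are illustrative stand-ins, not kernel code.

/*
 * Sketch of the per-object lookup pattern from mem_cgroup_from_obj_folio():
 * compute the object's index within the slab, then read the matching
 * slabobj_ext entry.  The slab geometry and index arithmetic are simplified
 * stand-ins for objs_per_slab()/obj_to_index() in the kernel.
 */
#include <stddef.h>
#include <stdio.h>

struct obj_cgroup { int id; };          /* opaque in the kernel; faked here */

struct slabobj_ext {
        struct obj_cgroup *objcg;
} __attribute__((aligned(8)));

struct fake_slab {
        void *base;                     /* first object in the slab */
        size_t obj_size;                /* size of one object */
        struct slabobj_ext *obj_exts;   /* one entry per object */
};

/* simplified obj_to_index(): offset of the object divided by object size */
static unsigned int obj_to_index(const struct fake_slab *slab, const void *p)
{
        return (unsigned int)(((const char *)p - (const char *)slab->base) /
                              slab->obj_size);
}

static struct obj_cgroup *objcg_of(const struct fake_slab *slab, const void *p)
{
        unsigned int off = obj_to_index(slab, p);

        return slab->obj_exts ? slab->obj_exts[off].objcg : NULL;
}

int main(void)
{
        struct obj_cgroup cg = { .id = 42 };
        char objects[4][64];            /* pretend slab: four 64-byte objects */
        struct slabobj_ext exts[4] = { { 0 } };
        struct fake_slab slab = {
                .base = objects, .obj_size = 64, .obj_exts = exts,
        };

        exts[2].objcg = &cg;            /* third object is charged to cg */
        printf("objcg id of object 2: %d\n", objcg_of(&slab, objects[2])->id);
        printf("objcg of object 0: %p\n", (void *)objcg_of(&slab, objects[0]));
        return 0;
}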
2 changes: 1 addition & 1 deletion mm/page_owner.c
@@ -515,7 +515,7 @@ static inline int print_page_owner_memcg(char *kbuf, size_t count, int ret,
if (!memcg_data)
goto out_unlock;

if (memcg_data & MEMCG_DATA_OBJCGS)
if (memcg_data & MEMCG_DATA_OBJEXTS)
ret += scnprintf(kbuf + ret, count - ret,
"Slab cache page\n");

52 changes: 27 additions & 25 deletions mm/slab.h
@@ -87,8 +87,8 @@ struct slab {
unsigned int __unused;

atomic_t __page_refcount;
#ifdef CONFIG_MEMCG
unsigned long memcg_data;
#ifdef CONFIG_SLAB_OBJ_EXT
unsigned long obj_exts;
#endif
};

@@ -97,8 +97,8 @@ struct slab {
SLAB_MATCH(flags, __page_flags);
SLAB_MATCH(compound_head, slab_cache); /* Ensure bit 0 is clear */
SLAB_MATCH(_refcount, __page_refcount);
#ifdef CONFIG_MEMCG
SLAB_MATCH(memcg_data, memcg_data);
#ifdef CONFIG_SLAB_OBJ_EXT
SLAB_MATCH(memcg_data, obj_exts);
#endif
#undef SLAB_MATCH
static_assert(sizeof(struct slab) <= sizeof(struct page));
@@ -536,42 +536,44 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla
return false;
}

#ifdef CONFIG_MEMCG_KMEM
#ifdef CONFIG_SLAB_OBJ_EXT

/*
* slab_objcgs - get the object cgroups vector associated with a slab
* slab_obj_exts - get the pointer to the slab object extension vector
* associated with a slab.
* @slab: a pointer to the slab struct
*
* Returns a pointer to the object cgroups vector associated with the slab,
* Returns a pointer to the object extension vector associated with the slab,
* or NULL if no such vector has been associated yet.
*/
static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
static inline struct slabobj_ext *slab_obj_exts(struct slab *slab)
{
unsigned long memcg_data = READ_ONCE(slab->memcg_data);
unsigned long obj_exts = READ_ONCE(slab->obj_exts);

VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS),
#ifdef CONFIG_MEMCG
VM_BUG_ON_PAGE(obj_exts && !(obj_exts & MEMCG_DATA_OBJEXTS),
slab_page(slab));
VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, slab_page(slab));
VM_BUG_ON_PAGE(obj_exts & MEMCG_DATA_KMEM, slab_page(slab));

return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
return (struct slabobj_ext *)(obj_exts & ~MEMCG_DATA_FLAGS_MASK);
#else
return (struct slabobj_ext *)obj_exts;
#endif
}

int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
gfp_t gfp, bool new_slab);
void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
enum node_stat_item idx, int nr);
#else /* CONFIG_MEMCG_KMEM */
static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
#else /* CONFIG_SLAB_OBJ_EXT */

static inline struct slabobj_ext *slab_obj_exts(struct slab *slab)
{
return NULL;
}

static inline int memcg_alloc_slab_cgroups(struct slab *slab,
struct kmem_cache *s, gfp_t gfp,
bool new_slab)
{
return 0;
}
#endif /* CONFIG_MEMCG_KMEM */
#endif /* CONFIG_SLAB_OBJ_EXT */

#ifdef CONFIG_MEMCG_KMEM
void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
enum node_stat_item idx, int nr);
#endif

size_t __ksize(const void *objp);
