Skip to content

Commit

Permalink
mm: charge active memcg when no mm is set
Browse files Browse the repository at this point in the history
set_active_memcg() worked for kernel allocations but was silently ignored
for user pages.

This patch establishes a precedence order for who gets charged:

1. If there is a memcg associated with the page already, that memcg is
   charged. This happens during swapin.

2. If an explicit mm is passed, mm->memcg is charged. This happens
   during page faults, which can be triggered in remote VMs (e.g. gup).

3. Otherwise consult the current process context. If there is an
   active_memcg, use that. Otherwise, current->mm->memcg.

Previously, if a NULL mm was passed to mem_cgroup_charge (case 3) it would
always charge the root cgroup.  Now it looks up the active_memcg first,
then current->mm->memcg, and only falls back to charging the root cgroup
if neither is set.

Link: https://lkml.kernel.org/r/20210610173944.1203706-3-schatzberg.dan@gmail.com
Signed-off-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Chris Down <chris@chrisdown.name>
Acked-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Michal Koutný <mkoutny@suse.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
Dan Schatzberg authored and Linus Torvalds committed Jun 29, 2021
1 parent 87579e9 commit 04f94e3
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 17 deletions.
2 changes: 1 addition & 1 deletion mm/filemap.c
Original file line number Diff line number Diff line change
Expand Up @@ -872,7 +872,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
page->index = offset;

if (!huge) {
error = mem_cgroup_charge(page, current->mm, gfp);
error = mem_cgroup_charge(page, NULL, gfp);
if (error)
goto error;
charged = true;
Expand Down
41 changes: 27 additions & 14 deletions mm/memcontrol.c
Original file line number Diff line number Diff line change
Expand Up @@ -897,13 +897,24 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
}
EXPORT_SYMBOL(mem_cgroup_from_task);

static __always_inline struct mem_cgroup *active_memcg(void)
{
if (in_interrupt())
return this_cpu_read(int_active_memcg);
else
return current->active_memcg;
}

/**
* get_mem_cgroup_from_mm: Obtain a reference on given mm_struct's memcg.
* @mm: mm from which memcg should be extracted. It can be NULL.
*
* Obtain a reference on mm->memcg and returns it if successful. Otherwise
* root_mem_cgroup is returned. However if mem_cgroup is disabled, NULL is
* returned.
* Obtain a reference on mm->memcg and return it if successful. If mm
* is NULL, then the memcg is chosen as follows:
* 1) The active memcg, if set.
* 2) current->mm->memcg, if available.
* 3) The root memcg.
* If mem_cgroup is disabled, NULL is returned.
*/
struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
{
Expand All @@ -921,8 +932,17 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
* counting is disabled on the root level in the
* cgroup core. See CSS_NO_REF.
*/
if (unlikely(!mm))
return root_mem_cgroup;
if (unlikely(!mm)) {
memcg = active_memcg();
if (unlikely(memcg)) {
/* remote memcg must hold a ref */
css_get(&memcg->css);
return memcg;
}
mm = current->mm;
if (unlikely(!mm))
return root_mem_cgroup;
}

rcu_read_lock();
do {
Expand All @@ -935,14 +955,6 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
}
EXPORT_SYMBOL(get_mem_cgroup_from_mm);

static __always_inline struct mem_cgroup *active_memcg(void)
{
if (in_interrupt())
return this_cpu_read(int_active_memcg);
else
return current->active_memcg;
}

static __always_inline bool memcg_kmem_bypass(void)
{
/* Allow remote memcg charging from any context. */
Expand Down Expand Up @@ -6711,7 +6723,8 @@ static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg,
* @gfp_mask: reclaim mode
*
* Try to charge @page to the memcg that @mm belongs to, reclaiming
* pages according to @gfp_mask if necessary.
* pages according to @gfp_mask if necessary. If @mm is NULL, try to
* charge to the active memcg, falling back to current->mm's memcg.
*
* Do not use this for pages allocated for swapin.
*
Expand Down
4 changes: 2 additions & 2 deletions mm/shmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -1695,7 +1695,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
{
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm;
struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL;
struct swap_info_struct *si;
struct page *page = NULL;
swp_entry_t swap;
Expand Down Expand Up @@ -1828,7 +1828,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
}

sbinfo = SHMEM_SB(inode->i_sb);
charge_mm = vma ? vma->vm_mm : current->mm;
charge_mm = vma ? vma->vm_mm : NULL;

page = pagecache_get_page(mapping, index,
FGP_ENTRY | FGP_HEAD | FGP_LOCK, 0);
Expand Down

0 comments on commit 04f94e3

Please sign in to comment.