memcontrol: schedule throttling if we are congested
Memory allocations can induce swapping via kswapd or direct reclaim.  If
kswapd is doing the IO on our behalf and we never enter direct reclaim
ourselves, we may never get scheduled for throttling.  So instead, check
whether our cgroup is congested and, if so, schedule the throttling.
Before we return to user space, the throttling code will only actually
throttle if it is still required.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Tejun Heo authored and Jens Axboe committed Jul 9, 2018
commit 2cf8558, parent d09d8df
7 files changed, 81 insertions(+), 14 deletions(-)
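
Before the per-file hunks, a condensed sketch of the call flow this commit sets up may help. It is an illustrative abridgement of code that already appears in the mm/memcontrol.c and mm/swapfile.c hunks below, not additional changes; no names beyond those in the patch are introduced.

/* Abridged for illustration only; see the full hunks below. */
int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm,
				gfp_t gfp_mask, struct mem_cgroup **memcgp,
				bool compound)
{
	/* The charge itself is unchanged ... */
	int ret = mem_cgroup_try_charge(page, mm, gfp_mask, memcgp, compound);

	/* ... but we additionally ask the swap layer to arm throttling.
	 * Fault paths (wp_page_copy(), do_swap_page(), do_anonymous_page(),
	 * do_cow_fault(), THP and shmem charges) switch to this variant. */
	mem_cgroup_throttle_swaprate(*memcgp, page_to_nid(page), gfp_mask);
	return ret;
}

void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node,
				  gfp_t gfp_mask)
{
	if (!(gfp_mask & __GFP_IO) || !memcg || !blk_cgroup_congested())
		return;
	/* Pick a swap device on this node and blkcg_schedule_throttle() it.
	 * The actual sleep happens before the task returns to user space,
	 * and only if throttling is still required at that point. */
}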
include/linux/memcontrol.h: 13 additions, 0 deletions
@@ -317,6 +317,9 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
 int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
 			  bool compound);
+int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm,
+			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
+			  bool compound);
 void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
 			      bool lrucare, bool compound);
 void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
@@ -789,6 +792,16 @@ static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 	return 0;
 }
 
+static inline int mem_cgroup_try_charge_delay(struct page *page,
+					      struct mm_struct *mm,
+					      gfp_t gfp_mask,
+					      struct mem_cgroup **memcgp,
+					      bool compound)
+{
+	*memcgp = NULL;
+	return 0;
+}
+
 static inline void mem_cgroup_commit_charge(struct page *page,
 					    struct mem_cgroup *memcg,
 					    bool lrucare, bool compound)
include/linux/swap.h: 10 additions, 1 deletion
@@ -629,14 +629,23 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
 
 	return memcg->swappiness;
 }
-
 #else
 static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
 {
 	return vm_swappiness;
 }
 #endif
 
+#if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+extern void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node,
+					 gfp_t gfp_mask);
+#else
+static inline void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg,
+						int node, gfp_t gfp_mask)
+{
+}
+#endif
+
 #ifdef CONFIG_MEMCG_SWAP
 extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
 extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry);
mm/huge_memory.c: 3 additions, 3 deletions
@@ -552,7 +552,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
+	if (mem_cgroup_try_charge_delay(page, vma->vm_mm, gfp, &memcg, true)) {
 		put_page(page);
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1142,7 +1142,7 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
 		pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE, vma,
 					       vmf->address, page_to_nid(page));
 		if (unlikely(!pages[i] ||
-			     mem_cgroup_try_charge(pages[i], vma->vm_mm,
+			     mem_cgroup_try_charge_delay(pages[i], vma->vm_mm,
 				     GFP_KERNEL, &memcg, false))) {
 			if (pages[i])
 				put_page(pages[i]);
@@ -1312,7 +1312,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 		goto out;
 	}
 
-	if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
+	if (unlikely(mem_cgroup_try_charge_delay(new_page, vma->vm_mm,
 					huge_gfp, &memcg, true))) {
 		put_page(new_page);
 		split_huge_pmd(vma, vmf->pmd, vmf->address);
mm/memcontrol.c: 13 additions, 0 deletions
@@ -5593,6 +5593,19 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 	return ret;
 }
 
+int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm,
+			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
+			  bool compound)
+{
+	struct mem_cgroup *memcg;
+	int ret;
+
+	ret = mem_cgroup_try_charge(page, mm, gfp_mask, memcgp, compound);
+	memcg = *memcgp;
+	mem_cgroup_throttle_swaprate(memcg, page_to_nid(page), gfp_mask);
+	return ret;
+}
+
 /**
  * mem_cgroup_commit_charge - commit a page charge
  * @page: page to charge
mm/memory.c: 6 additions, 5 deletions
@@ -2503,7 +2503,7 @@ static int wp_page_copy(struct vm_fault *vmf)
 		cow_user_page(new_page, old_page, vmf->address, vma);
 	}
 
-	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
+	if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false))
 		goto oom_free_new;
 
 	__SetPageUptodate(new_page);
@@ -3003,8 +3003,8 @@ int do_swap_page(struct vm_fault *vmf)
 		goto out_page;
 	}
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL,
-				&memcg, false)) {
+	if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL,
+					&memcg, false)) {
 		ret = VM_FAULT_OOM;
 		goto out_page;
 	}
@@ -3165,7 +3165,8 @@ static int do_anonymous_page(struct vm_fault *vmf)
 	if (!page)
 		goto oom;
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false))
+	if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, &memcg,
+					false))
 		goto oom_free_page;
 
 	/*
@@ -3661,7 +3662,7 @@ static int do_cow_fault(struct vm_fault *vmf)
 	if (!vmf->cow_page)
 		return VM_FAULT_OOM;
 
-	if (mem_cgroup_try_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
+	if (mem_cgroup_try_charge_delay(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
 				&vmf->memcg, false)) {
 		put_page(vmf->cow_page);
 		return VM_FAULT_OOM;
mm/shmem.c: 5 additions, 5 deletions
@@ -1239,8 +1239,8 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
 	 * Charged back to the user (not to caller) when swap account is used.
 	 */
-	error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg,
-			false);
+	error = mem_cgroup_try_charge_delay(page, current->mm, GFP_KERNEL,
+			&memcg, false);
 	if (error)
 		goto out;
 	/* No radix_tree_preload: swap entry keeps a place for page in tree */
@@ -1712,7 +1712,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 			goto failed;
 		}
 
-		error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
+		error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
 					      false);
 		if (!error) {
 			error = shmem_add_to_page_cache(page, mapping, index,
@@ -1818,7 +1818,7 @@ alloc_nohuge:	page = shmem_alloc_and_acct_page(gfp, inode,
 		if (sgp == SGP_WRITE)
 			__SetPageReferenced(page);
 
-		error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
+		error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
 					      PageTransHuge(page));
 		if (error)
 			goto unacct;
@@ -2291,7 +2291,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 	__SetPageSwapBacked(page);
 	__SetPageUptodate(page);
 
-	ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false);
+	ret = mem_cgroup_try_charge_delay(page, dst_mm, gfp, &memcg, false);
 	if (ret)
 		goto out_release;
 
mm/swapfile.c: 31 additions, 0 deletions
@@ -3731,6 +3731,37 @@ static void free_swap_count_continuations(struct swap_info_struct *si)
 	}
 }
 
+#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node,
+				  gfp_t gfp_mask)
+{
+	struct swap_info_struct *si, *next;
+	if (!(gfp_mask & __GFP_IO) || !memcg)
+		return;
+
+	if (!blk_cgroup_congested())
+		return;
+
+	/*
+	 * We've already scheduled a throttle, avoid taking the global swap
+	 * lock.
+	 */
+	if (current->throttle_queue)
+		return;
+
+	spin_lock(&swap_avail_lock);
+	plist_for_each_entry_safe(si, next, &swap_avail_heads[node],
+				  avail_lists[node]) {
+		if (si->bdev) {
+			blkcg_schedule_throttle(bdev_get_queue(si->bdev),
+						true);
+			break;
+		}
+	}
+	spin_unlock(&swap_avail_lock);
+}
+#endif
+
 static int __init swapfile_init(void)
 {
 	int nid;
