Skip to content

Commit

Permalink
memcg: synchronized LRU
Browse files Browse the repository at this point in the history
A big patch for changing memcg's LRU semantics.

Now,
  - page_cgroup is linked to its mem_cgroup's own LRU (per zone).

  - LRU of page_cgroup is not synchronous with global LRU.

  - page and page_cgroup are one-to-one and statically allocated.

  - To find which LRU a page_cgroup is on, you have to check pc->mem_cgroup, as in
    - lru = page_cgroup_zoneinfo(pc, nid_of_pc, zid_of_pc);

  - SwapCache is handled.

And, when we handle the LRU list of a page_cgroup, we do the following.

	pc = lookup_page_cgroup(page);
	lock_page_cgroup(pc); .....................(1)
	mz = page_cgroup_zoneinfo(pc);
	spin_lock(&mz->lru_lock);
	.....add to LRU
	spin_unlock(&mz->lru_lock);
	unlock_page_cgroup(pc);

But (1) is a spin_lock, and we have to be afraid of deadlock with zone->lru_lock.
So, trylock() is currently used at (1). Without (1), we can't trust that "mz" is correct.

This is a trial to remove this dirty nesting of locks.
This patch changes mz->lru_lock to be zone->lru_lock.
Then, above sequence will be written as

        spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
	mem_cgroup_add/remove/etc_lru() {
		pc = lookup_page_cgroup(page);
		mz = page_cgroup_zoneinfo(pc);
		if (PageCgroupUsed(pc)) {
			....add to LRU
		}
	}
        spin_unlock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU

This is much simpler.
(*) We're safe even if we don't take lock_page_cgroup(pc), because:
    1. pc->mem_cgroup can be modified only
       - at charge.
       - at account_move().
    2. at charge
       the PCG_USED bit is not set before pc->mem_cgroup is fixed.
    3. at account_move()
       the page is isolated and not on LRU.

Pros.
  - easy for maintenance.
  - memcg can make use of laziness of pagevec.
  - we don't have to duplicate the LRU/Active/Unevictable bits in page_cgroup.
  - LRU status of memcg will be synchronized with global LRU's one.
  - # of locks is reduced.
  - account_move() is simplified very much.
Cons.
  - may increase cost of LRU rotation.
    (no impact if memcg is not configured.)

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
KAMEZAWA Hiroyuki authored and Linus Torvalds committed Jan 8, 2009
1 parent 8c7c6e3 commit 08e552c
Show file tree
Hide file tree
Showing 8 changed files with 178 additions and 206 deletions.
1 change: 1 addition & 0 deletions fs/splice.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/splice.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/swap.h>
#include <linux/writeback.h>
Expand Down
29 changes: 27 additions & 2 deletions include/linux/memcontrol.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,12 @@ extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);

extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru);
extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru);
extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru);
extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru);
extern void mem_cgroup_del_lru(struct page *page);
extern void mem_cgroup_move_lists(struct page *page,
enum lru_list from, enum lru_list to);
extern void mem_cgroup_uncharge_page(struct page *page);
extern void mem_cgroup_uncharge_cache_page(struct page *page);
extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
Expand Down Expand Up @@ -131,7 +136,27 @@ static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
return 0;
}

static inline void mem_cgroup_move_lists(struct page *page, bool active)
/*
 * No-op variant of mem_cgroup_add_lru_list(): accepts the page and the LRU
 * index and does nothing with either.
 * NOTE(review): presumably the stub compiled when the memory cgroup
 * controller is configured out -- confirm against the enclosing #ifdef.
 */
static inline void mem_cgroup_add_lru_list(struct page *page, int lru)
{
	/* intentionally empty */
}

/*
 * No-op variant of mem_cgroup_del_lru_list(): accepts the page and the LRU
 * index and does nothing with either.
 * NOTE(review): presumably the stub compiled when the memory cgroup
 * controller is configured out -- confirm against the enclosing #ifdef.
 */
static inline void mem_cgroup_del_lru_list(struct page *page, int lru)
{
	/* intentionally empty */
}

/*
 * No-op variant of mem_cgroup_rotate_lru_list(): accepts the page and the
 * LRU index and does nothing with either.
 * NOTE(review): presumably the stub compiled when the memory cgroup
 * controller is configured out -- confirm against the enclosing #ifdef.
 */
static inline void mem_cgroup_rotate_lru_list(struct page *page, int lru)
{
	/* intentionally empty */
}

/*
 * No-op variant of mem_cgroup_del_lru(): accepts the page and does nothing
 * with it.
 * NOTE(review): presumably the stub compiled when the memory cgroup
 * controller is configured out -- confirm against the enclosing #ifdef.
 */
static inline void mem_cgroup_del_lru(struct page *page)
{
	/* intentionally empty */
}

/*
 * No-op variant of mem_cgroup_move_lists(): accepts the page plus the source
 * and destination LRU indices and does nothing with them.
 * NOTE(review): presumably the stub compiled when the memory cgroup
 * controller is configured out -- confirm against the enclosing #ifdef.
 */
static inline void mem_cgroup_move_lists(struct page *page,
					 enum lru_list from, enum lru_list to)
{
	/* intentionally empty */
}

Expand Down
3 changes: 3 additions & 0 deletions include/linux/mm_inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,15 @@ add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
{
list_add(&page->lru, &zone->lru[l].list);
__inc_zone_state(zone, NR_LRU_BASE + l);
mem_cgroup_add_lru_list(page, l);
}

/*
 * Remove @page from LRU list @l of @zone.
 *
 * Unlinks page->lru, decrements the zone counter at NR_LRU_BASE + l, and
 * then passes the same page and list index to mem_cgroup_del_lru_list().
 *
 * NOTE(review): presumably called with zone->lru_lock held, as the commit
 * description for this change describes -- confirm against the callers.
 */
static inline void
del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
{
list_del(&page->lru);
__dec_zone_state(zone, NR_LRU_BASE + l);
/* Pass the same removal on to the memcg side, using the same list index. */
mem_cgroup_del_lru_list(page, l);
}

static inline void
Expand All @@ -54,6 +56,7 @@ del_page_from_lru(struct zone *zone, struct page *page)
l += page_is_file_cache(page);
}
__dec_zone_state(zone, NR_LRU_BASE + l);
mem_cgroup_del_lru_list(page, l);
}

/**
Expand Down
17 changes: 0 additions & 17 deletions include/linux/page_cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,6 @@ enum {
PCG_LOCK, /* page cgroup is locked */
PCG_CACHE, /* charged as cache */
PCG_USED, /* this object is in use. */
/* flags for LRU placement */
PCG_ACTIVE, /* page is active in this cgroup */
PCG_FILE, /* page is file system backed */
PCG_UNEVICTABLE, /* page is unevictableable */
};

#define TESTPCGFLAG(uname, lname) \
Expand All @@ -50,19 +46,6 @@ TESTPCGFLAG(Cache, CACHE)
TESTPCGFLAG(Used, USED)
CLEARPCGFLAG(Used, USED)

/* LRU management flags (from global-lru definition) */
TESTPCGFLAG(File, FILE)
SETPCGFLAG(File, FILE)
CLEARPCGFLAG(File, FILE)

TESTPCGFLAG(Active, ACTIVE)
SETPCGFLAG(Active, ACTIVE)
CLEARPCGFLAG(Active, ACTIVE)

TESTPCGFLAG(Unevictable, UNEVICTABLE)
SETPCGFLAG(Unevictable, UNEVICTABLE)
CLEARPCGFLAG(Unevictable, UNEVICTABLE)

static inline int page_cgroup_nid(struct page_cgroup *pc)
{
return page_to_nid(pc->page);
Expand Down
Loading

0 comments on commit 08e552c

Please sign in to comment.