Commit af044f1

---
r: 83761
b: refs/heads/master
c: 8a9f3cc
h: refs/heads/master
i:
  83759: c3ed69b
v: v3
---

Balbir Singh authored and Linus Torvalds committed Feb 7, 2008
1 parent 9ab45ad commit af044f1
Showing 10 changed files with 296 additions and 28 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 78fb74669e80883323391090e4d26d17fe29488f
+refs/heads/master: 8a9f3ccd24741b50200c3f33d62534c7271f3dfc
20 changes: 20 additions & 0 deletions trunk/include/linux/memcontrol.h
@@ -30,6 +30,13 @@ extern void mm_free_cgroup(struct mm_struct *mm);
 extern void page_assign_page_cgroup(struct page *page,
 					struct page_cgroup *pc);
 extern struct page_cgroup *page_get_page_cgroup(struct page *page);
+extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm);
+extern void mem_cgroup_uncharge(struct page_cgroup *pc);
+
+static inline void mem_cgroup_uncharge_page(struct page *page)
+{
+	mem_cgroup_uncharge(page_get_page_cgroup(page));
+}
 
 #else /* CONFIG_CGROUP_MEM_CONT */
 static inline void mm_init_cgroup(struct mm_struct *mm,
@@ -51,6 +58,19 @@ static inline struct page_cgroup *page_get_page_cgroup(struct page *page)
 	return NULL;
 }
 
+static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
+{
+	return 0;
+}
+
+static inline void mem_cgroup_uncharge(struct page_cgroup *pc)
+{
+}
+
+static inline void mem_cgroup_uncharge_page(struct page *page)
+{
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
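The header gives both halves of the interface: real declarations when CONFIG_CGROUP_MEM_CONT is set, and empty stubs otherwise, so call sites such as mm/filemap.c compile unchanged either way. The sketch below is a minimal userspace model of that stub pattern; the struct definitions and the cache_one_page() caller are hypothetical stand-ins for illustration, not kernel code.

#include <errno.h>
#include <stdio.h>

struct page { int id; };
struct mm_struct { int id; };

/* models the !CONFIG_CGROUP_MEM_CONT stubs: accounting compiled out */
static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
{
	(void)page;
	(void)mm;
	return 0;	/* charging always succeeds when the controller is off */
}

static inline void mem_cgroup_uncharge_page(struct page *page)
{
	(void)page;	/* nothing to uncharge */
}

/* hypothetical caller: no #ifdef needed at the call site */
static int cache_one_page(struct page *page, struct mm_struct *mm)
{
	if (mem_cgroup_charge(page, mm))
		return -ENOMEM;	/* over the cgroup limit: refuse to cache */
	/* ... the page would be inserted into the cache here ... */
	mem_cgroup_uncharge_page(page);	/* and uncharged again on removal */
	return 0;
}

int main(void)
{
	struct page p = { 1 };
	struct mm_struct m = { 1 };

	printf("cache_one_page -> %d\n", cache_one_page(&p, &m));
	return 0;
}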
12 changes: 11 additions & 1 deletion trunk/mm/filemap.c
@@ -33,6 +33,7 @@
 #include <linux/syscalls.h>
 #include <linux/cpuset.h>
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
+#include <linux/memcontrol.h>
 #include "internal.h"
 
 /*
@@ -118,6 +119,7 @@ void __remove_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 
+	mem_cgroup_uncharge_page(page);
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
 	mapping->nrpages--;
@@ -461,6 +463,11 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
 	int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 
 	if (error == 0) {
+
+		error = mem_cgroup_charge(page, current->mm);
+		if (error)
+			goto out;
+
 		write_lock_irq(&mapping->tree_lock);
 		error = radix_tree_insert(&mapping->page_tree, offset, page);
 		if (!error) {
@@ -470,10 +477,13 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
 			page->index = offset;
 			mapping->nrpages++;
 			__inc_zone_page_state(page, NR_FILE_PAGES);
-		}
+		} else
+			mem_cgroup_uncharge_page(page);
+
 		write_unlock_irq(&mapping->tree_lock);
 		radix_tree_preload_end();
 	}
+out:
 	return error;
 }
 EXPORT_SYMBOL(add_to_page_cache);
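The filemap.c hooks pair every successful charge with exactly one uncharge: add_to_page_cache() charges before the radix-tree insert and jumps to the new out: label if the cgroup is over its limit (or uncharges if the insert itself fails), while __remove_from_page_cache() uncharges on removal. Below is a standalone sketch of the counter contract this relies on; struct counter and its helpers are a hypothetical model of res_counter behaviour as the diff uses it (0 on success, non-zero once the limit would be exceeded), not the kernel API itself.

#include <errno.h>
#include <stdio.h>

struct counter {
	unsigned long usage;
	unsigned long limit;
};

/* models res_counter_charge(): fail instead of exceeding the limit */
static int counter_charge(struct counter *c, unsigned long n)
{
	if (c->usage + n > c->limit)
		return -ENOMEM;	/* caller must not insert the page */
	c->usage += n;
	return 0;
}

/* models res_counter_uncharge(): give the accounted pages back */
static void counter_uncharge(struct counter *c, unsigned long n)
{
	c->usage -= n;
}

int main(void)
{
	struct counter res = { .usage = 0, .limit = 2 };
	int i;

	for (i = 0; i < 3; i++)	/* the third charge exceeds the limit */
		printf("charge %d -> %d\n", i, counter_charge(&res, 1));

	counter_uncharge(&res, 1);	/* a page left the page cache */
	printf("after uncharge -> %d\n", counter_charge(&res, 1));
	return 0;
}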
166 changes: 163 additions & 3 deletions trunk/mm/memcontrol.c
@@ -21,6 +21,9 @@
 #include <linux/memcontrol.h>
 #include <linux/cgroup.h>
 #include <linux/mm.h>
+#include <linux/page-flags.h>
+#include <linux/bit_spinlock.h>
+#include <linux/rcupdate.h>
 
 struct cgroup_subsys mem_cgroup_subsys;
 
@@ -31,7 +34,9 @@ struct cgroup_subsys mem_cgroup_subsys;
  * to help the administrator determine what knobs to tune.
  *
  * TODO: Add a water mark for the memory controller. Reclaim will begin when
- * we hit the water mark.
+ * we hit the water mark. Maybe even add a low water mark, such that
+ * no reclaim occurs from a cgroup at its low water mark; this is
+ * a feature that will be implemented much later in the future.
  */
 struct mem_cgroup {
 	struct cgroup_subsys_state css;
@@ -48,6 +53,14 @@ struct mem_cgroup {
 	struct list_head inactive_list;
 };
 
+/*
+ * We use the lower bit of the page->page_cgroup pointer as a bit spin
+ * lock. We need to ensure that page->page_cgroup is at least two-byte
+ * aligned (based on comments from Nick Piggin).
+ */
+#define PAGE_CGROUP_LOCK_BIT	0x0
+#define PAGE_CGROUP_LOCK	(1 << PAGE_CGROUP_LOCK_BIT)
+
 /*
  * A page_cgroup page is associated with every page descriptor. The
  * page_cgroup helps us identify information about the cgroup
@@ -56,6 +69,8 @@ struct page_cgroup {
 	struct list_head lru;		/* per cgroup LRU list */
 	struct page *page;
 	struct mem_cgroup *mem_cgroup;
+	atomic_t ref_cnt;		/* Helpful when pages move b/w  */
+					/* mapped and cached states     */
 };
 
 
@@ -88,14 +103,157 @@ void mm_free_cgroup(struct mm_struct *mm)
 	css_put(&mm->mem_cgroup->css);
 }
 
+static inline int page_cgroup_locked(struct page *page)
+{
+	return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT,
+					&page->page_cgroup);
+}
+
 void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
 {
-	page->page_cgroup = (unsigned long)pc;
+	int locked;
+
+	/*
+	 * While resetting the page_cgroup we might not hold the
+	 * page_cgroup lock. free_hot_cold_page() is an example
+	 * of such a scenario.
+	 */
+	if (pc)
+		VM_BUG_ON(!page_cgroup_locked(page));
+	locked = (page->page_cgroup & PAGE_CGROUP_LOCK);
+	page->page_cgroup = ((unsigned long)pc | locked);
 }
 
 struct page_cgroup *page_get_page_cgroup(struct page *page)
 {
-	return page->page_cgroup;
+	return (struct page_cgroup *)
+		(page->page_cgroup & ~PAGE_CGROUP_LOCK);
+}
+
+void __always_inline lock_page_cgroup(struct page *page)
+{
+	bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
+	VM_BUG_ON(!page_cgroup_locked(page));
+}
+
+void __always_inline unlock_page_cgroup(struct page *page)
+{
+	bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
+}
+
+/*
+ * Charge the memory controller for page usage.
+ * Return
+ * 0 if the charge was successful
+ * < 0 if the cgroup is over its limit
+ */
+int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
+{
+	struct mem_cgroup *mem;
+	struct page_cgroup *pc, *race_pc;
+
+	/*
+	 * Should page_cgroups go to their own slab?
+	 * One could optimize the performance of the charging routine
+	 * by saving a bit in the page_flags and using it as a lock
+	 * to see if the cgroup page already has a page_cgroup associated
+	 * with it.
+	 */
+	lock_page_cgroup(page);
+	pc = page_get_page_cgroup(page);
+	/*
+	 * The page_cgroup exists and the page has already been accounted.
+	 */
+	if (pc) {
+		atomic_inc(&pc->ref_cnt);
+		goto done;
+	}
+
+	unlock_page_cgroup(page);
+
+	pc = kzalloc(sizeof(struct page_cgroup), GFP_KERNEL);
+	if (pc == NULL)
+		goto err;
+
+	rcu_read_lock();
+	/*
+	 * We always charge the cgroup the mm_struct belongs to;
+	 * the mm_struct's mem_cgroup changes on task migration if the
+	 * thread group leader migrates. It's possible that mm is not
+	 * set; if so, charge the init_mm (happens for pagecache usage).
+	 */
+	if (!mm)
+		mm = &init_mm;
+
+	mem = rcu_dereference(mm->mem_cgroup);
+	/*
+	 * For every charge from the cgroup, increment the reference
+	 * count.
+	 */
+	css_get(&mem->css);
+	rcu_read_unlock();
+
+	/*
+	 * If we created the page_cgroup, we should free it on exceeding
+	 * the cgroup limit.
+	 */
+	if (res_counter_charge(&mem->res, 1)) {
+		css_put(&mem->css);
+		goto free_pc;
+	}
+
+	lock_page_cgroup(page);
+	/*
+	 * Check if somebody else beat us to allocating the page_cgroup.
+	 */
+	race_pc = page_get_page_cgroup(page);
+	if (race_pc) {
+		kfree(pc);
+		pc = race_pc;
+		atomic_inc(&pc->ref_cnt);
+		res_counter_uncharge(&mem->res, 1);
+		css_put(&mem->css);
+		goto done;
+	}
+
+	atomic_set(&pc->ref_cnt, 1);
+	pc->mem_cgroup = mem;
+	pc->page = page;
+	page_assign_page_cgroup(page, pc);
+
+done:
+	unlock_page_cgroup(page);
+	return 0;
+free_pc:
+	kfree(pc);
+	return -ENOMEM;
+err:
+	unlock_page_cgroup(page);
+	return -ENOMEM;
+}
+
+/*
+ * Uncharging is always a welcome operation; we never complain, we
+ * simply uncharge.
+ */
+void mem_cgroup_uncharge(struct page_cgroup *pc)
+{
+	struct mem_cgroup *mem;
+	struct page *page;
+
+	if (!pc)
+		return;
+
+	if (atomic_dec_and_test(&pc->ref_cnt)) {
+		page = pc->page;
+		lock_page_cgroup(page);
+		mem = pc->mem_cgroup;
+		css_put(&mem->css);
+		page_assign_page_cgroup(page, NULL);
+		unlock_page_cgroup(page);
+		res_counter_uncharge(&mem->res, 1);
+		kfree(pc);
+	}
+}
+
 static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
@@ -150,6 +308,8 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 		return NULL;
 
 	res_counter_init(&mem->res);
+	INIT_LIST_HEAD(&mem->active_list);
+	INIT_LIST_HEAD(&mem->inactive_list);
 	return &mem->css;
 }
 
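The locking above keeps the lock in bit 0 of page->page_cgroup itself, which works only because a struct page_cgroup pointer is at least two-byte aligned, so bit 0 of any valid pointer value is zero. Below is a standalone sketch of that pointer-tagging trick, assuming GCC __atomic builtins in place of the kernel's bit_spin_lock() helpers; slot, slot_lock() and friends are hypothetical names.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define LOCK_BIT 0x1UL		/* plays the role of PAGE_CGROUP_LOCK */

static unsigned long slot;	/* models page->page_cgroup */

static void slot_lock(void)
{
	/* spin until the fetched value shows the bit was previously clear */
	while (__atomic_fetch_or(&slot, LOCK_BIT, __ATOMIC_ACQUIRE) & LOCK_BIT)
		;	/* busy-wait, like bit_spin_lock() */
}

static void slot_unlock(void)
{
	__atomic_fetch_and(&slot, ~LOCK_BIT, __ATOMIC_RELEASE);
}

static void slot_assign(void *p)
{
	/* preserve the lock bit, as page_assign_page_cgroup() does */
	unsigned long locked = slot & LOCK_BIT;

	slot = (unsigned long)p | locked;
}

static void *slot_get(void)
{
	/* mask the lock bit off, as page_get_page_cgroup() does */
	return (void *)(slot & ~LOCK_BIT);
}

int main(void)
{
	static int obj;	/* stands in for a struct page_cgroup */

	assert(((uintptr_t)&obj & 1) == 0);	/* the alignment assumption */

	slot_lock();
	slot_assign(&obj);
	assert(slot_get() == &obj);	/* pointer survives with the bit set */
	slot_unlock();

	printf("tagged pointer round-trips: %p\n", slot_get());
	return 0;
}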
(Diffs for the remaining changed files did not load and are not shown.)
