Commit 8193220
---
r: 292658
b: refs/heads/master
c: cc9a6c8
h: refs/heads/master
v: v3
Mel Gorman authored and Linus Torvalds committed Mar 22, 2012
1 parent b45efa4 commit 8193220
Showing 13 changed files with 134 additions and 111 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: e845e199362cc5712ba0e7eedc14eed70e144258
+refs/heads/master: cc9a6c8776615f9c194ccf0b63a0aa5628235545
47 changes: 20 additions & 27 deletions trunk/include/linux/cpuset.h
@@ -89,42 +89,33 @@ extern void rebuild_sched_domains(void);
 extern void cpuset_print_task_mems_allowed(struct task_struct *p);
 
 /*
- * reading current mems_allowed and mempolicy in the fastpath must protected
- * by get_mems_allowed()
+ * get_mems_allowed is required when making decisions involving mems_allowed
+ * such as during page allocation. mems_allowed can be updated in parallel
+ * and depending on the new value an operation can fail potentially causing
+ * process failure. A retry loop with get_mems_allowed and put_mems_allowed
+ * prevents these artificial failures.
  */
-static inline void get_mems_allowed(void)
+static inline unsigned int get_mems_allowed(void)
 {
-	current->mems_allowed_change_disable++;
-
-	/*
-	 * ensure that reading mems_allowed and mempolicy happens after the
-	 * update of ->mems_allowed_change_disable.
-	 *
-	 * the write-side task finds ->mems_allowed_change_disable is not 0,
-	 * and knows the read-side task is reading mems_allowed or mempolicy,
-	 * so it will clear old bits lazily.
-	 */
-	smp_mb();
+	return read_seqcount_begin(&current->mems_allowed_seq);
 }
 
-static inline void put_mems_allowed(void)
+/*
+ * If this returns false, the operation that took place after get_mems_allowed
+ * may have failed. It is up to the caller to retry the operation if
+ * appropriate.
+ */
+static inline bool put_mems_allowed(unsigned int seq)
 {
-	/*
-	 * ensure that reading mems_allowed and mempolicy before reducing
-	 * mems_allowed_change_disable.
-	 *
-	 * the write-side task will know that the read-side task is still
-	 * reading mems_allowed or mempolicy, don't clears old bits in the
-	 * nodemask.
-	 */
-	smp_mb();
-	--ACCESS_ONCE(current->mems_allowed_change_disable);
+	return !read_seqcount_retry(&current->mems_allowed_seq, seq);
 }
 
 static inline void set_mems_allowed(nodemask_t nodemask)
 {
 	task_lock(current);
+	write_seqcount_begin(&current->mems_allowed_seq);
 	current->mems_allowed = nodemask;
+	write_seqcount_end(&current->mems_allowed_seq);
 	task_unlock(current);
 }

@@ -234,12 +225,14 @@ static inline void set_mems_allowed(nodemask_t nodemask)
 {
 }
 
-static inline void get_mems_allowed(void)
+static inline unsigned int get_mems_allowed(void)
 {
+	return 0;
 }
 
-static inline void put_mems_allowed(void)
+static inline bool put_mems_allowed(unsigned int seq)
 {
+	return true;
 }
 
 #endif /* !CONFIG_CPUSETS */
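
For reference, the retry loop that the new header comment describes looks like this at a call site. This is a minimal sketch modelled on the __page_cache_alloc() change later in this commit; the function name alloc_page_on_spread_node() is purely illustrative and not part of the patch.

#include <linux/cpuset.h>
#include <linux/gfp.h>

/*
 * Illustrative only: allocate a page on a cpuset-chosen node, retrying if
 * the task's mems_allowed changed underneath us and the attempt failed.
 */
static struct page *alloc_page_on_spread_node(gfp_t gfp)
{
	unsigned int cpuset_mems_cookie;
	struct page *page;
	int n;

	do {
		cpuset_mems_cookie = get_mems_allowed();
		n = cpuset_mem_spread_node();
		page = alloc_pages_exact_node(n, gfp, 0);
		/*
		 * put_mems_allowed() returns false only if mems_allowed was
		 * updated while the allocation ran, so a failed attempt is
		 * retried only when it may have been a transient artifact of
		 * the concurrent update.
		 */
	} while (!put_mems_allowed(cpuset_mems_cookie) && !page);

	return page;
}
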
8 changes: 8 additions & 0 deletions trunk/include/linux/init_task.h
@@ -29,6 +29,13 @@ extern struct fs_struct init_fs;
 #define INIT_GROUP_RWSEM(sig)
 #endif
 
+#ifdef CONFIG_CPUSETS
+#define INIT_CPUSET_SEQ						\
+	.mems_allowed_seq = SEQCNT_ZERO,
+#else
+#define INIT_CPUSET_SEQ
+#endif
+
 #define INIT_SIGNALS(sig) {						\
 	.nr_threads	= 1,						\
 	.wait_chldexit	= __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
@@ -192,6 +199,7 @@ extern struct cred init_cred;
 	INIT_FTRACE_GRAPH						\
 	INIT_TRACE_RECURSION						\
 	INIT_TASK_RCU_PREEMPT(tsk)					\
+	INIT_CPUSET_SEQ						\
 }


2 changes: 1 addition & 1 deletion trunk/include/linux/sched.h
@@ -1514,7 +1514,7 @@ struct task_struct {
 #endif
 #ifdef CONFIG_CPUSETS
 	nodemask_t mems_allowed;	/* Protected by alloc_lock */
-	int mems_allowed_change_disable;
+	seqcount_t mems_allowed_seq;	/* Seqence no to catch updates */
 	int cpuset_mem_spread_rotor;
 	int cpuset_slab_spread_rotor;
 #endif
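
For orientation, the seqcount_t added here follows the standard <linux/seqlock.h> pattern: a writer, already serialised against other writers by some lock, brackets its update with write_seqcount_begin()/write_seqcount_end(), while readers sample the counter, read the data, and retry if the counter moved in the meantime. A generic sketch of that pattern using the same primitives this commit relies on; the struct, field, and function names below are invented for illustration and are not part of the commit.

#include <linux/seqlock.h>
#include <linux/spinlock.h>

struct example {			/* hypothetical seqcount-protected data */
	seqcount_t seq;
	spinlock_t lock;		/* serialises writers */
	int value;
};

static void example_init(struct example *e)
{
	seqcount_init(&e->seq);
	spin_lock_init(&e->lock);
	e->value = 0;
}

static void example_update(struct example *e, int v)
{
	spin_lock(&e->lock);
	write_seqcount_begin(&e->seq);	/* readers now see an update in flight */
	e->value = v;
	write_seqcount_end(&e->seq);
	spin_unlock(&e->lock);
}

static int example_read(struct example *e)
{
	unsigned int seq;
	int v;

	do {
		seq = read_seqcount_begin(&e->seq);
		v = e->value;
	} while (read_seqcount_retry(&e->seq, seq));	/* nonzero => writer ran, retry */

	return v;
}
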
43 changes: 8 additions & 35 deletions trunk/kernel/cpuset.c
@@ -964,7 +964,6 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
 {
 	bool need_loop;
 
-repeat:
 	/*
 	 * Allow tasks that have access to memory reserves because they have
 	 * been OOM killed to get memory anywhere.
@@ -983,45 +982,19 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
 	 */
 	need_loop = task_has_mempolicy(tsk) ||
 			!nodes_intersects(*newmems, tsk->mems_allowed);
-	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
-	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
 
-	/*
-	 * ensure checking ->mems_allowed_change_disable after setting all new
-	 * allowed nodes.
-	 *
-	 * the read-side task can see an nodemask with new allowed nodes and
-	 * old allowed nodes. and if it allocates page when cpuset clears newly
-	 * disallowed ones continuous, it can see the new allowed bits.
-	 *
-	 * And if setting all new allowed nodes is after the checking, setting
-	 * all new allowed nodes and clearing newly disallowed ones will be done
-	 * continuous, and the read-side task may find no node to alloc page.
-	 */
-	smp_mb();
+	if (need_loop)
+		write_seqcount_begin(&tsk->mems_allowed_seq);
 
-	/*
-	 * Allocation of memory is very fast, we needn't sleep when waiting
-	 * for the read-side.
-	 */
-	while (need_loop && ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
-		task_unlock(tsk);
-		if (!task_curr(tsk))
-			yield();
-		goto repeat;
-	}
 
-	/*
-	 * ensure checking ->mems_allowed_change_disable before clearing all new
-	 * disallowed nodes.
-	 *
-	 * if clearing newly disallowed bits before the checking, the read-side
-	 * task may find no node to alloc page.
-	 */
-	smp_mb();
+	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
+	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
 
 	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
 	tsk->mems_allowed = *newmems;
+
+	if (need_loop)
+		write_seqcount_end(&tsk->mems_allowed_seq);
+
 	task_unlock(tsk);
 }
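
Condensed, the update path above now amounts to the following. This is a paraphrase for readability rather than verbatim kernel code; it omits the early-return special cases (e.g. for OOM-killed tasks) that sit between the two hunks, and the function name is illustrative.

static void example_change_task_nodemask(struct task_struct *tsk,
					 nodemask_t *newmems)
{
	bool need_loop;

	task_lock(tsk);
	/*
	 * Readers only need to retry when the intermediate state could leave
	 * them with no usable node: either the task has a mempolicy that must
	 * be rebound, or the old and new masks do not intersect.
	 */
	need_loop = task_has_mempolicy(tsk) ||
			!nodes_intersects(*newmems, tsk->mems_allowed);

	if (need_loop)
		write_seqcount_begin(&tsk->mems_allowed_seq);

	/* Widen the mask, rebind the mempolicy, then shrink to the new mask. */
	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
	tsk->mems_allowed = *newmems;

	if (need_loop)
		write_seqcount_end(&tsk->mems_allowed_seq);

	task_unlock(tsk);
}
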

1 change: 1 addition & 0 deletions trunk/kernel/fork.c
@@ -1237,6 +1237,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_CPUSETS
 	p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
 	p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
+	seqcount_init(&p->mems_allowed_seq);
 #endif
 #ifdef CONFIG_TRACE_IRQFLAGS
 	p->irq_events = 0;
11 changes: 7 additions & 4 deletions trunk/mm/filemap.c
@@ -499,10 +499,13 @@ struct page *__page_cache_alloc(gfp_t gfp)
 	struct page *page;
 
 	if (cpuset_do_page_mem_spread()) {
-		get_mems_allowed();
-		n = cpuset_mem_spread_node();
-		page = alloc_pages_exact_node(n, gfp, 0);
-		put_mems_allowed();
+		unsigned int cpuset_mems_cookie;
+		do {
+			cpuset_mems_cookie = get_mems_allowed();
+			n = cpuset_mem_spread_node();
+			page = alloc_pages_exact_node(n, gfp, 0);
+		} while (!put_mems_allowed(cpuset_mems_cookie) && !page);
+
 		return page;
 	}
 	return alloc_pages(gfp, 0);
15 changes: 11 additions & 4 deletions trunk/mm/hugetlb.c
@@ -454,14 +454,16 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 				struct vm_area_struct *vma,
 				unsigned long address, int avoid_reserve)
 {
-	struct page *page = NULL;
+	struct page *page;
 	struct mempolicy *mpol;
 	nodemask_t *nodemask;
 	struct zonelist *zonelist;
 	struct zone *zone;
 	struct zoneref *z;
+	unsigned int cpuset_mems_cookie;
 
-	get_mems_allowed();
+retry_cpuset:
+	cpuset_mems_cookie = get_mems_allowed();
 	zonelist = huge_zonelist(vma, address,
 					htlb_alloc_mask, &mpol, &nodemask);
 	/*
@@ -488,10 +490,15 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 			}
 		}
 	}
-err:
+
 	mpol_cond_put(mpol);
-	put_mems_allowed();
+	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+		goto retry_cpuset;
 	return page;
+
+err:
+	mpol_cond_put(mpol);
+	return NULL;
 }
 
 static void update_and_free_page(struct hstate *h, struct page *page)
28 changes: 21 additions & 7 deletions trunk/mm/mempolicy.c
@@ -1850,18 +1850,24 @@ struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		unsigned long addr, int node)
 {
-	struct mempolicy *pol = get_vma_policy(current, vma, addr);
+	struct mempolicy *pol;
 	struct zonelist *zl;
 	struct page *page;
+	unsigned int cpuset_mems_cookie;
+
+retry_cpuset:
+	pol = get_vma_policy(current, vma, addr);
+	cpuset_mems_cookie = get_mems_allowed();
 
-	get_mems_allowed();
 	if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
 		unsigned nid;
 
 		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
 		mpol_cond_put(pol);
 		page = alloc_page_interleave(gfp, order, nid);
-		put_mems_allowed();
+		if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+			goto retry_cpuset;
+
 		return page;
 	}
 	zl = policy_zonelist(gfp, pol, node);
Expand All @@ -1872,15 +1878,17 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
struct page *page = __alloc_pages_nodemask(gfp, order,
zl, policy_nodemask(gfp, pol));
__mpol_put(pol);
put_mems_allowed();
if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
goto retry_cpuset;
return page;
}
/*
* fast path: default or task policy
*/
page = __alloc_pages_nodemask(gfp, order, zl,
policy_nodemask(gfp, pol));
put_mems_allowed();
if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
goto retry_cpuset;
return page;
}

@@ -1907,11 +1915,14 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 {
 	struct mempolicy *pol = current->mempolicy;
 	struct page *page;
+	unsigned int cpuset_mems_cookie;
 
 	if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
 		pol = &default_policy;
 
-	get_mems_allowed();
+retry_cpuset:
+	cpuset_mems_cookie = get_mems_allowed();
+
 	/*
 	 * No reference counting needed for current->mempolicy
 	 * nor system default_policy
@@ -1922,7 +1933,10 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 		page = __alloc_pages_nodemask(gfp, order,
 			policy_zonelist(gfp, pol, numa_node_id()),
 			policy_nodemask(gfp, pol));
-	put_mems_allowed();
+
+	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+		goto retry_cpuset;
+
 	return page;
 }
 EXPORT_SYMBOL(alloc_pages_current);
33 changes: 23 additions & 10 deletions trunk/mm/page_alloc.c
@@ -2380,8 +2380,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	struct zone *preferred_zone;
-	struct page *page;
+	struct page *page = NULL;
 	int migratetype = allocflags_to_migratetype(gfp_mask);
+	unsigned int cpuset_mems_cookie;
 
 	gfp_mask &= gfp_allowed_mask;

@@ -2400,15 +2401,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (unlikely(!zonelist->_zonerefs->zone))
 		return NULL;
 
-	get_mems_allowed();
+retry_cpuset:
+	cpuset_mems_cookie = get_mems_allowed();
+
 	/* The preferred zone is used for statistics later */
 	first_zones_zonelist(zonelist, high_zoneidx,
 				nodemask ? : &cpuset_current_mems_allowed,
 				&preferred_zone);
-	if (!preferred_zone) {
-		put_mems_allowed();
-		return NULL;
-	}
+	if (!preferred_zone)
+		goto out;
 
 	/* First allocation attempt */
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
@@ -2418,9 +2419,19 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 		page = __alloc_pages_slowpath(gfp_mask, order,
 				zonelist, high_zoneidx, nodemask,
 				preferred_zone, migratetype);
-	put_mems_allowed();
 
 	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
+
+out:
+	/*
+	 * When updating a task's mems_allowed, it is possible to race with
+	 * parallel threads in such a way that an allocation can fail while
+	 * the mask is being updated. If a page allocation is about to fail,
+	 * check if the cpuset changed during allocation and if so, retry.
+	 */
+	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+		goto retry_cpuset;
+
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -2634,13 +2645,15 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 bool skip_free_areas_node(unsigned int flags, int nid)
 {
 	bool ret = false;
+	unsigned int cpuset_mems_cookie;
 
 	if (!(flags & SHOW_MEM_FILTER_NODES))
 		goto out;
 
-	get_mems_allowed();
-	ret = !node_isset(nid, cpuset_current_mems_allowed);
-	put_mems_allowed();
+	do {
+		cpuset_mems_cookie = get_mems_allowed();
+		ret = !node_isset(nid, cpuset_current_mems_allowed);
+	} while (!put_mems_allowed(cpuset_mems_cookie));
 out:
 	return ret;
 }
(Diffs for the remaining changed files in this commit are not shown here.)
