
Commit ead83f6
---
r: 198230
b: refs/heads/master
c: c0ff745
h: refs/heads/master
v: v3
Miao Xie authored and Linus Torvalds committed May 25, 2010
1 parent 9b6780d commit ead83f6
Showing 12 changed files with 149 additions and 21 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
---
refs/heads/master: 708c1bbc9d0c3e57f40501794d9b0eed29d10fce
refs/heads/master: c0ff7453bb5c7c98e0885fb94279f2571946f280
43 changes: 43 additions & 0 deletions trunk/include/linux/cpuset.h
@@ -86,9 +86,44 @@ extern void rebuild_sched_domains(void);

extern void cpuset_print_task_mems_allowed(struct task_struct *p);

/*
* Reads of current's mems_allowed and mempolicy in the fast path must be
* protected by get_mems_allowed().
*/
static inline void get_mems_allowed(void)
{
current->mems_allowed_change_disable++;

/*
* Ensure that reads of mems_allowed and mempolicy happen after the
* update of ->mems_allowed_change_disable.
*
* When the write-side task sees that ->mems_allowed_change_disable is
* non-zero, it knows the read-side task may be reading mems_allowed or
* mempolicy, so it clears the old bits lazily.
*/
smp_mb();
}

static inline void put_mems_allowed(void)
{
/*
* Ensure that reads of mems_allowed and mempolicy happen before
* mems_allowed_change_disable is decremented.
*
* While the counter is non-zero, the write-side task knows that the
* read-side task may still be reading mems_allowed or mempolicy, and
* does not clear the old bits in the nodemask.
*/
smp_mb();
--ACCESS_ONCE(current->mems_allowed_change_disable);
}

static inline void set_mems_allowed(nodemask_t nodemask)
{
task_lock(current);
current->mems_allowed = nodemask;
task_unlock(current);
}

#else /* !CONFIG_CPUSETS */
@@ -187,6 +222,14 @@ static inline void set_mems_allowed(nodemask_t nodemask)
{
}

static inline void get_mems_allowed(void)
{
}

static inline void put_mems_allowed(void)
{
}

#endif /* !CONFIG_CPUSETS */

#endif /* _LINUX_CPUSET_H */
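
For context, the read-side pattern these helpers establish is the one the filemap.c hunk further down follows: bump the per-task counter, compute a node from mems_allowed, allocate, then drop the counter so the cpuset writer may clear stale bits. A minimal sketch of that usage follows, assuming a hypothetical caller; only get_mems_allowed(), put_mems_allowed(), cpuset_mem_spread_node() and alloc_pages_exact_node() are real kernel APIs here.

/* Hypothetical caller illustrating the read-side window; not part of the diff. */
static struct page *example_spread_alloc(gfp_t gfp)
{
	int nid;
	struct page *page;

	get_mems_allowed();			/* open the read-side window */
	nid = cpuset_mem_spread_node();		/* pick a node from mems_allowed */
	page = alloc_pages_exact_node(nid, gfp, 0);
	put_mems_allowed();			/* writer may now clear old bits */
	return page;
}
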
1 change: 1 addition & 0 deletions trunk/include/linux/sched.h
@@ -1421,6 +1421,7 @@ struct task_struct {
#endif
#ifdef CONFIG_CPUSETS
nodemask_t mems_allowed; /* Protected by alloc_lock */
int mems_allowed_change_disable;
int cpuset_mem_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS
58 changes: 50 additions & 8 deletions trunk/kernel/cpuset.c
@@ -946,16 +946,62 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
* In order to avoid seeing no nodes if the old and new nodes are disjoint,
* we structure updates as setting all new allowed nodes, then clearing newly
* disallowed ones.
*
* Called with task's alloc_lock held
*/
static void cpuset_change_task_nodemask(struct task_struct *tsk,
nodemask_t *newmems)
{
repeat:
/*
* Allow tasks that have access to memory reserves because they have
* been OOM killed to get memory anywhere.
*/
if (unlikely(test_thread_flag(TIF_MEMDIE)))
return;
if (current->flags & PF_EXITING) /* Let dying task have memory */
return;

task_lock(tsk);
nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
mpol_rebind_task(tsk, &tsk->mems_allowed, MPOL_REBIND_ONCE);
mpol_rebind_task(tsk, newmems, MPOL_REBIND_ONCE);
mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);


/*
* Ensure that ->mems_allowed_change_disable is checked after all the
* new allowed nodes have been set.
*
* The read-side task may see a nodemask containing both the new and
* the old allowed nodes, so even if it allocates a page while the
* cpuset is clearing the newly disallowed ones, it can still see the
* new allowed bits.
*
* If the new allowed nodes were instead set after this check, setting
* them and clearing the newly disallowed ones could happen back to
* back, and the read-side task might find no node to allocate from.
*/
smp_mb();

/*
* Memory allocation is fast, so there is no need to sleep while
* waiting for the read-side to finish.
*/
while (ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
task_unlock(tsk);
if (!task_curr(tsk))
yield();
goto repeat;
}

/*
* Ensure that ->mems_allowed_change_disable is checked before the
* newly disallowed nodes are cleared.
*
* If the newly disallowed bits were cleared before the check, the
* read-side task might find no node to allocate a page from.
*/
smp_mb();

mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
tsk->mems_allowed = *newmems;
task_unlock(tsk);
}

/*
@@ -978,9 +1024,7 @@ static void cpuset_change_nodemask(struct task_struct *p,
cs = cgroup_cs(scan->cg);
guarantee_online_mems(cs, newmems);

task_lock(p);
cpuset_change_task_nodemask(p, newmems);
task_unlock(p);

NODEMASK_FREE(newmems);

@@ -1383,9 +1427,7 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
err = set_cpus_allowed_ptr(tsk, cpus_attach);
WARN_ON_ONCE(err);

task_lock(tsk);
cpuset_change_task_nodemask(tsk, to);
task_unlock(tsk);
cpuset_update_task_spread_flag(cs, tsk);

}
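
Taken together with the cpuset.h helpers above, the rewritten cpuset_change_task_nodemask() forms a read/write protocol around mems_allowed. The sketch below is a simplified model of that pairing, not part of the diff: counter, reader_side and writer_side are illustrative names standing in for mems_allowed_change_disable and the two sides, the nodemask operations are reduced to comments, and the real writer drops task_lock and retries rather than spinning in place.

/* Simplified model of the read/write protocol; illustrative only. */
static int counter;			/* stands in for tsk->mems_allowed_change_disable */

static void reader_side(void)		/* get_mems_allowed() ... put_mems_allowed() */
{
	counter++;
	smp_mb();			/* reads of the mask happen after the increment */
	/* ... read mems_allowed / mempolicy and allocate ... */
	smp_mb();			/* reads of the mask happen before the decrement */
	counter--;
}

static void writer_side(void)		/* cpuset_change_task_nodemask() */
{
	/* set all newly allowed bits first, so readers always see some node */
	smp_mb();			/* order the set before the counter check */
	while (ACCESS_ONCE(counter))
		cpu_relax();		/* wait out any read-side window */
	smp_mb();			/* order the check before clearing old bits */
	/* now clear the newly disallowed bits */
}
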
2 changes: 2 additions & 0 deletions trunk/kernel/exit.c
@@ -1002,8 +1002,10 @@ NORET_TYPE void do_exit(long code)

exit_notify(tsk, group_dead);
#ifdef CONFIG_NUMA
task_lock(tsk);
mpol_put(tsk->mempolicy);
tsk->mempolicy = NULL;
task_unlock(tsk);
#endif
#ifdef CONFIG_FUTEX
if (unlikely(current->pi_state_cache))
10 changes: 8 additions & 2 deletions trunk/mm/filemap.c
@@ -461,9 +461,15 @@ EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
#ifdef CONFIG_NUMA
struct page *__page_cache_alloc(gfp_t gfp)
{
int n;
struct page *page;

if (cpuset_do_page_mem_spread()) {
int n = cpuset_mem_spread_node();
return alloc_pages_exact_node(n, gfp, 0);
get_mems_allowed();
n = cpuset_mem_spread_node();
page = alloc_pages_exact_node(n, gfp, 0);
put_mems_allowed();
return page;
}
return alloc_pages(gfp, 0);
}
12 changes: 8 additions & 4 deletions trunk/mm/hugetlb.c
@@ -465,23 +465,25 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
struct page *page = NULL;
struct mempolicy *mpol;
nodemask_t *nodemask;
struct zonelist *zonelist = huge_zonelist(vma, address,
htlb_alloc_mask, &mpol, &nodemask);
struct zonelist *zonelist;
struct zone *zone;
struct zoneref *z;

get_mems_allowed();
zonelist = huge_zonelist(vma, address,
htlb_alloc_mask, &mpol, &nodemask);
/*
* A child process with MAP_PRIVATE mappings created by its parent
* has no page reserves. This check ensures that reservations are
* not "stolen". The child may still get SIGKILLed.
*/
if (!vma_has_reserves(vma) &&
h->free_huge_pages - h->resv_huge_pages == 0)
return NULL;
goto err;

/* If reserves cannot be used, ensure enough pages are in the pool */
if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0)
return NULL;
goto err;

for_each_zone_zonelist_nodemask(zone, z, zonelist,
MAX_NR_ZONES - 1, nodemask) {
@@ -500,7 +502,9 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
break;
}
}
err:
mpol_cond_put(mpol);
put_mems_allowed();
return page;
}

24 changes: 20 additions & 4 deletions trunk/mm/mempolicy.c
@@ -1639,6 +1639,8 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
* to the struct mempolicy for conditional unref after allocation.
* If the effective policy is 'BIND', returns a pointer to the mempolicy's
* @nodemask for filtering the zonelist.
*
* Must be protected by get_mems_allowed()
*/
struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
gfp_t gfp_flags, struct mempolicy **mpol,
@@ -1684,6 +1686,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
if (!(mask && current->mempolicy))
return false;

task_lock(current);
mempolicy = current->mempolicy;
switch (mempolicy->mode) {
case MPOL_PREFERRED:
@@ -1703,6 +1706,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
default:
BUG();
}
task_unlock(current);

return true;
}
@@ -1750,13 +1754,17 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
{
struct mempolicy *pol = get_vma_policy(current, vma, addr);
struct zonelist *zl;
struct page *page;

get_mems_allowed();
if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
unsigned nid;

nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
mpol_cond_put(pol);
return alloc_page_interleave(gfp, 0, nid);
page = alloc_page_interleave(gfp, 0, nid);
put_mems_allowed();
return page;
}
zl = policy_zonelist(gfp, pol);
if (unlikely(mpol_needs_cond_ref(pol))) {
@@ -1766,12 +1774,15 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
struct page *page = __alloc_pages_nodemask(gfp, 0,
zl, policy_nodemask(gfp, pol));
__mpol_put(pol);
put_mems_allowed();
return page;
}
/*
* fast path: default or task policy
*/
return __alloc_pages_nodemask(gfp, 0, zl, policy_nodemask(gfp, pol));
page = __alloc_pages_nodemask(gfp, 0, zl, policy_nodemask(gfp, pol));
put_mems_allowed();
return page;
}

/**
@@ -1796,18 +1807,23 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
struct page *alloc_pages_current(gfp_t gfp, unsigned order)
{
struct mempolicy *pol = current->mempolicy;
struct page *page;

if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
pol = &default_policy;

get_mems_allowed();
/*
* No reference counting needed for current->mempolicy
* nor system default_policy
*/
if (pol->mode == MPOL_INTERLEAVE)
return alloc_page_interleave(gfp, order, interleave_nodes(pol));
return __alloc_pages_nodemask(gfp, order,
page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
else
page = __alloc_pages_nodemask(gfp, order,
policy_zonelist(gfp, pol), policy_nodemask(gfp, pol));
put_mems_allowed();
return page;
}
EXPORT_SYMBOL(alloc_pages_current);

6 changes: 5 additions & 1 deletion trunk/mm/page_alloc.c
@@ -1990,10 +1990,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
if (unlikely(!zonelist->_zonerefs->zone))
return NULL;

get_mems_allowed();
/* The preferred zone is used for statistics later */
first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone);
if (!preferred_zone)
if (!preferred_zone) {
put_mems_allowed();
return NULL;
}

/* First allocation attempt */
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
@@ -2003,6 +2006,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
page = __alloc_pages_slowpath(gfp_mask, order,
zonelist, high_zoneidx, nodemask,
preferred_zone, migratetype);
put_mems_allowed();

trace_mm_page_alloc(page, order, gfp_mask, migratetype);
return page;
4 changes: 4 additions & 0 deletions trunk/mm/slab.c
@@ -3217,10 +3217,12 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
if (in_interrupt() || (flags & __GFP_THISNODE))
return NULL;
nid_alloc = nid_here = numa_node_id();
get_mems_allowed();
if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
nid_alloc = cpuset_mem_spread_node();
else if (current->mempolicy)
nid_alloc = slab_node(current->mempolicy);
put_mems_allowed();
if (nid_alloc != nid_here)
return ____cache_alloc_node(cachep, flags, nid_alloc);
return NULL;
@@ -3247,6 +3249,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
if (flags & __GFP_THISNODE)
return NULL;

get_mems_allowed();
zonelist = node_zonelist(slab_node(current->mempolicy), flags);
local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

@@ -3302,6 +3305,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
}
}
}
put_mems_allowed();
return obj;
}

6 changes: 5 additions & 1 deletion trunk/mm/slub.c
@@ -1360,6 +1360,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
get_cycles() % 1024 > s->remote_node_defrag_ratio)
return NULL;

get_mems_allowed();
zonelist = node_zonelist(slab_node(current->mempolicy), flags);
for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
struct kmem_cache_node *n;
@@ -1369,10 +1370,13 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
n->nr_partial > s->min_partial) {
page = get_partial_node(n);
if (page)
if (page) {
put_mems_allowed();
return page;
}
}
}
put_mems_allowed();
#endif
return NULL;
}
