Skip to content

Commit

Permalink
[PATCH] cpuset: numa_policy_rebind cleanup
Browse files Browse the repository at this point in the history
Clean up, reorganize, and make more robust the mempolicy.c code that rebinds
mempolicies relative to the containing cpuset after a task's memory placement
changes.

The real motivator for this cleanup patch is to lay more groundwork for the
upcoming patch to correctly rebind NUMA mempolicies that are attached to vmas
after the containing cpuset's memory placement changes.

NUMA mempolicies are constrained by the cpuset their task is a member of.
When either (1) a task is moved to a different cpuset, or (2) the 'mems'
mems_allowed of a cpuset is changed, the node numbers embedded in the NUMA
mempolicies (for MPOL_BIND, MPOL_INTERLEAVE and MPOL_PREFERRED) need to
be recalculated, relative to their new cpuset placement.

The old code used an unreliable method of determining the old
mems_allowed constraining the mempolicy: it just looked at the task's
mems_allowed value.  This sort of worked with the present code, which just
rebinds the -task- mempolicy and leaves any -vma- mempolicies broken,
referring to the old nodes.  But in an upcoming patch, the vma mempolicies
will be rebound as well.  Then the order in which the various task and vma
mempolicies are updated will no longer be deterministic, and one can no longer
count on task->mems_allowed holding the old value for as long as needed.
It's not even clear whether the current code was guaranteed to work reliably
for task mempolicies.

So I added a cpuset_mems_allowed field to each mempolicy, stating exactly which
mems_allowed the policy is relative to; it is updated synchronously and
reliably any time the mempolicy is rebound.

Also removed a useless wrapper routine, numa_policy_rebind(), and had its
caller, cpuset_update_task_memory_state(), call directly to the rewritten
rebind_policy() routine.  That rebind routine is now extern instead of
static and has gained an "mpol_" prefix on its name, making it
mpol_rebind_policy().

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
  • Loading branch information
Paul Jackson authored and Linus Torvalds committed Jan 9, 2006
1 parent 909d75a commit 74cb215
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 15 deletions.
12 changes: 10 additions & 2 deletions include/linux/mempolicy.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ struct mempolicy {
nodemask_t nodes; /* interleave */
/* undefined for default */
} v;
nodemask_t cpuset_mems_allowed; /* mempolicy relative to these nodes */
};

/*
Expand Down Expand Up @@ -146,7 +147,9 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,

extern void numa_default_policy(void);
extern void numa_policy_init(void);
extern void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new);
extern void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *new);
extern void mpol_rebind_task(struct task_struct *tsk,
const nodemask_t *new);
extern struct mempolicy default_policy;
extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
unsigned long addr);
Expand Down Expand Up @@ -221,7 +224,12 @@ static inline void numa_default_policy(void)
{
}

static inline void numa_policy_rebind(const nodemask_t *old,
static inline void mpol_rebind_policy(struct mempolicy *pol,
const nodemask_t *new)
{
}

static inline void mpol_rebind_task(struct task_struct *tsk,
const nodemask_t *new)
{
}
Expand Down
2 changes: 1 addition & 1 deletion kernel/cpuset.c
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ void cpuset_update_task_memory_state()
tsk->cpuset_mems_generation = cs->mems_generation;
task_unlock(tsk);
up(&callback_sem);
numa_policy_rebind(&oldmem, &tsk->mems_allowed);
mpol_rebind_task(tsk, &tsk->mems_allowed);
if (!nodes_equal(oldmem, tsk->mems_allowed)) {
if (migrate) {
do_migrate_pages(tsk->mm, &oldmem,
Expand Down
31 changes: 19 additions & 12 deletions mm/mempolicy.c
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
break;
}
policy->policy = mode;
policy->cpuset_mems_allowed = cpuset_mems_allowed(current);
return policy;
}

Expand Down Expand Up @@ -1411,25 +1412,31 @@ void numa_default_policy(void)
}

/* Migrate a policy to a different set of nodes */
static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
const nodemask_t *new)
void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
{
nodemask_t *mpolmask;
nodemask_t tmp;

if (!pol)
return;
mpolmask = &pol->cpuset_mems_allowed;
if (nodes_equal(*mpolmask, *newmask))
return;

switch (pol->policy) {
case MPOL_DEFAULT:
break;
case MPOL_INTERLEAVE:
nodes_remap(tmp, pol->v.nodes, *old, *new);
nodes_remap(tmp, pol->v.nodes, *mpolmask, *newmask);
pol->v.nodes = tmp;
current->il_next = node_remap(current->il_next, *old, *new);
*mpolmask = *newmask;
current->il_next = node_remap(current->il_next,
*mpolmask, *newmask);
break;
case MPOL_PREFERRED:
pol->v.preferred_node = node_remap(pol->v.preferred_node,
*old, *new);
*mpolmask, *newmask);
*mpolmask = *newmask;
break;
case MPOL_BIND: {
nodemask_t nodes;
Expand All @@ -1439,7 +1446,7 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
nodes_clear(nodes);
for (z = pol->v.zonelist->zones; *z; z++)
node_set((*z)->zone_pgdat->node_id, nodes);
nodes_remap(tmp, nodes, *old, *new);
nodes_remap(tmp, nodes, *mpolmask, *newmask);
nodes = tmp;

zonelist = bind_zonelist(&nodes);
Expand All @@ -1454,6 +1461,7 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
kfree(pol->v.zonelist);
pol->v.zonelist = zonelist;
}
*mpolmask = *newmask;
break;
}
default:
Expand All @@ -1463,14 +1471,13 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
}

/*
* Someone moved this task to different nodes. Fixup mempolicies.
*
* TODO - fixup current->mm->vma and shmfs/tmpfs/hugetlbfs policies as well,
* once we have a cpuset mechanism to mark which cpuset subtree is migrating.
* Wrapper for mpol_rebind_policy() that just requires task
* pointer, and updates task mempolicy.
*/
void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new)

void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
{
rebind_policy(current->mempolicy, old, new);
mpol_rebind_policy(tsk->mempolicy, new);
}

/*
Expand Down

0 comments on commit 74cb215

Please sign in to comment.