Skip to content

Commit

Permalink
sched/numa: Reschedule task on preferred NUMA node once selected
Browse files Browse the repository at this point in the history
A preferred node is selected based on the node on which the most NUMA
hinting faults were incurred. There is no guarantee that the task is running
on that node at the time, so this patch reschedules the task to run on
the most idle CPU of the selected node when it is selected. This avoids
waiting for the balancer to make a decision.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-25-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Mel Gorman authored and Ingo Molnar committed Oct 9, 2013
1 parent 7a0f308 commit e6628d5
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 1 deletion.
19 changes: 19 additions & 0 deletions kernel/sched/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -4348,6 +4348,25 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
return ret;
}

#ifdef CONFIG_NUMA_BALANCING
/*
 * migrate_task_to - ask for task @p to be moved to @target_cpu
 *
 * Returns 0 without doing anything when task_cpu(@p) already equals
 * @target_cpu, and -EINVAL when @target_cpu is not set in
 * tsk_cpus_allowed(@p). Otherwise the request is handed to stop_one_cpu()
 * for the CPU the task was sampled on, with migration_cpu_stop as the
 * callback, and that call's return value is passed back to the caller.
 */
int migrate_task_to(struct task_struct *p, int target_cpu)
{
	const int src_cpu = task_cpu(p);
	struct migration_arg arg = { p, target_cpu };

	/* Already where we want to be: nothing to migrate. */
	if (src_cpu == target_cpu)
		return 0;

	if (cpumask_test_cpu(target_cpu, tsk_cpus_allowed(p))) {
		/* TODO: This is not properly updating schedstats */
		return stop_one_cpu(src_cpu, migration_cpu_stop, &arg);
	}

	/* The task is not allowed to run on the requested CPU. */
	return -EINVAL;
}
#endif

/*
* migration_cpu_stop - this will be executed by a highprio stopper thread
* and performs thread migration by bumping thread off CPU then
Expand Down
46 changes: 45 additions & 1 deletion kernel/sched/fair.c
Original file line number Diff line number Diff line change
Expand Up @@ -886,6 +886,31 @@ static unsigned int task_scan_max(struct task_struct *p)
*/
unsigned int sysctl_numa_balancing_settle_count __read_mostly = 3;

static unsigned long weighted_cpuload(const int cpu);


/*
 * find_idlest_cpu_node - pick the least loaded CPU of NUMA node @nid
 * @this_cpu: fallback result, returned unchanged when no CPU of @nid
 *            reports a load below ULONG_MAX (e.g. the node mask is empty)
 * @nid: node whose CPUs, as given by cpumask_of_node(), are scanned
 *
 * Walks every CPU in the node's mask and returns the one with the smallest
 * weighted_cpuload(). The comparison is strict, so on a tie the first CPU
 * encountered wins.
 *
 * Callers are expected to pass a @this_cpu that is not on @nid. Violating
 * that is harmless -- the scan below still yields a valid CPU of @nid -- so
 * it is reported once with a warning instead of bringing the machine down
 * with BUG_ON().
 */
static int
find_idlest_cpu_node(int this_cpu, int nid)
{
	unsigned long load, min_load = ULONG_MAX;
	int i, idlest_cpu = this_cpu;

	WARN_ON_ONCE(cpu_to_node(this_cpu) == nid);

	rcu_read_lock();
	for_each_cpu(i, cpumask_of_node(nid)) {
		load = weighted_cpuload(i);

		if (load < min_load) {
			min_load = load;
			idlest_cpu = i;
		}
	}
	rcu_read_unlock();

	return idlest_cpu;
}

static void task_numa_placement(struct task_struct *p)
{
int seq, nid, max_nid = -1;
Expand Down Expand Up @@ -916,10 +941,29 @@ static void task_numa_placement(struct task_struct *p)
}
}

/* Update the tasks preferred node if necessary */
/*
* Record the preferred node as the node with the most faults,
* requeue the task to be running on the idlest CPU on the
* preferred node and reset the scanning rate to recheck
* the working set placement.
*/
if (max_faults && max_nid != p->numa_preferred_nid) {
int preferred_cpu;

/*
* If the task is not on the preferred node then find the most
* idle CPU to migrate to.
*/
preferred_cpu = task_cpu(p);
if (cpu_to_node(preferred_cpu) != max_nid) {
preferred_cpu = find_idlest_cpu_node(preferred_cpu,
max_nid);
}

/* Update the preferred nid and migrate task if possible */
p->numa_preferred_nid = max_nid;
p->numa_migrate_seq = 0;
migrate_task_to(p, preferred_cpu);
}
}

Expand Down
1 change: 1 addition & 0 deletions kernel/sched/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,7 @@ static inline u64 rq_clock_task(struct rq *rq)
}

#ifdef CONFIG_NUMA_BALANCING
extern int migrate_task_to(struct task_struct *p, int cpu);
static inline void task_numa_free(struct task_struct *p)
{
kfree(p->numa_faults);
Expand Down

0 comments on commit e6628d5

Please sign in to comment.