Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fix from Thomas Gleixner:
 "Revert the new NUMA aware placement approach which turned out to
  create more problems than it solved"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  Revert "sched/numa: Delay retrying placement for automatic NUMA balance after wake_affine()"
Linus Torvalds committed May 13, 2018
2 parents baeda71 + 789ba28 commit 86a4ac4
1 changed file with 1 addition and 56 deletions: kernel/sched/fair.c
@@ -1854,26 +1854,14 @@ static int task_numa_migrate(struct task_struct *p)
 static void numa_migrate_preferred(struct task_struct *p)
 {
 	unsigned long interval = HZ;
-	unsigned long numa_migrate_retry;
 
 	/* This task has no NUMA fault statistics yet */
 	if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults))
 		return;
 
 	/* Periodically retry migrating the task to the preferred node */
 	interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16);
-	numa_migrate_retry = jiffies + interval;
-
-	/*
-	 * Check that the new retry threshold is after the current one. If
-	 * the retry is in the future, it implies that wake_affine has
-	 * temporarily asked NUMA balancing to backoff from placement.
-	 */
-	if (numa_migrate_retry > p->numa_migrate_retry)
-		return;
-
-	/* Safe to try placing the task on the preferred node */
-	p->numa_migrate_retry = numa_migrate_retry;
+	p->numa_migrate_retry = jiffies + interval;
 
 	/* Success if task is already running on preferred CPU */
 	if (task_node(p) == p->numa_preferred_nid)
@@ -5922,48 +5910,6 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
 	return this_eff_load < prev_eff_load ? this_cpu : nr_cpumask_bits;
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-	unsigned long interval;
-
-	if (!static_branch_likely(&sched_numa_balancing))
-		return;
-
-	/* If balancing has no preference then continue gathering data */
-	if (p->numa_preferred_nid == -1)
-		return;
-
-	/*
-	 * If the wakeup is not affecting locality then it is neutral from
-	 * the perspective of NUMA balancing so continue gathering data.
-	 */
-	if (cpu_to_node(prev_cpu) == cpu_to_node(target))
-		return;
-
-	/*
-	 * Temporarily prevent NUMA balancing from trying to place waker/wakee
-	 * after the wakee has been moved by wake_affine. This will potentially
-	 * allow related tasks to converge and update their data placement. The
-	 * 4 * numa_scan_period is to allow the two-pass filter to migrate
-	 * hot data to the waker's node.
-	 */
-	interval = max(sysctl_numa_balancing_scan_delay,
-		       p->numa_scan_period << 2);
-	p->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-
-	interval = max(sysctl_numa_balancing_scan_delay,
-		       current->numa_scan_period << 2);
-	current->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-}
-#else
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-}
-#endif
-
 static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 		       int this_cpu, int prev_cpu, int sync)
 {
@@ -5979,7 +5925,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 	if (target == nr_cpumask_bits)
 		return prev_cpu;
 
-	update_wa_numa_placement(p, prev_cpu, target);
 	schedstat_inc(sd->ttwu_move_affine);
 	schedstat_inc(p->se.statistics.nr_wakeups_affine);
 	return target;
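For readers without the tree at hand, here is a rough, self-contained userspace sketch of the two pieces deleted above: the wake_affine() backoff hook and the retry-threshold check in numa_migrate_preferred(). This is an editor's illustration, not kernel code; jiffies, struct task, and scan_delay are simplified stand-ins for the kernel's tick counter, task_struct, and sysctl_numa_balancing_scan_delay, and the msecs_to_jiffies() conversion is omitted. Only the control flow is taken from the hunks above.

#include <stdbool.h>
#include <stdio.h>

static unsigned long jiffies;			/* stand-in for the kernel tick counter */
static unsigned long scan_delay = 1000;		/* stand-in for sysctl_numa_balancing_scan_delay */

struct task {
	unsigned long numa_migrate_retry;	/* next placement-retry threshold */
	unsigned long numa_scan_period;
};

/*
 * Mirrors the deleted update_wa_numa_placement(): after an affine wakeup
 * moves a task across nodes, push its retry threshold out by roughly four
 * scan periods.
 */
static void wa_numa_backoff(struct task *p)
{
	unsigned long interval = p->numa_scan_period << 2;

	if (interval < scan_delay)
		interval = scan_delay;
	p->numa_migrate_retry = jiffies + interval;
}

/*
 * Mirrors the deleted check in numa_migrate_preferred(): skip placement
 * whenever the freshly computed threshold lands after the stored one.
 */
static bool try_placement(struct task *p, unsigned long interval)
{
	unsigned long retry = jiffies + interval;

	if (retry > p->numa_migrate_retry)
		return false;
	p->numa_migrate_retry = retry;
	return true;
}

int main(void)
{
	struct task t = { .numa_migrate_retry = 0, .numa_scan_period = 250 };

	jiffies = 10;
	wa_numa_backoff(&t);	/* threshold becomes 10 + max(1000, 250 << 2) = 1010 */
	printf("threshold after backoff: %lu\n", t.numa_migrate_retry);
	printf("placement attempted:     %d\n", try_placement(&t, 16));
	return 0;
}

Note that in this sketch try_placement() still returns true right after wa_numa_backoff() has pushed the threshold out, since the freshly computed threshold is smaller than the stored one, not larger; interactions of this kind between the two threshold updates give a flavor of why the merge message calls the approach more trouble than it was worth.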
