Commit

---
r: 344811
b: refs/heads/master
c: b8593bf
h: refs/heads/master
i:
  344809: e47c631
  344807: b3bdddc
v: v3
Mel Gorman committed Dec 11, 2012
1 parent 14f0641 commit 1166189
Showing 8 changed files with 45 additions and 16 deletions.
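In short: NUMA hinting faults now report whether the faulting page was actually migrated. task_numa_fault() slows the per-task PTE scan period (up to sysctl_numa_balancing_scan_period_max) only when the page was already properly placed, and task_numa_work() periodically resets the period to the minimum so that workload phase changes are still noticed. The reset interval is exposed through a new numa_balancing_scan_period_reset sysctl.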
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: e42c8ff2999de1239a57d434bfbd8e9f2a56e814
+refs/heads/master: b8593bfda1652755136333cdd362de125b283a9c
3 changes: 3 additions & 0 deletions trunk/include/linux/mm_types.h
@@ -410,6 +410,9 @@ struct mm_struct {
 	 */
 	unsigned long numa_next_scan;
 
+	/* numa_next_reset is when the PTE scanner period will be reset */
+	unsigned long numa_next_reset;
+
 	/* Restart point for scanning and setting pte_numa */
 	unsigned long numa_scan_offset;
 
5 changes: 3 additions & 2 deletions trunk/include/linux/sched.h
@@ -1562,9 +1562,9 @@ struct task_struct {
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
 #ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int node, int pages);
+extern void task_numa_fault(int node, int pages, bool migrated);
 #else
-static inline void task_numa_fault(int node, int pages)
+static inline void task_numa_fault(int node, int pages, bool migrated)
 {
 }
 #endif
@@ -2009,6 +2009,7 @@ extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
 extern unsigned int sysctl_numa_balancing_scan_delay;
 extern unsigned int sysctl_numa_balancing_scan_period_min;
 extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_scan_period_reset;
 extern unsigned int sysctl_numa_balancing_scan_size;
 extern unsigned int sysctl_numa_balancing_settle_count;
 
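The API change threads one extra bit of information through: callers must now say whether the hinting fault migrated the page. The empty stub is updated in lockstep so !CONFIG_NUMA_BALANCING builds keep compiling; the three call sites are updated in the huge_memory.c and memory.c hunks below.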
1 change: 1 addition & 0 deletions trunk/kernel/sched/core.c
@@ -1537,6 +1537,7 @@ static void __sched_fork(struct task_struct *p)
 #ifdef CONFIG_NUMA_BALANCING
 	if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
 		p->mm->numa_next_scan = jiffies;
+		p->mm->numa_next_reset = jiffies;
 		p->mm->numa_scan_seq = 0;
 	}
 
29 changes: 21 additions & 8 deletions trunk/kernel/sched/fair.c
@@ -784,7 +784,8 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * numa task sample period in ms
  */
 unsigned int sysctl_numa_balancing_scan_period_min = 100;
-unsigned int sysctl_numa_balancing_scan_period_max = 100*16;
+unsigned int sysctl_numa_balancing_scan_period_max = 100*50;
+unsigned int sysctl_numa_balancing_scan_period_reset = 100*600;
 
 /* Portion of address space to scan in MB */
 unsigned int sysctl_numa_balancing_scan_size = 256;
@@ -806,20 +807,19 @@ static void task_numa_placement(struct task_struct *p)
 /*
  * Got a PROT_NONE fault for a page on @node.
  */
-void task_numa_fault(int node, int pages)
+void task_numa_fault(int node, int pages, bool migrated)
 {
 	struct task_struct *p = current;
 
 	/* FIXME: Allocate task-specific structure for placement policy here */
 
 	/*
-	 * Assume that as faults occur that pages are getting properly placed
-	 * and fewer NUMA hints are required. Note that this is a big
-	 * assumption, it assumes processes reach a steady state with no
-	 * further phase changes.
+	 * If pages are properly placed (did not migrate) then scan slower.
+	 * This is reset periodically in case of phase changes.
 	 */
-	p->numa_scan_period = min(sysctl_numa_balancing_scan_period_max,
-			p->numa_scan_period + jiffies_to_msecs(2));
+	if (!migrated)
+		p->numa_scan_period = min(sysctl_numa_balancing_scan_period_max,
+			p->numa_scan_period + jiffies_to_msecs(10));
 
 	task_numa_placement(p);
 }
@@ -857,6 +857,19 @@ void task_numa_work(struct callback_head *work)
 	if (p->flags & PF_EXITING)
 		return;
 
+	/*
+	 * Reset the scan period if enough time has gone by. Objective is that
+	 * scanning will be reduced if pages are properly placed. As tasks
+	 * can enter different phases this needs to be re-examined. Lacking
+	 * proper tracking of reference behaviour, this blunt hammer is used.
+	 */
+	migrate = mm->numa_next_reset;
+	if (time_after(now, migrate)) {
+		p->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+		next_scan = now + msecs_to_jiffies(sysctl_numa_balancing_scan_period_reset);
+		xchg(&mm->numa_next_reset, next_scan);
+	}
+
 	/*
 	 * Enforce maximal scan/migration frequency.
 	 */
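As a reading aid, not part of the commit: a minimal userspace C model of the policy above. All names here (task_model, hint_fault, maybe_reset, now_ms) are invented stand-ins for the task_struct fields, task_numa_fault(), the reset check in task_numa_work(), and jiffies; the flat 10 ms step is an assumption (the kernel adds jiffies_to_msecs(10), i.e. this models HZ=1000).

/*
 * Model of the adaptive scan-rate policy, using the commit's defaults.
 * Userspace sketch only; not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static const unsigned int scan_period_min   = 100;       /* ms, fair.c default */
static const unsigned int scan_period_max   = 100 * 50;  /* 5000 ms */
static const unsigned int scan_period_reset = 100 * 600; /* 60000 ms */

struct task_model {                        /* stands in for the task_struct fields */
	unsigned int scan_period;
	unsigned long long next_reset;
};

static unsigned long long now_ms(void)     /* stands in for jiffies */
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000ULL + ts.tv_nsec / 1000000ULL;
}

/* Mirrors task_numa_fault(): back off only when the page did not move. */
static void hint_fault(struct task_model *t, bool migrated)
{
	if (!migrated) {
		unsigned int slower = t->scan_period + 10;
		t->scan_period = slower < scan_period_max ? slower : scan_period_max;
	}
}

/* Mirrors the new block in task_numa_work(): periodic blunt-hammer reset. */
static void maybe_reset(struct task_model *t)
{
	unsigned long long now = now_ms();

	if (now >= t->next_reset) {
		t->scan_period = scan_period_min;
		t->next_reset = now + scan_period_reset;
	}
}

int main(void)
{
	struct task_model t = { .scan_period = scan_period_min,
				.next_reset = now_ms() };

	for (int i = 0; i < 600; i++)
		hint_fault(&t, false);  /* well-placed workload: period climbs */
	printf("after settling: %u ms\n", t.scan_period); /* 5000 */

	maybe_reset(&t);                /* the reset is already due */
	printf("after reset:    %u ms\n", t.scan_period); /* 100 */
	return 0;
}

It prints 5000 ms once the backoff saturates and 100 ms after the reset fires, matching the comments in the hunks above: slower scanning for stable workloads, with a periodic re-probe for phase changes.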
7 changes: 7 additions & 0 deletions trunk/kernel/sysctl.c
@@ -366,6 +366,13 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "numa_balancing_scan_period_reset",
+		.data		= &sysctl_numa_balancing_scan_period_reset,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{
 		.procname	= "numa_balancing_scan_period_max_ms",
 		.data		= &sysctl_numa_balancing_scan_period_max,
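Because the entry is registered in kern_table with proc_dointvec, the interval should read and write as a plain decimal, in milliseconds given the msecs_to_jiffies() conversion in fair.c. A hedged usage sketch, assuming the file surfaces at /proc/sys/kernel/numa_balancing_scan_period_reset on a CONFIG_NUMA_BALANCING kernel:

#include <stdio.h>

int main(void)
{
	/* Path is an assumption: kern_table entries land under /proc/sys/kernel/. */
	const char *path = "/proc/sys/kernel/numa_balancing_scan_period_reset";
	unsigned int reset_ms;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%u", &reset_ms) == 1)
		printf("scan period reset interval: %u ms\n", reset_ms); /* default 60000 */
	fclose(f);
	return 0;
}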
2 changes: 1 addition & 1 deletion trunk/mm/huge_memory.c
@@ -1068,7 +1068,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_unlock(&mm->page_table_lock);
 	if (page) {
 		put_page(page);
-		task_numa_fault(numa_node_id(), HPAGE_PMD_NR);
+		task_numa_fault(numa_node_id(), HPAGE_PMD_NR, false);
 	}
 	return 0;
 }
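Note that this call site hard-codes migrated=false: at this point in the series the huge-PMD fault path drops the page without migrating it, so its faults always count as properly placed and nudge the scan period toward the maximum.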
12 changes: 8 additions & 4 deletions trunk/mm/memory.c
@@ -3468,6 +3468,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	int current_nid = -1;
 	int target_nid;
+	bool migrated = false;
 
 	/*
 	 * The "pte" at this point cannot be used safely without
@@ -3509,12 +3510,13 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	/* Migrate to the requested node */
-	if (migrate_misplaced_page(page, target_nid))
+	migrated = migrate_misplaced_page(page, target_nid);
+	if (migrated)
 		current_nid = target_nid;
 
 out:
 	if (current_nid != -1)
-		task_numa_fault(current_nid, 1);
+		task_numa_fault(current_nid, 1, migrated);
 	return 0;
 }
 
@@ -3554,6 +3556,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct page *page;
 		int curr_nid = local_nid;
 		int target_nid;
+		bool migrated;
 		if (!pte_present(pteval))
 			continue;
 		if (!pte_numa(pteval))
@@ -3590,9 +3593,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 		/* Migrate to the requested node */
 		pte_unmap_unlock(pte, ptl);
-		if (migrate_misplaced_page(page, target_nid))
+		migrated = migrate_misplaced_page(page, target_nid);
+		if (migrated)
 			curr_nid = target_nid;
-		task_numa_fault(curr_nid, 1);
+		task_numa_fault(curr_nid, 1, migrated);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
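Taken together, the two PTE-level handlers now capture the return value of migrate_misplaced_page() (nonzero when the page actually moved) and feed it into task_numa_fault(), closing the loop: scanning slows only while hinting faults keep finding pages on the right node, and the periodic reset in task_numa_work() re-probes in case the workload has changed phase.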
