Skip to content

Commit

Permalink
sched/numa: Add infrastructure for split shared/private accounting of…
Browse files Browse the repository at this point in the history
… NUMA hinting faults

Ideally it would be possible to distinguish between NUMA hinting faults
that are private to a task and those that are shared.  This patch prepares
infrastructure for separately accounting shared and private faults by
allocating the necessary buffers and passing in relevant information. For
now, all faults are treated as private and detection will be introduced
later.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-26-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Mel Gorman authored and Ingo Molnar committed Oct 9, 2013
1 parent e6628d5 commit ac8e895
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 17 deletions.
5 changes: 3 additions & 2 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -1445,10 +1445,11 @@ struct task_struct {
#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)

#ifdef CONFIG_NUMA_BALANCING
extern void task_numa_fault(int node, int pages, bool migrated);
extern void task_numa_fault(int last_node, int node, int pages, bool migrated);
extern void set_numabalancing_state(bool enabled);
#else
static inline void task_numa_fault(int node, int pages, bool migrated)
static inline void task_numa_fault(int last_node, int node, int pages,
bool migrated)
{
}
static inline void set_numabalancing_state(bool enabled)
Expand Down
46 changes: 35 additions & 11 deletions kernel/sched/fair.c
Original file line number Diff line number Diff line change
Expand Up @@ -886,6 +886,20 @@ static unsigned int task_scan_max(struct task_struct *p)
*/
unsigned int sysctl_numa_balancing_settle_count __read_mostly = 3;

/*
 * Translate a (node, priv) pair into a slot of the per-task fault arrays.
 * Every node owns two adjacent slots; @priv (0 or 1) picks which of the
 * two is addressed, with 1 being the slot used for private faults.
 */
static inline int task_faults_idx(int nid, int priv)
{
	int node_base = nid * 2;

	return node_base + priv;
}

/*
 * Sum of the faults recorded for task @p on node @nid, adding together
 * both per-node slots (priv == 0 and priv == 1).
 *
 * Returns 0 when @p has no numa_faults array yet.
 */
static inline unsigned long task_faults(struct task_struct *p, int nid)
{
	if (p->numa_faults) {
		int slot_priv0 = task_faults_idx(nid, 0);
		int slot_priv1 = task_faults_idx(nid, 1);

		return p->numa_faults[slot_priv0] + p->numa_faults[slot_priv1];
	}

	return 0;
}

static unsigned long weighted_cpuload(const int cpu);


Expand Down Expand Up @@ -928,13 +942,19 @@ static void task_numa_placement(struct task_struct *p)
/* Find the node with the highest number of faults */
for_each_online_node(nid) {
unsigned long faults;
int priv, i;

/* Decay existing window and copy faults since last scan */
p->numa_faults[nid] >>= 1;
p->numa_faults[nid] += p->numa_faults_buffer[nid];
p->numa_faults_buffer[nid] = 0;
for (priv = 0; priv < 2; priv++) {
i = task_faults_idx(nid, priv);

faults = p->numa_faults[nid];
/* Decay existing window, copy faults since last scan */
p->numa_faults[i] >>= 1;
p->numa_faults[i] += p->numa_faults_buffer[i];
p->numa_faults_buffer[i] = 0;
}

/* Find maximum private faults */
faults = p->numa_faults[task_faults_idx(nid, 1)];
if (faults > max_faults) {
max_faults = faults;
max_nid = nid;
Expand Down Expand Up @@ -970,24 +990,28 @@ static void task_numa_placement(struct task_struct *p)
/*
* Got a PROT_NONE fault for a page on @node.
*/
void task_numa_fault(int node, int pages, bool migrated)
void task_numa_fault(int last_nid, int node, int pages, bool migrated)
{
struct task_struct *p = current;
int priv;

if (!numabalancing_enabled)
return;

/* For now, do not attempt to detect private/shared accesses */
priv = 1;

/* Allocate buffer to track faults on a per-node basis */
if (unlikely(!p->numa_faults)) {
int size = sizeof(*p->numa_faults) * nr_node_ids;
int size = sizeof(*p->numa_faults) * 2 * nr_node_ids;

/* numa_faults and numa_faults_buffer share the allocation */
p->numa_faults = kzalloc(size * 2, GFP_KERNEL|__GFP_NOWARN);
if (!p->numa_faults)
return;

BUG_ON(p->numa_faults_buffer);
p->numa_faults_buffer = p->numa_faults + nr_node_ids;
p->numa_faults_buffer = p->numa_faults + (2 * nr_node_ids);
}

/*
Expand All @@ -1005,7 +1029,7 @@ void task_numa_fault(int node, int pages, bool migrated)

task_numa_placement(p);

p->numa_faults_buffer[node] += pages;
p->numa_faults_buffer[task_faults_idx(node, priv)] += pages;
}

static void reset_ptenuma_scan(struct task_struct *p)
Expand Down Expand Up @@ -4146,7 +4170,7 @@ static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
return false;

if (dst_nid == p->numa_preferred_nid ||
p->numa_faults[dst_nid] > p->numa_faults[src_nid])
task_faults(p, dst_nid) > task_faults(p, src_nid))
return true;

return false;
Expand All @@ -4170,7 +4194,7 @@ static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
p->numa_migrate_seq >= sysctl_numa_balancing_settle_count)
return false;

if (p->numa_faults[dst_nid] < p->numa_faults[src_nid])
if (task_faults(p, dst_nid) < task_faults(p, src_nid))
return true;

return false;
Expand Down
5 changes: 3 additions & 2 deletions mm/huge_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -1282,7 +1282,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *page;
unsigned long haddr = addr & HPAGE_PMD_MASK;
int page_nid = -1, this_nid = numa_node_id();
int target_nid;
int target_nid, last_nid = -1;
bool page_locked;
bool migrated = false;

Expand All @@ -1293,6 +1293,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
page = pmd_page(pmd);
BUG_ON(is_huge_zero_page(page));
page_nid = page_to_nid(page);
last_nid = page_nid_last(page);
count_vm_numa_event(NUMA_HINT_FAULTS);
if (page_nid == this_nid)
count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
Expand Down Expand Up @@ -1361,7 +1362,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
page_unlock_anon_vma_read(anon_vma);

if (page_nid != -1)
task_numa_fault(page_nid, HPAGE_PMD_NR, migrated);
task_numa_fault(last_nid, page_nid, HPAGE_PMD_NR, migrated);

return 0;
}
Expand Down
8 changes: 6 additions & 2 deletions mm/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -3536,6 +3536,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *page = NULL;
spinlock_t *ptl;
int page_nid = -1;
int last_nid;
int target_nid;
bool migrated = false;

Expand Down Expand Up @@ -3566,6 +3567,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
}
BUG_ON(is_zero_pfn(page_to_pfn(page)));

last_nid = page_nid_last(page);
page_nid = page_to_nid(page);
target_nid = numa_migrate_prep(page, vma, addr, page_nid);
pte_unmap_unlock(ptep, ptl);
Expand All @@ -3581,7 +3583,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,

out:
if (page_nid != -1)
task_numa_fault(page_nid, 1, migrated);
task_numa_fault(last_nid, page_nid, 1, migrated);
return 0;
}

Expand All @@ -3596,6 +3598,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long offset;
spinlock_t *ptl;
bool numa = false;
int last_nid;

spin_lock(&mm->page_table_lock);
pmd = *pmdp;
Expand Down Expand Up @@ -3643,6 +3646,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (unlikely(page_mapcount(page) != 1))
continue;

last_nid = page_nid_last(page);
page_nid = page_to_nid(page);
target_nid = numa_migrate_prep(page, vma, addr, page_nid);
pte_unmap_unlock(pte, ptl);
Expand All @@ -3655,7 +3659,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
}

if (page_nid != -1)
task_numa_fault(page_nid, 1, migrated);
task_numa_fault(last_nid, page_nid, 1, migrated);

pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
}
Expand Down

0 comments on commit ac8e895

Please sign in to comment.