Skip to content

Commit

Permalink
hugetlb: balance freeing of huge pages across nodes
Browse files Browse the repository at this point in the history
Free huge pages from nodes in round-robin fashion in an attempt to keep
[persistent a.k.a. static] hugepages balanced across nodes.

New function free_pool_huge_page() is modeled on and performs roughly the
inverse of alloc_fresh_huge_page().  Replaces dequeue_huge_page() which
now has no callers, so this patch removes it.

Helper function hstate_next_node_to_free() uses new hstate member
next_nid_to_free to distribute "frees" across all nodes with huge pages.

Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
Lee Schermerhorn authored and Linus Torvalds committed Sep 22, 2009
1 parent 55a4462 commit e8c5c82
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 47 deletions.
3 changes: 2 additions & 1 deletion include/linux/hugetlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
#define HSTATE_NAME_LEN 32
/* Defines one hugetlb page size */
struct hstate {
int hugetlb_next_nid;
int next_nid_to_alloc;
int next_nid_to_free;
unsigned int order;
unsigned long mask;
unsigned long max_huge_pages;
Expand Down
132 changes: 86 additions & 46 deletions mm/hugetlb.c
Original file line number Diff line number Diff line change
Expand Up @@ -456,24 +456,6 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
h->free_huge_pages_node[nid]++;
}

/*
 * Remove one huge page from the free lists of @h.
 *
 * Nodes are scanned in ascending order starting at node 0; the first
 * page on the first non-empty free list is unlinked and both the global
 * and the per-node free counters are decremented.
 *
 * Returns the unlinked page, or NULL if every free list is empty.
 */
static struct page *dequeue_huge_page(struct hstate *h)
{
	int node;

	for (node = 0; node < MAX_NUMNODES; node++) {
		struct page *first;

		/* Nothing to take from this node; try the next one. */
		if (list_empty(&h->hugepage_freelists[node]))
			continue;

		first = list_entry(h->hugepage_freelists[node].next,
				   struct page, lru);
		list_del(&first->lru);
		h->free_huge_pages--;
		h->free_huge_pages_node[node]--;
		return first;
	}

	return NULL;
}

static struct page *dequeue_huge_page_vma(struct hstate *h,
struct vm_area_struct *vma,
unsigned long address, int avoid_reserve)
Expand Down Expand Up @@ -641,7 +623,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)

/*
* Use a helper variable to find the next node and then
* copy it back to hugetlb_next_nid afterwards:
* copy it back to next_nid_to_alloc afterwards:
* otherwise there's a window in which a racer might
* pass invalid nid MAX_NUMNODES to alloc_pages_exact_node.
* But we don't need to use a spin_lock here: it really
Expand All @@ -650,13 +632,13 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
* if we just successfully allocated a hugepage so that
* the next caller gets hugepages on the next node.
*/
static int hstate_next_node(struct hstate *h)
static int hstate_next_node_to_alloc(struct hstate *h)
{
int next_nid;
next_nid = next_node(h->hugetlb_next_nid, node_online_map);
next_nid = next_node(h->next_nid_to_alloc, node_online_map);
if (next_nid == MAX_NUMNODES)
next_nid = first_node(node_online_map);
h->hugetlb_next_nid = next_nid;
h->next_nid_to_alloc = next_nid;
return next_nid;
}

Expand All @@ -667,14 +649,15 @@ static int alloc_fresh_huge_page(struct hstate *h)
int next_nid;
int ret = 0;

start_nid = h->hugetlb_next_nid;
start_nid = h->next_nid_to_alloc;
next_nid = start_nid;

do {
page = alloc_fresh_huge_page_node(h, h->hugetlb_next_nid);
page = alloc_fresh_huge_page_node(h, next_nid);
if (page)
ret = 1;
next_nid = hstate_next_node(h);
} while (!page && h->hugetlb_next_nid != start_nid);
next_nid = hstate_next_node_to_alloc(h);
} while (!page && next_nid != start_nid);

if (ret)
count_vm_event(HTLB_BUDDY_PGALLOC);
Expand All @@ -684,6 +667,52 @@ static int alloc_fresh_huge_page(struct hstate *h)
return ret;
}

/*
 * Helper for free_pool_huge_page(): step h->next_nid_to_free to the
 * next node in node_online_map and return the new value.  When
 * next_node() reports MAX_NUMNODES, restart from the first online node.
 */
static int hstate_next_node_to_free(struct hstate *h)
{
	int nid = next_node(h->next_nid_to_free, node_online_map);

	if (nid == MAX_NUMNODES)
		nid = first_node(node_online_map);

	h->next_nid_to_free = nid;
	return nid;
}

/*
 * Release one huge page from the pool, choosing the node round-robin
 * via h->next_nid_to_free so that persistent huge pages stay more or
 * less balanced over the allowed nodes.
 * Called with hugetlb_lock locked.
 *
 * Returns 1 if a page was freed, 0 if a full pass over the nodes found
 * no free huge page.
 */
static int free_pool_huge_page(struct hstate *h)
{
	const int first_nid = h->next_nid_to_free;
	int nid = first_nid;
	int freed = 0;

	for (;;) {
		if (!list_empty(&h->hugepage_freelists[nid])) {
			struct page *victim =
				list_entry(h->hugepage_freelists[nid].next,
					   struct page, lru);

			list_del(&victim->lru);
			h->free_huge_pages--;
			h->free_huge_pages_node[nid]--;
			update_and_free_page(h, victim);
			freed = 1;
		}

		/*
		 * Advance the cursor even after a successful free so the
		 * next call starts on the following node.
		 */
		nid = hstate_next_node_to_free(h);

		/* Stop once a page was freed or we are back at the start. */
		if (freed || nid == first_nid)
			break;
	}

	return freed;
}

static struct page *alloc_buddy_huge_page(struct hstate *h,
struct vm_area_struct *vma, unsigned long address)
{
Expand Down Expand Up @@ -1008,7 +1037,7 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
void *addr;

addr = __alloc_bootmem_node_nopanic(
NODE_DATA(h->hugetlb_next_nid),
NODE_DATA(h->next_nid_to_alloc),
huge_page_size(h), huge_page_size(h), 0);

if (addr) {
Expand All @@ -1020,7 +1049,7 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
m = addr;
goto found;
}
hstate_next_node(h);
hstate_next_node_to_alloc(h);
nr_nodes--;
}
return 0;
Expand Down Expand Up @@ -1141,31 +1170,43 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count)
*/
static int adjust_pool_surplus(struct hstate *h, int delta)
{
static int prev_nid;
int nid = prev_nid;
int start_nid, next_nid;
int ret = 0;

VM_BUG_ON(delta != -1 && delta != 1);
do {
nid = next_node(nid, node_online_map);
if (nid == MAX_NUMNODES)
nid = first_node(node_online_map);

/* To shrink on this node, there must be a surplus page */
if (delta < 0 && !h->surplus_huge_pages_node[nid])
continue;
/* Surplus cannot exceed the total number of pages */
if (delta > 0 && h->surplus_huge_pages_node[nid] >=
if (delta < 0)
start_nid = h->next_nid_to_alloc;
else
start_nid = h->next_nid_to_free;
next_nid = start_nid;

do {
int nid = next_nid;
if (delta < 0) {
next_nid = hstate_next_node_to_alloc(h);
/*
* To shrink on this node, there must be a surplus page
*/
if (!h->surplus_huge_pages_node[nid])
continue;
}
if (delta > 0) {
next_nid = hstate_next_node_to_free(h);
/*
* Surplus cannot exceed the total number of pages
*/
if (h->surplus_huge_pages_node[nid] >=
h->nr_huge_pages_node[nid])
continue;
continue;
}

h->surplus_huge_pages += delta;
h->surplus_huge_pages_node[nid] += delta;
ret = 1;
break;
} while (nid != prev_nid);
} while (next_nid != start_nid);

prev_nid = nid;
return ret;
}

Expand Down Expand Up @@ -1227,10 +1268,8 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
min_count = max(count, min_count);
try_to_free_low(h, min_count);
while (min_count < persistent_huge_pages(h)) {
struct page *page = dequeue_huge_page(h);
if (!page)
if (!free_pool_huge_page(h))
break;
update_and_free_page(h, page);
}
while (count < persistent_huge_pages(h)) {
if (!adjust_pool_surplus(h, 1))
Expand Down Expand Up @@ -1442,7 +1481,8 @@ void __init hugetlb_add_hstate(unsigned order)
h->free_huge_pages = 0;
for (i = 0; i < MAX_NUMNODES; ++i)
INIT_LIST_HEAD(&h->hugepage_freelists[i]);
h->hugetlb_next_nid = first_node(node_online_map);
h->next_nid_to_alloc = first_node(node_online_map);
h->next_nid_to_free = first_node(node_online_map);
snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
huge_page_size(h)/1024);

Expand Down

0 comments on commit e8c5c82

Please sign in to comment.