Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 25321
b: refs/heads/master
c: 68a3a7f
h: refs/heads/master
i:
  25319: 6e88434
v: v3
  • Loading branch information
Andi Kleen authored and Linus Torvalds committed Apr 9, 2006
1 parent 7307208 commit 42737bd
Show file tree
Hide file tree
Showing 6 changed files with 172 additions and 9 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 9d99aaa31f5994d1923c3713ce9144c4c42332e1
refs/heads/master: 68a3a7feb08f960095072f28ec20f7900793c506
5 changes: 5 additions & 0 deletions trunk/Documentation/x86_64/boot-options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ NUMA

numa=fake=X Fake X nodes and ignore NUMA setup of the actual machine.

numa=hotadd=percent
Only allow hotadd memory to preallocate page structures up to
the given percentage of the already available memory.
numa=hotadd=0 will disable hotadd memory.

ACPI

acpi=off Don't enable ACPI
Expand Down
3 changes: 1 addition & 2 deletions trunk/arch/x86_64/mm/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -530,8 +530,7 @@ int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
unsigned long pfn;
unsigned long total = 0, mem = 0;
for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
unsigned long addr = pfn << PAGE_SHIFT;
if (pfn_valid(pfn) && e820_mapped(addr, addr+1, E820_RAM)) {
if (pfn_valid(pfn)) {
online_page(pfn_to_page(pfn));
err = 0;
mem++;
Expand Down
5 changes: 5 additions & 0 deletions trunk/arch/x86_64/mm/numa.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en

reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
#ifdef CONFIG_ACPI_NUMA
srat_reserve_add_area(nodeid);
#endif
node_set_online(nodeid);
}

Expand Down Expand Up @@ -335,6 +338,8 @@ __init int numa_setup(char *opt)
#ifdef CONFIG_ACPI_NUMA
if (!strncmp(opt,"noacpi",6))
acpi_numa = -1;
if (!strncmp(opt,"hotadd=", 7))
hotadd_percent = simple_strtoul(opt+7, NULL, 10);
#endif
return 1;
}
Expand Down
164 changes: 158 additions & 6 deletions trunk/arch/x86_64/mm/srat.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,26 @@
#include <linux/bitmap.h>
#include <linux/module.h>
#include <linux/topology.h>
#include <linux/bootmem.h>
#include <linux/mm.h>
#include <asm/proto.h>
#include <asm/numa.h>
#include <asm/e820.h>

/*
 * RESERVE_HOTADD is defined when CONFIG_ACPI_HOTPLUG_MEMORY or
 * CONFIG_ACPI_HOTPLUG_MEMORY_MODULE is defined and CONFIG_MEMORY_HOTPLUG
 * is not.  It guards the hot-add budget check and reservation helpers
 * further down in this file.
 */
#if (defined(CONFIG_ACPI_HOTPLUG_MEMORY) || \
defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)) \
&& !defined(CONFIG_MEMORY_HOTPLUG)
#define RESERVE_HOTADD 1
#endif

static struct acpi_table_slit *acpi_slit;

/* Nodes for which a memory affinity entry has been recorded in nodes[]. */
static nodemask_t nodes_parsed __initdata;
static nodemask_t nodes_found __initdata;
/* Per-node memory range (start/end) built up from the SRAT memory entries. */
static struct bootnode nodes[MAX_NUMNODES] __initdata;
/*
 * Per-node hot-add range recorded by reserve_hotadd().  start == end is
 * treated there as "nothing recorded yet".
 */
static struct bootnode nodes_add[MAX_NUMNODES] __initdata;
/* Set to 1 by reserve_hotadd() once a hot-add area passed its checks. */
static int found_add_area __initdata;
/*
 * Percentage used to bound the page structures preallocated for hot-add
 * memory; defaults to 10 and is overridden by the numa=hotadd= boot option.
 * A value of 0 makes hot-pluggable SRAT memory entries be skipped.
 */
int hotadd_percent __initdata = 10;
/* Every entry starts out as 0xff. */
static u8 pxm2node[256] = { [0 ... 255] = 0xff };

/* Too small nodes confuse the VM badly. Usually they result
Expand Down Expand Up @@ -71,6 +82,10 @@ static __init int conflicting_nodes(unsigned long start, unsigned long end)
static __init void cutoff_node(int i, unsigned long start, unsigned long end)
{
struct bootnode *nd = &nodes[i];

if (found_add_area)
return;

if (nd->start < start) {
nd->start = start;
if (nd->end < nd->start)
Expand All @@ -90,6 +105,8 @@ static __init void bad_srat(void)
acpi_numa = -1;
for (i = 0; i < MAX_LOCAL_APIC; i++)
apicid_to_node[i] = NUMA_NO_NODE;
for (i = 0; i < MAX_NUMNODES; i++)
nodes_add[i].start = nodes[i].end = 0;
}

static __init inline int srat_disabled(void)
Expand Down Expand Up @@ -155,11 +172,114 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
pxm, pa->apic_id, node);
}

#ifdef RESERVE_HOTADD
/*
 * Protect against too large hotadd areas that would fill up memory.
 *
 * Decides whether the struct page array needed for the range described by
 * @nd may be preallocated.  The budget is hotadd_percent percent of the
 * memory below end_pfn (e820 holes excluded); the bytes granted so far are
 * accumulated across calls in the static 'allocated'.
 *
 * If the range does not fit completely but some budget is left, the range
 * is trimmed from the top (nd->end is lowered) so that its page structures
 * fit into what remains of the budget.
 *
 * Returns 1 if the (possibly trimmed) area is accepted and 0 if it is
 * rejected.  @nd is only modified when 1 is returned.
 */
static int hotadd_enough_memory(struct bootnode *nd)
{
	static unsigned long allocated;
	static unsigned long last_area_end;
	unsigned long pages = (nd->end - nd->start) >> PAGE_SHIFT;
	long mem = pages * sizeof(struct page);
	unsigned long addr;
	unsigned long allowed;
	unsigned long oldpages = pages;

	/* A byte count that turns negative as a signed long is bogus. */
	if (mem < 0)
		return 0;
	allowed = (end_pfn - e820_hole_size(0, end_pfn)) * PAGE_SIZE;
	allowed = (allowed / 100) * hotadd_percent;
	if (allocated + mem > allowed) {
		/* Give them at least part of their hotadd memory up to
		   hotadd_percent.  It would be better to spread the limit out
		   over multiple hotplug areas, but that is too complicated
		   right now */
		if (allocated >= allowed)
			return 0;
		/*
		 * Only the remaining budget may be used.  Adding 'mem' back
		 * in here would yield at least the requested page count and
		 * so enlarge the area instead of limiting it.
		 */
		pages = (allowed - allocated) / sizeof(struct page);
		if (pages == 0)
			return 0;
		mem = pages * sizeof(struct page);
	}
	/* Not completely fool proof, but a good sanity check.
	   NOTE(review): presumably find_e820_area() looks for 'mem' free
	   bytes between the two addresses; -1UL is treated as failure. */
	addr = find_e820_area(last_area_end, end_pfn<<PAGE_SHIFT, mem);
	if (addr == -1UL)
		return 0;
	if (pages != oldpages) {
		/* Commit the trimmed end only once the area is accepted. */
		nd->end = nd->start + pages*PAGE_SIZE;
		printk(KERN_NOTICE "SRAT: Hotadd area limited to %lu bytes\n",
			pages << PAGE_SHIFT);
	}
	last_area_end = addr + mem;
	allocated += mem;
	return 1;
}

/*
* It is fine to add this area to the nodes data it will be used later
* This code supports one contigious hot add area per node.
*/
static int reserve_hotadd(int node, unsigned long start, unsigned long end)
{
unsigned long s_pfn = start >> PAGE_SHIFT;
unsigned long e_pfn = end >> PAGE_SHIFT;
int changed = 0;
struct bootnode *nd = &nodes_add[node];

/* I had some trouble with strange memory hotadd regions breaking
the boot. Be very strict here and reject anything unexpected.
If you want working memory hotadd write correct SRATs.
The node size check is a basic sanity check to guard against
mistakes */
if ((signed long)(end - start) < NODE_MIN_SIZE) {
printk(KERN_ERR "SRAT: Hotplug area too small\n");
return -1;
}

/* This check might be a bit too strict, but I'm keeping it for now. */
if (e820_hole_size(s_pfn, e_pfn) != e_pfn - s_pfn) {
printk(KERN_ERR "SRAT: Hotplug area has existing memory\n");
return -1;
}

if (!hotadd_enough_memory(&nodes_add[node])) {
printk(KERN_ERR "SRAT: Hotplug area too large\n");
return -1;
}

/* Looks good */

found_add_area = 1;
if (nd->start == nd->end) {
nd->start = start;
nd->end = end;
changed = 1;
} else {
if (nd->start == end) {
nd->start = start;
changed = 1;
}
if (nd->end == start) {
nd->end = end;
changed = 1;
}
if (!changed)
printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
}

if ((nd->end >> PAGE_SHIFT) > end_pfn)
end_pfn = nd->end >> PAGE_SHIFT;

if (changed)
printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
return 0;
}
#endif

/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
void __init
acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
{
struct bootnode *nd;
struct bootnode *nd, oldnode;
unsigned long start, end;
int node, pxm;
int i;
Expand All @@ -172,6 +292,8 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
}
if (ma->flags.enabled == 0)
return;
if (ma->flags.hot_pluggable && hotadd_percent == 0)
return;
start = ma->base_addr_lo | ((u64)ma->base_addr_hi << 32);
end = start + (ma->length_lo | ((u64)ma->length_hi << 32));
pxm = ma->proximity_domain;
Expand All @@ -181,10 +303,6 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
bad_srat();
return;
}
/* It is fine to add this area to the nodes data it will be used later*/
if (ma->flags.hot_pluggable == 1)
printk(KERN_INFO "SRAT: hot plug zone found %lx - %lx \n",
start, end);
i = conflicting_nodes(start, end);
if (i == node) {
printk(KERN_WARNING
Expand All @@ -199,6 +317,7 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
return;
}
nd = &nodes[node];
oldnode = *nd;
if (!node_test_and_set(node, nodes_parsed)) {
nd->start = start;
nd->end = end;
Expand All @@ -208,8 +327,19 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
if (nd->end < end)
nd->end = end;
}

printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
nd->start, nd->end);

#ifdef RESERVE_HOTADD
if (ma->flags.hot_pluggable && reserve_hotadd(node, start, end) < 0) {
/* Ignore hotadd region. Undo damage */
printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
*nd = oldnode;
if ((nd->start | nd->end) == 0)
node_clear(node, nodes_parsed);
}
#endif
}

/* Sanity check to catch more bad SRATs (they are amazingly common).
Expand All @@ -225,6 +355,9 @@ static int nodes_cover_memory(void)
unsigned long e = nodes[i].end >> PAGE_SHIFT;
pxmram += e - s;
pxmram -= e820_hole_size(s, e);
pxmram -= nodes_add[i].end - nodes_add[i].start;
if ((long)pxmram < 0)
pxmram = 0;
}

e820ram = end_pfn - e820_hole_size(0, end_pfn);
Expand Down Expand Up @@ -258,7 +391,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)

/* First clean up the node list */
for (i = 0; i < MAX_NUMNODES; i++) {
cutoff_node(i, start, end);
cutoff_node(i, start, end);
if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE)
unparse_node(i);
}
Expand Down Expand Up @@ -303,6 +436,25 @@ static int node_to_pxm(int n)
return 0;
}

/*
 * Hand the hot-add range recorded for @nodeid to reserve_bootmem_node()
 * and log the range together with the size, in MB, of the page structures
 * covering it.
 *
 * Does nothing unless a hot-add area was found and nodes_add[nodeid].end
 * is non-zero.
 */
void __init srat_reserve_add_area(int nodeid)
{
	struct bootnode *area = &nodes_add[nodeid];
	u64 cost;

	if (!found_add_area || !area->end)
		return;

	printk(KERN_INFO "SRAT: Reserving hot-add memory space "
		"for node %d at %Lx-%Lx\n",
		nodeid, area->start, area->end);

	/* pages in the range -> bytes of struct page -> megabytes */
	cost = (area->end - area->start) >> PAGE_SHIFT;
	cost *= sizeof(struct page);
	cost >>= 20;
	printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
		"pre-allocated memory.\n", (unsigned long long)cost);

	reserve_bootmem_node(NODE_DATA(nodeid), area->start,
			area->end - area->start);
}

int __node_distance(int a, int b)
{
int index;
Expand Down
2 changes: 2 additions & 0 deletions trunk/include/asm-x86_64/numa.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ extern void numa_init_array(void);
extern int numa_off;

extern void numa_set_node(int cpu, int node);
extern void srat_reserve_add_area(int nodeid);
extern int hotadd_percent;

extern unsigned char apicid_to_node[256];
#ifdef CONFIG_NUMA
Expand Down

0 comments on commit 42737bd

Please sign in to comment.