Skip to content

Commit

Permalink
s390/numa: add core infrastructure
Browse files Browse the repository at this point in the history
Enable core NUMA support for s390 and add one simple default mode "plain"
that creates one single NUMA node.

This patch contains several changes from Michael Holzheu.

Signed-off-by: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
  • Loading branch information
Philipp Hachtmann authored and Martin Schwidefsky committed Aug 3, 2015
1 parent 199071f commit 3a368f7
Show file tree
Hide file tree
Showing 15 changed files with 375 additions and 26 deletions.
1 change: 1 addition & 0 deletions arch/s390/Kbuild
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ obj-$(CONFIG_S390_HYPFS_FS) += hypfs/
obj-$(CONFIG_APPLDATA_BASE) += appldata/
obj-y += net/
obj-$(CONFIG_PCI) += pci/
obj-$(CONFIG_NUMA) += numa/
37 changes: 37 additions & 0 deletions arch/s390/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ config S390
select TTY
select VIRT_CPU_ACCOUNTING
select VIRT_TO_BUS
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_WANTS_PROT_NUMA_PROT_NONE
select HAVE_ARCH_EARLY_PFN_TO_NID


config SCHED_OMIT_FRAME_POINTER
def_bool y
Expand Down Expand Up @@ -386,6 +390,39 @@ config HOTPLUG_CPU
config SCHED_SMT
def_bool n

# Some NUMA nodes have memory ranges that span
# other nodes. Even though a pfn is valid and
# between a node's start and end pfns, it may not
# reside on that node. See memmap_init_zone()
# for details. <- They meant memory holes!
config NODES_SPAN_OTHER_NODES
def_bool NUMA

config NUMA
bool "NUMA support"
depends on SMP && 64BIT && SCHED_TOPOLOGY
default n
help
Enable NUMA support

This option adds NUMA support to the kernel.

An operation mode can be selected by appending
numa=<method> to the kernel command line.

The default behaviour is identical to appending numa=plain to
the command line. This will create just one node with all
available memory and all CPUs in it.

config NODES_SHIFT
int "Maximum NUMA nodes (as a power of 2)"
range 1 10
depends on NUMA
default "4"
help
Specify the maximum number of NUMA nodes available on the target
system. Increases memory reserved to accommodate various tables.

config SCHED_MC
def_bool n

Expand Down
16 changes: 16 additions & 0 deletions arch/s390/include/asm/mmzone.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/*
* NUMA support for s390
*
* Copyright IBM Corp. 2015
*/

#ifndef _ASM_S390_MMZONE_H
#define _ASM_S390_MMZONE_H

#ifdef CONFIG_NUMA

/*
 * Array of pglist_data pointers, indexed by node id (see NODE_DATA below).
 * Only declared here; the definition is not part of this header.
 */
extern struct pglist_data *node_data[];
/* Expands to the pglist_data pointer stored for node 'nid'. */
#define NODE_DATA(nid) (node_data[nid])

#endif /* CONFIG_NUMA */
#endif /* _ASM_S390_MMZONE_H */
31 changes: 31 additions & 0 deletions arch/s390/include/asm/numa.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* NUMA support for s390
*
* Declare the NUMA core code structures and functions.
*
* Copyright IBM Corp. 2015
*/

#ifndef _ASM_S390_NUMA_H
#define _ASM_S390_NUMA_H

#ifdef CONFIG_NUMA

#include <linux/numa.h>
#include <linux/cpumask.h>

/*
 * NUMA core entry points. Only the prototypes live here; the
 * implementations are not part of this header.
 */
void numa_setup(void);
int numa_pfn_to_nid(unsigned long pfn);
int __node_distance(int a, int b);
void numa_update_cpu_topology(void);

/* One cpumask per node, MAX_NUMNODES entries; indexed by node id. */
extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
/* NOTE(review): presumably a flag enabling NUMA debug output - confirm. */
extern int numa_debug_enabled;

#else

/*
 * Without CONFIG_NUMA these calls compile to nothing, so callers can
 * invoke them unconditionally.
 */
static inline void numa_setup(void) { }
static inline void numa_update_cpu_topology(void) { }

#endif /* CONFIG_NUMA */
#endif /* _ASM_S390_NUMA_H */
16 changes: 16 additions & 0 deletions arch/s390/include/asm/pci.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,4 +192,20 @@ void zpci_debug_init_device(struct zpci_dev *);
void zpci_debug_exit_device(struct zpci_dev *);
void zpci_debug_info(struct zpci_dev *, struct seq_file *);

#ifdef CONFIG_NUMA

/*
 * Report the NUMA node a PCI bus belongs to.
 *
 * @bus: the PCI bus being queried (not inspected).
 *
 * Always returns NUMA_NO_NODE: no bus-to-node association is
 * established here.
 */
static inline int __pcibus_to_node(const struct pci_bus *bus)
{
	return NUMA_NO_NODE;
}

/*
 * Report the set of CPUs associated with a PCI bus.
 *
 * @bus: the PCI bus being queried (not inspected).
 *
 * Always returns cpu_online_mask, i.e. every online CPU, regardless
 * of which bus is passed in.
 */
static inline const struct cpumask *cpumask_of_pcibus(const struct pci_bus *bus)
{
	return cpu_online_mask;
}

#endif /* CONFIG_NUMA */

#endif
39 changes: 39 additions & 0 deletions arch/s390/include/asm/topology.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define _ASM_S390_TOPOLOGY_H

#include <linux/cpumask.h>
#include <asm/numa.h>

struct sysinfo_15_1_x;
struct cpu;
Expand All @@ -13,6 +14,7 @@ struct cpu_topology_s390 {
unsigned short core_id;
unsigned short socket_id;
unsigned short book_id;
unsigned short node_id;
cpumask_t thread_mask;
cpumask_t core_mask;
cpumask_t book_mask;
Expand Down Expand Up @@ -52,6 +54,43 @@ static inline void topology_expect_change(void) { }
#define POLARIZATION_VM (2)
#define POLARIZATION_VH (3)

#define SD_BOOK_INIT SD_CPU_INIT

#ifdef CONFIG_NUMA

#define cpu_to_node cpu_to_node
/*
 * Look up the NUMA node of a CPU.
 *
 * @cpu: CPU number whose per-CPU topology entry is read.
 *
 * Returns the node_id field of that CPU's cpu_topology entry.
 */
static inline int cpu_to_node(int cpu)
{
	int nid = per_cpu(cpu_topology, cpu).node_id;

	return nid;
}

/* Returns a pointer to the cpumask of CPUs on node 'node'. */
#define cpumask_of_node cpumask_of_node
static inline const struct cpumask *cpumask_of_node(int node)
{
return node_to_cpumask_map[node];
}

/*
 * Returns the number of the node containing node 'node'. This
 * architecture is flat, so it is a pretty simple function!
 * (It expands to its own argument.)
 */
#define parent_node(node) (node)

/* Forwards to __pcibus_to_node() for the given bus. */
#define pcibus_to_node(bus) __pcibus_to_node(bus)

/* Forwards to __node_distance(), which is declared in <asm/numa.h>. */
#define node_distance(a, b) __node_distance(a, b)

#else /* !CONFIG_NUMA */

#define numa_node_id numa_node_id
/*
 * Non-NUMA build: there is only one node, so the current node id
 * is always 0.
 */
static inline int numa_node_id(void)
{
	const int only_node = 0;

	return only_node;
}

#endif /* CONFIG_NUMA */

#include <asm-generic/topology.h>

#endif /* _ASM_S390_TOPOLOGY_H */
12 changes: 6 additions & 6 deletions arch/s390/include/asm/unistd.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@

#define __IGNORE_time

/* Ignore NUMA system calls. Not wired up on s390. */
#define __IGNORE_mbind
#define __IGNORE_get_mempolicy
#define __IGNORE_set_mempolicy
#define __IGNORE_migrate_pages
#define __IGNORE_move_pages
/*
 * NUMA system calls.
 * All five markers use the same double-underscore __ARCH_WANT_ prefix;
 * a single-underscore spelling would define a different, unrelated macro.
 */
#define __ARCH_WANT_mbind
#define __ARCH_WANT_get_mempolicy
#define __ARCH_WANT_set_mempolicy
#define __ARCH_WANT_migrate_pages
#define __ARCH_WANT_move_pages

/* Ignore system calls that are also reachable via sys_socket */
#define __IGNORE_recvmmsg
Expand Down
10 changes: 5 additions & 5 deletions arch/s390/include/uapi/asm/unistd.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,9 @@
#define __NR_statfs64 265
#define __NR_fstatfs64 266
#define __NR_remap_file_pages 267
/* Number 268 is reserved for new sys_mbind */
/* Number 269 is reserved for new sys_get_mempolicy */
/* Number 270 is reserved for new sys_set_mempolicy */
#define __NR_mbind 268
#define __NR_get_mempolicy 269
#define __NR_set_mempolicy 270
#define __NR_mq_open 271
#define __NR_mq_unlink 272
#define __NR_mq_timedsend 273
Expand All @@ -223,7 +223,7 @@
#define __NR_inotify_init 284
#define __NR_inotify_add_watch 285
#define __NR_inotify_rm_watch 286
/* Number 287 is reserved for new sys_migrate_pages */
#define __NR_migrate_pages 287
#define __NR_openat 288
#define __NR_mkdirat 289
#define __NR_mknodat 290
Expand All @@ -245,7 +245,7 @@
#define __NR_sync_file_range 307
#define __NR_tee 308
#define __NR_vmsplice 309
/* Number 310 is reserved for new sys_move_pages */
#define __NR_move_pages 310
#define __NR_getcpu 311
#define __NR_epoll_pwait 312
#define __NR_utimes 313
Expand Down
2 changes: 2 additions & 0 deletions arch/s390/kernel/setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
#include <asm/os_info.h>
#include <asm/sclp.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>
#include "entry.h"

/*
Expand Down Expand Up @@ -879,6 +880,7 @@ void __init setup_arch(char **cmdline_p)
setup_lowcore();
smp_fill_possible_mask();
cpu_init();
numa_setup();

/*
* Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
Expand Down
10 changes: 5 additions & 5 deletions arch/s390/kernel/syscalls.S
Original file line number Diff line number Diff line change
Expand Up @@ -276,9 +276,9 @@ SYSCALL(sys_ni_syscall,compat_sys_s390_fadvise64_64)
SYSCALL(sys_statfs64,compat_sys_statfs64)
SYSCALL(sys_fstatfs64,compat_sys_fstatfs64)
SYSCALL(sys_remap_file_pages,compat_sys_remap_file_pages)
NI_SYSCALL /* 268 sys_mbind */
NI_SYSCALL /* 269 sys_get_mempolicy */
NI_SYSCALL /* 270 sys_set_mempolicy */
SYSCALL(sys_mbind,compat_sys_mbind)
SYSCALL(sys_get_mempolicy,compat_sys_get_mempolicy)
SYSCALL(sys_set_mempolicy,compat_sys_set_mempolicy)
SYSCALL(sys_mq_open,compat_sys_mq_open)
SYSCALL(sys_mq_unlink,compat_sys_mq_unlink)
SYSCALL(sys_mq_timedsend,compat_sys_mq_timedsend)
Expand All @@ -295,7 +295,7 @@ SYSCALL(sys_ioprio_get,compat_sys_ioprio_get)
SYSCALL(sys_inotify_init,sys_inotify_init)
SYSCALL(sys_inotify_add_watch,compat_sys_inotify_add_watch) /* 285 */
SYSCALL(sys_inotify_rm_watch,compat_sys_inotify_rm_watch)
NI_SYSCALL /* 287 sys_migrate_pages */
SYSCALL(sys_migrate_pages,compat_sys_migrate_pages)
SYSCALL(sys_openat,compat_sys_openat)
SYSCALL(sys_mkdirat,compat_sys_mkdirat)
SYSCALL(sys_mknodat,compat_sys_mknodat) /* 290 */
Expand All @@ -318,7 +318,7 @@ SYSCALL(sys_splice,compat_sys_splice)
SYSCALL(sys_sync_file_range,compat_sys_s390_sync_file_range)
SYSCALL(sys_tee,compat_sys_tee)
SYSCALL(sys_vmsplice,compat_sys_vmsplice)
NI_SYSCALL /* 310 sys_move_pages */
SYSCALL(sys_move_pages,compat_sys_move_pages)
SYSCALL(sys_getcpu,compat_sys_getcpu)
SYSCALL(sys_epoll_pwait,compat_sys_epoll_pwait)
SYSCALL(sys_utimes,compat_sys_utimes)
Expand Down
21 changes: 12 additions & 9 deletions arch/s390/kernel/topology.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/node.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>

#define PTF_HORIZONTAL (0UL)
#define PTF_VERTICAL (1UL)
Expand Down Expand Up @@ -260,6 +263,7 @@ static void update_cpu_masks(void)
}
}
spin_unlock_irqrestore(&topology_lock, flags);
numa_update_cpu_topology();
}

void store_topology(struct sysinfo_15_1_x *info)
Expand All @@ -274,21 +278,21 @@ int arch_update_cpu_topology(void)
{
struct sysinfo_15_1_x *info = tl_info;
struct device *dev;
int cpu;
int cpu, rc = 0;

if (!MACHINE_HAS_TOPOLOGY) {
update_cpu_masks();
topology_update_polarization_simple();
return 0;
if (MACHINE_HAS_TOPOLOGY) {
rc = 1;
store_topology(info);
tl_to_masks(info);
}
store_topology(info);
tl_to_masks(info);
update_cpu_masks();
if (!MACHINE_HAS_TOPOLOGY)
topology_update_polarization_simple();
for_each_online_cpu(cpu) {
dev = get_cpu_device(cpu);
kobject_uevent(&dev->kobj, KOBJ_CHANGE);
}
return 1;
return rc;
}

static void topology_work_fn(struct work_struct *work)
Expand Down Expand Up @@ -450,7 +454,6 @@ static struct sched_domain_topology_level s390_topology[] = {
{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
{ cpu_book_mask, SD_INIT_NAME(BOOK) },
{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
{ NULL, },
};

Expand Down
2 changes: 1 addition & 1 deletion arch/s390/mm/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ void __init mem_init(void)
cpumask_set_cpu(0, mm_cpumask(&init_mm));
atomic_set(&init_mm.context.attach_count, 1);

max_mapnr = max_low_pfn;
set_max_mapnr(max_low_pfn);
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);

/* Setup guest page hinting */
Expand Down
1 change: 1 addition & 0 deletions arch/s390/numa/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
obj-y += numa.o
Loading

0 comments on commit 3a368f7

Please sign in to comment.