Skip to content

Commit

Permalink
[S390] System z large page support.
Browse files Browse the repository at this point in the history
This adds hugetlbfs support on System z, using hardware large page support
where available and software large page emulation on older hardware.
Shared (large) page tables are implemented in software emulation mode,
by using page->index of the first tail page from a compound large page
to store page table information.

Signed-off-by: Gerald Schaefer <geraldsc@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
  • Loading branch information
Gerald Schaefer authored and Martin Schwidefsky committed Apr 30, 2008
1 parent 2e5061e commit 53492b1
Show file tree
Hide file tree
Showing 14 changed files with 437 additions and 42 deletions.
16 changes: 16 additions & 0 deletions arch/s390/kernel/early.c
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,19 @@ static noinline __init void setup_lowcore_early(void)
s390_base_pgm_handler_fn = early_pgm_check_handler;
}

/*
 * Record hardware large page support in machine_flags.
 *
 * The facility list returned by stfl() must have both bit 23 and bit 29
 * set; otherwise nothing is changed. When both are present,
 * MACHINE_FLAG_HPAGE is set and __ctl_set_bit(0, 23) is issued
 * (presumably switching on bit 23 of control register 0 -- confirm
 * against the __ctl_set_bit() definition).
 *
 * The whole body is compiled out when CONFIG_DEBUG_PAGEALLOC is defined.
 */
static noinline __init void setup_hpage(void)
{
#ifndef CONFIG_DEBUG_PAGEALLOC
	const unsigned long required = (1UL << 23) | (1UL << 29);
	unsigned int facilities = stfl();

	/* Both facility bits are mandatory; a single one is not enough. */
	if ((facilities & required) == required) {
		machine_flags |= MACHINE_FLAG_HPAGE;
		__ctl_set_bit(0, 23);
	}
#endif
}

static __init void detect_mvpg(void)
{
#ifndef CONFIG_64BIT
Expand Down Expand Up @@ -360,6 +373,8 @@ static __init void detect_machine_facilities(void)
facilities = stfl();
if (facilities & (1 << 28))
machine_flags |= MACHINE_FLAG_IDTE;
if (facilities & (1 << 23))
machine_flags |= MACHINE_FLAG_PFMF;
if (facilities & (1 << 4))
machine_flags |= MACHINE_FLAG_MVCOS;
#endif
Expand Down Expand Up @@ -388,6 +403,7 @@ void __init startup_init(void)
detect_diag9c();
detect_diag44();
detect_machine_facilities();
setup_hpage();
sclp_read_info_early();
sclp_facilities_detect();
memsize = sclp_memory_detect();
Expand Down
2 changes: 1 addition & 1 deletion arch/s390/kernel/head64.S
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ startup_continue:
# virtual and never return ...
.align 16
.Lentry:.quad 0x0000000180000000,_stext
.Lctl: .quad 0x04b50002 # cr0: various things
.Lctl: .quad 0x04350002 # cr0: various things
.quad 0 # cr1: primary space segment table
.quad .Lduct # cr2: dispatchable unit control table
.quad 0 # cr3: instruction authorization
Expand Down
10 changes: 7 additions & 3 deletions arch/s390/kernel/setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -749,6 +749,9 @@ static void __init setup_hwcaps(void)
elf_hwcap |= 1UL << 6;
}

if (MACHINE_HAS_HPAGE)
elf_hwcap |= 1UL << 7;

switch (cpuinfo->cpu_id.machine) {
case 0x9672:
#if !defined(CONFIG_64BIT)
Expand Down Expand Up @@ -872,8 +875,9 @@ void __cpuinit print_cpu_info(struct cpuinfo_S390 *cpuinfo)

static int show_cpuinfo(struct seq_file *m, void *v)
{
static const char *hwcap_str[7] = {
"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp"
static const char *hwcap_str[8] = {
"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
"edat"
};
struct cpuinfo_S390 *cpuinfo;
unsigned long n = (unsigned long) v - 1;
Expand All @@ -888,7 +892,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
num_online_cpus(), loops_per_jiffy/(500000/HZ),
(loops_per_jiffy/(5000/HZ))%100);
seq_puts(m, "features\t: ");
for (i = 0; i < 7; i++)
for (i = 0; i < 8; i++)
if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
seq_printf(m, "%s ", hwcap_str[i]);
seq_puts(m, "\n");
Expand Down
2 changes: 1 addition & 1 deletion arch/s390/mm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o
obj-$(CONFIG_CMM) += cmm.o

obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
3 changes: 3 additions & 0 deletions arch/s390/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/s390_ext.h>
Expand Down Expand Up @@ -367,6 +368,8 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int write)
}

survive:
if (is_vm_hugetlb_page(vma))
address &= HPAGE_MASK;
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
Expand Down
134 changes: 134 additions & 0 deletions arch/s390/mm/hugetlbpage.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
* IBM System z Huge TLB Page Support for Kernel.
*
* Copyright 2007 IBM Corp.
* Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
*/

#include <linux/mm.h>
#include <linux/hugetlb.h>


/*
 * Install a huge page mapping into the segment (pmd) entry that @pteptr
 * really points to.
 *
 * @mm:     address space being modified; mm->context.noexec selects whether
 *          the shadow table is updated as well
 * @addr:   not used by this implementation
 * @pteptr: pointer to the pmd entry, passed in as a pte_t pointer
 * @pteval: value describing the huge page
 *
 * With MACHINE_HAS_HPAGE the value of @pteval is written to the pmd entry
 * unchanged. Without it, the entry is instead built from the page table
 * pointer stored in the index field of the second struct page of the huge
 * page (see arch_prepare_hugepage()), keeping only the
 * _SEGMENT_ENTRY_INV and _SEGMENT_ENTRY_RO bits of @pteval.
 */
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *pteptr, pte_t pteval)
{
	pmd_t *segment = (pmd_t *) pteptr;
	unsigned long entry = pte_val(pteval);
	unsigned long shadow_entry = entry;

	if (!MACHINE_HAS_HPAGE) {
		/* Software emulation: point the segment at the pte table. */
		pte_t *table = (pte_t *) pte_page(pteval)[1].index;
		unsigned long prot = entry &
			(_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);

		entry = (_SEGMENT_ENTRY + __pa(table)) | prot;
		if (mm->context.noexec) {
			/* The shadow entry uses the table PTRS_PER_PTE further on. */
			table += PTRS_PER_PTE;
			shadow_entry = (_SEGMENT_ENTRY + __pa(table)) | prot;
		}
	}

	pmd_val(*segment) = entry;
	if (mm->context.noexec) {
		pmd_t *shadow = get_shadow_table(segment);

		pmd_val(*shadow) = shadow_entry;
	}
}

/*
 * Prepare a freshly allocated huge page for use on machines without
 * hardware large page support.
 *
 * @page: first struct page of the compound huge page
 *
 * With MACHINE_HAS_HPAGE nothing needs to be done. Otherwise a page table
 * is allocated and its PTRS_PER_PTE entries are filled so that entry i
 * maps the i-th PAGE_SIZE piece of the huge page with PAGE_RW. The table
 * pointer is stored in page[1].index, where set_huge_pte_at() and
 * arch_release_hugepage() pick it up again.
 *
 * Returns 0 on success or -ENOMEM if the page table cannot be allocated.
 */
int arch_prepare_hugepage(struct page *page)
{
	unsigned long addr = page_to_phys(page);
	pte_t pte;
	pte_t *ptep;
	int i;

	if (MACHINE_HAS_HPAGE)
		return 0;

	/*
	 * Pass the local "addr"; this function has no identifier named
	 * "address", so the previous spelling only built if pte_alloc_one()
	 * discarded its second argument.
	 */
	ptep = (pte_t *) pte_alloc_one(&init_mm, addr);
	if (!ptep)
		return -ENOMEM;

	pte = mk_pte(page, PAGE_RW);
	for (i = 0; i < PTRS_PER_PTE; i++) {
		set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte);
		/* Advance to the next PAGE_SIZE piece of the huge page. */
		pte_val(pte) += PAGE_SIZE;
	}
	page[1].index = (unsigned long) ptep;
	return 0;
}

/*
 * Undo arch_prepare_hugepage(): free the page table whose pointer is kept
 * in page[1].index and clear that field.
 *
 * @page: first struct page of the compound huge page
 *
 * Does nothing with MACHINE_HAS_HPAGE, or when no table pointer is stored.
 */
void arch_release_hugepage(struct page *page)
{
	pte_t *table;

	if (MACHINE_HAS_HPAGE)
		return;

	table = (pte_t *) page[1].index;
	if (table) {
		pte_free(&init_mm, table);
		page[1].index = 0;
	}
}

/*
 * Look up, allocating the pud and pmd levels as needed, the pmd entry
 * covering @addr in @mm.
 *
 * Returns the pmd entry cast to a pte_t pointer, or NULL if pud_alloc()
 * or pmd_alloc() fails.
 */
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	pud_t *pud = pud_alloc(mm, pgd, addr);

	if (!pud)
		return NULL;
	return (pte_t *) pmd_alloc(mm, pud, addr);
}

/*
 * Find the existing pmd entry covering @addr in @mm without allocating
 * anything.
 *
 * Returns the pmd entry cast to a pte_t pointer, or NULL when the pgd or
 * pud entry on the way is not present.
 */
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	pud_t *pud;

	if (!pgd_present(*pgd))
		return NULL;

	pud = pud_offset(pgd, addr);
	if (!pud_present(*pud))
		return NULL;

	return (pte_t *) pmd_offset(pud, addr);
}

/*
 * Stub: this implementation performs no unsharing and always returns 0.
 * None of the arguments are used.
 */
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
return 0;
}

/*
 * Stub: address-based huge page lookup is not provided here; the function
 * always returns ERR_PTR(-EINVAL). None of the arguments are used.
 */
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
int write)
{
return ERR_PTR(-EINVAL);
}

/*
 * Tell whether @pmd is a large segment entry.
 *
 * Returns 1 only when MACHINE_HAS_HPAGE is true and the entry has
 * _SEGMENT_ENTRY_LARGE set; returns 0 otherwise.
 */
int pmd_huge(pmd_t pmd)
{
	return MACHINE_HAS_HPAGE &&
	       (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
}

/*
 * Return the struct page backing @address inside the huge page mapped by
 * *@pmdp.
 *
 * @mm and @write are not used. Returns NULL without MACHINE_HAS_HPAGE or
 * when pmd_page() yields no page; otherwise the page returned by
 * pmd_page() advanced by the PAGE_SIZE-granular offset of @address within
 * the huge page.
 */
struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
			     pmd_t *pmdp, int write)
{
	struct page *head;
	unsigned long subpage;

	if (!MACHINE_HAS_HPAGE)
		return NULL;

	head = pmd_page(*pmdp);
	if (!head)
		return NULL;

	/* Index of the small page within the huge page. */
	subpage = (address & ~HPAGE_MASK) >> PAGE_SHIFT;
	return head + subpage;
}
23 changes: 0 additions & 23 deletions arch/s390/mm/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,28 +77,6 @@ void show_mem(void)
printk("%lu pages pagetables\n", global_page_state(NR_PAGETABLE));
}

/*
 * Rewrite the kernel page table entries for the range from the page
 * containing _stext up to PFN_ALIGN(_eshared) so that each entry is
 * mk_pte_phys(address, __pgprot(_PAGE_RO)).
 *
 * The pgd/pud/pmd levels are walked without any presence checks, so the
 * page tables for this range must already exist when this is called.
 */
static void __init setup_ro_region(void)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pte_t new_pte;
unsigned long address, end;

/* Round the start down to a page boundary; "end" is PFN_ALIGN()ed. */
address = ((unsigned long)&_stext) & PAGE_MASK;
end = PFN_ALIGN((unsigned long)&_eshared);

for (; address < end; address += PAGE_SIZE) {
pgd = pgd_offset_k(address);
pud = pud_offset(pgd, address);
pmd = pmd_offset(pud, address);
pte = pte_offset_kernel(pmd, address);
/* The entry maps the address onto itself with _PAGE_RO protection. */
new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO));
*pte = new_pte;
}
}

/*
* paging_init() sets up the page tables
*/
Expand All @@ -121,7 +99,6 @@ void __init paging_init(void)
clear_table((unsigned long *) init_mm.pgd, pgd_type,
sizeof(unsigned long)*2048);
vmem_map_init();
setup_ro_region();

/* enable virtual mapping in kernel mode */
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
Expand Down
55 changes: 48 additions & 7 deletions arch/s390/mm/vmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>

static DEFINE_MUTEX(vmem_mutex);

Expand Down Expand Up @@ -113,7 +115,7 @@ static pte_t __init_refok *vmem_pte_alloc(void)
/*
* Add a physical memory range to the 1:1 mapping.
*/
static int vmem_add_range(unsigned long start, unsigned long size)
static int vmem_add_range(unsigned long start, unsigned long size, int ro)
{
unsigned long address;
pgd_t *pg_dir;
Expand All @@ -140,7 +142,19 @@ static int vmem_add_range(unsigned long start, unsigned long size)
pud_populate_kernel(&init_mm, pu_dir, pm_dir);
}

pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
pm_dir = pmd_offset(pu_dir, address);

#ifdef __s390x__
if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) &&
(address + HPAGE_SIZE <= start + size) &&
(address >= HPAGE_SIZE)) {
pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
pmd_val(*pm_dir) = pte_val(pte);
address += HPAGE_SIZE - PAGE_SIZE;
continue;
}
#endif
if (pmd_none(*pm_dir)) {
pt_dir = vmem_pte_alloc();
if (!pt_dir)
Expand All @@ -149,7 +163,6 @@ static int vmem_add_range(unsigned long start, unsigned long size)
}

pt_dir = pte_offset_kernel(pm_dir, address);
pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL);
*pt_dir = pte;
}
ret = 0;
Expand Down Expand Up @@ -180,6 +193,13 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
pm_dir = pmd_offset(pu_dir, address);
if (pmd_none(*pm_dir))
continue;

if (pmd_huge(*pm_dir)) {
pmd_clear_kernel(pm_dir);
address += HPAGE_SIZE - PAGE_SIZE;
continue;
}

pt_dir = pte_offset_kernel(pm_dir, address);
*pt_dir = pte;
}
Expand Down Expand Up @@ -248,14 +268,14 @@ static int vmem_add_mem_map(unsigned long start, unsigned long size)
return ret;
}

static int vmem_add_mem(unsigned long start, unsigned long size)
static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
{
int ret;

ret = vmem_add_mem_map(start, size);
if (ret)
return ret;
return vmem_add_range(start, size);
return vmem_add_range(start, size, ro);
}

/*
Expand Down Expand Up @@ -338,7 +358,7 @@ int add_shared_memory(unsigned long start, unsigned long size)
if (ret)
goto out_free;

ret = vmem_add_mem(start, size);
ret = vmem_add_mem(start, size, 0);
if (ret)
goto out_remove;

Expand Down Expand Up @@ -374,14 +394,35 @@ int add_shared_memory(unsigned long start, unsigned long size)
*/
void __init vmem_map_init(void)
{
unsigned long ro_start, ro_end;
unsigned long start, end;
int i;

INIT_LIST_HEAD(&init_mm.context.crst_list);
INIT_LIST_HEAD(&init_mm.context.pgtable_list);
init_mm.context.noexec = 0;
NODE_DATA(0)->node_mem_map = VMEM_MAP;
for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++)
vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size);
ro_start = ((unsigned long)&_stext) & PAGE_MASK;
ro_end = PFN_ALIGN((unsigned long)&_eshared);
for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
start = memory_chunk[i].addr;
end = memory_chunk[i].addr + memory_chunk[i].size;
if (start >= ro_end || end <= ro_start)
vmem_add_mem(start, end - start, 0);
else if (start >= ro_start && end <= ro_end)
vmem_add_mem(start, end - start, 1);
else if (start >= ro_start) {
vmem_add_mem(start, ro_end - start, 1);
vmem_add_mem(ro_end, end - ro_end, 0);
} else if (end < ro_end) {
vmem_add_mem(start, ro_start - start, 0);
vmem_add_mem(ro_start, end - ro_start, 1);
} else {
vmem_add_mem(start, ro_start - start, 0);
vmem_add_mem(ro_start, ro_end - ro_start, 1);
vmem_add_mem(ro_end, end - ro_end, 0);
}
}
}

/*
Expand Down
Loading

0 comments on commit 53492b1

Please sign in to comment.