Skip to content

Commit

Permalink
[S390] dynamic page tables.
Browse files Browse the repository at this point in the history
Add support for different number of page table levels dependent
on the highest address used for a process. This will cause a 31 bit
process to use a two level page table instead of the four level page
table that is the default after the pud has been introduced. Likewise
a normal 64 bit process will use three levels instead of four. Only
if a process runs out of the 4 tera bytes which can be addressed with
a three level page table the fourth level is dynamically added. Then
the process can use up to 8 peta byte.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
  • Loading branch information
Martin Schwidefsky committed Feb 9, 2008
1 parent 5a216a2 commit 6252d70
Show file tree
Hide file tree
Showing 13 changed files with 258 additions and 48 deletions.
11 changes: 11 additions & 0 deletions arch/s390/kernel/binfmt_elf32.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ static inline int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs)
}

#include <asm/processor.h>
#include <asm/pgalloc.h>
#include <linux/module.h>
#include <linux/elfcore.h>
#include <linux/binfmts.h>
Expand Down Expand Up @@ -183,6 +184,16 @@ struct elf_prpsinfo32
#undef start_thread
#define start_thread start_thread31

static inline void start_thread31(struct pt_regs *regs, unsigned long new_psw,
unsigned long new_stackp)
{
set_fs(USER_DS);
regs->psw.mask = psw_user32_bits;
regs->psw.addr = new_psw;
regs->gprs[15] = new_stackp;
crst_table_downgrade(current->mm, 1UL << 31);
}

MODULE_DESCRIPTION("Binary format loader for compatibility with 32bit Linux for S390 binaries,"
" Copyright 2000 IBM Corporation");
MODULE_AUTHOR("Gerhard Tonn <ton@de.ibm.com>");
Expand Down
3 changes: 2 additions & 1 deletion arch/s390/kernel/traps.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ int sysctl_userprocess_debug = 0;
extern pgm_check_handler_t do_protection_exception;
extern pgm_check_handler_t do_dat_exception;
extern pgm_check_handler_t do_monitor_call;
extern pgm_check_handler_t do_asce_exception;

#define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; })

Expand Down Expand Up @@ -730,7 +731,7 @@ void __init trap_init(void)
pgm_check_table[0x12] = &translation_exception;
pgm_check_table[0x13] = &special_op_exception;
#ifdef CONFIG_64BIT
pgm_check_table[0x38] = &do_dat_exception;
pgm_check_table[0x38] = &do_asce_exception;
pgm_check_table[0x39] = &do_dat_exception;
pgm_check_table[0x3A] = &do_dat_exception;
pgm_check_table[0x3B] = &do_dat_exception;
Expand Down
40 changes: 40 additions & 0 deletions arch/s390/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/s390_ext.h>
#include <asm/mmu_context.h>

#ifndef CONFIG_64BIT
#define __FAIL_ADDR_MASK 0x7ffff000
Expand Down Expand Up @@ -444,6 +445,45 @@ void __kprobes do_dat_exception(struct pt_regs *regs, unsigned long error_code)
do_exception(regs, error_code & 0xff, 0);
}

#ifdef CONFIG_64BIT
void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code)
{
struct mm_struct *mm;
struct vm_area_struct *vma;
unsigned long address;
int space;

mm = current->mm;
address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK;
space = check_space(current);

if (unlikely(space == 0 || in_atomic() || !mm))
goto no_context;

local_irq_enable();

down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
up_read(&mm->mmap_sem);

if (vma) {
update_mm(mm, current);
return;
}

/* User mode accesses just cause a SIGSEGV */
if (regs->psw.mask & PSW_MASK_PSTATE) {
current->thread.prot_addr = address;
current->thread.trap_no = error_code;
do_sigsegv(regs, error_code, SEGV_MAPERR, address);
return;
}

no_context:
do_no_context(regs, error_code, address);
}
#endif

#ifdef CONFIG_PFAULT
/*
* 'pfault' pseudo page faults routines.
Expand Down
5 changes: 3 additions & 2 deletions arch/s390/mm/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,9 @@ void __init paging_init(void)
init_mm.pgd = swapper_pg_dir;
S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK;
#ifdef CONFIG_64BIT
S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
pgd_type = _REGION2_ENTRY_EMPTY;
/* A three level page table (4TB) is enough for the kernel space. */
S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
pgd_type = _REGION3_ENTRY_EMPTY;
#else
S390_lowcore.kernel_asce |= _ASCE_TABLE_LENGTH;
pgd_type = _SEGMENT_ENTRY_EMPTY;
Expand Down
65 changes: 65 additions & 0 deletions arch/s390/mm/mmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <linux/personality.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <asm/pgalloc.h>

/*
* Top of mmap area (just below the process stack).
Expand Down Expand Up @@ -62,6 +63,8 @@ static inline int mmap_is_legacy(void)
current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY;
}

#ifndef CONFIG_64BIT

/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
Expand All @@ -84,3 +87,65 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
}
EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);

#else

static unsigned long
s390_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct mm_struct *mm = current->mm;
int rc;

addr = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
if (addr & ~PAGE_MASK)
return addr;
if (unlikely(mm->context.asce_limit < addr + len)) {
rc = crst_table_upgrade(mm, addr + len);
if (rc)
return (unsigned long) rc;
}
return addr;
}

static unsigned long
s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
const unsigned long len, const unsigned long pgoff,
const unsigned long flags)
{
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
int rc;

addr = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
if (addr & ~PAGE_MASK)
return addr;
if (unlikely(mm->context.asce_limit < addr + len)) {
rc = crst_table_upgrade(mm, addr + len);
if (rc)
return (unsigned long) rc;
}
return addr;
}
/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
void arch_pick_mmap_layout(struct mm_struct *mm)
{
/*
* Fall back to the standard layout if the personality
* bit is set, or if the expected stack growth is unlimited:
*/
if (mmap_is_legacy()) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = s390_get_unmapped_area;
mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = s390_get_unmapped_area_topdown;
mm->unmap_area = arch_unmap_area_topdown;
}
}
EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);

#endif
74 changes: 74 additions & 0 deletions arch/s390/mm/pgtable.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

#ifndef CONFIG_64BIT
#define ALLOC_ORDER 1
Expand Down Expand Up @@ -70,6 +71,79 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
unsigned long *table, *pgd;
unsigned long entry;

BUG_ON(limit > (1UL << 53));
repeat:
table = crst_table_alloc(mm, mm->context.noexec);
if (!table)
return -ENOMEM;
spin_lock(&mm->page_table_lock);
if (mm->context.asce_limit < limit) {
pgd = (unsigned long *) mm->pgd;
if (mm->context.asce_limit <= (1UL << 31)) {
entry = _REGION3_ENTRY_EMPTY;
mm->context.asce_limit = 1UL << 42;
mm->context.asce_bits = _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS |
_ASCE_TYPE_REGION3;
} else {
entry = _REGION2_ENTRY_EMPTY;
mm->context.asce_limit = 1UL << 53;
mm->context.asce_bits = _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS |
_ASCE_TYPE_REGION2;
}
crst_table_init(table, entry);
pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
mm->pgd = (pgd_t *) table;
table = NULL;
}
spin_unlock(&mm->page_table_lock);
if (table)
crst_table_free(mm, table);
if (mm->context.asce_limit < limit)
goto repeat;
update_mm(mm, current);
return 0;
}

void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
pgd_t *pgd;

if (mm->context.asce_limit <= limit)
return;
__tlb_flush_mm(mm);
while (mm->context.asce_limit > limit) {
pgd = mm->pgd;
switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
case _REGION_ENTRY_TYPE_R2:
mm->context.asce_limit = 1UL << 42;
mm->context.asce_bits = _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS |
_ASCE_TYPE_REGION3;
break;
case _REGION_ENTRY_TYPE_R3:
mm->context.asce_limit = 1UL << 31;
mm->context.asce_bits = _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS |
_ASCE_TYPE_SEGMENT;
break;
default:
BUG();
}
mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
crst_table_free(mm, (unsigned long *) pgd);
}
update_mm(mm, current);
}
#endif

/*
* page table entry allocation/free routines.
*/
Expand Down
2 changes: 1 addition & 1 deletion include/asm-s390/elf.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ typedef s390_regs elf_gregset_t;
use of this is to invoke "./ld.so someprog" to test out a new version of
the loader. We need to make sure that it is out of the way of the program
that it will "exec", and that there is sufficient room for the brk. */
#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2)
#define ELF_ET_DYN_BASE (STACK_TOP / 3 * 2)

/* Wow, the "main" arch needs arch dependent functions too.. :) */

Expand Down
1 change: 1 addition & 0 deletions include/asm-s390/mmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ typedef struct {
struct list_head crst_list;
struct list_head pgtable_list;
unsigned long asce_bits;
unsigned long asce_limit;
int noexec;
} mm_context_t;

Expand Down
8 changes: 4 additions & 4 deletions include/asm-s390/mmu_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ static inline int init_new_context(struct task_struct *tsk,
{
mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
#ifdef CONFIG_64BIT
mm->context.asce_bits |= _ASCE_TYPE_REGION2;
mm->context.asce_bits |= _ASCE_TYPE_REGION3;
#endif
mm->context.noexec = s390_noexec;
mm->context.asce_limit = STACK_TOP_MAX;
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
}

Expand All @@ -47,13 +49,12 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk)
/* Load home space page table origin. */
asm volatile(LCTL_OPCODE" 13,13,%0"
: : "m" (S390_lowcore.user_asce) );
set_fs(current->thread.mm_segment);
}

static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
if (unlikely(prev == next))
return;
cpu_set(smp_processor_id(), next->cpu_vm_mask);
update_mm(next, tsk);
}
Expand All @@ -65,7 +66,6 @@ static inline void activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
switch_mm(prev, next, current);
set_fs(current->thread.mm_segment);
}

#endif /* __S390_MMU_CONTEXT_H */
24 changes: 13 additions & 11 deletions include/asm-s390/pgalloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,16 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm)

static inline unsigned long pgd_entry_type(struct mm_struct *mm)
{
if (mm->context.asce_limit <= (1UL << 31))
return _SEGMENT_ENTRY_EMPTY;
if (mm->context.asce_limit <= (1UL << 42))
return _REGION3_ENTRY_EMPTY;
return _REGION2_ENTRY_EMPTY;
}

int crst_table_upgrade(struct mm_struct *, unsigned long limit);
void crst_table_downgrade(struct mm_struct *, unsigned long limit);

static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
{
unsigned long *table = crst_table_alloc(mm, mm->context.noexec);
Expand All @@ -102,12 +109,12 @@ static inline void pgd_populate_kernel(struct mm_struct *mm,

static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
{
pgd_t *shadow_pgd = get_shadow_table(pgd);
pud_t *shadow_pud = get_shadow_table(pud);

if (shadow_pgd && shadow_pud)
pgd_populate_kernel(mm, shadow_pgd, shadow_pud);
pgd_populate_kernel(mm, pgd, pud);
if (mm->context.noexec) {
pgd = get_shadow_table(pgd);
pud = get_shadow_table(pud);
pgd_populate_kernel(mm, pgd, pud);
}
}

static inline void pud_populate_kernel(struct mm_struct *mm,
Expand All @@ -130,14 +137,9 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)

static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
unsigned long *crst;

INIT_LIST_HEAD(&mm->context.crst_list);
INIT_LIST_HEAD(&mm->context.pgtable_list);
crst = crst_table_alloc(mm, s390_noexec);
if (crst)
crst_table_init(crst, pgd_entry_type(mm));
return (pgd_t *) crst;
return (pgd_t *) crst_table_alloc(mm, s390_noexec);
}
#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd)

Expand Down
Loading

0 comments on commit 6252d70

Please sign in to comment.