powerpc/mm: Add support for handling > 512TB address in SLB miss
For addresses above 512TB we allocate additional mmu contexts. To keep
things simple, addresses above 512TB are handled with IR/DR=1 and with
a stack frame set up.

The mmu_context_t is also updated to track the new extended_ids. To
support up to 4PB we need a total of 8 contexts.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
[mpe: Minor formatting tweaks and comment wording, switch BUG to WARN
      in get_ea_context().]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Aneesh Kumar K.V authored and Michael Ellerman committed Mar 30, 2018
1 parent 0dea04b commit f384796
Showing 15 changed files with 245 additions and 27 deletions.
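Before the diff, a rough back-of-the-envelope illustration of the sizing described in the commit message (an editorial sketch, not part of the patch): each context covers 2^49 bytes (512TB) on 64K pages, so a 4PB (2^52 byte) address space needs 2^52 / 2^49 = 8 context ids, and the extended-id slot for a given effective address is simply ea >> 49. MAX_EA_BITS_PER_CONTEXT comes from the patch; the TASK_SIZE_4PB name is assumed here for illustration only.

#include <stdio.h>

/* Value from the patch; TASK_SIZE_4PB is an assumed name for illustration. */
#define MAX_EA_BITS_PER_CONTEXT	49		/* each context spans 512TB on 64K pages */
#define TASK_SIZE_4PB		(1UL << 52)	/* 4PB of user address space */

int main(void)
{
	unsigned long nr_ids = TASK_SIZE_4PB >> MAX_EA_BITS_PER_CONTEXT;
	unsigned long ea = 0x3000000000000UL;	/* ~768TB, i.e. above the first 512TB */

	printf("context ids needed for 4PB: %lu\n", nr_ids);				/* 8 */
	printf("extended_id index for ea:   %lu\n", ea >> MAX_EA_BITS_PER_CONTEXT);	/* 1 */
	return 0;
}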
6 changes: 6 additions & 0 deletions arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -11,6 +11,12 @@
#define H_PUD_INDEX_SIZE 9
#define H_PGD_INDEX_SIZE 9

/*
 * Each context is 512TB. But on 4K page size we restrict our max TASK
 * size to 64TB. Hence also limit the max EA bits per context to 46 (64TB).
 */
#define MAX_EA_BITS_PER_CONTEXT 46

#ifndef __ASSEMBLY__
#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)
#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE)
6 changes: 6 additions & 0 deletions arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -7,6 +7,12 @@
#define H_PUD_INDEX_SIZE 7
#define H_PGD_INDEX_SIZE 8

/*
 * Each context is 512TB in size. SLB misses for the first (default)
 * context are handled in the hot path.
 */
#define MAX_EA_BITS_PER_CONTEXT 49

/*
 * 64K aligned addresses free up a few of the lower bits of the RPN for
 * us. We steal those here. For more details look at pte_pfn/pfn_pte()
33 changes: 32 additions & 1 deletion arch/powerpc/include/asm/book3s/64/mmu.h
@@ -91,7 +91,18 @@ struct slice_mask {
};

typedef struct {
mm_context_id_t id;
union {
/*
 * We use id as the PIDR content for radix. On hash we can use
 * more than one id. The extended ids are used when we start
 * having addresses above 512TB. We allocate one extended id
 * for each 512TB chunk. The new id is then used with the 49-bit
 * EA to build a new VA. We always use ESID_BITS_1T_MASK bits
 * from the EA and the new context id to build the new VAs.
 */
mm_context_id_t id;
mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
};
u16 user_psize; /* page size index */

/* Number of bits in the mm_cpumask */
@@ -196,5 +207,25 @@ extern void radix_init_pseries(void);
static inline void radix_init_pseries(void) { };
#endif

static inline int get_ea_context(mm_context_t *ctx, unsigned long ea)
{
int index = ea >> MAX_EA_BITS_PER_CONTEXT;

if (likely(index < ARRAY_SIZE(ctx->extended_id)))
return ctx->extended_id[index];

/* should never happen */
WARN_ON(1);
return 0;
}

static inline unsigned long get_user_vsid(mm_context_t *ctx,
unsigned long ea, int ssize)
{
unsigned long context = get_ea_context(ctx, ea);

return get_vsid(context, ea, ssize);
}

#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
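A minimal user-space model of the union and lookup added above (an editorial sketch under assumed values, not kernel code): it shows that id aliases extended_id[0], so addresses in the first 512TB keep using the default context while higher addresses index into the per-512TB slots. All names prefixed mock_ are invented for this illustration.

#include <assert.h>
#include <stdio.h>

#define MAX_EA_BITS_PER_CONTEXT	49
#define NR_EXTENDED_IDS		8	/* 4PB / 512TB, assumed for illustration */

typedef struct {
	union {
		int id;					/* default context, below 512TB */
		int extended_id[NR_EXTENDED_IDS];	/* one id per 512TB chunk */
	};
} mock_mm_context_t;

static int mock_get_ea_context(mock_mm_context_t *ctx, unsigned long ea)
{
	unsigned long index = ea >> MAX_EA_BITS_PER_CONTEXT;

	return index < NR_EXTENDED_IDS ? ctx->extended_id[index] : 0;
}

int main(void)
{
	mock_mm_context_t ctx = { .extended_id = { 12, 0, 57 } };

	assert(ctx.id == 12);	/* id aliases extended_id[0] */
	printf("%d\n", mock_get_ea_context(&ctx, 0x100000UL));		/* 12: below 512TB */
	printf("%d\n", mock_get_ea_context(&ctx, 0x4000000000000UL));	/* 57: third 512TB chunk */
	return 0;
}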
39 changes: 39 additions & 0 deletions arch/powerpc/include/asm/mmu_context.h
@@ -60,12 +60,51 @@ extern int hash__alloc_context_id(void);
extern void hash__reserve_context_id(int id);
extern void __destroy_context(int context_id);
static inline void mmu_context_init(void) { }

static inline int alloc_extended_context(struct mm_struct *mm,
unsigned long ea)
{
int context_id;

int index = ea >> MAX_EA_BITS_PER_CONTEXT;

context_id = hash__alloc_context_id();
if (context_id < 0)
return context_id;

VM_WARN_ON(mm->context.extended_id[index]);
mm->context.extended_id[index] = context_id;
return context_id;
}

static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
{
int context_id;

context_id = get_ea_context(&mm->context, ea);
if (!context_id)
return true;
return false;
}

#else
extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk);
extern unsigned long __init_new_context(void);
extern void __destroy_context(unsigned long context_id);
extern void mmu_context_init(void);
static inline int alloc_extended_context(struct mm_struct *mm,
unsigned long ea)
{
/* non-book3s_64 platforms should never call this */
WARN_ON(1);
return -ENOMEM;
}

static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
{
return false;
}
#endif

#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
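The call sites for these two helpers are not part of this commit. What follows is a hedged user-space sketch of the pattern a fault path could follow with them, using stand-in types and a stub allocator in place of hash__alloc_context_id(); every mock_ name is invented for the illustration.

#include <stdio.h>

#define MAX_EA_BITS_PER_CONTEXT	49
#define NR_EXTENDED_IDS		8

struct mock_mm {
	int extended_id[NR_EXTENDED_IDS];	/* slot 0 doubles as the default id */
};

static int next_context_id = 100;	/* stand-in for hash__alloc_context_id() */

static int mock_need_extra_context(struct mock_mm *mm, unsigned long ea)
{
	/* A zero slot means no context has been allocated for this 512TB chunk yet. */
	return mm->extended_id[ea >> MAX_EA_BITS_PER_CONTEXT] == 0;
}

static int mock_alloc_extended_context(struct mock_mm *mm, unsigned long ea)
{
	int id = next_context_id++;

	mm->extended_id[ea >> MAX_EA_BITS_PER_CONTEXT] = id;
	return id;
}

int main(void)
{
	struct mock_mm mm = { .extended_id = { 1 } };	/* default context id = 1 */
	unsigned long ea = 0x2000000000000UL;		/* 512TB: first EA needing an extra id */

	/* The assumed calling pattern: allocate only when no id covers this EA yet. */
	if (mock_need_extra_context(&mm, ea)) {
		if (mock_alloc_extended_context(&mm, ea) < 0)
			fprintf(stderr, "context allocation failed\n");
	}

	printf("context id used for ea: %d\n",
	       mm.extended_id[ea >> MAX_EA_BITS_PER_CONTEXT]);	/* 100 */
	return 0;
}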
6 changes: 6 additions & 0 deletions arch/powerpc/include/asm/processor.h
@@ -119,9 +119,15 @@ void release_thread(struct task_struct *);
*/
#define TASK_SIZE_USER64 TASK_SIZE_512TB
#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_128TB
#define TASK_CONTEXT_SIZE TASK_SIZE_512TB
#else
#define TASK_SIZE_USER64 TASK_SIZE_64TB
#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB
/*
* We don't need to allocate extended context ids for 4K page size, because
* we limit the max effective address on this config to 64TB.
*/
#define TASK_CONTEXT_SIZE TASK_SIZE_64TB
#endif

/*
11 changes: 7 additions & 4 deletions arch/powerpc/kernel/exceptions-64s.S
@@ -621,7 +621,10 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
mtlr r10

beq- 8f /* if bad address, make full stack frame */
/*
* Large address, check whether we have to allocate new contexts.
*/
beq- 8f

bne- cr5,2f /* if unrecoverable exception, oops */

@@ -685,7 +688,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
mr r3,r12
mfspr r11,SPRN_SRR0
mfspr r12,SPRN_SRR1
LOAD_HANDLER(r10,bad_addr_slb)
LOAD_HANDLER(r10, large_addr_slb)
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
mtspr SPRN_SRR1,r10
@@ -700,7 +703,7 @@ EXC_COMMON_BEGIN(unrecov_slb)
bl unrecoverable_exception
b 1b

EXC_COMMON_BEGIN(bad_addr_slb)
EXC_COMMON_BEGIN(large_addr_slb)
EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
RECONCILE_IRQ_STATE(r10, r11)
ld r3, PACA_EXSLB+EX_DAR(r13)
@@ -710,7 +713,7 @@ EXC_COMMON_BEGIN(bad_addr_slb)
std r10, _TRAP(r1)
2: bl save_nvgprs
addi r3, r1, STACK_FRAME_OVERHEAD
bl slb_miss_bad_addr
bl slb_miss_large_addr
b ret_from_except

EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
12 changes: 0 additions & 12 deletions arch/powerpc/kernel/traps.c
@@ -1495,18 +1495,6 @@ void alignment_exception(struct pt_regs *regs)
exception_exit(prev_state);
}

void slb_miss_bad_addr(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();

if (user_mode(regs))
_exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
else
bad_page_fault(regs, regs->dar, SIGSEGV);

exception_exit(prev_state);
}

void StackOverflow(struct pt_regs *regs)
{
printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n",
2 changes: 1 addition & 1 deletion arch/powerpc/mm/copro_fault.c
@@ -112,7 +112,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
return 1;
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
vsid = get_vsid(mm->context.id, ea, ssize);
vsid = get_user_vsid(&mm->context, ea, ssize);
vsidkey = SLB_VSID_USER;
break;
case VMALLOC_REGION_ID:
4 changes: 2 additions & 2 deletions arch/powerpc/mm/hash_utils_64.c
@@ -1267,7 +1267,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
}
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
vsid = get_vsid(mm->context.id, ea, ssize);
vsid = get_user_vsid(&mm->context, ea, ssize);
break;
case VMALLOC_REGION_ID:
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
@@ -1532,7 +1532,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,

/* Get VSID */
ssize = user_segment_size(ea);
vsid = get_vsid(mm->context.id, ea, ssize);
vsid = get_user_vsid(&mm->context, ea, ssize);
if (!vsid)
return;
/*
15 changes: 14 additions & 1 deletion arch/powerpc/mm/mmu_context_book3s64.c
@@ -179,6 +179,19 @@ void __destroy_context(int context_id)
}
EXPORT_SYMBOL_GPL(__destroy_context);

static void destroy_contexts(mm_context_t *ctx)
{
int index, context_id;

spin_lock(&mmu_context_lock);
for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
context_id = ctx->extended_id[index];
if (context_id)
ida_remove(&mmu_context_ida, context_id);
}
spin_unlock(&mmu_context_lock);
}

#ifdef CONFIG_PPC_64K_PAGES
static void destroy_pagetable_page(struct mm_struct *mm)
{
@@ -217,7 +230,7 @@ void destroy_context(struct mm_struct *mm)
else
subpage_prot_free(mm);
destroy_pagetable_page(mm);
__destroy_context(mm->context.id);
destroy_contexts(&mm->context);
mm->context.id = MMU_NO_CONTEXT;
}

2 changes: 1 addition & 1 deletion arch/powerpc/mm/pgtable-hash64.c
@@ -320,7 +320,7 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,

if (!is_kernel_addr(addr)) {
ssize = user_segment_size(addr);
vsid = get_vsid(mm->context.id, addr, ssize);
vsid = get_user_vsid(&mm->context, addr, ssize);
WARN_ON(vsid == 0);
} else {
vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
108 changes: 108 additions & 0 deletions arch/powerpc/mm/slb.c
@@ -22,6 +22,7 @@
#include <asm/cacheflush.h>
#include <asm/smp.h>
#include <linux/compiler.h>
#include <linux/context_tracking.h>
#include <linux/mm_types.h>

#include <asm/udbg.h>
@@ -340,3 +341,110 @@ void slb_initialize(void)

asm volatile("isync":::"memory");
}

static void insert_slb_entry(unsigned long vsid, unsigned long ea,
int bpsize, int ssize)
{
unsigned long flags, vsid_data, esid_data;
enum slb_index index;
int slb_cache_index;

/*
 * IRQs are disabled, so it should be safe to access the PACA.
*/
index = get_paca()->stab_rr;

/*
 * Simple round-robin replacement of the SLB, starting at SLB_NUM_BOLTED.
*/
if (index < (mmu_slb_size - 1))
index++;
else
index = SLB_NUM_BOLTED;

get_paca()->stab_rr = index;

flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
vsid_data = (vsid << slb_vsid_shift(ssize)) | flags |
((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
esid_data = mk_esid_data(ea, ssize, index);

asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)
: "memory");

/*
* Now update slb cache entries
*/
slb_cache_index = get_paca()->slb_cache_ptr;
if (slb_cache_index < SLB_CACHE_ENTRIES) {
/*
 * We have space in the SLB cache for the optimized switch_slb().
 * Store the top 36 bits of esid_data, as per the ISA.
*/
get_paca()->slb_cache[slb_cache_index++] = esid_data >> 28;
get_paca()->slb_cache_ptr++;
} else {
/*
 * Our cache is full and the current contents no longer
 * accurately reflect the active SLB entries. Bump the pointer
 * so that switch_slb() will ignore the cache.
*/
get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
}
}

static void handle_multi_context_slb_miss(int context_id, unsigned long ea)
{
struct mm_struct *mm = current->mm;
unsigned long vsid;
int bpsize;

/*
* We are always above 1TB, hence use high user segment size.
*/
vsid = get_vsid(context_id, ea, mmu_highuser_ssize);
bpsize = get_slice_psize(mm, ea);
insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize);
}

void slb_miss_large_addr(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
unsigned long ea = regs->dar;
int context;

if (REGION_ID(ea) != USER_REGION_ID)
goto slb_bad_addr;

/*
 * Are we beyond what the page table layout supports?
*/
if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
goto slb_bad_addr;

/* Lower addresses should have been handled by the asm code */
if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT))
goto slb_bad_addr;

/*
 * Consider this a bad access if we take an SLB miss
 * on an address above the address limit.
*/
if (ea >= current->mm->context.slb_addr_limit)
goto slb_bad_addr;

context = get_ea_context(&current->mm->context, ea);
if (!context)
goto slb_bad_addr;

handle_multi_context_slb_miss(context, ea);
exception_exit(prev_state);
return;

slb_bad_addr:
if (user_mode(regs))
_exception(SIGSEGV, regs, SEGV_BNDERR, ea);
else
bad_page_fault(regs, ea, SIGSEGV);
exception_exit(prev_state);
}