Skip to content

Commit

Permalink
RISC-V: Introduce sv48 support without relocatable kernel
Browse files Browse the repository at this point in the history
This patchset allows to have a single kernel for sv39 and sv48 without
being relocatable.

The idea comes from Arnd Bergmann who suggested to do the same as x86,
that is mapping the kernel to the end of the address space, which allows
the kernel to be linked at the same address for both sv39 and sv48 and
then does not require to be relocated at runtime.

This implements sv48 support at runtime. The kernel will try to boot
with 4-level page table and will fallback to 3-level if the HW does not
support it. Folding the 4th level into a 3-level page table has almost
no cost at runtime.

Note that kasan region had to be moved to the end of the address space
since its location must be known at compile-time and then be valid for
both sv39 and sv48 (and sv57 that is coming).

* riscv-sv48-v3:
  riscv: Explicit comment about user virtual address space size
  riscv: Use pgtable_l4_enabled to output mmu_type in cpuinfo
  riscv: Implement sv48 support
  asm-generic: Prepare for riscv use of pud_alloc_one and pud_free
  riscv: Allow to dynamically define VA_BITS
  riscv: Introduce functions to switch pt_ops
  riscv: Split early kasan mapping to prepare sv48 introduction
  riscv: Move KASAN mapping next to the kernel mapping
  riscv: Get rid of MAXPHYSMEM configs

Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
  • Loading branch information
Palmer Dabbelt committed Jan 20, 2022
2 parents fc839c6 + c774de2 commit 0c34e79
Show file tree
Hide file tree
Showing 20 changed files with 731 additions and 200 deletions.
12 changes: 6 additions & 6 deletions Documentation/riscv/vm-layout.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,12 @@ RISC-V Linux Kernel SV39
| Kernel-space virtual memory, shared between all processes:
____________________________________________________________|___________________________________________________________
| | | |
ffffffc000000000 | -256 GB | ffffffc7ffffffff | 32 GB | kasan
ffffffcefee00000 | -196 GB | ffffffcefeffffff | 2 MB | fixmap
ffffffceff000000 | -196 GB | ffffffceffffffff | 16 MB | PCI io
ffffffcf00000000 | -196 GB | ffffffcfffffffff | 4 GB | vmemmap
ffffffd000000000 | -192 GB | ffffffdfffffffff | 64 GB | vmalloc/ioremap space
ffffffe000000000 | -128 GB | ffffffff7fffffff | 124 GB | direct mapping of all physical memory
ffffffc6fee00000 | -228 GB | ffffffc6feffffff | 2 MB | fixmap
ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io
ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap
ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space
ffffffd800000000 | -160 GB | fffffff6ffffffff | 124 GB | direct mapping of all physical memory
fffffff700000000 | -36 GB | fffffffeffffffff | 32 GB | kasan
__________________|____________|__________________|_________|____________________________________________________________
|
|
Expand Down
37 changes: 4 additions & 33 deletions arch/riscv/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -147,27 +147,16 @@ config MMU
Select if you want MMU-based virtualised addressing space
support by paged memory management. If unsure, say 'Y'.

config VA_BITS
int
default 32 if 32BIT
default 39 if 64BIT

config PA_BITS
int
default 34 if 32BIT
default 56 if 64BIT

config PAGE_OFFSET
hex
default 0xC0000000 if 32BIT && MAXPHYSMEM_1GB
default 0xC0000000 if 32BIT
default 0x80000000 if 64BIT && !MMU
default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
default 0xffffaf8000000000 if 64BIT

config KASAN_SHADOW_OFFSET
hex
depends on KASAN_GENERIC
default 0xdfffffc800000000 if 64BIT
default 0xdfffffff00000000 if 64BIT
default 0xffffffff if 32BIT

config ARCH_FLATMEM_ENABLE
Expand Down Expand Up @@ -213,7 +202,7 @@ config FIX_EARLYCON_MEM

config PGTABLE_LEVELS
int
default 3 if 64BIT
default 4 if 64BIT
default 2

config LOCKDEP_SUPPORT
Expand Down Expand Up @@ -271,24 +260,6 @@ config MODULE_SECTIONS
bool
select HAVE_MOD_ARCH_SPECIFIC

choice
prompt "Maximum Physical Memory"
default MAXPHYSMEM_1GB if 32BIT
default MAXPHYSMEM_2GB if 64BIT && CMODEL_MEDLOW
default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY

config MAXPHYSMEM_1GB
depends on 32BIT
bool "1GiB"
config MAXPHYSMEM_2GB
depends on 64BIT
bool "2GiB"
config MAXPHYSMEM_128GB
depends on 64BIT && CMODEL_MEDANY
bool "128GiB"
endchoice


config SMP
bool "Symmetric Multi-Processing"
help
Expand Down
1 change: 0 additions & 1 deletion arch/riscv/configs/nommu_k210_defconfig
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ CONFIG_EMBEDDED=y
CONFIG_SLOB=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
CONFIG_MAXPHYSMEM_2GB=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_CMDLINE="earlycon console=ttySIF0"
Expand Down
1 change: 0 additions & 1 deletion arch/riscv/configs/nommu_k210_sdcard_defconfig
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ CONFIG_EMBEDDED=y
CONFIG_SLOB=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
CONFIG_MAXPHYSMEM_2GB=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro"
Expand Down
1 change: 0 additions & 1 deletion arch/riscv/configs/nommu_virt_defconfig
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ CONFIG_SLOB=y
# CONFIG_SLAB_MERGE_DEFAULT is not set
# CONFIG_MMU is not set
CONFIG_SOC_VIRT=y
CONFIG_MAXPHYSMEM_2GB=y
CONFIG_SMP=y
CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0"
CONFIG_CMDLINE_FORCE=y
Expand Down
3 changes: 1 addition & 2 deletions arch/riscv/include/asm/csr.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,13 @@
#ifndef CONFIG_64BIT
#define SATP_PPN _AC(0x003FFFFF, UL)
#define SATP_MODE_32 _AC(0x80000000, UL)
#define SATP_MODE SATP_MODE_32
#define SATP_ASID_BITS 9
#define SATP_ASID_SHIFT 22
#define SATP_ASID_MASK _AC(0x1FF, UL)
#else
#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
#define SATP_MODE_39 _AC(0x8000000000000000, UL)
#define SATP_MODE SATP_MODE_39
#define SATP_MODE_48 _AC(0x9000000000000000, UL)
#define SATP_ASID_BITS 16
#define SATP_ASID_SHIFT 44
#define SATP_ASID_MASK _AC(0xFFFF, UL)
Expand Down
1 change: 1 addition & 0 deletions arch/riscv/include/asm/fixmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ enum fixed_addresses {
FIX_HOLE,
FIX_PTE,
FIX_PMD,
FIX_PUD,
FIX_TEXT_POKE1,
FIX_TEXT_POKE0,
FIX_EARLYCON_MEM_BASE,
Expand Down
11 changes: 8 additions & 3 deletions arch/riscv/include/asm/kasan.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,18 @@
*/
#define KASAN_SHADOW_SCALE_SHIFT 3

#define KASAN_SHADOW_SIZE (UL(1) << ((CONFIG_VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
#define KASAN_SHADOW_START KERN_VIRT_START
#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
#define KASAN_SHADOW_SIZE (UL(1) << ((VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
/*
* Depending on the size of the virtual address space, the region may not be
* aligned on PGDIR_SIZE, so force its alignment to ease its population.
*/
#define KASAN_SHADOW_START ((KASAN_SHADOW_END - KASAN_SHADOW_SIZE) & PGDIR_MASK)
#define KASAN_SHADOW_END MODULES_LOWEST_VADDR
#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)

void kasan_init(void);
asmlinkage void kasan_early_init(void);
void kasan_swapper_init(void);

#endif
#endif
Expand Down
16 changes: 14 additions & 2 deletions arch/riscv/include/asm/page.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,20 @@
* When not using MMU this corresponds to the first free page in
* physical memory (aligned on a page boundary).
*/
#ifdef CONFIG_64BIT
#ifdef CONFIG_MMU
#define PAGE_OFFSET kernel_map.page_offset
#else
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)

#define KERN_VIRT_SIZE (-PAGE_OFFSET)
#endif
/*
* By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
* define the PAGE_OFFSET value for SV39.
*/
#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL)
#else
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
#endif /* CONFIG_64BIT */

#ifndef __ASSEMBLY__

Expand Down Expand Up @@ -86,6 +97,7 @@ extern unsigned long riscv_pfn_base;
#endif /* CONFIG_MMU */

struct kernel_mapping {
unsigned long page_offset;
unsigned long virt_addr;
uintptr_t phys_addr;
uintptr_t size;
Expand Down
40 changes: 40 additions & 0 deletions arch/riscv/include/asm/pgalloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include <asm/tlb.h>

#ifdef CONFIG_MMU
#define __HAVE_ARCH_PUD_ALLOC_ONE
#define __HAVE_ARCH_PUD_FREE
#include <asm-generic/pgalloc.h>

static inline void pmd_populate_kernel(struct mm_struct *mm,
Expand All @@ -36,6 +38,44 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)

set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}

static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
{
if (pgtable_l4_enabled) {
unsigned long pfn = virt_to_pfn(pud);

set_p4d(p4d, __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
}

static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
pud_t *pud)
{
if (pgtable_l4_enabled) {
unsigned long pfn = virt_to_pfn(pud);

set_p4d_safe(p4d,
__p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
}

#define pud_alloc_one pud_alloc_one
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
if (pgtable_l4_enabled)
return __pud_alloc_one(mm, addr);

return NULL;
}

#define pud_free pud_free
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
if (pgtable_l4_enabled)
__pud_free(mm, pud);
}

#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
#endif /* __PAGETABLE_PMD_FOLDED */

static inline pgd_t *pgd_alloc(struct mm_struct *mm)
Expand Down
108 changes: 107 additions & 1 deletion arch/riscv/include/asm/pgtable-64.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,36 @@

#include <linux/const.h>

#define PGDIR_SHIFT 30
extern bool pgtable_l4_enabled;

#define PGDIR_SHIFT_L3 30
#define PGDIR_SHIFT_L4 39
#define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3)

#define PGDIR_SHIFT (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)
/* Size of region mapped by a page global directory */
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))

/* pud is folded into pgd in case of 3-level page table */
#define PUD_SHIFT 30
#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE - 1))

#define PMD_SHIFT 21
/* Size of region mapped by a page middle directory */
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE - 1))

/* Page Upper Directory entry */
typedef struct {
unsigned long pud;
} pud_t;

#define pud_val(x) ((x).pud)
#define __pud(x) ((pud_t) { (x) })
#define PTRS_PER_PUD (PAGE_SIZE / sizeof(pud_t))

/* Page Middle Directory entry */
typedef struct {
unsigned long pmd;
Expand Down Expand Up @@ -59,6 +79,16 @@ static inline void pud_clear(pud_t *pudp)
set_pud(pudp, __pud(0));
}

static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot)
{
return __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
}

static inline unsigned long _pud_pfn(pud_t pud)
{
return pud_val(pud) >> _PAGE_PFN_SHIFT;
}

static inline pmd_t *pud_pgtable(pud_t pud)
{
return (pmd_t *)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT);
Expand All @@ -69,6 +99,17 @@ static inline struct page *pud_page(pud_t pud)
return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
}

#define mm_pud_folded mm_pud_folded
static inline bool mm_pud_folded(struct mm_struct *mm)
{
if (pgtable_l4_enabled)
return false;

return true;
}

#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))

static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot)
{
return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
Expand All @@ -84,4 +125,69 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))

#define pud_ERROR(e) \
pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))

static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
{
if (pgtable_l4_enabled)
*p4dp = p4d;
else
set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
}

static inline int p4d_none(p4d_t p4d)
{
if (pgtable_l4_enabled)
return (p4d_val(p4d) == 0);

return 0;
}

static inline int p4d_present(p4d_t p4d)
{
if (pgtable_l4_enabled)
return (p4d_val(p4d) & _PAGE_PRESENT);

return 1;
}

static inline int p4d_bad(p4d_t p4d)
{
if (pgtable_l4_enabled)
return !p4d_present(p4d);

return 0;
}

static inline void p4d_clear(p4d_t *p4d)
{
if (pgtable_l4_enabled)
set_p4d(p4d, __p4d(0));
}

static inline pud_t *p4d_pgtable(p4d_t p4d)
{
if (pgtable_l4_enabled)
return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT);

return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) });
}

static inline struct page *p4d_page(p4d_t p4d)
{
return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
}

#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))

#define pud_offset pud_offset
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
if (pgtable_l4_enabled)
return p4d_pgtable(*p4d) + pud_index(address);

return (pud_t *)p4d;
}

#endif /* _ASM_RISCV_PGTABLE_64_H */
Loading

0 comments on commit 0c34e79

Please sign in to comment.