Skip to content

Commit

Permalink
parisc: Optimize per-pagetable spinlocks
Browse files Browse the repository at this point in the history
On parisc a spinlock is stored in the next page behind the pgd which
protects against parallel accesses to the pgd. That's why one additional
page (PGD_ALLOC_ORDER) is allocated for the pgd.

Matthew Wilcox suggested that we instead should use a pointer in the
struct page table for this spinlock and noted, that the comments for the
PGD_ORDER and PMD_ORDER defines were wrong.

Both suggestions are addressed with this patch. Instead of having an own
spinlock to protect the pgd, we now switch to use the existing
page_table_lock.  Additionally, beside loading the pgd into cr25 in
switch_mm_irqs_off(), the physical address of this lock is loaded into
cr28 (tr4), so that we can avoid implementing a complicated lookup in
assembly for this lock in the TLB fault handlers.

The existing Hybrid L2/L3 page table scheme (where the pmd is adjacent
to the pgd) has been dropped with this patch.

Remove the locking in set_pte() and the huge-page pte functions too.
They trigger a spinlock recursion on 32bit machines and seem unnecessary.

Suggested-by: Matthew Wilcox <willy@infradead.org>
Fixes: b37d1c1 ("parisc: Use per-pagetable spinlock")
Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
  • Loading branch information
Helge Deller committed Feb 12, 2021
1 parent ae3c476 commit b779507
Show file tree
Hide file tree
Showing 9 changed files with 110 additions and 214 deletions.
10 changes: 10 additions & 0 deletions arch/parisc/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,16 @@ config IRQSTACKS
for handling hard and soft interrupts. This can help avoid
overflowing the process kernel stacks.

config TLB_PTLOCK
bool "Use page table locks in TLB fault handler"
depends on SMP
default n
help
Select this option to enable page table locking in the TLB
fault handler. This ensures that page table entries are
updated consistently on SMP machines at the expense of some
loss in performance.

config HOTPLUG_CPU
bool
default y if SMP
Expand Down
7 changes: 7 additions & 0 deletions arch/parisc/include/asm/mmu_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/atomic.h>
#include <linux/spinlock.h>
#include <asm-generic/mm_hooks.h>

/* on PA-RISC, we actually have enough contexts to justify an allocator
Expand Down Expand Up @@ -50,6 +51,12 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev,
struct mm_struct *next, struct task_struct *tsk)
{
if (prev != next) {
#ifdef CONFIG_TLB_PTLOCK
/* put physical address of page_table_lock in cr28 (tr4)
for TLB faults */
spinlock_t *pgd_lock = &next->page_table_lock;
mtctl(__pa(__ldcw_align(&pgd_lock->rlock.raw_lock)), 28);
#endif
mtctl(__pa(next->pgd), 25);
load_context(next->context);
}
Expand Down
2 changes: 1 addition & 1 deletion arch/parisc/include/asm/page.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ extern int npmem_ranges;
#else
#define BITS_PER_PTE_ENTRY 2
#define BITS_PER_PMD_ENTRY 2
#define BITS_PER_PGD_ENTRY BITS_PER_PMD_ENTRY
#define BITS_PER_PGD_ENTRY 2
#endif
#define PGD_ENTRY_SIZE (1UL << BITS_PER_PGD_ENTRY)
#define PMD_ENTRY_SIZE (1UL << BITS_PER_PMD_ENTRY)
Expand Down
76 changes: 18 additions & 58 deletions arch/parisc/include/asm/pgalloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,47 +15,23 @@
#define __HAVE_ARCH_PGD_FREE
#include <asm-generic/pgalloc.h>

/* Allocate the top level pgd (page directory)
*
* Here (for 64 bit kernels) we implement a Hybrid L2/L3 scheme: we
* allocate the first pmd adjacent to the pgd. This means that we can
* subtract a constant offset to get to it. The pmd and pgd sizes are
* arranged so that a single pmd covers 4GB (giving a full 64-bit
* process access to 8TB) so our lookups are effectively L2 for the
* first 4GB of the kernel (i.e. for all ILP32 processes and all the
* kernel for machines with under 4GB of memory) */
/* Allocate the top level pgd (page directory) */
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL,
PGD_ALLOC_ORDER);
pgd_t *actual_pgd = pgd;
pgd_t *pgd;

if (likely(pgd != NULL)) {
memset(pgd, 0, PAGE_SIZE<<PGD_ALLOC_ORDER);
#if CONFIG_PGTABLE_LEVELS == 3
actual_pgd += PTRS_PER_PGD;
/* Populate first pmd with allocated memory. We mark it
* with PxD_FLAG_ATTACHED as a signal to the system that this
* pmd entry may not be cleared. */
set_pgd(actual_pgd, __pgd((PxD_FLAG_PRESENT |
PxD_FLAG_VALID |
PxD_FLAG_ATTACHED)
+ (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT)));
/* The first pmd entry also is marked with PxD_FLAG_ATTACHED as
* a signal that this pmd may not be freed */
set_pgd(pgd, __pgd(PxD_FLAG_ATTACHED));
#endif
}
spin_lock_init(pgd_spinlock(actual_pgd));
return actual_pgd;
pgd = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ORDER);
if (unlikely(pgd == NULL))
return NULL;

memset(pgd, 0, PAGE_SIZE << PGD_ORDER);

return pgd;
}

static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
#if CONFIG_PGTABLE_LEVELS == 3
pgd -= PTRS_PER_PGD;
#endif
free_pages((unsigned long)pgd, PGD_ALLOC_ORDER);
free_pages((unsigned long)pgd, PGD_ORDER);
}

#if CONFIG_PGTABLE_LEVELS == 3
Expand All @@ -70,41 +46,25 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)

static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
{
return (pmd_t *)__get_free_pages(GFP_PGTABLE_KERNEL, PMD_ORDER);
pmd_t *pmd;

pmd = (pmd_t *)__get_free_pages(GFP_PGTABLE_KERNEL, PMD_ORDER);
if (likely(pmd))
memset ((void *)pmd, 0, PAGE_SIZE << PMD_ORDER);
return pmd;
}

static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) {
/*
* This is the permanent pmd attached to the pgd;
* cannot free it.
* Increment the counter to compensate for the decrement
* done by generic mm code.
*/
mm_inc_nr_pmds(mm);
return;
}
free_pages((unsigned long)pmd, PMD_ORDER);
}

#endif

static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
{
#if CONFIG_PGTABLE_LEVELS == 3
/* preserve the gateway marker if this is the beginning of
* the permanent pmd */
if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
set_pmd(pmd, __pmd((PxD_FLAG_PRESENT |
PxD_FLAG_VALID |
PxD_FLAG_ATTACHED)
+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
else
#endif
set_pmd(pmd, __pmd((PxD_FLAG_PRESENT | PxD_FLAG_VALID)
+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
set_pmd(pmd, __pmd((PxD_FLAG_PRESENT | PxD_FLAG_VALID)
+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
}

#define pmd_populate(mm, pmd, pte_page) \
Expand Down
89 changes: 18 additions & 71 deletions arch/parisc/include/asm/pgtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
#include <asm/processor.h>
#include <asm/cache.h>

static inline spinlock_t *pgd_spinlock(pgd_t *);

/*
* kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
* memory. For the return value to be meaningful, ADDR must be >=
Expand All @@ -42,12 +40,8 @@ static inline spinlock_t *pgd_spinlock(pgd_t *);

/* This is for the serialization of PxTLB broadcasts. At least on the N class
* systems, only one PxTLB inter processor broadcast can be active at any one
* time on the Merced bus.
* PTE updates are protected by locks in the PMD.
*/
* time on the Merced bus. */
extern spinlock_t pa_tlb_flush_lock;
extern spinlock_t pa_swapper_pg_lock;
#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
extern int pa_serialize_tlb_flushes;
#else
Expand Down Expand Up @@ -86,18 +80,16 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
* within a page table are directly modified. Thus, the following
* hook is made available.
*/
#define set_pte(pteptr, pteval) \
do{ \
*(pteptr) = (pteval); \
} while(0)

#define set_pte_at(mm, addr, ptep, pteval) \
do { \
unsigned long flags; \
spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags);\
set_pte(ptep, pteval); \
purge_tlb_entries(mm, addr); \
spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags);\
#define set_pte(pteptr, pteval) \
do { \
*(pteptr) = (pteval); \
barrier(); \
} while(0)

#define set_pte_at(mm, addr, pteptr, pteval) \
do { \
*(pteptr) = (pteval); \
purge_tlb_entries(mm, addr); \
} while (0)

#endif /* !__ASSEMBLY__ */
Expand All @@ -120,12 +112,10 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
#define KERNEL_INITIAL_SIZE (1 << KERNEL_INITIAL_ORDER)

#if CONFIG_PGTABLE_LEVELS == 3
#define PGD_ORDER 1 /* Number of pages per pgd */
#define PMD_ORDER 1 /* Number of pages per pmd */
#define PGD_ALLOC_ORDER (2 + 1) /* first pgd contains pmd */
#define PMD_ORDER 1
#define PGD_ORDER 0
#else
#define PGD_ORDER 1 /* Number of pages per pgd */
#define PGD_ALLOC_ORDER (PGD_ORDER + 1)
#define PGD_ORDER 1
#endif

/* Definitions for 3rd level (we use PLD here for Page Lower directory
Expand Down Expand Up @@ -240,11 +230,9 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
* able to effectively address 40/42/44-bits of physical address space
* depending on 4k/16k/64k PAGE_SIZE */
#define _PxD_PRESENT_BIT 31
#define _PxD_ATTACHED_BIT 30
#define _PxD_VALID_BIT 29
#define _PxD_VALID_BIT 30

#define PxD_FLAG_PRESENT (1 << xlate_pabit(_PxD_PRESENT_BIT))
#define PxD_FLAG_ATTACHED (1 << xlate_pabit(_PxD_ATTACHED_BIT))
#define PxD_FLAG_VALID (1 << xlate_pabit(_PxD_VALID_BIT))
#define PxD_FLAG_MASK (0xf)
#define PxD_FLAG_SHIFT (4)
Expand Down Expand Up @@ -326,23 +314,10 @@ extern unsigned long *empty_zero_page;
#define pgd_flag(x) (pgd_val(x) & PxD_FLAG_MASK)
#define pgd_address(x) ((unsigned long)(pgd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)

#if CONFIG_PGTABLE_LEVELS == 3
/* The first entry of the permanent pmd is not there if it contains
* the gateway marker */
#define pmd_none(x) (!pmd_val(x) || pmd_flag(x) == PxD_FLAG_ATTACHED)
#else
#define pmd_none(x) (!pmd_val(x))
#endif
#define pmd_bad(x) (!(pmd_flag(x) & PxD_FLAG_VALID))
#define pmd_present(x) (pmd_flag(x) & PxD_FLAG_PRESENT)
static inline void pmd_clear(pmd_t *pmd) {
#if CONFIG_PGTABLE_LEVELS == 3
if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
/* This is the entry pointing to the permanent pmd
* attached to the pgd; cannot clear it */
set_pmd(pmd, __pmd(PxD_FLAG_ATTACHED));
else
#endif
set_pmd(pmd, __pmd(0));
}

Expand All @@ -358,12 +333,6 @@ static inline void pmd_clear(pmd_t *pmd) {
#define pud_bad(x) (!(pud_flag(x) & PxD_FLAG_VALID))
#define pud_present(x) (pud_flag(x) & PxD_FLAG_PRESENT)
static inline void pud_clear(pud_t *pud) {
#if CONFIG_PGTABLE_LEVELS == 3
if(pud_flag(*pud) & PxD_FLAG_ATTACHED)
/* This is the permanent pmd attached to the pud; cannot
* free it */
return;
#endif
set_pud(pud, __pud(0));
}
#endif
Expand Down Expand Up @@ -456,57 +425,35 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })


static inline spinlock_t *pgd_spinlock(pgd_t *pgd)
{
if (unlikely(pgd == swapper_pg_dir))
return &pa_swapper_pg_lock;
return (spinlock_t *)((char *)pgd + (PAGE_SIZE << (PGD_ALLOC_ORDER - 1)));
}


static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
pte_t pte;
unsigned long flags;

if (!pte_young(*ptep))
return 0;

spin_lock_irqsave(pgd_spinlock(vma->vm_mm->pgd), flags);
pte = *ptep;
if (!pte_young(pte)) {
spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags);
return 0;
}
set_pte(ptep, pte_mkold(pte));
purge_tlb_entries(vma->vm_mm, addr);
spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags);
set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte));
return 1;
}

struct mm_struct;
static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
pte_t old_pte;
unsigned long flags;

spin_lock_irqsave(pgd_spinlock(mm->pgd), flags);
old_pte = *ptep;
set_pte(ptep, __pte(0));
purge_tlb_entries(mm, addr);
spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags);
set_pte_at(mm, addr, ptep, __pte(0));

return old_pte;
}

static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
unsigned long flags;
spin_lock_irqsave(pgd_spinlock(mm->pgd), flags);
set_pte(ptep, pte_wrprotect(*ptep));
purge_tlb_entries(mm, addr);
spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags);
set_pte_at(mm, addr, ptep, pte_wrprotect(*ptep));
}

#define pte_same(A,B) (pte_val(A) == pte_val(B))
Expand Down
1 change: 0 additions & 1 deletion arch/parisc/kernel/asm-offsets.c
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,6 @@ int main(void)
DEFINE(ASM_BITS_PER_PGD, BITS_PER_PGD);
DEFINE(ASM_BITS_PER_PMD, BITS_PER_PMD);
DEFINE(ASM_BITS_PER_PTE, BITS_PER_PTE);
DEFINE(ASM_PGD_PMD_OFFSET, -(PAGE_SIZE << PGD_ORDER));
DEFINE(ASM_PMD_ENTRY, ((PAGE_OFFSET & PMD_MASK) >> PMD_SHIFT));
DEFINE(ASM_PGD_ENTRY, PAGE_OFFSET >> PGDIR_SHIFT);
DEFINE(ASM_PGD_ENTRY_SIZE, PGD_ENTRY_SIZE);
Expand Down
Loading

0 comments on commit b779507

Please sign in to comment.