powerpc/64s/hash: add stress_hpt kernel boot option to increase hash faults

This option increases the number of hash misses by limiting the number
of kernel HPT entries, by keeping a per-CPU record of the last kernel
HPTEs installed, and removing that from the hash table on the next hash
insertion. A timer round-robins CPUs removing remaining kernel HPTEs and
clearing the TLB (in the case of bare metal) to increase and slightly
randomise kernel fault activity.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Add comment about NR_CPUS usage, fixup whitespace]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221024030150.852517-1-npiggin@gmail.com
Nicholas Piggin authored and Michael Ellerman committed Dec 2, 2022
1 parent dfecd06 commit 6b34a09
Showing 5 changed files with 160 additions and 1 deletion.
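As a rough illustration of the mechanism the commit message describes (each CPU remembers the hash groups of the last few kernel HPTEs it installed and evicts the oldest remembered group on the next insertion, so kernel hash faults keep recurring), here is a minimal user-space C sketch. It is not part of the patch: NR_CPUS_SIM, STRESS_GROUPS, record_insertion() and evict_group() are invented names, and evict_group() merely stands in for mmu_hash_ops.hpte_remove().

/*
 * Hypothetical user-space sketch, not kernel code: it only simulates the
 * per-CPU bookkeeping of recently installed kernel HPTE groups.
 */
#include <stdio.h>
#include <string.h>

#define NR_CPUS_SIM    4        /* invented for the simulation */
#define STRESS_GROUPS  16       /* mirrors STRESS_MAX_GROUPS in the patch */

struct stress_state {
        unsigned long last_group[STRESS_GROUPS];
};

static struct stress_state state[NR_CPUS_SIM];

/* Stand-in for removing all HPTEs of a hash group from the hash table. */
static void evict_group(unsigned long group)
{
        printf("evicting HPTE group %lu\n", group);
}

/* Analogue of hpt_do_stress(): evict the oldest remembered group, then
 * push the newly used group onto the head of the per-CPU FIFO. */
static void record_insertion(int cpu, unsigned long hpte_group)
{
        struct stress_state *s = &state[cpu];
        unsigned long oldest = s->last_group[STRESS_GROUPS - 1];

        if (oldest == hpte_group)
                return;
        if (oldest != -1UL)
                evict_group(oldest);

        memmove(&s->last_group[1], &s->last_group[0],
                (STRESS_GROUPS - 1) * sizeof(unsigned long));
        s->last_group[0] = hpte_group;
}

int main(void)
{
        /* 0xff in every byte makes each slot -1UL, meaning "empty". */
        memset(state, 0xff, sizeof(state));

        for (unsigned long g = 0; g < 20; g++)
                record_insertion(0, g);
        return 0;
}

In the patch itself, a timer additionally hops across the online CPUs (see stress_hpt_timer_fn() in hash_utils.c below) to clear any remaining recorded groups and, on bare metal, flush the TLB, which adds some randomisation to the kernel fault activity.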
5 changes: 5 additions & 0 deletions Documentation/admin-guide/kernel-parameters.txt
@@ -1042,6 +1042,11 @@
                         them frequently to increase the rate of SLB faults
                         on kernel addresses.
 
+        stress_hpt      [PPC]
+                        Limits the number of kernel HPT entries in the hash
+                        page table to increase the rate of hash page table
+                        faults on kernel addresses.
+
         disable=        [IPV6]
                         See Documentation/networking/ipv6.rst.
 
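A brief usage note, not part of the patch: like stress_slb above, stress_hpt takes no value, so it is enabled by appending the bare word stress_hpt to the existing kernel command line in the boot loader configuration of a hash-MMU powerpc system.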
5 changes: 5 additions & 0 deletions arch/powerpc/mm/book3s64/hash_4k.c
@@ -16,6 +16,8 @@
 #include <asm/machdep.h>
 #include <asm/mmu.h>
 
+#include "internal.h"
+
 int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
                    pte_t *ptep, unsigned long trap, unsigned long flags,
                    int ssize, int subpg_prot)
@@ -118,6 +120,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
                 }
                 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
                 new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+                if (stress_hpt())
+                        hpt_do_stress(ea, hpte_group);
         }
         *ptep = __pte(new_pte & ~H_PAGE_BUSY);
         return 0;
10 changes: 10 additions & 0 deletions arch/powerpc/mm/book3s64/hash_64k.c
@@ -16,6 +16,8 @@
 #include <asm/machdep.h>
 #include <asm/mmu.h>
 
+#include "internal.h"
+
 /*
  * Return true, if the entry has a slot value which
  * the software considers as invalid.
@@ -216,6 +218,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
         new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
         new_pte |= H_PAGE_HASHPTE;
 
+        if (stress_hpt())
+                hpt_do_stress(ea, hpte_group);
+
         *ptep = __pte(new_pte & ~H_PAGE_BUSY);
         return 0;
 }
@@ -327,7 +332,12 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 
                 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
                 new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+                if (stress_hpt())
+                        hpt_do_stress(ea, hpte_group);
         }
+
         *ptep = __pte(new_pte & ~H_PAGE_BUSY);
+
         return 0;
 }
130 changes: 129 additions & 1 deletion arch/powerpc/mm/book3s64/hash_utils.c
@@ -471,7 +471,7 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
         return ret;
 }
 
-static bool disable_1tb_segments = false;
+static bool disable_1tb_segments __ro_after_init;
 
 static int __init parse_disable_1tb_segments(char *p)
 {
@@ -480,6 +480,40 @@ static int __init parse_disable_1tb_segments(char *p)
 }
 early_param("disable_1tb_segments", parse_disable_1tb_segments);
 
+bool stress_hpt_enabled __initdata;
+
+static int __init parse_stress_hpt(char *p)
+{
+        stress_hpt_enabled = true;
+        return 0;
+}
+early_param("stress_hpt", parse_stress_hpt);
+
+__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_hpt_key);
+
+/*
+ * per-CPU array allocated if we enable stress_hpt.
+ */
+#define STRESS_MAX_GROUPS 16
+struct stress_hpt_struct {
+        unsigned long last_group[STRESS_MAX_GROUPS];
+};
+
+static inline int stress_nr_groups(void)
+{
+        /*
+         * LPAR H_REMOVE flushes TLB, so need some number > 1 of entries
+         * to allow practical forward progress. Bare metal returns 1, which
+         * seems to help uncover more bugs.
+         */
+        if (firmware_has_feature(FW_FEATURE_LPAR))
+                return STRESS_MAX_GROUPS;
+        else
+                return 1;
+}
+
+static struct stress_hpt_struct *stress_hpt_struct;
+
 static int __init htab_dt_scan_seg_sizes(unsigned long node,
                                          const char *uname, int depth,
                                          void *data)
@@ -976,6 +1010,23 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
         pr_info("Partition table %p\n", partition_tb);
 }
 
+void hpt_clear_stress(void);
+static struct timer_list stress_hpt_timer;
+void stress_hpt_timer_fn(struct timer_list *timer)
+{
+        int next_cpu;
+
+        hpt_clear_stress();
+        if (!firmware_has_feature(FW_FEATURE_LPAR))
+                tlbiel_all();
+
+        next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
+        if (next_cpu >= nr_cpu_ids)
+                next_cpu = cpumask_first(cpu_online_mask);
+        stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10);
+        add_timer_on(&stress_hpt_timer, next_cpu);
+}
+
 static void __init htab_initialize(void)
 {
         unsigned long table;
@@ -995,6 +1046,20 @@ static void __init htab_initialize(void)
         if (stress_slb_enabled)
                 static_branch_enable(&stress_slb_key);
 
+        if (stress_hpt_enabled) {
+                unsigned long tmp;
+                static_branch_enable(&stress_hpt_key);
+                // Too early to use nr_cpu_ids, so use NR_CPUS
+                tmp = memblock_phys_alloc_range(sizeof(struct stress_hpt_struct) * NR_CPUS,
+                                                0, 0, MEMBLOCK_ALLOC_ANYWHERE);
+                memset((void *)tmp, 0xff, sizeof(struct stress_hpt_struct) * NR_CPUS);
+                stress_hpt_struct = __va(tmp);
+
+                timer_setup(&stress_hpt_timer, stress_hpt_timer_fn, 0);
+                stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10);
+                add_timer(&stress_hpt_timer);
+        }
+
         /*
          * Calculate the required size of the htab. We want the number of
          * PTEGs to equal one half the number of real pages.
@@ -1980,6 +2045,69 @@ long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
         return slot;
 }
 
+void hpt_clear_stress(void)
+{
+        int cpu = raw_smp_processor_id();
+        int g;
+
+        for (g = 0; g < stress_nr_groups(); g++) {
+                unsigned long last_group;
+                last_group = stress_hpt_struct[cpu].last_group[g];
+
+                if (last_group != -1UL) {
+                        int i;
+                        for (i = 0; i < HPTES_PER_GROUP; i++) {
+                                if (mmu_hash_ops.hpte_remove(last_group) == -1)
+                                        break;
+                        }
+                        stress_hpt_struct[cpu].last_group[g] = -1;
+                }
+        }
+}
+
+void hpt_do_stress(unsigned long ea, unsigned long hpte_group)
+{
+        unsigned long last_group;
+        int cpu = raw_smp_processor_id();
+
+        last_group = stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1];
+        if (hpte_group == last_group)
+                return;
+
+        if (last_group != -1UL) {
+                int i;
+                /*
+                 * Concurrent CPUs might be inserting into this group, so
+                 * give up after a number of iterations, to prevent a live
+                 * lock.
+                 */
+                for (i = 0; i < HPTES_PER_GROUP; i++) {
+                        if (mmu_hash_ops.hpte_remove(last_group) == -1)
+                                break;
+                }
+                stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1] = -1;
+        }
+
+        if (ea >= PAGE_OFFSET) {
+                /*
+                 * We would really like to prefetch to get the TLB loaded, then
+                 * remove the PTE before returning from fault interrupt, to
+                 * increase the hash fault rate.
+                 *
+                 * Unfortunately QEMU TCG does not model the TLB in a way that
+                 * makes this possible, and systemsim (mambo) emulator does not
+                 * bring in TLBs with prefetches (although loads/stores do
+                 * work for non-CI PTEs).
+                 *
+                 * So remember this PTE and clear it on the next hash fault.
+                 */
+                memmove(&stress_hpt_struct[cpu].last_group[1],
+                        &stress_hpt_struct[cpu].last_group[0],
+                        (stress_nr_groups() - 1) * sizeof(unsigned long));
+                stress_hpt_struct[cpu].last_group[0] = hpte_group;
+        }
+}
+
 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
 static DEFINE_RAW_SPINLOCK(linear_map_hash_lock);
 
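A back-of-the-envelope sizing note on the memblock allocation in htab_initialize() above (the arithmetic is not stated in the patch): struct stress_hpt_struct holds STRESS_MAX_GROUPS = 16 unsigned longs, i.e. 16 * 8 = 128 bytes per possible CPU, so the reservation is NR_CPUS * 128 bytes, for example 256 KiB for a kernel built with NR_CPUS=2048. The 0xff memset fills every slot with -1UL, which hpt_do_stress() and hpt_clear_stress() treat as "no group recorded".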
11 changes: 11 additions & 0 deletions arch/powerpc/mm/book3s64/internal.h
@@ -13,6 +13,17 @@ static inline bool stress_slb(void)
         return static_branch_unlikely(&stress_slb_key);
 }
 
+extern bool stress_hpt_enabled;
+
+DECLARE_STATIC_KEY_FALSE(stress_hpt_key);
+
+static inline bool stress_hpt(void)
+{
+        return static_branch_unlikely(&stress_hpt_key);
+}
+
+void hpt_do_stress(unsigned long ea, unsigned long hpte_group);
+
 void slb_setup_new_exec(void);
 
 void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush);
