Skip to content

Commit

Permalink
Merge branch 'topic/ppc-kvm' into next
Browse files Browse the repository at this point in the history
This merges Nick's big P9 KVM series; the original cover letter follows:

KVM: PPC: Book3S HV P9: entry/exit optimisations

This reduces radix guest full entry/exit latency on POWER9 and POWER10
by 2x.

Nested HV guests should see smaller improvements in their L1 entry/exit,
but most of the L0 speedups also apply to nested entry, so the two
combine. An nginx localhost throughput test in an SMP nested guest improves
by about 10% when both L0 and L1 are patched (in a direct guest it doesn't
change much, because it uses XIVE for IPIs).

It does this in several main ways:

- Rearrange code to optimise SPR accesses. Mainly, avoid scoreboard
  stalls.

- Test SPR values to avoid mtSPRs where possible. mtSPRs are expensive.

- Reduce mftb. mftb is expensive.

- Demand fault certain facilities to avoid saving and/or restoring them
  (at the cost of a fault when they are used, but this is mitigated over
  a number of entries, like the facilities when context switching
  processes). PM, TM, and EBB so far.

- Defer some sequences that are made just in case a guest is interrupted
  in the middle of a critical section to the case where the guest is
  scheduled on a different CPU, rather than every time (at the cost of
  an extra IPI in this case). Namely the tlbsync sequence for radix with
  GTSE, which is very expensive.

- Reduce locking, barriers, atomics related to the vcpus-per-vcore > 1
  handling that the P9 path does not require.
  • Loading branch information
Michael Ellerman committed Nov 25, 2021
2 parents 1360572 + 9c5a432 commit ff0d6be
Show file tree
Hide file tree
Showing 30 changed files with 1,555 additions and 714 deletions.
8 changes: 8 additions & 0 deletions Documentation/admin-guide/kernel-parameters.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4144,6 +4144,14 @@
Override pmtimer IOPort with a hex value.
e.g. pmtmr=0x508

pmu_override= [PPC] Override the PMU.
This option takes over the PMU facility, so it is no
longer usable by perf. Setting this option starts the
PMU counters by setting MMCR0 to 0 (the FC bit is
cleared). If a number is given, then MMCR1 is set to
that number, otherwise (e.g., 'pmu_override=on'), MMCR1
remains 0.

pm_debug_messages [SUSPEND,KNL]
Enable suspend/resume debug messages during boot up.

Expand Down
5 changes: 0 additions & 5 deletions arch/powerpc/include/asm/asm-prototypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,6 @@ static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
bool preserve_nv) { }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */

void kvmhv_save_host_pmu(void);
void kvmhv_load_host_pmu(void);
void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);

void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);

long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
Expand Down
1 change: 1 addition & 0 deletions arch/powerpc/include/asm/kvm_asm.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
#define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800
#define BOOK3S_INTERRUPT_DECREMENTER 0x900
#define BOOK3S_INTERRUPT_HV_DECREMENTER 0x980
#define BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER 0x1980
#define BOOK3S_INTERRUPT_DOORBELL 0xa00
#define BOOK3S_INTERRUPT_SYSCALL 0xc00
#define BOOK3S_INTERRUPT_TRACE 0xd00
Expand Down
6 changes: 6 additions & 0 deletions arch/powerpc/include/asm/kvm_book3s.h
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,12 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
return vcpu->arch.fault_dar;
}

/*
 * Expiry time of the vcpu DEC expressed against the host TB: the stored
 * expiry has the vcore's tb_offset subtracted from it.
 */
static inline u64 kvmppc_dec_expires_host_tb(struct kvm_vcpu *vcpu)
{
	u64 expiry = vcpu->arch.dec_expires;

	expiry -= vcpu->arch.vcore->tb_offset;
	return expiry;
}

static inline bool is_kvmppc_resume_guest(int r)
{
return (r == RESUME_GUEST || r == RESUME_GUEST_NV);
Expand Down
5 changes: 3 additions & 2 deletions arch/powerpc/include/asm/kvm_book3s_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ struct kvm_nested_guest {
struct mutex tlb_lock; /* serialize page faults and tlbies */
struct kvm_nested_guest *next;
cpumask_t need_tlb_flush;
cpumask_t cpu_in_guest;
short prev_cpu[NR_CPUS];
u8 radix; /* is this nested guest radix */
};
Expand Down Expand Up @@ -154,7 +153,9 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
return radix;
}

int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr);
unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr);

int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb);

#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif
Expand Down
7 changes: 5 additions & 2 deletions arch/powerpc/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,6 @@ struct kvm_arch {
u32 online_vcores;
atomic_t hpte_mod_interest;
cpumask_t need_tlb_flush;
cpumask_t cpu_in_guest;
u8 radix;
u8 fwnmi_enabled;
u8 secure_guest;
Expand Down Expand Up @@ -579,6 +578,10 @@ struct kvm_vcpu_arch {
ulong cfar;
ulong ppr;
u32 pspb;
u8 load_ebb;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
u8 load_tm;
#endif
ulong fscr;
ulong shadow_fscr;
ulong ebbhr;
Expand Down Expand Up @@ -741,7 +744,7 @@ struct kvm_vcpu_arch {

struct hrtimer dec_timer;
u64 dec_jiffies;
u64 dec_expires;
u64 dec_expires; /* Relative to guest timebase. */
unsigned long pending_exceptions;
u8 ceded;
u8 prodded;
Expand Down
4 changes: 2 additions & 2 deletions arch/powerpc/include/asm/kvm_ppc.h
Original file line number Diff line number Diff line change
Expand Up @@ -552,8 +552,7 @@ extern void kvm_hv_vm_activated(void);
extern void kvm_hv_vm_deactivated(void);
extern bool kvm_hv_mode_active(void);

extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
struct kvm_nested_guest *nested);
extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu);

#else
static inline void __init kvm_cma_reserve(void)
Expand Down Expand Up @@ -760,6 +759,7 @@ void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
void kvmppc_subcore_enter_guest(void);
void kvmppc_subcore_exit_guest(void);
long kvmppc_realmode_hmi_handler(void);
long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu);
long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel);
long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
Expand Down
3 changes: 3 additions & 0 deletions arch/powerpc/include/asm/switch_to.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ static inline void clear_task_ebb(struct task_struct *t)
#endif
}

void kvmppc_save_user_regs(void);
void kvmppc_save_current_sprs(void);

extern int set_thread_tidr(struct task_struct *t);

#endif /* _ASM_POWERPC_SWITCH_TO_H */
19 changes: 9 additions & 10 deletions arch/powerpc/include/asm/time.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <asm/vdso/timebase.h>

/* time.c */
extern u64 decrementer_max;

extern unsigned long tb_ticks_per_jiffy;
extern unsigned long tb_ticks_per_usec;
extern unsigned long tb_ticks_per_sec;
Expand Down Expand Up @@ -97,19 +99,16 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low,
extern void secondary_cpu_time_init(void);
extern void __init time_init(void);

#ifdef CONFIG_PPC64
static inline unsigned long test_irq_work_pending(void)
{
unsigned long x;
DECLARE_PER_CPU(u64, decrementers_next_tb);

asm volatile("lbz %0,%1(13)"
: "=r" (x)
: "i" (offsetof(struct paca_struct, irq_work_pending)));
return x;
static inline u64 timer_get_next_tb(void)
{
return __this_cpu_read(decrementers_next_tb);
}
#endif

DECLARE_PER_CPU(u64, decrementers_next_tb);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
void timer_rearm_host_dec(u64 now);
#endif

/* Convert timebase ticks to nanoseconds */
unsigned long long tb_to_ns(unsigned long long tb_ticks);
Expand Down
12 changes: 10 additions & 2 deletions arch/powerpc/kernel/cpu_setup_power.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void)
static void init_PMU(void)
{
mtspr(SPRN_MMCRA, 0);
mtspr(SPRN_MMCR0, 0);
mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
}
Expand All @@ -123,7 +123,7 @@ static void init_PMU_ISA31(void)
{
mtspr(SPRN_MMCR3, 0);
mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
}

/*
Expand All @@ -137,6 +137,7 @@ void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t)
return;

mtspr(SPRN_LPID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
}
Expand All @@ -150,6 +151,7 @@ void __restore_cpu_power7(void)
return;

mtspr(SPRN_LPID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
}
Expand All @@ -164,6 +166,7 @@ void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t)
return;

mtspr(SPRN_LPID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
init_HFSCR();
Expand All @@ -184,6 +187,7 @@ void __restore_cpu_power8(void)
return;

mtspr(SPRN_LPID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
init_HFSCR();
Expand All @@ -202,6 +206,7 @@ void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
Expand All @@ -223,6 +228,7 @@ void __restore_cpu_power9(void)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
Expand All @@ -242,6 +248,7 @@ void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
Expand All @@ -264,6 +271,7 @@ void __restore_cpu_power10(void)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
Expand Down
8 changes: 5 additions & 3 deletions arch/powerpc/kernel/dt_cpu_ftrs.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ static void __restore_cpu_cpufeatures(void)
mtspr(SPRN_LPCR, system_registers.lpcr);
if (hv_mode) {
mtspr(SPRN_LPID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
mtspr(SPRN_PCR, system_registers.pcr);
}
Expand Down Expand Up @@ -216,6 +217,7 @@ static int __init feat_enable_hv(struct dt_cpu_feature *f)
}

mtspr(SPRN_LPID, 0);
mtspr(SPRN_AMOR, ~0);

lpcr = mfspr(SPRN_LPCR);
lpcr &= ~LPCR_LPES0; /* HV external interrupts */
Expand Down Expand Up @@ -351,7 +353,7 @@ static void init_pmu_power8(void)
}

mtspr(SPRN_MMCRA, 0);
mtspr(SPRN_MMCR0, 0);
mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
mtspr(SPRN_MMCRS, 0);
Expand Down Expand Up @@ -390,7 +392,7 @@ static void init_pmu_power9(void)
mtspr(SPRN_MMCRC, 0);

mtspr(SPRN_MMCRA, 0);
mtspr(SPRN_MMCR0, 0);
mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
}
Expand Down Expand Up @@ -426,7 +428,7 @@ static void init_pmu_power10(void)

mtspr(SPRN_MMCR3, 0);
mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
}

static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
Expand Down
34 changes: 34 additions & 0 deletions arch/powerpc/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -1156,6 +1156,40 @@ static inline void save_sprs(struct thread_struct *t)
#endif
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
 * Save parts of the current task's user register state, selected by the
 * facility bits set in its saved user MSR (current->thread.regs->msr).
 *
 * Does nothing when current->thread.regs is NULL.
 * NOTE(review): presumably that means a task without user context (e.g. a
 * kernel thread) -- confirm against callers.
 *
 * Only MSR_TM is cleared from the saved MSR here; MSR_FP and MSR_VEC are
 * left as they were.
 */
void kvmppc_save_user_regs(void)
{
unsigned long usermsr;

/* No saved user register frame: nothing to inspect or save. */
if (!current->thread.regs)
return;

/* Snapshot the MSR once; every facility test below uses this copy. */
usermsr = current->thread.regs->msr;

/* FP facility bit set in the user MSR: hand the task to save_fpu(). */
if (usermsr & MSR_FP)
save_fpu(current);

/* Vector facility bit set in the user MSR: hand the task to save_altivec(). */
if (usermsr & MSR_VEC)
save_altivec(current);

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
 * TM bit set in the user MSR: copy the TFHAR, TFIAR and TEXASR SPRs into
 * the thread struct, then drop MSR_TM from the saved MSR. The SPR reads
 * come before the MSR update; keep that order.
 */
if (usermsr & MSR_TM) {
current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
current->thread.tm_texasr = mfspr(SPRN_TEXASR);
current->thread.regs->msr &= ~MSR_TM;
}
#endif
}
EXPORT_SYMBOL_GPL(kvmppc_save_user_regs);

/*
 * Exported wrapper that runs the file-local save_sprs() on the current
 * task's thread_struct.
 */
void kvmppc_save_current_sprs(void)
{
	struct thread_struct *self = &current->thread;

	save_sprs(self);
}
EXPORT_SYMBOL_GPL(kvmppc_save_current_sprs);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

static inline void restore_sprs(struct thread_struct *old_thread,
struct thread_struct *new_thread)
{
Expand Down
Loading

0 comments on commit ff0d6be

Please sign in to comment.