Merge branch 'kvm-ppc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into HEAD

Apart from various bugfixes and code cleanups, the major new feature
is the ability to run guests using the hashed page table (HPT) MMU
mode on a host that is using the radix MMU mode.  Because of limitations
in the current POWER9 chip (all SMT threads in each core must use the
same MMU mode, HPT or radix), this requires the host to be configured
to run similarly to POWER8: the host runs in single-threaded mode (only
thread 0 of each core online), and KVM wakes up the other threads when
a guest is to be run, using them to run guest VCPUs.  A new module
parameter, "indep_threads_mode", is normally Y on POWER9 but must be
set to N before any HPT guests can be run on a radix host:

    # echo N >/sys/module/kvm_hv/parameters/indep_threads_mode
    # ppc64_cpu --smt=off

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Paolo Bonzini committed Nov 2, 2017
2 parents 9ffd986 + c010150 commit 6d6ab94
Showing 18 changed files with 797 additions and 338 deletions.
3 changes: 2 additions & 1 deletion arch/powerpc/include/asm/kvm_book3s.h
@@ -216,7 +216,8 @@ extern kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa,
bool writing, bool *writable);
extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
unsigned long *rmap, long pte_index, int realmode);
extern void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize);
extern void kvmppc_update_dirty_map(struct kvm_memory_slot *memslot,
unsigned long gfn, unsigned long psize);
extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
unsigned long pte_index);
void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
140 changes: 119 additions & 21 deletions arch/powerpc/include/asm/kvm_book3s_64.h
@@ -20,6 +20,8 @@
#ifndef __ASM_KVM_BOOK3S_64_H__
#define __ASM_KVM_BOOK3S_64_H__

#include <linux/string.h>
#include <asm/bitops.h>
#include <asm/book3s/64/mmu-hash.h>

/* Power architecture requires HPT is at least 256kiB, at most 64TiB */
@@ -107,18 +109,96 @@ static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
hpte[0] = cpu_to_be64(hpte_v);
}

/*
* These functions encode knowledge of the POWER7/8/9 hardware
* interpretations of the HPTE LP (large page size) field.
*/
static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
{
unsigned int lphi;

if (!(h & HPTE_V_LARGE))
return 12; /* 4kB */
lphi = (l >> 16) & 0xf;
switch ((l >> 12) & 0xf) {
case 0:
return !lphi ? 24 : -1; /* 16MB */
break;
case 1:
return 16; /* 64kB */
break;
case 3:
return !lphi ? 34 : -1; /* 16GB */
break;
case 7:
return (16 << 8) + 12; /* 64kB in 4kB */
break;
case 8:
if (!lphi)
return (24 << 8) + 16; /* 16MB in 64kB */
if (lphi == 3)
return (24 << 8) + 12; /* 16MB in 4kB */
break;
}
return -1;
}

static inline int kvmppc_hpte_base_page_shift(unsigned long h, unsigned long l)
{
return kvmppc_hpte_page_shifts(h, l) & 0xff;
}

static inline int kvmppc_hpte_actual_page_shift(unsigned long h, unsigned long l)
{
int tmp = kvmppc_hpte_page_shifts(h, l);

if (tmp >= 0x100)
tmp >>= 8;
return tmp;
}

static inline unsigned long kvmppc_actual_pgsz(unsigned long v, unsigned long r)
{
return 1ul << kvmppc_hpte_actual_page_shift(v, r);
}

static inline int kvmppc_pgsize_lp_encoding(int base_shift, int actual_shift)
{
switch (base_shift) {
case 12:
switch (actual_shift) {
case 12:
return 0;
case 16:
return 7;
case 24:
return 0x38;
}
break;
case 16:
switch (actual_shift) {
case 16:
return 1;
case 24:
return 8;
}
break;
case 24:
return 0;
}
return -1;
}
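
The helpers above use a packed convention: kvmppc_hpte_page_shifts() returns the base page shift in its low byte and, when the HPTE maps a larger actual page built from smaller base pages, carries the actual page shift in bits 8-15; kvmppc_hpte_base_page_shift() and kvmppc_hpte_actual_page_shift() simply unpack that. The standalone sketch below (illustrative userspace code with made-up helper names, not part of this commit) demonstrates the unpacking for the "16MB in 64kB" case encoded as (24 << 8) + 16:

    #include <assert.h>
    #include <stdio.h>

    /* Unpack the low-byte / high-byte page-shift convention described above. */
    static int base_page_shift(int packed)
    {
        return packed & 0xff;
    }

    static int actual_page_shift(int packed)
    {
        return packed >= 0x100 ? packed >> 8 : packed;
    }

    int main(void)
    {
        int plain = 16;             /* 64kB page in a 64kB base page */
        int mixed = (24 << 8) + 16; /* 16MB page built from 64kB base pages */

        assert(base_page_shift(plain) == 16 && actual_page_shift(plain) == 16);
        assert(base_page_shift(mixed) == 16 && actual_page_shift(mixed) == 24);

        printf("actual %luMB, base %lukB\n",
               (1ul << actual_page_shift(mixed)) >> 20,
               (1ul << base_page_shift(mixed)) >> 10);
        return 0;
    }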

static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
unsigned long pte_index)
{
int i, b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
unsigned int penc;
int a_pgshift, b_pgshift;
unsigned long rb = 0, va_low, sllp;
unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);

if (v & HPTE_V_LARGE) {
i = hpte_page_sizes[lp];
b_psize = i & 0xf;
a_psize = i >> 4;
b_pgshift = a_pgshift = kvmppc_hpte_page_shifts(v, r);
if (a_pgshift >= 0x100) {
b_pgshift &= 0xff;
a_pgshift >>= 8;
}

/*
@@ -152,37 +232,33 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
va_low ^= v >> (SID_SHIFT_1T - 16);
va_low &= 0x7ff;

switch (b_psize) {
case MMU_PAGE_4K:
sllp = get_sllp_encoding(a_psize);
rb |= sllp << 5; /* AP field */
if (b_pgshift == 12) {
if (a_pgshift > 12) {
sllp = (a_pgshift == 16) ? 5 : 4;
rb |= sllp << 5; /* AP field */
}
rb |= (va_low & 0x7ff) << 12; /* remaining 11 bits of AVA */
break;
default:
{
} else {
int aval_shift;
/*
* remaining bits of AVA/LP fields
* Also contain the rr bits of LP
*/
rb |= (va_low << mmu_psize_defs[b_psize].shift) & 0x7ff000;
rb |= (va_low << b_pgshift) & 0x7ff000;
/*
* Now clear not needed LP bits based on actual psize
*/
rb &= ~((1ul << mmu_psize_defs[a_psize].shift) - 1);
rb &= ~((1ul << a_pgshift) - 1);
/*
* AVAL field 58..77 - base_page_shift bits of va
* we have space for 58..64 bits, Missing bits should
* be zero filled. +1 is to take care of L bit shift
*/
aval_shift = 64 - (77 - mmu_psize_defs[b_psize].shift) + 1;
aval_shift = 64 - (77 - b_pgshift) + 1;
rb |= ((va_low << aval_shift) & 0xfe);

rb |= 1; /* L field */
penc = mmu_psize_defs[b_psize].penc[a_psize];
rb |= penc << 12; /* LP field */
break;
}
rb |= r & 0xff000 & ((1ul << a_pgshift) - 1); /* LP field */
}
rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */
return rb;
@@ -370,6 +446,28 @@ static inline unsigned long kvmppc_hpt_mask(struct kvm_hpt_info *hpt)
return (1UL << (hpt->order - 7)) - 1;
}

/* Set bits in a dirty bitmap, which is in LE format */
static inline void set_dirty_bits(unsigned long *map, unsigned long i,
unsigned long npages)
{

if (npages >= 8)
memset((char *)map + i / 8, 0xff, npages / 8);
else
for (; npages; ++i, --npages)
__set_bit_le(i, map);
}

static inline void set_dirty_bits_atomic(unsigned long *map, unsigned long i,
unsigned long npages)
{
if (npages >= 8)
memset((char *)map + i / 8, 0xff, npages / 8);
else
for (; npages; ++i, --npages)
set_bit_le(i, map);
}

#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

#endif /* __ASM_KVM_BOOK3S_64_H__ */
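
The new set_dirty_bits() and set_dirty_bits_atomic() helpers above mark a run of pages dirty in a little-endian bitmap, using a byte-wise memset() for long runs and per-bit sets otherwise. A minimal userspace sketch of the same idea follows (illustrative only: the names are invented and, unlike the kernel helpers, it checks byte alignment explicitly before taking the memset() shortcut):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* Little-endian bitmap: bit 0 of byte 0 is page 0, bit 0 of byte 1 is page 8. */
    static void le_set_bit(uint8_t *map, unsigned long n)
    {
        map[n / 8] |= 1u << (n % 8);
    }

    static void mark_dirty(uint8_t *map, unsigned long first, unsigned long npages)
    {
        if (npages >= 8 && first % 8 == 0 && npages % 8 == 0)
            memset(map + first / 8, 0xff, npages / 8); /* whole bytes at once */
        else
            for (; npages; ++first, --npages)
                le_set_bit(map, first);                /* one page at a time */
    }

    int main(void)
    {
        uint8_t bitmap[8] = { 0 };

        mark_dirty(bitmap, 16, 16); /* pages 16..31: fast path, bytes 2 and 3 */
        mark_dirty(bitmap, 3, 2);   /* pages 3 and 4: slow path, bits 3 and 4 */

        assert(bitmap[2] == 0xff && bitmap[3] == 0xff);
        assert(bitmap[0] == 0x18);
        return 0;
    }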
17 changes: 12 additions & 5 deletions arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -82,6 +82,16 @@ struct kvm_split_mode {
u8 do_nap;
u8 napped[MAX_SMT_THREADS];
struct kvmppc_vcore *vc[MAX_SUBCORES];
/* Bits for changing lpcr on P9 */
unsigned long lpcr_req;
unsigned long lpidr_req;
unsigned long host_lpcr;
u32 do_set;
u32 do_restore;
union {
u32 allphases;
u8 phase[4];
} lpcr_sync;
};
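
The lpcr_sync union added here gives each hardware thread of a POWER9 core its own byte-sized phase flag while letting a waiter observe all of them with a single 32-bit load of allphases. The sketch below shows only that union-aliasing pattern; it is hypothetical demo code, not the synchronization protocol this commit implements:

    #include <assert.h>
    #include <stdint.h>

    /* Four per-thread byte flags aliased by one 32-bit word. */
    union phase_sync {
        uint32_t allphases;
        uint8_t  phase[4];
    };

    int main(void)
    {
        union phase_sync sync = { .allphases = 0 };
        int tid;

        /* Each (simulated) thread marks its own byte when it reaches the phase. */
        for (tid = 0; tid < 4; tid++)
            sync.phase[tid] = 1;

        /*
         * One 32-bit compare replaces four byte loads; since every byte holds
         * the same value, the replicated mask reads the same on either endianness.
         */
        assert(sync.allphases == 0x01010101);
        return 0;
    }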

/*
@@ -104,14 +114,11 @@ struct kvmppc_host_state {
u8 napping;

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
* hwthread_req/hwthread_state pair is used to pull sibling threads
* out of guest on pre-ISAv3.0B CPUs where threads share MMU.
*/
u8 hwthread_req;
u8 hwthread_state;
u8 host_ipi;
u8 ptid;
u8 ptid; /* thread number within subcore when split */
u8 tid; /* thread number within whole core */
struct kvm_vcpu *kvm_vcpu;
struct kvmppc_vcore *kvm_vcore;
void __iomem *xics_phys;
6 changes: 2 additions & 4 deletions arch/powerpc/include/asm/kvm_host.h
@@ -235,10 +235,7 @@ struct revmap_entry {
*/
#define KVMPPC_RMAP_LOCK_BIT 63
#define KVMPPC_RMAP_RC_SHIFT 32
#define KVMPPC_RMAP_CHG_SHIFT 48
#define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_CHANGED (HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_CHG_ORDER (0x3ful << KVMPPC_RMAP_CHG_SHIFT)
#define KVMPPC_RMAP_PRESENT 0x100000000ul
#define KVMPPC_RMAP_INDEX 0xfffffffful

@@ -276,14 +273,15 @@ struct kvm_arch {
int tlbie_lock;
unsigned long lpcr;
unsigned long vrma_slb_v;
int hpte_setup_done;
int mmu_ready;
atomic_t vcpus_running;
u32 online_vcores;
atomic_t hpte_mod_interest;
cpumask_t need_tlb_flush;
cpumask_t cpu_in_guest;
u8 radix;
u8 fwnmi_enabled;
bool threads_indep;
pgd_t *pgtable;
u64 process_table;
struct dentry *debugfs_dir;
3 changes: 3 additions & 0 deletions arch/powerpc/include/asm/kvm_ppc.h
@@ -168,6 +168,7 @@ extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order);
extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info);
extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order);
extern void kvmppc_free_hpt(struct kvm_hpt_info *info);
extern void kvmppc_rmap_reset(struct kvm *kvm);
extern long kvmppc_prepare_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
@@ -177,6 +178,8 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
struct iommu_group *grp);
extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
struct iommu_group *grp);
extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);

extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce_64 *args);
3 changes: 3 additions & 0 deletions arch/powerpc/kernel/asm-offsets.c
@@ -642,6 +642,7 @@ int main(void)
HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_PTID, ptid);
HSTATE_FIELD(HSTATE_TID, tid);
HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]);
@@ -667,6 +668,8 @@ int main(void)
OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar);
OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap);
OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped);
OFFSET(KVM_SPLIT_DO_SET, kvm_split_mode, do_set);
OFFSET(KVM_SPLIT_DO_RESTORE, kvm_split_mode, do_restore);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

#ifdef CONFIG_PPC_BOOK3S_64
35 changes: 11 additions & 24 deletions arch/powerpc/kernel/idle_book3s.S
@@ -319,20 +319,13 @@ enter_winkle:
/*
* r3 - PSSCR value corresponding to the requested stop state.
*/
power_enter_stop:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
power_enter_stop_kvm_rm:
/*
* This is currently unused because POWER9 KVM does not have to
* gather secondary threads into sibling mode, but the code is
* here in case that function is required.
*
* Tell KVM we're entering idle.
*/
/* Tell KVM we're entering idle */
li r4,KVM_HWTHREAD_IN_IDLE
/* DO THIS IN REAL MODE! See comment above. */
stb r4,HSTATE_HWTHREAD_STATE(r13)
#endif
power_enter_stop:
/*
* Check if we are executing the lite variant with ESL=EC=0
*/
@@ -496,18 +489,6 @@ pnv_powersave_wakeup_mce:

b pnv_powersave_wakeup

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
kvm_start_guest_check:
li r0,KVM_HWTHREAD_IN_KERNEL
stb r0,HSTATE_HWTHREAD_STATE(r13)
/* Order setting hwthread_state vs. testing hwthread_req */
sync
lbz r0,HSTATE_HWTHREAD_REQ(r13)
cmpwi r0,0
beqlr
b kvm_start_guest
#endif

/*
* Called from reset vector for powersave wakeups.
* cr3 - set to gt if waking up with partial/complete hypervisor state loss
@@ -532,9 +513,15 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mr r3,r12

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
BEGIN_FTR_SECTION
bl kvm_start_guest_check
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
li r0,KVM_HWTHREAD_IN_KERNEL
stb r0,HSTATE_HWTHREAD_STATE(r13)
/* Order setting hwthread_state vs. testing hwthread_req */
sync
lbz r0,HSTATE_HWTHREAD_REQ(r13)
cmpwi r0,0
beq 1f
b kvm_start_guest
1:
#endif

/* Return SRR1 from power7_nap() */
(Diff for the remaining 11 changed files not shown.)