Skip to content

Commit

Permalink
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Browse files Browse the repository at this point in the history
Pull kvm fixes from Paolo Bonzini:
 "ARM:

   - Plug a race in the stage-2 mapping code where the IPA and the PA
     would end up being out of sync

   - Make better use of the bitmap API (bitmap_zero, bitmap_zalloc...)

   - FP/SVE/SME documentation update, in the hope that this field
     becomes clearer...

   - Add workaround for Apple SEIS brokenness to a new SoC

   - Random comment fixes

  x86:

   - add MSR_IA32_TSX_CTRL into msrs_to_save

   - fixes for XCR0 handling in SGX enclaves

  Generic:

   - Fix vcpu_array[0] races

   - Fix race between starting a VM and 'reboot -f'"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: VMX: add MSR_IA32_TSX_CTRL into msrs_to_save
  KVM: x86: Don't adjust guest's CPUID.0x12.1 (allowed SGX enclave XFRM)
  KVM: VMX: Don't rely _only_ on CPUID to enforce XCR0 restrictions for ECREATE
  KVM: Fix vcpu_array[0] races
  KVM: VMX: Fix header file dependency of asm/vmx.h
  KVM: Don't enable hardware after a restart/shutdown is initiated
  KVM: Use syscore_ops instead of reboot_notifier to hook restart/shutdown
  KVM: arm64: vgic: Add Apple M2 PRO/MAX cpus to the list of broken SEIS implementations
  KVM: arm64: Clarify host SME state management
  KVM: arm64: Restructure check for SVE support in FP trap handler
  KVM: arm64: Document check for TIF_FOREIGN_FPSTATE
  KVM: arm64: Fix repeated words in comments
  KVM: arm64: Constify start/end/phys fields of the pgtable walker data
  KVM: arm64: Infer PA offset from VA in hyp map walker
  KVM: arm64: Infer the PA offset from IPA in stage-2 map walker
  KVM: arm64: Use the bitmap API to allocate bitmaps
  KVM: arm64: Slightly optimize flush_context()
  • Loading branch information
Linus Torvalds committed May 21, 2023
2 parents c47d122 + b9846a6 commit a35747c
Show file tree
Hide file tree
Showing 13 changed files with 129 additions and 66 deletions.
8 changes: 8 additions & 0 deletions arch/arm64/include/asm/cputype.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@
#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
#define APPLE_CPU_PART_M2_BLIZZARD 0x032
#define APPLE_CPU_PART_M2_AVALANCHE 0x033
#define APPLE_CPU_PART_M2_BLIZZARD_PRO 0x034
#define APPLE_CPU_PART_M2_AVALANCHE_PRO 0x035
#define APPLE_CPU_PART_M2_BLIZZARD_MAX 0x038
#define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039

#define AMPERE_CPU_PART_AMPERE1 0xAC3

Expand Down Expand Up @@ -181,6 +185,10 @@
#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)
#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)
#define MIDR_APPLE_M2_BLIZZARD_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_PRO)
#define MIDR_APPLE_M2_AVALANCHE_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_PRO)
#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)
#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX)
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)

/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
Expand Down
1 change: 1 addition & 0 deletions arch/arm64/include/asm/kvm_pgtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ struct kvm_pgtable_visit_ctx {
kvm_pte_t old;
void *arg;
struct kvm_pgtable_mm_ops *mm_ops;
u64 start;
u64 addr;
u64 end;
u32 level;
Expand Down
26 changes: 17 additions & 9 deletions arch/arm64/kvm/fpsimd.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,26 +81,34 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)

fpsimd_kvm_prepare();

/*
* We will check TIF_FOREIGN_FPSTATE just before entering the
* guest in kvm_arch_vcpu_ctxflush_fp() and override this to
* FP_STATE_FREE if the flag set.
*/
vcpu->arch.fp_state = FP_STATE_HOST_OWNED;

vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
vcpu_set_flag(vcpu, HOST_SVE_ENABLED);

/*
* We don't currently support SME guests but if we leave
* things in streaming mode then when the guest starts running
* FPSIMD or SVE code it may generate SME traps so as a
* special case if we are in streaming mode we force the host
* state to be saved now and exit streaming mode so that we
* don't have to handle any SME traps for valid guest
* operations. Do this for ZA as well for now for simplicity.
*/
if (system_supports_sme()) {
vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
vcpu_set_flag(vcpu, HOST_SME_ENABLED);

/*
* If PSTATE.SM is enabled then save any pending FP
* state and disable PSTATE.SM. If we leave PSTATE.SM
* enabled and the guest does not enable SME via
* CPACR_EL1.SMEN then operations that should be valid
* may generate SME traps from EL1 to EL1 which we
* can't intercept and which would confuse the guest.
*
* Do the same for PSTATE.ZA in the case where there
* is state in the registers which has not already
* been saved, this is very unlikely to happen.
*/
if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
vcpu->arch.fp_state = FP_STATE_FREE;
fpsimd_save_and_flush_cpu_state();
Expand Down
12 changes: 10 additions & 2 deletions arch/arm64/kvm/hyp/include/hyp/switch.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,17 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
sve_guest = vcpu_has_sve(vcpu);
esr_ec = kvm_vcpu_trap_get_class(vcpu);

/* Don't handle SVE traps for non-SVE vcpus here: */
if (!sve_guest && esr_ec != ESR_ELx_EC_FP_ASIMD)
/* Only handle traps the vCPU can support here: */
switch (esr_ec) {
case ESR_ELx_EC_FP_ASIMD:
break;
case ESR_ELx_EC_SVE:
if (!sve_guest)
return false;
break;
default:
return false;
}

/* Valid trap. Switch the context: */

Expand Down
41 changes: 32 additions & 9 deletions arch/arm64/kvm/hyp/pgtable.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,9 @@
struct kvm_pgtable_walk_data {
struct kvm_pgtable_walker *walker;

const u64 start;
u64 addr;
u64 end;
const u64 end;
};

static bool kvm_phys_is_valid(u64 phys)
Expand Down Expand Up @@ -201,6 +202,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
.old = READ_ONCE(*ptep),
.arg = data->walker->arg,
.mm_ops = mm_ops,
.start = data->start,
.addr = data->addr,
.end = data->end,
.level = level,
Expand Down Expand Up @@ -293,6 +295,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
struct kvm_pgtable_walker *walker)
{
struct kvm_pgtable_walk_data walk_data = {
.start = ALIGN_DOWN(addr, PAGE_SIZE),
.addr = ALIGN_DOWN(addr, PAGE_SIZE),
.end = PAGE_ALIGN(walk_data.addr + size),
.walker = walker,
Expand Down Expand Up @@ -349,7 +352,7 @@ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
}

struct hyp_map_data {
u64 phys;
const u64 phys;
kvm_pte_t attr;
};

Expand Down Expand Up @@ -407,13 +410,12 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
struct hyp_map_data *data)
{
u64 phys = data->phys + (ctx->addr - ctx->start);
kvm_pte_t new;
u64 granule = kvm_granule_size(ctx->level), phys = data->phys;

if (!kvm_block_mapping_supported(ctx, phys))
return false;

data->phys += granule;
new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level);
if (ctx->old == new)
return true;
Expand Down Expand Up @@ -576,7 +578,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
}

struct stage2_map_data {
u64 phys;
const u64 phys;
kvm_pte_t attr;
u8 owner_id;

Expand Down Expand Up @@ -794,20 +796,43 @@ static bool stage2_pte_executable(kvm_pte_t pte)
return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
}

static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
const struct stage2_map_data *data)
{
u64 phys = data->phys;

/*
* Stage-2 walks to update ownership data are communicated to the map
* walker using an invalid PA. Avoid offsetting an already invalid PA,
* which could overflow and make the address valid again.
*/
if (!kvm_phys_is_valid(phys))
return phys;

/*
* Otherwise, work out the correct PA based on how far the walk has
* gotten.
*/
return phys + (ctx->addr - ctx->start);
}

static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
struct stage2_map_data *data)
{
u64 phys = stage2_map_walker_phys_addr(ctx, data);

if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1)))
return false;

return kvm_block_mapping_supported(ctx, data->phys);
return kvm_block_mapping_supported(ctx, phys);
}

static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
struct stage2_map_data *data)
{
kvm_pte_t new;
u64 granule = kvm_granule_size(ctx->level), phys = data->phys;
u64 phys = stage2_map_walker_phys_addr(ctx, data);
u64 granule = kvm_granule_size(ctx->level);
struct kvm_pgtable *pgt = data->mmu->pgt;
struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;

Expand Down Expand Up @@ -841,8 +866,6 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,

stage2_make_pte(ctx, new);

if (kvm_phys_is_valid(phys))
data->phys += granule;
return 0;
}

Expand Down
2 changes: 1 addition & 1 deletion arch/arm64/kvm/inject_fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
* Size Fault at level 0, as if exceeding PARange.
*
* Non-LPAE guests will only get the external abort, as there
* is no way to to describe the ASF.
* is no way to describe the ASF.
*/
if (vcpu_el1_is_32bit(vcpu) &&
!(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE))
Expand Down
4 changes: 4 additions & 0 deletions arch/arm64/kvm/vgic/vgic-v3.c
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,10 @@ static const struct midr_range broken_seis[] = {
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
{},
};

Expand Down
7 changes: 3 additions & 4 deletions arch/arm64/kvm/vmid.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ static void flush_context(void)
int cpu;
u64 vmid;

bitmap_clear(vmid_map, 0, NUM_USER_VMIDS);
bitmap_zero(vmid_map, NUM_USER_VMIDS);

for_each_possible_cpu(cpu) {
vmid = atomic64_xchg_relaxed(&per_cpu(active_vmids, cpu), 0);
Expand Down Expand Up @@ -182,8 +182,7 @@ int __init kvm_arm_vmid_alloc_init(void)
*/
WARN_ON(NUM_USER_VMIDS - 1 <= num_possible_cpus());
atomic64_set(&vmid_generation, VMID_FIRST_VERSION);
vmid_map = kcalloc(BITS_TO_LONGS(NUM_USER_VMIDS),
sizeof(*vmid_map), GFP_KERNEL);
vmid_map = bitmap_zalloc(NUM_USER_VMIDS, GFP_KERNEL);
if (!vmid_map)
return -ENOMEM;

Expand All @@ -192,5 +191,5 @@ int __init kvm_arm_vmid_alloc_init(void)

void __init kvm_arm_vmid_alloc_free(void)
{
kfree(vmid_map);
bitmap_free(vmid_map);
}
2 changes: 2 additions & 0 deletions arch/x86/include/asm/vmx.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@


#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/types.h>

#include <uapi/asm/vmx.h>
#include <asm/vmxfeatures.h>

Expand Down
16 changes: 0 additions & 16 deletions arch/x86/kvm/cpuid.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,6 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
int nent)
{
struct kvm_cpuid_entry2 *best;
u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent);

best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
if (best) {
Expand Down Expand Up @@ -292,21 +291,6 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
vcpu->arch.ia32_misc_enable_msr &
MSR_IA32_MISC_ENABLE_MWAIT);
}

/*
* Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate
* the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's
* requested XCR0 value. The enclave's XFRM must be a subset of XCRO
* at the time of EENTER, thus adjust the allowed XFRM by the guest's
* supported XCR0. Similar to XCR0 handling, FP and SSE are forced to
* '1' even on CPUs that don't support XSAVE.
*/
best = cpuid_entry2_find(entries, nent, 0x12, 0x1);
if (best) {
best->ecx &= guest_supported_xcr0 & 0xffffffff;
best->edx &= guest_supported_xcr0 >> 32;
best->ecx |= XFEATURE_MASK_FPSSE;
}
}

void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
Expand Down
11 changes: 9 additions & 2 deletions arch/x86/kvm/vmx/sgx.c
Original file line number Diff line number Diff line change
Expand Up @@ -170,12 +170,19 @@ static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
return 1;
}

/* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */
/*
* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. Note
* that the allowed XFRM (XFeature Request Mask) isn't strictly bound
* by the supported XCR0. FP+SSE *must* be set in XFRM, even if XSAVE
* is unsupported, i.e. even if XCR0 itself is completely unsupported.
*/
if ((u32)miscselect & ~sgx_12_0->ebx ||
(u32)attributes & ~sgx_12_1->eax ||
(u32)(attributes >> 32) & ~sgx_12_1->ebx ||
(u32)xfrm & ~sgx_12_1->ecx ||
(u32)(xfrm >> 32) & ~sgx_12_1->edx) {
(u32)(xfrm >> 32) & ~sgx_12_1->edx ||
xfrm & ~(vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE) ||
(xfrm & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
kvm_inject_gp(vcpu, 0);
return 1;
}
Expand Down
6 changes: 5 additions & 1 deletion arch/x86/kvm/x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -1446,7 +1446,7 @@ static const u32 msrs_to_save_base[] = {
#endif
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
MSR_IA32_SPEC_CTRL,
MSR_IA32_SPEC_CTRL, MSR_IA32_TSX_CTRL,
MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
Expand Down Expand Up @@ -7155,6 +7155,10 @@ static void kvm_probe_msr_to_save(u32 msr_index)
if (!kvm_cpu_cap_has(X86_FEATURE_XFD))
return;
break;
case MSR_IA32_TSX_CTRL:
if (!(kvm_get_arch_capabilities() & ARCH_CAP_TSX_CTRL_MSR))
return;
break;
default:
break;
}
Expand Down
Loading

0 comments on commit a35747c

Please sign in to comment.