diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 2b52eb77e29c..5fe84f2427b5 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -1000,6 +1000,10 @@
 blobs in userspace. When the guest writes the MSR, kvm copies one page of a
 blob (32- or 64-bit, depending on the vcpu mode) to guest memory.
 
+The MSR index must be in the range [0x40000000, 0x4fffffff], i.e. must reside
+in the range that is unofficially reserved for use by hypervisors. The min/max
+values are enumerated via KVM_XEN_MSR_MIN_INDEX and KVM_XEN_MSR_MAX_INDEX.
+
 ::
 
   struct kvm_xen_hvm_config {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 14c6a63b0de7..d881e7d276b1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1192,6 +1192,8 @@ struct kvm_xen {
 	struct gfn_to_pfn_cache shinfo_cache;
 	struct idr evtchn_ports;
 	unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+
+	struct kvm_xen_hvm_config hvm_config;
 };
 #endif
 
@@ -1412,8 +1414,6 @@ struct kvm_arch {
 	struct delayed_work kvmclock_update_work;
 	struct delayed_work kvmclock_sync_work;
 
-	struct kvm_xen_hvm_config xen_hvm_config;
-
 	/* reads protected by irq_srcu, writes by irq_lock */
 	struct hlist_head mask_notifier_list;
 
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 9e75da97bce0..460306b35a4b 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -559,6 +559,9 @@ struct kvm_x86_mce {
 #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE	(1 << 7)
 #define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA	(1 << 8)
 
+#define KVM_XEN_MSR_MIN_INDEX			0x40000000u
+#define KVM_XEN_MSR_MAX_INDEX			0x4fffffffu
+
 struct kvm_xen_hvm_config {
 	__u32 flags;
 	__u32 msr;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index f863a6f93a1a..5e4d4934c0d3 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -2008,6 +2008,22 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
 		} else if (function == 0x80000007) {
 			if (kvm_hv_invtsc_suppressed(vcpu))
 				*edx &= ~feature_bit(CONSTANT_TSC);
+		} else if (IS_ENABLED(CONFIG_KVM_XEN) &&
+			   kvm_xen_is_tsc_leaf(vcpu, function)) {
+			/*
+			 * Update guest TSC frequency information if necessary.
+			 * Ignore failures, there is no sane value that can be
+			 * provided if KVM can't get the TSC frequency.
+			 */
+			if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu))
+				kvm_guest_time_update(vcpu);
+
+			if (index == 1) {
+				*ecx = vcpu->arch.pvclock_tsc_mul;
+				*edx = vcpu->arch.pvclock_tsc_shift;
+			} else if (index == 2) {
+				*eax = vcpu->arch.hw_tsc_khz;
+			}
 		}
 	} else {
 		*eax = *ebx = *ecx = *edx = 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1d809ff292fd..69c20a68a3f0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3169,7 +3169,7 @@ static void kvm_setup_guest_pvclock(struct pvclock_vcpu_time_info *ref_hv_clock,
 	trace_kvm_pvclock_update(vcpu->vcpu_id, &hv_clock);
 }
 
-static int kvm_guest_time_update(struct kvm_vcpu *v)
+int kvm_guest_time_update(struct kvm_vcpu *v)
 {
 	struct pvclock_vcpu_time_info hv_clock = {};
 	unsigned long flags, tgt_tsc_khz;
@@ -3242,7 +3242,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 				   &vcpu->pvclock_tsc_shift,
 				   &vcpu->pvclock_tsc_mul);
 		vcpu->hw_tsc_khz = tgt_tsc_khz;
-		kvm_xen_update_tsc_info(v);
 	}
 
 	hv_clock.tsc_shift = vcpu->pvclock_tsc_shift;
@@ -3282,7 +3281,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	 *
 	 * Note! Clear TSC_STABLE only for Xen clocks, i.e. the order matters!
 	 */
-	if (ka->xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE)
+	if (ka->xen.hvm_config.flags & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE)
 		hv_clock.flags &= ~PVCLOCK_TSC_STABLE_BIT;
 
 	if (vcpu->xen.vcpu_info_cache.active)
@@ -3745,7 +3744,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	u32 msr = msr_info->index;
 	u64 data = msr_info->data;
 
-	if (msr && msr == vcpu->kvm->arch.xen_hvm_config.msr)
+	/*
+	 * Do not allow host-initiated writes to trigger the Xen hypercall
+	 * page setup; it could incur locking paths which are not expected
+	 * if userspace sets the MSR in an unusual location.
+	 */
+	if (kvm_xen_is_hypercall_page_msr(vcpu->kvm, msr) &&
+	    !msr_info->host_initiated)
 		return kvm_xen_write_hypercall_page(vcpu, data);
 
 	switch (msr) {
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 5c1fd5230cee..9dc32a409076 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -369,6 +369,7 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 u64 get_kvmclock_ns(struct kvm *kvm);
 uint64_t kvm_get_wall_clock_epoch(struct kvm *kvm);
 bool kvm_get_monotonic_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp);
+int kvm_guest_time_update(struct kvm_vcpu *v);
 
 int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
 	gva_t addr, void *val, unsigned int bytes,
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 0e6740ebd6db..7449be30d701 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -1335,10 +1335,10 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
 		 * Note, truncation is a non-issue as 'lm' is guaranteed to be
 		 * false for a 32-bit kernel, i.e. when hva_t is only 4 bytes.
 		 */
-		hva_t blob_addr = lm ? kvm->arch.xen_hvm_config.blob_addr_64
-				     : kvm->arch.xen_hvm_config.blob_addr_32;
-		u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
-				  : kvm->arch.xen_hvm_config.blob_size_32;
+		hva_t blob_addr = lm ? kvm->arch.xen.hvm_config.blob_addr_64
+				     : kvm->arch.xen.hvm_config.blob_addr_32;
+		u8 blob_size = lm ? kvm->arch.xen.hvm_config.blob_size_64
+				  : kvm->arch.xen.hvm_config.blob_size_32;
 		u8 *page;
 		int ret;
 
@@ -1379,15 +1379,24 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
 		     xhc->blob_size_32 || xhc->blob_size_64))
 		return -EINVAL;
 
+	/*
+	 * Restrict the MSR to the range that is unofficially reserved for
+	 * synthetic, virtualization-defined MSRs, e.g. to prevent confusing
+	 * KVM by colliding with a real MSR that requires special handling.
+	 */
+	if (xhc->msr &&
+	    (xhc->msr < KVM_XEN_MSR_MIN_INDEX || xhc->msr > KVM_XEN_MSR_MAX_INDEX))
+		return -EINVAL;
+
 	mutex_lock(&kvm->arch.xen.xen_lock);
 
-	if (xhc->msr && !kvm->arch.xen_hvm_config.msr)
+	if (xhc->msr && !kvm->arch.xen.hvm_config.msr)
 		static_branch_inc(&kvm_xen_enabled.key);
-	else if (!xhc->msr && kvm->arch.xen_hvm_config.msr)
+	else if (!xhc->msr && kvm->arch.xen.hvm_config.msr)
 		static_branch_slow_dec_deferred(&kvm_xen_enabled);
 
-	old_flags = kvm->arch.xen_hvm_config.flags;
-	memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));
+	old_flags = kvm->arch.xen.hvm_config.flags;
+	memcpy(&kvm->arch.xen.hvm_config, xhc, sizeof(*xhc));
 
 	mutex_unlock(&kvm->arch.xen.xen_lock);
 
@@ -1468,7 +1477,7 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
 	int i;
 
 	if (!lapic_in_kernel(vcpu) ||
-	    !(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND))
+	    !(vcpu->kvm->arch.xen.hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND))
 		return false;
 
 	if (IS_ENABLED(CONFIG_64BIT) && !longmode) {
@@ -2302,29 +2311,6 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
 	del_timer_sync(&vcpu->arch.xen.poll_timer);
 }
 
-void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *entry;
-	u32 function;
-
-	if (!vcpu->arch.xen.cpuid.base)
-		return;
-
-	function = vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3);
-	if (function > vcpu->arch.xen.cpuid.limit)
-		return;
-
-	entry = kvm_find_cpuid_entry_index(vcpu, function, 1);
-	if (entry) {
-		entry->ecx = vcpu->arch.pvclock_tsc_mul;
-		entry->edx = vcpu->arch.pvclock_tsc_shift;
-	}
-
-	entry = kvm_find_cpuid_entry_index(vcpu, function, 2);
-	if (entry)
-		entry->eax = vcpu->arch.hw_tsc_khz;
-}
-
 void kvm_xen_init_vm(struct kvm *kvm)
 {
 	mutex_init(&kvm->arch.xen.xen_lock);
@@ -2346,6 +2332,6 @@ void kvm_xen_destroy_vm(struct kvm *kvm)
 	}
 	idr_destroy(&kvm->arch.xen.evtchn_ports);
 
-	if (kvm->arch.xen_hvm_config.msr)
+	if (kvm->arch.xen.hvm_config.msr)
 		static_branch_slow_dec_deferred(&kvm_xen_enabled);
 }
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index f5841d9000ae..59e6128a7bd3 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -9,6 +9,7 @@
 #ifndef __ARCH_X86_KVM_XEN_H__
 #define __ARCH_X86_KVM_XEN_H__
 
+#include <asm/xen/cpuid.h>
 #include <asm/xen/hypervisor.h>
 
 #ifdef CONFIG_KVM_XEN
@@ -35,7 +36,6 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe,
 int kvm_xen_setup_evtchn(struct kvm *kvm,
 			 struct kvm_kernel_irq_routing_entry *e,
 			 const struct kvm_irq_routing_entry *ue);
-void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu);
 
 static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu)
 {
@@ -50,16 +50,32 @@ static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu)
 	kvm_xen_inject_vcpu_vector(vcpu);
 }
 
+static inline bool kvm_xen_is_tsc_leaf(struct kvm_vcpu *vcpu, u32 function)
+{
+	return static_branch_unlikely(&kvm_xen_enabled.key) &&
+	       vcpu->arch.xen.cpuid.base &&
+	       function <= vcpu->arch.xen.cpuid.limit &&
+	       function == (vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3));
+}
+
 static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
 {
 	return static_branch_unlikely(&kvm_xen_enabled.key) &&
-	       kvm->arch.xen_hvm_config.msr;
+	       kvm->arch.xen.hvm_config.msr;
+}
+
+static inline bool kvm_xen_is_hypercall_page_msr(struct kvm *kvm, u32 msr)
+{
+	if (!static_branch_unlikely(&kvm_xen_enabled.key))
+		return false;
+
+	return msr && msr == kvm->arch.xen.hvm_config.msr;
 }
 
 static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
 {
 	return static_branch_unlikely(&kvm_xen_enabled.key) &&
-	       (kvm->arch.xen_hvm_config.flags &
+	       (kvm->arch.xen.hvm_config.flags &
 		KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL);
 }
 
@@ -124,6 +140,11 @@ static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
 	return false;
 }
 
+static inline bool kvm_xen_is_hypercall_page_msr(struct kvm *kvm, u32 msr)
+{
+	return false;
+}
+
 static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
 {
 	return false;
@@ -157,8 +178,9 @@ static inline bool kvm_xen_timer_enabled(struct kvm_vcpu *vcpu)
 	return false;
 }
 
-static inline void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu)
+static inline bool kvm_xen_is_tsc_leaf(struct kvm_vcpu *vcpu, u32 function)
 {
+	return false;
 }
 
 #endif
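
Not part of the patch, for reviewers who want to exercise the new -EINVAL path
from userspace: a minimal sketch of programming the hypercall MSR through the
existing KVM_XEN_HVM_CONFIG vm ioctl. The enable_xen_hypercall_msr() helper,
the vm_fd argument and the example MSR indices are illustrative assumptions,
not anything this patch adds; error handling and the KVM_CAP_XEN_HVM check are
omitted.

#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Illustrative helper (not from this patch): ask KVM to intercept Xen
 * hypercalls via the guest-visible MSR at 'msr_index'.  vm_fd is assumed to
 * be an open VM file descriptor on a kernel that reports KVM_CAP_XEN_HVM.
 */
static int enable_xen_hypercall_msr(int vm_fd, __u32 msr_index)
{
	struct kvm_xen_hvm_config cfg = {
		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
		.msr   = msr_index,
	};

	/*
	 * With this patch applied, a non-zero index outside
	 * [KVM_XEN_MSR_MIN_INDEX, KVM_XEN_MSR_MAX_INDEX] is rejected here
	 * with -EINVAL, instead of being accepted and silently matched
	 * against guest MSR writes later.
	 */
	return ioctl(vm_fd, KVM_XEN_HVM_CONFIG, &cfg);
}

E.g. enable_xen_hypercall_msr(vm_fd, KVM_XEN_MSR_MIN_INDEX) still succeeds,
while an index outside the hypervisor range (say 0x1234) now fails up front.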