Skip to content

Commit

Permalink
KVM: nVMX: Implement reading and writing of VMX MSRs
Browse files Browse the repository at this point in the history
When the guest can use VMX instructions (when the "nested" module option is
on), it should also be able to read and write VMX MSRs, e.g., to query about
VMX capabilities. This patch adds this support.

Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
  • Loading branch information
Nadav Har'El authored and Avi Kivity committed Jul 12, 2011
1 parent a9d30f3 commit b87a51a
Show file tree
Hide file tree
Showing 2 changed files with 231 additions and 0 deletions.
12 changes: 12 additions & 0 deletions arch/x86/include/asm/msr-index.h
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,18 @@
#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a
#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d
#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f
#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490

/* VMX_BASIC bits and bitmasks */
#define VMX_BASIC_VMCS_SIZE_SHIFT 32
#define VMX_BASIC_64 0x0001000000000000LLU
#define VMX_BASIC_MEM_TYPE_SHIFT 50
#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
#define VMX_BASIC_MEM_TYPE_WB 6LLU
#define VMX_BASIC_INOUT 0x0040000000000000LLU

/* AMD-V MSRs */

Expand Down
219 changes: 219 additions & 0 deletions arch/x86/kvm/vmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1395,6 +1395,218 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
return nested && guest_cpuid_has_vmx(vcpu);
}

/*
* nested_vmx_setup_ctls_msrs() sets up variables containing the values to be
* returned for the various VMX controls MSRs when nested VMX is enabled.
* The same values should also be used to verify that vmcs12 control fields are
* valid during nested entry from L1 to L2.
* Each of these control msrs has a low and high 32-bit half: A low bit is on
* if the corresponding bit in the (32-bit) control field *must* be on, and a
* bit in the high half is on if the corresponding bit in the control field
* may be on. See also vmx_control_verify().
* TODO: allow these variables to be modified (downgraded) by module options
* or other means.
*/
static u32 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high;
static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high;
static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
static __init void nested_vmx_setup_ctls_msrs(void)
{
/*
* Note that as a general rule, the high half of the MSRs (bits in
* the control fields which may be 1) should be initialized by the
* intersection of the underlying hardware's MSR (i.e., features which
* can be supported) and the list of features we want to expose -
* because they are known to be properly supported in our code.
* Also, usually, the low half of the MSRs (bits which must be 1) can
* be set to 0, meaning that L1 may turn off any of these bits. The
* reason is that if one of these bits is necessary, it will appear
* in vmcs01 and prepare_vmcs02, when it bitwise-or's the control
* fields of vmcs01 and vmcs02, will turn these bits off - and
* nested_vmx_exit_handled() will not pass related exits to L1.
* These rules have exceptions below.
*/

/* pin-based controls */
/*
* According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is
* in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR.
*/
nested_vmx_pinbased_ctls_low = 0x16 ;
nested_vmx_pinbased_ctls_high = 0x16 |
PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
PIN_BASED_VIRTUAL_NMIS;

/* exit controls */
nested_vmx_exit_ctls_low = 0;
#ifdef CONFIG_X86_64
nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE;
#else
nested_vmx_exit_ctls_high = 0;
#endif

/* entry controls */
rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high);
nested_vmx_entry_ctls_low = 0;
nested_vmx_entry_ctls_high &=
VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;

/* cpu-based controls */
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
nested_vmx_procbased_ctls_low = 0;
nested_vmx_procbased_ctls_high &=
CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_USE_TSC_OFFSETING |
CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING |
#ifdef CONFIG_X86_64
CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
#endif
CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
/*
* We can allow some features even when not supported by the
* hardware. For example, L1 can specify an MSR bitmap - and we
* can use it to avoid exits to L1 - even when L0 runs L2
* without MSR bitmaps.
*/
nested_vmx_procbased_ctls_high |= CPU_BASED_USE_MSR_BITMAPS;

/* secondary cpu-based controls */
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high);
nested_vmx_secondary_ctls_low = 0;
nested_vmx_secondary_ctls_high &=
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
}

static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
{
/*
* Bits 0 in high must be 0, and bits 1 in low must be 1.
*/
return ((control & high) | low) == control;
}

static inline u64 vmx_control_msr(u32 low, u32 high)
{
return low | ((u64)high << 32);
}

/*
* If we allow our guest to use VMX instructions (i.e., nested VMX), we should
* also let it use VMX-specific MSRs.
* vmx_get_vmx_msr() and vmx_set_vmx_msr() return 1 when we handled a
* VMX-specific MSR, or 0 when we haven't (and the caller should handle it
* like all other MSRs).
*/
static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
{
if (!nested_vmx_allowed(vcpu) && msr_index >= MSR_IA32_VMX_BASIC &&
msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) {
/*
* According to the spec, processors which do not support VMX
* should throw a #GP(0) when VMX capability MSRs are read.
*/
kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
return 1;
}

switch (msr_index) {
case MSR_IA32_FEATURE_CONTROL:
*pdata = 0;
break;
case MSR_IA32_VMX_BASIC:
/*
* This MSR reports some information about VMX support. We
* should return information about the VMX we emulate for the
* guest, and the VMCS structure we give it - not about the
* VMX support of the underlying hardware.
*/
*pdata = VMCS12_REVISION |
((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
(VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
break;
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
case MSR_IA32_VMX_PINBASED_CTLS:
*pdata = vmx_control_msr(nested_vmx_pinbased_ctls_low,
nested_vmx_pinbased_ctls_high);
break;
case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
case MSR_IA32_VMX_PROCBASED_CTLS:
*pdata = vmx_control_msr(nested_vmx_procbased_ctls_low,
nested_vmx_procbased_ctls_high);
break;
case MSR_IA32_VMX_TRUE_EXIT_CTLS:
case MSR_IA32_VMX_EXIT_CTLS:
*pdata = vmx_control_msr(nested_vmx_exit_ctls_low,
nested_vmx_exit_ctls_high);
break;
case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
case MSR_IA32_VMX_ENTRY_CTLS:
*pdata = vmx_control_msr(nested_vmx_entry_ctls_low,
nested_vmx_entry_ctls_high);
break;
case MSR_IA32_VMX_MISC:
*pdata = 0;
break;
/*
* These MSRs specify bits which the guest must keep fixed (on or off)
* while L1 is in VMXON mode (in L1's root mode, or running an L2).
* We picked the standard core2 setting.
*/
#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
case MSR_IA32_VMX_CR0_FIXED0:
*pdata = VMXON_CR0_ALWAYSON;
break;
case MSR_IA32_VMX_CR0_FIXED1:
*pdata = -1ULL;
break;
case MSR_IA32_VMX_CR4_FIXED0:
*pdata = VMXON_CR4_ALWAYSON;
break;
case MSR_IA32_VMX_CR4_FIXED1:
*pdata = -1ULL;
break;
case MSR_IA32_VMX_VMCS_ENUM:
*pdata = 0x1f;
break;
case MSR_IA32_VMX_PROCBASED_CTLS2:
*pdata = vmx_control_msr(nested_vmx_secondary_ctls_low,
nested_vmx_secondary_ctls_high);
break;
case MSR_IA32_VMX_EPT_VPID_CAP:
/* Currently, no nested ept or nested vpid */
*pdata = 0;
break;
default:
return 0;
}

return 1;
}

static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
if (!nested_vmx_allowed(vcpu))
return 0;

if (msr_index == MSR_IA32_FEATURE_CONTROL)
/* TODO: the right thing. */
return 1;
/*
* No need to treat VMX capability MSRs specially: If we don't handle
* them, handle_wrmsr will #GP(0), which is correct (they are readonly)
*/
return 0;
}

/*
* Reads an msr value (of 'msr_index') into 'pdata'.
* Returns 0 on success, non-0 otherwise.
Expand Down Expand Up @@ -1443,6 +1655,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
/* Otherwise falls through */
default:
vmx_load_host_state(to_vmx(vcpu));
if (vmx_get_vmx_msr(vcpu, msr_index, pdata))
return 0;
msr = find_msr_entry(to_vmx(vcpu), msr_index);
if (msr) {
vmx_load_host_state(to_vmx(vcpu));
Expand Down Expand Up @@ -1514,6 +1728,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
return 1;
/* Otherwise falls through */
default:
if (vmx_set_vmx_msr(vcpu, msr_index, data))
break;
msr = find_msr_entry(vmx, msr_index);
if (msr) {
vmx_load_host_state(vmx);
Expand Down Expand Up @@ -1902,6 +2118,9 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_ple())
ple_gap = 0;

if (nested)
nested_vmx_setup_ctls_msrs();

return alloc_kvm_area();
}

Expand Down

0 comments on commit b87a51a

Please sign in to comment.