Skip to content

Commit

Permalink
KVM: nVMX: Allow L1 to use 5-level page walks for nested EPT
Browse files Browse the repository at this point in the history
Add support for 5-level nested EPT, and advertise said support in the
EPT capabilities MSR.  KVM's MMU can already handle 5-level legacy page
tables, so there's no reason to force an L1 VMM to use shadow paging if
it wants to employ 5-level page tables.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
  • Loading branch information
Sean Christopherson authored and Paolo Bonzini committed Mar 16, 2020
1 parent 8053f92 commit bb1fcc7
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 12 deletions.
12 changes: 12 additions & 0 deletions arch/x86/include/asm/vmx.h
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,18 @@ enum vmcs_field {
VMX_EPT_EXECUTABLE_MASK)
#define VMX_EPT_MT_MASK (7ull << VMX_EPT_MT_EPTE_SHIFT)

/*
 * Translate the page-walk length field of an EPT pointer (the bits selected
 * by VMX_EPTP_PWL_MASK) into a number of paging levels.
 *
 * Returns 5 when the field equals VMX_EPTP_PWL_5, and 4 in every other case.
 * A field that is neither VMX_EPTP_PWL_5 nor VMX_EPTP_PWL_4 is not rejected
 * here: it triggers a one-time warning and is still reported as 4 levels.
 */
static inline u8 vmx_eptp_page_walk_level(u64 eptp)
{
	const u64 pwl = eptp & VMX_EPTP_PWL_MASK;

	if (pwl != VMX_EPTP_PWL_5) {
		/* Callers are expected to have validated @eptp beforehand. */
		WARN_ON_ONCE(pwl != VMX_EPTP_PWL_4);
		return 4;
	}

	return 5;
}

/* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */
#define VMX_EPT_MISCONFIG_WX_VALUE (VMX_EPT_WRITABLE_MASK | \
VMX_EPT_EXECUTABLE_MASK)
Expand Down
11 changes: 6 additions & 5 deletions arch/x86/kvm/mmu/mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -5008,14 +5008,14 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);

static union kvm_mmu_role
kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
bool execonly)
bool execonly, u8 level)
{
union kvm_mmu_role role = {0};

/* SMM flag is inherited from root_mmu */
role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm;

role.base.level = PT64_ROOT_4LEVEL;
role.base.level = level;
role.base.gpte_is_8_bytes = true;
role.base.direct = false;
role.base.ad_disabled = !accessed_dirty;
Expand All @@ -5039,16 +5039,17 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty, gpa_t new_eptp)
{
struct kvm_mmu *context = vcpu->arch.mmu;
u8 level = vmx_eptp_page_walk_level(new_eptp);
union kvm_mmu_role new_role =
kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty,
execonly);
execonly, level);

__kvm_mmu_new_cr3(vcpu, new_eptp, new_role.base, false);

if (new_role.as_u64 == context->mmu_role.as_u64)
return;

context->shadow_root_level = PT64_ROOT_4LEVEL;
context->shadow_root_level = level;

context->nx = true;
context->ept_ad = accessed_dirty;
Expand All @@ -5057,7 +5058,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
context->sync_page = ept_sync_page;
context->invlpg = ept_invlpg;
context->update_pte = ept_update_pte;
context->root_level = PT64_ROOT_4LEVEL;
context->root_level = level;
context->direct_map = false;
context->mmu_role.as_u64 = new_role.as_u64;

Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kvm/mmu/paging_tmpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
#define PT_GUEST_ACCESSED_SHIFT 8
#define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad)
#define CMPXCHG cmpxchg64
#define PT_MAX_FULL_LEVELS 4
#define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
#else
#error Invalid PTTYPE value
#endif
Expand Down
21 changes: 17 additions & 4 deletions arch/x86/kvm/vmx/nested.c
Original file line number Diff line number Diff line change
Expand Up @@ -2582,9 +2582,19 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
return false;
}

/* only 4 levels page-walk length are valid */
if (CC((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4))
/* Page-walk levels validity. */
switch (address & VMX_EPTP_PWL_MASK) {
case VMX_EPTP_PWL_5:
if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_5_BIT)))
return false;
break;
case VMX_EPTP_PWL_4:
if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_4_BIT)))
return false;
break;
default:
return false;
}

/* Reserved bits should not be set */
if (CC(address >> maxphyaddr || ((address >> 7) & 0x1f)))
Expand Down Expand Up @@ -6119,8 +6129,11 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
/* nested EPT: emulate EPT also to L1 */
msrs->secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_EPT;
msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
msrs->ept_caps =
VMX_EPT_PAGE_WALK_4_BIT |
VMX_EPT_PAGE_WALK_5_BIT |
VMX_EPTP_WB_BIT |
VMX_EPT_INVEPT_BIT;
if (cpu_has_vmx_ept_execute_only())
msrs->ept_caps |=
VMX_EPT_EXECUTE_ONLY_BIT;
Expand Down
3 changes: 1 addition & 2 deletions arch/x86/kvm/vmx/vmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -2985,9 +2985,8 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)

static int get_ept_level(struct kvm_vcpu *vcpu)
{
/* Nested EPT currently only supports 4-level walks. */
if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu)))
return 4;
return vmx_eptp_page_walk_level(nested_ept_get_cr3(vcpu));
if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
return 5;
return 4;
Expand Down

0 comments on commit bb1fcc7

Please sign in to comment.