KVM: x86: Use gpa_t for cr2/gpa to fix TDP support on 32-bit KVM
Convert a plethora of parameters and variables in the MMU and page fault
flows from type gva_t to gpa_t to properly handle TDP on 32-bit KVM.

Thanks to PSE and PAE paging, 32-bit kernels can access 64-bit physical
addresses.  When TDP is enabled, the fault address is a guest physical
address and thus can be a 64-bit value, even when both KVM and its guest
are using 32-bit virtual addressing, e.g. VMX's VMCS.GUEST_PHYSICAL is a
64-bit field, not a natural width field.

Using a gva_t for the fault address means KVM will incorrectly drop the
upper 32 bits of the GPA.  Ditto for gva_to_gpa() when it is used to
translate L2 GPAs to L1 GPAs.
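
To make the truncation concrete, here is a minimal userspace sketch (not
kernel code: the typedefs merely mirror the widths gva_t and gpa_t have on a
32-bit x86 build, and the fault address is a made-up value):

#include <stdio.h>
#include <stdint.h>

typedef uint32_t gva_t;	/* unsigned long is 32 bits on 32-bit x86 */
typedef uint64_t gpa_t;	/* physical addresses can exceed 4GiB with PAE */

int main(void)
{
	gpa_t fault_gpa = 0x1ffff0000ULL;	/* hypothetical VMCS.GUEST_PHYSICAL */
	gva_t as_gva = fault_gpa;		/* what a gva_t parameter keeps */

	printf("gpa = %#llx, after gva_t truncation = %#lx\n",
	       (unsigned long long)fault_gpa, (unsigned long)as_gva);
	return 0;
}

Here the second value comes out as 0xffff0000: the upper 32 bits are silently
dropped, which is exactly what happened when TDP fault addresses were passed
around as gva_t.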

Opportunistically rename variables and parameters to better reflect the
dual address modes, e.g. use "cr2_or_gpa" for fault addresses and plain
"addr" instead of "vaddr" when the address may be either a GVA or an L2
GPA.  Similarly, use "gpa" in the nonpaging_page_fault() flows to avoid
a confusing "gpa_t gva" declaration; this also sets the stage for a
future patch to combine nonpaging_page_fault() and tdp_page_fault() with
minimal churn.

Sprinkle in a few comments to document flows where an address is known
to be a GVA and thus can be safely truncated to a 32-bit value.  Add
WARNs in kvm_handle_page_fault() and FNAME(gva_to_gpa_nested)() to help
document such cases and detect bugs.
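
For reference, the check added to kvm_handle_page_fault() looks like this
(excerpted from the mmu.c hunk below; it compiles away on 64-bit builds):

#ifndef CONFIG_X86_64
	/* A 64-bit CR2 should be impossible on 32-bit KVM. */
	if (WARN_ON_ONCE(fault_address >> 32))
		return -EFAULT;
#endif

FNAME(gva_to_gpa_nested)() gets the same #ifndef CONFIG_X86_64 guard with a
bare WARN_ON_ONCE(vaddr >> 32), since a GVA on 32-bit KVM can never have bits
set above bit 31.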

Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Sean Christopherson authored and Paolo Bonzini committed Jan 8, 2020
1 parent 95145c2 commit 736c291
Showing 8 changed files with 94 additions and 78 deletions.
8 changes: 4 additions & 4 deletions arch/x86/include/asm/kvm_host.h
@@ -378,12 +378,12 @@ struct kvm_mmu {
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err,
int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err,
bool prefault);
void (*inject_page_fault)(struct kvm_vcpu *vcpu,
struct x86_exception *fault);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
struct x86_exception *exception);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa,
u32 access, struct x86_exception *exception);
gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception);
int (*sync_page)(struct kvm_vcpu *vcpu,
@@ -1473,7 +1473,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);

int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);

int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
69 changes: 40 additions & 29 deletions arch/x86/kvm/mmu/mmu.c
@@ -3532,7 +3532,7 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte)
* - true: let the vcpu to access on the same address again.
* - false: let the real page fault path to fix it.
*/
static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int level,
u32 error_code)
{
struct kvm_shadow_walk_iterator iterator;
@@ -3552,7 +3552,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
do {
u64 new_spte;

for_each_shadow_entry_lockless(vcpu, gva, iterator, spte)
for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte)
if (!is_shadow_present_pte(spte) ||
iterator.level < level)
break;
@@ -3630,18 +3630,19 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,

} while (true);

trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep,
trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep,
spte, fault_handled);
walk_shadow_page_lockless_end(vcpu);

return fault_handled;
}

static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable);
gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write,
bool *writable);
static int make_mmu_pages_available(struct kvm_vcpu *vcpu);

static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
static int nonpaging_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
gfn_t gfn, bool prefault)
{
int r;
@@ -3667,16 +3668,16 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
}

if (fast_page_fault(vcpu, v, level, error_code))
if (fast_page_fault(vcpu, gpa, level, error_code))
return RET_PF_RETRY;

mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();

if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable))
if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
return RET_PF_RETRY;

if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
if (handle_abnormal_pfn(vcpu, gpa, gfn, pfn, ACC_ALL, &r))
return r;

r = RET_PF_RETRY;
@@ -3687,7 +3688,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
goto out_unlock;
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
r = __direct_map(vcpu, v, write, map_writable, level, pfn,
r = __direct_map(vcpu, gpa, write, map_writable, level, pfn,
prefault, false);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
@@ -3985,15 +3986,15 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);

static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gpa_t vaddr,
u32 access, struct x86_exception *exception)
{
if (exception)
exception->error_code = 0;
return vaddr;
}

static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gpa_t vaddr,
u32 access,
struct x86_exception *exception)
{
@@ -4153,13 +4154,14 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
walk_shadow_page_lockless_end(vcpu);
}

static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa,
u32 error_code, bool prefault)
{
gfn_t gfn = gva >> PAGE_SHIFT;
gfn_t gfn = gpa >> PAGE_SHIFT;
int r;

pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
/* Note, paging is disabled, ergo gva == gpa. */
pgprintk("%s: gva %lx error %x\n", __func__, gpa, error_code);

if (page_fault_handle_page_track(vcpu, error_code, gfn))
return RET_PF_EMULATE;
@@ -4171,11 +4173,12 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));


return nonpaging_map(vcpu, gva & PAGE_MASK,
return nonpaging_map(vcpu, gpa & PAGE_MASK,
error_code, gfn, prefault);
}

static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
gfn_t gfn)
{
struct kvm_arch_async_pf arch;

@@ -4184,11 +4187,13 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
arch.direct_map = vcpu->arch.mmu->direct_map;
arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu);

return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
return kvm_setup_async_pf(vcpu, cr2_or_gpa,
kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
}

static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable)
gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write,
bool *writable)
{
struct kvm_memory_slot *slot;
bool async;
@@ -4208,12 +4213,12 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
return false; /* *pfn has correct page already */

if (!prefault && kvm_can_do_async_pf(vcpu)) {
trace_kvm_try_async_get_page(gva, gfn);
trace_kvm_try_async_get_page(cr2_or_gpa, gfn);
if (kvm_find_async_pf_gfn(vcpu, gfn)) {
trace_kvm_async_pf_doublefault(gva, gfn);
trace_kvm_async_pf_doublefault(cr2_or_gpa, gfn);
kvm_make_request(KVM_REQ_APF_HALT, vcpu);
return true;
} else if (kvm_arch_setup_async_pf(vcpu, gva, gfn))
} else if (kvm_arch_setup_async_pf(vcpu, cr2_or_gpa, gfn))
return true;
}

@@ -4226,6 +4231,12 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
{
int r = 1;

#ifndef CONFIG_X86_64
/* A 64-bit CR2 should be impossible on 32-bit KVM. */
if (WARN_ON_ONCE(fault_address >> 32))
return -EFAULT;
#endif

vcpu->arch.l1tf_flush_l1d = true;
switch (vcpu->arch.apf.host_apf_reason) {
default:
@@ -4263,7 +4274,7 @@ check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num);
}

static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
bool prefault)
{
kvm_pfn_t pfn;
@@ -5520,7 +5531,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
return 0;
}

int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len)
{
int r, emulation_type = 0;
@@ -5529,18 +5540,18 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
/* With shadow page tables, fault_address contains a GVA or nGPA. */
if (vcpu->arch.mmu->direct_map) {
vcpu->arch.gpa_available = true;
vcpu->arch.gpa_val = cr2;
vcpu->arch.gpa_val = cr2_or_gpa;
}

r = RET_PF_INVALID;
if (unlikely(error_code & PFERR_RSVD_MASK)) {
r = handle_mmio_page_fault(vcpu, cr2, direct);
r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct);
if (r == RET_PF_EMULATE)
goto emulate;
}

if (r == RET_PF_INVALID) {
r = vcpu->arch.mmu->page_fault(vcpu, cr2,
r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa,
lower_32_bits(error_code),
false);
WARN_ON(r == RET_PF_INVALID);
@@ -5560,7 +5571,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
*/
if (vcpu->arch.mmu->direct_map &&
(error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa));
return 1;
}

@@ -5575,7 +5586,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
* explicitly shadowing L1's page tables, i.e. unprotecting something
* for L1 isn't going to magically fix whatever issue cause L2 to fail.
*/
if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu))
if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu))
emulation_type = EMULTYPE_ALLOW_RETRY;
emulate:
/*
@@ -5590,7 +5601,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
return 1;
}

return x86_emulate_instruction(vcpu, cr2, emulation_type, insn,
return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn,
insn_len);
}
EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
25 changes: 16 additions & 9 deletions arch/x86/kvm/mmu/paging_tmpl.h
@@ -291,11 +291,11 @@ static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte)
}

/*
* Fetch a guest pte for a guest virtual address
* Fetch a guest pte for a guest virtual address, or for an L2's GPA.
*/
static int FNAME(walk_addr_generic)(struct guest_walker *walker,
struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
gva_t addr, u32 access)
gpa_t addr, u32 access)
{
int ret;
pt_element_t pte;
@@ -496,7 +496,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
}

static int FNAME(walk_addr)(struct guest_walker *walker,
struct kvm_vcpu *vcpu, gva_t addr, u32 access)
struct kvm_vcpu *vcpu, gpa_t addr, u32 access)
{
return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr,
access);
@@ -611,7 +611,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
* If the guest tries to write a write-protected page, we need to
* emulate this operation, return 1 to indicate this case.
*/
static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
struct guest_walker *gw,
int write_fault, int hlevel,
kvm_pfn_t pfn, bool map_writable, bool prefault,
@@ -765,7 +765,7 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
* Returns: 1 if we need to emulate the instruction, 0 otherwise, or
* a negative value on error.
*/
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
bool prefault)
{
int write_fault = error_code & PFERR_WRITE_MASK;
@@ -945,33 +945,40 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
spin_unlock(&vcpu->kvm->mmu_lock);
}

static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
/* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t addr, u32 access,
struct x86_exception *exception)
{
struct guest_walker walker;
gpa_t gpa = UNMAPPED_GVA;
int r;

r = FNAME(walk_addr)(&walker, vcpu, vaddr, access);
r = FNAME(walk_addr)(&walker, vcpu, addr, access);

if (r) {
gpa = gfn_to_gpa(walker.gfn);
gpa |= vaddr & ~PAGE_MASK;
gpa |= addr & ~PAGE_MASK;
} else if (exception)
*exception = walker.fault;

return gpa;
}

#if PTTYPE != PTTYPE_EPT
static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
/* Note, gva_to_gpa_nested() is only used to translate L2 GVAs. */
static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr,
u32 access,
struct x86_exception *exception)
{
struct guest_walker walker;
gpa_t gpa = UNMAPPED_GVA;
int r;

#ifndef CONFIG_X86_64
/* A 64-bit GVA should be impossible on 32-bit KVM. */
WARN_ON_ONCE(vaddr >> 32);
#endif

r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access);

if (r) {
12 changes: 6 additions & 6 deletions arch/x86/kvm/mmutrace.h
@@ -249,13 +249,13 @@ TRACE_EVENT(

TRACE_EVENT(
fast_page_fault,
TP_PROTO(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
TP_PROTO(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code,
u64 *sptep, u64 old_spte, bool retry),
TP_ARGS(vcpu, gva, error_code, sptep, old_spte, retry),
TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, retry),

TP_STRUCT__entry(
__field(int, vcpu_id)
__field(gva_t, gva)
__field(gpa_t, cr2_or_gpa)
__field(u32, error_code)
__field(u64 *, sptep)
__field(u64, old_spte)
@@ -265,17 +265,17 @@ TRACE_EVENT(

TP_fast_assign(
__entry->vcpu_id = vcpu->vcpu_id;
__entry->gva = gva;
__entry->cr2_or_gpa = cr2_or_gpa;
__entry->error_code = error_code;
__entry->sptep = sptep;
__entry->old_spte = old_spte;
__entry->new_spte = *sptep;
__entry->retry = retry;
),

TP_printk("vcpu %d gva %lx error_code %s sptep %p old %#llx"
TP_printk("vcpu %d gva %llx error_code %s sptep %p old %#llx"
" new %llx spurious %d fixed %d", __entry->vcpu_id,
__entry->gva, __print_flags(__entry->error_code, "|",
__entry->cr2_or_gpa, __print_flags(__entry->error_code, "|",
kvm_mmu_trace_pferr_flags), __entry->sptep,
__entry->old_spte, __entry->new_spte,
__spte_satisfied(old_spte), __spte_satisfied(new_spte)
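
The TP_printk() format above also changes from "%lx" to "%llx" because gpa_t
is a u64 even on 32-bit builds, and a 32-bit long conversion would keep only
the low half of the address.  A plain C illustration of that format-width
issue (hypothetical value, not kernel code):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t cr2_or_gpa = 0x1ffff0000ULL;	/* hypothetical fault address */

	/* Forcing the value through a 32-bit type models the old %lx output. */
	printf("as %%lx : %#lx\n", (unsigned long)(uint32_t)cr2_or_gpa);
	/* %llx preserves the full 64-bit value. */
	printf("as %%llx: %#llx\n", (unsigned long long)cr2_or_gpa);
	return 0;
}
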
(Diffs for the remaining four changed files are not shown here.)
