Skip to content

Commit

Permalink
KVM: x86/mmu: Consider the hva in mmu_notifier retry
Browse files Browse the repository at this point in the history
Track the range being invalidated by mmu_notifier and skip page fault
retries if the fault address is not affected by the in-progress
invalidation. Handle concurrent invalidations by finding the minimal
range which includes all ranges being invalidated. Although the combined
range may include unrelated addresses and cannot be shrunk as individual
invalidation operations complete, it is unlikely the marginal gains of
proper range tracking are worth the additional complexity.

The primary benefit of this change is the reduction in the likelihood of
extreme latency when handing a page fault due to another thread having
been preempted while modifying host virtual addresses.

Signed-off-by: David Stevens <stevensd@chromium.org>
Message-Id: <20210222024522.1751719-3-stevensd@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
  • Loading branch information
David Stevens authored and Paolo Bonzini committed Feb 22, 2021
1 parent 5f8a7cf commit 4a42d84
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 16 deletions.
2 changes: 1 addition & 1 deletion arch/powerpc/kvm/book3s_64_mmu_hv.c
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
} else {
/* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
writing, &write_ok);
writing, &write_ok, NULL);
if (is_error_noslot_pfn(pfn))
return -EFAULT;
page = NULL;
Expand Down
2 changes: 1 addition & 1 deletion arch/powerpc/kvm/book3s_64_mmu_radix.c
Original file line number Diff line number Diff line change
Expand Up @@ -822,7 +822,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,

/* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
writing, upgrade_p);
writing, upgrade_p, NULL);
if (is_error_noslot_pfn(pfn))
return -EFAULT;
page = NULL;
Expand Down
23 changes: 17 additions & 6 deletions arch/x86/kvm/mmu/mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -2734,6 +2734,13 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
if (sp->role.level > PG_LEVEL_4K)
return;

/*
* If addresses are being invalidated, skip prefetching to avoid
* accidentally prefetching those addresses.
*/
if (unlikely(vcpu->kvm->mmu_notifier_count))
return;

__direct_pte_prefetch(vcpu, sp, sptep);
}

Expand Down Expand Up @@ -3640,8 +3647,8 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
}

static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write,
bool *writable)
gpa_t cr2_or_gpa, kvm_pfn_t *pfn, hva_t *hva,
bool write, bool *writable)
{
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
bool async;
Expand All @@ -3654,7 +3661,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
}

async = false;
*pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable);
*pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async,
write, writable, hva);
if (!async)
return false; /* *pfn has correct page already */

Expand All @@ -3668,7 +3676,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
return true;
}

*pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable);
*pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL,
write, writable, hva);
return false;
}

Expand All @@ -3681,6 +3690,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
gfn_t gfn = gpa >> PAGE_SHIFT;
unsigned long mmu_seq;
kvm_pfn_t pfn;
hva_t hva;
int r;

if (page_fault_handle_page_track(vcpu, error_code, gfn))
Expand All @@ -3699,7 +3709,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();

if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, &hva,
write, &map_writable))
return RET_PF_RETRY;

if (handle_abnormal_pfn(vcpu, is_tdp ? 0 : gpa, gfn, pfn, ACC_ALL, &r))
Expand All @@ -3712,7 +3723,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
else
write_lock(&vcpu->kvm->mmu_lock);

if (!is_noslot_pfn(pfn) && mmu_notifier_retry(vcpu->kvm, mmu_seq))
if (!is_noslot_pfn(pfn) && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, hva))
goto out_unlock;
r = make_mmu_pages_available(vcpu);
if (r)
Expand Down
14 changes: 11 additions & 3 deletions arch/x86/kvm/mmu/paging_tmpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,13 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
if (sp->role.level > PG_LEVEL_4K)
return;

/*
* If addresses are being invalidated, skip prefetching to avoid
* accidentally prefetching those addresses.
*/
if (unlikely(vcpu->kvm->mmu_notifier_count))
return;

if (sp->role.direct)
return __direct_pte_prefetch(vcpu, sp, sptep);

Expand Down Expand Up @@ -790,6 +797,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
struct guest_walker walker;
int r;
kvm_pfn_t pfn;
hva_t hva;
unsigned long mmu_seq;
bool map_writable, is_self_change_mapping;
int max_level;
Expand Down Expand Up @@ -840,8 +848,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();

if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, write_fault,
&map_writable))
if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, &hva,
write_fault, &map_writable))
return RET_PF_RETRY;

if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r))
Expand Down Expand Up @@ -869,7 +877,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,

r = RET_PF_RETRY;
write_lock(&vcpu->kvm->mmu_lock);
if (!is_noslot_pfn(pfn) && mmu_notifier_retry(vcpu->kvm, mmu_seq))
if (!is_noslot_pfn(pfn) && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, hva))
goto out_unlock;

kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
Expand Down
25 changes: 24 additions & 1 deletion include/linux/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <linux/minmax.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/preempt.h>
Expand Down Expand Up @@ -506,6 +507,8 @@ struct kvm {
struct mmu_notifier mmu_notifier;
unsigned long mmu_notifier_seq;
long mmu_notifier_count;
unsigned long mmu_notifier_range_start;
unsigned long mmu_notifier_range_end;
#endif
long tlbs_dirty;
struct list_head devices;
Expand Down Expand Up @@ -733,7 +736,7 @@ kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
bool atomic, bool *async, bool write_fault,
bool *writable);
bool *writable, hva_t *hva);

void kvm_release_pfn_clean(kvm_pfn_t pfn);
void kvm_release_pfn_dirty(kvm_pfn_t pfn);
Expand Down Expand Up @@ -1207,6 +1210,26 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
return 1;
return 0;
}

static inline int mmu_notifier_retry_hva(struct kvm *kvm,
unsigned long mmu_seq,
unsigned long hva)
{
lockdep_assert_held(&kvm->mmu_lock);
/*
* If mmu_notifier_count is non-zero, then the range maintained by
* kvm_mmu_notifier_invalidate_range_start contains all addresses that
* might be being invalidated. Note that it may include some false
* positives, due to shortcuts when handing concurrent invalidations.
*/
if (unlikely(kvm->mmu_notifier_count) &&
hva >= kvm->mmu_notifier_range_start &&
hva < kvm->mmu_notifier_range_end)
return 1;
if (kvm->mmu_notifier_seq != mmu_seq)
return 1;
return 0;
}
#endif

#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
Expand Down
29 changes: 25 additions & 4 deletions virt/kvm/kvm_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,24 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
* count is also read inside the mmu_lock critical section.
*/
kvm->mmu_notifier_count++;
if (likely(kvm->mmu_notifier_count == 1)) {
kvm->mmu_notifier_range_start = range->start;
kvm->mmu_notifier_range_end = range->end;
} else {
/*
* Fully tracking multiple concurrent ranges has dimishing
* returns. Keep things simple and just find the minimal range
* which includes the current and new ranges. As there won't be
* enough information to subtract a range after its invalidate
* completes, any ranges invalidated concurrently will
* accumulate and persist until all outstanding invalidates
* complete.
*/
kvm->mmu_notifier_range_start =
min(kvm->mmu_notifier_range_start, range->start);
kvm->mmu_notifier_range_end =
max(kvm->mmu_notifier_range_end, range->end);
}
need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end,
range->flags);
/* we've to flush the tlb before the pages can be freed */
Expand Down Expand Up @@ -2023,10 +2041,13 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,

kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
bool atomic, bool *async, bool write_fault,
bool *writable)
bool *writable, hva_t *hva)
{
unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);

if (hva)
*hva = addr;

if (addr == KVM_HVA_ERR_RO_BAD) {
if (writable)
*writable = false;
Expand Down Expand Up @@ -2054,19 +2075,19 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
bool *writable)
{
return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, NULL,
write_fault, writable);
write_fault, writable, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);

kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
{
return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL);
return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot);

kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
{
return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);

Expand Down

0 comments on commit 4a42d84

Please sign in to comment.