KVM: x86/mmu: Avoid unnecessary flush on eager page split
The TLB flush before installing the newly-populated lower level
page table is unnecessary if the lower-level page table maps
the huge page identically.  KVM knows this is the case when it did not
reuse an existing shadow page table, so tell drop_large_spte() to skip
the flush in that case.

Extracted from a patch by David Matlack.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Paolo Bonzini committed Jun 24, 2022
1 parent ada51a9 commit 0378739
1 changed file: arch/x86/kvm/mmu/mmu.c (20 additions, 12 deletions)
@@ -1135,15 +1135,17 @@ static void drop_spte(struct kvm *kvm, u64 *sptep)
                 rmap_remove(kvm, sptep);
 }
 
-static void drop_large_spte(struct kvm *kvm, u64 *sptep)
+static void drop_large_spte(struct kvm *kvm, u64 *sptep, bool flush)
 {
         struct kvm_mmu_page *sp;
 
         sp = sptep_to_sp(sptep);
         WARN_ON(sp->role.level == PG_LEVEL_4K);
 
         drop_spte(kvm, sptep);
-        kvm_flush_remote_tlbs_with_address(kvm, sp->gfn,
-                KVM_PAGES_PER_HPAGE(sp->role.level));
+
+        if (flush)
+                kvm_flush_remote_tlbs_with_address(kvm, sp->gfn,
+                        KVM_PAGES_PER_HPAGE(sp->role.level));
 }
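
For readability, this is drop_large_spte() as it reads with the change applied (an editor's reconstruction from the hunk above, not part of the commit; the added comment paraphrases the commit message):

static void drop_large_spte(struct kvm *kvm, u64 *sptep, bool flush)
{
        struct kvm_mmu_page *sp;

        sp = sptep_to_sp(sptep);
        WARN_ON(sp->role.level == PG_LEVEL_4K);

        drop_spte(kvm, sptep);

        /*
         * Callers that just populated a new lower-level page table mapping
         * the huge page identically can pass flush == false and skip the
         * remote TLB flush.
         */
        if (flush)
                kvm_flush_remote_tlbs_with_address(kvm, sp->gfn,
                                                   KVM_PAGES_PER_HPAGE(sp->role.level));
}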

@@ -2283,18 +2285,19 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
 
 static void __link_shadow_page(struct kvm *kvm,
                                struct kvm_mmu_memory_cache *cache, u64 *sptep,
-                               struct kvm_mmu_page *sp)
+                               struct kvm_mmu_page *sp, bool flush)
 {
         u64 spte;
 
         BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
 
         /*
          * If an SPTE is present already, it must be a leaf and therefore
-         * a large one. Drop it and flush the TLB before installing sp.
+         * a large one. Drop it, and flush the TLB if needed, before
+         * installing sp.
          */
         if (is_shadow_present_pte(*sptep))
-                drop_large_spte(kvm, sptep);
+                drop_large_spte(kvm, sptep, flush);
 
         spte = make_nonleaf_spte(sp->spt, sp_ad_disabled(sp));
 
@@ -2309,7 +2312,7 @@ static void __link_shadow_page(struct kvm *kvm,
 static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
                              struct kvm_mmu_page *sp)
 {
-        __link_shadow_page(vcpu->kvm, &vcpu->arch.mmu_pte_list_desc_cache, sptep, sp);
+        __link_shadow_page(vcpu->kvm, &vcpu->arch.mmu_pte_list_desc_cache, sptep, sp, true);
 }
 
 static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
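
To see how the new argument is chosen, here is a minimal sketch of the two call sites this commit touches (an editor's illustration; the comments paraphrase the commit message rather than quote the source):

/* Regular shadow-paging path (link_shadow_page): keep the conservative
 * behaviour and always flush when a large SPTE may be replaced. */
__link_shadow_page(vcpu->kvm, &vcpu->arch.mmu_pte_list_desc_cache, sptep, sp, true);

/* Eager page split (shadow_mmu_split_huge_page): the newly populated sp
 * maps the huge page identically, so flush only if a present child SPTE
 * already pointed at another, possibly partially populated, page table. */
__link_shadow_page(kvm, cache, huge_sptep, sp, flush);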
@@ -6170,6 +6173,7 @@ static void shadow_mmu_split_huge_page(struct kvm *kvm,
         struct kvm_mmu_memory_cache *cache = &kvm->arch.split_desc_cache;
         u64 huge_spte = READ_ONCE(*huge_sptep);
         struct kvm_mmu_page *sp;
+        bool flush = false;
         u64 *sptep, spte;
         gfn_t gfn;
         int index;
@@ -6187,20 +6191,24 @@ static void shadow_mmu_split_huge_page(struct kvm *kvm,
                  * gfn-to-pfn translation since the SP is direct, so no need to
                  * modify them.
                  *
-                 * If a given SPTE points to a lower level page table, installing
-                 * such SPTEs would effectively unmap a potion of the huge page.
-                 * This is not an issue because __link_shadow_page() flushes the TLB
-                 * when the passed sp replaces a large SPTE.
+                 * However, if a given SPTE points to a lower level page table,
+                 * that lower level page table may only be partially populated.
+                 * Installing such SPTEs would effectively unmap a potion of the
+                 * huge page. Unmapping guest memory always requires a TLB flush
+                 * since a subsequent operation on the unmapped regions would
+                 * fail to detect the need to flush.
                  */
-                if (is_shadow_present_pte(*sptep))
+                if (is_shadow_present_pte(*sptep)) {
+                        flush |= !is_last_spte(*sptep, sp->role.level);
                         continue;
+                }
 
                 spte = make_huge_page_split_spte(kvm, huge_spte, sp->role, index);
                 mmu_spte_set(sptep, spte);
                 __rmap_add(kvm, cache, slot, sptep, gfn, sp->role.access);
         }
 
-        __link_shadow_page(kvm, cache, huge_sptep, sp);
+        __link_shadow_page(kvm, cache, huge_sptep, sp, flush);
 }
 
 static int shadow_mmu_try_split_huge_page(struct kvm *kvm,
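
Putting the pieces together, the flush decision made during eager page splitting boils down to the following (an editor's distillation of the hunk above, not a verbatim copy of shadow_mmu_split_huge_page(); surrounding bookkeeping is omitted):

/* Inside the per-SPTE loop of shadow_mmu_split_huge_page(): */
if (is_shadow_present_pte(*sptep)) {
        /*
         * This child SPTE already points at a lower level page table,
         * which may be only partially populated: linking sp would unmap
         * part of the huge page, so a TLB flush is required.
         */
        flush |= !is_last_spte(*sptep, sp->role.level);
        continue;
}

/*
 * Otherwise the child SPTE is populated so that it maps the huge page
 * contents identically, which needs no flush.
 */
spte = make_huge_page_split_spte(kvm, huge_spte, sp->role, index);
mmu_spte_set(sptep, spte);
__rmap_add(kvm, cache, slot, sptep, gfn, sp->role.access);

/* After the loop: flush only if something was actually unmapped. */
__link_shadow_page(kvm, cache, huge_sptep, sp, flush);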
