Skip to content

Commit

Permalink
KVM: MMU: Reinstate pte prefetch on invlpg
Browse files Browse the repository at this point in the history
Commit fb341f5 removed the pte prefetch on guest invlpg, citing guest races.
However, the SDM is adamant that prefetch is allowed:

  "The processor may create entries in paging-structure caches for
   translations required for prefetches and for accesses that are a
   result of speculative execution that would never actually occur
   in the executed code path."

And, in fact, there was a race in the prefetch code: we picked up the pte
without the mmu lock held, so an older invlpg could install the pte over
a newer invlpg.

Reinstate the prefetch logic, but this time note whether another invlpg has
executed using a counter.  If a race occured, do not install the pte.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
  • Loading branch information
Avi Kivity committed May 17, 2010
1 parent fbc5d13 commit 08e850c
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 14 deletions.
1 change: 1 addition & 0 deletions arch/x86/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ struct kvm_arch {
unsigned int n_free_mmu_pages;
unsigned int n_requested_mmu_pages;
unsigned int n_alloc_mmu_pages;
atomic_t invlpg_counter;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
* Hash table of struct kvm_mmu_page.
Expand Down
37 changes: 23 additions & 14 deletions arch/x86/kvm/mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -2613,37 +2613,46 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
int flooded = 0;
int npte;
int r;
int invlpg_counter;

pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);

switch (bytes) {
case 4:
gentry = *(const u32 *)new;
break;
case 8:
gentry = *(const u64 *)new;
break;
default:
gentry = 0;
break;
}
invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter);

/*
* Assume that the pte write on a page table of the same type
* as the current vcpu paging mode. This is nearly always true
* (might be false while changing modes). Note it is verified later
* by update_pte().
*/
if (is_pae(vcpu) && bytes == 4) {
if ((is_pae(vcpu) && bytes == 4) || !new) {
/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
gpa &= ~(gpa_t)7;
r = kvm_read_guest(vcpu->kvm, gpa, &gentry, 8);
if (is_pae(vcpu)) {
gpa &= ~(gpa_t)7;
bytes = 8;
}
r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
if (r)
gentry = 0;
new = (const u8 *)&gentry;
}

switch (bytes) {
case 4:
gentry = *(const u32 *)new;
break;
case 8:
gentry = *(const u64 *)new;
break;
default:
gentry = 0;
break;
}

mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
spin_lock(&vcpu->kvm->mmu_lock);
if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
gentry = 0;
kvm_mmu_access_page(vcpu, gfn);
kvm_mmu_free_some_pages(vcpu);
++vcpu->kvm->stat.mmu_pte_write;
Expand Down
15 changes: 15 additions & 0 deletions arch/x86/kvm/paging_tmpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
struct kvm_shadow_walk_iterator iterator;
gpa_t pte_gpa = -1;
int level;
u64 *sptep;
int need_flush = 0;
Expand All @@ -476,6 +477,10 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
if (level == PT_PAGE_TABLE_LEVEL ||
((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
struct kvm_mmu_page *sp = page_header(__pa(sptep));

pte_gpa = (sp->gfn << PAGE_SHIFT);
pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);

if (is_shadow_present_pte(*sptep)) {
rmap_remove(vcpu->kvm, sptep);
Expand All @@ -493,7 +498,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)

if (need_flush)
kvm_flush_remote_tlbs(vcpu->kvm);

atomic_inc(&vcpu->kvm->arch.invlpg_counter);

spin_unlock(&vcpu->kvm->mmu_lock);

if (pte_gpa == -1)
return;

if (mmu_topup_memory_caches(vcpu))
return;
kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0);
}

static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
Expand Down

0 comments on commit 08e850c

Please sign in to comment.