Skip to content

Commit

Permalink
KVM: MMU: Don't assume struct page for x86
Browse files Browse the repository at this point in the history
This patch introduces a gfn_to_pfn() function and corresponding functions like
kvm_release_pfn_dirty().  Using these new functions, we can modify the x86
MMU to no longer assume that it can always get a struct page for any given gfn.

We don't want to eliminate gfn_to_page() entirely because a number of places
assume they can do gfn_to_page() and then kmap() the results.  When we support
IO memory, gfn_to_page() will fail for IO pages although gfn_to_pfn() will
succeed.

This does not implement support for avoiding reference counting for reserved
RAM or for IO memory.  However, it should make those things pretty
straightforward.

Since we're only introducing new common symbols, I don't think it will break
the non-x86 architectures but I haven't tested those.  I've tested Intel,
AMD, NPT, and hugetlbfs with Windows and Linux guests.

[avi: fix overflow when shifting left pfns by adding casts]

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
  • Loading branch information
Anthony Liguori authored and Avi Kivity committed Apr 27, 2008
1 parent fdae862 commit 35149e2
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 68 deletions.
89 changes: 43 additions & 46 deletions arch/x86/kvm/mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,11 +240,9 @@ static int is_rmap_pte(u64 pte)
return is_shadow_present_pte(pte);
}

static struct page *spte_to_page(u64 pte)
static pfn_t spte_to_pfn(u64 pte)
{
hfn_t hfn = (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;

return pfn_to_page(hfn);
return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
}

static gfn_t pse36_gfn_delta(u32 gpte)
Expand Down Expand Up @@ -541,20 +539,20 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
struct kvm_rmap_desc *desc;
struct kvm_rmap_desc *prev_desc;
struct kvm_mmu_page *sp;
struct page *page;
pfn_t pfn;
unsigned long *rmapp;
int i;

if (!is_rmap_pte(*spte))
return;
sp = page_header(__pa(spte));
page = spte_to_page(*spte);
pfn = spte_to_pfn(*spte);
if (*spte & PT_ACCESSED_MASK)
mark_page_accessed(page);
kvm_set_pfn_accessed(pfn);
if (is_writeble_pte(*spte))
kvm_release_page_dirty(page);
kvm_release_pfn_dirty(pfn);
else
kvm_release_page_clean(page);
kvm_release_pfn_clean(pfn);
rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], is_large_pte(*spte));
if (!*rmapp) {
printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
Expand Down Expand Up @@ -635,11 +633,11 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
spte = rmap_next(kvm, rmapp, spte);
}
if (write_protected) {
struct page *page;
pfn_t pfn;

spte = rmap_next(kvm, rmapp, NULL);
page = spte_to_page(*spte);
SetPageDirty(page);
pfn = spte_to_pfn(*spte);
kvm_set_pfn_dirty(pfn);
}

/* check for huge page mappings */
Expand Down Expand Up @@ -1036,7 +1034,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pt_access, unsigned pte_access,
int user_fault, int write_fault, int dirty,
int *ptwrite, int largepage, gfn_t gfn,
struct page *page, bool speculative)
pfn_t pfn, bool speculative)
{
u64 spte;
int was_rmapped = 0;
Expand All @@ -1058,10 +1056,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,

child = page_header(pte & PT64_BASE_ADDR_MASK);
mmu_page_remove_parent_pte(child, shadow_pte);
} else if (page != spte_to_page(*shadow_pte)) {
} else if (pfn != spte_to_pfn(*shadow_pte)) {
pgprintk("hfn old %lx new %lx\n",
page_to_pfn(spte_to_page(*shadow_pte)),
page_to_pfn(page));
spte_to_pfn(*shadow_pte), pfn);
rmap_remove(vcpu->kvm, shadow_pte);
} else {
if (largepage)
Expand Down Expand Up @@ -1090,7 +1087,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
if (largepage)
spte |= PT_PAGE_SIZE_MASK;

spte |= page_to_phys(page);
spte |= (u64)pfn << PAGE_SHIFT;

if ((pte_access & ACC_WRITE_MASK)
|| (write_fault && !is_write_protection(vcpu) && !user_fault)) {
Expand Down Expand Up @@ -1135,12 +1132,12 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
if (!was_rmapped) {
rmap_add(vcpu, shadow_pte, gfn, largepage);
if (!is_rmap_pte(*shadow_pte))
kvm_release_page_clean(page);
kvm_release_pfn_clean(pfn);
} else {
if (was_writeble)
kvm_release_page_dirty(page);
kvm_release_pfn_dirty(pfn);
else
kvm_release_page_clean(page);
kvm_release_pfn_clean(pfn);
}
if (!ptwrite || !*ptwrite)
vcpu->arch.last_pte_updated = shadow_pte;
Expand All @@ -1151,7 +1148,7 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
}

static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
int largepage, gfn_t gfn, struct page *page,
int largepage, gfn_t gfn, pfn_t pfn,
int level)
{
hpa_t table_addr = vcpu->arch.mmu.root_hpa;
Expand All @@ -1166,13 +1163,13 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,

if (level == 1) {
mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
0, write, 1, &pt_write, 0, gfn, page, false);
0, write, 1, &pt_write, 0, gfn, pfn, false);
return pt_write;
}

if (largepage && level == 2) {
mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
0, write, 1, &pt_write, 1, gfn, page, false);
0, write, 1, &pt_write, 1, gfn, pfn, false);
return pt_write;
}

Expand All @@ -1187,7 +1184,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
1, ACC_ALL, &table[index]);
if (!new_table) {
pgprintk("nonpaging_map: ENOMEM\n");
kvm_release_page_clean(page);
kvm_release_pfn_clean(pfn);
return -ENOMEM;
}

Expand All @@ -1202,27 +1199,26 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
{
int r;
int largepage = 0;

struct page *page;
pfn_t pfn;

down_read(&current->mm->mmap_sem);
if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
gfn &= ~(KVM_PAGES_PER_HPAGE-1);
largepage = 1;
}

page = gfn_to_page(vcpu->kvm, gfn);
pfn = gfn_to_pfn(vcpu->kvm, gfn);
up_read(&current->mm->mmap_sem);

/* mmio */
if (is_error_page(page)) {
kvm_release_page_clean(page);
if (is_error_pfn(pfn)) {
kvm_release_pfn_clean(pfn);
return 1;
}

spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu);
r = __direct_map(vcpu, v, write, largepage, gfn, page,
r = __direct_map(vcpu, v, write, largepage, gfn, pfn,
PT32E_ROOT_LEVEL);
spin_unlock(&vcpu->kvm->mmu_lock);

Expand Down Expand Up @@ -1355,7 +1351,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
u32 error_code)
{
struct page *page;
pfn_t pfn;
int r;
int largepage = 0;
gfn_t gfn = gpa >> PAGE_SHIFT;
Expand All @@ -1372,16 +1368,16 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
gfn &= ~(KVM_PAGES_PER_HPAGE-1);
largepage = 1;
}
page = gfn_to_page(vcpu->kvm, gfn);
pfn = gfn_to_pfn(vcpu->kvm, gfn);
up_read(&current->mm->mmap_sem);
if (is_error_page(page)) {
kvm_release_page_clean(page);
if (is_error_pfn(pfn)) {
kvm_release_pfn_clean(pfn);
return 1;
}
spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu);
r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
largepage, gfn, page, TDP_ROOT_LEVEL);
largepage, gfn, pfn, TDP_ROOT_LEVEL);
spin_unlock(&vcpu->kvm->mmu_lock);

return r;
Expand Down Expand Up @@ -1525,6 +1521,8 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu)

static int init_kvm_mmu(struct kvm_vcpu *vcpu)
{
vcpu->arch.update_pte.pfn = bad_pfn;

if (tdp_enabled)
return init_kvm_tdp_mmu(vcpu);
else
Expand Down Expand Up @@ -1644,7 +1642,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
gfn_t gfn;
int r;
u64 gpte = 0;
struct page *page;
pfn_t pfn;

vcpu->arch.update_pte.largepage = 0;

Expand Down Expand Up @@ -1680,15 +1678,15 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
gfn &= ~(KVM_PAGES_PER_HPAGE-1);
vcpu->arch.update_pte.largepage = 1;
}
page = gfn_to_page(vcpu->kvm, gfn);
pfn = gfn_to_pfn(vcpu->kvm, gfn);
up_read(&current->mm->mmap_sem);

if (is_error_page(page)) {
kvm_release_page_clean(page);
if (is_error_pfn(pfn)) {
kvm_release_pfn_clean(pfn);
return;
}
vcpu->arch.update_pte.gfn = gfn;
vcpu->arch.update_pte.page = page;
vcpu->arch.update_pte.pfn = pfn;
}

void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
Expand Down Expand Up @@ -1793,9 +1791,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
}
kvm_mmu_audit(vcpu, "post pte write");
spin_unlock(&vcpu->kvm->mmu_lock);
if (vcpu->arch.update_pte.page) {
kvm_release_page_clean(vcpu->arch.update_pte.page);
vcpu->arch.update_pte.page = NULL;
if (!is_error_pfn(vcpu->arch.update_pte.pfn)) {
kvm_release_pfn_clean(vcpu->arch.update_pte.pfn);
vcpu->arch.update_pte.pfn = bad_pfn;
}
}

Expand Down Expand Up @@ -2236,8 +2234,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
audit_mappings_page(vcpu, ent, va, level - 1);
} else {
gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
struct page *page = gpa_to_page(vcpu, gpa);
hpa_t hpa = page_to_phys(page);
hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT;

if (is_shadow_present_pte(ent)
&& (ent & PT64_BASE_ADDR_MASK) != hpa)
Expand All @@ -2250,7 +2247,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
&& !is_error_hpa(hpa))
printk(KERN_ERR "audit: (%s) notrap shadow,"
" valid guest gva %lx\n", audit_msg, va);
kvm_release_page_clean(page);
kvm_release_pfn_clean(pfn);

}
}
Expand Down
26 changes: 13 additions & 13 deletions arch/x86/kvm/paging_tmpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
{
pt_element_t gpte;
unsigned pte_access;
struct page *npage;
pfn_t pfn;
int largepage = vcpu->arch.update_pte.largepage;

gpte = *(const pt_element_t *)pte;
Expand All @@ -260,13 +260,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
return;
npage = vcpu->arch.update_pte.page;
if (!npage)
pfn = vcpu->arch.update_pte.pfn;
if (is_error_pfn(pfn))
return;
get_page(npage);
kvm_get_pfn(pfn);
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
npage, true);
pfn, true);
}

/*
Expand All @@ -275,7 +275,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *walker,
int user_fault, int write_fault, int largepage,
int *ptwrite, struct page *page)
int *ptwrite, pfn_t pfn)
{
hpa_t shadow_addr;
int level;
Expand Down Expand Up @@ -336,7 +336,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
walker->pte_gpa[level - 2],
&curr_pte, sizeof(curr_pte));
if (r || curr_pte != walker->ptes[level - 2]) {
kvm_release_page_clean(page);
kvm_release_pfn_clean(pfn);
return NULL;
}
}
Expand All @@ -349,7 +349,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
user_fault, write_fault,
walker->ptes[walker->level-1] & PT_DIRTY_MASK,
ptwrite, largepage, walker->gfn, page, false);
ptwrite, largepage, walker->gfn, pfn, false);

return shadow_ent;
}
Expand Down Expand Up @@ -378,7 +378,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
u64 *shadow_pte;
int write_pt = 0;
int r;
struct page *page;
pfn_t pfn;
int largepage = 0;

pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
Expand Down Expand Up @@ -413,20 +413,20 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
largepage = 1;
}
}
page = gfn_to_page(vcpu->kvm, walker.gfn);
pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
up_read(&current->mm->mmap_sem);

/* mmio */
if (is_error_page(page)) {
if (is_error_pfn(pfn)) {
pgprintk("gfn %x is mmio\n", walker.gfn);
kvm_release_page_clean(page);
kvm_release_pfn_clean(pfn);
return 1;
}

spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu);
shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
largepage, &write_pt, page);
largepage, &write_pt, pfn);

pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
shadow_pte, *shadow_pte, write_pt);
Expand Down
4 changes: 2 additions & 2 deletions include/asm-x86/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,8 @@ struct kvm_vcpu_arch {
u64 *last_pte_updated;

struct {
gfn_t gfn; /* presumed gfn during guest pte update */
struct page *page; /* page corresponding to that gfn */
gfn_t gfn; /* presumed gfn during guest pte update */
pfn_t pfn; /* pfn corresponding to that gfn */
int largepage;
} update_pte;

Expand Down
12 changes: 12 additions & 0 deletions include/linux/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,10 @@ static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);

extern struct page *bad_page;
extern pfn_t bad_pfn;

int is_error_page(struct page *page);
int is_error_pfn(pfn_t pfn);
int kvm_is_error_hva(unsigned long addr);
int kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
Expand All @@ -168,6 +170,16 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
void kvm_release_page_clean(struct page *page);
void kvm_release_page_dirty(struct page *page);
void kvm_set_page_dirty(struct page *page);
void kvm_set_page_accessed(struct page *page);

/*
 * pfn-based variants of the struct page helpers above, for callers that
 * must not assume a struct page exists for every gfn.  Callers test the
 * value returned by gfn_to_pfn() with is_error_pfn().
 */
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
void kvm_release_pfn_dirty(pfn_t pfn);
void kvm_release_pfn_clean(pfn_t pfn);
void kvm_set_pfn_dirty(pfn_t pfn);
void kvm_set_pfn_accessed(pfn_t pfn);
void kvm_get_pfn(pfn_t pfn);

int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
int len);
int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
Expand Down
Loading

0 comments on commit 35149e2

Please sign in to comment.