Skip to content

Commit

Permalink
KVM: e500: perform hugepage check after looking up the PFN
Browse files Browse the repository at this point in the history
e500 KVM tries to bypass __kvm_faultin_pfn() in order to map VM_PFNMAP
VMAs as huge pages.  This is a Bad Idea because VM_PFNMAP VMAs could
become noncontiguous as a result of callsto remap_pfn_range().

Instead, use the already existing host PTE lookup to retrieve a
valid host-side mapping level after __kvm_faultin_pfn() has
returned.  Then find the largest size that will satisfy the
guest's request while staying within a single host PTE.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
  • Loading branch information
Paolo Bonzini committed Jan 12, 2025
1 parent 03b755b commit 55f4db7
Showing 1 changed file with 69 additions and 109 deletions.
178 changes: 69 additions & 109 deletions arch/powerpc/kvm/e500_mmu_host.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,15 +326,14 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
struct tlbe_ref *ref)
{
struct kvm_memory_slot *slot;
unsigned long pfn = 0; /* silence GCC warning */
unsigned int psize;
unsigned long pfn;
struct page *page = NULL;
unsigned long hva;
int pfnmap = 0;
int tsize = BOOK3E_PAGESZ_4K;
int ret = 0;
unsigned long mmu_seq;
struct kvm *kvm = vcpu_e500->vcpu.kvm;
unsigned long tsize_pages = 0;
pte_t *ptep;
unsigned int wimg = 0;
pgd_t *pgdir;
Expand All @@ -356,111 +355,12 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn);
hva = gfn_to_hva_memslot(slot, gfn);

if (tlbsel == 1) {
struct vm_area_struct *vma;
mmap_read_lock(kvm->mm);

vma = find_vma(kvm->mm, hva);
if (vma && hva >= vma->vm_start &&
(vma->vm_flags & VM_PFNMAP)) {
/*
* This VMA is a physically contiguous region (e.g.
* /dev/mem) that bypasses normal Linux page
* management. Find the overlap between the
* vma and the memslot.
*/

unsigned long start, end;
unsigned long slot_start, slot_end;

pfnmap = 1;
writable = vma->vm_flags & VM_WRITE;

start = vma->vm_pgoff;
end = start +
vma_pages(vma);

pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT);

slot_start = pfn - (gfn - slot->base_gfn);
slot_end = slot_start + slot->npages;

if (start < slot_start)
start = slot_start;
if (end > slot_end)
end = slot_end;

tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
MAS1_TSIZE_SHIFT;

/*
* e500 doesn't implement the lowest tsize bit,
* or 1K pages.
*/
tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);

/*
* Now find the largest tsize (up to what the guest
* requested) that will cover gfn, stay within the
* range, and for which gfn and pfn are mutually
* aligned.
*/

for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
unsigned long gfn_start, gfn_end;
tsize_pages = 1UL << (tsize - 2);

gfn_start = gfn & ~(tsize_pages - 1);
gfn_end = gfn_start + tsize_pages;

if (gfn_start + pfn - gfn < start)
continue;
if (gfn_end + pfn - gfn > end)
continue;
if ((gfn & (tsize_pages - 1)) !=
(pfn & (tsize_pages - 1)))
continue;

gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
pfn &= ~(tsize_pages - 1);
break;
}
} else if (vma && hva >= vma->vm_start &&
is_vm_hugetlb_page(vma)) {
unsigned long psize = vma_kernel_pagesize(vma);

tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
MAS1_TSIZE_SHIFT;

/*
* Take the largest page size that satisfies both host
* and guest mapping
*/
tsize = min(__ilog2(psize) - 10, tsize);

/*
* e500 doesn't implement the lowest tsize bit,
* or 1K pages.
*/
tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
}

mmap_read_unlock(kvm->mm);
}

if (likely(!pfnmap)) {
tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT);
pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, &writable, &page);
if (is_error_noslot_pfn(pfn)) {
if (printk_ratelimit())
pr_err("%s: real page not found for gfn %lx\n",
__func__, (long)gfn);
return -EINVAL;
}

/* Align guest and physical address to page map boundaries */
pfn &= ~(tsize_pages - 1);
gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, &writable, &page);
if (is_error_noslot_pfn(pfn)) {
if (printk_ratelimit())
pr_err("%s: real page not found for gfn %lx\n",
__func__, (long)gfn);
return -EINVAL;
}

spin_lock(&kvm->mmu_lock);
Expand All @@ -478,7 +378,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
* can't run hence pfn won't change.
*/
local_irq_save(flags);
ptep = find_linux_pte(pgdir, hva, NULL, NULL);
ptep = find_linux_pte(pgdir, hva, NULL, &psize);
if (ptep) {
pte_t pte = READ_ONCE(*ptep);

Expand All @@ -495,6 +395,66 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
}
local_irq_restore(flags);

if (psize && tlbsel == 1) {
unsigned long psize_pages, tsize_pages;
unsigned long start, end;
unsigned long slot_start, slot_end;

psize_pages = 1UL << (psize - PAGE_SHIFT);
start = pfn & ~(psize_pages - 1);
end = start + psize_pages;

slot_start = pfn - (gfn - slot->base_gfn);
slot_end = slot_start + slot->npages;

if (start < slot_start)
start = slot_start;
if (end > slot_end)
end = slot_end;

tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
MAS1_TSIZE_SHIFT;

/*
* Any page size that doesn't satisfy the host mapping
* will fail the start and end tests.
*/
tsize = min(psize - PAGE_SHIFT + BOOK3E_PAGESZ_4K, tsize);

/*
* e500 doesn't implement the lowest tsize bit,
* or 1K pages.
*/
tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);

/*
* Now find the largest tsize (up to what the guest
* requested) that will cover gfn, stay within the
* range, and for which gfn and pfn are mutually
* aligned.
*/

for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
unsigned long gfn_start, gfn_end;
tsize_pages = 1UL << (tsize - 2);

gfn_start = gfn & ~(tsize_pages - 1);
gfn_end = gfn_start + tsize_pages;

if (gfn_start + pfn - gfn < start)
continue;
if (gfn_end + pfn - gfn > end)
continue;
if ((gfn & (tsize_pages - 1)) !=
(pfn & (tsize_pages - 1)))
continue;

gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
pfn &= ~(tsize_pages - 1);
break;
}
}

kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg, writable);
kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
ref, gvaddr, stlbe);
Expand Down

0 comments on commit 55f4db7

Please sign in to comment.