Skip to content

Commit

Permalink
KVM: PPC: Only get pages when actually needed, not in prepare_memory_…
Browse files Browse the repository at this point in the history
…region()

This removes the code from kvmppc_core_prepare_memory_region() that
looked up the VMA for the region being added and called hva_to_page
to get the pfns for the memory.  We have no guarantee that there will
be anything mapped there at the time of the KVM_SET_USER_MEMORY_REGION
ioctl call; userspace can do that ioctl and then map memory into the
region later.

Instead we defer looking up the pfn for each memory page until it is
needed, which generally means when the guest does an H_ENTER hcall on
the page.  Since we can't call get_user_pages in real mode, if we don't
already have the pfn for the page, kvmppc_h_enter() will return
H_TOO_HARD and we then call kvmppc_virtmode_h_enter() once we get back
to kernel context.  That calls kvmppc_get_guest_page() to get the pfn
for the page, and then calls back to kvmppc_h_enter() to redo the HPTE
insertion.

When the first vcpu starts executing, we need to have the RMO or VRMA
region mapped so that the guest's real mode accesses will work.  Thus
we now have a check in kvmppc_vcpu_run() to see if the RMO/VRMA is set
up and if not, call kvmppc_hv_setup_rma().  It checks if the memslot
starting at guest physical 0 now has RMO memory mapped there; if so it
sets it up for the guest, otherwise on POWER7 it sets up the VRMA.
The function that does that, kvmppc_map_vrma, is now a bit simpler,
as it calls kvmppc_virtmode_h_enter instead of creating the HPTE itself.

Since we are now potentially updating entries in the slot_phys[]
arrays from multiple vcpu threads, we now have a spinlock protecting
those updates to ensure that we don't lose track of any references
to pages.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
  • Loading branch information
Paul Mackerras authored and Avi Kivity committed Mar 5, 2012
1 parent 075295d commit c77162d
Show file tree
Hide file tree
Showing 7 changed files with 290 additions and 160 deletions.
4 changes: 4 additions & 0 deletions arch/powerpc/include/asm/kvm_book3s.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,10 @@ extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
unsigned long *nb_ret);
extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel);
extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel);

extern void kvmppc_entry_trampoline(void);
extern void kvmppc_hv_entry_trampoline(void);
Expand Down
12 changes: 12 additions & 0 deletions arch/powerpc/include/asm/kvm_book3s_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,16 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
return rb;
}

/*
 * Decode the page size, in bytes, described by the two doublewords of
 * an HPTE (h = first doubleword, l = second doubleword).
 *
 * Only 4k, 64k and 16M pages are recognised for now, and 64k only when
 * the CPU has the CPU_FTR_ARCH_206 feature.  Any other encoding gives 0,
 * which callers must treat as an error.
 */
static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
{
	unsigned int shift = 0;		/* 0 means "unrecognised encoding" */

	if (!(h & HPTE_V_LARGE))
		shift = 12;		/* 4k page */
	else if ((l & 0xf000) == 0x1000 && cpu_has_feature(CPU_FTR_ARCH_206))
		shift = 16;		/* 64k page */
	else if (!(l & 0xff000))
		shift = 24;		/* 16M page */

	return shift ? 1ul << shift : 0;
}

#endif /* __ASM_KVM_BOOK3S_64_H__ */
2 changes: 2 additions & 0 deletions arch/powerpc/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,9 @@ struct kvm_arch {
unsigned long lpcr;
unsigned long rmor;
struct kvmppc_rma_info *rma;
int rma_setup_done;
struct list_head spapr_tce_tables;
spinlock_t slot_phys_lock;
unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
int slot_npages[KVM_MEM_SLOTS_NUM];
unsigned short last_vcpu[NR_CPUS];
Expand Down
4 changes: 2 additions & 2 deletions arch/powerpc/include/asm/kvm_ppc.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ extern long kvmppc_alloc_hpt(struct kvm *kvm);
extern void kvmppc_free_hpt(struct kvm *kvm);
extern long kvmppc_prepare_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
extern void kvmppc_map_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot);
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);
Expand Down
130 changes: 106 additions & 24 deletions arch/powerpc/kvm/book3s_64_mmu_hv.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,19 +95,17 @@ void kvmppc_free_hpt(struct kvm *kvm)
free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
}

void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot)
{
struct kvm *kvm = vcpu->kvm;
unsigned long i;
unsigned long npages;
unsigned long pa;
unsigned long *hpte;
unsigned long hash;
unsigned long hp_v, hp_r;
unsigned long addr, hash;
unsigned long porder = kvm->arch.ram_porder;
struct revmap_entry *rev;
unsigned long *physp;
long ret;

physp = kvm->arch.slot_phys[mem->slot];
npages = kvm->arch.slot_npages[mem->slot];
npages = kvm->arch.slot_npages[memslot->id];

/* VRMA can't be > 1TB */
if (npages > 1ul << (40 - porder))
Expand All @@ -117,10 +115,7 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
npages = HPT_NPTEG;

for (i = 0; i < npages; ++i) {
pa = physp[i];
if (!pa)
break;
pa &= PAGE_MASK;
addr = i << porder;
/* can't use hpt_hash since va > 64 bits */
hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
/*
Expand All @@ -130,18 +125,16 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
* is available and use it.
*/
hash = (hash << 3) + 7;
hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 4));
/* HPTE low word - RPN, protection, etc. */
hpte[1] = pa | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
smp_wmb();
hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
hp_v = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
(i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED |
HPTE_V_LARGE | HPTE_V_VALID;

/* Reverse map info */
rev = &kvm->arch.revmap[hash];
rev->guest_rpte = (i << porder) | HPTE_R_R | HPTE_R_C |
HPTE_R_M | PP_RWXX;
hp_r = addr | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r);
if (ret != H_SUCCESS) {
pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
addr, ret);
break;
}
}
}

Expand Down Expand Up @@ -178,6 +171,92 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
kvmppc_set_msr(vcpu, MSR_SF | MSR_ME);
}

/*
 * Make sure kvm->arch.slot_phys[][] has an entry for the guest page
 * containing @gfn in @memslot, taking a reference to the backing host
 * page if no entry is recorded yet.
 *
 * Returns 0 when an entry is present on exit (whether it was already
 * there, we installed it, or another thread installed it first), and
 * -EINVAL when the memslot has no slot_phys array, the page could not
 * be obtained, or the page is not the head of a compound page of order
 * (kvm->arch.ram_porder - PAGE_SHIFT).
 */
static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
				  struct kvm_memory_slot *memslot)
{
	unsigned long *slot_pa = kvm->arch.slot_phys[memslot->id];
	unsigned long idx, hva, pfn;
	struct page *pg[1];

	if (!slot_pa)
		return -EINVAL;

	/* One slot_phys entry covers one ram_porder-sized guest page */
	idx = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT);
	if (slot_pa[idx])
		return 0;		/* already have this one */

	/* Instantiate the page and take a reference to it */
	hva = gfn_to_hva_memslot(memslot, gfn);
	if (get_user_pages_fast(hva, 1, 1, pg) != 1)
		return -EINVAL;

	/* It has to be a 16MB page; drop the reference if it is not */
	if (!PageHead(pg[0]) ||
	    compound_order(pg[0]) != (kvm->arch.ram_porder - PAGE_SHIFT)) {
		pr_err("page at %lx isn't 16MB (o=%d)\n",
		       hva, compound_order(pg[0]));
		put_page(pg[0]);
		return -EINVAL;
	}
	pfn = page_to_pfn(pg[0]);

	/*
	 * Recheck under the lock: the entry may have been filled in by
	 * someone else since the unlocked test above, in which case the
	 * reference we just took is surplus and must be dropped.
	 */
	spin_lock(&kvm->arch.slot_phys_lock);
	if (slot_pa[idx])
		put_page(pg[0]);
	else
		slot_pa[idx] = (pfn << PAGE_SHIFT) | KVMPPC_GOT_PAGE;
	spin_unlock(&kvm->arch.slot_phys_lock);

	return 0;
}

/*
 * Virtual-mode handling of a guest H_ENTER hcall, used when the
 * requested page is not pinned yet.
 *
 * The page size is decoded from the HPTE, the memslot covering the
 * guest physical address in @ptel is looked up, and a reference to the
 * backing page is obtained via kvmppc_get_guest_page().  The insertion
 * itself is then done by kvmppc_h_enter() with preemption disabled.
 *
 * Returns H_PARAMETER if the page size is not recognised, no valid
 * memslot covers the address, or the page cannot be obtained; otherwise
 * the result of kvmppc_h_enter(), with H_TOO_HARD mapped to H_RESOURCE.
 */
long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
			long pte_index, unsigned long pteh, unsigned long ptel)
{
	struct kvm_memory_slot *slot;
	unsigned long pgsize, frame;
	long rc;

	pgsize = hpte_page_size(pteh, ptel);
	if (pgsize == 0)
		return H_PARAMETER;

	/* Guest frame number of the start of the page being mapped */
	frame = ((ptel & HPTE_R_RPN) & ~(pgsize - 1)) >> PAGE_SHIFT;

	/* There must be a usable memslot there, and we must get the page */
	slot = gfn_to_memslot(vcpu->kvm, frame);
	if (!slot || (slot->flags & KVM_MEMSLOT_INVALID) ||
	    kvmppc_get_guest_page(vcpu->kvm, frame, slot) < 0)
		return H_PARAMETER;

	preempt_disable();
	rc = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
	preempt_enable();

	if (rc != H_TOO_HARD)
		return rc;

	/* this can't happen: the page was obtained just above */
	pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
	return H_RESOURCE;	/* or something */
}

static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *gpte, bool data)
{
Expand All @@ -203,8 +282,11 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
physp += (gfn - memslot->base_gfn) >>
(kvm->arch.ram_porder - PAGE_SHIFT);
pa = *physp;
if (!pa)
return NULL;
if (!pa) {
if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0)
return NULL;
pa = *physp;
}
pfn = pa >> PAGE_SHIFT;
page = pfn_to_page(pfn);
get_page(page);
Expand Down
Loading

0 comments on commit c77162d

Please sign in to comment.