Skip to content

Commit

Permalink
KVM: PPC: Book3s HV: Maintain separate guest and host views of R and …
Browse files Browse the repository at this point in the history
…C bits

This allows both the guest and the host to use the referenced (R) and
changed (C) bits in the guest hashed page table.  The guest has a view
of R and C that is maintained in the guest_rpte field of the revmap
entry for the HPTE, and the host has a view that is maintained in the
rmap entry for the associated gfn.

Both views are updated from the guest HPT.  If a bit (R or C) is zero
in either view, it will be initially set to zero in the HPTE (or HPTEs),
until set to 1 by hardware.  When an HPTE is removed for any reason,
the R and C bits from the HPTE are ORed into both views.  We have to
be careful to read the R and C bits from the HPTE after invalidating
it, but before unlocking it, in case of any late updates by the hardware.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
  • Loading branch information
Paul Mackerras authored and Avi Kivity committed Mar 5, 2012
1 parent a92bce9 commit bad3b50
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 39 deletions.
5 changes: 3 additions & 2 deletions arch/powerpc/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,9 @@ struct revmap_entry {
* index in the guest HPT of a HPTE that points to the page.
*/
#define KVMPPC_RMAP_LOCK_BIT 63
#define KVMPPC_RMAP_REF_BIT 33
#define KVMPPC_RMAP_REFERENCED (1ul << KVMPPC_RMAP_REF_BIT)
#define KVMPPC_RMAP_RC_SHIFT 32
#define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_CHANGED (HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_PRESENT 0x100000000ul
#define KVMPPC_RMAP_INDEX 0xfffffffful

Expand Down
48 changes: 30 additions & 18 deletions arch/powerpc/kvm/book3s_64_mmu_hv.c
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned long is_io;
unsigned int writing, write_ok;
struct vm_area_struct *vma;
unsigned long rcbits;

/*
* Real-mode code has already searched the HPT and found the
Expand Down Expand Up @@ -640,11 +641,17 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
goto out_unlock;
}

/* Only set R/C in real HPTE if set in both *rmap and guest_rpte */
rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
r &= rcbits | ~(HPTE_R_R | HPTE_R_C);

if (hptep[0] & HPTE_V_VALID) {
/* HPTE was previously valid, so we need to invalidate it */
unlock_rmap(rmap);
hptep[0] |= HPTE_V_ABSENT;
kvmppc_invalidate_hpte(kvm, hptep, index);
/* don't lose previous R and C bits */
r |= hptep[1] & (HPTE_R_R | HPTE_R_C);
} else {
kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
}
Expand Down Expand Up @@ -701,50 +708,55 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
struct revmap_entry *rev = kvm->arch.revmap;
unsigned long h, i, j;
unsigned long *hptep;
unsigned long ptel, psize;
unsigned long ptel, psize, rcbits;

for (;;) {
while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp))
cpu_relax();
lock_rmap(rmapp);
if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp);
unlock_rmap(rmapp);
break;
}

/*
* To avoid an ABBA deadlock with the HPTE lock bit,
* we have to unlock the rmap chain before locking the HPTE.
* Thus we remove the first entry, unlock the rmap chain,
* lock the HPTE and then check that it is for the
* page we're unmapping before changing it to non-present.
* we can't spin on the HPTE lock while holding the
* rmap chain lock.
*/
i = *rmapp & KVMPPC_RMAP_INDEX;
hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
/* unlock rmap before spinning on the HPTE lock */
unlock_rmap(rmapp);
while (hptep[0] & HPTE_V_HVLOCK)
cpu_relax();
continue;
}
j = rev[i].forw;
if (j == i) {
/* chain is now empty */
j = 0;
*rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
} else {
/* remove i from chain */
h = rev[i].back;
rev[h].forw = j;
rev[j].back = h;
rev[i].forw = rev[i].back = i;
j |= KVMPPC_RMAP_PRESENT;
*rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
}
smp_wmb();
*rmapp = j | (1ul << KVMPPC_RMAP_REF_BIT);

/* Now lock, check and modify the HPTE */
hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
cpu_relax();
/* Now check and modify the HPTE */
ptel = rev[i].guest_rpte;
psize = hpte_page_size(hptep[0], ptel);
if ((hptep[0] & HPTE_V_VALID) &&
hpte_rpn(ptel, psize) == gfn) {
kvmppc_invalidate_hpte(kvm, hptep, i);
hptep[0] |= HPTE_V_ABSENT;
kvmppc_invalidate_hpte(kvm, hptep, i);
/* Harvest R and C */
rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
rev[i].guest_rpte = ptel | rcbits;
}
unlock_rmap(rmapp);
hptep[0] &= ~HPTE_V_HVLOCK;
}
return 0;
Expand All @@ -767,7 +779,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
kvm_unmap_rmapp(kvm, rmapp, gfn);
while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp))
cpu_relax();
__clear_bit(KVMPPC_RMAP_REF_BIT, rmapp);
*rmapp &= ~KVMPPC_RMAP_REFERENCED;
__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp);
return 1;
}
Expand Down
45 changes: 26 additions & 19 deletions arch/powerpc/kvm/book3s_hv_rm_mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,17 @@ EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);

/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
unsigned long hpte_v)
struct revmap_entry *rev,
unsigned long hpte_v, unsigned long hpte_r)
{
struct revmap_entry *rev, *next, *prev;
struct revmap_entry *next, *prev;
unsigned long gfn, ptel, head;
struct kvm_memory_slot *memslot;
unsigned long *rmap;
unsigned long rcbits;

rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
ptel = rev->guest_rpte;
rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
ptel = rev->guest_rpte |= rcbits;
gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
memslot = builtin_gfn_to_memslot(kvm, gfn);
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
Expand All @@ -116,6 +118,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
else
*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
}
*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
unlock_rmap(rmap);
}

Expand Down Expand Up @@ -162,6 +165,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
pte_t pte;
unsigned int writing;
unsigned long mmu_seq;
unsigned long rcbits;
bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;

psize = hpte_page_size(pteh, ptel);
Expand Down Expand Up @@ -320,6 +324,9 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
} else {
kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
realmode);
/* Only set R/C in real HPTE if already set in *rmap */
rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
}
}

Expand Down Expand Up @@ -394,7 +401,8 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
asm volatile("tlbiel %0" : : "r" (rb));
asm volatile("ptesync" : : : "memory");
}
remove_revmap_chain(kvm, pte_index, v);
/* Read PTE low word after tlbie to get final R/C values */
remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
}
r = rev->guest_rpte;
unlock_hpte(hpte, 0);
Expand Down Expand Up @@ -469,12 +477,13 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)

args[j] = ((0x80 | flags) << 56) + pte_index;
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
/* insert R and C bits from guest PTE */
rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
args[j] |= rcbits << (56 - 5);

if (!(hp[0] & HPTE_V_VALID))
if (!(hp[0] & HPTE_V_VALID)) {
/* insert R and C bits from PTE */
rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
args[j] |= rcbits << (56 - 5);
continue;
}

hp[0] &= ~HPTE_V_VALID; /* leave it locked */
tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
Expand Down Expand Up @@ -505,13 +514,16 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
asm volatile("ptesync" : : : "memory");
}

/* Read PTE low words after tlbie to get final R/C values */
for (k = 0; k < n; ++k) {
j = indexes[k];
pte_index = args[j] & ((1ul << 56) - 1);
hp = hptes[k];
rev = revs[k];
remove_revmap_chain(kvm, pte_index, hp[0]);
unlock_hpte(hp, 0);
remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]);
rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
args[j] |= rcbits << (56 - 5);
hp[0] = 0;
}
}

Expand Down Expand Up @@ -595,8 +607,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
pte_index &= ~3;
n = 4;
}
if (flags & H_R_XLATE)
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
for (i = 0; i < n; ++i, ++pte_index) {
hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
v = hpte[0] & ~HPTE_V_HVLOCK;
Expand All @@ -605,12 +616,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
v &= ~HPTE_V_ABSENT;
v |= HPTE_V_VALID;
}
if (v & HPTE_V_VALID) {
if (rev)
r = rev[i].guest_rpte;
else
r = hpte[1] | HPTE_R_RPN;
}
if (v & HPTE_V_VALID)
r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
vcpu->arch.gpr[4 + i * 2] = v;
vcpu->arch.gpr[5 + i * 2] = r;
}
Expand Down

0 comments on commit bad3b50

Please sign in to comment.