KVM: ppc: directly insert shadow mappings into the hardware TLB

Formerly, we used to maintain a per-vcpu shadow TLB and on every entry to the guest would load this array into the hardware TLB. This consumed 1280 bytes of memory (64 entries of 16 bytes plus a struct page pointer each), and also required some assembly to loop over the array on every entry. Instead of saving a copy in memory, we can just store shadow mappings directly into the hardware TLB, accepting that the host kernel will clobber these as part of the normal 440 TLB round robin. When we do that we need less than half the memory, and we have decreased the exit handling time for all guest exits, at the cost of increased number of TLB misses because the host overwrites some guest entries. These savings will be increased on processors with larger TLBs or which implement intelligent flush instructions like tlbivax (which will avoid the need to walk arrays in software). In addition to that and to the code simplification, we have a greater chance of leaving other host userspace mappings in the TLB, instead of forcing all subsequent tasks to re-fault all their mappings. Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com> Signed-off-by: Avi Kivity <avi@redhat.com>
git-mirror · Dec 31, 2008 · 7924bd4 · 7924bd4
1 parent c0ca609
commit 7924bd4
Show file tree

Hide file tree

Showing 8 changed files with 168 additions and 221 deletions.
diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
@@ -22,19 +22,25 @@
 
 #include <linux/kvm_host.h>
 
-/* XXX Can't include mmu-44x.h because it redefines struct mm_context. */
 #define PPC44x_TLB_SIZE 64
 
+/* If the guest is expecting it, this can be as large as we like; we'd just
+ * need to find some way of advertising it. */
+#define KVM44x_GUEST_TLB_SIZE 64
+
+struct kvmppc_44x_shadow_ref {
+	struct page *page;
+	u16 gtlb_index;
+	u8 writeable;
+	u8 tid;
+};
+
 struct kvmppc_vcpu_44x {
 	/* Unmodified copy of the guest's TLB. */
-	struct kvmppc_44x_tlbe guest_tlb[PPC44x_TLB_SIZE];
-	/* TLB that's actually used when the guest is running. */
-	struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
-	/* Pages which are referenced in the shadow TLB. */
-	struct page *shadow_pages[PPC44x_TLB_SIZE];
-
-	/* Track which TLB entries we've modified in the current exit. */
-	u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
+	struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE];
+
+	/* References to guest pages in the hardware TLB. */
+	struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE];
 
 	struct kvm_vcpu vcpu;
 };

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
@@ -53,7 +53,8 @@ extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 
 extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
-                           u64 asid, u32 flags, u32 max_bytes);
+                           u64 asid, u32 flags, u32 max_bytes,
+                           unsigned int gtlb_idx);
 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
 

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
@@ -359,12 +359,6 @@ int main(void)
 #ifdef CONFIG_KVM
 	DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
 
-	DEFINE(VCPU_TO_44X, offsetof(struct kvmppc_vcpu_44x, vcpu));
-	DEFINE(VCPU44x_SHADOW_TLB,
-	       offsetof(struct kvmppc_vcpu_44x, shadow_tlb));
-	DEFINE(VCPU44x_SHADOW_MOD,
-	       offsetof(struct kvmppc_vcpu_44x, shadow_tlb_mod));
-
 	DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
 	DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
 	DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));

diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
@@ -96,21 +96,14 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	int i;
-
-	/* Mark every guest entry in the shadow TLB entry modified, so that they
-	 * will all be reloaded on the next vcpu run (instead of being
-	 * demand-faulted). */
-	for (i = 0; i <= tlb_44x_hwater; i++)
-		kvmppc_tlbe_set_modified(vcpu, i);
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	/* Don't leave guest TLB entries resident when being de-scheduled. */
-	/* XXX It would be nice to differentiate between heavyweight exit and
-	 * sched_out here, since we could avoid the TLB flush for heavyweight
-	 * exits. */
+	/* XXX Since every guest uses TS=1 TID=0/1 mappings, we can't leave any TLB
+	 * entries around when we're descheduled, so we must completely flush the
+	 * TLB of all guest mappings. On the other hand, if there is only one
+	 * guest, this flush is completely unnecessary. */
 	_tlbia();
 }
 
@@ -130,6 +123,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0];
+	int i;
 
 	tlbe->tid = 0;
 	tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
@@ -148,6 +142,9 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 	 * CCR1[TCS]. */
 	vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
 
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
+		vcpu_44x->shadow_refs[i].gtlb_index = -1;
+
 	return 0;
 }