Skip to content

Commit

Permalink
KVM: MMU: hypercall based pte updates and TLB flushes
Browse files Browse the repository at this point in the history
Hypercall based pte updates are faster than faults, and also allow use
of the lazy MMU mode to batch operations.

Don't report the feature if two dimensional paging is enabled.

[avi:
 - one mmu_op hypercall instead of one per op
 - allow 64-bit gpa on hypercall
 - don't pass host errors (-ENOMEM) to guest]

[akpm: warning fix on i386]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Avi Kivity <avi@qumranet.com>
  • Loading branch information
Marcelo Tosatti authored and Avi Kivity committed Apr 27, 2008
1 parent 9f81128 commit 2f333bc
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 3 deletions.
136 changes: 135 additions & 1 deletion arch/x86/kvm/mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/hugetlb.h>
#include <linux/compiler.h>

#include <asm/page.h>
#include <asm/cmpxchg.h>
Expand All @@ -40,7 +41,7 @@
* 2. while doing 1. it walks guest-physical to host-physical
* If the hardware supports that we don't need to do shadow paging.
*/
static bool tdp_enabled = false;
bool tdp_enabled = false;

#undef MMU_DEBUG

Expand Down Expand Up @@ -167,6 +168,13 @@ static int dbg = 1;
#define ACC_USER_MASK PT_USER_MASK
#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)

struct kvm_pv_mmu_op_buffer {
void *ptr;
unsigned len;
unsigned processed;
char buf[512] __aligned(sizeof(long));
};

struct kvm_rmap_desc {
u64 *shadow_ptes[RMAP_EXT];
struct kvm_rmap_desc *more;
Expand Down Expand Up @@ -2003,6 +2011,132 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
return nr_mmu_pages;
}

static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer,
unsigned len)
{
if (len > buffer->len)
return NULL;
return buffer->ptr;
}

static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer,
unsigned len)
{
void *ret;

ret = pv_mmu_peek_buffer(buffer, len);
if (!ret)
return ret;
buffer->ptr += len;
buffer->len -= len;
buffer->processed += len;
return ret;
}

static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
gpa_t addr, gpa_t value)
{
int bytes = 8;
int r;

if (!is_long_mode(vcpu) && !is_pae(vcpu))
bytes = 4;

r = mmu_topup_memory_caches(vcpu);
if (r)
return r;

if (!__emulator_write_phys(vcpu, addr, &value, bytes))
return -EFAULT;

return 1;
}

static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
{
kvm_x86_ops->tlb_flush(vcpu);
return 1;
}

static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
{
spin_lock(&vcpu->kvm->mmu_lock);
mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
spin_unlock(&vcpu->kvm->mmu_lock);
return 1;
}

static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu,
struct kvm_pv_mmu_op_buffer *buffer)
{
struct kvm_mmu_op_header *header;

header = pv_mmu_peek_buffer(buffer, sizeof *header);
if (!header)
return 0;
switch (header->op) {
case KVM_MMU_OP_WRITE_PTE: {
struct kvm_mmu_op_write_pte *wpte;

wpte = pv_mmu_read_buffer(buffer, sizeof *wpte);
if (!wpte)
return 0;
return kvm_pv_mmu_write(vcpu, wpte->pte_phys,
wpte->pte_val);
}
case KVM_MMU_OP_FLUSH_TLB: {
struct kvm_mmu_op_flush_tlb *ftlb;

ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb);
if (!ftlb)
return 0;
return kvm_pv_mmu_flush_tlb(vcpu);
}
case KVM_MMU_OP_RELEASE_PT: {
struct kvm_mmu_op_release_pt *rpt;

rpt = pv_mmu_read_buffer(buffer, sizeof *rpt);
if (!rpt)
return 0;
return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys);
}
default: return 0;
}
}

int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
gpa_t addr, unsigned long *ret)
{
int r;
struct kvm_pv_mmu_op_buffer buffer;

down_read(&vcpu->kvm->slots_lock);
down_read(&current->mm->mmap_sem);

buffer.ptr = buffer.buf;
buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf);
buffer.processed = 0;

r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len);
if (r)
goto out;

while (buffer.len) {
r = kvm_pv_mmu_op_one(vcpu, &buffer);
if (r < 0)
goto out;
if (r == 0)
break;
}

r = 1;
out:
*ret = buffer.processed;
up_read(&current->mm->mmap_sem);
up_read(&vcpu->kvm->slots_lock);
return r;
}

#ifdef AUDIT

static const char *audit_msg;
Expand Down
18 changes: 17 additions & 1 deletion arch/x86/kvm/x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -832,6 +832,9 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_NR_MEMSLOTS:
r = KVM_MEMORY_SLOTS;
break;
case KVM_CAP_PV_MMU:
r = !tdp_enabled;
break;
default:
r = 0;
break;
Expand Down Expand Up @@ -2452,9 +2455,19 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);

static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
unsigned long a1)
{
if (is_long_mode(vcpu))
return a0;
else
return a0 | ((gpa_t)a1 << 32);
}

int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
int r = 1;

kvm_x86_ops->cache_regs(vcpu);

Expand All @@ -2476,14 +2489,17 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
case KVM_HC_VAPIC_POLL_IRQ:
ret = 0;
break;
case KVM_HC_MMU_OP:
r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
break;
default:
ret = -KVM_ENOSYS;
break;
}
vcpu->arch.regs[VCPU_REGS_RAX] = ret;
kvm_x86_ops->decache_regs(vcpu);
++vcpu->stat.hypercalls;
return 0;
return r;
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);

Expand Down
4 changes: 4 additions & 0 deletions include/asm-x86/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);

int __emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes);
int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
gpa_t addr, unsigned long *ret);

extern bool tdp_enabled;

enum emulation_result {
EMULATE_DONE, /* no further processing */
Expand Down
29 changes: 29 additions & 0 deletions include/asm-x86/kvm_para.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,39 @@
#define KVM_CPUID_FEATURES 0x40000001
#define KVM_FEATURE_CLOCKSOURCE 0
#define KVM_FEATURE_NOP_IO_DELAY 1
#define KVM_FEATURE_MMU_OP 2

#define MSR_KVM_WALL_CLOCK 0x11
#define MSR_KVM_SYSTEM_TIME 0x12

#define KVM_MAX_MMU_OP_BATCH 32

/* Operations for KVM_HC_MMU_OP */
#define KVM_MMU_OP_WRITE_PTE 1
#define KVM_MMU_OP_FLUSH_TLB 2
#define KVM_MMU_OP_RELEASE_PT 3

/* Payload for KVM_HC_MMU_OP */
struct kvm_mmu_op_header {
__u32 op;
__u32 pad;
};

struct kvm_mmu_op_write_pte {
struct kvm_mmu_op_header header;
__u64 pte_phys;
__u64 pte_val;
};

struct kvm_mmu_op_flush_tlb {
struct kvm_mmu_op_header header;
};

struct kvm_mmu_op_release_pt {
struct kvm_mmu_op_header header;
__u64 pt_phys;
};

#ifdef __KERNEL__
#include <asm/processor.h>

Expand Down
1 change: 1 addition & 0 deletions include/linux/kvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ struct kvm_vapic_addr {
#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
#define KVM_CAP_PIT 11
#define KVM_CAP_NOP_IO_DELAY 12
#define KVM_CAP_PV_MMU 13

/*
* ioctls for VM fds
Expand Down
5 changes: 4 additions & 1 deletion include/linux/kvm_para.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@

/* Return values for hypercalls */
#define KVM_ENOSYS 1000
#define KVM_EFAULT EFAULT
#define KVM_E2BIG E2BIG

#define KVM_HC_VAPIC_POLL_IRQ 1
#define KVM_HC_VAPIC_POLL_IRQ 1
#define KVM_HC_MMU_OP 2

/*
* hypercalls use architecture specific
Expand Down

0 comments on commit 2f333bc

Please sign in to comment.