Skip to content

Commit

Permalink
KVM: Refactor hypercall infrastructure (v3)
Browse files Browse the repository at this point in the history
This patch refactors the current hypercall infrastructure to better
support live migration and SMP.  It eliminates the hypercall page by
trapping the UD exception that would occur if you used the wrong hypercall
instruction for the underlying architecture and replacing it with the right
one lazily.

A fall-out of this patch is that the unhandled hypercalls no longer trap to
userspace.  There is very little reason though to use a hypercall to
communicate with userspace as PIO or MMIO can be used.  There is no code
in tree that uses userspace hypercalls.

[avi: fix #ud injection on vmx]

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
  • Loading branch information
Anthony Liguori authored and Avi Kivity committed Jan 30, 2008
1 parent aca7f96 commit 7aa81cc
Show file tree
Hide file tree
Showing 6 changed files with 199 additions and 183 deletions.
8 changes: 4 additions & 4 deletions drivers/kvm/kvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#define KVM_MAX_CPUID_ENTRIES 40

#define DE_VECTOR 0
#define UD_VECTOR 6
#define NM_VECTOR 7
#define DF_VECTOR 8
#define TS_VECTOR 10
Expand Down Expand Up @@ -317,9 +318,6 @@ struct kvm_vcpu {
unsigned long cr0;
unsigned long cr2;
unsigned long cr3;
gpa_t para_state_gpa;
struct page *para_state_page;
gpa_t hypercall_gpa;
unsigned long cr4;
unsigned long cr8;
u64 pdptrs[4]; /* pae */
Expand Down Expand Up @@ -622,7 +620,9 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);

int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);

int kvm_fix_hypercall(struct kvm_vcpu *vcpu);

static inline void kvm_guest_enter(void)
{
Expand Down
156 changes: 46 additions & 110 deletions drivers/kvm/kvm_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include <linux/smp.h>
#include <linux/anon_inodes.h>
#include <linux/profile.h>
#include <linux/kvm_para.h>

#include <asm/processor.h>
#include <asm/msr.h>
Expand Down Expand Up @@ -1362,51 +1363,61 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);

int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, a4, a5, ret;
unsigned long nr, a0, a1, a2, a3, ret;

kvm_x86_ops->cache_regs(vcpu);
ret = -KVM_EINVAL;
#ifdef CONFIG_X86_64
if (is_long_mode(vcpu)) {
nr = vcpu->regs[VCPU_REGS_RAX];
a0 = vcpu->regs[VCPU_REGS_RDI];
a1 = vcpu->regs[VCPU_REGS_RSI];
a2 = vcpu->regs[VCPU_REGS_RDX];
a3 = vcpu->regs[VCPU_REGS_RCX];
a4 = vcpu->regs[VCPU_REGS_R8];
a5 = vcpu->regs[VCPU_REGS_R9];
} else
#endif
{
nr = vcpu->regs[VCPU_REGS_RBX] & -1u;
a0 = vcpu->regs[VCPU_REGS_RAX] & -1u;
a1 = vcpu->regs[VCPU_REGS_RCX] & -1u;
a2 = vcpu->regs[VCPU_REGS_RDX] & -1u;
a3 = vcpu->regs[VCPU_REGS_RSI] & -1u;
a4 = vcpu->regs[VCPU_REGS_RDI] & -1u;
a5 = vcpu->regs[VCPU_REGS_RBP] & -1u;

nr = vcpu->regs[VCPU_REGS_RAX];
a0 = vcpu->regs[VCPU_REGS_RBX];
a1 = vcpu->regs[VCPU_REGS_RCX];
a2 = vcpu->regs[VCPU_REGS_RDX];
a3 = vcpu->regs[VCPU_REGS_RSI];

if (!is_long_mode(vcpu)) {
nr &= 0xFFFFFFFF;
a0 &= 0xFFFFFFFF;
a1 &= 0xFFFFFFFF;
a2 &= 0xFFFFFFFF;
a3 &= 0xFFFFFFFF;
}

switch (nr) {
default:
run->hypercall.nr = nr;
run->hypercall.args[0] = a0;
run->hypercall.args[1] = a1;
run->hypercall.args[2] = a2;
run->hypercall.args[3] = a3;
run->hypercall.args[4] = a4;
run->hypercall.args[5] = a5;
run->hypercall.ret = ret;
run->hypercall.longmode = is_long_mode(vcpu);
kvm_x86_ops->decache_regs(vcpu);
return 0;
ret = -KVM_ENOSYS;
break;
}
vcpu->regs[VCPU_REGS_RAX] = ret;
kvm_x86_ops->decache_regs(vcpu);
return 1;
return 0;
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);

int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
{
char instruction[3];
int ret = 0;

mutex_lock(&vcpu->kvm->lock);

/*
* Blow out the MMU to ensure that no other VCPU has an active mapping
* to ensure that the updated hypercall appears atomically across all
* VCPUs.
*/
kvm_mmu_zap_all(vcpu->kvm);

kvm_x86_ops->cache_regs(vcpu);
kvm_x86_ops->patch_hypercall(vcpu, instruction);
if (emulator_write_emulated(vcpu->rip, instruction, 3, vcpu)
!= X86EMUL_CONTINUE)
ret = -EFAULT;

mutex_unlock(&vcpu->kvm->lock);

return ret;
}
EXPORT_SYMBOL_GPL(kvm_hypercall);

static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
Expand Down Expand Up @@ -1474,75 +1485,6 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
}
}

/*
* Register the para guest with the host:
*/
static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
{
struct kvm_vcpu_para_state *para_state;
hpa_t para_state_hpa, hypercall_hpa;
struct page *para_state_page;
unsigned char *hypercall;
gpa_t hypercall_gpa;

printk(KERN_DEBUG "kvm: guest trying to enter paravirtual mode\n");
printk(KERN_DEBUG ".... para_state_gpa: %08Lx\n", para_state_gpa);

/*
* Needs to be page aligned:
*/
if (para_state_gpa != PAGE_ALIGN(para_state_gpa))
goto err_gp;

para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa);
printk(KERN_DEBUG ".... para_state_hpa: %08Lx\n", para_state_hpa);
if (is_error_hpa(para_state_hpa))
goto err_gp;

mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT);
para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT);
para_state = kmap(para_state_page);

printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version);
printk(KERN_DEBUG ".... size: %d\n", para_state->size);

para_state->host_version = KVM_PARA_API_VERSION;
/*
* We cannot support guests that try to register themselves
* with a newer API version than the host supports:
*/
if (para_state->guest_version > KVM_PARA_API_VERSION) {
para_state->ret = -KVM_EINVAL;
goto err_kunmap_skip;
}

hypercall_gpa = para_state->hypercall_gpa;
hypercall_hpa = gpa_to_hpa(vcpu, hypercall_gpa);
printk(KERN_DEBUG ".... hypercall_hpa: %08Lx\n", hypercall_hpa);
if (is_error_hpa(hypercall_hpa)) {
para_state->ret = -KVM_EINVAL;
goto err_kunmap_skip;
}

printk(KERN_DEBUG "kvm: para guest successfully registered.\n");
vcpu->para_state_page = para_state_page;
vcpu->para_state_gpa = para_state_gpa;
vcpu->hypercall_gpa = hypercall_gpa;

mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT);
hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT),
KM_USER1) + (hypercall_hpa & ~PAGE_MASK);
kvm_x86_ops->patch_hypercall(vcpu, hypercall);
kunmap_atomic(hypercall, KM_USER1);

para_state->ret = 0;
err_kunmap_skip:
kunmap(para_state_page);
return 0;
err_gp:
return 1;
}

int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
u64 data;
Expand Down Expand Up @@ -1656,12 +1598,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
case MSR_IA32_MISC_ENABLE:
vcpu->ia32_misc_enable_msr = data;
break;
/*
* This is the 'probe whether the host is KVM' logic:
*/
case MSR_KVM_API_MAGIC:
return vcpu_register_para(vcpu, data);

default:
pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr);
return 1;
Expand Down
19 changes: 16 additions & 3 deletions drivers/kvm/svm.c
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,8 @@ static void init_vmcb(struct vmcb *vmcb)
INTERCEPT_DR5_MASK |
INTERCEPT_DR7_MASK;

control->intercept_exceptions = 1 << PF_VECTOR;
control->intercept_exceptions = (1 << PF_VECTOR) |
(1 << UD_VECTOR);


control->intercept = (1ULL << INTERCEPT_INTR) |
Expand Down Expand Up @@ -979,6 +980,17 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 0;
}

static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
int er;

er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0);
if (er != EMULATE_DONE)
inject_ud(&svm->vcpu);

return 1;
}

static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
Expand Down Expand Up @@ -1045,7 +1057,8 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
svm->next_rip = svm->vmcb->save.rip + 3;
skip_emulated_instruction(&svm->vcpu);
return kvm_hypercall(&svm->vcpu, kvm_run);
kvm_emulate_hypercall(&svm->vcpu);
return 1;
}

static int invalid_op_interception(struct vcpu_svm *svm,
Expand Down Expand Up @@ -1241,6 +1254,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_WRITE_DR3] = emulate_on_interception,
[SVM_EXIT_WRITE_DR5] = emulate_on_interception,
[SVM_EXIT_WRITE_DR7] = emulate_on_interception,
[SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
[SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
[SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
[SVM_EXIT_INTR] = nop_on_interception,
Expand Down Expand Up @@ -1675,7 +1689,6 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
hypercall[0] = 0x0f;
hypercall[1] = 0x01;
hypercall[2] = 0xd9;
hypercall[3] = 0xc3;
}

static void svm_check_processor_compat(void *rtn)
Expand Down
29 changes: 26 additions & 3 deletions drivers/kvm/vmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,13 @@ static inline int is_no_device(u32 intr_info)
(INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
}

static inline int is_invalid_opcode(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
(INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
}

static inline int is_external_interrupt(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
Expand Down Expand Up @@ -315,7 +322,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
{
u32 eb;

eb = 1u << PF_VECTOR;
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
if (!vcpu->fpu_active)
eb |= 1u << NM_VECTOR;
if (vcpu->guest_debug.enabled)
Expand Down Expand Up @@ -560,6 +567,14 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
INTR_INFO_VALID_MASK);
}

static void vmx_inject_ud(struct kvm_vcpu *vcpu)
{
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
UD_VECTOR |
INTR_TYPE_EXCEPTION |
INTR_INFO_VALID_MASK);
}

/*
* Swap MSR entry in host/guest MSR entry array.
*/
Expand Down Expand Up @@ -1771,6 +1786,14 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}

if (is_invalid_opcode(intr_info)) {
er = emulate_instruction(vcpu, kvm_run, 0, 0);
if (er != EMULATE_DONE)
vmx_inject_ud(vcpu);

return 1;
}

error_code = 0;
rip = vmcs_readl(GUEST_RIP);
if (intr_info & INTR_INFO_DELIEVER_CODE_MASK)
Expand Down Expand Up @@ -1873,7 +1896,6 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
hypercall[0] = 0x0f;
hypercall[1] = 0x01;
hypercall[2] = 0xc1;
hypercall[3] = 0xc3;
}

static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
Expand Down Expand Up @@ -2059,7 +2081,8 @@ static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
skip_emulated_instruction(vcpu);
return kvm_hypercall(vcpu, kvm_run);
kvm_emulate_hypercall(vcpu);
return 1;
}

/*
Expand Down
11 changes: 9 additions & 2 deletions drivers/kvm/x86_emulate.c
Original file line number Diff line number Diff line change
Expand Up @@ -1384,7 +1384,11 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
if (modrm_mod != 3 || modrm_rm != 1)
goto cannot_emulate;

/* nop */
rc = kvm_fix_hypercall(ctxt->vcpu);
if (rc)
goto done;

kvm_emulate_hypercall(ctxt->vcpu);
break;
case 2: /* lgdt */
rc = read_descriptor(ctxt, ops, src.ptr,
Expand All @@ -1395,7 +1399,10 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
break;
case 3: /* lidt/vmmcall */
if (modrm_mod == 3 && modrm_rm == 1) {
/* nop */
rc = kvm_fix_hypercall(ctxt->vcpu);
if (rc)
goto done;
kvm_emulate_hypercall(ctxt->vcpu);
} else {
rc = read_descriptor(ctxt, ops, src.ptr,
&size, &address,
Expand Down
Loading

0 comments on commit 7aa81cc

Please sign in to comment.