Skip to content

Commit

Permalink
KVM: x86: save/load state on SMM switch
Browse files Browse the repository at this point in the history
The big ugly one.  This patch adds support for switching in and out of
system management mode, respectively upon receiving KVM_REQ_SMI and upon
executing a RSM instruction.  Both 32- and 64-bit formats are supported
for the SMM state save area.

Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
  • Loading branch information
Paolo Bonzini committed Jun 4, 2015
1 parent cd7764f commit 660a5d5
Show file tree
Hide file tree
Showing 4 changed files with 498 additions and 2 deletions.
8 changes: 8 additions & 0 deletions arch/x86/kvm/cpuid.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
}

static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;

best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
return best && (best->edx & bit(X86_FEATURE_LM));
}

static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
Expand Down
248 changes: 247 additions & 1 deletion arch/x86/kvm/emulate.c
Original file line number Diff line number Diff line change
Expand Up @@ -2259,12 +2259,258 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
return rc;
}

static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
{
u32 eax, ebx, ecx, edx;

eax = 0x80000001;
ecx = 0;
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
return edx & bit(X86_FEATURE_LM);
}

#define GET_SMSTATE(type, smbase, offset) \
({ \
type __val; \
int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val, \
sizeof(__val), NULL); \
if (r != X86EMUL_CONTINUE) \
return X86EMUL_UNHANDLEABLE; \
__val; \
})

static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
{
desc->g = (flags >> 23) & 1;
desc->d = (flags >> 22) & 1;
desc->l = (flags >> 21) & 1;
desc->avl = (flags >> 20) & 1;
desc->p = (flags >> 15) & 1;
desc->dpl = (flags >> 13) & 3;
desc->s = (flags >> 12) & 1;
desc->type = (flags >> 8) & 15;
}

static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
{
struct desc_struct desc;
int offset;
u16 selector;

selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4);

if (n < 3)
offset = 0x7f84 + n * 12;
else
offset = 0x7f2c + (n - 3) * 12;

set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
return X86EMUL_CONTINUE;
}

static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
{
struct desc_struct desc;
int offset;
u16 selector;
u32 base3;

offset = 0x7e00 + n * 16;

selector = GET_SMSTATE(u16, smbase, offset);
rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8);
set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
base3 = GET_SMSTATE(u32, smbase, offset + 12);

ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
return X86EMUL_CONTINUE;
}

static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
u64 cr0, u64 cr4)
{
int bad;

/*
* First enable PAE, long mode needs it before CR0.PG = 1 is set.
* Then enable protected mode. However, PCID cannot be enabled
* if EFER.LMA=0, so set it separately.
*/
bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
if (bad)
return X86EMUL_UNHANDLEABLE;

bad = ctxt->ops->set_cr(ctxt, 0, cr0);
if (bad)
return X86EMUL_UNHANDLEABLE;

if (cr4 & X86_CR4_PCIDE) {
bad = ctxt->ops->set_cr(ctxt, 4, cr4);
if (bad)
return X86EMUL_UNHANDLEABLE;
}

return X86EMUL_CONTINUE;
}

static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
{
struct desc_struct desc;
struct desc_ptr dt;
u16 selector;
u32 val, cr0, cr4;
int i;

cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);

for (i = 0; i < 8; i++)
*reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4);

val = GET_SMSTATE(u32, smbase, 0x7fcc);
ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
val = GET_SMSTATE(u32, smbase, 0x7fc8);
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);

selector = GET_SMSTATE(u32, smbase, 0x7fc4);
set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64));
set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60));
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);

selector = GET_SMSTATE(u32, smbase, 0x7fc0);
set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80));
set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c));
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);

dt.address = GET_SMSTATE(u32, smbase, 0x7f74);
dt.size = GET_SMSTATE(u32, smbase, 0x7f70);
ctxt->ops->set_gdt(ctxt, &dt);

dt.address = GET_SMSTATE(u32, smbase, 0x7f58);
dt.size = GET_SMSTATE(u32, smbase, 0x7f54);
ctxt->ops->set_idt(ctxt, &dt);

for (i = 0; i < 6; i++) {
int r = rsm_load_seg_32(ctxt, smbase, i);
if (r != X86EMUL_CONTINUE)
return r;
}

cr4 = GET_SMSTATE(u32, smbase, 0x7f14);

ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));

return rsm_enter_protected_mode(ctxt, cr0, cr4);
}

static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
{
struct desc_struct desc;
struct desc_ptr dt;
u64 val, cr0, cr4;
u32 base3;
u16 selector;
int i;

for (i = 0; i < 16; i++)
*reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);

ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78);
ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;

val = GET_SMSTATE(u32, smbase, 0x7f68);
ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
val = GET_SMSTATE(u32, smbase, 0x7f60);
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);

cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
val = GET_SMSTATE(u64, smbase, 0x7ed0);
ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);

selector = GET_SMSTATE(u32, smbase, 0x7e90);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8);
set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94));
set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98));
base3 = GET_SMSTATE(u32, smbase, 0x7e9c);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);

dt.size = GET_SMSTATE(u32, smbase, 0x7e84);
dt.address = GET_SMSTATE(u64, smbase, 0x7e88);
ctxt->ops->set_idt(ctxt, &dt);

selector = GET_SMSTATE(u32, smbase, 0x7e70);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8);
set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74));
set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78));
base3 = GET_SMSTATE(u32, smbase, 0x7e7c);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);

dt.size = GET_SMSTATE(u32, smbase, 0x7e64);
dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
ctxt->ops->set_gdt(ctxt, &dt);

for (i = 0; i < 6; i++) {
int r = rsm_load_seg_64(ctxt, smbase, i);
if (r != X86EMUL_CONTINUE)
return r;
}

return rsm_enter_protected_mode(ctxt, cr0, cr4);
}

static int em_rsm(struct x86_emulate_ctxt *ctxt)
{
unsigned long cr0, cr4, efer;
u64 smbase;
int ret;

if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0)
return emulate_ud(ctxt);

return X86EMUL_UNHANDLEABLE;
/*
* Get back to real mode, to prepare a safe state in which to load
* CR0/CR3/CR4/EFER. Also this will ensure that addresses passed
* to read_std/write_std are not virtual.
*
* CR4.PCIDE must be zero, because it is a 64-bit mode only feature.
*/
cr0 = ctxt->ops->get_cr(ctxt, 0);
if (cr0 & X86_CR0_PE)
ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
cr4 = ctxt->ops->get_cr(ctxt, 4);
if (cr4 & X86_CR4_PAE)
ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
efer = 0;
ctxt->ops->set_msr(ctxt, MSR_EFER, efer);

smbase = ctxt->ops->get_smbase(ctxt);
if (emulator_has_longmode(ctxt))
ret = rsm_load_state_64(ctxt, smbase + 0x8000);
else
ret = rsm_load_state_32(ctxt, smbase + 0x8000);

if (ret != X86EMUL_CONTINUE) {
/* FIXME: should triple fault */
return X86EMUL_UNHANDLEABLE;
}

if ((ctxt->emul_flags & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
ctxt->ops->set_nmi_mask(ctxt, false);

ctxt->emul_flags &= ~X86EMUL_SMM_INSIDE_NMI_MASK;
ctxt->emul_flags &= ~X86EMUL_SMM_MASK;
return X86EMUL_CONTINUE;
}

static void
Expand Down
22 changes: 22 additions & 0 deletions arch/x86/kvm/trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,28 @@ TRACE_EVENT(kvm_wait_lapic_expire,
__entry->delta < 0 ? "early" : "late")
);

TRACE_EVENT(kvm_enter_smm,
TP_PROTO(unsigned int vcpu_id, u64 smbase, bool entering),
TP_ARGS(vcpu_id, smbase, entering),

TP_STRUCT__entry(
__field( unsigned int, vcpu_id )
__field( u64, smbase )
__field( bool, entering )
),

TP_fast_assign(
__entry->vcpu_id = vcpu_id;
__entry->smbase = smbase;
__entry->entering = entering;
),

TP_printk("vcpu %u: %s SMM, smbase 0x%llx",
__entry->vcpu_id,
__entry->entering ? "entering" : "leaving",
__entry->smbase)
);

#endif /* _TRACE_KVM_H */

#undef TRACE_INCLUDE_PATH
Expand Down
Loading

0 comments on commit 660a5d5

Please sign in to comment.