Skip to content

Commit

Permalink
Merge tag 'x86-urgent-2022-06-19' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Browse files Browse the repository at this point in the history

Pull x86 fixes from Thomas Gleixner:

 - Make RESERVE_BRK() work again with older binutils. The recent
   'simplification' broke that.

 - Make early #VE handling increment RIP when successful.

 - Make the #VE code consistent vs. the RIP adjustments and add
   comments.

 - Handle load_unaligned_zeropad() across page boundaries correctly in
   #VE when the second page is shared.

* tag 'x86-urgent-2022-06-19' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/tdx: Handle load_unaligned_zeropad() page-cross to a shared page
  x86/tdx: Clarify RIP adjustments in #VE handler
  x86/tdx: Fix early #VE handling
  x86/mm: Fix RESERVE_BRK() for older binutils
  • Loading branch information
Linus Torvalds committed Jun 19, 2022
2 parents 5d770f1 + 1e77696 commit 05c6ca8
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 75 deletions.
187 changes: 136 additions & 51 deletions arch/x86/coco/tdx/tdx.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,51 @@ static u64 get_cc_mask(void)
return BIT_ULL(gpa_width - 1);
}

/*
 * The TDX module spec states that #VE may be injected for a limited set of
 * reasons:
 *
 *  - Emulation of the architectural #VE injection on EPT violation;
 *
 *  - As a result of guest TD execution of a disallowed instruction,
 *    a disallowed MSR access, or CPUID virtualization;
 *
 *  - A notification to the guest TD about anomalous behavior;
 *
 * The last one is opt-in and is not used by the kernel.
 *
 * The Intel Software Developer's Manual describes the cases in which the
 * instruction length field can be used in the section "Information for VM
 * Exits Due to Instruction Execution".
 *
 * For TDX, it ultimately means GET_VEINFO provides reliable instruction length
 * information if #VE occurred due to instruction execution, but not for EPT
 * violations.
 *
 * Returns ve->instr_len for #VEs caused by instruction execution, 0 (with a
 * one-time warning) for EPT violations, and ve->instr_len (with a one-time
 * warning) for any other exit reason.
 */
static int ve_instr_len(struct ve_info *ve)
{
	switch (ve->exit_reason) {
	case EXIT_REASON_HLT:
	case EXIT_REASON_MSR_READ:
	case EXIT_REASON_MSR_WRITE:
	case EXIT_REASON_CPUID:
	case EXIT_REASON_IO_INSTRUCTION:
		/* It is safe to use ve->instr_len for #VE due to instructions */
		return ve->instr_len;
	case EXIT_REASON_EPT_VIOLATION:
		/*
		 * For EPT violations, ve->instr_len is not defined. For those,
		 * the kernel must decode instructions manually and should not
		 * be using this function.
		 */
		WARN_ONCE(1, "ve->instr_len is not defined for EPT violations\n");
		return 0;
	default:
		/* Unknown exit reason: warn, but still hand back the reported length */
		WARN_ONCE(1, "Unexpected #VE-type: %lld\n", ve->exit_reason);
		return ve->instr_len;
	}
}

static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
{
struct tdx_hypercall_args args = {
Expand All @@ -147,7 +192,7 @@ static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
}

static bool handle_halt(void)
static int handle_halt(struct ve_info *ve)
{
/*
* Since non safe halt is mainly used in CPU offlining
Expand All @@ -158,9 +203,9 @@ static bool handle_halt(void)
const bool do_sti = false;

if (__halt(irq_disabled, do_sti))
return false;
return -EIO;

return true;
return ve_instr_len(ve);
}

void __cpuidle tdx_safe_halt(void)
Expand All @@ -180,7 +225,7 @@ void __cpuidle tdx_safe_halt(void)
WARN_ONCE(1, "HLT instruction emulation failed\n");
}

static bool read_msr(struct pt_regs *regs)
static int read_msr(struct pt_regs *regs, struct ve_info *ve)
{
struct tdx_hypercall_args args = {
.r10 = TDX_HYPERCALL_STANDARD,
Expand All @@ -194,14 +239,14 @@ static bool read_msr(struct pt_regs *regs)
* (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>".
*/
if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
return false;
return -EIO;

regs->ax = lower_32_bits(args.r11);
regs->dx = upper_32_bits(args.r11);
return true;
return ve_instr_len(ve);
}

static bool write_msr(struct pt_regs *regs)
static int write_msr(struct pt_regs *regs, struct ve_info *ve)
{
struct tdx_hypercall_args args = {
.r10 = TDX_HYPERCALL_STANDARD,
Expand All @@ -215,10 +260,13 @@ static bool write_msr(struct pt_regs *regs)
* can be found in TDX Guest-Host-Communication Interface
* (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>".
*/
return !__tdx_hypercall(&args, 0);
if (__tdx_hypercall(&args, 0))
return -EIO;

return ve_instr_len(ve);
}

static bool handle_cpuid(struct pt_regs *regs)
static int handle_cpuid(struct pt_regs *regs, struct ve_info *ve)
{
struct tdx_hypercall_args args = {
.r10 = TDX_HYPERCALL_STANDARD,
Expand All @@ -236,7 +284,7 @@ static bool handle_cpuid(struct pt_regs *regs)
*/
if (regs->ax < 0x40000000 || regs->ax > 0x4FFFFFFF) {
regs->ax = regs->bx = regs->cx = regs->dx = 0;
return true;
return ve_instr_len(ve);
}

/*
Expand All @@ -245,7 +293,7 @@ static bool handle_cpuid(struct pt_regs *regs)
* (GHCI), section titled "VP.VMCALL<Instruction.CPUID>".
*/
if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
return false;
return -EIO;

/*
* As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of
Expand All @@ -257,7 +305,7 @@ static bool handle_cpuid(struct pt_regs *regs)
regs->cx = args.r14;
regs->dx = args.r15;

return true;
return ve_instr_len(ve);
}

static bool mmio_read(int size, unsigned long addr, unsigned long *val)
Expand All @@ -283,45 +331,60 @@ static bool mmio_write(int size, unsigned long addr, unsigned long val)
EPT_WRITE, addr, val);
}

static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
{
unsigned long *reg, val, vaddr;
char buffer[MAX_INSN_SIZE];
unsigned long *reg, val;
struct insn insn = {};
enum mmio_type mmio;
int size, extend_size;
u8 extend_val = 0;

/* Only in-kernel MMIO is supported */
if (WARN_ON_ONCE(user_mode(regs)))
return false;
return -EFAULT;

if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE))
return false;
return -EFAULT;

if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64))
return false;
return -EINVAL;

mmio = insn_decode_mmio(&insn, &size);
if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED))
return false;
return -EINVAL;

if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
reg = insn_get_modrm_reg_ptr(&insn, regs);
if (!reg)
return false;
return -EINVAL;
}

ve->instr_len = insn.length;
/*
* Reject EPT violation #VEs that split pages.
*
* MMIO accesses are supposed to be naturally aligned and therefore
* never cross page boundaries. Seeing split page accesses indicates
* a bug or a load_unaligned_zeropad() that stepped into an MMIO page.
*
* load_unaligned_zeropad() will recover using exception fixups.
*/
vaddr = (unsigned long)insn_get_addr_ref(&insn, regs);
if (vaddr / PAGE_SIZE != (vaddr + size - 1) / PAGE_SIZE)
return -EFAULT;

/* Handle writes first */
switch (mmio) {
case MMIO_WRITE:
memcpy(&val, reg, size);
return mmio_write(size, ve->gpa, val);
if (!mmio_write(size, ve->gpa, val))
return -EIO;
return insn.length;
case MMIO_WRITE_IMM:
val = insn.immediate.value;
return mmio_write(size, ve->gpa, val);
if (!mmio_write(size, ve->gpa, val))
return -EIO;
return insn.length;
case MMIO_READ:
case MMIO_READ_ZERO_EXTEND:
case MMIO_READ_SIGN_EXTEND:
Expand All @@ -334,15 +397,15 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
* decoded or handled properly. It was likely not using io.h
* helpers or accessed MMIO accidentally.
*/
return false;
return -EINVAL;
default:
WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?");
return false;
return -EINVAL;
}

/* Handle reads */
if (!mmio_read(size, ve->gpa, &val))
return false;
return -EIO;

switch (mmio) {
case MMIO_READ:
Expand All @@ -364,13 +427,13 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
default:
/* All other cases have to be covered by the first switch() */
WARN_ON_ONCE(1);
return false;
return -EINVAL;
}

if (extend_size)
memset(reg, extend_val, extend_size);
memcpy(reg, &val, size);
return true;
return insn.length;
}

static bool handle_in(struct pt_regs *regs, int size, int port)
Expand Down Expand Up @@ -421,23 +484,28 @@ static bool handle_out(struct pt_regs *regs, int size, int port)
*
* On success, returns the number of bytes RIP should be incremented (>=0)
* or -errno on error.
*/
static bool handle_io(struct pt_regs *regs, u32 exit_qual)
static int handle_io(struct pt_regs *regs, struct ve_info *ve)
{
u32 exit_qual = ve->exit_qual;
int size, port;
bool in;
bool in, ret;

if (VE_IS_IO_STRING(exit_qual))
return false;
return -EIO;

in = VE_IS_IO_IN(exit_qual);
size = VE_GET_IO_SIZE(exit_qual);
port = VE_GET_PORT_NUM(exit_qual);


if (in)
return handle_in(regs, size, port);
ret = handle_in(regs, size, port);
else
return handle_out(regs, size, port);
ret = handle_out(regs, size, port);
if (!ret)
return -EIO;

return ve_instr_len(ve);
}

/*
Expand All @@ -447,13 +515,19 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual)
__init bool tdx_early_handle_ve(struct pt_regs *regs)
{
struct ve_info ve;
int insn_len;

tdx_get_ve_info(&ve);

if (ve.exit_reason != EXIT_REASON_IO_INSTRUCTION)
return false;

return handle_io(regs, ve.exit_qual);
insn_len = handle_io(regs, &ve);
if (insn_len < 0)
return false;

regs->ip += insn_len;
return true;
}

void tdx_get_ve_info(struct ve_info *ve)
Expand Down Expand Up @@ -486,54 +560,65 @@ void tdx_get_ve_info(struct ve_info *ve)
ve->instr_info = upper_32_bits(out.r10);
}

/* Handle the user initiated #VE */
static bool virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
/*
* Handle the user initiated #VE.
*
* On success, returns the number of bytes RIP should be incremented (>=0)
* or -errno on error.
*/
static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
{
switch (ve->exit_reason) {
case EXIT_REASON_CPUID:
return handle_cpuid(regs);
return handle_cpuid(regs, ve);
default:
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
return false;
return -EIO;
}
}

/* Handle the kernel #VE */
static bool virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
/*
* Handle the kernel #VE.
*
* On success, returns the number of bytes RIP should be incremented (>=0)
* or -errno on error.
*/
static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
{
switch (ve->exit_reason) {
case EXIT_REASON_HLT:
return handle_halt();
return handle_halt(ve);
case EXIT_REASON_MSR_READ:
return read_msr(regs);
return read_msr(regs, ve);
case EXIT_REASON_MSR_WRITE:
return write_msr(regs);
return write_msr(regs, ve);
case EXIT_REASON_CPUID:
return handle_cpuid(regs);
return handle_cpuid(regs, ve);
case EXIT_REASON_EPT_VIOLATION:
return handle_mmio(regs, ve);
case EXIT_REASON_IO_INSTRUCTION:
return handle_io(regs, ve->exit_qual);
return handle_io(regs, ve);
default:
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
return false;
return -EIO;
}
}

bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
{
bool ret;
int insn_len;

if (user_mode(regs))
ret = virt_exception_user(regs, ve);
insn_len = virt_exception_user(regs, ve);
else
ret = virt_exception_kernel(regs, ve);
insn_len = virt_exception_kernel(regs, ve);
if (insn_len < 0)
return false;

/* After successful #VE handling, move the IP */
if (ret)
regs->ip += ve->instr_len;
regs->ip += insn_len;

return ret;
return true;
}

static bool tdx_tlb_flush_required(bool private)
Expand Down
Loading

0 comments on commit 05c6ca8

Please sign in to comment.