Skip to content

Commit

Permalink
x86/its: FineIBT-paranoid vs ITS
Browse files Browse the repository at this point in the history
FineIBT-paranoid was using the retpoline bytes for the paranoid check,
disabling retpolines, because all parts that have IBT also have eIBRS
and thus don't need no stinking retpolines.

Except... ITS needs the retpolines for indirect calls must not be in
the first half of a cacheline :-/

So what was the paranoid call sequence:

  <fineibt_paranoid_start>:
   0:   41 ba 78 56 34 12       mov    $0x12345678, %r10d
   6:   45 3b 53 f7             cmp    -0x9(%r11), %r10d
   a:   4d 8d 5b <f0>           lea    -0x10(%r11), %r11
   e:   75 fd                   jne    d <fineibt_paranoid_start+0xd>
  10:   41 ff d3                call   *%r11
  13:   90                      nop

Now becomes:

  <fineibt_paranoid_start>:
   0:   41 ba 78 56 34 12       mov    $0x12345678, %r10d
   6:   45 3b 53 f7             cmp    -0x9(%r11), %r10d
   a:   4d 8d 5b f0             lea    -0x10(%r11), %r11
   e:   2e e8 XX XX XX XX	cs call __x86_indirect_paranoid_thunk_r11

  Where the paranoid_thunk looks like:

   1d:  <ea>                    (bad)
   __x86_indirect_paranoid_thunk_r11:
   1e:  75 fd                   jne 1d
   __x86_indirect_its_thunk_r11:
   20:  41 ff eb                jmp *%r11
   23:  cc                      int3

[ dhansen: remove initialization to false ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
  • Loading branch information
Peter Zijlstra authored and Dave Hansen committed May 9, 2025
1 parent 872df34 commit e52c1dc
Show file tree
Hide file tree
Showing 5 changed files with 159 additions and 20 deletions.
8 changes: 8 additions & 0 deletions arch/x86/include/asm/alternative.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <linux/stringify.h>
#include <linux/objtool.h>
#include <asm/asm.h>
#include <asm/bug.h>

#define ALT_FLAGS_SHIFT 16

Expand Down Expand Up @@ -128,10 +129,17 @@ static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
extern void its_init_mod(struct module *mod);
extern void its_fini_mod(struct module *mod);
extern void its_free_mod(struct module *mod);
extern u8 *its_static_thunk(int reg);
#else /* CONFIG_MITIGATION_ITS */
static inline void its_init_mod(struct module *mod) { }
static inline void its_fini_mod(struct module *mod) { }
static inline void its_free_mod(struct module *mod) { }
static inline u8 *its_static_thunk(int reg)
{
WARN_ONCE(1, "ITS not compiled in");

return NULL;
}
#endif

#if defined(CONFIG_MITIGATION_RETHUNK) && defined(CONFIG_OBJTOOL)
Expand Down
145 changes: 129 additions & 16 deletions arch/x86/kernel/alternative.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
#endif
};

#ifdef CONFIG_FINEIBT
static bool cfi_paranoid __ro_after_init;
#endif

#ifdef CONFIG_MITIGATION_ITS

static struct module *its_mod;
Expand All @@ -137,8 +141,25 @@ static unsigned int its_offset;
static void *its_init_thunk(void *thunk, int reg)
{
u8 *bytes = thunk;
int offset = 0;
int i = 0;

#ifdef CONFIG_FINEIBT
if (cfi_paranoid) {
/*
* When ITS uses indirect branch thunk the fineibt_paranoid
* caller sequence doesn't fit in the caller site. So put the
* remaining part of the sequence (<ea> + JNE) into the ITS
* thunk.
*/
bytes[i++] = 0xea; /* invalid instruction */
bytes[i++] = 0x75; /* JNE */
bytes[i++] = 0xfd;

offset = 1;
}
#endif

if (reg >= 8) {
bytes[i++] = 0x41; /* REX.B prefix */
reg -= 8;
Expand All @@ -147,7 +168,7 @@ static void *its_init_thunk(void *thunk, int reg)
bytes[i++] = 0xe0 + reg; /* jmp *reg */
bytes[i++] = 0xcc;

return thunk;
return thunk + offset;
}

void its_init_mod(struct module *mod)
Expand Down Expand Up @@ -217,6 +238,17 @@ static void *its_allocate_thunk(int reg)
int size = 3 + (reg / 8);
void *thunk;

#ifdef CONFIG_FINEIBT
/*
* The ITS thunk contains an indirect jump and an int3 instruction so
* its size is 3 or 4 bytes depending on the register used. If CFI
* paranoid is used then 3 extra bytes are added in the ITS thunk to
* complete the fineibt_paranoid caller sequence.
*/
if (cfi_paranoid)
size += 3;
#endif

if (!its_page || (its_offset + size - 1) >= PAGE_SIZE) {
its_page = its_alloc();
if (!its_page) {
Expand All @@ -240,6 +272,18 @@ static void *its_allocate_thunk(int reg)
return its_init_thunk(thunk, reg);
}

u8 *its_static_thunk(int reg)
{
u8 *thunk = __x86_indirect_its_thunk_array[reg];

#ifdef CONFIG_FINEIBT
/* Paranoid thunk starts 2 bytes before */
if (cfi_paranoid)
return thunk - 2;
#endif
return thunk;
}

#endif

/*
Expand Down Expand Up @@ -775,8 +819,17 @@ static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg)
/* Lower-half of the cacheline? */
return !(addr & 0x20);
}
#else /* CONFIG_MITIGATION_ITS */

#ifdef CONFIG_FINEIBT
static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg)
{
return false;
}
#endif

#endif /* CONFIG_MITIGATION_ITS */

/*
* Rewrite the compiler generated retpoline thunk calls.
*
Expand Down Expand Up @@ -893,6 +946,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
int len, ret;
u8 bytes[16];
u8 op1, op2;
u8 *dest;

ret = insn_decode_kernel(&insn, addr);
if (WARN_ON_ONCE(ret < 0))
Expand All @@ -909,6 +963,12 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)

case CALL_INSN_OPCODE:
case JMP32_INSN_OPCODE:
/* Check for cfi_paranoid + ITS */
dest = addr + insn.length + insn.immediate.value;
if (dest[-1] == 0xea && (dest[0] & 0xf0) == 0x70) {
WARN_ON_ONCE(cfi_mode != CFI_FINEIBT);
continue;
}
break;

case 0x0f: /* escape */
Expand Down Expand Up @@ -1198,8 +1258,6 @@ int cfi_get_func_arity(void *func)
static bool cfi_rand __ro_after_init = true;
static u32 cfi_seed __ro_after_init;

static bool cfi_paranoid __ro_after_init = false;

/*
* Re-hash the CFI hash with a boot-time seed while making sure the result is
* not a valid ENDBR instruction.
Expand Down Expand Up @@ -1612,6 +1670,19 @@ static int cfi_rand_callers(s32 *start, s32 *end)
return 0;
}

static int emit_paranoid_trampoline(void *addr, struct insn *insn, int reg, u8 *bytes)
{
u8 *thunk = (void *)__x86_indirect_its_thunk_array[reg] - 2;

#ifdef CONFIG_MITIGATION_ITS
u8 *tmp = its_allocate_thunk(reg);
if (tmp)
thunk = tmp;
#endif

return __emit_trampoline(addr, insn, bytes, thunk, thunk);
}

static int cfi_rewrite_callers(s32 *start, s32 *end)
{
s32 *s;
Expand Down Expand Up @@ -1653,9 +1724,14 @@ static int cfi_rewrite_callers(s32 *start, s32 *end)
memcpy(bytes, fineibt_paranoid_start, fineibt_paranoid_size);
memcpy(bytes + fineibt_caller_hash, &hash, 4);

ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind);
if (WARN_ON_ONCE(ret != 3))
continue;
if (cpu_wants_indirect_its_thunk_at((unsigned long)addr + fineibt_paranoid_ind, 11)) {
emit_paranoid_trampoline(addr + fineibt_caller_size,
&insn, 11, bytes + fineibt_caller_size);
} else {
ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind);
if (WARN_ON_ONCE(ret != 3))
continue;
}

text_poke_early(addr, bytes, fineibt_paranoid_size);
}
Expand Down Expand Up @@ -1882,29 +1958,66 @@ static bool decode_fineibt_bhi(struct pt_regs *regs, unsigned long *target, u32
return false;
}

static bool is_paranoid_thunk(unsigned long addr)
{
u32 thunk;

__get_kernel_nofault(&thunk, (u32 *)addr, u32, Efault);
return (thunk & 0x00FFFFFF) == 0xfd75ea;

Efault:
return false;
}

/*
* regs->ip points to a LOCK Jcc.d8 instruction from the fineibt_paranoid_start[]
* sequence.
* sequence, or to an invalid instruction (0xea) + Jcc.d8 for cfi_paranoid + ITS
* thunk.
*/
static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, u32 *type)
{
unsigned long addr = regs->ip - fineibt_paranoid_ud;
u32 hash;

if (!cfi_paranoid || !is_cfi_trap(addr + fineibt_caller_size - LEN_UD2))
if (!cfi_paranoid)
return false;

__get_kernel_nofault(&hash, addr + fineibt_caller_hash, u32, Efault);
*target = regs->r11 + fineibt_preamble_size;
*type = regs->r10;
if (is_cfi_trap(addr + fineibt_caller_size - LEN_UD2)) {
*target = regs->r11 + fineibt_preamble_size;
*type = regs->r10;

/*
* Since the trapping instruction is the exact, but LOCK prefixed,
* Jcc.d8 that got us here, the normal fixup will work.
*/
return true;
}

/*
* Since the trapping instruction is the exact, but LOCK prefixed,
* Jcc.d8 that got us here, the normal fixup will work.
* The cfi_paranoid + ITS thunk combination results in:
*
* 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d
* 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d
* a: 4d 8d 5b f0 lea -0x10(%r11), %r11
* e: 2e e8 XX XX XX XX cs call __x86_indirect_paranoid_thunk_r11
*
* Where the paranoid_thunk looks like:
*
* 1d: <ea> (bad)
* __x86_indirect_paranoid_thunk_r11:
* 1e: 75 fd jne 1d
* __x86_indirect_its_thunk_r11:
* 20: 41 ff eb jmp *%r11
* 23: cc int3
*
*/
return true;
if (is_paranoid_thunk(regs->ip)) {
*target = regs->r11 + fineibt_preamble_size;
*type = regs->r10;

regs->ip = *target;
return true;
}

Efault:
return false;
}

Expand Down
15 changes: 12 additions & 3 deletions arch/x86/lib/retpoline.S
Original file line number Diff line number Diff line change
Expand Up @@ -371,26 +371,35 @@ SYM_FUNC_END(call_depth_return_thunk)

.macro ITS_THUNK reg

/*
* If CFI paranoid is used then the ITS thunk starts with opcodes (0xea; jne 1b)
* that complete the fineibt_paranoid caller sequence.
*/
1: .byte 0xea
SYM_INNER_LABEL(__x86_indirect_paranoid_thunk_\reg, SYM_L_GLOBAL)
UNWIND_HINT_UNDEFINED
ANNOTATE_NOENDBR
jne 1b
SYM_INNER_LABEL(__x86_indirect_its_thunk_\reg, SYM_L_GLOBAL)
UNWIND_HINT_UNDEFINED
ANNOTATE_NOENDBR
ANNOTATE_RETPOLINE_SAFE
jmp *%\reg
int3
.align 32, 0xcc /* fill to the end of the line */
.skip 32, 0xcc /* skip to the next upper half */
.skip 32 - (__x86_indirect_its_thunk_\reg - 1b), 0xcc /* skip to the next upper half */
.endm

/* ITS mitigation requires thunks be aligned to upper half of cacheline */
.align 64, 0xcc
.skip 32, 0xcc
SYM_CODE_START(__x86_indirect_its_thunk_array)
.skip 29, 0xcc

#define GEN(reg) ITS_THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN

.align 64, 0xcc
SYM_FUNC_ALIAS(__x86_indirect_its_thunk_array, __x86_indirect_its_thunk_rax)
SYM_CODE_END(__x86_indirect_its_thunk_array)

.align 64, 0xcc
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/net/bpf_jit_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)

if (cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) {
OPTIMIZER_HIDE_VAR(reg);
emit_jump(&prog, &__x86_indirect_its_thunk_array[reg], ip);
emit_jump(&prog, its_static_thunk(reg), ip);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
EMIT_LFENCE();
EMIT2(0xFF, 0xE0 + reg);
Expand Down
9 changes: 9 additions & 0 deletions tools/objtool/arch/x86/decode.c
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,15 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
op2 = ins.opcode.bytes[1];
op3 = ins.opcode.bytes[2];

/*
* XXX hack, decoder is buggered and thinks 0xea is 7 bytes long.
*/
if (op1 == 0xea) {
insn->len = 1;
insn->type = INSN_BUG;
return 0;
}

if (ins.rex_prefix.nbytes) {
rex = ins.rex_prefix.bytes[0];
rex_w = X86_REX_W(rex) >> 3;
Expand Down

0 comments on commit e52c1dc

Please sign in to comment.