Skip to content

Commit

Permalink
x86/retpoline: Add SKL retthunk retpolines
Browse files Browse the repository at this point in the history
Ensure that retpolines do the proper call accounting so that the return
accounting works correctly.

Specifically; retpolines are used to replace both 'jmp *%reg' and
'call *%reg', however these two cases do not have the same accounting
requirements. Therefore split things up and provide two different
retpoline arrays for SKL.

The 'jmp *%reg' case needs no accounting, the
__x86_indirect_jump_thunk_array[] covers this. The retpoline is
changed to not use the return thunk; it's a simple call;ret construct.

[ strictly speaking it should do:
	andq $(~0x1f), PER_CPU_VAR(__x86_call_depth)
  but we can argue this can be covered by the fuzz we already have
  in the accounting depth (12) vs the RSB depth (16) ]

The 'call *%reg' case does need accounting, the
__x86_indirect_call_thunk_array[] covers this. Again, this retpoline
avoids the use of the return-thunk, in this case to avoid double
accounting.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111147.996634749@infradead.org
  • Loading branch information
Peter Zijlstra committed Oct 17, 2022
1 parent 5d82138 commit 3b6c174
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 12 deletions.
12 changes: 12 additions & 0 deletions arch/x86/include/asm/nospec-branch.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,8 @@

typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
extern retpoline_thunk_t __x86_indirect_thunk_array[];
extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];

extern void __x86_return_thunk(void);
extern void zen_untrain_ret(void);
Expand Down Expand Up @@ -330,6 +332,16 @@ static inline void x86_set_skl_return_thunk(void) {}
#include <asm/GEN-for-each-reg.h>
#undef GEN

#define GEN(reg) \
extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN

#define GEN(reg) \
extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN

#ifdef CONFIG_X86_64

/*
Expand Down
59 changes: 56 additions & 3 deletions arch/x86/kernel/alternative.c
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,56 @@ static int emit_indirect(int op, int reg, u8 *bytes)
return i;
}

static inline bool is_jcc32(struct insn *insn)
{
/* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
return insn->opcode.bytes[0] == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80;
}

static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes)
{
u8 op = insn->opcode.bytes[0];
int i = 0;

/*
* Clang does 'weird' Jcc __x86_indirect_thunk_r11 conditional
* tail-calls. Deal with them.
*/
if (is_jcc32(insn)) {
bytes[i++] = op;
op = insn->opcode.bytes[1];
goto clang_jcc;
}

if (insn->length == 6)
bytes[i++] = 0x2e; /* CS-prefix */

switch (op) {
case CALL_INSN_OPCODE:
__text_gen_insn(bytes+i, op, addr+i,
__x86_indirect_call_thunk_array[reg],
CALL_INSN_SIZE);
i += CALL_INSN_SIZE;
break;

case JMP32_INSN_OPCODE:
clang_jcc:
__text_gen_insn(bytes+i, op, addr+i,
__x86_indirect_jump_thunk_array[reg],
JMP32_INSN_SIZE);
i += JMP32_INSN_SIZE;
break;

default:
WARN("%pS %px %*ph\n", addr, addr, 6, addr);
return -1;
}

WARN_ON_ONCE(i != insn->length);

return i;
}

/*
* Rewrite the compiler generated retpoline thunk calls.
*
Expand Down Expand Up @@ -409,8 +459,12 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
BUG_ON(reg == 4);

if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
!cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE))
!cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
return emit_call_track_retpoline(addr, insn, reg, bytes);

return -1;
}

op = insn->opcode.bytes[0];

Expand All @@ -427,8 +481,7 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
* [ NOP ]
* 1:
*/
/* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) {
if (is_jcc32(insn)) {
cc = insn->opcode.bytes[1] & 0xf;
cc ^= 1; /* invert condition */

Expand Down
71 changes: 63 additions & 8 deletions arch/x86/lib/retpoline.S
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,18 @@

.section .text.__x86.indirect_thunk

.macro RETPOLINE reg

.macro POLINE reg
ANNOTATE_INTRA_FUNCTION_CALL
call .Ldo_rop_\@
.Lspec_trap_\@:
UNWIND_HINT_EMPTY
pause
lfence
jmp .Lspec_trap_\@
int3
.Ldo_rop_\@:
mov %\reg, (%_ASM_SP)
UNWIND_HINT_FUNC
.endm

.macro RETPOLINE reg
POLINE \reg
RET
.endm

Expand Down Expand Up @@ -54,7 +55,6 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
*/

#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)

.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_thunk_array)
Expand All @@ -66,10 +66,65 @@ SYM_CODE_START(__x86_indirect_thunk_array)
.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_thunk_array)

#define GEN(reg) EXPORT_THUNK(reg)
#define GEN(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN

#ifdef CONFIG_CALL_DEPTH_TRACKING
.macro CALL_THUNK reg
.align RETPOLINE_THUNK_SIZE

SYM_INNER_LABEL(__x86_indirect_call_thunk_\reg, SYM_L_GLOBAL)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR

CALL_DEPTH_ACCOUNT
POLINE \reg
ANNOTATE_UNRET_SAFE
ret
int3
.endm

.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_call_thunk_array)

#define GEN(reg) CALL_THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN

.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_call_thunk_array)

#define GEN(reg) __EXPORT_THUNK(__x86_indirect_call_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN

.macro JUMP_THUNK reg
.align RETPOLINE_THUNK_SIZE

SYM_INNER_LABEL(__x86_indirect_jump_thunk_\reg, SYM_L_GLOBAL)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR
POLINE \reg
ANNOTATE_UNRET_SAFE
ret
int3
.endm

.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_jump_thunk_array)

#define GEN(reg) JUMP_THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN

.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_jump_thunk_array)

#define GEN(reg) __EXPORT_THUNK(__x86_indirect_jump_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN
#endif
/*
* This function name is magical and is used by -mfunction-return=thunk-extern
* for the compiler to generate JMPs to it.
Expand Down
5 changes: 4 additions & 1 deletion arch/x86/net/bpf_jit_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,10 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
EMIT2(0xFF, 0xE0 + reg);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
OPTIMIZER_HIDE_VAR(reg);
emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg], ip);
else
emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
} else {
EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */
if (IS_ENABLED(CONFIG_RETPOLINE) || IS_ENABLED(CONFIG_SLS))
Expand Down

0 comments on commit 3b6c174

Please sign in to comment.