static_call: Add static_call_cond()
Extend the static_call infrastructure to optimize the following common
pattern:

	if (func_ptr)
		func_ptr(args...)

For the trampoline (which is in effect a tail-call), we patch the
JMP.d32 into a RET, which then directly consumes the trampoline call.

For the in-line sites we replace the CALL with a NOP5.

NOTE: this is 'obviously' limited to functions with a 'void' return type.

NOTE: DEFINE_STATIC_COND_CALL() only requires a typename, as opposed
      to a full function.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20200818135805.042977182@infradead.org
Peter Zijlstra authored and Ingo Molnar committed Sep 1, 2020
1 parent c43a43e commit 452cddb
Showing 3 changed files with 127 additions and 13 deletions.
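
A minimal usage sketch of the new API (the names my_hook, my_hook_t,
my_hook_impl, my_fast_path and my_setup are hypothetical, chosen only to
illustrate the intended pattern):

#include <linux/static_call.h>

void my_hook_t(int cpu);	/* prototype only; supplies the call type */

/* Starts out NULL: on x86 the trampoline body is a bare RET. */
DEFINE_STATIC_CALL_NULL(my_hook, my_hook_t);

static void my_hook_impl(int cpu)
{
	/* ... */
}

void my_fast_path(int cpu)
{
	/*
	 * Arguments are evaluated unconditionally; the call itself is a
	 * NOP5 (inline site) or hits the RET trampoline while NULL.
	 */
	static_call_cond(my_hook)(cpu);
}

void my_setup(void (*fn)(int))
{
	/* fn may be NULL; a NULL value patches the RET/NOP back in. */
	static_call_update(my_hook, fn);
}

Note that static_call_cond() discards the return value, which is why it is
limited to functions with a 'void' return type.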
12 changes: 9 additions & 3 deletions arch/x86/include/asm/static_call.h
@@ -20,15 +20,21 @@
* it does tail-call optimization on the call; since you cannot compute the
* relative displacement across sections.
*/
#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \

#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
asm(".pushsection .static_call.text, \"ax\" \n" \
".align 4 \n" \
".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
STATIC_CALL_TRAMP_STR(name) ": \n" \
" .byte 0xe9 # jmp.d32 \n" \
" .long " #func " - (. + 4) \n" \
insns " \n" \
".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
".popsection \n")

#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")

#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop; nop; nop")

#endif /* _ASM_STATIC_CALL_H */
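
For reference, ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(my_hook) expands to roughly
the following (a sketch assuming the __SCT__ trampoline symbol prefix from
static_call_types.h; my_hook is a hypothetical name):

asm(".pushsection .static_call.text, \"ax\"		\n"
    ".align 4						\n"
    ".globl __SCT__my_hook				\n"
    "__SCT__my_hook:					\n"
    /* a 1-byte RET padded with NOPs to the 5-byte JMP.d32 footprint */
    "	ret; nop; nop; nop; nop				\n"
    ".type __SCT__my_hook, @function			\n"
    ".size __SCT__my_hook, . - __SCT__my_hook		\n"
    ".popsection					\n");

Because the out-of-line call site is a plain CALL to this symbol, the RET
returns straight to the caller, which is what the changelog means by the RET
"directly consuming" the trampoline call.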
42 changes: 32 additions & 10 deletions arch/x86/kernel/static_call.c
@@ -4,30 +4,52 @@
#include <linux/bug.h>
#include <asm/text-patching.h>

static void __static_call_transform(void *insn, u8 opcode, void *func)
enum insn_type {
CALL = 0, /* site call */
NOP = 1, /* site cond-call */
JMP = 2, /* tramp / site tail-call */
RET = 3, /* tramp / site cond-tail-call */
};

static void __static_call_transform(void *insn, enum insn_type type, void *func)
{
const void *code = text_gen_insn(opcode, insn, func);
int size = CALL_INSN_SIZE;
const void *code;

if (WARN_ONCE(*(u8 *)insn != opcode,
"unexpected static call insn opcode 0x%x at %pS\n",
opcode, insn))
return;
switch (type) {
case CALL:
code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
break;

case NOP:
code = ideal_nops[NOP_ATOMIC5];
break;

case JMP:
code = text_gen_insn(JMP32_INSN_OPCODE, insn, func);
break;

case RET:
code = text_gen_insn(RET_INSN_OPCODE, insn, func);
size = RET_INSN_SIZE;
break;
}

if (memcmp(insn, code, CALL_INSN_SIZE) == 0)
if (memcmp(insn, code, size) == 0)
return;

text_poke_bp(insn, code, CALL_INSN_SIZE, NULL);
text_poke_bp(insn, code, size, NULL);
}

void arch_static_call_transform(void *site, void *tramp, void *func)
{
mutex_lock(&text_mutex);

if (tramp)
__static_call_transform(tramp, JMP32_INSN_OPCODE, func);
__static_call_transform(tramp, func ? JMP : RET, func);

if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site)
__static_call_transform(site, CALL_INSN_OPCODE, func);
__static_call_transform(site, func ? CALL : NOP, func);

mutex_unlock(&text_mutex);
}
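
In comment form, the insn_type cases above map onto the following x86
encodings (byte values are the standard CALL/JMP/RET opcodes passed to
text_gen_insn(); sizes follow CALL_INSN_SIZE and RET_INSN_SIZE):

/*
 *   CALL  ->  E8 <rel32>    5 bytes   direct call to func   (inline site)
 *   NOP   ->  NOP_ATOMIC5   5 bytes   call elided           (inline site, func == NULL)
 *   JMP   ->  E9 <rel32>    5 bytes   tail-call to func     (trampoline)
 *   RET   ->  C3            1 byte    return to caller      (trampoline, func == NULL)
 */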
86 changes: 86 additions & 0 deletions include/linux/static_call.h
@@ -16,7 +16,9 @@
*
* DECLARE_STATIC_CALL(name, func);
* DEFINE_STATIC_CALL(name, func);
* DEFINE_STATIC_CALL_NULL(name, typename);
* static_call(name)(args...);
* static_call_cond(name)(args...);
* static_call_update(name, func);
*
* Usage example:
@@ -52,6 +54,43 @@
* rather than calling through the trampoline. This requires objtool or a
* compiler plugin to detect all the static_call() sites and annotate them
* in the .static_call_sites section.
*
*
* Notes on NULL function pointers:
*
* Static_call()s support NULL functions, with many of the caveats that
* regular function pointers have.
*
* Clearly calling a NULL function pointer is 'BAD', so too for
* static_call()s (although when HAVE_STATIC_CALL it might not be immediately
* fatal). A NULL static_call can be the result of:
*
* DECLARE_STATIC_CALL_NULL(my_static_call, void (*)(int));
*
* which is equivalent to declaring a NULL function pointer with just a
* typename:
*
* void (*my_func_ptr)(int arg1) = NULL;
*
* or using static_call_update() with a NULL function. In both cases the
* HAVE_STATIC_CALL implementation will patch the trampoline with a RET
* instruction, instead of an immediate tail-call JMP. HAVE_STATIC_CALL_INLINE
* architectures can patch the trampoline call to a NOP.
*
* In all cases, any argument evaluation is unconditional. Unlike a regular
* conditional function pointer call:
*
* if (my_func_ptr)
* my_func_ptr(arg1)
*
* where the argument evaluation also depends on the pointer value.
*
* When calling a static_call that can be NULL, use:
*
* static_call_cond(name)(arg1);
*
* which will include the required value tests to avoid NULL-pointer
* dereferences.
*/

#include <linux/types.h>
@@ -120,7 +159,16 @@ extern int static_call_text_reserved(void *start, void *end);
}; \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)

#define DEFINE_STATIC_CALL_NULL(name, _func) \
DECLARE_STATIC_CALL(name, _func); \
struct static_call_key STATIC_CALL_KEY(name) = { \
.func = NULL, \
.type = 1, \
}; \
ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)

#define static_call(name) __static_call(name)
#define static_call_cond(name) (void)__static_call(name)

#define EXPORT_STATIC_CALL(name) \
EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \
@@ -143,7 +191,15 @@ struct static_call_key {
}; \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)

#define DEFINE_STATIC_CALL_NULL(name, _func) \
DECLARE_STATIC_CALL(name, _func); \
struct static_call_key STATIC_CALL_KEY(name) = { \
.func = NULL, \
}; \
ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)

#define static_call(name) __static_call(name)
#define static_call_cond(name) (void)__static_call(name)

static inline
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
@@ -179,9 +235,39 @@ struct static_call_key {
.func = _func, \
}

#define DEFINE_STATIC_CALL_NULL(name, _func) \
DECLARE_STATIC_CALL(name, _func); \
struct static_call_key STATIC_CALL_KEY(name) = { \
.func = NULL, \
}

#define static_call(name) \
((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))

static inline void __static_call_nop(void) { }

/*
* This horrific hack takes care of two things:
*
* - it ensures the compiler will only load the function pointer ONCE,
* which avoids a reload race.
*
* - it ensures the argument evaluation is unconditional, similar
* to the HAVE_STATIC_CALL variant.
*
* Sadly current GCC/Clang (10 for both) do not optimize this properly
* and will emit an indirect call for the NULL case :-(
*/
#define __static_call_cond(name) \
({ \
void *func = READ_ONCE(STATIC_CALL_KEY(name).func); \
if (!func) \
func = &__static_call_nop; \
(typeof(STATIC_CALL_TRAMP(name))*)func; \
})

#define static_call_cond(name) (void)__static_call_cond(name)

static inline
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
{
