Skip to content

Commit

Permalink
x86/calldepth: Add ret/call counting for debug
Browse files Browse the repository at this point in the history
Add a debugfs mechanism to validate the accounting, e.g. vs. call/ret
balance and to gather statistics about the stuffing to call ratio.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111148.204285506@infradead.org
  • Loading branch information
Thomas Gleixner authored and Peter Zijlstra committed Oct 17, 2022
1 parent bbaceb1 commit f5c1bb2
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 5 deletions.
36 changes: 32 additions & 4 deletions arch/x86/include/asm/nospec-branch.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,22 @@
#define RET_DEPTH_INIT_FROM_CALL 0xfc00000000000000ULL
#define RET_DEPTH_CREDIT 0xffffffffffffffffULL

/*
 * Debug-only event counters for the call depth tracking code.
 *
 * With CONFIG_CALL_THUNKS_DEBUG enabled, each macro expands to a single
 * "incq" on a %gs-relative counter. The trailing semicolon is part of the
 * expansion so the macro can be appended to a multi-instruction asm macro.
 * Without the config option every macro expands to nothing.
 */
#ifdef CONFIG_CALL_THUNKS_DEBUG
# define CALL_THUNKS_DEBUG_INC_CALLS	incq %gs:__x86_call_count;
# define CALL_THUNKS_DEBUG_INC_RETS	incq %gs:__x86_ret_count;
# define CALL_THUNKS_DEBUG_INC_STUFFS	incq %gs:__x86_stuffs_count;
# define CALL_THUNKS_DEBUG_INC_CTXSW	incq %gs:__x86_ctxsw_count;
#else
# define CALL_THUNKS_DEBUG_INC_CALLS
# define CALL_THUNKS_DEBUG_INC_RETS
# define CALL_THUNKS_DEBUG_INC_STUFFS
# define CALL_THUNKS_DEBUG_INC_CTXSW
#endif

#if defined(CONFIG_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS)

#include <asm/asm-offsets.h>
Expand All @@ -75,18 +91,23 @@
#define RESET_CALL_DEPTH_FROM_CALL \
mov $0xfc, %rax; \
shl $56, %rax; \
movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);
movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); \
CALL_THUNKS_DEBUG_INC_CALLS

#define INCREMENT_CALL_DEPTH \
sarq $5, %gs:pcpu_hot + X86_call_depth;
sarq $5, %gs:pcpu_hot + X86_call_depth; \
CALL_THUNKS_DEBUG_INC_CALLS

#define ASM_INCREMENT_CALL_DEPTH \
sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth);
sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); \
CALL_THUNKS_DEBUG_INC_CALLS

#else
#define CREDIT_CALL_DEPTH
#define ASM_CREDIT_CALL_DEPTH
#define RESET_CALL_DEPTH
#define INCREMENT_CALL_DEPTH
#define ASM_INCREMENT_CALL_DEPTH
#define RESET_CALL_DEPTH_FROM_CALL
#endif

Expand Down Expand Up @@ -137,7 +158,8 @@
jnz 771b; \
/* barrier for jnz misprediction */ \
lfence; \
ASM_CREDIT_CALL_DEPTH
ASM_CREDIT_CALL_DEPTH \
CALL_THUNKS_DEBUG_INC_CTXSW
#else
/*
* i386 doesn't unconditionally have LFENCE, as such it can't
Expand Down Expand Up @@ -321,6 +343,12 @@ static inline void x86_set_skl_return_thunk(void)
{
x86_return_thunk = &__x86_return_skl;
}
/*
 * Per-CPU u64 debug counters, only declared when CONFIG_CALL_THUNKS_DEBUG
 * is set. They are the operands of the CALL_THUNKS_DEBUG_INC_* macros:
 * calls, returns, stuffs and ctxsw respectively.
 */
#ifdef CONFIG_CALL_THUNKS_DEBUG
DECLARE_PER_CPU(u64, __x86_call_count);
DECLARE_PER_CPU(u64, __x86_ret_count);
DECLARE_PER_CPU(u64, __x86_stuffs_count);
DECLARE_PER_CPU(u64, __x86_ctxsw_count);
#endif
#else
static inline void x86_set_skl_return_thunk(void) {}
#endif
Expand Down
53 changes: 53 additions & 0 deletions arch/x86/kernel/callthunks.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#define pr_fmt(fmt) "callthunks: " fmt

#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/moduleloader.h>
Expand Down Expand Up @@ -35,6 +36,15 @@ static int __init debug_thunks(char *str)
}
__setup("debug-callthunks", debug_thunks);

/*
 * Storage for the per-CPU call thunk debug counters (one u64 each for
 * calls, returns, stuffs and ctxsw events).
 *
 * Only the ctxsw and call counters are exported (GPL-only).
 * NOTE(review): presumably because only those two are incremented from
 * code that can be built as a module - confirm against the macro users.
 */
#ifdef CONFIG_CALL_THUNKS_DEBUG
DEFINE_PER_CPU(u64, __x86_call_count);
DEFINE_PER_CPU(u64, __x86_ret_count);
DEFINE_PER_CPU(u64, __x86_stuffs_count);
DEFINE_PER_CPU(u64, __x86_ctxsw_count);
EXPORT_SYMBOL_GPL(__x86_ctxsw_count);
EXPORT_SYMBOL_GPL(__x86_call_count);
#endif

extern s32 __call_sites[], __call_sites_end[];

struct thunk_desc {
Expand Down Expand Up @@ -283,3 +293,46 @@ void noinline callthunks_patch_module_calls(struct callthunk_sites *cs,
mutex_unlock(&text_mutex);
}
#endif /* CONFIG_MODULES */

#if defined(CONFIG_CALL_THUNKS_DEBUG) && defined(CONFIG_DEBUG_FS)
/*
 * seq_file show callback for one per-CPU debugfs file.
 *
 * @m: seq_file being filled; m->private holds the CPU number stored as a
 *     pointer-sized integer.
 * @p: unused.
 *
 * Emits a single line with that CPU's counters:
 *   C: __x86_call_count   R: __x86_ret_count
 *   S: __x86_stuffs_count X: __x86_ctxsw_count
 *
 * Always returns 0.
 */
static int callthunks_debug_show(struct seq_file *m, void *p)
{
	unsigned long cpu = (unsigned long)m->private;

	/* The record ends at the newline; nothing may be printed after it. */
	seq_printf(m, "C: %16llu R: %16llu S: %16llu X: %16llu\n",
		   per_cpu(__x86_call_count, cpu),
		   per_cpu(__x86_ret_count, cpu),
		   per_cpu(__x86_stuffs_count, cpu),
		   per_cpu(__x86_ctxsw_count, cpu));
	return 0;
}

/*
 * open() handler for the per-CPU debugfs files.
 *
 * Binds the file to callthunks_debug_show() through single_open() and
 * passes inode->i_private on as the show callback's private data.
 * Returns whatever single_open() returns.
 */
static int callthunks_debug_open(struct inode *inode, struct file *file)
{
	void *cpu_cookie = inode->i_private;

	return single_open(file, callthunks_debug_show, cpu_cookie);
}

/*
 * File operations shared by all per-CPU debugfs files: open via
 * callthunks_debug_open(), the rest are the stock seq_file helpers.
 * No write handler is provided.
 */
static const struct file_operations dfs_ops = {
.open = callthunks_debug_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};

/*
 * Create the "callthunks" debugfs directory and populate it with one
 * "cpu<N>" file per possible CPU.
 *
 * The CPU number is stored directly in the file's private pointer, so
 * no per-file allocation is needed. Return values of the debugfs calls
 * are not checked. Always returns 0.
 */
static int __init callthunks_debugfs_init(void)
{
	struct dentry *dir;
	unsigned long cpu;

	dir = debugfs_create_dir("callthunks", NULL);
	for_each_possible_cpu(cpu) {
		void *arg = (void *)cpu;
		/* "cpu" + up to 20 digits of a 64-bit unsigned long + NUL */
		char name[24];

		/* Bounded formatting: never write past the end of name[]. */
		snprintf(name, sizeof(name), "cpu%lu", cpu);
		debugfs_create_file(name, 0644, dir, arg, &dfs_ops);
	}
	return 0;
}
__initcall(callthunks_debugfs_init);
#endif
7 changes: 6 additions & 1 deletion arch/x86/lib/retpoline.S
Original file line number Diff line number Diff line change
Expand Up @@ -203,13 +203,18 @@ EXPORT_SYMBOL(__x86_return_thunk)
.align 64
SYM_FUNC_START(__x86_return_skl)
ANNOTATE_NOENDBR
/* Keep the hotpath in a 16byte I-fetch */
/*
* Keep the hotpath in a 16byte I-fetch for the non-debug
* case.
*/
CALL_THUNKS_DEBUG_INC_RETS
shlq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth)
jz 1f
ANNOTATE_UNRET_SAFE
ret
int3
1:
CALL_THUNKS_DEBUG_INC_STUFFS
.rept 16
ANNOTATE_INTRA_FUNCTION_CALL
call 2f
Expand Down

0 comments on commit f5c1bb2

Please sign in to comment.