Skip to content

Commit

Permalink
powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI
Browse files Browse the repository at this point in the history
The gcc switch -mprofile-kernel defines a new ABI for calling _mcount()
very early in the function with minimal overhead.

Although mprofile-kernel has been available since GCC 3.4, there were
bugs which were only fixed recently. Currently it is known to work in
GCC 4.9, 5 and 6.

Additionally there are two possible code sequences generated by the
flag, the first uses mflr/std/bl and the second is optimised to omit the
std. Currently only gcc 6 has the optimised sequence. This patch
supports both sequences.

Initial work started by Vojtech Pavlik, used with permission.

Key changes:
 - rework _mcount() to work for both the old and new ABIs.
 - implement new versions of ftrace_caller() and ftrace_graph_caller()
   which deal with the new ABI.
 - updates to __ftrace_make_nop() to recognise the new mcount calling
   sequence.
 - updates to __ftrace_make_call() to recognise the nop'ed sequence.
 - implement ftrace_modify_call().
 - updates to the module loader to surpress the toc save in the module
   stub when calling mcount with the new ABI.

Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Torsten Duwe <duwe@suse.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
  • Loading branch information
Torsten Duwe authored and Michael Ellerman committed Mar 7, 2016
1 parent 9a7841a commit 1530866
Show file tree
Hide file tree
Showing 5 changed files with 324 additions and 20 deletions.
21 changes: 21 additions & 0 deletions arch/powerpc/include/asm/code-patching.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,4 +99,25 @@ static inline unsigned long ppc_global_function_entry(void *func)
#endif
}

#ifdef CONFIG_PPC64
/*
* Some instruction encodings commonly used in dynamic ftracing
* and function live patching.
*/

/* This must match the definition of STK_GOT in <asm/ppc_asm.h> */
#if defined(_CALL_ELF) && _CALL_ELF == 2
#define R2_STACK_OFFSET 24
#else
#define R2_STACK_OFFSET 40
#endif

#define PPC_INST_LD_TOC (PPC_INST_LD | ___PPC_RT(__REG_R2) | \
___PPC_RA(__REG_R1) | R2_STACK_OFFSET)

/* usually preceded by a mflr r0 */
#define PPC_INST_STD_LR (PPC_INST_STD | ___PPC_RS(__REG_R0) | \
___PPC_RA(__REG_R1) | PPC_LR_STKOFF)
#endif /* CONFIG_PPC64 */

#endif /* _ASM_POWERPC_CODE_PATCHING_H */
5 changes: 5 additions & 0 deletions arch/powerpc/include/asm/ftrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
extern void _mcount(void);

#ifdef CONFIG_DYNAMIC_FTRACE
# define FTRACE_ADDR ((unsigned long)ftrace_caller)
# define FTRACE_REGS_ADDR FTRACE_ADDR
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
/* reloction of mcount call site is the same as the address */
Expand All @@ -58,6 +60,9 @@ struct dyn_arch_ftrace {
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* __ASSEMBLY__ */

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
#define ARCH_SUPPORTS_FTRACE_OPS 1
#endif
#endif

#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__)
Expand Down
166 changes: 165 additions & 1 deletion arch/powerpc/kernel/entry_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -1143,8 +1143,12 @@ _GLOBAL(enter_prom)
#ifdef CONFIG_DYNAMIC_FTRACE
_GLOBAL(mcount)
_GLOBAL(_mcount)
blr
mflr r12
mtctr r12
mtlr r0
bctr

#ifndef CC_USING_MPROFILE_KERNEL
_GLOBAL_TOC(ftrace_caller)
/* Taken from output of objdump from lib64/glibc */
mflr r3
Expand All @@ -1166,6 +1170,115 @@ _GLOBAL(ftrace_graph_stub)
ld r0, 128(r1)
mtlr r0
addi r1, r1, 112

#else /* CC_USING_MPROFILE_KERNEL */
/*
*
* ftrace_caller() is the function that replaces _mcount() when ftrace is
* active.
*
* We arrive here after a function A calls function B, and we are the trace
* function for B. When we enter r1 points to A's stack frame, B has not yet
* had a chance to allocate one yet.
*
* Additionally r2 may point either to the TOC for A, or B, depending on
* whether B did a TOC setup sequence before calling us.
*
* On entry the LR points back to the _mcount() call site, and r0 holds the
* saved LR as it was on entry to B, ie. the original return address at the
* call site in A.
*
* Our job is to save the register state into a struct pt_regs (on the stack)
* and then arrange for the ftrace function to be called.
*/
_GLOBAL(ftrace_caller)
/* Save the original return address in A's stack frame */
std r0,LRSAVE(r1)

/* Create our stack frame + pt_regs */
stdu r1,-SWITCH_FRAME_SIZE(r1)

/* Save all gprs to pt_regs */
SAVE_8GPRS(0,r1)
SAVE_8GPRS(8,r1)
SAVE_8GPRS(16,r1)
SAVE_8GPRS(24,r1)

/* Load special regs for save below */
mfmsr r8
mfctr r9
mfxer r10
mfcr r11

/* Get the _mcount() call site out of LR */
mflr r7
/* Save it as pt_regs->nip & pt_regs->link */
std r7, _NIP(r1)
std r7, _LINK(r1)

/* Save callee's TOC in the ABI compliant location */
std r2, 24(r1)
ld r2,PACATOC(r13) /* get kernel TOC in r2 */

addis r3,r2,function_trace_op@toc@ha
addi r3,r3,function_trace_op@toc@l
ld r5,0(r3)

/* Calculate ip from nip-4 into r3 for call below */
subi r3, r7, MCOUNT_INSN_SIZE

/* Put the original return address in r4 as parent_ip */
mr r4, r0

/* Save special regs */
std r8, _MSR(r1)
std r9, _CTR(r1)
std r10, _XER(r1)
std r11, _CCR(r1)

/* Load &pt_regs in r6 for call below */
addi r6, r1 ,STACK_FRAME_OVERHEAD

/* ftrace_call(r3, r4, r5, r6) */
.globl ftrace_call
ftrace_call:
bl ftrace_stub
nop

/* Load ctr with the possibly modified NIP */
ld r3, _NIP(r1)
mtctr r3

/* Restore gprs */
REST_8GPRS(0,r1)
REST_8GPRS(8,r1)
REST_8GPRS(16,r1)
REST_8GPRS(24,r1)

/* Restore callee's TOC */
ld r2, 24(r1)

/* Pop our stack frame */
addi r1, r1, SWITCH_FRAME_SIZE

/* Restore original LR for return to B */
ld r0, LRSAVE(r1)
mtlr r0

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
stdu r1, -112(r1)
.globl ftrace_graph_call
ftrace_graph_call:
b ftrace_graph_stub
_GLOBAL(ftrace_graph_stub)
addi r1, r1, 112
#endif

ld r0,LRSAVE(r1) /* restore callee's lr at _mcount site */
mtlr r0
bctr /* jump after _mcount site */
#endif /* CC_USING_MPROFILE_KERNEL */

_GLOBAL(ftrace_stub)
blr
#else
Expand Down Expand Up @@ -1198,6 +1311,7 @@ _GLOBAL(ftrace_stub)
#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
#ifndef CC_USING_MPROFILE_KERNEL
_GLOBAL(ftrace_graph_caller)
/* load r4 with local address */
ld r4, 128(r1)
Expand All @@ -1222,6 +1336,56 @@ _GLOBAL(ftrace_graph_caller)
addi r1, r1, 112
blr

#else /* CC_USING_MPROFILE_KERNEL */
_GLOBAL(ftrace_graph_caller)
/* with -mprofile-kernel, parameter regs are still alive at _mcount */
std r10, 104(r1)
std r9, 96(r1)
std r8, 88(r1)
std r7, 80(r1)
std r6, 72(r1)
std r5, 64(r1)
std r4, 56(r1)
std r3, 48(r1)

/* Save callee's TOC in the ABI compliant location */
std r2, 24(r1)
ld r2, PACATOC(r13) /* get kernel TOC in r2 */

mfctr r4 /* ftrace_caller has moved local addr here */
std r4, 40(r1)
mflr r3 /* ftrace_caller has restored LR from stack */
subi r4, r4, MCOUNT_INSN_SIZE

bl prepare_ftrace_return
nop

/*
* prepare_ftrace_return gives us the address we divert to.
* Change the LR to this.
*/
mtlr r3

ld r0, 40(r1)
mtctr r0
ld r10, 104(r1)
ld r9, 96(r1)
ld r8, 88(r1)
ld r7, 80(r1)
ld r6, 72(r1)
ld r5, 64(r1)
ld r4, 56(r1)
ld r3, 48(r1)

/* Restore callee's TOC */
ld r2, 24(r1)

addi r1, r1, 112
mflr r0
std r0, LRSAVE(r1)
bctr
#endif /* CC_USING_MPROFILE_KERNEL */

_GLOBAL(return_to_handler)
/* need to save return values */
std r4, -32(r1)
Expand Down
103 changes: 87 additions & 16 deletions arch/powerpc/kernel/ftrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,11 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
return -EFAULT;

/* Make sure it is what we expect it to be */
if (replaced != old)
if (replaced != old) {
pr_err("%p: replaced (%#x) != old (%#x)",
(void *)ip, replaced, old);
return -EINVAL;
}

/* replace the text with the new text */
if (patch_instruction((unsigned int *)ip, new))
Expand Down Expand Up @@ -108,11 +111,13 @@ __ftrace_make_nop(struct module *mod,
{
unsigned long entry, ptr, tramp;
unsigned long ip = rec->ip;
unsigned int op;
unsigned int op, pop;

/* read where this goes */
if (probe_kernel_read(&op, (void *)ip, sizeof(int)))
if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
pr_err("Fetching opcode failed.\n");
return -EFAULT;
}

/* Make sure that that this is still a 24bit jump */
if (!is_bl_op(op)) {
Expand Down Expand Up @@ -152,10 +157,42 @@ __ftrace_make_nop(struct module *mod,
*
* Use a b +8 to jump over the load.
*/
op = 0x48000008; /* b +8 */

if (patch_instruction((unsigned int *)ip, op))
pop = PPC_INST_BRANCH | 8; /* b +8 */

/*
* Check what is in the next instruction. We can see ld r2,40(r1), but
* on first pass after boot we will see mflr r0.
*/
if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) {
pr_err("Fetching op failed.\n");
return -EFAULT;
}

if (op != PPC_INST_LD_TOC) {
unsigned int inst;

if (probe_kernel_read(&inst, (void *)(ip - 4), 4)) {
pr_err("Fetching instruction at %lx failed.\n", ip - 4);
return -EFAULT;
}

/* We expect either a mlfr r0, or a std r0, LRSAVE(r1) */
if (inst != PPC_INST_MFLR && inst != PPC_INST_STD_LR) {
pr_err("Unexpected instructions around bl _mcount\n"
"when enabling dynamic ftrace!\t"
"(%08x,bl,%08x)\n", inst, op);
return -EINVAL;
}

/* When using -mkernel_profile there is no load to jump over */
pop = PPC_INST_NOP;
}

if (patch_instruction((unsigned int *)ip, pop)) {
pr_err("Patching NOP failed.\n");
return -EPERM;
}

return 0;
}
Expand Down Expand Up @@ -281,16 +318,15 @@ int ftrace_make_nop(struct module *mod,

#ifdef CONFIG_MODULES
#ifdef CONFIG_PPC64
/*
* Examine the existing instructions for __ftrace_make_call.
* They should effectively be a NOP, and follow formal constraints,
* depending on the ABI. Return false if they don't.
*/
#ifndef CC_USING_MPROFILE_KERNEL
static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1)
{
unsigned int op[2];
void *ip = (void *)rec->ip;

/* read where this goes */
if (probe_kernel_read(op, ip, sizeof(op)))
return -EFAULT;

/*
* We expect to see:
*
Expand All @@ -300,8 +336,34 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
* The load offset is different depending on the ABI. For simplicity
* just mask it out when doing the compare.
*/
if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
if ((op0 != 0x48000008) || ((op1 & 0xffff0000) != 0xe8410000))
return 0;
return 1;
}
#else
static int
expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1)
{
/* look for patched "NOP" on ppc64 with -mprofile-kernel */
if (op0 != PPC_INST_NOP)
return 0;
return 1;
}
#endif

static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned int op[2];
void *ip = (void *)rec->ip;

/* read where this goes */
if (probe_kernel_read(op, ip, sizeof(op)))
return -EFAULT;

if (!expected_nop_sequence(ip, op[0], op[1])) {
pr_err("Unexpected call sequence at %p: %x %x\n",
ip, op[0], op[1]);
return -EINVAL;
}

Expand All @@ -324,7 +386,16 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)

return 0;
}
#else

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
return ftrace_make_call(rec, addr);
}
#endif

#else /* !CONFIG_PPC64: */
static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
Expand Down
Loading

0 comments on commit 1530866

Please sign in to comment.