Skip to content

Commit

Permalink
ARM: kprobes: Optimise emulation of LDM and STM
Browse files Browse the repository at this point in the history
This patch improves the performance of LDM and STM instruction
emulation. This is desirable because.

- jprobes and kretprobes probe the first instruction in a function and,
  when the frame pointer is omitted, this instruction is often a STM
  used to push registers onto the stack.

- The STM and LDM instructions are common in the body and tail of
  functions.

- At the same time as being a common instruction form, they also have
  one of the slowest and most complicated simulation routines.

The approach taken to optimisation is to use emulation rather than
simulation, that is, a modified form of the instruction is run with
an appropriate register context.

Benchmarking on an OMAP3530 shows the optimised emulation is between 2
and 3 times faster than the simulation routines. On a Kirkwood based
device the relative performance was very significantly better than this.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Acked-by: Nicolas Pitre <nicolas.pitre@linaro.org>
  • Loading branch information
Jon Medhurst authored and Tixy committed Jul 13, 2011
1 parent 235a4ce commit 3d4a997
Showing 1 changed file with 68 additions and 0 deletions.
68 changes: 68 additions & 0 deletions arch/arm/kernel/kprobes-common.c
Original file line number Diff line number Diff line change
Expand Up @@ -220,13 +220,81 @@ static void __kprobes simulate_ldm1_pc(struct kprobe *p, struct pt_regs *regs)
load_write_pc(regs->ARM_pc, regs);
}

static void __kprobes
emulate_generic_r0_12_noflags(struct kprobe *p, struct pt_regs *regs)
{
register void *rregs asm("r1") = regs;
register void *rfn asm("lr") = p->ainsn.insn_fn;

__asm__ __volatile__ (
"stmdb sp!, {%[regs], r11} \n\t"
"ldmia %[regs], {r0-r12} \n\t"
#if __LINUX_ARM_ARCH__ >= 6
"blx %[fn] \n\t"
#else
"str %[fn], [sp, #-4]! \n\t"
"adr lr, 1f \n\t"
"ldr pc, [sp], #4 \n\t"
"1: \n\t"
#endif
"ldr lr, [sp], #4 \n\t" /* lr = regs */
"stmia lr, {r0-r12} \n\t"
"ldr r11, [sp], #4 \n\t"
: [regs] "=r" (rregs), [fn] "=r" (rfn)
: "0" (rregs), "1" (rfn)
: "r0", "r2", "r3", "r4", "r5", "r6", "r7",
"r8", "r9", "r10", "r12", "memory", "cc"
);
}

static void __kprobes
emulate_generic_r2_14_noflags(struct kprobe *p, struct pt_regs *regs)
{
emulate_generic_r0_12_noflags(p, (struct pt_regs *)(regs->uregs+2));
}

static void __kprobes
emulate_ldm_r3_15(struct kprobe *p, struct pt_regs *regs)
{
emulate_generic_r0_12_noflags(p, (struct pt_regs *)(regs->uregs+3));
load_write_pc(regs->ARM_pc, regs);
}

enum kprobe_insn __kprobes
kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi)
{
kprobe_insn_handler_t *handler = 0;
unsigned reglist = insn & 0xffff;
int is_ldm = insn & 0x100000;
int rn = (insn >> 16) & 0xf;

if (rn <= 12 && (reglist & 0xe000) == 0) {
/* Instruction only uses registers in the range R0..R12 */
handler = emulate_generic_r0_12_noflags;

} else if (rn >= 2 && (reglist & 0x8003) == 0) {
/* Instruction only uses registers in the range R2..R14 */
rn -= 2;
reglist >>= 2;
handler = emulate_generic_r2_14_noflags;

} else if (rn >= 3 && (reglist & 0x0007) == 0) {
/* Instruction only uses registers in the range R3..R15 */
if (is_ldm && (reglist & 0x8000)) {
rn -= 3;
reglist >>= 3;
handler = emulate_ldm_r3_15;
}
}

if (handler) {
/* We can emulate the instruction in (possibly) modified form */
asi->insn[0] = (insn & 0xfff00000) | (rn << 16) | reglist;
asi->insn_handler = handler;
return INSN_GOOD;
}

/* Fallback to slower simulation... */
if (reglist & 0x8000)
handler = is_ldm ? simulate_ldm1_pc : simulate_stm1_pc;
else
Expand Down

0 comments on commit 3d4a997

Please sign in to comment.