Skip to content

Commit

Permalink
powerpc/8xx: Only perform perf counting when perf is in use.
Browse files Browse the repository at this point in the history
In TLB miss handlers, updating the perf counter is only useful
when performing a perf analysis. As it has a noticeable overhead,
let's only do it when needed.

In order to do so, the exit of the miss handlers will be patched
when starting/stopping 'perf': the first register restore
instruction of each exit point will be replaced by a jump to
the counting code.

Once this is done, CONFIG_PPC_8xx_PERF_EVENT becomes useless, as this
feature no longer adds any overhead to the TLB miss handlers when perf is not in use.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
  • Loading branch information
Christophe Leroy authored and Michael Ellerman committed Jan 16, 2018
1 parent bb9b5a8 commit cd99ddb
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 32 deletions.
2 changes: 2 additions & 0 deletions arch/powerpc/include/asm/ppc-opcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@
#define PPC_INST_RFCI 0x4c000066
#define PPC_INST_RFDI 0x4c00004e
#define PPC_INST_RFMCI 0x4c00004c
#define PPC_INST_MFSPR 0x7c0002a6
#define PPC_INST_MFSPR_DSCR 0x7c1102a6
#define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe
#define PPC_INST_MTSPR_DSCR 0x7c1103a6
Expand Down Expand Up @@ -383,6 +384,7 @@
#define __PPC_ME64(s) __PPC_MB64(s)
#define __PPC_BI(s) (((s) & 0x1f) << 16)
#define __PPC_CT(t) (((t) & 0x0f) << 21)
/*
 * Encode a 10-bit SPR number into an instruction word: the low 5 bits of
 * 'r' are placed at bit 16 and the high 5 bits at bit 11, i.e. the two
 * halves are stored swapped.
 */
#define __PPC_SPR(r) ((((r) & 0x1f) << 16) | ((((r) >> 5) & 0x1f) << 11))

/*
* Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a
Expand Down
10 changes: 5 additions & 5 deletions arch/powerpc/kernel/entry_32.S
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ transfer_to_handler_cont:
mflr r9
lwz r11,0(r9) /* virtual address of handler */
lwz r9,4(r9) /* where to go when done */
#ifdef CONFIG_PPC_8xx_PERF_EVENT
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
Expand Down Expand Up @@ -301,7 +301,7 @@ stack_ovf:
lis r9,StackOverflow@ha
addi r9,r9,StackOverflow@l
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
#ifdef CONFIG_PPC_8xx_PERF_EVENT
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR0,r9
Expand Down Expand Up @@ -430,7 +430,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
lwz r7,_NIP(r1)
lwz r2,GPR2(r1)
lwz r1,GPR1(r1)
#ifdef CONFIG_PPC_8xx_PERF_EVENT
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR0,r7
Expand Down Expand Up @@ -727,7 +727,7 @@ fast_exception_return:
lwz r10,_LINK(r11)
mtlr r10
REST_GPR(10, r11)
#ifdef CONFIG_PPC_8xx_PERF_EVENT
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR1,r9
Expand Down Expand Up @@ -978,7 +978,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
.globl exc_exit_restart
exc_exit_restart:
lwz r12,_NIP(r1)
#ifdef CONFIG_PPC_8xx_PERF_EVENT
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR0,r12
Expand Down
47 changes: 32 additions & 15 deletions arch/powerpc/kernel/head_8xx.S
Original file line number Diff line number Diff line change
Expand Up @@ -304,12 +304,6 @@ InstructionTLBMiss:
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mtspr SPRN_SPRG_SCRATCH2, r12
#endif
#ifdef CONFIG_PPC_8xx_PERF_EVENT
lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha
lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, 1
stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
#endif

/* If we are faulting a kernel address, we have to use the
* kernel page tables.
Expand Down Expand Up @@ -392,6 +386,20 @@ _ENTRY(ITLBMiss_cmp)
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */

/* Restore registers */
_ENTRY(itlb_miss_exit_1)
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mfspr r12, SPRN_SPRG_SCRATCH2
#endif
rfi
#ifdef CONFIG_PERF_EVENTS
_ENTRY(itlb_miss_perf)
lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha
lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, 1
stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
#endif
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
Expand Down Expand Up @@ -429,12 +437,6 @@ DataStoreTLBMiss:
mtspr SPRN_SPRG_SCRATCH0, r10
mtspr SPRN_SPRG_SCRATCH1, r11
mtspr SPRN_SPRG_SCRATCH2, r12
#ifdef CONFIG_PPC_8xx_PERF_EVENT
lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, 1
stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
#endif
mfcr r12

/* If we are faulting a kernel address, we have to use the
Expand Down Expand Up @@ -526,6 +528,18 @@ _ENTRY(DTLBMiss_jmp)

/* Restore registers */
mtspr SPRN_DAR, r11 /* Tag DAR */
_ENTRY(dtlb_miss_exit_1)
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
rfi
#ifdef CONFIG_PERF_EVENTS
_ENTRY(dtlb_miss_perf)
lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, 1
stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
#endif
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
Expand Down Expand Up @@ -635,7 +649,7 @@ DataBreakpoint:
mfspr r11, SPRN_SPRG_SCRATCH1
rfi

#ifdef CONFIG_PPC_8xx_PERF_EVENT
#ifdef CONFIG_PERF_EVENTS
. = 0x1d00
InstructionBreakpoint:
mtspr SPRN_SPRG_SCRATCH0, r10
Expand Down Expand Up @@ -675,6 +689,7 @@ DTLBMissIMMR:

li r11, RPN_PATTERN
mtspr SPRN_DAR, r11 /* Tag DAR */
_ENTRY(dtlb_miss_exit_2)
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
Expand All @@ -692,6 +707,7 @@ DTLBMissLinear:

li r11, RPN_PATTERN
mtspr SPRN_DAR, r11 /* Tag DAR */
_ENTRY(dtlb_miss_exit_3)
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
Expand All @@ -708,6 +724,7 @@ ITLBMissLinear:
_PAGE_PRESENT
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */

_ENTRY(itlb_miss_exit_2)
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
Expand Down Expand Up @@ -1039,7 +1056,7 @@ initial_mmu:
#endif
/* Disable debug mode entry on breakpoints */
mfspr r8, SPRN_DER
#ifdef CONFIG_PPC_8xx_PERF_EVENT
#ifdef CONFIG_PERF_EVENTS
rlwinm r8, r8, 0, ~0xc
#else
rlwinm r8, r8, 0, ~0x8
Expand Down Expand Up @@ -1072,7 +1089,7 @@ swapper_pg_dir:
abatron_pteptrs:
.space 8

#ifdef CONFIG_PPC_8xx_PERF_EVENT
#ifdef CONFIG_PERF_EVENTS
.globl itlb_miss_counter
itlb_miss_counter:
.space 4
Expand Down
52 changes: 48 additions & 4 deletions arch/powerpc/perf/8xx-pmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/ptrace.h>
#include <asm/code-patching.h>

#define PERF_8xx_ID_CPU_CYCLES 1
#define PERF_8xx_ID_HW_INSTRUCTIONS 2
Expand All @@ -30,8 +31,13 @@

extern unsigned long itlb_miss_counter, dtlb_miss_counter;
extern atomic_t instruction_counter;
extern unsigned int itlb_miss_perf, dtlb_miss_perf;
extern unsigned int itlb_miss_exit_1, itlb_miss_exit_2;
extern unsigned int dtlb_miss_exit_1, dtlb_miss_exit_2, dtlb_miss_exit_3;

static atomic_t insn_ctr_ref;
static atomic_t itlb_miss_ref;
static atomic_t dtlb_miss_ref;

static s64 get_insn_ctr(void)
{
Expand Down Expand Up @@ -96,9 +102,24 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags)
val = get_insn_ctr();
break;
case PERF_8xx_ID_ITLB_LOAD_MISS:
if (atomic_inc_return(&itlb_miss_ref) == 1) {
unsigned long target = (unsigned long)&itlb_miss_perf;

patch_branch(&itlb_miss_exit_1, target, 0);
#ifndef CONFIG_PIN_TLB_TEXT
patch_branch(&itlb_miss_exit_2, target, 0);
#endif
}
val = itlb_miss_counter;
break;
case PERF_8xx_ID_DTLB_LOAD_MISS:
if (atomic_inc_return(&dtlb_miss_ref) == 1) {
unsigned long target = (unsigned long)&dtlb_miss_perf;

patch_branch(&dtlb_miss_exit_1, target, 0);
patch_branch(&dtlb_miss_exit_2, target, 0);
patch_branch(&dtlb_miss_exit_3, target, 0);
}
val = dtlb_miss_counter;
break;
}
Expand Down Expand Up @@ -143,13 +164,36 @@ static void mpc8xx_pmu_read(struct perf_event *event)

/*
 * Remove a perf event: take a final reading of the event, then drop the
 * reference held for its event type. When the last reference of a type
 * goes away, the counting that was enabled for that type is turned off.
 *
 * @event: the perf event being removed
 * @flags: not used by this function
 *
 * NOTE(review): this listing looks like a diff view rendered without +/-
 * markers. The early return and the insn_ctr_ref decrement that precede
 * the switch are presumably the pre-change lines that the switch replaces;
 * confirm against the actual tree before relying on the exact flow.
 */
static void mpc8xx_pmu_del(struct perf_event *event, int flags)
{
/*
 * Instruction word for "mfspr r10, SPRN_SPRG_SCRATCH0". It is written back
 * over the itlb/dtlb miss exit labels below, which mpc8xx_pmu_add() patches
 * with a branch to the counting code.
 */
unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) |
__PPC_SPR(SPRN_SPRG_SCRATCH0);

/* Take a last reading before the event goes away */
mpc8xx_pmu_read(event);
if (event_type(event) != PERF_8xx_ID_HW_INSTRUCTIONS)
return;

/* If it was the last user, stop counting to avoid useless overhead */
if (atomic_dec_return(&insn_ctr_ref) == 0)
mtspr(SPRN_ICTRL, 7);
/* Per event type: drop the reference and undo the setup on last user */
switch (event_type(event)) {
case PERF_8xx_ID_CPU_CYCLES:
/* Nothing to undo for this event type */
break;
case PERF_8xx_ID_HW_INSTRUCTIONS:
/* Last user gone: write 7 to ICTRL (presumably stops instruction counting; verify) */
if (atomic_dec_return(&insn_ctr_ref) == 0)
mtspr(SPRN_ICTRL, 7);
break;
case PERF_8xx_ID_ITLB_LOAD_MISS:
/* Last user gone: put the mfspr back at the ITLB miss exit points */
if (atomic_dec_return(&itlb_miss_ref) == 0) {
patch_instruction(&itlb_miss_exit_1, insn);
/* itlb_miss_exit_2 is only patched when CONFIG_PIN_TLB_TEXT is not set */
#ifndef CONFIG_PIN_TLB_TEXT
patch_instruction(&itlb_miss_exit_2, insn);
#endif
}
break;
case PERF_8xx_ID_DTLB_LOAD_MISS:
/* Last user gone: put the mfspr back at all three DTLB miss exit points */
if (atomic_dec_return(&dtlb_miss_ref) == 0) {
patch_instruction(&dtlb_miss_exit_1, insn);
patch_instruction(&dtlb_miss_exit_2, insn);
patch_instruction(&dtlb_miss_exit_3, insn);
}
break;
}
}

static struct pmu mpc8xx_pmu = {
Expand Down
2 changes: 1 addition & 1 deletion arch/powerpc/perf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o

obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o

obj-$(CONFIG_PPC_8xx_PERF_EVENT) += 8xx-pmu.o
obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o

obj-$(CONFIG_PPC64) += $(obj64-y)
obj-$(CONFIG_PPC32) += $(obj32-y)
7 changes: 0 additions & 7 deletions arch/powerpc/platforms/Kconfig.cputype
Original file line number Diff line number Diff line change
Expand Up @@ -167,13 +167,6 @@ config PPC_FPU
bool
default y if PPC64

config PPC_8xx_PERF_EVENT
bool "PPC 8xx perf events"
depends on PPC_8xx && PERF_EVENTS
help
This is Performance Events support for PPC 8xx. The 8xx doesn't
have a PMU but some events are emulated using 8xx features.

config FSL_EMB_PERFMON
bool "Freescale Embedded Perfmon"
depends on E500 || PPC_83xx
Expand Down

0 comments on commit cd99ddb

Please sign in to comment.