Skip to content

Commit

Permalink
sh: perf events: Add preliminary support for SH-4A counters.
Browse files Browse the repository at this point in the history
This adds in preliminary support for the SH-4A performance counters.
Presently only the first 2 counters are supported, as these are the ones
of the most interest to the perf tool and end users. Counter chaining is
not presently handled, so these are simply implemented as 32-bit
counters.

This also establishes a perf event support framework for other hardware
counters, which the existing SH-4 oprofile code will migrate over to as
the SH-4A support evolves.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
  • Loading branch information
Paul Mundt committed Oct 28, 2009
1 parent 3714a9a commit ac44e66
Show file tree
Hide file tree
Showing 5 changed files with 576 additions and 2 deletions.
31 changes: 29 additions & 2 deletions arch/sh/include/asm/perf_event.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,35 @@
#ifndef __ASM_SH_PERF_EVENT_H
#define __ASM_SH_PERF_EVENT_H

/* SH only supports software events through this interface. */
static inline void set_perf_event_pending(void) {}
struct hw_perf_event;

#define MAX_HWEVENTS 2

struct sh_pmu {
const char *name;
unsigned int num_events;
void (*disable_all)(void);
void (*enable_all)(void);
void (*enable)(struct hw_perf_event *, int);
void (*disable)(struct hw_perf_event *, int);
u64 (*read)(int);
int (*event_map)(int);
unsigned int max_events;
unsigned long raw_event_mask;
const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
};

/* arch/sh/kernel/perf_event.c */
extern int register_sh_pmu(struct sh_pmu *);
extern int reserve_pmc_hardware(void);
extern void release_pmc_hardware(void);

static inline void set_perf_event_pending(void)
{
/* Nothing to see here, move along. */
}

#define PERF_EVENT_INDEX_OFFSET 0

Expand Down
1 change: 1 addition & 0 deletions arch/sh/kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_DUMP_CODE) += disassemble.o
obj-$(CONFIG_HIBERNATION) += swsusp.o
obj-$(CONFIG_DWARF_UNWINDER) += dwarf.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o

obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += localtimer.o

Expand Down
1 change: 1 addition & 0 deletions arch/sh/kernel/cpu/sh4a/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,4 @@ pinmux-$(CONFIG_CPU_SUBTYPE_SH7786) := pinmux-sh7786.o
obj-y += $(clock-y)
obj-$(CONFIG_SMP) += $(smp-y)
obj-$(CONFIG_GENERIC_GPIO) += $(pinmux-y)
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
231 changes: 231 additions & 0 deletions arch/sh/kernel/cpu/sh4a/perf_event.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
/*
* Performance events support for SH-4A performance counters
*
* Copyright (C) 2009 Paul Mundt
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/irq.h>
#include <linux/perf_event.h>
#include <asm/processor.h>

#define PPC_CCBR(idx) (0xff200800 + (sizeof(u32) * idx))
#define PPC_PMCTR(idx) (0xfc100000 + (sizeof(u32) * idx))

#define CCBR_CIT_MASK (0x7ff << 6)
#define CCBR_DUC (1 << 3)
#define CCBR_CMDS (1 << 1)
#define CCBR_PPCE (1 << 0)

#define PPC_PMCAT 0xfc100080

#define PMCAT_OVF3 (1 << 27)
#define PMCAT_CNN3 (1 << 26)
#define PMCAT_CLR3 (1 << 25)
#define PMCAT_OVF2 (1 << 19)
#define PMCAT_CLR2 (1 << 17)
#define PMCAT_OVF1 (1 << 11)
#define PMCAT_CNN1 (1 << 10)
#define PMCAT_CLR1 (1 << 9)
#define PMCAT_OVF0 (1 << 3)
#define PMCAT_CLR0 (1 << 1)

static struct sh_pmu sh4a_pmu;

/*
* Special reserved bits used by hardware emulators, read values will
* vary, but writes must always be 0.
*/
#define PMCAT_EMU_CLR_MASK ((1 << 24) | (1 << 16) | (1 << 8) | (1 << 0))

static const int sh4a_general_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] = 0x0000,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x0202,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0029, /* I-cache */
[PERF_COUNT_HW_CACHE_MISSES] = 0x002a, /* I-cache */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0204,
[PERF_COUNT_HW_BRANCH_MISSES] = -1,
[PERF_COUNT_HW_BUS_CYCLES] = -1,
};

#define C(x) PERF_COUNT_HW_CACHE_##x

static const int sh4a_cache_events
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(L1D) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0031,
[ C(RESULT_MISS) ] = 0x0032,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x0039,
[ C(RESULT_MISS) ] = 0x003a,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
},

[ C(L1I) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0029,
[ C(RESULT_MISS) ] = 0x002a,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
},

[ C(LL) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0030,
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x0038,
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
},

[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0222,
[ C(RESULT_MISS) ] = 0x0220,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
},

[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0x02a0,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},

[ C(BPU) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
};

static int sh4a_event_map(int event)
{
return sh4a_general_events[event];
}

static u64 sh4a_pmu_read(int idx)
{
return __raw_readl(PPC_PMCTR(idx));
}

static void sh4a_pmu_disable(struct hw_perf_event *hwc, int idx)
{
unsigned int tmp;

tmp = __raw_readl(PPC_CCBR(idx));
tmp &= ~(CCBR_CIT_MASK | CCBR_DUC);
__raw_writel(tmp, PPC_CCBR(idx));
}

static void sh4a_pmu_enable(struct hw_perf_event *hwc, int idx)
{
unsigned int tmp;

tmp = __raw_readl(PPC_PMCAT);
tmp &= ~PMCAT_EMU_CLR_MASK;
tmp |= idx ? PMCAT_CLR1 : PMCAT_CLR0;
__raw_writel(tmp, PPC_PMCAT);

tmp = __raw_readl(PPC_CCBR(idx));
tmp |= (hwc->config << 6) | CCBR_CMDS | CCBR_PPCE;
__raw_writel(tmp, PPC_CCBR(idx));

__raw_writel(__raw_readl(PPC_CCBR(idx)) | CCBR_DUC, PPC_CCBR(idx));
}

static void sh4a_pmu_disable_all(void)
{
int i;

for (i = 0; i < sh4a_pmu.num_events; i++)
__raw_writel(__raw_readl(PPC_CCBR(i)) & ~CCBR_DUC, PPC_CCBR(i));
}

static void sh4a_pmu_enable_all(void)
{
int i;

for (i = 0; i < sh4a_pmu.num_events; i++)
__raw_writel(__raw_readl(PPC_CCBR(i)) | CCBR_DUC, PPC_CCBR(i));
}

static struct sh_pmu sh4a_pmu = {
.name = "SH-4A",
.num_events = 2,
.event_map = sh4a_event_map,
.max_events = ARRAY_SIZE(sh4a_general_events),
.raw_event_mask = 0x3ff,
.cache_events = &sh4a_cache_events,
.read = sh4a_pmu_read,
.disable = sh4a_pmu_disable,
.enable = sh4a_pmu_enable,
.disable_all = sh4a_pmu_disable_all,
.enable_all = sh4a_pmu_enable_all,
};

static int __init sh4a_pmu_init(void)
{
/*
* Make sure this CPU actually has perf counters.
*/
if (!(boot_cpu_data.flags & CPU_HAS_PERF_COUNTER)) {
pr_notice("HW perf events unsupported, software events only.\n");
return -ENODEV;
}

return register_sh_pmu(&sh4a_pmu);
}
arch_initcall(sh4a_pmu_init);
Loading

0 comments on commit ac44e66

Please sign in to comment.