Skip to content

Commit

Permalink
Merge tag 'arm64-perf' of git://git.kernel.org/pub/scm/linux/kernel/g…
Browse files Browse the repository at this point in the history
…it/arm64/linux

Pull arm[64] perf updates from Will Deacon:
 "I have another mixed bag of ARM-related perf patches here.

  It's about 25% CPU and 75% interconnect, but with drivers/bus/
  languishing without an obvious maintainer or tree, Olof and I agreed
  to keep all of these PMU patches together.  I suspect a whole load of
  code from drivers/bus/arm-* can be moved under drivers/perf/, so
  that's on the radar for the future.

  Summary:

   - Initial support for ARMv8.1 CPU PMUs

   - Support for the CPU PMU in Cavium ThunderX

   - CPU PMU support for systems running 32-bit Linux in secure mode

   - Support for the system PMU in ARM CCI-550 (Cache Coherent Interconnect)"

* tag 'arm64-perf' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (26 commits)
  drivers/perf: arm_pmu: avoid NULL dereference when not using devicetree
  arm64: perf: Extend ARMV8_EVTYPE_MASK to include PMCR.LC
  arm-cci: remove unused variable
  arm-cci: don't return value from void function
  arm-cci: make private functions static
  arm-cci: CoreLink CCI-550 PMU driver
  arm-cci500: Rearrange PMU driver for code sharing with CCI-550 PMU
  arm-cci: CCI-500: Work around PMU counter writes
  arm-cci: Provide hook for writing to PMU counters
  arm-cci: Add helper to enable PMU without synchronising counters
  arm-cci: Add routines to save/restore all counters
  arm-cci: Get the status of a counter
  arm-cci: write_counter: Remove redundant check
  arm-cci: Delay PMU counter writes to pmu::pmu_enable
  arm-cci: Refactor CCI PMU enable/disable methods
  arm-cci: Group writes to counter
  arm-cci: fix handling cpumask_any_but return value
  arm-cci: simplify sysfs attr handling
  drivers/perf: arm_pmu: implement CPU_PM notifier
  arm64: dts: Add Cavium ThunderX specific PMU
  ...
  • Loading branch information
Linus Torvalds committed Mar 21, 2016
2 parents d34687a + 357b565 commit 2c856e1
Show file tree
Hide file tree
Showing 9 changed files with 667 additions and 228 deletions.
2 changes: 2 additions & 0 deletions Documentation/devicetree/bindings/arm/cci.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ specific to ARM.
Definition: must contain one of the following:
"arm,cci-400"
"arm,cci-500"
"arm,cci-550"

- reg
Usage: required
Expand Down Expand Up @@ -101,6 +102,7 @@ specific to ARM.
"arm,cci-400-pmu" - DEPRECATED, permitted only where OS has
secure access to CCI registers
"arm,cci-500-pmu,r0"
"arm,cci-550-pmu,r0"
- reg:
Usage: required
Value type: Integer cells. A register entry, expressed
Expand Down
11 changes: 11 additions & 0 deletions Documentation/devicetree/bindings/arm/pmu.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Required properties:
"qcom,scorpion-pmu"
"qcom,scorpion-mp-pmu"
"qcom,krait-pmu"
"cavium,thunder-pmu"
- interrupts : 1 combined interrupt or 1 per core. If the interrupt is a per-cpu
interrupt (PPI) then 1 interrupt should be specified.

Expand All @@ -46,6 +47,16 @@ Optional properties:
- qcom,no-pc-write : Indicates that this PMU doesn't support the 0xc and 0xd
events.

- secure-reg-access : Indicates that the ARMv7 Secure Debug Enable Register
(SDER) is accessible. This will cause the driver to do
any setup required that is only possible in ARMv7 secure
state. If not present the ARMv7 SDER will not be touched,
which means the PMU may fail to operate unless external
code (bootloader or security monitor) has performed the
appropriate initialisation. Note that this property is
not valid for non-ARMv7 CPUs or ARMv7 CPUs booting Linux
in Non-secure state.

Example:

pmu {
Expand Down
13 changes: 12 additions & 1 deletion arch/arm/kernel/perf_event_v7.c
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,11 @@ static const struct attribute_group *armv7_pmuv2_attr_groups[] = {
#define ARMV7_EXCLUDE_USER (1 << 30)
#define ARMV7_INCLUDE_HYP (1 << 27)

/*
* Secure debug enable reg
*/
#define ARMV7_SDER_SUNIDEN BIT(1) /* Permit non-invasive debug */

static inline u32 armv7_pmnc_read(void)
{
u32 val;
Expand Down Expand Up @@ -1094,7 +1099,13 @@ static int armv7pmu_set_event_filter(struct hw_perf_event *event,
static void armv7pmu_reset(void *info)
{
struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
u32 idx, nb_cnt = cpu_pmu->num_events;
u32 idx, nb_cnt = cpu_pmu->num_events, val;

if (cpu_pmu->secure_access) {
asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (val));
val |= ARMV7_SDER_SUNIDEN;
asm volatile("mcr p15, 0, %0, c1, c1, 1" : : "r" (val));
}

/* The counter and interrupt enable registers are unknown at reset. */
for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
Expand Down
5 changes: 5 additions & 0 deletions arch/arm64/boot/dts/cavium/thunder-88xx.dtsi
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,11 @@
<1 10 0xff01>;
};

/*
 * CPU PMU node. The ThunderX-specific compatible string is listed first,
 * followed by the generic "arm,armv8-pmuv3" string.
 */
pmu {
compatible = "cavium,thunder-pmu", "arm,armv8-pmuv3";
/*
 * A single interrupt specifier.
 * NOTE(review): presumably a per-CPU interrupt (PPI); confirm the cell
 * meanings against the interrupt controller binding.
 */
interrupts = <1 7 4>;
};

soc {
compatible = "simple-bus";
#address-cells = <2>;
Expand Down
122 changes: 100 additions & 22 deletions arch/arm64/kernel/perf_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,25 @@
#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F
#define ARMV8_PMUV3_PERFCTR_L21_TLB 0x30

/* ARMv8 implementation defined event types. */
#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD 0x40
#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST 0x41
#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD 0x42
#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST 0x43
#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD 0x4C
#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST 0x4D
#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD 0x4E
#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST 0x4F

/* ARMv8 Cortex-A53 specific event types. */
#define ARMV8_A53_PERFCTR_PREFETCH_LINEFILL 0xC2

/* ARMv8 Cortex-A57 and Cortex-A72 specific event types. */
#define ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD 0x40
#define ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST 0x41
#define ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD 0x42
#define ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST 0x43
#define ARMV8_A57_PERFCTR_DTLB_REFILL_LD 0x4c
#define ARMV8_A57_PERFCTR_DTLB_REFILL_ST 0x4d
/* ARMv8 Cavium ThunderX specific event types. */
#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST 0xE9
#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS 0xEA
#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS 0xEB
#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS 0xEC
#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS 0xED

/* PMUv3 HW events mapping. */
static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
Expand Down Expand Up @@ -132,6 +141,18 @@ static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
};

/*
 * Generic perf hardware event map for the Cavium ThunderX PMU, indexed by
 * PERF_COUNT_HW_* id. Every entry set explicitly below uses an architected
 * ARMV8_PMUV3_PERFCTR_* event number.
 */
static const unsigned armv8_thunder_perf_map[PERF_COUNT_HW_MAX] = {
/*
 * Default initialiser; presumably marks every index as unsupported before
 * the designated entries below override it -- confirm against the macro.
 */
PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
[PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
};

static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
Expand Down Expand Up @@ -175,16 +196,46 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
PERF_CACHE_MAP_ALL_UNSUPPORTED,

[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD,
[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST,
[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD,
[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST,

[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,

[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_LD,
[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_ST,
[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD,
[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST,

[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL,

[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
[C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
};

static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
PERF_CACHE_MAP_ALL_UNSUPPORTED,

[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD,
[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST,
[C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS,
[C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS,

[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
[C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS,
[C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS,

[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD,
[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD,
[C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST,
[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST,

[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL,

Expand Down Expand Up @@ -325,7 +376,6 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = {
NULL,
};


/*
* Perf Events' indices
*/
Expand Down Expand Up @@ -356,9 +406,10 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = {
#define ARMV8_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */
#define ARMV8_PMCR_X (1 << 4) /* Export to ETM */
#define ARMV8_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
#define ARMV8_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */
#define ARMV8_PMCR_N_SHIFT 11 /* Number of counters supported */
#define ARMV8_PMCR_N_MASK 0x1f
#define ARMV8_PMCR_MASK 0x3f /* Mask for writable bits */
#define ARMV8_PMCR_MASK 0x7f /* Mask for writable bits */

/*
* PMOVSR: counters overflow flag status reg
Expand All @@ -369,8 +420,8 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = {
/*
* PMXEVTYPER: Event selection reg
*/
#define ARMV8_EVTYPE_MASK 0xc80003ff /* Mask for writable bits */
#define ARMV8_EVTYPE_EVENT 0x3ff /* Mask for EVENT bits */
#define ARMV8_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */
#define ARMV8_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */

/*
* Event filters for PMUv3
Expand Down Expand Up @@ -445,9 +496,16 @@ static inline void armv8pmu_write_counter(struct perf_event *event, u32 value)
if (!armv8pmu_counter_valid(cpu_pmu, idx))
pr_err("CPU%u writing wrong counter %d\n",
smp_processor_id(), idx);
else if (idx == ARMV8_IDX_CYCLE_COUNTER)
asm volatile("msr pmccntr_el0, %0" :: "r" (value));
else if (armv8pmu_select_counter(idx) == idx)
else if (idx == ARMV8_IDX_CYCLE_COUNTER) {
/*
* Set the upper 32bits as this is a 64bit counter but we only
* count using the lower 32bits and we want an interrupt when
* it overflows.
*/
u64 value64 = 0xffffffff00000000ULL | value;

asm volatile("msr pmccntr_el0, %0" :: "r" (value64));
} else if (armv8pmu_select_counter(idx) == idx)
asm volatile("msr pmxevcntr_el0, %0" :: "r" (value));
}

Expand Down Expand Up @@ -722,8 +780,11 @@ static void armv8pmu_reset(void *info)
armv8pmu_disable_intens(idx);
}

/* Initialize & Reset PMNC: C and P bits. */
armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C);
/*
* Initialize & Reset PMNC. Request overflow interrupt for
* 64 bit cycle counter but cheat in armv8pmu_write_counter().
*/
armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C | ARMV8_PMCR_LC);
}

static int armv8_pmuv3_map_event(struct perf_event *event)
Expand All @@ -747,6 +808,13 @@ static int armv8_a57_map_event(struct perf_event *event)
ARMV8_EVTYPE_EVENT);
}

/*
 * Map a perf event to a Cavium ThunderX hardware event number.
 *
 * Delegates to armpmu_map_event() with the ThunderX generic event table and
 * the ThunderX cache event table. ARMV8_EVTYPE_EVENT (the mask for the EVENT
 * bits) is passed as the final argument; presumably it bounds raw event
 * numbers -- confirm in armpmu_map_event().
 *
 * Returns whatever armpmu_map_event() returns.
 */
static int armv8_thunder_map_event(struct perf_event *event)
{
return armpmu_map_event(event, &armv8_thunder_perf_map,
&armv8_thunder_perf_cache_map,
ARMV8_EVTYPE_EVENT);
}

static void armv8pmu_read_num_pmnc_events(void *info)
{
int *nb_cnt = info;
Expand Down Expand Up @@ -815,11 +883,21 @@ static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
return armv8pmu_probe_num_events(cpu_pmu);
}

/*
 * Initialise @cpu_pmu for the Cavium ThunderX CPU PMU.
 *
 * Runs the common armv8_pmu_init() setup first, then fills in the
 * ThunderX-specific name and event-mapping callback and attaches the
 * PMUv3 attribute groups.
 *
 * Returns the result of armv8pmu_probe_num_events().
 */
static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
{
armv8_pmu_init(cpu_pmu);
cpu_pmu->name = "armv8_cavium_thunder";
cpu_pmu->map_event = armv8_thunder_map_event;
/* Same attribute groups as the generic PMUv3 table defined in this file. */
cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups;
return armv8pmu_probe_num_events(cpu_pmu);
}

/*
 * Device-tree match table for ARMv8 CPU PMUs. Each entry pairs a
 * "compatible" string with the init function stored in .data.
 */
static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init},
{.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init},
{.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init},
{.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init},
{.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init},
/* Empty entry; presumably the end-of-table marker -- confirm against the consumer. */
{},
};

Expand Down
10 changes: 5 additions & 5 deletions drivers/bus/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,15 @@ config ARM_CCI400_PORT_CTRL
Low level power management driver for CCI400 cache coherent
interconnect for ARM platforms.

config ARM_CCI500_PMU
bool "ARM CCI500 PMU support"
config ARM_CCI5xx_PMU
bool "ARM CCI-500/CCI-550 PMU support"
depends on (ARM && CPU_V7) || ARM64
depends on PERF_EVENTS
select ARM_CCI_PMU
help
Support for PMU events monitoring on the ARM CCI-500 cache coherent
interconnect. CCI-500 provides 8 independent event counters, which
can count events pertaining to the slave/master interfaces as well
Support for PMU events monitoring on the ARM CCI-500/CCI-550 cache
coherent interconnects. Both of them provide 8 independent event counters,
which can count events pertaining to the slave/master interfaces as well
as the internal events to the CCI.

If unsure, say Y
Expand Down
Loading

0 comments on commit 2c856e1

Please sign in to comment.