Skip to content

Commit

Permalink
Merge branch 'will/for-next/perf' into for-next/core
Browse files Browse the repository at this point in the history
* will/for-next/perf:
  arm64: pmuv3: use arm_pmu ACPI framework
  arm64: pmuv3: handle !PMUv3 when probing
  drivers/perf: arm_pmu: add ACPI framework
  arm64: add function to get a cpu's MADT GICC table
  drivers/perf: arm_pmu: split out platform device probe logic
  drivers/perf: arm_pmu: move irq request/free into probe
  drivers/perf: arm_pmu: split cpu-local irq request/free
  drivers/perf: arm_pmu: rename irq request/free functions
  drivers/perf: arm_pmu: handle no platform_device
  drivers/perf: arm_pmu: simplify cpu_pmu_request_irqs()
  drivers/perf: arm_pmu: factor out pmu registration
  drivers/perf: arm_pmu: fold init into alloc
  drivers/perf: arm_pmu: define armpmu_init_fn
  drivers/perf: arm_pmu: remove pointless PMU disabling
  perf: qcom: Add L3 cache PMU driver
  drivers/perf: arm_pmu: split irq request from enable
  drivers/perf: arm_pmu: manage interrupts per-cpu
  drivers/perf: arm_pmu: rework per-cpu allocation
  MAINTAINERS: Add file patterns for perf device tree bindings
  • Loading branch information
Catalin Marinas committed Apr 12, 2017
2 parents d91750f + f00fa5f commit 494bc3c
Show file tree
Hide file tree
Showing 13 changed files with 1,660 additions and 410 deletions.
25 changes: 25 additions & 0 deletions Documentation/perf/qcom_l3_pmu.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Qualcomm Datacenter Technologies L3 Cache Performance Monitoring Unit (PMU)
===========================================================================

This driver supports the L3 cache PMUs found in Qualcomm Datacenter Technologies
Centriq SoCs. The L3 cache on these SOCs is composed of multiple slices, shared
by all cores within a socket. Each slice is exposed as a separate uncore perf
PMU with device name l3cache_<socket>_<instance>. User space is responsible
for aggregating across slices.

The driver provides a description of its available events and configuration
options in sysfs, see /sys/devices/l3cache*. Given that these are uncore PMUs
the driver also exposes a "cpumask" sysfs attribute which contains a mask
consisting of one CPU per socket which will be used to handle all the PMU
events on that socket.

The hardware implements 32bit event counters and has a flat 8bit event space
exposed via the "event" format attribute. In addition to the 32bit physical
counters the driver supports virtual 64bit hardware counters by using hardware
counter chaining. This feature is exposed via the "lc" (long counter) format
flag. E.g.:

perf stat -e l3cache_0_0/read-miss,lc/

Given that these are uncore PMUs the driver does not support sampling, therefore
"perf record" will not work. Per-task perf sessions are not supported.
1 change: 1 addition & 0 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -976,6 +976,7 @@ F: arch/arm*/include/asm/perf_event.h
F: drivers/perf/*
F: include/linux/perf/arm_pmu.h
F: Documentation/devicetree/bindings/arm/pmu.txt
F: Documentation/devicetree/bindings/perf/

ARM PORT
M: Russell King <linux@armlinux.org.uk>
Expand Down
2 changes: 2 additions & 0 deletions arch/arm64/include/asm/acpi.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ static inline bool acpi_has_cpu_in_madt(void)
return true;
}

struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu);

static inline void arch_fix_phys_package_id(int num, u32 slot) { }
void __init acpi_init_cpus(void);

Expand Down
113 changes: 80 additions & 33 deletions arch/arm64/kernel/perf_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -957,11 +957,26 @@ static int armv8_vulcan_map_event(struct perf_event *event)
ARMV8_PMU_EVTYPE_EVENT);
}

struct armv8pmu_probe_info {
struct arm_pmu *pmu;
bool present;
};

static void __armv8pmu_probe_pmu(void *info)
{
struct arm_pmu *cpu_pmu = info;
struct armv8pmu_probe_info *probe = info;
struct arm_pmu *cpu_pmu = probe->pmu;
u64 dfr0, pmuver;
u32 pmceid[2];

dfr0 = read_sysreg(id_aa64dfr0_el1);
pmuver = cpuid_feature_extract_unsigned_field(dfr0,
ID_AA64DFR0_PMUVER_SHIFT);
if (pmuver != 1)
return;

probe->present = true;

/* Read the nb of CNTx counters supported from PMNC */
cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT)
& ARMV8_PMU_PMCR_N_MASK;
Expand All @@ -979,13 +994,27 @@ static void __armv8pmu_probe_pmu(void *info)

static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
{
return smp_call_function_any(&cpu_pmu->supported_cpus,
struct armv8pmu_probe_info probe = {
.pmu = cpu_pmu,
.present = false,
};
int ret;

ret = smp_call_function_any(&cpu_pmu->supported_cpus,
__armv8pmu_probe_pmu,
cpu_pmu, 1);
&probe, 1);
if (ret)
return ret;

return probe.present ? 0 : -ENODEV;
}

static void armv8_pmu_init(struct arm_pmu *cpu_pmu)
static int armv8_pmu_init(struct arm_pmu *cpu_pmu)
{
int ret = armv8pmu_probe_pmu(cpu_pmu);
if (ret)
return ret;

cpu_pmu->handle_irq = armv8pmu_handle_irq,
cpu_pmu->enable = armv8pmu_enable_event,
cpu_pmu->disable = armv8pmu_disable_event,
Expand All @@ -997,78 +1026,104 @@ static void armv8_pmu_init(struct arm_pmu *cpu_pmu)
cpu_pmu->reset = armv8pmu_reset,
cpu_pmu->max_period = (1LLU << 32) - 1,
cpu_pmu->set_event_filter = armv8pmu_set_event_filter;

return 0;
}

static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu)
{
armv8_pmu_init(cpu_pmu);
int ret = armv8_pmu_init(cpu_pmu);
if (ret)
return ret;

cpu_pmu->name = "armv8_pmuv3";
cpu_pmu->map_event = armv8_pmuv3_map_event;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
&armv8_pmuv3_events_attr_group;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
&armv8_pmuv3_format_attr_group;
return armv8pmu_probe_pmu(cpu_pmu);

return 0;
}

static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
{
armv8_pmu_init(cpu_pmu);
int ret = armv8_pmu_init(cpu_pmu);
if (ret)
return ret;

cpu_pmu->name = "armv8_cortex_a53";
cpu_pmu->map_event = armv8_a53_map_event;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
&armv8_pmuv3_events_attr_group;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
&armv8_pmuv3_format_attr_group;
return armv8pmu_probe_pmu(cpu_pmu);

return 0;
}

static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
{
armv8_pmu_init(cpu_pmu);
int ret = armv8_pmu_init(cpu_pmu);
if (ret)
return ret;

cpu_pmu->name = "armv8_cortex_a57";
cpu_pmu->map_event = armv8_a57_map_event;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
&armv8_pmuv3_events_attr_group;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
&armv8_pmuv3_format_attr_group;
return armv8pmu_probe_pmu(cpu_pmu);

return 0;
}

static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
{
armv8_pmu_init(cpu_pmu);
int ret = armv8_pmu_init(cpu_pmu);
if (ret)
return ret;

cpu_pmu->name = "armv8_cortex_a72";
cpu_pmu->map_event = armv8_a57_map_event;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
&armv8_pmuv3_events_attr_group;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
&armv8_pmuv3_format_attr_group;
return armv8pmu_probe_pmu(cpu_pmu);

return 0;
}

static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
{
armv8_pmu_init(cpu_pmu);
int ret = armv8_pmu_init(cpu_pmu);
if (ret)
return ret;

cpu_pmu->name = "armv8_cavium_thunder";
cpu_pmu->map_event = armv8_thunder_map_event;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
&armv8_pmuv3_events_attr_group;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
&armv8_pmuv3_format_attr_group;
return armv8pmu_probe_pmu(cpu_pmu);

return 0;
}

static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
{
armv8_pmu_init(cpu_pmu);
int ret = armv8_pmu_init(cpu_pmu);
if (ret)
return ret;

cpu_pmu->name = "armv8_brcm_vulcan";
cpu_pmu->map_event = armv8_vulcan_map_event;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
&armv8_pmuv3_events_attr_group;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
&armv8_pmuv3_format_attr_group;
return armv8pmu_probe_pmu(cpu_pmu);

return 0;
}

static const struct of_device_id armv8_pmu_of_device_ids[] = {
Expand All @@ -1081,24 +1136,9 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
{},
};

/*
* Non DT systems have their micro/arch events probed at run-time.
* A fairly complete list of generic events are provided and ones that
* aren't supported by the current PMU are disabled.
*/
static const struct pmu_probe_info armv8_pmu_probe_table[] = {
PMU_PROBE(0, 0, armv8_pmuv3_init), /* enable all defined counters */
{ /* sentinel value */ }
};

static int armv8_pmu_device_probe(struct platform_device *pdev)
{
if (acpi_disabled)
return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids,
NULL);

return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids,
armv8_pmu_probe_table);
return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL);
}

static struct platform_driver armv8_pmu_driver = {
Expand All @@ -1109,4 +1149,11 @@ static struct platform_driver armv8_pmu_driver = {
.probe = armv8_pmu_device_probe,
};

builtin_platform_driver(armv8_pmu_driver);
static int __init armv8_pmu_driver_init(void)
{
if (acpi_disabled)
return platform_driver_register(&armv8_pmu_driver);
else
return arm_pmu_acpi_probe(armv8_pmuv3_init);
}
device_initcall(armv8_pmu_driver_init)
10 changes: 10 additions & 0 deletions arch/arm64/kernel/smp.c
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,13 @@ static bool bootcpu_valid __initdata;
static unsigned int cpu_count = 1;

#ifdef CONFIG_ACPI
static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS];

struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu)
{
return &cpu_madt_gicc[cpu];
}

/*
* acpi_map_gic_cpu_interface - parse processor MADT entry
*
Expand Down Expand Up @@ -555,6 +562,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
return;
}
bootcpu_valid = true;
cpu_madt_gicc[0] = *processor;
early_map_cpu_to_node(0, acpi_numa_get_nid(0, hwid));
return;
}
Expand All @@ -565,6 +573,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
/* map the logical cpu id to cpu MPIDR */
cpu_logical_map(cpu_count) = hwid;

cpu_madt_gicc[cpu_count] = *processor;

/*
* Set-up the ACPI parking protocol cpu entries
* while initializing the cpu_logical_map to
Expand Down
14 changes: 14 additions & 0 deletions drivers/perf/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ config ARM_PMU
Say y if you want to use CPU performance monitors on ARM-based
systems.

config ARM_PMU_ACPI
depends on ARM_PMU && ACPI
def_bool y

config QCOM_L2_PMU
bool "Qualcomm Technologies L2-cache PMU"
depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI
Expand All @@ -21,6 +25,16 @@ config QCOM_L2_PMU
Adds the L2 cache PMU into the perf events subsystem for
monitoring L2 cache events.

config QCOM_L3_PMU
bool "Qualcomm Technologies L3-cache PMU"
depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI
select QCOM_IRQ_COMBINER
help
Provides support for the L3 cache performance monitor unit (PMU)
in Qualcomm Technologies processors.
Adds the L3 cache PMU into the perf events subsystem for
monitoring L3 cache events.

config XGENE_PMU
depends on PERF_EVENTS && ARCH_XGENE
bool "APM X-Gene SoC PMU"
Expand Down
4 changes: 3 additions & 1 deletion drivers/perf/Makefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
obj-$(CONFIG_ARM_PMU) += arm_pmu.o
obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
Loading

0 comments on commit 494bc3c

Please sign in to comment.