Commit 9b82f05

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "The main changes in this cycle were:

  Kernel side changes:

   - A couple of x86/cpu cleanups and changes were grandfathered in due
     to patch dependencies. These clean up the set of CPU model/family
     matching macros with a consistent namespace and C99 initializer
     style.

   - A bunch of updates to various low level PMU drivers:
       * AMD Family 19h L3 uncore PMU
       * Intel Tiger Lake uncore support
       * misc fixes to LBR TOS sampling

   - optprobe fixes

   - perf/cgroup: optimize cgroup event sched-in processing

   - misc cleanups and fixes

  Tooling side changes are to:

   - perf {annotate,expr,record,report,stat,test}

   - perl scripting

   - libapi, libperf and libtraceevent

   - vendor events on Intel and S390, ARM cs-etm

   - Intel PT updates

   - Documentation changes and updates to core facilities

   - misc cleanups, fixes and other enhancements"
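To make the macro cleanup described above concrete, here is a minimal before/after sketch distilled from the conversions in this merge (see the aesni, amd/power and cstate diffs below); the example_ids table name is hypothetical, and the last argument of the new macros is optional per-entry driver data:

	/* Before: ad-hoc per-driver macros and open-coded initializers */
	static const struct x86_cpu_id example_ids[] = {
		X86_FEATURE_MATCH(X86_FEATURE_AES),
		{ .vendor = X86_VENDOR_AMD, .family = 0x15 },
		{}
	};

	/* After: one consistent X86_MATCH_* namespace with C99 initializers
	 * underneath; driver data (or NULL) is passed explicitly. */
	static const struct x86_cpu_id example_ids[] = {
		X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
		X86_MATCH_VENDOR_FAM(AMD, 0x15, NULL),
		X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_cstates),
		{}
	};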

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (89 commits)
  cpufreq/intel_pstate: Fix wrong macro conversion
  x86/cpu: Cleanup the now unused CPU match macros
  hwrng: via_rng: Convert to new X86 CPU match macros
  crypto: Convert to new CPU match macros
  ASoC: Intel: Convert to new X86 CPU match macros
  powercap/intel_rapl: Convert to new X86 CPU match macros
  PCI: intel-mid: Convert to new X86 CPU match macros
  mmc: sdhci-acpi: Convert to new X86 CPU match macros
  intel_idle: Convert to new X86 CPU match macros
  extcon: axp288: Convert to new X86 CPU match macros
  thermal: Convert to new X86 CPU match macros
  hwmon: Convert to new X86 CPU match macros
  platform/x86: Convert to new CPU match macros
  EDAC: Convert to new X86 CPU match macros
  cpufreq: Convert to new X86 CPU match macros
  ACPI: Convert to new X86 CPU match macros
  x86/platform: Convert to new CPU match macros
  x86/kernel: Convert to new CPU match macros
  x86/kvm: Convert to new CPU match macros
  x86/perf/events: Convert to new CPU match macros
  ...
Linus Torvalds committed Mar 30, 2020
2 parents 4b9fd8a + 629b3df commit 9b82f05
Showing 161 changed files with 3,532 additions and 2,098 deletions.
1 change: 1 addition & 0 deletions arch/powerpc/perf/core-book3s.c
@@ -518,6 +518,7 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *
 		}
 	}
 	cpuhw->bhrb_stack.nr = u_index;
+	cpuhw->bhrb_stack.hw_idx = -1ULL;
 	return;
 }

1 change: 0 additions & 1 deletion arch/x86/Kconfig
@@ -1875,7 +1875,6 @@ config X86_SMAP

 config X86_UMIP
 	def_bool y
-	depends on CPU_SUP_INTEL || CPU_SUP_AMD
 	prompt "User Mode Instruction Prevention" if EXPERT
 	---help---
 	  User Mode Instruction Prevention (UMIP) is a security feature in

2 changes: 1 addition & 1 deletion arch/x86/crypto/aesni-intel_glue.c
@@ -1064,7 +1064,7 @@ static struct aead_alg aesni_aeads[0];
 static struct simd_aead_alg *aesni_simd_aeads[ARRAY_SIZE(aesni_aeads)];

 static const struct x86_cpu_id aesni_cpu_id[] = {
-	X86_FEATURE_MATCH(X86_FEATURE_AES),
+	X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);

2 changes: 1 addition & 1 deletion arch/x86/crypto/crc32-pclmul_glue.c
@@ -170,7 +170,7 @@ static struct shash_alg alg = {
 };

 static const struct x86_cpu_id crc32pclmul_cpu_id[] = {
-	X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
+	X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id);

2 changes: 1 addition & 1 deletion arch/x86/crypto/crc32c-intel_glue.c
@@ -221,7 +221,7 @@ static struct shash_alg alg = {
 };

 static const struct x86_cpu_id crc32c_cpu_id[] = {
-	X86_FEATURE_MATCH(X86_FEATURE_XMM4_2),
+	X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);

2 changes: 1 addition & 1 deletion arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -114,7 +114,7 @@ static struct shash_alg alg = {
 };

 static const struct x86_cpu_id crct10dif_cpu_id[] = {
-	X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
+	X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);

2 changes: 1 addition & 1 deletion arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -313,7 +313,7 @@ static struct ahash_alg ghash_async_alg = {
 };

 static const struct x86_cpu_id pcmul_cpu_id[] = {
-	X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */
+	X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), /* Pickle-Mickle-Duck */
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);

2 changes: 1 addition & 1 deletion arch/x86/events/amd/power.c
@@ -259,7 +259,7 @@ static int power_cpu_init(unsigned int cpu)
 }

 static const struct x86_cpu_id cpu_match[] = {
-	{ .vendor = X86_VENDOR_AMD, .family = 0x15 },
+	X86_MATCH_VENDOR_FAM(AMD, 0x15, NULL),
 	{},
 };

44 changes: 31 additions & 13 deletions arch/x86/events/amd/uncore.c
@@ -180,6 +180,31 @@ static void amd_uncore_del(struct perf_event *event, int flags)
 	hwc->idx = -1;
 }

+/*
+ * Convert logical CPU number to L3 PMC Config ThreadMask format
+ */
+static u64 l3_thread_slice_mask(int cpu)
+{
+	u64 thread_mask, core = topology_core_id(cpu);
+	unsigned int shift, thread = 0;
+
+	if (topology_smt_supported() && !topology_is_primary_thread(cpu))
+		thread = 1;
+
+	if (boot_cpu_data.x86 <= 0x18) {
+		shift = AMD64_L3_THREAD_SHIFT + 2 * (core % 4) + thread;
+		thread_mask = BIT_ULL(shift);
+
+		return AMD64_L3_SLICE_MASK | thread_mask;
+	}
+
+	core = (core << AMD64_L3_COREID_SHIFT) & AMD64_L3_COREID_MASK;
+	shift = AMD64_L3_THREAD_SHIFT + thread;
+	thread_mask = BIT_ULL(shift);
+
+	return AMD64_L3_EN_ALL_SLICES | core | thread_mask;
+}
+
 static int amd_uncore_event_init(struct perf_event *event)
 {
 	struct amd_uncore *uncore;
@@ -203,18 +228,11 @@ static int amd_uncore_event_init(struct perf_event *event)
 		return -EINVAL;

 	/*
-	 * SliceMask and ThreadMask need to be set for certain L3 events in
-	 * Family 17h. For other events, the two fields do not affect the count.
+	 * SliceMask and ThreadMask need to be set for certain L3 events.
+	 * For other events, the two fields do not affect the count.
 	 */
-	if (l3_mask && is_llc_event(event)) {
-		int thread = 2 * (cpu_data(event->cpu).cpu_core_id % 4);
-
-		if (smp_num_siblings > 1)
-			thread += cpu_data(event->cpu).apicid & 1;
-
-		hwc->config |= (1ULL << (AMD64_L3_THREAD_SHIFT + thread) &
-				AMD64_L3_THREAD_MASK) | AMD64_L3_SLICE_MASK;
-	}
+	if (l3_mask && is_llc_event(event))
+		hwc->config |= l3_thread_slice_mask(event->cpu);

 	uncore = event_to_amd_uncore(event);
 	if (!uncore)
@@ -520,9 +538,9 @@ static int __init amd_uncore_init(void)
 	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
 		return -ENODEV;

-	if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) {
+	if (boot_cpu_data.x86 >= 0x17) {
 		/*
-		 * For F17h or F18h, the Northbridge counters are
+		 * For F17h and above, the Northbridge counters are
 		 * repurposed as Data Fabric counters. Also, L3
 		 * counters are supported too. The PMUs are exported
 		 * based on family as either L2 or L3 and NB or DF.

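For a rough standalone illustration of the ThreadMask layout computed by l3_thread_slice_mask() on pre-Family-19h parts, the sketch below compiles on its own; the constant values mirror AMD64_L3_THREAD_SHIFT (56) and AMD64_L3_SLICE_MASK (0xF << 48) from the kernel headers, but treat them here as assumptions of the sketch rather than the authoritative definitions:

	/* Hedged sketch of the family <= 0x18 path of l3_thread_slice_mask();
	 * constant values are assumptions mirroring the kernel headers. */
	#include <stdint.h>
	#include <stdio.h>

	#define L3_THREAD_SHIFT	56			/* assumed AMD64_L3_THREAD_SHIFT */
	#define L3_SLICE_MASK	(0xFULL << 48)		/* assumed AMD64_L3_SLICE_MASK */
	#define BIT_ULL(n)	(1ULL << (n))

	static uint64_t l3_mask_fam17h(unsigned int core, unsigned int thread)
	{
		/* two ThreadMask bits per core, cores taken modulo 4 */
		unsigned int shift = L3_THREAD_SHIFT + 2 * (core % 4) + thread;

		/* count on all L3 slices, but only for this hardware thread */
		return L3_SLICE_MASK | BIT_ULL(shift);
	}

	int main(void)
	{
		/* core 5, secondary SMT sibling -> bit 59 plus the slice mask */
		printf("%#llx\n", (unsigned long long)l3_mask_fam17h(5, 1));
		return 0;
	}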
25 changes: 22 additions & 3 deletions arch/x86/events/intel/core.c
@@ -1945,6 +1945,14 @@ static __initconst const u64 knl_hw_cache_extra_regs
  * intel_bts events don't coexist with intel PMU's BTS events because of
  * x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them
  * disabled around intel PMU's event batching etc, only inside the PMI handler.
+ *
+ * Avoid PEBS_ENABLE MSR access in PMIs.
+ * The GLOBAL_CTRL has been disabled. All the counters do not count anymore.
+ * It doesn't matter if the PEBS is enabled or not.
+ * Usually, the PEBS status are not changed in PMIs. It's unnecessary to
+ * access PEBS_ENABLE MSR in disable_all()/enable_all().
+ * However, there are some cases which may change PEBS status, e.g. PMI
+ * throttle. The PEBS_ENABLE should be updated where the status changes.
  */
 static void __intel_pmu_disable_all(void)
 {
@@ -1954,21 +1962,19 @@ static void __intel_pmu_disable_all(void)

 	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
 		intel_pmu_disable_bts();
-
-	intel_pmu_pebs_disable_all();
 }

 static void intel_pmu_disable_all(void)
 {
 	__intel_pmu_disable_all();
+	intel_pmu_pebs_disable_all();
 	intel_pmu_lbr_disable_all();
 }

 static void __intel_pmu_enable_all(int added, bool pmi)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

-	intel_pmu_pebs_enable_all();
 	intel_pmu_lbr_enable_all(pmi);
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
 	       x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
@@ -1986,6 +1992,7 @@ static void __intel_pmu_enable_all(int added, bool pmi)

 static void intel_pmu_enable_all(int added)
 {
+	intel_pmu_pebs_enable_all();
 	__intel_pmu_enable_all(added, false);
 }

@@ -2374,9 +2381,21 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 	 * PEBS overflow sets bit 62 in the global status register
 	 */
 	if (__test_and_clear_bit(62, (unsigned long *)&status)) {
+		u64 pebs_enabled = cpuc->pebs_enabled;
+
 		handled++;
 		x86_pmu.drain_pebs(regs);
 		status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
+
+		/*
+		 * PMI throttle may be triggered, which stops the PEBS event.
+		 * Although cpuc->pebs_enabled is updated accordingly, the
+		 * MSR_IA32_PEBS_ENABLE is not updated. Because the
+		 * cpuc->enabled has been forced to 0 in PMI.
+		 * Update the MSR if pebs_enabled is changed.
+		 */
+		if (pebs_enabled != cpuc->pebs_enabled)
+			wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 	}

 	/*

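The handle_pmi_common() change above follows a snapshot-and-resync pattern: now that disable_all()/enable_all() leave MSR_IA32_PEBS_ENABLE alone, any path inside the PMI that alters the software copy (such as PMI throttle) must write the MSR itself. Below is a distilled, compilable sketch of that pattern; cpu_state, write_msr() and drain_pebs() are simplified stand-ins for cpu_hw_events, wrmsrl() and x86_pmu.drain_pebs():

	/* Snapshot-and-resync sketch; types and helpers are stand-ins. */
	#include <stdint.h>
	#include <stdio.h>

	#define MSR_IA32_PEBS_ENABLE 0x3f1	/* architectural MSR number */

	struct cpu_state {
		uint64_t pebs_enabled;		/* software copy of the enable bits */
	};

	static void write_msr(uint32_t msr, uint64_t val)
	{
		printf("wrmsr %#x <- %#llx\n", msr, (unsigned long long)val);
	}

	static void drain_pebs(struct cpu_state *s)
	{
		/* may throttle an event: only the software copy changes here,
		 * since counters are globally disabled for the whole PMI */
		s->pebs_enabled &= ~1ULL;
	}

	static void handle_pebs_overflow(struct cpu_state *s)
	{
		uint64_t before = s->pebs_enabled;	/* snapshot before draining */

		drain_pebs(s);

		/* resync the hardware only if the handler changed the state */
		if (before != s->pebs_enabled)
			write_msr(MSR_IA32_PEBS_ENABLE, s->pebs_enabled);
	}

	int main(void)
	{
		struct cpu_state s = { .pebs_enabled = 0x1 };

		handle_pebs_overflow(&s);
		return 0;
	}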
83 changes: 40 additions & 43 deletions arch/x86/events/intel/cstate.c
@@ -594,63 +594,60 @@ static const struct cstate_model glm_cstates __initconst = {
 };


-#define X86_CSTATES_MODEL(model, states)				\
-	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
-
 static const struct x86_cpu_id intel_cstates_match[] __initconst = {
-	X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM, nhm_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &nhm_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &nhm_cstates),

-	X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE, nhm_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &nhm_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &nhm_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &nhm_cstates),

-	X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE, snb_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &snb_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &snb_cstates),

-	X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &snb_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &snb_cstates),

-	X86_CSTATES_MODEL(INTEL_FAM6_HASWELL, snb_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_G, snb_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &snb_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &snb_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &snb_cstates),

-	X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_L, hswult_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &hswult_cstates),

-	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT, slm_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_D, slm_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &slm_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &slm_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &slm_cstates),

-	X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL, snb_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_D, snb_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_G, snb_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &snb_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &snb_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &snb_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &snb_cstates),

-	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_L, snb_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE, snb_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &snb_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &snb_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &snb_cstates),

-	X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_L, hswult_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE, hswult_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE_L, hswult_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE, hswult_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &hswult_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &hswult_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &hswult_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &hswult_cstates),

-	X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_L, cnl_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &cnl_cstates),

-	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &knl_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &knl_cstates),

-	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_D, glm_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT_D, glm_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT, glm_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &glm_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &glm_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &glm_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &glm_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &glm_cstates),

-	X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, icl_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, icl_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE_L, icl_cstates),
-	X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE, icl_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates),
 	{ },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);

9 changes: 9 additions & 0 deletions arch/x86/events/intel/lbr.c
@@ -585,6 +585,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 		cpuc->lbr_entries[i].reserved = 0;
 	}
 	cpuc->lbr_stack.nr = i;
+	cpuc->lbr_stack.hw_idx = tos;
 }

 /*
@@ -680,6 +681,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 		out++;
 	}
 	cpuc->lbr_stack.nr = out;
+	cpuc->lbr_stack.hw_idx = tos;
 }

 void intel_pmu_lbr_read(void)
@@ -1120,6 +1122,13 @@ void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr)
 	int i;

 	cpuc->lbr_stack.nr = x86_pmu.lbr_nr;
+
+	/* Cannot get TOS for large PEBS */
+	if (cpuc->n_pebs == cpuc->n_large_pebs)
+		cpuc->lbr_stack.hw_idx = -1ULL;
+	else
+		cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();
+
 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 		u64 info = lbr->lbr[i].info;
 		struct perf_branch_entry *e = &cpuc->lbr_entries[i];
