Skip to content

Commit

Permalink
perf/intel: Remove Perfmon-v4 counter_freezing support
Browse files Browse the repository at this point in the history
Perfmon-v4 counter freezing is fundamentally broken; remove this default
disabled code to make sure nobody uses it.

The feature is called Freeze-on-PMI in the SDM, and if it would do that,
there wouldn't actually be a problem, *however* it does something subtly
different. It globally disables the whole PMU when it raises the PMI,
not when the PMI hits.

This means there's a window between the PMI getting raised and the PMI
actually getting served where we loose events and this violates the
perf counter independence. That is, a counting event should not result
in a different event count when there is a sampling event co-scheduled.

This is known to break existing software (RR).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
  • Loading branch information
Peter Zijlstra committed Jan 27, 2021
1 parent abd562d commit 3daa96d
Show file tree
Hide file tree
Showing 3 changed files with 1 addition and 160 deletions.
6 changes: 0 additions & 6 deletions Documentation/admin-guide/kernel-parameters.txt
Original file line number Diff line number Diff line change
Expand Up @@ -944,12 +944,6 @@
causing system reset or hang due to sending
INIT from AP to BSP.

perf_v4_pmi= [X86,INTEL]
Format: <bool>
Disable Intel PMU counter freezing feature.
The feature only exists starting from
Arch Perfmon v4 (Skylake and newer).

disable_ddw [PPC/PSERIES]
Disable Dynamic DMA Window support. Use this
to workaround buggy firmware.
Expand Down
152 changes: 0 additions & 152 deletions arch/x86/events/intel/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2134,18 +2134,6 @@ static void intel_tfa_pmu_enable_all(int added)
intel_pmu_enable_all(added);
}

static void enable_counter_freeze(void)
{
update_debugctlmsr(get_debugctlmsr() |
DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
}

static void disable_counter_freeze(void)
{
update_debugctlmsr(get_debugctlmsr() &
~DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
}

static inline u64 intel_pmu_get_status(void)
{
u64 status;
Expand Down Expand Up @@ -2709,95 +2697,6 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
return handled;
}

static bool disable_counter_freezing = true;
static int __init intel_perf_counter_freezing_setup(char *s)
{
bool res;

if (kstrtobool(s, &res))
return -EINVAL;

disable_counter_freezing = !res;
return 1;
}
__setup("perf_v4_pmi=", intel_perf_counter_freezing_setup);

/*
* Simplified handler for Arch Perfmon v4:
* - We rely on counter freezing/unfreezing to enable/disable the PMU.
* This is done automatically on PMU ack.
* - Ack the PMU only after the APIC.
*/

static int intel_pmu_handle_irq_v4(struct pt_regs *regs)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int handled = 0;
bool bts = false;
u64 status;
int pmu_enabled = cpuc->enabled;
int loops = 0;

/* PMU has been disabled because of counter freezing */
cpuc->enabled = 0;
if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
bts = true;
intel_bts_disable_local();
handled = intel_pmu_drain_bts_buffer();
handled += intel_bts_interrupt();
}
status = intel_pmu_get_status();
if (!status)
goto done;
again:
intel_pmu_lbr_read();
if (++loops > 100) {
static bool warned;

if (!warned) {
WARN(1, "perfevents: irq loop stuck!\n");
perf_event_print_debug();
warned = true;
}
intel_pmu_reset();
goto done;
}


handled += handle_pmi_common(regs, status);
done:
/* Ack the PMI in the APIC */
apic_write(APIC_LVTPC, APIC_DM_NMI);

/*
* The counters start counting immediately while ack the status.
* Make it as close as possible to IRET. This avoids bogus
* freezing on Skylake CPUs.
*/
if (status) {
intel_pmu_ack_status(status);
} else {
/*
* CPU may issues two PMIs very close to each other.
* When the PMI handler services the first one, the
* GLOBAL_STATUS is already updated to reflect both.
* When it IRETs, the second PMI is immediately
* handled and it sees clear status. At the meantime,
* there may be a third PMI, because the freezing bit
* isn't set since the ack in first PMI handlers.
* Double check if there is more work to be done.
*/
status = intel_pmu_get_status();
if (status)
goto again;
}

if (bts)
intel_bts_enable_local();
cpuc->enabled = pmu_enabled;
return handled;
}

/*
* This handler is triggered by the local APIC, so the APIC IRQ handling
* rules apply:
Expand Down Expand Up @@ -4074,9 +3973,6 @@ static void intel_pmu_cpu_starting(int cpu)
if (x86_pmu.version > 1)
flip_smm_bit(&x86_pmu.attr_freeze_on_smi);

if (x86_pmu.counter_freezing)
enable_counter_freeze();

/* Disable perf metrics if any added CPU doesn't support it. */
if (x86_pmu.intel_cap.perf_metrics) {
union perf_capabilities perf_cap;
Expand Down Expand Up @@ -4147,9 +4043,6 @@ static void free_excl_cntrs(struct cpu_hw_events *cpuc)
static void intel_pmu_cpu_dying(int cpu)
{
fini_debug_store_on_cpu(cpu);

if (x86_pmu.counter_freezing)
disable_counter_freeze();
}

void intel_cpuc_finish(struct cpu_hw_events *cpuc)
Expand Down Expand Up @@ -4541,39 +4434,6 @@ static __init void intel_nehalem_quirk(void)
}
}

static const struct x86_cpu_desc counter_freezing_ucodes[] = {
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 2, 0x0000000e),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 9, 0x0000002e),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 10, 0x00000008),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_D, 1, 0x00000028),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 1, 0x00000028),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 8, 0x00000006),
{}
};

static bool intel_counter_freezing_broken(void)
{
return !x86_cpu_has_min_microcode_rev(counter_freezing_ucodes);
}

static __init void intel_counter_freezing_quirk(void)
{
/* Check if it's already disabled */
if (disable_counter_freezing)
return;

/*
* If the system starts with the wrong ucode, leave the
* counter-freezing feature permanently disabled.
*/
if (intel_counter_freezing_broken()) {
pr_info("PMU counter freezing disabled due to CPU errata,"
"please upgrade microcode\n");
x86_pmu.counter_freezing = false;
x86_pmu.handle_irq = intel_pmu_handle_irq;
}
}

/*
* enable software workaround for errata:
* SNB: BJ122
Expand Down Expand Up @@ -4959,9 +4819,6 @@ __init int intel_pmu_init(void)
max((int)edx.split.num_counters_fixed, assume);
}

if (version >= 4)
x86_pmu.counter_freezing = !disable_counter_freezing;

if (boot_cpu_has(X86_FEATURE_PDCM)) {
u64 capabilities;

Expand Down Expand Up @@ -5089,7 +4946,6 @@ __init int intel_pmu_init(void)

case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_ATOM_GOLDMONT_D:
x86_add_quirk(intel_counter_freezing_quirk);
memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
Expand All @@ -5116,7 +4972,6 @@ __init int intel_pmu_init(void)
break;

case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
x86_add_quirk(intel_counter_freezing_quirk);
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
Expand Down Expand Up @@ -5581,13 +5436,6 @@ __init int intel_pmu_init(void)
pr_cont("full-width counters, ");
}

/*
* For arch perfmon 4 use counter freezing to avoid
* several MSR accesses in the PMI.
*/
if (x86_pmu.counter_freezing)
x86_pmu.handle_irq = intel_pmu_handle_irq_v4;

if (x86_pmu.intel_cap.perf_metrics)
x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;

Expand Down
3 changes: 1 addition & 2 deletions arch/x86/events/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -682,8 +682,7 @@ struct x86_pmu {

/* PMI handler bits */
unsigned int late_ack :1,
enabled_ack :1,
counter_freezing :1;
enabled_ack :1;
/*
* sysfs attrs
*/
Expand Down

0 comments on commit 3daa96d

Please sign in to comment.