Skip to content

Commit

Permalink
Merge tag 'perf-urgent-2021-08-08' of git://git.kernel.org/pub/scm/li…
Browse files Browse the repository at this point in the history
…nux/kernel/git/tip/tip

Pull perf fixes from Thomas Gleixner:
 "A set of perf fixes:

   - Correct the permission checks for perf events which send SIGTRAP to
     a different process and clean up that code to be more readable.

   - Prevent an out of bound MSR access in the x86 perf code which
     happened due to an incomplete limiting to the actually available
     hardware counters.

   - Prevent access to the AMD64_EVENTSEL_HOSTONLY bit when running
     inside a guest.

   - Handle small core counter re-enabling correctly by issuing an ACK
     right before reenabling it to prevent a stale PEBS record being
     kept around"

* tag 'perf-urgent-2021-08-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel: Apply mid ACK for small core
  perf/x86/amd: Don't touch the AMD64_EVENTSEL_HOSTONLY bit inside the guest
  perf/x86: Fix out of bound MSR access
  perf: Refactor permissions check into perf_check_permission()
  perf: Fix required permissions if sigtrap is requested
  • Loading branch information
Linus Torvalds committed Aug 8, 2021
2 parents 6674586 + acade63 commit 74eedeb
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 17 deletions.
12 changes: 7 additions & 5 deletions arch/x86/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2489,13 +2489,15 @@ void perf_clear_dirty_counters(void)
return;

for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
/* Metrics and fake events don't have corresponding HW counters. */
if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR))
continue;
else if (i >= INTEL_PMC_IDX_FIXED)
if (i >= INTEL_PMC_IDX_FIXED) {
/* Metrics and fake events don't have corresponding HW counters. */
if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed))
continue;

wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
else
} else {
wrmsrl(x86_pmu_event_addr(i), 0);
}
}

bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX);
Expand Down
23 changes: 15 additions & 8 deletions arch/x86/events/intel/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2904,24 +2904,28 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
*/
static int intel_pmu_handle_irq(struct pt_regs *regs)
{
struct cpu_hw_events *cpuc;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
bool late_ack = hybrid_bit(cpuc->pmu, late_ack);
bool mid_ack = hybrid_bit(cpuc->pmu, mid_ack);
int loops;
u64 status;
int handled;
int pmu_enabled;

cpuc = this_cpu_ptr(&cpu_hw_events);

/*
* Save the PMU state.
* It needs to be restored when leaving the handler.
*/
pmu_enabled = cpuc->enabled;
/*
* No known reason to not always do late ACK,
* but just in case do it opt-in.
* In general, the early ACK is only applied for old platforms.
* For big cores starting from Haswell, the late ACK should be
* applied.
* For small cores after Tremont, we have to do the ACK right
* before re-enabling counters, which is in the middle of the
* NMI handler.
*/
if (!x86_pmu.late_ack)
if (!late_ack && !mid_ack)
apic_write(APIC_LVTPC, APIC_DM_NMI);
intel_bts_disable_local();
cpuc->enabled = 0;
Expand Down Expand Up @@ -2958,6 +2962,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
goto again;

done:
if (mid_ack)
apic_write(APIC_LVTPC, APIC_DM_NMI);
/* Only restore PMU state when it's active. See x86_pmu_disable(). */
cpuc->enabled = pmu_enabled;
if (pmu_enabled)
Expand All @@ -2969,7 +2975,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
* have been reset. This avoids spurious NMIs on
* Haswell CPUs.
*/
if (x86_pmu.late_ack)
if (late_ack)
apic_write(APIC_LVTPC, APIC_DM_NMI);
return handled;
}
Expand Down Expand Up @@ -6129,7 +6135,6 @@ __init int intel_pmu_init(void)
static_branch_enable(&perf_is_hybrid);
x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS;

x86_pmu.late_ack = true;
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.pebs_block = true;
Expand Down Expand Up @@ -6167,6 +6172,7 @@ __init int intel_pmu_init(void)
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
pmu->name = "cpu_core";
pmu->cpu_type = hybrid_big;
pmu->late_ack = true;
if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
pmu->num_counters = x86_pmu.num_counters + 2;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
Expand All @@ -6192,6 +6198,7 @@ __init int intel_pmu_init(void)
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
pmu->name = "cpu_atom";
pmu->cpu_type = hybrid_small;
pmu->mid_ack = true;
pmu->num_counters = x86_pmu.num_counters;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
pmu->max_pebs_events = x86_pmu.max_pebs_events;
Expand Down
18 changes: 17 additions & 1 deletion arch/x86/events/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,10 @@ struct x86_hybrid_pmu {
struct event_constraint *event_constraints;
struct event_constraint *pebs_constraints;
struct extra_reg *extra_regs;

unsigned int late_ack :1,
mid_ack :1,
enabled_ack :1;
};

static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)
Expand Down Expand Up @@ -686,6 +690,16 @@ extern struct static_key_false perf_is_hybrid;
__Fp; \
}))

/*
 * hybrid_bit - read the single-bit flag @_field that applies to @_pmu.
 *
 * When is_hybrid() is true and @_pmu is non-NULL the flag is taken from the
 * hybrid PMU returned by hybrid_pmu(@_pmu); otherwise the flag of the global
 * x86_pmu is used. The macro evaluates to a bool.
 */
#define hybrid_bit(_pmu, _field)					\
({									\
	bool __bit = (is_hybrid() && (_pmu)) ?				\
			hybrid_pmu(_pmu)->_field :			\
			x86_pmu._field;					\
									\
	__bit;								\
})

enum hybrid_pmu_type {
hybrid_big = 0x40,
hybrid_small = 0x20,
Expand Down Expand Up @@ -755,6 +769,7 @@ struct x86_pmu {

/* PMI handler bits */
unsigned int late_ack :1,
mid_ack :1,
enabled_ack :1;
/*
* sysfs attrs
Expand Down Expand Up @@ -1115,9 +1130,10 @@ void x86_pmu_stop(struct perf_event *event, int flags);

static inline void x86_pmu_disable_event(struct perf_event *event)
{
u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
struct hw_perf_event *hwc = &event->hw;

wrmsrl(hwc->config_base, hwc->config);
wrmsrl(hwc->config_base, hwc->config & ~disable_mask);

if (is_counter_pair(hwc))
wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0);
Expand Down
35 changes: 32 additions & 3 deletions kernel/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -11917,6 +11917,37 @@ __perf_event_ctx_lock_double(struct perf_event *group_leader,
return gctx;
}

/*
 * perf_check_permission - may the current task attach a perf event to @task?
 * @attr: attributes of the event being opened; only ->sigtrap is inspected
 * @task: the task the event is going to be attached to
 *
 * Returns true when the capability checks pass, or otherwise when
 * ptrace_may_access() grants the required ptrace mode on @task.
 */
static bool
perf_check_permission(struct perf_event_attr *attr, struct task_struct *task)
{
	bool allowed = perfmon_capable();
	unsigned int mode;

	if (!attr->sigtrap) {
		mode = PTRACE_MODE_READ_REALCREDS;
	} else {
		bool may_kill;

		/*
		 * perf_event_attr::sigtrap sends signals to the other task,
		 * so the current task must additionally hold CAP_KILL in the
		 * user namespace of @task's credentials. The check is done
		 * regardless of the perfmon_capable() result above.
		 */
		rcu_read_lock();
		may_kill = ns_capable(__task_cred(task)->user_ns, CAP_KILL);
		rcu_read_unlock();

		allowed = allowed && may_kill;

		/*
		 * Sending signals can effectively change the target task, so
		 * the ptrace fallback below is upgraded from READ to ATTACH.
		 */
		mode = PTRACE_MODE_ATTACH_REALCREDS;
	}

	if (allowed)
		return true;

	/*
	 * The ptrace permission check is preserved for backwards
	 * compatibility. It also covers the check that the current task and
	 * the other task have matching uids, which is why that is not done
	 * here explicitly.
	 */
	return ptrace_may_access(task, mode);
}

/**
* sys_perf_event_open - open a performance event, associate it to a task/cpu
*
Expand Down Expand Up @@ -12163,15 +12194,13 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_file;

/*
* Preserve ptrace permission check for backwards compatibility.
*
* We must hold exec_update_lock across this and any potential
* perf_install_in_context() call for this new event to
* serialize against exec() altering our credentials (and the
* perf_event_exit_task() that could imply).
*/
err = -EACCES;
if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
if (!perf_check_permission(&attr, task))
goto err_cred;
}

Expand Down

0 comments on commit 74eedeb

Please sign in to comment.