Merge tag 'perf-core-2025-01-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull performance events updates from Ingo Molnar:
 "Seqlock optimizations that arose in a perf context and were merged
  into the perf tree:

   - seqlock: Add raw_seqcount_try_begin (Suren Baghdasaryan)
   - mm: Convert mm_lock_seq to a proper seqcount (Suren Baghdasaryan)
   - mm: Introduce mmap_lock_speculate_{try_begin|retry} (Suren
     Baghdasaryan)
   - mm/gup: Use raw_seqcount_try_begin() (Peter Zijlstra)

  Core perf enhancements:

   - Reduce 'struct page' footprint of perf by mapping pages in advance
     (Lorenzo Stoakes)
   - Save raw sample data conditionally based on sample type (Yabin Cui)
   - Reduce sampling overhead by checking sample_type in
     perf_sample_save_callchain() and perf_sample_save_brstack() (Yabin
     Cui)
   - Export perf_exclude_event() (Namhyung Kim)

  Uprobes scalability enhancements: (Andrii Nakryiko)

   - Simplify find_active_uprobe_rcu() VMA checks
   - Add speculative lockless VMA-to-inode-to-uprobe resolution
   - Simplify session consumer tracking
   - Decouple return_instance list traversal and freeing
   - Ensure return_instance is detached from the list before freeing
   - Reuse return_instances between multiple uretprobes within task
   - Guard against kmemdup() failing in dup_return_instance()

  AMD core PMU driver enhancements:

   - Relax privilege filter restriction on AMD IBS (Namhyung Kim)

  AMD RAPL energy counters support: (Dhananjay Ugwekar)

   - Introduce topology_logical_core_id() (K Prateek Nayak)
   - Remove the unused get_rapl_pmu_cpumask() function
   - Remove the cpu_to_rapl_pmu() function
   - Rename rapl_pmu variables
   - Make rapl_model struct global
   - Add arguments to the init and cleanup functions
   - Modify the generic variable names to *_pkg*
   - Remove the global variable rapl_msrs
   - Move the cntr_mask to rapl_pmus struct
   - Add core energy counter support for AMD CPUs

  Intel core PMU driver enhancements:

   - Support RDPMC 'metrics clear mode' feature (Kan Liang)
   - Clarify adaptive PEBS processing (Kan Liang)
   - Factor out functions for PEBS records processing (Kan Liang)
   - Simplify the PEBS records processing for adaptive PEBS (Kan Liang)

  Intel uncore driver enhancements: (Kan Liang)

   - Convert buggy pmu->func_id use to pmu->registered
   - Support more units on Granite Rapids"
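
The seqlock and uprobes items above share one pattern: optimistically read mm/VMA state without taking mmap_lock, then trust the result only if no writer ran in the meantime. A minimal sketch of that pattern, assuming the signatures named in those items (an illustration, not the exact uprobes code; the real code annotates such racy reads with data_race()/READ_ONCE()):

/*
 * Speculative, lockless check whether @addr lies in an executable mapping.
 * Falls back to "don't know" (false) whenever a concurrent mmap_lock writer
 * could have changed the VMA tree underneath us.
 */
static bool addr_is_exec_mapped_speculative(struct mm_struct *mm,
                                            unsigned long addr)
{
        struct vm_area_struct *vma;
        unsigned int seq;
        bool exec = false;

        rcu_read_lock();
        /* Fails immediately if a writer currently holds mmap_lock. */
        if (!mmap_lock_speculate_try_begin(mm, &seq))
                goto out;

        vma = vma_lookup(mm, addr);
        if (vma)
                exec = !!(vma->vm_flags & VM_EXEC);

        /* Any intervening mmap_lock writer invalidates what we just read. */
        if (mmap_lock_speculate_retry(mm, seq))
                exec = false;
out:
        rcu_read_unlock();
        return exec;
}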

* tag 'perf-core-2025-01-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits)
  perf: map pages in advance
  perf/x86/intel/uncore: Support more units on Granite Rapids
  perf/x86/intel/uncore: Clean up func_id
  perf/x86/intel: Support RDPMC metrics clear mode
  uprobes: Guard against kmemdup() failing in dup_return_instance()
  perf/x86: Relax privilege filter restriction on AMD IBS
  perf/core: Export perf_exclude_event()
  uprobes: Reuse return_instances between multiple uretprobes within task
  uprobes: Ensure return_instance is detached from the list before freeing
  uprobes: Decouple return_instance list traversal and freeing
  uprobes: Simplify session consumer tracking
  uprobes: add speculative lockless VMA-to-inode-to-uprobe resolution
  uprobes: simplify find_active_uprobe_rcu() VMA checks
  mm: introduce mmap_lock_speculate_{try_begin|retry}
  mm: convert mm_lock_seq to a proper seqcount
  mm/gup: Use raw_seqcount_try_begin()
  seqlock: add raw_seqcount_try_begin
  perf/x86/rapl: Add core energy counter support for AMD CPUs
  perf/x86/rapl: Move the cntr_mask to rapl_pmus struct
  perf/x86/rapl: Remove the global variable rapl_msrs
  ...
Linus Torvalds committed Jan 21, 2025
2 parents a6640c8 + b709eb8 commit 6c4aa89
Showing 36 changed files with 938 additions and 479 deletions.
4 changes: 4 additions & 0 deletions Documentation/arch/x86/topology.rst
@@ -135,6 +135,10 @@ Thread-related topology information in the kernel:
The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo
"core_id."

- topology_logical_core_id();

The logical core ID to which a thread belongs.



System topology examples
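
For context, the new accessor documented above (added for the AMD RAPL per-core counters) is used roughly like this; an illustrative sketch only, not code from the patch:

/* Illustrative only: print which logical core each online CPU belongs to. */
static void dump_logical_core_ids(void)
{
        unsigned int cpu;

        for_each_online_cpu(cpu)
                pr_info("CPU%u belongs to logical core %u\n",
                        cpu, topology_logical_core_id(cpu));
}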
2 changes: 1 addition & 1 deletion arch/s390/kernel/perf_cpum_cf.c
@@ -981,7 +981,7 @@ static int cfdiag_push_sample(struct perf_event *event,
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = cpuhw->usedss;
raw.frag.data = cpuhw->stop;
perf_sample_save_raw_data(&data, &raw);
perf_sample_save_raw_data(&data, event, &raw);
}

overflow = perf_event_overflow(event, &data, &regs);
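
The extra event argument is the point of the "save raw sample data conditionally" item: the helper can now bail out by itself when PERF_SAMPLE_RAW was not requested, so every caller simply passes the event. A hedged paraphrase of the new helper shape (the real body in include/linux/perf_event.h also sizes and links the raw fragments):

static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
                                             struct perf_event *event,
                                             struct perf_raw_record *raw)
{
        /* Nothing to do unless the event actually asked for raw data. */
        if (!(event->attr.sample_type & PERF_SAMPLE_RAW))
                return;

        /* ... walk the fragments, compute sizes, attach them to @data ... */
}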
6 changes: 3 additions & 3 deletions arch/s390/kernel/perf_cpum_sf.c
@@ -981,7 +981,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
cpuhw->flags &= ~PMU_F_ENABLED;
}

/* perf_exclude_event() - Filter event
/* perf_event_exclude() - Filter event
* @event: The perf event
* @regs: pt_regs structure
* @sde_regs: Sample-data-entry (sde) regs structure
@@ -990,7 +990,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
*
* Return non-zero if the event shall be excluded.
*/
static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
static int perf_event_exclude(struct perf_event *event, struct pt_regs *regs,
struct perf_sf_sde_regs *sde_regs)
{
if (event->attr.exclude_user && user_mode(regs))
@@ -1073,7 +1073,7 @@ static int perf_push_sample(struct perf_event *event,
data.tid_entry.pid = basic->hpp & LPP_PID_MASK;

overflow = 0;
if (perf_exclude_event(event, &regs, sde_regs))
if (perf_event_exclude(event, &regs, sde_regs))
goto out;
if (perf_event_overflow(event, &data, &regs)) {
overflow = 1;
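
The rename above is fallout from the "Export perf_exclude_event()" item: the generic helper in kernel/events/core.c now owns that name, so the s390-private filter becomes perf_event_exclude(). Roughly what the exported core helper checks (hedged paraphrase, not a verbatim copy):

int perf_exclude_event(struct perf_event *event, struct pt_regs *regs)
{
        if (event->hw.state & PERF_HES_STOPPED)
                return 1;

        if (regs) {
                if (event->attr.exclude_user && user_mode(regs))
                        return 1;
                if (event->attr.exclude_kernel && !user_mode(regs))
                        return 1;
        }

        return 0;
}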
2 changes: 1 addition & 1 deletion arch/s390/kernel/perf_pai_crypto.c
@@ -478,7 +478,7 @@ static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump,
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = rawsize;
raw.frag.data = cpump->save;
perf_sample_save_raw_data(&data, &raw);
perf_sample_save_raw_data(&data, event, &raw);
}

overflow = perf_event_overflow(event, &data, &regs);
2 changes: 1 addition & 1 deletion arch/s390/kernel/perf_pai_ext.c
@@ -503,7 +503,7 @@ static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump,
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = rawsize;
raw.frag.data = cpump->save;
perf_sample_save_raw_data(&data, &raw);
perf_sample_save_raw_data(&data, event, &raw);
}

overflow = perf_event_overflow(event, &data, &regs);
3 changes: 1 addition & 2 deletions arch/x86/events/amd/core.c
@@ -1001,8 +1001,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!x86_perf_event_set_period(event))
continue;

if (has_branch_stack(event))
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);

if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
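
Dropping the has_branch_stack() test at call sites like the one above is safe because the helper now performs the same sample_type check internally (the "reduce sampling overhead" item). A hedged sketch of the helper-side guard:

static inline void perf_sample_save_brstack(struct perf_sample_data *data,
                                            struct perf_event *event,
                                            struct perf_branch_stack *brs,
                                            u64 *brs_cntr)
{
        /* PERF_SAMPLE_BRANCH_STACK not requested: nothing to save. */
        if (!has_branch_stack(event))
                return;

        /* ... copy the branch stack (and counters) into @data as before ... */
}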
64 changes: 43 additions & 21 deletions arch/x86/events/amd/ibs.c
@@ -31,6 +31,8 @@ static u32 ibs_caps;
#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT

/* attr.config2 */
#define IBS_SW_FILTER_MASK 1

/*
* IBS states:
@@ -290,6 +292,16 @@ static int perf_ibs_init(struct perf_event *event)
if (has_branch_stack(event))
return -EOPNOTSUPP;

/* handle exclude_{user,kernel} in the IRQ handler */
if (event->attr.exclude_host || event->attr.exclude_guest ||
event->attr.exclude_idle)
return -EINVAL;

if (!(event->attr.config2 & IBS_SW_FILTER_MASK) &&
(event->attr.exclude_kernel || event->attr.exclude_user ||
event->attr.exclude_hv))
return -EINVAL;

ret = validate_group(event);
if (ret)
return ret;
@@ -550,24 +562,14 @@ static struct attribute *attrs_empty[] = {
NULL,
};

static struct attribute_group empty_format_group = {
.name = "format",
.attrs = attrs_empty,
};

static struct attribute_group empty_caps_group = {
.name = "caps",
.attrs = attrs_empty,
};

static const struct attribute_group *empty_attr_groups[] = {
&empty_format_group,
&empty_caps_group,
NULL,
};

PMU_FORMAT_ATTR(rand_en, "config:57");
PMU_FORMAT_ATTR(cnt_ctl, "config:19");
PMU_FORMAT_ATTR(swfilt, "config2:0");
PMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59");
PMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16");
PMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1");
@@ -578,8 +580,9 @@ zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int
return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0;
}

static struct attribute *rand_en_attrs[] = {
static struct attribute *fetch_attrs[] = {
&format_attr_rand_en.attr,
&format_attr_swfilt.attr,
NULL,
};

@@ -593,9 +596,9 @@ static struct attribute *zen4_ibs_extensions_attrs[] = {
NULL,
};

static struct attribute_group group_rand_en = {
static struct attribute_group group_fetch_formats = {
.name = "format",
.attrs = rand_en_attrs,
.attrs = fetch_attrs,
};

static struct attribute_group group_fetch_l3missonly = {
@@ -611,7 +614,7 @@ static struct attribute_group group_zen4_ibs_extensions = {
};

static const struct attribute_group *fetch_attr_groups[] = {
&group_rand_en,
&group_fetch_formats,
&empty_caps_group,
NULL,
};
@@ -628,6 +631,11 @@ cnt_ctl_is_visible(struct kobject *kobj, struct attribute *attr, int i)
return ibs_caps & IBS_CAPS_OPCNT ? attr->mode : 0;
}

static struct attribute *op_attrs[] = {
&format_attr_swfilt.attr,
NULL,
};

static struct attribute *cnt_ctl_attrs[] = {
&format_attr_cnt_ctl.attr,
NULL,
@@ -638,6 +646,11 @@ static struct attribute *op_l3missonly_attrs[] = {
NULL,
};

static struct attribute_group group_op_formats = {
.name = "format",
.attrs = op_attrs,
};

static struct attribute_group group_cnt_ctl = {
.name = "format",
.attrs = cnt_ctl_attrs,
@@ -650,6 +663,12 @@ static struct attribute_group group_op_l3missonly = {
.is_visible = zen4_ibs_extensions_is_visible,
};

static const struct attribute_group *op_attr_groups[] = {
&group_op_formats,
&empty_caps_group,
NULL,
};

static const struct attribute_group *op_attr_update[] = {
&group_cnt_ctl,
&group_op_l3missonly,
@@ -667,7 +686,6 @@ static struct perf_ibs perf_ibs_fetch = {
.start = perf_ibs_start,
.stop = perf_ibs_stop,
.read = perf_ibs_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
},
.msr = MSR_AMD64_IBSFETCHCTL,
.config_mask = IBS_FETCH_CONFIG_MASK,
@@ -691,7 +709,6 @@ static struct perf_ibs perf_ibs_op = {
.start = perf_ibs_start,
.stop = perf_ibs_stop,
.read = perf_ibs_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
},
.msr = MSR_AMD64_IBSOPCTL,
.config_mask = IBS_OP_CONFIG_MASK,
@@ -1111,14 +1128,20 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
regs.flags |= PERF_EFLAGS_EXACT;
}

if ((event->attr.config2 & IBS_SW_FILTER_MASK) &&
perf_exclude_event(event, &regs)) {
throttle = perf_event_account_interrupt(event);
goto out;
}

if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw = (struct perf_raw_record){
.frag = {
.size = sizeof(u32) + ibs_data.size,
.data = ibs_data.data,
},
};
perf_sample_save_raw_data(&data, &raw);
perf_sample_save_raw_data(&data, event, &raw);
}

if (perf_ibs == &perf_ibs_op)
@@ -1129,8 +1152,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
* recorded as part of interrupt regs. Thus we need to use rip from
* interrupt regs while unwinding call stack.
*/
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
perf_sample_save_callchain(&data, event, iregs);
perf_sample_save_callchain(&data, event, iregs);

throttle = perf_event_overflow(event, &data, &regs);
out:
@@ -1228,7 +1250,7 @@ static __init int perf_ibs_op_init(void)
if (ibs_caps & IBS_CAPS_ZEN4)
perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY;

perf_ibs_op.pmu.attr_groups = empty_attr_groups;
perf_ibs_op.pmu.attr_groups = op_attr_groups;
perf_ibs_op.pmu.attr_update = op_attr_update;

return perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
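
With the new swfilt format bit (config2:0) and the dropped PERF_PMU_CAP_NO_EXCLUDE, IBS events may now carry exclude_user/exclude_kernel/exclude_hv, enforced in software by the NMI handler via perf_exclude_event(). A userspace sketch of opening such an event (assumes the dynamic PMU type was read from /sys/bus/event_source/devices/ibs_op/type; period choice and error handling elided):

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_ibs_op_user_only(int ibs_op_pmu_type)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = ibs_op_pmu_type;    /* dynamic PMU type from sysfs */
        attr.sample_period = 100000;
        attr.config2 = 1;               /* swfilt: allow SW privilege filtering */
        attr.exclude_kernel = 1;        /* kernel samples dropped in the handler */

        return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
}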
3 changes: 1 addition & 2 deletions arch/x86/events/core.c
@@ -1707,8 +1707,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)

perf_sample_data_init(&data, 0, event->hw.last_period);

if (has_branch_stack(event))
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);

if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
20 changes: 19 additions & 1 deletion arch/x86/events/intel/core.c
@@ -2826,6 +2826,9 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
return;

idx = INTEL_PMC_IDX_FIXED_SLOTS;

if (event->attr.config1 & INTEL_TD_CFG_METRIC_CLEAR)
bits |= INTEL_FIXED_3_METRICS_CLEAR;
}

intel_set_masks(event, idx);
@@ -4081,7 +4084,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
* is used in a metrics group, it too cannot support sampling.
*/
if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && is_topdown_event(event)) {
if (event->attr.config1 || event->attr.config2)
/* The metrics_clear can only be set for the slots event */
if (event->attr.config1 &&
(!is_slots_event(event) || (event->attr.config1 & ~INTEL_TD_CFG_METRIC_CLEAR)))
return -EINVAL;

if (event->attr.config2)
return -EINVAL;

/*
@@ -4690,6 +4698,8 @@ PMU_FORMAT_ATTR(in_tx, "config:32" );
PMU_FORMAT_ATTR(in_tx_cp, "config:33" );
PMU_FORMAT_ATTR(eq, "config:36" ); /* v6 + */

PMU_FORMAT_ATTR(metrics_clear, "config1:0"); /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */

static ssize_t umask2_show(struct device *dev,
struct device_attribute *attr,
char *page)
@@ -4709,6 +4719,7 @@ static struct device_attribute format_attr_umask2 =
static struct attribute *format_evtsel_ext_attrs[] = {
&format_attr_umask2.attr,
&format_attr_eq.attr,
&format_attr_metrics_clear.attr,
NULL
};

@@ -4733,6 +4744,13 @@ evtsel_ext_is_visible(struct kobject *kobj, struct attribute *attr, int i)
if (i == 1)
return (mask & ARCH_PERFMON_EVENTSEL_EQ) ? attr->mode : 0;

/* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */
if (i == 2) {
union perf_capabilities intel_cap = hybrid(dev_get_drvdata(dev), intel_cap);

return intel_cap.rdpmc_metrics_clear ? attr->mode : 0;
}

return 0;
}

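
The new metrics_clear format bit (config1:0) is only accepted on the slots event and only advertised when PERF_CAPABILITIES reports RDPMC_METRICS_CLEAR; it asks the PMU to clear the PERF_METRICS state after an RDPMC read, so self-monitoring users start each measurement interval fresh. A hedged userspace-attr sketch (the 0x0400 raw slots encoding is an assumption; the perf tool spelling would be something like cpu/slots,metrics_clear/):

struct perf_event_attr attr = {
        .size    = sizeof(attr),
        .type    = PERF_TYPE_RAW,
        .config  = 0x0400,      /* assumed TOPDOWN.SLOTS (event=0x00,umask=0x4) */
        .config1 = 0x1,         /* metrics_clear: reset PERF_METRICS after RDPMC */
};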