Skip to content

Commit

Permalink
Merge tag 'perf-tools-fixes-for-v6.8-1-2024-02-01' of git://git.kerne…
Browse files Browse the repository at this point in the history
…l.org/pub/scm/linux/kernel/git/perf/perf-tools

Pull perf tools fixes from Arnaldo Carvalho de Melo:
 "Vendor events:

   - Intel Alderlake/Sapphire Rapids metric fixes, the CPU type
     ("cpu_atom", "cpu_core") needs to be used as a prefix to be
     considered on a metric formula, detected via one of the 'perf test'
     entries.

  'perf test' fixes:

   - Fix the creation of event selector lists on 'perf test' entries, by
     initializing the sample ID flag, which is done by 'perf record', so
     this fix affects only the tests, the common case isn't affected

   - Make 'perf list' respect debug settings (-v) to fix its 'perf test'
     entry

   - Fix 'perf script' test when python support isn't enabled

   - Special case 'perf script' tests on s390, where only DWARF call
     graphs are supported and only on software events

   - Make 'perf daemon' signal test less racy

  Compiler warnings/errors:

   - Remove needless malloc(0) call in 'perf top' that triggers
     -Walloc-size

   - Fix calloc() argument order to address error introduced in gcc-14

  Build:

   - Make minimal shellcheck version to v0.6.0, avoiding the build to
     fail with older versions

  Sync kernel header copies:

   - stat.h to pick STATX_MNT_ID_UNIQUE

   - msr-index.h to pick IA32_MKTME_KEYID_PARTITIONING

   - drm.h to pick DRM_IOCTL_MODE_CLOSEFB

   - unistd.h to pick {list,stat}mount,
     lsm_{[gs]et_self_attr,list_modules} syscall numbers

   - x86 cpufeatures to pick TDX, Zen, APIC MSR fence changes

   - x86's mem{cpy,set}_64.S used in 'perf bench'

   - Also, without tooling effects: asm-generic/unaligned.h, mount.h,
     fcntl.h, kvm headers"

* tag 'perf-tools-fixes-for-v6.8-1-2024-02-01' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (21 commits)
  perf tools headers: update the asm-generic/unaligned.h copy with the kernel sources
  tools include UAPI: Sync linux/mount.h copy with the kernel sources
  perf evlist: Fix evlist__new_default() for > 1 core PMU
  tools headers: Update the copy of x86's mem{cpy,set}_64.S used in 'perf bench'
  tools headers x86 cpufeatures: Sync with the kernel sources to pick TDX, Zen, APIC MSR fence changes
  tools headers UAPI: Sync unistd.h to pick {list,stat}mount, lsm_{[gs]et_self_attr,list_modules} syscall numbers
  perf vendor events intel: Alderlake/sapphirerapids metric fixes
  tools headers UAPI: Sync kvm headers with the kernel sources
  perf tools: Fix calloc() arguments to address error introduced in gcc-14
  perf top: Remove needless malloc(0) call that triggers -Walloc-size
  perf build: Make minimal shellcheck version to v0.6.0
  tools headers UAPI: Update tools's copy of drm.h headers to pick DRM_IOCTL_MODE_CLOSEFB
  perf test shell daemon: Make signal test less racy
  perf test shell script: Fix test for python being disabled
  perf test: Workaround debug output in list test
  perf list: Add output file option
  perf list: Switch error message to pr_err() to respect debug settings (-v)
  perf test: Fix 'perf script' tests on s390
  tools headers UAPI: Sync linux/fcntl.h with the kernel sources
  tools arch x86: Sync the msr-index.h copy with the kernel sources to pick IA32_MKTME_KEYID_PARTITIONING
  ...
  • Loading branch information
Linus Torvalds committed Feb 3, 2024
2 parents 56897d5 + fdd0ae7 commit b555d19
Show file tree
Hide file tree
Showing 31 changed files with 588 additions and 383 deletions.
8 changes: 7 additions & 1 deletion tools/arch/x86/include/asm/cpufeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
Expand Down Expand Up @@ -308,10 +309,14 @@
#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */
#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */

#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */
#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */
#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */
#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */
#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */
#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */
#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */

/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
Expand Down Expand Up @@ -495,6 +500,7 @@
#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */
#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */

/* BUG word 2 */
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
Expand Down
8 changes: 8 additions & 0 deletions tools/arch/x86/include/asm/msr-index.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,11 @@
#define LBR_INFO_CYCLES 0xffff
#define LBR_INFO_BR_TYPE_OFFSET 56
#define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET)
#define LBR_INFO_BR_CNTR_OFFSET 32
#define LBR_INFO_BR_CNTR_NUM 4
#define LBR_INFO_BR_CNTR_BITS 2
#define LBR_INFO_BR_CNTR_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_BITS - 1, 0)
#define LBR_INFO_BR_CNTR_FULL_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS - 1, 0)

#define MSR_ARCH_LBR_CTL 0x000014ce
#define ARCH_LBR_CTL_LBREN BIT(0)
Expand Down Expand Up @@ -536,6 +541,9 @@
#define MSR_RELOAD_PMC0 0x000014c1
#define MSR_RELOAD_FIXED_CTR0 0x00001309

/* KeyID partitioning between MKTME and TDX */
#define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087

/*
* AMD64 MSRs. Not complete. See the architecture manual for a more
* complete list.
Expand Down
3 changes: 3 additions & 0 deletions tools/arch/x86/include/uapi/asm/kvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -562,4 +562,7 @@ struct kvm_pmu_event_filter {
/* x86-specific KVM_EXIT_HYPERCALL flags. */
#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0)

#define KVM_X86_DEFAULT_VM 0
#define KVM_X86_SW_PROTECTED_VM 1

#endif /* _ASM_X86_KVM_H */
4 changes: 2 additions & 2 deletions tools/arch/x86/lib/memcpy_64.S
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright 2002 Andi Kleen */

#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/export.h>

.section .noinstr.text, "ax"

Expand Down Expand Up @@ -39,7 +39,7 @@ SYM_TYPED_FUNC_START(__memcpy)
SYM_FUNC_END(__memcpy)
EXPORT_SYMBOL(__memcpy)

SYM_FUNC_ALIAS(memcpy, __memcpy)
SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy)
EXPORT_SYMBOL(memcpy)

SYM_FUNC_START_LOCAL(memcpy_orig)
Expand Down
4 changes: 2 additions & 2 deletions tools/arch/x86/lib/memset_64.S
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright 2002 Andi Kleen, SuSE Labs */

#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/export.h>

.section .noinstr.text, "ax"

Expand Down Expand Up @@ -40,7 +40,7 @@ SYM_FUNC_START(__memset)
SYM_FUNC_END(__memset)
EXPORT_SYMBOL(__memset)

SYM_FUNC_ALIAS(memset, __memset)
SYM_FUNC_ALIAS_MEMFUNC(memset, __memset)
EXPORT_SYMBOL(memset)

SYM_FUNC_START_LOCAL(memset_orig)
Expand Down
24 changes: 12 additions & 12 deletions tools/include/asm-generic/unaligned.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,9 @@ static inline u32 get_unaligned_le24(const void *p)

static inline void __put_unaligned_be24(const u32 val, u8 *p)
{
*p++ = val >> 16;
*p++ = val >> 8;
*p++ = val;
*p++ = (val >> 16) & 0xff;
*p++ = (val >> 8) & 0xff;
*p++ = val & 0xff;
}

static inline void put_unaligned_be24(const u32 val, void *p)
Expand All @@ -117,9 +117,9 @@ static inline void put_unaligned_be24(const u32 val, void *p)

static inline void __put_unaligned_le24(const u32 val, u8 *p)
{
*p++ = val;
*p++ = val >> 8;
*p++ = val >> 16;
*p++ = val & 0xff;
*p++ = (val >> 8) & 0xff;
*p++ = (val >> 16) & 0xff;
}

static inline void put_unaligned_le24(const u32 val, void *p)
Expand All @@ -129,12 +129,12 @@ static inline void put_unaligned_le24(const u32 val, void *p)

static inline void __put_unaligned_be48(const u64 val, u8 *p)
{
*p++ = val >> 40;
*p++ = val >> 32;
*p++ = val >> 24;
*p++ = val >> 16;
*p++ = val >> 8;
*p++ = val;
*p++ = (val >> 40) & 0xff;
*p++ = (val >> 32) & 0xff;
*p++ = (val >> 24) & 0xff;
*p++ = (val >> 16) & 0xff;
*p++ = (val >> 8) & 0xff;
*p++ = val & 0xff;
}

static inline void put_unaligned_be48(const u64 val, void *p)
Expand Down
15 changes: 14 additions & 1 deletion tools/include/uapi/asm-generic/unistd.h
Original file line number Diff line number Diff line change
Expand Up @@ -829,8 +829,21 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait)
#define __NR_futex_requeue 456
__SYSCALL(__NR_futex_requeue, sys_futex_requeue)

#define __NR_statmount 457
__SYSCALL(__NR_statmount, sys_statmount)

#define __NR_listmount 458
__SYSCALL(__NR_listmount, sys_listmount)

#define __NR_lsm_get_self_attr 459
__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr)
#define __NR_lsm_set_self_attr 460
__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr)
#define __NR_lsm_list_modules 461
__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)

#undef __NR_syscalls
#define __NR_syscalls 457
#define __NR_syscalls 462

/*
* 32 bit systems traditionally used different
Expand Down
72 changes: 71 additions & 1 deletion tools/include/uapi/drm/drm.h
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,8 @@ struct drm_gem_open {
/**
* DRM_CAP_ASYNC_PAGE_FLIP
*
* If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC.
* If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy
* page-flips.
*/
#define DRM_CAP_ASYNC_PAGE_FLIP 0x7
/**
Expand Down Expand Up @@ -773,6 +774,13 @@ struct drm_gem_open {
* :ref:`drm_sync_objects`.
*/
#define DRM_CAP_SYNCOBJ_TIMELINE 0x14
/**
* DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP
*
* If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic
* commits.
*/
#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15

/* DRM_IOCTL_GET_CAP ioctl argument type */
struct drm_get_cap {
Expand Down Expand Up @@ -842,6 +850,31 @@ struct drm_get_cap {
*/
#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5

/**
* DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT
*
* Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and
* virtualbox) have additional restrictions for cursor planes (thus
* making cursor planes on those drivers not truly universal,) e.g.
* they need cursor planes to act like one would expect from a mouse
* cursor and have correctly set hotspot properties.
* If this client cap is not set the DRM core will hide cursor plane on
* those virtualized drivers because not setting it implies that the
* client is not capable of dealing with those extra restictions.
* Clients which do set cursor hotspot and treat the cursor plane
* like a mouse cursor should set this property.
* The client must enable &DRM_CLIENT_CAP_ATOMIC first.
*
* Setting this property on drivers which do not special case
* cursor planes (i.e. non-virtualized drivers) will return
* EOPNOTSUPP, which can be used by userspace to gauge
* requirements of the hardware/drivers they're running on.
*
* This capability is always supported for atomic-capable virtualized
* drivers starting from kernel version 6.6.
*/
#define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT 6

/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
struct drm_set_client_cap {
__u64 capability;
Expand Down Expand Up @@ -893,6 +926,7 @@ struct drm_syncobj_transfer {
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE (1 << 3) /* set fence deadline to deadline_nsec */
struct drm_syncobj_wait {
__u64 handles;
/* absolute timeout */
Expand All @@ -901,6 +935,14 @@ struct drm_syncobj_wait {
__u32 flags;
__u32 first_signaled; /* only valid when not waiting all */
__u32 pad;
/**
* @deadline_nsec - fence deadline hint
*
* Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing
* fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is
* set.
*/
__u64 deadline_nsec;
};

struct drm_syncobj_timeline_wait {
Expand All @@ -913,6 +955,14 @@ struct drm_syncobj_timeline_wait {
__u32 flags;
__u32 first_signaled; /* only valid when not waiting all */
__u32 pad;
/**
* @deadline_nsec - fence deadline hint
*
* Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing
* fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is
* set.
*/
__u64 deadline_nsec;
};

/**
Expand Down Expand Up @@ -1218,6 +1268,26 @@ extern "C" {

#define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd)

/**
* DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer.
*
* This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL
* argument is a framebuffer object ID.
*
* This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable
* planes and CRTCs. As long as the framebuffer is used by a plane, it's kept
* alive. When the plane no longer uses the framebuffer (because the
* framebuffer is replaced with another one, or the plane is disabled), the
* framebuffer is cleaned up.
*
* This is useful to implement flicker-free transitions between two processes.
*
* Depending on the threat model, user-space may want to ensure that the
* framebuffer doesn't expose any sensitive user information: closed
* framebuffers attached to a plane can be read back by the next DRM master.
*/
#define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb)

/*
* Device specific ioctls should only be in their respective headers
* The device specific ioctl range is from 0x40 to 0x9f.
Expand Down
12 changes: 6 additions & 6 deletions tools/include/uapi/drm/i915_drm.h
Original file line number Diff line number Diff line change
Expand Up @@ -693,7 +693,7 @@ typedef struct drm_i915_irq_wait {
#define I915_PARAM_HAS_EXEC_FENCE 44

/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture
* user specified bufffers for post-mortem debugging of GPU hangs. See
* user-specified buffers for post-mortem debugging of GPU hangs. See
* EXEC_OBJECT_CAPTURE.
*/
#define I915_PARAM_HAS_EXEC_CAPTURE 45
Expand Down Expand Up @@ -1606,7 +1606,7 @@ struct drm_i915_gem_busy {
* is accurate.
*
* The returned dword is split into two fields to indicate both
* the engine classess on which the object is being read, and the
* the engine classes on which the object is being read, and the
* engine class on which it is currently being written (if any).
*
* The low word (bits 0:15) indicate if the object is being written
Expand Down Expand Up @@ -1815,7 +1815,7 @@ struct drm_i915_gem_madvise {
__u32 handle;

/* Advice: either the buffer will be needed again in the near future,
* or wont be and could be discarded under memory pressure.
* or won't be and could be discarded under memory pressure.
*/
__u32 madv;

Expand Down Expand Up @@ -3246,7 +3246,7 @@ struct drm_i915_query_topology_info {
* // enough to hold our array of engines. The kernel will fill out the
* // item.length for us, which is the number of bytes we need.
* //
* // Alternatively a large buffer can be allocated straight away enabling
* // Alternatively a large buffer can be allocated straightaway enabling
* // querying in one pass, in which case item.length should contain the
* // length of the provided buffer.
* err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
Expand All @@ -3256,7 +3256,7 @@ struct drm_i915_query_topology_info {
* // Now that we allocated the required number of bytes, we call the ioctl
* // again, this time with the data_ptr pointing to our newly allocated
* // blob, which the kernel can then populate with info on all engines.
* item.data_ptr = (uintptr_t)&info,
* item.data_ptr = (uintptr_t)&info;
*
* err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
* if (err) ...
Expand Down Expand Up @@ -3286,7 +3286,7 @@ struct drm_i915_query_topology_info {
/**
* struct drm_i915_engine_info
*
* Describes one engine and it's capabilities as known to the driver.
* Describes one engine and its capabilities as known to the driver.
*/
struct drm_i915_engine_info {
/** @engine: Engine class and instance. */
Expand Down
3 changes: 3 additions & 0 deletions tools/include/uapi/linux/fcntl.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,5 +116,8 @@
#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to
compare object identity and may not
be usable to open_by_handle_at(2) */
#if defined(__KERNEL__)
#define AT_GETATTR_NOSEC 0x80000000
#endif

#endif /* _UAPI_LINUX_FCNTL_H */
Loading

0 comments on commit b555d19

Please sign in to comment.