Merge tag 'perf-tools-fixes-for-v6.4-1-2023-05-20' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tools fixes from Arnaldo Carvalho de Melo:

 - Fail gracefully if BUILD_BPF_SKEL=1 is specified and clang isn't
   available

 - Add empty 'struct rq' to 'perf lock contention' to satisfy libbpf
   'runqueue' type verification. This feature is built only with
   BUILD_BPF_SKEL=1

 - Make vmlinux.h use bpf.h and perf_event.h in source directory, not
   system ones that may be old and not have things like 'union
   perf_sample_weight'

 - Add system include paths to BPF builds to pick things missing in the
   headers included by clang -target bpf

 - Update various header copies with the kernel sources

 - Change the behavior for divide-by-zero and unsupported events to
   show 'nan'/'not counted' in 'perf stat' output.

   This happens when using things like 'perf stat -M TopdownL2 true',
   involving JSON metrics

 - Update no event/metric expectations affected by using JSON metrics in
   'perf stat -ddd' perf test

 - Avoid segv with 'perf stat --topdown' for metrics without a group

 - Do not assume which events may have a PMU name, allowing the logic to
   keep an AUX event group together. Makes this use case work again:

     $ perf record --no-bpf-event -c 10 -e '{intel_pt//,tlb_flush.stlb_any/aux-sample-size=8192/pp}:u' -- sleep 0.1
     [ perf record: Woken up 1 times to write data ]
     [ perf record: Captured and wrote 0.078 MB perf.data ]
     $ perf script -F-dso,+addr | grep -C5 tlb_flush.stlb_any | head -11
     sleep 20444 [003]  7939.510243:  1  branches:uH:  7f5350cc82a2 dl_main+0x9a2 => 7f5350cb38f0 _dl_add_to_namespace_list+0x0
     sleep 20444 [003]  7939.510243:  1  branches:uH:  7f5350cb3908 _dl_add_to_namespace_list+0x18 => 7f5350cbb080 rtld_mutex_dummy+0x0
     sleep 20444 [003]  7939.510243:  1  branches:uH:  7f5350cc8350 dl_main+0xa50 => 0 [unknown]
     sleep 20444 [003]  7939.510244:  1  branches:uH:  7f5350cc83ca dl_main+0xaca => 7f5350caeb60 _dl_process_pt_gnu_property+0x0
     sleep 20444 [003]  7939.510245:  1  branches:uH:  7f5350caeb60 _dl_process_pt_gnu_property+0x0 => 0 [unknown]
     sleep 20444  7939.510245:       10 tlb_flush.stlb_any/aux-sample-size=8192/pp: 0 7f5350caeb60 _dl_process_pt_gnu_property+0x0
     sleep 20444 [003]  7939.510254:  1  branches:uH:  7f5350cc87fe dl_main+0xefe => 7f5350ccd240 strcmp+0x0
     sleep 20444 [003]  7939.510254:  1  branches:uH:  7f5350cc8862 dl_main+0xf62 => 0 [unknown]

 - Add a check for the above use case in 'perf test test_intel_pt'

 - Fix the build with refcount checking on arm64; it was still accessing
   fields that need to be wrapped so that the refcounted struct gets
   checked

 - Fix contextid validation in ARM's CS-ETM, so that older kernels
   without that field can still be supported

 - Skip unsupported aggregation for stat events found in perf.data files
   in 'perf script'

 - Add stat test for record and script to check the previous problem

 - Remove needless debuginfod queries from 'perf test java symbol'; they
   were just making the test take a long time to complete

 - Address python SafeConfigParser() deprecation warning in 'perf test
   attr'

 - Fix __NR_execve undeclared on i386 'perf bench syscall' build error

* tag 'perf-tools-fixes-for-v6.4-1-2023-05-20' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (33 commits)
  perf bench syscall: Fix __NR_execve undeclared build error
  perf test attr: Fix python SafeConfigParser() deprecation warning
  perf test attr: Update no event/metric expectations
  tools headers disabled-features: Sync with the kernel sources
  tools headers UAPI: Sync arch prctl headers with the kernel sources
  tools headers: Update the copy of x86's mem{cpy,set}_64.S used in 'perf bench'
  tools headers x86 cpufeatures: Sync with the kernel sources
  tools headers UAPI: Sync s390 syscall table file that wires up the memfd_secret syscall
  tools headers UAPI: Sync linux/prctl.h with the kernel sources
  perf metrics: Avoid segv with --topdown for metrics without a group
  perf lock contention: Add empty 'struct rq' to satisfy libbpf 'runqueue' type verification
  perf cs-etm: Fix contextid validation
  perf arm64: Fix build with refcount checking
  perf test: Add stat test for record and script
  perf script: Skip aggregation for stat events
  perf build: Add system include paths to BPF builds
  perf bpf skels: Make vmlinux.h use bpf.h and perf_event.h in source directory
  perf parse-events: Do not break up AUX event group
  perf test test_intel_pt.sh: Test sample mode with event with PMU name
  perf evsel: Modify group pmu name for software events
  ...
Linus Torvalds committed May 21, 2023
2 parents 4927cb9 + 4e111f0 commit c47d122
Showing 71 changed files with 865 additions and 322 deletions.
36 changes: 36 additions & 0 deletions tools/arch/arm64/include/uapi/asm/kvm.h
@@ -198,6 +198,15 @@ struct kvm_arm_copy_mte_tags {
__u64 reserved[2];
};

/*
* Counter/Timer offset structure. Describe the virtual/physical offset.
* To be used with KVM_ARM_SET_COUNTER_OFFSET.
*/
struct kvm_arm_counter_offset {
__u64 counter_offset;
__u64 reserved;
};

#define KVM_ARM_TAGS_TO_GUEST 0
#define KVM_ARM_TAGS_FROM_GUEST 1

@@ -372,6 +381,10 @@ enum {
#endif
};

/* Device Control API on vm fd */
#define KVM_ARM_VM_SMCCC_CTRL 0
#define KVM_ARM_VM_SMCCC_FILTER 0

/* Device Control API: ARM VGIC */
#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
@@ -411,6 +424,8 @@ enum {
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
#define KVM_ARM_VCPU_TIMER_IRQ_HVTIMER 2
#define KVM_ARM_VCPU_TIMER_IRQ_HPTIMER 3
#define KVM_ARM_VCPU_PVTIME_CTRL 2
#define KVM_ARM_VCPU_PVTIME_IPA 0

@@ -469,6 +484,27 @@ enum {
/* run->fail_entry.hardware_entry_failure_reason codes. */
#define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0)

enum kvm_smccc_filter_action {
KVM_SMCCC_FILTER_HANDLE = 0,
KVM_SMCCC_FILTER_DENY,
KVM_SMCCC_FILTER_FWD_TO_USER,

#ifdef __KERNEL__
NR_SMCCC_FILTER_ACTIONS
#endif
};

struct kvm_smccc_filter {
__u32 base;
__u32 nr_functions;
__u8 action;
__u8 pad[15];
};

/* arm64-specific KVM_EXIT_HYPERCALL flags */
#define KVM_HYPERCALL_EXIT_SMC (1U << 0)
#define KVM_HYPERCALL_EXIT_16BIT (1U << 1)

#endif

#endif /* __ARM_KVM_H__ */
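
The kvm_smccc_filter additions above are meant to be consumed through KVM's generic device-attribute interface on the VM file descriptor. A minimal sketch of how a VMM might install a DENY range, assuming an arm64 host kernel that implements KVM_ARM_VM_SMCCC_CTRL; illustrative only, not part of this header sync:

#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

/* Deny a range of SMCCC function IDs on an already-created VM fd. */
static int deny_smccc_range(int vm_fd, uint32_t base, uint32_t nr_functions)
{
	struct kvm_smccc_filter filter = {
		.base         = base,          /* first function ID in the range */
		.nr_functions = nr_functions,  /* number of consecutive IDs */
		.action       = KVM_SMCCC_FILTER_DENY,
	};
	struct kvm_device_attr attr = {
		.group = KVM_ARM_VM_SMCCC_CTRL,
		.attr  = KVM_ARM_VM_SMCCC_FILTER,
		.addr  = (uint64_t)&filter,
	};

	return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
}
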
26 changes: 21 additions & 5 deletions tools/arch/x86/include/asm/cpufeatures.h
@@ -97,7 +97,7 @@
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
@@ -226,10 +226,9 @@

/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */
#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */
#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */

#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
@@ -307,14 +306,21 @@
#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */
#define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */
#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */
#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */

/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */
#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */
#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */
#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */
#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */
#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */
#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */
#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */
#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */

/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
@@ -331,6 +337,7 @@
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */
#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */

@@ -363,6 +370,7 @@
#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */
#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */
#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */
#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */

/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
@@ -427,6 +435,13 @@
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */

/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */
#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */
#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */
#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */
#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */

/*
* BUG word(s)
*/
@@ -467,5 +482,6 @@
#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */

#endif /* _ASM_X86_CPUFEATURES_H */
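
Each X86_FEATURE_* constant above encodes its capability word and bit position as word*32 + bit. A small, self-contained decoder for the newly synced X86_FEATURE_LAM value (illustrative only; the define is copied from the header above):

#include <stdio.h>

#define X86_FEATURE_LAM (12*32 + 26)	/* copied from the header above */

int main(void)
{
	unsigned int feature = X86_FEATURE_LAM;

	/* Word 12, bit 26: the word selects one 32-bit capability array entry. */
	printf("word %u, bit %u\n", feature / 32, feature % 32);
	return 0;
}
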
8 changes: 7 additions & 1 deletion tools/arch/x86/include/asm/disabled-features.h
@@ -75,6 +75,12 @@
# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31))
#endif

#ifdef CONFIG_ADDRESS_MASKING
# define DISABLE_LAM 0
#else
# define DISABLE_LAM (1 << (X86_FEATURE_LAM & 31))
#endif

#ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD 0
#else
@@ -115,7 +121,7 @@
#define DISABLED_MASK10 0
#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
DISABLE_CALL_DEPTH_TRACKING)
#define DISABLED_MASK12 0
#define DISABLED_MASK12 (DISABLE_LAM)
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
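
The DISABLED_MASK words let feature checks fold to a compile-time constant when the corresponding config option is off. A simplified sketch of the idea for the new DISABLE_LAM bit, not reproducing the kernel's actual cpu_feature_enabled() machinery:

/* Freestanding sketch: CONFIG_ADDRESS_MASKING=n, so LAM lands in DISABLED_MASK12. */
#define X86_FEATURE_LAM	(12*32 + 26)
#define DISABLE_LAM	(1u << (X86_FEATURE_LAM & 31))
#define DISABLED_MASK12	(DISABLE_LAM)

/* Hypothetical helper for word 12 only, showing how the mask is consulted. */
static inline int lam_statically_disabled(void)
{
	return !!(DISABLED_MASK12 & (1u << (X86_FEATURE_LAM & 31)));
}
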
2 changes: 2 additions & 0 deletions tools/arch/x86/include/asm/msr-index.h
@@ -206,6 +206,8 @@

/* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */
#define MSR_INTEGRITY_CAPS 0x000002d9
#define MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT 2
#define MSR_INTEGRITY_CAPS_ARRAY_BIST BIT(MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT)
#define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4
#define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT)
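
The new MSR_INTEGRITY_CAPS_ARRAY_BIST bit advertises array BIST support in IA32_INTEGRITY_CAPABILITIES. A hedged userspace sketch that probes it through the generic msr driver, assuming /dev/cpu/0/msr is available, root privileges, and a CPU that implements the MSR (the read simply fails otherwise):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_INTEGRITY_CAPS			0x000002d9
#define MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT	2

int main(void)
{
	uint64_t val = 0;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	/* pread() at offset == MSR number reads 8 bytes from that MSR. */
	if (fd < 0 || pread(fd, &val, sizeof(val), MSR_INTEGRITY_CAPS) != sizeof(val)) {
		perror("MSR_INTEGRITY_CAPS");
		return 1;
	}
	printf("array BIST: %s\n",
	       (val >> MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT) & 1 ? "supported" : "not supported");
	close(fd);
	return 0;
}
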

3 changes: 3 additions & 0 deletions tools/arch/x86/include/uapi/asm/kvm.h
@@ -559,4 +559,7 @@ struct kvm_pmu_event_filter {
#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */
#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */

/* x86-specific KVM_EXIT_HYPERCALL flags. */
#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0)

#endif /* _ASM_X86_KVM_H */
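
KVM_EXIT_HYPERCALL_LONG_MODE is reported in the hypercall exit information. A rough sketch of how a VMM run loop might test it, assuming the kvm_run hypercall exit exposes the flags field described in the KVM API documentation; illustrative only:

#include <linux/kvm.h>
#include <stdbool.h>

/* Returns true if the exiting hypercall was issued from 64-bit (long) mode. */
static bool hypercall_in_long_mode(const struct kvm_run *run)
{
	/* KVM_EXIT_HYPERCALL_LONG_MODE is bit 0 of run->hypercall.flags. */
	return run->exit_reason == KVM_EXIT_HYPERCALL &&
	       (run->hypercall.flags & (1ULL << 0));
}
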
8 changes: 8 additions & 0 deletions tools/arch/x86/include/uapi/asm/prctl.h
@@ -16,8 +16,16 @@
#define ARCH_GET_XCOMP_GUEST_PERM 0x1024
#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025

#define ARCH_XCOMP_TILECFG 17
#define ARCH_XCOMP_TILEDATA 18

#define ARCH_MAP_VDSO_X32 0x2001
#define ARCH_MAP_VDSO_32 0x2002
#define ARCH_MAP_VDSO_64 0x2003

#define ARCH_GET_UNTAG_MASK 0x4001
#define ARCH_ENABLE_TAGGED_ADDR 0x4002
#define ARCH_GET_MAX_TAG_BITS 0x4003
#define ARCH_FORCE_TAGGED_SVA 0x4004

#endif /* _ASM_X86_PRCTL_H */
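
The new ARCH_* codes above belong to the x86 Linear Address Masking (LAM) interface. A hedged sketch of how a process might opt in and query the untag mask, assuming an x86-64 kernel built with CONFIG_ADDRESS_MASKING; values and semantics follow the kernel's LAM documentation and are not part of this perf change:

#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#define ARCH_GET_UNTAG_MASK	0x4001	/* from the header above */
#define ARCH_ENABLE_TAGGED_ADDR	0x4002

int main(void)
{
	unsigned long untag_mask = 0;

	/* Request 6 tag bits (LAM_U57); fails if the CPU or kernel lack LAM. */
	if (syscall(SYS_arch_prctl, ARCH_ENABLE_TAGGED_ADDR, 6UL))
		perror("ARCH_ENABLE_TAGGED_ADDR");

	if (!syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &untag_mask))
		printf("untag mask: 0x%lx\n", untag_mask);

	return 0;
}
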
3 changes: 3 additions & 0 deletions tools/arch/x86/include/uapi/asm/unistd_32.h
@@ -2,6 +2,9 @@
#ifndef __NR_fork
#define __NR_fork 2
#endif
#ifndef __NR_execve
#define __NR_execve 11
#endif
#ifndef __NR_getppid
#define __NR_getppid 64
#endif
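
This fallback exists because 'perf bench syscall' refers to __NR_execve directly, and i386 builds previously failed with "__NR_execve undeclared". A hypothetical illustration of the pattern (not the actual bench code) that relies on the definition being available:

#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef __NR_execve
#define __NR_execve 11	/* same fallback as the header above */
#endif

/* One iteration: fork a child that re-execs /bin/true via the raw syscall. */
static void one_execve_iteration(void)
{
	char *argv[] = { "/bin/true", NULL };
	char *envp[] = { NULL };
	pid_t pid = fork();

	if (pid == 0) {
		syscall(__NR_execve, "/bin/true", argv, envp);
		_exit(1);	/* only reached if execve failed */
	}
	waitpid(pid, NULL, 0);
}
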
34 changes: 10 additions & 24 deletions tools/arch/x86/lib/memcpy_64.S
@@ -9,13 +9,6 @@

.section .noinstr.text, "ax"

/*
* We build a jump to memcpy_orig by default which gets NOPped out on
* the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
* have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
* to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
*/

/*
* memcpy - Copy a memory block.
*
@@ -26,17 +19,21 @@
*
* Output:
* rax original destination
*
* The FSRM alternative should be done inline (avoiding the call and
* the disgusting return handling), but that would require some help
* from the compiler for better calling conventions.
*
* The 'rep movsb' itself is small enough to replace the call, but the
* two register moves blow up the code. And one of them is "needed"
* only for the return value that is the same as the source input,
* which the compiler could/should do much better anyway.
*/
SYM_TYPED_FUNC_START(__memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS
ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM

movq %rdi, %rax
movq %rdx, %rcx
shrq $3, %rcx
andl $7, %edx
rep movsq
movl %edx, %ecx
rep movsb
RET
SYM_FUNC_END(__memcpy)
@@ -45,17 +42,6 @@ EXPORT_SYMBOL(__memcpy)
SYM_FUNC_ALIAS(memcpy, __memcpy)
EXPORT_SYMBOL(memcpy)

/*
* memcpy_erms() - enhanced fast string memcpy. This is faster and
* simpler than memcpy. Use memcpy_erms when possible.
*/
SYM_FUNC_START_LOCAL(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
RET
SYM_FUNC_END(memcpy_erms)

SYM_FUNC_START_LOCAL(memcpy_orig)
movq %rdi, %rax

47 changes: 11 additions & 36 deletions tools/arch/x86/lib/memset_64.S
@@ -18,27 +18,22 @@
* rdx count (bytes)
*
* rax original destination
*
* The FSRS alternative should be done inline (avoiding the call and
* the disgusting return handling), but that would require some help
* from the compiler for better calling conventions.
*
* The 'rep stosb' itself is small enough to replace the call, but all
* the register moves blow up the code. And two of them are "needed"
* only for the return value that is the same as the source input,
* which the compiler could/should do much better anyway.
*/
SYM_FUNC_START(__memset)
/*
* Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
* to use it when possible. If not available, use fast string instructions.
*
* Otherwise, use original memset function.
*/
ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memset_erms", X86_FEATURE_ERMS
ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS

movq %rdi,%r9
movb %sil,%al
movq %rdx,%rcx
andl $7,%edx
shrq $3,%rcx
/* expand byte value */
movzbl %sil,%esi
movabs $0x0101010101010101,%rax
imulq %rsi,%rax
rep stosq
movl %edx,%ecx
rep stosb
movq %r9,%rax
RET
@@ -48,26 +43,6 @@ EXPORT_SYMBOL(__memset)
SYM_FUNC_ALIAS(memset, __memset)
EXPORT_SYMBOL(memset)

/*
* ISO C memset - set a memory block to a byte value. This function uses
* enhanced rep stosb to override the fast string function.
* The code is simpler and shorter than the fast string function as well.
*
* rdi destination
* rsi value (char)
* rdx count (bytes)
*
* rax original destination
*/
SYM_FUNC_START_LOCAL(memset_erms)
movq %rdi,%r9
movb %sil,%al
movq %rdx,%rcx
rep stosb
movq %r9,%rax
RET
SYM_FUNC_END(memset_erms)

SYM_FUNC_START_LOCAL(memset_orig)
movq %rdi,%r10

3 changes: 1 addition & 2 deletions tools/include/asm/alternative.h
@@ -4,7 +4,6 @@

/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */

#define altinstruction_entry #
#define ALTERNATIVE_2 #
#define ALTERNATIVE #

#endif