Skip to content

Commit

Permalink
Merge tag 'perf-core-for-mingo-5.5-20191203' of git://git.kernel.org/…
Browse files Browse the repository at this point in the history
…pub/scm/linux/kernel/git/acme/linux into perf/urgent

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

perf report/top:

  - Fix segfault due to missing initialization of recently introduced
    struct map_symbol 'maps' field in append_inlines(), when running
    with DWARF callchains.

perf stat:

  Andi Kleen:

  - Affinity based optimizations for sessions with many events in
    machines with large core counts, avoiding excessive number of IPIs.

libtraceevent:

  - Sudip Mukherjee:

  - Fix installation with O=.

  - Copy pkg-config file to output folder when using O=.

perf bench:

  Arnaldo Carvalho de Melo:

  - Update the copies of x86's mem{cpy,set}_64.S, and because that
    now uses new stuff in linux/linkage.h, update that header too, which
    made the minimal clang version to build perf to be 3.5, as
    3.4 as found in some of the container images used to test build perf
    can't grok STT_FUNC as a token in .type lines.

ABI headers:

  Arnaldo Carvalho de Melo:

  - Sync x86's msr-index.h copy with the kernel sources, resulting
    in new MSRs to be usable in filter expressions in 'perf trace',
    such as IA32_TSX_CTRL.

  - Sync linux/fscrypt.h, linux/stat.h, sched.h and the kvm headers.

perf trace:

  Arnaldo Carvalho de Melo:

  - Add CLEAR_SIGHAND support for clone's flags arg

perf kvm:

  Arnaldo Carvalho de Melo:

  - Clarify the 'perf kvm' -i and -o command line options

perf test:

  Ian Rogers:

  - Move test functionality in to a 'perf test' entry.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Ingo Molnar committed Dec 4, 2019
2 parents 596cf45 + 15b3904 commit 9f58c93
Show file tree
Hide file tree
Showing 42 changed files with 789 additions and 229 deletions.
3 changes: 2 additions & 1 deletion tools/arch/arm/include/uapi/asm/kvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,9 @@ struct kvm_vcpu_events {
struct {
__u8 serror_pending;
__u8 serror_has_esr;
__u8 ext_dabt_pending;
/* Align it to 8 bytes */
__u8 pad[6];
__u8 pad[5];
__u64 serror_esr;
} exception;
__u32 reserved[12];
Expand Down
5 changes: 4 additions & 1 deletion tools/arch/arm64/include/uapi/asm/kvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,9 @@ struct kvm_vcpu_events {
struct {
__u8 serror_pending;
__u8 serror_has_esr;
__u8 ext_dabt_pending;
/* Align it to 8 bytes */
__u8 pad[6];
__u8 pad[5];
__u64 serror_esr;
} exception;
__u32 reserved[12];
Expand Down Expand Up @@ -323,6 +324,8 @@ struct kvm_vcpu_events {
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
#define KVM_ARM_VCPU_PVTIME_CTRL 2
#define KVM_ARM_VCPU_PVTIME_IPA 0

/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_VCPU2_SHIFT 28
Expand Down
3 changes: 3 additions & 0 deletions tools/arch/powerpc/include/uapi/asm/kvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,8 @@ struct kvm_ppc_cpu_char {

/* PPC64 eXternal Interrupt Controller Specification */
#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
#define KVM_DEV_XICS_GRP_CTRL 2
#define KVM_DEV_XICS_NR_SERVERS 1

/* Layout of 64-bit source attribute values */
#define KVM_XICS_DESTINATION_SHIFT 0
Expand All @@ -683,6 +685,7 @@ struct kvm_ppc_cpu_char {
#define KVM_DEV_XIVE_GRP_CTRL 1
#define KVM_DEV_XIVE_RESET 1
#define KVM_DEV_XIVE_EQ_SYNC 2
#define KVM_DEV_XIVE_NR_SERVERS 3
#define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */
#define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */
#define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */
Expand Down
3 changes: 3 additions & 0 deletions tools/arch/x86/include/asm/cpufeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */
#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */
#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
Expand Down Expand Up @@ -399,5 +400,7 @@
#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */

#endif /* _ASM_X86_CPUFEATURES_H */
18 changes: 18 additions & 0 deletions tools/arch/x86/include/asm/msr-index.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,18 @@
* Microarchitectural Data
* Sampling (MDS) vulnerabilities.
*/
#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /*
* The processor is not susceptible to a
* machine check error due to modifying the
* code page size along with either the
* physical address or cache type
* without TLB invalidation.
*/
#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */
#define ARCH_CAP_TAA_NO BIT(8) /*
* Not susceptible to
* TSX Async Abort (TAA) vulnerabilities.
*/

#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
Expand All @@ -103,6 +115,10 @@
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e

#define MSR_IA32_TSX_CTRL 0x00000122
#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */
#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */

#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
#define MSR_IA32_SYSENTER_EIP 0x00000176
Expand Down Expand Up @@ -393,6 +409,8 @@
#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD_PPIN_CTL 0xc00102f0
#define MSR_AMD_PPIN 0xc00102f1
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_BU_CFG2 0xc001102a
Expand Down
20 changes: 10 additions & 10 deletions tools/arch/x86/lib/memcpy_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
* Output:
* rax original destination
*/
ENTRY(__memcpy)
ENTRY(memcpy)
SYM_FUNC_START_ALIAS(__memcpy)
SYM_FUNC_START_LOCAL(memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS

Expand All @@ -41,23 +41,23 @@ ENTRY(memcpy)
movl %edx, %ecx
rep movsb
ret
ENDPROC(memcpy)
ENDPROC(__memcpy)
SYM_FUNC_END(memcpy)
SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)

/*
* memcpy_erms() - enhanced fast string memcpy. This is faster and
* simpler than memcpy. Use memcpy_erms when possible.
*/
ENTRY(memcpy_erms)
SYM_FUNC_START(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
ret
ENDPROC(memcpy_erms)
SYM_FUNC_END(memcpy_erms)

ENTRY(memcpy_orig)
SYM_FUNC_START(memcpy_orig)
movq %rdi, %rax

cmpq $0x20, %rdx
Expand Down Expand Up @@ -182,7 +182,7 @@ ENTRY(memcpy_orig)

.Lend:
retq
ENDPROC(memcpy_orig)
SYM_FUNC_END(memcpy_orig)

#ifndef CONFIG_UML

Expand All @@ -193,7 +193,7 @@ MCSAFE_TEST_CTL
* Note that we only catch machine checks when reading the source addresses.
* Writes to target are posted and don't generate machine checks.
*/
ENTRY(__memcpy_mcsafe)
SYM_FUNC_START(__memcpy_mcsafe)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
Expand Down Expand Up @@ -260,7 +260,7 @@ ENTRY(__memcpy_mcsafe)
xorl %eax, %eax
.L_done:
ret
ENDPROC(__memcpy_mcsafe)
SYM_FUNC_END(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)

.section .fixup, "ax"
Expand Down
16 changes: 8 additions & 8 deletions tools/arch/x86/lib/memset_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
*
* rax original destination
*/
ENTRY(memset)
ENTRY(__memset)
SYM_FUNC_START_ALIAS(memset)
SYM_FUNC_START(__memset)
/*
* Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
* to use it when possible. If not available, use fast string instructions.
Expand All @@ -42,8 +42,8 @@ ENTRY(__memset)
rep stosb
movq %r9,%rax
ret
ENDPROC(memset)
ENDPROC(__memset)
SYM_FUNC_END(__memset)
SYM_FUNC_END_ALIAS(memset)

/*
* ISO C memset - set a memory block to a byte value. This function uses
Expand All @@ -56,16 +56,16 @@ ENDPROC(__memset)
*
* rax original destination
*/
ENTRY(memset_erms)
SYM_FUNC_START(memset_erms)
movq %rdi,%r9
movb %sil,%al
movq %rdx,%rcx
rep stosb
movq %r9,%rax
ret
ENDPROC(memset_erms)
SYM_FUNC_END(memset_erms)

ENTRY(memset_orig)
SYM_FUNC_START(memset_orig)
movq %rdi,%r10

/* expand byte value */
Expand Down Expand Up @@ -136,4 +136,4 @@ ENTRY(memset_orig)
subq %r8,%rdx
jmp .Lafter_bad_alignment
.Lfinal:
ENDPROC(memset_orig)
SYM_FUNC_END(memset_orig)
3 changes: 2 additions & 1 deletion tools/include/uapi/linux/fscrypt.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
#define FSCRYPT_POLICY_FLAGS_PAD_32 0x03
#define FSCRYPT_POLICY_FLAGS_PAD_MASK 0x03
#define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04
#define FSCRYPT_POLICY_FLAGS_VALID 0x07
#define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 0x08
#define FSCRYPT_POLICY_FLAGS_VALID 0x0F

/* Encryption algorithms */
#define FSCRYPT_MODE_AES_256_XTS 1
Expand Down
11 changes: 11 additions & 0 deletions tools/include/uapi/linux/kvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ struct kvm_hyperv_exit {
#define KVM_EXIT_S390_STSI 25
#define KVM_EXIT_IOAPIC_EOI 26
#define KVM_EXIT_HYPERV 27
#define KVM_EXIT_ARM_NISV 28

/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
Expand Down Expand Up @@ -394,6 +395,11 @@ struct kvm_run {
} eoi;
/* KVM_EXIT_HYPERV */
struct kvm_hyperv_exit hyperv;
/* KVM_EXIT_ARM_NISV */
struct {
__u64 esr_iss;
__u64 fault_ipa;
} arm_nisv;
/* Fix the size of the union. */
char padding[256];
};
Expand Down Expand Up @@ -1000,6 +1006,9 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PMU_EVENT_FILTER 173
#define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174
#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175
#define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 176
#define KVM_CAP_ARM_NISV_TO_USER 177
#define KVM_CAP_ARM_INJECT_EXT_DABT 178

#ifdef KVM_CAP_IRQ_ROUTING

Expand Down Expand Up @@ -1227,6 +1236,8 @@ enum kvm_device_type {
#define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS
KVM_DEV_TYPE_XIVE,
#define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE
KVM_DEV_TYPE_ARM_PV_TIME,
#define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME
KVM_DEV_TYPE_MAX,
};

Expand Down
60 changes: 42 additions & 18 deletions tools/include/uapi/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,27 +33,48 @@
#define CLONE_NEWNET 0x40000000 /* New network namespace */
#define CLONE_IO 0x80000000 /* Clone io context */

/* Flags for the clone3() syscall. */
#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */

#ifndef __ASSEMBLY__
/**
* struct clone_args - arguments for the clone3 syscall
* @flags: Flags for the new process as listed above.
* All flags are valid except for CSIGNAL and
* CLONE_DETACHED.
* @pidfd: If CLONE_PIDFD is set, a pidfd will be
* returned in this argument.
* @child_tid: If CLONE_CHILD_SETTID is set, the TID of the
* child process will be returned in the child's
* memory.
* @parent_tid: If CLONE_PARENT_SETTID is set, the TID of
* the child process will be returned in the
* parent's memory.
* @exit_signal: The exit_signal the parent process will be
* sent when the child exits.
* @stack: Specify the location of the stack for the
* child process.
* @stack_size: The size of the stack for the child process.
* @tls: If CLONE_SETTLS is set, the tls descriptor
* is set to tls.
* @flags: Flags for the new process as listed above.
* All flags are valid except for CSIGNAL and
* CLONE_DETACHED.
* @pidfd: If CLONE_PIDFD is set, a pidfd will be
* returned in this argument.
* @child_tid: If CLONE_CHILD_SETTID is set, the TID of the
* child process will be returned in the child's
* memory.
* @parent_tid: If CLONE_PARENT_SETTID is set, the TID of
* the child process will be returned in the
* parent's memory.
* @exit_signal: The exit_signal the parent process will be
* sent when the child exits.
* @stack: Specify the location of the stack for the
* child process.
* Note, @stack is expected to point to the
* lowest address. The stack direction will be
* determined by the kernel and set up
* appropriately based on @stack_size.
* @stack_size: The size of the stack for the child process.
* @tls: If CLONE_SETTLS is set, the tls descriptor
* is set to tls.
* @set_tid: Pointer to an array of type *pid_t. The size
* of the array is defined using @set_tid_size.
* This array is used to select PIDs/TIDs for
* newly created processes. The first element in
* this defines the PID in the most nested PID
* namespace. Each additional element in the array
* defines the PID in the parent PID namespace of
* the original PID namespace. If the array has
* less entries than the number of currently
* nested PID namespaces only the PIDs in the
* corresponding namespaces are set.
* @set_tid_size: This defines the size of the array referenced
* in @set_tid. This cannot be larger than the
* kernel's limit of nested PID namespaces.
*
* The structure is versioned by size and thus extensible.
* New struct members must go at the end of the struct and
Expand All @@ -68,10 +89,13 @@ struct clone_args {
__aligned_u64 stack;
__aligned_u64 stack_size;
__aligned_u64 tls;
__aligned_u64 set_tid;
__aligned_u64 set_tid_size;
};
#endif

#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */

/*
* Scheduling policies
Expand Down
2 changes: 1 addition & 1 deletion tools/include/uapi/linux/stat.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,8 @@ struct statx {
#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */
#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */
#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */

#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */


#endif /* _UAPI_LINUX_STAT_H */
6 changes: 4 additions & 2 deletions tools/lib/traceevent/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION)

LIB_TARGET = libtraceevent.a libtraceevent.so.$(EVENT_PARSE_VERSION)
LIB_INSTALL = libtraceevent.a libtraceevent.so*
LIB_INSTALL := $(addprefix $(OUTPUT),$(LIB_INSTALL))

INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES)

Expand Down Expand Up @@ -207,10 +208,11 @@ define do_install
$(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
endef

PKG_CONFIG_FILE = libtraceevent.pc
PKG_CONFIG_SOURCE_FILE = libtraceevent.pc
PKG_CONFIG_FILE := $(addprefix $(OUTPUT),$(PKG_CONFIG_SOURCE_FILE))
define do_install_pkgconfig_file
if [ -n "${pkgconfig_dir}" ]; then \
cp -f ${PKG_CONFIG_FILE}.template ${PKG_CONFIG_FILE}; \
cp -f ${PKG_CONFIG_SOURCE_FILE}.template ${PKG_CONFIG_FILE}; \
sed -i "s|INSTALL_PREFIX|${1}|g" ${PKG_CONFIG_FILE}; \
sed -i "s|LIB_VERSION|${EVENT_PARSE_VERSION}|g" ${PKG_CONFIG_FILE}; \
sed -i "s|LIB_DIR|${libdir}|g" ${PKG_CONFIG_FILE}; \
Expand Down
5 changes: 3 additions & 2 deletions tools/perf/Documentation/perf-kvm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,11 @@ OPTIONS
-------
-i::
--input=<path>::
Input file name.
Input file name, for the 'report', 'diff' and 'buildid-list' subcommands.
-o::
--output=<path>::
Output file name.
Output file name, for the 'record' subcommand. Doesn't work with 'report',
just redirect the output to a file when using 'report'.
--host::
Collect host side performance profile.
--guest::
Expand Down
Loading

0 comments on commit 9f58c93

Please sign in to comment.