Skip to content

Commit

Permalink
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/…
Browse files Browse the repository at this point in the history
…linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
 "I'd like to apologize for this very late pull request: I was dithering
  through the week whether to send the fixes, and then yesterday Jiri's
  crash fix for a regression introduced in this cycle clearly marked
  perf/urgent as 'must merge now'.

  Most of the commits are tooling fixes, plus there's three kernel fixes
  via four commits:

    - race fix in the Intel PEBS code

    - fix an AUX bug and roll back a previous attempt

    - fix AMD family 17h generic HW cache-event perf counters

  The largest diffstat contribution comes from the AMD fix - a new event
  table is introduced, which is a fairly low risk change but has a large
  linecount"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel: Fix race in intel_pmu_disable_event()
  perf/x86/intel/pt: Remove software double buffering PMU capability
  perf/ring_buffer: Fix AUX software double buffering
  perf tools: Remove needless asm/unistd.h include fixing build in some places
  tools arch uapi: Copy missing unistd.h headers for arc, hexagon and riscv
  tools build: Add -ldl to the disassembler-four-args feature test
  perf cs-etm: Always allocate memory for cs_etm_queue::prev_packet
  perf cs-etm: Don't check cs_etm_queue::prev_packet validity
  perf report: Report OOM in status line in the GTK UI
  perf bench numa: Add define for RUSAGE_THREAD if not present
  tools lib traceevent: Change tag string for error
  perf annotate: Fix build on 32 bit for BPF annotation
  tools uapi x86: Sync vmx.h with the kernel
  perf bpf: Return value with unlocking in perf_env__find_btf()
  MAINTAINERS: Include vendor specific files under arch/*/events/*
  perf/x86/amd: Update generic hardware cache events for Family 17h
  • Loading branch information
Linus Torvalds committed May 5, 2019
2 parents 70c9fb5 + 6f55967 commit 7178fb0
Show file tree
Hide file tree
Showing 18 changed files with 272 additions and 32 deletions.
1 change: 1 addition & 0 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -12176,6 +12176,7 @@ F: arch/*/kernel/*/*/perf_event*.c
F: arch/*/include/asm/perf_event.h
F: arch/*/kernel/perf_callchain.c
F: arch/*/events/*
F: arch/*/events/*/*
F: tools/perf/

PERSONALITY HANDLING
Expand Down
111 changes: 108 additions & 3 deletions arch/x86/events/amd/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,110 @@ static __initconst const u64 amd_hw_cache_event_ids
},
};

/*
 * Generic hardware cache event IDs for AMD Family 17h and later.
 *
 * amd_pmu_init() copies this table into hw_cache_event_ids when
 * boot_cpu_data.x86 >= 0x17; older families keep using
 * amd_hw_cache_event_ids.
 *
 * Indexed as [cache type][operation][result]. Each slot holds either a raw
 * event code, 0, or -1.
 *
 * NOTE(review): presumably 0 means "no event available for this combination"
 * and -1 means "combination is not meaningful", as interpreted by the
 * consumer of hw_cache_event_ids - confirm against that code.
 */
static __initconst const u64 amd_hw_cache_event_ids_f17h
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x0040, /* Data Cache Accesses */
[C(RESULT_MISS)] = 0xc860, /* L2$ access from DC Miss */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = 0,
[C(RESULT_MISS)] = 0,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = 0xff5a, /* h/w prefetch DC Fills */
[C(RESULT_MISS)] = 0,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x0080, /* Instruction cache fetches */
[C(RESULT_MISS)] = 0x0081, /* Instruction cache misses */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = 0,
[C(RESULT_MISS)] = 0,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0,
[C(RESULT_MISS)] = 0,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = 0,
[C(RESULT_MISS)] = 0,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = 0,
[C(RESULT_MISS)] = 0,
},
},
[C(DTLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0xff45, /* All L2 DTLB accesses */
[C(RESULT_MISS)] = 0xf045, /* L2 DTLB misses (PT walks) */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = 0,
[C(RESULT_MISS)] = 0,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = 0,
[C(RESULT_MISS)] = 0,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x0084, /* L1 ITLB misses, L2 ITLB hits */
[C(RESULT_MISS)] = 0xff85, /* L1 ITLB misses, L2 misses */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x00c2, /* Retired Branch Instr. */
[C(RESULT_MISS)] = 0x00c3, /* Retired Mispredicted BI */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
},
[C(NODE)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0,
[C(RESULT_MISS)] = 0,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
},
};

/*
* AMD Performance Monitor K7 and later, up to and including Family 16h:
*/
Expand Down Expand Up @@ -865,9 +969,10 @@ __init int amd_pmu_init(void)
x86_pmu.amd_nb_constraints = 0;
}

/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
if (boot_cpu_data.x86 >= 0x17)
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids_f17h, sizeof(hw_cache_event_ids));
else
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids));

return 0;
}
Expand Down
10 changes: 7 additions & 3 deletions arch/x86/events/intel/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2091,15 +2091,19 @@ static void intel_pmu_disable_event(struct perf_event *event)
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
cpuc->intel_cp_status &= ~(1ull << hwc->idx);

if (unlikely(event->attr.precise_ip))
intel_pmu_pebs_disable(event);

if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc);
return;
}

x86_pmu_disable_event(event);

/*
* Needs to be called after x86_pmu_disable_event,
* so we don't trigger the event without PEBS bit set.
*/
if (unlikely(event->attr.precise_ip))
intel_pmu_pebs_disable(event);
}

static void intel_pmu_del_event(struct perf_event *event)
Expand Down
3 changes: 1 addition & 2 deletions arch/x86/events/intel/pt.c
Original file line number Diff line number Diff line change
Expand Up @@ -1525,8 +1525,7 @@ static __init int pt_init(void)
}

if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
pt_pmu.pmu.capabilities =
PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;

pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
pt_pmu.pmu.attr_groups = pt_attr_groups;
Expand Down
1 change: 0 additions & 1 deletion include/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,6 @@ struct perf_event;
#define PERF_PMU_CAP_NO_INTERRUPT 0x01
#define PERF_PMU_CAP_NO_NMI 0x02
#define PERF_PMU_CAP_AUX_NO_SG 0x04
#define PERF_PMU_CAP_AUX_SW_DOUBLEBUF 0x08
#define PERF_PMU_CAP_EXCLUSIVE 0x10
#define PERF_PMU_CAP_ITRACE 0x20
#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40
Expand Down
3 changes: 1 addition & 2 deletions kernel/events/ring_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -610,8 +610,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
* PMU requests more than one contiguous chunks of memory
* for SW double buffering
*/
if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_SW_DOUBLEBUF) &&
!overwrite) {
if (!overwrite) {
if (!max_order)
return -EINVAL;

Expand Down
51 changes: 51 additions & 0 deletions tools/arch/arc/include/uapi/asm/unistd.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/

/******** no-legacy-syscalls-ABI *******/

/*
* Non-typical guard macro to enable inclusion twice in ARCH sys.c
* That is how the Generic syscall wrapper generator works
*/
#if !defined(_UAPI_ASM_ARC_UNISTD_H) || defined(__SYSCALL)
#define _UAPI_ASM_ARC_UNISTD_H

/*
 * Defined ahead of the <asm-generic/unistd.h> include below.
 * NOTE(review): presumably these opt ARC in to optional entries of the
 * generic syscall list - confirm against asm-generic/unistd.h.
 */
#define __ARCH_WANT_RENAMEAT
#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SET_GET_RLIMIT
#define __ARCH_WANT_SYS_EXECVE
#define __ARCH_WANT_SYS_CLONE
#define __ARCH_WANT_SYS_VFORK
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_TIME32_SYSCALLS

/* Any use of the name sys_mmap2 from here on resolves to sys_mmap_pgoff */
#define sys_mmap2 sys_mmap_pgoff

#include <asm-generic/unistd.h>

#define NR_syscalls __NR_syscalls

/* Generic syscall (fs/filesystems.c) - lost in asm-generic/unistd.h */
#define __NR_sysfs (__NR_arch_specific_syscall + 3)

/* ARC specific syscall (offset 3 is taken by __NR_sysfs above) */
#define __NR_cacheflush (__NR_arch_specific_syscall + 0)
#define __NR_arc_settls (__NR_arch_specific_syscall + 1)
#define __NR_arc_gettls (__NR_arch_specific_syscall + 2)
#define __NR_arc_usr_cmpxchg (__NR_arch_specific_syscall + 4)

/*
 * One __SYSCALL() entry per number defined above; what each entry expands to
 * is decided by whichever __SYSCALL definition is in effect at this point.
 */
__SYSCALL(__NR_cacheflush, sys_cacheflush)
__SYSCALL(__NR_arc_settls, sys_arc_settls)
__SYSCALL(__NR_arc_gettls, sys_arc_gettls)
__SYSCALL(__NR_arc_usr_cmpxchg, sys_arc_usr_cmpxchg)
__SYSCALL(__NR_sysfs, sys_sysfs)

/*
 * NOTE(review): presumably undefined here so that a later inclusion can
 * supply a fresh __SYSCALL definition - confirm against the includer.
 */
#undef __SYSCALL

#endif
40 changes: 40 additions & 0 deletions tools/arch/hexagon/include/uapi/asm/unistd.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Syscall support for Hexagon
*
* Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/

/*
* The kernel pulls this unistd.h in three different ways:
* 1. the "normal" way which gets all the __NR defines
* 2. with __SYSCALL defined to produce function declarations
* 3. with __SYSCALL defined to produce syscall table initialization
* See also: syscalltab.c
*/

/*
 * NOTE(review): there is no include guard in this header - presumably
 * intentional, since it is meant to be pulled in more than once with
 * different __SYSCALL definitions; confirm before adding one.
 */

/* Any use of the name sys_mmap2 from here on resolves to sys_mmap_pgoff */
#define sys_mmap2 sys_mmap_pgoff
/*
 * Defined ahead of the <asm-generic/unistd.h> include below.
 * NOTE(review): presumably these opt Hexagon in to optional entries of the
 * generic syscall list - confirm against asm-generic/unistd.h.
 */
#define __ARCH_WANT_RENAMEAT
#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SET_GET_RLIMIT
#define __ARCH_WANT_SYS_EXECVE
#define __ARCH_WANT_SYS_CLONE
#define __ARCH_WANT_SYS_VFORK
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_TIME32_SYSCALLS

#include <asm-generic/unistd.h>
42 changes: 42 additions & 0 deletions tools/arch/riscv/include/uapi/asm/unistd.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Copyright (C) 2018 David Abdurachmanov <david.abdurachmanov@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/*
 * These two requests are made only for LP64 builds, and must be defined
 * before <asm-generic/unistd.h> is included below.
 */
#ifdef __LP64__
#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_SET_GET_RLIMIT
#endif /* __LP64__ */

#include <asm-generic/unistd.h>

/*
 * Allows the instruction cache to be flushed from userspace. Despite RISC-V
 * having a direct 'fence.i' instruction available to userspace (which we
 * can't trap!), that's not actually viable when running on Linux because the
 * kernel might schedule a process on another hart. There is no way for
 * userspace to handle this without invoking the kernel (as it doesn't know the
 * thread->hart mappings), so we've defined a RISC-V specific system call to
 * flush the instruction cache.
 *
 * __NR_riscv_flush_icache is defined to flush the instruction cache over an
 * address range, with the flush applying to either all threads or just the
 * caller. We don't currently do anything with the address range, that's just
 * in there for forwards compatibility.
 */
/* An earlier definition of __NR_riscv_flush_icache, if any, is kept as-is */
#ifndef __NR_riscv_flush_icache
#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
#endif
__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
1 change: 1 addition & 0 deletions tools/arch/x86/include/uapi/asm/vmx.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@

#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
#define VMX_ABORT_VMCS_CORRUPTED 3
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4

#endif /* _UAPIVMX_H */
2 changes: 1 addition & 1 deletion tools/lib/traceevent/parse-utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
void __vwarning(const char *fmt, va_list ap)
{
if (errno)
perror("trace-cmd");
perror("libtraceevent");
errno = 0;

fprintf(stderr, " ");
Expand Down
2 changes: 1 addition & 1 deletion tools/perf/Makefile.config
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS)

FEATURE_CHECK_LDFLAGS-libaio = -lrt

FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes
FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl

CFLAGS += -fno-omit-frame-pointer
CFLAGS += -ggdb3
Expand Down
4 changes: 4 additions & 0 deletions tools/perf/bench/numa.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@
#include <numa.h>
#include <numaif.h>

/*
 * Fallback for build environments whose headers do not define RUSAGE_THREAD.
 * NOTE(review): 1 is expected to match the value the Linux kernel uses for
 * RUSAGE_THREAD - confirm against <linux/resource.h>.
 */
#ifndef RUSAGE_THREAD
# define RUSAGE_THREAD 1
#endif

/*
* Regular printout to the terminal, suppressed if -q is specified:
*/
Expand Down
8 changes: 4 additions & 4 deletions tools/perf/util/annotate.c
Original file line number Diff line number Diff line change
Expand Up @@ -1714,8 +1714,8 @@ static int symbol__disassemble_bpf(struct symbol *sym,
if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO)
return -1;

pr_debug("%s: handling sym %s addr %lx len %lx\n", __func__,
sym->name, sym->start, sym->end - sym->start);
pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__,
sym->name, sym->start, sym->end - sym->start);

memset(tpath, 0, sizeof(tpath));
perf_exe(tpath, sizeof(tpath));
Expand All @@ -1740,7 +1740,7 @@ static int symbol__disassemble_bpf(struct symbol *sym,
info_linear = info_node->info_linear;
sub_id = dso->bpf_prog.sub_id;

info.buffer = (void *)(info_linear->info.jited_prog_insns);
info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns);
info.buffer_length = info_linear->info.jited_prog_len;

if (info_linear->info.nr_line_info)
Expand Down Expand Up @@ -1776,7 +1776,7 @@ static int symbol__disassemble_bpf(struct symbol *sym,
const char *srcline;
u64 addr;

addr = pc + ((u64 *)(info_linear->info.jited_ksyms))[sub_id];
addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id];
count = disassemble(pc, &info);

if (prog_linfo)
Expand Down
1 change: 0 additions & 1 deletion tools/perf/util/cloexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include "asm/bug.h"
#include "debug.h"
#include <unistd.h>
#include <asm/unistd.h>
#include <sys/syscall.h>

static unsigned long flag = PERF_FLAG_FD_CLOEXEC;
Expand Down
Loading

0 comments on commit 7178fb0

Please sign in to comment.