Alexei Starovoitov says:

====================
pull-request: bpf-next 2021-09-17

We've added 63 non-merge commits during the last 12 day(s) which contain
a total of 65 files changed, 2653 insertions(+), 751 deletions(-).

The main changes are:

1) Streamline internal BPF program sections handling and
   bpf_program__set_attach_target() in libbpf, from Andrii.

2) Add support for new btf kind BTF_KIND_TAG, from Yonghong.

3) Introduce bpf_get_branch_snapshot() to capture LBR, from Song.

4) IMUL optimization for x86-64 JIT, from Jie.

5) xsk selftest improvements, from Magnus.

6) Introduce legacy kprobe events support in libbpf, from Rafael.

7) Access hw timestamp through BPF's __sk_buff, from Vadim.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (63 commits)
  selftests/bpf: Fix a few compiler warnings
  libbpf: Constify all high-level program attach APIs
  libbpf: Schedule open_opts.attach_prog_fd deprecation since v0.7
  selftests/bpf: Switch fexit_bpf2bpf selftest to set_attach_target() API
  libbpf: Allow skipping attach_func_name in bpf_program__set_attach_target()
  libbpf: Deprecated bpf_object_open_opts.relaxed_core_relocs
  selftests/bpf: Stop using relaxed_core_relocs which has no effect
  libbpf: Use pre-setup sec_def in libbpf_find_attach_btf_id()
  bpf: Update bpf_get_smp_processor_id() documentation
  libbpf: Add sphinx code documentation comments
  selftests/bpf: Skip btf_tag test if btf_tag attribute not supported
  docs/bpf: Add documentation for BTF_KIND_TAG
  selftests/bpf: Add a test with a bpf program with btf_tag attributes
  selftests/bpf: Test BTF_KIND_TAG for deduplication
  selftests/bpf: Add BTF_KIND_TAG unit tests
  selftests/bpf: Change NAME_NTH/IS_NAME_NTH for BTF_KIND_TAG format
  selftests/bpf: Test libbpf API function btf__add_tag()
  bpftool: Add support for BTF_KIND_TAG
  libbpf: Add support for BTF_KIND_TAG
  libbpf: Rename btf_{hash,equal}_int to btf_{hash,equal}_int_tag
  ...
====================

Link: https://lore.kernel.org/r/20210917173738.3397064-1-ast@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Jakub Kicinski committed Sep 17, 2021
2 parents f68d08c + ca21a3e commit af54faa
Showing 65 changed files with 2,653 additions and 751 deletions.
29 changes: 28 additions & 1 deletion Documentation/bpf/btf.rst
@@ -85,6 +85,7 @@ sequentially and type id is assigned to each recognized type starting from id
#define BTF_KIND_VAR 14 /* Variable */
#define BTF_KIND_DATASEC 15 /* Section */
#define BTF_KIND_FLOAT 16 /* Floating point */
#define BTF_KIND_TAG 17 /* Tag */

Note that the type section encodes debug info, not just pure types.
``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
@@ -106,7 +107,7 @@ Each type contains the following common data::
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
* FUNC and FUNC_PROTO.
* FUNC, FUNC_PROTO and TAG.
* "type" is a type_id referring to another type.
*/
union {
@@ -465,6 +466,32 @@ map definition.

No additional type data follow ``btf_type``.

2.2.17 BTF_KIND_TAG
~~~~~~~~~~~~~~~~~~~

``struct btf_type`` encoding requirement:
* ``name_off``: offset to a non-empty string
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_TAG
* ``info.vlen``: 0
* ``type``: ``struct``, ``union``, ``func`` or ``var``

``btf_type`` is followed by ``struct btf_tag``.::

struct btf_tag {
__u32 component_idx;
};

The ``name_off`` encodes the btf_tag attribute string.
The ``type`` should be ``struct``, ``union``, ``func`` or ``var``.
For ``var`` type, ``btf_tag.component_idx`` must be ``-1``.
For the other three types, if the btf_tag attribute is
applied to the ``struct``, ``union`` or ``func`` itself,
``btf_tag.component_idx`` must be ``-1``. Otherwise,
the attribute is applied to a ``struct``/``union`` member or
a ``func`` argument, and ``btf_tag.component_idx`` should be a
valid index (starting from 0) pointing to a member or an argument.

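As an illustration (not part of this patch), a compiler with ``btf_tag``
attribute support turns source annotations like the following into
``BTF_KIND_TAG`` entries; the macro name and tag strings here are arbitrary
examples::

    #define __tag(x) __attribute__((btf_tag(x)))

    struct pkt_ctx {
            int ifindex;
            void *user_buf __tag("user");  /* tag on member 1: component_idx == 1 */
    } __tag("ctx");                        /* tag on the struct itself: component_idx == -1 */

    int pkt_total __tag("counter");        /* tag on a var: component_idx == -1 */
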
3. BTF Kernel API
*****************

67 changes: 61 additions & 6 deletions arch/x86/events/intel/core.c
@@ -2143,19 +2143,19 @@ static __initconst const u64 knl_hw_cache_extra_regs
* However, there are some cases which may change PEBS status, e.g. PMI
* throttle. The PEBS_ENABLE should be updated where the status changes.
*/
static void __intel_pmu_disable_all(void)
static __always_inline void __intel_pmu_disable_all(bool bts)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
if (bts && test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
intel_pmu_disable_bts();
}

static void intel_pmu_disable_all(void)
static __always_inline void intel_pmu_disable_all(void)
{
__intel_pmu_disable_all();
__intel_pmu_disable_all(true);
intel_pmu_pebs_disable_all();
intel_pmu_lbr_disable_all();
}
@@ -2186,6 +2186,49 @@ static void intel_pmu_enable_all(int added)
__intel_pmu_enable_all(added, false);
}

static noinline int
__intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries,
unsigned int cnt, unsigned long flags)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

intel_pmu_lbr_read();
cnt = min_t(unsigned int, cnt, x86_pmu.lbr_nr);

memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt);
intel_pmu_enable_all(0);
local_irq_restore(flags);
return cnt;
}

static int
intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
{
unsigned long flags;

/* must not have branches... */
local_irq_save(flags);
__intel_pmu_disable_all(false); /* we don't care about BTS */
__intel_pmu_pebs_disable_all();
__intel_pmu_lbr_disable();
/* ... until here */
return __intel_pmu_snapshot_branch_stack(entries, cnt, flags);
}

static int
intel_pmu_snapshot_arch_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
{
unsigned long flags;

/* must not have branches... */
local_irq_save(flags);
__intel_pmu_disable_all(false); /* we don't care about BTS */
__intel_pmu_pebs_disable_all();
__intel_pmu_arch_lbr_disable();
/* ... until here */
return __intel_pmu_snapshot_branch_stack(entries, cnt, flags);
}

/*
* Workaround for:
* Intel Errata AAK100 (model 26)
@@ -2929,7 +2972,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
apic_write(APIC_LVTPC, APIC_DM_NMI);
intel_bts_disable_local();
cpuc->enabled = 0;
__intel_pmu_disable_all();
__intel_pmu_disable_all(true);
handled = intel_pmu_drain_bts_buffer();
handled += intel_bts_interrupt();
status = intel_pmu_get_status();
@@ -6283,9 +6326,21 @@ __init int intel_pmu_init(void)
x86_pmu.lbr_nr = 0;
}

if (x86_pmu.lbr_nr)
if (x86_pmu.lbr_nr) {
pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);

/* only support branch_stack snapshot for perfmon >= v2 */
if (x86_pmu.disable_all == intel_pmu_disable_all) {
if (boot_cpu_has(X86_FEATURE_ARCH_LBR)) {
static_call_update(perf_snapshot_branch_stack,
intel_pmu_snapshot_arch_branch_stack);
} else {
static_call_update(perf_snapshot_branch_stack,
intel_pmu_snapshot_branch_stack);
}
}
}

intel_pmu_check_extra_regs(x86_pmu.extra_regs);

/* Support full width counters using alternative MSR range */
2 changes: 1 addition & 1 deletion arch/x86/events/intel/ds.c
@@ -1301,7 +1301,7 @@ void intel_pmu_pebs_disable_all(void)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

if (cpuc->pebs_enabled)
wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
__intel_pmu_pebs_disable_all();
}

static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
20 changes: 5 additions & 15 deletions arch/x86/events/intel/lbr.c
@@ -228,20 +228,6 @@ static void __intel_pmu_lbr_enable(bool pmi)
wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
}

static void __intel_pmu_lbr_disable(void)
{
u64 debugctl;

if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
wrmsrl(MSR_ARCH_LBR_CTL, 0);
return;
}

rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

void intel_pmu_lbr_reset_32(void)
{
int i;
@@ -779,8 +765,12 @@ void intel_pmu_lbr_disable_all(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

if (cpuc->lbr_users && !vlbr_exclude_host())
if (cpuc->lbr_users && !vlbr_exclude_host()) {
if (static_cpu_has(X86_FEATURE_ARCH_LBR))
return __intel_pmu_arch_lbr_disable();

__intel_pmu_lbr_disable();
}
}

void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
19 changes: 19 additions & 0 deletions arch/x86/events/perf_event.h
@@ -1240,6 +1240,25 @@ static inline bool intel_pmu_has_bts(struct perf_event *event)
return intel_pmu_has_bts_period(event, hwc->sample_period);
}

static __always_inline void __intel_pmu_pebs_disable_all(void)
{
wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}

static __always_inline void __intel_pmu_arch_lbr_disable(void)
{
wrmsrl(MSR_ARCH_LBR_CTL, 0);
}

static __always_inline void __intel_pmu_lbr_disable(void)
{
u64 debugctl;

rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

int intel_pmu_save_and_restart(struct perf_event *event);

struct event_constraint *
53 changes: 23 additions & 30 deletions arch/x86/net/bpf_jit_comp.c
@@ -1070,41 +1070,34 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
break;

case BPF_ALU | BPF_MUL | BPF_K:
case BPF_ALU | BPF_MUL | BPF_X:
case BPF_ALU64 | BPF_MUL | BPF_K:
case BPF_ALU64 | BPF_MUL | BPF_X:
{
bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
if (BPF_CLASS(insn->code) == BPF_ALU64)
EMIT1(add_2mod(0x48, dst_reg, dst_reg));
else if (is_ereg(dst_reg))
EMIT1(add_2mod(0x40, dst_reg, dst_reg));

if (dst_reg != BPF_REG_0)
EMIT1(0x50); /* push rax */
if (dst_reg != BPF_REG_3)
EMIT1(0x52); /* push rdx */
if (is_imm8(imm32))
/* imul dst_reg, dst_reg, imm8 */
EMIT3(0x6B, add_2reg(0xC0, dst_reg, dst_reg),
imm32);
else
/* imul dst_reg, dst_reg, imm32 */
EMIT2_off32(0x69,
add_2reg(0xC0, dst_reg, dst_reg),
imm32);
break;

/* mov r11, dst_reg */
EMIT_mov(AUX_REG, dst_reg);
case BPF_ALU | BPF_MUL | BPF_X:
case BPF_ALU64 | BPF_MUL | BPF_X:
if (BPF_CLASS(insn->code) == BPF_ALU64)
EMIT1(add_2mod(0x48, src_reg, dst_reg));
else if (is_ereg(dst_reg) || is_ereg(src_reg))
EMIT1(add_2mod(0x40, src_reg, dst_reg));

if (BPF_SRC(insn->code) == BPF_X)
emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
else
emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);

if (is64)
EMIT1(add_1mod(0x48, AUX_REG));
else if (is_ereg(AUX_REG))
EMIT1(add_1mod(0x40, AUX_REG));
/* mul(q) r11 */
EMIT2(0xF7, add_1reg(0xE0, AUX_REG));

if (dst_reg != BPF_REG_3)
EMIT1(0x5A); /* pop rdx */
if (dst_reg != BPF_REG_0) {
/* mov dst_reg, rax */
EMIT_mov(dst_reg, BPF_REG_0);
EMIT1(0x58); /* pop rax */
}
/* imul dst_reg, src_reg */
EMIT3(0x0F, 0xAF, add_2reg(0xC0, src_reg, dst_reg));
break;
}

/* Shifts */
case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU | BPF_RSH | BPF_K:
23 changes: 23 additions & 0 deletions include/linux/perf_event.h
@@ -57,6 +57,7 @@ struct perf_guest_info_callbacks {
#include <linux/cgroup.h>
#include <linux/refcount.h>
#include <linux/security.h>
#include <linux/static_call.h>
#include <asm/local.h>

struct perf_callchain_entry {
@@ -1612,4 +1613,26 @@ extern void __weak arch_perf_update_userpage(struct perf_event *event,
extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr);
#endif

/*
* Snapshot branch stack on software events.
*
* Branch stack can be very useful in understanding software events. For
* example, when a long function, e.g. sys_perf_event_open, returns an
* errno, it is not obvious why the function failed. Branch stack could
* provide very helpful information in this type of scenario.
*
* On a software event, it is necessary to stop the hardware branch recorder
* fast. Otherwise, the hardware register/buffer will be flushed with
* entries of the triggering event. Therefore, static call is used to
* stop the hardware recorder.
*/

/*
* cnt is the number of entries allocated for entries.
* Return the number of entries copied into entries.
*/
typedef int (perf_snapshot_branch_stack_t)(struct perf_branch_entry *entries,
unsigned int cnt);
DECLARE_STATIC_CALL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);

#endif /* _LINUX_PERF_EVENT_H */
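
For orientation (not part of the diff): the Intel PMU changes above register
their handlers with static_call_update(), and a consumer invokes the snapshot
through the static call declared here. A minimal, hedged sketch of the caller
side; it assumes the static call is defined elsewhere in this series so that
invoking it with no handler registered is safe and simply reports 0 entries:

/* Hedged sketch of a consumer of the perf_snapshot_branch_stack static call.
 * Assumes the corresponding DEFINE_STATIC_CALL*() exists elsewhere in the
 * series and that an unregistered call copies 0 entries.
 */
#include <linux/perf_event.h>
#include <linux/static_call.h>

static int snapshot_branches(struct perf_branch_entry *entries, unsigned int cnt)
{
	/* Fills at most cnt entries; returns how many were actually copied. */
	return static_call(perf_snapshot_branch_stack)(entries, cnt);
}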
26 changes: 25 additions & 1 deletion include/uapi/linux/bpf.h
@@ -1629,7 +1629,7 @@ union bpf_attr {
* u32 bpf_get_smp_processor_id(void)
* Description
* Get the SMP (symmetric multiprocessing) processor id. Note that
* all programs run with preemption disabled, which means that the
* all programs run with migration disabled, which means that the
* SMP processor id is stable during all the execution of the
* program.
* Return
@@ -4877,6 +4877,27 @@ union bpf_attr {
* Get the struct pt_regs associated with **task**.
* Return
* A pointer to struct pt_regs.
*
* long bpf_get_branch_snapshot(void *entries, u32 size, u64 flags)
* Description
* Get branch trace from hardware engines like Intel LBR. The
* hardware engine is stopped shortly after the helper is
* called. Therefore, the user needs to filter branch entries
* based on the actual use case. To capture branch trace
* before the trigger point of the BPF program, the helper
* should be called at the beginning of the BPF program.
*
* The data is stored as struct perf_branch_entry into output
* buffer *entries*. *size* is the size of *entries* in bytes.
* *flags* is reserved for now and must be zero.
*
* Return
* On success, number of bytes written to *entries*. On error, a
* negative value.
*
* **-EINVAL** if *flags* is not zero.
*
* **-ENOENT** if architecture does not support branch records.
*/
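
Not part of the diff, a usage sketch for the helper documented above: call it
at the very top of the program, before the program's own branches overwrite
the LBR. The section, program and buffer names are illustrative, and the
sketch assumes a clang/libbpf setup where bpf_get_branch_snapshot() is
declared through the generated helper definitions.

/* Hedged sketch: snapshot LBR entries on entry to ksys_read().
 * ENTRY_CNT and all names are illustrative.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#define ENTRY_CNT 32

struct perf_branch_entry entries[ENTRY_CNT] = {};

SEC("fentry/ksys_read")
int BPF_PROG(snap_branches)
{
	long written;

	/* Call first, so the branch recorder stops as early as possible. */
	written = bpf_get_branch_snapshot(entries, sizeof(entries), 0);
	if (written > 0)
		bpf_printk("captured %ld bytes of branch entries", written);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
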
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5055,6 +5076,7 @@ union bpf_attr {
FN(get_func_ip), \
FN(get_attach_cookie), \
FN(task_pt_regs), \
FN(get_branch_snapshot), \
/* */

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
Expand Down Expand Up @@ -5284,6 +5306,8 @@ struct __sk_buff {
__u32 gso_segs;
__bpf_md_ptr(struct bpf_sock *, sk);
__u32 gso_size;
__u32 :32; /* Padding, future use. */
__u64 hwtstamp;
};

struct bpf_tunnel_key {
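
The two fields appended to struct __sk_buff above (the anonymous padding and
hwtstamp) are what item 7 of the cover letter refers to. A hedged sketch of
reading the field from a TC classifier, not part of the diff; the section
name, program name and printout are only examples and assume a toolchain
built against this updated header (older libbpf versions spell the section
"classifier" rather than "tc"):

/* Hedged sketch: read the new __sk_buff::hwtstamp field from a TC program. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int read_hwtstamp(struct __sk_buff *skb)
{
	__u64 hwts = skb->hwtstamp;	/* hardware RX timestamp, 0 if not set */

	if (hwts)
		bpf_printk("hwtstamp=%llu", hwts);
	return 0;	/* TC_ACT_OK */
}

char LICENSE[] SEC("license") = "GPL";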