Skip to content

Commit

Permalink
x86, trace: Add irq vector tracepoints
Browse files Browse the repository at this point in the history
[Purpose of this patch]

As Vaibhav explained in the thread below, tracepoints for irq vectors
are useful.

http://www.spinics.net/lists/mm-commits/msg85707.html

<snip>
The current interrupt traces from irq_handler_entry and irq_handler_exit
provide when an interrupt is handled.  They provide good data about when
the system has switched to kernel space and how it affects the currently
running processes.

There are some IRQ vectors which trigger the system into kernel space,
which are not handled in generic IRQ handlers.  Tracing such events gives
us the information about IRQ interaction with other system events.

The trace also tells where the system is spending its time.  We want to
know which cores are handling interrupts and how they are affecting other
processes in the system.  Also, the trace provides information about when
the cores are idle and which interrupts are changing that state.
<snip>

On the other hand, my use case is tracing just the local timer event and
getting the value of the instruction pointer.

I previously suggested adding an argument to the local timer event to get the instruction pointer.
But there is another way to get it, with an external module such as systemtap.
So, I don't need to add any argument to the irq vector tracepoints now.

[Patch Description]

Vaibhav's patch shared one pair of tracepoints, irq_vector_entry/irq_vector_exit, across all events.
But, as described above, there is a use case for tracing a specific irq vector rather than tracing all events.
In this case, we are concerned about overhead due to unwanted events.

So, add the following tracepoints instead of introducing irq_vector_entry/exit,
so that we can enable them independently.
   - local_timer_vector
   - reschedule_vector
   - call_function_vector
   - call_function_single_vector
   - irq_work_entry_vector
   - error_apic_vector
   - thermal_apic_vector
   - threshold_apic_vector
   - spurious_apic_vector
   - x86_platform_ipi_vector

Also, introduce logic that switches the IDT at enabling/disabling time so that the time penalty
is zero when tracepoints are disabled. Detailed explanations are as follows.
 - Create trace irq handlers with entering_irq()/exiting_irq().
 - Create a new IDT, trace_idt_table, at boot time by adding logic to
   _set_gate(). It is just a copy of the original idt table.
 - Register the new handlers for tracepoints in the new IDT by introducing
   macros to alloc_intr_gate(), which is called when irq_vector handlers are registered.
 - Add a check of whether irq vector tracing is on or off to load_current_idt().
   This has to be done below the debug check for these reasons.
   - Switching to the debug IDT may be kicked while tracing is enabled.
   - On the other hand, switching to the trace IDT is kicked only when debugging
     is disabled.

In addition, the new IDT is created only when CONFIG_TRACING is enabled to avoid being
used for other purposes.

Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Link: http://lkml.kernel.org/r/51C323ED.5050708@hds.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
  • Loading branch information
Seiji Aguchi authored and H. Peter Anvin committed Jun 21, 2013
1 parent 629f4f9 commit cf910e8
Show file tree
Hide file tree
Showing 20 changed files with 422 additions and 15 deletions.
72 changes: 70 additions & 2 deletions arch/x86/include/asm/desc.h
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,19 @@ static inline void set_nmi_gate(int gate, void *addr)
}
#endif

/*
 * Tracing IDT support: with CONFIG_TRACING the table and its descriptor are
 * declared here and write_trace_idt_entry() stores a gate into that table;
 * without it the helper is an empty stub so callers need no #ifdef.
 */
#ifdef CONFIG_TRACING
extern struct desc_ptr trace_idt_descr;
extern gate_desc trace_idt_table[];
/* Write @gate into slot @entry of trace_idt_table. */
static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
{
write_idt_entry(trace_idt_table, entry, gate);
}
#else
/* No trace IDT exists without CONFIG_TRACING: deliberately a no-op. */
static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
{
}
#endif

static inline void _set_gate(int gate, unsigned type, void *addr,
unsigned dpl, unsigned ist, unsigned seg)
{
Expand All @@ -331,6 +344,7 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
* setup time
*/
write_idt_entry(idt_table, gate, &s);
write_trace_idt_entry(gate, &s);
}

/*
Expand Down Expand Up @@ -360,12 +374,39 @@ static inline void alloc_system_vector(int vector)
}
}

static inline void alloc_intr_gate(unsigned int n, void *addr)
#ifdef CONFIG_TRACING
/*
 * Pack a GATE_INTERRUPT descriptor for handler @addr in __KERNEL_CS and
 * store it in slot @gate of trace_idt_table. Only the trace IDT is written.
 */
static inline void trace_set_intr_gate(unsigned int gate, void *addr)
{
	gate_desc desc;

	pack_gate(&desc, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
	write_idt_entry(trace_idt_table, gate, &desc);
}

/* Install the tracing handler @addr for vector @n in the trace IDT. */
static inline void __trace_alloc_intr_gate(unsigned int n, void *addr)
{
	trace_set_intr_gate(n, addr);
}
#else
/* No trace IDT without CONFIG_TRACING: nothing to install. */
static inline void trace_set_intr_gate(unsigned int gate, void *addr)
{
}

/*
 * Expands to nothing, so whatever is passed as @addr is discarded by the
 * preprocessor and does not have to name a declared symbol.
 */
#define __trace_alloc_intr_gate(n, addr)
#endif

/*
 * Install @addr as the interrupt gate for vector @n.
 *
 * Reserving the vector is the job of the alloc_intr_gate() wrapper, which
 * calls alloc_system_vector() before invoking this helper; doing it again
 * here would reserve the same vector twice.
 */
static inline void __alloc_intr_gate(unsigned int n, void *addr)
{
	set_intr_gate(n, addr);
}

/*
 * alloc_intr_gate(n, addr) - reserve system vector @n and install @addr.
 *
 * Besides calling __alloc_intr_gate(), the macro hands trace_<addr> (built
 * by token pasting) to __trace_alloc_intr_gate(). @addr must therefore be a
 * bare handler identifier, not an expression, and when
 * __trace_alloc_intr_gate() is a real function a symbol or macro named
 * trace_<addr> has to be visible at the call site.
 */
#define alloc_intr_gate(n, addr) \
do { \
alloc_system_vector(n); \
__alloc_intr_gate(n, addr); \
__trace_alloc_intr_gate(n, trace_##addr); \
} while (0)

/*
* This routine sets up an interrupt gate at directory privilege level 3.
*/
Expand Down Expand Up @@ -430,6 +471,31 @@ static inline void load_debug_idt(void)
}
#endif

#ifdef CONFIG_TRACING
extern atomic_t trace_idt_ctr;

/* Report whether trace_idt_ctr currently holds a non-zero value. */
static inline bool is_trace_idt_enabled(void)
{
	return atomic_read(&trace_idt_ctr) != 0;
}

/* Hand the trace IDT descriptor to load_idt(). */
static inline void load_trace_idt(void)
{
	load_idt((const struct desc_ptr *)&trace_idt_descr);
}
#else
/* Without CONFIG_TRACING the trace IDT is never in use. */
static inline bool is_trace_idt_enabled(void)
{
	return false;
}

/* Nothing to load when CONFIG_TRACING is off. */
static inline void load_trace_idt(void)
{
}
#endif

/*
* the load_current_idt() is called with interrupt disabled by local_irq_save()
* to avoid races. That way the IDT will always be set back to the expected
Expand All @@ -442,6 +508,8 @@ static inline void load_current_idt(void)
local_irq_save(flags);
if (is_debug_idt_enabled())
load_debug_idt();
else if (is_trace_idt_enabled())
load_trace_idt();
else
load_idt((const struct desc_ptr *)&idt_descr);
local_irq_restore(flags);
Expand Down
8 changes: 5 additions & 3 deletions arch/x86/include/asm/entry_arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,16 @@
BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
BUILD_INTERRUPT3(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR,
smp_irq_move_cleanup_interrupt)
BUILD_INTERRUPT3(reboot_interrupt, REBOOT_VECTOR, smp_reboot_interrupt)
#endif

BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)

#ifdef CONFIG_HAVE_KVM
BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR)
BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR,
smp_kvm_posted_intr_ipi)
#endif

/*
Expand Down
17 changes: 17 additions & 0 deletions arch/x86/include/asm/hw_irq.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,23 @@ extern void threshold_interrupt(void);
extern void call_function_interrupt(void);
extern void call_function_single_interrupt(void);

#ifdef CONFIG_TRACING
/* Interrupt handlers registered during init_IRQ */
/* trace_-prefixed entry points, declared only when CONFIG_TRACING is set. */
extern void trace_apic_timer_interrupt(void);
extern void trace_x86_platform_ipi(void);
extern void trace_error_interrupt(void);
extern void trace_irq_work_interrupt(void);
extern void trace_spurious_interrupt(void);
extern void trace_thermal_interrupt(void);
extern void trace_reschedule_interrupt(void);
extern void trace_threshold_interrupt(void);
extern void trace_call_function_interrupt(void);
extern void trace_call_function_single_interrupt(void);
/*
 * These handlers get no separate trace_ entry point: the trace_ name is
 * aliased to the regular handler, so code that forms trace_<handler> by
 * token pasting (as alloc_intr_gate() does) still resolves to a symbol.
 */
#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt
#define trace_reboot_interrupt reboot_interrupt
#define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
#endif /* CONFIG_TRACING */

/* IOAPIC */
#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
extern unsigned long io_apic_irqs;
Expand Down
3 changes: 3 additions & 0 deletions arch/x86/include/asm/mshyperv.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ struct ms_hyperv_info {
extern struct ms_hyperv_info ms_hyperv;

void hyperv_callback_vector(void);
#ifdef CONFIG_TRACING
/*
 * Alias the trace_ name to the regular handler so that a token-pasted
 * trace_hyperv_callback_vector resolves to hyperv_callback_vector.
 * NOTE(review): presumably needed because the handler is registered via
 * alloc_intr_gate() - confirm against the caller.
 */
#define trace_hyperv_callback_vector hyperv_callback_vector
#endif
void hyperv_vector_handler(struct pt_regs *regs);
void hv_register_vmbus_handler(int irq, irq_handler_t handler);

Expand Down
104 changes: 104 additions & 0 deletions arch/x86/include/asm/trace/irq_vectors.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM irq_vectors

#if !defined(_TRACE_IRQ_VECTORS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_IRQ_VECTORS_H

#include <linux/tracepoint.h>

extern void trace_irq_vector_regfunc(void);
extern void trace_irq_vector_unregfunc(void);

/*
 * x86_irq_vector - event class shared by all irq vector tracepoints.
 *
 * Each event takes one int argument, the vector number, records it in the
 * trace entry and prints it as "vector=<n>".
 */
DECLARE_EVENT_CLASS(x86_irq_vector,

TP_PROTO(int vector),

TP_ARGS(vector),

TP_STRUCT__entry(
__field( int, vector )
),

TP_fast_assign(
__entry->vector = vector;
),

TP_printk("vector=%d", __entry->vector) );

/*
 * DEFINE_IRQ_VECTOR_EVENT(name) - define the pair of events <name>_entry and
 * <name>_exit in the x86_irq_vector class.
 *
 * Both events pass trace_irq_vector_regfunc/trace_irq_vector_unregfunc as
 * their registration callbacks (declared above, defined outside this
 * header).
 */
#define DEFINE_IRQ_VECTOR_EVENT(name) \
DEFINE_EVENT_FN(x86_irq_vector, name##_entry, \
TP_PROTO(int vector), \
TP_ARGS(vector), \
trace_irq_vector_regfunc, \
trace_irq_vector_unregfunc); \
DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \
TP_PROTO(int vector), \
TP_ARGS(vector), \
trace_irq_vector_regfunc, \
trace_irq_vector_unregfunc);


/*
* local_timer - called when entering/exiting a local timer interrupt
* vector handler
*/
DEFINE_IRQ_VECTOR_EVENT(local_timer);

/*
* reschedule - called when entering/exiting a reschedule vector handler
*/
DEFINE_IRQ_VECTOR_EVENT(reschedule);

/*
* spurious_apic - called when entering/exiting a spurious apic vector handler
*/
DEFINE_IRQ_VECTOR_EVENT(spurious_apic);

/*
* error_apic - called when entering/exiting an error apic vector handler
*/
DEFINE_IRQ_VECTOR_EVENT(error_apic);

/*
* x86_platform_ipi - called when entering/exiting an x86 platform ipi interrupt
* vector handler
*/
DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi);

/*
* irq_work - called when entering/exiting an irq work interrupt
* vector handler
*/
DEFINE_IRQ_VECTOR_EVENT(irq_work);

/*
* call_function - called when entering/exiting a call function interrupt
* vector handler
*/
DEFINE_IRQ_VECTOR_EVENT(call_function);

/*
* call_function_single - called when entering/exiting a call function
* single interrupt vector handler
*/
DEFINE_IRQ_VECTOR_EVENT(call_function_single);

/*
* threshold_apic - called when entering/exiting a threshold apic interrupt
* vector handler
*/
DEFINE_IRQ_VECTOR_EVENT(threshold_apic);

/*
* thermal_apic - called when entering/exiting a thermal apic interrupt
* vector handler
*/
DEFINE_IRQ_VECTOR_EVENT(thermal_apic);

#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE irq_vectors
#endif /* _TRACE_IRQ_VECTORS_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
3 changes: 3 additions & 0 deletions arch/x86/include/asm/uv/uv_bau.h
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,9 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
}

extern void uv_bau_message_intr1(void);
#ifdef CONFIG_TRACING
/*
 * Alias the trace_ name to the regular handler so that a token-pasted
 * trace_uv_bau_message_intr1 resolves to uv_bau_message_intr1.
 * NOTE(review): presumably needed because the handler is registered via
 * alloc_intr_gate() - confirm against the caller.
 */
#define trace_uv_bau_message_intr1 uv_bau_message_intr1
#endif
extern void uv_bau_timeout_intr1(void);

struct atomic_short {
Expand Down
1 change: 1 addition & 0 deletions arch/x86/kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ obj-$(CONFIG_OF) += devicetree.o
obj-$(CONFIG_UPROBES) += uprobes.o

obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o

###
# 64 bit specific files
Expand Down
1 change: 1 addition & 0 deletions arch/x86/kernel/apic/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Makefile for local APIC drivers and for the IO-APIC code
#

CFLAGS_apic.o := -I$(src)/../../include/asm/trace
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o
obj-y += hw_nmi.o

Expand Down
42 changes: 42 additions & 0 deletions arch/x86/kernel/apic/apic.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@
#include <asm/tsc.h>
#include <asm/hypervisor.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/irq_vectors.h>

unsigned int num_processors;

unsigned disabled_cpus __cpuinitdata;
Expand Down Expand Up @@ -931,6 +934,27 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
set_irq_regs(old_regs);
}

/*
 * Tracing variant of the local APIC timer interrupt entry point.
 *
 * Installs @regs as the current irq registers, runs
 * local_apic_timer_interrupt() bracketed by the local_timer_entry and
 * local_timer_exit tracepoints (both reporting LOCAL_TIMER_VECTOR), then
 * restores the previous irq registers.
 */
void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);

/*
* NOTE! We'd better ACK the irq immediately,
* because timer handling can be slow.
*
* update_process_times() expects us to have done irq_enter().
* Besides, if we don't, timer interrupts ignore the global
* interrupt lock, which is the WrongThing (tm) to do.
*/
entering_ack_irq();
/* Tracepoints sit inside the entering/exiting pair, around the handler. */
trace_local_timer_entry(LOCAL_TIMER_VECTOR);
local_apic_timer_interrupt();
trace_local_timer_exit(LOCAL_TIMER_VECTOR);
exiting_irq();

set_irq_regs(old_regs);
}

int setup_profiling_timer(unsigned int multiplier)
{
return -EINVAL;
Expand Down Expand Up @@ -1931,6 +1955,15 @@ void smp_spurious_interrupt(struct pt_regs *regs)
exiting_irq();
}

/*
 * Tracing variant of the spurious interrupt entry point: calls
 * __smp_spurious_interrupt() between the spurious_apic_entry and
 * spurious_apic_exit tracepoints, all inside entering_irq()/exiting_irq().
 * @regs is not used by this function.
 */
void smp_trace_spurious_interrupt(struct pt_regs *regs)
{
entering_irq();
trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR);
__smp_spurious_interrupt();
trace_spurious_apic_exit(SPURIOUS_APIC_VECTOR);
exiting_irq();
}

/*
* This interrupt should never happen with our APIC/SMP architecture
*/
Expand Down Expand Up @@ -1978,6 +2011,15 @@ void smp_error_interrupt(struct pt_regs *regs)
exiting_irq();
}

/*
 * Tracing variant of the APIC error interrupt entry point: calls
 * __smp_error_interrupt(@regs) between the error_apic_entry and
 * error_apic_exit tracepoints, all inside entering_irq()/exiting_irq().
 */
void smp_trace_error_interrupt(struct pt_regs *regs)
{
entering_irq();
trace_error_apic_entry(ERROR_APIC_VECTOR);
__smp_error_interrupt(regs);
trace_error_apic_exit(ERROR_APIC_VECTOR);
exiting_irq();
}

/**
* connect_bsp_APIC - attach the APIC to the interrupt system
*/
Expand Down
4 changes: 2 additions & 2 deletions arch/x86/kernel/cpu/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -1257,7 +1257,7 @@ void __cpuinit cpu_init(void)
switch_to_new_gdt(cpu);
loadsegment(fs, 0);

load_idt((const struct desc_ptr *)&idt_descr);
load_current_idt();

memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
syscall_init();
Expand Down Expand Up @@ -1334,7 +1334,7 @@ void __cpuinit cpu_init(void)
if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);

load_idt(&idt_descr);
load_current_idt();
switch_to_new_gdt(cpu);

/*
Expand Down
Loading

0 comments on commit cf910e8

Please sign in to comment.