Merge branch 'timers/nohz-v3' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into timers/nohz

Pull nohz improvements from Frederic Weisbecker:

 " It mostly contains fixes and full dynticks off-case optimizations. I believe that
   distros want to enable this feature so it seems important to optimize the case
   where the "nohz_full=" parameter is empty. ie: I'm trying to remove any performance
   regression that comes with NO_HZ_FULL=y when the feature is not used.

   This patchset improves the current situation a lot (off-case appears to be around 11% faster
   with hackbench, although I guess it may vary depending on the configuration but it should be
   significantly faster in any case) now there is still some work to do: I can still observe a
   remaining loss of 1.6% throughput seen with hackbench compared to CONFIG_NO_HZ_FULL=n. "

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar committed Aug 14, 2013
2 parents d4e4ab8 + d13508f commit 6f1d657
Showing 22 changed files with 544 additions and 328 deletions.
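The central off-case optimization of this series is visible in the include/linux/context_tracking.h hunk further down: user_enter(), user_exit() and context_tracking_task_switch() become inline wrappers that test the context_tracking_enabled static key before calling the out-of-line probes, so a kernel booted with an empty "nohz_full=" parameter pays only a patched-out branch per call site. The sketch below is a plain userspace analogue of that pattern, not kernel code: it substitutes __builtin_expect() for the kernel's jump-label static keys, and the names (tracking_enabled, the slow-path function) are made up purely for illustration.

#include <stdbool.h>
#include <stdio.h>

/*
 * Stands in for the context_tracking_enabled static key;
 * false is the common "nohz_full= is empty" case.
 */
static bool tracking_enabled;

/* Stands in for the out-of-line probe (context_tracking_user_enter). */
static void user_enter_slowpath(void)
{
	puts("probe fired: CPU is entering user mode");
}

static inline void user_enter(void)
{
	/* Off-case: a single branch hinted as not taken, nothing else. */
	if (__builtin_expect(tracking_enabled, 0))
		user_enter_slowpath();
}

int main(void)
{
	user_enter();             /* feature off: falls straight through */
	tracking_enabled = true;  /* as if a nohz_full= CPU list were given */
	user_enter();             /* feature on: the slow path runs */
	return 0;
}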
1 change: 1 addition & 0 deletions arch/ia64/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += clkdev.h
generic-y += exec.h
generic-y += kvm_para.h
generic-y += trace_clock.h
generic-y += vtime.h
2 changes: 1 addition & 1 deletion arch/m68k/include/asm/irqflags.h
@@ -3,7 +3,7 @@

#include <linux/types.h>
#ifdef CONFIG_MMU
#include <linux/hardirq.h>
#include <linux/preempt_mask.h>
#endif
#include <linux/preempt.h>
#include <asm/thread_info.h>
1 change: 1 addition & 0 deletions arch/powerpc/include/asm/Kbuild
@@ -2,3 +2,4 @@
generic-y += clkdev.h
generic-y += rwsem.h
generic-y += trace_clock.h
generic-y += vtime.h
3 changes: 0 additions & 3 deletions arch/s390/include/asm/cputime.h
@@ -13,9 +13,6 @@
#include <asm/div64.h>


#define __ARCH_HAS_VTIME_ACCOUNT
#define __ARCH_HAS_VTIME_TASK_SWITCH

/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */

typedef unsigned long long __nocast cputime_t;
7 changes: 7 additions & 0 deletions arch/s390/include/asm/vtime.h
@@ -0,0 +1,7 @@
#ifndef _S390_VTIME_H
#define _S390_VTIME_H

#define __ARCH_HAS_VTIME_ACCOUNT
#define __ARCH_HAS_VTIME_TASK_SWITCH

#endif /* _S390_VTIME_H */
1 change: 1 addition & 0 deletions arch/s390/kernel/vtime.c
@@ -19,6 +19,7 @@
#include <asm/irq_regs.h>
#include <asm/cputime.h>
#include <asm/vtimer.h>
#include <asm/vtime.h>
#include <asm/irq.h>
#include "entry.h"

Empty file added include/asm-generic/vtime.h
128 changes: 69 additions & 59 deletions include/linux/context_tracking.h
@@ -2,100 +2,110 @@
#define _LINUX_CONTEXT_TRACKING_H

#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/vtime.h>
#include <linux/context_tracking_state.h>
#include <asm/ptrace.h>

struct context_tracking {
/*
* When active is false, probes are unset in order
* to minimize overhead: TIF flags are cleared
* and calls to user_enter/exit are ignored. This
* may be further optimized using static keys.
*/
bool active;
enum ctx_state {
IN_KERNEL = 0,
IN_USER,
} state;
};

static inline void __guest_enter(void)
{
/*
* This is running in ioctl context so we can avoid
* the call to vtime_account() with its unnecessary idle check.
*/
vtime_account_system(current);
current->flags |= PF_VCPU;
}

static inline void __guest_exit(void)
{
/*
* This is running in ioctl context so we can avoid
* the call to vtime_account() with its unnecessary idle check.
*/
vtime_account_system(current);
current->flags &= ~PF_VCPU;
}

#ifdef CONFIG_CONTEXT_TRACKING
DECLARE_PER_CPU(struct context_tracking, context_tracking);
extern void context_tracking_cpu_set(int cpu);

static inline bool context_tracking_in_user(void)
extern void context_tracking_user_enter(void);
extern void context_tracking_user_exit(void);
extern void __context_tracking_task_switch(struct task_struct *prev,
struct task_struct *next);

static inline void user_enter(void)
{
return __this_cpu_read(context_tracking.state) == IN_USER;
}
if (static_key_false(&context_tracking_enabled))
context_tracking_user_enter();

static inline bool context_tracking_active(void)
}
static inline void user_exit(void)
{
return __this_cpu_read(context_tracking.active);
if (static_key_false(&context_tracking_enabled))
context_tracking_user_exit();
}

extern void user_enter(void);
extern void user_exit(void);

extern void guest_enter(void);
extern void guest_exit(void);

static inline enum ctx_state exception_enter(void)
{
enum ctx_state prev_ctx;

if (!static_key_false(&context_tracking_enabled))
return 0;

prev_ctx = this_cpu_read(context_tracking.state);
user_exit();
context_tracking_user_exit();

return prev_ctx;
}

static inline void exception_exit(enum ctx_state prev_ctx)
{
if (prev_ctx == IN_USER)
user_enter();
if (static_key_false(&context_tracking_enabled)) {
if (prev_ctx == IN_USER)
context_tracking_user_enter();
}
}

extern void context_tracking_task_switch(struct task_struct *prev,
struct task_struct *next);
static inline void context_tracking_task_switch(struct task_struct *prev,
struct task_struct *next)
{
if (static_key_false(&context_tracking_enabled))
__context_tracking_task_switch(prev, next);
}
#else
static inline bool context_tracking_in_user(void) { return false; }
static inline void user_enter(void) { }
static inline void user_exit(void) { }
static inline enum ctx_state exception_enter(void) { return 0; }
static inline void exception_exit(enum ctx_state prev_ctx) { }
static inline void context_tracking_task_switch(struct task_struct *prev,
struct task_struct *next) { }
#endif /* !CONFIG_CONTEXT_TRACKING */


#ifdef CONFIG_CONTEXT_TRACKING_FORCE
extern void context_tracking_init(void);
#else
static inline void context_tracking_init(void) { }
#endif /* CONFIG_CONTEXT_TRACKING_FORCE */


#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static inline void guest_enter(void)
{
__guest_enter();
if (vtime_accounting_enabled())
vtime_guest_enter(current);
else
current->flags |= PF_VCPU;
}

static inline void guest_exit(void)
{
__guest_exit();
if (vtime_accounting_enabled())
vtime_guest_exit(current);
else
current->flags &= ~PF_VCPU;
}

static inline enum ctx_state exception_enter(void) { return 0; }
static inline void exception_exit(enum ctx_state prev_ctx) { }
static inline void context_tracking_task_switch(struct task_struct *prev,
struct task_struct *next) { }
#endif /* !CONFIG_CONTEXT_TRACKING */
#else
static inline void guest_enter(void)
{
/*
* This is running in ioctl context so its safe
* to assume that it's the stime pending cputime
* to flush.
*/
vtime_account_system(current);
current->flags |= PF_VCPU;
}

static inline void guest_exit(void)
{
/* Flush the guest cputime we spent on the guest */
vtime_account_system(current);
current->flags &= ~PF_VCPU;
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */

#endif
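For context, the exception_enter()/exception_exit() pair defined above is meant to bracket exception handlers: the handler saves the context-tracking state it interrupted and restores it on the way out, and with this merge both helpers also bail out early when the static key is off. The self-contained sketch below only models that save/restore pattern with ordinary variables; the fault handler and the global state variable are illustrative, not the kernel's per-CPU implementation.

#include <stdio.h>

enum ctx_state { IN_KERNEL = 0, IN_USER };

/* Stands in for the per-CPU context_tracking.state field. */
static enum ctx_state cpu_state = IN_USER;

static enum ctx_state exception_enter(void)
{
	enum ctx_state prev_ctx = cpu_state;  /* remember what the exception interrupted */
	cpu_state = IN_KERNEL;                /* account the handler as kernel context */
	return prev_ctx;
}

static void exception_exit(enum ctx_state prev_ctx)
{
	cpu_state = prev_ctx;                 /* restore user or kernel state on return */
}

/* Illustrative handler, not a real kernel entry point. */
static void do_fault(void)
{
	enum ctx_state prev = exception_enter();
	printf("handling fault, interrupted context: %s\n",
	       prev == IN_USER ? "user" : "kernel");
	exception_exit(prev);
}

int main(void)
{
	do_fault();
	return 0;
}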
39 changes: 39 additions & 0 deletions include/linux/context_tracking_state.h
@@ -0,0 +1,39 @@
#ifndef _LINUX_CONTEXT_TRACKING_STATE_H
#define _LINUX_CONTEXT_TRACKING_STATE_H

#include <linux/percpu.h>
#include <linux/static_key.h>

struct context_tracking {
/*
* When active is false, probes are unset in order
* to minimize overhead: TIF flags are cleared
* and calls to user_enter/exit are ignored. This
* may be further optimized using static keys.
*/
bool active;
enum ctx_state {
IN_KERNEL = 0,
IN_USER,
} state;
};

#ifdef CONFIG_CONTEXT_TRACKING
extern struct static_key context_tracking_enabled;
DECLARE_PER_CPU(struct context_tracking, context_tracking);

static inline bool context_tracking_in_user(void)
{
return __this_cpu_read(context_tracking.state) == IN_USER;
}

static inline bool context_tracking_active(void)
{
return __this_cpu_read(context_tracking.active);
}
#else
static inline bool context_tracking_in_user(void) { return false; }
static inline bool context_tracking_active(void) { return false; }
#endif /* CONFIG_CONTEXT_TRACKING */

#endif
117 changes: 1 addition & 116 deletions include/linux/hardirq.h
@@ -1,126 +1,11 @@
#ifndef LINUX_HARDIRQ_H
#define LINUX_HARDIRQ_H

#include <linux/preempt.h>
#include <linux/preempt_mask.h>
#include <linux/lockdep.h>
#include <linux/ftrace_irq.h>
#include <linux/vtime.h>
#include <asm/hardirq.h>

/*
* We put the hardirq and softirq counter into the preemption
* counter. The bitmask has the following meaning:
*
* - bits 0-7 are the preemption count (max preemption depth: 256)
* - bits 8-15 are the softirq count (max # of softirqs: 256)
*
* The hardirq count can in theory reach the same as NR_IRQS.
* In reality, the number of nested IRQS is limited to the stack
* size as well. For archs with over 1000 IRQS it is not practical
* to expect that they will all nest. We give a max of 10 bits for
* hardirq nesting. An arch may choose to give less than 10 bits.
* m68k expects it to be 8.
*
* - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
* - bit 26 is the NMI_MASK
* - bit 27 is the PREEMPT_ACTIVE flag
*
* PREEMPT_MASK: 0x000000ff
* SOFTIRQ_MASK: 0x0000ff00
* HARDIRQ_MASK: 0x03ff0000
* NMI_MASK: 0x04000000
*/
#define PREEMPT_BITS 8
#define SOFTIRQ_BITS 8
#define NMI_BITS 1

#define MAX_HARDIRQ_BITS 10

#ifndef HARDIRQ_BITS
# define HARDIRQ_BITS MAX_HARDIRQ_BITS
#endif

#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
#error HARDIRQ_BITS too high!
#endif

#define PREEMPT_SHIFT 0
#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)

#define __IRQ_MASK(x) ((1UL << (x))-1)

#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)

#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
#define NMI_OFFSET (1UL << NMI_SHIFT)

#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)

#ifndef PREEMPT_ACTIVE
#define PREEMPT_ACTIVE_BITS 1
#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
#endif

#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
#error PREEMPT_ACTIVE is too low!
#endif

#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
| NMI_MASK))

/*
* Are we doing bottom half or hardware interrupt processing?
* Are we in a softirq context? Interrupt context?
* in_softirq - Are we currently processing softirq or have bh disabled?
* in_serving_softirq - Are we currently processing softirq?
*/
#define in_irq() (hardirq_count())
#define in_softirq() (softirq_count())
#define in_interrupt() (irq_count())
#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)

/*
* Are we in NMI context?
*/
#define in_nmi() (preempt_count() & NMI_MASK)

#if defined(CONFIG_PREEMPT_COUNT)
# define PREEMPT_CHECK_OFFSET 1
#else
# define PREEMPT_CHECK_OFFSET 0
#endif

/*
* Are we running in atomic context? WARNING: this macro cannot
* always detect atomic context; in particular, it cannot know about
* held spinlocks in non-preemptible kernels. Thus it should not be
* used in the general case to determine whether sleeping is possible.
* Do not use in_atomic() in driver code.
*/
#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)

/*
* Check whether we were atomic before we did preempt_disable():
* (used by the scheduler, *after* releasing the kernel lock)
*/
#define in_atomic_preempt_off() \
((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)

#ifdef CONFIG_PREEMPT_COUNT
# define preemptible() (preempt_count() == 0 && !irqs_disabled())
#else
# define preemptible() 0
#endif

#if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS)
extern void synchronize_irq(unsigned int irq);
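The preempt_count layout comment and mask definitions shown in the hunk above are among the 116 lines removed from hardirq.h; presumably they now sit behind the new <linux/preempt_mask.h> include added at the top of the file, which is among the changed files not shown here. As a sanity check, the short standalone program below recomputes the documented mask values from the bit widths quoted in that comment; it is ordinary userspace C, not kernel code.

#include <stdio.h>

/*
 * Bit widths as documented in the removed comment
 * (HARDIRQ_BITS defaults to the 10-bit maximum).
 */
#define PREEMPT_BITS	8
#define SOFTIRQ_BITS	8
#define HARDIRQ_BITS	10
#define NMI_BITS	1

#define PREEMPT_SHIFT	0
#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
#define NMI_SHIFT	(HARDIRQ_SHIFT + HARDIRQ_BITS)

#define __IRQ_MASK(x)	((1UL << (x)) - 1)

int main(void)
{
	/* Expected: 0x000000ff, 0x0000ff00, 0x03ff0000, 0x04000000 */
	printf("PREEMPT_MASK: 0x%08lx\n", __IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT);
	printf("SOFTIRQ_MASK: 0x%08lx\n", __IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT);
	printf("HARDIRQ_MASK: 0x%08lx\n", __IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT);
	printf("NMI_MASK:     0x%08lx\n", __IRQ_MASK(NMI_BITS) << NMI_SHIFT);
	return 0;
}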