Merge branch 'timers/nohz-v3' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into timers/nohz

Pull nohz improvements from Frederic Weisbecker:

 " It mostly contains fixes and full dynticks off-case optimizations. I believe that
   distros want to enable this feature so it seems important to optimize the case
   where the "nohz_full=" parameter is empty. ie: I'm trying to remove any performance
   regression that comes with NO_HZ_FULL=y when the feature is not used.

   This patchset improves the current situation a lot (off-case appears to be around 11% faster
   with hackbench, although I guess it may vary depending on the configuration but it should be
   significantly faster in any case) now there is still some work to do: I can still observe a
   remaining loss of 1.6% throughput seen with hackbench compared to CONFIG_NO_HZ_FULL=n. "

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar committed Aug 14, 2013
2 parents d4e4ab8 + d13508f commit 6f1d657
Showing 22 changed files with 544 additions and 328 deletions.
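The central off-case optimization of this series is visible in the include/linux/context_tracking.h hunk further down: user_enter(), user_exit() and context_tracking_task_switch() become inline wrappers that test the context_tracking_enabled static key before calling the out-of-line probes, so a kernel booted with an empty "nohz_full=" parameter pays only a patched-out branch per call site. The sketch below is a plain userspace analogue of that pattern, not kernel code: it substitutes __builtin_expect() for the kernel's jump-label static keys, and the names (tracking_enabled, the slow-path function) are made up purely for illustration.

#include <stdbool.h>
#include <stdio.h>

/*
 * Stands in for the context_tracking_enabled static key;
 * false is the common "nohz_full= is empty" case.
 */
static bool tracking_enabled;

/* Stands in for the out-of-line probe (context_tracking_user_enter). */
static void user_enter_slowpath(void)
{
	puts("probe fired: CPU is entering user mode");
}

static inline void user_enter(void)
{
	/* Off-case: a single branch hinted as not taken, nothing else. */
	if (__builtin_expect(tracking_enabled, 0))
		user_enter_slowpath();
}

int main(void)
{
	user_enter();             /* feature off: falls straight through */
	tracking_enabled = true;  /* as if a nohz_full= CPU list were given */
	user_enter();             /* feature on: the slow path runs */
	return 0;
}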
1 change: 1 addition & 0 deletions arch/ia64/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += clkdev.h
generic-y += exec.h
generic-y += kvm_para.h
generic-y += trace_clock.h
generic-y += vtime.h
2 changes: 1 addition & 1 deletion arch/m68k/include/asm/irqflags.h
@@ -3,7 +3,7 @@

#include <linux/types.h>
#ifdef CONFIG_MMU
#include <linux/hardirq.h>
#include <linux/preempt_mask.h>
#endif
#include <linux/preempt.h>
#include <asm/thread_info.h>
1 change: 1 addition & 0 deletions arch/powerpc/include/asm/Kbuild
@@ -2,3 +2,4 @@
generic-y += clkdev.h
generic-y += rwsem.h
generic-y += trace_clock.h
generic-y += vtime.h
3 changes: 0 additions & 3 deletions arch/s390/include/asm/cputime.h
@@ -13,9 +13,6 @@
#include <asm/div64.h>


#define __ARCH_HAS_VTIME_ACCOUNT
#define __ARCH_HAS_VTIME_TASK_SWITCH

/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */

typedef unsigned long long __nocast cputime_t;
7 changes: 7 additions & 0 deletions arch/s390/include/asm/vtime.h
@@ -0,0 +1,7 @@
#ifndef _S390_VTIME_H
#define _S390_VTIME_H

#define __ARCH_HAS_VTIME_ACCOUNT
#define __ARCH_HAS_VTIME_TASK_SWITCH

#endif /* _S390_VTIME_H */
1 change: 1 addition & 0 deletions arch/s390/kernel/vtime.c
@@ -19,6 +19,7 @@
#include <asm/irq_regs.h>
#include <asm/cputime.h>
#include <asm/vtimer.h>
#include <asm/vtime.h>
#include <asm/irq.h>
#include "entry.h"

Empty file added include/asm-generic/vtime.h
128 changes: 69 additions & 59 deletions include/linux/context_tracking.h
@@ -2,100 +2,110 @@
#define _LINUX_CONTEXT_TRACKING_H

#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/vtime.h>
#include <linux/context_tracking_state.h>
#include <asm/ptrace.h>

struct context_tracking {
/*
* When active is false, probes are unset in order
* to minimize overhead: TIF flags are cleared
* and calls to user_enter/exit are ignored. This
* may be further optimized using static keys.
*/
bool active;
enum ctx_state {
IN_KERNEL = 0,
IN_USER,
} state;
};

static inline void __guest_enter(void)
{
/*
* This is running in ioctl context so we can avoid
* the call to vtime_account() with its unnecessary idle check.
*/
vtime_account_system(current);
current->flags |= PF_VCPU;
}

static inline void __guest_exit(void)
{
/*
* This is running in ioctl context so we can avoid
* the call to vtime_account() with its unnecessary idle check.
*/
vtime_account_system(current);
current->flags &= ~PF_VCPU;
}

#ifdef CONFIG_CONTEXT_TRACKING
DECLARE_PER_CPU(struct context_tracking, context_tracking);
extern void context_tracking_cpu_set(int cpu);

static inline bool context_tracking_in_user(void)
extern void context_tracking_user_enter(void);
extern void context_tracking_user_exit(void);
extern void __context_tracking_task_switch(struct task_struct *prev,
struct task_struct *next);

static inline void user_enter(void)
{
return __this_cpu_read(context_tracking.state) == IN_USER;
}
if (static_key_false(&context_tracking_enabled))
context_tracking_user_enter();

static inline bool context_tracking_active(void)
}
static inline void user_exit(void)
{
return __this_cpu_read(context_tracking.active);
if (static_key_false(&context_tracking_enabled))
context_tracking_user_exit();
}

extern void user_enter(void);
extern void user_exit(void);

extern void guest_enter(void);
extern void guest_exit(void);

static inline enum ctx_state exception_enter(void)
{
enum ctx_state prev_ctx;

if (!static_key_false(&context_tracking_enabled))
return 0;

prev_ctx = this_cpu_read(context_tracking.state);
user_exit();
context_tracking_user_exit();

return prev_ctx;
}

static inline void exception_exit(enum ctx_state prev_ctx)
{
if (prev_ctx == IN_USER)
user_enter();
if (static_key_false(&context_tracking_enabled)) {
if (prev_ctx == IN_USER)
context_tracking_user_enter();
}
}

extern void context_tracking_task_switch(struct task_struct *prev,
struct task_struct *next);
static inline void context_tracking_task_switch(struct task_struct *prev,
struct task_struct *next)
{
if (static_key_false(&context_tracking_enabled))
__context_tracking_task_switch(prev, next);
}
#else
static inline bool context_tracking_in_user(void) { return false; }
static inline void user_enter(void) { }
static inline void user_exit(void) { }
static inline enum ctx_state exception_enter(void) { return 0; }
static inline void exception_exit(enum ctx_state prev_ctx) { }
static inline void context_tracking_task_switch(struct task_struct *prev,
struct task_struct *next) { }
#endif /* !CONFIG_CONTEXT_TRACKING */


#ifdef CONFIG_CONTEXT_TRACKING_FORCE
extern void context_tracking_init(void);
#else
static inline void context_tracking_init(void) { }
#endif /* CONFIG_CONTEXT_TRACKING_FORCE */


#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static inline void guest_enter(void)
{
__guest_enter();
if (vtime_accounting_enabled())
vtime_guest_enter(current);
else
current->flags |= PF_VCPU;
}

static inline void guest_exit(void)
{
__guest_exit();
if (vtime_accounting_enabled())
vtime_guest_exit(current);
else
current->flags &= ~PF_VCPU;
}

static inline enum ctx_state exception_enter(void) { return 0; }
static inline void exception_exit(enum ctx_state prev_ctx) { }
static inline void context_tracking_task_switch(struct task_struct *prev,
struct task_struct *next) { }
#endif /* !CONFIG_CONTEXT_TRACKING */
#else
static inline void guest_enter(void)
{
/*
* This is running in ioctl context so its safe
* to assume that it's the stime pending cputime
* to flush.
*/
vtime_account_system(current);
current->flags |= PF_VCPU;
}

static inline void guest_exit(void)
{
/* Flush the guest cputime we spent on the guest */
vtime_account_system(current);
current->flags &= ~PF_VCPU;
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */

#endif
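For context, the exception_enter()/exception_exit() pair defined above is meant to bracket exception handlers: the handler saves the context-tracking state it interrupted and restores it on the way out, and with this merge both helpers also bail out early when the static key is off. The self-contained sketch below only models that save/restore pattern with ordinary variables; the fault handler and the global state variable are illustrative, not the kernel's per-CPU implementation.

#include <stdio.h>

enum ctx_state { IN_KERNEL = 0, IN_USER };

/* Stands in for the per-CPU context_tracking.state field. */
static enum ctx_state cpu_state = IN_USER;

static enum ctx_state exception_enter(void)
{
	enum ctx_state prev_ctx = cpu_state;  /* remember what the exception interrupted */
	cpu_state = IN_KERNEL;                /* account the handler as kernel context */
	return prev_ctx;
}

static void exception_exit(enum ctx_state prev_ctx)
{
	cpu_state = prev_ctx;                 /* restore user or kernel state on return */
}

/* Illustrative handler, not a real kernel entry point. */
static void do_fault(void)
{
	enum ctx_state prev = exception_enter();
	printf("handling fault, interrupted context: %s\n",
	       prev == IN_USER ? "user" : "kernel");
	exception_exit(prev);
}

int main(void)
{
	do_fault();
	return 0;
}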
39 changes: 39 additions & 0 deletions include/linux/context_tracking_state.h
@@ -0,0 +1,39 @@
#ifndef _LINUX_CONTEXT_TRACKING_STATE_H
#define _LINUX_CONTEXT_TRACKING_STATE_H

#include <linux/percpu.h>
#include <linux/static_key.h>

struct context_tracking {
/*
* When active is false, probes are unset in order
* to minimize overhead: TIF flags are cleared
* and calls to user_enter/exit are ignored. This
* may be further optimized using static keys.
*/
bool active;
enum ctx_state {
IN_KERNEL = 0,
IN_USER,
} state;
};

#ifdef CONFIG_CONTEXT_TRACKING
extern struct static_key context_tracking_enabled;
DECLARE_PER_CPU(struct context_tracking, context_tracking);

static inline bool context_tracking_in_user(void)
{
return __this_cpu_read(context_tracking.state) == IN_USER;
}

static inline bool context_tracking_active(void)
{
return __this_cpu_read(context_tracking.active);
}
#else
static inline bool context_tracking_in_user(void) { return false; }
static inline bool context_tracking_active(void) { return false; }
#endif /* CONFIG_CONTEXT_TRACKING */

#endif
117 changes: 1 addition & 116 deletions include/linux/hardirq.h
@@ -1,126 +1,11 @@
#ifndef LINUX_HARDIRQ_H
#define LINUX_HARDIRQ_H

#include <linux/preempt.h>
#include <linux/preempt_mask.h>
#include <linux/lockdep.h>
#include <linux/ftrace_irq.h>
#include <linux/vtime.h>
#include <asm/hardirq.h>

/*
* We put the hardirq and softirq counter into the preemption
* counter. The bitmask has the following meaning:
*
* - bits 0-7 are the preemption count (max preemption depth: 256)
* - bits 8-15 are the softirq count (max # of softirqs: 256)
*
* The hardirq count can in theory reach the same as NR_IRQS.
* In reality, the number of nested IRQS is limited to the stack
* size as well. For archs with over 1000 IRQS it is not practical
* to expect that they will all nest. We give a max of 10 bits for
* hardirq nesting. An arch may choose to give less than 10 bits.
* m68k expects it to be 8.
*
* - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
* - bit 26 is the NMI_MASK
* - bit 27 is the PREEMPT_ACTIVE flag
*
* PREEMPT_MASK: 0x000000ff
* SOFTIRQ_MASK: 0x0000ff00
* HARDIRQ_MASK: 0x03ff0000
* NMI_MASK: 0x04000000
*/
#define PREEMPT_BITS 8
#define SOFTIRQ_BITS 8
#define NMI_BITS 1

#define MAX_HARDIRQ_BITS 10

#ifndef HARDIRQ_BITS
# define HARDIRQ_BITS MAX_HARDIRQ_BITS
#endif

#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
#error HARDIRQ_BITS too high!
#endif

#define PREEMPT_SHIFT 0
#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)

#define __IRQ_MASK(x) ((1UL << (x))-1)

#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)

#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
#define NMI_OFFSET (1UL << NMI_SHIFT)

#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)

#ifndef PREEMPT_ACTIVE
#define PREEMPT_ACTIVE_BITS 1
#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
#endif

#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
#error PREEMPT_ACTIVE is too low!
#endif

#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
| NMI_MASK))

/*
* Are we doing bottom half or hardware interrupt processing?
* Are we in a softirq context? Interrupt context?
* in_softirq - Are we currently processing softirq or have bh disabled?
* in_serving_softirq - Are we currently processing softirq?
*/
#define in_irq() (hardirq_count())
#define in_softirq() (softirq_count())
#define in_interrupt() (irq_count())
#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)

/*
* Are we in NMI context?
*/
#define in_nmi() (preempt_count() & NMI_MASK)

#if defined(CONFIG_PREEMPT_COUNT)
# define PREEMPT_CHECK_OFFSET 1
#else
# define PREEMPT_CHECK_OFFSET 0
#endif

/*
* Are we running in atomic context? WARNING: this macro cannot
* always detect atomic context; in particular, it cannot know about
* held spinlocks in non-preemptible kernels. Thus it should not be
* used in the general case to determine whether sleeping is possible.
* Do not use in_atomic() in driver code.
*/
#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)

/*
* Check whether we were atomic before we did preempt_disable():
* (used by the scheduler, *after* releasing the kernel lock)
*/
#define in_atomic_preempt_off() \
((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)

#ifdef CONFIG_PREEMPT_COUNT
# define preemptible() (preempt_count() == 0 && !irqs_disabled())
#else
# define preemptible() 0
#endif

#if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS)
extern void synchronize_irq(unsigned int irq);
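The preempt_count layout comment and mask definitions shown in the hunk above are among the 116 lines removed from hardirq.h; presumably they now sit behind the new <linux/preempt_mask.h> include added at the top of the file, which is among the changed files not shown here. As a sanity check, the short standalone program below recomputes the documented mask values from the bit widths quoted in that comment; it is ordinary userspace C, not kernel code.

#include <stdio.h>

/*
 * Bit widths as documented in the removed comment
 * (HARDIRQ_BITS defaults to the 10-bit maximum).
 */
#define PREEMPT_BITS	8
#define SOFTIRQ_BITS	8
#define HARDIRQ_BITS	10
#define NMI_BITS	1

#define PREEMPT_SHIFT	0
#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
#define NMI_SHIFT	(HARDIRQ_SHIFT + HARDIRQ_BITS)

#define __IRQ_MASK(x)	((1UL << (x)) - 1)

int main(void)
{
	/* Expected: 0x000000ff, 0x0000ff00, 0x03ff0000, 0x04000000 */
	printf("PREEMPT_MASK: 0x%08lx\n", __IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT);
	printf("SOFTIRQ_MASK: 0x%08lx\n", __IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT);
	printf("HARDIRQ_MASK: 0x%08lx\n", __IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT);
	printf("NMI_MASK:     0x%08lx\n", __IRQ_MASK(NMI_BITS) << NMI_SHIFT);
	return 0;
}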