diff --git a/Documentation/devicetree/bindings/timer/lsi,zevio-timer.txt b/Documentation/devicetree/bindings/timer/lsi,zevio-timer.txt new file mode 100644 index 000000000000..b2d07ad90e9a --- /dev/null +++ b/Documentation/devicetree/bindings/timer/lsi,zevio-timer.txt @@ -0,0 +1,33 @@ +TI-NSPIRE timer + +Required properties: + +- compatible : should be "lsi,zevio-timer". +- reg : The physical base address and size of the timer (always first). +- clocks: phandle to the source clock. + +Optional properties: + +- interrupts : The interrupt number of the first timer. +- reg : The interrupt acknowledgement registers + (always after timer base address) + +If any of the optional properties are not given, the timer is added as a +clock-source only. + +Example: + +timer { + compatible = "lsi,zevio-timer"; + reg = <0x900D0000 0x1000>, <0x900A0020 0x8>; + interrupts = <19>; + clocks = <&timer_clk>; +}; + +Example (no clock-events): + +timer { + compatible = "lsi,zevio-timer"; + reg = <0x900D0000 0x1000>; + clocks = <&timer_clk>; +}; diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt new file mode 100644 index 000000000000..cbf7ae412da4 --- /dev/null +++ b/Documentation/kernel-per-CPU-kthreads.txt @@ -0,0 +1,202 @@ +REDUCING OS JITTER DUE TO PER-CPU KTHREADS + +This document lists per-CPU kthreads in the Linux kernel and presents +options to control their OS jitter. Note that non-per-CPU kthreads are +not listed here. To reduce OS jitter from non-per-CPU kthreads, bind +them to a "housekeeping" CPU dedicated to such work. + + +REFERENCES + +o Documentation/IRQ-affinity.txt: Binding interrupts to sets of CPUs. + +o Documentation/cgroups: Using cgroups to bind tasks to sets of CPUs. + +o man taskset: Using the taskset command to bind tasks to sets + of CPUs. + +o man sched_setaffinity: Using the sched_setaffinity() system + call to bind tasks to sets of CPUs. + +o /sys/devices/system/cpu/cpuN/online: Control CPU N's hotplug state, + writing "0" to offline and "1" to online. + +o In order to locate kernel-generated OS jitter on CPU N: + + cd /sys/kernel/debug/tracing + echo 1 > max_graph_depth # Increase the "1" for more detail + echo function_graph > current_tracer + # run workload + cat per_cpu/cpuN/trace + + +KTHREADS + +Name: ehca_comp/%u +Purpose: Periodically process Infiniband-related work. +To reduce its OS jitter, do any of the following: +1. Don't use eHCA Infiniband hardware, instead choosing hardware + that does not require per-CPU kthreads. This will prevent these + kthreads from being created in the first place. (This will + work for most people, as this hardware, though important, is + relatively old and is produced in relatively low unit volumes.) +2. Do all eHCA-Infiniband-related work on other CPUs, including + interrupts. +3. Rework the eHCA driver so that its per-CPU kthreads are + provisioned only on selected CPUs. + + +Name: irq/%d-%s +Purpose: Handle threaded interrupts. +To reduce its OS jitter, do the following: +1. Use irq affinity to force the irq threads to execute on + some other CPU. + +Name: kcmtpd_ctr_%d +Purpose: Handle Bluetooth work. +To reduce its OS jitter, do one of the following: +1. Don't use Bluetooth, in which case these kthreads won't be + created in the first place. +2. Use irq affinity to force Bluetooth-related interrupts to + occur on some other CPU and furthermore initiate all + Bluetooth activity on some other CPU. + +Name: ksoftirqd/%u +Purpose: Execute softirq handlers when threaded or when under heavy load. +To reduce its OS jitter, each softirq vector must be handled +separately as follows: +TIMER_SOFTIRQ: Do all of the following: +1. To the extent possible, keep the CPU out of the kernel when it + is non-idle, for example, by avoiding system calls and by forcing + both kernel threads and interrupts to execute elsewhere. +2. Build with CONFIG_HOTPLUG_CPU=y. After boot completes, force + the CPU offline, then bring it back online. This forces + recurring timers to migrate elsewhere. If you are concerned + with multiple CPUs, force them all offline before bringing the + first one back online. Once you have onlined the CPUs in question, + do not offline any other CPUs, because doing so could force the + timer back onto one of the CPUs in question. +NET_TX_SOFTIRQ and NET_RX_SOFTIRQ: Do all of the following: +1. Force networking interrupts onto other CPUs. +2. Initiate any network I/O on other CPUs. +3. Once your application has started, prevent CPU-hotplug operations + from being initiated from tasks that might run on the CPU to + be de-jittered. (It is OK to force this CPU offline and then + bring it back online before you start your application.) +BLOCK_SOFTIRQ: Do all of the following: +1. Force block-device interrupts onto some other CPU. +2. Initiate any block I/O on other CPUs. +3. Once your application has started, prevent CPU-hotplug operations + from being initiated from tasks that might run on the CPU to + be de-jittered. (It is OK to force this CPU offline and then + bring it back online before you start your application.) +BLOCK_IOPOLL_SOFTIRQ: Do all of the following: +1. Force block-device interrupts onto some other CPU. +2. Initiate any block I/O and block-I/O polling on other CPUs. +3. Once your application has started, prevent CPU-hotplug operations + from being initiated from tasks that might run on the CPU to + be de-jittered. (It is OK to force this CPU offline and then + bring it back online before you start your application.) +TASKLET_SOFTIRQ: Do one or more of the following: +1. Avoid use of drivers that use tasklets. (Such drivers will contain + calls to things like tasklet_schedule().) +2. Convert all drivers that you must use from tasklets to workqueues. +3. Force interrupts for drivers using tasklets onto other CPUs, + and also do I/O involving these drivers on other CPUs. +SCHED_SOFTIRQ: Do all of the following: +1. Avoid sending scheduler IPIs to the CPU to be de-jittered, + for example, ensure that at most one runnable kthread is present + on that CPU. If a thread that expects to run on the de-jittered + CPU awakens, the scheduler will send an IPI that can result in + a subsequent SCHED_SOFTIRQ. +2. Build with CONFIG_RCU_NOCB_CPU=y, CONFIG_RCU_NOCB_CPU_ALL=y, + CONFIG_NO_HZ_FULL=y, and, in addition, ensure that the CPU + to be de-jittered is marked as an adaptive-ticks CPU using the + "nohz_full=" boot parameter. This reduces the number of + scheduler-clock interrupts that the de-jittered CPU receives, + minimizing its chances of being selected to do the load balancing + work that runs in SCHED_SOFTIRQ context. +3. To the extent possible, keep the CPU out of the kernel when it + is non-idle, for example, by avoiding system calls and by + forcing both kernel threads and interrupts to execute elsewhere. + This further reduces the number of scheduler-clock interrupts + received by the de-jittered CPU. +HRTIMER_SOFTIRQ: Do all of the following: +1. To the extent possible, keep the CPU out of the kernel when it + is non-idle. For example, avoid system calls and force both + kernel threads and interrupts to execute elsewhere. +2. Build with CONFIG_HOTPLUG_CPU=y. Once boot completes, force the + CPU offline, then bring it back online. This forces recurring + timers to migrate elsewhere. If you are concerned with multiple + CPUs, force them all offline before bringing the first one + back online. Once you have onlined the CPUs in question, do not + offline any other CPUs, because doing so could force the timer + back onto one of the CPUs in question. +RCU_SOFTIRQ: Do at least one of the following: +1. Offload callbacks and keep the CPU in either dyntick-idle or + adaptive-ticks state by doing all of the following: + a. Build with CONFIG_RCU_NOCB_CPU=y, CONFIG_RCU_NOCB_CPU_ALL=y, + CONFIG_NO_HZ_FULL=y, and, in addition ensure that the CPU + to be de-jittered is marked as an adaptive-ticks CPU using + the "nohz_full=" boot parameter. Bind the rcuo kthreads + to housekeeping CPUs, which can tolerate OS jitter. + b. To the extent possible, keep the CPU out of the kernel + when it is non-idle, for example, by avoiding system + calls and by forcing both kernel threads and interrupts + to execute elsewhere. +2. Enable RCU to do its processing remotely via dyntick-idle by + doing all of the following: + a. Build with CONFIG_NO_HZ=y and CONFIG_RCU_FAST_NO_HZ=y. + b. Ensure that the CPU goes idle frequently, allowing other + CPUs to detect that it has passed through an RCU quiescent + state. If the kernel is built with CONFIG_NO_HZ_FULL=y, + userspace execution also allows other CPUs to detect that + the CPU in question has passed through a quiescent state. + c. To the extent possible, keep the CPU out of the kernel + when it is non-idle, for example, by avoiding system + calls and by forcing both kernel threads and interrupts + to execute elsewhere. + +Name: rcuc/%u +Purpose: Execute RCU callbacks in CONFIG_RCU_BOOST=y kernels. +To reduce its OS jitter, do at least one of the following: +1. Build the kernel with CONFIG_PREEMPT=n. This prevents these + kthreads from being created in the first place, and also obviates + the need for RCU priority boosting. This approach is feasible + for workloads that do not require high degrees of responsiveness. +2. Build the kernel with CONFIG_RCU_BOOST=n. This prevents these + kthreads from being created in the first place. This approach + is feasible only if your workload never requires RCU priority + boosting, for example, if you ensure frequent idle time on all + CPUs that might execute within the kernel. +3. Build with CONFIG_RCU_NOCB_CPU=y and CONFIG_RCU_NOCB_CPU_ALL=y, + which offloads all RCU callbacks to kthreads that can be moved + off of CPUs susceptible to OS jitter. This approach prevents the + rcuc/%u kthreads from having any work to do, so that they are + never awakened. +4. Ensure that the CPU never enters the kernel, and, in particular, + avoid initiating any CPU hotplug operations on this CPU. This is + another way of preventing any callbacks from being queued on the + CPU, again preventing the rcuc/%u kthreads from having any work + to do. + +Name: rcuob/%d, rcuop/%d, and rcuos/%d +Purpose: Offload RCU callbacks from the corresponding CPU. +To reduce its OS jitter, do at least one of the following: +1. Use affinity, cgroups, or other mechanism to force these kthreads + to execute on some other CPU. +2. Build with CONFIG_RCU_NOCB_CPUS=n, which will prevent these + kthreads from being created in the first place. However, please + note that this will not eliminate OS jitter, but will instead + shift it to RCU_SOFTIRQ. + +Name: watchdog/%u +Purpose: Detect software lockups on each CPU. +To reduce its OS jitter, do at least one of the following: +1. Build with CONFIG_LOCKUP_DETECTOR=n, which will prevent these + kthreads from being created in the first place. +2. Echo a zero to /proc/sys/kernel/watchdog to disable the + watchdog timer. +3. Echo a large number of /proc/sys/kernel/watchdog_thresh in + order to reduce the frequency of OS jitter due to the watchdog + timer down to a level that is acceptable for your workload. diff --git a/arch/Kconfig b/arch/Kconfig index dd0e8eb8042f..a4429bcd609e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -213,6 +213,9 @@ config USE_GENERIC_SMP_HELPERS config GENERIC_SMP_IDLE_THREAD bool +config GENERIC_IDLE_POLL_SETUP + bool + # Select if arch init_task initializer is different to init/init_task.c config ARCH_INIT_TASK bool diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index d423d58f938d..53d3a356f61f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -14,6 +14,7 @@ config ARM select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP + select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_IDLE_POLL_SETUP select GENERIC_STRNCPY_FROM_USER @@ -38,6 +39,7 @@ config ARM select HAVE_GENERIC_HARDIRQS select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)) select HAVE_IDE if PCI || ISA || PCMCIA + select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO @@ -488,7 +490,7 @@ config ARCH_IXP4XX config ARCH_DOVE bool "Marvell Dove" select ARCH_REQUIRE_GPIOLIB - select CPU_V7 + select CPU_PJ4 select GENERIC_CLOCKEVENTS select MIGHT_HAVE_PCI select PINCTRL diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 47374085befd..1ba358ba16b8 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -309,7 +309,7 @@ define archhelp echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' echo '* xipImage - XIP kernel image, if configured (arch/$(ARCH)/boot/xipImage)' echo ' uImage - U-Boot wrapped zImage' - echo ' bootpImage - Combined zImage and initial RAM disk' + echo ' bootpImage - Combined zImage and initial RAM disk' echo ' (supply initrd image via make variable INITRD=)' echo '* dtbs - Build device tree blobs for enabled boards' echo ' install - Install uncompressed kernel' diff --git a/arch/arm/common/mcpm_platsmp.c b/arch/arm/common/mcpm_platsmp.c index 52b88d81b7bb..3caed0db6986 100644 --- a/arch/arm/common/mcpm_platsmp.c +++ b/arch/arm/common/mcpm_platsmp.c @@ -15,8 +15,6 @@ #include #include -#include - #include #include #include @@ -49,7 +47,6 @@ static int __cpuinit mcpm_boot_secondary(unsigned int cpu, struct task_struct *i static void __cpuinit mcpm_secondary_init(unsigned int cpu) { mcpm_cpu_powered_up(); - gic_secondary_init(0); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c index ddc740769601..023ee63827a2 100644 --- a/arch/arm/common/timer-sp.c +++ b/arch/arm/common/timer-sp.c @@ -28,8 +28,8 @@ #include #include #include +#include -#include #include #include diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h index 7eb18c1d8d6c..4f009c10540d 100644 --- a/arch/arm/include/asm/cmpxchg.h +++ b/arch/arm/include/asm/cmpxchg.h @@ -233,15 +233,15 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, ((__typeof__(*(ptr)))atomic64_cmpxchg(container_of((ptr), \ atomic64_t, \ counter), \ - (unsigned long)(o), \ - (unsigned long)(n))) + (unsigned long long)(o), \ + (unsigned long long)(n))) #define cmpxchg64_local(ptr, o, n) \ ((__typeof__(*(ptr)))local64_cmpxchg(container_of((ptr), \ local64_t, \ a), \ - (unsigned long)(o), \ - (unsigned long)(n))) + (unsigned long long)(o), \ + (unsigned long long)(n))) #endif /* __LINUX_ARM_ARCH__ >= 6 */ diff --git a/arch/arm/include/asm/sched_clock.h b/arch/arm/include/asm/sched_clock.h index 3d520ddca61b..2389b71a8e7c 100644 --- a/arch/arm/include/asm/sched_clock.h +++ b/arch/arm/include/asm/sched_clock.h @@ -1,16 +1,4 @@ -/* - * sched_clock.h: support for extending counters to full 64-bit ns counter - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. +/* You shouldn't include this file. Use linux/sched_clock.h instead. + * Temporary file until all asm/sched_clock.h users are gone */ -#ifndef ASM_SCHED_CLOCK -#define ASM_SCHED_CLOCK - -extern void sched_clock_postinit(void); -extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate); - -extern unsigned long long (*sched_clock_func)(void); - -#endif +#include diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5f3338eacad2..97cb0576d07c 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -16,7 +16,7 @@ CFLAGS_REMOVE_return_address.o = -pg # Object file lists. obj-y := elf.o entry-armv.o entry-common.o irq.o opcodes.o \ - process.o ptrace.o return_address.o sched_clock.o \ + process.o ptrace.o return_address.o \ setup.o signal.o stacktrace.o sys_arm.o time.o traps.o obj-$(CONFIG_ATAGS) += atags_parse.o diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c index 59dcdced6e30..221f07b11ccb 100644 --- a/arch/arm/kernel/arch_timer.c +++ b/arch/arm/kernel/arch_timer.c @@ -11,9 +11,9 @@ #include #include #include +#include #include -#include #include diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index abff4e9aaee0..98aee3258398 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -24,9 +24,9 @@ #include #include #include +#include #include -#include #include #include #include @@ -120,6 +120,4 @@ void __init time_init(void) machine_desc->init_time(); else clocksource_of_init(); - - sched_clock_postinit(); } diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c index bad361ec1666..7a55b5c95971 100644 --- a/arch/arm/mach-davinci/time.c +++ b/arch/arm/mach-davinci/time.c @@ -18,8 +18,8 @@ #include #include #include +#include -#include #include #include diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c index fea91313678b..cd46529e9eaa 100644 --- a/arch/arm/mach-imx/time.c +++ b/arch/arm/mach-imx/time.c @@ -26,8 +26,8 @@ #include #include #include +#include -#include #include #include "common.h" diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c index b23c8e4f28e8..aa4346227c41 100644 --- a/arch/arm/mach-integrator/integrator_ap.c +++ b/arch/arm/mach-integrator/integrator_ap.c @@ -41,6 +41,7 @@ #include #include #include +#include #include