diff --git a/[refs] b/[refs] index b155a5456c72..07297c9f6aab 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 6092848a2a23b660150a38bc06f59d75838d70c8 +refs/heads/master: 7b4967c532045a1983d6d4af5c69cc7c5109f62b diff --git a/trunk/Documentation/arm/mem_alignment b/trunk/Documentation/arm/mem_alignment index c7c7a114c78c..d145ccca169a 100644 --- a/trunk/Documentation/arm/mem_alignment +++ b/trunk/Documentation/arm/mem_alignment @@ -24,7 +24,7 @@ real bad - it changes the behaviour of all unaligned instructions in user space, and might cause programs to fail unexpectedly. To change the alignment trap behavior, simply echo a number into -/proc/cpu/alignment. The number is made up from various bits: +/proc/sys/debug/alignment. The number is made up from various bits: bit behavior when set --- ----------------- diff --git a/trunk/Documentation/controllers/cpuacct.txt b/trunk/Documentation/controllers/cpuacct.txt deleted file mode 100644 index bb775fbe43d7..000000000000 --- a/trunk/Documentation/controllers/cpuacct.txt +++ /dev/null @@ -1,32 +0,0 @@ -CPU Accounting Controller -------------------------- - -The CPU accounting controller is used to group tasks using cgroups and -account the CPU usage of these groups of tasks. - -The CPU accounting controller supports multi-hierarchy groups. An accounting -group accumulates the CPU usage of all of its child groups and the tasks -directly present in its group. - -Accounting groups can be created by first mounting the cgroup filesystem. - -# mkdir /cgroups -# mount -t cgroup -ocpuacct none /cgroups - -With the above step, the initial or the parent accounting group -becomes visible at /cgroups. At bootup, this group includes all the -tasks in the system. /cgroups/tasks lists the tasks in this cgroup. -/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by -this group which is essentially the CPU time obtained by all the tasks -in the system. - -New accounting groups can be created under the parent group /cgroups. - -# cd /cgroups -# mkdir g1 -# echo $$ > g1 - -The above steps create a new group g1 and move the current shell -process (bash) into it. CPU time consumed by this bash and its children -can be obtained from g1/cpuacct.usage and the same is accumulated in -/cgroups/cpuacct.usage also. diff --git a/trunk/Documentation/cpu-hotplug.txt b/trunk/Documentation/cpu-hotplug.txt index 9d620c153b04..94bbc27ddd4f 100644 --- a/trunk/Documentation/cpu-hotplug.txt +++ b/trunk/Documentation/cpu-hotplug.txt @@ -50,17 +50,16 @@ additional_cpus=n (*) Use this to limit hotpluggable cpus. This option sets cpu_possible_map = cpu_present_map + additional_cpus (*) Option valid only for following architectures -- ia64 +- x86_64, ia64 -ia64 uses the number of disabled local apics in ACPI tables MADT to -determine the number of potentially hot-pluggable cpus. The implementation -should only rely on this to count the # of cpus, but *MUST* not rely -on the apicid values in those tables for disabled apics. In the event -BIOS doesn't mark such hot-pluggable cpus as disabled entries, one could -use this parameter "additional_cpus=x" to represent those cpus in the -cpu_possible_map. +ia64 and x86_64 use the number of disabled local apics in ACPI tables MADT +to determine the number of potentially hot-pluggable cpus. The implementation +should only rely on this to count the # of cpus, but *MUST* not rely on the +apicid values in those tables for disabled apics. In the event BIOS doesn't +mark such hot-pluggable cpus as disabled entries, one could use this +parameter "additional_cpus=x" to represent those cpus in the cpu_possible_map. -possible_cpus=n [s390,x86_64] use this to set hotpluggable cpus. +possible_cpus=n [s390 only] use this to set hotpluggable cpus. This option sets possible_cpus bits in cpu_possible_map. Thus keeping the numbers of bits set constant even if the machine gets rebooted. diff --git a/trunk/Documentation/ftrace.txt b/trunk/Documentation/ftrace.txt index 803b1318b13d..9cc4d685dde5 100644 --- a/trunk/Documentation/ftrace.txt +++ b/trunk/Documentation/ftrace.txt @@ -82,7 +82,7 @@ of ftrace. Here is a list of some of the key files: tracer is not adding more data, they will display the same information every time they are read. - trace_options: This file lets the user control the amount of data + iter_ctrl: This file lets the user control the amount of data that is displayed in one of the above output files. @@ -94,10 +94,10 @@ of ftrace. Here is a list of some of the key files: only be recorded if the latency is greater than the value in this file. (in microseconds) - buffer_size_kb: This sets or displays the number of kilobytes each CPU + trace_entries: This sets or displays the number of bytes each CPU buffer can hold. The tracer buffers are the same size for each CPU. The displayed number is the size of the - CPU buffer and not total size of all buffers. The + CPU buffer and not total size of all buffers. The trace buffers are allocated in pages (blocks of memory that the kernel uses for allocation, usually 4 KB in size). If the last page allocated has room for more bytes @@ -127,8 +127,6 @@ of ftrace. Here is a list of some of the key files: be traced. If a function exists in both set_ftrace_filter and set_ftrace_notrace, the function will _not_ be traced. - set_ftrace_pid: Have the function tracer only trace a single thread. - available_filter_functions: This lists the functions that ftrace has processed and can trace. These are the function names that you can pass to "set_ftrace_filter" or @@ -318,23 +316,23 @@ The above is mostly meaningful for kernel developers. The rest is the same as the 'trace' file. -trace_options -------------- +iter_ctrl +--------- -The trace_options file is used to control what gets printed in the trace +The iter_ctrl file is used to control what gets printed in the trace output. To see what is available, simply cat the file: - cat /debug/tracing/trace_options + cat /debug/tracing/iter_ctrl print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ - noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj + noblock nostacktrace nosched-tree To disable one of the options, echo in the option prepended with "no". - echo noprint-parent > /debug/tracing/trace_options + echo noprint-parent > /debug/tracing/iter_ctrl To enable an option, leave off the "no". - echo sym-offset > /debug/tracing/trace_options + echo sym-offset > /debug/tracing/iter_ctrl Here are the available options: @@ -380,20 +378,6 @@ Here are the available options: When a trace is recorded, so is the stack of functions. This allows for back traces of trace sites. - userstacktrace - This option changes the trace. - It records a stacktrace of the current userspace thread. - - sym-userobj - when user stacktrace are enabled, look up which object the - address belongs to, and print a relative address - This is especially useful when ASLR is on, otherwise you don't - get a chance to resolve the address to object/file/line after the app is no - longer running - - The lookup is performed when you read trace,trace_pipe,latency_trace. Example: - - a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0 -x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] - sched-tree - TBD (any users??) @@ -1075,83 +1059,6 @@ For simple one time traces, the above is sufficent. For anything else, a search through /proc/mounts may be needed to find where the debugfs file-system is mounted. - -Single thread tracing ---------------------- - -By writing into /debug/tracing/set_ftrace_pid you can trace a -single thread. For example: - -# cat /debug/tracing/set_ftrace_pid -no pid -# echo 3111 > /debug/tracing/set_ftrace_pid -# cat /debug/tracing/set_ftrace_pid -3111 -# echo function > /debug/tracing/current_tracer -# cat /debug/tracing/trace | head - # tracer: function - # - # TASK-PID CPU# TIMESTAMP FUNCTION - # | | | | | - yum-updatesd-3111 [003] 1637.254676: finish_task_switch <-thread_return - yum-updatesd-3111 [003] 1637.254681: hrtimer_cancel <-schedule_hrtimeout_range - yum-updatesd-3111 [003] 1637.254682: hrtimer_try_to_cancel <-hrtimer_cancel - yum-updatesd-3111 [003] 1637.254683: lock_hrtimer_base <-hrtimer_try_to_cancel - yum-updatesd-3111 [003] 1637.254685: fget_light <-do_sys_poll - yum-updatesd-3111 [003] 1637.254686: pipe_poll <-do_sys_poll -# echo -1 > /debug/tracing/set_ftrace_pid -# cat /debug/tracing/trace |head - # tracer: function - # - # TASK-PID CPU# TIMESTAMP FUNCTION - # | | | | | - ##### CPU 3 buffer started #### - yum-updatesd-3111 [003] 1701.957688: free_poll_entry <-poll_freewait - yum-updatesd-3111 [003] 1701.957689: remove_wait_queue <-free_poll_entry - yum-updatesd-3111 [003] 1701.957691: fput <-free_poll_entry - yum-updatesd-3111 [003] 1701.957692: audit_syscall_exit <-sysret_audit - yum-updatesd-3111 [003] 1701.957693: path_put <-audit_syscall_exit - -If you want to trace a function when executing, you could use -something like this simple program: - -#include -#include -#include -#include -#include -#include - -int main (int argc, char **argv) -{ - if (argc < 1) - exit(-1); - - if (fork() > 0) { - int fd, ffd; - char line[64]; - int s; - - ffd = open("/debug/tracing/current_tracer", O_WRONLY); - if (ffd < 0) - exit(-1); - write(ffd, "nop", 3); - - fd = open("/debug/tracing/set_ftrace_pid", O_WRONLY); - s = sprintf(line, "%d\n", getpid()); - write(fd, line, s); - - write(ffd, "function", 8); - - close(fd); - close(ffd); - - execvp(argv[1], argv+1); - } - - return 0; -} - dynamic ftrace -------------- @@ -1251,11 +1158,7 @@ These are the only wild cards which are supported. * will not work. -Note: It is better to use quotes to enclose the wild cards, otherwise - the shell may expand the parameters into names of files in the local - directory. - - # echo 'hrtimer_*' > /debug/tracing/set_ftrace_filter + # echo hrtimer_* > /debug/tracing/set_ftrace_filter Produces: @@ -1310,7 +1213,7 @@ Again, now we want to append. # echo sys_nanosleep > /debug/tracing/set_ftrace_filter # cat /debug/tracing/set_ftrace_filter sys_nanosleep - # echo 'hrtimer_*' >> /debug/tracing/set_ftrace_filter + # echo hrtimer_* >> /debug/tracing/set_ftrace_filter # cat /debug/tracing/set_ftrace_filter hrtimer_run_queues hrtimer_run_pending @@ -1396,29 +1299,41 @@ trace entries ------------- Having too much or not enough data can be troublesome in diagnosing -an issue in the kernel. The file buffer_size_kb is used to modify +an issue in the kernel. The file trace_entries is used to modify the size of the internal trace buffers. The number listed is the number of entries that can be recorded per CPU. To know the full size, multiply the number of possible CPUS with the number of entries. - # cat /debug/tracing/buffer_size_kb -1408 (units kilobytes) + # cat /debug/tracing/trace_entries +65620 Note, to modify this, you must have tracing completely disabled. To do that, echo "nop" into the current_tracer. If the current_tracer is not set to "nop", an EINVAL error will be returned. # echo nop > /debug/tracing/current_tracer - # echo 10000 > /debug/tracing/buffer_size_kb - # cat /debug/tracing/buffer_size_kb -10000 (units kilobytes) + # echo 100000 > /debug/tracing/trace_entries + # cat /debug/tracing/trace_entries +100045 + + +Notice that we echoed in 100,000 but the size is 100,045. The entries +are held in individual pages. It allocates the number of pages it takes +to fulfill the request. If more entries may fit on the last page +then they will be added. + + # echo 1 > /debug/tracing/trace_entries + # cat /debug/tracing/trace_entries +85 + +This shows us that 85 entries can fit in a single page. The number of pages which will be allocated is limited to a percentage of available memory. Allocating too much will produce an error. - # echo 1000000000000 > /debug/tracing/buffer_size_kb + # echo 1000000000000 > /debug/tracing/trace_entries -bash: echo: write error: Cannot allocate memory - # cat /debug/tracing/buffer_size_kb + # cat /debug/tracing/trace_entries 85 diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt index 2919a2e91938..e0f346d201ed 100644 --- a/trunk/Documentation/kernel-parameters.txt +++ b/trunk/Documentation/kernel-parameters.txt @@ -750,14 +750,6 @@ and is between 256 and 4096 characters. It is defined in the file parameter will force ia64_sal_cache_flush to call ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. - ftrace=[tracer] - [ftrace] will set and start the specified tracer - as early as possible in order to facilitate early - boot debugging. - - ftrace_dump_on_oops - [ftrace] will dump the trace buffers on oops. - gamecon.map[2|3]= [HW,JOY] Multisystem joystick and NES/SNES/PSX pad support via parallel port (up to 5 devices per port) diff --git a/trunk/Documentation/lockstat.txt b/trunk/Documentation/lockstat.txt index 9cb9138f7a79..4ba4664ce5c3 100644 --- a/trunk/Documentation/lockstat.txt +++ b/trunk/Documentation/lockstat.txt @@ -71,50 +71,35 @@ Look at the current lock statistics: # less /proc/lock_stat -01 lock_stat version 0.3 +01 lock_stat version 0.2 02 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 03 class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total 04 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 05 -06 &mm->mmap_sem-W: 233 538 18446744073708 22924.27 607243.51 1342 45806 1.71 8595.89 1180582.34 -07 &mm->mmap_sem-R: 205 587 18446744073708 28403.36 731975.00 1940 412426 0.58 187825.45 6307502.88 -08 --------------- -09 &mm->mmap_sem 487 [] do_page_fault+0x466/0x928 -10 &mm->mmap_sem 179 [] sys_mprotect+0xcd/0x21d -11 &mm->mmap_sem 279 [] sys_mmap+0x75/0xce -12 &mm->mmap_sem 76 [] sys_munmap+0x32/0x59 -13 --------------- -14 &mm->mmap_sem 270 [] sys_mmap+0x75/0xce -15 &mm->mmap_sem 431 [] do_page_fault+0x466/0x928 -16 &mm->mmap_sem 138 [] sys_munmap+0x32/0x59 -17 &mm->mmap_sem 145 [] sys_mprotect+0xcd/0x21d -18 -19 ............................................................................................................................................................................................... -20 -21 dcache_lock: 621 623 0.52 118.26 1053.02 6745 91930 0.29 316.29 118423.41 -22 ----------- -23 dcache_lock 179 [] _atomic_dec_and_lock+0x34/0x54 -24 dcache_lock 113 [] d_alloc+0x19a/0x1eb -25 dcache_lock 99 [] d_rehash+0x1b/0x44 -26 dcache_lock 104 [] d_instantiate+0x36/0x8a -27 ----------- -28 dcache_lock 192 [] _atomic_dec_and_lock+0x34/0x54 -29 dcache_lock 98 [] d_rehash+0x1b/0x44 -30 dcache_lock 72 [] d_alloc+0x19a/0x1eb -31 dcache_lock 112 [] d_instantiate+0x36/0x8a +06 &inode->i_data.tree_lock-W: 15 21657 0.18 1093295.30 11547131054.85 58 10415 0.16 87.51 6387.60 +07 &inode->i_data.tree_lock-R: 0 0 0.00 0.00 0.00 23302 231198 0.25 8.45 98023.38 +08 -------------------------- +09 &inode->i_data.tree_lock 0 [] add_to_page_cache+0x5f/0x190 +10 +11 ............................................................................................................................................................................................... +12 +13 dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24 +14 ----------- +15 dcache_lock 180 [] sys_getcwd+0x11e/0x230 +16 dcache_lock 165 [] d_alloc+0x15a/0x210 +17 dcache_lock 33 [] _atomic_dec_and_lock+0x4d/0x70 +18 dcache_lock 1 [] shrink_dcache_parent+0x18/0x130 This excerpt shows the first two lock class statistics. Line 01 shows the output version - each time the format changes this will be updated. Line 02-04 -show the header with column descriptions. Lines 05-18 and 20-31 show the actual +show the header with column descriptions. Lines 05-10 and 13-18 show the actual statistics. These statistics come in two parts; the actual stats separated by a -short separator (line 08, 13) from the contention points. +short separator (line 08, 14) from the contention points. -The first lock (05-18) is a read/write lock, and shows two lines above the +The first lock (05-10) is a read/write lock, and shows two lines above the short separator. The contention points don't match the column descriptors, -they have two: contentions and [] symbol. The second set of contention -points are the points we're contending with. +they have two: contentions and [] symbol. -The integer part of the time values is in us. View the top contending locks: diff --git a/trunk/Documentation/markers.txt b/trunk/Documentation/markers.txt index d2b3d0e91b26..089f6138fcd9 100644 --- a/trunk/Documentation/markers.txt +++ b/trunk/Documentation/markers.txt @@ -51,16 +51,11 @@ to call) for the specific marker through marker_probe_register() and can be activated by calling marker_arm(). Marker deactivation can be done by calling marker_disarm() as many times as marker_arm() has been called. Removing a probe is done through marker_probe_unregister(); it will disarm the probe. - -marker_synchronize_unregister() must be called between probe unregistration and -the first occurrence of -- the end of module exit function, - to make sure there is no caller left using the probe; -- the free of any resource used by the probes, - to make sure the probes wont be accessing invalid data. -This, and the fact that preemption is disabled around the probe call, make sure -that probe removal and module unload are safe. See the "Probe example" section -below for a sample probe module. +marker_synchronize_unregister() must be called before the end of the module exit +function to make sure there is no caller left using the probe. This, and the +fact that preemption is disabled around the probe call, make sure that probe +removal and module unload are safe. See the "Probe example" section below for a +sample probe module. The marker mechanism supports inserting multiple instances of the same marker. Markers can be put in inline functions, inlined static functions, and @@ -75,20 +70,6 @@ a printk warning which identifies the inconsistency: "Format mismatch for probe probe_name (format), marker (format)" -Another way to use markers is to simply define the marker without generating any -function call to actually call into the marker. This is useful in combination -with tracepoint probes in a scheme like this : - -void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk); - -DEFINE_MARKER_TP(marker_eventname, tracepoint_name, probe_tracepoint_name, - "arg1 %u pid %d"); - -notrace void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk) -{ - struct marker *marker = &GET_MARKER(kernel_irq_entry); - /* write data to trace buffers ... */ -} * Probe / marker example diff --git a/trunk/Documentation/scheduler/sched-arch.txt b/trunk/Documentation/scheduler/sched-arch.txt index d43dbcbd163b..941615a9769b 100644 --- a/trunk/Documentation/scheduler/sched-arch.txt +++ b/trunk/Documentation/scheduler/sched-arch.txt @@ -8,7 +8,7 @@ Context switch By default, the switch_to arch function is called with the runqueue locked. This is usually not a problem unless switch_to may need to take the runqueue lock. This is usually due to a wake up operation in -the context switch. See arch/ia64/include/asm/system.h for an example. +the context switch. See include/asm-ia64/system.h for an example. To request the scheduler call switch_to with the runqueue unlocked, you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file @@ -23,7 +23,7 @@ disabled. Interrupts may be enabled over the call if it is likely to introduce a significant interrupt latency by adding the line `#define __ARCH_WANT_INTERRUPTS_ON_CTXSW` in the same place as for unlocked context switches. This define also implies -`__ARCH_WANT_UNLOCKED_CTXSW`. See arch/arm/include/asm/system.h for an +`__ARCH_WANT_UNLOCKED_CTXSW`. See include/asm-arm/system.h for an example. diff --git a/trunk/Documentation/tracepoints.txt b/trunk/Documentation/tracepoints.txt index 6f0a044f5b5e..5d354e167494 100644 --- a/trunk/Documentation/tracepoints.txt +++ b/trunk/Documentation/tracepoints.txt @@ -3,30 +3,28 @@ Mathieu Desnoyers -This document introduces Linux Kernel Tracepoints and their use. It -provides examples of how to insert tracepoints in the kernel and -connect probe functions to them and provides some examples of probe -functions. +This document introduces Linux Kernel Tracepoints and their use. It provides +examples of how to insert tracepoints in the kernel and connect probe functions +to them and provides some examples of probe functions. * Purpose of tracepoints -A tracepoint placed in code provides a hook to call a function (probe) -that you can provide at runtime. A tracepoint can be "on" (a probe is -connected to it) or "off" (no probe is attached). When a tracepoint is -"off" it has no effect, except for adding a tiny time penalty -(checking a condition for a branch) and space penalty (adding a few -bytes for the function call at the end of the instrumented function -and adds a data structure in a separate section). When a tracepoint -is "on", the function you provide is called each time the tracepoint -is executed, in the execution context of the caller. When the function -provided ends its execution, it returns to the caller (continuing from -the tracepoint site). +A tracepoint placed in code provides a hook to call a function (probe) that you +can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or +"off" (no probe is attached). When a tracepoint is "off" it has no effect, +except for adding a tiny time penalty (checking a condition for a branch) and +space penalty (adding a few bytes for the function call at the end of the +instrumented function and adds a data structure in a separate section). When a +tracepoint is "on", the function you provide is called each time the tracepoint +is executed, in the execution context of the caller. When the function provided +ends its execution, it returns to the caller (continuing from the tracepoint +site). You can put tracepoints at important locations in the code. They are lightweight hooks that can pass an arbitrary number of parameters, -which prototypes are described in a tracepoint declaration placed in a -header file. +which prototypes are described in a tracepoint declaration placed in a header +file. They can be used for tracing and performance accounting. @@ -44,16 +42,14 @@ In include/trace/subsys.h : #include -DECLARE_TRACE(subsys_eventname, - TPPROTO(int firstarg, struct task_struct *p), +DEFINE_TRACE(subsys_eventname, + TPPTOTO(int firstarg, struct task_struct *p), TPARGS(firstarg, p)); In subsys/file.c (where the tracing statement must be added) : #include -DEFINE_TRACE(subsys_eventname); - void somefct(void) { ... @@ -65,41 +61,31 @@ Where : - subsys_eventname is an identifier unique to your event - subsys is the name of your subsystem. - eventname is the name of the event to trace. +- TPPTOTO(int firstarg, struct task_struct *p) is the prototype of the function + called by this tracepoint. +- TPARGS(firstarg, p) are the parameters names, same as found in the prototype. -- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the - function called by this tracepoint. - -- TPARGS(firstarg, p) are the parameters names, same as found in the - prototype. - -Connecting a function (probe) to a tracepoint is done by providing a -probe (function to call) for the specific tracepoint through +Connecting a function (probe) to a tracepoint is done by providing a probe +(function to call) for the specific tracepoint through register_trace_subsys_eventname(). Removing a probe is done through -unregister_trace_subsys_eventname(); it will remove the probe. - -tracepoint_synchronize_unregister() must be called before the end of -the module exit function to make sure there is no caller left using -the probe. This, and the fact that preemption is disabled around the -probe call, make sure that probe removal and module unload are safe. -See the "Probe example" section below for a sample probe module. - -The tracepoint mechanism supports inserting multiple instances of the -same tracepoint, but a single definition must be made of a given -tracepoint name over all the kernel to make sure no type conflict will -occur. Name mangling of the tracepoints is done using the prototypes -to make sure typing is correct. Verification of probe type correctness -is done at the registration site by the compiler. Tracepoints can be -put in inline functions, inlined static functions, and unrolled loops -as well as regular functions. - -The naming scheme "subsys_event" is suggested here as a convention -intended to limit collisions. Tracepoint names are global to the -kernel: they are considered as being the same whether they are in the -core kernel image or in modules. - -If the tracepoint has to be used in kernel modules, an -EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be -used to export the defined tracepoints. +unregister_trace_subsys_eventname(); it will remove the probe sure there is no +caller left using the probe when it returns. Probe removal is preempt-safe +because preemption is disabled around the probe call. See the "Probe example" +section below for a sample probe module. + +The tracepoint mechanism supports inserting multiple instances of the same +tracepoint, but a single definition must be made of a given tracepoint name over +all the kernel to make sure no type conflict will occur. Name mangling of the +tracepoints is done using the prototypes to make sure typing is correct. +Verification of probe type correctness is done at the registration site by the +compiler. Tracepoints can be put in inline functions, inlined static functions, +and unrolled loops as well as regular functions. + +The naming scheme "subsys_event" is suggested here as a convention intended +to limit collisions. Tracepoint names are global to the kernel: they are +considered as being the same whether they are in the core kernel image or in +modules. + * Probe / tracepoint example diff --git a/trunk/Documentation/usb/gadget_serial.txt b/trunk/Documentation/usb/gadget_serial.txt index eac7df94d8e3..9b22bd14c348 100644 --- a/trunk/Documentation/usb/gadget_serial.txt +++ b/trunk/Documentation/usb/gadget_serial.txt @@ -114,11 +114,11 @@ modules. Then you must load the gadget serial driver. To load it as an ACM device (recommended for interoperability), do this: - modprobe g_serial + modprobe g_serial use_acm=1 To load it as a vendor specific bulk in/out device, do this: - modprobe g_serial use_acm=0 + modprobe g_serial This will also automatically load the underlying gadget peripheral controller driver. This must be done each time you reboot the gadget diff --git a/trunk/Documentation/usb/proc_usb_info.txt b/trunk/Documentation/usb/proc_usb_info.txt index fafcd4723260..077e9032d0cd 100644 --- a/trunk/Documentation/usb/proc_usb_info.txt +++ b/trunk/Documentation/usb/proc_usb_info.txt @@ -49,10 +49,8 @@ it and 002/048 sometime later. These files can be read as binary data. The binary data consists of first the device descriptor, then the descriptors for each -configuration of the device. Multi-byte fields in the device and -configuration descriptors, but not other descriptors, are converted -to host endianness by the kernel. This information is also shown -in text form by the /proc/bus/usb/devices file, described later. +configuration of the device. That information is also shown in +text form by the /proc/bus/usb/devices file, described later. These files may also be used to write user-level drivers for the USB devices. You would open the /proc/bus/usb/BBB/DDD file read/write, diff --git a/trunk/Documentation/usb/usbmon.txt b/trunk/Documentation/usb/usbmon.txt index 270481906dc8..2917ce4ffdc4 100644 --- a/trunk/Documentation/usb/usbmon.txt +++ b/trunk/Documentation/usb/usbmon.txt @@ -34,12 +34,11 @@ if usbmon is built into the kernel. Verify that bus sockets are present. # ls /sys/kernel/debug/usbmon -0s 0u 1s 1t 1u 2s 2t 2u 3s 3t 3u 4s 4t 4u +0s 0t 0u 1s 1t 1u 2s 2t 2u 3s 3t 3u 4s 4t 4u # -Now you can choose to either use the socket '0u' (to capture packets on all -buses), and skip to step #3, or find the bus used by your device with step #2. -This allows to filter away annoying devices that talk continuously. +Now you can choose to either use the sockets numbered '0' (to capture packets on +all buses), and skip to step #3, or find the bus used by your device with step #2. 2. Find which bus connects to the desired device @@ -100,9 +99,8 @@ on the event type, but there is a set of words, common for all types. Here is the list of words, from left to right: -- URB Tag. This is used to identify URBs, and is normally an in-kernel address - of the URB structure in hexadecimal, but can be a sequence number or any - other unique string, within reason. +- URB Tag. This is used to identify URBs is normally a kernel mode address + of the URB structure in hexadecimal. - Timestamp in microseconds, a decimal number. The timestamp's resolution depends on available clock, and so it can be much worse than a microsecond diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index 09ed704f4dda..24741de12a39 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -1527,10 +1527,10 @@ W: http://ebtables.sourceforge.net/ S: Maintained ECRYPT FILE SYSTEM -P: Tyler Hicks, Dustin Kirkland -M: tyhicks@linux.vnet.ibm.com, kirkland@canonical.com -L: ecryptfs-devel@lists.launchpad.net -W: https://launchpad.net/ecryptfs +P: Mike Halcrow, Phillip Hellewell +M: mhalcrow@us.ibm.com, phillip@hellewell.homeip.net +L: ecryptfs-devel@lists.sourceforge.net +W: http://ecryptfs.sourceforge.net/ S: Supported EDAC-CORE diff --git a/trunk/arch/arm/common/sa1111.c b/trunk/arch/arm/common/sa1111.c index ef12794c3c68..47ccec95f3e8 100644 --- a/trunk/arch/arm/common/sa1111.c +++ b/trunk/arch/arm/common/sa1111.c @@ -630,7 +630,7 @@ __sa1111_probe(struct device *me, struct resource *mem, int irq) return -ENOMEM; sachip->clk = clk_get(me, "SA1111_CLK"); - if (IS_ERR(sachip->clk)) { + if (!sachip->clk) { ret = PTR_ERR(sachip->clk); goto err_free; } diff --git a/trunk/arch/arm/kernel/armksyms.c b/trunk/arch/arm/kernel/armksyms.c index 23af3c972c9a..c74f766ffc12 100644 --- a/trunk/arch/arm/kernel/armksyms.c +++ b/trunk/arch/arm/kernel/armksyms.c @@ -115,8 +115,6 @@ EXPORT_SYMBOL(__strnlen_user); EXPORT_SYMBOL(__strncpy_from_user); #ifdef CONFIG_MMU -EXPORT_SYMBOL(copy_page); - EXPORT_SYMBOL(__copy_from_user); EXPORT_SYMBOL(__copy_to_user); EXPORT_SYMBOL(__clear_user); @@ -183,6 +181,8 @@ EXPORT_SYMBOL(_find_first_bit_be); EXPORT_SYMBOL(_find_next_bit_be); #endif +EXPORT_SYMBOL(copy_page); + #ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(mcount); #endif diff --git a/trunk/arch/arm/kernel/traps.c b/trunk/arch/arm/kernel/traps.c index 79abc4ddc0cf..57e6874d0b80 100644 --- a/trunk/arch/arm/kernel/traps.c +++ b/trunk/arch/arm/kernel/traps.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include diff --git a/trunk/arch/arm/mach-pxa/include/mach/reset.h b/trunk/arch/arm/mach-pxa/include/mach/reset.h index 31e6a7b6ad80..7b8842cfa5fc 100644 --- a/trunk/arch/arm/mach-pxa/include/mach/reset.h +++ b/trunk/arch/arm/mach-pxa/include/mach/reset.h @@ -12,8 +12,9 @@ extern void clear_reset_status(unsigned int mask); /** * init_gpio_reset() - register GPIO as reset generator - * @gpio: gpio nr - * @output: set gpio as out/low instead of input during normal work + * + * @gpio - gpio nr + * @output - set gpio as out/low instead of input during normal work */ extern int init_gpio_reset(int gpio, int output); diff --git a/trunk/arch/arm/mm/fault.c b/trunk/arch/arm/mm/fault.c index 22c9530e91e2..2df8d9facf57 100644 --- a/trunk/arch/arm/mm/fault.c +++ b/trunk/arch/arm/mm/fault.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/trunk/arch/ia64/Kconfig b/trunk/arch/ia64/Kconfig index 7fa8f615ba6e..6bd91ed7cd03 100644 --- a/trunk/arch/ia64/Kconfig +++ b/trunk/arch/ia64/Kconfig @@ -99,7 +99,7 @@ config GENERIC_IOMAP bool default y -config SCHED_OMIT_FRAME_POINTER +config SCHED_NO_NO_OMIT_FRAME_POINTER bool default y diff --git a/trunk/arch/ia64/hp/sim/Kconfig b/trunk/arch/ia64/hp/sim/Kconfig index 8d513a8c5266..f92306bbedb8 100644 --- a/trunk/arch/ia64/hp/sim/Kconfig +++ b/trunk/arch/ia64/hp/sim/Kconfig @@ -4,7 +4,6 @@ menu "HP Simulator drivers" config HP_SIMETH bool "Simulated Ethernet " - depends on NET config HP_SIMSERIAL bool "Simulated serial driver support" diff --git a/trunk/arch/ia64/include/asm/topology.h b/trunk/arch/ia64/include/asm/topology.h index a3cc9f65f954..35bcb641c9e5 100644 --- a/trunk/arch/ia64/include/asm/topology.h +++ b/trunk/arch/ia64/include/asm/topology.h @@ -55,6 +55,7 @@ void build_cpu_to_node_map(void); #define SD_CPU_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ @@ -79,6 +80,7 @@ void build_cpu_to_node_map(void); /* sched_domains SD_NODE_INIT for IA64 NUMA machines */ #define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/trunk/arch/m32r/Kconfig b/trunk/arch/m32r/Kconfig index cabba332cc48..17a6dab09319 100644 --- a/trunk/arch/m32r/Kconfig +++ b/trunk/arch/m32r/Kconfig @@ -274,7 +274,7 @@ config GENERIC_CALIBRATE_DELAY bool default y -config SCHED_OMIT_FRAME_POINTER +config SCHED_NO_NO_OMIT_FRAME_POINTER bool default y diff --git a/trunk/arch/mips/Kconfig b/trunk/arch/mips/Kconfig index a5255e7c79e0..f4af967a6b30 100644 --- a/trunk/arch/mips/Kconfig +++ b/trunk/arch/mips/Kconfig @@ -653,7 +653,7 @@ config GENERIC_CMOS_UPDATE bool default y -config SCHED_OMIT_FRAME_POINTER +config SCHED_NO_NO_OMIT_FRAME_POINTER bool default y diff --git a/trunk/arch/mips/Kconfig.debug b/trunk/arch/mips/Kconfig.debug index 364ca8938807..765c8e287d2b 100644 --- a/trunk/arch/mips/Kconfig.debug +++ b/trunk/arch/mips/Kconfig.debug @@ -48,7 +48,7 @@ config RUNTIME_DEBUG help If you say Y here, some debugging macros will do run-time checking. If you say N here, those macros will mostly turn to no-ops. See - arch/mips/include/asm/debug.h for debugging macros. + include/asm-mips/debug.h for debuging macros. If unsure, say N. endmenu diff --git a/trunk/arch/mips/configs/ip32_defconfig b/trunk/arch/mips/configs/ip32_defconfig index de4c7a0a96dd..fe4699df9626 100644 --- a/trunk/arch/mips/configs/ip32_defconfig +++ b/trunk/arch/mips/configs/ip32_defconfig @@ -1,71 +1,71 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.28-rc7 -# Wed Dec 10 14:39:08 2008 +# Linux kernel version: 2.6.20 +# Tue Feb 20 21:47:33 2007 # CONFIG_MIPS=y # # Machine selection # -# CONFIG_MACH_ALCHEMY is not set +CONFIG_ZONE_DMA=y +# CONFIG_MIPS_MTX1 is not set +# CONFIG_MIPS_BOSPORUS is not set +# CONFIG_MIPS_PB1000 is not set +# CONFIG_MIPS_PB1100 is not set +# CONFIG_MIPS_PB1500 is not set +# CONFIG_MIPS_PB1550 is not set +# CONFIG_MIPS_PB1200 is not set +# CONFIG_MIPS_DB1000 is not set +# CONFIG_MIPS_DB1100 is not set +# CONFIG_MIPS_DB1500 is not set +# CONFIG_MIPS_DB1550 is not set +# CONFIG_MIPS_DB1200 is not set +# CONFIG_MIPS_MIRAGE is not set # CONFIG_BASLER_EXCITE is not set -# CONFIG_BCM47XX is not set # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MACH_JAZZ is not set -# CONFIG_LASAT is not set -# CONFIG_LEMOTE_FULONG is not set # CONFIG_MIPS_MALTA is not set +# CONFIG_WR_PPMC is not set # CONFIG_MIPS_SIM is not set -# CONFIG_MACH_EMMA is not set -# CONFIG_MACH_VR41XX is not set -# CONFIG_NXP_STB220 is not set -# CONFIG_NXP_STB225 is not set +# CONFIG_MOMENCO_JAGUAR_ATX is not set +# CONFIG_MIPS_XXS1500 is not set # CONFIG_PNX8550_JBS is not set # CONFIG_PNX8550_STB810 is not set -# CONFIG_PMC_MSP is not set +# CONFIG_MACH_VR41XX is not set # CONFIG_PMC_YOSEMITE is not set +# CONFIG_MARKEINS is not set # CONFIG_SGI_IP22 is not set # CONFIG_SGI_IP27 is not set -# CONFIG_SGI_IP28 is not set CONFIG_SGI_IP32=y -# CONFIG_SIBYTE_CRHINE is not set -# CONFIG_SIBYTE_CARMEL is not set -# CONFIG_SIBYTE_CRHONE is not set -# CONFIG_SIBYTE_RHONE is not set +# CONFIG_SIBYTE_BIGSUR is not set # CONFIG_SIBYTE_SWARM is not set -# CONFIG_SIBYTE_LITTLESUR is not set # CONFIG_SIBYTE_SENTOSA is not set -# CONFIG_SIBYTE_BIGSUR is not set +# CONFIG_SIBYTE_RHONE is not set +# CONFIG_SIBYTE_CARMEL is not set +# CONFIG_SIBYTE_LITTLESUR is not set +# CONFIG_SIBYTE_CRHINE is not set +# CONFIG_SIBYTE_CRHONE is not set # CONFIG_SNI_RM is not set -# CONFIG_MACH_TX39XX is not set -# CONFIG_MACH_TX49XX is not set -# CONFIG_MIKROTIK_RB532 is not set -# CONFIG_WR_PPMC is not set +# CONFIG_TOSHIBA_JMR3927 is not set +# CONFIG_TOSHIBA_RBTX4927 is not set +# CONFIG_TOSHIBA_RBTX4938 is not set CONFIG_RWSEM_GENERIC_SPINLOCK=y # CONFIG_ARCH_HAS_ILOG2_U32 is not set # CONFIG_ARCH_HAS_ILOG2_U64 is not set -CONFIG_ARCH_SUPPORTS_OPROFILE=y CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_GENERIC_TIME=y -CONFIG_GENERIC_CMOS_UPDATE=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y # CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ is not set CONFIG_ARC=y -CONFIG_CEVT_R4K=y -CONFIG_CSRC_R4K=y CONFIG_DMA_NONCOHERENT=y CONFIG_DMA_NEED_PCI_MAP_STATE=y -# CONFIG_HOTPLUG_CPU is not set -# CONFIG_NO_IOPORT is not set CONFIG_CPU_BIG_ENDIAN=y # CONFIG_CPU_LITTLE_ENDIAN is not set CONFIG_SYS_SUPPORTS_BIG_ENDIAN=y -CONFIG_IRQ_CPU=y CONFIG_ARC32=y CONFIG_BOOT_ELF32=y CONFIG_MIPS_L1_CACHE_SHIFT=5 @@ -75,7 +75,6 @@ CONFIG_ARC_PROMLIB=y # # CPU selection # -# CONFIG_CPU_LOONGSON2 is not set # CONFIG_CPU_MIPS32_R1 is not set # CONFIG_CPU_MIPS32_R2 is not set # CONFIG_CPU_MIPS64_R1 is not set @@ -88,7 +87,6 @@ CONFIG_ARC_PROMLIB=y # CONFIG_CPU_TX49XX is not set CONFIG_CPU_R5000=y # CONFIG_CPU_R5432 is not set -# CONFIG_CPU_R5500 is not set # CONFIG_CPU_R6000 is not set # CONFIG_CPU_NEVADA is not set # CONFIG_CPU_R8000 is not set @@ -118,73 +116,65 @@ CONFIG_RM7000_CPU_SCACHE=y CONFIG_MIPS_MT_DISABLED=y # CONFIG_MIPS_MT_SMP is not set # CONFIG_MIPS_MT_SMTC is not set +# CONFIG_MIPS_VPE_LOADER is not set CONFIG_CPU_HAS_LLSC=y CONFIG_CPU_HAS_SYNC=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_ARCH_FLATMEM_ENABLE=y -CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y # CONFIG_DISCONTIGMEM_MANUAL is not set # CONFIG_SPARSEMEM_MANUAL is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y -CONFIG_PAGEFLAGS_EXTENDED=y +# CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_RESOURCES_64BIT=y -CONFIG_PHYS_ADDR_T_64BIT=y -CONFIG_ZONE_DMA_FLAG=0 -CONFIG_VIRT_TO_BUS=y -CONFIG_UNEVICTABLE_LRU=y -# CONFIG_NO_HZ is not set -# CONFIG_HIGH_RES_TIMERS is not set -CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_ZONE_DMA_FLAG=1 # CONFIG_HZ_48 is not set # CONFIG_HZ_100 is not set # CONFIG_HZ_128 is not set -CONFIG_HZ_250=y +# CONFIG_HZ_250 is not set # CONFIG_HZ_256 is not set -# CONFIG_HZ_1000 is not set +CONFIG_HZ_1000=y # CONFIG_HZ_1024 is not set CONFIG_SYS_SUPPORTS_ARBIT_HZ=y -CONFIG_HZ=250 -CONFIG_PREEMPT_NONE=y -# CONFIG_PREEMPT_VOLUNTARY is not set +CONFIG_HZ=1000 +# CONFIG_PREEMPT_NONE is not set +CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_PREEMPT is not set # CONFIG_KEXEC is not set -# CONFIG_SECCOMP is not set CONFIG_LOCKDEP_SUPPORT=y CONFIG_STACKTRACE_SUPPORT=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # -# General setup +# Code maturity level options # CONFIG_EXPERIMENTAL=y CONFIG_BROKEN_ON_SMP=y CONFIG_INIT_ENV_ARG_LIMIT=32 + +# +# General setup +# CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y +# CONFIG_IPC_NS is not set CONFIG_SYSVIPC_SYSCTL=y -CONFIG_POSIX_MQUEUE=y +# CONFIG_POSIX_MQUEUE is not set CONFIG_BSD_PROCESS_ACCT=y # CONFIG_BSD_PROCESS_ACCT_V3 is not set # CONFIG_TASKSTATS is not set -CONFIG_AUDIT=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CGROUPS is not set -# CONFIG_GROUP_SCHED is not set +# CONFIG_UTS_NS is not set +# CONFIG_AUDIT is not set +# CONFIG_IKCONFIG is not set CONFIG_SYSFS_DEPRECATED=y -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_RELAY=y -# CONFIG_NAMESPACES is not set -# CONFIG_BLK_DEV_INITRD is not set -CONFIG_CC_OPTIMIZE_FOR_SIZE=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y CONFIG_EMBEDDED=y CONFIG_SYSCTL_SYSCALL=y @@ -194,43 +184,27 @@ CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y -CONFIG_PCSPKR_PLATFORM=y -CONFIG_COMPAT_BRK=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y -CONFIG_ANON_INODES=y CONFIG_EPOLL=y -CONFIG_SIGNALFD=y -CONFIG_TIMERFD=y -CONFIG_EVENTFD=y CONFIG_SHMEM=y -CONFIG_AIO=y -CONFIG_VM_EVENT_COUNTERS=y -CONFIG_PCI_QUIRKS=y CONFIG_SLAB=y -# CONFIG_SLUB is not set -# CONFIG_SLOB is not set -CONFIG_PROFILING=y -# CONFIG_MARKERS is not set -CONFIG_OPROFILE=m -CONFIG_HAVE_OPROFILE=y -# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set -CONFIG_SLABINFO=y +CONFIG_VM_EVENT_COUNTERS=y CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 -CONFIG_MODULES=y -# CONFIG_MODULE_FORCE_LOAD is not set -CONFIG_MODULE_UNLOAD=y -# CONFIG_MODULE_FORCE_UNLOAD is not set -# CONFIG_MODVERSIONS is not set -# CONFIG_MODULE_SRCVERSION_ALL is not set -CONFIG_KMOD=y +# CONFIG_SLOB is not set + +# +# Loadable module support +# +# CONFIG_MODULES is not set + +# +# Block layer +# CONFIG_BLOCK=y # CONFIG_BLK_DEV_IO_TRACE is not set -# CONFIG_BLK_DEV_BSG is not set -# CONFIG_BLK_DEV_INTEGRITY is not set -CONFIG_BLOCK_COMPAT=y # # IO Schedulers @@ -239,50 +213,59 @@ CONFIG_IOSCHED_NOOP=y CONFIG_IOSCHED_AS=y CONFIG_IOSCHED_DEADLINE=y CONFIG_IOSCHED_CFQ=y -# CONFIG_DEFAULT_AS is not set +CONFIG_DEFAULT_AS=y # CONFIG_DEFAULT_DEADLINE is not set -CONFIG_DEFAULT_CFQ=y +# CONFIG_DEFAULT_CFQ is not set # CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="cfq" -CONFIG_CLASSIC_RCU=y -# CONFIG_FREEZER is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" # # Bus options (PCI, PCMCIA, EISA, ISA, TC) # CONFIG_HW_HAS_PCI=y CONFIG_PCI=y -CONFIG_PCI_DOMAINS=y -# CONFIG_ARCH_SUPPORTS_MSI is not set -# CONFIG_PCI_LEGACY is not set CONFIG_MMU=y + +# +# PCCARD (PCMCIA/CardBus) support +# # CONFIG_PCCARD is not set + +# +# PCI Hotplug Support +# # CONFIG_HOTPLUG_PCI is not set # # Executable file formats # CONFIG_BINFMT_ELF=y -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -# CONFIG_HAVE_AOUT is not set CONFIG_BINFMT_MISC=y +# CONFIG_BUILD_ELF64 is not set CONFIG_MIPS32_COMPAT=y CONFIG_COMPAT=y CONFIG_SYSVIPC_COMPAT=y CONFIG_MIPS32_O32=y -CONFIG_MIPS32_N32=y +# CONFIG_MIPS32_N32 is not set CONFIG_BINFMT_ELF32=y # # Power management options # -CONFIG_ARCH_SUSPEND_POSSIBLE=y -# CONFIG_PM is not set +CONFIG_PM=y +# CONFIG_PM_LEGACY is not set +# CONFIG_PM_DEBUG is not set +# CONFIG_PM_SYSFS_DEPRECATED is not set + +# +# Networking +# CONFIG_NET=y # # Networking options # +# CONFIG_NETDEBUG is not set CONFIG_PACKET=y CONFIG_PACKET_MMAP=y CONFIG_UNIX=y @@ -290,83 +273,56 @@ CONFIG_XFRM=y CONFIG_XFRM_USER=y # CONFIG_XFRM_SUB_POLICY is not set CONFIG_XFRM_MIGRATE=y -# CONFIG_XFRM_STATISTICS is not set -CONFIG_XFRM_IPCOMP=m CONFIG_NET_KEY=y CONFIG_NET_KEY_MIGRATE=y CONFIG_INET=y -CONFIG_IP_MULTICAST=y +# CONFIG_IP_MULTICAST is not set # CONFIG_IP_ADVANCED_ROUTER is not set CONFIG_IP_FIB_HASH=y CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y +# CONFIG_IP_PNP_DHCP is not set CONFIG_IP_PNP_BOOTP=y # CONFIG_IP_PNP_RARP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -# CONFIG_NET_IPGRE_BROADCAST is not set -# CONFIG_IP_MROUTE is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set # CONFIG_ARPD is not set # CONFIG_SYN_COOKIES is not set -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_TUNNEL=m -CONFIG_INET_TUNNEL=m +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set CONFIG_INET_XFRM_MODE_TRANSPORT=y CONFIG_INET_XFRM_MODE_TUNNEL=y CONFIG_INET_XFRM_MODE_BEET=y -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=y CONFIG_INET_TCP_DIAG=y -CONFIG_TCP_CONG_ADVANCED=y -CONFIG_TCP_CONG_BIC=m +# CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_CUBIC=y -CONFIG_TCP_CONG_WESTWOOD=m -CONFIG_TCP_CONG_HTCP=m -# CONFIG_TCP_CONG_HSTCP is not set -# CONFIG_TCP_CONG_HYBLA is not set -# CONFIG_TCP_CONG_VEGAS is not set -# CONFIG_TCP_CONG_SCALABLE is not set -# CONFIG_TCP_CONG_LP is not set -# CONFIG_TCP_CONG_VENO is not set -# CONFIG_TCP_CONG_YEAH is not set -# CONFIG_TCP_CONG_ILLINOIS is not set -# CONFIG_DEFAULT_BIC is not set -CONFIG_DEFAULT_CUBIC=y -# CONFIG_DEFAULT_HTCP is not set -# CONFIG_DEFAULT_VEGAS is not set -# CONFIG_DEFAULT_WESTWOOD is not set -# CONFIG_DEFAULT_RENO is not set CONFIG_DEFAULT_TCP_CONG="cubic" CONFIG_TCP_MD5SIG=y -CONFIG_IPV6=m -# CONFIG_IPV6_PRIVACY is not set -# CONFIG_IPV6_ROUTER_PREF is not set -# CONFIG_IPV6_OPTIMISTIC_DAD is not set -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_IPCOMP=m -# CONFIG_IPV6_MIP6 is not set -CONFIG_INET6_XFRM_TUNNEL=m -CONFIG_INET6_TUNNEL=m -CONFIG_INET6_XFRM_MODE_TRANSPORT=m -CONFIG_INET6_XFRM_MODE_TUNNEL=m -CONFIG_INET6_XFRM_MODE_BEET=m -# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set -CONFIG_IPV6_SIT=m -CONFIG_IPV6_NDISC_NODETYPE=y -CONFIG_IPV6_TUNNEL=m -# CONFIG_IPV6_MULTIPLE_TABLES is not set -# CONFIG_IPV6_MROUTE is not set +# CONFIG_IPV6 is not set +# CONFIG_INET6_XFRM_TUNNEL is not set +# CONFIG_INET6_TUNNEL is not set CONFIG_NETWORK_SECMARK=y # CONFIG_NETFILTER is not set + +# +# DCCP Configuration (EXPERIMENTAL) +# # CONFIG_IP_DCCP is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# # CONFIG_IP_SCTP is not set + +# +# TIPC Configuration (EXPERIMENTAL) +# # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_BRIDGE is not set -# CONFIG_NET_DSA is not set # CONFIG_VLAN_8021Q is not set # CONFIG_DECNET is not set # CONFIG_LLC2 is not set @@ -376,6 +332,10 @@ CONFIG_NETWORK_SECMARK=y # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# # CONFIG_NET_SCHED is not set # @@ -383,14 +343,15 @@ CONFIG_NETWORK_SECMARK=y # # CONFIG_NET_PKTGEN is not set # CONFIG_HAMRADIO is not set -# CONFIG_CAN is not set # CONFIG_IRDA is not set # CONFIG_BT is not set -# CONFIG_AF_RXRPC is not set -# CONFIG_PHONET is not set -# CONFIG_WIRELESS is not set -# CONFIG_RFKILL is not set -# CONFIG_NET_9P is not set +CONFIG_IEEE80211=y +# CONFIG_IEEE80211_DEBUG is not set +CONFIG_IEEE80211_CRYPT_WEP=y +CONFIG_IEEE80211_CRYPT_CCMP=y +CONFIG_IEEE80211_SOFTMAC=y +# CONFIG_IEEE80211_SOFTMAC_DEBUG is not set +CONFIG_WIRELESS_EXT=y # # Device Drivers @@ -399,40 +360,60 @@ CONFIG_NETWORK_SECMARK=y # # Generic Driver Options # -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=y -CONFIG_FIRMWARE_IN_KERNEL=y -CONFIG_EXTRA_FIRMWARE="" # CONFIG_SYS_HYPERVISOR is not set + +# +# Connector - unified userspace <-> kernelspace linker +# CONFIG_CONNECTOR=y CONFIG_PROC_EVENTS=y + +# +# Memory Technology Devices (MTD) +# # CONFIG_MTD is not set + +# +# Parallel port support +# # CONFIG_PARPORT is not set -CONFIG_BLK_DEV=y + +# +# Plug and Play support +# +# CONFIG_PNPACPI is not set + +# +# Block devices +# # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set # CONFIG_BLK_DEV_UMEM is not set # CONFIG_BLK_DEV_COW_COMMON is not set -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m -CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_LOOP=y +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +# CONFIG_BLK_DEV_NBD is not set # CONFIG_BLK_DEV_SX8 is not set # CONFIG_BLK_DEV_RAM is not set -# CONFIG_CDROM_PKTCDVD is not set -# CONFIG_ATA_OVER_ETH is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_MISC_DEVICES=y -# CONFIG_PHANTOM is not set -# CONFIG_EEPROM_93CX6 is not set +# CONFIG_BLK_DEV_INITRD is not set +CONFIG_CDROM_PKTCDVD=y +CONFIG_CDROM_PKTCDVD_BUFFERS=8 +# CONFIG_CDROM_PKTCDVD_WCACHE is not set +CONFIG_ATA_OVER_ETH=y + +# +# Misc devices +# CONFIG_SGI_IOC4=y # CONFIG_TIFM_CORE is not set -# CONFIG_ENCLOSURE_SERVICES is not set -# CONFIG_HP_ILO is not set -# CONFIG_C2PORT is not set -CONFIG_HAVE_IDE=y + +# +# ATA/ATAPI/MFM/RLL support +# # CONFIG_IDE is not set # @@ -440,20 +421,19 @@ CONFIG_HAVE_IDE=y # CONFIG_RAID_ATTRS=y CONFIG_SCSI=y -CONFIG_SCSI_DMA=y CONFIG_SCSI_TGT=y -# CONFIG_SCSI_NETLINK is not set +CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y # # SCSI support type (disk, tape, CD-ROM) # CONFIG_BLK_DEV_SD=y -# CONFIG_CHR_DEV_ST is not set -# CONFIG_CHR_DEV_OSST is not set +CONFIG_CHR_DEV_ST=y +CONFIG_CHR_DEV_OSST=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_CHR_DEV_SG=m +CONFIG_CHR_DEV_SG=y # CONFIG_CHR_DEV_SCH is not set # @@ -463,36 +443,35 @@ CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SCAN_ASYNC=y -CONFIG_SCSI_WAIT_SCAN=m # # SCSI Transports # CONFIG_SCSI_SPI_ATTRS=y -# CONFIG_SCSI_FC_ATTRS is not set +CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_ISCSI_ATTRS is not set CONFIG_SCSI_SAS_ATTRS=y CONFIG_SCSI_SAS_LIBSAS=y -CONFIG_SCSI_SAS_HOST_SMP=y # CONFIG_SCSI_SAS_LIBSAS_DEBUG is not set -# CONFIG_SCSI_SRP_ATTRS is not set -CONFIG_SCSI_LOWLEVEL=y + +# +# SCSI low-level drivers +# # CONFIG_ISCSI_TCP is not set # CONFIG_BLK_DEV_3W_XXXX_RAID is not set # CONFIG_SCSI_3W_9XXX is not set # CONFIG_SCSI_ACARD is not set # CONFIG_SCSI_AACRAID is not set CONFIG_SCSI_AIC7XXX=y -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 +CONFIG_AIC7XXX_CMDS_PER_DEVICE=8 CONFIG_AIC7XXX_RESET_DELAY_MS=15000 CONFIG_AIC7XXX_DEBUG_ENABLE=y CONFIG_AIC7XXX_DEBUG_MASK=0 CONFIG_AIC7XXX_REG_PRETTY_PRINT=y # CONFIG_SCSI_AIC7XXX_OLD is not set # CONFIG_SCSI_AIC79XX is not set -# CONFIG_SCSI_AIC94XX is not set -# CONFIG_SCSI_DPT_I2O is not set -# CONFIG_SCSI_ADVANSYS is not set +CONFIG_SCSI_AIC94XX=y +# CONFIG_AIC94XX_DEBUG is not set # CONFIG_SCSI_ARCMSR is not set # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set @@ -503,7 +482,6 @@ CONFIG_AIC7XXX_REG_PRETTY_PRINT=y # CONFIG_SCSI_IPS is not set # CONFIG_SCSI_INITIO is not set # CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_MVSAS is not set # CONFIG_SCSI_STEX is not set # CONFIG_SCSI_SYM53C8XX_2 is not set # CONFIG_SCSI_QLOGIC_1280 is not set @@ -514,81 +492,147 @@ CONFIG_AIC7XXX_REG_PRETTY_PRINT=y # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_SRP is not set -# CONFIG_SCSI_DH is not set + +# +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# # CONFIG_ATA is not set + +# +# Multi-device support (RAID and LVM) +# # CONFIG_MD is not set + +# +# Fusion MPT device support +# # CONFIG_FUSION is not set +# CONFIG_FUSION_SPI is not set +# CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set # # IEEE 1394 (FireWire) support # +# CONFIG_IEEE1394 is not set # -# Enable only one of the two stacks, unless you know what you are doing +# I2O device support # -# CONFIG_FIREWIRE is not set -# CONFIG_IEEE1394 is not set # CONFIG_I2O is not set + +# +# Network device support +# CONFIG_NETDEVICES=y -CONFIG_DUMMY=m -CONFIG_BONDING=m -# CONFIG_MACVLAN is not set +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set # CONFIG_EQUALIZER is not set # CONFIG_TUN is not set -# CONFIG_VETH is not set + +# +# ARCnet devices +# # CONFIG_ARCNET is not set -# CONFIG_PHYLIB is not set + +# +# PHY device support +# +CONFIG_PHYLIB=y + +# +# MII PHY device drivers +# +CONFIG_MARVELL_PHY=y +CONFIG_DAVICOM_PHY=y +CONFIG_QSEMI_PHY=y +CONFIG_LXT_PHY=y +CONFIG_CICADA_PHY=y +CONFIG_VITESSE_PHY=y +CONFIG_SMSC_PHY=y +# CONFIG_BROADCOM_PHY is not set +# CONFIG_FIXED_PHY is not set + +# +# Ethernet (10 or 100Mbit) +# CONFIG_NET_ETHERNET=y -CONFIG_MII=y -# CONFIG_AX88796 is not set +# CONFIG_MII is not set CONFIG_SGI_O2MACE_ETH=y # CONFIG_HAPPYMEAL is not set # CONFIG_SUNGEM is not set # CONFIG_CASSINI is not set # CONFIG_NET_VENDOR_3COM is not set -# CONFIG_SMC91X is not set # CONFIG_DM9000 is not set -CONFIG_NET_TULIP=y -CONFIG_DE2104X=m -CONFIG_TULIP=m -# CONFIG_TULIP_MWI is not set -CONFIG_TULIP_MMIO=y -# CONFIG_TULIP_NAPI is not set -# CONFIG_DE4X5 is not set -# CONFIG_WINBOND_840 is not set -# CONFIG_DM9102 is not set -# CONFIG_ULI526X is not set + +# +# Tulip family network device support +# +# CONFIG_NET_TULIP is not set # CONFIG_HP100 is not set -# CONFIG_IBM_NEW_EMAC_ZMII is not set -# CONFIG_IBM_NEW_EMAC_RGMII is not set -# CONFIG_IBM_NEW_EMAC_TAH is not set -# CONFIG_IBM_NEW_EMAC_EMAC4 is not set -# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set -# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set -# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set # CONFIG_NET_PCI is not set -# CONFIG_B44 is not set -# CONFIG_ATL2 is not set -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set + +# +# Ethernet (1000 Mbit) +# +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_E1000 is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SIS190 is not set +# CONFIG_SKGE is not set +# CONFIG_SKY2 is not set +# CONFIG_SK98LIN is not set +# CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set +CONFIG_QLA3XXX=y +# CONFIG_ATL1 is not set + +# +# Ethernet (10000 Mbit) +# +# CONFIG_CHELSIO_T1 is not set +CONFIG_CHELSIO_T3=y +# CONFIG_IXGB is not set +# CONFIG_S2IO is not set +# CONFIG_MYRI10GE is not set +CONFIG_NETXEN_NIC=y + +# +# Token Ring devices +# # CONFIG_TR is not set # -# Wireless LAN +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Wan interfaces # -# CONFIG_WLAN_PRE80211 is not set -# CONFIG_WLAN_80211 is not set -# CONFIG_IWLWIFI_LEDS is not set # CONFIG_WAN is not set # CONFIG_FDDI is not set # CONFIG_HIPPI is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set # CONFIG_NET_FC is not set +# CONFIG_SHAPER is not set # CONFIG_NETCONSOLE is not set # CONFIG_NETPOLL is not set # CONFIG_NET_POLL_CONTROLLER is not set + +# +# ISDN subsystem +# # CONFIG_ISDN is not set + +# +# Telephony Support +# # CONFIG_PHONE is not set # @@ -596,7 +640,6 @@ CONFIG_TULIP_MMIO=y # CONFIG_INPUT=y # CONFIG_INPUT_FF_MEMLESS is not set -# CONFIG_INPUT_POLLDEV is not set # # Userland interfaces @@ -606,32 +649,16 @@ CONFIG_INPUT_MOUSEDEV_PSAUX=y CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # CONFIG_INPUT_JOYDEV is not set -CONFIG_INPUT_EVDEV=m +# CONFIG_INPUT_TSDEV is not set +# CONFIG_INPUT_EVDEV is not set # CONFIG_INPUT_EVBUG is not set # # Input Device Drivers # -CONFIG_INPUT_KEYBOARD=y -CONFIG_KEYBOARD_ATKBD=y -# CONFIG_KEYBOARD_SUNKBD is not set -# CONFIG_KEYBOARD_LKKBD is not set -# CONFIG_KEYBOARD_XTKBD is not set -# CONFIG_KEYBOARD_NEWTON is not set -# CONFIG_KEYBOARD_STOWAWAY is not set -CONFIG_INPUT_MOUSE=y -CONFIG_MOUSE_PS2=y -CONFIG_MOUSE_PS2_ALPS=y -CONFIG_MOUSE_PS2_LOGIPS2PP=y -CONFIG_MOUSE_PS2_SYNAPTICS=y -CONFIG_MOUSE_PS2_LIFEBOOK=y -CONFIG_MOUSE_PS2_TRACKPOINT=y -# CONFIG_MOUSE_PS2_ELANTECH is not set -# CONFIG_MOUSE_PS2_TOUCHKIT is not set -# CONFIG_MOUSE_SERIAL is not set -# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set # CONFIG_INPUT_JOYSTICK is not set -# CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set # CONFIG_INPUT_MISC is not set @@ -642,8 +669,8 @@ CONFIG_SERIO=y # CONFIG_SERIO_I8042 is not set CONFIG_SERIO_SERPORT=y # CONFIG_SERIO_PCIPS2 is not set -CONFIG_SERIO_MACEPS2=y -CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_MACEPS2 is not set +# CONFIG_SERIO_LIBPS2 is not set CONFIG_SERIO_RAW=y # CONFIG_GAMEPORT is not set @@ -651,13 +678,10 @@ CONFIG_SERIO_RAW=y # Character devices # CONFIG_VT=y -# CONFIG_CONSOLE_TRANSLATIONS is not set CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y -# CONFIG_VT_HW_CONSOLE_BINDING is not set -CONFIG_DEVKMEM=y +CONFIG_VT_HW_CONSOLE_BINDING=y # CONFIG_SERIAL_NONSTANDARD is not set -# CONFIG_NOZOMI is not set # # Serial drivers @@ -678,304 +702,192 @@ CONFIG_SERIAL_CORE_CONSOLE=y CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 + +# +# IPMI +# # CONFIG_IPMI_HANDLER is not set -CONFIG_HW_RANDOM=y + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_RTC is not set +# CONFIG_GEN_RTC is not set +# CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set +# CONFIG_DRM is not set # CONFIG_RAW_DRIVER is not set + +# +# TPM devices +# # CONFIG_TCG_TPM is not set -CONFIG_DEVPORT=y -# CONFIG_I2C is not set -# CONFIG_SPI is not set -# CONFIG_W1 is not set -# CONFIG_POWER_SUPPLY is not set -CONFIG_HWMON=y -# CONFIG_HWMON_VID is not set -# CONFIG_SENSORS_I5K_AMB is not set -# CONFIG_SENSORS_F71805F is not set -# CONFIG_SENSORS_F71882FG is not set -# CONFIG_SENSORS_IT87 is not set -# CONFIG_SENSORS_PC87360 is not set -# CONFIG_SENSORS_PC87427 is not set -# CONFIG_SENSORS_SIS5595 is not set -# CONFIG_SENSORS_SMSC47M1 is not set -# CONFIG_SENSORS_SMSC47B397 is not set -# CONFIG_SENSORS_VIA686A is not set -# CONFIG_SENSORS_VT1211 is not set -# CONFIG_SENSORS_VT8231 is not set -# CONFIG_SENSORS_W83627HF is not set -# CONFIG_SENSORS_W83627EHF is not set -# CONFIG_HWMON_DEBUG_CHIP is not set -# CONFIG_THERMAL is not set -# CONFIG_THERMAL_HWMON is not set -CONFIG_WATCHDOG=y -# CONFIG_WATCHDOG_NOWAYOUT is not set # -# Watchdog Device Drivers +# I2C support # -# CONFIG_SOFT_WATCHDOG is not set -# CONFIG_ALIM7101_WDT is not set +# CONFIG_I2C is not set # -# PCI-based Watchdog Cards +# SPI support # -# CONFIG_PCIPCWATCHDOG is not set -# CONFIG_WDTPCI is not set -CONFIG_SSB_POSSIBLE=y +# CONFIG_SPI is not set +# CONFIG_SPI_MASTER is not set # -# Sonics Silicon Backplane +# Dallas's 1-wire bus # -# CONFIG_SSB is not set +# CONFIG_W1 is not set # -# Multifunction device drivers +# Hardware Monitoring support # -# CONFIG_MFD_CORE is not set -# CONFIG_MFD_SM501 is not set -# CONFIG_HTC_PASIC3 is not set -# CONFIG_MFD_TMIO is not set -# CONFIG_REGULATOR is not set +# CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set # # Multimedia devices # +# CONFIG_VIDEO_DEV is not set # -# Multimedia core support +# Digital Video Broadcasting Devices # -CONFIG_VIDEO_DEV=m -CONFIG_VIDEO_V4L2_COMMON=m -CONFIG_VIDEO_ALLOW_V4L1=y -CONFIG_VIDEO_V4L1_COMPAT=y -# CONFIG_DVB_CORE is not set -CONFIG_VIDEO_MEDIA=m +# CONFIG_DVB is not set # -# Multimedia drivers +# Graphics support # -# CONFIG_MEDIA_ATTACH is not set -CONFIG_VIDEO_V4L2=m -CONFIG_VIDEO_V4L1=m -CONFIG_VIDEOBUF_GEN=m -CONFIG_VIDEOBUF_VMALLOC=m -CONFIG_VIDEO_CAPTURE_DRIVERS=y -# CONFIG_VIDEO_ADV_DEBUG is not set -# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set -CONFIG_VIDEO_HELPER_CHIPS_AUTO=y -CONFIG_VIDEO_VIVI=m -# CONFIG_VIDEO_CPIA is not set -# CONFIG_VIDEO_STRADIS is not set -# CONFIG_SOC_CAMERA is not set -CONFIG_RADIO_ADAPTERS=y -# CONFIG_RADIO_GEMTEK_PCI is not set -# CONFIG_RADIO_MAXIRADIO is not set -# CONFIG_RADIO_MAESTRO is not set -CONFIG_DAB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB is not set # -# Graphics support +# Console display driver support # -# CONFIG_DRM is not set -# CONFIG_VGASTATE is not set -CONFIG_VIDEO_OUTPUT_CONTROL=y -CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y -# CONFIG_FB_DDC is not set -# CONFIG_FB_BOOT_VESA_SUPPORT is not set -CONFIG_FB_CFB_FILLRECT=y -CONFIG_FB_CFB_COPYAREA=y -CONFIG_FB_CFB_IMAGEBLIT=y -# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set -# CONFIG_FB_SYS_FILLRECT is not set -# CONFIG_FB_SYS_COPYAREA is not set -# CONFIG_FB_SYS_IMAGEBLIT is not set -# CONFIG_FB_FOREIGN_ENDIAN is not set -# CONFIG_FB_SYS_FOPS is not set -# CONFIG_FB_SVGALIB is not set -# CONFIG_FB_MACMODES is not set -# CONFIG_FB_BACKLIGHT is not set -# CONFIG_FB_MODE_HELPERS is not set -# CONFIG_FB_TILEBLITTING is not set - -# -# Frame buffer hardware drivers -# -# CONFIG_FB_CIRRUS is not set -# CONFIG_FB_PM2 is not set -# CONFIG_FB_CYBER2000 is not set -# CONFIG_FB_ASILIANT is not set -# CONFIG_FB_IMSTT is not set -# CONFIG_FB_UVESA is not set -CONFIG_FB_GBE=y -CONFIG_FB_GBE_MEM=4 -# CONFIG_FB_S1D13XXX is not set -# CONFIG_FB_NVIDIA is not set -# CONFIG_FB_RIVA is not set -# CONFIG_FB_MATROX is not set -# CONFIG_FB_RADEON is not set -# CONFIG_FB_ATY128 is not set -# CONFIG_FB_ATY is not set -# CONFIG_FB_S3 is not set -# CONFIG_FB_SAVAGE is not set -# CONFIG_FB_SIS is not set -# CONFIG_FB_VIA is not set -# CONFIG_FB_NEOMAGIC is not set -# CONFIG_FB_KYRO is not set -# CONFIG_FB_3DFX is not set -# CONFIG_FB_VOODOO1 is not set -# CONFIG_FB_VT8623 is not set -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_ARK is not set -# CONFIG_FB_PM3 is not set -# CONFIG_FB_CARMINE is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_FB_METRONOME is not set -# CONFIG_FB_MB862XX is not set +# CONFIG_VGA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y # CONFIG_BACKLIGHT_LCD_SUPPORT is not set # -# Display device support +# Sound # -# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_SOUND is not set # -# Console display driver support +# HID Devices # -# CONFIG_VGA_CONSOLE is not set -CONFIG_DUMMY_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE=y -# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set -# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set -CONFIG_FONTS=y -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y -# CONFIG_FONT_6x11 is not set -# CONFIG_FONT_7x14 is not set -# CONFIG_FONT_PEARL_8x8 is not set -# CONFIG_FONT_ACORN_8x8 is not set -# CONFIG_FONT_MINI_4x6 is not set -# CONFIG_FONT_SUN8x16 is not set -# CONFIG_FONT_SUN12x22 is not set -# CONFIG_FONT_10x18 is not set -CONFIG_LOGO=y -# CONFIG_LOGO_LINUX_MONO is not set -# CONFIG_LOGO_LINUX_VGA16 is not set -# CONFIG_LOGO_LINUX_CLUT224 is not set -CONFIG_LOGO_SGI_CLUT224=y -# CONFIG_SOUND is not set -CONFIG_HID_SUPPORT=y CONFIG_HID=y # CONFIG_HID_DEBUG is not set -# CONFIG_HIDRAW is not set -# CONFIG_HID_PID is not set # -# Special HID drivers +# USB support +# +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +# CONFIG_USB is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# MMC/SD Card support # -CONFIG_HID_COMPAT=y -# CONFIG_USB_SUPPORT is not set -# CONFIG_UWB is not set # CONFIG_MMC is not set -# CONFIG_MEMSTICK is not set + +# +# LED devices +# # CONFIG_NEW_LEDS is not set -# CONFIG_ACCESSIBILITY is not set + +# +# LED drivers +# + +# +# LED Triggers +# + +# +# InfiniBand support +# # CONFIG_INFINIBAND is not set -CONFIG_RTC_LIB=y -CONFIG_RTC_CLASS=y -# CONFIG_RTC_HCTOSYS is not set -# CONFIG_RTC_DEBUG is not set # -# RTC interfaces +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) # -# CONFIG_RTC_INTF_SYSFS is not set -# CONFIG_RTC_INTF_PROC is not set -CONFIG_RTC_INTF_DEV=y -# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set -# CONFIG_RTC_DRV_TEST is not set # -# SPI RTC drivers +# Real Time Clock # +# CONFIG_RTC_CLASS is not set # -# Platform RTC drivers +# DMA Engine support # -CONFIG_RTC_DRV_CMOS=y -# CONFIG_RTC_DRV_DS1286 is not set -# CONFIG_RTC_DRV_DS1511 is not set -# CONFIG_RTC_DRV_DS1553 is not set -# CONFIG_RTC_DRV_DS1742 is not set -# CONFIG_RTC_DRV_STK17TA8 is not set -# CONFIG_RTC_DRV_M48T86 is not set -# CONFIG_RTC_DRV_M48T35 is not set -# CONFIG_RTC_DRV_M48T59 is not set -# CONFIG_RTC_DRV_BQ4802 is not set -# CONFIG_RTC_DRV_V3020 is not set +# CONFIG_DMA_ENGINE is not set # -# on-CPU RTC drivers +# DMA Clients +# + +# +# DMA Devices +# + +# +# Auxiliary Display support +# + +# +# Virtualization # -# CONFIG_DMADEVICES is not set -# CONFIG_UIO is not set -# CONFIG_STAGING is not set -CONFIG_STAGING_EXCLUDE_BUILD=y # # File systems # CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y +# CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -# CONFIG_EXT4_FS is not set -CONFIG_JBD=y -CONFIG_FS_MBCACHE=y +# CONFIG_EXT3_FS is not set +# CONFIG_EXT4DEV_FS is not set # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y -CONFIG_FILE_LOCKING=y # CONFIG_XFS_FS is not set # CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set -CONFIG_DNOTIFY=y +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y -CONFIG_QUOTA=y -# CONFIG_QUOTA_NETLINK_INTERFACE is not set -CONFIG_PRINT_QUOTA_WARNING=y -CONFIG_QFMT_V1=m -CONFIG_QFMT_V2=m -CONFIG_QUOTACTL=y -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_FUSE_FS=m +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +CONFIG_FUSE_FS=y CONFIG_GENERIC_ACL=y # # CD-ROM/DVD Filesystems # -CONFIG_ISO9660_FS=m -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -CONFIG_UDF_NLS=y +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set # # DOS/FAT/NT Filesystems # -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FAT_DEFAULT_CODEPAGE=437 -CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set # CONFIG_NTFS_FS is not set # @@ -984,11 +896,11 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_PROC_SYSCTL=y -CONFIG_PROC_PAGE_MONITOR=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y # CONFIG_HUGETLB_PAGE is not set +CONFIG_RAMFS=y CONFIG_CONFIGFS_FS=y # @@ -1004,42 +916,33 @@ CONFIG_CONFIGFS_FS=y # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set # CONFIG_VXFS_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_OMFS_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set -# CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set -CONFIG_NETWORK_FILESYSTEMS=y + +# +# Network File Systems +# CONFIG_NFS_FS=y CONFIG_NFS_V3=y # CONFIG_NFS_V3_ACL is not set # CONFIG_NFS_V4 is not set +# CONFIG_NFS_DIRECTIO is not set +# CONFIG_NFSD is not set CONFIG_ROOT_NFS=y -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -# CONFIG_NFSD_V3_ACL is not set -# CONFIG_NFSD_V4 is not set CONFIG_LOCKD=y CONFIG_LOCKD_V4=y -CONFIG_EXPORTFS=m CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y -# CONFIG_SUNRPC_REGISTER_V4 is not set # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set -CONFIG_CIFS=m -# CONFIG_CIFS_STATS is not set -# CONFIG_CIFS_WEAK_PW_HASH is not set -# CONFIG_CIFS_UPCALL is not set -# CONFIG_CIFS_XATTR is not set -# CONFIG_CIFS_DEBUG2 is not set -# CONFIG_CIFS_EXPERIMENTAL is not set +# CONFIG_CIFS is not set # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # Partition Types @@ -1050,83 +953,45 @@ CONFIG_PARTITION_ADVANCED=y # CONFIG_AMIGA_PARTITION is not set # CONFIG_ATARI_PARTITION is not set # CONFIG_MAC_PARTITION is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_MINIX_SUBPARTITION is not set -# CONFIG_SOLARIS_X86_PARTITION is not set -# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_MSDOS_PARTITION is not set # CONFIG_LDM_PARTITION is not set CONFIG_SGI_PARTITION=y # CONFIG_ULTRIX_PARTITION is not set # CONFIG_SUN_PARTITION is not set # CONFIG_KARMA_PARTITION is not set # CONFIG_EFI_PARTITION is not set -# CONFIG_SYSV68_PARTITION is not set -CONFIG_NLS=y -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m -# CONFIG_DLM is not set + +# +# Native Language Support +# +# CONFIG_NLS is not set + +# +# Distributed Lock Manager +# +CONFIG_DLM=y +CONFIG_DLM_TCP=y +# CONFIG_DLM_SCTP is not set +# CONFIG_DLM_DEBUG is not set + +# +# Profiling support +# +# CONFIG_PROFILING is not set # # Kernel hacking # CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set -CONFIG_ENABLE_WARN_DEPRECATED=y CONFIG_ENABLE_MUST_CHECK=y -CONFIG_FRAME_WARN=2048 -CONFIG_MAGIC_SYSRQ=y +# CONFIG_MAGIC_SYSRQ is not set # CONFIG_UNUSED_SYMBOLS is not set # CONFIG_DEBUG_FS is not set # CONFIG_HEADERS_CHECK is not set # CONFIG_DEBUG_KERNEL is not set -# CONFIG_DEBUG_MEMORY_INIT is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y - -# -# Tracers -# -# CONFIG_DYNAMIC_PRINTK_DEBUG is not set -# CONFIG_SAMPLES is not set -CONFIG_HAVE_ARCH_KGDB=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_CROSSCOMPILE=y CONFIG_CMDLINE="" # @@ -1135,99 +1000,51 @@ CONFIG_CMDLINE="" CONFIG_KEYS=y CONFIG_KEYS_DEBUG_PROC_KEYS=y # CONFIG_SECURITY is not set -# CONFIG_SECURITYFS is not set -# CONFIG_SECURITY_FILE_CAPABILITIES is not set -CONFIG_CRYPTO=y # -# Crypto core or helper +# Cryptographic options # -# CONFIG_CRYPTO_FIPS is not set +CONFIG_CRYPTO=y CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_AEAD=y CONFIG_CRYPTO_BLKCIPHER=y CONFIG_CRYPTO_HASH=y -CONFIG_CRYPTO_RNG=y CONFIG_CRYPTO_MANAGER=y -CONFIG_CRYPTO_GF128MUL=y -CONFIG_CRYPTO_NULL=y -# CONFIG_CRYPTO_CRYPTD is not set -CONFIG_CRYPTO_AUTHENC=m -# CONFIG_CRYPTO_TEST is not set - -# -# Authenticated Encryption with Associated Data -# -# CONFIG_CRYPTO_CCM is not set -# CONFIG_CRYPTO_GCM is not set -# CONFIG_CRYPTO_SEQIV is not set - -# -# Block modes -# -CONFIG_CRYPTO_CBC=y -# CONFIG_CRYPTO_CTR is not set -# CONFIG_CRYPTO_CTS is not set -CONFIG_CRYPTO_ECB=y -CONFIG_CRYPTO_LRW=y -CONFIG_CRYPTO_PCBC=y -# CONFIG_CRYPTO_XTS is not set - -# -# Hash modes -# CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=y - -# -# Digest -# -CONFIG_CRYPTO_CRC32C=y +CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_MD4=y CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=y -# CONFIG_CRYPTO_RMD128 is not set -# CONFIG_CRYPTO_RMD160 is not set -# CONFIG_CRYPTO_RMD256 is not set -# CONFIG_CRYPTO_RMD320 is not set CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y -CONFIG_CRYPTO_TGR192=y CONFIG_CRYPTO_WP512=y - -# -# Ciphers -# -CONFIG_CRYPTO_AES=y -CONFIG_CRYPTO_ANUBIS=y -CONFIG_CRYPTO_ARC4=y -CONFIG_CRYPTO_BLOWFISH=y -CONFIG_CRYPTO_CAMELLIA=y -CONFIG_CRYPTO_CAST5=y -CONFIG_CRYPTO_CAST6=y +CONFIG_CRYPTO_TGR192=y +CONFIG_CRYPTO_GF128MUL=y +CONFIG_CRYPTO_ECB=y +CONFIG_CRYPTO_CBC=y +CONFIG_CRYPTO_PCBC=y +CONFIG_CRYPTO_LRW=y CONFIG_CRYPTO_DES=y CONFIG_CRYPTO_FCRYPT=y -CONFIG_CRYPTO_KHAZAD=y -# CONFIG_CRYPTO_SALSA20 is not set -# CONFIG_CRYPTO_SEED is not set -CONFIG_CRYPTO_SERPENT=y -CONFIG_CRYPTO_TEA=y +CONFIG_CRYPTO_BLOWFISH=y CONFIG_CRYPTO_TWOFISH=y CONFIG_CRYPTO_TWOFISH_COMMON=y - -# -# Compression -# +CONFIG_CRYPTO_SERPENT=y +CONFIG_CRYPTO_AES=y +CONFIG_CRYPTO_CAST5=y +CONFIG_CRYPTO_CAST6=y +CONFIG_CRYPTO_TEA=y +CONFIG_CRYPTO_ARC4=y +CONFIG_CRYPTO_KHAZAD=y +CONFIG_CRYPTO_ANUBIS=y CONFIG_CRYPTO_DEFLATE=y -# CONFIG_CRYPTO_LZO is not set +CONFIG_CRYPTO_MICHAEL_MIC=y +CONFIG_CRYPTO_CRC32C=y +CONFIG_CRYPTO_CAMELLIA=y # -# Random Number Generation +# Hardware crypto devices # -# CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRYPTO_HW=y -# CONFIG_CRYPTO_DEV_HIFN_795X is not set # # Library routines @@ -1235,15 +1052,10 @@ CONFIG_CRYPTO_HW=y CONFIG_BITREVERSE=y # CONFIG_CRC_CCITT is not set CONFIG_CRC16=y -CONFIG_CRC_T10DIF=y -CONFIG_CRC_ITU_T=m CONFIG_CRC32=y -# CONFIG_CRC7 is not set CONFIG_LIBCRC32C=y -CONFIG_AUDIT_GENERIC=y CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y CONFIG_PLIST=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y -CONFIG_HAS_DMA=y diff --git a/trunk/arch/mips/include/asm/asmmacro.h b/trunk/arch/mips/include/asm/asmmacro.h index 6c8342ae74db..7a881755800f 100644 --- a/trunk/arch/mips/include/asm/asmmacro.h +++ b/trunk/arch/mips/include/asm/asmmacro.h @@ -35,16 +35,6 @@ mtc0 \reg, CP0_TCSTATUS _ehb .endm -#elif defined(CONFIG_CPU_MIPSR2) - .macro local_irq_enable reg=t0 - ei - irq_enable_hazard - .endm - - .macro local_irq_disable reg=t0 - di - irq_disable_hazard - .endm #else .macro local_irq_enable reg=t0 mfc0 \reg, CP0_STATUS diff --git a/trunk/arch/mips/include/asm/mach-ip27/topology.h b/trunk/arch/mips/include/asm/mach-ip27/topology.h index 1fb959f98982..7785bec732f2 100644 --- a/trunk/arch/mips/include/asm/mach-ip27/topology.h +++ b/trunk/arch/mips/include/asm/mach-ip27/topology.h @@ -37,6 +37,7 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; /* sched_domains SD_NODE_INIT for SGI IP27 machines */ #define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/trunk/arch/mips/mm/dma-default.c b/trunk/arch/mips/mm/dma-default.c index e6708b3ad343..5b98d0e731c2 100644 --- a/trunk/arch/mips/mm/dma-default.c +++ b/trunk/arch/mips/mm/dma-default.c @@ -111,7 +111,6 @@ EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { - plat_unmap_dma_mem(dma_handle); free_pages((unsigned long) vaddr, get_order(size)); } @@ -122,8 +121,6 @@ void dma_free_coherent(struct device *dev, size_t size, void *vaddr, { unsigned long addr = (unsigned long) vaddr; - plat_unmap_dma_mem(dma_handle); - if (!plat_device_is_coherent(dev)) addr = CAC_ADDR(addr); diff --git a/trunk/arch/powerpc/Kconfig b/trunk/arch/powerpc/Kconfig index adb23ea1c1ef..525c13a4de93 100644 --- a/trunk/arch/powerpc/Kconfig +++ b/trunk/arch/powerpc/Kconfig @@ -141,7 +141,7 @@ config GENERIC_NVRAM bool default y if PPC32 -config SCHED_OMIT_FRAME_POINTER +config SCHED_NO_NO_OMIT_FRAME_POINTER bool default y diff --git a/trunk/arch/powerpc/boot/Makefile b/trunk/arch/powerpc/boot/Makefile index 3d3daa674299..8fc6d72849ae 100644 --- a/trunk/arch/powerpc/boot/Makefile +++ b/trunk/arch/powerpc/boot/Makefile @@ -41,7 +41,6 @@ $(obj)/4xx.o: BOOTCFLAGS += -mcpu=405 $(obj)/ebony.o: BOOTCFLAGS += -mcpu=405 $(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=405 $(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=405 -$(obj)/cuboot-acadia.o: BOOTCFLAGS += -mcpu=405 $(obj)/treeboot-walnut.o: BOOTCFLAGS += -mcpu=405 $(obj)/virtex405-head.o: BOOTAFLAGS += -mcpu=405 diff --git a/trunk/arch/powerpc/include/asm/ftrace.h b/trunk/arch/powerpc/include/asm/ftrace.h index e5f2ae8362f7..b298f7a631e6 100644 --- a/trunk/arch/powerpc/include/asm/ftrace.h +++ b/trunk/arch/powerpc/include/asm/ftrace.h @@ -7,19 +7,7 @@ #ifndef __ASSEMBLY__ extern void _mcount(void); - -#ifdef CONFIG_DYNAMIC_FTRACE -static inline unsigned long ftrace_call_adjust(unsigned long addr) -{ - /* reloction of mcount call site is the same as the address */ - return addr; -} - -struct dyn_arch_ftrace { - struct module *mod; -}; -#endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* __ASSEMBLY__ */ +#endif #endif diff --git a/trunk/arch/powerpc/include/asm/module.h b/trunk/arch/powerpc/include/asm/module.h index 08454880a2c0..e5f14b13ccf0 100644 --- a/trunk/arch/powerpc/include/asm/module.h +++ b/trunk/arch/powerpc/include/asm/module.h @@ -34,19 +34,11 @@ struct mod_arch_specific { #ifdef __powerpc64__ unsigned int stubs_section; /* Index of stubs section in module */ unsigned int toc_section; /* What section is the TOC? */ -#ifdef CONFIG_DYNAMIC_FTRACE - unsigned long toc; - unsigned long tramp; -#endif - -#else /* powerpc64 */ +#else /* Indices of PLT sections within module. */ unsigned int core_plt_section; unsigned int init_plt_section; -#ifdef CONFIG_DYNAMIC_FTRACE - unsigned long tramp; #endif -#endif /* powerpc64 */ /* List of BUG addresses, source line numbers and filenames */ struct list_head bug_list; @@ -76,12 +68,6 @@ struct mod_arch_specific { # endif /* MODULE */ #endif -#ifdef CONFIG_DYNAMIC_FTRACE -# ifdef MODULE - asm(".section .ftrace.tramp,\"ax\",@nobits; .align 3; .previous"); -# endif /* MODULE */ -#endif - struct exception_table_entry; void sort_ex_table(struct exception_table_entry *start, diff --git a/trunk/arch/powerpc/include/asm/topology.h b/trunk/arch/powerpc/include/asm/topology.h index 373fca394a54..c32da6f97999 100644 --- a/trunk/arch/powerpc/include/asm/topology.h +++ b/trunk/arch/powerpc/include/asm/topology.h @@ -48,6 +48,7 @@ static inline int pcibus_to_node(struct pci_bus *bus) /* sched_domains SD_NODE_INIT for PPC64 machines */ #define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/trunk/arch/powerpc/kernel/Makefile b/trunk/arch/powerpc/kernel/Makefile index d17edb4a2f9d..92673b43858d 100644 --- a/trunk/arch/powerpc/kernel/Makefile +++ b/trunk/arch/powerpc/kernel/Makefile @@ -17,7 +17,6 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog -CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog ifdef CONFIG_DYNAMIC_FTRACE # dynamic ftrace setup. diff --git a/trunk/arch/powerpc/kernel/entry_32.S b/trunk/arch/powerpc/kernel/entry_32.S index 6f7eb7e00c79..7ecc0d1855c3 100644 --- a/trunk/arch/powerpc/kernel/entry_32.S +++ b/trunk/arch/powerpc/kernel/entry_32.S @@ -1162,17 +1162,39 @@ machine_check_in_rtas: #ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL(mcount) _GLOBAL(_mcount) - /* - * It is required that _mcount on PPC32 must preserve the - * link register. But we have r0 to play with. We use r0 - * to push the return address back to the caller of mcount - * into the ctr register, restore the link register and - * then jump back using the ctr register. - */ - mflr r0 + stwu r1,-48(r1) + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r3 + stw r7, 28(r1) + mfcr r5 + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + stw r3, 44(r1) + stw r5, 8(r1) + subi r3, r3, MCOUNT_INSN_SIZE + .globl mcount_call +mcount_call: + bl ftrace_stub + nop + lwz r6, 8(r1) + lwz r0, 44(r1) + lwz r3, 12(r1) mtctr r0 - lwz r0, 4(r1) + lwz r4, 16(r1) + mtcr r6 + lwz r5, 20(r1) + lwz r6, 24(r1) + lwz r0, 52(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) mtlr r0 + lwz r9, 36(r1) + lwz r10,40(r1) + addi r1, r1, 48 bctr _GLOBAL(ftrace_caller) diff --git a/trunk/arch/powerpc/kernel/entry_64.S b/trunk/arch/powerpc/kernel/entry_64.S index 383ed6eb0085..e0bcf9354286 100644 --- a/trunk/arch/powerpc/kernel/entry_64.S +++ b/trunk/arch/powerpc/kernel/entry_64.S @@ -894,6 +894,18 @@ _GLOBAL(enter_prom) #ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL(mcount) _GLOBAL(_mcount) + /* Taken from output of objdump from lib64/glibc */ + mflr r3 + stdu r1, -112(r1) + std r3, 128(r1) + subi r3, r3, MCOUNT_INSN_SIZE + .globl mcount_call +mcount_call: + bl ftrace_stub + nop + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 blr _GLOBAL(ftrace_caller) diff --git a/trunk/arch/powerpc/kernel/ftrace.c b/trunk/arch/powerpc/kernel/ftrace.c index 5355244c99ff..f4b006ed0ab1 100644 --- a/trunk/arch/powerpc/kernel/ftrace.c +++ b/trunk/arch/powerpc/kernel/ftrace.c @@ -9,30 +9,22 @@ #include #include -#include -#include #include #include #include #include #include -#include #include -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(fmt , ...) do { } while (0) -#endif -static unsigned int ftrace_nop = PPC_NOP_INSTR; +static unsigned int ftrace_nop = 0x60000000; #ifdef CONFIG_PPC32 # define GET_ADDR(addr) addr #else /* PowerPC64's functions are data that points to the functions */ -# define GET_ADDR(addr) (*(unsigned long *)addr) +# define GET_ADDR(addr) *(unsigned long *)addr #endif @@ -41,12 +33,12 @@ static unsigned int ftrace_calc_offset(long ip, long addr) return (int)(addr - ip); } -static unsigned char *ftrace_nop_replace(void) +unsigned char *ftrace_nop_replace(void) { return (char *)&ftrace_nop; } -static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) { static unsigned int op; @@ -76,422 +68,49 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) # define _ASM_PTR " .long " #endif -static int +int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { - unsigned char replaced[MCOUNT_INSN_SIZE]; + unsigned replaced; + unsigned old = *(unsigned *)old_code; + unsigned new = *(unsigned *)new_code; + int faulted = 0; /* * Note: Due to modules and __init, code can * disappear and change, we need to protect against faulting - * as well as code changing. We do this by using the - * probe_kernel_* functions. + * as well as code changing. * * No real locking needed, this code is run through - * kstop_machine, or before SMP starts. + * kstop_machine. */ - - /* read the text we want to modify */ - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) - return -EFAULT; - - /* Make sure it is what we expect it to be */ - if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) - return -EINVAL; - - /* replace the text with the new text */ - if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) - return -EPERM; - - flush_icache_range(ip, ip + 8); - - return 0; -} - -/* - * Helper functions that are the same for both PPC64 and PPC32. - */ -static int test_24bit_addr(unsigned long ip, unsigned long addr) -{ - - /* use the create_branch to verify that this offset can be branched */ - return create_branch((unsigned int *)ip, addr, 0); -} - -static int is_bl_op(unsigned int op) -{ - return (op & 0xfc000003) == 0x48000001; -} - -static unsigned long find_bl_target(unsigned long ip, unsigned int op) -{ - static int offset; - - offset = (op & 0x03fffffc); - /* make it signed */ - if (offset & 0x02000000) - offset |= 0xfe000000; - - return ip + (long)offset; -} - -#ifdef CONFIG_PPC64 -static int -__ftrace_make_nop(struct module *mod, - struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned int op; - unsigned int jmp[5]; - unsigned long ptr; - unsigned long ip = rec->ip; - unsigned long tramp; - int offset; - - /* read where this goes */ - if (probe_kernel_read(&op, (void *)ip, sizeof(int))) - return -EFAULT; - - /* Make sure that that this is still a 24bit jump */ - if (!is_bl_op(op)) { - printk(KERN_ERR "Not expected bl: opcode is %x\n", op); - return -EINVAL; - } - - /* lets find where the pointer goes */ - tramp = find_bl_target(ip, op); - - /* - * On PPC64 the trampoline looks like: - * 0x3d, 0x82, 0x00, 0x00, addis r12,r2, - * 0x39, 0x8c, 0x00, 0x00, addi r12,r12, - * Where the bytes 2,3,6 and 7 make up the 32bit offset - * to the TOC that holds the pointer. - * to jump to. - * 0xf8, 0x41, 0x00, 0x28, std r2,40(r1) - * 0xe9, 0x6c, 0x00, 0x20, ld r11,32(r12) - * The actually address is 32 bytes from the offset - * into the TOC. - * 0xe8, 0x4c, 0x00, 0x28, ld r2,40(r12) - */ - - DEBUGP("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc); - - /* Find where the trampoline jumps to */ - if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { - printk(KERN_ERR "Failed to read %lx\n", tramp); - return -EFAULT; - } - - DEBUGP(" %08x %08x", jmp[0], jmp[1]); - - /* verify that this is what we expect it to be */ - if (((jmp[0] & 0xffff0000) != 0x3d820000) || - ((jmp[1] & 0xffff0000) != 0x398c0000) || - (jmp[2] != 0xf8410028) || - (jmp[3] != 0xe96c0020) || - (jmp[4] != 0xe84c0028)) { - printk(KERN_ERR "Not a trampoline\n"); - return -EINVAL; - } - - offset = (unsigned)((unsigned short)jmp[0]) << 16 | - (unsigned)((unsigned short)jmp[1]); - - DEBUGP(" %x ", offset); - - /* get the address this jumps too */ - tramp = mod->arch.toc + offset + 32; - DEBUGP("toc: %lx", tramp); - - if (probe_kernel_read(jmp, (void *)tramp, 8)) { - printk(KERN_ERR "Failed to read %lx\n", tramp); - return -EFAULT; - } - - DEBUGP(" %08x %08x\n", jmp[0], jmp[1]); - - ptr = ((unsigned long)jmp[0] << 32) + jmp[1]; - - /* This should match what was called */ - if (ptr != GET_ADDR(addr)) { - printk(KERN_ERR "addr does not match %lx\n", ptr); - return -EINVAL; - } - - /* - * We want to nop the line, but the next line is - * 0xe8, 0x41, 0x00, 0x28 ld r2,40(r1) - * This needs to be turned to a nop too. - */ - if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) - return -EFAULT; - - if (op != 0xe8410028) { - printk(KERN_ERR "Next line is not ld! (%08x)\n", op); - return -EINVAL; - } - - /* - * Milton Miller pointed out that we can not blindly do nops. - * If a task was preempted when calling a trace function, - * the nops will remove the way to restore the TOC in r2 - * and the r2 TOC will get corrupted. - */ - - /* - * Replace: - * bl <==== will be replaced with "b 1f" - * ld r2,40(r1) - * 1: - */ - op = 0x48000008; /* b +8 */ - - if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE)) - return -EPERM; - - - flush_icache_range(ip, ip + 8); - - return 0; -} - -#else /* !PPC64 */ -static int -__ftrace_make_nop(struct module *mod, - struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned int op; - unsigned int jmp[4]; - unsigned long ip = rec->ip; - unsigned long tramp; - - if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE)) - return -EFAULT; - - /* Make sure that that this is still a 24bit jump */ - if (!is_bl_op(op)) { - printk(KERN_ERR "Not expected bl: opcode is %x\n", op); - return -EINVAL; - } - - /* lets find where the pointer goes */ - tramp = find_bl_target(ip, op); - - /* - * On PPC32 the trampoline looks like: - * 0x3d, 0x60, 0x00, 0x00 lis r11,sym@ha - * 0x39, 0x6b, 0x00, 0x00 addi r11,r11,sym@l - * 0x7d, 0x69, 0x03, 0xa6 mtctr r11 - * 0x4e, 0x80, 0x04, 0x20 bctr - */ - - DEBUGP("ip:%lx jumps to %lx", ip, tramp); - - /* Find where the trampoline jumps to */ - if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { - printk(KERN_ERR "Failed to read %lx\n", tramp); - return -EFAULT; - } - - DEBUGP(" %08x %08x ", jmp[0], jmp[1]); - - /* verify that this is what we expect it to be */ - if (((jmp[0] & 0xffff0000) != 0x3d600000) || - ((jmp[1] & 0xffff0000) != 0x396b0000) || - (jmp[2] != 0x7d6903a6) || - (jmp[3] != 0x4e800420)) { - printk(KERN_ERR "Not a trampoline\n"); - return -EINVAL; - } - - tramp = (jmp[1] & 0xffff) | - ((jmp[0] & 0xffff) << 16); - if (tramp & 0x8000) - tramp -= 0x10000; - - DEBUGP(" %x ", tramp); - - if (tramp != addr) { - printk(KERN_ERR - "Trampoline location %08lx does not match addr\n", - tramp); - return -EINVAL; - } - - op = PPC_NOP_INSTR; - - if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE)) - return -EPERM; - - flush_icache_range(ip, ip + 8); - - return 0; -} -#endif /* PPC64 */ - -int ftrace_make_nop(struct module *mod, - struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned char *old, *new; - unsigned long ip = rec->ip; - - /* - * If the calling address is more that 24 bits away, - * then we had to use a trampoline to make the call. - * Otherwise just update the call site. - */ - if (test_24bit_addr(ip, addr)) { - /* within range */ - old = ftrace_call_replace(ip, addr); - new = ftrace_nop_replace(); - return ftrace_modify_code(ip, old, new); - } - - /* - * Out of range jumps are called from modules. - * We should either already have a pointer to the module - * or it has been passed in. - */ - if (!rec->arch.mod) { - if (!mod) { - printk(KERN_ERR "No module loaded addr=%lx\n", - addr); - return -EFAULT; - } - rec->arch.mod = mod; - } else if (mod) { - if (mod != rec->arch.mod) { - printk(KERN_ERR - "Record mod %p not equal to passed in mod %p\n", - rec->arch.mod, mod); - return -EINVAL; - } - /* nothing to do if mod == rec->arch.mod */ - } else - mod = rec->arch.mod; - - return __ftrace_make_nop(mod, rec, addr); - -} - -#ifdef CONFIG_PPC64 -static int -__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned int op[2]; - unsigned long ip = rec->ip; - - /* read where this goes */ - if (probe_kernel_read(op, (void *)ip, MCOUNT_INSN_SIZE * 2)) - return -EFAULT; - - /* - * It should be pointing to two nops or - * b +8; ld r2,40(r1) - */ - if (((op[0] != 0x48000008) || (op[1] != 0xe8410028)) && - ((op[0] != PPC_NOP_INSTR) || (op[1] != PPC_NOP_INSTR))) { - printk(KERN_ERR "Expected NOPs but have %x %x\n", op[0], op[1]); - return -EINVAL; - } - - /* If we never set up a trampoline to ftrace_caller, then bail */ - if (!rec->arch.mod->arch.tramp) { - printk(KERN_ERR "No ftrace trampoline\n"); - return -EINVAL; - } - - /* create the branch to the trampoline */ - op[0] = create_branch((unsigned int *)ip, - rec->arch.mod->arch.tramp, BRANCH_SET_LINK); - if (!op[0]) { - printk(KERN_ERR "REL24 out of range!\n"); - return -EINVAL; - } - - /* ld r2,40(r1) */ - op[1] = 0xe8410028; - - DEBUGP("write to %lx\n", rec->ip); - - if (probe_kernel_write((void *)ip, op, MCOUNT_INSN_SIZE * 2)) - return -EPERM; - - flush_icache_range(ip, ip + 8); - - return 0; -} -#else -static int -__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned int op; - unsigned long ip = rec->ip; - - /* read where this goes */ - if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE)) - return -EFAULT; - - /* It should be pointing to a nop */ - if (op != PPC_NOP_INSTR) { - printk(KERN_ERR "Expected NOP but have %x\n", op); - return -EINVAL; - } - - /* If we never set up a trampoline to ftrace_caller, then bail */ - if (!rec->arch.mod->arch.tramp) { - printk(KERN_ERR "No ftrace trampoline\n"); - return -EINVAL; - } - - /* create the branch to the trampoline */ - op = create_branch((unsigned int *)ip, - rec->arch.mod->arch.tramp, BRANCH_SET_LINK); - if (!op) { - printk(KERN_ERR "REL24 out of range!\n"); - return -EINVAL; - } - - DEBUGP("write to %lx\n", rec->ip); - - if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE)) - return -EPERM; - - flush_icache_range(ip, ip + 8); - - return 0; -} -#endif /* CONFIG_PPC64 */ - -int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned char *old, *new; - unsigned long ip = rec->ip; - - /* - * If the calling address is more that 24 bits away, - * then we had to use a trampoline to make the call. - * Otherwise just update the call site. - */ - if (test_24bit_addr(ip, addr)) { - /* within range */ - old = ftrace_nop_replace(); - new = ftrace_call_replace(ip, addr); - return ftrace_modify_code(ip, old, new); - } - - /* - * Out of range jumps are called from modules. - * Being that we are converting from nop, it had better - * already have a module defined. - */ - if (!rec->arch.mod) { - printk(KERN_ERR "No module loaded\n"); - return -EINVAL; - } - - return __ftrace_make_call(rec, addr); + asm volatile ( + "1: lwz %1, 0(%2)\n" + " cmpw %1, %5\n" + " bne 2f\n" + " stwu %3, 0(%2)\n" + "2:\n" + ".section .fixup, \"ax\"\n" + "3: li %0, 1\n" + " b 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + _ASM_ALIGN "\n" + _ASM_PTR "1b, 3b\n" + ".previous" + : "=r"(faulted), "=r"(replaced) + : "r"(ip), "r"(new), + "0"(faulted), "r"(old) + : "memory"); + + if (replaced != old && replaced != new) + faulted = 2; + + if (!faulted) + flush_icache_range(ip, ip + 8); + + return faulted; } int ftrace_update_ftrace_func(ftrace_func_t func) @@ -509,10 +128,10 @@ int ftrace_update_ftrace_func(ftrace_func_t func) int __init ftrace_dyn_arch_init(void *data) { - /* caller expects data to be zero */ - unsigned long *p = data; + /* This is running in kstop_machine */ - *p = 0; + ftrace_mcount_set(data); return 0; } + diff --git a/trunk/arch/powerpc/kernel/idle.c b/trunk/arch/powerpc/kernel/idle.c index 88d9c1d5e5fb..31982d05d81a 100644 --- a/trunk/arch/powerpc/kernel/idle.c +++ b/trunk/arch/powerpc/kernel/idle.c @@ -69,15 +69,10 @@ void cpu_idle(void) smp_mb(); local_irq_disable(); - /* Don't trace irqs off for idle */ - stop_critical_timings(); - /* check again after disabling irqs */ if (!need_resched() && !cpu_should_die()) ppc_md.power_save(); - start_critical_timings(); - local_irq_enable(); set_thread_flag(TIF_POLLING_NRFLAG); diff --git a/trunk/arch/powerpc/kernel/misc_32.S b/trunk/arch/powerpc/kernel/misc_32.S index 5c33bc14bd9f..bdc8b0e860e5 100644 --- a/trunk/arch/powerpc/kernel/misc_32.S +++ b/trunk/arch/powerpc/kernel/misc_32.S @@ -479,20 +479,17 @@ _GLOBAL(_tlbil_pid) * (no broadcast) */ _GLOBAL(_tlbil_va) - mfmsr r10 - wrteei 0 slwi r4,r4,16 mtspr SPRN_MAS6,r4 /* assume AS=0 for now */ tlbsx 0,r3 mfspr r4,SPRN_MAS1 /* check valid */ andis. r3,r4,MAS1_VALID@h - beq 1f + beqlr rlwinm r4,r4,0,1,31 mtspr SPRN_MAS1,r4 tlbwe msync isync -1: wrtee r10 blr #endif /* CONFIG_FSL_BOOKE */ diff --git a/trunk/arch/powerpc/kernel/module_32.c b/trunk/arch/powerpc/kernel/module_32.c index f832773fc28e..2df91a03462a 100644 --- a/trunk/arch/powerpc/kernel/module_32.c +++ b/trunk/arch/powerpc/kernel/module_32.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -54,9 +53,6 @@ static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num) r_addend = rela[i].r_addend; } -#ifdef CONFIG_DYNAMIC_FTRACE - _count_relocs++; /* add one for ftrace_caller */ -#endif return _count_relocs; } @@ -310,11 +306,5 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, return -ENOEXEC; } } -#ifdef CONFIG_DYNAMIC_FTRACE - module->arch.tramp = - do_plt_call(module->module_core, - (unsigned long)ftrace_caller, - sechdrs, module); -#endif return 0; } diff --git a/trunk/arch/powerpc/kernel/module_64.c b/trunk/arch/powerpc/kernel/module_64.c index 8992b031a7b6..1af2377e4992 100644 --- a/trunk/arch/powerpc/kernel/module_64.c +++ b/trunk/arch/powerpc/kernel/module_64.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -164,11 +163,6 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, } } -#ifdef CONFIG_DYNAMIC_FTRACE - /* make the trampoline to the ftrace_caller */ - relocs++; -#endif - DEBUGP("Looks like a total of %lu stubs, max\n", relocs); return relocs * sizeof(struct ppc64_stub_entry); } @@ -447,12 +441,5 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } } -#ifdef CONFIG_DYNAMIC_FTRACE - me->arch.toc = my_r2(sechdrs, me); - me->arch.tramp = stub_for_addr(sechdrs, - (unsigned long)ftrace_caller, - me); -#endif - return 0; } diff --git a/trunk/arch/powerpc/lib/Makefile b/trunk/arch/powerpc/lib/Makefile index 8db35278a4b4..d69912c07ce7 100644 --- a/trunk/arch/powerpc/lib/Makefile +++ b/trunk/arch/powerpc/lib/Makefile @@ -6,9 +6,6 @@ ifeq ($(CONFIG_PPC64),y) EXTRA_CFLAGS += -mno-minimal-toc endif -CFLAGS_REMOVE_code-patching.o = -pg -CFLAGS_REMOVE_feature-fixups.o = -pg - obj-y := string.o alloc.o \ checksum_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o diff --git a/trunk/arch/powerpc/lib/rheap.c b/trunk/arch/powerpc/lib/rheap.c index 45907c1dae66..29b2941cada0 100644 --- a/trunk/arch/powerpc/lib/rheap.c +++ b/trunk/arch/powerpc/lib/rheap.c @@ -556,7 +556,6 @@ unsigned long rh_alloc_fixed(rh_info_t * info, unsigned long start, int size, co be = blk->start + blk->size; if (s >= bs && e <= be) break; - blk = NULL; } if (blk == NULL) diff --git a/trunk/arch/powerpc/mm/hugetlbpage.c b/trunk/arch/powerpc/mm/hugetlbpage.c index f0c3b88d50fa..7bbf4e4ed430 100644 --- a/trunk/arch/powerpc/mm/hugetlbpage.c +++ b/trunk/arch/powerpc/mm/hugetlbpage.c @@ -507,9 +507,6 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, { struct hstate *hstate = hstate_file(file); int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); - - if (!mmu_huge_psizes[mmu_psize]) - return -EINVAL; return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); } diff --git a/trunk/arch/powerpc/mm/numa.c b/trunk/arch/powerpc/mm/numa.c index cf81049e1e51..a8397bbad3d4 100644 --- a/trunk/arch/powerpc/mm/numa.c +++ b/trunk/arch/powerpc/mm/numa.c @@ -901,17 +901,10 @@ static void mark_reserved_regions_for_nid(int nid) if (end_pfn > node_ar.end_pfn) reserve_size = (node_ar.end_pfn << PAGE_SHIFT) - (start_pfn << PAGE_SHIFT); - /* - * Only worry about *this* node, others may not - * yet have valid NODE_DATA(). - */ - if (node_ar.nid == nid) { - dbg("reserve_bootmem %lx %lx nid=%d\n", - physbase, reserve_size, node_ar.nid); - reserve_bootmem_node(NODE_DATA(node_ar.nid), - physbase, reserve_size, - BOOTMEM_DEFAULT); - } + dbg("reserve_bootmem %lx %lx nid=%d\n", physbase, + reserve_size, node_ar.nid); + reserve_bootmem_node(NODE_DATA(node_ar.nid), physbase, + reserve_size, BOOTMEM_DEFAULT); /* * if reserved region is contained in the active region * then done. @@ -936,6 +929,7 @@ static void mark_reserved_regions_for_nid(int nid) void __init do_init_bootmem(void) { int nid; + unsigned int i; min_low_pfn = 0; max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; diff --git a/trunk/arch/powerpc/platforms/cell/axon_msi.c b/trunk/arch/powerpc/platforms/cell/axon_msi.c index 0ce45c2b42f8..442cf36aa172 100644 --- a/trunk/arch/powerpc/platforms/cell/axon_msi.c +++ b/trunk/arch/powerpc/platforms/cell/axon_msi.c @@ -413,9 +413,6 @@ static int axon_msi_probe(struct of_device *device, MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE | MSIC_CTRL_FIFO_SIZE); - msic->read_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG) - & MSIC_FIFO_SIZE_MASK; - device->dev.platform_data = msic; ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs; diff --git a/trunk/arch/s390/kernel/topology.c b/trunk/arch/s390/kernel/topology.c index bf96f1b5c6ec..a947899dcba1 100644 --- a/trunk/arch/s390/kernel/topology.c +++ b/trunk/arch/s390/kernel/topology.c @@ -212,7 +212,7 @@ static void update_cpu_core_map(void) cpu_core_map[cpu] = cpu_coregroup_map(cpu); } -int arch_update_cpu_topology(void) +void arch_update_cpu_topology(void) { struct tl_info *info = tl_info; struct sys_device *sysdev; @@ -221,7 +221,7 @@ int arch_update_cpu_topology(void) if (!machine_has_topology) { update_cpu_core_map(); topology_update_polarization_simple(); - return 0; + return; } stsi(info, 15, 1, 2); tl_to_cores(info); @@ -230,7 +230,6 @@ int arch_update_cpu_topology(void) sysdev = get_cpu_sysdev(cpu); kobject_uevent(&sysdev->kobj, KOBJ_CHANGE); } - return 1; } static void topology_work_fn(struct work_struct *work) diff --git a/trunk/arch/sh/Kconfig b/trunk/arch/sh/Kconfig index 5c9cbfc14c4d..80119b3398e7 100644 --- a/trunk/arch/sh/Kconfig +++ b/trunk/arch/sh/Kconfig @@ -55,8 +55,6 @@ config GENERIC_HARDIRQS config GENERIC_HARDIRQS_NO__DO_IRQ def_bool y - depends on SUPERH32 && (!SH_DREAMCAST && !SH_SH4202_MICRODEV && \ - !SH_7751_SYSTEMH && !HD64461) config GENERIC_IRQ_PROBE def_bool y diff --git a/trunk/arch/sh/include/asm/topology.h b/trunk/arch/sh/include/asm/topology.h index 279d9cc4a007..95f0085e098a 100644 --- a/trunk/arch/sh/include/asm/topology.h +++ b/trunk/arch/sh/include/asm/topology.h @@ -5,6 +5,7 @@ /* sched_domains SD_NODE_INIT for sh machines */ #define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/trunk/arch/um/include/asm/system.h b/trunk/arch/um/include/asm/system.h index ae5f94d6317d..753346e2cdfd 100644 --- a/trunk/arch/um/include/asm/system.h +++ b/trunk/arch/um/include/asm/system.h @@ -11,21 +11,21 @@ extern int get_signals(void); extern void block_signals(void); extern void unblock_signals(void); -#define raw_local_save_flags(flags) do { typecheck(unsigned long, flags); \ +#define local_save_flags(flags) do { typecheck(unsigned long, flags); \ (flags) = get_signals(); } while(0) -#define raw_local_irq_restore(flags) do { typecheck(unsigned long, flags); \ +#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \ set_signals(flags); } while(0) -#define raw_local_irq_save(flags) do { raw_local_save_flags(flags); \ - raw_local_irq_disable(); } while(0) +#define local_irq_save(flags) do { local_save_flags(flags); \ + local_irq_disable(); } while(0) -#define raw_local_irq_enable() unblock_signals() -#define raw_local_irq_disable() block_signals() +#define local_irq_enable() unblock_signals() +#define local_irq_disable() block_signals() #define irqs_disabled() \ ({ \ unsigned long flags; \ - raw_local_save_flags(flags); \ + local_save_flags(flags); \ (flags == 0); \ }) diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index 0ca2eb7573cd..ac22bb7719f7 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -29,14 +29,11 @@ config X86 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER select HAVE_ARCH_TRACEHOOK select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS - select USER_STACKTRACE_SUPPORT config ARCH_DEFCONFIG string @@ -241,25 +238,6 @@ config X86_HAS_BOOT_CPU_ID def_bool y depends on X86_VOYAGER -config SPARSE_IRQ - bool "Support sparse irq numbering" - depends on PCI_MSI || HT_IRQ - default y - help - This enables support for sparse irq, esp for msi/msi-x. You may need - if you have lots of cards supports msi-x installed. - - If you don't know what to do here, say Y. - -config NUMA_MIGRATE_IRQ_DESC - bool "Move irq desc when changing irq smp_affinity" - depends on SPARSE_IRQ && SMP - default n - help - This enables moving irq_desc to cpu/node that irq will use handled. - - If you don't know what to do here, say N. - config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER @@ -389,10 +367,10 @@ config X86_RDC321X as R-8610-(G). If you don't have one of these chips, you should say N here. -config SCHED_OMIT_FRAME_POINTER +config SCHED_NO_NO_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" - depends on X86 + depends on X86_32 help Calculate simpler /proc//wchan values. If this option is disabled then wchan values will recurse back to the @@ -487,6 +465,10 @@ config X86_CYCLONE_TIMER def_bool y depends on X86_GENERICARCH +config ES7000_CLUSTERED_APIC + def_bool y + depends on SMP && X86_ES7000 && MPENTIUMIII + source "arch/x86/Kconfig.cpu" config HPET_TIMER @@ -600,20 +582,19 @@ config IOMMU_HELPER config MAXSMP bool "Configure Maximum number of SMP Processors and NUMA Nodes" - depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL - select CPUMASK_OFFSTACK + depends on X86_64 && SMP && BROKEN default n help Configure maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. config NR_CPUS - int "Maximum number of CPUs" if SMP && !MAXSMP - range 2 512 if SMP && !MAXSMP - default "1" if !SMP + int "Maximum number of CPUs (2-512)" if !MAXSMP + range 2 512 + depends on SMP default "4096" if MAXSMP - default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000) - default "8" if SMP + default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 + default "8" help This allows you to specify the maximum number of CPUs which this kernel will support. The maximum supported value is 512 and the @@ -1651,6 +1632,13 @@ config APM_ALLOW_INTS many of the newer IBM Thinkpads. If you experience hangs when you suspend, try setting this to Y. Otherwise, say N. +config APM_REAL_MODE_POWER_OFF + bool "Use real mode APM BIOS call to power off" + help + Use real mode APM BIOS calls to switch off the computer. This is + a work-around for a number of buggy BIOSes. Switch this option on if + your computer crashes instead of powering off properly. + endif # APM source "arch/x86/kernel/cpu/cpufreq/Kconfig" diff --git a/trunk/arch/x86/Kconfig.cpu b/trunk/arch/x86/Kconfig.cpu index 85a78575956c..b815664fe370 100644 --- a/trunk/arch/x86/Kconfig.cpu +++ b/trunk/arch/x86/Kconfig.cpu @@ -515,7 +515,6 @@ config CPU_SUP_UMC_32 config X86_DS def_bool X86_PTRACE_BTS depends on X86_DEBUGCTLMSR - select HAVE_HW_BRANCH_TRACER config X86_PTRACE_BTS bool "Branch Trace Store" diff --git a/trunk/arch/x86/Kconfig.debug b/trunk/arch/x86/Kconfig.debug index fa013f529b74..2a3dfbd5e677 100644 --- a/trunk/arch/x86/Kconfig.debug +++ b/trunk/arch/x86/Kconfig.debug @@ -186,10 +186,14 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. +config MMIOTRACE_HOOKS + bool + config MMIOTRACE bool "Memory mapped IO tracing" depends on DEBUG_KERNEL && PCI select TRACING + select MMIOTRACE_HOOKS help Mmiotrace traces Memory Mapped I/O access and is meant for debugging and reverse engineering. It is called from the ioremap diff --git a/trunk/arch/x86/include/asm/apic.h b/trunk/arch/x86/include/asm/apic.h index 25caa0738af5..3b1510b4fc57 100644 --- a/trunk/arch/x86/include/asm/apic.h +++ b/trunk/arch/x86/include/asm/apic.h @@ -193,7 +193,6 @@ extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask); static inline void lapic_shutdown(void) { } #define local_apic_timer_c2_ok 1 static inline void init_apic_mappings(void) { } -static inline void disable_local_APIC(void) { } #endif /* !CONFIG_X86_LOCAL_APIC */ diff --git a/trunk/arch/x86/include/asm/bigsmp/apic.h b/trunk/arch/x86/include/asm/bigsmp/apic.h index d8dd9f537911..1d9543b9d358 100644 --- a/trunk/arch/x86/include/asm/bigsmp/apic.h +++ b/trunk/arch/x86/include/asm/bigsmp/apic.h @@ -9,12 +9,12 @@ static inline int apic_id_registered(void) return (1); } -static inline const cpumask_t *target_cpus(void) +static inline cpumask_t target_cpus(void) { #ifdef CONFIG_SMP - return &cpu_online_map; + return cpu_online_map; #else - return &cpumask_of_cpu(0); + return cpumask_of_cpu(0); #endif } @@ -24,6 +24,8 @@ static inline const cpumask_t *target_cpus(void) #define INT_DELIVERY_MODE (dest_Fixed) #define INT_DEST_MODE (0) /* phys delivery to target proc */ #define NO_BALANCE_IRQ (0) +#define WAKE_SECONDARY_VIA_INIT + static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { @@ -79,7 +81,7 @@ static inline int apicid_to_node(int logical_apicid) static inline int cpu_present_to_apicid(int mps_cpu) { - if (mps_cpu < nr_cpu_ids) + if (mps_cpu < NR_CPUS) return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); return BAD_APICID; @@ -94,7 +96,7 @@ extern u8 cpu_2_logical_apicid[]; /* Mapping from cpu number to logical apicid */ static inline int cpu_to_logical_apicid(int cpu) { - if (cpu >= nr_cpu_ids) + if (cpu >= NR_CPUS) return BAD_APICID; return cpu_physical_id(cpu); } @@ -119,34 +121,16 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) } /* As we are using single CPU as destination, pick only one CPU here */ -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; int apicid; - cpu = first_cpu(*cpumask); + cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); return apicid; } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return cpu_to_logical_apicid(cpu); - - return BAD_APICID; -} - static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; diff --git a/trunk/arch/x86/include/asm/bigsmp/ipi.h b/trunk/arch/x86/include/asm/bigsmp/ipi.h index 27fcd01b3ae6..9404c535b7ec 100644 --- a/trunk/arch/x86/include/asm/bigsmp/ipi.h +++ b/trunk/arch/x86/include/asm/bigsmp/ipi.h @@ -1,22 +1,25 @@ #ifndef __ASM_MACH_IPI_H #define __ASM_MACH_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void send_IPI_mask_sequence(cpumask_t mask, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + send_IPI_mask(cpu_online_map, vector); } #endif /* __ASM_MACH_IPI_H */ diff --git a/trunk/arch/x86/include/asm/desc.h b/trunk/arch/x86/include/asm/desc.h index dc27705f5443..e6b82b17b072 100644 --- a/trunk/arch/x86/include/asm/desc.h +++ b/trunk/arch/x86/include/asm/desc.h @@ -320,14 +320,16 @@ static inline void set_intr_gate(unsigned int n, void *addr) _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); } +#define SYS_VECTOR_FREE 0 +#define SYS_VECTOR_ALLOCED 1 + extern int first_system_vector; -/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */ -extern unsigned long used_vectors[]; +extern char system_vectors[]; static inline void alloc_system_vector(int vector) { - if (!test_bit(vector, used_vectors)) { - set_bit(vector, used_vectors); + if (system_vectors[vector] == SYS_VECTOR_FREE) { + system_vectors[vector] = SYS_VECTOR_ALLOCED; if (first_system_vector > vector) first_system_vector = vector; } else diff --git a/trunk/arch/x86/include/asm/ds.h b/trunk/arch/x86/include/asm/ds.h index 99b6c39774a4..a95008457ea4 100644 --- a/trunk/arch/x86/include/asm/ds.h +++ b/trunk/arch/x86/include/asm/ds.h @@ -7,12 +7,13 @@ * * It manages: * - per-thread and per-cpu allocation of BTS and PEBS - * - buffer overflow handling (to be done) + * - buffer memory allocation (optional) + * - buffer overflow handling * - buffer access * * It assumes: - * - get_task_struct on all traced tasks - * - current is allowed to trace tasks + * - get_task_struct on all parameter tasks + * - current is allowed to trace parameter tasks * * * Copyright (C) 2007-2008 Intel Corporation. @@ -25,18 +26,11 @@ #include #include -#include #ifdef CONFIG_X86_DS struct task_struct; -struct ds_tracer; -struct bts_tracer; -struct pebs_tracer; - -typedef void (*bts_ovfl_callback_t)(struct bts_tracer *); -typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); /* * Request BTS or PEBS @@ -44,62 +38,60 @@ typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); * Due to alignement constraints, the actual buffer may be slightly * smaller than the requested or provided buffer. * - * Returns a pointer to a tracer structure on success, or - * ERR_PTR(errcode) on failure. - * - * The interrupt threshold is independent from the overflow callback - * to allow users to use their own overflow interrupt handling mechanism. + * Returns 0 on success; -Eerrno otherwise * * task: the task to request recording for; * NULL for per-cpu recording on the current cpu * base: the base pointer for the (non-pageable) buffer; - * size: the size of the provided buffer in bytes + * NULL if buffer allocation requested + * size: the size of the requested or provided buffer * ovfl: pointer to a function to be called on buffer overflow; * NULL if cyclic buffer requested - * th: the interrupt threshold in records from the end of the buffer; - * -1 if no interrupt threshold is requested. */ -extern struct bts_tracer *ds_request_bts(struct task_struct *task, - void *base, size_t size, - bts_ovfl_callback_t ovfl, size_t th); -extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, - void *base, size_t size, - pebs_ovfl_callback_t ovfl, - size_t th); +typedef void (*ds_ovfl_callback_t)(struct task_struct *); +extern int ds_request_bts(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl); +extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl); /* * Release BTS or PEBS resources * + * Frees buffers allocated on ds_request. + * * Returns 0 on success; -Eerrno otherwise * - * tracer: the tracer handle returned from ds_request_~() + * task: the task to release resources for; + * NULL to release resources for the current cpu */ -extern int ds_release_bts(struct bts_tracer *tracer); -extern int ds_release_pebs(struct pebs_tracer *tracer); +extern int ds_release_bts(struct task_struct *task); +extern int ds_release_pebs(struct task_struct *task); /* - * Get the (array) index of the write pointer. + * Return the (array) index of the write pointer. * (assuming an array of BTS/PEBS records) * - * Returns 0 on success; -Eerrno on error + * Returns -Eerrno on error * - * tracer: the tracer handle returned from ds_request_~() - * pos (out): will hold the result + * task: the task to access; + * NULL to access the current cpu + * pos (out): if not NULL, will hold the result */ -extern int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos); -extern int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos); +extern int ds_get_bts_index(struct task_struct *task, size_t *pos); +extern int ds_get_pebs_index(struct task_struct *task, size_t *pos); /* - * Get the (array) index one record beyond the end of the array. + * Return the (array) index one record beyond the end of the array. * (assuming an array of BTS/PEBS records) * - * Returns 0 on success; -Eerrno on error + * Returns -Eerrno on error * - * tracer: the tracer handle returned from ds_request_~() - * pos (out): will hold the result + * task: the task to access; + * NULL to access the current cpu + * pos (out): if not NULL, will hold the result */ -extern int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos); -extern int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos); +extern int ds_get_bts_end(struct task_struct *task, size_t *pos); +extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); /* * Provide a pointer to the BTS/PEBS record at parameter index. @@ -110,13 +102,14 @@ extern int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos); * * Returns the size of a single record on success; -Eerrno on error * - * tracer: the tracer handle returned from ds_request_~() + * task: the task to access; + * NULL to access the current cpu * index: the index of the requested record * record (out): pointer to the requested record */ -extern int ds_access_bts(struct bts_tracer *tracer, +extern int ds_access_bts(struct task_struct *task, size_t index, const void **record); -extern int ds_access_pebs(struct pebs_tracer *tracer, +extern int ds_access_pebs(struct task_struct *task, size_t index, const void **record); /* @@ -136,24 +129,38 @@ extern int ds_access_pebs(struct pebs_tracer *tracer, * * Returns the number of bytes written or -Eerrno. * - * tracer: the tracer handle returned from ds_request_~() + * task: the task to access; + * NULL to access the current cpu * buffer: the buffer to write * size: the size of the buffer */ -extern int ds_write_bts(struct bts_tracer *tracer, +extern int ds_write_bts(struct task_struct *task, const void *buffer, size_t size); -extern int ds_write_pebs(struct pebs_tracer *tracer, +extern int ds_write_pebs(struct task_struct *task, const void *buffer, size_t size); +/* + * Same as ds_write_bts/pebs, but omit ownership checks. + * + * This is needed to have some other task than the owner of the + * BTS/PEBS buffer or the parameter task itself write into the + * respective buffer. + */ +extern int ds_unchecked_write_bts(struct task_struct *task, + const void *buffer, size_t size); +extern int ds_unchecked_write_pebs(struct task_struct *task, + const void *buffer, size_t size); + /* * Reset the write pointer of the BTS/PEBS buffer. * * Returns 0 on success; -Eerrno on error * - * tracer: the tracer handle returned from ds_request_~() + * task: the task to access; + * NULL to access the current cpu */ -extern int ds_reset_bts(struct bts_tracer *tracer); -extern int ds_reset_pebs(struct pebs_tracer *tracer); +extern int ds_reset_bts(struct task_struct *task); +extern int ds_reset_pebs(struct task_struct *task); /* * Clear the BTS/PEBS buffer and reset the write pointer. @@ -161,30 +168,33 @@ extern int ds_reset_pebs(struct pebs_tracer *tracer); * * Returns 0 on success; -Eerrno on error * - * tracer: the tracer handle returned from ds_request_~() + * task: the task to access; + * NULL to access the current cpu */ -extern int ds_clear_bts(struct bts_tracer *tracer); -extern int ds_clear_pebs(struct pebs_tracer *tracer); +extern int ds_clear_bts(struct task_struct *task); +extern int ds_clear_pebs(struct task_struct *task); /* * Provide the PEBS counter reset value. * * Returns 0 on success; -Eerrno on error * - * tracer: the tracer handle returned from ds_request_pebs() + * task: the task to access; + * NULL to access the current cpu * value (out): the counter reset value */ -extern int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value); +extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); /* * Set the PEBS counter reset value. * * Returns 0 on success; -Eerrno on error * - * tracer: the tracer handle returned from ds_request_pebs() + * task: the task to access; + * NULL to access the current cpu * value: the new counter reset value */ -extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value); +extern int ds_set_pebs_reset(struct task_struct *task, u64 value); /* * Initialization @@ -197,13 +207,17 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); /* * The DS context - part of struct thread_struct. */ -#define MAX_SIZEOF_DS (12 * 8) - struct ds_context { /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ - unsigned char ds[MAX_SIZEOF_DS]; + unsigned char *ds; /* the owner of the BTS and PEBS configuration, respectively */ - struct ds_tracer *owner[2]; + struct task_struct *owner[2]; + /* buffer overflow notification function for BTS and PEBS */ + ds_ovfl_callback_t callback[2]; + /* the original buffer address */ + void *buffer[2]; + /* the number of allocated pages for on-request allocated buffers */ + unsigned int pages[2]; /* use count */ unsigned long count; /* a pointer to the context location inside the thread_struct diff --git a/trunk/arch/x86/include/asm/emergency-restart.h b/trunk/arch/x86/include/asm/emergency-restart.h index cc70c1c78ca4..94826cf87455 100644 --- a/trunk/arch/x86/include/asm/emergency-restart.h +++ b/trunk/arch/x86/include/asm/emergency-restart.h @@ -8,9 +8,7 @@ enum reboot_type { BOOT_BIOS = 'b', #endif BOOT_ACPI = 'a', - BOOT_EFI = 'e', - BOOT_CF9 = 'p', - BOOT_CF9_COND = 'q', + BOOT_EFI = 'e' }; extern enum reboot_type reboot_type; diff --git a/trunk/arch/x86/include/asm/es7000/apic.h b/trunk/arch/x86/include/asm/es7000/apic.h index 51ac1230294e..380f0b4f17ed 100644 --- a/trunk/arch/x86/include/asm/es7000/apic.h +++ b/trunk/arch/x86/include/asm/es7000/apic.h @@ -9,27 +9,31 @@ static inline int apic_id_registered(void) return (1); } -static inline const cpumask_t *target_cpus_cluster(void) +static inline cpumask_t target_cpus(void) { - return &CPU_MASK_ALL; -} - -static inline const cpumask_t *target_cpus(void) -{ - return &cpumask_of_cpu(smp_processor_id()); +#if defined CONFIG_ES7000_CLUSTERED_APIC + return CPU_MASK_ALL; +#else + return cpumask_of_cpu(smp_processor_id()); +#endif } -#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) -#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) -#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ -#define NO_BALANCE_IRQ_CLUSTER (1) - +#if defined CONFIG_ES7000_CLUSTERED_APIC +#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) +#define INT_DELIVERY_MODE (dest_LowestPrio) +#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ +#define NO_BALANCE_IRQ (1) +#undef WAKE_SECONDARY_VIA_INIT +#define WAKE_SECONDARY_VIA_MIP +#else #define APIC_DFR_VALUE (APIC_DFR_FLAT) #define INT_DELIVERY_MODE (dest_Fixed) #define INT_DEST_MODE (0) /* phys delivery to target procs */ #define NO_BALANCE_IRQ (0) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL 0x0 +#define WAKE_SECONDARY_VIA_INIT +#endif static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { @@ -56,16 +60,6 @@ static inline unsigned long calculate_ldr(int cpu) * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel * document number 292116). So here it goes... */ -static inline void init_apic_ldr_cluster(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - static inline void init_apic_ldr(void) { unsigned long val; @@ -76,14 +70,17 @@ static inline void init_apic_ldr(void) apic_write(APIC_LDR, val); } +#ifndef CONFIG_X86_GENERICARCH +extern void enable_apic_mode(void); +#endif + extern int apic_version [MAX_APICS]; static inline void setup_apic_routing(void) { int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", (apic_version[apic] == 0x14) ? - "Physical Cluster" : "Logical Cluster", - nr_ioapics, cpus_addr(*target_cpus())[0]); + "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]); } static inline int multi_timer_check(int apic, int irq) @@ -101,7 +98,7 @@ static inline int cpu_present_to_apicid(int mps_cpu) { if (!mps_cpu) return boot_cpu_physical_apicid; - else if (mps_cpu < nr_cpu_ids) + else if (mps_cpu < NR_CPUS) return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); else return BAD_APICID; @@ -121,9 +118,9 @@ extern u8 cpu_2_logical_apicid[]; static inline int cpu_to_logical_apicid(int cpu) { #ifdef CONFIG_SMP - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; + if (cpu >= NR_CPUS) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; #else return logical_smp_processor_id(); #endif @@ -147,114 +144,44 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid) return (1); } -static inline unsigned int -cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpumask_weight(cpumask); + num_bits_set = cpus_weight(cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) +#if defined CONFIG_ES7000_CLUSTERED_APIC return 0xFF; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. - */ - cpu = cpumask_first(cpumask); - apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ - printk ("%s: Not a valid mask!\n", __func__); - return 0xFF; - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) -{ - int num_bits_set; - int cpus_found = 0; - int cpu; - int apicid; - - num_bits_set = cpus_weight(*cpumask); - /* Return id to all */ - if (num_bits_set == NR_CPUS) +#else return cpu_to_logical_apicid(0); +#endif /* * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - cpu = first_cpu(*cpumask); - apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ - printk ("%s: Not a valid mask!\n", __func__); - return cpu_to_logical_apicid(0); - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - - -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int num_bits_set; - int cpus_found = 0; - int cpu; - int apicid = cpu_to_logical_apicid(0); - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - - num_bits_set = cpumask_weight(cpumask); - /* Return id to all */ - if (num_bits_set == NR_CPUS) - goto exit; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. - */ - cpu = cpumask_first(cpumask); + cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { + if (cpu_isset(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ printk ("%s: Not a valid mask!\n", __func__); +#if defined CONFIG_ES7000_CLUSTERED_APIC + return 0xFF; +#else return cpu_to_logical_apicid(0); +#endif } apicid = new_apicid; cpus_found++; } cpu++; } -exit: - free_cpumask_var(cpumask); return apicid; } diff --git a/trunk/arch/x86/include/asm/es7000/ipi.h b/trunk/arch/x86/include/asm/es7000/ipi.h index 7e8ed24d4b8a..632a955fcc0a 100644 --- a/trunk/arch/x86/include/asm/es7000/ipi.h +++ b/trunk/arch/x86/include/asm/es7000/ipi.h @@ -1,22 +1,24 @@ #ifndef __ASM_ES7000_IPI_H #define __ASM_ES7000_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void send_IPI_mask_sequence(cpumask_t mask, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + send_IPI_mask(cpu_online_map, vector); } #endif /* __ASM_ES7000_IPI_H */ diff --git a/trunk/arch/x86/include/asm/es7000/wakecpu.h b/trunk/arch/x86/include/asm/es7000/wakecpu.h index 78f0daaee436..398493461913 100644 --- a/trunk/arch/x86/include/asm/es7000/wakecpu.h +++ b/trunk/arch/x86/include/asm/es7000/wakecpu.h @@ -1,12 +1,36 @@ #ifndef __ASM_ES7000_WAKECPU_H #define __ASM_ES7000_WAKECPU_H -#define TRAMPOLINE_PHYS_LOW 0x467 -#define TRAMPOLINE_PHYS_HIGH 0x469 +/* + * This file copes with machines that wakeup secondary CPUs by the + * INIT, INIT, STARTUP sequence. + */ + +#ifdef CONFIG_ES7000_CLUSTERED_APIC +#define WAKE_SECONDARY_VIA_MIP +#else +#define WAKE_SECONDARY_VIA_INIT +#endif + +#ifdef WAKE_SECONDARY_VIA_MIP +extern int es7000_start_cpu(int cpu, unsigned long eip); +static inline int +wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) +{ + int boot_error = 0; + boot_error = es7000_start_cpu(phys_apicid, start_eip); + return boot_error; +} +#endif + +#define TRAMPOLINE_LOW phys_to_virt(0x467) +#define TRAMPOLINE_HIGH phys_to_virt(0x469) + +#define boot_cpu_apicid boot_cpu_physical_apicid static inline void wait_for_init_deassert(atomic_t *deassert) { -#ifndef CONFIG_ES7000_CLUSTERED_APIC +#ifdef WAKE_SECONDARY_VIA_INIT while (!atomic_read(deassert)) cpu_relax(); #endif @@ -26,12 +50,9 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) { } -extern void __inquire_remote_apic(int apicid); - -static inline void inquire_remote_apic(int apicid) -{ - if (apic_verbosity >= APIC_DEBUG) - __inquire_remote_apic(apicid); -} +#define inquire_remote_apic(apicid) do { \ + if (apic_verbosity >= APIC_DEBUG) \ + __inquire_remote_apic(apicid); \ + } while (0) #endif /* __ASM_MACH_WAKECPU_H */ diff --git a/trunk/arch/x86/include/asm/ftrace.h b/trunk/arch/x86/include/asm/ftrace.h index 7e61b4ceb9a4..9e8bc29b8b17 100644 --- a/trunk/arch/x86/include/asm/ftrace.h +++ b/trunk/arch/x86/include/asm/ftrace.h @@ -17,40 +17,8 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) */ return addr - 1; } +#endif -#ifdef CONFIG_DYNAMIC_FTRACE - -struct dyn_arch_ftrace { - /* No extra data needed for x86 */ -}; - -#endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - -#ifndef __ASSEMBLY__ - -/* - * Stack of return addresses for functions - * of a thread. - * Used in struct thread_info - */ -struct ftrace_ret_stack { - unsigned long ret; - unsigned long func; - unsigned long long calltime; -}; - -/* - * Primary handler of a function return. - * It relays on ftrace_return_to_handler. - * Defined in entry32.S - */ -extern void return_to_handler(void); - -#endif /* __ASSEMBLY__ */ -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - #endif /* _ASM_X86_FTRACE_H */ diff --git a/trunk/arch/x86/include/asm/genapic_32.h b/trunk/arch/x86/include/asm/genapic_32.h index 746f37a7963a..5cbd4fcc06fd 100644 --- a/trunk/arch/x86/include/asm/genapic_32.h +++ b/trunk/arch/x86/include/asm/genapic_32.h @@ -2,7 +2,6 @@ #define _ASM_X86_GENAPIC_32_H #include -#include /* * Generic APIC driver interface. @@ -24,7 +23,7 @@ struct genapic { int (*probe)(void); int (*apic_id_registered)(void); - const struct cpumask *(*target_cpus)(void); + cpumask_t (*target_cpus)(void); int int_delivery_mode; int int_dest_mode; int ESR_DISABLE; @@ -57,27 +56,15 @@ struct genapic { unsigned (*get_apic_id)(unsigned long x); unsigned long apic_id_mask; - unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask); - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); + cpumask_t (*vector_allocation_domain)(int cpu); #ifdef CONFIG_SMP /* ipi */ - void (*send_IPI_mask)(const struct cpumask *mask, int vector); - void (*send_IPI_mask_allbutself)(const struct cpumask *mask, - int vector); + void (*send_IPI_mask)(cpumask_t mask, int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); #endif - int (*wakeup_cpu)(int apicid, unsigned long start_eip); - int trampoline_phys_low; - int trampoline_phys_high; - void (*wait_for_init_deassert)(atomic_t *deassert); - void (*smp_callin_clear_local_apic)(void); - void (*store_NMI_vector)(unsigned short *high, unsigned short *low); - void (*restore_NMI_vector)(unsigned short *high, unsigned short *low); - void (*inquire_remote_apic)(int apicid); }; #define APICFUNC(x) .x = x, @@ -118,25 +105,16 @@ struct genapic { APICFUNC(get_apic_id) \ .apic_id_mask = APIC_ID_MASK, \ APICFUNC(cpu_mask_to_apicid) \ - APICFUNC(cpu_mask_to_apicid_and) \ - APICFUNC(vector_allocation_domain) \ + APICFUNC(vector_allocation_domain) \ APICFUNC(acpi_madt_oem_check) \ IPIFUNC(send_IPI_mask) \ IPIFUNC(send_IPI_allbutself) \ IPIFUNC(send_IPI_all) \ APICFUNC(enable_apic_mode) \ APICFUNC(phys_pkg_id) \ - .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \ - .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \ - APICFUNC(wait_for_init_deassert) \ - APICFUNC(smp_callin_clear_local_apic) \ - APICFUNC(store_NMI_vector) \ - APICFUNC(restore_NMI_vector) \ - APICFUNC(inquire_remote_apic) \ } extern struct genapic *genapic; -extern void es7000_update_genapic_to_cluster(void); enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; #define get_uv_system_type() UV_NONE diff --git a/trunk/arch/x86/include/asm/genapic_64.h b/trunk/arch/x86/include/asm/genapic_64.h index adf32fb56aa6..13c4e96199ea 100644 --- a/trunk/arch/x86/include/asm/genapic_64.h +++ b/trunk/arch/x86/include/asm/genapic_64.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_GENAPIC_64_H #define _ASM_X86_GENAPIC_64_H -#include - /* * Copyright 2004 James Cleverdon, IBM. * Subject to the GNU Public License, v.2 @@ -20,26 +18,20 @@ struct genapic { u32 int_delivery_mode; u32 int_dest_mode; int (*apic_id_registered)(void); - const struct cpumask *(*target_cpus)(void); - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + cpumask_t (*target_cpus)(void); + cpumask_t (*vector_allocation_domain)(int cpu); void (*init_apic_ldr)(void); /* ipi */ - void (*send_IPI_mask)(const struct cpumask *mask, int vector); - void (*send_IPI_mask_allbutself)(const struct cpumask *mask, - int vector); + void (*send_IPI_mask)(cpumask_t mask, int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); /* */ - unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask); + unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); unsigned int (*phys_pkg_id)(int index_msb); unsigned int (*get_apic_id)(unsigned long x); unsigned long (*set_apic_id)(unsigned int id); unsigned long apic_id_mask; - /* wakeup_secondary_cpu */ - int (*wakeup_cpu)(int apicid, unsigned long start_eip); }; extern struct genapic *genapic; diff --git a/trunk/arch/x86/include/asm/io_apic.h b/trunk/arch/x86/include/asm/io_apic.h index 25d527ca1362..6afd9933a7dd 100644 --- a/trunk/arch/x86/include/asm/io_apic.h +++ b/trunk/arch/x86/include/asm/io_apic.h @@ -188,14 +188,17 @@ extern void restore_IO_APIC_setup(void); extern void reinit_intr_remapped_IO_APIC(int); #endif -extern void probe_nr_irqs_gsi(void); +extern int probe_nr_irqs(void); #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 static const int timer_through_8259 = 0; -static inline void ioapic_init_mappings(void) { } +static inline void ioapic_init_mappings(void) { } -static inline void probe_nr_irqs_gsi(void) { } +static inline int probe_nr_irqs(void) +{ + return NR_IRQS; +} #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/trunk/arch/x86/include/asm/ipi.h b/trunk/arch/x86/include/asm/ipi.h index c745a306f7d3..f89dffb28aa9 100644 --- a/trunk/arch/x86/include/asm/ipi.h +++ b/trunk/arch/x86/include/asm/ipi.h @@ -117,8 +117,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, native_apic_mem_write(APIC_ICR, cfg); } -static inline void send_IPI_mask_sequence(const struct cpumask *mask, - int vector) +static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) { unsigned long flags; unsigned long query_cpu; @@ -129,29 +128,11 @@ static inline void send_IPI_mask_sequence(const struct cpumask *mask, * - mbligh */ local_irq_save(flags); - for_each_cpu(query_cpu, mask) { + for_each_cpu_mask_nr(query_cpu, mask) { __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } -static inline void send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) -{ - unsigned long flags; - unsigned int query_cpu; - unsigned int this_cpu = smp_processor_id(); - - /* See Hack comment above */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) - __send_IPI_dest_field( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); - local_irq_restore(flags); -} - #endif /* _ASM_X86_IPI_H */ diff --git a/trunk/arch/x86/include/asm/irq.h b/trunk/arch/x86/include/asm/irq.h index 4bb732e45a85..bae0eda95486 100644 --- a/trunk/arch/x86/include/asm/irq.h +++ b/trunk/arch/x86/include/asm/irq.h @@ -37,7 +37,7 @@ extern int irqbalance_disable(char *str); #ifdef CONFIG_HOTPLUG_CPU #include -extern void fixup_irqs(void); +extern void fixup_irqs(cpumask_t map); #endif extern unsigned int do_IRQ(struct pt_regs *regs); @@ -46,6 +46,5 @@ extern void native_init_IRQ(void); /* Interrupt vector management */ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); -extern int vector_used_by_percpu_irq(unsigned int vector); #endif /* _ASM_X86_IRQ_H */ diff --git a/trunk/arch/x86/include/asm/irq_vectors.h b/trunk/arch/x86/include/asm/irq_vectors.h index f7ff65032b9d..0005adb0f941 100644 --- a/trunk/arch/x86/include/asm/irq_vectors.h +++ b/trunk/arch/x86/include/asm/irq_vectors.h @@ -101,23 +101,12 @@ #define LAST_VM86_IRQ 15 #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) -#define NR_IRQS_LEGACY 16 - #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) - -#ifndef CONFIG_SPARSE_IRQ # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) # else # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif -#else -# if (8 * NR_CPUS) > (32 * MAX_IO_APICS) -# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS)) -# else -# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) -# endif -#endif #elif defined(CONFIG_X86_VOYAGER) diff --git a/trunk/arch/x86/include/asm/kexec.h b/trunk/arch/x86/include/asm/kexec.h index c61d8b2ab8b9..a1f22771a15a 100644 --- a/trunk/arch/x86/include/asm/kexec.h +++ b/trunk/arch/x86/include/asm/kexec.h @@ -5,8 +5,21 @@ # define PA_CONTROL_PAGE 0 # define VA_CONTROL_PAGE 1 # define PA_PGD 2 -# define PA_SWAP_PAGE 3 -# define PAGES_NR 4 +# define VA_PGD 3 +# define PA_PTE_0 4 +# define VA_PTE_0 5 +# define PA_PTE_1 6 +# define VA_PTE_1 7 +# define PA_SWAP_PAGE 8 +# ifdef CONFIG_X86_PAE +# define PA_PMD_0 9 +# define VA_PMD_0 10 +# define PA_PMD_1 11 +# define VA_PMD_1 12 +# define PAGES_NR 13 +# else +# define PAGES_NR 9 +# endif #else # define PA_CONTROL_PAGE 0 # define VA_CONTROL_PAGE 1 @@ -157,20 +170,6 @@ relocate_kernel(unsigned long indirection_page, unsigned long start_address) ATTRIB_NORET; #endif -#ifdef CONFIG_X86_32 -#define ARCH_HAS_KIMAGE_ARCH - -struct kimage_arch { - pgd_t *pgd; -#ifdef CONFIG_X86_PAE - pmd_t *pmd0; - pmd_t *pmd1; -#endif - pte_t *pte0; - pte_t *pte1; -}; -#endif - #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_KEXEC_H */ diff --git a/trunk/arch/x86/include/asm/mach-default/mach_apic.h b/trunk/arch/x86/include/asm/mach-default/mach_apic.h index cc09cbbee27e..ff3a6c236c00 100644 --- a/trunk/arch/x86/include/asm/mach-default/mach_apic.h +++ b/trunk/arch/x86/include/asm/mach-default/mach_apic.h @@ -8,12 +8,12 @@ #define APIC_DFR_VALUE (APIC_DFR_FLAT) -static inline const struct cpumask *target_cpus(void) +static inline cpumask_t target_cpus(void) { #ifdef CONFIG_SMP - return cpu_online_mask; + return cpu_online_map; #else - return cpumask_of(0); + return cpumask_of_cpu(0); #endif } @@ -28,18 +28,15 @@ static inline const struct cpumask *target_cpus(void) #define apic_id_registered (genapic->apic_id_registered) #define init_apic_ldr (genapic->init_apic_ldr) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) -#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) #define phys_pkg_id (genapic->phys_pkg_id) #define vector_allocation_domain (genapic->vector_allocation_domain) #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) #define send_IPI_self (genapic->send_IPI_self) -#define wakeup_secondary_cpu (genapic->wakeup_cpu) extern void setup_apic_routing(void); #else #define INT_DELIVERY_MODE dest_LowestPrio #define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ #define TARGET_CPUS (target_cpus()) -#define wakeup_secondary_cpu wakeup_secondary_cpu_via_init /* * Set up the logical destination ID. * @@ -62,19 +59,9 @@ static inline int apic_id_registered(void) return physid_isset(read_apic_id(), phys_cpu_present_map); } -static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { - return cpumask_bits(cpumask)[0]; -} - -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - unsigned long mask1 = cpumask_bits(cpumask)[0]; - unsigned long mask2 = cpumask_bits(andmask)[0]; - unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; - - return (unsigned int)(mask1 & mask2 & mask3); + return cpus_addr(cpumask)[0]; } static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) @@ -99,7 +86,7 @@ static inline int apicid_to_node(int logical_apicid) #endif } -static inline void vector_allocation_domain(int cpu, struct cpumask *retmask) +static inline cpumask_t vector_allocation_domain(int cpu) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -109,7 +96,8 @@ static inline void vector_allocation_domain(int cpu, struct cpumask *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } #endif @@ -141,7 +129,7 @@ static inline int cpu_to_logical_apicid(int cpu) static inline int cpu_present_to_apicid(int mps_cpu) { - if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) + if (mps_cpu < NR_CPUS && cpu_present(mps_cpu)) return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); else return BAD_APICID; diff --git a/trunk/arch/x86/include/asm/mach-default/mach_ipi.h b/trunk/arch/x86/include/asm/mach-default/mach_ipi.h index 191312d155da..fabca01ebacf 100644 --- a/trunk/arch/x86/include/asm/mach-default/mach_ipi.h +++ b/trunk/arch/x86/include/asm/mach-default/mach_ipi.h @@ -4,8 +4,7 @@ /* Avoid include hell */ #define NMI_VECTOR 0x02 -void send_IPI_mask_bitmask(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void send_IPI_mask_bitmask(cpumask_t mask, int vector); void __send_IPI_shortcut(unsigned int shortcut, int vector); extern int no_broadcast; @@ -13,27 +12,28 @@ extern int no_broadcast; #ifdef CONFIG_X86_64 #include #define send_IPI_mask (genapic->send_IPI_mask) -#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself) #else -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_bitmask(mask, vector); } -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); #endif static inline void __local_send_IPI_allbutself(int vector) { - if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask_allbutself(cpu_online_mask, vector); - else + if (no_broadcast || vector == NMI_VECTOR) { + cpumask_t mask = cpu_online_map; + + cpu_clear(smp_processor_id(), mask); + send_IPI_mask(mask, vector); + } else __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); } static inline void __local_send_IPI_all(int vector) { if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask(cpu_online_mask, vector); + send_IPI_mask(cpu_online_map, vector); else __send_IPI_shortcut(APIC_DEST_ALLINC, vector); } diff --git a/trunk/arch/x86/include/asm/mach-default/mach_wakecpu.h b/trunk/arch/x86/include/asm/mach-default/mach_wakecpu.h index ceb013660146..9d80db91e992 100644 --- a/trunk/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/trunk/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -1,8 +1,17 @@ #ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H #define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H -#define TRAMPOLINE_PHYS_LOW (0x467) -#define TRAMPOLINE_PHYS_HIGH (0x469) +/* + * This file copes with machines that wakeup secondary CPUs by the + * INIT, INIT, STARTUP sequence. + */ + +#define WAKE_SECONDARY_VIA_INIT + +#define TRAMPOLINE_LOW phys_to_virt(0x467) +#define TRAMPOLINE_HIGH phys_to_virt(0x469) + +#define boot_cpu_apicid boot_cpu_physical_apicid static inline void wait_for_init_deassert(atomic_t *deassert) { @@ -24,12 +33,9 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) { } -extern void __inquire_remote_apic(int apicid); - -static inline void inquire_remote_apic(int apicid) -{ - if (apic_verbosity >= APIC_DEBUG) - __inquire_remote_apic(apicid); -} +#define inquire_remote_apic(apicid) do { \ + if (apic_verbosity >= APIC_DEBUG) \ + __inquire_remote_apic(apicid); \ + } while (0) #endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */ diff --git a/trunk/arch/x86/include/asm/mach-default/smpboot_hooks.h b/trunk/arch/x86/include/asm/mach-default/smpboot_hooks.h index 23bf52103b89..dbab36d64d48 100644 --- a/trunk/arch/x86/include/asm/mach-default/smpboot_hooks.h +++ b/trunk/arch/x86/include/asm/mach-default/smpboot_hooks.h @@ -13,11 +13,9 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) CMOS_WRITE(0xa, 0xf); local_flush_tlb(); pr_debug("1.\n"); - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = - start_eip >> 4; + *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; pr_debug("2.\n"); - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = - start_eip & 0xf; + *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; pr_debug("3.\n"); } @@ -34,7 +32,7 @@ static inline void smpboot_restore_warm_reset_vector(void) */ CMOS_WRITE(0, 0xf); - *((volatile long *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; + *((volatile long *) phys_to_virt(0x467)) = 0; } static inline void __init smpboot_setup_io_apic(void) diff --git a/trunk/arch/x86/include/asm/mach-generic/mach_apic.h b/trunk/arch/x86/include/asm/mach-generic/mach_apic.h index 48553e958ad5..5180bd7478fb 100644 --- a/trunk/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/trunk/arch/x86/include/asm/mach-generic/mach_apic.h @@ -24,11 +24,9 @@ #define check_phys_apicid_present (genapic->check_phys_apicid_present) #define check_apicid_used (genapic->check_apicid_used) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) -#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) #define vector_allocation_domain (genapic->vector_allocation_domain) #define enable_apic_mode (genapic->enable_apic_mode) #define phys_pkg_id (genapic->phys_pkg_id) -#define wakeup_secondary_cpu (genapic->wakeup_cpu) extern void generic_bigsmp_probe(void); diff --git a/trunk/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/trunk/arch/x86/include/asm/mach-generic/mach_wakecpu.h deleted file mode 100644 index 1ab16b168c8a..000000000000 --- a/trunk/arch/x86/include/asm/mach-generic/mach_wakecpu.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H -#define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H - -#define TRAMPOLINE_PHYS_LOW (genapic->trampoline_phys_low) -#define TRAMPOLINE_PHYS_HIGH (genapic->trampoline_phys_high) -#define wait_for_init_deassert (genapic->wait_for_init_deassert) -#define smp_callin_clear_local_apic (genapic->smp_callin_clear_local_apic) -#define store_NMI_vector (genapic->store_NMI_vector) -#define restore_NMI_vector (genapic->restore_NMI_vector) -#define inquire_remote_apic (genapic->inquire_remote_apic) - -#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ diff --git a/trunk/arch/x86/include/asm/numaq/apic.h b/trunk/arch/x86/include/asm/numaq/apic.h index c80f00d29965..0bf2a06b7a4e 100644 --- a/trunk/arch/x86/include/asm/numaq/apic.h +++ b/trunk/arch/x86/include/asm/numaq/apic.h @@ -7,9 +7,9 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline const cpumask_t *target_cpus(void) +static inline cpumask_t target_cpus(void) { - return &CPU_MASK_ALL; + return CPU_MASK_ALL; } #define NO_BALANCE_IRQ (1) @@ -122,13 +122,7 @@ static inline void enable_apic_mode(void) * We use physical apicids here, not logical, so just return the default * physical broadcast to stop people from breaking us */ -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) -{ - return (int) 0xF; -} - -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { return (int) 0xF; } diff --git a/trunk/arch/x86/include/asm/numaq/ipi.h b/trunk/arch/x86/include/asm/numaq/ipi.h index a8374c652778..935588d286cf 100644 --- a/trunk/arch/x86/include/asm/numaq/ipi.h +++ b/trunk/arch/x86/include/asm/numaq/ipi.h @@ -1,22 +1,25 @@ #ifndef __ASM_NUMAQ_IPI_H #define __ASM_NUMAQ_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void send_IPI_mask_sequence(cpumask_t, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + send_IPI_mask(cpu_online_map, vector); } #endif /* __ASM_NUMAQ_IPI_H */ diff --git a/trunk/arch/x86/include/asm/numaq/wakecpu.h b/trunk/arch/x86/include/asm/numaq/wakecpu.h index 6f499df8eddb..c577bda5b1c5 100644 --- a/trunk/arch/x86/include/asm/numaq/wakecpu.h +++ b/trunk/arch/x86/include/asm/numaq/wakecpu.h @@ -3,8 +3,12 @@ /* This file copes with machines that wakeup secondary CPUs by NMIs */ -#define TRAMPOLINE_PHYS_LOW (0x8) -#define TRAMPOLINE_PHYS_HIGH (0xa) +#define WAKE_SECONDARY_VIA_NMI + +#define TRAMPOLINE_LOW phys_to_virt(0x8) +#define TRAMPOLINE_HIGH phys_to_virt(0xa) + +#define boot_cpu_apicid boot_cpu_logical_apicid /* We don't do anything here because we use NMI's to boot instead */ static inline void wait_for_init_deassert(atomic_t *deassert) @@ -23,23 +27,17 @@ static inline void smp_callin_clear_local_apic(void) static inline void store_NMI_vector(unsigned short *high, unsigned short *low) { printk("Storing NMI vector\n"); - *high = - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)); - *low = - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)); + *high = *((volatile unsigned short *) TRAMPOLINE_HIGH); + *low = *((volatile unsigned short *) TRAMPOLINE_LOW); } static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) { printk("Restoring NMI vector\n"); - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = - *high; - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = - *low; + *((volatile unsigned short *) TRAMPOLINE_HIGH) = *high; + *((volatile unsigned short *) TRAMPOLINE_LOW) = *low; } -static inline void inquire_remote_apic(int apicid) -{ -} +#define inquire_remote_apic(apicid) {} #endif /* __ASM_NUMAQ_WAKECPU_H */ diff --git a/trunk/arch/x86/include/asm/reboot.h b/trunk/arch/x86/include/asm/reboot.h index 562d4fd31ba8..df7710354f85 100644 --- a/trunk/arch/x86/include/asm/reboot.h +++ b/trunk/arch/x86/include/asm/reboot.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_REBOOT_H #define _ASM_X86_REBOOT_H -#include - struct pt_regs; struct machine_ops { @@ -20,7 +18,4 @@ void native_machine_crash_shutdown(struct pt_regs *regs); void native_machine_shutdown(void); void machine_real_restart(const unsigned char *code, int length); -typedef void (*nmi_shootdown_cb)(int, struct die_args*); -void nmi_shootdown_cpus(nmi_shootdown_cb callback); - #endif /* _ASM_X86_REBOOT_H */ diff --git a/trunk/arch/x86/include/asm/setup.h b/trunk/arch/x86/include/asm/setup.h index 294daeb3a006..f12d37237465 100644 --- a/trunk/arch/x86/include/asm/setup.h +++ b/trunk/arch/x86/include/asm/setup.h @@ -16,8 +16,6 @@ static inline void visws_early_detect(void) { } static inline int is_visws_box(void) { return 0; } #endif -extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); -extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip); /* * Any setup quirks to be performed? */ @@ -41,7 +39,6 @@ struct x86_quirks { void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, unsigned short oemsize); int (*setup_ioapic_ids)(void); - int (*update_genapic)(void); }; extern struct x86_quirks *x86_quirks; diff --git a/trunk/arch/x86/include/asm/smp.h b/trunk/arch/x86/include/asm/smp.h index 830b9fcb6427..d12811ce51d9 100644 --- a/trunk/arch/x86/include/asm/smp.h +++ b/trunk/arch/x86/include/asm/smp.h @@ -60,7 +60,7 @@ struct smp_ops { void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); - void (*send_call_func_ipi)(const struct cpumask *mask); + void (*send_call_func_ipi)(cpumask_t mask); void (*send_call_func_single_ipi)(int cpu); }; @@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu) static inline void arch_send_call_function_ipi(cpumask_t mask) { - smp_ops.send_call_func_ipi(&mask); + smp_ops.send_call_func_ipi(mask); } void cpu_disable_common(void); @@ -138,7 +138,7 @@ void native_cpu_die(unsigned int cpu); void native_play_dead(void); void play_dead_common(void); -void native_send_call_func_ipi(const struct cpumask *mask); +void native_send_call_func_ipi(cpumask_t mask); void native_send_call_func_single_ipi(int cpu); extern void prefill_possible_map(void); diff --git a/trunk/arch/x86/include/asm/summit/apic.h b/trunk/arch/x86/include/asm/summit/apic.h index 99327d1be49f..9b3070f1c2ac 100644 --- a/trunk/arch/x86/include/asm/summit/apic.h +++ b/trunk/arch/x86/include/asm/summit/apic.h @@ -14,13 +14,13 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline const cpumask_t *target_cpus(void) +static inline cpumask_t target_cpus(void) { /* CPU_MASK_ALL (0xff) has undefined behaviour with * dest_LowestPrio mode logical clustered apic interrupt routing * Just start on cpu 0. IRQ balancing will spread load */ - return &cpumask_of_cpu(0); + return cpumask_of_cpu(0); } #define INT_DELIVERY_MODE (dest_LowestPrio) @@ -137,14 +137,14 @@ static inline void enable_apic_mode(void) { } -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpus_weight(*cpumask); + num_bits_set = cpus_weight(cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return (int) 0xFF; @@ -152,10 +152,10 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - cpu = first_cpu(*cpumask); + cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask)) { + if (cpu_isset(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ @@ -170,49 +170,6 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int num_bits_set; - int cpus_found = 0; - int cpu; - int apicid = 0xFF; - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return (int) 0xFF; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - - num_bits_set = cpumask_weight(cpumask); - /* Return id to all */ - if (num_bits_set == nr_cpu_ids) - goto exit; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. - */ - cpu = cpumask_first(cpumask); - apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ - printk ("%s: Not a valid mask!\n", __func__); - return 0xFF; - } - apicid = apicid | new_apicid; - cpus_found++; - } - cpu++; - } -exit: - free_cpumask_var(cpumask); - return apicid; -} - /* cpuid returns the value latched in the HW at reset, not the APIC ID * register's value. For any box whose BIOS changes APIC IDs, like * clustered APIC systems, we must use hard_smp_processor_id. diff --git a/trunk/arch/x86/include/asm/summit/ipi.h b/trunk/arch/x86/include/asm/summit/ipi.h index a8a2c24f50cc..53bd1e7bd7b4 100644 --- a/trunk/arch/x86/include/asm/summit/ipi.h +++ b/trunk/arch/x86/include/asm/summit/ipi.h @@ -1,10 +1,9 @@ #ifndef __ASM_SUMMIT_IPI_H #define __ASM_SUMMIT_IPI_H -void send_IPI_mask_sequence(const cpumask_t *mask, int vector); -void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); +void send_IPI_mask_sequence(cpumask_t mask, int vector); -static inline void send_IPI_mask(const cpumask_t *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_sequence(mask, vector); } @@ -15,12 +14,12 @@ static inline void send_IPI_allbutself(int vector) cpu_clear(smp_processor_id(), mask); if (!cpus_empty(mask)) - send_IPI_mask(&mask, vector); + send_IPI_mask(mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(&cpu_online_map, vector); + send_IPI_mask(cpu_online_map, vector); } #endif /* __ASM_SUMMIT_IPI_H */ diff --git a/trunk/arch/x86/include/asm/system.h b/trunk/arch/x86/include/asm/system.h index 07c3e4048991..2ed3f0f44ff7 100644 --- a/trunk/arch/x86/include/asm/system.h +++ b/trunk/arch/x86/include/asm/system.h @@ -314,8 +314,6 @@ extern void free_init_pages(char *what, unsigned long begin, unsigned long end); void default_idle(void); -void stop_this_cpu(void *dummy); - /* * Force strict CPU ordering. * And yes, this is required on UP too when we're talking diff --git a/trunk/arch/x86/include/asm/thread_info.h b/trunk/arch/x86/include/asm/thread_info.h index 0921b4018c11..e44d379faad2 100644 --- a/trunk/arch/x86/include/asm/thread_info.h +++ b/trunk/arch/x86/include/asm/thread_info.h @@ -20,8 +20,6 @@ struct task_struct; struct exec_domain; #include -#include -#include struct thread_info { struct task_struct *task; /* main task structure */ diff --git a/trunk/arch/x86/include/asm/topology.h b/trunk/arch/x86/include/asm/topology.h index 79e31e9dcdda..ff386ff50ed7 100644 --- a/trunk/arch/x86/include/asm/topology.h +++ b/trunk/arch/x86/include/asm/topology.h @@ -226,8 +226,6 @@ extern cpumask_t cpu_coregroup_map(int cpu); #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) -#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu)) -#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) /* indicates that pointers to the topology cpumask_t maps are valid */ #define arch_provides_topology_pointers yes diff --git a/trunk/arch/x86/include/asm/uaccess.h b/trunk/arch/x86/include/asm/uaccess.h index 99192bb55a53..35c54921b2e4 100644 --- a/trunk/arch/x86/include/asm/uaccess.h +++ b/trunk/arch/x86/include/asm/uaccess.h @@ -157,7 +157,6 @@ extern int __get_user_bad(void); int __ret_gu; \ unsigned long __val_gu; \ __chk_user_ptr(ptr); \ - might_fault(); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_x(1, __ret_gu, __val_gu, ptr); \ @@ -242,7 +241,6 @@ extern void __put_user_8(void); int __ret_pu; \ __typeof__(*(ptr)) __pu_val; \ __chk_user_ptr(ptr); \ - might_fault(); \ __pu_val = x; \ switch (sizeof(*(ptr))) { \ case 1: \ diff --git a/trunk/arch/x86/include/asm/uaccess_32.h b/trunk/arch/x86/include/asm/uaccess_32.h index 5e06259e90e5..d095a3aeea1b 100644 --- a/trunk/arch/x86/include/asm/uaccess_32.h +++ b/trunk/arch/x86/include/asm/uaccess_32.h @@ -82,8 +82,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) static __always_inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { - might_fault(); - return __copy_to_user_inatomic(to, from, n); + might_sleep(); + return __copy_to_user_inatomic(to, from, n); } static __always_inline unsigned long @@ -137,7 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) static __always_inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { - might_fault(); + might_sleep(); if (__builtin_constant_p(n)) { unsigned long ret; @@ -159,7 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) static __always_inline unsigned long __copy_from_user_nocache(void *to, const void __user *from, unsigned long n) { - might_fault(); + might_sleep(); if (__builtin_constant_p(n)) { unsigned long ret; diff --git a/trunk/arch/x86/include/asm/uaccess_64.h b/trunk/arch/x86/include/asm/uaccess_64.h index 84210c479fca..f8cfd00db450 100644 --- a/trunk/arch/x86/include/asm/uaccess_64.h +++ b/trunk/arch/x86/include/asm/uaccess_64.h @@ -29,8 +29,6 @@ static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) { int ret = 0; - - might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic(dst, (__force void *)src, size); switch (size) { @@ -73,8 +71,6 @@ static __always_inline __must_check int __copy_to_user(void __user *dst, const void *src, unsigned size) { int ret = 0; - - might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, src, size); switch (size) { @@ -117,8 +113,6 @@ static __always_inline __must_check int __copy_in_user(void __user *dst, const void __user *src, unsigned size) { int ret = 0; - - might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, (__force void *)src, size); diff --git a/trunk/arch/x86/include/asm/vmi.h b/trunk/arch/x86/include/asm/vmi.h index 61e08c0a2907..b7c0dea119fe 100644 --- a/trunk/arch/x86/include/asm/vmi.h +++ b/trunk/arch/x86/include/asm/vmi.h @@ -223,15 +223,9 @@ struct pci_header { } __attribute__((packed)); /* Function prototypes for bootstrapping */ -#ifdef CONFIG_VMI extern void vmi_init(void); -extern void vmi_activate(void); extern void vmi_bringup(void); -#else -static inline void vmi_init(void) {} -static inline void vmi_activate(void) {} -static inline void vmi_bringup(void) {} -#endif +extern void vmi_apply_boot_page_allocations(void); /* State needed to start an application processor in an SMP system. */ struct vmi_ap_state { diff --git a/trunk/arch/x86/kernel/Makefile b/trunk/arch/x86/kernel/Makefile index 1cad9318d217..b62a7667828e 100644 --- a/trunk/arch/x86/kernel/Makefile +++ b/trunk/arch/x86/kernel/Makefile @@ -25,7 +25,7 @@ CFLAGS_tsc.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o -obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o +obj-y += time_$(BITS).o ioport.o ldt.o obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o @@ -65,7 +65,6 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o diff --git a/trunk/arch/x86/kernel/acpi/boot.c b/trunk/arch/x86/kernel/acpi/boot.c index 65d0b72777ea..4c51a2f8fd31 100644 --- a/trunk/arch/x86/kernel/acpi/boot.c +++ b/trunk/arch/x86/kernel/acpi/boot.c @@ -1360,17 +1360,6 @@ static void __init acpi_process_madt(void) disable_acpi(); } } - - /* - * ACPI supports both logical (e.g. Hyper-Threading) and physical - * processors, where MPS only supports physical. - */ - if (acpi_lapic && acpi_ioapic) - printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " - "information\n"); - else if (acpi_lapic) - printk(KERN_INFO "Using ACPI for processor (LAPIC) " - "configuration information\n"); #endif return; } diff --git a/trunk/arch/x86/kernel/apic.c b/trunk/arch/x86/kernel/apic.c index b9019271af62..b2cef49f3085 100644 --- a/trunk/arch/x86/kernel/apic.c +++ b/trunk/arch/x86/kernel/apic.c @@ -118,6 +118,8 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); int first_system_vector = 0xfe; +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; + /* * Debug level, exported for io_apic.c */ @@ -139,7 +141,7 @@ static int lapic_next_event(unsigned long delta, struct clock_event_device *evt); static void lapic_timer_setup(enum clock_event_mode mode, struct clock_event_device *evt); -static void lapic_timer_broadcast(const cpumask_t *mask); +static void lapic_timer_broadcast(const struct cpumask *mask); static void apic_pm_activate(void); /* @@ -439,7 +441,6 @@ static void lapic_timer_setup(enum clock_event_mode mode, v = apic_read(APIC_LVTT); v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); apic_write(APIC_LVTT, v); - apic_write(APIC_TMICT, 0xffffffff); break; case CLOCK_EVT_MODE_RESUME: /* Nothing to do here */ @@ -452,10 +453,10 @@ static void lapic_timer_setup(enum clock_event_mode mode, /* * Local APIC timer broadcast function */ -static void lapic_timer_broadcast(const cpumask_t *mask) +static void lapic_timer_broadcast(const struct cpumask *mask) { #ifdef CONFIG_SMP - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); + send_IPI_mask(*mask, LOCAL_TIMER_VECTOR); #endif } @@ -558,13 +559,13 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta) } else { res = (((u64)deltapm) * mult) >> 22; do_div(res, 1000000); - pr_warning("APIC calibration not consistent " + printk(KERN_WARNING "APIC calibration not consistent " "with PM Timer: %ldms instead of 100ms\n", (long)res); /* Correct the lapic counter value */ res = (((u64)(*delta)) * pm_100ms); do_div(res, deltapm); - pr_info("APIC delta adjusted to PM-Timer: " + printk(KERN_INFO "APIC delta adjusted to PM-Timer: " "%lu (%ld)\n", (unsigned long)res, *delta); *delta = (long)res; } @@ -644,7 +645,8 @@ static int __init calibrate_APIC_clock(void) */ if (calibration_result < (1000000 / HZ)) { local_irq_enable(); - pr_warning("APIC frequency too slow, disabling apic timer\n"); + printk(KERN_WARNING + "APIC frequency too slow, disabling apic timer\n"); return -1; } @@ -670,9 +672,13 @@ static int __init calibrate_APIC_clock(void) while (lapic_cal_loops <= LAPIC_CAL_LOOPS) cpu_relax(); + local_irq_disable(); + /* Stop the lapic timer */ lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); + local_irq_enable(); + /* Jiffies delta */ deltaj = lapic_cal_j2 - lapic_cal_j1; apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); @@ -686,7 +692,8 @@ static int __init calibrate_APIC_clock(void) local_irq_enable(); if (levt->features & CLOCK_EVT_FEAT_DUMMY) { - pr_warning("APIC timer disabled due to verification failure.\n"); + printk(KERN_WARNING + "APIC timer disabled due to verification failure.\n"); return -1; } @@ -707,7 +714,7 @@ void __init setup_boot_APIC_clock(void) * broadcast mechanism is used. On UP systems simply ignore it. */ if (disable_apic_timer) { - pr_info("Disabling APIC timer\n"); + printk(KERN_INFO "Disabling APIC timer\n"); /* No broadcast on UP ! */ if (num_possible_cpus() > 1) { lapic_clockevent.mult = 1; @@ -734,7 +741,7 @@ void __init setup_boot_APIC_clock(void) if (nmi_watchdog != NMI_IO_APIC) lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; else - pr_warning("APIC timer registered as dummy," + printk(KERN_WARNING "APIC timer registered as dummy," " due to nmi_watchdog=%d!\n", nmi_watchdog); /* Setup the lapic or request the broadcast */ @@ -766,7 +773,8 @@ static void local_apic_timer_interrupt(void) * spurious. */ if (!evt->event_handler) { - pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu); + printk(KERN_WARNING + "Spurious LAPIC timer interrupt on cpu %d\n", cpu); /* Switch it off */ lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); return; @@ -1085,7 +1093,7 @@ static void __cpuinit lapic_setup_esr(void) unsigned int oldvalue, value, maxlvt; if (!lapic_is_integrated()) { - pr_info("No ESR for 82489DX.\n"); + printk(KERN_INFO "No ESR for 82489DX.\n"); return; } @@ -1096,7 +1104,7 @@ static void __cpuinit lapic_setup_esr(void) * ESR disabled - we can't do anything useful with the * errors anyway - mbligh */ - pr_info("Leaving ESR disabled.\n"); + printk(KERN_INFO "Leaving ESR disabled.\n"); return; } @@ -1290,7 +1298,7 @@ void check_x2apic(void) rdmsr(MSR_IA32_APICBASE, msr, msr2); if (msr & X2APIC_ENABLE) { - pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); + printk("x2apic enabled by BIOS, switching to x2apic ops\n"); x2apic_preenabled = x2apic = 1; apic_ops = &x2apic_ops; } @@ -1302,7 +1310,7 @@ void enable_x2apic(void) rdmsr(MSR_IA32_APICBASE, msr, msr2); if (!(msr & X2APIC_ENABLE)) { - pr_info("Enabling x2apic\n"); + printk("Enabling x2apic\n"); wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); } } @@ -1317,8 +1325,9 @@ void __init enable_IR_x2apic(void) return; if (!x2apic_preenabled && disable_x2apic) { - pr_info("Skipped enabling x2apic and Interrupt-remapping " - "because of nox2apic\n"); + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of nox2apic\n"); return; } @@ -1326,19 +1335,22 @@ void __init enable_IR_x2apic(void) panic("Bios already enabled x2apic, can't enforce nox2apic"); if (!x2apic_preenabled && skip_ioapic_setup) { - pr_info("Skipped enabling x2apic and Interrupt-remapping " - "because of skipping io-apic setup\n"); + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of skipping io-apic setup\n"); return; } ret = dmar_table_init(); if (ret) { - pr_info("dmar_table_init() failed with %d:\n", ret); + printk(KERN_INFO + "dmar_table_init() failed with %d:\n", ret); if (x2apic_preenabled) panic("x2apic enabled by bios. But IR enabling failed"); else - pr_info("Not enabling x2apic,Intr-remapping\n"); + printk(KERN_INFO + "Not enabling x2apic,Intr-remapping\n"); return; } @@ -1347,7 +1359,7 @@ void __init enable_IR_x2apic(void) ret = save_mask_IO_APIC_setup(); if (ret) { - pr_info("Saving IO-APIC state failed: %d\n", ret); + printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret); goto end; } @@ -1382,11 +1394,14 @@ void __init enable_IR_x2apic(void) if (!ret) { if (!x2apic_preenabled) - pr_info("Enabled x2apic and interrupt-remapping\n"); + printk(KERN_INFO + "Enabled x2apic and interrupt-remapping\n"); else - pr_info("Enabled Interrupt-remapping\n"); + printk(KERN_INFO + "Enabled Interrupt-remapping\n"); } else - pr_err("Failed to enable Interrupt-remapping and x2apic\n"); + printk(KERN_ERR + "Failed to enable Interrupt-remapping and x2apic\n"); #else if (!cpu_has_x2apic) return; @@ -1395,8 +1410,8 @@ void __init enable_IR_x2apic(void) panic("x2apic enabled prior OS handover," " enable CONFIG_INTR_REMAP"); - pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping " - " and x2apic\n"); + printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " + " and x2apic\n"); #endif return; @@ -1413,7 +1428,7 @@ void __init enable_IR_x2apic(void) static int __init detect_init_APIC(void) { if (!cpu_has_apic) { - pr_info("No local APIC present\n"); + printk(KERN_INFO "No local APIC present\n"); return -1; } @@ -1454,8 +1469,8 @@ static int __init detect_init_APIC(void) * "lapic" specified. */ if (!force_enable_local_apic) { - pr_info("Local APIC disabled by BIOS -- " - "you can enable it with \"lapic\"\n"); + printk(KERN_INFO "Local APIC disabled by BIOS -- " + "you can enable it with \"lapic\"\n"); return -1; } /* @@ -1465,7 +1480,8 @@ static int __init detect_init_APIC(void) */ rdmsr(MSR_IA32_APICBASE, l, h); if (!(l & MSR_IA32_APICBASE_ENABLE)) { - pr_info("Local APIC disabled by BIOS -- reenabling.\n"); + printk(KERN_INFO + "Local APIC disabled by BIOS -- reenabling.\n"); l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; wrmsr(MSR_IA32_APICBASE, l, h); @@ -1478,7 +1494,7 @@ static int __init detect_init_APIC(void) */ features = cpuid_edx(1); if (!(features & (1 << X86_FEATURE_APIC))) { - pr_warning("Could not enable APIC!\n"); + printk(KERN_WARNING "Could not enable APIC!\n"); return -1; } set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); @@ -1489,14 +1505,14 @@ static int __init detect_init_APIC(void) if (l & MSR_IA32_APICBASE_ENABLE) mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; - pr_info("Found and enabled local APIC!\n"); + printk(KERN_INFO "Found and enabled local APIC!\n"); apic_pm_activate(); return 0; no_apic: - pr_info("No local APIC present or hardware disabled\n"); + printk(KERN_INFO "No local APIC present or hardware disabled\n"); return -1; } #endif @@ -1572,12 +1588,12 @@ int __init APIC_init_uniprocessor(void) { #ifdef CONFIG_X86_64 if (disable_apic) { - pr_info("Apic disabled\n"); + printk(KERN_INFO "Apic disabled\n"); return -1; } if (!cpu_has_apic) { disable_apic = 1; - pr_info("Apic disabled by BIOS\n"); + printk(KERN_INFO "Apic disabled by BIOS\n"); return -1; } #else @@ -1589,8 +1605,8 @@ int __init APIC_init_uniprocessor(void) */ if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { - pr_err("BIOS bug, local APIC 0x%x not detected!...\n", - boot_cpu_physical_apicid); + printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n", + boot_cpu_physical_apicid); clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); return -1; } @@ -1683,8 +1699,8 @@ void smp_spurious_interrupt(struct pt_regs *regs) add_pda(irq_spurious_count, 1); #else /* see sw-dev-man vol 3, chapter 7.4.13.5 */ - pr_info("spurious APIC interrupt on CPU#%d, " - "should never happen.\n", smp_processor_id()); + printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " + "should never happen.\n", smp_processor_id()); __get_cpu_var(irq_stat).irq_spurious_count++; #endif irq_exit(); @@ -1708,18 +1724,17 @@ void smp_error_interrupt(struct pt_regs *regs) ack_APIC_irq(); atomic_inc(&irq_err_count); - /* - * Here is what the APIC error bits mean: - * 0: Send CS error - * 1: Receive CS error - * 2: Send accept error - * 3: Receive accept error - * 4: Reserved - * 5: Send illegal vector - * 6: Received illegal vector - * 7: Illegal register address - */ - pr_debug("APIC error on CPU%d: %02x(%02x)\n", + /* Here is what the APIC error bits mean: + 0: Send CS error + 1: Receive CS error + 2: Send accept error + 3: Receive accept error + 4: Reserved + 5: Send illegal vector + 6: Received illegal vector + 7: Illegal register address + */ + printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", smp_processor_id(), v , v1); irq_exit(); } @@ -1817,32 +1832,28 @@ void disconnect_bsp_APIC(int virt_wire_setup) void __cpuinit generic_processor_info(int apicid, int version) { int cpu; + cpumask_t tmp_map; /* * Validate version */ if (version == 0x0) { - pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", + printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " + "fixing up to 0x10. (tell your hw vendor)\n", version); version = 0x10; } apic_version[apicid] = version; - if (num_processors >= nr_cpu_ids) { - int max = nr_cpu_ids; - int thiscpu = max + disabled_cpus; - - pr_warning( - "ACPI: NR_CPUS/possible_cpus limit of %i reached." - " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); - - disabled_cpus++; + if (num_processors >= NR_CPUS) { + printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." + " Processor ignored.\n", NR_CPUS); return; } num_processors++; - cpu = cpumask_next_zero(-1, cpu_present_mask); + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); physid_set(apicid, phys_cpu_present_map); if (apicid == boot_cpu_physical_apicid) { @@ -1892,8 +1903,8 @@ void __cpuinit generic_processor_info(int apicid, int version) } #endif - set_cpu_possible(cpu, true); - set_cpu_present(cpu, true); + cpu_set(cpu, cpu_possible_map); + cpu_set(cpu, cpu_present_map); } #ifdef CONFIG_X86_64 @@ -2095,7 +2106,7 @@ __cpuinit int apic_is_clustered_box(void) bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); bitmap_zero(clustermap, NUM_APIC_CLUSTERS); - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { /* are we being called early in kernel startup? */ if (bios_cpu_apicid) { id = bios_cpu_apicid[i]; @@ -2198,7 +2209,7 @@ static int __init apic_set_verbosity(char *arg) else if (strcmp("verbose", arg) == 0) apic_verbosity = APIC_VERBOSE; else { - pr_warning("APIC Verbosity level %s not recognised" + printk(KERN_WARNING "APIC Verbosity level %s not recognised" " use apic=verbose or apic=debug\n", arg); return -EINVAL; } diff --git a/trunk/arch/x86/kernel/apm_32.c b/trunk/arch/x86/kernel/apm_32.c index 3a26525a3f31..5145a6e72bbb 100644 --- a/trunk/arch/x86/kernel/apm_32.c +++ b/trunk/arch/x86/kernel/apm_32.c @@ -391,7 +391,11 @@ static int power_off; #else static int power_off = 1; #endif +#ifdef CONFIG_APM_REAL_MODE_POWER_OFF +static int realmode_power_off = 1; +#else static int realmode_power_off; +#endif #ifdef CONFIG_APM_ALLOW_INTS static int allow_ints = 1; #else diff --git a/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 88ea02dcb622..8e48c5d4467d 100644 --- a/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include @@ -392,7 +391,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, unsigned int next_perf_state = 0; /* Index into perf table */ unsigned int i; int result = 0; - struct power_trace it; dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); @@ -429,8 +427,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, } } - trace_power_mark(&it, POWER_PSTATE, next_perf_state); - switch (data->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; diff --git a/trunk/arch/x86/kernel/cpu/intel.c b/trunk/arch/x86/kernel/cpu/intel.c index 816f27f289b1..cce0b6118d55 100644 --- a/trunk/arch/x86/kernel/cpu/intel.c +++ b/trunk/arch/x86/kernel/cpu/intel.c @@ -307,11 +307,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_P4); if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_P3); -#endif if (cpu_has_bts) ptrace_bts_init_intel(c); +#endif + detect_extended_topology(c); if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { /* diff --git a/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c b/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c index 7bd00a565672..43ea612d3e9d 100644 --- a/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -534,16 +534,31 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) per_cpu(cpuid4_info, cpu) = NULL; } -static void __cpuinit get_cpu_leaves(void *_retval) +static int __cpuinit detect_cache_attributes(unsigned int cpu) { - int j, *retval = _retval, cpu = smp_processor_id(); + struct _cpuid4_info *this_leaf; + unsigned long j; + int retval; + cpumask_t oldmask; + + if (num_cache_leaves == 0) + return -ENOENT; + + per_cpu(cpuid4_info, cpu) = kzalloc( + sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); + if (per_cpu(cpuid4_info, cpu) == NULL) + return -ENOMEM; + + oldmask = current->cpus_allowed; + retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + if (retval) + goto out; /* Do cpuid and store the results */ for (j = 0; j < num_cache_leaves; j++) { - struct _cpuid4_info *this_leaf; this_leaf = CPUID4_INFO_IDX(cpu, j); - *retval = cpuid4_cache_lookup(j, this_leaf); - if (unlikely(*retval < 0)) { + retval = cpuid4_cache_lookup(j, this_leaf); + if (unlikely(retval < 0)) { int i; for (i = 0; i < j; i++) @@ -552,21 +567,9 @@ static void __cpuinit get_cpu_leaves(void *_retval) } cache_shared_cpu_map_setup(cpu, j); } -} - -static int __cpuinit detect_cache_attributes(unsigned int cpu) -{ - int retval; - - if (num_cache_leaves == 0) - return -ENOENT; - - per_cpu(cpuid4_info, cpu) = kzalloc( - sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); - if (per_cpu(cpuid4_info, cpu) == NULL) - return -ENOMEM; + set_cpus_allowed_ptr(current, &oldmask); - smp_call_function_single(cpu, get_cpu_leaves, &retval, true); +out: if (retval) { kfree(per_cpu(cpuid4_info, cpu)); per_cpu(cpuid4_info, cpu) = NULL; diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index a1de80f368f1..5eb390a4b2e9 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -83,41 +83,34 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ * CPU Initialization */ -struct thresh_restart { - struct threshold_block *b; - int reset; - u16 old_limit; -}; - /* must be called with correct cpu affinity */ -static long threshold_restart_bank(void *_tr) +static void threshold_restart_bank(struct threshold_block *b, + int reset, u16 old_limit) { - struct thresh_restart *tr = _tr; u32 mci_misc_hi, mci_misc_lo; - rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); + rdmsr(b->address, mci_misc_lo, mci_misc_hi); - if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) - tr->reset = 1; /* limit cannot be lower than err count */ + if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) + reset = 1; /* limit cannot be lower than err count */ - if (tr->reset) { /* reset err count and overflow bit */ + if (reset) { /* reset err count and overflow bit */ mci_misc_hi = (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | - (THRESHOLD_MAX - tr->b->threshold_limit); - } else if (tr->old_limit) { /* change limit w/o reset */ + (THRESHOLD_MAX - b->threshold_limit); + } else if (old_limit) { /* change limit w/o reset */ int new_count = (mci_misc_hi & THRESHOLD_MAX) + - (tr->old_limit - tr->b->threshold_limit); + (old_limit - b->threshold_limit); mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | (new_count & THRESHOLD_MAX); } - tr->b->interrupt_enable ? + b->interrupt_enable ? (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : (mci_misc_hi &= ~MASK_INT_TYPE_HI); mci_misc_hi |= MASK_COUNT_EN_HI; - wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); - return 0; + wrmsr(b->address, mci_misc_lo, mci_misc_hi); } /* cpu init entry point, called from mce.c with preempt off */ @@ -127,7 +120,6 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) unsigned int cpu = smp_processor_id(); u8 lvt_off; u32 low = 0, high = 0, address = 0; - struct thresh_restart tr; for (bank = 0; bank < NR_BANKS; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { @@ -170,10 +162,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) wrmsr(address, low, high); threshold_defaults.address = address; - tr.b = &threshold_defaults; - tr.reset = 0; - tr.old_limit = 0; - threshold_restart_bank(&tr); + threshold_restart_bank(&threshold_defaults, 0, 0); } } } @@ -262,6 +251,20 @@ struct threshold_attr { ssize_t(*store) (struct threshold_block *, const char *, size_t count); }; +static void affinity_set(unsigned int cpu, cpumask_t *oldmask, + cpumask_t *newmask) +{ + *oldmask = current->cpus_allowed; + cpus_clear(*newmask); + cpu_set(cpu, *newmask); + set_cpus_allowed_ptr(current, newmask); +} + +static void affinity_restore(const cpumask_t *oldmask) +{ + set_cpus_allowed_ptr(current, oldmask); +} + #define SHOW_FIELDS(name) \ static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ { \ @@ -274,16 +277,15 @@ static ssize_t store_interrupt_enable(struct threshold_block *b, const char *buf, size_t count) { char *end; - struct thresh_restart tr; + cpumask_t oldmask, newmask; unsigned long new = simple_strtoul(buf, &end, 0); if (end == buf) return -EINVAL; b->interrupt_enable = !!new; - tr.b = b; - tr.reset = 0; - tr.old_limit = 0; - work_on_cpu(b->cpu, threshold_restart_bank, &tr); + affinity_set(b->cpu, &oldmask, &newmask); + threshold_restart_bank(b, 0, 0); + affinity_restore(&oldmask); return end - buf; } @@ -292,7 +294,8 @@ static ssize_t store_threshold_limit(struct threshold_block *b, const char *buf, size_t count) { char *end; - struct thresh_restart tr; + cpumask_t oldmask, newmask; + u16 old; unsigned long new = simple_strtoul(buf, &end, 0); if (end == buf) return -EINVAL; @@ -300,36 +303,34 @@ static ssize_t store_threshold_limit(struct threshold_block *b, new = THRESHOLD_MAX; if (new < 1) new = 1; - tr.old_limit = b->threshold_limit; + old = b->threshold_limit; b->threshold_limit = new; - tr.b = b; - tr.reset = 0; - work_on_cpu(b->cpu, threshold_restart_bank, &tr); + affinity_set(b->cpu, &oldmask, &newmask); + threshold_restart_bank(b, 0, old); + affinity_restore(&oldmask); return end - buf; } -static long local_error_count(void *_b) -{ - struct threshold_block *b = _b; - u32 low, high; - - rdmsr(b->address, low, high); - return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); -} - static ssize_t show_error_count(struct threshold_block *b, char *buf) { - return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b)); + u32 high, low; + cpumask_t oldmask, newmask; + affinity_set(b->cpu, &oldmask, &newmask); + rdmsr(b->address, low, high); + affinity_restore(&oldmask); + return sprintf(buf, "%x\n", + (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); } static ssize_t store_error_count(struct threshold_block *b, const char *buf, size_t count) { - struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; - - work_on_cpu(b->cpu, threshold_restart_bank, &tr); + cpumask_t oldmask, newmask; + affinity_set(b->cpu, &oldmask, &newmask); + threshold_restart_bank(b, 1, 0); + affinity_restore(&oldmask); return 1; } @@ -462,19 +463,12 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, return err; } -static long local_allocate_threshold_blocks(void *_bank) -{ - unsigned int *bank = _bank; - - return allocate_threshold_blocks(smp_processor_id(), *bank, 0, - MSR_IA32_MC0_MISC + *bank * 4); -} - /* symlinks sibling shared banks to first core. first core owns dir/files. */ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) { int i, err = 0; struct threshold_bank *b = NULL; + cpumask_t oldmask, newmask; char name[32]; sprintf(name, "threshold_bank%i", bank); @@ -525,7 +519,11 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) per_cpu(threshold_banks, cpu)[bank] = b; - err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank); + affinity_set(cpu, &oldmask, &newmask); + err = allocate_threshold_blocks(cpu, bank, 0, + MSR_IA32_MC0_MISC + bank * 4); + affinity_restore(&oldmask); + if (err) goto out_free; diff --git a/trunk/arch/x86/kernel/crash.c b/trunk/arch/x86/kernel/crash.c index d84a852e4cd7..268553817909 100644 --- a/trunk/arch/x86/kernel/crash.c +++ b/trunk/arch/x86/kernel/crash.c @@ -29,17 +29,34 @@ #include +/* This keeps a track of which one is crashing cpu. */ +static int crashing_cpu; #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) +static atomic_t waiting_for_crash_ipi; -static void kdump_nmi_callback(int cpu, struct die_args *args) +static int crash_nmi_callback(struct notifier_block *self, + unsigned long val, void *data) { struct pt_regs *regs; #ifdef CONFIG_X86_32 struct pt_regs fixed_regs; #endif + int cpu; - regs = args->regs; + if (val != DIE_NMI_IPI) + return NOTIFY_OK; + + regs = ((struct die_args *)data)->regs; + cpu = raw_smp_processor_id(); + + /* Don't do anything if this handler is invoked on crashing cpu. + * Otherwise, system will completely hang. Crashing cpu can get + * an NMI if system was initially booted with nmi_watchdog parameter. + */ + if (cpu == crashing_cpu) + return NOTIFY_STOP; + local_irq_disable(); #ifdef CONFIG_X86_32 if (!user_mode_vm(regs)) { @@ -48,19 +65,54 @@ static void kdump_nmi_callback(int cpu, struct die_args *args) } #endif crash_save_cpu(regs, cpu); - disable_local_APIC(); + atomic_dec(&waiting_for_crash_ipi); + /* Assume hlt works */ + halt(); + for (;;) + cpu_relax(); + + return 1; +} + +static void smp_send_nmi_allbutself(void) +{ + cpumask_t mask = cpu_online_map; + cpu_clear(safe_smp_processor_id(), mask); + if (!cpus_empty(mask)) + send_IPI_mask(mask, NMI_VECTOR); } -static void kdump_nmi_shootdown_cpus(void) +static struct notifier_block crash_nmi_nb = { + .notifier_call = crash_nmi_callback, +}; + +static void nmi_shootdown_cpus(void) { - nmi_shootdown_cpus(kdump_nmi_callback); + unsigned long msecs; + + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + /* Would it be better to replace the trap vector here? */ + if (register_die_notifier(&crash_nmi_nb)) + return; /* return what? */ + /* Ensure the new callback function is set before sending + * out the NMI + */ + wmb(); + smp_send_nmi_allbutself(); + + msecs = 1000; /* Wait at most a second for the other cpus to stop */ + while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { + mdelay(1); + msecs--; + } + + /* Leave the nmi callback set */ disable_local_APIC(); } - #else -static void kdump_nmi_shootdown_cpus(void) +static void nmi_shootdown_cpus(void) { /* There are no cpus to shootdown */ } @@ -79,7 +131,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs) /* The kernel is broken so disable interrupts */ local_irq_disable(); - kdump_nmi_shootdown_cpus(); + /* Make a note of crashing cpu. Will be used in NMI callback.*/ + crashing_cpu = safe_smp_processor_id(); + nmi_shootdown_cpus(); lapic_shutdown(); #if defined(CONFIG_X86_IO_APIC) disable_IO_APIC(); diff --git a/trunk/arch/x86/kernel/ds.c b/trunk/arch/x86/kernel/ds.c index 19a8c2c0389f..a2d1176c38ee 100644 --- a/trunk/arch/x86/kernel/ds.c +++ b/trunk/arch/x86/kernel/ds.c @@ -7,12 +7,13 @@ * * It manages: * - per-thread and per-cpu allocation of BTS and PEBS - * - buffer overflow handling (to be done) + * - buffer memory allocation (optional) + * - buffer overflow handling * - buffer access * * It assumes: - * - get_task_struct on all traced tasks - * - current is allowed to trace tasks + * - get_task_struct on all parameter tasks + * - current is allowed to trace parameter tasks * * * Copyright (C) 2007-2008 Intel Corporation. @@ -27,7 +28,6 @@ #include #include #include -#include /* @@ -44,33 +44,6 @@ struct ds_configuration { }; static struct ds_configuration ds_cfg; -/* - * A BTS or PEBS tracer. - * - * This holds the configuration of the tracer and serves as a handle - * to identify tracers. - */ -struct ds_tracer { - /* the DS context (partially) owned by this tracer */ - struct ds_context *context; - /* the buffer provided on ds_request() and its size in bytes */ - void *buffer; - size_t size; -}; - -struct bts_tracer { - /* the common DS part */ - struct ds_tracer ds; - /* buffer overflow notification function */ - bts_ovfl_callback_t ovfl; -}; - -struct pebs_tracer { - /* the common DS part */ - struct ds_tracer ds; - /* buffer overflow notification function */ - pebs_ovfl_callback_t ovfl; -}; /* * Debug Store (DS) save area configuration (see Intel64 and IA32 @@ -134,14 +107,35 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, (*(unsigned long *)base) = value; } -#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ - /* - * Locking is done only for allocating BTS or PEBS resources. + * Locking is done only for allocating BTS or PEBS resources and for + * guarding context and buffer memory allocation. + * + * Most functions require the current task to own the ds context part + * they are going to access. All the locking is done when validating + * access to the context. */ static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); +/* + * Validate that the current task is allowed to access the BTS/PEBS + * buffer of the parameter task. + * + * Returns 0, if access is granted; -Eerrno, otherwise. + */ +static inline int ds_validate_access(struct ds_context *context, + enum ds_qualifier qual) +{ + if (!context) + return -EPERM; + + if (context->owner[qual] == current) + return 0; + + return -EPERM; +} + /* * We either support (system-wide) per-cpu or per-thread allocation. @@ -189,12 +183,50 @@ static inline int check_tracer(struct task_struct *task) * * Contexts are use-counted. They are allocated on first access and * deallocated when the last user puts the context. + * + * We distinguish between an allocating and a non-allocating get of a + * context: + * - the allocating get is used for requesting BTS/PEBS resources. It + * requires the caller to hold the global ds_lock. + * - the non-allocating get is used for all other cases. A + * non-existing context indicates an error. It acquires and releases + * the ds_lock itself for obtaining the context. + * + * A context and its DS configuration are allocated and deallocated + * together. A context always has a DS configuration of the + * appropriate size. */ static DEFINE_PER_CPU(struct ds_context *, system_context); #define this_system_context per_cpu(system_context, smp_processor_id()) +/* + * Returns the pointer to the parameter task's context or to the + * system-wide context, if task is NULL. + * + * Increases the use count of the returned context, if not NULL. + */ static inline struct ds_context *ds_get_context(struct task_struct *task) +{ + struct ds_context *context; + unsigned long irq; + + spin_lock_irqsave(&ds_lock, irq); + + context = (task ? task->thread.ds_ctx : this_system_context); + if (context) + context->count++; + + spin_unlock_irqrestore(&ds_lock, irq); + + return context; +} + +/* + * Same as ds_get_context, but allocates the context and it's DS + * structure, if necessary; returns NULL; if out of memory. + */ +static inline struct ds_context *ds_alloc_context(struct task_struct *task) { struct ds_context **p_context = (task ? &task->thread.ds_ctx : &this_system_context); @@ -206,9 +238,16 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) if (!context) return NULL; + context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); + if (!context->ds) { + kfree(context); + return NULL; + } + spin_lock_irqsave(&ds_lock, irq); if (*p_context) { + kfree(context->ds); kfree(context); context = *p_context; @@ -233,6 +272,10 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) return context; } +/* + * Decreases the use count of the parameter context, if not NULL. + * Deallocates the context, if the use count reaches zero. + */ static inline void ds_put_context(struct ds_context *context) { unsigned long irq; @@ -253,6 +296,13 @@ static inline void ds_put_context(struct ds_context *context) if (!context->task || (context->task == current)) wrmsrl(MSR_IA32_DS_AREA, 0); + put_tracer(context->task); + + /* free any leftover buffers from tracers that did not + * deallocate them properly. */ + kfree(context->buffer[ds_bts]); + kfree(context->buffer[ds_pebs]); + kfree(context->ds); kfree(context); out: spin_unlock_irqrestore(&ds_lock, irq); @@ -262,342 +312,345 @@ static inline void ds_put_context(struct ds_context *context) /* * Handle a buffer overflow * + * task: the task whose buffers are overflowing; + * NULL for a buffer overflow on the current cpu * context: the ds context * qual: the buffer type */ -static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) -{ - switch (qual) { - case ds_bts: { - struct bts_tracer *tracer = - container_of(context->owner[qual], - struct bts_tracer, ds); - if (tracer->ovfl) - tracer->ovfl(tracer); - } - break; - case ds_pebs: { - struct pebs_tracer *tracer = - container_of(context->owner[qual], - struct pebs_tracer, ds); - if (tracer->ovfl) - tracer->ovfl(tracer); - } - break; - } +static void ds_overflow(struct task_struct *task, struct ds_context *context, + enum ds_qualifier qual) +{ + if (!context) + return; + + if (context->callback[qual]) + (*context->callback[qual])(task); + + /* todo: do some more overflow handling */ } -static void ds_install_ds_config(struct ds_context *context, - enum ds_qualifier qual, - void *base, size_t size, size_t ith) +/* + * Allocate a non-pageable buffer of the parameter size. + * Checks the memory and the locked memory rlimit. + * + * Returns the buffer, if successful; + * NULL, if out of memory or rlimit exceeded. + * + * size: the requested buffer size in bytes + * pages (out): if not NULL, contains the number of pages reserved + */ +static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) { - unsigned long buffer, adj; + unsigned long rlim, vm, pgsz; + void *buffer; - /* adjust the buffer address and size to meet alignment - * constraints: - * - buffer is double-word aligned - * - size is multiple of record size - * - * We checked the size at the very beginning; we have enough - * space to do the adjustment. - */ - buffer = (unsigned long)base; + pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; - adj = ALIGN(buffer, DS_ALIGNMENT) - buffer; - buffer += adj; - size -= adj; + rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; + vm = current->mm->total_vm + pgsz; + if (rlim < vm) + return NULL; - size /= ds_cfg.sizeof_rec[qual]; - size *= ds_cfg.sizeof_rec[qual]; + rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + vm = current->mm->locked_vm + pgsz; + if (rlim < vm) + return NULL; - ds_set(context->ds, qual, ds_buffer_base, buffer); - ds_set(context->ds, qual, ds_index, buffer); - ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); + buffer = kzalloc(size, GFP_KERNEL); + if (!buffer) + return NULL; - /* The value for 'no threshold' is -1, which will set the - * threshold outside of the buffer, just like we want it. - */ - ds_set(context->ds, qual, - ds_interrupt_threshold, buffer + size - ith); + current->mm->total_vm += pgsz; + current->mm->locked_vm += pgsz; + + if (pages) + *pages = pgsz; + + return buffer; } -static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual, - struct task_struct *task, - void *base, size_t size, size_t th) +static int ds_request(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl, enum ds_qualifier qual) { struct ds_context *context; + unsigned long buffer, adj; + const unsigned long alignment = (1 << 3); unsigned long irq; - int error; + int error = 0; - error = -EOPNOTSUPP; if (!ds_cfg.sizeof_ds) - goto out; - - error = -EINVAL; - if (!base) - goto out; + return -EOPNOTSUPP; /* we require some space to do alignment adjustments below */ - error = -EINVAL; - if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) - goto out; - - if (th != (size_t)-1) { - th *= ds_cfg.sizeof_rec[qual]; + if (size < (alignment + ds_cfg.sizeof_rec[qual])) + return -EINVAL; - error = -EINVAL; - if (size <= th) - goto out; - } + /* buffer overflow notification is not yet implemented */ + if (ovfl) + return -EOPNOTSUPP; - tracer->buffer = base; - tracer->size = size; - error = -ENOMEM; - context = ds_get_context(task); + context = ds_alloc_context(task); if (!context) - goto out; - tracer->context = context; - + return -ENOMEM; spin_lock_irqsave(&ds_lock, irq); error = -EPERM; if (!check_tracer(task)) goto out_unlock; + get_tracer(task); + error = -EALREADY; + if (context->owner[qual] == current) + goto out_put_tracer; error = -EPERM; - if (context->owner[qual]) + if (context->owner[qual] != NULL) goto out_put_tracer; - context->owner[qual] = tracer; + context->owner[qual] = current; spin_unlock_irqrestore(&ds_lock, irq); - ds_install_ds_config(context, qual, base, size, th); + error = -ENOMEM; + if (!base) { + base = ds_allocate_buffer(size, &context->pages[qual]); + if (!base) + goto out_release; + + context->buffer[qual] = base; + } + error = 0; - return 0; + context->callback[qual] = ovfl; + + /* adjust the buffer address and size to meet alignment + * constraints: + * - buffer is double-word aligned + * - size is multiple of record size + * + * We checked the size at the very beginning; we have enough + * space to do the adjustment. + */ + buffer = (unsigned long)base; + + adj = ALIGN(buffer, alignment) - buffer; + buffer += adj; + size -= adj; + + size /= ds_cfg.sizeof_rec[qual]; + size *= ds_cfg.sizeof_rec[qual]; + + ds_set(context->ds, qual, ds_buffer_base, buffer); + ds_set(context->ds, qual, ds_index, buffer); + ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); + + if (ovfl) { + /* todo: select a suitable interrupt threshold */ + } else + ds_set(context->ds, qual, + ds_interrupt_threshold, buffer + size + 1); + + /* we keep the context until ds_release */ + return error; + + out_release: + context->owner[qual] = NULL; + ds_put_context(context); + put_tracer(task); + return error; out_put_tracer: + spin_unlock_irqrestore(&ds_lock, irq); + ds_put_context(context); put_tracer(task); + return error; + out_unlock: spin_unlock_irqrestore(&ds_lock, irq); ds_put_context(context); - tracer->context = NULL; - out: return error; } -struct bts_tracer *ds_request_bts(struct task_struct *task, - void *base, size_t size, - bts_ovfl_callback_t ovfl, size_t th) +int ds_request_bts(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl) { - struct bts_tracer *tracer; - int error; - - /* buffer overflow notification is not yet implemented */ - error = -EOPNOTSUPP; - if (ovfl) - goto out; - - error = -ENOMEM; - tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); - if (!tracer) - goto out; - tracer->ovfl = ovfl; - - error = ds_request(&tracer->ds, ds_bts, task, base, size, th); - if (error < 0) - goto out_tracer; - - return tracer; + return ds_request(task, base, size, ovfl, ds_bts); +} - out_tracer: - kfree(tracer); - out: - return ERR_PTR(error); +int ds_request_pebs(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl) +{ + return ds_request(task, base, size, ovfl, ds_pebs); } -struct pebs_tracer *ds_request_pebs(struct task_struct *task, - void *base, size_t size, - pebs_ovfl_callback_t ovfl, size_t th) +static int ds_release(struct task_struct *task, enum ds_qualifier qual) { - struct pebs_tracer *tracer; + struct ds_context *context; int error; - /* buffer overflow notification is not yet implemented */ - error = -EOPNOTSUPP; - if (ovfl) - goto out; - - error = -ENOMEM; - tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); - if (!tracer) + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) goto out; - tracer->ovfl = ovfl; - error = ds_request(&tracer->ds, ds_pebs, task, base, size, th); - if (error < 0) - goto out_tracer; + kfree(context->buffer[qual]); + context->buffer[qual] = NULL; - return tracer; + current->mm->total_vm -= context->pages[qual]; + current->mm->locked_vm -= context->pages[qual]; + context->pages[qual] = 0; + context->owner[qual] = NULL; - out_tracer: - kfree(tracer); + /* + * we put the context twice: + * once for the ds_get_context + * once for the corresponding ds_request + */ + ds_put_context(context); out: - return ERR_PTR(error); -} - -static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual) -{ - BUG_ON(tracer->context->owner[qual] != tracer); - tracer->context->owner[qual] = NULL; - - put_tracer(tracer->context->task); - ds_put_context(tracer->context); + ds_put_context(context); + return error; } -int ds_release_bts(struct bts_tracer *tracer) +int ds_release_bts(struct task_struct *task) { - if (!tracer) - return -EINVAL; - - ds_release(&tracer->ds, ds_bts); - kfree(tracer); - - return 0; + return ds_release(task, ds_bts); } -int ds_release_pebs(struct pebs_tracer *tracer) +int ds_release_pebs(struct task_struct *task) { - if (!tracer) - return -EINVAL; - - ds_release(&tracer->ds, ds_pebs); - kfree(tracer); - - return 0; + return ds_release(task, ds_pebs); } -static size_t ds_get_index(struct ds_context *context, enum ds_qualifier qual) +static int ds_get_index(struct task_struct *task, size_t *pos, + enum ds_qualifier qual) { + struct ds_context *context; unsigned long base, index; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; base = ds_get(context->ds, qual, ds_buffer_base); index = ds_get(context->ds, qual, ds_index); - return (index - base) / ds_cfg.sizeof_rec[qual]; + error = ((index - base) / ds_cfg.sizeof_rec[qual]); + if (pos) + *pos = error; + out: + ds_put_context(context); + return error; } -int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos) +int ds_get_bts_index(struct task_struct *task, size_t *pos) { - if (!tracer) - return -EINVAL; - - if (!pos) - return -EINVAL; - - *pos = ds_get_index(tracer->ds.context, ds_bts); - - return 0; + return ds_get_index(task, pos, ds_bts); } -int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos) +int ds_get_pebs_index(struct task_struct *task, size_t *pos) { - if (!tracer) - return -EINVAL; - - if (!pos) - return -EINVAL; - - *pos = ds_get_index(tracer->ds.context, ds_pebs); - - return 0; + return ds_get_index(task, pos, ds_pebs); } -static size_t ds_get_end(struct ds_context *context, enum ds_qualifier qual) +static int ds_get_end(struct task_struct *task, size_t *pos, + enum ds_qualifier qual) { - unsigned long base, max; + struct ds_context *context; + unsigned long base, end; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; base = ds_get(context->ds, qual, ds_buffer_base); - max = ds_get(context->ds, qual, ds_absolute_maximum); + end = ds_get(context->ds, qual, ds_absolute_maximum); - return (max - base) / ds_cfg.sizeof_rec[qual]; + error = ((end - base) / ds_cfg.sizeof_rec[qual]); + if (pos) + *pos = error; + out: + ds_put_context(context); + return error; } -int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos) +int ds_get_bts_end(struct task_struct *task, size_t *pos) { - if (!tracer) - return -EINVAL; - - if (!pos) - return -EINVAL; - - *pos = ds_get_end(tracer->ds.context, ds_bts); - - return 0; + return ds_get_end(task, pos, ds_bts); } -int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos) +int ds_get_pebs_end(struct task_struct *task, size_t *pos) { - if (!tracer) - return -EINVAL; - - if (!pos) - return -EINVAL; - - *pos = ds_get_end(tracer->ds.context, ds_pebs); - - return 0; + return ds_get_end(task, pos, ds_pebs); } -static int ds_access(struct ds_context *context, enum ds_qualifier qual, - size_t index, const void **record) +static int ds_access(struct task_struct *task, size_t index, + const void **record, enum ds_qualifier qual) { + struct ds_context *context; unsigned long base, idx; + int error; if (!record) return -EINVAL; + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + base = ds_get(context->ds, qual, ds_buffer_base); idx = base + (index * ds_cfg.sizeof_rec[qual]); + error = -EINVAL; if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) - return -EINVAL; + goto out; *record = (const void *)idx; - - return ds_cfg.sizeof_rec[qual]; + error = ds_cfg.sizeof_rec[qual]; + out: + ds_put_context(context); + return error; } -int ds_access_bts(struct bts_tracer *tracer, size_t index, - const void **record) +int ds_access_bts(struct task_struct *task, size_t index, const void **record) { - if (!tracer) - return -EINVAL; - - return ds_access(tracer->ds.context, ds_bts, index, record); + return ds_access(task, index, record, ds_bts); } -int ds_access_pebs(struct pebs_tracer *tracer, size_t index, - const void **record) +int ds_access_pebs(struct task_struct *task, size_t index, const void **record) { - if (!tracer) - return -EINVAL; - - return ds_access(tracer->ds.context, ds_pebs, index, record); + return ds_access(task, index, record, ds_pebs); } -static int ds_write(struct ds_context *context, enum ds_qualifier qual, - const void *record, size_t size) +static int ds_write(struct task_struct *task, const void *record, size_t size, + enum ds_qualifier qual, int force) { - int bytes_written = 0; + struct ds_context *context; + int error; if (!record) return -EINVAL; + error = -EPERM; + context = ds_get_context(task); + if (!context) + goto out; + + if (!force) { + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + } + + error = 0; while (size) { unsigned long base, index, end, write_end, int_th; unsigned long write_size, adj_write_size; @@ -625,14 +678,14 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, write_end = end; if (write_end <= index) - break; + goto out; write_size = min((unsigned long) size, write_end - index); memcpy((void *)index, record, write_size); record = (const char *)record + write_size; - size -= write_size; - bytes_written += write_size; + size -= write_size; + error += write_size; adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; adj_write_size *= ds_cfg.sizeof_rec[qual]; @@ -647,32 +700,47 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, ds_set(context->ds, qual, ds_index, index); if (index >= int_th) - ds_overflow(context, qual); + ds_overflow(task, context, qual); } - return bytes_written; + out: + ds_put_context(context); + return error; } -int ds_write_bts(struct bts_tracer *tracer, const void *record, size_t size) +int ds_write_bts(struct task_struct *task, const void *record, size_t size) { - if (!tracer) - return -EINVAL; + return ds_write(task, record, size, ds_bts, /* force = */ 0); +} - return ds_write(tracer->ds.context, ds_bts, record, size); +int ds_write_pebs(struct task_struct *task, const void *record, size_t size) +{ + return ds_write(task, record, size, ds_pebs, /* force = */ 0); } -int ds_write_pebs(struct pebs_tracer *tracer, const void *record, size_t size) +int ds_unchecked_write_bts(struct task_struct *task, + const void *record, size_t size) { - if (!tracer) - return -EINVAL; + return ds_write(task, record, size, ds_bts, /* force = */ 1); +} - return ds_write(tracer->ds.context, ds_pebs, record, size); +int ds_unchecked_write_pebs(struct task_struct *task, + const void *record, size_t size) +{ + return ds_write(task, record, size, ds_pebs, /* force = */ 1); } -static void ds_reset_or_clear(struct ds_context *context, - enum ds_qualifier qual, int clear) +static int ds_reset_or_clear(struct task_struct *task, + enum ds_qualifier qual, int clear) { + struct ds_context *context; unsigned long base, end; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; base = ds_get(context->ds, qual, ds_buffer_base); end = ds_get(context->ds, qual, ds_absolute_maximum); @@ -681,69 +749,70 @@ static void ds_reset_or_clear(struct ds_context *context, memset((void *)base, 0, end - base); ds_set(context->ds, qual, ds_index, base); + + error = 0; + out: + ds_put_context(context); + return error; } -int ds_reset_bts(struct bts_tracer *tracer) +int ds_reset_bts(struct task_struct *task) { - if (!tracer) - return -EINVAL; - - ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 0); - - return 0; + return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); } -int ds_reset_pebs(struct pebs_tracer *tracer) +int ds_reset_pebs(struct task_struct *task) { - if (!tracer) - return -EINVAL; - - ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 0); - - return 0; + return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); } -int ds_clear_bts(struct bts_tracer *tracer) +int ds_clear_bts(struct task_struct *task) { - if (!tracer) - return -EINVAL; - - ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 1); - - return 0; + return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); } -int ds_clear_pebs(struct pebs_tracer *tracer) +int ds_clear_pebs(struct task_struct *task) { - if (!tracer) - return -EINVAL; - - ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 1); - - return 0; + return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); } -int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value) +int ds_get_pebs_reset(struct task_struct *task, u64 *value) { - if (!tracer) - return -EINVAL; + struct ds_context *context; + int error; if (!value) return -EINVAL; - *value = *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); + context = ds_get_context(task); + error = ds_validate_access(context, ds_pebs); + if (error < 0) + goto out; - return 0; + *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); + + error = 0; + out: + ds_put_context(context); + return error; } -int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) +int ds_set_pebs_reset(struct task_struct *task, u64 value) { - if (!tracer) - return -EINVAL; + struct ds_context *context; + int error; - *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; + context = ds_get_context(task); + error = ds_validate_access(context, ds_pebs); + if (error < 0) + goto out; + + *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; - return 0; + error = 0; + out: + ds_put_context(context); + return error; } static const struct ds_configuration ds_cfg_var = { @@ -771,10 +840,6 @@ static inline void ds_configure(const struct ds_configuration *cfg) { ds_cfg = *cfg; - - printk(KERN_INFO "DS available\n"); - - BUG_ON(MAX_SIZEOF_DS < ds_cfg.sizeof_ds); } void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) @@ -782,16 +847,17 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) switch (c->x86) { case 0x6: switch (c->x86_model) { - case 0 ... 0xC: - /* sorry, don't know about them */ - break; case 0xD: case 0xE: /* Pentium M */ ds_configure(&ds_cfg_var); break; - default: /* Core2, Atom, ... */ + case 0xF: /* Core2 */ + case 0x1C: /* Atom */ ds_configure(&ds_cfg_64); break; + default: + /* sorry, don't know about them */ + break; } break; case 0xF: @@ -818,8 +884,6 @@ void ds_free(struct ds_context *context) * is dying. There should not be any user of that context left * to disturb us, anymore. */ unsigned long leftovers = context->count; - while (leftovers--) { - put_tracer(context->task); + while (leftovers--) ds_put_context(context); - } } diff --git a/trunk/arch/x86/kernel/dumpstack.c b/trunk/arch/x86/kernel/dumpstack.c deleted file mode 100644 index 6b1f6f6f8661..000000000000 --- a/trunk/arch/x86/kernel/dumpstack.c +++ /dev/null @@ -1,351 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "dumpstack.h" - -int panic_on_unrecovered_nmi; -unsigned int code_bytes = 64; -int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; -static int die_counter; - -void printk_address(unsigned long address, int reliable) -{ - printk(" [<%p>] %s%pS\n", (void *) address, - reliable ? "" : "? ", (void *) address); -} - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -static void -print_ftrace_graph_addr(unsigned long addr, void *data, - const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) -{ - struct task_struct *task = tinfo->task; - unsigned long ret_addr; - int index = task->curr_ret_stack; - - if (addr != (unsigned long)return_to_handler) - return; - - if (!task->ret_stack || index < *graph) - return; - - index -= *graph; - ret_addr = task->ret_stack[index].ret; - - ops->address(data, ret_addr, 1); - - (*graph)++; -} -#else -static inline void -print_ftrace_graph_addr(unsigned long addr, void *data, - const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) -{ } -#endif - -/* - * x86-64 can have up to three kernel stacks: - * process stack - * interrupt stack - * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack - */ - -static inline int valid_stack_ptr(struct thread_info *tinfo, - void *p, unsigned int size, void *end) -{ - void *t = tinfo; - if (end) { - if (p < end && p >= (end-THREAD_SIZE)) - return 1; - else - return 0; - } - return p > t && p < t + THREAD_SIZE - size; -} - -unsigned long -print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end, int *graph) -{ - struct stack_frame *frame = (struct stack_frame *)bp; - - while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { - unsigned long addr; - - addr = *stack; - if (__kernel_text_address(addr)) { - if ((unsigned long) stack == bp + sizeof(long)) { - ops->address(data, addr, 1); - frame = frame->next_frame; - bp = (unsigned long) frame; - } else { - ops->address(data, addr, bp == 0); - } - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); - } - stack++; - } - return bp; -} - - -static void -print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) -{ - printk(data); - print_symbol(msg, symbol); - printk("\n"); -} - -static void print_trace_warning(void *data, char *msg) -{ - printk("%s%s\n", (char *)data, msg); -} - -static int print_trace_stack(void *data, char *name) -{ - printk("%s <%s> ", (char *)data, name); - return 0; -} - -/* - * Print one address/symbol entries per line. - */ -static void print_trace_address(void *data, unsigned long addr, int reliable) -{ - touch_nmi_watchdog(); - printk(data); - printk_address(addr, reliable); -} - -static const struct stacktrace_ops print_trace_ops = { - .warning = print_trace_warning, - .warning_symbol = print_trace_warning_symbol, - .stack = print_trace_stack, - .address = print_trace_address, -}; - -void -show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl) -{ - printk("%sCall Trace:\n", log_lvl); - dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); -} - -void show_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp) -{ - show_trace_log_lvl(task, regs, stack, bp, ""); -} - -void show_stack(struct task_struct *task, unsigned long *sp) -{ - show_stack_log_lvl(task, NULL, sp, 0, ""); -} - -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - unsigned long bp = 0; - unsigned long stack; - -#ifdef CONFIG_FRAME_POINTER - if (!bp) - get_bp(bp); -#endif - - printk("Pid: %d, comm: %.20s %s %s %.*s\n", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - show_trace(NULL, NULL, &stack, bp); -} -EXPORT_SYMBOL(dump_stack); - -static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; -static int die_owner = -1; -static unsigned int die_nest_count; - -unsigned __kprobes long oops_begin(void) -{ - int cpu; - unsigned long flags; - - oops_enter(); - - /* racy, but better than risking deadlock. */ - raw_local_irq_save(flags); - cpu = smp_processor_id(); - if (!__raw_spin_trylock(&die_lock)) { - if (cpu == die_owner) - /* nested oops. should stop eventually */; - else - __raw_spin_lock(&die_lock); - } - die_nest_count++; - die_owner = cpu; - console_verbose(); - bust_spinlocks(1); - return flags; -} - -void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) -{ - if (regs && kexec_should_crash(current)) - crash_kexec(regs); - - bust_spinlocks(0); - die_owner = -1; - add_taint(TAINT_DIE); - die_nest_count--; - if (!die_nest_count) - /* Nest count reaches zero, release the lock. */ - __raw_spin_unlock(&die_lock); - raw_local_irq_restore(flags); - oops_exit(); - - if (!signr) - return; - if (in_interrupt()) - panic("Fatal exception in interrupt"); - if (panic_on_oops) - panic("Fatal exception"); - do_exit(signr); -} - -int __kprobes __die(const char *str, struct pt_regs *regs, long err) -{ -#ifdef CONFIG_X86_32 - unsigned short ss; - unsigned long sp; -#endif - printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP "); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); -#endif - printk("\n"); - sysfs_printk_last_file(); - if (notify_die(DIE_OOPS, str, regs, err, - current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) - return 1; - - show_registers(regs); -#ifdef CONFIG_X86_32 - sp = (unsigned long) (®s->sp); - savesegment(ss, ss); - if (user_mode(regs)) { - sp = regs->sp; - ss = regs->ss & 0xffff; - } - printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); - print_symbol("%s", regs->ip); - printk(" SS:ESP %04x:%08lx\n", ss, sp); -#else - /* Executive summary in case the oops scrolled away */ - printk(KERN_ALERT "RIP "); - printk_address(regs->ip, 1); - printk(" RSP <%016lx>\n", regs->sp); -#endif - return 0; -} - -/* - * This is gone through when something in the kernel has done something bad - * and is about to be terminated: - */ -void die(const char *str, struct pt_regs *regs, long err) -{ - unsigned long flags = oops_begin(); - int sig = SIGSEGV; - - if (!user_mode_vm(regs)) - report_bug(regs->ip, regs); - - if (__die(str, regs, err)) - sig = 0; - oops_end(flags, regs, sig); -} - -void notrace __kprobes -die_nmi(char *str, struct pt_regs *regs, int do_panic) -{ - unsigned long flags; - - if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; - - /* - * We are in trouble anyway, lets at least try - * to get a message out. - */ - flags = oops_begin(); - printk(KERN_EMERG "%s", str); - printk(" on CPU%d, ip %08lx, registers:\n", - smp_processor_id(), regs->ip); - show_registers(regs); - oops_end(flags, regs, 0); - if (do_panic || panic_on_oops) - panic("Non maskable interrupt"); - nmi_exit(); - local_irq_enable(); - do_exit(SIGBUS); -} - -static int __init oops_setup(char *s) -{ - if (!s) - return -EINVAL; - if (!strcmp(s, "panic")) - panic_on_oops = 1; - return 0; -} -early_param("oops", oops_setup); - -static int __init kstack_setup(char *s) -{ - if (!s) - return -EINVAL; - kstack_depth_to_print = simple_strtoul(s, NULL, 0); - return 0; -} -early_param("kstack", kstack_setup); - -static int __init code_bytes_setup(char *s) -{ - code_bytes = simple_strtoul(s, NULL, 0); - if (code_bytes > 8192) - code_bytes = 8192; - - return 1; -} -__setup("code_bytes=", code_bytes_setup); diff --git a/trunk/arch/x86/kernel/dumpstack.h b/trunk/arch/x86/kernel/dumpstack.h deleted file mode 100644 index da87590b8698..000000000000 --- a/trunk/arch/x86/kernel/dumpstack.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs - */ - -#ifndef DUMPSTACK_H -#define DUMPSTACK_H - -#ifdef CONFIG_X86_32 -#define STACKSLOTS_PER_LINE 8 -#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) -#else -#define STACKSLOTS_PER_LINE 4 -#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) -#endif - -extern unsigned long -print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end, int *graph); - -extern void -show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl); - -extern void -show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl); - -extern unsigned int code_bytes; -extern int kstack_depth_to_print; - -/* The form of the top of the frame on the stack */ -struct stack_frame { - struct stack_frame *next_frame; - unsigned long return_address; -}; -#endif diff --git a/trunk/arch/x86/kernel/dumpstack_32.c b/trunk/arch/x86/kernel/dumpstack_32.c index d593cd1f58dc..b3614752197b 100644 --- a/trunk/arch/x86/kernel/dumpstack_32.c +++ b/trunk/arch/x86/kernel/dumpstack_32.c @@ -17,14 +17,69 @@ #include -#include "dumpstack.h" +#define STACKSLOTS_PER_LINE 8 +#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) + +int panic_on_unrecovered_nmi; +int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; +static unsigned int code_bytes = 64; +static int die_counter; + +void printk_address(unsigned long address, int reliable) +{ + printk(" [<%p>] %s%pS\n", (void *) address, + reliable ? "" : "? ", (void *) address); +} + +static inline int valid_stack_ptr(struct thread_info *tinfo, + void *p, unsigned int size, void *end) +{ + void *t = tinfo; + if (end) { + if (p < end && p >= (end-THREAD_SIZE)) + return 1; + else + return 0; + } + return p > t && p < t + THREAD_SIZE - size; +} + +/* The form of the top of the frame on the stack */ +struct stack_frame { + struct stack_frame *next_frame; + unsigned long return_address; +}; + +static inline unsigned long +print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data, + unsigned long *end) +{ + struct stack_frame *frame = (struct stack_frame *)bp; + + while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { + unsigned long addr; + + addr = *stack; + if (__kernel_text_address(addr)) { + if ((unsigned long) stack == bp + sizeof(long)) { + ops->address(data, addr, 1); + frame = frame->next_frame; + bp = (unsigned long) frame; + } else { + ops->address(data, addr, bp == 0); + } + } + stack++; + } + return bp; +} void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data) { - int graph = 0; - if (!task) task = current; @@ -52,8 +107,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = print_context_stack(context, stack, bp, ops, - data, NULL, &graph); + bp = print_context_stack(context, stack, bp, ops, data, NULL); stack = (unsigned long *)context->previous_esp; if (!stack) @@ -65,7 +119,57 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, } EXPORT_SYMBOL(dump_trace); -void +static void +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) +{ + printk(data); + print_symbol(msg, symbol); + printk("\n"); +} + +static void print_trace_warning(void *data, char *msg) +{ + printk("%s%s\n", (char *)data, msg); +} + +static int print_trace_stack(void *data, char *name) +{ + printk("%s <%s> ", (char *)data, name); + return 0; +} + +/* + * Print one address/symbol entries per line. + */ +static void print_trace_address(void *data, unsigned long addr, int reliable) +{ + touch_nmi_watchdog(); + printk(data); + printk_address(addr, reliable); +} + +static const struct stacktrace_ops print_trace_ops = { + .warning = print_trace_warning, + .warning_symbol = print_trace_warning_symbol, + .stack = print_trace_stack, + .address = print_trace_address, +}; + +static void +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, char *log_lvl) +{ + printk("%sCall Trace:\n", log_lvl); + dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); +} + +void show_trace(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp) +{ + show_trace_log_lvl(task, regs, stack, bp, ""); +} + +static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *sp, unsigned long bp, char *log_lvl) { @@ -92,6 +196,33 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, show_trace_log_lvl(task, regs, sp, bp, log_lvl); } +void show_stack(struct task_struct *task, unsigned long *sp) +{ + show_stack_log_lvl(task, NULL, sp, 0, ""); +} + +/* + * The architecture-independent dump_stack generator + */ +void dump_stack(void) +{ + unsigned long bp = 0; + unsigned long stack; + +#ifdef CONFIG_FRAME_POINTER + if (!bp) + get_bp(bp); +#endif + + printk("Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + show_trace(NULL, NULL, &stack, bp); +} + +EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) { @@ -152,3 +283,167 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } +static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; + +unsigned __kprobes long oops_begin(void) +{ + unsigned long flags; + + oops_enter(); + + if (die_owner != raw_smp_processor_id()) { + console_verbose(); + raw_local_irq_save(flags); + __raw_spin_lock(&die_lock); + die_owner = smp_processor_id(); + die_nest_count = 0; + bust_spinlocks(1); + } else { + raw_local_irq_save(flags); + } + die_nest_count++; + return flags; +} + +void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) +{ + bust_spinlocks(0); + die_owner = -1; + add_taint(TAINT_DIE); + __raw_spin_unlock(&die_lock); + raw_local_irq_restore(flags); + + if (!regs) + return; + + if (kexec_should_crash(current)) + crash_kexec(regs); + if (in_interrupt()) + panic("Fatal exception in interrupt"); + if (panic_on_oops) + panic("Fatal exception"); + oops_exit(); + do_exit(signr); +} + +int __kprobes __die(const char *str, struct pt_regs *regs, long err) +{ + unsigned short ss; + unsigned long sp; + + printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP "); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC"); +#endif + printk("\n"); + sysfs_printk_last_file(); + if (notify_die(DIE_OOPS, str, regs, err, + current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) + return 1; + + show_registers(regs); + /* Executive summary in case the oops scrolled away */ + sp = (unsigned long) (®s->sp); + savesegment(ss, ss); + if (user_mode(regs)) { + sp = regs->sp; + ss = regs->ss & 0xffff; + } + printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); + print_symbol("%s", regs->ip); + printk(" SS:ESP %04x:%08lx\n", ss, sp); + return 0; +} + +/* + * This is gone through when something in the kernel has done something bad + * and is about to be terminated: + */ +void die(const char *str, struct pt_regs *regs, long err) +{ + unsigned long flags = oops_begin(); + + if (die_nest_count < 3) { + report_bug(regs->ip, regs); + + if (__die(str, regs, err)) + regs = NULL; + } else { + printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); + } + + oops_end(flags, regs, SIGSEGV); +} + +static DEFINE_SPINLOCK(nmi_print_lock); + +void notrace __kprobes +die_nmi(char *str, struct pt_regs *regs, int do_panic) +{ + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) + return; + + spin_lock(&nmi_print_lock); + /* + * We are in trouble anyway, lets at least try + * to get a message out: + */ + bust_spinlocks(1); + printk(KERN_EMERG "%s", str); + printk(" on CPU%d, ip %08lx, registers:\n", + smp_processor_id(), regs->ip); + show_registers(regs); + if (do_panic) + panic("Non maskable interrupt"); + console_silent(); + spin_unlock(&nmi_print_lock); + + /* + * If we are in kernel we are probably nested up pretty bad + * and might aswell get out now while we still can: + */ + if (!user_mode_vm(regs)) { + current->thread.trap_no = 2; + crash_kexec(regs); + } + + bust_spinlocks(0); + do_exit(SIGSEGV); +} + +static int __init oops_setup(char *s) +{ + if (!s) + return -EINVAL; + if (!strcmp(s, "panic")) + panic_on_oops = 1; + return 0; +} +early_param("oops", oops_setup); + +static int __init kstack_setup(char *s) +{ + if (!s) + return -EINVAL; + kstack_depth_to_print = simple_strtoul(s, NULL, 0); + return 0; +} +early_param("kstack", kstack_setup); + +static int __init code_bytes_setup(char *s) +{ + code_bytes = simple_strtoul(s, NULL, 0); + if (code_bytes > 8192) + code_bytes = 8192; + + return 1; +} +__setup("code_bytes=", code_bytes_setup); diff --git a/trunk/arch/x86/kernel/dumpstack_64.c b/trunk/arch/x86/kernel/dumpstack_64.c index c302d0707048..96a5db7da8a7 100644 --- a/trunk/arch/x86/kernel/dumpstack_64.c +++ b/trunk/arch/x86/kernel/dumpstack_64.c @@ -17,7 +17,19 @@ #include -#include "dumpstack.h" +#define STACKSLOTS_PER_LINE 4 +#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) + +int panic_on_unrecovered_nmi; +int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; +static unsigned int code_bytes = 64; +static int die_counter; + +void printk_address(unsigned long address, int reliable) +{ + printk(" [<%p>] %s%pS\n", (void *) address, + reliable ? "" : "? ", (void *) address); +} static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, unsigned *usedp, char **idp) @@ -101,6 +113,51 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ +static inline int valid_stack_ptr(struct thread_info *tinfo, + void *p, unsigned int size, void *end) +{ + void *t = tinfo; + if (end) { + if (p < end && p >= (end-THREAD_SIZE)) + return 1; + else + return 0; + } + return p > t && p < t + THREAD_SIZE - size; +} + +/* The form of the top of the frame on the stack */ +struct stack_frame { + struct stack_frame *next_frame; + unsigned long return_address; +}; + +static inline unsigned long +print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data, + unsigned long *end) +{ + struct stack_frame *frame = (struct stack_frame *)bp; + + while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { + unsigned long addr; + + addr = *stack; + if (__kernel_text_address(addr)) { + if ((unsigned long) stack == bp + sizeof(long)) { + ops->address(data, addr, 1); + frame = frame->next_frame; + bp = (unsigned long) frame; + } else { + ops->address(data, addr, bp == 0); + } + } + stack++; + } + return bp; +} + void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data) @@ -109,7 +166,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; unsigned used = 0; struct thread_info *tinfo; - int graph = 0; if (!task) task = current; @@ -150,7 +206,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, break; bp = print_context_stack(tinfo, stack, bp, ops, - data, estack_end, &graph); + data, estack_end); ops->stack(data, ""); /* * We link to the next stack via the @@ -169,7 +225,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, "IRQ") < 0) break; bp = print_context_stack(tinfo, stack, bp, - ops, data, irqstack_end, &graph); + ops, data, irqstack_end); /* * We link to the next stack (which would be * the process stack normally) the last @@ -187,12 +243,62 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, /* * This handles the process stack: */ - bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph); + bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); put_cpu(); } EXPORT_SYMBOL(dump_trace); -void +static void +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) +{ + printk(data); + print_symbol(msg, symbol); + printk("\n"); +} + +static void print_trace_warning(void *data, char *msg) +{ + printk("%s%s\n", (char *)data, msg); +} + +static int print_trace_stack(void *data, char *name) +{ + printk("%s <%s> ", (char *)data, name); + return 0; +} + +/* + * Print one address/symbol entries per line. + */ +static void print_trace_address(void *data, unsigned long addr, int reliable) +{ + touch_nmi_watchdog(); + printk(data); + printk_address(addr, reliable); +} + +static const struct stacktrace_ops print_trace_ops = { + .warning = print_trace_warning, + .warning_symbol = print_trace_warning_symbol, + .stack = print_trace_stack, + .address = print_trace_address, +}; + +static void +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, char *log_lvl) +{ + printk("%sCall Trace:\n", log_lvl); + dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); +} + +void show_trace(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp) +{ + show_trace_log_lvl(task, regs, stack, bp, ""); +} + +static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *sp, unsigned long bp, char *log_lvl) { @@ -236,6 +342,33 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, show_trace_log_lvl(task, regs, sp, bp, log_lvl); } +void show_stack(struct task_struct *task, unsigned long *sp) +{ + show_stack_log_lvl(task, NULL, sp, 0, ""); +} + +/* + * The architecture-independent dump_stack generator + */ +void dump_stack(void) +{ + unsigned long bp = 0; + unsigned long stack; + +#ifdef CONFIG_FRAME_POINTER + if (!bp) + get_bp(bp); +#endif + + printk("Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + show_trace(NULL, NULL, &stack, bp); +} +EXPORT_SYMBOL(dump_stack); + void show_registers(struct pt_regs *regs) { int i; @@ -296,3 +429,147 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } +static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; + +unsigned __kprobes long oops_begin(void) +{ + int cpu; + unsigned long flags; + + oops_enter(); + + /* racy, but better than risking deadlock. */ + raw_local_irq_save(flags); + cpu = smp_processor_id(); + if (!__raw_spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. should stop eventually */; + else + __raw_spin_lock(&die_lock); + } + die_nest_count++; + die_owner = cpu; + console_verbose(); + bust_spinlocks(1); + return flags; +} + +void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) +{ + die_owner = -1; + bust_spinlocks(0); + die_nest_count--; + if (!die_nest_count) + /* Nest count reaches zero, release the lock. */ + __raw_spin_unlock(&die_lock); + raw_local_irq_restore(flags); + if (!regs) { + oops_exit(); + return; + } + if (in_interrupt()) + panic("Fatal exception in interrupt"); + if (panic_on_oops) + panic("Fatal exception"); + oops_exit(); + do_exit(signr); +} + +int __kprobes __die(const char *str, struct pt_regs *regs, long err) +{ + printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP "); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC"); +#endif + printk("\n"); + sysfs_printk_last_file(); + if (notify_die(DIE_OOPS, str, regs, err, + current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) + return 1; + + show_registers(regs); + add_taint(TAINT_DIE); + /* Executive summary in case the oops scrolled away */ + printk(KERN_ALERT "RIP "); + printk_address(regs->ip, 1); + printk(" RSP <%016lx>\n", regs->sp); + if (kexec_should_crash(current)) + crash_kexec(regs); + return 0; +} + +void die(const char *str, struct pt_regs *regs, long err) +{ + unsigned long flags = oops_begin(); + + if (!user_mode(regs)) + report_bug(regs->ip, regs); + + if (__die(str, regs, err)) + regs = NULL; + oops_end(flags, regs, SIGSEGV); +} + +notrace __kprobes void +die_nmi(char *str, struct pt_regs *regs, int do_panic) +{ + unsigned long flags; + + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) + return; + + flags = oops_begin(); + /* + * We are in trouble anyway, lets at least try + * to get a message out. + */ + printk(KERN_EMERG "%s", str); + printk(" on CPU%d, ip %08lx, registers:\n", + smp_processor_id(), regs->ip); + show_registers(regs); + if (kexec_should_crash(current)) + crash_kexec(regs); + if (do_panic || panic_on_oops) + panic("Non maskable interrupt"); + oops_end(flags, NULL, SIGBUS); + nmi_exit(); + local_irq_enable(); + do_exit(SIGBUS); +} + +static int __init oops_setup(char *s) +{ + if (!s) + return -EINVAL; + if (!strcmp(s, "panic")) + panic_on_oops = 1; + return 0; +} +early_param("oops", oops_setup); + +static int __init kstack_setup(char *s) +{ + if (!s) + return -EINVAL; + kstack_depth_to_print = simple_strtoul(s, NULL, 0); + return 0; +} +early_param("kstack", kstack_setup); + +static int __init code_bytes_setup(char *s) +{ + code_bytes = simple_strtoul(s, NULL, 0); + if (code_bytes > 8192) + code_bytes = 8192; + + return 1; +} +__setup("code_bytes=", code_bytes_setup); diff --git a/trunk/arch/x86/kernel/entry_32.S b/trunk/arch/x86/kernel/entry_32.S index 43ceb3f454bf..28b597ef9ca1 100644 --- a/trunk/arch/x86/kernel/entry_32.S +++ b/trunk/arch/x86/kernel/entry_32.S @@ -1157,9 +1157,6 @@ ENTRY(mcount) END(mcount) ENTRY(ftrace_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - pushl %eax pushl %ecx pushl %edx @@ -1174,11 +1171,6 @@ ftrace_call: popl %edx popl %ecx popl %eax -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: - jmp ftrace_stub -#endif .globl ftrace_stub ftrace_stub: @@ -1188,18 +1180,8 @@ END(ftrace_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) - cmpl $0, function_trace_stop - jne ftrace_stub - cmpl $ftrace_stub, ftrace_trace_function jnz trace -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - cmpl $ftrace_stub, ftrace_graph_return - jnz ftrace_graph_caller - - cmpl $ftrace_graph_entry_stub, ftrace_graph_entry - jnz ftrace_graph_caller -#endif .globl ftrace_stub ftrace_stub: ret @@ -1218,43 +1200,12 @@ trace: popl %edx popl %ecx popl %eax + jmp ftrace_stub END(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - - pushl %eax - pushl %ecx - pushl %edx - movl 0xc(%esp), %edx - lea 0x4(%ebp), %eax - subl $MCOUNT_INSN_SIZE, %edx - call prepare_ftrace_return - popl %edx - popl %ecx - popl %eax - ret -END(ftrace_graph_caller) - -.globl return_to_handler -return_to_handler: - pushl $0 - pushl %eax - pushl %ecx - pushl %edx - call ftrace_return_to_handler - movl %eax, 0xc(%esp) - popl %edx - popl %ecx - popl %eax - ret -#endif - .section .rodata,"a" #include "syscall_table_32.S" diff --git a/trunk/arch/x86/kernel/entry_64.S b/trunk/arch/x86/kernel/entry_64.S index 54e0bbdccb99..b86f332c96a6 100644 --- a/trunk/arch/x86/kernel/entry_64.S +++ b/trunk/arch/x86/kernel/entry_64.S @@ -68,8 +68,6 @@ ENTRY(mcount) END(mcount) ENTRY(ftrace_caller) - cmpl $0, function_trace_stop - jne ftrace_stub /* taken from glibc */ subq $0x38, %rsp @@ -98,12 +96,6 @@ ftrace_call: movq (%rsp), %rax addq $0x38, %rsp -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: - jmp ftrace_stub -#endif - .globl ftrace_stub ftrace_stub: retq @@ -111,20 +103,8 @@ END(ftrace_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) - cmpl $0, function_trace_stop - jne ftrace_stub - cmpq $ftrace_stub, ftrace_trace_function jnz trace - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - cmpq $ftrace_stub, ftrace_graph_return - jnz ftrace_graph_caller - - cmpq $ftrace_graph_entry_stub, ftrace_graph_entry - jnz ftrace_graph_caller -#endif - .globl ftrace_stub ftrace_stub: retq @@ -160,69 +140,6 @@ END(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - - subq $0x38, %rsp - movq %rax, (%rsp) - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 24(%rsp) - movq %rdi, 32(%rsp) - movq %r8, 40(%rsp) - movq %r9, 48(%rsp) - - leaq 8(%rbp), %rdi - movq 0x38(%rsp), %rsi - subq $MCOUNT_INSN_SIZE, %rsi - - call prepare_ftrace_return - - movq 48(%rsp), %r9 - movq 40(%rsp), %r8 - movq 32(%rsp), %rdi - movq 24(%rsp), %rsi - movq 16(%rsp), %rdx - movq 8(%rsp), %rcx - movq (%rsp), %rax - addq $0x38, %rsp - retq -END(ftrace_graph_caller) - - -.globl return_to_handler -return_to_handler: - subq $80, %rsp - - movq %rax, (%rsp) - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 24(%rsp) - movq %rdi, 32(%rsp) - movq %r8, 40(%rsp) - movq %r9, 48(%rsp) - movq %r10, 56(%rsp) - movq %r11, 64(%rsp) - - call ftrace_return_to_handler - - movq %rax, 72(%rsp) - movq 64(%rsp), %r11 - movq 56(%rsp), %r10 - movq 48(%rsp), %r9 - movq 40(%rsp), %r8 - movq 32(%rsp), %rdi - movq 24(%rsp), %rsi - movq 16(%rsp), %rdx - movq 8(%rsp), %rcx - movq (%rsp), %rax - addq $72, %rsp - retq -#endif - - #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args #endif diff --git a/trunk/arch/x86/kernel/es7000_32.c b/trunk/arch/x86/kernel/es7000_32.c index 53699c931ad4..0aa2c443d600 100644 --- a/trunk/arch/x86/kernel/es7000_32.c +++ b/trunk/arch/x86/kernel/es7000_32.c @@ -38,11 +38,8 @@ #include #include #include -#include #include #include -#include -#include /* * ES7000 chipsets @@ -164,43 +161,6 @@ es7000_rename_gsi(int ioapic, int gsi) return gsi; } -static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) -{ - unsigned long vect = 0, psaival = 0; - - if (psai == NULL) - return -1; - - vect = ((unsigned long)__pa(eip)/0x1000) << 16; - psaival = (0x1000000 | vect | cpu); - - while (*psai & 0x1000000) - ; - - *psai = psaival; - - return 0; -} - -static void noop_wait_for_deassert(atomic_t *deassert_not_used) -{ -} - -static int __init es7000_update_genapic(void) -{ - genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; - - /* MPENTIUMIII */ - if (boot_cpu_data.x86 == 6 && - (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) { - es7000_update_genapic_to_cluster(); - genapic->wait_for_init_deassert = noop_wait_for_deassert; - genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; - } - - return 0; -} - void __init setup_unisys(void) { @@ -216,8 +176,6 @@ setup_unisys(void) else es7000_plat = ES7000_CLASSIC; ioapic_renumber_irq = es7000_rename_gsi; - - x86_quirks->update_genapic = es7000_update_genapic; } /* @@ -359,6 +317,26 @@ es7000_mip_write(struct mip_reg *mip_reg) return status; } +int +es7000_start_cpu(int cpu, unsigned long eip) +{ + unsigned long vect = 0, psaival = 0; + + if (psai == NULL) + return -1; + + vect = ((unsigned long)__pa(eip)/0x1000) << 16; + psaival = (0x1000000 | vect | cpu); + + while (*psai & 0x1000000) + ; + + *psai = psaival; + + return 0; + +} + void __init es7000_sw_apic(void) { diff --git a/trunk/arch/x86/kernel/ftrace.c b/trunk/arch/x86/kernel/ftrace.c index 1b43086b097a..50ea0ac8c9bf 100644 --- a/trunk/arch/x86/kernel/ftrace.c +++ b/trunk/arch/x86/kernel/ftrace.c @@ -14,17 +14,14 @@ #include #include #include -#include #include #include #include -#include #include -#include -#ifdef CONFIG_DYNAMIC_FTRACE +static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; union ftrace_code_union { char code[MCOUNT_INSN_SIZE]; @@ -34,12 +31,18 @@ union ftrace_code_union { } __attribute__((packed)); }; + static int ftrace_calc_offset(long ip, long addr) { return (int)(addr - ip); } -static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +unsigned char *ftrace_nop_replace(void) +{ + return ftrace_nop; +} + +unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) { static union ftrace_code_union calc; @@ -53,142 +56,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) return calc.code; } -/* - * Modifying code must take extra care. On an SMP machine, if - * the code being modified is also being executed on another CPU - * that CPU will have undefined results and possibly take a GPF. - * We use kstop_machine to stop other CPUS from exectuing code. - * But this does not stop NMIs from happening. We still need - * to protect against that. We separate out the modification of - * the code to take care of this. - * - * Two buffers are added: An IP buffer and a "code" buffer. - * - * 1) Put the instruction pointer into the IP buffer - * and the new code into the "code" buffer. - * 2) Set a flag that says we are modifying code - * 3) Wait for any running NMIs to finish. - * 4) Write the code - * 5) clear the flag. - * 6) Wait for any running NMIs to finish. - * - * If an NMI is executed, the first thing it does is to call - * "ftrace_nmi_enter". This will check if the flag is set to write - * and if it is, it will write what is in the IP and "code" buffers. - * - * The trick is, it does not matter if everyone is writing the same - * content to the code location. Also, if a CPU is executing code - * it is OK to write to that code location if the contents being written - * are the same as what exists. - */ - -static atomic_t in_nmi = ATOMIC_INIT(0); -static int mod_code_status; /* holds return value of text write */ -static int mod_code_write; /* set when NMI should do the write */ -static void *mod_code_ip; /* holds the IP to write to */ -static void *mod_code_newcode; /* holds the text to write to the IP */ - -static unsigned nmi_wait_count; -static atomic_t nmi_update_count = ATOMIC_INIT(0); - -int ftrace_arch_read_dyn_info(char *buf, int size) -{ - int r; - - r = snprintf(buf, size, "%u %u", - nmi_wait_count, - atomic_read(&nmi_update_count)); - return r; -} - -static void ftrace_mod_code(void) -{ - /* - * Yes, more than one CPU process can be writing to mod_code_status. - * (and the code itself) - * But if one were to fail, then they all should, and if one were - * to succeed, then they all should. - */ - mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, - MCOUNT_INSN_SIZE); -} - -void ftrace_nmi_enter(void) -{ - atomic_inc(&in_nmi); - /* Must have in_nmi seen before reading write flag */ - smp_mb(); - if (mod_code_write) { - ftrace_mod_code(); - atomic_inc(&nmi_update_count); - } -} - -void ftrace_nmi_exit(void) -{ - /* Finish all executions before clearing in_nmi */ - smp_wmb(); - atomic_dec(&in_nmi); -} - -static void wait_for_nmi(void) -{ - int waited = 0; - - while (atomic_read(&in_nmi)) { - waited = 1; - cpu_relax(); - } - - if (waited) - nmi_wait_count++; -} - -static int -do_ftrace_mod_code(unsigned long ip, void *new_code) -{ - mod_code_ip = (void *)ip; - mod_code_newcode = new_code; - - /* The buffers need to be visible before we let NMIs write them */ - smp_wmb(); - - mod_code_write = 1; - - /* Make sure write bit is visible before we wait on NMIs */ - smp_mb(); - - wait_for_nmi(); - - /* Make sure all running NMIs have finished before we write the code */ - smp_mb(); - - ftrace_mod_code(); - - /* Make sure the write happens before clearing the bit */ - smp_wmb(); - - mod_code_write = 0; - - /* make sure NMIs see the cleared bit */ - smp_mb(); - - wait_for_nmi(); - - return mod_code_status; -} - - - - -static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; - -static unsigned char *ftrace_nop_replace(void) -{ - return ftrace_nop; -} - -static int +int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { @@ -213,7 +81,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return -EINVAL; /* replace the text with the new text */ - if (do_ftrace_mod_code(ip, new_code)) + if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) return -EPERM; sync_core(); @@ -221,29 +89,6 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return 0; } -int ftrace_make_nop(struct module *mod, - struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned char *new, *old; - unsigned long ip = rec->ip; - - old = ftrace_call_replace(ip, addr); - new = ftrace_nop_replace(); - - return ftrace_modify_code(rec->ip, old, new); -} - -int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned char *new, *old; - unsigned long ip = rec->ip; - - old = ftrace_nop_replace(); - new = ftrace_call_replace(ip, addr); - - return ftrace_modify_code(rec->ip, old, new); -} - int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); @@ -320,218 +165,3 @@ int __init ftrace_dyn_arch_init(void *data) return 0; } -#endif - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - -#ifdef CONFIG_DYNAMIC_FTRACE -extern void ftrace_graph_call(void); - -static int ftrace_mod_jmp(unsigned long ip, - int old_offset, int new_offset) -{ - unsigned char code[MCOUNT_INSN_SIZE]; - - if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) - return -EFAULT; - - if (code[0] != 0xe9 || old_offset != *(int *)(&code[1])) - return -EINVAL; - - *(int *)(&code[1]) = new_offset; - - if (do_ftrace_mod_code(ip, &code)) - return -EPERM; - - return 0; -} - -int ftrace_enable_ftrace_graph_caller(void) -{ - unsigned long ip = (unsigned long)(&ftrace_graph_call); - int old_offset, new_offset; - - old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); - new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); - - return ftrace_mod_jmp(ip, old_offset, new_offset); -} - -int ftrace_disable_ftrace_graph_caller(void) -{ - unsigned long ip = (unsigned long)(&ftrace_graph_call); - int old_offset, new_offset; - - old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); - new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); - - return ftrace_mod_jmp(ip, old_offset, new_offset); -} - -#else /* CONFIG_DYNAMIC_FTRACE */ - -/* - * These functions are picked from those used on - * this page for dynamic ftrace. They have been - * simplified to ignore all traces in NMI context. - */ -static atomic_t in_nmi; - -void ftrace_nmi_enter(void) -{ - atomic_inc(&in_nmi); -} - -void ftrace_nmi_exit(void) -{ - atomic_dec(&in_nmi); -} - -#endif /* !CONFIG_DYNAMIC_FTRACE */ - -/* Add a function return address to the trace stack on thread info.*/ -static int push_return_trace(unsigned long ret, unsigned long long time, - unsigned long func, int *depth) -{ - int index; - - if (!current->ret_stack) - return -EBUSY; - - /* The return trace stack is full */ - if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { - atomic_inc(¤t->trace_overrun); - return -EBUSY; - } - - index = ++current->curr_ret_stack; - barrier(); - current->ret_stack[index].ret = ret; - current->ret_stack[index].func = func; - current->ret_stack[index].calltime = time; - *depth = index; - - return 0; -} - -/* Retrieve a function return address to the trace stack on thread info.*/ -static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) -{ - int index; - - index = current->curr_ret_stack; - - if (unlikely(index < 0)) { - ftrace_graph_stop(); - WARN_ON(1); - /* Might as well panic, otherwise we have no where to go */ - *ret = (unsigned long)panic; - return; - } - - *ret = current->ret_stack[index].ret; - trace->func = current->ret_stack[index].func; - trace->calltime = current->ret_stack[index].calltime; - trace->overrun = atomic_read(¤t->trace_overrun); - trace->depth = index; - barrier(); - current->curr_ret_stack--; - -} - -/* - * Send the trace to the ring-buffer. - * @return the original return address. - */ -unsigned long ftrace_return_to_handler(void) -{ - struct ftrace_graph_ret trace; - unsigned long ret; - - pop_return_trace(&trace, &ret); - trace.rettime = cpu_clock(raw_smp_processor_id()); - ftrace_graph_return(&trace); - - if (unlikely(!ret)) { - ftrace_graph_stop(); - WARN_ON(1); - /* Might as well panic. What else to do? */ - ret = (unsigned long)panic; - } - - return ret; -} - -/* - * Hook the return address and push it in the stack of return addrs - * in current thread info. - */ -void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) -{ - unsigned long old; - unsigned long long calltime; - int faulted; - struct ftrace_graph_ent trace; - unsigned long return_hooker = (unsigned long) - &return_to_handler; - - /* Nmi's are currently unsupported */ - if (unlikely(atomic_read(&in_nmi))) - return; - - if (unlikely(atomic_read(¤t->tracing_graph_pause))) - return; - - /* - * Protect against fault, even if it shouldn't - * happen. This tool is too much intrusive to - * ignore such a protection. - */ - asm volatile( - "1: " _ASM_MOV " (%[parent_old]), %[old]\n" - "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n" - " movl $0, %[faulted]\n" - - ".section .fixup, \"ax\"\n" - "3: movl $1, %[faulted]\n" - ".previous\n" - - _ASM_EXTABLE(1b, 3b) - _ASM_EXTABLE(2b, 3b) - - : [parent_replaced] "=r" (parent), [old] "=r" (old), - [faulted] "=r" (faulted) - : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker) - : "memory" - ); - - if (unlikely(faulted)) { - ftrace_graph_stop(); - WARN_ON(1); - return; - } - - if (unlikely(!__kernel_text_address(old))) { - ftrace_graph_stop(); - *parent = old; - WARN_ON(1); - return; - } - - calltime = cpu_clock(raw_smp_processor_id()); - - if (push_return_trace(old, calltime, - self_addr, &trace.depth) == -EBUSY) { - *parent = old; - return; - } - - trace.func = self_addr; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; - *parent = old; - } -} -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/trunk/arch/x86/kernel/genapic_64.c b/trunk/arch/x86/kernel/genapic_64.c index 2bced78b0b8e..6c9bfc9e1e95 100644 --- a/trunk/arch/x86/kernel/genapic_64.c +++ b/trunk/arch/x86/kernel/genapic_64.c @@ -21,7 +21,6 @@ #include #include #include -#include extern struct genapic apic_flat; extern struct genapic apic_physflat; @@ -54,9 +53,6 @@ void __init setup_apic_routing(void) genapic = &apic_physflat; printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); } - - if (x86_quirks->update_genapic) - x86_quirks->update_genapic(); } /* Same for both flat and physical. */ diff --git a/trunk/arch/x86/kernel/genapic_flat_64.c b/trunk/arch/x86/kernel/genapic_flat_64.c index 34185488e4fb..c0262791bda4 100644 --- a/trunk/arch/x86/kernel/genapic_flat_64.c +++ b/trunk/arch/x86/kernel/genapic_flat_64.c @@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 1; } -static const struct cpumask *flat_target_cpus(void) +static cpumask_t flat_target_cpus(void) { - return cpu_online_mask; + return cpu_online_map; } -static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t flat_vector_allocation_domain(int cpu) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -45,8 +45,8 @@ static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } /* @@ -69,8 +69,9 @@ static void flat_init_apic_ldr(void) apic_write(APIC_LDR, val); } -static inline void _flat_send_IPI_mask(unsigned long mask, int vector) +static void flat_send_IPI_mask(cpumask_t cpumask, int vector) { + unsigned long mask = cpus_addr(cpumask)[0]; unsigned long flags; local_irq_save(flags); @@ -78,41 +79,20 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) local_irq_restore(flags); } -static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - - _flat_send_IPI_mask(mask, vector); -} - -static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, - int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - int cpu = smp_processor_id(); - - if (cpu < BITS_PER_LONG) - clear_bit(cpu, &mask); - _flat_send_IPI_mask(mask, vector); -} - static void flat_send_IPI_allbutself(int vector) { - int cpu = smp_processor_id(); #ifdef CONFIG_HOTPLUG_CPU int hotplug = 1; #else int hotplug = 0; #endif if (hotplug || vector == NMI_VECTOR) { - if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) { - unsigned long mask = cpumask_bits(cpu_online_mask)[0]; + cpumask_t allbutme = cpu_online_map; - if (cpu < BITS_PER_LONG) - clear_bit(cpu, &mask); + cpu_clear(smp_processor_id(), allbutme); - _flat_send_IPI_mask(mask, vector); - } + if (!cpus_empty(allbutme)) + flat_send_IPI_mask(allbutme, vector); } else if (num_online_cpus() > 1) { __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); } @@ -121,7 +101,7 @@ static void flat_send_IPI_allbutself(int vector) static void flat_send_IPI_all(int vector) { if (vector == NMI_VECTOR) - flat_send_IPI_mask(cpu_online_mask, vector); + flat_send_IPI_mask(cpu_online_map, vector); else __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } @@ -155,18 +135,9 @@ static int flat_apic_id_registered(void) return physid_isset(read_xapic_id(), phys_cpu_present_map); } -static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; -} - -static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) { - unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; - unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS; - - return mask1 & mask2; + return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; } static unsigned int phys_pkg_id(int index_msb) @@ -186,10 +157,8 @@ struct genapic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_allbutself = flat_send_IPI_allbutself, .send_IPI_mask = flat_send_IPI_mask, - .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, @@ -219,39 +188,35 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static const struct cpumask *physflat_target_cpus(void) +static cpumask_t physflat_target_cpus(void) { - return cpu_online_mask; + return cpu_online_map; } -static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t physflat_vector_allocation_domain(int cpu) { - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + return cpumask_of_cpu(cpu); } -static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) +static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) { send_IPI_mask_sequence(cpumask, vector); } -static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, - int vector) -{ - send_IPI_mask_allbutself(cpumask, vector); -} - static void physflat_send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + cpumask_t allbutme = cpu_online_map; + + cpu_clear(smp_processor_id(), allbutme); + physflat_send_IPI_mask(allbutme, vector); } static void physflat_send_IPI_all(int vector) { - physflat_send_IPI_mask(cpu_online_mask, vector); + physflat_send_IPI_mask(cpu_online_map, vector); } -static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) +static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -259,31 +224,13 @@ static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); + cpu = first_cpu(cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int -physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - return BAD_APICID; -} - struct genapic apic_physflat = { .name = "physical flat", .acpi_madt_oem_check = physflat_acpi_madt_oem_check, @@ -296,10 +243,8 @@ struct genapic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_allbutself = physflat_send_IPI_allbutself, .send_IPI_mask = physflat_send_IPI_mask, - .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/trunk/arch/x86/kernel/genx2apic_cluster.c b/trunk/arch/x86/kernel/genx2apic_cluster.c index 6ce497cc372d..f6a2c8eb48a6 100644 --- a/trunk/arch/x86/kernel/genx2apic_cluster.c +++ b/trunk/arch/x86/kernel/genx2apic_cluster.c @@ -22,18 +22,19 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static const struct cpumask *x2apic_target_cpus(void) +static cpumask_t x2apic_target_cpus(void) { - return cpumask_of(0); + return cpumask_of_cpu(0); } /* * for now each logical cpu is in its own vector allocation domain. */ -static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t x2apic_vector_allocation_domain(int cpu) { - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; } static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, @@ -55,53 +56,32 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, * at once. We have 16 cpu's in a cluster. This will minimize IPI register * writes. */ -static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) { unsigned long flags; unsigned long query_cpu; local_irq_save(flags); - for_each_cpu(query_cpu, mask) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); - local_irq_restore(flags); -} - -static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) -{ - unsigned long flags; - unsigned long query_cpu; - unsigned long this_cpu = smp_processor_id(); - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); + for_each_cpu_mask(query_cpu, mask) { + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); + } local_irq_restore(flags); } static void x2apic_send_IPI_allbutself(int vector) { - unsigned long flags; - unsigned long query_cpu; - unsigned long this_cpu = smp_processor_id(); + cpumask_t mask = cpu_online_map; - local_irq_save(flags); - for_each_online_cpu(query_cpu) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); - local_irq_restore(flags); + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); } static void x2apic_send_IPI_all(int vector) { - x2apic_send_IPI_mask(cpu_online_mask, vector); + x2apic_send_IPI_mask(cpu_online_map, vector); } static int x2apic_apic_id_registered(void) @@ -109,38 +89,21 @@ static int x2apic_apic_id_registered(void) return 1; } -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; /* - * We're using fixed IRQ delivery, can only return one logical APIC ID. + * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); - if ((unsigned)cpu < nr_cpu_ids) + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) return per_cpu(x86_cpu_to_logical_apicid, cpu); else return BAD_APICID; } -static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one logical APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_logical_apicid, cpu); - return BAD_APICID; -} - static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -187,10 +150,8 @@ struct genapic apic_x2apic_cluster = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/trunk/arch/x86/kernel/genx2apic_phys.c b/trunk/arch/x86/kernel/genx2apic_phys.c index 62895cf315ff..d042211768b7 100644 --- a/trunk/arch/x86/kernel/genx2apic_phys.c +++ b/trunk/arch/x86/kernel/genx2apic_phys.c @@ -29,15 +29,16 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static const struct cpumask *x2apic_target_cpus(void) +static cpumask_t x2apic_target_cpus(void) { - return cpumask_of(0); + return cpumask_of_cpu(0); } -static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t x2apic_vector_allocation_domain(int cpu) { - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; } static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, @@ -53,54 +54,32 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, x2apic_icr_write(cfg, apicid); } -static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) { unsigned long flags; unsigned long query_cpu; local_irq_save(flags); - for_each_cpu(query_cpu, mask) { + for_each_cpu_mask(query_cpu, mask) { __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } -static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) -{ - unsigned long flags; - unsigned long query_cpu; - unsigned long this_cpu = smp_processor_id(); - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); - } - local_irq_restore(flags); -} - static void x2apic_send_IPI_allbutself(int vector) { - unsigned long flags; - unsigned long query_cpu; - unsigned long this_cpu = smp_processor_id(); + cpumask_t mask = cpu_online_map; - local_irq_save(flags); - for_each_online_cpu(query_cpu) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); - local_irq_restore(flags); + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); } static void x2apic_send_IPI_all(int vector) { - x2apic_send_IPI_mask(cpu_online_mask, vector); + x2apic_send_IPI_mask(cpu_online_map, vector); } static int x2apic_apic_id_registered(void) @@ -108,7 +87,7 @@ static int x2apic_apic_id_registered(void) return 1; } -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -116,30 +95,13 @@ static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); - if ((unsigned)cpu < nr_cpu_ids) + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - return BAD_APICID; -} - static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -183,10 +145,8 @@ struct genapic apic_x2apic_phys = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/trunk/arch/x86/kernel/genx2apic_uv_x.c b/trunk/arch/x86/kernel/genx2apic_uv_x.c index 0e88be11227d..2c7dbdb98278 100644 --- a/trunk/arch/x86/kernel/genx2apic_uv_x.c +++ b/trunk/arch/x86/kernel/genx2apic_uv_x.c @@ -75,15 +75,16 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static const struct cpumask *uv_target_cpus(void) +static cpumask_t uv_target_cpus(void) { - return cpumask_of(0); + return cpumask_of_cpu(0); } -static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t uv_vector_allocation_domain(int cpu) { - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; } int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) @@ -122,37 +123,28 @@ static void uv_send_IPI_one(int cpu, int vector) uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } -static void uv_send_IPI_mask(const struct cpumask *mask, int vector) +static void uv_send_IPI_mask(cpumask_t mask, int vector) { unsigned int cpu; - for_each_cpu(cpu, mask) - uv_send_IPI_one(cpu, vector); -} - -static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) -{ - unsigned int cpu; - unsigned int this_cpu = smp_processor_id(); - - for_each_cpu(cpu, mask) - if (cpu != this_cpu) + for_each_possible_cpu(cpu) + if (cpu_isset(cpu, mask)) uv_send_IPI_one(cpu, vector); } static void uv_send_IPI_allbutself(int vector) { - unsigned int cpu; - unsigned int this_cpu = smp_processor_id(); + cpumask_t mask = cpu_online_map; - for_each_online_cpu(cpu) - if (cpu != this_cpu) - uv_send_IPI_one(cpu, vector); + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + uv_send_IPI_mask(mask, vector); } static void uv_send_IPI_all(int vector) { - uv_send_IPI_mask(cpu_online_mask, vector); + uv_send_IPI_mask(cpu_online_map, vector); } static int uv_apic_id_registered(void) @@ -164,7 +156,7 @@ static void uv_init_apic_ldr(void) { } -static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) +static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -172,30 +164,13 @@ static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); + cpu = first_cpu(cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - return BAD_APICID; -} - static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -243,10 +218,8 @@ struct genapic apic_x2apic_uv_x = { .send_IPI_all = uv_send_IPI_all, .send_IPI_allbutself = uv_send_IPI_allbutself, .send_IPI_mask = uv_send_IPI_mask, - .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, .send_IPI_self = uv_send_IPI_self, .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/trunk/arch/x86/kernel/io_apic.c b/trunk/arch/x86/kernel/io_apic.c index 1cbf7c8d46e0..1184210e6d0c 100644 --- a/trunk/arch/x86/kernel/io_apic.c +++ b/trunk/arch/x86/kernel/io_apic.c @@ -108,272 +108,93 @@ static int __init parse_noapic(char *str) early_param("noapic", parse_noapic); struct irq_pin_list; - -/* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. - */ - -struct irq_pin_list { - int apic, pin; - struct irq_pin_list *next; -}; - -static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) -{ - struct irq_pin_list *pin; - int node; - - node = cpu_to_node(cpu); - - pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); - printk(KERN_DEBUG " alloc irq_2_pin on cpu %d node %d\n", cpu, node); - - return pin; -} - struct irq_cfg { + unsigned int irq; struct irq_pin_list *irq_2_pin; - cpumask_var_t domain; - cpumask_var_t old_domain; + cpumask_t domain; + cpumask_t old_domain; unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - u8 move_desc_pending : 1; -#endif }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ -#ifdef CONFIG_SPARSE_IRQ -static struct irq_cfg irq_cfgx[] = { -#else static struct irq_cfg irq_cfgx[NR_IRQS] = { -#endif - [0] = { .vector = IRQ0_VECTOR, }, - [1] = { .vector = IRQ1_VECTOR, }, - [2] = { .vector = IRQ2_VECTOR, }, - [3] = { .vector = IRQ3_VECTOR, }, - [4] = { .vector = IRQ4_VECTOR, }, - [5] = { .vector = IRQ5_VECTOR, }, - [6] = { .vector = IRQ6_VECTOR, }, - [7] = { .vector = IRQ7_VECTOR, }, - [8] = { .vector = IRQ8_VECTOR, }, - [9] = { .vector = IRQ9_VECTOR, }, - [10] = { .vector = IRQ10_VECTOR, }, - [11] = { .vector = IRQ11_VECTOR, }, - [12] = { .vector = IRQ12_VECTOR, }, - [13] = { .vector = IRQ13_VECTOR, }, - [14] = { .vector = IRQ14_VECTOR, }, - [15] = { .vector = IRQ15_VECTOR, }, + [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, + [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, + [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, + [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, + [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, + [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, + [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, + [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, + [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, + [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, + [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, + [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, + [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, + [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, + [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, + [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, }; -void __init arch_early_irq_init(void) -{ - struct irq_cfg *cfg; - struct irq_desc *desc; - int count; - int i; - - cfg = irq_cfgx; - count = ARRAY_SIZE(irq_cfgx); +#define for_each_irq_cfg(irq, cfg) \ + for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++) - for (i = 0; i < count; i++) { - desc = irq_to_desc(i); - desc->chip_data = &cfg[i]; - alloc_bootmem_cpumask_var(&cfg[i].domain); - alloc_bootmem_cpumask_var(&cfg[i].old_domain); - if (i < NR_IRQS_LEGACY) - cpumask_setall(cfg[i].domain); - } -} - -#ifdef CONFIG_SPARSE_IRQ static struct irq_cfg *irq_cfg(unsigned int irq) { - struct irq_cfg *cfg = NULL; - struct irq_desc *desc; - - desc = irq_to_desc(irq); - if (desc) - cfg = desc->chip_data; - - return cfg; -} - -static struct irq_cfg *get_one_free_irq_cfg(int cpu) -{ - struct irq_cfg *cfg; - int node; - - node = cpu_to_node(cpu); - - cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); - if (cfg) { - /* FIXME: needs alloc_cpumask_var_node() */ - if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) { - kfree(cfg); - cfg = NULL; - } else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) { - free_cpumask_var(cfg->domain); - kfree(cfg); - cfg = NULL; - } else { - cpumask_clear(cfg->domain); - cpumask_clear(cfg->old_domain); - } - } - printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node); - - return cfg; -} - -void arch_init_chip_data(struct irq_desc *desc, int cpu) -{ - struct irq_cfg *cfg; - - cfg = desc->chip_data; - if (!cfg) { - desc->chip_data = get_one_free_irq_cfg(cpu); - if (!desc->chip_data) { - printk(KERN_ERR "can not alloc irq_cfg\n"); - BUG_ON(1); - } - } -} - -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - -static void -init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) -{ - struct irq_pin_list *old_entry, *head, *tail, *entry; - - cfg->irq_2_pin = NULL; - old_entry = old_cfg->irq_2_pin; - if (!old_entry) - return; - - entry = get_one_free_irq_2_pin(cpu); - if (!entry) - return; - - entry->apic = old_entry->apic; - entry->pin = old_entry->pin; - head = entry; - tail = entry; - old_entry = old_entry->next; - while (old_entry) { - entry = get_one_free_irq_2_pin(cpu); - if (!entry) { - entry = head; - while (entry) { - head = entry->next; - kfree(entry); - entry = head; - } - /* still use the old one */ - return; - } - entry->apic = old_entry->apic; - entry->pin = old_entry->pin; - tail->next = entry; - tail = entry; - old_entry = old_entry->next; - } - - tail->next = NULL; - cfg->irq_2_pin = head; + return irq < nr_irqs ? irq_cfgx + irq : NULL; } -static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) +static struct irq_cfg *irq_cfg_alloc(unsigned int irq) { - struct irq_pin_list *entry, *next; - - if (old_cfg->irq_2_pin == cfg->irq_2_pin) - return; - - entry = old_cfg->irq_2_pin; - - while (entry) { - next = entry->next; - kfree(entry); - entry = next; - } - old_cfg->irq_2_pin = NULL; + return irq_cfg(irq); } -void arch_init_copy_chip_data(struct irq_desc *old_desc, - struct irq_desc *desc, int cpu) -{ - struct irq_cfg *cfg; - struct irq_cfg *old_cfg; - - cfg = get_one_free_irq_cfg(cpu); - - if (!cfg) - return; - - desc->chip_data = cfg; - - old_cfg = old_desc->chip_data; +/* + * Rough estimation of how many shared IRQs there are, can be changed + * anytime. + */ +#define MAX_PLUS_SHARED_IRQS NR_IRQS +#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) - memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); +/* + * This is performance-critical, we want to do it O(1) + * + * the indexing order of this array favors 1:1 mappings + * between pins and IRQs. + */ - init_copy_irq_2_pin(old_cfg, cfg, cpu); -} +struct irq_pin_list { + int apic, pin; + struct irq_pin_list *next; +}; -static void free_irq_cfg(struct irq_cfg *old_cfg) -{ - kfree(old_cfg); -} +static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; +static struct irq_pin_list *irq_2_pin_ptr; -void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) +static void __init irq_2_pin_init(void) { - struct irq_cfg *old_cfg, *cfg; - - old_cfg = old_desc->chip_data; - cfg = desc->chip_data; - - if (old_cfg == cfg) - return; - - if (old_cfg) { - free_irq_2_pin(old_cfg, cfg); - free_irq_cfg(old_cfg); - old_desc->chip_data = NULL; - } -} + struct irq_pin_list *pin = irq_2_pin_head; + int i; -static void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg = desc->chip_data; + for (i = 1; i < PIN_MAP_SIZE; i++) + pin[i-1].next = &pin[i]; - if (!cfg->move_in_progress) { - /* it means that domain is not changed */ - if (!cpumask_intersects(&desc->affinity, mask)) - cfg->move_desc_pending = 1; - } + irq_2_pin_ptr = &pin[0]; } -#endif -#else -static struct irq_cfg *irq_cfg(unsigned int irq) +static struct irq_pin_list *get_one_free_irq_2_pin(void) { - return irq < nr_irqs ? irq_cfgx + irq : NULL; -} + struct irq_pin_list *pin = irq_2_pin_ptr; -#endif + if (!pin) + panic("can not get more irq_2_pin\n"); -#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC -static inline void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) -{ + irq_2_pin_ptr = pin->next; + pin->next = NULL; + return pin; } -#endif struct io_apic { unsigned int index; @@ -416,10 +237,11 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned writel(value, &io_apic->data); } -static bool io_apic_level_ack_pending(struct irq_cfg *cfg) +static bool io_apic_level_ack_pending(unsigned int irq) { struct irq_pin_list *entry; unsigned long flags; + struct irq_cfg *cfg = irq_cfg(irq); spin_lock_irqsave(&ioapic_lock, flags); entry = cfg->irq_2_pin; @@ -501,32 +323,13 @@ static void ioapic_mask_entry(int apic, int pin) } #ifdef CONFIG_SMP -static void send_cleanup_vector(struct irq_cfg *cfg) -{ - cpumask_var_t cleanup_mask; - - if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { - unsigned int i; - cfg->move_cleanup_count = 0; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - cfg->move_cleanup_count++; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); - } else { - cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); - cfg->move_cleanup_count = cpumask_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - free_cpumask_var(cleanup_mask); - } - cfg->move_in_progress = 0; -} - -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) { int apic, pin; + struct irq_cfg *cfg; struct irq_pin_list *entry; - u8 vector = cfg->vector; + cfg = irq_cfg(irq); entry = cfg->irq_2_pin; for (;;) { unsigned int reg; @@ -556,61 +359,36 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq } } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); - -/* - * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid - * of that, or returns BAD_APICID and leaves desc->affinity untouched. - */ -static unsigned int -set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg; - unsigned int irq; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return BAD_APICID; - - irq = desc->irq; - cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) - return BAD_APICID; +static int assign_irq_vector(int irq, cpumask_t mask); - cpumask_and(&desc->affinity, cfg->domain, mask); - set_extra_move_desc(desc, mask); - return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); -} - -static void -set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) +static void set_ioapic_affinity_irq(unsigned int irq, + const struct cpumask *mask) { struct irq_cfg *cfg; unsigned long flags; unsigned int dest; - unsigned int irq; + cpumask_t tmp; + struct irq_desc *desc; - irq = desc->irq; - cfg = desc->chip_data; + if (!cpumask_intersects(mask, cpu_online_mask)) + return; - spin_lock_irqsave(&ioapic_lock, flags); - dest = set_desc_affinity(desc, mask); - if (dest != BAD_APICID) { - /* Only the high 8 bits are valid. */ - dest = SET_APIC_LOGICAL_ID(dest); - __target_IO_APIC_irq(irq, dest, cfg); - } - spin_unlock_irqrestore(&ioapic_lock, flags); -} + cfg = irq_cfg(irq); + if (assign_irq_vector(irq, *mask)) + return; -static void -set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) -{ - struct irq_desc *desc; + cpumask_and(&tmp, &cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + /* + * Only the high 8 bits are valid. + */ + dest = SET_APIC_LOGICAL_ID(dest); desc = irq_to_desc(irq); - - set_ioapic_affinity_irq_desc(desc, mask); + spin_lock_irqsave(&ioapic_lock, flags); + __target_IO_APIC_irq(irq, dest, cfg->vector); + cpumask_copy(&desc->affinity, mask); + spin_unlock_irqrestore(&ioapic_lock, flags); } #endif /* CONFIG_SMP */ @@ -619,18 +397,16 @@ set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) +static void add_pin_to_irq(unsigned int irq, int apic, int pin) { + struct irq_cfg *cfg; struct irq_pin_list *entry; + /* first time to refer irq_cfg, so with new */ + cfg = irq_cfg_alloc(irq); entry = cfg->irq_2_pin; if (!entry) { - entry = get_one_free_irq_2_pin(cpu); - if (!entry) { - printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", - apic, pin); - return; - } + entry = get_one_free_irq_2_pin(); cfg->irq_2_pin = entry; entry->apic = apic; entry->pin = pin; @@ -645,7 +421,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) entry = entry->next; } - entry->next = get_one_free_irq_2_pin(cpu); + entry->next = get_one_free_irq_2_pin(); entry = entry->next; entry->apic = apic; entry->pin = pin; @@ -654,10 +430,11 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) /* * Reroute an IRQ to a different pin. */ -static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, +static void __init replace_pin_at_irq(unsigned int irq, int oldapic, int oldpin, int newapic, int newpin) { + struct irq_cfg *cfg = irq_cfg(irq); struct irq_pin_list *entry = cfg->irq_2_pin; int replaced = 0; @@ -674,16 +451,18 @@ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, /* why? call replace before add? */ if (!replaced) - add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); + add_pin_to_irq(irq, newapic, newpin); } -static inline void io_apic_modify_irq(struct irq_cfg *cfg, +static inline void io_apic_modify_irq(unsigned int irq, int mask_and, int mask_or, void (*final)(struct irq_pin_list *entry)) { int pin; + struct irq_cfg *cfg; struct irq_pin_list *entry; + cfg = irq_cfg(irq); for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { unsigned int reg; pin = entry->pin; @@ -696,9 +475,9 @@ static inline void io_apic_modify_irq(struct irq_cfg *cfg, } } -static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) +static void __unmask_IO_APIC_irq(unsigned int irq) { - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); + io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); } #ifdef CONFIG_X86_64 @@ -713,64 +492,47 @@ void io_apic_sync(struct irq_pin_list *entry) readl(&io_apic->data); } -static void __mask_IO_APIC_irq(struct irq_cfg *cfg) +static void __mask_IO_APIC_irq(unsigned int irq) { - io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); + io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); } #else /* CONFIG_X86_32 */ -static void __mask_IO_APIC_irq(struct irq_cfg *cfg) +static void __mask_IO_APIC_irq(unsigned int irq) { - io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); + io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); } -static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) +static void __mask_and_edge_IO_APIC_irq(unsigned int irq) { - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, + io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, IO_APIC_REDIR_MASKED, NULL); } -static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) +static void __unmask_and_level_IO_APIC_irq(unsigned int irq) { - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, + io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, IO_APIC_REDIR_LEVEL_TRIGGER, NULL); } #endif /* CONFIG_X86_32 */ -static void mask_IO_APIC_irq_desc(struct irq_desc *desc) +static void mask_IO_APIC_irq (unsigned int irq) { - struct irq_cfg *cfg = desc->chip_data; unsigned long flags; - BUG_ON(!cfg); - spin_lock_irqsave(&ioapic_lock, flags); - __mask_IO_APIC_irq(cfg); + __mask_IO_APIC_irq(irq); spin_unlock_irqrestore(&ioapic_lock, flags); } -static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) +static void unmask_IO_APIC_irq (unsigned int irq) { - struct irq_cfg *cfg = desc->chip_data; unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - __unmask_IO_APIC_irq(cfg); + __unmask_IO_APIC_irq(irq); spin_unlock_irqrestore(&ioapic_lock, flags); } -static void mask_IO_APIC_irq(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - - mask_IO_APIC_irq_desc(desc); -} -static void unmask_IO_APIC_irq(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - - unmask_IO_APIC_irq_desc(desc); -} - static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { struct IO_APIC_route_entry entry; @@ -1047,7 +809,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); */ static int EISA_ELCR(unsigned int irq) { - if (irq < NR_IRQS_LEGACY) { + if (irq < 16) { unsigned int port = 0x4d0 + (irq >> 3); return (inb(port) >> (irq & 7)) & 1; } @@ -1272,8 +1034,7 @@ void unlock_vector_lock(void) spin_unlock(&vector_lock); } -static int -__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +static int __assign_irq_vector(int irq, cpumask_t mask) { /* * NOTE! The local APIC isn't very good at handling @@ -1288,49 +1049,52 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) */ static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; unsigned int old_vector; - int cpu, err; - cpumask_var_t tmp_mask; + int cpu; + struct irq_cfg *cfg; + + cfg = irq_cfg(irq); + + /* Only try and allocate irqs on cpus that are present */ + cpus_and(mask, mask, cpu_online_map); if ((cfg->move_in_progress) || cfg->move_cleanup_count) return -EBUSY; - if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) - return -ENOMEM; - old_vector = cfg->vector; if (old_vector) { - cpumask_and(tmp_mask, mask, cpu_online_mask); - cpumask_and(tmp_mask, cfg->domain, tmp_mask); - if (!cpumask_empty(tmp_mask)) { - free_cpumask_var(tmp_mask); + cpumask_t tmp; + cpus_and(tmp, cfg->domain, mask); + if (!cpus_empty(tmp)) return 0; - } } - /* Only try and allocate irqs on cpus that are present */ - err = -ENOSPC; - for_each_cpu_and(cpu, mask, cpu_online_mask) { + for_each_cpu_mask_nr(cpu, mask) { + cpumask_t domain, new_mask; int new_cpu; int vector, offset; - vector_allocation_domain(cpu, tmp_mask); + domain = vector_allocation_domain(cpu); + cpus_and(new_mask, domain, cpu_online_map); vector = current_vector; offset = current_offset; next: vector += 8; if (vector >= first_system_vector) { - /* If out of vectors on large boxen, must share them. */ + /* If we run out of vectors on large boxen, must share them. */ offset = (offset + 1) % 8; vector = FIRST_DEVICE_VECTOR + offset; } if (unlikely(current_vector == vector)) continue; - - if (test_bit(vector, used_vectors)) +#ifdef CONFIG_X86_64 + if (vector == IA32_SYSCALL_VECTOR) goto next; - - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) +#else + if (vector == SYSCALL_VECTOR) + goto next; +#endif + for_each_cpu_mask_nr(new_cpu, new_mask) if (per_cpu(vector_irq, new_cpu)[vector] != -1) goto next; /* Found one! */ @@ -1338,47 +1102,49 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) current_offset = offset; if (old_vector) { cfg->move_in_progress = 1; - cpumask_copy(cfg->old_domain, cfg->domain); + cfg->old_domain = cfg->domain; } - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) + for_each_cpu_mask_nr(new_cpu, new_mask) per_cpu(vector_irq, new_cpu)[vector] = irq; cfg->vector = vector; - cpumask_copy(cfg->domain, tmp_mask); - err = 0; - break; + cfg->domain = domain; + return 0; } - free_cpumask_var(tmp_mask); - return err; + return -ENOSPC; } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +static int assign_irq_vector(int irq, cpumask_t mask) { int err; unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, cfg, mask); + err = __assign_irq_vector(irq, mask); spin_unlock_irqrestore(&vector_lock, flags); return err; } -static void __clear_irq_vector(int irq, struct irq_cfg *cfg) +static void __clear_irq_vector(int irq) { + struct irq_cfg *cfg; + cpumask_t mask; int cpu, vector; + cfg = irq_cfg(irq); BUG_ON(!cfg->vector); vector = cfg->vector; - for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) + cpus_and(mask, cfg->domain, cpu_online_map); + for_each_cpu_mask_nr(cpu, mask) per_cpu(vector_irq, cpu)[vector] = -1; cfg->vector = 0; - cpumask_clear(cfg->domain); + cpus_clear(cfg->domain); if (likely(!cfg->move_in_progress)) return; - for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { + cpus_and(mask, cfg->old_domain, cpu_online_map); + for_each_cpu_mask_nr(cpu, mask) { for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (per_cpu(vector_irq, cpu)[vector] != irq) @@ -1396,14 +1162,10 @@ void __setup_vector_irq(int cpu) /* This function must be called with vector_lock held */ int irq, vector; struct irq_cfg *cfg; - struct irq_desc *desc; /* Mark the inuse vectors */ - for_each_irq_desc(irq, desc) { - if (!desc) - continue; - cfg = desc->chip_data; - if (!cpumask_test_cpu(cpu, cfg->domain)) + for_each_irq_cfg(irq, cfg) { + if (!cpu_isset(cpu, cfg->domain)) continue; vector = cfg->vector; per_cpu(vector_irq, cpu)[vector] = irq; @@ -1415,7 +1177,7 @@ void __setup_vector_irq(int cpu) continue; cfg = irq_cfg(irq); - if (!cpumask_test_cpu(cpu, cfg->domain)) + if (!cpu_isset(cpu, cfg->domain)) per_cpu(vector_irq, cpu)[vector] = -1; } } @@ -1453,8 +1215,11 @@ static inline int IO_APIC_irq_trigger(int irq) } #endif -static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) +static void ioapic_register_intr(int irq, unsigned long trigger) { + struct irq_desc *desc; + + desc = irq_to_desc(irq); if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) @@ -1546,22 +1311,23 @@ static int setup_ioapic_entry(int apic, int irq, return 0; } -static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, +static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, int trigger, int polarity) { struct irq_cfg *cfg; struct IO_APIC_route_entry entry; - unsigned int dest; + cpumask_t mask; if (!IO_APIC_IRQ(irq)) return; - cfg = desc->chip_data; + cfg = irq_cfg(irq); - if (assign_irq_vector(irq, cfg, TARGET_CPUS)) + mask = TARGET_CPUS; + if (assign_irq_vector(irq, mask)) return; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + cpus_and(mask, cfg->domain, mask); apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " @@ -1571,15 +1337,16 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, - dest, trigger, polarity, cfg->vector)) { + cpu_mask_to_apicid(mask), trigger, polarity, + cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", mp_ioapics[apic].mp_apicid, pin); - __clear_irq_vector(irq, cfg); + __clear_irq_vector(irq); return; } - ioapic_register_intr(irq, desc, trigger); - if (irq < NR_IRQS_LEGACY) + ioapic_register_intr(irq, trigger); + if (irq < 16) disable_8259A_irq(irq); ioapic_write_entry(apic, pin, entry); @@ -1589,9 +1356,6 @@ static void __init setup_IO_APIC_irqs(void) { int apic, pin, idx, irq; int notcon = 0; - struct irq_desc *desc; - struct irq_cfg *cfg; - int cpu = boot_cpu_id; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); @@ -1623,15 +1387,9 @@ static void __init setup_IO_APIC_irqs(void) if (multi_timer_check(apic, irq)) continue; #endif - desc = irq_to_desc_alloc_cpu(irq, cpu); - if (!desc) { - printk(KERN_INFO "can not get irq_desc for %d\n", irq); - continue; - } - cfg = desc->chip_data; - add_pin_to_irq_cpu(cfg, cpu, apic, pin); + add_pin_to_irq(irq, apic, pin); - setup_IO_APIC_irq(apic, pin, irq, desc, + setup_IO_APIC_irq(apic, pin, irq, irq_trigger(idx), irq_polarity(idx)); } } @@ -1690,7 +1448,6 @@ __apicdebuginit(void) print_IO_APIC(void) union IO_APIC_reg_03 reg_03; unsigned long flags; struct irq_cfg *cfg; - struct irq_desc *desc; unsigned int irq; if (apic_verbosity == APIC_QUIET) @@ -1780,13 +1537,8 @@ __apicdebuginit(void) print_IO_APIC(void) } } printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for_each_irq_desc(irq, desc) { - struct irq_pin_list *entry; - - if (!desc) - continue; - cfg = desc->chip_data; - entry = cfg->irq_2_pin; + for_each_irq_cfg(irq, cfg) { + struct irq_pin_list *entry = cfg->irq_2_pin; if (!entry) continue; printk(KERN_DEBUG "IRQ%d ", irq); @@ -2270,16 +2022,14 @@ static unsigned int startup_ioapic_irq(unsigned int irq) { int was_pending = 0; unsigned long flags; - struct irq_cfg *cfg; spin_lock_irqsave(&ioapic_lock, flags); - if (irq < NR_IRQS_LEGACY) { + if (irq < 16) { disable_8259A_irq(irq); if (i8259A_irq_pending(irq)) was_pending = 1; } - cfg = irq_cfg(irq); - __unmask_IO_APIC_irq(cfg); + __unmask_IO_APIC_irq(irq); spin_unlock_irqrestore(&ioapic_lock, flags); return was_pending; @@ -2293,7 +2043,7 @@ static int ioapic_retrigger_irq(unsigned int irq) unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); + send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); spin_unlock_irqrestore(&vector_lock, flags); return 1; @@ -2342,35 +2092,35 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); * as simple as edge triggered migration and we can do the irq migration * with a simple atomic update to IO-APIC RTE. */ -static void -migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) +static void migrate_ioapic_irq(int irq, cpumask_t mask) { struct irq_cfg *cfg; + struct irq_desc *desc; + cpumask_t tmp, cleanup_mask; struct irte irte; int modify_ioapic_rte; unsigned int dest; unsigned long flags; - unsigned int irq; - if (!cpumask_intersects(mask, cpu_online_mask)) + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) return; - irq = desc->irq; if (get_irte(irq, &irte)) return; - cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) + if (assign_irq_vector(irq, mask)) return; - set_extra_move_desc(desc, mask); - - dest = cpu_mask_to_apicid_and(cfg->domain, mask); + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + desc = irq_to_desc(irq); modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg); + __target_IO_APIC_irq(irq, dest, cfg->vector); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -2382,20 +2132,24 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) */ modify_irte(irq, &irte); - if (cfg->move_in_progress) - send_cleanup_vector(cfg); + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } - cpumask_copy(&desc->affinity, mask); + desc->affinity = mask; } -static int migrate_irq_remapped_level_desc(struct irq_desc *desc) +static int migrate_irq_remapped_level(int irq) { int ret = -1; - struct irq_cfg *cfg = desc->chip_data; + struct irq_desc *desc = irq_to_desc(irq); - mask_IO_APIC_irq_desc(desc); + mask_IO_APIC_irq(irq); - if (io_apic_level_ack_pending(cfg)) { + if (io_apic_level_ack_pending(irq)) { /* * Interrupt in progress. Migrating irq now will change the * vector information in the IO-APIC RTE and that will confuse @@ -2407,15 +2161,14 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc) } /* everthing is clear. we have right of way */ - migrate_ioapic_irq_desc(desc, &desc->pending_mask); + migrate_ioapic_irq(irq, desc->pending_mask); ret = 0; desc->status &= ~IRQ_MOVE_PENDING; - cpumask_clear(&desc->pending_mask); + cpus_clear(desc->pending_mask); unmask: - unmask_IO_APIC_irq_desc(desc); - + unmask_IO_APIC_irq(irq); return ret; } @@ -2425,9 +2178,6 @@ static void ir_irq_migration(struct work_struct *work) struct irq_desc *desc; for_each_irq_desc(irq, desc) { - if (!desc) - continue; - if (desc->status & IRQ_MOVE_PENDING) { unsigned long flags; @@ -2448,24 +2198,19 @@ static void ir_irq_migration(struct work_struct *work) /* * Migrates the IRQ destination in the process context. */ -static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, - const struct cpumask *mask) +static void set_ir_ioapic_affinity_irq(unsigned int irq, + const struct cpumask *mask) { + struct irq_desc *desc = irq_to_desc(irq); + if (desc->status & IRQ_LEVEL) { desc->status |= IRQ_MOVE_PENDING; cpumask_copy(&desc->pending_mask, mask); - migrate_irq_remapped_level_desc(desc); + migrate_irq_remapped_level(irq); return; } - migrate_ioapic_irq_desc(desc, mask); -} -static void set_ir_ioapic_affinity_irq(unsigned int irq, - const struct cpumask *mask) -{ - struct irq_desc *desc = irq_to_desc(irq); - - set_ir_ioapic_affinity_irq_desc(desc, mask); + migrate_ioapic_irq(irq, *mask); } #endif @@ -2485,9 +2230,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) struct irq_cfg *cfg; irq = __get_cpu_var(vector_irq)[vector]; - if (irq == -1) - continue; - desc = irq_to_desc(irq); if (!desc) continue; @@ -2497,7 +2239,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) if (!cfg->move_cleanup_count) goto unlock; - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) goto unlock; __get_cpu_var(vector_irq)[vector] = -1; @@ -2509,44 +2251,28 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) irq_exit(); } -static void irq_complete_move(struct irq_desc **descp) +static void irq_complete_move(unsigned int irq) { - struct irq_desc *desc = *descp; - struct irq_cfg *cfg = desc->chip_data; + struct irq_cfg *cfg = irq_cfg(irq); unsigned vector, me; - if (likely(!cfg->move_in_progress)) { -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - if (likely(!cfg->move_desc_pending)) - return; - - /* domain is not change, but affinity is changed */ - me = smp_processor_id(); - if (cpu_isset(me, desc->affinity)) { - *descp = desc = move_irq_desc(desc, me); - /* get the new one */ - cfg = desc->chip_data; - cfg->move_desc_pending = 0; - } -#endif + if (likely(!cfg->move_in_progress)) return; - } vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - *descp = desc = move_irq_desc(desc, me); - /* get the new one */ - cfg = desc->chip_data; -#endif + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { + cpumask_t cleanup_mask; - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) - send_cleanup_vector(cfg); + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } } #else -static inline void irq_complete_move(struct irq_desc **descp) {} +static inline void irq_complete_move(unsigned int irq) {} #endif - #ifdef CONFIG_INTR_REMAP static void ack_x2apic_level(unsigned int irq) { @@ -2557,14 +2283,11 @@ static void ack_x2apic_edge(unsigned int irq) { ack_x2APIC_irq(); } - #endif static void ack_apic_edge(unsigned int irq) { - struct irq_desc *desc = irq_to_desc(irq); - - irq_complete_move(&desc); + irq_complete_move(irq); move_native_irq(irq); ack_APIC_irq(); } @@ -2573,21 +2296,18 @@ atomic_t irq_mis_count; static void ack_apic_level(unsigned int irq) { - struct irq_desc *desc = irq_to_desc(irq); - #ifdef CONFIG_X86_32 unsigned long v; int i; #endif - struct irq_cfg *cfg; int do_unmask_irq = 0; - irq_complete_move(&desc); + irq_complete_move(irq); #ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ - if (unlikely(desc->status & IRQ_MOVE_PENDING)) { + if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { do_unmask_irq = 1; - mask_IO_APIC_irq_desc(desc); + mask_IO_APIC_irq(irq); } #endif @@ -2611,8 +2331,7 @@ static void ack_apic_level(unsigned int irq) * operation to prevent an edge-triggered interrupt escaping meanwhile. * The idea is from Manfred Spraul. --macro */ - cfg = desc->chip_data; - i = cfg->vector; + i = irq_cfg(irq)->vector; v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); #endif @@ -2651,18 +2370,17 @@ static void ack_apic_level(unsigned int irq) * accurate and is causing problems then it is a hardware bug * and you can go talk to the chipset vendor about it. */ - cfg = desc->chip_data; - if (!io_apic_level_ack_pending(cfg)) + if (!io_apic_level_ack_pending(irq)) move_masked_irq(irq); - unmask_IO_APIC_irq_desc(desc); + unmask_IO_APIC_irq(irq); } #ifdef CONFIG_X86_32 if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(cfg); - __unmask_and_level_IO_APIC_irq(cfg); + __mask_and_edge_IO_APIC_irq(irq); + __unmask_and_level_IO_APIC_irq(irq); spin_unlock(&ioapic_lock); } #endif @@ -2713,22 +2431,20 @@ static inline void init_IO_APIC_traps(void) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for_each_irq_desc(irq, desc) { - if (!desc) - continue; - - cfg = desc->chip_data; - if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { + for_each_irq_cfg(irq, cfg) { + if (IO_APIC_IRQ(irq) && !cfg->vector) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 * interrupt if we can.. */ - if (irq < NR_IRQS_LEGACY) + if (irq < 16) make_8259A_irq(irq); - else + else { + desc = irq_to_desc(irq); /* Strange. Oh, well.. */ desc->chip = &no_irq_chip; + } } } } @@ -2753,7 +2469,7 @@ static void unmask_lapic_irq(unsigned int irq) apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); } -static void ack_lapic_irq(unsigned int irq) +static void ack_lapic_irq (unsigned int irq) { ack_APIC_irq(); } @@ -2765,8 +2481,11 @@ static struct irq_chip lapic_chip __read_mostly = { .ack = ack_lapic_irq, }; -static void lapic_register_intr(int irq, struct irq_desc *desc) +static void lapic_register_intr(int irq) { + struct irq_desc *desc; + + desc = irq_to_desc(irq); desc->status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge"); @@ -2870,9 +2589,7 @@ int timer_through_8259 __initdata; */ static inline void __init check_timer(void) { - struct irq_desc *desc = irq_to_desc(0); - struct irq_cfg *cfg = desc->chip_data; - int cpu = boot_cpu_id; + struct irq_cfg *cfg = irq_cfg(0); int apic1, pin1, apic2, pin2; unsigned long flags; unsigned int ver; @@ -2887,7 +2604,7 @@ static inline void __init check_timer(void) * get/set the timer IRQ vector: */ disable_8259A_irq(0); - assign_irq_vector(0, cfg, TARGET_CPUS); + assign_irq_vector(0, TARGET_CPUS); /* * As IRQ0 is to be enabled in the 8259A, the virtual @@ -2938,10 +2655,10 @@ static inline void __init check_timer(void) * Ok, does IRQ0 through the IOAPIC work? */ if (no_pin1) { - add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); + add_pin_to_irq(0, apic1, pin1); setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } - unmask_IO_APIC_irq_desc(desc); + unmask_IO_APIC_irq(0); if (timer_irq_works()) { if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); @@ -2967,9 +2684,9 @@ static inline void __init check_timer(void) /* * legacy devices should be connected to IO APIC #0 */ - replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); + replace_pin_at_irq(0, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); - unmask_IO_APIC_irq_desc(desc); + unmask_IO_APIC_irq(0); enable_8259A_irq(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); @@ -3001,7 +2718,7 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); - lapic_register_intr(0, desc); + lapic_register_intr(0); apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ enable_8259A_irq(0); @@ -3186,26 +2903,22 @@ unsigned int create_irq_nr(unsigned int irq_want) unsigned int irq; unsigned int new; unsigned long flags; - struct irq_cfg *cfg_new = NULL; - int cpu = boot_cpu_id; - struct irq_desc *desc_new = NULL; + struct irq_cfg *cfg_new; + + irq_want = nr_irqs - 1; irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = irq_want; new < NR_IRQS; new++) { + for (new = irq_want; new > 0; new--) { if (platform_legacy_irq(new)) continue; - - desc_new = irq_to_desc_alloc_cpu(new, cpu); - if (!desc_new) { - printk(KERN_INFO "can not get irq_desc for %d\n", new); - continue; - } - cfg_new = desc_new->chip_data; - - if (cfg_new->vector != 0) + cfg_new = irq_cfg(new); + if (cfg_new && cfg_new->vector != 0) continue; - if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) + /* check if need to create one */ + if (!cfg_new) + cfg_new = irq_cfg_alloc(new); + if (__assign_irq_vector(new, TARGET_CPUS) == 0) irq = new; break; } @@ -3213,21 +2926,15 @@ unsigned int create_irq_nr(unsigned int irq_want) if (irq > 0) { dynamic_irq_init(irq); - /* restore it, in case dynamic_irq_init clear it */ - if (desc_new) - desc_new->chip_data = cfg_new; } return irq; } -static int nr_irqs_gsi = NR_IRQS_LEGACY; int create_irq(void) { - unsigned int irq_want; int irq; - irq_want = nr_irqs_gsi; - irq = create_irq_nr(irq_want); + irq = create_irq_nr(nr_irqs - 1); if (irq == 0) irq = -1; @@ -3238,22 +2945,14 @@ int create_irq(void) void destroy_irq(unsigned int irq) { unsigned long flags; - struct irq_cfg *cfg; - struct irq_desc *desc; - /* store it, in case dynamic_irq_cleanup clear it */ - desc = irq_to_desc(irq); - cfg = desc->chip_data; dynamic_irq_cleanup(irq); - /* connect back irq_cfg */ - if (desc) - desc->chip_data = cfg; #ifdef CONFIG_INTR_REMAP free_irte(irq); #endif spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq, cfg); + __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); } @@ -3266,13 +2965,16 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms struct irq_cfg *cfg; int err; unsigned dest; + cpumask_t tmp; - cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, TARGET_CPUS); + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, tmp); if (err) return err; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { @@ -3328,46 +3030,61 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms #ifdef CONFIG_SMP static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { - struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; + cpumask_t tmp; + struct irq_desc *desc; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (assign_irq_vector(irq, *mask)) return; - cfg = desc->chip_data; + cfg = irq_cfg(irq); + cpumask_and(&tmp, &cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); - read_msi_msg_desc(desc, &msg); + read_msi_msg(irq, &msg); msg.data &= ~MSI_DATA_VECTOR_MASK; msg.data |= MSI_DATA_VECTOR(cfg->vector); msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; msg.address_lo |= MSI_ADDR_DEST_ID(dest); - write_msi_msg_desc(desc, &msg); + write_msi_msg(irq, &msg); + desc = irq_to_desc(irq); + cpumask_copy(&desc->affinity, mask); } + #ifdef CONFIG_INTR_REMAP /* * Migrate the MSI irq to another cpumask. This migration is * done in the process context using interrupt-remapping hardware. */ -static void -ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) +static void ir_set_msi_irq_affinity(unsigned int irq, + const struct cpumask *mask) { - struct irq_desc *desc = irq_to_desc(irq); - struct irq_cfg *cfg = desc->chip_data; + struct irq_cfg *cfg; unsigned int dest; + cpumask_t tmp, cleanup_mask; struct irte irte; + struct irq_desc *desc; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return; if (get_irte(irq, &irte)) return; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (assign_irq_vector(irq, *mask)) return; + cfg = irq_cfg(irq); + cpumask_and(&tmp, &cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3381,10 +3098,16 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) * at the new destination. So, time to cleanup the previous * vector allocation. */ - if (cfg->move_in_progress) - send_cleanup_vector(cfg); -} + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + desc = irq_to_desc(irq); + cpumask_copy(&desc->affinity, mask); +} #endif #endif /* CONFIG_SMP */ @@ -3443,7 +3166,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) } #endif -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) { int ret; struct msi_msg msg; @@ -3452,7 +3175,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) if (ret < 0) return ret; - set_irq_msi(irq, msidesc); + set_irq_msi(irq, desc); write_msi_msg(irq, &msg); #ifdef CONFIG_INTR_REMAP @@ -3472,13 +3195,26 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) return 0; } -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) +static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) +{ + unsigned int irq; + + irq = dev->bus->number; + irq <<= 8; + irq |= dev->devfn; + irq <<= 12; + + return irq; +} + +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) { unsigned int irq; int ret; unsigned int irq_want; - irq_want = nr_irqs_gsi; + irq_want = build_irq_for_pci_dev(dev) + 0x100; + irq = create_irq_nr(irq_want); if (irq == 0) return -1; @@ -3492,7 +3228,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) goto error; no_ir: #endif - ret = setup_msi_irq(dev, msidesc, irq); + ret = setup_msi_irq(dev, desc, irq); if (ret < 0) { destroy_irq(irq); return ret; @@ -3510,7 +3246,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { unsigned int irq; int ret, sub_handle; - struct msi_desc *msidesc; + struct msi_desc *desc; unsigned int irq_want; #ifdef CONFIG_INTR_REMAP @@ -3518,11 +3254,10 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) int index = 0; #endif - irq_want = nr_irqs_gsi; + irq_want = build_irq_for_pci_dev(dev) + 0x100; sub_handle = 0; - list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want); - irq_want++; + list_for_each_entry(desc, &dev->msi_list, list) { + irq = create_irq_nr(irq_want--); if (irq == 0) return -1; #ifdef CONFIG_INTR_REMAP @@ -3554,7 +3289,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) } no_ir: #endif - ret = setup_msi_irq(dev, msidesc, irq); + ret = setup_msi_irq(dev, desc, irq); if (ret < 0) goto error; sub_handle++; @@ -3575,16 +3310,21 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_SMP static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { - struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; + cpumask_t tmp; + struct irq_desc *desc; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (assign_irq_vector(irq, *mask)) return; - cfg = desc->chip_data; + cfg = irq_cfg(irq); + cpumask_and(&tmp, &cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); dmar_msi_read(irq, &msg); @@ -3594,8 +3334,9 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); + desc = irq_to_desc(irq); + cpumask_copy(&desc->affinity, mask); } - #endif /* CONFIG_SMP */ struct irq_chip dmar_msi_type = { @@ -3629,16 +3370,21 @@ int arch_setup_dmar_msi(unsigned int irq) #ifdef CONFIG_SMP static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { - struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; + struct irq_desc *desc; struct msi_msg msg; unsigned int dest; + cpumask_t tmp; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (!cpumask_intersects(mask, cpu_online_mask)) + return; + + if (assign_irq_vector(irq, *mask)) return; - cfg = desc->chip_data; + cfg = irq_cfg(irq); + cpumask_and(&tmp, &cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); hpet_msi_read(irq, &msg); @@ -3648,8 +3394,9 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); hpet_msi_write(irq, &msg); + desc = irq_to_desc(irq); + cpumask_copy(&desc->affinity, mask); } - #endif /* CONFIG_SMP */ struct irq_chip hpet_msi_type = { @@ -3704,19 +3451,25 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) { - struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; unsigned int dest; + cpumask_t tmp; + struct irq_desc *desc; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (assign_irq_vector(irq, *mask)) return; - cfg = desc->chip_data; + cfg = irq_cfg(irq); + cpumask_and(&tmp, &cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); target_ht_irq(irq, dest, cfg->vector); + desc = irq_to_desc(irq); + cpumask_copy(&desc->affinity, mask); } - #endif static struct irq_chip ht_irq_chip = { @@ -3734,14 +3487,17 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) { struct irq_cfg *cfg; int err; + cpumask_t tmp; - cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, TARGET_CPUS); + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, tmp); if (!err) { struct ht_irq_msg msg; unsigned dest; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); @@ -3777,7 +3533,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long mmr_offset) { - const struct cpumask *eligible_cpu = cpumask_of(cpu); + const cpumask_t *eligible_cpu = get_cpu_mask(cpu); struct irq_cfg *cfg; int mmr_pnode; unsigned long mmr_value; @@ -3785,9 +3541,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long flags; int err; - cfg = irq_cfg(irq); - - err = assign_irq_vector(irq, cfg, eligible_cpu); + err = assign_irq_vector(irq, *eligible_cpu); if (err != 0) return err; @@ -3796,6 +3550,8 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, irq_name); spin_unlock_irqrestore(&vector_lock, flags); + cfg = irq_cfg(irq); + mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); @@ -3806,7 +3562,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, entry->polarity = 0; entry->trigger = 0; entry->mask = 0; - entry->dest = cpu_mask_to_apicid(eligible_cpu); + entry->dest = cpu_mask_to_apicid(*eligible_cpu); mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); @@ -3847,16 +3603,9 @@ int __init io_apic_get_redir_entries (int ioapic) return reg_01.bits.entries; } -void __init probe_nr_irqs_gsi(void) +int __init probe_nr_irqs(void) { - int idx; - int nr = 0; - - for (idx = 0; idx < nr_ioapics; idx++) - nr += io_apic_get_redir_entries(idx) + 1; - - if (nr > nr_irqs_gsi) - nr_irqs_gsi = nr; + return NR_IRQS; } /* -------------------------------------------------------------------------- @@ -3955,31 +3704,19 @@ int __init io_apic_get_version(int ioapic) int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) { - struct irq_desc *desc; - struct irq_cfg *cfg; - int cpu = boot_cpu_id; - if (!IO_APIC_IRQ(irq)) { apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", ioapic); return -EINVAL; } - desc = irq_to_desc_alloc_cpu(irq, cpu); - if (!desc) { - printk(KERN_INFO "can not get irq_desc %d\n", irq); - return 0; - } - /* * IRQs < 16 are already in the irq_2_pin[] map */ - if (irq >= NR_IRQS_LEGACY) { - cfg = desc->chip_data; - add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); - } + if (irq >= 16) + add_pin_to_irq(irq, ioapic, pin); - setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); + setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); return 0; } @@ -4017,7 +3754,7 @@ void __init setup_ioapic_dest(void) int pin, ioapic, irq, irq_entry; struct irq_desc *desc; struct irq_cfg *cfg; - const struct cpumask *mask; + cpumask_t mask; if (skip_ioapic_setup == 1) return; @@ -4033,10 +3770,9 @@ void __init setup_ioapic_dest(void) * when you have too many devices, because at that time only boot * cpu is online. */ - desc = irq_to_desc(irq); - cfg = desc->chip_data; + cfg = irq_cfg(irq); if (!cfg->vector) { - setup_IO_APIC_irq(ioapic, pin, irq, desc, + setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); continue; @@ -4046,18 +3782,19 @@ void __init setup_ioapic_dest(void) /* * Honour affinities which have been set in early boot */ + desc = irq_to_desc(irq); if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) - mask = &desc->affinity; + mask = desc->affinity; else mask = TARGET_CPUS; #ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq_desc(desc, mask); + set_ir_ioapic_affinity_irq(irq, &mask); else #endif - set_ioapic_affinity_irq_desc(desc, mask); + set_ioapic_affinity_irq(irq, &mask); } } @@ -4106,6 +3843,7 @@ void __init ioapic_init_mappings(void) struct resource *ioapic_res; int i; + irq_2_pin_init(); ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { diff --git a/trunk/arch/x86/kernel/ipi.c b/trunk/arch/x86/kernel/ipi.c index 285bbf8831fa..f1c688e46f35 100644 --- a/trunk/arch/x86/kernel/ipi.c +++ b/trunk/arch/x86/kernel/ipi.c @@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) /* * This is only used on smaller machines. */ -void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) +void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) { - unsigned long mask = cpumask_bits(cpumask)[0]; + unsigned long mask = cpus_addr(cpumask)[0]; unsigned long flags; local_irq_save(flags); - WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); + WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); __send_IPI_dest_field(mask, vector); local_irq_restore(flags); } -void send_IPI_mask_sequence(const struct cpumask *mask, int vector) +void send_IPI_mask_sequence(cpumask_t mask, int vector) { unsigned long flags; unsigned int query_cpu; @@ -139,24 +139,12 @@ void send_IPI_mask_sequence(const struct cpumask *mask, int vector) */ local_irq_save(flags); - for_each_cpu(query_cpu, mask) - __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); - local_irq_restore(flags); -} - -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) -{ - unsigned long flags; - unsigned int query_cpu; - unsigned int this_cpu = smp_processor_id(); - - /* See Hack comment above */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) + for_each_possible_cpu(query_cpu) { + if (cpu_isset(query_cpu, mask)) { __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); + } + } local_irq_restore(flags); } diff --git a/trunk/arch/x86/kernel/irq.c b/trunk/arch/x86/kernel/irq.c index bce53e1352a0..d1d4dc52f649 100644 --- a/trunk/arch/x86/kernel/irq.c +++ b/trunk/arch/x86/kernel/irq.c @@ -9,7 +9,6 @@ #include #include #include -#include atomic_t irq_err_count; @@ -119,9 +118,6 @@ int show_interrupts(struct seq_file *p, void *v) } desc = irq_to_desc(i); - if (!desc) - return 0; - spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP any_count = kstat_irqs(i); @@ -191,5 +187,3 @@ u64 arch_irq_stat(void) #endif return sum; } - -EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); diff --git a/trunk/arch/x86/kernel/irq_32.c b/trunk/arch/x86/kernel/irq_32.c index 9dc5588f336a..87870a49be4e 100644 --- a/trunk/arch/x86/kernel/irq_32.c +++ b/trunk/arch/x86/kernel/irq_32.c @@ -233,28 +233,25 @@ unsigned int do_IRQ(struct pt_regs *regs) #ifdef CONFIG_HOTPLUG_CPU #include -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ -void fixup_irqs(void) +void fixup_irqs(cpumask_t map) { unsigned int irq; static int warned; struct irq_desc *desc; for_each_irq_desc(irq, desc) { - const struct cpumask *affinity; + cpumask_t mask; - if (!desc) - continue; if (irq == 2) continue; - affinity = &desc->affinity; - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + cpus_and(mask, desc->affinity, map); + if (any_online_cpu(mask) == NR_CPUS) { printk("Breaking affinity for irq %i\n", irq); - affinity = cpu_all_mask; + mask = map; } if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); + desc->chip->set_affinity(irq, &mask); else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff --git a/trunk/arch/x86/kernel/irq_64.c b/trunk/arch/x86/kernel/irq_64.c index fca2991443f5..7d37f847544d 100644 --- a/trunk/arch/x86/kernel/irq_64.c +++ b/trunk/arch/x86/kernel/irq_64.c @@ -83,43 +83,40 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) } #ifdef CONFIG_HOTPLUG_CPU -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ -void fixup_irqs(void) +void fixup_irqs(cpumask_t map) { unsigned int irq; static int warned; struct irq_desc *desc; for_each_irq_desc(irq, desc) { + cpumask_t mask; int break_affinity = 0; int set_affinity = 1; - const struct cpumask *affinity; - if (!desc) - continue; if (irq == 2) continue; /* interrupt's are disabled at this point */ spin_lock(&desc->lock); - affinity = &desc->affinity; if (!irq_has_action(irq) || - cpumask_equal(affinity, cpu_online_mask)) { + cpus_equal(desc->affinity, map)) { spin_unlock(&desc->lock); continue; } - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + cpus_and(mask, desc->affinity, map); + if (cpus_empty(mask)) { break_affinity = 1; - affinity = cpu_all_mask; + mask = map; } if (desc->chip->mask) desc->chip->mask(irq); if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); + desc->chip->set_affinity(irq, &mask); else if (!(warned++)) set_affinity = 0; diff --git a/trunk/arch/x86/kernel/irqinit_32.c b/trunk/arch/x86/kernel/irqinit_32.c index 61aa2a1004b5..845aa9803e80 100644 --- a/trunk/arch/x86/kernel/irqinit_32.c +++ b/trunk/arch/x86/kernel/irqinit_32.c @@ -68,7 +68,8 @@ void __init init_ISA_irqs (void) /* * 16 old-style INTA-cycle interrupts: */ - for (i = 0; i < NR_IRQS_LEGACY; i++) { + for (i = 0; i < 16; i++) { + /* first time call this irq_desc */ struct irq_desc *desc = irq_to_desc(i); desc->status = IRQ_DISABLED; @@ -110,18 +111,6 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 }; -int vector_used_by_percpu_irq(unsigned int vector) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (per_cpu(vector_irq, cpu)[vector] != -1) - return 1; - } - - return 0; -} - /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); @@ -158,12 +147,10 @@ void __init native_init_IRQ(void) alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); /* IPI for single call function */ - alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); + set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); - set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif #ifdef CONFIG_X86_LOCAL_APIC diff --git a/trunk/arch/x86/kernel/irqinit_64.c b/trunk/arch/x86/kernel/irqinit_64.c index 1020919efe1c..ff0235391285 100644 --- a/trunk/arch/x86/kernel/irqinit_64.c +++ b/trunk/arch/x86/kernel/irqinit_64.c @@ -135,18 +135,6 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 }; -int vector_used_by_percpu_irq(unsigned int vector) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (per_cpu(vector_irq, cpu)[vector] != -1) - return 1; - } - - return 0; -} - void __init init_ISA_irqs(void) { int i; @@ -154,7 +142,8 @@ void __init init_ISA_irqs(void) init_bsp_APIC(); init_8259A(0); - for (i = 0; i < NR_IRQS_LEGACY; i++) { + for (i = 0; i < 16; i++) { + /* first time call this irq_desc */ struct irq_desc *desc = irq_to_desc(i); desc->status = IRQ_DISABLED; @@ -199,7 +188,6 @@ static void __init smp_intr_init(void) /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); - set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif } diff --git a/trunk/arch/x86/kernel/machine_kexec_32.c b/trunk/arch/x86/kernel/machine_kexec_32.c index 37f420018a41..7a385746509a 100644 --- a/trunk/arch/x86/kernel/machine_kexec_32.c +++ b/trunk/arch/x86/kernel/machine_kexec_32.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include @@ -26,6 +25,15 @@ #include #include +#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) +static u32 kexec_pgd[1024] PAGE_ALIGNED; +#ifdef CONFIG_X86_PAE +static u32 kexec_pmd0[1024] PAGE_ALIGNED; +static u32 kexec_pmd1[1024] PAGE_ALIGNED; +#endif +static u32 kexec_pte0[1024] PAGE_ALIGNED; +static u32 kexec_pte1[1024] PAGE_ALIGNED; + static void set_idt(void *newidt, __u16 limit) { struct desc_ptr curidt; @@ -68,76 +76,6 @@ static void load_segments(void) #undef __STR } -static void machine_kexec_free_page_tables(struct kimage *image) -{ - free_page((unsigned long)image->arch.pgd); -#ifdef CONFIG_X86_PAE - free_page((unsigned long)image->arch.pmd0); - free_page((unsigned long)image->arch.pmd1); -#endif - free_page((unsigned long)image->arch.pte0); - free_page((unsigned long)image->arch.pte1); -} - -static int machine_kexec_alloc_page_tables(struct kimage *image) -{ - image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL); -#ifdef CONFIG_X86_PAE - image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL); - image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL); -#endif - image->arch.pte0 = (pte_t *)get_zeroed_page(GFP_KERNEL); - image->arch.pte1 = (pte_t *)get_zeroed_page(GFP_KERNEL); - if (!image->arch.pgd || -#ifdef CONFIG_X86_PAE - !image->arch.pmd0 || !image->arch.pmd1 || -#endif - !image->arch.pte0 || !image->arch.pte1) { - machine_kexec_free_page_tables(image); - return -ENOMEM; - } - return 0; -} - -static void machine_kexec_page_table_set_one( - pgd_t *pgd, pmd_t *pmd, pte_t *pte, - unsigned long vaddr, unsigned long paddr) -{ - pud_t *pud; - - pgd += pgd_index(vaddr); -#ifdef CONFIG_X86_PAE - if (!(pgd_val(*pgd) & _PAGE_PRESENT)) - set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT)); -#endif - pud = pud_offset(pgd, vaddr); - pmd = pmd_offset(pud, vaddr); - if (!(pmd_val(*pmd) & _PAGE_PRESENT)) - set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); - pte = pte_offset_kernel(pmd, vaddr); - set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); -} - -static void machine_kexec_prepare_page_tables(struct kimage *image) -{ - void *control_page; - pmd_t *pmd = 0; - - control_page = page_address(image->control_code_page); -#ifdef CONFIG_X86_PAE - pmd = image->arch.pmd0; -#endif - machine_kexec_page_table_set_one( - image->arch.pgd, pmd, image->arch.pte0, - (unsigned long)control_page, __pa(control_page)); -#ifdef CONFIG_X86_PAE - pmd = image->arch.pmd1; -#endif - machine_kexec_page_table_set_one( - image->arch.pgd, pmd, image->arch.pte1, - __pa(control_page), __pa(control_page)); -} - /* * A architecture hook called to validate the * proposed image and prepare the control pages @@ -149,20 +87,12 @@ static void machine_kexec_prepare_page_tables(struct kimage *image) * reboot code buffer to allow us to avoid allocations * later. * - * - Make control page executable. - * - Allocate page tables - * - Setup page tables + * Make control page executable. */ int machine_kexec_prepare(struct kimage *image) { - int error; - if (nx_enabled) set_pages_x(image->control_code_page, 1); - error = machine_kexec_alloc_page_tables(image); - if (error) - return error; - machine_kexec_prepare_page_tables(image); return 0; } @@ -174,7 +104,6 @@ void machine_kexec_cleanup(struct kimage *image) { if (nx_enabled) set_pages_nx(image->control_code_page, 1); - machine_kexec_free_page_tables(image); } /* @@ -221,7 +150,18 @@ void machine_kexec(struct kimage *image) relocate_kernel_ptr = control_page; page_list[PA_CONTROL_PAGE] = __pa(control_page); page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; - page_list[PA_PGD] = __pa(image->arch.pgd); + page_list[PA_PGD] = __pa(kexec_pgd); + page_list[VA_PGD] = (unsigned long)kexec_pgd; +#ifdef CONFIG_X86_PAE + page_list[PA_PMD_0] = __pa(kexec_pmd0); + page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; + page_list[PA_PMD_1] = __pa(kexec_pmd1); + page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; +#endif + page_list[PA_PTE_0] = __pa(kexec_pte0); + page_list[VA_PTE_0] = (unsigned long)kexec_pte0; + page_list[PA_PTE_1] = __pa(kexec_pte1); + page_list[VA_PTE_1] = (unsigned long)kexec_pte1; if (image->type == KEXEC_TYPE_DEFAULT) page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) diff --git a/trunk/arch/x86/kernel/mpparse.c b/trunk/arch/x86/kernel/mpparse.c index 45e3b69808ba..0f4c1fd5a1f4 100644 --- a/trunk/arch/x86/kernel/mpparse.c +++ b/trunk/arch/x86/kernel/mpparse.c @@ -586,23 +586,26 @@ static void __init __get_smp_config(unsigned int early) { struct intel_mp_floating *mpf = mpf_found; - if (!mpf) - return; - + if (x86_quirks->mach_get_smp_config) { + if (x86_quirks->mach_get_smp_config(early)) + return; + } if (acpi_lapic && early) return; - /* - * MPS doesn't support hyperthreading, aka only have - * thread 0 apic id in MPS table + * ACPI supports both logical (e.g. Hyper-Threading) and physical + * processors, where MPS only supports physical. */ - if (acpi_lapic && acpi_ioapic) + if (acpi_lapic && acpi_ioapic) { + printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " + "information\n"); return; + } else if (acpi_lapic) + printk(KERN_INFO "Using ACPI for processor (LAPIC) " + "configuration information\n"); - if (x86_quirks->mach_get_smp_config) { - if (x86_quirks->mach_get_smp_config(early)) - return; - } + if (!mpf) + return; printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); diff --git a/trunk/arch/x86/kernel/nmi.c b/trunk/arch/x86/kernel/nmi.c index 13316cf57cdb..2c97f07f1c2c 100644 --- a/trunk/arch/x86/kernel/nmi.c +++ b/trunk/arch/x86/kernel/nmi.c @@ -131,11 +131,6 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) atomic_dec(&nmi_active); } -static void __acpi_nmi_disable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); -} - int __init check_nmi_watchdog(void) { unsigned int *prev_nmi_count; @@ -184,12 +179,8 @@ int __init check_nmi_watchdog(void) kfree(prev_nmi_count); return 0; error: - if (nmi_watchdog == NMI_IO_APIC) { - if (!timer_through_8259) - disable_8259A_irq(0); - on_each_cpu(__acpi_nmi_disable, NULL, 1); - } - + if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259) + disable_8259A_irq(0); #ifdef CONFIG_X86_32 timer_ack = 0; #endif @@ -294,6 +285,11 @@ void acpi_nmi_enable(void) on_each_cpu(__acpi_nmi_enable, NULL, 1); } +static void __acpi_nmi_disable(void *__unused) +{ + apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); +} + /* * Disable timer based NMIs on all CPUs: */ @@ -344,8 +340,6 @@ void stop_apic_nmi_watchdog(void *unused) return; if (nmi_watchdog == NMI_LOCAL_APIC) lapic_watchdog_stop(); - else - __acpi_nmi_disable(NULL); __get_cpu_var(wd_enabled) = 0; atomic_dec(&nmi_active); } @@ -471,24 +465,6 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) #ifdef CONFIG_SYSCTL -static void enable_ioapic_nmi_watchdog_single(void *unused) -{ - __get_cpu_var(wd_enabled) = 1; - atomic_inc(&nmi_active); - __acpi_nmi_enable(NULL); -} - -static void enable_ioapic_nmi_watchdog(void) -{ - on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1); - touch_nmi_watchdog(); -} - -static void disable_ioapic_nmi_watchdog(void) -{ - on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); -} - static int __init setup_unknown_nmi_panic(char *str) { unknown_nmi_panic = 1; @@ -531,11 +507,6 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, enable_lapic_nmi_watchdog(); else disable_lapic_nmi_watchdog(); - } else if (nmi_watchdog == NMI_IO_APIC) { - if (nmi_watchdog_enabled) - enable_ioapic_nmi_watchdog(); - else - disable_ioapic_nmi_watchdog(); } else { printk(KERN_WARNING "NMI watchdog doesn't know what hardware to touch\n"); diff --git a/trunk/arch/x86/kernel/numaq_32.c b/trunk/arch/x86/kernel/numaq_32.c index 0deea37a53cf..4caff39078e0 100644 --- a/trunk/arch/x86/kernel/numaq_32.c +++ b/trunk/arch/x86/kernel/numaq_32.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include @@ -235,13 +235,6 @@ static int __init numaq_setup_ioapic_ids(void) return 1; } -static int __init numaq_update_genapic(void) -{ - genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; - - return 0; -} - static struct x86_quirks numaq_x86_quirks __initdata = { .arch_pre_time_init = numaq_pre_time_init, .arch_time_init = NULL, @@ -257,7 +250,6 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .mpc_oem_pci_bus = mpc_oem_pci_bus, .smp_read_mpc_oem = smp_read_mpc_oem, .setup_ioapic_ids = numaq_setup_ioapic_ids, - .update_genapic = numaq_update_genapic, }; void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, diff --git a/trunk/arch/x86/kernel/process.c b/trunk/arch/x86/kernel/process.c index 95d811a9594f..c622772744d8 100644 --- a/trunk/arch/x86/kernel/process.c +++ b/trunk/arch/x86/kernel/process.c @@ -7,9 +7,7 @@ #include #include #include -#include #include -#include unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); @@ -102,9 +100,6 @@ static inline int hlt_use_halt(void) void default_idle(void) { if (hlt_use_halt()) { - struct power_trace it; - - trace_power_start(&it, POWER_CSTATE, 1); current_thread_info()->status &= ~TS_POLLING; /* * TS_POLLING-cleared state must be visible before we @@ -117,7 +112,6 @@ void default_idle(void) else local_irq_enable(); current_thread_info()->status |= TS_POLLING; - trace_power_end(&it); } else { local_irq_enable(); /* loop is done by the caller */ @@ -128,21 +122,6 @@ void default_idle(void) EXPORT_SYMBOL(default_idle); #endif -void stop_this_cpu(void *dummy) -{ - local_irq_disable(); - /* - * Remove this CPU: - */ - cpu_clear(smp_processor_id(), cpu_online_map); - disable_local_APIC(); - - for (;;) { - if (hlt_works(smp_processor_id())) - halt(); - } -} - static void do_nothing(void *unused) { } @@ -175,31 +154,24 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); */ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { - struct power_trace it; - - trace_power_start(&it, POWER_CSTATE, (ax>>4)+1); if (!need_resched()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) __mwait(ax, cx); } - trace_power_end(&it); } /* Default MONITOR/MWAIT with no hints, used for default C1 state */ static void mwait_idle(void) { - struct power_trace it; if (!need_resched()) { - trace_power_start(&it, POWER_CSTATE, 1); __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) __sti_mwait(0, 0); else local_irq_enable(); - trace_power_end(&it); } else local_irq_enable(); } @@ -211,13 +183,9 @@ static void mwait_idle(void) */ static void poll_idle(void) { - struct power_trace it; - - trace_power_start(&it, POWER_CSTATE, 0); local_irq_enable(); while (!need_resched()) cpu_relax(); - trace_power_end(&it); } /* diff --git a/trunk/arch/x86/kernel/process_32.c b/trunk/arch/x86/kernel/process_32.c index 24c2276aa453..0a1302fe6d45 100644 --- a/trunk/arch/x86/kernel/process_32.c +++ b/trunk/arch/x86/kernel/process_32.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include @@ -549,8 +548,7 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, * the task-switch, and shows up in ret_from_fork in entry.S, * for example. */ -__notrace_funcgraph struct task_struct * -__switch_to(struct task_struct *prev_p, struct task_struct *next_p) +struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; diff --git a/trunk/arch/x86/kernel/process_64.c b/trunk/arch/x86/kernel/process_64.c index fbb321d53d34..c958120fb1b6 100644 --- a/trunk/arch/x86/kernel/process_64.c +++ b/trunk/arch/x86/kernel/process_64.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include @@ -552,9 +551,8 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, * - could test fs/gs bitsliced * * Kprobes not supported here. Set the probe on schedule instead. - * Function graph tracer not supported too. */ -__notrace_funcgraph struct task_struct * +struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread; diff --git a/trunk/arch/x86/kernel/ptrace.c b/trunk/arch/x86/kernel/ptrace.c index 2c8ec1ba75e6..0a6d8c12e10d 100644 --- a/trunk/arch/x86/kernel/ptrace.c +++ b/trunk/arch/x86/kernel/ptrace.c @@ -668,14 +668,14 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index, size_t bts_index, bts_end; int error; - error = ds_get_bts_end(child->bts, &bts_end); + error = ds_get_bts_end(child, &bts_end); if (error < 0) return error; if (bts_end <= index) return -EINVAL; - error = ds_get_bts_index(child->bts, &bts_index); + error = ds_get_bts_index(child, &bts_index); if (error < 0) return error; @@ -684,7 +684,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index, if (bts_end <= bts_index) bts_index -= bts_end; - error = ds_access_bts(child->bts, bts_index, &bts_record); + error = ds_access_bts(child, bts_index, &bts_record); if (error < 0) return error; @@ -705,14 +705,14 @@ static int ptrace_bts_drain(struct task_struct *child, size_t end, i; int error; - error = ds_get_bts_index(child->bts, &end); + error = ds_get_bts_index(child, &end); if (error < 0) return error; if (size < (end * sizeof(struct bts_struct))) return -EIO; - error = ds_access_bts(child->bts, 0, (const void **)&raw); + error = ds_access_bts(child, 0, (const void **)&raw); if (error < 0) return error; @@ -723,13 +723,18 @@ static int ptrace_bts_drain(struct task_struct *child, return -EFAULT; } - error = ds_clear_bts(child->bts); + error = ds_clear_bts(child); if (error < 0) return error; return end; } +static void ptrace_bts_ovfl(struct task_struct *child) +{ + send_sig(child->thread.bts_ovfl_signal, child, 0); +} + static int ptrace_bts_config(struct task_struct *child, long cfg_size, const struct ptrace_bts_config __user *ucfg) @@ -755,45 +760,23 @@ static int ptrace_bts_config(struct task_struct *child, goto errout; if (cfg.flags & PTRACE_BTS_O_ALLOC) { - bts_ovfl_callback_t ovfl = NULL; + ds_ovfl_callback_t ovfl = NULL; unsigned int sig = 0; - error = -EINVAL; - if (cfg.size < (10 * bts_cfg.sizeof_bts)) - goto errout; + /* we ignore the error in case we were not tracing child */ + (void)ds_release_bts(child); if (cfg.flags & PTRACE_BTS_O_SIGNAL) { if (!cfg.signal) goto errout; - error = -EOPNOTSUPP; - goto errout; - sig = cfg.signal; + ovfl = ptrace_bts_ovfl; } - if (child->bts) { - (void)ds_release_bts(child->bts); - kfree(child->bts_buffer); - - child->bts = NULL; - child->bts_buffer = NULL; - } - - error = -ENOMEM; - child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL); - if (!child->bts_buffer) - goto errout; - - child->bts = ds_request_bts(child, child->bts_buffer, cfg.size, - ovfl, /* th = */ (size_t)-1); - if (IS_ERR(child->bts)) { - error = PTR_ERR(child->bts); - kfree(child->bts_buffer); - child->bts = NULL; - child->bts_buffer = NULL; + error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); + if (error < 0) goto errout; - } child->thread.bts_ovfl_signal = sig; } @@ -840,15 +823,15 @@ static int ptrace_bts_status(struct task_struct *child, if (cfg_size < sizeof(cfg)) return -EIO; - error = ds_get_bts_end(child->bts, &end); + error = ds_get_bts_end(child, &end); if (error < 0) return error; - error = ds_access_bts(child->bts, /* index = */ 0, &base); + error = ds_access_bts(child, /* index = */ 0, &base); if (error < 0) return error; - error = ds_access_bts(child->bts, /* index = */ end, &max); + error = ds_access_bts(child, /* index = */ end, &max); if (error < 0) return error; @@ -901,7 +884,10 @@ static int ptrace_bts_write_record(struct task_struct *child, return -EINVAL; } - return ds_write_bts(child->bts, bts_record, bts_cfg.sizeof_bts); + /* The writing task will be the switched-to task on a context + * switch. It needs to write into the switched-from task's BTS + * buffer. */ + return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts); } void ptrace_bts_take_timestamp(struct task_struct *tsk, @@ -943,16 +929,17 @@ void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) switch (c->x86) { case 0x6: switch (c->x86_model) { - case 0 ... 0xC: - /* sorry, don't know about them */ - break; case 0xD: case 0xE: /* Pentium M */ bts_configure(&bts_cfg_pentium_m); break; - default: /* Core2, Atom, ... */ + case 0xF: /* Core2 */ + case 0x1C: /* Atom */ bts_configure(&bts_cfg_core2); break; + default: + /* sorry, don't know about them */ + break; } break; case 0xF: @@ -986,17 +973,13 @@ void ptrace_disable(struct task_struct *child) clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif #ifdef CONFIG_X86_PTRACE_BTS - if (child->bts) { - (void)ds_release_bts(child->bts); - kfree(child->bts_buffer); - child->bts_buffer = NULL; + (void)ds_release_bts(child); - child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; - if (!child->thread.debugctlmsr) - clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; + if (!child->thread.debugctlmsr) + clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); - } + clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); #endif /* CONFIG_X86_PTRACE_BTS */ } @@ -1128,16 +1111,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) (child, data, (struct ptrace_bts_config __user *)addr); break; - case PTRACE_BTS_SIZE: { - size_t size; - - ret = ds_get_bts_index(child->bts, &size); - if (ret == 0) { - BUG_ON(size != (int) size); - ret = (int) size; - } + case PTRACE_BTS_SIZE: + ret = ds_get_bts_index(child, /* pos = */ NULL); break; - } case PTRACE_BTS_GET: ret = ptrace_bts_read_record @@ -1145,7 +1121,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_BTS_CLEAR: - ret = ds_clear_bts(child->bts); + ret = ds_clear_bts(child); break; case PTRACE_BTS_DRAIN: diff --git a/trunk/arch/x86/kernel/reboot.c b/trunk/arch/x86/kernel/reboot.c index ba7b9a0e6063..cc5a2545dd41 100644 --- a/trunk/arch/x86/kernel/reboot.c +++ b/trunk/arch/x86/kernel/reboot.c @@ -21,9 +21,6 @@ # include #endif -#include - - /* * Power off function, if any */ @@ -39,10 +36,7 @@ int reboot_force; static int reboot_cpu = -1; #endif -/* This is set by the PCI code if either type 1 or type 2 PCI is detected */ -bool port_cf9_safe = false; - -/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] +/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] warm Don't set the cold reboot flag cold Set the cold reboot flag bios Reboot by jumping through the BIOS (only for X86_32) @@ -51,7 +45,6 @@ bool port_cf9_safe = false; kbd Use the keyboard controller. cold reset (default) acpi Use the RESET_REG in the FADT efi Use efi reset_system runtime service - pci Use the so-called "PCI reset register", CF9 force Avoid anything that could hang. */ static int __init reboot_setup(char *str) @@ -86,7 +79,6 @@ static int __init reboot_setup(char *str) case 'k': case 't': case 'e': - case 'p': reboot_type = *str; break; @@ -412,27 +404,12 @@ static void native_machine_emergency_restart(void) reboot_type = BOOT_KBD; break; + case BOOT_EFI: if (efi_enabled) - efi.reset_system(reboot_mode ? - EFI_RESET_WARM : - EFI_RESET_COLD, + efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD, EFI_SUCCESS, 0, NULL); - reboot_type = BOOT_KBD; - break; - case BOOT_CF9: - port_cf9_safe = true; - /* fall through */ - - case BOOT_CF9_COND: - if (port_cf9_safe) { - u8 cf9 = inb(0xcf9) & ~6; - outb(cf9|2, 0xcf9); /* Request hard reset */ - udelay(50); - outb(cf9|6, 0xcf9); /* Actually do the reset */ - udelay(50); - } reboot_type = BOOT_KBD; break; } @@ -493,11 +470,6 @@ static void native_machine_restart(char *__unused) static void native_machine_halt(void) { - /* stop other cpus and apics */ - machine_shutdown(); - - /* stop this cpu */ - stop_this_cpu(NULL); } static void native_machine_power_off(void) @@ -551,92 +523,3 @@ void machine_crash_shutdown(struct pt_regs *regs) machine_ops.crash_shutdown(regs); } #endif - - -#if defined(CONFIG_SMP) - -/* This keeps a track of which one is crashing cpu. */ -static int crashing_cpu; -static nmi_shootdown_cb shootdown_callback; - -static atomic_t waiting_for_crash_ipi; - -static int crash_nmi_callback(struct notifier_block *self, - unsigned long val, void *data) -{ - int cpu; - - if (val != DIE_NMI_IPI) - return NOTIFY_OK; - - cpu = raw_smp_processor_id(); - - /* Don't do anything if this handler is invoked on crashing cpu. - * Otherwise, system will completely hang. Crashing cpu can get - * an NMI if system was initially booted with nmi_watchdog parameter. - */ - if (cpu == crashing_cpu) - return NOTIFY_STOP; - local_irq_disable(); - - shootdown_callback(cpu, (struct die_args *)data); - - atomic_dec(&waiting_for_crash_ipi); - /* Assume hlt works */ - halt(); - for (;;) - cpu_relax(); - - return 1; -} - -static void smp_send_nmi_allbutself(void) -{ - send_IPI_allbutself(NMI_VECTOR); -} - -static struct notifier_block crash_nmi_nb = { - .notifier_call = crash_nmi_callback, -}; - -/* Halt all other CPUs, calling the specified function on each of them - * - * This function can be used to halt all other CPUs on crash - * or emergency reboot time. The function passed as parameter - * will be called inside a NMI handler on all CPUs. - */ -void nmi_shootdown_cpus(nmi_shootdown_cb callback) -{ - unsigned long msecs; - local_irq_disable(); - - /* Make a note of crashing cpu. Will be used in NMI callback.*/ - crashing_cpu = safe_smp_processor_id(); - - shootdown_callback = callback; - - atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); - /* Would it be better to replace the trap vector here? */ - if (register_die_notifier(&crash_nmi_nb)) - return; /* return what? */ - /* Ensure the new callback function is set before sending - * out the NMI - */ - wmb(); - - smp_send_nmi_allbutself(); - - msecs = 1000; /* Wait at most a second for the other cpus to stop */ - while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { - mdelay(1); - msecs--; - } - - /* Leave the nmi callback set */ -} -#else /* !CONFIG_SMP */ -void nmi_shootdown_cpus(nmi_shootdown_cb callback) -{ - /* No other CPUs to shoot down */ -} -#endif diff --git a/trunk/arch/x86/kernel/relocate_kernel_32.S b/trunk/arch/x86/kernel/relocate_kernel_32.S index a160f3119725..6f50664b2ba5 100644 --- a/trunk/arch/x86/kernel/relocate_kernel_32.S +++ b/trunk/arch/x86/kernel/relocate_kernel_32.S @@ -10,12 +10,15 @@ #include #include #include +#include /* * Must be relocatable PIC code callable as a C function */ #define PTR(x) (x << 2) +#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +#define PAE_PGD_ATTR (_PAGE_PRESENT) /* control_page + KEXEC_CONTROL_CODE_MAX_SIZE * ~ control_page + PAGE_SIZE are used as data storage and stack for @@ -36,6 +39,7 @@ #define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) .text + .align PAGE_SIZE .globl relocate_kernel relocate_kernel: /* Save the CPU context, used for jumping back */ @@ -56,6 +60,117 @@ relocate_kernel: movl %cr4, %eax movl %eax, CR4(%edi) +#ifdef CONFIG_X86_PAE + /* map the control page at its virtual address */ + + movl PTR(VA_PGD)(%ebp), %edi + movl PTR(VA_CONTROL_PAGE)(%ebp), %eax + andl $0xc0000000, %eax + shrl $27, %eax + addl %edi, %eax + + movl PTR(PA_PMD_0)(%ebp), %edx + orl $PAE_PGD_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PMD_0)(%ebp), %edi + movl PTR(VA_CONTROL_PAGE)(%ebp), %eax + andl $0x3fe00000, %eax + shrl $18, %eax + addl %edi, %eax + + movl PTR(PA_PTE_0)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PTE_0)(%ebp), %edi + movl PTR(VA_CONTROL_PAGE)(%ebp), %eax + andl $0x001ff000, %eax + shrl $9, %eax + addl %edi, %eax + + movl PTR(PA_CONTROL_PAGE)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + /* identity map the control page at its physical address */ + + movl PTR(VA_PGD)(%ebp), %edi + movl PTR(PA_CONTROL_PAGE)(%ebp), %eax + andl $0xc0000000, %eax + shrl $27, %eax + addl %edi, %eax + + movl PTR(PA_PMD_1)(%ebp), %edx + orl $PAE_PGD_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PMD_1)(%ebp), %edi + movl PTR(PA_CONTROL_PAGE)(%ebp), %eax + andl $0x3fe00000, %eax + shrl $18, %eax + addl %edi, %eax + + movl PTR(PA_PTE_1)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PTE_1)(%ebp), %edi + movl PTR(PA_CONTROL_PAGE)(%ebp), %eax + andl $0x001ff000, %eax + shrl $9, %eax + addl %edi, %eax + + movl PTR(PA_CONTROL_PAGE)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) +#else + /* map the control page at its virtual address */ + + movl PTR(VA_PGD)(%ebp), %edi + movl PTR(VA_CONTROL_PAGE)(%ebp), %eax + andl $0xffc00000, %eax + shrl $20, %eax + addl %edi, %eax + + movl PTR(PA_PTE_0)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PTE_0)(%ebp), %edi + movl PTR(VA_CONTROL_PAGE)(%ebp), %eax + andl $0x003ff000, %eax + shrl $10, %eax + addl %edi, %eax + + movl PTR(PA_CONTROL_PAGE)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + /* identity map the control page at its physical address */ + + movl PTR(VA_PGD)(%ebp), %edi + movl PTR(PA_CONTROL_PAGE)(%ebp), %eax + andl $0xffc00000, %eax + shrl $20, %eax + addl %edi, %eax + + movl PTR(PA_PTE_1)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PTE_1)(%ebp), %edi + movl PTR(PA_CONTROL_PAGE)(%ebp), %eax + andl $0x003ff000, %eax + shrl $10, %eax + addl %edi, %eax + + movl PTR(PA_CONTROL_PAGE)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) +#endif + +relocate_new_kernel: /* read the arguments and say goodbye to the stack */ movl 20+4(%esp), %ebx /* page_list */ movl 20+8(%esp), %ebp /* list of pages */ diff --git a/trunk/arch/x86/kernel/setup.c b/trunk/arch/x86/kernel/setup.c index 32fe42f26e79..9d5674f7b6cc 100644 --- a/trunk/arch/x86/kernel/setup.c +++ b/trunk/arch/x86/kernel/setup.c @@ -583,20 +583,7 @@ static int __init setup_elfcorehdr(char *arg) early_param("elfcorehdr", setup_elfcorehdr); #endif -static int __init default_update_genapic(void) -{ -#ifdef CONFIG_X86_SMP -# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) - genapic->wakeup_cpu = wakeup_secondary_cpu_via_init; -# endif -#endif - - return 0; -} - -static struct x86_quirks default_x86_quirks __initdata = { - .update_genapic = default_update_genapic, -}; +static struct x86_quirks default_x86_quirks __initdata; struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; @@ -807,9 +794,6 @@ void __init setup_arch(char **cmdline_p) printk(KERN_INFO "Command line: %s\n", boot_command_line); #endif - /* VMI may relocate the fixmap; do this before touching ioremap area */ - vmi_init(); - early_cpu_init(); early_ioremap_init(); @@ -896,8 +880,13 @@ void __init setup_arch(char **cmdline_p) check_efer(); #endif - /* Must be before kernel pagetables are setup */ - vmi_activate(); +#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) + /* + * Must be before kernel pagetables are setup + * or fixmap area is touched. + */ + vmi_init(); +#endif /* after early param, so could get panic from serial */ reserve_early_setup_data(); @@ -1093,7 +1082,7 @@ void __init setup_arch(char **cmdline_p) ioapic_init_mappings(); /* need to wait for io_apic is mapped */ - probe_nr_irqs_gsi(); + nr_irqs = probe_nr_irqs(); kvm_guest_init(); diff --git a/trunk/arch/x86/kernel/setup_percpu.c b/trunk/arch/x86/kernel/setup_percpu.c index 0b63b08e7530..1c2084291f97 100644 --- a/trunk/arch/x86/kernel/setup_percpu.c +++ b/trunk/arch/x86/kernel/setup_percpu.c @@ -152,11 +152,6 @@ void __init setup_per_cpu_areas(void) old_size = PERCPU_ENOUGH_ROOM; align = max_t(unsigned long, PAGE_SIZE, align); size = roundup(old_size, align); - - printk(KERN_INFO - "NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", - NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); - printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", size); @@ -173,24 +168,24 @@ void __init setup_per_cpu_areas(void) "cpu %d has no node %d or node-local memory\n", cpu, node); if (ptr) - printk(KERN_DEBUG - "per cpu data for cpu%d at %016lx\n", + printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", cpu, __pa(ptr)); } else { ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, __pa(MAX_DMA_ADDRESS)); if (ptr) - printk(KERN_DEBUG - "per cpu data for cpu%d on node%d " - "at %016lx\n", - cpu, node, __pa(ptr)); + printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", + cpu, node, __pa(ptr)); } #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); } + printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", + NR_CPUS, nr_cpu_ids, nr_node_ids); + /* Setup percpu data maps */ setup_per_cpu_maps(); diff --git a/trunk/arch/x86/kernel/smp.c b/trunk/arch/x86/kernel/smp.c index 49ed667b06f3..18f9b19f5f8f 100644 --- a/trunk/arch/x86/kernel/smp.c +++ b/trunk/arch/x86/kernel/smp.c @@ -118,28 +118,41 @@ static void native_smp_send_reschedule(int cpu) WARN_ON(1); return; } - send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); + send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); } void native_send_call_func_single_ipi(int cpu) { - send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); + send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); } -void native_send_call_func_ipi(const struct cpumask *mask) +void native_send_call_func_ipi(cpumask_t mask) { cpumask_t allbutself; allbutself = cpu_online_map; cpu_clear(smp_processor_id(), allbutself); - if (cpus_equal(*mask, allbutself) && + if (cpus_equal(mask, allbutself) && cpus_equal(cpu_online_map, cpu_callout_map)) send_IPI_allbutself(CALL_FUNCTION_VECTOR); else send_IPI_mask(mask, CALL_FUNCTION_VECTOR); } +static void stop_this_cpu(void *dummy) +{ + local_irq_disable(); + /* + * Remove this CPU: + */ + cpu_clear(smp_processor_id(), cpu_online_map); + disable_local_APIC(); + if (hlt_works(smp_processor_id())) + for (;;) halt(); + for (;;); +} + /* * this function calls the 'stop' function on all other CPUs in the system. */ diff --git a/trunk/arch/x86/kernel/smpboot.c b/trunk/arch/x86/kernel/smpboot.c index 1a9941b11150..468c2f9d47ae 100644 --- a/trunk/arch/x86/kernel/smpboot.c +++ b/trunk/arch/x86/kernel/smpboot.c @@ -62,7 +62,6 @@ #include #include #include -#include #include #include @@ -289,7 +288,9 @@ static void __cpuinit start_secondary(void *unused) * fragile that we want to limit the things done here to the * most necessary things. */ +#ifdef CONFIG_VMI vmi_bringup(); +#endif cpu_init(); preempt_disable(); smp_callin(); @@ -529,7 +530,7 @@ static void impress_friends(void) pr_debug("Before bogocount - setting activated=1.\n"); } -void __inquire_remote_apic(int apicid) +static inline void __inquire_remote_apic(int apicid) { unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; char *names[] = { "ID", "VERSION", "SPIV" }; @@ -568,13 +569,14 @@ void __inquire_remote_apic(int apicid) } } +#ifdef WAKE_SECONDARY_VIA_NMI /* * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this * won't ... remember to clear down the APIC, etc later. */ -int __devinit -wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) +static int __devinit +wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) { unsigned long send_status, accept_status = 0; int maxlvt; @@ -591,7 +593,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) * Give the other CPU some time to accept the IPI. */ udelay(200); - if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { + if (APIC_INTEGRATED(apic_version[phys_apicid])) { maxlvt = lapic_get_maxlvt(); if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); @@ -606,9 +608,11 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) return (send_status | accept_status); } +#endif /* WAKE_SECONDARY_VIA_NMI */ -int __devinit -wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) +#ifdef WAKE_SECONDARY_VIA_INIT +static int __devinit +wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) { unsigned long send_status, accept_status = 0; int maxlvt, num_starts, j; @@ -727,6 +731,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) return (send_status | accept_status); } +#endif /* WAKE_SECONDARY_VIA_INIT */ struct create_idle { struct work_struct work; @@ -1252,15 +1257,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) check_nmi_watchdog(); } -static int __initdata setup_possible_cpus = -1; -static int __init _setup_possible_cpus(char *str) -{ - get_option(&str, &setup_possible_cpus); - return 0; -} -early_param("possible_cpus", _setup_possible_cpus); - - /* * cpu_possible_map should be static, it cannot change as cpu's * are onlined, or offlined. The reason is per-cpu data-structures @@ -1273,7 +1269,7 @@ early_param("possible_cpus", _setup_possible_cpus); * * Three ways to find out the number of additional hotplug CPUs: * - If the BIOS specified disabled CPUs in ACPI/mptables use that. - * - The user can overwrite it with possible_cpus=NUM + * - The user can overwrite it with additional_cpus=NUM * - Otherwise don't reserve additional CPUs. * We do this because additional CPUs waste a lot of memory. * -AK @@ -1286,17 +1282,9 @@ __init void prefill_possible_map(void) if (!num_processors) num_processors = 1; - if (setup_possible_cpus == -1) - possible = num_processors + disabled_cpus; - else - possible = setup_possible_cpus; - - if (possible > CONFIG_NR_CPUS) { - printk(KERN_WARNING - "%d Processors exceeds NR_CPUS limit of %d\n", - possible, CONFIG_NR_CPUS); - possible = CONFIG_NR_CPUS; - } + possible = num_processors + disabled_cpus; + if (possible > NR_CPUS) + possible = NR_CPUS; printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", possible, max_t(int, possible - num_processors, 0)); @@ -1361,7 +1349,7 @@ void cpu_disable_common(void) lock_vector_lock(); remove_cpu_from_maps(cpu); unlock_vector_lock(); - fixup_irqs(); + fixup_irqs(cpu_online_map); } int native_cpu_disable(void) diff --git a/trunk/arch/x86/kernel/stacktrace.c b/trunk/arch/x86/kernel/stacktrace.c index 10786af95545..a03e7f6d90c3 100644 --- a/trunk/arch/x86/kernel/stacktrace.c +++ b/trunk/arch/x86/kernel/stacktrace.c @@ -6,7 +6,6 @@ #include #include #include -#include #include static void save_stack_warning(void *data, char *msg) @@ -84,66 +83,3 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); - -/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ - -struct stack_frame { - const void __user *next_fp; - unsigned long ret_addr; -}; - -static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) -{ - int ret; - - if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) - return 0; - - ret = 1; - pagefault_disable(); - if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) - ret = 0; - pagefault_enable(); - - return ret; -} - -static inline void __save_stack_trace_user(struct stack_trace *trace) -{ - const struct pt_regs *regs = task_pt_regs(current); - const void __user *fp = (const void __user *)regs->bp; - - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = regs->ip; - - while (trace->nr_entries < trace->max_entries) { - struct stack_frame frame; - - frame.next_fp = NULL; - frame.ret_addr = 0; - if (!copy_stack_frame(fp, &frame)) - break; - if ((unsigned long)fp < regs->sp) - break; - if (frame.ret_addr) { - trace->entries[trace->nr_entries++] = - frame.ret_addr; - } - if (fp == frame.next_fp) - break; - fp = frame.next_fp; - } -} - -void save_stack_trace_user(struct stack_trace *trace) -{ - /* - * Trace user stack if we are not a kernel thread - */ - if (current->mm) { - __save_stack_trace_user(trace); - } - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; -} - diff --git a/trunk/arch/x86/kernel/tlb_32.c b/trunk/arch/x86/kernel/tlb_32.c index 174ea90d1cbd..f4049f3513b6 100644 --- a/trunk/arch/x86/kernel/tlb_32.c +++ b/trunk/arch/x86/kernel/tlb_32.c @@ -164,7 +164,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); while (!cpus_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ diff --git a/trunk/arch/x86/kernel/tlb_64.c b/trunk/arch/x86/kernel/tlb_64.c index de6f1bda0c50..8f919ca69494 100644 --- a/trunk/arch/x86/kernel/tlb_64.c +++ b/trunk/arch/x86/kernel/tlb_64.c @@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); while (!cpus_empty(f->flush_cpumask)) cpu_relax(); diff --git a/trunk/arch/x86/kernel/traps.c b/trunk/arch/x86/kernel/traps.c index 4a6dff39a470..04d242ab0161 100644 --- a/trunk/arch/x86/kernel/traps.c +++ b/trunk/arch/x86/kernel/traps.c @@ -72,6 +72,9 @@ #include "cpu/mcheck/mce.h" +DECLARE_BITMAP(used_vectors, NR_VECTORS); +EXPORT_SYMBOL_GPL(used_vectors); + asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? */ @@ -86,9 +89,6 @@ gate_desc idt_table[256] __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; #endif -DECLARE_BITMAP(used_vectors, NR_VECTORS); -EXPORT_SYMBOL_GPL(used_vectors); - static int ignore_nmis; static inline void conditional_sti(struct pt_regs *regs) @@ -949,7 +949,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) void __init trap_init(void) { +#ifdef CONFIG_X86_32 int i; +#endif #ifdef CONFIG_EISA void __iomem *p = early_ioremap(0x0FFFD9, 4); @@ -1006,15 +1008,11 @@ void __init trap_init(void) } set_system_trap_gate(SYSCALL_VECTOR, &system_call); -#endif /* Reserve all the builtin and the syscall vector: */ for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) set_bit(i, used_vectors); -#ifdef CONFIG_X86_64 - set_bit(IA32_SYSCALL_VECTOR, used_vectors); -#else set_bit(SYSCALL_VECTOR, used_vectors); #endif /* diff --git a/trunk/arch/x86/kernel/vmi_32.c b/trunk/arch/x86/kernel/vmi_32.c index 22fd6577156a..8b6c393ab9fd 100644 --- a/trunk/arch/x86/kernel/vmi_32.c +++ b/trunk/arch/x86/kernel/vmi_32.c @@ -960,6 +960,8 @@ static inline int __init activate_vmi(void) void __init vmi_init(void) { + unsigned long flags; + if (!vmi_rom) probe_vmi_rom(); else @@ -971,21 +973,13 @@ void __init vmi_init(void) reserve_top_address(-vmi_rom->virtual_top); + local_irq_save(flags); + activate_vmi(); + #ifdef CONFIG_X86_IO_APIC /* This is virtual hardware; timer routing is wired correctly */ no_timer_check = 1; #endif -} - -void vmi_activate(void) -{ - unsigned long flags; - - if (!vmi_rom) - return; - - local_irq_save(flags); - activate_vmi(); local_irq_restore(flags & X86_EFLAGS_IF); } diff --git a/trunk/arch/x86/kernel/vsyscall_64.c b/trunk/arch/x86/kernel/vsyscall_64.c index 6f3d3d4cd973..0b8b6690a86d 100644 --- a/trunk/arch/x86/kernel/vsyscall_64.c +++ b/trunk/arch/x86/kernel/vsyscall_64.c @@ -17,9 +17,6 @@ * want per guest time just set the kernel.vsyscall64 sysctl to 0. */ -/* Disable profiling for userspace code: */ -#define DISABLE_BRANCH_PROFILING - #include #include #include diff --git a/trunk/arch/x86/lib/usercopy_32.c b/trunk/arch/x86/lib/usercopy_32.c index 4a20b2f9a381..9e68075544f6 100644 --- a/trunk/arch/x86/lib/usercopy_32.c +++ b/trunk/arch/x86/lib/usercopy_32.c @@ -39,7 +39,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon #define __do_strncpy_from_user(dst, src, count, res) \ do { \ int __d0, __d1, __d2; \ - might_fault(); \ + might_sleep(); \ __asm__ __volatile__( \ " testl %1,%1\n" \ " jz 2f\n" \ @@ -126,7 +126,7 @@ EXPORT_SYMBOL(strncpy_from_user); #define __do_clear_user(addr,size) \ do { \ int __d0; \ - might_fault(); \ + might_sleep(); \ __asm__ __volatile__( \ "0: rep; stosl\n" \ " movl %2,%0\n" \ @@ -155,7 +155,7 @@ do { \ unsigned long clear_user(void __user *to, unsigned long n) { - might_fault(); + might_sleep(); if (access_ok(VERIFY_WRITE, to, n)) __do_clear_user(to, n); return n; @@ -197,7 +197,7 @@ long strnlen_user(const char __user *s, long n) unsigned long mask = -__addr_ok(s); unsigned long res, tmp; - might_fault(); + might_sleep(); __asm__ __volatile__( " testl %0, %0\n" diff --git a/trunk/arch/x86/lib/usercopy_64.c b/trunk/arch/x86/lib/usercopy_64.c index 64d6c84e6353..f4df6e7c718b 100644 --- a/trunk/arch/x86/lib/usercopy_64.c +++ b/trunk/arch/x86/lib/usercopy_64.c @@ -15,7 +15,7 @@ #define __do_strncpy_from_user(dst,src,count,res) \ do { \ long __d0, __d1, __d2; \ - might_fault(); \ + might_sleep(); \ __asm__ __volatile__( \ " testq %1,%1\n" \ " jz 2f\n" \ @@ -64,7 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user); unsigned long __clear_user(void __user *addr, unsigned long size) { long __d0; - might_fault(); + might_sleep(); /* no memory constraint because it doesn't change any memory gcc knows about */ asm volatile( diff --git a/trunk/arch/x86/mach-generic/bigsmp.c b/trunk/arch/x86/mach-generic/bigsmp.c index bc4c7840b2a8..3c3b471ea496 100644 --- a/trunk/arch/x86/mach-generic/bigsmp.c +++ b/trunk/arch/x86/mach-generic/bigsmp.c @@ -17,7 +17,6 @@ #include #include #include -#include static int dmi_bigsmp; /* can be set by dmi scanners */ @@ -42,10 +41,9 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { { } }; -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static cpumask_t vector_allocation_domain(int cpu) { - cpus_clear(*retmask); - cpu_set(cpu, *retmask); + return cpumask_of_cpu(cpu); } static int probe_bigsmp(void) diff --git a/trunk/arch/x86/mach-generic/default.c b/trunk/arch/x86/mach-generic/default.c index e63a4a76d8cd..9e835a11a13a 100644 --- a/trunk/arch/x86/mach-generic/default.c +++ b/trunk/arch/x86/mach-generic/default.c @@ -16,7 +16,6 @@ #include #include #include -#include /* should be called last. */ static int probe_default(void) diff --git a/trunk/arch/x86/mach-generic/es7000.c b/trunk/arch/x86/mach-generic/es7000.c index 4ba5ccaa1584..28459cab3ddb 100644 --- a/trunk/arch/x86/mach-generic/es7000.c +++ b/trunk/arch/x86/mach-generic/es7000.c @@ -16,19 +16,7 @@ #include #include #include -#include - -void __init es7000_update_genapic_to_cluster(void) -{ - genapic->target_cpus = target_cpus_cluster; - genapic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER; - genapic->int_dest_mode = INT_DEST_MODE_CLUSTER; - genapic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER; - - genapic->init_apic_ldr = init_apic_ldr_cluster; - - genapic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster; -} +#include static int probe_es7000(void) { @@ -87,7 +75,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) } #endif -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static cpumask_t vector_allocation_domain(int cpu) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -97,7 +85,8 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); diff --git a/trunk/arch/x86/mach-generic/numaq.c b/trunk/arch/x86/mach-generic/numaq.c index 511d7941364f..71a309b122e6 100644 --- a/trunk/arch/x86/mach-generic/numaq.c +++ b/trunk/arch/x86/mach-generic/numaq.c @@ -38,7 +38,7 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static cpumask_t vector_allocation_domain(int cpu) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -48,7 +48,8 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); diff --git a/trunk/arch/x86/mach-generic/probe.c b/trunk/arch/x86/mach-generic/probe.c index c346d9d0226f..5a7e4619e1c4 100644 --- a/trunk/arch/x86/mach-generic/probe.c +++ b/trunk/arch/x86/mach-generic/probe.c @@ -15,7 +15,6 @@ #include #include #include -#include extern struct genapic apic_numaq; extern struct genapic apic_summit; @@ -58,9 +57,6 @@ static int __init parse_apic(char *arg) } } - if (x86_quirks->update_genapic) - x86_quirks->update_genapic(); - /* Parsed again by __setup for debug/verbose */ return 0; } @@ -76,15 +72,12 @@ void __init generic_bigsmp_probe(void) * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support */ - if (!cmdline_apic && genapic == &apic_default) { + if (!cmdline_apic && genapic == &apic_default) if (apic_bigsmp.probe()) { genapic = &apic_bigsmp; - if (x86_quirks->update_genapic) - x86_quirks->update_genapic(); printk(KERN_INFO "Overriding APIC driver with %s\n", genapic->name); } - } #endif } @@ -101,9 +94,6 @@ void __init generic_apic_probe(void) /* Not visible without early console */ if (!apic_probe[i]) panic("Didn't find an APIC driver"); - - if (x86_quirks->update_genapic) - x86_quirks->update_genapic(); } printk(KERN_INFO "Using APIC driver %s\n", genapic->name); } @@ -118,8 +108,6 @@ int __init mps_oem_check(struct mp_config_table *mpc, char *oem, if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) { if (!cmdline_apic) { genapic = apic_probe[i]; - if (x86_quirks->update_genapic) - x86_quirks->update_genapic(); printk(KERN_INFO "Switched to APIC driver `%s'.\n", genapic->name); } @@ -136,8 +124,6 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { if (!cmdline_apic) { genapic = apic_probe[i]; - if (x86_quirks->update_genapic) - x86_quirks->update_genapic(); printk(KERN_INFO "Switched to APIC driver `%s'.\n", genapic->name); } diff --git a/trunk/arch/x86/mach-generic/summit.c b/trunk/arch/x86/mach-generic/summit.c index 2821ffc188b5..6272b5e69da6 100644 --- a/trunk/arch/x86/mach-generic/summit.c +++ b/trunk/arch/x86/mach-generic/summit.c @@ -16,7 +16,6 @@ #include #include #include -#include static int probe_summit(void) { @@ -24,7 +23,7 @@ static int probe_summit(void) return 0; } -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static cpumask_t vector_allocation_domain(int cpu) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -34,7 +33,8 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } struct genapic apic_summit = APIC_INIT("summit", probe_summit); diff --git a/trunk/arch/x86/mach-voyager/voyager_smp.c b/trunk/arch/x86/mach-voyager/voyager_smp.c index a5bc05492b1e..9c990185e9f2 100644 --- a/trunk/arch/x86/mach-voyager/voyager_smp.c +++ b/trunk/arch/x86/mach-voyager/voyager_smp.c @@ -672,7 +672,7 @@ void __init smp_boot_cpus(void) /* loop over all the extended VIC CPUs and boot them. The * Quad CPUs must be bootstrapped by their extended VIC cpu */ - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) continue; do_boot_cpu(i); diff --git a/trunk/arch/x86/mm/Makefile b/trunk/arch/x86/mm/Makefile index d8cc96a2738f..fea4565ff576 100644 --- a/trunk/arch/x86/mm/Makefile +++ b/trunk/arch/x86/mm/Makefile @@ -8,8 +8,9 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o +obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o obj-$(CONFIG_MMIOTRACE) += mmiotrace.o -mmiotrace-y := kmmio.o pf_in.o mmio-mod.o +mmiotrace-y := pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o obj-$(CONFIG_NUMA) += numa_$(BITS).o diff --git a/trunk/arch/x86/mm/fault.c b/trunk/arch/x86/mm/fault.c index 21e996a70d68..31e8730fa246 100644 --- a/trunk/arch/x86/mm/fault.c +++ b/trunk/arch/x86/mm/fault.c @@ -53,7 +53,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) { -#ifdef CONFIG_MMIOTRACE +#ifdef CONFIG_MMIOTRACE_HOOKS if (unlikely(is_kmmio_active())) if (kmmio_handler(regs, addr) == 1) return -1; @@ -413,7 +413,6 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, unsigned long error_code) { unsigned long flags = oops_begin(); - int sig = SIGKILL; struct task_struct *tsk; printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", @@ -424,8 +423,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, tsk->thread.trap_no = 14; tsk->thread.error_code = error_code; if (__die("Bad pagetable", regs, error_code)) - sig = 0; - oops_end(flags, regs, sig); + regs = NULL; + oops_end(flags, regs, SIGKILL); } #endif @@ -591,7 +590,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) int fault; #ifdef CONFIG_X86_64 unsigned long flags; - int sig; #endif tsk = current; @@ -851,12 +849,11 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) bust_spinlocks(0); do_exit(SIGKILL); #else - sig = SIGKILL; if (__die("Oops", regs, error_code)) - sig = 0; + regs = NULL; /* Executive summary in case the body of the oops scrolled away */ printk(KERN_EMERG "CR2: %016lx\n", address); - oops_end(flags, regs, sig); + oops_end(flags, regs, SIGKILL); #endif /* diff --git a/trunk/arch/x86/mm/numa_64.c b/trunk/arch/x86/mm/numa_64.c index 71a14f89f89e..cebcbf152d46 100644 --- a/trunk/arch/x86/mm/numa_64.c +++ b/trunk/arch/x86/mm/numa_64.c @@ -278,7 +278,7 @@ void __init numa_init_array(void) int rr, i; rr = first_node(node_online_map); - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { if (early_cpu_to_node(i) != NUMA_NO_NODE) continue; numa_set_node(i, rr); @@ -549,7 +549,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) memnodemap[0] = 0; node_set_online(0); node_set(0, node_possible_map); - for (i = 0; i < nr_cpu_ids; i++) + for (i = 0; i < NR_CPUS; i++) numa_set_node(i, 0); e820_register_active_regions(0, start_pfn, last_pfn); setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); diff --git a/trunk/arch/x86/mm/srat_64.c b/trunk/arch/x86/mm/srat_64.c index 09737c8af074..51c0a2fc14fe 100644 --- a/trunk/arch/x86/mm/srat_64.c +++ b/trunk/arch/x86/mm/srat_64.c @@ -382,7 +382,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) if (!node_online(i)) setup_node_bootmem(i, nodes[i].start, nodes[i].end); - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { int node = early_cpu_to_node(i); if (node == NUMA_NO_NODE) diff --git a/trunk/arch/x86/pci/direct.c b/trunk/arch/x86/pci/direct.c index 9a5af6c8fbe9..9915293500fb 100644 --- a/trunk/arch/x86/pci/direct.c +++ b/trunk/arch/x86/pci/direct.c @@ -173,7 +173,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus, #undef PCI_CONF2_ADDRESS -struct pci_raw_ops pci_direct_conf2 = { +static struct pci_raw_ops pci_direct_conf2 = { .read = pci_conf2_read, .write = pci_conf2_write, }; @@ -289,7 +289,6 @@ int __init pci_direct_probe(void) if (pci_check_type1()) { raw_pci_ops = &pci_direct_conf1; - port_cf9_safe = true; return 1; } release_resource(region); @@ -306,7 +305,6 @@ int __init pci_direct_probe(void) if (pci_check_type2()) { raw_pci_ops = &pci_direct_conf2; - port_cf9_safe = true; return 2; } diff --git a/trunk/arch/x86/pci/pci.h b/trunk/arch/x86/pci/pci.h index 1959018aac02..15b9cf6be729 100644 --- a/trunk/arch/x86/pci/pci.h +++ b/trunk/arch/x86/pci/pci.h @@ -96,7 +96,6 @@ extern struct pci_raw_ops *raw_pci_ops; extern struct pci_raw_ops *raw_pci_ext_ops; extern struct pci_raw_ops pci_direct_conf1; -extern bool port_cf9_safe; /* arch_initcall level */ extern int pci_direct_probe(void); diff --git a/trunk/arch/x86/vdso/vclock_gettime.c b/trunk/arch/x86/vdso/vclock_gettime.c index d9d35824c56f..1ef0f90813d6 100644 --- a/trunk/arch/x86/vdso/vclock_gettime.c +++ b/trunk/arch/x86/vdso/vclock_gettime.c @@ -9,9 +9,6 @@ * Also alternative() doesn't work. */ -/* Disable profiling for userspace code: */ -#define DISABLE_BRANCH_PROFILING - #include #include #include diff --git a/trunk/arch/x86/xen/mmu.c b/trunk/arch/x86/xen/mmu.c index e59e53b11e2b..636ef4caa52d 100644 --- a/trunk/arch/x86/xen/mmu.c +++ b/trunk/arch/x86/xen/mmu.c @@ -1079,7 +1079,7 @@ static void drop_other_mm_ref(void *info) static void xen_drop_mm_ref(struct mm_struct *mm) { - cpumask_var_t mask; + cpumask_t mask; unsigned cpu; if (current->active_mm == mm) { @@ -1091,16 +1091,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm) } /* Get the "official" set of cpus referring to our pagetable. */ - if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { - for_each_online_cpu(cpu) { - if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask) - && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) - continue; - smp_call_function_single(cpu, drop_other_mm_ref, mm, 1); - } - return; - } - cpumask_copy(mask, &mm->cpu_vm_mask); + mask = mm->cpu_vm_mask; /* It's possible that a vcpu may have a stale reference to our cr3, because its in lazy mode, and it hasn't yet flushed @@ -1109,12 +1100,11 @@ static void xen_drop_mm_ref(struct mm_struct *mm) if needed. */ for_each_online_cpu(cpu) { if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) - cpumask_set_cpu(cpu, mask); + cpu_set(cpu, mask); } - if (!cpumask_empty(mask)) - smp_call_function_many(mask, drop_other_mm_ref, mm, 1); - free_cpumask_var(mask); + if (!cpus_empty(mask)) + smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); } #else static void xen_drop_mm_ref(struct mm_struct *mm) diff --git a/trunk/arch/x86/xen/smp.c b/trunk/arch/x86/xen/smp.c index c44e2069c7c7..acd9b6705e02 100644 --- a/trunk/arch/x86/xen/smp.c +++ b/trunk/arch/x86/xen/smp.c @@ -33,7 +33,7 @@ #include "xen-ops.h" #include "mmu.h" -cpumask_var_t xen_cpu_initialized_map; +cpumask_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, resched_irq); static DEFINE_PER_CPU(int, callfunc_irq); @@ -158,7 +158,7 @@ static void __init xen_fill_possible_map(void) { int i, rc; - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); if (rc >= 0) { num_processors++; @@ -192,14 +192,11 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) if (xen_smp_intr_init(0)) BUG(); - if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL)) - panic("could not allocate xen_cpu_initialized_map\n"); - - cpumask_copy(xen_cpu_initialized_map, cpumask_of(0)); + xen_cpu_initialized_map = cpumask_of_cpu(0); /* Restrict the possible_map according to max_cpus. */ while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { - for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--) + for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--) continue; cpu_clear(cpu, cpu_possible_map); } @@ -224,7 +221,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) struct vcpu_guest_context *ctxt; struct desc_struct *gdt; - if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) + if (cpu_test_and_set(cpu, xen_cpu_initialized_map)) return 0; ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); @@ -411,23 +408,24 @@ static void xen_smp_send_reschedule(int cpu) xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); } -static void xen_send_IPI_mask(const struct cpumask *mask, - enum ipi_vector vector) +static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) { unsigned cpu; - for_each_cpu_and(cpu, mask, cpu_online_mask) + cpus_and(mask, mask, cpu_online_map); + + for_each_cpu_mask_nr(cpu, mask) xen_send_IPI_one(cpu, vector); } -static void xen_smp_send_call_function_ipi(const struct cpumask *mask) +static void xen_smp_send_call_function_ipi(cpumask_t mask) { int cpu; xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); /* Make sure other vcpus get a chance to run if they need to. */ - for_each_cpu(cpu, mask) { + for_each_cpu_mask_nr(cpu, mask) { if (xen_vcpu_stolen(cpu)) { HYPERVISOR_sched_op(SCHEDOP_yield, 0); break; @@ -437,8 +435,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask) static void xen_smp_send_call_function_single_ipi(int cpu) { - xen_send_IPI_mask(cpumask_of(cpu), - XEN_CALL_FUNCTION_SINGLE_VECTOR); + xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); } static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) diff --git a/trunk/arch/x86/xen/suspend.c b/trunk/arch/x86/xen/suspend.c index 212ffe012b76..2a234db5949b 100644 --- a/trunk/arch/x86/xen/suspend.c +++ b/trunk/arch/x86/xen/suspend.c @@ -35,8 +35,7 @@ void xen_post_suspend(int suspend_cancelled) pfn_to_mfn(xen_start_info->console.domU.mfn); } else { #ifdef CONFIG_SMP - BUG_ON(xen_cpu_initialized_map == NULL); - cpumask_copy(xen_cpu_initialized_map, cpu_online_mask); + xen_cpu_initialized_map = cpu_online_map; #endif xen_vcpu_restore(); } diff --git a/trunk/arch/x86/xen/xen-ops.h b/trunk/arch/x86/xen/xen-ops.h index c1f8faf0a2c5..9e1afae8461f 100644 --- a/trunk/arch/x86/xen/xen-ops.h +++ b/trunk/arch/x86/xen/xen-ops.h @@ -58,7 +58,7 @@ void __init xen_init_spinlocks(void); __cpuinit void xen_init_lock_cpu(int cpu); void xen_uninit_lock_cpu(int cpu); -extern cpumask_var_t xen_cpu_initialized_map; +extern cpumask_t xen_cpu_initialized_map; #else static inline void xen_smp_init(void) {} #endif diff --git a/trunk/block/Kconfig b/trunk/block/Kconfig index 290b219fad9c..1ab7c15c8d7a 100644 --- a/trunk/block/Kconfig +++ b/trunk/block/Kconfig @@ -47,7 +47,6 @@ config BLK_DEV_IO_TRACE depends on SYSFS select RELAY select DEBUG_FS - select TRACEPOINTS help Say Y here if you want to be able to trace the block layer actions on a given queue. Tracing allows you to see any traffic happening diff --git a/trunk/block/blk-core.c b/trunk/block/blk-core.c index 561e8a1b43a4..c36aa98fafa3 100644 --- a/trunk/block/blk-core.c +++ b/trunk/block/blk-core.c @@ -28,23 +28,9 @@ #include #include #include -#include #include "blk.h" -DEFINE_TRACE(block_plug); -DEFINE_TRACE(block_unplug_io); -DEFINE_TRACE(block_unplug_timer); -DEFINE_TRACE(block_getrq); -DEFINE_TRACE(block_sleeprq); -DEFINE_TRACE(block_rq_requeue); -DEFINE_TRACE(block_bio_backmerge); -DEFINE_TRACE(block_bio_frontmerge); -DEFINE_TRACE(block_bio_queue); -DEFINE_TRACE(block_rq_complete); -DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */ -EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); - static int __make_request(struct request_queue *q, struct bio *bio); /* @@ -219,7 +205,7 @@ void blk_plug_device(struct request_queue *q) if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) { mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); - trace_block_plug(q); + blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); } } EXPORT_SYMBOL(blk_plug_device); @@ -306,7 +292,9 @@ void blk_unplug_work(struct work_struct *work) struct request_queue *q = container_of(work, struct request_queue, unplug_work); - trace_block_unplug_io(q); + blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, + q->rq.count[READ] + q->rq.count[WRITE]); + q->unplug_fn(q); } @@ -314,7 +302,9 @@ void blk_unplug_timeout(unsigned long data) { struct request_queue *q = (struct request_queue *)data; - trace_block_unplug_timer(q); + blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, + q->rq.count[READ] + q->rq.count[WRITE]); + kblockd_schedule_work(q, &q->unplug_work); } @@ -324,7 +314,9 @@ void blk_unplug(struct request_queue *q) * devices don't necessarily have an ->unplug_fn defined */ if (q->unplug_fn) { - trace_block_unplug_io(q); + blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, + q->rq.count[READ] + q->rq.count[WRITE]); + q->unplug_fn(q); } } @@ -830,7 +822,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags, if (ioc_batching(q, ioc)) ioc->nr_batch_requests--; - trace_block_getrq(q, bio, rw); + blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); out: return rq; } @@ -856,7 +848,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, prepare_to_wait_exclusive(&rl->wait[rw], &wait, TASK_UNINTERRUPTIBLE); - trace_block_sleeprq(q, bio, rw); + blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); __generic_unplug_device(q); spin_unlock_irq(q->queue_lock); @@ -936,7 +928,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) { blk_delete_timer(rq); blk_clear_rq_complete(rq); - trace_block_rq_requeue(q, rq); + blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); if (blk_rq_tagged(rq)) blk_queue_end_tag(q, rq); @@ -1175,7 +1167,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) if (!ll_back_merge_fn(q, req, bio)) break; - trace_block_bio_backmerge(q, bio); + blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); req->biotail->bi_next = bio; req->biotail = bio; @@ -1194,7 +1186,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) if (!ll_front_merge_fn(q, req, bio)) break; - trace_block_bio_frontmerge(q, bio); + blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); bio->bi_next = req->bio; req->bio = bio; @@ -1277,7 +1269,7 @@ static inline void blk_partition_remap(struct bio *bio) bio->bi_sector += p->start_sect; bio->bi_bdev = bdev->bd_contains; - trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, + blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio, bdev->bd_dev, bio->bi_sector, bio->bi_sector - p->start_sect); } @@ -1449,10 +1441,10 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; if (old_sector != -1) - trace_block_remap(q, bio, old_dev, bio->bi_sector, + blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, old_sector); - trace_block_bio_queue(q, bio); + blk_add_trace_bio(q, bio, BLK_TA_QUEUE); old_sector = bio->bi_sector; old_dev = bio->bi_bdev->bd_dev; @@ -1686,7 +1678,7 @@ static int __end_that_request_first(struct request *req, int error, int total_bytes, bio_nbytes, next_idx = 0; struct bio *bio; - trace_block_rq_complete(req->q, req); + blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); /* * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual diff --git a/trunk/block/blktrace.c b/trunk/block/blktrace.c index b0a2cae886db..85049a7e7a17 100644 --- a/trunk/block/blktrace.c +++ b/trunk/block/blktrace.c @@ -23,18 +23,10 @@ #include #include #include -#include #include static unsigned int blktrace_seq __read_mostly = 1; -/* Global reference count of probes */ -static DEFINE_MUTEX(blk_probe_mutex); -static atomic_t blk_probes_ref = ATOMIC_INIT(0); - -static int blk_register_tracepoints(void); -static void blk_unregister_tracepoints(void); - /* * Send out a notify message. */ @@ -127,7 +119,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK * The worker for the various blk_add_trace*() types. Fills out a * blk_io_trace structure and places it in a per-cpu subbuffer. */ -static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, +void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, int rw, u32 what, int error, int pdu_len, void *pdu_data) { struct task_struct *tsk = current; @@ -185,6 +177,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(__blk_add_trace); + static struct dentry *blk_tree_root; static DEFINE_MUTEX(blk_tree_mutex); static unsigned int root_users; @@ -243,10 +237,6 @@ static void blk_trace_cleanup(struct blk_trace *bt) free_percpu(bt->sequence); free_percpu(bt->msg_data); kfree(bt); - mutex_lock(&blk_probe_mutex); - if (atomic_dec_and_test(&blk_probes_ref)) - blk_unregister_tracepoints(); - mutex_unlock(&blk_probe_mutex); } int blk_trace_remove(struct request_queue *q) @@ -438,14 +428,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, bt->pid = buts->pid; bt->trace_state = Blktrace_setup; - mutex_lock(&blk_probe_mutex); - if (atomic_add_return(1, &blk_probes_ref) == 1) { - ret = blk_register_tracepoints(); - if (ret) - goto probe_err; - } - mutex_unlock(&blk_probe_mutex); - ret = -EBUSY; old_bt = xchg(&q->blk_trace, bt); if (old_bt) { @@ -454,9 +436,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, } return 0; -probe_err: - atomic_dec(&blk_probes_ref); - mutex_unlock(&blk_probe_mutex); err: if (dir) blk_remove_tree(dir); @@ -583,308 +562,3 @@ void blk_trace_shutdown(struct request_queue *q) blk_trace_remove(q); } } - -/* - * blktrace probes - */ - -/** - * blk_add_trace_rq - Add a trace for a request oriented action - * @q: queue the io is for - * @rq: the source request - * @what: the action - * - * Description: - * Records an action against a request. Will log the bio offset + size. - * - **/ -static void blk_add_trace_rq(struct request_queue *q, struct request *rq, - u32 what) -{ - struct blk_trace *bt = q->blk_trace; - int rw = rq->cmd_flags & 0x03; - - if (likely(!bt)) - return; - - if (blk_discard_rq(rq)) - rw |= (1 << BIO_RW_DISCARD); - - if (blk_pc_request(rq)) { - what |= BLK_TC_ACT(BLK_TC_PC); - __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, - sizeof(rq->cmd), rq->cmd); - } else { - what |= BLK_TC_ACT(BLK_TC_FS); - __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, - rw, what, rq->errors, 0, NULL); - } -} - -static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq) -{ - blk_add_trace_rq(q, rq, BLK_TA_ABORT); -} - -static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq) -{ - blk_add_trace_rq(q, rq, BLK_TA_INSERT); -} - -static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq) -{ - blk_add_trace_rq(q, rq, BLK_TA_ISSUE); -} - -static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq) -{ - blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); -} - -static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq) -{ - blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); -} - -/** - * blk_add_trace_bio - Add a trace for a bio oriented action - * @q: queue the io is for - * @bio: the source bio - * @what: the action - * - * Description: - * Records an action against a bio. Will log the bio offset + size. - * - **/ -static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, - u32 what) -{ - struct blk_trace *bt = q->blk_trace; - - if (likely(!bt)) - return; - - __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, - !bio_flagged(bio, BIO_UPTODATE), 0, NULL); -} - -static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio) -{ - blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); -} - -static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio) -{ - blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); -} - -static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio) -{ - blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); -} - -static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio) -{ - blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); -} - -static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio) -{ - blk_add_trace_bio(q, bio, BLK_TA_QUEUE); -} - -static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw) -{ - if (bio) - blk_add_trace_bio(q, bio, BLK_TA_GETRQ); - else { - struct blk_trace *bt = q->blk_trace; - - if (bt) - __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL); - } -} - - -static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw) -{ - if (bio) - blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); - else { - struct blk_trace *bt = q->blk_trace; - - if (bt) - __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL); - } -} - -static void blk_add_trace_plug(struct request_queue *q) -{ - struct blk_trace *bt = q->blk_trace; - - if (bt) - __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); -} - -static void blk_add_trace_unplug_io(struct request_queue *q) -{ - struct blk_trace *bt = q->blk_trace; - - if (bt) { - unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; - __be64 rpdu = cpu_to_be64(pdu); - - __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0, - sizeof(rpdu), &rpdu); - } -} - -static void blk_add_trace_unplug_timer(struct request_queue *q) -{ - struct blk_trace *bt = q->blk_trace; - - if (bt) { - unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; - __be64 rpdu = cpu_to_be64(pdu); - - __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0, - sizeof(rpdu), &rpdu); - } -} - -static void blk_add_trace_split(struct request_queue *q, struct bio *bio, - unsigned int pdu) -{ - struct blk_trace *bt = q->blk_trace; - - if (bt) { - __be64 rpdu = cpu_to_be64(pdu); - - __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, - BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE), - sizeof(rpdu), &rpdu); - } -} - -/** - * blk_add_trace_remap - Add a trace for a remap operation - * @q: queue the io is for - * @bio: the source bio - * @dev: target device - * @from: source sector - * @to: target sector - * - * Description: - * Device mapper or raid target sometimes need to split a bio because - * it spans a stripe (or similar). Add a trace for that action. - * - **/ -static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, - dev_t dev, sector_t from, sector_t to) -{ - struct blk_trace *bt = q->blk_trace; - struct blk_io_trace_remap r; - - if (likely(!bt)) - return; - - r.device = cpu_to_be32(dev); - r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); - r.sector = cpu_to_be64(to); - - __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, - !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); -} - -/** - * blk_add_driver_data - Add binary message with driver-specific data - * @q: queue the io is for - * @rq: io request - * @data: driver-specific data - * @len: length of driver-specific data - * - * Description: - * Some drivers might want to write driver-specific data per request. - * - **/ -void blk_add_driver_data(struct request_queue *q, - struct request *rq, - void *data, size_t len) -{ - struct blk_trace *bt = q->blk_trace; - - if (likely(!bt)) - return; - - if (blk_pc_request(rq)) - __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, - rq->errors, len, data); - else - __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, - 0, BLK_TA_DRV_DATA, rq->errors, len, data); -} -EXPORT_SYMBOL_GPL(blk_add_driver_data); - -static int blk_register_tracepoints(void) -{ - int ret; - - ret = register_trace_block_rq_abort(blk_add_trace_rq_abort); - WARN_ON(ret); - ret = register_trace_block_rq_insert(blk_add_trace_rq_insert); - WARN_ON(ret); - ret = register_trace_block_rq_issue(blk_add_trace_rq_issue); - WARN_ON(ret); - ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue); - WARN_ON(ret); - ret = register_trace_block_rq_complete(blk_add_trace_rq_complete); - WARN_ON(ret); - ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce); - WARN_ON(ret); - ret = register_trace_block_bio_complete(blk_add_trace_bio_complete); - WARN_ON(ret); - ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); - WARN_ON(ret); - ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); - WARN_ON(ret); - ret = register_trace_block_bio_queue(blk_add_trace_bio_queue); - WARN_ON(ret); - ret = register_trace_block_getrq(blk_add_trace_getrq); - WARN_ON(ret); - ret = register_trace_block_sleeprq(blk_add_trace_sleeprq); - WARN_ON(ret); - ret = register_trace_block_plug(blk_add_trace_plug); - WARN_ON(ret); - ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer); - WARN_ON(ret); - ret = register_trace_block_unplug_io(blk_add_trace_unplug_io); - WARN_ON(ret); - ret = register_trace_block_split(blk_add_trace_split); - WARN_ON(ret); - ret = register_trace_block_remap(blk_add_trace_remap); - WARN_ON(ret); - return 0; -} - -static void blk_unregister_tracepoints(void) -{ - unregister_trace_block_remap(blk_add_trace_remap); - unregister_trace_block_split(blk_add_trace_split); - unregister_trace_block_unplug_io(blk_add_trace_unplug_io); - unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer); - unregister_trace_block_plug(blk_add_trace_plug); - unregister_trace_block_sleeprq(blk_add_trace_sleeprq); - unregister_trace_block_getrq(blk_add_trace_getrq); - unregister_trace_block_bio_queue(blk_add_trace_bio_queue); - unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); - unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); - unregister_trace_block_bio_complete(blk_add_trace_bio_complete); - unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce); - unregister_trace_block_rq_complete(blk_add_trace_rq_complete); - unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue); - unregister_trace_block_rq_issue(blk_add_trace_rq_issue); - unregister_trace_block_rq_insert(blk_add_trace_rq_insert); - unregister_trace_block_rq_abort(blk_add_trace_rq_abort); - - tracepoint_synchronize_unregister(); -} diff --git a/trunk/block/elevator.c b/trunk/block/elevator.c index 86836dd179c0..a6951f76ba0c 100644 --- a/trunk/block/elevator.c +++ b/trunk/block/elevator.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include @@ -42,8 +41,6 @@ static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); -DEFINE_TRACE(block_rq_abort); - /* * Merge hash stuff. */ @@ -55,9 +52,6 @@ static const int elv_hash_shift = 6; #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) -DEFINE_TRACE(block_rq_insert); -DEFINE_TRACE(block_rq_issue); - /* * Query io scheduler to see if the current process issuing bio may be * merged with rq. @@ -592,7 +586,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) unsigned ordseq; int unplug_it = 1; - trace_block_rq_insert(q, rq); + blk_add_trace_rq(q, rq, BLK_TA_INSERT); rq->q = q; @@ -778,7 +772,7 @@ struct request *elv_next_request(struct request_queue *q) * not be passed by new incoming requests */ rq->cmd_flags |= REQ_STARTED; - trace_block_rq_issue(q, rq); + blk_add_trace_rq(q, rq, BLK_TA_ISSUE); } if (!q->boundary_rq || q->boundary_rq == rq) { @@ -920,7 +914,7 @@ void elv_abort_queue(struct request_queue *q) while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); rq->cmd_flags |= REQ_QUIET; - trace_block_rq_abort(q, rq); + blk_add_trace_rq(q, rq, BLK_TA_ABORT); __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); } } diff --git a/trunk/drivers/acpi/toshiba_acpi.c b/trunk/drivers/acpi/toshiba_acpi.c index 40e60fc2e596..25f531d892de 100644 --- a/trunk/drivers/acpi/toshiba_acpi.c +++ b/trunk/drivers/acpi/toshiba_acpi.c @@ -824,36 +824,32 @@ static int __init toshiba_acpi_init(void) toshiba_acpi_exit(); return -ENOMEM; } + } - /* Register input device for kill switch */ - toshiba_acpi.poll_dev = input_allocate_polled_device(); - if (!toshiba_acpi.poll_dev) { - printk(MY_ERR - "unable to allocate kill-switch input device\n"); - toshiba_acpi_exit(); - return -ENOMEM; - } - toshiba_acpi.poll_dev->private = &toshiba_acpi; - toshiba_acpi.poll_dev->poll = bt_poll_rfkill; - toshiba_acpi.poll_dev->poll_interval = 1000; /* msecs */ - - toshiba_acpi.poll_dev->input->name = toshiba_acpi.rfk_name; - toshiba_acpi.poll_dev->input->id.bustype = BUS_HOST; - /* Toshiba USB ID */ - toshiba_acpi.poll_dev->input->id.vendor = 0x0930; - set_bit(EV_SW, toshiba_acpi.poll_dev->input->evbit); - set_bit(SW_RFKILL_ALL, toshiba_acpi.poll_dev->input->swbit); - input_report_switch(toshiba_acpi.poll_dev->input, - SW_RFKILL_ALL, TRUE); - input_sync(toshiba_acpi.poll_dev->input); - - ret = input_register_polled_device(toshiba_acpi.poll_dev); - if (ret) { - printk(MY_ERR - "unable to register kill-switch input device\n"); - toshiba_acpi_exit(); - return ret; - } + /* Register input device for kill switch */ + toshiba_acpi.poll_dev = input_allocate_polled_device(); + if (!toshiba_acpi.poll_dev) { + printk(MY_ERR "unable to allocate kill-switch input device\n"); + toshiba_acpi_exit(); + return -ENOMEM; + } + toshiba_acpi.poll_dev->private = &toshiba_acpi; + toshiba_acpi.poll_dev->poll = bt_poll_rfkill; + toshiba_acpi.poll_dev->poll_interval = 1000; /* msecs */ + + toshiba_acpi.poll_dev->input->name = toshiba_acpi.rfk_name; + toshiba_acpi.poll_dev->input->id.bustype = BUS_HOST; + toshiba_acpi.poll_dev->input->id.vendor = 0x0930; /* Toshiba USB ID */ + set_bit(EV_SW, toshiba_acpi.poll_dev->input->evbit); + set_bit(SW_RFKILL_ALL, toshiba_acpi.poll_dev->input->swbit); + input_report_switch(toshiba_acpi.poll_dev->input, SW_RFKILL_ALL, TRUE); + input_sync(toshiba_acpi.poll_dev->input); + + ret = input_register_polled_device(toshiba_acpi.poll_dev); + if (ret) { + printk(MY_ERR "unable to register kill-switch input device\n"); + toshiba_acpi_exit(); + return ret; } return 0; diff --git a/trunk/drivers/ata/libata-core.c b/trunk/drivers/ata/libata-core.c index bc6695e3c848..5e2eb740df46 100644 --- a/trunk/drivers/ata/libata-core.c +++ b/trunk/drivers/ata/libata-core.c @@ -4050,70 +4050,17 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "ST3160023AS", "3.42", ATA_HORKAGE_NONCQ }, /* Seagate NCQ + FLUSH CACHE firmware bug */ - { "ST31500341AS", "SD15", ATA_HORKAGE_NONCQ | + { "ST31500341AS", "9JU138", ATA_HORKAGE_NONCQ | ATA_HORKAGE_FIRMWARE_WARN }, - { "ST31500341AS", "SD16", ATA_HORKAGE_NONCQ | + { "ST31000333AS", "9FZ136", ATA_HORKAGE_NONCQ | ATA_HORKAGE_FIRMWARE_WARN }, - { "ST31500341AS", "SD17", ATA_HORKAGE_NONCQ | + { "ST3640623AS", "9FZ164", ATA_HORKAGE_NONCQ | ATA_HORKAGE_FIRMWARE_WARN }, - { "ST31500341AS", "SD18", ATA_HORKAGE_NONCQ | + { "ST3640323AS", "9FZ134", ATA_HORKAGE_NONCQ | ATA_HORKAGE_FIRMWARE_WARN }, - { "ST31500341AS", "SD19", ATA_HORKAGE_NONCQ | + { "ST3320813AS", "9FZ182", ATA_HORKAGE_NONCQ | ATA_HORKAGE_FIRMWARE_WARN }, - - { "ST31000333AS", "SD15", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST31000333AS", "SD16", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST31000333AS", "SD17", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST31000333AS", "SD18", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST31000333AS", "SD19", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - - { "ST3640623AS", "SD15", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST3640623AS", "SD16", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST3640623AS", "SD17", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST3640623AS", "SD18", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST3640623AS", "SD19", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - - { "ST3640323AS", "SD15", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST3640323AS", "SD16", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST3640323AS", "SD17", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST3640323AS", "SD18", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST3640323AS", "SD19", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - - { "ST3320813AS", "SD15", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST3320813AS", "SD16", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST3320813AS", "SD17", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST3320813AS", "SD18", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST3320813AS", "SD19", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - - { "ST3320613AS", "SD15", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST3320613AS", "SD16", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST3320613AS", "SD17", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST3320613AS", "SD18", ATA_HORKAGE_NONCQ | - ATA_HORKAGE_FIRMWARE_WARN }, - { "ST3320613AS", "SD19", ATA_HORKAGE_NONCQ | + { "ST3320613AS", "9FZ162", ATA_HORKAGE_NONCQ | ATA_HORKAGE_FIRMWARE_WARN }, /* Blacklist entries taken from Silicon Image 3124/3132 diff --git a/trunk/drivers/ata/pata_hpt366.c b/trunk/drivers/ata/pata_hpt366.c index e0c4f05d7d57..a098ba8eaab6 100644 --- a/trunk/drivers/ata/pata_hpt366.c +++ b/trunk/drivers/ata/pata_hpt366.c @@ -183,9 +183,7 @@ static unsigned long hpt366_filter(struct ata_device *adev, unsigned long mask) mask &= ~(0xF8 << ATA_SHIFT_UDMA); if (hpt_dma_blacklisted(adev, "UDMA4", bad_ata66_4)) mask &= ~(0xF0 << ATA_SHIFT_UDMA); - } else if (adev->class == ATA_DEV_ATAPI) - mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA); - + } return ata_bmdma_mode_filter(adev, mask); } @@ -213,15 +211,11 @@ static u32 hpt36x_find_mode(struct ata_port *ap, int speed) static int hpt36x_cable_detect(struct ata_port *ap) { - struct pci_dev *pdev = to_pci_dev(ap->host->dev); u8 ata66; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); - /* - * Each channel of pata_hpt366 occupies separate PCI function - * as the primary channel and bit1 indicates the cable type. - */ pci_read_config_byte(pdev, 0x5A, &ata66); - if (ata66 & 2) + if (ata66 & (1 << ap->port_no)) return ATA_CBL_PATA40; return ATA_CBL_PATA80; } diff --git a/trunk/drivers/cdrom/cdrom.c b/trunk/drivers/cdrom/cdrom.c index 7d2e91cccb13..d16b02423d61 100644 --- a/trunk/drivers/cdrom/cdrom.c +++ b/trunk/drivers/cdrom/cdrom.c @@ -2081,6 +2081,10 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, if (!q) return -ENXIO; + rq = blk_get_request(q, READ, GFP_KERNEL); + if (!rq) + return -ENOMEM; + cdi->last_sense = 0; while (nframes) { @@ -2092,17 +2096,9 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, len = nr * CD_FRAMESIZE_RAW; - rq = blk_get_request(q, READ, GFP_KERNEL); - if (!rq) { - ret = -ENOMEM; - break; - } - ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL); - if (ret) { - blk_put_request(rq); + if (ret) break; - } rq->cmd[0] = GPCMD_READ_CD; rq->cmd[1] = 1 << 2; @@ -2128,7 +2124,6 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, if (blk_rq_unmap_user(bio)) ret = -EFAULT; - blk_put_request(rq); if (ret) break; @@ -2138,6 +2133,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, ubuf += len; } + blk_put_request(rq); return ret; } diff --git a/trunk/drivers/char/cp437.uni b/trunk/drivers/char/cp437.uni index bc6163484f62..1f06889a96b9 100644 --- a/trunk/drivers/char/cp437.uni +++ b/trunk/drivers/char/cp437.uni @@ -27,7 +27,7 @@ 0x0c U+2640 0x0d U+266a 0x0e U+266b -0x0f U+263c U+00a4 +0x0f U+263c 0x10 U+25b6 U+25ba 0x11 U+25c0 U+25c4 0x12 U+2195 @@ -55,7 +55,7 @@ 0x24 U+0024 0x25 U+0025 0x26 U+0026 -0x27 U+0027 U+00b4 +0x27 U+0027 0x28 U+0028 0x29 U+0029 0x2a U+002a @@ -84,7 +84,7 @@ 0x41 U+0041 U+00c0 U+00c1 U+00c2 U+00c3 0x42 U+0042 0x43 U+0043 U+00a9 -0x44 U+0044 U+00d0 +0x44 U+0044 0x45 U+0045 U+00c8 U+00ca U+00cb 0x46 U+0046 0x47 U+0047 @@ -140,7 +140,7 @@ 0x79 U+0079 U+00fd 0x7a U+007a 0x7b U+007b -0x7c U+007c U+00a6 +0x7c U+007c U+00a5 0x7d U+007d 0x7e U+007e # @@ -263,10 +263,10 @@ 0xe8 U+03a6 U+00d8 0xe9 U+0398 0xea U+03a9 U+2126 -0xeb U+03b4 U+00f0 +0xeb U+03b4 0xec U+221e 0xed U+03c6 U+00f8 -0xee U+03b5 U+2208 +0xee U+03b5 0xef U+2229 0xf0 U+2261 0xf1 U+00b1 diff --git a/trunk/drivers/char/random.c b/trunk/drivers/char/random.c index d26891bfcd41..675076f5fca8 100644 --- a/trunk/drivers/char/random.c +++ b/trunk/drivers/char/random.c @@ -558,9 +558,23 @@ struct timer_rand_state { unsigned dont_count_entropy:1; }; -#ifndef CONFIG_SPARSE_IRQ -struct timer_rand_state *irq_timer_state[NR_IRQS]; -#endif +static struct timer_rand_state *irq_timer_state[NR_IRQS]; + +static struct timer_rand_state *get_timer_rand_state(unsigned int irq) +{ + if (irq >= nr_irqs) + return NULL; + + return irq_timer_state[irq]; +} + +static void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state) +{ + if (irq >= nr_irqs) + return; + + irq_timer_state[irq] = state; +} static struct timer_rand_state input_timer_state; @@ -919,10 +933,8 @@ void rand_initialize_irq(int irq) { struct timer_rand_state *state; -#ifndef CONFIG_SPARSE_IRQ if (irq >= nr_irqs) return; -#endif state = get_timer_rand_state(irq); diff --git a/trunk/drivers/char/sysrq.c b/trunk/drivers/char/sysrq.c index 94966edfb44d..ce0d9da52a8a 100644 --- a/trunk/drivers/char/sysrq.c +++ b/trunk/drivers/char/sysrq.c @@ -274,22 +274,6 @@ static struct sysrq_key_op sysrq_showstate_blocked_op = { .enable_mask = SYSRQ_ENABLE_DUMP, }; -#ifdef CONFIG_TRACING -#include - -static void sysrq_ftrace_dump(int key, struct tty_struct *tty) -{ - ftrace_dump(); -} -static struct sysrq_key_op sysrq_ftrace_dump_op = { - .handler = sysrq_ftrace_dump, - .help_msg = "dumpZ-ftrace-buffer", - .action_msg = "Dump ftrace buffer", - .enable_mask = SYSRQ_ENABLE_DUMP, -}; -#else -#define sysrq_ftrace_dump_op (*(struct sysrq_key_op *)0) -#endif static void sysrq_handle_showmem(int key, struct tty_struct *tty) { @@ -422,7 +406,7 @@ static struct sysrq_key_op *sysrq_key_table[36] = { NULL, /* x */ /* y: May be registered on sparc64 for global register dump */ NULL, /* y */ - &sysrq_ftrace_dump_op, /* z */ + NULL /* z */ }; /* key2index calculation, -1 on invalid index */ diff --git a/trunk/drivers/char/vt.c b/trunk/drivers/char/vt.c index 008176edbd64..a5af6072e2b3 100644 --- a/trunk/drivers/char/vt.c +++ b/trunk/drivers/char/vt.c @@ -2274,7 +2274,7 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co continue; /* nothing to display */ } /* Glyph not found */ - if ((!(vc->vc_utf && !vc->vc_disp_ctrl) && c < 128) && !(c & ~charmask)) { + if ((!(vc->vc_utf && !vc->vc_disp_ctrl) || c < 128) && !(c & ~charmask)) { /* In legacy mode use the glyph we get by a 1:1 mapping. This would make absolutely no sense with Unicode in mind, but do this for ASCII characters since a font may lack diff --git a/trunk/drivers/char/xilinx_hwicap/buffer_icap.c b/trunk/drivers/char/xilinx_hwicap/buffer_icap.c index 05d897764f02..aa7f7962a9a0 100644 --- a/trunk/drivers/char/xilinx_hwicap/buffer_icap.c +++ b/trunk/drivers/char/xilinx_hwicap/buffer_icap.c @@ -21,6 +21,9 @@ * INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE. * + * Xilinx products are not intended for use in life support appliances, + * devices, or systems. Use in such applications is expressly prohibited. + * * (c) Copyright 2003-2008 Xilinx Inc. * All rights reserved. * diff --git a/trunk/drivers/char/xilinx_hwicap/buffer_icap.h b/trunk/drivers/char/xilinx_hwicap/buffer_icap.h index d4f419ee87ab..8b0252bf06e2 100644 --- a/trunk/drivers/char/xilinx_hwicap/buffer_icap.h +++ b/trunk/drivers/char/xilinx_hwicap/buffer_icap.h @@ -21,6 +21,9 @@ * INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE. * + * Xilinx products are not intended for use in life support appliances, + * devices, or systems. Use in such applications is expressly prohibited. + * * (c) Copyright 2003-2008 Xilinx Inc. * All rights reserved. * diff --git a/trunk/drivers/char/xilinx_hwicap/fifo_icap.c b/trunk/drivers/char/xilinx_hwicap/fifo_icap.c index 02225eb19cf6..776b50528478 100644 --- a/trunk/drivers/char/xilinx_hwicap/fifo_icap.c +++ b/trunk/drivers/char/xilinx_hwicap/fifo_icap.c @@ -21,6 +21,9 @@ * INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE. * + * Xilinx products are not intended for use in life support appliances, + * devices, or systems. Use in such applications is expressly prohibited. + * * (c) Copyright 2007-2008 Xilinx Inc. * All rights reserved. * diff --git a/trunk/drivers/char/xilinx_hwicap/fifo_icap.h b/trunk/drivers/char/xilinx_hwicap/fifo_icap.h index 4c9dd9a3b62a..62bda453c90b 100644 --- a/trunk/drivers/char/xilinx_hwicap/fifo_icap.h +++ b/trunk/drivers/char/xilinx_hwicap/fifo_icap.h @@ -21,6 +21,9 @@ * INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE. * + * Xilinx products are not intended for use in life support appliances, + * devices, or systems. Use in such applications is expressly prohibited. + * * (c) Copyright 2007-2008 Xilinx Inc. * All rights reserved. * diff --git a/trunk/drivers/char/xilinx_hwicap/xilinx_hwicap.c b/trunk/drivers/char/xilinx_hwicap/xilinx_hwicap.c index f40ab699860f..d16131949097 100644 --- a/trunk/drivers/char/xilinx_hwicap/xilinx_hwicap.c +++ b/trunk/drivers/char/xilinx_hwicap/xilinx_hwicap.c @@ -21,6 +21,9 @@ * INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE. * + * Xilinx products are not intended for use in life support appliances, + * devices, or systems. Use in such applications is expressly prohibited. + * * (c) Copyright 2002 Xilinx Inc., Systems Engineering Group * (c) Copyright 2004 Xilinx Inc., Systems Engineering Group * (c) Copyright 2007-2008 Xilinx Inc. diff --git a/trunk/drivers/char/xilinx_hwicap/xilinx_hwicap.h b/trunk/drivers/char/xilinx_hwicap/xilinx_hwicap.h index 8cca11981c5f..24d0d9b938fb 100644 --- a/trunk/drivers/char/xilinx_hwicap/xilinx_hwicap.h +++ b/trunk/drivers/char/xilinx_hwicap/xilinx_hwicap.h @@ -21,6 +21,9 @@ * INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE. * + * Xilinx products are not intended for use in life support appliances, + * devices, or systems. Use in such applications is expressly prohibited. + * * (c) Copyright 2003-2007 Xilinx Inc. * All rights reserved. * diff --git a/trunk/drivers/i2c/busses/i2c-cpm.c b/trunk/drivers/i2c/busses/i2c-cpm.c index 3fcf78e906db..228f75723063 100644 --- a/trunk/drivers/i2c/busses/i2c-cpm.c +++ b/trunk/drivers/i2c/busses/i2c-cpm.c @@ -365,7 +365,6 @@ static int cpm_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) pmsg = &msgs[tptr]; if (pmsg->flags & I2C_M_RD) ret = wait_event_interruptible_timeout(cpm->i2c_wait, - (in_be16(&tbdf[tptr].cbd_sc) & BD_SC_NAK) || !(in_be16(&rbdf[rptr].cbd_sc) & BD_SC_EMPTY), 1 * HZ); else diff --git a/trunk/drivers/i2c/busses/i2c-highlander.c b/trunk/drivers/i2c/busses/i2c-highlander.c index e5a8dae4a289..f4d22ae9d294 100644 --- a/trunk/drivers/i2c/busses/i2c-highlander.c +++ b/trunk/drivers/i2c/busses/i2c-highlander.c @@ -92,7 +92,7 @@ static void highlander_i2c_setup(struct highlander_i2c_dev *dev) static void smbus_write_data(u8 *src, u16 *dst, int len) { for (; len > 1; len -= 2) { - *dst++ = be16_to_cpup((__be16 *)src); + *dst++ = be16_to_cpup((u16 *)src); src += 2; } @@ -103,7 +103,7 @@ static void smbus_write_data(u8 *src, u16 *dst, int len) static void smbus_read_data(u16 *src, u8 *dst, int len) { for (; len > 1; len -= 2) { - *(__be16 *)dst = cpu_to_be16p(src++); + *(u16 *)dst = cpu_to_be16p(src++); dst += 2; } diff --git a/trunk/drivers/i2c/busses/i2c-pmcmsp.c b/trunk/drivers/i2c/busses/i2c-pmcmsp.c index 0bdb2d7f0570..dcf2045b5222 100644 --- a/trunk/drivers/i2c/busses/i2c-pmcmsp.c +++ b/trunk/drivers/i2c/busses/i2c-pmcmsp.c @@ -486,7 +486,7 @@ static enum pmcmsptwi_xfer_result pmcmsptwi_xfer_cmd( if (cmd->type == MSP_TWI_CMD_WRITE || cmd->type == MSP_TWI_CMD_WRITE_READ) { - u64 tmp = be64_to_cpup((__be64 *)cmd->write_data); + __be64 tmp = cpu_to_be64p((u64 *)cmd->write_data); tmp >>= (MSP_MAX_BYTES_PER_RW - cmd->write_len) * 8; dev_dbg(&pmcmsptwi_adapter.dev, "Writing 0x%016llx\n", tmp); pmcmsptwi_writel(tmp & 0x00000000ffffffffLL, diff --git a/trunk/drivers/i2c/busses/i2c-s3c2410.c b/trunk/drivers/i2c/busses/i2c-s3c2410.c index b7434d24904e..1fac4e233133 100644 --- a/trunk/drivers/i2c/busses/i2c-s3c2410.c +++ b/trunk/drivers/i2c/busses/i2c-s3c2410.c @@ -56,7 +56,6 @@ enum s3c24xx_i2c_state { struct s3c24xx_i2c { spinlock_t lock; wait_queue_head_t wait; - unsigned int suspended:1; struct i2c_msg *msg; unsigned int msg_num; @@ -508,7 +507,7 @@ static int s3c24xx_i2c_doxfer(struct s3c24xx_i2c *i2c, struct i2c_msg *msgs, int unsigned long timeout; int ret; - if (i2c->suspended) + if (!(readl(i2c->regs + S3C2410_IICCON) & S3C2410_IICCON_IRQEN)) return -EIO; ret = s3c24xx_i2c_set_master(i2c); @@ -987,26 +986,17 @@ static int s3c24xx_i2c_remove(struct platform_device *pdev) } #ifdef CONFIG_PM -static int s3c24xx_i2c_suspend_late(struct platform_device *dev, - pm_message_t msg) -{ - struct s3c24xx_i2c *i2c = platform_get_drvdata(dev); - i2c->suspended = 1; - return 0; -} - static int s3c24xx_i2c_resume(struct platform_device *dev) { struct s3c24xx_i2c *i2c = platform_get_drvdata(dev); - i2c->suspended = 0; - s3c24xx_i2c_init(i2c); + if (i2c != NULL) + s3c24xx_i2c_init(i2c); return 0; } #else -#define s3c24xx_i2c_suspend_late NULL #define s3c24xx_i2c_resume NULL #endif @@ -1015,7 +1005,6 @@ static int s3c24xx_i2c_resume(struct platform_device *dev) static struct platform_driver s3c2410_i2c_driver = { .probe = s3c24xx_i2c_probe, .remove = s3c24xx_i2c_remove, - .suspend_late = s3c24xx_i2c_suspend_late, .resume = s3c24xx_i2c_resume, .driver = { .owner = THIS_MODULE, @@ -1026,7 +1015,6 @@ static struct platform_driver s3c2410_i2c_driver = { static struct platform_driver s3c2440_i2c_driver = { .probe = s3c24xx_i2c_probe, .remove = s3c24xx_i2c_remove, - .suspend_late = s3c24xx_i2c_suspend_late, .resume = s3c24xx_i2c_resume, .driver = { .owner = THIS_MODULE, diff --git a/trunk/drivers/ieee1394/nodemgr.c b/trunk/drivers/ieee1394/nodemgr.c index 79ef5fd928ae..d333ae22459c 100644 --- a/trunk/drivers/ieee1394/nodemgr.c +++ b/trunk/drivers/ieee1394/nodemgr.c @@ -115,14 +115,8 @@ static int nodemgr_bus_read(struct csr1212_csr *csr, u64 addr, u16 length, return error; } -#define OUI_FREECOM_TECHNOLOGIES_GMBH 0x0001db - static int nodemgr_get_max_rom(quadlet_t *bus_info_data, void *__ci) { - /* Freecom FireWire Hard Drive firmware bug */ - if (be32_to_cpu(bus_info_data[3]) >> 8 == OUI_FREECOM_TECHNOLOGIES_GMBH) - return 0; - return (be32_to_cpu(bus_info_data[2]) >> 8) & 0x3; } diff --git a/trunk/drivers/lguest/interrupts_and_traps.c b/trunk/drivers/lguest/interrupts_and_traps.c index 415fab0125ac..a1039068f95c 100644 --- a/trunk/drivers/lguest/interrupts_and_traps.c +++ b/trunk/drivers/lguest/interrupts_and_traps.c @@ -222,16 +222,11 @@ bool check_syscall_vector(struct lguest *lg) int init_interrupts(void) { /* If they want some strange system call vector, reserve it now */ - if (syscall_vector != SYSCALL_VECTOR) { - if (test_bit(syscall_vector, used_vectors) || - vector_used_by_percpu_irq(syscall_vector)) { - printk(KERN_ERR "lg: couldn't reserve syscall %u\n", - syscall_vector); - return -EBUSY; - } - set_bit(syscall_vector, used_vectors); + if (syscall_vector != SYSCALL_VECTOR + && test_and_set_bit(syscall_vector, used_vectors)) { + printk("lg: couldn't reserve syscall %u\n", syscall_vector); + return -EBUSY; } - return 0; } diff --git a/trunk/drivers/md/dm.c b/trunk/drivers/md/dm.c index 343094c3feeb..c99e4728ff41 100644 --- a/trunk/drivers/md/dm.c +++ b/trunk/drivers/md/dm.c @@ -21,7 +21,6 @@ #include #include #include -#include #define DM_MSG_PREFIX "core" @@ -52,8 +51,6 @@ struct dm_target_io { union map_info info; }; -DEFINE_TRACE(block_bio_complete); - union map_info *dm_get_mapinfo(struct bio *bio) { if (bio && bio->bi_private) @@ -507,7 +504,8 @@ static void dec_pending(struct dm_io *io, int error) end_io_acct(io); if (io->error != DM_ENDIO_REQUEUE) { - trace_block_bio_complete(io->md->queue, io->bio); + blk_add_trace_bio(io->md->queue, io->bio, + BLK_TA_COMPLETE); bio_endio(io->bio, io->error); } @@ -600,7 +598,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, if (r == DM_MAPIO_REMAPPED) { /* the bio has been remapped so dispatch it */ - trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, + blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone, tio->io->bio->bi_bdev->bd_dev, clone->bi_sector, sector); diff --git a/trunk/drivers/net/e1000e/ich8lan.c b/trunk/drivers/net/e1000e/ich8lan.c index d115a6d30f29..523b9716a543 100644 --- a/trunk/drivers/net/e1000e/ich8lan.c +++ b/trunk/drivers/net/e1000e/ich8lan.c @@ -1893,17 +1893,12 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) ctrl |= E1000_CTRL_PHY_RST; } ret_val = e1000_acquire_swflag_ich8lan(hw); - /* Whether or not the swflag was acquired, we need to reset the part */ hw_dbg(hw, "Issuing a global reset to ich8lan"); ew32(CTRL, (ctrl | E1000_CTRL_RST)); msleep(20); - if (!ret_val) { - /* release the swflag because it is not reset by - * hardware reset - */ - e1000_release_swflag_ich8lan(hw); - } + /* release the swflag because it is not reset by hardware reset */ + e1000_release_swflag_ich8lan(hw); ret_val = e1000e_get_auto_rd_done(hw); if (ret_val) { diff --git a/trunk/drivers/net/sungem.c b/trunk/drivers/net/sungem.c index fed7eba65ead..1349e419673c 100644 --- a/trunk/drivers/net/sungem.c +++ b/trunk/drivers/net/sungem.c @@ -1142,70 +1142,6 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static void gem_pcs_reset(struct gem *gp) -{ - int limit; - u32 val; - - /* Reset PCS unit. */ - val = readl(gp->regs + PCS_MIICTRL); - val |= PCS_MIICTRL_RST; - writel(val, gp->regs + PCS_MIICTRL); - - limit = 32; - while (readl(gp->regs + PCS_MIICTRL) & PCS_MIICTRL_RST) { - udelay(100); - if (limit-- <= 0) - break; - } - if (limit <= 0) - printk(KERN_WARNING "%s: PCS reset bit would not clear.\n", - gp->dev->name); -} - -static void gem_pcs_reinit_adv(struct gem *gp) -{ - u32 val; - - /* Make sure PCS is disabled while changing advertisement - * configuration. - */ - val = readl(gp->regs + PCS_CFG); - val &= ~(PCS_CFG_ENABLE | PCS_CFG_TO); - writel(val, gp->regs + PCS_CFG); - - /* Advertise all capabilities except assymetric - * pause. - */ - val = readl(gp->regs + PCS_MIIADV); - val |= (PCS_MIIADV_FD | PCS_MIIADV_HD | - PCS_MIIADV_SP | PCS_MIIADV_AP); - writel(val, gp->regs + PCS_MIIADV); - - /* Enable and restart auto-negotiation, disable wrapback/loopback, - * and re-enable PCS. - */ - val = readl(gp->regs + PCS_MIICTRL); - val |= (PCS_MIICTRL_RAN | PCS_MIICTRL_ANE); - val &= ~PCS_MIICTRL_WB; - writel(val, gp->regs + PCS_MIICTRL); - - val = readl(gp->regs + PCS_CFG); - val |= PCS_CFG_ENABLE; - writel(val, gp->regs + PCS_CFG); - - /* Make sure serialink loopback is off. The meaning - * of this bit is logically inverted based upon whether - * you are in Serialink or SERDES mode. - */ - val = readl(gp->regs + PCS_SCTRL); - if (gp->phy_type == phy_serialink) - val &= ~PCS_SCTRL_LOOP; - else - val |= PCS_SCTRL_LOOP; - writel(val, gp->regs + PCS_SCTRL); -} - #define STOP_TRIES 32 /* Must be invoked under gp->lock and gp->tx_lock. */ @@ -1232,9 +1168,6 @@ static void gem_reset(struct gem *gp) if (limit <= 0) printk(KERN_ERR "%s: SW reset is ghetto.\n", gp->dev->name); - - if (gp->phy_type == phy_serialink || gp->phy_type == phy_serdes) - gem_pcs_reinit_adv(gp); } /* Must be invoked under gp->lock and gp->tx_lock. */ @@ -1391,7 +1324,7 @@ static int gem_set_link_modes(struct gem *gp) gp->phy_type == phy_serdes) { u32 pcs_lpa = readl(gp->regs + PCS_MIILP); - if ((pcs_lpa & PCS_MIIADV_FD) || gp->phy_type == phy_serdes) + if (pcs_lpa & PCS_MIIADV_FD) full_duplex = 1; speed = SPEED_1000; } @@ -1555,9 +1488,6 @@ static void gem_link_timer(unsigned long data) val = readl(gp->regs + PCS_MIISTAT); if ((val & PCS_MIISTAT_LS) != 0) { - if (gp->lstate == link_up) - goto restart; - gp->lstate = link_up; netif_carrier_on(gp->dev); (void)gem_set_link_modes(gp); @@ -1778,8 +1708,61 @@ static void gem_init_phy(struct gem *gp) if (gp->phy_mii.def && gp->phy_mii.def->ops->init) gp->phy_mii.def->ops->init(&gp->phy_mii); } else { - gem_pcs_reset(gp); - gem_pcs_reinit_adv(gp); + u32 val; + int limit; + + /* Reset PCS unit. */ + val = readl(gp->regs + PCS_MIICTRL); + val |= PCS_MIICTRL_RST; + writel(val, gp->regs + PCS_MIICTRL); + + limit = 32; + while (readl(gp->regs + PCS_MIICTRL) & PCS_MIICTRL_RST) { + udelay(100); + if (limit-- <= 0) + break; + } + if (limit <= 0) + printk(KERN_WARNING "%s: PCS reset bit would not clear.\n", + gp->dev->name); + + /* Make sure PCS is disabled while changing advertisement + * configuration. + */ + val = readl(gp->regs + PCS_CFG); + val &= ~(PCS_CFG_ENABLE | PCS_CFG_TO); + writel(val, gp->regs + PCS_CFG); + + /* Advertise all capabilities except assymetric + * pause. + */ + val = readl(gp->regs + PCS_MIIADV); + val |= (PCS_MIIADV_FD | PCS_MIIADV_HD | + PCS_MIIADV_SP | PCS_MIIADV_AP); + writel(val, gp->regs + PCS_MIIADV); + + /* Enable and restart auto-negotiation, disable wrapback/loopback, + * and re-enable PCS. + */ + val = readl(gp->regs + PCS_MIICTRL); + val |= (PCS_MIICTRL_RAN | PCS_MIICTRL_ANE); + val &= ~PCS_MIICTRL_WB; + writel(val, gp->regs + PCS_MIICTRL); + + val = readl(gp->regs + PCS_CFG); + val |= PCS_CFG_ENABLE; + writel(val, gp->regs + PCS_CFG); + + /* Make sure serialink loopback is off. The meaning + * of this bit is logically inverted based upon whether + * you are in Serialink or SERDES mode. + */ + val = readl(gp->regs + PCS_SCTRL); + if (gp->phy_type == phy_serialink) + val &= ~PCS_SCTRL_LOOP; + else + val |= PCS_SCTRL_LOOP; + writel(val, gp->regs + PCS_SCTRL); } /* Default aneg parameters */ @@ -2697,21 +2680,6 @@ static int gem_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) cmd->speed = 0; cmd->duplex = cmd->port = cmd->phy_address = cmd->transceiver = cmd->autoneg = 0; - - /* serdes means usually a Fibre connector, with most fixed */ - if (gp->phy_type == phy_serdes) { - cmd->port = PORT_FIBRE; - cmd->supported = (SUPPORTED_1000baseT_Half | - SUPPORTED_1000baseT_Full | - SUPPORTED_FIBRE | SUPPORTED_Autoneg | - SUPPORTED_Pause | SUPPORTED_Asym_Pause); - cmd->advertising = cmd->supported; - cmd->transceiver = XCVR_INTERNAL; - if (gp->lstate == link_up) - cmd->speed = SPEED_1000; - cmd->duplex = DUPLEX_FULL; - cmd->autoneg = 1; - } } cmd->maxtxpkt = cmd->maxrxpkt = 0; diff --git a/trunk/drivers/pci/intr_remapping.c b/trunk/drivers/pci/intr_remapping.c index f78371b22529..2de5a3238c94 100644 --- a/trunk/drivers/pci/intr_remapping.c +++ b/trunk/drivers/pci/intr_remapping.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include "intr_remapping.h" @@ -20,75 +19,17 @@ struct irq_2_iommu { u8 irte_mask; }; -#ifdef CONFIG_SPARSE_IRQ -static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu) -{ - struct irq_2_iommu *iommu; - int node; - - node = cpu_to_node(cpu); - - iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node); - printk(KERN_DEBUG "alloc irq_2_iommu on cpu %d node %d\n", cpu, node); - - return iommu; -} - -static struct irq_2_iommu *irq_2_iommu(unsigned int irq) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - - if (WARN_ON_ONCE(!desc)) - return NULL; - - return desc->irq_2_iommu; -} - -static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu) -{ - struct irq_desc *desc; - struct irq_2_iommu *irq_iommu; - - /* - * alloc irq desc if not allocated already. - */ - desc = irq_to_desc_alloc_cpu(irq, cpu); - if (!desc) { - printk(KERN_INFO "can not get irq_desc for %d\n", irq); - return NULL; - } - - irq_iommu = desc->irq_2_iommu; - - if (!irq_iommu) - desc->irq_2_iommu = get_one_free_irq_2_iommu(cpu); - - return desc->irq_2_iommu; -} - -static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq) -{ - return irq_2_iommu_alloc_cpu(irq, boot_cpu_id); -} - -#else /* !CONFIG_SPARSE_IRQ */ - static struct irq_2_iommu irq_2_iommuX[NR_IRQS]; static struct irq_2_iommu *irq_2_iommu(unsigned int irq) { - if (irq < nr_irqs) - return &irq_2_iommuX[irq]; - - return NULL; + return (irq < nr_irqs) ? irq_2_iommuX + irq : NULL; } + static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq) { return irq_2_iommu(irq); } -#endif static DEFINE_SPINLOCK(irq_2_ir_lock); @@ -145,11 +86,9 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) if (!count) return -1; -#ifndef CONFIG_SPARSE_IRQ /* protect irq_2_iommu_alloc later */ if (irq >= nr_irqs) return -1; -#endif /* * start the IRTE search from index 0. @@ -191,12 +130,6 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) table->base[i].present = 1; irq_iommu = irq_2_iommu_alloc(irq); - if (!irq_iommu) { - spin_unlock(&irq_2_ir_lock); - printk(KERN_ERR "can't allocate irq_2_iommu\n"); - return -1; - } - irq_iommu->iommu = iommu; irq_iommu->irte_index = index; irq_iommu->sub_handle = 0; @@ -244,12 +177,6 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) irq_iommu = irq_2_iommu_alloc(irq); - if (!irq_iommu) { - spin_unlock(&irq_2_ir_lock); - printk(KERN_ERR "can't allocate irq_2_iommu\n"); - return -1; - } - irq_iommu->iommu = iommu; irq_iommu->irte_index = index; irq_iommu->sub_handle = subhandle; diff --git a/trunk/drivers/pci/msi.c b/trunk/drivers/pci/msi.c index 11a51f8ed3b3..74801f7df9c9 100644 --- a/trunk/drivers/pci/msi.c +++ b/trunk/drivers/pci/msi.c @@ -103,11 +103,11 @@ static void msix_set_enable(struct pci_dev *dev, int enable) } } -static void msix_flush_writes(struct irq_desc *desc) +static void msix_flush_writes(unsigned int irq) { struct msi_desc *entry; - entry = get_irq_desc_msi(desc); + entry = get_irq_msi(irq); BUG_ON(!entry || !entry->dev); switch (entry->msi_attrib.type) { case PCI_CAP_ID_MSI: @@ -135,11 +135,11 @@ static void msix_flush_writes(struct irq_desc *desc) * Returns 1 if it succeeded in masking the interrupt and 0 if the device * doesn't support MSI masking. */ -static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag) +static int msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag) { struct msi_desc *entry; - entry = get_irq_desc_msi(desc); + entry = get_irq_msi(irq); BUG_ON(!entry || !entry->dev); switch (entry->msi_attrib.type) { case PCI_CAP_ID_MSI: @@ -172,9 +172,9 @@ static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag) return 1; } -void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) +void read_msi_msg(unsigned int irq, struct msi_msg *msg) { - struct msi_desc *entry = get_irq_desc_msi(desc); + struct msi_desc *entry = get_irq_msi(irq); switch(entry->msi_attrib.type) { case PCI_CAP_ID_MSI: { @@ -211,16 +211,9 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) } } -void read_msi_msg(unsigned int irq, struct msi_msg *msg) -{ - struct irq_desc *desc = irq_to_desc(irq); - - read_msi_msg_desc(desc, msg); -} - -void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) +void write_msi_msg(unsigned int irq, struct msi_msg *msg) { - struct msi_desc *entry = get_irq_desc_msi(desc); + struct msi_desc *entry = get_irq_msi(irq); switch (entry->msi_attrib.type) { case PCI_CAP_ID_MSI: { @@ -259,31 +252,21 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) entry->msg = *msg; } -void write_msi_msg(unsigned int irq, struct msi_msg *msg) -{ - struct irq_desc *desc = irq_to_desc(irq); - - write_msi_msg_desc(desc, msg); -} - void mask_msi_irq(unsigned int irq) { - struct irq_desc *desc = irq_to_desc(irq); - - msi_set_mask_bits(desc, 1, 1); - msix_flush_writes(desc); + msi_set_mask_bits(irq, 1, 1); + msix_flush_writes(irq); } void unmask_msi_irq(unsigned int irq) { - struct irq_desc *desc = irq_to_desc(irq); - - msi_set_mask_bits(desc, 1, 0); - msix_flush_writes(desc); + msi_set_mask_bits(irq, 1, 0); + msix_flush_writes(irq); } static int msi_free_irqs(struct pci_dev* dev); + static struct msi_desc* alloc_msi_entry(void) { struct msi_desc *entry; @@ -320,11 +303,9 @@ static void __pci_restore_msi_state(struct pci_dev *dev) pci_intx_for_msi(dev, 0); msi_set_enable(dev, 0); write_msi_msg(dev->irq, &entry->msg); - if (entry->msi_attrib.maskbit) { - struct irq_desc *desc = irq_to_desc(dev->irq); - msi_set_mask_bits(desc, entry->msi_attrib.maskbits_mask, + if (entry->msi_attrib.maskbit) + msi_set_mask_bits(dev->irq, entry->msi_attrib.maskbits_mask, entry->msi_attrib.masked); - } pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); control &= ~PCI_MSI_FLAGS_QSIZE; @@ -346,9 +327,8 @@ static void __pci_restore_msix_state(struct pci_dev *dev) msix_set_enable(dev, 0); list_for_each_entry(entry, &dev->msi_list, list) { - struct irq_desc *desc = irq_to_desc(entry->irq); write_msi_msg(entry->irq, &entry->msg); - msi_set_mask_bits(desc, 1, entry->msi_attrib.masked); + msi_set_mask_bits(entry->irq, 1, entry->msi_attrib.masked); } BUG_ON(list_empty(&dev->msi_list)); @@ -616,8 +596,7 @@ void pci_msi_shutdown(struct pci_dev* dev) /* Return the the pci reset with msi irqs unmasked */ if (entry->msi_attrib.maskbit) { u32 mask = entry->msi_attrib.maskbits_mask; - struct irq_desc *desc = irq_to_desc(dev->irq); - msi_set_mask_bits(desc, mask, ~mask); + msi_set_mask_bits(dev->irq, mask, ~mask); } if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) return; diff --git a/trunk/drivers/pcmcia/bfin_cf_pcmcia.c b/trunk/drivers/pcmcia/bfin_cf_pcmcia.c index b59d4115d20f..bb7338863fb9 100644 --- a/trunk/drivers/pcmcia/bfin_cf_pcmcia.c +++ b/trunk/drivers/pcmcia/bfin_cf_pcmcia.c @@ -334,6 +334,6 @@ static void __exit bfin_cf_exit(void) module_init(bfin_cf_init); module_exit(bfin_cf_exit); -MODULE_AUTHOR("Michael Hennerich "); +MODULE_AUTHOR("Michael Hennerich ") MODULE_DESCRIPTION("BFIN CF/PCMCIA Driver"); MODULE_LICENSE("GPL"); diff --git a/trunk/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/trunk/drivers/scsi/device_handler/scsi_dh_hp_sw.c index f7da7530875e..9aec4ca64e56 100644 --- a/trunk/drivers/scsi/device_handler/scsi_dh_hp_sw.c +++ b/trunk/drivers/scsi/device_handler/scsi_dh_hp_sw.c @@ -107,7 +107,6 @@ static int hp_sw_tur(struct scsi_device *sdev, struct hp_sw_dh_data *h) struct request *req; int ret; -retry: req = blk_get_request(sdev->request_queue, WRITE, GFP_NOIO); if (!req) return SCSI_DH_RES_TEMP_UNAVAIL; @@ -122,6 +121,7 @@ static int hp_sw_tur(struct scsi_device *sdev, struct hp_sw_dh_data *h) memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE); req->sense_len = 0; +retry: ret = blk_execute_rq(req->q, NULL, req, 1); if (ret == -EIO) { if (req->sense_len > 0) { @@ -136,10 +136,8 @@ static int hp_sw_tur(struct scsi_device *sdev, struct hp_sw_dh_data *h) h->path_state = HP_SW_PATH_ACTIVE; ret = SCSI_DH_OK; } - if (ret == SCSI_DH_IMM_RETRY) { - blk_put_request(req); + if (ret == SCSI_DH_IMM_RETRY) goto retry; - } if (ret == SCSI_DH_DEV_OFFLINED) { h->path_state = HP_SW_PATH_PASSIVE; ret = SCSI_DH_OK; @@ -202,7 +200,6 @@ static int hp_sw_start_stop(struct scsi_device *sdev, struct hp_sw_dh_data *h) struct request *req; int ret, retry; -retry: req = blk_get_request(sdev->request_queue, WRITE, GFP_NOIO); if (!req) return SCSI_DH_RES_TEMP_UNAVAIL; @@ -219,6 +216,7 @@ static int hp_sw_start_stop(struct scsi_device *sdev, struct hp_sw_dh_data *h) req->sense_len = 0; retry = h->retries; +retry: ret = blk_execute_rq(req->q, NULL, req, 1); if (ret == -EIO) { if (req->sense_len > 0) { @@ -233,10 +231,8 @@ static int hp_sw_start_stop(struct scsi_device *sdev, struct hp_sw_dh_data *h) ret = SCSI_DH_OK; if (ret == SCSI_DH_RETRY) { - if (--retry) { - blk_put_request(req); + if (--retry) goto retry; - } ret = SCSI_DH_IO; } diff --git a/trunk/drivers/sh/maple/maple.c b/trunk/drivers/sh/maple/maple.c index 63f0de29aa14..d1812d32f47d 100644 --- a/trunk/drivers/sh/maple/maple.c +++ b/trunk/drivers/sh/maple/maple.c @@ -827,7 +827,7 @@ static int __init maple_bus_init(void) maple_queue_cache = kmem_cache_create("maple_queue_cache", 0x400, 0, - SLAB_HWCACHE_ALIGN, NULL); + SLAB_POISON|SLAB_HWCACHE_ALIGN, NULL); if (!maple_queue_cache) goto cleanup_bothirqs; diff --git a/trunk/drivers/staging/Kconfig b/trunk/drivers/staging/Kconfig index 5d457c96bd7e..c95b286a1239 100644 --- a/trunk/drivers/staging/Kconfig +++ b/trunk/drivers/staging/Kconfig @@ -22,8 +22,6 @@ menuconfig STAGING If in doubt, say N here. -if STAGING - config STAGING_EXCLUDE_BUILD bool "Exclude Staging drivers from being built" if STAGING default y @@ -64,4 +62,3 @@ source "drivers/staging/at76_usb/Kconfig" source "drivers/staging/poch/Kconfig" endif # !STAGING_EXCLUDE_BUILD -endif # STAGING diff --git a/trunk/drivers/usb/class/usbtmc.c b/trunk/drivers/usb/class/usbtmc.c index 43a863c5cc43..8e74657f106c 100644 --- a/trunk/drivers/usb/class/usbtmc.c +++ b/trunk/drivers/usb/class/usbtmc.c @@ -51,7 +51,6 @@ static struct usb_device_id usbtmc_devices[] = { { USB_INTERFACE_INFO(USB_CLASS_APP_SPEC, 3, 0), }, { 0, } /* terminating entry */ }; -MODULE_DEVICE_TABLE(usb, usbtmc_devices); /* * This structure is the capabilities for the device diff --git a/trunk/drivers/usb/core/driver.c b/trunk/drivers/usb/core/driver.c index 8c081308b0e2..3d7793d93031 100644 --- a/trunk/drivers/usb/core/driver.c +++ b/trunk/drivers/usb/core/driver.c @@ -279,9 +279,7 @@ static int usb_unbind_interface(struct device *dev) * altsetting means creating new endpoint device entries). * When either of these happens, defer the Set-Interface. */ - if (intf->cur_altsetting->desc.bAlternateSetting == 0) - ; /* Already in altsetting 0 so skip Set-Interface */ - else if (!error && intf->dev.power.status == DPM_ON) + if (!error && intf->dev.power.status == DPM_ON) usb_set_interface(udev, intf->altsetting[0]. desc.bInterfaceNumber, 0); else diff --git a/trunk/drivers/usb/gadget/f_rndis.c b/trunk/drivers/usb/gadget/f_rndis.c index 3a8bb53fc473..428b5993575a 100644 --- a/trunk/drivers/usb/gadget/f_rndis.c +++ b/trunk/drivers/usb/gadget/f_rndis.c @@ -651,8 +651,6 @@ rndis_bind(struct usb_configuration *c, struct usb_function *f) fs_in_desc.bEndpointAddress; hs_out_desc.bEndpointAddress = fs_out_desc.bEndpointAddress; - hs_notify_desc.bEndpointAddress = - fs_notify_desc.bEndpointAddress; /* copy descriptors, and track endpoint copies */ f->hs_descriptors = usb_copy_descriptors(eth_hs_function); @@ -664,8 +662,6 @@ rndis_bind(struct usb_configuration *c, struct usb_function *f) f->hs_descriptors, &hs_in_desc); rndis->hs.out = usb_find_endpoint(eth_hs_function, f->hs_descriptors, &hs_out_desc); - rndis->hs.notify = usb_find_endpoint(eth_hs_function, - f->hs_descriptors, &hs_notify_desc); } rndis->port.open = rndis_open; diff --git a/trunk/drivers/usb/serial/ftdi_sio.c b/trunk/drivers/usb/serial/ftdi_sio.c index fb6f2933b01b..aad1359a3eb1 100644 --- a/trunk/drivers/usb/serial/ftdi_sio.c +++ b/trunk/drivers/usb/serial/ftdi_sio.c @@ -143,7 +143,6 @@ static struct ftdi_sio_quirk ftdi_HE_TIRA1_quirk = { static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_AMC232_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CANUSB_PID) }, - { USB_DEVICE(FTDI_VID, FTDI_CANDAPTER_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_0_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_2_PID) }, @@ -167,7 +166,6 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_PID) }, { USB_DEVICE(INTERBIOMETRICS_VID, INTERBIOMETRICS_IOBOARD_PID) }, { USB_DEVICE(INTERBIOMETRICS_VID, INTERBIOMETRICS_MINI_IOBOARD_PID) }, - { USB_DEVICE(FTDI_VID, FTDI_SPROG_II) }, { USB_DEVICE(FTDI_VID, FTDI_XF_632_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_634_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_547_PID) }, diff --git a/trunk/drivers/usb/serial/ftdi_sio.h b/trunk/drivers/usb/serial/ftdi_sio.h index 373ee09975bb..07a3992abad2 100644 --- a/trunk/drivers/usb/serial/ftdi_sio.h +++ b/trunk/drivers/usb/serial/ftdi_sio.h @@ -40,9 +40,6 @@ /* AlphaMicro Components AMC-232USB01 device */ #define FTDI_AMC232_PID 0xFF00 /* Product Id */ -/* www.candapter.com Ewert Energy Systems CANdapter device */ -#define FTDI_CANDAPTER_PID 0x9F80 /* Product Id */ - /* SCS HF Radio Modems PID's (http://www.scs-ptc.com) */ /* the VID is the standard ftdi vid (FTDI_VID) */ #define FTDI_SCS_DEVICE_0_PID 0xD010 /* SCS PTC-IIusb */ @@ -78,9 +75,6 @@ /* OpenDCC (www.opendcc.de) product id */ #define FTDI_OPENDCC_PID 0xBFD8 -/* Sprog II (Andrew Crosland's SprogII DCC interface) */ -#define FTDI_SPROG_II 0xF0C8 - /* www.crystalfontz.com devices - thanx for providing free devices for evaluation ! */ /* they use the ftdi chipset for the USB interface and the vendor id is the same */ #define FTDI_XF_632_PID 0xFC08 /* 632: 16x2 Character Display */ diff --git a/trunk/drivers/usb/serial/pl2303.c b/trunk/drivers/usb/serial/pl2303.c index 1aed584be5eb..491c8857b644 100644 --- a/trunk/drivers/usb/serial/pl2303.c +++ b/trunk/drivers/usb/serial/pl2303.c @@ -91,8 +91,6 @@ static struct usb_device_id id_table [] = { { USB_DEVICE(WS002IN_VENDOR_ID, WS002IN_PRODUCT_ID) }, { USB_DEVICE(COREGA_VENDOR_ID, COREGA_PRODUCT_ID) }, { USB_DEVICE(YCCABLE_VENDOR_ID, YCCABLE_PRODUCT_ID) }, - { USB_DEVICE(SUPERIAL_VENDOR_ID, SUPERIAL_PRODUCT_ID) }, - { USB_DEVICE(HP_VENDOR_ID, HP_LD220_PRODUCT_ID) }, { } /* Terminating entry */ }; diff --git a/trunk/drivers/usb/serial/pl2303.h b/trunk/drivers/usb/serial/pl2303.h index 54974f446a8c..a3bd039c78e9 100644 --- a/trunk/drivers/usb/serial/pl2303.h +++ b/trunk/drivers/usb/serial/pl2303.h @@ -110,11 +110,3 @@ /* Y.C. Cable U.S.A., Inc - USB to RS-232 */ #define YCCABLE_VENDOR_ID 0x05ad #define YCCABLE_PRODUCT_ID 0x0fba - -/* "Superial" USB - Serial */ -#define SUPERIAL_VENDOR_ID 0x5372 -#define SUPERIAL_PRODUCT_ID 0x2303 - -/* Hewlett-Packard LD220-HP POS Pole Display */ -#define HP_VENDOR_ID 0x03f0 -#define HP_LD220_PRODUCT_ID 0x3524 diff --git a/trunk/drivers/usb/serial/ti_usb_3410_5052.c b/trunk/drivers/usb/serial/ti_usb_3410_5052.c index 01d0c70d60e9..31c42d1cae13 100644 --- a/trunk/drivers/usb/serial/ti_usb_3410_5052.c +++ b/trunk/drivers/usb/serial/ti_usb_3410_5052.c @@ -16,6 +16,56 @@ * For questions or problems with this driver, contact Texas Instruments * technical support, or Al Borchers , or * Peter Berger . + * + * This driver needs this hotplug script in /etc/hotplug/usb/ti_usb_3410_5052 + * or in /etc/hotplug.d/usb/ti_usb_3410_5052.hotplug to set the device + * configuration. + * + * #!/bin/bash + * + * BOOT_CONFIG=1 + * ACTIVE_CONFIG=2 + * + * if [[ "$ACTION" != "add" ]] + * then + * exit + * fi + * + * CONFIG_PATH=/sys${DEVPATH%/?*}/bConfigurationValue + * + * if [[ 0`cat $CONFIG_PATH` -ne $BOOT_CONFIG ]] + * then + * exit + * fi + * + * PRODUCT=${PRODUCT%/?*} # delete version + * VENDOR_ID=`printf "%d" 0x${PRODUCT%/?*}` + * PRODUCT_ID=`printf "%d" 0x${PRODUCT#*?/}` + * + * PARAM_PATH=/sys/module/ti_usb_3410_5052/parameters + * + * function scan() { + * s=$1 + * shift + * for i + * do + * if [[ $s -eq $i ]] + * then + * return 0 + * fi + * done + * return 1 + * } + * + * IFS=$IFS, + * + * if (scan $VENDOR_ID 1105 `cat $PARAM_PATH/vendor_3410` && + * scan $PRODUCT_ID 13328 `cat $PARAM_PATH/product_3410`) || + * (scan $VENDOR_ID 1105 `cat $PARAM_PATH/vendor_5052` && + * scan $PRODUCT_ID 20562 20818 20570 20575 `cat $PARAM_PATH/product_5052`) + * then + * echo $ACTIVE_CONFIG > $CONFIG_PATH + * fi */ #include @@ -407,10 +457,9 @@ static int ti_startup(struct usb_serial *serial) goto free_tdev; } - /* the second configuration must be set */ + /* the second configuration must be set (in sysfs by hotplug script) */ if (dev->actconfig->desc.bConfigurationValue == TI_BOOT_CONFIG) { - status = usb_driver_set_configuration(dev, TI_ACTIVE_CONFIG); - status = status ? status : -ENODEV; + status = -ENODEV; goto free_tdev; } diff --git a/trunk/drivers/usb/storage/unusual_devs.h b/trunk/drivers/usb/storage/unusual_devs.h index bfcc1fe82518..e61f2bfc64ad 100644 --- a/trunk/drivers/usb/storage/unusual_devs.h +++ b/trunk/drivers/usb/storage/unusual_devs.h @@ -167,22 +167,8 @@ UNUSUAL_DEV( 0x0421, 0x005d, 0x0001, 0x0600, US_SC_DEVICE, US_PR_DEVICE, NULL, US_FL_FIX_CAPACITY ), -/* Reported by Ozan Sener */ -UNUSUAL_DEV( 0x0421, 0x0060, 0x0551, 0x0551, - "Nokia", - "3500c", - US_SC_DEVICE, US_PR_DEVICE, NULL, - US_FL_FIX_CAPACITY ), - -/* Reported by CSECSY Laszlo */ -UNUSUAL_DEV( 0x0421, 0x0063, 0x0001, 0x0601, - "Nokia", - "Nokia 3109c", - US_SC_DEVICE, US_PR_DEVICE, NULL, - US_FL_FIX_CAPACITY ), - /* Patch for Nokia 5310 capacity */ -UNUSUAL_DEV( 0x0421, 0x006a, 0x0000, 0x0701, +UNUSUAL_DEV( 0x0421, 0x006a, 0x0000, 0x0591, "Nokia", "5310", US_SC_DEVICE, US_PR_DEVICE, NULL, diff --git a/trunk/drivers/xen/events.c b/trunk/drivers/xen/events.c index 6c8193046e0d..eba5ec5b020e 100644 --- a/trunk/drivers/xen/events.c +++ b/trunk/drivers/xen/events.c @@ -141,12 +141,8 @@ static void init_evtchn_cpu_bindings(void) int i; /* By default all event channels notify CPU#0. */ - for_each_irq_desc(i, desc) { - if (!desc) - continue; - + for_each_irq_desc(i, desc) desc->affinity = cpumask_of_cpu(0); - } #endif memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); @@ -235,7 +231,7 @@ static int find_unbound_irq(void) int irq; /* Only allocate from dynirq range */ - for (irq = 0; irq < nr_irqs; irq++) + for_each_irq_nr(irq) if (irq_bindcount[irq] == 0) break; @@ -796,7 +792,7 @@ void xen_irq_resume(void) mask_evtchn(evtchn); /* No IRQ <-> event-channel mappings. */ - for (irq = 0; irq < nr_irqs; irq++) + for_each_irq_nr(irq) irq_info[irq].evtchn = 0; /* zap event-channel binding */ for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) @@ -828,7 +824,7 @@ void __init xen_init_IRQ(void) mask_evtchn(i); /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ - for (i = 0; i < nr_irqs; i++) + for_each_irq_nr(i) irq_bindcount[i] = 0; irq_ctx_init(smp_processor_id()); diff --git a/trunk/fs/bio.c b/trunk/fs/bio.c index df99c882b807..77a55bcceedb 100644 --- a/trunk/fs/bio.c +++ b/trunk/fs/bio.c @@ -26,11 +26,8 @@ #include #include #include -#include #include /* for struct sg_iovec */ -DEFINE_TRACE(block_split); - static struct kmem_cache *bio_slab __read_mostly; static mempool_t *bio_split_pool __read_mostly; @@ -1266,7 +1263,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) if (!bp) return bp; - trace_block_split(bdev_get_queue(bi->bi_bdev), bi, + blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi, bi->bi_sector + first_sectors); BUG_ON(bi->bi_vcnt != 1); diff --git a/trunk/fs/cifs/cifssmb.c b/trunk/fs/cifs/cifssmb.c index 6d51696dc762..2af8626ced43 100644 --- a/trunk/fs/cifs/cifssmb.c +++ b/trunk/fs/cifs/cifssmb.c @@ -3983,8 +3983,7 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, node->flags = le16_to_cpu(pSMBr->DFSFlags); if (is_unicode) { - __le16 *tmp = kmalloc(strlen(searchName)*2 + 2, - GFP_KERNEL); + __le16 *tmp = kmalloc(strlen(searchName)*2, GFP_KERNEL); cifsConvertToUCS((__le16 *) tmp, searchName, PATH_MAX, nls_codepage, remap); node->path_consumed = hostlen_fromUCS(tmp, diff --git a/trunk/fs/ocfs2/ocfs2_fs.h b/trunk/fs/ocfs2/ocfs2_fs.h index 5e0c0d0aef7d..5f180cf7abbd 100644 --- a/trunk/fs/ocfs2/ocfs2_fs.h +++ b/trunk/fs/ocfs2/ocfs2_fs.h @@ -86,8 +86,7 @@ #define OCFS2_CLEAR_INCOMPAT_FEATURE(sb,mask) \ OCFS2_SB(sb)->s_feature_incompat &= ~(mask) -#define OCFS2_FEATURE_COMPAT_SUPP (OCFS2_FEATURE_COMPAT_BACKUP_SB \ - | OCFS2_FEATURE_COMPAT_JBD2_SB) +#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB #define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ @@ -153,11 +152,6 @@ */ #define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001 -/* - * The filesystem will correctly handle journal feature bits. - */ -#define OCFS2_FEATURE_COMPAT_JBD2_SB 0x0002 - /* * Unwritten extents support. */ diff --git a/trunk/fs/ocfs2/xattr.c b/trunk/fs/ocfs2/xattr.c index 74d7367ade13..054e2efb0b7e 100644 --- a/trunk/fs/ocfs2/xattr.c +++ b/trunk/fs/ocfs2/xattr.c @@ -2645,9 +2645,9 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode, return ret; } + i = xs->here - old_xh->xh_entries; + xs->here = &xs->header->xh_entries[i]; } - i = xs->here - old_xh->xh_entries; - xs->here = &xs->header->xh_entries[i]; } return ret; diff --git a/trunk/fs/proc/stat.c b/trunk/fs/proc/stat.c index 3bb1cf1e7425..81904f07679d 100644 --- a/trunk/fs/proc/stat.c +++ b/trunk/fs/proc/stat.c @@ -44,13 +44,10 @@ static int show_stat(struct seq_file *p, void *v) softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); - for_each_irq_nr(j) { -#ifdef CONFIG_SPARSE_IRQ - if (!irq_to_desc(j)) - continue; -#endif + + for_each_irq_nr(j) sum += kstat_irqs_cpu(j, i); - } + sum += arch_irq_stat_cpu(i); } sum += arch_irq_stat(); @@ -95,12 +92,7 @@ static int show_stat(struct seq_file *p, void *v) /* sum again ? it could be updated? */ for_each_irq_nr(j) { per_irq_sum = 0; -#ifdef CONFIG_SPARSE_IRQ - if (!irq_to_desc(j)) { - seq_printf(p, " %u", per_irq_sum); - continue; - } -#endif + for_each_possible_cpu(i) per_irq_sum += kstat_irqs_cpu(j, i); diff --git a/trunk/fs/seq_file.c b/trunk/fs/seq_file.c index 16c211558c22..eba2eabcd2b8 100644 --- a/trunk/fs/seq_file.c +++ b/trunk/fs/seq_file.c @@ -357,18 +357,7 @@ int seq_printf(struct seq_file *m, const char *f, ...) } EXPORT_SYMBOL(seq_printf); -/** - * mangle_path - mangle and copy path to buffer beginning - * @s: buffer start - * @p: beginning of path in above buffer - * @esc: set of characters that need escaping - * - * Copy the path from @p to @s, replacing each occurrence of character from - * @esc with usual octal escape. - * Returns pointer past last written character in @s, or NULL in case of - * failure. - */ -char *mangle_path(char *s, char *p, char *esc) +static char *mangle_path(char *s, char *p, char *esc) { while (s <= p) { char c = *p++; @@ -387,7 +376,6 @@ char *mangle_path(char *s, char *p, char *esc) } return NULL; } -EXPORT_SYMBOL(mangle_path); /* * return the absolute path of 'dentry' residing in mount 'mnt'. diff --git a/trunk/include/asm-generic/vmlinux.lds.h b/trunk/include/asm-generic/vmlinux.lds.h index eba835a2c2cd..80744606bad1 100644 --- a/trunk/include/asm-generic/vmlinux.lds.h +++ b/trunk/include/asm-generic/vmlinux.lds.h @@ -45,22 +45,6 @@ #define MCOUNT_REC() #endif -#ifdef CONFIG_TRACE_BRANCH_PROFILING -#define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_annotated_branch_profile) = .; \ - *(_ftrace_annotated_branch) \ - VMLINUX_SYMBOL(__stop_annotated_branch_profile) = .; -#else -#define LIKELY_PROFILE() -#endif - -#ifdef CONFIG_PROFILE_ALL_BRANCHES -#define BRANCH_PROFILE() VMLINUX_SYMBOL(__start_branch_profile) = .; \ - *(_ftrace_branch) \ - VMLINUX_SYMBOL(__stop_branch_profile) = .; -#else -#define BRANCH_PROFILE() -#endif - /* .data section */ #define DATA_DATA \ *(.data) \ @@ -76,12 +60,9 @@ VMLINUX_SYMBOL(__start___markers) = .; \ *(__markers) \ VMLINUX_SYMBOL(__stop___markers) = .; \ - . = ALIGN(32); \ VMLINUX_SYMBOL(__start___tracepoints) = .; \ *(__tracepoints) \ - VMLINUX_SYMBOL(__stop___tracepoints) = .; \ - LIKELY_PROFILE() \ - BRANCH_PROFILE() + VMLINUX_SYMBOL(__stop___tracepoints) = .; #define RO_DATA(align) \ . = ALIGN((align)); \ diff --git a/trunk/include/asm-m32r/system.h b/trunk/include/asm-m32r/system.h index c980f5ba8de7..70a57c8c002b 100644 --- a/trunk/include/asm-m32r/system.h +++ b/trunk/include/asm-m32r/system.h @@ -23,7 +23,7 @@ */ #if defined(CONFIG_FRAME_POINTER) || \ - !defined(CONFIG_SCHED_OMIT_FRAME_POINTER) + !defined(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER) #define M32R_PUSH_FP " push fp\n" #define M32R_POP_FP " pop fp\n" #else diff --git a/trunk/include/linux/Kbuild b/trunk/include/linux/Kbuild index 95ac82340c3b..e531783e5d78 100644 --- a/trunk/include/linux/Kbuild +++ b/trunk/include/linux/Kbuild @@ -313,7 +313,6 @@ unifdef-y += ptrace.h unifdef-y += qnx4_fs.h unifdef-y += quota.h unifdef-y += random.h -unifdef-y += irqnr.h unifdef-y += reboot.h unifdef-y += reiserfs_fs.h unifdef-y += reiserfs_xattr.h diff --git a/trunk/include/linux/blktrace_api.h b/trunk/include/linux/blktrace_api.h index 1dba3493d520..bdf505d33e77 100644 --- a/trunk/include/linux/blktrace_api.h +++ b/trunk/include/linux/blktrace_api.h @@ -160,6 +160,7 @@ struct blk_trace { extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); +extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *); extern int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct blk_user_trace_setup *buts); extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); @@ -185,8 +186,168 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); } while (0) #define BLK_TN_MAX_MSG 128 -extern void blk_add_driver_data(struct request_queue *q, struct request *rq, - void *data, size_t len); +/** + * blk_add_trace_rq - Add a trace for a request oriented action + * @q: queue the io is for + * @rq: the source request + * @what: the action + * + * Description: + * Records an action against a request. Will log the bio offset + size. + * + **/ +static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq, + u32 what) +{ + struct blk_trace *bt = q->blk_trace; + int rw = rq->cmd_flags & 0x03; + + if (likely(!bt)) + return; + + if (blk_discard_rq(rq)) + rw |= (1 << BIO_RW_DISCARD); + + if (blk_pc_request(rq)) { + what |= BLK_TC_ACT(BLK_TC_PC); + __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd); + } else { + what |= BLK_TC_ACT(BLK_TC_FS); + __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, rw, what, rq->errors, 0, NULL); + } +} + +/** + * blk_add_trace_bio - Add a trace for a bio oriented action + * @q: queue the io is for + * @bio: the source bio + * @what: the action + * + * Description: + * Records an action against a bio. Will log the bio offset + size. + * + **/ +static inline void blk_add_trace_bio(struct request_queue *q, struct bio *bio, + u32 what) +{ + struct blk_trace *bt = q->blk_trace; + + if (likely(!bt)) + return; + + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), 0, NULL); +} + +/** + * blk_add_trace_generic - Add a trace for a generic action + * @q: queue the io is for + * @bio: the source bio + * @rw: the data direction + * @what: the action + * + * Description: + * Records a simple trace + * + **/ +static inline void blk_add_trace_generic(struct request_queue *q, + struct bio *bio, int rw, u32 what) +{ + struct blk_trace *bt = q->blk_trace; + + if (likely(!bt)) + return; + + if (bio) + blk_add_trace_bio(q, bio, what); + else + __blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL); +} + +/** + * blk_add_trace_pdu_int - Add a trace for a bio with an integer payload + * @q: queue the io is for + * @what: the action + * @bio: the source bio + * @pdu: the integer payload + * + * Description: + * Adds a trace with some integer payload. This might be an unplug + * option given as the action, with the depth at unplug time given + * as the payload + * + **/ +static inline void blk_add_trace_pdu_int(struct request_queue *q, u32 what, + struct bio *bio, unsigned int pdu) +{ + struct blk_trace *bt = q->blk_trace; + __be64 rpdu = cpu_to_be64(pdu); + + if (likely(!bt)) + return; + + if (bio) + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu); + else + __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu); +} + +/** + * blk_add_trace_remap - Add a trace for a remap operation + * @q: queue the io is for + * @bio: the source bio + * @dev: target device + * @from: source sector + * @to: target sector + * + * Description: + * Device mapper or raid target sometimes need to split a bio because + * it spans a stripe (or similar). Add a trace for that action. + * + **/ +static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio, + dev_t dev, sector_t from, sector_t to) +{ + struct blk_trace *bt = q->blk_trace; + struct blk_io_trace_remap r; + + if (likely(!bt)) + return; + + r.device = cpu_to_be32(dev); + r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); + r.sector = cpu_to_be64(to); + + __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); +} + +/** + * blk_add_driver_data - Add binary message with driver-specific data + * @q: queue the io is for + * @rq: io request + * @data: driver-specific data + * @len: length of driver-specific data + * + * Description: + * Some drivers might want to write driver-specific data per request. + * + **/ +static inline void blk_add_driver_data(struct request_queue *q, + struct request *rq, + void *data, size_t len) +{ + struct blk_trace *bt = q->blk_trace; + + if (likely(!bt)) + return; + + if (blk_pc_request(rq)) + __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, + rq->errors, len, data); + else + __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, + 0, BLK_TA_DRV_DATA, rq->errors, len, data); +} + extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); @@ -195,8 +356,13 @@ extern int blk_trace_remove(struct request_queue *q); #else /* !CONFIG_BLK_DEV_IO_TRACE */ #define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) #define blk_trace_shutdown(q) do { } while (0) -#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) +#define blk_add_trace_rq(q, rq, what) do { } while (0) +#define blk_add_trace_bio(q, rq, what) do { } while (0) +#define blk_add_trace_generic(q, rq, rw, what) do { } while (0) +#define blk_add_trace_pdu_int(q, what, bio, pdu) do { } while (0) +#define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0) #define blk_add_driver_data(q, rq, data, len) do {} while (0) +#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) #define blk_trace_setup(q, name, dev, arg) (-ENOTTY) #define blk_trace_startstop(q, start) (-ENOTTY) #define blk_trace_remove(q) (-ENOTTY) diff --git a/trunk/include/linux/compiler.h b/trunk/include/linux/compiler.h index ea7c6be354b7..98115d9d04da 100644 --- a/trunk/include/linux/compiler.h +++ b/trunk/include/linux/compiler.h @@ -59,88 +59,8 @@ extern void __chk_io_ptr(const volatile void __iomem *); * specific implementations come from the above header files */ -struct ftrace_branch_data { - const char *func; - const char *file; - unsigned line; - union { - struct { - unsigned long correct; - unsigned long incorrect; - }; - struct { - unsigned long miss; - unsigned long hit; - }; - }; -}; - -/* - * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code - * to disable branch tracing on a per file basis. - */ -#if defined(CONFIG_TRACE_BRANCH_PROFILING) && !defined(DISABLE_BRANCH_PROFILING) -void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); - -#define likely_notrace(x) __builtin_expect(!!(x), 1) -#define unlikely_notrace(x) __builtin_expect(!!(x), 0) - -#define __branch_check__(x, expect) ({ \ - int ______r; \ - static struct ftrace_branch_data \ - __attribute__((__aligned__(4))) \ - __attribute__((section("_ftrace_annotated_branch"))) \ - ______f = { \ - .func = __func__, \ - .file = __FILE__, \ - .line = __LINE__, \ - }; \ - ______r = likely_notrace(x); \ - ftrace_likely_update(&______f, ______r, expect); \ - ______r; \ - }) - -/* - * Using __builtin_constant_p(x) to ignore cases where the return - * value is always the same. This idea is taken from a similar patch - * written by Daniel Walker. - */ -# ifndef likely -# define likely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 1)) -# endif -# ifndef unlikely -# define unlikely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0)) -# endif - -#ifdef CONFIG_PROFILE_ALL_BRANCHES -/* - * "Define 'is'", Bill Clinton - * "Define 'if'", Steven Rostedt - */ -#define if(cond) if (__builtin_constant_p((cond)) ? !!(cond) : \ - ({ \ - int ______r; \ - static struct ftrace_branch_data \ - __attribute__((__aligned__(4))) \ - __attribute__((section("_ftrace_branch"))) \ - ______f = { \ - .func = __func__, \ - .file = __FILE__, \ - .line = __LINE__, \ - }; \ - ______r = !!(cond); \ - if (______r) \ - ______f.hit++; \ - else \ - ______f.miss++; \ - ______r; \ - })) -#endif /* CONFIG_PROFILE_ALL_BRANCHES */ - -#else -# define likely(x) __builtin_expect(!!(x), 1) -# define unlikely(x) __builtin_expect(!!(x), 0) -#endif +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) /* Optimization barrier */ #ifndef barrier diff --git a/trunk/include/linux/cpumask.h b/trunk/include/linux/cpumask.h index d4bf52603e6b..b5ad19a6f43f 100644 --- a/trunk/include/linux/cpumask.h +++ b/trunk/include/linux/cpumask.h @@ -1025,6 +1025,7 @@ static inline size_t cpumask_size(void) #ifdef CONFIG_CPUMASK_OFFSTACK typedef struct cpumask *cpumask_var_t; +bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); @@ -1038,6 +1039,12 @@ static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) return true; } +static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, + int node) +{ + return true; +} + static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) { } diff --git a/trunk/include/linux/debug_locks.h b/trunk/include/linux/debug_locks.h index 096476f1fb35..4aaa4afb1cb9 100644 --- a/trunk/include/linux/debug_locks.h +++ b/trunk/include/linux/debug_locks.h @@ -17,7 +17,7 @@ extern int debug_locks_off(void); ({ \ int __ret = 0; \ \ - if (!oops_in_progress && unlikely(c)) { \ + if (unlikely(c)) { \ if (debug_locks_off() && !debug_locks_silent) \ WARN_ON(1); \ __ret = 1; \ diff --git a/trunk/include/linux/ftrace.h b/trunk/include/linux/ftrace.h index 985b28dc2ba9..9c5bc6be2b09 100644 --- a/trunk/include/linux/ftrace.h +++ b/trunk/include/linux/ftrace.h @@ -8,8 +8,6 @@ #include #include #include -#include -#include #ifdef CONFIG_FUNCTION_TRACER @@ -26,45 +24,6 @@ struct ftrace_ops { struct ftrace_ops *next; }; -extern int function_trace_stop; - -/* - * Type of the current tracing. - */ -enum ftrace_tracing_type_t { - FTRACE_TYPE_ENTER = 0, /* Hook the call of the function */ - FTRACE_TYPE_RETURN, /* Hook the return of the function */ -}; - -/* Current tracing type, default is FTRACE_TYPE_ENTER */ -extern enum ftrace_tracing_type_t ftrace_tracing_type; - -/** - * ftrace_stop - stop function tracer. - * - * A quick way to stop the function tracer. Note this an on off switch, - * it is not something that is recursive like preempt_disable. - * This does not disable the calling of mcount, it only stops the - * calling of functions from mcount. - */ -static inline void ftrace_stop(void) -{ - function_trace_stop = 1; -} - -/** - * ftrace_start - start the function tracer. - * - * This function is the inverse of ftrace_stop. This does not enable - * the function tracing if the function tracer is disabled. This only - * sets the function tracer flag to continue calling the functions - * from mcount. - */ -static inline void ftrace_start(void) -{ - function_trace_stop = 0; -} - /* * The ftrace_ops must be a static and should also * be read_mostly. These functions do modify read_mostly variables @@ -83,13 +42,9 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1); # define unregister_ftrace_function(ops) do { } while (0) # define clear_ftrace_function(ops) do { } while (0) static inline void ftrace_kill(void) { } -static inline void ftrace_stop(void) { } -static inline void ftrace_start(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE -/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ -#include enum { FTRACE_FL_FREE = (1 << 0), @@ -105,7 +60,6 @@ struct dyn_ftrace { struct list_head list; unsigned long ip; /* address of mcount call-site */ unsigned long flags; - struct dyn_arch_ftrace arch; }; int ftrace_force_update(void); @@ -113,25 +67,19 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset); /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); +extern unsigned char *ftrace_nop_replace(void); +extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr); extern int ftrace_dyn_arch_init(void *data); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -extern void ftrace_graph_caller(void); -extern int ftrace_enable_ftrace_graph_caller(void); -extern int ftrace_disable_ftrace_graph_caller(void); -#else -static inline int ftrace_enable_ftrace_graph_caller(void) { return 0; } -static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; } -#endif /** - * ftrace_make_nop - convert code into top - * @mod: module structure if called by module load initialization - * @rec: the mcount call site record - * @addr: the address that the call site should be calling + * ftrace_modify_code - modify code segment + * @ip: the address of the code segment + * @old_code: the contents of what is expected to be there + * @new_code: the code to patch in * * This is a very sensitive operation and great care needs * to be taken by the arch. The operation should carefully @@ -139,8 +87,6 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; } * what we expect it to be, and then on success of the compare, * it should write to the location. * - * The code segment at @rec->ip should be a caller to @addr - * * Return must be: * 0 on success * -EFAULT on error reading the location @@ -148,34 +94,8 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; } * -EPERM on error writing to the location * Any other value will be considered a failure. */ -extern int ftrace_make_nop(struct module *mod, - struct dyn_ftrace *rec, unsigned long addr); - -/** - * ftrace_make_call - convert a nop call site into a call to addr - * @rec: the mcount call site record - * @addr: the address that the call site should call - * - * This is a very sensitive operation and great care needs - * to be taken by the arch. The operation should carefully - * read the location, check to see if what is read is indeed - * what we expect it to be, and then on success of the compare, - * it should write to the location. - * - * The code segment at @rec->ip should be a nop - * - * Return must be: - * 0 on success - * -EFAULT on error reading the location - * -EINVAL on a failed compare of the contents - * -EPERM on error writing to the location - * Any other value will be considered a failure. - */ -extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); - - -/* May be defined in arch */ -extern int ftrace_arch_read_dyn_info(char *buf, int size); +extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, + unsigned char *new_code); extern int skip_trace(unsigned long ip); @@ -183,6 +103,7 @@ extern void ftrace_release(void *start, unsigned long size); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); + #else # define skip_trace(ip) ({ 0; }) # define ftrace_force_update() ({ 0; }) @@ -261,12 +182,6 @@ static inline void __ftrace_enabled_restore(int enabled) #endif #ifdef CONFIG_TRACING -extern int ftrace_dump_on_oops; - -extern void tracing_start(void); -extern void tracing_stop(void); -extern void ftrace_off_permanent(void); - extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); @@ -297,9 +212,6 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } static inline int ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))); -static inline void tracing_start(void) { } -static inline void tracing_stop(void) { } -static inline void ftrace_off_permanent(void) { } static inline int ftrace_printk(const char *fmt, ...) { @@ -310,167 +222,33 @@ static inline void ftrace_dump(void) { } #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); -extern void ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end); +extern void ftrace_init_module(unsigned long *start, unsigned long *end); #else static inline void ftrace_init(void) { } static inline void -ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end) { } +ftrace_init_module(unsigned long *start, unsigned long *end) { } #endif -enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, -}; - -struct power_trace { -#ifdef CONFIG_POWER_TRACER - ktime_t stamp; - ktime_t end; - int type; - int state; -#endif -}; - -#ifdef CONFIG_POWER_TRACER -extern void trace_power_start(struct power_trace *it, unsigned int type, - unsigned int state); -extern void trace_power_mark(struct power_trace *it, unsigned int type, - unsigned int state); -extern void trace_power_end(struct power_trace *it); -#else -static inline void trace_power_start(struct power_trace *it, unsigned int type, - unsigned int state) { } -static inline void trace_power_mark(struct power_trace *it, unsigned int type, - unsigned int state) { } -static inline void trace_power_end(struct power_trace *it) { } -#endif - - -/* - * Structure that defines an entry function trace. - */ -struct ftrace_graph_ent { - unsigned long func; /* Current function */ - int depth; -}; -/* - * Structure that defines a return function trace. - */ -struct ftrace_graph_ret { - unsigned long func; /* Current function */ - unsigned long long calltime; - unsigned long long rettime; - /* Number of functions that overran the depth limit for current task */ - unsigned long overrun; - int depth; +struct boot_trace { + pid_t caller; + char func[KSYM_SYMBOL_LEN]; + int result; + unsigned long long duration; /* usecs */ + ktime_t calltime; + ktime_t rettime; }; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - -/* - * Sometimes we don't want to trace a function with the function - * graph tracer but we want them to keep traced by the usual function - * tracer if the function graph tracer is not configured. - */ -#define __notrace_funcgraph notrace - -#define FTRACE_RETFUNC_DEPTH 50 -#define FTRACE_RETSTACK_ALLOC_SIZE 32 -/* Type of the callback handlers for tracing function graph*/ -typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ -typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ - -extern int register_ftrace_graph(trace_func_graph_ret_t retfunc, - trace_func_graph_ent_t entryfunc); - -extern void ftrace_graph_stop(void); - -/* The current handlers in use */ -extern trace_func_graph_ret_t ftrace_graph_return; -extern trace_func_graph_ent_t ftrace_graph_entry; - -extern void unregister_ftrace_graph(void); - -extern void ftrace_graph_init_task(struct task_struct *t); -extern void ftrace_graph_exit_task(struct task_struct *t); - -static inline int task_curr_ret_stack(struct task_struct *t) -{ - return t->curr_ret_stack; -} - -static inline void pause_graph_tracing(void) -{ - atomic_inc(¤t->tracing_graph_pause); -} - -static inline void unpause_graph_tracing(void) -{ - atomic_dec(¤t->tracing_graph_pause); -} +#ifdef CONFIG_BOOT_TRACER +extern void trace_boot(struct boot_trace *it, initcall_t fn); +extern void start_boot_trace(void); +extern void stop_boot_trace(void); #else - -#define __notrace_funcgraph - -static inline void ftrace_graph_init_task(struct task_struct *t) { } -static inline void ftrace_graph_exit_task(struct task_struct *t) { } - -static inline int task_curr_ret_stack(struct task_struct *tsk) -{ - return -1; -} - -static inline void pause_graph_tracing(void) { } -static inline void unpause_graph_tracing(void) { } +static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } +static inline void start_boot_trace(void) { } +static inline void stop_boot_trace(void) { } #endif -#ifdef CONFIG_TRACING -#include - -/* flags for current->trace */ -enum { - TSK_TRACE_FL_TRACE_BIT = 0, - TSK_TRACE_FL_GRAPH_BIT = 1, -}; -enum { - TSK_TRACE_FL_TRACE = 1 << TSK_TRACE_FL_TRACE_BIT, - TSK_TRACE_FL_GRAPH = 1 << TSK_TRACE_FL_GRAPH_BIT, -}; - -static inline void set_tsk_trace_trace(struct task_struct *tsk) -{ - set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace); -} - -static inline void clear_tsk_trace_trace(struct task_struct *tsk) -{ - clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace); -} - -static inline int test_tsk_trace_trace(struct task_struct *tsk) -{ - return tsk->trace & TSK_TRACE_FL_TRACE; -} - -static inline void set_tsk_trace_graph(struct task_struct *tsk) -{ - set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace); -} - -static inline void clear_tsk_trace_graph(struct task_struct *tsk) -{ - clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace); -} - -static inline int test_tsk_trace_graph(struct task_struct *tsk) -{ - return tsk->trace & TSK_TRACE_FL_GRAPH; -} -#endif /* CONFIG_TRACING */ #endif /* _LINUX_FTRACE_H */ diff --git a/trunk/include/linux/ftrace_irq.h b/trunk/include/linux/ftrace_irq.h deleted file mode 100644 index 366a054d0b05..000000000000 --- a/trunk/include/linux/ftrace_irq.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef _LINUX_FTRACE_IRQ_H -#define _LINUX_FTRACE_IRQ_H - - -#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_GRAPH_TRACER) -extern void ftrace_nmi_enter(void); -extern void ftrace_nmi_exit(void); -#else -static inline void ftrace_nmi_enter(void) { } -static inline void ftrace_nmi_exit(void) { } -#endif - -#endif /* _LINUX_FTRACE_IRQ_H */ diff --git a/trunk/include/linux/futex.h b/trunk/include/linux/futex.h index 8f627b9ae2b1..586ab56a3ec3 100644 --- a/trunk/include/linux/futex.h +++ b/trunk/include/linux/futex.h @@ -164,8 +164,6 @@ union futex_key { } both; }; -#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } - #ifdef CONFIG_FUTEX extern void exit_robust_list(struct task_struct *curr); extern void exit_pi_state_list(struct task_struct *curr); diff --git a/trunk/include/linux/hardirq.h b/trunk/include/linux/hardirq.h index 89a56d79e4c6..181006cc94a0 100644 --- a/trunk/include/linux/hardirq.h +++ b/trunk/include/linux/hardirq.h @@ -4,7 +4,6 @@ #include #include #include -#include #include #include @@ -162,17 +161,7 @@ extern void irq_enter(void); */ extern void irq_exit(void); -#define nmi_enter() \ - do { \ - ftrace_nmi_enter(); \ - lockdep_off(); \ - __irq_enter(); \ - } while (0) -#define nmi_exit() \ - do { \ - __irq_exit(); \ - lockdep_on(); \ - ftrace_nmi_exit(); \ - } while (0) +#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) +#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/trunk/include/linux/interrupt.h b/trunk/include/linux/interrupt.h index 7e85a6e89e41..48e63934fabe 100644 --- a/trunk/include/linux/interrupt.h +++ b/trunk/include/linux/interrupt.h @@ -14,8 +14,6 @@ #include #include #include -#include - #include #include #include diff --git a/trunk/include/linux/irq.h b/trunk/include/linux/irq.h index fde5e6132018..ab70fd604d3a 100644 --- a/trunk/include/linux/irq.h +++ b/trunk/include/linux/irq.h @@ -130,8 +130,6 @@ struct irq_chip { const char *typename; }; -struct timer_rand_state; -struct irq_2_iommu; /** * struct irq_desc - interrupt descriptor * @irq: interrupt number for this descriptor @@ -157,13 +155,6 @@ struct irq_2_iommu; */ struct irq_desc { unsigned int irq; -#ifdef CONFIG_SPARSE_IRQ - struct timer_rand_state *timer_rand_state; - unsigned int *kstat_irqs; -# ifdef CONFIG_INTR_REMAP - struct irq_2_iommu *irq_2_iommu; -# endif -#endif irq_flow_handler_t handle_irq; struct irq_chip *chip; struct msi_desc *msi_desc; @@ -191,51 +182,12 @@ struct irq_desc { const char *name; } ____cacheline_internodealigned_in_smp; -extern void early_irq_init(void); -extern void arch_early_irq_init(void); -extern void arch_init_chip_data(struct irq_desc *desc, int cpu); -extern void arch_init_copy_chip_data(struct irq_desc *old_desc, - struct irq_desc *desc, int cpu); -extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); -#ifndef CONFIG_SPARSE_IRQ extern struct irq_desc irq_desc[NR_IRQS]; static inline struct irq_desc *irq_to_desc(unsigned int irq) { - return (irq < NR_IRQS) ? irq_desc + irq : NULL; -} -static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) -{ - return irq_to_desc(irq); -} - -#else - -extern struct irq_desc *irq_to_desc(unsigned int irq); -extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); -extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu); - -# define for_each_irq_desc(irq, desc) \ - for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; irq++, desc = irq_to_desc(irq)) -# define for_each_irq_desc_reverse(irq, desc) \ - for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; irq--, desc = irq_to_desc(irq)) - -#define kstat_irqs_this_cpu(DESC) \ - ((DESC)->kstat_irqs[smp_processor_id()]) -#define kstat_incr_irqs_this_cpu(irqno, DESC) \ - ((DESC)->kstat_irqs[smp_processor_id()]++) - -#endif - -static inline struct irq_desc * -irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) -{ -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - return irq_to_desc(irq); -#else - return desc; -#endif + return (irq < nr_irqs) ? irq_desc + irq : NULL; } /* @@ -429,11 +381,6 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); #define get_irq_data(irq) (irq_to_desc(irq)->handler_data) #define get_irq_msi(irq) (irq_to_desc(irq)->msi_desc) -#define get_irq_desc_chip(desc) ((desc)->chip) -#define get_irq_desc_chip_data(desc) ((desc)->chip_data) -#define get_irq_desc_data(desc) ((desc)->handler_data) -#define get_irq_desc_msi(desc) ((desc)->msi_desc) - #endif /* CONFIG_GENERIC_HARDIRQS */ #endif /* !CONFIG_S390 */ diff --git a/trunk/include/linux/irqnr.h b/trunk/include/linux/irqnr.h index 95d2b74641f5..452c280c8115 100644 --- a/trunk/include/linux/irqnr.h +++ b/trunk/include/linux/irqnr.h @@ -1,38 +1,24 @@ #ifndef _LINUX_IRQNR_H #define _LINUX_IRQNR_H -/* - * Generic irq_desc iterators: - */ -#ifdef __KERNEL__ - #ifndef CONFIG_GENERIC_HARDIRQS #include # define nr_irqs NR_IRQS # define for_each_irq_desc(irq, desc) \ for (irq = 0; irq < nr_irqs; irq++) - -# define for_each_irq_desc_reverse(irq, desc) \ - for (irq = nr_irqs - 1; irq >= 0; irq--) #else - extern int nr_irqs; -#ifndef CONFIG_SPARSE_IRQ - -struct irq_desc; # define for_each_irq_desc(irq, desc) \ for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++) -# define for_each_irq_desc_reverse(irq, desc) \ - for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1); \ - irq >= 0; irq--, desc--) -#endif -#endif -#define for_each_irq_nr(irq) \ - for (irq = 0; irq < nr_irqs; irq++) +# define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1); \ + irq >= 0; irq--, desc--) +#endif -#endif /* __KERNEL__ */ +#define for_each_irq_nr(irq) \ + for (irq = 0; irq < nr_irqs; irq++) #endif diff --git a/trunk/include/linux/kernel.h b/trunk/include/linux/kernel.h index 269df5a17b30..dc7e0d0a6474 100644 --- a/trunk/include/linux/kernel.h +++ b/trunk/include/linux/kernel.h @@ -141,15 +141,6 @@ extern int _cond_resched(void); (__x < 0) ? -__x : __x; \ }) -#ifdef CONFIG_PROVE_LOCKING -void might_fault(void); -#else -static inline void might_fault(void) -{ - might_sleep(); -} -#endif - extern struct atomic_notifier_head panic_notifier_list; extern long (*panic_blink)(long time); NORET_TYPE void panic(const char * fmt, ...) @@ -197,8 +188,6 @@ extern unsigned long long memparse(const char *ptr, char **retptr); extern int core_kernel_text(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); -extern int func_ptr_is_kernel_text(void *ptr); - struct pid; extern struct pid *session_of_pgrp(struct pid *pgrp); diff --git a/trunk/include/linux/kernel_stat.h b/trunk/include/linux/kernel_stat.h index 4ee4b3d2316f..4a145caeee07 100644 --- a/trunk/include/linux/kernel_stat.h +++ b/trunk/include/linux/kernel_stat.h @@ -28,9 +28,7 @@ struct cpu_usage_stat { struct kernel_stat { struct cpu_usage_stat cpustat; -#ifndef CONFIG_SPARSE_IRQ - unsigned int irqs[NR_IRQS]; -#endif + unsigned int irqs[NR_IRQS]; }; DECLARE_PER_CPU(struct kernel_stat, kstat); @@ -41,10 +39,6 @@ DECLARE_PER_CPU(struct kernel_stat, kstat); extern unsigned long long nr_context_switches(void); -#ifndef CONFIG_SPARSE_IRQ -#define kstat_irqs_this_cpu(irq) \ - (kstat_this_cpu.irqs[irq]) - struct irq_desc; static inline void kstat_incr_irqs_this_cpu(unsigned int irq, @@ -52,17 +46,11 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq, { kstat_this_cpu.irqs[irq]++; } -#endif - -#ifndef CONFIG_SPARSE_IRQ static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) { return kstat_cpu(cpu).irqs[irq]; } -#else -extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); -#endif /* * Number of interrupts per specific IRQ source, since bootup diff --git a/trunk/include/linux/kexec.h b/trunk/include/linux/kexec.h index adc34f2c6eff..17f76fc05173 100644 --- a/trunk/include/linux/kexec.h +++ b/trunk/include/linux/kexec.h @@ -100,10 +100,6 @@ struct kimage { #define KEXEC_TYPE_DEFAULT 0 #define KEXEC_TYPE_CRASH 1 unsigned int preserve_context : 1; - -#ifdef ARCH_HAS_KIMAGE_ARCH - struct kimage_arch arch; -#endif }; diff --git a/trunk/include/linux/lockdep.h b/trunk/include/linux/lockdep.h index 8956daf64abd..29aec6e10020 100644 --- a/trunk/include/linux/lockdep.h +++ b/trunk/include/linux/lockdep.h @@ -73,8 +73,6 @@ struct lock_class_key { struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; }; -#define LOCKSTAT_POINTS 4 - /* * The lock-class itself: */ @@ -121,8 +119,7 @@ struct lock_class { int name_version; #ifdef CONFIG_LOCK_STAT - unsigned long contention_point[LOCKSTAT_POINTS]; - unsigned long contending_point[LOCKSTAT_POINTS]; + unsigned long contention_point[4]; #endif }; @@ -147,7 +144,6 @@ enum bounce_type { struct lock_class_stats { unsigned long contention_point[4]; - unsigned long contending_point[4]; struct lock_time read_waittime; struct lock_time write_waittime; struct lock_time read_holdtime; @@ -169,7 +165,6 @@ struct lockdep_map { const char *name; #ifdef CONFIG_LOCK_STAT int cpu; - unsigned long ip; #endif }; @@ -361,7 +356,7 @@ struct lock_class_key { }; #ifdef CONFIG_LOCK_STAT extern void lock_contended(struct lockdep_map *lock, unsigned long ip); -extern void lock_acquired(struct lockdep_map *lock, unsigned long ip); +extern void lock_acquired(struct lockdep_map *lock); #define LOCK_CONTENDED(_lock, try, lock) \ do { \ @@ -369,13 +364,13 @@ do { \ lock_contended(&(_lock)->dep_map, _RET_IP_); \ lock(_lock); \ } \ - lock_acquired(&(_lock)->dep_map, _RET_IP_); \ + lock_acquired(&(_lock)->dep_map); \ } while (0) #else /* CONFIG_LOCK_STAT */ #define lock_contended(lockdep_map, ip) do {} while (0) -#define lock_acquired(lockdep_map, ip) do {} while (0) +#define lock_acquired(lockdep_map) do {} while (0) #define LOCK_CONTENDED(_lock, try, lock) \ lock(_lock) @@ -486,22 +481,4 @@ static inline void print_irqtrace_events(struct task_struct *curr) # define lock_map_release(l) do { } while (0) #endif -#ifdef CONFIG_PROVE_LOCKING -# define might_lock(lock) \ -do { \ - typecheck(struct lockdep_map *, &(lock)->dep_map); \ - lock_acquire(&(lock)->dep_map, 0, 0, 0, 2, NULL, _THIS_IP_); \ - lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ -} while (0) -# define might_lock_read(lock) \ -do { \ - typecheck(struct lockdep_map *, &(lock)->dep_map); \ - lock_acquire(&(lock)->dep_map, 0, 0, 1, 2, NULL, _THIS_IP_); \ - lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ -} while (0) -#else -# define might_lock(lock) do { } while (0) -# define might_lock_read(lock) do { } while (0) -#endif - #endif /* __LINUX_LOCKDEP_H */ diff --git a/trunk/include/linux/marker.h b/trunk/include/linux/marker.h index b85e74ca782f..889196c7fbb1 100644 --- a/trunk/include/linux/marker.h +++ b/trunk/include/linux/marker.h @@ -12,7 +12,6 @@ * See the file COPYING for more details. */ -#include #include struct module; @@ -49,28 +48,10 @@ struct marker { void (*call)(const struct marker *mdata, void *call_private, ...); struct marker_probe_closure single; struct marker_probe_closure *multi; - const char *tp_name; /* Optional tracepoint name */ - void *tp_cb; /* Optional tracepoint callback */ } __attribute__((aligned(8))); #ifdef CONFIG_MARKERS -#define _DEFINE_MARKER(name, tp_name_str, tp_cb, format) \ - static const char __mstrtab_##name[] \ - __attribute__((section("__markers_strings"))) \ - = #name "\0" format; \ - static struct marker __mark_##name \ - __attribute__((section("__markers"), aligned(8))) = \ - { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ - 0, 0, marker_probe_cb, { __mark_empty_function, NULL},\ - NULL, tp_name_str, tp_cb } - -#define DEFINE_MARKER(name, format) \ - _DEFINE_MARKER(name, NULL, NULL, format) - -#define DEFINE_MARKER_TP(name, tp_name, tp_cb, format) \ - _DEFINE_MARKER(name, #tp_name, tp_cb, format) - /* * Note : the empty asm volatile with read constraint is used here instead of a * "used" attribute to fix a gcc 4.1.x bug. @@ -84,7 +65,14 @@ struct marker { */ #define __trace_mark(generic, name, call_private, format, args...) \ do { \ - DEFINE_MARKER(name, format); \ + static const char __mstrtab_##name[] \ + __attribute__((section("__markers_strings"))) \ + = #name "\0" format; \ + static struct marker __mark_##name \ + __attribute__((section("__markers"), aligned(8))) = \ + { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ + 0, 0, marker_probe_cb, \ + { __mark_empty_function, NULL}, NULL }; \ __mark_check_format(format, ## args); \ if (unlikely(__mark_##name.state)) { \ (*__mark_##name.call) \ @@ -92,39 +80,14 @@ struct marker { } \ } while (0) -#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ - do { \ - void __check_tp_type(void) \ - { \ - register_trace_##tp_name(tp_cb); \ - } \ - DEFINE_MARKER_TP(name, tp_name, tp_cb, format); \ - __mark_check_format(format, ## args); \ - (*__mark_##name.call)(&__mark_##name, call_private, \ - ## args); \ - } while (0) - extern void marker_update_probe_range(struct marker *begin, struct marker *end); - -#define GET_MARKER(name) (__mark_##name) - #else /* !CONFIG_MARKERS */ -#define DEFINE_MARKER(name, tp_name, tp_cb, format) #define __trace_mark(generic, name, call_private, format, args...) \ __mark_check_format(format, ## args) -#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ - do { \ - void __check_tp_type(void) \ - { \ - register_trace_##tp_name(tp_cb); \ - } \ - __mark_check_format(format, ## args); \ - } while (0) static inline void marker_update_probe_range(struct marker *begin, struct marker *end) { } -#define GET_MARKER(name) #endif /* CONFIG_MARKERS */ /** @@ -153,20 +116,6 @@ static inline void marker_update_probe_range(struct marker *begin, #define _trace_mark(name, format, args...) \ __trace_mark(1, name, NULL, format, ## args) -/** - * trace_mark_tp - Marker in a tracepoint callback - * @name: marker name, not quoted. - * @tp_name: tracepoint name, not quoted. - * @tp_cb: tracepoint callback. Should have an associated global symbol so it - * is not optimized away by the compiler (should not be static). - * @format: format string - * @args...: variable argument list - * - * Places a marker in a tracepoint callback. - */ -#define trace_mark_tp(name, tp_name, tp_cb, format, args...) \ - __trace_mark_tp(name, NULL, tp_name, tp_cb, format, ## args) - /** * MARK_NOARGS - Format string for a marker with no argument. */ @@ -187,6 +136,8 @@ extern marker_probe_func __mark_empty_function; extern void marker_probe_cb(const struct marker *mdata, void *call_private, ...); +extern void marker_probe_cb_noarg(const struct marker *mdata, + void *call_private, ...); /* * Connect a probe to a marker. @@ -211,10 +162,8 @@ extern void *marker_get_private_data(const char *name, marker_probe_func *probe, /* * marker_synchronize_unregister must be called between the last marker probe - * unregistration and the first one of - * - the end of module exit function - * - the free of any resource used by the probes - * to ensure the code and data are valid for any possibly running probes. + * unregistration and the end of module exit to make sure there is no caller + * executing a probe when it is freed. */ #define marker_synchronize_unregister() synchronize_sched() diff --git a/trunk/include/linux/msi.h b/trunk/include/linux/msi.h index d2b8a1e8ca11..8f2939227207 100644 --- a/trunk/include/linux/msi.h +++ b/trunk/include/linux/msi.h @@ -10,11 +10,8 @@ struct msi_msg { }; /* Helper functions */ -struct irq_desc; extern void mask_msi_irq(unsigned int irq); extern void unmask_msi_irq(unsigned int irq); -extern void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); -extern void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); extern void read_msi_msg(unsigned int irq, struct msi_msg *msg); extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); diff --git a/trunk/include/linux/mutex.h b/trunk/include/linux/mutex.h index 7a0e5c4f8072..bc6da10ceee0 100644 --- a/trunk/include/linux/mutex.h +++ b/trunk/include/linux/mutex.h @@ -144,8 +144,6 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); /* * NOTE: mutex_trylock() follows the spin_trylock() convention, * not the down_trylock() convention! - * - * Returns 1 if the mutex has been acquired successfully, and 0 on contention. */ extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); diff --git a/trunk/include/linux/netdevice.h b/trunk/include/linux/netdevice.h index e26f54952892..9d77b1d7dca8 100644 --- a/trunk/include/linux/netdevice.h +++ b/trunk/include/linux/netdevice.h @@ -319,7 +319,6 @@ enum { NAPI_STATE_SCHED, /* Poll is scheduled */ NAPI_STATE_DISABLE, /* Disable pending */ - NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ }; extern void __napi_schedule(struct napi_struct *n); @@ -1498,12 +1497,6 @@ static inline void netif_rx_complete(struct net_device *dev, { unsigned long flags; - /* - * don't let napi dequeue from the cpu poll list - * just in case its running on a different cpu - */ - if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state))) - return; local_irq_save(flags); __netif_rx_complete(dev, napi); local_irq_restore(flags); diff --git a/trunk/include/linux/pid.h b/trunk/include/linux/pid.h index bb206c56d1f0..d7e98ff8021e 100644 --- a/trunk/include/linux/pid.h +++ b/trunk/include/linux/pid.h @@ -147,9 +147,9 @@ pid_t pid_vnr(struct pid *pid); #define do_each_pid_task(pid, type, task) \ do { \ struct hlist_node *pos___; \ - if ((pid) != NULL) \ + if (pid != NULL) \ hlist_for_each_entry_rcu((task), pos___, \ - &(pid)->tasks[type], pids[type].node) { + &pid->tasks[type], pids[type].node) { /* * Both old and new leaders may be attached to diff --git a/trunk/include/linux/random.h b/trunk/include/linux/random.h index adbf3bd3c6b3..36f125c0c603 100644 --- a/trunk/include/linux/random.h +++ b/trunk/include/linux/random.h @@ -8,7 +8,6 @@ #define _LINUX_RANDOM_H #include -#include /* ioctl()'s for the random number generator */ @@ -45,56 +44,6 @@ struct rand_pool_info { extern void rand_initialize_irq(int irq); -struct timer_rand_state; -#ifndef CONFIG_SPARSE_IRQ - -extern struct timer_rand_state *irq_timer_state[]; - -static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq) -{ - if (irq >= nr_irqs) - return NULL; - - return irq_timer_state[irq]; -} - -static inline void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state) -{ - if (irq >= nr_irqs) - return; - - irq_timer_state[irq] = state; -} - -#else - -#include -static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - - if (!desc) - return NULL; - - return desc->timer_rand_state; -} - -static inline void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - - if (!desc) - return; - - desc->timer_rand_state = state; -} -#endif - - extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value); extern void add_interrupt_randomness(int irq); diff --git a/trunk/include/linux/rcuclassic.h b/trunk/include/linux/rcuclassic.h index 301dda829e37..5f89b62e6983 100644 --- a/trunk/include/linux/rcuclassic.h +++ b/trunk/include/linux/rcuclassic.h @@ -41,7 +41,7 @@ #include #ifdef CONFIG_RCU_CPU_STALL_DETECTOR -#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rcp->jiffies_stall */ +#define RCU_SECONDS_TILL_STALL_CHECK ( 3 * HZ) /* for rcp->jiffies_stall */ #define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */ #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ diff --git a/trunk/include/linux/rcupdate.h b/trunk/include/linux/rcupdate.h index 895dc9c1088c..86f1f5e43e33 100644 --- a/trunk/include/linux/rcupdate.h +++ b/trunk/include/linux/rcupdate.h @@ -142,7 +142,6 @@ struct rcu_head { * on the write-side to insure proper synchronization. */ #define rcu_read_lock_sched() preempt_disable() -#define rcu_read_lock_sched_notrace() preempt_disable_notrace() /* * rcu_read_unlock_sched - marks the end of a RCU-classic critical section @@ -150,7 +149,6 @@ struct rcu_head { * See rcu_read_lock_sched for more information. */ #define rcu_read_unlock_sched() preempt_enable() -#define rcu_read_unlock_sched_notrace() preempt_enable_notrace() diff --git a/trunk/include/linux/ring_buffer.h b/trunk/include/linux/ring_buffer.h index d363467c8f13..e097c2e6b6dc 100644 --- a/trunk/include/linux/ring_buffer.h +++ b/trunk/include/linux/ring_buffer.h @@ -28,19 +28,17 @@ struct ring_buffer_event { * size = 8 bytes * * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock - * array[0] = tv_nsec - * array[1..2] = tv_sec + * array[0] = tv_nsec + * array[1] = tv_sec * size = 16 bytes * * @RINGBUF_TYPE_DATA: Data record * If len is zero: * array[0] holds the actual length - * array[1..(length+3)/4] holds data - * size = 4 + 4 + length (bytes) + * array[1..(length+3)/4-1] holds data * else * length = len << 2 - * array[0..(length+3)/4-1] holds data - * size = 4 + length (bytes) + * array[0..(length+3)/4] holds data */ enum ring_buffer_type { RINGBUF_TYPE_PADDING, @@ -124,12 +122,6 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); void tracing_on(void); void tracing_off(void); -void tracing_off_permanent(void); - -void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); -void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); -int ring_buffer_read_page(struct ring_buffer *buffer, - void **data_page, int cpu, int full); enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h index e5f928a079e8..55e30d114477 100644 --- a/trunk/include/linux/sched.h +++ b/trunk/include/linux/sched.h @@ -96,7 +96,6 @@ struct exec_domain; struct futex_pi_state; struct robust_list_head; struct bio; -struct bts_tracer; /* * List of flags we want to share for kernel threads, @@ -250,7 +249,7 @@ extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(void); extern void task_rq_unlock_wait(struct task_struct *p); -extern cpumask_var_t nohz_cpu_mask; +extern cpumask_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern int select_nohz_load_balancer(int cpu); #else @@ -260,6 +259,8 @@ static inline int select_nohz_load_balancer(int cpu) } #endif +extern unsigned long rt_needs_cpu(int cpu); + /* * Only dump TASK_* tasks. (0 for all tasks) */ @@ -763,51 +764,20 @@ enum cpu_idle_type { #define SD_SERIALIZE 1024 /* Only a single load balancing instance */ #define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */ -enum powersavings_balance_level { - POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */ - POWERSAVINGS_BALANCE_BASIC, /* Fill one thread/core/package - * first for long running threads - */ - POWERSAVINGS_BALANCE_WAKEUP, /* Also bias task wakeups to semi-idle - * cpu package for power savings - */ - MAX_POWERSAVINGS_BALANCE_LEVELS -}; - -extern int sched_mc_power_savings, sched_smt_power_savings; - -static inline int sd_balance_for_mc_power(void) -{ - if (sched_smt_power_savings) - return SD_POWERSAVINGS_BALANCE; +#define BALANCE_FOR_MC_POWER \ + (sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0) - return 0; -} +#define BALANCE_FOR_PKG_POWER \ + ((sched_mc_power_savings || sched_smt_power_savings) ? \ + SD_POWERSAVINGS_BALANCE : 0) -static inline int sd_balance_for_package_power(void) -{ - if (sched_mc_power_savings | sched_smt_power_savings) - return SD_POWERSAVINGS_BALANCE; +#define test_sd_parent(sd, flag) ((sd->parent && \ + (sd->parent->flags & flag)) ? 1 : 0) - return 0; -} - -/* - * Optimise SD flags for power savings: - * SD_BALANCE_NEWIDLE helps agressive task consolidation and power savings. - * Keep default SD flags if sched_{smt,mc}_power_saving=0 - */ - -static inline int sd_power_saving_flags(void) -{ - if (sched_mc_power_savings | sched_smt_power_savings) - return SD_BALANCE_NEWIDLE; - - return 0; -} struct sched_group { struct sched_group *next; /* Must be a circular list */ + cpumask_t cpumask; /* * CPU power of this group, SCHED_LOAD_SCALE being max power for a @@ -820,15 +790,8 @@ struct sched_group { * (see include/linux/reciprocal_div.h) */ u32 reciprocal_cpu_power; - - unsigned long cpumask[]; }; -static inline struct cpumask *sched_group_cpus(struct sched_group *sg) -{ - return to_cpumask(sg->cpumask); -} - enum sched_domain_level { SD_LV_NONE = 0, SD_LV_SIBLING, @@ -852,6 +815,7 @@ struct sched_domain { struct sched_domain *parent; /* top domain must be null terminated */ struct sched_domain *child; /* bottom domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ + cpumask_t span; /* span of all CPUs in this domain */ unsigned long min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ @@ -906,35 +870,18 @@ struct sched_domain { #ifdef CONFIG_SCHED_DEBUG char *name; #endif - - /* span of all CPUs in this domain */ - unsigned long span[]; }; -static inline struct cpumask *sched_domain_span(struct sched_domain *sd) -{ - return to_cpumask(sd->span); -} - -extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, struct sched_domain_attr *dattr_new); extern int arch_reinit_sched_domains(void); -/* Test a flag in parent sched domain */ -static inline int test_sd_parent(struct sched_domain *sd, int flag) -{ - if (sd->parent && (sd->parent->flags & flag)) - return 1; - - return 0; -} - #else /* CONFIG_SMP */ struct sched_domain_attr; static inline void -partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +partition_sched_domains(int ndoms_new, cpumask_t *doms_new, struct sched_domain_attr *dattr_new) { } @@ -1016,7 +963,7 @@ struct sched_class { void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); void (*set_cpus_allowed)(struct task_struct *p, - const struct cpumask *newmask); + const cpumask_t *newmask); void (*rq_online)(struct rq *rq); void (*rq_offline)(struct rq *rq); @@ -1218,18 +1165,6 @@ struct task_struct { struct list_head ptraced; struct list_head ptrace_entry; -#ifdef CONFIG_X86_PTRACE_BTS - /* - * This is the tracer handle for the ptrace BTS extension. - * This field actually belongs to the ptracer task. - */ - struct bts_tracer *bts; - /* - * The buffer to hold the BTS data. - */ - void *bts_buffer; -#endif /* CONFIG_X86_PTRACE_BTS */ - /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX]; struct list_head thread_group; @@ -1421,23 +1356,6 @@ struct task_struct { unsigned long default_timer_slack_ns; struct list_head *scm_work_list; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* Index of current stored adress in ret_stack */ - int curr_ret_stack; - /* Stack of return addresses for return function tracing */ - struct ftrace_ret_stack *ret_stack; - /* - * Number of functions that haven't been traced - * because of depth overrun. - */ - atomic_t trace_overrun; - /* Pause for the tracing */ - atomic_t tracing_graph_pause; -#endif -#ifdef CONFIG_TRACING - /* state flags for use by tracers */ - unsigned long trace; -#endif }; /* @@ -1676,12 +1594,12 @@ extern cputime_t task_gtime(struct task_struct *p); #ifdef CONFIG_SMP extern int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask); + const cpumask_t *new_mask); #else static inline int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask) + const cpumask_t *new_mask) { - if (!cpumask_test_cpu(0, new_mask)) + if (!cpu_isset(0, *new_mask)) return -EINVAL; return 0; } @@ -2294,8 +2212,10 @@ __trace_special(void *__tr, void *__data, } #endif -extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); -extern long sched_getaffinity(pid_t pid, struct cpumask *mask); +extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); +extern long sched_getaffinity(pid_t pid, cpumask_t *mask); + +extern int sched_mc_power_savings, sched_smt_power_savings; extern void normalize_rt_tasks(void); @@ -2304,7 +2224,6 @@ extern void normalize_rt_tasks(void); extern struct task_group init_task_group; #ifdef CONFIG_USER_SCHED extern struct task_group root_task_group; -extern void set_tg_uid(struct user_struct *user); #endif extern struct task_group *sched_create_group(struct task_group *parent); diff --git a/trunk/include/linux/seq_file.h b/trunk/include/linux/seq_file.h index b3dfa72f13b9..dc50bcc282a8 100644 --- a/trunk/include/linux/seq_file.h +++ b/trunk/include/linux/seq_file.h @@ -34,7 +34,6 @@ struct seq_operations { #define SEQ_SKIP 1 -char *mangle_path(char *s, char *p, char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); loff_t seq_lseek(struct file *, loff_t, int); diff --git a/trunk/include/linux/smp.h b/trunk/include/linux/smp.h index 6e7ba16ff454..3f9a60043a97 100644 --- a/trunk/include/linux/smp.h +++ b/trunk/include/linux/smp.h @@ -146,8 +146,6 @@ static inline void smp_send_reschedule(int cpu) { } }) #define smp_call_function_mask(mask, func, info, wait) \ (up_smp_call_function(func, info)) -#define smp_call_function_many(mask, func, info, wait) \ - (up_smp_call_function(func, info)) static inline void init_call_single_data(void) { } diff --git a/trunk/include/linux/stacktrace.h b/trunk/include/linux/stacktrace.h index 1a8cecc4f38c..b106fd8e0d5c 100644 --- a/trunk/include/linux/stacktrace.h +++ b/trunk/include/linux/stacktrace.h @@ -15,17 +15,9 @@ extern void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace); extern void print_stack_trace(struct stack_trace *trace, int spaces); - -#ifdef CONFIG_USER_STACKTRACE_SUPPORT -extern void save_stack_trace_user(struct stack_trace *trace); -#else -# define save_stack_trace_user(trace) do { } while (0) -#endif - #else # define save_stack_trace(trace) do { } while (0) # define save_stack_trace_tsk(tsk, trace) do { } while (0) -# define save_stack_trace_user(trace) do { } while (0) # define print_stack_trace(trace, spaces) do { } while (0) #endif diff --git a/trunk/include/linux/topology.h b/trunk/include/linux/topology.h index e632d29f0544..117f1b7405cf 100644 --- a/trunk/include/linux/topology.h +++ b/trunk/include/linux/topology.h @@ -49,7 +49,7 @@ for_each_online_node(node) \ if (nr_cpus_node(node)) -int arch_update_cpu_topology(void); +void arch_update_cpu_topology(void); /* Conform to ACPI 2.0 SLIT distance definitions */ #define LOCAL_DISTANCE 10 @@ -125,8 +125,7 @@ int arch_update_cpu_topology(void); | SD_WAKE_AFFINE \ | SD_WAKE_BALANCE \ | SD_SHARE_PKG_RESOURCES\ - | sd_balance_for_mc_power()\ - | sd_power_saving_flags(),\ + | BALANCE_FOR_MC_POWER, \ .last_balance = jiffies, \ .balance_interval = 1, \ } @@ -151,8 +150,7 @@ int arch_update_cpu_topology(void); | SD_BALANCE_FORK \ | SD_WAKE_AFFINE \ | SD_WAKE_BALANCE \ - | sd_balance_for_package_power()\ - | sd_power_saving_flags(),\ + | BALANCE_FOR_PKG_POWER,\ .last_balance = jiffies, \ .balance_interval = 1, \ } diff --git a/trunk/include/linux/tracepoint.h b/trunk/include/linux/tracepoint.h index 757005458366..c5bb39c7a770 100644 --- a/trunk/include/linux/tracepoint.h +++ b/trunk/include/linux/tracepoint.h @@ -24,12 +24,8 @@ struct tracepoint { const char *name; /* Tracepoint name */ int state; /* State. */ void **funcs; -} __attribute__((aligned(32))); /* - * Aligned on 32 bytes because it is - * globally visible and gcc happily - * align these on the structure size. - * Keep in sync with vmlinux.lds.h. - */ +} __attribute__((aligned(8))); + #define TPPROTO(args...) args #define TPARGS(args...) args @@ -44,14 +40,14 @@ struct tracepoint { do { \ void **it_func; \ \ - rcu_read_lock_sched_notrace(); \ + rcu_read_lock_sched(); \ it_func = rcu_dereference((tp)->funcs); \ if (it_func) { \ do { \ ((void(*)(proto))(*it_func))(args); \ } while (*(++it_func)); \ } \ - rcu_read_unlock_sched_notrace(); \ + rcu_read_unlock_sched(); \ } while (0) /* @@ -59,40 +55,35 @@ struct tracepoint { * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. */ -#define DECLARE_TRACE(name, proto, args) \ - extern struct tracepoint __tracepoint_##name; \ +#define DEFINE_TRACE(name, proto, args) \ static inline void trace_##name(proto) \ { \ + static const char __tpstrtab_##name[] \ + __attribute__((section("__tracepoints_strings"))) \ + = #name ":" #proto; \ + static struct tracepoint __tracepoint_##name \ + __attribute__((section("__tracepoints"), aligned(8))) = \ + { __tpstrtab_##name, 0, NULL }; \ if (unlikely(__tracepoint_##name.state)) \ __DO_TRACE(&__tracepoint_##name, \ TPPROTO(proto), TPARGS(args)); \ } \ static inline int register_trace_##name(void (*probe)(proto)) \ { \ - return tracepoint_probe_register(#name, (void *)probe); \ + return tracepoint_probe_register(#name ":" #proto, \ + (void *)probe); \ } \ - static inline int unregister_trace_##name(void (*probe)(proto)) \ + static inline void unregister_trace_##name(void (*probe)(proto))\ { \ - return tracepoint_probe_unregister(#name, (void *)probe);\ + tracepoint_probe_unregister(#name ":" #proto, \ + (void *)probe); \ } -#define DEFINE_TRACE(name) \ - static const char __tpstrtab_##name[] \ - __attribute__((section("__tracepoints_strings"))) = #name; \ - struct tracepoint __tracepoint_##name \ - __attribute__((section("__tracepoints"), aligned(32))) = \ - { __tpstrtab_##name, 0, NULL } - -#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ - EXPORT_SYMBOL_GPL(__tracepoint_##name) -#define EXPORT_TRACEPOINT_SYMBOL(name) \ - EXPORT_SYMBOL(__tracepoint_##name) - extern void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end); #else /* !CONFIG_TRACEPOINTS */ -#define DECLARE_TRACE(name, proto, args) \ +#define DEFINE_TRACE(name, proto, args) \ static inline void _do_trace_##name(struct tracepoint *tp, proto) \ { } \ static inline void trace_##name(proto) \ @@ -101,14 +92,8 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, { \ return -ENOSYS; \ } \ - static inline int unregister_trace_##name(void (*probe)(proto)) \ - { \ - return -ENOSYS; \ - } - -#define DEFINE_TRACE(name) -#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) -#define EXPORT_TRACEPOINT_SYMBOL(name) + static inline void unregister_trace_##name(void (*probe)(proto))\ + { } static inline void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) @@ -127,10 +112,6 @@ extern int tracepoint_probe_register(const char *name, void *probe); */ extern int tracepoint_probe_unregister(const char *name, void *probe); -extern int tracepoint_probe_register_noupdate(const char *name, void *probe); -extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe); -extern void tracepoint_probe_update_all(void); - struct tracepoint_iter { struct module *module; struct tracepoint *tracepoint; diff --git a/trunk/include/linux/tty.h b/trunk/include/linux/tty.h index eaec37c9d83d..3b8121d4e36f 100644 --- a/trunk/include/linux/tty.h +++ b/trunk/include/linux/tty.h @@ -325,7 +325,7 @@ extern struct class *tty_class; * go away */ -static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) +extern inline struct tty_struct *tty_kref_get(struct tty_struct *tty) { if (tty) kref_get(&tty->kref); diff --git a/trunk/include/linux/uaccess.h b/trunk/include/linux/uaccess.h index 6b58367d145e..fec6decfb983 100644 --- a/trunk/include/linux/uaccess.h +++ b/trunk/include/linux/uaccess.h @@ -78,7 +78,7 @@ static inline unsigned long __copy_from_user_nocache(void *to, \ set_fs(KERNEL_DS); \ pagefault_disable(); \ - ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval)); \ + ret = __get_user(retval, (__force typeof(retval) __user *)(addr)); \ pagefault_enable(); \ set_fs(old_fs); \ ret; \ diff --git a/trunk/include/linux/usb/ch9.h b/trunk/include/linux/usb/ch9.h index 9b42baed3900..73a2f4eb1f7a 100644 --- a/trunk/include/linux/usb/ch9.h +++ b/trunk/include/linux/usb/ch9.h @@ -158,12 +158,8 @@ struct usb_ctrlrequest { * (rarely) accepted by SET_DESCRIPTOR. * * Note that all multi-byte values here are encoded in little endian - * byte order "on the wire". Within the kernel and when exposed - * through the Linux-USB APIs, they are not converted to cpu byte - * order; it is the responsibility of the client code to do this. - * The single exception is when device and configuration descriptors (but - * not other descriptors) are read from usbfs (i.e. /proc/bus/usb/BBB/DDD); - * in this case the fields are converted to host endianness by the kernel. + * byte order "on the wire". But when exposed through Linux-USB APIs, + * they've been converted to cpu byte order. */ /* diff --git a/trunk/include/trace/block.h b/trunk/include/trace/block.h deleted file mode 100644 index 25c6a1fd5b77..000000000000 --- a/trunk/include/trace/block.h +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef _TRACE_BLOCK_H -#define _TRACE_BLOCK_H - -#include -#include - -DECLARE_TRACE(block_rq_abort, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); - -DECLARE_TRACE(block_rq_insert, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); - -DECLARE_TRACE(block_rq_issue, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); - -DECLARE_TRACE(block_rq_requeue, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); - -DECLARE_TRACE(block_rq_complete, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); - -DECLARE_TRACE(block_bio_bounce, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); - -DECLARE_TRACE(block_bio_complete, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); - -DECLARE_TRACE(block_bio_backmerge, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); - -DECLARE_TRACE(block_bio_frontmerge, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); - -DECLARE_TRACE(block_bio_queue, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); - -DECLARE_TRACE(block_getrq, - TPPROTO(struct request_queue *q, struct bio *bio, int rw), - TPARGS(q, bio, rw)); - -DECLARE_TRACE(block_sleeprq, - TPPROTO(struct request_queue *q, struct bio *bio, int rw), - TPARGS(q, bio, rw)); - -DECLARE_TRACE(block_plug, - TPPROTO(struct request_queue *q), - TPARGS(q)); - -DECLARE_TRACE(block_unplug_timer, - TPPROTO(struct request_queue *q), - TPARGS(q)); - -DECLARE_TRACE(block_unplug_io, - TPPROTO(struct request_queue *q), - TPARGS(q)); - -DECLARE_TRACE(block_split, - TPPROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), - TPARGS(q, bio, pdu)); - -DECLARE_TRACE(block_remap, - TPPROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t from, sector_t to), - TPARGS(q, bio, dev, from, to)); - -#endif diff --git a/trunk/include/trace/boot.h b/trunk/include/trace/boot.h deleted file mode 100644 index 088ea089e31d..000000000000 --- a/trunk/include/trace/boot.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef _LINUX_TRACE_BOOT_H -#define _LINUX_TRACE_BOOT_H - -#include -#include -#include - -/* - * Structure which defines the trace of an initcall - * while it is called. - * You don't have to fill the func field since it is - * only used internally by the tracer. - */ -struct boot_trace_call { - pid_t caller; - char func[KSYM_SYMBOL_LEN]; -}; - -/* - * Structure which defines the trace of an initcall - * while it returns. - */ -struct boot_trace_ret { - char func[KSYM_SYMBOL_LEN]; - int result; - unsigned long long duration; /* nsecs */ -}; - -#ifdef CONFIG_BOOT_TRACER -/* Append the traces on the ring-buffer */ -extern void trace_boot_call(struct boot_trace_call *bt, initcall_t fn); -extern void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn); - -/* Tells the tracer that smp_pre_initcall is finished. - * So we can start the tracing - */ -extern void start_boot_trace(void); - -/* Resume the tracing of other necessary events - * such as sched switches - */ -extern void enable_boot_trace(void); - -/* Suspend this tracing. Actually, only sched_switches tracing have - * to be suspended. Initcalls doesn't need it.) - */ -extern void disable_boot_trace(void); -#else -static inline -void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { } - -static inline -void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { } - -static inline void start_boot_trace(void) { } -static inline void enable_boot_trace(void) { } -static inline void disable_boot_trace(void) { } -#endif /* CONFIG_BOOT_TRACER */ - -#endif /* __LINUX_TRACE_BOOT_H */ diff --git a/trunk/include/trace/sched.h b/trunk/include/trace/sched.h index 9b2854abf7e2..ad47369d01b5 100644 --- a/trunk/include/trace/sched.h +++ b/trunk/include/trace/sched.h @@ -4,52 +4,52 @@ #include #include -DECLARE_TRACE(sched_kthread_stop, +DEFINE_TRACE(sched_kthread_stop, TPPROTO(struct task_struct *t), TPARGS(t)); -DECLARE_TRACE(sched_kthread_stop_ret, +DEFINE_TRACE(sched_kthread_stop_ret, TPPROTO(int ret), TPARGS(ret)); -DECLARE_TRACE(sched_wait_task, +DEFINE_TRACE(sched_wait_task, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DECLARE_TRACE(sched_wakeup, +DEFINE_TRACE(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DECLARE_TRACE(sched_wakeup_new, +DEFINE_TRACE(sched_wakeup_new, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DECLARE_TRACE(sched_switch, +DEFINE_TRACE(sched_switch, TPPROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TPARGS(rq, prev, next)); -DECLARE_TRACE(sched_migrate_task, +DEFINE_TRACE(sched_migrate_task, TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu), TPARGS(rq, p, dest_cpu)); -DECLARE_TRACE(sched_process_free, +DEFINE_TRACE(sched_process_free, TPPROTO(struct task_struct *p), TPARGS(p)); -DECLARE_TRACE(sched_process_exit, +DEFINE_TRACE(sched_process_exit, TPPROTO(struct task_struct *p), TPARGS(p)); -DECLARE_TRACE(sched_process_wait, +DEFINE_TRACE(sched_process_wait, TPPROTO(struct pid *pid), TPARGS(pid)); -DECLARE_TRACE(sched_process_fork, +DEFINE_TRACE(sched_process_fork, TPPROTO(struct task_struct *parent, struct task_struct *child), TPARGS(parent, child)); -DECLARE_TRACE(sched_signal_send, +DEFINE_TRACE(sched_signal_send, TPPROTO(int sig, struct task_struct *p), TPARGS(sig, p)); diff --git a/trunk/init/Kconfig b/trunk/init/Kconfig index b3782c6d5ede..7656623f5006 100644 --- a/trunk/init/Kconfig +++ b/trunk/init/Kconfig @@ -808,7 +808,6 @@ config TRACEPOINTS config MARKERS bool "Activate markers" - depends on TRACEPOINTS help Place an empty function call at each marker site. Can be dynamically changed for a probe function. diff --git a/trunk/init/main.c b/trunk/init/main.c index 9d761aa53296..7e117a231af1 100644 --- a/trunk/init/main.c +++ b/trunk/init/main.c @@ -63,7 +63,6 @@ #include #include #include -#include #include #include @@ -540,15 +539,6 @@ void __init __weak thread_info_cache_init(void) { } -void __init __weak arch_early_irq_init(void) -{ -} - -void __init __weak early_irq_init(void) -{ - arch_early_irq_init(); -} - asmlinkage void __init start_kernel(void) { char * command_line; @@ -613,8 +603,6 @@ asmlinkage void __init start_kernel(void) sort_main_extable(); trap_init(); rcu_init(); - /* init some links before init_ISA_irqs() */ - early_irq_init(); init_IRQ(); pidhash_init(); init_timers(); @@ -715,35 +703,31 @@ core_param(initcall_debug, initcall_debug, bool, 0644); int do_one_initcall(initcall_t fn) { int count = preempt_count(); - ktime_t calltime, delta, rettime; + ktime_t delta; char msgbuf[64]; - struct boot_trace_call call; - struct boot_trace_ret ret; + struct boot_trace it; if (initcall_debug) { - call.caller = task_pid_nr(current); - printk("calling %pF @ %i\n", fn, call.caller); - calltime = ktime_get(); - trace_boot_call(&call, fn); - enable_boot_trace(); + it.caller = task_pid_nr(current); + printk("calling %pF @ %i\n", fn, it.caller); + it.calltime = ktime_get(); } - ret.result = fn(); + it.result = fn(); if (initcall_debug) { - disable_boot_trace(); - rettime = ktime_get(); - delta = ktime_sub(rettime, calltime); - ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10; - trace_boot_ret(&ret, fn); + it.rettime = ktime_get(); + delta = ktime_sub(it.rettime, it.calltime); + it.duration = (unsigned long long) delta.tv64 >> 10; printk("initcall %pF returned %d after %Ld usecs\n", fn, - ret.result, ret.duration); + it.result, it.duration); + trace_boot(&it, fn); } msgbuf[0] = 0; - if (ret.result && ret.result != -ENODEV && initcall_debug) - sprintf(msgbuf, "error code %d ", ret.result); + if (it.result && it.result != -ENODEV && initcall_debug) + sprintf(msgbuf, "error code %d ", it.result); if (preempt_count() != count) { strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf)); @@ -757,7 +741,7 @@ int do_one_initcall(initcall_t fn) printk("initcall %pF returned with %s\n", fn, msgbuf); } - return ret.result; + return it.result; } @@ -898,7 +882,7 @@ static int __init kernel_init(void * unused) * we're essentially up and running. Get rid of the * initmem segments and start the user-mode stuff.. */ - + stop_boot_trace(); init_post(); return 0; } diff --git a/trunk/kernel/Makefile b/trunk/kernel/Makefile index 6a212b842d86..19fad003b19d 100644 --- a/trunk/kernel/Makefile +++ b/trunk/kernel/Makefile @@ -19,6 +19,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg CFLAGS_REMOVE_rtmutex-debug.o = -pg CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_sched_clock.o = -pg +CFLAGS_REMOVE_sched.o = -pg endif obj-$(CONFIG_FREEZER) += freezer.o @@ -89,7 +90,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o -ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) +ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is # needed for x86 only. Why this used to be enabled for all architectures is beyond # me. I suspect most platforms don't need this, but until we know that for sure diff --git a/trunk/kernel/cgroup.c b/trunk/kernel/cgroup.c index 8185a0f09594..fe00b3b983a8 100644 --- a/trunk/kernel/cgroup.c +++ b/trunk/kernel/cgroup.c @@ -702,7 +702,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, * any child cgroups exist. This is theoretically supportable * but involves complex error handling, so it's being left until * later */ - if (root->number_of_cgroups > 1) + if (!list_empty(&cgrp->children)) return -EBUSY; /* Process each subsystem */ diff --git a/trunk/kernel/exit.c b/trunk/kernel/exit.c index 61ba5b4b10cf..2d8be7ebb0f7 100644 --- a/trunk/kernel/exit.c +++ b/trunk/kernel/exit.c @@ -53,10 +53,6 @@ #include #include -DEFINE_TRACE(sched_process_free); -DEFINE_TRACE(sched_process_exit); -DEFINE_TRACE(sched_process_wait); - static void exit_mm(struct task_struct * tsk); static inline int task_detached(struct task_struct *p) @@ -1127,6 +1123,7 @@ NORET_TYPE void do_exit(long code) preempt_disable(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; + schedule(); BUG(); /* Avoid "noreturn function does return". */ @@ -1324,10 +1321,10 @@ static int wait_task_zombie(struct task_struct *p, int options, * group, which consolidates times for all threads in the * group including the group leader. */ - thread_group_cputime(p, &cputime); spin_lock_irq(&p->parent->sighand->siglock); psig = p->parent->signal; sig = p->signal; + thread_group_cputime(p, &cputime); psig->cutime = cputime_add(psig->cutime, cputime_add(cputime.utime, diff --git a/trunk/kernel/extable.c b/trunk/kernel/extable.c index e136ed8d82ba..a26cb2e17023 100644 --- a/trunk/kernel/extable.c +++ b/trunk/kernel/extable.c @@ -17,7 +17,6 @@ */ #include #include -#include #include #include @@ -41,7 +40,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr) return e; } -__notrace_funcgraph int core_kernel_text(unsigned long addr) +int core_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) @@ -54,7 +53,7 @@ __notrace_funcgraph int core_kernel_text(unsigned long addr) return 0; } -__notrace_funcgraph int __kernel_text_address(unsigned long addr) +int __kernel_text_address(unsigned long addr) { if (core_kernel_text(addr)) return 1; @@ -67,19 +66,3 @@ int kernel_text_address(unsigned long addr) return 1; return module_text_address(addr) != NULL; } - -/* - * On some architectures (PPC64, IA64) function pointers - * are actually only tokens to some data that then holds the - * real function address. As a result, to find if a function - * pointer is part of the kernel text, we need to do some - * special dereferencing first. - */ -int func_ptr_is_kernel_text(void *ptr) -{ - unsigned long addr; - addr = (unsigned long) dereference_function_descriptor(ptr); - if (core_kernel_text(addr)) - return 1; - return module_text_address(addr) != NULL; -} diff --git a/trunk/kernel/fork.c b/trunk/kernel/fork.c index 7b93da72d4a2..495da2e9a8b4 100644 --- a/trunk/kernel/fork.c +++ b/trunk/kernel/fork.c @@ -47,7 +47,6 @@ #include #include #include -#include #include #include #include @@ -81,8 +80,6 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0; __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ -DEFINE_TRACE(sched_process_fork); - int nr_processes(void) { int cpu; @@ -140,7 +137,6 @@ void free_task(struct task_struct *tsk) prop_local_destroy_single(&tsk->dirties); free_thread_info(tsk->stack); rt_mutex_debug_task_free(tsk); - ftrace_graph_exit_task(tsk); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); @@ -1140,8 +1136,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, } } - ftrace_graph_init_task(p); - p->pid = pid_nr(pid); p->tgid = p->pid; if (clone_flags & CLONE_THREAD) @@ -1150,7 +1144,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (current->nsproxy != p->nsproxy) { retval = ns_cgroup_clone(p, pid); if (retval) - goto bad_fork_free_graph; + goto bad_fork_free_pid; } p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; @@ -1243,7 +1237,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; - goto bad_fork_free_graph; + goto bad_fork_free_pid; } if (clone_flags & CLONE_THREAD) { @@ -1280,8 +1274,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, cgroup_post_fork(p); return p; -bad_fork_free_graph: - ftrace_graph_exit_task(p); bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); diff --git a/trunk/kernel/futex.c b/trunk/kernel/futex.c index e10c5c8786a6..8af10027514b 100644 --- a/trunk/kernel/futex.c +++ b/trunk/kernel/futex.c @@ -122,6 +122,24 @@ struct futex_hash_bucket { static struct futex_hash_bucket futex_queues[1<mmap_sem, when futex is shared + */ +static inline void futex_lock_mm(struct rw_semaphore *fshared) +{ + if (fshared) + down_read(fshared); +} + +/* + * Release mm->mmap_sem, when the futex is shared + */ +static inline void futex_unlock_mm(struct rw_semaphore *fshared) +{ + if (fshared) + up_read(fshared); +} + /* * We hash on the keys returned from get_futex_key (see below). */ @@ -143,45 +161,6 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2) && key1->both.offset == key2->both.offset); } -/* - * Take a reference to the resource addressed by a key. - * Can be called while holding spinlocks. - * - */ -static void get_futex_key_refs(union futex_key *key) -{ - if (!key->both.ptr) - return; - - switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { - case FUT_OFF_INODE: - atomic_inc(&key->shared.inode->i_count); - break; - case FUT_OFF_MMSHARED: - atomic_inc(&key->private.mm->mm_count); - break; - } -} - -/* - * Drop a reference to the resource addressed by a key. - * The hash bucket spinlock must not be held. - */ -static void drop_futex_key_refs(union futex_key *key) -{ - if (!key->both.ptr) - return; - - switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { - case FUT_OFF_INODE: - iput(key->shared.inode); - break; - case FUT_OFF_MMSHARED: - mmdrop(key->private.mm); - break; - } -} - /** * get_futex_key - Get parameters which are the keys for a futex. * @uaddr: virtual address of the futex @@ -200,10 +179,12 @@ static void drop_futex_key_refs(union futex_key *key) * For other futexes, it points to ¤t->mm->mmap_sem and * caller must have taken the reader lock. but NOT any spinlocks. */ -static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) +static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared, + union futex_key *key) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; struct page *page; int err; @@ -227,50 +208,100 @@ static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) return -EFAULT; key->private.mm = mm; key->private.address = address; - get_futex_key_refs(key); return 0; } + /* + * The futex is hashed differently depending on whether + * it's in a shared or private mapping. So check vma first. + */ + vma = find_extend_vma(mm, address); + if (unlikely(!vma)) + return -EFAULT; -again: - err = get_user_pages_fast(address, 1, 0, &page); - if (err < 0) - return err; - - lock_page(page); - if (!page->mapping) { - unlock_page(page); - put_page(page); - goto again; - } + /* + * Permissions. + */ + if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ)) + return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES; /* * Private mappings are handled in a simple way. * * NOTE: When userspace waits on a MAP_SHARED mapping, even if * it's a read-only handle, it's expected that futexes attach to - * the object not the particular process. + * the object not the particular process. Therefore we use + * VM_MAYSHARE here, not VM_SHARED which is restricted to shared + * mappings of _writable_ handles. */ - if (PageAnon(page)) { - key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ + if (likely(!(vma->vm_flags & VM_MAYSHARE))) { + key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */ key->private.mm = mm; key->private.address = address; - } else { - key->both.offset |= FUT_OFF_INODE; /* inode-based key */ - key->shared.inode = page->mapping->host; - key->shared.pgoff = page->index; + return 0; } - get_futex_key_refs(key); + /* + * Linear file mappings are also simple. + */ + key->shared.inode = vma->vm_file->f_path.dentry->d_inode; + key->both.offset |= FUT_OFF_INODE; /* inode-based key. */ + if (likely(!(vma->vm_flags & VM_NONLINEAR))) { + key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT) + + vma->vm_pgoff); + return 0; + } - unlock_page(page); - put_page(page); - return 0; + /* + * We could walk the page table to read the non-linear + * pte, and get the page index without fetching the page + * from swap. But that's a lot of code to duplicate here + * for a rare case, so we simply fetch the page. + */ + err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL); + if (err >= 0) { + key->shared.pgoff = + page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + put_page(page); + return 0; + } + return err; +} + +/* + * Take a reference to the resource addressed by a key. + * Can be called while holding spinlocks. + * + */ +static void get_futex_key_refs(union futex_key *key) +{ + if (key->both.ptr == NULL) + return; + switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { + case FUT_OFF_INODE: + atomic_inc(&key->shared.inode->i_count); + break; + case FUT_OFF_MMSHARED: + atomic_inc(&key->private.mm->mm_count); + break; + } } -static inline -void put_futex_key(int fshared, union futex_key *key) +/* + * Drop a reference to the resource addressed by a key. + * The hash bucket spinlock must not be held. + */ +static void drop_futex_key_refs(union futex_key *key) { - drop_futex_key_refs(key); + if (!key->both.ptr) + return; + switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { + case FUT_OFF_INODE: + iput(key->shared.inode); + break; + case FUT_OFF_MMSHARED: + mmdrop(key->private.mm); + break; + } } static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) @@ -297,8 +328,10 @@ static int get_futex_value_locked(u32 *dest, u32 __user *from) /* * Fault handling. + * if fshared is non NULL, current->mm->mmap_sem is already held */ -static int futex_handle_fault(unsigned long address, int attempt) +static int futex_handle_fault(unsigned long address, + struct rw_semaphore *fshared, int attempt) { struct vm_area_struct * vma; struct mm_struct *mm = current->mm; @@ -307,7 +340,8 @@ static int futex_handle_fault(unsigned long address, int attempt) if (attempt > 2) return ret; - down_read(&mm->mmap_sem); + if (!fshared) + down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (vma && address >= vma->vm_start && (vma->vm_flags & VM_WRITE)) { @@ -327,7 +361,8 @@ static int futex_handle_fault(unsigned long address, int attempt) current->min_flt++; } } - up_read(&mm->mmap_sem); + if (!fshared) + up_read(&mm->mmap_sem); return ret; } @@ -350,7 +385,6 @@ static int refill_pi_state_cache(void) /* pi_mutex gets initialized later */ pi_state->owner = NULL; atomic_set(&pi_state->refcount, 1); - pi_state->key = FUTEX_KEY_INIT; current->pi_state_cache = pi_state; @@ -428,7 +462,7 @@ void exit_pi_state_list(struct task_struct *curr) struct list_head *next, *head = &curr->pi_state_list; struct futex_pi_state *pi_state; struct futex_hash_bucket *hb; - union futex_key key = FUTEX_KEY_INIT; + union futex_key key; if (!futex_cmpxchg_enabled) return; @@ -685,17 +719,20 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) * Wake up all waiters hashed on the physical page that is mapped * to this virtual address: */ -static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) +static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, + int nr_wake, u32 bitset) { struct futex_hash_bucket *hb; struct futex_q *this, *next; struct plist_head *head; - union futex_key key = FUTEX_KEY_INIT; + union futex_key key; int ret; if (!bitset) return -EINVAL; + futex_lock_mm(fshared); + ret = get_futex_key(uaddr, fshared, &key); if (unlikely(ret != 0)) goto out; @@ -723,7 +760,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) spin_unlock(&hb->lock); out: - put_futex_key(fshared, &key); + futex_unlock_mm(fshared); return ret; } @@ -732,16 +769,19 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) * to this virtual address: */ static int -futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, +futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared, + u32 __user *uaddr2, int nr_wake, int nr_wake2, int op) { - union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; + union futex_key key1, key2; struct futex_hash_bucket *hb1, *hb2; struct plist_head *head; struct futex_q *this, *next; int ret, op_ret, attempt = 0; retryfull: + futex_lock_mm(fshared); + ret = get_futex_key(uaddr1, fshared, &key1); if (unlikely(ret != 0)) goto out; @@ -786,12 +826,18 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, */ if (attempt++) { ret = futex_handle_fault((unsigned long)uaddr2, - attempt); + fshared, attempt); if (ret) goto out; goto retry; } + /* + * If we would have faulted, release mmap_sem, + * fault it in and start all over again. + */ + futex_unlock_mm(fshared); + ret = get_user(dummy, uaddr2); if (ret) return ret; @@ -827,8 +873,7 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, if (hb1 != hb2) spin_unlock(&hb2->lock); out: - put_futex_key(fshared, &key2); - put_futex_key(fshared, &key1); + futex_unlock_mm(fshared); return ret; } @@ -837,16 +882,19 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, * Requeue all waiters hashed on one physical page to another * physical page. */ -static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, +static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared, + u32 __user *uaddr2, int nr_wake, int nr_requeue, u32 *cmpval) { - union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; + union futex_key key1, key2; struct futex_hash_bucket *hb1, *hb2; struct plist_head *head1; struct futex_q *this, *next; int ret, drop_count = 0; retry: + futex_lock_mm(fshared); + ret = get_futex_key(uaddr1, fshared, &key1); if (unlikely(ret != 0)) goto out; @@ -869,6 +917,12 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, if (hb1 != hb2) spin_unlock(&hb2->lock); + /* + * If we would have faulted, release mmap_sem, fault + * it in and start all over again. + */ + futex_unlock_mm(fshared); + ret = get_user(curval, uaddr1); if (!ret) @@ -920,8 +974,7 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, drop_futex_key_refs(&key1); out: - put_futex_key(fshared, &key2); - put_futex_key(fshared, &key1); + futex_unlock_mm(fshared); return ret; } @@ -1043,7 +1096,8 @@ static void unqueue_me_pi(struct futex_q *q) * private futexes. */ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, - struct task_struct *newowner, int fshared) + struct task_struct *newowner, + struct rw_semaphore *fshared) { u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; struct futex_pi_state *pi_state = q->pi_state; @@ -1122,7 +1176,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, handle_fault: spin_unlock(q->lock_ptr); - ret = futex_handle_fault((unsigned long)uaddr, attempt++); + ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++); spin_lock(q->lock_ptr); @@ -1146,7 +1200,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, static long futex_wait_restart(struct restart_block *restart); -static int futex_wait(u32 __user *uaddr, int fshared, +static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, u32 val, ktime_t *abs_time, u32 bitset) { struct task_struct *curr = current; @@ -1164,7 +1218,8 @@ static int futex_wait(u32 __user *uaddr, int fshared, q.pi_state = NULL; q.bitset = bitset; retry: - q.key = FUTEX_KEY_INIT; + futex_lock_mm(fshared); + ret = get_futex_key(uaddr, fshared, &q.key); if (unlikely(ret != 0)) goto out_release_sem; @@ -1196,6 +1251,12 @@ static int futex_wait(u32 __user *uaddr, int fshared, if (unlikely(ret)) { queue_unlock(&q, hb); + /* + * If we would have faulted, release mmap_sem, fault it in and + * start all over again. + */ + futex_unlock_mm(fshared); + ret = get_user(uval, uaddr); if (!ret) @@ -1209,6 +1270,12 @@ static int futex_wait(u32 __user *uaddr, int fshared, /* Only actually queue if *uaddr contained val. */ queue_me(&q, hb); + /* + * Now the futex is queued and we have checked the data, we + * don't want to hold mmap_sem while we sleep. + */ + futex_unlock_mm(fshared); + /* * There might have been scheduling since the queue_me(), as we * cannot hold a spinlock across the get_user() in case it @@ -1296,7 +1363,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, queue_unlock(&q, hb); out_release_sem: - put_futex_key(fshared, &q.key); + futex_unlock_mm(fshared); return ret; } @@ -1304,13 +1371,13 @@ static int futex_wait(u32 __user *uaddr, int fshared, static long futex_wait_restart(struct restart_block *restart) { u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; - int fshared = 0; + struct rw_semaphore *fshared = NULL; ktime_t t; t.tv64 = restart->futex.time; restart->fn = do_no_restart_syscall; if (restart->futex.flags & FLAGS_SHARED) - fshared = 1; + fshared = ¤t->mm->mmap_sem; return (long)futex_wait(uaddr, fshared, restart->futex.val, &t, restart->futex.bitset); } @@ -1322,7 +1389,7 @@ static long futex_wait_restart(struct restart_block *restart) * if there are waiters then it will block, it does PI, etc. (Due to * races the kernel might see a 0 value of the futex too.) */ -static int futex_lock_pi(u32 __user *uaddr, int fshared, +static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, int detect, ktime_t *time, int trylock) { struct hrtimer_sleeper timeout, *to = NULL; @@ -1345,7 +1412,8 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, q.pi_state = NULL; retry: - q.key = FUTEX_KEY_INIT; + futex_lock_mm(fshared); + ret = get_futex_key(uaddr, fshared, &q.key); if (unlikely(ret != 0)) goto out_release_sem; @@ -1434,6 +1502,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, * exit to complete. */ queue_unlock(&q, hb); + futex_unlock_mm(fshared); cond_resched(); goto retry; @@ -1465,6 +1534,12 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, */ queue_me(&q, hb); + /* + * Now the futex is queued and we have checked the data, we + * don't want to hold mmap_sem while we sleep. + */ + futex_unlock_mm(fshared); + WARN_ON(!q.pi_state); /* * Block on the PI mutex: @@ -1477,6 +1552,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, ret = ret ? 0 : -EWOULDBLOCK; } + futex_lock_mm(fshared); spin_lock(q.lock_ptr); if (!ret) { @@ -1542,6 +1618,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, /* Unqueue and drop the lock */ unqueue_me_pi(&q); + futex_unlock_mm(fshared); if (to) destroy_hrtimer_on_stack(&to->timer); @@ -1551,7 +1628,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, queue_unlock(&q, hb); out_release_sem: - put_futex_key(fshared, &q.key); + futex_unlock_mm(fshared); if (to) destroy_hrtimer_on_stack(&to->timer); return ret; @@ -1568,12 +1645,15 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, queue_unlock(&q, hb); if (attempt++) { - ret = futex_handle_fault((unsigned long)uaddr, attempt); + ret = futex_handle_fault((unsigned long)uaddr, fshared, + attempt); if (ret) goto out_release_sem; goto retry_unlocked; } + futex_unlock_mm(fshared); + ret = get_user(uval, uaddr); if (!ret && (uval != -EFAULT)) goto retry; @@ -1588,13 +1668,13 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, * This is the in-kernel slowpath: we look up the PI state (if any), * and do the rt-mutex unlock. */ -static int futex_unlock_pi(u32 __user *uaddr, int fshared) +static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared) { struct futex_hash_bucket *hb; struct futex_q *this, *next; u32 uval; struct plist_head *head; - union futex_key key = FUTEX_KEY_INIT; + union futex_key key; int ret, attempt = 0; retry: @@ -1605,6 +1685,10 @@ static int futex_unlock_pi(u32 __user *uaddr, int fshared) */ if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) return -EPERM; + /* + * First take all the futex related locks: + */ + futex_lock_mm(fshared); ret = get_futex_key(uaddr, fshared, &key); if (unlikely(ret != 0)) @@ -1663,7 +1747,7 @@ static int futex_unlock_pi(u32 __user *uaddr, int fshared) out_unlock: spin_unlock(&hb->lock); out: - put_futex_key(fshared, &key); + futex_unlock_mm(fshared); return ret; @@ -1679,13 +1763,16 @@ static int futex_unlock_pi(u32 __user *uaddr, int fshared) spin_unlock(&hb->lock); if (attempt++) { - ret = futex_handle_fault((unsigned long)uaddr, attempt); + ret = futex_handle_fault((unsigned long)uaddr, fshared, + attempt); if (ret) goto out; uval = 0; goto retry_unlocked; } + futex_unlock_mm(fshared); + ret = get_user(uval, uaddr); if (!ret && (uval != -EFAULT)) goto retry; @@ -1811,7 +1898,8 @@ int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) * PI futexes happens in exit_pi_state(): */ if (!pi && (uval & FUTEX_WAITERS)) - futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + futex_wake(uaddr, &curr->mm->mmap_sem, 1, + FUTEX_BITSET_MATCH_ANY); } return 0; } @@ -1907,10 +1995,10 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, { int ret = -ENOSYS; int cmd = op & FUTEX_CMD_MASK; - int fshared = 0; + struct rw_semaphore *fshared = NULL; if (!(op & FUTEX_PRIVATE_FLAG)) - fshared = 1; + fshared = ¤t->mm->mmap_sem; switch (cmd) { case FUTEX_WAIT: diff --git a/trunk/kernel/irq/Makefile b/trunk/kernel/irq/Makefile index 4dd5b1edac98..681c52dbfe22 100644 --- a/trunk/kernel/irq/Makefile +++ b/trunk/kernel/irq/Makefile @@ -3,4 +3,3 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o -obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o diff --git a/trunk/kernel/irq/autoprobe.c b/trunk/kernel/irq/autoprobe.c index 650ce4102a63..cc0f7321b8ce 100644 --- a/trunk/kernel/irq/autoprobe.c +++ b/trunk/kernel/irq/autoprobe.c @@ -40,9 +40,6 @@ unsigned long probe_irq_on(void) * flush such a longstanding irq before considering it as spurious. */ for_each_irq_desc_reverse(i, desc) { - if (!desc) - continue; - spin_lock_irq(&desc->lock); if (!desc->action && !(desc->status & IRQ_NOPROBE)) { /* @@ -71,9 +68,6 @@ unsigned long probe_irq_on(void) * happened in the previous stage, it may have masked itself) */ for_each_irq_desc_reverse(i, desc) { - if (!desc) - continue; - spin_lock_irq(&desc->lock); if (!desc->action && !(desc->status & IRQ_NOPROBE)) { desc->status |= IRQ_AUTODETECT | IRQ_WAITING; @@ -92,9 +86,6 @@ unsigned long probe_irq_on(void) * Now filter out any obviously spurious interrupts */ for_each_irq_desc(i, desc) { - if (!desc) - continue; - spin_lock_irq(&desc->lock); status = desc->status; @@ -133,9 +124,6 @@ unsigned int probe_irq_mask(unsigned long val) int i; for_each_irq_desc(i, desc) { - if (!desc) - continue; - spin_lock_irq(&desc->lock); status = desc->status; @@ -178,9 +166,6 @@ int probe_irq_off(unsigned long val) unsigned int status; for_each_irq_desc(i, desc) { - if (!desc) - continue; - spin_lock_irq(&desc->lock); status = desc->status; diff --git a/trunk/kernel/irq/chip.c b/trunk/kernel/irq/chip.c index b343deedae91..58d8e31daa49 100644 --- a/trunk/kernel/irq/chip.c +++ b/trunk/kernel/irq/chip.c @@ -24,10 +24,9 @@ */ void dynamic_irq_init(unsigned int irq) { - struct irq_desc *desc; + struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; - desc = irq_to_desc(irq); if (!desc) { WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq); return; @@ -353,7 +352,6 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) spin_lock(&desc->lock); mask_ack_irq(desc, irq); - desc = irq_remap_to_desc(irq, desc); if (unlikely(desc->status & IRQ_INPROGRESS)) goto out_unlock; @@ -431,7 +429,6 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) desc->status &= ~IRQ_INPROGRESS; out: desc->chip->eoi(irq); - desc = irq_remap_to_desc(irq, desc); spin_unlock(&desc->lock); } @@ -468,14 +465,12 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) !desc->action)) { desc->status |= (IRQ_PENDING | IRQ_MASKED); mask_ack_irq(desc, irq); - desc = irq_remap_to_desc(irq, desc); goto out_unlock; } kstat_incr_irqs_this_cpu(irq, desc); /* Start handling the irq */ desc->chip->ack(irq); - desc = irq_remap_to_desc(irq, desc); /* Mark the IRQ currently in progress.*/ desc->status |= IRQ_INPROGRESS; @@ -536,10 +531,8 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) if (!noirqdebug) note_interrupt(irq, desc, action_ret); - if (desc->chip->eoi) { + if (desc->chip->eoi) desc->chip->eoi(irq); - desc = irq_remap_to_desc(irq, desc); - } } void @@ -574,10 +567,8 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, /* Uninstall? */ if (handle == handle_bad_irq) { - if (desc->chip != &no_irq_chip) { + if (desc->chip != &no_irq_chip) mask_ack_irq(desc, irq); - desc = irq_remap_to_desc(irq, desc); - } desc->status |= IRQ_DISABLED; desc->depth = 1; } diff --git a/trunk/kernel/irq/handle.c b/trunk/kernel/irq/handle.c index f1a23069c20a..c815b42d0f5b 100644 --- a/trunk/kernel/irq/handle.c +++ b/trunk/kernel/irq/handle.c @@ -15,16 +15,9 @@ #include #include #include -#include -#include #include "internals.h" -/* - * lockdep: we want to handle all irq_desc locks as a single lock-class: - */ -struct lock_class_key irq_desc_lock_class; - /** * handle_bad_irq - handle spurious and unhandled irqs * @irq: the interrupt number @@ -56,155 +49,6 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc) int nr_irqs = NR_IRQS; EXPORT_SYMBOL_GPL(nr_irqs); -void __init __attribute__((weak)) arch_early_irq_init(void) -{ -} - -#ifdef CONFIG_SPARSE_IRQ -static struct irq_desc irq_desc_init = { - .irq = -1, - .status = IRQ_DISABLED, - .chip = &no_irq_chip, - .handle_irq = handle_bad_irq, - .depth = 1, - .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), -#ifdef CONFIG_SMP - .affinity = CPU_MASK_ALL -#endif -}; - -void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) -{ - unsigned long bytes; - char *ptr; - int node; - - /* Compute how many bytes we need per irq and allocate them */ - bytes = nr * sizeof(unsigned int); - - node = cpu_to_node(cpu); - ptr = kzalloc_node(bytes, GFP_ATOMIC, node); - printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n", cpu, node); - - if (ptr) - desc->kstat_irqs = (unsigned int *)ptr; -} - -void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu) -{ -} - -static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) -{ - memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); - desc->irq = irq; -#ifdef CONFIG_SMP - desc->cpu = cpu; -#endif - lockdep_set_class(&desc->lock, &irq_desc_lock_class); - init_kstat_irqs(desc, cpu, nr_cpu_ids); - if (!desc->kstat_irqs) { - printk(KERN_ERR "can not alloc kstat_irqs\n"); - BUG_ON(1); - } - arch_init_chip_data(desc, cpu); -} - -/* - * Protect the sparse_irqs: - */ -DEFINE_SPINLOCK(sparse_irq_lock); - -struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly; - -static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = { - [0 ... NR_IRQS_LEGACY-1] = { - .irq = -1, - .status = IRQ_DISABLED, - .chip = &no_irq_chip, - .handle_irq = handle_bad_irq, - .depth = 1, - .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), -#ifdef CONFIG_SMP - .affinity = CPU_MASK_ALL -#endif - } -}; - -/* FIXME: use bootmem alloc ...*/ -static unsigned int kstat_irqs_legacy[NR_IRQS_LEGACY][NR_CPUS]; - -void __init early_irq_init(void) -{ - struct irq_desc *desc; - int legacy_count; - int i; - - desc = irq_desc_legacy; - legacy_count = ARRAY_SIZE(irq_desc_legacy); - - for (i = 0; i < legacy_count; i++) { - desc[i].irq = i; - desc[i].kstat_irqs = kstat_irqs_legacy[i]; - - irq_desc_ptrs[i] = desc + i; - } - - for (i = legacy_count; i < NR_IRQS; i++) - irq_desc_ptrs[i] = NULL; - - arch_early_irq_init(); -} - -struct irq_desc *irq_to_desc(unsigned int irq) -{ - return (irq < NR_IRQS) ? irq_desc_ptrs[irq] : NULL; -} - -struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) -{ - struct irq_desc *desc; - unsigned long flags; - int node; - - if (irq >= NR_IRQS) { - printk(KERN_WARNING "irq >= NR_IRQS in irq_to_desc_alloc: %d %d\n", - irq, NR_IRQS); - WARN_ON(1); - return NULL; - } - - desc = irq_desc_ptrs[irq]; - if (desc) - return desc; - - spin_lock_irqsave(&sparse_irq_lock, flags); - - /* We have to check it to avoid races with another CPU */ - desc = irq_desc_ptrs[irq]; - if (desc) - goto out_unlock; - - node = cpu_to_node(cpu); - desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); - printk(KERN_DEBUG " alloc irq_desc for %d on cpu %d node %d\n", - irq, cpu, node); - if (!desc) { - printk(KERN_ERR "can not alloc irq_desc\n"); - BUG_ON(1); - } - init_one_irq_desc(irq, desc, cpu); - - irq_desc_ptrs[irq] = desc; - -out_unlock: - spin_unlock_irqrestore(&sparse_irq_lock, flags); - - return desc; -} - -#else - struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { [0 ... NR_IRQS-1] = { .status = IRQ_DISABLED, @@ -218,8 +62,6 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { } }; -#endif - /* * What should we do if we get a hw irq event on an illegal vector? * Each architecture has to answer this themself. @@ -337,11 +179,8 @@ unsigned int __do_IRQ(unsigned int irq) /* * No locking required for CPU-local interrupts: */ - if (desc->chip->ack) { + if (desc->chip->ack) desc->chip->ack(irq); - /* get new one */ - desc = irq_remap_to_desc(irq, desc); - } if (likely(!(desc->status & IRQ_DISABLED))) { action_ret = handle_IRQ_event(irq, desc->action); if (!noirqdebug) @@ -352,10 +191,8 @@ unsigned int __do_IRQ(unsigned int irq) } spin_lock(&desc->lock); - if (desc->chip->ack) { + if (desc->chip->ack) desc->chip->ack(irq); - desc = irq_remap_to_desc(irq, desc); - } /* * REPLAY is when Linux resends an IRQ that was dropped earlier * WAITING is used by probe to mark irqs that are being tested @@ -424,28 +261,17 @@ unsigned int __do_IRQ(unsigned int irq) #ifdef CONFIG_TRACE_IRQFLAGS +/* + * lockdep: we want to handle all irq_desc locks as a single lock-class: + */ +static struct lock_class_key irq_desc_lock_class; + void early_init_irq_lock_class(void) { -#ifndef CONFIG_SPARSE_IRQ struct irq_desc *desc; int i; - for_each_irq_desc(i, desc) { - if (!desc) - continue; - + for_each_irq_desc(i, desc) lockdep_set_class(&desc->lock, &irq_desc_lock_class); - } -#endif -} -#endif - -#ifdef CONFIG_SPARSE_IRQ -unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) -{ - struct irq_desc *desc = irq_to_desc(irq); - return desc->kstat_irqs[cpu]; } #endif -EXPORT_SYMBOL(kstat_irqs_cpu); - diff --git a/trunk/kernel/irq/internals.h b/trunk/kernel/irq/internals.h index e6d0a43cc125..64c1c7253dae 100644 --- a/trunk/kernel/irq/internals.h +++ b/trunk/kernel/irq/internals.h @@ -13,11 +13,6 @@ extern void compat_irq_chip_set_default_handler(struct irq_desc *desc); extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, unsigned long flags); -extern struct lock_class_key irq_desc_lock_class; -extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr); -extern spinlock_t sparse_irq_lock; -extern struct irq_desc *irq_desc_ptrs[NR_IRQS]; - #ifdef CONFIG_PROC_FS extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); extern void register_handler_proc(unsigned int irq, struct irqaction *action); diff --git a/trunk/kernel/irq/numa_migrate.c b/trunk/kernel/irq/numa_migrate.c deleted file mode 100644 index 0178e2296990..000000000000 --- a/trunk/kernel/irq/numa_migrate.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * linux/kernel/irq/handle.c - * - * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar - * Copyright (C) 2005-2006, Thomas Gleixner, Russell King - * - * This file contains the core interrupt handling code. - * - * Detailed information is available in Documentation/DocBook/genericirq - * - */ - -#include -#include -#include -#include -#include - -#include "internals.h" - -static void init_copy_kstat_irqs(struct irq_desc *old_desc, - struct irq_desc *desc, - int cpu, int nr) -{ - unsigned long bytes; - - init_kstat_irqs(desc, cpu, nr); - - if (desc->kstat_irqs != old_desc->kstat_irqs) { - /* Compute how many bytes we need per irq and allocate them */ - bytes = nr * sizeof(unsigned int); - - memcpy(desc->kstat_irqs, old_desc->kstat_irqs, bytes); - } -} - -static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) -{ - if (old_desc->kstat_irqs == desc->kstat_irqs) - return; - - kfree(old_desc->kstat_irqs); - old_desc->kstat_irqs = NULL; -} - -static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, - struct irq_desc *desc, int cpu) -{ - memcpy(desc, old_desc, sizeof(struct irq_desc)); - desc->cpu = cpu; - lockdep_set_class(&desc->lock, &irq_desc_lock_class); - init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); - arch_init_copy_chip_data(old_desc, desc, cpu); -} - -static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) -{ - free_kstat_irqs(old_desc, desc); - arch_free_chip_data(old_desc, desc); -} - -static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, - int cpu) -{ - struct irq_desc *desc; - unsigned int irq; - unsigned long flags; - int node; - - irq = old_desc->irq; - - spin_lock_irqsave(&sparse_irq_lock, flags); - - /* We have to check it to avoid races with another CPU */ - desc = irq_desc_ptrs[irq]; - - if (desc && old_desc != desc) - goto out_unlock; - - node = cpu_to_node(cpu); - desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); - printk(KERN_DEBUG " move irq_desc for %d to cpu %d node %d\n", - irq, cpu, node); - if (!desc) { - printk(KERN_ERR "can not get new irq_desc for moving\n"); - /* still use old one */ - desc = old_desc; - goto out_unlock; - } - init_copy_one_irq_desc(irq, old_desc, desc, cpu); - - irq_desc_ptrs[irq] = desc; - - /* free the old one */ - free_one_irq_desc(old_desc, desc); - kfree(old_desc); - -out_unlock: - spin_unlock_irqrestore(&sparse_irq_lock, flags); - - return desc; -} - -struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu) -{ - int old_cpu; - int node, old_node; - - /* those all static, do move them */ - if (desc->irq < NR_IRQS_LEGACY) - return desc; - - old_cpu = desc->cpu; - printk(KERN_DEBUG - "try to move irq_desc from cpu %d to %d\n", old_cpu, cpu); - if (old_cpu != cpu) { - node = cpu_to_node(cpu); - old_node = cpu_to_node(old_cpu); - if (old_node != node) - desc = __real_move_irq_desc(desc, cpu); - else - desc->cpu = cpu; - } - - return desc; -} - diff --git a/trunk/kernel/irq/proc.c b/trunk/kernel/irq/proc.c index d2c0e5ee53c5..8e91c9762520 100644 --- a/trunk/kernel/irq/proc.c +++ b/trunk/kernel/irq/proc.c @@ -252,11 +252,7 @@ void init_irq_proc(void) /* * Create entries for all existing IRQs. */ - for_each_irq_desc(irq, desc) { - if (!desc) - continue; - + for_each_irq_desc(irq, desc) register_irq_proc(irq, desc); - } } diff --git a/trunk/kernel/irq/spurious.c b/trunk/kernel/irq/spurious.c index 3738107531fd..dd364c11e56e 100644 --- a/trunk/kernel/irq/spurious.c +++ b/trunk/kernel/irq/spurious.c @@ -91,9 +91,6 @@ static int misrouted_irq(int irq) int i, ok = 0; for_each_irq_desc(i, desc) { - if (!desc) - continue; - if (!i) continue; @@ -115,8 +112,6 @@ static void poll_spurious_irqs(unsigned long dummy) for_each_irq_desc(i, desc) { unsigned int status; - if (!desc) - continue; if (!i) continue; diff --git a/trunk/kernel/kthread.c b/trunk/kernel/kthread.c index 4fbc456f393d..8e7a7ce3ed0a 100644 --- a/trunk/kernel/kthread.c +++ b/trunk/kernel/kthread.c @@ -21,9 +21,6 @@ static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); struct task_struct *kthreadd_task; -DEFINE_TRACE(sched_kthread_stop); -DEFINE_TRACE(sched_kthread_stop_ret); - struct kthread_create_info { /* Information passed to kthread() from kthreadd. */ diff --git a/trunk/kernel/lockdep.c b/trunk/kernel/lockdep.c index c4c7df23f8c7..46a404173db2 100644 --- a/trunk/kernel/lockdep.c +++ b/trunk/kernel/lockdep.c @@ -25,7 +25,6 @@ * Thanks to Arjan van de Ven for coming up with the initial idea of * mapping lock dependencies runtime. */ -#define DISABLE_BRANCH_PROFILING #include #include #include @@ -137,16 +136,16 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock) #ifdef CONFIG_LOCK_STAT static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); -static int lock_point(unsigned long points[], unsigned long ip) +static int lock_contention_point(struct lock_class *class, unsigned long ip) { int i; - for (i = 0; i < LOCKSTAT_POINTS; i++) { - if (points[i] == 0) { - points[i] = ip; + for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { + if (class->contention_point[i] == 0) { + class->contention_point[i] = ip; break; } - if (points[i] == ip) + if (class->contention_point[i] == ip) break; } @@ -186,9 +185,6 @@ struct lock_class_stats lock_stats(struct lock_class *class) for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) stats.contention_point[i] += pcs->contention_point[i]; - for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++) - stats.contending_point[i] += pcs->contending_point[i]; - lock_time_add(&pcs->read_waittime, &stats.read_waittime); lock_time_add(&pcs->write_waittime, &stats.write_waittime); @@ -213,7 +209,6 @@ void clear_lock_stats(struct lock_class *class) memset(cpu_stats, 0, sizeof(struct lock_class_stats)); } memset(class->contention_point, 0, sizeof(class->contention_point)); - memset(class->contending_point, 0, sizeof(class->contending_point)); } static struct lock_class_stats *get_lock_stats(struct lock_class *class) @@ -3004,7 +2999,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) struct held_lock *hlock, *prev_hlock; struct lock_class_stats *stats; unsigned int depth; - int i, contention_point, contending_point; + int i, point; depth = curr->lockdep_depth; if (DEBUG_LOCKS_WARN_ON(!depth)) @@ -3028,22 +3023,18 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) found_it: hlock->waittime_stamp = sched_clock(); - contention_point = lock_point(hlock_class(hlock)->contention_point, ip); - contending_point = lock_point(hlock_class(hlock)->contending_point, - lock->ip); + point = lock_contention_point(hlock_class(hlock), ip); stats = get_lock_stats(hlock_class(hlock)); - if (contention_point < LOCKSTAT_POINTS) - stats->contention_point[contention_point]++; - if (contending_point < LOCKSTAT_POINTS) - stats->contending_point[contending_point]++; + if (point < ARRAY_SIZE(stats->contention_point)) + stats->contention_point[point]++; if (lock->cpu != smp_processor_id()) stats->bounces[bounce_contended + !!hlock->read]++; put_lock_stats(stats); } static void -__lock_acquired(struct lockdep_map *lock, unsigned long ip) +__lock_acquired(struct lockdep_map *lock) { struct task_struct *curr = current; struct held_lock *hlock, *prev_hlock; @@ -3092,7 +3083,6 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip) put_lock_stats(stats); lock->cpu = cpu; - lock->ip = ip; } void lock_contended(struct lockdep_map *lock, unsigned long ip) @@ -3114,7 +3104,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) } EXPORT_SYMBOL_GPL(lock_contended); -void lock_acquired(struct lockdep_map *lock, unsigned long ip) +void lock_acquired(struct lockdep_map *lock) { unsigned long flags; @@ -3127,7 +3117,7 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip) raw_local_irq_save(flags); check_flags(flags); current->lockdep_recursion = 1; - __lock_acquired(lock, ip); + __lock_acquired(lock); current->lockdep_recursion = 0; raw_local_irq_restore(flags); } diff --git a/trunk/kernel/lockdep_proc.c b/trunk/kernel/lockdep_proc.c index 13716b813896..20dbcbf9c7dd 100644 --- a/trunk/kernel/lockdep_proc.c +++ b/trunk/kernel/lockdep_proc.c @@ -470,12 +470,11 @@ static void seq_line(struct seq_file *m, char c, int offset, int length) static void snprint_time(char *buf, size_t bufsiz, s64 nr) { - s64 div; - s32 rem; + unsigned long rem; nr += 5; /* for display rounding */ - div = div_s64_rem(nr, 1000, &rem); - snprintf(buf, bufsiz, "%lld.%02d", (long long)div, (int)rem/10); + rem = do_div(nr, 1000); /* XXX: do_div_signed */ + snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, (int)rem/10); } static void seq_time(struct seq_file *m, s64 time) @@ -557,7 +556,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) if (stats->read_holdtime.nr) namelen += 2; - for (i = 0; i < LOCKSTAT_POINTS; i++) { + for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { char sym[KSYM_SYMBOL_LEN]; char ip[32]; @@ -574,23 +573,6 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) stats->contention_point[i], ip, sym); } - for (i = 0; i < LOCKSTAT_POINTS; i++) { - char sym[KSYM_SYMBOL_LEN]; - char ip[32]; - - if (class->contending_point[i] == 0) - break; - - if (!i) - seq_line(m, '-', 40-namelen, namelen); - - sprint_symbol(sym, class->contending_point[i]); - snprintf(ip, sizeof(ip), "[<%p>]", - (void *)class->contending_point[i]); - seq_printf(m, "%40s %14lu %29s %s\n", name, - stats->contending_point[i], - ip, sym); - } if (i) { seq_puts(m, "\n"); seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1)); @@ -600,7 +582,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) static void seq_header(struct seq_file *m) { - seq_printf(m, "lock_stat version 0.3\n"); + seq_printf(m, "lock_stat version 0.2\n"); seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " "%14s %14s\n", diff --git a/trunk/kernel/marker.c b/trunk/kernel/marker.c index ea54f2647868..e9c6b2bc9400 100644 --- a/trunk/kernel/marker.c +++ b/trunk/kernel/marker.c @@ -43,7 +43,6 @@ static DEFINE_MUTEX(markers_mutex); */ #define MARKER_HASH_BITS 6 #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) -static struct hlist_head marker_table[MARKER_TABLE_SIZE]; /* * Note about RCU : @@ -65,10 +64,11 @@ struct marker_entry { void *oldptr; int rcu_pending; unsigned char ptype:1; - unsigned char format_allocated:1; char name[0]; /* Contains name'\0'format'\0' */ }; +static struct hlist_head marker_table[MARKER_TABLE_SIZE]; + /** * __mark_empty_function - Empty probe callback * @probe_private: probe private data @@ -81,7 +81,7 @@ struct marker_entry { * though the function pointer change and the marker enabling are two distinct * operations that modifies the execution flow of preemptible code. */ -notrace void __mark_empty_function(void *probe_private, void *call_private, +void __mark_empty_function(void *probe_private, void *call_private, const char *fmt, va_list *args) { } @@ -97,8 +97,7 @@ EXPORT_SYMBOL_GPL(__mark_empty_function); * need to put a full smp_rmb() in this branch. This is why we do not use * rcu_dereference() for the pointer read. */ -notrace void marker_probe_cb(const struct marker *mdata, - void *call_private, ...) +void marker_probe_cb(const struct marker *mdata, void *call_private, ...) { va_list args; char ptype; @@ -108,7 +107,7 @@ notrace void marker_probe_cb(const struct marker *mdata, * sure the teardown of the callbacks can be done correctly when they * are in modules and they insure RCU read coherency. */ - rcu_read_lock_sched_notrace(); + rcu_read_lock_sched(); ptype = mdata->ptype; if (likely(!ptype)) { marker_probe_func *func; @@ -146,7 +145,7 @@ notrace void marker_probe_cb(const struct marker *mdata, va_end(args); } } - rcu_read_unlock_sched_notrace(); + rcu_read_unlock_sched(); } EXPORT_SYMBOL_GPL(marker_probe_cb); @@ -158,13 +157,12 @@ EXPORT_SYMBOL_GPL(marker_probe_cb); * * Should be connected to markers "MARK_NOARGS". */ -static notrace void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, ...) +void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) { va_list args; /* not initialized */ char ptype; - rcu_read_lock_sched_notrace(); + rcu_read_lock_sched(); ptype = mdata->ptype; if (likely(!ptype)) { marker_probe_func *func; @@ -197,8 +195,9 @@ static notrace void marker_probe_cb_noarg(const struct marker *mdata, multi[i].func(multi[i].probe_private, call_private, mdata->format, &args); } - rcu_read_unlock_sched_notrace(); + rcu_read_unlock_sched(); } +EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); static void free_old_closure(struct rcu_head *head) { @@ -417,7 +416,6 @@ static struct marker_entry *add_marker(const char *name, const char *format) e->single.probe_private = NULL; e->multi = NULL; e->ptype = 0; - e->format_allocated = 0; e->refcount = 0; e->rcu_pending = 0; hlist_add_head(&e->hlist, head); @@ -449,8 +447,6 @@ static int remove_marker(const char *name) if (e->single.func != __mark_empty_function) return -EBUSY; hlist_del(&e->hlist); - if (e->format_allocated) - kfree(e->format); /* Make sure the call_rcu has been executed */ if (e->rcu_pending) rcu_barrier_sched(); @@ -461,34 +457,57 @@ static int remove_marker(const char *name) /* * Set the mark_entry format to the format found in the element. */ -static int marker_set_format(struct marker_entry *entry, const char *format) +static int marker_set_format(struct marker_entry **entry, const char *format) { - entry->format = kstrdup(format, GFP_KERNEL); - if (!entry->format) - return -ENOMEM; - entry->format_allocated = 1; + struct marker_entry *e; + size_t name_len = strlen((*entry)->name) + 1; + size_t format_len = strlen(format) + 1; + + e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, + GFP_KERNEL); + if (!e) + return -ENOMEM; + memcpy(&e->name[0], (*entry)->name, name_len); + e->format = &e->name[name_len]; + memcpy(e->format, format, format_len); + if (strcmp(e->format, MARK_NOARGS) == 0) + e->call = marker_probe_cb_noarg; + else + e->call = marker_probe_cb; + e->single = (*entry)->single; + e->multi = (*entry)->multi; + e->ptype = (*entry)->ptype; + e->refcount = (*entry)->refcount; + e->rcu_pending = 0; + hlist_add_before(&e->hlist, &(*entry)->hlist); + hlist_del(&(*entry)->hlist); + /* Make sure the call_rcu has been executed */ + if ((*entry)->rcu_pending) + rcu_barrier_sched(); + kfree(*entry); + *entry = e; trace_mark(core_marker_format, "name %s format %s", - entry->name, entry->format); + e->name, e->format); return 0; } /* * Sets the probe callback corresponding to one marker. */ -static int set_marker(struct marker_entry *entry, struct marker *elem, +static int set_marker(struct marker_entry **entry, struct marker *elem, int active) { - int ret = 0; - WARN_ON(strcmp(entry->name, elem->name) != 0); + int ret; + WARN_ON(strcmp((*entry)->name, elem->name) != 0); - if (entry->format) { - if (strcmp(entry->format, elem->format) != 0) { + if ((*entry)->format) { + if (strcmp((*entry)->format, elem->format) != 0) { printk(KERN_NOTICE "Format mismatch for probe %s " "(%s), marker (%s)\n", - entry->name, - entry->format, + (*entry)->name, + (*entry)->format, elem->format); return -EPERM; } @@ -504,67 +523,37 @@ static int set_marker(struct marker_entry *entry, struct marker *elem, * pass from a "safe" callback (with argument) to an "unsafe" * callback (does not set arguments). */ - elem->call = entry->call; + elem->call = (*entry)->call; /* * Sanity check : * We only update the single probe private data when the ptr is * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) */ WARN_ON(elem->single.func != __mark_empty_function - && elem->single.probe_private != entry->single.probe_private - && !elem->ptype); - elem->single.probe_private = entry->single.probe_private; + && elem->single.probe_private + != (*entry)->single.probe_private && + !elem->ptype); + elem->single.probe_private = (*entry)->single.probe_private; /* * Make sure the private data is valid when we update the * single probe ptr. */ smp_wmb(); - elem->single.func = entry->single.func; + elem->single.func = (*entry)->single.func; /* * We also make sure that the new probe callbacks array is consistent * before setting a pointer to it. */ - rcu_assign_pointer(elem->multi, entry->multi); + rcu_assign_pointer(elem->multi, (*entry)->multi); /* * Update the function or multi probe array pointer before setting the * ptype. */ smp_wmb(); - elem->ptype = entry->ptype; - - if (elem->tp_name && (active ^ elem->state)) { - WARN_ON(!elem->tp_cb); - /* - * It is ok to directly call the probe registration because type - * checking has been done in the __trace_mark_tp() macro. - */ - - if (active) { - /* - * try_module_get should always succeed because we hold - * lock_module() to get the tp_cb address. - */ - ret = try_module_get(__module_text_address( - (unsigned long)elem->tp_cb)); - BUG_ON(!ret); - ret = tracepoint_probe_register_noupdate( - elem->tp_name, - elem->tp_cb); - } else { - ret = tracepoint_probe_unregister_noupdate( - elem->tp_name, - elem->tp_cb); - /* - * tracepoint_probe_update_all() must be called - * before the module containing tp_cb is unloaded. - */ - module_put(__module_text_address( - (unsigned long)elem->tp_cb)); - } - } + elem->ptype = (*entry)->ptype; elem->state = active; - return ret; + return 0; } /* @@ -575,24 +564,7 @@ static int set_marker(struct marker_entry *entry, struct marker *elem, */ static void disable_marker(struct marker *elem) { - int ret; - /* leave "call" as is. It is known statically. */ - if (elem->tp_name && elem->state) { - WARN_ON(!elem->tp_cb); - /* - * It is ok to directly call the probe registration because type - * checking has been done in the __trace_mark_tp() macro. - */ - ret = tracepoint_probe_unregister_noupdate(elem->tp_name, - elem->tp_cb); - WARN_ON(ret); - /* - * tracepoint_probe_update_all() must be called - * before the module containing tp_cb is unloaded. - */ - module_put(__module_text_address((unsigned long)elem->tp_cb)); - } elem->state = 0; elem->single.func = __mark_empty_function; /* Update the function before setting the ptype */ @@ -622,7 +594,8 @@ void marker_update_probe_range(struct marker *begin, for (iter = begin; iter < end; iter++) { mark_entry = get_marker(iter->name); if (mark_entry) { - set_marker(mark_entry, iter, !!mark_entry->refcount); + set_marker(&mark_entry, iter, + !!mark_entry->refcount); /* * ignore error, continue */ @@ -656,7 +629,6 @@ static void marker_update_probes(void) marker_update_probe_range(__start___markers, __stop___markers); /* Markers in modules. */ module_update_markers(); - tracepoint_probe_update_all(); } /** @@ -685,7 +657,7 @@ int marker_probe_register(const char *name, const char *format, ret = PTR_ERR(entry); } else if (format) { if (!entry->format) - ret = marker_set_format(entry, format); + ret = marker_set_format(&entry, format); else if (strcmp(entry->format, format)) ret = -EPERM; } @@ -704,11 +676,10 @@ int marker_probe_register(const char *name, const char *format, goto end; } mutex_unlock(&markers_mutex); - marker_update_probes(); + marker_update_probes(); /* may update entry */ mutex_lock(&markers_mutex); entry = get_marker(name); - if (!entry) - goto end; + WARN_ON(!entry); if (entry->rcu_pending) rcu_barrier_sched(); entry->oldptr = old; @@ -749,7 +720,7 @@ int marker_probe_unregister(const char *name, rcu_barrier_sched(); old = marker_entry_remove_probe(entry, probe, probe_private); mutex_unlock(&markers_mutex); - marker_update_probes(); + marker_update_probes(); /* may update entry */ mutex_lock(&markers_mutex); entry = get_marker(name); if (!entry) @@ -830,11 +801,10 @@ int marker_probe_unregister_private_data(marker_probe_func *probe, rcu_barrier_sched(); old = marker_entry_remove_probe(entry, NULL, probe_private); mutex_unlock(&markers_mutex); - marker_update_probes(); + marker_update_probes(); /* may update entry */ mutex_lock(&markers_mutex); entry = get_marker_from_private_data(probe, probe_private); - if (!entry) - goto end; + WARN_ON(!entry); if (entry->rcu_pending) rcu_barrier_sched(); entry->oldptr = old; @@ -878,6 +848,8 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe, if (!e->ptype) { if (num == 0 && e->single.func == probe) return e->single.probe_private; + else + break; } else { struct marker_probe_closure *closure; int match = 0; @@ -889,42 +861,8 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe, return closure[i].probe_private; } } - break; } } return ERR_PTR(-ENOENT); } EXPORT_SYMBOL_GPL(marker_get_private_data); - -#ifdef CONFIG_MODULES - -int marker_module_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct module *mod = data; - - switch (val) { - case MODULE_STATE_COMING: - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - break; - case MODULE_STATE_GOING: - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - break; - } - return 0; -} - -struct notifier_block marker_module_nb = { - .notifier_call = marker_module_notify, - .priority = 0, -}; - -static int init_markers(void) -{ - return register_module_notifier(&marker_module_nb); -} -__initcall(init_markers); - -#endif /* CONFIG_MODULES */ diff --git a/trunk/kernel/module.c b/trunk/kernel/module.c index dd2a54155b54..1f4cc00e0c20 100644 --- a/trunk/kernel/module.c +++ b/trunk/kernel/module.c @@ -2184,15 +2184,24 @@ static noinline struct module *load_module(void __user *umod, struct mod_debug *debug; unsigned int num_debug; +#ifdef CONFIG_MARKERS + marker_update_probe_range(mod->markers, + mod->markers + mod->num_markers); +#endif debug = section_objs(hdr, sechdrs, secstrings, "__verbose", sizeof(*debug), &num_debug); dynamic_printk_setup(debug, num_debug); + +#ifdef CONFIG_TRACEPOINTS + tracepoint_update_probe_range(mod->tracepoints, + mod->tracepoints + mod->num_tracepoints); +#endif } /* sechdrs[0].sh_size is always zero */ mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc", sizeof(*mseg), &num_mcount); - ftrace_init_module(mod, mseg, mseg + num_mcount); + ftrace_init_module(mseg, mseg + num_mcount); err = module_finalize(hdr, sechdrs, mod); if (err < 0) @@ -2704,7 +2713,7 @@ int is_module_address(unsigned long addr) /* Is this a valid kernel address? */ -__notrace_funcgraph struct module *__module_text_address(unsigned long addr) +struct module *__module_text_address(unsigned long addr) { struct module *mod; diff --git a/trunk/kernel/mutex.c b/trunk/kernel/mutex.c index 4f45d4b658ef..12c779dc65d4 100644 --- a/trunk/kernel/mutex.c +++ b/trunk/kernel/mutex.c @@ -59,7 +59,7 @@ EXPORT_SYMBOL(__mutex_init); * We also put the fastpath first in the kernel image, to make sure the * branch is predicted by the CPU as default-untaken. */ -static __used noinline void __sched +static void noinline __sched __mutex_lock_slowpath(atomic_t *lock_count); /*** @@ -96,7 +96,7 @@ void inline __sched mutex_lock(struct mutex *lock) EXPORT_SYMBOL(mutex_lock); #endif -static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); +static noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); /*** * mutex_unlock - release the mutex @@ -184,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } done: - lock_acquired(&lock->dep_map, ip); + lock_acquired(&lock->dep_map); /* got the lock - rejoice! */ mutex_remove_waiter(lock, &waiter, task_thread_info(task)); debug_mutex_set_owner(lock, task_thread_info(task)); @@ -268,7 +268,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) /* * Release the lock, slowpath: */ -static __used noinline void +static noinline void __mutex_unlock_slowpath(atomic_t *lock_count) { __mutex_unlock_common_slowpath(lock_count, 1); @@ -313,7 +313,7 @@ int __sched mutex_lock_killable(struct mutex *lock) } EXPORT_SYMBOL(mutex_lock_killable); -static __used noinline void __sched +static noinline void __sched __mutex_lock_slowpath(atomic_t *lock_count) { struct mutex *lock = container_of(lock_count, struct mutex, count); diff --git a/trunk/kernel/notifier.c b/trunk/kernel/notifier.c index 61d5aa5eced3..4282c0a40a57 100644 --- a/trunk/kernel/notifier.c +++ b/trunk/kernel/notifier.c @@ -82,14 +82,6 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl, while (nb && nr_to_call) { next_nb = rcu_dereference(nb->next); - -#ifdef CONFIG_DEBUG_NOTIFIERS - if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) { - WARN(1, "Invalid notifier called!"); - nb = next_nb; - continue; - } -#endif ret = nb->notifier_call(nb, val, v); if (nr_calls) diff --git a/trunk/kernel/posix-cpu-timers.c b/trunk/kernel/posix-cpu-timers.c index 157de3a47832..4e5288a831de 100644 --- a/trunk/kernel/posix-cpu-timers.c +++ b/trunk/kernel/posix-cpu-timers.c @@ -58,21 +58,21 @@ void thread_group_cputime( struct task_struct *tsk, struct task_cputime *times) { - struct task_cputime *totals, *tot; + struct signal_struct *sig; int i; + struct task_cputime *tot; - totals = tsk->signal->cputime.totals; - if (!totals) { + sig = tsk->signal; + if (unlikely(!sig) || !sig->cputime.totals) { times->utime = tsk->utime; times->stime = tsk->stime; times->sum_exec_runtime = tsk->se.sum_exec_runtime; return; } - times->stime = times->utime = cputime_zero; times->sum_exec_runtime = 0; for_each_possible_cpu(i) { - tot = per_cpu_ptr(totals, i); + tot = per_cpu_ptr(tsk->signal->cputime.totals, i); times->utime = cputime_add(times->utime, tot->utime); times->stime = cputime_add(times->stime, tot->stime); times->sum_exec_runtime += tot->sum_exec_runtime; diff --git a/trunk/kernel/power/disk.c b/trunk/kernel/power/disk.c index f77d3819ef57..c9d74083746f 100644 --- a/trunk/kernel/power/disk.c +++ b/trunk/kernel/power/disk.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "power.h" @@ -256,7 +257,7 @@ static int create_image(int platform_mode) int hibernation_snapshot(int platform_mode) { - int error; + int error, ftrace_save; /* Free memory before shutting down devices. */ error = swsusp_shrink_memory(); @@ -268,6 +269,7 @@ int hibernation_snapshot(int platform_mode) goto Close; suspend_console(); + ftrace_save = __ftrace_enabled_save(); error = device_suspend(PMSG_FREEZE); if (error) goto Recover_platform; @@ -297,6 +299,7 @@ int hibernation_snapshot(int platform_mode) Resume_devices: device_resume(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); + __ftrace_enabled_restore(ftrace_save); resume_console(); Close: platform_end(platform_mode); @@ -367,10 +370,11 @@ static int resume_target_kernel(void) int hibernation_restore(int platform_mode) { - int error; + int error, ftrace_save; pm_prepare_console(); suspend_console(); + ftrace_save = __ftrace_enabled_save(); error = device_suspend(PMSG_QUIESCE); if (error) goto Finish; @@ -385,6 +389,7 @@ int hibernation_restore(int platform_mode) platform_restore_cleanup(platform_mode); device_resume(PMSG_RECOVER); Finish: + __ftrace_enabled_restore(ftrace_save); resume_console(); pm_restore_console(); return error; @@ -397,7 +402,7 @@ int hibernation_restore(int platform_mode) int hibernation_platform_enter(void) { - int error; + int error, ftrace_save; if (!hibernation_ops) return -ENOSYS; @@ -412,6 +417,7 @@ int hibernation_platform_enter(void) goto Close; suspend_console(); + ftrace_save = __ftrace_enabled_save(); error = device_suspend(PMSG_HIBERNATE); if (error) { if (hibernation_ops->recover) @@ -446,6 +452,7 @@ int hibernation_platform_enter(void) hibernation_ops->finish(); Resume_devices: device_resume(PMSG_RESTORE); + __ftrace_enabled_restore(ftrace_save); resume_console(); Close: hibernation_ops->end(); diff --git a/trunk/kernel/power/main.c b/trunk/kernel/power/main.c index 613f16941b85..b8f7ce9473e8 100644 --- a/trunk/kernel/power/main.c +++ b/trunk/kernel/power/main.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "power.h" @@ -316,7 +317,7 @@ static int suspend_enter(suspend_state_t state) */ int suspend_devices_and_enter(suspend_state_t state) { - int error; + int error, ftrace_save; if (!suspend_ops) return -ENOSYS; @@ -327,6 +328,7 @@ int suspend_devices_and_enter(suspend_state_t state) goto Close; } suspend_console(); + ftrace_save = __ftrace_enabled_save(); suspend_test_start(); error = device_suspend(PMSG_SUSPEND); if (error) { @@ -358,6 +360,7 @@ int suspend_devices_and_enter(suspend_state_t state) suspend_test_start(); device_resume(PMSG_RESUME); suspend_test_finish("resume devices"); + __ftrace_enabled_restore(ftrace_save); resume_console(); Close: if (suspend_ops->end) diff --git a/trunk/kernel/profile.c b/trunk/kernel/profile.c index 4cb7d68fed82..7d620dfdde59 100644 --- a/trunk/kernel/profile.c +++ b/trunk/kernel/profile.c @@ -544,7 +544,7 @@ static const struct file_operations proc_profile_operations = { }; #ifdef CONFIG_SMP -static void profile_nop(void *unused) +static inline void profile_nop(void *unused) { } diff --git a/trunk/kernel/rcuclassic.c b/trunk/kernel/rcuclassic.c index c03ca3e61919..37f72e551542 100644 --- a/trunk/kernel/rcuclassic.c +++ b/trunk/kernel/rcuclassic.c @@ -191,7 +191,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp) /* OK, time to rat on our buddy... */ - printk(KERN_ERR "INFO: RCU detected CPU stalls:"); + printk(KERN_ERR "RCU detected CPU stalls:"); for_each_possible_cpu(cpu) { if (cpu_isset(cpu, rcp->cpumask)) printk(" %d", cpu); @@ -204,7 +204,7 @@ static void print_cpu_stall(struct rcu_ctrlblk *rcp) { unsigned long flags; - printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu/%lu jiffies)\n", + printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n", smp_processor_id(), jiffies, jiffies - rcp->gp_start); dump_stack(); @@ -393,7 +393,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp) * unnecessarily. */ smp_mb(); - cpumask_andnot(&rcp->cpumask, cpu_online_mask, nohz_cpu_mask); + cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask); rcp->signaled = 0; } diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c index 756d981d91a4..d2d16d1273b1 100644 --- a/trunk/kernel/sched.c +++ b/trunk/kernel/sched.c @@ -118,12 +118,6 @@ */ #define RUNTIME_INF ((u64)~0ULL) -DEFINE_TRACE(sched_wait_task); -DEFINE_TRACE(sched_wakeup); -DEFINE_TRACE(sched_wakeup_new); -DEFINE_TRACE(sched_switch); -DEFINE_TRACE(sched_migrate_task); - #ifdef CONFIG_SMP /* * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) @@ -267,10 +261,6 @@ struct task_group { struct cgroup_subsys_state css; #endif -#ifdef CONFIG_USER_SCHED - uid_t uid; -#endif - #ifdef CONFIG_FAIR_GROUP_SCHED /* schedulable entities of this group on each cpu */ struct sched_entity **se; @@ -296,12 +286,6 @@ struct task_group { #ifdef CONFIG_USER_SCHED -/* Helper function to pass uid information to create_sched_user() */ -void set_tg_uid(struct user_struct *user) -{ - user->tg->uid = user->uid; -} - /* * Root task group. * Every UID task group (including init_task_group aka UID-0) will @@ -497,26 +481,18 @@ struct rt_rq { */ struct root_domain { atomic_t refcount; - cpumask_var_t span; - cpumask_var_t online; + cpumask_t span; + cpumask_t online; /* * The "RT overload" flag: it gets set if a CPU has more than * one runnable RT task. */ - cpumask_var_t rto_mask; + cpumask_t rto_mask; atomic_t rto_count; #ifdef CONFIG_SMP struct cpupri cpupri; #endif -#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) - /* - * Preferred wake up cpu nominated by sched_mc balance that will be - * used when most cpus are idle in the system indicating overall very - * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2) - */ - unsigned int sched_mc_preferred_wakeup_cpu; -#endif }; /* @@ -727,18 +703,45 @@ static __read_mostly char *sched_feat_names[] = { #undef SCHED_FEAT -static int sched_feat_show(struct seq_file *m, void *v) +static int sched_feat_open(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->i_private; + return 0; +} + +static ssize_t +sched_feat_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) { + char *buf; + int r = 0; + int len = 0; int i; for (i = 0; sched_feat_names[i]; i++) { - if (!(sysctl_sched_features & (1UL << i))) - seq_puts(m, "NO_"); - seq_printf(m, "%s ", sched_feat_names[i]); + len += strlen(sched_feat_names[i]); + len += 4; } - seq_puts(m, "\n"); - return 0; + buf = kmalloc(len + 2, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + for (i = 0; sched_feat_names[i]; i++) { + if (sysctl_sched_features & (1UL << i)) + r += sprintf(buf + r, "%s ", sched_feat_names[i]); + else + r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]); + } + + r += sprintf(buf + r, "\n"); + WARN_ON(r >= len + 2); + + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); + + kfree(buf); + + return r; } static ssize_t @@ -783,17 +786,10 @@ sched_feat_write(struct file *filp, const char __user *ubuf, return cnt; } -static int sched_feat_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, sched_feat_show, NULL); -} - static struct file_operations sched_feat_fops = { - .open = sched_feat_open, - .write = sched_feat_write, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, + .open = sched_feat_open, + .read = sched_feat_read, + .write = sched_feat_write, }; static __init int sched_init_debug(void) @@ -1478,13 +1474,27 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu, unsigned long sd_shares, unsigned long sd_rq_weight) { + int boost = 0; unsigned long shares; unsigned long rq_weight; if (!tg->se[cpu]) return; - rq_weight = tg->cfs_rq[cpu]->rq_weight; + rq_weight = tg->cfs_rq[cpu]->load.weight; + + /* + * If there are currently no tasks on the cpu pretend there is one of + * average load so that when a new task gets to run here it will not + * get delayed by group starvation. + */ + if (!rq_weight) { + boost = 1; + rq_weight = NICE_0_LOAD; + } + + if (unlikely(rq_weight > sd_rq_weight)) + rq_weight = sd_rq_weight; /* * \Sum shares * rq_weight @@ -1492,7 +1502,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu, * \Sum rq_weight * */ - shares = (sd_shares * rq_weight) / sd_rq_weight; + shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES); if (abs(shares - tg->se[cpu]->load.weight) > @@ -1501,7 +1511,11 @@ update_group_shares_cpu(struct task_group *tg, int cpu, unsigned long flags; spin_lock_irqsave(&rq->lock, flags); - tg->cfs_rq[cpu]->shares = shares; + /* + * record the actual number of shares, not the boosted amount. + */ + tg->cfs_rq[cpu]->shares = boost ? 0 : shares; + tg->cfs_rq[cpu]->rq_weight = rq_weight; __set_se_shares(tg->se[cpu], shares); spin_unlock_irqrestore(&rq->lock, flags); @@ -1515,23 +1529,13 @@ update_group_shares_cpu(struct task_group *tg, int cpu, */ static int tg_shares_up(struct task_group *tg, void *data) { - unsigned long weight, rq_weight = 0; + unsigned long rq_weight = 0; unsigned long shares = 0; struct sched_domain *sd = data; int i; - for_each_cpu(i, sched_domain_span(sd)) { - /* - * If there are currently no tasks on the cpu pretend there - * is one of average load so that when a new task gets to - * run here it will not get delayed by group starvation. - */ - weight = tg->cfs_rq[i]->load.weight; - if (!weight) - weight = NICE_0_LOAD; - - tg->cfs_rq[i]->rq_weight = weight; - rq_weight += weight; + for_each_cpu_mask(i, sd->span) { + rq_weight += tg->cfs_rq[i]->load.weight; shares += tg->cfs_rq[i]->shares; } @@ -1541,7 +1545,10 @@ static int tg_shares_up(struct task_group *tg, void *data) if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) shares = tg->shares; - for_each_cpu(i, sched_domain_span(sd)) + if (!rq_weight) + rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; + + for_each_cpu_mask(i, sd->span) update_group_shares_cpu(tg, i, shares, rq_weight); return 0; @@ -1605,39 +1612,6 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd) #endif -/* - * double_lock_balance - lock the busiest runqueue, this_rq is locked already. - */ -static int double_lock_balance(struct rq *this_rq, struct rq *busiest) - __releases(this_rq->lock) - __acquires(busiest->lock) - __acquires(this_rq->lock) -{ - int ret = 0; - - if (unlikely(!irqs_disabled())) { - /* printk() doesn't work good under rq->lock */ - spin_unlock(&this_rq->lock); - BUG_ON(1); - } - if (unlikely(!spin_trylock(&busiest->lock))) { - if (busiest < this_rq) { - spin_unlock(&this_rq->lock); - spin_lock(&busiest->lock); - spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING); - ret = 1; - } else - spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING); - } - return ret; -} - -static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) - __releases(busiest->lock) -{ - spin_unlock(&busiest->lock); - lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); -} #endif #ifdef CONFIG_FAIR_GROUP_SCHED @@ -2105,17 +2079,15 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) int i; /* Skip over this group if it has no CPUs allowed */ - if (!cpumask_intersects(sched_group_cpus(group), - &p->cpus_allowed)) + if (!cpus_intersects(group->cpumask, p->cpus_allowed)) continue; - local_group = cpumask_test_cpu(this_cpu, - sched_group_cpus(group)); + local_group = cpu_isset(this_cpu, group->cpumask); /* Tally up the load of all CPUs in the group */ avg_load = 0; - for_each_cpu(i, sched_group_cpus(group)) { + for_each_cpu_mask_nr(i, group->cpumask) { /* Bias balancing toward cpus of our domain */ if (local_group) load = source_load(i, load_idx); @@ -2147,14 +2119,17 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) * find_idlest_cpu - find the idlest cpu among the cpus in group. */ static int -find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) +find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu, + cpumask_t *tmp) { unsigned long load, min_load = ULONG_MAX; int idlest = -1; int i; /* Traverse only the allowed CPUs */ - for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { + cpus_and(*tmp, group->cpumask, p->cpus_allowed); + + for_each_cpu_mask_nr(i, *tmp) { load = weighted_cpuload(i); if (load < min_load || (load == min_load && i == this_cpu)) { @@ -2196,6 +2171,7 @@ static int sched_balance_self(int cpu, int flag) update_shares(sd); while (sd) { + cpumask_t span, tmpmask; struct sched_group *group; int new_cpu, weight; @@ -2204,13 +2180,14 @@ static int sched_balance_self(int cpu, int flag) continue; } + span = sd->span; group = find_idlest_group(sd, t, cpu); if (!group) { sd = sd->child; continue; } - new_cpu = find_idlest_cpu(group, t, cpu); + new_cpu = find_idlest_cpu(group, t, cpu, &tmpmask); if (new_cpu == -1 || new_cpu == cpu) { /* Now try balancing at a lower domain level of cpu */ sd = sd->child; @@ -2219,10 +2196,10 @@ static int sched_balance_self(int cpu, int flag) /* Now try balancing at a lower domain level of new_cpu */ cpu = new_cpu; - weight = cpumask_weight(sched_domain_span(sd)); sd = NULL; + weight = cpus_weight(span); for_each_domain(cpu, tmp) { - if (weight <= cpumask_weight(sched_domain_span(tmp))) + if (weight <= cpus_weight(tmp->span)) break; if (tmp->flags & flag) sd = tmp; @@ -2267,7 +2244,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) cpu = task_cpu(p); for_each_domain(this_cpu, sd) { - if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { + if (cpu_isset(cpu, sd->span)) { update_shares(sd); break; } @@ -2315,7 +2292,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) else { struct sched_domain *sd; for_each_domain(this_cpu, sd) { - if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { + if (cpu_isset(cpu, sd->span)) { schedstat_inc(sd, ttwu_wake_remote); break; } @@ -2834,6 +2811,40 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) __release(rq2->lock); } +/* + * double_lock_balance - lock the busiest runqueue, this_rq is locked already. + */ +static int double_lock_balance(struct rq *this_rq, struct rq *busiest) + __releases(this_rq->lock) + __acquires(busiest->lock) + __acquires(this_rq->lock) +{ + int ret = 0; + + if (unlikely(!irqs_disabled())) { + /* printk() doesn't work good under rq->lock */ + spin_unlock(&this_rq->lock); + BUG_ON(1); + } + if (unlikely(!spin_trylock(&busiest->lock))) { + if (busiest < this_rq) { + spin_unlock(&this_rq->lock); + spin_lock(&busiest->lock); + spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING); + ret = 1; + } else + spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING); + } + return ret; +} + +static void double_unlock_balance(struct rq *this_rq, struct rq *busiest) + __releases(busiest->lock) +{ + spin_unlock(&busiest->lock); + lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); +} + /* * If dest_cpu is allowed for this process, migrate the task to it. * This is accomplished by forcing the cpu_allowed mask to only @@ -2847,7 +2858,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu) struct rq *rq; rq = task_rq_lock(p, &flags); - if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) + if (!cpu_isset(dest_cpu, p->cpus_allowed) || unlikely(!cpu_active(dest_cpu))) goto out; @@ -2913,7 +2924,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, * 2) cannot be migrated to this CPU due to cpus_allowed, or * 3) are cache-hot on their current CPU. */ - if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) { + if (!cpu_isset(this_cpu, p->cpus_allowed)) { schedstat_inc(p, se.nr_failed_migrations_affine); return 0; } @@ -3088,7 +3099,7 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, static struct sched_group * find_busiest_group(struct sched_domain *sd, int this_cpu, unsigned long *imbalance, enum cpu_idle_type idle, - int *sd_idle, const struct cpumask *cpus, int *balance) + int *sd_idle, const cpumask_t *cpus, int *balance) { struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; unsigned long max_load, avg_load, total_load, this_load, total_pwr; @@ -3124,11 +3135,10 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, unsigned long sum_avg_load_per_task; unsigned long avg_load_per_task; - local_group = cpumask_test_cpu(this_cpu, - sched_group_cpus(group)); + local_group = cpu_isset(this_cpu, group->cpumask); if (local_group) - balance_cpu = cpumask_first(sched_group_cpus(group)); + balance_cpu = first_cpu(group->cpumask); /* Tally up the load of all CPUs in the group */ sum_weighted_load = sum_nr_running = avg_load = 0; @@ -3137,8 +3147,13 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, max_cpu_load = 0; min_cpu_load = ~0UL; - for_each_cpu_and(i, sched_group_cpus(group), cpus) { - struct rq *rq = cpu_rq(i); + for_each_cpu_mask_nr(i, group->cpumask) { + struct rq *rq; + + if (!cpu_isset(i, *cpus)) + continue; + + rq = cpu_rq(i); if (*sd_idle && rq->nr_running) *sd_idle = 0; @@ -3249,8 +3264,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, */ if ((sum_nr_running < min_nr_running) || (sum_nr_running == min_nr_running && - cpumask_first(sched_group_cpus(group)) > - cpumask_first(sched_group_cpus(group_min)))) { + first_cpu(group->cpumask) < + first_cpu(group_min->cpumask))) { group_min = group; min_nr_running = sum_nr_running; min_load_per_task = sum_weighted_load / @@ -3265,8 +3280,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, if (sum_nr_running <= group_capacity - 1) { if (sum_nr_running > leader_nr_running || (sum_nr_running == leader_nr_running && - cpumask_first(sched_group_cpus(group)) < - cpumask_first(sched_group_cpus(group_leader)))) { + first_cpu(group->cpumask) > + first_cpu(group_leader->cpumask))) { group_leader = group; leader_nr_running = sum_nr_running; } @@ -3392,10 +3407,6 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, if (this == group_leader && group_leader != group_min) { *imbalance = min_load_per_task; - if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) { - cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu = - cpumask_first(sched_group_cpus(group_leader)); - } return group_min; } #endif @@ -3409,16 +3420,16 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, */ static struct rq * find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, - unsigned long imbalance, const struct cpumask *cpus) + unsigned long imbalance, const cpumask_t *cpus) { struct rq *busiest = NULL, *rq; unsigned long max_load = 0; int i; - for_each_cpu(i, sched_group_cpus(group)) { + for_each_cpu_mask_nr(i, group->cpumask) { unsigned long wl; - if (!cpumask_test_cpu(i, cpus)) + if (!cpu_isset(i, *cpus)) continue; rq = cpu_rq(i); @@ -3448,7 +3459,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, */ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_domain *sd, enum cpu_idle_type idle, - int *balance, struct cpumask *cpus) + int *balance, cpumask_t *cpus) { int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; struct sched_group *group; @@ -3456,7 +3467,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct rq *busiest; unsigned long flags; - cpumask_setall(cpus); + cpus_setall(*cpus); /* * When power savings policy is enabled for the parent domain, idle @@ -3516,8 +3527,8 @@ static int load_balance(int this_cpu, struct rq *this_rq, /* All tasks on this runqueue were pinned by CPU affinity */ if (unlikely(all_pinned)) { - cpumask_clear_cpu(cpu_of(busiest), cpus); - if (!cpumask_empty(cpus)) + cpu_clear(cpu_of(busiest), *cpus); + if (!cpus_empty(*cpus)) goto redo; goto out_balanced; } @@ -3534,8 +3545,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, /* don't kick the migration_thread, if the curr * task on busiest cpu can't be moved to this_cpu */ - if (!cpumask_test_cpu(this_cpu, - &busiest->curr->cpus_allowed)) { + if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { spin_unlock_irqrestore(&busiest->lock, flags); all_pinned = 1; goto out_one_pinned; @@ -3610,7 +3620,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, */ static int load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, - struct cpumask *cpus) + cpumask_t *cpus) { struct sched_group *group; struct rq *busiest = NULL; @@ -3619,7 +3629,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, int sd_idle = 0; int all_pinned = 0; - cpumask_setall(cpus); + cpus_setall(*cpus); /* * When power savings policy is enabled for the parent domain, idle @@ -3663,71 +3673,17 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, double_unlock_balance(this_rq, busiest); if (unlikely(all_pinned)) { - cpumask_clear_cpu(cpu_of(busiest), cpus); - if (!cpumask_empty(cpus)) + cpu_clear(cpu_of(busiest), *cpus); + if (!cpus_empty(*cpus)) goto redo; } } if (!ld_moved) { - int active_balance = 0; - schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]); if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) return -1; - - if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP) - return -1; - - if (sd->nr_balance_failed++ < 2) - return -1; - - /* - * The only task running in a non-idle cpu can be moved to this - * cpu in an attempt to completely freeup the other CPU - * package. The same method used to move task in load_balance() - * have been extended for load_balance_newidle() to speedup - * consolidation at sched_mc=POWERSAVINGS_BALANCE_WAKEUP (2) - * - * The package power saving logic comes from - * find_busiest_group(). If there are no imbalance, then - * f_b_g() will return NULL. However when sched_mc={1,2} then - * f_b_g() will select a group from which a running task may be - * pulled to this cpu in order to make the other package idle. - * If there is no opportunity to make a package idle and if - * there are no imbalance, then f_b_g() will return NULL and no - * action will be taken in load_balance_newidle(). - * - * Under normal task pull operation due to imbalance, there - * will be more than one task in the source run queue and - * move_tasks() will succeed. ld_moved will be true and this - * active balance code will not be triggered. - */ - - /* Lock busiest in correct order while this_rq is held */ - double_lock_balance(this_rq, busiest); - - /* - * don't kick the migration_thread, if the curr - * task on busiest cpu can't be moved to this_cpu - */ - if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { - double_unlock_balance(this_rq, busiest); - all_pinned = 1; - return ld_moved; - } - - if (!busiest->active_balance) { - busiest->active_balance = 1; - busiest->push_cpu = this_cpu; - active_balance = 1; - } - - double_unlock_balance(this_rq, busiest); - if (active_balance) - wake_up_process(busiest->migration_thread); - } else sd->nr_balance_failed = 0; @@ -3751,12 +3707,9 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, static void idle_balance(int this_cpu, struct rq *this_rq) { struct sched_domain *sd; - int pulled_task = 0; + int pulled_task = -1; unsigned long next_balance = jiffies + HZ; - cpumask_var_t tmpmask; - - if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC)) - return; + cpumask_t tmpmask; for_each_domain(this_cpu, sd) { unsigned long interval; @@ -3767,7 +3720,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) if (sd->flags & SD_BALANCE_NEWIDLE) /* If we've pulled tasks over stop searching: */ pulled_task = load_balance_newidle(this_cpu, this_rq, - sd, tmpmask); + sd, &tmpmask); interval = msecs_to_jiffies(sd->balance_interval); if (time_after(next_balance, sd->last_balance + interval)) @@ -3782,7 +3735,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq) */ this_rq->next_balance = next_balance; } - free_cpumask_var(tmpmask); } /* @@ -3820,7 +3772,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) /* Search for an sd spanning us and the target CPU. */ for_each_domain(target_cpu, sd) { if ((sd->flags & SD_LOAD_BALANCE) && - cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) + cpu_isset(busiest_cpu, sd->span)) break; } @@ -3839,9 +3791,10 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) #ifdef CONFIG_NO_HZ static struct { atomic_t load_balancer; - cpumask_var_t cpu_mask; + cpumask_t cpu_mask; } nohz ____cacheline_aligned = { .load_balancer = ATOMIC_INIT(-1), + .cpu_mask = CPU_MASK_NONE, }; /* @@ -3869,7 +3822,7 @@ int select_nohz_load_balancer(int stop_tick) int cpu = smp_processor_id(); if (stop_tick) { - cpumask_set_cpu(cpu, nohz.cpu_mask); + cpu_set(cpu, nohz.cpu_mask); cpu_rq(cpu)->in_nohz_recently = 1; /* @@ -3883,7 +3836,7 @@ int select_nohz_load_balancer(int stop_tick) } /* time for ilb owner also to sleep */ - if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { + if (cpus_weight(nohz.cpu_mask) == num_online_cpus()) { if (atomic_read(&nohz.load_balancer) == cpu) atomic_set(&nohz.load_balancer, -1); return 0; @@ -3896,10 +3849,10 @@ int select_nohz_load_balancer(int stop_tick) } else if (atomic_read(&nohz.load_balancer) == cpu) return 1; } else { - if (!cpumask_test_cpu(cpu, nohz.cpu_mask)) + if (!cpu_isset(cpu, nohz.cpu_mask)) return 0; - cpumask_clear_cpu(cpu, nohz.cpu_mask); + cpu_clear(cpu, nohz.cpu_mask); if (atomic_read(&nohz.load_balancer) == cpu) if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) @@ -3927,11 +3880,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) unsigned long next_balance = jiffies + 60*HZ; int update_next_balance = 0; int need_serialize; - cpumask_var_t tmp; - - /* Fails alloc? Rebalancing probably not a priority right now. */ - if (!alloc_cpumask_var(&tmp, GFP_ATOMIC)) - return; + cpumask_t tmp; for_each_domain(cpu, sd) { if (!(sd->flags & SD_LOAD_BALANCE)) @@ -3956,7 +3905,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) } if (time_after_eq(jiffies, sd->last_balance + interval)) { - if (load_balance(cpu, rq, sd, idle, &balance, tmp)) { + if (load_balance(cpu, rq, sd, idle, &balance, &tmp)) { /* * We've pulled tasks over so either we're no * longer idle, or one of our SMT siblings is @@ -3990,8 +3939,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) */ if (likely(update_next_balance)) rq->next_balance = next_balance; - - free_cpumask_var(tmp); } /* @@ -4016,13 +3963,12 @@ static void run_rebalance_domains(struct softirq_action *h) */ if (this_rq->idle_at_tick && atomic_read(&nohz.load_balancer) == this_cpu) { + cpumask_t cpus = nohz.cpu_mask; struct rq *rq; int balance_cpu; - for_each_cpu(balance_cpu, nohz.cpu_mask) { - if (balance_cpu == this_cpu) - continue; - + cpu_clear(this_cpu, cpus); + for_each_cpu_mask_nr(balance_cpu, cpus) { /* * If this cpu gets work to do, stop the load balancing * work being done for other cpus. Next load @@ -4060,7 +4006,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) rq->in_nohz_recently = 0; if (atomic_read(&nohz.load_balancer) == cpu) { - cpumask_clear_cpu(cpu, nohz.cpu_mask); + cpu_clear(cpu, nohz.cpu_mask); atomic_set(&nohz.load_balancer, -1); } @@ -4073,7 +4019,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) * TBD: Traverse the sched domains and nominate * the nearest cpu in the nohz.cpu_mask. */ - int ilb = cpumask_first(nohz.cpu_mask); + int ilb = first_cpu(nohz.cpu_mask); if (ilb < nr_cpu_ids) resched_cpu(ilb); @@ -4085,7 +4031,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) * cpus with ticks stopped, is it time for that to stop? */ if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu && - cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { + cpus_weight(nohz.cpu_mask) == num_online_cpus()) { resched_cpu(cpu); return; } @@ -4095,7 +4041,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) * someone else, then no need raise the SCHED_SOFTIRQ */ if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu && - cpumask_test_cpu(cpu, nohz.cpu_mask)) + cpu_isset(cpu, nohz.cpu_mask)) return; #endif if (time_after_eq(jiffies, rq->next_balance)) @@ -4257,6 +4203,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal) if (p == rq->idle) { p->stime = cputime_add(p->stime, steal); + account_group_system_time(p, steal); if (atomic_read(&rq->nr_iowait) > 0) cpustat->iowait = cputime64_add(cpustat->iowait, tmp); else @@ -4392,7 +4339,7 @@ void __kprobes sub_preempt_count(int val) /* * Underflow? */ - if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked()))) + if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) return; /* * Is the spinlock portion underflowing? @@ -5453,9 +5400,10 @@ asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) return retval; } -long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) +long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) { - cpumask_var_t cpus_allowed, new_mask; + cpumask_t cpus_allowed; + cpumask_t new_mask = *in_mask; struct task_struct *p; int retval; @@ -5477,14 +5425,6 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) get_task_struct(p); read_unlock(&tasklist_lock); - if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { - retval = -ENOMEM; - goto out_put_task; - } - if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { - retval = -ENOMEM; - goto out_free_cpus_allowed; - } retval = -EPERM; if ((current->euid != p->euid) && (current->euid != p->uid) && !capable(CAP_SYS_NICE)) @@ -5494,41 +5434,37 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) if (retval) goto out_unlock; - cpuset_cpus_allowed(p, cpus_allowed); - cpumask_and(new_mask, in_mask, cpus_allowed); + cpuset_cpus_allowed(p, &cpus_allowed); + cpus_and(new_mask, new_mask, cpus_allowed); again: - retval = set_cpus_allowed_ptr(p, new_mask); + retval = set_cpus_allowed_ptr(p, &new_mask); if (!retval) { - cpuset_cpus_allowed(p, cpus_allowed); - if (!cpumask_subset(new_mask, cpus_allowed)) { + cpuset_cpus_allowed(p, &cpus_allowed); + if (!cpus_subset(new_mask, cpus_allowed)) { /* * We must have raced with a concurrent cpuset * update. Just reset the cpus_allowed to the * cpuset's cpus_allowed */ - cpumask_copy(new_mask, cpus_allowed); + new_mask = cpus_allowed; goto again; } } out_unlock: - free_cpumask_var(new_mask); -out_free_cpus_allowed: - free_cpumask_var(cpus_allowed); -out_put_task: put_task_struct(p); put_online_cpus(); return retval; } static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, - struct cpumask *new_mask) + cpumask_t *new_mask) { - if (len < cpumask_size()) - cpumask_clear(new_mask); - else if (len > cpumask_size()) - len = cpumask_size(); - + if (len < sizeof(cpumask_t)) { + memset(new_mask, 0, sizeof(cpumask_t)); + } else if (len > sizeof(cpumask_t)) { + len = sizeof(cpumask_t); + } return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; } @@ -5541,20 +5477,17 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr) { - cpumask_var_t new_mask; + cpumask_t new_mask; int retval; - if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) - return -ENOMEM; + retval = get_user_cpu_mask(user_mask_ptr, len, &new_mask); + if (retval) + return retval; - retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); - if (retval == 0) - retval = sched_setaffinity(pid, new_mask); - free_cpumask_var(new_mask); - return retval; + return sched_setaffinity(pid, &new_mask); } -long sched_getaffinity(pid_t pid, struct cpumask *mask) +long sched_getaffinity(pid_t pid, cpumask_t *mask) { struct task_struct *p; int retval; @@ -5571,7 +5504,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) if (retval) goto out_unlock; - cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); + cpus_and(*mask, p->cpus_allowed, cpu_online_map); out_unlock: read_unlock(&tasklist_lock); @@ -5590,24 +5523,19 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr) { int ret; - cpumask_var_t mask; + cpumask_t mask; - if (len < cpumask_size()) + if (len < sizeof(cpumask_t)) return -EINVAL; - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; + ret = sched_getaffinity(pid, &mask); + if (ret < 0) + return ret; - ret = sched_getaffinity(pid, mask); - if (ret == 0) { - if (copy_to_user(user_mask_ptr, mask, cpumask_size())) - ret = -EFAULT; - else - ret = cpumask_size(); - } - free_cpumask_var(mask); + if (copy_to_user(user_mask_ptr, &mask, sizeof(cpumask_t))) + return -EFAULT; - return ret; + return sizeof(cpumask_t); } /** @@ -5949,7 +5877,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) idle->se.exec_start = sched_clock(); idle->prio = idle->normal_prio = MAX_PRIO; - cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); + idle->cpus_allowed = cpumask_of_cpu(cpu); __set_task_cpu(idle, cpu); rq->curr = rq->idle = idle; @@ -5968,7 +5896,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) * The idle tasks have their own, simple scheduling class: */ idle->sched_class = &idle_sched_class; - ftrace_graph_init_task(idle); } /* @@ -5976,9 +5903,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) * indicates which cpus entered this state. This is used * in the rcu update to wait only for active cpus. For system * which do not switch off the HZ timer nohz_cpu_mask should - * always be CPU_BITS_NONE. + * always be CPU_MASK_NONE. */ -cpumask_var_t nohz_cpu_mask; +cpumask_t nohz_cpu_mask = CPU_MASK_NONE; /* * Increase the granularity value when there are more CPUs, @@ -6033,7 +5960,7 @@ static inline void sched_init_granularity(void) * task must not exit() & deallocate itself prematurely. The * call is not atomic; no spinlocks may be held. */ -int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) +int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask) { struct migration_req req; unsigned long flags; @@ -6041,13 +5968,13 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) int ret = 0; rq = task_rq_lock(p, &flags); - if (!cpumask_intersects(new_mask, cpu_online_mask)) { + if (!cpus_intersects(*new_mask, cpu_online_map)) { ret = -EINVAL; goto out; } if (unlikely((p->flags & PF_THREAD_BOUND) && p != current && - !cpumask_equal(&p->cpus_allowed, new_mask))) { + !cpus_equal(p->cpus_allowed, *new_mask))) { ret = -EINVAL; goto out; } @@ -6055,15 +5982,15 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) if (p->sched_class->set_cpus_allowed) p->sched_class->set_cpus_allowed(p, new_mask); else { - cpumask_copy(&p->cpus_allowed, new_mask); - p->rt.nr_cpus_allowed = cpumask_weight(new_mask); + p->cpus_allowed = *new_mask; + p->rt.nr_cpus_allowed = cpus_weight(*new_mask); } /* Can the task run on the task's current CPU? If so, we're done */ - if (cpumask_test_cpu(task_cpu(p), new_mask)) + if (cpu_isset(task_cpu(p), *new_mask)) goto out; - if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) { + if (migrate_task(p, any_online_cpu(*new_mask), &req)) { /* Need help from migration thread: drop lock and wait. */ task_rq_unlock(rq, &flags); wake_up_process(rq->migration_thread); @@ -6105,7 +6032,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) if (task_cpu(p) != src_cpu) goto done; /* Affinity changed (again). */ - if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) + if (!cpu_isset(dest_cpu, p->cpus_allowed)) goto fail; on_rq = p->se.on_rq; @@ -6199,46 +6126,54 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu) /* * Figure out where task on dead CPU should go, use force if necessary. + * NOTE: interrupts should be disabled by the caller */ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) { + unsigned long flags; + cpumask_t mask; + struct rq *rq; int dest_cpu; - /* FIXME: Use cpumask_of_node here. */ - cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu)); - const struct cpumask *nodemask = &_nodemask; - -again: - /* Look for allowed, online CPU in same node. */ - for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask) - if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) - goto move; - - /* Any allowed, online CPU? */ - dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask); - if (dest_cpu < nr_cpu_ids) - goto move; - - /* No more Mr. Nice Guy. */ - if (dest_cpu >= nr_cpu_ids) { - cpuset_cpus_allowed_locked(p, &p->cpus_allowed); - dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed); - /* - * Don't tell them about moving exiting tasks or - * kernel threads (both mm NULL), since they never - * leave kernel. - */ - if (p->mm && printk_ratelimit()) { - printk(KERN_INFO "process %d (%s) no " - "longer affine to cpu%d\n", - task_pid_nr(p), p->comm, dead_cpu); - } - } + do { + /* On same node? */ + mask = node_to_cpumask(cpu_to_node(dead_cpu)); + cpus_and(mask, mask, p->cpus_allowed); + dest_cpu = any_online_cpu(mask); + + /* On any allowed CPU? */ + if (dest_cpu >= nr_cpu_ids) + dest_cpu = any_online_cpu(p->cpus_allowed); + + /* No more Mr. Nice Guy. */ + if (dest_cpu >= nr_cpu_ids) { + cpumask_t cpus_allowed; -move: - /* It can have affinity changed while we were choosing. */ - if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) - goto again; + cpuset_cpus_allowed_locked(p, &cpus_allowed); + /* + * Try to stay on the same cpuset, where the + * current cpuset may be a subset of all cpus. + * The cpuset_cpus_allowed_locked() variant of + * cpuset_cpus_allowed() will not block. It must be + * called within calls to cpuset_lock/cpuset_unlock. + */ + rq = task_rq_lock(p, &flags); + p->cpus_allowed = cpus_allowed; + dest_cpu = any_online_cpu(p->cpus_allowed); + task_rq_unlock(rq, &flags); + + /* + * Don't tell them about moving exiting tasks or + * kernel threads (both mm NULL), since they never + * leave kernel. + */ + if (p->mm && printk_ratelimit()) { + printk(KERN_INFO "process %d (%s) no " + "longer affine to cpu%d\n", + task_pid_nr(p), p->comm, dead_cpu); + } + } + } while (!__migrate_task_irq(p, dead_cpu, dest_cpu)); } /* @@ -6250,7 +6185,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) */ static void migrate_nr_uninterruptible(struct rq *rq_src) { - struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask)); + struct rq *rq_dest = cpu_rq(any_online_cpu(*CPU_MASK_ALL_PTR)); unsigned long flags; local_irq_save(flags); @@ -6540,7 +6475,7 @@ static void set_rq_online(struct rq *rq) if (!rq->online) { const struct sched_class *class; - cpumask_set_cpu(rq->cpu, rq->rd->online); + cpu_set(rq->cpu, rq->rd->online); rq->online = 1; for_each_class(class) { @@ -6560,7 +6495,7 @@ static void set_rq_offline(struct rq *rq) class->rq_offline(rq); } - cpumask_clear_cpu(rq->cpu, rq->rd->online); + cpu_clear(rq->cpu, rq->rd->online); rq->online = 0; } } @@ -6601,7 +6536,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) rq = cpu_rq(cpu); spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + BUG_ON(!cpu_isset(cpu, rq->rd->span)); set_rq_online(rq); } @@ -6615,7 +6550,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) break; /* Unbind it from offline cpu so it can run. Fall thru. */ kthread_bind(cpu_rq(cpu)->migration_thread, - cpumask_any(cpu_online_mask)); + any_online_cpu(cpu_online_map)); kthread_stop(cpu_rq(cpu)->migration_thread); cpu_rq(cpu)->migration_thread = NULL; break; @@ -6665,7 +6600,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) rq = cpu_rq(cpu); spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + BUG_ON(!cpu_isset(cpu, rq->rd->span)); set_rq_offline(rq); } spin_unlock_irqrestore(&rq->lock, flags); @@ -6703,14 +6638,36 @@ early_initcall(migration_init); #ifdef CONFIG_SCHED_DEBUG +static inline const char *sd_level_to_string(enum sched_domain_level lvl) +{ + switch (lvl) { + case SD_LV_NONE: + return "NONE"; + case SD_LV_SIBLING: + return "SIBLING"; + case SD_LV_MC: + return "MC"; + case SD_LV_CPU: + return "CPU"; + case SD_LV_NODE: + return "NODE"; + case SD_LV_ALLNODES: + return "ALLNODES"; + case SD_LV_MAX: + return "MAX"; + + } + return "MAX"; +} + static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, - struct cpumask *groupmask) + cpumask_t *groupmask) { struct sched_group *group = sd->groups; char str[256]; - cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd)); - cpumask_clear(groupmask); + cpulist_scnprintf(str, sizeof(str), &sd->span); + cpus_clear(*groupmask); printk(KERN_DEBUG "%*s domain %d: ", level, "", level); @@ -6722,13 +6679,14 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, return -1; } - printk(KERN_CONT "span %s level %s\n", str, sd->name); + printk(KERN_CONT "span %s level %s\n", + str, sd_level_to_string(sd->level)); - if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { + if (!cpu_isset(cpu, sd->span)) { printk(KERN_ERR "ERROR: domain->span does not contain " "CPU%d\n", cpu); } - if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) { + if (!cpu_isset(cpu, group->cpumask)) { printk(KERN_ERR "ERROR: domain->groups does not contain" " CPU%d\n", cpu); } @@ -6748,32 +6706,31 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, break; } - if (!cpumask_weight(sched_group_cpus(group))) { + if (!cpus_weight(group->cpumask)) { printk(KERN_CONT "\n"); printk(KERN_ERR "ERROR: empty group\n"); break; } - if (cpumask_intersects(groupmask, sched_group_cpus(group))) { + if (cpus_intersects(*groupmask, group->cpumask)) { printk(KERN_CONT "\n"); printk(KERN_ERR "ERROR: repeated CPUs\n"); break; } - cpumask_or(groupmask, groupmask, sched_group_cpus(group)); + cpus_or(*groupmask, *groupmask, group->cpumask); - cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); + cpulist_scnprintf(str, sizeof(str), &group->cpumask); printk(KERN_CONT " %s", str); group = group->next; } while (group != sd->groups); printk(KERN_CONT "\n"); - if (!cpumask_equal(sched_domain_span(sd), groupmask)) + if (!cpus_equal(sd->span, *groupmask)) printk(KERN_ERR "ERROR: groups don't span domain->span\n"); - if (sd->parent && - !cpumask_subset(groupmask, sched_domain_span(sd->parent))) + if (sd->parent && !cpus_subset(*groupmask, sd->parent->span)) printk(KERN_ERR "ERROR: parent span is not a superset " "of domain->span\n"); return 0; @@ -6781,7 +6738,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, static void sched_domain_debug(struct sched_domain *sd, int cpu) { - cpumask_var_t groupmask; + cpumask_t *groupmask; int level = 0; if (!sd) { @@ -6791,7 +6748,8 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); - if (!alloc_cpumask_var(&groupmask, GFP_KERNEL)) { + groupmask = kmalloc(sizeof(cpumask_t), GFP_KERNEL); + if (!groupmask) { printk(KERN_DEBUG "Cannot load-balance (out of memory)\n"); return; } @@ -6804,7 +6762,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) if (!sd) break; } - free_cpumask_var(groupmask); + kfree(groupmask); } #else /* !CONFIG_SCHED_DEBUG */ # define sched_domain_debug(sd, cpu) do { } while (0) @@ -6812,7 +6770,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) static int sd_degenerate(struct sched_domain *sd) { - if (cpumask_weight(sched_domain_span(sd)) == 1) + if (cpus_weight(sd->span) == 1) return 1; /* Following flags need at least 2 groups */ @@ -6843,7 +6801,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) if (sd_degenerate(parent)) return 1; - if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) + if (!cpus_equal(sd->span, parent->span)) return 0; /* Does parent contain flags not in child? */ @@ -6858,8 +6816,6 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) SD_BALANCE_EXEC | SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES); - if (nr_node_ids == 1) - pflags &= ~SD_SERIALIZE; } if (~cflags & pflags) return 0; @@ -6867,16 +6823,6 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) return 1; } -static void free_rootdomain(struct root_domain *rd) -{ - cpupri_cleanup(&rd->cpupri); - - free_cpumask_var(rd->rto_mask); - free_cpumask_var(rd->online); - free_cpumask_var(rd->span); - kfree(rd); -} - static void rq_attach_root(struct rq *rq, struct root_domain *rd) { unsigned long flags; @@ -6886,63 +6832,38 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) if (rq->rd) { struct root_domain *old_rd = rq->rd; - if (cpumask_test_cpu(rq->cpu, old_rd->online)) + if (cpu_isset(rq->cpu, old_rd->online)) set_rq_offline(rq); - cpumask_clear_cpu(rq->cpu, old_rd->span); + cpu_clear(rq->cpu, old_rd->span); if (atomic_dec_and_test(&old_rd->refcount)) - free_rootdomain(old_rd); + kfree(old_rd); } atomic_inc(&rd->refcount); rq->rd = rd; - cpumask_set_cpu(rq->cpu, rd->span); - if (cpumask_test_cpu(rq->cpu, cpu_online_mask)) + cpu_set(rq->cpu, rd->span); + if (cpu_isset(rq->cpu, cpu_online_map)) set_rq_online(rq); spin_unlock_irqrestore(&rq->lock, flags); } -static int init_rootdomain(struct root_domain *rd, bool bootmem) +static void init_rootdomain(struct root_domain *rd) { memset(rd, 0, sizeof(*rd)); - if (bootmem) { - alloc_bootmem_cpumask_var(&def_root_domain.span); - alloc_bootmem_cpumask_var(&def_root_domain.online); - alloc_bootmem_cpumask_var(&def_root_domain.rto_mask); - cpupri_init(&rd->cpupri, true); - return 0; - } + cpus_clear(rd->span); + cpus_clear(rd->online); - if (!alloc_cpumask_var(&rd->span, GFP_KERNEL)) - goto free_rd; - if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) - goto free_span; - if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) - goto free_online; - - if (cpupri_init(&rd->cpupri, false) != 0) - goto free_rto_mask; - return 0; - -free_rto_mask: - free_cpumask_var(rd->rto_mask); -free_online: - free_cpumask_var(rd->online); -free_span: - free_cpumask_var(rd->span); -free_rd: - kfree(rd); - return -ENOMEM; + cpupri_init(&rd->cpupri); } static void init_defrootdomain(void) { - init_rootdomain(&def_root_domain, true); - + init_rootdomain(&def_root_domain); atomic_set(&def_root_domain.refcount, 1); } @@ -6954,10 +6875,7 @@ static struct root_domain *alloc_rootdomain(void) if (!rd) return NULL; - if (init_rootdomain(rd, false) != 0) { - kfree(rd); - return NULL; - } + init_rootdomain(rd); return rd; } @@ -6999,12 +6917,19 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) } /* cpus with isolated domains */ -static cpumask_var_t cpu_isolated_map; +static cpumask_t cpu_isolated_map = CPU_MASK_NONE; /* Setup the mask of cpus configured for isolated domains */ static int __init isolated_cpu_setup(char *str) { - cpulist_parse(str, cpu_isolated_map); + static int __initdata ints[NR_CPUS]; + int i; + + str = get_options(str, ARRAY_SIZE(ints), ints); + cpus_clear(cpu_isolated_map); + for (i = 1; i <= ints[0]; i++) + if (ints[i] < NR_CPUS) + cpu_set(ints[i], cpu_isolated_map); return 1; } @@ -7013,43 +6938,42 @@ __setup("isolcpus=", isolated_cpu_setup); /* * init_sched_build_groups takes the cpumask we wish to span, and a pointer * to a function which identifies what group(along with sched group) a CPU - * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids - * (due to the fact that we keep track of groups covered with a struct cpumask). + * belongs to. The return value of group_fn must be a >= 0 and < NR_CPUS + * (due to the fact that we keep track of groups covered with a cpumask_t). * * init_sched_build_groups will build a circular linked list of the groups * covered by the given span, and will set each group's ->cpumask correctly, * and ->cpu_power to 0. */ static void -init_sched_build_groups(const struct cpumask *span, - const struct cpumask *cpu_map, - int (*group_fn)(int cpu, const struct cpumask *cpu_map, +init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map, + int (*group_fn)(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, - struct cpumask *tmpmask), - struct cpumask *covered, struct cpumask *tmpmask) + cpumask_t *tmpmask), + cpumask_t *covered, cpumask_t *tmpmask) { struct sched_group *first = NULL, *last = NULL; int i; - cpumask_clear(covered); + cpus_clear(*covered); - for_each_cpu(i, span) { + for_each_cpu_mask_nr(i, *span) { struct sched_group *sg; int group = group_fn(i, cpu_map, &sg, tmpmask); int j; - if (cpumask_test_cpu(i, covered)) + if (cpu_isset(i, *covered)) continue; - cpumask_clear(sched_group_cpus(sg)); + cpus_clear(sg->cpumask); sg->__cpu_power = 0; - for_each_cpu(j, span) { + for_each_cpu_mask_nr(j, *span) { if (group_fn(j, cpu_map, NULL, tmpmask) != group) continue; - cpumask_set_cpu(j, covered); - cpumask_set_cpu(j, sched_group_cpus(sg)); + cpu_set(j, *covered); + cpu_set(j, sg->cpumask); } if (!first) first = sg; @@ -7113,10 +7037,9 @@ static int find_next_best_node(int node, nodemask_t *used_nodes) * should be one that prevents unnecessary balancing, but also spreads tasks * out optimally. */ -static void sched_domain_node_span(int node, struct cpumask *span) +static void sched_domain_node_span(int node, cpumask_t *span) { nodemask_t used_nodes; - /* FIXME: use cpumask_of_node() */ node_to_cpumask_ptr(nodemask, node); int i; @@ -7137,34 +7060,19 @@ static void sched_domain_node_span(int node, struct cpumask *span) int sched_smt_power_savings = 0, sched_mc_power_savings = 0; -/* - * The cpus mask in sched_group and sched_domain hangs off the end. - * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space - * for nr_cpu_ids < CONFIG_NR_CPUS. - */ -struct static_sched_group { - struct sched_group sg; - DECLARE_BITMAP(cpus, CONFIG_NR_CPUS); -}; - -struct static_sched_domain { - struct sched_domain sd; - DECLARE_BITMAP(span, CONFIG_NR_CPUS); -}; - /* * SMT sched-domains: */ #ifdef CONFIG_SCHED_SMT -static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus); +static DEFINE_PER_CPU(struct sched_domain, cpu_domains); +static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); static int -cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *unused) +cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, + cpumask_t *unused) { if (sg) - *sg = &per_cpu(sched_group_cpus, cpu).sg; + *sg = &per_cpu(sched_group_cpus, cpu); return cpu; } #endif /* CONFIG_SCHED_SMT */ @@ -7173,55 +7081,56 @@ cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, * multi-core sched-domains: */ #ifdef CONFIG_SCHED_MC -static DEFINE_PER_CPU(struct static_sched_domain, core_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_core); +static DEFINE_PER_CPU(struct sched_domain, core_domains); +static DEFINE_PER_CPU(struct sched_group, sched_group_core); #endif /* CONFIG_SCHED_MC */ #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) static int -cpu_to_core_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *mask) +cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, + cpumask_t *mask) { int group; - cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map); - group = cpumask_first(mask); + *mask = per_cpu(cpu_sibling_map, cpu); + cpus_and(*mask, *mask, *cpu_map); + group = first_cpu(*mask); if (sg) - *sg = &per_cpu(sched_group_core, group).sg; + *sg = &per_cpu(sched_group_core, group); return group; } #elif defined(CONFIG_SCHED_MC) static int -cpu_to_core_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *unused) +cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, + cpumask_t *unused) { if (sg) - *sg = &per_cpu(sched_group_core, cpu).sg; + *sg = &per_cpu(sched_group_core, cpu); return cpu; } #endif -static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +static DEFINE_PER_CPU(struct sched_group, sched_group_phys); static int -cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *mask) +cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, + cpumask_t *mask) { int group; #ifdef CONFIG_SCHED_MC - /* FIXME: Use cpu_coregroup_mask. */ *mask = cpu_coregroup_map(cpu); cpus_and(*mask, *mask, *cpu_map); - group = cpumask_first(mask); + group = first_cpu(*mask); #elif defined(CONFIG_SCHED_SMT) - cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map); - group = cpumask_first(mask); + *mask = per_cpu(cpu_sibling_map, cpu); + cpus_and(*mask, *mask, *cpu_map); + group = first_cpu(*mask); #else group = cpu; #endif if (sg) - *sg = &per_cpu(sched_group_phys, group).sg; + *sg = &per_cpu(sched_group_phys, group); return group; } @@ -7235,21 +7144,19 @@ static DEFINE_PER_CPU(struct sched_domain, node_domains); static struct sched_group ***sched_group_nodes_bycpu; static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes); +static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes); -static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, - struct cpumask *nodemask) +static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map, + struct sched_group **sg, cpumask_t *nodemask) { int group; - /* FIXME: use cpumask_of_node */ - node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu)); - cpumask_and(nodemask, pnodemask, cpu_map); - group = cpumask_first(nodemask); + *nodemask = node_to_cpumask(cpu_to_node(cpu)); + cpus_and(*nodemask, *nodemask, *cpu_map); + group = first_cpu(*nodemask); if (sg) - *sg = &per_cpu(sched_group_allnodes, group).sg; + *sg = &per_cpu(sched_group_allnodes, group); return group; } @@ -7261,11 +7168,11 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) if (!sg) return; do { - for_each_cpu(j, sched_group_cpus(sg)) { + for_each_cpu_mask_nr(j, sg->cpumask) { struct sched_domain *sd; - sd = &per_cpu(phys_domains, j).sd; - if (j != cpumask_first(sched_group_cpus(sd->groups))) { + sd = &per_cpu(phys_domains, j); + if (j != first_cpu(sd->groups->cpumask)) { /* * Only add "power" once for each * physical package. @@ -7282,12 +7189,11 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) #ifdef CONFIG_NUMA /* Free memory allocated for various sched_group structures */ -static void free_sched_groups(const struct cpumask *cpu_map, - struct cpumask *nodemask) +static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) { int cpu, i; - for_each_cpu(cpu, cpu_map) { + for_each_cpu_mask_nr(cpu, *cpu_map) { struct sched_group **sched_group_nodes = sched_group_nodes_bycpu[cpu]; @@ -7296,11 +7202,10 @@ static void free_sched_groups(const struct cpumask *cpu_map, for (i = 0; i < nr_node_ids; i++) { struct sched_group *oldsg, *sg = sched_group_nodes[i]; - /* FIXME: Use cpumask_of_node */ - node_to_cpumask_ptr(pnodemask, i); - cpus_and(*nodemask, *pnodemask, *cpu_map); - if (cpumask_empty(nodemask)) + *nodemask = node_to_cpumask(i); + cpus_and(*nodemask, *nodemask, *cpu_map); + if (cpus_empty(*nodemask)) continue; if (sg == NULL) @@ -7318,8 +7223,7 @@ static void free_sched_groups(const struct cpumask *cpu_map, } } #else /* !CONFIG_NUMA */ -static void free_sched_groups(const struct cpumask *cpu_map, - struct cpumask *nodemask) +static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) { } #endif /* CONFIG_NUMA */ @@ -7345,7 +7249,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) WARN_ON(!sd || !sd->groups); - if (cpu != cpumask_first(sched_group_cpus(sd->groups))) + if (cpu != first_cpu(sd->groups->cpumask)) return; child = sd->child; @@ -7410,6 +7314,40 @@ SD_INIT_FUNC(CPU) SD_INIT_FUNC(MC) #endif +/* + * To minimize stack usage kmalloc room for cpumasks and share the + * space as the usage in build_sched_domains() dictates. Used only + * if the amount of space is significant. + */ +struct allmasks { + cpumask_t tmpmask; /* make this one first */ + union { + cpumask_t nodemask; + cpumask_t this_sibling_map; + cpumask_t this_core_map; + }; + cpumask_t send_covered; + +#ifdef CONFIG_NUMA + cpumask_t domainspan; + cpumask_t covered; + cpumask_t notcovered; +#endif +}; + +#if NR_CPUS > 128 +#define SCHED_CPUMASK_ALLOC 1 +#define SCHED_CPUMASK_FREE(v) kfree(v) +#define SCHED_CPUMASK_DECLARE(v) struct allmasks *v +#else +#define SCHED_CPUMASK_ALLOC 0 +#define SCHED_CPUMASK_FREE(v) +#define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v +#endif + +#define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \ + ((unsigned long)(a) + offsetof(struct allmasks, v)) + static int default_relax_domain_level = -1; static int __init setup_relax_domain_level(char *str) @@ -7449,38 +7387,17 @@ static void set_domain_attribute(struct sched_domain *sd, * Build sched domains for a given set of cpus and attach the sched domains * to the individual cpus */ -static int __build_sched_domains(const struct cpumask *cpu_map, +static int __build_sched_domains(const cpumask_t *cpu_map, struct sched_domain_attr *attr) { - int i, err = -ENOMEM; + int i; struct root_domain *rd; - cpumask_var_t nodemask, this_sibling_map, this_core_map, send_covered, - tmpmask; + SCHED_CPUMASK_DECLARE(allmasks); + cpumask_t *tmpmask; #ifdef CONFIG_NUMA - cpumask_var_t domainspan, covered, notcovered; struct sched_group **sched_group_nodes = NULL; int sd_allnodes = 0; - if (!alloc_cpumask_var(&domainspan, GFP_KERNEL)) - goto out; - if (!alloc_cpumask_var(&covered, GFP_KERNEL)) - goto free_domainspan; - if (!alloc_cpumask_var(¬covered, GFP_KERNEL)) - goto free_covered; -#endif - - if (!alloc_cpumask_var(&nodemask, GFP_KERNEL)) - goto free_notcovered; - if (!alloc_cpumask_var(&this_sibling_map, GFP_KERNEL)) - goto free_nodemask; - if (!alloc_cpumask_var(&this_core_map, GFP_KERNEL)) - goto free_this_sibling_map; - if (!alloc_cpumask_var(&send_covered, GFP_KERNEL)) - goto free_this_core_map; - if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) - goto free_send_covered; - -#ifdef CONFIG_NUMA /* * Allocate the per-node list of sched groups */ @@ -7488,37 +7405,55 @@ static int __build_sched_domains(const struct cpumask *cpu_map, GFP_KERNEL); if (!sched_group_nodes) { printk(KERN_WARNING "Can not alloc sched group node list\n"); - goto free_tmpmask; + return -ENOMEM; } #endif rd = alloc_rootdomain(); if (!rd) { printk(KERN_WARNING "Cannot alloc root domain\n"); - goto free_sched_groups; +#ifdef CONFIG_NUMA + kfree(sched_group_nodes); +#endif + return -ENOMEM; + } + +#if SCHED_CPUMASK_ALLOC + /* get space for all scratch cpumask variables */ + allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL); + if (!allmasks) { + printk(KERN_WARNING "Cannot alloc cpumask array\n"); + kfree(rd); +#ifdef CONFIG_NUMA + kfree(sched_group_nodes); +#endif + return -ENOMEM; } +#endif + tmpmask = (cpumask_t *)allmasks; + #ifdef CONFIG_NUMA - sched_group_nodes_bycpu[cpumask_first(cpu_map)] = sched_group_nodes; + sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes; #endif /* * Set up domains for cpus specified by the cpu_map. */ - for_each_cpu(i, cpu_map) { + for_each_cpu_mask_nr(i, *cpu_map) { struct sched_domain *sd = NULL, *p; + SCHED_CPUMASK_VAR(nodemask, allmasks); - /* FIXME: use cpumask_of_node */ *nodemask = node_to_cpumask(cpu_to_node(i)); cpus_and(*nodemask, *nodemask, *cpu_map); #ifdef CONFIG_NUMA - if (cpumask_weight(cpu_map) > - SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) { + if (cpus_weight(*cpu_map) > + SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) { sd = &per_cpu(allnodes_domains, i); SD_INIT(sd, ALLNODES); set_domain_attribute(sd, attr); - cpumask_copy(sched_domain_span(sd), cpu_map); + sd->span = *cpu_map; cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); p = sd; sd_allnodes = 1; @@ -7528,19 +7463,18 @@ static int __build_sched_domains(const struct cpumask *cpu_map, sd = &per_cpu(node_domains, i); SD_INIT(sd, NODE); set_domain_attribute(sd, attr); - sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd)); + sched_domain_node_span(cpu_to_node(i), &sd->span); sd->parent = p; if (p) p->child = sd; - cpumask_and(sched_domain_span(sd), - sched_domain_span(sd), cpu_map); + cpus_and(sd->span, sd->span, *cpu_map); #endif p = sd; - sd = &per_cpu(phys_domains, i).sd; + sd = &per_cpu(phys_domains, i); SD_INIT(sd, CPU); set_domain_attribute(sd, attr); - cpumask_copy(sched_domain_span(sd), nodemask); + sd->span = *nodemask; sd->parent = p; if (p) p->child = sd; @@ -7548,12 +7482,11 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_SCHED_MC p = sd; - sd = &per_cpu(core_domains, i).sd; + sd = &per_cpu(core_domains, i); SD_INIT(sd, MC); set_domain_attribute(sd, attr); - *sched_domain_span(sd) = cpu_coregroup_map(i); - cpumask_and(sched_domain_span(sd), - sched_domain_span(sd), cpu_map); + sd->span = cpu_coregroup_map(i); + cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; p->child = sd; cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask); @@ -7561,11 +7494,11 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_SCHED_SMT p = sd; - sd = &per_cpu(cpu_domains, i).sd; + sd = &per_cpu(cpu_domains, i); SD_INIT(sd, SIBLING); set_domain_attribute(sd, attr); - cpumask_and(sched_domain_span(sd), - &per_cpu(cpu_sibling_map, i), cpu_map); + sd->span = per_cpu(cpu_sibling_map, i); + cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; p->child = sd; cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask); @@ -7574,10 +7507,13 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_SCHED_SMT /* Set up CPU (sibling) groups */ - for_each_cpu(i, cpu_map) { - cpumask_and(this_sibling_map, - &per_cpu(cpu_sibling_map, i), cpu_map); - if (i != cpumask_first(this_sibling_map)) + for_each_cpu_mask_nr(i, *cpu_map) { + SCHED_CPUMASK_VAR(this_sibling_map, allmasks); + SCHED_CPUMASK_VAR(send_covered, allmasks); + + *this_sibling_map = per_cpu(cpu_sibling_map, i); + cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map); + if (i != first_cpu(*this_sibling_map)) continue; init_sched_build_groups(this_sibling_map, cpu_map, @@ -7588,11 +7524,13 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_SCHED_MC /* Set up multi-core groups */ - for_each_cpu(i, cpu_map) { - /* FIXME: Use cpu_coregroup_mask */ + for_each_cpu_mask_nr(i, *cpu_map) { + SCHED_CPUMASK_VAR(this_core_map, allmasks); + SCHED_CPUMASK_VAR(send_covered, allmasks); + *this_core_map = cpu_coregroup_map(i); cpus_and(*this_core_map, *this_core_map, *cpu_map); - if (i != cpumask_first(this_core_map)) + if (i != first_cpu(*this_core_map)) continue; init_sched_build_groups(this_core_map, cpu_map, @@ -7603,10 +7541,12 @@ static int __build_sched_domains(const struct cpumask *cpu_map, /* Set up physical groups */ for (i = 0; i < nr_node_ids; i++) { - /* FIXME: Use cpumask_of_node */ + SCHED_CPUMASK_VAR(nodemask, allmasks); + SCHED_CPUMASK_VAR(send_covered, allmasks); + *nodemask = node_to_cpumask(i); cpus_and(*nodemask, *nodemask, *cpu_map); - if (cpumask_empty(nodemask)) + if (cpus_empty(*nodemask)) continue; init_sched_build_groups(nodemask, cpu_map, @@ -7617,6 +7557,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_NUMA /* Set up node groups */ if (sd_allnodes) { + SCHED_CPUMASK_VAR(send_covered, allmasks); + init_sched_build_groups(cpu_map, cpu_map, &cpu_to_allnodes_group, send_covered, tmpmask); @@ -7625,58 +7567,58 @@ static int __build_sched_domains(const struct cpumask *cpu_map, for (i = 0; i < nr_node_ids; i++) { /* Set up node groups */ struct sched_group *sg, *prev; + SCHED_CPUMASK_VAR(nodemask, allmasks); + SCHED_CPUMASK_VAR(domainspan, allmasks); + SCHED_CPUMASK_VAR(covered, allmasks); int j; - /* FIXME: Use cpumask_of_node */ *nodemask = node_to_cpumask(i); - cpumask_clear(covered); + cpus_clear(*covered); cpus_and(*nodemask, *nodemask, *cpu_map); - if (cpumask_empty(nodemask)) { + if (cpus_empty(*nodemask)) { sched_group_nodes[i] = NULL; continue; } sched_domain_node_span(i, domainspan); - cpumask_and(domainspan, domainspan, cpu_map); + cpus_and(*domainspan, *domainspan, *cpu_map); - sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(), - GFP_KERNEL, i); + sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i); if (!sg) { printk(KERN_WARNING "Can not alloc domain group for " "node %d\n", i); goto error; } sched_group_nodes[i] = sg; - for_each_cpu(j, nodemask) { + for_each_cpu_mask_nr(j, *nodemask) { struct sched_domain *sd; sd = &per_cpu(node_domains, j); sd->groups = sg; } sg->__cpu_power = 0; - cpumask_copy(sched_group_cpus(sg), nodemask); + sg->cpumask = *nodemask; sg->next = sg; - cpumask_or(covered, covered, nodemask); + cpus_or(*covered, *covered, *nodemask); prev = sg; for (j = 0; j < nr_node_ids; j++) { + SCHED_CPUMASK_VAR(notcovered, allmasks); int n = (i + j) % nr_node_ids; - /* FIXME: Use cpumask_of_node */ node_to_cpumask_ptr(pnodemask, n); - cpumask_complement(notcovered, covered); - cpumask_and(tmpmask, notcovered, cpu_map); - cpumask_and(tmpmask, tmpmask, domainspan); - if (cpumask_empty(tmpmask)) + cpus_complement(*notcovered, *covered); + cpus_and(*tmpmask, *notcovered, *cpu_map); + cpus_and(*tmpmask, *tmpmask, *domainspan); + if (cpus_empty(*tmpmask)) break; - cpumask_and(tmpmask, tmpmask, pnodemask); - if (cpumask_empty(tmpmask)) + cpus_and(*tmpmask, *tmpmask, *pnodemask); + if (cpus_empty(*tmpmask)) continue; - sg = kmalloc_node(sizeof(struct sched_group) + - cpumask_size(), + sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i); if (!sg) { printk(KERN_WARNING @@ -7684,9 +7626,9 @@ static int __build_sched_domains(const struct cpumask *cpu_map, goto error; } sg->__cpu_power = 0; - cpumask_copy(sched_group_cpus(sg), tmpmask); + sg->cpumask = *tmpmask; sg->next = prev->next; - cpumask_or(covered, covered, tmpmask); + cpus_or(*covered, *covered, *tmpmask); prev->next = sg; prev = sg; } @@ -7695,22 +7637,22 @@ static int __build_sched_domains(const struct cpumask *cpu_map, /* Calculate CPU power for physical packages and nodes */ #ifdef CONFIG_SCHED_SMT - for_each_cpu(i, cpu_map) { - struct sched_domain *sd = &per_cpu(cpu_domains, i).sd; + for_each_cpu_mask_nr(i, *cpu_map) { + struct sched_domain *sd = &per_cpu(cpu_domains, i); init_sched_groups_power(i, sd); } #endif #ifdef CONFIG_SCHED_MC - for_each_cpu(i, cpu_map) { - struct sched_domain *sd = &per_cpu(core_domains, i).sd; + for_each_cpu_mask_nr(i, *cpu_map) { + struct sched_domain *sd = &per_cpu(core_domains, i); init_sched_groups_power(i, sd); } #endif - for_each_cpu(i, cpu_map) { - struct sched_domain *sd = &per_cpu(phys_domains, i).sd; + for_each_cpu_mask_nr(i, *cpu_map) { + struct sched_domain *sd = &per_cpu(phys_domains, i); init_sched_groups_power(i, sd); } @@ -7722,87 +7664,56 @@ static int __build_sched_domains(const struct cpumask *cpu_map, if (sd_allnodes) { struct sched_group *sg; - cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg, + cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg, tmpmask); init_numa_sched_groups_power(sg); } #endif /* Attach the domains */ - for_each_cpu(i, cpu_map) { + for_each_cpu_mask_nr(i, *cpu_map) { struct sched_domain *sd; #ifdef CONFIG_SCHED_SMT - sd = &per_cpu(cpu_domains, i).sd; + sd = &per_cpu(cpu_domains, i); #elif defined(CONFIG_SCHED_MC) - sd = &per_cpu(core_domains, i).sd; + sd = &per_cpu(core_domains, i); #else - sd = &per_cpu(phys_domains, i).sd; + sd = &per_cpu(phys_domains, i); #endif cpu_attach_domain(sd, rd, i); } - err = 0; - -free_tmpmask: - free_cpumask_var(tmpmask); -free_send_covered: - free_cpumask_var(send_covered); -free_this_core_map: - free_cpumask_var(this_core_map); -free_this_sibling_map: - free_cpumask_var(this_sibling_map); -free_nodemask: - free_cpumask_var(nodemask); -free_notcovered: -#ifdef CONFIG_NUMA - free_cpumask_var(notcovered); -free_covered: - free_cpumask_var(covered); -free_domainspan: - free_cpumask_var(domainspan); -out: -#endif - return err; - -free_sched_groups: -#ifdef CONFIG_NUMA - kfree(sched_group_nodes); -#endif - goto free_tmpmask; + SCHED_CPUMASK_FREE((void *)allmasks); + return 0; #ifdef CONFIG_NUMA error: free_sched_groups(cpu_map, tmpmask); - free_rootdomain(rd); - goto free_tmpmask; + SCHED_CPUMASK_FREE((void *)allmasks); + kfree(rd); + return -ENOMEM; #endif } -static int build_sched_domains(const struct cpumask *cpu_map) +static int build_sched_domains(const cpumask_t *cpu_map) { return __build_sched_domains(cpu_map, NULL); } -static struct cpumask *doms_cur; /* current sched domains */ +static cpumask_t *doms_cur; /* current sched domains */ static int ndoms_cur; /* number of sched domains in 'doms_cur' */ static struct sched_domain_attr *dattr_cur; /* attribues of custom domains in 'doms_cur' */ /* * Special case: If a kmalloc of a doms_cur partition (array of - * cpumask) fails, then fallback to a single sched domain, - * as determined by the single cpumask fallback_doms. + * cpumask_t) fails, then fallback to a single sched domain, + * as determined by the single cpumask_t fallback_doms. */ -static cpumask_var_t fallback_doms; +static cpumask_t fallback_doms; -/* - * arch_update_cpu_topology lets virtualized architectures update the - * cpu core maps. It is supposed to return 1 if the topology changed - * or 0 if it stayed the same. - */ -int __attribute__((weak)) arch_update_cpu_topology(void) +void __attribute__((weak)) arch_update_cpu_topology(void) { - return 0; } /* @@ -7810,16 +7721,16 @@ int __attribute__((weak)) arch_update_cpu_topology(void) * For now this just excludes isolated cpus, but could be used to * exclude other special cases in the future. */ -static int arch_init_sched_domains(const struct cpumask *cpu_map) +static int arch_init_sched_domains(const cpumask_t *cpu_map) { int err; arch_update_cpu_topology(); ndoms_cur = 1; - doms_cur = kmalloc(cpumask_size(), GFP_KERNEL); + doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL); if (!doms_cur) - doms_cur = fallback_doms; - cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map); + doms_cur = &fallback_doms; + cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map); dattr_cur = NULL; err = build_sched_domains(doms_cur); register_sched_domain_sysctl(); @@ -7827,8 +7738,8 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map) return err; } -static void arch_destroy_sched_domains(const struct cpumask *cpu_map, - struct cpumask *tmpmask) +static void arch_destroy_sched_domains(const cpumask_t *cpu_map, + cpumask_t *tmpmask) { free_sched_groups(cpu_map, tmpmask); } @@ -7837,16 +7748,17 @@ static void arch_destroy_sched_domains(const struct cpumask *cpu_map, * Detach sched domains from a group of cpus specified in cpu_map * These cpus will now be attached to the NULL domain */ -static void detach_destroy_domains(const struct cpumask *cpu_map) +static void detach_destroy_domains(const cpumask_t *cpu_map) { - /* Save because hotplug lock held. */ - static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS); + cpumask_t tmpmask; int i; - for_each_cpu(i, cpu_map) + unregister_sched_domain_sysctl(); + + for_each_cpu_mask_nr(i, *cpu_map) cpu_attach_domain(NULL, &def_root_domain, i); synchronize_sched(); - arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask)); + arch_destroy_sched_domains(cpu_map, &tmpmask); } /* handle null as "default" */ @@ -7871,7 +7783,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * doms_new[] to the current sched domain partitioning, doms_cur[]. * It destroys each deleted domain and builds each new domain. * - * 'doms_new' is an array of cpumask's of length 'ndoms_new'. + * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'. * The masks don't intersect (don't overlap.) We should setup one * sched domain for each mask. CPUs not in any of the cpumasks will * not be load balanced. If the same cpumask appears both in the @@ -7885,33 +7797,28 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * the single partition 'fallback_doms', it also forces the domains * to be rebuilt. * - * If doms_new == NULL it will be replaced with cpu_online_mask. + * If doms_new == NULL it will be replaced with cpu_online_map. * ndoms_new == 0 is a special case for destroying existing domains, * and it will not create the default domain. * * Call with hotplug lock held */ -/* FIXME: Change to struct cpumask *doms_new[] */ -void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, struct sched_domain_attr *dattr_new) { int i, j, n; - int new_topology; mutex_lock(&sched_domains_mutex); /* always unregister in case we don't destroy any domains */ unregister_sched_domain_sysctl(); - /* Let architecture update cpu core mappings. */ - new_topology = arch_update_cpu_topology(); - n = doms_new ? ndoms_new : 0; /* Destroy deleted domains */ for (i = 0; i < ndoms_cur; i++) { - for (j = 0; j < n && !new_topology; j++) { - if (cpumask_equal(&doms_cur[i], &doms_new[j]) + for (j = 0; j < n; j++) { + if (cpus_equal(doms_cur[i], doms_new[j]) && dattrs_equal(dattr_cur, i, dattr_new, j)) goto match1; } @@ -7923,15 +7830,15 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, if (doms_new == NULL) { ndoms_cur = 0; - doms_new = fallback_doms; - cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map); - WARN_ON_ONCE(dattr_new); + doms_new = &fallback_doms; + cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map); + dattr_new = NULL; } /* Build new domains */ for (i = 0; i < ndoms_new; i++) { - for (j = 0; j < ndoms_cur && !new_topology; j++) { - if (cpumask_equal(&doms_new[i], &doms_cur[j]) + for (j = 0; j < ndoms_cur; j++) { + if (cpus_equal(doms_new[i], doms_cur[j]) && dattrs_equal(dattr_new, i, dattr_cur, j)) goto match2; } @@ -7943,7 +7850,7 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, } /* Remember the new sched domains */ - if (doms_cur != fallback_doms) + if (doms_cur != &fallback_doms) kfree(doms_cur); kfree(dattr_cur); /* kfree(NULL) is safe */ doms_cur = doms_new; @@ -7972,25 +7879,14 @@ int arch_reinit_sched_domains(void) static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) { int ret; - unsigned int level = 0; - - if (sscanf(buf, "%u", &level) != 1) - return -EINVAL; - /* - * level is always be positive so don't check for - * level < POWERSAVINGS_BALANCE_NONE which is 0 - * What happens on 0 or 1 byte write, - * need to check for count as well? - */ - - if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS) + if (buf[0] != '0' && buf[0] != '1') return -EINVAL; if (smt) - sched_smt_power_savings = level; + sched_smt_power_savings = (buf[0] == '1'); else - sched_mc_power_savings = level; + sched_mc_power_savings = (buf[0] == '1'); ret = arch_reinit_sched_domains(); @@ -8094,9 +7990,7 @@ static int update_runtime(struct notifier_block *nfb, void __init sched_init_smp(void) { - cpumask_var_t non_isolated_cpus; - - alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); + cpumask_t non_isolated_cpus; #if defined(CONFIG_NUMA) sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **), @@ -8105,10 +7999,10 @@ void __init sched_init_smp(void) #endif get_online_cpus(); mutex_lock(&sched_domains_mutex); - arch_init_sched_domains(cpu_online_mask); - cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); - if (cpumask_empty(non_isolated_cpus)) - cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); + arch_init_sched_domains(&cpu_online_map); + cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map); + if (cpus_empty(non_isolated_cpus)) + cpu_set(smp_processor_id(), non_isolated_cpus); mutex_unlock(&sched_domains_mutex); put_online_cpus(); @@ -8123,13 +8017,9 @@ void __init sched_init_smp(void) init_hrtick(); /* Move init over to a non-isolated CPU */ - if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0) + if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0) BUG(); sched_init_granularity(); - free_cpumask_var(non_isolated_cpus); - - alloc_cpumask_var(&fallback_doms, GFP_KERNEL); - init_sched_rt_class(); } #else void __init sched_init_smp(void) @@ -8444,15 +8334,6 @@ void __init sched_init(void) */ current->sched_class = &fair_sched_class; - /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ - alloc_bootmem_cpumask_var(&nohz_cpu_mask); -#ifdef CONFIG_SMP -#ifdef CONFIG_NO_HZ - alloc_bootmem_cpumask_var(&nohz.cpu_mask); -#endif - alloc_bootmem_cpumask_var(&cpu_isolated_map); -#endif /* SMP */ - scheduler_running = 1; } @@ -8611,7 +8492,7 @@ static int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) { struct cfs_rq *cfs_rq; - struct sched_entity *se; + struct sched_entity *se, *parent_se; struct rq *rq; int i; @@ -8627,17 +8508,18 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) for_each_possible_cpu(i) { rq = cpu_rq(i); - cfs_rq = kzalloc_node(sizeof(struct cfs_rq), - GFP_KERNEL, cpu_to_node(i)); + cfs_rq = kmalloc_node(sizeof(struct cfs_rq), + GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); if (!cfs_rq) goto err; - se = kzalloc_node(sizeof(struct sched_entity), - GFP_KERNEL, cpu_to_node(i)); + se = kmalloc_node(sizeof(struct sched_entity), + GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); if (!se) goto err; - init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); + parent_se = parent ? parent->se[i] : NULL; + init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent_se); } return 1; @@ -8698,7 +8580,7 @@ static int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) { struct rt_rq *rt_rq; - struct sched_rt_entity *rt_se; + struct sched_rt_entity *rt_se, *parent_se; struct rq *rq; int i; @@ -8715,17 +8597,18 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) for_each_possible_cpu(i) { rq = cpu_rq(i); - rt_rq = kzalloc_node(sizeof(struct rt_rq), - GFP_KERNEL, cpu_to_node(i)); + rt_rq = kmalloc_node(sizeof(struct rt_rq), + GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); if (!rt_rq) goto err; - rt_se = kzalloc_node(sizeof(struct sched_rt_entity), - GFP_KERNEL, cpu_to_node(i)); + rt_se = kmalloc_node(sizeof(struct sched_rt_entity), + GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); if (!rt_se) goto err; - init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); + parent_se = parent ? parent->rt_se[i] : NULL; + init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent_se); } return 1; @@ -9368,12 +9251,11 @@ struct cgroup_subsys cpu_cgroup_subsys = { * (balbir@in.ibm.com). */ -/* track cpu usage of a group of tasks and its child groups */ +/* track cpu usage of a group of tasks */ struct cpuacct { struct cgroup_subsys_state css; /* cpuusage holds pointer to a u64-type object on every cpu */ u64 *cpuusage; - struct cpuacct *parent; }; struct cgroup_subsys cpuacct_subsys; @@ -9407,9 +9289,6 @@ static struct cgroup_subsys_state *cpuacct_create( return ERR_PTR(-ENOMEM); } - if (cgrp->parent) - ca->parent = cgroup_ca(cgrp->parent); - return &ca->css; } @@ -9489,16 +9368,14 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) static void cpuacct_charge(struct task_struct *tsk, u64 cputime) { struct cpuacct *ca; - int cpu; if (!cpuacct_subsys.active) return; - cpu = task_cpu(tsk); ca = task_ca(tsk); + if (ca) { + u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk)); - for (; ca; ca = ca->parent) { - u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu); *cpuusage += cputime; } } diff --git a/trunk/kernel/sched_clock.c b/trunk/kernel/sched_clock.c index e8ab096ddfe3..81787248b60f 100644 --- a/trunk/kernel/sched_clock.c +++ b/trunk/kernel/sched_clock.c @@ -118,13 +118,13 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now) /* * scd->clock = clamp(scd->tick_gtod + delta, - * max(scd->tick_gtod, scd->clock), - * scd->tick_gtod + TICK_NSEC); + * max(scd->tick_gtod, scd->clock), + * max(scd->clock, scd->tick_gtod + TICK_NSEC)); */ clock = scd->tick_gtod + delta; min_clock = wrap_max(scd->tick_gtod, scd->clock); - max_clock = scd->tick_gtod + TICK_NSEC; + max_clock = wrap_max(scd->clock, scd->tick_gtod + TICK_NSEC); clock = wrap_max(clock, min_clock); clock = wrap_min(clock, max_clock); diff --git a/trunk/kernel/sched_cpupri.c b/trunk/kernel/sched_cpupri.c index 018b7be1db2e..52154fefab7e 100644 --- a/trunk/kernel/sched_cpupri.c +++ b/trunk/kernel/sched_cpupri.c @@ -67,21 +67,24 @@ static int convert_prio(int prio) * Returns: (int)bool - CPUs were found */ int cpupri_find(struct cpupri *cp, struct task_struct *p, - struct cpumask *lowest_mask) + cpumask_t *lowest_mask) { int idx = 0; int task_pri = convert_prio(p->prio); for_each_cpupri_active(cp->pri_active, idx) { struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; + cpumask_t mask; if (idx >= task_pri) break; - if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) + cpus_and(mask, p->cpus_allowed, vec->mask); + + if (cpus_empty(mask)) continue; - cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); + *lowest_mask = mask; return 1; } @@ -123,7 +126,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) vec->count--; if (!vec->count) clear_bit(oldpri, cp->pri_active); - cpumask_clear_cpu(cpu, vec->mask); + cpu_clear(cpu, vec->mask); spin_unlock_irqrestore(&vec->lock, flags); } @@ -133,7 +136,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) spin_lock_irqsave(&vec->lock, flags); - cpumask_set_cpu(cpu, vec->mask); + cpu_set(cpu, vec->mask); vec->count++; if (vec->count == 1) set_bit(newpri, cp->pri_active); @@ -147,11 +150,10 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) /** * cpupri_init - initialize the cpupri structure * @cp: The cpupri context - * @bootmem: true if allocations need to use bootmem * - * Returns: -ENOMEM if memory fails. + * Returns: (void) */ -int cpupri_init(struct cpupri *cp, bool bootmem) +void cpupri_init(struct cpupri *cp) { int i; @@ -162,30 +164,11 @@ int cpupri_init(struct cpupri *cp, bool bootmem) spin_lock_init(&vec->lock); vec->count = 0; - if (bootmem) - alloc_bootmem_cpumask_var(&vec->mask); - else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL)) - goto cleanup; + cpus_clear(vec->mask); } for_each_possible_cpu(i) cp->cpu_to_pri[i] = CPUPRI_INVALID; - return 0; - -cleanup: - for (i--; i >= 0; i--) - free_cpumask_var(cp->pri_to_cpu[i].mask); - return -ENOMEM; } -/** - * cpupri_cleanup - clean up the cpupri structure - * @cp: The cpupri context - */ -void cpupri_cleanup(struct cpupri *cp) -{ - int i; - for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) - free_cpumask_var(cp->pri_to_cpu[i].mask); -} diff --git a/trunk/kernel/sched_cpupri.h b/trunk/kernel/sched_cpupri.h index 642a94ef8a0a..f25811b0f931 100644 --- a/trunk/kernel/sched_cpupri.h +++ b/trunk/kernel/sched_cpupri.h @@ -14,7 +14,7 @@ struct cpupri_vec { spinlock_t lock; int count; - cpumask_var_t mask; + cpumask_t mask; }; struct cpupri { @@ -27,8 +27,7 @@ struct cpupri { int cpupri_find(struct cpupri *cp, struct task_struct *p, cpumask_t *lowest_mask); void cpupri_set(struct cpupri *cp, int cpu, int pri); -int cpupri_init(struct cpupri *cp, bool bootmem); -void cpupri_cleanup(struct cpupri *cp); +void cpupri_init(struct cpupri *cp); #else #define cpupri_set(cp, cpu, pri) do { } while (0) #define cpupri_init() do { } while (0) diff --git a/trunk/kernel/sched_debug.c b/trunk/kernel/sched_debug.c index 4293cfa9681d..26ed8e3d1c15 100644 --- a/trunk/kernel/sched_debug.c +++ b/trunk/kernel/sched_debug.c @@ -53,40 +53,6 @@ static unsigned long nsec_low(unsigned long long nsec) #define SPLIT_NS(x) nsec_high(x), nsec_low(x) -#ifdef CONFIG_FAIR_GROUP_SCHED -static void print_cfs_group_stats(struct seq_file *m, int cpu, - struct task_group *tg) -{ - struct sched_entity *se = tg->se[cpu]; - if (!se) - return; - -#define P(F) \ - SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F) -#define PN(F) \ - SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F)) - - PN(se->exec_start); - PN(se->vruntime); - PN(se->sum_exec_runtime); -#ifdef CONFIG_SCHEDSTATS - PN(se->wait_start); - PN(se->sleep_start); - PN(se->block_start); - PN(se->sleep_max); - PN(se->block_max); - PN(se->exec_max); - PN(se->slice_max); - PN(se->wait_max); - PN(se->wait_sum); - P(se->wait_count); -#endif - P(se->load.weight); -#undef PN -#undef P -} -#endif - static void print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) { @@ -155,19 +121,20 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) char path[128] = ""; + struct cgroup *cgroup = NULL; struct task_group *tg = cfs_rq->tg; - cgroup_path(tg->css.cgroup, path, sizeof(path)); + if (tg) + cgroup = tg->css.cgroup; + + if (cgroup) + cgroup_path(cgroup, path, sizeof(path)); SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); -#elif defined(CONFIG_USER_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) - { - uid_t uid = cfs_rq->tg->uid; - SEQ_printf(m, "\ncfs_rq[%d] for UID: %u\n", cpu, uid); - } #else SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); #endif + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", SPLIT_NS(cfs_rq->exec_clock)); @@ -201,7 +168,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) #ifdef CONFIG_SMP SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares); #endif - print_cfs_group_stats(m, cpu, cfs_rq->tg); #endif } @@ -209,9 +175,14 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) { #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED) char path[128] = ""; + struct cgroup *cgroup = NULL; struct task_group *tg = rt_rq->tg; - cgroup_path(tg->css.cgroup, path, sizeof(path)); + if (tg) + cgroup = tg->css.cgroup; + + if (cgroup) + cgroup_path(cgroup, path, sizeof(path)); SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path); #else @@ -301,7 +272,7 @@ static int sched_debug_show(struct seq_file *m, void *v) u64 now = ktime_to_ns(ktime_get()); int cpu; - SEQ_printf(m, "Sched Debug Version: v0.08, %s %.*s\n", + SEQ_printf(m, "Sched Debug Version: v0.07, %s %.*s\n", init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); diff --git a/trunk/kernel/sched_fair.c b/trunk/kernel/sched_fair.c index 36b5e34fa99e..98345e45b059 100644 --- a/trunk/kernel/sched_fair.c +++ b/trunk/kernel/sched_fair.c @@ -1017,33 +1017,16 @@ static void yield_task_fair(struct rq *rq) * search starts with cpus closest then further out as needed, * so we always favor a closer, idle cpu. * Domains may include CPUs that are not usable for migration, - * hence we need to mask them out (cpu_active_mask) + * hence we need to mask them out (cpu_active_map) * * Returns the CPU we should wake onto. */ #if defined(ARCH_HAS_SCHED_WAKE_IDLE) static int wake_idle(int cpu, struct task_struct *p) { + cpumask_t tmp; struct sched_domain *sd; int i; - unsigned int chosen_wakeup_cpu; - int this_cpu; - - /* - * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu - * are idle and this is not a kernel thread and this task's affinity - * allows it to be moved to preferred cpu, then just move! - */ - - this_cpu = smp_processor_id(); - chosen_wakeup_cpu = - cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu; - - if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP && - idle_cpu(cpu) && idle_cpu(this_cpu) && - p->mm && !(p->flags & PF_KTHREAD) && - cpu_isset(chosen_wakeup_cpu, p->cpus_allowed)) - return chosen_wakeup_cpu; /* * If it is idle, then it is the best cpu to run this task. @@ -1061,9 +1044,10 @@ static int wake_idle(int cpu, struct task_struct *p) if ((sd->flags & SD_WAKE_IDLE) || ((sd->flags & SD_WAKE_IDLE_FAR) && !task_hot(p, task_rq(p)->clock, sd))) { - for_each_cpu_and(i, sched_domain_span(sd), - &p->cpus_allowed) { - if (cpu_active(i) && idle_cpu(i)) { + cpus_and(tmp, sd->span, p->cpus_allowed); + cpus_and(tmp, tmp, cpu_active_map); + for_each_cpu_mask_nr(i, tmp) { + if (idle_cpu(i)) { if (i != task_cpu(p)) { schedstat_inc(p, se.nr_wakeups_idle); @@ -1256,13 +1240,13 @@ static int select_task_rq_fair(struct task_struct *p, int sync) * this_cpu and prev_cpu are present in: */ for_each_domain(this_cpu, sd) { - if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) { + if (cpu_isset(prev_cpu, sd->span)) { this_sd = sd; break; } } - if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed))) + if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) goto out; /* diff --git a/trunk/kernel/sched_rt.c b/trunk/kernel/sched_rt.c index 1bbd99014011..d9ba9d5f99d6 100644 --- a/trunk/kernel/sched_rt.c +++ b/trunk/kernel/sched_rt.c @@ -15,7 +15,7 @@ static inline void rt_set_overload(struct rq *rq) if (!rq->online) return; - cpumask_set_cpu(rq->cpu, rq->rd->rto_mask); + cpu_set(rq->cpu, rq->rd->rto_mask); /* * Make sure the mask is visible before we set * the overload count. That is checked to determine @@ -34,7 +34,7 @@ static inline void rt_clear_overload(struct rq *rq) /* the order here really doesn't matter */ atomic_dec(&rq->rd->rto_count); - cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask); + cpu_clear(rq->cpu, rq->rd->rto_mask); } static void update_rt_migration(struct rq *rq) @@ -139,14 +139,14 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se) } #ifdef CONFIG_SMP -static inline const struct cpumask *sched_rt_period_mask(void) +static inline cpumask_t sched_rt_period_mask(void) { return cpu_rq(smp_processor_id())->rd->span; } #else -static inline const struct cpumask *sched_rt_period_mask(void) +static inline cpumask_t sched_rt_period_mask(void) { - return cpu_online_mask; + return cpu_online_map; } #endif @@ -212,9 +212,9 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq) return rt_rq->rt_throttled; } -static inline const struct cpumask *sched_rt_period_mask(void) +static inline cpumask_t sched_rt_period_mask(void) { - return cpu_online_mask; + return cpu_online_map; } static inline @@ -241,11 +241,11 @@ static int do_balance_runtime(struct rt_rq *rt_rq) int i, weight, more = 0; u64 rt_period; - weight = cpumask_weight(rd->span); + weight = cpus_weight(rd->span); spin_lock(&rt_b->rt_runtime_lock); rt_period = ktime_to_ns(rt_b->rt_period); - for_each_cpu(i, rd->span) { + for_each_cpu_mask_nr(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); s64 diff; @@ -324,7 +324,7 @@ static void __disable_runtime(struct rq *rq) /* * Greedy reclaim, take back as much as we can. */ - for_each_cpu(i, rd->span) { + for_each_cpu_mask(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); s64 diff; @@ -429,13 +429,13 @@ static inline int balance_runtime(struct rt_rq *rt_rq) static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) { int i, idle = 1; - const struct cpumask *span; + cpumask_t span; if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) return 1; span = sched_rt_period_mask(); - for_each_cpu(i, span) { + for_each_cpu_mask(i, span) { int enqueue = 0; struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); struct rq *rq = rq_of_rt_rq(rt_rq); @@ -537,13 +537,13 @@ static void update_curr_rt(struct rq *rq) for_each_sched_rt_entity(rt_se) { rt_rq = rt_rq_of_se(rt_se); + spin_lock(&rt_rq->rt_runtime_lock); if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { - spin_lock(&rt_rq->rt_runtime_lock); rt_rq->rt_time += delta_exec; if (sched_rt_runtime_exceeded(rt_rq)) resched_task(curr); - spin_unlock(&rt_rq->rt_runtime_lock); } + spin_unlock(&rt_rq->rt_runtime_lock); } } @@ -805,20 +805,17 @@ static int select_task_rq_rt(struct task_struct *p, int sync) static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) { - cpumask_var_t mask; + cpumask_t mask; if (rq->curr->rt.nr_cpus_allowed == 1) return; - if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) - return; - if (p->rt.nr_cpus_allowed != 1 - && cpupri_find(&rq->rd->cpupri, p, mask)) - goto free; + && cpupri_find(&rq->rd->cpupri, p, &mask)) + return; - if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask)) - goto free; + if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask)) + return; /* * There appears to be other cpus that can accept @@ -827,8 +824,6 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) */ requeue_task_rt(rq, p, 1); resched_task(rq->curr); -free: - free_cpumask_var(mask); } #endif /* CONFIG_SMP */ @@ -914,12 +909,15 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) /* Only try algorithms three times */ #define RT_MAX_TRIES 3 +static int double_lock_balance(struct rq *this_rq, struct rq *busiest); +static void double_unlock_balance(struct rq *this_rq, struct rq *busiest); + static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) && + (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) && (p->rt.nr_cpus_allowed > 1)) return 1; return 0; @@ -958,7 +956,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) return next; } -static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); +static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) { @@ -978,7 +976,7 @@ static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) static int find_lowest_rq(struct task_struct *task) { struct sched_domain *sd; - struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); + cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); @@ -993,7 +991,7 @@ static int find_lowest_rq(struct task_struct *task) * I guess we might want to change cpupri_find() to ignore those * in the first place. */ - cpumask_and(lowest_mask, lowest_mask, cpu_active_mask); + cpus_and(*lowest_mask, *lowest_mask, cpu_active_map); /* * At this point we have built a mask of cpus representing the @@ -1003,7 +1001,7 @@ static int find_lowest_rq(struct task_struct *task) * We prioritize the last cpu that the task executed on since * it is most likely cache-hot in that location. */ - if (cpumask_test_cpu(cpu, lowest_mask)) + if (cpu_isset(cpu, *lowest_mask)) return cpu; /* @@ -1018,8 +1016,7 @@ static int find_lowest_rq(struct task_struct *task) cpumask_t domain_mask; int best_cpu; - cpumask_and(&domain_mask, sched_domain_span(sd), - lowest_mask); + cpus_and(domain_mask, sd->span, *lowest_mask); best_cpu = pick_optimal_cpu(this_cpu, &domain_mask); @@ -1060,8 +1057,8 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) * Also make sure that it wasn't scheduled on its rq. */ if (unlikely(task_rq(task) != rq || - !cpumask_test_cpu(lowest_rq->cpu, - &task->cpus_allowed) || + !cpu_isset(lowest_rq->cpu, + task->cpus_allowed) || task_running(rq, task) || !task->se.on_rq)) { @@ -1182,7 +1179,7 @@ static int pull_rt_task(struct rq *this_rq) next = pick_next_task_rt(this_rq); - for_each_cpu(cpu, this_rq->rd->rto_mask) { + for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) { if (this_cpu == cpu) continue; @@ -1311,9 +1308,9 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, } static void set_cpus_allowed_rt(struct task_struct *p, - const struct cpumask *new_mask) + const cpumask_t *new_mask) { - int weight = cpumask_weight(new_mask); + int weight = cpus_weight(*new_mask); BUG_ON(!rt_task(p)); @@ -1334,7 +1331,7 @@ static void set_cpus_allowed_rt(struct task_struct *p, update_rt_migration(rq); } - cpumask_copy(&p->cpus_allowed, new_mask); + p->cpus_allowed = *new_mask; p->rt.nr_cpus_allowed = weight; } @@ -1377,14 +1374,6 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p, if (!rq->rt.rt_nr_running) pull_rt_task(rq); } - -static inline void init_sched_rt_class(void) -{ - unsigned int i; - - for_each_possible_cpu(i) - alloc_cpumask_var(&per_cpu(local_cpu_mask, i), GFP_KERNEL); -} #endif /* CONFIG_SMP */ /* @@ -1555,4 +1544,3 @@ static void print_rt_stats(struct seq_file *m, int cpu) rcu_read_unlock(); } #endif /* CONFIG_SCHED_DEBUG */ - diff --git a/trunk/kernel/sched_stats.h b/trunk/kernel/sched_stats.h index 5fcf0e184586..6beff1e4eeae 100644 --- a/trunk/kernel/sched_stats.h +++ b/trunk/kernel/sched_stats.h @@ -42,8 +42,7 @@ static int show_schedstat(struct seq_file *seq, void *v) for_each_domain(cpu, sd) { enum cpu_idle_type itype; - cpumask_scnprintf(mask_str, mask_len, - sched_domain_span(sd)); + cpumask_scnprintf(mask_str, mask_len, &sd->span); seq_printf(seq, "domain%d %s", dcount++, mask_str); for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; itype++) { diff --git a/trunk/kernel/signal.c b/trunk/kernel/signal.c index e9afe63da24b..4530fc654455 100644 --- a/trunk/kernel/signal.c +++ b/trunk/kernel/signal.c @@ -41,8 +41,6 @@ static struct kmem_cache *sigqueue_cachep; -DEFINE_TRACE(sched_signal_send); - static void __user *sig_handler(struct task_struct *t, int sig) { return t->sighand->action[sig - 1].sa.sa_handler; diff --git a/trunk/kernel/softlockup.c b/trunk/kernel/softlockup.c index 1ab790c67b17..dc0b3be6b7d5 100644 --- a/trunk/kernel/softlockup.c +++ b/trunk/kernel/softlockup.c @@ -164,7 +164,7 @@ unsigned long __read_mostly sysctl_hung_task_check_count = 1024; /* * Zero means infinite timeout - no checking done: */ -unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480; +unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120; unsigned long __read_mostly sysctl_hung_task_warnings = 10; diff --git a/trunk/kernel/sys.c b/trunk/kernel/sys.c index 5fc3a0cfb994..31deba8f7d16 100644 --- a/trunk/kernel/sys.c +++ b/trunk/kernel/sys.c @@ -858,8 +858,8 @@ void do_sys_times(struct tms *tms) struct task_cputime cputime; cputime_t cutime, cstime; - thread_group_cputime(current, &cputime); spin_lock_irq(¤t->sighand->siglock); + thread_group_cputime(current, &cputime); cutime = current->signal->cutime; cstime = current->signal->cstime; spin_unlock_irq(¤t->sighand->siglock); diff --git a/trunk/kernel/sysctl.c b/trunk/kernel/sysctl.c index c83f566e940a..3d56fe7570da 100644 --- a/trunk/kernel/sysctl.c +++ b/trunk/kernel/sysctl.c @@ -487,16 +487,6 @@ static struct ctl_table kern_table[] = { .proc_handler = &ftrace_enable_sysctl, }, #endif -#ifdef CONFIG_TRACING - { - .ctl_name = CTL_UNNUMBERED, - .procname = "ftrace_dump_on_oops", - .data = &ftrace_dump_on_oops, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#endif #ifdef CONFIG_MODULES { .ctl_name = KERN_MODPROBE, diff --git a/trunk/kernel/time/tick-sched.c b/trunk/kernel/time/tick-sched.c index 70f872c71f4e..342fc9ccab46 100644 --- a/trunk/kernel/time/tick-sched.c +++ b/trunk/kernel/time/tick-sched.c @@ -144,7 +144,7 @@ void tick_nohz_update_jiffies(void) if (!ts->tick_stopped) return; - cpumask_clear_cpu(cpu, nohz_cpu_mask); + cpu_clear(cpu, nohz_cpu_mask); now = ktime_get(); ts->idle_waketime = now; @@ -283,7 +283,7 @@ void tick_nohz_stop_sched_tick(int inidle) if ((long)delta_jiffies >= 1) { if (delta_jiffies > 1) - cpumask_set_cpu(cpu, nohz_cpu_mask); + cpu_set(cpu, nohz_cpu_mask); /* * nohz_stop_sched_tick can be called several times before * the nohz_restart_sched_tick is called. This happens when @@ -296,7 +296,7 @@ void tick_nohz_stop_sched_tick(int inidle) /* * sched tick not stopped! */ - cpumask_clear_cpu(cpu, nohz_cpu_mask); + cpu_clear(cpu, nohz_cpu_mask); goto out; } @@ -354,7 +354,7 @@ void tick_nohz_stop_sched_tick(int inidle) * softirq. */ tick_do_update_jiffies64(ktime_get()); - cpumask_clear_cpu(cpu, nohz_cpu_mask); + cpu_clear(cpu, nohz_cpu_mask); } raise_softirq_irqoff(TIMER_SOFTIRQ); out: @@ -432,7 +432,7 @@ void tick_nohz_restart_sched_tick(void) select_nohz_load_balancer(0); now = ktime_get(); tick_do_update_jiffies64(now); - cpumask_clear_cpu(cpu, nohz_cpu_mask); + cpu_clear(cpu, nohz_cpu_mask); /* * We stopped the tick in idle. Update process times would miss the diff --git a/trunk/kernel/trace/Kconfig b/trunk/kernel/trace/Kconfig index bde6f03512d5..33dbefd471e8 100644 --- a/trunk/kernel/trace/Kconfig +++ b/trunk/kernel/trace/Kconfig @@ -3,34 +3,18 @@ # select HAVE_FUNCTION_TRACER: # -config USER_STACKTRACE_SUPPORT - bool - config NOP_TRACER bool config HAVE_FUNCTION_TRACER bool -config HAVE_FUNCTION_GRAPH_TRACER - bool - -config HAVE_FUNCTION_TRACE_MCOUNT_TEST - bool - help - This gets selected when the arch tests the function_trace_stop - variable at the mcount call site. Otherwise, this variable - is tested by the called function. - config HAVE_DYNAMIC_FTRACE bool config HAVE_FTRACE_MCOUNT_RECORD bool -config HAVE_HW_BRANCH_TRACER - bool - config TRACER_MAX_TRACE bool @@ -63,20 +47,6 @@ config FUNCTION_TRACER (the bootup default), then the overhead of the instructions is very small and not measurable even in micro-benchmarks. -config FUNCTION_GRAPH_TRACER - bool "Kernel Function Graph Tracer" - depends on HAVE_FUNCTION_GRAPH_TRACER - depends on FUNCTION_TRACER - default y - help - Enable the kernel to trace a function at both its return - and its entry. - It's first purpose is to trace the duration of functions and - draw a call graph for each thread with some informations like - the return value. - This is done by setting the current return address on the current - task structure into a stack of calls. - config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" default n @@ -168,70 +138,6 @@ config BOOT_TRACER selected, because the self-tests are an initcall as well and that would invalidate the boot trace. ) -config TRACE_BRANCH_PROFILING - bool "Trace likely/unlikely profiler" - depends on DEBUG_KERNEL - select TRACING - help - This tracer profiles all the the likely and unlikely macros - in the kernel. It will display the results in: - - /debugfs/tracing/profile_annotated_branch - - Note: this will add a significant overhead, only turn this - on if you need to profile the system's use of these macros. - - Say N if unsure. - -config PROFILE_ALL_BRANCHES - bool "Profile all if conditionals" - depends on TRACE_BRANCH_PROFILING - help - This tracer profiles all branch conditions. Every if () - taken in the kernel is recorded whether it hit or miss. - The results will be displayed in: - - /debugfs/tracing/profile_branch - - This configuration, when enabled, will impose a great overhead - on the system. This should only be enabled when the system - is to be analyzed - - Say N if unsure. - -config TRACING_BRANCHES - bool - help - Selected by tracers that will trace the likely and unlikely - conditions. This prevents the tracers themselves from being - profiled. Profiling the tracing infrastructure can only happen - when the likelys and unlikelys are not being traced. - -config BRANCH_TRACER - bool "Trace likely/unlikely instances" - depends on TRACE_BRANCH_PROFILING - select TRACING_BRANCHES - help - This traces the events of likely and unlikely condition - calls in the kernel. The difference between this and the - "Trace likely/unlikely profiler" is that this is not a - histogram of the callers, but actually places the calling - events into a running trace buffer to see when and where the - events happened, as well as their results. - - Say N if unsure. - -config POWER_TRACER - bool "Trace power consumption behavior" - depends on DEBUG_KERNEL - depends on X86 - select TRACING - help - This tracer helps developers to analyze and optimize the kernels - power management decisions, specifically the C-state and P-state - behavior. - - config STACK_TRACER bool "Trace max stack" depends on HAVE_FUNCTION_TRACER @@ -251,14 +157,6 @@ config STACK_TRACER Say N if unsure. -config BTS_TRACER - depends on HAVE_HW_BRANCH_TRACER - bool "Trace branches" - select TRACING - help - This tracer records all branches on the system in a circular - buffer giving access to the last N branches for each cpu. - config DYNAMIC_FTRACE bool "enable/disable ftrace tracepoints dynamically" depends on FUNCTION_TRACER diff --git a/trunk/kernel/trace/Makefile b/trunk/kernel/trace/Makefile index 62dc561b6676..c8228b1a49e9 100644 --- a/trunk/kernel/trace/Makefile +++ b/trunk/kernel/trace/Makefile @@ -10,11 +10,6 @@ CFLAGS_trace_selftest_dynamic.o = -pg obj-y += trace_selftest_dynamic.o endif -# If unlikely tracing is enabled, do not trace these files -ifdef CONFIG_TRACING_BRANCHES -KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -endif - obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o obj-$(CONFIG_RING_BUFFER) += ring_buffer.o @@ -29,9 +24,5 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o obj-$(CONFIG_STACK_TRACER) += trace_stack.o obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o obj-$(CONFIG_BOOT_TRACER) += trace_boot.o -obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o -obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o -obj-$(CONFIG_BTS_TRACER) += trace_bts.o -obj-$(CONFIG_POWER_TRACER) += trace_power.o libftrace-y := ftrace.o diff --git a/trunk/kernel/trace/ftrace.c b/trunk/kernel/trace/ftrace.c index a12f80efceaa..78db083390f0 100644 --- a/trunk/kernel/trace/ftrace.c +++ b/trunk/kernel/trace/ftrace.c @@ -47,13 +47,6 @@ int ftrace_enabled __read_mostly; static int last_ftrace_enabled; -/* set when tracing only a pid */ -struct pid *ftrace_pid_trace; -static struct pid * const ftrace_swapper_pid = &init_struct_pid; - -/* Quick disabling of function tracer. */ -int function_trace_stop; - /* * ftrace_disabled is set when an anomaly is discovered. * ftrace_disabled is much stronger than ftrace_enabled. @@ -62,7 +55,6 @@ static int ftrace_disabled __read_mostly; static DEFINE_SPINLOCK(ftrace_lock); static DEFINE_MUTEX(ftrace_sysctl_lock); -static DEFINE_MUTEX(ftrace_start_lock); static struct ftrace_ops ftrace_list_end __read_mostly = { @@ -71,8 +63,6 @@ static struct ftrace_ops ftrace_list_end __read_mostly = static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; -ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; -ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) { @@ -89,21 +79,6 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) }; } -static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip) -{ - if (!test_tsk_trace_trace(current)) - return; - - ftrace_pid_function(ip, parent_ip); -} - -static void set_ftrace_pid_function(ftrace_func_t func) -{ - /* do not set ftrace_pid_function to itself! */ - if (func != ftrace_pid_func) - ftrace_pid_function = func; -} - /** * clear_ftrace_function - reset the ftrace function * @@ -113,23 +88,7 @@ static void set_ftrace_pid_function(ftrace_func_t func) void clear_ftrace_function(void) { ftrace_trace_function = ftrace_stub; - __ftrace_trace_function = ftrace_stub; - ftrace_pid_function = ftrace_stub; -} - -#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST -/* - * For those archs that do not test ftrace_trace_stop in their - * mcount call site, we need to do it from C. - */ -static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip) -{ - if (function_trace_stop) - return; - - __ftrace_trace_function(ip, parent_ip); } -#endif static int __register_ftrace_function(struct ftrace_ops *ops) { @@ -147,28 +106,14 @@ static int __register_ftrace_function(struct ftrace_ops *ops) ftrace_list = ops; if (ftrace_enabled) { - ftrace_func_t func; - - if (ops->next == &ftrace_list_end) - func = ops->func; - else - func = ftrace_list_func; - - if (ftrace_pid_trace) { - set_ftrace_pid_function(func); - func = ftrace_pid_func; - } - /* * For one func, simply call it directly. * For more than one func, call the chain. */ -#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST - ftrace_trace_function = func; -#else - __ftrace_trace_function = func; - ftrace_trace_function = ftrace_test_stop_func; -#endif + if (ops->next == &ftrace_list_end) + ftrace_trace_function = ops->func; + else + ftrace_trace_function = ftrace_list_func; } spin_unlock(&ftrace_lock); @@ -207,19 +152,9 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_enabled) { /* If we only have one func left, then call that directly */ - if (ftrace_list->next == &ftrace_list_end) { - ftrace_func_t func = ftrace_list->func; - - if (ftrace_pid_trace) { - set_ftrace_pid_function(func); - func = ftrace_pid_func; - } -#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST - ftrace_trace_function = func; -#else - __ftrace_trace_function = func; -#endif - } + if (ftrace_list == &ftrace_list_end || + ftrace_list->next == &ftrace_list_end) + ftrace_trace_function = ftrace_list->func; } out: @@ -228,36 +163,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) return ret; } -static void ftrace_update_pid_func(void) -{ - ftrace_func_t func; - - /* should not be called from interrupt context */ - spin_lock(&ftrace_lock); - - if (ftrace_trace_function == ftrace_stub) - goto out; - - func = ftrace_trace_function; - - if (ftrace_pid_trace) { - set_ftrace_pid_function(func); - func = ftrace_pid_func; - } else { - if (func == ftrace_pid_func) - func = ftrace_pid_function; - } - -#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST - ftrace_trace_function = func; -#else - __ftrace_trace_function = func; -#endif - - out: - spin_unlock(&ftrace_lock); -} - #ifdef CONFIG_DYNAMIC_FTRACE #ifndef CONFIG_FTRACE_MCOUNT_RECORD # error Dynamic ftrace depends on MCOUNT_RECORD @@ -277,8 +182,6 @@ enum { FTRACE_UPDATE_TRACE_FUNC = (1 << 2), FTRACE_ENABLE_MCOUNT = (1 << 3), FTRACE_DISABLE_MCOUNT = (1 << 4), - FTRACE_START_FUNC_RET = (1 << 5), - FTRACE_STOP_FUNC_RET = (1 << 6), }; static int ftrace_filtered; @@ -405,7 +308,7 @@ ftrace_record_ip(unsigned long ip) { struct dyn_ftrace *rec; - if (ftrace_disabled) + if (!ftrace_enabled || ftrace_disabled) return NULL; rec = ftrace_alloc_dyn_node(ip); @@ -419,51 +322,14 @@ ftrace_record_ip(unsigned long ip) return rec; } -static void print_ip_ins(const char *fmt, unsigned char *p) -{ - int i; - - printk(KERN_CONT "%s", fmt); - - for (i = 0; i < MCOUNT_INSN_SIZE; i++) - printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); -} - -static void ftrace_bug(int failed, unsigned long ip) -{ - switch (failed) { - case -EFAULT: - FTRACE_WARN_ON_ONCE(1); - pr_info("ftrace faulted on modifying "); - print_ip_sym(ip); - break; - case -EINVAL: - FTRACE_WARN_ON_ONCE(1); - pr_info("ftrace failed to modify "); - print_ip_sym(ip); - print_ip_ins(" actual: ", (unsigned char *)ip); - printk(KERN_CONT "\n"); - break; - case -EPERM: - FTRACE_WARN_ON_ONCE(1); - pr_info("ftrace faulted on writing "); - print_ip_sym(ip); - break; - default: - FTRACE_WARN_ON_ONCE(1); - pr_info("ftrace faulted on unknown error "); - print_ip_sym(ip); - } -} - +#define FTRACE_ADDR ((long)(ftrace_caller)) static int -__ftrace_replace_code(struct dyn_ftrace *rec, int enable) +__ftrace_replace_code(struct dyn_ftrace *rec, + unsigned char *nop, int enable) { unsigned long ip, fl; - unsigned long ftrace_addr; - - ftrace_addr = (unsigned long)ftrace_caller; + unsigned char *call, *old, *new; ip = rec->ip; @@ -522,28 +388,34 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) } } - if (rec->flags & FTRACE_FL_ENABLED) - return ftrace_make_call(rec, ftrace_addr); - else - return ftrace_make_nop(NULL, rec, ftrace_addr); + call = ftrace_call_replace(ip, FTRACE_ADDR); + + if (rec->flags & FTRACE_FL_ENABLED) { + old = nop; + new = call; + } else { + old = call; + new = nop; + } + + return ftrace_modify_code(ip, old, new); } static void ftrace_replace_code(int enable) { int i, failed; + unsigned char *nop = NULL; struct dyn_ftrace *rec; struct ftrace_page *pg; + nop = ftrace_nop_replace(); + for (pg = ftrace_pages_start; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { rec = &pg->records[i]; - /* - * Skip over free records and records that have - * failed. - */ - if (rec->flags & FTRACE_FL_FREE || - rec->flags & FTRACE_FL_FAILED) + /* don't modify code that has already faulted */ + if (rec->flags & FTRACE_FL_FAILED) continue; /* ignore updates to this record's mcount site */ @@ -554,30 +426,68 @@ static void ftrace_replace_code(int enable) unfreeze_record(rec); } - failed = __ftrace_replace_code(rec, enable); + failed = __ftrace_replace_code(rec, nop, enable); if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { rec->flags |= FTRACE_FL_FAILED; if ((system_state == SYSTEM_BOOTING) || !core_kernel_text(rec->ip)) { ftrace_free_rec(rec); - } else - ftrace_bug(failed, rec->ip); + } } } } } +static void print_ip_ins(const char *fmt, unsigned char *p) +{ + int i; + + printk(KERN_CONT "%s", fmt); + + for (i = 0; i < MCOUNT_INSN_SIZE; i++) + printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); +} + static int -ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) +ftrace_code_disable(struct dyn_ftrace *rec) { unsigned long ip; + unsigned char *nop, *call; int ret; ip = rec->ip; - ret = ftrace_make_nop(mod, rec, mcount_addr); + nop = ftrace_nop_replace(); + call = ftrace_call_replace(ip, mcount_addr); + + ret = ftrace_modify_code(ip, call, nop); if (ret) { - ftrace_bug(ret, ip); + switch (ret) { + case -EFAULT: + FTRACE_WARN_ON_ONCE(1); + pr_info("ftrace faulted on modifying "); + print_ip_sym(ip); + break; + case -EINVAL: + FTRACE_WARN_ON_ONCE(1); + pr_info("ftrace failed to modify "); + print_ip_sym(ip); + print_ip_ins(" expected: ", call); + print_ip_ins(" actual: ", (unsigned char *)ip); + print_ip_ins(" replace: ", nop); + printk(KERN_CONT "\n"); + break; + case -EPERM: + FTRACE_WARN_ON_ONCE(1); + pr_info("ftrace faulted on writing "); + print_ip_sym(ip); + break; + default: + FTRACE_WARN_ON_ONCE(1); + pr_info("ftrace faulted on unknown error "); + print_ip_sym(ip); + } + rec->flags |= FTRACE_FL_FAILED; return 0; } @@ -596,11 +506,6 @@ static int __ftrace_modify_code(void *data) if (*command & FTRACE_UPDATE_TRACE_FUNC) ftrace_update_ftrace_func(ftrace_trace_function); - if (*command & FTRACE_START_FUNC_RET) - ftrace_enable_ftrace_graph_caller(); - else if (*command & FTRACE_STOP_FUNC_RET) - ftrace_disable_ftrace_graph_caller(); - return 0; } @@ -610,43 +515,43 @@ static void ftrace_run_update_code(int command) } static ftrace_func_t saved_ftrace_func; -static int ftrace_start_up; +static int ftrace_start; +static DEFINE_MUTEX(ftrace_start_lock); -static void ftrace_startup_enable(int command) +static void ftrace_startup(void) { - if (saved_ftrace_func != ftrace_trace_function) { - saved_ftrace_func = ftrace_trace_function; - command |= FTRACE_UPDATE_TRACE_FUNC; - } - - if (!command || !ftrace_enabled) - return; + int command = 0; - ftrace_run_update_code(command); -} - -static void ftrace_startup(int command) -{ if (unlikely(ftrace_disabled)) return; mutex_lock(&ftrace_start_lock); - ftrace_start_up++; + ftrace_start++; command |= FTRACE_ENABLE_CALLS; - ftrace_startup_enable(command); + if (saved_ftrace_func != ftrace_trace_function) { + saved_ftrace_func = ftrace_trace_function; + command |= FTRACE_UPDATE_TRACE_FUNC; + } + + if (!command || !ftrace_enabled) + goto out; + ftrace_run_update_code(command); + out: mutex_unlock(&ftrace_start_lock); } -static void ftrace_shutdown(int command) +static void ftrace_shutdown(void) { + int command = 0; + if (unlikely(ftrace_disabled)) return; mutex_lock(&ftrace_start_lock); - ftrace_start_up--; - if (!ftrace_start_up) + ftrace_start--; + if (!ftrace_start) command |= FTRACE_DISABLE_CALLS; if (saved_ftrace_func != ftrace_trace_function) { @@ -672,8 +577,8 @@ static void ftrace_startup_sysctl(void) mutex_lock(&ftrace_start_lock); /* Force update next time */ saved_ftrace_func = NULL; - /* ftrace_start_up is true if we want ftrace running */ - if (ftrace_start_up) + /* ftrace_start is true if we want ftrace running */ + if (ftrace_start) command |= FTRACE_ENABLE_CALLS; ftrace_run_update_code(command); @@ -688,8 +593,8 @@ static void ftrace_shutdown_sysctl(void) return; mutex_lock(&ftrace_start_lock); - /* ftrace_start_up is true if ftrace is running */ - if (ftrace_start_up) + /* ftrace_start is true if ftrace is running */ + if (ftrace_start) command |= FTRACE_DISABLE_CALLS; ftrace_run_update_code(command); @@ -700,7 +605,7 @@ static cycle_t ftrace_update_time; static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; -static int ftrace_update_code(struct module *mod) +static int ftrace_update_code(void) { struct dyn_ftrace *p, *t; cycle_t start, stop; @@ -717,7 +622,7 @@ static int ftrace_update_code(struct module *mod) list_del_init(&p->list); /* convert record (i.e, patch mcount-call with NOP) */ - if (ftrace_code_disable(mod, p)) { + if (ftrace_code_disable(p)) { p->flags |= FTRACE_FL_CONVERTED; ftrace_update_cnt++; } else @@ -785,6 +690,7 @@ enum { #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ struct ftrace_iterator { + loff_t pos; struct ftrace_page *pg; unsigned idx; unsigned flags; @@ -809,8 +715,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos) iter->pg = iter->pg->next; iter->idx = 0; goto retry; - } else { - iter->idx = -1; } } else { rec = &iter->pg->records[iter->idx++]; @@ -833,6 +737,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos) } spin_unlock(&ftrace_lock); + iter->pos = *pos; + return rec; } @@ -840,15 +746,13 @@ static void *t_start(struct seq_file *m, loff_t *pos) { struct ftrace_iterator *iter = m->private; void *p = NULL; + loff_t l = -1; - if (*pos > 0) { - if (iter->idx < 0) - return p; - (*pos)--; - iter->idx--; - } + if (*pos > iter->pos) + *pos = iter->pos; - p = t_next(m, p, pos); + l = *pos; + p = t_next(m, p, &l); return p; } @@ -859,15 +763,21 @@ static void t_stop(struct seq_file *m, void *p) static int t_show(struct seq_file *m, void *v) { + struct ftrace_iterator *iter = m->private; struct dyn_ftrace *rec = v; char str[KSYM_SYMBOL_LEN]; + int ret = 0; if (!rec) return 0; kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); - seq_printf(m, "%s\n", str); + ret = seq_printf(m, "%s\n", str); + if (ret < 0) { + iter->pos--; + iter->idx--; + } return 0; } @@ -893,6 +803,7 @@ ftrace_avail_open(struct inode *inode, struct file *file) return -ENOMEM; iter->pg = ftrace_pages_start; + iter->pos = 0; ret = seq_open(file, &show_ftrace_seq_ops); if (!ret) { @@ -979,6 +890,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable) if (file->f_mode & FMODE_READ) { iter->pg = ftrace_pages_start; + iter->pos = 0; iter->flags = enable ? FTRACE_ITER_FILTER : FTRACE_ITER_NOTRACE; @@ -1269,7 +1181,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) mutex_lock(&ftrace_sysctl_lock); mutex_lock(&ftrace_start_lock); - if (ftrace_start_up && ftrace_enabled) + if (ftrace_start && ftrace_enabled) ftrace_run_update_code(FTRACE_ENABLE_CALLS); mutex_unlock(&ftrace_start_lock); mutex_unlock(&ftrace_sysctl_lock); @@ -1321,234 +1233,13 @@ static struct file_operations ftrace_notrace_fops = { .release = ftrace_notrace_release, }; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - -static DEFINE_MUTEX(graph_lock); - -int ftrace_graph_count; -unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; - -static void * -g_next(struct seq_file *m, void *v, loff_t *pos) -{ - unsigned long *array = m->private; - int index = *pos; - - (*pos)++; - - if (index >= ftrace_graph_count) - return NULL; - - return &array[index]; -} - -static void *g_start(struct seq_file *m, loff_t *pos) -{ - void *p = NULL; - - mutex_lock(&graph_lock); - - p = g_next(m, p, pos); - - return p; -} - -static void g_stop(struct seq_file *m, void *p) -{ - mutex_unlock(&graph_lock); -} - -static int g_show(struct seq_file *m, void *v) -{ - unsigned long *ptr = v; - char str[KSYM_SYMBOL_LEN]; - - if (!ptr) - return 0; - - kallsyms_lookup(*ptr, NULL, NULL, NULL, str); - - seq_printf(m, "%s\n", str); - - return 0; -} - -static struct seq_operations ftrace_graph_seq_ops = { - .start = g_start, - .next = g_next, - .stop = g_stop, - .show = g_show, -}; - -static int -ftrace_graph_open(struct inode *inode, struct file *file) -{ - int ret = 0; - - if (unlikely(ftrace_disabled)) - return -ENODEV; - - mutex_lock(&graph_lock); - if ((file->f_mode & FMODE_WRITE) && - !(file->f_flags & O_APPEND)) { - ftrace_graph_count = 0; - memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); - } - - if (file->f_mode & FMODE_READ) { - ret = seq_open(file, &ftrace_graph_seq_ops); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = ftrace_graph_funcs; - } - } else - file->private_data = ftrace_graph_funcs; - mutex_unlock(&graph_lock); - - return ret; -} - -static ssize_t -ftrace_graph_read(struct file *file, char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - if (file->f_mode & FMODE_READ) - return seq_read(file, ubuf, cnt, ppos); - else - return -EPERM; -} - -static int -ftrace_set_func(unsigned long *array, int idx, char *buffer) -{ - char str[KSYM_SYMBOL_LEN]; - struct dyn_ftrace *rec; - struct ftrace_page *pg; - int found = 0; - int i, j; - - if (ftrace_disabled) - return -ENODEV; - - /* should not be called from interrupt context */ - spin_lock(&ftrace_lock); - - for (pg = ftrace_pages_start; pg; pg = pg->next) { - for (i = 0; i < pg->index; i++) { - rec = &pg->records[i]; - - if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) - continue; - - kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); - if (strcmp(str, buffer) == 0) { - found = 1; - for (j = 0; j < idx; j++) - if (array[j] == rec->ip) { - found = 0; - break; - } - if (found) - array[idx] = rec->ip; - break; - } - } - } - spin_unlock(&ftrace_lock); - - return found ? 0 : -EINVAL; -} - -static ssize_t -ftrace_graph_write(struct file *file, const char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - unsigned char buffer[FTRACE_BUFF_MAX+1]; - unsigned long *array; - size_t read = 0; - ssize_t ret; - int index = 0; - char ch; - - if (!cnt || cnt < 0) - return 0; - - mutex_lock(&graph_lock); - - if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) { - ret = -EBUSY; - goto out; - } - - if (file->f_mode & FMODE_READ) { - struct seq_file *m = file->private_data; - array = m->private; - } else - array = file->private_data; - - ret = get_user(ch, ubuf++); - if (ret) - goto out; - read++; - cnt--; - - /* skip white space */ - while (cnt && isspace(ch)) { - ret = get_user(ch, ubuf++); - if (ret) - goto out; - read++; - cnt--; - } - - if (isspace(ch)) { - *ppos += read; - ret = read; - goto out; - } - - while (cnt && !isspace(ch)) { - if (index < FTRACE_BUFF_MAX) - buffer[index++] = ch; - else { - ret = -EINVAL; - goto out; - } - ret = get_user(ch, ubuf++); - if (ret) - goto out; - read++; - cnt--; - } - buffer[index] = 0; - - /* we allow only one at a time */ - ret = ftrace_set_func(array, ftrace_graph_count, buffer); - if (ret) - goto out; - - ftrace_graph_count++; - - file->f_pos += read; - - ret = read; - out: - mutex_unlock(&graph_lock); - - return ret; -} - -static const struct file_operations ftrace_graph_fops = { - .open = ftrace_graph_open, - .read = ftrace_graph_read, - .write = ftrace_graph_write, -}; -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - -static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) +static __init int ftrace_init_debugfs(void) { + struct dentry *d_tracer; struct dentry *entry; + d_tracer = tracing_init_dentry(); + entry = debugfs_create_file("available_filter_functions", 0444, d_tracer, NULL, &ftrace_avail_fops); if (!entry) @@ -1572,20 +1263,12 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) pr_warning("Could not create debugfs " "'set_ftrace_notrace' entry\n"); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - entry = debugfs_create_file("set_graph_function", 0444, d_tracer, - NULL, - &ftrace_graph_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_graph_function' entry\n"); -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - return 0; } -static int ftrace_convert_nops(struct module *mod, - unsigned long *start, +fs_initcall(ftrace_init_debugfs); + +static int ftrace_convert_nops(unsigned long *start, unsigned long *end) { unsigned long *p; @@ -1596,32 +1279,23 @@ static int ftrace_convert_nops(struct module *mod, p = start; while (p < end) { addr = ftrace_call_adjust(*p++); - /* - * Some architecture linkers will pad between - * the different mcount_loc sections of different - * object files to satisfy alignments. - * Skip any NULL pointers. - */ - if (!addr) - continue; ftrace_record_ip(addr); } /* disable interrupts to prevent kstop machine */ local_irq_save(flags); - ftrace_update_code(mod); + ftrace_update_code(); local_irq_restore(flags); mutex_unlock(&ftrace_start_lock); return 0; } -void ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end) +void ftrace_init_module(unsigned long *start, unsigned long *end) { if (ftrace_disabled || start == end) return; - ftrace_convert_nops(mod, start, end); + ftrace_convert_nops(start, end); } extern unsigned long __start_mcount_loc[]; @@ -1651,8 +1325,7 @@ void __init ftrace_init(void) last_ftrace_enabled = ftrace_enabled = 1; - ret = ftrace_convert_nops(NULL, - __start_mcount_loc, + ret = ftrace_convert_nops(__start_mcount_loc, __stop_mcount_loc); return; @@ -1669,186 +1342,12 @@ static int __init ftrace_nodyn_init(void) } device_initcall(ftrace_nodyn_init); -static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } -static inline void ftrace_startup_enable(int command) { } -/* Keep as macros so we do not need to define the commands */ -# define ftrace_startup(command) do { } while (0) -# define ftrace_shutdown(command) do { } while (0) +# define ftrace_startup() do { } while (0) +# define ftrace_shutdown() do { } while (0) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ -static ssize_t -ftrace_pid_read(struct file *file, char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - char buf[64]; - int r; - - if (ftrace_pid_trace == ftrace_swapper_pid) - r = sprintf(buf, "swapper tasks\n"); - else if (ftrace_pid_trace) - r = sprintf(buf, "%u\n", pid_nr(ftrace_pid_trace)); - else - r = sprintf(buf, "no pid\n"); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - -static void clear_ftrace_swapper(void) -{ - struct task_struct *p; - int cpu; - - get_online_cpus(); - for_each_online_cpu(cpu) { - p = idle_task(cpu); - clear_tsk_trace_trace(p); - } - put_online_cpus(); -} - -static void set_ftrace_swapper(void) -{ - struct task_struct *p; - int cpu; - - get_online_cpus(); - for_each_online_cpu(cpu) { - p = idle_task(cpu); - set_tsk_trace_trace(p); - } - put_online_cpus(); -} - -static void clear_ftrace_pid(struct pid *pid) -{ - struct task_struct *p; - - do_each_pid_task(pid, PIDTYPE_PID, p) { - clear_tsk_trace_trace(p); - } while_each_pid_task(pid, PIDTYPE_PID, p); - put_pid(pid); -} - -static void set_ftrace_pid(struct pid *pid) -{ - struct task_struct *p; - - do_each_pid_task(pid, PIDTYPE_PID, p) { - set_tsk_trace_trace(p); - } while_each_pid_task(pid, PIDTYPE_PID, p); -} - -static void clear_ftrace_pid_task(struct pid **pid) -{ - if (*pid == ftrace_swapper_pid) - clear_ftrace_swapper(); - else - clear_ftrace_pid(*pid); - - *pid = NULL; -} - -static void set_ftrace_pid_task(struct pid *pid) -{ - if (pid == ftrace_swapper_pid) - set_ftrace_swapper(); - else - set_ftrace_pid(pid); -} - -static ssize_t -ftrace_pid_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - struct pid *pid; - char buf[64]; - long val; - int ret; - - if (cnt >= sizeof(buf)) - return -EINVAL; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - - ret = strict_strtol(buf, 10, &val); - if (ret < 0) - return ret; - - mutex_lock(&ftrace_start_lock); - if (val < 0) { - /* disable pid tracing */ - if (!ftrace_pid_trace) - goto out; - - clear_ftrace_pid_task(&ftrace_pid_trace); - - } else { - /* swapper task is special */ - if (!val) { - pid = ftrace_swapper_pid; - if (pid == ftrace_pid_trace) - goto out; - } else { - pid = find_get_pid(val); - - if (pid == ftrace_pid_trace) { - put_pid(pid); - goto out; - } - } - - if (ftrace_pid_trace) - clear_ftrace_pid_task(&ftrace_pid_trace); - - if (!pid) - goto out; - - ftrace_pid_trace = pid; - - set_ftrace_pid_task(ftrace_pid_trace); - } - - /* update the function call */ - ftrace_update_pid_func(); - ftrace_startup_enable(0); - - out: - mutex_unlock(&ftrace_start_lock); - - return cnt; -} - -static struct file_operations ftrace_pid_fops = { - .read = ftrace_pid_read, - .write = ftrace_pid_write, -}; - -static __init int ftrace_init_debugfs(void) -{ - struct dentry *d_tracer; - struct dentry *entry; - - d_tracer = tracing_init_dentry(); - if (!d_tracer) - return 0; - - ftrace_init_dyn_debugfs(d_tracer); - - entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer, - NULL, &ftrace_pid_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_ftrace_pid' entry\n"); - return 0; -} - -fs_initcall(ftrace_init_debugfs); - /** * ftrace_kill - kill ftrace * @@ -1882,11 +1381,10 @@ int register_ftrace_function(struct ftrace_ops *ops) return -1; mutex_lock(&ftrace_sysctl_lock); - ret = __register_ftrace_function(ops); - ftrace_startup(0); - + ftrace_startup(); mutex_unlock(&ftrace_sysctl_lock); + return ret; } @@ -1902,7 +1400,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) mutex_lock(&ftrace_sysctl_lock); ret = __unregister_ftrace_function(ops); - ftrace_shutdown(0); + ftrace_shutdown(); mutex_unlock(&ftrace_sysctl_lock); return ret; @@ -1951,153 +1449,3 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, return ret; } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - -static atomic_t ftrace_graph_active; - -int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) -{ - return 0; -} - -/* The callbacks that hook a function */ -trace_func_graph_ret_t ftrace_graph_return = - (trace_func_graph_ret_t)ftrace_stub; -trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub; - -/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ -static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) -{ - int i; - int ret = 0; - unsigned long flags; - int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE; - struct task_struct *g, *t; - - for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) { - ret_stack_list[i] = kmalloc(FTRACE_RETFUNC_DEPTH - * sizeof(struct ftrace_ret_stack), - GFP_KERNEL); - if (!ret_stack_list[i]) { - start = 0; - end = i; - ret = -ENOMEM; - goto free; - } - } - - read_lock_irqsave(&tasklist_lock, flags); - do_each_thread(g, t) { - if (start == end) { - ret = -EAGAIN; - goto unlock; - } - - if (t->ret_stack == NULL) { - t->curr_ret_stack = -1; - /* Make sure IRQs see the -1 first: */ - barrier(); - t->ret_stack = ret_stack_list[start++]; - atomic_set(&t->tracing_graph_pause, 0); - atomic_set(&t->trace_overrun, 0); - } - } while_each_thread(g, t); - -unlock: - read_unlock_irqrestore(&tasklist_lock, flags); -free: - for (i = start; i < end; i++) - kfree(ret_stack_list[i]); - return ret; -} - -/* Allocate a return stack for each task */ -static int start_graph_tracing(void) -{ - struct ftrace_ret_stack **ret_stack_list; - int ret; - - ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE * - sizeof(struct ftrace_ret_stack *), - GFP_KERNEL); - - if (!ret_stack_list) - return -ENOMEM; - - do { - ret = alloc_retstack_tasklist(ret_stack_list); - } while (ret == -EAGAIN); - - kfree(ret_stack_list); - return ret; -} - -int register_ftrace_graph(trace_func_graph_ret_t retfunc, - trace_func_graph_ent_t entryfunc) -{ - int ret = 0; - - mutex_lock(&ftrace_sysctl_lock); - - atomic_inc(&ftrace_graph_active); - ret = start_graph_tracing(); - if (ret) { - atomic_dec(&ftrace_graph_active); - goto out; - } - - ftrace_graph_return = retfunc; - ftrace_graph_entry = entryfunc; - - ftrace_startup(FTRACE_START_FUNC_RET); - -out: - mutex_unlock(&ftrace_sysctl_lock); - return ret; -} - -void unregister_ftrace_graph(void) -{ - mutex_lock(&ftrace_sysctl_lock); - - atomic_dec(&ftrace_graph_active); - ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; - ftrace_graph_entry = ftrace_graph_entry_stub; - ftrace_shutdown(FTRACE_STOP_FUNC_RET); - - mutex_unlock(&ftrace_sysctl_lock); -} - -/* Allocate a return stack for newly created task */ -void ftrace_graph_init_task(struct task_struct *t) -{ - if (atomic_read(&ftrace_graph_active)) { - t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH - * sizeof(struct ftrace_ret_stack), - GFP_KERNEL); - if (!t->ret_stack) - return; - t->curr_ret_stack = -1; - atomic_set(&t->tracing_graph_pause, 0); - atomic_set(&t->trace_overrun, 0); - } else - t->ret_stack = NULL; -} - -void ftrace_graph_exit_task(struct task_struct *t) -{ - struct ftrace_ret_stack *ret_stack = t->ret_stack; - - t->ret_stack = NULL; - /* NULL must become visible to IRQs before we free it: */ - barrier(); - - kfree(ret_stack); -} - -void ftrace_graph_stop(void) -{ - ftrace_stop(); -} -#endif - diff --git a/trunk/kernel/trace/ring_buffer.c b/trunk/kernel/trace/ring_buffer.c index 7f69cfeaadf7..668bbb5ef2bd 100644 --- a/trunk/kernel/trace/ring_buffer.c +++ b/trunk/kernel/trace/ring_buffer.c @@ -18,46 +18,8 @@ #include "trace.h" -/* - * A fast way to enable or disable all ring buffers is to - * call tracing_on or tracing_off. Turning off the ring buffers - * prevents all ring buffers from being recorded to. - * Turning this switch on, makes it OK to write to the - * ring buffer, if the ring buffer is enabled itself. - * - * There's three layers that must be on in order to write - * to the ring buffer. - * - * 1) This global flag must be set. - * 2) The ring buffer must be enabled for recording. - * 3) The per cpu buffer must be enabled for recording. - * - * In case of an anomaly, this global flag has a bit set that - * will permantly disable all ring buffers. - */ - -/* - * Global flag to disable all recording to ring buffers - * This has two bits: ON, DISABLED - * - * ON DISABLED - * ---- ---------- - * 0 0 : ring buffers are off - * 1 0 : ring buffers are on - * X 1 : ring buffers are permanently disabled - */ - -enum { - RB_BUFFERS_ON_BIT = 0, - RB_BUFFERS_DISABLED_BIT = 1, -}; - -enum { - RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT, - RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, -}; - -static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; +/* Global flag to disable all recording to ring buffers */ +static int ring_buffers_off __read_mostly; /** * tracing_on - enable all tracing buffers @@ -67,7 +29,7 @@ static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; */ void tracing_on(void) { - set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags); + ring_buffers_off = 0; } /** @@ -80,22 +42,9 @@ void tracing_on(void) */ void tracing_off(void) { - clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags); + ring_buffers_off = 1; } -/** - * tracing_off_permanent - permanently disable ring buffers - * - * This function, once called, will disable all ring buffers - * permanenty. - */ -void tracing_off_permanent(void) -{ - set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); -} - -#include "trace.h" - /* Up this if you want to test the TIME_EXTENTS and normalization */ #define DEBUG_SHIFT 0 @@ -195,24 +144,20 @@ void *ring_buffer_event_data(struct ring_buffer_event *event) #define TS_MASK ((1ULL << TS_SHIFT) - 1) #define TS_DELTA_TEST (~TS_MASK) -struct buffer_data_page { - u64 time_stamp; /* page time stamp */ - local_t commit; /* write commited index */ - unsigned char data[]; /* data of buffer page */ -}; - +/* + * This hack stolen from mm/slob.c. + * We can store per page timing information in the page frame of the page. + * Thanks to Peter Zijlstra for suggesting this idea. + */ struct buffer_page { + u64 time_stamp; /* page time stamp */ local_t write; /* index for next write */ + local_t commit; /* write commited index */ unsigned read; /* index for next read */ struct list_head list; /* list of free pages */ - struct buffer_data_page *page; /* Actual data page */ + void *page; /* Actual data page */ }; -static void rb_init_page(struct buffer_data_page *bpage) -{ - local_set(&bpage->commit, 0); -} - /* * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing * this issue out. @@ -234,7 +179,7 @@ static inline int test_time_stamp(u64 delta) return 0; } -#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page)) +#define BUF_PAGE_SIZE PAGE_SIZE /* * head_page == tail_page && head == tail then buffer is empty. @@ -242,8 +187,7 @@ static inline int test_time_stamp(u64 delta) struct ring_buffer_per_cpu { int cpu; struct ring_buffer *buffer; - spinlock_t reader_lock; /* serialize readers */ - raw_spinlock_t lock; + spinlock_t lock; struct lock_class_key lock_key; struct list_head pages; struct buffer_page *head_page; /* read from head */ @@ -277,16 +221,32 @@ struct ring_buffer_iter { u64 read_stamp; }; -/* buffer may be either ring_buffer or ring_buffer_per_cpu */ #define RB_WARN_ON(buffer, cond) \ - ({ \ - int _____ret = unlikely(cond); \ - if (_____ret) { \ + do { \ + if (unlikely(cond)) { \ + atomic_inc(&buffer->record_disabled); \ + WARN_ON(1); \ + } \ + } while (0) + +#define RB_WARN_ON_RET(buffer, cond) \ + do { \ + if (unlikely(cond)) { \ + atomic_inc(&buffer->record_disabled); \ + WARN_ON(1); \ + return -1; \ + } \ + } while (0) + +#define RB_WARN_ON_ONCE(buffer, cond) \ + do { \ + static int once; \ + if (unlikely(cond) && !once) { \ + once++; \ atomic_inc(&buffer->record_disabled); \ WARN_ON(1); \ } \ - _____ret; \ - }) + } while (0) /** * check_pages - integrity check of buffer pages @@ -298,20 +258,16 @@ struct ring_buffer_iter { static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *head = &cpu_buffer->pages; - struct buffer_page *bpage, *tmp; + struct buffer_page *page, *tmp; - if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) - return -1; - if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) - return -1; + RB_WARN_ON_RET(cpu_buffer, head->next->prev != head); + RB_WARN_ON_RET(cpu_buffer, head->prev->next != head); - list_for_each_entry_safe(bpage, tmp, head, list) { - if (RB_WARN_ON(cpu_buffer, - bpage->list.next->prev != &bpage->list)) - return -1; - if (RB_WARN_ON(cpu_buffer, - bpage->list.prev->next != &bpage->list)) - return -1; + list_for_each_entry_safe(page, tmp, head, list) { + RB_WARN_ON_RET(cpu_buffer, + page->list.next->prev != &page->list); + RB_WARN_ON_RET(cpu_buffer, + page->list.prev->next != &page->list); } return 0; @@ -321,23 +277,22 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) { struct list_head *head = &cpu_buffer->pages; - struct buffer_page *bpage, *tmp; + struct buffer_page *page, *tmp; unsigned long addr; LIST_HEAD(pages); unsigned i; for (i = 0; i < nr_pages; i++) { - bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), + page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); - if (!bpage) + if (!page) goto free_pages; - list_add(&bpage->list, &pages); + list_add(&page->list, &pages); addr = __get_free_page(GFP_KERNEL); if (!addr) goto free_pages; - bpage->page = (void *)addr; - rb_init_page(bpage->page); + page->page = (void *)addr; } list_splice(&pages, head); @@ -347,9 +302,9 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, return 0; free_pages: - list_for_each_entry_safe(bpage, tmp, &pages, list) { - list_del_init(&bpage->list); - free_buffer_page(bpage); + list_for_each_entry_safe(page, tmp, &pages, list) { + list_del_init(&page->list); + free_buffer_page(page); } return -ENOMEM; } @@ -358,7 +313,7 @@ static struct ring_buffer_per_cpu * rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; - struct buffer_page *bpage; + struct buffer_page *page; unsigned long addr; int ret; @@ -369,21 +324,19 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) cpu_buffer->cpu = cpu; cpu_buffer->buffer = buffer; - spin_lock_init(&cpu_buffer->reader_lock); - cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + spin_lock_init(&cpu_buffer->lock); INIT_LIST_HEAD(&cpu_buffer->pages); - bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), + page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu)); - if (!bpage) + if (!page) goto fail_free_buffer; - cpu_buffer->reader_page = bpage; + cpu_buffer->reader_page = page; addr = __get_free_page(GFP_KERNEL); if (!addr) goto fail_free_reader; - bpage->page = (void *)addr; - rb_init_page(bpage->page); + page->page = (void *)addr; INIT_LIST_HEAD(&cpu_buffer->reader_page->list); @@ -408,14 +361,14 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *head = &cpu_buffer->pages; - struct buffer_page *bpage, *tmp; + struct buffer_page *page, *tmp; list_del_init(&cpu_buffer->reader_page->list); free_buffer_page(cpu_buffer->reader_page); - list_for_each_entry_safe(bpage, tmp, head, list) { - list_del_init(&bpage->list); - free_buffer_page(bpage); + list_for_each_entry_safe(page, tmp, head, list) { + list_del_init(&page->list); + free_buffer_page(page); } kfree(cpu_buffer); } @@ -512,7 +465,7 @@ static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); static void rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) { - struct buffer_page *bpage; + struct buffer_page *page; struct list_head *p; unsigned i; @@ -520,15 +473,13 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) synchronize_sched(); for (i = 0; i < nr_pages; i++) { - if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) - return; + BUG_ON(list_empty(&cpu_buffer->pages)); p = cpu_buffer->pages.next; - bpage = list_entry(p, struct buffer_page, list); - list_del_init(&bpage->list); - free_buffer_page(bpage); + page = list_entry(p, struct buffer_page, list); + list_del_init(&page->list); + free_buffer_page(page); } - if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) - return; + BUG_ON(list_empty(&cpu_buffer->pages)); rb_reset_cpu(cpu_buffer); @@ -542,7 +493,7 @@ static void rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, struct list_head *pages, unsigned nr_pages) { - struct buffer_page *bpage; + struct buffer_page *page; struct list_head *p; unsigned i; @@ -550,12 +501,11 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, synchronize_sched(); for (i = 0; i < nr_pages; i++) { - if (RB_WARN_ON(cpu_buffer, list_empty(pages))) - return; + BUG_ON(list_empty(pages)); p = pages->next; - bpage = list_entry(p, struct buffer_page, list); - list_del_init(&bpage->list); - list_add_tail(&bpage->list, &cpu_buffer->pages); + page = list_entry(p, struct buffer_page, list); + list_del_init(&page->list); + list_add_tail(&page->list, &cpu_buffer->pages); } rb_reset_cpu(cpu_buffer); @@ -582,7 +532,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) { struct ring_buffer_per_cpu *cpu_buffer; unsigned nr_pages, rm_pages, new_pages; - struct buffer_page *bpage, *tmp; + struct buffer_page *page, *tmp; unsigned long buffer_size; unsigned long addr; LIST_HEAD(pages); @@ -612,10 +562,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) if (size < buffer_size) { /* easy case, just free pages */ - if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) { - mutex_unlock(&buffer->mutex); - return -1; - } + BUG_ON(nr_pages >= buffer->pages); rm_pages = buffer->pages - nr_pages; @@ -634,26 +581,21 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) * add these pages to the cpu_buffers. Otherwise we just free * them all and return -ENOMEM; */ - if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) { - mutex_unlock(&buffer->mutex); - return -1; - } - + BUG_ON(nr_pages <= buffer->pages); new_pages = nr_pages - buffer->pages; for_each_buffer_cpu(buffer, cpu) { for (i = 0; i < new_pages; i++) { - bpage = kzalloc_node(ALIGN(sizeof(*bpage), + page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu)); - if (!bpage) + if (!page) goto free_pages; - list_add(&bpage->list, &pages); + list_add(&page->list, &pages); addr = __get_free_page(GFP_KERNEL); if (!addr) goto free_pages; - bpage->page = (void *)addr; - rb_init_page(bpage->page); + page->page = (void *)addr; } } @@ -662,10 +604,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) rb_insert_pages(cpu_buffer, &pages, new_pages); } - if (RB_WARN_ON(buffer, !list_empty(&pages))) { - mutex_unlock(&buffer->mutex); - return -1; - } + BUG_ON(!list_empty(&pages)); out: buffer->pages = nr_pages; @@ -674,9 +613,9 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) return size; free_pages: - list_for_each_entry_safe(bpage, tmp, &pages, list) { - list_del_init(&bpage->list); - free_buffer_page(bpage); + list_for_each_entry_safe(page, tmp, &pages, list) { + list_del_init(&page->list); + free_buffer_page(page); } mutex_unlock(&buffer->mutex); return -ENOMEM; @@ -687,15 +626,9 @@ static inline int rb_null_event(struct ring_buffer_event *event) return event->type == RINGBUF_TYPE_PADDING; } -static inline void * -__rb_data_page_index(struct buffer_data_page *bpage, unsigned index) -{ - return bpage->data + index; -} - -static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index) +static inline void *__rb_page_index(struct buffer_page *page, unsigned index) { - return bpage->page->data + index; + return page->page + index; } static inline struct ring_buffer_event * @@ -725,7 +658,7 @@ static inline unsigned rb_page_write(struct buffer_page *bpage) static inline unsigned rb_page_commit(struct buffer_page *bpage) { - return local_read(&bpage->page->commit); + return local_read(&bpage->commit); } /* Size is determined by what has been commited */ @@ -760,8 +693,7 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer) head += rb_event_length(event)) { event = __rb_page_index(cpu_buffer->head_page, head); - if (RB_WARN_ON(cpu_buffer, rb_null_event(event))) - return; + BUG_ON(rb_null_event(event)); /* Only count data entries */ if (event->type != RINGBUF_TYPE_DATA) continue; @@ -771,14 +703,14 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer) } static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, - struct buffer_page **bpage) + struct buffer_page **page) { - struct list_head *p = (*bpage)->list.next; + struct list_head *p = (*page)->list.next; if (p == &cpu_buffer->pages) p = p->next; - *bpage = list_entry(p, struct buffer_page, list); + *page = list_entry(p, struct buffer_page, list); } static inline unsigned @@ -814,18 +746,16 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, addr &= PAGE_MASK; while (cpu_buffer->commit_page->page != (void *)addr) { - if (RB_WARN_ON(cpu_buffer, - cpu_buffer->commit_page == cpu_buffer->tail_page)) - return; - cpu_buffer->commit_page->page->commit = + RB_WARN_ON(cpu_buffer, + cpu_buffer->commit_page == cpu_buffer->tail_page); + cpu_buffer->commit_page->commit = cpu_buffer->commit_page->write; rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); - cpu_buffer->write_stamp = - cpu_buffer->commit_page->page->time_stamp; + cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; } /* Now set the commit to the event's index */ - local_set(&cpu_buffer->commit_page->page->commit, index); + local_set(&cpu_buffer->commit_page->commit, index); } static inline void @@ -840,17 +770,16 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) * assign the commit to the tail. */ while (cpu_buffer->commit_page != cpu_buffer->tail_page) { - cpu_buffer->commit_page->page->commit = + cpu_buffer->commit_page->commit = cpu_buffer->commit_page->write; rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); - cpu_buffer->write_stamp = - cpu_buffer->commit_page->page->time_stamp; + cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; /* add barrier to keep gcc from optimizing too much */ barrier(); } while (rb_commit_index(cpu_buffer) != rb_page_write(cpu_buffer->commit_page)) { - cpu_buffer->commit_page->page->commit = + cpu_buffer->commit_page->commit = cpu_buffer->commit_page->write; barrier(); } @@ -858,7 +787,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) { - cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp; + cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp; cpu_buffer->reader_page->read = 0; } @@ -877,7 +806,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter) else rb_inc_page(cpu_buffer, &iter->head_page); - iter->read_stamp = iter->head_page->page->time_stamp; + iter->read_stamp = iter->head_page->time_stamp; iter->head = 0; } @@ -965,8 +894,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, if (write > BUF_PAGE_SIZE) { struct buffer_page *next_page = tail_page; - local_irq_save(flags); - __raw_spin_lock(&cpu_buffer->lock); + spin_lock_irqsave(&cpu_buffer->lock, flags); rb_inc_page(cpu_buffer, &next_page); @@ -974,8 +902,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, reader_page = cpu_buffer->reader_page; /* we grabbed the lock before incrementing */ - if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) - goto out_unlock; + RB_WARN_ON(cpu_buffer, next_page == reader_page); /* * If for some reason, we had an interrupt storm that made @@ -1013,12 +940,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, */ if (tail_page == cpu_buffer->tail_page) { local_set(&next_page->write, 0); - local_set(&next_page->page->commit, 0); + local_set(&next_page->commit, 0); cpu_buffer->tail_page = next_page; /* reread the time stamp */ *ts = ring_buffer_time_stamp(cpu_buffer->cpu); - cpu_buffer->tail_page->page->time_stamp = *ts; + cpu_buffer->tail_page->time_stamp = *ts; } /* @@ -1043,8 +970,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, rb_set_commit_to_write(cpu_buffer); } - __raw_spin_unlock(&cpu_buffer->lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&cpu_buffer->lock, flags); /* fail and let the caller try again */ return ERR_PTR(-EAGAIN); @@ -1052,8 +978,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* We reserved something on the buffer */ - if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE)) - return NULL; + BUG_ON(write > BUF_PAGE_SIZE); event = __rb_page_index(tail_page, tail); rb_update_event(event, type, length); @@ -1063,13 +988,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, * this page's time stamp. */ if (!tail && rb_is_commit(cpu_buffer, event)) - cpu_buffer->commit_page->page->time_stamp = *ts; + cpu_buffer->commit_page->time_stamp = *ts; return event; out_unlock: - __raw_spin_unlock(&cpu_buffer->lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&cpu_buffer->lock, flags); return NULL; } @@ -1114,7 +1038,7 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, event->time_delta = *delta & TS_MASK; event->array[0] = *delta >> TS_SHIFT; } else { - cpu_buffer->commit_page->page->time_stamp = *ts; + cpu_buffer->commit_page->time_stamp = *ts; event->time_delta = 0; event->array[0] = 0; } @@ -1152,8 +1076,10 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, * storm or we have something buggy. * Bail! */ - if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) + if (unlikely(++nr_loops > 1000)) { + RB_WARN_ON(cpu_buffer, 1); return NULL; + } ts = ring_buffer_time_stamp(cpu_buffer->cpu); @@ -1249,14 +1175,15 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, struct ring_buffer_event *event; int cpu, resched; - if (ring_buffer_flags != RB_BUFFERS_ON) + if (ring_buffers_off) return NULL; if (atomic_read(&buffer->record_disabled)) return NULL; /* If we are tracing schedule, we don't want to recurse */ - resched = ftrace_preempt_disable(); + resched = need_resched(); + preempt_disable_notrace(); cpu = raw_smp_processor_id(); @@ -1287,7 +1214,10 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, return event; out: - ftrace_preempt_enable(resched); + if (resched) + preempt_enable_no_resched_notrace(); + else + preempt_enable_notrace(); return NULL; } @@ -1329,9 +1259,12 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, /* * Only the last preempt count needs to restore preemption. */ - if (preempt_count() == 1) - ftrace_preempt_enable(per_cpu(rb_need_resched, cpu)); - else + if (preempt_count() == 1) { + if (per_cpu(rb_need_resched, cpu)) + preempt_enable_no_resched_notrace(); + else + preempt_enable_notrace(); + } else preempt_enable_no_resched_notrace(); return 0; @@ -1361,13 +1294,14 @@ int ring_buffer_write(struct ring_buffer *buffer, int ret = -EBUSY; int cpu, resched; - if (ring_buffer_flags != RB_BUFFERS_ON) + if (ring_buffers_off) return -EBUSY; if (atomic_read(&buffer->record_disabled)) return -EBUSY; - resched = ftrace_preempt_disable(); + resched = need_resched(); + preempt_disable_notrace(); cpu = raw_smp_processor_id(); @@ -1393,7 +1327,10 @@ int ring_buffer_write(struct ring_buffer *buffer, ret = 0; out: - ftrace_preempt_enable(resched); + if (resched) + preempt_enable_no_resched_notrace(); + else + preempt_enable_notrace(); return ret; } @@ -1552,7 +1489,14 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer) return overruns; } -static void rb_iter_reset(struct ring_buffer_iter *iter) +/** + * ring_buffer_iter_reset - reset an iterator + * @iter: The iterator to reset + * + * Resets the iterator, so that it will start from the beginning + * again. + */ +void ring_buffer_iter_reset(struct ring_buffer_iter *iter) { struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; @@ -1567,24 +1511,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) if (iter->head) iter->read_stamp = cpu_buffer->read_stamp; else - iter->read_stamp = iter->head_page->page->time_stamp; -} - -/** - * ring_buffer_iter_reset - reset an iterator - * @iter: The iterator to reset - * - * Resets the iterator, so that it will start from the beginning - * again. - */ -void ring_buffer_iter_reset(struct ring_buffer_iter *iter) -{ - struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; - unsigned long flags; - - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - rb_iter_reset(iter); - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + iter->read_stamp = iter->head_page->time_stamp; } /** @@ -1670,8 +1597,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) unsigned long flags; int nr_loops = 0; - local_irq_save(flags); - __raw_spin_lock(&cpu_buffer->lock); + spin_lock_irqsave(&cpu_buffer->lock, flags); again: /* @@ -1680,7 +1606,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) * a case where we will loop three times. There should be no * reason to loop four times (that I know of). */ - if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) { + if (unlikely(++nr_loops > 3)) { + RB_WARN_ON(cpu_buffer, 1); reader = NULL; goto out; } @@ -1692,9 +1619,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) goto out; /* Never should we have an index greater than the size */ - if (RB_WARN_ON(cpu_buffer, - cpu_buffer->reader_page->read > rb_page_size(reader))) - goto out; + RB_WARN_ON(cpu_buffer, + cpu_buffer->reader_page->read > rb_page_size(reader)); /* check if we caught up to the tail */ reader = NULL; @@ -1711,7 +1637,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page->list.prev = reader->list.prev; local_set(&cpu_buffer->reader_page->write, 0); - local_set(&cpu_buffer->reader_page->page->commit, 0); + local_set(&cpu_buffer->reader_page->commit, 0); /* Make the reader page now replace the head */ reader->list.prev->next = &cpu_buffer->reader_page->list; @@ -1733,8 +1659,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) goto again; out: - __raw_spin_unlock(&cpu_buffer->lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&cpu_buffer->lock, flags); return reader; } @@ -1748,8 +1673,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) reader = rb_get_reader_page(cpu_buffer); /* This function should not be called when buffer is empty */ - if (RB_WARN_ON(cpu_buffer, !reader)) - return; + BUG_ON(!reader); event = rb_reader_event(cpu_buffer); @@ -1776,9 +1700,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) * Check if we are at the end of the buffer. */ if (iter->head >= rb_page_size(iter->head_page)) { - if (RB_WARN_ON(buffer, - iter->head_page == cpu_buffer->commit_page)) - return; + BUG_ON(iter->head_page == cpu_buffer->commit_page); rb_inc_iter(iter); return; } @@ -1791,10 +1713,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) * This should not be called to advance the header if we are * at the tail of the buffer. */ - if (RB_WARN_ON(cpu_buffer, - (iter->head_page == cpu_buffer->commit_page) && - (iter->head + length > rb_commit_index(cpu_buffer)))) - return; + BUG_ON((iter->head_page == cpu_buffer->commit_page) && + (iter->head + length > rb_commit_index(cpu_buffer))); rb_update_iter_read_stamp(iter, event); @@ -1806,8 +1726,17 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) rb_advance_iter(iter); } -static struct ring_buffer_event * -rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) +/** + * ring_buffer_peek - peek at the next event to be read + * @buffer: The ring buffer to read + * @cpu: The cpu to peak at + * @ts: The timestamp counter of this event. + * + * This will return the event that will be read next, but does + * not consume the data. + */ +struct ring_buffer_event * +ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; @@ -1828,8 +1757,10 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) * can have. Nesting 10 deep of interrupts is clearly * an anomaly. */ - if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10)) + if (unlikely(++nr_loops > 10)) { + RB_WARN_ON(cpu_buffer, 1); return NULL; + } reader = rb_get_reader_page(cpu_buffer); if (!reader) @@ -1867,8 +1798,16 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) return NULL; } -static struct ring_buffer_event * -rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) +/** + * ring_buffer_iter_peek - peek at the next event to be read + * @iter: The ring buffer iterator + * @ts: The timestamp counter of this event. + * + * This will return the event that will be read next, but does + * not increment the iterator. + */ +struct ring_buffer_event * +ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) { struct ring_buffer *buffer; struct ring_buffer_per_cpu *cpu_buffer; @@ -1890,8 +1829,10 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) * can have. Nesting 10 deep of interrupts is clearly * an anomaly. */ - if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10)) + if (unlikely(++nr_loops > 10)) { + RB_WARN_ON(cpu_buffer, 1); return NULL; + } if (rb_per_cpu_empty(cpu_buffer)) return NULL; @@ -1927,51 +1868,6 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) return NULL; } -/** - * ring_buffer_peek - peek at the next event to be read - * @buffer: The ring buffer to read - * @cpu: The cpu to peak at - * @ts: The timestamp counter of this event. - * - * This will return the event that will be read next, but does - * not consume the data. - */ -struct ring_buffer_event * -ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) -{ - struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; - struct ring_buffer_event *event; - unsigned long flags; - - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - event = rb_buffer_peek(buffer, cpu, ts); - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - - return event; -} - -/** - * ring_buffer_iter_peek - peek at the next event to be read - * @iter: The ring buffer iterator - * @ts: The timestamp counter of this event. - * - * This will return the event that will be read next, but does - * not increment the iterator. - */ -struct ring_buffer_event * -ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) -{ - struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; - struct ring_buffer_event *event; - unsigned long flags; - - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - event = rb_iter_peek(iter, ts); - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - - return event; -} - /** * ring_buffer_consume - return an event and consume it * @buffer: The ring buffer to get the next event from @@ -1983,24 +1879,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) struct ring_buffer_event * ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) { - struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; + struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; - unsigned long flags; if (!cpu_isset(cpu, buffer->cpumask)) return NULL; - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - - event = rb_buffer_peek(buffer, cpu, ts); + event = ring_buffer_peek(buffer, cpu, ts); if (!event) - goto out; + return NULL; + cpu_buffer = buffer->buffers[cpu]; rb_advance_reader(cpu_buffer); - out: - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - return event; } @@ -2037,11 +1928,9 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu) atomic_inc(&cpu_buffer->record_disabled); synchronize_sched(); - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - __raw_spin_lock(&cpu_buffer->lock); - rb_iter_reset(iter); - __raw_spin_unlock(&cpu_buffer->lock); - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + spin_lock_irqsave(&cpu_buffer->lock, flags); + ring_buffer_iter_reset(iter); + spin_unlock_irqrestore(&cpu_buffer->lock, flags); return iter; } @@ -2073,17 +1962,12 @@ struct ring_buffer_event * ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) { struct ring_buffer_event *event; - struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; - unsigned long flags; - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - event = rb_iter_peek(iter, ts); + event = ring_buffer_iter_peek(iter, ts); if (!event) - goto out; + return NULL; rb_advance_iter(iter); - out: - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); return event; } @@ -2103,7 +1987,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->head_page = list_entry(cpu_buffer->pages.next, struct buffer_page, list); local_set(&cpu_buffer->head_page->write, 0); - local_set(&cpu_buffer->head_page->page->commit, 0); + local_set(&cpu_buffer->head_page->commit, 0); cpu_buffer->head_page->read = 0; @@ -2112,7 +1996,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) INIT_LIST_HEAD(&cpu_buffer->reader_page->list); local_set(&cpu_buffer->reader_page->write, 0); - local_set(&cpu_buffer->reader_page->page->commit, 0); + local_set(&cpu_buffer->reader_page->commit, 0); cpu_buffer->reader_page->read = 0; cpu_buffer->overrun = 0; @@ -2132,15 +2016,11 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) if (!cpu_isset(cpu, buffer->cpumask)) return; - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - - __raw_spin_lock(&cpu_buffer->lock); + spin_lock_irqsave(&cpu_buffer->lock, flags); rb_reset_cpu(cpu_buffer); - __raw_spin_unlock(&cpu_buffer->lock); - - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + spin_unlock_irqrestore(&cpu_buffer->lock, flags); } /** @@ -2238,178 +2118,16 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, return 0; } -static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, - struct buffer_data_page *bpage) -{ - struct ring_buffer_event *event; - unsigned long head; - - __raw_spin_lock(&cpu_buffer->lock); - for (head = 0; head < local_read(&bpage->commit); - head += rb_event_length(event)) { - - event = __rb_data_page_index(bpage, head); - if (RB_WARN_ON(cpu_buffer, rb_null_event(event))) - return; - /* Only count data entries */ - if (event->type != RINGBUF_TYPE_DATA) - continue; - cpu_buffer->entries--; - } - __raw_spin_unlock(&cpu_buffer->lock); -} - -/** - * ring_buffer_alloc_read_page - allocate a page to read from buffer - * @buffer: the buffer to allocate for. - * - * This function is used in conjunction with ring_buffer_read_page. - * When reading a full page from the ring buffer, these functions - * can be used to speed up the process. The calling function should - * allocate a few pages first with this function. Then when it - * needs to get pages from the ring buffer, it passes the result - * of this function into ring_buffer_read_page, which will swap - * the page that was allocated, with the read page of the buffer. - * - * Returns: - * The page allocated, or NULL on error. - */ -void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) -{ - unsigned long addr; - struct buffer_data_page *bpage; - - addr = __get_free_page(GFP_KERNEL); - if (!addr) - return NULL; - - bpage = (void *)addr; - - return bpage; -} - -/** - * ring_buffer_free_read_page - free an allocated read page - * @buffer: the buffer the page was allocate for - * @data: the page to free - * - * Free a page allocated from ring_buffer_alloc_read_page. - */ -void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) -{ - free_page((unsigned long)data); -} - -/** - * ring_buffer_read_page - extract a page from the ring buffer - * @buffer: buffer to extract from - * @data_page: the page to use allocated from ring_buffer_alloc_read_page - * @cpu: the cpu of the buffer to extract - * @full: should the extraction only happen when the page is full. - * - * This function will pull out a page from the ring buffer and consume it. - * @data_page must be the address of the variable that was returned - * from ring_buffer_alloc_read_page. This is because the page might be used - * to swap with a page in the ring buffer. - * - * for example: - * rpage = ring_buffer_alloc_page(buffer); - * if (!rpage) - * return error; - * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0); - * if (ret) - * process_page(rpage); - * - * When @full is set, the function will not return true unless - * the writer is off the reader page. - * - * Note: it is up to the calling functions to handle sleeps and wakeups. - * The ring buffer can be used anywhere in the kernel and can not - * blindly call wake_up. The layer that uses the ring buffer must be - * responsible for that. - * - * Returns: - * 1 if data has been transferred - * 0 if no data has been transferred. - */ -int ring_buffer_read_page(struct ring_buffer *buffer, - void **data_page, int cpu, int full) -{ - struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; - struct ring_buffer_event *event; - struct buffer_data_page *bpage; - unsigned long flags; - int ret = 0; - - if (!data_page) - return 0; - - bpage = *data_page; - if (!bpage) - return 0; - - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - - /* - * rb_buffer_peek will get the next ring buffer if - * the current reader page is empty. - */ - event = rb_buffer_peek(buffer, cpu, NULL); - if (!event) - goto out; - - /* check for data */ - if (!local_read(&cpu_buffer->reader_page->page->commit)) - goto out; - /* - * If the writer is already off of the read page, then simply - * switch the read page with the given page. Otherwise - * we need to copy the data from the reader to the writer. - */ - if (cpu_buffer->reader_page == cpu_buffer->commit_page) { - unsigned int read = cpu_buffer->reader_page->read; - - if (full) - goto out; - /* The writer is still on the reader page, we must copy */ - bpage = cpu_buffer->reader_page->page; - memcpy(bpage->data, - cpu_buffer->reader_page->page->data + read, - local_read(&bpage->commit) - read); - - /* consume what was read */ - cpu_buffer->reader_page += read; - - } else { - /* swap the pages */ - rb_init_page(bpage); - bpage = cpu_buffer->reader_page->page; - cpu_buffer->reader_page->page = *data_page; - cpu_buffer->reader_page->read = 0; - *data_page = bpage; - } - ret = 1; - - /* update the entry counter */ - rb_remove_entries(cpu_buffer, bpage); - out: - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - - return ret; -} - static ssize_t rb_simple_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - long *p = filp->private_data; + int *p = filp->private_data; char buf[64]; int r; - if (test_bit(RB_BUFFERS_DISABLED_BIT, p)) - r = sprintf(buf, "permanently disabled\n"); - else - r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p)); + /* !ring_buffers_off == tracing_on */ + r = sprintf(buf, "%d\n", !*p); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -2418,7 +2136,7 @@ static ssize_t rb_simple_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - long *p = filp->private_data; + int *p = filp->private_data; char buf[64]; long val; int ret; @@ -2435,10 +2153,8 @@ rb_simple_write(struct file *filp, const char __user *ubuf, if (ret < 0) return ret; - if (val) - set_bit(RB_BUFFERS_ON_BIT, p); - else - clear_bit(RB_BUFFERS_ON_BIT, p); + /* !ring_buffers_off == tracing_on */ + *p = !val; (*ppos)++; @@ -2460,7 +2176,7 @@ static __init int rb_init_debugfs(void) d_tracer = tracing_init_dentry(); entry = debugfs_create_file("tracing_on", 0644, d_tracer, - &ring_buffer_flags, &rb_simple_fops); + &ring_buffers_off, &rb_simple_fops); if (!entry) pr_warning("Could not create debugfs 'tracing_on' entry\n"); diff --git a/trunk/kernel/trace/trace.c b/trunk/kernel/trace/trace.c index 6adf660fc816..d2e75479dc50 100644 --- a/trunk/kernel/trace/trace.c +++ b/trunk/kernel/trace/trace.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include @@ -44,38 +43,6 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; -/* - * We need to change this state when a selftest is running. - * A selftest will lurk into the ring-buffer to count the - * entries inserted during the selftest although some concurrent - * insertions into the ring-buffer such as ftrace_printk could occurred - * at the same time, giving false positive or negative results. - */ -static bool __read_mostly tracing_selftest_running; - -/* For tracers that don't implement custom flags */ -static struct tracer_opt dummy_tracer_opt[] = { - { } -}; - -static struct tracer_flags dummy_tracer_flags = { - .val = 0, - .opts = dummy_tracer_opt -}; - -static int dummy_set_flag(u32 old_flags, u32 bit, int set) -{ - return 0; -} - -/* - * Kill all tracing for good (never come back). - * It is initialized to 1 but will turn to zero if the initialization - * of the tracer is successful. But that is the only place that sets - * this back to zero. - */ -int tracing_disabled = 1; - static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); static inline void ftrace_disable_cpu(void) @@ -95,36 +62,7 @@ static cpumask_t __read_mostly tracing_buffer_mask; #define for_each_tracing_cpu(cpu) \ for_each_cpu_mask(cpu, tracing_buffer_mask) -/* - * ftrace_dump_on_oops - variable to dump ftrace buffer on oops - * - * If there is an oops (or kernel panic) and the ftrace_dump_on_oops - * is set, then ftrace_dump is called. This will output the contents - * of the ftrace buffers to the console. This is very useful for - * capturing traces that lead to crashes and outputing it to a - * serial console. - * - * It is default off, but you can enable it with either specifying - * "ftrace_dump_on_oops" in the kernel command line, or setting - * /proc/sys/kernel/ftrace_dump_on_oops to true. - */ -int ftrace_dump_on_oops; - -static int tracing_set_tracer(char *buf); - -static int __init set_ftrace(char *str) -{ - tracing_set_tracer(str); - return 1; -} -__setup("ftrace", set_ftrace); - -static int __init set_ftrace_dump_on_oops(char *str) -{ - ftrace_dump_on_oops = 1; - return 1; -} -__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); +static int tracing_disabled = 1; long ns2usecs(cycle_t nsec) @@ -174,19 +112,6 @@ static DEFINE_PER_CPU(struct trace_array_cpu, max_data); /* tracer_enabled is used to toggle activation of a tracer */ static int tracer_enabled = 1; -/** - * tracing_is_enabled - return tracer_enabled status - * - * This function is used by other tracers to know the status - * of the tracer_enabled flag. Tracers may use this function - * to know if it should enable their features when starting - * up. See irqsoff tracer for an example (start_irqsoff_tracer). - */ -int tracing_is_enabled(void) -{ - return tracer_enabled; -} - /* function tracing enabled */ int ftrace_function_enabled; @@ -228,9 +153,8 @@ static DEFINE_MUTEX(trace_types_lock); /* trace_wait is a waitqueue for tasks blocked on trace_poll */ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); -/* trace_flags holds trace_options default values */ -unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | - TRACE_ITER_ANNOTATE; +/* trace_flags holds iter_ctrl options */ +unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; /** * trace_wake_up - wake up tasks waiting for trace input @@ -269,6 +193,13 @@ unsigned long nsecs_to_usecs(unsigned long nsecs) return nsecs / 1000; } +/* + * TRACE_ITER_SYM_MASK masks the options in trace_flags that + * control the output of kernel symbols. + */ +#define TRACE_ITER_SYM_MASK \ + (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) + /* These must match the bit postions in trace_iterator_flags */ static const char *trace_options[] = { "print-parent", @@ -282,11 +213,6 @@ static const char *trace_options[] = { "stacktrace", "sched-tree", "ftrace_printk", - "ftrace_preempt", - "branch", - "annotate", - "userstacktrace", - "sym-userobj", NULL }; @@ -433,28 +359,6 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) return trace_seq_putmem(s, hex, j); } -static int -trace_seq_path(struct trace_seq *s, struct path *path) -{ - unsigned char *p; - - if (s->len >= (PAGE_SIZE - 1)) - return 0; - p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); - if (!IS_ERR(p)) { - p = mangle_path(s->buffer + s->len, p, "\n"); - if (p) { - s->len = p - s->buffer; - return 1; - } - } else { - s->buffer[s->len++] = '?'; - return 1; - } - - return 0; -} - static void trace_seq_reset(struct trace_seq *s) { @@ -566,17 +470,7 @@ int register_tracer(struct tracer *type) return -1; } - /* - * When this gets called we hold the BKL which means that - * preemption is disabled. Various trace selftests however - * need to disable and enable preemption for successful tests. - * So we drop the BKL here and grab it after the tests again. - */ - unlock_kernel(); mutex_lock(&trace_types_lock); - - tracing_selftest_running = true; - for (t = trace_types; t; t = t->next) { if (strcmp(type->name, t->name) == 0) { /* already found */ @@ -587,20 +481,12 @@ int register_tracer(struct tracer *type) } } - if (!type->set_flag) - type->set_flag = &dummy_set_flag; - if (!type->flags) - type->flags = &dummy_tracer_flags; - else - if (!type->flags->opts) - type->flags->opts = dummy_tracer_opt; - #ifdef CONFIG_FTRACE_STARTUP_TEST if (type->selftest) { struct tracer *saved_tracer = current_trace; struct trace_array *tr = &global_trace; + int saved_ctrl = tr->ctrl; int i; - /* * Run a selftest on this tracer. * Here we reset the trace buffer, and set the current @@ -608,23 +494,25 @@ int register_tracer(struct tracer *type) * internal tracing to verify that everything is in order. * If we fail, we do not register this tracer. */ - for_each_tracing_cpu(i) + for_each_tracing_cpu(i) { tracing_reset(tr, i); - + } current_trace = type; + tr->ctrl = 0; /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); ret = type->selftest(type, tr); /* the test is responsible for resetting too */ current_trace = saved_tracer; + tr->ctrl = saved_ctrl; if (ret) { printk(KERN_CONT "FAILED!\n"); goto out; } /* Only reset on passing, to avoid touching corrupted buffers */ - for_each_tracing_cpu(i) + for_each_tracing_cpu(i) { tracing_reset(tr, i); - + } printk(KERN_CONT "PASSED\n"); } #endif @@ -636,9 +524,7 @@ int register_tracer(struct tracer *type) max_tracer_type_len = len; out: - tracing_selftest_running = false; mutex_unlock(&trace_types_lock); - lock_kernel(); return ret; } @@ -695,91 +581,6 @@ static void trace_init_cmdlines(void) cmdline_idx = 0; } -static int trace_stop_count; -static DEFINE_SPINLOCK(tracing_start_lock); - -/** - * ftrace_off_permanent - disable all ftrace code permanently - * - * This should only be called when a serious anomally has - * been detected. This will turn off the function tracing, - * ring buffers, and other tracing utilites. It takes no - * locks and can be called from any context. - */ -void ftrace_off_permanent(void) -{ - tracing_disabled = 1; - ftrace_stop(); - tracing_off_permanent(); -} - -/** - * tracing_start - quick start of the tracer - * - * If tracing is enabled but was stopped by tracing_stop, - * this will start the tracer back up. - */ -void tracing_start(void) -{ - struct ring_buffer *buffer; - unsigned long flags; - - if (tracing_disabled) - return; - - spin_lock_irqsave(&tracing_start_lock, flags); - if (--trace_stop_count) - goto out; - - if (trace_stop_count < 0) { - /* Someone screwed up their debugging */ - WARN_ON_ONCE(1); - trace_stop_count = 0; - goto out; - } - - - buffer = global_trace.buffer; - if (buffer) - ring_buffer_record_enable(buffer); - - buffer = max_tr.buffer; - if (buffer) - ring_buffer_record_enable(buffer); - - ftrace_start(); - out: - spin_unlock_irqrestore(&tracing_start_lock, flags); -} - -/** - * tracing_stop - quick stop of the tracer - * - * Light weight way to stop tracing. Use in conjunction with - * tracing_start. - */ -void tracing_stop(void) -{ - struct ring_buffer *buffer; - unsigned long flags; - - ftrace_stop(); - spin_lock_irqsave(&tracing_start_lock, flags); - if (trace_stop_count++) - goto out; - - buffer = global_trace.buffer; - if (buffer) - ring_buffer_record_disable(buffer); - - buffer = max_tr.buffer; - if (buffer) - ring_buffer_record_disable(buffer); - - out: - spin_unlock_irqrestore(&tracing_start_lock, flags); -} - void trace_stop_cmdline_recording(void); static void trace_save_cmdline(struct task_struct *tsk) @@ -817,7 +618,7 @@ static void trace_save_cmdline(struct task_struct *tsk) spin_unlock(&trace_cmdline_lock); } -char *trace_find_cmdline(int pid) +static char *trace_find_cmdline(int pid) { char *cmdline = "<...>"; unsigned map; @@ -854,7 +655,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; - entry->tgid = (tsk) ? tsk->tgid : 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | @@ -891,56 +691,6 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, ring_buffer_unlock_commit(tr->buffer, event, irq_flags); } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -static void __trace_graph_entry(struct trace_array *tr, - struct trace_array_cpu *data, - struct ftrace_graph_ent *trace, - unsigned long flags, - int pc) -{ - struct ring_buffer_event *event; - struct ftrace_graph_ent_entry *entry; - unsigned long irq_flags; - - if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) - return; - - event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), - &irq_flags); - if (!event) - return; - entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_GRAPH_ENT; - entry->graph_ent = *trace; - ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); -} - -static void __trace_graph_return(struct trace_array *tr, - struct trace_array_cpu *data, - struct ftrace_graph_ret *trace, - unsigned long flags, - int pc) -{ - struct ring_buffer_event *event; - struct ftrace_graph_ret_entry *entry; - unsigned long irq_flags; - - if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) - return; - - event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), - &irq_flags); - if (!event) - return; - entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_GRAPH_RET; - entry->ret = *trace; - ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); -} -#endif - void ftrace(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags, @@ -992,46 +742,6 @@ void __trace_stack(struct trace_array *tr, ftrace_trace_stack(tr, data, flags, skip, preempt_count()); } -static void ftrace_trace_userstack(struct trace_array *tr, - struct trace_array_cpu *data, - unsigned long flags, int pc) -{ -#ifdef CONFIG_STACKTRACE - struct ring_buffer_event *event; - struct userstack_entry *entry; - struct stack_trace trace; - unsigned long irq_flags; - - if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) - return; - - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); - if (!event) - return; - entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_USER_STACK; - - memset(&entry->caller, 0, sizeof(entry->caller)); - - trace.nr_entries = 0; - trace.max_entries = FTRACE_STACK_ENTRIES; - trace.skip = 0; - trace.entries = entry->caller; - - save_stack_trace_user(&trace); - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); -#endif -} - -void __trace_userstack(struct trace_array *tr, - struct trace_array_cpu *data, - unsigned long flags) -{ - ftrace_trace_userstack(tr, data, flags, preempt_count()); -} - static void ftrace_trace_special(void *__tr, void *__data, unsigned long arg1, unsigned long arg2, unsigned long arg3, @@ -1055,7 +765,6 @@ ftrace_trace_special(void *__tr, void *__data, entry->arg3 = arg3; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, irq_flags, 4, pc); - ftrace_trace_userstack(tr, data, irq_flags, pc); trace_wake_up(); } @@ -1094,7 +803,6 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_cpu = task_cpu(next); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, flags, 5, pc); - ftrace_trace_userstack(tr, data, flags, pc); } void @@ -1124,7 +832,6 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_cpu = task_cpu(wakee); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, flags, 6, pc); - ftrace_trace_userstack(tr, data, flags, pc); trace_wake_up(); } @@ -1134,28 +841,26 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; - unsigned long flags; int cpu; int pc; - if (tracing_disabled) + if (tracing_disabled || !tr->ctrl) return; pc = preempt_count(); - local_irq_save(flags); + preempt_disable_notrace(); cpu = raw_smp_processor_id(); data = tr->data[cpu]; - if (likely(atomic_inc_return(&data->disabled) == 1)) + if (likely(!atomic_read(&data->disabled))) ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); - atomic_dec(&data->disabled); - local_irq_restore(flags); + preempt_enable_notrace(); } #ifdef CONFIG_FUNCTION_TRACER static void -function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) +function_trace_call(unsigned long ip, unsigned long parent_ip) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1168,7 +873,8 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) return; pc = preempt_count(); - resched = ftrace_preempt_disable(); + resched = need_resched(); + preempt_disable_notrace(); local_save_flags(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; @@ -1178,96 +884,11 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) trace_function(tr, data, ip, parent_ip, flags, pc); atomic_dec(&data->disabled); - ftrace_preempt_enable(resched); -} - -static void -function_trace_call(unsigned long ip, unsigned long parent_ip) -{ - struct trace_array *tr = &global_trace; - struct trace_array_cpu *data; - unsigned long flags; - long disabled; - int cpu; - int pc; - - if (unlikely(!ftrace_function_enabled)) - return; - - /* - * Need to use raw, since this must be called before the - * recursive protection is performed. - */ - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - disabled = atomic_inc_return(&data->disabled); - - if (likely(disabled == 1)) { - pc = preempt_count(); - trace_function(tr, data, ip, parent_ip, flags, pc); - } - - atomic_dec(&data->disabled); - local_irq_restore(flags); -} - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -int trace_graph_entry(struct ftrace_graph_ent *trace) -{ - struct trace_array *tr = &global_trace; - struct trace_array_cpu *data; - unsigned long flags; - long disabled; - int cpu; - int pc; - - if (!ftrace_trace_task(current)) - return 0; - - if (!ftrace_graph_addr(trace->func)) - return 0; - - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - disabled = atomic_inc_return(&data->disabled); - if (likely(disabled == 1)) { - pc = preempt_count(); - __trace_graph_entry(tr, data, trace, flags, pc); - } - /* Only do the atomic if it is not already set */ - if (!test_tsk_trace_graph(current)) - set_tsk_trace_graph(current); - atomic_dec(&data->disabled); - local_irq_restore(flags); - - return 1; -} - -void trace_graph_return(struct ftrace_graph_ret *trace) -{ - struct trace_array *tr = &global_trace; - struct trace_array_cpu *data; - unsigned long flags; - long disabled; - int cpu; - int pc; - - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - disabled = atomic_inc_return(&data->disabled); - if (likely(disabled == 1)) { - pc = preempt_count(); - __trace_graph_return(tr, data, trace, flags, pc); - } - if (!trace->depth) - clear_tsk_trace_graph(current); - atomic_dec(&data->disabled); - local_irq_restore(flags); + if (resched) + preempt_enable_no_resched_notrace(); + else + preempt_enable_notrace(); } -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ static struct ftrace_ops trace_ops __read_mostly = { @@ -1277,14 +898,9 @@ static struct ftrace_ops trace_ops __read_mostly = void tracing_start_function_trace(void) { ftrace_function_enabled = 0; - - if (trace_flags & TRACE_ITER_PREEMPTONLY) - trace_ops.func = function_trace_call_preempt_only; - else - trace_ops.func = function_trace_call; - register_ftrace_function(&trace_ops); - ftrace_function_enabled = 1; + if (tracer_enabled) + ftrace_function_enabled = 1; } void tracing_stop_function_trace(void) @@ -1296,7 +912,6 @@ void tracing_stop_function_trace(void) enum trace_file_type { TRACE_FILE_LAT_FMT = 1, - TRACE_FILE_ANNOTATE = 2, }; static void trace_iterator_increment(struct trace_iterator *iter, int cpu) @@ -1432,6 +1047,10 @@ static void *s_start(struct seq_file *m, loff_t *pos) atomic_inc(&trace_record_cmdline_disabled); + /* let the tracer grab locks here if needed */ + if (current_trace->start) + current_trace->start(iter); + if (*pos != iter->pos) { iter->ent = NULL; iter->cpu = 0; @@ -1458,7 +1077,14 @@ static void *s_start(struct seq_file *m, loff_t *pos) static void s_stop(struct seq_file *m, void *p) { + struct trace_iterator *iter = m->private; + atomic_dec(&trace_record_cmdline_disabled); + + /* let the tracer release locks here if needed */ + if (current_trace && current_trace == iter->trace && iter->trace->stop) + iter->trace->stop(iter); + mutex_unlock(&trace_types_lock); } @@ -1517,7 +1143,7 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt, # define IP_FMT "%016lx" #endif -int +static int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) { int ret; @@ -1538,78 +1164,6 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) return ret; } -static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, - unsigned long ip, unsigned long sym_flags) -{ - struct file *file = NULL; - unsigned long vmstart = 0; - int ret = 1; - - if (mm) { - const struct vm_area_struct *vma; - - down_read(&mm->mmap_sem); - vma = find_vma(mm, ip); - if (vma) { - file = vma->vm_file; - vmstart = vma->vm_start; - } - if (file) { - ret = trace_seq_path(s, &file->f_path); - if (ret) - ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart); - } - up_read(&mm->mmap_sem); - } - if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) - ret = trace_seq_printf(s, " <" IP_FMT ">", ip); - return ret; -} - -static int -seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, - unsigned long sym_flags) -{ - struct mm_struct *mm = NULL; - int ret = 1; - unsigned int i; - - if (trace_flags & TRACE_ITER_SYM_USEROBJ) { - struct task_struct *task; - /* - * we do the lookup on the thread group leader, - * since individual threads might have already quit! - */ - rcu_read_lock(); - task = find_task_by_vpid(entry->ent.tgid); - if (task) - mm = get_task_mm(task); - rcu_read_unlock(); - } - - for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { - unsigned long ip = entry->caller[i]; - - if (ip == ULONG_MAX || !ret) - break; - if (i && ret) - ret = trace_seq_puts(s, " <- "); - if (!ip) { - if (ret) - ret = trace_seq_puts(s, "??"); - continue; - } - if (!ret) - break; - if (ret) - ret = seq_print_user_ip(s, mm, ip, sym_flags); - } - - if (mm) - mmput(mm); - return ret; -} - static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n"); @@ -1784,23 +1338,6 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter) trace_seq_putc(s, '\n'); } -static void test_cpu_buff_start(struct trace_iterator *iter) -{ - struct trace_seq *s = &iter->seq; - - if (!(trace_flags & TRACE_ITER_ANNOTATE)) - return; - - if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) - return; - - if (cpu_isset(iter->cpu, iter->started)) - return; - - cpu_set(iter->cpu, iter->started); - trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); -} - static enum print_line_t print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) { @@ -1820,8 +1357,6 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) if (entry->type == TRACE_CONT) return TRACE_TYPE_HANDLED; - test_cpu_buff_start(iter); - next_entry = find_next_entry(iter, NULL, &next_ts); if (!next_entry) next_ts = iter->ts; @@ -1913,27 +1448,6 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) trace_seq_print_cont(s, iter); break; } - case TRACE_BRANCH: { - struct trace_branch *field; - - trace_assign_type(field, entry); - - trace_seq_printf(s, "[%s] %s:%s:%d\n", - field->correct ? " ok " : " MISS ", - field->func, - field->file, - field->line); - break; - } - case TRACE_USER_STACK: { - struct userstack_entry *field; - - trace_assign_type(field, entry); - - seq_print_userip_objs(field, s, sym_flags); - trace_seq_putc(s, '\n'); - break; - } default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1958,8 +1472,6 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) if (entry->type == TRACE_CONT) return TRACE_TYPE_HANDLED; - test_cpu_buff_start(iter); - comm = trace_find_cmdline(iter->ent->pid); t = ns2usecs(iter->ts); @@ -2069,37 +1581,6 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) trace_seq_print_cont(s, iter); break; } - case TRACE_GRAPH_RET: { - return print_graph_function(iter); - } - case TRACE_GRAPH_ENT: { - return print_graph_function(iter); - } - case TRACE_BRANCH: { - struct trace_branch *field; - - trace_assign_type(field, entry); - - trace_seq_printf(s, "[%s] %s:%s:%d\n", - field->correct ? " ok " : " MISS ", - field->func, - field->file, - field->line); - break; - } - case TRACE_USER_STACK: { - struct userstack_entry *field; - - trace_assign_type(field, entry); - - ret = seq_print_userip_objs(field, s, sym_flags); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_putc(s, '\n'); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - break; - } } return TRACE_TYPE_HANDLED; } @@ -2159,7 +1640,6 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: - case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; @@ -2248,7 +1728,6 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: - case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; @@ -2303,7 +1782,6 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: - case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; @@ -2369,9 +1847,7 @@ static int s_show(struct seq_file *m, void *v) seq_printf(m, "# tracer: %s\n", iter->trace->name); seq_puts(m, "#\n"); } - if (iter->trace && iter->trace->print_header) - iter->trace->print_header(m); - else if (iter->iter_flags & TRACE_FILE_LAT_FMT) { + if (iter->iter_flags & TRACE_FILE_LAT_FMT) { /* print nothing if the buffers are empty */ if (trace_empty(iter)) return 0; @@ -2423,15 +1899,6 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) iter->trace = current_trace; iter->pos = -1; - /* Notify the tracer early; before we stop tracing. */ - if (iter->trace && iter->trace->open) - iter->trace->open(iter); - - /* Annotate start of buffers if we had overruns */ - if (ring_buffer_overruns(iter->tr->buffer)) - iter->iter_flags |= TRACE_FILE_ANNOTATE; - - for_each_tracing_cpu(cpu) { iter->buffer_iter[cpu] = @@ -2450,7 +1917,13 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) m->private = iter; /* stop the trace while dumping */ - tracing_stop(); + if (iter->tr->ctrl) { + tracer_enabled = 0; + ftrace_function_enabled = 0; + } + + if (iter->trace && iter->trace->open) + iter->trace->open(iter); mutex_unlock(&trace_types_lock); @@ -2493,7 +1966,14 @@ int tracing_release(struct inode *inode, struct file *file) iter->trace->close(iter); /* reenable tracing if it was previously enabled */ - tracing_start(); + if (iter->tr->ctrl) { + tracer_enabled = 1; + /* + * It is safe to enable function tracing even if it + * isn't used + */ + ftrace_function_enabled = 1; + } mutex_unlock(&trace_types_lock); seq_release(inode, file); @@ -2671,7 +2151,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, if (err) goto err_unlock; - local_irq_disable(); + raw_local_irq_disable(); __raw_spin_lock(&ftrace_max_lock); for_each_tracing_cpu(cpu) { /* @@ -2688,7 +2168,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, } } __raw_spin_unlock(&ftrace_max_lock); - local_irq_enable(); + raw_local_irq_enable(); tracing_cpumask = tracing_cpumask_new; @@ -2709,16 +2189,13 @@ static struct file_operations tracing_cpumask_fops = { }; static ssize_t -tracing_trace_options_read(struct file *filp, char __user *ubuf, +tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - int i; char *buf; int r = 0; int len = 0; - u32 tracer_flags = current_trace->flags->val; - struct tracer_opt *trace_opts = current_trace->flags->opts; - + int i; /* calulate max size */ for (i = 0; trace_options[i]; i++) { @@ -2726,15 +2203,6 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, len += 3; /* "no" and space */ } - /* - * Increase the size with names of options specific - * of the current tracer. - */ - for (i = 0; trace_opts[i].name; i++) { - len += strlen(trace_opts[i].name); - len += 3; /* "no" and space */ - } - /* +2 for \n and \0 */ buf = kmalloc(len + 2, GFP_KERNEL); if (!buf) @@ -2747,15 +2215,6 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, r += sprintf(buf + r, "no%s ", trace_options[i]); } - for (i = 0; trace_opts[i].name; i++) { - if (tracer_flags & trace_opts[i].bit) - r += sprintf(buf + r, "%s ", - trace_opts[i].name); - else - r += sprintf(buf + r, "no%s ", - trace_opts[i].name); - } - r += sprintf(buf + r, "\n"); WARN_ON(r >= len + 2); @@ -2766,48 +2225,13 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, return r; } -/* Try to assign a tracer specific option */ -static int set_tracer_option(struct tracer *trace, char *cmp, int neg) -{ - struct tracer_flags *trace_flags = trace->flags; - struct tracer_opt *opts = NULL; - int ret = 0, i = 0; - int len; - - for (i = 0; trace_flags->opts[i].name; i++) { - opts = &trace_flags->opts[i]; - len = strlen(opts->name); - - if (strncmp(cmp, opts->name, len) == 0) { - ret = trace->set_flag(trace_flags->val, - opts->bit, !neg); - break; - } - } - /* Not found */ - if (!trace_flags->opts[i].name) - return -EINVAL; - - /* Refused to handle */ - if (ret) - return ret; - - if (neg) - trace_flags->val &= ~opts->bit; - else - trace_flags->val |= opts->bit; - - return 0; -} - static ssize_t -tracing_trace_options_write(struct file *filp, const char __user *ubuf, +tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { char buf[64]; char *cmp = buf; int neg = 0; - int ret; int i; if (cnt >= sizeof(buf)) @@ -2834,13 +2258,11 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, break; } } - - /* If no option could be set, test the specific tracer options */ - if (!trace_options[i]) { - ret = set_tracer_option(current_trace, cmp, neg); - if (ret) - return ret; - } + /* + * If no option could be set, return an error: + */ + if (!trace_options[i]) + return -EINVAL; filp->f_pos += cnt; @@ -2849,8 +2271,8 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, static struct file_operations tracing_iter_fops = { .open = tracing_open_generic, - .read = tracing_trace_options_read, - .write = tracing_trace_options_write, + .read = tracing_iter_ctrl_read, + .write = tracing_iter_ctrl_write, }; static const char readme_msg[] = @@ -2864,9 +2286,9 @@ static const char readme_msg[] = "# echo sched_switch > /debug/tracing/current_tracer\n" "# cat /debug/tracing/current_tracer\n" "sched_switch\n" - "# cat /debug/tracing/trace_options\n" + "# cat /debug/tracing/iter_ctrl\n" "noprint-parent nosym-offset nosym-addr noverbose\n" - "# echo print-parent > /debug/tracing/trace_options\n" + "# echo print-parent > /debug/tracing/iter_ctrl\n" "# echo 1 > /debug/tracing/tracing_enabled\n" "# cat /debug/tracing/trace > /tmp/trace.txt\n" "echo 0 > /debug/tracing/tracing_enabled\n" @@ -2889,10 +2311,11 @@ static ssize_t tracing_ctrl_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { + struct trace_array *tr = filp->private_data; char buf[64]; int r; - r = sprintf(buf, "%u\n", tracer_enabled); + r = sprintf(buf, "%ld\n", tr->ctrl); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -2920,18 +2343,16 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, val = !!val; mutex_lock(&trace_types_lock); - if (tracer_enabled ^ val) { - if (val) { + if (tr->ctrl ^ val) { + if (val) tracer_enabled = 1; - if (current_trace->start) - current_trace->start(tr); - tracing_start(); - } else { + else tracer_enabled = 0; - tracing_stop(); - if (current_trace->stop) - current_trace->stop(tr); - } + + tr->ctrl = val; + + if (current_trace && current_trace->ctrl_update) + current_trace->ctrl_update(tr); } mutex_unlock(&trace_types_lock); @@ -2957,11 +2378,29 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -static int tracing_set_tracer(char *buf) +static ssize_t +tracing_set_trace_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) { struct trace_array *tr = &global_trace; struct tracer *t; - int ret = 0; + char buf[max_tracer_type_len+1]; + int i; + size_t ret; + + ret = cnt; + + if (cnt > max_tracer_type_len) + cnt = max_tracer_type_len; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + /* strip ending whitespace. */ + for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) + buf[i] = 0; mutex_lock(&trace_types_lock); for (t = trace_types; t; t = t->next) { @@ -2975,52 +2414,18 @@ static int tracing_set_tracer(char *buf) if (t == current_trace) goto out; - trace_branch_disable(); if (current_trace && current_trace->reset) current_trace->reset(tr); current_trace = t; - if (t->init) { - ret = t->init(tr); - if (ret) - goto out; - } + if (t->init) + t->init(tr); - trace_branch_enable(tr); out: mutex_unlock(&trace_types_lock); - return ret; -} - -static ssize_t -tracing_set_trace_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - char buf[max_tracer_type_len+1]; - int i; - size_t ret; - int err; - - ret = cnt; - - if (cnt > max_tracer_type_len) - cnt = max_tracer_type_len; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - - /* strip ending whitespace. */ - for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) - buf[i] = 0; - - err = tracing_set_tracer(buf); - if (err) - return err; - - filp->f_pos += ret; + if (ret > 0) + filp->f_pos += ret; return ret; } @@ -3087,10 +2492,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) return -ENOMEM; mutex_lock(&trace_types_lock); - - /* trace pipe does not show start of buffer */ - cpus_setall(iter->started); - iter->tr = &global_trace; iter->trace = current_trace; filp->private_data = iter; @@ -3266,7 +2667,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf, char buf[64]; int r; - r = sprintf(buf, "%lu\n", tr->entries >> 10); + r = sprintf(buf, "%lu\n", tr->entries); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -3277,6 +2678,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, unsigned long val; char buf[64]; int ret, cpu; + struct trace_array *tr = filp->private_data; if (cnt >= sizeof(buf)) return -EINVAL; @@ -3296,7 +2698,12 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, mutex_lock(&trace_types_lock); - tracing_stop(); + if (tr->ctrl) { + cnt = -EBUSY; + pr_info("ftrace: please disable tracing" + " before modifying buffer size\n"); + goto out; + } /* disable all cpu buffers */ for_each_tracing_cpu(cpu) { @@ -3306,9 +2713,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, atomic_inc(&max_tr.data[cpu]->disabled); } - /* value is in KB */ - val <<= 10; - if (val != global_trace.entries) { ret = ring_buffer_resize(global_trace.buffer, val); if (ret < 0) { @@ -3347,7 +2751,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, atomic_dec(&max_tr.data[cpu]->disabled); } - tracing_start(); max_tr.entries = global_trace.entries; mutex_unlock(&trace_types_lock); @@ -3359,7 +2762,7 @@ static int mark_printk(const char *fmt, ...) int ret; va_list args; va_start(args, fmt); - ret = trace_vprintk(0, -1, fmt, args); + ret = trace_vprintk(0, fmt, args); va_end(args); return ret; } @@ -3370,8 +2773,9 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, { char *buf; char *end; + struct trace_array *tr = &global_trace; - if (tracing_disabled) + if (!tr->ctrl || tracing_disabled) return -EINVAL; if (cnt > TRACE_BUF_SIZE) @@ -3437,38 +2841,22 @@ static struct file_operations tracing_mark_fops = { #ifdef CONFIG_DYNAMIC_FTRACE -int __weak ftrace_arch_read_dyn_info(char *buf, int size) -{ - return 0; -} - static ssize_t -tracing_read_dyn_info(struct file *filp, char __user *ubuf, +tracing_read_long(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - static char ftrace_dyn_info_buffer[1024]; - static DEFINE_MUTEX(dyn_info_mutex); unsigned long *p = filp->private_data; - char *buf = ftrace_dyn_info_buffer; - int size = ARRAY_SIZE(ftrace_dyn_info_buffer); + char buf[64]; int r; - mutex_lock(&dyn_info_mutex); - r = sprintf(buf, "%ld ", *p); - - r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r); - buf[r++] = '\n'; + r = sprintf(buf, "%ld\n", *p); - r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); - - mutex_unlock(&dyn_info_mutex); - - return r; + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -static struct file_operations tracing_dyn_info_fops = { +static struct file_operations tracing_read_long_fops = { .open = tracing_open_generic, - .read = tracing_read_dyn_info, + .read = tracing_read_long, }; #endif @@ -3509,10 +2897,10 @@ static __init int tracer_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); - entry = debugfs_create_file("trace_options", 0644, d_tracer, + entry = debugfs_create_file("iter_ctrl", 0644, d_tracer, NULL, &tracing_iter_fops); if (!entry) - pr_warning("Could not create debugfs 'trace_options' entry\n"); + pr_warning("Could not create debugfs 'iter_ctrl' entry\n"); entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, NULL, &tracing_cpumask_fops); @@ -3562,11 +2950,11 @@ static __init int tracer_init_debugfs(void) pr_warning("Could not create debugfs " "'trace_pipe' entry\n"); - entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer, + entry = debugfs_create_file("trace_entries", 0644, d_tracer, &global_trace, &tracing_entries_fops); if (!entry) pr_warning("Could not create debugfs " - "'buffer_size_kb' entry\n"); + "'trace_entries' entry\n"); entry = debugfs_create_file("trace_marker", 0220, d_tracer, NULL, &tracing_mark_fops); @@ -3577,7 +2965,7 @@ static __init int tracer_init_debugfs(void) #ifdef CONFIG_DYNAMIC_FTRACE entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, - &tracing_dyn_info_fops); + &tracing_read_long_fops); if (!entry) pr_warning("Could not create debugfs " "'dyn_ftrace_total_info' entry\n"); @@ -3588,7 +2976,7 @@ static __init int tracer_init_debugfs(void) return 0; } -int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) +int trace_vprintk(unsigned long ip, const char *fmt, va_list args) { static DEFINE_SPINLOCK(trace_buf_lock); static char trace_buf[TRACE_BUF_SIZE]; @@ -3596,11 +2984,11 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) struct ring_buffer_event *event; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; - int cpu, len = 0, size, pc; struct print_entry *entry; - unsigned long irq_flags; + unsigned long flags, irq_flags; + int cpu, len = 0, size, pc; - if (tracing_disabled || tracing_selftest_running) + if (!tr->ctrl || tracing_disabled) return 0; pc = preempt_count(); @@ -3611,8 +2999,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) if (unlikely(atomic_read(&data->disabled))) goto out; - pause_graph_tracing(); - spin_lock_irqsave(&trace_buf_lock, irq_flags); + spin_lock_irqsave(&trace_buf_lock, flags); len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); len = min(len, TRACE_BUF_SIZE-1); @@ -3623,18 +3010,17 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) if (!event) goto out_unlock; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, irq_flags, pc); + tracing_generic_entry_update(&entry->ent, flags, pc); entry->ent.type = TRACE_PRINT; entry->ip = ip; - entry->depth = depth; memcpy(&entry->buf, trace_buf, len); entry->buf[len] = 0; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); out_unlock: - spin_unlock_irqrestore(&trace_buf_lock, irq_flags); - unpause_graph_tracing(); + spin_unlock_irqrestore(&trace_buf_lock, flags); + out: preempt_enable_notrace(); @@ -3651,7 +3037,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) return 0; va_start(ap, fmt); - ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); + ret = trace_vprintk(ip, fmt, ap); va_end(ap); return ret; } @@ -3660,8 +3046,7 @@ EXPORT_SYMBOL_GPL(__ftrace_printk); static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { - if (ftrace_dump_on_oops) - ftrace_dump(); + ftrace_dump(); return NOTIFY_OK; } @@ -3677,8 +3062,7 @@ static int trace_die_handler(struct notifier_block *self, { switch (val) { case DIE_OOPS: - if (ftrace_dump_on_oops) - ftrace_dump(); + ftrace_dump(); break; default: break; @@ -3719,6 +3103,7 @@ trace_printk_seq(struct trace_seq *s) trace_seq_reset(s); } + void ftrace_dump(void) { static DEFINE_SPINLOCK(ftrace_dump_lock); @@ -3743,9 +3128,6 @@ void ftrace_dump(void) atomic_inc(&global_trace.data[cpu]->disabled); } - /* don't look at user memory in panic mode */ - trace_flags &= ~TRACE_ITER_SYM_USEROBJ; - printk(KERN_TRACE "Dumping ftrace buffer:\n"); iter.tr = &global_trace; @@ -3839,6 +3221,7 @@ __init static int tracer_alloc_buffers(void) #endif /* All seems OK, enable tracing */ + global_trace.ctrl = tracer_enabled; tracing_disabled = 0; atomic_notifier_chain_register(&panic_notifier_list, diff --git a/trunk/kernel/trace/trace.h b/trunk/kernel/trace/trace.h index 5ac697065a48..8465ad052707 100644 --- a/trunk/kernel/trace/trace.h +++ b/trunk/kernel/trace/trace.h @@ -8,7 +8,6 @@ #include #include #include -#include enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -22,14 +21,7 @@ enum trace_type { TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, - TRACE_BRANCH, - TRACE_BOOT_CALL, - TRACE_BOOT_RET, - TRACE_GRAPH_RET, - TRACE_GRAPH_ENT, - TRACE_USER_STACK, - TRACE_BTS, - TRACE_POWER, + TRACE_BOOT, __TRACE_LAST_TYPE }; @@ -46,7 +38,6 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; - int tgid; }; /* @@ -57,18 +48,6 @@ struct ftrace_entry { unsigned long ip; unsigned long parent_ip; }; - -/* Function call entry */ -struct ftrace_graph_ent_entry { - struct trace_entry ent; - struct ftrace_graph_ent graph_ent; -}; - -/* Function return entry */ -struct ftrace_graph_ret_entry { - struct trace_entry ent; - struct ftrace_graph_ret ret; -}; extern struct tracer boot_tracer; /* @@ -106,18 +85,12 @@ struct stack_entry { unsigned long caller[FTRACE_STACK_ENTRIES]; }; -struct userstack_entry { - struct trace_entry ent; - unsigned long caller[FTRACE_STACK_ENTRIES]; -}; - /* * ftrace_printk entry: */ struct print_entry { struct trace_entry ent; unsigned long ip; - int depth; char buf[]; }; @@ -139,35 +112,9 @@ struct trace_mmiotrace_map { struct mmiotrace_map map; }; -struct trace_boot_call { +struct trace_boot { struct trace_entry ent; - struct boot_trace_call boot_call; -}; - -struct trace_boot_ret { - struct trace_entry ent; - struct boot_trace_ret boot_ret; -}; - -#define TRACE_FUNC_SIZE 30 -#define TRACE_FILE_SIZE 20 -struct trace_branch { - struct trace_entry ent; - unsigned line; - char func[TRACE_FUNC_SIZE+1]; - char file[TRACE_FILE_SIZE+1]; - char correct; -}; - -struct bts_entry { - struct trace_entry ent; - unsigned long from; - unsigned long to; -}; - -struct trace_power { - struct trace_entry ent; - struct power_trace state_data; + struct boot_trace initcall; }; /* @@ -225,6 +172,7 @@ struct trace_iterator; struct trace_array { struct ring_buffer *buffer; unsigned long entries; + long ctrl; int cpu; cycle_t time_start; struct task_struct *waiter; @@ -264,22 +212,13 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \ IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ - IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ IF_ASSIGN(var, ent, struct special_entry, 0); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ TRACE_MMIO_MAP); \ - IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\ - IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\ - IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ - IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ - TRACE_GRAPH_ENT); \ - IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ - TRACE_GRAPH_RET); \ - IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\ - IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ + IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \ __ftrace_bad_type(); \ } while (0) @@ -290,56 +229,29 @@ enum print_line_t { TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */ }; - -/* - * An option specific to a tracer. This is a boolean value. - * The bit is the bit index that sets its value on the - * flags value in struct tracer_flags. - */ -struct tracer_opt { - const char *name; /* Will appear on the trace_options file */ - u32 bit; /* Mask assigned in val field in tracer_flags */ -}; - -/* - * The set of specific options for a tracer. Your tracer - * have to set the initial value of the flags val. - */ -struct tracer_flags { - u32 val; - struct tracer_opt *opts; -}; - -/* Makes more easy to define a tracer opt */ -#define TRACER_OPT(s, b) .name = #s, .bit = b - /* * A specific tracer, represented by methods that operate on a trace array: */ struct tracer { const char *name; - /* Your tracer should raise a warning if init fails */ - int (*init)(struct trace_array *tr); + void (*init)(struct trace_array *tr); void (*reset)(struct trace_array *tr); - void (*start)(struct trace_array *tr); - void (*stop)(struct trace_array *tr); void (*open)(struct trace_iterator *iter); void (*pipe_open)(struct trace_iterator *iter); void (*close)(struct trace_iterator *iter); + void (*start)(struct trace_iterator *iter); + void (*stop)(struct trace_iterator *iter); ssize_t (*read)(struct trace_iterator *iter, struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos); + void (*ctrl_update)(struct trace_array *tr); #ifdef CONFIG_FTRACE_STARTUP_TEST int (*selftest)(struct tracer *trace, struct trace_array *tr); #endif - void (*print_header)(struct seq_file *m); enum print_line_t (*print_line)(struct trace_iterator *iter); - /* If you handled the flag setting, return 0 */ - int (*set_flag)(u32 old_flags, u32 bit, int set); struct tracer *next; int print_max; - struct tracer_flags *flags; }; struct trace_seq { @@ -367,11 +279,8 @@ struct trace_iterator { unsigned long iter_flags; loff_t pos; long idx; - - cpumask_t started; }; -int tracing_is_enabled(void); void trace_wake_up(void); void tracing_reset(struct trace_array *tr, int cpu); int tracing_open_generic(struct inode *inode, struct file *filp); @@ -412,17 +321,8 @@ void trace_function(struct trace_array *tr, unsigned long parent_ip, unsigned long flags, int pc); -void trace_graph_return(struct ftrace_graph_ret *trace); -int trace_graph_entry(struct ftrace_graph_ent *trace); -void trace_bts(struct trace_array *tr, - unsigned long from, - unsigned long to); - void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); -void tracing_sched_switch_assign_trace(struct trace_array *tr); -void tracing_stop_sched_switch_record(void); -void tracing_start_sched_switch_record(void); int register_tracer(struct tracer *type); void unregister_tracer(struct tracer *type); @@ -458,7 +358,6 @@ struct tracer_switch_ops { struct tracer_switch_ops *next; }; -char *trace_find_cmdline(int pid); #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE @@ -484,79 +383,19 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr); -extern int trace_selftest_startup_branch(struct tracer *trace, - struct trace_array *tr); #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); extern void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter); - -extern int -seq_print_ip_sym(struct trace_seq *s, unsigned long ip, - unsigned long sym_flags); extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt); extern long ns2usecs(cycle_t nsec); -extern int -trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); +extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern unsigned long trace_flags; -/* Standard output formatting function used for function return traces */ -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -extern enum print_line_t print_graph_function(struct trace_iterator *iter); - -#ifdef CONFIG_DYNAMIC_FTRACE -/* TODO: make this variable */ -#define FTRACE_GRAPH_MAX_FUNCS 32 -extern int ftrace_graph_count; -extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; - -static inline int ftrace_graph_addr(unsigned long addr) -{ - int i; - - if (!ftrace_graph_count || test_tsk_trace_graph(current)) - return 1; - - for (i = 0; i < ftrace_graph_count; i++) { - if (addr == ftrace_graph_funcs[i]) - return 1; - } - - return 0; -} -#else -static inline int ftrace_trace_addr(unsigned long addr) -{ - return 1; -} -static inline int ftrace_graph_addr(unsigned long addr) -{ - return 1; -} -#endif /* CONFIG_DYNAMIC_FTRACE */ - -#else /* CONFIG_FUNCTION_GRAPH_TRACER */ -static inline enum print_line_t -print_graph_function(struct trace_iterator *iter) -{ - return TRACE_TYPE_UNHANDLED; -} -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - -extern struct pid *ftrace_pid_trace; - -static inline int ftrace_trace_task(struct task_struct *task) -{ - if (!ftrace_pid_trace) - return 1; - - return test_tsk_trace_trace(task); -} - /* * trace_iterator_flags is an enumeration that defines bit * positions into trace_flags that controls the output. @@ -576,92 +415,8 @@ enum trace_iterator_flags { TRACE_ITER_STACKTRACE = 0x100, TRACE_ITER_SCHED_TREE = 0x200, TRACE_ITER_PRINTK = 0x400, - TRACE_ITER_PREEMPTONLY = 0x800, - TRACE_ITER_BRANCH = 0x1000, - TRACE_ITER_ANNOTATE = 0x2000, - TRACE_ITER_USERSTACKTRACE = 0x4000, - TRACE_ITER_SYM_USEROBJ = 0x8000 }; -/* - * TRACE_ITER_SYM_MASK masks the options in trace_flags that - * control the output of kernel symbols. - */ -#define TRACE_ITER_SYM_MASK \ - (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) - extern struct tracer nop_trace; -/** - * ftrace_preempt_disable - disable preemption scheduler safe - * - * When tracing can happen inside the scheduler, there exists - * cases that the tracing might happen before the need_resched - * flag is checked. If this happens and the tracer calls - * preempt_enable (after a disable), a schedule might take place - * causing an infinite recursion. - * - * To prevent this, we read the need_recshed flag before - * disabling preemption. When we want to enable preemption we - * check the flag, if it is set, then we call preempt_enable_no_resched. - * Otherwise, we call preempt_enable. - * - * The rational for doing the above is that if need resched is set - * and we have yet to reschedule, we are either in an atomic location - * (where we do not need to check for scheduling) or we are inside - * the scheduler and do not want to resched. - */ -static inline int ftrace_preempt_disable(void) -{ - int resched; - - resched = need_resched(); - preempt_disable_notrace(); - - return resched; -} - -/** - * ftrace_preempt_enable - enable preemption scheduler safe - * @resched: the return value from ftrace_preempt_disable - * - * This is a scheduler safe way to enable preemption and not miss - * any preemption checks. The disabled saved the state of preemption. - * If resched is set, then we were either inside an atomic or - * are inside the scheduler (we would have already scheduled - * otherwise). In this case, we do not want to call normal - * preempt_enable, but preempt_enable_no_resched instead. - */ -static inline void ftrace_preempt_enable(int resched) -{ - if (resched) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); -} - -#ifdef CONFIG_BRANCH_TRACER -extern int enable_branch_tracing(struct trace_array *tr); -extern void disable_branch_tracing(void); -static inline int trace_branch_enable(struct trace_array *tr) -{ - if (trace_flags & TRACE_ITER_BRANCH) - return enable_branch_tracing(tr); - return 0; -} -static inline void trace_branch_disable(void) -{ - /* due to races, always disable */ - disable_branch_tracing(); -} -#else -static inline int trace_branch_enable(struct trace_array *tr) -{ - return 0; -} -static inline void trace_branch_disable(void) -{ -} -#endif /* CONFIG_BRANCH_TRACER */ - #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/trunk/kernel/trace/trace_boot.c b/trunk/kernel/trace/trace_boot.c index a4fa2c57e34e..d0a5e50eeff2 100644 --- a/trunk/kernel/trace/trace_boot.c +++ b/trunk/kernel/trace/trace_boot.c @@ -13,117 +13,73 @@ #include "trace.h" static struct trace_array *boot_trace; -static bool pre_initcalls_finished; +static int trace_boot_enabled; -/* Tells the boot tracer that the pre_smp_initcalls are finished. - * So we are ready . - * It doesn't enable sched events tracing however. - * You have to call enable_boot_trace to do so. - */ -void start_boot_trace(void) -{ - pre_initcalls_finished = true; -} -void enable_boot_trace(void) +/* Should be started after do_pre_smp_initcalls() in init/main.c */ +void start_boot_trace(void) { - if (pre_initcalls_finished) - tracing_start_sched_switch_record(); + trace_boot_enabled = 1; } -void disable_boot_trace(void) +void stop_boot_trace(void) { - if (pre_initcalls_finished) - tracing_stop_sched_switch_record(); + trace_boot_enabled = 0; } -static void reset_boot_trace(struct trace_array *tr) +void reset_boot_trace(struct trace_array *tr) { - int cpu; - - tr->time_start = ftrace_now(tr->cpu); - - for_each_online_cpu(cpu) - tracing_reset(tr, cpu); + stop_boot_trace(); } -static int boot_trace_init(struct trace_array *tr) +static void boot_trace_init(struct trace_array *tr) { int cpu; boot_trace = tr; + trace_boot_enabled = 0; + for_each_cpu_mask(cpu, cpu_possible_map) tracing_reset(tr, cpu); - - tracing_sched_switch_assign_trace(tr); - return 0; } -static enum print_line_t -initcall_call_print_line(struct trace_iterator *iter) +static void boot_trace_ctrl_update(struct trace_array *tr) { - struct trace_entry *entry = iter->ent; - struct trace_seq *s = &iter->seq; - struct trace_boot_call *field; - struct boot_trace_call *call; - u64 ts; - unsigned long nsec_rem; - int ret; - - trace_assign_type(field, entry); - call = &field->boot_call; - ts = iter->ts; - nsec_rem = do_div(ts, 1000000000); - - ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n", - (unsigned long)ts, nsec_rem, call->func, call->caller); - - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + if (tr->ctrl) + start_boot_trace(); else - return TRACE_TYPE_HANDLED; + stop_boot_trace(); } -static enum print_line_t -initcall_ret_print_line(struct trace_iterator *iter) +static enum print_line_t initcall_print_line(struct trace_iterator *iter) { + int ret; struct trace_entry *entry = iter->ent; + struct trace_boot *field = (struct trace_boot *)entry; + struct boot_trace *it = &field->initcall; struct trace_seq *s = &iter->seq; - struct trace_boot_ret *field; - struct boot_trace_ret *init_ret; - u64 ts; - unsigned long nsec_rem; - int ret; - - trace_assign_type(field, entry); - init_ret = &field->boot_ret; - ts = iter->ts; - nsec_rem = do_div(ts, 1000000000); - - ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " - "returned %d after %llu msecs\n", - (unsigned long) ts, - nsec_rem, - init_ret->func, init_ret->result, init_ret->duration); - - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - else + struct timespec calltime = ktime_to_timespec(it->calltime); + struct timespec rettime = ktime_to_timespec(it->rettime); + + if (entry->type == TRACE_BOOT) { + ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n", + calltime.tv_sec, + calltime.tv_nsec, + it->func, it->caller); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " + "returned %d after %lld msecs\n", + rettime.tv_sec, + rettime.tv_nsec, + it->func, it->result, it->duration); + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; return TRACE_TYPE_HANDLED; -} - -static enum print_line_t initcall_print_line(struct trace_iterator *iter) -{ - struct trace_entry *entry = iter->ent; - - switch (entry->type) { - case TRACE_BOOT_CALL: - return initcall_call_print_line(iter); - case TRACE_BOOT_RET: - return initcall_ret_print_line(iter); - default: - return TRACE_TYPE_UNHANDLED; } + return TRACE_TYPE_UNHANDLED; } struct tracer boot_tracer __read_mostly = @@ -131,53 +87,27 @@ struct tracer boot_tracer __read_mostly = .name = "initcall", .init = boot_trace_init, .reset = reset_boot_trace, + .ctrl_update = boot_trace_ctrl_update, .print_line = initcall_print_line, }; -void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) +void trace_boot(struct boot_trace *it, initcall_t fn) { struct ring_buffer_event *event; - struct trace_boot_call *entry; + struct trace_boot *entry; + struct trace_array_cpu *data; unsigned long irq_flags; struct trace_array *tr = boot_trace; - if (!pre_initcalls_finished) + if (!trace_boot_enabled) return; /* Get its name now since this function could * disappear because it is in the .init section. */ - sprint_symbol(bt->func, (unsigned long)fn); - preempt_disable(); - - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); - if (!event) - goto out; - entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, 0); - entry->ent.type = TRACE_BOOT_CALL; - entry->boot_call = *bt; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - - trace_wake_up(); - - out: - preempt_enable(); -} - -void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) -{ - struct ring_buffer_event *event; - struct trace_boot_ret *entry; - unsigned long irq_flags; - struct trace_array *tr = boot_trace; - - if (!pre_initcalls_finished) - return; - - sprint_symbol(bt->func, (unsigned long)fn); + sprint_symbol(it->func, (unsigned long)fn); preempt_disable(); + data = tr->data[smp_processor_id()]; event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq_flags); @@ -185,8 +115,8 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) goto out; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, 0, 0); - entry->ent.type = TRACE_BOOT_RET; - entry->boot_ret = *bt; + entry->ent.type = TRACE_BOOT; + entry->initcall = *it; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); trace_wake_up(); diff --git a/trunk/kernel/trace/trace_branch.c b/trunk/kernel/trace/trace_branch.c deleted file mode 100644 index 6c00feb3bac7..000000000000 --- a/trunk/kernel/trace/trace_branch.c +++ /dev/null @@ -1,342 +0,0 @@ -/* - * unlikely profiler - * - * Copyright (C) 2008 Steven Rostedt - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "trace.h" - -#ifdef CONFIG_BRANCH_TRACER - -static int branch_tracing_enabled __read_mostly; -static DEFINE_MUTEX(branch_tracing_mutex); -static struct trace_array *branch_tracer; - -static void -probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) -{ - struct trace_array *tr = branch_tracer; - struct ring_buffer_event *event; - struct trace_branch *entry; - unsigned long flags, irq_flags; - int cpu, pc; - const char *p; - - /* - * I would love to save just the ftrace_likely_data pointer, but - * this code can also be used by modules. Ugly things can happen - * if the module is unloaded, and then we go and read the - * pointer. This is slower, but much safer. - */ - - if (unlikely(!tr)) - return; - - local_irq_save(flags); - cpu = raw_smp_processor_id(); - if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) - goto out; - - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); - if (!event) - goto out; - - pc = preempt_count(); - entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_BRANCH; - - /* Strip off the path, only save the file */ - p = f->file + strlen(f->file); - while (p >= f->file && *p != '/') - p--; - p++; - - strncpy(entry->func, f->func, TRACE_FUNC_SIZE); - strncpy(entry->file, p, TRACE_FILE_SIZE); - entry->func[TRACE_FUNC_SIZE] = 0; - entry->file[TRACE_FILE_SIZE] = 0; - entry->line = f->line; - entry->correct = val == expect; - - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - - out: - atomic_dec(&tr->data[cpu]->disabled); - local_irq_restore(flags); -} - -static inline -void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect) -{ - if (!branch_tracing_enabled) - return; - - probe_likely_condition(f, val, expect); -} - -int enable_branch_tracing(struct trace_array *tr) -{ - int ret = 0; - - mutex_lock(&branch_tracing_mutex); - branch_tracer = tr; - /* - * Must be seen before enabling. The reader is a condition - * where we do not need a matching rmb() - */ - smp_wmb(); - branch_tracing_enabled++; - mutex_unlock(&branch_tracing_mutex); - - return ret; -} - -void disable_branch_tracing(void) -{ - mutex_lock(&branch_tracing_mutex); - - if (!branch_tracing_enabled) - goto out_unlock; - - branch_tracing_enabled--; - - out_unlock: - mutex_unlock(&branch_tracing_mutex); -} - -static void start_branch_trace(struct trace_array *tr) -{ - enable_branch_tracing(tr); -} - -static void stop_branch_trace(struct trace_array *tr) -{ - disable_branch_tracing(); -} - -static int branch_trace_init(struct trace_array *tr) -{ - int cpu; - - for_each_online_cpu(cpu) - tracing_reset(tr, cpu); - - start_branch_trace(tr); - return 0; -} - -static void branch_trace_reset(struct trace_array *tr) -{ - stop_branch_trace(tr); -} - -struct tracer branch_trace __read_mostly = -{ - .name = "branch", - .init = branch_trace_init, - .reset = branch_trace_reset, -#ifdef CONFIG_FTRACE_SELFTEST - .selftest = trace_selftest_startup_branch, -#endif -}; - -__init static int init_branch_trace(void) -{ - return register_tracer(&branch_trace); -} - -device_initcall(init_branch_trace); -#else -static inline -void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect) -{ -} -#endif /* CONFIG_BRANCH_TRACER */ - -void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect) -{ - /* - * I would love to have a trace point here instead, but the - * trace point code is so inundated with unlikely and likely - * conditions that the recursive nightmare that exists is too - * much to try to get working. At least for now. - */ - trace_likely_condition(f, val, expect); - - /* FIXME: Make this atomic! */ - if (val == expect) - f->correct++; - else - f->incorrect++; -} -EXPORT_SYMBOL(ftrace_likely_update); - -struct ftrace_pointer { - void *start; - void *stop; - int hit; -}; - -static void * -t_next(struct seq_file *m, void *v, loff_t *pos) -{ - const struct ftrace_pointer *f = m->private; - struct ftrace_branch_data *p = v; - - (*pos)++; - - if (v == (void *)1) - return f->start; - - ++p; - - if ((void *)p >= (void *)f->stop) - return NULL; - - return p; -} - -static void *t_start(struct seq_file *m, loff_t *pos) -{ - void *t = (void *)1; - loff_t l = 0; - - for (; t && l < *pos; t = t_next(m, t, &l)) - ; - - return t; -} - -static void t_stop(struct seq_file *m, void *p) -{ -} - -static int t_show(struct seq_file *m, void *v) -{ - const struct ftrace_pointer *fp = m->private; - struct ftrace_branch_data *p = v; - const char *f; - long percent; - - if (v == (void *)1) { - if (fp->hit) - seq_printf(m, " miss hit %% "); - else - seq_printf(m, " correct incorrect %% "); - seq_printf(m, " Function " - " File Line\n" - " ------- --------- - " - " -------- " - " ---- ----\n"); - return 0; - } - - /* Only print the file, not the path */ - f = p->file + strlen(p->file); - while (f >= p->file && *f != '/') - f--; - f++; - - /* - * The miss is overlayed on correct, and hit on incorrect. - */ - if (p->correct) { - percent = p->incorrect * 100; - percent /= p->correct + p->incorrect; - } else - percent = p->incorrect ? 100 : -1; - - seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect); - if (percent < 0) - seq_printf(m, " X "); - else - seq_printf(m, "%3ld ", percent); - seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line); - return 0; -} - -static struct seq_operations tracing_likely_seq_ops = { - .start = t_start, - .next = t_next, - .stop = t_stop, - .show = t_show, -}; - -static int tracing_branch_open(struct inode *inode, struct file *file) -{ - int ret; - - ret = seq_open(file, &tracing_likely_seq_ops); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = (void *)inode->i_private; - } - - return ret; -} - -static const struct file_operations tracing_branch_fops = { - .open = tracing_branch_open, - .read = seq_read, - .llseek = seq_lseek, -}; - -#ifdef CONFIG_PROFILE_ALL_BRANCHES -extern unsigned long __start_branch_profile[]; -extern unsigned long __stop_branch_profile[]; - -static const struct ftrace_pointer ftrace_branch_pos = { - .start = __start_branch_profile, - .stop = __stop_branch_profile, - .hit = 1, -}; - -#endif /* CONFIG_PROFILE_ALL_BRANCHES */ - -extern unsigned long __start_annotated_branch_profile[]; -extern unsigned long __stop_annotated_branch_profile[]; - -static const struct ftrace_pointer ftrace_annotated_branch_pos = { - .start = __start_annotated_branch_profile, - .stop = __stop_annotated_branch_profile, -}; - -static __init int ftrace_branch_init(void) -{ - struct dentry *d_tracer; - struct dentry *entry; - - d_tracer = tracing_init_dentry(); - - entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer, - (void *)&ftrace_annotated_branch_pos, - &tracing_branch_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'profile_annotatet_branch' entry\n"); - -#ifdef CONFIG_PROFILE_ALL_BRANCHES - entry = debugfs_create_file("profile_branch", 0444, d_tracer, - (void *)&ftrace_branch_pos, - &tracing_branch_fops); - if (!entry) - pr_warning("Could not create debugfs" - " 'profile_branch' entry\n"); -#endif - - return 0; -} - -device_initcall(ftrace_branch_init); diff --git a/trunk/kernel/trace/trace_bts.c b/trunk/kernel/trace/trace_bts.c deleted file mode 100644 index 23b76e4690ef..000000000000 --- a/trunk/kernel/trace/trace_bts.c +++ /dev/null @@ -1,276 +0,0 @@ -/* - * BTS tracer - * - * Copyright (C) 2008 Markus Metzger - * - */ - -#include -#include -#include -#include -#include - -#include - -#include "trace.h" - - -#define SIZEOF_BTS (1 << 13) - -static DEFINE_PER_CPU(struct bts_tracer *, tracer); -static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); - -#define this_tracer per_cpu(tracer, smp_processor_id()) -#define this_buffer per_cpu(buffer, smp_processor_id()) - - -/* - * Information to interpret a BTS record. - * This will go into an in-kernel BTS interface. - */ -static unsigned char sizeof_field; -static unsigned long debugctl_mask; - -#define sizeof_bts (3 * sizeof_field) - -static void bts_trace_cpuinit(struct cpuinfo_x86 *c) -{ - switch (c->x86) { - case 0x6: - switch (c->x86_model) { - case 0x0 ... 0xC: - break; - case 0xD: - case 0xE: /* Pentium M */ - sizeof_field = sizeof(long); - debugctl_mask = (1<<6)|(1<<7); - break; - default: - sizeof_field = 8; - debugctl_mask = (1<<6)|(1<<7); - break; - } - break; - case 0xF: - switch (c->x86_model) { - case 0x0: - case 0x1: - case 0x2: /* Netburst */ - sizeof_field = sizeof(long); - debugctl_mask = (1<<2)|(1<<3); - break; - default: - /* sorry, don't know about them */ - break; - } - break; - default: - /* sorry, don't know about them */ - break; - } -} - -static inline void bts_enable(void) -{ - unsigned long debugctl; - - rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); - wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl | debugctl_mask); -} - -static inline void bts_disable(void) -{ - unsigned long debugctl; - - rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); - wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl & ~debugctl_mask); -} - -static void bts_trace_reset(struct trace_array *tr) -{ - int cpu; - - tr->time_start = ftrace_now(tr->cpu); - - for_each_online_cpu(cpu) - tracing_reset(tr, cpu); -} - -static void bts_trace_start_cpu(void *arg) -{ - this_tracer = - ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS, - /* ovfl = */ NULL, /* th = */ (size_t)-1); - if (IS_ERR(this_tracer)) { - this_tracer = NULL; - return; - } - - bts_enable(); -} - -static void bts_trace_start(struct trace_array *tr) -{ - int cpu; - - bts_trace_reset(tr); - - for_each_cpu_mask(cpu, cpu_possible_map) - smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1); -} - -static void bts_trace_stop_cpu(void *arg) -{ - if (this_tracer) { - bts_disable(); - - ds_release_bts(this_tracer); - this_tracer = NULL; - } -} - -static void bts_trace_stop(struct trace_array *tr) -{ - int cpu; - - for_each_cpu_mask(cpu, cpu_possible_map) - smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); -} - -static int bts_trace_init(struct trace_array *tr) -{ - bts_trace_cpuinit(&boot_cpu_data); - bts_trace_reset(tr); - bts_trace_start(tr); - - return 0; -} - -static void bts_trace_print_header(struct seq_file *m) -{ -#ifdef __i386__ - seq_puts(m, "# CPU# FROM TO FUNCTION\n"); - seq_puts(m, "# | | | |\n"); -#else - seq_puts(m, - "# CPU# FROM TO FUNCTION\n"); - seq_puts(m, - "# | | | |\n"); -#endif -} - -static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) -{ - struct trace_entry *entry = iter->ent; - struct trace_seq *seq = &iter->seq; - struct bts_entry *it; - - trace_assign_type(it, entry); - - if (entry->type == TRACE_BTS) { - int ret; -#ifdef CONFIG_KALLSYMS - char function[KSYM_SYMBOL_LEN]; - sprint_symbol(function, it->from); -#else - char *function = ""; -#endif - - ret = trace_seq_printf(seq, "%4d 0x%lx -> 0x%lx [%s]\n", - entry->cpu, it->from, it->to, function); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE;; - return TRACE_TYPE_HANDLED; - } - return TRACE_TYPE_UNHANDLED; -} - -void trace_bts(struct trace_array *tr, unsigned long from, unsigned long to) -{ - struct ring_buffer_event *event; - struct bts_entry *entry; - unsigned long irq; - - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq); - if (!event) - return; - entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, from); - entry->ent.type = TRACE_BTS; - entry->ent.cpu = smp_processor_id(); - entry->from = from; - entry->to = to; - ring_buffer_unlock_commit(tr->buffer, event, irq); -} - -static void trace_bts_at(struct trace_array *tr, size_t index) -{ - const void *raw = NULL; - unsigned long from, to; - int err; - - err = ds_access_bts(this_tracer, index, &raw); - if (err < 0) - return; - - from = *(const unsigned long *)raw; - to = *(const unsigned long *)((const char *)raw + sizeof_field); - - trace_bts(tr, from, to); -} - -static void trace_bts_cpu(void *arg) -{ - struct trace_array *tr = (struct trace_array *) arg; - size_t index = 0, end = 0, i; - int err; - - if (!this_tracer) - return; - - bts_disable(); - - err = ds_get_bts_index(this_tracer, &index); - if (err < 0) - goto out; - - err = ds_get_bts_end(this_tracer, &end); - if (err < 0) - goto out; - - for (i = index; i < end; i++) - trace_bts_at(tr, i); - - for (i = 0; i < index; i++) - trace_bts_at(tr, i); - -out: - bts_enable(); -} - -static void trace_bts_prepare(struct trace_iterator *iter) -{ - int cpu; - - for_each_cpu_mask(cpu, cpu_possible_map) - smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1); -} - -struct tracer bts_tracer __read_mostly = -{ - .name = "bts", - .init = bts_trace_init, - .reset = bts_trace_stop, - .print_header = bts_trace_print_header, - .print_line = bts_trace_print_line, - .start = bts_trace_start, - .stop = bts_trace_stop, - .open = trace_bts_prepare -}; - -__init static int init_bts_trace(void) -{ - return register_tracer(&bts_tracer); -} -device_initcall(init_bts_trace); diff --git a/trunk/kernel/trace/trace_functions.c b/trunk/kernel/trace/trace_functions.c index e74f6d0a3216..0f85a64003d3 100644 --- a/trunk/kernel/trace/trace_functions.c +++ b/trunk/kernel/trace/trace_functions.c @@ -42,20 +42,24 @@ static void stop_function_trace(struct trace_array *tr) tracing_stop_cmdline_record(); } -static int function_trace_init(struct trace_array *tr) +static void function_trace_init(struct trace_array *tr) { - start_function_trace(tr); - return 0; + if (tr->ctrl) + start_function_trace(tr); } static void function_trace_reset(struct trace_array *tr) { - stop_function_trace(tr); + if (tr->ctrl) + stop_function_trace(tr); } -static void function_trace_start(struct trace_array *tr) +static void function_trace_ctrl_update(struct trace_array *tr) { - function_reset(tr); + if (tr->ctrl) + start_function_trace(tr); + else + stop_function_trace(tr); } static struct tracer function_trace __read_mostly = @@ -63,7 +67,7 @@ static struct tracer function_trace __read_mostly = .name = "function", .init = function_trace_init, .reset = function_trace_reset, - .start = function_trace_start, + .ctrl_update = function_trace_ctrl_update, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_function, #endif diff --git a/trunk/kernel/trace/trace_functions_graph.c b/trunk/kernel/trace/trace_functions_graph.c deleted file mode 100644 index af60eef4cbcc..000000000000 --- a/trunk/kernel/trace/trace_functions_graph.c +++ /dev/null @@ -1,611 +0,0 @@ -/* - * - * Function graph tracer. - * Copyright (c) 2008 Frederic Weisbecker - * Mostly borrowed from function tracer which - * is Copyright (c) Steven Rostedt - * - */ -#include -#include -#include -#include - -#include "trace.h" - -#define TRACE_GRAPH_INDENT 2 - -/* Flag options */ -#define TRACE_GRAPH_PRINT_OVERRUN 0x1 -#define TRACE_GRAPH_PRINT_CPU 0x2 -#define TRACE_GRAPH_PRINT_OVERHEAD 0x4 -#define TRACE_GRAPH_PRINT_PROC 0x8 - -static struct tracer_opt trace_opts[] = { - /* Display overruns ? */ - { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) }, - /* Display CPU ? */ - { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) }, - /* Display Overhead ? */ - { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) }, - /* Display proc name/pid */ - { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) }, - { } /* Empty entry */ -}; - -static struct tracer_flags tracer_flags = { - /* Don't display overruns and proc by default */ - .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD, - .opts = trace_opts -}; - -/* pid on the last trace processed */ -static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 }; - -static int graph_trace_init(struct trace_array *tr) -{ - int cpu, ret; - - for_each_online_cpu(cpu) - tracing_reset(tr, cpu); - - ret = register_ftrace_graph(&trace_graph_return, - &trace_graph_entry); - if (ret) - return ret; - tracing_start_cmdline_record(); - - return 0; -} - -static void graph_trace_reset(struct trace_array *tr) -{ - tracing_stop_cmdline_record(); - unregister_ftrace_graph(); -} - -static inline int log10_cpu(int nb) -{ - if (nb / 100) - return 3; - if (nb / 10) - return 2; - return 1; -} - -static enum print_line_t -print_graph_cpu(struct trace_seq *s, int cpu) -{ - int i; - int ret; - int log10_this = log10_cpu(cpu); - int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map)); - - - /* - * Start with a space character - to make it stand out - * to the right a bit when trace output is pasted into - * email: - */ - ret = trace_seq_printf(s, " "); - - /* - * Tricky - we space the CPU field according to the max - * number of online CPUs. On a 2-cpu system it would take - * a maximum of 1 digit - on a 128 cpu system it would - * take up to 3 digits: - */ - for (i = 0; i < log10_all - log10_this; i++) { - ret = trace_seq_printf(s, " "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - ret = trace_seq_printf(s, "%d) ", cpu); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; -} - -#define TRACE_GRAPH_PROCINFO_LENGTH 14 - -static enum print_line_t -print_graph_proc(struct trace_seq *s, pid_t pid) -{ - int i; - int ret; - int len; - char comm[8]; - int spaces = 0; - /* sign + log10(MAX_INT) + '\0' */ - char pid_str[11]; - - strncpy(comm, trace_find_cmdline(pid), 7); - comm[7] = '\0'; - sprintf(pid_str, "%d", pid); - - /* 1 stands for the "-" character */ - len = strlen(comm) + strlen(pid_str) + 1; - - if (len < TRACE_GRAPH_PROCINFO_LENGTH) - spaces = TRACE_GRAPH_PROCINFO_LENGTH - len; - - /* First spaces to align center */ - for (i = 0; i < spaces / 2; i++) { - ret = trace_seq_printf(s, " "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - - ret = trace_seq_printf(s, "%s-%s", comm, pid_str); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - /* Last spaces to align center */ - for (i = 0; i < spaces - (spaces / 2); i++) { - ret = trace_seq_printf(s, " "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - return TRACE_TYPE_HANDLED; -} - - -/* If the pid changed since the last trace, output this event */ -static enum print_line_t -verif_pid(struct trace_seq *s, pid_t pid, int cpu) -{ - pid_t prev_pid; - int ret; - - if (last_pid[cpu] != -1 && last_pid[cpu] == pid) - return TRACE_TYPE_HANDLED; - - prev_pid = last_pid[cpu]; - last_pid[cpu] = pid; - -/* - * Context-switch trace line: - - ------------------------------------------ - | 1) migration/0--1 => sshd-1755 - ------------------------------------------ - - */ - ret = trace_seq_printf(s, - " ------------------------------------------\n"); - if (!ret) - TRACE_TYPE_PARTIAL_LINE; - - ret = print_graph_cpu(s, cpu); - if (ret == TRACE_TYPE_PARTIAL_LINE) - TRACE_TYPE_PARTIAL_LINE; - - ret = print_graph_proc(s, prev_pid); - if (ret == TRACE_TYPE_PARTIAL_LINE) - TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_printf(s, " => "); - if (!ret) - TRACE_TYPE_PARTIAL_LINE; - - ret = print_graph_proc(s, pid); - if (ret == TRACE_TYPE_PARTIAL_LINE) - TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_printf(s, - "\n ------------------------------------------\n\n"); - if (!ret) - TRACE_TYPE_PARTIAL_LINE; - - return ret; -} - -static bool -trace_branch_is_leaf(struct trace_iterator *iter, - struct ftrace_graph_ent_entry *curr) -{ - struct ring_buffer_iter *ring_iter; - struct ring_buffer_event *event; - struct ftrace_graph_ret_entry *next; - - ring_iter = iter->buffer_iter[iter->cpu]; - - if (!ring_iter) - return false; - - event = ring_buffer_iter_peek(ring_iter, NULL); - - if (!event) - return false; - - next = ring_buffer_event_data(event); - - if (next->ent.type != TRACE_GRAPH_RET) - return false; - - if (curr->ent.pid != next->ent.pid || - curr->graph_ent.func != next->ret.func) - return false; - - return true; -} - - -static enum print_line_t -print_graph_duration(unsigned long long duration, struct trace_seq *s) -{ - unsigned long nsecs_rem = do_div(duration, 1000); - /* log10(ULONG_MAX) + '\0' */ - char msecs_str[21]; - char nsecs_str[5]; - int ret, len; - int i; - - sprintf(msecs_str, "%lu", (unsigned long) duration); - - /* Print msecs */ - ret = trace_seq_printf(s, msecs_str); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - len = strlen(msecs_str); - - /* Print nsecs (we don't want to exceed 7 numbers) */ - if (len < 7) { - snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem); - ret = trace_seq_printf(s, ".%s", nsecs_str); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - len += strlen(nsecs_str); - } - - ret = trace_seq_printf(s, " us "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - /* Print remaining spaces to fit the row's width */ - for (i = len; i < 7; i++) { - ret = trace_seq_printf(s, " "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - - ret = trace_seq_printf(s, "| "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - return TRACE_TYPE_HANDLED; - -} - -/* Signal a overhead of time execution to the output */ -static int -print_graph_overhead(unsigned long long duration, struct trace_seq *s) -{ - /* Duration exceeded 100 msecs */ - if (duration > 100000ULL) - return trace_seq_printf(s, "! "); - - /* Duration exceeded 10 msecs */ - if (duration > 10000ULL) - return trace_seq_printf(s, "+ "); - - return trace_seq_printf(s, " "); -} - -/* Case of a leaf function on its call entry */ -static enum print_line_t -print_graph_entry_leaf(struct trace_iterator *iter, - struct ftrace_graph_ent_entry *entry, struct trace_seq *s) -{ - struct ftrace_graph_ret_entry *ret_entry; - struct ftrace_graph_ret *graph_ret; - struct ring_buffer_event *event; - struct ftrace_graph_ent *call; - unsigned long long duration; - int ret; - int i; - - event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); - ret_entry = ring_buffer_event_data(event); - graph_ret = &ret_entry->ret; - call = &entry->graph_ent; - duration = graph_ret->rettime - graph_ret->calltime; - - /* Overhead */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { - ret = print_graph_overhead(duration, s); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - - /* Duration */ - ret = print_graph_duration(duration, s); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - - /* Function */ - for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { - ret = trace_seq_printf(s, " "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - - ret = seq_print_ip_sym(s, call->func, 0); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_printf(s, "();\n"); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; -} - -static enum print_line_t -print_graph_entry_nested(struct ftrace_graph_ent_entry *entry, - struct trace_seq *s) -{ - int i; - int ret; - struct ftrace_graph_ent *call = &entry->graph_ent; - - /* No overhead */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { - ret = trace_seq_printf(s, " "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - - /* No time */ - ret = trace_seq_printf(s, " | "); - - /* Function */ - for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { - ret = trace_seq_printf(s, " "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - - ret = seq_print_ip_sym(s, call->func, 0); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_printf(s, "() {\n"); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; -} - -static enum print_line_t -print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, - struct trace_iterator *iter, int cpu) -{ - int ret; - struct trace_entry *ent = iter->ent; - - /* Pid */ - if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - - /* Cpu */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { - ret = print_graph_cpu(s, cpu); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - } - - /* Proc */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { - ret = print_graph_proc(s, ent->pid); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_printf(s, " | "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - - if (trace_branch_is_leaf(iter, field)) - return print_graph_entry_leaf(iter, field, s); - else - return print_graph_entry_nested(field, s); - -} - -static enum print_line_t -print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, - struct trace_entry *ent, int cpu) -{ - int i; - int ret; - unsigned long long duration = trace->rettime - trace->calltime; - - /* Pid */ - if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - - /* Cpu */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { - ret = print_graph_cpu(s, cpu); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - } - - /* Proc */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { - ret = print_graph_proc(s, ent->pid); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_printf(s, " | "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - - /* Overhead */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { - ret = print_graph_overhead(duration, s); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - - /* Duration */ - ret = print_graph_duration(duration, s); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - - /* Closing brace */ - for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { - ret = trace_seq_printf(s, " "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - - ret = trace_seq_printf(s, "}\n"); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - /* Overrun */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { - ret = trace_seq_printf(s, " (Overruns: %lu)\n", - trace->overrun); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - return TRACE_TYPE_HANDLED; -} - -static enum print_line_t -print_graph_comment(struct print_entry *trace, struct trace_seq *s, - struct trace_entry *ent, struct trace_iterator *iter) -{ - int i; - int ret; - - /* Pid */ - if (verif_pid(s, ent->pid, iter->cpu) == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - - /* Cpu */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { - ret = print_graph_cpu(s, iter->cpu); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - } - - /* Proc */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { - ret = print_graph_proc(s, ent->pid); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_printf(s, " | "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - - /* No overhead */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { - ret = trace_seq_printf(s, " "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - - /* No time */ - ret = trace_seq_printf(s, " | "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - /* Indentation */ - if (trace->depth > 0) - for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) { - ret = trace_seq_printf(s, " "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - - /* The comment */ - ret = trace_seq_printf(s, "/* %s", trace->buf); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - if (ent->flags & TRACE_FLAG_CONT) - trace_seq_print_cont(s, iter); - - ret = trace_seq_printf(s, " */\n"); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; -} - - -enum print_line_t -print_graph_function(struct trace_iterator *iter) -{ - struct trace_seq *s = &iter->seq; - struct trace_entry *entry = iter->ent; - - switch (entry->type) { - case TRACE_GRAPH_ENT: { - struct ftrace_graph_ent_entry *field; - trace_assign_type(field, entry); - return print_graph_entry(field, s, iter, - iter->cpu); - } - case TRACE_GRAPH_RET: { - struct ftrace_graph_ret_entry *field; - trace_assign_type(field, entry); - return print_graph_return(&field->ret, s, entry, iter->cpu); - } - case TRACE_PRINT: { - struct print_entry *field; - trace_assign_type(field, entry); - return print_graph_comment(field, s, entry, iter); - } - default: - return TRACE_TYPE_UNHANDLED; - } -} - -static void print_graph_headers(struct seq_file *s) -{ - /* 1st line */ - seq_printf(s, "# "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) - seq_printf(s, "CPU "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) - seq_printf(s, "TASK/PID "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) - seq_printf(s, "OVERHEAD/"); - seq_printf(s, "DURATION FUNCTION CALLS\n"); - - /* 2nd line */ - seq_printf(s, "# "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) - seq_printf(s, "| "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) - seq_printf(s, "| | "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { - seq_printf(s, "| "); - seq_printf(s, "| | | | |\n"); - } else - seq_printf(s, " | | | | |\n"); -} -static struct tracer graph_trace __read_mostly = { - .name = "function_graph", - .init = graph_trace_init, - .reset = graph_trace_reset, - .print_line = print_graph_function, - .print_header = print_graph_headers, - .flags = &tracer_flags, -}; - -static __init int init_graph_trace(void) -{ - return register_tracer(&graph_trace); -} - -device_initcall(init_graph_trace); diff --git a/trunk/kernel/trace/trace_irqsoff.c b/trunk/kernel/trace/trace_irqsoff.c index 7c2e326bbc8b..9c74071c10e0 100644 --- a/trunk/kernel/trace/trace_irqsoff.c +++ b/trunk/kernel/trace/trace_irqsoff.c @@ -353,28 +353,15 @@ void trace_preempt_off(unsigned long a0, unsigned long a1) } #endif /* CONFIG_PREEMPT_TRACER */ -/* - * save_tracer_enabled is used to save the state of the tracer_enabled - * variable when we disable it when we open a trace output file. - */ -static int save_tracer_enabled; - static void start_irqsoff_tracer(struct trace_array *tr) { register_ftrace_function(&trace_ops); - if (tracing_is_enabled()) { - tracer_enabled = 1; - save_tracer_enabled = 1; - } else { - tracer_enabled = 0; - save_tracer_enabled = 0; - } + tracer_enabled = 1; } static void stop_irqsoff_tracer(struct trace_array *tr) { tracer_enabled = 0; - save_tracer_enabled = 0; unregister_ftrace_function(&trace_ops); } @@ -383,55 +370,53 @@ static void __irqsoff_tracer_init(struct trace_array *tr) irqsoff_trace = tr; /* make sure that the tracer is visible */ smp_wmb(); - start_irqsoff_tracer(tr); -} -static void irqsoff_tracer_reset(struct trace_array *tr) -{ - stop_irqsoff_tracer(tr); + if (tr->ctrl) + start_irqsoff_tracer(tr); } -static void irqsoff_tracer_start(struct trace_array *tr) +static void irqsoff_tracer_reset(struct trace_array *tr) { - tracer_enabled = 1; - save_tracer_enabled = 1; + if (tr->ctrl) + stop_irqsoff_tracer(tr); } -static void irqsoff_tracer_stop(struct trace_array *tr) +static void irqsoff_tracer_ctrl_update(struct trace_array *tr) { - tracer_enabled = 0; - save_tracer_enabled = 0; + if (tr->ctrl) + start_irqsoff_tracer(tr); + else + stop_irqsoff_tracer(tr); } static void irqsoff_tracer_open(struct trace_iterator *iter) { /* stop the trace while dumping */ - tracer_enabled = 0; + if (iter->tr->ctrl) + stop_irqsoff_tracer(iter->tr); } static void irqsoff_tracer_close(struct trace_iterator *iter) { - /* restart tracing */ - tracer_enabled = save_tracer_enabled; + if (iter->tr->ctrl) + start_irqsoff_tracer(iter->tr); } #ifdef CONFIG_IRQSOFF_TRACER -static int irqsoff_tracer_init(struct trace_array *tr) +static void irqsoff_tracer_init(struct trace_array *tr) { trace_type = TRACER_IRQS_OFF; __irqsoff_tracer_init(tr); - return 0; } static struct tracer irqsoff_tracer __read_mostly = { .name = "irqsoff", .init = irqsoff_tracer_init, .reset = irqsoff_tracer_reset, - .start = irqsoff_tracer_start, - .stop = irqsoff_tracer_stop, .open = irqsoff_tracer_open, .close = irqsoff_tracer_close, + .ctrl_update = irqsoff_tracer_ctrl_update, .print_max = 1, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_irqsoff, @@ -443,12 +428,11 @@ static struct tracer irqsoff_tracer __read_mostly = #endif #ifdef CONFIG_PREEMPT_TRACER -static int preemptoff_tracer_init(struct trace_array *tr) +static void preemptoff_tracer_init(struct trace_array *tr) { trace_type = TRACER_PREEMPT_OFF; __irqsoff_tracer_init(tr); - return 0; } static struct tracer preemptoff_tracer __read_mostly = @@ -456,10 +440,9 @@ static struct tracer preemptoff_tracer __read_mostly = .name = "preemptoff", .init = preemptoff_tracer_init, .reset = irqsoff_tracer_reset, - .start = irqsoff_tracer_start, - .stop = irqsoff_tracer_stop, .open = irqsoff_tracer_open, .close = irqsoff_tracer_close, + .ctrl_update = irqsoff_tracer_ctrl_update, .print_max = 1, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptoff, @@ -473,12 +456,11 @@ static struct tracer preemptoff_tracer __read_mostly = #if defined(CONFIG_IRQSOFF_TRACER) && \ defined(CONFIG_PREEMPT_TRACER) -static int preemptirqsoff_tracer_init(struct trace_array *tr) +static void preemptirqsoff_tracer_init(struct trace_array *tr) { trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF; __irqsoff_tracer_init(tr); - return 0; } static struct tracer preemptirqsoff_tracer __read_mostly = @@ -486,10 +468,9 @@ static struct tracer preemptirqsoff_tracer __read_mostly = .name = "preemptirqsoff", .init = preemptirqsoff_tracer_init, .reset = irqsoff_tracer_reset, - .start = irqsoff_tracer_start, - .stop = irqsoff_tracer_stop, .open = irqsoff_tracer_open, .close = irqsoff_tracer_close, + .ctrl_update = irqsoff_tracer_ctrl_update, .print_max = 1, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptirqsoff, diff --git a/trunk/kernel/trace/trace_mmiotrace.c b/trunk/kernel/trace/trace_mmiotrace.c index 2fb6da6523b3..e62cbf78eab6 100644 --- a/trunk/kernel/trace/trace_mmiotrace.c +++ b/trunk/kernel/trace/trace_mmiotrace.c @@ -32,29 +32,34 @@ static void mmio_reset_data(struct trace_array *tr) tracing_reset(tr, cpu); } -static int mmio_trace_init(struct trace_array *tr) +static void mmio_trace_init(struct trace_array *tr) { pr_debug("in %s\n", __func__); mmio_trace_array = tr; - - mmio_reset_data(tr); - enable_mmiotrace(); - return 0; + if (tr->ctrl) { + mmio_reset_data(tr); + enable_mmiotrace(); + } } static void mmio_trace_reset(struct trace_array *tr) { pr_debug("in %s\n", __func__); - - disable_mmiotrace(); + if (tr->ctrl) + disable_mmiotrace(); mmio_reset_data(tr); mmio_trace_array = NULL; } -static void mmio_trace_start(struct trace_array *tr) +static void mmio_trace_ctrl_update(struct trace_array *tr) { pr_debug("in %s\n", __func__); - mmio_reset_data(tr); + if (tr->ctrl) { + mmio_reset_data(tr); + enable_mmiotrace(); + } else { + disable_mmiotrace(); + } } static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) @@ -291,10 +296,10 @@ static struct tracer mmio_tracer __read_mostly = .name = "mmiotrace", .init = mmio_trace_init, .reset = mmio_trace_reset, - .start = mmio_trace_start, .pipe_open = mmio_pipe_open, .close = mmio_close, .read = mmio_read, + .ctrl_update = mmio_trace_ctrl_update, .print_line = mmio_print_line, }; @@ -366,5 +371,5 @@ void mmio_trace_mapping(struct mmiotrace_map *map) int mmio_trace_printk(const char *fmt, va_list args) { - return trace_vprintk(0, -1, fmt, args); + return trace_vprintk(0, fmt, args); } diff --git a/trunk/kernel/trace/trace_nop.c b/trunk/kernel/trace/trace_nop.c index b9767acd30ac..4592b4862515 100644 --- a/trunk/kernel/trace/trace_nop.c +++ b/trunk/kernel/trace/trace_nop.c @@ -12,27 +12,6 @@ #include "trace.h" -/* Our two options */ -enum { - TRACE_NOP_OPT_ACCEPT = 0x1, - TRACE_NOP_OPT_REFUSE = 0x2 -}; - -/* Options for the tracer (see trace_options file) */ -static struct tracer_opt nop_opts[] = { - /* Option that will be accepted by set_flag callback */ - { TRACER_OPT(test_nop_accept, TRACE_NOP_OPT_ACCEPT) }, - /* Option that will be refused by set_flag callback */ - { TRACER_OPT(test_nop_refuse, TRACE_NOP_OPT_REFUSE) }, - { } /* Always set a last empty entry */ -}; - -static struct tracer_flags nop_flags = { - /* You can check your flags value here when you want. */ - .val = 0, /* By default: all flags disabled */ - .opts = nop_opts -}; - static struct trace_array *ctx_trace; static void start_nop_trace(struct trace_array *tr) @@ -45,7 +24,7 @@ static void stop_nop_trace(struct trace_array *tr) /* Nothing to do! */ } -static int nop_trace_init(struct trace_array *tr) +static void nop_trace_init(struct trace_array *tr) { int cpu; ctx_trace = tr; @@ -53,53 +32,33 @@ static int nop_trace_init(struct trace_array *tr) for_each_online_cpu(cpu) tracing_reset(tr, cpu); - start_nop_trace(tr); - return 0; + if (tr->ctrl) + start_nop_trace(tr); } static void nop_trace_reset(struct trace_array *tr) { - stop_nop_trace(tr); + if (tr->ctrl) + stop_nop_trace(tr); } -/* It only serves as a signal handler and a callback to - * accept or refuse tthe setting of a flag. - * If you don't implement it, then the flag setting will be - * automatically accepted. - */ -static int nop_set_flag(u32 old_flags, u32 bit, int set) +static void nop_trace_ctrl_update(struct trace_array *tr) { - /* - * Note that you don't need to update nop_flags.val yourself. - * The tracing Api will do it automatically if you return 0 - */ - if (bit == TRACE_NOP_OPT_ACCEPT) { - printk(KERN_DEBUG "nop_test_accept flag set to %d: we accept." - " Now cat trace_options to see the result\n", - set); - return 0; - } - - if (bit == TRACE_NOP_OPT_REFUSE) { - printk(KERN_DEBUG "nop_test_refuse flag set to %d: we refuse." - "Now cat trace_options to see the result\n", - set); - return -EINVAL; - } - - return 0; + /* When starting a new trace, reset the buffers */ + if (tr->ctrl) + start_nop_trace(tr); + else + stop_nop_trace(tr); } - struct tracer nop_trace __read_mostly = { .name = "nop", .init = nop_trace_init, .reset = nop_trace_reset, + .ctrl_update = nop_trace_ctrl_update, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_nop, #endif - .flags = &nop_flags, - .set_flag = nop_set_flag }; diff --git a/trunk/kernel/trace/trace_power.c b/trunk/kernel/trace/trace_power.c deleted file mode 100644 index a7172a352f62..000000000000 --- a/trunk/kernel/trace/trace_power.c +++ /dev/null @@ -1,179 +0,0 @@ -/* - * ring buffer based C-state tracer - * - * Arjan van de Ven - * Copyright (C) 2008 Intel Corporation - * - * Much is borrowed from trace_boot.c which is - * Copyright (C) 2008 Frederic Weisbecker - * - */ - -#include -#include -#include -#include -#include - -#include "trace.h" - -static struct trace_array *power_trace; -static int __read_mostly trace_power_enabled; - - -static void start_power_trace(struct trace_array *tr) -{ - trace_power_enabled = 1; -} - -static void stop_power_trace(struct trace_array *tr) -{ - trace_power_enabled = 0; -} - - -static int power_trace_init(struct trace_array *tr) -{ - int cpu; - power_trace = tr; - - trace_power_enabled = 1; - - for_each_cpu_mask(cpu, cpu_possible_map) - tracing_reset(tr, cpu); - return 0; -} - -static enum print_line_t power_print_line(struct trace_iterator *iter) -{ - int ret = 0; - struct trace_entry *entry = iter->ent; - struct trace_power *field ; - struct power_trace *it; - struct trace_seq *s = &iter->seq; - struct timespec stamp; - struct timespec duration; - - trace_assign_type(field, entry); - it = &field->state_data; - stamp = ktime_to_timespec(it->stamp); - duration = ktime_to_timespec(ktime_sub(it->end, it->stamp)); - - if (entry->type == TRACE_POWER) { - if (it->type == POWER_CSTATE) - ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n", - stamp.tv_sec, - stamp.tv_nsec, - it->state, iter->cpu, - duration.tv_sec, - duration.tv_nsec); - if (it->type == POWER_PSTATE) - ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n", - stamp.tv_sec, - stamp.tv_nsec, - it->state, iter->cpu); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - return TRACE_TYPE_HANDLED; - } - return TRACE_TYPE_UNHANDLED; -} - -static struct tracer power_tracer __read_mostly = -{ - .name = "power", - .init = power_trace_init, - .start = start_power_trace, - .stop = stop_power_trace, - .reset = stop_power_trace, - .print_line = power_print_line, -}; - -static int init_power_trace(void) -{ - return register_tracer(&power_tracer); -} -device_initcall(init_power_trace); - -void trace_power_start(struct power_trace *it, unsigned int type, - unsigned int level) -{ - if (!trace_power_enabled) - return; - - memset(it, 0, sizeof(struct power_trace)); - it->state = level; - it->type = type; - it->stamp = ktime_get(); -} -EXPORT_SYMBOL_GPL(trace_power_start); - - -void trace_power_end(struct power_trace *it) -{ - struct ring_buffer_event *event; - struct trace_power *entry; - struct trace_array_cpu *data; - unsigned long irq_flags; - struct trace_array *tr = power_trace; - - if (!trace_power_enabled) - return; - - preempt_disable(); - it->end = ktime_get(); - data = tr->data[smp_processor_id()]; - - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); - if (!event) - goto out; - entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, 0); - entry->ent.type = TRACE_POWER; - entry->state_data = *it; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - - trace_wake_up(); - - out: - preempt_enable(); -} -EXPORT_SYMBOL_GPL(trace_power_end); - -void trace_power_mark(struct power_trace *it, unsigned int type, - unsigned int level) -{ - struct ring_buffer_event *event; - struct trace_power *entry; - struct trace_array_cpu *data; - unsigned long irq_flags; - struct trace_array *tr = power_trace; - - if (!trace_power_enabled) - return; - - memset(it, 0, sizeof(struct power_trace)); - it->state = level; - it->type = type; - it->stamp = ktime_get(); - preempt_disable(); - it->end = it->stamp; - data = tr->data[smp_processor_id()]; - - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); - if (!event) - goto out; - entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, 0); - entry->ent.type = TRACE_POWER; - entry->state_data = *it; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - - trace_wake_up(); - - out: - preempt_enable(); -} -EXPORT_SYMBOL_GPL(trace_power_mark); diff --git a/trunk/kernel/trace/trace_sched_switch.c b/trunk/kernel/trace/trace_sched_switch.c index 863390557b44..b8f56beb1a62 100644 --- a/trunk/kernel/trace/trace_sched_switch.c +++ b/trunk/kernel/trace/trace_sched_switch.c @@ -16,8 +16,7 @@ static struct trace_array *ctx_trace; static int __read_mostly tracer_enabled; -static int sched_ref; -static DEFINE_MUTEX(sched_register_mutex); +static atomic_t sched_ref; static void probe_sched_switch(struct rq *__rq, struct task_struct *prev, @@ -28,7 +27,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev, int cpu; int pc; - if (!sched_ref) + if (!atomic_read(&sched_ref)) return; tracing_record_cmdline(prev); @@ -124,18 +123,20 @@ static void tracing_sched_unregister(void) static void tracing_start_sched_switch(void) { - mutex_lock(&sched_register_mutex); - if (!(sched_ref++)) + long ref; + + ref = atomic_inc_return(&sched_ref); + if (ref == 1) tracing_sched_register(); - mutex_unlock(&sched_register_mutex); } static void tracing_stop_sched_switch(void) { - mutex_lock(&sched_register_mutex); - if (!(--sched_ref)) + long ref; + + ref = atomic_dec_and_test(&sched_ref); + if (ref) tracing_sched_unregister(); - mutex_unlock(&sched_register_mutex); } void tracing_start_cmdline_record(void) @@ -148,86 +149,40 @@ void tracing_stop_cmdline_record(void) tracing_stop_sched_switch(); } -/** - * tracing_start_sched_switch_record - start tracing context switches - * - * Turns on context switch tracing for a tracer. - */ -void tracing_start_sched_switch_record(void) -{ - if (unlikely(!ctx_trace)) { - WARN_ON(1); - return; - } - - tracing_start_sched_switch(); - - mutex_lock(&sched_register_mutex); - tracer_enabled++; - mutex_unlock(&sched_register_mutex); -} - -/** - * tracing_stop_sched_switch_record - start tracing context switches - * - * Turns off context switch tracing for a tracer. - */ -void tracing_stop_sched_switch_record(void) -{ - mutex_lock(&sched_register_mutex); - tracer_enabled--; - WARN_ON(tracer_enabled < 0); - mutex_unlock(&sched_register_mutex); - - tracing_stop_sched_switch(); -} - -/** - * tracing_sched_switch_assign_trace - assign a trace array for ctx switch - * @tr: trace array pointer to assign - * - * Some tracers might want to record the context switches in their - * trace. This function lets those tracers assign the trace array - * to use. - */ -void tracing_sched_switch_assign_trace(struct trace_array *tr) -{ - ctx_trace = tr; -} - static void start_sched_trace(struct trace_array *tr) { sched_switch_reset(tr); - tracing_start_sched_switch_record(); + tracing_start_cmdline_record(); + tracer_enabled = 1; } static void stop_sched_trace(struct trace_array *tr) { - tracing_stop_sched_switch_record(); + tracer_enabled = 0; + tracing_stop_cmdline_record(); } -static int sched_switch_trace_init(struct trace_array *tr) +static void sched_switch_trace_init(struct trace_array *tr) { ctx_trace = tr; - start_sched_trace(tr); - return 0; + + if (tr->ctrl) + start_sched_trace(tr); } static void sched_switch_trace_reset(struct trace_array *tr) { - if (sched_ref) + if (tr->ctrl) stop_sched_trace(tr); } -static void sched_switch_trace_start(struct trace_array *tr) -{ - sched_switch_reset(tr); - tracing_start_sched_switch(); -} - -static void sched_switch_trace_stop(struct trace_array *tr) +static void sched_switch_trace_ctrl_update(struct trace_array *tr) { - tracing_stop_sched_switch(); + /* When starting a new trace, reset the buffers */ + if (tr->ctrl) + start_sched_trace(tr); + else + stop_sched_trace(tr); } static struct tracer sched_switch_trace __read_mostly = @@ -235,8 +190,7 @@ static struct tracer sched_switch_trace __read_mostly = .name = "sched_switch", .init = sched_switch_trace_init, .reset = sched_switch_trace_reset, - .start = sched_switch_trace_start, - .stop = sched_switch_trace_stop, + .ctrl_update = sched_switch_trace_ctrl_update, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_sched_switch, #endif @@ -244,6 +198,14 @@ static struct tracer sched_switch_trace __read_mostly = __init static int init_sched_switch_trace(void) { + int ret = 0; + + if (atomic_read(&sched_ref)) + ret = tracing_sched_register(); + if (ret) { + pr_info("error registering scheduler trace\n"); + return ret; + } return register_tracer(&sched_switch_trace); } device_initcall(init_sched_switch_trace); diff --git a/trunk/kernel/trace/trace_sched_wakeup.c b/trunk/kernel/trace/trace_sched_wakeup.c index 0067b49746c1..3ae93f16b565 100644 --- a/trunk/kernel/trace/trace_sched_wakeup.c +++ b/trunk/kernel/trace/trace_sched_wakeup.c @@ -50,7 +50,8 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) return; pc = preempt_count(); - resched = ftrace_preempt_disable(); + resched = need_resched(); + preempt_disable_notrace(); cpu = raw_smp_processor_id(); data = tr->data[cpu]; @@ -80,7 +81,15 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) out: atomic_dec(&data->disabled); - ftrace_preempt_enable(resched); + /* + * To prevent recursion from the scheduler, if the + * resched flag was set before we entered, then + * don't reschedule. + */ + if (resched) + preempt_enable_no_resched_notrace(); + else + preempt_enable_notrace(); } static struct ftrace_ops trace_ops __read_mostly = @@ -262,12 +271,6 @@ probe_wakeup(struct rq *rq, struct task_struct *p) atomic_dec(&wakeup_trace->data[cpu]->disabled); } -/* - * save_tracer_enabled is used to save the state of the tracer_enabled - * variable when we disable it when we open a trace output file. - */ -static int save_tracer_enabled; - static void start_wakeup_tracer(struct trace_array *tr) { int ret; @@ -306,13 +309,7 @@ static void start_wakeup_tracer(struct trace_array *tr) register_ftrace_function(&trace_ops); - if (tracing_is_enabled()) { - tracer_enabled = 1; - save_tracer_enabled = 1; - } else { - tracer_enabled = 0; - save_tracer_enabled = 0; - } + tracer_enabled = 1; return; fail_deprobe_wake_new: @@ -324,53 +321,49 @@ static void start_wakeup_tracer(struct trace_array *tr) static void stop_wakeup_tracer(struct trace_array *tr) { tracer_enabled = 0; - save_tracer_enabled = 0; unregister_ftrace_function(&trace_ops); unregister_trace_sched_switch(probe_wakeup_sched_switch); unregister_trace_sched_wakeup_new(probe_wakeup); unregister_trace_sched_wakeup(probe_wakeup); } -static int wakeup_tracer_init(struct trace_array *tr) +static void wakeup_tracer_init(struct trace_array *tr) { wakeup_trace = tr; - start_wakeup_tracer(tr); - return 0; -} -static void wakeup_tracer_reset(struct trace_array *tr) -{ - stop_wakeup_tracer(tr); - /* make sure we put back any tasks we are tracing */ - wakeup_reset(tr); + if (tr->ctrl) + start_wakeup_tracer(tr); } -static void wakeup_tracer_start(struct trace_array *tr) +static void wakeup_tracer_reset(struct trace_array *tr) { - wakeup_reset(tr); - tracer_enabled = 1; - save_tracer_enabled = 1; + if (tr->ctrl) { + stop_wakeup_tracer(tr); + /* make sure we put back any tasks we are tracing */ + wakeup_reset(tr); + } } -static void wakeup_tracer_stop(struct trace_array *tr) +static void wakeup_tracer_ctrl_update(struct trace_array *tr) { - tracer_enabled = 0; - save_tracer_enabled = 0; + if (tr->ctrl) + start_wakeup_tracer(tr); + else + stop_wakeup_tracer(tr); } static void wakeup_tracer_open(struct trace_iterator *iter) { /* stop the trace while dumping */ - tracer_enabled = 0; + if (iter->tr->ctrl) + stop_wakeup_tracer(iter->tr); } static void wakeup_tracer_close(struct trace_iterator *iter) { /* forget about any processes we were recording */ - if (save_tracer_enabled) { - wakeup_reset(iter->tr); - tracer_enabled = 1; - } + if (iter->tr->ctrl) + start_wakeup_tracer(iter->tr); } static struct tracer wakeup_tracer __read_mostly = @@ -378,10 +371,9 @@ static struct tracer wakeup_tracer __read_mostly = .name = "wakeup", .init = wakeup_tracer_init, .reset = wakeup_tracer_reset, - .start = wakeup_tracer_start, - .stop = wakeup_tracer_stop, .open = wakeup_tracer_open, .close = wakeup_tracer_close, + .ctrl_update = wakeup_tracer_ctrl_update, .print_max = 1, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_wakeup, diff --git a/trunk/kernel/trace/trace_selftest.c b/trunk/kernel/trace/trace_selftest.c index 88c8eb70f54a..90bc752a7580 100644 --- a/trunk/kernel/trace/trace_selftest.c +++ b/trunk/kernel/trace/trace_selftest.c @@ -13,7 +13,6 @@ static inline int trace_valid_entry(struct trace_entry *entry) case TRACE_STACK: case TRACE_PRINT: case TRACE_SPECIAL: - case TRACE_BRANCH: return 1; } return 0; @@ -52,7 +51,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) int cpu, ret = 0; /* Don't allow flipping of max traces now */ - local_irq_save(flags); + raw_local_irq_save(flags); __raw_spin_lock(&ftrace_max_lock); cnt = ring_buffer_entries(tr->buffer); @@ -63,7 +62,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) break; } __raw_spin_unlock(&ftrace_max_lock); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (count) *count = cnt; @@ -71,11 +70,6 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) return ret; } -static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret) -{ - printk(KERN_WARNING "Failed to init %s tracer, init returned %d\n", - trace->name, init_ret); -} #ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE @@ -116,11 +110,8 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, ftrace_set_filter(func_name, strlen(func_name), 1); /* enable tracing */ - ret = trace->init(tr); - if (ret) { - warn_failed_init_tracer(trace, ret); - goto out; - } + tr->ctrl = 1; + trace->init(tr); /* Sleep for a 1/10 of a second */ msleep(100); @@ -143,13 +134,13 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, msleep(100); /* stop the tracing. */ - tracing_stop(); + tr->ctrl = 0; + trace->ctrl_update(tr); ftrace_enabled = 0; /* check the trace buffer */ ret = trace_test_buffer(tr, &count); trace->reset(tr); - tracing_start(); /* we should only have one item */ if (!ret && count != 1) { @@ -157,7 +148,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, ret = -1; goto out; } - out: ftrace_enabled = save_ftrace_enabled; tracer_enabled = save_tracer_enabled; @@ -190,22 +180,18 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ftrace_enabled = 1; tracer_enabled = 1; - ret = trace->init(tr); - if (ret) { - warn_failed_init_tracer(trace, ret); - goto out; - } - + tr->ctrl = 1; + trace->init(tr); /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ - tracing_stop(); + tr->ctrl = 0; + trace->ctrl_update(tr); ftrace_enabled = 0; /* check the trace buffer */ ret = trace_test_buffer(tr, &count); trace->reset(tr); - tracing_start(); if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); @@ -237,12 +223,8 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - ret = trace->init(tr); - if (ret) { - warn_failed_init_tracer(trace, ret); - return ret; - } - + tr->ctrl = 1; + trace->init(tr); /* reset the max latency */ tracing_max_latency = 0; /* disable interrupts for a bit */ @@ -250,13 +232,13 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) udelay(100); local_irq_enable(); /* stop the tracing. */ - tracing_stop(); + tr->ctrl = 0; + trace->ctrl_update(tr); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); if (!ret) ret = trace_test_buffer(&max_tr, &count); trace->reset(tr); - tracing_start(); if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); @@ -277,26 +259,9 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) unsigned long count; int ret; - /* - * Now that the big kernel lock is no longer preemptable, - * and this is called with the BKL held, it will always - * fail. If preemption is already disabled, simply - * pass the test. When the BKL is removed, or becomes - * preemptible again, we will once again test this, - * so keep it in. - */ - if (preempt_count()) { - printk(KERN_CONT "can not test ... force "); - return 0; - } - /* start the tracing */ - ret = trace->init(tr); - if (ret) { - warn_failed_init_tracer(trace, ret); - return ret; - } - + tr->ctrl = 1; + trace->init(tr); /* reset the max latency */ tracing_max_latency = 0; /* disable preemption for a bit */ @@ -304,13 +269,13 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) udelay(100); preempt_enable(); /* stop the tracing. */ - tracing_stop(); + tr->ctrl = 0; + trace->ctrl_update(tr); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); if (!ret) ret = trace_test_buffer(&max_tr, &count); trace->reset(tr); - tracing_start(); if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); @@ -331,25 +296,9 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * unsigned long count; int ret; - /* - * Now that the big kernel lock is no longer preemptable, - * and this is called with the BKL held, it will always - * fail. If preemption is already disabled, simply - * pass the test. When the BKL is removed, or becomes - * preemptible again, we will once again test this, - * so keep it in. - */ - if (preempt_count()) { - printk(KERN_CONT "can not test ... force "); - return 0; - } - /* start the tracing */ - ret = trace->init(tr); - if (ret) { - warn_failed_init_tracer(trace, ret); - goto out; - } + tr->ctrl = 1; + trace->init(tr); /* reset the max latency */ tracing_max_latency = 0; @@ -363,30 +312,27 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * local_irq_enable(); /* stop the tracing. */ - tracing_stop(); + tr->ctrl = 0; + trace->ctrl_update(tr); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); - if (ret) { - tracing_start(); + if (ret) goto out; - } ret = trace_test_buffer(&max_tr, &count); - if (ret) { - tracing_start(); + if (ret) goto out; - } if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); ret = -1; - tracing_start(); goto out; } /* do the test by disabling interrupts first this time */ tracing_max_latency = 0; - tracing_start(); + tr->ctrl = 1; + trace->ctrl_update(tr); preempt_disable(); local_irq_disable(); udelay(100); @@ -395,7 +341,8 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * local_irq_enable(); /* stop the tracing. */ - tracing_stop(); + tr->ctrl = 0; + trace->ctrl_update(tr); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); if (ret) @@ -411,7 +358,6 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * out: trace->reset(tr); - tracing_start(); tracing_max_latency = save_max; return ret; @@ -477,12 +423,8 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) wait_for_completion(&isrt); /* start the tracing */ - ret = trace->init(tr); - if (ret) { - warn_failed_init_tracer(trace, ret); - return ret; - } - + tr->ctrl = 1; + trace->init(tr); /* reset the max latency */ tracing_max_latency = 0; @@ -506,7 +448,8 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) msleep(100); /* stop the tracing. */ - tracing_stop(); + tr->ctrl = 0; + trace->ctrl_update(tr); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); if (!ret) @@ -514,7 +457,6 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) trace->reset(tr); - tracing_start(); tracing_max_latency = save_max; @@ -538,20 +480,16 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr int ret; /* start the tracing */ - ret = trace->init(tr); - if (ret) { - warn_failed_init_tracer(trace, ret); - return ret; - } - + tr->ctrl = 1; + trace->init(tr); /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ - tracing_stop(); + tr->ctrl = 0; + trace->ctrl_update(tr); /* check the trace buffer */ ret = trace_test_buffer(tr, &count); trace->reset(tr); - tracing_start(); if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); @@ -570,48 +508,17 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - ret = trace->init(tr); - if (ret) { - warn_failed_init_tracer(trace, ret); - return 0; - } - + tr->ctrl = 1; + trace->init(tr); /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ - tracing_stop(); + tr->ctrl = 0; + trace->ctrl_update(tr); /* check the trace buffer */ ret = trace_test_buffer(tr, &count); trace->reset(tr); - tracing_start(); return ret; } #endif /* CONFIG_SYSPROF_TRACER */ - -#ifdef CONFIG_BRANCH_TRACER -int -trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) -{ - unsigned long count; - int ret; - - /* start the tracing */ - ret = trace->init(tr); - if (ret) { - warn_failed_init_tracer(trace, ret); - return ret; - } - - /* Sleep for a 1/10 of a second */ - msleep(100); - /* stop the tracing. */ - tracing_stop(); - /* check the trace buffer */ - ret = trace_test_buffer(tr, &count); - trace->reset(tr); - tracing_start(); - - return ret; -} -#endif /* CONFIG_BRANCH_TRACER */ diff --git a/trunk/kernel/trace/trace_stack.c b/trunk/kernel/trace/trace_stack.c index 0b863f2cbc8e..3bdb44bde4b7 100644 --- a/trunk/kernel/trace/trace_stack.c +++ b/trunk/kernel/trace/trace_stack.c @@ -48,7 +48,7 @@ static inline void check_stack(void) if (!object_is_on_stack(&this_size)) return; - local_irq_save(flags); + raw_local_irq_save(flags); __raw_spin_lock(&max_stack_lock); /* a race could have already updated it */ @@ -78,7 +78,6 @@ static inline void check_stack(void) * on a new max, so it is far from a fast path. */ while (i < max_stack_trace.nr_entries) { - int found = 0; stack_dump_index[i] = this_size; p = start; @@ -87,19 +86,17 @@ static inline void check_stack(void) if (*p == stack_dump_trace[i]) { this_size = stack_dump_index[i++] = (top - p) * sizeof(unsigned long); - found = 1; /* Start the search from here */ start = p + 1; } } - if (!found) - i++; + i++; } out: __raw_spin_unlock(&max_stack_lock); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void @@ -110,7 +107,8 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) if (unlikely(!ftrace_enabled || stack_trace_disabled)) return; - resched = ftrace_preempt_disable(); + resched = need_resched(); + preempt_disable_notrace(); cpu = raw_smp_processor_id(); /* no atomic needed, we only modify this variable by this cpu */ @@ -122,7 +120,10 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) out: per_cpu(trace_active, cpu)--; /* prevent recursion in schedule */ - ftrace_preempt_enable(resched); + if (resched) + preempt_enable_no_resched_notrace(); + else + preempt_enable_notrace(); } static struct ftrace_ops trace_ops __read_mostly = @@ -165,11 +166,11 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, if (ret < 0) return ret; - local_irq_save(flags); + raw_local_irq_save(flags); __raw_spin_lock(&max_stack_lock); *ptr = val; __raw_spin_unlock(&max_stack_lock); - local_irq_restore(flags); + raw_local_irq_restore(flags); return count; } diff --git a/trunk/kernel/trace/trace_sysprof.c b/trunk/kernel/trace/trace_sysprof.c index 54960edb96d0..9587d3bcba55 100644 --- a/trunk/kernel/trace/trace_sysprof.c +++ b/trunk/kernel/trace/trace_sysprof.c @@ -261,17 +261,27 @@ static void stop_stack_trace(struct trace_array *tr) mutex_unlock(&sample_timer_lock); } -static int stack_trace_init(struct trace_array *tr) +static void stack_trace_init(struct trace_array *tr) { sysprof_trace = tr; - start_stack_trace(tr); - return 0; + if (tr->ctrl) + start_stack_trace(tr); } static void stack_trace_reset(struct trace_array *tr) { - stop_stack_trace(tr); + if (tr->ctrl) + stop_stack_trace(tr); +} + +static void stack_trace_ctrl_update(struct trace_array *tr) +{ + /* When starting a new trace, reset the buffers */ + if (tr->ctrl) + start_stack_trace(tr); + else + stop_stack_trace(tr); } static struct tracer stack_trace __read_mostly = @@ -279,6 +289,7 @@ static struct tracer stack_trace __read_mostly = .name = "sysprof", .init = stack_trace_init, .reset = stack_trace_reset, + .ctrl_update = stack_trace_ctrl_update, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_sysprof, #endif diff --git a/trunk/kernel/tracepoint.c b/trunk/kernel/tracepoint.c index 79602740bbb5..af8c85664882 100644 --- a/trunk/kernel/tracepoint.c +++ b/trunk/kernel/tracepoint.c @@ -43,7 +43,6 @@ static DEFINE_MUTEX(tracepoints_mutex); */ #define TRACEPOINT_HASH_BITS 6 #define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS) -static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; /* * Note about RCU : @@ -55,43 +54,40 @@ struct tracepoint_entry { struct hlist_node hlist; void **funcs; int refcount; /* Number of times armed. 0 if disarmed. */ + struct rcu_head rcu; + void *oldptr; + unsigned char rcu_pending:1; char name[0]; }; -struct tp_probes { - union { - struct rcu_head rcu; - struct list_head list; - } u; - void *probes[0]; -}; - -static inline void *allocate_probes(int count) -{ - struct tp_probes *p = kmalloc(count * sizeof(void *) - + sizeof(struct tp_probes), GFP_KERNEL); - return p == NULL ? NULL : p->probes; -} +static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; -static void rcu_free_old_probes(struct rcu_head *head) +static void free_old_closure(struct rcu_head *head) { - kfree(container_of(head, struct tp_probes, u.rcu)); + struct tracepoint_entry *entry = container_of(head, + struct tracepoint_entry, rcu); + kfree(entry->oldptr); + /* Make sure we free the data before setting the pending flag to 0 */ + smp_wmb(); + entry->rcu_pending = 0; } -static inline void release_probes(void *old) +static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old) { - if (old) { - struct tp_probes *tp_probes = container_of(old, - struct tp_probes, probes[0]); - call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes); - } + if (!old) + return; + entry->oldptr = old; + entry->rcu_pending = 1; + /* write rcu_pending before calling the RCU callback */ + smp_wmb(); + call_rcu_sched(&entry->rcu, free_old_closure); } static void debug_print_probes(struct tracepoint_entry *entry) { int i; - if (!tracepoint_debug || !entry->funcs) + if (!tracepoint_debug) return; for (i = 0; entry->funcs[i]; i++) @@ -115,13 +111,12 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe) return ERR_PTR(-EEXIST); } /* + 2 : one for new probe, one for NULL func */ - new = allocate_probes(nr_probes + 2); + new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL); if (new == NULL) return ERR_PTR(-ENOMEM); if (old) memcpy(new, old, nr_probes * sizeof(void *)); new[nr_probes] = probe; - new[nr_probes + 1] = NULL; entry->refcount = nr_probes + 1; entry->funcs = new; debug_print_probes(entry); @@ -137,7 +132,7 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe) old = entry->funcs; if (!old) - return ERR_PTR(-ENOENT); + return NULL; debug_print_probes(entry); /* (N -> M), (N > 1, M >= 0) probes */ @@ -156,13 +151,13 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe) int j = 0; /* N -> M, (N > 1, M > 0) */ /* + 1 for NULL */ - new = allocate_probes(nr_probes - nr_del + 1); + new = kzalloc((nr_probes - nr_del + 1) + * sizeof(void *), GFP_KERNEL); if (new == NULL) return ERR_PTR(-ENOMEM); for (i = 0; old[i]; i++) if ((probe && old[i] != probe)) new[j++] = old[i]; - new[nr_probes - nr_del] = NULL; entry->refcount = nr_probes - nr_del; entry->funcs = new; } @@ -220,6 +215,7 @@ static struct tracepoint_entry *add_tracepoint(const char *name) memcpy(&e->name[0], name, name_len); e->funcs = NULL; e->refcount = 0; + e->rcu_pending = 0; hlist_add_head(&e->hlist, head); return e; } @@ -228,10 +224,32 @@ static struct tracepoint_entry *add_tracepoint(const char *name) * Remove the tracepoint from the tracepoint hash table. Must be called with * mutex_lock held. */ -static inline void remove_tracepoint(struct tracepoint_entry *e) +static int remove_tracepoint(const char *name) { + struct hlist_head *head; + struct hlist_node *node; + struct tracepoint_entry *e; + int found = 0; + size_t len = strlen(name) + 1; + u32 hash = jhash(name, len-1, 0); + + head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; + hlist_for_each_entry(e, node, head, hlist) { + if (!strcmp(name, e->name)) { + found = 1; + break; + } + } + if (!found) + return -ENOENT; + if (e->refcount) + return -EBUSY; hlist_del(&e->hlist); + /* Make sure the call_rcu_sched has been executed */ + if (e->rcu_pending) + rcu_barrier_sched(); kfree(e); + return 0; } /* @@ -262,7 +280,6 @@ static void set_tracepoint(struct tracepoint_entry **entry, static void disable_tracepoint(struct tracepoint *elem) { elem->state = 0; - rcu_assign_pointer(elem->funcs, NULL); } /** @@ -303,23 +320,6 @@ static void tracepoint_update_probes(void) module_update_tracepoints(); } -static void *tracepoint_add_probe(const char *name, void *probe) -{ - struct tracepoint_entry *entry; - void *old; - - entry = get_tracepoint(name); - if (!entry) { - entry = add_tracepoint(name); - if (IS_ERR(entry)) - return entry; - } - old = tracepoint_entry_add_probe(entry, probe); - if (IS_ERR(old) && !entry->refcount) - remove_tracepoint(entry); - return old; -} - /** * tracepoint_probe_register - Connect a probe to a tracepoint * @name: tracepoint name @@ -330,35 +330,43 @@ static void *tracepoint_add_probe(const char *name, void *probe) */ int tracepoint_probe_register(const char *name, void *probe) { + struct tracepoint_entry *entry; + int ret = 0; void *old; mutex_lock(&tracepoints_mutex); - old = tracepoint_add_probe(name, probe); + entry = get_tracepoint(name); + if (!entry) { + entry = add_tracepoint(name); + if (IS_ERR(entry)) { + ret = PTR_ERR(entry); + goto end; + } + } + /* + * If we detect that a call_rcu_sched is pending for this tracepoint, + * make sure it's executed now. + */ + if (entry->rcu_pending) + rcu_barrier_sched(); + old = tracepoint_entry_add_probe(entry, probe); + if (IS_ERR(old)) { + ret = PTR_ERR(old); + goto end; + } mutex_unlock(&tracepoints_mutex); - if (IS_ERR(old)) - return PTR_ERR(old); - tracepoint_update_probes(); /* may update entry */ - release_probes(old); - return 0; -} -EXPORT_SYMBOL_GPL(tracepoint_probe_register); - -static void *tracepoint_remove_probe(const char *name, void *probe) -{ - struct tracepoint_entry *entry; - void *old; - + mutex_lock(&tracepoints_mutex); entry = get_tracepoint(name); - if (!entry) - return ERR_PTR(-ENOENT); - old = tracepoint_entry_remove_probe(entry, probe); - if (IS_ERR(old)) - return old; - if (!entry->refcount) - remove_tracepoint(entry); - return old; + WARN_ON(!entry); + if (entry->rcu_pending) + rcu_barrier_sched(); + tracepoint_entry_free_old(entry, old); +end: + mutex_unlock(&tracepoints_mutex); + return ret; } +EXPORT_SYMBOL_GPL(tracepoint_probe_register); /** * tracepoint_probe_unregister - Disconnect a probe from a tracepoint @@ -372,104 +380,38 @@ static void *tracepoint_remove_probe(const char *name, void *probe) */ int tracepoint_probe_unregister(const char *name, void *probe) { + struct tracepoint_entry *entry; void *old; + int ret = -ENOENT; mutex_lock(&tracepoints_mutex); - old = tracepoint_remove_probe(name, probe); - mutex_unlock(&tracepoints_mutex); - if (IS_ERR(old)) - return PTR_ERR(old); - - tracepoint_update_probes(); /* may update entry */ - release_probes(old); - return 0; -} -EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); - -static LIST_HEAD(old_probes); -static int need_update; - -static void tracepoint_add_old_probes(void *old) -{ - need_update = 1; - if (old) { - struct tp_probes *tp_probes = container_of(old, - struct tp_probes, probes[0]); - list_add(&tp_probes->u.list, &old_probes); - } -} - -/** - * tracepoint_probe_register_noupdate - register a probe but not connect - * @name: tracepoint name - * @probe: probe handler - * - * caller must call tracepoint_probe_update_all() - */ -int tracepoint_probe_register_noupdate(const char *name, void *probe) -{ - void *old; - - mutex_lock(&tracepoints_mutex); - old = tracepoint_add_probe(name, probe); - if (IS_ERR(old)) { - mutex_unlock(&tracepoints_mutex); - return PTR_ERR(old); - } - tracepoint_add_old_probes(old); - mutex_unlock(&tracepoints_mutex); - return 0; -} -EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate); - -/** - * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect - * @name: tracepoint name - * @probe: probe function pointer - * - * caller must call tracepoint_probe_update_all() - */ -int tracepoint_probe_unregister_noupdate(const char *name, void *probe) -{ - void *old; - - mutex_lock(&tracepoints_mutex); - old = tracepoint_remove_probe(name, probe); - if (IS_ERR(old)) { - mutex_unlock(&tracepoints_mutex); - return PTR_ERR(old); + entry = get_tracepoint(name); + if (!entry) + goto end; + if (entry->rcu_pending) + rcu_barrier_sched(); + old = tracepoint_entry_remove_probe(entry, probe); + if (!old) { + printk(KERN_WARNING "Warning: Trying to unregister a probe" + "that doesn't exist\n"); + goto end; } - tracepoint_add_old_probes(old); mutex_unlock(&tracepoints_mutex); - return 0; -} -EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate); - -/** - * tracepoint_probe_update_all - update tracepoints - */ -void tracepoint_probe_update_all(void) -{ - LIST_HEAD(release_probes); - struct tp_probes *pos, *next; - + tracepoint_update_probes(); /* may update entry */ mutex_lock(&tracepoints_mutex); - if (!need_update) { - mutex_unlock(&tracepoints_mutex); - return; - } - if (!list_empty(&old_probes)) - list_replace_init(&old_probes, &release_probes); - need_update = 0; + entry = get_tracepoint(name); + if (!entry) + goto end; + if (entry->rcu_pending) + rcu_barrier_sched(); + tracepoint_entry_free_old(entry, old); + remove_tracepoint(name); /* Ignore busy error message */ + ret = 0; +end: mutex_unlock(&tracepoints_mutex); - - tracepoint_update_probes(); - list_for_each_entry_safe(pos, next, &release_probes, u.list) { - list_del(&pos->u.list); - call_rcu_sched(&pos->u.rcu, rcu_free_old_probes); - } + return ret; } -EXPORT_SYMBOL_GPL(tracepoint_probe_update_all); +EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); /** * tracepoint_get_iter_range - Get a next tracepoint iterator given a range. @@ -541,36 +483,3 @@ void tracepoint_iter_reset(struct tracepoint_iter *iter) iter->tracepoint = NULL; } EXPORT_SYMBOL_GPL(tracepoint_iter_reset); - -#ifdef CONFIG_MODULES - -int tracepoint_module_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct module *mod = data; - - switch (val) { - case MODULE_STATE_COMING: - tracepoint_update_probe_range(mod->tracepoints, - mod->tracepoints + mod->num_tracepoints); - break; - case MODULE_STATE_GOING: - tracepoint_update_probe_range(mod->tracepoints, - mod->tracepoints + mod->num_tracepoints); - break; - } - return 0; -} - -struct notifier_block tracepoint_module_nb = { - .notifier_call = tracepoint_module_notify, - .priority = 0, -}; - -static int init_tracepoints(void) -{ - return register_module_notifier(&tracepoint_module_nb); -} -__initcall(init_tracepoints); - -#endif /* CONFIG_MODULES */ diff --git a/trunk/kernel/user.c b/trunk/kernel/user.c index cec2224bc9f5..39d6159fae43 100644 --- a/trunk/kernel/user.c +++ b/trunk/kernel/user.c @@ -101,8 +101,6 @@ static int sched_create_user(struct user_struct *up) if (IS_ERR(up->tg)) rc = -ENOMEM; - set_tg_uid(up); - return rc; } diff --git a/trunk/lib/Kconfig.debug b/trunk/lib/Kconfig.debug index 1e3fd3e3436a..b0f239e443bc 100644 --- a/trunk/lib/Kconfig.debug +++ b/trunk/lib/Kconfig.debug @@ -545,16 +545,6 @@ config DEBUG_SG If unsure, say N. -config DEBUG_NOTIFIERS - bool "Debug notifier call chains" - depends on DEBUG_KERNEL - help - Enable this to turn on sanity checking for notifier call chains. - This is most useful for kernel developers to make sure that - modules properly unregister themselves from notifier chains. - This is a relatively cheap check but if you care about maximum - performance, say N. - config FRAME_POINTER bool "Compile the kernel with frame pointers" depends on DEBUG_KERNEL && \ diff --git a/trunk/lib/cpumask.c b/trunk/lib/cpumask.c index 8d03f22c6ced..3f258f58c85b 100644 --- a/trunk/lib/cpumask.c +++ b/trunk/lib/cpumask.c @@ -76,15 +76,14 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) /* These are not inline because of header tangles. */ #ifdef CONFIG_CPUMASK_OFFSTACK -bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { if (likely(slab_is_available())) - *mask = kmalloc(cpumask_size(), flags); + *mask = kmalloc_node(cpumask_size(), flags, node); else { #ifdef CONFIG_DEBUG_PER_CPU_MAPS printk(KERN_ERR "=> alloc_cpumask_var: kmalloc not available!\n"); - dump_stack(); #endif *mask = NULL; } @@ -96,6 +95,12 @@ bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) #endif return *mask != NULL; } +EXPORT_SYMBOL(alloc_cpumask_var_node); + +bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + return alloc_cpumask_var_node(mask, flags, numa_node_id()); +} EXPORT_SYMBOL(alloc_cpumask_var); void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) diff --git a/trunk/lib/dynamic_printk.c b/trunk/lib/dynamic_printk.c index 8e30295e8566..d83660fd6fdd 100644 --- a/trunk/lib/dynamic_printk.c +++ b/trunk/lib/dynamic_printk.c @@ -135,7 +135,7 @@ int unregister_dynamic_debug_module(char *mod_name) nr_entries--; out: up(&debug_list_mutex); - return ret; + return 0; } EXPORT_SYMBOL_GPL(unregister_dynamic_debug_module); @@ -289,7 +289,7 @@ static ssize_t pr_debug_write(struct file *file, const char __user *buf, dynamic_enabled = DYNAMIC_ENABLED_SOME; err = 0; printk(KERN_DEBUG - "debugging enabled for module %s\n", + "debugging enabled for module %s", elem->name); } else if (!value && (elem->enable == 1)) { elem->enable = 0; @@ -309,7 +309,7 @@ static ssize_t pr_debug_write(struct file *file, const char __user *buf, err = 0; printk(KERN_DEBUG "debugging disabled for module " - "%s\n", elem->name); + "%s", elem->name); } } } diff --git a/trunk/mm/bounce.c b/trunk/mm/bounce.c index bf0cf7c8387b..06722c403058 100644 --- a/trunk/mm/bounce.c +++ b/trunk/mm/bounce.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #define POOL_SIZE 64 @@ -22,8 +21,6 @@ static mempool_t *page_pool, *isa_page_pool; -DEFINE_TRACE(block_bio_bounce); - #ifdef CONFIG_HIGHMEM static __init int init_emergency_pool(void) { @@ -225,7 +222,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, if (!bio) return; - trace_block_bio_bounce(q, *bio_orig); + blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE); /* * at least one page was bounced, fill in possible non-highmem diff --git a/trunk/mm/memory.c b/trunk/mm/memory.c index fc031d68327e..164951c47305 100644 --- a/trunk/mm/memory.c +++ b/trunk/mm/memory.c @@ -3049,18 +3049,3 @@ void print_vma_addr(char *prefix, unsigned long ip) } up_read(¤t->mm->mmap_sem); } - -#ifdef CONFIG_PROVE_LOCKING -void might_fault(void) -{ - might_sleep(); - /* - * it would be nicer only to annotate paths which are not under - * pagefault_disable, however that requires a larger audit and - * providing helpers like get_user_atomic. - */ - if (!in_atomic() && current->mm) - might_lock_read(¤t->mm->mmap_sem); -} -EXPORT_SYMBOL(might_fault); -#endif diff --git a/trunk/mm/migrate.c b/trunk/mm/migrate.c index 037b0967c1e3..d8f07667fc80 100644 --- a/trunk/mm/migrate.c +++ b/trunk/mm/migrate.c @@ -998,7 +998,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages, unsigned long addr = (unsigned long)(*pages); struct vm_area_struct *vma; struct page *page; - int err = -EFAULT; + int err; vma = find_vma(mm, addr); if (!vma) diff --git a/trunk/mm/slob.c b/trunk/mm/slob.c index bf7e8fc3aed8..cb675d126791 100644 --- a/trunk/mm/slob.c +++ b/trunk/mm/slob.c @@ -535,7 +535,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, struct kmem_cache *c; c = slob_alloc(sizeof(struct kmem_cache), - GFP_KERNEL, ARCH_KMALLOC_MINALIGN, -1); + flags, ARCH_KMALLOC_MINALIGN, -1); if (c) { c->name = name; diff --git a/trunk/net/core/netpoll.c b/trunk/net/core/netpoll.c index dadac6281f20..6c7af390be0a 100644 --- a/trunk/net/core/netpoll.c +++ b/trunk/net/core/netpoll.c @@ -133,11 +133,9 @@ static int poll_one_napi(struct netpoll_info *npinfo, npinfo->rx_flags |= NETPOLL_RX_DROP; atomic_inc(&trapped); - set_bit(NAPI_STATE_NPSVC, &napi->state); work = napi->poll(napi, budget); - clear_bit(NAPI_STATE_NPSVC, &napi->state); atomic_dec(&trapped); npinfo->rx_flags &= ~NETPOLL_RX_DROP; diff --git a/trunk/net/ipv4/netfilter/nf_nat_rule.c b/trunk/net/ipv4/netfilter/nf_nat_rule.c index 8d489e746b21..bea54a685109 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_rule.c +++ b/trunk/net/ipv4/netfilter/nf_nat_rule.c @@ -61,7 +61,7 @@ static struct static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(nat_table.lock), + .lock = __RW_LOCK_UNLOCKED(__nat_table.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/trunk/net/ipv4/tcp_vegas.c b/trunk/net/ipv4/tcp_vegas.c index a453aac91bd3..7cd22262de3a 100644 --- a/trunk/net/ipv4/tcp_vegas.c +++ b/trunk/net/ipv4/tcp_vegas.c @@ -40,14 +40,18 @@ #include "tcp_vegas.h" -static int alpha = 2; -static int beta = 4; -static int gamma = 1; +/* Default values of the Vegas variables, in fixed-point representation + * with V_PARAM_SHIFT bits to the right of the binary point. + */ +#define V_PARAM_SHIFT 1 +static int alpha = 2<beg_snd_nxt)) { /* Do the Vegas once-per-RTT cwnd adjustment. */ + u32 old_wnd, old_snd_cwnd; + + + /* Here old_wnd is essentially the window of data that was + * sent during the previous RTT, and has all + * been acknowledged in the course of the RTT that ended + * with the ACK we just received. Likewise, old_snd_cwnd + * is the cwnd during the previous RTT. + */ + old_wnd = (vegas->beg_snd_nxt - vegas->beg_snd_una) / + tp->mss_cache; + old_snd_cwnd = vegas->beg_snd_cwnd; /* Save the extent of the current window so we can use this * at the end of the next RTT. */ + vegas->beg_snd_una = vegas->beg_snd_nxt; vegas->beg_snd_nxt = tp->snd_nxt; + vegas->beg_snd_cwnd = tp->snd_cwnd; /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got @@ -212,14 +252,22 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * * This is: * (actual rate in segments) * baseRTT + * We keep it as a fixed point number with + * V_PARAM_SHIFT bits to the right of the binary point. */ - target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt; + target_cwnd = ((u64)old_wnd * vegas->baseRTT); + target_cwnd <<= V_PARAM_SHIFT; + do_div(target_cwnd, rtt); /* Calculate the difference between the window we had, * and the window we would like to have. This quantity * is the "Diff" from the Arizona Vegas papers. + * + * Again, this is a fixed point number with + * V_PARAM_SHIFT bits to the right of the binary + * point. */ - diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; + diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd; if (diff > gamma && tp->snd_ssthresh > 2 ) { /* Going too fast. Time to slow down @@ -234,13 +282,16 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * truncation robs us of full link * utilization. */ - tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); + tp->snd_cwnd = min(tp->snd_cwnd, + ((u32)target_cwnd >> + V_PARAM_SHIFT)+1); } else if (tp->snd_cwnd <= tp->snd_ssthresh) { /* Slow start. */ tcp_slow_start(tp); } else { /* Congestion avoidance. */ + u32 next_snd_cwnd; /* Figure out where we would like cwnd * to be. @@ -249,17 +300,26 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) /* The old window was too fast, so * we slow down. */ - tp->snd_cwnd--; + next_snd_cwnd = old_snd_cwnd - 1; } else if (diff < alpha) { /* We don't have enough extra packets * in the network, so speed up. */ - tp->snd_cwnd++; + next_snd_cwnd = old_snd_cwnd + 1; } else { /* Sending just as fast as we * should be. */ + next_snd_cwnd = old_snd_cwnd; } + + /* Adjust cwnd upward or downward, toward the + * desired value. + */ + if (next_snd_cwnd > tp->snd_cwnd) + tp->snd_cwnd++; + else if (next_snd_cwnd < tp->snd_cwnd) + tp->snd_cwnd--; } if (tp->snd_cwnd < 2) diff --git a/trunk/net/ipv6/ndisc.c b/trunk/net/ipv6/ndisc.c index d0f54d18e19b..172438320eec 100644 --- a/trunk/net/ipv6/ndisc.c +++ b/trunk/net/ipv6/ndisc.c @@ -912,13 +912,8 @@ static void ndisc_recv_na(struct sk_buff *skb) is invalid, but ndisc specs say nothing about it. It could be misconfiguration, or an smart proxy agent tries to help us :-) - - We should not print the error if NA has been - received from loopback - it is just our own - unsolicited advertisement. */ - if (skb->pkt_type != PACKET_LOOPBACK) - ND_PRINTK1(KERN_WARNING + ND_PRINTK1(KERN_WARNING "ICMPv6 NA: someone advertises our address on %s!\n", ifp->idev->dev->name); in6_ifa_put(ifp); diff --git a/trunk/net/netlabel/netlabel_unlabeled.c b/trunk/net/netlabel/netlabel_unlabeled.c index 8c0308032178..90c8506a0aac 100644 --- a/trunk/net/netlabel/netlabel_unlabeled.c +++ b/trunk/net/netlabel/netlabel_unlabeled.c @@ -562,6 +562,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, const struct in_addr *mask, struct netlbl_audit *audit_info) { + int ret_val = 0; struct netlbl_af4list *list_entry; struct netlbl_unlhsh_addr4 *entry; struct audit_buffer *audit_buf; @@ -576,7 +577,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, if (list_entry != NULL) entry = netlbl_unlhsh_addr4_entry(list_entry); else - entry = NULL; + ret_val = -ENOENT; audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); @@ -587,21 +588,19 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, addr->s_addr, mask->s_addr); if (dev != NULL) dev_put(dev); - if (entry != NULL && - security_secid_to_secctx(entry->secid, - &secctx, &secctx_len) == 0) { + if (entry && security_secid_to_secctx(entry->secid, + &secctx, + &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } - audit_log_format(audit_buf, " res=%u", entry != NULL ? 1 : 0); + audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); audit_log_end(audit_buf); } - if (entry == NULL) - return -ENOENT; - - call_rcu(&entry->rcu, netlbl_unlhsh_free_addr4); - return 0; + if (ret_val == 0) + call_rcu(&entry->rcu, netlbl_unlhsh_free_addr4); + return ret_val; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -625,6 +624,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, const struct in6_addr *mask, struct netlbl_audit *audit_info) { + int ret_val = 0; struct netlbl_af6list *list_entry; struct netlbl_unlhsh_addr6 *entry; struct audit_buffer *audit_buf; @@ -638,7 +638,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, if (list_entry != NULL) entry = netlbl_unlhsh_addr6_entry(list_entry); else - entry = NULL; + ret_val = -ENOENT; audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); @@ -649,21 +649,19 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, addr, mask); if (dev != NULL) dev_put(dev); - if (entry != NULL && - security_secid_to_secctx(entry->secid, - &secctx, &secctx_len) == 0) { + if (entry && security_secid_to_secctx(entry->secid, + &secctx, + &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } - audit_log_format(audit_buf, " res=%u", entry != NULL ? 1 : 0); + audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); audit_log_end(audit_buf); } - if (entry == NULL) - return -ENOENT; - - call_rcu(&entry->rcu, netlbl_unlhsh_free_addr6); - return 0; + if (ret_val == 0) + call_rcu(&entry->rcu, netlbl_unlhsh_free_addr6); + return ret_val; } #endif /* IPv6 */ diff --git a/trunk/net/phonet/pep-gprs.c b/trunk/net/phonet/pep-gprs.c index 803eeef0aa85..9978afbd9f2a 100644 --- a/trunk/net/phonet/pep-gprs.c +++ b/trunk/net/phonet/pep-gprs.c @@ -155,13 +155,12 @@ static void gprs_data_ready(struct sock *sk, int len) static void gprs_write_space(struct sock *sk) { struct gprs_dev *dev = sk->sk_user_data; - struct net_device *net = dev->net; unsigned credits = pep_writeable(sk); spin_lock_bh(&dev->tx_lock); dev->tx_max = credits; - if (credits > skb_queue_len(&dev->tx_queue) && netif_running(net)) - netif_wake_queue(net); + if (credits > skb_queue_len(&dev->tx_queue)) + netif_wake_queue(dev->net); spin_unlock_bh(&dev->tx_lock); } @@ -169,23 +168,6 @@ static void gprs_write_space(struct sock *sk) * Network device callbacks */ -static int gprs_open(struct net_device *dev) -{ - struct gprs_dev *gp = netdev_priv(dev); - - gprs_write_space(gp->sk); - return 0; -} - -static int gprs_close(struct net_device *dev) -{ - struct gprs_dev *gp = netdev_priv(dev); - - netif_stop_queue(dev); - flush_work(&gp->tx_work); - return 0; -} - static int gprs_xmit(struct sk_buff *skb, struct net_device *net) { struct gprs_dev *dev = netdev_priv(net); @@ -272,8 +254,6 @@ static void gprs_setup(struct net_device *net) net->tx_queue_len = 10; net->destructor = free_netdev; - net->open = gprs_open; - net->stop = gprs_close; net->hard_start_xmit = gprs_xmit; /* mandatory */ net->change_mtu = gprs_set_mtu; net->get_stats = gprs_get_stats; @@ -338,6 +318,7 @@ int gprs_attach(struct sock *sk) dev->sk = sk; printk(KERN_DEBUG"%s: attached\n", net->name); + gprs_write_space(sk); /* kick off TX */ return net->ifindex; out_rel: @@ -360,5 +341,7 @@ void gprs_detach(struct sock *sk) printk(KERN_DEBUG"%s: detached\n", net->name); unregister_netdev(net); + flush_scheduled_work(); sock_put(sk); + skb_queue_purge(&dev->tx_queue); } diff --git a/trunk/net/sched/sch_netem.c b/trunk/net/sched/sch_netem.c index 98402f0efa47..a11959908d9a 100644 --- a/trunk/net/sched/sch_netem.c +++ b/trunk/net/sched/sch_netem.c @@ -46,6 +46,9 @@ layering other disciplines. It does not need to do bandwidth control either since that can be handled by using token bucket or other rate control. + + The simulator is limited by the Linux timer resolution + and will create packet bursts on the HZ boundary (1ms). */ struct netem_sched_data { diff --git a/trunk/samples/tracepoints/tp-samples-trace.h b/trunk/samples/tracepoints/tp-samples-trace.h index 01724e04c556..0216b55bd640 100644 --- a/trunk/samples/tracepoints/tp-samples-trace.h +++ b/trunk/samples/tracepoints/tp-samples-trace.h @@ -4,10 +4,10 @@ #include /* for struct inode and struct file */ #include -DECLARE_TRACE(subsys_event, +DEFINE_TRACE(subsys_event, TPPROTO(struct inode *inode, struct file *file), TPARGS(inode, file)); -DECLARE_TRACE(subsys_eventb, +DEFINE_TRACE(subsys_eventb, TPPROTO(void), TPARGS()); #endif diff --git a/trunk/samples/tracepoints/tracepoint-probe-sample.c b/trunk/samples/tracepoints/tracepoint-probe-sample.c index e3a964889dc7..55abfdda4bd4 100644 --- a/trunk/samples/tracepoints/tracepoint-probe-sample.c +++ b/trunk/samples/tracepoints/tracepoint-probe-sample.c @@ -46,7 +46,6 @@ void __exit tp_sample_trace_exit(void) { unregister_trace_subsys_eventb(probe_subsys_eventb); unregister_trace_subsys_event(probe_subsys_event); - tracepoint_synchronize_unregister(); } module_exit(tp_sample_trace_exit); diff --git a/trunk/samples/tracepoints/tracepoint-probe-sample2.c b/trunk/samples/tracepoints/tracepoint-probe-sample2.c index 685a5acb4562..5e9fcf4afffe 100644 --- a/trunk/samples/tracepoints/tracepoint-probe-sample2.c +++ b/trunk/samples/tracepoints/tracepoint-probe-sample2.c @@ -33,7 +33,6 @@ module_init(tp_sample_trace_init); void __exit tp_sample_trace_exit(void) { unregister_trace_subsys_event(probe_subsys_event); - tracepoint_synchronize_unregister(); } module_exit(tp_sample_trace_exit); diff --git a/trunk/samples/tracepoints/tracepoint-sample.c b/trunk/samples/tracepoints/tracepoint-sample.c index 00d169792a3e..4ae4b7fcc043 100644 --- a/trunk/samples/tracepoints/tracepoint-sample.c +++ b/trunk/samples/tracepoints/tracepoint-sample.c @@ -13,9 +13,6 @@ #include #include "tp-samples-trace.h" -DEFINE_TRACE(subsys_event); -DEFINE_TRACE(subsys_eventb); - struct proc_dir_entry *pentry_example; static int my_open(struct inode *inode, struct file *file) diff --git a/trunk/scripts/Makefile.build b/trunk/scripts/Makefile.build index 7a176773af85..468fbc9016c7 100644 --- a/trunk/scripts/Makefile.build +++ b/trunk/scripts/Makefile.build @@ -198,10 +198,16 @@ cmd_modversions = \ fi; endif +ifdef CONFIG_64BIT +arch_bits = 64 +else +arch_bits = 32 +endif + ifdef CONFIG_FTRACE_MCOUNT_RECORD -cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ - "$(if $(CONFIG_64BIT),64,32)" \ - "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)"; +cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl \ + "$(ARCH)" "$(arch_bits)" "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" \ + "$(NM)" "$(RM)" "$(MV)" "$(@)"; endif define rule_cc_o_c diff --git a/trunk/scripts/bootgraph.pl b/trunk/scripts/bootgraph.pl index f0af9aa9b243..d2c61efc216f 100644 --- a/trunk/scripts/bootgraph.pl +++ b/trunk/scripts/bootgraph.pl @@ -78,13 +78,11 @@ } if ($count == 0) { - print STDERR < output.svg -END - exit 1; + print "No data found in the dmesg. Make sure that 'printk.time=1' and\n"; + print "'initcall_debug' are passed on the kernel command line.\n\n"; + print "Usage: \n"; + print " dmesg | perl scripts/bootgraph.pl > output.svg\n\n"; + exit; } print " \n"; @@ -111,8 +109,8 @@ END my %rows; my $rowscount = 1; my @initcalls = sort { $start{$a} <=> $start{$b} } keys(%start); - -foreach my $key (@initcalls) { +my $key; +foreach $key (@initcalls) { my $duration = $end{$key} - $start{$key}; if ($duration >= $threshold) { diff --git a/trunk/scripts/recordmcount.pl b/trunk/scripts/recordmcount.pl index 0b1dc9f9bb06..6b9fe3eb8360 100755 --- a/trunk/scripts/recordmcount.pl +++ b/trunk/scripts/recordmcount.pl @@ -112,8 +112,6 @@ # Acceptable sections to record. my %text_sections = ( ".text" => 1, - ".sched.text" => 1, - ".spinlock.text" => 1, ); $objdump = "objdump" if ((length $objdump) == 0); @@ -132,13 +130,10 @@ my %convert; # List of local functions used that needs conversion my $type; -my $nm_regex; # Find the local functions (return function) my $section_regex; # Find the start of a section my $function_regex; # Find the name of a function # (return offset and func name) my $mcount_regex; # Find the call site to mcount (return offset) -my $alignment; # The .align value to use for $mcount_section -my $section_type; # Section header plus possible alignment command if ($arch eq "x86") { if ($bits == 64) { @@ -148,21 +143,11 @@ } } -# -# We base the defaults off of i386, the other archs may -# feel free to change them in the below if statements. -# -$nm_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\S+)"; -$section_regex = "Disassembly of section\\s+(\\S+):"; -$function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:"; -$mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$"; -$section_type = '@progbits'; -$type = ".long"; - if ($arch eq "x86_64") { + $section_regex = "Disassembly of section\\s+(\\S+):"; + $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:"; $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$"; $type = ".quad"; - $alignment = 8; # force flags for this arch $ld .= " -m elf_x86_64"; @@ -171,7 +156,10 @@ $cc .= " -m64"; } elsif ($arch eq "i386") { - $alignment = 4; + $section_regex = "Disassembly of section\\s+(\\S+):"; + $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:"; + $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$"; + $type = ".long"; # force flags for this arch $ld .= " -m elf_i386"; @@ -179,27 +167,6 @@ $objcopy .= " -O elf32-i386"; $cc .= " -m32"; -} elsif ($arch eq "sh") { - $alignment = 2; - - # force flags for this arch - $ld .= " -m shlelf_linux"; - $objcopy .= " -O elf32-sh-linux"; - $cc .= " -m32"; - -} elsif ($arch eq "powerpc") { - $nm_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\.?\\S+)"; - $function_regex = "^([0-9a-fA-F]+)\\s+<(\\.?.*?)>:"; - $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s\\.?_mcount\$"; - - if ($bits == 64) { - $type = ".quad"; - } - -} elsif ($arch eq "arm") { - $alignment = 2; - $section_type = '%progbits'; - } else { die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD"; } @@ -269,7 +236,7 @@ # open (IN, "$nm $inputfile|") || die "error running $nm"; while () { - if (/$nm_regex/) { + if (/^[0-9a-fA-F]+\s+t\s+(\S+)/) { $locals{$1} = 1; } elsif (/^[0-9a-fA-F]+\s+([wW])\s+(\S+)/) { $weak{$2} = $1; @@ -320,8 +287,7 @@ sub update_funcs if (!$opened) { open(FILE, ">$mcount_s") || die "can't create $mcount_s\n"; $opened = 1; - print FILE "\t.section $mcount_section,\"a\",$section_type\n"; - print FILE "\t.align $alignment\n" if (defined($alignment)); + print FILE "\t.section $mcount_section,\"a\",\@progbits\n"; } printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset; } diff --git a/trunk/scripts/trace/power.pl b/trunk/scripts/trace/power.pl deleted file mode 100644 index 4f729b3501e0..000000000000 --- a/trunk/scripts/trace/power.pl +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/perl - -# Copyright 2008, Intel Corporation -# -# This file is part of the Linux kernel -# -# This program file is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program in a file named COPYING; if not, write to the -# Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, -# Boston, MA 02110-1301 USA -# -# Authors: -# Arjan van de Ven - - -# -# This script turns a cstate ftrace output into a SVG graphic that shows -# historic C-state information -# -# -# cat /sys/kernel/debug/tracing/trace | perl power.pl > out.svg -# - -my @styles; -my $base = 0; - -my @pstate_last; -my @pstate_level; - -$styles[0] = "fill:rgb(0,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; -$styles[1] = "fill:rgb(0,255,0);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; -$styles[2] = "fill:rgb(255,0,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; -$styles[3] = "fill:rgb(255,255,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; -$styles[4] = "fill:rgb(255,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; -$styles[5] = "fill:rgb(0,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; -$styles[6] = "fill:rgb(0,128,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; -$styles[7] = "fill:rgb(0,255,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; -$styles[8] = "fill:rgb(0,25,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; - - -print " \n"; -print "\n"; - -my $scale = 30000.0; -while (<>) { - my $line = $_; - if ($line =~ /([0-9\.]+)\] CSTATE: Going to C([0-9]) on cpu ([0-9]+) for ([0-9\.]+)/) { - if ($base == 0) { - $base = $1; - } - my $time = $1 - $base; - $time = $time * $scale; - my $C = $2; - my $cpu = $3; - my $y = 400 * $cpu; - my $duration = $4 * $scale; - my $msec = int($4 * 100000)/100.0; - my $height = $C * 20; - $style = $styles[$C]; - - $y = $y + 140 - $height; - - $x2 = $time + 4; - $y2 = $y + 4; - - - print "\n"; - print "C$C $msec\n"; - } - if ($line =~ /([0-9\.]+)\] PSTATE: Going to P([0-9]) on cpu ([0-9]+)/) { - my $time = $1 - $base; - my $state = $2; - my $cpu = $3; - - if (defined($pstate_last[$cpu])) { - my $from = $pstate_last[$cpu]; - my $oldstate = $pstate_state[$cpu]; - my $duration = ($time-$from) * $scale; - - $from = $from * $scale; - my $to = $from + $duration; - my $height = 140 - ($oldstate * (140/8)); - - my $y = 400 * $cpu + 200 + $height; - my $y2 = $y+4; - my $style = $styles[8]; - - print "\n"; - print "P$oldstate (cpu $cpu)\n"; - }; - - $pstate_last[$cpu] = $time; - $pstate_state[$cpu] = $state; - } -} - - -print "\n"; diff --git a/trunk/scripts/tracing/draw_functrace.py b/trunk/scripts/tracing/draw_functrace.py deleted file mode 100644 index 902f9a992620..000000000000 --- a/trunk/scripts/tracing/draw_functrace.py +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/python - -""" -Copyright 2008 (c) Frederic Weisbecker -Licensed under the terms of the GNU GPL License version 2 - -This script parses a trace provided by the function tracer in -kernel/trace/trace_functions.c -The resulted trace is processed into a tree to produce a more human -view of the call stack by drawing textual but hierarchical tree of -calls. Only the functions's names and the the call time are provided. - -Usage: - Be sure that you have CONFIG_FUNCTION_TRACER - # mkdir /debugfs - # mount -t debug debug /debug - # echo function > /debug/tracing/current_tracer - $ cat /debug/tracing/trace_pipe > ~/raw_trace_func - Wait some times but not too much, the script is a bit slow. - Break the pipe (Ctrl + Z) - $ scripts/draw_functrace.py < raw_trace_func > draw_functrace - Then you have your drawn trace in draw_functrace -""" - - -import sys, re - -class CallTree: - """ This class provides a tree representation of the functions - call stack. If a function has no parent in the kernel (interrupt, - syscall, kernel thread...) then it is attached to a virtual parent - called ROOT. - """ - ROOT = None - - def __init__(self, func, time = None, parent = None): - self._func = func - self._time = time - if parent is None: - self._parent = CallTree.ROOT - else: - self._parent = parent - self._children = [] - - def calls(self, func, calltime): - """ If a function calls another one, call this method to insert it - into the tree at the appropriate place. - @return: A reference to the newly created child node. - """ - child = CallTree(func, calltime, self) - self._children.append(child) - return child - - def getParent(self, func): - """ Retrieve the last parent of the current node that - has the name given by func. If this function is not - on a parent, then create it as new child of root - @return: A reference to the parent. - """ - tree = self - while tree != CallTree.ROOT and tree._func != func: - tree = tree._parent - if tree == CallTree.ROOT: - child = CallTree.ROOT.calls(func, None) - return child - return tree - - def __repr__(self): - return self.__toString("", True) - - def __toString(self, branch, lastChild): - if self._time is not None: - s = "%s----%s (%s)\n" % (branch, self._func, self._time) - else: - s = "%s----%s\n" % (branch, self._func) - - i = 0 - if lastChild: - branch = branch[:-1] + " " - while i < len(self._children): - if i != len(self._children) - 1: - s += "%s" % self._children[i].__toString(branch +\ - " |", False) - else: - s += "%s" % self._children[i].__toString(branch +\ - " |", True) - i += 1 - return s - -class BrokenLineException(Exception): - """If the last line is not complete because of the pipe breakage, - we want to stop the processing and ignore this line. - """ - pass - -class CommentLineException(Exception): - """ If the line is a comment (as in the beginning of the trace file), - just ignore it. - """ - pass - - -def parseLine(line): - line = line.strip() - if line.startswith("#"): - raise CommentLineException - m = re.match("[^]]+?\\] +([0-9.]+): (\\w+) <-(\\w+)", line) - if m is None: - raise BrokenLineException - return (m.group(1), m.group(2), m.group(3)) - - -def main(): - CallTree.ROOT = CallTree("Root (Nowhere)", None, None) - tree = CallTree.ROOT - - for line in sys.stdin: - try: - calltime, callee, caller = parseLine(line) - except BrokenLineException: - break - except CommentLineException: - continue - tree = tree.getParent(caller) - tree = tree.calls(callee, calltime) - - print CallTree.ROOT - -if __name__ == "__main__": - main()