diff --git a/[refs] b/[refs]
index 98dbe08220b5..c5e849afafbb 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
---
-refs/heads/master: f7e6746ebae984ea67b0a1a1e23c7e6698240631
+refs/heads/master: 8d973b624ece3b85cfae9474935795d034f72faf
diff --git a/trunk/Documentation/DocBook/debugobjects.tmpl b/trunk/Documentation/DocBook/debugobjects.tmpl
index 24979f691e3e..08ff908aa7a2 100644
--- a/trunk/Documentation/DocBook/debugobjects.tmpl
+++ b/trunk/Documentation/DocBook/debugobjects.tmpl
@@ -96,7 +96,6 @@
debug_object_deactivate
debug_object_destroy
debug_object_free
- debug_object_assert_init
Each of these functions takes the address of the real object and
a pointer to the object type specific debug description
@@ -274,26 +273,6 @@
debug checks.
-
-
- debug_object_assert_init
-
- This function is called to assert that an object has been
- initialized.
-
-
- When the real object is not tracked by debugobjects, it calls
- fixup_assert_init of the object type description structure
- provided by the caller, with the hardcoded object state
- ODEBUG_NOT_AVAILABLE. The fixup function can correct the problem
- by calling debug_object_init and other specific initializing
- functions.
-
-
- When the real object is already tracked by debugobjects it is
- ignored.
-
-
Fixup functions
@@ -402,35 +381,6 @@
statistics.
-
- fixup_assert_init
-
- This function is called from the debug code whenever a problem
- in debug_object_assert_init is detected.
-
-
- Called from debug_object_assert_init() with a hardcoded state
- ODEBUG_STATE_NOTAVAILABLE when the object is not found in the
- debug bucket.
-
-
- The function returns 1 when the fixup was successful,
- otherwise 0. The return value is used to update the
- statistics.
-
-
- Note, this function should make sure debug_object_init() is
- called before returning.
-
-
- The handling of statically initialized objects is a special
- case. The fixup function should check if this is a legitimate
- case of a statically initialized object or not. In this case only
- debug_object_init() should be called to make the object known to
- the tracker. Then the function should return 0 because this is not
- a real fixup.
-
-
Known Bugs And Assumptions
diff --git a/trunk/Documentation/RCU/checklist.txt b/trunk/Documentation/RCU/checklist.txt
index bff2d8be1e18..0c134f8afc6f 100644
--- a/trunk/Documentation/RCU/checklist.txt
+++ b/trunk/Documentation/RCU/checklist.txt
@@ -328,12 +328,6 @@ over a rather long period of time, but improvements are always welcome!
RCU rather than SRCU, because RCU is almost always faster and
easier to use than is SRCU.
- If you need to enter your read-side critical section in a
- hardirq or exception handler, and then exit that same read-side
- critical section in the task that was interrupted, then you need
- to srcu_read_lock_raw() and srcu_read_unlock_raw(), which avoid
- the lockdep checking that would otherwise make this practice illegal.
-
Also unlike other forms of RCU, explicit initialization
and cleanup is required via init_srcu_struct() and
cleanup_srcu_struct(). These are passed a "struct srcu_struct"
diff --git a/trunk/Documentation/RCU/rcu.txt b/trunk/Documentation/RCU/rcu.txt
index bf778332a28f..31852705b586 100644
--- a/trunk/Documentation/RCU/rcu.txt
+++ b/trunk/Documentation/RCU/rcu.txt
@@ -38,11 +38,11 @@ o How can the updater tell when a grace period has completed
Preemptible variants of RCU (CONFIG_TREE_PREEMPT_RCU) get the
same effect, but require that the readers manipulate CPU-local
- counters. These counters allow limited types of blocking within
- RCU read-side critical sections. SRCU also uses CPU-local
- counters, and permits general blocking within RCU read-side
- critical sections. These variants of RCU detect grace periods
- by sampling these counters.
+ counters. These counters allow limited types of blocking
+ within RCU read-side critical sections. SRCU also uses
+ CPU-local counters, and permits general blocking within
+ RCU read-side critical sections. These two variants of
+ RCU detect grace periods by sampling these counters.
o If I am running on a uniprocessor kernel, which can only do one
thing at a time, why should I wait for a grace period?
diff --git a/trunk/Documentation/RCU/stallwarn.txt b/trunk/Documentation/RCU/stallwarn.txt
index 083d88cbc089..4e959208f736 100644
--- a/trunk/Documentation/RCU/stallwarn.txt
+++ b/trunk/Documentation/RCU/stallwarn.txt
@@ -101,11 +101,6 @@ o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
CONFIG_TREE_PREEMPT_RCU case, you might see stall-warning
messages.
-o A hardware or software issue shuts off the scheduler-clock
- interrupt on a CPU that is not in dyntick-idle mode. This
- problem really has happened, and seems to be most likely to
- result in RCU CPU stall warnings for CONFIG_NO_HZ=n kernels.
-
o A bug in the RCU implementation.
o A hardware failure. This is quite unlikely, but has occurred
@@ -114,11 +109,12 @@ o A hardware failure. This is quite unlikely, but has occurred
This resulted in a series of RCU CPU stall warnings, eventually
 leading to the realization that the CPU had failed.
-The RCU, RCU-sched, and RCU-bh implementations have CPU stall warning.
-SRCU does not have its own CPU stall warnings, but its calls to
-synchronize_sched() will result in RCU-sched detecting RCU-sched-related
-CPU stalls. Please note that RCU only detects CPU stalls when there is
-a grace period in progress. No grace period, no CPU stall warnings.
+The RCU, RCU-sched, and RCU-bh implementations have CPU stall
+warnings. SRCU does not have its own CPU stall warnings, but its
+calls to synchronize_sched() will result in RCU-sched detecting
+RCU-sched-related CPU stalls. Please note that RCU only detects
+CPU stalls when there is a grace period in progress. No grace period,
+no CPU stall warnings.
To diagnose the cause of the stall, inspect the stack traces.
The offending function will usually be near the top of the stack.
diff --git a/trunk/Documentation/RCU/torture.txt b/trunk/Documentation/RCU/torture.txt
index d67068d0d2b9..783d6c134d3f 100644
--- a/trunk/Documentation/RCU/torture.txt
+++ b/trunk/Documentation/RCU/torture.txt
@@ -61,24 +61,11 @@ nreaders This is the number of RCU reading threads supported.
To properly exercise RCU implementations with preemptible
read-side critical sections.
-onoff_interval
- The number of seconds between each attempt to execute a
- randomly selected CPU-hotplug operation. Defaults to
- zero, which disables CPU hotplugging. In HOTPLUG_CPU=n
- kernels, rcutorture will silently refuse to do any
- CPU-hotplug operations regardless of what value is
- specified for onoff_interval.
-
shuffle_interval
The number of seconds to keep the test threads affinitied
to a particular subset of the CPUs, defaults to 3 seconds.
Used in conjunction with test_no_idle_hz.
-shutdown_secs The number of seconds to run the test before terminating
- the test and powering off the system. The default is
- zero, which disables test termination and system shutdown.
- This capability is useful for automated testing.
-
stat_interval The number of seconds between output of torture
statistics (via printk()). Regardless of the interval,
statistics are printed when the module is unloaded.
diff --git a/trunk/Documentation/RCU/trace.txt b/trunk/Documentation/RCU/trace.txt
index 49587abfc2f7..aaf65f6c6cd7 100644
--- a/trunk/Documentation/RCU/trace.txt
+++ b/trunk/Documentation/RCU/trace.txt
@@ -105,10 +105,14 @@ o "dt" is the current value of the dyntick counter that is incremented
or one greater than the interrupt-nesting depth otherwise.
The number after the second "/" is the NMI nesting depth.
+ This field is displayed only for CONFIG_NO_HZ kernels.
+
o "df" is the number of times that some other CPU has forced a
quiescent state on behalf of this CPU due to this CPU being in
dynticks-idle state.
+ This field is displayed only for CONFIG_NO_HZ kernels.
+
o "of" is the number of times that some other CPU has forced a
quiescent state on behalf of this CPU due to this CPU being
offline. In a perfect world, this might never happen, but it
diff --git a/trunk/Documentation/RCU/whatisRCU.txt b/trunk/Documentation/RCU/whatisRCU.txt
index 6bbe8dcdc3da..6ef692667e2f 100644
--- a/trunk/Documentation/RCU/whatisRCU.txt
+++ b/trunk/Documentation/RCU/whatisRCU.txt
@@ -4,7 +4,6 @@ to start learning about RCU:
1. What is RCU, Fundamentally? http://lwn.net/Articles/262464/
2. What is RCU? Part 2: Usage http://lwn.net/Articles/263130/
3. RCU part 3: the RCU API http://lwn.net/Articles/264090/
-4. The RCU API, 2010 Edition http://lwn.net/Articles/418853/
What is RCU?
@@ -835,8 +834,6 @@ SRCU: Critical sections Grace period Barrier
srcu_read_lock synchronize_srcu N/A
srcu_read_unlock synchronize_srcu_expedited
- srcu_read_lock_raw
- srcu_read_unlock_raw
srcu_dereference
SRCU: Initialization/cleanup
@@ -858,33 +855,27 @@ list can be helpful:
a. Will readers need to block? If so, you need SRCU.
-b. Is it necessary to start a read-side critical section in a
- hardirq handler or exception handler, and then to complete
- this read-side critical section in the task that was
- interrupted? If so, you need SRCU's srcu_read_lock_raw() and
- srcu_read_unlock_raw() primitives.
-
-c. What about the -rt patchset? If readers would need to block
+b. What about the -rt patchset? If readers would need to block
 in a non-rt kernel, you need SRCU. If readers would block
in a -rt kernel, but not in a non-rt kernel, SRCU is not
necessary.
-d. Do you need to treat NMI handlers, hardirq handlers,
+c. Do you need to treat NMI handlers, hardirq handlers,
and code segments with preemption disabled (whether
via preempt_disable(), local_irq_save(), local_bh_disable(),
or some other mechanism) as if they were explicit RCU readers?
If so, you need RCU-sched.
-e. Do you need RCU grace periods to complete even in the face
+d. Do you need RCU grace periods to complete even in the face
of softirq monopolization of one or more of the CPUs? For
example, is your code subject to network-based denial-of-service
attacks? If so, you need RCU-bh.
-f. Is your workload too update-intensive for normal use of
+e. Is your workload too update-intensive for normal use of
RCU, but inappropriate for other synchronization mechanisms?
If so, consider SLAB_DESTROY_BY_RCU. But please be careful!
-g. Otherwise, use RCU.
+f. Otherwise, use RCU.
Of course, this all assumes that you have determined that RCU is in fact
the right tool for your job.
diff --git a/trunk/Documentation/atomic_ops.txt b/trunk/Documentation/atomic_ops.txt
index 27f2b21a9d5c..3bd585b44927 100644
--- a/trunk/Documentation/atomic_ops.txt
+++ b/trunk/Documentation/atomic_ops.txt
@@ -84,93 +84,6 @@ compiler optimizes the section accessing atomic_t variables.
*** YOU HAVE BEEN WARNED! ***
-Properly aligned pointers, longs, ints, and chars (and unsigned
-equivalents) may be atomically loaded from and stored to in the same
-sense as described for atomic_read() and atomic_set(). The ACCESS_ONCE()
-macro should be used to prevent the compiler from using optimizations
-that might otherwise optimize accesses out of existence on the one hand,
-or that might create unsolicited accesses on the other.
-
-For example consider the following code:
-
- while (a > 0)
- do_something();
-
-If the compiler can prove that do_something() does not store to the
-variable a, then the compiler is within its rights transforming this to
-the following:
-
- tmp = a;
- if (a > 0)
- for (;;)
- do_something();
-
-If you don't want the compiler to do this (and you probably don't), then
-you should use something like the following:
-
- while (ACCESS_ONCE(a) < 0)
- do_something();
-
-Alternatively, you could place a barrier() call in the loop.
-
-For another example, consider the following code:
-
- tmp_a = a;
- do_something_with(tmp_a);
- do_something_else_with(tmp_a);
-
-If the compiler can prove that do_something_with() does not store to the
-variable a, then the compiler is within its rights to manufacture an
-additional load as follows:
-
- tmp_a = a;
- do_something_with(tmp_a);
- tmp_a = a;
- do_something_else_with(tmp_a);
-
-This could fatally confuse your code if it expected the same value
-to be passed to do_something_with() and do_something_else_with().
-
-The compiler would be likely to manufacture this additional load if
-do_something_with() was an inline function that made very heavy use
-of registers: reloading from variable a could save a flush to the
-stack and later reload. To prevent the compiler from attacking your
-code in this manner, write the following:
-
- tmp_a = ACCESS_ONCE(a);
- do_something_with(tmp_a);
- do_something_else_with(tmp_a);
-
-For a final example, consider the following code, assuming that the
-variable a is set at boot time before the second CPU is brought online
-and never changed later, so that memory barriers are not needed:
-
- if (a)
- b = 9;
- else
- b = 42;
-
-The compiler is within its rights to manufacture an additional store
-by transforming the above code into the following:
-
- b = 42;
- if (a)
- b = 9;
-
-This could come as a fatal surprise to other code running concurrently
-that expected b to never have the value 42 if a was zero. To prevent
-the compiler from doing this, write something like:
-
- if (a)
- ACCESS_ONCE(b) = 9;
- else
- ACCESS_ONCE(b) = 42;
-
-Don't even -think- about doing this without proper use of memory barriers,
-locks, or atomic operations if variable a can change at runtime!
-
-*** WARNING: ACCESS_ONCE() DOES NOT IMPLY A BARRIER! ***
-
Now, we move onto the atomic operation interfaces typically implemented with
the help of assembly code.
diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt
index e229769606f2..81c287fad79d 100644
--- a/trunk/Documentation/kernel-parameters.txt
+++ b/trunk/Documentation/kernel-parameters.txt
@@ -1885,11 +1885,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
arch_perfmon: [X86] Force use of architectural
perfmon on Intel CPUs instead of the
CPU specific event set.
- timer: [X86] Force use of architectural NMI
- timer mode (see also oprofile.timer
- for generic hr timer mode)
- [s390] Force legacy basic mode sampling
- (report cpu_type "timer")
oops=panic Always panic on oopses. Default is to just kill the
process, but there is a small probability of
@@ -2755,10 +2750,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
functions are at fixed addresses, they make nice
targets for exploits that can control RIP.
- emulate [default] Vsyscalls turn into traps and are
- emulated reasonably safely.
+ emulate Vsyscalls turn into traps and are emulated
+ reasonably safely.
- native Vsyscalls are native syscall instructions.
+ native [default] Vsyscalls are native syscall
+ instructions.
This is a little bit faster than trapping
and makes a few dynamic recompilers work
better than they would in emulation mode.
diff --git a/trunk/Documentation/lockdep-design.txt b/trunk/Documentation/lockdep-design.txt
index 5dbc99c04f6e..abf768c681e2 100644
--- a/trunk/Documentation/lockdep-design.txt
+++ b/trunk/Documentation/lockdep-design.txt
@@ -221,66 +221,3 @@ when the chain is validated for the first time, is then put into a hash
table, which hash-table can be checked in a lockfree manner. If the
locking chain occurs again later on, the hash table tells us that we
dont have to validate the chain again.
-
-Troubleshooting:
-----------------
-
-The validator tracks a maximum of MAX_LOCKDEP_KEYS number of lock classes.
-Exceeding this number will trigger the following lockdep warning:
-
- (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
-
-By default, MAX_LOCKDEP_KEYS is currently set to 8191, and typical
-desktop systems have less than 1,000 lock classes, so this warning
-normally results from lock-class leakage or failure to properly
-initialize locks. These two problems are illustrated below:
-
-1. Repeated module loading and unloading while running the validator
- will result in lock-class leakage. The issue here is that each
- load of the module will create a new set of lock classes for
- that module's locks, but module unloading does not remove old
- classes (see below discussion of reuse of lock classes for why).
- Therefore, if that module is loaded and unloaded repeatedly,
- the number of lock classes will eventually reach the maximum.
-
-2. Using structures such as arrays that have large numbers of
- locks that are not explicitly initialized. For example,
- a hash table with 8192 buckets where each bucket has its own
- spinlock_t will consume 8192 lock classes -unless- each spinlock
- is explicitly initialized at runtime, for example, using the
- run-time spin_lock_init() as opposed to compile-time initializers
- such as __SPIN_LOCK_UNLOCKED(). Failure to properly initialize
- the per-bucket spinlocks would guarantee lock-class overflow.
- In contrast, a loop that called spin_lock_init() on each lock
- would place all 8192 locks into a single lock class.
-
- The moral of this story is that you should always explicitly
- initialize your locks.
-
-One might argue that the validator should be modified to allow
-lock classes to be reused. However, if you are tempted to make this
-argument, first review the code and think through the changes that would
-be required, keeping in mind that the lock classes to be removed are
-likely to be linked into the lock-dependency graph. This turns out to
-be harder to do than to say.
-
-Of course, if you do run out of lock classes, the next thing to do is
-to find the offending lock classes. First, the following command gives
-you the number of lock classes currently in use along with the maximum:
-
- grep "lock-classes" /proc/lockdep_stats
-
-This command produces the following output on a modest system:
-
- lock-classes: 748 [max: 8191]
-
-If the number allocated (748 above) increases continually over time,
-then there is likely a leak. The following command can be used to
-identify the leaking lock classes:
-
- grep "BD" /proc/lockdep
-
-Run the command and save the output, then compare against the output from
-a later run of this command to identify the leakers. This same output
-can also help you find situations where runtime lock initialization has
-been omitted.
diff --git a/trunk/Documentation/trace/events.txt b/trunk/Documentation/trace/events.txt
index bb24c2a0e870..b510564aac7e 100644
--- a/trunk/Documentation/trace/events.txt
+++ b/trunk/Documentation/trace/events.txt
@@ -191,6 +191,8 @@ And for string fields they are:
Currently, only exact string matches are supported.
+Currently, the maximum number of predicates in a filter is 16.
+
5.2 Setting filters
-------------------
diff --git a/trunk/Documentation/virtual/kvm/api.txt b/trunk/Documentation/virtual/kvm/api.txt
index e2a4b5287361..7945b0bd35e2 100644
--- a/trunk/Documentation/virtual/kvm/api.txt
+++ b/trunk/Documentation/virtual/kvm/api.txt
@@ -1100,15 +1100,6 @@ emulate them efficiently. The fields in each entry are defined as follows:
eax, ebx, ecx, edx: the values returned by the cpuid instruction for
this function/index combination
-The TSC deadline timer feature (CPUID leaf 1, ecx[24]) is always returned
-as false, since the feature depends on KVM_CREATE_IRQCHIP for local APIC
-support. Instead it is reported via
-
- ioctl(KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER)
-
-if that returns true and you use KVM_CREATE_IRQCHIP, or if you emulate the
-feature in userspace, then you can enable the feature for KVM_SET_CPUID2.
-
4.47 KVM_PPC_GET_PVINFO
Capability: KVM_CAP_PPC_GET_PVINFO
@@ -1160,13 +1151,6 @@ following flags are specified:
/* Depends on KVM_CAP_IOMMU */
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
-The KVM_DEV_ASSIGN_ENABLE_IOMMU flag is a mandatory option to ensure
-isolation of the device. Usages not specifying this flag are deprecated.
-
-Only PCI header type 0 devices with PCI BAR resources are supported by
-device assignment. The user requesting this ioctl must have read/write
-access to the PCI sysfs resource files associated with the device.
-
4.49 KVM_DEASSIGN_PCI_DEVICE
Capability: KVM_CAP_DEVICE_DEASSIGNMENT
diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS
index 62f1cd357ddf..6afba60c3904 100644
--- a/trunk/MAINTAINERS
+++ b/trunk/MAINTAINERS
@@ -1698,9 +1698,11 @@ F: arch/x86/include/asm/tce.h
CAN NETWORK LAYER
M: Oliver Hartkopp
+M: Oliver Hartkopp
+M: Urs Thuermann
L: linux-can@vger.kernel.org
-W: http://gitorious.org/linux-can
-T: git git://gitorious.org/linux-can/linux-can-next.git
+L: netdev@vger.kernel.org
+W: http://developer.berlios.de/projects/socketcan/
S: Maintained
F: net/can/
F: include/linux/can.h
@@ -1711,10 +1713,9 @@ F: include/linux/can/gw.h
CAN NETWORK DRIVERS
M: Wolfgang Grandegger
-M: Marc Kleine-Budde
L: linux-can@vger.kernel.org
-W: http://gitorious.org/linux-can
-T: git git://gitorious.org/linux-can/linux-can-next.git
+L: netdev@vger.kernel.org
+W: http://developer.berlios.de/projects/socketcan/
S: Maintained
F: drivers/net/can/
F: include/linux/can/dev.h
@@ -2699,7 +2700,7 @@ FIREWIRE SUBSYSTEM
M: Stefan Richter
L: linux1394-devel@lists.sourceforge.net
W: http://ieee1394.wiki.kernel.org/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394-2.6.git
S: Maintained
F: drivers/firewire/
F: include/linux/firewire*.h
diff --git a/trunk/Makefile b/trunk/Makefile
index adddd11c3b3b..a43733df3978 100644
--- a/trunk/Makefile
+++ b/trunk/Makefile
@@ -1,7 +1,7 @@
VERSION = 3
PATCHLEVEL = 2
SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc6
NAME = Saber-toothed Squirrel
# *DOCUMENTATION*
diff --git a/trunk/arch/Kconfig b/trunk/arch/Kconfig
index 2505740b81d2..4b0669cbb3b0 100644
--- a/trunk/arch/Kconfig
+++ b/trunk/arch/Kconfig
@@ -30,10 +30,6 @@ config OPROFILE_EVENT_MULTIPLEX
config HAVE_OPROFILE
bool
-config OPROFILE_NMI_TIMER
- def_bool y
- depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI
-
config KPROBES
bool "Kprobes"
depends on MODULES
diff --git a/trunk/arch/arm/Kconfig b/trunk/arch/arm/Kconfig
index b259c7c644e3..776d76b8cb69 100644
--- a/trunk/arch/arm/Kconfig
+++ b/trunk/arch/arm/Kconfig
@@ -1246,7 +1246,7 @@ config PL310_ERRATA_588369
config ARM_ERRATA_720789
bool "ARM errata: TLBIASIDIS and TLBIMVAIS operations can broadcast a faulty ASID"
- depends on CPU_V7
+ depends on CPU_V7 && SMP
help
This option enables the workaround for the 720789 Cortex-A9 (prior to
r2p0) erratum. A faulty ASID can be sent to the other CPUs for the
@@ -1282,7 +1282,7 @@ config ARM_ERRATA_743622
config ARM_ERRATA_751472
bool "ARM errata: Interrupted ICIALLUIS may prevent completion of broadcasted operation"
- depends on CPU_V7
+ depends on CPU_V7 && SMP
help
This option enables the workaround for the 751472 Cortex-A9 (prior
to r3p0) erratum. An interrupted ICIALLUIS operation may prevent the
diff --git a/trunk/arch/arm/common/pl330.c b/trunk/arch/arm/common/pl330.c
index 8d8df744f7a5..f407a6b35d3d 100644
--- a/trunk/arch/arm/common/pl330.c
+++ b/trunk/arch/arm/common/pl330.c
@@ -221,6 +221,17 @@
*/
#define MCODE_BUFF_PER_REQ 256
+/*
+ * Mark a _pl330_req as free.
+ * We do it by writing DMAEND as the first instruction
+ * because no valid request is going to have DMAEND as
+ * its first instruction to execute.
+ */
+#define MARK_FREE(req) do { \
+ _emit_END(0, (req)->mc_cpu); \
+ (req)->mc_len = 0; \
+ } while (0)
+
/* If the _pl330_req is available to the client */
#define IS_FREE(req) (*((u8 *)((req)->mc_cpu)) == CMD_DMAEND)
@@ -290,10 +301,8 @@ struct pl330_thread {
struct pl330_dmac *dmac;
/* Only two at a time */
struct _pl330_req req[2];
- /* Index of the last enqueued request */
+ /* Index of the last submitted request */
unsigned lstenq;
- /* Index of the last submitted request or -1 if the DMA is stopped */
- int req_running;
};
enum pl330_dmac_state {
@@ -769,22 +778,6 @@ static inline void _execute_DBGINSN(struct pl330_thread *thrd,
writel(0, regs + DBGCMD);
}
-/*
- * Mark a _pl330_req as free.
- * We do it by writing DMAEND as the first instruction
- * because no valid request is going to have DMAEND as
- * its first instruction to execute.
- */
-static void mark_free(struct pl330_thread *thrd, int idx)
-{
- struct _pl330_req *req = &thrd->req[idx];
-
- _emit_END(0, req->mc_cpu);
- req->mc_len = 0;
-
- thrd->req_running = -1;
-}
-
static inline u32 _state(struct pl330_thread *thrd)
{
void __iomem *regs = thrd->dmac->pinfo->base;
@@ -843,6 +836,31 @@ static inline u32 _state(struct pl330_thread *thrd)
}
}
+/* If the request 'req' of thread 'thrd' is currently active */
+static inline bool _req_active(struct pl330_thread *thrd,
+ struct _pl330_req *req)
+{
+ void __iomem *regs = thrd->dmac->pinfo->base;
+ u32 buf = req->mc_bus, pc = readl(regs + CPC(thrd->id));
+
+ if (IS_FREE(req))
+ return false;
+
+ return (pc >= buf && pc <= buf + req->mc_len) ? true : false;
+}
+
+/* Returns 0 if the thread is inactive, ID of active req + 1 otherwise */
+static inline unsigned _thrd_active(struct pl330_thread *thrd)
+{
+ if (_req_active(thrd, &thrd->req[0]))
+ return 1; /* First req active */
+
+ if (_req_active(thrd, &thrd->req[1]))
+ return 2; /* Second req active */
+
+ return 0;
+}
+
static void _stop(struct pl330_thread *thrd)
{
void __iomem *regs = thrd->dmac->pinfo->base;
@@ -874,22 +892,17 @@ static bool _trigger(struct pl330_thread *thrd)
struct _arg_GO go;
unsigned ns;
u8 insn[6] = {0, 0, 0, 0, 0, 0};
- int idx;
/* Return if already ACTIVE */
if (_state(thrd) != PL330_STATE_STOPPED)
return true;
- idx = 1 - thrd->lstenq;
- if (!IS_FREE(&thrd->req[idx]))
- req = &thrd->req[idx];
- else {
- idx = thrd->lstenq;
- if (!IS_FREE(&thrd->req[idx]))
- req = &thrd->req[idx];
- else
- req = NULL;
- }
+ if (!IS_FREE(&thrd->req[1 - thrd->lstenq]))
+ req = &thrd->req[1 - thrd->lstenq];
+ else if (!IS_FREE(&thrd->req[thrd->lstenq]))
+ req = &thrd->req[thrd->lstenq];
+ else
+ req = NULL;
/* Return if no request */
if (!req || !req->r)
@@ -920,8 +933,6 @@ static bool _trigger(struct pl330_thread *thrd)
/* Only manager can execute GO */
_execute_DBGINSN(thrd, insn, true);
- thrd->req_running = idx;
-
return true;
}
@@ -1371,8 +1382,8 @@ static void pl330_dotask(unsigned long data)
thrd->req[0].r = NULL;
thrd->req[1].r = NULL;
- mark_free(thrd, 0);
- mark_free(thrd, 1);
+ MARK_FREE(&thrd->req[0]);
+ MARK_FREE(&thrd->req[1]);
/* Clear the reset flag */
pl330->dmac_tbd.reset_chan &= ~(1 << i);
@@ -1450,12 +1461,14 @@ int pl330_update(const struct pl330_info *pi)
thrd = &pl330->channels[id];
- active = thrd->req_running;
- if (active == -1) /* Aborted */
+ active = _thrd_active(thrd);
+ if (!active) /* Aborted */
continue;
+ active -= 1;
+
rqdone = &thrd->req[active];
- mark_free(thrd, active);
+ MARK_FREE(rqdone);
/* Get going again ASAP */
_start(thrd);
@@ -1496,7 +1509,7 @@ int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op)
struct pl330_thread *thrd = ch_id;
struct pl330_dmac *pl330;
unsigned long flags;
- int ret = 0, active = thrd->req_running;
+ int ret = 0, active;
if (!thrd || thrd->free || thrd->dmac->state == DYING)
return -EINVAL;
@@ -1512,24 +1525,28 @@ int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op)
thrd->req[0].r = NULL;
thrd->req[1].r = NULL;
- mark_free(thrd, 0);
- mark_free(thrd, 1);
+ MARK_FREE(&thrd->req[0]);
+ MARK_FREE(&thrd->req[1]);
break;
case PL330_OP_ABORT:
+ active = _thrd_active(thrd);
+
/* Make sure the channel is stopped */
_stop(thrd);
/* ABORT is only for the active req */
- if (active == -1)
+ if (!active)
break;
+ active--;
+
thrd->req[active].r = NULL;
- mark_free(thrd, active);
+ MARK_FREE(&thrd->req[active]);
/* Start the next */
case PL330_OP_START:
- if ((active == -1) && !_start(thrd))
+ if (!_thrd_active(thrd) && !_start(thrd))
ret = -EIO;
break;
@@ -1570,13 +1587,14 @@ int pl330_chan_status(void *ch_id, struct pl330_chanstatus *pstatus)
else
pstatus->faulting = false;
- active = thrd->req_running;
+ active = _thrd_active(thrd);
- if (active == -1) {
+ if (!active) {
/* Indicate that the thread is not running */
pstatus->top_req = NULL;
pstatus->wait_req = NULL;
} else {
+ active--;
pstatus->top_req = thrd->req[active].r;
pstatus->wait_req = !IS_FREE(&thrd->req[1 - active])
? thrd->req[1 - active].r : NULL;
@@ -1641,9 +1659,9 @@ void *pl330_request_channel(const struct pl330_info *pi)
thrd->free = false;
thrd->lstenq = 1;
thrd->req[0].r = NULL;
- mark_free(thrd, 0);
+ MARK_FREE(&thrd->req[0]);
thrd->req[1].r = NULL;
- mark_free(thrd, 1);
+ MARK_FREE(&thrd->req[1]);
break;
}
}
@@ -1749,14 +1767,14 @@ static inline void _reset_thread(struct pl330_thread *thrd)
thrd->req[0].mc_bus = pl330->mcode_bus
+ (thrd->id * pi->mcbufsz);
thrd->req[0].r = NULL;
- mark_free(thrd, 0);
+ MARK_FREE(&thrd->req[0]);
thrd->req[1].mc_cpu = thrd->req[0].mc_cpu
+ pi->mcbufsz / 2;
thrd->req[1].mc_bus = thrd->req[0].mc_bus
+ pi->mcbufsz / 2;
thrd->req[1].r = NULL;
- mark_free(thrd, 1);
+ MARK_FREE(&thrd->req[1]);
}
static int dmac_alloc_threads(struct pl330_dmac *pl330)
diff --git a/trunk/arch/arm/configs/imx_v4_v5_defconfig b/trunk/arch/arm/configs/imx_v4_v5_defconfig
index cf497ce41dfe..11a4192197c8 100644
--- a/trunk/arch/arm/configs/imx_v4_v5_defconfig
+++ b/trunk/arch/arm/configs/imx_v4_v5_defconfig
@@ -18,10 +18,9 @@ CONFIG_ARCH_MXC=y
CONFIG_ARCH_IMX_V4_V5=y
CONFIG_ARCH_MX1ADS=y
CONFIG_MACH_SCB9328=y
-CONFIG_MACH_APF9328=y
CONFIG_MACH_MX21ADS=y
CONFIG_MACH_MX25_3DS=y
-CONFIG_MACH_EUKREA_CPUIMX25SD=y
+CONFIG_MACH_EUKREA_CPUIMX25=y
CONFIG_MACH_MX27ADS=y
CONFIG_MACH_PCM038=y
CONFIG_MACH_CPUIMX27=y
@@ -73,16 +72,17 @@ CONFIG_MTD_CFI_GEOMETRY=y
CONFIG_MTD_CFI_INTELEXT=y
CONFIG_MTD_PHYSMAP=y
CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_MXC=y
CONFIG_MTD_UBI=y
CONFIG_MISC_DEVICES=y
CONFIG_EEPROM_AT24=y
CONFIG_EEPROM_AT25=y
CONFIG_NETDEVICES=y
-CONFIG_DM9000=y
+CONFIG_NET_ETHERNET=y
CONFIG_SMC91X=y
+CONFIG_DM9000=y
CONFIG_SMC911X=y
-CONFIG_SMSC_PHY=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT_MOUSEDEV is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_KEYBOARD is not set
@@ -100,7 +100,6 @@ CONFIG_I2C_CHARDEV=y
CONFIG_I2C_IMX=y
CONFIG_SPI=y
CONFIG_SPI_IMX=y
-CONFIG_SPI_SPIDEV=y
CONFIG_W1=y
CONFIG_W1_MASTER_MXC=y
CONFIG_W1_SLAVE_THERM=y
@@ -140,7 +139,6 @@ CONFIG_MMC=y
CONFIG_MMC_MXC=y
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y
-CONFIG_LEDS_GPIO=y
CONFIG_LEDS_MC13783=y
CONFIG_LEDS_TRIGGERS=y
CONFIG_LEDS_TRIGGER_TIMER=y
diff --git a/trunk/arch/arm/kernel/process.c b/trunk/arch/arm/kernel/process.c
index e8e8fe505df1..3d0c6fb74ae4 100644
--- a/trunk/arch/arm/kernel/process.c
+++ b/trunk/arch/arm/kernel/process.c
@@ -183,8 +183,7 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
- tick_nohz_idle_enter();
- rcu_idle_enter();
+ tick_nohz_stop_sched_tick(1);
leds_event(led_idle_start);
while (!need_resched()) {
#ifdef CONFIG_HOTPLUG_CPU
@@ -214,8 +213,7 @@ void cpu_idle(void)
}
}
leds_event(led_idle_end);
- rcu_idle_exit();
- tick_nohz_idle_exit();
+ tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
schedule();
preempt_disable();
diff --git a/trunk/arch/arm/kernel/setup.c b/trunk/arch/arm/kernel/setup.c
index c0b59bff6be6..8fc2c8fcbdc6 100644
--- a/trunk/arch/arm/kernel/setup.c
+++ b/trunk/arch/arm/kernel/setup.c
@@ -52,7 +52,6 @@
#include
#include
#include
-#include
#if defined(CONFIG_DEPRECATED_PARAM_STRUCT)
#include "compat.h"
diff --git a/trunk/arch/arm/mach-exynos/cpu.c b/trunk/arch/arm/mach-exynos/cpu.c
index cc8d4bd6d0f7..90ec247f3b37 100644
--- a/trunk/arch/arm/mach-exynos/cpu.c
+++ b/trunk/arch/arm/mach-exynos/cpu.c
@@ -110,6 +110,11 @@ static struct map_desc exynos4_iodesc[] __initdata = {
.pfn = __phys_to_pfn(EXYNOS4_PA_DMC0),
.length = SZ_4K,
.type = MT_DEVICE,
+ }, {
+ .virtual = (unsigned long)S5P_VA_SROMC,
+ .pfn = __phys_to_pfn(EXYNOS4_PA_SROMC),
+ .length = SZ_4K,
+ .type = MT_DEVICE,
}, {
.virtual = (unsigned long)S3C_VA_USB_HSPHY,
.pfn = __phys_to_pfn(EXYNOS4_PA_HSPHY),
diff --git a/trunk/arch/arm/mach-imx/Kconfig b/trunk/arch/arm/mach-imx/Kconfig
index 0e6f1af260b6..c44aa974e79c 100644
--- a/trunk/arch/arm/mach-imx/Kconfig
+++ b/trunk/arch/arm/mach-imx/Kconfig
@@ -132,7 +132,7 @@ config MACH_MX25_3DS
select IMX_HAVE_PLATFORM_MXC_NAND
select IMX_HAVE_PLATFORM_SDHCI_ESDHC_IMX
-config MACH_EUKREA_CPUIMX25SD
+config MACH_EUKREA_CPUIMX25
bool "Support Eukrea CPUIMX25 Platform"
select SOC_IMX25
select IMX_HAVE_PLATFORM_FLEXCAN
@@ -148,7 +148,7 @@ config MACH_EUKREA_CPUIMX25SD
choice
prompt "Baseboard"
- depends on MACH_EUKREA_CPUIMX25SD
+ depends on MACH_EUKREA_CPUIMX25
default MACH_EUKREA_MBIMXSD25_BASEBOARD
config MACH_EUKREA_MBIMXSD25_BASEBOARD
@@ -542,7 +542,7 @@ config MACH_MX35_3DS
Include support for MX35PDK platform. This includes specific
configurations for the board and its peripherals.
-config MACH_EUKREA_CPUIMX35SD
+config MACH_EUKREA_CPUIMX35
bool "Support Eukrea CPUIMX35 Platform"
select SOC_IMX35
select IMX_HAVE_PLATFORM_FLEXCAN
@@ -560,7 +560,7 @@ config MACH_EUKREA_CPUIMX35SD
choice
prompt "Baseboard"
- depends on MACH_EUKREA_CPUIMX35SD
+ depends on MACH_EUKREA_CPUIMX35
default MACH_EUKREA_MBIMXSD35_BASEBOARD
config MACH_EUKREA_MBIMXSD35_BASEBOARD
diff --git a/trunk/arch/arm/mach-imx/Makefile b/trunk/arch/arm/mach-imx/Makefile
index d97f409ce98b..aba73214c2a8 100644
--- a/trunk/arch/arm/mach-imx/Makefile
+++ b/trunk/arch/arm/mach-imx/Makefile
@@ -24,7 +24,7 @@ obj-$(CONFIG_MACH_MX21ADS) += mach-mx21ads.o
# i.MX25 based machines
obj-$(CONFIG_MACH_MX25_3DS) += mach-mx25_3ds.o
-obj-$(CONFIG_MACH_EUKREA_CPUIMX25SD) += mach-eukrea_cpuimx25.o
+obj-$(CONFIG_MACH_EUKREA_CPUIMX25) += mach-eukrea_cpuimx25.o
obj-$(CONFIG_MACH_EUKREA_MBIMXSD25_BASEBOARD) += eukrea_mbimxsd25-baseboard.o
# i.MX27 based machines
@@ -57,7 +57,7 @@ obj-$(CONFIG_MACH_BUG) += mach-bug.o
# i.MX35 based machines
obj-$(CONFIG_MACH_PCM043) += mach-pcm043.o
obj-$(CONFIG_MACH_MX35_3DS) += mach-mx35_3ds.o
-obj-$(CONFIG_MACH_EUKREA_CPUIMX35SD) += mach-cpuimx35.o
+obj-$(CONFIG_MACH_EUKREA_CPUIMX35) += mach-cpuimx35.o
obj-$(CONFIG_MACH_EUKREA_MBIMXSD35_BASEBOARD) += eukrea_mbimxsd35-baseboard.o
obj-$(CONFIG_MACH_VPR200) += mach-vpr200.o
diff --git a/trunk/arch/arm/mach-imx/clock-imx35.c b/trunk/arch/arm/mach-imx/clock-imx35.c
index ac8238caecb9..8116f119517d 100644
--- a/trunk/arch/arm/mach-imx/clock-imx35.c
+++ b/trunk/arch/arm/mach-imx/clock-imx35.c
@@ -507,7 +507,7 @@ static struct clk_lookup lookups[] = {
int __init mx35_clocks_init()
{
- unsigned int cgr2 = 3 << 26;
+ unsigned int cgr2 = 3 << 26, cgr3 = 0;
#if defined(CONFIG_DEBUG_LL) && !defined(CONFIG_DEBUG_ICEDCC)
cgr2 |= 3 << 16;
@@ -521,12 +521,6 @@ int __init mx35_clocks_init()
__raw_writel((3 << 18), CCM_BASE + CCM_CGR0);
__raw_writel((3 << 2) | (3 << 4) | (3 << 6) | (3 << 8) | (3 << 16),
CCM_BASE + CCM_CGR1);
- __raw_writel(cgr2, CCM_BASE + CCM_CGR2);
- __raw_writel(0, CCM_BASE + CCM_CGR3);
-
- clk_enable(&iim_clk);
- imx_print_silicon_rev("i.MX35", mx35_revision());
- clk_disable(&iim_clk);
/*
* Check if we came up in internal boot mode. If yes, we need some
@@ -535,11 +529,17 @@ int __init mx35_clocks_init()
*/
if (!(__raw_readl(CCM_BASE + CCM_RCSR) & (3 << 10))) {
/* Additionally turn on UART1, SCC, and IIM clocks */
- clk_enable(&iim_clk);
- clk_enable(&uart1_clk);
- clk_enable(&scc_clk);
+ cgr2 |= 3 << 16 | 3 << 4;
+ cgr3 |= 3 << 2;
}
+ __raw_writel(cgr2, CCM_BASE + CCM_CGR2);
+ __raw_writel(cgr3, CCM_BASE + CCM_CGR3);
+
+ clk_enable(&iim_clk);
+ imx_print_silicon_rev("i.MX35", mx35_revision());
+ clk_disable(&iim_clk);
+
#ifdef CONFIG_MXC_USE_EPIT
epit_timer_init(&epit1_clk,
MX35_IO_ADDRESS(MX35_EPIT1_BASE_ADDR), MX35_INT_EPIT1);
diff --git a/trunk/arch/arm/mach-imx/mach-cpuimx35.c b/trunk/arch/arm/mach-imx/mach-cpuimx35.c
index 362aae780601..66af2e8f7e57 100644
--- a/trunk/arch/arm/mach-imx/mach-cpuimx35.c
+++ b/trunk/arch/arm/mach-imx/mach-cpuimx35.c
@@ -53,18 +53,12 @@ static const struct imxi2c_platform_data
.bitrate = 100000,
};
-#define TSC2007_IRQGPIO IMX_GPIO_NR(3, 2)
-static int tsc2007_get_pendown_state(void)
-{
- return !gpio_get_value(TSC2007_IRQGPIO);
-}
-
static struct tsc2007_platform_data tsc2007_info = {
.model = 2007,
.x_plate_ohms = 180,
- .get_pendown_state = tsc2007_get_pendown_state,
};
+#define TSC2007_IRQGPIO IMX_GPIO_NR(3, 2)
static struct i2c_board_info eukrea_cpuimx35_i2c_devices[] = {
{
I2C_BOARD_INFO("pcf8563", 0x51),
diff --git a/trunk/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/trunk/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index eef43e2e163e..7f8915ad5099 100644
--- a/trunk/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/trunk/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -3247,14 +3247,18 @@ static __initdata struct omap_hwmod *omap3xxx_hwmods[] = {
/* 3430ES1-only hwmods */
static __initdata struct omap_hwmod *omap3430es1_hwmods[] = {
+ &omap3xxx_iva_hwmod,
&omap3430es1_dss_core_hwmod,
+ &omap3xxx_mailbox_hwmod,
NULL
};
/* 3430ES2+-only hwmods */
static __initdata struct omap_hwmod *omap3430es2plus_hwmods[] = {
+ &omap3xxx_iva_hwmod,
&omap3xxx_dss_core_hwmod,
&omap3xxx_usbhsotg_hwmod,
+ &omap3xxx_mailbox_hwmod,
NULL
};
diff --git a/trunk/arch/arm/mm/init.c b/trunk/arch/arm/mm/init.c
index 7c38474e533a..fbdd12ea3a58 100644
--- a/trunk/arch/arm/mm/init.c
+++ b/trunk/arch/arm/mm/init.c
@@ -32,7 +32,6 @@
#include
#include
-#include
#include "mm.h"
@@ -333,6 +332,7 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL);
+ memblock_init();
for (i = 0; i < mi->nr_banks; i++)
memblock_add(mi->bank[i].start, mi->bank[i].size);
@@ -371,7 +371,7 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
if (mdesc->reserve)
mdesc->reserve();
- memblock_allow_resize();
+ memblock_analyze();
memblock_dump_all();
}
diff --git a/trunk/arch/arm/mm/proc-v7.S b/trunk/arch/arm/mm/proc-v7.S
index e70a73731eaa..2c559ac38142 100644
--- a/trunk/arch/arm/mm/proc-v7.S
+++ b/trunk/arch/arm/mm/proc-v7.S
@@ -363,13 +363,11 @@ __v7_setup:
orreq r10, r10, #1 << 6 @ set bit #6
mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register
#endif
-#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP)
- ALT_SMP(cmp r6, #0x30) @ present prior to r3p0
- ALT_UP_B(1f)
+#ifdef CONFIG_ARM_ERRATA_751472
+ cmp r6, #0x30 @ present prior to r3p0
mrclt p15, 0, r10, c15, c0, 1 @ read diagnostic register
orrlt r10, r10, #1 << 11 @ set bit #11
mcrlt p15, 0, r10, c15, c0, 1 @ write diagnostic register
-1:
#endif
3: mov r10, #0
diff --git a/trunk/arch/arm/plat-mxc/cpufreq.c b/trunk/arch/arm/plat-mxc/cpufreq.c
index 73db34bf588a..adbff706ef6f 100644
--- a/trunk/arch/arm/plat-mxc/cpufreq.c
+++ b/trunk/arch/arm/plat-mxc/cpufreq.c
@@ -98,7 +98,7 @@ static int mxc_set_target(struct cpufreq_policy *policy,
return ret;
}
-static int mxc_cpufreq_init(struct cpufreq_policy *policy)
+static int __init mxc_cpufreq_init(struct cpufreq_policy *policy)
{
int ret;
int i;
diff --git a/trunk/arch/arm/plat-mxc/include/mach/uncompress.h b/trunk/arch/arm/plat-mxc/include/mach/uncompress.h
index 477971b00930..88fd40452567 100644
--- a/trunk/arch/arm/plat-mxc/include/mach/uncompress.h
+++ b/trunk/arch/arm/plat-mxc/include/mach/uncompress.h
@@ -98,7 +98,6 @@ static __inline__ void __arch_decomp_setup(unsigned long arch_id)
case MACH_TYPE_PCM043:
case MACH_TYPE_LILLY1131:
case MACH_TYPE_VPR200:
- case MACH_TYPE_EUKREA_CPUIMX35SD:
uart_base = MX3X_UART1_BASE_ADDR;
break;
case MACH_TYPE_MAGX_ZN5:
diff --git a/trunk/arch/arm/plat-mxc/pwm.c b/trunk/arch/arm/plat-mxc/pwm.c
index e032717f7d02..845de59f07ed 100644
--- a/trunk/arch/arm/plat-mxc/pwm.c
+++ b/trunk/arch/arm/plat-mxc/pwm.c
@@ -77,15 +77,6 @@ int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns)
do_div(c, period_ns);
duty_cycles = c;
- /*
- * according to imx pwm RM, the real period value should be
- * PERIOD value in PWMPR plus 2.
- */
- if (period_cycles > 2)
- period_cycles -= 2;
- else
- period_cycles = 0;
-
writel(duty_cycles, pwm->mmio_base + MX3_PWMSAR);
writel(period_cycles, pwm->mmio_base + MX3_PWMPR);
diff --git a/trunk/arch/arm/plat-orion/gpio.c b/trunk/arch/arm/plat-orion/gpio.c
index 10d160888133..41ab97ebe4cf 100644
--- a/trunk/arch/arm/plat-orion/gpio.c
+++ b/trunk/arch/arm/plat-orion/gpio.c
@@ -384,16 +384,12 @@ void __init orion_gpio_init(int gpio_base, int ngpio,
struct orion_gpio_chip *ochip;
struct irq_chip_generic *gc;
struct irq_chip_type *ct;
- char gc_label[16];
if (orion_gpio_chip_count == ARRAY_SIZE(orion_gpio_chips))
return;
- snprintf(gc_label, sizeof(gc_label), "orion_gpio%d",
- orion_gpio_chip_count);
-
ochip = orion_gpio_chips + orion_gpio_chip_count;
- ochip->chip.label = kstrdup(gc_label, GFP_KERNEL);
+ ochip->chip.label = "orion_gpio";
ochip->chip.request = orion_gpio_request;
ochip->chip.direction_input = orion_gpio_direction_input;
ochip->chip.get = orion_gpio_get;
diff --git a/trunk/arch/arm/plat-samsung/include/plat/cpu-freq-core.h b/trunk/arch/arm/plat-samsung/include/plat/cpu-freq-core.h
index 95509d8eb140..dac4760c0f0a 100644
--- a/trunk/arch/arm/plat-samsung/include/plat/cpu-freq-core.h
+++ b/trunk/arch/arm/plat-samsung/include/plat/cpu-freq-core.h
@@ -202,6 +202,14 @@ extern int s3c_plltab_register(struct cpufreq_frequency_table *plls,
extern struct s3c_cpufreq_config *s3c_cpufreq_getconfig(void);
extern struct s3c_iotimings *s3c_cpufreq_getiotimings(void);
+extern void s3c2410_iotiming_debugfs(struct seq_file *seq,
+ struct s3c_cpufreq_config *cfg,
+ union s3c_iobank *iob);
+
+extern void s3c2412_iotiming_debugfs(struct seq_file *seq,
+ struct s3c_cpufreq_config *cfg,
+ union s3c_iobank *iob);
+
#ifdef CONFIG_CPU_FREQ_S3C24XX_DEBUGFS
#define s3c_cpufreq_debugfs_call(x) x
#else
@@ -218,10 +226,6 @@ extern void s3c2410_cpufreq_setrefresh(struct s3c_cpufreq_config *cfg);
extern void s3c2410_set_fvco(struct s3c_cpufreq_config *cfg);
#ifdef CONFIG_S3C2410_IOTIMING
-extern void s3c2410_iotiming_debugfs(struct seq_file *seq,
- struct s3c_cpufreq_config *cfg,
- union s3c_iobank *iob);
-
extern int s3c2410_iotiming_calc(struct s3c_cpufreq_config *cfg,
struct s3c_iotimings *iot);
@@ -231,7 +235,6 @@ extern int s3c2410_iotiming_get(struct s3c_cpufreq_config *cfg,
extern void s3c2410_iotiming_set(struct s3c_cpufreq_config *cfg,
struct s3c_iotimings *iot);
#else
-#define s3c2410_iotiming_debugfs NULL
#define s3c2410_iotiming_calc NULL
#define s3c2410_iotiming_get NULL
#define s3c2410_iotiming_set NULL
@@ -239,10 +242,8 @@ extern void s3c2410_iotiming_set(struct s3c_cpufreq_config *cfg,
/* S3C2412 compatible routines */
-#ifdef CONFIG_S3C2412_IOTIMING
-extern void s3c2412_iotiming_debugfs(struct seq_file *seq,
- struct s3c_cpufreq_config *cfg,
- union s3c_iobank *iob);
+extern int s3c2412_iotiming_get(struct s3c_cpufreq_config *cfg,
+ struct s3c_iotimings *timings);
extern int s3c2412_iotiming_get(struct s3c_cpufreq_config *cfg,
struct s3c_iotimings *timings);
@@ -252,12 +253,6 @@ extern int s3c2412_iotiming_calc(struct s3c_cpufreq_config *cfg,
extern void s3c2412_iotiming_set(struct s3c_cpufreq_config *cfg,
struct s3c_iotimings *iot);
-#else
-#define s3c2412_iotiming_debugfs NULL
-#define s3c2412_iotiming_calc NULL
-#define s3c2412_iotiming_get NULL
-#define s3c2412_iotiming_set NULL
-#endif /* CONFIG_S3C2412_IOTIMING */
#ifdef CONFIG_CPU_FREQ_S3C24XX_DEBUG
#define s3c_freq_dbg(x...) printk(KERN_INFO x)
diff --git a/trunk/arch/avr32/kernel/process.c b/trunk/arch/avr32/kernel/process.c
index ea3395750324..ef5a2a08fcca 100644
--- a/trunk/arch/avr32/kernel/process.c
+++ b/trunk/arch/avr32/kernel/process.c
@@ -34,12 +34,10 @@ void cpu_idle(void)
{
/* endless idle loop with no priority at all */
while (1) {
- tick_nohz_idle_enter();
- rcu_idle_enter();
+ tick_nohz_stop_sched_tick(1);
while (!need_resched())
cpu_idle_sleep();
- rcu_idle_exit();
- tick_nohz_idle_exit();
+ tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
schedule();
preempt_disable();
diff --git a/trunk/arch/blackfin/kernel/process.c b/trunk/arch/blackfin/kernel/process.c
index 8dd0416673cb..6a80a9e9fc4a 100644
--- a/trunk/arch/blackfin/kernel/process.c
+++ b/trunk/arch/blackfin/kernel/process.c
@@ -88,12 +88,10 @@ void cpu_idle(void)
#endif
if (!idle)
idle = default_idle;
- tick_nohz_idle_enter();
- rcu_idle_enter();
+ tick_nohz_stop_sched_tick(1);
while (!need_resched())
idle();
- rcu_idle_exit();
- tick_nohz_idle_exit();
+ tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
schedule();
preempt_disable();
diff --git a/trunk/arch/cris/arch-v32/kernel/time.c b/trunk/arch/cris/arch-v32/kernel/time.c
index 6773fc83a670..bb978ede8985 100644
--- a/trunk/arch/cris/arch-v32/kernel/time.c
+++ b/trunk/arch/cris/arch-v32/kernel/time.c
@@ -47,12 +47,14 @@ static struct clocksource cont_rotime = {
.rating = 300,
.read = read_cont_rotime,
.mask = CLOCKSOURCE_MASK(32),
+ .shift = 10,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
static int __init etrax_init_cont_rotime(void)
{
- clocksource_register_khz(&cont_rotime, 100000);
+ cont_rotime.mult = clocksource_khz2mult(100000, cont_rotime.shift);
+ clocksource_register(&cont_rotime);
return 0;
}
arch_initcall(etrax_init_cont_rotime);
diff --git a/trunk/arch/ia64/Kconfig b/trunk/arch/ia64/Kconfig
index 3b7a7c483785..27489b6dd533 100644
--- a/trunk/arch/ia64/Kconfig
+++ b/trunk/arch/ia64/Kconfig
@@ -23,9 +23,6 @@ config IA64
select HAVE_ARCH_TRACEHOOK
select HAVE_DMA_API_DEBUG
select HAVE_GENERIC_HARDIRQS
- select HAVE_MEMBLOCK
- select HAVE_MEMBLOCK_NODE_MAP
- select ARCH_DISCARD_MEMBLOCK
select GENERIC_IRQ_PROBE
select GENERIC_PENDING_IRQ if SMP
select IRQ_PER_CPU
@@ -477,6 +474,9 @@ config NODES_SHIFT
MAX_NUMNODES will be 2^(This value).
If in doubt, use the default.
+config ARCH_POPULATES_NODE_MAP
+ def_bool y
+
# VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent.
# VIRTUAL_MEM_MAP has been retained for historical reasons.
config VIRTUAL_MEM_MAP
diff --git a/trunk/arch/ia64/include/asm/cputime.h b/trunk/arch/ia64/include/asm/cputime.h
index 3deac956d325..6073b187528a 100644
--- a/trunk/arch/ia64/include/asm/cputime.h
+++ b/trunk/arch/ia64/include/asm/cputime.h
@@ -26,53 +26,59 @@
#include
#include
-typedef u64 __nocast cputime_t;
-typedef u64 __nocast cputime64_t;
+typedef u64 cputime_t;
+typedef u64 cputime64_t;
+#define cputime_zero ((cputime_t)0)
#define cputime_one_jiffy jiffies_to_cputime(1)
+#define cputime_max ((~((cputime_t)0) >> 1) - 1)
+#define cputime_add(__a, __b) ((__a) + (__b))
+#define cputime_sub(__a, __b) ((__a) - (__b))
+#define cputime_div(__a, __n) ((__a) / (__n))
+#define cputime_halve(__a) ((__a) >> 1)
+#define cputime_eq(__a, __b) ((__a) == (__b))
+#define cputime_gt(__a, __b) ((__a) > (__b))
+#define cputime_ge(__a, __b) ((__a) >= (__b))
+#define cputime_lt(__a, __b) ((__a) < (__b))
+#define cputime_le(__a, __b) ((__a) <= (__b))
+
+#define cputime64_zero ((cputime64_t)0)
+#define cputime64_add(__a, __b) ((__a) + (__b))
+#define cputime64_sub(__a, __b) ((__a) - (__b))
+#define cputime_to_cputime64(__ct) (__ct)
/*
* Convert cputime <-> jiffies (HZ)
*/
-#define cputime_to_jiffies(__ct) \
- ((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
-#define jiffies_to_cputime(__jif) \
- (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
-#define cputime64_to_jiffies64(__ct) \
- ((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
-#define jiffies64_to_cputime64(__jif) \
- (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
+#define cputime_to_jiffies(__ct) ((__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies_to_cputime(__jif) ((__jif) * (NSEC_PER_SEC / HZ))
+#define cputime64_to_jiffies64(__ct) ((__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies64_to_cputime64(__jif) ((__jif) * (NSEC_PER_SEC / HZ))
/*
* Convert cputime <-> microseconds
*/
-#define cputime_to_usecs(__ct) \
- ((__force u64)(__ct) / NSEC_PER_USEC)
-#define usecs_to_cputime(__usecs) \
- (__force cputime_t)((__usecs) * NSEC_PER_USEC)
-#define usecs_to_cputime64(__usecs) \
- (__force cputime64_t)((__usecs) * NSEC_PER_USEC)
+#define cputime_to_usecs(__ct) ((__ct) / NSEC_PER_USEC)
+#define usecs_to_cputime(__usecs) ((__usecs) * NSEC_PER_USEC)
/*
* Convert cputime <-> seconds
*/
-#define cputime_to_secs(__ct) \
- ((__force u64)(__ct) / NSEC_PER_SEC)
-#define secs_to_cputime(__secs) \
- (__force cputime_t)((__secs) * NSEC_PER_SEC)
+#define cputime_to_secs(__ct) ((__ct) / NSEC_PER_SEC)
+#define secs_to_cputime(__secs) ((__secs) * NSEC_PER_SEC)
/*
* Convert cputime <-> timespec (nsec)
*/
static inline cputime_t timespec_to_cputime(const struct timespec *val)
{
- u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
- return (__force cputime_t) ret;
+ cputime_t ret = val->tv_sec * NSEC_PER_SEC;
+ return (ret + val->tv_nsec);
}
static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
{
- val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
- val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
+ val->tv_sec = ct / NSEC_PER_SEC;
+ val->tv_nsec = ct % NSEC_PER_SEC;
}
/*
@@ -80,28 +86,25 @@ static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
*/
static inline cputime_t timeval_to_cputime(struct timeval *val)
{
- u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
- return (__force cputime_t) ret;
+ cputime_t ret = val->tv_sec * NSEC_PER_SEC;
+ return (ret + val->tv_usec * NSEC_PER_USEC);
}
static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
{
- val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
- val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
+ val->tv_sec = ct / NSEC_PER_SEC;
+ val->tv_usec = (ct % NSEC_PER_SEC) / NSEC_PER_USEC;
}
/*
* Convert cputime <-> clock (USER_HZ)
*/
-#define cputime_to_clock_t(__ct) \
- ((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
-#define clock_t_to_cputime(__x) \
- (__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
+#define cputime_to_clock_t(__ct) ((__ct) / (NSEC_PER_SEC / USER_HZ))
+#define clock_t_to_cputime(__x) ((__x) * (NSEC_PER_SEC / USER_HZ))
/*
* Convert cputime64 to clock.
*/
-#define cputime64_to_clock_t(__ct) \
- cputime_to_clock_t((__force cputime_t)__ct)
+#define cputime64_to_clock_t(__ct) cputime_to_clock_t((cputime_t)__ct)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* __IA64_CPUTIME_H */
diff --git a/trunk/arch/ia64/mm/contig.c b/trunk/arch/ia64/mm/contig.c
index 1516d1dc11fd..f114a3b14c6a 100644
--- a/trunk/arch/ia64/mm/contig.c
+++ b/trunk/arch/ia64/mm/contig.c
@@ -16,7 +16,6 @@
*/
#include
#include
-#include
#include
#include
#include
@@ -349,7 +348,7 @@ paging_init (void)
printk("Virtual mem_map starts at 0x%p\n", mem_map);
}
#else /* !CONFIG_VIRTUAL_MEM_MAP */
- memblock_add_node(0, PFN_PHYS(max_low_pfn), 0);
+ add_active_range(0, 0, max_low_pfn);
free_area_init_nodes(max_zone_pfns);
#endif /* !CONFIG_VIRTUAL_MEM_MAP */
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
diff --git a/trunk/arch/ia64/mm/init.c b/trunk/arch/ia64/mm/init.c
index 13df239dbed1..00cb0e26c64e 100644
--- a/trunk/arch/ia64/mm/init.c
+++ b/trunk/arch/ia64/mm/init.c
@@ -10,7 +10,6 @@
#include
#include
#include
-#include
#include
#include
#include
@@ -558,7 +557,8 @@ int __init register_active_ranges(u64 start, u64 len, int nid)
#endif
if (start < end)
- memblock_add_node(__pa(start), end - start, nid);
+ add_active_range(nid, __pa(start) >> PAGE_SHIFT,
+ __pa(end) >> PAGE_SHIFT);
return 0;
}
diff --git a/trunk/arch/m68k/platform/68328/timers.c b/trunk/arch/m68k/platform/68328/timers.c
index f2678866067b..309f725995bf 100644
--- a/trunk/arch/m68k/platform/68328/timers.c
+++ b/trunk/arch/m68k/platform/68328/timers.c
@@ -93,6 +93,7 @@ static struct clocksource m68328_clk = {
.name = "timer",
.rating = 250,
.read = m68328_read_clk,
+ .shift = 20,
.mask = CLOCKSOURCE_MASK(32),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
@@ -114,7 +115,8 @@ void hw_timer_init(void)
/* Enable timer 1 */
TCTL |= TCTL_TEN;
- clocksource_register_hz(&m68328_clk, TICKS_PER_JIFFY*HZ);
+ m68328_clk.mult = clocksource_hz2mult(TICKS_PER_JIFFY*HZ, m68328_clk.shift);
+ clocksource_register(&m68328_clk);
}
/***************************************************************************/
diff --git a/trunk/arch/m68k/platform/coldfire/dma_timer.c b/trunk/arch/m68k/platform/coldfire/dma_timer.c
index 235ad57c4707..a5f562823d7a 100644
--- a/trunk/arch/m68k/platform/coldfire/dma_timer.c
+++ b/trunk/arch/m68k/platform/coldfire/dma_timer.c
@@ -44,6 +44,7 @@ static struct clocksource clocksource_cf_dt = {
.rating = 200,
.read = cf_dt_get_cycles,
.mask = CLOCKSOURCE_MASK(32),
+ .shift = 20,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
@@ -59,7 +60,9 @@ static int __init init_cf_dt_clocksource(void)
__raw_writeb(0x00, DTER0);
__raw_writel(0x00000000, DTRR0);
__raw_writew(DMA_DTMR_CLK_DIV_16 | DMA_DTMR_ENABLE, DTMR0);
- return clocksource_register_hz(&clocksource_cf_dt, DMA_FREQ);
+ clocksource_cf_dt.mult = clocksource_hz2mult(DMA_FREQ,
+ clocksource_cf_dt.shift);
+ return clocksource_register(&clocksource_cf_dt);
}
arch_initcall(init_cf_dt_clocksource);
diff --git a/trunk/arch/m68k/platform/coldfire/pit.c b/trunk/arch/m68k/platform/coldfire/pit.c
index 02663d25822d..c2b980926bec 100644
--- a/trunk/arch/m68k/platform/coldfire/pit.c
+++ b/trunk/arch/m68k/platform/coldfire/pit.c
@@ -144,6 +144,7 @@ static struct clocksource pit_clk = {
.name = "pit",
.rating = 100,
.read = pit_read_clk,
+ .shift = 20,
.mask = CLOCKSOURCE_MASK(32),
};
@@ -161,7 +162,8 @@ void hw_timer_init(void)
setup_irq(MCFINT_VECBASE + MCFINT_PIT1, &pit_irq);
- clocksource_register_hz(&pit_clk, FREQ);
+ pit_clk.mult = clocksource_hz2mult(FREQ, pit_clk.shift);
+ clocksource_register(&pit_clk);
}
/***************************************************************************/
diff --git a/trunk/arch/m68k/platform/coldfire/sltimers.c b/trunk/arch/m68k/platform/coldfire/sltimers.c
index b7f822b552bb..6a85daf9a7fd 100644
--- a/trunk/arch/m68k/platform/coldfire/sltimers.c
+++ b/trunk/arch/m68k/platform/coldfire/sltimers.c
@@ -114,6 +114,7 @@ static struct clocksource mcfslt_clk = {
.name = "slt",
.rating = 250,
.read = mcfslt_read_clk,
+ .shift = 20,
.mask = CLOCKSOURCE_MASK(32),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
@@ -135,7 +136,8 @@ void hw_timer_init(void)
setup_irq(MCF_IRQ_TIMER, &mcfslt_timer_irq);
- clocksource_register_hz(&mcfslt_clk, MCF_BUSCLK);
+ mcfslt_clk.mult = clocksource_hz2mult(MCF_BUSCLK, mcfslt_clk.shift);
+ clocksource_register(&mcfslt_clk);
#ifdef CONFIG_HIGHPROFILE
mcfslt_profile_init();
diff --git a/trunk/arch/m68k/platform/coldfire/timers.c b/trunk/arch/m68k/platform/coldfire/timers.c
index 0d90da32fcdb..60242f65fea9 100644
--- a/trunk/arch/m68k/platform/coldfire/timers.c
+++ b/trunk/arch/m68k/platform/coldfire/timers.c
@@ -88,6 +88,7 @@ static struct clocksource mcftmr_clk = {
.name = "tmr",
.rating = 250,
.read = mcftmr_read_clk,
+ .shift = 20,
.mask = CLOCKSOURCE_MASK(32),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
@@ -108,7 +109,8 @@ void hw_timer_init(void)
__raw_writew(MCFTIMER_TMR_ENORI | MCFTIMER_TMR_CLK16 |
MCFTIMER_TMR_RESTART | MCFTIMER_TMR_ENABLE, TA(MCFTIMER_TMR));
- clocksource_register_hz(&mcftmr_clk, FREQ);
+ mcftmr_clk.mult = clocksource_hz2mult(FREQ, mcftmr_clk.shift);
+ clocksource_register(&mcftmr_clk);
setup_irq(MCF_IRQ_TIMER, &mcftmr_timer_irq);
diff --git a/trunk/arch/microblaze/include/asm/memblock.h b/trunk/arch/microblaze/include/asm/memblock.h
new file mode 100644
index 000000000000..20a8e257c77f
--- /dev/null
+++ b/trunk/arch/microblaze/include/asm/memblock.h
@@ -0,0 +1,14 @@
+/*
+ * Copyright (C) 2008 Michal Simek
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef _ASM_MICROBLAZE_MEMBLOCK_H
+#define _ASM_MICROBLAZE_MEMBLOCK_H
+
+#endif /* _ASM_MICROBLAZE_MEMBLOCK_H */
+
+
diff --git a/trunk/arch/microblaze/kernel/process.c b/trunk/arch/microblaze/kernel/process.c
index 7dcb5bfffb75..95cc295976a7 100644
--- a/trunk/arch/microblaze/kernel/process.c
+++ b/trunk/arch/microblaze/kernel/process.c
@@ -103,12 +103,10 @@ void cpu_idle(void)
if (!idle)
idle = default_idle;
- tick_nohz_idle_enter();
- rcu_idle_enter();
+ tick_nohz_stop_sched_tick(1);
while (!need_resched())
idle();
- rcu_idle_exit();
- tick_nohz_idle_exit();
+ tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
schedule();
diff --git a/trunk/arch/microblaze/kernel/prom.c b/trunk/arch/microblaze/kernel/prom.c
index 80d314e81901..977484add216 100644
--- a/trunk/arch/microblaze/kernel/prom.c
+++ b/trunk/arch/microblaze/kernel/prom.c
@@ -122,6 +122,7 @@ void __init early_init_devtree(void *params)
of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line);
/* Scan memory nodes and rebuild MEMBLOCKs */
+ memblock_init();
of_scan_flat_dt(early_init_dt_scan_root, NULL);
of_scan_flat_dt(early_init_dt_scan_memory, NULL);
@@ -129,7 +130,7 @@ void __init early_init_devtree(void *params)
strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
parse_early_param();
- memblock_allow_resize();
+ memblock_analyze();
pr_debug("Phys. mem: %lx\n", (unsigned long) memblock_phys_mem_size());
diff --git a/trunk/arch/mips/Kconfig b/trunk/arch/mips/Kconfig
index 9c652eb68aaa..d46f1da18a3c 100644
--- a/trunk/arch/mips/Kconfig
+++ b/trunk/arch/mips/Kconfig
@@ -25,9 +25,6 @@ config MIPS
select GENERIC_IRQ_SHOW
select HAVE_ARCH_JUMP_LABEL
select IRQ_FORCED_THREADING
- select HAVE_MEMBLOCK
- select HAVE_MEMBLOCK_NODE_MAP
- select ARCH_DISCARD_MEMBLOCK
menu "Machine selection"
@@ -2067,6 +2064,9 @@ config ARCH_DISCONTIGMEM_ENABLE
or have huge holes in the physical address space for other reasons.
See <file:Documentation/vm/numa> for more.
+config ARCH_POPULATES_NODE_MAP
+ def_bool y
+
config ARCH_SPARSEMEM_ENABLE
bool
select SPARSEMEM_STATIC
diff --git a/trunk/arch/mips/kernel/process.c b/trunk/arch/mips/kernel/process.c
index 7955409051c4..c47f96e453c0 100644
--- a/trunk/arch/mips/kernel/process.c
+++ b/trunk/arch/mips/kernel/process.c
@@ -56,8 +56,7 @@ void __noreturn cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
- tick_nohz_idle_enter();
- rcu_idle_enter();
+ tick_nohz_stop_sched_tick(1);
while (!need_resched() && cpu_online(cpu)) {
#ifdef CONFIG_MIPS_MT_SMTC
extern void smtc_idle_loop_hook(void);
@@ -78,8 +77,7 @@ void __noreturn cpu_idle(void)
system_state == SYSTEM_BOOTING))
play_dead();
#endif
- rcu_idle_exit();
- tick_nohz_idle_exit();
+ tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
schedule();
preempt_disable();
diff --git a/trunk/arch/mips/kernel/setup.c b/trunk/arch/mips/kernel/setup.c
index b1cb8f87d7b4..84af26ab2212 100644
--- a/trunk/arch/mips/kernel/setup.c
+++ b/trunk/arch/mips/kernel/setup.c
@@ -14,7 +14,6 @@
#include
#include
#include
-#include
#include
#include
#include
@@ -353,7 +352,7 @@ static void __init bootmem_init(void)
continue;
#endif
- memblock_add_node(PFN_PHYS(start), PFN_PHYS(end - start), 0);
+ add_active_range(0, start, end);
}
/*
diff --git a/trunk/arch/mips/sgi-ip27/ip27-memory.c b/trunk/arch/mips/sgi-ip27/ip27-memory.c
index b105eca3c020..bc1297109cc5 100644
--- a/trunk/arch/mips/sgi-ip27/ip27-memory.c
+++ b/trunk/arch/mips/sgi-ip27/ip27-memory.c
@@ -12,7 +12,6 @@
*/
#include
#include
-#include
#include
#include
#include
@@ -382,8 +381,8 @@ static void __init szmem(void)
continue;
}
num_physpages += slot_psize;
- memblock_add_node(PFN_PHYS(slot_getbasepfn(node, slot)),
- PFN_PHYS(slot_psize), node);
+ add_active_range(node, slot_getbasepfn(node, slot),
+ slot_getbasepfn(node, slot) + slot_psize);
}
}
}
diff --git a/trunk/arch/openrisc/include/asm/memblock.h b/trunk/arch/openrisc/include/asm/memblock.h
new file mode 100644
index 000000000000..bbe5a1c788cb
--- /dev/null
+++ b/trunk/arch/openrisc/include/asm/memblock.h
@@ -0,0 +1,24 @@
+/*
+ * OpenRISC Linux
+ *
+ * Linux architectural port borrowing liberally from similar works of
+ * others. All original copyrights apply as per the original source
+ * declaration.
+ *
+ * OpenRISC implementation:
+ * Copyright (C) 2003 Matjaz Breskvar
+ * Copyright (C) 2010-2011 Jonas Bonn
+ * et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __ASM_OPENRISC_MEMBLOCK_H
+#define __ASM_OPENRISC_MEMBLOCK_H
+
+/* empty */
+
+#endif /* __ASM_OPENRISC_MEMBLOCK_H */
diff --git a/trunk/arch/openrisc/kernel/idle.c b/trunk/arch/openrisc/kernel/idle.c
index e5fc78877830..d5bc5f813e89 100644
--- a/trunk/arch/openrisc/kernel/idle.c
+++ b/trunk/arch/openrisc/kernel/idle.c
@@ -51,8 +51,7 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
- tick_nohz_idle_enter();
- rcu_idle_enter();
+ tick_nohz_stop_sched_tick(1);
while (!need_resched()) {
check_pgt_cache();
@@ -70,8 +69,7 @@ void cpu_idle(void)
set_thread_flag(TIF_POLLING_NRFLAG);
}
- rcu_idle_exit();
- tick_nohz_idle_exit();
+ tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
schedule();
preempt_disable();
diff --git a/trunk/arch/openrisc/kernel/prom.c b/trunk/arch/openrisc/kernel/prom.c
index 3d4478f6c942..1bb58ba89afa 100644
--- a/trunk/arch/openrisc/kernel/prom.c
+++ b/trunk/arch/openrisc/kernel/prom.c
@@ -76,13 +76,14 @@ void __init early_init_devtree(void *params)
of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line);
/* Scan memory nodes and rebuild MEMBLOCKs */
+ memblock_init();
of_scan_flat_dt(early_init_dt_scan_root, NULL);
of_scan_flat_dt(early_init_dt_scan_memory, NULL);
/* Save command line for /proc/cmdline and then parse parameters */
strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
- memblock_allow_resize();
+ memblock_analyze();
/* We must copy the flattend device tree from init memory to regular
* memory because the device tree references the strings in it
diff --git a/trunk/arch/parisc/kernel/time.c b/trunk/arch/parisc/kernel/time.c
index 7c0774397b89..45b7389d77aa 100644
--- a/trunk/arch/parisc/kernel/time.c
+++ b/trunk/arch/parisc/kernel/time.c
@@ -198,6 +198,8 @@ static struct clocksource clocksource_cr16 = {
.rating = 300,
.read = read_cr16,
.mask = CLOCKSOURCE_MASK(BITS_PER_LONG),
+ .mult = 0, /* to be set */
+ .shift = 22,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
@@ -268,5 +270,7 @@ void __init time_init(void)
/* register at clocksource framework */
current_cr16_khz = PAGE0->mem_10msec/10; /* kHz */
- clocksource_register_khz(&clocksource_cr16, current_cr16_khz);
+ clocksource_cr16.mult = clocksource_khz2mult(current_cr16_khz,
+ clocksource_cr16.shift);
+ clocksource_register(&clocksource_cr16);
}
diff --git a/trunk/arch/powerpc/Kconfig b/trunk/arch/powerpc/Kconfig
index ead0bc68439d..951e18f5335b 100644
--- a/trunk/arch/powerpc/Kconfig
+++ b/trunk/arch/powerpc/Kconfig
@@ -117,7 +117,6 @@ config PPC
select HAVE_KRETPROBES
select HAVE_ARCH_TRACEHOOK
select HAVE_MEMBLOCK
- select HAVE_MEMBLOCK_NODE_MAP
select HAVE_DMA_ATTRS
select HAVE_DMA_API_DEBUG
select USE_GENERIC_SMP_HELPERS if SMP
@@ -422,6 +421,9 @@ config ARCH_SPARSEMEM_DEFAULT
def_bool y
depends on (SMP && PPC_PSERIES) || PPC_PS3
+config ARCH_POPULATES_NODE_MAP
+ def_bool y
+
config SYS_SUPPORTS_HUGETLBFS
bool
diff --git a/trunk/arch/powerpc/include/asm/cputime.h b/trunk/arch/powerpc/include/asm/cputime.h
index 6ec1c380a4d6..1cf20bdfbeca 100644
--- a/trunk/arch/powerpc/include/asm/cputime.h
+++ b/trunk/arch/powerpc/include/asm/cputime.h
@@ -29,8 +29,25 @@ static inline void setup_cputime_one_jiffy(void) { }
#include
#include
-typedef u64 __nocast cputime_t;
-typedef u64 __nocast cputime64_t;
+typedef u64 cputime_t;
+typedef u64 cputime64_t;
+
+#define cputime_zero ((cputime_t)0)
+#define cputime_max ((~((cputime_t)0) >> 1) - 1)
+#define cputime_add(__a, __b) ((__a) + (__b))
+#define cputime_sub(__a, __b) ((__a) - (__b))
+#define cputime_div(__a, __n) ((__a) / (__n))
+#define cputime_halve(__a) ((__a) >> 1)
+#define cputime_eq(__a, __b) ((__a) == (__b))
+#define cputime_gt(__a, __b) ((__a) > (__b))
+#define cputime_ge(__a, __b) ((__a) >= (__b))
+#define cputime_lt(__a, __b) ((__a) < (__b))
+#define cputime_le(__a, __b) ((__a) <= (__b))
+
+#define cputime64_zero ((cputime64_t)0)
+#define cputime64_add(__a, __b) ((__a) + (__b))
+#define cputime64_sub(__a, __b) ((__a) - (__b))
+#define cputime_to_cputime64(__ct) (__ct)
#ifdef __KERNEL__
@@ -48,7 +65,7 @@ DECLARE_PER_CPU(unsigned long, cputime_scaled_last_delta);
static inline unsigned long cputime_to_jiffies(const cputime_t ct)
{
- return mulhdu((__force u64) ct, __cputime_jiffies_factor);
+ return mulhdu(ct, __cputime_jiffies_factor);
}
/* Estimate the scaled cputime by scaling the real cputime based on
@@ -57,15 +74,14 @@ static inline cputime_t cputime_to_scaled(const cputime_t ct)
{
if (cpu_has_feature(CPU_FTR_SPURR) &&
__get_cpu_var(cputime_last_delta))
- return (__force u64) ct *
- __get_cpu_var(cputime_scaled_last_delta) /
- __get_cpu_var(cputime_last_delta);
+ return ct * __get_cpu_var(cputime_scaled_last_delta) /
+ __get_cpu_var(cputime_last_delta);
return ct;
}
static inline cputime_t jiffies_to_cputime(const unsigned long jif)
{
- u64 ct;
+ cputime_t ct;
unsigned long sec;
/* have to be a little careful about overflow */
@@ -77,7 +93,7 @@ static inline cputime_t jiffies_to_cputime(const unsigned long jif)
}
if (sec)
ct += (cputime_t) sec * tb_ticks_per_sec;
- return (__force cputime_t) ct;
+ return ct;
}
static inline void setup_cputime_one_jiffy(void)
@@ -87,7 +103,7 @@ static inline void setup_cputime_one_jiffy(void)
static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
{
- u64 ct;
+ cputime_t ct;
u64 sec;
/* have to be a little careful about overflow */
@@ -98,13 +114,13 @@ static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
do_div(ct, HZ);
}
if (sec)
- ct += (u64) sec * tb_ticks_per_sec;
- return (__force cputime64_t) ct;
+ ct += (cputime_t) sec * tb_ticks_per_sec;
+ return ct;
}
static inline u64 cputime64_to_jiffies64(const cputime_t ct)
{
- return mulhdu((__force u64) ct, __cputime_jiffies_factor);
+ return mulhdu(ct, __cputime_jiffies_factor);
}
/*
@@ -114,12 +130,12 @@ extern u64 __cputime_msec_factor;
static inline unsigned long cputime_to_usecs(const cputime_t ct)
{
- return mulhdu((__force u64) ct, __cputime_msec_factor) * USEC_PER_MSEC;
+ return mulhdu(ct, __cputime_msec_factor) * USEC_PER_MSEC;
}
static inline cputime_t usecs_to_cputime(const unsigned long us)
{
- u64 ct;
+ cputime_t ct;
unsigned long sec;
/* have to be a little careful about overflow */
@@ -131,11 +147,9 @@ static inline cputime_t usecs_to_cputime(const unsigned long us)
}
if (sec)
ct += (cputime_t) sec * tb_ticks_per_sec;
- return (__force cputime_t) ct;
+ return ct;
}
-#define usecs_to_cputime64(us) usecs_to_cputime(us)
-
/*
* Convert cputime <-> seconds
*/
@@ -143,12 +157,12 @@ extern u64 __cputime_sec_factor;
static inline unsigned long cputime_to_secs(const cputime_t ct)
{
- return mulhdu((__force u64) ct, __cputime_sec_factor);
+ return mulhdu(ct, __cputime_sec_factor);
}
static inline cputime_t secs_to_cputime(const unsigned long sec)
{
- return (__force cputime_t)((u64) sec * tb_ticks_per_sec);
+ return (cputime_t) sec * tb_ticks_per_sec;
}
/*
@@ -156,7 +170,7 @@ static inline cputime_t secs_to_cputime(const unsigned long sec)
*/
static inline void cputime_to_timespec(const cputime_t ct, struct timespec *p)
{
- u64 x = (__force u64) ct;
+ u64 x = ct;
unsigned int frac;
frac = do_div(x, tb_ticks_per_sec);
@@ -168,11 +182,11 @@ static inline void cputime_to_timespec(const cputime_t ct, struct timespec *p)
static inline cputime_t timespec_to_cputime(const struct timespec *p)
{
- u64 ct;
+ cputime_t ct;
ct = (u64) p->tv_nsec * tb_ticks_per_sec;
do_div(ct, 1000000000);
- return (__force cputime_t)(ct + (u64) p->tv_sec * tb_ticks_per_sec);
+ return ct + (u64) p->tv_sec * tb_ticks_per_sec;
}
/*
@@ -180,7 +194,7 @@ static inline cputime_t timespec_to_cputime(const struct timespec *p)
*/
static inline void cputime_to_timeval(const cputime_t ct, struct timeval *p)
{
- u64 x = (__force u64) ct;
+ u64 x = ct;
unsigned int frac;
frac = do_div(x, tb_ticks_per_sec);
@@ -192,11 +206,11 @@ static inline void cputime_to_timeval(const cputime_t ct, struct timeval *p)
static inline cputime_t timeval_to_cputime(const struct timeval *p)
{
- u64 ct;
+ cputime_t ct;
ct = (u64) p->tv_usec * tb_ticks_per_sec;
do_div(ct, 1000000);
- return (__force cputime_t)(ct + (u64) p->tv_sec * tb_ticks_per_sec);
+ return ct + (u64) p->tv_sec * tb_ticks_per_sec;
}
/*
@@ -206,12 +220,12 @@ extern u64 __cputime_clockt_factor;
static inline unsigned long cputime_to_clock_t(const cputime_t ct)
{
- return mulhdu((__force u64) ct, __cputime_clockt_factor);
+ return mulhdu(ct, __cputime_clockt_factor);
}
static inline cputime_t clock_t_to_cputime(const unsigned long clk)
{
- u64 ct;
+ cputime_t ct;
unsigned long sec;
/* have to be a little careful about overflow */
@@ -222,8 +236,8 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)
do_div(ct, USER_HZ);
}
if (sec)
- ct += (u64) sec * tb_ticks_per_sec;
- return (__force cputime_t) ct;
+ ct += (cputime_t) sec * tb_ticks_per_sec;
+ return ct;
}
#define cputime64_to_clock_t(ct) cputime_to_clock_t((cputime_t)(ct))
diff --git a/trunk/arch/powerpc/include/asm/kvm_book3s.h b/trunk/arch/powerpc/include/asm/kvm_book3s.h
index 69c7377d2071..d4df013ad779 100644
--- a/trunk/arch/powerpc/include/asm/kvm_book3s.h
+++ b/trunk/arch/powerpc/include/asm/kvm_book3s.h
@@ -381,6 +381,39 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
}
#endif
+static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
+ unsigned long pte_index)
+{
+ unsigned long rb, va_low;
+
+ rb = (v & ~0x7fUL) << 16; /* AVA field */
+ va_low = pte_index >> 3;
+ if (v & HPTE_V_SECONDARY)
+ va_low = ~va_low;
+ /* xor vsid from AVA */
+ if (!(v & HPTE_V_1TB_SEG))
+ va_low ^= v >> 12;
+ else
+ va_low ^= v >> 24;
+ va_low &= 0x7ff;
+ if (v & HPTE_V_LARGE) {
+ rb |= 1; /* L field */
+ if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+ (r & 0xff000)) {
+ /* non-16MB large page, must be 64k */
+ /* (masks depend on page size) */
+ rb |= 0x1000; /* page encoding in LP field */
+ rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
+ rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */
+ }
+ } else {
+ /* 4kB page */
+ rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */
+ }
+ rb |= (v >> 54) & 0x300; /* B field */
+ return rb;
+}
+
/* Magic register values loaded into r3 and r4 before the 'sc' assembly
* instruction for the OSI hypercalls */
#define OSI_SC_MAGIC_R3 0x113724FA
diff --git a/trunk/arch/powerpc/include/asm/kvm_book3s_64.h b/trunk/arch/powerpc/include/asm/kvm_book3s_64.h
index d0ac94f98f9e..e43fe42b9875 100644
--- a/trunk/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/trunk/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -29,37 +29,4 @@ static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu)
#define SPAPR_TCE_SHIFT 12
-static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
- unsigned long pte_index)
-{
- unsigned long rb, va_low;
-
- rb = (v & ~0x7fUL) << 16; /* AVA field */
- va_low = pte_index >> 3;
- if (v & HPTE_V_SECONDARY)
- va_low = ~va_low;
- /* xor vsid from AVA */
- if (!(v & HPTE_V_1TB_SEG))
- va_low ^= v >> 12;
- else
- va_low ^= v >> 24;
- va_low &= 0x7ff;
- if (v & HPTE_V_LARGE) {
- rb |= 1; /* L field */
- if (cpu_has_feature(CPU_FTR_ARCH_206) &&
- (r & 0xff000)) {
- /* non-16MB large page, must be 64k */
- /* (masks depend on page size) */
- rb |= 0x1000; /* page encoding in LP field */
- rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
- rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */
- }
- } else {
- /* 4kB page */
- rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */
- }
- rb |= (v >> 54) & 0x300; /* B field */
- return rb;
-}
-
#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/trunk/arch/powerpc/include/asm/memblock.h b/trunk/arch/powerpc/include/asm/memblock.h
new file mode 100644
index 000000000000..43efc345065e
--- /dev/null
+++ b/trunk/arch/powerpc/include/asm/memblock.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_POWERPC_MEMBLOCK_H
+#define _ASM_POWERPC_MEMBLOCK_H
+
+#include <asm/udbg.h>
+
+#define MEMBLOCK_DBG(fmt...) udbg_printf(fmt)
+
+#endif /* _ASM_POWERPC_MEMBLOCK_H */
diff --git a/trunk/arch/powerpc/kernel/idle.c b/trunk/arch/powerpc/kernel/idle.c
index 9c3cd490b1bd..39a2baa6ad58 100644
--- a/trunk/arch/powerpc/kernel/idle.c
+++ b/trunk/arch/powerpc/kernel/idle.c
@@ -46,12 +46,6 @@ static int __init powersave_off(char *arg)
}
__setup("powersave=off", powersave_off);
-#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_TRACEPOINTS)
-static const bool idle_uses_rcu = 1;
-#else
-static const bool idle_uses_rcu;
-#endif
-
/*
* The body of the idle task.
*/
@@ -62,10 +56,7 @@ void cpu_idle(void)
set_thread_flag(TIF_POLLING_NRFLAG);
while (1) {
- tick_nohz_idle_enter();
- if (!idle_uses_rcu)
- rcu_idle_enter();
-
+ tick_nohz_stop_sched_tick(1);
while (!need_resched() && !cpu_should_die()) {
ppc64_runlatch_off();
@@ -102,9 +93,7 @@ void cpu_idle(void)
HMT_medium();
ppc64_runlatch_on();
- if (!idle_uses_rcu)
- rcu_idle_exit();
- tick_nohz_idle_exit();
+ tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
if (cpu_should_die())
cpu_die();
diff --git a/trunk/arch/powerpc/kernel/machine_kexec.c b/trunk/arch/powerpc/kernel/machine_kexec.c
index a2158a395d96..9ce1672afb59 100644
--- a/trunk/arch/powerpc/kernel/machine_kexec.c
+++ b/trunk/arch/powerpc/kernel/machine_kexec.c
@@ -107,6 +107,9 @@ void __init reserve_crashkernel(void)
unsigned long long crash_size, crash_base;
int ret;
+ /* this is necessary because of memblock_phys_mem_size() */
+ memblock_analyze();
+
/* use common parsing */
ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
&crash_size, &crash_base);
diff --git a/trunk/arch/powerpc/kernel/prom.c b/trunk/arch/powerpc/kernel/prom.c
index abe405dab34d..fa1235b0503b 100644
--- a/trunk/arch/powerpc/kernel/prom.c
+++ b/trunk/arch/powerpc/kernel/prom.c
@@ -733,6 +733,8 @@ void __init early_init_devtree(void *params)
of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line);
/* Scan memory nodes and rebuild MEMBLOCKs */
+ memblock_init();
+
of_scan_flat_dt(early_init_dt_scan_root, NULL);
of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
@@ -754,14 +756,20 @@ void __init early_init_devtree(void *params)
early_reserve_mem();
phyp_dump_reserve_mem();
- /*
- * Ensure that total memory size is page-aligned, because otherwise
- * mark_bootmem() gets upset.
- */
- limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE);
+ limit = memory_limit;
+ if (! limit) {
+ phys_addr_t memsize;
+
+ /* Ensure that total memory size is page-aligned, because
+ * otherwise mark_bootmem() gets upset. */
+ memblock_analyze();
+ memsize = memblock_phys_mem_size();
+ if ((memsize & PAGE_MASK) != memsize)
+ limit = memsize & PAGE_MASK;
+ }
memblock_enforce_memory_limit(limit);
- memblock_allow_resize();
+ memblock_analyze();
memblock_dump_all();
DBG("Phys. mem: %llx\n", memblock_phys_mem_size());
diff --git a/trunk/arch/powerpc/kvm/book3s_hv.c b/trunk/arch/powerpc/kvm/book3s_hv.c
index 336983da9e72..0cb137a9b038 100644
--- a/trunk/arch/powerpc/kvm/book3s_hv.c
+++ b/trunk/arch/powerpc/kvm/book3s_hv.c
@@ -538,7 +538,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
tpaca->kvm_hstate.napping = 0;
vcpu->cpu = vc->pcpu;
smp_wmb();
-#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
+#ifdef CONFIG_PPC_ICP_NATIVE
if (vcpu->arch.ptid) {
tpaca->cpu_start = 0x80;
wmb();
diff --git a/trunk/arch/powerpc/kvm/book3s_pr.c b/trunk/arch/powerpc/kvm/book3s_pr.c
index e2cfb9e1e20e..3c791e1eb675 100644
--- a/trunk/arch/powerpc/kvm/book3s_pr.c
+++ b/trunk/arch/powerpc/kvm/book3s_pr.c
@@ -658,12 +658,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
ulong cmd = kvmppc_get_gpr(vcpu, 3);
int i;
-#ifdef CONFIG_KVM_BOOK3S_64_PR
if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
r = RESUME_GUEST;
break;
}
-#endif
run->papr_hcall.nr = cmd;
for (i = 0; i < 9; ++i) {
diff --git a/trunk/arch/powerpc/kvm/e500.c b/trunk/arch/powerpc/kvm/e500.c
index 8c0d45a6faf7..26d20903f2bc 100644
--- a/trunk/arch/powerpc/kvm/e500.c
+++ b/trunk/arch/powerpc/kvm/e500.c
@@ -15,7 +15,6 @@
#include
#include
#include
-#include
#include
#include
diff --git a/trunk/arch/powerpc/mm/init_32.c b/trunk/arch/powerpc/mm/init_32.c
index 58861fa1220e..161cefde5c15 100644
--- a/trunk/arch/powerpc/mm/init_32.c
+++ b/trunk/arch/powerpc/mm/init_32.c
@@ -134,7 +134,8 @@ void __init MMU_init(void)
if (memblock.memory.cnt > 1) {
#ifndef CONFIG_WII
- memblock_enforce_memory_limit(memblock.memory.regions[0].size);
+ memblock.memory.cnt = 1;
+ memblock_analyze();
printk(KERN_WARNING "Only using first contiguous memory region");
#else
wii_memory_fixups();
@@ -157,6 +158,7 @@ void __init MMU_init(void)
#ifndef CONFIG_HIGHMEM
total_memory = total_lowmem;
memblock_enforce_memory_limit(total_lowmem);
+ memblock_analyze();
#endif /* CONFIG_HIGHMEM */
}
diff --git a/trunk/arch/powerpc/mm/mem.c b/trunk/arch/powerpc/mm/mem.c
index 8e2eb6611b0b..2dd6bdd31fe1 100644
--- a/trunk/arch/powerpc/mm/mem.c
+++ b/trunk/arch/powerpc/mm/mem.c
@@ -199,7 +199,7 @@ void __init do_init_bootmem(void)
unsigned long start_pfn, end_pfn;
start_pfn = memblock_region_memory_base_pfn(reg);
end_pfn = memblock_region_memory_end_pfn(reg);
- memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
+ add_active_range(0, start_pfn, end_pfn);
}
/* Add all physical memory to the bootmem map, mark each area
diff --git a/trunk/arch/powerpc/mm/numa.c b/trunk/arch/powerpc/mm/numa.c
index e6eea0ac80c8..b22a83a91cb8 100644
--- a/trunk/arch/powerpc/mm/numa.c
+++ b/trunk/arch/powerpc/mm/numa.c
@@ -127,25 +127,45 @@ static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
}
/*
- * get_node_active_region - Return active region containing pfn
- * Active range returned is empty if none found.
- * @pfn: The page to return the region for
- * @node_ar: Returned set to the active region containing @pfn
+ * get_active_region_work_fn - A helper function for get_node_active_region
+ * Returns datax set to the start_pfn and end_pfn if they contain
+ * the initial value of datax->start_pfn between them
+ * @start_pfn: start page(inclusive) of region to check
+ * @end_pfn: end page(exclusive) of region to check
+ * @datax: comes in with ->start_pfn set to value to search for and
+ * goes out with active range if it contains it
+ * Returns 1 if search value is in range else 0
*/
-static void __init get_node_active_region(unsigned long pfn,
- struct node_active_region *node_ar)
+static int __init get_active_region_work_fn(unsigned long start_pfn,
+ unsigned long end_pfn, void *datax)
{
- unsigned long start_pfn, end_pfn;
- int i, nid;
+ struct node_active_region *data;
+ data = (struct node_active_region *)datax;
- for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
- if (pfn >= start_pfn && pfn < end_pfn) {
- node_ar->nid = nid;
- node_ar->start_pfn = start_pfn;
- node_ar->end_pfn = end_pfn;
- break;
- }
+ if (start_pfn <= data->start_pfn && end_pfn > data->start_pfn) {
+ data->start_pfn = start_pfn;
+ data->end_pfn = end_pfn;
+ return 1;
}
+ return 0;
+
+}
+
+/*
+ * get_node_active_region - Return active region containing start_pfn
+ * Active range returned is empty if none found.
+ * @start_pfn: The page to return the region for.
+ * @node_ar: Returned set to the active region containing start_pfn
+ */
+static void __init get_node_active_region(unsigned long start_pfn,
+ struct node_active_region *node_ar)
+{
+ int nid = early_pfn_to_nid(start_pfn);
+
+ node_ar->nid = nid;
+ node_ar->start_pfn = start_pfn;
+ node_ar->end_pfn = start_pfn;
+ work_with_active_regions(nid, get_active_region_work_fn, node_ar);
}
static void map_cpu_to_node(int cpu, int node)
@@ -690,7 +710,9 @@ static void __init parse_drconf_memory(struct device_node *memory)
node_set_online(nid);
sz = numa_enforce_memory_limit(base, size);
if (sz)
- memblock_set_node(base, sz, nid);
+ add_active_range(nid, base >> PAGE_SHIFT,
+ (base >> PAGE_SHIFT)
+ + (sz >> PAGE_SHIFT));
} while (--ranges);
}
}
@@ -780,7 +802,8 @@ static int __init parse_numa_properties(void)
continue;
}
- memblock_set_node(start, size, nid);
+ add_active_range(nid, start >> PAGE_SHIFT,
+ (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
if (--ranges)
goto new_range;
@@ -816,8 +839,7 @@ static void __init setup_nonnuma(void)
end_pfn = memblock_region_memory_end_pfn(reg);
fake_numa_create_new_node(end_pfn, &nid);
- memblock_set_node(PFN_PHYS(start_pfn),
- PFN_PHYS(end_pfn - start_pfn), nid);
+ add_active_range(nid, start_pfn, end_pfn);
node_set_online(nid);
}
}
diff --git a/trunk/arch/powerpc/mm/tlb_nohash.c b/trunk/arch/powerpc/mm/tlb_nohash.c
index 573ba3b69d1f..4e13d6f9023e 100644
--- a/trunk/arch/powerpc/mm/tlb_nohash.c
+++ b/trunk/arch/powerpc/mm/tlb_nohash.c
@@ -615,6 +615,7 @@ static void __early_init_mmu(int boot_cpu)
/* limit memory so we dont have linear faults */
memblock_enforce_memory_limit(linear_map_top);
+ memblock_analyze();
patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e);
diff --git a/trunk/arch/powerpc/platforms/embedded6xx/wii.c b/trunk/arch/powerpc/platforms/embedded6xx/wii.c
index 6d8dadf19f0b..1b5dc1a2e145 100644
--- a/trunk/arch/powerpc/platforms/embedded6xx/wii.c
+++ b/trunk/arch/powerpc/platforms/embedded6xx/wii.c
@@ -79,18 +79,23 @@ void __init wii_memory_fixups(void)
BUG_ON(memblock.memory.cnt != 2);
BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base));
- /* trim unaligned tail */
- memblock_remove(ALIGN(p[1].base + p[1].size, PAGE_SIZE),
- (phys_addr_t)ULLONG_MAX);
+ p[0].size = _ALIGN_DOWN(p[0].size, PAGE_SIZE);
+ p[1].size = _ALIGN_DOWN(p[1].size, PAGE_SIZE);
- /* determine hole, add & reserve them */
- wii_hole_start = ALIGN(p[0].base + p[0].size, PAGE_SIZE);
+ wii_hole_start = p[0].base + p[0].size;
wii_hole_size = p[1].base - wii_hole_start;
- memblock_add(wii_hole_start, wii_hole_size);
- memblock_reserve(wii_hole_start, wii_hole_size);
- BUG_ON(memblock.memory.cnt != 1);
- __memblock_dump_all();
+ pr_info("MEM1: <%08llx %08llx>\n", p[0].base, p[0].size);
+ pr_info("HOLE: <%08lx %08lx>\n", wii_hole_start, wii_hole_size);
+ pr_info("MEM2: <%08llx %08llx>\n", p[1].base, p[1].size);
+
+ p[0].size += wii_hole_size + p[1].size;
+
+ memblock.memory.cnt = 1;
+ memblock_analyze();
+
+ /* reserve the hole */
+ memblock_reserve(wii_hole_start, wii_hole_size);
/* allow ioremapping the address space in the hole */
__allow_ioremap_reserved = 1;
diff --git a/trunk/arch/powerpc/platforms/iseries/setup.c b/trunk/arch/powerpc/platforms/iseries/setup.c
index 8fc62586a973..ea0acbd8966d 100644
--- a/trunk/arch/powerpc/platforms/iseries/setup.c
+++ b/trunk/arch/powerpc/platforms/iseries/setup.c
@@ -563,8 +563,7 @@ static void yield_shared_processor(void)
static void iseries_shared_idle(void)
{
while (1) {
- tick_nohz_idle_enter();
- rcu_idle_enter();
+ tick_nohz_stop_sched_tick(1);
while (!need_resched() && !hvlpevent_is_pending()) {
local_irq_disable();
ppc64_runlatch_off();
@@ -578,8 +577,7 @@ static void iseries_shared_idle(void)
}
ppc64_runlatch_on();
- rcu_idle_exit();
- tick_nohz_idle_exit();
+ tick_nohz_restart_sched_tick();
if (hvlpevent_is_pending())
process_iSeries_events();
@@ -595,8 +593,7 @@ static void iseries_dedicated_idle(void)
set_thread_flag(TIF_POLLING_NRFLAG);
while (1) {
- tick_nohz_idle_enter();
- rcu_idle_enter();
+ tick_nohz_stop_sched_tick(1);
if (!need_resched()) {
while (!need_resched()) {
ppc64_runlatch_off();
@@ -613,8 +610,7 @@ static void iseries_dedicated_idle(void)
}
ppc64_runlatch_on();
- rcu_idle_exit();
- tick_nohz_idle_exit();
+ tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
schedule();
preempt_disable();
diff --git a/trunk/arch/powerpc/platforms/ps3/mm.c b/trunk/arch/powerpc/platforms/ps3/mm.c
index 8bd6ba542691..72714ad27842 100644
--- a/trunk/arch/powerpc/platforms/ps3/mm.c
+++ b/trunk/arch/powerpc/platforms/ps3/mm.c
@@ -319,6 +319,7 @@ static int __init ps3_mm_add_memory(void)
}
memblock_add(start_addr, map.r1.size);
+ memblock_analyze();
result = online_pages(start_pfn, nr_pages);
diff --git a/trunk/arch/powerpc/platforms/pseries/lpar.c b/trunk/arch/powerpc/platforms/pseries/lpar.c
index 52d429be6c76..27a49508b410 100644
--- a/trunk/arch/powerpc/platforms/pseries/lpar.c
+++ b/trunk/arch/powerpc/platforms/pseries/lpar.c
@@ -555,8 +555,6 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
(*depth)++;
trace_hcall_entry(opcode, args);
- if (opcode == H_CEDE)
- rcu_idle_enter();
(*depth)--;
out:
@@ -577,8 +575,6 @@ void __trace_hcall_exit(long opcode, unsigned long retval,
goto out;
(*depth)++;
- if (opcode == H_CEDE)
- rcu_idle_exit();
trace_hcall_exit(opcode, retval, retbuf);
(*depth)--;
diff --git a/trunk/arch/s390/Kconfig b/trunk/arch/s390/Kconfig
index d48ede334434..373679b3744a 100644
--- a/trunk/arch/s390/Kconfig
+++ b/trunk/arch/s390/Kconfig
@@ -92,9 +92,6 @@ config S390
select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
select HAVE_RCU_TABLE_FREE if SMP
select ARCH_SAVE_PAGE_KEYS if HIBERNATION
- select HAVE_MEMBLOCK
- select HAVE_MEMBLOCK_NODE_MAP
- select ARCH_DISCARD_MEMBLOCK
select ARCH_INLINE_SPIN_TRYLOCK
select ARCH_INLINE_SPIN_TRYLOCK_BH
select ARCH_INLINE_SPIN_LOCK
@@ -348,6 +345,9 @@ config WARN_DYNAMIC_STACK
Say N if you are unsure.
+config ARCH_POPULATES_NODE_MAP
+ def_bool y
+
comment "Kernel preemption"
source "kernel/Kconfig.preempt"
diff --git a/trunk/arch/s390/appldata/appldata_os.c b/trunk/arch/s390/appldata/appldata_os.c
index 4de031d6b76c..92f1cb745d69 100644
--- a/trunk/arch/s390/appldata/appldata_os.c
+++ b/trunk/arch/s390/appldata/appldata_os.c
@@ -115,21 +115,21 @@ static void appldata_get_os_data(void *data)
j = 0;
for_each_online_cpu(i) {
os_data->os_cpu[j].per_cpu_user =
- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]);
+ cputime_to_jiffies(kstat_cpu(i).cpustat.user);
os_data->os_cpu[j].per_cpu_nice =
- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]);
+ cputime_to_jiffies(kstat_cpu(i).cpustat.nice);
os_data->os_cpu[j].per_cpu_system =
- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]);
+ cputime_to_jiffies(kstat_cpu(i).cpustat.system);
os_data->os_cpu[j].per_cpu_idle =
- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]);
+ cputime_to_jiffies(kstat_cpu(i).cpustat.idle);
os_data->os_cpu[j].per_cpu_irq =
- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]);
+ cputime_to_jiffies(kstat_cpu(i).cpustat.irq);
os_data->os_cpu[j].per_cpu_softirq =
- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]);
+ cputime_to_jiffies(kstat_cpu(i).cpustat.softirq);
os_data->os_cpu[j].per_cpu_iowait =
- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]);
+ cputime_to_jiffies(kstat_cpu(i).cpustat.iowait);
os_data->os_cpu[j].per_cpu_steal =
- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]);
+ cputime_to_jiffies(kstat_cpu(i).cpustat.steal);
os_data->os_cpu[j].cpu_id = i;
j++;
}
diff --git a/trunk/arch/s390/include/asm/cputime.h b/trunk/arch/s390/include/asm/cputime.h
index c23c3900c304..081434878296 100644
--- a/trunk/arch/s390/include/asm/cputime.h
+++ b/trunk/arch/s390/include/asm/cputime.h
@@ -16,100 +16,114 @@
/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
-typedef unsigned long long __nocast cputime_t;
-typedef unsigned long long __nocast cputime64_t;
+typedef unsigned long long cputime_t;
+typedef unsigned long long cputime64_t;
-static inline unsigned long __div(unsigned long long n, unsigned long base)
-{
#ifndef __s390x__
+
+static inline unsigned int
+__div(unsigned long long n, unsigned int base)
+{
register_pair rp;
rp.pair = n >> 1;
asm ("dr %0,%1" : "+d" (rp) : "d" (base >> 1));
return rp.subreg.odd;
-#else /* __s390x__ */
- return n / base;
-#endif /* __s390x__ */
}
-#define cputime_one_jiffy jiffies_to_cputime(1)
-
-/*
- * Convert cputime to jiffies and back.
- */
-static inline unsigned long cputime_to_jiffies(const cputime_t cputime)
-{
- return __div((__force unsigned long long) cputime, 4096000000ULL / HZ);
-}
+#else /* __s390x__ */
-static inline cputime_t jiffies_to_cputime(const unsigned int jif)
+static inline unsigned int
+__div(unsigned long long n, unsigned int base)
{
- return (__force cputime_t)(jif * (4096000000ULL / HZ));
+ return n / base;
}
-static inline u64 cputime64_to_jiffies64(cputime64_t cputime)
-{
- unsigned long long jif = (__force unsigned long long) cputime;
- do_div(jif, 4096000000ULL / HZ);
- return jif;
-}
+#endif /* __s390x__ */
-static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
-{
- return (__force cputime64_t)(jif * (4096000000ULL / HZ));
+#define cputime_zero (0ULL)
+#define cputime_one_jiffy jiffies_to_cputime(1)
+#define cputime_max ((~0UL >> 1) - 1)
+#define cputime_add(__a, __b) ((__a) + (__b))
+#define cputime_sub(__a, __b) ((__a) - (__b))
+#define cputime_div(__a, __n) ({ \
+ unsigned long long __div = (__a); \
+ do_div(__div,__n); \
+ __div; \
+})
+#define cputime_halve(__a) ((__a) >> 1)
+#define cputime_eq(__a, __b) ((__a) == (__b))
+#define cputime_gt(__a, __b) ((__a) > (__b))
+#define cputime_ge(__a, __b) ((__a) >= (__b))
+#define cputime_lt(__a, __b) ((__a) < (__b))
+#define cputime_le(__a, __b) ((__a) <= (__b))
+#define cputime_to_jiffies(__ct) (__div((__ct), 4096000000ULL / HZ))
+#define cputime_to_scaled(__ct) (__ct)
+#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (4096000000ULL / HZ))
+
+#define cputime64_zero (0ULL)
+#define cputime64_add(__a, __b) ((__a) + (__b))
+#define cputime_to_cputime64(__ct) (__ct)
+
+static inline u64
+cputime64_to_jiffies64(cputime64_t cputime)
+{
+ do_div(cputime, 4096000000ULL / HZ);
+ return cputime;
}
/*
* Convert cputime to microseconds and back.
*/
-static inline unsigned int cputime_to_usecs(const cputime_t cputime)
+static inline unsigned int
+cputime_to_usecs(const cputime_t cputime)
{
- return (__force unsigned long long) cputime >> 12;
+ return cputime_div(cputime, 4096);
}
-static inline cputime_t usecs_to_cputime(const unsigned int m)
+static inline cputime_t
+usecs_to_cputime(const unsigned int m)
{
- return (__force cputime_t)(m * 4096ULL);
+ return (cputime_t) m * 4096;
}
-#define usecs_to_cputime64(m) usecs_to_cputime(m)
-
/*
* Convert cputime to milliseconds and back.
*/
-static inline unsigned int cputime_to_secs(const cputime_t cputime)
+static inline unsigned int
+cputime_to_secs(const cputime_t cputime)
{
- return __div((__force unsigned long long) cputime, 2048000000) >> 1;
+ return __div(cputime, 2048000000) >> 1;
}
-static inline cputime_t secs_to_cputime(const unsigned int s)
+static inline cputime_t
+secs_to_cputime(const unsigned int s)
{
- return (__force cputime_t)(s * 4096000000ULL);
+ return (cputime_t) s * 4096000000ULL;
}
/*
* Convert cputime to timespec and back.
*/
-static inline cputime_t timespec_to_cputime(const struct timespec *value)
+static inline cputime_t
+timespec_to_cputime(const struct timespec *value)
{
- unsigned long long ret = value->tv_sec * 4096000000ULL;
- return (__force cputime_t)(ret + value->tv_nsec * 4096 / 1000);
+ return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL;
}
-static inline void cputime_to_timespec(const cputime_t cputime,
- struct timespec *value)
+static inline void
+cputime_to_timespec(const cputime_t cputime, struct timespec *value)
{
- unsigned long long __cputime = (__force unsigned long long) cputime;
#ifndef __s390x__
register_pair rp;
- rp.pair = __cputime >> 1;
+ rp.pair = cputime >> 1;
asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
value->tv_nsec = rp.subreg.even * 1000 / 4096;
value->tv_sec = rp.subreg.odd;
#else
- value->tv_nsec = (__cputime % 4096000000ULL) * 1000 / 4096;
- value->tv_sec = __cputime / 4096000000ULL;
+ value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096;
+ value->tv_sec = cputime / 4096000000ULL;
#endif
}
@@ -118,52 +132,50 @@ static inline void cputime_to_timespec(const cputime_t cputime,
* Since cputime and timeval have the same resolution (microseconds)
* this is easy.
*/
-static inline cputime_t timeval_to_cputime(const struct timeval *value)
+static inline cputime_t
+timeval_to_cputime(const struct timeval *value)
{
- unsigned long long ret = value->tv_sec * 4096000000ULL;
- return (__force cputime_t)(ret + value->tv_usec * 4096ULL);
+ return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL;
}
-static inline void cputime_to_timeval(const cputime_t cputime,
- struct timeval *value)
+static inline void
+cputime_to_timeval(const cputime_t cputime, struct timeval *value)
{
- unsigned long long __cputime = (__force unsigned long long) cputime;
#ifndef __s390x__
register_pair rp;
- rp.pair = __cputime >> 1;
+ rp.pair = cputime >> 1;
asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
value->tv_usec = rp.subreg.even / 4096;
value->tv_sec = rp.subreg.odd;
#else
- value->tv_usec = (__cputime % 4096000000ULL) / 4096;
- value->tv_sec = __cputime / 4096000000ULL;
+ value->tv_usec = (cputime % 4096000000ULL) / 4096;
+ value->tv_sec = cputime / 4096000000ULL;
#endif
}
/*
* Convert cputime to clock and back.
*/
-static inline clock_t cputime_to_clock_t(cputime_t cputime)
+static inline clock_t
+cputime_to_clock_t(cputime_t cputime)
{
- unsigned long long clock = (__force unsigned long long) cputime;
- do_div(clock, 4096000000ULL / USER_HZ);
- return clock;
+ return cputime_div(cputime, 4096000000ULL / USER_HZ);
}
-static inline cputime_t clock_t_to_cputime(unsigned long x)
+static inline cputime_t
+clock_t_to_cputime(unsigned long x)
{
- return (__force cputime_t)(x * (4096000000ULL / USER_HZ));
+ return (cputime_t) x * (4096000000ULL / USER_HZ);
}
/*
* Convert cputime64 to clock.
*/
-static inline clock_t cputime64_to_clock_t(cputime64_t cputime)
+static inline clock_t
+cputime64_to_clock_t(cputime64_t cputime)
{
- unsigned long long clock = (__force unsigned long long) cputime;
- do_div(clock, 4096000000ULL / USER_HZ);
- return clock;
+ return cputime_div(cputime, 4096000000ULL / USER_HZ);
}
struct s390_idle_data {
diff --git a/trunk/arch/s390/kernel/process.c b/trunk/arch/s390/kernel/process.c
index 3201ae447990..9451b210a1b4 100644
--- a/trunk/arch/s390/kernel/process.c
+++ b/trunk/arch/s390/kernel/process.c
@@ -91,12 +91,10 @@ static void default_idle(void)
void cpu_idle(void)
{
for (;;) {
- tick_nohz_idle_enter();
- rcu_idle_enter();
+ tick_nohz_stop_sched_tick(1);
while (!need_resched())
default_idle();
- rcu_idle_exit();
- tick_nohz_idle_exit();
+ tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
schedule();
preempt_disable();
diff --git a/trunk/arch/s390/kernel/setup.c b/trunk/arch/s390/kernel/setup.c
index f11d1b037c50..e54c4ff8abaa 100644
--- a/trunk/arch/s390/kernel/setup.c
+++ b/trunk/arch/s390/kernel/setup.c
@@ -21,7 +21,6 @@
#include
#include
#include
-#include
#include
#include
#include
@@ -821,8 +820,7 @@ setup_memory(void)
end_chunk = min(end_chunk, end_pfn);
if (start_chunk >= end_chunk)
continue;
- memblock_add_node(PFN_PHYS(start_chunk),
- PFN_PHYS(end_chunk - start_chunk), 0);
+ add_active_range(0, start_chunk, end_chunk);
pfn = max(start_chunk, start_pfn);
for (; pfn < end_chunk; pfn++)
page_set_storage_key(PFN_PHYS(pfn),
diff --git a/trunk/arch/s390/oprofile/hwsampler.c b/trunk/arch/s390/oprofile/hwsampler.c
index 9daee91e6c3f..f43c0e4282af 100644
--- a/trunk/arch/s390/oprofile/hwsampler.c
+++ b/trunk/arch/s390/oprofile/hwsampler.c
@@ -22,7 +22,6 @@
#include
#include "hwsampler.h"
-#include "op_counter.h"
#define MAX_NUM_SDB 511
#define MIN_NUM_SDB 1
@@ -897,8 +896,6 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
if (sample_data_ptr->P == 1) {
/* userspace sample */
unsigned int pid = sample_data_ptr->prim_asn;
- if (!counter_config.user)
- goto skip_sample;
rcu_read_lock();
tsk = pid_task(find_vpid(pid), PIDTYPE_PID);
if (tsk)
@@ -906,8 +903,6 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
rcu_read_unlock();
} else {
/* kernelspace sample */
- if (!counter_config.kernel)
- goto skip_sample;
regs = task_pt_regs(current);
}
@@ -915,7 +910,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0,
!sample_data_ptr->P, tsk);
mutex_unlock(&hws_sem);
- skip_sample:
+
sample_data_ptr++;
}
}
diff --git a/trunk/arch/s390/oprofile/init.c b/trunk/arch/s390/oprofile/init.c
index 2297be406c61..bd58b72454cf 100644
--- a/trunk/arch/s390/oprofile/init.c
+++ b/trunk/arch/s390/oprofile/init.c
@@ -2,11 +2,10 @@
* arch/s390/oprofile/init.c
*
* S390 Version
- * Copyright (C) 2002-2011 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation
* Author(s): Thomas Spatzier (tspat@de.ibm.com)
* Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
* Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
- * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
*
* @remark Copyright 2002-2011 OProfile authors
*/
@@ -15,8 +14,6 @@
#include
#include
#include
-#include
-#include
#include "../../../drivers/oprofile/oprof.h"
@@ -25,7 +22,6 @@ extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
#ifdef CONFIG_64BIT
#include "hwsampler.h"
-#include "op_counter.h"
#define DEFAULT_INTERVAL 4127518
@@ -39,41 +35,16 @@ static unsigned long oprofile_max_interval;
static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
-static int hwsampler_enabled;
+static int hwsampler_file;
static int hwsampler_running; /* start_mutex must be held to change */
-static int hwsampler_available;
static struct oprofile_operations timer_ops;
-struct op_counter_config counter_config;
-
-enum __force_cpu_type {
- reserved = 0, /* do not force */
- timer,
-};
-static int force_cpu_type;
-
-static int set_cpu_type(const char *str, struct kernel_param *kp)
-{
- if (!strcmp(str, "timer")) {
- force_cpu_type = timer;
- printk(KERN_INFO "oprofile: forcing timer to be returned "
- "as cpu type\n");
- } else {
- force_cpu_type = 0;
- }
-
- return 0;
-}
-module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
-MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
- "(report cpu_type \"timer\"");
-
static int oprofile_hwsampler_start(void)
{
int retval;
- hwsampler_running = hwsampler_enabled;
+ hwsampler_running = hwsampler_file;
if (!hwsampler_running)
return timer_ops.start();
@@ -101,16 +72,10 @@ static void oprofile_hwsampler_stop(void)
return;
}
-/*
- * File ops used for:
- * /dev/oprofile/0/enabled
- * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer)
- */
-
static ssize_t hwsampler_read(struct file *file, char __user *buf,
size_t count, loff_t *offset)
{
- return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
+ return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset);
}
static ssize_t hwsampler_write(struct file *file, char const __user *buf,
@@ -126,9 +91,6 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf,
if (retval <= 0)
return retval;
- if (val != 0 && val != 1)
- return -EINVAL;
-
if (oprofile_started)
/*
* save to do without locking as we set
@@ -137,7 +99,7 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf,
*/
return -EBUSY;
- hwsampler_enabled = val;
+ hwsampler_file = val;
return count;
}
@@ -147,311 +109,38 @@ static const struct file_operations hwsampler_fops = {
.write = hwsampler_write,
};
-/*
- * File ops used for:
- * /dev/oprofile/0/count
- * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer)
- *
- * Make sure that the value is within the hardware range.
- */
-
-static ssize_t hw_interval_read(struct file *file, char __user *buf,
- size_t count, loff_t *offset)
-{
- return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
- count, offset);
-}
-
-static ssize_t hw_interval_write(struct file *file, char const __user *buf,
- size_t count, loff_t *offset)
-{
- unsigned long val;
- int retval;
-
- if (*offset)
- return -EINVAL;
- retval = oprofilefs_ulong_from_user(&val, buf, count);
- if (retval)
- return retval;
- if (val < oprofile_min_interval)
- oprofile_hw_interval = oprofile_min_interval;
- else if (val > oprofile_max_interval)
- oprofile_hw_interval = oprofile_max_interval;
- else
- oprofile_hw_interval = val;
-
- return count;
-}
-
-static const struct file_operations hw_interval_fops = {
- .read = hw_interval_read,
- .write = hw_interval_write,
-};
-
-/*
- * File ops used for:
- * /dev/oprofile/0/event
- * Only a single event with number 0 is supported with this counter.
- *
- * /dev/oprofile/0/unit_mask
- * This is a dummy file needed by the user space tools.
- * No value other than 0 is accepted or returned.
- */
-
-static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
- size_t count, loff_t *offset)
-{
- return oprofilefs_ulong_to_user(0, buf, count, offset);
-}
-
-static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
- size_t count, loff_t *offset)
-{
- unsigned long val;
- int retval;
-
- if (*offset)
- return -EINVAL;
-
- retval = oprofilefs_ulong_from_user(&val, buf, count);
- if (retval)
- return retval;
- if (val != 0)
- return -EINVAL;
- return count;
-}
-
-static const struct file_operations zero_fops = {
- .read = hwsampler_zero_read,
- .write = hwsampler_zero_write,
-};
-
-/* /dev/oprofile/0/kernel file ops. */
-
-static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
- size_t count, loff_t *offset)
-{
- return oprofilefs_ulong_to_user(counter_config.kernel,
- buf, count, offset);
-}
-
-static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
- size_t count, loff_t *offset)
-{
- unsigned long val;
- int retval;
-
- if (*offset)
- return -EINVAL;
-
- retval = oprofilefs_ulong_from_user(&val, buf, count);
- if (retval)
- return retval;
-
- if (val != 0 && val != 1)
- return -EINVAL;
-
- counter_config.kernel = val;
-
- return count;
-}
-
-static const struct file_operations kernel_fops = {
- .read = hwsampler_kernel_read,
- .write = hwsampler_kernel_write,
-};
-
-/* /dev/oprofile/0/user file ops. */
-
-static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
- size_t count, loff_t *offset)
-{
- return oprofilefs_ulong_to_user(counter_config.user,
- buf, count, offset);
-}
-
-static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
- size_t count, loff_t *offset)
-{
- unsigned long val;
- int retval;
-
- if (*offset)
- return -EINVAL;
-
- retval = oprofilefs_ulong_from_user(&val, buf, count);
- if (retval)
- return retval;
-
- if (val != 0 && val != 1)
- return -EINVAL;
-
- counter_config.user = val;
-
- return count;
-}
-
-static const struct file_operations user_fops = {
- .read = hwsampler_user_read,
- .write = hwsampler_user_write,
-};
-
-
-/*
- * File ops used for: /dev/oprofile/timer/enabled
- * The value always has to be the inverted value of hwsampler_enabled. So
- * no separate variable is created. That way we do not need locking.
- */
-
-static ssize_t timer_enabled_read(struct file *file, char __user *buf,
- size_t count, loff_t *offset)
-{
- return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
-}
-
-static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
- size_t count, loff_t *offset)
-{
- unsigned long val;
- int retval;
-
- if (*offset)
- return -EINVAL;
-
- retval = oprofilefs_ulong_from_user(&val, buf, count);
- if (retval)
- return retval;
-
- if (val != 0 && val != 1)
- return -EINVAL;
-
- /* Timer cannot be disabled without having hardware sampling. */
- if (val == 0 && !hwsampler_available)
- return -EINVAL;
-
- if (oprofile_started)
- /*
- * save to do without locking as we set
- * hwsampler_running in start() when start_mutex is
- * held
- */
- return -EBUSY;
-
- hwsampler_enabled = !val;
-
- return count;
-}
-
-static const struct file_operations timer_enabled_fops = {
- .read = timer_enabled_read,
- .write = timer_enabled_write,
-};
-
-
static int oprofile_create_hwsampling_files(struct super_block *sb,
- struct dentry *root)
+ struct dentry *root)
{
- struct dentry *dir;
-
- dir = oprofilefs_mkdir(sb, root, "timer");
- if (!dir)
- return -EINVAL;
-
- oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops);
-
- if (!hwsampler_available)
- return 0;
+ struct dentry *hw_dir;
/* reinitialize default values */
- hwsampler_enabled = 1;
- counter_config.kernel = 1;
- counter_config.user = 1;
-
- if (!force_cpu_type) {
- /*
- * Create the counter file system. A single virtual
- * counter is created which can be used to
- * enable/disable hardware sampling dynamically from
- * user space. The user space will configure a single
- * counter with a single event. The value of 'event'
- * and 'unit_mask' are not evaluated by the kernel code
- * and can only be set to 0.
- */
+ hwsampler_file = 1;
- dir = oprofilefs_mkdir(sb, root, "0");
- if (!dir)
- return -EINVAL;
+ hw_dir = oprofilefs_mkdir(sb, root, "hwsampling");
+ if (!hw_dir)
+ return -EINVAL;
- oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops);
- oprofilefs_create_file(sb, dir, "event", &zero_fops);
- oprofilefs_create_file(sb, dir, "count", &hw_interval_fops);
- oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops);
- oprofilefs_create_file(sb, dir, "kernel", &kernel_fops);
- oprofilefs_create_file(sb, dir, "user", &user_fops);
- oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
- &oprofile_sdbt_blocks);
+ oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops);
+ oprofilefs_create_ulong(sb, hw_dir, "hw_interval",
+ &oprofile_hw_interval);
+ oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval",
+ &oprofile_min_interval);
+ oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval",
+ &oprofile_max_interval);
+ oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks",
+ &oprofile_sdbt_blocks);
- } else {
- /*
- * Hardware sampling can be used but the cpu_type is
- * forced to timer in order to deal with legacy user
- * space tools. The /dev/oprofile/hwsampling fs is
- * provided in that case.
- */
- dir = oprofilefs_mkdir(sb, root, "hwsampling");
- if (!dir)
- return -EINVAL;
-
- oprofilefs_create_file(sb, dir, "hwsampler",
- &hwsampler_fops);
- oprofilefs_create_file(sb, dir, "hw_interval",
- &hw_interval_fops);
- oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval",
- &oprofile_min_interval);
- oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval",
- &oprofile_max_interval);
- oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
- &oprofile_sdbt_blocks);
- }
return 0;
}
static int oprofile_hwsampler_init(struct oprofile_operations *ops)
{
- /*
- * Initialize the timer mode infrastructure as well in order
- * to be able to switch back dynamically. oprofile_timer_init
- * is not supposed to fail.
- */
- if (oprofile_timer_init(ops))
- BUG();
-
- memcpy(&timer_ops, ops, sizeof(timer_ops));
- ops->create_files = oprofile_create_hwsampling_files;
-
- /*
- * If the user space tools do not support newer cpu types,
- * the force_cpu_type module parameter
- * can be used to always return \"timer\" as cpu type.
- */
- if (force_cpu_type != timer) {
- struct cpuid id;
-
- get_cpu_id (&id);
-
- switch (id.machine) {
- case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
- case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
- default: return -ENODEV;
- }
- }
-
if (hwsampler_setup())
return -ENODEV;
/*
- * Query the range for the sampling interval from the
- * hardware.
+ * create hwsampler files only if hwsampler_setup() succeeds.
*/
oprofile_min_interval = hwsampler_query_min_interval();
if (oprofile_min_interval == 0)
@@ -466,17 +155,23 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
if (oprofile_hw_interval > oprofile_max_interval)
oprofile_hw_interval = oprofile_max_interval;
- printk(KERN_INFO "oprofile: System z hardware sampling "
- "facility found.\n");
+ if (oprofile_timer_init(ops))
+ return -ENODEV;
+
+ printk(KERN_INFO "oprofile: using hardware sampling\n");
+
+ memcpy(&timer_ops, ops, sizeof(timer_ops));
ops->start = oprofile_hwsampler_start;
ops->stop = oprofile_hwsampler_stop;
+ ops->create_files = oprofile_create_hwsampling_files;
return 0;
}
static void oprofile_hwsampler_exit(void)
{
+ oprofile_timer_exit();
hwsampler_shutdown();
}
@@ -487,15 +182,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
ops->backtrace = s390_backtrace;
#ifdef CONFIG_64BIT
-
- /*
- * -ENODEV is not reported to the caller. The module itself
- * will use the timer mode sampling as fallback and this is
- * always available.
- */
- hwsampler_available = oprofile_hwsampler_init(ops) == 0;
-
- return 0;
+ return oprofile_hwsampler_init(ops);
#else
return -ENODEV;
#endif
diff --git a/trunk/arch/s390/oprofile/op_counter.h b/trunk/arch/s390/oprofile/op_counter.h
deleted file mode 100644
index 1a8d3ca09014..000000000000
--- a/trunk/arch/s390/oprofile/op_counter.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/**
- * arch/s390/oprofile/op_counter.h
- *
- * Copyright (C) 2011 IBM Deutschland Entwicklung GmbH, IBM Corporation
- * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
- *
- * @remark Copyright 2011 OProfile authors
- */
-
-#ifndef OP_COUNTER_H
-#define OP_COUNTER_H
-
-struct op_counter_config {
- /* `enabled' maps to the hwsampler_file variable. */
- /* `count' maps to the oprofile_hw_interval variable. */
- /* `event' and `unit_mask' are unused. */
- unsigned long kernel;
- unsigned long user;
-};
-
-extern struct op_counter_config counter_config;
-
-#endif /* OP_COUNTER_H */
diff --git a/trunk/arch/score/Kconfig b/trunk/arch/score/Kconfig
index 8b0c9464aa9d..df169e84db4e 100644
--- a/trunk/arch/score/Kconfig
+++ b/trunk/arch/score/Kconfig
@@ -4,9 +4,6 @@ config SCORE
def_bool y
select HAVE_GENERIC_HARDIRQS
select GENERIC_IRQ_SHOW
- select HAVE_MEMBLOCK
- select HAVE_MEMBLOCK_NODE_MAP
- select ARCH_DISCARD_MEMBLOCK
choice
prompt "System type"
@@ -63,6 +60,9 @@ config 32BIT
config ARCH_FLATMEM_ENABLE
def_bool y
+config ARCH_POPULATES_NODE_MAP
+ def_bool y
+
source "mm/Kconfig"
config MEMORY_START
diff --git a/trunk/arch/score/kernel/setup.c b/trunk/arch/score/kernel/setup.c
index b48459afefdd..6f898c057878 100644
--- a/trunk/arch/score/kernel/setup.c
+++ b/trunk/arch/score/kernel/setup.c
@@ -26,7 +26,6 @@
#include
#include
#include
-#include
#include
#include
#include
@@ -55,8 +54,7 @@ static void __init bootmem_init(void)
/* Initialize the boot-time allocator with low memory only. */
bootmap_size = init_bootmem_node(NODE_DATA(0), start_pfn,
min_low_pfn, max_low_pfn);
- memblock_add_node(PFN_PHYS(min_low_pfn),
- PFN_PHYS(max_low_pfn - min_low_pfn), 0);
+ add_active_range(0, min_low_pfn, max_low_pfn);
free_bootmem(PFN_PHYS(start_pfn),
(max_low_pfn - start_pfn) << PAGE_SHIFT);
diff --git a/trunk/arch/sh/Kconfig b/trunk/arch/sh/Kconfig
index 47a2f1c2cb0d..5629e2099130 100644
--- a/trunk/arch/sh/Kconfig
+++ b/trunk/arch/sh/Kconfig
@@ -4,7 +4,6 @@ config SUPERH
select CLKDEV_LOOKUP
select HAVE_IDE if HAS_IOPORT
select HAVE_MEMBLOCK
- select HAVE_MEMBLOCK_NODE_MAP
select HAVE_OPROFILE
select HAVE_GENERIC_DMA_COHERENT
select HAVE_ARCH_TRACEHOOK
diff --git a/trunk/arch/sh/include/asm/memblock.h b/trunk/arch/sh/include/asm/memblock.h
new file mode 100644
index 000000000000..e87063fad2ea
--- /dev/null
+++ b/trunk/arch/sh/include/asm/memblock.h
@@ -0,0 +1,4 @@
+#ifndef __ASM_SH_MEMBLOCK_H
+#define __ASM_SH_MEMBLOCK_H
+
+#endif /* __ASM_SH_MEMBLOCK_H */
diff --git a/trunk/arch/sh/kernel/idle.c b/trunk/arch/sh/kernel/idle.c
index 406508d4ce74..db4ecd731a00 100644
--- a/trunk/arch/sh/kernel/idle.c
+++ b/trunk/arch/sh/kernel/idle.c
@@ -89,8 +89,7 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
- tick_nohz_idle_enter();
- rcu_idle_enter();
+ tick_nohz_stop_sched_tick(1);
while (!need_resched()) {
check_pgt_cache();
@@ -112,8 +111,7 @@ void cpu_idle(void)
start_critical_timings();
}
- rcu_idle_exit();
- tick_nohz_idle_exit();
+ tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
schedule();
preempt_disable();
diff --git a/trunk/arch/sh/kernel/machine_kexec.c b/trunk/arch/sh/kernel/machine_kexec.c
index 9fea49f6e667..c5a33f007f88 100644
--- a/trunk/arch/sh/kernel/machine_kexec.c
+++ b/trunk/arch/sh/kernel/machine_kexec.c
@@ -157,6 +157,9 @@ void __init reserve_crashkernel(void)
unsigned long long crash_size, crash_base;
int ret;
+ /* this is necessary because of memblock_phys_mem_size() */
+ memblock_analyze();
+
ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
&crash_size, &crash_base);
if (ret == 0 && crash_size > 0) {
diff --git a/trunk/arch/sh/kernel/setup.c b/trunk/arch/sh/kernel/setup.c
index 7b57bf1dc855..1a0e946679a4 100644
--- a/trunk/arch/sh/kernel/setup.c
+++ b/trunk/arch/sh/kernel/setup.c
@@ -230,8 +230,7 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
pmb_bolt_mapping((unsigned long)__va(start), start, end - start,
PAGE_KERNEL);
- memblock_set_node(PFN_PHYS(start_pfn),
- PFN_PHYS(end_pfn - start_pfn), nid);
+ add_active_range(nid, start_pfn, end_pfn);
}
void __init __weak plat_early_device_setup(void)
diff --git a/trunk/arch/sh/mm/Kconfig b/trunk/arch/sh/mm/Kconfig
index cb8f9920f4dd..c3e61b366493 100644
--- a/trunk/arch/sh/mm/Kconfig
+++ b/trunk/arch/sh/mm/Kconfig
@@ -143,6 +143,9 @@ config MAX_ACTIVE_REGIONS
CPU_SUBTYPE_SH7785)
default "1"
+config ARCH_POPULATES_NODE_MAP
+ def_bool y
+
config ARCH_SELECT_MEMORY_MODEL
def_bool y
diff --git a/trunk/arch/sh/mm/init.c b/trunk/arch/sh/mm/init.c
index 82cc576fab15..939ca0f356f6 100644
--- a/trunk/arch/sh/mm/init.c
+++ b/trunk/arch/sh/mm/init.c
@@ -324,6 +324,7 @@ void __init paging_init(void)
unsigned long vaddr, end;
int nid;
+ memblock_init();
sh_mv.mv_mem_init();
early_reserve_mem();
@@ -336,7 +337,7 @@ void __init paging_init(void)
sh_mv.mv_mem_reserve();
memblock_enforce_memory_limit(memory_limit);
- memblock_allow_resize();
+ memblock_analyze();
memblock_dump_all();
diff --git a/trunk/arch/sparc/Kconfig b/trunk/arch/sparc/Kconfig
index 70ae9d81870e..f92602e86607 100644
--- a/trunk/arch/sparc/Kconfig
+++ b/trunk/arch/sparc/Kconfig
@@ -43,7 +43,6 @@ config SPARC64
select HAVE_KPROBES
select HAVE_RCU_TABLE_FREE if SMP
select HAVE_MEMBLOCK
- select HAVE_MEMBLOCK_NODE_MAP
select HAVE_SYSCALL_WRAPPERS
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
@@ -353,6 +352,9 @@ config NODES_SPAN_OTHER_NODES
def_bool y
depends on NEED_MULTIPLE_NODES
+config ARCH_POPULATES_NODE_MAP
+ def_bool y if SPARC64
+
config ARCH_SELECT_MEMORY_MODEL
def_bool y if SPARC64
diff --git a/trunk/arch/sparc/include/asm/memblock.h b/trunk/arch/sparc/include/asm/memblock.h
new file mode 100644
index 000000000000..c67b047ef85e
--- /dev/null
+++ b/trunk/arch/sparc/include/asm/memblock.h
@@ -0,0 +1,8 @@
+#ifndef _SPARC64_MEMBLOCK_H
+#define _SPARC64_MEMBLOCK_H
+
+#include
+
+#define MEMBLOCK_DBG(fmt...) prom_printf(fmt)
+
+#endif /* !(_SPARC64_MEMBLOCK_H) */
diff --git a/trunk/arch/sparc/kernel/pci_sun4v.c b/trunk/arch/sparc/kernel/pci_sun4v.c
index af5755d20fbe..b272cda35a01 100644
--- a/trunk/arch/sparc/kernel/pci_sun4v.c
+++ b/trunk/arch/sparc/kernel/pci_sun4v.c
@@ -849,10 +849,10 @@ static int pci_sun4v_msiq_build_irq(struct pci_pbm_info *pbm,
if (!irq)
return -ENOMEM;
- if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
- return -EINVAL;
if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
return -EINVAL;
+ if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
+ return -EINVAL;
return irq;
}
diff --git a/trunk/arch/sparc/kernel/process_64.c b/trunk/arch/sparc/kernel/process_64.c
index 39d8b05201a2..3739a06a76cb 100644
--- a/trunk/arch/sparc/kernel/process_64.c
+++ b/trunk/arch/sparc/kernel/process_64.c
@@ -95,14 +95,12 @@ void cpu_idle(void)
set_thread_flag(TIF_POLLING_NRFLAG);
while(1) {
- tick_nohz_idle_enter();
- rcu_idle_enter();
+ tick_nohz_stop_sched_tick(1);
while (!need_resched() && !cpu_is_offline(cpu))
sparc64_yield(cpu);
- rcu_idle_exit();
- tick_nohz_idle_exit();
+ tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
diff --git a/trunk/arch/sparc/kernel/setup_32.c b/trunk/arch/sparc/kernel/setup_32.c
index ffb883ddd0f0..fe1e3fc31bc5 100644
--- a/trunk/arch/sparc/kernel/setup_32.c
+++ b/trunk/arch/sparc/kernel/setup_32.c
@@ -84,7 +84,7 @@ static void prom_sync_me(void)
prom_printf("PROM SYNC COMMAND...\n");
show_free_areas(0);
- if (!is_idle_task(current)) {
+ if(current->pid != 0) {
local_irq_enable();
sys_sync();
local_irq_disable();
diff --git a/trunk/arch/sparc/mm/init_64.c b/trunk/arch/sparc/mm/init_64.c
index b3f5e7dfea51..8e073d802139 100644
--- a/trunk/arch/sparc/mm/init_64.c
+++ b/trunk/arch/sparc/mm/init_64.c
@@ -790,7 +790,7 @@ static int find_node(unsigned long addr)
return -1;
}
-static u64 memblock_nid_range(u64 start, u64 end, int *nid)
+u64 memblock_nid_range(u64 start, u64 end, int *nid)
{
*nid = find_node(start);
start += PAGE_SIZE;
@@ -808,7 +808,7 @@ static u64 memblock_nid_range(u64 start, u64 end, int *nid)
return start;
}
#else
-static u64 memblock_nid_range(u64 start, u64 end, int *nid)
+u64 memblock_nid_range(u64 start, u64 end, int *nid)
{
*nid = 0;
return end;
@@ -816,7 +816,7 @@ static u64 memblock_nid_range(u64 start, u64 end, int *nid)
#endif
/* This must be invoked after performing all of the necessary
- * memblock_set_node() calls for 'nid'. We need to be able to get
+ * add_active_range() calls for 'nid'. We need to be able to get
* correct data from get_pfn_range_for_nid().
*/
static void __init allocate_node_data(int nid)
@@ -987,11 +987,14 @@ static void __init add_node_ranges(void)
this_end = memblock_nid_range(start, end, &nid);
- numadbg("Setting memblock NUMA node nid[%d] "
+ numadbg("Adding active range nid[%d] "
"start[%lx] end[%lx]\n",
nid, start, this_end);
- memblock_set_node(start, this_end - start, nid);
+ add_active_range(nid,
+ start >> PAGE_SHIFT,
+ this_end >> PAGE_SHIFT);
+
start = this_end;
}
}
@@ -1279,6 +1282,7 @@ static void __init bootmem_init_nonnuma(void)
{
unsigned long top_of_ram = memblock_end_of_DRAM();
unsigned long total_ram = memblock_phys_mem_size();
+ struct memblock_region *reg;
numadbg("bootmem_init_nonnuma()\n");
@@ -1288,8 +1292,20 @@ static void __init bootmem_init_nonnuma(void)
(top_of_ram - total_ram) >> 20);
init_node_masks_nonnuma();
- memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
+
+ for_each_memblock(memory, reg) {
+ unsigned long start_pfn, end_pfn;
+
+ if (!reg->size)
+ continue;
+
+ start_pfn = memblock_region_memory_base_pfn(reg);
+ end_pfn = memblock_region_memory_end_pfn(reg);
+ add_active_range(0, start_pfn, end_pfn);
+ }
+
allocate_node_data(0);
+
node_set_online(0);
}
@@ -1753,6 +1769,8 @@ void __init paging_init(void)
sun4v_ktsb_init();
}
+ memblock_init();
+
/* Find available physical memory...
*
* Read it twice in order to work around a bug in openfirmware.
@@ -1778,7 +1796,7 @@ void __init paging_init(void)
memblock_enforce_memory_limit(cmdline_memory_size);
- memblock_allow_resize();
+ memblock_analyze();
memblock_dump_all();
set_bit(0, mmu_context_bmap);
diff --git a/trunk/arch/tile/kernel/process.c b/trunk/arch/tile/kernel/process.c
index 4c1ac6e5347a..9c45d8bbdf57 100644
--- a/trunk/arch/tile/kernel/process.c
+++ b/trunk/arch/tile/kernel/process.c
@@ -85,8 +85,7 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
- tick_nohz_idle_enter();
- rcu_idle_enter();
+ tick_nohz_stop_sched_tick(1);
while (!need_resched()) {
if (cpu_is_offline(cpu))
BUG(); /* no HOTPLUG_CPU */
@@ -106,8 +105,7 @@ void cpu_idle(void)
local_irq_enable();
current_thread_info()->status |= TS_POLLING;
}
- rcu_idle_exit();
- tick_nohz_idle_exit();
+ tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
schedule();
preempt_disable();
diff --git a/trunk/arch/tile/mm/fault.c b/trunk/arch/tile/mm/fault.c
index c1eaaa1fcc20..25b7b90fd620 100644
--- a/trunk/arch/tile/mm/fault.c
+++ b/trunk/arch/tile/mm/fault.c
@@ -54,7 +54,7 @@ static noinline void force_sig_info_fault(const char *type, int si_signo,
if (unlikely(tsk->pid < 2)) {
panic("Signal %d (code %d) at %#lx sent to %s!",
si_signo, si_code & 0xffff, address,
- is_idle_task(tsk) ? "the idle task" : "init");
+ tsk->pid ? "init" : "the idle task");
}
info.si_signo = si_signo;
@@ -515,7 +515,7 @@ static int handle_page_fault(struct pt_regs *regs,
if (unlikely(tsk->pid < 2)) {
panic("Kernel page fault running %s!",
- is_idle_task(tsk) ? "the idle task" : "init");
+ tsk->pid ? "init" : "the idle task");
}
/*
diff --git a/trunk/arch/um/kernel/process.c b/trunk/arch/um/kernel/process.c
index 69f24905abdc..c5338351aecd 100644
--- a/trunk/arch/um/kernel/process.c
+++ b/trunk/arch/um/kernel/process.c
@@ -246,12 +246,10 @@ void default_idle(void)
if (need_resched())
schedule();
- tick_nohz_idle_enter();
- rcu_idle_enter();
+ tick_nohz_stop_sched_tick(1);
nsecs = disable_timer();
idle_sleep(nsecs);
- rcu_idle_exit();
- tick_nohz_idle_exit();
+ tick_nohz_restart_sched_tick();
}
}
diff --git a/trunk/arch/um/kernel/time.c b/trunk/arch/um/kernel/time.c
index 82a6e22f1f35..a08d9fab81f2 100644
--- a/trunk/arch/um/kernel/time.c
+++ b/trunk/arch/um/kernel/time.c
@@ -75,6 +75,8 @@ static struct clocksource itimer_clocksource = {
.rating = 300,
.read = itimer_read,
.mask = CLOCKSOURCE_MASK(64),
+ .mult = 1000,
+ .shift = 0,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
@@ -92,9 +94,9 @@ static void __init setup_itimer(void)
clockevent_delta2ns(60 * HZ, &itimer_clockevent);
itimer_clockevent.min_delta_ns =
clockevent_delta2ns(1, &itimer_clockevent);
- err = clocksource_register_hz(&itimer_clocksource, USEC_PER_SEC);
+ err = clocksource_register(&itimer_clocksource);
if (err) {
- printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
+ printk(KERN_ERR "clocksource_register returned %d\n", err);
return;
}
clockevents_register_device(&itimer_clockevent);
diff --git a/trunk/arch/unicore32/kernel/process.c b/trunk/arch/unicore32/kernel/process.c
index 52edc2b62873..ba401df971ed 100644
--- a/trunk/arch/unicore32/kernel/process.c
+++ b/trunk/arch/unicore32/kernel/process.c
@@ -55,8 +55,7 @@ void cpu_idle(void)
{
/* endless idle loop with no priority at all */
while (1) {
- tick_nohz_idle_enter();
- rcu_idle_enter();
+ tick_nohz_stop_sched_tick(1);
while (!need_resched()) {
local_irq_disable();
stop_critical_timings();
@@ -64,8 +63,7 @@ void cpu_idle(void)
local_irq_enable();
start_critical_timings();
}
- rcu_idle_exit();
- tick_nohz_idle_exit();
+ tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
schedule();
preempt_disable();
diff --git a/trunk/arch/unicore32/kernel/setup.c b/trunk/arch/unicore32/kernel/setup.c
index 673d7a89d8ff..471b6bca8da4 100644
--- a/trunk/arch/unicore32/kernel/setup.c
+++ b/trunk/arch/unicore32/kernel/setup.c
@@ -37,7 +37,6 @@
#include
#include
#include
-#include
#include "setup.h"
diff --git a/trunk/arch/unicore32/mm/init.c b/trunk/arch/unicore32/mm/init.c
index de186bde8975..3b379cddbc64 100644
--- a/trunk/arch/unicore32/mm/init.c
+++ b/trunk/arch/unicore32/mm/init.c
@@ -26,7 +26,6 @@
#include
#include
#include
-#include
#include
#include "mm.h"
@@ -246,6 +245,7 @@ void __init uc32_memblock_init(struct meminfo *mi)
sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]),
meminfo_cmp, NULL);
+ memblock_init();
for (i = 0; i < mi->nr_banks; i++)
memblock_add(mi->bank[i].start, mi->bank[i].size);
@@ -264,7 +264,7 @@ void __init uc32_memblock_init(struct meminfo *mi)
uc32_mm_memblock_reserve();
- memblock_allow_resize();
+ memblock_analyze();
memblock_dump_all();
}
diff --git a/trunk/arch/unicore32/mm/mmu.c b/trunk/arch/unicore32/mm/mmu.c
index 43c20b40e444..3e5c3e5a0b45 100644
--- a/trunk/arch/unicore32/mm/mmu.c
+++ b/trunk/arch/unicore32/mm/mmu.c
@@ -25,7 +25,6 @@
#include
#include
#include
-#include
#include
diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig
index 5731eb70e0a0..efb42949cc09 100644
--- a/trunk/arch/x86/Kconfig
+++ b/trunk/arch/x86/Kconfig
@@ -26,8 +26,6 @@ config X86
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
select HAVE_MEMBLOCK
- select HAVE_MEMBLOCK_NODE_MAP
- select ARCH_DISCARD_MEMBLOCK
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_WANT_FRAME_POINTERS
select HAVE_DMA_ATTRS
@@ -206,6 +204,9 @@ config ZONE_DMA32
bool
default X86_64
+config ARCH_POPULATES_NODE_MAP
+ def_bool y
+
config AUDIT_ARCH
bool
default X86_64
@@ -342,7 +343,6 @@ config X86_EXTENDED_PLATFORM
If you enable this option then you'll be able to select support
for the following (non-PC) 64 bit x86 platforms:
- Numascale NumaChip
ScaleMP vSMP
SGI Ultraviolet
@@ -351,18 +351,6 @@ config X86_EXTENDED_PLATFORM
endif
# This is an alphabetically sorted list of 64 bit extended platforms
# Please maintain the alphabetic order if and when there are additions
-config X86_NUMACHIP
- bool "Numascale NumaChip"
- depends on X86_64
- depends on X86_EXTENDED_PLATFORM
- depends on NUMA
- depends on SMP
- depends on X86_X2APIC
- depends on !EDAC_AMD64
- ---help---
- Adds support for Numascale NumaChip large-SMP systems. Needed to
- enable more than ~168 cores.
- If you don't have one of these, you should say N here.
config X86_VSMP
bool "ScaleMP vSMP"
diff --git a/trunk/arch/x86/ia32/ia32entry.S b/trunk/arch/x86/ia32/ia32entry.S
index 3e274564f6bf..a6253ec1b284 100644
--- a/trunk/arch/x86/ia32/ia32entry.S
+++ b/trunk/arch/x86/ia32/ia32entry.S
@@ -134,7 +134,7 @@ ENTRY(ia32_sysenter_target)
CFI_REL_OFFSET rsp,0
pushfq_cfi
/*CFI_REL_OFFSET rflags,0*/
- movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
+ movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d
CFI_REGISTER rip,r10
pushq_cfi $__USER32_CS
/*CFI_REL_OFFSET cs,0*/
@@ -150,8 +150,9 @@ ENTRY(ia32_sysenter_target)
.section __ex_table,"a"
.quad 1b,ia32_badarg
.previous
- orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
- testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ GET_THREAD_INFO(%r10)
+ orl $TS_COMPAT,TI_status(%r10)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
CFI_REMEMBER_STATE
jnz sysenter_tracesys
cmpq $(IA32_NR_syscalls-1),%rax
@@ -161,12 +162,13 @@ sysenter_do_call:
sysenter_dispatch:
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
+ GET_THREAD_INFO(%r10)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
jnz sysexit_audit
sysexit_from_sys_call:
- andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ andl $~TS_COMPAT,TI_status(%r10)
/* clear IF, that popfq doesn't enable interrupts early */
andl $~0x200,EFLAGS-R11(%rsp)
movl RIP-R11(%rsp),%edx /* User %eip */
@@ -203,7 +205,7 @@ sysexit_from_sys_call:
.endm
.macro auditsys_exit exit
- testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
jnz ia32_ret_from_sys_call
TRACE_IRQS_ON
sti
@@ -213,11 +215,12 @@ sysexit_from_sys_call:
movzbl %al,%edi /* zero-extend that into %edi */
inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
call audit_syscall_exit
+ GET_THREAD_INFO(%r10)
movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
cli
TRACE_IRQS_OFF
- testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ testl %edi,TI_flags(%r10)
jz \exit
CLEAR_RREGS -ARGOFFSET
jmp int_with_check
@@ -235,7 +238,7 @@ sysexit_audit:
sysenter_tracesys:
#ifdef CONFIG_AUDITSYSCALL
- testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
jz sysenter_auditsys
#endif
SAVE_REST
@@ -306,8 +309,9 @@ ENTRY(ia32_cstar_target)
.section __ex_table,"a"
.quad 1b,ia32_badarg
.previous
- orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
- testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ GET_THREAD_INFO(%r10)
+ orl $TS_COMPAT,TI_status(%r10)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
CFI_REMEMBER_STATE
jnz cstar_tracesys
cmpq $IA32_NR_syscalls-1,%rax
@@ -317,12 +321,13 @@ cstar_do_call:
cstar_dispatch:
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
+ GET_THREAD_INFO(%r10)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
jnz sysretl_audit
sysretl_from_sys_call:
- andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ andl $~TS_COMPAT,TI_status(%r10)
RESTORE_ARGS 0,-ARG_SKIP,0,0,0
movl RIP-ARGOFFSET(%rsp),%ecx
CFI_REGISTER rip,rcx
@@ -350,7 +355,7 @@ sysretl_audit:
cstar_tracesys:
#ifdef CONFIG_AUDITSYSCALL
- testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
jz cstar_auditsys
#endif
xchgl %r9d,%ebp
@@ -415,8 +420,9 @@ ENTRY(ia32_syscall)
/* note the registers are not zero extended to the sf.
this could be a problem. */
SAVE_ARGS 0,1,0
- orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
- testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ GET_THREAD_INFO(%r10)
+ orl $TS_COMPAT,TI_status(%r10)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
jnz ia32_tracesys
cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
@@ -453,8 +459,8 @@ quiet_ni_syscall:
CFI_ENDPROC
.macro PTREGSCALL label, func, arg
- ALIGN
-GLOBAL(\label)
+ .globl \label
+\label:
leaq \func(%rip),%rax
leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
jmp ia32_ptregs_common
@@ -471,8 +477,7 @@ GLOBAL(\label)
PTREGSCALL stub32_vfork, sys_vfork, %rdi
PTREGSCALL stub32_iopl, sys_iopl, %rsi
- ALIGN
-ia32_ptregs_common:
+ENTRY(ia32_ptregs_common)
popq %r11
CFI_ENDPROC
CFI_STARTPROC32 simple
diff --git a/trunk/arch/x86/include/asm/alternative-asm.h b/trunk/arch/x86/include/asm/alternative-asm.h
index 952bd0100c5c..091508b533b4 100644
--- a/trunk/arch/x86/include/asm/alternative-asm.h
+++ b/trunk/arch/x86/include/asm/alternative-asm.h
@@ -4,10 +4,10 @@
#ifdef CONFIG_SMP
.macro LOCK_PREFIX
-672: lock
+1: lock
.section .smp_locks,"a"
.balign 4
- .long 672b - .
+ .long 1b - .
.previous
.endm
#else
diff --git a/trunk/arch/x86/include/asm/apic.h b/trunk/arch/x86/include/asm/apic.h
index 3ab9bdd87e79..1a6c09af048f 100644
--- a/trunk/arch/x86/include/asm/apic.h
+++ b/trunk/arch/x86/include/asm/apic.h
@@ -176,7 +176,6 @@ static inline u64 native_x2apic_icr_read(void)
}
extern int x2apic_phys;
-extern int x2apic_preenabled;
extern void check_x2apic(void);
extern void enable_x2apic(void);
extern void x2apic_icr_write(u32 low, u32 id);
@@ -199,9 +198,6 @@ static inline void x2apic_force_phys(void)
x2apic_phys = 1;
}
#else
-static inline void disable_x2apic(void)
-{
-}
static inline void check_x2apic(void)
{
}
@@ -216,7 +212,6 @@ static inline void x2apic_force_phys(void)
{
}
-#define nox2apic 0
#define x2apic_preenabled 0
#define x2apic_supported() 0
#endif
@@ -415,7 +410,6 @@ extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
-
static inline u32 apic_read(u32 reg)
{
return apic->read(reg);
diff --git a/trunk/arch/x86/include/asm/apic_flat_64.h b/trunk/arch/x86/include/asm/apic_flat_64.h
deleted file mode 100644
index a2d312796440..000000000000
--- a/trunk/arch/x86/include/asm/apic_flat_64.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _ASM_X86_APIC_FLAT_64_H
-#define _ASM_X86_APIC_FLAT_64_H
-
-extern void flat_init_apic_ldr(void);
-
-#endif
-
diff --git a/trunk/arch/x86/include/asm/apicdef.h b/trunk/arch/x86/include/asm/apicdef.h
index 134bba00df09..3925d8007864 100644
--- a/trunk/arch/x86/include/asm/apicdef.h
+++ b/trunk/arch/x86/include/asm/apicdef.h
@@ -144,7 +144,6 @@
#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
#define APIC_BASE_MSR 0x800
-#define XAPIC_ENABLE (1UL << 11)
#define X2APIC_ENABLE (1UL << 10)
#ifdef CONFIG_X86_32
diff --git a/trunk/arch/x86/include/asm/bitops.h b/trunk/arch/x86/include/asm/bitops.h
index b97596e2b68c..1775d6e5920e 100644
--- a/trunk/arch/x86/include/asm/bitops.h
+++ b/trunk/arch/x86/include/asm/bitops.h
@@ -380,8 +380,6 @@ static inline unsigned long __fls(unsigned long word)
return word;
}
-#undef ADDR
-
#ifdef __KERNEL__
/**
* ffs - find first set bit in word
@@ -397,25 +395,10 @@ static inline unsigned long __fls(unsigned long word)
static inline int ffs(int x)
{
int r;
-
-#ifdef CONFIG_X86_64
- /*
- * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
- * dest reg is undefined if x==0, but their CPU architect says its
- * value is written to set it to the same as before, except that the
- * top 32 bits will be cleared.
- *
- * We cannot do this on 32 bits because at the very least some
- * 486 CPUs did not behave this way.
- */
- long tmp = -1;
- asm("bsfl %1,%0"
- : "=r" (r)
- : "rm" (x), "0" (tmp));
-#elif defined(CONFIG_X86_CMOV)
+#ifdef CONFIG_X86_CMOV
asm("bsfl %1,%0\n\t"
"cmovzl %2,%0"
- : "=&r" (r) : "rm" (x), "r" (-1));
+ : "=r" (r) : "rm" (x), "r" (-1));
#else
asm("bsfl %1,%0\n\t"
"jnz 1f\n\t"
@@ -439,22 +422,7 @@ static inline int ffs(int x)
static inline int fls(int x)
{
int r;
-
-#ifdef CONFIG_X86_64
- /*
- * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
- * dest reg is undefined if x==0, but their CPU architect says its
- * value is written to set it to the same as before, except that the
- * top 32 bits will be cleared.
- *
- * We cannot do this on 32 bits because at the very least some
- * 486 CPUs did not behave this way.
- */
- long tmp = -1;
- asm("bsrl %1,%0"
- : "=r" (r)
- : "rm" (x), "0" (tmp));
-#elif defined(CONFIG_X86_CMOV)
+#ifdef CONFIG_X86_CMOV
asm("bsrl %1,%0\n\t"
"cmovzl %2,%0"
: "=&r" (r) : "rm" (x), "rm" (-1));
@@ -466,35 +434,11 @@ static inline int fls(int x)
#endif
return r + 1;
}
+#endif /* __KERNEL__ */
-/**
- * fls64 - find last set bit in a 64-bit word
- * @x: the word to search
- *
- * This is defined in a similar way as the libc and compiler builtin
- * ffsll, but returns the position of the most significant set bit.
- *
- * fls64(value) returns 0 if value is 0 or the position of the last
- * set bit if value is nonzero. The last (most significant) bit is
- * at position 64.
- */
-#ifdef CONFIG_X86_64
-static __always_inline int fls64(__u64 x)
-{
- long bitpos = -1;
- /*
- * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
- * dest reg is undefined if x==0, but their CPU architect says its
- * value is written to set it to the same as before.
- */
- asm("bsrq %1,%0"
- : "+r" (bitpos)
- : "rm" (x));
- return bitpos + 1;
-}
-#else
-#include
-#endif
+#undef ADDR
+
+#ifdef __KERNEL__
#include
@@ -506,6 +450,12 @@ static __always_inline int fls64(__u64 x)
#include
+#endif /* __KERNEL__ */
+
+#include
+
+#ifdef __KERNEL__
+
#include
#include
diff --git a/trunk/arch/x86/include/asm/cmpxchg.h b/trunk/arch/x86/include/asm/cmpxchg.h
index 0c9fa2745f13..5d3acdf5a7a6 100644
--- a/trunk/arch/x86/include/asm/cmpxchg.h
+++ b/trunk/arch/x86/include/asm/cmpxchg.h
@@ -14,8 +14,6 @@ extern void __cmpxchg_wrong_size(void)
__compiletime_error("Bad argument size for cmpxchg");
extern void __xadd_wrong_size(void)
__compiletime_error("Bad argument size for xadd");
-extern void __add_wrong_size(void)
- __compiletime_error("Bad argument size for add");
/*
* Constants for operation sizes. On 32-bit, the 64-bit size it set to
@@ -33,47 +31,60 @@ extern void __add_wrong_size(void)
#define __X86_CASE_Q -1 /* sizeof will never return -1 */
#endif
-/*
- * An exchange-type operation, which takes a value and a pointer, and
- * returns a the old value.
- */
-#define __xchg_op(ptr, arg, op, lock) \
- ({ \
- __typeof__ (*(ptr)) __ret = (arg); \
- switch (sizeof(*(ptr))) { \
- case __X86_CASE_B: \
- asm volatile (lock #op "b %b0, %1\n" \
- : "+r" (__ret), "+m" (*(ptr)) \
- : : "memory", "cc"); \
- break; \
- case __X86_CASE_W: \
- asm volatile (lock #op "w %w0, %1\n" \
- : "+r" (__ret), "+m" (*(ptr)) \
- : : "memory", "cc"); \
- break; \
- case __X86_CASE_L: \
- asm volatile (lock #op "l %0, %1\n" \
- : "+r" (__ret), "+m" (*(ptr)) \
- : : "memory", "cc"); \
- break; \
- case __X86_CASE_Q: \
- asm volatile (lock #op "q %q0, %1\n" \
- : "+r" (__ret), "+m" (*(ptr)) \
- : : "memory", "cc"); \
- break; \
- default: \
- __ ## op ## _wrong_size(); \
- } \
- __ret; \
- })
-
/*
* Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
* Since this is generally used to protect other memory information, we
* use "asm volatile" and "memory" clobbers to prevent gcc from moving
* information around.
*/
-#define xchg(ptr, v) __xchg_op((ptr), (v), xchg, "")
+#define __xchg(x, ptr, size) \
+({ \
+ __typeof(*(ptr)) __x = (x); \
+ switch (size) { \
+ case __X86_CASE_B: \
+ { \
+ volatile u8 *__ptr = (volatile u8 *)(ptr); \
+ asm volatile("xchgb %0,%1" \
+ : "=q" (__x), "+m" (*__ptr) \
+ : "0" (__x) \
+ : "memory"); \
+ break; \
+ } \
+ case __X86_CASE_W: \
+ { \
+ volatile u16 *__ptr = (volatile u16 *)(ptr); \
+ asm volatile("xchgw %0,%1" \
+ : "=r" (__x), "+m" (*__ptr) \
+ : "0" (__x) \
+ : "memory"); \
+ break; \
+ } \
+ case __X86_CASE_L: \
+ { \
+ volatile u32 *__ptr = (volatile u32 *)(ptr); \
+ asm volatile("xchgl %0,%1" \
+ : "=r" (__x), "+m" (*__ptr) \
+ : "0" (__x) \
+ : "memory"); \
+ break; \
+ } \
+ case __X86_CASE_Q: \
+ { \
+ volatile u64 *__ptr = (volatile u64 *)(ptr); \
+ asm volatile("xchgq %0,%1" \
+ : "=r" (__x), "+m" (*__ptr) \
+ : "0" (__x) \
+ : "memory"); \
+ break; \
+ } \
+ default: \
+ __xchg_wrong_size(); \
+ } \
+ __x; \
+})
+
+#define xchg(ptr, v) \
+ __xchg((v), (ptr), sizeof(*ptr))
/*
* Atomic compare and exchange. Compare OLD with MEM, if identical,
@@ -154,80 +165,46 @@ extern void __add_wrong_size(void)
__cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
#endif
-/*
- * xadd() adds "inc" to "*ptr" and atomically returns the previous
- * value of "*ptr".
- *
- * xadd() is locked when multiple CPUs are online
- * xadd_sync() is always locked
- * xadd_local() is never locked
- */
-#define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock)
-#define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX)
-#define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ")
-#define xadd_local(ptr, inc) __xadd((ptr), (inc), "")
-
-#define __add(ptr, inc, lock) \
+#define __xadd(ptr, inc, lock) \
({ \
__typeof__ (*(ptr)) __ret = (inc); \
switch (sizeof(*(ptr))) { \
case __X86_CASE_B: \
- asm volatile (lock "addb %b1, %0\n" \
- : "+m" (*(ptr)) : "ri" (inc) \
- : "memory", "cc"); \
+ asm volatile (lock "xaddb %b0, %1\n" \
+ : "+r" (__ret), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
break; \
case __X86_CASE_W: \
- asm volatile (lock "addw %w1, %0\n" \
- : "+m" (*(ptr)) : "ri" (inc) \
- : "memory", "cc"); \
+ asm volatile (lock "xaddw %w0, %1\n" \
+ : "+r" (__ret), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
break; \
case __X86_CASE_L: \
- asm volatile (lock "addl %1, %0\n" \
- : "+m" (*(ptr)) : "ri" (inc) \
- : "memory", "cc"); \
+ asm volatile (lock "xaddl %0, %1\n" \
+ : "+r" (__ret), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
break; \
case __X86_CASE_Q: \
- asm volatile (lock "addq %1, %0\n" \
- : "+m" (*(ptr)) : "ri" (inc) \
- : "memory", "cc"); \
+ asm volatile (lock "xaddq %q0, %1\n" \
+ : "+r" (__ret), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
break; \
default: \
- __add_wrong_size(); \
+ __xadd_wrong_size(); \
} \
__ret; \
})
/*
- * add_*() adds "inc" to "*ptr"
+ * xadd() adds "inc" to "*ptr" and atomically returns the previous
+ * value of "*ptr".
*
- * __add() takes a lock prefix
- * add_smp() is locked when multiple CPUs are online
- * add_sync() is always locked
+ * xadd() is locked when multiple CPUs are online
+ * xadd_sync() is always locked
+ * xadd_local() is never locked
*/
-#define add_smp(ptr, inc) __add((ptr), (inc), LOCK_PREFIX)
-#define add_sync(ptr, inc) __add((ptr), (inc), "lock; ")
-
-#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \
-({ \
- bool __ret; \
- __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \
- __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \
- BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \
- BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \
- VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \
- VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \
- asm volatile(pfx "cmpxchg%c4b %2; sete %0" \
- : "=a" (__ret), "+d" (__old2), \
- "+m" (*(p1)), "+m" (*(p2)) \
- : "i" (2 * sizeof(long)), "a" (__old1), \
- "b" (__new1), "c" (__new2)); \
- __ret; \
-})
-
-#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
- __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2)
-
-#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \
- __cmpxchg_double(, p1, p2, o1, o2, n1, n2)
+#define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX)
+#define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ")
+#define xadd_local(ptr, inc) __xadd((ptr), (inc), "")
#endif /* ASM_X86_CMPXCHG_H */
diff --git a/trunk/arch/x86/include/asm/cmpxchg_32.h b/trunk/arch/x86/include/asm/cmpxchg_32.h
index 53f4b219336b..fbebb07dd80b 100644
--- a/trunk/arch/x86/include/asm/cmpxchg_32.h
+++ b/trunk/arch/x86/include/asm/cmpxchg_32.h
@@ -166,6 +166,52 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
#endif
+#define cmpxchg8b(ptr, o1, o2, n1, n2) \
+({ \
+ char __ret; \
+ __typeof__(o2) __dummy; \
+ __typeof__(*(ptr)) __old1 = (o1); \
+ __typeof__(o2) __old2 = (o2); \
+ __typeof__(*(ptr)) __new1 = (n1); \
+ __typeof__(o2) __new2 = (n2); \
+ asm volatile(LOCK_PREFIX "cmpxchg8b %2; setz %1" \
+ : "=d"(__dummy), "=a" (__ret), "+m" (*ptr)\
+ : "a" (__old1), "d"(__old2), \
+ "b" (__new1), "c" (__new2) \
+ : "memory"); \
+ __ret; })
+
+
+#define cmpxchg8b_local(ptr, o1, o2, n1, n2) \
+({ \
+ char __ret; \
+ __typeof__(o2) __dummy; \
+ __typeof__(*(ptr)) __old1 = (o1); \
+ __typeof__(o2) __old2 = (o2); \
+ __typeof__(*(ptr)) __new1 = (n1); \
+ __typeof__(o2) __new2 = (n2); \
+ asm volatile("cmpxchg8b %2; setz %1" \
+ : "=d"(__dummy), "=a"(__ret), "+m" (*ptr)\
+ : "a" (__old), "d"(__old2), \
+ "b" (__new1), "c" (__new2), \
+ : "memory"); \
+ __ret; })
+
+
+#define cmpxchg_double(ptr, o1, o2, n1, n2) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
+ VM_BUG_ON((unsigned long)(ptr) % 8); \
+ cmpxchg8b((ptr), (o1), (o2), (n1), (n2)); \
+})
+
+#define cmpxchg_double_local(ptr, o1, o2, n1, n2) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
+ VM_BUG_ON((unsigned long)(ptr) % 8); \
+ cmpxchg8b_local((ptr), (o1), (o2), (n1), (n2)); \
+})
+
#define system_has_cmpxchg_double() cpu_has_cx8
#endif /* _ASM_X86_CMPXCHG_32_H */
diff --git a/trunk/arch/x86/include/asm/cmpxchg_64.h b/trunk/arch/x86/include/asm/cmpxchg_64.h
index 614be87f1a9b..285da02c38fa 100644
--- a/trunk/arch/x86/include/asm/cmpxchg_64.h
+++ b/trunk/arch/x86/include/asm/cmpxchg_64.h
@@ -20,6 +20,49 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
cmpxchg_local((ptr), (o), (n)); \
})
+#define cmpxchg16b(ptr, o1, o2, n1, n2) \
+({ \
+ char __ret; \
+ __typeof__(o2) __junk; \
+ __typeof__(*(ptr)) __old1 = (o1); \
+ __typeof__(o2) __old2 = (o2); \
+ __typeof__(*(ptr)) __new1 = (n1); \
+ __typeof__(o2) __new2 = (n2); \
+ asm volatile(LOCK_PREFIX "cmpxchg16b %2;setz %1" \
+ : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \
+ : "b"(__new1), "c"(__new2), \
+ "a"(__old1), "d"(__old2)); \
+ __ret; })
+
+
+#define cmpxchg16b_local(ptr, o1, o2, n1, n2) \
+({ \
+ char __ret; \
+ __typeof__(o2) __junk; \
+ __typeof__(*(ptr)) __old1 = (o1); \
+ __typeof__(o2) __old2 = (o2); \
+ __typeof__(*(ptr)) __new1 = (n1); \
+ __typeof__(o2) __new2 = (n2); \
+ asm volatile("cmpxchg16b %2;setz %1" \
+ : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \
+ : "b"(__new1), "c"(__new2), \
+ "a"(__old1), "d"(__old2)); \
+ __ret; })
+
+#define cmpxchg_double(ptr, o1, o2, n1, n2) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ VM_BUG_ON((unsigned long)(ptr) % 16); \
+ cmpxchg16b((ptr), (o1), (o2), (n1), (n2)); \
+})
+
+#define cmpxchg_double_local(ptr, o1, o2, n1, n2) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ VM_BUG_ON((unsigned long)(ptr) % 16); \
+ cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \
+})
+
#define system_has_cmpxchg_double() cpu_has_cx16
#endif /* _ASM_X86_CMPXCHG_64_H */
diff --git a/trunk/arch/x86/include/asm/div64.h b/trunk/arch/x86/include/asm/div64.h
index ced283ac79df..9a2d644c08ef 100644
--- a/trunk/arch/x86/include/asm/div64.h
+++ b/trunk/arch/x86/include/asm/div64.h
@@ -4,7 +4,6 @@
#ifdef CONFIG_X86_32
#include
-#include
/*
* do_div() is NOT a C function. It wants to return
@@ -22,20 +21,15 @@
({ \
unsigned long __upper, __low, __high, __mod, __base; \
__base = (base); \
- if (__builtin_constant_p(__base) && is_power_of_2(__base)) { \
- __mod = n & (__base - 1); \
- n >>= ilog2(__base); \
- } else { \
- asm("" : "=a" (__low), "=d" (__high) : "A" (n));\
- __upper = __high; \
- if (__high) { \
- __upper = __high % (__base); \
- __high = __high / (__base); \
- } \
- asm("divl %2" : "=a" (__low), "=d" (__mod) \
- : "rm" (__base), "0" (__low), "1" (__upper)); \
- asm("" : "=A" (n) : "a" (__low), "d" (__high)); \
+ asm("":"=a" (__low), "=d" (__high) : "A" (n)); \
+ __upper = __high; \
+ if (__high) { \
+ __upper = __high % (__base); \
+ __high = __high / (__base); \
} \
+ asm("divl %2":"=a" (__low), "=d" (__mod) \
+ : "rm" (__base), "0" (__low), "1" (__upper)); \
+ asm("":"=A" (n) : "a" (__low), "d" (__high)); \
__mod; \
})
diff --git a/trunk/arch/x86/include/asm/e820.h b/trunk/arch/x86/include/asm/e820.h
index 37782566af24..908b96957d88 100644
--- a/trunk/arch/x86/include/asm/e820.h
+++ b/trunk/arch/x86/include/asm/e820.h
@@ -117,7 +117,7 @@ static inline void early_memtest(unsigned long start, unsigned long end)
extern unsigned long e820_end_of_ram_pfn(void);
extern unsigned long e820_end_of_low_ram_pfn(void);
-extern u64 early_reserve_e820(u64 sizet, u64 align);
+extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
void memblock_x86_fill(void);
void memblock_find_dma_reserve(void);
diff --git a/trunk/arch/x86/include/asm/hardirq.h b/trunk/arch/x86/include/asm/hardirq.h
index da0b3ca815b7..55e4de613f0e 100644
--- a/trunk/arch/x86/include/asm/hardirq.h
+++ b/trunk/arch/x86/include/asm/hardirq.h
@@ -11,7 +11,6 @@ typedef struct {
#ifdef CONFIG_X86_LOCAL_APIC
unsigned int apic_timer_irqs; /* arch dependent */
unsigned int irq_spurious_count;
- unsigned int icr_read_retry_count;
#endif
unsigned int x86_platform_ipis; /* arch dependent */
unsigned int apic_perf_irqs;
diff --git a/trunk/arch/x86/include/asm/i387.h b/trunk/arch/x86/include/asm/i387.h
index 6919e936345b..c9e09ea05644 100644
--- a/trunk/arch/x86/include/asm/i387.h
+++ b/trunk/arch/x86/include/asm/i387.h
@@ -218,7 +218,7 @@ static inline void fpu_fxsave(struct fpu *fpu)
#ifdef CONFIG_SMP
#define safe_address (__per_cpu_offset[0])
#else
-#define safe_address (__get_cpu_var(kernel_cpustat).cpustat[CPUTIME_USER])
+#define safe_address (kstat_cpu(0).cpustat.user)
#endif
/*
diff --git a/trunk/arch/x86/include/asm/insn.h b/trunk/arch/x86/include/asm/insn.h
index 74df3f1eddfd..88c765e16410 100644
--- a/trunk/arch/x86/include/asm/insn.h
+++ b/trunk/arch/x86/include/asm/insn.h
@@ -137,13 +137,6 @@ static inline int insn_is_avx(struct insn *insn)
return (insn->vex_prefix.value != 0);
}
-/* Ensure this instruction is decoded completely */
-static inline int insn_complete(struct insn *insn)
-{
- return insn->opcode.got && insn->modrm.got && insn->sib.got &&
- insn->displacement.got && insn->immediate.got;
-}
-
static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
{
if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
diff --git a/trunk/arch/x86/include/asm/mach_timer.h b/trunk/arch/x86/include/asm/mach_timer.h
index 88d0c3c74c13..853728519ae9 100644
--- a/trunk/arch/x86/include/asm/mach_timer.h
+++ b/trunk/arch/x86/include/asm/mach_timer.h
@@ -15,7 +15,7 @@
#define CALIBRATE_TIME_MSEC 30 /* 30 msecs */
#define CALIBRATE_LATCH \
- ((PIT_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000)
+ ((CLOCK_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000)
static inline void mach_prepare_counter(void)
{
diff --git a/trunk/arch/x86/include/asm/mc146818rtc.h b/trunk/arch/x86/include/asm/mc146818rtc.h
index 0e8e85bb7c51..01fdf5674e24 100644
--- a/trunk/arch/x86/include/asm/mc146818rtc.h
+++ b/trunk/arch/x86/include/asm/mc146818rtc.h
@@ -81,8 +81,8 @@ static inline unsigned char current_lock_cmos_reg(void)
#else
#define lock_cmos_prefix(reg) do {} while (0)
#define lock_cmos_suffix(reg) do {} while (0)
-#define lock_cmos(reg) do { } while (0)
-#define unlock_cmos() do { } while (0)
+#define lock_cmos(reg)
+#define unlock_cmos()
#define do_i_have_lock_cmos() 0
#define current_lock_cmos_reg() 0
#endif
diff --git a/trunk/arch/x86/include/asm/mce.h b/trunk/arch/x86/include/asm/mce.h
index 6add827381c9..0e8ae57d3656 100644
--- a/trunk/arch/x86/include/asm/mce.h
+++ b/trunk/arch/x86/include/asm/mce.h
@@ -50,11 +50,10 @@
#define MCJ_CTX_MASK 3
#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
#define MCJ_CTX_RANDOM 0 /* inject context: random */
-#define MCJ_CTX_PROCESS 0x1 /* inject context: process */
-#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
-#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
-#define MCJ_EXCEPTION 0x8 /* raise as exception */
-#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */
+#define MCJ_CTX_PROCESS 1 /* inject context: process */
+#define MCJ_CTX_IRQ 2 /* inject context: IRQ */
+#define MCJ_NMI_BROADCAST 4 /* do NMI broadcasting */
+#define MCJ_EXCEPTION 8 /* raise as exception */
/* Fields are zero when not available */
struct mce {
@@ -121,8 +120,7 @@ struct mce_log {
#ifdef __KERNEL__
-extern void mce_register_decode_chain(struct notifier_block *nb);
-extern void mce_unregister_decode_chain(struct notifier_block *nb);
+extern struct atomic_notifier_head x86_mce_decoder_chain;
#include
#include
diff --git a/trunk/arch/x86/include/asm/memblock.h b/trunk/arch/x86/include/asm/memblock.h
new file mode 100644
index 000000000000..0cd3800f33b9
--- /dev/null
+++ b/trunk/arch/x86/include/asm/memblock.h
@@ -0,0 +1,23 @@
+#ifndef _X86_MEMBLOCK_H
+#define _X86_MEMBLOCK_H
+
+#define ARCH_DISCARD_MEMBLOCK
+
+u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align);
+
+void memblock_x86_reserve_range(u64 start, u64 end, char *name);
+void memblock_x86_free_range(u64 start, u64 end);
+struct range;
+int __get_free_all_memory_range(struct range **range, int nodeid,
+ unsigned long start_pfn, unsigned long end_pfn);
+int get_free_all_memory_range(struct range **rangep, int nodeid);
+
+void memblock_x86_register_active_regions(int nid, unsigned long start_pfn,
+ unsigned long last_pfn);
+u64 memblock_x86_hole_size(u64 start, u64 end);
+u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align);
+u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit);
+u64 memblock_x86_memory_in_range(u64 addr, u64 limit);
+bool memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align);
+
+#endif
diff --git a/trunk/arch/x86/include/asm/microcode.h b/trunk/arch/x86/include/asm/microcode.h
index 4ebe157bf73d..24215072d0e1 100644
--- a/trunk/arch/x86/include/asm/microcode.h
+++ b/trunk/arch/x86/include/asm/microcode.h
@@ -48,7 +48,6 @@ static inline struct microcode_ops * __init init_intel_microcode(void)
#ifdef CONFIG_MICROCODE_AMD
extern struct microcode_ops * __init init_amd_microcode(void);
-extern void __exit exit_amd_microcode(void);
static inline void get_ucode_data(void *to, const u8 *from, size_t n)
{
@@ -60,7 +59,6 @@ static inline struct microcode_ops * __init init_amd_microcode(void)
{
return NULL;
}
-static inline void __exit exit_amd_microcode(void) {}
#endif
#endif /* _ASM_X86_MICROCODE_H */
diff --git a/trunk/arch/x86/include/asm/numachip/numachip_csr.h b/trunk/arch/x86/include/asm/numachip/numachip_csr.h
deleted file mode 100644
index 660f843df928..000000000000
--- a/trunk/arch/x86/include/asm/numachip/numachip_csr.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Numascale NumaConnect-Specific Header file
- *
- * Copyright (C) 2011 Numascale AS. All rights reserved.
- *
- * Send feedback to
- *
- */
-
-#ifndef _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
-#define _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
-
-#include
-#include
-#include
-#include
-#include
-#include
-
-#define CSR_NODE_SHIFT 16
-#define CSR_NODE_BITS(p) (((unsigned long)(p)) << CSR_NODE_SHIFT)
-#define CSR_NODE_MASK 0x0fff /* 4K nodes */
-
-/* 32K CSR space, b15 indicates geo/non-geo */
-#define CSR_OFFSET_MASK 0x7fffUL
-
-/* Global CSR space covers all 4K possible nodes with 64K CSR space per node */
-#define NUMACHIP_GCSR_BASE 0x3fff00000000ULL
-#define NUMACHIP_GCSR_LIM 0x3fff0fffffffULL
-#define NUMACHIP_GCSR_SIZE (NUMACHIP_GCSR_LIM - NUMACHIP_GCSR_BASE + 1)
-
-/*
- * Local CSR space starts in global CSR space with "nodeid" = 0xfff0, however
- * when using the direct mapping on x86_64, both start and size needs to be
- * aligned with PMD_SIZE which is 2M
- */
-#define NUMACHIP_LCSR_BASE 0x3ffffe000000ULL
-#define NUMACHIP_LCSR_LIM 0x3fffffffffffULL
-#define NUMACHIP_LCSR_SIZE (NUMACHIP_LCSR_LIM - NUMACHIP_LCSR_BASE + 1)
-
-static inline void *gcsr_address(int node, unsigned long offset)
-{
- return __va(NUMACHIP_GCSR_BASE | (1UL << 15) |
- CSR_NODE_BITS(node & CSR_NODE_MASK) | (offset & CSR_OFFSET_MASK));
-}
-
-static inline void *lcsr_address(unsigned long offset)
-{
- return __va(NUMACHIP_LCSR_BASE | (1UL << 15) |
- CSR_NODE_BITS(0xfff0) | (offset & CSR_OFFSET_MASK));
-}
-
-static inline unsigned int read_gcsr(int node, unsigned long offset)
-{
- return swab32(readl(gcsr_address(node, offset)));
-}
-
-static inline void write_gcsr(int node, unsigned long offset, unsigned int val)
-{
- writel(swab32(val), gcsr_address(node, offset));
-}
-
-static inline unsigned int read_lcsr(unsigned long offset)
-{
- return swab32(readl(lcsr_address(offset)));
-}
-
-static inline void write_lcsr(unsigned long offset, unsigned int val)
-{
- writel(swab32(val), lcsr_address(offset));
-}
-
-/* ========================================================================= */
-/* CSR_G0_STATE_CLEAR */
-/* ========================================================================= */
-
-#define CSR_G0_STATE_CLEAR (0x000 + (0 << 12))
-union numachip_csr_g0_state_clear {
- unsigned int v;
- struct numachip_csr_g0_state_clear_s {
- unsigned int _state:2;
- unsigned int _rsvd_2_6:5;
- unsigned int _lost:1;
- unsigned int _rsvd_8_31:24;
- } s;
-};
-
-/* ========================================================================= */
-/* CSR_G0_NODE_IDS */
-/* ========================================================================= */
-
-#define CSR_G0_NODE_IDS (0x008 + (0 << 12))
-union numachip_csr_g0_node_ids {
- unsigned int v;
- struct numachip_csr_g0_node_ids_s {
- unsigned int _initialid:16;
- unsigned int _nodeid:12;
- unsigned int _rsvd_28_31:4;
- } s;
-};
-
-/* ========================================================================= */
-/* CSR_G3_EXT_IRQ_GEN */
-/* ========================================================================= */
-
-#define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12))
-union numachip_csr_g3_ext_irq_gen {
- unsigned int v;
- struct numachip_csr_g3_ext_irq_gen_s {
- unsigned int _vector:8;
- unsigned int _msgtype:3;
- unsigned int _index:5;
- unsigned int _destination_apic_id:16;
- } s;
-};
-
-/* ========================================================================= */
-/* CSR_G3_EXT_IRQ_STATUS */
-/* ========================================================================= */
-
-#define CSR_G3_EXT_IRQ_STATUS (0x034 + (3 << 12))
-union numachip_csr_g3_ext_irq_status {
- unsigned int v;
- struct numachip_csr_g3_ext_irq_status_s {
- unsigned int _result:32;
- } s;
-};
-
-/* ========================================================================= */
-/* CSR_G3_EXT_IRQ_DEST */
-/* ========================================================================= */
-
-#define CSR_G3_EXT_IRQ_DEST (0x038 + (3 << 12))
-union numachip_csr_g3_ext_irq_dest {
- unsigned int v;
- struct numachip_csr_g3_ext_irq_dest_s {
- unsigned int _irq:8;
- unsigned int _rsvd_8_31:24;
- } s;
-};
-
-/* ========================================================================= */
-/* CSR_G3_NC_ATT_MAP_SELECT */
-/* ========================================================================= */
-
-#define CSR_G3_NC_ATT_MAP_SELECT (0x7fc + (3 << 12))
-union numachip_csr_g3_nc_att_map_select {
- unsigned int v;
- struct numachip_csr_g3_nc_att_map_select_s {
- unsigned int _upper_address_bits:4;
- unsigned int _select_ram:4;
- unsigned int _rsvd_8_31:24;
- } s;
-};
-
-/* ========================================================================= */
-/* CSR_G3_NC_ATT_MAP_SELECT_0-255 */
-/* ========================================================================= */
-
-#define CSR_G3_NC_ATT_MAP_SELECT_0 (0x800 + (3 << 12))
-
-#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */
-
diff --git a/trunk/arch/x86/include/asm/percpu.h b/trunk/arch/x86/include/asm/percpu.h
index 529bf07e8067..3470c9d0ebba 100644
--- a/trunk/arch/x86/include/asm/percpu.h
+++ b/trunk/arch/x86/include/asm/percpu.h
@@ -451,20 +451,23 @@ do { \
#endif /* !CONFIG_M386 */
#ifdef CONFIG_X86_CMPXCHG64
-#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \
+#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \
({ \
- bool __ret; \
- typeof(pcp1) __o1 = (o1), __n1 = (n1); \
- typeof(pcp2) __o2 = (o2), __n2 = (n2); \
+ char __ret; \
+ typeof(o1) __o1 = o1; \
+ typeof(o1) __n1 = n1; \
+ typeof(o2) __o2 = o2; \
+ typeof(o2) __n2 = n2; \
+ typeof(o2) __dummy = n2; \
asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \
- : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \
- : "b" (__n1), "c" (__n2), "a" (__o1)); \
+ : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \
+ : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \
__ret; \
})
-#define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
-#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
-#define irqsafe_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
+#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
#endif /* CONFIG_X86_CMPXCHG64 */
/*
@@ -505,23 +508,31 @@ do { \
* it in software. The address used in the cmpxchg16 instruction must be
* aligned to a 16 byte boundary.
*/
-#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2) \
+#ifdef CONFIG_SMP
+#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP3
+#else
+#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP2
+#endif
+#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \
({ \
- bool __ret; \
- typeof(pcp1) __o1 = (o1), __n1 = (n1); \
- typeof(pcp2) __o2 = (o2), __n2 = (n2); \
- alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \
- "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \
+ char __ret; \
+ typeof(o1) __o1 = o1; \
+ typeof(o1) __n1 = n1; \
+ typeof(o2) __o2 = o2; \
+ typeof(o2) __n2 = n2; \
+ typeof(o2) __dummy; \
+ alternative_io(CMPXCHG16B_EMU_CALL, \
+ "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \
X86_FEATURE_CX16, \
- ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \
- "+m" (pcp2), "+d" (__o2)), \
- "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \
+ ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \
+ "S" (&pcp1), "b"(__n1), "c"(__n2), \
+ "a"(__o1), "d"(__o2) : "memory"); \
__ret; \
})
-#define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
-#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
-#define irqsafe_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
+#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
#endif
diff --git a/trunk/arch/x86/include/asm/perf_event.h b/trunk/arch/x86/include/asm/perf_event.h
index 096c975e099f..f61c62f7d5d8 100644
--- a/trunk/arch/x86/include/asm/perf_event.h
+++ b/trunk/arch/x86/include/asm/perf_event.h
@@ -57,7 +57,6 @@
(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
-#define ARCH_PERFMON_EVENTS_COUNT 7
/*
* Intel "Architectural Performance Monitoring" CPUID
@@ -73,19 +72,6 @@ union cpuid10_eax {
unsigned int full;
};
-union cpuid10_ebx {
- struct {
- unsigned int no_unhalted_core_cycles:1;
- unsigned int no_instructions_retired:1;
- unsigned int no_unhalted_reference_cycles:1;
- unsigned int no_llc_reference:1;
- unsigned int no_llc_misses:1;
- unsigned int no_branch_instruction_retired:1;
- unsigned int no_branch_misses_retired:1;
- } split;
- unsigned int full;
-};
-
union cpuid10_edx {
struct {
unsigned int num_counters_fixed:5;
@@ -95,15 +81,6 @@ union cpuid10_edx {
unsigned int full;
};
-struct x86_pmu_capability {
- int version;
- int num_counters_gp;
- int num_counters_fixed;
- int bit_width_gp;
- int bit_width_fixed;
- unsigned int events_mask;
- int events_mask_len;
-};
/*
* Fixed-purpose performance events:
@@ -112,24 +89,23 @@ struct x86_pmu_capability {
/*
* All 3 fixed-mode PMCs are configured via this single MSR:
*/
-#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
+#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
/*
* The counts are available in three separate MSRs:
*/
/* Instr_Retired.Any: */
-#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
-#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
+#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
+#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
/* CPU_CLK_Unhalted.Core: */
-#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
-#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
+#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
+#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
/* CPU_CLK_Unhalted.Ref: */
-#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
-#define X86_PMC_IDX_FIXED_REF_CYCLES (X86_PMC_IDX_FIXED + 2)
-#define X86_PMC_MSK_FIXED_REF_CYCLES (1ULL << X86_PMC_IDX_FIXED_REF_CYCLES)
+#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
+#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2)
/*
* We model BTS tracing as another fixed-mode PMC.
@@ -226,7 +202,6 @@ struct perf_guest_switch_msr {
};
extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
-extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
#else
static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
@@ -234,11 +209,6 @@ static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
return NULL;
}
-static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
-{
- memset(cap, 0, sizeof(*cap));
-}
-
static inline void perf_events_lapic_init(void) { }
#endif
diff --git a/trunk/arch/x86/include/asm/pgtable.h b/trunk/arch/x86/include/asm/pgtable.h
index 49afb3f41eb6..18601c86fab1 100644
--- a/trunk/arch/x86/include/asm/pgtable.h
+++ b/trunk/arch/x86/include/asm/pgtable.h
@@ -703,7 +703,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
pte_update(mm, addr, ptep);
}
-#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
+#define flush_tlb_fix_spurious_fault(vma, address)
#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
diff --git a/trunk/arch/x86/include/asm/processor-flags.h b/trunk/arch/x86/include/asm/processor-flags.h
index f8ab3eaad128..2dddb317bb39 100644
--- a/trunk/arch/x86/include/asm/processor-flags.h
+++ b/trunk/arch/x86/include/asm/processor-flags.h
@@ -6,7 +6,6 @@
* EFLAGS bits
*/
#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
-#define X86_EFLAGS_BIT1 0x00000002 /* Bit 1 - always on */
#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */
#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
diff --git a/trunk/arch/x86/include/asm/processor.h b/trunk/arch/x86/include/asm/processor.h
index aa9088c26931..b650435ffb53 100644
--- a/trunk/arch/x86/include/asm/processor.h
+++ b/trunk/arch/x86/include/asm/processor.h
@@ -99,6 +99,7 @@ struct cpuinfo_x86 {
u16 apicid;
u16 initial_apicid;
u16 x86_clflush_size;
+#ifdef CONFIG_SMP
/* number of cores as seen by the OS: */
u16 booted_cores;
/* Physical processor id: */
@@ -109,6 +110,7 @@ struct cpuinfo_x86 {
u8 compute_unit_id;
/* Index into per_cpu list: */
u16 cpu_index;
+#endif
u32 microcode;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
diff --git a/trunk/arch/x86/include/asm/spinlock.h b/trunk/arch/x86/include/asm/spinlock.h
index a82c2bf504b6..972c260919a3 100644
--- a/trunk/arch/x86/include/asm/spinlock.h
+++ b/trunk/arch/x86/include/asm/spinlock.h
@@ -79,10 +79,23 @@ static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
}
+#if (NR_CPUS < 256)
static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
{
- __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
+ asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
+ : "+m" (lock->head_tail)
+ :
+ : "memory", "cc");
}
+#else
+static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
+{
+ asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
+ : "+m" (lock->head_tail)
+ :
+ : "memory", "cc");
+}
+#endif
static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
{
diff --git a/trunk/arch/x86/include/asm/thread_info.h b/trunk/arch/x86/include/asm/thread_info.h
index 185b719ec61a..a1fe5c127b52 100644
--- a/trunk/arch/x86/include/asm/thread_info.h
+++ b/trunk/arch/x86/include/asm/thread_info.h
@@ -40,8 +40,7 @@ struct thread_info {
*/
__u8 supervisor_stack[0];
#endif
- int sig_on_uaccess_error:1;
- int uaccess_err:1; /* uaccess failed */
+ int uaccess_err;
};
#define INIT_THREAD_INFO(tsk) \
@@ -232,12 +231,6 @@ static inline struct thread_info *current_thread_info(void)
movq PER_CPU_VAR(kernel_stack),reg ; \
subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
-/*
- * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
- * a certain register (to be used in assembler memory operands).
- */
-#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
-
#endif
#endif /* !X86_32 */
diff --git a/trunk/arch/x86/include/asm/topology.h b/trunk/arch/x86/include/asm/topology.h
index 800f77c60051..c00692476e9f 100644
--- a/trunk/arch/x86/include/asm/topology.h
+++ b/trunk/arch/x86/include/asm/topology.h
@@ -130,8 +130,10 @@ extern void setup_node_to_cpumask_map(void);
.balance_interval = 1, \
}
+#ifdef CONFIG_X86_64
extern int __node_distance(int, int);
#define node_distance(a, b) __node_distance(a, b)
+#endif
#else /* !CONFIG_NUMA */
diff --git a/trunk/arch/x86/include/asm/tsc.h b/trunk/arch/x86/include/asm/tsc.h
index 15d99153a96d..83e2efd181e2 100644
--- a/trunk/arch/x86/include/asm/tsc.h
+++ b/trunk/arch/x86/include/asm/tsc.h
@@ -51,8 +51,6 @@ extern int unsynchronized_tsc(void);
extern int check_tsc_unstable(void);
extern unsigned long native_calibrate_tsc(void);
-extern int tsc_clocksource_reliable;
-
/*
* Boot-time check whether the TSCs are synchronized across
* all CPUs/cores:
diff --git a/trunk/arch/x86/include/asm/uaccess.h b/trunk/arch/x86/include/asm/uaccess.h
index 8be5f54d9360..36361bf6fdd1 100644
--- a/trunk/arch/x86/include/asm/uaccess.h
+++ b/trunk/arch/x86/include/asm/uaccess.h
@@ -462,7 +462,7 @@ struct __large_struct { unsigned long buf[100]; };
barrier();
#define uaccess_catch(err) \
- (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \
+ (err) |= current_thread_info()->uaccess_err; \
current_thread_info()->uaccess_err = prev_err; \
} while (0)
diff --git a/trunk/arch/x86/include/asm/x86_init.h b/trunk/arch/x86/include/asm/x86_init.h
index 1ac860a09849..1971e652d24b 100644
--- a/trunk/arch/x86/include/asm/x86_init.h
+++ b/trunk/arch/x86/include/asm/x86_init.h
@@ -7,7 +7,6 @@
struct mpc_bus;
struct mpc_cpu;
struct mpc_table;
-struct cpuinfo_x86;
/**
* struct x86_init_mpparse - platform specific mpparse ops
@@ -148,7 +147,6 @@ struct x86_init_ops {
*/
struct x86_cpuinit_ops {
void (*setup_percpu_clockev)(void);
- void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
};
/**
@@ -188,6 +186,5 @@ extern struct x86_msi_ops x86_msi;
extern void x86_init_noop(void);
extern void x86_init_uint_noop(unsigned int unused);
-extern void x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node);
#endif
diff --git a/trunk/arch/x86/kernel/acpi/boot.c b/trunk/arch/x86/kernel/acpi/boot.c
index ce664f33ea8e..4558f0d0822d 100644
--- a/trunk/arch/x86/kernel/acpi/boot.c
+++ b/trunk/arch/x86/kernel/acpi/boot.c
@@ -219,8 +219,6 @@ static int __init
acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
{
struct acpi_madt_local_x2apic *processor = NULL;
- int apic_id;
- u8 enabled;
processor = (struct acpi_madt_local_x2apic *)header;
@@ -229,8 +227,6 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
acpi_table_print_madt_entry(header);
- apic_id = processor->local_apic_id;
- enabled = processor->lapic_flags & ACPI_MADT_ENABLED;
#ifdef CONFIG_X86_X2APIC
/*
* We need to register disabled CPU as well to permit
@@ -239,10 +235,8 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
* to not preallocating memory for all NR_CPUS
* when we use CPU hotplug.
*/
- if (!cpu_has_x2apic && (apic_id >= 0xff) && enabled)
- printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
- else
- acpi_register_lapic(apic_id, enabled);
+ acpi_register_lapic(processor->local_apic_id, /* APIC ID */
+ processor->lapic_flags & ACPI_MADT_ENABLED);
#else
printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
#endif
diff --git a/trunk/arch/x86/kernel/amd_nb.c b/trunk/arch/x86/kernel/amd_nb.c
index 013c1810ce72..4c39baa8facc 100644
--- a/trunk/arch/x86/kernel/amd_nb.c
+++ b/trunk/arch/x86/kernel/amd_nb.c
@@ -123,14 +123,16 @@ int amd_get_subcaches(int cpu)
{
struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
unsigned int mask;
- int cuid;
+ int cuid = 0;
if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
return 0;
pci_read_config_dword(link, 0x1d4, &mask);
+#ifdef CONFIG_SMP
cuid = cpu_data(cpu).compute_unit_id;
+#endif
return (mask >> (4 * cuid)) & 0xf;
}
@@ -139,7 +141,7 @@ int amd_set_subcaches(int cpu, int mask)
static unsigned int reset, ban;
struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
unsigned int reg;
- int cuid;
+ int cuid = 0;
if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf)
return -EINVAL;
@@ -157,7 +159,9 @@ int amd_set_subcaches(int cpu, int mask)
pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
}
+#ifdef CONFIG_SMP
cuid = cpu_data(cpu).compute_unit_id;
+#endif
mask <<= 4 * cuid;
mask |= (0xf ^ (1 << cuid)) << 26;
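
For reference, a minimal userspace sketch of the register layout that amd_get_subcaches()/amd_set_subcaches() above rely on: each compute unit owns a 4-bit subcache-enable field in the L3 partitioning register, and the kernel code shifts by 4 * cuid to reach it. The register value below is invented for illustration; this sketch performs no PCI access and is not the kernel implementation.

#include <stdio.h>
#include <stdint.h>

static unsigned int subcache_mask(uint32_t reg, int cuid)
{
        /* each compute unit owns one 4-bit field, compute unit 0 in bits 3:0 */
        return (reg >> (4 * cuid)) & 0xf;
}

int main(void)
{
        uint32_t reg = 0x0000f3c5;      /* hypothetical contents of the 0x1d4 register */
        int cuid;

        for (cuid = 0; cuid < 4; cuid++)
                printf("compute unit %d: subcache mask 0x%x\n",
                       cuid, subcache_mask(reg, cuid));
        return 0;
}
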
diff --git a/trunk/arch/x86/kernel/aperture_64.c b/trunk/arch/x86/kernel/aperture_64.c
index 6e76c191a835..3d2661ca6542 100644
--- a/trunk/arch/x86/kernel/aperture_64.c
+++ b/trunk/arch/x86/kernel/aperture_64.c
@@ -88,13 +88,13 @@ static u32 __init allocate_aperture(void)
*/
addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
aper_size, aper_size);
- if (!addr || addr + aper_size > GART_MAX_ADDR) {
+ if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) {
printk(KERN_ERR
"Cannot allocate aperture memory hole (%lx,%uK)\n",
addr, aper_size>>10);
return 0;
}
- memblock_reserve(addr, aper_size);
+ memblock_x86_reserve_range(addr, addr + aper_size, "aperture64");
/*
* Kmemleak should not scan this block as it may not be mapped via the
* kernel direct mapping.
diff --git a/trunk/arch/x86/kernel/apic/Makefile b/trunk/arch/x86/kernel/apic/Makefile
index 0ae0323b1f9c..767fd04f2843 100644
--- a/trunk/arch/x86/kernel/apic/Makefile
+++ b/trunk/arch/x86/kernel/apic/Makefile
@@ -10,7 +10,6 @@ obj-$(CONFIG_SMP) += ipi.o
ifeq ($(CONFIG_X86_64),y)
# APIC probe will depend on the listing order here
-obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o
obj-$(CONFIG_X86_UV) += x2apic_uv_x.o
obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o
obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o
diff --git a/trunk/arch/x86/kernel/apic/apic.c b/trunk/arch/x86/kernel/apic/apic.c
index 2eec05b6d1b8..f98d84caf94c 100644
--- a/trunk/arch/x86/kernel/apic/apic.c
+++ b/trunk/arch/x86/kernel/apic/apic.c
@@ -146,26 +146,16 @@ __setup("apicpmtimer", setup_apicpmtimer);
int x2apic_mode;
#ifdef CONFIG_X86_X2APIC
/* x2apic enabled before OS handover */
-int x2apic_preenabled;
-static int x2apic_disabled;
-static int nox2apic;
+static int x2apic_preenabled;
static __init int setup_nox2apic(char *str)
{
if (x2apic_enabled()) {
- int apicid = native_apic_msr_read(APIC_ID);
-
- if (apicid >= 255) {
- pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
- apicid);
- return 0;
- }
-
- pr_warning("x2apic already enabled. will disable it\n");
- } else
- setup_clear_cpu_cap(X86_FEATURE_X2APIC);
-
- nox2apic = 1;
+ pr_warning("Bios already enabled x2apic, "
+ "can't enforce nox2apic");
+ return 0;
+ }
+ setup_clear_cpu_cap(X86_FEATURE_X2APIC);
return 0;
}
early_param("nox2apic", setup_nox2apic);
@@ -260,7 +250,6 @@ u32 native_safe_apic_wait_icr_idle(void)
send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
if (!send_status)
break;
- inc_irq_stat(icr_read_retry_count);
udelay(100);
} while (timeout++ < 1000);
@@ -887,8 +876,8 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
 * Besides, if we don't, timer interrupts ignore the global
* interrupt lock, which is the WrongThing (tm) to do.
*/
- irq_enter();
exit_idle();
+ irq_enter();
local_apic_timer_interrupt();
irq_exit();
@@ -1442,45 +1431,6 @@ void __init bsp_end_local_APIC_setup(void)
}
#ifdef CONFIG_X86_X2APIC
-/*
- * Need to disable xapic and x2apic at the same time and then enable xapic mode
- */
-static inline void __disable_x2apic(u64 msr)
-{
- wrmsrl(MSR_IA32_APICBASE,
- msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
- wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
-}
-
-static __init void disable_x2apic(void)
-{
- u64 msr;
-
- if (!cpu_has_x2apic)
- return;
-
- rdmsrl(MSR_IA32_APICBASE, msr);
- if (msr & X2APIC_ENABLE) {
- u32 x2apic_id = read_apic_id();
-
- if (x2apic_id >= 255)
- panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
-
- pr_info("Disabling x2apic\n");
- __disable_x2apic(msr);
-
- if (nox2apic) {
- clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC);
- setup_clear_cpu_cap(X86_FEATURE_X2APIC);
- }
-
- x2apic_disabled = 1;
- x2apic_mode = 0;
-
- register_lapic_address(mp_lapic_addr);
- }
-}
-
void check_x2apic(void)
{
if (x2apic_enabled()) {
@@ -1491,20 +1441,15 @@ void check_x2apic(void)
void enable_x2apic(void)
{
- u64 msr;
-
- rdmsrl(MSR_IA32_APICBASE, msr);
- if (x2apic_disabled) {
- __disable_x2apic(msr);
- return;
- }
+ int msr, msr2;
if (!x2apic_mode)
return;
+ rdmsr(MSR_IA32_APICBASE, msr, msr2);
if (!(msr & X2APIC_ENABLE)) {
printk_once(KERN_INFO "Enabling x2apic\n");
- wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
+ wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, msr2);
}
}
#endif /* CONFIG_X86_X2APIC */
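
A brief sketch of the bit manipulation the rdmsr/wrmsr pair in enable_x2apic() performs above: the IA32_APICBASE MSR is handled as two 32-bit halves and the x2APIC enable bit (bit 10, the EXTD bit) is set in the low half before writing it back. Plain userspace C with an invented MSR value; no real MSR is touched.

#include <stdio.h>
#include <stdint.h>

#define X2APIC_ENABLE (1u << 10)        /* EXTD bit in IA32_APICBASE */

int main(void)
{
        uint64_t apicbase = 0xfee00900;           /* hypothetical MSR value */
        uint32_t lo = (uint32_t)apicbase;         /* what rdmsr returns in eax */
        uint32_t hi = (uint32_t)(apicbase >> 32); /* ... and in edx */

        if (!(lo & X2APIC_ENABLE)) {
                lo |= X2APIC_ENABLE;    /* corresponds to wrmsr(lo | X2APIC_ENABLE, hi) */
                printf("enabling x2apic: lo=0x%08x hi=0x%08x\n", lo, hi);
        } else {
                printf("x2apic already enabled\n");
        }
        return 0;
}
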
@@ -1541,34 +1486,25 @@ void __init enable_IR_x2apic(void)
ret = save_ioapic_entries();
if (ret) {
pr_info("Saving IO-APIC state failed: %d\n", ret);
- return;
+ goto out;
}
local_irq_save(flags);
legacy_pic->mask_all();
mask_ioapic_entries();
- if (x2apic_preenabled && nox2apic)
- disable_x2apic();
-
if (dmar_table_init_ret)
ret = -1;
else
ret = enable_IR();
- if (!x2apic_supported())
- goto skip_x2apic;
-
if (ret < 0) {
/* IR is required if there is APIC ID > 255 even when running
* under KVM
*/
if (max_physical_apicid > 255 ||
- !hypervisor_x2apic_available()) {
- if (x2apic_preenabled)
- disable_x2apic();
- goto skip_x2apic;
- }
+ !hypervisor_x2apic_available())
+ goto nox2apic;
/*
* without IR all CPUs can be addressed by IOAPIC/MSI
* only in physical mode
@@ -1576,10 +1512,8 @@ void __init enable_IR_x2apic(void)
x2apic_force_phys();
}
- if (ret == IRQ_REMAP_XAPIC_MODE) {
- pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
- goto skip_x2apic;
- }
+ if (ret == IRQ_REMAP_XAPIC_MODE)
+ goto nox2apic;
x2apic_enabled = 1;
@@ -1589,11 +1523,22 @@ void __init enable_IR_x2apic(void)
pr_info("Enabled x2apic\n");
}
-skip_x2apic:
+nox2apic:
if (ret < 0) /* IR enabling failed */
restore_ioapic_entries();
legacy_pic->restore_mask();
local_irq_restore(flags);
+
+out:
+ if (x2apic_enabled || !x2apic_supported())
+ return;
+
+ if (x2apic_preenabled)
+ panic("x2apic: enabled by BIOS but kernel init failed.");
+ else if (ret == IRQ_REMAP_XAPIC_MODE)
+ pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
+ else if (ret < 0)
+ pr_info("x2apic not enabled, IRQ remapping init failed\n");
}
#ifdef CONFIG_X86_64
@@ -1864,8 +1809,8 @@ void smp_spurious_interrupt(struct pt_regs *regs)
{
u32 v;
- irq_enter();
exit_idle();
+ irq_enter();
/*
* Check if this really is a spurious interrupt and ACK it
* if it is a vectored one. Just in case...
@@ -1901,8 +1846,8 @@ void smp_error_interrupt(struct pt_regs *regs)
"Illegal register address", /* APIC Error Bit 7 */
};
- irq_enter();
exit_idle();
+ irq_enter();
/* First tickle the hardware, only then report what went on. -- REW */
v0 = apic_read(APIC_ESR);
apic_write(APIC_ESR, 0);
diff --git a/trunk/arch/x86/kernel/apic/apic_flat_64.c b/trunk/arch/x86/kernel/apic/apic_flat_64.c
index 8c3cdded6f2b..f7a41e4cae47 100644
--- a/trunk/arch/x86/kernel/apic/apic_flat_64.c
+++ b/trunk/arch/x86/kernel/apic/apic_flat_64.c
@@ -62,7 +62,7 @@ static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
* an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
* document number 292116). So here it goes...
*/
-void flat_init_apic_ldr(void)
+static void flat_init_apic_ldr(void)
{
unsigned long val;
unsigned long num, id;
@@ -171,14 +171,9 @@ static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
return initial_apic_id >> index_msb;
}
-static int flat_probe(void)
-{
- return 1;
-}
-
static struct apic apic_flat = {
.name = "flat",
- .probe = flat_probe,
+ .probe = NULL,
.acpi_madt_oem_check = flat_acpi_madt_oem_check,
.apic_id_registered = flat_apic_id_registered,
diff --git a/trunk/arch/x86/kernel/apic/apic_numachip.c b/trunk/arch/x86/kernel/apic/apic_numachip.c
deleted file mode 100644
index 09d3d8c1cd99..000000000000
--- a/trunk/arch/x86/kernel/apic/apic_numachip.c
+++ /dev/null
@@ -1,294 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Numascale NumaConnect-Specific APIC Code
- *
- * Copyright (C) 2011 Numascale AS. All rights reserved.
- *
- * Send feedback to
- *
- */
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-
-static int numachip_system __read_mostly;
-
-static struct apic apic_numachip __read_mostly;
-
-static unsigned int get_apic_id(unsigned long x)
-{
- unsigned long value;
- unsigned int id;
-
- rdmsrl(MSR_FAM10H_NODE_ID, value);
- id = ((x >> 24) & 0xffU) | ((value << 2) & 0x3f00U);
-
- return id;
-}
-
-static unsigned long set_apic_id(unsigned int id)
-{
- unsigned long x;
-
- x = ((id & 0xffU) << 24);
- return x;
-}
-
-static unsigned int read_xapic_id(void)
-{
- return get_apic_id(apic_read(APIC_ID));
-}
-
-static int numachip_apic_id_registered(void)
-{
- return physid_isset(read_xapic_id(), phys_cpu_present_map);
-}
-
-static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
-{
- return initial_apic_id >> index_msb;
-}
-
-static const struct cpumask *numachip_target_cpus(void)
-{
- return cpu_online_mask;
-}
-
-static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
- cpumask_clear(retmask);
- cpumask_set_cpu(cpu, retmask);
-}
-
-static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
-{
- union numachip_csr_g3_ext_irq_gen int_gen;
-
- int_gen.s._destination_apic_id = phys_apicid;
- int_gen.s._vector = 0;
- int_gen.s._msgtype = APIC_DM_INIT >> 8;
- int_gen.s._index = 0;
-
- write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
-
- int_gen.s._msgtype = APIC_DM_STARTUP >> 8;
- int_gen.s._vector = start_rip >> 12;
-
- write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
-
- atomic_set(&init_deasserted, 1);
- return 0;
-}
-
-static void numachip_send_IPI_one(int cpu, int vector)
-{
- union numachip_csr_g3_ext_irq_gen int_gen;
- int apicid = per_cpu(x86_cpu_to_apicid, cpu);
-
- int_gen.s._destination_apic_id = apicid;
- int_gen.s._vector = vector;
- int_gen.s._msgtype = (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8;
- int_gen.s._index = 0;
-
- write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
-}
-
-static void numachip_send_IPI_mask(const struct cpumask *mask, int vector)
-{
- unsigned int cpu;
-
- for_each_cpu(cpu, mask)
- numachip_send_IPI_one(cpu, vector);
-}
-
-static void numachip_send_IPI_mask_allbutself(const struct cpumask *mask,
- int vector)
-{
- unsigned int this_cpu = smp_processor_id();
- unsigned int cpu;
-
- for_each_cpu(cpu, mask) {
- if (cpu != this_cpu)
- numachip_send_IPI_one(cpu, vector);
- }
-}
-
-static void numachip_send_IPI_allbutself(int vector)
-{
- unsigned int this_cpu = smp_processor_id();
- unsigned int cpu;
-
- for_each_online_cpu(cpu) {
- if (cpu != this_cpu)
- numachip_send_IPI_one(cpu, vector);
- }
-}
-
-static void numachip_send_IPI_all(int vector)
-{
- numachip_send_IPI_mask(cpu_online_mask, vector);
-}
-
-static void numachip_send_IPI_self(int vector)
-{
- __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
-}
-
-static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- cpu = cpumask_first(cpumask);
- if (likely((unsigned)cpu < nr_cpu_ids))
- return per_cpu(x86_cpu_to_apicid, cpu);
-
- return BAD_APICID;
-}
-
-static unsigned int
-numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- for_each_cpu_and(cpu, cpumask, andmask) {
- if (cpumask_test_cpu(cpu, cpu_online_mask))
- break;
- }
- return per_cpu(x86_cpu_to_apicid, cpu);
-}
-
-static int __init numachip_probe(void)
-{
- return apic == &apic_numachip;
-}
-
-static void __init map_csrs(void)
-{
- printk(KERN_INFO "NumaChip: Mapping local CSR space (%016llx - %016llx)\n",
- NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_BASE + NUMACHIP_LCSR_SIZE - 1);
- init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
-
- printk(KERN_INFO "NumaChip: Mapping global CSR space (%016llx - %016llx)\n",
- NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_BASE + NUMACHIP_GCSR_SIZE - 1);
- init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE);
-}
-
-static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
-{
- c->phys_proc_id = node;
- per_cpu(cpu_llc_id, smp_processor_id()) = node;
-}
-
-static int __init numachip_system_init(void)
-{
- unsigned int val;
-
- if (!numachip_system)
- return 0;
-
- x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
-
- map_csrs();
-
- val = read_lcsr(CSR_G0_NODE_IDS);
- printk(KERN_INFO "NumaChip: Local NodeID = %08x\n", val);
-
- return 0;
-}
-early_initcall(numachip_system_init);
-
-static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- if (!strncmp(oem_id, "NUMASC", 6)) {
- numachip_system = 1;
- return 1;
- }
-
- return 0;
-}
-
-static struct apic apic_numachip __refconst = {
-
- .name = "NumaConnect system",
- .probe = numachip_probe,
- .acpi_madt_oem_check = numachip_acpi_madt_oem_check,
- .apic_id_registered = numachip_apic_id_registered,
-
- .irq_delivery_mode = dest_Fixed,
- .irq_dest_mode = 0, /* physical */
-
- .target_cpus = numachip_target_cpus,
- .disable_esr = 0,
- .dest_logical = 0,
- .check_apicid_used = NULL,
- .check_apicid_present = NULL,
-
- .vector_allocation_domain = numachip_vector_allocation_domain,
- .init_apic_ldr = flat_init_apic_ldr,
-
- .ioapic_phys_id_map = NULL,
- .setup_apic_routing = NULL,
- .multi_timer_check = NULL,
- .cpu_present_to_apicid = default_cpu_present_to_apicid,
- .apicid_to_cpu_present = NULL,
- .setup_portio_remap = NULL,
- .check_phys_apicid_present = default_check_phys_apicid_present,
- .enable_apic_mode = NULL,
- .phys_pkg_id = numachip_phys_pkg_id,
- .mps_oem_check = NULL,
-
- .get_apic_id = get_apic_id,
- .set_apic_id = set_apic_id,
- .apic_id_mask = 0xffU << 24,
-
- .cpu_mask_to_apicid = numachip_cpu_mask_to_apicid,
- .cpu_mask_to_apicid_and = numachip_cpu_mask_to_apicid_and,
-
- .send_IPI_mask = numachip_send_IPI_mask,
- .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself,
- .send_IPI_allbutself = numachip_send_IPI_allbutself,
- .send_IPI_all = numachip_send_IPI_all,
- .send_IPI_self = numachip_send_IPI_self,
-
- .wakeup_secondary_cpu = numachip_wakeup_secondary,
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
- .wait_for_init_deassert = NULL,
- .smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = NULL, /* REMRD not supported */
-
- .read = native_apic_mem_read,
- .write = native_apic_mem_write,
- .icr_read = native_apic_icr_read,
- .icr_write = native_apic_icr_write,
- .wait_icr_idle = native_apic_wait_icr_idle,
- .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
-};
-apic_driver(apic_numachip);
-
diff --git a/trunk/arch/x86/kernel/apic/io_apic.c b/trunk/arch/x86/kernel/apic/io_apic.c
index fb072754bc1d..6d939d7847e2 100644
--- a/trunk/arch/x86/kernel/apic/io_apic.c
+++ b/trunk/arch/x86/kernel/apic/io_apic.c
@@ -2421,8 +2421,8 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
unsigned vector, me;
ack_APIC_irq();
- irq_enter();
exit_idle();
+ irq_enter();
me = smp_processor_id();
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
@@ -2948,10 +2948,6 @@ static inline void __init check_timer(void)
}
local_irq_disable();
apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
- if (x2apic_preenabled)
- apic_printk(APIC_QUIET, KERN_INFO
- "Perhaps problem with the pre-enabled x2apic mode\n"
- "Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
"report. Then try booting with the 'noapic' option.\n");
out:
diff --git a/trunk/arch/x86/kernel/check.c b/trunk/arch/x86/kernel/check.c
index 5da1269e8ddc..452932d34730 100644
--- a/trunk/arch/x86/kernel/check.c
+++ b/trunk/arch/x86/kernel/check.c
@@ -62,8 +62,7 @@ early_param("memory_corruption_check_size", set_corruption_check_size);
void __init setup_bios_corruption_check(void)
{
- phys_addr_t start, end;
- u64 i;
+ u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */
if (memory_corruption_check == -1) {
memory_corruption_check =
@@ -83,23 +82,28 @@ void __init setup_bios_corruption_check(void)
corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
- for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
- start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE),
- PAGE_SIZE, corruption_check_size);
- end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE),
- PAGE_SIZE, corruption_check_size);
- if (start >= end)
- continue;
+ while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
+ u64 size;
+ addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE);
- memblock_reserve(start, end - start);
- scan_areas[num_scan_areas].addr = start;
- scan_areas[num_scan_areas].size = end - start;
+ if (addr == MEMBLOCK_ERROR)
+ break;
+
+ if (addr >= corruption_check_size)
+ break;
+
+ if ((addr + size) > corruption_check_size)
+ size = corruption_check_size - addr;
+
+ memblock_x86_reserve_range(addr, addr + size, "SCAN RAM");
+ scan_areas[num_scan_areas].addr = addr;
+ scan_areas[num_scan_areas].size = size;
+ num_scan_areas++;
/* Assume we've already mapped this early memory */
- memset(__va(start), 0, end - start);
+ memset(__va(addr), 0, size);
- if (++num_scan_areas >= MAX_SCAN_AREAS)
- break;
+ addr += size;
}
if (num_scan_areas)
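
The loop restored in setup_bios_corruption_check() above walks free memory below corruption_check_size, clamps each range to that limit and records up to MAX_SCAN_AREAS of them. The standalone sketch below shows the same clamp-and-record pattern with an invented free-range table standing in for memblock; it is illustrative only.

#include <stdio.h>
#include <stdint.h>

#define MAX_SCAN_AREAS 4

struct range { uint64_t addr, size; };

int main(void)
{
        const struct range free_ranges[] = {
                { 0x1000, 0x3000 }, { 0x9000, 0x2000 }, { 0x10000, 0x8000 },
        };
        struct range scan_areas[MAX_SCAN_AREAS];
        uint64_t check_limit = 0xc000;          /* corruption_check_size stand-in */
        int num_scan_areas = 0;
        int i;

        for (i = 0; i < (int)(sizeof(free_ranges) / sizeof(free_ranges[0])); i++) {
                uint64_t addr = free_ranges[i].addr;
                uint64_t size = free_ranges[i].size;

                if (addr >= check_limit || num_scan_areas >= MAX_SCAN_AREAS)
                        break;
                if (addr + size > check_limit)
                        size = check_limit - addr;      /* clamp to the limit */

                scan_areas[num_scan_areas].addr = addr;
                scan_areas[num_scan_areas].size = size;
                num_scan_areas++;
        }

        for (i = 0; i < num_scan_areas; i++)
                printf("scan area %d: 0x%llx + 0x%llx\n", i,
                       (unsigned long long)scan_areas[i].addr,
                       (unsigned long long)scan_areas[i].size);
        return 0;
}
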
diff --git a/trunk/arch/x86/kernel/cpu/amd.c b/trunk/arch/x86/kernel/cpu/amd.c
index f4773f4aae35..0bab2b18bb20 100644
--- a/trunk/arch/x86/kernel/cpu/amd.c
+++ b/trunk/arch/x86/kernel/cpu/amd.c
@@ -148,6 +148,7 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
{
+#ifdef CONFIG_SMP
/* calling is from identify_secondary_cpu() ? */
if (!c->cpu_index)
return;
@@ -191,6 +192,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
valid_k7:
;
+#endif
}
static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
@@ -351,13 +353,6 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
if (node == NUMA_NO_NODE)
node = per_cpu(cpu_llc_id, cpu);
- /*
- * If core numbers are inconsistent, it's likely a multi-fabric platform,
- * so invoke platform-specific handler
- */
- if (c->phys_proc_id != node)
- x86_cpuinit.fixup_cpu_id(c, node);
-
if (!node_online(node)) {
/*
* Two possibilities here:
diff --git a/trunk/arch/x86/kernel/cpu/centaur.c b/trunk/arch/x86/kernel/cpu/centaur.c
index 159103c0b1f4..e58d978e0758 100644
--- a/trunk/arch/x86/kernel/cpu/centaur.c
+++ b/trunk/arch/x86/kernel/cpu/centaur.c
@@ -278,7 +278,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
}
#ifdef CONFIG_X86_32
/* Cyrix III family needs CX8 & PGE explicitly enabled. */
- if (c->x86_model >= 6 && c->x86_model <= 13) {
+ if (c->x86_model >= 6 && c->x86_model <= 9) {
rdmsr(MSR_VIA_FCR, lo, hi);
lo |= (1<<1 | 1<<7);
wrmsr(MSR_VIA_FCR, lo, hi);
diff --git a/trunk/arch/x86/kernel/cpu/common.c b/trunk/arch/x86/kernel/cpu/common.c
index 850f2963a420..aa003b13a831 100644
--- a/trunk/arch/x86/kernel/cpu/common.c
+++ b/trunk/arch/x86/kernel/cpu/common.c
@@ -676,7 +676,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
if (this_cpu->c_early_init)
this_cpu->c_early_init(c);
+#ifdef CONFIG_SMP
c->cpu_index = 0;
+#endif
filter_cpuid_features(c, false);
setup_smep(c);
@@ -762,7 +764,10 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
c->apicid = c->initial_apicid;
# endif
#endif
+
+#ifdef CONFIG_X86_HT
c->phys_proc_id = c->initial_apicid;
+#endif
}
setup_smep(c);
@@ -1135,15 +1140,6 @@ static void dbg_restore_debug_regs(void)
#define dbg_restore_debug_regs()
#endif /* ! CONFIG_KGDB */
-/*
- * Prints an error where the NUMA and configured core-number mismatch and the
- * platform didn't override this to fix it up
- */
-void __cpuinit x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node)
-{
- pr_err("NUMA core number %d differs from configured core number %d\n", node, c->phys_proc_id);
-}
-
/*
* cpu_init() initializes state that is per-CPU. Some data is already
* initialized (naturally) in the bootstrap process, such as the GDT
diff --git a/trunk/arch/x86/kernel/cpu/cpu.h b/trunk/arch/x86/kernel/cpu/cpu.h
index 8bacc7826fb3..1b22dcc51af4 100644
--- a/trunk/arch/x86/kernel/cpu/cpu.h
+++ b/trunk/arch/x86/kernel/cpu/cpu.h
@@ -1,4 +1,5 @@
#ifndef ARCH_X86_CPU_H
+
#define ARCH_X86_CPU_H
struct cpu_model_info {
@@ -34,4 +35,6 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
-#endif /* ARCH_X86_CPU_H */
+extern void get_cpu_cap(struct cpuinfo_x86 *c);
+
+#endif
diff --git a/trunk/arch/x86/kernel/cpu/intel.c b/trunk/arch/x86/kernel/cpu/intel.c
index 3e6ff6cbf42a..523131213f08 100644
--- a/trunk/arch/x86/kernel/cpu/intel.c
+++ b/trunk/arch/x86/kernel/cpu/intel.c
@@ -181,6 +181,7 @@ static void __cpuinit trap_init_f00f_bug(void)
static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
{
+#ifdef CONFIG_SMP
/* calling is from identify_secondary_cpu() ? */
if (!c->cpu_index)
return;
@@ -197,6 +198,7 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
WARN_ONCE(1, "WARNING: SMP operation may be unreliable"
"with B stepping processors.\n");
}
+#endif
}
static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c b/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c
index fc4beb393577..319882ef848d 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -17,7 +17,6 @@
#include
#include
#include
-#include
#include
#include
#include
@@ -93,18 +92,6 @@ static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
return NMI_HANDLED;
}
-static void mce_irq_ipi(void *info)
-{
- int cpu = smp_processor_id();
- struct mce *m = &__get_cpu_var(injectm);
-
- if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
- m->inject_flags & MCJ_EXCEPTION) {
- cpumask_clear_cpu(cpu, mce_inject_cpumask);
- raise_exception(m, NULL);
- }
-}
-
/* Inject mce on current CPU */
static int raise_local(void)
{
@@ -152,10 +139,9 @@ static void raise_mce(struct mce *m)
return;
#ifdef CONFIG_X86_LOCAL_APIC
- if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) {
+ if (m->inject_flags & MCJ_NMI_BROADCAST) {
unsigned long start;
int cpu;
-
get_online_cpus();
cpumask_copy(mce_inject_cpumask, cpu_online_mask);
cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
@@ -165,25 +151,13 @@ static void raise_mce(struct mce *m)
MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
cpumask_clear_cpu(cpu, mce_inject_cpumask);
}
- if (!cpumask_empty(mce_inject_cpumask)) {
- if (m->inject_flags & MCJ_IRQ_BRAODCAST) {
- /*
- * don't wait because mce_irq_ipi is necessary
- * to be sync with following raise_local
- */
- preempt_disable();
- smp_call_function_many(mce_inject_cpumask,
- mce_irq_ipi, NULL, 0);
- preempt_enable();
- } else if (m->inject_flags & MCJ_NMI_BROADCAST)
- apic->send_IPI_mask(mce_inject_cpumask,
- NMI_VECTOR);
- }
+ if (!cpumask_empty(mce_inject_cpumask))
+ apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR);
start = jiffies;
while (!cpumask_empty(mce_inject_cpumask)) {
if (!time_before(jiffies, start + 2*HZ)) {
printk(KERN_ERR
- "Timeout waiting for mce inject %lx\n",
+ "Timeout waiting for mce inject NMI %lx\n",
*cpumask_bits(mce_inject_cpumask));
break;
}
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce.c b/trunk/arch/x86/kernel/cpu/mcheck/mce.c
index cbe82b5918ce..2af127d4c3d1 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/mce.c
@@ -95,6 +95,13 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
+/*
+ * CPU/chipset specific EDAC code can register a notifier call here to print
+ * MCE errors in a human-readable form.
+ */
+ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
+
/* MCA banks polled by the period polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
@@ -102,12 +109,6 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
static DEFINE_PER_CPU(struct work_struct, mce_work);
-/*
- * CPU/chipset specific EDAC code can register a notifier call here to print
- * MCE errors in a human-readable form.
- */
-ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
-
/* Do initial initialization of a struct mce */
void mce_setup(struct mce *m)
{
@@ -118,7 +119,9 @@ void mce_setup(struct mce *m)
m->time = get_seconds();
m->cpuvendor = boot_cpu_data.x86_vendor;
m->cpuid = cpuid_eax(1);
+#ifdef CONFIG_SMP
m->socketid = cpu_data(m->extcpu).phys_proc_id;
+#endif
m->apicid = cpu_data(m->extcpu).initial_apicid;
rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
}
@@ -187,57 +190,6 @@ void mce_log(struct mce *mce)
set_bit(0, &mce_need_notify);
}
-static void drain_mcelog_buffer(void)
-{
- unsigned int next, i, prev = 0;
-
- next = rcu_dereference_check_mce(mcelog.next);
-
- do {
- struct mce *m;
-
- /* drain what was logged during boot */
- for (i = prev; i < next; i++) {
- unsigned long start = jiffies;
- unsigned retries = 1;
-
- m = &mcelog.entry[i];
-
- while (!m->finished) {
- if (time_after_eq(jiffies, start + 2*retries))
- retries++;
-
- cpu_relax();
-
- if (!m->finished && retries >= 4) {
- pr_err("MCE: skipping error being logged currently!\n");
- break;
- }
- }
- smp_rmb();
- atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
- }
-
- memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m));
- prev = next;
- next = cmpxchg(&mcelog.next, prev, 0);
- } while (next != prev);
-}
-
-
-void mce_register_decode_chain(struct notifier_block *nb)
-{
- atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
- drain_mcelog_buffer();
-}
-EXPORT_SYMBOL_GPL(mce_register_decode_chain);
-
-void mce_unregister_decode_chain(struct notifier_block *nb)
-{
- atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
-}
-EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
-
static void print_mce(struct mce *m)
{
int ret = 0;
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 1d76872b6a45..f5474218cffe 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -64,9 +64,11 @@ struct threshold_bank {
};
static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
+#ifdef CONFIG_SMP
static unsigned char shared_bank[NR_BANKS] = {
0, 0, 0, 0, 1
};
+#endif
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
@@ -200,9 +202,10 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
if (!block)
per_cpu(bank_map, cpu) |= (1 << bank);
+#ifdef CONFIG_SMP
if (shared_bank[bank] && c->cpu_core_id)
break;
-
+#endif
offset = setup_APIC_mce(offset,
(high & MASK_LVTOFF_HI) >> 20);
@@ -528,6 +531,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
sprintf(name, "threshold_bank%i", bank);
+#ifdef CONFIG_SMP
if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
i = cpumask_first(cpu_llc_shared_mask(cpu));
@@ -554,6 +558,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
goto out;
}
+#endif
b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
if (!b) {
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c b/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 39c6089891e4..787e06c84ea6 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -323,6 +323,17 @@ device_initcall(thermal_throttle_init_device);
#endif /* CONFIG_SYSFS */
+/*
+ * Set the two most significant bits to tell the MCE log which thermal
+ * event type this is.
+ * This is a temporary solution and may be changed in the future with the
+ * MCE log infrastructure.
+ */
+#define CORE_THROTTLED (0)
+#define CORE_POWER_LIMIT ((__u64)1 << 62)
+#define PACKAGE_THROTTLED ((__u64)2 << 62)
+#define PACKAGE_POWER_LIMIT ((__u64)3 << 62)
+
static void notify_thresholds(__u64 msr_val)
{
/* check whether the interrupt handler is defined;
@@ -352,23 +363,27 @@ static void intel_thermal_interrupt(void)
if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
THERMAL_THROTTLING_EVENT,
CORE_LEVEL) != 0)
- mce_log_therm_throt_event(msr_val);
+ mce_log_therm_throt_event(CORE_THROTTLED | msr_val);
if (this_cpu_has(X86_FEATURE_PLN))
- therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
+ if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
POWER_LIMIT_EVENT,
- CORE_LEVEL);
+ CORE_LEVEL) != 0)
+ mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);
if (this_cpu_has(X86_FEATURE_PTS)) {
rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
- therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
+ if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
THERMAL_THROTTLING_EVENT,
- PACKAGE_LEVEL);
+ PACKAGE_LEVEL) != 0)
+ mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
if (this_cpu_has(X86_FEATURE_PLN))
- therm_throt_process(msr_val &
+ if (therm_throt_process(msr_val &
PACKAGE_THERM_STATUS_POWER_LIMIT,
POWER_LIMIT_EVENT,
- PACKAGE_LEVEL);
+ PACKAGE_LEVEL) != 0)
+ mce_log_therm_throt_event(PACKAGE_POWER_LIMIT
+ | msr_val);
}
}
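
The CORE_ and PACKAGE_ defines added above tag each logged thermal event by ORing a two-bit code into bits 63:62 of the MSR value, so the consumer can tell core/package throttling and power-limit events apart. A small sketch of encoding and decoding that tag, using the same values as the hunk and an invented status value:

#include <stdio.h>
#include <stdint.h>

#define CORE_THROTTLED          ((uint64_t)0 << 62)
#define CORE_POWER_LIMIT        ((uint64_t)1 << 62)
#define PACKAGE_THROTTLED       ((uint64_t)2 << 62)
#define PACKAGE_POWER_LIMIT     ((uint64_t)3 << 62)

static const char *event_name(uint64_t logged)
{
        switch (logged >> 62) {
        case 0:  return "core throttled";
        case 1:  return "core power limit";
        case 2:  return "package throttled";
        default: return "package power limit";
        }
}

int main(void)
{
        uint64_t msr_val = 0x88000881;  /* hypothetical THERM_STATUS value */
        uint64_t logged = PACKAGE_POWER_LIMIT | msr_val;

        printf("event: %s, status bits: 0x%llx\n", event_name(logged),
               (unsigned long long)(logged & ~((uint64_t)3 << 62)));
        return 0;
}

Keeping the tag in the otherwise-unused top bits lets the existing mce_log_therm_throt_event() path carry the extra information without a format change, which matches the "temporary solution" caveat in the comment above.
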
@@ -382,8 +397,8 @@ static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
{
- irq_enter();
exit_idle();
+ irq_enter();
inc_irq_stat(irq_thermal_count);
smp_thermal_vector();
irq_exit();
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/threshold.c b/trunk/arch/x86/kernel/cpu/mcheck/threshold.c
index aa578cadb940..d746df2909c9 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -19,8 +19,8 @@ void (*mce_threshold_vector)(void) = default_threshold_interrupt;
asmlinkage void smp_threshold_interrupt(void)
{
- irq_enter();
exit_idle();
+ irq_enter();
inc_irq_stat(irq_threshold_count);
mce_threshold_vector();
irq_exit();
diff --git a/trunk/arch/x86/kernel/cpu/perf_event.c b/trunk/arch/x86/kernel/cpu/perf_event.c
index 5adce1040b11..2bda212a0010 100644
--- a/trunk/arch/x86/kernel/cpu/perf_event.c
+++ b/trunk/arch/x86/kernel/cpu/perf_event.c
@@ -484,195 +484,18 @@ static inline int is_x86_event(struct perf_event *event)
return event->pmu == &pmu;
}
-/*
- * Event scheduler state:
- *
- * Assign events iterating over all events and counters, beginning
- * with events with least weights first. Keep the current iterator
- * state in struct sched_state.
- */
-struct sched_state {
- int weight;
- int event; /* event index */
- int counter; /* counter index */
- int unassigned; /* number of events to be assigned left */
- unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-};
-
-/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
-#define SCHED_STATES_MAX 2
-
-struct perf_sched {
- int max_weight;
- int max_events;
- struct event_constraint **constraints;
- struct sched_state state;
- int saved_states;
- struct sched_state saved[SCHED_STATES_MAX];
-};
-
-/*
- * Initialize the iterator that runs through all events and counters.
- */
-static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
- int num, int wmin, int wmax)
-{
- int idx;
-
- memset(sched, 0, sizeof(*sched));
- sched->max_events = num;
- sched->max_weight = wmax;
- sched->constraints = c;
-
- for (idx = 0; idx < num; idx++) {
- if (c[idx]->weight == wmin)
- break;
- }
-
- sched->state.event = idx; /* start with min weight */
- sched->state.weight = wmin;
- sched->state.unassigned = num;
-}
-
-static void perf_sched_save_state(struct perf_sched *sched)
-{
- if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
- return;
-
- sched->saved[sched->saved_states] = sched->state;
- sched->saved_states++;
-}
-
-static bool perf_sched_restore_state(struct perf_sched *sched)
-{
- if (!sched->saved_states)
- return false;
-
- sched->saved_states--;
- sched->state = sched->saved[sched->saved_states];
-
- /* continue with next counter: */
- clear_bit(sched->state.counter++, sched->state.used);
-
- return true;
-}
-
-/*
- * Select a counter for the current event to schedule. Return true on
- * success.
- */
-static bool __perf_sched_find_counter(struct perf_sched *sched)
-{
- struct event_constraint *c;
- int idx;
-
- if (!sched->state.unassigned)
- return false;
-
- if (sched->state.event >= sched->max_events)
- return false;
-
- c = sched->constraints[sched->state.event];
-
- /* Prefer fixed purpose counters */
- if (x86_pmu.num_counters_fixed) {
- idx = X86_PMC_IDX_FIXED;
- for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
- if (!__test_and_set_bit(idx, sched->state.used))
- goto done;
- }
- }
- /* Grab the first unused counter starting with idx */
- idx = sched->state.counter;
- for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
- if (!__test_and_set_bit(idx, sched->state.used))
- goto done;
- }
-
- return false;
-
-done:
- sched->state.counter = idx;
-
- if (c->overlap)
- perf_sched_save_state(sched);
-
- return true;
-}
-
-static bool perf_sched_find_counter(struct perf_sched *sched)
-{
- while (!__perf_sched_find_counter(sched)) {
- if (!perf_sched_restore_state(sched))
- return false;
- }
-
- return true;
-}
-
-/*
- * Go through all unassigned events and find the next one to schedule.
- * Take events with the least weight first. Return true on success.
- */
-static bool perf_sched_next_event(struct perf_sched *sched)
-{
- struct event_constraint *c;
-
- if (!sched->state.unassigned || !--sched->state.unassigned)
- return false;
-
- do {
- /* next event */
- sched->state.event++;
- if (sched->state.event >= sched->max_events) {
- /* next weight */
- sched->state.event = 0;
- sched->state.weight++;
- if (sched->state.weight > sched->max_weight)
- return false;
- }
- c = sched->constraints[sched->state.event];
- } while (c->weight != sched->state.weight);
-
- sched->state.counter = 0; /* start with first counter */
-
- return true;
-}
-
-/*
- * Assign a counter for each event.
- */
-static int perf_assign_events(struct event_constraint **constraints, int n,
- int wmin, int wmax, int *assign)
-{
- struct perf_sched sched;
-
- perf_sched_init(&sched, constraints, n, wmin, wmax);
-
- do {
- if (!perf_sched_find_counter(&sched))
- break; /* failed */
- if (assign)
- assign[sched.state.event] = sched.state.counter;
- } while (perf_sched_next_event(&sched));
-
- return sched.state.unassigned;
-}
-
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- int i, wmin, wmax, num = 0;
+ int i, j, w, wmax, num = 0;
struct hw_perf_event *hwc;
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
- for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+ for (i = 0; i < n; i++) {
c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
constraints[i] = c;
- wmin = min(wmin, c->weight);
- wmax = max(wmax, c->weight);
}
/*
@@ -698,11 +521,59 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (assign)
assign[i] = hwc->idx;
}
+ if (i == n)
+ goto done;
+
+ /*
+ * begin slow path
+ */
+
+ bitmap_zero(used_mask, X86_PMC_IDX_MAX);
- /* slow path */
- if (i != n)
- num = perf_assign_events(constraints, n, wmin, wmax, assign);
+ /*
+ * weight = number of possible counters
+ *
+ * 1 = most constrained, only works on one counter
+ * wmax = least constrained, works on any counter
+ *
+ * assign events to counters starting with most
+ * constrained events.
+ */
+ wmax = x86_pmu.num_counters;
+ /*
+ * when fixed event counters are present,
+ * wmax is incremented by 1 to account
+ * for one more choice
+ */
+ if (x86_pmu.num_counters_fixed)
+ wmax++;
+
+ for (w = 1, num = n; num && w <= wmax; w++) {
+ /* for each event */
+ for (i = 0; num && i < n; i++) {
+ c = constraints[i];
+ hwc = &cpuc->event_list[i]->hw;
+
+ if (c->weight != w)
+ continue;
+
+ for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
+ if (!test_bit(j, used_mask))
+ break;
+ }
+
+ if (j == X86_PMC_IDX_MAX)
+ break;
+
+ __set_bit(j, used_mask);
+
+ if (assign)
+ assign[i] = j;
+ num--;
+ }
+ }
+done:
/*
* scheduling failed or is just a simulation,
* free resources if necessary
@@ -1248,7 +1119,6 @@ static void __init pmu_check_apic(void)
static int __init init_hw_perf_events(void)
{
- struct x86_pmu_quirk *quirk;
struct event_constraint *c;
int err;
@@ -1277,8 +1147,8 @@ static int __init init_hw_perf_events(void)
pr_cont("%s PMU driver.\n", x86_pmu.name);
- for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
- quirk->func();
+ if (x86_pmu.quirks)
+ x86_pmu.quirks();
if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
@@ -1301,18 +1171,12 @@ static int __init init_hw_perf_events(void)
unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
- 0, x86_pmu.num_counters, 0);
+ 0, x86_pmu.num_counters);
if (x86_pmu.event_constraints) {
- /*
- * event on fixed counter2 (REF_CYCLES) only works on this
- * counter, so do not extend mask to generic counters
- */
for_each_event_constraint(c, x86_pmu.event_constraints) {
- if (c->cmask != X86_RAW_EVENT_MASK
- || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) {
+ if (c->cmask != X86_RAW_EVENT_MASK)
continue;
- }
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
c->weight += x86_pmu.num_counters;
@@ -1702,15 +1566,3 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
return misc;
}
-
-void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
-{
- cap->version = x86_pmu.version;
- cap->num_counters_gp = x86_pmu.num_counters;
- cap->num_counters_fixed = x86_pmu.num_counters_fixed;
- cap->bit_width_gp = x86_pmu.cntval_bits;
- cap->bit_width_fixed = x86_pmu.cntval_bits;
- cap->events_mask = (unsigned int)x86_pmu.events_maskl;
- cap->events_mask_len = x86_pmu.events_mask_len;
-}
-EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
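
The slow path restored in x86_schedule_events() earlier in this file assigns events to counters greedily, in order of increasing constraint weight, taking the first free counter allowed by each event's mask. The compact standalone sketch below illustrates that strategy; the event masks are invented and this is not the kernel scheduler itself.

#include <stdio.h>

#define NUM_COUNTERS 4
#define NUM_EVENTS   3

static int popcount(unsigned int x)
{
        int n = 0;

        for (; x; x &= x - 1)
                n++;
        return n;
}

int main(void)
{
        /* per-event bitmask of counters it may use (constraint->idxmsk) */
        unsigned int idxmsk[NUM_EVENTS] = { 0x1, 0x3, 0xf };
        int assign[NUM_EVENTS] = { -1, -1, -1 };
        unsigned int used_mask = 0;
        int w, i, j, num = NUM_EVENTS;

        /* weight 1 = most constrained, NUM_COUNTERS = least constrained */
        for (w = 1; num && w <= NUM_COUNTERS; w++) {
                for (i = 0; num && i < NUM_EVENTS; i++) {
                        if (popcount(idxmsk[i]) != w)
                                continue;

                        /* grab the first unused counter this event allows */
                        for (j = 0; j < NUM_COUNTERS; j++) {
                                if ((idxmsk[i] & (1u << j)) &&
                                    !(used_mask & (1u << j)))
                                        break;
                        }
                        if (j == NUM_COUNTERS)
                                break;          /* scheduling failed */

                        used_mask |= 1u << j;
                        assign[i] = j;
                        num--;
                }
        }

        for (i = 0; i < NUM_EVENTS; i++)
                printf("event %d -> counter %d\n", i, assign[i]);
        return 0;
}
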
diff --git a/trunk/arch/x86/kernel/cpu/perf_event.h b/trunk/arch/x86/kernel/cpu/perf_event.h
index 8944062f46e2..b9698d40ac4b 100644
--- a/trunk/arch/x86/kernel/cpu/perf_event.h
+++ b/trunk/arch/x86/kernel/cpu/perf_event.h
@@ -45,7 +45,6 @@ struct event_constraint {
u64 code;
u64 cmask;
int weight;
- int overlap;
};
struct amd_nb {
@@ -152,40 +151,15 @@ struct cpu_hw_events {
void *kfree_on_online;
};
-#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
+#define __EVENT_CONSTRAINT(c, n, m, w) {\
{ .idxmsk64 = (n) }, \
.code = (c), \
.cmask = (m), \
.weight = (w), \
- .overlap = (o), \
}
#define EVENT_CONSTRAINT(c, n, m) \
- __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
-
-/*
- * The overlap flag marks event constraints with overlapping counter
- * masks. This is the case if the counter mask of such an event is not
- * a subset of any other counter mask of a constraint with an equal or
- * higher weight, e.g.:
- *
- * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
- * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
- * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
- *
- * The event scheduler may not select the correct counter in the first
- * cycle because it needs to know which subsequent events will be
- * scheduled. It may fail to schedule the events then. So we set the
- * overlap flag for such constraints to give the scheduler a hint which
- * events to select for counter rescheduling.
- *
- * Care must be taken as the rescheduling algorithm is O(n!) which
- * will increase scheduling cycles for an over-committed system
- * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
- * and its counter masks must be kept at a minimum.
- */
-#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
- __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
+ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
/*
* Constraint on the Event code.
@@ -261,11 +235,6 @@ union perf_capabilities {
u64 capabilities;
};
-struct x86_pmu_quirk {
- struct x86_pmu_quirk *next;
- void (*func)(void);
-};
-
/*
* struct x86_pmu - generic x86 pmu
*/
@@ -290,11 +259,6 @@ struct x86_pmu {
int num_counters_fixed;
int cntval_bits;
u64 cntval_mask;
- union {
- unsigned long events_maskl;
- unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
- };
- int events_mask_len;
int apic;
u64 max_period;
struct event_constraint *
@@ -304,7 +268,7 @@ struct x86_pmu {
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
struct perf_event *event);
struct event_constraint *event_constraints;
- struct x86_pmu_quirk *quirks;
+ void (*quirks)(void);
int perfctr_second_write;
int (*cpu_prepare)(int cpu);
@@ -345,15 +309,6 @@ struct x86_pmu {
struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
};
-#define x86_add_quirk(func_) \
-do { \
- static struct x86_pmu_quirk __quirk __initdata = { \
- .func = func_, \
- }; \
- __quirk.next = x86_pmu.quirks; \
- x86_pmu.quirks = &__quirk; \
-} while (0)
-
#define ERF_NO_HT_SHARING 1
#define ERF_HAS_RSP_1 2
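
The removed EVENT_CONSTRAINT_OVERLAP comment above hinges on whether one counter mask is a subset of another: a constraint only needs the overlap flag when its mask is not a subset of some equal-or-higher-weight constraint mask. A tiny sketch of that subset test, using the masks from the comment's own example (0x09 is a subset of neither 0x07 nor 0x38, so it is the constraint that needed the flag):

#include <stdio.h>

static int is_subset(unsigned int a, unsigned int b)
{
        return (a & ~b) == 0;   /* every counter in a is also in b */
}

int main(void)
{
        unsigned int c_overlaps = 0x09, c_another1 = 0x07, c_another2 = 0x38;

        printf("0x%02x subset of 0x%02x: %d\n", c_overlaps, c_another1,
               is_subset(c_overlaps, c_another1));
        printf("0x%02x subset of 0x%02x: %d\n", c_overlaps, c_another2,
               is_subset(c_overlaps, c_another2));
        return 0;
}
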
diff --git a/trunk/arch/x86/kernel/cpu/perf_event_amd.c b/trunk/arch/x86/kernel/cpu/perf_event_amd.c
index 0397b23be8e9..aeefd45697a2 100644
--- a/trunk/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/trunk/arch/x86/kernel/cpu/perf_event_amd.c
@@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = {
static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
-static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
diff --git a/trunk/arch/x86/kernel/cpu/perf_event_intel.c b/trunk/arch/x86/kernel/cpu/perf_event_intel.c
index 3bd37bdf1b8e..121f1be4da19 100644
--- a/trunk/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/trunk/arch/x86/kernel/cpu/perf_event_intel.c
@@ -28,7 +28,6 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
[PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
- [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
};
static struct event_constraint intel_core_event_constraints[] __read_mostly =
@@ -46,7 +45,12 @@ static struct event_constraint intel_core2_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ /*
+ * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
+ * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
+ * ratio between these counters.
+ */
+ /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -64,7 +68,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
@@ -86,7 +90,7 @@ static struct event_constraint intel_westmere_event_constraints[] __read_mostly
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
@@ -98,7 +102,7 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
@@ -121,7 +125,7 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
EVENT_CONSTRAINT_END
};
@@ -1515,7 +1519,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.guest_get_msrs = intel_guest_get_msrs,
};
-static __init void intel_clovertown_quirk(void)
+static void intel_clovertown_quirks(void)
{
/*
* PEBS is unreliable due to:
@@ -1541,60 +1545,19 @@ static __init void intel_clovertown_quirk(void)
x86_pmu.pebs_constraints = NULL;
}
-static __init void intel_sandybridge_quirk(void)
+static void intel_sandybridge_quirks(void)
{
printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
x86_pmu.pebs = 0;
x86_pmu.pebs_constraints = NULL;
}
-static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
- { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
- { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
- { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
- { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
- { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
- { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
- { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
-};
-
-static __init void intel_arch_events_quirk(void)
-{
- int bit;
-
- /* disable events that are reported as not present by cpuid */
- for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
- intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
- printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n",
- intel_arch_events_map[bit].name);
- }
-}
-
-static __init void intel_nehalem_quirk(void)
-{
- union cpuid10_ebx ebx;
-
- ebx.full = x86_pmu.events_maskl;
- if (ebx.split.no_branch_misses_retired) {
- /*
- * Erratum AAJ80 detected, we work it around by using
- * the BR_MISP_EXEC.ANY event. This will over-count
- * branch-misses, but it's still much better than the
- * architectural event which is often completely bogus:
- */
- intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
- ebx.split.no_branch_misses_retired = 0;
- x86_pmu.events_maskl = ebx.full;
- printk(KERN_INFO "CPU erratum AAJ80 worked around\n");
- }
-}
-
__init int intel_pmu_init(void)
{
union cpuid10_edx edx;
union cpuid10_eax eax;
- union cpuid10_ebx ebx;
unsigned int unused;
+ unsigned int ebx;
int version;
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -1611,8 +1574,8 @@ __init int intel_pmu_init(void)
* Check whether the Architectural PerfMon supports
* Branch Misses Retired hw_event or not.
*/
- cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
- if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
+ cpuid(10, &eax.full, &ebx, &unused, &edx.full);
+ if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
return -ENODEV;
version = eax.split.version_id;
@@ -1626,9 +1589,6 @@ __init int intel_pmu_init(void)
x86_pmu.cntval_bits = eax.split.bit_width;
x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
- x86_pmu.events_maskl = ebx.full;
- x86_pmu.events_mask_len = eax.split.mask_length;
-
/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
* assume at least 3 events:
@@ -1648,8 +1608,6 @@ __init int intel_pmu_init(void)
intel_ds_init();
- x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
-
/*
* Install the hw-cache-events table:
*/
@@ -1659,7 +1617,7 @@ __init int intel_pmu_init(void)
break;
case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
- x86_add_quirk(intel_clovertown_quirk);
+ x86_pmu.quirks = intel_clovertown_quirks;
case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
case 29: /* six-core 45 nm xeon "Dunnington" */
@@ -1693,8 +1651,17 @@ __init int intel_pmu_init(void)
/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
- x86_add_quirk(intel_nehalem_quirk);
+ if (ebx & 0x40) {
+ /*
+ * Erratum AAJ80 detected, we work it around by using
+ * the BR_MISP_EXEC.ANY event. This will over-count
+ * branch-misses, but it's still much better than the
+ * architectural event which is often completely bogus:
+ */
+ intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+ pr_cont("erratum AAJ80 worked around, ");
+ }
pr_cont("Nehalem events, ");
break;
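
The "(ebx & 0x40)" test above inspects CPUID leaf 0xA, where EBX reports which architectural events are not available; bit 6 covers branch-misses retired, and its presence is what triggers the AAJ80 workaround. A minimal userspace check of the same bit, assuming a GCC or Clang x86 toolchain for <cpuid.h>:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* leaf 0xA is only meaningful when architectural perfmon exists */
        if (!__get_cpuid(0x0a, &eax, &ebx, &ecx, &edx) || !(eax & 0xff)) {
                printf("architectural perfmon not reported by CPUID\n");
                return 1;
        }

        printf("arch perfmon version: %u\n", eax & 0xff);
        printf("branch-misses-retired event %savailable\n",
               (ebx & 0x40) ? "NOT " : "");
        return 0;
}
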
@@ -1734,7 +1701,7 @@ __init int intel_pmu_init(void)
break;
case 42: /* SandyBridge */
- x86_add_quirk(intel_sandybridge_quirk);
+ x86_pmu.quirks = intel_sandybridge_quirks;
case 45: /* SandyBridge, "Romely-EP" */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -1771,6 +1738,5 @@ __init int intel_pmu_init(void)
break;
}
}
-
return 0;
}
diff --git a/trunk/arch/x86/kernel/cpu/powerflags.c b/trunk/arch/x86/kernel/cpu/powerflags.c
index 7b3fe56b1c21..5abbea297e0c 100644
--- a/trunk/arch/x86/kernel/cpu/powerflags.c
+++ b/trunk/arch/x86/kernel/cpu/powerflags.c
@@ -16,6 +16,5 @@ const char *const x86_power_flags[32] = {
"100mhzsteps",
"hwpstate",
"", /* tsc invariant mapped to constant_tsc */
- "cpb", /* core performance boost */
- "eff_freq_ro", /* Readonly aperf/mperf */
+ /* nothing */
};
diff --git a/trunk/arch/x86/kernel/cpu/proc.c b/trunk/arch/x86/kernel/cpu/proc.c
index 8022c6681485..14b23140e81f 100644
--- a/trunk/arch/x86/kernel/cpu/proc.c
+++ b/trunk/arch/x86/kernel/cpu/proc.c
@@ -64,10 +64,12 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
static int show_cpuinfo(struct seq_file *m, void *v)
{
struct cpuinfo_x86 *c = v;
- unsigned int cpu;
+ unsigned int cpu = 0;
int i;
+#ifdef CONFIG_SMP
cpu = c->cpu_index;
+#endif
seq_printf(m, "processor\t: %u\n"
"vendor_id\t: %s\n"
"cpu family\t: %d\n"
diff --git a/trunk/arch/x86/kernel/e820.c b/trunk/arch/x86/kernel/e820.c
index 8071e2f3d6eb..303a0e48f076 100644
--- a/trunk/arch/x86/kernel/e820.c
+++ b/trunk/arch/x86/kernel/e820.c
@@ -738,17 +738,35 @@ core_initcall(e820_mark_nvs_memory);
/*
* pre allocated 4k and reserved it in memblock and e820_saved
*/
-u64 __init early_reserve_e820(u64 size, u64 align)
+u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
{
+ u64 size = 0;
u64 addr;
+ u64 start;
- addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
- if (addr) {
- e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED);
- printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
- update_e820_saved();
+ for (start = startt; ; start += size) {
+ start = memblock_x86_find_in_range_size(start, &size, align);
+ if (start == MEMBLOCK_ERROR)
+ return 0;
+ if (size >= sizet)
+ break;
}
+#ifdef CONFIG_X86_32
+ if (start >= MAXMEM)
+ return 0;
+ if (start + size > MAXMEM)
+ size = MAXMEM - start;
+#endif
+
+ addr = round_down(start + size - sizet, align);
+ if (addr < start)
+ return 0;
+ memblock_x86_reserve_range(addr, addr + sizet, "new next");
+ e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
+ printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
+ update_e820_saved();
+
return addr;
}
@@ -1072,7 +1090,7 @@ void __init memblock_x86_fill(void)
 * We are safe to enable resizing, because memblock_x86_fill()
 * runs rather late on x86
*/
- memblock_allow_resize();
+ memblock_can_resize = 1;
for (i = 0; i < e820.nr_map; i++) {
struct e820entry *ei = &e820.map[i];
@@ -1087,36 +1105,22 @@ void __init memblock_x86_fill(void)
memblock_add(ei->addr, ei->size);
}
+ memblock_analyze();
memblock_dump_all();
}
void __init memblock_find_dma_reserve(void)
{
#ifdef CONFIG_X86_64
- u64 nr_pages = 0, nr_free_pages = 0;
- unsigned long start_pfn, end_pfn;
- phys_addr_t start, end;
- int i;
- u64 u;
-
+ u64 free_size_pfn;
+ u64 mem_size_pfn;
/*
* need to find out used area below MAX_DMA_PFN
* need to use memblock to get free size in [0, MAX_DMA_PFN]
* at first, and assume boot_mem will not take below MAX_DMA_PFN
*/
- for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
- start_pfn = min_t(unsigned long, start_pfn, MAX_DMA_PFN);
- end_pfn = min_t(unsigned long, end_pfn, MAX_DMA_PFN);
- nr_pages += end_pfn - start_pfn;
- }
-
- for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) {
- start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN);
- end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN);
- if (start_pfn < end_pfn)
- nr_free_pages += end_pfn - start_pfn;
- }
-
- set_dma_reserve(nr_pages - nr_free_pages);
+ mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
+ free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
+ set_dma_reserve(mem_size_pfn - free_size_pfn);
#endif
}
diff --git a/trunk/arch/x86/kernel/entry_32.S b/trunk/arch/x86/kernel/entry_32.S
index 22d0e21b4dd7..f3f6f5344001 100644
--- a/trunk/arch/x86/kernel/entry_32.S
+++ b/trunk/arch/x86/kernel/entry_32.S
@@ -625,8 +625,6 @@ work_notifysig: # deal with pending signals and
movl %esp, %eax
jne work_notifysig_v86 # returning to kernel-space or
# vm86-space
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
xorl %edx, %edx
call do_notify_resume
jmp resume_userspace_sig
@@ -640,8 +638,6 @@ work_notifysig_v86:
#else
movl %esp, %eax
#endif
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
xorl %edx, %edx
call do_notify_resume
jmp resume_userspace_sig
diff --git a/trunk/arch/x86/kernel/entry_64.S b/trunk/arch/x86/kernel/entry_64.S
index a20e1cb9dc87..faf8d5e74b0b 100644
--- a/trunk/arch/x86/kernel/entry_64.S
+++ b/trunk/arch/x86/kernel/entry_64.S
@@ -221,7 +221,7 @@ ENDPROC(native_usergs_sysret64)
/*CFI_REL_OFFSET ss,0*/
pushq_cfi %rax /* rsp */
CFI_REL_OFFSET rsp,0
- pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
+ pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */
/*CFI_REL_OFFSET rflags,0*/
pushq_cfi $__KERNEL_CS /* cs */
/*CFI_REL_OFFSET cs,0*/
@@ -411,7 +411,7 @@ ENTRY(ret_from_fork)
RESTORE_REST
testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
- jz retint_restore_args
+ je int_ret_from_sys_call
testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
jnz int_ret_from_sys_call
@@ -465,7 +465,7 @@ ENTRY(system_call)
* after the swapgs, so that it can do the swapgs
* for the guest and jump here on syscall.
*/
-GLOBAL(system_call_after_swapgs)
+ENTRY(system_call_after_swapgs)
movq %rsp,PER_CPU_VAR(old_rsp)
movq PER_CPU_VAR(kernel_stack),%rsp
@@ -478,7 +478,8 @@ GLOBAL(system_call_after_swapgs)
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
- testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ GET_THREAD_INFO(%rcx)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
jnz tracesys
system_call_fastpath:
cmpq $__NR_syscall_max,%rax
@@ -495,9 +496,10 @@ ret_from_sys_call:
/* edi: flagmask */
sysret_check:
LOCKDEP_SYS_EXIT
+ GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
+ movl TI_flags(%rcx),%edx
andl %edi,%edx
jnz sysret_careful
CFI_REMEMBER_STATE
@@ -581,7 +583,7 @@ sysret_audit:
/* Do syscall tracing */
tracesys:
#ifdef CONFIG_AUDITSYSCALL
- testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
jz auditsys
#endif
SAVE_REST
@@ -610,6 +612,8 @@ tracesys:
GLOBAL(int_ret_from_sys_call)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
+ testl $3,CS-ARGOFFSET(%rsp)
+ je retint_restore_args
movl $_TIF_ALLWORK_MASK,%edi
/* edi: mask to check */
GLOBAL(int_with_check)
@@ -949,7 +953,6 @@ END(common_interrupt)
ENTRY(\sym)
INTR_FRAME
pushq_cfi $~(\num)
-.Lcommon_\sym:
interrupt \do_sym
jmp ret_from_intr
CFI_ENDPROC
@@ -973,21 +976,13 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_SMP
- ALIGN
- INTR_FRAME
-.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
+.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
.if NUM_INVALIDATE_TLB_VECTORS > \idx
-ENTRY(invalidate_interrupt\idx)
- pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx)
- jmp .Lcommon_invalidate_interrupt0
- CFI_ADJUST_CFA_OFFSET -8
-END(invalidate_interrupt\idx)
+apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
+ invalidate_interrupt\idx smp_invalidate_interrupt
.endif
.endr
- CFI_ENDPROC
-apicinterrupt INVALIDATE_TLB_VECTOR_START, \
- invalidate_interrupt0, smp_invalidate_interrupt
#endif
apicinterrupt THRESHOLD_APIC_VECTOR \
diff --git a/trunk/arch/x86/kernel/head.c b/trunk/arch/x86/kernel/head.c
index 48d9d4ea1020..af0699ba48cf 100644
--- a/trunk/arch/x86/kernel/head.c
+++ b/trunk/arch/x86/kernel/head.c
@@ -52,5 +52,5 @@ void __init reserve_ebda_region(void)
lowmem = 0x9f000;
/* reserve all memory between lowmem and the 1MB mark */
- memblock_reserve(lowmem, 0x100000 - lowmem);
+ memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved");
}
diff --git a/trunk/arch/x86/kernel/head32.c b/trunk/arch/x86/kernel/head32.c
index 51ff18616d50..3bb08509a7a1 100644
--- a/trunk/arch/x86/kernel/head32.c
+++ b/trunk/arch/x86/kernel/head32.c
@@ -31,8 +31,9 @@ static void __init i386_default_early_setup(void)
void __init i386_start_kernel(void)
{
- memblock_reserve(__pa_symbol(&_text),
- __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
+ memblock_init();
+
+ memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */
@@ -41,7 +42,7 @@ void __init i386_start_kernel(void)
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
- memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
+ memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
diff --git a/trunk/arch/x86/kernel/head64.c b/trunk/arch/x86/kernel/head64.c
index 3a3b779f41d3..5655c2272adb 100644
--- a/trunk/arch/x86/kernel/head64.c
+++ b/trunk/arch/x86/kernel/head64.c
@@ -98,8 +98,9 @@ void __init x86_64_start_reservations(char *real_mode_data)
{
copy_bootdata(__va(real_mode_data));
- memblock_reserve(__pa_symbol(&_text),
- __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
+ memblock_init();
+
+ memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */
@@ -108,7 +109,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
- memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
+ memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
diff --git a/trunk/arch/x86/kernel/hpet.c b/trunk/arch/x86/kernel/hpet.c
index 07b0a56a754d..1bb0bf4d92cd 100644
--- a/trunk/arch/x86/kernel/hpet.c
+++ b/trunk/arch/x86/kernel/hpet.c
@@ -32,6 +32,8 @@
#define HPET_MIN_CYCLES 128
#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
+#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
+
/*
* HPET address is set in acpi/boot.c, when an ACPI entry exists
*/
@@ -53,11 +55,6 @@ struct hpet_dev {
char name[10];
};
-inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev)
-{
- return container_of(evtdev, struct hpet_dev, evt);
-}
-
inline unsigned int hpet_readl(unsigned int a)
{
return readl(hpet_virt_address + a);
diff --git a/trunk/arch/x86/kernel/irq.c b/trunk/arch/x86/kernel/irq.c
index 7943e0c21bde..429e0c92924e 100644
--- a/trunk/arch/x86/kernel/irq.c
+++ b/trunk/arch/x86/kernel/irq.c
@@ -74,10 +74,6 @@ int arch_show_interrupts(struct seq_file *p, int prec)
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
seq_printf(p, " IRQ work interrupts\n");
- seq_printf(p, "%*s: ", prec, "RTR");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
- seq_printf(p, " APIC ICR read retries\n");
#endif
if (x86_platform_ipi_callback) {
seq_printf(p, "%*s: ", prec, "PLT");
@@ -140,7 +136,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->irq_spurious_count;
sum += irq_stats(cpu)->apic_perf_irqs;
sum += irq_stats(cpu)->apic_irq_work_irqs;
- sum += irq_stats(cpu)->icr_read_retry_count;
#endif
if (x86_platform_ipi_callback)
sum += irq_stats(cpu)->x86_platform_ipis;
@@ -186,8 +181,8 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
unsigned vector = ~regs->orig_ax;
unsigned irq;
- irq_enter();
exit_idle();
+ irq_enter();
irq = __this_cpu_read(vector_irq[vector]);
@@ -214,10 +209,10 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
ack_APIC_irq();
- irq_enter();
-
exit_idle();
+ irq_enter();
+
inc_irq_stat(x86_platform_ipis);
if (x86_platform_ipi_callback)
diff --git a/trunk/arch/x86/kernel/jump_label.c b/trunk/arch/x86/kernel/jump_label.c
index 2889b3d43882..ea9d5f2f13ef 100644
--- a/trunk/arch/x86/kernel/jump_label.c
+++ b/trunk/arch/x86/kernel/jump_label.c
@@ -50,7 +50,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
put_online_cpus();
}
-__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
+void arch_jump_label_transform_static(struct jump_entry *entry,
enum jump_label_type type)
{
__jump_label_transform(entry, type, text_poke_early);
diff --git a/trunk/arch/x86/kernel/microcode_amd.c b/trunk/arch/x86/kernel/microcode_amd.c
index fe86493f3ed1..d494799aafcd 100644
--- a/trunk/arch/x86/kernel/microcode_amd.c
+++ b/trunk/arch/x86/kernel/microcode_amd.c
@@ -1,18 +1,14 @@
/*
* AMD CPU Microcode Update Driver for Linux
- * Copyright (C) 2008-2011 Advanced Micro Devices Inc.
+ * Copyright (C) 2008 Advanced Micro Devices Inc.
*
* Author: Peter Oruba
*
* Based on work by:
* Tigran Aivazian
*
- * Maintainers:
- * Andreas Herrmann
- * Borislav Petkov
- *
- * This driver allows to upgrade microcode on F10h AMD
- * CPUs and later.
+ * This driver allows to upgrade microcode on AMD
+ * family 0x10 and 0x11 processors.
*
* Licensed under the terms of the GNU General Public
* License version 2. See file COPYING for details.
@@ -75,9 +71,6 @@ struct microcode_amd {
static struct equiv_cpu_entry *equiv_cpu_table;
-/* page-sized ucode patch buffer */
-void *patch;
-
static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -93,76 +86,27 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
return 0;
}
-static unsigned int verify_ucode_size(int cpu, u32 patch_size,
- unsigned int size)
+static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr,
+ int rev)
{
- struct cpuinfo_x86 *c = &cpu_data(cpu);
- u32 max_size;
-
-#define F1XH_MPB_MAX_SIZE 2048
-#define F14H_MPB_MAX_SIZE 1824
-#define F15H_MPB_MAX_SIZE 4096
-
- switch (c->x86) {
- case 0x14:
- max_size = F14H_MPB_MAX_SIZE;
- break;
- case 0x15:
- max_size = F15H_MPB_MAX_SIZE;
- break;
- default:
- max_size = F1XH_MPB_MAX_SIZE;
- break;
- }
-
- if (patch_size > min_t(u32, size, max_size)) {
- pr_err("patch size mismatch\n");
- return 0;
- }
-
- return patch_size;
-}
-
-static u16 find_equiv_id(void)
-{
- unsigned int current_cpu_id, i = 0;
+ unsigned int current_cpu_id;
+ u16 equiv_cpu_id = 0;
+ unsigned int i = 0;
BUG_ON(equiv_cpu_table == NULL);
-
current_cpu_id = cpuid_eax(0x00000001);
while (equiv_cpu_table[i].installed_cpu != 0) {
- if (current_cpu_id == equiv_cpu_table[i].installed_cpu)
- return equiv_cpu_table[i].equiv_cpu;
-
+ if (current_cpu_id == equiv_cpu_table[i].installed_cpu) {
+ equiv_cpu_id = equiv_cpu_table[i].equiv_cpu;
+ break;
+ }
i++;
}
- return 0;
-}
-/*
- * we signal a good patch is found by returning its size > 0
- */
-static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
- unsigned int leftover_size, int rev,
- unsigned int *current_size)
-{
- struct microcode_header_amd *mc_hdr;
- unsigned int actual_size;
- u16 equiv_cpu_id;
-
- /* size of the current patch we're staring at */
- *current_size = *(u32 *)(ucode_ptr + 4) + SECTION_HDR_SIZE;
-
- equiv_cpu_id = find_equiv_id();
if (!equiv_cpu_id)
return 0;
- /*
- * let's look at the patch header itself now
- */
- mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE);
-
if (mc_hdr->processor_rev_id != equiv_cpu_id)
return 0;
@@ -176,20 +120,7 @@ static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
if (mc_hdr->patch_id <= rev)
return 0;
- /*
- * now that the header looks sane, verify its size
- */
- actual_size = verify_ucode_size(cpu, *current_size, leftover_size);
- if (!actual_size)
- return 0;
-
- /* clear the patch buffer */
- memset(patch, 0, PAGE_SIZE);
-
- /* all looks ok, get the binary patch */
- get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size);
-
- return actual_size;
+ return 1;
}
static int apply_microcode_amd(int cpu)
@@ -224,6 +155,63 @@ static int apply_microcode_amd(int cpu)
return 0;
}
+static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size)
+{
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+ u32 max_size, actual_size;
+
+#define F1XH_MPB_MAX_SIZE 2048
+#define F14H_MPB_MAX_SIZE 1824
+#define F15H_MPB_MAX_SIZE 4096
+
+ switch (c->x86) {
+ case 0x14:
+ max_size = F14H_MPB_MAX_SIZE;
+ break;
+ case 0x15:
+ max_size = F15H_MPB_MAX_SIZE;
+ break;
+ default:
+ max_size = F1XH_MPB_MAX_SIZE;
+ break;
+ }
+
+ actual_size = *(u32 *)(buf + 4);
+
+ if (actual_size + SECTION_HDR_SIZE > size || actual_size > max_size) {
+ pr_err("section size mismatch\n");
+ return 0;
+ }
+
+ return actual_size;
+}
+
+static struct microcode_header_amd *
+get_next_ucode(int cpu, const u8 *buf, unsigned int size, unsigned int *mc_size)
+{
+ struct microcode_header_amd *mc = NULL;
+ unsigned int actual_size = 0;
+
+ if (*(u32 *)buf != UCODE_UCODE_TYPE) {
+ pr_err("invalid type field in container file section header\n");
+ goto out;
+ }
+
+ actual_size = verify_ucode_size(cpu, buf, size);
+ if (!actual_size)
+ goto out;
+
+ mc = vzalloc(actual_size);
+ if (!mc)
+ goto out;
+
+ get_ucode_data(mc, buf + SECTION_HDR_SIZE, actual_size);
+ *mc_size = actual_size + SECTION_HDR_SIZE;
+
+out:
+ return mc;
+}
+
static int install_equiv_cpu_table(const u8 *buf)
{
unsigned int *ibuf = (unsigned int *)buf;
@@ -259,38 +247,36 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
struct microcode_header_amd *mc_hdr = NULL;
- unsigned int mc_size, leftover, current_size = 0;
+ unsigned int mc_size, leftover;
int offset;
const u8 *ucode_ptr = data;
void *new_mc = NULL;
unsigned int new_rev = uci->cpu_sig.rev;
- enum ucode_state state = UCODE_ERROR;
+ enum ucode_state state = UCODE_OK;
offset = install_equiv_cpu_table(ucode_ptr);
if (offset < 0) {
pr_err("failed to create equivalent cpu table\n");
- goto out;
+ return UCODE_ERROR;
}
+
ucode_ptr += offset;
leftover = size - offset;
- if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) {
- pr_err("invalid type field in container file section header\n");
- goto free_table;
- }
-
while (leftover) {
- mc_size = get_matching_microcode(cpu, ucode_ptr, leftover,
- new_rev, &current_size);
- if (mc_size) {
- mc_hdr = patch;
- new_mc = patch;
+ mc_hdr = get_next_ucode(cpu, ucode_ptr, leftover, &mc_size);
+ if (!mc_hdr)
+ break;
+
+ if (get_matching_microcode(cpu, mc_hdr, new_rev)) {
+ vfree(new_mc);
new_rev = mc_hdr->patch_id;
- goto out_ok;
- }
+ new_mc = mc_hdr;
+ } else
+ vfree(mc_hdr);
- ucode_ptr += current_size;
- leftover -= current_size;
+ ucode_ptr += mc_size;
+ leftover -= mc_size;
}
if (!new_mc) {
@@ -298,16 +284,19 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
goto free_table;
}
-out_ok:
- uci->mc = new_mc;
- state = UCODE_OK;
- pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
- cpu, uci->cpu_sig.rev, new_rev);
+ if (!leftover) {
+ vfree(uci->mc);
+ uci->mc = new_mc;
+ pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
+ cpu, uci->cpu_sig.rev, new_rev);
+ } else {
+ vfree(new_mc);
+ state = UCODE_ERROR;
+ }
free_table:
free_equiv_cpu_table();
-out:
return state;
}
@@ -348,6 +337,7 @@ static void microcode_fini_cpu_amd(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ vfree(uci->mc);
uci->mc = NULL;
}
@@ -361,14 +351,5 @@ static struct microcode_ops microcode_amd_ops = {
struct microcode_ops * __init init_amd_microcode(void)
{
- patch = (void *)get_zeroed_page(GFP_KERNEL);
- if (!patch)
- return NULL;
-
return &microcode_amd_ops;
}
-
-void __exit exit_amd_microcode(void)
-{
- free_page((unsigned long)patch);
-}
diff --git a/trunk/arch/x86/kernel/microcode_core.c b/trunk/arch/x86/kernel/microcode_core.c
index 9302e2d0eb4b..9d46f5e43b51 100644
--- a/trunk/arch/x86/kernel/microcode_core.c
+++ b/trunk/arch/x86/kernel/microcode_core.c
@@ -563,8 +563,6 @@ module_init(microcode_init);
static void __exit microcode_exit(void)
{
- struct cpuinfo_x86 *c = &cpu_data(0);
-
microcode_dev_exit();
unregister_hotcpu_notifier(&mc_cpu_notifier);
@@ -582,9 +580,6 @@ static void __exit microcode_exit(void)
microcode_ops = NULL;
- if (c->x86_vendor == X86_VENDOR_AMD)
- exit_amd_microcode();
-
pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
}
module_exit(microcode_exit);
diff --git a/trunk/arch/x86/kernel/mpparse.c b/trunk/arch/x86/kernel/mpparse.c
index ca470e4c92dc..0741b062a304 100644
--- a/trunk/arch/x86/kernel/mpparse.c
+++ b/trunk/arch/x86/kernel/mpparse.c
@@ -564,7 +564,9 @@ void __init default_get_smp_config(unsigned int early)
static void __init smp_reserve_memory(struct mpf_intel *mpf)
{
- memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr));
+ unsigned long size = get_mpc_size(mpf->physptr);
+
+ memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc");
}
static int __init smp_scan_config(unsigned long base, unsigned long length)
@@ -593,7 +595,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
mpf, (u64)virt_to_phys(mpf));
mem = virt_to_phys(mpf);
- memblock_reserve(mem, sizeof(*mpf));
+ memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf");
if (mpf->physptr)
smp_reserve_memory(mpf);
@@ -834,8 +836,10 @@ early_param("alloc_mptable", parse_alloc_mptable_opt);
void __init early_reserve_e820_mpc_new(void)
{
- if (enable_update_mptable && alloc_mptable)
- mpc_new_phys = early_reserve_e820(mpc_new_length, 4);
+ if (enable_update_mptable && alloc_mptable) {
+ u64 startt = 0;
+ mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
+ }
}
static int __init update_mp_table(void)
diff --git a/trunk/arch/x86/kernel/process.c b/trunk/arch/x86/kernel/process.c
index 15763af7bfe3..ee5d4fbd53b4 100644
--- a/trunk/arch/x86/kernel/process.c
+++ b/trunk/arch/x86/kernel/process.c
@@ -293,7 +293,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
regs.orig_ax = -1;
regs.ip = (unsigned long) kernel_thread_helper;
regs.cs = __KERNEL_CS | get_kernel_rpl();
- regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+ regs.flags = X86_EFLAGS_IF | 0x2;
/* Ok, create the new process.. */
return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
diff --git a/trunk/arch/x86/kernel/process_32.c b/trunk/arch/x86/kernel/process_32.c
index 485204f58cda..795b79f984c2 100644
--- a/trunk/arch/x86/kernel/process_32.c
+++ b/trunk/arch/x86/kernel/process_32.c
@@ -99,8 +99,7 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
- tick_nohz_idle_enter();
- rcu_idle_enter();
+ tick_nohz_stop_sched_tick(1);
while (!need_resched()) {
check_pgt_cache();
@@ -117,8 +116,7 @@ void cpu_idle(void)
pm_idle();
start_critical_timings();
}
- rcu_idle_exit();
- tick_nohz_idle_exit();
+ tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
schedule();
preempt_disable();
diff --git a/trunk/arch/x86/kernel/process_64.c b/trunk/arch/x86/kernel/process_64.c
index 9b9fe4a85c87..3bd7e6eebf31 100644
--- a/trunk/arch/x86/kernel/process_64.c
+++ b/trunk/arch/x86/kernel/process_64.c
@@ -122,7 +122,7 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
- tick_nohz_idle_enter();
+ tick_nohz_stop_sched_tick(1);
while (!need_resched()) {
rmb();
@@ -139,14 +139,8 @@ void cpu_idle(void)
enter_idle();
/* Don't trace irqs off for idle */
stop_critical_timings();
-
- /* enter_idle() needs rcu for notifiers */
- rcu_idle_enter();
-
if (cpuidle_idle_call())
pm_idle();
-
- rcu_idle_exit();
start_critical_timings();
/* In many cases the interrupt that ended idle
@@ -155,7 +149,7 @@ void cpu_idle(void)
__exit_idle();
}
- tick_nohz_idle_exit();
+ tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
schedule();
preempt_disable();
@@ -299,12 +293,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
- p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
- IO_BITMAP_BYTES, GFP_KERNEL);
+ p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
p->thread.io_bitmap_max = 0;
return -ENOMEM;
}
+ memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
+ IO_BITMAP_BYTES);
set_tsk_thread_flag(p, TIF_IO_BITMAP);
}
diff --git a/trunk/arch/x86/kernel/ptrace.c b/trunk/arch/x86/kernel/ptrace.c
index 89a04c7b5bb6..82528799c5de 100644
--- a/trunk/arch/x86/kernel/ptrace.c
+++ b/trunk/arch/x86/kernel/ptrace.c
@@ -749,8 +749,7 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
/*
* Handle PTRACE_POKEUSR calls for the debug register area.
*/
-static int ptrace_set_debugreg(struct task_struct *tsk, int n,
- unsigned long val)
+int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
{
struct thread_struct *thread = &(tsk->thread);
int rc = 0;
diff --git a/trunk/arch/x86/kernel/setup.c b/trunk/arch/x86/kernel/setup.c
index d05444ac2aea..cf0ef986cb6d 100644
--- a/trunk/arch/x86/kernel/setup.c
+++ b/trunk/arch/x86/kernel/setup.c
@@ -306,8 +306,7 @@ static void __init cleanup_highmap(void)
static void __init reserve_brk(void)
{
if (_brk_end > _brk_start)
- memblock_reserve(__pa(_brk_start),
- __pa(_brk_end) - __pa(_brk_start));
+ memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK");
/* Mark brk area as locked down and no longer taking any
new allocations */
@@ -332,13 +331,13 @@ static void __init relocate_initrd(void)
ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
PAGE_SIZE);
- if (!ramdisk_here)
+ if (ramdisk_here == MEMBLOCK_ERROR)
panic("Cannot find place for new RAMDISK of size %lld\n",
ramdisk_size);
/* Note: this includes all the lowmem currently occupied by
the initrd, we rely on that fact to keep the data intact. */
- memblock_reserve(ramdisk_here, area_size);
+ memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK");
initrd_start = ramdisk_here + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
@@ -394,7 +393,7 @@ static void __init reserve_initrd(void)
initrd_start = 0;
if (ramdisk_size >= (end_of_lowmem>>1)) {
- memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
+ memblock_x86_free_range(ramdisk_image, ramdisk_end);
printk(KERN_ERR "initrd too large to handle, "
"disabling initrd\n");
return;
@@ -417,7 +416,7 @@ static void __init reserve_initrd(void)
relocate_initrd();
- memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
+ memblock_x86_free_range(ramdisk_image, ramdisk_end);
}
#else
static void __init reserve_initrd(void)
@@ -491,13 +490,15 @@ static void __init memblock_x86_reserve_range_setup_data(void)
{
struct setup_data *data;
u64 pa_data;
+ char buf[32];
if (boot_params.hdr.version < 0x0209)
return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
data = early_memremap(pa_data, sizeof(*data));
- memblock_reserve(pa_data, sizeof(*data) + data->len);
+ sprintf(buf, "setup data %x", data->type);
+ memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf);
pa_data = data->next;
early_iounmap(data, sizeof(*data));
}
@@ -553,7 +554,7 @@ static void __init reserve_crashkernel(void)
crash_base = memblock_find_in_range(alignment,
CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
- if (!crash_base) {
+ if (crash_base == MEMBLOCK_ERROR) {
pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
}
@@ -567,7 +568,7 @@ static void __init reserve_crashkernel(void)
return;
}
}
- memblock_reserve(crash_base, crash_size);
+ memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL");
printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
"for crashkernel (System RAM: %ldMB)\n",
@@ -625,7 +626,7 @@ static __init void reserve_ibft_region(void)
addr = find_ibft_region(&size);
if (size)
- memblock_reserve(addr, size);
+ memblock_x86_reserve_range(addr, addr + size, "* ibft");
}
static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
diff --git a/trunk/arch/x86/kernel/smpboot.c b/trunk/arch/x86/kernel/smpboot.c
index e38e21754eea..9f548cb4a958 100644
--- a/trunk/arch/x86/kernel/smpboot.c
+++ b/trunk/arch/x86/kernel/smpboot.c
@@ -840,8 +840,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);
if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
- !physid_isset(apicid, phys_cpu_present_map) ||
- (!x2apic_mode && apicid >= 255)) {
+ !physid_isset(apicid, phys_cpu_present_map)) {
printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu);
return -EINVAL;
}
diff --git a/trunk/arch/x86/kernel/trampoline.c b/trunk/arch/x86/kernel/trampoline.c
index a73b61055ad6..a91ae7709b49 100644
--- a/trunk/arch/x86/kernel/trampoline.c
+++ b/trunk/arch/x86/kernel/trampoline.c
@@ -14,11 +14,11 @@ void __init setup_trampolines(void)
/* Has to be in very low memory so we can execute real-mode AP code. */
mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
- if (!mem)
+ if (mem == MEMBLOCK_ERROR)
panic("Cannot allocate trampoline\n");
x86_trampoline_base = __va(mem);
- memblock_reserve(mem, size);
+ memblock_x86_reserve_range(mem, mem + size, "TRAMPOLINE");
printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
x86_trampoline_base, (unsigned long long)mem, size);
diff --git a/trunk/arch/x86/kernel/traps.c b/trunk/arch/x86/kernel/traps.c
index fa1191fb679d..a8e3eb83466c 100644
--- a/trunk/arch/x86/kernel/traps.c
+++ b/trunk/arch/x86/kernel/traps.c
@@ -306,10 +306,15 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
== NOTIFY_STOP)
return;
#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
-
+#ifdef CONFIG_KPROBES
if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
== NOTIFY_STOP)
return;
+#else
+ if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
+ == NOTIFY_STOP)
+ return;
+#endif
preempt_conditional_sti(regs);
do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
diff --git a/trunk/arch/x86/kernel/tsc.c b/trunk/arch/x86/kernel/tsc.c
index 2c9cf0fd78f5..db483369f10b 100644
--- a/trunk/arch/x86/kernel/tsc.c
+++ b/trunk/arch/x86/kernel/tsc.c
@@ -35,7 +35,7 @@ static int __read_mostly tsc_unstable;
erroneous rdtsc usage on !cpu_has_tsc processors */
static int __read_mostly tsc_disabled = -1;
-int tsc_clocksource_reliable;
+static int tsc_clocksource_reliable;
/*
* Scheduler clock - returns current time in nanosec units.
*/
@@ -178,11 +178,11 @@ static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
}
#define CAL_MS 10
-#define CAL_LATCH (PIT_TICK_RATE / (1000 / CAL_MS))
+#define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS))
#define CAL_PIT_LOOPS 1000
#define CAL2_MS 50
-#define CAL2_LATCH (PIT_TICK_RATE / (1000 / CAL2_MS))
+#define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS))
#define CAL2_PIT_LOOPS 5000
diff --git a/trunk/arch/x86/kernel/tsc_sync.c b/trunk/arch/x86/kernel/tsc_sync.c
index 9eba29b46cb7..0aa5fed8b9e6 100644
--- a/trunk/arch/x86/kernel/tsc_sync.c
+++ b/trunk/arch/x86/kernel/tsc_sync.c
@@ -113,7 +113,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
if (unsynchronized_tsc())
return;
- if (tsc_clocksource_reliable) {
+ if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
pr_info(
"Skipped synchronization checks as TSC is reliable.\n");
@@ -172,7 +172,7 @@ void __cpuinit check_tsc_sync_target(void)
{
int cpus = 2;
- if (unsynchronized_tsc() || tsc_clocksource_reliable)
+ if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
return;
/*
diff --git a/trunk/arch/x86/kernel/vsyscall_64.c b/trunk/arch/x86/kernel/vsyscall_64.c
index b07ba9393564..e4d4a22e8b94 100644
--- a/trunk/arch/x86/kernel/vsyscall_64.c
+++ b/trunk/arch/x86/kernel/vsyscall_64.c
@@ -57,7 +57,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
};
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
+static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE;
static int __init vsyscall_setup(char *str)
{
@@ -140,40 +140,11 @@ static int addr_to_vsyscall_nr(unsigned long addr)
return nr;
}
-static bool write_ok_or_segv(unsigned long ptr, size_t size)
-{
- /*
- * XXX: if access_ok, get_user, and put_user handled
- * sig_on_uaccess_error, this could go away.
- */
-
- if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) {
- siginfo_t info;
- struct thread_struct *thread = &current->thread;
-
- thread->error_code = 6; /* user fault, no page, write */
- thread->cr2 = ptr;
- thread->trap_no = 14;
-
- memset(&info, 0, sizeof(info));
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- info.si_code = SEGV_MAPERR;
- info.si_addr = (void __user *)ptr;
-
- force_sig_info(SIGSEGV, &info, current);
- return false;
- } else {
- return true;
- }
-}
-
bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
struct task_struct *tsk;
unsigned long caller;
int vsyscall_nr;
- int prev_sig_on_uaccess_error;
long ret;
/*
@@ -209,65 +180,35 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
if (seccomp_mode(&tsk->seccomp))
do_exit(SIGKILL);
- /*
- * With a real vsyscall, page faults cause SIGSEGV. We want to
- * preserve that behavior to make writing exploits harder.
- */
- prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
- current_thread_info()->sig_on_uaccess_error = 1;
-
- /*
- * 0 is a valid user pointer (in the access_ok sense) on 32-bit and
- * 64-bit, so we don't need to special-case it here. For all the
- * vsyscalls, 0 means "don't write anything" not "write it at
- * address 0".
- */
- ret = -EFAULT;
switch (vsyscall_nr) {
case 0:
- if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
- !write_ok_or_segv(regs->si, sizeof(struct timezone)))
- break;
-
ret = sys_gettimeofday(
(struct timeval __user *)regs->di,
(struct timezone __user *)regs->si);
break;
case 1:
- if (!write_ok_or_segv(regs->di, sizeof(time_t)))
- break;
-
ret = sys_time((time_t __user *)regs->di);
break;
case 2:
- if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
- !write_ok_or_segv(regs->si, sizeof(unsigned)))
- break;
-
ret = sys_getcpu((unsigned __user *)regs->di,
(unsigned __user *)regs->si,
0);
break;
}
- current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
-
if (ret == -EFAULT) {
- /* Bad news -- userspace fed a bad pointer to a vsyscall. */
- warn_bad_vsyscall(KERN_INFO, regs,
- "vsyscall fault (exploit attempt?)");
-
/*
- * If we failed to generate a signal for any reason,
- * generate one here. (This should be impossible.)
+ * Bad news -- userspace fed a bad pointer to a vsyscall.
+ *
+ * With a real vsyscall, that would have caused SIGSEGV.
+ * To make writing reliable exploits using the emulated
+ * vsyscalls harder, generate SIGSEGV here as well.
*/
- if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
- !sigismember(&tsk->pending.signal, SIGSEGV)))
- goto sigsegv;
-
- return true; /* Don't emulate the ret. */
+ warn_bad_vsyscall(KERN_INFO, regs,
+ "vsyscall fault (exploit attempt?)");
+ goto sigsegv;
}
regs->ax = ret;
diff --git a/trunk/arch/x86/kernel/x86_init.c b/trunk/arch/x86/kernel/x86_init.c
index 91f83e21b989..c1d6cd549397 100644
--- a/trunk/arch/x86/kernel/x86_init.c
+++ b/trunk/arch/x86/kernel/x86_init.c
@@ -92,7 +92,6 @@ struct x86_init_ops x86_init __initdata = {
struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
.setup_percpu_clockev = setup_secondary_APIC_clock,
- .fixup_cpu_id = x86_default_fixup_cpu_id,
};
static void default_nmi_init(void) { };
diff --git a/trunk/arch/x86/kvm/i8254.c b/trunk/arch/x86/kvm/i8254.c
index 405f2620392f..76e3f1cd0369 100644
--- a/trunk/arch/x86/kvm/i8254.c
+++ b/trunk/arch/x86/kvm/i8254.c
@@ -338,15 +338,11 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
return HRTIMER_NORESTART;
}
-static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
+static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
{
- struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
struct kvm_timer *pt = &ps->pit_timer;
s64 interval;
- if (!irqchip_in_kernel(kvm))
- return;
-
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
pr_debug("create pit timer, interval is %llu nsec\n", interval);
@@ -398,13 +394,13 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
/* FIXME: enhance mode 4 precision */
case 4:
if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) {
- create_pit_timer(kvm, val, 0);
+ create_pit_timer(ps, val, 0);
}
break;
case 2:
case 3:
if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){
- create_pit_timer(kvm, val, 1);
+ create_pit_timer(ps, val, 1);
}
break;
default:
diff --git a/trunk/arch/x86/kvm/x86.c b/trunk/arch/x86/kvm/x86.c
index 4c938da2ba00..c38efd7b792e 100644
--- a/trunk/arch/x86/kvm/x86.c
+++ b/trunk/arch/x86/kvm/x86.c
@@ -602,6 +602,7 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
struct kvm_lapic *apic = vcpu->arch.apic;
+ u32 timer_mode_mask;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
if (!best)
@@ -614,12 +615,15 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
best->ecx |= bit(X86_FEATURE_OSXSAVE);
}
- if (apic) {
- if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER))
- apic->lapic_timer.timer_mode_mask = 3 << 17;
- else
- apic->lapic_timer.timer_mode_mask = 1 << 17;
- }
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+ best->function == 0x1) {
+ best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER);
+ timer_mode_mask = 3 << 17;
+ } else
+ timer_mode_mask = 1 << 17;
+
+ if (apic)
+ apic->lapic_timer.timer_mode_mask = timer_mode_mask;
}
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -2131,9 +2135,6 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_TSC_CONTROL:
r = kvm_has_tsc_control;
break;
- case KVM_CAP_TSC_DEADLINE_TIMER:
- r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
- break;
default:
r = 0;
break;
diff --git a/trunk/arch/x86/lib/inat.c b/trunk/arch/x86/lib/inat.c
index 88ad5fbda6e1..46fc4ee09fc4 100644
--- a/trunk/arch/x86/lib/inat.c
+++ b/trunk/arch/x86/lib/inat.c
@@ -82,16 +82,9 @@ insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
const insn_attr_t *table;
if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
return 0;
- /* At first, this checks the master table */
- table = inat_avx_tables[vex_m][0];
+ table = inat_avx_tables[vex_m][vex_p];
if (!table)
return 0;
- if (!inat_is_group(table[opcode]) && vex_p) {
- /* If this is not a group, get attribute directly */
- table = inat_avx_tables[vex_m][vex_p];
- if (!table)
- return 0;
- }
return table[opcode];
}
diff --git a/trunk/arch/x86/lib/insn.c b/trunk/arch/x86/lib/insn.c
index 5a1f9f3e3fbb..374562ed6704 100644
--- a/trunk/arch/x86/lib/insn.c
+++ b/trunk/arch/x86/lib/insn.c
@@ -202,7 +202,7 @@ void insn_get_opcode(struct insn *insn)
m = insn_vex_m_bits(insn);
p = insn_vex_p_bits(insn);
insn->attr = inat_get_avx_attribute(op, m, p);
- if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
+ if (!inat_accept_vex(insn->attr))
insn->attr = 0; /* This instruction is bad */
goto end; /* VEX has only 1 byte for opcode */
}
@@ -249,8 +249,6 @@ void insn_get_modrm(struct insn *insn)
pfx = insn_last_prefix(insn);
insn->attr = inat_get_group_attribute(mod, pfx,
insn->attr);
- if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
- insn->attr = 0; /* This is bad */
}
}
diff --git a/trunk/arch/x86/lib/string_32.c b/trunk/arch/x86/lib/string_32.c
index bd59090825db..82004d2bf05e 100644
--- a/trunk/arch/x86/lib/string_32.c
+++ b/trunk/arch/x86/lib/string_32.c
@@ -164,13 +164,15 @@ EXPORT_SYMBOL(strchr);
size_t strlen(const char *s)
{
int d0;
- size_t res;
+ int res;
asm volatile("repne\n\t"
- "scasb"
+ "scasb\n\t"
+ "notl %0\n\t"
+ "decl %0"
: "=c" (res), "=&D" (d0)
: "1" (s), "a" (0), "0" (0xffffffffu)
: "memory");
- return ~res - 1;
+ return res;
}
EXPORT_SYMBOL(strlen);
#endif
diff --git a/trunk/arch/x86/lib/x86-opcode-map.txt b/trunk/arch/x86/lib/x86-opcode-map.txt
index 5b83c51c12e0..8641bbb8e006 100644
--- a/trunk/arch/x86/lib/x86-opcode-map.txt
+++ b/trunk/arch/x86/lib/x86-opcode-map.txt
@@ -1,11 +1,5 @@
# x86 Opcode Maps
#
-# This is (mostly) based on following documentations.
-# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2
-# (#325383-040US, October 2011)
-# - Intel(R) Advanced Vector Extensions Programming Reference
-# (#319433-011,JUNE 2011).
-#
#
# Table: table-name
# Referrer: escaped-name
@@ -21,13 +15,10 @@
# EndTable
#
# AVX Superscripts
-# (v): this opcode requires VEX prefix.
-# (v1): this opcode only supports 128bit VEX.
-#
-# Last Prefix Superscripts
-# - (66): the last prefix is 0x66
-# - (F3): the last prefix is 0xF3
-# - (F2): the last prefix is 0xF2
+# (VEX): this opcode can accept VEX prefix.
+# (oVEX): this opcode requires VEX prefix.
+# (o128): this opcode only supports 128bit VEX.
+# (o256): this opcode only supports 256bit VEX.
#
Table: one byte opcode
@@ -208,8 +199,8 @@ a0: MOV AL,Ob
a1: MOV rAX,Ov
a2: MOV Ob,AL
a3: MOV Ov,rAX
-a4: MOVS/B Yb,Xb
-a5: MOVS/W/D/Q Yv,Xv
+a4: MOVS/B Xb,Yb
+a5: MOVS/W/D/Q Xv,Yv
a6: CMPS/B Xb,Yb
a7: CMPS/W/D Xv,Yv
a8: TEST AL,Ib
@@ -219,7 +210,9 @@ ab: STOS/W/D/Q Yv,rAX
ac: LODS/B AL,Xb
ad: LODS/W/D/Q rAX,Xv
ae: SCAS/B AL,Yb
-af: SCAS/W/D/Q rAX,Xv
+# Note: The May 2011 Intel manual shows Xv for the second parameter of the
+# next instruction but Yv is correct
+af: SCAS/W/D/Q rAX,Yv
# 0xb0 - 0xbf
b0: MOV AL/R8L,Ib
b1: MOV CL/R9L,Ib
@@ -242,8 +235,8 @@ c0: Grp2 Eb,Ib (1A)
c1: Grp2 Ev,Ib (1A)
c2: RETN Iw (f64)
c3: RETN
-c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
-c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
+c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix)
+c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix)
c6: Grp11 Eb,Ib (1A)
c7: Grp11 Ev,Iz (1A)
c8: ENTER Iw,Ib
@@ -329,19 +322,14 @@ AVXcode: 1
# 3DNow! uses the last imm byte as opcode extension.
0f: 3DNow! Pq,Qq,Ib
# 0x0f 0x10-0x1f
-# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands
-# but it actually has operands. And also, vmovss and vmovsd only accept 128bit.
-# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form.
-# Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming
-# Reference A.1
-10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1)
-11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1)
-12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2)
-13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1)
-14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66)
-15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66)
-16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3)
-17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
+10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128)
+11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128)
+12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX)
+13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128)
+14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX)
+15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX)
+16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX)
+17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128)
18: Grp16 (1A)
19:
1a:
@@ -359,14 +347,14 @@ AVXcode: 1
25:
26:
27:
-28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66)
-29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66)
-2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1)
-2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66)
-2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1)
-2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1)
-2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1)
-2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1)
+28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX)
+29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX)
+2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128)
+2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX)
+2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128)
+2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128)
+2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128)
+2f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128)
# 0x0f 0x30-0x3f
30: WRMSR
31: RDTSC
@@ -402,66 +390,65 @@ AVXcode: 1
4e: CMOVLE/NG Gv,Ev
4f: CMOVNLE/G Gv,Ev
# 0x0f 0x50-0x5f
-50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66)
-51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1)
-52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1)
-53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1)
-54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66)
-55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66)
-56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66)
-57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66)
-58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
-59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
-5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
-5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
-5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
-5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
-5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
-5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1)
+50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX)
+51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128)
+52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128)
+53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128)
+54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX)
+55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX)
+56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX)
+57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX)
+58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128)
+59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128)
+5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128)
+5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX)
+5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128)
+5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128)
+5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128)
+5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128)
# 0x0f 0x60-0x6f
-60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1)
-61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1)
-62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1)
-63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1)
-64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1)
-65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1)
-66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1)
-67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1)
-68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1)
-69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1)
-6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1)
-6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1)
-6c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
-6d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
-6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
-6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3)
+60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128)
+61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128)
+62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128)
+63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128)
+64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128)
+65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128)
+66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128)
+67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128)
+68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128)
+69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128)
+6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128)
+6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128)
+6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128)
+6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128)
+6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128)
+6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX)
# 0x0f 0x70-0x7f
-70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
+70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128)
71: Grp12 (1A)
72: Grp13 (1A)
73: Grp14 (1A)
-74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1)
-75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1)
-76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
-# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX.
-77: emms | vzeroupper | vzeroall
-78: VMREAD Ey,Gy
-79: VMWRITE Gy,Ey
+74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128)
+75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128)
+76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128)
+77: emms/vzeroupper/vzeroall (VEX)
+78: VMREAD Ed/q,Gd/q
+79: VMWRITE Gd/q,Ed/q
7a:
7b:
-7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
-7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
-7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
-7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
+7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX)
+7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX)
+7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128)
+7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX)
# 0x0f 0x80-0x8f
80: JO Jz (f64)
81: JNO Jz (f64)
-82: JB/JC/JNAE Jz (f64)
-83: JAE/JNB/JNC Jz (f64)
-84: JE/JZ Jz (f64)
-85: JNE/JNZ Jz (f64)
+82: JB/JNAE/JC Jz (f64)
+83: JNB/JAE/JNC Jz (f64)
+84: JZ/JE Jz (f64)
+85: JNZ/JNE Jz (f64)
86: JBE/JNA Jz (f64)
-87: JA/JNBE Jz (f64)
+87: JNBE/JA Jz (f64)
88: JS Jz (f64)
89: JNS Jz (f64)
8a: JP/JPE Jz (f64)
@@ -517,18 +504,18 @@ b8: JMPE | POPCNT Gv,Ev (F3)
b9: Grp10 (1A)
ba: Grp8 Ev,Ib (1A)
bb: BTC Ev,Gv
-bc: BSF Gv,Ev | TZCNT Gv,Ev (F3)
-bd: BSR Gv,Ev | LZCNT Gv,Ev (F3)
+bc: BSF Gv,Ev
+bd: BSR Gv,Ev
be: MOVSX Gv,Eb
bf: MOVSX Gv,Ew
# 0x0f 0xc0-0xcf
c0: XADD Eb,Gb
c1: XADD Ev,Gv
-c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1)
-c3: movnti My,Gy
-c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1)
-c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1)
-c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66)
+c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX)
+c3: movnti Md/q,Gd/q
+c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128)
+c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128)
+c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX)
c7: Grp9 (1A)
c8: BSWAP RAX/EAX/R8/R8D
c9: BSWAP RCX/ECX/R9/R9D
@@ -539,55 +526,55 @@ cd: BSWAP RBP/EBP/R13/R13D
ce: BSWAP RSI/ESI/R14/R14D
cf: BSWAP RDI/EDI/R15/R15D
# 0x0f 0xd0-0xdf
-d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2)
-d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1)
-d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1)
-d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1)
-d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1)
-d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1)
-d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
-d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
-d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
-d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
-da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
-db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1)
-dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
-dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
-de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
-df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1)
+d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX)
+d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128)
+d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128)
+d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128)
+d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128)
+d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128)
+d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
+d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128)
+d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128)
+d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128)
+da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128)
+db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128)
+dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128)
+dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128)
+de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128)
+df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128)
# 0x0f 0xe0-0xef
-e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
-e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
-e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
-e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
-e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
-e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
-e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2)
-e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
-e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
-e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
-ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
-eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1)
-ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
-ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
-ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
-ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1)
+e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128)
+e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128)
+e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128)
+e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128)
+e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128)
+e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128)
+e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX)
+e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX)
+e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128)
+e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128)
+ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128)
+eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128)
+ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128)
+ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128)
+ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128)
+ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128)
# 0x0f 0xf0-0xff
-f0: vlddqu Vx,Mx (F2)
-f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
-f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1)
-f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1)
-f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1)
-f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1)
-f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1)
-f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1)
-f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1)
-f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1)
-fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1)
-fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
-fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
-fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
-fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
+f0: lddqu Vdq,Mdq (F2),(VEX)
+f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128)
+f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128)
+f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128)
+f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128)
+f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128)
+f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128)
+f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128)
+f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128)
+f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128)
+fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128)
+fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128)
+fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128)
+fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128)
+fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128)
ff:
EndTable
@@ -595,193 +582,155 @@ Table: 3-byte opcode 1 (0x0f 0x38)
Referrer: 3-byte escape 1
AVXcode: 2
# 0x0f 0x38 0x00-0x0f
-00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1)
-01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1)
-02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1)
-03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1)
-04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1)
-05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1)
-06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1)
-07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1)
-08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1)
-09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1)
-0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1)
-0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1)
-0c: vpermilps Vx,Hx,Wx (66),(v)
-0d: vpermilpd Vx,Hx,Wx (66),(v)
-0e: vtestps Vx,Wx (66),(v)
-0f: vtestpd Vx,Wx (66),(v)
+00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128)
+01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128)
+02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128)
+03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128)
+04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128)
+05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128)
+06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128)
+07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128)
+08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128)
+09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128)
+0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128)
+0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128)
+0c: Vpermilps /r (66),(oVEX)
+0d: Vpermilpd /r (66),(oVEX)
+0e: vtestps /r (66),(oVEX)
+0f: vtestpd /r (66),(oVEX)
# 0x0f 0x38 0x10-0x1f
10: pblendvb Vdq,Wdq (66)
11:
12:
-13: vcvtph2ps Vx,Wx,Ib (66),(v)
+13:
14: blendvps Vdq,Wdq (66)
15: blendvpd Vdq,Wdq (66)
-16: vpermps Vqq,Hqq,Wqq (66),(v)
-17: vptest Vx,Wx (66)
-18: vbroadcastss Vx,Wd (66),(v)
-19: vbroadcastsd Vqq,Wq (66),(v)
-1a: vbroadcastf128 Vqq,Mdq (66),(v)
+16:
+17: ptest Vdq,Wdq (66),(VEX)
+18: vbroadcastss /r (66),(oVEX)
+19: vbroadcastsd /r (66),(oVEX),(o256)
+1a: vbroadcastf128 /r (66),(oVEX),(o256)
1b:
-1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
-1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
-1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
+1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128)
+1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128)
+1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128)
1f:
# 0x0f 0x38 0x20-0x2f
-20: vpmovsxbw Vx,Ux/Mq (66),(v1)
-21: vpmovsxbd Vx,Ux/Md (66),(v1)
-22: vpmovsxbq Vx,Ux/Mw (66),(v1)
-23: vpmovsxwd Vx,Ux/Mq (66),(v1)
-24: vpmovsxwq Vx,Ux/Md (66),(v1)
-25: vpmovsxdq Vx,Ux/Mq (66),(v1)
+20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128)
+21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128)
+22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128)
+23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128)
+24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128)
+25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128)
26:
27:
-28: vpmuldq Vx,Hx,Wx (66),(v1)
-29: vpcmpeqq Vx,Hx,Wx (66),(v1)
-2a: vmovntdqa Vx,Mx (66),(v1)
-2b: vpackusdw Vx,Hx,Wx (66),(v1)
-2c: vmaskmovps Vx,Hx,Mx (66),(v)
-2d: vmaskmovpd Vx,Hx,Mx (66),(v)
-2e: vmaskmovps Mx,Hx,Vx (66),(v)
-2f: vmaskmovpd Mx,Hx,Vx (66),(v)
+28: pmuldq Vdq,Wdq (66),(VEX),(o128)
+29: pcmpeqq Vdq,Wdq (66),(VEX),(o128)
+2a: movntdqa Vdq,Mdq (66),(VEX),(o128)
+2b: packusdw Vdq,Wdq (66),(VEX),(o128)
+2c: vmaskmovps(ld) /r (66),(oVEX)
+2d: vmaskmovpd(ld) /r (66),(oVEX)
+2e: vmaskmovps(st) /r (66),(oVEX)
+2f: vmaskmovpd(st) /r (66),(oVEX)
# 0x0f 0x38 0x30-0x3f
-30: vpmovzxbw Vx,Ux/Mq (66),(v1)
-31: vpmovzxbd Vx,Ux/Md (66),(v1)
-32: vpmovzxbq Vx,Ux/Mw (66),(v1)
-33: vpmovzxwd Vx,Ux/Mq (66),(v1)
-34: vpmovzxwq Vx,Ux/Md (66),(v1)
-35: vpmovzxdq Vx,Ux/Mq (66),(v1)
-36: vpermd Vqq,Hqq,Wqq (66),(v)
-37: vpcmpgtq Vx,Hx,Wx (66),(v1)
-38: vpminsb Vx,Hx,Wx (66),(v1)
-39: vpminsd Vx,Hx,Wx (66),(v1)
-3a: vpminuw Vx,Hx,Wx (66),(v1)
-3b: vpminud Vx,Hx,Wx (66),(v1)
-3c: vpmaxsb Vx,Hx,Wx (66),(v1)
-3d: vpmaxsd Vx,Hx,Wx (66),(v1)
-3e: vpmaxuw Vx,Hx,Wx (66),(v1)
-3f: vpmaxud Vx,Hx,Wx (66),(v1)
+30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128)
+31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128)
+32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128)
+33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128)
+34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128)
+35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128)
+36:
+37: pcmpgtq Vdq,Wdq (66),(VEX),(o128)
+38: pminsb Vdq,Wdq (66),(VEX),(o128)
+39: pminsd Vdq,Wdq (66),(VEX),(o128)
+3a: pminuw Vdq,Wdq (66),(VEX),(o128)
+3b: pminud Vdq,Wdq (66),(VEX),(o128)
+3c: pmaxsb Vdq,Wdq (66),(VEX),(o128)
+3d: pmaxsd Vdq,Wdq (66),(VEX),(o128)
+3e: pmaxuw Vdq,Wdq (66),(VEX),(o128)
+3f: pmaxud Vdq,Wdq (66),(VEX),(o128)
# 0x0f 0x38 0x40-0x8f
-40: vpmulld Vx,Hx,Wx (66),(v1)
-41: vphminposuw Vdq,Wdq (66),(v1)
-42:
-43:
-44:
-45: vpsrlvd/q Vx,Hx,Wx (66),(v)
-46: vpsravd Vx,Hx,Wx (66),(v)
-47: vpsllvd/q Vx,Hx,Wx (66),(v)
-# Skip 0x48-0x57
-58: vpbroadcastd Vx,Wx (66),(v)
-59: vpbroadcastq Vx,Wx (66),(v)
-5a: vbroadcasti128 Vqq,Mdq (66),(v)
-# Skip 0x5b-0x77
-78: vpbroadcastb Vx,Wx (66),(v)
-79: vpbroadcastw Vx,Wx (66),(v)
-# Skip 0x7a-0x7f
-80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
-82: INVPCID Gy,Mdq (66)
-8c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
-8e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
+40: pmulld Vdq,Wdq (66),(VEX),(o128)
+41: phminposuw Vdq,Wdq (66),(VEX),(o128)
+80: INVEPT Gd/q,Mdq (66)
+81: INVPID Gd/q,Mdq (66)
# 0x0f 0x38 0x90-0xbf (FMA)
-90: vgatherdd/q Vx,Hx,Wx (66),(v)
-91: vgatherqd/q Vx,Hx,Wx (66),(v)
-92: vgatherdps/d Vx,Hx,Wx (66),(v)
-93: vgatherqps/d Vx,Hx,Wx (66),(v)
-94:
-95:
-96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v)
-97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v)
-98: vfmadd132ps/d Vx,Hx,Wx (66),(v)
-99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
-9a: vfmsub132ps/d Vx,Hx,Wx (66),(v)
-9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
-9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v)
-9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
-9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
-9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
-a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
-a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
-a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
-a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
-aa: vfmsub213ps/d Vx,Hx,Wx (66),(v)
-ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
-ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
-ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
-ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
-af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
-b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
-b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
-b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
-b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
-ba: vfmsub231ps/d Vx,Hx,Wx (66),(v)
-bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
-bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v)
-bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
-be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
-bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
+96: vfmaddsub132pd/ps /r (66),(VEX)
+97: vfmsubadd132pd/ps /r (66),(VEX)
+98: vfmadd132pd/ps /r (66),(VEX)
+99: vfmadd132sd/ss /r (66),(VEX),(o128)
+9a: vfmsub132pd/ps /r (66),(VEX)
+9b: vfmsub132sd/ss /r (66),(VEX),(o128)
+9c: vfnmadd132pd/ps /r (66),(VEX)
+9d: vfnmadd132sd/ss /r (66),(VEX),(o128)
+9e: vfnmsub132pd/ps /r (66),(VEX)
+9f: vfnmsub132sd/ss /r (66),(VEX),(o128)
+a6: vfmaddsub213pd/ps /r (66),(VEX)
+a7: vfmsubadd213pd/ps /r (66),(VEX)
+a8: vfmadd213pd/ps /r (66),(VEX)
+a9: vfmadd213sd/ss /r (66),(VEX),(o128)
+aa: vfmsub213pd/ps /r (66),(VEX)
+ab: vfmsub213sd/ss /r (66),(VEX),(o128)
+ac: vfnmadd213pd/ps /r (66),(VEX)
+ad: vfnmadd213sd/ss /r (66),(VEX),(o128)
+ae: vfnmsub213pd/ps /r (66),(VEX)
+af: vfnmsub213sd/ss /r (66),(VEX),(o128)
+b6: vfmaddsub231pd/ps /r (66),(VEX)
+b7: vfmsubadd231pd/ps /r (66),(VEX)
+b8: vfmadd231pd/ps /r (66),(VEX)
+b9: vfmadd231sd/ss /r (66),(VEX),(o128)
+ba: vfmsub231pd/ps /r (66),(VEX)
+bb: vfmsub231sd/ss /r (66),(VEX),(o128)
+bc: vfnmadd231pd/ps /r (66),(VEX)
+bd: vfnmadd231sd/ss /r (66),(VEX),(o128)
+be: vfnmsub231pd/ps /r (66),(VEX)
+bf: vfnmsub231sd/ss /r (66),(VEX),(o128)
# 0x0f 0x38 0xc0-0xff
-db: VAESIMC Vdq,Wdq (66),(v1)
-dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
-dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
-de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
-df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
-f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2)
-f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2)
-f3: ANDN Gy,By,Ey (v)
-f4: Grp17 (1A)
-f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
-f6: MULX By,Gy,rDX,Ey (F2),(v)
-f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
+db: aesimc Vdq,Wdq (66),(VEX),(o128)
+dc: aesenc Vdq,Wdq (66),(VEX),(o128)
+dd: aesenclast Vdq,Wdq (66),(VEX),(o128)
+de: aesdec Vdq,Wdq (66),(VEX),(o128)
+df: aesdeclast Vdq,Wdq (66),(VEX),(o128)
+f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2)
+f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2)
EndTable
Table: 3-byte opcode 2 (0x0f 0x3a)
Referrer: 3-byte escape 2
AVXcode: 3
# 0x0f 0x3a 0x00-0xff
-00: vpermq Vqq,Wqq,Ib (66),(v)
-01: vpermpd Vqq,Wqq,Ib (66),(v)
-02: vpblendd Vx,Hx,Wx,Ib (66),(v)
-03:
-04: vpermilps Vx,Wx,Ib (66),(v)
-05: vpermilpd Vx,Wx,Ib (66),(v)
-06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
-07:
-08: vroundps Vx,Wx,Ib (66)
-09: vroundpd Vx,Wx,Ib (66)
-0a: vroundss Vss,Wss,Ib (66),(v1)
-0b: vroundsd Vsd,Wsd,Ib (66),(v1)
-0c: vblendps Vx,Hx,Wx,Ib (66)
-0d: vblendpd Vx,Hx,Wx,Ib (66)
-0e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
-0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1)
-14: vpextrb Rd/Mb,Vdq,Ib (66),(v1)
-15: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
-16: vpextrd/q Ey,Vdq,Ib (66),(v1)
-17: vextractps Ed,Vdq,Ib (66),(v1)
-18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v)
-19: vextractf128 Wdq,Vqq,Ib (66),(v)
-1d: vcvtps2ph Wx,Vx,Ib (66),(v)
-20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
-21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
-22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
-38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v)
-39: vextracti128 Wdq,Vqq,Ib (66),(v)
-40: vdpps Vx,Hx,Wx,Ib (66)
-41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
-42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1)
-44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
-46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
-4a: vblendvps Vx,Hx,Wx,Lx (66),(v)
-4b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
-4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
-60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
-61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
-62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
-63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
-df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
-f0: RORX Gy,Ey,Ib (F2),(v)
+04: vpermilps /r,Ib (66),(oVEX)
+05: vpermilpd /r,Ib (66),(oVEX)
+06: vperm2f128 /r,Ib (66),(oVEX),(o256)
+08: roundps Vdq,Wdq,Ib (66),(VEX)
+09: roundpd Vdq,Wdq,Ib (66),(VEX)
+0a: roundss Vss,Wss,Ib (66),(VEX),(o128)
+0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128)
+0c: blendps Vdq,Wdq,Ib (66),(VEX)
+0d: blendpd Vdq,Wdq,Ib (66),(VEX)
+0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128)
+0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128)
+14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128)
+15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128)
+16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128)
+17: extractps Ed,Vdq,Ib (66),(VEX),(o128)
+18: vinsertf128 /r,Ib (66),(oVEX),(o256)
+19: vextractf128 /r,Ib (66),(oVEX),(o256)
+20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128)
+21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128)
+22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128)
+40: dpps Vdq,Wdq,Ib (66),(VEX)
+41: dppd Vdq,Wdq,Ib (66),(VEX),(o128)
+42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128)
+44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128)
+4a: vblendvps /r,Ib (66),(oVEX)
+4b: vblendvpd /r,Ib (66),(oVEX)
+4c: vpblendvb /r,Ib (66),(oVEX),(o128)
+60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128)
+61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128)
+62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128)
+63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128)
+df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128)
EndTable
GrpTable: Grp1
@@ -843,7 +792,7 @@ GrpTable: Grp5
2: CALLN Ev (f64)
3: CALLF Ep
4: JMPN Ev (f64)
-5: JMPF Mp
+5: JMPF Ep
6: PUSH Ev (d64)
7:
EndTable
@@ -860,7 +809,7 @@ EndTable
GrpTable: Grp7
0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001)
-2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B)
+2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B)
3: LIDT Ms
4: SMSW Mw/Rv
5:
@@ -877,45 +826,44 @@ EndTable
GrpTable: Grp9
1: CMPXCHG8B/16B Mq/Mdq
-6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
-7: VMPTRST Mq | VMPTRST Mq (F3)
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3)
+7: VMPTRST Mq
EndTable
GrpTable: Grp10
EndTable
GrpTable: Grp11
-# Note: the operands are given by group opcode
0: MOV
EndTable
GrpTable: Grp12
-2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1)
-4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1)
-6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1)
+2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128)
+4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128)
+6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128)
EndTable
GrpTable: Grp13
-2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
-4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1)
-6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
+2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128)
+4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128)
+6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128)
EndTable
GrpTable: Grp14
-2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1)
-3: vpsrldq Hx,Ux,Ib (66),(11B),(v1)
-6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1)
-7: vpslldq Hx,Ux,Ib (66),(11B),(v1)
+2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128)
+3: psrldq Udq,Ib (66),(11B),(VEX),(o128)
+6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128)
+7: pslldq Udq,Ib (66),(11B),(VEX),(o128)
EndTable
GrpTable: Grp15
-0: fxsave | RDFSBASE Ry (F3),(11B)
-1: fxstor | RDGSBASE Ry (F3),(11B)
-2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
-3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
+0: fxsave
+1: fxstor
+2: ldmxcsr (VEX)
+3: stmxcsr (VEX)
4: XSAVE
5: XRSTOR | lfence (11B)
-6: XSAVEOPT | mfence (11B)
+6: mfence (11B)
7: clflush | sfence (11B)
EndTable
@@ -926,12 +874,6 @@ GrpTable: Grp16
3: prefetch T2
EndTable
-GrpTable: Grp17
-1: BLSR By,Ey (v)
-2: BLSMSK By,Ey (v)
-3: BLSI By,Ey (v)
-EndTable
-
# AMD's Prefetch Group
GrpTable: GrpP
0: PREFETCH
diff --git a/trunk/arch/x86/mm/Makefile b/trunk/arch/x86/mm/Makefile
index 23d8e5fecf76..3d11327c9ab4 100644
--- a/trunk/arch/x86/mm/Makefile
+++ b/trunk/arch/x86/mm/Makefile
@@ -27,4 +27,6 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o
obj-$(CONFIG_ACPI_NUMA) += srat.o
obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
+obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
+
obj-$(CONFIG_MEMTEST) += memtest.o
diff --git a/trunk/arch/x86/mm/extable.c b/trunk/arch/x86/mm/extable.c
index 1fb85dbe390a..d0474ad2a6e5 100644
--- a/trunk/arch/x86/mm/extable.c
+++ b/trunk/arch/x86/mm/extable.c
@@ -25,7 +25,7 @@ int fixup_exception(struct pt_regs *regs)
if (fixup) {
/* If fixup is less than 16, it means uaccess error */
if (fixup->fixup < 16) {
- current_thread_info()->uaccess_err = 1;
+ current_thread_info()->uaccess_err = -EFAULT;
regs->ip += fixup->fixup;
return 1;
}
diff --git a/trunk/arch/x86/mm/fault.c b/trunk/arch/x86/mm/fault.c
index 9d74824a708d..5db0490deb07 100644
--- a/trunk/arch/x86/mm/fault.c
+++ b/trunk/arch/x86/mm/fault.c
@@ -626,7 +626,7 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code,
static noinline void
no_context(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, int signal, int si_code)
+ unsigned long address)
{
struct task_struct *tsk = current;
unsigned long *stackend;
@@ -634,17 +634,8 @@ no_context(struct pt_regs *regs, unsigned long error_code,
int sig;
/* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs)) {
- if (current_thread_info()->sig_on_uaccess_error && signal) {
- tsk->thread.trap_no = 14;
- tsk->thread.error_code = error_code | PF_USER;
- tsk->thread.cr2 = address;
-
- /* XXX: hwpoison faults will set the wrong code. */
- force_sig_info_fault(signal, si_code, address, tsk, 0);
- }
+ if (fixup_exception(regs))
return;
- }
/*
* 32-bit:
@@ -764,7 +755,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
if (is_f00f_bug(regs, address))
return;
- no_context(regs, error_code, address, SIGSEGV, si_code);
+ no_context(regs, error_code, address);
}
static noinline void
@@ -828,7 +819,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
/* Kernel mode? Handle exceptions or die: */
if (!(error_code & PF_USER)) {
- no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
+ no_context(regs, error_code, address);
return;
}
@@ -863,7 +854,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
if (!(fault & VM_FAULT_RETRY))
up_read(&current->mm->mmap_sem);
if (!(error_code & PF_USER))
- no_context(regs, error_code, address, 0, 0);
+ no_context(regs, error_code, address);
return 1;
}
if (!(fault & VM_FAULT_ERROR))
@@ -873,8 +864,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
/* Kernel mode? Handle exceptions or die: */
if (!(error_code & PF_USER)) {
up_read(&current->mm->mmap_sem);
- no_context(regs, error_code, address,
- SIGSEGV, SEGV_MAPERR);
+ no_context(regs, error_code, address);
return 1;
}
diff --git a/trunk/arch/x86/mm/init.c b/trunk/arch/x86/mm/init.c
index a298914058f9..87488b93a65c 100644
--- a/trunk/arch/x86/mm/init.c
+++ b/trunk/arch/x86/mm/init.c
@@ -67,7 +67,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
good_end = max_pfn_mapped << PAGE_SHIFT;
base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
- if (!base)
+ if (base == MEMBLOCK_ERROR)
panic("Cannot find space for the kernel page tables");
pgt_buf_start = base >> PAGE_SHIFT;
@@ -80,7 +80,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
void __init native_pagetable_reserve(u64 start, u64 end)
{
- memblock_reserve(start, end - start);
+ memblock_x86_reserve_range(start, end, "PGTABLE");
}
struct map_range {
@@ -279,8 +279,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
* pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
* so that they can be reused for other purposes.
*
- * On native it just means calling memblock_reserve, on Xen it also
- * means marking RW the pagetable pages that we allocated before
+ * On native it just means calling memblock_x86_reserve_range, on Xen it
+ * also means marking RW the pagetable pages that we allocated before
* but that haven't been used.
*
* In fact on xen we mark RO the whole range pgt_buf_start -
diff --git a/trunk/arch/x86/mm/init_32.c b/trunk/arch/x86/mm/init_32.c
index 0c1da394a634..29f7c6d98179 100644
--- a/trunk/arch/x86/mm/init_32.c
+++ b/trunk/arch/x86/mm/init_32.c
@@ -427,17 +427,23 @@ static void __init add_one_highpage_init(struct page *page)
void __init add_highpages_with_active_regions(int nid,
unsigned long start_pfn, unsigned long end_pfn)
{
- phys_addr_t start, end;
- u64 i;
-
- for_each_free_mem_range(i, nid, &start, &end, NULL) {
- unsigned long pfn = clamp_t(unsigned long, PFN_UP(start),
- start_pfn, end_pfn);
- unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),
- start_pfn, end_pfn);
- for ( ; pfn < e_pfn; pfn++)
- if (pfn_valid(pfn))
- add_one_highpage_init(pfn_to_page(pfn));
+ struct range *range;
+ int nr_range;
+ int i;
+
+ nr_range = __get_free_all_memory_range(&range, nid, start_pfn, end_pfn);
+
+ for (i = 0; i < nr_range; i++) {
+ struct page *page;
+ int node_pfn;
+
+ for (node_pfn = range[i].start; node_pfn < range[i].end;
+ node_pfn++) {
+ if (!pfn_valid(node_pfn))
+ continue;
+ page = pfn_to_page(node_pfn);
+ add_one_highpage_init(page);
+ }
}
}
#else
@@ -644,18 +650,18 @@ void __init initmem_init(void)
highstart_pfn = highend_pfn = max_pfn;
if (max_pfn > max_low_pfn)
highstart_pfn = max_low_pfn;
+ memblock_x86_register_active_regions(0, 0, highend_pfn);
+ sparse_memory_present_with_active_regions(0);
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
pages_to_mb(highend_pfn - highstart_pfn));
num_physpages = highend_pfn;
high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
+ memblock_x86_register_active_regions(0, 0, max_low_pfn);
+ sparse_memory_present_with_active_regions(0);
num_physpages = max_low_pfn;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif
-
- memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
- sparse_memory_present_with_active_regions(0);
-
#ifdef CONFIG_FLATMEM
max_mapnr = num_physpages;
#endif
diff --git a/trunk/arch/x86/mm/init_64.c b/trunk/arch/x86/mm/init_64.c
index a8a56ce3a962..bbaaa005bf0e 100644
--- a/trunk/arch/x86/mm/init_64.c
+++ b/trunk/arch/x86/mm/init_64.c
@@ -608,7 +608,7 @@ kernel_physical_mapping_init(unsigned long start,
#ifndef CONFIG_NUMA
void __init initmem_init(void)
{
- memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
+ memblock_x86_register_active_regions(0, 0, max_pfn);
}
#endif
diff --git a/trunk/arch/x86/mm/memblock.c b/trunk/arch/x86/mm/memblock.c
new file mode 100644
index 000000000000..992da5ec5a64
--- /dev/null
+++ b/trunk/arch/x86/mm/memblock.c
@@ -0,0 +1,348 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/memblock.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/range.h>
+
+/* Check for already reserved areas */
+bool __init memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align)
+{
+ struct memblock_region *r;
+ u64 addr = *addrp, last;
+ u64 size = *sizep;
+ bool changed = false;
+
+again:
+ last = addr + size;
+ for_each_memblock(reserved, r) {
+ if (last > r->base && addr < r->base) {
+ size = r->base - addr;
+ changed = true;
+ goto again;
+ }
+ if (last > (r->base + r->size) && addr < (r->base + r->size)) {
+ addr = round_up(r->base + r->size, align);
+ size = last - addr;
+ changed = true;
+ goto again;
+ }
+ if (last <= (r->base + r->size) && addr >= r->base) {
+ *sizep = 0;
+ return false;
+ }
+ }
+ if (changed) {
+ *addrp = addr;
+ *sizep = size;
+ }
+ return changed;
+}
+
+/*
+ * Find next free range after start, and size is returned in *sizep
+ */
+u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align)
+{
+ struct memblock_region *r;
+
+ for_each_memblock(memory, r) {
+ u64 ei_start = r->base;
+ u64 ei_last = ei_start + r->size;
+ u64 addr;
+
+ addr = round_up(ei_start, align);
+ if (addr < start)
+ addr = round_up(start, align);
+ if (addr >= ei_last)
+ continue;
+ *sizep = ei_last - addr;
+ while (memblock_x86_check_reserved_size(&addr, sizep, align))
+ ;
+
+ if (*sizep)
+ return addr;
+ }
+
+ return MEMBLOCK_ERROR;
+}
+
+static __init struct range *find_range_array(int count)
+{
+ u64 end, size, mem;
+ struct range *range;
+
+ size = sizeof(struct range) * count;
+ end = memblock.current_limit;
+
+ mem = memblock_find_in_range(0, end, size, sizeof(struct range));
+ if (mem == MEMBLOCK_ERROR)
+ panic("can not find more space for range array");
+
+ /*
+ * This range is temporary, so don't reserve it; it will not be
+ * overlapped because we will not allocate a new buffer before
+ * we discard this one
+ */
+ range = __va(mem);
+ memset(range, 0, size);
+
+ return range;
+}
+
+static void __init memblock_x86_subtract_reserved(struct range *range, int az)
+{
+ u64 final_start, final_end;
+ struct memblock_region *r;
+
+ /* Take out region array itself at first*/
+ memblock_free_reserved_regions();
+
+ memblock_dbg("Subtract (%ld early reservations)\n", memblock.reserved.cnt);
+
+ for_each_memblock(reserved, r) {
+ memblock_dbg(" [%010llx-%010llx]\n", (u64)r->base, (u64)r->base + r->size - 1);
+ final_start = PFN_DOWN(r->base);
+ final_end = PFN_UP(r->base + r->size);
+ if (final_start >= final_end)
+ continue;
+ subtract_range(range, az, final_start, final_end);
+ }
+
+ /* Put region array back ? */
+ memblock_reserve_reserved_regions();
+}
+
+struct count_data {
+ int nr;
+};
+
+static int __init count_work_fn(unsigned long start_pfn,
+ unsigned long end_pfn, void *datax)
+{
+ struct count_data *data = datax;
+
+ data->nr++;
+
+ return 0;
+}
+
+static int __init count_early_node_map(int nodeid)
+{
+ struct count_data data;
+
+ data.nr = 0;
+ work_with_active_regions(nodeid, count_work_fn, &data);
+
+ return data.nr;
+}
+
+int __init __get_free_all_memory_range(struct range **rangep, int nodeid,
+ unsigned long start_pfn, unsigned long end_pfn)
+{
+ int count;
+ struct range *range;
+ int nr_range;
+
+ count = (memblock.reserved.cnt + count_early_node_map(nodeid)) * 2;
+
+ range = find_range_array(count);
+ nr_range = 0;
+
+ /*
+ * Use early_node_map[] and memblock.reserved.region to get range array
+ * at first
+ */
+ nr_range = add_from_early_node_map(range, count, nr_range, nodeid);
+ subtract_range(range, count, 0, start_pfn);
+ subtract_range(range, count, end_pfn, -1ULL);
+
+ memblock_x86_subtract_reserved(range, count);
+ nr_range = clean_sort_range(range, count);
+
+ *rangep = range;
+ return nr_range;
+}
+
+int __init get_free_all_memory_range(struct range **rangep, int nodeid)
+{
+ unsigned long end_pfn = -1UL;
+
+#ifdef CONFIG_X86_32
+ end_pfn = max_low_pfn;
+#endif
+ return __get_free_all_memory_range(rangep, nodeid, 0, end_pfn);
+}
+
+static u64 __init __memblock_x86_memory_in_range(u64 addr, u64 limit, bool get_free)
+{
+ int i, count;
+ struct range *range;
+ int nr_range;
+ u64 final_start, final_end;
+ u64 free_size;
+ struct memblock_region *r;
+
+ count = (memblock.reserved.cnt + memblock.memory.cnt) * 2;
+
+ range = find_range_array(count);
+ nr_range = 0;
+
+ addr = PFN_UP(addr);
+ limit = PFN_DOWN(limit);
+
+ for_each_memblock(memory, r) {
+ final_start = PFN_UP(r->base);
+ final_end = PFN_DOWN(r->base + r->size);
+ if (final_start >= final_end)
+ continue;
+ if (final_start >= limit || final_end <= addr)
+ continue;
+
+ nr_range = add_range(range, count, nr_range, final_start, final_end);
+ }
+ subtract_range(range, count, 0, addr);
+ subtract_range(range, count, limit, -1ULL);
+
+ /* Subtract memblock.reserved.region in range ? */
+ if (!get_free)
+ goto sort_and_count_them;
+ for_each_memblock(reserved, r) {
+ final_start = PFN_DOWN(r->base);
+ final_end = PFN_UP(r->base + r->size);
+ if (final_start >= final_end)
+ continue;
+ if (final_start >= limit || final_end <= addr)
+ continue;
+
+ subtract_range(range, count, final_start, final_end);
+ }
+
+sort_and_count_them:
+ nr_range = clean_sort_range(range, count);
+
+ free_size = 0;
+ for (i = 0; i < nr_range; i++)
+ free_size += range[i].end - range[i].start;
+
+ return free_size << PAGE_SHIFT;
+}
+
+u64 __init memblock_x86_free_memory_in_range(u64 addr, u64 limit)
+{
+ return __memblock_x86_memory_in_range(addr, limit, true);
+}
+
+u64 __init memblock_x86_memory_in_range(u64 addr, u64 limit)
+{
+ return __memblock_x86_memory_in_range(addr, limit, false);
+}
+
+void __init memblock_x86_reserve_range(u64 start, u64 end, char *name)
+{
+ if (start == end)
+ return;
+
+ if (WARN_ONCE(start > end, "memblock_x86_reserve_range: wrong range [%#llx, %#llx)\n", start, end))
+ return;
+
+ memblock_dbg(" memblock_x86_reserve_range: [%#010llx-%#010llx] %16s\n", start, end - 1, name);
+
+ memblock_reserve(start, end - start);
+}
+
+void __init memblock_x86_free_range(u64 start, u64 end)
+{
+ if (start == end)
+ return;
+
+ if (WARN_ONCE(start > end, "memblock_x86_free_range: wrong range [%#llx, %#llx)\n", start, end))
+ return;
+
+ memblock_dbg(" memblock_x86_free_range: [%#010llx-%#010llx]\n", start, end - 1);
+
+ memblock_free(start, end - start);
+}
+
+/*
+ * Need to call this function after memblock_x86_register_active_regions,
+ * so early_node_map[] is filled already.
+ */
+u64 __init memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align)
+{
+ u64 addr;
+ addr = find_memory_core_early(nid, size, align, start, end);
+ if (addr != MEMBLOCK_ERROR)
+ return addr;
+
+ /* Fallback, should already have start end within node range */
+ return memblock_find_in_range(start, end, size, align);
+}
+
+/*
+ * Finds an active region in the address range from start_pfn to last_pfn and
+ * returns its range in ei_startpfn and ei_endpfn for the memblock entry.
+ */
+static int __init memblock_x86_find_active_region(const struct memblock_region *ei,
+ unsigned long start_pfn,
+ unsigned long last_pfn,
+ unsigned long *ei_startpfn,
+ unsigned long *ei_endpfn)
+{
+ u64 align = PAGE_SIZE;
+
+ *ei_startpfn = round_up(ei->base, align) >> PAGE_SHIFT;
+ *ei_endpfn = round_down(ei->base + ei->size, align) >> PAGE_SHIFT;
+
+ /* Skip map entries smaller than a page */
+ if (*ei_startpfn >= *ei_endpfn)
+ return 0;
+
+ /* Skip if map is outside the node */
+ if (*ei_endpfn <= start_pfn || *ei_startpfn >= last_pfn)
+ return 0;
+
+ /* Check for overlaps */
+ if (*ei_startpfn < start_pfn)
+ *ei_startpfn = start_pfn;
+ if (*ei_endpfn > last_pfn)
+ *ei_endpfn = last_pfn;
+
+ return 1;
+}
+
+/* Walk the memblock.memory map and register active regions within a node */
+void __init memblock_x86_register_active_regions(int nid, unsigned long start_pfn,
+ unsigned long last_pfn)
+{
+ unsigned long ei_startpfn;
+ unsigned long ei_endpfn;
+ struct memblock_region *r;
+
+ for_each_memblock(memory, r)
+ if (memblock_x86_find_active_region(r, start_pfn, last_pfn,
+ &ei_startpfn, &ei_endpfn))
+ add_active_range(nid, ei_startpfn, ei_endpfn);
+}
+
+/*
+ * Find the hole size (in bytes) in the memory range.
+ * @start: starting address of the memory range to scan
+ * @end: ending address of the memory range to scan
+ */
+u64 __init memblock_x86_hole_size(u64 start, u64 end)
+{
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long last_pfn = end >> PAGE_SHIFT;
+ unsigned long ei_startpfn, ei_endpfn, ram = 0;
+ struct memblock_region *r;
+
+ for_each_memblock(memory, r)
+ if (memblock_x86_find_active_region(r, start_pfn, last_pfn,
+ &ei_startpfn, &ei_endpfn))
+ ram += ei_endpfn - ei_startpfn;
+
+ return end - start - ((u64)ram << PAGE_SHIFT);
+}
diff --git a/trunk/arch/x86/mm/memtest.c b/trunk/arch/x86/mm/memtest.c
index c80b9fb95734..92faf3a1c53e 100644
--- a/trunk/arch/x86/mm/memtest.c
+++ b/trunk/arch/x86/mm/memtest.c
@@ -34,7 +34,7 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
(unsigned long long) pattern,
(unsigned long long) start_bad,
(unsigned long long) end_bad);
- memblock_reserve(start_bad, end_bad - start_bad);
+ memblock_x86_reserve_range(start_bad, end_bad, "BAD RAM");
}
static void __init memtest(u64 pattern, u64 start_phys, u64 size)
@@ -70,19 +70,24 @@ static void __init memtest(u64 pattern, u64 start_phys, u64 size)
static void __init do_one_pass(u64 pattern, u64 start, u64 end)
{
- u64 i;
- phys_addr_t this_start, this_end;
-
- for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) {
- this_start = clamp_t(phys_addr_t, this_start, start, end);
- this_end = clamp_t(phys_addr_t, this_end, start, end);
- if (this_start < this_end) {
- printk(KERN_INFO " %010llx - %010llx pattern %016llx\n",
- (unsigned long long)this_start,
- (unsigned long long)this_end,
- (unsigned long long)cpu_to_be64(pattern));
- memtest(pattern, this_start, this_end - this_start);
- }
+ u64 size = 0;
+
+ while (start < end) {
+ start = memblock_x86_find_in_range_size(start, &size, 1);
+
+ /* done ? */
+ if (start >= end)
+ break;
+ if (start + size > end)
+ size = end - start;
+
+ printk(KERN_INFO " %010llx - %010llx pattern %016llx\n",
+ (unsigned long long) start,
+ (unsigned long long) start + size,
+ (unsigned long long) cpu_to_be64(pattern));
+ memtest(pattern, start, size);
+
+ start += size;
}
}
diff --git a/trunk/arch/x86/mm/numa.c b/trunk/arch/x86/mm/numa.c
index 496f494593bf..fbeaaf416610 100644
--- a/trunk/arch/x86/mm/numa.c
+++ b/trunk/arch/x86/mm/numa.c
@@ -192,6 +192,8 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
/* Initialize NODE_DATA for a node on the local memory */
static void __init setup_node_data(int nid, u64 start, u64 end)
{
+ const u64 nd_low = PFN_PHYS(MAX_DMA_PFN);
+ const u64 nd_high = PFN_PHYS(max_pfn_mapped);
const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
bool remapped = false;
u64 nd_pa;
@@ -222,12 +224,17 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
nd_pa = __pa(nd);
remapped = true;
} else {
- nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
- if (!nd_pa) {
+ nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high,
+ nd_size, SMP_CACHE_BYTES);
+ if (nd_pa == MEMBLOCK_ERROR)
+ nd_pa = memblock_find_in_range(nd_low, nd_high,
+ nd_size, SMP_CACHE_BYTES);
+ if (nd_pa == MEMBLOCK_ERROR) {
pr_err("Cannot find %zu bytes in node %d\n",
nd_size, nid);
return;
}
+ memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA");
nd = __va(nd_pa);
}
@@ -364,7 +371,8 @@ void __init numa_reset_distance(void)
/* numa_distance could be 1LU marking allocation failure, test cnt */
if (numa_distance_cnt)
- memblock_free(__pa(numa_distance), size);
+ memblock_x86_free_range(__pa(numa_distance),
+ __pa(numa_distance) + size);
numa_distance_cnt = 0;
numa_distance = NULL; /* enable table creation */
}
@@ -387,13 +395,13 @@ static int __init numa_alloc_distance(void)
phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
size, PAGE_SIZE);
- if (!phys) {
+ if (phys == MEMBLOCK_ERROR) {
pr_warning("NUMA: Warning: can't allocate distance table!\n");
/* don't retry until explicitly reset */
numa_distance = (void *)1LU;
return -ENOMEM;
}
- memblock_reserve(phys, size);
+ memblock_x86_reserve_range(phys, phys + size, "NUMA DIST");
numa_distance = __va(phys);
numa_distance_cnt = cnt;
@@ -474,8 +482,8 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
numaram = 0;
}
- e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
-
+ e820ram = max_pfn - (memblock_x86_hole_size(0,
+ PFN_PHYS(max_pfn)) >> PAGE_SHIFT);
/* We seem to lose 3 pages somewhere. Allow 1M of slack. */
if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
@@ -497,10 +505,13 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
if (WARN_ON(nodes_empty(node_possible_map)))
return -EINVAL;
- for (i = 0; i < mi->nr_blks; i++) {
- struct numa_memblk *mb = &mi->blk[i];
- memblock_set_node(mb->start, mb->end - mb->start, mb->nid);
- }
+ for (i = 0; i < mi->nr_blks; i++)
+ memblock_x86_register_active_regions(mi->blk[i].nid,
+ mi->blk[i].start >> PAGE_SHIFT,
+ mi->blk[i].end >> PAGE_SHIFT);
+
+ /* for out of order entries */
+ sort_node_map();
/*
* If sections array is gonna be used for pfn -> nid mapping, check
@@ -534,8 +545,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
setup_node_data(nid, start, end);
}
- /* Dump memblock with node info and return. */
- memblock_dump_all();
return 0;
}
@@ -573,7 +582,7 @@ static int __init numa_init(int (*init_func)(void))
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
memset(&numa_meminfo, 0, sizeof(numa_meminfo));
- WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
+ remove_all_active_ranges();
numa_reset_distance();
ret = init_func();
diff --git a/trunk/arch/x86/mm/numa_32.c b/trunk/arch/x86/mm/numa_32.c
index 534255a36b6b..3adebe7e536a 100644
--- a/trunk/arch/x86/mm/numa_32.c
+++ b/trunk/arch/x86/mm/numa_32.c
@@ -199,23 +199,23 @@ void __init init_alloc_remap(int nid, u64 start, u64 end)
/* allocate node memory and the lowmem remap area */
node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES);
- if (!node_pa) {
+ if (node_pa == MEMBLOCK_ERROR) {
pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
size, nid);
return;
}
- memblock_reserve(node_pa, size);
+ memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
max_low_pfn << PAGE_SHIFT,
size, LARGE_PAGE_BYTES);
- if (!remap_pa) {
+ if (remap_pa == MEMBLOCK_ERROR) {
pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
size, nid);
- memblock_free(node_pa, size);
+ memblock_x86_free_range(node_pa, node_pa + size);
return;
}
- memblock_reserve(remap_pa, size);
+ memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG");
remap_va = phys_to_virt(remap_pa);
/* perform actual remap */
diff --git a/trunk/arch/x86/mm/numa_64.c b/trunk/arch/x86/mm/numa_64.c
index 92e27119ee1a..dd27f401f0a0 100644
--- a/trunk/arch/x86/mm/numa_64.c
+++ b/trunk/arch/x86/mm/numa_64.c
@@ -19,7 +19,7 @@ unsigned long __init numa_free_all_bootmem(void)
for_each_online_node(i)
pages += free_all_bootmem_node(NODE_DATA(i));
- pages += free_low_memory_core_early(MAX_NUMNODES);
+ pages += free_all_memory_core_early(MAX_NUMNODES);
return pages;
}
diff --git a/trunk/arch/x86/mm/numa_emulation.c b/trunk/arch/x86/mm/numa_emulation.c
index 46db56845f18..d0ed086b6247 100644
--- a/trunk/arch/x86/mm/numa_emulation.c
+++ b/trunk/arch/x86/mm/numa_emulation.c
@@ -28,16 +28,6 @@ static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
return -ENOENT;
}
-static u64 mem_hole_size(u64 start, u64 end)
-{
- unsigned long start_pfn = PFN_UP(start);
- unsigned long end_pfn = PFN_DOWN(end);
-
- if (start_pfn < end_pfn)
- return PFN_PHYS(absent_pages_in_range(start_pfn, end_pfn));
- return 0;
-}
-
/*
* Sets up nid to range from @start to @end. The return value is -errno if
* something went wrong, 0 otherwise.
@@ -99,7 +89,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
* Calculate target node size. x86_32 freaks on __udivdi3() so do
* the division in ulong number of pages and convert back.
*/
- size = max_addr - addr - mem_hole_size(addr, max_addr);
+ size = max_addr - addr - memblock_x86_hole_size(addr, max_addr);
size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes);
/*
@@ -145,7 +135,8 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
* Continue to add memory to this fake node if its
* non-reserved memory is less than the per-node size.
*/
- while (end - start - mem_hole_size(start, end) < size) {
+ while (end - start -
+ memblock_x86_hole_size(start, end) < size) {
end += FAKE_NODE_MIN_SIZE;
if (end > limit) {
end = limit;
@@ -159,7 +150,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
* this one must extend to the boundary.
*/
if (end < dma32_end && dma32_end - end -
- mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
+ memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
end = dma32_end;
/*
@@ -167,7 +158,8 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
* next node, this one must extend to the end of the
* physical node.
*/
- if (limit - end - mem_hole_size(end, limit) < size)
+ if (limit - end -
+ memblock_x86_hole_size(end, limit) < size)
end = limit;
ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes,
@@ -188,7 +180,7 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
{
u64 end = start + size;
- while (end - start - mem_hole_size(start, end) < size) {
+ while (end - start - memblock_x86_hole_size(start, end) < size) {
end += FAKE_NODE_MIN_SIZE;
if (end > max_addr) {
end = max_addr;
@@ -219,7 +211,8 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
* creates a uniform distribution of node sizes across the entire
* machine (but not necessarily over physical nodes).
*/
- min_size = (max_addr - addr - mem_hole_size(addr, max_addr)) / MAX_NUMNODES;
+ min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) /
+ MAX_NUMNODES;
min_size = max(min_size, FAKE_NODE_MIN_SIZE);
if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
min_size = (min_size + FAKE_NODE_MIN_SIZE) &
@@ -259,7 +252,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
* this one must extend to the boundary.
*/
if (end < dma32_end && dma32_end - end -
- mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
+ memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
end = dma32_end;
/*
@@ -267,7 +260,8 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
* next node, this one must extend to the end of the
* physical node.
*/
- if (limit - end - mem_hole_size(end, limit) < size)
+ if (limit - end -
+ memblock_x86_hole_size(end, limit) < size)
end = limit;
ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
@@ -357,11 +351,11 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
phys_size, PAGE_SIZE);
- if (!phys) {
+ if (phys == MEMBLOCK_ERROR) {
pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
goto no_emu;
}
- memblock_reserve(phys, phys_size);
+ memblock_x86_reserve_range(phys, phys + phys_size, "TMP NUMA DIST");
phys_dist = __va(phys);
for (i = 0; i < numa_dist_cnt; i++)
@@ -430,7 +424,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
/* free the copied physical distance table */
if (phys_dist)
- memblock_free(__pa(phys_dist), phys_size);
+ memblock_x86_free_range(__pa(phys_dist), __pa(phys_dist) + phys_size);
return;
no_emu:
diff --git a/trunk/arch/x86/mm/pageattr.c b/trunk/arch/x86/mm/pageattr.c
index eda2acbb6e81..f9e526742fa1 100644
--- a/trunk/arch/x86/mm/pageattr.c
+++ b/trunk/arch/x86/mm/pageattr.c
@@ -998,7 +998,7 @@ int set_memory_uc(unsigned long addr, int numpages)
}
EXPORT_SYMBOL(set_memory_uc);
-static int _set_memory_array(unsigned long *addr, int addrinarray,
+int _set_memory_array(unsigned long *addr, int addrinarray,
unsigned long new_type)
{
int i, j;
diff --git a/trunk/arch/x86/mm/srat.c b/trunk/arch/x86/mm/srat.c
index fd61b3fb7341..81dbfdeb080d 100644
--- a/trunk/arch/x86/mm/srat.c
+++ b/trunk/arch/x86/mm/srat.c
@@ -69,12 +69,6 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
return;
pxm = pa->proximity_domain;
- apic_id = pa->apic_id;
- if (!cpu_has_x2apic && (apic_id >= 0xff)) {
- printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n",
- pxm, apic_id);
- return;
- }
node = setup_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
@@ -82,6 +76,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
return;
}
+ apic_id = pa->apic_id;
if (apic_id >= MAX_LOCAL_APIC) {
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
return;
diff --git a/trunk/arch/x86/net/bpf_jit_comp.c b/trunk/arch/x86/net/bpf_jit_comp.c
index 7b65f752c5f8..bfab3fa10edc 100644
--- a/trunk/arch/x86/net/bpf_jit_comp.c
+++ b/trunk/arch/x86/net/bpf_jit_comp.c
@@ -568,8 +568,8 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
break;
}
if (filter[i].jt != 0) {
- if (filter[i].jf && f_offset)
- t_offset += is_near(f_offset) ? 2 : 5;
+ if (filter[i].jf)
+ t_offset += is_near(f_offset) ? 2 : 6;
EMIT_COND_JMP(t_op, t_offset);
if (filter[i].jf)
EMIT_JMP(f_offset);
diff --git a/trunk/arch/x86/oprofile/Makefile b/trunk/arch/x86/oprofile/Makefile
index 1599f568f0e2..446902b2a6b6 100644
--- a/trunk/arch/x86/oprofile/Makefile
+++ b/trunk/arch/x86/oprofile/Makefile
@@ -4,8 +4,9 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
oprof.o cpu_buffer.o buffer_sync.o \
event_buffer.o oprofile_files.o \
oprofilefs.o oprofile_stats.o \
- timer_int.o nmi_timer_int.o )
+ timer_int.o )
oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \
op_model_ppro.o op_model_p4.o
+oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o
diff --git a/trunk/arch/x86/oprofile/init.c b/trunk/arch/x86/oprofile/init.c
index 9e138d00ad36..f148cf652678 100644
--- a/trunk/arch/x86/oprofile/init.c
+++ b/trunk/arch/x86/oprofile/init.c
@@ -16,23 +16,37 @@
* with the NMI mode driver.
*/
-#ifdef CONFIG_X86_LOCAL_APIC
extern int op_nmi_init(struct oprofile_operations *ops);
+extern int op_nmi_timer_init(struct oprofile_operations *ops);
extern void op_nmi_exit(void);
-#else
-static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; }
-static void op_nmi_exit(void) { }
-#endif
-
extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
+static int nmi_timer;
+
int __init oprofile_arch_init(struct oprofile_operations *ops)
{
+ int ret;
+
+ ret = -ENODEV;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ ret = op_nmi_init(ops);
+#endif
+ nmi_timer = (ret != 0);
+#ifdef CONFIG_X86_IO_APIC
+ if (nmi_timer)
+ ret = op_nmi_timer_init(ops);
+#endif
ops->backtrace = x86_backtrace;
- return op_nmi_init(ops);
+
+ return ret;
}
+
void oprofile_arch_exit(void)
{
- op_nmi_exit();
+#ifdef CONFIG_X86_LOCAL_APIC
+ if (!nmi_timer)
+ op_nmi_exit();
+#endif
}
diff --git a/trunk/arch/x86/oprofile/nmi_int.c b/trunk/arch/x86/oprofile/nmi_int.c
index 26b8a8514ee5..75f9528e0372 100644
--- a/trunk/arch/x86/oprofile/nmi_int.c
+++ b/trunk/arch/x86/oprofile/nmi_int.c
@@ -595,36 +595,24 @@ static int __init p4_init(char **cpu_type)
return 0;
}
-enum __force_cpu_type {
- reserved = 0, /* do not force */
- timer,
- arch_perfmon,
-};
-
-static int force_cpu_type;
-
-static int set_cpu_type(const char *str, struct kernel_param *kp)
+static int force_arch_perfmon;
+static int force_cpu_type(const char *str, struct kernel_param *kp)
{
- if (!strcmp(str, "timer")) {
- force_cpu_type = timer;
- printk(KERN_INFO "oprofile: forcing NMI timer mode\n");
- } else if (!strcmp(str, "arch_perfmon")) {
- force_cpu_type = arch_perfmon;
+ if (!strcmp(str, "arch_perfmon")) {
+ force_arch_perfmon = 1;
printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
- } else {
- force_cpu_type = 0;
}
return 0;
}
-module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
+module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
static int __init ppro_init(char **cpu_type)
{
__u8 cpu_model = boot_cpu_data.x86_model;
struct op_x86_model_spec *spec = &op_ppro_spec; /* default */
- if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon)
+ if (force_arch_perfmon && cpu_has_arch_perfmon)
return 0;
/*
@@ -691,9 +679,6 @@ int __init op_nmi_init(struct oprofile_operations *ops)
if (!cpu_has_apic)
return -ENODEV;
- if (force_cpu_type == timer)
- return -ENODEV;
-
switch (vendor) {
case X86_VENDOR_AMD:
/* Needs to be at least an Athlon (or hammer in 32bit mode) */
diff --git a/trunk/arch/x86/oprofile/nmi_timer_int.c b/trunk/arch/x86/oprofile/nmi_timer_int.c
new file mode 100644
index 000000000000..7f8052cd6620
--- /dev/null
+++ b/trunk/arch/x86/oprofile/nmi_timer_int.c
@@ -0,0 +1,50 @@
+/**
+ * @file nmi_timer_int.c
+ *
+ * @remark Copyright 2003 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Zwane Mwaikambo
+ */
+
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/oprofile.h>
+#include <linux/rcupdate.h>
+#include <linux/kdebug.h>
+
+#include <asm/nmi.h>
+#include <asm/apic.h>
+#include <asm/ptrace.h>
+
+static int profile_timer_exceptions_notify(unsigned int val, struct pt_regs *regs)
+{
+ oprofile_add_sample(regs, 0);
+ return NMI_HANDLED;
+}
+
+static int timer_start(void)
+{
+ if (register_nmi_handler(NMI_LOCAL, profile_timer_exceptions_notify,
+ 0, "oprofile-timer"))
+ return 1;
+ return 0;
+}
+
+
+static void timer_stop(void)
+{
+ unregister_nmi_handler(NMI_LOCAL, "oprofile-timer");
+ synchronize_sched(); /* Allow already-started NMIs to complete. */
+}
+
+
+int __init op_nmi_timer_init(struct oprofile_operations *ops)
+{
+ ops->start = timer_start;
+ ops->stop = timer_stop;
+ ops->cpu_type = "timer";
+ printk(KERN_INFO "oprofile: using NMI timer interrupt.\n");
+ return 0;
+}
diff --git a/trunk/arch/x86/platform/efi/efi.c b/trunk/arch/x86/platform/efi/efi.c
index 4cf9bd0a1653..37718f0f053d 100644
--- a/trunk/arch/x86/platform/efi/efi.c
+++ b/trunk/arch/x86/platform/efi/efi.c
@@ -238,8 +238,7 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
spin_lock_irqsave(&rtc_lock, flags);
efi_call_phys_prelog();
- status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
- virt_to_phys(tc));
+ status = efi_call_phys2(efi_phys.get_time, tm, tc);
efi_call_phys_epilog();
spin_unlock_irqrestore(&rtc_lock, flags);
return status;
@@ -353,7 +352,8 @@ void __init efi_memblock_x86_reserve_range(void)
boot_params.efi_info.efi_memdesc_size;
memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
- memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
+ memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size,
+ "EFI memmap");
}
#if EFI_DEBUG
@@ -397,14 +397,16 @@ void __init efi_reserve_boot_services(void)
if ((start+size >= virt_to_phys(_text)
&& start <= virt_to_phys(_end)) ||
!e820_all_mapped(start, start+size, E820_RAM) ||
- memblock_is_region_reserved(start, size)) {
+ memblock_x86_check_reserved_size(&start, &size,
+ 1<<EFI_PAGE_SHIFT)) {
/* Could not reserve, skip it */
md->num_pages = 0;
memblock_dbg(PFX "Could not reserve boot range "
"[0x%010llx-0x%010llx]\n",
start, start+size-1);
} else
- memblock_reserve(start, size);
+ memblock_x86_reserve_range(start, start+size,
+ "EFI Boot");
}
}
diff --git a/trunk/arch/x86/tools/Makefile b/trunk/arch/x86/tools/Makefile
index d511aa97533a..f82082677337 100644
--- a/trunk/arch/x86/tools/Makefile
+++ b/trunk/arch/x86/tools/Makefile
@@ -18,21 +18,14 @@ chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk
quiet_cmd_posttest = TEST $@
cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose)
-quiet_cmd_sanitytest = TEST $@
- cmd_sanitytest = $(obj)/insn_sanity $(posttest_64bit) -m 1000000
-
-posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity
+posttest: $(obj)/test_get_len vmlinux
$(call cmd,posttest)
- $(call cmd,sanitytest)
-hostprogs-y += test_get_len insn_sanity
+hostprogs-y := test_get_len
# -I needed for generated C source and C source which in the kernel tree.
HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
-HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
-
# Dependencies are also needed.
$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
-$(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
diff --git a/trunk/arch/x86/tools/gen-insn-attr-x86.awk b/trunk/arch/x86/tools/gen-insn-attr-x86.awk
index 5f6a5b6c3a15..eaf11f52fc0b 100644
--- a/trunk/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/trunk/arch/x86/tools/gen-insn-attr-x86.awk
@@ -47,7 +47,7 @@ BEGIN {
sep_expr = "^\\|$"
group_expr = "^Grp[0-9A-Za-z]+"
- imm_expr = "^[IJAOL][a-z]"
+ imm_expr = "^[IJAO][a-z]"
imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
@@ -59,7 +59,6 @@ BEGIN {
imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
imm_flag["Ob"] = "INAT_MOFFSET"
imm_flag["Ov"] = "INAT_MOFFSET"
- imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
force64_expr = "\\([df]64\\)"
@@ -71,12 +70,8 @@ BEGIN {
lprefix3_expr = "\\(F2\\)"
max_lprefix = 4
- # All opcodes starting with lower-case 'v' or with (v1) superscript
- # accepts VEX prefix
- vexok_opcode_expr = "^v.*"
- vexok_expr = "\\(v1\\)"
- # All opcodes with (v) superscript supports *only* VEX prefix
- vexonly_expr = "\\(v\\)"
+ vexok_expr = "\\(VEX\\)"
+ vexonly_expr = "\\(oVEX\\)"
prefix_expr = "\\(Prefix\\)"
prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
@@ -90,8 +85,8 @@ BEGIN {
prefix_num["SEG=GS"] = "INAT_PFX_GS"
prefix_num["SEG=SS"] = "INAT_PFX_SS"
prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
- prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
- prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
+ prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2"
+ prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3"
clear_vars()
}
@@ -315,10 +310,12 @@ function convert_operands(count,opnd, i,j,imm,mod)
if (match(opcode, fpu_expr))
flags = add_flags(flags, "INAT_MODRM")
- # check VEX codes
+ # check VEX only code
if (match(ext, vexonly_expr))
flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
- else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
+
+ # check VEX only code
+ if (match(ext, vexok_expr))
flags = add_flags(flags, "INAT_VEXOK")
# check prefixes
diff --git a/trunk/arch/x86/tools/insn_sanity.c b/trunk/arch/x86/tools/insn_sanity.c
deleted file mode 100644
index cc2f8c131286..000000000000
--- a/trunk/arch/x86/tools/insn_sanity.c
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- * x86 decoder sanity test - based on test_get_insn.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2009
- * Copyright (C) Hitachi, Ltd., 2011
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-
-#define unlikely(cond) (cond)
-#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0]))
-
-#include <asm/insn.h>
-#include <inat.c>
-#include <insn.c>
-
-/*
- * Test of instruction analysis against tampering.
- * Feed random binary to instruction decoder and ensure not to
- * access out-of-instruction-buffer.
- */
-
-#define DEFAULT_MAX_ITER 10000
-#define INSN_NOP 0x90
-
-static const char *prog; /* Program name */
-static int verbose; /* Verbosity */
-static int x86_64; /* x86-64 bit mode flag */
-static unsigned int seed; /* Random seed */
-static unsigned long iter_start; /* Start of iteration number */
-static unsigned long iter_end = DEFAULT_MAX_ITER; /* End of iteration number */
-static FILE *input_file; /* Input file name */
-
-static void usage(const char *err)
-{
- if (err)
- fprintf(stderr, "Error: %s\n\n", err);
- fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog);
- fprintf(stderr, "\t-y 64bit mode\n");
- fprintf(stderr, "\t-n 32bit mode\n");
- fprintf(stderr, "\t-v Verbosity(-vv dumps any decoded result)\n");
- fprintf(stderr, "\t-s Give a random seed (and iteration number)\n");
- fprintf(stderr, "\t-m Give a maximum iteration number\n");
- fprintf(stderr, "\t-i Give an input file with decoded binary\n");
- exit(1);
-}
-
-static void dump_field(FILE *fp, const char *name, const char *indent,
- struct insn_field *field)
-{
- fprintf(fp, "%s.%s = {\n", indent, name);
- fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n",
- indent, field->value, field->bytes[0], field->bytes[1],
- field->bytes[2], field->bytes[3]);
- fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent,
- field->got, field->nbytes);
-}
-
-static void dump_insn(FILE *fp, struct insn *insn)
-{
- fprintf(fp, "Instruction = {\n");
- dump_field(fp, "prefixes", "\t", &insn->prefixes);
- dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix);
- dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix);
- dump_field(fp, "opcode", "\t", &insn->opcode);
- dump_field(fp, "modrm", "\t", &insn->modrm);
- dump_field(fp, "sib", "\t", &insn->sib);
- dump_field(fp, "displacement", "\t", &insn->displacement);
- dump_field(fp, "immediate1", "\t", &insn->immediate1);
- dump_field(fp, "immediate2", "\t", &insn->immediate2);
- fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n",
- insn->attr, insn->opnd_bytes, insn->addr_bytes);
- fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n",
- insn->length, insn->x86_64, insn->kaddr);
-}
-
-static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter,
- unsigned char *insn_buf, struct insn *insn)
-{
- int i;
-
- fprintf(fp, "%s:\n", msg);
-
- dump_insn(fp, insn);
-
- fprintf(fp, "You can reproduce this with below command(s);\n");
-
- /* Input a decoded instruction sequence directly */
- fprintf(fp, " $ echo ");
- for (i = 0; i < MAX_INSN_SIZE; i++)
- fprintf(fp, " %02x", insn_buf[i]);
- fprintf(fp, " | %s -i -\n", prog);
-
- if (!input_file) {
- fprintf(fp, "Or \n");
- /* Give a seed and iteration number */
- fprintf(fp, " $ %s -s 0x%x,%lu\n", prog, seed, nr_iter);
- }
-}
-
-static void init_random_seed(void)
-{
- int fd;
-
- fd = open("/dev/urandom", O_RDONLY);
- if (fd < 0)
- goto fail;
-
- if (read(fd, &seed, sizeof(seed)) != sizeof(seed))
- goto fail;
-
- close(fd);
- return;
-fail:
- usage("Failed to open /dev/urandom");
-}
-
-/* Read given instruction sequence from the input file */
-static int read_next_insn(unsigned char *insn_buf)
-{
- char buf[256] = "", *tmp;
- int i;
-
- tmp = fgets(buf, ARRAY_SIZE(buf), input_file);
- if (tmp == NULL || feof(input_file))
- return 0;
-
- for (i = 0; i < MAX_INSN_SIZE; i++) {
- insn_buf[i] = (unsigned char)strtoul(tmp, &tmp, 16);
- if (*tmp != ' ')
- break;
- }
-
- return i;
-}
-
-static int generate_insn(unsigned char *insn_buf)
-{
- int i;
-
- if (input_file)
- return read_next_insn(insn_buf);
-
- /* Fills buffer with random binary up to MAX_INSN_SIZE */
- for (i = 0; i < MAX_INSN_SIZE - 1; i += 2)
- *(unsigned short *)(&insn_buf[i]) = random() & 0xffff;
-
- while (i < MAX_INSN_SIZE)
- insn_buf[i++] = random() & 0xff;
-
- return i;
-}
-
-static void parse_args(int argc, char **argv)
-{
- int c;
- char *tmp = NULL;
- int set_seed = 0;
-
- prog = argv[0];
- while ((c = getopt(argc, argv, "ynvs:m:i:")) != -1) {
- switch (c) {
- case 'y':
- x86_64 = 1;
- break;
- case 'n':
- x86_64 = 0;
- break;
- case 'v':
- verbose++;
- break;
- case 'i':
- if (strcmp("-", optarg) == 0)
- input_file = stdin;
- else
- input_file = fopen(optarg, "r");
- if (!input_file)
- usage("Failed to open input file");
- break;
- case 's':
- seed = (unsigned int)strtoul(optarg, &tmp, 0);
- if (*tmp == ',') {
- optarg = tmp + 1;
- iter_start = strtoul(optarg, &tmp, 0);
- }
- if (*tmp != '\0' || tmp == optarg)
- usage("Failed to parse seed");
- set_seed = 1;
- break;
- case 'm':
- iter_end = strtoul(optarg, &tmp, 0);
- if (*tmp != '\0' || tmp == optarg)
- usage("Failed to parse max_iter");
- break;
- default:
- usage(NULL);
- }
- }
-
- /* Check errors */
- if (iter_end < iter_start)
- usage("Max iteration number must be bigger than iter-num");
-
- if (set_seed && input_file)
- usage("Don't use input file (-i) with random seed (-s)");
-
- /* Initialize random seed */
- if (!input_file) {
- if (!set_seed) /* No seed is given */
- init_random_seed();
- srand(seed);
- }
-}
-
-int main(int argc, char **argv)
-{
- struct insn insn;
- int insns = 0;
- int errors = 0;
- unsigned long i;
- unsigned char insn_buf[MAX_INSN_SIZE * 2];
-
- parse_args(argc, argv);
-
- /* Prepare stop bytes with NOPs */
- memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE);
-
- for (i = 0; i < iter_end; i++) {
- if (generate_insn(insn_buf) <= 0)
- break;
-
- if (i < iter_start) /* Skip to given iteration number */
- continue;
-
- /* Decode an instruction */
- insn_init(&insn, insn_buf, x86_64);
- insn_get_length(&insn);
-
- if (insn.next_byte <= insn.kaddr ||
- insn.kaddr + MAX_INSN_SIZE < insn.next_byte) {
- /* Access out-of-range memory */
- dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn);
- errors++;
- } else if (verbose && !insn_complete(&insn))
- dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn);
- else if (verbose >= 2)
- dump_insn(stdout, &insn);
- insns++;
- }
-
- fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed);
-
- return errors ? 1 : 0;
-}
diff --git a/trunk/arch/x86/xen/enlighten.c b/trunk/arch/x86/xen/enlighten.c
index 12eb07bfb267..1f928659c338 100644
--- a/trunk/arch/x86/xen/enlighten.c
+++ b/trunk/arch/x86/xen/enlighten.c
@@ -1215,6 +1215,8 @@ asmlinkage void __init xen_start_kernel(void)
local_irq_disable();
early_boot_irqs_disabled = true;
+ memblock_init();
+
xen_raw_console_write("mapping kernel into physical memory\n");
pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
xen_ident_map_ISA();
diff --git a/trunk/arch/x86/xen/mmu.c b/trunk/arch/x86/xen/mmu.c
index f4bf8aa574f4..87f6673b1207 100644
--- a/trunk/arch/x86/xen/mmu.c
+++ b/trunk/arch/x86/xen/mmu.c
@@ -1774,8 +1774,10 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
__xen_write_cr3(true, __pa(pgd));
xen_mc_issue(PARAVIRT_LAZY_CPU);
- memblock_reserve(__pa(xen_start_info->pt_base),
- xen_start_info->nr_pt_frames * PAGE_SIZE);
+ memblock_x86_reserve_range(__pa(xen_start_info->pt_base),
+ __pa(xen_start_info->pt_base +
+ xen_start_info->nr_pt_frames * PAGE_SIZE),
+ "XEN PAGETABLES");
return pgd;
}
@@ -1851,8 +1853,10 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
PFN_DOWN(__pa(initial_page_table)));
xen_write_cr3(__pa(initial_page_table));
- memblock_reserve(__pa(xen_start_info->pt_base),
- xen_start_info->nr_pt_frames * PAGE_SIZE));
+ memblock_x86_reserve_range(__pa(xen_start_info->pt_base),
+ __pa(xen_start_info->pt_base +
+ xen_start_info->nr_pt_frames * PAGE_SIZE),
+ "XEN PAGETABLES");
return initial_page_table;
}
diff --git a/trunk/arch/x86/xen/setup.c b/trunk/arch/x86/xen/setup.c
index e03c63692176..b2c7179fa263 100644
--- a/trunk/arch/x86/xen/setup.c
+++ b/trunk/arch/x86/xen/setup.c
@@ -75,7 +75,7 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
if (i == XEN_EXTRA_MEM_MAX_REGIONS)
printk(KERN_WARNING "Warning: not enough extra memory regions\n");
- memblock_reserve(start, size);
+ memblock_x86_reserve_range(start, start + size, "XEN EXTRA");
xen_max_p2m_pfn = PFN_DOWN(start + size);
@@ -311,8 +311,9 @@ char * __init xen_memory_setup(void)
* - xen_start_info
* See comment above "struct start_info" in
*/
- memblock_reserve(__pa(xen_start_info->mfn_list),
- xen_start_info->pt_base - xen_start_info->mfn_list);
+ memblock_x86_reserve_range(__pa(xen_start_info->mfn_list),
+ __pa(xen_start_info->pt_base),
+ "XEN START INFO");
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
diff --git a/trunk/arch/xtensa/kernel/time.c b/trunk/arch/xtensa/kernel/time.c
index ac62f9cf1e10..f3e5eb43f71c 100644
--- a/trunk/arch/xtensa/kernel/time.c
+++ b/trunk/arch/xtensa/kernel/time.c
@@ -41,6 +41,14 @@ static struct clocksource ccount_clocksource = {
.rating = 200,
.read = ccount_read,
.mask = CLOCKSOURCE_MASK(32),
+ /*
+ * With a shift of 22 the lower limit of the cpu clock is
+ * 1MHz, where NSEC_PER_CCOUNT is 1000 or a bit less than
+ * 2^10: Since we have 32 bits and the multiplicator can
+ * already take up as much as 10 bits, this leaves us with
+ * remaining upper 22 bits.
+ */
+ .shift = 22,
};
static irqreturn_t timer_interrupt(int irq, void *dev_id);
@@ -58,7 +66,10 @@ void __init time_init(void)
printk("%d.%02d MHz\n", (int)ccount_per_jiffy/(1000000/HZ),
(int)(ccount_per_jiffy/(10000/HZ))%100);
#endif
- clocksource_register_hz(&ccount_clocksource, CCOUNT_PER_JIFFY * HZ);
+ ccount_clocksource.mult =
+ clocksource_hz2mult(CCOUNT_PER_JIFFY * HZ,
+ ccount_clocksource.shift);
+ clocksource_register(&ccount_clocksource);
/* Initialize the linux timer interrupt. */
diff --git a/trunk/block/blk-map.c b/trunk/block/blk-map.c
index 623e1cd4cffe..164cd0059706 100644
--- a/trunk/block/blk-map.c
+++ b/trunk/block/blk-map.c
@@ -311,7 +311,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
if (IS_ERR(bio))
return PTR_ERR(bio);
- if (!reading)
+ if (rq_data_dir(rq) == WRITE)
bio->bi_rw |= REQ_WRITE;
if (do_copy)
diff --git a/trunk/block/blk-tag.c b/trunk/block/blk-tag.c
index 4af6f5cc1167..e74d6d13838f 100644
--- a/trunk/block/blk-tag.c
+++ b/trunk/block/blk-tag.c
@@ -282,9 +282,18 @@ EXPORT_SYMBOL(blk_queue_resize_tags);
void blk_queue_end_tag(struct request_queue *q, struct request *rq)
{
struct blk_queue_tag *bqt = q->queue_tags;
- unsigned tag = rq->tag; /* negative tags invalid */
+ int tag = rq->tag;
- BUG_ON(tag >= bqt->real_max_depth);
+ BUG_ON(tag == -1);
+
+ if (unlikely(tag >= bqt->max_depth)) {
+ /*
+ * This can happen after tag depth has been reduced.
+ * But tag shouldn't be larger than real_max_depth.
+ */
+ WARN_ON(tag >= bqt->real_max_depth);
+ return;
+ }
list_del_init(&rq->queuelist);
rq->cmd_flags &= ~REQ_QUEUED;
diff --git a/trunk/block/cfq-iosched.c b/trunk/block/cfq-iosched.c
index 3548705b04e4..4c12869fcf77 100644
--- a/trunk/block/cfq-iosched.c
+++ b/trunk/block/cfq-iosched.c
@@ -1655,8 +1655,6 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
struct request *next)
{
struct cfq_queue *cfqq = RQ_CFQQ(rq);
- struct cfq_data *cfqd = q->elevator->elevator_data;
-
/*
* reposition in fifo if next is older than rq
*/
@@ -1671,16 +1669,6 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
cfq_remove_request(next);
cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg,
rq_data_dir(next), rq_is_sync(next));
-
- cfqq = RQ_CFQQ(next);
- /*
- * all requests of this queue are merged to other queues, delete it
- * from the service tree. If it's the active_queue,
- * cfq_dispatch_requests() will choose to expire it or do idle
- */
- if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list) &&
- cfqq != cfqd->active_queue)
- cfq_del_cfqq_rr(cfqd, cfqq);
}
static int cfq_allow_merge(struct request_queue *q, struct request *rq,
diff --git a/trunk/block/ioctl.c b/trunk/block/ioctl.c
index d510c2a4eff8..ca939fc1030f 100644
--- a/trunk/block/ioctl.c
+++ b/trunk/block/ioctl.c
@@ -179,26 +179,6 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
*/
EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl);
-/*
- * Is it an unrecognized ioctl? The correct returns are either
- * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a
- * fallback"). ENOIOCTLCMD gets turned into ENOTTY by the ioctl
- * code before returning.
- *
- * Confused drivers sometimes return EINVAL, which is wrong. It
- * means "I understood the ioctl command, but the parameters to
- * it were wrong".
- *
- * We should aim to just fix the broken drivers, the EINVAL case
- * should go away.
- */
-static inline int is_unrecognized_ioctl(int ret)
-{
- return ret == -EINVAL ||
- ret == -ENOTTY ||
- ret == -ENOIOCTLCMD;
-}
-
/*
* always keep this in sync with compat_blkdev_ioctl()
*/
@@ -216,7 +196,8 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
return -EACCES;
ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
- if (!is_unrecognized_ioctl(ret))
+ /* -EINVAL to handle old uncorrected drivers */
+ if (ret != -EINVAL && ret != -ENOTTY)
return ret;
fsync_bdev(bdev);
@@ -225,7 +206,8 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKROSET:
ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
- if (!is_unrecognized_ioctl(ret))
+ /* -EINVAL to handle old uncorrected drivers */
+ if (ret != -EINVAL && ret != -ENOTTY)
return ret;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
diff --git a/trunk/drivers/ata/Kconfig b/trunk/drivers/ata/Kconfig
index cf047c406d92..6bdedd7cca2c 100644
--- a/trunk/drivers/ata/Kconfig
+++ b/trunk/drivers/ata/Kconfig
@@ -820,7 +820,7 @@ config PATA_PLATFORM
config PATA_OF_PLATFORM
tristate "OpenFirmware platform device PATA support"
- depends on PATA_PLATFORM && OF && OF_IRQ
+ depends on PATA_PLATFORM && OF
help
This option enables support for generic directly connected ATA
devices commonly found on embedded systems with OpenFirmware
diff --git a/trunk/drivers/base/cpu.c b/trunk/drivers/base/cpu.c
index 3991502b21e5..251acea3d359 100644
--- a/trunk/drivers/base/cpu.c
+++ b/trunk/drivers/base/cpu.c
@@ -247,13 +247,6 @@ struct sys_device *get_cpu_sysdev(unsigned cpu)
}
EXPORT_SYMBOL_GPL(get_cpu_sysdev);
-bool cpu_is_hotpluggable(unsigned cpu)
-{
- struct sys_device *dev = get_cpu_sysdev(cpu);
- return dev && container_of(dev, struct cpu, sysdev)->hotpluggable;
-}
-EXPORT_SYMBOL_GPL(cpu_is_hotpluggable);
-
int __init cpu_dev_init(void)
{
int err;
diff --git a/trunk/drivers/char/random.c b/trunk/drivers/char/random.c
index 85da8740586b..6035ab8d5ef7 100644
--- a/trunk/drivers/char/random.c
+++ b/trunk/drivers/char/random.c
@@ -624,8 +624,8 @@ static struct timer_rand_state input_timer_state;
static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
{
struct {
+ cycles_t cycles;
long jiffies;
- unsigned cycles;
unsigned num;
} sample;
long delta, delta2, delta3;
@@ -637,11 +637,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
goto out;
sample.jiffies = jiffies;
-
- /* Use arch random value, fall back to cycles */
- if (!arch_get_random_int(&sample.cycles))
- sample.cycles = get_cycles();
-
+ sample.cycles = get_cycles();
sample.num = num;
mix_pool_bytes(&input_pool, &sample, sizeof(sample));
diff --git a/trunk/drivers/clocksource/acpi_pm.c b/trunk/drivers/clocksource/acpi_pm.c
index 6b5cf02c35c8..effe7974aa9a 100644
--- a/trunk/drivers/clocksource/acpi_pm.c
+++ b/trunk/drivers/clocksource/acpi_pm.c
@@ -143,7 +143,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_LE,
#ifndef CONFIG_X86_64
#include
#define PMTMR_EXPECTED_RATE \
- ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (PIT_TICK_RATE>>10))
+ ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10))
/*
* Some boards have the PMTMR running way too fast. We check
* the PMTMR rate against PIT channel 2 to catch these cases.
diff --git a/trunk/drivers/clocksource/i8253.c b/trunk/drivers/clocksource/i8253.c
index e7cab2da910f..27c49e60b7d6 100644
--- a/trunk/drivers/clocksource/i8253.c
+++ b/trunk/drivers/clocksource/i8253.c
@@ -53,7 +53,7 @@ static cycle_t i8253_read(struct clocksource *cs)
count |= inb_p(PIT_CH0) << 8;
/* VIA686a test code... reset the latch if count > max + 1 */
- if (count > PIT_LATCH) {
+ if (count > LATCH) {
outb_p(0x34, PIT_MODE);
outb_p(PIT_LATCH & 0xff, PIT_CH0);
outb_p(PIT_LATCH >> 8, PIT_CH0);
@@ -114,8 +114,8 @@ static void init_pit_timer(enum clock_event_mode mode,
case CLOCK_EVT_MODE_PERIODIC:
/* binary, mode 2, LSB/MSB, ch 0 */
outb_p(0x34, PIT_MODE);
- outb_p(PIT_LATCH & 0xff , PIT_CH0); /* LSB */
- outb_p(PIT_LATCH >> 8 , PIT_CH0); /* MSB */
+ outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
+ outb_p(LATCH >> 8 , PIT_CH0); /* MSB */
break;
case CLOCK_EVT_MODE_SHUTDOWN:
diff --git a/trunk/drivers/clocksource/tcb_clksrc.c b/trunk/drivers/clocksource/tcb_clksrc.c
index 55d0f95f82f9..79c47e88d5d1 100644
--- a/trunk/drivers/clocksource/tcb_clksrc.c
+++ b/trunk/drivers/clocksource/tcb_clksrc.c
@@ -59,6 +59,7 @@ static struct clocksource clksrc = {
.rating = 200,
.read = tc_get_cycles,
.mask = CLOCKSOURCE_MASK(32),
+ .shift = 18,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
@@ -255,6 +256,7 @@ static int __init tcb_clksrc_init(void)
best_divisor_idx = i;
}
+ clksrc.mult = clocksource_hz2mult(divided_rate, clksrc.shift);
printk(bootinfo, clksrc.name, CONFIG_ATMEL_TCB_CLKSRC_BLOCK,
divided_rate / 1000000,
@@ -290,7 +292,7 @@ static int __init tcb_clksrc_init(void)
__raw_writel(ATMEL_TC_SYNC, tcaddr + ATMEL_TC_BCR);
/* and away we go! */
- clocksource_register_hz(&clksrc, divided_rate);
+ clocksource_register(&clksrc);
/* channel 2: periodic and oneshot timer support */
setup_clkevents(tc, clk32k_divisor_idx);
diff --git a/trunk/drivers/cpufreq/cpufreq_conservative.c b/trunk/drivers/cpufreq/cpufreq_conservative.c
index 235a340e81f2..c97b468ee9f7 100644
--- a/trunk/drivers/cpufreq/cpufreq_conservative.c
+++ b/trunk/drivers/cpufreq/cpufreq_conservative.c
@@ -95,26 +95,27 @@ static struct dbs_tuners {
.freq_step = 5,
};
-static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
+static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
+ cputime64_t *wall)
{
- u64 idle_time;
- u64 cur_wall_time;
- u64 busy_time;
+ cputime64_t idle_time;
+ cputime64_t cur_wall_time;
+ cputime64_t busy_time;
cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
+ busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
+ kstat_cpu(cpu).cpustat.system);
- busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
- busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
- busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
- busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
- busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
- busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);
- idle_time = cur_wall_time - busy_time;
+ idle_time = cputime64_sub(cur_wall_time, busy_time);
if (wall)
- *wall = jiffies_to_usecs(cur_wall_time);
+ *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
- return jiffies_to_usecs(idle_time);
+ return (cputime64_t)jiffies_to_usecs(idle_time);
}
static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
@@ -271,7 +272,7 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
&dbs_info->prev_cpu_wall);
if (dbs_tuners_ins.ignore_nice)
- dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+ dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
}
return count;
}
@@ -352,20 +353,20 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
- wall_time = (unsigned int)
- (cur_wall_time - j_dbs_info->prev_cpu_wall);
+ wall_time = (unsigned int) cputime64_sub(cur_wall_time,
+ j_dbs_info->prev_cpu_wall);
j_dbs_info->prev_cpu_wall = cur_wall_time;
- idle_time = (unsigned int)
- (cur_idle_time - j_dbs_info->prev_cpu_idle);
+ idle_time = (unsigned int) cputime64_sub(cur_idle_time,
+ j_dbs_info->prev_cpu_idle);
j_dbs_info->prev_cpu_idle = cur_idle_time;
if (dbs_tuners_ins.ignore_nice) {
- u64 cur_nice;
+ cputime64_t cur_nice;
unsigned long cur_nice_jiffies;
- cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
- j_dbs_info->prev_cpu_nice;
+ cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
+ j_dbs_info->prev_cpu_nice);
/*
* Assumption: nice time between sampling periods will
* be less than 2^32 jiffies for 32 bit sys
@@ -373,7 +374,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
cur_nice_jiffies = (unsigned long)
cputime64_to_jiffies64(cur_nice);
- j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+ j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
idle_time += jiffies_to_usecs(cur_nice_jiffies);
}
@@ -500,9 +501,10 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
&j_dbs_info->prev_cpu_wall);
- if (dbs_tuners_ins.ignore_nice)
+ if (dbs_tuners_ins.ignore_nice) {
j_dbs_info->prev_cpu_nice =
- kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+ kstat_cpu(j).cpustat.nice;
+ }
}
this_dbs_info->down_skip = 0;
this_dbs_info->requested_freq = policy->cur;
diff --git a/trunk/drivers/cpufreq/cpufreq_ondemand.c b/trunk/drivers/cpufreq/cpufreq_ondemand.c
index 3d679eee70a1..fa8af4ebb1d6 100644
--- a/trunk/drivers/cpufreq/cpufreq_ondemand.c
+++ b/trunk/drivers/cpufreq/cpufreq_ondemand.c
@@ -119,26 +119,27 @@ static struct dbs_tuners {
.powersave_bias = 0,
};
-static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
+static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
+ cputime64_t *wall)
{
- u64 idle_time;
- u64 cur_wall_time;
- u64 busy_time;
+ cputime64_t idle_time;
+ cputime64_t cur_wall_time;
+ cputime64_t busy_time;
cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
+ busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
+ kstat_cpu(cpu).cpustat.system);
- busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
- busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
- busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
- busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
- busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
- busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);
- idle_time = cur_wall_time - busy_time;
+ idle_time = cputime64_sub(cur_wall_time, busy_time);
if (wall)
- *wall = jiffies_to_usecs(cur_wall_time);
+ *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
- return jiffies_to_usecs(idle_time);
+ return (cputime64_t)jiffies_to_usecs(idle_time);
}
static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
@@ -344,7 +345,7 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
&dbs_info->prev_cpu_wall);
if (dbs_tuners_ins.ignore_nice)
- dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+ dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
}
return count;
@@ -441,24 +442,24 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);
- wall_time = (unsigned int)
- (cur_wall_time - j_dbs_info->prev_cpu_wall);
+ wall_time = (unsigned int) cputime64_sub(cur_wall_time,
+ j_dbs_info->prev_cpu_wall);
j_dbs_info->prev_cpu_wall = cur_wall_time;
- idle_time = (unsigned int)
- (cur_idle_time - j_dbs_info->prev_cpu_idle);
+ idle_time = (unsigned int) cputime64_sub(cur_idle_time,
+ j_dbs_info->prev_cpu_idle);
j_dbs_info->prev_cpu_idle = cur_idle_time;
- iowait_time = (unsigned int)
- (cur_iowait_time - j_dbs_info->prev_cpu_iowait);
+ iowait_time = (unsigned int) cputime64_sub(cur_iowait_time,
+ j_dbs_info->prev_cpu_iowait);
j_dbs_info->prev_cpu_iowait = cur_iowait_time;
if (dbs_tuners_ins.ignore_nice) {
- u64 cur_nice;
+ cputime64_t cur_nice;
unsigned long cur_nice_jiffies;
- cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
- j_dbs_info->prev_cpu_nice;
+ cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
+ j_dbs_info->prev_cpu_nice);
/*
* Assumption: nice time between sampling periods will
* be less than 2^32 jiffies for 32 bit sys
@@ -466,7 +467,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
cur_nice_jiffies = (unsigned long)
cputime64_to_jiffies64(cur_nice);
- j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+ j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
idle_time += jiffies_to_usecs(cur_nice_jiffies);
}
@@ -645,9 +646,10 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
&j_dbs_info->prev_cpu_wall);
- if (dbs_tuners_ins.ignore_nice)
+ if (dbs_tuners_ins.ignore_nice) {
j_dbs_info->prev_cpu_nice =
- kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+ kstat_cpu(j).cpustat.nice;
+ }
}
this_dbs_info->cpu = cpu;
this_dbs_info->rate_mult = 1;
diff --git a/trunk/drivers/cpufreq/cpufreq_stats.c b/trunk/drivers/cpufreq/cpufreq_stats.c
index 2a508edd768b..c5072a91e848 100644
--- a/trunk/drivers/cpufreq/cpufreq_stats.c
+++ b/trunk/drivers/cpufreq/cpufreq_stats.c
@@ -61,8 +61,9 @@ static int cpufreq_stats_update(unsigned int cpu)
spin_lock(&cpufreq_stats_lock);
stat = per_cpu(cpufreq_stats_table, cpu);
if (stat->time_in_state)
- stat->time_in_state[stat->last_index] +=
- cur_time - stat->last_time;
+ stat->time_in_state[stat->last_index] =
+ cputime64_add(stat->time_in_state[stat->last_index],
+ cputime_sub(cur_time, stat->last_time));
stat->last_time = cur_time;
spin_unlock(&cpufreq_stats_lock);
return 0;
diff --git a/trunk/drivers/dma/Kconfig b/trunk/drivers/dma/Kconfig
index 5a99bb3f255a..ab8f469f5cf8 100644
--- a/trunk/drivers/dma/Kconfig
+++ b/trunk/drivers/dma/Kconfig
@@ -124,7 +124,7 @@ config MV_XOR
config MX3_IPU
bool "MX3x Image Processing Unit support"
- depends on SOC_IMX31 || SOC_IMX35
+ depends on ARCH_MX3
select DMA_ENGINE
default y
help
@@ -216,7 +216,7 @@ config PCH_DMA
config IMX_SDMA
tristate "i.MX SDMA support"
- depends on ARCH_MX25 || SOC_IMX31 || SOC_IMX35 || ARCH_MX5
+ depends on ARCH_MX25 || ARCH_MX3 || ARCH_MX5
select DMA_ENGINE
help
Support the i.MX SDMA engine. This engine is integrated into
diff --git a/trunk/drivers/edac/i7core_edac.c b/trunk/drivers/edac/i7core_edac.c
index 8568d9b61875..70ad8923f1d7 100644
--- a/trunk/drivers/edac/i7core_edac.c
+++ b/trunk/drivers/edac/i7core_edac.c
@@ -2234,7 +2234,7 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
if (pvt->enable_scrub)
disable_sdram_scrub_setting(mci);
- mce_unregister_decode_chain(&i7_mce_dec);
+ atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &i7_mce_dec);
/* Disable EDAC polling */
i7core_pci_ctl_release(pvt);
@@ -2336,7 +2336,7 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
/* DCLK for scrub rate setting */
pvt->dclk_freq = get_dclk_freq();
- mce_register_decode_chain(&i7_mce_dec);
+ atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec);
return 0;
diff --git a/trunk/drivers/edac/mce_amd.c b/trunk/drivers/edac/mce_amd.c
index bd926ea2e00c..d0864d9c38ad 100644
--- a/trunk/drivers/edac/mce_amd.c
+++ b/trunk/drivers/edac/mce_amd.c
@@ -884,7 +884,7 @@ static int __init mce_amd_init(void)
pr_info("MCE: In-kernel MCE decoding enabled.\n");
- mce_register_decode_chain(&amd_mce_dec_nb);
+ atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
return 0;
}
@@ -893,7 +893,7 @@ early_initcall(mce_amd_init);
#ifdef MODULE
static void __exit mce_amd_exit(void)
{
- mce_unregister_decode_chain(&amd_mce_dec_nb);
+ atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
kfree(fam_ops);
}
diff --git a/trunk/drivers/edac/sb_edac.c b/trunk/drivers/edac/sb_edac.c
index 1dc118d83cc6..7a402bfbee7d 100644
--- a/trunk/drivers/edac/sb_edac.c
+++ b/trunk/drivers/edac/sb_edac.c
@@ -1609,9 +1609,11 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
mce->cpuvendor, mce->cpuid, mce->time,
mce->socketid, mce->apicid);
+#ifdef CONFIG_SMP
/* Only handle if it is the right mc controller */
if (cpu_data(mce->cpu).phys_proc_id != pvt->sbridge_dev->mc)
return NOTIFY_DONE;
+#endif
smp_rmb();
if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
@@ -1659,7 +1661,8 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
__func__, mci, &sbridge_dev->pdev[0]->dev);
- mce_unregister_decode_chain(&sbridge_mce_dec);
+ atomic_notifier_chain_unregister(&x86_mce_decoder_chain,
+ &sbridge_mce_dec);
/* Remove MC sysfs nodes */
edac_mc_del_mc(mci->dev);
@@ -1728,7 +1731,8 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev)
goto fail0;
}
- mce_register_decode_chain(&sbridge_mce_dec);
+ atomic_notifier_chain_register(&x86_mce_decoder_chain,
+ &sbridge_mce_dec);
return 0;
fail0:
diff --git a/trunk/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/trunk/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index b9da8900ae4e..c681dc149d2a 100644
--- a/trunk/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/trunk/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -756,9 +756,9 @@ intel_enable_semaphores(struct drm_device *dev)
if (i915_semaphores >= 0)
return i915_semaphores;
- /* Disable semaphores on SNB */
+ /* Enable semaphores on SNB when IO remapping is off */
if (INTEL_INFO(dev)->gen == 6)
- return 0;
+ return !intel_iommu_enabled;
return 1;
}
diff --git a/trunk/drivers/gpu/drm/i915/intel_display.c b/trunk/drivers/gpu/drm/i915/intel_display.c
index daa5743ccbd6..d809b038ca88 100644
--- a/trunk/drivers/gpu/drm/i915/intel_display.c
+++ b/trunk/drivers/gpu/drm/i915/intel_display.c
@@ -7922,11 +7922,13 @@ static bool intel_enable_rc6(struct drm_device *dev)
return 0;
/*
- * Disable rc6 on Sandybridge
+ * Enable rc6 on Sandybridge if DMA remapping is disabled
*/
if (INTEL_INFO(dev)->gen == 6) {
- DRM_DEBUG_DRIVER("Sandybridge: RC6 disabled\n");
- return 0;
+ DRM_DEBUG_DRIVER("Sandybridge: intel_iommu_enabled %s -- RC6 %sabled\n",
+ intel_iommu_enabled ? "true" : "false",
+ !intel_iommu_enabled ? "en" : "dis");
+ return !intel_iommu_enabled;
}
DRM_DEBUG_DRIVER("RC6 enabled\n");
return 1;
diff --git a/trunk/drivers/gpu/drm/radeon/evergreen.c b/trunk/drivers/gpu/drm/radeon/evergreen.c
index 92c9628c572d..5e00d1670aa9 100644
--- a/trunk/drivers/gpu/drm/radeon/evergreen.c
+++ b/trunk/drivers/gpu/drm/radeon/evergreen.c
@@ -3276,18 +3276,6 @@ int evergreen_init(struct radeon_device *rdev)
rdev->accel_working = false;
}
}
-
- /* Don't start up if the MC ucode is missing on BTC parts.
- * The default clocks and voltages before the MC ucode
- * is loaded are not sufficient for advanced operations.
- */
- if (ASIC_IS_DCE5(rdev)) {
- if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
- DRM_ERROR("radeon: MC ucode required for NI+.\n");
- return -EINVAL;
- }
- }
-
return 0;
}
diff --git a/trunk/drivers/gpu/drm/radeon/radeon_atombios.c b/trunk/drivers/gpu/drm/radeon/radeon_atombios.c
index 5082d17d14dc..d24baf30efcb 100644
--- a/trunk/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/trunk/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -2560,11 +2560,7 @@ void radeon_atombios_get_power_modes(struct radeon_device *rdev)
rdev->pm.current_power_state_index = rdev->pm.default_power_state_index;
rdev->pm.current_clock_mode_index = 0;
- if (rdev->pm.default_power_state_index >= 0)
- rdev->pm.current_vddc =
- rdev->pm.power_state[rdev->pm.default_power_state_index].clock_info[0].voltage.voltage;
- else
- rdev->pm.current_vddc = 0;
+ rdev->pm.current_vddc = rdev->pm.power_state[rdev->pm.default_power_state_index].clock_info[0].voltage.voltage;
}
void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable)
diff --git a/trunk/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/trunk/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index f94b33ae2215..8aa1dbb45c67 100644
--- a/trunk/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/trunk/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1093,6 +1093,7 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
struct vmw_surface *surface = NULL;
struct vmw_dma_buffer *bo = NULL;
struct ttm_base_object *user_obj;
+ u64 required_size;
int ret;
/**
@@ -1101,9 +1102,8 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
* requested framebuffer.
*/
- if (!vmw_kms_validate_mode_vram(dev_priv,
- mode_cmd->pitch,
- mode_cmd->height)) {
+ required_size = mode_cmd->pitch * mode_cmd->height;
+ if (unlikely(required_size > (u64) dev_priv->vram_size)) {
DRM_ERROR("VRAM size is too small for requested mode.\n");
return ERR_PTR(-ENOMEM);
}
diff --git a/trunk/drivers/hwmon/coretemp.c b/trunk/drivers/hwmon/coretemp.c
index 1fdef885341c..104b3767516c 100644
--- a/trunk/drivers/hwmon/coretemp.c
+++ b/trunk/drivers/hwmon/coretemp.c
@@ -57,15 +57,16 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
#define TOTAL_ATTRS (MAX_CORE_ATTRS + 1)
#define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
+#ifdef CONFIG_SMP
#define TO_PHYS_ID(cpu) cpu_data(cpu).phys_proc_id
#define TO_CORE_ID(cpu) cpu_data(cpu).cpu_core_id
-#define TO_ATTR_NO(cpu) (TO_CORE_ID(cpu) + BASE_SYSFS_ATTR_NO)
-
-#ifdef CONFIG_SMP
#define for_each_sibling(i, cpu) for_each_cpu(i, cpu_sibling_mask(cpu))
#else
+#define TO_PHYS_ID(cpu) (cpu)
+#define TO_CORE_ID(cpu) (cpu)
#define for_each_sibling(i, cpu) for (i = 0; false; )
#endif
+#define TO_ATTR_NO(cpu) (TO_CORE_ID(cpu) + BASE_SYSFS_ATTR_NO)
/*
* Per-Core Temperature Data
diff --git a/trunk/drivers/input/mouse/sentelic.c b/trunk/drivers/input/mouse/sentelic.c
index 86d6f39178b0..c5b12d2e955a 100644
--- a/trunk/drivers/input/mouse/sentelic.c
+++ b/trunk/drivers/input/mouse/sentelic.c
@@ -2,7 +2,7 @@
* Finger Sensing Pad PS/2 mouse driver.
*
* Copyright (C) 2005-2007 Asia Vital Components Co., Ltd.
- * Copyright (C) 2005-2011 Tai-hwa Liang, Sentelic Corporation.
+ * Copyright (C) 2005-2010 Tai-hwa Liang, Sentelic Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -162,7 +162,7 @@ static int fsp_reg_write(struct psmouse *psmouse, int reg_addr, int reg_val)
ps2_sendbyte(ps2dev, v, FSP_CMD_TIMEOUT2);
if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0)
- goto out;
+ return -1;
if ((v = fsp_test_invert_cmd(reg_val)) != reg_val) {
/* inversion is required */
@@ -261,7 +261,7 @@ static int fsp_page_reg_write(struct psmouse *psmouse, int reg_val)
ps2_sendbyte(ps2dev, 0x88, FSP_CMD_TIMEOUT2);
if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0)
- goto out;
+ return -1;
if ((v = fsp_test_invert_cmd(reg_val)) != reg_val) {
ps2_sendbyte(ps2dev, 0x47, FSP_CMD_TIMEOUT2);
@@ -309,7 +309,7 @@ static int fsp_get_buttons(struct psmouse *psmouse, int *btn)
};
int val;
- if (fsp_reg_read(psmouse, FSP_REG_TMOD_STATUS, &val) == -1)
+ if (fsp_reg_read(psmouse, FSP_REG_TMOD_STATUS1, &val) == -1)
return -EIO;
*btn = buttons[(val & 0x30) >> 4];
diff --git a/trunk/drivers/input/mouse/sentelic.h b/trunk/drivers/input/mouse/sentelic.h
index 2e4af24f8c15..ed1395ac7b8b 100644
--- a/trunk/drivers/input/mouse/sentelic.h
+++ b/trunk/drivers/input/mouse/sentelic.h
@@ -2,7 +2,7 @@
* Finger Sensing Pad PS/2 mouse driver.
*
* Copyright (C) 2005-2007 Asia Vital Components Co., Ltd.
- * Copyright (C) 2005-2011 Tai-hwa Liang, Sentelic Corporation.
+ * Copyright (C) 2005-2009 Tai-hwa Liang, Sentelic Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -33,7 +33,6 @@
/* Finger-sensing Pad control registers */
#define FSP_REG_SYSCTL1 0x10
#define FSP_BIT_EN_REG_CLK BIT(5)
-#define FSP_REG_TMOD_STATUS 0x20
#define FSP_REG_OPC_QDOWN 0x31
#define FSP_BIT_EN_OPC_TAG BIT(7)
#define FSP_REG_OPTZ_XLO 0x34
diff --git a/trunk/drivers/iommu/intel-iommu.c b/trunk/drivers/iommu/intel-iommu.c
index 31053a951c34..bdc447fd4766 100644
--- a/trunk/drivers/iommu/intel-iommu.c
+++ b/trunk/drivers/iommu/intel-iommu.c
@@ -41,7 +41,6 @@
#include
#include
#include
-#include
#include
#include
@@ -2189,6 +2188,18 @@ static inline void iommu_prepare_isa(void)
static int md_domain_init(struct dmar_domain *domain, int guest_width);
+static int __init si_domain_work_fn(unsigned long start_pfn,
+ unsigned long end_pfn, void *datax)
+{
+ int *ret = datax;
+
+ *ret = iommu_domain_identity_map(si_domain,
+ (uint64_t)start_pfn << PAGE_SHIFT,
+ (uint64_t)end_pfn << PAGE_SHIFT);
+ return *ret;
+
+}
+
static int __init si_domain_init(int hw)
{
struct dmar_drhd_unit *drhd;
@@ -2220,15 +2231,9 @@ static int __init si_domain_init(int hw)
return 0;
for_each_online_node(nid) {
- unsigned long start_pfn, end_pfn;
- int i;
-
- for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
- ret = iommu_domain_identity_map(si_domain,
- PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
- if (ret)
- return ret;
- }
+ work_with_active_regions(nid, si_domain_work_fn, &ret);
+ if (ret)
+ return ret;
}
return 0;
diff --git a/trunk/drivers/iommu/iommu.c b/trunk/drivers/iommu/iommu.c
index 5b5fa5cdaa31..2fb2963df553 100644
--- a/trunk/drivers/iommu/iommu.c
+++ b/trunk/drivers/iommu/iommu.c
@@ -90,7 +90,7 @@ struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
if (bus == NULL || bus->iommu_ops == NULL)
return NULL;
- domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+ domain = kmalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return NULL;
diff --git a/trunk/drivers/lguest/x86/core.c b/trunk/drivers/lguest/x86/core.c
index 39809035320a..65af42f2d593 100644
--- a/trunk/drivers/lguest/x86/core.c
+++ b/trunk/drivers/lguest/x86/core.c
@@ -697,7 +697,7 @@ void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start)
* interrupts are enabled. We always leave interrupts enabled while
* running the Guest.
*/
- regs->eflags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+ regs->eflags = X86_EFLAGS_IF | 0x2;
/*
* The "Extended Instruction Pointer" register says where the Guest is
diff --git a/trunk/drivers/macintosh/rack-meter.c b/trunk/drivers/macintosh/rack-meter.c
index 6dc26b61219b..2637c139777b 100644
--- a/trunk/drivers/macintosh/rack-meter.c
+++ b/trunk/drivers/macintosh/rack-meter.c
@@ -81,13 +81,13 @@ static int rackmeter_ignore_nice;
*/
static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
{
- u64 retval;
+ cputime64_t retval;
- retval = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE] +
- kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
+ retval = cputime64_add(kstat_cpu(cpu).cpustat.idle,
+ kstat_cpu(cpu).cpustat.iowait);
if (rackmeter_ignore_nice)
- retval += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
+ retval = cputime64_add(retval, kstat_cpu(cpu).cpustat.nice);
return retval;
}
@@ -220,11 +220,13 @@ static void rackmeter_do_timer(struct work_struct *work)
int i, offset, load, cumm, pause;
cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
- total_ticks = (unsigned int) (cur_jiffies - rcpu->prev_wall);
+ total_ticks = (unsigned int)cputime64_sub(cur_jiffies,
+ rcpu->prev_wall);
rcpu->prev_wall = cur_jiffies;
total_idle_ticks = get_cpu_idle_time(cpu);
- idle_ticks = (unsigned int) (total_idle_ticks - rcpu->prev_idle);
+ idle_ticks = (unsigned int) cputime64_sub(total_idle_ticks,
+ rcpu->prev_idle);
rcpu->prev_idle = total_idle_ticks;
/* We do a very dumb calculation to update the LEDs for now,
diff --git a/trunk/drivers/md/bitmap.c b/trunk/drivers/md/bitmap.c
index 6d03774b176e..b6907118283a 100644
--- a/trunk/drivers/md/bitmap.c
+++ b/trunk/drivers/md/bitmap.c
@@ -1393,6 +1393,9 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
atomic_read(&bitmap->behind_writes),
bitmap->mddev->bitmap_info.max_write_behind);
}
+ if (bitmap->mddev->degraded)
+ /* Never clear bits or update events_cleared when degraded */
+ success = 0;
while (sectors) {
sector_t blocks;
@@ -1406,7 +1409,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
return;
}
- if (success && !bitmap->mddev->degraded &&
+ if (success &&
bitmap->events_cleared < bitmap->mddev->events) {
bitmap->events_cleared = bitmap->mddev->events;
bitmap->need_sync = 1;
diff --git a/trunk/drivers/md/linear.c b/trunk/drivers/md/linear.c
index 627456542fb3..c3273efd08cb 100644
--- a/trunk/drivers/md/linear.c
+++ b/trunk/drivers/md/linear.c
@@ -230,7 +230,6 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
return -EINVAL;
rdev->raid_disk = rdev->saved_raid_disk;
- rdev->saved_raid_disk = -1;
newconf = linear_conf(mddev,mddev->raid_disks+1);
diff --git a/trunk/drivers/md/md.c b/trunk/drivers/md/md.c
index f47f1f8ac44b..ee981737edfc 100644
--- a/trunk/drivers/md/md.c
+++ b/trunk/drivers/md/md.c
@@ -7360,7 +7360,8 @@ static int remove_and_add_spares(struct mddev *mddev)
spares++;
md_new_event(mddev);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
- }
+ } else
+ break;
}
}
}
diff --git a/trunk/drivers/md/raid5.c b/trunk/drivers/md/raid5.c
index 858fdbb7eb07..31670f8d6b65 100644
--- a/trunk/drivers/md/raid5.c
+++ b/trunk/drivers/md/raid5.c
@@ -3065,17 +3065,11 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
}
} else if (test_bit(In_sync, &rdev->flags))
set_bit(R5_Insync, &dev->flags);
- else if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
+ else {
/* in sync if before recovery_offset */
- set_bit(R5_Insync, &dev->flags);
- else if (test_bit(R5_UPTODATE, &dev->flags) &&
- test_bit(R5_Expanded, &dev->flags))
- /* If we've reshaped into here, we assume it is Insync.
- * We will shortly update recovery_offset to make
- * it official.
- */
- set_bit(R5_Insync, &dev->flags);
-
+ if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
+ set_bit(R5_Insync, &dev->flags);
+ }
if (rdev && test_bit(R5_WriteError, &dev->flags)) {
clear_bit(R5_Insync, &dev->flags);
if (!test_bit(Faulty, &rdev->flags)) {
diff --git a/trunk/drivers/media/video/gspca/gspca.c b/trunk/drivers/media/video/gspca/gspca.c
index 2ca10dfec91f..881e04c7ffe6 100644
--- a/trunk/drivers/media/video/gspca/gspca.c
+++ b/trunk/drivers/media/video/gspca/gspca.c
@@ -838,13 +838,13 @@ static int gspca_init_transfer(struct gspca_dev *gspca_dev)
gspca_dev->usb_err = 0;
/* do the specific subdriver stuff before endpoint selection */
- intf = usb_ifnum_to_if(gspca_dev->dev, gspca_dev->iface);
- gspca_dev->alt = gspca_dev->cam.bulk ? intf->num_altsetting : 0;
+ gspca_dev->alt = 0;
if (gspca_dev->sd_desc->isoc_init) {
ret = gspca_dev->sd_desc->isoc_init(gspca_dev);
if (ret < 0)
goto unlock;
}
+ intf = usb_ifnum_to_if(gspca_dev->dev, gspca_dev->iface);
xfer = gspca_dev->cam.bulk ? USB_ENDPOINT_XFER_BULK
: USB_ENDPOINT_XFER_ISOC;
@@ -957,7 +957,7 @@ static int gspca_init_transfer(struct gspca_dev *gspca_dev)
ret = -EIO;
goto out;
}
- gspca_dev->alt = ep_tb[--alt_idx].alt;
+ alt = ep_tb[--alt_idx].alt;
}
}
out:
diff --git a/trunk/drivers/media/video/omap3isp/ispccdc.c b/trunk/drivers/media/video/omap3isp/ispccdc.c
index 54a4a3f22e2e..b0b0fa5a3572 100644
--- a/trunk/drivers/media/video/omap3isp/ispccdc.c
+++ b/trunk/drivers/media/video/omap3isp/ispccdc.c
@@ -1408,7 +1408,7 @@ static void ccdc_hs_vs_isr(struct isp_ccdc_device *ccdc)
{
struct isp_pipeline *pipe =
to_isp_pipeline(&ccdc->video_out.video.entity);
- struct video_device *vdev = ccdc->subdev.devnode;
+ struct video_device *vdev = &ccdc->subdev.devnode;
struct v4l2_event event;
memset(&event, 0, sizeof(event));
diff --git a/trunk/drivers/media/video/omap3isp/ispstat.c b/trunk/drivers/media/video/omap3isp/ispstat.c
index bc0b2c7349b9..68d539456c55 100644
--- a/trunk/drivers/media/video/omap3isp/ispstat.c
+++ b/trunk/drivers/media/video/omap3isp/ispstat.c
@@ -496,7 +496,7 @@ static int isp_stat_bufs_alloc(struct ispstat *stat, u32 size)
static void isp_stat_queue_event(struct ispstat *stat, int err)
{
- struct video_device *vdev = stat->subdev.devnode;
+ struct video_device *vdev = &stat->subdev.devnode;
struct v4l2_event event;
struct omap3isp_stat_event_status *status = (void *)event.u.data;
diff --git a/trunk/drivers/mfd/ab5500-debugfs.c b/trunk/drivers/mfd/ab5500-debugfs.c
index b7b2d3483fd4..43c0ebb81956 100644
--- a/trunk/drivers/mfd/ab5500-debugfs.c
+++ b/trunk/drivers/mfd/ab5500-debugfs.c
@@ -4,7 +4,7 @@
* Debugfs support for the AB5500 MFD driver
*/
-#include
+#include
#include
#include
#include
diff --git a/trunk/drivers/mfd/ab8500-core.c b/trunk/drivers/mfd/ab8500-core.c
index d3d572b2317b..1e9173804ede 100644
--- a/trunk/drivers/mfd/ab8500-core.c
+++ b/trunk/drivers/mfd/ab8500-core.c
@@ -620,7 +620,6 @@ static struct resource __devinitdata ab8500_fg_resources[] = {
static struct resource __devinitdata ab8500_chargalg_resources[] = {};
-#ifdef CONFIG_DEBUG_FS
static struct resource __devinitdata ab8500_debug_resources[] = {
{
.name = "IRQ_FIRST",
@@ -635,7 +634,6 @@ static struct resource __devinitdata ab8500_debug_resources[] = {
.flags = IORESOURCE_IRQ,
},
};
-#endif
static struct resource __devinitdata ab8500_usb_resources[] = {
{
diff --git a/trunk/drivers/mfd/adp5520.c b/trunk/drivers/mfd/adp5520.c
index 8d816cce8322..f1d88483112c 100644
--- a/trunk/drivers/mfd/adp5520.c
+++ b/trunk/drivers/mfd/adp5520.c
@@ -109,7 +109,7 @@ int adp5520_set_bits(struct device *dev, int reg, uint8_t bit_mask)
ret = __adp5520_read(chip->client, reg, &reg_val);
- if (!ret && ((reg_val & bit_mask) != bit_mask)) {
+ if (!ret && ((reg_val & bit_mask) == 0)) {
reg_val |= bit_mask;
ret = __adp5520_write(chip->client, reg, reg_val);
}
diff --git a/trunk/drivers/mfd/da903x.c b/trunk/drivers/mfd/da903x.c
index 1924b857a0fb..1b79c37fd599 100644
--- a/trunk/drivers/mfd/da903x.c
+++ b/trunk/drivers/mfd/da903x.c
@@ -182,7 +182,7 @@ int da903x_set_bits(struct device *dev, int reg, uint8_t bit_mask)
if (ret)
goto out;
- if ((reg_val & bit_mask) != bit_mask) {
+ if ((reg_val & bit_mask) == 0) {
reg_val |= bit_mask;
ret = __da903x_write(chip->client, reg, reg_val);
}
@@ -549,7 +549,6 @@ static int __devexit da903x_remove(struct i2c_client *client)
struct da903x_chip *chip = i2c_get_clientdata(client);
da903x_remove_subdevs(chip);
- free_irq(client->irq, chip);
kfree(chip);
return 0;
}
diff --git a/trunk/drivers/mfd/jz4740-adc.c b/trunk/drivers/mfd/jz4740-adc.c
index ef39528088f2..1e9ee533eacb 100644
--- a/trunk/drivers/mfd/jz4740-adc.c
+++ b/trunk/drivers/mfd/jz4740-adc.c
@@ -16,7 +16,6 @@
*/
#include
-#include
#include
#include
#include
diff --git a/trunk/drivers/mfd/tps6586x.c b/trunk/drivers/mfd/tps6586x.c
index a5ddf31b60ca..bba26d96c240 100644
--- a/trunk/drivers/mfd/tps6586x.c
+++ b/trunk/drivers/mfd/tps6586x.c
@@ -197,7 +197,7 @@ int tps6586x_set_bits(struct device *dev, int reg, uint8_t bit_mask)
if (ret)
goto out;
- if ((reg_val & bit_mask) != bit_mask) {
+ if ((reg_val & bit_mask) == 0) {
reg_val |= bit_mask;
ret = __tps6586x_write(to_i2c_client(dev), reg, reg_val);
}
diff --git a/trunk/drivers/mfd/tps65910.c b/trunk/drivers/mfd/tps65910.c
index c1da84bc1573..6f5b8cf2f652 100644
--- a/trunk/drivers/mfd/tps65910.c
+++ b/trunk/drivers/mfd/tps65910.c
@@ -120,7 +120,7 @@ int tps65910_clear_bits(struct tps65910 *tps65910, u8 reg, u8 mask)
goto out;
}
- data &= ~mask;
+ data &= mask;
err = tps65910_i2c_write(tps65910, reg, 1, &data);
if (err)
dev_err(tps65910->dev, "write to reg %x failed\n", reg);
diff --git a/trunk/drivers/mfd/twl-core.c b/trunk/drivers/mfd/twl-core.c
index 61e70cfaa774..bfbd66021afd 100644
--- a/trunk/drivers/mfd/twl-core.c
+++ b/trunk/drivers/mfd/twl-core.c
@@ -363,13 +363,13 @@ int twl_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
return -EPERM;
}
- if (unlikely(!inuse)) {
- pr_err("%s: not initialized\n", DRIVER_NAME);
- return -EPERM;
- }
sid = twl_map[mod_no].sid;
twl = &twl_modules[sid];
+ if (unlikely(!inuse)) {
+ pr_err("%s: client %d is not initialized\n", DRIVER_NAME, sid);
+ return -EPERM;
+ }
mutex_lock(&twl->xfer_lock);
/*
* [MSG1]: fill the register address data
@@ -420,13 +420,13 @@ int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
return -EPERM;
}
- if (unlikely(!inuse)) {
- pr_err("%s: not initialized\n", DRIVER_NAME);
- return -EPERM;
- }
sid = twl_map[mod_no].sid;
twl = &twl_modules[sid];
+ if (unlikely(!inuse)) {
+ pr_err("%s: client %d is not initialized\n", DRIVER_NAME, sid);
+ return -EPERM;
+ }
mutex_lock(&twl->xfer_lock);
/* [MSG1] fill the register address data */
msg = &twl->xfer_msg[0];
diff --git a/trunk/drivers/mfd/twl4030-irq.c b/trunk/drivers/mfd/twl4030-irq.c
index 29f11e0765fe..f062c8cc6c38 100644
--- a/trunk/drivers/mfd/twl4030-irq.c
+++ b/trunk/drivers/mfd/twl4030-irq.c
@@ -432,7 +432,6 @@ struct sih_agent {
u32 edge_change;
struct mutex irq_lock;
- char *irq_name;
};
/*----------------------------------------------------------------------*/
@@ -590,7 +589,7 @@ static inline int sih_read_isr(const struct sih *sih)
* Generic handler for SIH interrupts ... we "know" this is called
* in task context, with IRQs enabled.
*/
-static irqreturn_t handle_twl4030_sih(int irq, void *data)
+static void handle_twl4030_sih(unsigned irq, struct irq_desc *desc)
{
struct sih_agent *agent = irq_get_handler_data(irq);
const struct sih *sih = agent->sih;
@@ -603,7 +602,7 @@ static irqreturn_t handle_twl4030_sih(int irq, void *data)
pr_err("twl4030: %s SIH, read ISR error %d\n",
sih->name, isr);
/* REVISIT: recover; eventually mask it all, etc */
- return IRQ_HANDLED;
+ return;
}
while (isr) {
@@ -617,7 +616,6 @@ static irqreturn_t handle_twl4030_sih(int irq, void *data)
pr_err("twl4030: %s SIH, invalid ISR bit %d\n",
sih->name, irq);
}
- return IRQ_HANDLED;
}
static unsigned twl4030_irq_next;
@@ -670,19 +668,18 @@ int twl4030_sih_setup(int module)
activate_irq(irq);
}
+ status = irq_base;
twl4030_irq_next += i;
/* replace generic PIH handler (handle_simple_irq) */
irq = sih_mod + twl4030_irq_base;
irq_set_handler_data(irq, agent);
- agent->irq_name = kasprintf(GFP_KERNEL, "twl4030_%s", sih->name);
- status = request_threaded_irq(irq, NULL, handle_twl4030_sih, 0,
- agent->irq_name ?: sih->name, NULL);
+ irq_set_chained_handler(irq, handle_twl4030_sih);
pr_info("twl4030: %s (irq %d) chaining IRQs %d..%d\n", sih->name,
irq, irq_base, twl4030_irq_next - 1);
- return status < 0 ? status : irq_base;
+ return status;
}
/* FIXME need a call to reverse twl4030_sih_setup() ... */
@@ -736,9 +733,8 @@ int twl4030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
}
/* install an irq handler to demultiplex the TWL4030 interrupt */
- status = request_threaded_irq(irq_num, NULL, handle_twl4030_pih,
- IRQF_ONESHOT,
- "TWL4030-PIH", NULL);
+ status = request_threaded_irq(irq_num, NULL, handle_twl4030_pih, 0,
+ "TWL4030-PIH", NULL);
if (status < 0) {
pr_err("twl4030: could not claim irq%d: %d\n", irq_num, status);
goto fail_rqirq;
diff --git a/trunk/drivers/mfd/wm8994-core.c b/trunk/drivers/mfd/wm8994-core.c
index 61894fced8ea..5d6ba132837e 100644
--- a/trunk/drivers/mfd/wm8994-core.c
+++ b/trunk/drivers/mfd/wm8994-core.c
@@ -239,7 +239,6 @@ static int wm8994_suspend(struct device *dev)
switch (wm8994->type) {
case WM8958:
- case WM1811:
ret = wm8994_reg_read(wm8994, WM8958_MIC_DETECT_1);
if (ret < 0) {
dev_err(dev, "Failed to read power status: %d\n", ret);
diff --git a/trunk/drivers/mmc/host/mmci.c b/trunk/drivers/mmc/host/mmci.c
index 0726e59fd418..50b5f9926f64 100644
--- a/trunk/drivers/mmc/host/mmci.c
+++ b/trunk/drivers/mmc/host/mmci.c
@@ -675,8 +675,7 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
unsigned int status)
{
/* First check for errors */
- if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_STARTBITERR|
- MCI_TXUNDERRUN|MCI_RXOVERRUN)) {
+ if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_TXUNDERRUN|MCI_RXOVERRUN)) {
u32 remain, success;
/* Terminate the DMA transfer */
@@ -755,12 +754,8 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd,
}
if (!cmd->data || cmd->error) {
- if (host->data) {
- /* Terminate the DMA transfer */
- if (dma_inprogress(host))
- mmci_dma_data_error(host);
+ if (host->data)
mmci_stop_data(host);
- }
mmci_request_end(host, cmd->mrq);
} else if (!(cmd->data->flags & MMC_DATA_READ)) {
mmci_start_data(host, cmd->data);
@@ -960,9 +955,8 @@ static irqreturn_t mmci_irq(int irq, void *dev_id)
dev_dbg(mmc_dev(host->mmc), "irq0 (data+cmd) %08x\n", status);
data = host->data;
- if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_STARTBITERR|
- MCI_TXUNDERRUN|MCI_RXOVERRUN|MCI_DATAEND|
- MCI_DATABLOCKEND) && data)
+ if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_TXUNDERRUN|
+ MCI_RXOVERRUN|MCI_DATAEND|MCI_DATABLOCKEND) && data)
mmci_data_irq(host, data, status);
cmd = host->cmd;
diff --git a/trunk/drivers/net/ethernet/freescale/Kconfig b/trunk/drivers/net/ethernet/freescale/Kconfig
index 9de37642f09f..5272f9d4dda9 100644
--- a/trunk/drivers/net/ethernet/freescale/Kconfig
+++ b/trunk/drivers/net/ethernet/freescale/Kconfig
@@ -23,8 +23,8 @@ if NET_VENDOR_FREESCALE
config FEC
bool "FEC ethernet controller (of ColdFire and some i.MX CPUs)"
depends on (M523x || M527x || M5272 || M528x || M520x || M532x || \
- ARCH_MXC || SOC_IMX28)
- default ARCH_MXC || SOC_IMX28 if ARM
+ ARCH_MXC || ARCH_MXS)
+ default ARCH_MXC || ARCH_MXS if ARM
select PHYLIB
---help---
Say Y here if you want to use the built-in 10/100 Fast ethernet
diff --git a/trunk/drivers/net/ethernet/marvell/skge.c b/trunk/drivers/net/ethernet/marvell/skge.c
index dea0cb4400e2..c7b60839ac99 100644
--- a/trunk/drivers/net/ethernet/marvell/skge.c
+++ b/trunk/drivers/net/ethernet/marvell/skge.c
@@ -2606,9 +2606,6 @@ static int skge_up(struct net_device *dev)
spin_unlock_irq(&hw->hw_lock);
napi_enable(&skge->napi);
-
- skge_set_multicast(dev);
-
return 0;
free_tx_ring:
diff --git a/trunk/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/trunk/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index 5829e0b47e7e..227997d775e8 100644
--- a/trunk/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/trunk/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -147,7 +147,6 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
if (priv->mdev->dev->caps.comp_pool && cq->vector)
mlx4_release_eq(priv->mdev->dev, cq->vector);
- cq->vector = 0;
cq->buf_size = 0;
cq->buf = NULL;
}
diff --git a/trunk/drivers/net/ethernet/realtek/r8169.c b/trunk/drivers/net/ethernet/realtek/r8169.c
index c8f47f17186f..67bf07819992 100644
--- a/trunk/drivers/net/ethernet/realtek/r8169.c
+++ b/trunk/drivers/net/ethernet/realtek/r8169.c
@@ -477,6 +477,7 @@ enum rtl_register_content {
/* Config1 register p.24 */
LEDS1 = (1 << 7),
LEDS0 = (1 << 6),
+ MSIEnable = (1 << 5), /* Enable Message Signaled Interrupt */
Speed_down = (1 << 4),
MEMMAP = (1 << 3),
IOMAP = (1 << 2),
@@ -484,7 +485,6 @@ enum rtl_register_content {
PMEnable = (1 << 0), /* Power Management Enable */
/* Config2 register p. 25 */
- MSIEnable = (1 << 5), /* 8169 only. Reserved in the 8168. */
PCI_Clock_66MHz = 0x01,
PCI_Clock_33MHz = 0x00,
@@ -3426,24 +3426,22 @@ static const struct rtl_cfg_info {
};
/* Cfg9346_Unlock assumed. */
-static unsigned rtl_try_msi(struct rtl8169_private *tp,
+static unsigned rtl_try_msi(struct pci_dev *pdev, void __iomem *ioaddr,
const struct rtl_cfg_info *cfg)
{
- void __iomem *ioaddr = tp->mmio_addr;
unsigned msi = 0;
u8 cfg2;
cfg2 = RTL_R8(Config2) & ~MSIEnable;
if (cfg->features & RTL_FEATURE_MSI) {
- if (pci_enable_msi(tp->pci_dev)) {
- netif_info(tp, hw, tp->dev, "no MSI. Back to INTx.\n");
+ if (pci_enable_msi(pdev)) {
+ dev_info(&pdev->dev, "no MSI. Back to INTx.\n");
} else {
cfg2 |= MSIEnable;
msi = RTL_FEATURE_MSI;
}
}
- if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
- RTL_W8(Config2, cfg2);
+ RTL_W8(Config2, cfg2);
return msi;
}
@@ -4079,7 +4077,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
tp->features |= RTL_FEATURE_WOL;
if ((RTL_R8(Config5) & (UWF | BWF | MWF)) != 0)
tp->features |= RTL_FEATURE_WOL;
- tp->features |= rtl_try_msi(tp, cfg);
+ tp->features |= rtl_try_msi(pdev, ioaddr, cfg);
RTL_W8(Cfg9346, Cfg9346_Lock);
if (rtl_tbi_enabled(tp)) {
diff --git a/trunk/drivers/net/ethernet/ti/davinci_cpdma.c b/trunk/drivers/net/ethernet/ti/davinci_cpdma.c
index c97d2f590855..dca9d3369cdd 100644
--- a/trunk/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/trunk/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -836,13 +836,11 @@ int cpdma_chan_stop(struct cpdma_chan *chan)
chan_write(chan, cp, CPDMA_TEARDOWN_VALUE);
/* handle completed packets */
- spin_unlock_irqrestore(&chan->lock, flags);
do {
ret = __cpdma_chan_process(chan);
if (ret < 0)
break;
} while ((ret & CPDMA_DESC_TD_COMPLETE) == 0);
- spin_lock_irqsave(&chan->lock, flags);
/* remaining packets haven't been tx/rx'ed, clean them up */
while (chan->head) {
diff --git a/trunk/drivers/net/usb/asix.c b/trunk/drivers/net/usb/asix.c
index e95f0e60a9bc..e6fed4d4cb77 100644
--- a/trunk/drivers/net/usb/asix.c
+++ b/trunk/drivers/net/usb/asix.c
@@ -1655,10 +1655,6 @@ static const struct usb_device_id products [] = {
// ASIX 88772a
USB_DEVICE(0x0db0, 0xa877),
.driver_info = (unsigned long) &ax88772_info,
-}, {
- // Asus USB Ethernet Adapter
- USB_DEVICE (0x0b95, 0x7e2b),
- .driver_info = (unsigned long) &ax88772_info,
},
{ }, // END
};
diff --git a/trunk/drivers/net/wireless/ath/ath9k/main.c b/trunk/drivers/net/wireless/ath/ath9k/main.c
index a9c5ae75277e..d2348a5a7809 100644
--- a/trunk/drivers/net/wireless/ath/ath9k/main.c
+++ b/trunk/drivers/net/wireless/ath/ath9k/main.c
@@ -1843,9 +1843,6 @@ static void ath9k_sta_notify(struct ieee80211_hw *hw,
struct ath_softc *sc = hw->priv;
struct ath_node *an = (struct ath_node *) sta->drv_priv;
- if (!(sc->sc_flags & SC_OP_TXAGGR))
- return;
-
switch (cmd) {
case STA_NOTIFY_SLEEP:
an->sleeping = true;
diff --git a/trunk/drivers/net/wireless/ath/ath9k/rc.c b/trunk/drivers/net/wireless/ath/ath9k/rc.c
index 528d5f3e868c..888abc2be3a5 100644
--- a/trunk/drivers/net/wireless/ath/ath9k/rc.c
+++ b/trunk/drivers/net/wireless/ath/ath9k/rc.c
@@ -1271,9 +1271,7 @@ static void ath_rc_init(struct ath_softc *sc,
ath_rc_priv->max_valid_rate = k;
ath_rc_sort_validrates(rate_table, ath_rc_priv);
- ath_rc_priv->rate_max_phy = (k > 4) ?
- ath_rc_priv->valid_rate_index[k-4] :
- ath_rc_priv->valid_rate_index[k-1];
+ ath_rc_priv->rate_max_phy = ath_rc_priv->valid_rate_index[k-4];
ath_rc_priv->rate_table = rate_table;
ath_dbg(common, ATH_DBG_CONFIG,
diff --git a/trunk/drivers/net/wireless/b43/pio.c b/trunk/drivers/net/wireless/b43/pio.c
index 279a53eae4c5..fcff923b3c18 100644
--- a/trunk/drivers/net/wireless/b43/pio.c
+++ b/trunk/drivers/net/wireless/b43/pio.c
@@ -617,19 +617,9 @@ static bool pio_rx_frame(struct b43_pio_rxqueue *q)
const char *err_msg = NULL;
struct b43_rxhdr_fw4 *rxhdr =
(struct b43_rxhdr_fw4 *)wl->pio_scratchspace;
- size_t rxhdr_size = sizeof(*rxhdr);
BUILD_BUG_ON(sizeof(wl->pio_scratchspace) < sizeof(*rxhdr));
- switch (dev->fw.hdr_format) {
- case B43_FW_HDR_410:
- case B43_FW_HDR_351:
- rxhdr_size -= sizeof(rxhdr->format_598) -
- sizeof(rxhdr->format_351);
- break;
- case B43_FW_HDR_598:
- break;
- }
- memset(rxhdr, 0, rxhdr_size);
+ memset(rxhdr, 0, sizeof(*rxhdr));
/* Check if we have data and wait for it to get ready. */
if (q->rev >= 8) {
@@ -667,11 +657,11 @@ static bool pio_rx_frame(struct b43_pio_rxqueue *q)
/* Get the preamble (RX header) */
if (q->rev >= 8) {
- b43_block_read(dev, rxhdr, rxhdr_size,
+ b43_block_read(dev, rxhdr, sizeof(*rxhdr),
q->mmio_base + B43_PIO8_RXDATA,
sizeof(u32));
} else {
- b43_block_read(dev, rxhdr, rxhdr_size,
+ b43_block_read(dev, rxhdr, sizeof(*rxhdr),
q->mmio_base + B43_PIO_RXDATA,
sizeof(u16));
}
diff --git a/trunk/drivers/net/wireless/iwlwifi/iwl-agn-rxon.c b/trunk/drivers/net/wireless/iwlwifi/iwl-agn-rxon.c
index 5c7c17c7166a..a7a6def40d05 100644
--- a/trunk/drivers/net/wireless/iwlwifi/iwl-agn-rxon.c
+++ b/trunk/drivers/net/wireless/iwlwifi/iwl-agn-rxon.c
@@ -606,8 +606,8 @@ int iwlagn_mac_config(struct ieee80211_hw *hw, u32 changed)
if (ctx->ht.enabled) {
/* if HT40 is used, it should not change
* after associated except channel switch */
- if (!ctx->ht.is_40mhz ||
- !iwl_is_associated_ctx(ctx))
+ if (iwl_is_associated_ctx(ctx) &&
+ !ctx->ht.is_40mhz)
iwlagn_config_ht40(conf, ctx);
} else
ctx->ht.is_40mhz = false;
diff --git a/trunk/drivers/net/wireless/iwlwifi/iwl-agn-tx.c b/trunk/drivers/net/wireless/iwlwifi/iwl-agn-tx.c
index df1540ca6102..35a6b71f358c 100644
--- a/trunk/drivers/net/wireless/iwlwifi/iwl-agn-tx.c
+++ b/trunk/drivers/net/wireless/iwlwifi/iwl-agn-tx.c
@@ -91,10 +91,7 @@ static void iwlagn_tx_cmd_build_basic(struct iwl_priv *priv,
tx_cmd->tid_tspec = qc[0] & 0xf;
tx_flags &= ~TX_CMD_FLG_SEQ_CTL_MSK;
} else {
- if (info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ)
- tx_flags |= TX_CMD_FLG_SEQ_CTL_MSK;
- else
- tx_flags &= ~TX_CMD_FLG_SEQ_CTL_MSK;
+ tx_flags |= TX_CMD_FLG_SEQ_CTL_MSK;
}
iwlagn_tx_cmd_protection(priv, info, fc, &tx_flags);
diff --git a/trunk/drivers/net/wireless/iwlwifi/iwl-agn.c b/trunk/drivers/net/wireless/iwlwifi/iwl-agn.c
index e0e9a3dfbc00..bacc06c95e7a 100644
--- a/trunk/drivers/net/wireless/iwlwifi/iwl-agn.c
+++ b/trunk/drivers/net/wireless/iwlwifi/iwl-agn.c
@@ -2850,9 +2850,6 @@ static int iwlagn_mac_tx_sync(struct ieee80211_hw *hw,
int ret;
u8 sta_id;
- if (ctx->ctxid != IWL_RXON_CTX_PAN)
- return 0;
-
IWL_DEBUG_MAC80211(priv, "enter\n");
mutex_lock(&priv->shrd->mutex);
@@ -2901,9 +2898,6 @@ static void iwlagn_mac_finish_tx_sync(struct ieee80211_hw *hw,
struct iwl_vif_priv *vif_priv = (void *)vif->drv_priv;
struct iwl_rxon_context *ctx = vif_priv->ctx;
- if (ctx->ctxid != IWL_RXON_CTX_PAN)
- return;
-
IWL_DEBUG_MAC80211(priv, "enter\n");
mutex_lock(&priv->shrd->mutex);
diff --git a/trunk/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c b/trunk/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c
index 5f17ab8e76ba..ce918980e977 100644
--- a/trunk/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c
+++ b/trunk/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c
@@ -1197,7 +1197,9 @@ static int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
iwl_print_hex_dump(trans, IWL_DL_TX, (u8 *)tx_cmd->hdr, hdr_len);
/* Set up entry for this TFD in Tx byte-count array */
- iwl_trans_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len));
+ if (is_agg)
+ iwl_trans_txq_update_byte_cnt_tbl(trans, txq,
+ le16_to_cpu(tx_cmd->len));
dma_sync_single_for_device(bus(trans)->dev, txcmd_phys, firstlen,
DMA_BIDIRECTIONAL);
diff --git a/trunk/drivers/net/wireless/mwifiex/cmdevt.c b/trunk/drivers/net/wireless/mwifiex/cmdevt.c
index 6e0a3eaecf70..ac278156d390 100644
--- a/trunk/drivers/net/wireless/mwifiex/cmdevt.c
+++ b/trunk/drivers/net/wireless/mwifiex/cmdevt.c
@@ -939,6 +939,7 @@ mwifiex_cancel_pending_ioctl(struct mwifiex_adapter *adapter)
{
struct cmd_ctrl_node *cmd_node = NULL, *tmp_node = NULL;
unsigned long cmd_flags;
+ unsigned long cmd_pending_q_flags;
unsigned long scan_pending_q_flags;
uint16_t cancel_scan_cmd = false;
@@ -948,9 +949,12 @@ mwifiex_cancel_pending_ioctl(struct mwifiex_adapter *adapter)
cmd_node = adapter->curr_cmd;
cmd_node->wait_q_enabled = false;
cmd_node->cmd_flag |= CMD_F_CANCELED;
+ spin_lock_irqsave(&adapter->cmd_pending_q_lock,
+ cmd_pending_q_flags);
+ list_del(&cmd_node->list);
+ spin_unlock_irqrestore(&adapter->cmd_pending_q_lock,
+ cmd_pending_q_flags);
mwifiex_insert_cmd_to_free_q(adapter, cmd_node);
- mwifiex_complete_cmd(adapter, adapter->curr_cmd);
- adapter->curr_cmd = NULL;
spin_unlock_irqrestore(&adapter->mwifiex_cmd_lock, cmd_flags);
}
@@ -977,6 +981,7 @@ mwifiex_cancel_pending_ioctl(struct mwifiex_adapter *adapter)
spin_unlock_irqrestore(&adapter->mwifiex_cmd_lock, cmd_flags);
}
adapter->cmd_wait_q.status = -1;
+ mwifiex_complete_cmd(adapter, adapter->curr_cmd);
}
/*
diff --git a/trunk/drivers/net/wireless/mwifiex/sta_ioctl.c b/trunk/drivers/net/wireless/mwifiex/sta_ioctl.c
index 1679c2593b7b..ea4a29b7e331 100644
--- a/trunk/drivers/net/wireless/mwifiex/sta_ioctl.c
+++ b/trunk/drivers/net/wireless/mwifiex/sta_ioctl.c
@@ -55,14 +55,9 @@ int mwifiex_wait_queue_complete(struct mwifiex_adapter *adapter)
{
bool cancel_flag = false;
int status = adapter->cmd_wait_q.status;
- struct cmd_ctrl_node *cmd_queued;
+ struct cmd_ctrl_node *cmd_queued = adapter->cmd_queued;
- if (!adapter->cmd_queued)
- return 0;
-
- cmd_queued = adapter->cmd_queued;
adapter->cmd_queued = NULL;
-
dev_dbg(adapter->dev, "cmd pending\n");
atomic_inc(&adapter->cmd_pending);
diff --git a/trunk/drivers/of/platform.c b/trunk/drivers/of/platform.c
index 63b3ec48c203..cbd5d701c7e0 100644
--- a/trunk/drivers/of/platform.c
+++ b/trunk/drivers/of/platform.c
@@ -314,7 +314,7 @@ static const struct of_dev_auxdata *of_dev_lookup(const struct of_dev_auxdata *l
if (!lookup)
return NULL;
- for(; lookup->compatible != NULL; lookup++) {
+ for(; lookup->name != NULL; lookup++) {
if (!of_device_is_compatible(np, lookup->compatible))
continue;
if (of_address_to_resource(np, 0, &res))
diff --git a/trunk/drivers/oprofile/nmi_timer_int.c b/trunk/drivers/oprofile/nmi_timer_int.c
deleted file mode 100644
index 76f1c9357f39..000000000000
--- a/trunk/drivers/oprofile/nmi_timer_int.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/**
- * @file nmi_timer_int.c
- *
- * @remark Copyright 2011 Advanced Micro Devices, Inc.
- *
- * @author Robert Richter
- */
-
-#include
-#include
-#include
-#include
-#include
-
-#ifdef CONFIG_OPROFILE_NMI_TIMER
-
-static DEFINE_PER_CPU(struct perf_event *, nmi_timer_events);
-static int ctr_running;
-
-static struct perf_event_attr nmi_timer_attr = {
- .type = PERF_TYPE_HARDWARE,
- .config = PERF_COUNT_HW_CPU_CYCLES,
- .size = sizeof(struct perf_event_attr),
- .pinned = 1,
- .disabled = 1,
-};
-
-static void nmi_timer_callback(struct perf_event *event,
- struct perf_sample_data *data,
- struct pt_regs *regs)
-{
- event->hw.interrupts = 0; /* don't throttle interrupts */
- oprofile_add_sample(regs, 0);
-}
-
-static int nmi_timer_start_cpu(int cpu)
-{
- struct perf_event *event = per_cpu(nmi_timer_events, cpu);
-
- if (!event) {
- event = perf_event_create_kernel_counter(&nmi_timer_attr, cpu, NULL,
- nmi_timer_callback, NULL);
- if (IS_ERR(event))
- return PTR_ERR(event);
- per_cpu(nmi_timer_events, cpu) = event;
- }
-
- if (event && ctr_running)
- perf_event_enable(event);
-
- return 0;
-}
-
-static void nmi_timer_stop_cpu(int cpu)
-{
- struct perf_event *event = per_cpu(nmi_timer_events, cpu);
-
- if (event && ctr_running)
- perf_event_disable(event);
-}
-
-static int nmi_timer_cpu_notifier(struct notifier_block *b, unsigned long action,
- void *data)
-{
- int cpu = (unsigned long)data;
- switch (action) {
- case CPU_DOWN_FAILED:
- case CPU_ONLINE:
- nmi_timer_start_cpu(cpu);
- break;
- case CPU_DOWN_PREPARE:
- nmi_timer_stop_cpu(cpu);
- break;
- }
- return NOTIFY_DONE;
-}
-
-static struct notifier_block nmi_timer_cpu_nb = {
- .notifier_call = nmi_timer_cpu_notifier
-};
-
-static int nmi_timer_start(void)
-{
- int cpu;
-
- get_online_cpus();
- ctr_running = 1;
- for_each_online_cpu(cpu)
- nmi_timer_start_cpu(cpu);
- put_online_cpus();
-
- return 0;
-}
-
-static void nmi_timer_stop(void)
-{
- int cpu;
-
- get_online_cpus();
- for_each_online_cpu(cpu)
- nmi_timer_stop_cpu(cpu);
- ctr_running = 0;
- put_online_cpus();
-}
-
-static void nmi_timer_shutdown(void)
-{
- struct perf_event *event;
- int cpu;
-
- get_online_cpus();
- unregister_cpu_notifier(&nmi_timer_cpu_nb);
- for_each_possible_cpu(cpu) {
- event = per_cpu(nmi_timer_events, cpu);
- if (!event)
- continue;
- perf_event_disable(event);
- per_cpu(nmi_timer_events, cpu) = NULL;
- perf_event_release_kernel(event);
- }
-
- put_online_cpus();
-}
-
-static int nmi_timer_setup(void)
-{
- int cpu, err;
- u64 period;
-
- /* clock cycles per tick: */
- period = (u64)cpu_khz * 1000;
- do_div(period, HZ);
- nmi_timer_attr.sample_period = period;
-
- get_online_cpus();
- err = register_cpu_notifier(&nmi_timer_cpu_nb);
- if (err)
- goto out;
- /* can't attach events to offline cpus: */
- for_each_online_cpu(cpu) {
- err = nmi_timer_start_cpu(cpu);
- if (err)
- break;
- }
- if (err)
- nmi_timer_shutdown();
-out:
- put_online_cpus();
- return err;
-}
-
-int __init op_nmi_timer_init(struct oprofile_operations *ops)
-{
- int err = 0;
-
- err = nmi_timer_setup();
- if (err)
- return err;
- nmi_timer_shutdown(); /* only check, don't alloc */
-
- ops->create_files = NULL;
- ops->setup = nmi_timer_setup;
- ops->shutdown = nmi_timer_shutdown;
- ops->start = nmi_timer_start;
- ops->stop = nmi_timer_stop;
- ops->cpu_type = "timer";
-
- printk(KERN_INFO "oprofile: using NMI timer interrupt.\n");
-
- return 0;
-}
-
-#endif
diff --git a/trunk/drivers/oprofile/oprof.c b/trunk/drivers/oprofile/oprof.c
index ed2c3ec07024..f8c752e408a6 100644
--- a/trunk/drivers/oprofile/oprof.c
+++ b/trunk/drivers/oprofile/oprof.c
@@ -246,31 +246,37 @@ static int __init oprofile_init(void)
int err;
/* always init architecture to setup backtrace support */
- timer_mode = 0;
err = oprofile_arch_init(&oprofile_ops);
- if (!err) {
- if (!timer && !oprofilefs_register())
- return 0;
- oprofile_arch_exit();
- }
- /* setup timer mode: */
- timer_mode = 1;
- /* no nmi timer mode if oprofile.timer is set */
- if (timer || op_nmi_timer_init(&oprofile_ops)) {
+ timer_mode = err || timer; /* fall back to timer mode on errors */
+ if (timer_mode) {
+ if (!err)
+ oprofile_arch_exit();
err = oprofile_timer_init(&oprofile_ops);
if (err)
return err;
}
- return oprofilefs_register();
+ err = oprofilefs_register();
+ if (!err)
+ return 0;
+
+ /* failed */
+ if (timer_mode)
+ oprofile_timer_exit();
+ else
+ oprofile_arch_exit();
+
+ return err;
}
static void __exit oprofile_exit(void)
{
oprofilefs_unregister();
- if (!timer_mode)
+ if (timer_mode)
+ oprofile_timer_exit();
+ else
oprofile_arch_exit();
}
diff --git a/trunk/drivers/oprofile/oprof.h b/trunk/drivers/oprofile/oprof.h
index d32ef816337c..177b73de5e5f 100644
--- a/trunk/drivers/oprofile/oprof.h
+++ b/trunk/drivers/oprofile/oprof.h
@@ -35,15 +35,7 @@ struct dentry;
void oprofile_create_files(struct super_block *sb, struct dentry *root);
int oprofile_timer_init(struct oprofile_operations *ops);
-#ifdef CONFIG_OPROFILE_NMI_TIMER
-int op_nmi_timer_init(struct oprofile_operations *ops);
-#else
-static inline int op_nmi_timer_init(struct oprofile_operations *ops)
-{
- return -ENODEV;
-}
-#endif
-
+void oprofile_timer_exit(void);
int oprofile_set_ulong(unsigned long *addr, unsigned long val);
int oprofile_set_timeout(unsigned long time);
diff --git a/trunk/drivers/oprofile/timer_int.c b/trunk/drivers/oprofile/timer_int.c
index 93404f72dfa8..878fba126582 100644
--- a/trunk/drivers/oprofile/timer_int.c
+++ b/trunk/drivers/oprofile/timer_int.c
@@ -97,24 +97,24 @@ static struct notifier_block __refdata oprofile_cpu_notifier = {
.notifier_call = oprofile_cpu_notify,
};
-static int oprofile_hrtimer_setup(void)
+int oprofile_timer_init(struct oprofile_operations *ops)
{
- return register_hotcpu_notifier(&oprofile_cpu_notifier);
+ int rc;
+
+ rc = register_hotcpu_notifier(&oprofile_cpu_notifier);
+ if (rc)
+ return rc;
+ ops->create_files = NULL;
+ ops->setup = NULL;
+ ops->shutdown = NULL;
+ ops->start = oprofile_hrtimer_start;
+ ops->stop = oprofile_hrtimer_stop;
+ ops->cpu_type = "timer";
+ printk(KERN_INFO "oprofile: using timer interrupt.\n");
+ return 0;
}
-static void oprofile_hrtimer_shutdown(void)
+void oprofile_timer_exit(void)
{
unregister_hotcpu_notifier(&oprofile_cpu_notifier);
}
-
-int oprofile_timer_init(struct oprofile_operations *ops)
-{
- ops->create_files = NULL;
- ops->setup = oprofile_hrtimer_setup;
- ops->shutdown = oprofile_hrtimer_shutdown;
- ops->start = oprofile_hrtimer_start;
- ops->stop = oprofile_hrtimer_stop;
- ops->cpu_type = "timer";
- printk(KERN_INFO "oprofile: using timer interrupt.\n");
- return 0;
-}
diff --git a/trunk/drivers/pci/Kconfig b/trunk/drivers/pci/Kconfig
index 37856f7c7781..f02b5235056d 100644
--- a/trunk/drivers/pci/Kconfig
+++ b/trunk/drivers/pci/Kconfig
@@ -98,11 +98,11 @@ config PCI_PASID
If unsure, say N.
config PCI_IOAPIC
- tristate "PCI IO-APIC hotplug support" if X86
+ bool
depends on PCI
depends on ACPI
depends on HOTPLUG
- default !X86
+ default y
config PCI_LABEL
def_bool y if (DMI || ACPI)
diff --git a/trunk/drivers/pci/ioapic.c b/trunk/drivers/pci/ioapic.c
index 205af8dc83c2..5775638ac017 100644
--- a/trunk/drivers/pci/ioapic.c
+++ b/trunk/drivers/pci/ioapic.c
@@ -17,7 +17,7 @@
*/
#include
-#include
+#include
#include
#include
#include
@@ -27,7 +27,7 @@ struct ioapic {
u32 gsi_base;
};
-static int __devinit ioapic_probe(struct pci_dev *dev, const struct pci_device_id *ent)
+static int ioapic_probe(struct pci_dev *dev, const struct pci_device_id *ent)
{
acpi_handle handle;
acpi_status status;
@@ -88,7 +88,7 @@ static int __devinit ioapic_probe(struct pci_dev *dev, const struct pci_device_i
return -ENODEV;
}
-static void __devexit ioapic_remove(struct pci_dev *dev)
+static void ioapic_remove(struct pci_dev *dev)
{
struct ioapic *ioapic = pci_get_drvdata(dev);
@@ -99,12 +99,13 @@ static void __devexit ioapic_remove(struct pci_dev *dev)
}
-static DEFINE_PCI_DEVICE_TABLE(ioapic_devices) = {
- { PCI_DEVICE_CLASS(PCI_CLASS_SYSTEM_PIC_IOAPIC, ~0) },
- { PCI_DEVICE_CLASS(PCI_CLASS_SYSTEM_PIC_IOXAPIC, ~0) },
+static struct pci_device_id ioapic_devices[] = {
+ { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_SYSTEM_PIC_IOAPIC << 8, 0xffff00, },
+ { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_SYSTEM_PIC_IOXAPIC << 8, 0xffff00, },
{ }
};
-MODULE_DEVICE_TABLE(pci, ioapic_devices);
static struct pci_driver ioapic_driver = {
.name = "ioapic",
diff --git a/trunk/drivers/rtc/interface.c b/trunk/drivers/rtc/interface.c
index 8e286259a007..3bcc7cfcaba7 100644
--- a/trunk/drivers/rtc/interface.c
+++ b/trunk/drivers/rtc/interface.c
@@ -73,6 +73,8 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm)
err = -EINVAL;
mutex_unlock(&rtc->ops_lock);
+ /* A timer might have just expired */
+ schedule_work(&rtc->irqwork);
return err;
}
EXPORT_SYMBOL_GPL(rtc_set_time);
@@ -112,6 +114,8 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs)
err = -EINVAL;
mutex_unlock(&rtc->ops_lock);
+ /* A timer might have just expired */
+ schedule_work(&rtc->irqwork);
return err;
}
@@ -319,6 +323,20 @@ int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
}
EXPORT_SYMBOL_GPL(rtc_read_alarm);
+static int ___rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
+{
+ int err;
+
+ if (!rtc->ops)
+ err = -ENODEV;
+ else if (!rtc->ops->set_alarm)
+ err = -EINVAL;
+ else
+ err = rtc->ops->set_alarm(rtc->dev.parent, alarm);
+
+ return err;
+}
+
static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
{
struct rtc_time tm;
@@ -342,14 +360,7 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
* over right here, before we set the alarm.
*/
- if (!rtc->ops)
- err = -ENODEV;
- else if (!rtc->ops->set_alarm)
- err = -EINVAL;
- else
- err = rtc->ops->set_alarm(rtc->dev.parent, alarm);
-
- return err;
+ return ___rtc_set_alarm(rtc, alarm);
}
int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
@@ -396,6 +407,8 @@ int rtc_initialize_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
timerqueue_add(&rtc->timerqueue, &rtc->aie_timer.node);
}
mutex_unlock(&rtc->ops_lock);
+ /* maybe that was in the past.*/
+ schedule_work(&rtc->irqwork);
return err;
}
EXPORT_SYMBOL_GPL(rtc_initialize_alarm);
@@ -763,6 +776,20 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
return 0;
}
+static void rtc_alarm_disable(struct rtc_device *rtc)
+{
+ struct rtc_wkalrm alarm;
+ struct rtc_time tm;
+
+ __rtc_read_time(rtc, &tm);
+
+ alarm.time = rtc_ktime_to_tm(ktime_add(rtc_tm_to_ktime(tm),
+ ktime_set(300, 0)));
+ alarm.enabled = 0;
+
+ ___rtc_set_alarm(rtc, &alarm);
+}
+
/**
* rtc_timer_remove - Removes a rtc_timer from the rtc_device timerqueue
* @rtc rtc device
@@ -784,8 +811,10 @@ static void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer)
struct rtc_wkalrm alarm;
int err;
next = timerqueue_getnext(&rtc->timerqueue);
- if (!next)
+ if (!next) {
+ rtc_alarm_disable(rtc);
return;
+ }
alarm.time = rtc_ktime_to_tm(next->expires);
alarm.enabled = 1;
err = __rtc_set_alarm(rtc, &alarm);
@@ -847,7 +876,8 @@ void rtc_timer_do_work(struct work_struct *work)
err = __rtc_set_alarm(rtc, &alarm);
if (err == -ETIME)
goto again;
- }
+ } else
+ rtc_alarm_disable(rtc);
mutex_unlock(&rtc->ops_lock);
}
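The new ___rtc_set_alarm() helper above only dispatches to the driver's rtc_class_ops; for reference, a minimal sketch of the driver side it ends up calling. The driver name and body are illustrative, not part of the patch:

#include <linux/rtc.h>

/* Illustrative only: the ->set_alarm() hook that ___rtc_set_alarm()
 * and rtc_alarm_disable() above ultimately invoke.
 */
static int example_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
{
	/* program the hardware from alrm->time and honour alrm->enabled */
	return 0;
}

static const struct rtc_class_ops example_rtc_ops = {
	.set_alarm	= example_rtc_set_alarm,
};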
diff --git a/trunk/drivers/usb/dwc3/core.c b/trunk/drivers/usb/dwc3/core.c
index 600d82348511..717ebc9ff941 100644
--- a/trunk/drivers/usb/dwc3/core.c
+++ b/trunk/drivers/usb/dwc3/core.c
@@ -264,7 +264,7 @@ static int __devinit dwc3_core_init(struct dwc3 *dwc)
ret = -ENODEV;
goto err0;
}
- dwc->revision = reg;
+ dwc->revision = reg & DWC3_GSNPSREV_MASK;
dwc3_core_soft_reset(dwc);
diff --git a/trunk/drivers/usb/gadget/epautoconf.c b/trunk/drivers/usb/gadget/epautoconf.c
index 4dff83d2f265..596a0b464e61 100644
--- a/trunk/drivers/usb/gadget/epautoconf.c
+++ b/trunk/drivers/usb/gadget/epautoconf.c
@@ -130,6 +130,9 @@ ep_matches (
num_req_streams = ep_comp->bmAttributes & 0x1f;
if (num_req_streams > ep->max_streams)
return 0;
+ /* Update the ep_comp descriptor if needed */
+ if (num_req_streams != ep->max_streams)
+ ep_comp->bmAttributes = ep->max_streams;
}
}
diff --git a/trunk/drivers/usb/host/isp1760-if.c b/trunk/drivers/usb/host/isp1760-if.c
index 2ac4ac2e4ef9..a7dc1e1d45f2 100644
--- a/trunk/drivers/usb/host/isp1760-if.c
+++ b/trunk/drivers/usb/host/isp1760-if.c
@@ -18,7 +18,7 @@
#include "isp1760-hcd.h"
-#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ)
+#ifdef CONFIG_OF
#include
#include
#include
@@ -31,7 +31,7 @@
#include
#endif
-#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ)
+#ifdef CONFIG_OF
struct isp1760 {
struct usb_hcd *hcd;
int rst_gpio;
@@ -437,7 +437,7 @@ static int __init isp1760_init(void)
ret = platform_driver_register(&isp1760_plat_driver);
if (!ret)
any_ret = 0;
-#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ)
+#ifdef CONFIG_OF
ret = platform_driver_register(&isp1760_of_driver);
if (!ret)
any_ret = 0;
@@ -457,7 +457,7 @@ module_init(isp1760_init);
static void __exit isp1760_exit(void)
{
platform_driver_unregister(&isp1760_plat_driver);
-#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ)
+#ifdef CONFIG_OF
platform_driver_unregister(&isp1760_of_driver);
#endif
#ifdef CONFIG_PCI
diff --git a/trunk/drivers/usb/musb/musb_host.c b/trunk/drivers/usb/musb/musb_host.c
index 79cb0af779fa..60ddba8066ea 100644
--- a/trunk/drivers/usb/musb/musb_host.c
+++ b/trunk/drivers/usb/musb/musb_host.c
@@ -774,10 +774,6 @@ static void musb_ep_program(struct musb *musb, u8 epnum,
if (musb->double_buffer_not_ok)
musb_writew(epio, MUSB_TXMAXP,
hw_ep->max_packet_sz_tx);
- else if (can_bulk_split(musb, qh->type))
- musb_writew(epio, MUSB_TXMAXP, packet_sz
- | ((hw_ep->max_packet_sz_tx /
- packet_sz) - 1) << 11);
else
musb_writew(epio, MUSB_TXMAXP,
qh->maxpacket |
diff --git a/trunk/drivers/watchdog/coh901327_wdt.c b/trunk/drivers/watchdog/coh901327_wdt.c
index 5b89f7d6cd0f..03f449a430d2 100644
--- a/trunk/drivers/watchdog/coh901327_wdt.c
+++ b/trunk/drivers/watchdog/coh901327_wdt.c
@@ -76,6 +76,8 @@ static int irq;
static void __iomem *virtbase;
static unsigned long coh901327_users;
static unsigned long boot_status;
+static u16 wdogenablestore;
+static u16 irqmaskstore;
static struct device *parent;
/*
@@ -459,10 +461,6 @@ static int __init coh901327_probe(struct platform_device *pdev)
}
#ifdef CONFIG_PM
-
-static u16 wdogenablestore;
-static u16 irqmaskstore;
-
static int coh901327_suspend(struct platform_device *pdev, pm_message_t state)
{
irqmaskstore = readw(virtbase + U300_WDOG_IMR) & 0x0001U;
diff --git a/trunk/drivers/watchdog/hpwdt.c b/trunk/drivers/watchdog/hpwdt.c
index 8464ea1c36a1..3774c9b8dac9 100644
--- a/trunk/drivers/watchdog/hpwdt.c
+++ b/trunk/drivers/watchdog/hpwdt.c
@@ -231,7 +231,6 @@ static int __devinit cru_detect(unsigned long map_entry,
cmn_regs.u1.reax = CRU_BIOS_SIGNATURE_VALUE;
- set_memory_x((unsigned long)bios32_entrypoint, (2 * PAGE_SIZE));
asminline_call(&cmn_regs, bios32_entrypoint);
if (cmn_regs.u1.ral != 0) {
@@ -249,10 +248,8 @@ static int __devinit cru_detect(unsigned long map_entry,
if ((physical_bios_base + physical_bios_offset)) {
cru_rom_addr =
ioremap(cru_physical_address, cru_length);
- if (cru_rom_addr) {
- set_memory_x((unsigned long)cru_rom_addr, cru_length);
+ if (cru_rom_addr)
retval = 0;
- }
}
printk(KERN_DEBUG "hpwdt: CRU Base Address: 0x%lx\n",
diff --git a/trunk/drivers/watchdog/iTCO_wdt.c b/trunk/drivers/watchdog/iTCO_wdt.c
index 99796c5d913d..ba6ad662635a 100644
--- a/trunk/drivers/watchdog/iTCO_wdt.c
+++ b/trunk/drivers/watchdog/iTCO_wdt.c
@@ -384,10 +384,10 @@ MODULE_PARM_DESC(nowayout,
"Watchdog cannot be stopped once started (default="
__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-static int turn_SMI_watchdog_clear_off = 1;
+static int turn_SMI_watchdog_clear_off = 0;
module_param(turn_SMI_watchdog_clear_off, int, 0);
MODULE_PARM_DESC(turn_SMI_watchdog_clear_off,
- "Turn off SMI clearing watchdog (depends on TCO-version)(default=1)");
+ "Turn off SMI clearing watchdog (default=0)");
/*
* Some TCO specific functions
@@ -813,7 +813,7 @@ static int __devinit iTCO_wdt_init(struct pci_dev *pdev,
ret = -EIO;
goto out_unmap;
}
- if (turn_SMI_watchdog_clear_off >= iTCO_wdt_private.iTCO_version) {
+ if (turn_SMI_watchdog_clear_off) {
/* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI# */
val32 = inl(SMI_EN);
val32 &= 0xffffdfff; /* Turn off SMI clearing watchdog */
diff --git a/trunk/drivers/watchdog/sp805_wdt.c b/trunk/drivers/watchdog/sp805_wdt.c
index bfaf9bb1ee0d..cc2cfbe33b30 100644
--- a/trunk/drivers/watchdog/sp805_wdt.c
+++ b/trunk/drivers/watchdog/sp805_wdt.c
@@ -351,7 +351,7 @@ static int __devexit sp805_wdt_remove(struct amba_device *adev)
return 0;
}
-static struct amba_id sp805_wdt_ids[] = {
+static struct amba_id sp805_wdt_ids[] __initdata = {
{
.id = 0x00141805,
.mask = 0x00ffffff,
diff --git a/trunk/firmware/README.AddingFirmware b/trunk/firmware/README.AddingFirmware
index ea78c3a17eec..e24cd8986d8b 100644
--- a/trunk/firmware/README.AddingFirmware
+++ b/trunk/firmware/README.AddingFirmware
@@ -12,7 +12,7 @@ here.
This directory is _NOT_ for adding arbitrary new firmware images. The
place to add those is the separate linux-firmware repository:
- git://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git
+ git://git.kernel.org/pub/scm/linux/kernel/git/dwmw2/linux-firmware.git
That repository contains all these firmware images which have been
extracted from older drivers, as well various new firmware images which
@@ -22,7 +22,6 @@ been permitted to redistribute under separate cover.
To submit firmware to that repository, please send either a git binary
diff or preferably a git pull request to:
David Woodhouse
- Ben Hutchings
Your commit should include an update to the WHENCE file clearly
identifying the licence under which the firmware is available, and
diff --git a/trunk/fs/btrfs/async-thread.c b/trunk/fs/btrfs/async-thread.c
index 0b394580d860..cb97174e2366 100644
--- a/trunk/fs/btrfs/async-thread.c
+++ b/trunk/fs/btrfs/async-thread.c
@@ -563,8 +563,8 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
struct list_head *fallback;
int ret;
- spin_lock_irqsave(&workers->lock, flags);
again:
+ spin_lock_irqsave(&workers->lock, flags);
worker = next_worker(workers);
if (!worker) {
@@ -579,7 +579,6 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
spin_unlock_irqrestore(&workers->lock, flags);
/* we're below the limit, start another worker */
ret = __btrfs_start_workers(workers);
- spin_lock_irqsave(&workers->lock, flags);
if (ret)
goto fallback;
goto again;
diff --git a/trunk/fs/btrfs/inode.c b/trunk/fs/btrfs/inode.c
index fd1a06df5bc6..0a6b928813a4 100644
--- a/trunk/fs/btrfs/inode.c
+++ b/trunk/fs/btrfs/inode.c
@@ -4590,6 +4590,10 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
int err = btrfs_add_link(trans, dir, inode,
dentry->d_name.name, dentry->d_name.len,
backref, index);
+ if (!err) {
+ d_instantiate(dentry, inode);
+ return 0;
+ }
if (err > 0)
err = -EEXIST;
return err;
@@ -4651,7 +4655,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
else {
init_special_inode(inode, inode->i_mode, rdev);
btrfs_update_inode(trans, root, inode);
- d_instantiate(dentry, inode);
}
out_unlock:
nr = trans->blocks_used;
@@ -4719,7 +4722,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
inode->i_mapping->a_ops = &btrfs_aops;
inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
- d_instantiate(dentry, inode);
}
out_unlock:
nr = trans->blocks_used;
@@ -4777,7 +4779,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *parent = dentry->d_parent;
err = btrfs_update_inode(trans, root, inode);
BUG_ON(err);
- d_instantiate(dentry, inode);
btrfs_log_new_name(trans, inode, NULL, parent);
}
@@ -7244,8 +7245,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
drop_inode = 1;
out_unlock:
- if (!err)
- d_instantiate(dentry, inode);
nr = trans->blocks_used;
btrfs_end_transaction_throttle(trans, root);
if (drop_inode) {
diff --git a/trunk/fs/ceph/dir.c b/trunk/fs/ceph/dir.c
index 98954003a8d3..3eeb97661262 100644
--- a/trunk/fs/ceph/dir.c
+++ b/trunk/fs/ceph/dir.c
@@ -1094,19 +1094,42 @@ static int ceph_snapdir_d_revalidate(struct dentry *dentry,
/*
* Set/clear/test dir complete flag on the dir's dentry.
*/
+static struct dentry * __d_find_any_alias(struct inode *inode)
+{
+ struct dentry *alias;
+
+ if (list_empty(&inode->i_dentry))
+ return NULL;
+ alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
+ return alias;
+}
+
void ceph_dir_set_complete(struct inode *inode)
{
- /* not yet implemented */
+ struct dentry *dentry = __d_find_any_alias(inode);
+
+ if (dentry && ceph_dentry(dentry)) {
+ dout(" marking %p (%p) complete\n", inode, dentry);
+ set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
+ }
}
void ceph_dir_clear_complete(struct inode *inode)
{
- /* not yet implemented */
+ struct dentry *dentry = __d_find_any_alias(inode);
+
+ if (dentry && ceph_dentry(dentry)) {
+ dout(" marking %p (%p) NOT complete\n", inode, dentry);
+ clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
+ }
}
bool ceph_dir_test_complete(struct inode *inode)
{
- /* not yet implemented */
+ struct dentry *dentry = __d_find_any_alias(inode);
+
+ if (dentry && ceph_dentry(dentry))
+ return test_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
return false;
}
diff --git a/trunk/fs/cifs/connect.c b/trunk/fs/cifs/connect.c
index f3670cf72587..8cd4b52d4217 100644
--- a/trunk/fs/cifs/connect.c
+++ b/trunk/fs/cifs/connect.c
@@ -282,7 +282,7 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
byte_count = be32_to_cpu(pTargetSMB->smb_buf_length);
byte_count += total_in_buf2;
/* don't allow buffer to overflow */
- if (byte_count > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4)
+ if (byte_count > CIFSMaxBufSize)
return -ENOBUFS;
pTargetSMB->smb_buf_length = cpu_to_be32(byte_count);
@@ -2122,7 +2122,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
warned_on_ntlm = true;
cERROR(1, "default security mechanism requested. The default "
"security mechanism will be upgraded from ntlm to "
- "ntlmv2 in kernel release 3.3");
+ "ntlmv2 in kernel release 3.2");
}
ses->overrideSecFlg = volume_info->secFlg;
diff --git a/trunk/fs/compat_ioctl.c b/trunk/fs/compat_ioctl.c
index a10e428b32b4..51352de88ef1 100644
--- a/trunk/fs/compat_ioctl.c
+++ b/trunk/fs/compat_ioctl.c
@@ -1506,6 +1506,35 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
return -ENOIOCTLCMD;
}
+static void compat_ioctl_error(struct file *filp, unsigned int fd,
+ unsigned int cmd, unsigned long arg)
+{
+ char buf[10];
+ char *fn = "?";
+ char *path;
+
+ /* find the name of the device. */
+ path = (char *)__get_free_page(GFP_KERNEL);
+ if (path) {
+ fn = d_path(&filp->f_path, path, PAGE_SIZE);
+ if (IS_ERR(fn))
+ fn = "?";
+ }
+
+ sprintf(buf,"'%c'", (cmd>>_IOC_TYPESHIFT) & _IOC_TYPEMASK);
+ if (!isprint(buf[1]))
+ sprintf(buf, "%02x", buf[1]);
+ compat_printk("ioctl32(%s:%d): Unknown cmd fd(%d) "
+ "cmd(%08x){t:%s;sz:%u} arg(%08x) on %s\n",
+ current->comm, current->pid,
+ (int)fd, (unsigned int)cmd, buf,
+ (cmd >> _IOC_SIZESHIFT) & _IOC_SIZEMASK,
+ (unsigned int)arg, fn);
+
+ if (path)
+ free_page((unsigned long)path);
+}
+
static int compat_ioctl_check_table(unsigned int xcmd)
{
int i;
@@ -1592,8 +1621,13 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
goto found_handler;
error = do_ioctl_trans(fd, cmd, arg, filp);
- if (error == -ENOIOCTLCMD)
- error = -ENOTTY;
+ if (error == -ENOIOCTLCMD) {
+ static int count;
+
+ if (++count <= 50)
+ compat_ioctl_error(filp, fd, cmd, arg);
+ error = -EINVAL;
+ }
goto out_fput;
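compat_ioctl_error() above extracts the type field by shifting and masking; the standard _IOC_* helpers perform the same decode. A short sketch, not part of the patch, with an illustrative function name:

#include <linux/ioctl.h>
#include <linux/kernel.h>

/* Sketch only: decode an ioctl command word into its dir/type/nr/size
 * fields -- the same fields compat_ioctl_error() prints by hand.
 */
static void decode_ioctl_cmd(unsigned int cmd)
{
	pr_info("dir %u type '%c' nr %u size %u\n",
		_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
}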
diff --git a/trunk/fs/fs-writeback.c b/trunk/fs/fs-writeback.c
index 517f211a3bd4..ac86f8b3e3cb 100644
--- a/trunk/fs/fs-writeback.c
+++ b/trunk/fs/fs-writeback.c
@@ -47,6 +47,17 @@ struct wb_writeback_work {
struct completion *done; /* set if the caller waits */
};
+const char *wb_reason_name[] = {
+ [WB_REASON_BACKGROUND] = "background",
+ [WB_REASON_TRY_TO_FREE_PAGES] = "try_to_free_pages",
+ [WB_REASON_SYNC] = "sync",
+ [WB_REASON_PERIODIC] = "periodic",
+ [WB_REASON_LAPTOP_TIMER] = "laptop_timer",
+ [WB_REASON_FREE_MORE_MEM] = "free_more_memory",
+ [WB_REASON_FS_FREE_SPACE] = "fs_free_space",
+ [WB_REASON_FORKER_THREAD] = "forker_thread"
+};
+
/*
* Include the creation of the trace points after defining the
* wb_writeback_work structure so that the definition remains local to this
diff --git a/trunk/fs/ioctl.c b/trunk/fs/ioctl.c
index 066836e81848..1d9b9fcb2db4 100644
--- a/trunk/fs/ioctl.c
+++ b/trunk/fs/ioctl.c
@@ -42,7 +42,7 @@ static long vfs_ioctl(struct file *filp, unsigned int cmd,
error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
if (error == -ENOIOCTLCMD)
- error = -ENOTTY;
+ error = -EINVAL;
out:
return error;
}
diff --git a/trunk/fs/locks.c b/trunk/fs/locks.c
index 637694bf3a03..3b0d05dcd7c1 100644
--- a/trunk/fs/locks.c
+++ b/trunk/fs/locks.c
@@ -1205,8 +1205,6 @@ int __break_lease(struct inode *inode, unsigned int mode)
int want_write = (mode & O_ACCMODE) != O_RDONLY;
new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
- if (IS_ERR(new_fl))
- return PTR_ERR(new_fl);
lock_flocks();
@@ -1223,6 +1221,12 @@ int __break_lease(struct inode *inode, unsigned int mode)
if (fl->fl_owner == current->files)
i_have_this_lease = 1;
+ if (IS_ERR(new_fl) && !i_have_this_lease
+ && ((mode & O_NONBLOCK) == 0)) {
+ error = PTR_ERR(new_fl);
+ goto out;
+ }
+
break_time = 0;
if (lease_break_time > 0) {
break_time = jiffies + lease_break_time * HZ;
@@ -1280,7 +1284,8 @@ int __break_lease(struct inode *inode, unsigned int mode)
out:
unlock_flocks();
- locks_free_lock(new_fl);
+ if (!IS_ERR(new_fl))
+ locks_free_lock(new_fl);
return error;
}
diff --git a/trunk/fs/minix/inode.c b/trunk/fs/minix/inode.c
index 4d46a6a59070..1d9e33966db0 100644
--- a/trunk/fs/minix/inode.c
+++ b/trunk/fs/minix/inode.c
@@ -263,6 +263,23 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
goto out_no_root;
}
+ ret = -ENOMEM;
+ s->s_root = d_alloc_root(root_inode);
+ if (!s->s_root)
+ goto out_iput;
+
+ if (!(s->s_flags & MS_RDONLY)) {
+ if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
+ ms->s_state &= ~MINIX_VALID_FS;
+ mark_buffer_dirty(bh);
+ }
+ if (!(sbi->s_mount_state & MINIX_VALID_FS))
+ printk("MINIX-fs: mounting unchecked file system, "
+ "running fsck is recommended\n");
+ else if (sbi->s_mount_state & MINIX_ERROR_FS)
+ printk("MINIX-fs: mounting file system with errors, "
+ "running fsck is recommended\n");
+
/* Apparently minix can create filesystems that allocate more blocks for
* the bitmaps than needed. We simply ignore that, but verify it didn't
* create one with not enough blocks and bail out if so.
@@ -283,23 +300,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
goto out_iput;
}
- ret = -ENOMEM;
- s->s_root = d_alloc_root(root_inode);
- if (!s->s_root)
- goto out_iput;
-
- if (!(s->s_flags & MS_RDONLY)) {
- if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
- ms->s_state &= ~MINIX_VALID_FS;
- mark_buffer_dirty(bh);
- }
- if (!(sbi->s_mount_state & MINIX_VALID_FS))
- printk("MINIX-fs: mounting unchecked file system, "
- "running fsck is recommended\n");
- else if (sbi->s_mount_state & MINIX_ERROR_FS)
- printk("MINIX-fs: mounting file system with errors, "
- "running fsck is recommended\n");
-
return 0;
out_iput:
diff --git a/trunk/fs/proc/array.c b/trunk/fs/proc/array.c
index 8c344f037bd0..3a1dafd228d1 100644
--- a/trunk/fs/proc/array.c
+++ b/trunk/fs/proc/array.c
@@ -394,8 +394,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
sigemptyset(&sigign);
sigemptyset(&sigcatch);
- cutime = cstime = utime = stime = 0;
- cgtime = gtime = 0;
+ cutime = cstime = utime = stime = cputime_zero;
+ cgtime = gtime = cputime_zero;
if (lock_task_sighand(task, &flags)) {
struct signal_struct *sig = task->signal;
@@ -423,14 +423,14 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
do {
min_flt += t->min_flt;
maj_flt += t->maj_flt;
- gtime += t->gtime;
+ gtime = cputime_add(gtime, t->gtime);
t = next_thread(t);
} while (t != task);
min_flt += sig->min_flt;
maj_flt += sig->maj_flt;
thread_group_times(task, &utime, &stime);
- gtime += sig->gtime;
+ gtime = cputime_add(gtime, sig->gtime);
}
sid = task_session_nr_ns(task, ns);
diff --git a/trunk/fs/proc/stat.c b/trunk/fs/proc/stat.c
index 121f77cfef76..2a30d67dd6b8 100644
--- a/trunk/fs/proc/stat.c
+++ b/trunk/fs/proc/stat.c
@@ -22,29 +22,31 @@
#define arch_idle_time(cpu) 0
#endif
-static u64 get_idle_time(int cpu)
+static cputime64_t get_idle_time(int cpu)
{
- u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL);
+ u64 idle_time = get_cpu_idle_time_us(cpu, NULL);
+ cputime64_t idle;
if (idle_time == -1ULL) {
/* !NO_HZ so we can rely on cpustat.idle */
- idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
- idle += arch_idle_time(cpu);
+ idle = kstat_cpu(cpu).cpustat.idle;
+ idle = cputime64_add(idle, arch_idle_time(cpu));
} else
- idle = usecs_to_cputime64(idle_time);
+ idle = nsecs_to_jiffies64(1000 * idle_time);
return idle;
}
-static u64 get_iowait_time(int cpu)
+static cputime64_t get_iowait_time(int cpu)
{
- u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL);
+ u64 iowait_time = get_cpu_iowait_time_us(cpu, NULL);
+ cputime64_t iowait;
if (iowait_time == -1ULL)
/* !NO_HZ so we can rely on cpustat.iowait */
- iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
+ iowait = kstat_cpu(cpu).cpustat.iowait;
else
- iowait = usecs_to_cputime64(iowait_time);
+ iowait = nsecs_to_jiffies64(1000 * iowait_time);
return iowait;
}
@@ -53,30 +55,31 @@ static int show_stat(struct seq_file *p, void *v)
{
int i, j;
unsigned long jif;
- u64 user, nice, system, idle, iowait, irq, softirq, steal;
- u64 guest, guest_nice;
+ cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
+ cputime64_t guest, guest_nice;
u64 sum = 0;
u64 sum_softirq = 0;
unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
struct timespec boottime;
user = nice = system = idle = iowait =
- irq = softirq = steal = 0;
- guest = guest_nice = 0;
+ irq = softirq = steal = cputime64_zero;
+ guest = guest_nice = cputime64_zero;
getboottime(&boottime);
jif = boottime.tv_sec;
for_each_possible_cpu(i) {
- user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
- nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
- system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
- idle += get_idle_time(i);
- iowait += get_iowait_time(i);
- irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
- softirq += kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
- steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
- guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
- guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
+ user = cputime64_add(user, kstat_cpu(i).cpustat.user);
+ nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
+ system = cputime64_add(system, kstat_cpu(i).cpustat.system);
+ idle = cputime64_add(idle, get_idle_time(i));
+ iowait = cputime64_add(iowait, get_iowait_time(i));
+ irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq);
+ softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
+ steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
+ guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
+ guest_nice = cputime64_add(guest_nice,
+ kstat_cpu(i).cpustat.guest_nice);
sum += kstat_cpu_irqs_sum(i);
sum += arch_irq_stat_cpu(i);
@@ -103,16 +106,16 @@ static int show_stat(struct seq_file *p, void *v)
(unsigned long long)cputime64_to_clock_t(guest_nice));
for_each_online_cpu(i) {
/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
- user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
- nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
- system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
+ user = kstat_cpu(i).cpustat.user;
+ nice = kstat_cpu(i).cpustat.nice;
+ system = kstat_cpu(i).cpustat.system;
idle = get_idle_time(i);
iowait = get_iowait_time(i);
- irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
- softirq = kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
- steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
- guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
- guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
+ irq = kstat_cpu(i).cpustat.irq;
+ softirq = kstat_cpu(i).cpustat.softirq;
+ steal = kstat_cpu(i).cpustat.steal;
+ guest = kstat_cpu(i).cpustat.guest;
+ guest_nice = kstat_cpu(i).cpustat.guest_nice;
seq_printf(p,
"cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu "
"%llu\n",
diff --git a/trunk/fs/proc/uptime.c b/trunk/fs/proc/uptime.c
index 9610ac772d7e..766b1d456050 100644
--- a/trunk/fs/proc/uptime.c
+++ b/trunk/fs/proc/uptime.c
@@ -11,20 +11,15 @@ static int uptime_proc_show(struct seq_file *m, void *v)
{
struct timespec uptime;
struct timespec idle;
- u64 idletime;
- u64 nsec;
- u32 rem;
int i;
+ cputime_t idletime = cputime_zero;
- idletime = 0;
for_each_possible_cpu(i)
- idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
+ idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle);
do_posix_clock_monotonic_gettime(&uptime);
monotonic_to_bootbased(&uptime);
- nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC;
- idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
- idle.tv_nsec = rem;
+ cputime_to_timespec(idletime, &idle);
seq_printf(m, "%lu.%02lu %lu.%02lu\n",
(unsigned long) uptime.tv_sec,
(uptime.tv_nsec / (NSEC_PER_SEC / 100)),
diff --git a/trunk/fs/xfs/xfs_super.c b/trunk/fs/xfs/xfs_super.c
index 8a899496fd5f..3eca58f51ae9 100644
--- a/trunk/fs/xfs/xfs_super.c
+++ b/trunk/fs/xfs/xfs_super.c
@@ -868,6 +868,27 @@ xfs_fs_dirty_inode(
XFS_I(inode)->i_update_core = 1;
}
+STATIC int
+xfs_log_inode(
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ int error;
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+ error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ return error;
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ return xfs_trans_commit(tp, 0);
+}
+
STATIC int
xfs_fs_write_inode(
struct inode *inode,
@@ -881,8 +902,10 @@ xfs_fs_write_inode(
if (XFS_FORCED_SHUTDOWN(mp))
return -XFS_ERROR(EIO);
+ if (!ip->i_update_core)
+ return 0;
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) {
+ if (wbc->sync_mode == WB_SYNC_ALL) {
/*
* Make sure the inode has made it it into the log. Instead
* of forcing it all the way to stable storage using a
@@ -890,14 +913,11 @@ xfs_fs_write_inode(
* ->sync_fs call do that for thus, which reduces the number
* of synchronous log forces dramatically.
*/
- error = xfs_log_dirty_inode(ip, NULL, 0);
+ error = xfs_log_inode(ip);
if (error)
goto out;
return 0;
} else {
- if (!ip->i_update_core)
- return 0;
-
/*
* We make this non-blocking if the inode is contended, return
* EAGAIN to indicate to the caller that they did not succeed.
diff --git a/trunk/fs/xfs/xfs_sync.c b/trunk/fs/xfs/xfs_sync.c
index f0994aedcd15..be5c51d8f757 100644
--- a/trunk/fs/xfs/xfs_sync.c
+++ b/trunk/fs/xfs/xfs_sync.c
@@ -336,32 +336,6 @@ xfs_sync_fsdata(
return error;
}
-int
-xfs_log_dirty_inode(
- struct xfs_inode *ip,
- struct xfs_perag *pag,
- int flags)
-{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_trans *tp;
- int error;
-
- if (!ip->i_update_core)
- return 0;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
- error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- return error;
- }
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- return xfs_trans_commit(tp, 0);
-}
-
/*
* When remounting a filesystem read-only or freezing the filesystem, we have
* two phases to execute. This first phase is syncing the data before we
@@ -385,16 +359,6 @@ xfs_quiesce_data(
{
int error, error2 = 0;
- /*
- * Log all pending size and timestamp updates. The vfs writeback
- * code is supposed to do this, but due to its overagressive
- * livelock detection it will skip inodes where appending writes
- * were written out in the first non-blocking sync phase if their
- * completion took long enough that it happened after taking the
- * timestamp for the cut-off in the blocking phase.
- */
- xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
-
xfs_qm_sync(mp, SYNC_TRYLOCK);
xfs_qm_sync(mp, SYNC_WAIT);
diff --git a/trunk/fs/xfs/xfs_sync.h b/trunk/fs/xfs/xfs_sync.h
index fa965479d788..941202e7ac6e 100644
--- a/trunk/fs/xfs/xfs_sync.h
+++ b/trunk/fs/xfs/xfs_sync.h
@@ -34,8 +34,6 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
void xfs_flush_inodes(struct xfs_inode *ip);
-int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags);
-
int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
int xfs_reclaim_inodes_count(struct xfs_mount *mp);
void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
diff --git a/trunk/include/asm-generic/cputime.h b/trunk/include/asm-generic/cputime.h
index 9a62937c56ca..62ce6823c0f2 100644
--- a/trunk/include/asm-generic/cputime.h
+++ b/trunk/include/asm-generic/cputime.h
@@ -4,66 +4,70 @@
#include
#include
-typedef unsigned long __nocast cputime_t;
+typedef unsigned long cputime_t;
+#define cputime_zero (0UL)
#define cputime_one_jiffy jiffies_to_cputime(1)
-#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct)
+#define cputime_max ((~0UL >> 1) - 1)
+#define cputime_add(__a, __b) ((__a) + (__b))
+#define cputime_sub(__a, __b) ((__a) - (__b))
+#define cputime_div(__a, __n) ((__a) / (__n))
+#define cputime_halve(__a) ((__a) >> 1)
+#define cputime_eq(__a, __b) ((__a) == (__b))
+#define cputime_gt(__a, __b) ((__a) > (__b))
+#define cputime_ge(__a, __b) ((__a) >= (__b))
+#define cputime_lt(__a, __b) ((__a) < (__b))
+#define cputime_le(__a, __b) ((__a) <= (__b))
+#define cputime_to_jiffies(__ct) (__ct)
#define cputime_to_scaled(__ct) (__ct)
-#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz)
+#define jiffies_to_cputime(__hz) (__hz)
-typedef u64 __nocast cputime64_t;
+typedef u64 cputime64_t;
-#define cputime64_to_jiffies64(__ct) (__force u64)(__ct)
-#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif)
+#define cputime64_zero (0ULL)
+#define cputime64_add(__a, __b) ((__a) + (__b))
+#define cputime64_sub(__a, __b) ((__a) - (__b))
+#define cputime64_to_jiffies64(__ct) (__ct)
+#define jiffies64_to_cputime64(__jif) (__jif)
+#define cputime_to_cputime64(__ct) ((u64) __ct)
+#define cputime64_gt(__a, __b) ((__a) > (__b))
-#define nsecs_to_cputime64(__ct) \
- jiffies64_to_cputime64(nsecs_to_jiffies64(__ct))
+#define nsecs_to_cputime64(__ct) nsecs_to_jiffies64(__ct)
/*
* Convert cputime to microseconds and back.
*/
-#define cputime_to_usecs(__ct) \
- jiffies_to_usecs(cputime_to_jiffies(__ct))
-#define usecs_to_cputime(__usec) \
- jiffies_to_cputime(usecs_to_jiffies(__usec))
-#define usecs_to_cputime64(__usec) \
- jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
+#define cputime_to_usecs(__ct) jiffies_to_usecs(__ct)
+#define usecs_to_cputime(__msecs) usecs_to_jiffies(__msecs)
/*
* Convert cputime to seconds and back.
*/
-#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ)
-#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ)
+#define cputime_to_secs(jif) ((jif) / HZ)
+#define secs_to_cputime(sec) ((sec) * HZ)
/*
* Convert cputime to timespec and back.
*/
-#define timespec_to_cputime(__val) \
- jiffies_to_cputime(timespec_to_jiffies(__val))
-#define cputime_to_timespec(__ct,__val) \
- jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
+#define timespec_to_cputime(__val) timespec_to_jiffies(__val)
+#define cputime_to_timespec(__ct,__val) jiffies_to_timespec(__ct,__val)
/*
* Convert cputime to timeval and back.
*/
-#define timeval_to_cputime(__val) \
- jiffies_to_cputime(timeval_to_jiffies(__val))
-#define cputime_to_timeval(__ct,__val) \
- jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
+#define timeval_to_cputime(__val) timeval_to_jiffies(__val)
+#define cputime_to_timeval(__ct,__val) jiffies_to_timeval(__ct,__val)
/*
* Convert cputime to clock and back.
*/
-#define cputime_to_clock_t(__ct) \
- jiffies_to_clock_t(cputime_to_jiffies(__ct))
-#define clock_t_to_cputime(__x) \
- jiffies_to_cputime(clock_t_to_jiffies(__x))
+#define cputime_to_clock_t(__ct) jiffies_to_clock_t(__ct)
+#define clock_t_to_cputime(__x) clock_t_to_jiffies(__x)
/*
* Convert cputime64 to clock.
*/
-#define cputime64_to_clock_t(__ct) \
- jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
+#define cputime64_to_clock_t(__ct) jiffies_64_to_clock_t(__ct)
#endif
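The restored helpers above are thin wrappers around jiffies arithmetic. A minimal sketch of how a caller combines and converts cputime_t values with them; the helper name is hypothetical, not from the patch:

#include <linux/time.h>
#include <asm/cputime.h>

/* Sketch only: add two cputime_t samples and express the sum as a
 * struct timespec using cputime_add() and cputime_to_timespec().
 */
static inline struct timespec cputime_sum_to_timespec(cputime_t utime,
						      cputime_t stime)
{
	struct timespec ts;

	cputime_to_timespec(cputime_add(utime, stime), &ts);
	return ts;
}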
diff --git a/trunk/include/linux/bitops.h b/trunk/include/linux/bitops.h
index 3c1063acb2ab..a3ef66a2a083 100644
--- a/trunk/include/linux/bitops.h
+++ b/trunk/include/linux/bitops.h
@@ -22,14 +22,8 @@ extern unsigned long __sw_hweight64(__u64 w);
#include
#define for_each_set_bit(bit, addr, size) \
- for ((bit) = find_first_bit((addr), (size)); \
- (bit) < (size); \
- (bit) = find_next_bit((addr), (size), (bit) + 1))
-
-/* same as for_each_set_bit() but use bit as value to start with */
-#define for_each_set_bit_cont(bit, addr, size) \
- for ((bit) = find_next_bit((addr), (size), (bit)); \
- (bit) < (size); \
+ for ((bit) = find_first_bit((addr), (size)); \
+ (bit) < (size); \
(bit) = find_next_bit((addr), (size), (bit) + 1))
static __inline__ int get_bitmask_order(unsigned int count)
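The for_each_set_bit() definition above is unchanged in behaviour by this hunk; a short usage sketch with an illustrative function name:

#include <linux/bitops.h>
#include <linux/kernel.h>

/* Sketch only: iterate over every set bit in a bitmap of nbits bits. */
static void dump_set_bits(const unsigned long *mask, unsigned int nbits)
{
	unsigned int bit;

	for_each_set_bit(bit, mask, nbits)
		pr_info("bit %u is set\n", bit);
}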
diff --git a/trunk/include/linux/bootmem.h b/trunk/include/linux/bootmem.h
index 66d3e954eb6c..ab344a521105 100644
--- a/trunk/include/linux/bootmem.h
+++ b/trunk/include/linux/bootmem.h
@@ -44,7 +44,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
unsigned long endpfn);
extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
-extern unsigned long free_low_memory_core_early(int nodeid);
+unsigned long free_all_memory_core_early(int nodeid);
extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
extern unsigned long free_all_bootmem(void);
diff --git a/trunk/include/linux/cpu.h b/trunk/include/linux/cpu.h
index 305c263021e7..6cb60fd2ea84 100644
--- a/trunk/include/linux/cpu.h
+++ b/trunk/include/linux/cpu.h
@@ -27,7 +27,6 @@ struct cpu {
extern int register_cpu(struct cpu *cpu, int num);
extern struct sys_device *get_cpu_sysdev(unsigned cpu);
-extern bool cpu_is_hotpluggable(unsigned cpu);
extern int cpu_add_sysdev_attr(struct sysdev_attribute *attr);
extern void cpu_remove_sysdev_attr(struct sysdev_attribute *attr);
diff --git a/trunk/include/linux/debugobjects.h b/trunk/include/linux/debugobjects.h
index 0e5f5785d9f2..65970b811e22 100644
--- a/trunk/include/linux/debugobjects.h
+++ b/trunk/include/linux/debugobjects.h
@@ -46,8 +46,6 @@ struct debug_obj {
* fails
* @fixup_free: fixup function, which is called when the free check
* fails
- * @fixup_assert_init: fixup function, which is called when the assert_init
- * check fails
*/
struct debug_obj_descr {
const char *name;
@@ -56,7 +54,6 @@ struct debug_obj_descr {
int (*fixup_activate) (void *addr, enum debug_obj_state state);
int (*fixup_destroy) (void *addr, enum debug_obj_state state);
int (*fixup_free) (void *addr, enum debug_obj_state state);
- int (*fixup_assert_init)(void *addr, enum debug_obj_state state);
};
#ifdef CONFIG_DEBUG_OBJECTS
@@ -67,7 +64,6 @@ extern void debug_object_activate (void *addr, struct debug_obj_descr *descr);
extern void debug_object_deactivate(void *addr, struct debug_obj_descr *descr);
extern void debug_object_destroy (void *addr, struct debug_obj_descr *descr);
extern void debug_object_free (void *addr, struct debug_obj_descr *descr);
-extern void debug_object_assert_init(void *addr, struct debug_obj_descr *descr);
/*
* Active state:
@@ -93,8 +89,6 @@ static inline void
debug_object_destroy (void *addr, struct debug_obj_descr *descr) { }
static inline void
debug_object_free (void *addr, struct debug_obj_descr *descr) { }
-static inline void
-debug_object_assert_init(void *addr, struct debug_obj_descr *descr) { }
static inline void debug_objects_early_init(void) { }
static inline void debug_objects_mem_init(void) { }
diff --git a/trunk/include/linux/hardirq.h b/trunk/include/linux/hardirq.h
index bb7f30971858..f743883f769e 100644
--- a/trunk/include/linux/hardirq.h
+++ b/trunk/include/linux/hardirq.h
@@ -139,7 +139,20 @@ static inline void account_system_vtime(struct task_struct *tsk)
extern void account_system_vtime(struct task_struct *tsk);
#endif
+#if defined(CONFIG_NO_HZ)
#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
+extern void rcu_enter_nohz(void);
+extern void rcu_exit_nohz(void);
+
+static inline void rcu_irq_enter(void)
+{
+ rcu_exit_nohz();
+}
+
+static inline void rcu_irq_exit(void)
+{
+ rcu_enter_nohz();
+}
static inline void rcu_nmi_enter(void)
{
@@ -150,9 +163,17 @@ static inline void rcu_nmi_exit(void)
}
#else
+extern void rcu_irq_enter(void);
+extern void rcu_irq_exit(void);
extern void rcu_nmi_enter(void);
extern void rcu_nmi_exit(void);
#endif
+#else
+# define rcu_irq_enter() do { } while (0)
+# define rcu_irq_exit() do { } while (0)
+# define rcu_nmi_enter() do { } while (0)
+# define rcu_nmi_exit() do { } while (0)
+#endif /* #if defined(CONFIG_NO_HZ) */
/*
* It is safe to do non-atomic ops on ->hardirq_context,
diff --git a/trunk/include/linux/jump_label.h b/trunk/include/linux/jump_label.h
index 5ce8b140428f..388b0d425b50 100644
--- a/trunk/include/linux/jump_label.h
+++ b/trunk/include/linux/jump_label.h
@@ -3,7 +3,6 @@
#include
#include
-#include
#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
@@ -15,12 +14,6 @@ struct jump_label_key {
#endif
};
-struct jump_label_key_deferred {
- struct jump_label_key key;
- unsigned long timeout;
- struct delayed_work work;
-};
-
# include
# define HAVE_JUMP_LABEL
#endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */
@@ -58,11 +51,8 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry,
extern int jump_label_text_reserved(void *start, void *end);
extern void jump_label_inc(struct jump_label_key *key);
extern void jump_label_dec(struct jump_label_key *key);
-extern void jump_label_dec_deferred(struct jump_label_key_deferred *key);
extern bool jump_label_enabled(struct jump_label_key *key);
extern void jump_label_apply_nops(struct module *mod);
-extern void jump_label_rate_limit(struct jump_label_key_deferred *key,
- unsigned long rl);
#else /* !HAVE_JUMP_LABEL */
@@ -78,10 +68,6 @@ static __always_inline void jump_label_init(void)
{
}
-struct jump_label_key_deferred {
- struct jump_label_key key;
-};
-
static __always_inline bool static_branch(struct jump_label_key *key)
{
if (unlikely(atomic_read(&key->enabled)))
@@ -99,11 +85,6 @@ static inline void jump_label_dec(struct jump_label_key *key)
atomic_dec(&key->enabled);
}
-static inline void jump_label_dec_deferred(struct jump_label_key_deferred *key)
-{
- jump_label_dec(&key->key);
-}
-
static inline int jump_label_text_reserved(void *start, void *end)
{
return 0;
@@ -121,14 +102,6 @@ static inline int jump_label_apply_nops(struct module *mod)
{
return 0;
}
-
-static inline void jump_label_rate_limit(struct jump_label_key_deferred *key,
- unsigned long rl)
-{
-}
#endif /* HAVE_JUMP_LABEL */
-#define jump_label_key_enabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(1), })
-#define jump_label_key_disabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(0), })
-
#endif /* _LINUX_JUMP_LABEL_H */
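For reference, the API kept by this hunk is used roughly as follows; a minimal sketch with an illustrative key name:

#include <linux/jump_label.h>

/* Sketch only: the basic pattern for the jump label API as declared
 * above (struct jump_label_key, static_branch(), jump_label_inc()/dec()).
 */
static struct jump_label_key feature_key;

static void feature_set(bool on)
{
	if (on)
		jump_label_inc(&feature_key);	/* enable the branch */
	else
		jump_label_dec(&feature_key);	/* disable it again */
}

static void hot_path(void)
{
	if (static_branch(&feature_key)) {
		/* rarely-taken slow path */
	}
}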
diff --git a/trunk/include/linux/kernel_stat.h b/trunk/include/linux/kernel_stat.h
index 2fbd9053c2df..0cce2db580c3 100644
--- a/trunk/include/linux/kernel_stat.h
+++ b/trunk/include/linux/kernel_stat.h
@@ -6,7 +6,6 @@
#include
#include
#include
-#include
#include
#include
@@ -16,25 +15,21 @@
* used by rstatd/perfmeter
*/
-enum cpu_usage_stat {
- CPUTIME_USER,
- CPUTIME_NICE,
- CPUTIME_SYSTEM,
- CPUTIME_SOFTIRQ,
- CPUTIME_IRQ,
- CPUTIME_IDLE,
- CPUTIME_IOWAIT,
- CPUTIME_STEAL,
- CPUTIME_GUEST,
- CPUTIME_GUEST_NICE,
- NR_STATS,
-};
-
-struct kernel_cpustat {
- u64 cpustat[NR_STATS];
+struct cpu_usage_stat {
+ cputime64_t user;
+ cputime64_t nice;
+ cputime64_t system;
+ cputime64_t softirq;
+ cputime64_t irq;
+ cputime64_t idle;
+ cputime64_t iowait;
+ cputime64_t steal;
+ cputime64_t guest;
+ cputime64_t guest_nice;
};
struct kernel_stat {
+ struct cpu_usage_stat cpustat;
#ifndef CONFIG_GENERIC_HARDIRQS
unsigned int irqs[NR_IRQS];
#endif
@@ -43,13 +38,10 @@ struct kernel_stat {
};
DECLARE_PER_CPU(struct kernel_stat, kstat);
-DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
+#define kstat_cpu(cpu) per_cpu(kstat, cpu)
/* Must have preemption disabled for this to be meaningful. */
-#define kstat_this_cpu (&__get_cpu_var(kstat))
-#define kcpustat_this_cpu (&__get_cpu_var(kernel_cpustat))
-#define kstat_cpu(cpu) per_cpu(kstat, cpu)
-#define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu)
+#define kstat_this_cpu __get_cpu_var(kstat)
extern unsigned long long nr_context_switches(void);
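A short sketch of how the restored kstat_cpu() / cpu_usage_stat layout is consumed, mirroring the fs/proc/stat.c hunk earlier in this patch; the helper name is illustrative:

#include <linux/kernel_stat.h>
#include <linux/cpumask.h>

/* Sketch only: accumulate per-CPU system time with the field-based
 * cpustat layout and the cputime64_* helpers.
 */
static cputime64_t total_system_time(void)
{
	cputime64_t sys = cputime64_zero;
	int cpu;

	for_each_possible_cpu(cpu)
		sys = cputime64_add(sys, kstat_cpu(cpu).cpustat.system);

	return sys;
}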
diff --git a/trunk/include/linux/kvm.h b/trunk/include/linux/kvm.h
index 68e67e50d028..c3892fc1d538 100644
--- a/trunk/include/linux/kvm.h
+++ b/trunk/include/linux/kvm.h
@@ -557,7 +557,6 @@ struct kvm_ppc_pvinfo {
#define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */
#define KVM_CAP_PPC_PAPR 68
#define KVM_CAP_S390_GMAP 71
-#define KVM_CAP_TSC_DEADLINE_TIMER 72
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/trunk/include/linux/latencytop.h b/trunk/include/linux/latencytop.h
index e23121f9d82a..b0e99898527c 100644
--- a/trunk/include/linux/latencytop.h
+++ b/trunk/include/linux/latencytop.h
@@ -10,8 +10,6 @@
#define _INCLUDE_GUARD_LATENCYTOP_H_
#include
-struct task_struct;
-
#ifdef CONFIG_LATENCYTOP
#define LT_SAVECOUNT 32
@@ -25,6 +23,7 @@ struct latency_record {
};
+struct task_struct;
extern int latencytop_enabled;
void __account_scheduler_latency(struct task_struct *task, int usecs, int inter);
diff --git a/trunk/include/linux/lglock.h b/trunk/include/linux/lglock.h
index 87f402ccec55..f549056fb20b 100644
--- a/trunk/include/linux/lglock.h
+++ b/trunk/include/linux/lglock.h
@@ -22,7 +22,6 @@
#include
#include
#include
-#include
/* can make br locks by using local lock for read side, global lock for write */
#define br_lock_init(name) name##_lock_init()
@@ -73,31 +72,9 @@
#define DEFINE_LGLOCK(name) \
\
- DEFINE_SPINLOCK(name##_cpu_lock); \
- cpumask_t name##_cpus __read_mostly; \
DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \
DEFINE_LGLOCK_LOCKDEP(name); \
\
- static int \
- name##_lg_cpu_callback(struct notifier_block *nb, \
- unsigned long action, void *hcpu) \
- { \
- switch (action & ~CPU_TASKS_FROZEN) { \
- case CPU_UP_PREPARE: \
- spin_lock(&name##_cpu_lock); \
- cpu_set((unsigned long)hcpu, name##_cpus); \
- spin_unlock(&name##_cpu_lock); \
- break; \
- case CPU_UP_CANCELED: case CPU_DEAD: \
- spin_lock(&name##_cpu_lock); \
- cpu_clear((unsigned long)hcpu, name##_cpus); \
- spin_unlock(&name##_cpu_lock); \
- } \
- return NOTIFY_OK; \
- } \
- static struct notifier_block name##_lg_cpu_notifier = { \
- .notifier_call = name##_lg_cpu_callback, \
- }; \
void name##_lock_init(void) { \
int i; \
LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \
@@ -106,11 +83,6 @@
lock = &per_cpu(name##_lock, i); \
*lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \
} \
- register_hotcpu_notifier(&name##_lg_cpu_notifier); \
- get_online_cpus(); \
- for_each_online_cpu(i) \
- cpu_set(i, name##_cpus); \
- put_online_cpus(); \
} \
EXPORT_SYMBOL(name##_lock_init); \
\
@@ -152,9 +124,9 @@
\
void name##_global_lock_online(void) { \
int i; \
- spin_lock(&name##_cpu_lock); \
+ preempt_disable(); \
rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \
- for_each_cpu(i, &name##_cpus) { \
+ for_each_online_cpu(i) { \
arch_spinlock_t *lock; \
lock = &per_cpu(name##_lock, i); \
arch_spin_lock(lock); \
@@ -165,12 +137,12 @@
void name##_global_unlock_online(void) { \
int i; \
rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \
- for_each_cpu(i, &name##_cpus) { \
+ for_each_online_cpu(i) { \
arch_spinlock_t *lock; \
lock = &per_cpu(name##_lock, i); \
arch_spin_unlock(lock); \
} \
- spin_unlock(&name##_cpu_lock); \
+ preempt_enable(); \
} \
EXPORT_SYMBOL(name##_global_unlock_online); \
\
diff --git a/trunk/include/linux/lockdep.h b/trunk/include/linux/lockdep.h
index d36619ead3ba..b6a56e37284c 100644
--- a/trunk/include/linux/lockdep.h
+++ b/trunk/include/linux/lockdep.h
@@ -343,8 +343,6 @@ extern void lockdep_trace_alloc(gfp_t mask);
#define lockdep_assert_held(l) WARN_ON(debug_locks && !lockdep_is_held(l))
-#define lockdep_recursing(tsk) ((tsk)->lockdep_recursion)
-
#else /* !LOCKDEP */
static inline void lockdep_off(void)
@@ -394,8 +392,6 @@ struct lock_class_key { };
#define lockdep_assert_held(l) do { } while (0)
-#define lockdep_recursing(tsk) (0)
-
#endif /* !LOCKDEP */
#ifdef CONFIG_LOCK_STAT
diff --git a/trunk/include/linux/memblock.h b/trunk/include/linux/memblock.h
index a6bb10235148..e6b843e16e81 100644
--- a/trunk/include/linux/memblock.h
+++ b/trunk/include/linux/memblock.h
@@ -2,6 +2,8 @@
#define _LINUX_MEMBLOCK_H
#ifdef __KERNEL__
+#define MEMBLOCK_ERROR 0
+
#ifdef CONFIG_HAVE_MEMBLOCK
/*
* Logical memory blocks.
@@ -17,161 +19,81 @@
#include
#include
+#include
+
#define INIT_MEMBLOCK_REGIONS 128
struct memblock_region {
phys_addr_t base;
phys_addr_t size;
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
- int nid;
-#endif
};
struct memblock_type {
unsigned long cnt; /* number of regions */
unsigned long max; /* size of the allocated array */
- phys_addr_t total_size; /* size of all regions */
struct memblock_region *regions;
};
struct memblock {
phys_addr_t current_limit;
+ phys_addr_t memory_size; /* Updated by memblock_analyze() */
struct memblock_type memory;
struct memblock_type reserved;
};
extern struct memblock memblock;
extern int memblock_debug;
+extern int memblock_can_resize;
#define memblock_dbg(fmt, ...) \
if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
-phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end,
- phys_addr_t size, phys_addr_t align, int nid);
-phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
- phys_addr_t size, phys_addr_t align);
+u64 memblock_find_in_range(u64 start, u64 end, u64 size, u64 align);
int memblock_free_reserved_regions(void);
int memblock_reserve_reserved_regions(void);
-void memblock_allow_resize(void);
-int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid);
-int memblock_add(phys_addr_t base, phys_addr_t size);
-int memblock_remove(phys_addr_t base, phys_addr_t size);
-int memblock_free(phys_addr_t base, phys_addr_t size);
-int memblock_reserve(phys_addr_t base, phys_addr_t size);
-
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
- unsigned long *out_end_pfn, int *out_nid);
-
-/**
- * for_each_mem_pfn_range - early memory pfn range iterator
- * @i: an integer used as loop variable
- * @nid: node selector, %MAX_NUMNODES for all nodes
- * @p_start: ptr to ulong for start pfn of the range, can be %NULL
- * @p_end: ptr to ulong for end pfn of the range, can be %NULL
- * @p_nid: ptr to int for nid of the range, can be %NULL
- *
- * Walks over configured memory ranges. Available after early_node_map is
- * populated.
- */
-#define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid) \
- for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \
- i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
-#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-
-void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
- phys_addr_t *out_end, int *out_nid);
-
-/**
- * for_each_free_mem_range - iterate through free memblock areas
- * @i: u64 used as loop variable
- * @nid: node selector, %MAX_NUMNODES for all nodes
- * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
- * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
- * @p_nid: ptr to int for nid of the range, can be %NULL
- *
- * Walks over free (memory && !reserved) areas of memblock. Available as
- * soon as memblock is initialized.
- */
-#define for_each_free_mem_range(i, nid, p_start, p_end, p_nid) \
- for (i = 0, \
- __next_free_mem_range(&i, nid, p_start, p_end, p_nid); \
- i != (u64)ULLONG_MAX; \
- __next_free_mem_range(&i, nid, p_start, p_end, p_nid))
-
-void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
- phys_addr_t *out_end, int *out_nid);
+extern void memblock_init(void);
+extern void memblock_analyze(void);
+extern long memblock_add(phys_addr_t base, phys_addr_t size);
+extern long memblock_remove(phys_addr_t base, phys_addr_t size);
+extern long memblock_free(phys_addr_t base, phys_addr_t size);
+extern long memblock_reserve(phys_addr_t base, phys_addr_t size);
-/**
- * for_each_free_mem_range_reverse - rev-iterate through free memblock areas
- * @i: u64 used as loop variable
- * @nid: node selector, %MAX_NUMNODES for all nodes
- * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
- * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
- * @p_nid: ptr to int for nid of the range, can be %NULL
- *
- * Walks over free (memory && !reserved) areas of memblock in reverse
- * order. Available as soon as memblock is initialized.
+/* The numa aware allocator is only available if
+ * CONFIG_ARCH_POPULATES_NODE_MAP is set
*/
-#define for_each_free_mem_range_reverse(i, nid, p_start, p_end, p_nid) \
- for (i = (u64)ULLONG_MAX, \
- __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid); \
- i != (u64)ULLONG_MAX; \
- __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid))
+extern phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align,
+ int nid);
+extern phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align,
+ int nid);
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid);
-
-static inline void memblock_set_region_node(struct memblock_region *r, int nid)
-{
- r->nid = nid;
-}
-
-static inline int memblock_get_region_node(const struct memblock_region *r)
-{
- return r->nid;
-}
-#else
-static inline void memblock_set_region_node(struct memblock_region *r, int nid)
-{
-}
-
-static inline int memblock_get_region_node(const struct memblock_region *r)
-{
- return 0;
-}
-#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-
-phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid);
-phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid);
-
-phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
+extern phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
/* Flags for memblock_alloc_base() and __memblock_alloc_base() */
#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0)
#define MEMBLOCK_ALLOC_ACCESSIBLE 0
-phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
- phys_addr_t max_addr);
-phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
- phys_addr_t max_addr);
-phys_addr_t memblock_phys_mem_size(void);
-phys_addr_t memblock_start_of_DRAM(void);
-phys_addr_t memblock_end_of_DRAM(void);
-void memblock_enforce_memory_limit(phys_addr_t memory_limit);
-int memblock_is_memory(phys_addr_t addr);
-int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
-int memblock_is_reserved(phys_addr_t addr);
-int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
-
-extern void __memblock_dump_all(void);
-
-static inline void memblock_dump_all(void)
-{
- if (memblock_debug)
- __memblock_dump_all();
-}
+extern phys_addr_t memblock_alloc_base(phys_addr_t size,
+ phys_addr_t align,
+ phys_addr_t max_addr);
+extern phys_addr_t __memblock_alloc_base(phys_addr_t size,
+ phys_addr_t align,
+ phys_addr_t max_addr);
+extern phys_addr_t memblock_phys_mem_size(void);
+extern phys_addr_t memblock_start_of_DRAM(void);
+extern phys_addr_t memblock_end_of_DRAM(void);
+extern void memblock_enforce_memory_limit(phys_addr_t memory_limit);
+extern int memblock_is_memory(phys_addr_t addr);
+extern int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
+extern int memblock_is_reserved(phys_addr_t addr);
+extern int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
+
+extern void memblock_dump_all(void);
+
+/* Provided by the architecture */
+extern phys_addr_t memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid);
+extern int memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1,
+ phys_addr_t addr2, phys_addr_t size2);
/**
* memblock_set_current_limit - Set the current allocation limit to allow
@@ -179,7 +101,7 @@ static inline void memblock_dump_all(void)
* accessible during boot
* @limit: New limit value (physical address)
*/
-void memblock_set_current_limit(phys_addr_t limit);
+extern void memblock_set_current_limit(phys_addr_t limit);
/*
@@ -232,9 +154,9 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
region++)
-#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
-#define __init_memblock __meminit
-#define __initdata_memblock __meminitdata
+#ifdef ARCH_DISCARD_MEMBLOCK
+#define __init_memblock __init
+#define __initdata_memblock __initdata
#else
#define __init_memblock
#define __initdata_memblock
@@ -243,7 +165,7 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
#else
static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align)
{
- return 0;
+ return MEMBLOCK_ERROR;
}
#endif /* CONFIG_HAVE_MEMBLOCK */
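For reference, a minimal sketch (not part of the patch) of how early boot code typically calls the interface restored above; the function name and sizes are hypothetical, only memblock_alloc() and the MEMBLOCK_ERROR sentinel come from the declarations in this hunk:

	/*
	 * Hypothetical early-boot caller: allocate a 1 MiB, PAGE_SIZE-aligned
	 * scratch buffer from memblock.
	 */
	static void __init example_alloc_scratch(void)
	{
		phys_addr_t pa = memblock_alloc(1024 * 1024, PAGE_SIZE);

		if (pa == MEMBLOCK_ERROR)	/* old API signals failure via sentinel */
			panic("example: out of early memory");

		/* memblock returns a physical address; convert for CPU access. */
		memset(__va(pa), 0, 1024 * 1024);
	}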
diff --git a/trunk/include/linux/mm.h b/trunk/include/linux/mm.h
index 5d9b4c9813bd..4baadd18f4ad 100644
--- a/trunk/include/linux/mm.h
+++ b/trunk/include/linux/mm.h
@@ -1253,34 +1253,41 @@ static inline void pgtable_page_dtor(struct page *page)
extern void free_area_init(unsigned long * zones_size);
extern void free_area_init_node(int nid, unsigned long * zones_size,
unsigned long zone_start_pfn, unsigned long *zholes_size);
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
/*
- * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its
+ * With CONFIG_ARCH_POPULATES_NODE_MAP set, an architecture may initialise its
* zones, allocate the backing mem_map and account for memory holes in a more
* architecture independent manner. This is a substitute for creating the
* zone_sizes[] and zholes_size[] arrays and passing them to
* free_area_init_node()
*
* An architecture is expected to register range of page frames backed by
- * physical memory with memblock_add[_node]() before calling
+ * physical memory with add_active_range() before calling
* free_area_init_nodes() passing in the PFN each zone ends at. At a basic
* usage, an architecture is expected to do something like
*
* unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn,
* max_highmem_pfn};
* for_each_valid_physical_page_range()
- * memblock_add_node(base, size, nid)
+ * add_active_range(node_id, start_pfn, end_pfn)
* free_area_init_nodes(max_zone_pfns);
*
- * free_bootmem_with_active_regions() calls free_bootmem_node() for each
- * registered physical page range. Similarly
- * sparse_memory_present_with_active_regions() calls memory_present() for
- * each range when SPARSEMEM is enabled.
+ * If the architecture guarantees that there are no holes in the ranges
+ * registered with add_active_range(), free_bootmem_active_regions()
+ * will call free_bootmem_node() for each registered physical page range.
+ * Similarly sparse_memory_present_with_active_regions() calls
+ * memory_present() for each range when SPARSEMEM is enabled.
*
* See mm/page_alloc.c for more information on each function exposed by
- * CONFIG_HAVE_MEMBLOCK_NODE_MAP.
+ * CONFIG_ARCH_POPULATES_NODE_MAP
*/
extern void free_area_init_nodes(unsigned long *max_zone_pfn);
+extern void add_active_range(unsigned int nid, unsigned long start_pfn,
+ unsigned long end_pfn);
+extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
+ unsigned long end_pfn);
+extern void remove_all_active_ranges(void);
+void sort_node_map(void);
unsigned long node_map_pfn_alignment(void);
unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
unsigned long end_pfn);
@@ -1293,11 +1300,14 @@ extern void free_bootmem_with_active_regions(int nid,
unsigned long max_low_pfn);
int add_from_early_node_map(struct range *range, int az,
int nr_range, int nid);
+u64 __init find_memory_core_early(int nid, u64 size, u64 align,
+ u64 goal, u64 limit);
+typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
+extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
extern void sparse_memory_present_with_active_regions(int nid);
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
-#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-
-#if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
+#if !defined(CONFIG_ARCH_POPULATES_NODE_MAP) && \
!defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
static inline int __early_pfn_to_nid(unsigned long pfn)
{
diff --git a/trunk/include/linux/mmzone.h b/trunk/include/linux/mmzone.h
index 3ac040f19369..188cb2ffe8db 100644
--- a/trunk/include/linux/mmzone.h
+++ b/trunk/include/linux/mmzone.h
@@ -598,13 +598,13 @@ struct zonelist {
#endif
};
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
struct node_active_region {
unsigned long start_pfn;
unsigned long end_pfn;
int nid;
};
-#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
#ifndef CONFIG_DISCONTIGMEM
/* The array of struct pages - for discontigmem use pgdat->lmem_map */
@@ -720,7 +720,7 @@ extern int movable_zone;
static inline int zone_movable_is_highmem(void)
{
-#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE)
+#if defined(CONFIG_HIGHMEM) && defined(CONFIG_ARCH_POPULATES_NODE_MAP)
return movable_zone == ZONE_HIGHMEM;
#else
return 0;
@@ -938,7 +938,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
#endif
#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
- !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
+ !defined(CONFIG_ARCH_POPULATES_NODE_MAP)
static inline unsigned long early_pfn_to_nid(unsigned long pfn)
{
return 0;
diff --git a/trunk/include/linux/perf_event.h b/trunk/include/linux/perf_event.h
index 08855613ceb3..b1f89122bf6a 100644
--- a/trunk/include/linux/perf_event.h
+++ b/trunk/include/linux/perf_event.h
@@ -54,7 +54,6 @@ enum perf_hw_id {
PERF_COUNT_HW_BUS_CYCLES = 6,
PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8,
- PERF_COUNT_HW_REF_CPU_CYCLES = 9,
PERF_COUNT_HW_MAX, /* non-ABI */
};
@@ -891,7 +890,6 @@ struct perf_event_context {
int nr_active;
int is_active;
int nr_stat;
- int nr_freq;
int rotate_disable;
atomic_t refcount;
struct task_struct *task;
@@ -1065,12 +1063,12 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
}
}
-extern struct jump_label_key_deferred perf_sched_events;
+extern struct jump_label_key perf_sched_events;
static inline void perf_event_task_sched_in(struct task_struct *prev,
struct task_struct *task)
{
- if (static_branch(&perf_sched_events.key))
+ if (static_branch(&perf_sched_events))
__perf_event_task_sched_in(prev, task);
}
@@ -1079,7 +1077,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
{
perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
- if (static_branch(&perf_sched_events.key))
+ if (static_branch(&perf_sched_events))
__perf_event_task_sched_out(prev, next);
}
diff --git a/trunk/include/linux/poison.h b/trunk/include/linux/poison.h
index 2110a81c5e2a..79159de0e341 100644
--- a/trunk/include/linux/poison.h
+++ b/trunk/include/linux/poison.h
@@ -40,6 +40,12 @@
#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */
#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+#define MEMBLOCK_INACTIVE 0x3a84fb0144c9e71bULL
+#else
+#define MEMBLOCK_INACTIVE 0x44c9e71bUL
+#endif
+
#define SLUB_RED_INACTIVE 0xbb
#define SLUB_RED_ACTIVE 0xcc
diff --git a/trunk/include/linux/rcupdate.h b/trunk/include/linux/rcupdate.h
index 81c04f4348ec..2cf4226ade7e 100644
--- a/trunk/include/linux/rcupdate.h
+++ b/trunk/include/linux/rcupdate.h
@@ -51,8 +51,6 @@ extern int rcutorture_runnable; /* for sysctl */
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
extern void rcutorture_record_test_transition(void);
extern void rcutorture_record_progress(unsigned long vernum);
-extern void do_trace_rcu_torture_read(char *rcutorturename,
- struct rcu_head *rhp);
#else
static inline void rcutorture_record_test_transition(void)
{
@@ -60,12 +58,6 @@ static inline void rcutorture_record_test_transition(void)
static inline void rcutorture_record_progress(unsigned long vernum)
{
}
-#ifdef CONFIG_RCU_TRACE
-extern void do_trace_rcu_torture_read(char *rcutorturename,
- struct rcu_head *rhp);
-#else
-#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
-#endif
#endif
#define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b))
@@ -185,10 +177,23 @@ extern void rcu_sched_qs(int cpu);
extern void rcu_bh_qs(int cpu);
extern void rcu_check_callbacks(int cpu, int user);
struct notifier_block;
-extern void rcu_idle_enter(void);
-extern void rcu_idle_exit(void);
-extern void rcu_irq_enter(void);
-extern void rcu_irq_exit(void);
+
+#ifdef CONFIG_NO_HZ
+
+extern void rcu_enter_nohz(void);
+extern void rcu_exit_nohz(void);
+
+#else /* #ifdef CONFIG_NO_HZ */
+
+static inline void rcu_enter_nohz(void)
+{
+}
+
+static inline void rcu_exit_nohz(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_NO_HZ */
/*
* Infrastructure to implement the synchronize_() primitives in
@@ -228,30 +233,22 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-#ifdef CONFIG_PROVE_RCU
-extern int rcu_is_cpu_idle(void);
-#else /* !CONFIG_PROVE_RCU */
-static inline int rcu_is_cpu_idle(void)
-{
- return 0;
-}
-#endif /* else !CONFIG_PROVE_RCU */
-
-static inline void rcu_lock_acquire(struct lockdep_map *map)
-{
- WARN_ON_ONCE(rcu_is_cpu_idle());
- lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_);
-}
-
-static inline void rcu_lock_release(struct lockdep_map *map)
-{
- WARN_ON_ONCE(rcu_is_cpu_idle());
- lock_release(map, 1, _THIS_IP_);
-}
-
extern struct lockdep_map rcu_lock_map;
+# define rcu_read_acquire() \
+ lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
+# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_)
+
extern struct lockdep_map rcu_bh_lock_map;
+# define rcu_read_acquire_bh() \
+ lock_acquire(&rcu_bh_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
+# define rcu_read_release_bh() lock_release(&rcu_bh_lock_map, 1, _THIS_IP_)
+
extern struct lockdep_map rcu_sched_lock_map;
+# define rcu_read_acquire_sched() \
+ lock_acquire(&rcu_sched_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
+# define rcu_read_release_sched() \
+ lock_release(&rcu_sched_lock_map, 1, _THIS_IP_)
+
extern int debug_lockdep_rcu_enabled(void);
/**
@@ -265,18 +262,11 @@ extern int debug_lockdep_rcu_enabled(void);
*
* Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
* and while lockdep is disabled.
- *
- * Note that rcu_read_lock() and the matching rcu_read_unlock() must
- * occur in the same context, for example, it is illegal to invoke
- * rcu_read_unlock() in process context if the matching rcu_read_lock()
- * was invoked from within an irq handler.
*/
static inline int rcu_read_lock_held(void)
{
if (!debug_lockdep_rcu_enabled())
return 1;
- if (rcu_is_cpu_idle())
- return 0;
return lock_is_held(&rcu_lock_map);
}
@@ -300,19 +290,6 @@ extern int rcu_read_lock_bh_held(void);
*
* Check debug_lockdep_rcu_enabled() to prevent false positives during boot
* and while lockdep is disabled.
- *
- * Note that if the CPU is in the idle loop from an RCU point of
- * view (ie: that we are in the section between rcu_idle_enter() and
- * rcu_idle_exit()) then rcu_read_lock_held() returns false even if the CPU
- * did an rcu_read_lock(). The reason for this is that RCU ignores CPUs
- * that are in such a section, considering these as in extended quiescent
- * state, so such a CPU is effectively never in an RCU read-side critical
- * section regardless of what RCU primitives it invokes. This state of
- * affairs is required --- we need to keep an RCU-free window in idle
- * where the CPU may possibly enter into low power mode. This way we can
- * notice an extended quiescent state to other CPUs that started a grace
- * period. Otherwise we would delay any grace period as long as we run in
- * the idle task.
*/
#ifdef CONFIG_PREEMPT_COUNT
static inline int rcu_read_lock_sched_held(void)
@@ -321,8 +298,6 @@ static inline int rcu_read_lock_sched_held(void)
if (!debug_lockdep_rcu_enabled())
return 1;
- if (rcu_is_cpu_idle())
- return 0;
if (debug_locks)
lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
@@ -336,8 +311,12 @@ static inline int rcu_read_lock_sched_held(void)
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-# define rcu_lock_acquire(a) do { } while (0)
-# define rcu_lock_release(a) do { } while (0)
+# define rcu_read_acquire() do { } while (0)
+# define rcu_read_release() do { } while (0)
+# define rcu_read_acquire_bh() do { } while (0)
+# define rcu_read_release_bh() do { } while (0)
+# define rcu_read_acquire_sched() do { } while (0)
+# define rcu_read_release_sched() do { } while (0)
static inline int rcu_read_lock_held(void)
{
@@ -658,7 +637,7 @@ static inline void rcu_read_lock(void)
{
__rcu_read_lock();
__acquire(RCU);
- rcu_lock_acquire(&rcu_lock_map);
+ rcu_read_acquire();
}
/*
@@ -678,7 +657,7 @@ static inline void rcu_read_lock(void)
*/
static inline void rcu_read_unlock(void)
{
- rcu_lock_release(&rcu_lock_map);
+ rcu_read_release();
__release(RCU);
__rcu_read_unlock();
}
@@ -694,17 +673,12 @@ static inline void rcu_read_unlock(void)
* critical sections in interrupt context can use just rcu_read_lock(),
* though this should at least be commented to avoid confusing people
* reading the code.
- *
- * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh()
- * must occur in the same context, for example, it is illegal to invoke
- * rcu_read_unlock_bh() from one task if the matching rcu_read_lock_bh()
- * was invoked from some other task.
*/
static inline void rcu_read_lock_bh(void)
{
local_bh_disable();
__acquire(RCU_BH);
- rcu_lock_acquire(&rcu_bh_lock_map);
+ rcu_read_acquire_bh();
}
/*
@@ -714,7 +688,7 @@ static inline void rcu_read_lock_bh(void)
*/
static inline void rcu_read_unlock_bh(void)
{
- rcu_lock_release(&rcu_bh_lock_map);
+ rcu_read_release_bh();
__release(RCU_BH);
local_bh_enable();
}
@@ -726,17 +700,12 @@ static inline void rcu_read_unlock_bh(void)
* are being done using call_rcu_sched() or synchronize_rcu_sched().
* Read-side critical sections can also be introduced by anything that
* disables preemption, including local_irq_disable() and friends.
- *
- * Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched()
- * must occur in the same context, for example, it is illegal to invoke
- * rcu_read_unlock_sched() from process context if the matching
- * rcu_read_lock_sched() was invoked from an NMI handler.
*/
static inline void rcu_read_lock_sched(void)
{
preempt_disable();
__acquire(RCU_SCHED);
- rcu_lock_acquire(&rcu_sched_lock_map);
+ rcu_read_acquire_sched();
}
/* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
@@ -753,7 +722,7 @@ static inline notrace void rcu_read_lock_sched_notrace(void)
*/
static inline void rcu_read_unlock_sched(void)
{
- rcu_lock_release(&rcu_sched_lock_map);
+ rcu_read_release_sched();
__release(RCU_SCHED);
preempt_enable();
}
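The read-side pattern that rcu_read_acquire()/rcu_read_release() instrument is unchanged by this hunk; as a minimal sketch (the list and structure here are hypothetical):

	struct example_item {
		int key;
		struct list_head node;
		struct rcu_head rcu;
	};

	static int example_lookup(struct list_head *head, int key)
	{
		struct example_item *p;
		int found = 0;

		rcu_read_lock();			/* lockdep: rcu_read_acquire() */
		list_for_each_entry_rcu(p, head, node) {
			if (p->key == key) {
				found = 1;
				break;
			}
		}
		rcu_read_unlock();			/* lockdep: rcu_read_release() */
		return found;
	}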
diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h
index cf0eb342bcba..1c4f3e9b9bc5 100644
--- a/trunk/include/linux/sched.h
+++ b/trunk/include/linux/sched.h
@@ -273,11 +273,9 @@ extern int runqueue_is_locked(int cpu);
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
extern void select_nohz_load_balancer(int stop_tick);
-extern void set_cpu_sd_state_idle(void);
extern int get_nohz_timer_target(void);
#else
static inline void select_nohz_load_balancer(int stop_tick) { }
-static inline void set_cpu_sd_state_idle(void) { }
#endif
/*
@@ -485,8 +483,8 @@ struct task_cputime {
#define INIT_CPUTIME \
(struct task_cputime) { \
- .utime = 0, \
- .stime = 0, \
+ .utime = cputime_zero, \
+ .stime = cputime_zero, \
.sum_exec_runtime = 0, \
}
@@ -903,10 +901,6 @@ struct sched_group_power {
* single CPU.
*/
unsigned int power, power_orig;
- /*
- * Number of busy cpus in this group.
- */
- atomic_t nr_busy_cpus;
};
struct sched_group {
@@ -931,15 +925,6 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
return to_cpumask(sg->cpumask);
}
-/**
- * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
- * @group: The group whose first cpu is to be returned.
- */
-static inline unsigned int group_first_cpu(struct sched_group *group)
-{
- return cpumask_first(sched_group_cpus(group));
-}
-
struct sched_domain_attr {
int relax_domain_level;
};
@@ -1330,8 +1315,8 @@ struct task_struct {
* older sibling, respectively. (p->father can be replaced with
* p->real_parent->pid)
*/
- struct task_struct __rcu *real_parent; /* real parent process */
- struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
+ struct task_struct *real_parent; /* real parent process */
+ struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */
/*
* children/sibling forms the list of my natural children
*/
@@ -2085,14 +2070,6 @@ extern int sched_setscheduler(struct task_struct *, int,
extern int sched_setscheduler_nocheck(struct task_struct *, int,
const struct sched_param *);
extern struct task_struct *idle_task(int cpu);
-/**
- * is_idle_task - is the specified task an idle task?
- * @tsk: the task in question.
- */
-static inline bool is_idle_task(struct task_struct *p)
-{
- return p->pid == 0;
-}
extern struct task_struct *curr_task(int cpu);
extern void set_curr_task(int cpu, struct task_struct *p);
diff --git a/trunk/include/linux/security.h b/trunk/include/linux/security.h
index e8c619d39291..19d8e04e1688 100644
--- a/trunk/include/linux/security.h
+++ b/trunk/include/linux/security.h
@@ -2056,7 +2056,7 @@ static inline int security_old_inode_init_security(struct inode *inode,
char **name, void **value,
size_t *len)
{
- return -EOPNOTSUPP;
+ return 0;
}
static inline int security_inode_create(struct inode *dir,
diff --git a/trunk/include/linux/srcu.h b/trunk/include/linux/srcu.h
index e1b005918bbb..58971e891f48 100644
--- a/trunk/include/linux/srcu.h
+++ b/trunk/include/linux/srcu.h
@@ -28,7 +28,6 @@
#define _LINUX_SRCU_H
#include
-#include
struct srcu_struct_array {
int c[2];
@@ -61,10 +60,18 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name,
__init_srcu_struct((sp), #sp, &__srcu_key); \
})
+# define srcu_read_acquire(sp) \
+ lock_acquire(&(sp)->dep_map, 0, 0, 2, 1, NULL, _THIS_IP_)
+# define srcu_read_release(sp) \
+ lock_release(&(sp)->dep_map, 1, _THIS_IP_)
+
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
int init_srcu_struct(struct srcu_struct *sp);
+# define srcu_read_acquire(sp) do { } while (0)
+# define srcu_read_release(sp) do { } while (0)
+
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
void cleanup_srcu_struct(struct srcu_struct *sp);
@@ -83,32 +90,12 @@ long srcu_batches_completed(struct srcu_struct *sp);
* read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC,
* this assumes we are in an SRCU read-side critical section unless it can
* prove otherwise.
- *
- * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
- * and while lockdep is disabled.
- *
- * Note that if the CPU is in the idle loop from an RCU point of view
- * (ie: that we are in the section between rcu_idle_enter() and
- * rcu_idle_exit()) then srcu_read_lock_held() returns false even if
- * the CPU did an srcu_read_lock(). The reason for this is that RCU
- * ignores CPUs that are in such a section, considering these as in
- * extended quiescent state, so such a CPU is effectively never in an
- * RCU read-side critical section regardless of what RCU primitives it
- * invokes. This state of affairs is required --- we need to keep an
- * RCU-free window in idle where the CPU may possibly enter into low
- * power mode. This way we can notice an extended quiescent state to
- * other CPUs that started a grace period. Otherwise we would delay any
- * grace period as long as we run in the idle task.
*/
static inline int srcu_read_lock_held(struct srcu_struct *sp)
{
- if (rcu_is_cpu_idle())
- return 0;
-
- if (!debug_lockdep_rcu_enabled())
- return 1;
-
- return lock_is_held(&sp->dep_map);
+ if (debug_locks)
+ return lock_is_held(&sp->dep_map);
+ return 1;
}
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -158,17 +145,12 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp)
* one way to indirectly wait on an SRCU grace period is to acquire
* a mutex that is held elsewhere while calling synchronize_srcu() or
* synchronize_srcu_expedited().
- *
- * Note that srcu_read_lock() and the matching srcu_read_unlock() must
- * occur in the same context, for example, it is illegal to invoke
- * srcu_read_unlock() in an irq handler if the matching srcu_read_lock()
- * was invoked in process context.
*/
static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
{
int retval = __srcu_read_lock(sp);
- rcu_lock_acquire(&(sp)->dep_map);
+ srcu_read_acquire(sp);
return retval;
}
@@ -182,51 +164,8 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
__releases(sp)
{
- rcu_lock_release(&(sp)->dep_map);
- __srcu_read_unlock(sp, idx);
-}
-
-/**
- * srcu_read_lock_raw - register a new reader for an SRCU-protected structure.
- * @sp: srcu_struct in which to register the new reader.
- *
- * Enter an SRCU read-side critical section. Similar to srcu_read_lock(),
- * but avoids the RCU-lockdep checking. This means that it is legal to
- * use srcu_read_lock_raw() in one context, for example, in an exception
- * handler, and then have the matching srcu_read_unlock_raw() in another
- * context, for example in the task that took the exception.
- *
- * However, the entire SRCU read-side critical section must reside within a
- * single task. For example, beware of using srcu_read_lock_raw() in
- * a device interrupt handler and srcu_read_unlock() in the interrupted
- * task: This will not work if interrupts are threaded.
- */
-static inline int srcu_read_lock_raw(struct srcu_struct *sp)
-{
- unsigned long flags;
- int ret;
-
- local_irq_save(flags);
- ret = __srcu_read_lock(sp);
- local_irq_restore(flags);
- return ret;
-}
-
-/**
- * srcu_read_unlock_raw - unregister reader from an SRCU-protected structure.
- * @sp: srcu_struct in which to unregister the old reader.
- * @idx: return value from corresponding srcu_read_lock_raw().
- *
- * Exit an SRCU read-side critical section without lockdep-RCU checking.
- * See srcu_read_lock_raw() for more details.
- */
-static inline void srcu_read_unlock_raw(struct srcu_struct *sp, int idx)
-{
- unsigned long flags;
-
- local_irq_save(flags);
+ srcu_read_release(sp);
__srcu_read_unlock(sp, idx);
- local_irq_restore(flags);
}
#endif
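As a reminder of the read-side pairing these srcu_read_acquire()/srcu_read_release() annotations wrap, a minimal sketch (the srcu_struct and the protected data are hypothetical):

	static struct srcu_struct example_srcu;	/* init_srcu_struct() at setup time */

	static void example_reader(void)
	{
		int idx;

		idx = srcu_read_lock(&example_srcu);
		/* ... dereference data protected by example_srcu ... */
		srcu_read_unlock(&example_srcu, idx);
	}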
diff --git a/trunk/include/linux/tick.h b/trunk/include/linux/tick.h
index ab8be90b5cc9..b232ccc0ee29 100644
--- a/trunk/include/linux/tick.h
+++ b/trunk/include/linux/tick.h
@@ -7,7 +7,6 @@
#define _LINUX_TICK_H
#include
-#include
#ifdef CONFIG_GENERIC_CLOCKEVENTS
@@ -122,16 +121,14 @@ static inline int tick_oneshot_mode_active(void) { return 0; }
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
# ifdef CONFIG_NO_HZ
-extern void tick_nohz_idle_enter(void);
-extern void tick_nohz_idle_exit(void);
-extern void tick_nohz_irq_exit(void);
+extern void tick_nohz_stop_sched_tick(int inidle);
+extern void tick_nohz_restart_sched_tick(void);
extern ktime_t tick_nohz_get_sleep_length(void);
extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
# else
-static inline void tick_nohz_idle_enter(void) { }
-static inline void tick_nohz_idle_exit(void) { }
-
+static inline void tick_nohz_stop_sched_tick(int inidle) { }
+static inline void tick_nohz_restart_sched_tick(void) { }
static inline ktime_t tick_nohz_get_sleep_length(void)
{
ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
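A hedged sketch of how an architecture idle loop used the restored tick_nohz_stop_sched_tick()/tick_nohz_restart_sched_tick() pair; real idle loops differ per architecture and the low-power wait below is a hypothetical placeholder:

	void example_cpu_idle(void)
	{
		while (1) {
			tick_nohz_stop_sched_tick(1);		/* 1: called from the idle path */
			while (!need_resched())
				example_wait_for_interrupt();	/* arch-specific low-power wait */
			tick_nohz_restart_sched_tick();
			schedule();
		}
	}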
diff --git a/trunk/include/linux/wait.h b/trunk/include/linux/wait.h
index a9ce45e8501c..3efc9f3f43a0 100644
--- a/trunk/include/linux/wait.h
+++ b/trunk/include/linux/wait.h
@@ -77,13 +77,13 @@ struct task_struct;
#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \
{ .flags = word, .bit_nr = bit, }
-extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *);
+extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *);
#define init_waitqueue_head(q) \
do { \
static struct lock_class_key __key; \
\
- __init_waitqueue_head((q), #q, &__key); \
+ __init_waitqueue_head((q), &__key); \
} while (0)
#ifdef CONFIG_LOCKDEP
diff --git a/trunk/include/net/dst.h b/trunk/include/net/dst.h
index 75766b42660e..6faec1a60216 100644
--- a/trunk/include/net/dst.h
+++ b/trunk/include/net/dst.h
@@ -53,7 +53,6 @@ struct dst_entry {
#define DST_NOHASH 0x0008
#define DST_NOCACHE 0x0010
#define DST_NOCOUNT 0x0020
-#define DST_NOPEER 0x0040
short error;
short obsolete;
diff --git a/trunk/include/net/flow.h b/trunk/include/net/flow.h
index 57f15a7f1cdd..a09447749e2d 100644
--- a/trunk/include/net/flow.h
+++ b/trunk/include/net/flow.h
@@ -207,7 +207,6 @@ extern struct flow_cache_object *flow_cache_lookup(
u8 dir, flow_resolve_t resolver, void *ctx);
extern void flow_cache_flush(void);
-extern void flow_cache_flush_deferred(void);
extern atomic_t flow_cache_genid;
#endif
diff --git a/trunk/include/net/ip_vs.h b/trunk/include/net/ip_vs.h
index e5a7b9aaf552..873d5be7926c 100644
--- a/trunk/include/net/ip_vs.h
+++ b/trunk/include/net/ip_vs.h
@@ -1207,7 +1207,7 @@ extern void ip_vs_control_cleanup(void);
extern struct ip_vs_dest *
ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
__be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
- __u16 protocol, __u32 fwmark, __u32 flags);
+ __u16 protocol, __u32 fwmark);
extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
diff --git a/trunk/include/net/sctp/structs.h b/trunk/include/net/sctp/structs.h
index a15432da27c3..e90e7a9935dd 100644
--- a/trunk/include/net/sctp/structs.h
+++ b/trunk/include/net/sctp/structs.h
@@ -241,9 +241,6 @@ extern struct sctp_globals {
* bits is an indicator of when to send and window update SACK.
*/
int rwnd_update_shift;
-
- /* Threshold for autoclose timeout, in seconds. */
- unsigned long max_autoclose;
} sctp_globals;
#define sctp_rto_initial (sctp_globals.rto_initial)
@@ -284,7 +281,6 @@ extern struct sctp_globals {
#define sctp_auth_enable (sctp_globals.auth_enable)
#define sctp_checksum_disable (sctp_globals.checksum_disable)
#define sctp_rwnd_upd_shift (sctp_globals.rwnd_update_shift)
-#define sctp_max_autoclose (sctp_globals.max_autoclose)
/* SCTP Socket type: UDP or TCP style. */
typedef enum {
diff --git a/trunk/include/net/sock.h b/trunk/include/net/sock.h
index 32e39371fba6..abb6e0f0c3c3 100644
--- a/trunk/include/net/sock.h
+++ b/trunk/include/net/sock.h
@@ -637,14 +637,12 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
/*
* Take into account size of receive queue and backlog queue
- * Do not take into account this skb truesize,
- * to allow even a single big packet to come.
*/
static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb)
{
unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);
- return qsize > sk->sk_rcvbuf;
+ return qsize + skb->truesize > sk->sk_rcvbuf;
}
/* The per-socket spinlock must be held here. */
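With the check restored above, the incoming skb's truesize counts against the limit again. For example (numbers made up): with sk_rcvbuf = 256 KiB and sk_backlog.len + sk_rmem_alloc = 250 KiB, a packet of truesize 10 KiB now reports the queues as full (250 + 10 > 256), whereas the removed variant would not, since 250 is not greater than 256.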
diff --git a/trunk/include/trace/events/rcu.h b/trunk/include/trace/events/rcu.h
index d2d88bed891b..669fbd62ec25 100644
--- a/trunk/include/trace/events/rcu.h
+++ b/trunk/include/trace/events/rcu.h
@@ -241,73 +241,24 @@ TRACE_EVENT(rcu_fqs,
/*
* Tracepoint for dyntick-idle entry/exit events. These take a string
- * as argument: "Start" for entering dyntick-idle mode, "End" for
- * leaving it, "--=" for events moving towards idle, and "++=" for events
- * moving away from idle. "Error on entry: not idle task" and "Error on
- * exit: not idle task" indicate that a non-idle task is erroneously
- * toying with the idle loop.
- *
- * These events also take a pair of numbers, which indicate the nesting
- * depth before and after the event of interest. Note that task-related
- * events use the upper bits of each number, while interrupt-related
- * events use the lower bits.
+ * as argument: "Start" for entering dyntick-idle mode and "End" for
+ * leaving it.
*/
TRACE_EVENT(rcu_dyntick,
- TP_PROTO(char *polarity, long long oldnesting, long long newnesting),
+ TP_PROTO(char *polarity),
- TP_ARGS(polarity, oldnesting, newnesting),
+ TP_ARGS(polarity),
TP_STRUCT__entry(
__field(char *, polarity)
- __field(long long, oldnesting)
- __field(long long, newnesting)
),
TP_fast_assign(
__entry->polarity = polarity;
- __entry->oldnesting = oldnesting;
- __entry->newnesting = newnesting;
- ),
-
- TP_printk("%s %llx %llx", __entry->polarity,
- __entry->oldnesting, __entry->newnesting)
-);
-
-/*
- * Tracepoint for RCU preparation for idle, the goal being to get RCU
- * processing done so that the current CPU can shut off its scheduling
- * clock and enter dyntick-idle mode. One way to accomplish this is
- * to drain all RCU callbacks from this CPU, and the other is to have
- * done everything RCU requires for the current grace period. In this
- * latter case, the CPU will be awakened at the end of the current grace
- * period in order to process the remainder of its callbacks.
- *
- * These tracepoints take a string as argument:
- *
- * "No callbacks": Nothing to do, no callbacks on this CPU.
- * "In holdoff": Nothing to do, holding off after unsuccessful attempt.
- * "Begin holdoff": Attempt failed, don't retry until next jiffy.
- * "Dyntick with callbacks": Entering dyntick-idle despite callbacks.
- * "More callbacks": Still more callbacks, try again to clear them out.
- * "Callbacks drained": All callbacks processed, off to dyntick idle!
- * "Timer": Timer fired to cause CPU to continue processing callbacks.
- */
-TRACE_EVENT(rcu_prep_idle,
-
- TP_PROTO(char *reason),
-
- TP_ARGS(reason),
-
- TP_STRUCT__entry(
- __field(char *, reason)
- ),
-
- TP_fast_assign(
- __entry->reason = reason;
),
- TP_printk("%s", __entry->reason)
+ TP_printk("%s", __entry->polarity)
);
/*
@@ -461,71 +412,27 @@ TRACE_EVENT(rcu_invoke_kfree_callback,
/*
* Tracepoint for exiting rcu_do_batch after RCU callbacks have been
- * invoked. The first argument is the name of the RCU flavor,
- * the second argument is number of callbacks actually invoked,
- * the third argument (cb) is whether or not any of the callbacks that
- * were ready to invoke at the beginning of this batch are still
- * queued, the fourth argument (nr) is the return value of need_resched(),
- * the fifth argument (iit) is 1 if the current task is the idle task,
- * and the sixth argument (risk) is the return value from
- * rcu_is_callbacks_kthread().
+ * invoked. The first argument is the name of the RCU flavor and
+ * the second argument is number of callbacks actually invoked.
*/
TRACE_EVENT(rcu_batch_end,
- TP_PROTO(char *rcuname, int callbacks_invoked,
- bool cb, bool nr, bool iit, bool risk),
+ TP_PROTO(char *rcuname, int callbacks_invoked),
- TP_ARGS(rcuname, callbacks_invoked, cb, nr, iit, risk),
+ TP_ARGS(rcuname, callbacks_invoked),
TP_STRUCT__entry(
__field(char *, rcuname)
__field(int, callbacks_invoked)
- __field(bool, cb)
- __field(bool, nr)
- __field(bool, iit)
- __field(bool, risk)
),
TP_fast_assign(
__entry->rcuname = rcuname;
__entry->callbacks_invoked = callbacks_invoked;
- __entry->cb = cb;
- __entry->nr = nr;
- __entry->iit = iit;
- __entry->risk = risk;
- ),
-
- TP_printk("%s CBs-invoked=%d idle=%c%c%c%c",
- __entry->rcuname, __entry->callbacks_invoked,
- __entry->cb ? 'C' : '.',
- __entry->nr ? 'S' : '.',
- __entry->iit ? 'I' : '.',
- __entry->risk ? 'R' : '.')
-);
-
-/*
- * Tracepoint for rcutorture readers. The first argument is the name
- * of the RCU flavor from rcutorture's viewpoint and the second argument
- * is the callback address.
- */
-TRACE_EVENT(rcu_torture_read,
-
- TP_PROTO(char *rcutorturename, struct rcu_head *rhp),
-
- TP_ARGS(rcutorturename, rhp),
-
- TP_STRUCT__entry(
- __field(char *, rcutorturename)
- __field(struct rcu_head *, rhp)
- ),
-
- TP_fast_assign(
- __entry->rcutorturename = rcutorturename;
- __entry->rhp = rhp;
),
- TP_printk("%s torture read %p",
- __entry->rcutorturename, __entry->rhp)
+ TP_printk("%s CBs-invoked=%d",
+ __entry->rcuname, __entry->callbacks_invoked)
);
#else /* #ifdef CONFIG_RCU_TRACE */
@@ -536,16 +443,13 @@ TRACE_EVENT(rcu_torture_read,
#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0)
#define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0)
-#define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0)
-#define trace_rcu_prep_idle(reason) do { } while (0)
+#define trace_rcu_dyntick(polarity) do { } while (0)
#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0)
#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0)
#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0)
#define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0)
#define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0)
-#define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \
- do { } while (0)
-#define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0)
#endif /* #else #ifdef CONFIG_RCU_TRACE */
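With the simplified prototypes above, the call sites pass only the string tag and the callback count; a hedged sketch of how the two tracepoints would now be invoked (the surrounding functions are hypothetical stand-ins for the RCU core):

	static void example_enter_nohz(void)
	{
		trace_rcu_dyntick("Start");		/* entering dyntick-idle */
		/* ... */
	}

	static void example_rcu_do_batch(char *rcuname, int invoked)
	{
		/* ... invoke callbacks, counting them in 'invoked' ... */
		trace_rcu_batch_end(rcuname, invoked);
	}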
diff --git a/trunk/include/trace/events/sched.h b/trunk/include/trace/events/sched.h
index 6ba596b07a72..959ff18b63b6 100644
--- a/trunk/include/trace/events/sched.h
+++ b/trunk/include/trace/events/sched.h
@@ -330,13 +330,6 @@ DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
TP_PROTO(struct task_struct *tsk, u64 delay),
TP_ARGS(tsk, delay));
-/*
- * Tracepoint for accounting blocked time (time the task is in uninterruptible).
- */
-DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
- TP_PROTO(struct task_struct *tsk, u64 delay),
- TP_ARGS(tsk, delay));
-
/*
* Tracepoint for accounting runtime (time the task is executing
* on a CPU).
@@ -370,56 +363,6 @@ TRACE_EVENT(sched_stat_runtime,
(unsigned long long)__entry->vruntime)
);
-#ifdef CREATE_TRACE_POINTS
-static inline u64 trace_get_sleeptime(struct task_struct *tsk)
-{
-#ifdef CONFIG_SCHEDSTATS
- u64 block, sleep;
-
- block = tsk->se.statistics.block_start;
- sleep = tsk->se.statistics.sleep_start;
- tsk->se.statistics.block_start = 0;
- tsk->se.statistics.sleep_start = 0;
-
- return block ? block : sleep ? sleep : 0;
-#else
- return 0;
-#endif
-}
-#endif
-
-/*
- * Tracepoint for accounting sleeptime (time the task is sleeping
- * or waiting for I/O).
- */
-TRACE_EVENT(sched_stat_sleeptime,
-
- TP_PROTO(struct task_struct *tsk, u64 now),
-
- TP_ARGS(tsk, now),
-
- TP_STRUCT__entry(
- __array( char, comm, TASK_COMM_LEN )
- __field( pid_t, pid )
- __field( u64, sleeptime )
- ),
-
- TP_fast_assign(
- memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
- __entry->pid = tsk->pid;
- __entry->sleeptime = trace_get_sleeptime(tsk);
- __entry->sleeptime = __entry->sleeptime ?
- now - __entry->sleeptime : 0;
- )
- TP_perf_assign(
- __perf_count(__entry->sleeptime);
- ),
-
- TP_printk("comm=%s pid=%d sleeptime=%Lu [ns]",
- __entry->comm, __entry->pid,
- (unsigned long long)__entry->sleeptime)
-);
-
/*
* Tracepoint for showing priority inheritance modifying a tasks
* priority.
diff --git a/trunk/include/trace/events/writeback.h b/trunk/include/trace/events/writeback.h
index 99d1d0decf88..b99caa8b780c 100644
--- a/trunk/include/trace/events/writeback.h
+++ b/trunk/include/trace/events/writeback.h
@@ -21,16 +21,6 @@
{I_REFERENCED, "I_REFERENCED"} \
)
-#define WB_WORK_REASON \
- {WB_REASON_BACKGROUND, "background"}, \
- {WB_REASON_TRY_TO_FREE_PAGES, "try_to_free_pages"}, \
- {WB_REASON_SYNC, "sync"}, \
- {WB_REASON_PERIODIC, "periodic"}, \
- {WB_REASON_LAPTOP_TIMER, "laptop_timer"}, \
- {WB_REASON_FREE_MORE_MEM, "free_more_memory"}, \
- {WB_REASON_FS_FREE_SPACE, "fs_free_space"}, \
- {WB_REASON_FORKER_THREAD, "forker_thread"}
-
struct wb_writeback_work;
DECLARE_EVENT_CLASS(writeback_work_class,
@@ -65,7 +55,7 @@ DECLARE_EVENT_CLASS(writeback_work_class,
__entry->for_kupdate,
__entry->range_cyclic,
__entry->for_background,
- __print_symbolic(__entry->reason, WB_WORK_REASON)
+ wb_reason_name[__entry->reason]
)
);
#define DEFINE_WRITEBACK_WORK_EVENT(name) \
@@ -194,8 +184,7 @@ TRACE_EVENT(writeback_queue_io,
__entry->older, /* older_than_this in jiffies */
__entry->age, /* older_than_this in relative milliseconds */
__entry->moved,
- __print_symbolic(__entry->reason, WB_WORK_REASON)
- )
+ wb_reason_name[__entry->reason])
);
TRACE_EVENT(global_dirty_state,
diff --git a/trunk/init/Kconfig b/trunk/init/Kconfig
index a34cd174b768..43298f9810fb 100644
--- a/trunk/init/Kconfig
+++ b/trunk/init/Kconfig
@@ -469,14 +469,14 @@ config RCU_FANOUT_EXACT
config RCU_FAST_NO_HZ
bool "Accelerate last non-dyntick-idle CPU's grace periods"
- depends on NO_HZ && SMP
+ depends on TREE_RCU && NO_HZ && SMP
default n
help
This option causes RCU to attempt to accelerate grace periods
- in order to allow CPUs to enter dynticks-idle state more
- quickly. On the other hand, this option increases the overhead
- of the dynticks-idle checking, particularly on systems with
- large numbers of CPUs.
+ in order to allow the final CPU to enter dynticks-idle state
+ more quickly. On the other hand, this option increases the
+ overhead of the dynticks-idle checking, particularly on systems
+ with large numbers of CPUs.
Say Y if energy efficiency is critically important, particularly
if you have relatively few CPUs.
@@ -702,6 +702,7 @@ config CGROUP_PERF
menuconfig CGROUP_SCHED
bool "Group CPU scheduler"
+ depends on EXPERIMENTAL
default n
help
This feature lets CPU scheduler recognize task groups and control CPU
diff --git a/trunk/init/main.c b/trunk/init/main.c
index 2c76efb513c2..217ed23e9487 100644
--- a/trunk/init/main.c
+++ b/trunk/init/main.c
@@ -469,12 +469,13 @@ asmlinkage void __init start_kernel(void)
char * command_line;
extern const struct kernel_param __start___param[], __stop___param[];
+ smp_setup_processor_id();
+
/*
* Need to run as early as possible, to initialize the
* lockdep hash:
*/
lockdep_init();
- smp_setup_processor_id();
debug_objects_early_init();
/*
diff --git a/trunk/kernel/Makefile b/trunk/kernel/Makefile
index f70396e5a24b..e898c5b9d02c 100644
--- a/trunk/kernel/Makefile
+++ b/trunk/kernel/Makefile
@@ -2,15 +2,16 @@
# Makefile for the linux kernel.
#
-obj-y = fork.o exec_domain.o panic.o printk.o \
+obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
cpu.o exit.o itimer.o time.o softirq.o resource.o \
sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
- notifier.o ksysfs.o cred.o \
- async.o range.o groups.o
+ notifier.o ksysfs.o sched_clock.o cred.o \
+ async.o range.o
+obj-y += groups.o
ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files
@@ -19,11 +20,10 @@ CFLAGS_REMOVE_lockdep_proc.o = -pg
CFLAGS_REMOVE_mutex-debug.o = -pg
CFLAGS_REMOVE_rtmutex-debug.o = -pg
CFLAGS_REMOVE_cgroup-debug.o = -pg
+CFLAGS_REMOVE_sched_clock.o = -pg
CFLAGS_REMOVE_irq_work.o = -pg
endif
-obj-y += sched/
-
obj-$(CONFIG_FREEZER) += freezer.o
obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
@@ -99,6 +99,7 @@ obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_X86_DS) += trace/
obj-$(CONFIG_RING_BUFFER) += trace/
obj-$(CONFIG_TRACEPOINTS) += trace/
+obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
@@ -109,6 +110,15 @@ obj-$(CONFIG_PADATA) += padata.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
+ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
+# According to Alan Modra, the -fno-omit-frame-pointer is
+# needed for x86 only. Why this used to be enabled for all architectures is beyond
+# me. I suspect most platforms don't need this, but until we know that for sure
+# I turn this off for IA-64 only. Andreas Schwab says it's also needed on m68k
+# to get a correct value for the wait-channel (WCHAN in ps). --davidm
+CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer
+endif
+
$(obj)/configs.o: $(obj)/config_data.h
# config_data.h contains the same information as ikconfig.h but gzipped.
diff --git a/trunk/kernel/acct.c b/trunk/kernel/acct.c
index 203dfead2e06..fa7eb3de2ddc 100644
--- a/trunk/kernel/acct.c
+++ b/trunk/kernel/acct.c
@@ -613,8 +613,8 @@ void acct_collect(long exitcode, int group_dead)
pacct->ac_flag |= ACORE;
if (current->flags & PF_SIGNALED)
pacct->ac_flag |= AXSIG;
- pacct->ac_utime += current->utime;
- pacct->ac_stime += current->stime;
+ pacct->ac_utime = cputime_add(pacct->ac_utime, current->utime);
+ pacct->ac_stime = cputime_add(pacct->ac_stime, current->stime);
pacct->ac_minflt += current->min_flt;
pacct->ac_majflt += current->maj_flt;
spin_unlock_irq(&current->sighand->siglock);
diff --git a/trunk/kernel/cpu.c b/trunk/kernel/cpu.c
index 5ca38d5d238a..563f13609470 100644
--- a/trunk/kernel/cpu.c
+++ b/trunk/kernel/cpu.c
@@ -178,7 +178,8 @@ static inline void check_for_tasks(int cpu)
write_lock_irq(&tasklist_lock);
for_each_process(p) {
if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
- (p->utime || p->stime))
+ (!cputime_eq(p->utime, cputime_zero) ||
+ !cputime_eq(p->stime, cputime_zero)))
printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
"(state = %ld, flags = %x)\n",
p->comm, task_pid_nr(p), cpu,
@@ -379,7 +380,6 @@ int __cpuinit cpu_up(unsigned int cpu)
cpu_maps_update_done();
return err;
}
-EXPORT_SYMBOL_GPL(cpu_up);
#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;
diff --git a/trunk/kernel/debug/kdb/kdb_support.c b/trunk/kernel/debug/kdb/kdb_support.c
index 7d6fb40d2188..5532dd37aa86 100644
--- a/trunk/kernel/debug/kdb/kdb_support.c
+++ b/trunk/kernel/debug/kdb/kdb_support.c
@@ -636,7 +636,7 @@ char kdb_task_state_char (const struct task_struct *p)
(p->exit_state & EXIT_ZOMBIE) ? 'Z' :
(p->exit_state & EXIT_DEAD) ? 'E' :
(p->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
- if (is_idle_task(p)) {
+ if (p->pid == 0) {
/* Idle task. Is it really idle, apart from the kdb
* interrupt? */
if (!kdb_task_has_cpu(p) || kgdb_info[cpu].irq_depth == 1) {
diff --git a/trunk/kernel/events/Makefile b/trunk/kernel/events/Makefile
index 22d901f9caf4..89e5e8aa4c36 100644
--- a/trunk/kernel/events/Makefile
+++ b/trunk/kernel/events/Makefile
@@ -2,5 +2,5 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_core.o = -pg
endif
-obj-y := core.o ring_buffer.o callchain.o
+obj-y := core.o ring_buffer.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
diff --git a/trunk/kernel/events/callchain.c b/trunk/kernel/events/callchain.c
deleted file mode 100644
index 057e24b665cf..000000000000
--- a/trunk/kernel/events/callchain.c
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Performance events callchain code, extracted from core.c:
- *
- * Copyright (C) 2008 Thomas Gleixner
- * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
- * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
- * Copyright © 2009 Paul Mackerras, IBM Corp.
- *
- * For licensing details see kernel-base/COPYING
- */
-
-#include
-#include
-#include "internal.h"
-
-struct callchain_cpus_entries {
- struct rcu_head rcu_head;
- struct perf_callchain_entry *cpu_entries[0];
-};
-
-static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
-static atomic_t nr_callchain_events;
-static DEFINE_MUTEX(callchain_mutex);
-static struct callchain_cpus_entries *callchain_cpus_entries;
-
-
-__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
- struct pt_regs *regs)
-{
-}
-
-__weak void perf_callchain_user(struct perf_callchain_entry *entry,
- struct pt_regs *regs)
-{
-}
-
-static void release_callchain_buffers_rcu(struct rcu_head *head)
-{
- struct callchain_cpus_entries *entries;
- int cpu;
-
- entries = container_of(head, struct callchain_cpus_entries, rcu_head);
-
- for_each_possible_cpu(cpu)
- kfree(entries->cpu_entries[cpu]);
-
- kfree(entries);
-}
-
-static void release_callchain_buffers(void)
-{
- struct callchain_cpus_entries *entries;
-
- entries = callchain_cpus_entries;
- rcu_assign_pointer(callchain_cpus_entries, NULL);
- call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
-}
-
-static int alloc_callchain_buffers(void)
-{
- int cpu;
- int size;
- struct callchain_cpus_entries *entries;
-
- /*
- * We can't use the percpu allocation API for data that can be
- * accessed from NMI. Use a temporary manual per cpu allocation
- * until that gets sorted out.
- */
- size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);
-
- entries = kzalloc(size, GFP_KERNEL);
- if (!entries)
- return -ENOMEM;
-
- size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
-
- for_each_possible_cpu(cpu) {
- entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
- cpu_to_node(cpu));
- if (!entries->cpu_entries[cpu])
- goto fail;
- }
-
- rcu_assign_pointer(callchain_cpus_entries, entries);
-
- return 0;
-
-fail:
- for_each_possible_cpu(cpu)
- kfree(entries->cpu_entries[cpu]);
- kfree(entries);
-
- return -ENOMEM;
-}
-
-int get_callchain_buffers(void)
-{
- int err = 0;
- int count;
-
- mutex_lock(&callchain_mutex);
-
- count = atomic_inc_return(&nr_callchain_events);
- if (WARN_ON_ONCE(count < 1)) {
- err = -EINVAL;
- goto exit;
- }
-
- if (count > 1) {
- /* If the allocation failed, give up */
- if (!callchain_cpus_entries)
- err = -ENOMEM;
- goto exit;
- }
-
- err = alloc_callchain_buffers();
- if (err)
- release_callchain_buffers();
-exit:
- mutex_unlock(&callchain_mutex);
-
- return err;
-}
-
-void put_callchain_buffers(void)
-{
- if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
- release_callchain_buffers();
- mutex_unlock(&callchain_mutex);
- }
-}
-
-static struct perf_callchain_entry *get_callchain_entry(int *rctx)
-{
- int cpu;
- struct callchain_cpus_entries *entries;
-
- *rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
- if (*rctx == -1)
- return NULL;
-
- entries = rcu_dereference(callchain_cpus_entries);
- if (!entries)
- return NULL;
-
- cpu = smp_processor_id();
-
- return &entries->cpu_entries[cpu][*rctx];
-}
-
-static void
-put_callchain_entry(int rctx)
-{
- put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
-}
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
- int rctx;
- struct perf_callchain_entry *entry;
-
-
- entry = get_callchain_entry(&rctx);
- if (rctx == -1)
- return NULL;
-
- if (!entry)
- goto exit_put;
-
- entry->nr = 0;
-
- if (!user_mode(regs)) {
- perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
- perf_callchain_kernel(entry, regs);
- if (current->mm)
- regs = task_pt_regs(current);
- else
- regs = NULL;
- }
-
- if (regs) {
- perf_callchain_store(entry, PERF_CONTEXT_USER);
- perf_callchain_user(entry, regs);
- }
-
-exit_put:
- put_callchain_entry(rctx);
-
- return entry;
-}
diff --git a/trunk/kernel/events/core.c b/trunk/kernel/events/core.c
index 890eb02c2f21..58690af323e4 100644
--- a/trunk/kernel/events/core.c
+++ b/trunk/kernel/events/core.c
@@ -128,7 +128,7 @@ enum event_type_t {
* perf_sched_events : >0 events exist
* perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
*/
-struct jump_label_key_deferred perf_sched_events __read_mostly;
+struct jump_label_key perf_sched_events __read_mostly;
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static atomic_t nr_mmap_events __read_mostly;
@@ -1130,8 +1130,6 @@ event_sched_out(struct perf_event *event,
if (!is_software_event(event))
cpuctx->active_oncpu--;
ctx->nr_active--;
- if (event->attr.freq && event->attr.sample_freq)
- ctx->nr_freq--;
if (event->attr.exclusive || !cpuctx->active_oncpu)
cpuctx->exclusive = 0;
}
@@ -1327,7 +1325,6 @@ void perf_event_disable(struct perf_event *event)
}
raw_spin_unlock_irq(&ctx->lock);
}
-EXPORT_SYMBOL_GPL(perf_event_disable);
static void perf_set_shadow_time(struct perf_event *event,
struct perf_event_context *ctx,
@@ -1409,8 +1406,6 @@ event_sched_in(struct perf_event *event,
if (!is_software_event(event))
cpuctx->active_oncpu++;
ctx->nr_active++;
- if (event->attr.freq && event->attr.sample_freq)
- ctx->nr_freq++;
if (event->attr.exclusive)
cpuctx->exclusive = 1;
@@ -1667,7 +1662,8 @@ perf_install_in_context(struct perf_event_context *ctx,
* Note: this works for group members as well as group leaders
* since the non-leader members' sibling_lists will be empty.
*/
-static void __perf_event_mark_enabled(struct perf_event *event)
+static void __perf_event_mark_enabled(struct perf_event *event,
+ struct perf_event_context *ctx)
{
struct perf_event *sub;
u64 tstamp = perf_event_time(event);
@@ -1705,7 +1701,7 @@ static int __perf_event_enable(void *info)
*/
perf_cgroup_set_timestamp(current, ctx);
- __perf_event_mark_enabled(event);
+ __perf_event_mark_enabled(event, ctx);
if (!event_filter_match(event)) {
if (is_cgroup_event(event))
@@ -1786,7 +1782,7 @@ void perf_event_enable(struct perf_event *event)
retry:
if (!ctx->is_active) {
- __perf_event_mark_enabled(event);
+ __perf_event_mark_enabled(event, ctx);
goto out;
}
@@ -1813,7 +1809,6 @@ void perf_event_enable(struct perf_event *event)
out:
raw_spin_unlock_irq(&ctx->lock);
}
-EXPORT_SYMBOL_GPL(perf_event_enable);
int perf_event_refresh(struct perf_event *event, int refresh)
{
@@ -2332,9 +2327,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
u64 interrupts, now;
s64 delta;
- if (!ctx->nr_freq)
- return;
-
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
if (event->state != PERF_EVENT_STATE_ACTIVE)
continue;
@@ -2390,14 +2382,12 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
{
u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
struct perf_event_context *ctx = NULL;
- int rotate = 0, remove = 1, freq = 0;
+ int rotate = 0, remove = 1;
if (cpuctx->ctx.nr_events) {
remove = 0;
if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
rotate = 1;
- if (cpuctx->ctx.nr_freq)
- freq = 1;
}
ctx = cpuctx->task_ctx;
@@ -2405,40 +2395,33 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
remove = 0;
if (ctx->nr_events != ctx->nr_active)
rotate = 1;
- if (ctx->nr_freq)
- freq = 1;
}
- if (!rotate && !freq)
- goto done;
-
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
perf_pmu_disable(cpuctx->ctx.pmu);
+ perf_ctx_adjust_freq(&cpuctx->ctx, interval);
+ if (ctx)
+ perf_ctx_adjust_freq(ctx, interval);
- if (freq) {
- perf_ctx_adjust_freq(&cpuctx->ctx, interval);
- if (ctx)
- perf_ctx_adjust_freq(ctx, interval);
- }
-
- if (rotate) {
- cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
- if (ctx)
- ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
+ if (!rotate)
+ goto done;
- rotate_ctx(&cpuctx->ctx);
- if (ctx)
- rotate_ctx(ctx);
+ cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+ if (ctx)
+ ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
- perf_event_sched_in(cpuctx, ctx, current);
- }
+ rotate_ctx(&cpuctx->ctx);
+ if (ctx)
+ rotate_ctx(ctx);
- perf_pmu_enable(cpuctx->ctx.pmu);
- perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+ perf_event_sched_in(cpuctx, ctx, current);
done:
if (remove)
list_del_init(&cpuctx->rotation_list);
+
+ perf_pmu_enable(cpuctx->ctx.pmu);
+ perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
void perf_event_task_tick(void)
@@ -2465,7 +2448,7 @@ static int event_enable_on_exec(struct perf_event *event,
if (event->state >= PERF_EVENT_STATE_INACTIVE)
return 0;
- __perf_event_mark_enabled(event);
+ __perf_event_mark_enabled(event, ctx);
return 1;
}
@@ -2497,7 +2480,13 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
raw_spin_lock(&ctx->lock);
task_ctx_sched_out(ctx);
- list_for_each_entry(event, &ctx->event_list, event_entry) {
+ list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+ ret = event_enable_on_exec(event, ctx);
+ if (ret)
+ enabled = 1;
+ }
+
+ list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
ret = event_enable_on_exec(event, ctx);
if (ret)
enabled = 1;
@@ -2584,6 +2573,215 @@ static u64 perf_event_read(struct perf_event *event)
return perf_event_count(event);
}
+/*
+ * Callchain support
+ */
+
+struct callchain_cpus_entries {
+ struct rcu_head rcu_head;
+ struct perf_callchain_entry *cpu_entries[0];
+};
+
+static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
+static atomic_t nr_callchain_events;
+static DEFINE_MUTEX(callchain_mutex);
+struct callchain_cpus_entries *callchain_cpus_entries;
+
+
+__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
+{
+}
+
+__weak void perf_callchain_user(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
+{
+}
+
+static void release_callchain_buffers_rcu(struct rcu_head *head)
+{
+ struct callchain_cpus_entries *entries;
+ int cpu;
+
+ entries = container_of(head, struct callchain_cpus_entries, rcu_head);
+
+ for_each_possible_cpu(cpu)
+ kfree(entries->cpu_entries[cpu]);
+
+ kfree(entries);
+}
+
+static void release_callchain_buffers(void)
+{
+ struct callchain_cpus_entries *entries;
+
+ entries = callchain_cpus_entries;
+ rcu_assign_pointer(callchain_cpus_entries, NULL);
+ call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
+}
+
+static int alloc_callchain_buffers(void)
+{
+ int cpu;
+ int size;
+ struct callchain_cpus_entries *entries;
+
+ /*
+ * We can't use the percpu allocation API for data that can be
+ * accessed from NMI. Use a temporary manual per cpu allocation
+ * until that gets sorted out.
+ */
+ size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);
+
+ entries = kzalloc(size, GFP_KERNEL);
+ if (!entries)
+ return -ENOMEM;
+
+ size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
+
+ for_each_possible_cpu(cpu) {
+ entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
+ cpu_to_node(cpu));
+ if (!entries->cpu_entries[cpu])
+ goto fail;
+ }
+
+ rcu_assign_pointer(callchain_cpus_entries, entries);
+
+ return 0;
+
+fail:
+ for_each_possible_cpu(cpu)
+ kfree(entries->cpu_entries[cpu]);
+ kfree(entries);
+
+ return -ENOMEM;
+}
+
+static int get_callchain_buffers(void)
+{
+ int err = 0;
+ int count;
+
+ mutex_lock(&callchain_mutex);
+
+ count = atomic_inc_return(&nr_callchain_events);
+ if (WARN_ON_ONCE(count < 1)) {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ if (count > 1) {
+ /* If the allocation failed, give up */
+ if (!callchain_cpus_entries)
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ err = alloc_callchain_buffers();
+ if (err)
+ release_callchain_buffers();
+exit:
+ mutex_unlock(&callchain_mutex);
+
+ return err;
+}
+
+static void put_callchain_buffers(void)
+{
+ if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
+ release_callchain_buffers();
+ mutex_unlock(&callchain_mutex);
+ }
+}
+
+static int get_recursion_context(int *recursion)
+{
+ int rctx;
+
+ if (in_nmi())
+ rctx = 3;
+ else if (in_irq())
+ rctx = 2;
+ else if (in_softirq())
+ rctx = 1;
+ else
+ rctx = 0;
+
+ if (recursion[rctx])
+ return -1;
+
+ recursion[rctx]++;
+ barrier();
+
+ return rctx;
+}
+
+static inline void put_recursion_context(int *recursion, int rctx)
+{
+ barrier();
+ recursion[rctx]--;
+}
+
+static struct perf_callchain_entry *get_callchain_entry(int *rctx)
+{
+ int cpu;
+ struct callchain_cpus_entries *entries;
+
+ *rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
+ if (*rctx == -1)
+ return NULL;
+
+ entries = rcu_dereference(callchain_cpus_entries);
+ if (!entries)
+ return NULL;
+
+ cpu = smp_processor_id();
+
+ return &entries->cpu_entries[cpu][*rctx];
+}
+
+static void
+put_callchain_entry(int rctx)
+{
+ put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
+}
+
+static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+ int rctx;
+ struct perf_callchain_entry *entry;
+
+
+ entry = get_callchain_entry(&rctx);
+ if (rctx == -1)
+ return NULL;
+
+ if (!entry)
+ goto exit_put;
+
+ entry->nr = 0;
+
+ if (!user_mode(regs)) {
+ perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
+ perf_callchain_kernel(entry, regs);
+ if (current->mm)
+ regs = task_pt_regs(current);
+ else
+ regs = NULL;
+ }
+
+ if (regs) {
+ perf_callchain_store(entry, PERF_CONTEXT_USER);
+ perf_callchain_user(entry, regs);
+ }
+
+exit_put:
+ put_callchain_entry(rctx);
+
+ return entry;
+}
+
/*
* Initialize the perf_event context in a task_struct:
*/
@@ -2748,7 +2946,7 @@ static void free_event(struct perf_event *event)
if (!event->parent) {
if (event->attach_state & PERF_ATTACH_TASK)
- jump_label_dec_deferred(&perf_sched_events);
+ jump_label_dec(&perf_sched_events);
if (event->attr.mmap || event->attr.mmap_data)
atomic_dec(&nr_mmap_events);
if (event->attr.comm)
@@ -2759,7 +2957,7 @@ static void free_event(struct perf_event *event)
put_callchain_buffers();
if (is_cgroup_event(event)) {
atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
- jump_label_dec_deferred(&perf_sched_events);
+ jump_label_dec(&perf_sched_events);
}
}
@@ -4622,6 +4820,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
struct hw_perf_event *hwc = &event->hw;
int throttle = 0;
+ data->period = event->hw.last_period;
if (!overflow)
overflow = perf_swevent_set_period(event);
@@ -4655,12 +4854,6 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
if (!is_sampling_event(event))
return;
- if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
- data->period = nr;
- return perf_swevent_overflow(event, 1, data, regs);
- } else
- data->period = event->hw.last_period;
-
if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
return perf_swevent_overflow(event, 1, data, regs);
@@ -5173,7 +5366,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
regs = get_irq_regs();
if (regs && !perf_exclude_event(event, regs)) {
- if (!(event->attr.exclude_idle && is_idle_task(current)))
+ if (!(event->attr.exclude_idle && current->pid == 0))
if (perf_event_overflow(event, &data, regs))
ret = HRTIMER_NORESTART;
}
@@ -5788,7 +5981,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
if (!event->parent) {
if (event->attach_state & PERF_ATTACH_TASK)
- jump_label_inc(&perf_sched_events.key);
+ jump_label_inc(&perf_sched_events);
if (event->attr.mmap || event->attr.mmap_data)
atomic_inc(&nr_mmap_events);
if (event->attr.comm)
@@ -6026,7 +6219,7 @@ SYSCALL_DEFINE5(perf_event_open,
* - that may need work on context switch
*/
atomic_inc(&per_cpu(perf_cgroup_events, event->cpu));
- jump_label_inc(&perf_sched_events.key);
+ jump_label_inc(&perf_sched_events);
}
/*
@@ -6872,9 +7065,6 @@ void __init perf_event_init(void)
ret = init_hw_breakpoint();
WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
-
- /* do not patch jump label more than once per second */
- jump_label_rate_limit(&perf_sched_events, HZ);
}
static int __init perf_event_sysfs_init(void)
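
The callchain hunks above reinstate a per-CPU recursion guard indexed by execution context (task, softirq, hardirq, NMI), so a sample that interrupts another sample in the same context is dropped rather than nested. Below is a minimal user-space sketch of that counter logic; the names (ctx_recursion, enter_ctx, exit_ctx) are illustrative stand-ins, and the kernel's barrier() and per-CPU storage are omitted for brevity.

        /* Minimal user-space analogue of the per-context recursion guard
         * shown above; the array index stands in for task/softirq/hardirq/NMI.
         * All names here are illustrative, not kernel API. */
        #include <stdio.h>

        static int ctx_recursion[4];            /* one counter per context level */

        static int enter_ctx(int level)
        {
                if (ctx_recursion[level])
                        return -1;              /* already sampling in this context */
                ctx_recursion[level]++;
                return level;
        }

        static void exit_ctx(int level)
        {
                ctx_recursion[level]--;
        }

        int main(void)
        {
                int rctx = enter_ctx(2);                        /* e.g. "hardirq" */
                printf("first entry: %d\n", rctx);              /* 2 */
                printf("nested entry: %d\n", enter_ctx(2));     /* -1, rejected */
                exit_ctx(rctx);
                return 0;
        }
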
diff --git a/trunk/kernel/events/internal.h b/trunk/kernel/events/internal.h
index b0b107f90afc..64568a699375 100644
--- a/trunk/kernel/events/internal.h
+++ b/trunk/kernel/events/internal.h
@@ -1,10 +1,6 @@
#ifndef _KERNEL_EVENTS_INTERNAL_H
#define _KERNEL_EVENTS_INTERNAL_H
-#include <linux/hardirq.h>
-
-/* Buffer handling */
-
#define RING_BUFFER_WRITABLE 0x01
struct ring_buffer {
@@ -71,7 +67,7 @@ static inline int page_order(struct ring_buffer *rb)
}
#endif
-static inline unsigned long perf_data_size(struct ring_buffer *rb)
+static unsigned long perf_data_size(struct ring_buffer *rb)
{
return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
}
@@ -100,37 +96,4 @@ __output_copy(struct perf_output_handle *handle,
} while (len);
}
-/* Callchain handling */
-extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
-extern int get_callchain_buffers(void);
-extern void put_callchain_buffers(void);
-
-static inline int get_recursion_context(int *recursion)
-{
- int rctx;
-
- if (in_nmi())
- rctx = 3;
- else if (in_irq())
- rctx = 2;
- else if (in_softirq())
- rctx = 1;
- else
- rctx = 0;
-
- if (recursion[rctx])
- return -1;
-
- recursion[rctx]++;
- barrier();
-
- return rctx;
-}
-
-static inline void put_recursion_context(int *recursion, int rctx)
-{
- barrier();
- recursion[rctx]--;
-}
-
#endif /* _KERNEL_EVENTS_INTERNAL_H */
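
For reference, the perf_data_size() helper kept in this header computes the ring-buffer data size as the page count shifted by PAGE_SHIFT plus the allocation order. A tiny standalone illustration of that arithmetic, assuming hypothetical 4 KiB pages and order-0 backing:

        /* Illustration only: with assumed 4 KiB pages (PAGE_SHIFT == 12)
         * and order-0 backing, 8 ring-buffer pages map to 32 KiB of data. */
        #include <stdio.h>

        #define PAGE_SHIFT 12

        int main(void)
        {
                unsigned long nr_pages = 8, page_order = 0;
                unsigned long size = nr_pages << (PAGE_SHIFT + page_order);

                printf("%lu bytes\n", size);    /* 32768 */
                return 0;
        }
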
diff --git a/trunk/kernel/exit.c b/trunk/kernel/exit.c
index d579a459309d..d0b7d988f873 100644
--- a/trunk/kernel/exit.c
+++ b/trunk/kernel/exit.c
@@ -121,9 +121,9 @@ static void __exit_signal(struct task_struct *tsk)
* We won't ever get here for the group leader, since it
* will have been the last reference on the signal_struct.
*/
- sig->utime += tsk->utime;
- sig->stime += tsk->stime;
- sig->gtime += tsk->gtime;
+ sig->utime = cputime_add(sig->utime, tsk->utime);
+ sig->stime = cputime_add(sig->stime, tsk->stime);
+ sig->gtime = cputime_add(sig->gtime, tsk->gtime);
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
sig->nvcsw += tsk->nvcsw;
@@ -1255,9 +1255,19 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
spin_lock_irq(&p->real_parent->sighand->siglock);
psig = p->real_parent->signal;
sig = p->signal;
- psig->cutime += tgutime + sig->cutime;
- psig->cstime += tgstime + sig->cstime;
- psig->cgtime += p->gtime + sig->gtime + sig->cgtime;
+ psig->cutime =
+ cputime_add(psig->cutime,
+ cputime_add(tgutime,
+ sig->cutime));
+ psig->cstime =
+ cputime_add(psig->cstime,
+ cputime_add(tgstime,
+ sig->cstime));
+ psig->cgtime =
+ cputime_add(psig->cgtime,
+ cputime_add(p->gtime,
+ cputime_add(sig->gtime,
+ sig->cgtime)));
psig->cmin_flt +=
p->min_flt + sig->min_flt + sig->cmin_flt;
psig->cmaj_flt +=
@@ -1530,15 +1540,8 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
}
/* dead body doesn't have much to contribute */
- if (unlikely(p->exit_state == EXIT_DEAD)) {
- /*
- * But do not ignore this task until the tracer does
- * wait_task_zombie()->do_notify_parent().
- */
- if (likely(!ptrace) && unlikely(ptrace_reparented(p)))
- wo->notask_error = 0;
+ if (p->exit_state == EXIT_DEAD)
return 0;
- }
/* slay zombie? */
if (p->exit_state == EXIT_ZOMBIE) {
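
The exit.c hunks above restore the cputime_add() accumulation style used when cputime_t was treated as an opaque type rather than summed with plain '+='. A user-space sketch of the same pattern follows; the typedef and macros are simplifications for illustration, not the kernel's definitions.

        /* Sketch of the opaque-cputime accumulation style restored above;
         * cputime_t and its helpers are modelled with a plain typedef and
         * macros purely for illustration. */
        #include <stdio.h>

        typedef unsigned long cputime_t;
        #define cputime_zero            ((cputime_t)0)
        #define cputime_add(a, b)       ((a) + (b))
        #define cputime_eq(a, b)        ((a) == (b))

        int main(void)
        {
                cputime_t sig_utime = cputime_zero;
                cputime_t tsk_utime = 25, tsk2_utime = 100;

                sig_utime = cputime_add(sig_utime, tsk_utime);
                sig_utime = cputime_add(sig_utime, tsk2_utime);
                printf("accumulated utime: %lu, zero? %d\n",
                       sig_utime, cputime_eq(sig_utime, cputime_zero));
                return 0;
        }
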
diff --git a/trunk/kernel/fork.c b/trunk/kernel/fork.c
index b058c5820ecd..da4a6a10d088 100644
--- a/trunk/kernel/fork.c
+++ b/trunk/kernel/fork.c
@@ -1023,8 +1023,8 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
*/
static void posix_cpu_timers_init(struct task_struct *tsk)
{
- tsk->cputime_expires.prof_exp = 0;
- tsk->cputime_expires.virt_exp = 0;
+ tsk->cputime_expires.prof_exp = cputime_zero;
+ tsk->cputime_expires.virt_exp = cputime_zero;
tsk->cputime_expires.sched_exp = 0;
INIT_LIST_HEAD(&tsk->cpu_timers[0]);
INIT_LIST_HEAD(&tsk->cpu_timers[1]);
@@ -1132,10 +1132,14 @@ static struct task_struct *copy_process(unsigned long clone_flags,
init_sigpending(&p->pending);
- p->utime = p->stime = p->gtime = 0;
- p->utimescaled = p->stimescaled = 0;
+ p->utime = cputime_zero;
+ p->stime = cputime_zero;
+ p->gtime = cputime_zero;
+ p->utimescaled = cputime_zero;
+ p->stimescaled = cputime_zero;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
- p->prev_utime = p->prev_stime = 0;
+ p->prev_utime = cputime_zero;
+ p->prev_stime = cputime_zero;
#endif
#if defined(SPLIT_RSS_COUNTING)
memset(&p->rss_stat, 0, sizeof(p->rss_stat));
diff --git a/trunk/kernel/futex.c b/trunk/kernel/futex.c
index 1614be20173d..ea87f4d2f455 100644
--- a/trunk/kernel/futex.c
+++ b/trunk/kernel/futex.c
@@ -314,29 +314,17 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
#endif
lock_page(page_head);
-
- /*
- * If page_head->mapping is NULL, then it cannot be a PageAnon
- * page; but it might be the ZERO_PAGE or in the gate area or
- * in a special mapping (all cases which we are happy to fail);
- * or it may have been a good file page when get_user_pages_fast
- * found it, but truncated or holepunched or subjected to
- * invalidate_complete_page2 before we got the page lock (also
- * cases which we are happy to fail). And we hold a reference,
- * so refcount care in invalidate_complete_page's remove_mapping
- * prevents drop_caches from setting mapping to NULL beneath us.
- *
- * The case we do have to guard against is when memory pressure made
- * shmem_writepage move it from filecache to swapcache beneath us:
- * an unlikely race, but we do need to retry for page_head->mapping.
- */
if (!page_head->mapping) {
- int shmem_swizzled = PageSwapCache(page_head);
unlock_page(page_head);
put_page(page_head);
- if (shmem_swizzled)
- goto again;
- return -EFAULT;
+ /*
+ * ZERO_PAGE pages don't have a mapping. Avoid a busy loop
+ * trying to find one. RW mapping would have COW'd (and thus
+ * have a mapping) so this page is RO and won't ever change.
+ */
+ if ((page_head == ZERO_PAGE(address)))
+ return -EFAULT;
+ goto again;
}
/*
diff --git a/trunk/kernel/hung_task.c b/trunk/kernel/hung_task.c
index 2e48ec0c2e91..8b1748d0172c 100644
--- a/trunk/kernel/hung_task.c
+++ b/trunk/kernel/hung_task.c
@@ -74,17 +74,11 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
/*
* Ensure the task is not frozen.
- * Also, skip vfork and any other user process that freezer should skip.
+ * Also, when a freshly created task is scheduled once and changes
+ * its state to TASK_UNINTERRUPTIBLE without ever having been
+ * switched out, it mustn't be checked.
*/
- if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP)))
- return;
-
- /*
- * When a freshly created task is scheduled once, changes its state to
- * TASK_UNINTERRUPTIBLE without having ever been switched out once, it
- * mustn't be checked.
- */
- if (unlikely(!switch_count))
+ if (unlikely(t->flags & PF_FROZEN || !switch_count))
return;
if (switch_count != t->last_switch_count) {
diff --git a/trunk/kernel/itimer.c b/trunk/kernel/itimer.c
index 22000c3db0dd..d802883153da 100644
--- a/trunk/kernel/itimer.c
+++ b/trunk/kernel/itimer.c
@@ -52,22 +52,22 @@ static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
cval = it->expires;
cinterval = it->incr;
- if (cval) {
+ if (!cputime_eq(cval, cputime_zero)) {
struct task_cputime cputime;
cputime_t t;
thread_group_cputimer(tsk, &cputime);
if (clock_id == CPUCLOCK_PROF)
- t = cputime.utime + cputime.stime;
+ t = cputime_add(cputime.utime, cputime.stime);
else
/* CPUCLOCK_VIRT */
t = cputime.utime;
- if (cval < t)
+ if (cputime_le(cval, t))
/* about to fire */
cval = cputime_one_jiffy;
else
- cval = cval - t;
+ cval = cputime_sub(cval, t);
}
spin_unlock_irq(&tsk->sighand->siglock);
@@ -161,9 +161,10 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
cval = it->expires;
cinterval = it->incr;
- if (cval || nval) {
- if (nval > 0)
- nval += cputime_one_jiffy;
+ if (!cputime_eq(cval, cputime_zero) ||
+ !cputime_eq(nval, cputime_zero)) {
+ if (cputime_gt(nval, cputime_zero))
+ nval = cputime_add(nval, cputime_one_jiffy);
set_process_cpu_timer(tsk, clock_id, &nval, &cval);
}
it->expires = nval;
diff --git a/trunk/kernel/jump_label.c b/trunk/kernel/jump_label.c
index 30c3c7708132..66ff7109f697 100644
--- a/trunk/kernel/jump_label.c
+++ b/trunk/kernel/jump_label.c
@@ -72,46 +72,15 @@ void jump_label_inc(struct jump_label_key *key)
jump_label_unlock();
}
-static void __jump_label_dec(struct jump_label_key *key,
- unsigned long rate_limit, struct delayed_work *work)
+void jump_label_dec(struct jump_label_key *key)
{
if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex))
return;
- if (rate_limit) {
- atomic_inc(&key->enabled);
- schedule_delayed_work(work, rate_limit);
- } else
- jump_label_update(key, JUMP_LABEL_DISABLE);
-
+ jump_label_update(key, JUMP_LABEL_DISABLE);
jump_label_unlock();
}
-static void jump_label_update_timeout(struct work_struct *work)
-{
- struct jump_label_key_deferred *key =
- container_of(work, struct jump_label_key_deferred, work.work);
- __jump_label_dec(&key->key, 0, NULL);
-}
-
-void jump_label_dec(struct jump_label_key *key)
-{
- __jump_label_dec(key, 0, NULL);
-}
-
-void jump_label_dec_deferred(struct jump_label_key_deferred *key)
-{
- __jump_label_dec(&key->key, key->timeout, &key->work);
-}
-
-
-void jump_label_rate_limit(struct jump_label_key_deferred *key,
- unsigned long rl)
-{
- key->timeout = rl;
- INIT_DELAYED_WORK(&key->work, jump_label_update_timeout);
-}
-
static int addr_conflict(struct jump_entry *entry, void *start, void *end)
{
if (entry->code <= (unsigned long)end &&
@@ -142,7 +111,7 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start,
* running code can override this to make the non-live update case
* cheaper.
*/
-void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry *entry,
+void __weak arch_jump_label_transform_static(struct jump_entry *entry,
enum jump_label_type type)
{
arch_jump_label_transform(entry, type);
@@ -248,13 +217,8 @@ void jump_label_apply_nops(struct module *mod)
if (iter_start == iter_stop)
return;
- for (iter = iter_start; iter < iter_stop; iter++) {
- struct jump_label_key *iterk;
-
- iterk = (struct jump_label_key *)(unsigned long)iter->key;
- arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ?
- JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE);
- }
+ for (iter = iter_start; iter < iter_stop; iter++)
+ arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE);
}
static int jump_label_add_module(struct module *mod)
@@ -294,7 +258,8 @@ static int jump_label_add_module(struct module *mod)
key->next = jlm;
if (jump_label_enabled(key))
- __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE);
+ __jump_label_update(key, iter, iter_stop,
+ JUMP_LABEL_ENABLE);
}
return 0;
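
This hunk drops the rate-limited (deferred) decrement and returns to plain reference counting: the jump label is only re-patched when the enable count crosses zero in either direction. A user-space analogue of that 0 <-> 1 transition behaviour, with stand-in names and a printf() in place of code patching:

        /* User-space analogue of the plain inc/dec reference counting the
         * hunk returns to; "enabled" stands in for key->enabled and the
         * printf() for the actual branch patching. Illustrative only. */
        #include <stdio.h>

        static int enabled;

        static void fake_label_inc(void)
        {
                if (enabled++ == 0)
                        printf("patch branch: ENABLE\n");
        }

        static void fake_label_dec(void)
        {
                if (--enabled == 0)
                        printf("patch branch: DISABLE\n");
        }

        int main(void)
        {
                fake_label_inc();       /* first user: patches */
                fake_label_inc();       /* second user: no patch */
                fake_label_dec();       /* still one user: no patch */
                fake_label_dec();       /* last user: patches back */
                return 0;
        }
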
diff --git a/trunk/kernel/lockdep.c b/trunk/kernel/lockdep.c
index 8889f7dd7c46..b2e08c932d91 100644
--- a/trunk/kernel/lockdep.c
+++ b/trunk/kernel/lockdep.c
@@ -431,7 +431,6 @@ unsigned int max_lockdep_depth;
* about it later on, in lockdep_info().
*/
static int lockdep_init_error;
-static const char *lock_init_error;
static unsigned long lockdep_init_trace_data[20];
static struct stack_trace lockdep_init_trace = {
.max_entries = ARRAY_SIZE(lockdep_init_trace_data),
@@ -500,32 +499,36 @@ void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS])
usage[i] = '\0';
}
-static void __print_lock_name(struct lock_class *class)
+static int __print_lock_name(struct lock_class *class)
{
char str[KSYM_NAME_LEN];
const char *name;
name = class->name;
- if (!name) {
+ if (!name)
name = __get_key_name(class->key, str);
- printk("%s", name);
- } else {
- printk("%s", name);
- if (class->name_version > 1)
- printk("#%d", class->name_version);
- if (class->subclass)
- printk("/%d", class->subclass);
- }
+
+ return printk("%s", name);
}
static void print_lock_name(struct lock_class *class)
{
- char usage[LOCK_USAGE_CHARS];
+ char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS];
+ const char *name;
get_usage_chars(class, usage);
- printk(" (");
- __print_lock_name(class);
+ name = class->name;
+ if (!name) {
+ name = __get_key_name(class->key, str);
+ printk(" (%s", name);
+ } else {
+ printk(" (%s", name);
+ if (class->name_version > 1)
+ printk("#%d", class->name_version);
+ if (class->subclass)
+ printk("/%d", class->subclass);
+ }
printk("){%s}", usage);
}
@@ -565,12 +568,11 @@ static void lockdep_print_held_locks(struct task_struct *curr)
}
}
-static void print_kernel_ident(void)
+static void print_kernel_version(void)
{
- printk("%s %.*s %s\n", init_utsname()->release,
+ printk("%s %.*s\n", init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
- init_utsname()->version,
- print_tainted());
+ init_utsname()->version);
}
static int very_verbose(struct lock_class *class)
@@ -654,7 +656,6 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
if (unlikely(!lockdep_initialized)) {
lockdep_init();
lockdep_init_error = 1;
- lock_init_error = lock->name;
save_stack_trace(&lockdep_init_trace);
}
#endif
@@ -722,7 +723,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
class = look_up_lock_class(lock, subclass);
if (likely(class))
- goto out_set_class_cache;
+ return class;
/*
* Debug-check: all keys must be persistent!
@@ -807,7 +808,6 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
graph_unlock();
raw_local_irq_restore(flags);
-out_set_class_cache:
if (!subclass || force)
lock->class_cache[0] = class;
else if (subclass < NR_LOCKDEP_CACHING_CLASSES)
@@ -1149,7 +1149,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
printk("\n");
printk("======================================================\n");
printk("[ INFO: possible circular locking dependency detected ]\n");
- print_kernel_ident();
+ print_kernel_version();
printk("-------------------------------------------------------\n");
printk("%s/%d is trying to acquire lock:\n",
curr->comm, task_pid_nr(curr));
@@ -1488,7 +1488,7 @@ print_bad_irq_dependency(struct task_struct *curr,
printk("======================================================\n");
printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
irqclass, irqclass);
- print_kernel_ident();
+ print_kernel_version();
printk("------------------------------------------------------\n");
printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
curr->comm, task_pid_nr(curr),
@@ -1717,7 +1717,7 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
printk("\n");
printk("=============================================\n");
printk("[ INFO: possible recursive locking detected ]\n");
- print_kernel_ident();
+ print_kernel_version();
printk("---------------------------------------------\n");
printk("%s/%d is trying to acquire lock:\n",
curr->comm, task_pid_nr(curr));
@@ -2224,7 +2224,7 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
printk("\n");
printk("=================================\n");
printk("[ INFO: inconsistent lock state ]\n");
- print_kernel_ident();
+ print_kernel_version();
printk("---------------------------------\n");
printk("inconsistent {%s} -> {%s} usage.\n",
@@ -2289,7 +2289,7 @@ print_irq_inversion_bug(struct task_struct *curr,
printk("\n");
printk("=========================================================\n");
printk("[ INFO: possible irq lock inversion dependency detected ]\n");
- print_kernel_ident();
+ print_kernel_version();
printk("---------------------------------------------------------\n");
printk("%s/%d just changed the state of lock:\n",
curr->comm, task_pid_nr(curr));
@@ -3175,7 +3175,6 @@ print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
printk("\n");
printk("=====================================\n");
printk("[ BUG: bad unlock balance detected! ]\n");
- print_kernel_ident();
printk("-------------------------------------\n");
printk("%s/%d is trying to release lock (",
curr->comm, task_pid_nr(curr));
@@ -3620,7 +3619,6 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
printk("\n");
printk("=================================\n");
printk("[ BUG: bad contention detected! ]\n");
- print_kernel_ident();
printk("---------------------------------\n");
printk("%s/%d is trying to contend lock (",
curr->comm, task_pid_nr(curr));
@@ -3976,8 +3974,7 @@ void __init lockdep_info(void)
#ifdef CONFIG_DEBUG_LOCKDEP
if (lockdep_init_error) {
- printk("WARNING: lockdep init error! lock-%s was acquired"
- "before lockdep_init\n", lock_init_error);
+ printk("WARNING: lockdep init error! Arch code didn't call lockdep_init() early enough?\n");
printk("Call stack leading to lockdep invocation was:\n");
print_stack_trace(&lockdep_init_trace, 0);
}
@@ -3996,7 +3993,6 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
printk("\n");
printk("=========================\n");
printk("[ BUG: held lock freed! ]\n");
- print_kernel_ident();
printk("-------------------------\n");
printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
@@ -4054,7 +4050,6 @@ static void print_held_locks_bug(struct task_struct *curr)
printk("\n");
printk("=====================================\n");
printk("[ BUG: lock held at task exit time! ]\n");
- print_kernel_ident();
printk("-------------------------------------\n");
printk("%s/%d is exiting with locks still held!\n",
curr->comm, task_pid_nr(curr));
@@ -4152,7 +4147,6 @@ void lockdep_sys_exit(void)
printk("\n");
printk("================================================\n");
printk("[ BUG: lock held when returning to user space! ]\n");
- print_kernel_ident();
printk("------------------------------------------------\n");
printk("%s/%d is leaving the kernel with locks still held!\n",
curr->comm, curr->pid);
@@ -4172,33 +4166,10 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
printk("\n");
printk("===============================\n");
printk("[ INFO: suspicious RCU usage. ]\n");
- print_kernel_ident();
printk("-------------------------------\n");
printk("%s:%d %s!\n", file, line, s);
printk("\nother info that might help us debug this:\n\n");
printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks);
-
- /*
- * If a CPU is in the RCU-free window in idle (ie: in the section
- * between rcu_idle_enter() and rcu_idle_exit(), then RCU
- * considers that CPU to be in an "extended quiescent state",
- * which means that RCU will be completely ignoring that CPU.
- * Therefore, rcu_read_lock() and friends have absolutely no
- * effect on a CPU running in that state. In other words, even if
- * such an RCU-idle CPU has called rcu_read_lock(), RCU might well
- * delete data structures out from under it. RCU really has no
- * choice here: we need to keep an RCU-free window in idle where
- * the CPU may possibly enter into low power mode. This way we can
- * notice an extended quiescent state to other CPUs that started a grace
- * period. Otherwise we would delay any grace period as long as we run
- * in the idle task.
- *
- * So complain bitterly if someone does call rcu_read_lock(),
- * rcu_read_lock_bh() and so on from extended quiescent states.
- */
- if (rcu_is_cpu_idle())
- printk("RCU used illegally from extended quiescent state!\n");
-
lockdep_print_held_locks(curr);
printk("\nstack backtrace:\n");
dump_stack();
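
print_kernel_version(), as restored above, prints only the part of init_utsname()->version up to the first space by combining strcspn() with a "%.*s" precision argument. A small standalone demonstration of that truncation idiom, using a made-up version string:

        /* Demonstrates the "%.*s" + strcspn() truncation used by
         * print_kernel_version(): only the text before the first space
         * is printed. The version string here is invented. */
        #include <stdio.h>
        #include <string.h>

        int main(void)
        {
                const char *version = "#1 SMP Thu Jan 1 00:00:00 UTC 1970";

                printf("%.*s\n", (int)strcspn(version, " "), version);  /* "#1" */
                return 0;
        }
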
diff --git a/trunk/kernel/panic.c b/trunk/kernel/panic.c
index 3458469eb7c3..b26593604214 100644
--- a/trunk/kernel/panic.c
+++ b/trunk/kernel/panic.c
@@ -237,20 +237,11 @@ void add_taint(unsigned flag)
* Can't trust the integrity of the kernel anymore.
* We don't call directly debug_locks_off() because the issue
* is not necessarily serious enough to set oops_in_progress to 1
- * Also we want to keep up lockdep for staging/out-of-tree
- * development and post-warning case.
+ * Also we want to keep up lockdep for staging development and
+ * post-warning case.
*/
- switch (flag) {
- case TAINT_CRAP:
- case TAINT_OOT_MODULE:
- case TAINT_WARN:
- case TAINT_FIRMWARE_WORKAROUND:
- break;
-
- default:
- if (__debug_locks_off())
- printk(KERN_WARNING "Disabling lock debugging due to kernel taint\n");
- }
+ if (flag != TAINT_CRAP && flag != TAINT_WARN && __debug_locks_off())
+ printk(KERN_WARNING "Disabling lock debugging due to kernel taint\n");
set_bit(flag, &tainted_mask);
}
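
The simplified add_taint() check above keeps lock debugging enabled for the harmless taint flags (TAINT_CRAP, TAINT_WARN) and disables it for everything else. A rough user-space sketch of that decision; the enum values and the debug_locks variable below are stand-ins for illustration only.

        /* Sketch of the restored taint check: only "serious" taint flags
         * turn lock debugging off. Values and helpers are stand-ins. */
        #include <stdio.h>

        enum { TAINT_DIE = 7, TAINT_WARN = 9, TAINT_CRAP = 10 };

        static int debug_locks = 1;

        static void add_taint(int flag)
        {
                if (flag != TAINT_CRAP && flag != TAINT_WARN && debug_locks) {
                        debug_locks = 0;
                        printf("Disabling lock debugging due to kernel taint\n");
                }
        }

        int main(void)
        {
                add_taint(TAINT_WARN);  /* harmless: lock debugging stays on */
                add_taint(TAINT_DIE);   /* serious: lock debugging turned off */
                return 0;
        }
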
diff --git a/trunk/kernel/posix-cpu-timers.c b/trunk/kernel/posix-cpu-timers.c
index 125cb67daa21..e7cb76dc18f5 100644
--- a/trunk/kernel/posix-cpu-timers.c
+++ b/trunk/kernel/posix-cpu-timers.c
@@ -78,7 +78,7 @@ static inline int cpu_time_before(const clockid_t which_clock,
if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
return now.sched < then.sched;
} else {
- return now.cpu < then.cpu;
+ return cputime_lt(now.cpu, then.cpu);
}
}
static inline void cpu_time_add(const clockid_t which_clock,
@@ -88,7 +88,7 @@ static inline void cpu_time_add(const clockid_t which_clock,
if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
acc->sched += val.sched;
} else {
- acc->cpu += val.cpu;
+ acc->cpu = cputime_add(acc->cpu, val.cpu);
}
}
static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
@@ -98,11 +98,24 @@ static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
a.sched -= b.sched;
} else {
- a.cpu -= b.cpu;
+ a.cpu = cputime_sub(a.cpu, b.cpu);
}
return a;
}
+/*
+ * Divide and limit the result to res >= 1
+ *
+ * This is necessary to prevent signal delivery starvation, when the result of
+ * the division would be rounded down to 0.
+ */
+static inline cputime_t cputime_div_non_zero(cputime_t time, unsigned long div)
+{
+ cputime_t res = cputime_div(time, div);
+
+ return max_t(cputime_t, res, 1);
+}
+
/*
* Update expiry time from increment, and increase overrun count,
* given the current clock sample.
@@ -135,26 +148,28 @@ static void bump_cpu_timer(struct k_itimer *timer,
} else {
cputime_t delta, incr;
- if (now.cpu < timer->it.cpu.expires.cpu)
+ if (cputime_lt(now.cpu, timer->it.cpu.expires.cpu))
return;
incr = timer->it.cpu.incr.cpu;
- delta = now.cpu + incr - timer->it.cpu.expires.cpu;
+ delta = cputime_sub(cputime_add(now.cpu, incr),
+ timer->it.cpu.expires.cpu);
/* Don't use (incr*2 < delta), incr*2 might overflow. */
- for (i = 0; incr < delta - incr; i++)
- incr += incr;
- for (; i >= 0; incr = incr >> 1, i--) {
- if (delta < incr)
+ for (i = 0; cputime_lt(incr, cputime_sub(delta, incr)); i++)
+ incr = cputime_add(incr, incr);
+ for (; i >= 0; incr = cputime_halve(incr), i--) {
+ if (cputime_lt(delta, incr))
continue;
- timer->it.cpu.expires.cpu += incr;
+ timer->it.cpu.expires.cpu =
+ cputime_add(timer->it.cpu.expires.cpu, incr);
timer->it_overrun += 1 << i;
- delta -= incr;
+ delta = cputime_sub(delta, incr);
}
}
}
static inline cputime_t prof_ticks(struct task_struct *p)
{
- return p->utime + p->stime;
+ return cputime_add(p->utime, p->stime);
}
static inline cputime_t virt_ticks(struct task_struct *p)
{
@@ -233,8 +248,8 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
t = tsk;
do {
- times->utime += t->utime;
- times->stime += t->stime;
+ times->utime = cputime_add(times->utime, t->utime);
+ times->stime = cputime_add(times->stime, t->stime);
times->sum_exec_runtime += task_sched_runtime(t);
} while_each_thread(tsk, t);
out:
@@ -243,10 +258,10 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
{
- if (b->utime > a->utime)
+ if (cputime_gt(b->utime, a->utime))
a->utime = b->utime;
- if (b->stime > a->stime)
+ if (cputime_gt(b->stime, a->stime))
a->stime = b->stime;
if (b->sum_exec_runtime > a->sum_exec_runtime)
@@ -291,7 +306,7 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
return -EINVAL;
case CPUCLOCK_PROF:
thread_group_cputime(p, &cputime);
- cpu->cpu = cputime.utime + cputime.stime;
+ cpu->cpu = cputime_add(cputime.utime, cputime.stime);
break;
case CPUCLOCK_VIRT:
thread_group_cputime(p, &cputime);
@@ -455,24 +470,26 @@ static void cleanup_timers(struct list_head *head,
unsigned long long sum_exec_runtime)
{
struct cpu_timer_list *timer, *next;
- cputime_t ptime = utime + stime;
+ cputime_t ptime = cputime_add(utime, stime);
list_for_each_entry_safe(timer, next, head, entry) {
list_del_init(&timer->entry);
- if (timer->expires.cpu < ptime) {
- timer->expires.cpu = 0;
+ if (cputime_lt(timer->expires.cpu, ptime)) {
+ timer->expires.cpu = cputime_zero;
} else {
- timer->expires.cpu -= ptime;
+ timer->expires.cpu = cputime_sub(timer->expires.cpu,
+ ptime);
}
}
++head;
list_for_each_entry_safe(timer, next, head, entry) {
list_del_init(&timer->entry);
- if (timer->expires.cpu < utime) {
- timer->expires.cpu = 0;
+ if (cputime_lt(timer->expires.cpu, utime)) {
+ timer->expires.cpu = cputime_zero;
} else {
- timer->expires.cpu -= utime;
+ timer->expires.cpu = cputime_sub(timer->expires.cpu,
+ utime);
}
}
@@ -503,7 +520,8 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
struct signal_struct *const sig = tsk->signal;
cleanup_timers(tsk->signal->cpu_timers,
- tsk->utime + sig->utime, tsk->stime + sig->stime,
+ cputime_add(tsk->utime, sig->utime),
+ cputime_add(tsk->stime, sig->stime),
tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
}
@@ -522,7 +540,8 @@ static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
static inline int expires_gt(cputime_t expires, cputime_t new_exp)
{
- return expires == 0 || expires > new_exp;
+ return cputime_eq(expires, cputime_zero) ||
+ cputime_gt(expires, new_exp);
}
/*
@@ -632,7 +651,7 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
default:
return -EINVAL;
case CPUCLOCK_PROF:
- cpu->cpu = cputime.utime + cputime.stime;
+ cpu->cpu = cputime_add(cputime.utime, cputime.stime);
break;
case CPUCLOCK_VIRT:
cpu->cpu = cputime.utime;
@@ -899,12 +918,12 @@ static void check_thread_timers(struct task_struct *tsk,
unsigned long soft;
maxfire = 20;
- tsk->cputime_expires.prof_exp = 0;
+ tsk->cputime_expires.prof_exp = cputime_zero;
while (!list_empty(timers)) {
struct cpu_timer_list *t = list_first_entry(timers,
struct cpu_timer_list,
entry);
- if (!--maxfire || prof_ticks(tsk) < t->expires.cpu) {
+ if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) {
tsk->cputime_expires.prof_exp = t->expires.cpu;
break;
}
@@ -914,12 +933,12 @@ static void check_thread_timers(struct task_struct *tsk,
++timers;
maxfire = 20;
- tsk->cputime_expires.virt_exp = 0;
+ tsk->cputime_expires.virt_exp = cputime_zero;
while (!list_empty(timers)) {
struct cpu_timer_list *t = list_first_entry(timers,
struct cpu_timer_list,
entry);
- if (!--maxfire || virt_ticks(tsk) < t->expires.cpu) {
+ if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) {
tsk->cputime_expires.virt_exp = t->expires.cpu;
break;
}
@@ -990,19 +1009,20 @@ static u32 onecputick;
static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
cputime_t *expires, cputime_t cur_time, int signo)
{
- if (!it->expires)
+ if (cputime_eq(it->expires, cputime_zero))
return;
- if (cur_time >= it->expires) {
- if (it->incr) {
- it->expires += it->incr;
+ if (cputime_ge(cur_time, it->expires)) {
+ if (!cputime_eq(it->incr, cputime_zero)) {
+ it->expires = cputime_add(it->expires, it->incr);
it->error += it->incr_error;
if (it->error >= onecputick) {
- it->expires -= cputime_one_jiffy;
+ it->expires = cputime_sub(it->expires,
+ cputime_one_jiffy);
it->error -= onecputick;
}
} else {
- it->expires = 0;
+ it->expires = cputime_zero;
}
trace_itimer_expire(signo == SIGPROF ?
@@ -1011,7 +1031,9 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
}
- if (it->expires && (!*expires || it->expires < *expires)) {
+ if (!cputime_eq(it->expires, cputime_zero) &&
+ (cputime_eq(*expires, cputime_zero) ||
+ cputime_lt(it->expires, *expires))) {
*expires = it->expires;
}
}
@@ -1026,7 +1048,9 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
*/
static inline int task_cputime_zero(const struct task_cputime *cputime)
{
- if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
+ if (cputime_eq(cputime->utime, cputime_zero) &&
+ cputime_eq(cputime->stime, cputime_zero) &&
+ cputime->sum_exec_runtime == 0)
return 1;
return 0;
}
@@ -1052,15 +1076,15 @@ static void check_process_timers(struct task_struct *tsk,
*/
thread_group_cputimer(tsk, &cputime);
utime = cputime.utime;
- ptime = utime + cputime.stime;
+ ptime = cputime_add(utime, cputime.stime);
sum_sched_runtime = cputime.sum_exec_runtime;
maxfire = 20;
- prof_expires = 0;
+ prof_expires = cputime_zero;
while (!list_empty(timers)) {
struct cpu_timer_list *tl = list_first_entry(timers,
struct cpu_timer_list,
entry);
- if (!--maxfire || ptime < tl->expires.cpu) {
+ if (!--maxfire || cputime_lt(ptime, tl->expires.cpu)) {
prof_expires = tl->expires.cpu;
break;
}
@@ -1070,12 +1094,12 @@ static void check_process_timers(struct task_struct *tsk,
++timers;
maxfire = 20;
- virt_expires = 0;
+ virt_expires = cputime_zero;
while (!list_empty(timers)) {
struct cpu_timer_list *tl = list_first_entry(timers,
struct cpu_timer_list,
entry);
- if (!--maxfire || utime < tl->expires.cpu) {
+ if (!--maxfire || cputime_lt(utime, tl->expires.cpu)) {
virt_expires = tl->expires.cpu;
break;
}
@@ -1130,7 +1154,8 @@ static void check_process_timers(struct task_struct *tsk,
}
}
x = secs_to_cputime(soft);
- if (!prof_expires || x < prof_expires) {
+ if (cputime_eq(prof_expires, cputime_zero) ||
+ cputime_lt(x, prof_expires)) {
prof_expires = x;
}
}
@@ -1224,9 +1249,12 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
static inline int task_cputime_expired(const struct task_cputime *sample,
const struct task_cputime *expires)
{
- if (expires->utime && sample->utime >= expires->utime)
+ if (!cputime_eq(expires->utime, cputime_zero) &&
+ cputime_ge(sample->utime, expires->utime))
return 1;
- if (expires->stime && sample->utime + sample->stime >= expires->stime)
+ if (!cputime_eq(expires->stime, cputime_zero) &&
+ cputime_ge(cputime_add(sample->utime, sample->stime),
+ expires->stime))
return 1;
if (expires->sum_exec_runtime != 0 &&
sample->sum_exec_runtime >= expires->sum_exec_runtime)
@@ -1361,18 +1389,18 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
* it to be relative, *newval argument is relative and we update
* it to be absolute.
*/
- if (*oldval) {
- if (*oldval <= now.cpu) {
+ if (!cputime_eq(*oldval, cputime_zero)) {
+ if (cputime_le(*oldval, now.cpu)) {
/* Just about to fire. */
*oldval = cputime_one_jiffy;
} else {
- *oldval -= now.cpu;
+ *oldval = cputime_sub(*oldval, now.cpu);
}
}
- if (!*newval)
+ if (cputime_eq(*newval, cputime_zero))
return;
- *newval += now.cpu;
+ *newval = cputime_add(*newval, now.cpu);
}
/*
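
bump_cpu_timer(), visible in the hunks above, advances an expired timer past "now" in power-of-two multiples of its increment: it doubles incr while incr < delta - incr (avoiding the overflow that incr * 2 could cause), then halves back down, adding 1 << i to the overrun count for each chunk consumed. A standalone sketch of that loop with plain unsigned arithmetic standing in for cputime_t:

        /* Standalone sketch of the overflow-safe doubling/halving used by
         * bump_cpu_timer() to push an expiry past "now" while counting
         * overruns; plain unsigned long stands in for cputime_t. */
        #include <stdio.h>

        int main(void)
        {
                unsigned long now = 1050, expires = 100, incr = 30;
                unsigned long delta = now + incr - expires;
                unsigned long overrun = 0;
                int i;

                /* Double incr until it covers at least half of delta;
                 * comparing incr < delta - incr avoids overflowing incr * 2. */
                for (i = 0; incr < delta - incr; i++)
                        incr += incr;

                /* Walk back down, consuming delta in power-of-two multiples
                 * of the original increment. */
                for (; i >= 0; incr >>= 1, i--) {
                        if (delta < incr)
                                continue;
                        expires += incr;
                        overrun += 1UL << i;
                        delta -= incr;
                }
                printf("expires=%lu overrun=%lu\n", expires, overrun);  /* 1060, 32 */
                return 0;
        }
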
diff --git a/trunk/kernel/printk.c b/trunk/kernel/printk.c
index 989e4a52da76..7982a0a841ea 100644
--- a/trunk/kernel/printk.c
+++ b/trunk/kernel/printk.c
@@ -199,7 +199,7 @@ void __init setup_log_buf(int early)
unsigned long mem;
mem = memblock_alloc(new_log_buf_len, PAGE_SIZE);
- if (!mem)
+ if (mem == MEMBLOCK_ERROR)
return;
new_log_buf = __va(mem);
} else {
@@ -688,7 +688,6 @@ static void zap_locks(void)
oops_timestamp = jiffies;
- debug_locks_off();
/* If a crash is occurring, make sure we can't deadlock */
raw_spin_lock_init(&logbuf_lock);
/* And make sure that we print immediately */
@@ -841,8 +840,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
boot_delay_msec();
printk_delay();
+ preempt_disable();
/* This stops the holder of console_sem just where we want him */
- local_irq_save(flags);
+ raw_local_irq_save(flags);
this_cpu = smp_processor_id();
/*
@@ -856,7 +856,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
* recursion and return - but flag the recursion so that
* it can be printed at the next appropriate moment:
*/
- if (!oops_in_progress && !lockdep_recursing(current)) {
+ if (!oops_in_progress) {
recursion_bug = 1;
goto out_restore_irqs;
}
@@ -962,8 +962,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
lockdep_on();
out_restore_irqs:
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
+ preempt_enable();
return printed_len;
}
EXPORT_SYMBOL(printk);
diff --git a/trunk/kernel/ptrace.c b/trunk/kernel/ptrace.c
index 78ab24a7b0e4..24d04477b257 100644
--- a/trunk/kernel/ptrace.c
+++ b/trunk/kernel/ptrace.c
@@ -96,20 +96,9 @@ void __ptrace_unlink(struct task_struct *child)
*/
if (!(child->flags & PF_EXITING) &&
(child->signal->flags & SIGNAL_STOP_STOPPED ||
- child->signal->group_stop_count)) {
+ child->signal->group_stop_count))
child->jobctl |= JOBCTL_STOP_PENDING;
- /*
- * This is only possible if this thread was cloned by the
- * traced task running in the stopped group, set the signal
- * for the future reports.
- * FIXME: we should change ptrace_init_task() to handle this
- * case.
- */
- if (!(child->jobctl & JOBCTL_STOP_SIGMASK))
- child->jobctl |= SIGSTOP;
- }
-
/*
* If transition to TASK_STOPPED is pending or in TASK_TRACED, kick
* @child in the butt. Note that @resume should be used iff @child
diff --git a/trunk/kernel/rcu.h b/trunk/kernel/rcu.h
index aa88baab5f78..f600868d550d 100644
--- a/trunk/kernel/rcu.h
+++ b/trunk/kernel/rcu.h
@@ -29,13 +29,6 @@
#define RCU_TRACE(stmt)
#endif /* #else #ifdef CONFIG_RCU_TRACE */
-/*
- * Process-level increment to ->dynticks_nesting field. This allows for
- * architectures that use half-interrupts and half-exceptions from
- * process context.
- */
-#define DYNTICK_TASK_NESTING (LLONG_MAX / 2 - 1)
-
/*
* debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
* by call_rcu() and rcu callback execution, and are therefore not part of the
diff --git a/trunk/kernel/rcupdate.c b/trunk/kernel/rcupdate.c
index 2bc4e135ff23..c5b98e565aee 100644
--- a/trunk/kernel/rcupdate.c
+++ b/trunk/kernel/rcupdate.c
@@ -93,8 +93,6 @@ int rcu_read_lock_bh_held(void)
{
if (!debug_lockdep_rcu_enabled())
return 1;
- if (rcu_is_cpu_idle())
- return 0;
return in_softirq() || irqs_disabled();
}
EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
@@ -318,13 +316,3 @@ struct debug_obj_descr rcuhead_debug_descr = {
};
EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
-
-#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE)
-void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp)
-{
- trace_rcu_torture_read(rcutorturename, rhp);
-}
-EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
-#else
-#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
-#endif
diff --git a/trunk/kernel/rcutiny.c b/trunk/kernel/rcutiny.c
index 977296dca0a4..636af6d9c6e5 100644
--- a/trunk/kernel/rcutiny.c
+++ b/trunk/kernel/rcutiny.c
@@ -53,137 +53,31 @@ static void __call_rcu(struct rcu_head *head,
#include "rcutiny_plugin.h"
-static long long rcu_dynticks_nesting = DYNTICK_TASK_NESTING;
+#ifdef CONFIG_NO_HZ
-/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
-static void rcu_idle_enter_common(long long oldval)
-{
- if (rcu_dynticks_nesting) {
- RCU_TRACE(trace_rcu_dyntick("--=",
- oldval, rcu_dynticks_nesting));
- return;
- }
- RCU_TRACE(trace_rcu_dyntick("Start", oldval, rcu_dynticks_nesting));
- if (!is_idle_task(current)) {
- struct task_struct *idle = idle_task(smp_processor_id());
-
- RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task",
- oldval, rcu_dynticks_nesting));
- ftrace_dump(DUMP_ALL);
- WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
- current->pid, current->comm,
- idle->pid, idle->comm); /* must be idle task! */
- }
- rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
-}
-
-/*
- * Enter idle, which is an extended quiescent state if we have fully
- * entered that mode (i.e., if the new value of dynticks_nesting is zero).
- */
-void rcu_idle_enter(void)
-{
- unsigned long flags;
- long long oldval;
-
- local_irq_save(flags);
- oldval = rcu_dynticks_nesting;
- rcu_dynticks_nesting = 0;
- rcu_idle_enter_common(oldval);
- local_irq_restore(flags);
-}
+static long rcu_dynticks_nesting = 1;
/*
- * Exit an interrupt handler towards idle.
+ * Enter dynticks-idle mode, which is an extended quiescent state
+ * if we have fully entered that mode (i.e., if the new value of
+ * dynticks_nesting is zero).
*/
-void rcu_irq_exit(void)
+void rcu_enter_nohz(void)
{
- unsigned long flags;
- long long oldval;
-
- local_irq_save(flags);
- oldval = rcu_dynticks_nesting;
- rcu_dynticks_nesting--;
- WARN_ON_ONCE(rcu_dynticks_nesting < 0);
- rcu_idle_enter_common(oldval);
- local_irq_restore(flags);
-}
-
-/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */
-static void rcu_idle_exit_common(long long oldval)
-{
- if (oldval) {
- RCU_TRACE(trace_rcu_dyntick("++=",
- oldval, rcu_dynticks_nesting));
- return;
- }
- RCU_TRACE(trace_rcu_dyntick("End", oldval, rcu_dynticks_nesting));
- if (!is_idle_task(current)) {
- struct task_struct *idle = idle_task(smp_processor_id());
-
- RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task",
- oldval, rcu_dynticks_nesting));
- ftrace_dump(DUMP_ALL);
- WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
- current->pid, current->comm,
- idle->pid, idle->comm); /* must be idle task! */
- }
+ if (--rcu_dynticks_nesting == 0)
+ rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
}
/*
- * Exit idle, so that we are no longer in an extended quiescent state.
- */
-void rcu_idle_exit(void)
-{
- unsigned long flags;
- long long oldval;
-
- local_irq_save(flags);
- oldval = rcu_dynticks_nesting;
- WARN_ON_ONCE(oldval != 0);
- rcu_dynticks_nesting = DYNTICK_TASK_NESTING;
- rcu_idle_exit_common(oldval);
- local_irq_restore(flags);
-}
-
-/*
- * Enter an interrupt handler, moving away from idle.
+ * Exit dynticks-idle mode, so that we are no longer in an extended
+ * quiescent state.
*/
-void rcu_irq_enter(void)
+void rcu_exit_nohz(void)
{
- unsigned long flags;
- long long oldval;
-
- local_irq_save(flags);
- oldval = rcu_dynticks_nesting;
rcu_dynticks_nesting++;
- WARN_ON_ONCE(rcu_dynticks_nesting == 0);
- rcu_idle_exit_common(oldval);
- local_irq_restore(flags);
-}
-
-#ifdef CONFIG_PROVE_RCU
-
-/*
- * Test whether RCU thinks that the current CPU is idle.
- */
-int rcu_is_cpu_idle(void)
-{
- return !rcu_dynticks_nesting;
}
-EXPORT_SYMBOL(rcu_is_cpu_idle);
-
-#endif /* #ifdef CONFIG_PROVE_RCU */
-/*
- * Test whether the current CPU was interrupted from idle. Nested
- * interrupts don't count, we must be running at the first interrupt
- * level.
- */
-int rcu_is_cpu_rrupt_from_idle(void)
-{
- return rcu_dynticks_nesting <= 0;
-}
+#endif /* #ifdef CONFIG_NO_HZ */
/*
* Helper function for rcu_sched_qs() and rcu_bh_qs().
@@ -232,13 +126,14 @@ void rcu_bh_qs(int cpu)
/*
* Check to see if the scheduling-clock interrupt came from an extended
- * quiescent state, and, if so, tell RCU about it. This function must
- * be called from hardirq context. It is normally called from the
- * scheduling-clock interrupt.
+ * quiescent state, and, if so, tell RCU about it.
*/
void rcu_check_callbacks(int cpu, int user)
{
- if (user || rcu_is_cpu_rrupt_from_idle())
+ if (user ||
+ (idle_cpu(cpu) &&
+ !in_softirq() &&
+ hardirq_count() <= (1 << HARDIRQ_SHIFT)))
rcu_sched_qs(cpu);
else if (!in_softirq())
rcu_bh_qs(cpu);
@@ -259,11 +154,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
/* If no RCU callbacks ready to invoke, just return. */
if (&rcp->rcucblist == rcp->donetail) {
RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1));
- RCU_TRACE(trace_rcu_batch_end(rcp->name, 0,
- ACCESS_ONCE(rcp->rcucblist),
- need_resched(),
- is_idle_task(current),
- rcu_is_callbacks_kthread()));
+ RCU_TRACE(trace_rcu_batch_end(rcp->name, 0));
return;
}
@@ -292,9 +183,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
RCU_TRACE(cb_count++);
}
RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
- RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(),
- is_idle_task(current),
- rcu_is_callbacks_kthread()));
+ RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count));
}
static void rcu_process_callbacks(struct softirq_action *unused)
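
The rcutiny.c hunk replaces the idle-entry tracking with the older rcu_enter_nohz()/rcu_exit_nohz() pair built around a single nesting counter: a quiescent state is reported only when the counter drops to zero. A minimal sketch of that idea, where report_qs() is merely a placeholder for rcu_sched_qs():

        /* Minimal sketch of the nesting-counter scheme rcu_enter_nohz() /
         * rcu_exit_nohz() return to above; report_qs() is a placeholder. */
        #include <stdio.h>

        static long nesting = 1;        /* process context counts as one level */

        static void report_qs(void)
        {
                printf("quiescent state\n");
        }

        static void enter_nohz(void)
        {
                if (--nesting == 0)
                        report_qs();
        }

        static void exit_nohz(void)
        {
                nesting++;
        }

        int main(void)
        {
                enter_nohz();   /* 1 -> 0: reports a quiescent state */
                exit_nohz();    /* back to 1 */
                return 0;
        }
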
diff --git a/trunk/kernel/rcutiny_plugin.h b/trunk/kernel/rcutiny_plugin.h
index 9cb1ae4aabdd..2b0484a5dc28 100644
--- a/trunk/kernel/rcutiny_plugin.h
+++ b/trunk/kernel/rcutiny_plugin.h
@@ -312,8 +312,8 @@ static int rcu_boost(void)
rt_mutex_lock(&mtx);
rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
- return ACCESS_ONCE(rcu_preempt_ctrlblk.boost_tasks) != NULL ||
- ACCESS_ONCE(rcu_preempt_ctrlblk.exp_tasks) != NULL;
+ return rcu_preempt_ctrlblk.boost_tasks != NULL ||
+ rcu_preempt_ctrlblk.exp_tasks != NULL;
}
/*
@@ -885,19 +885,6 @@ static void invoke_rcu_callbacks(void)
wake_up(&rcu_kthread_wq);
}
-#ifdef CONFIG_RCU_TRACE
-
-/*
- * Is the current CPU running the RCU-callbacks kthread?
- * Caller must have preemption disabled.
- */
-static bool rcu_is_callbacks_kthread(void)
-{
- return rcu_kthread_task == current;
-}
-
-#endif /* #ifdef CONFIG_RCU_TRACE */
-
/*
* This kthread invokes RCU callbacks whose grace periods have
* elapsed. It is awakened as needed, and takes the place of the
@@ -951,18 +938,6 @@ void invoke_rcu_callbacks(void)
raise_softirq(RCU_SOFTIRQ);
}
-#ifdef CONFIG_RCU_TRACE
-
-/*
- * There is no callback kthread, so this thread is never it.
- */
-static bool rcu_is_callbacks_kthread(void)
-{
- return false;
-}
-
-#endif /* #ifdef CONFIG_RCU_TRACE */
-
void rcu_init(void)
{
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
diff --git a/trunk/kernel/rcutorture.c b/trunk/kernel/rcutorture.c
index 88f17b8a3b1d..764825c2685c 100644
--- a/trunk/kernel/rcutorture.c
+++ b/trunk/kernel/rcutorture.c
@@ -61,11 +61,9 @@ static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/
static int stutter = 5; /* Start/stop testing interval (in sec) */
static int irqreader = 1; /* RCU readers from irq (timers). */
-static int fqs_duration; /* Duration of bursts (us), 0 to disable. */
-static int fqs_holdoff; /* Hold time within burst (us). */
+static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */
+static int fqs_holdoff = 0; /* Hold time within burst (us). */
static int fqs_stutter = 3; /* Wait time between bursts (s). */
-static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */
-static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */
static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */
static int test_boost_interval = 7; /* Interval between boost tests, seconds. */
static int test_boost_duration = 4; /* Duration of each boost test, seconds. */
@@ -93,10 +91,6 @@ module_param(fqs_holdoff, int, 0444);
MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
module_param(fqs_stutter, int, 0444);
MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
-module_param(onoff_interval, int, 0444);
-MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable");
-module_param(shutdown_secs, int, 0444);
-MODULE_PARM_DESC(shutdown_secs, "Shutdown time (s), zero to disable.");
module_param(test_boost, int, 0444);
MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
module_param(test_boost_interval, int, 0444);
@@ -125,10 +119,6 @@ static struct task_struct *shuffler_task;
static struct task_struct *stutter_task;
static struct task_struct *fqs_task;
static struct task_struct *boost_tasks[NR_CPUS];
-static struct task_struct *shutdown_task;
-#ifdef CONFIG_HOTPLUG_CPU
-static struct task_struct *onoff_task;
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
#define RCU_TORTURE_PIPE_LEN 10
@@ -159,10 +149,6 @@ static long n_rcu_torture_boost_rterror;
static long n_rcu_torture_boost_failure;
static long n_rcu_torture_boosts;
static long n_rcu_torture_timers;
-static long n_offline_attempts;
-static long n_offline_successes;
-static long n_online_attempts;
-static long n_online_successes;
static struct list_head rcu_torture_removed;
static cpumask_var_t shuffle_tmp_mask;
@@ -174,8 +160,6 @@ static int stutter_pause_test;
#define RCUTORTURE_RUNNABLE_INIT 0
#endif
int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
-module_param(rcutorture_runnable, int, 0444);
-MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot");
#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU)
#define rcu_can_boost() 1
@@ -183,7 +167,6 @@ MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot");
#define rcu_can_boost() 0
#endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
-static unsigned long shutdown_time; /* jiffies to system shutdown. */
static unsigned long boost_starttime; /* jiffies of next boost test start. */
DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
/* and boost task create/destroy. */
@@ -199,9 +182,6 @@ static int fullstop = FULLSTOP_RMMOD;
*/
static DEFINE_MUTEX(fullstop_mutex);
-/* Forward reference. */
-static void rcu_torture_cleanup(void);
-
/*
* Detect and respond to a system shutdown.
*/
@@ -632,30 +612,6 @@ static struct rcu_torture_ops srcu_ops = {
.name = "srcu"
};
-static int srcu_torture_read_lock_raw(void) __acquires(&srcu_ctl)
-{
- return srcu_read_lock_raw(&srcu_ctl);
-}
-
-static void srcu_torture_read_unlock_raw(int idx) __releases(&srcu_ctl)
-{
- srcu_read_unlock_raw(&srcu_ctl, idx);
-}
-
-static struct rcu_torture_ops srcu_raw_ops = {
- .init = srcu_torture_init,
- .cleanup = srcu_torture_cleanup,
- .readlock = srcu_torture_read_lock_raw,
- .read_delay = srcu_read_delay,
- .readunlock = srcu_torture_read_unlock_raw,
- .completed = srcu_torture_completed,
- .deferred_free = rcu_sync_torture_deferred_free,
- .sync = srcu_torture_synchronize,
- .cb_barrier = NULL,
- .stats = srcu_torture_stats,
- .name = "srcu_raw"
-};
-
static void srcu_torture_synchronize_expedited(void)
{
synchronize_srcu_expedited(&srcu_ctl);
@@ -957,18 +913,6 @@ rcu_torture_fakewriter(void *arg)
return 0;
}
-void rcutorture_trace_dump(void)
-{
- static atomic_t beenhere = ATOMIC_INIT(0);
-
- if (atomic_read(&beenhere))
- return;
- if (atomic_xchg(&beenhere, 1) != 0)
- return;
- do_trace_rcu_torture_read(cur_ops->name, (struct rcu_head *)~0UL);
- ftrace_dump(DUMP_ALL);
-}
-
/*
* RCU torture reader from timer handler. Dereferences rcu_torture_current,
* incrementing the corresponding element of the pipeline array. The
@@ -990,7 +934,6 @@ static void rcu_torture_timer(unsigned long unused)
rcu_read_lock_bh_held() ||
rcu_read_lock_sched_held() ||
srcu_read_lock_held(&srcu_ctl));
- do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
if (p == NULL) {
/* Leave because rcu_torture_writer is not yet underway */
cur_ops->readunlock(idx);
@@ -1008,8 +951,6 @@ static void rcu_torture_timer(unsigned long unused)
/* Should not happen, but... */
pipe_count = RCU_TORTURE_PIPE_LEN;
}
- if (pipe_count > 1)
- rcutorture_trace_dump();
__this_cpu_inc(rcu_torture_count[pipe_count]);
completed = cur_ops->completed() - completed;
if (completed > RCU_TORTURE_PIPE_LEN) {
@@ -1053,7 +994,6 @@ rcu_torture_reader(void *arg)
rcu_read_lock_bh_held() ||
rcu_read_lock_sched_held() ||
srcu_read_lock_held(&srcu_ctl));
- do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
if (p == NULL) {
/* Wait for rcu_torture_writer to get underway */
cur_ops->readunlock(idx);
@@ -1069,8 +1009,6 @@ rcu_torture_reader(void *arg)
/* Should not happen, but... */
pipe_count = RCU_TORTURE_PIPE_LEN;
}
- if (pipe_count > 1)
- rcutorture_trace_dump();
__this_cpu_inc(rcu_torture_count[pipe_count]);
completed = cur_ops->completed() - completed;
if (completed > RCU_TORTURE_PIPE_LEN) {
@@ -1118,8 +1056,7 @@ rcu_torture_printk(char *page)
cnt += sprintf(&page[cnt],
"rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d "
"rtmbe: %d rtbke: %ld rtbre: %ld "
- "rtbf: %ld rtb: %ld nt: %ld "
- "onoff: %ld/%ld:%ld/%ld",
+ "rtbf: %ld rtb: %ld nt: %ld",
rcu_torture_current,
rcu_torture_current_version,
list_empty(&rcu_torture_freelist),
@@ -1131,11 +1068,7 @@ rcu_torture_printk(char *page)
n_rcu_torture_boost_rterror,
n_rcu_torture_boost_failure,
n_rcu_torture_boosts,
- n_rcu_torture_timers,
- n_online_successes,
- n_online_attempts,
- n_offline_successes,
- n_offline_attempts);
+ n_rcu_torture_timers);
if (atomic_read(&n_rcu_torture_mberror) != 0 ||
n_rcu_torture_boost_ktrerror != 0 ||
n_rcu_torture_boost_rterror != 0 ||
@@ -1299,14 +1232,12 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag)
"shuffle_interval=%d stutter=%d irqreader=%d "
"fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
"test_boost=%d/%d test_boost_interval=%d "
- "test_boost_duration=%d shutdown_secs=%d "
- "onoff_interval=%d\n",
+ "test_boost_duration=%d\n",
torture_type, tag, nrealreaders, nfakewriters,
stat_interval, verbose, test_no_idle_hz, shuffle_interval,
stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
test_boost, cur_ops->can_boost,
- test_boost_interval, test_boost_duration, shutdown_secs,
- onoff_interval);
+ test_boost_interval, test_boost_duration);
}
static struct notifier_block rcutorture_shutdown_nb = {
@@ -1356,131 +1287,6 @@ static int rcutorture_booster_init(int cpu)
return 0;
}
-/*
- * Cause the rcutorture test to shutdown the system after the test has
- * run for the time specified by the shutdown_secs module parameter.
- */
-static int
-rcu_torture_shutdown(void *arg)
-{
- long delta;
- unsigned long jiffies_snap;
-
- VERBOSE_PRINTK_STRING("rcu_torture_shutdown task started");
- jiffies_snap = ACCESS_ONCE(jiffies);
- while (ULONG_CMP_LT(jiffies_snap, shutdown_time) &&
- !kthread_should_stop()) {
- delta = shutdown_time - jiffies_snap;
- if (verbose)
- printk(KERN_ALERT "%s" TORTURE_FLAG
- "rcu_torture_shutdown task: %lu "
- "jiffies remaining\n",
- torture_type, delta);
- schedule_timeout_interruptible(delta);
- jiffies_snap = ACCESS_ONCE(jiffies);
- }
- if (kthread_should_stop()) {
- VERBOSE_PRINTK_STRING("rcu_torture_shutdown task stopping");
- return 0;
- }
-
- /* OK, shut down the system. */
-
- VERBOSE_PRINTK_STRING("rcu_torture_shutdown task shutting down system");
- shutdown_task = NULL; /* Avoid self-kill deadlock. */
- rcu_torture_cleanup(); /* Get the success/failure message. */
- kernel_power_off(); /* Shut down the system. */
- return 0;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/*
- * Execute random CPU-hotplug operations at the interval specified
- * by the onoff_interval.
- */
-static int
-rcu_torture_onoff(void *arg)
-{
- int cpu;
- int maxcpu = -1;
- DEFINE_RCU_RANDOM(rand);
-
- VERBOSE_PRINTK_STRING("rcu_torture_onoff task started");
- for_each_online_cpu(cpu)
- maxcpu = cpu;
- WARN_ON(maxcpu < 0);
- while (!kthread_should_stop()) {
- cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1);
- if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) {
- if (verbose)
- printk(KERN_ALERT "%s" TORTURE_FLAG
- "rcu_torture_onoff task: offlining %d\n",
- torture_type, cpu);
- n_offline_attempts++;
- if (cpu_down(cpu) == 0) {
- if (verbose)
- printk(KERN_ALERT "%s" TORTURE_FLAG
- "rcu_torture_onoff task: "
- "offlined %d\n",
- torture_type, cpu);
- n_offline_successes++;
- }
- } else if (cpu_is_hotpluggable(cpu)) {
- if (verbose)
- printk(KERN_ALERT "%s" TORTURE_FLAG
- "rcu_torture_onoff task: onlining %d\n",
- torture_type, cpu);
- n_online_attempts++;
- if (cpu_up(cpu) == 0) {
- if (verbose)
- printk(KERN_ALERT "%s" TORTURE_FLAG
- "rcu_torture_onoff task: "
- "onlined %d\n",
- torture_type, cpu);
- n_online_successes++;
- }
- }
- schedule_timeout_interruptible(onoff_interval * HZ);
- }
- VERBOSE_PRINTK_STRING("rcu_torture_onoff task stopping");
- return 0;
-}
-
-static int
-rcu_torture_onoff_init(void)
-{
- if (onoff_interval <= 0)
- return 0;
- onoff_task = kthread_run(rcu_torture_onoff, NULL, "rcu_torture_onoff");
- if (IS_ERR(onoff_task)) {
- onoff_task = NULL;
- return PTR_ERR(onoff_task);
- }
- return 0;
-}
-
-static void rcu_torture_onoff_cleanup(void)
-{
- if (onoff_task == NULL)
- return;
- VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task");
- kthread_stop(onoff_task);
-}
-
-#else /* #ifdef CONFIG_HOTPLUG_CPU */
-
-static void
-rcu_torture_onoff_init(void)
-{
-}
-
-static void rcu_torture_onoff_cleanup(void)
-{
-}
-
-#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
-
static int rcutorture_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
@@ -1585,11 +1391,6 @@ rcu_torture_cleanup(void)
for_each_possible_cpu(i)
rcutorture_booster_cleanup(i);
}
- if (shutdown_task != NULL) {
- VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task");
- kthread_stop(shutdown_task);
- }
- rcu_torture_onoff_cleanup();
/* Wait for all RCU callbacks to fire. */
@@ -1615,7 +1416,7 @@ rcu_torture_init(void)
static struct rcu_torture_ops *torture_ops[] =
{ &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
&rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops,
- &srcu_ops, &srcu_raw_ops, &srcu_expedited_ops,
+ &srcu_ops, &srcu_expedited_ops,
&sched_ops, &sched_sync_ops, &sched_expedited_ops, };
mutex_lock(&fullstop_mutex);
@@ -1806,18 +1607,6 @@ rcu_torture_init(void)
}
}
}
- if (shutdown_secs > 0) {
- shutdown_time = jiffies + shutdown_secs * HZ;
- shutdown_task = kthread_run(rcu_torture_shutdown, NULL,
- "rcu_torture_shutdown");
- if (IS_ERR(shutdown_task)) {
- firsterr = PTR_ERR(shutdown_task);
- VERBOSE_PRINTK_ERRSTRING("Failed to create shutdown");
- shutdown_task = NULL;
- goto unwind;
- }
- }
- rcu_torture_onoff_init();
register_reboot_notifier(&rcutorture_shutdown_nb);
rcutorture_record_test_transition();
mutex_unlock(&fullstop_mutex);
diff --git a/trunk/kernel/rcutree.c b/trunk/kernel/rcutree.c
index 6c4a6722abfd..6b76d812740c 100644
--- a/trunk/kernel/rcutree.c
+++ b/trunk/kernel/rcutree.c
@@ -69,7 +69,7 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
NUM_RCU_LVL_3, \
NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
}, \
- .fqs_state = RCU_GP_IDLE, \
+ .signaled = RCU_GP_IDLE, \
.gpnum = -300, \
.completed = -300, \
.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \
@@ -195,10 +195,12 @@ void rcu_note_context_switch(int cpu)
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
+#ifdef CONFIG_NO_HZ
DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
- .dynticks_nesting = DYNTICK_TASK_NESTING,
+ .dynticks_nesting = 1,
.dynticks = ATOMIC_INIT(1),
};
+#endif /* #ifdef CONFIG_NO_HZ */
static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */
static int qhimark = 10000; /* If this many pending, ignore blimit. */
@@ -326,11 +328,11 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
return 1;
}
- /*
- * The CPU is online, so send it a reschedule IPI. This forces
- * it through the scheduler, and (inefficiently) also handles cases
- * where idle loops fail to inform RCU about the CPU being idle.
- */
+ /* If preemptible RCU, no point in sending reschedule IPI. */
+ if (rdp->preemptible)
+ return 0;
+
+ /* The CPU is online, so send it a reschedule IPI. */
if (rdp->cpu != smp_processor_id())
smp_send_reschedule(rdp->cpu);
else
@@ -341,181 +343,59 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
#endif /* #ifdef CONFIG_SMP */
-/*
- * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
- *
- * If the new value of the ->dynticks_nesting counter now is zero,
- * we really have entered idle, and must do the appropriate accounting.
- * The caller must have disabled interrupts.
- */
-static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
-{
- trace_rcu_dyntick("Start", oldval, 0);
- if (!is_idle_task(current)) {
- struct task_struct *idle = idle_task(smp_processor_id());
-
- trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
- ftrace_dump(DUMP_ALL);
- WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
- current->pid, current->comm,
- idle->pid, idle->comm); /* must be idle task! */
- }
- rcu_prepare_for_idle(smp_processor_id());
- /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
- smp_mb__before_atomic_inc(); /* See above. */
- atomic_inc(&rdtp->dynticks);
- smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
- WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
-}
+#ifdef CONFIG_NO_HZ
/**
- * rcu_idle_enter - inform RCU that current CPU is entering idle
+ * rcu_enter_nohz - inform RCU that current CPU is entering nohz
*
- * Enter idle mode, in other words, -leave- the mode in which RCU
+ * Enter nohz mode, in other words, -leave- the mode in which RCU
* read-side critical sections can occur. (Though RCU read-side
- * critical sections can occur in irq handlers in idle, a possibility
- * handled by irq_enter() and irq_exit().)
- *
- * We crowbar the ->dynticks_nesting field to zero to allow for
- * the possibility of usermode upcalls having messed up our count
- * of interrupt nesting level during the prior busy period.
+ * critical sections can occur in irq handlers in nohz mode, a possibility
+ * handled by rcu_irq_enter() and rcu_irq_exit()).
*/
-void rcu_idle_enter(void)
+void rcu_enter_nohz(void)
{
unsigned long flags;
- long long oldval;
struct rcu_dynticks *rdtp;
local_irq_save(flags);
rdtp = &__get_cpu_var(rcu_dynticks);
- oldval = rdtp->dynticks_nesting;
- rdtp->dynticks_nesting = 0;
- rcu_idle_enter_common(rdtp, oldval);
+ if (--rdtp->dynticks_nesting) {
+ local_irq_restore(flags);
+ return;
+ }
+ trace_rcu_dyntick("Start");
+ /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
+ smp_mb__before_atomic_inc(); /* See above. */
+ atomic_inc(&rdtp->dynticks);
+ smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
+ WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
local_irq_restore(flags);
}
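
Editor's note: the rcu_enter_nohz()/rcu_exit_nohz() pair restored above keeps the per-CPU ->dynticks counter even while the CPU is in dyntick-idle and odd otherwise, which is exactly what dyntick_save_progress_counter() exploits when it tests (snap & 0x1) == 0. A minimal userspace model of that even/odd convention, illustrative only, with hypothetical names and none of the kernel's memory-ordering subtleties:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy model of RCU's even/odd dynticks convention; not kernel code. */
static atomic_int dynticks = 1;                 /* odd: CPU is active */

static void model_enter_idle(void) { atomic_fetch_add(&dynticks, 1); } /* becomes even */
static void model_exit_idle(void)  { atomic_fetch_add(&dynticks, 1); } /* becomes odd  */

/* A watcher snapshots the counter once per grace period... */
static int model_snapshot(void) { return atomic_load(&dynticks); }

/* ...and later concludes the CPU passed through (or sits in) idle if the
 * snapshot was even, or if the counter has moved since the snapshot. */
static bool model_in_or_through_idle(int snap)
{
	return ((snap & 0x1) == 0) || (atomic_load(&dynticks) != snap);
}

int main(void)
{
	int snap = model_snapshot();    /* CPU busy: odd value */

	model_enter_idle();             /* counter becomes even */
	printf("quiescent state observed: %d\n", model_in_or_through_idle(snap));
	model_exit_idle();
	return 0;
}
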
-/**
- * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
- *
- * Exit from an interrupt handler, which might possibly result in entering
- * idle mode, in other words, leaving the mode in which read-side critical
- * sections can occur.
- *
- * This code assumes that the idle loop never does anything that might
- * result in unbalanced calls to irq_enter() and irq_exit(). If your
- * architecture violates this assumption, RCU will give you what you
- * deserve, good and hard. But very infrequently and irreproducibly.
- *
- * Use things like work queues to work around this limitation.
+/*
+ * rcu_exit_nohz - inform RCU that current CPU is leaving nohz
*
- * You have been warned.
+ * Exit nohz mode, in other words, -enter- the mode in which RCU
+ * read-side critical sections normally occur.
*/
-void rcu_irq_exit(void)
+void rcu_exit_nohz(void)
{
unsigned long flags;
- long long oldval;
struct rcu_dynticks *rdtp;
local_irq_save(flags);
rdtp = &__get_cpu_var(rcu_dynticks);
- oldval = rdtp->dynticks_nesting;
- rdtp->dynticks_nesting--;
- WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
- if (rdtp->dynticks_nesting)
- trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
- else
- rcu_idle_enter_common(rdtp, oldval);
- local_irq_restore(flags);
-}
-
-/*
- * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle
- *
- * If the new value of the ->dynticks_nesting counter was previously zero,
- * we really have exited idle, and must do the appropriate accounting.
- * The caller must have disabled interrupts.
- */
-static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
-{
+ if (rdtp->dynticks_nesting++) {
+ local_irq_restore(flags);
+ return;
+ }
smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */
atomic_inc(&rdtp->dynticks);
/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
smp_mb__after_atomic_inc(); /* See above. */
WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
- rcu_cleanup_after_idle(smp_processor_id());
- trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
- if (!is_idle_task(current)) {
- struct task_struct *idle = idle_task(smp_processor_id());
-
- trace_rcu_dyntick("Error on exit: not idle task",
- oldval, rdtp->dynticks_nesting);
- ftrace_dump(DUMP_ALL);
- WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
- current->pid, current->comm,
- idle->pid, idle->comm); /* must be idle task! */
- }
-}
-
-/**
- * rcu_idle_exit - inform RCU that current CPU is leaving idle
- *
- * Exit idle mode, in other words, -enter- the mode in which RCU
- * read-side critical sections can occur.
- *
- * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to
- * allow for the possibility of usermode upcalls messing up our count
- * of interrupt nesting level during the busy period that is just
- * now starting.
- */
-void rcu_idle_exit(void)
-{
- unsigned long flags;
- struct rcu_dynticks *rdtp;
- long long oldval;
-
- local_irq_save(flags);
- rdtp = &__get_cpu_var(rcu_dynticks);
- oldval = rdtp->dynticks_nesting;
- WARN_ON_ONCE(oldval != 0);
- rdtp->dynticks_nesting = DYNTICK_TASK_NESTING;
- rcu_idle_exit_common(rdtp, oldval);
- local_irq_restore(flags);
-}
-
-/**
- * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
- *
- * Enter an interrupt handler, which might possibly result in exiting
- * idle mode, in other words, entering the mode in which read-side critical
- * sections can occur.
- *
- * Note that the Linux kernel is fully capable of entering an interrupt
- * handler that it never exits, for example when doing upcalls to
- * user mode! This code assumes that the idle loop never does upcalls to
- * user mode. If your architecture does do upcalls from the idle loop (or
- * does anything else that results in unbalanced calls to the irq_enter()
- * and irq_exit() functions), RCU will give you what you deserve, good
- * and hard. But very infrequently and irreproducibly.
- *
- * Use things like work queues to work around this limitation.
- *
- * You have been warned.
- */
-void rcu_irq_enter(void)
-{
- unsigned long flags;
- struct rcu_dynticks *rdtp;
- long long oldval;
-
- local_irq_save(flags);
- rdtp = &__get_cpu_var(rcu_dynticks);
- oldval = rdtp->dynticks_nesting;
- rdtp->dynticks_nesting++;
- WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
- if (oldval)
- trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
- else
- rcu_idle_exit_common(rdtp, oldval);
+ trace_rcu_dyntick("End");
local_irq_restore(flags);
}
@@ -562,37 +442,27 @@ void rcu_nmi_exit(void)
WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
}
-#ifdef CONFIG_PROVE_RCU
-
/**
- * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
+ * rcu_irq_enter - inform RCU of entry to hard irq context
*
- * If the current CPU is in its idle loop and is neither in an interrupt
- * or NMI handler, return true.
+ * If the CPU was idle with dynamic ticks active, this updates the
+ * rdtp->dynticks to let the RCU handling know that the CPU is active.
*/
-int rcu_is_cpu_idle(void)
+void rcu_irq_enter(void)
{
- int ret;
-
- preempt_disable();
- ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
- preempt_enable();
- return ret;
+ rcu_exit_nohz();
}
-EXPORT_SYMBOL(rcu_is_cpu_idle);
-
-#endif /* #ifdef CONFIG_PROVE_RCU */
/**
- * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
+ * rcu_irq_exit - inform RCU of exit from hard irq context
*
- * If the current CPU is idle or running at a first-level (not nested)
- * interrupt from idle, return true. The caller must have at least
- * disabled preemption.
+ * If the CPU was idle with dynamic ticks active, update the rdp->dynticks
+ * to let the RCU handling be aware that the CPU is going back to idle
+ * with no ticks.
*/
-int rcu_is_cpu_rrupt_from_idle(void)
+void rcu_irq_exit(void)
{
- return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
+ rcu_enter_nohz();
}
#ifdef CONFIG_SMP
@@ -605,7 +475,7 @@ int rcu_is_cpu_rrupt_from_idle(void)
static int dyntick_save_progress_counter(struct rcu_data *rdp)
{
rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
- return (rdp->dynticks_snap & 0x1) == 0;
+ return 0;
}
/*
@@ -642,6 +512,26 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
#endif /* #ifdef CONFIG_SMP */
+#else /* #ifdef CONFIG_NO_HZ */
+
+#ifdef CONFIG_SMP
+
+static int dyntick_save_progress_counter(struct rcu_data *rdp)
+{
+ return 0;
+}
+
+static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
+{
+ return rcu_implicit_offline_qs(rdp);
+}
+
+#endif /* #ifdef CONFIG_SMP */
+
+#endif /* #else #ifdef CONFIG_NO_HZ */
+
+int rcu_cpu_stall_suppress __read_mostly;
+
static void record_gp_stall_check_time(struct rcu_state *rsp)
{
rsp->gp_start = jiffies;
@@ -976,8 +866,8 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
/* Advance to a new grace period and initialize state. */
rsp->gpnum++;
trace_rcu_grace_period(rsp->name, rsp->gpnum, "start");
- WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT);
- rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */
+ WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT);
+ rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
record_gp_stall_check_time(rsp);
@@ -987,7 +877,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
rnp->qsmask = rnp->qsmaskinit;
rnp->gpnum = rsp->gpnum;
rnp->completed = rsp->completed;
- rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state OK */
+ rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
rcu_start_gp_per_cpu(rsp, rnp, rdp);
rcu_preempt_boost_start_gp(rnp);
trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
@@ -1037,7 +927,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
rnp = rcu_get_root(rsp);
raw_spin_lock(&rnp->lock); /* irqs already disabled. */
- rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
+ rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
}
@@ -1101,7 +991,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
rsp->completed = rsp->gpnum; /* Declare the grace period complete. */
trace_rcu_grace_period(rsp->name, rsp->completed, "end");
- rsp->fqs_state = RCU_GP_IDLE;
+ rsp->signaled = RCU_GP_IDLE;
rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
}
@@ -1331,7 +1221,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
else
raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (need_report & RCU_OFL_TASKS_EXP_GP)
- rcu_report_exp_rnp(rsp, rnp, true);
+ rcu_report_exp_rnp(rsp, rnp);
rcu_node_kthread_setaffinity(rnp, -1);
}
@@ -1373,9 +1263,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
/* If no callbacks are ready, just return.*/
if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
trace_rcu_batch_start(rsp->name, 0, 0);
- trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
- need_resched(), is_idle_task(current),
- rcu_is_callbacks_kthread());
+ trace_rcu_batch_end(rsp->name, 0);
return;
}
@@ -1403,17 +1291,12 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
debug_rcu_head_unqueue(list);
__rcu_reclaim(rsp->name, list);
list = next;
- /* Stop only if limit reached and CPU has something to do. */
- if (++count >= bl &&
- (need_resched() ||
- (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
+ if (++count >= bl)
break;
}
local_irq_save(flags);
- trace_rcu_batch_end(rsp->name, count, !!list, need_resched(),
- is_idle_task(current),
- rcu_is_callbacks_kthread());
+ trace_rcu_batch_end(rsp->name, count);
/* Update count, and requeue any remaining callbacks. */
rdp->qlen -= count;
@@ -1451,14 +1334,16 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
* (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
* Also schedule RCU core processing.
*
- * This function must be called from hardirq context. It is normally
+ * This function must be called with hardirqs disabled. It is normally
* invoked from the scheduling-clock interrupt. If rcu_pending returns
* false, there is no point in invoking rcu_check_callbacks().
*/
void rcu_check_callbacks(int cpu, int user)
{
trace_rcu_utilization("Start scheduler-tick");
- if (user || rcu_is_cpu_rrupt_from_idle()) {
+ if (user ||
+ (idle_cpu(cpu) && rcu_scheduler_active &&
+ !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
/*
* Get here if this CPU took its interrupt from user
@@ -1572,7 +1457,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
goto unlock_fqs_ret; /* no GP in progress, time updated. */
}
rsp->fqs_active = 1;
- switch (rsp->fqs_state) {
+ switch (rsp->signaled) {
case RCU_GP_IDLE:
case RCU_GP_INIT:
@@ -1588,7 +1473,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
force_qs_rnp(rsp, dyntick_save_progress_counter);
raw_spin_lock(&rnp->lock); /* irqs already disabled */
if (rcu_gp_in_progress(rsp))
- rsp->fqs_state = RCU_FORCE_QS;
+ rsp->signaled = RCU_FORCE_QS;
break;
case RCU_FORCE_QS:
@@ -1927,7 +1812,7 @@ static int rcu_pending(int cpu)
* by the current CPU, even if none need be done immediately, returning
* 1 if so.
*/
-static int rcu_cpu_has_callbacks(int cpu)
+static int rcu_needs_cpu_quick_check(int cpu)
{
/* RCU callbacks either ready or pending? */
return per_cpu(rcu_sched_data, cpu).nxtlist ||
@@ -2028,9 +1913,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
for (i = 0; i < RCU_NEXT_SIZE; i++)
rdp->nxttail[i] = &rdp->nxtlist;
rdp->qlen = 0;
+#ifdef CONFIG_NO_HZ
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
- WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING);
- WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
+#endif /* #ifdef CONFIG_NO_HZ */
rdp->cpu = cpu;
rdp->rsp = rsp;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -2057,10 +1942,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
rdp->qlen_last_fqs_check = 0;
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->blimit = blimit;
- rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING;
- atomic_set(&rdp->dynticks->dynticks,
- (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
- rcu_prepare_for_idle_init(cpu);
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
/*
@@ -2142,7 +2023,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
rcu_send_cbs_to_online(&rcu_bh_state);
rcu_send_cbs_to_online(&rcu_sched_state);
rcu_preempt_send_cbs_to_online();
- rcu_cleanup_after_idle(cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
diff --git a/trunk/kernel/rcutree.h b/trunk/kernel/rcutree.h
index fddff92d6676..849ce9ec51fe 100644
--- a/trunk/kernel/rcutree.h
+++ b/trunk/kernel/rcutree.h
@@ -84,10 +84,9 @@
* Dynticks per-CPU state.
*/
struct rcu_dynticks {
- long long dynticks_nesting; /* Track irq/process nesting level. */
- /* Process level is worth LLONG_MAX/2. */
- int dynticks_nmi_nesting; /* Track NMI nesting level. */
- atomic_t dynticks; /* Even value for idle, else odd. */
+ int dynticks_nesting; /* Track irq/process nesting level. */
+ int dynticks_nmi_nesting; /* Track NMI nesting level. */
+ atomic_t dynticks; /* Even value for dynticks-idle, else odd. */
};
/* RCU's kthread states for tracing. */
@@ -275,12 +274,16 @@ struct rcu_data {
/* did other CPU force QS recently? */
long blimit; /* Upper limit on a processed batch */
+#ifdef CONFIG_NO_HZ
/* 3) dynticks interface. */
struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */
int dynticks_snap; /* Per-GP tracking for dynticks. */
+#endif /* #ifdef CONFIG_NO_HZ */
/* 4) reasons this CPU needed to be kicked by force_quiescent_state */
+#ifdef CONFIG_NO_HZ
unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */
+#endif /* #ifdef CONFIG_NO_HZ */
unsigned long offline_fqs; /* Kicked due to being offline. */
unsigned long resched_ipi; /* Sent a resched IPI. */
@@ -299,12 +302,16 @@ struct rcu_data {
struct rcu_state *rsp;
};
-/* Values for fqs_state field in struct rcu_state. */
+/* Values for signaled field in struct rcu_state. */
#define RCU_GP_IDLE 0 /* No grace period in progress. */
#define RCU_GP_INIT 1 /* Grace period being initialized. */
#define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */
#define RCU_FORCE_QS 3 /* Need to force quiescent state. */
+#ifdef CONFIG_NO_HZ
#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
+#else /* #ifdef CONFIG_NO_HZ */
+#define RCU_SIGNAL_INIT RCU_FORCE_QS
+#endif /* #else #ifdef CONFIG_NO_HZ */
#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
@@ -354,7 +361,7 @@ struct rcu_state {
/* The following fields are guarded by the root rcu_node's lock. */
- u8 fqs_state ____cacheline_internodealigned_in_smp;
+ u8 signaled ____cacheline_internodealigned_in_smp;
/* Force QS state. */
u8 fqs_active; /* force_quiescent_state() */
/* is running. */
@@ -444,8 +451,7 @@ static void rcu_preempt_check_callbacks(int cpu);
static void rcu_preempt_process_callbacks(void);
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
-static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
- bool wake);
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
static int rcu_preempt_pending(int cpu);
static int rcu_preempt_needs_cpu(int cpu);
@@ -455,7 +461,6 @@ static void __init __rcu_init_preempt(void);
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
static void invoke_rcu_callbacks_kthread(void);
-static bool rcu_is_callbacks_kthread(void);
#ifdef CONFIG_RCU_BOOST
static void rcu_preempt_do_callbacks(void);
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
@@ -468,8 +473,5 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg);
#endif /* #ifdef CONFIG_RCU_BOOST */
static void rcu_cpu_kthread_setrt(int cpu, int to_rt);
static void __cpuinit rcu_prepare_kthreads(int cpu);
-static void rcu_prepare_for_idle_init(int cpu);
-static void rcu_cleanup_after_idle(int cpu);
-static void rcu_prepare_for_idle(int cpu);
#endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/trunk/kernel/rcutree_plugin.h b/trunk/kernel/rcutree_plugin.h
index 8bb35d73e1f9..4b9b9f8a4184 100644
--- a/trunk/kernel/rcutree_plugin.h
+++ b/trunk/kernel/rcutree_plugin.h
@@ -312,7 +312,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
{
int empty;
int empty_exp;
- int empty_exp_now;
unsigned long flags;
struct list_head *np;
#ifdef CONFIG_RCU_BOOST
@@ -383,10 +382,8 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
/*
* If this was the last task on the current list, and if
* we aren't waiting on any CPUs, report the quiescent state.
- * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
- * so we must take a snapshot of the expedited state.
+ * Note that rcu_report_unblock_qs_rnp() releases rnp->lock.
*/
- empty_exp_now = !rcu_preempted_readers_exp(rnp);
if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) {
trace_rcu_quiescent_state_report("preempt_rcu",
rnp->gpnum,
@@ -409,8 +406,8 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
* If this was the last task on the expedited lists,
* then we need to report up the rcu_node hierarchy.
*/
- if (!empty_exp && empty_exp_now)
- rcu_report_exp_rnp(&rcu_preempt_state, rnp, true);
+ if (!empty_exp && !rcu_preempted_readers_exp(rnp))
+ rcu_report_exp_rnp(&rcu_preempt_state, rnp);
} else {
local_irq_restore(flags);
}
@@ -732,13 +729,9 @@ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
* recursively up the tree. (Calm down, calm down, we do the recursion
* iteratively!)
*
- * Most callers will set the "wake" flag, but the task initiating the
- * expedited grace period need not wake itself.
- *
* Caller must hold sync_rcu_preempt_exp_mutex.
*/
-static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
- bool wake)
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
{
unsigned long flags;
unsigned long mask;
@@ -751,8 +744,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
}
if (rnp->parent == NULL) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
- if (wake)
- wake_up(&sync_rcu_preempt_exp_wq);
+ wake_up(&sync_rcu_preempt_exp_wq);
break;
}
mask = rnp->grpmask;
@@ -785,7 +777,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
must_wait = 1;
}
if (!must_wait)
- rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */
+ rcu_report_exp_rnp(rsp, rnp);
}
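
Editor's note: rcu_report_exp_rnp(), returned above to its unconditional-wakeup form, walks the rcu_node tree iteratively: each level clears this node's bit (its grpmask) in the parent and stops as soon as some sibling is still blocking the expedited grace period; only when the root drains is the waiter woken. A small self-contained model of that upward propagation, with hypothetical structures rather than the kernel's rcu_node:

#include <stdbool.h>
#include <stdio.h>

/* Toy model of propagating "my subtree is done" up a tree; not kernel code. */
struct node {
	struct node *parent;
	unsigned long grpmask;       /* this node's bit in its parent's mask */
	unsigned long pending_mask;  /* children/tasks still blocking        */
};

static bool report_up(struct node *np, unsigned long bit)
{
	for (;;) {
		np->pending_mask &= ~bit;    /* this child is done */
		if (np->pending_mask != 0)
			return false;        /* siblings still pending: stop here */
		if (np->parent == NULL)
			return true;         /* root drained: wake the waiter */
		bit = np->grpmask;           /* clear this node's bit one level up */
		np = np->parent;
	}
}

int main(void)
{
	struct node root = { .parent = NULL, .grpmask = 0, .pending_mask = 0x1 };
	struct node leaf = { .parent = &root, .grpmask = 0x1, .pending_mask = 0x3 };

	printf("%d\n", report_up(&leaf, 0x1)); /* 0: sibling bit 0x2 still pending */
	printf("%d\n", report_up(&leaf, 0x2)); /* 1: leaf and root drained */
	return 0;
}
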
/*
@@ -1077,9 +1069,9 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
* report on tasks preempted in RCU read-side critical sections during
* expedited RCU grace periods.
*/
-static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
- bool wake)
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
{
+ return;
}
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -1165,6 +1157,8 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp)
#endif /* #else #ifdef CONFIG_RCU_TRACE */
+static struct lock_class_key rcu_boost_class;
+
/*
* Carry out RCU priority boosting on the task indicated by ->exp_tasks
* or ->boost_tasks, advancing the pointer to the next task in the
@@ -1227,13 +1221,15 @@ static int rcu_boost(struct rcu_node *rnp)
*/
t = container_of(tb, struct task_struct, rcu_node_entry);
rt_mutex_init_proxy_locked(&mtx, t);
+ /* Avoid lockdep false positives. This rt_mutex is its own thing. */
+ lockdep_set_class_and_name(&mtx.wait_lock, &rcu_boost_class,
+ "rcu_boost_mutex");
t->rcu_boost_mutex = &mtx;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
- return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
- ACCESS_ONCE(rnp->boost_tasks) != NULL;
+ return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL;
}
/*
@@ -1332,15 +1328,6 @@ static void invoke_rcu_callbacks_kthread(void)
local_irq_restore(flags);
}
-/*
- * Is the current CPU running the RCU-callbacks kthread?
- * Caller must have preemption disabled.
- */
-static bool rcu_is_callbacks_kthread(void)
-{
- return __get_cpu_var(rcu_cpu_kthread_task) == current;
-}
-
/*
* Set the affinity of the boost kthread. The CPU-hotplug locks are
* held, so no one should be messing with the existence of the boost
@@ -1785,11 +1772,6 @@ static void invoke_rcu_callbacks_kthread(void)
WARN_ON_ONCE(1);
}
-static bool rcu_is_callbacks_kthread(void)
-{
- return false;
-}
-
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
{
}
@@ -1925,7 +1907,7 @@ void synchronize_sched_expedited(void)
* grace period works for us.
*/
get_online_cpus();
- snap = atomic_read(&sync_sched_expedited_started);
+ snap = atomic_read(&sync_sched_expedited_started) - 1;
smp_mb(); /* ensure read is before try_stop_cpus(). */
}
@@ -1957,243 +1939,88 @@ EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
* 1 if so. This function is part of the RCU implementation; it is -not-
* an exported member of the RCU API.
*
- * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
- * any flavor of RCU.
+ * Because we have preemptible RCU, just check whether this CPU needs
+ * any flavor of RCU. Do not chew up lots of CPU cycles with preemption
+ * disabled in a most-likely vain attempt to cause RCU not to need this CPU.
*/
int rcu_needs_cpu(int cpu)
{
- return rcu_cpu_has_callbacks(cpu);
-}
-
-/*
- * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it.
- */
-static void rcu_prepare_for_idle_init(int cpu)
-{
-}
-
-/*
- * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
- * after it.
- */
-static void rcu_cleanup_after_idle(int cpu)
-{
-}
-
-/*
- * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
- * is nothing.
- */
-static void rcu_prepare_for_idle(int cpu)
-{
+ return rcu_needs_cpu_quick_check(cpu);
}
#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
-/*
- * This code is invoked when a CPU goes idle, at which point we want
- * to have the CPU do everything required for RCU so that it can enter
- * the energy-efficient dyntick-idle mode. This is handled by a
- * state machine implemented by rcu_prepare_for_idle() below.
- *
- * The following three preprocessor symbols control this state machine:
- *
- * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt
- * to satisfy RCU. Beyond this point, it is better to incur a periodic
- * scheduling-clock interrupt than to loop through the state machine
- * at full power.
- * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are
- * optional if RCU does not need anything immediately from this
- * CPU, even if this CPU still has RCU callbacks queued. The first
- * times through the state machine are mandatory: we need to give
- * the state machine a chance to communicate a quiescent state
- * to the RCU core.
- * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
- * to sleep in dyntick-idle mode with RCU callbacks pending. This
- * is sized to be roughly one RCU grace period. Those energy-efficiency
- * benchmarkers who might otherwise be tempted to set this to a large
- * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
- * system. And if you are -that- concerned about energy efficiency,
- * just power the system down and be done with it!
- *
- * The values below work well in practice. If future workloads require
- * adjustment, they can be converted into kernel config parameters, though
- * making the state machine smarter might be a better option.
- */
-#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */
-#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */
-#define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */
-
+#define RCU_NEEDS_CPU_FLUSHES 5
static DEFINE_PER_CPU(int, rcu_dyntick_drain);
static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
-static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer);
-static ktime_t rcu_idle_gp_wait;
-
-/*
- * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
- * callbacks on this CPU, (2) this CPU has not yet attempted to enter
- * dyntick-idle mode, or (3) this CPU is in the process of attempting to
- * enter dyntick-idle mode. Otherwise, if we have recently tried and failed
- * to enter dyntick-idle mode, we refuse to try to enter it. After all,
- * it is better to incur scheduling-clock interrupts than to spin
- * continuously for the same time duration!
- */
-int rcu_needs_cpu(int cpu)
-{
- /* If no callbacks, RCU doesn't need the CPU. */
- if (!rcu_cpu_has_callbacks(cpu))
- return 0;
- /* Otherwise, RCU needs the CPU only if it recently tried and failed. */
- return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies;
-}
-
-/*
- * Timer handler used to force CPU to start pushing its remaining RCU
- * callbacks in the case where it entered dyntick-idle mode with callbacks
- * pending. The handler doesn't really need to do anything because the
- * real work is done upon re-entry to idle, or by the next scheduling-clock
- * interrupt should idle not be re-entered.
- */
-static enum hrtimer_restart rcu_idle_gp_timer_func(struct hrtimer *hrtp)
-{
- trace_rcu_prep_idle("Timer");
- return HRTIMER_NORESTART;
-}
-
-/*
- * Initialize the timer used to pull CPUs out of dyntick-idle mode.
- */
-static void rcu_prepare_for_idle_init(int cpu)
-{
- static int firsttime = 1;
- struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu);
-
- hrtimer_init(hrtp, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- hrtp->function = rcu_idle_gp_timer_func;
- if (firsttime) {
- unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY);
-
- rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000);
- firsttime = 0;
- }
-}
-
-/*
- * Clean up for exit from idle. Because we are exiting from idle, there
- * is no longer any point to rcu_idle_gp_timer, so cancel it. This will
- * do nothing if this timer is not active, so just cancel it unconditionally.
- */
-static void rcu_cleanup_after_idle(int cpu)
-{
- hrtimer_cancel(&per_cpu(rcu_idle_gp_timer, cpu));
-}
/*
- * Check to see if any RCU-related work can be done by the current CPU,
- * and if so, schedule a softirq to get it done. This function is part
- * of the RCU implementation; it is -not- an exported member of the RCU API.
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so. This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
*
- * The idea is for the current CPU to clear out all work required by the
- * RCU core for the current grace period, so that this CPU can be permitted
- * to enter dyntick-idle mode. In some cases, it will need to be awakened
- * at the end of the grace period by whatever CPU ends the grace period.
- * This allows CPUs to go dyntick-idle more quickly, and to reduce the
- * number of wakeups by a modest integer factor.
+ * Because we are not supporting preemptible RCU, attempt to accelerate
+ * any current grace periods so that RCU no longer needs this CPU, but
+ * only if all other CPUs are already in dynticks-idle mode. This will
+ * allow the CPU cores to be powered down immediately, as opposed to after
+ * waiting many milliseconds for grace periods to elapse.
*
* Because it is not legal to invoke rcu_process_callbacks() with irqs
* disabled, we do one pass of force_quiescent_state(), then do a
* invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
* later. The per-cpu rcu_dyntick_drain variable controls the sequencing.
- *
- * The caller must have disabled interrupts.
*/
-static void rcu_prepare_for_idle(int cpu)
+int rcu_needs_cpu(int cpu)
{
- unsigned long flags;
-
- local_irq_save(flags);
-
- /*
- * If there are no callbacks on this CPU, enter dyntick-idle mode.
- * Also reset state to avoid prejudicing later attempts.
- */
- if (!rcu_cpu_has_callbacks(cpu)) {
- per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
- per_cpu(rcu_dyntick_drain, cpu) = 0;
- local_irq_restore(flags);
- trace_rcu_prep_idle("No callbacks");
- return;
- }
-
- /*
- * If in holdoff mode, just return. We will presumably have
- * refrained from disabling the scheduling-clock tick.
- */
- if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) {
- local_irq_restore(flags);
- trace_rcu_prep_idle("In holdoff");
- return;
+ int c = 0;
+ int snap;
+ int thatcpu;
+
+ /* Check for being in the holdoff period. */
+ if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies)
+ return rcu_needs_cpu_quick_check(cpu);
+
+ /* Don't bother unless we are the last non-dyntick-idle CPU. */
+ for_each_online_cpu(thatcpu) {
+ if (thatcpu == cpu)
+ continue;
+ snap = atomic_add_return(0, &per_cpu(rcu_dynticks,
+ thatcpu).dynticks);
+ smp_mb(); /* Order sampling of snap with end of grace period. */
+ if ((snap & 0x1) != 0) {
+ per_cpu(rcu_dyntick_drain, cpu) = 0;
+ per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
+ return rcu_needs_cpu_quick_check(cpu);
+ }
}
/* Check and update the rcu_dyntick_drain sequencing. */
if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
/* First time through, initialize the counter. */
- per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES;
- } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES &&
- !rcu_pending(cpu)) {
- /* Can we go dyntick-idle despite still having callbacks? */
- trace_rcu_prep_idle("Dyntick with callbacks");
- per_cpu(rcu_dyntick_drain, cpu) = 0;
- per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
- hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu),
- rcu_idle_gp_wait, HRTIMER_MODE_REL);
- return; /* Nothing more to do immediately. */
+ per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
} else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
/* We have hit the limit, so time to give up. */
per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
- local_irq_restore(flags);
- trace_rcu_prep_idle("Begin holdoff");
- invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */
- return;
+ return rcu_needs_cpu_quick_check(cpu);
}
- /*
- * Do one step of pushing the remaining RCU callbacks through
- * the RCU core state machine.
- */
-#ifdef CONFIG_TREE_PREEMPT_RCU
- if (per_cpu(rcu_preempt_data, cpu).nxtlist) {
- local_irq_restore(flags);
- rcu_preempt_qs(cpu);
- force_quiescent_state(&rcu_preempt_state, 0);
- local_irq_save(flags);
- }
-#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+ /* Do one step pushing remaining RCU callbacks through. */
if (per_cpu(rcu_sched_data, cpu).nxtlist) {
- local_irq_restore(flags);
rcu_sched_qs(cpu);
force_quiescent_state(&rcu_sched_state, 0);
- local_irq_save(flags);
+ c = c || per_cpu(rcu_sched_data, cpu).nxtlist;
}
if (per_cpu(rcu_bh_data, cpu).nxtlist) {
- local_irq_restore(flags);
rcu_bh_qs(cpu);
force_quiescent_state(&rcu_bh_state, 0);
- local_irq_save(flags);
+ c = c || per_cpu(rcu_bh_data, cpu).nxtlist;
}
- /*
- * If RCU callbacks are still pending, RCU still needs this CPU.
- * So try forcing the callbacks through the grace period.
- */
- if (rcu_cpu_has_callbacks(cpu)) {
- local_irq_restore(flags);
- trace_rcu_prep_idle("More callbacks");
+ /* If RCU callbacks are still pending, RCU still needs this CPU. */
+ if (c)
invoke_rcu_core();
- } else {
- local_irq_restore(flags);
- trace_rcu_prep_idle("Callbacks drained");
- }
+ return c;
}
#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
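
Editor's note: the rcu_needs_cpu() variant restored above rate-limits its own effort with two per-CPU variables: rcu_dyntick_drain counts down the flush attempts allowed per idle entry (RCU_NEEDS_CPU_FLUSHES), and rcu_dyntick_holdoff records the jiffy at which the CPU gave up so it does not retry until the next jiffy. A compact userspace model of that drain/holdoff sequencing, using hypothetical names and a fake jiffies counter:

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the drain/holdoff sequencing; not kernel code. */
#define FLUSH_ATTEMPTS 5             /* analogous to RCU_NEEDS_CPU_FLUSHES */

static unsigned long fake_jiffies;   /* stands in for the kernel's jiffies */
static int drain;                    /* attempts left in this idle entry   */
static unsigned long holdoff = -1UL; /* jiffy at which we gave up          */

/* Returns true if the CPU should keep its tick because RCU work remains. */
static bool model_needs_cpu(bool callbacks_pending)
{
	if (holdoff == fake_jiffies)          /* already gave up this jiffy */
		return callbacks_pending;
	if (drain <= 0) {
		drain = FLUSH_ATTEMPTS;       /* first pass: arm the counter */
	} else if (--drain <= 0) {
		holdoff = fake_jiffies;       /* out of attempts: hold off   */
		return callbacks_pending;
	}
	return callbacks_pending;             /* one more flush attempt made */
}

int main(void)
{
	for (int i = 0; i < 8; i++)
		printf("attempt %d -> needs cpu: %d (drain=%d)\n",
		       i, model_needs_cpu(true), drain);
	return 0;
}
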
diff --git a/trunk/kernel/rcutree_trace.c b/trunk/kernel/rcutree_trace.c
index 654cfe67f0d1..9feffa4c0695 100644
--- a/trunk/kernel/rcutree_trace.c
+++ b/trunk/kernel/rcutree_trace.c
@@ -67,11 +67,13 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
rdp->completed, rdp->gpnum,
rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
rdp->qs_pending);
- seq_printf(m, " dt=%d/%llx/%d df=%lu",
+#ifdef CONFIG_NO_HZ
+ seq_printf(m, " dt=%d/%d/%d df=%lu",
atomic_read(&rdp->dynticks->dynticks),
rdp->dynticks->dynticks_nesting,
rdp->dynticks->dynticks_nmi_nesting,
rdp->dynticks_fqs);
+#endif /* #ifdef CONFIG_NO_HZ */
seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
seq_printf(m, " ql=%ld qs=%c%c%c%c",
rdp->qlen,
@@ -139,11 +141,13 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
rdp->completed, rdp->gpnum,
rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
rdp->qs_pending);
- seq_printf(m, ",%d,%llx,%d,%lu",
+#ifdef CONFIG_NO_HZ
+ seq_printf(m, ",%d,%d,%d,%lu",
atomic_read(&rdp->dynticks->dynticks),
rdp->dynticks->dynticks_nesting,
rdp->dynticks->dynticks_nmi_nesting,
rdp->dynticks_fqs);
+#endif /* #ifdef CONFIG_NO_HZ */
seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen,
".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
@@ -167,7 +171,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
static int show_rcudata_csv(struct seq_file *m, void *unused)
{
seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");
+#ifdef CONFIG_NO_HZ
seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
+#endif /* #ifdef CONFIG_NO_HZ */
seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\"");
#ifdef CONFIG_RCU_BOOST
seq_puts(m, "\"kt\",\"ktl\"");
@@ -272,7 +278,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
gpnum = rsp->gpnum;
seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x "
"nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n",
- rsp->completed, gpnum, rsp->fqs_state,
+ rsp->completed, gpnum, rsp->signaled,
(long)(rsp->jiffies_force_qs - jiffies),
(int)(jiffies & 0xffff),
rsp->n_force_qs, rsp->n_force_qs_ngp,
diff --git a/trunk/kernel/rtmutex-debug.c b/trunk/kernel/rtmutex-debug.c
index 16502d3a71c8..8eafd1bd273e 100644
--- a/trunk/kernel/rtmutex-debug.c
+++ b/trunk/kernel/rtmutex-debug.c
@@ -101,7 +101,6 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
printk("\n============================================\n");
printk( "[ BUG: circular locking deadlock detected! ]\n");
- printk("%s\n", print_tainted());
printk( "--------------------------------------------\n");
printk("%s/%d is deadlocking current task %s/%d\n\n",
task->comm, task_pid_nr(task),
diff --git a/trunk/kernel/rtmutex.c b/trunk/kernel/rtmutex.c
index a242e691c993..f9d8482dd487 100644
--- a/trunk/kernel/rtmutex.c
+++ b/trunk/kernel/rtmutex.c
@@ -579,6 +579,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
struct rt_mutex_waiter *waiter)
{
int ret = 0;
+ int was_disabled;
for (;;) {
/* Try to acquire the lock: */
@@ -601,10 +602,17 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
raw_spin_unlock(&lock->wait_lock);
+ was_disabled = irqs_disabled();
+ if (was_disabled)
+ local_irq_enable();
+
debug_rt_mutex_print_deadlock(waiter);
schedule_rt_mutex(lock);
+ if (was_disabled)
+ local_irq_disable();
+
raw_spin_lock(&lock->wait_lock);
set_current_state(state);
}
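
Editor's note: the rtmutex hunk above re-enables interrupts around schedule_rt_mutex() when the caller had them disabled, then restores the disabled state before retaking wait_lock. The save/enable/block/restore shape is easy to get wrong, so here is a minimal userspace model of the pattern; a fake "irq" flag stands in for the real local_irq_*() calls:

#include <stdbool.h>
#include <stdio.h>

/* Toy model of "enable around a blocking call, restore afterwards". */
static bool irqs_off;                      /* stands in for the CPU's irq state */

static bool model_irqs_disabled(void) { return irqs_off; }
static void model_irq_enable(void)    { irqs_off = false; }
static void model_irq_disable(void)   { irqs_off = true;  }

static void model_block(void)              /* stands in for schedule_rt_mutex() */
{
	printf("blocking with irqs %s\n", irqs_off ? "off (bad!)" : "on");
}

static void model_slowlock_step(void)
{
	bool was_disabled = model_irqs_disabled();

	if (was_disabled)
		model_irq_enable();        /* never sleep with irqs off */

	model_block();

	if (was_disabled)
		model_irq_disable();       /* restore the caller's state */
}

int main(void)
{
	model_irq_disable();               /* caller arrived with irqs disabled */
	model_slowlock_step();
	printf("irqs restored to %s\n", irqs_off ? "off" : "on");
	return 0;
}
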
diff --git a/trunk/kernel/sched/core.c b/trunk/kernel/sched.c
similarity index 79%
rename from trunk/kernel/sched/core.c
rename to trunk/kernel/sched.c
index 457c881873cb..d6b149ccf925 100644
--- a/trunk/kernel/sched/core.c
+++ b/trunk/kernel/sched.c
@@ -1,5 +1,5 @@
/*
- * kernel/sched/core.c
+ * kernel/sched.c
*
* Kernel scheduler and related syscalls
*
@@ -56,6 +56,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -74,17 +75,129 @@
#include
#include
+#include
#ifdef CONFIG_PARAVIRT
#include
#endif
-#include "sched.h"
-#include "../workqueue_sched.h"
+#include "sched_cpupri.h"
+#include "workqueue_sched.h"
+#include "sched_autogroup.h"
#define CREATE_TRACE_POINTS
#include
-void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
+/*
+ * Convert user-nice values [ -20 ... 0 ... 19 ]
+ * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
+ * and back.
+ */
+#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
+#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)
+#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio)
+
+/*
+ * 'User priority' is the nice value converted to something we
+ * can work with better when scaling various scheduler parameters,
+ * it's a [ 0 ... 39 ] range.
+ */
+#define USER_PRIO(p) ((p)-MAX_RT_PRIO)
+#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio)
+#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
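
Editor's note: with the kernel's constants MAX_RT_PRIO = 100 and MAX_PRIO = 140, the macros above map nice -20..19 onto static priorities 100..139 and user priorities 0..39. A standalone check of the round trip; the constants are copied here only for illustration:

#include <assert.h>
#include <stdio.h>

/* Constants and macros copied for illustration; values match the kernel's. */
#define MAX_RT_PRIO 100
#define MAX_PRIO    (MAX_RT_PRIO + 40)             /* 140 */

#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)
#define USER_PRIO(p)       ((p) - MAX_RT_PRIO)

int main(void)
{
	for (int nice = -20; nice <= 19; nice++) {
		int prio = NICE_TO_PRIO(nice);

		assert(prio >= 100 && prio <= 139);    /* below 100 is RT */
		assert(PRIO_TO_NICE(prio) == nice);    /* exact round trip */
	}
	printf("nice -20 -> prio %d (user prio %d)\n",
	       NICE_TO_PRIO(-20), USER_PRIO(NICE_TO_PRIO(-20)));
	printf("nice   0 -> prio %d (user prio %d)\n",
	       NICE_TO_PRIO(0), USER_PRIO(NICE_TO_PRIO(0)));
	printf("nice  19 -> prio %d (user prio %d)\n",
	       NICE_TO_PRIO(19), USER_PRIO(NICE_TO_PRIO(19)));
	return 0;
}
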
+
+/*
+ * Helpers for converting nanosecond timing to jiffy resolution
+ */
+#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
+
+#define NICE_0_LOAD SCHED_LOAD_SCALE
+#define NICE_0_SHIFT SCHED_LOAD_SHIFT
+
+/*
+ * These are the 'tuning knobs' of the scheduler:
+ *
+ * default timeslice is 100 msecs (used only for SCHED_RR tasks).
+ * Timeslices get refilled after they expire.
+ */
+#define DEF_TIMESLICE (100 * HZ / 1000)
+
+/*
+ * single value that denotes runtime == period, ie unlimited time.
+ */
+#define RUNTIME_INF ((u64)~0ULL)
+
+static inline int rt_policy(int policy)
+{
+ if (policy == SCHED_FIFO || policy == SCHED_RR)
+ return 1;
+ return 0;
+}
+
+static inline int task_has_rt_policy(struct task_struct *p)
+{
+ return rt_policy(p->policy);
+}
+
+/*
+ * This is the priority-queue data structure of the RT scheduling class:
+ */
+struct rt_prio_array {
+ DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
+ struct list_head queue[MAX_RT_PRIO];
+};
+
+struct rt_bandwidth {
+ /* nests inside the rq lock: */
+ raw_spinlock_t rt_runtime_lock;
+ ktime_t rt_period;
+ u64 rt_runtime;
+ struct hrtimer rt_period_timer;
+};
+
+static struct rt_bandwidth def_rt_bandwidth;
+
+static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
+
+static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
+{
+ struct rt_bandwidth *rt_b =
+ container_of(timer, struct rt_bandwidth, rt_period_timer);
+ ktime_t now;
+ int overrun;
+ int idle = 0;
+
+ for (;;) {
+ now = hrtimer_cb_get_time(timer);
+ overrun = hrtimer_forward(timer, now, rt_b->rt_period);
+
+ if (!overrun)
+ break;
+
+ idle = do_sched_rt_period_timer(rt_b, overrun);
+ }
+
+ return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
+}
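
Editor's note: sched_rt_period_timer() above (like sched_cfs_period_timer() further down) uses the standard periodic-hrtimer idiom: push the expiry forward by whole periods past "now", account the overruns, and rearm unless the handler reports idle. The arithmetic behind hrtimer_forward() is just "how many whole periods fit between the old expiry and now"; a userspace model of that accounting:

#include <stdint.h>
#include <stdio.h>

/* Toy model of hrtimer_forward()-style period accounting; not kernel code. */
struct model_timer {
	uint64_t expires;                  /* absolute expiry, in ns */
};

/* Advance 'expires' past 'now' in whole periods; return how many were skipped. */
static uint64_t model_forward(struct model_timer *t, uint64_t now, uint64_t period)
{
	uint64_t overruns;

	if (now < t->expires)
		return 0;                  /* not expired yet: nothing to do */
	overruns = (now - t->expires) / period + 1;
	t->expires += overruns * period;
	return overruns;
}

int main(void)
{
	struct model_timer t = { .expires = 1000 };
	uint64_t period = 250;

	/* Handler ran late at now=1600: expiries 1000, 1250, 1500 have elapsed. */
	printf("overruns: %llu, next expiry: %llu\n",
	       (unsigned long long)model_forward(&t, 1600, period),
	       (unsigned long long)t.expires);
	return 0;
}
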
+
+static
+void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
+{
+ rt_b->rt_period = ns_to_ktime(period);
+ rt_b->rt_runtime = runtime;
+
+ raw_spin_lock_init(&rt_b->rt_runtime_lock);
+
+ hrtimer_init(&rt_b->rt_period_timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ rt_b->rt_period_timer.function = sched_rt_period_timer;
+}
+
+static inline int rt_bandwidth_enabled(void)
+{
+ return sysctl_sched_rt_runtime >= 0;
+}
+
+static void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
{
unsigned long delta;
ktime_t soft, hard, now;
@@ -104,12 +217,580 @@ void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
}
}
-DEFINE_MUTEX(sched_domains_mutex);
-DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
+{
+ if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
+ return;
+
+ if (hrtimer_active(&rt_b->rt_period_timer))
+ return;
+
+ raw_spin_lock(&rt_b->rt_runtime_lock);
+ start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
+ raw_spin_unlock(&rt_b->rt_runtime_lock);
+}
+
+#ifdef CONFIG_RT_GROUP_SCHED
+static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
+{
+ hrtimer_cancel(&rt_b->rt_period_timer);
+}
+#endif
+
+/*
+ * sched_domains_mutex serializes calls to init_sched_domains,
+ * detach_destroy_domains and partition_sched_domains.
+ */
+static DEFINE_MUTEX(sched_domains_mutex);
+
+#ifdef CONFIG_CGROUP_SCHED
+
+#include
+
+struct cfs_rq;
+
+static LIST_HEAD(task_groups);
+
+struct cfs_bandwidth {
+#ifdef CONFIG_CFS_BANDWIDTH
+ raw_spinlock_t lock;
+ ktime_t period;
+ u64 quota, runtime;
+ s64 hierarchal_quota;
+ u64 runtime_expires;
+
+ int idle, timer_active;
+ struct hrtimer period_timer, slack_timer;
+ struct list_head throttled_cfs_rq;
+
+ /* statistics */
+ int nr_periods, nr_throttled;
+ u64 throttled_time;
+#endif
+};
+
+/* task group related information */
+struct task_group {
+ struct cgroup_subsys_state css;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ /* schedulable entities of this group on each cpu */
+ struct sched_entity **se;
+ /* runqueue "owned" by this group on each cpu */
+ struct cfs_rq **cfs_rq;
+ unsigned long shares;
+
+ atomic_t load_weight;
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ struct sched_rt_entity **rt_se;
+ struct rt_rq **rt_rq;
+
+ struct rt_bandwidth rt_bandwidth;
+#endif
+
+ struct rcu_head rcu;
+ struct list_head list;
+
+ struct task_group *parent;
+ struct list_head siblings;
+ struct list_head children;
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+ struct autogroup *autogroup;
+#endif
+
+ struct cfs_bandwidth cfs_bandwidth;
+};
+
+/* task_group_lock serializes the addition/removal of task groups */
+static DEFINE_SPINLOCK(task_group_lock);
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
+# define ROOT_TASK_GROUP_LOAD NICE_0_LOAD
+
+/*
+ * A weight of 0 or 1 can cause arithmetic problems.
+ * The weight of a cfs_rq is the sum of the weights of the entities
+ * queued on it, so the weight of an entity should not be too large,
+ * and neither should the shares value of a task group.
+ * (The default weight is 1024 - so there's no practical
+ * limitation from this.)
+ */
+#define MIN_SHARES (1UL << 1)
+#define MAX_SHARES (1UL << 18)
+
+static int root_task_group_load = ROOT_TASK_GROUP_LOAD;
+#endif
+
+/* Default task group.
+ * Every task in the system belongs to this group at bootup.
+ */
+struct task_group root_task_group;
+
+#endif /* CONFIG_CGROUP_SCHED */
+
+/* CFS-related fields in a runqueue */
+struct cfs_rq {
+ struct load_weight load;
+ unsigned long nr_running, h_nr_running;
+
+ u64 exec_clock;
+ u64 min_vruntime;
+#ifndef CONFIG_64BIT
+ u64 min_vruntime_copy;
+#endif
+
+ struct rb_root tasks_timeline;
+ struct rb_node *rb_leftmost;
+
+ struct list_head tasks;
+ struct list_head *balance_iterator;
+
+ /*
+ * 'curr' points to currently running entity on this cfs_rq.
+ * It is set to NULL otherwise (i.e when none are currently running).
+ */
+ struct sched_entity *curr, *next, *last, *skip;
+
+#ifdef CONFIG_SCHED_DEBUG
+ unsigned int nr_spread_over;
+#endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */
+
+ /*
+ * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
+ * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
+ * (like users, containers etc.)
+ *
+ * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
+ * list is used during load balance.
+ */
+ int on_list;
+ struct list_head leaf_cfs_rq_list;
+ struct task_group *tg; /* group that "owns" this runqueue */
+
+#ifdef CONFIG_SMP
+ /*
+ * the part of load.weight contributed by tasks
+ */
+ unsigned long task_weight;
+
+ /*
+ * h_load = weight * f(tg)
+ *
+ * Where f(tg) is the recursive weight fraction assigned to
+ * this group.
+ */
+ unsigned long h_load;
+
+ /*
+ * Maintaining per-cpu shares distribution for group scheduling
+ *
+ * load_stamp is the last time we updated the load average
+ * load_last is the last time we updated the load average and saw load
+ * load_unacc_exec_time is currently unaccounted execution time
+ */
+ u64 load_avg;
+ u64 load_period;
+ u64 load_stamp, load_last, load_unacc_exec_time;
+
+ unsigned long load_contribution;
+#endif
+#ifdef CONFIG_CFS_BANDWIDTH
+ int runtime_enabled;
+ u64 runtime_expires;
+ s64 runtime_remaining;
+
+ u64 throttled_timestamp;
+ int throttled, throttle_count;
+ struct list_head throttled_list;
+#endif
+#endif
+};
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_CFS_BANDWIDTH
+static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
+{
+ return &tg->cfs_bandwidth;
+}
+
+static inline u64 default_cfs_period(void);
+static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun);
+static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b);
+
+static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
+{
+ struct cfs_bandwidth *cfs_b =
+ container_of(timer, struct cfs_bandwidth, slack_timer);
+ do_sched_cfs_slack_timer(cfs_b);
+
+ return HRTIMER_NORESTART;
+}
+
+static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
+{
+ struct cfs_bandwidth *cfs_b =
+ container_of(timer, struct cfs_bandwidth, period_timer);
+ ktime_t now;
+ int overrun;
+ int idle = 0;
+
+ for (;;) {
+ now = hrtimer_cb_get_time(timer);
+ overrun = hrtimer_forward(timer, now, cfs_b->period);
+
+ if (!overrun)
+ break;
+
+ idle = do_sched_cfs_period_timer(cfs_b, overrun);
+ }
+
+ return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
+}
+
+static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
+{
+ raw_spin_lock_init(&cfs_b->lock);
+ cfs_b->runtime = 0;
+ cfs_b->quota = RUNTIME_INF;
+ cfs_b->period = ns_to_ktime(default_cfs_period());
+
+ INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
+ hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ cfs_b->period_timer.function = sched_cfs_period_timer;
+ hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ cfs_b->slack_timer.function = sched_cfs_slack_timer;
+}
+
+static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
+{
+ cfs_rq->runtime_enabled = 0;
+ INIT_LIST_HEAD(&cfs_rq->throttled_list);
+}
+
+/* requires cfs_b->lock, may release to reprogram timer */
+static void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
+{
+ /*
+ * The timer may be active because we're trying to set a new bandwidth
+ * period or because we're racing with the tear-down path
+ * (timer_active==0 becomes visible before the hrtimer call-back
+ * terminates). In either case we ensure that it's re-programmed
+ */
+ while (unlikely(hrtimer_active(&cfs_b->period_timer))) {
+ raw_spin_unlock(&cfs_b->lock);
+ /* ensure cfs_b->lock is available while we wait */
+ hrtimer_cancel(&cfs_b->period_timer);
+
+ raw_spin_lock(&cfs_b->lock);
+ /* if someone else restarted the timer then we're done */
+ if (cfs_b->timer_active)
+ return;
+ }
+
+ cfs_b->timer_active = 1;
+ start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period);
+}
+
+static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
+{
+ hrtimer_cancel(&cfs_b->period_timer);
+ hrtimer_cancel(&cfs_b->slack_timer);
+}
+#else
+static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
+static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
+static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
+
+static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
+{
+ return NULL;
+}
+#endif /* CONFIG_CFS_BANDWIDTH */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
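
Editor's note: the #else branch just above is the usual compile-out idiom: when CONFIG_CFS_BANDWIDTH is off, the init/destroy helpers become empty static functions and tg_cfs_bandwidth() returns NULL, so callers need no #ifdefs of their own. A tiny standalone illustration of the same idiom, using a hypothetical FEATURE_X rather than a real kernel config symbol:

#include <stdio.h>

/* Build with -DFEATURE_X for the real helpers, without it for the stubs. */
#ifdef FEATURE_X
static int feature_x_setup(void)     { printf("feature X enabled\n"); return 1; }
static void feature_x_teardown(void) { printf("feature X disabled\n"); }
#else
/* Stubs: callers compile and run unchanged when the feature is configured out. */
static int feature_x_setup(void)     { return 0; }
static void feature_x_teardown(void) { }
#endif

int main(void)
{
	int enabled = feature_x_setup();   /* no #ifdef needed at the call site */

	printf("running with feature X %s\n", enabled ? "on" : "off");
	feature_x_teardown();
	return 0;
}
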
+
+/* Real-Time classes' related field in a runqueue: */
+struct rt_rq {
+ struct rt_prio_array active;
+ unsigned long rt_nr_running;
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
+ struct {
+ int curr; /* highest queued rt task prio */
+#ifdef CONFIG_SMP
+ int next; /* next highest */
+#endif
+ } highest_prio;
+#endif
+#ifdef CONFIG_SMP
+ unsigned long rt_nr_migratory;
+ unsigned long rt_nr_total;
+ int overloaded;
+ struct plist_head pushable_tasks;
+#endif
+ int rt_throttled;
+ u64 rt_time;
+ u64 rt_runtime;
+ /* Nests inside the rq lock: */
+ raw_spinlock_t rt_runtime_lock;
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ unsigned long rt_nr_boosted;
+
+ struct rq *rq;
+ struct list_head leaf_rt_rq_list;
+ struct task_group *tg;
+#endif
+};
+
+#ifdef CONFIG_SMP
+
+/*
+ * We add the notion of a root-domain which will be used to define per-domain
+ * variables. Each exclusive cpuset essentially defines an island domain by
+ * fully partitioning the member cpus from any other cpuset. Whenever a new
+ * exclusive cpuset is created, we also create and attach a new root-domain
+ * object.
+ *
+ */
+struct root_domain {
+ atomic_t refcount;
+ atomic_t rto_count;
+ struct rcu_head rcu;
+ cpumask_var_t span;
+ cpumask_var_t online;
+
+ /*
+ * The "RT overload" flag: it gets set if a CPU has more than
+ * one runnable RT task.
+ */
+ cpumask_var_t rto_mask;
+ struct cpupri cpupri;
+};
+
+/*
+ * By default the system creates a single root-domain with all cpus as
+ * members (mimicking the global state we have today).
+ */
+static struct root_domain def_root_domain;
+
+#endif /* CONFIG_SMP */
+
+/*
+ * This is the main, per-CPU runqueue data structure.
+ *
+ * Locking rule: those places that want to lock multiple runqueues
+ * (such as the load balancing or the thread migration code), lock
+ * acquire operations must be ordered by ascending &runqueue.
+ */
+struct rq {
+ /* runqueue lock: */
+ raw_spinlock_t lock;
+
+ /*
+ * nr_running and cpu_load should be in the same cacheline because
+ * remote CPUs use both these fields when doing load calculation.
+ */
+ unsigned long nr_running;
+ #define CPU_LOAD_IDX_MAX 5
+ unsigned long cpu_load[CPU_LOAD_IDX_MAX];
+ unsigned long last_load_update_tick;
+#ifdef CONFIG_NO_HZ
+ u64 nohz_stamp;
+ unsigned char nohz_balance_kick;
+#endif
+ int skip_clock_update;
+
+ /* capture load from *all* tasks on this cpu: */
+ struct load_weight load;
+ unsigned long nr_load_updates;
+ u64 nr_switches;
+
+ struct cfs_rq cfs;
+ struct rt_rq rt;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ /* list of leaf cfs_rq on this cpu: */
+ struct list_head leaf_cfs_rq_list;
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+ struct list_head leaf_rt_rq_list;
+#endif
+
+ /*
+ * This is part of a global counter where only the total sum
+ * over all CPUs matters. A task can increase this counter on
+ * one CPU and if it got migrated afterwards it may decrease
+ * it on another CPU. Always updated under the runqueue lock:
+ */
+ unsigned long nr_uninterruptible;
+
+ struct task_struct *curr, *idle, *stop;
+ unsigned long next_balance;
+ struct mm_struct *prev_mm;
+
+ u64 clock;
+ u64 clock_task;
+
+ atomic_t nr_iowait;
+
+#ifdef CONFIG_SMP
+ struct root_domain *rd;
+ struct sched_domain *sd;
+
+ unsigned long cpu_power;
+
+ unsigned char idle_balance;
+ /* For active balancing */
+ int post_schedule;
+ int active_balance;
+ int push_cpu;
+ struct cpu_stop_work active_balance_work;
+ /* cpu of this runqueue: */
+ int cpu;
+ int online;
+
+ u64 rt_avg;
+ u64 age_stamp;
+ u64 idle_stamp;
+ u64 avg_idle;
+#endif
+
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+ u64 prev_irq_time;
+#endif
+#ifdef CONFIG_PARAVIRT
+ u64 prev_steal_time;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+ u64 prev_steal_time_rq;
+#endif
+
+ /* calc_load related fields */
+ unsigned long calc_load_update;
+ long calc_load_active;
+
+#ifdef CONFIG_SCHED_HRTICK
+#ifdef CONFIG_SMP
+ int hrtick_csd_pending;
+ struct call_single_data hrtick_csd;
+#endif
+ struct hrtimer hrtick_timer;
+#endif
+
+#ifdef CONFIG_SCHEDSTATS
+ /* latency stats */
+ struct sched_info rq_sched_info;
+ unsigned long long rq_cpu_time;
+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
+
+ /* sys_sched_yield() stats */
+ unsigned int yld_count;
+
+ /* schedule() stats */
+ unsigned int sched_switch;
+ unsigned int sched_count;
+ unsigned int sched_goidle;
+
+ /* try_to_wake_up() stats */
+ unsigned int ttwu_count;
+ unsigned int ttwu_local;
+#endif
+
+#ifdef CONFIG_SMP
+ struct llist_head wake_list;
+#endif
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+
+
+static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
+
+static inline int cpu_of(struct rq *rq)
+{
+#ifdef CONFIG_SMP
+ return rq->cpu;
+#else
+ return 0;
+#endif
+}
+
+#define rcu_dereference_check_sched_domain(p) \
+ rcu_dereference_check((p), \
+ lockdep_is_held(&sched_domains_mutex))
+
+/*
+ * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
+ * See detach_destroy_domains: synchronize_sched for details.
+ *
+ * The domain tree of any CPU may only be accessed from within
+ * preempt-disabled sections.
+ */
+#define for_each_domain(cpu, __sd) \
+ for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
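+
+/*
+ * Illustrative usage (editorial note, not part of this change): walk a CPU's
+ * domain hierarchy from the lowest level up to the root, with access
+ * protected as described above, e.g.:
+ *
+ *	rcu_read_lock();
+ *	for_each_domain(cpu, sd) {
+ *		if (sd->flags & SD_LOAD_BALANCE)
+ *			;	/* inspect this domain level */
+ *	}
+ *	rcu_read_unlock();
+ */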
+
+#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
+#define this_rq() (&__get_cpu_var(runqueues))
+#define task_rq(p) cpu_rq(task_cpu(p))
+#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
+#define raw_rq() (&__raw_get_cpu_var(runqueues))
+
+#ifdef CONFIG_CGROUP_SCHED
+
+/*
+ * Return the group to which this task belongs.
+ *
+ * We use task_subsys_state_check() and extend the RCU verification with
+ * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each
+ * task it moves into the cgroup. Therefore by holding either of those locks,
+ * we pin the task to the current cgroup.
+ */
+static inline struct task_group *task_group(struct task_struct *p)
+{
+ struct task_group *tg;
+ struct cgroup_subsys_state *css;
+
+ css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
+ lockdep_is_held(&p->pi_lock) ||
+ lockdep_is_held(&task_rq(p)->lock));
+ tg = container_of(css, struct task_group, css);
+
+ return autogroup_task_group(p, tg);
+}
+
+/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
+ p->se.parent = task_group(p)->se[cpu];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ p->rt.rt_rq = task_group(p)->rt_rq[cpu];
+ p->rt.parent = task_group(p)->rt_se[cpu];
+#endif
+}
+
+#else /* CONFIG_CGROUP_SCHED */
+
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+static inline struct task_group *task_group(struct task_struct *p)
+{
+ return NULL;
+}
+
+#endif /* CONFIG_CGROUP_SCHED */
static void update_rq_clock_task(struct rq *rq, s64 delta);
-void update_rq_clock(struct rq *rq)
+static void update_rq_clock(struct rq *rq)
{
s64 delta;
@@ -121,15 +802,45 @@ void update_rq_clock(struct rq *rq)
update_rq_clock_task(rq, delta);
}
+/*
+ * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
+ */
+#ifdef CONFIG_SCHED_DEBUG
+# define const_debug __read_mostly
+#else
+# define const_debug static const
+#endif
+
+/**
+ * runqueue_is_locked - Returns true if the current cpu runqueue is locked
+ * @cpu: the processor in question.
+ *
+ * This interface allows printk to be called with the runqueue lock
+ * held and know whether or not it is OK to wake up the klogd.
+ */
+int runqueue_is_locked(int cpu)
+{
+ return raw_spin_is_locked(&cpu_rq(cpu)->lock);
+}
+
/*
* Debugging: various feature bits
*/
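+
+/*
+ * Editorial note: sched_features.h is included several times below with
+ * different SCHED_FEAT() definitions -- first to build an enum of feature
+ * bits, then to compute the default sysctl_sched_features mask, and again
+ * to build the name table used by sched_feat_show()/sched_feat_write().
+ */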
+#define SCHED_FEAT(name, enabled) \
+ __SCHED_FEAT_##name ,
+
+enum {
+#include "sched_features.h"
+};
+
+#undef SCHED_FEAT
+
#define SCHED_FEAT(name, enabled) \
(1UL << __SCHED_FEAT_##name) * enabled |
const_debug unsigned int sysctl_sched_features =
-#include "features.h"
+#include "sched_features.h"
0;
#undef SCHED_FEAT
@@ -139,7 +850,7 @@ const_debug unsigned int sysctl_sched_features =
#name ,
static __read_mostly char *sched_feat_names[] = {
-#include "features.h"
+#include "sched_features.h"
NULL
};
@@ -149,7 +860,7 @@ static int sched_feat_show(struct seq_file *m, void *v)
{
int i;
- for (i = 0; i < __SCHED_FEAT_NR; i++) {
+ for (i = 0; sched_feat_names[i]; i++) {
if (!(sysctl_sched_features & (1UL << i)))
seq_puts(m, "NO_");
seq_printf(m, "%s ", sched_feat_names[i]);
@@ -159,36 +870,6 @@ static int sched_feat_show(struct seq_file *m, void *v)
return 0;
}
-#ifdef HAVE_JUMP_LABEL
-
-#define jump_label_key__true jump_label_key_enabled
-#define jump_label_key__false jump_label_key_disabled
-
-#define SCHED_FEAT(name, enabled) \
- jump_label_key__##enabled ,
-
-struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = {
-#include "features.h"
-};
-
-#undef SCHED_FEAT
-
-static void sched_feat_disable(int i)
-{
- if (jump_label_enabled(&sched_feat_keys[i]))
- jump_label_dec(&sched_feat_keys[i]);
-}
-
-static void sched_feat_enable(int i)
-{
- if (!jump_label_enabled(&sched_feat_keys[i]))
- jump_label_inc(&sched_feat_keys[i]);
-}
-#else
-static void sched_feat_disable(int i) { };
-static void sched_feat_enable(int i) { };
-#endif /* HAVE_JUMP_LABEL */
-
static ssize_t
sched_feat_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
@@ -212,20 +893,17 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
cmp += 3;
}
- for (i = 0; i < __SCHED_FEAT_NR; i++) {
+ for (i = 0; sched_feat_names[i]; i++) {
if (strcmp(cmp, sched_feat_names[i]) == 0) {
- if (neg) {
+ if (neg)
sysctl_sched_features &= ~(1UL << i);
- sched_feat_disable(i);
- } else {
+ else
sysctl_sched_features |= (1UL << i);
- sched_feat_enable(i);
- }
break;
}
}
- if (i == __SCHED_FEAT_NR)
+ if (!sched_feat_names[i])
return -EINVAL;
*ppos += cnt;
@@ -254,7 +932,10 @@ static __init int sched_init_debug(void)
return 0;
}
late_initcall(sched_init_debug);
-#endif /* CONFIG_SCHED_DEBUG */
+
+#endif
+
+#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
/*
* Number of tasks to iterate in a single balance run.
@@ -276,7 +957,7 @@ const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC;
*/
unsigned int sysctl_sched_rt_period = 1000000;
-__read_mostly int scheduler_running;
+static __read_mostly int scheduler_running;
/*
* part of the period that we allow rt tasks to run in us.
@@ -284,7 +965,112 @@ __read_mostly int scheduler_running;
*/
int sysctl_sched_rt_runtime = 950000;
+static inline u64 global_rt_period(void)
+{
+ return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
+}
+
+static inline u64 global_rt_runtime(void)
+{
+ if (sysctl_sched_rt_runtime < 0)
+ return RUNTIME_INF;
+
+ return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
+}
+
+#ifndef prepare_arch_switch
+# define prepare_arch_switch(next) do { } while (0)
+#endif
+#ifndef finish_arch_switch
+# define finish_arch_switch(prev) do { } while (0)
+#endif
+
+static inline int task_current(struct rq *rq, struct task_struct *p)
+{
+ return rq->curr == p;
+}
+
+static inline int task_running(struct rq *rq, struct task_struct *p)
+{
+#ifdef CONFIG_SMP
+ return p->on_cpu;
+#else
+ return task_current(rq, p);
+#endif
+}
+
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
+static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
+{
+#ifdef CONFIG_SMP
+ /*
+ * We can optimise this out completely for !SMP, because the
+ * SMP rebalancing from interrupt is the only thing that cares
+ * here.
+ */
+ next->on_cpu = 1;
+#endif
+}
+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
+{
+#ifdef CONFIG_SMP
+ /*
+ * After ->on_cpu is cleared, the task can be moved to a different CPU.
+ * We must ensure this doesn't happen until the switch is completely
+ * finished.
+ */
+ smp_wmb();
+ prev->on_cpu = 0;
+#endif
+#ifdef CONFIG_DEBUG_SPINLOCK
+ /* this is a valid case when another task releases the spinlock */
+ rq->lock.owner = current;
+#endif
+ /*
+ * If we are tracking spinlock dependencies then we have to
+ * fix up the runqueue lock - which gets 'carried over' from
+ * prev into current:
+ */
+ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
+
+ raw_spin_unlock_irq(&rq->lock);
+}
+
+#else /* __ARCH_WANT_UNLOCKED_CTXSW */
+static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
+{
+#ifdef CONFIG_SMP
+ /*
+ * We can optimise this out completely for !SMP, because the
+ * SMP rebalancing from interrupt is the only thing that cares
+ * here.
+ */
+ next->on_cpu = 1;
+#endif
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+ raw_spin_unlock_irq(&rq->lock);
+#else
+ raw_spin_unlock(&rq->lock);
+#endif
+}
+
+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
+{
+#ifdef CONFIG_SMP
+ /*
+ * After ->on_cpu is cleared, the task can be moved to a different CPU.
+ * We must ensure this doesn't happen until the switch is completely
+ * finished.
+ */
+ smp_wmb();
+ prev->on_cpu = 0;
+#endif
+#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+ local_irq_enable();
+#endif
+}
+#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
/*
* __task_rq_lock - lock the rq @p resides on.
@@ -367,6 +1153,20 @@ static struct rq *this_rq_lock(void)
* rq->lock.
*/
+/*
+ * Use hrtick when:
+ * - enabled by features
+ * - hrtimer is actually high res
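+ *  - the cpu is active (not in the middle of being hot-unplugged)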
+ */
+static inline int hrtick_enabled(struct rq *rq)
+{
+ if (!sched_feat(HRTICK))
+ return 0;
+ if (!cpu_active(cpu_of(rq)))
+ return 0;
+ return hrtimer_is_hres_active(&rq->hrtick_timer);
+}
+
static void hrtick_clear(struct rq *rq)
{
if (hrtimer_active(&rq->hrtick_timer))
@@ -410,7 +1210,7 @@ static void __hrtick_start(void *arg)
*
* called with rq->lock held and irqs disabled
*/
-void hrtick_start(struct rq *rq, u64 delay)
+static void hrtick_start(struct rq *rq, u64 delay)
{
struct hrtimer *timer = &rq->hrtick_timer;
ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
@@ -454,7 +1254,7 @@ static __init void init_hrtick(void)
*
* called with rq->lock held and irqs disabled
*/
-void hrtick_start(struct rq *rq, u64 delay)
+static void hrtick_start(struct rq *rq, u64 delay)
{
__hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
HRTIMER_MODE_REL_PINNED, 0);
@@ -505,7 +1305,7 @@ static inline void init_hrtick(void)
#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
#endif
-void resched_task(struct task_struct *p)
+static void resched_task(struct task_struct *p)
{
int cpu;
@@ -526,7 +1326,7 @@ void resched_task(struct task_struct *p)
smp_send_reschedule(cpu);
}
-void resched_cpu(int cpu)
+static void resched_cpu(int cpu)
{
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
@@ -599,60 +1399,234 @@ void wake_up_idle_cpu(int cpu)
*/
set_tsk_need_resched(rq->idle);
- /* NEED_RESCHED must be visible before we test polling */
- smp_mb();
- if (!tsk_is_polling(rq->idle))
- smp_send_reschedule(cpu);
+ /* NEED_RESCHED must be visible before we test polling */
+ smp_mb();
+ if (!tsk_is_polling(rq->idle))
+ smp_send_reschedule(cpu);
+}
+
+static inline bool got_nohz_idle_kick(void)
+{
+ return idle_cpu(smp_processor_id()) && this_rq()->nohz_balance_kick;
+}
+
+#else /* CONFIG_NO_HZ */
+
+static inline bool got_nohz_idle_kick(void)
+{
+ return false;
+}
+
+#endif /* CONFIG_NO_HZ */
+
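+/*
+ * Editorial note: sched_avg_period() is half the rt_avg averaging window
+ * (sysctl_sched_time_avg, in ms) converted to nanoseconds; sched_avg_update()
+ * below halves rq->rt_avg once per such period.
+ */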
+static u64 sched_avg_period(void)
+{
+ return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
+}
+
+static void sched_avg_update(struct rq *rq)
+{
+ s64 period = sched_avg_period();
+
+ while ((s64)(rq->clock - rq->age_stamp) > period) {
+ /*
+ * Inline assembly required to prevent the compiler
+ * optimising this loop into a divmod call.
+ * See __iter_div_u64_rem() for another example of this.
+ */
+ asm("" : "+rm" (rq->age_stamp));
+ rq->age_stamp += period;
+ rq->rt_avg /= 2;
+ }
+}
+
+static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
+{
+ rq->rt_avg += rt_delta;
+ sched_avg_update(rq);
+}
+
+#else /* !CONFIG_SMP */
+static void resched_task(struct task_struct *p)
+{
+ assert_raw_spin_locked(&task_rq(p)->lock);
+ set_tsk_need_resched(p);
+}
+
+static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
+{
+}
+
+static void sched_avg_update(struct rq *rq)
+{
+}
+#endif /* CONFIG_SMP */
+
+#if BITS_PER_LONG == 32
+# define WMULT_CONST (~0UL)
+#else
+# define WMULT_CONST (1UL << 32)
+#endif
+
+#define WMULT_SHIFT 32
+
+/*
+ * Shift right and round:
+ */
+#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
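+/*
+ * For example, SRR(7, 1) == 4 and SRR(5, 1) == 3: adding half of 2^y before
+ * the shift rounds the result to nearest instead of truncating.
+ */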
+
+/*
+ * delta *= weight / lw
+ */
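+/*
+ * Illustrative example (editorial, not part of the original change): with
+ * delta_exec = 1000000 and weight == 2 * lw->weight, the result is roughly
+ * 2000000 -- the runtime delta is scaled by weight/lw->weight using the
+ * precomputed inverse weight so no 64-bit division is needed.
+ */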
+static unsigned long
+calc_delta_mine(unsigned long delta_exec, unsigned long weight,
+ struct load_weight *lw)
+{
+ u64 tmp;
+
+ /*
+ * weight can be less than 2^SCHED_LOAD_RESOLUTION for task group sched
+ * entities since MIN_SHARES = 2. Treat weight as 1 if less than
+ * 2^SCHED_LOAD_RESOLUTION.
+ */
+ if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION)))
+ tmp = (u64)delta_exec * scale_load_down(weight);
+ else
+ tmp = (u64)delta_exec;
+
+ if (!lw->inv_weight) {
+ unsigned long w = scale_load_down(lw->weight);
+
+ if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST))
+ lw->inv_weight = 1;
+ else if (unlikely(!w))
+ lw->inv_weight = WMULT_CONST;
+ else
+ lw->inv_weight = WMULT_CONST / w;
+ }
+
+ /*
+ * Check whether we'd overflow the 64-bit multiplication:
+ */
+ if (unlikely(tmp > WMULT_CONST))
+ tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
+ WMULT_SHIFT/2);
+ else
+ tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
+
+ return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
}
-static inline bool got_nohz_idle_kick(void)
+static inline void update_load_add(struct load_weight *lw, unsigned long inc)
{
- int cpu = smp_processor_id();
- return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu));
+ lw->weight += inc;
+ lw->inv_weight = 0;
}
-#else /* CONFIG_NO_HZ */
+static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
+{
+ lw->weight -= dec;
+ lw->inv_weight = 0;
+}
-static inline bool got_nohz_idle_kick(void)
+static inline void update_load_set(struct load_weight *lw, unsigned long w)
{
- return false;
+ lw->weight = w;
+ lw->inv_weight = 0;
}
-#endif /* CONFIG_NO_HZ */
+/*
+ * To aid in avoiding the subversion of "niceness" due to uneven distribution
+ * of tasks with abnormal "nice" values across CPUs, the contribution that
+ * each task makes to its run queue's load is weighted according to its
+ * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
+ * scaled version of the new time slice allocation that they receive on time
+ * slice expiry etc.
+ */
-void sched_avg_update(struct rq *rq)
-{
- s64 period = sched_avg_period();
+#define WEIGHT_IDLEPRIO 3
+#define WMULT_IDLEPRIO 1431655765
- while ((s64)(rq->clock - rq->age_stamp) > period) {
- /*
- * Inline assembly required to prevent the compiler
- * optimising this loop into a divmod call.
- * See __iter_div_u64_rem() for another example of this.
- */
- asm("" : "+rm" (rq->age_stamp));
- rq->age_stamp += period;
- rq->rt_avg /= 2;
- }
+/*
+ * Nice levels are multiplicative, with a gentle 10% change for every
+ * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
+ * nice 1, it will get ~10% less CPU time than another CPU-bound task
+ * that remained on nice 0.
+ *
+ * The "10% effect" is relative and cumulative: from _any_ nice level,
+ * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
+ * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
+ * If a task goes up by ~10% and another task goes down by ~10% then
+ * the relative distance between them is ~25%.)
+ */
+static const int prio_to_weight[40] = {
+ /* -20 */ 88761, 71755, 56483, 46273, 36291,
+ /* -15 */ 29154, 23254, 18705, 14949, 11916,
+ /* -10 */ 9548, 7620, 6100, 4904, 3906,
+ /* -5 */ 3121, 2501, 1991, 1586, 1277,
+ /* 0 */ 1024, 820, 655, 526, 423,
+ /* 5 */ 335, 272, 215, 172, 137,
+ /* 10 */ 110, 87, 70, 56, 45,
+ /* 15 */ 36, 29, 23, 18, 15,
+};
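+
+/*
+ * Editorial note: adjacent entries in prio_to_weight[] differ by the ~1.25
+ * factor mentioned above, e.g. 1277/1024 ~= 1.25 and 1024/820 ~= 1.25,
+ * which is what produces the ~10% CPU-share step per nice level.
+ */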
+
+/*
+ * Inverse (2^32/x) values of the prio_to_weight[] array, precalculated.
+ *
+ * In cases where the weight does not change often, we can use the
+ * precalculated inverse to speed up the arithmetic by turning divisions
+ * into multiplications:
+ */
+static const u32 prio_to_wmult[40] = {
+ /* -20 */ 48388, 59856, 76040, 92818, 118348,
+ /* -15 */ 147320, 184698, 229616, 287308, 360437,
+ /* -10 */ 449829, 563644, 704093, 875809, 1099582,
+ /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326,
+ /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587,
+ /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126,
+ /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
+ /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
+};
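+
+/*
+ * For example, prio_to_wmult[20] == 4194304 == 2^32 / 1024 corresponds to
+ * prio_to_weight[20] == 1024 (nice 0); calc_delta_mine() multiplies by these
+ * inverses and shifts right by WMULT_SHIFT instead of dividing.
+ */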
+
+/* Time spent by the tasks of the cpu accounting group executing in ... */
+enum cpuacct_stat_index {
+ CPUACCT_STAT_USER, /* ... user mode */
+ CPUACCT_STAT_SYSTEM, /* ... kernel mode */
+
+ CPUACCT_STAT_NSTATS,
+};
+
+#ifdef CONFIG_CGROUP_CPUACCT
+static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+static void cpuacct_update_stats(struct task_struct *tsk,
+ enum cpuacct_stat_index idx, cputime_t val);
+#else
+static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
+static inline void cpuacct_update_stats(struct task_struct *tsk,
+ enum cpuacct_stat_index idx, cputime_t val) {}
+#endif
+
+static inline void inc_cpu_load(struct rq *rq, unsigned long load)
+{
+ update_load_add(&rq->load, load);
}
-#else /* !CONFIG_SMP */
-void resched_task(struct task_struct *p)
+static inline void dec_cpu_load(struct rq *rq, unsigned long load)
{
- assert_raw_spin_locked(&task_rq(p)->lock);
- set_tsk_need_resched(p);
+ update_load_sub(&rq->load, load);
}
-#endif /* CONFIG_SMP */
#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
(defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH)))
+typedef int (*tg_visitor)(struct task_group *, void *);
+
/*
* Iterate task_group tree rooted at *from, calling @down when first entering a
* node and @up when leaving it for the final time.
*
* Caller must hold rcu_lock or sufficient equivalent.
*/
-int walk_tg_tree_from(struct task_group *from,
+static int walk_tg_tree_from(struct task_group *from,
tg_visitor down, tg_visitor up, void *data)
{
struct task_group *parent, *child;
@@ -683,13 +1657,270 @@ int walk_tg_tree_from(struct task_group *from,
return ret;
}
-int tg_nop(struct task_group *tg, void *data)
+/*
+ * Iterate the full tree, calling @down when first entering a node and @up when
+ * leaving it for the final time.
+ *
+ * Caller must hold rcu_lock or sufficient equivalent.
+ */
+
+static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
+{
+ return walk_tg_tree_from(&root_task_group, down, up, data);
+}
+
+static int tg_nop(struct task_group *tg, void *data)
+{
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_SMP
+/* Used instead of source_load when we know the type == 0 */
+static unsigned long weighted_cpuload(const int cpu)
+{
+ return cpu_rq(cpu)->load.weight;
+}
+
+/*
+ * Return a low guess at the load of a migration-source cpu weighted
+ * according to the scheduling class and "nice" value.
+ *
+ * We want to under-estimate the load of migration sources, to
+ * balance conservatively.
+ */
+static unsigned long source_load(int cpu, int type)
+{
+ struct rq *rq = cpu_rq(cpu);
+ unsigned long total = weighted_cpuload(cpu);
+
+ if (type == 0 || !sched_feat(LB_BIAS))
+ return total;
+
+ return min(rq->cpu_load[type-1], total);
+}
+
+/*
+ * Return a high guess at the load of a migration-target cpu weighted
+ * according to the scheduling class and "nice" value.
+ */
+static unsigned long target_load(int cpu, int type)
+{
+ struct rq *rq = cpu_rq(cpu);
+ unsigned long total = weighted_cpuload(cpu);
+
+ if (type == 0 || !sched_feat(LB_BIAS))
+ return total;
+
+ return max(rq->cpu_load[type-1], total);
+}
+
+static unsigned long power_of(int cpu)
+{
+ return cpu_rq(cpu)->cpu_power;
+}
+
+static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
+
+static unsigned long cpu_avg_load_per_task(int cpu)
{
+ struct rq *rq = cpu_rq(cpu);
+ unsigned long nr_running = ACCESS_ONCE(rq->nr_running);
+
+ if (nr_running)
+ return rq->load.weight / nr_running;
+
return 0;
}
+
+#ifdef CONFIG_PREEMPT
+
+static void double_rq_lock(struct rq *rq1, struct rq *rq2);
+
+/*
+ * fair double_lock_balance: Safely acquires both rq->locks in a fair
+ * way at the expense of forcing extra atomic operations in all
+ * invocations. This assures that the double_lock is acquired using the
+ * same underlying policy as the spinlock_t on this architecture, which
+ * reduces latency compared to the unfair variant below. However, it
+ * also adds more overhead and therefore may reduce throughput.
+ */
+static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+ __releases(this_rq->lock)
+ __acquires(busiest->lock)
+ __acquires(this_rq->lock)
+{
+ raw_spin_unlock(&this_rq->lock);
+ double_rq_lock(this_rq, busiest);
+
+ return 1;
+}
+
+#else
+/*
+ * Unfair double_lock_balance: Optimizes throughput at the expense of
+ * latency by eliminating extra atomic operations when the locks are
+ * already in proper order on entry. This favors lower cpu-ids and will
+ * grant the double lock to lower cpus over higher ids under contention,
+ * regardless of entry order into the function.
+ */
+static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+ __releases(this_rq->lock)
+ __acquires(busiest->lock)
+ __acquires(this_rq->lock)
+{
+ int ret = 0;
+
+ if (unlikely(!raw_spin_trylock(&busiest->lock))) {
+ if (busiest < this_rq) {
+ raw_spin_unlock(&this_rq->lock);
+ raw_spin_lock(&busiest->lock);
+ raw_spin_lock_nested(&this_rq->lock,
+ SINGLE_DEPTH_NESTING);
+ ret = 1;
+ } else
+ raw_spin_lock_nested(&busiest->lock,
+ SINGLE_DEPTH_NESTING);
+ }
+ return ret;
+}
+
+#endif /* CONFIG_PREEMPT */
+
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+{
+ if (unlikely(!irqs_disabled())) {
+		/* printk() doesn't work well under rq->lock */
+ raw_spin_unlock(&this_rq->lock);
+ BUG_ON(1);
+ }
+
+ return _double_lock_balance(this_rq, busiest);
+}
+
+static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+ __releases(busiest->lock)
+{
+ raw_spin_unlock(&busiest->lock);
+ lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+}
+
+/*
+ * double_rq_lock - safely lock two runqueues
+ *
+ * Note this does not disable interrupts like task_rq_lock;
+ * you need to do so manually before calling.
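+ *
+ * The two locks are taken in ascending rq-address order (matching the
+ * locking rule documented above struct rq), which avoids ABBA deadlock
+ * when two CPUs try to lock the same pair concurrently.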
+ */
+static void double_rq_lock(struct rq *rq1, struct rq *rq2)
+ __acquires(rq1->lock)
+ __acquires(rq2->lock)
+{
+ BUG_ON(!irqs_disabled());
+ if (rq1 == rq2) {
+ raw_spin_lock(&rq1->lock);
+ __acquire(rq2->lock); /* Fake it out ;) */
+ } else {
+ if (rq1 < rq2) {
+ raw_spin_lock(&rq1->lock);
+ raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
+ } else {
+ raw_spin_lock(&rq2->lock);
+ raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
+ }
+ }
+}
+
+/*
+ * double_rq_unlock - safely unlock two runqueues
+ *
+ * Note this does not restore interrupts like task_rq_unlock;
+ * you need to do so manually after calling.
+ */
+static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+ __releases(rq1->lock)
+ __releases(rq2->lock)
+{
+ raw_spin_unlock(&rq1->lock);
+ if (rq1 != rq2)
+ raw_spin_unlock(&rq2->lock);
+ else
+ __release(rq2->lock);
+}
+
+#else /* CONFIG_SMP */
+
+/*
+ * double_rq_lock - safely lock two runqueues
+ *
+ * Note this does not disable interrupts like task_rq_lock;
+ * you need to do so manually before calling.
+ */
+static void double_rq_lock(struct rq *rq1, struct rq *rq2)
+ __acquires(rq1->lock)
+ __acquires(rq2->lock)
+{
+ BUG_ON(!irqs_disabled());
+ BUG_ON(rq1 != rq2);
+ raw_spin_lock(&rq1->lock);
+ __acquire(rq2->lock); /* Fake it out ;) */
+}
+
+/*
+ * double_rq_unlock - safely unlock two runqueues
+ *
+ * Note this does not restore interrupts like task_rq_unlock;
+ * you need to do so manually after calling.
+ */
+static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+ __releases(rq1->lock)
+ __releases(rq2->lock)
+{
+ BUG_ON(rq1 != rq2);
+ raw_spin_unlock(&rq1->lock);
+ __release(rq2->lock);
+}
+
+#endif
+
+static void calc_load_account_idle(struct rq *this_rq);
+static void update_sysctl(void);
+static int get_update_sysctl_factor(void);
+static void update_cpu_load(struct rq *this_rq);
+
+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+ set_task_rq(p, cpu);
+#ifdef CONFIG_SMP
+ /*
+	 * After ->cpu is set to a new value, task_rq_lock(p, ...) can be
+ * successfully executed on another CPU. We must ensure that updates of
+ * per-task data have been completed by this moment.
+ */
+ smp_wmb();
+ task_thread_info(p)->cpu = cpu;
#endif
+}
+
+static const struct sched_class rt_sched_class;
+
+#define sched_class_highest (&stop_sched_class)
+#define for_each_class(class) \
+ for (class = sched_class_highest; class; class = class->next)
+
+#include "sched_stats.h"
+
+static void inc_nr_running(struct rq *rq)
+{
+ rq->nr_running++;
+}
-void update_cpu_load(struct rq *this_rq);
+static void dec_nr_running(struct rq *rq)
+{
+ rq->nr_running--;
+}
static void set_load_weight(struct task_struct *p)
{
@@ -726,7 +1957,7 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
/*
* activate_task - move a task to the runqueue.
*/
-void activate_task(struct rq *rq, struct task_struct *p, int flags)
+static void activate_task(struct rq *rq, struct task_struct *p, int flags)
{
if (task_contributes_to_load(p))
rq->nr_uninterruptible--;
@@ -737,7 +1968,7 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags)
/*
* deactivate_task - remove a task from the runqueue.
*/
-void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
+static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
{
if (task_contributes_to_load(p))
rq->nr_uninterruptible++;
@@ -928,14 +2159,14 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
static int irqtime_account_hi_update(void)
{
- u64 *cpustat = kcpustat_this_cpu->cpustat;
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
unsigned long flags;
u64 latest_ns;
int ret = 0;
local_irq_save(flags);
latest_ns = this_cpu_read(cpu_hardirq_time);
- if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
+ if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->irq))
ret = 1;
local_irq_restore(flags);
return ret;
@@ -943,14 +2174,14 @@ static int irqtime_account_hi_update(void)
static int irqtime_account_si_update(void)
{
- u64 *cpustat = kcpustat_this_cpu->cpustat;
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
unsigned long flags;
u64 latest_ns;
int ret = 0;
local_irq_save(flags);
latest_ns = this_cpu_read(cpu_softirq_time);
- if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
+ if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->softirq))
ret = 1;
local_irq_restore(flags);
return ret;
@@ -962,6 +2193,15 @@ static int irqtime_account_si_update(void)
#endif
+#include "sched_idletask.c"
+#include "sched_fair.c"
+#include "sched_rt.c"
+#include "sched_autogroup.c"
+#include "sched_stoptask.c"
+#ifdef CONFIG_SCHED_DEBUG
+# include "sched_debug.c"
+#endif
+
void sched_set_stop_task(int cpu, struct task_struct *stop)
{
struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
@@ -1059,7 +2299,7 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
p->sched_class->prio_changed(rq, p, oldprio);
}
-void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
+static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
{
const struct sched_class *class;
@@ -1085,6 +2325,38 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
}
#ifdef CONFIG_SMP
+/*
+ * Is this task likely cache-hot:
+ */
+static int
+task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
+{
+ s64 delta;
+
+ if (p->sched_class != &fair_sched_class)
+ return 0;
+
+ if (unlikely(p->policy == SCHED_IDLE))
+ return 0;
+
+ /*
+ * Buddy candidates are cache hot:
+ */
+ if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running &&
+ (&p->se == cfs_rq_of(&p->se)->next ||
+ &p->se == cfs_rq_of(&p->se)->last))
+ return 1;
+
+ if (sysctl_sched_migration_cost == -1)
+ return 1;
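+	/*
+	 * sysctl_sched_migration_cost: -1 treats every fair task as
+	 * cache-hot, 0 treats none as cache-hot; otherwise a task is hot
+	 * if it last ran within that many nanoseconds.
+	 */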
+ if (sysctl_sched_migration_cost == 0)
+ return 0;
+
+ delta = now - p->se.exec_start;
+
+ return delta < (s64)sysctl_sched_migration_cost;
+}
+
void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
{
#ifdef CONFIG_SCHED_DEBUG
@@ -1511,11 +2783,6 @@ static int ttwu_activate_remote(struct task_struct *p, int wake_flags)
}
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
-
-static inline int ttwu_share_cache(int this_cpu, int that_cpu)
-{
- return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
-}
#endif /* CONFIG_SMP */
static void ttwu_queue(struct task_struct *p, int cpu)
@@ -1523,7 +2790,7 @@ static void ttwu_queue(struct task_struct *p, int cpu)
struct rq *rq = cpu_rq(cpu);
#if defined(CONFIG_SMP)
- if (sched_feat(TTWU_QUEUE) && !ttwu_share_cache(smp_processor_id(), cpu)) {
+ if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
sched_clock_cpu(cpu); /* sync clocks x-cpu */
ttwu_queue_remote(p, cpu);
return;
@@ -1937,7 +3204,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
local_irq_enable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
finish_lock_switch(rq, prev);
- trace_sched_stat_sleeptime(current, rq->clock);
fire_sched_in_preempt_notifiers(current);
if (mm)
@@ -2173,7 +3439,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
*/
static atomic_long_t calc_load_tasks_idle;
-void calc_load_account_idle(struct rq *this_rq)
+static void calc_load_account_idle(struct rq *this_rq)
{
long delta;
@@ -2317,7 +3583,7 @@ static void calc_global_nohz(unsigned long ticks)
*/
}
#else
-void calc_load_account_idle(struct rq *this_rq)
+static void calc_load_account_idle(struct rq *this_rq)
{
}
@@ -2460,7 +3726,7 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
* scheduler tick (TICK_NSEC). With tickless idle this will not be called
* every tick. We fix it up based on jiffies.
*/
-void update_cpu_load(struct rq *this_rq)
+static void update_cpu_load(struct rq *this_rq)
{
unsigned long this_load = this_rq->load.weight;
unsigned long curr_jiffies = jiffies;
@@ -2538,10 +3804,8 @@ void sched_exec(void)
#endif
DEFINE_PER_CPU(struct kernel_stat, kstat);
-DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
EXPORT_PER_CPU_SYMBOL(kstat);
-EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
/*
* Return any ns on the sched_clock that have not yet been accounted in
@@ -2594,42 +3858,6 @@ unsigned long long task_sched_runtime(struct task_struct *p)
return ns;
}
-#ifdef CONFIG_CGROUP_CPUACCT
-struct cgroup_subsys cpuacct_subsys;
-struct cpuacct root_cpuacct;
-#endif
-
-static inline void task_group_account_field(struct task_struct *p, int index,
- u64 tmp)
-{
-#ifdef CONFIG_CGROUP_CPUACCT
- struct kernel_cpustat *kcpustat;
- struct cpuacct *ca;
-#endif
- /*
- * Since all updates are sure to touch the root cgroup, we
- * get ourselves ahead and touch it first. If the root cgroup
- * is the only cgroup, then nothing else should be necessary.
- *
- */
- __get_cpu_var(kernel_cpustat).cpustat[index] += tmp;
-
-#ifdef CONFIG_CGROUP_CPUACCT
- if (unlikely(!cpuacct_subsys.active))
- return;
-
- rcu_read_lock();
- ca = task_ca(p);
- while (ca && (ca != &root_cpuacct)) {
- kcpustat = this_cpu_ptr(ca->cpustat);
- kcpustat->cpustat[index] += tmp;
- ca = parent_ca(ca);
- }
- rcu_read_unlock();
-#endif
-}
-
-
/*
* Account user cpu time to a process.
* @p: the process that the cpu time gets accounted to
@@ -2639,18 +3867,22 @@ static inline void task_group_account_field(struct task_struct *p, int index,
void account_user_time(struct task_struct *p, cputime_t cputime,
cputime_t cputime_scaled)
{
- int index;
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ cputime64_t tmp;
/* Add user time to process. */
- p->utime += cputime;
- p->utimescaled += cputime_scaled;
+ p->utime = cputime_add(p->utime, cputime);
+ p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
account_group_user_time(p, cputime);
- index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
-
/* Add user time to cpustat. */
- task_group_account_field(p, index, (__force u64) cputime);
+ tmp = cputime_to_cputime64(cputime);
+ if (TASK_NICE(p) > 0)
+ cpustat->nice = cputime64_add(cpustat->nice, tmp);
+ else
+ cpustat->user = cputime64_add(cpustat->user, tmp);
+ cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
/* Account for user time used */
acct_update_integrals(p);
}
@@ -2664,21 +3896,24 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
static void account_guest_time(struct task_struct *p, cputime_t cputime,
cputime_t cputime_scaled)
{
- u64 *cpustat = kcpustat_this_cpu->cpustat;
+ cputime64_t tmp;
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+
+ tmp = cputime_to_cputime64(cputime);
/* Add guest time to process. */
- p->utime += cputime;
- p->utimescaled += cputime_scaled;
+ p->utime = cputime_add(p->utime, cputime);
+ p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
account_group_user_time(p, cputime);
- p->gtime += cputime;
+ p->gtime = cputime_add(p->gtime, cputime);
/* Add guest time to cpustat. */
if (TASK_NICE(p) > 0) {
- cpustat[CPUTIME_NICE] += (__force u64) cputime;
- cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
+ cpustat->nice = cputime64_add(cpustat->nice, tmp);
+ cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp);
} else {
- cpustat[CPUTIME_USER] += (__force u64) cputime;
- cpustat[CPUTIME_GUEST] += (__force u64) cputime;
+ cpustat->user = cputime64_add(cpustat->user, tmp);
+ cpustat->guest = cputime64_add(cpustat->guest, tmp);
}
}
@@ -2691,15 +3926,18 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
*/
static inline
void __account_system_time(struct task_struct *p, cputime_t cputime,
- cputime_t cputime_scaled, int index)
+ cputime_t cputime_scaled, cputime64_t *target_cputime64)
{
+ cputime64_t tmp = cputime_to_cputime64(cputime);
+
/* Add system time to process. */
- p->stime += cputime;
- p->stimescaled += cputime_scaled;
+ p->stime = cputime_add(p->stime, cputime);
+ p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
account_group_system_time(p, cputime);
/* Add system time to cpustat. */
- task_group_account_field(p, index, (__force u64) cputime);
+ *target_cputime64 = cputime64_add(*target_cputime64, tmp);
+ cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
/* Account for system time used */
acct_update_integrals(p);
@@ -2715,7 +3953,8 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
void account_system_time(struct task_struct *p, int hardirq_offset,
cputime_t cputime, cputime_t cputime_scaled)
{
- int index;
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ cputime64_t *target_cputime64;
if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
account_guest_time(p, cputime, cputime_scaled);
@@ -2723,13 +3962,13 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
}
if (hardirq_count() - hardirq_offset)
- index = CPUTIME_IRQ;
+ target_cputime64 = &cpustat->irq;
else if (in_serving_softirq())
- index = CPUTIME_SOFTIRQ;
+ target_cputime64 = &cpustat->softirq;
else
- index = CPUTIME_SYSTEM;
+ target_cputime64 = &cpustat->system;
- __account_system_time(p, cputime, cputime_scaled, index);
+ __account_system_time(p, cputime, cputime_scaled, target_cputime64);
}
/*
@@ -2738,9 +3977,10 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
*/
void account_steal_time(cputime_t cputime)
{
- u64 *cpustat = kcpustat_this_cpu->cpustat;
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ cputime64_t cputime64 = cputime_to_cputime64(cputime);
- cpustat[CPUTIME_STEAL] += (__force u64) cputime;
+ cpustat->steal = cputime64_add(cpustat->steal, cputime64);
}
/*
@@ -2749,13 +3989,14 @@ void account_steal_time(cputime_t cputime)
*/
void account_idle_time(cputime_t cputime)
{
- u64 *cpustat = kcpustat_this_cpu->cpustat;
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ cputime64_t cputime64 = cputime_to_cputime64(cputime);
struct rq *rq = this_rq();
if (atomic_read(&rq->nr_iowait) > 0)
- cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
+ cpustat->iowait = cputime64_add(cpustat->iowait, cputime64);
else
- cpustat[CPUTIME_IDLE] += (__force u64) cputime;
+ cpustat->idle = cputime64_add(cpustat->idle, cputime64);
}
static __always_inline bool steal_account_process_tick(void)
@@ -2805,15 +4046,16 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
struct rq *rq)
{
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
- u64 *cpustat = kcpustat_this_cpu->cpustat;
+ cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy);
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
if (steal_account_process_tick())
return;
if (irqtime_account_hi_update()) {
- cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy;
+ cpustat->irq = cputime64_add(cpustat->irq, tmp);
} else if (irqtime_account_si_update()) {
- cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy;
+ cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
} else if (this_cpu_ksoftirqd() == p) {
/*
* ksoftirqd time do not get accounted in cpu_softirq_time.
@@ -2821,7 +4063,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
* Also, p->stime needs to be updated for ksoftirqd.
*/
__account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
- CPUTIME_SOFTIRQ);
+ &cpustat->softirq);
} else if (user_tick) {
account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
} else if (p == rq->idle) {
@@ -2830,7 +4072,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
} else {
__account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
- CPUTIME_SYSTEM);
+ &cpustat->system);
}
}
@@ -2929,7 +4171,7 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
- cputime_t rtime, utime = p->utime, total = utime + p->stime;
+ cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime);
/*
* Use CFS's precise accounting:
@@ -2937,11 +4179,11 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
if (total) {
- u64 temp = (__force u64) rtime;
+ u64 temp = rtime;
- temp *= (__force u64) utime;
- do_div(temp, (__force u32) total);
- utime = (__force cputime_t) temp;
+ temp *= utime;
+ do_div(temp, total);
+ utime = (cputime_t)temp;
} else
utime = rtime;
@@ -2949,7 +4191,7 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
* Compare with previous values, to keep monotonicity:
*/
p->prev_utime = max(p->prev_utime, utime);
- p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);
+ p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime));
*ut = p->prev_utime;
*st = p->prev_stime;
@@ -2966,20 +4208,21 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
thread_group_cputime(p, &cputime);
- total = cputime.utime + cputime.stime;
+ total = cputime_add(cputime.utime, cputime.stime);
rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
if (total) {
- u64 temp = (__force u64) rtime;
+ u64 temp = rtime;
- temp *= (__force u64) cputime.utime;
- do_div(temp, (__force u32) total);
- utime = (__force cputime_t) temp;
+ temp *= cputime.utime;
+ do_div(temp, total);
+ utime = (cputime_t)temp;
} else
utime = rtime;
sig->prev_utime = max(sig->prev_utime, utime);
- sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime);
+ sig->prev_stime = max(sig->prev_stime,
+ cputime_sub(rtime, sig->prev_utime));
*ut = sig->prev_utime;
*st = sig->prev_stime;
@@ -3078,9 +4321,6 @@ static noinline void __schedule_bug(struct task_struct *prev)
{
struct pt_regs *regs = get_irq_regs();
- if (oops_in_progress)
- return;
-
printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n",
prev->comm, prev->pid, preempt_count());
@@ -4612,13 +5852,6 @@ bool __sched yield_to(struct task_struct *p, bool preempt)
*/
if (preempt && rq != p_rq)
resched_task(p_rq->curr);
- } else {
- /*
- * We might have set it in task_yield_fair(), but are
- * not going to schedule(), so don't want to skip
- * the next update.
- */
- rq->skip_clock_update = 0;
}
out:
@@ -4786,7 +6019,7 @@ void sched_show_task(struct task_struct *p)
free = stack_not_used(p);
#endif
printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
- task_pid_nr(p), task_pid_nr(rcu_dereference(p->real_parent)),
+ task_pid_nr(p), task_pid_nr(p->real_parent),
(unsigned long)task_thread_info(p)->flags);
show_stack(p, NULL);
@@ -4885,6 +6118,53 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
#endif
}
+/*
+ * Increase the granularity value when there are more CPUs,
+ * because with more CPUs the 'effective latency' as visible
+ * to users decreases. But the relationship is not linear,
+ * so pick a second-best guess by going with the log2 of the
+ * number of CPUs.
+ *
+ * This idea comes from the SD scheduler of Con Kolivas:
+ */
+static int get_update_sysctl_factor(void)
+{
+ unsigned int cpus = min_t(int, num_online_cpus(), 8);
+ unsigned int factor;
+
+ switch (sysctl_sched_tunable_scaling) {
+ case SCHED_TUNABLESCALING_NONE:
+ factor = 1;
+ break;
+ case SCHED_TUNABLESCALING_LINEAR:
+ factor = cpus;
+ break;
+ case SCHED_TUNABLESCALING_LOG:
+ default:
+ factor = 1 + ilog2(cpus);
+ break;
+ }
+
+ return factor;
+}
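+
+/*
+ * For example (illustrative): with SCHED_TUNABLESCALING_LOG (the default
+ * case above), a system with 8 or more online CPUs gets
+ * factor = 1 + ilog2(8) = 4, so update_sysctl() below scales sched_latency
+ * and friends to 4x their normalized single-CPU values.
+ */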
+
+static void update_sysctl(void)
+{
+ unsigned int factor = get_update_sysctl_factor();
+
+#define SET_SYSCTL(name) \
+ (sysctl_##name = (factor) * normalized_sysctl_##name)
+ SET_SYSCTL(sched_min_granularity);
+ SET_SYSCTL(sched_latency);
+ SET_SYSCTL(sched_wakeup_granularity);
+#undef SET_SYSCTL
+}
+
+static inline void sched_init_granularity(void)
+{
+ update_sysctl();
+}
+
#ifdef CONFIG_SMP
void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
@@ -5071,6 +6351,30 @@ static void calc_global_load_remove(struct rq *rq)
rq->calc_load_active = 0;
}
+#ifdef CONFIG_CFS_BANDWIDTH
+static void unthrottle_offline_cfs_rqs(struct rq *rq)
+{
+ struct cfs_rq *cfs_rq;
+
+ for_each_leaf_cfs_rq(rq, cfs_rq) {
+ struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+
+ if (!cfs_rq->runtime_enabled)
+ continue;
+
+ /*
+ * clock_task is not advancing so we just need to make sure
+ * there's some valid quota amount
+ */
+ cfs_rq->runtime_remaining = cfs_b->quota;
+ if (cfs_rq_throttled(cfs_rq))
+ unthrottle_cfs_rq(cfs_rq);
+ }
+}
+#else
+static void unthrottle_offline_cfs_rqs(struct rq *rq) {}
+#endif
+
/*
* Migrate all tasks from the rq, sleeping tasks will be migrated by
* try_to_wake_up()->select_task_rq().
@@ -5676,12 +6980,6 @@ static int init_rootdomain(struct root_domain *rd)
return -ENOMEM;
}
-/*
- * By default the system creates a single root-domain with all cpus as
- * members (mimicking the global state we have today).
- */
-struct root_domain def_root_domain;
-
static void init_defrootdomain(void)
{
init_rootdomain(&def_root_domain);
@@ -5752,31 +7050,6 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
destroy_sched_domain(sd, cpu);
}
-/*
- * Keep a special pointer to the highest sched_domain that has
- * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this
- * allows us to avoid some pointer chasing select_idle_sibling().
- *
- * Also keep a unique ID per domain (we use the first cpu number in
- * the cpumask of the domain), this allows us to quickly tell if
- * two cpus are in the same cache domain, see ttwu_share_cache().
- */
-DEFINE_PER_CPU(struct sched_domain *, sd_llc);
-DEFINE_PER_CPU(int, sd_llc_id);
-
-static void update_top_cache_domain(int cpu)
-{
- struct sched_domain *sd;
- int id = cpu;
-
- sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
- if (sd)
- id = cpumask_first(sched_domain_span(sd));
-
- rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
- per_cpu(sd_llc_id, cpu) = id;
-}
-
/*
* Attach the domain 'sd' to 'cpu' as its base domain. Callers must
* hold the hotplug lock.
@@ -5816,8 +7089,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
tmp = rq->sd;
rcu_assign_pointer(rq->sd, sd);
destroy_sched_domains(tmp, cpu);
-
- update_top_cache_domain(cpu);
}
/* cpus with isolated domains */
@@ -5977,7 +7248,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
continue;
sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
- GFP_KERNEL, cpu_to_node(cpu));
+ GFP_KERNEL, cpu_to_node(i));
if (!sg)
goto fail;
@@ -6115,12 +7386,6 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
return;
update_group_power(sd, cpu);
- atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight);
-}
-
-int __weak arch_sd_sibling_asym_packing(void)
-{
- return 0*SD_ASYM_PACKING;
}
/*
@@ -6758,6 +8023,29 @@ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
}
}
+static int update_runtime(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ int cpu = (int)(long)hcpu;
+
+ switch (action) {
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ disable_runtime(cpu_rq(cpu));
+ return NOTIFY_OK;
+
+ case CPU_DOWN_FAILED:
+ case CPU_DOWN_FAILED_FROZEN:
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ enable_runtime(cpu_rq(cpu));
+ return NOTIFY_OK;
+
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
void __init sched_init_smp(void)
{
cpumask_var_t non_isolated_cpus;
@@ -6806,11 +8094,104 @@ int in_sched_functions(unsigned long addr)
&& addr < (unsigned long)__sched_text_end);
}
-#ifdef CONFIG_CGROUP_SCHED
-struct task_group root_task_group;
+static void init_cfs_rq(struct cfs_rq *cfs_rq)
+{
+ cfs_rq->tasks_timeline = RB_ROOT;
+ INIT_LIST_HEAD(&cfs_rq->tasks);
+ cfs_rq->min_vruntime = (u64)(-(1LL << 20));
+#ifndef CONFIG_64BIT
+ cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
+#endif
+}
+
+static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
+{
+ struct rt_prio_array *array;
+ int i;
+
+ array = &rt_rq->active;
+ for (i = 0; i < MAX_RT_PRIO; i++) {
+ INIT_LIST_HEAD(array->queue + i);
+ __clear_bit(i, array->bitmap);
+ }
+ /* delimiter for bitsearch: */
+ __set_bit(MAX_RT_PRIO, array->bitmap);
+
+#if defined CONFIG_SMP
+ rt_rq->highest_prio.curr = MAX_RT_PRIO;
+ rt_rq->highest_prio.next = MAX_RT_PRIO;
+ rt_rq->rt_nr_migratory = 0;
+ rt_rq->overloaded = 0;
+ plist_head_init(&rt_rq->pushable_tasks);
+#endif
+
+ rt_rq->rt_time = 0;
+ rt_rq->rt_throttled = 0;
+ rt_rq->rt_runtime = 0;
+ raw_spin_lock_init(&rt_rq->rt_runtime_lock);
+}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
+ struct sched_entity *se, int cpu,
+ struct sched_entity *parent)
+{
+ struct rq *rq = cpu_rq(cpu);
+
+ cfs_rq->tg = tg;
+ cfs_rq->rq = rq;
+#ifdef CONFIG_SMP
+ /* allow initial update_cfs_load() to truncate */
+ cfs_rq->load_stamp = 1;
+#endif
+ init_cfs_rq_runtime(cfs_rq);
+
+ tg->cfs_rq[cpu] = cfs_rq;
+ tg->se[cpu] = se;
+
+ /* se could be NULL for root_task_group */
+ if (!se)
+ return;
+
+ if (!parent)
+ se->cfs_rq = &rq->cfs;
+ else
+ se->cfs_rq = parent->my_q;
+
+ se->my_q = cfs_rq;
+ update_load_set(&se->load, 0);
+ se->parent = parent;
+}
#endif
-DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
+#ifdef CONFIG_RT_GROUP_SCHED
+static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
+ struct sched_rt_entity *rt_se, int cpu,
+ struct sched_rt_entity *parent)
+{
+ struct rq *rq = cpu_rq(cpu);
+
+ rt_rq->highest_prio.curr = MAX_RT_PRIO;
+ rt_rq->rt_nr_boosted = 0;
+ rt_rq->rq = rq;
+ rt_rq->tg = tg;
+
+ tg->rt_rq[cpu] = rt_rq;
+ tg->rt_se[cpu] = rt_se;
+
+ if (!rt_se)
+ return;
+
+ if (!parent)
+ rt_se->rt_rq = &rq->rt;
+ else
+ rt_se->rt_rq = parent->my_q;
+
+ rt_se->my_q = rt_rq;
+ rt_se->parent = parent;
+ INIT_LIST_HEAD(&rt_se->run_list);
+}
+#endif
void __init sched_init(void)
{
@@ -6868,17 +8249,9 @@ void __init sched_init(void)
#ifdef CONFIG_CGROUP_SCHED
list_add(&root_task_group.list, &task_groups);
INIT_LIST_HEAD(&root_task_group.children);
- INIT_LIST_HEAD(&root_task_group.siblings);
autogroup_init(&init_task);
-
#endif /* CONFIG_CGROUP_SCHED */
-#ifdef CONFIG_CGROUP_CPUACCT
- root_cpuacct.cpustat = &kernel_cpustat;
- root_cpuacct.cpuusage = alloc_percpu(u64);
- /* Too early, not expected to fail */
- BUG_ON(!root_cpuacct.cpuusage);
-#endif
for_each_possible_cpu(i) {
struct rq *rq;
@@ -6890,7 +8263,7 @@ void __init sched_init(void)
init_cfs_rq(&rq->cfs);
init_rt_rq(&rq->rt, rq);
#ifdef CONFIG_FAIR_GROUP_SCHED
- root_task_group.shares = ROOT_TASK_GROUP_LOAD;
+ root_task_group.shares = root_task_group_load;
INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
/*
* How much cpu bandwidth does root_task_group get?
@@ -6940,7 +8313,7 @@ void __init sched_init(void)
rq->avg_idle = 2*sysctl_sched_migration_cost;
rq_attach_root(rq, &def_root_domain);
#ifdef CONFIG_NO_HZ
- rq->nohz_flags = 0;
+ rq->nohz_balance_kick = 0;
#endif
#endif
init_rq_hrtick(rq);
@@ -6953,6 +8326,10 @@ void __init sched_init(void)
INIT_HLIST_HEAD(&init_task.preempt_notifiers);
#endif
+#ifdef CONFIG_SMP
+ open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
+#endif
+
#ifdef CONFIG_RT_MUTEXES
plist_head_init(&init_task.pi_waiters);
#endif
@@ -6980,11 +8357,17 @@ void __init sched_init(void)
#ifdef CONFIG_SMP
zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
+#ifdef CONFIG_NO_HZ
+ zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
+ alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT);
+ atomic_set(&nohz.load_balancer, nr_cpu_ids);
+ atomic_set(&nohz.first_pick_cpu, nr_cpu_ids);
+ atomic_set(&nohz.second_pick_cpu, nr_cpu_ids);
+#endif
/* May be allocated at isolcpus cmdline parse time */
if (cpu_isolated_map == NULL)
zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
-#endif
- init_sched_fair_class();
+#endif /* SMP */
scheduler_running = 1;
}
@@ -7136,10 +8519,169 @@ void set_curr_task(int cpu, struct task_struct *p)
#endif
-#ifdef CONFIG_CGROUP_SCHED
-/* task_group_lock serializes the addition/removal of task groups */
-static DEFINE_SPINLOCK(task_group_lock);
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void free_fair_sched_group(struct task_group *tg)
+{
+ int i;
+
+ destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
+
+ for_each_possible_cpu(i) {
+ if (tg->cfs_rq)
+ kfree(tg->cfs_rq[i]);
+ if (tg->se)
+ kfree(tg->se[i]);
+ }
+
+ kfree(tg->cfs_rq);
+ kfree(tg->se);
+}
+
+static
+int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
+{
+ struct cfs_rq *cfs_rq;
+ struct sched_entity *se;
+ int i;
+
+ tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
+ if (!tg->cfs_rq)
+ goto err;
+ tg->se = kzalloc(sizeof(se) * nr_cpu_ids, GFP_KERNEL);
+ if (!tg->se)
+ goto err;
+
+ tg->shares = NICE_0_LOAD;
+
+ init_cfs_bandwidth(tg_cfs_bandwidth(tg));
+
+ for_each_possible_cpu(i) {
+ cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
+ GFP_KERNEL, cpu_to_node(i));
+ if (!cfs_rq)
+ goto err;
+
+ se = kzalloc_node(sizeof(struct sched_entity),
+ GFP_KERNEL, cpu_to_node(i));
+ if (!se)
+ goto err_free_rq;
+
+ init_cfs_rq(cfs_rq);
+ init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
+ }
+
+ return 1;
+
+err_free_rq:
+ kfree(cfs_rq);
+err:
+ return 0;
+}
+
+static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+ unsigned long flags;
+
+ /*
+	 * Only empty task groups can be destroyed, so we can speculatively
+ * check on_list without danger of it being re-added.
+ */
+ if (!tg->cfs_rq[cpu]->on_list)
+ return;
+
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+}
+#else /* !CONFIG_FAIR_GROUP_SCHED */
+static inline void free_fair_sched_group(struct task_group *tg)
+{
+}
+
+static inline
+int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
+{
+ return 1;
+}
+
+static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
+{
+}
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
+#ifdef CONFIG_RT_GROUP_SCHED
+static void free_rt_sched_group(struct task_group *tg)
+{
+ int i;
+
+ if (tg->rt_se)
+ destroy_rt_bandwidth(&tg->rt_bandwidth);
+
+ for_each_possible_cpu(i) {
+ if (tg->rt_rq)
+ kfree(tg->rt_rq[i]);
+ if (tg->rt_se)
+ kfree(tg->rt_se[i]);
+ }
+
+ kfree(tg->rt_rq);
+ kfree(tg->rt_se);
+}
+
+static
+int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
+{
+ struct rt_rq *rt_rq;
+ struct sched_rt_entity *rt_se;
+ int i;
+
+ tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL);
+ if (!tg->rt_rq)
+ goto err;
+ tg->rt_se = kzalloc(sizeof(rt_se) * nr_cpu_ids, GFP_KERNEL);
+ if (!tg->rt_se)
+ goto err;
+
+ init_rt_bandwidth(&tg->rt_bandwidth,
+ ktime_to_ns(def_rt_bandwidth.rt_period), 0);
+
+ for_each_possible_cpu(i) {
+ rt_rq = kzalloc_node(sizeof(struct rt_rq),
+ GFP_KERNEL, cpu_to_node(i));
+ if (!rt_rq)
+ goto err;
+
+ rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
+ GFP_KERNEL, cpu_to_node(i));
+ if (!rt_se)
+ goto err_free_rq;
+
+ init_rt_rq(rt_rq, cpu_rq(i));
+ rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
+ init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
+ }
+
+ return 1;
+
+err_free_rq:
+ kfree(rt_rq);
+err:
+ return 0;
+}
+#else /* !CONFIG_RT_GROUP_SCHED */
+static inline void free_rt_sched_group(struct task_group *tg)
+{
+}
+static inline
+int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
+{
+ return 1;
+}
+#endif /* CONFIG_RT_GROUP_SCHED */
+
+#ifdef CONFIG_CGROUP_SCHED
static void free_sched_group(struct task_group *tg)
{
free_fair_sched_group(tg);
@@ -7244,6 +8786,50 @@ void sched_move_task(struct task_struct *tsk)
}
#endif /* CONFIG_CGROUP_SCHED */
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static DEFINE_MUTEX(shares_mutex);
+
+int sched_group_set_shares(struct task_group *tg, unsigned long shares)
+{
+ int i;
+ unsigned long flags;
+
+ /*
+ * We can't change the weight of the root cgroup.
+ */
+ if (!tg->se[0])
+ return -EINVAL;
+
+ shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES));
+
+ mutex_lock(&shares_mutex);
+ if (tg->shares == shares)
+ goto done;
+
+ tg->shares = shares;
+ for_each_possible_cpu(i) {
+ struct rq *rq = cpu_rq(i);
+ struct sched_entity *se;
+
+ se = tg->se[i];
+ /* Propagate contribution to hierarchy */
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ for_each_sched_entity(se)
+ update_cfs_shares(group_cfs_rq(se));
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+ }
+
+done:
+ mutex_unlock(&shares_mutex);
+ return 0;
+}
+
+unsigned long sched_group_shares(struct task_group *tg)
+{
+ return tg->shares;
+}
+#endif
+
#if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH)
static unsigned long to_ratio(u64 period, u64 runtime)
{
@@ -7266,7 +8852,7 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
struct task_struct *g, *p;
do_each_thread(g, p) {
- if (rt_task(p) && task_rq(p)->rt.tg == tg)
+ if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
return 1;
} while_each_thread(g, p);
@@ -7617,8 +9203,8 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
{
- int i, ret = 0, runtime_enabled, runtime_was_enabled;
- struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+ int i, ret = 0, runtime_enabled;
+ struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
if (tg == &root_task_group)
return -EINVAL;
@@ -7645,8 +9231,6 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
goto out_unlock;
runtime_enabled = quota != RUNTIME_INF;
- runtime_was_enabled = cfs_b->quota != RUNTIME_INF;
- account_cfs_bandwidth_used(runtime_enabled, runtime_was_enabled);
raw_spin_lock_irq(&cfs_b->lock);
cfs_b->period = ns_to_ktime(period);
cfs_b->quota = quota;
@@ -7662,13 +9246,13 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
for_each_possible_cpu(i) {
struct cfs_rq *cfs_rq = tg->cfs_rq[i];
- struct rq *rq = cfs_rq->rq;
+ struct rq *rq = rq_of(cfs_rq);
raw_spin_lock_irq(&rq->lock);
cfs_rq->runtime_enabled = runtime_enabled;
cfs_rq->runtime_remaining = 0;
- if (cfs_rq->throttled)
+ if (cfs_rq_throttled(cfs_rq))
unthrottle_cfs_rq(cfs_rq);
raw_spin_unlock_irq(&rq->lock);
}
@@ -7682,7 +9266,7 @@ int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
{
u64 quota, period;
- period = ktime_to_ns(tg->cfs_bandwidth.period);
+ period = ktime_to_ns(tg_cfs_bandwidth(tg)->period);
if (cfs_quota_us < 0)
quota = RUNTIME_INF;
else
@@ -7695,10 +9279,10 @@ long tg_get_cfs_quota(struct task_group *tg)
{
u64 quota_us;
- if (tg->cfs_bandwidth.quota == RUNTIME_INF)
+ if (tg_cfs_bandwidth(tg)->quota == RUNTIME_INF)
return -1;
- quota_us = tg->cfs_bandwidth.quota;
+ quota_us = tg_cfs_bandwidth(tg)->quota;
do_div(quota_us, NSEC_PER_USEC);
return quota_us;
@@ -7709,7 +9293,10 @@ int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
u64 quota, period;
period = (u64)cfs_period_us * NSEC_PER_USEC;
- quota = tg->cfs_bandwidth.quota;
+ quota = tg_cfs_bandwidth(tg)->quota;
+
+ if (period <= 0)
+ return -EINVAL;
return tg_set_cfs_bandwidth(tg, period, quota);
}
@@ -7718,7 +9305,7 @@ long tg_get_cfs_period(struct task_group *tg)
{
u64 cfs_period_us;
- cfs_period_us = ktime_to_ns(tg->cfs_bandwidth.period);
+ cfs_period_us = ktime_to_ns(tg_cfs_bandwidth(tg)->period);
do_div(cfs_period_us, NSEC_PER_USEC);
return cfs_period_us;
@@ -7778,13 +9365,13 @@ static u64 normalize_cfs_quota(struct task_group *tg,
static int tg_cfs_schedulable_down(struct task_group *tg, void *data)
{
struct cfs_schedulable_data *d = data;
- struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+ struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
s64 quota = 0, parent_quota = -1;
if (!tg->parent) {
quota = RUNTIME_INF;
} else {
- struct cfs_bandwidth *parent_b = &tg->parent->cfs_bandwidth;
+ struct cfs_bandwidth *parent_b = tg_cfs_bandwidth(tg->parent);
quota = normalize_cfs_quota(tg, d);
parent_quota = parent_b->hierarchal_quota;
@@ -7828,7 +9415,7 @@ static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft,
struct cgroup_map_cb *cb)
{
struct task_group *tg = cgroup_tg(cgrp);
- struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+ struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
cb->fill(cb, "nr_periods", cfs_b->nr_periods);
cb->fill(cb, "nr_throttled", cfs_b->nr_throttled);
@@ -7929,16 +9516,38 @@ struct cgroup_subsys cpu_cgroup_subsys = {
* (balbir@in.ibm.com).
*/
+/* track cpu usage of a group of tasks and its child groups */
+struct cpuacct {
+ struct cgroup_subsys_state css;
+ /* cpuusage holds pointer to a u64-type object on every cpu */
+ u64 __percpu *cpuusage;
+ struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
+ struct cpuacct *parent;
+};
+
+struct cgroup_subsys cpuacct_subsys;
+
+/* return cpu accounting group corresponding to this container */
+static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
+{
+ return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id),
+ struct cpuacct, css);
+}
+
+/* return cpu accounting group to which this task belongs */
+static inline struct cpuacct *task_ca(struct task_struct *tsk)
+{
+ return container_of(task_subsys_state(tsk, cpuacct_subsys_id),
+ struct cpuacct, css);
+}
+
/* create a new cpu accounting group */
static struct cgroup_subsys_state *cpuacct_create(
struct cgroup_subsys *ss, struct cgroup *cgrp)
{
- struct cpuacct *ca;
-
- if (!cgrp->parent)
- return &root_cpuacct.css;
+ struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+ int i;
- ca = kzalloc(sizeof(*ca), GFP_KERNEL);
if (!ca)
goto out;
@@ -7946,13 +9555,18 @@ static struct cgroup_subsys_state *cpuacct_create(
if (!ca->cpuusage)
goto out_free_ca;
- ca->cpustat = alloc_percpu(struct kernel_cpustat);
- if (!ca->cpustat)
- goto out_free_cpuusage;
+ for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+ if (percpu_counter_init(&ca->cpustat[i], 0))
+ goto out_free_counters;
+
+ if (cgrp->parent)
+ ca->parent = cgroup_ca(cgrp->parent);
return &ca->css;
-out_free_cpuusage:
+out_free_counters:
+ while (--i >= 0)
+ percpu_counter_destroy(&ca->cpustat[i]);
free_percpu(ca->cpuusage);
out_free_ca:
kfree(ca);
@@ -7965,8 +9579,10 @@ static void
cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
struct cpuacct *ca = cgroup_ca(cgrp);
+ int i;
- free_percpu(ca->cpustat);
+ for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+ percpu_counter_destroy(&ca->cpustat[i]);
free_percpu(ca->cpuusage);
kfree(ca);
}
@@ -8059,31 +9675,16 @@ static const char *cpuacct_stat_desc[] = {
};
static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
- struct cgroup_map_cb *cb)
+ struct cgroup_map_cb *cb)
{
struct cpuacct *ca = cgroup_ca(cgrp);
- int cpu;
- s64 val = 0;
-
- for_each_online_cpu(cpu) {
- struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
- val += kcpustat->cpustat[CPUTIME_USER];
- val += kcpustat->cpustat[CPUTIME_NICE];
- }
- val = cputime64_to_clock_t(val);
- cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val);
+ int i;
- val = 0;
- for_each_online_cpu(cpu) {
- struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
- val += kcpustat->cpustat[CPUTIME_SYSTEM];
- val += kcpustat->cpustat[CPUTIME_IRQ];
- val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
+ for (i = 0; i < CPUACCT_STAT_NSTATS; i++) {
+ s64 val = percpu_counter_read(&ca->cpustat[i]);
+ val = cputime64_to_clock_t(val);
+ cb->fill(cb, cpuacct_stat_desc[i], val);
}
-
- val = cputime64_to_clock_t(val);
- cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
-
return 0;
}
@@ -8113,7 +9714,7 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
*
* called with rq->lock held.
*/
-void cpuacct_charge(struct task_struct *tsk, u64 cputime)
+static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
struct cpuacct *ca;
int cpu;
@@ -8127,7 +9728,7 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
ca = task_ca(tsk);
- for (; ca; ca = parent_ca(ca)) {
+ for (; ca; ca = ca->parent) {
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
*cpuusage += cputime;
}
@@ -8135,6 +9736,45 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
rcu_read_unlock();
}
+/*
+ * When CONFIG_VIRT_CPU_ACCOUNTING is enabled, one jiffy can be very large
+ * in cputime_t units. As a result, cpuacct_update_stats calls
+ * percpu_counter_add with values large enough to always exceed the
+ * per-cpu batch limit, causing bad SMP scalability.
+ *
+ * To fix this we scale percpu_counter_batch by cputime_one_jiffy so we
+ * batch the same amount of time with CONFIG_VIRT_CPU_ACCOUNTING disabled
+ * and enabled. We cap it at INT_MAX which is the largest allowed batch value.
+ */
+#ifdef CONFIG_SMP
+#define CPUACCT_BATCH \
+ min_t(long, percpu_counter_batch * cputime_one_jiffy, INT_MAX)
+#else
+#define CPUACCT_BATCH 0
+#endif
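
    A worked example of the batch scaling, under assumed values (HZ=1000,
    cputime_t counted in nanoseconds, and percpu_counter_batch at its common
    small-system value of 32):

    /*
     * cputime_one_jiffy = NSEC_PER_SEC / HZ = 1000000
     * CPUACCT_BATCH     = min(32 * 1000000, INT_MAX) = 32000000
     *
     * Each CPU can therefore accumulate roughly 32 jiffies worth of time
     * locally before __percpu_counter_add() folds it into the global count,
     * i.e. the same folding rate as plain jiffy-based accounting would see.
     */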
+
+/*
+ * Charge the system/user time to the task's accounting group.
+ */
+static void cpuacct_update_stats(struct task_struct *tsk,
+ enum cpuacct_stat_index idx, cputime_t val)
+{
+ struct cpuacct *ca;
+ int batch = CPUACCT_BATCH;
+
+ if (unlikely(!cpuacct_subsys.active))
+ return;
+
+ rcu_read_lock();
+ ca = task_ca(tsk);
+
+ do {
+ __percpu_counter_add(&ca->cpustat[idx], val, batch);
+ ca = ca->parent;
+ } while (ca);
+ rcu_read_unlock();
+}
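
    As a usage sketch, the tick-accounting paths are assumed to feed this helper
    roughly as follows (the callers are not part of this hunk):

    /*
     * account_user_time(p, cputime, ...)   -> cpuacct_update_stats(p,
     *                                             CPUACCT_STAT_USER, cputime);
     * account_system_time(p, ..., cputime) -> cpuacct_update_stats(p,
     *                                             CPUACCT_STAT_SYSTEM, cputime);
     *
     * Each call walks ca->parent up to the root, so time charged to a child
     * group also shows up in every ancestor's cpuacct.stat.
     */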
+
struct cgroup_subsys cpuacct_subsys = {
.name = "cpuacct",
.create = cpuacct_create,
diff --git a/trunk/kernel/sched/Makefile b/trunk/kernel/sched/Makefile
deleted file mode 100644
index 9a7dd35102a3..000000000000
--- a/trunk/kernel/sched/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-ifdef CONFIG_FUNCTION_TRACER
-CFLAGS_REMOVE_clock.o = -pg
-endif
-
-ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
-# According to Alan Modra, the -fno-omit-frame-pointer is
-# needed for x86 only. Why this used to be enabled for all architectures is beyond
-# me. I suspect most platforms don't need this, but until we know that for sure
-# I turn this off for IA-64 only. Andreas Schwab says it's also needed on m68k
-# to get a correct value for the wait-channel (WCHAN in ps). --davidm
-CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
-endif
-
-obj-y += core.o clock.o idle_task.o fair.o rt.o stop_task.o
-obj-$(CONFIG_SMP) += cpupri.o
-obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
-obj-$(CONFIG_SCHEDSTATS) += stats.o
-obj-$(CONFIG_SCHED_DEBUG) += debug.o
-
-
diff --git a/trunk/kernel/sched/sched.h b/trunk/kernel/sched/sched.h
deleted file mode 100644
index 98c0c2623db8..000000000000
--- a/trunk/kernel/sched/sched.h
+++ /dev/null
@@ -1,1166 +0,0 @@
-
-#include
-#include
-#include
-#include
-
-#include "cpupri.h"
-
-extern __read_mostly int scheduler_running;
-
-/*
- * Convert user-nice values [ -20 ... 0 ... 19 ]
- * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
- * and back.
- */
-#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
-#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)
-#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio)
-
-/*
- * 'User priority' is the nice value converted to something we
- * can work with better when scaling various scheduler parameters,
- * it's a [ 0 ... 39 ] range.
- */
-#define USER_PRIO(p) ((p)-MAX_RT_PRIO)
-#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio)
-#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
-
-/*
- * Helpers for converting nanosecond timing to jiffy resolution
- */
-#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
-
-#define NICE_0_LOAD SCHED_LOAD_SCALE
-#define NICE_0_SHIFT SCHED_LOAD_SHIFT
-
-/*
- * These are the 'tuning knobs' of the scheduler:
- *
- * default timeslice is 100 msecs (used only for SCHED_RR tasks).
- * Timeslices get refilled after they expire.
- */
-#define DEF_TIMESLICE (100 * HZ / 1000)
-
-/*
- * single value that denotes runtime == period, i.e., unlimited time.
- */
-#define RUNTIME_INF ((u64)~0ULL)
-
-static inline int rt_policy(int policy)
-{
- if (policy == SCHED_FIFO || policy == SCHED_RR)
- return 1;
- return 0;
-}
-
-static inline int task_has_rt_policy(struct task_struct *p)
-{
- return rt_policy(p->policy);
-}
-
-/*
- * This is the priority-queue data structure of the RT scheduling class:
- */
-struct rt_prio_array {
- DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
- struct list_head queue[MAX_RT_PRIO];
-};
-
-struct rt_bandwidth {
- /* nests inside the rq lock: */
- raw_spinlock_t rt_runtime_lock;
- ktime_t rt_period;
- u64 rt_runtime;
- struct hrtimer rt_period_timer;
-};
-
-extern struct mutex sched_domains_mutex;
-
-#ifdef CONFIG_CGROUP_SCHED
-
-#include
-
-struct cfs_rq;
-struct rt_rq;
-
-static LIST_HEAD(task_groups);
-
-struct cfs_bandwidth {
-#ifdef CONFIG_CFS_BANDWIDTH
- raw_spinlock_t lock;
- ktime_t period;
- u64 quota, runtime;
- s64 hierarchal_quota;
- u64 runtime_expires;
-
- int idle, timer_active;
- struct hrtimer period_timer, slack_timer;
- struct list_head throttled_cfs_rq;
-
- /* statistics */
- int nr_periods, nr_throttled;
- u64 throttled_time;
-#endif
-};
-
-/* task group related information */
-struct task_group {
- struct cgroup_subsys_state css;
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
- /* schedulable entities of this group on each cpu */
- struct sched_entity **se;
- /* runqueue "owned" by this group on each cpu */
- struct cfs_rq **cfs_rq;
- unsigned long shares;
-
- atomic_t load_weight;
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
- struct sched_rt_entity **rt_se;
- struct rt_rq **rt_rq;
-
- struct rt_bandwidth rt_bandwidth;
-#endif
-
- struct rcu_head rcu;
- struct list_head list;
-
- struct task_group *parent;
- struct list_head siblings;
- struct list_head children;
-
-#ifdef CONFIG_SCHED_AUTOGROUP
- struct autogroup *autogroup;
-#endif
-
- struct cfs_bandwidth cfs_bandwidth;
-};
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-#define ROOT_TASK_GROUP_LOAD NICE_0_LOAD
-
-/*
- * A weight of 0 or 1 can cause arithmetic problems.
- * The weight of a cfs_rq is the sum of the weights of the entities
- * queued on it, so the weight of an entity should not be too large,
- * and neither should the shares value of a task group.
- * (The default weight is 1024 - so there's no practical
- * limitation from this.)
- */
-#define MIN_SHARES (1UL << 1)
-#define MAX_SHARES (1UL << 18)
-#endif
-
-/* Default task group.
- * Every task in the system belongs to this group at bootup.
- */
-extern struct task_group root_task_group;
-
-typedef int (*tg_visitor)(struct task_group *, void *);
-
-extern int walk_tg_tree_from(struct task_group *from,
- tg_visitor down, tg_visitor up, void *data);
-
-/*
- * Iterate the full tree, calling @down when first entering a node and @up when
- * leaving it for the final time.
- *
- * Caller must hold rcu_lock or sufficient equivalent.
- */
-static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
-{
- return walk_tg_tree_from(&root_task_group, down, up, data);
-}
-
-extern int tg_nop(struct task_group *tg, void *data);
-
-extern void free_fair_sched_group(struct task_group *tg);
-extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
-extern void unregister_fair_sched_group(struct task_group *tg, int cpu);
-extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
- struct sched_entity *se, int cpu,
- struct sched_entity *parent);
-extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
-extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
-
-extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
-extern void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
-extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
-
-extern void free_rt_sched_group(struct task_group *tg);
-extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
-extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
- struct sched_rt_entity *rt_se, int cpu,
- struct sched_rt_entity *parent);
-
-#else /* CONFIG_CGROUP_SCHED */
-
-struct cfs_bandwidth { };
-
-#endif /* CONFIG_CGROUP_SCHED */
-
-/* CFS-related fields in a runqueue */
-struct cfs_rq {
- struct load_weight load;
- unsigned long nr_running, h_nr_running;
-
- u64 exec_clock;
- u64 min_vruntime;
-#ifndef CONFIG_64BIT
- u64 min_vruntime_copy;
-#endif
-
- struct rb_root tasks_timeline;
- struct rb_node *rb_leftmost;
-
- struct list_head tasks;
- struct list_head *balance_iterator;
-
- /*
- * 'curr' points to the currently running entity on this cfs_rq.
- * It is set to NULL otherwise (i.e., when none is currently running).
- */
- struct sched_entity *curr, *next, *last, *skip;
-
-#ifdef CONFIG_SCHED_DEBUG
- unsigned int nr_spread_over;
-#endif
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
- struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */
-
- /*
- * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
- * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
- * (like users, containers etc.)
- *
- * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
- * list is used during load balance.
- */
- int on_list;
- struct list_head leaf_cfs_rq_list;
- struct task_group *tg; /* group that "owns" this runqueue */
-
-#ifdef CONFIG_SMP
- /*
- * the part of load.weight contributed by tasks
- */
- unsigned long task_weight;
-
- /*
- * h_load = weight * f(tg)
- *
- * Where f(tg) is the recursive weight fraction assigned to
- * this group.
- */
- unsigned long h_load;
-
- /*
- * Maintaining per-cpu shares distribution for group scheduling
- *
- * load_stamp is the last time we updated the load average
- * load_last is the last time we updated the load average and saw load
- * load_unacc_exec_time is currently unaccounted execution time
- */
- u64 load_avg;
- u64 load_period;
- u64 load_stamp, load_last, load_unacc_exec_time;
-
- unsigned long load_contribution;
-#endif /* CONFIG_SMP */
-#ifdef CONFIG_CFS_BANDWIDTH
- int runtime_enabled;
- u64 runtime_expires;
- s64 runtime_remaining;
-
- u64 throttled_timestamp;
- int throttled, throttle_count;
- struct list_head throttled_list;
-#endif /* CONFIG_CFS_BANDWIDTH */
-#endif /* CONFIG_FAIR_GROUP_SCHED */
-};
-
-static inline int rt_bandwidth_enabled(void)
-{
- return sysctl_sched_rt_runtime >= 0;
-}
-
-/* Real-Time classes' related field in a runqueue: */
-struct rt_rq {
- struct rt_prio_array active;
- unsigned long rt_nr_running;
-#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
- struct {
- int curr; /* highest queued rt task prio */
-#ifdef CONFIG_SMP
- int next; /* next highest */
-#endif
- } highest_prio;
-#endif
-#ifdef CONFIG_SMP
- unsigned long rt_nr_migratory;
- unsigned long rt_nr_total;
- int overloaded;
- struct plist_head pushable_tasks;
-#endif
- int rt_throttled;
- u64 rt_time;
- u64 rt_runtime;
- /* Nests inside the rq lock: */
- raw_spinlock_t rt_runtime_lock;
-
-#ifdef CONFIG_RT_GROUP_SCHED
- unsigned long rt_nr_boosted;
-
- struct rq *rq;
- struct list_head leaf_rt_rq_list;
- struct task_group *tg;
-#endif
-};
-
-#ifdef CONFIG_SMP
-
-/*
- * We add the notion of a root-domain which will be used to define per-domain
- * variables. Each exclusive cpuset essentially defines an island domain by
- * fully partitioning the member cpus from any other cpuset. Whenever a new
- * exclusive cpuset is created, we also create and attach a new root-domain
- * object.
- *
- */
-struct root_domain {
- atomic_t refcount;
- atomic_t rto_count;
- struct rcu_head rcu;
- cpumask_var_t span;
- cpumask_var_t online;
-
- /*
- * The "RT overload" flag: it gets set if a CPU has more than
- * one runnable RT task.
- */
- cpumask_var_t rto_mask;
- struct cpupri cpupri;
-};
-
-extern struct root_domain def_root_domain;
-
-#endif /* CONFIG_SMP */
-
-/*
- * This is the main, per-CPU runqueue data structure.
- *
- * Locking rule: code that wants to lock multiple runqueues (such as
- * the load balancing or thread migration code) must acquire the locks
- * in ascending &runqueue order.
- */
-struct rq {
- /* runqueue lock: */
- raw_spinlock_t lock;
-
- /*
- * nr_running and cpu_load should be in the same cacheline because
- * remote CPUs use both these fields when doing load calculation.
- */
- unsigned long nr_running;
- #define CPU_LOAD_IDX_MAX 5
- unsigned long cpu_load[CPU_LOAD_IDX_MAX];
- unsigned long last_load_update_tick;
-#ifdef CONFIG_NO_HZ
- u64 nohz_stamp;
- unsigned long nohz_flags;
-#endif
- int skip_clock_update;
-
- /* capture load from *all* tasks on this cpu: */
- struct load_weight load;
- unsigned long nr_load_updates;
- u64 nr_switches;
-
- struct cfs_rq cfs;
- struct rt_rq rt;
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
- /* list of leaf cfs_rq on this cpu: */
- struct list_head leaf_cfs_rq_list;
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
- struct list_head leaf_rt_rq_list;
-#endif
-
- /*
- * This is part of a global counter where only the total sum
- * over all CPUs matters. A task can increase this counter on
- * one CPU and, if it gets migrated afterwards, may decrease
- * it on another CPU. Always updated under the runqueue lock:
- */
- unsigned long nr_uninterruptible;
-
- struct task_struct *curr, *idle, *stop;
- unsigned long next_balance;
- struct mm_struct *prev_mm;
-
- u64 clock;
- u64 clock_task;
-
- atomic_t nr_iowait;
-
-#ifdef CONFIG_SMP
- struct root_domain *rd;
- struct sched_domain *sd;
-
- unsigned long cpu_power;
-
- unsigned char idle_balance;
- /* For active balancing */
- int post_schedule;
- int active_balance;
- int push_cpu;
- struct cpu_stop_work active_balance_work;
- /* cpu of this runqueue: */
- int cpu;
- int online;
-
- u64 rt_avg;
- u64 age_stamp;
- u64 idle_stamp;
- u64 avg_idle;
-#endif
-
-#ifdef CONFIG_IRQ_TIME_ACCOUNTING
- u64 prev_irq_time;
-#endif
-#ifdef CONFIG_PARAVIRT
- u64 prev_steal_time;
-#endif
-#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
- u64 prev_steal_time_rq;
-#endif
-
- /* calc_load related fields */
- unsigned long calc_load_update;
- long calc_load_active;
-
-#ifdef CONFIG_SCHED_HRTICK
-#ifdef CONFIG_SMP
- int hrtick_csd_pending;
- struct call_single_data hrtick_csd;
-#endif
- struct hrtimer hrtick_timer;
-#endif
-
-#ifdef CONFIG_SCHEDSTATS
- /* latency stats */
- struct sched_info rq_sched_info;
- unsigned long long rq_cpu_time;
- /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
-
- /* sys_sched_yield() stats */
- unsigned int yld_count;
-
- /* schedule() stats */
- unsigned int sched_switch;
- unsigned int sched_count;
- unsigned int sched_goidle;
-
- /* try_to_wake_up() stats */
- unsigned int ttwu_count;
- unsigned int ttwu_local;
-#endif
-
-#ifdef CONFIG_SMP
- struct llist_head wake_list;
-#endif
-};
-
-static inline int cpu_of(struct rq *rq)
-{
-#ifdef CONFIG_SMP
- return rq->cpu;
-#else
- return 0;
-#endif
-}
-
-DECLARE_PER_CPU(struct rq, runqueues);
-
-#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
-#define this_rq() (&__get_cpu_var(runqueues))
-#define task_rq(p) cpu_rq(task_cpu(p))
-#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
-#define raw_rq() (&__raw_get_cpu_var(runqueues))
-
-#ifdef CONFIG_SMP
-
-#define rcu_dereference_check_sched_domain(p) \
- rcu_dereference_check((p), \
- lockdep_is_held(&sched_domains_mutex))
-
-/*
- * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
- * See detach_destroy_domains: synchronize_sched for details.
- *
- * The domain tree of any CPU may only be accessed from within
- * preempt-disabled sections.
- */
-#define for_each_domain(cpu, __sd) \
- for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
- __sd; __sd = __sd->parent)
-
-#define for_each_lower_domain(sd) for (; sd; sd = sd->child)
-
-/**
- * highest_flag_domain - Return highest sched_domain containing flag.
- * @cpu: The cpu whose highest level of sched domain is to
- * be returned.
- * @flag: The flag to check for the highest sched_domain
- * for the given cpu.
- *
- * Returns the highest sched_domain of a cpu which contains the given flag.
- */
-static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
-{
- struct sched_domain *sd, *hsd = NULL;
-
- for_each_domain(cpu, sd) {
- if (!(sd->flags & flag))
- break;
- hsd = sd;
- }
-
- return hsd;
-}
-
-DECLARE_PER_CPU(struct sched_domain *, sd_llc);
-DECLARE_PER_CPU(int, sd_llc_id);
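
    The per-cpu sd_llc/sd_llc_id pointers above are expected to be filled in
    from the domain-rebuild path; the following is a hedged sketch of that
    update, assuming SD_SHARE_PKG_RESOURCES marks domains whose CPUs share a
    last-level cache:

    /* sketch: cache the highest domain whose CPUs share package resources */
    static void update_top_cache_domain(int cpu)
    {
            struct sched_domain *sd;
            int id = cpu;

            sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
            if (sd)
                    id = cpumask_first(sched_domain_span(sd));

            rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
            per_cpu(sd_llc_id, cpu) = id;
    }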
-
-#endif /* CONFIG_SMP */
-
-#include "stats.h"
-#include "auto_group.h"
-
-#ifdef CONFIG_CGROUP_SCHED
-
-/*
- * Return the group to which this task belongs.
- *
- * We use task_subsys_state_check() and extend the RCU verification with
- * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each
- * task it moves into the cgroup. Therefore by holding either of those locks,
- * we pin the task to the current cgroup.
- */
-static inline struct task_group *task_group(struct task_struct *p)
-{
- struct task_group *tg;
- struct cgroup_subsys_state *css;
-
- css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
- lockdep_is_held(&p->pi_lock) ||
- lockdep_is_held(&task_rq(p)->lock));
- tg = container_of(css, struct task_group, css);
-
- return autogroup_task_group(p, tg);
-}
-
-/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
-{
-#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
- struct task_group *tg = task_group(p);
-#endif
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
- p->se.cfs_rq = tg->cfs_rq[cpu];
- p->se.parent = tg->se[cpu];
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
- p->rt.rt_rq = tg->rt_rq[cpu];
- p->rt.parent = tg->rt_se[cpu];
-#endif
-}
-
-#else /* CONFIG_CGROUP_SCHED */
-
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline struct task_group *task_group(struct task_struct *p)
-{
- return NULL;
-}
-
-#endif /* CONFIG_CGROUP_SCHED */
-
-static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
-{
- set_task_rq(p, cpu);
-#ifdef CONFIG_SMP
- /*
- * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
- * successfully executed on another CPU. We must ensure that updates of
- * per-task data have been completed by this moment.
- */
- smp_wmb();
- task_thread_info(p)->cpu = cpu;
-#endif
-}
-
-/*
- * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
- */
-#ifdef CONFIG_SCHED_DEBUG
-# include
-# define const_debug __read_mostly
-#else
-# define const_debug const
-#endif
-
-extern const_debug unsigned int sysctl_sched_features;
-
-#define SCHED_FEAT(name, enabled) \
- __SCHED_FEAT_##name ,
-
-enum {
-#include "features.h"
- __SCHED_FEAT_NR,
-};
-
-#undef SCHED_FEAT
-
-#if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL)
-static __always_inline bool static_branch__true(struct jump_label_key *key)
-{
- return likely(static_branch(key)); /* Not out of line branch. */
-}
-
-static __always_inline bool static_branch__false(struct jump_label_key *key)
-{
- return unlikely(static_branch(key)); /* Out of line branch. */
-}
-
-#define SCHED_FEAT(name, enabled) \
-static __always_inline bool static_branch_##name(struct jump_label_key *key) \
-{ \
- return static_branch__##enabled(key); \
-}
-
-#include "features.h"
-
-#undef SCHED_FEAT
-
-extern struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR];
-#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
-#else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */
-#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
-#endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
-
-static inline u64 global_rt_period(void)
-{
- return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
-}
-
-static inline u64 global_rt_runtime(void)
-{
- if (sysctl_sched_rt_runtime < 0)
- return RUNTIME_INF;
-
- return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
-}
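
    With the usual sysctl defaults (rt_period = 1000000 us, rt_runtime =
    950000 us; assumed here, not defined in this header) the two helpers work
    out to:

    /*
     * global_rt_period()  = 1000000 * NSEC_PER_USEC = 1000000000 ns (1 s)
     * global_rt_runtime() =  950000 * NSEC_PER_USEC =  950000000 ns (0.95 s)
     *
     * i.e. realtime classes may use at most 95% of every 1 s period; writing
     * -1 to sched_rt_runtime_us turns the limit into RUNTIME_INF.
     */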
-
-
-
-static inline int task_current(struct rq *rq, struct task_struct *p)
-{
- return rq->curr == p;
-}
-
-static inline int task_running(struct rq *rq, struct task_struct *p)
-{
-#ifdef CONFIG_SMP
- return p->on_cpu;
-#else
- return task_current(rq, p);
-#endif
-}
-
-
-#ifndef prepare_arch_switch
-# define prepare_arch_switch(next) do { } while (0)
-#endif
-#ifndef finish_arch_switch
-# define finish_arch_switch(prev) do { } while (0)
-#endif
-
-#ifndef __ARCH_WANT_UNLOCKED_CTXSW
-static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
-{
-#ifdef CONFIG_SMP
- /*
- * We can optimise this out completely for !SMP, because the
- * SMP rebalancing from interrupt is the only thing that cares
- * here.
- */
- next->on_cpu = 1;
-#endif
-}
-
-static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
-{
-#ifdef CONFIG_SMP
- /*
- * After ->on_cpu is cleared, the task can be moved to a different CPU.
- * We must ensure this doesn't happen until the switch is completely
- * finished.
- */
- smp_wmb();
- prev->on_cpu = 0;
-#endif
-#ifdef CONFIG_DEBUG_SPINLOCK
- /* this is a valid case when another task releases the spinlock */
- rq->lock.owner = current;
-#endif
- /*
- * If we are tracking spinlock dependencies then we have to
- * fix up the runqueue lock - which gets 'carried over' from
- * prev into current:
- */
- spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
-
- raw_spin_unlock_irq(&rq->lock);
-}
-
-#else /* __ARCH_WANT_UNLOCKED_CTXSW */
-static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
-{
-#ifdef CONFIG_SMP
- /*
- * We can optimise this out completely for !SMP, because the
- * SMP rebalancing from interrupt is the only thing that cares
- * here.
- */
- next->on_cpu = 1;
-#endif
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
- raw_spin_unlock_irq(&rq->lock);
-#else
- raw_spin_unlock(&rq->lock);
-#endif
-}
-
-static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
-{
-#ifdef CONFIG_SMP
- /*
- * After ->on_cpu is cleared, the task can be moved to a different CPU.
- * We must ensure this doesn't happen until the switch is completely
- * finished.
- */
- smp_wmb();
- prev->on_cpu = 0;
-#endif
-#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
- local_irq_enable();
-#endif
-}
-#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
-
-
-static inline void update_load_add(struct load_weight *lw, unsigned long inc)
-{
- lw->weight += inc;
- lw->inv_weight = 0;
-}
-
-static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
-{
- lw->weight -= dec;
- lw->inv_weight = 0;
-}
-
-static inline void update_load_set(struct load_weight *lw, unsigned long w)
-{
- lw->weight = w;
- lw->inv_weight = 0;
-}
-
-/*
- * To aid in avoiding the subversion of "niceness" due to uneven distribution
- * of tasks with abnormal "nice" values across CPUs, the contribution that
- * each task makes to its run queue's load is weighted according to its
- * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
- * scaled version of the new time slice allocation that they receive on time
- * slice expiry etc.
- */
-
-#define WEIGHT_IDLEPRIO 3
-#define WMULT_IDLEPRIO 1431655765
-
-/*
- * Nice levels are multiplicative, with a gentle 10% change for every
- * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
- * nice 1, it will get ~10% less CPU time than another CPU-bound task
- * that remained on nice 0.
- *
- * The "10% effect" is relative and cumulative: from _any_ nice level,
- * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
- * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
- * If a task goes up by ~10% and another task goes down by ~10% then
- * the relative distance between them is ~25%.)
- */
-static const int prio_to_weight[40] = {
- /* -20 */ 88761, 71755, 56483, 46273, 36291,
- /* -15 */ 29154, 23254, 18705, 14949, 11916,
- /* -10 */ 9548, 7620, 6100, 4904, 3906,
- /* -5 */ 3121, 2501, 1991, 1586, 1277,
- /* 0 */ 1024, 820, 655, 526, 423,
- /* 5 */ 335, 272, 215, 172, 137,
- /* 10 */ 110, 87, 70, 56, 45,
- /* 15 */ 36, 29, 23, 18, 15,
-};
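
    A quick check of the "10% effect" against the table: each step is the
    previous weight divided by roughly 1.25, anchored at NICE_0_LOAD = 1024
    for nice 0.

    /*
     * weight(nice) ~= 1024 / 1.25^nice:
     *   nice -1: 1024 * 1.25 ~= 1280   (table: 1277)
     *   nice  0: 1024                  (table: 1024)
     *   nice  1: 1024 / 1.25 ~= 819    (table: 820)
     *
     * Two tasks one nice level apart thus split the CPU roughly 55%/45%,
     * i.e. the ~10% relative difference described above.
     */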
-
-/*
- * Inverse (2^32/x) values of the prio_to_weight[] array, precalculated.
- *
- * In cases where the weight does not change often, we can use the
- * precalculated inverse to speed up arithmetic by turning divisions
- * into multiplications:
- */
-static const u32 prio_to_wmult[40] = {
- /* -20 */ 48388, 59856, 76040, 92818, 118348,
- /* -15 */ 147320, 184698, 229616, 287308, 360437,
- /* -10 */ 449829, 563644, 704093, 875809, 1099582,
- /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326,
- /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587,
- /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126,
- /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
- /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
-};
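
    The point of the inverse table is to replace the division in weight
    scaling with a multiply and a shift; a worked example using the table
    entries shown (the calc_delta-style helper that performs this lives in
    the fair class, not in this header):

    /*
     * delta * NICE_0_LOAD / weight  ~=  (delta * NICE_0_LOAD * inv_weight) >> 32
     *
     * e.g. delta = 1000000 ns against a nice 1 entity (weight 820,
     * inv_weight 5237765):
     *
     *   exact:      1000000 * 1024 / 820              = 1248780
     *   via table:  (1000000 * 1024 * 5237765) >> 32 ~= 1248781
     */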
-
-/* Time spent by the tasks of the cpu accounting group executing in ... */
-enum cpuacct_stat_index {
- CPUACCT_STAT_USER, /* ... user mode */
- CPUACCT_STAT_SYSTEM, /* ... kernel mode */
-
- CPUACCT_STAT_NSTATS,
-};
-
-
-#define sched_class_highest (&stop_sched_class)
-#define for_each_class(class) \
- for (class = sched_class_highest; class; class = class->next)
-
-extern const struct sched_class stop_sched_class;
-extern const struct sched_class rt_sched_class;
-extern const struct sched_class fair_sched_class;
-extern const struct sched_class idle_sched_class;
-
-
-#ifdef CONFIG_SMP
-
-extern void trigger_load_balance(struct rq *rq, int cpu);
-extern void idle_balance(int this_cpu, struct rq *this_rq);
-
-#else /* CONFIG_SMP */
-
-static inline void idle_balance(int cpu, struct rq *rq)
-{
-}
-
-#endif
-
-extern void sysrq_sched_debug_show(void);
-extern void sched_init_granularity(void);
-extern void update_max_interval(void);
-extern void update_group_power(struct sched_domain *sd, int cpu);
-extern int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu);
-extern void init_sched_rt_class(void);
-extern void init_sched_fair_class(void);
-
-extern void resched_task(struct task_struct *p);
-extern void resched_cpu(int cpu);
-
-extern struct rt_bandwidth def_rt_bandwidth;
-extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
-
-extern void update_cpu_load(struct rq *this_rq);
-
-#ifdef CONFIG_CGROUP_CPUACCT
-#include
-/* track cpu usage of a group of tasks and its child groups */
-struct cpuacct {
- struct cgroup_subsys_state css;
- /* cpuusage holds pointer to a u64-type object on every cpu */
- u64 __percpu *cpuusage;
- struct kernel_cpustat __percpu *cpustat;
-};
-
-/* return cpu accounting group corresponding to this container */
-static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
-{
- return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id),
- struct cpuacct, css);
-}
-
-/* return cpu accounting group to which this task belongs */
-static inline struct cpuacct *task_ca(struct task_struct *tsk)
-{
- return container_of(task_subsys_state(tsk, cpuacct_subsys_id),
- struct cpuacct, css);
-}
-
-static inline struct cpuacct *parent_ca(struct cpuacct *ca)
-{
- if (!ca || !ca->css.cgroup->parent)
- return NULL;
- return cgroup_ca(ca->css.cgroup->parent);
-}
-
-extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
-#else
-static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
-#endif
-
-static inline void inc_nr_running(struct rq *rq)
-{
- rq->nr_running++;
-}
-
-static inline void dec_nr_running(struct rq *rq)
-{
- rq->nr_running--;
-}
-
-extern void update_rq_clock(struct rq *rq);
-
-extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
-extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
-
-extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
-
-extern const_debug unsigned int sysctl_sched_time_avg;
-extern const_debug unsigned int sysctl_sched_nr_migrate;
-extern const_debug unsigned int sysctl_sched_migration_cost;
-
-static inline u64 sched_avg_period(void)
-{
- return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
-}
-
-void calc_load_account_idle(struct rq *this_rq);
-
-#ifdef CONFIG_SCHED_HRTICK
-
-/*
- * Use hrtick when:
- * - enabled by features
- * - hrtimer is actually high res
- */
-static inline int hrtick_enabled(struct rq *rq)
-{
- if (!sched_feat(HRTICK))
- return 0;
- if (!cpu_active(cpu_of(rq)))
- return 0;
- return hrtimer_is_hres_active(&rq->hrtick_timer);
-}
-
-void hrtick_start(struct rq *rq, u64 delay);
-
-#else
-
-static inline int hrtick_enabled(struct rq *rq)
-{
- return 0;
-}
-
-#endif /* CONFIG_SCHED_HRTICK */
-
-#ifdef CONFIG_SMP
-extern void sched_avg_update(struct rq *rq);
-static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
-{
- rq->rt_avg += rt_delta;
- sched_avg_update(rq);
-}
-#else
-static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
-static inline void sched_avg_update(struct rq *rq) { }
-#endif
-
-extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
-
-#ifdef CONFIG_SMP
-#ifdef CONFIG_PREEMPT
-
-static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
-
-/*
- * fair double_lock_balance: Safely acquires both rq->locks in a fair
- * way at the expense of forcing extra atomic operations in all
- * invocations. This assures that the double_lock is acquired using the
- * same underlying policy as the spinlock_t on this architecture, which
- * reduces latency compared to the unfair variant below. However, it
- * also adds more overhead and therefore may reduce throughput.
- */
-static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
- __releases(this_rq->lock)
- __acquires(busiest->lock)
- __acquires(this_rq->lock)
-{
- raw_spin_unlock(&this_rq->lock);
- double_rq_lock(this_rq, busiest);
-
- return 1;
-}
-
-#else
-/*
- * Unfair double_lock_balance: Optimizes throughput at the expense of
- * latency by eliminating extra atomic operations when the locks are
- * already in proper order on entry. This favors lower cpu-ids and will
- * grant the double lock to lower cpus over higher ids under contention,
- * regardless of entry order into the function.
- */
-static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
- __releases(this_rq->lock)
- __acquires(busiest->lock)
- __acquires(this_rq->lock)
-{
- int ret = 0;
-
- if (unlikely(!raw_spin_trylock(&busiest->lock))) {
- if (busiest < this_rq) {
- raw_spin_unlock(&this_rq->lock);
- raw_spin_lock(&busiest->lock);
- raw_spin_lock_nested(&this_rq->lock,
- SINGLE_DEPTH_NESTING);
- ret = 1;
- } else
- raw_spin_lock_nested(&busiest->lock,
- SINGLE_DEPTH_NESTING);
- }
- return ret;
-}
-
-#endif /* CONFIG_PREEMPT */
-
-/*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
- */
-static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
-{
- if (unlikely(!irqs_disabled())) {
- /* printk() doesn't work well under rq->lock */
- raw_spin_unlock(&this_rq->lock);
- BUG_ON(1);
- }
-
- return _double_lock_balance(this_rq, busiest);
-}
-
-static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
- __releases(busiest->lock)
-{
- raw_spin_unlock(&busiest->lock);
- lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
-}
-
-/*
- * double_rq_lock - safely lock two runqueues
- *
- * Note this does not disable interrupts like task_rq_lock,
- * you need to do so manually before calling.
- */
-static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
- __acquires(rq1->lock)
- __acquires(rq2->lock)
-{
- BUG_ON(!irqs_disabled());
- if (rq1 == rq2) {
- raw_spin_lock(&rq1->lock);
- __acquire(rq2->lock); /* Fake it out ;) */
- } else {
- if (rq1 < rq2) {
- raw_spin_lock(&rq1->lock);
- raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
- } else {
- raw_spin_lock(&rq2->lock);
- raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
- }
- }
-}
-
-/*
- * double_rq_unlock - safely unlock two runqueues
- *
- * Note this does not restore interrupts like task_rq_unlock,
- * you need to do so manually after calling.
- */
-static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
- __releases(rq1->lock)
- __releases(rq2->lock)
-{
- raw_spin_unlock(&rq1->lock);
- if (rq1 != rq2)
- raw_spin_unlock(&rq2->lock);
- else
- __release(rq2->lock);
-}
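
    A hedged sketch of the calling pattern these helpers expect (the
    migration-style caller is illustrative; only the irq and lock-ordering
    discipline matters):

    /* sketch: touching two runqueues with irqs off and ordered locking */
    static void example_move_load(int src_cpu, int dst_cpu)
    {
            struct rq *src = cpu_rq(src_cpu), *dst = cpu_rq(dst_cpu);
            unsigned long flags;

            local_irq_save(flags);          /* double_rq_lock() asserts irqs off */
            double_rq_lock(src, dst);       /* takes both locks in ascending order */
            /* ... move tasks or load between src and dst ... */
            double_rq_unlock(src, dst);
            local_irq_restore(flags);
    }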
-
-#else /* CONFIG_SMP */
-
-/*
- * double_rq_lock - safely lock two runqueues
- *
- * Note this does not disable interrupts like task_rq_lock,
- * you need to do so manually before calling.
- */
-static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
- __acquires(rq1->lock)
- __acquires(rq2->lock)
-{
- BUG_ON(!irqs_disabled());
- BUG_ON(rq1 != rq2);
- raw_spin_lock(&rq1->lock);
- __acquire(rq2->lock); /* Fake it out ;) */
-}
-
-/*
- * double_rq_unlock - safely unlock two runqueues
- *
- * Note this does not restore interrupts like task_rq_unlock,
- * you need to do so manually after calling.
- */
-static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
- __releases(rq1->lock)
- __releases(rq2->lock)
-{
- BUG_ON(rq1 != rq2);
- raw_spin_unlock(&rq1->lock);
- __release(rq2->lock);
-}
-
-#endif
-
-extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
-extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
-extern void print_cfs_stats(struct seq_file *m, int cpu);
-extern void print_rt_stats(struct seq_file *m, int cpu);
-
-extern void init_cfs_rq(struct cfs_rq *cfs_rq);
-extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq);
-extern void unthrottle_offline_cfs_rqs(struct rq *rq);
-
-extern void account_cfs_bandwidth_used(int enabled, int was_enabled);
-
-#ifdef CONFIG_NO_HZ
-enum rq_nohz_flag_bits {
- NOHZ_TICK_STOPPED,
- NOHZ_BALANCE_KICK,
- NOHZ_IDLE,
-};
-
-#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
-#endif
diff --git a/trunk/kernel/sched/stats.c b/trunk/kernel/sched/stats.c
deleted file mode 100644
index 2a581ba8e190..000000000000
--- a/trunk/kernel/sched/stats.c
+++ /dev/null
@@ -1,111 +0,0 @@
-
-#include