diff --git a/[refs] b/[refs]
index d9866101c5d6..370831b02c09 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
---
-refs/heads/master: 37d9869ed928268409b48f52c57449918c0fd307
+refs/heads/master: 68c072388d2339af504c033a51886ea7c6b8d806
diff --git a/trunk/Documentation/00-INDEX b/trunk/Documentation/00-INDEX
index 73060819ed99..5b5aba404aac 100644
--- a/trunk/Documentation/00-INDEX
+++ b/trunk/Documentation/00-INDEX
@@ -251,6 +251,8 @@ mono.txt
- how to execute Mono-based .NET binaries with the help of BINFMT_MISC.
moxa-smartio
- file with info on installing/using Moxa multiport serial driver.
+mtrr.txt
+ - how to use PPro Memory Type Range Registers to increase performance.
mutex-design.txt
- info on the generic mutex subsystem.
namespaces/
diff --git a/trunk/Documentation/DMA-API.txt b/trunk/Documentation/DMA-API.txt
index b8e86460046e..d8b63d164e41 100644
--- a/trunk/Documentation/DMA-API.txt
+++ b/trunk/Documentation/DMA-API.txt
@@ -337,7 +337,7 @@ With scatterlists, you use the resulting mapping like this:
int i, count = dma_map_sg(dev, sglist, nents, direction);
struct scatterlist *sg;
- for_each_sg(sglist, sg, count, i) {
+ for (i = 0, sg = sglist; i < count; i++, sg++) {
hw_address[i] = sg_dma_address(sg);
hw_len[i] = sg_dma_len(sg);
}
diff --git a/trunk/Documentation/DocBook/kernel-api.tmpl b/trunk/Documentation/DocBook/kernel-api.tmpl
index 9d0058e788e5..b7b1482f6e04 100644
--- a/trunk/Documentation/DocBook/kernel-api.tmpl
+++ b/trunk/Documentation/DocBook/kernel-api.tmpl
@@ -283,7 +283,6 @@ X!Earch/x86/kernel/mca_32.c
Security Framework
!Isecurity/security.c
-!Esecurity/inode.c
@@ -365,10 +364,6 @@ X!Edrivers/pnp/system.c
!Eblock/blk-barrier.c
!Eblock/blk-tag.c
!Iblock/blk-tag.c
-!Eblock/blk-integrity.c
-!Iblock/blktrace.c
-!Iblock/genhd.c
-!Eblock/genhd.c
diff --git a/trunk/Documentation/RCU/checklist.txt b/trunk/Documentation/RCU/checklist.txt
index 6e253407b3dc..cf5562cbe356 100644
--- a/trunk/Documentation/RCU/checklist.txt
+++ b/trunk/Documentation/RCU/checklist.txt
@@ -210,7 +210,7 @@ over a rather long period of time, but improvements are always welcome!
number of updates per grace period.
9. All RCU list-traversal primitives, which include
- rcu_dereference(), list_for_each_entry_rcu(),
+ rcu_dereference(), list_for_each_rcu(), list_for_each_entry_rcu(),
list_for_each_continue_rcu(), and list_for_each_safe_rcu(),
must be either within an RCU read-side critical section or
must be protected by appropriate update-side locks. RCU
diff --git a/trunk/Documentation/RCU/rcuref.txt b/trunk/Documentation/RCU/rcuref.txt
index 4202ad093130..451de2ad8329 100644
--- a/trunk/Documentation/RCU/rcuref.txt
+++ b/trunk/Documentation/RCU/rcuref.txt
@@ -29,9 +29,9 @@ release_referenced() delete()
}
If this list/array is made lock free using RCU as in changing the
-write_lock() in add() and delete() to spin_lock() and changing read_lock()
-in search_and_reference() to rcu_read_lock(), the atomic_inc() in
-search_and_reference() could potentially hold reference to an element which
+write_lock() in add() and delete() to spin_lock and changing read_lock
+in search_and_reference to rcu_read_lock(), the atomic_get in
+search_and_reference could potentially hold reference to an element which
has already been deleted from the list/array. Use atomic_inc_not_zero()
in this scenario as follows:
@@ -40,20 +40,20 @@ add() search_and_reference()
{ {
alloc_object rcu_read_lock();
... search_for_element
- atomic_set(&el->rc, 1); if (!atomic_inc_not_zero(&el->rc)) {
- spin_lock(&list_lock); rcu_read_unlock();
+ atomic_set(&el->rc, 1); if (atomic_inc_not_zero(&el->rc)) {
+ write_lock(&list_lock); rcu_read_unlock();
return FAIL;
add_element }
... ...
- spin_unlock(&list_lock); rcu_read_unlock();
+ write_unlock(&list_lock); rcu_read_unlock();
} }
3. 4.
release_referenced() delete()
{ {
- ... spin_lock(&list_lock);
+ ... write_lock(&list_lock);
if (atomic_dec_and_test(&el->rc)) ...
call_rcu(&el->head, el_free); delete_element
- ... spin_unlock(&list_lock);
+ ... write_unlock(&list_lock);
} ...
if (atomic_dec_and_test(&el->rc))
call_rcu(&el->head, el_free);
diff --git a/trunk/Documentation/RCU/whatisRCU.txt b/trunk/Documentation/RCU/whatisRCU.txt
index 96170824a717..e04d643a9f57 100644
--- a/trunk/Documentation/RCU/whatisRCU.txt
+++ b/trunk/Documentation/RCU/whatisRCU.txt
@@ -786,6 +786,8 @@ RCU pointer/list traversal:
list_for_each_entry_rcu
hlist_for_each_entry_rcu
+ list_for_each_rcu (to be deprecated in favor of
+ list_for_each_entry_rcu)
list_for_each_continue_rcu (to be deprecated in favor of new
list_for_each_entry_continue_rcu)
diff --git a/trunk/Documentation/SELinux.txt b/trunk/Documentation/SELinux.txt
deleted file mode 100644
index 07eae00f3314..000000000000
--- a/trunk/Documentation/SELinux.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-If you want to use SELinux, chances are you will want
-to use the distro-provided policies, or install the
-latest reference policy release from
- http://oss.tresys.com/projects/refpolicy
-
-However, if you want to install a dummy policy for
-testing, you can do using 'mdp' provided under
-scripts/selinux. Note that this requires the selinux
-userspace to be installed - in particular you will
-need checkpolicy to compile a kernel, and setfiles and
-fixfiles to label the filesystem.
-
- 1. Compile the kernel with selinux enabled.
- 2. Type 'make' to compile mdp.
- 3. Make sure that you are not running with
- SELinux enabled and a real policy. If
- you are, reboot with selinux disabled
- before continuing.
- 4. Run install_policy.sh:
- cd scripts/selinux
- sh install_policy.sh
-
-Step 4 will create a new dummy policy valid for your
-kernel, with a single selinux user, role, and type.
-It will compile the policy, will set your SELINUXTYPE to
-dummy in /etc/selinux/config, install the compiled policy
-as 'dummy', and relabel your filesystem.
diff --git a/trunk/Documentation/block/deadline-iosched.txt b/trunk/Documentation/block/deadline-iosched.txt
index 72576769e0f4..c23cab13c3d1 100644
--- a/trunk/Documentation/block/deadline-iosched.txt
+++ b/trunk/Documentation/block/deadline-iosched.txt
@@ -30,18 +30,12 @@ write_expire (in ms)
Similar to read_expire mentioned above, but for writes.
-fifo_batch (number of requests)
+fifo_batch
----------
-Requests are grouped into ``batches'' of a particular data direction (read or
-write) which are serviced in increasing sector order. To limit extra seeking,
-deadline expiries are only checked between batches. fifo_batch controls the
-maximum number of requests per batch.
-
-This parameter tunes the balance between per-request latency and aggregate
-throughput. When low latency is the primary concern, smaller is better (where
-a value of 1 yields first-come first-served behaviour). Increasing fifo_batch
-generally improves throughput, at the cost of latency variation.
+When a read request expires its deadline, we must move some requests from
+the sorted io scheduler list to the block device dispatch queue. fifo_batch
+controls how many requests we move.
writes_starved (number of dispatches)
diff --git a/trunk/Documentation/cdrom/ide-cd b/trunk/Documentation/cdrom/ide-cd
index 2c558cd6c1ef..91c0dcc6fa5c 100644
--- a/trunk/Documentation/cdrom/ide-cd
+++ b/trunk/Documentation/cdrom/ide-cd
@@ -145,7 +145,8 @@ useful for reading photocds.
To play an audio CD, you should first unmount and remove any data
CDROM. Any of the CDROM player programs should then work (workman,
-workbone, cdplayer, etc.).
+workbone, cdplayer, etc.). Lacking anything else, you could use the
+cdtester program in Documentation/cdrom/sbpcd.
On a few drives, you can read digital audio directly using a program
such as cdda2wav. The only types of drive which I've heard support
diff --git a/trunk/Documentation/kernel-doc-nano-HOWTO.txt b/trunk/Documentation/kernel-doc-nano-HOWTO.txt
index c6841eee9598..0bd32748a467 100644
--- a/trunk/Documentation/kernel-doc-nano-HOWTO.txt
+++ b/trunk/Documentation/kernel-doc-nano-HOWTO.txt
@@ -168,10 +168,10 @@ if ($#ARGV < 0) {
mkdir $ARGV[0],0777;
$state = 0;
while () {
- if (/^\.TH \"[^\"]*\" 9 \"([^\"]*)\"/) {
+ if (/^\.TH \"[^\"]*\" 4 \"([^\"]*)\"/) {
if ($state == 1) { close OUT }
$state = 1;
- $fn = "$ARGV[0]/$1.9";
+ $fn = "$ARGV[0]/$1.4";
print STDERR "Creating $fn\n";
open OUT, ">$fn" or die "can't open $fn: $!\n";
print OUT $_;
diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt
index 329dcabe4c5e..1150444a21ab 100644
--- a/trunk/Documentation/kernel-parameters.txt
+++ b/trunk/Documentation/kernel-parameters.txt
@@ -463,6 +463,12 @@ and is between 256 and 4096 characters. It is defined in the file
Range: 0 - 8192
Default: 64
+ disable_8254_timer
+ enable_8254_timer
+ [IA32/X86_64] Disable/Enable interrupt 0 timer routing
+ over the 8254 in addition to over the IO-APIC. The
+ kernel tries to set a sensible default.
+
hpet= [X86-32,HPET] option to control HPET usage
Format: { enable (default) | disable | force }
disable: disable HPET and use PIT instead
@@ -1876,12 +1882,6 @@ and is between 256 and 4096 characters. It is defined in the file
shapers= [NET]
Maximal number of shapers.
- show_msr= [x86] show boot-time MSR settings
- Format: { }
- Show boot-time (BIOS-initialized) MSR settings.
- The parameter means the number of CPUs to show,
- for example 1 means boot CPU only.
-
sim710= [SCSI,HW]
See header of drivers/scsi/sim710.c.
diff --git a/trunk/Documentation/x86/mtrr.txt b/trunk/Documentation/mtrr.txt
similarity index 99%
rename from trunk/Documentation/x86/mtrr.txt
rename to trunk/Documentation/mtrr.txt
index cc071dc333c2..c39ac395970e 100644
--- a/trunk/Documentation/x86/mtrr.txt
+++ b/trunk/Documentation/mtrr.txt
@@ -18,7 +18,7 @@ Richard Gooch
The AMD K6-2 (stepping 8 and above) and K6-3 processors have two
MTRRs. These are supported. The AMD Athlon family provide 8 Intel
style MTRRs.
-
+
The Centaur C6 (WinChip) has 8 MCRs, allowing write-combining. These
are supported.
@@ -87,7 +87,7 @@ reg00: base=0x00000000 ( 0MB), size= 64MB: write-back, count=1
reg01: base=0xfb000000 (4016MB), size= 16MB: write-combining, count=1
reg02: base=0xfb000000 (4016MB), size= 4kB: uncachable, count=1
-Some cards (especially Voodoo Graphics boards) need this 4 kB area
+Some cards (especially Voodoo Graphics boards) need this 4 kB area
excluded from the beginning of the region because it is used for
registers.
diff --git a/trunk/Documentation/s390/CommonIO b/trunk/Documentation/s390/CommonIO
index 339207d11d95..bf0baa19ec24 100644
--- a/trunk/Documentation/s390/CommonIO
+++ b/trunk/Documentation/s390/CommonIO
@@ -70,19 +70,13 @@ Command line parameters
Note: While already known devices can be added to the list of devices to be
ignored, there will be no effect on then. However, if such a device
- disappears and then reappears, it will then be ignored. To make
- known devices go away, you need the "purge" command (see below).
+ disappears and then reappears, it will then be ignored.
For example,
"echo add 0.0.a000-0.0.accc, 0.0.af00-0.0.afff > /proc/cio_ignore"
will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored
devices.
- You can remove already known but now ignored devices via
- "echo purge > /proc/cio_ignore"
- All devices ignored but still registered and not online (= not in use)
- will be deregistered and thus removed from the system.
-
The devices can be specified either by bus id (0.x.abcd) or, for 2.4 backward
compatibility, by the device number in hexadecimal (0xabcd or abcd). Device
numbers given as 0xabcd will be interpreted as 0.0.abcd.
@@ -104,7 +98,8 @@ debugfs entries
handling).
- /sys/kernel/debug/s390dbf/cio_msg/sprintf
- Various debug messages from the common I/O-layer.
+ Various debug messages from the common I/O-layer, including messages
+ printed when cio_msg=yes.
- /sys/kernel/debug/s390dbf/cio_trace/hex_ascii
Logs the calling of functions in the common I/O-layer and, if applicable,
diff --git a/trunk/Documentation/scheduler/sched-design-CFS.txt b/trunk/Documentation/scheduler/sched-design-CFS.txt
index 9d8eb553884c..88bcb8767335 100644
--- a/trunk/Documentation/scheduler/sched-design-CFS.txt
+++ b/trunk/Documentation/scheduler/sched-design-CFS.txt
@@ -1,242 +1,151 @@
- =============
- CFS Scheduler
- =============
-
-1. OVERVIEW
-
-CFS stands for "Completely Fair Scheduler," and is the new "desktop" process
-scheduler implemented by Ingo Molnar and merged in Linux 2.6.23. It is the
-replacement for the previous vanilla scheduler's SCHED_OTHER interactivity
-code.
-
-80% of CFS's design can be summed up in a single sentence: CFS basically models
-an "ideal, precise multi-tasking CPU" on real hardware.
-
-"Ideal multi-tasking CPU" is a (non-existent :-)) CPU that has 100% physical
-power and which can run each task at precise equal speed, in parallel, each at
-1/nr_running speed. For example: if there are 2 tasks running, then it runs
-each at 50% physical power --- i.e., actually in parallel.
-
-On real hardware, we can run only a single task at once, so we have to
-introduce the concept of "virtual runtime." The virtual runtime of a task
-specifies when its next timeslice would start execution on the ideal
-multi-tasking CPU described above. In practice, the virtual runtime of a task
-is its actual runtime normalized to the total number of running tasks.
-
-
-
-2. FEW IMPLEMENTATION DETAILS
-
-In CFS the virtual runtime is expressed and tracked via the per-task
-p->se.vruntime (nanosec-unit) value. This way, it's possible to accurately
-timestamp and measure the "expected CPU time" a task should have gotten.
-
-[ small detail: on "ideal" hardware, at any time all tasks would have the same
- p->se.vruntime value --- i.e., tasks would execute simultaneously and no task
- would ever get "out of balance" from the "ideal" share of CPU time. ]
-
-CFS's task picking logic is based on this p->se.vruntime value and it is thus
-very simple: it always tries to run the task with the smallest p->se.vruntime
-value (i.e., the task which executed least so far). CFS always tries to split
-up CPU time between runnable tasks as close to "ideal multitasking hardware" as
-possible.
-
-Most of the rest of CFS's design just falls out of this really simple concept,
-with a few add-on embellishments like nice levels, multiprocessing and various
-algorithm variants to recognize sleepers.
-
-
-
-3. THE RBTREE
-
-CFS's design is quite radical: it does not use the old data structures for the
-runqueues, but it uses a time-ordered rbtree to build a "timeline" of future
-task execution, and thus has no "array switch" artifacts (by which both the
-previous vanilla scheduler and RSDL/SD are affected).
-
-CFS also maintains the rq->cfs.min_vruntime value, which is a monotonic
-increasing value tracking the smallest vruntime among all tasks in the
-runqueue. The total amount of work done by the system is tracked using
-min_vruntime; that value is used to place newly activated entities on the left
-side of the tree as much as possible.
-
-The total number of running tasks in the runqueue is accounted through the
-rq->cfs.load value, which is the sum of the weights of the tasks queued on the
-runqueue.
-
-CFS maintains a time-ordered rbtree, where all runnable tasks are sorted by the
-p->se.vruntime key (there is a subtraction using rq->cfs.min_vruntime to
-account for possible wraparounds). CFS picks the "leftmost" task from this
-tree and sticks to it.
-As the system progresses forwards, the executed tasks are put into the tree
-more and more to the right --- slowly but surely giving a chance for every task
-to become the "leftmost task" and thus get on the CPU within a deterministic
-amount of time.
-
-Summing up, CFS works like this: it runs a task a bit, and when the task
-schedules (or a scheduler tick happens) the task's CPU usage is "accounted
-for": the (small) time it just spent using the physical CPU is added to
-p->se.vruntime. Once p->se.vruntime gets high enough so that another task
-becomes the "leftmost task" of the time-ordered rbtree it maintains (plus a
-small amount of "granularity" distance relative to the leftmost task so that we
-do not over-schedule tasks and trash the cache), then the new leftmost task is
-picked and the current task is preempted.
-
-
-
-4. SOME FEATURES OF CFS
-
-CFS uses nanosecond granularity accounting and does not rely on any jiffies or
-other HZ detail. Thus the CFS scheduler has no notion of "timeslices" in the
-way the previous scheduler had, and has no heuristics whatsoever. There is
-only one central tunable (you have to switch on CONFIG_SCHED_DEBUG):
-
- /proc/sys/kernel/sched_granularity_ns
-
-which can be used to tune the scheduler from "desktop" (i.e., low latencies) to
-"server" (i.e., good batching) workloads. It defaults to a setting suitable
-for desktop workloads. SCHED_BATCH is handled by the CFS scheduler module too.
-
-Due to its design, the CFS scheduler is not prone to any of the "attacks" that
-exist today against the heuristics of the stock scheduler: fiftyp.c, thud.c,
-chew.c, ring-test.c, massive_intr.c all work fine and do not impact
-interactivity and produce the expected behavior.
-
-The CFS scheduler has a much stronger handling of nice levels and SCHED_BATCH
-than the previous vanilla scheduler: both types of workloads are isolated much
-more aggressively.
-
-SMP load-balancing has been reworked/sanitized: the runqueue-walking
-assumptions are gone from the load-balancing code now, and iterators of the
-scheduling modules are used. The balancing code got quite a bit simpler as a
-result.
-
-
-
-5. Scheduling policies
-
-CFS implements three scheduling policies:
-
- - SCHED_NORMAL (traditionally called SCHED_OTHER): The scheduling
- policy that is used for regular tasks.
-
- - SCHED_BATCH: Does not preempt nearly as often as regular tasks
- would, thereby allowing tasks to run longer and make better use of
- caches but at the cost of interactivity. This is well suited for
- batch jobs.
-
- - SCHED_IDLE: This is even weaker than nice 19, but its not a true
- idle timer scheduler in order to avoid to get into priority
- inversion problems which would deadlock the machine.
-
-SCHED_FIFO/_RR are implemented in sched_rt.c and are as specified by
-POSIX.
-
-The command chrt from util-linux-ng 2.13.1.1 can set all of these except
-SCHED_IDLE.
-
-
-
-6. SCHEDULING CLASSES
-
-The new CFS scheduler has been designed in such a way to introduce "Scheduling
-Classes," an extensible hierarchy of scheduler modules. These modules
-encapsulate scheduling policy details and are handled by the scheduler core
-without the core code assuming too much about them.
-
-sched_fair.c implements the CFS scheduler described above.
-
-sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler way than
-the previous vanilla scheduler did. It uses 100 runqueues (for all 100 RT
-priority levels, instead of 140 in the previous scheduler) and it needs no
-expired array.
-
-Scheduling classes are implemented through the sched_class structure, which
-contains hooks to functions that must be called whenever an interesting event
-occurs.
-
-This is the (partial) list of the hooks:
-
- - enqueue_task(...)
-
- Called when a task enters a runnable state.
- It puts the scheduling entity (task) into the red-black tree and
- increments the nr_running variable.
-
- - dequeue_tree(...)
-
- When a task is no longer runnable, this function is called to keep the
- corresponding scheduling entity out of the red-black tree. It decrements
- the nr_running variable.
-
- - yield_task(...)
-
- This function is basically just a dequeue followed by an enqueue, unless the
- compat_yield sysctl is turned on; in that case, it places the scheduling
- entity at the right-most end of the red-black tree.
-
- - check_preempt_curr(...)
-
- This function checks if a task that entered the runnable state should
- preempt the currently running task.
-
- - pick_next_task(...)
-
- This function chooses the most appropriate task eligible to run next.
-
- - set_curr_task(...)
-
- This function is called when a task changes its scheduling class or changes
- its task group.
-
- - task_tick(...)
-
- This function is mostly called from time tick functions; it might lead to
- process switch. This drives the running preemption.
-
- - task_new(...)
-
- The core scheduler gives the scheduling module an opportunity to manage new
- task startup. The CFS scheduling module uses it for group scheduling, while
- the scheduling module for a real-time task does not use it.
-
-
-
-7. GROUP SCHEDULER EXTENSIONS TO CFS
-
-Normally, the scheduler operates on individual tasks and strives to provide
-fair CPU time to each task. Sometimes, it may be desirable to group tasks and
-provide fair CPU time to each such task group. For example, it may be
-desirable to first provide fair CPU time to each user on the system and then to
-each task belonging to a user.
-
-CONFIG_GROUP_SCHED strives to achieve exactly that. It lets tasks to be
-grouped and divides CPU time fairly among such groups.
-
-CONFIG_RT_GROUP_SCHED permits to group real-time (i.e., SCHED_FIFO and
-SCHED_RR) tasks.
-
-CONFIG_FAIR_GROUP_SCHED permits to group CFS (i.e., SCHED_NORMAL and
-SCHED_BATCH) tasks.
-
-At present, there are two (mutually exclusive) mechanisms to group tasks for
-CPU bandwidth control purposes:
-
- - Based on user id (CONFIG_USER_SCHED)
-
- With this option, tasks are grouped according to their user id.
-
- - Based on "cgroup" pseudo filesystem (CONFIG_CGROUP_SCHED)
-
- This options needs CONFIG_CGROUPS to be defined, and lets the administrator
- create arbitrary groups of tasks, using the "cgroup" pseudo filesystem. See
- Documentation/cgroups.txt for more information about this filesystem.
+This is the CFS scheduler.
+
+80% of CFS's design can be summed up in a single sentence: CFS basically
+models an "ideal, precise multi-tasking CPU" on real hardware.
+
+"Ideal multi-tasking CPU" is a (non-existent :-)) CPU that has 100%
+physical power and which can run each task at precise equal speed, in
+parallel, each at 1/nr_running speed. For example: if there are 2 tasks
+running then it runs each at 50% physical power - totally in parallel.
+
+On real hardware, we can run only a single task at once, so while that
+one task runs, the other tasks that are waiting for the CPU are at a
+disadvantage - the current task gets an unfair amount of CPU time. In
+CFS this fairness imbalance is expressed and tracked via the per-task
+p->wait_runtime (nanosec-unit) value. "wait_runtime" is the amount of
+time the task should now run on the CPU for it to become completely fair
+and balanced.
+
+( small detail: on 'ideal' hardware, the p->wait_runtime value would
+ always be zero - no task would ever get 'out of balance' from the
+ 'ideal' share of CPU time. )
+
+CFS's task picking logic is based on this p->wait_runtime value and it
+is thus very simple: it always tries to run the task with the largest
+p->wait_runtime value. In other words, CFS tries to run the task with
+the 'gravest need' for more CPU time. So CFS always tries to split up
+CPU time between runnable tasks as close to 'ideal multitasking
+hardware' as possible.
+
+Most of the rest of CFS's design just falls out of this really simple
+concept, with a few add-on embellishments like nice levels,
+multiprocessing and various algorithm variants to recognize sleepers.
+
+In practice it works like this: the system runs a task a bit, and when
+the task schedules (or a scheduler tick happens) the task's CPU usage is
+'accounted for': the (small) time it just spent using the physical CPU
+is deducted from p->wait_runtime. [minus the 'fair share' it would have
+gotten anyway]. Once p->wait_runtime gets low enough so that another
+task becomes the 'leftmost task' of the time-ordered rbtree it maintains
+(plus a small amount of 'granularity' distance relative to the leftmost
+task so that we do not over-schedule tasks and trash the cache) then the
+new leftmost task is picked and the current task is preempted.
+
+The rq->fair_clock value tracks the 'CPU time a runnable task would have
+fairly gotten, had it been runnable during that time'. So by using
+rq->fair_clock values we can accurately timestamp and measure the
+'expected CPU time' a task should have gotten. All runnable tasks are
+sorted in the rbtree by the "rq->fair_clock - p->wait_runtime" key, and
+CFS picks the 'leftmost' task and sticks to it. As the system progresses
+forwards, newly woken tasks are put into the tree more and more to the
+right - slowly but surely giving a chance for every task to become the
+'leftmost task' and thus get on the CPU within a deterministic amount of
+time.
+
+Some implementation details:
+
+ - the introduction of Scheduling Classes: an extensible hierarchy of
+ scheduler modules. These modules encapsulate scheduling policy
+ details and are handled by the scheduler core without the core
+ code assuming about them too much.
+
+ - sched_fair.c implements the 'CFS desktop scheduler': it is a
+ replacement for the vanilla scheduler's SCHED_OTHER interactivity
+ code.
+
+ I'd like to give credit to Con Kolivas for the general approach here:
+ he has proven via RSDL/SD that 'fair scheduling' is possible and that
+ it results in better desktop scheduling. Kudos Con!
+
+ The CFS patch uses a completely different approach and implementation
+ from RSDL/SD. My goal was to make CFS's interactivity quality exceed
+ that of RSDL/SD, which is a high standard to meet :-) Testing
+ feedback is welcome to decide this one way or another. [ and, in any
+ case, all of SD's logic could be added via a kernel/sched_sd.c module
+ as well, if Con is interested in such an approach. ]
+
+ CFS's design is quite radical: it does not use runqueues, it uses a
+ time-ordered rbtree to build a 'timeline' of future task execution,
+ and thus has no 'array switch' artifacts (by which both the vanilla
+ scheduler and RSDL/SD are affected).
+
+ CFS uses nanosecond granularity accounting and does not rely on any
+ jiffies or other HZ detail. Thus the CFS scheduler has no notion of
+ 'timeslices' and has no heuristics whatsoever. There is only one
+ central tunable (you have to switch on CONFIG_SCHED_DEBUG):
+
+ /proc/sys/kernel/sched_granularity_ns
+
+ which can be used to tune the scheduler from 'desktop' (low
+ latencies) to 'server' (good batching) workloads. It defaults to a
+ setting suitable for desktop workloads. SCHED_BATCH is handled by the
+ CFS scheduler module too.
+
+ Due to its design, the CFS scheduler is not prone to any of the
+ 'attacks' that exist today against the heuristics of the stock
+ scheduler: fiftyp.c, thud.c, chew.c, ring-test.c, massive_intr.c all
+ work fine and do not impact interactivity and produce the expected
+ behavior.
+
+ the CFS scheduler has a much stronger handling of nice levels and
+ SCHED_BATCH: both types of workloads should be isolated much more
+ agressively than under the vanilla scheduler.
+
+ ( another detail: due to nanosec accounting and timeline sorting,
+ sched_yield() support is very simple under CFS, and in fact under
+ CFS sched_yield() behaves much better than under any other
+ scheduler i have tested so far. )
+
+ - sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler
+ way than the vanilla scheduler does. It uses 100 runqueues (for all
+ 100 RT priority levels, instead of 140 in the vanilla scheduler)
+ and it needs no expired array.
+
+ - reworked/sanitized SMP load-balancing: the runqueue-walking
+ assumptions are gone from the load-balancing code now, and
+ iterators of the scheduling modules are used. The balancing code got
+ quite a bit simpler as a result.
+
+
+Group scheduler extension to CFS
+================================
+
+Normally the scheduler operates on individual tasks and strives to provide
+fair CPU time to each task. Sometimes, it may be desirable to group tasks
+and provide fair CPU time to each such task group. For example, it may
+be desirable to first provide fair CPU time to each user on the system
+and then to each task belonging to a user.
+
+CONFIG_FAIR_GROUP_SCHED strives to achieve exactly that. It lets
+SCHED_NORMAL/BATCH tasks be be grouped and divides CPU time fairly among such
+groups. At present, there are two (mutually exclusive) mechanisms to group
+tasks for CPU bandwidth control purpose:
+
+ - Based on user id (CONFIG_FAIR_USER_SCHED)
+ In this option, tasks are grouped according to their user id.
+ - Based on "cgroup" pseudo filesystem (CONFIG_FAIR_CGROUP_SCHED)
+ This options lets the administrator create arbitrary groups
+ of tasks, using the "cgroup" pseudo filesystem. See
+ Documentation/cgroups.txt for more information about this
+ filesystem.
Only one of these options to group tasks can be chosen and not both.
-When CONFIG_USER_SCHED is defined, a directory is created in sysfs for each new
-user and a "cpu_share" file is added in that directory.
+Group scheduler tunables:
+
+When CONFIG_FAIR_USER_SCHED is defined, a directory is created in sysfs for
+each new user and a "cpu_share" file is added in that directory.
# cd /sys/kernel/uids
# cat 512/cpu_share # Display user 512's CPU share
@@ -246,14 +155,16 @@ user and a "cpu_share" file is added in that directory.
2048
#
-CPU bandwidth between two users is divided in the ratio of their CPU shares.
-For example: if you would like user "root" to get twice the bandwidth of user
-"guest," then set the cpu_share for both the users such that "root"'s cpu_share
-is twice "guest"'s cpu_share.
+CPU bandwidth between two users are divided in the ratio of their CPU shares.
+For ex: if you would like user "root" to get twice the bandwidth of user
+"guest", then set the cpu_share for both the users such that "root"'s
+cpu_share is twice "guest"'s cpu_share
+
-When CONFIG_CGROUP_SCHED is defined, a "cpu.shares" file is created for each
-group created using the pseudo filesystem. See example steps below to create
-task groups and modify their CPU share using the "cgroups" pseudo filesystem.
+When CONFIG_FAIR_CGROUP_SCHED is defined, a "cpu.shares" file is created
+for each group created using the pseudo filesystem. See example steps
+below to create task groups and modify their CPU share using the "cgroups"
+pseudo filesystem
# mkdir /dev/cpuctl
# mount -t cgroup -ocpu none /dev/cpuctl
diff --git a/trunk/Documentation/scsi/scsi_fc_transport.txt b/trunk/Documentation/scsi/scsi_fc_transport.txt
index 38d324d62b25..75143f0c23b6 100644
--- a/trunk/Documentation/scsi/scsi_fc_transport.txt
+++ b/trunk/Documentation/scsi/scsi_fc_transport.txt
@@ -436,42 +436,6 @@ Other:
was updated to remove all vports for the fc_host as well.
-Transport supplied functions
-----------------------------
-
-The following functions are supplied by the FC-transport for use by LLDs.
-
- fc_vport_create - create a vport
- fc_vport_terminate - detach and remove a vport
-
-Details:
-
-/**
- * fc_vport_create - Admin App or LLDD requests creation of a vport
- * @shost: scsi host the virtual port is connected to.
- * @ids: The world wide names, FC4 port roles, etc for
- * the virtual port.
- *
- * Notes:
- * This routine assumes no locks are held on entry.
- */
-struct fc_vport *
-fc_vport_create(struct Scsi_Host *shost, struct fc_vport_identifiers *ids)
-
-/**
- * fc_vport_terminate - Admin App or LLDD requests termination of a vport
- * @vport: fc_vport to be terminated
- *
- * Calls the LLDD vport_delete() function, then deallocates and removes
- * the vport from the shost and object tree.
- *
- * Notes:
- * This routine assumes no locks are held on entry.
- */
-int
-fc_vport_terminate(struct fc_vport *vport)
-
-
Credits
=======
The following people have contributed to this document:
diff --git a/trunk/Documentation/x86/00-INDEX b/trunk/Documentation/x86/00-INDEX
deleted file mode 100644
index dbe3377754af..000000000000
--- a/trunk/Documentation/x86/00-INDEX
+++ /dev/null
@@ -1,4 +0,0 @@
-00-INDEX
- - this file
-mtrr.txt
- - how to use x86 Memory Type Range Registers to increase performance
diff --git a/trunk/Documentation/x86/boot.txt b/trunk/Documentation/x86/i386/boot.txt
similarity index 99%
rename from trunk/Documentation/x86/boot.txt
rename to trunk/Documentation/x86/i386/boot.txt
index 83c0033ee9e0..147bfe511cdd 100644
--- a/trunk/Documentation/x86/boot.txt
+++ b/trunk/Documentation/x86/i386/boot.txt
@@ -308,7 +308,7 @@ Protocol: 2.00+
Field name: start_sys
Type: read
-Offset/size: 0x20c/2
+Offset/size: 0x20c/4
Protocol: 2.00+
The load low segment (0x1000). Obsolete.
diff --git a/trunk/Documentation/x86/usb-legacy-support.txt b/trunk/Documentation/x86/i386/usb-legacy-support.txt
similarity index 100%
rename from trunk/Documentation/x86/usb-legacy-support.txt
rename to trunk/Documentation/x86/i386/usb-legacy-support.txt
diff --git a/trunk/Documentation/x86/zero-page.txt b/trunk/Documentation/x86/i386/zero-page.txt
similarity index 100%
rename from trunk/Documentation/x86/zero-page.txt
rename to trunk/Documentation/x86/i386/zero-page.txt
diff --git a/trunk/Documentation/x86/pat.txt b/trunk/Documentation/x86/pat.txt
index c93ff5f4c0dd..17965f927c15 100644
--- a/trunk/Documentation/x86/pat.txt
+++ b/trunk/Documentation/x86/pat.txt
@@ -14,10 +14,6 @@ PAT allows for different types of memory attributes. The most commonly used
ones that will be supported at this time are Write-back, Uncached,
Write-combined and Uncached Minus.
-
-PAT APIs
---------
-
There are many different APIs in the kernel that allows setting of memory
attributes at the page level. In order to avoid aliasing, these interfaces
should be used thoughtfully. Below is a table of interfaces available,
@@ -30,38 +26,38 @@ address range to avoid any aliasing.
API | RAM | ACPI,... | Reserved/Holes |
-----------------------|----------|------------|------------------|
| | | |
-ioremap | -- | UC- | UC- |
+ioremap | -- | UC | UC |
| | | |
ioremap_cache | -- | WB | WB |
| | | |
-ioremap_nocache | -- | UC- | UC- |
+ioremap_nocache | -- | UC | UC |
| | | |
ioremap_wc | -- | -- | WC |
| | | |
-set_memory_uc | UC- | -- | -- |
+set_memory_uc | UC | -- | -- |
set_memory_wb | | | |
| | | |
set_memory_wc | WC | -- | -- |
set_memory_wb | | | |
| | | |
-pci sysfs resource | -- | -- | UC- |
+pci sysfs resource | -- | -- | UC |
| | | |
pci sysfs resource_wc | -- | -- | WC |
is IORESOURCE_PREFETCH| | | |
| | | |
-pci proc | -- | -- | UC- |
+pci proc | -- | -- | UC |
!PCIIOC_WRITE_COMBINE | | | |
| | | |
pci proc | -- | -- | WC |
PCIIOC_WRITE_COMBINE | | | |
| | | |
-/dev/mem | -- | WB/WC/UC- | WB/WC/UC- |
+/dev/mem | -- | UC | UC |
read-write | | | |
| | | |
-/dev/mem | -- | UC- | UC- |
+/dev/mem | -- | UC | UC |
mmap SYNC flag | | | |
| | | |
-/dev/mem | -- | WB/WC/UC- | WB/WC/UC- |
+/dev/mem | -- | WB/WC/UC | WB/WC/UC |
mmap !SYNC flag | |(from exist-| (from exist- |
and | | ing alias)| ing alias) |
any alias to this area| | | |
@@ -72,7 +68,7 @@ pci proc | -- | -- | WC |
and | | | |
MTRR says WB | | | |
| | | |
-/dev/mem | -- | -- | UC- |
+/dev/mem | -- | -- | UC_MINUS |
mmap !SYNC flag | | | |
no alias to this area | | | |
and | | | |
@@ -102,35 +98,3 @@ types.
Drivers should use set_memory_[uc|wc] to set access type for RAM ranges.
-
-PAT debugging
--------------
-
-With CONFIG_DEBUG_FS enabled, PAT memtype list can be examined by
-
-# mount -t debugfs debugfs /sys/kernel/debug
-# cat /sys/kernel/debug/x86/pat_memtype_list
-PAT memtype list:
-uncached-minus @ 0x7fadf000-0x7fae0000
-uncached-minus @ 0x7fb19000-0x7fb1a000
-uncached-minus @ 0x7fb1a000-0x7fb1b000
-uncached-minus @ 0x7fb1b000-0x7fb1c000
-uncached-minus @ 0x7fb1c000-0x7fb1d000
-uncached-minus @ 0x7fb1d000-0x7fb1e000
-uncached-minus @ 0x7fb1e000-0x7fb25000
-uncached-minus @ 0x7fb25000-0x7fb26000
-uncached-minus @ 0x7fb26000-0x7fb27000
-uncached-minus @ 0x7fb27000-0x7fb28000
-uncached-minus @ 0x7fb28000-0x7fb2e000
-uncached-minus @ 0x7fb2e000-0x7fb2f000
-uncached-minus @ 0x7fb2f000-0x7fb30000
-uncached-minus @ 0x7fb31000-0x7fb32000
-uncached-minus @ 0x80000000-0x90000000
-
-This list shows physical address ranges and various PAT settings used to
-access those physical address ranges.
-
-Another, more verbose way of getting PAT related debug messages is with
-"debugpat" boot parameter. With this parameter, various debug messages are
-printed to dmesg log.
-
diff --git a/trunk/Documentation/x86/x86_64/boot-options.txt b/trunk/Documentation/x86/x86_64/boot-options.txt
index 72ffb5373ec7..b0c7b6c4abda 100644
--- a/trunk/Documentation/x86/x86_64/boot-options.txt
+++ b/trunk/Documentation/x86/x86_64/boot-options.txt
@@ -54,6 +54,10 @@ APICs
apicmaintimer. Useful when your PIT timer is totally
broken.
+ disable_8254_timer / enable_8254_timer
+ Enable interrupt 0 timer routing over the 8254 in addition to over
+ the IO-APIC. The kernel tries to set a sensible default.
+
Early Console
syntax: earlyprintk=vga
diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS
index 7a03bd5a91a3..8dae4555f10e 100644
--- a/trunk/MAINTAINERS
+++ b/trunk/MAINTAINERS
@@ -3649,9 +3649,8 @@ M: jmorris@namei.org
P: Eric Paris
M: eparis@parisplace.org
L: linux-kernel@vger.kernel.org (kernel issues)
-L: selinux@tycho.nsa.gov (subscribers-only, general discussion)
-W: http://selinuxproject.org
-T: git kernel.org:pub/scm/linux/kernel/git/jmorris/security-testing-2.6.git
+L: selinux@tycho.nsa.gov (subscribers-only, general discussion)
+W: http://www.nsa.gov/selinux
S: Supported
SENSABLE PHANTOM
diff --git a/trunk/arch/alpha/kernel/smp.c b/trunk/arch/alpha/kernel/smp.c
index 06b6fdab639f..83df541650fc 100644
--- a/trunk/arch/alpha/kernel/smp.c
+++ b/trunk/arch/alpha/kernel/smp.c
@@ -149,9 +149,6 @@ smp_callin(void)
atomic_inc(&init_mm.mm_count);
current->active_mm = &init_mm;
- /* inform the notifiers about the new cpu */
- notify_cpu_starting(cpuid);
-
/* Must have completely accurate bogos. */
local_irq_enable();
diff --git a/trunk/arch/arm/kernel/smp.c b/trunk/arch/arm/kernel/smp.c
index e42a749a56dd..e9842f6767f9 100644
--- a/trunk/arch/arm/kernel/smp.c
+++ b/trunk/arch/arm/kernel/smp.c
@@ -277,7 +277,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
/*
* Enable local interrupts.
*/
- notify_cpu_starting(cpu);
local_irq_enable();
local_fiq_enable();
diff --git a/trunk/arch/cris/arch-v32/kernel/smp.c b/trunk/arch/cris/arch-v32/kernel/smp.c
index 52e16c6436f9..952a24b2f5a9 100644
--- a/trunk/arch/cris/arch-v32/kernel/smp.c
+++ b/trunk/arch/cris/arch-v32/kernel/smp.c
@@ -178,7 +178,6 @@ void __init smp_callin(void)
unmask_irq(IPI_INTR_VECT);
unmask_irq(TIMER0_INTR_VECT);
preempt_disable();
- notify_cpu_starting(cpu);
local_irq_enable();
cpu_set(cpu, cpu_online_map);
diff --git a/trunk/arch/ia64/kernel/smpboot.c b/trunk/arch/ia64/kernel/smpboot.c
index 1dcbb85fc4ee..d8f05e504fbf 100644
--- a/trunk/arch/ia64/kernel/smpboot.c
+++ b/trunk/arch/ia64/kernel/smpboot.c
@@ -401,7 +401,6 @@ smp_callin (void)
spin_lock(&vector_lock);
/* Setup the per cpu irq handling data structures */
__setup_vector_irq(cpuid);
- notify_cpu_starting(cpuid);
cpu_set(cpuid, cpu_online_map);
per_cpu(cpu_state, cpuid) = CPU_ONLINE;
spin_unlock(&vector_lock);
diff --git a/trunk/arch/m32r/kernel/smpboot.c b/trunk/arch/m32r/kernel/smpboot.c
index fc2994811f15..2c03ac1d005f 100644
--- a/trunk/arch/m32r/kernel/smpboot.c
+++ b/trunk/arch/m32r/kernel/smpboot.c
@@ -498,8 +498,6 @@ static void __init smp_online(void)
{
int cpu_id = smp_processor_id();
- notify_cpu_starting(cpu_id);
-
local_irq_enable();
/* Get our bogomips. */
diff --git a/trunk/arch/mips/kernel/smp.c b/trunk/arch/mips/kernel/smp.c
index 7b59cfb7e602..4410f172b8ab 100644
--- a/trunk/arch/mips/kernel/smp.c
+++ b/trunk/arch/mips/kernel/smp.c
@@ -121,8 +121,6 @@ asmlinkage __cpuinit void start_secondary(void)
cpu = smp_processor_id();
cpu_data[cpu].udelay_val = loops_per_jiffy;
- notify_cpu_starting(cpu);
-
mp_ops->smp_finish();
set_cpu_sibling_map(cpu);
diff --git a/trunk/arch/powerpc/kernel/smp.c b/trunk/arch/powerpc/kernel/smp.c
index c27b10a1bd79..5337ca7bb649 100644
--- a/trunk/arch/powerpc/kernel/smp.c
+++ b/trunk/arch/powerpc/kernel/smp.c
@@ -453,7 +453,6 @@ int __devinit start_secondary(void *unused)
secondary_cpu_time_init();
ipi_call_lock();
- notify_cpu_starting(cpu);
cpu_set(cpu, cpu_online_map);
/* Update sibling maps */
base = cpu_first_thread_in_core(cpu);
diff --git a/trunk/arch/s390/Kconfig b/trunk/arch/s390/Kconfig
index 4c03049e7db9..8d41908e2513 100644
--- a/trunk/arch/s390/Kconfig
+++ b/trunk/arch/s390/Kconfig
@@ -74,7 +74,6 @@ config S390
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_KVM if 64BIT
- select HAVE_ARCH_TRACEHOOK
source "init/Kconfig"
diff --git a/trunk/arch/s390/include/asm/dasd.h b/trunk/arch/s390/include/asm/dasd.h
index 55b2b80cdf6e..3f002e13d024 100644
--- a/trunk/arch/s390/include/asm/dasd.h
+++ b/trunk/arch/s390/include/asm/dasd.h
@@ -3,8 +3,6 @@
* Author(s)......: Holger Smolinski
* Bugreports.to..:
* (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000
- * EMC Symmetrix ioctl Copyright EMC Corporation, 2008
- * Author.........: Nigel Hislop
*
* This file is the interface of the DASD device driver, which is exported to user space
* any future changes wrt the API will result in a change of the APIVERSION reported
@@ -204,16 +202,6 @@ typedef struct attrib_data_t {
#define DASD_SEQ_PRESTAGE 0x4
#define DASD_REC_ACCESS 0x5
-/*
- * Perform EMC Symmetrix I/O
- */
-typedef struct dasd_symmio_parms {
- unsigned char reserved[8]; /* compat with older releases */
- unsigned long long psf_data; /* char * cast to u64 */
- unsigned long long rssd_result; /* char * cast to u64 */
- int psf_data_len;
- int rssd_result_len;
-} __attribute__ ((packed)) dasd_symmio_parms_t;
/********************************************************************************
* SECTION: Definition of IOCTLs
@@ -259,7 +247,6 @@ typedef struct dasd_symmio_parms {
/* Set Attributes (cache operations) */
#define BIODASDSATTR _IOW(DASD_IOCTL_LETTER,2,attrib_data_t)
-#define BIODASDSYMMIO _IOWR(DASD_IOCTL_LETTER, 240, dasd_symmio_parms_t)
#endif /* DASD_H */
diff --git a/trunk/arch/s390/include/asm/delay.h b/trunk/arch/s390/include/asm/delay.h
index a356c958e260..78357314c450 100644
--- a/trunk/arch/s390/include/asm/delay.h
+++ b/trunk/arch/s390/include/asm/delay.h
@@ -15,7 +15,6 @@
#define _S390_DELAY_H
extern void __udelay(unsigned long usecs);
-extern void udelay_simple(unsigned long usecs);
extern void __delay(unsigned long loops);
#define udelay(n) __udelay(n)
diff --git a/trunk/arch/s390/include/asm/pgtable.h b/trunk/arch/s390/include/asm/pgtable.h
index 1a928f84afd6..0bdb704ae051 100644
--- a/trunk/arch/s390/include/asm/pgtable.h
+++ b/trunk/arch/s390/include/asm/pgtable.h
@@ -281,9 +281,6 @@ extern char empty_zero_page[PAGE_SIZE];
#define RCP_GR_BIT 50
#define RCP_GC_BIT 49
-/* User dirty bit for KVM's migration feature */
-#define KVM_UD_BIT 47
-
#ifndef __s390x__
/* Bits in the segment table address-space-control-element */
@@ -578,16 +575,12 @@ static inline void ptep_rcp_copy(pte_t *ptep)
unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
skey = page_get_storage_key(page_to_phys(page));
- if (skey & _PAGE_CHANGED) {
+ if (skey & _PAGE_CHANGED)
set_bit_simple(RCP_GC_BIT, pgste);
- set_bit_simple(KVM_UD_BIT, pgste);
- }
if (skey & _PAGE_REFERENCED)
set_bit_simple(RCP_GR_BIT, pgste);
- if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) {
+ if (test_and_clear_bit_simple(RCP_HC_BIT, pgste))
SetPageDirty(page);
- set_bit_simple(KVM_UD_BIT, pgste);
- }
if (test_and_clear_bit_simple(RCP_HR_BIT, pgste))
SetPageReferenced(page);
#endif
@@ -751,40 +744,6 @@ static inline pte_t pte_mkspecial(pte_t pte)
return pte;
}
-#ifdef CONFIG_PGSTE
-/*
- * Get (and clear) the user dirty bit for a PTE.
- */
-static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm,
- pte_t *ptep)
-{
- int dirty;
- unsigned long *pgste;
- struct page *page;
- unsigned int skey;
-
- if (!mm->context.pgstes)
- return -EINVAL;
- rcp_lock(ptep);
- pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
- page = virt_to_page(pte_val(*ptep));
- skey = page_get_storage_key(page_to_phys(page));
- if (skey & _PAGE_CHANGED) {
- set_bit_simple(RCP_GC_BIT, pgste);
- set_bit_simple(KVM_UD_BIT, pgste);
- }
- if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) {
- SetPageDirty(page);
- set_bit_simple(KVM_UD_BIT, pgste);
- }
- dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste);
- if (skey & _PAGE_CHANGED)
- page_clear_dirty(page);
- rcp_unlock(ptep);
- return dirty;
-}
-#endif
-
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
diff --git a/trunk/arch/s390/include/asm/ptrace.h b/trunk/arch/s390/include/asm/ptrace.h
index a7226f8143fb..af2c9ac28a07 100644
--- a/trunk/arch/s390/include/asm/ptrace.h
+++ b/trunk/arch/s390/include/asm/ptrace.h
@@ -490,7 +490,6 @@ extern void user_disable_single_step(struct task_struct *);
#define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
#define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
-#define user_stack_pointer(regs)((regs)->gprs[15])
#define regs_return_value(regs)((regs)->gprs[2])
#define profile_pc(regs) instruction_pointer(regs)
extern void show_regs(struct pt_regs * regs);
diff --git a/trunk/arch/s390/include/asm/qdio.h b/trunk/arch/s390/include/asm/qdio.h
index 4734c3f05354..6813772171f2 100644
--- a/trunk/arch/s390/include/asm/qdio.h
+++ b/trunk/arch/s390/include/asm/qdio.h
@@ -299,13 +299,7 @@ struct qdio_ssqd_desc {
u8 mbccnt;
u16 qdioac2;
u64 sch_token;
- u8 mro;
- u8 mri;
- u8:8;
- u8 sbalic;
- u16:16;
- u8:8;
- u8 mmwc;
+ u64:64;
} __attribute__ ((packed));
/* params are: ccw_device, qdio_error, queue_number,
diff --git a/trunk/arch/s390/include/asm/syscall.h b/trunk/arch/s390/include/asm/syscall.h
deleted file mode 100644
index 6e623971fbb9..000000000000
--- a/trunk/arch/s390/include/asm/syscall.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Access to user system call parameters and results
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- */
-
-#ifndef _ASM_SYSCALL_H
-#define _ASM_SYSCALL_H 1
-
-#include
-
-static inline long syscall_get_nr(struct task_struct *task,
- struct pt_regs *regs)
-{
- if (regs->trap != __LC_SVC_OLD_PSW)
- return -1;
- return regs->gprs[2];
-}
-
-static inline void syscall_rollback(struct task_struct *task,
- struct pt_regs *regs)
-{
- regs->gprs[2] = regs->orig_gpr2;
-}
-
-static inline long syscall_get_error(struct task_struct *task,
- struct pt_regs *regs)
-{
- return (regs->gprs[2] >= -4096UL) ? -regs->gprs[2] : 0;
-}
-
-static inline long syscall_get_return_value(struct task_struct *task,
- struct pt_regs *regs)
-{
- return regs->gprs[2];
-}
-
-static inline void syscall_set_return_value(struct task_struct *task,
- struct pt_regs *regs,
- int error, long val)
-{
- regs->gprs[2] = error ? -error : val;
-}
-
-static inline void syscall_get_arguments(struct task_struct *task,
- struct pt_regs *regs,
- unsigned int i, unsigned int n,
- unsigned long *args)
-{
- BUG_ON(i + n > 6);
-#ifdef CONFIG_COMPAT
- if (test_tsk_thread_flag(task, TIF_31BIT)) {
- if (i + n == 6)
- args[--n] = (u32) regs->args[0];
- while (n-- > 0)
- args[n] = (u32) regs->gprs[2 + i + n];
- }
-#endif
- if (i + n == 6)
- args[--n] = regs->args[0];
- memcpy(args, ®s->gprs[2 + i], n * sizeof(args[0]));
-}
-
-static inline void syscall_set_arguments(struct task_struct *task,
- struct pt_regs *regs,
- unsigned int i, unsigned int n,
- const unsigned long *args)
-{
- BUG_ON(i + n > 6);
- if (i + n == 6)
- regs->args[0] = args[--n];
- memcpy(®s->gprs[2 + i], args, n * sizeof(args[0]));
-}
-
-#endif /* _ASM_SYSCALL_H */
diff --git a/trunk/arch/s390/include/asm/thread_info.h b/trunk/arch/s390/include/asm/thread_info.h
index ea40a9d690fc..91a8f93ad355 100644
--- a/trunk/arch/s390/include/asm/thread_info.h
+++ b/trunk/arch/s390/include/asm/thread_info.h
@@ -86,7 +86,6 @@ static inline struct thread_info *current_thread_info(void)
* thread information flags bit numbers
*/
#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
-#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_RESTART_SVC 4 /* restart svc with new svc number */
@@ -101,7 +100,6 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_RESTORE_SIGMASK 20 /* restore signal mask in do_signal() */
#define _TIF_SYSCALL_TRACE (1<state = TASK_INTERRUPTIBLE;
+ schedule();
+ return -ERESTARTNOHAND;
+}
+
asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf,
size_t count, u32 poshi, u32 poslo)
{
diff --git a/trunk/arch/s390/kernel/compat_linux.h b/trunk/arch/s390/kernel/compat_linux.h
index 05f8516366ab..20723a062017 100644
--- a/trunk/arch/s390/kernel/compat_linux.h
+++ b/trunk/arch/s390/kernel/compat_linux.h
@@ -206,6 +206,7 @@ long sys32_gettimeofday(struct compat_timeval __user *tv,
struct timezone __user *tz);
long sys32_settimeofday(struct compat_timeval __user *tv,
struct timezone __user *tz);
+long sys32_pause(void);
long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count,
u32 poshi, u32 poslo);
long sys32_pwrite64(unsigned int fd, const char __user *ubuf,
diff --git a/trunk/arch/s390/kernel/compat_wrapper.S b/trunk/arch/s390/kernel/compat_wrapper.S
index ee51ca9e23b5..328a20e880b5 100644
--- a/trunk/arch/s390/kernel/compat_wrapper.S
+++ b/trunk/arch/s390/kernel/compat_wrapper.S
@@ -128,6 +128,8 @@ sys32_alarm_wrapper:
llgfr %r2,%r2 # unsigned int
jg sys_alarm # branch to system call
+#sys32_pause_wrapper # void
+
.globl compat_sys_utime_wrapper
compat_sys_utime_wrapper:
llgtr %r2,%r2 # char *
diff --git a/trunk/arch/s390/kernel/entry.S b/trunk/arch/s390/kernel/entry.S
index ed500ef799b7..708cf9cf9a35 100644
--- a/trunk/arch/s390/kernel/entry.S
+++ b/trunk/arch/s390/kernel/entry.S
@@ -49,9 +49,9 @@ SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC
SP_TRAP = STACK_FRAME_OVERHEAD + __PT_TRAP
SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE
-_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
+_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
_TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP )
-_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
+_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
_TIF_MCCK_PENDING)
STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
@@ -318,8 +318,6 @@ sysc_work:
bo BASED(sysc_reschedule)
tm __TI_flags+3(%r9),_TIF_SIGPENDING
bnz BASED(sysc_sigpending)
- tm __TI_flags+3(%r9),_TIF_NOTIFY_RESUME
- bnz BASED(sysc_notify_resume)
tm __TI_flags+3(%r9),_TIF_RESTART_SVC
bo BASED(sysc_restart)
tm __TI_flags+3(%r9),_TIF_SINGLE_STEP
@@ -357,16 +355,6 @@ sysc_sigpending:
bo BASED(sysc_singlestep)
b BASED(sysc_work_loop)
-#
-# _TIF_NOTIFY_RESUME is set, call do_notify_resume
-#
-sysc_notify_resume:
- la %r2,SP_PTREGS(%r15) # load pt_regs
- l %r1,BASED(.Ldo_notify_resume)
- la %r14,BASED(sysc_work_loop)
- br %r1 # call do_notify_resume
-
-
#
# _TIF_RESTART_SVC is set, set up registers and restart svc
#
@@ -390,21 +378,20 @@ sysc_singlestep:
br %r1 # branch to do_single_step
#
-# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
-# and after the system call
+# call trace before and after sys_call
#
sysc_tracesys:
- l %r1,BASED(.Ltrace_entry)
+ l %r1,BASED(.Ltrace)
la %r2,SP_PTREGS(%r15) # load pt_regs
la %r3,0
srl %r7,2
st %r7,SP_R2(%r15)
basr %r14,%r1
- cl %r2,BASED(.Lnr_syscalls)
+ clc SP_R2(4,%r15),BASED(.Lnr_syscalls)
bnl BASED(sysc_tracenogo)
l %r8,BASED(.Lsysc_table)
- lr %r7,%r2
- sll %r7,2 # *4
+ l %r7,SP_R2(%r15) # strace might have changed the
+ sll %r7,2 # system call
l %r8,0(%r7,%r8)
sysc_tracego:
lm %r3,%r6,SP_R3(%r15)
@@ -414,8 +401,9 @@ sysc_tracego:
sysc_tracenogo:
tm __TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
bz BASED(sysc_return)
- l %r1,BASED(.Ltrace_exit)
+ l %r1,BASED(.Ltrace)
la %r2,SP_PTREGS(%r15) # load pt_regs
+ la %r3,1
la %r14,BASED(sysc_return)
br %r1
@@ -678,8 +666,6 @@ io_work_loop:
bo BASED(io_reschedule)
tm __TI_flags+3(%r9),_TIF_SIGPENDING
bnz BASED(io_sigpending)
- tm __TI_flags+3(%r9),_TIF_NOTIFY_RESUME
- bnz BASED(io_notify_resume)
b BASED(io_restore)
io_work_done:
@@ -718,19 +704,6 @@ io_sigpending:
TRACE_IRQS_OFF
b BASED(io_work_loop)
-#
-# _TIF_SIGPENDING is set, call do_signal
-#
-io_notify_resume:
- TRACE_IRQS_ON
- stosm __SF_EMPTY(%r15),0x03 # reenable interrupts
- la %r2,SP_PTREGS(%r15) # load pt_regs
- l %r1,BASED(.Ldo_notify_resume)
- basr %r14,%r1 # call do_signal
- stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts
- TRACE_IRQS_OFF
- b BASED(io_work_loop)
-
/*
* External interrupt handler routine
*/
@@ -1097,8 +1070,6 @@ cleanup_io_leave_insn:
.Ldo_IRQ: .long do_IRQ
.Ldo_extint: .long do_extint
.Ldo_signal: .long do_signal
-.Ldo_notify_resume:
- .long do_notify_resume
.Lhandle_per: .long do_single_step
.Ldo_execve: .long do_execve
.Lexecve_tail: .long execve_tail
@@ -1108,8 +1079,7 @@ cleanup_io_leave_insn:
.Lpreempt_schedule_irq:
.long preempt_schedule_irq
#endif
-.Ltrace_entry: .long do_syscall_trace_enter
-.Ltrace_exit: .long do_syscall_trace_exit
+.Ltrace: .long syscall_trace
.Lschedtail: .long schedule_tail
.Lsysc_table: .long sys_call_table
#ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/trunk/arch/s390/kernel/entry64.S b/trunk/arch/s390/kernel/entry64.S
index d7ce150453f2..fee10177dbfc 100644
--- a/trunk/arch/s390/kernel/entry64.S
+++ b/trunk/arch/s390/kernel/entry64.S
@@ -52,9 +52,9 @@ SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE
STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
STACK_SIZE = 1 << STACK_SHIFT
-_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
+_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
_TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP )
-_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
+_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
_TIF_MCCK_PENDING)
#define BASED(name) name-system_call(%r13)
@@ -310,8 +310,6 @@ sysc_work:
jo sysc_reschedule
tm __TI_flags+7(%r9),_TIF_SIGPENDING
jnz sysc_sigpending
- tm __TI_flags+7(%r9),_TIF_NOTIFY_RESUME
- jnz sysc_notify_resume
tm __TI_flags+7(%r9),_TIF_RESTART_SVC
jo sysc_restart
tm __TI_flags+7(%r9),_TIF_SINGLE_STEP
@@ -346,14 +344,6 @@ sysc_sigpending:
jo sysc_singlestep
j sysc_work_loop
-#
-# _TIF_NOTIFY_RESUME is set, call do_notify_resume
-#
-sysc_notify_resume:
- la %r2,SP_PTREGS(%r15) # load pt_regs
- larl %r14,sysc_work_loop
- jg do_notify_resume # call do_notify_resume
-
#
# _TIF_RESTART_SVC is set, set up registers and restart svc
#
@@ -377,19 +367,20 @@ sysc_singlestep:
jg do_single_step # branch to do_sigtrap
#
-# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
-# and after the system call
+# call syscall_trace before and after system call
+# special linkage: %r12 contains the return address for trace_svc
#
sysc_tracesys:
la %r2,SP_PTREGS(%r15) # load pt_regs
la %r3,0
srl %r7,2
stg %r7,SP_R2(%r15)
- brasl %r14,do_syscall_trace_enter
+ brasl %r14,syscall_trace
lghi %r0,NR_syscalls
- clgr %r0,%r2
+ clg %r0,SP_R2(%r15)
jnh sysc_tracenogo
- slag %r7,%r2,2 # *4
+ lg %r7,SP_R2(%r15) # strace might have changed the
+ sll %r7,2 # system call
lgf %r8,0(%r7,%r10)
sysc_tracego:
lmg %r3,%r6,SP_R3(%r15)
@@ -400,8 +391,9 @@ sysc_tracenogo:
tm __TI_flags+7(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
jz sysc_return
la %r2,SP_PTREGS(%r15) # load pt_regs
+ la %r3,1
larl %r14,sysc_return # return point is sysc_return
- jg do_syscall_trace_exit
+ jg syscall_trace
#
# a new process exits the kernel with ret_from_fork
@@ -680,8 +672,6 @@ io_work_loop:
jo io_reschedule
tm __TI_flags+7(%r9),_TIF_SIGPENDING
jnz io_sigpending
- tm __TI_flags+7(%r9),_TIF_NOTIFY_RESUME
- jnz io_notify_resume
j io_restore
io_work_done:
@@ -722,18 +712,6 @@ io_sigpending:
TRACE_IRQS_OFF
j io_work_loop
-#
-# _TIF_NOTIFY_RESUME or is set, call do_notify_resume
-#
-io_notify_resume:
- TRACE_IRQS_ON
- stosm __SF_EMPTY(%r15),0x03 # reenable interrupts
- la %r2,SP_PTREGS(%r15) # load pt_regs
- brasl %r14,do_notify_resume # call do_notify_resume
- stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts
- TRACE_IRQS_OFF
- j io_work_loop
-
/*
* External interrupt handler routine
*/
diff --git a/trunk/arch/s390/kernel/ptrace.c b/trunk/arch/s390/kernel/ptrace.c
index 1f31be1ecc4b..c8b08289eb87 100644
--- a/trunk/arch/s390/kernel/ptrace.c
+++ b/trunk/arch/s390/kernel/ptrace.c
@@ -35,7 +35,6 @@
#include
#include
#include
-#include
#include
#include
@@ -640,44 +639,40 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
}
#endif
-asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
+asmlinkage void
+syscall_trace(struct pt_regs *regs, int entryexit)
{
- long ret;
+ if (unlikely(current->audit_context) && entryexit)
+ audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]), regs->gprs[2]);
+
+ if (!test_thread_flag(TIF_SYSCALL_TRACE))
+ goto out;
+ if (!(current->ptrace & PT_PTRACED))
+ goto out;
+ ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+ ? 0x80 : 0));
/*
- * The sysc_tracesys code in entry.S stored the system
- * call number to gprs[2].
+ * If the debuffer has set an invalid system call number,
+ * we prepare to skip the system call restart handling.
*/
- ret = regs->gprs[2];
- if (test_thread_flag(TIF_SYSCALL_TRACE) &&
- (tracehook_report_syscall_entry(regs) ||
- regs->gprs[2] >= NR_syscalls)) {
- /*
- * Tracing decided this syscall should not happen or the
- * debugger stored an invalid system call number. Skip
- * the system call and the system call restart handling.
- */
+ if (!entryexit && regs->gprs[2] >= NR_syscalls)
regs->trap = -1;
- ret = -1;
- }
-
- if (unlikely(current->audit_context))
- audit_syscall_entry(test_thread_flag(TIF_31BIT) ?
- AUDIT_ARCH_S390 : AUDIT_ARCH_S390X,
- regs->gprs[2], regs->orig_gpr2,
- regs->gprs[3], regs->gprs[4],
- regs->gprs[5]);
- return ret;
-}
-asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
-{
- if (unlikely(current->audit_context))
- audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]),
- regs->gprs[2]);
-
- if (test_thread_flag(TIF_SYSCALL_TRACE))
- tracehook_report_syscall_exit(regs, 0);
+ /*
+ * this isn't the same as continuing with a signal, but it will do
+ * for normal use. strace only continues with a signal if the
+ * stopping signal is not SIGTRAP. -brl
+ */
+ if (current->exit_code) {
+ send_sig(current->exit_code, current, 1);
+ current->exit_code = 0;
+ }
+ out:
+ if (unlikely(current->audit_context) && !entryexit)
+ audit_syscall_entry(test_thread_flag(TIF_31BIT)?AUDIT_ARCH_S390:AUDIT_ARCH_S390X,
+ regs->gprs[2], regs->orig_gpr2, regs->gprs[3],
+ regs->gprs[4], regs->gprs[5]);
}
/*
diff --git a/trunk/arch/s390/kernel/signal.c b/trunk/arch/s390/kernel/signal.c
index 4f7fc3059a8e..b97682040215 100644
--- a/trunk/arch/s390/kernel/signal.c
+++ b/trunk/arch/s390/kernel/signal.c
@@ -24,7 +24,6 @@
#include
#include
#include
-#include
#include
#include
#include
@@ -508,12 +507,6 @@ void do_signal(struct pt_regs *regs)
*/
if (current->thread.per_info.single_step)
set_thread_flag(TIF_SINGLE_STEP);
-
- /*
- * Let tracing know that we've done the handler setup.
- */
- tracehook_signal_handler(signr, &info, &ka, regs,
- test_thread_flag(TIF_SINGLE_STEP));
}
return;
}
@@ -533,9 +526,3 @@ void do_signal(struct pt_regs *regs)
set_thread_flag(TIF_RESTART_SVC);
}
}
-
-void do_notify_resume(struct pt_regs *regs)
-{
- clear_thread_flag(TIF_NOTIFY_RESUME);
- tracehook_notify_resume(regs);
-}
diff --git a/trunk/arch/s390/kernel/smp.c b/trunk/arch/s390/kernel/smp.c
index 9e8b1f9b8f4d..00b9b4dec5eb 100644
--- a/trunk/arch/s390/kernel/smp.c
+++ b/trunk/arch/s390/kernel/smp.c
@@ -585,8 +585,6 @@ int __cpuinit start_secondary(void *cpuvoid)
/* Enable pfault pseudo page faults on this cpu. */
pfault_init();
- /* call cpu notifiers */
- notify_cpu_starting(smp_processor_id());
/* Mark this cpu as online */
spin_lock(&call_lock);
cpu_set(smp_processor_id(), cpu_online_map);
diff --git a/trunk/arch/s390/kernel/syscalls.S b/trunk/arch/s390/kernel/syscalls.S
index 3ae303914b42..c66d35e55142 100644
--- a/trunk/arch/s390/kernel/syscalls.S
+++ b/trunk/arch/s390/kernel/syscalls.S
@@ -37,7 +37,7 @@ SYSCALL(sys_stime,sys_ni_syscall,sys32_stime_wrapper) /* 25 old stime syscall *
SYSCALL(sys_ptrace,sys_ptrace,sys32_ptrace_wrapper)
SYSCALL(sys_alarm,sys_alarm,sys32_alarm_wrapper)
NI_SYSCALL /* old fstat syscall */
-SYSCALL(sys_pause,sys_pause,sys_pause)
+SYSCALL(sys_pause,sys_pause,sys32_pause)
SYSCALL(sys_utime,sys_utime,compat_sys_utime_wrapper) /* 30 */
NI_SYSCALL /* old stty syscall */
NI_SYSCALL /* old gtty syscall */
diff --git a/trunk/arch/s390/kernel/time.c b/trunk/arch/s390/kernel/time.c
index b94e9e3b694a..06acb1a18bbc 100644
--- a/trunk/arch/s390/kernel/time.c
+++ b/trunk/arch/s390/kernel/time.c
@@ -1356,7 +1356,7 @@ static void __init stp_reset(void)
stp_page = alloc_bootmem_pages(PAGE_SIZE);
rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
- if (rc == 0)
+ if (rc == 1)
set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
else if (stp_online) {
printk(KERN_WARNING "Running on non STP capable machine.\n");
diff --git a/trunk/arch/s390/lib/delay.c b/trunk/arch/s390/lib/delay.c
index 6ccb9fab055a..0953cee05efc 100644
--- a/trunk/arch/s390/lib/delay.c
+++ b/trunk/arch/s390/lib/delay.c
@@ -92,16 +92,3 @@ void __udelay(unsigned long usecs)
local_irq_restore(flags);
preempt_enable();
}
-
-/*
- * Simple udelay variant. To be used on startup and reboot
- * when the interrupt handler isn't working.
- */
-void udelay_simple(unsigned long usecs)
-{
- u64 end;
-
- end = get_clock() + ((u64) usecs << 12);
- while (get_clock() < end)
- cpu_relax();
-}
diff --git a/trunk/arch/s390/mm/extmem.c b/trunk/arch/s390/mm/extmem.c
index 580fc64cc735..f231f5ec74b6 100644
--- a/trunk/arch/s390/mm/extmem.c
+++ b/trunk/arch/s390/mm/extmem.c
@@ -43,40 +43,20 @@
#define DCSS_FINDSEG 0x0c
#define DCSS_LOADNOLY 0x10
#define DCSS_SEGEXT 0x18
-#define DCSS_LOADSHRX 0x20
-#define DCSS_LOADNSRX 0x24
-#define DCSS_FINDSEGX 0x2c
-#define DCSS_SEGEXTX 0x38
#define DCSS_FINDSEGA 0x0c
struct qrange {
- unsigned long start; /* last byte type */
- unsigned long end; /* last byte reserved */
+ unsigned int start; // 3byte start address, 1 byte type
+ unsigned int end; // 3byte end address, 1 byte reserved
};
struct qout64 {
- unsigned long segstart;
- unsigned long segend;
- int segcnt;
- int segrcnt;
- struct qrange range[6];
-};
-
-#ifdef CONFIG_64BIT
-struct qrange_old {
- unsigned int start; /* last byte type */
- unsigned int end; /* last byte reserved */
-};
-
-/* output area format for the Diag x'64' old subcode x'18' */
-struct qout64_old {
int segstart;
int segend;
int segcnt;
int segrcnt;
- struct qrange_old range[6];
+ struct qrange range[6];
};
-#endif
struct qin64 {
char qopcode;
@@ -106,55 +86,6 @@ static DEFINE_MUTEX(dcss_lock);
static LIST_HEAD(dcss_list);
static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC",
"EW/EN-MIXED" };
-static int loadshr_scode, loadnsr_scode, findseg_scode;
-static int segext_scode, purgeseg_scode;
-static int scode_set;
-
-/* set correct Diag x'64' subcodes. */
-static int
-dcss_set_subcodes(void)
-{
-#ifdef CONFIG_64BIT
- char *name = kmalloc(8 * sizeof(char), GFP_DMA);
- unsigned long rx, ry;
- int rc;
-
- if (name == NULL)
- return -ENOMEM;
-
- rx = (unsigned long) name;
- ry = DCSS_FINDSEGX;
-
- strcpy(name, "dummy");
- asm volatile(
- " diag %0,%1,0x64\n"
- "0: ipm %2\n"
- " srl %2,28\n"
- " j 2f\n"
- "1: la %2,3\n"
- "2:\n"
- EX_TABLE(0b, 1b)
- : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
-
- kfree(name);
- /* Diag x'64' new subcodes are supported, set to new subcodes */
- if (rc != 3) {
- loadshr_scode = DCSS_LOADSHRX;
- loadnsr_scode = DCSS_LOADNSRX;
- purgeseg_scode = DCSS_PURGESEG;
- findseg_scode = DCSS_FINDSEGX;
- segext_scode = DCSS_SEGEXTX;
- return 0;
- }
-#endif
- /* Diag x'64' new subcodes are not supported, set to old subcodes */
- loadshr_scode = DCSS_LOADNOLY;
- loadnsr_scode = DCSS_LOADNSR;
- purgeseg_scode = DCSS_PURGESEG;
- findseg_scode = DCSS_FINDSEG;
- segext_scode = DCSS_SEGEXT;
- return 0;
-}
/*
* Create the 8 bytes, ebcdic VM segment name from
@@ -204,45 +135,25 @@ segment_by_name (char *name)
* Perform a function on a dcss segment.
*/
static inline int
-dcss_diag(int *func, void *parameter,
+dcss_diag (__u8 func, void *parameter,
unsigned long *ret1, unsigned long *ret2)
{
unsigned long rx, ry;
int rc;
- if (scode_set == 0) {
- rc = dcss_set_subcodes();
- if (rc < 0)
- return rc;
- scode_set = 1;
- }
rx = (unsigned long) parameter;
- ry = (unsigned long) *func;
-
+ ry = (unsigned long) func;
+ asm volatile(
#ifdef CONFIG_64BIT
- /* 64-bit Diag x'64' new subcode, keep in 64-bit addressing mode */
- if (*func > DCSS_SEGEXT)
- asm volatile(
- " diag %0,%1,0x64\n"
- " ipm %2\n"
- " srl %2,28\n"
- : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
- /* 31-bit Diag x'64' old subcode, switch to 31-bit addressing mode */
- else
- asm volatile(
- " sam31\n"
- " diag %0,%1,0x64\n"
- " sam64\n"
- " ipm %2\n"
- " srl %2,28\n"
- : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
+ " sam31\n"
+ " diag %0,%1,0x64\n"
+ " sam64\n"
#else
- asm volatile(
" diag %0,%1,0x64\n"
+#endif
" ipm %2\n"
" srl %2,28\n"
: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
-#endif
*ret1 = rx;
*ret2 = ry;
return rc;
@@ -279,45 +190,14 @@ query_segment_type (struct dcss_segment *seg)
qin->qoutlen = sizeof(struct qout64);
memcpy (qin->qname, seg->dcss_name, 8);
- diag_cc = dcss_diag(&segext_scode, qin, &dummy, &vmrc);
+ diag_cc = dcss_diag (DCSS_SEGEXT, qin, &dummy, &vmrc);
- if (diag_cc < 0) {
- rc = diag_cc;
- goto out_free;
- }
if (diag_cc > 1) {
PRINT_WARN ("segment_type: diag returned error %ld\n", vmrc);
rc = dcss_diag_translate_rc (vmrc);
goto out_free;
}
-#ifdef CONFIG_64BIT
- /* Only old format of output area of Diagnose x'64' is supported,
- copy data for the new format. */
- if (segext_scode == DCSS_SEGEXT) {
- struct qout64_old *qout_old;
- qout_old = kzalloc(sizeof(struct qout64_old), GFP_DMA);
- if (qout_old == NULL) {
- rc = -ENOMEM;
- goto out_free;
- }
- memcpy(qout_old, qout, sizeof(struct qout64_old));
- qout->segstart = (unsigned long) qout_old->segstart;
- qout->segend = (unsigned long) qout_old->segend;
- qout->segcnt = qout_old->segcnt;
- qout->segrcnt = qout_old->segrcnt;
-
- if (qout->segcnt > 6)
- qout->segrcnt = 6;
- for (i = 0; i < qout->segrcnt; i++) {
- qout->range[i].start =
- (unsigned long) qout_old->range[i].start;
- qout->range[i].end =
- (unsigned long) qout_old->range[i].end;
- }
- kfree(qout_old);
- }
-#endif
if (qout->segcnt > 6) {
rc = -ENOTSUPP;
goto out_free;
@@ -388,30 +268,6 @@ segment_type (char* name)
return seg.vm_segtype;
}
-/*
- * check if segment collides with other segments that are currently loaded
- * returns 1 if this is the case, 0 if no collision was found
- */
-static int
-segment_overlaps_others (struct dcss_segment *seg)
-{
- struct list_head *l;
- struct dcss_segment *tmp;
-
- BUG_ON(!mutex_is_locked(&dcss_lock));
- list_for_each(l, &dcss_list) {
- tmp = list_entry(l, struct dcss_segment, list);
- if ((tmp->start_addr >> 20) > (seg->end >> 20))
- continue;
- if ((tmp->end >> 20) < (seg->start_addr >> 20))
- continue;
- if (seg == tmp)
- continue;
- return 1;
- }
- return 0;
-}
-
/*
* real segment loading function, called from segment_load
*/
@@ -420,8 +276,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
{
struct dcss_segment *seg = kmalloc(sizeof(struct dcss_segment),
GFP_DMA);
- int rc, diag_cc;
- unsigned long start_addr, end_addr, dummy;
+ int dcss_command, rc, diag_cc;
if (seg == NULL) {
rc = -ENOMEM;
@@ -432,13 +287,6 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
if (rc < 0)
goto out_free;
- if (loadshr_scode == DCSS_LOADSHRX) {
- if (segment_overlaps_others(seg)) {
- rc = -EBUSY;
- goto out_free;
- }
- }
-
rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
if (rc)
@@ -468,28 +316,20 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
}
if (do_nonshared)
- diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name,
- &start_addr, &end_addr);
+ dcss_command = DCSS_LOADNSR;
else
- diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name,
- &start_addr, &end_addr);
- if (diag_cc < 0) {
- dcss_diag(&purgeseg_scode, seg->dcss_name,
- &dummy, &dummy);
- rc = diag_cc;
- goto out_resource;
- }
+ dcss_command = DCSS_LOADNOLY;
+
+ diag_cc = dcss_diag(dcss_command, seg->dcss_name,
+ &seg->start_addr, &seg->end);
if (diag_cc > 1) {
PRINT_WARN ("segment_load: could not load segment %s - "
- "diag returned error (%ld)\n",
- name, end_addr);
- rc = dcss_diag_translate_rc(end_addr);
- dcss_diag(&purgeseg_scode, seg->dcss_name,
- &dummy, &dummy);
+ "diag returned error (%ld)\n",name,seg->end);
+ rc = dcss_diag_translate_rc (seg->end);
+ dcss_diag(DCSS_PURGESEG, seg->dcss_name,
+ &seg->start_addr, &seg->end);
goto out_resource;
}
- seg->start_addr = start_addr;
- seg->end = end_addr;
seg->do_nonshared = do_nonshared;
atomic_set(&seg->ref_count, 1);
list_add(&seg->list, &dcss_list);
@@ -583,8 +423,8 @@ int
segment_modify_shared (char *name, int do_nonshared)
{
struct dcss_segment *seg;
- unsigned long start_addr, end_addr, dummy;
- int rc, diag_cc;
+ unsigned long dummy;
+ int dcss_command, rc, diag_cc;
mutex_lock(&dcss_lock);
seg = segment_by_name (name);
@@ -605,51 +445,38 @@ segment_modify_shared (char *name, int do_nonshared)
goto out_unlock;
}
release_resource(seg->res);
- if (do_nonshared)
+ if (do_nonshared) {
+ dcss_command = DCSS_LOADNSR;
seg->res->flags &= ~IORESOURCE_READONLY;
- else
+ } else {
+ dcss_command = DCSS_LOADNOLY;
if (seg->vm_segtype == SEG_TYPE_SR ||
seg->vm_segtype == SEG_TYPE_ER)
seg->res->flags |= IORESOURCE_READONLY;
-
+ }
if (request_resource(&iomem_resource, seg->res)) {
PRINT_WARN("segment_modify_shared: could not reload segment %s"
" - overlapping resources\n", name);
rc = -EBUSY;
kfree(seg->res);
- goto out_del_mem;
- }
-
- dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
- if (do_nonshared)
- diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name,
- &start_addr, &end_addr);
- else
- diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name,
- &start_addr, &end_addr);
- if (diag_cc < 0) {
- rc = diag_cc;
- goto out_del_res;
+ goto out_del;
}
+ dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy);
+ diag_cc = dcss_diag(dcss_command, seg->dcss_name,
+ &seg->start_addr, &seg->end);
if (diag_cc > 1) {
PRINT_WARN ("segment_modify_shared: could not reload segment %s"
- " - diag returned error (%ld)\n",
- name, end_addr);
- rc = dcss_diag_translate_rc(end_addr);
- goto out_del_res;
+ " - diag returned error (%ld)\n",name,seg->end);
+ rc = dcss_diag_translate_rc (seg->end);
+ goto out_del;
}
- seg->start_addr = start_addr;
- seg->end = end_addr;
seg->do_nonshared = do_nonshared;
rc = 0;
goto out_unlock;
- out_del_res:
- release_resource(seg->res);
- kfree(seg->res);
- out_del_mem:
+ out_del:
vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
list_del(&seg->list);
- dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+ dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy);
kfree(seg);
out_unlock:
mutex_unlock(&dcss_lock);
@@ -683,7 +510,7 @@ segment_unload(char *name)
kfree(seg->res);
vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
list_del(&seg->list);
- dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+ dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy);
kfree(seg);
out_unlock:
mutex_unlock(&dcss_lock);
@@ -718,7 +545,7 @@ segment_save(char *name)
endpfn = (seg->end) >> PAGE_SHIFT;
sprintf(cmd1, "DEFSEG %s", name);
for (i=0; isegcnt; i++) {
- sprintf(cmd1+strlen(cmd1), " %lX-%lX %s",
+ sprintf(cmd1+strlen(cmd1), " %X-%X %s",
seg->range[i].start >> PAGE_SHIFT,
seg->range[i].end >> PAGE_SHIFT,
segtype_string[seg->range[i].start & 0xff]);
diff --git a/trunk/arch/sh/kernel/smp.c b/trunk/arch/sh/kernel/smp.c
index 001778f9adaf..60c50841143e 100644
--- a/trunk/arch/sh/kernel/smp.c
+++ b/trunk/arch/sh/kernel/smp.c
@@ -82,8 +82,6 @@ asmlinkage void __cpuinit start_secondary(void)
preempt_disable();
- notify_cpu_starting(smp_processor_id());
-
local_irq_enable();
calibrate_delay();
diff --git a/trunk/arch/sparc/kernel/sun4d_smp.c b/trunk/arch/sparc/kernel/sun4d_smp.c
index 446767e8f569..69596402a500 100644
--- a/trunk/arch/sparc/kernel/sun4d_smp.c
+++ b/trunk/arch/sparc/kernel/sun4d_smp.c
@@ -88,7 +88,6 @@ void __init smp4d_callin(void)
local_flush_cache_all();
local_flush_tlb_all();
- notify_cpu_starting(cpuid);
/*
* Unblock the master CPU _only_ when the scheduler state
* of all secondary CPUs will be up-to-date, so after
diff --git a/trunk/arch/sparc/kernel/sun4m_smp.c b/trunk/arch/sparc/kernel/sun4m_smp.c
index 9964890dc1db..a14a76ac7f36 100644
--- a/trunk/arch/sparc/kernel/sun4m_smp.c
+++ b/trunk/arch/sparc/kernel/sun4m_smp.c
@@ -71,8 +71,6 @@ void __cpuinit smp4m_callin(void)
local_flush_cache_all();
local_flush_tlb_all();
- notify_cpu_starting(cpuid);
-
/* Get our local ticker going. */
smp_setup_percpu_timer();
diff --git a/trunk/arch/um/kernel/smp.c b/trunk/arch/um/kernel/smp.c
index 045772142844..be2d50c3aa95 100644
--- a/trunk/arch/um/kernel/smp.c
+++ b/trunk/arch/um/kernel/smp.c
@@ -85,7 +85,6 @@ static int idle_proc(void *cpup)
while (!cpu_isset(cpu, smp_commenced_mask))
cpu_relax();
- notify_cpu_starting(cpu);
cpu_set(cpu, cpu_online_map);
default_idle();
return 0;
diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig
index 97f0d2b6dc0c..ed92864d1325 100644
--- a/trunk/arch/x86/Kconfig
+++ b/trunk/arch/x86/Kconfig
@@ -29,7 +29,6 @@ config X86
select HAVE_FTRACE
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
select HAVE_ARCH_KGDB if !X86_VOYAGER
- select HAVE_ARCH_TRACEHOOK
select HAVE_GENERIC_DMA_COHERENT if X86_32
select HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -1021,7 +1020,7 @@ config HAVE_ARCH_ALLOC_REMAP
config ARCH_FLATMEM_ENABLE
def_bool y
- depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA
+ depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC && !NUMA
config ARCH_DISCONTIGMEM_ENABLE
def_bool y
@@ -1037,7 +1036,7 @@ config ARCH_SPARSEMEM_DEFAULT
config ARCH_SPARSEMEM_ENABLE
def_bool y
- depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) || X86_GENERICARCH
+ depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC)
select SPARSEMEM_STATIC if X86_32
select SPARSEMEM_VMEMMAP_ENABLE if X86_64
@@ -1118,10 +1117,10 @@ config MTRR
You can safely say Y even if your machine doesn't have MTRRs, you'll
just add about 9 KB to your kernel.
- See for more information.
+ See for more information.
config MTRR_SANITIZER
- def_bool y
+ bool
prompt "MTRR cleanup support"
depends on MTRR
help
@@ -1132,7 +1131,7 @@ config MTRR_SANITIZER
The largest mtrr entry size for a continous block can be set with
mtrr_chunk_size.
- If unsure, say Y.
+ If unsure, say N.
config MTRR_SANITIZER_ENABLE_DEFAULT
int "MTRR cleanup enable value (0-1)"
@@ -1192,6 +1191,7 @@ config IRQBALANCE
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
+ depends on PROC_FS
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
@@ -1199,7 +1199,7 @@ config SECCOMP
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
- enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
+ enabled via /proc//seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
@@ -1356,14 +1356,14 @@ config PHYSICAL_ALIGN
Don't change this unless you know what you are doing.
config HOTPLUG_CPU
- bool "Support for hot-pluggable CPUs"
- depends on SMP && HOTPLUG && !X86_VOYAGER
+ bool "Support for suspend on SMP and hot-pluggable CPUs (EXPERIMENTAL)"
+ depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER
---help---
- Say Y here to allow turning CPUs off and on. CPUs can be
- controlled through /sys/devices/system/cpu.
- ( Note: power management support will enable this option
- automatically on SMP systems. )
- Say N if you want to disable CPU hotplug.
+ Say Y here to experiment with turning CPUs off and on, and to
+ enable suspend on SMP systems. CPUs can be controlled through
+ /sys/devices/system/cpu.
+ Say N if you want to disable CPU hotplug and don't need to
+ suspend.
config COMPAT_VDSO
def_bool y
@@ -1378,51 +1378,6 @@ config COMPAT_VDSO
If unsure, say Y.
-config CMDLINE_BOOL
- bool "Built-in kernel command line"
- default n
- help
- Allow for specifying boot arguments to the kernel at
- build time. On some systems (e.g. embedded ones), it is
- necessary or convenient to provide some or all of the
- kernel boot arguments with the kernel itself (that is,
- to not rely on the boot loader to provide them.)
-
- To compile command line arguments into the kernel,
- set this option to 'Y', then fill in the
- the boot arguments in CONFIG_CMDLINE.
-
- Systems with fully functional boot loaders (i.e. non-embedded)
- should leave this option set to 'N'.
-
-config CMDLINE
- string "Built-in kernel command string"
- depends on CMDLINE_BOOL
- default ""
- help
- Enter arguments here that should be compiled into the kernel
- image and used at boot time. If the boot loader provides a
- command line at boot time, it is appended to this string to
- form the full kernel command line, when the system boots.
-
- However, you can use the CONFIG_CMDLINE_OVERRIDE option to
- change this behavior.
-
- In most cases, the command line (whether built-in or provided
- by the boot loader) should specify the device for the root
- file system.
-
-config CMDLINE_OVERRIDE
- bool "Built-in command line overrides boot loader arguments"
- default n
- depends on CMDLINE_BOOL
- help
- Set this option to 'Y' to have the kernel ignore the boot loader
- command line, and use ONLY the built-in command line.
-
- This is used to work around broken boot loaders. This should
- be set to 'N' under normal conditions.
-
endmenu
config ARCH_ENABLE_MEMORY_HOTPLUG
@@ -1818,7 +1773,7 @@ config COMPAT_FOR_U64_ALIGNMENT
config SYSVIPC_COMPAT
def_bool y
- depends on COMPAT && SYSVIPC
+ depends on X86_64 && COMPAT && SYSVIPC
endmenu
diff --git a/trunk/arch/x86/Kconfig.cpu b/trunk/arch/x86/Kconfig.cpu
index 60a85768cfcb..b225219c448c 100644
--- a/trunk/arch/x86/Kconfig.cpu
+++ b/trunk/arch/x86/Kconfig.cpu
@@ -418,21 +418,3 @@ config X86_MINIMUM_CPU_FAMILY
config X86_DEBUGCTLMSR
def_bool y
depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)
-
-config X86_DS
- bool "Debug Store support"
- default y
- help
- Add support for Debug Store.
- This allows the kernel to provide a memory buffer to the hardware
- to store various profiling and tracing events.
-
-config X86_PTRACE_BTS
- bool "ptrace interface to Branch Trace Store"
- default y
- depends on (X86_DS && X86_DEBUGCTLMSR)
- help
- Add a ptrace interface to allow collecting an execution trace
- of the traced task.
- This collects control flow changes in a (cyclic) buffer and allows
- debuggers to fill in the gaps and show an execution trace of the debuggee.
diff --git a/trunk/arch/x86/boot/compressed/head_32.S b/trunk/arch/x86/boot/compressed/head_32.S
index 29c5fbf08392..ba7736cf2ec7 100644
--- a/trunk/arch/x86/boot/compressed/head_32.S
+++ b/trunk/arch/x86/boot/compressed/head_32.S
@@ -137,15 +137,14 @@ relocated:
*/
movl output_len(%ebx), %eax
pushl %eax
- # push arguments for decompress_kernel:
pushl %ebp # output address
movl input_len(%ebx), %eax
pushl %eax # input_len
leal input_data(%ebx), %eax
pushl %eax # input_data
leal boot_heap(%ebx), %eax
- pushl %eax # heap area
- pushl %esi # real mode pointer
+ pushl %eax # heap area as third argument
+ pushl %esi # real mode pointer as second arg
call decompress_kernel
addl $20, %esp
popl %ecx
diff --git a/trunk/arch/x86/boot/compressed/misc.c b/trunk/arch/x86/boot/compressed/misc.c
index 5780d361105b..9fea73706479 100644
--- a/trunk/arch/x86/boot/compressed/misc.c
+++ b/trunk/arch/x86/boot/compressed/misc.c
@@ -16,7 +16,7 @@
*/
#undef CONFIG_PARAVIRT
#ifdef CONFIG_X86_32
-#define ASM_X86__DESC_H 1
+#define _ASM_DESC_H_ 1
#endif
#ifdef CONFIG_X86_64
@@ -27,7 +27,7 @@
#include
#include
#include
-#include
+#include
#include
#include
#include
@@ -251,7 +251,7 @@ static void __putstr(int error, const char *s)
y--;
}
} else {
- vidmem[(x + cols * y) * 2] = c;
+ vidmem [(x + cols * y) * 2] = c;
if (++x >= cols) {
x = 0;
if (++y >= lines) {
@@ -277,8 +277,7 @@ static void *memset(void *s, int c, unsigned n)
int i;
char *ss = s;
- for (i = 0; i < n; i++)
- ss[i] = c;
+ for (i = 0; i < n; i++) ss[i] = c;
return s;
}
@@ -288,8 +287,7 @@ static void *memcpy(void *dest, const void *src, unsigned n)
const char *s = src;
char *d = dest;
- for (i = 0; i < n; i++)
- d[i] = s[i];
+ for (i = 0; i < n; i++) d[i] = s[i];
return dest;
}
diff --git a/trunk/arch/x86/boot/header.S b/trunk/arch/x86/boot/header.S
index b993062e9a5f..af86e431acfa 100644
--- a/trunk/arch/x86/boot/header.S
+++ b/trunk/arch/x86/boot/header.S
@@ -30,6 +30,7 @@ SYSSEG = DEF_SYSSEG /* system loaded at 0x10000 (65536) */
SYSSIZE = DEF_SYSSIZE /* system size: # of 16-byte clicks */
/* to be loaded */
ROOT_DEV = 0 /* ROOT_DEV is now written by "build" */
+SWAP_DEV = 0 /* SWAP_DEV is now written by "build" */
#ifndef SVGA_MODE
#define SVGA_MODE ASK_VGA
diff --git a/trunk/arch/x86/configs/i386_defconfig b/trunk/arch/x86/configs/i386_defconfig
index ef9a52005ec9..104275e191a8 100644
--- a/trunk/arch/x86/configs/i386_defconfig
+++ b/trunk/arch/x86/configs/i386_defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.27-rc5
-# Wed Sep 3 17:23:09 2008
+# Linux kernel version: 2.6.27-rc4
+# Mon Aug 25 15:04:00 2008
#
# CONFIG_64BIT is not set
CONFIG_X86_32=y
@@ -202,7 +202,7 @@ CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
# CONFIG_M586 is not set
# CONFIG_M586TSC is not set
# CONFIG_M586MMX is not set
-CONFIG_M686=y
+# CONFIG_M686 is not set
# CONFIG_MPENTIUMII is not set
# CONFIG_MPENTIUMIII is not set
# CONFIG_MPENTIUMM is not set
@@ -221,14 +221,13 @@ CONFIG_M686=y
# CONFIG_MVIAC3_2 is not set
# CONFIG_MVIAC7 is not set
# CONFIG_MPSC is not set
-# CONFIG_MCORE2 is not set
+CONFIG_MCORE2=y
# CONFIG_GENERIC_CPU is not set
CONFIG_X86_GENERIC=y
CONFIG_X86_CPU=y
CONFIG_X86_CMPXCHG=y
CONFIG_X86_L1_CACHE_SHIFT=7
CONFIG_X86_XADD=y
-# CONFIG_X86_PPRO_FENCE is not set
CONFIG_X86_WP_WORKS_OK=y
CONFIG_X86_INVLPG=y
CONFIG_X86_BSWAP=y
@@ -236,15 +235,14 @@ CONFIG_X86_POPAD_OK=y
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
CONFIG_X86_TSC=y
-CONFIG_X86_CMOV=y
CONFIG_X86_MINIMUM_CPU_FAMILY=4
CONFIG_X86_DEBUGCTLMSR=y
CONFIG_HPET_TIMER=y
CONFIG_HPET_EMULATE_RTC=y
CONFIG_DMI=y
# CONFIG_IOMMU_HELPER is not set
-CONFIG_NR_CPUS=64
-CONFIG_SCHED_SMT=y
+CONFIG_NR_CPUS=4
+# CONFIG_SCHED_SMT is not set
CONFIG_SCHED_MC=y
# CONFIG_PREEMPT_NONE is not set
CONFIG_PREEMPT_VOLUNTARY=y
@@ -256,8 +254,7 @@ CONFIG_VM86=y
# CONFIG_TOSHIBA is not set
# CONFIG_I8K is not set
CONFIG_X86_REBOOTFIXUPS=y
-CONFIG_MICROCODE=y
-CONFIG_MICROCODE_OLD_INTERFACE=y
+# CONFIG_MICROCODE is not set
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
# CONFIG_NOHIGHMEM is not set
@@ -2118,7 +2115,7 @@ CONFIG_IO_DELAY_0X80=y
CONFIG_DEFAULT_IO_DELAY_TYPE=0
CONFIG_DEBUG_BOOT_PARAMS=y
# CONFIG_CPA_DEBUG is not set
-CONFIG_OPTIMIZE_INLINING=y
+# CONFIG_OPTIMIZE_INLINING is not set
#
# Security options
diff --git a/trunk/arch/x86/configs/x86_64_defconfig b/trunk/arch/x86/configs/x86_64_defconfig
index e620ea6e2a7a..678c8acefe04 100644
--- a/trunk/arch/x86/configs/x86_64_defconfig
+++ b/trunk/arch/x86/configs/x86_64_defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.27-rc5
-# Wed Sep 3 17:13:39 2008
+# Linux kernel version: 2.6.27-rc4
+# Mon Aug 25 14:40:46 2008
#
CONFIG_64BIT=y
# CONFIG_X86_32 is not set
@@ -218,14 +218,17 @@ CONFIG_X86_PC=y
# CONFIG_MVIAC3_2 is not set
# CONFIG_MVIAC7 is not set
# CONFIG_MPSC is not set
-# CONFIG_MCORE2 is not set
-CONFIG_GENERIC_CPU=y
+CONFIG_MCORE2=y
+# CONFIG_GENERIC_CPU is not set
CONFIG_X86_CPU=y
-CONFIG_X86_L1_CACHE_BYTES=128
-CONFIG_X86_INTERNODE_CACHE_BYTES=128
+CONFIG_X86_L1_CACHE_BYTES=64
+CONFIG_X86_INTERNODE_CACHE_BYTES=64
CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_L1_CACHE_SHIFT=6
CONFIG_X86_WP_WORKS_OK=y
+CONFIG_X86_INTEL_USERCOPY=y
+CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_P6_NOP=y
CONFIG_X86_TSC=y
CONFIG_X86_CMPXCHG64=y
CONFIG_X86_CMOV=y
@@ -240,8 +243,9 @@ CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y
CONFIG_AMD_IOMMU=y
CONFIG_SWIOTLB=y
CONFIG_IOMMU_HELPER=y
-CONFIG_NR_CPUS=64
-CONFIG_SCHED_SMT=y
+# CONFIG_MAXSMP is not set
+CONFIG_NR_CPUS=4
+# CONFIG_SCHED_SMT is not set
CONFIG_SCHED_MC=y
# CONFIG_PREEMPT_NONE is not set
CONFIG_PREEMPT_VOLUNTARY=y
@@ -250,8 +254,7 @@ CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
# CONFIG_X86_MCE is not set
# CONFIG_I8K is not set
-CONFIG_MICROCODE=y
-CONFIG_MICROCODE_OLD_INTERFACE=y
+# CONFIG_MICROCODE is not set
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
CONFIG_NUMA=y
@@ -287,7 +290,7 @@ CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
CONFIG_MTRR=y
# CONFIG_MTRR_SANITIZER is not set
-CONFIG_X86_PAT=y
+# CONFIG_X86_PAT is not set
CONFIG_EFI=y
CONFIG_SECCOMP=y
# CONFIG_HZ_100 is not set
@@ -2086,7 +2089,7 @@ CONFIG_IO_DELAY_0X80=y
CONFIG_DEFAULT_IO_DELAY_TYPE=0
CONFIG_DEBUG_BOOT_PARAMS=y
# CONFIG_CPA_DEBUG is not set
-CONFIG_OPTIMIZE_INLINING=y
+# CONFIG_OPTIMIZE_INLINING is not set
#
# Security options
diff --git a/trunk/arch/x86/crypto/Makefile b/trunk/arch/x86/crypto/Makefile
index 903de4aa5094..3874c2de5403 100644
--- a/trunk/arch/x86/crypto/Makefile
+++ b/trunk/arch/x86/crypto/Makefile
@@ -10,8 +10,6 @@ obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
-obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
-
aes-i586-y := aes-i586-asm_32.o aes_glue.o
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
diff --git a/trunk/arch/x86/crypto/crc32c-intel.c b/trunk/arch/x86/crypto/crc32c-intel.c
deleted file mode 100644
index 070afc5b6c94..000000000000
--- a/trunk/arch/x86/crypto/crc32c-intel.c
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal.
- * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
- * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
- * http://www.intel.com/products/processor/manuals/
- * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
- * Volume 2A: Instruction Set Reference, A-M
- *
- * Copyright (c) 2008 Austin Zhang
- * Copyright (c) 2008 Kent Liu
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-#include
-#include
-#include
-#include
-#include
-
-#include
-
-#define CHKSUM_BLOCK_SIZE 1
-#define CHKSUM_DIGEST_SIZE 4
-
-#define SCALE_F sizeof(unsigned long)
-
-#ifdef CONFIG_X86_64
-#define REX_PRE "0x48, "
-#else
-#define REX_PRE
-#endif
-
-static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
-{
- while (length--) {
- __asm__ __volatile__(
- ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
- :"=S"(crc)
- :"0"(crc), "c"(*data)
- );
- data++;
- }
-
- return crc;
-}
-
-static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len)
-{
- unsigned int iquotient = len / SCALE_F;
- unsigned int iremainder = len % SCALE_F;
- unsigned long *ptmp = (unsigned long *)p;
-
- while (iquotient--) {
- __asm__ __volatile__(
- ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
- :"=S"(crc)
- :"0"(crc), "c"(*ptmp)
- );
- ptmp++;
- }
-
- if (iremainder)
- crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp,
- iremainder);
-
- return crc;
-}
-
-/*
- * Setting the seed allows arbitrary accumulators and flexible XOR policy
- * If your algorithm starts with ~0, then XOR with ~0 before you set
- * the seed.
- */
-static int crc32c_intel_setkey(struct crypto_ahash *hash, const u8 *key,
- unsigned int keylen)
-{
- u32 *mctx = crypto_ahash_ctx(hash);
-
- if (keylen != sizeof(u32)) {
- crypto_ahash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
- return -EINVAL;
- }
- *mctx = le32_to_cpup((__le32 *)key);
- return 0;
-}
-
-static int crc32c_intel_init(struct ahash_request *req)
-{
- u32 *mctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
- u32 *crcp = ahash_request_ctx(req);
-
- *crcp = *mctx;
-
- return 0;
-}
-
-static int crc32c_intel_update(struct ahash_request *req)
-{
- struct crypto_hash_walk walk;
- u32 *crcp = ahash_request_ctx(req);
- u32 crc = *crcp;
- int nbytes;
-
- for (nbytes = crypto_hash_walk_first(req, &walk); nbytes;
- nbytes = crypto_hash_walk_done(&walk, 0))
- crc = crc32c_intel_le_hw(crc, walk.data, nbytes);
-
- *crcp = crc;
- return 0;
-}
-
-static int crc32c_intel_final(struct ahash_request *req)
-{
- u32 *crcp = ahash_request_ctx(req);
-
- *(__le32 *)req->result = ~cpu_to_le32p(crcp);
- return 0;
-}
-
-static int crc32c_intel_digest(struct ahash_request *req)
-{
- struct crypto_hash_walk walk;
- u32 *mctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
- u32 crc = *mctx;
- int nbytes;
-
- for (nbytes = crypto_hash_walk_first(req, &walk); nbytes;
- nbytes = crypto_hash_walk_done(&walk, 0))
- crc = crc32c_intel_le_hw(crc, walk.data, nbytes);
-
- *(__le32 *)req->result = ~cpu_to_le32(crc);
- return 0;
-}
-
-static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
-{
- u32 *key = crypto_tfm_ctx(tfm);
-
- *key = ~0;
-
- tfm->crt_ahash.reqsize = sizeof(u32);
-
- return 0;
-}
-
-static struct crypto_alg alg = {
- .cra_name = "crc32c",
- .cra_driver_name = "crc32c-intel",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_AHASH,
- .cra_blocksize = CHKSUM_BLOCK_SIZE,
- .cra_alignmask = 3,
- .cra_ctxsize = sizeof(u32),
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(alg.cra_list),
- .cra_init = crc32c_intel_cra_init,
- .cra_type = &crypto_ahash_type,
- .cra_u = {
- .ahash = {
- .digestsize = CHKSUM_DIGEST_SIZE,
- .setkey = crc32c_intel_setkey,
- .init = crc32c_intel_init,
- .update = crc32c_intel_update,
- .final = crc32c_intel_final,
- .digest = crc32c_intel_digest,
- }
- }
-};
-
-
-static int __init crc32c_intel_mod_init(void)
-{
- if (cpu_has_xmm4_2)
- return crypto_register_alg(&alg);
- else
- return -ENODEV;
-}
-
-static void __exit crc32c_intel_mod_fini(void)
-{
- crypto_unregister_alg(&alg);
-}
-
-module_init(crc32c_intel_mod_init);
-module_exit(crc32c_intel_mod_fini);
-
-MODULE_AUTHOR("Austin Zhang , Kent Liu ");
-MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware.");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS("crc32c");
-MODULE_ALIAS("crc32c-intel");
-
diff --git a/trunk/arch/x86/ia32/ia32_aout.c b/trunk/arch/x86/ia32/ia32_aout.c
index 127ec3f07214..a0e1dbe67dc1 100644
--- a/trunk/arch/x86/ia32/ia32_aout.c
+++ b/trunk/arch/x86/ia32/ia32_aout.c
@@ -85,10 +85,8 @@ static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
dump->regs.ax = regs->ax;
dump->regs.ds = current->thread.ds;
dump->regs.es = current->thread.es;
- savesegment(fs, fs);
- dump->regs.fs = fs;
- savesegment(gs, gs);
- dump->regs.gs = gs;
+ asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs;
+ asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs;
dump->regs.orig_ax = regs->orig_ax;
dump->regs.ip = regs->ip;
dump->regs.cs = regs->cs;
@@ -432,9 +430,8 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
current->mm->start_stack =
(unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
/* start thread */
- loadsegment(fs, 0);
- loadsegment(ds, __USER32_DS);
- loadsegment(es, __USER32_DS);
+ asm volatile("movl %0,%%fs" :: "r" (0)); \
+ asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS));
load_gs_index(0);
(regs)->ip = ex.a_entry;
(regs)->sp = current->mm->start_stack;
diff --git a/trunk/arch/x86/ia32/ia32_signal.c b/trunk/arch/x86/ia32/ia32_signal.c
index f1a2ac777faf..20af4c79579a 100644
--- a/trunk/arch/x86/ia32/ia32_signal.c
+++ b/trunk/arch/x86/ia32/ia32_signal.c
@@ -206,7 +206,7 @@ struct rt_sigframe
{ unsigned int cur; \
unsigned short pre; \
err |= __get_user(pre, &sc->seg); \
- savesegment(seg, cur); \
+ asm volatile("movl %%" #seg ",%0" : "=r" (cur)); \
pre |= mask; \
if (pre != cur) loadsegment(seg, pre); }
@@ -235,7 +235,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
*/
err |= __get_user(gs, &sc->gs);
gs |= 3;
- savesegment(gs, oldgs);
+ asm("movl %%gs,%0" : "=r" (oldgs));
if (gs != oldgs)
load_gs_index(gs);
@@ -355,13 +355,14 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
{
int tmp, err = 0;
- savesegment(gs, tmp);
+ tmp = 0;
+ __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
- savesegment(fs, tmp);
+ __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
- savesegment(ds, tmp);
+ __asm__("movl %%ds,%0" : "=r"(tmp): "0"(tmp));
err |= __put_user(tmp, (unsigned int __user *)&sc->ds);
- savesegment(es, tmp);
+ __asm__("movl %%es,%0" : "=r"(tmp): "0"(tmp));
err |= __put_user(tmp, (unsigned int __user *)&sc->es);
err |= __put_user((u32)regs->di, &sc->di);
@@ -497,8 +498,8 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
regs->dx = 0;
regs->cx = 0;
- loadsegment(ds, __USER32_DS);
- loadsegment(es, __USER32_DS);
+ asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
+ asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
regs->cs = __USER32_CS;
regs->ss = __USER32_DS;
@@ -590,8 +591,8 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->dx = (unsigned long) &frame->info;
regs->cx = (unsigned long) &frame->uc;
- loadsegment(ds, __USER32_DS);
- loadsegment(es, __USER32_DS);
+ asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
+ asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
regs->cs = __USER32_CS;
regs->ss = __USER32_DS;
diff --git a/trunk/arch/x86/ia32/sys_ia32.c b/trunk/arch/x86/ia32/sys_ia32.c
index beda4232ce69..d3c64088b981 100644
--- a/trunk/arch/x86/ia32/sys_ia32.c
+++ b/trunk/arch/x86/ia32/sys_ia32.c
@@ -556,6 +556,15 @@ asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig,
return ret;
}
+/* These are here just in case some old ia32 binary calls it. */
+asmlinkage long sys32_pause(void)
+{
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ return -ERESTARTNOHAND;
+}
+
+
#ifdef CONFIG_SYSCTL_SYSCALL
struct sysctl_ia32 {
unsigned int name;
diff --git a/trunk/arch/x86/kernel/acpi/boot.c b/trunk/arch/x86/kernel/acpi/boot.c
index 7d40ef7b36e3..c102af85df9c 100644
--- a/trunk/arch/x86/kernel/acpi/boot.c
+++ b/trunk/arch/x86/kernel/acpi/boot.c
@@ -58,6 +58,7 @@ EXPORT_SYMBOL(acpi_disabled);
#ifdef CONFIG_X86_64
#include
+#include
#else /* X86 */
@@ -96,6 +97,8 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
#warning ACPI uses CMPXCHG, i486 and later hardware
#endif
+static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
+
/* --------------------------------------------------------------------------
Boot-time Configuration
-------------------------------------------------------------------------- */
@@ -157,8 +160,6 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
struct acpi_mcfg_allocation *pci_mmcfg_config;
int pci_mmcfg_config_num;
-static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
-
static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
{
if (!strcmp(mcfg->header.oem_id, "SGI"))
diff --git a/trunk/arch/x86/kernel/alternative.c b/trunk/arch/x86/kernel/alternative.c
index fb04e49776ba..65a0c1b48696 100644
--- a/trunk/arch/x86/kernel/alternative.c
+++ b/trunk/arch/x86/kernel/alternative.c
@@ -231,25 +231,25 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
continue;
if (*ptr > text_end)
continue;
- /* turn DS segment override prefix into lock prefix */
- text_poke(*ptr, ((unsigned char []){0xf0}), 1);
+ text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */
};
}
static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
u8 **ptr;
+ char insn[1];
if (noreplace_smp)
return;
+ add_nops(insn, 1);
for (ptr = start; ptr < end; ptr++) {
if (*ptr < text)
continue;
if (*ptr > text_end)
continue;
- /* turn lock prefix into DS segment override prefix */
- text_poke(*ptr, ((unsigned char []){0x3E}), 1);
+ text_poke(*ptr, insn, 1);
};
}
diff --git a/trunk/arch/x86/kernel/aperture_64.c b/trunk/arch/x86/kernel/aperture_64.c
index 9a32b37ee2ee..44e21826db11 100644
--- a/trunk/arch/x86/kernel/aperture_64.c
+++ b/trunk/arch/x86/kernel/aperture_64.c
@@ -455,11 +455,11 @@ void __init gart_iommu_hole_init(void)
force_iommu ||
valid_agp ||
fallback_aper_force) {
- printk(KERN_INFO
+ printk(KERN_ERR
"Your BIOS doesn't leave a aperture memory hole\n");
- printk(KERN_INFO
+ printk(KERN_ERR
"Please enable the IOMMU option in the BIOS setup\n");
- printk(KERN_INFO
+ printk(KERN_ERR
"This costs you %d MB of RAM\n",
32 << fallback_aper_order);
diff --git a/trunk/arch/x86/kernel/apm_32.c b/trunk/arch/x86/kernel/apm_32.c
index 5145a6e72bbb..732d1f4e10ee 100644
--- a/trunk/arch/x86/kernel/apm_32.c
+++ b/trunk/arch/x86/kernel/apm_32.c
@@ -228,6 +228,7 @@
#include
#include
#include
+#include
#include
#include
diff --git a/trunk/arch/x86/kernel/asm-offsets_64.c b/trunk/arch/x86/kernel/asm-offsets_64.c
index 505543a75a56..aa89387006fe 100644
--- a/trunk/arch/x86/kernel/asm-offsets_64.c
+++ b/trunk/arch/x86/kernel/asm-offsets_64.c
@@ -22,7 +22,7 @@
#define __NO_STUBS 1
#undef __SYSCALL
-#undef ASM_X86__UNISTD_64_H
+#undef _ASM_X86_64_UNISTD_H_
#define __SYSCALL(nr, sym) [nr] = 1,
static char syscalls[] = {
#include
diff --git a/trunk/arch/x86/kernel/bios_uv.c b/trunk/arch/x86/kernel/bios_uv.c
index fdd585f9c53d..c639bd55391c 100644
--- a/trunk/arch/x86/kernel/bios_uv.c
+++ b/trunk/arch/x86/kernel/bios_uv.c
@@ -25,11 +25,11 @@ x86_bios_strerror(long status)
{
const char *str;
switch (status) {
- case 0: str = "Call completed without error"; break;
- case -1: str = "Not implemented"; break;
- case -2: str = "Invalid argument"; break;
- case -3: str = "Call completed with error"; break;
- default: str = "Unknown BIOS status code"; break;
+ case 0: str = "Call completed without error"; break;
+ case -1: str = "Not implemented"; break;
+ case -2: str = "Invalid argument"; break;
+ case -3: str = "Call completed with error"; break;
+ default: str = "Unknown BIOS status code"; break;
}
return str;
}
diff --git a/trunk/arch/x86/kernel/cpu/common_64.c b/trunk/arch/x86/kernel/cpu/common_64.c
index 305b465889b0..a11f5d4477cd 100644
--- a/trunk/arch/x86/kernel/cpu/common_64.c
+++ b/trunk/arch/x86/kernel/cpu/common_64.c
@@ -430,49 +430,6 @@ static __init int setup_noclflush(char *arg)
}
__setup("noclflush", setup_noclflush);
-struct msr_range {
- unsigned min;
- unsigned max;
-};
-
-static struct msr_range msr_range_array[] __cpuinitdata = {
- { 0x00000000, 0x00000418},
- { 0xc0000000, 0xc000040b},
- { 0xc0010000, 0xc0010142},
- { 0xc0011000, 0xc001103b},
-};
-
-static void __cpuinit print_cpu_msr(void)
-{
- unsigned index;
- u64 val;
- int i;
- unsigned index_min, index_max;
-
- for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
- index_min = msr_range_array[i].min;
- index_max = msr_range_array[i].max;
- for (index = index_min; index < index_max; index++) {
- if (rdmsrl_amd_safe(index, &val))
- continue;
- printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
- }
- }
-}
-
-static int show_msr __cpuinitdata;
-static __init int setup_show_msr(char *arg)
-{
- int num;
-
- get_option(&arg, &num);
-
- if (num > 0)
- show_msr = num;
- return 1;
-}
-__setup("show_msr=", setup_show_msr);
-
void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
{
if (c->x86_model_id[0])
@@ -482,14 +439,6 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
printk(KERN_CONT " stepping %02x\n", c->x86_mask);
else
printk(KERN_CONT "\n");
-
-#ifdef CONFIG_SMP
- if (c->cpu_index < show_msr)
- print_cpu_msr();
-#else
- if (show_msr)
- print_cpu_msr();
-#endif
}
static __init int setup_disablecpuid(char *arg)
diff --git a/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index c24c4a487b7c..dd097b835839 100644
--- a/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -256,8 +256,7 @@ static u32 get_cur_val(const cpumask_t *mask)
* Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
* no meaning should be associated with absolute values of these MSRs.
*/
-static unsigned int get_measured_perf(struct cpufreq_policy *policy,
- unsigned int cpu)
+static unsigned int get_measured_perf(unsigned int cpu)
{
union {
struct {
@@ -327,7 +326,7 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy,
#endif
- retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100;
+ retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100;
put_cpu();
set_cpus_allowed_ptr(current, &saved_mask);
@@ -786,11 +785,7 @@ static int __init acpi_cpufreq_init(void)
if (ret)
return ret;
- ret = cpufreq_register_driver(&acpi_cpufreq_driver);
- if (ret)
- free_percpu(acpi_perf_data);
-
- return ret;
+ return cpufreq_register_driver(&acpi_cpufreq_driver);
}
static void __exit acpi_cpufreq_exit(void)
@@ -800,6 +795,8 @@ static void __exit acpi_cpufreq_exit(void)
cpufreq_unregister_driver(&acpi_cpufreq_driver);
free_percpu(acpi_perf_data);
+
+ return;
}
module_param(acpi_pstate_strict, uint, 0644);
diff --git a/trunk/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/trunk/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index fe613c93b366..e4a4bf870e94 100644
--- a/trunk/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/trunk/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -25,8 +25,8 @@
#include
#include
-#include
-#include
+#include
+#include
#define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */
#define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */
@@ -82,7 +82,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
u8 clockspeed_reg; /* Clock Speed Register */
local_irq_disable();
- outb_p(0x80, REG_CSCIR);
+ outb_p(0x80,REG_CSCIR);
clockspeed_reg = inb_p(REG_CSCDR);
local_irq_enable();
@@ -98,10 +98,10 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
}
/* 33 MHz is not 32 MHz... */
- if ((clockspeed_reg & 0xE0) == 0xA0)
+ if ((clockspeed_reg & 0xE0)==0xA0)
return 33000;
- return (1<<((clockspeed_reg & 0xE0) >> 5)) * 1000;
+ return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000);
}
@@ -117,7 +117,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
* There is no return value.
*/
-static void elanfreq_set_cpu_state(unsigned int state)
+static void elanfreq_set_cpu_state (unsigned int state)
{
struct cpufreq_freqs freqs;
@@ -144,20 +144,20 @@ static void elanfreq_set_cpu_state(unsigned int state)
*/
local_irq_disable();
- outb_p(0x40, REG_CSCIR); /* Disable hyperspeed mode */
- outb_p(0x00, REG_CSCDR);
+ outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */
+ outb_p(0x00,REG_CSCDR);
local_irq_enable(); /* wait till internal pipelines and */
udelay(1000); /* buffers have cleaned up */
local_irq_disable();
/* now, set the CPU clock speed register (0x80) */
- outb_p(0x80, REG_CSCIR);
- outb_p(elan_multiplier[state].val80h, REG_CSCDR);
+ outb_p(0x80,REG_CSCIR);
+ outb_p(elan_multiplier[state].val80h,REG_CSCDR);
/* now, the hyperspeed bit in PMU Force Mode Register (0x40) */
- outb_p(0x40, REG_CSCIR);
- outb_p(elan_multiplier[state].val40h, REG_CSCDR);
+ outb_p(0x40,REG_CSCIR);
+ outb_p(elan_multiplier[state].val40h,REG_CSCDR);
udelay(10000);
local_irq_enable();
@@ -173,12 +173,12 @@ static void elanfreq_set_cpu_state(unsigned int state)
* for the hardware supported by the driver.
*/
-static int elanfreq_verify(struct cpufreq_policy *policy)
+static int elanfreq_verify (struct cpufreq_policy *policy)
{
return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]);
}
-static int elanfreq_target(struct cpufreq_policy *policy,
+static int elanfreq_target (struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
{
@@ -205,7 +205,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy)
/* capability check */
if ((c->x86_vendor != X86_VENDOR_AMD) ||
- (c->x86 != 4) || (c->x86_model != 10))
+ (c->x86 != 4) || (c->x86_model!=10))
return -ENODEV;
/* max freq */
@@ -213,7 +213,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy)
max_freq = elanfreq_get_cpu_frequency(0);
/* table init */
- for (i = 0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) {
+ for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) {
if (elanfreq_table[i].frequency > max_freq)
elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID;
}
@@ -224,7 +224,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy)
result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table);
if (result)
- return result;
+ return (result);
cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu);
return 0;
@@ -260,7 +260,7 @@ __setup("elanfreq=", elanfreq_setup);
#endif
-static struct freq_attr *elanfreq_attr[] = {
+static struct freq_attr* elanfreq_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
NULL,
};
@@ -284,9 +284,9 @@ static int __init elanfreq_init(void)
/* Test if we have the right hardware */
if ((c->x86_vendor != X86_VENDOR_AMD) ||
- (c->x86 != 4) || (c->x86_model != 10)) {
+ (c->x86 != 4) || (c->x86_model!=10)) {
printk(KERN_INFO "elanfreq: error: no Elan processor found!\n");
- return -ENODEV;
+ return -ENODEV;
}
return cpufreq_register_driver(&elanfreq_driver);
}
@@ -298,7 +298,7 @@ static void __exit elanfreq_exit(void)
}
-module_param(max_freq, int, 0444);
+module_param (max_freq, int, 0444);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Robert Schwebel , Sven Geggus ");
diff --git a/trunk/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/trunk/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index b5ced806a316..eb9b62b0830c 100644
--- a/trunk/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/trunk/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -15,11 +15,12 @@
#include
#include
-#include
-#include
+#include
+#include
-#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long
- as it is unused */
+
+#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long
+ as it is unused */
static unsigned int busfreq; /* FSB, in 10 kHz */
static unsigned int max_multiplier;
@@ -52,7 +53,7 @@ static int powernow_k6_get_cpu_multiplier(void)
msrval = POWERNOW_IOPORT + 0x1;
wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
- invalue = inl(POWERNOW_IOPORT + 0x8);
+ invalue=inl(POWERNOW_IOPORT + 0x8);
msrval = POWERNOW_IOPORT + 0x0;
wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
@@ -66,9 +67,9 @@ static int powernow_k6_get_cpu_multiplier(void)
*
* Tries to change the PowerNow! multiplier
*/
-static void powernow_k6_set_state(unsigned int best_i)
+static void powernow_k6_set_state (unsigned int best_i)
{
- unsigned long outvalue = 0, invalue = 0;
+ unsigned long outvalue=0, invalue=0;
unsigned long msrval;
struct cpufreq_freqs freqs;
@@ -89,10 +90,10 @@ static void powernow_k6_set_state(unsigned int best_i)
msrval = POWERNOW_IOPORT + 0x1;
wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
- invalue = inl(POWERNOW_IOPORT + 0x8);
+ invalue=inl(POWERNOW_IOPORT + 0x8);
invalue = invalue & 0xf;
outvalue = outvalue | invalue;
- outl(outvalue , (POWERNOW_IOPORT + 0x8));
+ outl(outvalue ,(POWERNOW_IOPORT + 0x8));
msrval = POWERNOW_IOPORT + 0x0;
wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
@@ -123,7 +124,7 @@ static int powernow_k6_verify(struct cpufreq_policy *policy)
*
* sets a new CPUFreq policy
*/
-static int powernow_k6_target(struct cpufreq_policy *policy,
+static int powernow_k6_target (struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
{
@@ -151,7 +152,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
busfreq = cpu_khz / max_multiplier;
/* table init */
- for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
+ for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
if (clock_ratio[i].index > max_multiplier)
clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID;
else
@@ -164,7 +165,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio);
if (result)
- return result;
+ return (result);
cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu);
@@ -175,8 +176,8 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
static int powernow_k6_cpu_exit(struct cpufreq_policy *policy)
{
unsigned int i;
- for (i = 0; i < 8; i++) {
- if (i == max_multiplier)
+ for (i=0; i<8; i++) {
+ if (i==max_multiplier)
powernow_k6_set_state(i);
}
cpufreq_frequency_table_put_attr(policy->cpu);
@@ -188,7 +189,7 @@ static unsigned int powernow_k6_get(unsigned int cpu)
return busfreq * powernow_k6_get_cpu_multiplier();
}
-static struct freq_attr *powernow_k6_attr[] = {
+static struct freq_attr* powernow_k6_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
NULL,
};
@@ -226,7 +227,7 @@ static int __init powernow_k6_init(void)
}
if (cpufreq_register_driver(&powernow_k6_driver)) {
- release_region(POWERNOW_IOPORT, 16);
+ release_region (POWERNOW_IOPORT, 16);
return -EINVAL;
}
@@ -242,13 +243,13 @@ static int __init powernow_k6_init(void)
static void __exit powernow_k6_exit(void)
{
cpufreq_unregister_driver(&powernow_k6_driver);
- release_region(POWERNOW_IOPORT, 16);
+ release_region (POWERNOW_IOPORT, 16);
}
-MODULE_AUTHOR("Arjan van de Ven , Dave Jones , Dominik Brodowski ");
-MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
-MODULE_LICENSE("GPL");
+MODULE_AUTHOR ("Arjan van de Ven , Dave Jones , Dominik Brodowski ");
+MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
+MODULE_LICENSE ("GPL");
module_init(powernow_k6_init);
module_exit(powernow_k6_exit);
diff --git a/trunk/arch/x86/kernel/cpu/intel.c b/trunk/arch/x86/kernel/cpu/intel.c
index f113ef4595f6..b75f2569b8f8 100644
--- a/trunk/arch/x86/kernel/cpu/intel.c
+++ b/trunk/arch/x86/kernel/cpu/intel.c
@@ -222,11 +222,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_BTS);
if (!(l1 & (1<<12)))
set_cpu_cap(c, X86_FEATURE_PEBS);
- ds_init_intel(c);
}
if (cpu_has_bts)
- ptrace_bts_init_intel(c);
+ ds_init_intel(c);
/*
* See if we have a good local APIC by checking for buggy Pentia,
diff --git a/trunk/arch/x86/kernel/cpu/mtrr/generic.c b/trunk/arch/x86/kernel/cpu/mtrr/generic.c
index 4e8d77f01eeb..cb7d3b6a80eb 100644
--- a/trunk/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/trunk/arch/x86/kernel/cpu/mtrr/generic.c
@@ -401,7 +401,12 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
tmp |= ~((1<<(hi - 1)) - 1);
if (tmp != mask_lo) {
- WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n");
+ static int once = 1;
+
+ if (once) {
+ printk(KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n");
+ once = 0;
+ }
mask_lo = tmp;
}
}
diff --git a/trunk/arch/x86/kernel/cpu/mtrr/if.c b/trunk/arch/x86/kernel/cpu/mtrr/if.c
index 4c4214690dd1..84c480bb3715 100644
--- a/trunk/arch/x86/kernel/cpu/mtrr/if.c
+++ b/trunk/arch/x86/kernel/cpu/mtrr/if.c
@@ -405,9 +405,9 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
}
/* RED-PEN: base can be > 32bit */
len += seq_printf(seq,
- "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n",
+ "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n",
i, base, base >> (20 - PAGE_SHIFT), size, factor,
- mtrr_usage_table[i], mtrr_attrib_to_str(type));
+ mtrr_attrib_to_str(type), mtrr_usage_table[i]);
}
}
return 0;
diff --git a/trunk/arch/x86/kernel/cpu/mtrr/main.c b/trunk/arch/x86/kernel/cpu/mtrr/main.c
index c78c04821ea1..885c8265e6b5 100644
--- a/trunk/arch/x86/kernel/cpu/mtrr/main.c
+++ b/trunk/arch/x86/kernel/cpu/mtrr/main.c
@@ -729,7 +729,7 @@ struct var_mtrr_range_state {
mtrr_type type;
};
-static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
static int __initdata debug_print;
static int __init
@@ -759,8 +759,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
/* take out UC ranges */
for (i = 0; i < num_var_ranges; i++) {
type = range_state[i].type;
- if (type != MTRR_TYPE_UNCACHABLE &&
- type != MTRR_TYPE_WRPROT)
+ if (type != MTRR_TYPE_UNCACHABLE)
continue;
size = range_state[i].size_pfn;
if (!size)
@@ -837,13 +836,6 @@ static int __init enable_mtrr_cleanup_setup(char *str)
}
early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
-static int __init mtrr_cleanup_debug_setup(char *str)
-{
- debug_print = 1;
- return 0;
-}
-early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
-
struct var_mtrr_state {
unsigned long range_startk;
unsigned long range_sizek;
@@ -906,27 +898,6 @@ set_var_mtrr_all(unsigned int address_bits)
}
}
-static unsigned long to_size_factor(unsigned long sizek, char *factorp)
-{
- char factor;
- unsigned long base = sizek;
-
- if (base & ((1<<10) - 1)) {
- /* not MB alignment */
- factor = 'K';
- } else if (base & ((1<<20) - 1)){
- factor = 'M';
- base >>= 10;
- } else {
- factor = 'G';
- base >>= 20;
- }
-
- *factorp = factor;
-
- return base;
-}
-
static unsigned int __init
range_to_mtrr(unsigned int reg, unsigned long range_startk,
unsigned long range_sizek, unsigned char type)
@@ -948,21 +919,13 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
align = max_align;
sizek = 1 << align;
- if (debug_print) {
- char start_factor = 'K', size_factor = 'K';
- unsigned long start_base, size_base;
-
- start_base = to_size_factor(range_startk, &start_factor),
- size_base = to_size_factor(sizek, &size_factor),
-
+ if (debug_print)
printk(KERN_DEBUG "Setting variable MTRR %d, "
- "base: %ld%cB, range: %ld%cB, type %s\n",
- reg, start_base, start_factor,
- size_base, size_factor,
+ "base: %ldMB, range: %ldMB, type %s\n",
+ reg, range_startk >> 10, sizek >> 10,
(type == MTRR_TYPE_UNCACHABLE)?"UC":
((type == MTRR_TYPE_WRBACK)?"WB":"Other")
);
- }
save_var_mtrr(reg++, range_startk, sizek, type);
range_startk += sizek;
range_sizek -= sizek;
@@ -1007,8 +970,6 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
/* try to append some small hole */
range0_basek = state->range_startk;
range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
-
- /* no increase */
if (range0_sizek == state->range_sizek) {
if (debug_print)
printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
@@ -1019,40 +980,13 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
return 0;
}
- /* only cut back, when it is not the last */
- if (sizek) {
- while (range0_basek + range0_sizek > (basek + sizek)) {
- if (range0_sizek >= chunk_sizek)
- range0_sizek -= chunk_sizek;
- else
- range0_sizek = 0;
-
- if (!range0_sizek)
- break;
- }
- }
-
-second_try:
- range_basek = range0_basek + range0_sizek;
-
- /* one hole in the middle */
- if (range_basek > basek && range_basek <= (basek + sizek))
- second_sizek = range_basek - basek;
-
- if (range0_sizek > state->range_sizek) {
-
- /* one hole in middle or at end */
- hole_sizek = range0_sizek - state->range_sizek - second_sizek;
-
- /* hole size should be less than half of range0 size */
- if (hole_sizek >= (range0_sizek >> 1) &&
- range0_sizek >= chunk_sizek) {
- range0_sizek -= chunk_sizek;
- second_sizek = 0;
- hole_sizek = 0;
-
- goto second_try;
- }
+ range0_sizek -= chunk_sizek;
+ if (range0_sizek && sizek) {
+ while (range0_basek + range0_sizek > (basek + sizek)) {
+ range0_sizek -= chunk_sizek;
+ if (!range0_sizek)
+ break;
+ }
}
if (range0_sizek) {
@@ -1062,28 +996,50 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
(range0_basek + range0_sizek)<<10);
state->reg = range_to_mtrr(state->reg, range0_basek,
range0_sizek, MTRR_TYPE_WRBACK);
+
}
- if (range0_sizek < state->range_sizek) {
- /* need to handle left over */
- range_sizek = state->range_sizek - range0_sizek;
+ range_basek = range0_basek + range0_sizek;
+ range_sizek = chunk_sizek;
- if (debug_print)
- printk(KERN_DEBUG "range: %016lx - %016lx\n",
- range_basek<<10,
- (range_basek + range_sizek)<<10);
- state->reg = range_to_mtrr(state->reg, range_basek,
- range_sizek, MTRR_TYPE_WRBACK);
+ if (range_basek + range_sizek > basek &&
+ range_basek + range_sizek <= (basek + sizek)) {
+ /* one hole */
+ second_basek = basek;
+ second_sizek = range_basek + range_sizek - basek;
+ }
+
+ /* if last piece, only could one hole near end */
+ if ((second_basek || !basek) &&
+ range_sizek - (state->range_sizek - range0_sizek) - second_sizek <
+ (chunk_sizek >> 1)) {
+ /*
+ * one hole in middle (second_sizek is 0) or at end
+ * (second_sizek is 0 )
+ */
+ hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
+ - second_sizek;
+ hole_basek = range_basek + range_sizek - hole_sizek
+ - second_sizek;
+ } else {
+ /* fallback for big hole, or several holes */
+ range_sizek = state->range_sizek - range0_sizek;
+ second_basek = 0;
+ second_sizek = 0;
}
+ if (debug_print)
+ printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
+ (range_basek + range_sizek)<<10);
+ state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
+ MTRR_TYPE_WRBACK);
if (hole_sizek) {
- hole_basek = range_basek - hole_sizek - second_sizek;
if (debug_print)
printk(KERN_DEBUG "hole: %016lx - %016lx\n",
- hole_basek<<10,
- (hole_basek + hole_sizek)<<10);
- state->reg = range_to_mtrr(state->reg, hole_basek,
- hole_sizek, MTRR_TYPE_UNCACHABLE);
+ hole_basek<<10, (hole_basek + hole_sizek)<<10);
+ state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
+ MTRR_TYPE_UNCACHABLE);
+
}
return second_sizek;
@@ -1198,11 +1154,11 @@ struct mtrr_cleanup_result {
};
/*
- * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
- * chunk size: gran_size, ..., 2G
- * so we need (1+16)*8
+ * gran_size: 1M, 2M, ..., 2G
+ * chunk size: gran_size, ..., 4G
+ * so we need (2+13)*6
*/
-#define NUM_RESULT 136
+#define NUM_RESULT 90
#define PSHIFT (PAGE_SHIFT - 10)
static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
@@ -1212,14 +1168,13 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM];
static int __init mtrr_cleanup(unsigned address_bits)
{
unsigned long extra_remove_base, extra_remove_size;
- unsigned long base, size, def, dummy;
+ unsigned long i, base, size, def, dummy;
mtrr_type type;
int nr_range, nr_range_new;
u64 chunk_size, gran_size;
unsigned long range_sums, range_sums_new;
int index_good;
int num_reg_good;
- int i;
/* extra one for all 0 */
int num[MTRR_NUM_TYPES + 1];
@@ -1249,8 +1204,6 @@ static int __init mtrr_cleanup(unsigned address_bits)
continue;
if (!size)
type = MTRR_NUM_TYPES;
- if (type == MTRR_TYPE_WRPROT)
- type = MTRR_TYPE_UNCACHABLE;
num[type]++;
}
@@ -1263,57 +1216,23 @@ static int __init mtrr_cleanup(unsigned address_bits)
num_var_ranges - num[MTRR_NUM_TYPES])
return 0;
- /* print original var MTRRs at first, for debugging: */
- printk(KERN_DEBUG "original variable MTRRs\n");
- for (i = 0; i < num_var_ranges; i++) {
- char start_factor = 'K', size_factor = 'K';
- unsigned long start_base, size_base;
-
- size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
- if (!size_base)
- continue;
-
- size_base = to_size_factor(size_base, &size_factor),
- start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
- start_base = to_size_factor(start_base, &start_factor),
- type = range_state[i].type;
-
- printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
- i, start_base, start_factor,
- size_base, size_factor,
- (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
- ((type == MTRR_TYPE_WRPROT) ? "WP" :
- ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
- );
- }
-
memset(range, 0, sizeof(range));
extra_remove_size = 0;
- extra_remove_base = 1 << (32 - PAGE_SHIFT);
- if (mtrr_tom2)
+ if (mtrr_tom2) {
+ extra_remove_base = 1 << (32 - PAGE_SHIFT);
extra_remove_size =
(mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
+ }
nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
extra_remove_size);
- /*
- * [0, 1M) should always be coverred by var mtrr with WB
- * and fixed mtrrs should take effective before var mtrr for it
- */
- nr_range = add_range_with_merge(range, nr_range, 0,
- (1ULL<<(20 - PAGE_SHIFT)) - 1);
- /* sort the ranges */
- sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
-
range_sums = sum_ranges(range, nr_range);
printk(KERN_INFO "total RAM coverred: %ldM\n",
range_sums >> (20 - PAGE_SHIFT));
if (mtrr_chunk_size && mtrr_gran_size) {
int num_reg;
- char gran_factor, chunk_factor, lose_factor;
- unsigned long gran_base, chunk_base, lose_base;
- debug_print++;
+ debug_print = 1;
/* convert ranges to var ranges state */
num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
mtrr_gran_size);
@@ -1337,48 +1256,34 @@ static int __init mtrr_cleanup(unsigned address_bits)
result[i].lose_cover_sizek =
(range_sums - range_sums_new) << PSHIFT;
- gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
- chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
- lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
- printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
- result[i].bad?"*BAD*":" ",
- gran_base, gran_factor, chunk_base, chunk_factor);
- printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
+ printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
+ result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10,
+ result[i].chunk_sizek >> 10);
+ printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n",
result[i].num_reg, result[i].bad?"-":"",
- lose_base, lose_factor);
+ result[i].lose_cover_sizek >> 10);
if (!result[i].bad) {
set_var_mtrr_all(address_bits);
return 1;
}
printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
"will find optimal one\n");
- debug_print--;
+ debug_print = 0;
memset(result, 0, sizeof(result[0]));
}
i = 0;
memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
memset(result, 0, sizeof(result));
- for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
- char gran_factor;
- unsigned long gran_base;
-
- if (debug_print)
- gran_base = to_size_factor(gran_size >> 10, &gran_factor);
-
- for (chunk_size = gran_size; chunk_size < (1ULL<<32);
+ for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
+ for (chunk_size = gran_size; chunk_size < (1ULL<<33);
chunk_size <<= 1) {
int num_reg;
- if (debug_print) {
- char chunk_factor;
- unsigned long chunk_base;
-
- chunk_base = to_size_factor(chunk_size>>10, &chunk_factor),
- printk(KERN_INFO "\n");
- printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n",
- gran_base, gran_factor, chunk_base, chunk_factor);
- }
+ if (debug_print)
+ printk(KERN_INFO
+ "\ngran_size: %lldM chunk_size_size: %lldM\n",
+ gran_size >> 20, chunk_size >> 20);
if (i >= NUM_RESULT)
continue;
@@ -1421,18 +1326,12 @@ static int __init mtrr_cleanup(unsigned address_bits)
/* print out all */
for (i = 0; i < NUM_RESULT; i++) {
- char gran_factor, chunk_factor, lose_factor;
- unsigned long gran_base, chunk_base, lose_base;
-
- gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
- chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
- lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
- printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
- result[i].bad?"*BAD*":" ",
- gran_base, gran_factor, chunk_base, chunk_factor);
- printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
- result[i].num_reg, result[i].bad?"-":"",
- lose_base, lose_factor);
+ printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
+ result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
+ result[i].chunk_sizek >> 10);
+ printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n",
+ result[i].num_reg, result[i].bad?"-":"",
+ result[i].lose_cover_sizek >> 10);
}
/* try to find the optimal index */
@@ -1440,8 +1339,10 @@ static int __init mtrr_cleanup(unsigned address_bits)
nr_mtrr_spare_reg = num_var_ranges - 1;
num_reg_good = -1;
for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
- if (!min_loss_pfn[i])
+ if (!min_loss_pfn[i]) {
num_reg_good = i;
+ break;
+ }
}
index_good = -1;
@@ -1457,26 +1358,21 @@ static int __init mtrr_cleanup(unsigned address_bits)
}
if (index_good != -1) {
- char gran_factor, chunk_factor, lose_factor;
- unsigned long gran_base, chunk_base, lose_base;
-
printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
i = index_good;
- gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
- chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
- lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
- printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t",
- gran_base, gran_factor, chunk_base, chunk_factor);
- printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n",
- result[i].num_reg, lose_base, lose_factor);
+ printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t",
+ result[i].gran_sizek >> 10,
+ result[i].chunk_sizek >> 10);
+ printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n",
+ result[i].num_reg,
+ result[i].lose_cover_sizek >> 10);
/* convert ranges to var ranges state */
chunk_size = result[i].chunk_sizek;
chunk_size <<= 10;
gran_size = result[i].gran_sizek;
gran_size <<= 10;
- debug_print++;
+ debug_print = 1;
x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
- debug_print--;
set_var_mtrr_all(address_bits);
return 1;
}
diff --git a/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c b/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c
index 6bff382094f5..05cc22dbd4ff 100644
--- a/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -295,19 +295,13 @@ static int setup_k7_watchdog(unsigned nmi_hz)
/* setup the timer */
wrmsr(evntsel_msr, evntsel, 0);
write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz);
-
- /* initialize the wd struct before enabling */
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = 0; /* unused */
-
- /* ok, everything is initialized, announce that we're set */
- cpu_nmi_set_wd_enabled();
-
apic_write(APIC_LVTPC, APIC_DM_NMI);
evntsel |= K7_EVNTSEL_ENABLE;
wrmsr(evntsel_msr, evntsel, 0);
+ wd->perfctr_msr = perfctr_msr;
+ wd->evntsel_msr = evntsel_msr;
+ wd->cccr_msr = 0; /* unused */
return 1;
}
@@ -385,19 +379,13 @@ static int setup_p6_watchdog(unsigned nmi_hz)
wrmsr(evntsel_msr, evntsel, 0);
nmi_hz = adjust_for_32bit_ctr(nmi_hz);
write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz);
-
- /* initialize the wd struct before enabling */
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = 0; /* unused */
-
- /* ok, everything is initialized, announce that we're set */
- cpu_nmi_set_wd_enabled();
-
apic_write(APIC_LVTPC, APIC_DM_NMI);
evntsel |= P6_EVNTSEL0_ENABLE;
wrmsr(evntsel_msr, evntsel, 0);
+ wd->perfctr_msr = perfctr_msr;
+ wd->evntsel_msr = evntsel_msr;
+ wd->cccr_msr = 0; /* unused */
return 1;
}
@@ -444,27 +432,6 @@ static const struct wd_ops p6_wd_ops = {
#define P4_CCCR_ENABLE (1 << 12)
#define P4_CCCR_OVF (1 << 31)
-#define P4_CONTROLS 18
-static unsigned int p4_controls[18] = {
- MSR_P4_BPU_CCCR0,
- MSR_P4_BPU_CCCR1,
- MSR_P4_BPU_CCCR2,
- MSR_P4_BPU_CCCR3,
- MSR_P4_MS_CCCR0,
- MSR_P4_MS_CCCR1,
- MSR_P4_MS_CCCR2,
- MSR_P4_MS_CCCR3,
- MSR_P4_FLAME_CCCR0,
- MSR_P4_FLAME_CCCR1,
- MSR_P4_FLAME_CCCR2,
- MSR_P4_FLAME_CCCR3,
- MSR_P4_IQ_CCCR0,
- MSR_P4_IQ_CCCR1,
- MSR_P4_IQ_CCCR2,
- MSR_P4_IQ_CCCR3,
- MSR_P4_IQ_CCCR4,
- MSR_P4_IQ_CCCR5,
-};
/*
* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
* CRU_ESCR0 (with any non-null event selector) through a complemented
@@ -506,26 +473,6 @@ static int setup_p4_watchdog(unsigned nmi_hz)
evntsel_msr = MSR_P4_CRU_ESCR0;
cccr_msr = MSR_P4_IQ_CCCR0;
cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
-
- /*
- * If we're on the kdump kernel or other situation, we may
- * still have other performance counter registers set to
- * interrupt and they'll keep interrupting forever because
- * of the P4_CCCR_OVF quirk. So we need to ACK all the
- * pending interrupts and disable all the registers here,
- * before reenabling the NMI delivery. Refer to p4_rearm()
- * about the P4_CCCR_OVF quirk.
- */
- if (reset_devices) {
- unsigned int low, high;
- int i;
-
- for (i = 0; i < P4_CONTROLS; i++) {
- rdmsr(p4_controls[i], low, high);
- low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
- wrmsr(p4_controls[i], low, high);
- }
- }
} else {
/* logical cpu 1 */
perfctr_msr = MSR_P4_IQ_PERFCTR1;
@@ -552,17 +499,12 @@ static int setup_p4_watchdog(unsigned nmi_hz)
wrmsr(evntsel_msr, evntsel, 0);
wrmsr(cccr_msr, cccr_val, 0);
write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
-
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = cccr_msr;
-
- /* ok, everything is initialized, announce that we're set */
- cpu_nmi_set_wd_enabled();
-
apic_write(APIC_LVTPC, APIC_DM_NMI);
cccr_val |= P4_CCCR_ENABLE;
wrmsr(cccr_msr, cccr_val, 0);
+ wd->perfctr_msr = perfctr_msr;
+ wd->evntsel_msr = evntsel_msr;
+ wd->cccr_msr = cccr_msr;
return 1;
}
@@ -678,17 +620,13 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
wrmsr(evntsel_msr, evntsel, 0);
nmi_hz = adjust_for_32bit_ctr(nmi_hz);
write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsr(evntsel_msr, evntsel, 0);
wd->perfctr_msr = perfctr_msr;
wd->evntsel_msr = evntsel_msr;
wd->cccr_msr = 0; /* unused */
-
- /* ok, everything is initialized, announce that we're set */
- cpu_nmi_set_wd_enabled();
-
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
- wrmsr(evntsel_msr, evntsel, 0);
intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
return 1;
}
diff --git a/trunk/arch/x86/kernel/cpuid.c b/trunk/arch/x86/kernel/cpuid.c
index 6a44d6465991..8e9cd6a8ec12 100644
--- a/trunk/arch/x86/kernel/cpuid.c
+++ b/trunk/arch/x86/kernel/cpuid.c
@@ -36,6 +36,7 @@
#include
#include
#include
+#include
#include
#include
#include
diff --git a/trunk/arch/x86/kernel/crash_dump_64.c b/trunk/arch/x86/kernel/crash_dump_64.c
index e90a60ef10c2..15e6c6bc4a46 100644
--- a/trunk/arch/x86/kernel/crash_dump_64.c
+++ b/trunk/arch/x86/kernel/crash_dump_64.c
@@ -7,8 +7,9 @@
#include
#include
-#include
-#include
+
+#include
+#include
/**
* copy_oldmem_page - copy one page from "oldmem"
@@ -24,7 +25,7 @@
* in the current kernel. We stitch up a pte, similar to kmap_atomic.
*/
ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
- size_t csize, unsigned long offset, int userbuf)
+ size_t csize, unsigned long offset, int userbuf)
{
void *vaddr;
@@ -32,16 +33,14 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
return 0;
vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
- if (!vaddr)
- return -ENOMEM;
if (userbuf) {
- if (copy_to_user(buf, vaddr + offset, csize)) {
+ if (copy_to_user(buf, (vaddr + offset), csize)) {
iounmap(vaddr);
return -EFAULT;
}
} else
- memcpy(buf, vaddr + offset, csize);
+ memcpy(buf, (vaddr + offset), csize);
iounmap(vaddr);
return csize;
diff --git a/trunk/arch/x86/kernel/ds.c b/trunk/arch/x86/kernel/ds.c
index 2b69994fd3a8..11c11b8ec48d 100644
--- a/trunk/arch/x86/kernel/ds.c
+++ b/trunk/arch/x86/kernel/ds.c
@@ -2,49 +2,26 @@
* Debug Store support
*
* This provides a low-level interface to the hardware's Debug Store
- * feature that is used for branch trace store (BTS) and
+ * feature that is used for last branch recording (LBR) and
* precise-event based sampling (PEBS).
*
- * It manages:
- * - per-thread and per-cpu allocation of BTS and PEBS
- * - buffer memory allocation (optional)
- * - buffer overflow handling
- * - buffer access
+ * Different architectures use a different DS layout/pointer size.
+ * The below functions therefore work on a void*.
*
- * It assumes:
- * - get_task_struct on all parameter tasks
- * - current is allowed to trace parameter tasks
*
+ * Since there is no user for PEBS, yet, only LBR (or branch
+ * trace store, BTS) is supported.
*
- * Copyright (C) 2007-2008 Intel Corporation.
- * Markus Metzger , 2007-2008
+ *
+ * Copyright (C) 2007 Intel Corporation.
+ * Markus Metzger , Dec 2007
*/
-
-#ifdef CONFIG_X86_DS
-
#include
#include
#include
#include
-#include
-#include
-
-
-/*
- * The configuration for a particular DS hardware implementation.
- */
-struct ds_configuration {
- /* the size of the DS structure in bytes */
- unsigned char sizeof_ds;
- /* the size of one pointer-typed field in the DS structure in bytes;
- this covers the first 8 fields related to buffer management. */
- unsigned char sizeof_field;
- /* the size of a BTS/PEBS record in bytes */
- unsigned char sizeof_rec[2];
-};
-static struct ds_configuration ds_cfg;
/*
@@ -67,747 +44,378 @@ static struct ds_configuration ds_cfg;
* (interrupt occurs when write pointer passes interrupt pointer)
* - value to which counter is reset following counter overflow
*
- * Later architectures use 64bit pointers throughout, whereas earlier
- * architectures use 32bit pointers in 32bit mode.
+ * On later architectures, the last branch recording hardware uses
+ * 64bit pointers even in 32bit mode.
*
*
- * We compute the base address for the first 8 fields based on:
- * - the field size stored in the DS configuration
- * - the relative field position
- * - an offset giving the start of the respective region
+ * Branch Trace Store (BTS) records store information about control
+ * flow changes. They at least provide the following information:
+ * - source linear address
+ * - destination linear address
*
- * This offset is further used to index various arrays holding
- * information for BTS and PEBS at the respective index.
+ * Netburst supported a predicated bit that had been dropped in later
+ * architectures. We do not suppor it.
*
- * On later 32bit processors, we only access the lower 32bit of the
- * 64bit pointer fields. The upper halves will be zeroed out.
- */
-
-enum ds_field {
- ds_buffer_base = 0,
- ds_index,
- ds_absolute_maximum,
- ds_interrupt_threshold,
-};
-
-enum ds_qualifier {
- ds_bts = 0,
- ds_pebs
-};
-
-static inline unsigned long ds_get(const unsigned char *base,
- enum ds_qualifier qual, enum ds_field field)
-{
- base += (ds_cfg.sizeof_field * (field + (4 * qual)));
- return *(unsigned long *)base;
-}
-
-static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
- enum ds_field field, unsigned long value)
-{
- base += (ds_cfg.sizeof_field * (field + (4 * qual)));
- (*(unsigned long *)base) = value;
-}
-
-
-/*
- * Locking is done only for allocating BTS or PEBS resources and for
- * guarding context and buffer memory allocation.
*
- * Most functions require the current task to own the ds context part
- * they are going to access. All the locking is done when validating
- * access to the context.
+ * In order to abstract from the actual DS and BTS layout, we describe
+ * the access to the relevant fields.
+ * Thanks to Andi Kleen for proposing this design.
+ *
+ * The implementation, however, is not as general as it might seem. In
+ * order to stay somewhat simple and efficient, we assume an
+ * underlying unsigned type (mostly a pointer type) and we expect the
+ * field to be at least as big as that type.
*/
-static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
/*
- * Validate that the current task is allowed to access the BTS/PEBS
- * buffer of the parameter task.
- *
- * Returns 0, if access is granted; -Eerrno, otherwise.
+ * A special from_ip address to indicate that the BTS record is an
+ * info record that needs to be interpreted or skipped.
*/
-static inline int ds_validate_access(struct ds_context *context,
- enum ds_qualifier qual)
-{
- if (!context)
- return -EPERM;
-
- if (context->owner[qual] == current)
- return 0;
-
- return -EPERM;
-}
-
+#define BTS_ESCAPE_ADDRESS (-1)
/*
- * We either support (system-wide) per-cpu or per-thread allocation.
- * We distinguish the two based on the task_struct pointer, where a
- * NULL pointer indicates per-cpu allocation for the current cpu.
- *
- * Allocations are use-counted. As soon as resources are allocated,
- * further allocations must be of the same type (per-cpu or
- * per-thread). We model this by counting allocations (i.e. the number
- * of tracers of a certain type) for one type negatively:
- * =0 no tracers
- * >0 number of per-thread tracers
- * <0 number of per-cpu tracers
- *
- * The below functions to get and put tracers and to check the
- * allocation type require the ds_lock to be held by the caller.
- *
- * Tracers essentially gives the number of ds contexts for a certain
- * type of allocation.
+ * A field access descriptor
*/
-static long tracers;
-
-static inline void get_tracer(struct task_struct *task)
-{
- tracers += (task ? 1 : -1);
-}
-
-static inline void put_tracer(struct task_struct *task)
-{
- tracers -= (task ? 1 : -1);
-}
-
-static inline int check_tracer(struct task_struct *task)
-{
- return (task ? (tracers >= 0) : (tracers <= 0));
-}
-
+struct access_desc {
+ unsigned char offset;
+ unsigned char size;
+};
/*
- * The DS context is either attached to a thread or to a cpu:
- * - in the former case, the thread_struct contains a pointer to the
- * attached context.
- * - in the latter case, we use a static array of per-cpu context
- * pointers.
- *
- * Contexts are use-counted. They are allocated on first access and
- * deallocated when the last user puts the context.
- *
- * We distinguish between an allocating and a non-allocating get of a
- * context:
- * - the allocating get is used for requesting BTS/PEBS resources. It
- * requires the caller to hold the global ds_lock.
- * - the non-allocating get is used for all other cases. A
- * non-existing context indicates an error. It acquires and releases
- * the ds_lock itself for obtaining the context.
- *
- * A context and its DS configuration are allocated and deallocated
- * together. A context always has a DS configuration of the
- * appropriate size.
+ * The configuration for a particular DS/BTS hardware implementation.
*/
-static DEFINE_PER_CPU(struct ds_context *, system_context);
-
-#define this_system_context per_cpu(system_context, smp_processor_id())
+struct ds_configuration {
+ /* the DS configuration */
+ unsigned char sizeof_ds;
+ struct access_desc bts_buffer_base;
+ struct access_desc bts_index;
+ struct access_desc bts_absolute_maximum;
+ struct access_desc bts_interrupt_threshold;
+ /* the BTS configuration */
+ unsigned char sizeof_bts;
+ struct access_desc from_ip;
+ struct access_desc to_ip;
+ /* BTS variants used to store additional information like
+ timestamps */
+ struct access_desc info_type;
+ struct access_desc info_data;
+ unsigned long debugctl_mask;
+};
/*
- * Returns the pointer to the parameter task's context or to the
- * system-wide context, if task is NULL.
- *
- * Increases the use count of the returned context, if not NULL.
+ * The global configuration used by the below accessor functions
*/
-static inline struct ds_context *ds_get_context(struct task_struct *task)
-{
- struct ds_context *context;
-
- spin_lock(&ds_lock);
-
- context = (task ? task->thread.ds_ctx : this_system_context);
- if (context)
- context->count++;
-
- spin_unlock(&ds_lock);
-
- return context;
-}
+static struct ds_configuration ds_cfg;
/*
- * Same as ds_get_context, but allocates the context and it's DS
- * structure, if necessary; returns NULL; if out of memory.
- *
- * pre: requires ds_lock to be held
+ * Accessor functions for some DS and BTS fields using the above
+ * global ptrace_bts_cfg.
*/
-static inline struct ds_context *ds_alloc_context(struct task_struct *task)
+static inline unsigned long get_bts_buffer_base(char *base)
{
- struct ds_context **p_context =
- (task ? &task->thread.ds_ctx : &this_system_context);
- struct ds_context *context = *p_context;
-
- if (!context) {
- context = kzalloc(sizeof(*context), GFP_KERNEL);
-
- if (!context)
- return NULL;
-
- context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
- if (!context->ds) {
- kfree(context);
- return NULL;
- }
-
- *p_context = context;
-
- context->this = p_context;
- context->task = task;
-
- if (task)
- set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
-
- if (!task || (task == current))
- wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
-
- get_tracer(task);
- }
-
- context->count++;
-
- return context;
+ return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset);
}
-
-/*
- * Decreases the use count of the parameter context, if not NULL.
- * Deallocates the context, if the use count reaches zero.
- */
-static inline void ds_put_context(struct ds_context *context)
+static inline void set_bts_buffer_base(char *base, unsigned long value)
{
- if (!context)
- return;
-
- spin_lock(&ds_lock);
-
- if (--context->count)
- goto out;
-
- *(context->this) = NULL;
-
- if (context->task)
- clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
-
- if (!context->task || (context->task == current))
- wrmsrl(MSR_IA32_DS_AREA, 0);
-
- put_tracer(context->task);
-
- /* free any leftover buffers from tracers that did not
- * deallocate them properly. */
- kfree(context->buffer[ds_bts]);
- kfree(context->buffer[ds_pebs]);
- kfree(context->ds);
- kfree(context);
- out:
- spin_unlock(&ds_lock);
+ (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value;
}
-
-
-/*
- * Handle a buffer overflow
- *
- * task: the task whose buffers are overflowing;
- * NULL for a buffer overflow on the current cpu
- * context: the ds context
- * qual: the buffer type
- */
-static void ds_overflow(struct task_struct *task, struct ds_context *context,
- enum ds_qualifier qual)
+static inline unsigned long get_bts_index(char *base)
{
- if (!context)
- return;
-
- if (context->callback[qual])
- (*context->callback[qual])(task);
-
- /* todo: do some more overflow handling */
+ return *(unsigned long *)(base + ds_cfg.bts_index.offset);
}
-
-
-/*
- * Allocate a non-pageable buffer of the parameter size.
- * Checks the memory and the locked memory rlimit.
- *
- * Returns the buffer, if successful;
- * NULL, if out of memory or rlimit exceeded.
- *
- * size: the requested buffer size in bytes
- * pages (out): if not NULL, contains the number of pages reserved
- */
-static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
+static inline void set_bts_index(char *base, unsigned long value)
{
- unsigned long rlim, vm, pgsz;
- void *buffer;
-
- pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
- rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
- vm = current->mm->total_vm + pgsz;
- if (rlim < vm)
- return NULL;
-
- rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
- vm = current->mm->locked_vm + pgsz;
- if (rlim < vm)
- return NULL;
-
- buffer = kzalloc(size, GFP_KERNEL);
- if (!buffer)
- return NULL;
-
- current->mm->total_vm += pgsz;
- current->mm->locked_vm += pgsz;
-
- if (pages)
- *pages = pgsz;
-
- return buffer;
+ (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value;
}
-
-static int ds_request(struct task_struct *task, void *base, size_t size,
- ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
+static inline unsigned long get_bts_absolute_maximum(char *base)
{
- struct ds_context *context;
- unsigned long buffer, adj;
- const unsigned long alignment = (1 << 3);
- int error = 0;
-
- if (!ds_cfg.sizeof_ds)
- return -EOPNOTSUPP;
-
- /* we require some space to do alignment adjustments below */
- if (size < (alignment + ds_cfg.sizeof_rec[qual]))
- return -EINVAL;
-
- /* buffer overflow notification is not yet implemented */
- if (ovfl)
- return -EOPNOTSUPP;
-
-
- spin_lock(&ds_lock);
-
- if (!check_tracer(task))
- return -EPERM;
-
- error = -ENOMEM;
- context = ds_alloc_context(task);
- if (!context)
- goto out_unlock;
-
- error = -EALREADY;
- if (context->owner[qual] == current)
- goto out_unlock;
- error = -EPERM;
- if (context->owner[qual] != NULL)
- goto out_unlock;
- context->owner[qual] = current;
-
- spin_unlock(&ds_lock);
-
-
- error = -ENOMEM;
- if (!base) {
- base = ds_allocate_buffer(size, &context->pages[qual]);
- if (!base)
- goto out_release;
-
- context->buffer[qual] = base;
- }
- error = 0;
-
- context->callback[qual] = ovfl;
-
- /* adjust the buffer address and size to meet alignment
- * constraints:
- * - buffer is double-word aligned
- * - size is multiple of record size
- *
- * We checked the size at the very beginning; we have enough
- * space to do the adjustment.
- */
- buffer = (unsigned long)base;
-
- adj = ALIGN(buffer, alignment) - buffer;
- buffer += adj;
- size -= adj;
-
- size /= ds_cfg.sizeof_rec[qual];
- size *= ds_cfg.sizeof_rec[qual];
-
- ds_set(context->ds, qual, ds_buffer_base, buffer);
- ds_set(context->ds, qual, ds_index, buffer);
- ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
-
- if (ovfl) {
- /* todo: select a suitable interrupt threshold */
- } else
- ds_set(context->ds, qual,
- ds_interrupt_threshold, buffer + size + 1);
-
- /* we keep the context until ds_release */
- return error;
-
- out_release:
- context->owner[qual] = NULL;
- ds_put_context(context);
- return error;
-
- out_unlock:
- spin_unlock(&ds_lock);
- ds_put_context(context);
- return error;
+ return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset);
}
-
-int ds_request_bts(struct task_struct *task, void *base, size_t size,
- ds_ovfl_callback_t ovfl)
+static inline void set_bts_absolute_maximum(char *base, unsigned long value)
{
- return ds_request(task, base, size, ovfl, ds_bts);
+ (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value;
}
-
-int ds_request_pebs(struct task_struct *task, void *base, size_t size,
- ds_ovfl_callback_t ovfl)
+static inline unsigned long get_bts_interrupt_threshold(char *base)
{
- return ds_request(task, base, size, ovfl, ds_pebs);
+ return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset);
}
-
-static int ds_release(struct task_struct *task, enum ds_qualifier qual)
+static inline void set_bts_interrupt_threshold(char *base, unsigned long value)
{
- struct ds_context *context;
- int error;
-
- context = ds_get_context(task);
- error = ds_validate_access(context, qual);
- if (error < 0)
- goto out;
-
- kfree(context->buffer[qual]);
- context->buffer[qual] = NULL;
-
- current->mm->total_vm -= context->pages[qual];
- current->mm->locked_vm -= context->pages[qual];
- context->pages[qual] = 0;
- context->owner[qual] = NULL;
-
- /*
- * we put the context twice:
- * once for the ds_get_context
- * once for the corresponding ds_request
- */
- ds_put_context(context);
- out:
- ds_put_context(context);
- return error;
+ (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value;
}
-
-int ds_release_bts(struct task_struct *task)
+static inline unsigned long get_from_ip(char *base)
{
- return ds_release(task, ds_bts);
+ return *(unsigned long *)(base + ds_cfg.from_ip.offset);
}
-
-int ds_release_pebs(struct task_struct *task)
+static inline void set_from_ip(char *base, unsigned long value)
{
- return ds_release(task, ds_pebs);
+ (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value;
}
-
-static int ds_get_index(struct task_struct *task, size_t *pos,
- enum ds_qualifier qual)
+static inline unsigned long get_to_ip(char *base)
{
- struct ds_context *context;
- unsigned long base, index;
- int error;
-
- context = ds_get_context(task);
- error = ds_validate_access(context, qual);
- if (error < 0)
- goto out;
-
- base = ds_get(context->ds, qual, ds_buffer_base);
- index = ds_get(context->ds, qual, ds_index);
-
- error = ((index - base) / ds_cfg.sizeof_rec[qual]);
- if (pos)
- *pos = error;
- out:
- ds_put_context(context);
- return error;
+ return *(unsigned long *)(base + ds_cfg.to_ip.offset);
}
-
-int ds_get_bts_index(struct task_struct *task, size_t *pos)
+static inline void set_to_ip(char *base, unsigned long value)
{
- return ds_get_index(task, pos, ds_bts);
+ (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value;
}
-
-int ds_get_pebs_index(struct task_struct *task, size_t *pos)
+static inline unsigned char get_info_type(char *base)
{
- return ds_get_index(task, pos, ds_pebs);
+ return *(unsigned char *)(base + ds_cfg.info_type.offset);
}
-
-static int ds_get_end(struct task_struct *task, size_t *pos,
- enum ds_qualifier qual)
+static inline void set_info_type(char *base, unsigned char value)
{
- struct ds_context *context;
- unsigned long base, end;
- int error;
-
- context = ds_get_context(task);
- error = ds_validate_access(context, qual);
- if (error < 0)
- goto out;
-
- base = ds_get(context->ds, qual, ds_buffer_base);
- end = ds_get(context->ds, qual, ds_absolute_maximum);
-
- error = ((end - base) / ds_cfg.sizeof_rec[qual]);
- if (pos)
- *pos = error;
- out:
- ds_put_context(context);
- return error;
+ (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value;
}
-
-int ds_get_bts_end(struct task_struct *task, size_t *pos)
+static inline unsigned long get_info_data(char *base)
{
- return ds_get_end(task, pos, ds_bts);
+ return *(unsigned long *)(base + ds_cfg.info_data.offset);
}
-
-int ds_get_pebs_end(struct task_struct *task, size_t *pos)
+static inline void set_info_data(char *base, unsigned long value)
{
- return ds_get_end(task, pos, ds_pebs);
+ (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value;
}
-static int ds_access(struct task_struct *task, size_t index,
- const void **record, enum ds_qualifier qual)
+
+int ds_allocate(void **dsp, size_t bts_size_in_bytes)
{
- struct ds_context *context;
- unsigned long base, idx;
- int error;
+ size_t bts_size_in_records;
+ unsigned long bts;
+ void *ds;
- if (!record)
+ if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
+ return -EOPNOTSUPP;
+
+ if (bts_size_in_bytes < 0)
return -EINVAL;
- context = ds_get_context(task);
- error = ds_validate_access(context, qual);
- if (error < 0)
- goto out;
+ bts_size_in_records =
+ bts_size_in_bytes / ds_cfg.sizeof_bts;
+ bts_size_in_bytes =
+ bts_size_in_records * ds_cfg.sizeof_bts;
+
+ if (bts_size_in_bytes <= 0)
+ return -EINVAL;
- base = ds_get(context->ds, qual, ds_buffer_base);
- idx = base + (index * ds_cfg.sizeof_rec[qual]);
+ bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL);
- error = -EINVAL;
- if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
- goto out;
+ if (!bts)
+ return -ENOMEM;
- *record = (const void *)idx;
- error = ds_cfg.sizeof_rec[qual];
- out:
- ds_put_context(context);
- return error;
-}
+ ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
-int ds_access_bts(struct task_struct *task, size_t index, const void **record)
-{
- return ds_access(task, index, record, ds_bts);
+ if (!ds) {
+ kfree((void *)bts);
+ return -ENOMEM;
+ }
+
+ set_bts_buffer_base(ds, bts);
+ set_bts_index(ds, bts);
+ set_bts_absolute_maximum(ds, bts + bts_size_in_bytes);
+ set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1);
+
+ *dsp = ds;
+ return 0;
}
-int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
+int ds_free(void **dsp)
{
- return ds_access(task, index, record, ds_pebs);
+ if (*dsp) {
+ kfree((void *)get_bts_buffer_base(*dsp));
+ kfree(*dsp);
+ *dsp = NULL;
+ }
+ return 0;
}
-static int ds_write(struct task_struct *task, const void *record, size_t size,
- enum ds_qualifier qual, int force)
+int ds_get_bts_size(void *ds)
{
- struct ds_context *context;
- int error;
+ int size_in_bytes;
- if (!record)
- return -EINVAL;
+ if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
+ return -EOPNOTSUPP;
- error = -EPERM;
- context = ds_get_context(task);
- if (!context)
- goto out;
+ if (!ds)
+ return 0;
- if (!force) {
- error = ds_validate_access(context, qual);
- if (error < 0)
- goto out;
- }
+ size_in_bytes =
+ get_bts_absolute_maximum(ds) -
+ get_bts_buffer_base(ds);
+ return size_in_bytes;
+}
- error = 0;
- while (size) {
- unsigned long base, index, end, write_end, int_th;
- unsigned long write_size, adj_write_size;
-
- /*
- * write as much as possible without producing an
- * overflow interrupt.
- *
- * interrupt_threshold must either be
- * - bigger than absolute_maximum or
- * - point to a record between buffer_base and absolute_maximum
- *
- * index points to a valid record.
- */
- base = ds_get(context->ds, qual, ds_buffer_base);
- index = ds_get(context->ds, qual, ds_index);
- end = ds_get(context->ds, qual, ds_absolute_maximum);
- int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
-
- write_end = min(end, int_th);
-
- /* if we are already beyond the interrupt threshold,
- * we fill the entire buffer */
- if (write_end <= index)
- write_end = end;
-
- if (write_end <= index)
- goto out;
-
- write_size = min((unsigned long) size, write_end - index);
- memcpy((void *)index, record, write_size);
-
- record = (const char *)record + write_size;
- size -= write_size;
- error += write_size;
-
- adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
- adj_write_size *= ds_cfg.sizeof_rec[qual];
-
- /* zero out trailing bytes */
- memset((char *)index + write_size, 0,
- adj_write_size - write_size);
- index += adj_write_size;
-
- if (index >= end)
- index = base;
- ds_set(context->ds, qual, ds_index, index);
-
- if (index >= int_th)
- ds_overflow(task, context, qual);
- }
+int ds_get_bts_end(void *ds)
+{
+ int size_in_bytes = ds_get_bts_size(ds);
+
+ if (size_in_bytes <= 0)
+ return size_in_bytes;
- out:
- ds_put_context(context);
- return error;
+ return size_in_bytes / ds_cfg.sizeof_bts;
}
-int ds_write_bts(struct task_struct *task, const void *record, size_t size)
+int ds_get_bts_index(void *ds)
{
- return ds_write(task, record, size, ds_bts, /* force = */ 0);
+ int index_offset_in_bytes;
+
+ if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
+ return -EOPNOTSUPP;
+
+ index_offset_in_bytes =
+ get_bts_index(ds) -
+ get_bts_buffer_base(ds);
+
+ return index_offset_in_bytes / ds_cfg.sizeof_bts;
}
-int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
+int ds_set_overflow(void *ds, int method)
{
- return ds_write(task, record, size, ds_pebs, /* force = */ 0);
+ switch (method) {
+ case DS_O_SIGNAL:
+ return -EOPNOTSUPP;
+ case DS_O_WRAP:
+ return 0;
+ default:
+ return -EINVAL;
+ }
}
-int ds_unchecked_write_bts(struct task_struct *task,
- const void *record, size_t size)
+int ds_get_overflow(void *ds)
{
- return ds_write(task, record, size, ds_bts, /* force = */ 1);
+ return DS_O_WRAP;
}
-int ds_unchecked_write_pebs(struct task_struct *task,
- const void *record, size_t size)
+int ds_clear(void *ds)
{
- return ds_write(task, record, size, ds_pebs, /* force = */ 1);
+ int bts_size = ds_get_bts_size(ds);
+ unsigned long bts_base;
+
+ if (bts_size <= 0)
+ return bts_size;
+
+ bts_base = get_bts_buffer_base(ds);
+ memset((void *)bts_base, 0, bts_size);
+
+ set_bts_index(ds, bts_base);
+ return 0;
}
-static int ds_reset_or_clear(struct task_struct *task,
- enum ds_qualifier qual, int clear)
+int ds_read_bts(void *ds, int index, struct bts_struct *out)
{
- struct ds_context *context;
- unsigned long base, end;
- int error;
+ void *bts;
- context = ds_get_context(task);
- error = ds_validate_access(context, qual);
- if (error < 0)
- goto out;
-
- base = ds_get(context->ds, qual, ds_buffer_base);
- end = ds_get(context->ds, qual, ds_absolute_maximum);
+ if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
+ return -EOPNOTSUPP;
- if (clear)
- memset((void *)base, 0, end - base);
+ if (index < 0)
+ return -EINVAL;
- ds_set(context->ds, qual, ds_index, base);
+ if (index >= ds_get_bts_size(ds))
+ return -EINVAL;
- error = 0;
- out:
- ds_put_context(context);
- return error;
-}
+ bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts));
-int ds_reset_bts(struct task_struct *task)
-{
- return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
-}
+ memset(out, 0, sizeof(*out));
+ if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) {
+ out->qualifier = get_info_type(bts);
+ out->variant.jiffies = get_info_data(bts);
+ } else {
+ out->qualifier = BTS_BRANCH;
+ out->variant.lbr.from_ip = get_from_ip(bts);
+ out->variant.lbr.to_ip = get_to_ip(bts);
+ }
-int ds_reset_pebs(struct task_struct *task)
-{
- return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
+ return sizeof(*out);;
}
-int ds_clear_bts(struct task_struct *task)
+int ds_write_bts(void *ds, const struct bts_struct *in)
{
- return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
-}
+ unsigned long bts;
-int ds_clear_pebs(struct task_struct *task)
-{
- return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
-}
+ if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
+ return -EOPNOTSUPP;
-int ds_get_pebs_reset(struct task_struct *task, u64 *value)
-{
- struct ds_context *context;
- int error;
+ if (ds_get_bts_size(ds) <= 0)
+ return -ENXIO;
- if (!value)
- return -EINVAL;
+ bts = get_bts_index(ds);
- context = ds_get_context(task);
- error = ds_validate_access(context, ds_pebs);
- if (error < 0)
- goto out;
+ memset((void *)bts, 0, ds_cfg.sizeof_bts);
+ switch (in->qualifier) {
+ case BTS_INVALID:
+ break;
- *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
+ case BTS_BRANCH:
+ set_from_ip((void *)bts, in->variant.lbr.from_ip);
+ set_to_ip((void *)bts, in->variant.lbr.to_ip);
+ break;
- error = 0;
- out:
- ds_put_context(context);
- return error;
-}
+ case BTS_TASK_ARRIVES:
+ case BTS_TASK_DEPARTS:
+ set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS);
+ set_info_type((void *)bts, in->qualifier);
+ set_info_data((void *)bts, in->variant.jiffies);
+ break;
-int ds_set_pebs_reset(struct task_struct *task, u64 value)
-{
- struct ds_context *context;
- int error;
+ default:
+ return -EINVAL;
+ }
- context = ds_get_context(task);
- error = ds_validate_access(context, ds_pebs);
- if (error < 0)
- goto out;
+ bts = bts + ds_cfg.sizeof_bts;
+ if (bts >= get_bts_absolute_maximum(ds))
+ bts = get_bts_buffer_base(ds);
+ set_bts_index(ds, bts);
- *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
+ return ds_cfg.sizeof_bts;
+}
- error = 0;
- out:
- ds_put_context(context);
- return error;
+unsigned long ds_debugctl_mask(void)
+{
+ return ds_cfg.debugctl_mask;
}
-static const struct ds_configuration ds_cfg_var = {
- .sizeof_ds = sizeof(long) * 12,
- .sizeof_field = sizeof(long),
- .sizeof_rec[ds_bts] = sizeof(long) * 3,
- .sizeof_rec[ds_pebs] = sizeof(long) * 10
+#ifdef __i386__
+static const struct ds_configuration ds_cfg_netburst = {
+ .sizeof_ds = 9 * 4,
+ .bts_buffer_base = { 0, 4 },
+ .bts_index = { 4, 4 },
+ .bts_absolute_maximum = { 8, 4 },
+ .bts_interrupt_threshold = { 12, 4 },
+ .sizeof_bts = 3 * 4,
+ .from_ip = { 0, 4 },
+ .to_ip = { 4, 4 },
+ .info_type = { 4, 1 },
+ .info_data = { 8, 4 },
+ .debugctl_mask = (1<<2)|(1<<3)
+};
+
+static const struct ds_configuration ds_cfg_pentium_m = {
+ .sizeof_ds = 9 * 4,
+ .bts_buffer_base = { 0, 4 },
+ .bts_index = { 4, 4 },
+ .bts_absolute_maximum = { 8, 4 },
+ .bts_interrupt_threshold = { 12, 4 },
+ .sizeof_bts = 3 * 4,
+ .from_ip = { 0, 4 },
+ .to_ip = { 4, 4 },
+ .info_type = { 4, 1 },
+ .info_data = { 8, 4 },
+ .debugctl_mask = (1<<6)|(1<<7)
};
-static const struct ds_configuration ds_cfg_64 = {
- .sizeof_ds = 8 * 12,
- .sizeof_field = 8,
- .sizeof_rec[ds_bts] = 8 * 3,
- .sizeof_rec[ds_pebs] = 8 * 10
+#endif /* _i386_ */
+
+static const struct ds_configuration ds_cfg_core2 = {
+ .sizeof_ds = 9 * 8,
+ .bts_buffer_base = { 0, 8 },
+ .bts_index = { 8, 8 },
+ .bts_absolute_maximum = { 16, 8 },
+ .bts_interrupt_threshold = { 24, 8 },
+ .sizeof_bts = 3 * 8,
+ .from_ip = { 0, 8 },
+ .to_ip = { 8, 8 },
+ .info_type = { 8, 1 },
+ .info_data = { 16, 8 },
+ .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
};
static inline void
@@ -821,13 +429,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
switch (c->x86) {
case 0x6:
switch (c->x86_model) {
+#ifdef __i386__
case 0xD:
case 0xE: /* Pentium M */
- ds_configure(&ds_cfg_var);
+ ds_configure(&ds_cfg_pentium_m);
break;
+#endif /* _i386_ */
case 0xF: /* Core2 */
- case 0x1C: /* Atom */
- ds_configure(&ds_cfg_64);
+ ds_configure(&ds_cfg_core2);
break;
default:
/* sorry, don't know about them */
@@ -836,11 +445,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
break;
case 0xF:
switch (c->x86_model) {
+#ifdef __i386__
case 0x0:
case 0x1:
case 0x2: /* Netburst */
- ds_configure(&ds_cfg_var);
+ ds_configure(&ds_cfg_netburst);
break;
+#endif /* _i386_ */
default:
/* sorry, don't know about them */
break;
@@ -851,14 +462,3 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
break;
}
}
-
-void ds_free(struct ds_context *context)
-{
- /* This is called when the task owning the parameter context
- * is dying. There should not be any user of that context left
- * to disturb us, anymore. */
- unsigned long leftovers = context->count;
- while (leftovers--)
- ds_put_context(context);
-}
-#endif /* CONFIG_X86_DS */
diff --git a/trunk/arch/x86/kernel/efi.c b/trunk/arch/x86/kernel/efi.c
index 945a31cdd81f..06cc8d4254b1 100644
--- a/trunk/arch/x86/kernel/efi.c
+++ b/trunk/arch/x86/kernel/efi.c
@@ -414,11 +414,9 @@ void __init efi_init(void)
if (memmap.map == NULL)
printk(KERN_ERR "Could not map the EFI memory map!\n");
memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
-
if (memmap.desc_size != sizeof(efi_memory_desc_t))
- printk(KERN_WARNING
- "Kernel-defined memdesc doesn't match the one from EFI!\n");
-
+ printk(KERN_WARNING "Kernel-defined memdesc"
+ "doesn't match the one from EFI!\n");
if (add_efi_memmap)
do_add_efi_memmap();
diff --git a/trunk/arch/x86/kernel/entry_64.S b/trunk/arch/x86/kernel/entry_64.S
index cf3a0b2d0059..89434d439605 100644
--- a/trunk/arch/x86/kernel/entry_64.S
+++ b/trunk/arch/x86/kernel/entry_64.S
@@ -275,9 +275,9 @@ ENTRY(native_usergs_sysret64)
ENTRY(ret_from_fork)
CFI_DEFAULT_STACK
push kernel_eflags(%rip)
- CFI_ADJUST_CFA_OFFSET 8
+ CFI_ADJUST_CFA_OFFSET 4
popf # reset kernel eflags
- CFI_ADJUST_CFA_OFFSET -8
+ CFI_ADJUST_CFA_OFFSET -4
call schedule_tail
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
diff --git a/trunk/arch/x86/kernel/head64.c b/trunk/arch/x86/kernel/head64.c
index d16084f90649..9bfc4d72fb2e 100644
--- a/trunk/arch/x86/kernel/head64.c
+++ b/trunk/arch/x86/kernel/head64.c
@@ -108,11 +108,12 @@ void __init x86_64_start_kernel(char * real_mode_data)
}
load_idt((const struct desc_ptr *)&idt_descr);
- if (console_loglevel == 10)
- early_printk("Kernel alive\n");
+ early_printk("Kernel alive\n");
x86_64_init_pda();
+ early_printk("Kernel really alive\n");
+
x86_64_start_reservations(real_mode_data);
}
diff --git a/trunk/arch/x86/kernel/ioport.c b/trunk/arch/x86/kernel/ioport.c
index 191914302744..50e5e4a31c85 100644
--- a/trunk/arch/x86/kernel/ioport.c
+++ b/trunk/arch/x86/kernel/ioport.c
@@ -14,7 +14,6 @@
#include
#include
#include
-#include
/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
static void set_bitmap(unsigned long *bitmap, unsigned int base,
diff --git a/trunk/arch/x86/kernel/ipi.c b/trunk/arch/x86/kernel/ipi.c
index f1c688e46f35..3f7537b669d3 100644
--- a/trunk/arch/x86/kernel/ipi.c
+++ b/trunk/arch/x86/kernel/ipi.c
@@ -20,8 +20,6 @@
#ifdef CONFIG_X86_32
#include
-#include
-
/*
* the following functions deal with sending IPIs between CPUs.
*
@@ -149,6 +147,7 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
}
/* must come after the send_IPI functions above for inlining */
+#include
static int convert_apicid_to_cpu(int apic_id)
{
int i;
diff --git a/trunk/arch/x86/kernel/irq_32.c b/trunk/arch/x86/kernel/irq_32.c
index b71e02d42f4f..1cf8c1fcc088 100644
--- a/trunk/arch/x86/kernel/irq_32.c
+++ b/trunk/arch/x86/kernel/irq_32.c
@@ -325,7 +325,7 @@ int show_interrupts(struct seq_file *p, void *v)
for_each_online_cpu(j)
seq_printf(p, "%10u ",
per_cpu(irq_stat,j).irq_call_count);
- seq_printf(p, " Function call interrupts\n");
+ seq_printf(p, " function call interrupts\n");
seq_printf(p, "TLB: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ",
diff --git a/trunk/arch/x86/kernel/irq_64.c b/trunk/arch/x86/kernel/irq_64.c
index f065fe9071b9..1f78b238d8d2 100644
--- a/trunk/arch/x86/kernel/irq_64.c
+++ b/trunk/arch/x86/kernel/irq_64.c
@@ -129,7 +129,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_printf(p, "CAL: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
- seq_printf(p, " Function call interrupts\n");
+ seq_printf(p, " function call interrupts\n");
seq_printf(p, "TLB: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
diff --git a/trunk/arch/x86/kernel/kvm.c b/trunk/arch/x86/kernel/kvm.c
index 478bca986eca..8b7a3cf37d2b 100644
--- a/trunk/arch/x86/kernel/kvm.c
+++ b/trunk/arch/x86/kernel/kvm.c
@@ -178,7 +178,7 @@ static void kvm_flush_tlb(void)
kvm_deferred_mmu_op(&ftlb, sizeof ftlb);
}
-static void kvm_release_pt(unsigned long pfn)
+static void kvm_release_pt(u32 pfn)
{
struct kvm_mmu_op_release_pt rpt = {
.header.op = KVM_MMU_OP_RELEASE_PT,
diff --git a/trunk/arch/x86/kernel/ldt.c b/trunk/arch/x86/kernel/ldt.c
index 0ed5f939b905..b68e21f06f4f 100644
--- a/trunk/arch/x86/kernel/ldt.c
+++ b/trunk/arch/x86/kernel/ldt.c
@@ -18,7 +18,6 @@
#include
#include
#include
-#include
#ifdef CONFIG_SMP
static void flush_ldt(void *current_mm)
diff --git a/trunk/arch/x86/kernel/nmi.c b/trunk/arch/x86/kernel/nmi.c
index 2c97f07f1c2c..abb78a2cc4ad 100644
--- a/trunk/arch/x86/kernel/nmi.c
+++ b/trunk/arch/x86/kernel/nmi.c
@@ -299,15 +299,6 @@ void acpi_nmi_disable(void)
on_each_cpu(__acpi_nmi_disable, NULL, 1);
}
-/*
- * This function is called as soon the LAPIC NMI watchdog driver has everything
- * in place and it's ready to check if the NMIs belong to the NMI watchdog
- */
-void cpu_nmi_set_wd_enabled(void)
-{
- __get_cpu_var(wd_enabled) = 1;
-}
-
void setup_apic_nmi_watchdog(void *unused)
{
if (__get_cpu_var(wd_enabled))
@@ -320,6 +311,8 @@ void setup_apic_nmi_watchdog(void *unused)
switch (nmi_watchdog) {
case NMI_LOCAL_APIC:
+ /* enable it before to avoid race with handler */
+ __get_cpu_var(wd_enabled) = 1;
if (lapic_watchdog_init(nmi_hz) < 0) {
__get_cpu_var(wd_enabled) = 0;
return;
diff --git a/trunk/arch/x86/kernel/olpc.c b/trunk/arch/x86/kernel/olpc.c
index 7a13fac63a1f..3e6672274807 100644
--- a/trunk/arch/x86/kernel/olpc.c
+++ b/trunk/arch/x86/kernel/olpc.c
@@ -190,12 +190,12 @@ EXPORT_SYMBOL_GPL(olpc_ec_cmd);
static void __init platform_detect(void)
{
size_t propsize;
- __be32 rev;
+ u32 rev;
if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4,
&propsize) || propsize != 4) {
printk(KERN_ERR "ofw: getprop call failed!\n");
- rev = cpu_to_be32(0);
+ rev = 0;
}
olpc_platform_info.boardrev = be32_to_cpu(rev);
}
@@ -203,7 +203,7 @@ static void __init platform_detect(void)
static void __init platform_detect(void)
{
/* stopgap until OFW support is added to the kernel */
- olpc_platform_info.boardrev = 0xc2;
+ olpc_platform_info.boardrev = be32_to_cpu(0xc2);
}
#endif
diff --git a/trunk/arch/x86/kernel/paravirt.c b/trunk/arch/x86/kernel/paravirt.c
index e2f43768723a..300da17e61cb 100644
--- a/trunk/arch/x86/kernel/paravirt.c
+++ b/trunk/arch/x86/kernel/paravirt.c
@@ -330,7 +330,6 @@ struct pv_cpu_ops pv_cpu_ops = {
#endif
.wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe,
- .read_msr_amd = native_read_msr_amd_safe,
.write_msr = native_write_msr_safe,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
diff --git a/trunk/arch/x86/kernel/paravirt_patch_32.c b/trunk/arch/x86/kernel/paravirt_patch_32.c
index 9fe644f4861d..58262218781b 100644
--- a/trunk/arch/x86/kernel/paravirt_patch_32.c
+++ b/trunk/arch/x86/kernel/paravirt_patch_32.c
@@ -23,7 +23,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
start = start_##ops##_##x; \
end = end_##ops##_##x; \
goto patch_site
- switch (type) {
+ switch(type) {
PATCH_SITE(pv_irq_ops, irq_disable);
PATCH_SITE(pv_irq_ops, irq_enable);
PATCH_SITE(pv_irq_ops, restore_fl);
diff --git a/trunk/arch/x86/kernel/pci-dma.c b/trunk/arch/x86/kernel/pci-dma.c
index f704cb51ff82..87d4d6964ec2 100644
--- a/trunk/arch/x86/kernel/pci-dma.c
+++ b/trunk/arch/x86/kernel/pci-dma.c
@@ -82,7 +82,7 @@ void __init dma32_reserve_bootmem(void)
* using 512M as goal
*/
align = 64ULL<<20;
- size = roundup(dma32_bootmem_size, align);
+ size = round_up(dma32_bootmem_size, align);
dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
512ULL<<20);
if (dma32_bootmem_ptr)
diff --git a/trunk/arch/x86/kernel/pci-gart_64.c b/trunk/arch/x86/kernel/pci-gart_64.c
index 1a895a582534..be33a5442d82 100644
--- a/trunk/arch/x86/kernel/pci-gart_64.c
+++ b/trunk/arch/x86/kernel/pci-gart_64.c
@@ -82,8 +82,7 @@ AGPEXTERN __u32 *agp_gatt_table;
static unsigned long next_bit; /* protected by iommu_bitmap_lock */
static int need_flush; /* global flush state. set for each gart wrap */
-static unsigned long alloc_iommu(struct device *dev, int size,
- unsigned long align_mask)
+static unsigned long alloc_iommu(struct device *dev, int size)
{
unsigned long offset, flags;
unsigned long boundary_size;
@@ -91,17 +90,16 @@ static unsigned long alloc_iommu(struct device *dev, int size,
base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
PAGE_SIZE) >> PAGE_SHIFT;
- boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1,
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
PAGE_SIZE) >> PAGE_SHIFT;
spin_lock_irqsave(&iommu_bitmap_lock, flags);
offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
- size, base_index, boundary_size, align_mask);
+ size, base_index, boundary_size, 0);
if (offset == -1) {
need_flush = 1;
offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
- size, base_index, boundary_size,
- align_mask);
+ size, base_index, boundary_size, 0);
}
if (offset != -1) {
next_bit = offset+size;
@@ -238,10 +236,10 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
* Caller needs to check if the iommu is needed and flush.
*/
static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
- size_t size, int dir, unsigned long align_mask)
+ size_t size, int dir)
{
unsigned long npages = iommu_num_pages(phys_mem, size);
- unsigned long iommu_page = alloc_iommu(dev, npages, align_mask);
+ unsigned long iommu_page = alloc_iommu(dev, npages);
int i;
if (iommu_page == -1) {
@@ -264,11 +262,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
static dma_addr_t
gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir)
{
- dma_addr_t map;
- unsigned long align_mask;
-
- align_mask = (1UL << get_order(size)) - 1;
- map = dma_map_area(dev, paddr, size, dir, align_mask);
+ dma_addr_t map = dma_map_area(dev, paddr, size, dir);
flush_gart();
@@ -287,8 +281,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
if (!need_iommu(dev, paddr, size))
return paddr;
- bus = dma_map_area(dev, paddr, size, dir, 0);
- flush_gart();
+ bus = gart_map_simple(dev, paddr, size, dir);
return bus;
}
@@ -347,7 +340,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
unsigned long addr = sg_phys(s);
if (nonforced_iommu(dev, addr, s->length)) {
- addr = dma_map_area(dev, addr, s->length, dir, 0);
+ addr = dma_map_area(dev, addr, s->length, dir);
if (addr == bad_dma_address) {
if (i > 0)
gart_unmap_sg(dev, sg, i, dir);
@@ -369,7 +362,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
int nelems, struct scatterlist *sout,
unsigned long pages)
{
- unsigned long iommu_start = alloc_iommu(dev, pages, 0);
+ unsigned long iommu_start = alloc_iommu(dev, pages);
unsigned long iommu_page = iommu_start;
struct scatterlist *s;
int i;
diff --git a/trunk/arch/x86/kernel/pcspeaker.c b/trunk/arch/x86/kernel/pcspeaker.c
index a311ffcaad16..bc1f2d3ea277 100644
--- a/trunk/arch/x86/kernel/pcspeaker.c
+++ b/trunk/arch/x86/kernel/pcspeaker.c
@@ -1,13 +1,20 @@
#include
-#include
+#include
#include
static __init int add_pcspkr(void)
{
struct platform_device *pd;
+ int ret;
- pd = platform_device_register_simple("pcspkr", -1, NULL, 0);
+ pd = platform_device_alloc("pcspkr", -1);
+ if (!pd)
+ return -ENOMEM;
- return IS_ERR(pd) ? PTR_ERR(pd) : 0;
+ ret = platform_device_add(pd);
+ if (ret)
+ platform_device_put(pd);
+
+ return ret;
}
device_initcall(add_pcspkr);
diff --git a/trunk/arch/x86/kernel/process.c b/trunk/arch/x86/kernel/process.c
index ec7a2ba9bce8..876e91890777 100644
--- a/trunk/arch/x86/kernel/process.c
+++ b/trunk/arch/x86/kernel/process.c
@@ -185,8 +185,7 @@ static void mwait_idle(void)
static void poll_idle(void)
{
local_irq_enable();
- while (!need_resched())
- cpu_relax();
+ cpu_relax();
}
/*
diff --git a/trunk/arch/x86/kernel/process_32.c b/trunk/arch/x86/kernel/process_32.c
index 205188db9626..31f40b24bf5d 100644
--- a/trunk/arch/x86/kernel/process_32.c
+++ b/trunk/arch/x86/kernel/process_32.c
@@ -37,7 +37,6 @@
#include
#include
#include
-#include
#include
#include
@@ -57,8 +56,6 @@
#include
#include
#include
-#include
-#include
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -164,7 +161,6 @@ void __show_registers(struct pt_regs *regs, int all)
unsigned long d0, d1, d2, d3, d6, d7;
unsigned long sp;
unsigned short ss, gs;
- const char *board;
if (user_mode_vm(regs)) {
sp = regs->sp;
@@ -177,15 +173,11 @@ void __show_registers(struct pt_regs *regs, int all)
}
printk("\n");
-
- board = dmi_get_system_info(DMI_PRODUCT_NAME);
- if (!board)
- board = "";
- printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
+ printk("Pid: %d, comm: %s %s (%s %.*s)\n",
task_pid_nr(current), current->comm,
print_tainted(), init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
- init_utsname()->version, board);
+ init_utsname()->version);
printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
(u16)regs->cs, regs->ip, regs->flags,
@@ -285,14 +277,6 @@ void exit_thread(void)
tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
put_cpu();
}
-#ifdef CONFIG_X86_DS
- /* Free any DS contexts that have not been properly released. */
- if (unlikely(current->thread.ds_ctx)) {
- /* we clear debugctl to make sure DS is not used. */
- update_debugctlmsr(0);
- ds_free(current->thread.ds_ctx);
- }
-#endif /* CONFIG_X86_DS */
}
void flush_thread(void)
@@ -454,35 +438,6 @@ int set_tsc_mode(unsigned int val)
return 0;
}
-#ifdef CONFIG_X86_DS
-static int update_debugctl(struct thread_struct *prev,
- struct thread_struct *next, unsigned long debugctl)
-{
- unsigned long ds_prev = 0;
- unsigned long ds_next = 0;
-
- if (prev->ds_ctx)
- ds_prev = (unsigned long)prev->ds_ctx->ds;
- if (next->ds_ctx)
- ds_next = (unsigned long)next->ds_ctx->ds;
-
- if (ds_next != ds_prev) {
- /* we clear debugctl to make sure DS
- * is not in use when we change it */
- debugctl = 0;
- update_debugctlmsr(0);
- wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
- }
- return debugctl;
-}
-#else
-static int update_debugctl(struct thread_struct *prev,
- struct thread_struct *next, unsigned long debugctl)
-{
- return debugctl;
-}
-#endif /* CONFIG_X86_DS */
-
static noinline void
__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
@@ -493,7 +448,14 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
prev = &prev_p->thread;
next = &next_p->thread;
- debugctl = update_debugctl(prev, next, prev->debugctlmsr);
+ debugctl = prev->debugctlmsr;
+ if (next->ds_area_msr != prev->ds_area_msr) {
+ /* we clear debugctl to make sure DS
+ * is not in use when we change it */
+ debugctl = 0;
+ update_debugctlmsr(0);
+ wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
+ }
if (next->debugctlmsr != debugctl)
update_debugctlmsr(next->debugctlmsr);
@@ -517,13 +479,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
hard_enable_TSC();
}
-#ifdef CONFIG_X86_PTRACE_BTS
+#ifdef X86_BTS
if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
-#endif /* CONFIG_X86_PTRACE_BTS */
+#endif
if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
diff --git a/trunk/arch/x86/kernel/process_64.c b/trunk/arch/x86/kernel/process_64.c
index 2a8ccb9238b4..e12e0e4dd256 100644
--- a/trunk/arch/x86/kernel/process_64.c
+++ b/trunk/arch/x86/kernel/process_64.c
@@ -37,11 +37,11 @@
#include
#include
#include
-#include
-#include
+#include
#include
#include
+#include
#include
#include
#include
@@ -51,7 +51,6 @@
#include
#include
#include
-#include
asmlinkage extern void ret_from_fork(void);
@@ -89,7 +88,7 @@ void exit_idle(void)
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);
-#include
+#include
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
@@ -154,7 +153,7 @@ void cpu_idle(void)
}
/* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs *regs)
+void __show_regs(struct pt_regs * regs)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
unsigned long d0, d1, d2, d3, d6, d7;
@@ -163,61 +162,59 @@ void __show_regs(struct pt_regs *regs)
printk("\n");
print_modules();
- printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
+ printk("Pid: %d, comm: %.20s %s %s %.*s\n",
current->pid, current->comm, print_tainted(),
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
- printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
+ printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
printk_address(regs->ip, 1);
- printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
- regs->sp, regs->flags);
- printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
+ printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
+ regs->flags);
+ printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
regs->ax, regs->bx, regs->cx);
- printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
+ printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
regs->dx, regs->si, regs->di);
- printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
+ printk("RBP: %016lx R08: %016lx R09: %016lx\n",
regs->bp, regs->r8, regs->r9);
- printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
- regs->r10, regs->r11, regs->r12);
- printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
- regs->r13, regs->r14, regs->r15);
-
- asm("movl %%ds,%0" : "=r" (ds));
- asm("movl %%cs,%0" : "=r" (cs));
- asm("movl %%es,%0" : "=r" (es));
+ printk("R10: %016lx R11: %016lx R12: %016lx\n",
+ regs->r10, regs->r11, regs->r12);
+ printk("R13: %016lx R14: %016lx R15: %016lx\n",
+ regs->r13, regs->r14, regs->r15);
+
+ asm("movl %%ds,%0" : "=r" (ds));
+ asm("movl %%cs,%0" : "=r" (cs));
+ asm("movl %%es,%0" : "=r" (es));
asm("movl %%fs,%0" : "=r" (fsindex));
asm("movl %%gs,%0" : "=r" (gsindex));
rdmsrl(MSR_FS_BASE, fs);
- rdmsrl(MSR_GS_BASE, gs);
- rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
+ rdmsrl(MSR_GS_BASE, gs);
+ rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
cr0 = read_cr0();
cr2 = read_cr2();
cr3 = read_cr3();
cr4 = read_cr4();
- printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
- fs, fsindex, gs, gsindex, shadowgs);
- printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
- es, cr0);
- printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
- cr4);
+ printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
+ fs,fsindex,gs,gsindex,shadowgs);
+ printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
+ printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
get_debugreg(d0, 0);
get_debugreg(d1, 1);
get_debugreg(d2, 2);
- printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
+ printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
get_debugreg(d3, 3);
get_debugreg(d6, 6);
get_debugreg(d7, 7);
- printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
+ printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}
void show_regs(struct pt_regs *regs)
{
- printk(KERN_INFO "CPU %d:", smp_processor_id());
+ printk("CPU %d:", smp_processor_id());
__show_regs(regs);
show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}
@@ -243,14 +240,6 @@ void exit_thread(void)
t->io_bitmap_max = 0;
put_cpu();
}
-#ifdef CONFIG_X86_DS
- /* Free any DS contexts that have not been properly released. */
- if (unlikely(t->ds_ctx)) {
- /* we clear debugctl to make sure DS is not used. */
- update_debugctlmsr(0);
- ds_free(t->ds_ctx);
- }
-#endif /* CONFIG_X86_DS */
}
void flush_thread(void)
@@ -326,10 +315,10 @@ void prepare_to_copy(struct task_struct *tsk)
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
unsigned long unused,
- struct task_struct *p, struct pt_regs *regs)
+ struct task_struct * p, struct pt_regs * regs)
{
int err;
- struct pt_regs *childregs;
+ struct pt_regs * childregs;
struct task_struct *me = current;
childregs = ((struct pt_regs *)
@@ -374,10 +363,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
if (test_thread_flag(TIF_IA32))
err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0);
- else
-#endif
- err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
- if (err)
+ else
+#endif
+ err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
+ if (err)
goto out;
}
err = 0;
@@ -484,27 +473,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
next = &next_p->thread;
debugctl = prev->debugctlmsr;
-
-#ifdef CONFIG_X86_DS
- {
- unsigned long ds_prev = 0, ds_next = 0;
-
- if (prev->ds_ctx)
- ds_prev = (unsigned long)prev->ds_ctx->ds;
- if (next->ds_ctx)
- ds_next = (unsigned long)next->ds_ctx->ds;
-
- if (ds_next != ds_prev) {
- /*
- * We clear debugctl to make sure DS
- * is not in use when we change it:
- */
- debugctl = 0;
- update_debugctlmsr(0);
- wrmsrl(MSR_IA32_DS_AREA, ds_next);
- }
+ if (next->ds_area_msr != prev->ds_area_msr) {
+ /* we clear debugctl to make sure DS
+ * is not in use when we change it */
+ debugctl = 0;
+ update_debugctlmsr(0);
+ wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
}
-#endif /* CONFIG_X86_DS */
if (next->debugctlmsr != debugctl)
update_debugctlmsr(next->debugctlmsr);
@@ -542,13 +517,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
-#ifdef CONFIG_X86_PTRACE_BTS
+#ifdef X86_BTS
if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
-#endif /* CONFIG_X86_PTRACE_BTS */
+#endif
}
/*
@@ -570,7 +545,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
unsigned fsindex, gsindex;
/* we're going to use this soon, after a few expensive things */
- if (next_p->fpu_counter > 5)
+ if (next_p->fpu_counter>5)
prefetch(next->xstate);
/*
@@ -578,13 +553,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
load_sp0(tss, next);
- /*
+ /*
* Switch DS and ES.
* This won't pick up thread selector changes, but I guess that is ok.
*/
savesegment(es, prev->es);
if (unlikely(next->es | prev->es))
- loadsegment(es, next->es);
+ loadsegment(es, next->es);
savesegment(ds, prev->ds);
if (unlikely(next->ds | prev->ds))
@@ -610,7 +585,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
arch_leave_lazy_cpu_mode();
- /*
+ /*
* Switch FS and GS.
*
* Segment register != 0 always requires a reload. Also
@@ -619,13 +594,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
if (unlikely(fsindex | next->fsindex | prev->fs)) {
loadsegment(fs, next->fsindex);
- /*
+ /*
* Check if the user used a selector != 0; if yes
* clear 64bit base, since overloaded base is always
* mapped to the Null selector
*/
if (fsindex)
- prev->fs = 0;
+ prev->fs = 0;
}
/* when next process has a 64bit base use it */
if (next->fs)
@@ -635,7 +610,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (unlikely(gsindex | next->gsindex | prev->gs)) {
load_gs_index(next->gsindex);
if (gsindex)
- prev->gs = 0;
+ prev->gs = 0;
}
if (next->gs)
wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
@@ -644,12 +619,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* Must be after DS reload */
unlazy_fpu(prev_p);
- /*
+ /*
* Switch the PDA and FPU contexts.
*/
prev->usersp = read_pda(oldrsp);
write_pda(oldrsp, next->usersp);
- write_pda(pcurrent, next_p);
+ write_pda(pcurrent, next_p);
write_pda(kernelstack,
(unsigned long)task_stack_page(next_p) +
@@ -690,7 +665,7 @@ long sys_execve(char __user *name, char __user * __user *argv,
char __user * __user *envp, struct pt_regs *regs)
{
long error;
- char *filename;
+ char * filename;
filename = getname(name);
error = PTR_ERR(filename);
@@ -748,55 +723,55 @@ asmlinkage long sys_vfork(struct pt_regs *regs)
unsigned long get_wchan(struct task_struct *p)
{
unsigned long stack;
- u64 fp, ip;
+ u64 fp,ip;
int count = 0;
- if (!p || p == current || p->state == TASK_RUNNING)
- return 0;
+ if (!p || p == current || p->state==TASK_RUNNING)
+ return 0;
stack = (unsigned long)task_stack_page(p);
if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
return 0;
fp = *(u64 *)(p->thread.sp);
- do {
+ do {
if (fp < (unsigned long)stack ||
fp > (unsigned long)stack+THREAD_SIZE)
- return 0;
+ return 0;
ip = *(u64 *)(fp+8);
if (!in_sched_functions(ip))
return ip;
- fp = *(u64 *)fp;
- } while (count++ < 16);
+ fp = *(u64 *)fp;
+ } while (count++ < 16);
return 0;
}
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
-{
- int ret = 0;
+{
+ int ret = 0;
int doit = task == current;
int cpu;
- switch (code) {
+ switch (code) {
case ARCH_SET_GS:
if (addr >= TASK_SIZE_OF(task))
- return -EPERM;
+ return -EPERM;
cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
+ /* handle small bases via the GDT because that's faster to
switch. */
- if (addr <= 0xffffffff) {
- set_32bit_tls(task, GS_TLS, addr);
- if (doit) {
+ if (addr <= 0xffffffff) {
+ set_32bit_tls(task, GS_TLS, addr);
+ if (doit) {
load_TLS(&task->thread, cpu);
- load_gs_index(GS_TLS_SEL);
+ load_gs_index(GS_TLS_SEL);
}
- task->thread.gsindex = GS_TLS_SEL;
+ task->thread.gsindex = GS_TLS_SEL;
task->thread.gs = 0;
- } else {
+ } else {
task->thread.gsindex = 0;
task->thread.gs = addr;
if (doit) {
load_gs_index(0);
ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
- }
+ }
}
put_cpu();
break;
@@ -850,7 +825,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
rdmsrl(MSR_KERNEL_GS_BASE, base);
else
base = task->thread.gs;
- } else
+ }
+ else
base = task->thread.gs;
ret = put_user(base, (unsigned long __user *)addr);
break;
diff --git a/trunk/arch/x86/kernel/ptrace.c b/trunk/arch/x86/kernel/ptrace.c
index e375b658efc3..e37dccce85db 100644
--- a/trunk/arch/x86/kernel/ptrace.c
+++ b/trunk/arch/x86/kernel/ptrace.c
@@ -14,7 +14,6 @@
#include
#include
#include
-#include
#include
#include
#include
@@ -70,7 +69,7 @@ static inline bool invalid_selector(u16 value)
#define FLAG_MASK FLAG_MASK_32
-static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
+static long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
{
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
regno >>= 2;
@@ -555,115 +554,45 @@ static int ptrace_set_debugreg(struct task_struct *child,
return 0;
}
-#ifdef CONFIG_X86_PTRACE_BTS
-/*
- * The configuration for a particular BTS hardware implementation.
- */
-struct bts_configuration {
- /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */
- unsigned char sizeof_bts;
- /* the size of a field in the BTS record in bytes */
- unsigned char sizeof_field;
- /* a bitmask to enable/disable BTS in DEBUGCTL MSR */
- unsigned long debugctl_mask;
-};
-static struct bts_configuration bts_cfg;
-
-#define BTS_MAX_RECORD_SIZE (8 * 3)
-
-
-/*
- * Branch Trace Store (BTS) uses the following format. Different
- * architectures vary in the size of those fields.
- * - source linear address
- * - destination linear address
- * - flags
- *
- * Later architectures use 64bit pointers throughout, whereas earlier
- * architectures use 32bit pointers in 32bit mode.
- *
- * We compute the base address for the first 8 fields based on:
- * - the field size stored in the DS configuration
- * - the relative field position
- *
- * In order to store additional information in the BTS buffer, we use
- * a special source address to indicate that the record requires
- * special interpretation.
- *
- * Netburst indicated via a bit in the flags field whether the branch
- * was predicted; this is ignored.
- */
-
-enum bts_field {
- bts_from = 0,
- bts_to,
- bts_flags,
-
- bts_escape = (unsigned long)-1,
- bts_qual = bts_to,
- bts_jiffies = bts_flags
-};
-
-static inline unsigned long bts_get(const char *base, enum bts_field field)
-{
- base += (bts_cfg.sizeof_field * field);
- return *(unsigned long *)base;
-}
+#ifdef X86_BTS
-static inline void bts_set(char *base, enum bts_field field, unsigned long val)
+static int ptrace_bts_get_size(struct task_struct *child)
{
- base += (bts_cfg.sizeof_field * field);;
- (*(unsigned long *)base) = val;
-}
+ if (!child->thread.ds_area_msr)
+ return -ENXIO;
-/*
- * Translate a BTS record from the raw format into the bts_struct format
- *
- * out (out): bts_struct interpretation
- * raw: raw BTS record
- */
-static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw)
-{
- memset(out, 0, sizeof(*out));
- if (bts_get(raw, bts_from) == bts_escape) {
- out->qualifier = bts_get(raw, bts_qual);
- out->variant.jiffies = bts_get(raw, bts_jiffies);
- } else {
- out->qualifier = BTS_BRANCH;
- out->variant.lbr.from_ip = bts_get(raw, bts_from);
- out->variant.lbr.to_ip = bts_get(raw, bts_to);
- }
+ return ds_get_bts_index((void *)child->thread.ds_area_msr);
}
-static int ptrace_bts_read_record(struct task_struct *child, size_t index,
+static int ptrace_bts_read_record(struct task_struct *child,
+ long index,
struct bts_struct __user *out)
{
struct bts_struct ret;
- const void *bts_record;
- size_t bts_index, bts_end;
- int error;
+ int retval;
+ int bts_end;
+ int bts_index;
- error = ds_get_bts_end(child, &bts_end);
- if (error < 0)
- return error;
+ if (!child->thread.ds_area_msr)
+ return -ENXIO;
- if (bts_end <= index)
+ if (index < 0)
return -EINVAL;
- error = ds_get_bts_index(child, &bts_index);
- if (error < 0)
- return error;
+ bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr);
+ if (bts_end <= index)
+ return -EINVAL;
/* translate the ptrace bts index into the ds bts index */
- bts_index += bts_end - (index + 1);
- if (bts_end <= bts_index)
- bts_index -= bts_end;
+ bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr);
+ bts_index -= (index + 1);
+ if (bts_index < 0)
+ bts_index += bts_end;
- error = ds_access_bts(child, bts_index, &bts_record);
- if (error < 0)
- return error;
-
- ptrace_bts_translate_record(&ret, bts_record);
+ retval = ds_read_bts((void *)child->thread.ds_area_msr,
+ bts_index, &ret);
+ if (retval < 0)
+ return retval;
if (copy_to_user(out, &ret, sizeof(ret)))
return -EFAULT;
@@ -671,106 +600,101 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
return sizeof(ret);
}
+static int ptrace_bts_clear(struct task_struct *child)
+{
+ if (!child->thread.ds_area_msr)
+ return -ENXIO;
+
+ return ds_clear((void *)child->thread.ds_area_msr);
+}
+
static int ptrace_bts_drain(struct task_struct *child,
long size,
struct bts_struct __user *out)
{
- struct bts_struct ret;
- const unsigned char *raw;
- size_t end, i;
- int error;
+ int end, i;
+ void *ds = (void *)child->thread.ds_area_msr;
- error = ds_get_bts_index(child, &end);
- if (error < 0)
- return error;
+ if (!ds)
+ return -ENXIO;
+
+ end = ds_get_bts_index(ds);
+ if (end <= 0)
+ return end;
if (size < (end * sizeof(struct bts_struct)))
return -EIO;
- error = ds_access_bts(child, 0, (const void **)&raw);
- if (error < 0)
- return error;
+ for (i = 0; i < end; i++, out++) {
+ struct bts_struct ret;
+ int retval;
- for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) {
- ptrace_bts_translate_record(&ret, raw);
+ retval = ds_read_bts(ds, i, &ret);
+ if (retval < 0)
+ return retval;
if (copy_to_user(out, &ret, sizeof(ret)))
return -EFAULT;
}
- error = ds_clear_bts(child);
- if (error < 0)
- return error;
+ ds_clear(ds);
return end;
}
-static void ptrace_bts_ovfl(struct task_struct *child)
-{
- send_sig(child->thread.bts_ovfl_signal, child, 0);
-}
-
static int ptrace_bts_config(struct task_struct *child,
long cfg_size,
const struct ptrace_bts_config __user *ucfg)
{
struct ptrace_bts_config cfg;
- int error = 0;
-
- error = -EOPNOTSUPP;
- if (!bts_cfg.sizeof_bts)
- goto errout;
+ int bts_size, ret = 0;
+ void *ds;
- error = -EIO;
if (cfg_size < sizeof(cfg))
- goto errout;
+ return -EIO;
- error = -EFAULT;
if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
- goto errout;
-
- error = -EINVAL;
- if ((cfg.flags & PTRACE_BTS_O_SIGNAL) &&
- !(cfg.flags & PTRACE_BTS_O_ALLOC))
- goto errout;
-
- if (cfg.flags & PTRACE_BTS_O_ALLOC) {
- ds_ovfl_callback_t ovfl = NULL;
- unsigned int sig = 0;
-
- /* we ignore the error in case we were not tracing child */
- (void)ds_release_bts(child);
+ return -EFAULT;
- if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
- if (!cfg.signal)
- goto errout;
+ if ((int)cfg.size < 0)
+ return -EINVAL;
- sig = cfg.signal;
- ovfl = ptrace_bts_ovfl;
- }
+ bts_size = 0;
+ ds = (void *)child->thread.ds_area_msr;
+ if (ds) {
+ bts_size = ds_get_bts_size(ds);
+ if (bts_size < 0)
+ return bts_size;
+ }
+ cfg.size = PAGE_ALIGN(cfg.size);
- error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl);
- if (error < 0)
+ if (bts_size != cfg.size) {
+ ret = ptrace_bts_realloc(child, cfg.size,
+ cfg.flags & PTRACE_BTS_O_CUT_SIZE);
+ if (ret < 0)
goto errout;
- child->thread.bts_ovfl_signal = sig;
+ ds = (void *)child->thread.ds_area_msr;
}
- error = -EINVAL;
- if (!child->thread.ds_ctx && cfg.flags)
+ if (cfg.flags & PTRACE_BTS_O_SIGNAL)
+ ret = ds_set_overflow(ds, DS_O_SIGNAL);
+ else
+ ret = ds_set_overflow(ds, DS_O_WRAP);
+ if (ret < 0)
goto errout;
if (cfg.flags & PTRACE_BTS_O_TRACE)
- child->thread.debugctlmsr |= bts_cfg.debugctl_mask;
+ child->thread.debugctlmsr |= ds_debugctl_mask();
else
- child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
+ child->thread.debugctlmsr &= ~ds_debugctl_mask();
if (cfg.flags & PTRACE_BTS_O_SCHED)
set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
else
clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
- error = sizeof(cfg);
+ ret = sizeof(cfg);
out:
if (child->thread.debugctlmsr)
@@ -778,10 +702,10 @@ static int ptrace_bts_config(struct task_struct *child,
else
clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
- return error;
+ return ret;
errout:
- child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
+ child->thread.debugctlmsr &= ~ds_debugctl_mask();
clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
goto out;
}
@@ -790,40 +714,29 @@ static int ptrace_bts_status(struct task_struct *child,
long cfg_size,
struct ptrace_bts_config __user *ucfg)
{
+ void *ds = (void *)child->thread.ds_area_msr;
struct ptrace_bts_config cfg;
- size_t end;
- const void *base, *max;
- int error;
if (cfg_size < sizeof(cfg))
return -EIO;
- error = ds_get_bts_end(child, &end);
- if (error < 0)
- return error;
+ memset(&cfg, 0, sizeof(cfg));
- error = ds_access_bts(child, /* index = */ 0, &base);
- if (error < 0)
- return error;
+ if (ds) {
+ cfg.size = ds_get_bts_size(ds);
- error = ds_access_bts(child, /* index = */ end, &max);
- if (error < 0)
- return error;
+ if (ds_get_overflow(ds) == DS_O_SIGNAL)
+ cfg.flags |= PTRACE_BTS_O_SIGNAL;
- memset(&cfg, 0, sizeof(cfg));
- cfg.size = (max - base);
- cfg.signal = child->thread.bts_ovfl_signal;
- cfg.bts_size = sizeof(struct bts_struct);
+ if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
+ child->thread.debugctlmsr & ds_debugctl_mask())
+ cfg.flags |= PTRACE_BTS_O_TRACE;
- if (cfg.signal)
- cfg.flags |= PTRACE_BTS_O_SIGNAL;
-
- if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
- child->thread.debugctlmsr & bts_cfg.debugctl_mask)
- cfg.flags |= PTRACE_BTS_O_TRACE;
+ if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
+ cfg.flags |= PTRACE_BTS_O_SCHED;
+ }
- if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
- cfg.flags |= PTRACE_BTS_O_SCHED;
+ cfg.bts_size = sizeof(struct bts_struct);
if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
return -EFAULT;
@@ -831,38 +744,89 @@ static int ptrace_bts_status(struct task_struct *child,
return sizeof(cfg);
}
+
static int ptrace_bts_write_record(struct task_struct *child,
const struct bts_struct *in)
{
- unsigned char bts_record[BTS_MAX_RECORD_SIZE];
+ int retval;
- BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts);
+ if (!child->thread.ds_area_msr)
+ return -ENXIO;
- memset(bts_record, 0, bts_cfg.sizeof_bts);
- switch (in->qualifier) {
- case BTS_INVALID:
- break;
+ retval = ds_write_bts((void *)child->thread.ds_area_msr, in);
+ if (retval)
+ return retval;
- case BTS_BRANCH:
- bts_set(bts_record, bts_from, in->variant.lbr.from_ip);
- bts_set(bts_record, bts_to, in->variant.lbr.to_ip);
- break;
+ return sizeof(*in);
+}
- case BTS_TASK_ARRIVES:
- case BTS_TASK_DEPARTS:
- bts_set(bts_record, bts_from, bts_escape);
- bts_set(bts_record, bts_qual, in->qualifier);
- bts_set(bts_record, bts_jiffies, in->variant.jiffies);
- break;
+static int ptrace_bts_realloc(struct task_struct *child,
+ int size, int reduce_size)
+{
+ unsigned long rlim, vm;
+ int ret, old_size;
- default:
+ if (size < 0)
return -EINVAL;
+
+ old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
+ if (old_size < 0)
+ return old_size;
+
+ ret = ds_free((void **)&child->thread.ds_area_msr);
+ if (ret < 0)
+ goto out;
+
+ size >>= PAGE_SHIFT;
+ old_size >>= PAGE_SHIFT;
+
+ current->mm->total_vm -= old_size;
+ current->mm->locked_vm -= old_size;
+
+ if (size == 0)
+ goto out;
+
+ rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+ vm = current->mm->total_vm + size;
+ if (rlim < vm) {
+ ret = -ENOMEM;
+
+ if (!reduce_size)
+ goto out;
+
+ size = rlim - current->mm->total_vm;
+ if (size <= 0)
+ goto out;
+ }
+
+ rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+ vm = current->mm->locked_vm + size;
+ if (rlim < vm) {
+ ret = -ENOMEM;
+
+ if (!reduce_size)
+ goto out;
+
+ size = rlim - current->mm->locked_vm;
+ if (size <= 0)
+ goto out;
}
- /* The writing task will be the switched-to task on a context
- * switch. It needs to write into the switched-from task's BTS
- * buffer. */
- return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
+ ret = ds_allocate((void **)&child->thread.ds_area_msr,
+ size << PAGE_SHIFT);
+ if (ret < 0)
+ goto out;
+
+ current->mm->total_vm += size;
+ current->mm->locked_vm += size;
+
+out:
+ if (child->thread.ds_area_msr)
+ set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+ else
+ clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+
+ return ret;
}
void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -875,66 +839,7 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk,
ptrace_bts_write_record(tsk, &rec);
}
-
-static const struct bts_configuration bts_cfg_netburst = {
- .sizeof_bts = sizeof(long) * 3,
- .sizeof_field = sizeof(long),
- .debugctl_mask = (1<<2)|(1<<3)|(1<<5)
-};
-
-static const struct bts_configuration bts_cfg_pentium_m = {
- .sizeof_bts = sizeof(long) * 3,
- .sizeof_field = sizeof(long),
- .debugctl_mask = (1<<6)|(1<<7)
-};
-
-static const struct bts_configuration bts_cfg_core2 = {
- .sizeof_bts = 8 * 3,
- .sizeof_field = 8,
- .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
-};
-
-static inline void bts_configure(const struct bts_configuration *cfg)
-{
- bts_cfg = *cfg;
-}
-
-void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
-{
- switch (c->x86) {
- case 0x6:
- switch (c->x86_model) {
- case 0xD:
- case 0xE: /* Pentium M */
- bts_configure(&bts_cfg_pentium_m);
- break;
- case 0xF: /* Core2 */
- case 0x1C: /* Atom */
- bts_configure(&bts_cfg_core2);
- break;
- default:
- /* sorry, don't know about them */
- break;
- }
- break;
- case 0xF:
- switch (c->x86_model) {
- case 0x0:
- case 0x1:
- case 0x2: /* Netburst */
- bts_configure(&bts_cfg_netburst);
- break;
- default:
- /* sorry, don't know about them */
- break;
- }
- break;
- default:
- /* sorry, don't know about them */
- break;
- }
-}
-#endif /* CONFIG_X86_PTRACE_BTS */
+#endif /* X86_BTS */
/*
* Called by kernel/ptrace.c when detaching..
@@ -947,15 +852,15 @@ void ptrace_disable(struct task_struct *child)
#ifdef TIF_SYSCALL_EMU
clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#endif
-#ifdef CONFIG_X86_PTRACE_BTS
- (void)ds_release_bts(child);
-
- child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
- if (!child->thread.debugctlmsr)
- clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
-
- clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
-#endif /* CONFIG_X86_PTRACE_BTS */
+ if (child->thread.ds_area_msr) {
+#ifdef X86_BTS
+ ptrace_bts_realloc(child, 0, 0);
+#endif
+ child->thread.debugctlmsr &= ~ds_debugctl_mask();
+ if (!child->thread.debugctlmsr)
+ clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+ clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+ }
}
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -1075,7 +980,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
/*
* These bits need more cooking - not enabled yet:
*/
-#ifdef CONFIG_X86_PTRACE_BTS
+#ifdef X86_BTS
case PTRACE_BTS_CONFIG:
ret = ptrace_bts_config
(child, data, (struct ptrace_bts_config __user *)addr);
@@ -1087,7 +992,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
break;
case PTRACE_BTS_SIZE:
- ret = ds_get_bts_index(child, /* pos = */ NULL);
+ ret = ptrace_bts_get_size(child);
break;
case PTRACE_BTS_GET:
@@ -1096,14 +1001,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
break;
case PTRACE_BTS_CLEAR:
- ret = ds_clear_bts(child);
+ ret = ptrace_bts_clear(child);
break;
case PTRACE_BTS_DRAIN:
ret = ptrace_bts_drain
(child, data, (struct bts_struct __user *) addr);
break;
-#endif /* CONFIG_X86_PTRACE_BTS */
+#endif
default:
ret = ptrace_request(child, request, addr, data);
@@ -1470,6 +1375,30 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
force_sig_info(SIGTRAP, &info, tsk);
}
+static void syscall_trace(struct pt_regs *regs)
+{
+ if (!(current->ptrace & PT_PTRACED))
+ return;
+
+#if 0
+ printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
+ current->comm,
+ regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0),
+ current_thread_info()->flags, current->ptrace);
+#endif
+
+ ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+ ? 0x80 : 0));
+ /*
+ * this isn't the same as continuing with a signal, but it will do
+ * for normal use. strace only continues with a signal if the
+ * stopping signal is not SIGTRAP. -brl
+ */
+ if (current->exit_code) {
+ send_sig(current->exit_code, current, 1);
+ current->exit_code = 0;
+ }
+}
#ifdef CONFIG_X86_32
# define IS_IA32 1
@@ -1503,9 +1432,8 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
ret = -1L;
- if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
- tracehook_report_syscall_entry(regs))
- ret = -1L;
+ if (ret || test_thread_flag(TIF_SYSCALL_TRACE))
+ syscall_trace(regs);
if (unlikely(current->audit_context)) {
if (IS_IA32)
@@ -1531,7 +1459,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
if (test_thread_flag(TIF_SYSCALL_TRACE))
- tracehook_report_syscall_exit(regs, 0);
+ syscall_trace(regs);
/*
* If TIF_SYSCALL_EMU is set, we only get here because of
@@ -1547,6 +1475,6 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
* system call instruction.
*/
if (test_thread_flag(TIF_SINGLESTEP) &&
- tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL))
+ (current->ptrace & PT_PTRACED))
send_sigtrap(current, regs, 0);
}
diff --git a/trunk/arch/x86/kernel/reboot.c b/trunk/arch/x86/kernel/reboot.c
index f4c93f1cfc19..724adfc63cb9 100644
--- a/trunk/arch/x86/kernel/reboot.c
+++ b/trunk/arch/x86/kernel/reboot.c
@@ -29,11 +29,7 @@ EXPORT_SYMBOL(pm_power_off);
static const struct desc_ptr no_idt = {};
static int reboot_mode;
-/*
- * Keyboard reset and triple fault may result in INIT, not RESET, which
- * doesn't work when we're in vmx root mode. Try ACPI first.
- */
-enum reboot_type reboot_type = BOOT_ACPI;
+enum reboot_type reboot_type = BOOT_KBD;
int reboot_force;
#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
diff --git a/trunk/arch/x86/kernel/setup.c b/trunk/arch/x86/kernel/setup.c
index 141efab52400..9838f2539dfc 100644
--- a/trunk/arch/x86/kernel/setup.c
+++ b/trunk/arch/x86/kernel/setup.c
@@ -223,9 +223,6 @@ unsigned long saved_video_mode;
#define RAMDISK_LOAD_FLAG 0x4000
static char __initdata command_line[COMMAND_LINE_SIZE];
-#ifdef CONFIG_CMDLINE_BOOL
-static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
-#endif
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
@@ -668,19 +665,6 @@ void __init setup_arch(char **cmdline_p)
bss_resource.start = virt_to_phys(&__bss_start);
bss_resource.end = virt_to_phys(&__bss_stop)-1;
-#ifdef CONFIG_CMDLINE_BOOL
-#ifdef CONFIG_CMDLINE_OVERRIDE
- strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
-#else
- if (builtin_cmdline[0]) {
- /* append boot loader cmdline to builtin */
- strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
- strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
- strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
- }
-#endif
-#endif
-
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
diff --git a/trunk/arch/x86/kernel/setup_percpu.c b/trunk/arch/x86/kernel/setup_percpu.c
index 0e67f72d9316..76e305e064f9 100644
--- a/trunk/arch/x86/kernel/setup_percpu.c
+++ b/trunk/arch/x86/kernel/setup_percpu.c
@@ -162,16 +162,9 @@ void __init setup_per_cpu_areas(void)
printk(KERN_INFO
"cpu %d has no node %d or node-local memory\n",
cpu, node);
- if (ptr)
- printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n",
- cpu, __pa(ptr));
}
- else {
+ else
ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
- if (ptr)
- printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
- cpu, node, __pa(ptr));
- }
#endif
per_cpu_offset(cpu) = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
diff --git a/trunk/arch/x86/kernel/sigframe.h b/trunk/arch/x86/kernel/sigframe.h
index 8b4956e800ac..72bbb519d2dc 100644
--- a/trunk/arch/x86/kernel/sigframe.h
+++ b/trunk/arch/x86/kernel/sigframe.h
@@ -24,9 +24,4 @@ struct rt_sigframe {
struct ucontext uc;
struct siginfo info;
};
-
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs *regs);
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
- sigset_t *set, struct pt_regs *regs);
#endif
diff --git a/trunk/arch/x86/kernel/signal_32.c b/trunk/arch/x86/kernel/signal_32.c
index 2a2435d3037d..6fb5bcdd8933 100644
--- a/trunk/arch/x86/kernel/signal_32.c
+++ b/trunk/arch/x86/kernel/signal_32.c
@@ -17,7 +17,6 @@
#include
#include
#include
-#include
#include
#include
#include
@@ -27,7 +26,6 @@
#include
#include
#include
-#include
#include "sigframe.h"
@@ -560,6 +558,8 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
* handler too.
*/
regs->flags &= ~X86_EFLAGS_TF;
+ if (test_thread_flag(TIF_SINGLESTEP))
+ ptrace_notify(SIGTRAP);
spin_lock_irq(¤t->sighand->siglock);
sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask);
@@ -568,9 +568,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
recalc_sigpending();
spin_unlock_irq(¤t->sighand->siglock);
- tracehook_signal_handler(sig, info, ka, regs,
- test_thread_flag(TIF_SINGLESTEP));
-
return 0;
}
@@ -664,10 +661,5 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
- if (thread_info_flags & _TIF_NOTIFY_RESUME) {
- clear_thread_flag(TIF_NOTIFY_RESUME);
- tracehook_notify_resume(regs);
- }
-
clear_thread_flag(TIF_IRET);
}
diff --git a/trunk/arch/x86/kernel/signal_64.c b/trunk/arch/x86/kernel/signal_64.c
index 694aa888bb19..ca316b5b742c 100644
--- a/trunk/arch/x86/kernel/signal_64.c
+++ b/trunk/arch/x86/kernel/signal_64.c
@@ -15,21 +15,17 @@
#include
#include
#include
-#include
#include
#include
#include
#include
-#include
-
#include
#include
+#include
#include
#include
#include
#include
-#include
-#include
#include "sigframe.h"
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -45,6 +41,11 @@
# define FIX_EFLAGS __FIX_EFLAGS
#endif
+int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs * regs);
+int ia32_setup_frame(int sig, struct k_sigaction *ka,
+ sigset_t *set, struct pt_regs * regs);
+
asmlinkage long
sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
struct pt_regs *regs)
@@ -127,7 +128,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
-#define COPY(x) (err |= __get_user(regs->x, &sc->x))
+#define COPY(x) err |= __get_user(regs->x, &sc->x)
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
@@ -157,7 +158,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
}
{
- struct _fpstate __user *buf;
+ struct _fpstate __user * buf;
err |= __get_user(buf, &sc->fpstate);
if (buf) {
@@ -197,7 +198,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
current->blocked = set;
recalc_sigpending();
spin_unlock_irq(¤t->sighand->siglock);
-
+
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
@@ -207,17 +208,16 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
return ax;
badframe:
- signal_fault(regs, frame, "sigreturn");
+ signal_fault(regs,frame,"sigreturn");
return 0;
-}
+}
/*
* Set up a signal frame.
*/
static inline int
-setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
- unsigned long mask, struct task_struct *me)
+setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me)
{
int err = 0;
@@ -273,35 +273,35 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
}
static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs *regs)
+ sigset_t *set, struct pt_regs * regs)
{
struct rt_sigframe __user *frame;
- struct _fpstate __user *fp = NULL;
+ struct _fpstate __user *fp = NULL;
int err = 0;
struct task_struct *me = current;
if (used_math()) {
- fp = get_stack(ka, regs, sizeof(struct _fpstate));
+ fp = get_stack(ka, regs, sizeof(struct _fpstate));
frame = (void __user *)round_down(
(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
goto give_sigsegv;
- if (save_i387(fp) < 0)
- err |= -1;
+ if (save_i387(fp) < 0)
+ err |= -1;
} else
frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
- if (ka->sa.sa_flags & SA_SIGINFO) {
+ if (ka->sa.sa_flags & SA_SIGINFO) {
err |= copy_siginfo_to_user(&frame->info, info);
if (err)
goto give_sigsegv;
}
-
+
/* Create the ucontext. */
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
@@ -311,9 +311,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
- if (sizeof(*set) == 16) {
+ if (sizeof(*set) == 16) {
__put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
- __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
+ __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
} else
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
@@ -324,7 +324,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
} else {
/* could use a vstub here */
- goto give_sigsegv;
+ goto give_sigsegv;
}
if (err)
@@ -332,7 +332,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
/* Set up registers for signal handler */
regs->di = sig;
- /* In case the signal handler was declared without prototypes */
+ /* In case the signal handler was declared without prototypes */
regs->ax = 0;
/* This also works for non SA_SIGINFO handlers because they expect the
@@ -355,8 +355,37 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
}
/*
- * OK, we're invoking a handler
+ * Return -1L or the syscall number that @regs is executing.
*/
+static long current_syscall(struct pt_regs *regs)
+{
+ /*
+ * We always sign-extend a -1 value being set here,
+ * so this is always either -1L or a syscall number.
+ */
+ return regs->orig_ax;
+}
+
+/*
+ * Return a value that is -EFOO if the system call in @regs->orig_ax
+ * returned an error. This only works for @regs from @current.
+ */
+static long current_syscall_ret(struct pt_regs *regs)
+{
+#ifdef CONFIG_IA32_EMULATION
+ if (test_thread_flag(TIF_IA32))
+ /*
+ * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
+ * and will match correctly in comparisons.
+ */
+ return (int) regs->ax;
+#endif
+ return regs->ax;
+}
+
+/*
+ * OK, we're invoking a handler
+ */
static int
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
@@ -365,9 +394,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
int ret;
/* Are we from a system call? */
- if (syscall_get_nr(current, regs) >= 0) {
+ if (current_syscall(regs) >= 0) {
/* If so, check system call restarting.. */
- switch (syscall_get_error(current, regs)) {
+ switch (current_syscall_ret(regs)) {
case -ERESTART_RESTARTBLOCK:
case -ERESTARTNOHAND:
regs->ax = -EINTR;
@@ -400,7 +429,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs);
else
ret = ia32_setup_frame(sig, ka, oldset, regs);
- } else
+ } else
#endif
ret = setup_rt_frame(sig, ka, info, oldset, regs);
@@ -424,16 +453,15 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
* handler too.
*/
regs->flags &= ~X86_EFLAGS_TF;
+ if (test_thread_flag(TIF_SINGLESTEP))
+ ptrace_notify(SIGTRAP);
spin_lock_irq(¤t->sighand->siglock);
- sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask);
+ sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask);
if (!(ka->sa.sa_flags & SA_NODEFER))
- sigaddset(¤t->blocked, sig);
+ sigaddset(¤t->blocked,sig);
recalc_sigpending();
spin_unlock_irq(¤t->sighand->siglock);
-
- tracehook_signal_handler(sig, info, ka, regs,
- test_thread_flag(TIF_SINGLESTEP));
}
return ret;
@@ -490,9 +518,9 @@ static void do_signal(struct pt_regs *regs)
}
/* Did we come from a system call? */
- if (syscall_get_nr(current, regs) >= 0) {
+ if (current_syscall(regs) >= 0) {
/* Restart the system call - no handlers present */
- switch (syscall_get_error(current, regs)) {
+ switch (current_syscall_ret(regs)) {
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
@@ -530,23 +558,17 @@ void do_notify_resume(struct pt_regs *regs, void *unused,
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
-
- if (thread_info_flags & _TIF_NOTIFY_RESUME) {
- clear_thread_flag(TIF_NOTIFY_RESUME);
- tracehook_notify_resume(regs);
- }
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
-{
- struct task_struct *me = current;
+{
+ struct task_struct *me = current;
if (show_unhandled_signals && printk_ratelimit()) {
printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
- me->comm, me->pid, where, frame, regs->ip,
- regs->sp, regs->orig_ax);
+ me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax);
print_vma_addr(" in ", regs->ip);
printk("\n");
}
- force_sig(SIGSEGV, me);
-}
+ force_sig(SIGSEGV, me);
+}
diff --git a/trunk/arch/x86/kernel/smpboot.c b/trunk/arch/x86/kernel/smpboot.c
index 4e7ccb0e2a9b..7985c5b3f916 100644
--- a/trunk/arch/x86/kernel/smpboot.c
+++ b/trunk/arch/x86/kernel/smpboot.c
@@ -88,7 +88,7 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
#else
-static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
+struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
#define get_idle_for_cpu(x) (idle_thread_array[(x)])
#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p))
#endif
@@ -129,7 +129,7 @@ static int boot_cpu_logical_apicid;
static cpumask_t cpu_sibling_setup_map;
/* Set if we find a B stepping CPU */
-static int __cpuinitdata smp_b_stepping;
+int __cpuinitdata smp_b_stepping;
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
@@ -257,7 +257,6 @@ static void __cpuinit smp_callin(void)
end_local_APIC_setup();
map_cpu_to_logical_apicid();
- notify_cpu_starting(cpuid);
/*
* Get our bogomips.
*
@@ -1314,13 +1313,16 @@ __init void prefill_possible_map(void)
if (!num_processors)
num_processors = 1;
+#ifdef CONFIG_HOTPLUG_CPU
if (additional_cpus == -1) {
if (disabled_cpus > 0)
additional_cpus = disabled_cpus;
else
additional_cpus = 0;
}
-
+#else
+ additional_cpus = 0;
+#endif
possible = num_processors + additional_cpus;
if (possible > NR_CPUS)
possible = NR_CPUS;
diff --git a/trunk/arch/x86/kernel/sys_i386_32.c b/trunk/arch/x86/kernel/sys_i386_32.c
index 1884a8d12bfa..7066cb855a60 100644
--- a/trunk/arch/x86/kernel/sys_i386_32.c
+++ b/trunk/arch/x86/kernel/sys_i386_32.c
@@ -22,8 +22,6 @@
#include
#include
-#include
-
asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff)
diff --git a/trunk/arch/x86/kernel/sys_x86_64.c b/trunk/arch/x86/kernel/sys_x86_64.c
index 6bc211accf08..3b360ef33817 100644
--- a/trunk/arch/x86/kernel/sys_x86_64.c
+++ b/trunk/arch/x86/kernel/sys_x86_64.c
@@ -13,17 +13,15 @@
#include
#include
#include
-#include
+#include
#include
-#include
-asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long off)
+asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long off)
{
long error;
- struct file *file;
+ struct file * file;
error = -EINVAL;
if (off & ~PAGE_MASK)
@@ -58,9 +56,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
unmapped base down for this case. This can give
conflicts with the heap, but we assume that glibc
malloc knows how to fall back to mmap. Give it 1GB
- of playground for now. -AK */
- *begin = 0x40000000;
- *end = 0x80000000;
+ of playground for now. -AK */
+ *begin = 0x40000000;
+ *end = 0x80000000;
if (current->flags & PF_RANDOMIZE) {
new_begin = randomize_range(*begin, *begin + 0x02000000, 0);
if (new_begin)
@@ -68,9 +66,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
}
} else {
*begin = TASK_UNMAPPED_BASE;
- *end = TASK_SIZE;
+ *end = TASK_SIZE;
}
-}
+}
unsigned long
arch_get_unmapped_area(struct file *filp, unsigned long addr,
@@ -80,11 +78,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct vm_area_struct *vma;
unsigned long start_addr;
unsigned long begin, end;
-
+
if (flags & MAP_FIXED)
return addr;
- find_start_end(flags, &begin, &end);
+ find_start_end(flags, &begin, &end);
if (len > end)
return -ENOMEM;
@@ -98,12 +96,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
}
if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32))
&& len <= mm->cached_hole_size) {
- mm->cached_hole_size = 0;
+ mm->cached_hole_size = 0;
mm->free_area_cache = begin;
}
addr = mm->free_area_cache;
- if (addr < begin)
- addr = begin;
+ if (addr < begin)
+ addr = begin;
start_addr = addr;
full_search:
@@ -129,7 +127,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
return addr;
}
if (addr + mm->cached_hole_size < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
+ mm->cached_hole_size = vma->vm_start - addr;
addr = vma->vm_end;
}
@@ -179,7 +177,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
vma = find_vma(mm, addr-len);
if (!vma || addr <= vma->vm_start)
/* remember the address as a hint for next time */
- return mm->free_area_cache = addr-len;
+ return (mm->free_area_cache = addr-len);
}
if (mm->mmap_base < len)
@@ -196,7 +194,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
vma = find_vma(mm, addr);
if (!vma || addr+len <= vma->vm_start)
/* remember the address as a hint for next time */
- return mm->free_area_cache = addr;
+ return (mm->free_area_cache = addr);
/* remember the largest hole we saw so far */
if (addr + mm->cached_hole_size < vma->vm_start)
@@ -226,13 +224,13 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
}
-asmlinkage long sys_uname(struct new_utsname __user *name)
+asmlinkage long sys_uname(struct new_utsname __user * name)
{
int err;
down_read(&uts_sem);
- err = copy_to_user(name, utsname(), sizeof(*name));
+ err = copy_to_user(name, utsname(), sizeof (*name));
up_read(&uts_sem);
- if (personality(current->personality) == PER_LINUX32)
- err |= copy_to_user(&name->machine, "i686", 5);
+ if (personality(current->personality) == PER_LINUX32)
+ err |= copy_to_user(&name->machine, "i686", 5);
return err ? -EFAULT : 0;
}
diff --git a/trunk/arch/x86/kernel/syscall_64.c b/trunk/arch/x86/kernel/syscall_64.c
index 3d1be4f0fac5..170d43c17487 100644
--- a/trunk/arch/x86/kernel/syscall_64.c
+++ b/trunk/arch/x86/kernel/syscall_64.c
@@ -8,12 +8,12 @@
#define __NO_STUBS
#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
-#undef ASM_X86__UNISTD_64_H
+#undef _ASM_X86_64_UNISTD_H_
#include
#undef __SYSCALL
#define __SYSCALL(nr, sym) [nr] = sym,
-#undef ASM_X86__UNISTD_64_H
+#undef _ASM_X86_64_UNISTD_H_
typedef void (*sys_call_ptr_t)(void);
diff --git a/trunk/arch/x86/kernel/time_32.c b/trunk/arch/x86/kernel/time_32.c
index bbecf8b6bf96..ffe3c664afc0 100644
--- a/trunk/arch/x86/kernel/time_32.c
+++ b/trunk/arch/x86/kernel/time_32.c
@@ -36,7 +36,6 @@
#include
#include
#include
-#include
#include "do_timer.h"
diff --git a/trunk/arch/x86/kernel/tls.c b/trunk/arch/x86/kernel/tls.c
index 6bb7b8579e70..ab6bf375a307 100644
--- a/trunk/arch/x86/kernel/tls.c
+++ b/trunk/arch/x86/kernel/tls.c
@@ -10,7 +10,6 @@
#include
#include
#include
-#include
#include "tls.h"
diff --git a/trunk/arch/x86/kernel/traps_64.c b/trunk/arch/x86/kernel/traps_64.c
index 7a31f104bef9..513caaca7115 100644
--- a/trunk/arch/x86/kernel/traps_64.c
+++ b/trunk/arch/x86/kernel/traps_64.c
@@ -32,8 +32,6 @@
#include
#include
#include
-#include
-#include
#if defined(CONFIG_EDAC)
#include
@@ -47,6 +45,9 @@
#include
#include
#include
+#include
+#include
+#include
#include
#include
#include
@@ -84,8 +85,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
void printk_address(unsigned long address, int reliable)
{
- printk(" [<%016lx>] %s%pS\n",
- address, reliable ? "" : "? ", (void *) address);
+ printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address);
}
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
@@ -98,8 +98,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
[STACKFAULT_STACK - 1] = "#SS",
[MCE_STACK - 1] = "#MC",
#if DEBUG_STKSZ > EXCEPTION_STKSZ
- [N_EXCEPTION_STACKS ...
- N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
+ [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
#endif
};
unsigned k;
@@ -164,7 +163,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
}
/*
- * x86-64 can have up to three kernel stacks:
+ * x86-64 can have up to three kernel stacks:
* process stack
* interrupt stack
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
@@ -220,7 +219,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
const struct stacktrace_ops *ops, void *data)
{
const unsigned cpu = get_cpu();
- unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
+ unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
unsigned used = 0;
struct thread_info *tinfo;
@@ -238,7 +237,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
if (!bp) {
if (task == current) {
/* Grab bp right from our regs */
- asm("movq %%rbp, %0" : "=r" (bp) : );
+ asm("movq %%rbp, %0" : "=r" (bp) :);
} else {
/* bp is the last reg pushed by switch_to */
bp = *(unsigned long *) task->thread.sp;
@@ -340,8 +339,9 @@ static void
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp, char *log_lvl)
{
- printk("Call Trace:\n");
+ printk("\nCall Trace:\n");
dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
+ printk("\n");
}
void show_trace(struct task_struct *task, struct pt_regs *regs,
@@ -357,15 +357,11 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack;
int i;
const int cpu = smp_processor_id();
- unsigned long *irqstack_end =
- (unsigned long *) (cpu_pda(cpu)->irqstackptr);
- unsigned long *irqstack =
- (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
+ unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
+ unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
- /*
- * debugging aid: "show_stack(NULL, NULL);" prints the
- * back trace for this cpu.
- */
+ // debugging aid: "show_stack(NULL, NULL);" prints the
+ // back trace for this cpu.
if (sp == NULL) {
if (task)
@@ -390,7 +386,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
printk(" %016lx", *stack++);
touch_nmi_watchdog();
}
- printk("\n");
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
@@ -409,7 +404,7 @@ void dump_stack(void)
#ifdef CONFIG_FRAME_POINTER
if (!bp)
- asm("movq %%rbp, %0" : "=r" (bp) : );
+ asm("movq %%rbp, %0" : "=r" (bp):);
#endif
printk("Pid: %d, comm: %.20s %s %s %.*s\n",
@@ -419,6 +414,7 @@ void dump_stack(void)
init_utsname()->version);
show_trace(NULL, NULL, &stack, bp);
}
+
EXPORT_SYMBOL(dump_stack);
void show_registers(struct pt_regs *regs)
@@ -447,6 +443,7 @@ void show_registers(struct pt_regs *regs)
printk("Stack: ");
show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
regs->bp, "");
+ printk("\n");
printk(KERN_EMERG "Code: ");
@@ -496,7 +493,7 @@ unsigned __kprobes long oops_begin(void)
raw_local_irq_save(flags);
cpu = smp_processor_id();
if (!__raw_spin_trylock(&die_lock)) {
- if (cpu == die_owner)
+ if (cpu == die_owner)
/* nested oops. should stop eventually */;
else
__raw_spin_lock(&die_lock);
@@ -641,7 +638,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
}
#define DO_ERROR(trapnr, signr, str, name) \
-asmlinkage void do_##name(struct pt_regs *regs, long error_code) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
{ \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
== NOTIFY_STOP) \
@@ -651,7 +648,7 @@ asmlinkage void do_##name(struct pt_regs *regs, long error_code) \
}
#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
-asmlinkage void do_##name(struct pt_regs *regs, long error_code) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
{ \
siginfo_t info; \
info.si_signo = signr; \
@@ -686,7 +683,7 @@ asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
preempt_conditional_cli(regs);
}
-asmlinkage void do_double_fault(struct pt_regs *regs, long error_code)
+asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
{
static const char str[] = "double fault";
struct task_struct *tsk = current;
@@ -781,10 +778,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
}
static notrace __kprobes void
-unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
+unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
{
- if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
- NOTIFY_STOP)
+ if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
return;
printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
reason);
@@ -886,7 +882,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
else if (user_mode(eregs))
regs = task_pt_regs(current);
/* Exception from kernel and interrupts are enabled. Move to
- kernel process stack. */
+ kernel process stack. */
else if (eregs->flags & X86_EFLAGS_IF)
regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
if (eregs != regs)
@@ -895,7 +891,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
}
/* runs on IST stack. */
-asmlinkage void __kprobes do_debug(struct pt_regs *regs,
+asmlinkage void __kprobes do_debug(struct pt_regs * regs,
unsigned long error_code)
{
struct task_struct *tsk = current;
@@ -1039,7 +1035,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs)
asmlinkage void bad_intr(void)
{
- printk("bad interrupt");
+ printk("bad interrupt");
}
asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
@@ -1051,7 +1047,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
conditional_sti(regs);
if (!user_mode(regs) &&
- kernel_math_error(regs, "kernel simd math error", 19))
+ kernel_math_error(regs, "kernel simd math error", 19))
return;
/*
@@ -1096,7 +1092,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
force_sig_info(SIGFPE, &info, task);
}
-asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs)
+asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs)
{
}
@@ -1153,10 +1149,8 @@ void __init trap_init(void)
set_intr_gate(0, ÷_error);
set_intr_gate_ist(1, &debug, DEBUG_STACK);
set_intr_gate_ist(2, &nmi, NMI_STACK);
- /* int3 can be called from all */
- set_system_gate_ist(3, &int3, DEBUG_STACK);
- /* int4 can be called from all */
- set_system_gate(4, &overflow);
+ set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */
+ set_system_gate(4, &overflow); /* int4 can be called from all */
set_intr_gate(5, &bounds);
set_intr_gate(6, &invalid_op);
set_intr_gate(7, &device_not_available);
diff --git a/trunk/arch/x86/kernel/tsc.c b/trunk/arch/x86/kernel/tsc.c
index 161bb850fc47..8f98e9de1b82 100644
--- a/trunk/arch/x86/kernel/tsc.c
+++ b/trunk/arch/x86/kernel/tsc.c
@@ -104,7 +104,7 @@ __setup("notsc", notsc_setup);
/*
* Read TSC and the reference counters. Take care of SMI disturbance
*/
-static u64 tsc_read_refs(u64 *p, int hpet)
+static u64 tsc_read_refs(u64 *pm, u64 *hpet)
{
u64 t1, t2;
int i;
@@ -112,9 +112,9 @@ static u64 tsc_read_refs(u64 *p, int hpet)
for (i = 0; i < MAX_RETRIES; i++) {
t1 = get_cycles();
if (hpet)
- *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
+ *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
else
- *p = acpi_pm_read_early();
+ *pm = acpi_pm_read_early();
t2 = get_cycles();
if ((t2 - t1) < SMI_TRESHOLD)
return t2;
@@ -122,52 +122,6 @@ static u64 tsc_read_refs(u64 *p, int hpet)
return ULLONG_MAX;
}
-/*
- * Calculate the TSC frequency from HPET reference
- */
-static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
-{
- u64 tmp;
-
- if (hpet2 < hpet1)
- hpet2 += 0x100000000ULL;
- hpet2 -= hpet1;
- tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
- do_div(tmp, 1000000);
- do_div(deltatsc, tmp);
-
- return (unsigned long) deltatsc;
-}
-
-/*
- * Calculate the TSC frequency from PMTimer reference
- */
-static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
-{
- u64 tmp;
-
- if (!pm1 && !pm2)
- return ULONG_MAX;
-
- if (pm2 < pm1)
- pm2 += (u64)ACPI_PM_OVRRUN;
- pm2 -= pm1;
- tmp = pm2 * 1000000000LL;
- do_div(tmp, PMTMR_TICKS_PER_SEC);
- do_div(deltatsc, tmp);
-
- return (unsigned long) deltatsc;
-}
-
-#define CAL_MS 10
-#define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS))
-#define CAL_PIT_LOOPS 1000
-
-#define CAL2_MS 50
-#define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS))
-#define CAL2_PIT_LOOPS 5000
-
-
/*
* Try to calibrate the TSC against the Programmable
* Interrupt Timer and return the frequency of the TSC
@@ -175,7 +129,7 @@ static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
*
* Return ULONG_MAX on failure to calibrate.
*/
-static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
+static unsigned long pit_calibrate_tsc(void)
{
u64 tsc, t1, t2, delta;
unsigned long tscmin, tscmax;
@@ -190,8 +144,8 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
* (LSB then MSB) to begin countdown.
*/
outb(0xb0, 0x43);
- outb(latch & 0xff, 0x42);
- outb(latch >> 8, 0x42);
+ outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
+ outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
tsc = t1 = t2 = get_cycles();
@@ -212,154 +166,31 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
/*
* Sanity checks:
*
- * If we were not able to read the PIT more than loopmin
+ * If we were not able to read the PIT more than 5000
* times, then we have been hit by a massive SMI
*
* If the maximum is 10 times larger than the minimum,
* then we got hit by an SMI as well.
*/
- if (pitcnt < loopmin || tscmax > 10 * tscmin)
+ if (pitcnt < 5000 || tscmax > 10 * tscmin)
return ULONG_MAX;
/* Calculate the PIT value */
delta = t2 - t1;
- do_div(delta, ms);
+ do_div(delta, 50);
return delta;
}
-/*
- * This reads the current MSB of the PIT counter, and
- * checks if we are running on sufficiently fast and
- * non-virtualized hardware.
- *
- * Our expectations are:
- *
- * - the PIT is running at roughly 1.19MHz
- *
- * - each IO is going to take about 1us on real hardware,
- * but we allow it to be much faster (by a factor of 10) or
- * _slightly_ slower (ie we allow up to a 2us read+counter
- * update - anything else implies a unacceptably slow CPU
- * or PIT for the fast calibration to work.
- *
- * - with 256 PIT ticks to read the value, we have 214us to
- * see the same MSB (and overhead like doing a single TSC
- * read per MSB value etc).
- *
- * - We're doing 2 reads per loop (LSB, MSB), and we expect
- * them each to take about a microsecond on real hardware.
- * So we expect a count value of around 100. But we'll be
- * generous, and accept anything over 50.
- *
- * - if the PIT is stuck, and we see *many* more reads, we
- * return early (and the next caller of pit_expect_msb()
- * then consider it a failure when they don't see the
- * next expected value).
- *
- * These expectations mean that we know that we have seen the
- * transition from one expected value to another with a fairly
- * high accuracy, and we didn't miss any events. We can thus
- * use the TSC value at the transitions to calculate a pretty
- * good value for the TSC frequencty.
- */
-static inline int pit_expect_msb(unsigned char val)
-{
- int count = 0;
-
- for (count = 0; count < 50000; count++) {
- /* Ignore LSB */
- inb(0x42);
- if (inb(0x42) != val)
- break;
- }
- return count > 50;
-}
-
-/*
- * How many MSB values do we want to see? We aim for a
- * 15ms calibration, which assuming a 2us counter read
- * error should give us roughly 150 ppm precision for
- * the calibration.
- */
-#define QUICK_PIT_MS 15
-#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
-
-static unsigned long quick_pit_calibrate(void)
-{
- /* Set the Gate high, disable speaker */
- outb((inb(0x61) & ~0x02) | 0x01, 0x61);
-
- /*
- * Counter 2, mode 0 (one-shot), binary count
- *
- * NOTE! Mode 2 decrements by two (and then the
- * output is flipped each time, giving the same
- * final output frequency as a decrement-by-one),
- * so mode 0 is much better when looking at the
- * individual counts.
- */
- outb(0xb0, 0x43);
-
- /* Start at 0xffff */
- outb(0xff, 0x42);
- outb(0xff, 0x42);
-
- if (pit_expect_msb(0xff)) {
- int i;
- u64 t1, t2, delta;
- unsigned char expect = 0xfe;
-
- t1 = get_cycles();
- for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) {
- if (!pit_expect_msb(expect))
- goto failed;
- }
- t2 = get_cycles();
-
- /*
- * Make sure we can rely on the second TSC timestamp:
- */
- if (!pit_expect_msb(expect))
- goto failed;
-
- /*
- * Ok, if we get here, then we've seen the
- * MSB of the PIT decrement QUICK_PIT_ITERATIONS
- * times, and each MSB had many hits, so we never
- * had any sudden jumps.
- *
- * As a result, we can depend on there not being
- * any odd delays anywhere, and the TSC reads are
- * reliable.
- *
- * kHz = ticks / time-in-seconds / 1000;
- * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000
- * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000)
- */
- delta = (t2 - t1)*PIT_TICK_RATE;
- do_div(delta, QUICK_PIT_ITERATIONS*256*1000);
- printk("Fast TSC calibration using PIT\n");
- return delta;
- }
-failed:
- return 0;
-}
/**
* native_calibrate_tsc - calibrate the tsc on boot
*/
unsigned long native_calibrate_tsc(void)
{
- u64 tsc1, tsc2, delta, ref1, ref2;
+ u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2;
unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
- unsigned long flags, latch, ms, fast_calibrate;
- int hpet = is_hpet_enabled(), i, loopmin;
-
- local_irq_save(flags);
- fast_calibrate = quick_pit_calibrate();
- local_irq_restore(flags);
- if (fast_calibrate)
- return fast_calibrate;
+ unsigned long flags;
+ int hpet = is_hpet_enabled(), i;
/*
* Run 5 calibration loops to get the lowest frequency value
@@ -385,13 +216,7 @@ unsigned long native_calibrate_tsc(void)
* calibration delay loop as we have to wait for a certain
* amount of time anyway.
*/
-
- /* Preset PIT loop values */
- latch = CAL_LATCH;
- ms = CAL_MS;
- loopmin = CAL_PIT_LOOPS;
-
- for (i = 0; i < 3; i++) {
+ for (i = 0; i < 5; i++) {
unsigned long tsc_pit_khz;
/*
@@ -401,16 +226,16 @@ unsigned long native_calibrate_tsc(void)
* read the end value.
*/
local_irq_save(flags);
- tsc1 = tsc_read_refs(&ref1, hpet);
- tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
- tsc2 = tsc_read_refs(&ref2, hpet);
+ tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
+ tsc_pit_khz = pit_calibrate_tsc();
+ tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
local_irq_restore(flags);
/* Pick the lowest PIT TSC calibration so far */
tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
/* hpet or pmtimer available ? */
- if (!hpet && !ref1 && !ref2)
+ if (!hpet && !pm1 && !pm2)
continue;
/* Check, whether the sampling was disturbed by an SMI */
@@ -418,41 +243,23 @@ unsigned long native_calibrate_tsc(void)
continue;
tsc2 = (tsc2 - tsc1) * 1000000LL;
- if (hpet)
- tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
- else
- tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);
- tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);
-
- /* Check the reference deviation */
- delta = ((u64) tsc_pit_min) * 100;
- do_div(delta, tsc_ref_min);
-
- /*
- * If both calibration results are inside a 10% window
- * then we can be sure, that the calibration
- * succeeded. We break out of the loop right away. We
- * use the reference value, as it is more precise.
- */
- if (delta >= 90 && delta <= 110) {
- printk(KERN_INFO
- "TSC: PIT calibration matches %s. %d loops\n",
- hpet ? "HPET" : "PMTIMER", i + 1);
- return tsc_ref_min;
+ if (hpet) {
+ if (hpet2 < hpet1)
+ hpet2 += 0x100000000ULL;
+ hpet2 -= hpet1;
+ tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
+ do_div(tsc1, 1000000);
+ } else {
+ if (pm2 < pm1)
+ pm2 += (u64)ACPI_PM_OVRRUN;
+ pm2 -= pm1;
+ tsc1 = pm2 * 1000000000LL;
+ do_div(tsc1, PMTMR_TICKS_PER_SEC);
}
- /*
- * Check whether PIT failed more than once. This
- * happens in virtualized environments. We need to
- * give the virtual PC a slightly longer timeframe for
- * the HPET/PMTIMER to make the result precise.
- */
- if (i == 1 && tsc_pit_min == ULONG_MAX) {
- latch = CAL2_LATCH;
- ms = CAL2_MS;
- loopmin = CAL2_PIT_LOOPS;
- }
+ do_div(tsc2, tsc1);
+ tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);
}
/*
@@ -463,7 +270,7 @@ unsigned long native_calibrate_tsc(void)
printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n");
/* We don't have an alternative source, disable TSC */
- if (!hpet && !ref1 && !ref2) {
+ if (!hpet && !pm1 && !pm2) {
printk("TSC: No reference (HPET/PMTIMER) available\n");
return 0;
}
@@ -471,7 +278,7 @@ unsigned long native_calibrate_tsc(void)
/* The alternative source failed as well, disable TSC */
if (tsc_ref_min == ULONG_MAX) {
printk(KERN_WARNING "TSC: HPET/PMTIMER calibration "
- "failed.\n");
+ "failed due to SMI disturbance.\n");
return 0;
}
@@ -483,25 +290,44 @@ unsigned long native_calibrate_tsc(void)
}
/* We don't have an alternative source, use the PIT calibration value */
- if (!hpet && !ref1 && !ref2) {
+ if (!hpet && !pm1 && !pm2) {
printk(KERN_INFO "TSC: Using PIT calibration value\n");
return tsc_pit_min;
}
/* The alternative source failed, use the PIT calibration value */
if (tsc_ref_min == ULONG_MAX) {
- printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. "
- "Using PIT calibration\n");
+ printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due "
+ "to SMI disturbance. Using PIT calibration\n");
return tsc_pit_min;
}
+ /* Check the reference deviation */
+ delta = ((u64) tsc_pit_min) * 100;
+ do_div(delta, tsc_ref_min);
+
/*
- * The calibration values differ too much. In doubt, we use
- * the PIT value as we know that there are PMTIMERs around
- * running at double speed. At least we let the user know:
+ * If both calibration results are inside a 5% window, the we
+ * use the lower frequency of those as it is probably the
+ * closest estimate.
*/
+ if (delta >= 95 && delta <= 105) {
+ printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n",
+ hpet ? "HPET" : "PMTIMER");
+ printk(KERN_INFO "TSC: using %s calibration value\n",
+ tsc_pit_min <= tsc_ref_min ? "PIT" :
+ hpet ? "HPET" : "PMTIMER");
+ return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min;
+ }
+
printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n",
hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
+
+ /*
+ * The calibration values differ too much. In doubt, we use
+ * the PIT value as we know that there are PMTIMERs around
+ * running at double speed.
+ */
printk(KERN_INFO "TSC: Using PIT calibration value\n");
return tsc_pit_min;
}
diff --git a/trunk/arch/x86/kernel/visws_quirks.c b/trunk/arch/x86/kernel/visws_quirks.c
index 61a97e616f70..594ef47f0a63 100644
--- a/trunk/arch/x86/kernel/visws_quirks.c
+++ b/trunk/arch/x86/kernel/visws_quirks.c
@@ -25,31 +25,45 @@
#include
#include
#include
-#include
#include
#include
#include
#include
+#include
#include
#include
#include "mach_apic.h"
+#include
+#include
+
#include
+#include
+#include
+#include
+#include
#include
#include
+#include
#include
+#include
#include
#include
+#include
#include
#include
extern int no_broadcast;
+#include
#include
+#include
+#include
+#include
char visws_board_type = -1;
char visws_board_rev = -1;
diff --git a/trunk/arch/x86/kernel/vm86_32.c b/trunk/arch/x86/kernel/vm86_32.c
index 4eeb5cf9720d..38f566fa27d2 100644
--- a/trunk/arch/x86/kernel/vm86_32.c
+++ b/trunk/arch/x86/kernel/vm86_32.c
@@ -46,7 +46,6 @@
#include
#include
#include
-#include
/*
* Known problems:
diff --git a/trunk/arch/x86/kernel/vmi_32.c b/trunk/arch/x86/kernel/vmi_32.c
index 8c9ad02af5a2..edfb09f30479 100644
--- a/trunk/arch/x86/kernel/vmi_32.c
+++ b/trunk/arch/x86/kernel/vmi_32.c
@@ -393,13 +393,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
}
#endif
-static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn)
+static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn)
{
vmi_set_page_type(pfn, VMI_PAGE_L1);
vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
}
-static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn)
+static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn)
{
/*
* This call comes in very early, before mem_map is setup.
@@ -410,20 +410,20 @@ static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn)
vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
}
-static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count)
+static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
{
vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
vmi_check_page_type(clonepfn, VMI_PAGE_L2);
vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
}
-static void vmi_release_pte(unsigned long pfn)
+static void vmi_release_pte(u32 pfn)
{
vmi_ops.release_page(pfn, VMI_PAGE_L1);
vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
}
-static void vmi_release_pmd(unsigned long pfn)
+static void vmi_release_pmd(u32 pfn)
{
vmi_ops.release_page(pfn, VMI_PAGE_L2);
vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
diff --git a/trunk/arch/x86/lib/msr-on-cpu.c b/trunk/arch/x86/lib/msr-on-cpu.c
index 321cf720dbb6..01b868ba82f8 100644
--- a/trunk/arch/x86/lib/msr-on-cpu.c
+++ b/trunk/arch/x86/lib/msr-on-cpu.c
@@ -16,46 +16,37 @@ static void __rdmsr_on_cpu(void *info)
rdmsr(rv->msr_no, rv->l, rv->h);
}
-static void __wrmsr_on_cpu(void *info)
+static void __rdmsr_safe_on_cpu(void *info)
{
struct msr_info *rv = info;
- wrmsr(rv->msr_no, rv->l, rv->h);
+ rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h);
}
-int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe)
{
- int err;
+ int err = 0;
struct msr_info rv;
rv.msr_no = msr_no;
- err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
+ if (safe) {
+ err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu,
+ &rv, 1);
+ err = err ? err : rv.err;
+ } else {
+ err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
+ }
*l = rv.l;
*h = rv.h;
return err;
}
-int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
- int err;
- struct msr_info rv;
-
- rv.msr_no = msr_no;
- rv.l = l;
- rv.h = h;
- err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
-
- return err;
-}
-
-/* These "safe" variants are slower and should be used when the target MSR
- may not actually exist. */
-static void __rdmsr_safe_on_cpu(void *info)
+static void __wrmsr_on_cpu(void *info)
{
struct msr_info *rv = info;
- rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h);
+ wrmsr(rv->msr_no, rv->l, rv->h);
}
static void __wrmsr_safe_on_cpu(void *info)
@@ -65,30 +56,45 @@ static void __wrmsr_safe_on_cpu(void *info)
rv->err = wrmsr_safe(rv->msr_no, rv->l, rv->h);
}
-int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe)
{
- int err;
+ int err = 0;
struct msr_info rv;
rv.msr_no = msr_no;
- err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
- *l = rv.l;
- *h = rv.h;
+ rv.l = l;
+ rv.h = h;
+ if (safe) {
+ err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu,
+ &rv, 1);
+ err = err ? err : rv.err;
+ } else {
+ err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
+ }
- return err ? err : rv.err;
+ return err;
}
-int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
{
- int err;
- struct msr_info rv;
+ return _wrmsr_on_cpu(cpu, msr_no, l, h, 0);
+}
- rv.msr_no = msr_no;
- rv.l = l;
- rv.h = h;
- err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
+int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+{
+ return _rdmsr_on_cpu(cpu, msr_no, l, h, 0);
+}
+
+/* These "safe" variants are slower and should be used when the target MSR
+ may not actually exist. */
+int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+ return _wrmsr_on_cpu(cpu, msr_no, l, h, 1);
+}
- return err ? err : rv.err;
+int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+{
+ return _rdmsr_on_cpu(cpu, msr_no, l, h, 1);
}
EXPORT_SYMBOL(rdmsr_on_cpu);
diff --git a/trunk/arch/x86/lib/string_32.c b/trunk/arch/x86/lib/string_32.c
index 82004d2bf05e..94972e7c094d 100644
--- a/trunk/arch/x86/lib/string_32.c
+++ b/trunk/arch/x86/lib/string_32.c
@@ -22,7 +22,7 @@ char *strcpy(char *dest, const char *src)
"testb %%al,%%al\n\t"
"jne 1b"
: "=&S" (d0), "=&D" (d1), "=&a" (d2)
- : "0" (src), "1" (dest) : "memory");
+ :"0" (src), "1" (dest) : "memory");
return dest;
}
EXPORT_SYMBOL(strcpy);
@@ -42,7 +42,7 @@ char *strncpy(char *dest, const char *src, size_t count)
"stosb\n"
"2:"
: "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
- : "0" (src), "1" (dest), "2" (count) : "memory");
+ :"0" (src), "1" (dest), "2" (count) : "memory");
return dest;
}
EXPORT_SYMBOL(strncpy);
@@ -60,7 +60,7 @@ char *strcat(char *dest, const char *src)
"testb %%al,%%al\n\t"
"jne 1b"
: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
- : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu) : "memory");
+ : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu): "memory");
return dest;
}
EXPORT_SYMBOL(strcat);
@@ -105,9 +105,9 @@ int strcmp(const char *cs, const char *ct)
"2:\tsbbl %%eax,%%eax\n\t"
"orb $1,%%al\n"
"3:"
- : "=a" (res), "=&S" (d0), "=&D" (d1)
- : "1" (cs), "2" (ct)
- : "memory");
+ :"=a" (res), "=&S" (d0), "=&D" (d1)
+ :"1" (cs), "2" (ct)
+ :"memory");
return res;
}
EXPORT_SYMBOL(strcmp);
@@ -130,9 +130,9 @@ int strncmp(const char *cs, const char *ct, size_t count)
"3:\tsbbl %%eax,%%eax\n\t"
"orb $1,%%al\n"
"4:"
- : "=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
- : "1" (cs), "2" (ct), "3" (count)
- : "memory");
+ :"=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
+ :"1" (cs), "2" (ct), "3" (count)
+ :"memory");
return res;
}
EXPORT_SYMBOL(strncmp);
@@ -152,9 +152,9 @@ char *strchr(const char *s, int c)
"movl $1,%1\n"
"2:\tmovl %1,%0\n\t"
"decl %0"
- : "=a" (res), "=&S" (d0)
- : "1" (s), "0" (c)
- : "memory");
+ :"=a" (res), "=&S" (d0)
+ :"1" (s), "0" (c)
+ :"memory");
return res;
}
EXPORT_SYMBOL(strchr);
@@ -169,9 +169,9 @@ size_t strlen(const char *s)
"scasb\n\t"
"notl %0\n\t"
"decl %0"
- : "=c" (res), "=&D" (d0)
- : "1" (s), "a" (0), "0" (0xffffffffu)
- : "memory");
+ :"=c" (res), "=&D" (d0)
+ :"1" (s), "a" (0), "0" (0xffffffffu)
+ :"memory");
return res;
}
EXPORT_SYMBOL(strlen);
@@ -189,9 +189,9 @@ void *memchr(const void *cs, int c, size_t count)
"je 1f\n\t"
"movl $1,%0\n"
"1:\tdecl %0"
- : "=D" (res), "=&c" (d0)
- : "a" (c), "0" (cs), "1" (count)
- : "memory");
+ :"=D" (res), "=&c" (d0)
+ :"a" (c), "0" (cs), "1" (count)
+ :"memory");
return res;
}
EXPORT_SYMBOL(memchr);
@@ -228,9 +228,9 @@ size_t strnlen(const char *s, size_t count)
"cmpl $-1,%1\n\t"
"jne 1b\n"
"3:\tsubl %2,%0"
- : "=a" (res), "=&d" (d0)
- : "c" (s), "1" (count)
- : "memory");
+ :"=a" (res), "=&d" (d0)
+ :"c" (s), "1" (count)
+ :"memory");
return res;
}
EXPORT_SYMBOL(strnlen);
diff --git a/trunk/arch/x86/lib/strstr_32.c b/trunk/arch/x86/lib/strstr_32.c
index 8e2d55f754bf..42e8a50303f3 100644
--- a/trunk/arch/x86/lib/strstr_32.c
+++ b/trunk/arch/x86/lib/strstr_32.c
@@ -23,9 +23,9 @@ __asm__ __volatile__(
"jne 1b\n\t"
"xorl %%eax,%%eax\n\t"
"2:"
- : "=a" (__res), "=&c" (d0), "=&S" (d1)
- : "0" (0), "1" (0xffffffff), "2" (cs), "g" (ct)
- : "dx", "di");
+ :"=a" (__res), "=&c" (d0), "=&S" (d1)
+ :"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct)
+ :"dx", "di");
return __res;
}
diff --git a/trunk/arch/x86/mach-default/setup.c b/trunk/arch/x86/mach-default/setup.c
index 3f2cf11f201a..3d317836be9e 100644
--- a/trunk/arch/x86/mach-default/setup.c
+++ b/trunk/arch/x86/mach-default/setup.c
@@ -10,15 +10,13 @@
#include
#include
-#include
-
#ifdef CONFIG_HOTPLUG_CPU
#define DEFAULT_SEND_IPI (1)
#else
#define DEFAULT_SEND_IPI (0)
#endif
-int no_broadcast = DEFAULT_SEND_IPI;
+int no_broadcast=DEFAULT_SEND_IPI;
/**
* pre_intr_init_hook - initialisation prior to setting up interrupt vectors
diff --git a/trunk/arch/x86/mach-voyager/voyager_smp.c b/trunk/arch/x86/mach-voyager/voyager_smp.c
index 199a5f4a873c..ee0fba092157 100644
--- a/trunk/arch/x86/mach-voyager/voyager_smp.c
+++ b/trunk/arch/x86/mach-voyager/voyager_smp.c
@@ -448,8 +448,6 @@ static void __init start_secondary(void *unused)
VDEBUG(("VOYAGER SMP: CPU%d, stack at about %p\n", cpuid, &cpuid));
- notify_cpu_starting(cpuid);
-
/* enable interrupts */
local_irq_enable();
diff --git a/trunk/arch/x86/mm/discontig_32.c b/trunk/arch/x86/mm/discontig_32.c
index 847c164725f4..62fa440678d8 100644
--- a/trunk/arch/x86/mm/discontig_32.c
+++ b/trunk/arch/x86/mm/discontig_32.c
@@ -328,7 +328,7 @@ void __init initmem_init(unsigned long start_pfn,
get_memcfg_numa();
- kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
+ kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE);
kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
do {
diff --git a/trunk/arch/x86/mm/dump_pagetables.c b/trunk/arch/x86/mm/dump_pagetables.c
index e7277cbcfb40..a20d1fa64b4e 100644
--- a/trunk/arch/x86/mm/dump_pagetables.c
+++ b/trunk/arch/x86/mm/dump_pagetables.c
@@ -148,8 +148,8 @@ static void note_page(struct seq_file *m, struct pg_state *st,
* we have now. "break" is either changing perms, levels or
* address space marker.
*/
- prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
- cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
+ prot = pgprot_val(new_prot) & ~(PTE_PFN_MASK);
+ cur = pgprot_val(st->current_prot) & ~(PTE_PFN_MASK);
if (!st->level) {
/* First entry */
diff --git a/trunk/arch/x86/mm/fault.c b/trunk/arch/x86/mm/fault.c
index 8f92cac4e6db..455f3fe67b42 100644
--- a/trunk/arch/x86/mm/fault.c
+++ b/trunk/arch/x86/mm/fault.c
@@ -35,7 +35,6 @@
#include
#include
#include
-#include
/*
* Page fault error code bits
@@ -358,6 +357,8 @@ static int is_errata100(struct pt_regs *regs, unsigned long address)
return 0;
}
+void do_invalid_op(struct pt_regs *, unsigned long);
+
static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_F00F_BUG
diff --git a/trunk/arch/x86/mm/init_32.c b/trunk/arch/x86/mm/init_32.c
index 6b9a9358b330..60ec1d08ff24 100644
--- a/trunk/arch/x86/mm/init_32.c
+++ b/trunk/arch/x86/mm/init_32.c
@@ -47,7 +47,6 @@
#include
#include
#include
-#include