diff --git a/[refs] b/[refs]
index 4cfa370f31a0..4df93be69c15 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
---
-refs/heads/master: 00431707be0cc1236ee08459367872b57da5be29
+refs/heads/master: bf785ee0aeea7a3e717cb1e11df4135b6cbde7da
diff --git a/trunk/Documentation/CodingStyle b/trunk/Documentation/CodingStyle
index ce780ef648f1..ce5d2c038cf5 100644
--- a/trunk/Documentation/CodingStyle
+++ b/trunk/Documentation/CodingStyle
@@ -199,7 +199,7 @@ The rationale is:
modifications are prevented
- saves the compiler work to optimize redundant code away ;)
-int fun(int )
+int fun(int a)
{
int result = 0;
char *buffer = kmalloc(SIZE);
diff --git a/trunk/Documentation/DocBook/.gitignore b/trunk/Documentation/DocBook/.gitignore
new file mode 100644
index 000000000000..c102c02ecf89
--- /dev/null
+++ b/trunk/Documentation/DocBook/.gitignore
@@ -0,0 +1,6 @@
+*.xml
+*.ps
+*.pdf
+*.html
+*.9.gz
+*.9
diff --git a/trunk/Documentation/DocBook/kernel-api.tmpl b/trunk/Documentation/DocBook/kernel-api.tmpl
index 3c47a3f0dc55..8c9c6704e85b 100644
--- a/trunk/Documentation/DocBook/kernel-api.tmpl
+++ b/trunk/Documentation/DocBook/kernel-api.tmpl
@@ -53,6 +53,11 @@
!Iinclude/linux/sched.h
!Ekernel/sched.c
!Ekernel/timer.c
+
+ High-resolution timers
+!Iinclude/linux/ktime.h
+!Iinclude/linux/hrtimer.h
+!Ekernel/hrtimer.c
Internal Functions
!Ikernel/exit.c
diff --git a/trunk/Documentation/applying-patches.txt b/trunk/Documentation/applying-patches.txt
index 05a08c2c1889..a083ba35d1ad 100644
--- a/trunk/Documentation/applying-patches.txt
+++ b/trunk/Documentation/applying-patches.txt
@@ -3,8 +3,7 @@
------------------------------------
Original by: Jesper Juhl, August 2005
- Last update: 2005-12-02
-
+ Last update: 2006-01-05
A frequently asked question on the Linux Kernel Mailing List is how to apply
@@ -77,7 +76,7 @@ instead:
If you wish to uncompress the patch file by hand first before applying it
(what I assume you've done in the examples below), then you simply run
-gunzip or bunzip2 on the file - like this:
+gunzip or bunzip2 on the file -- like this:
gunzip patch-x.y.z.gz
bunzip2 patch-x.y.z.bz2
@@ -95,7 +94,7 @@ Common errors when patching
---
When patch applies a patch file it attempts to verify the sanity of the
file in different ways.
-Checking that the file looks like a valid patch file, checking the code
+Checking that the file looks like a valid patch file & checking the code
around the bits being modified matches the context provided in the patch are
just two of the basic sanity checks patch does.
@@ -122,7 +121,7 @@ outright and leaves a file with a .rej extension (a reject file). You can
read this file to see exactly what change couldn't be applied, so you can
go fix it up by hand if you wish.
-If you don't have any third party patches applied to your kernel source, but
+If you don't have any third-party patches applied to your kernel source, but
only patches from kernel.org and you apply the patches in the correct order,
and have made no modifications yourself to the source files, then you should
never see a fuzz or reject message from patch. If you do see such messages
@@ -137,7 +136,7 @@ If patch stops and presents a "File to patch:" prompt, then patch could not
find a file to be patched. Most likely you forgot to specify -p1 or you are
in the wrong directory. Less often, you'll find patches that need to be
applied with -p0 instead of -p1 (reading the patch file should reveal if
-this is the case - if so, then this is an error by the person who created
+this is the case -- if so, then this is an error by the person who created
the patch but is not fatal).
If you get "Hunk #2 succeeded at 1887 with fuzz 2 (offset 7 lines)." or a
@@ -168,13 +167,17 @@ the patch will in fact apply it.
A message similar to "patch: **** unexpected end of file in patch" or "patch
unexpectedly ends in middle of line" means that patch could make no sense of
-the file you fed to it. Either your download is broken or you tried to feed
-patch a compressed patch file without uncompressing it first.
+the file you fed to it. Either your download is broken, you tried to feed
+patch a compressed patch file without uncompressing it first, or the patch
+file that you are using has been mangled by a mail client or mail transfer
+agent along the way somewhere, e.g., by splitting a long line into two lines.
+Often these warnings can easily be fixed by joining (concatenating) the
+two lines that had been split.
As I already mentioned above, these errors should never happen if you apply
a patch from kernel.org to the correct version of an unmodified source tree.
So if you get these errors with kernel.org patches then you should probably
-assume that either your patch file or your tree is broken and I'd advice you
+assume that either your patch file or your tree is broken and I'd advise you
to start over with a fresh download of a full kernel tree and the patch you
wish to apply.
@@ -200,10 +203,10 @@ do the additional steps since interdiff can get things wrong in some cases.
Another alternative is `ketchup', which is a python script for automatic
downloading and applying of patches (http://www.selenic.com/ketchup/).
- Other nice tools are diffstat which shows a summary of changes made by a
-patch, lsdiff which displays a short listing of affected files in a patch
-file, along with (optionally) the line numbers of the start of each patch
-and grepdiff which displays a list of the files modified by a patch where
+ Other nice tools are diffstat, which shows a summary of changes made by a
+patch; lsdiff, which displays a short listing of affected files in a patch
+file, along with (optionally) the line numbers of the start of each patch;
+and grepdiff, which displays a list of the files modified by a patch where
the patch contains a given regular expression.
@@ -228,8 +231,8 @@ The -mm kernels live at
In place of ftp.kernel.org you can use ftp.cc.kernel.org, where cc is a
country code. This way you'll be downloading from a mirror site that's most
likely geographically closer to you, resulting in faster downloads for you,
-less bandwidth used globally and less load on the main kernel.org servers -
-these are good things, do use mirrors when possible.
+less bandwidth used globally and less load on the main kernel.org servers --
+these are good things, so do use mirrors when possible.
The 2.6.x kernels
@@ -237,14 +240,14 @@ The 2.6.x kernels
These are the base stable releases released by Linus. The highest numbered
release is the most recent.
-If regressions or other serious flaws are found then a -stable fix patch
+If regressions or other serious flaws are found, then a -stable fix patch
will be released (see below) on top of this base. Once a new 2.6.x base
kernel is released, a patch is made available that is a delta between the
previous 2.6.x kernel and the new one.
-To apply a patch moving from 2.6.11 to 2.6.12 you'd do the following (note
+To apply a patch moving from 2.6.11 to 2.6.12, you'd do the following (note
that such patches do *NOT* apply on top of 2.6.x.y kernels but on top of the
-base 2.6.x kernel - if you need to move from 2.6.x.y to 2.6.x+1 you need to
+base 2.6.x kernel -- if you need to move from 2.6.x.y to 2.6.x+1 you need to
first revert the 2.6.x.y patch).
Here are some examples:
@@ -266,7 +269,7 @@ $ mv linux-2.6.11.1 linux-2.6.12 # rename source dir
The 2.6.x.y kernels
---
- Kernels with 4 digit versions are -stable kernels. They contain small(ish)
+ Kernels with 4-digit versions are -stable kernels. They contain small(ish)
critical fixes for security problems or significant regressions discovered
in a given 2.6.x kernel.
@@ -277,9 +280,14 @@ versions.
If no 2.6.x.y kernel is available, then the highest numbered 2.6.x kernel is
the current stable kernel.
+ note: the -stable team usually do make incremental patches available as well
+ as patches against the latest mainline release, but I only cover the
+ non-incremental ones below. The incremental ones can be found at
+ ftp://ftp.kernel.org/pub/linux/kernel/v2.6/incr/
+
These patches are not incremental, meaning that for example the 2.6.12.3
patch does not apply on top of the 2.6.12.2 kernel source, but rather on top
-of the base 2.6.12 kernel source.
+of the base 2.6.12 kernel source .
So, in order to apply the 2.6.12.3 patch to your existing 2.6.12.2 kernel
source you have to first back out the 2.6.12.2 patch (so you are left with a
base 2.6.12 kernel source) and then apply the new 2.6.12.3 patch.
@@ -345,12 +353,12 @@ The -git kernels
repository, hence the name).
These patches are usually released daily and represent the current state of
-Linus' tree. They are more experimental than -rc kernels since they are
+Linus's tree. They are more experimental than -rc kernels since they are
generated automatically without even a cursory glance to see if they are
sane.
-git patches are not incremental and apply either to a base 2.6.x kernel or
-a base 2.6.x-rc kernel - you can see which from their name.
+a base 2.6.x-rc kernel -- you can see which from their name.
A patch named 2.6.12-git1 applies to the 2.6.12 kernel source and a patch
named 2.6.13-rc3-git2 applies to the source of the 2.6.13-rc3 kernel.
@@ -393,12 +401,12 @@ You should generally strive to get your patches into mainline via -mm to
ensure maximum testing.
This branch is in constant flux and contains many experimental features, a
-lot of debugging patches not appropriate for mainline etc and is the most
+lot of debugging patches not appropriate for mainline etc., and is the most
experimental of the branches described in this document.
These kernels are not appropriate for use on systems that are supposed to be
stable and they are more risky to run than any of the other branches (make
-sure you have up-to-date backups - that goes for any experimental kernel but
+sure you have up-to-date backups -- that goes for any experimental kernel but
even more so for -mm kernels).
These kernels in addition to all the other experimental patches they contain
diff --git a/trunk/Documentation/cpusets.txt b/trunk/Documentation/cpusets.txt
index 9e49b1c35729..990998ee10b6 100644
--- a/trunk/Documentation/cpusets.txt
+++ b/trunk/Documentation/cpusets.txt
@@ -135,7 +135,7 @@ Cpusets extends these two mechanisms as follows:
The implementation of cpusets requires a few, simple hooks
into the rest of the kernel, none in performance critical paths:
- - in main/init.c, to initialize the root cpuset at system boot.
+ - in init/main.c, to initialize the root cpuset at system boot.
- in fork and exit, to attach and detach a task from its cpuset.
- in sched_setaffinity, to mask the requested CPUs by what's
allowed in that tasks cpuset.
@@ -146,7 +146,7 @@ into the rest of the kernel, none in performance critical paths:
and related changes in both sched.c and arch/ia64/kernel/domain.c
- in the mbind and set_mempolicy system calls, to mask the requested
Memory Nodes by what's allowed in that tasks cpuset.
- - in page_alloc, to restrict memory to allowed nodes.
+ - in page_alloc.c, to restrict memory to allowed nodes.
- in vmscan.c, to restrict page recovery to the current cpuset.
In addition a new file system, of type "cpuset" may be mounted,
diff --git a/trunk/Documentation/filesystems/ext3.txt b/trunk/Documentation/filesystems/ext3.txt
index 22e4040564d5..afb1335c05d6 100644
--- a/trunk/Documentation/filesystems/ext3.txt
+++ b/trunk/Documentation/filesystems/ext3.txt
@@ -2,11 +2,11 @@
Ext3 Filesystem
===============
-ext3 was originally released in September 1999. Written by Stephen Tweedie
-for 2.2 branch, and ported to 2.4 kernels by Peter Braam, Andreas Dilger,
+Ext3 was originally released in September 1999. Written by Stephen Tweedie
+for the 2.2 branch, and ported to 2.4 kernels by Peter Braam, Andreas Dilger,
Andrew Morton, Alexander Viro, Ted Ts'o and Stephen Tweedie.
-ext3 is ext2 filesystem enhanced with journalling capabilities.
+Ext3 is the ext2 filesystem enhanced with journalling capabilities.
Options
=======
@@ -14,81 +14,81 @@ Options
When mounting an ext3 filesystem, the following option are accepted:
(*) == default
-jounal=update Update the ext3 file system's journal to the
- current format.
+journal=update Update the ext3 file system's journal to the current
+ format.
-journal=inum When a journal already exists, this option is
- ignored. Otherwise, it specifies the number of
- the inode which will represent the ext3 file
- system's journal file.
+journal=inum When a journal already exists, this option is ignored.
+ Otherwise, it specifies the number of the inode which
+ will represent the ext3 file system's journal file.
journal_dev=devnum When the external journal device's major/minor numbers
- have changed, this option allows to specify the new
- journal location. The journal device is identified
- through its new major/minor numbers encoded in devnum.
+ have changed, this option allows the user to specify
+ the new journal location. The journal device is
+ identified through its new major/minor numbers encoded
+ in devnum.
noload Don't load the journal on mounting.
-data=journal All data are committed into the journal prior
- to being written into the main file system.
+data=journal All data are committed into the journal prior to being
+ written into the main file system.
data=ordered (*) All data are forced directly out to the main file
- system prior to its metadata being committed to
- the journal.
+ system prior to its metadata being committed to the
+ journal.
-data=writeback Data ordering is not preserved, data may be
- written into the main file system after its
- metadata has been committed to the journal.
+data=writeback Data ordering is not preserved, data may be written
+ into the main file system after its metadata has been
+ committed to the journal.
commit=nrsec (*) Ext3 can be told to sync all its data and metadata
every 'nrsec' seconds. The default value is 5 seconds.
- This means that if you lose your power, you will lose,
- as much, the latest 5 seconds of work (your filesystem
- will not be damaged though, thanks to journaling). This
- default value (or any low value) will hurt performance,
- but it's good for data-safety. Setting it to 0 will
- have the same effect than leaving the default 5 sec.
+ This means that if you lose your power, you will lose
+ as much as the latest 5 seconds of work (your
+ filesystem will not be damaged though, thanks to the
+ journaling). This default value (or any low value)
+ will hurt performance, but it's good for data-safety.
+ Setting it to 0 will have the same effect as leaving
+ it at the default (5 seconds).
Setting it to very large values will improve
performance.
-barrier=1 This enables/disables barriers. barrier=0 disables it,
- barrier=1 enables it.
+barrier=1 This enables/disables barriers. barrier=0 disables
+ it, barrier=1 enables it.
-orlov (*) This enables the new Orlov block allocator. It's enabled
- by default.
+orlov (*) This enables the new Orlov block allocator. It is
+ enabled by default.
-oldalloc This disables the Orlov block allocator and enables the
- old block allocator. Orlov should have better performance,
- we'd like to get some feedback if it's the contrary for
- you.
+oldalloc This disables the Orlov block allocator and enables
+ the old block allocator. Orlov should have better
+ performance - we'd like to get some feedback if it's
+ the contrary for you.
-user_xattr Enables Extended User Attributes. Additionally, you need
- to have extended attribute support enabled in the kernel
- configuration (CONFIG_EXT3_FS_XATTR). See the attr(5)
- manual page and http://acl.bestbits.at to learn more
- about extended attributes.
+user_xattr Enables Extended User Attributes. Additionally, you
+ need to have extended attribute support enabled in the
+ kernel configuration (CONFIG_EXT3_FS_XATTR). See the
+ attr(5) manual page and http://acl.bestbits.at/ to
+ learn more about extended attributes.
nouser_xattr Disables Extended User Attributes.
-acl Enables POSIX Access Control Lists support. Additionally,
- you need to have ACL support enabled in the kernel
- configuration (CONFIG_EXT3_FS_POSIX_ACL). See the acl(5)
- manual page and http://acl.bestbits.at for more
- information.
+acl Enables POSIX Access Control Lists support.
+ Additionally, you need to have ACL support enabled in
+ the kernel configuration (CONFIG_EXT3_FS_POSIX_ACL).
+ See the acl(5) manual page and http://acl.bestbits.at/
+ for more information.
-noacl This option disables POSIX Access Control List support.
+noacl This option disables POSIX Access Control List
+ support.
reservation
noreservation
-resize=
-
bsddf (*) Make 'df' act like BSD.
minixdf Make 'df' act like Minix.
check=none Don't do extra checking of bitmaps on mount.
-nocheck
+nocheck
debug Extra debugging information is sent to syslog.
@@ -97,7 +97,7 @@ errors=continue Keep going on a filesystem error.
errors=panic Panic and halt the machine if an error occurs.
grpid Give objects the same group ID as their creator.
-bsdgroups
+bsdgroups
nogrpid (*) New objects have the group ID of their creator.
sysvgroups
@@ -108,81 +108,83 @@ resuid=n The user ID which may use the reserved blocks.
sb=n Use alternate superblock at this location.
-quota Quota options are currently silently ignored.
-noquota (see fs/ext3/super.c, line 594)
+quota
+noquota
grpquota
usrquota
Specification
=============
-ext3 shares all disk implementation with ext2 filesystem, and add
-transactions capabilities to ext2. Journaling is done by the
-Journaling block device layer.
+Ext3 shares all disk implementation with the ext2 filesystem, and adds
+transactions capabilities to ext2. Journaling is done by the Journaling Block
+Device layer.
Journaling Block Device layer
-----------------------------
-The Journaling Block Device layer (JBD) isn't ext3 specific. It was
-design to add journaling capabilities on a block device. The ext3
-filesystem code will inform the JBD of modifications it is performing
-(Call a transaction). the journal support the transactions start and
-stop, and in case of crash, the journal can replayed the transactions
-to put the partition on a consistent state fastly.
+The Journaling Block Device layer (JBD) isn't ext3 specific. It was design to
+add journaling capabilities on a block device. The ext3 filesystem code will
+inform the JBD of modifications it is performing (called a transaction). The
+journal supports the transactions start and stop, and in case of crash, the
+journal can replayed the transactions to put the partition back in a
+consistent state fast.
-handles represent a single atomic update to a filesystem. JBD can
-handle external journal on a block device.
+Handles represent a single atomic update to a filesystem. JBD can handle an
+external journal on a block device.
Data Mode
---------
-There's 3 different data modes:
+There are 3 different data modes:
* writeback mode
-In data=writeback mode, ext3 does not journal data at all. This mode
-provides a similar level of journaling as XFS, JFS, and ReiserFS in its
-default mode - metadata journaling. A crash+recovery can cause
-incorrect data to appear in files which were written shortly before the
-crash. This mode will typically provide the best ext3 performance.
+In data=writeback mode, ext3 does not journal data at all. This mode provides
+a similar level of journaling as that of XFS, JFS, and ReiserFS in its default
+mode - metadata journaling. A crash+recovery can cause incorrect data to
+appear in files which were written shortly before the crash. This mode will
+typically provide the best ext3 performance.
* ordered mode
-In data=ordered mode, ext3 only officially journals metadata, but it
-logically groups metadata and data blocks into a single unit called a
-transaction. When it's time to write the new metadata out to disk, the
-associated data blocks are written first. In general, this mode
-perform slightly slower than writeback but significantly faster than
-journal mode.
+In data=ordered mode, ext3 only officially journals metadata, but it logically
+groups metadata and data blocks into a single unit called a transaction. When
+it's time to write the new metadata out to disk, the associated data blocks
+are written first. In general, this mode performs slightly slower than
+writeback but significantly faster than journal mode.
* journal mode
-data=journal mode provides full data and metadata journaling. All new
-data is written to the journal first, and then to its final location.
-In the event of a crash, the journal can be replayed, bringing both
-data and metadata into a consistent state. This mode is the slowest
-except when data needs to be read from and written to disk at the same
-time where it outperform all others mode.
+data=journal mode provides full data and metadata journaling. All new data is
+written to the journal first, and then to its final location.
+In the event of a crash, the journal can be replayed, bringing both data and
+metadata into a consistent state. This mode is the slowest except when data
+needs to be read from and written to disk at the same time where it
+outperforms all others modes.
Compatibility
-------------
Ext2 partitions can be easily convert to ext3, with `tune2fs -j `.
-Ext3 is fully compatible with Ext2. Ext3 partitions can easily be
-mounted as Ext2.
+Ext3 is fully compatible with Ext2. Ext3 partitions can easily be mounted as
+Ext2.
+
External Tools
==============
-see manual pages to know more.
+See manual pages to learn more.
+
+tune2fs: create a ext3 journal on a ext2 partition with the -j flag.
+mke2fs: create a ext3 partition with the -j flag.
+debugfs: ext2 and ext3 file system debugger.
+ext2online: online (mounted) ext2 and ext3 filesystem resizer
-tune2fs: create a ext3 journal on a ext2 partition with the -j flags
-mke2fs: create a ext3 partition with the -j flags
-debugfs: ext2 and ext3 file system debugger
References
==========
-kernel source: file:/usr/src/linux/fs/ext3
- file:/usr/src/linux/fs/jbd
+kernel source:
+
-programs: http://e2fsprogs.sourceforge.net
+programs: http://e2fsprogs.sourceforge.net/
+ http://ext2resize.sourceforge.net
-useful link:
- http://www.zip.com.au/~akpm/linux/ext3/ext3-usage.html
+useful links: http://www.zip.com.au/~akpm/linux/ext3/ext3-usage.html
http://www-106.ibm.com/developerworks/linux/library/l-fs7/
http://www-106.ibm.com/developerworks/linux/library/l-fs8/
diff --git a/trunk/Documentation/hrtimers.txt b/trunk/Documentation/hrtimers.txt
new file mode 100644
index 000000000000..7620ff735faf
--- /dev/null
+++ b/trunk/Documentation/hrtimers.txt
@@ -0,0 +1,178 @@
+
+hrtimers - subsystem for high-resolution kernel timers
+----------------------------------------------------
+
+This patch introduces a new subsystem for high-resolution kernel timers.
+
+One might ask the question: we already have a timer subsystem
+(kernel/timers.c), why do we need two timer subsystems? After a lot of
+back and forth trying to integrate high-resolution and high-precision
+features into the existing timer framework, and after testing various
+such high-resolution timer implementations in practice, we came to the
+conclusion that the timer wheel code is fundamentally not suitable for
+such an approach. We initially didnt believe this ('there must be a way
+to solve this'), and spent a considerable effort trying to integrate
+things into the timer wheel, but we failed. In hindsight, there are
+several reasons why such integration is hard/impossible:
+
+- the forced handling of low-resolution and high-resolution timers in
+ the same way leads to a lot of compromises, macro magic and #ifdef
+ mess. The timers.c code is very "tightly coded" around jiffies and
+ 32-bitness assumptions, and has been honed and micro-optimized for a
+ relatively narrow use case (jiffies in a relatively narrow HZ range)
+ for many years - and thus even small extensions to it easily break
+ the wheel concept, leading to even worse compromises. The timer wheel
+ code is very good and tight code, there's zero problems with it in its
+ current usage - but it is simply not suitable to be extended for
+ high-res timers.
+
+- the unpredictable [O(N)] overhead of cascading leads to delays which
+ necessiate a more complex handling of high resolution timers, which
+ in turn decreases robustness. Such a design still led to rather large
+ timing inaccuracies. Cascading is a fundamental property of the timer
+ wheel concept, it cannot be 'designed out' without unevitably
+ degrading other portions of the timers.c code in an unacceptable way.
+
+- the implementation of the current posix-timer subsystem on top of
+ the timer wheel has already introduced a quite complex handling of
+ the required readjusting of absolute CLOCK_REALTIME timers at
+ settimeofday or NTP time - further underlying our experience by
+ example: that the timer wheel data structure is too rigid for high-res
+ timers.
+
+- the timer wheel code is most optimal for use cases which can be
+ identified as "timeouts". Such timeouts are usually set up to cover
+ error conditions in various I/O paths, such as networking and block
+ I/O. The vast majority of those timers never expire and are rarely
+ recascaded because the expected correct event arrives in time so they
+ can be removed from the timer wheel before any further processing of
+ them becomes necessary. Thus the users of these timeouts can accept
+ the granularity and precision tradeoffs of the timer wheel, and
+ largely expect the timer subsystem to have near-zero overhead.
+ Accurate timing for them is not a core purpose - in fact most of the
+ timeout values used are ad-hoc. For them it is at most a necessary
+ evil to guarantee the processing of actual timeout completions
+ (because most of the timeouts are deleted before completion), which
+ should thus be as cheap and unintrusive as possible.
+
+The primary users of precision timers are user-space applications that
+utilize nanosleep, posix-timers and itimer interfaces. Also, in-kernel
+users like drivers and subsystems which require precise timed events
+(e.g. multimedia) can benefit from the availability of a seperate
+high-resolution timer subsystem as well.
+
+While this subsystem does not offer high-resolution clock sources just
+yet, the hrtimer subsystem can be easily extended with high-resolution
+clock capabilities, and patches for that exist and are maturing quickly.
+The increasing demand for realtime and multimedia applications along
+with other potential users for precise timers gives another reason to
+separate the "timeout" and "precise timer" subsystems.
+
+Another potential benefit is that such a seperation allows even more
+special-purpose optimization of the existing timer wheel for the low
+resolution and low precision use cases - once the precision-sensitive
+APIs are separated from the timer wheel and are migrated over to
+hrtimers. E.g. we could decrease the frequency of the timeout subsystem
+from 250 Hz to 100 HZ (or even smaller).
+
+hrtimer subsystem implementation details
+----------------------------------------
+
+the basic design considerations were:
+
+- simplicity
+
+- data structure not bound to jiffies or any other granularity. All the
+ kernel logic works at 64-bit nanoseconds resolution - no compromises.
+
+- simplification of existing, timing related kernel code
+
+another basic requirement was the immediate enqueueing and ordering of
+timers at activation time. After looking at several possible solutions
+such as radix trees and hashes, we chose the red black tree as the basic
+data structure. Rbtrees are available as a library in the kernel and are
+used in various performance-critical areas of e.g. memory management and
+file systems. The rbtree is solely used for time sorted ordering, while
+a separate list is used to give the expiry code fast access to the
+queued timers, without having to walk the rbtree.
+
+(This seperate list is also useful for later when we'll introduce
+high-resolution clocks, where we need seperate pending and expired
+queues while keeping the time-order intact.)
+
+Time-ordered enqueueing is not purely for the purposes of
+high-resolution clocks though, it also simplifies the handling of
+absolute timers based on a low-resolution CLOCK_REALTIME. The existing
+implementation needed to keep an extra list of all armed absolute
+CLOCK_REALTIME timers along with complex locking. In case of
+settimeofday and NTP, all the timers (!) had to be dequeued, the
+time-changing code had to fix them up one by one, and all of them had to
+be enqueued again. The time-ordered enqueueing and the storage of the
+expiry time in absolute time units removes all this complex and poorly
+scaling code from the posix-timer implementation - the clock can simply
+be set without having to touch the rbtree. This also makes the handling
+of posix-timers simpler in general.
+
+The locking and per-CPU behavior of hrtimers was mostly taken from the
+existing timer wheel code, as it is mature and well suited. Sharing code
+was not really a win, due to the different data structures. Also, the
+hrtimer functions now have clearer behavior and clearer names - such as
+hrtimer_try_to_cancel() and hrtimer_cancel() [which are roughly
+equivalent to del_timer() and del_timer_sync()] - so there's no direct
+1:1 mapping between them on the algorithmical level, and thus no real
+potential for code sharing either.
+
+Basic data types: every time value, absolute or relative, is in a
+special nanosecond-resolution type: ktime_t. The kernel-internal
+representation of ktime_t values and operations is implemented via
+macros and inline functions, and can be switched between a "hybrid
+union" type and a plain "scalar" 64bit nanoseconds representation (at
+compile time). The hybrid union type optimizes time conversions on 32bit
+CPUs. This build-time-selectable ktime_t storage format was implemented
+to avoid the performance impact of 64-bit multiplications and divisions
+on 32bit CPUs. Such operations are frequently necessary to convert
+between the storage formats provided by kernel and userspace interfaces
+and the internal time format. (See include/linux/ktime.h for further
+details.)
+
+hrtimers - rounding of timer values
+-----------------------------------
+
+the hrtimer code will round timer events to lower-resolution clocks
+because it has to. Otherwise it will do no artificial rounding at all.
+
+one question is, what resolution value should be returned to the user by
+the clock_getres() interface. This will return whatever real resolution
+a given clock has - be it low-res, high-res, or artificially-low-res.
+
+hrtimers - testing and verification
+----------------------------------
+
+We used the high-resolution clock subsystem ontop of hrtimers to verify
+the hrtimer implementation details in praxis, and we also ran the posix
+timer tests in order to ensure specification compliance. We also ran
+tests on low-resolution clocks.
+
+The hrtimer patch converts the following kernel functionality to use
+hrtimers:
+
+ - nanosleep
+ - itimers
+ - posix-timers
+
+The conversion of nanosleep and posix-timers enabled the unification of
+nanosleep and clock_nanosleep.
+
+The code was successfully compiled for the following platforms:
+
+ i386, x86_64, ARM, PPC, PPC64, IA64
+
+The code was run-tested on the following platforms:
+
+ i386(UP/SMP), x86_64(UP/SMP), ARM, PPC
+
+hrtimers were also integrated into the -rt tree, along with a
+hrtimers-based high-resolution clock implementation, so the hrtimers
+code got a healthy amount of testing and use in practice.
+
+ Thomas Gleixner, Ingo Molnar
diff --git a/trunk/Documentation/kbuild/makefiles.txt b/trunk/Documentation/kbuild/makefiles.txt
index d802ce88bedc..443230b43e09 100644
--- a/trunk/Documentation/kbuild/makefiles.txt
+++ b/trunk/Documentation/kbuild/makefiles.txt
@@ -1033,9 +1033,9 @@ When kbuild executes the following steps are followed (roughly):
Example:
#arch/i386/Makefile
- GCC_VERSION := $(call cc-version)
cflags-y += $(shell \
- if [ $(GCC_VERSION) -ge 0300 ] ; then echo "-mregparm=3"; fi ;)
+ if [ $(call cc-version) -ge 0300 ] ; then \
+ echo "-mregparm=3"; fi ;)
In the above example -mregparm=3 is only used for gcc version greater
than or equal to gcc 3.0.
diff --git a/trunk/Documentation/kdump/gdbmacros.txt b/trunk/Documentation/kdump/gdbmacros.txt
index bc1b9eb92ae1..dcf5580380ab 100644
--- a/trunk/Documentation/kdump/gdbmacros.txt
+++ b/trunk/Documentation/kdump/gdbmacros.txt
@@ -177,3 +177,25 @@ document trapinfo
'trapinfo ' will tell you by which trap & possibly
addresthe kernel paniced.
end
+
+
+define dmesg
+ set $i = 0
+ set $end_idx = (log_end - 1) & (log_buf_len - 1)
+
+ while ($i < logged_chars)
+ set $idx = (log_end - 1 - logged_chars + $i) & (log_buf_len - 1)
+
+ if ($idx + 100 <= $end_idx) || \
+ ($end_idx <= $idx && $idx + 100 < log_buf_len)
+ printf "%.100s", &log_buf[$idx]
+ set $i = $i + 100
+ else
+ printf "%c", log_buf[$idx]
+ set $i = $i + 1
+ end
+ end
+end
+document dmesg
+ print the kernel ring buffer
+end
diff --git a/trunk/Documentation/kdump/kdump.txt b/trunk/Documentation/kdump/kdump.txt
index 5f08f9ce6046..212cf3c21abf 100644
--- a/trunk/Documentation/kdump/kdump.txt
+++ b/trunk/Documentation/kdump/kdump.txt
@@ -4,10 +4,10 @@ Documentation for kdump - the kexec-based crash dumping solution
DESIGN
======
-Kdump uses kexec to reboot to a second kernel whenever a dump needs to be taken.
-This second kernel is booted with very little memory. The first kernel reserves
-the section of memory that the second kernel uses. This ensures that on-going
-DMA from the first kernel does not corrupt the second kernel.
+Kdump uses kexec to reboot to a second kernel whenever a dump needs to be
+taken. This second kernel is booted with very little memory. The first kernel
+reserves the section of memory that the second kernel uses. This ensures that
+on-going DMA from the first kernel does not corrupt the second kernel.
All the necessary information about Core image is encoded in ELF format and
stored in reserved area of memory before crash. Physical address of start of
@@ -35,77 +35,82 @@ In the second kernel, "old memory" can be accessed in two ways.
SETUP
=====
-1) Download http://www.xmission.com/~ebiederm/files/kexec/kexec-tools-1.101.tar.gz
- and apply http://lse.sourceforge.net/kdump/patches/kexec-tools-1.101-kdump.patch
- and after that build the source.
+1) Download the upstream kexec-tools userspace package from
+ http://www.xmission.com/~ebiederm/files/kexec/kexec-tools-1.101.tar.gz.
-2) Download and build the appropriate (2.6.13-rc1 onwards) vanilla kernel.
+ Apply the latest consolidated kdump patch on top of kexec-tools-1.101
+ from http://lse.sourceforge.net/kdump/. This arrangment has been made
+ till all the userspace patches supporting kdump are integrated with
+ upstream kexec-tools userspace.
+2) Download and build the appropriate (2.6.13-rc1 onwards) vanilla kernels.
Two kernels need to be built in order to get this feature working.
+ Following are the steps to properly configure the two kernels specific
+ to kexec and kdump features:
- A) First kernel:
+ A) First kernel or regular kernel:
+ ----------------------------------
a) Enable "kexec system call" feature (in Processor type and features).
- CONFIG_KEXEC=y
- b) This kernel's physical load address should be the default value of
- 0x100000 (0x100000, 1 MB) (in Processor type and features).
- CONFIG_PHYSICAL_START=0x100000
- c) Enable "sysfs file system support" (in Pseudo filesystems).
- CONFIG_SYSFS=y
+ CONFIG_KEXEC=y
+ b) Enable "sysfs file system support" (in Pseudo filesystems).
+ CONFIG_SYSFS=y
+ c) make
d) Boot into first kernel with the command line parameter "crashkernel=Y@X".
Use appropriate values for X and Y. Y denotes how much memory to reserve
- for the second kernel, and X denotes at what physical address the reserved
- memory section starts. For example: "crashkernel=64M@16M".
-
- B) Second kernel:
- a) Enable "kernel crash dumps" feature (in Processor type and features).
- CONFIG_CRASH_DUMP=y
- b) Specify a suitable value for "Physical address where the kernel is
- loaded" (in Processor type and features). Typically this value
- should be same as X (See option d) above, e.g., 16 MB or 0x1000000.
- CONFIG_PHYSICAL_START=0x1000000
- c) Enable "/proc/vmcore support" (Optional, in Pseudo filesystems).
- CONFIG_PROC_VMCORE=y
- d) Disable SMP support and build a UP kernel (Until it is fixed).
- CONFIG_SMP=n
- e) Enable "Local APIC support on uniprocessors".
- CONFIG_X86_UP_APIC=y
- f) Enable "IO-APIC support on uniprocessors"
- CONFIG_X86_UP_IOAPIC=y
-
- Note: i) Options a) and b) depend upon "Configure standard kernel features
- (for small systems)" (under General setup).
- ii) Option a) also depends on CONFIG_HIGHMEM (under Processor
- type and features).
- iii) Both option a) and b) are under "Processor type and features".
-
-3) Boot into the first kernel. You are now ready to try out kexec-based crash
- dumps.
-
-4) Load the second kernel to be booted using:
+ for the second kernel, and X denotes at what physical address the
+ reserved memory section starts. For example: "crashkernel=64M@16M".
+
+
+ B) Second kernel or dump capture kernel:
+ ---------------------------------------
+ a) For i386 architecture enable Highmem support
+ CONFIG_HIGHMEM=y
+ b) Enable "kernel crash dumps" feature (under "Processor type and features")
+ CONFIG_CRASH_DUMP=y
+ c) Make sure a suitable value for "Physical address where the kernel is
+ loaded" (under "Processor type and features"). By default this value
+ is 0x1000000 (16MB) and it should be same as X (See option d above),
+ e.g., 16 MB or 0x1000000.
+ CONFIG_PHYSICAL_START=0x1000000
+ d) Enable "/proc/vmcore support" (Optional, under "Pseudo filesystems").
+ CONFIG_PROC_VMCORE=y
+
+3) After booting to regular kernel or first kernel, load the second kernel
+ using the following command:
kexec -p --args-linux --elf32-core-headers
- --append="root= init 1 irqpoll"
-
- Note: i) has to be a vmlinux image. bzImage will not work,
- as of now.
- ii) By default ELF headers are stored in ELF64 format. Option
- --elf32-core-headers forces generation of ELF32 headers. gdb can
- not open ELF64 headers on 32 bit systems. So creating ELF32
- headers can come handy for users who have got non-PAE systems and
- hence have memory less than 4GB.
- iii) Specify "irqpoll" as command line parameter. This reduces driver
- initialization failures in second kernel due to shared interrupts.
- iv) needs to be specified in a format corresponding to
- the root device name in the output of mount command.
- v) If you have built the drivers required to mount root file
- system as modules in , then, specify
- --initrd=.
-
-5) System reboots into the second kernel when a panic occurs. A module can be
- written to force the panic or "ALT-SysRq-c" can be used initiate a crash
- dump for testing purposes.
-
-6) Write out the dump file using
+ --append="root= init 1 irqpoll maxcpus=1"
+
+ Notes:
+ ======
+ i) has to be a vmlinux image ie uncompressed elf image.
+ bzImage will not work, as of now.
+ ii) --args-linux has to be speicfied as if kexec it loading an elf image,
+ it needs to know that the arguments supplied are of linux type.
+ iii) By default ELF headers are stored in ELF64 format to support systems
+ with more than 4GB memory. Option --elf32-core-headers forces generation
+ of ELF32 headers. The reason for this option being, as of now gdb can
+ not open vmcore file with ELF64 headers on a 32 bit systems. So ELF32
+ headers can be used if one has non-PAE systems and hence memory less
+ than 4GB.
+ iv) Specify "irqpoll" as command line parameter. This reduces driver
+ initialization failures in second kernel due to shared interrupts.
+ v) needs to be specified in a format corresponding to the root
+ device name in the output of mount command.
+ vi) If you have built the drivers required to mount root file system as
+ modules in , then, specify
+ --initrd=.
+ vii) Specify maxcpus=1 as, if during first kernel run, if panic happens on
+ non-boot cpus, second kernel doesn't seem to be boot up all the cpus.
+ The other option is to always built the second kernel without SMP
+ support ie CONFIG_SMP=n
+
+4) After successfully loading the second kernel as above, if a panic occurs
+ system reboots into the second kernel. A module can be written to force
+ the panic or "ALT-SysRq-c" can be used initiate a crash dump for testing
+ purposes.
+
+5) Once the second kernel has booted, write out the dump file using
cp /proc/vmcore
@@ -119,9 +124,9 @@ SETUP
Entire memory: dd if=/dev/oldmem of=oldmem.001
+
ANALYSIS
========
-
Limited analysis can be done using gdb on the dump file copied out of
/proc/vmcore. Use vmlinux built with -g and run
@@ -132,15 +137,19 @@ work fine.
Note: gdb cannot analyse core files generated in ELF64 format for i386.
+Latest "crash" (crash-4.0-2.18) as available on Dave Anderson's site
+http://people.redhat.com/~anderson/ works well with kdump format.
+
+
TODO
====
-
1) Provide a kernel pages filtering mechanism so that core file size is not
insane on systems having huge memory banks.
-2) Modify "crash" tool to make it recognize this dump.
+2) Relocatable kernel can help in maintaining multiple kernels for crashdump
+ and same kernel as the first kernel can be used to capture the dump.
+
CONTACT
=======
-
Vivek Goyal (vgoyal@in.ibm.com)
Maneesh Soni (maneesh@in.ibm.com)
diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt
index 0dc848bf0b56..fe11fccf7e41 100644
--- a/trunk/Documentation/kernel-parameters.txt
+++ b/trunk/Documentation/kernel-parameters.txt
@@ -475,10 +475,11 @@ running once the system is up.
See Documentation/block/as-iosched.txt and
Documentation/block/deadline-iosched.txt for details.
- elfcorehdr= [IA-32]
+ elfcorehdr= [IA-32, X86_64]
Specifies physical address of start of kernel core
- image elf header.
- See Documentation/kdump.txt for details.
+ image elf header. Generally kexec loader will
+ pass this option to capture kernel.
+ See Documentation/kdump/kdump.txt for details.
enforcing [SELINUX] Set initial enforcing status.
Format: {"0" | "1"}
@@ -832,7 +833,7 @@ running once the system is up.
mem=nopentium [BUGS=IA-32] Disable usage of 4MB pages for kernel
memory.
- memmap=exactmap [KNL,IA-32] Enable setting of an exact
+ memmap=exactmap [KNL,IA-32,X86_64] Enable setting of an exact
E820 memory map, as specified by the user.
Such memmap=exactmap lines can be constructed based on
BIOS output or other requirements. See the memmap=nn@ss
@@ -855,6 +856,49 @@ running once the system is up.
mga= [HW,DRM]
+ migration_cost=
+ [KNL,SMP] debug: override scheduler migration costs
+ Format: ,,...
+ This debugging option can be used to override the
+ default scheduler migration cost matrix. The numbers
+ are indexed by 'CPU domain distance'.
+ E.g. migration_cost=1000,2000,3000 on an SMT NUMA
+ box will set up an intra-core migration cost of
+ 1 msec, an inter-core migration cost of 2 msecs,
+ and an inter-node migration cost of 3 msecs.
+
+ WARNING: using the wrong values here can break
+ scheduler performance, so it's only for scheduler
+ development purposes, not production environments.
+
+ migration_debug=
+ [KNL,SMP] migration cost auto-detect verbosity
+ Format=<0|1|2>
+ If a system's migration matrix reported at bootup
+ seems erroneous then this option can be used to
+ increase verbosity of the detection process.
+ We default to 0 (no extra messages), 1 will print
+ some more information, and 2 will be really
+ verbose (probably only useful if you also have a
+ serial console attached to the system).
+
+ migration_factor=
+ [KNL,SMP] multiply/divide migration costs by a factor
+ Format=
+ This debug option can be used to proportionally
+ increase or decrease the auto-detected migration
+ costs for all entries of the migration matrix.
+ E.g. migration_factor=150 will increase migration
+ costs by 50%. (and thus the scheduler will be less
+ eager migrating cache-hot tasks)
+ migration_factor=80 will decrease migration costs
+ by 20%. (thus the scheduler will be more eager to
+ migrate tasks)
+
+ WARNING: using the wrong values here can break
+ scheduler performance, so it's only for scheduler
+ development purposes, not production environments.
+
mousedev.tap_time=
[MOUSE] Maximum time between finger touching and
leaving touchpad surface for touch to be considered
diff --git a/trunk/Documentation/laptop-mode.txt b/trunk/Documentation/laptop-mode.txt
index dc4e810afdcd..f42e4c089356 100644
--- a/trunk/Documentation/laptop-mode.txt
+++ b/trunk/Documentation/laptop-mode.txt
@@ -3,7 +3,7 @@ How to conserve battery power using laptop-mode
Document Author: Bart Samwel (bart@samwel.tk)
Date created: January 2, 2004
-Last modified: July 10, 2004
+Last modified: December 06, 2004
Introduction
------------
@@ -33,7 +33,7 @@ or anything. Simply install all the files included in this document, and
laptop mode will automatically be started when you're on battery. For
your convenience, a tarball containing an installer can be downloaded at:
-http://www.xs4all.nl/~bsamwel/laptop_mode/tools
+http://www.xs4all.nl/~bsamwel/laptop_mode/tools/
To configure laptop mode, you need to edit the configuration file, which is
located in /etc/default/laptop-mode on Debian-based systems, or in
@@ -912,7 +912,7 @@ void usage()
exit(0);
}
-int main(int ac, char **av)
+int main(int argc, char **argv)
{
int fd;
char *disk = 0;
diff --git a/trunk/Documentation/locks.txt b/trunk/Documentation/locks.txt
index ce1be79edfb8..e3b402ef33bd 100644
--- a/trunk/Documentation/locks.txt
+++ b/trunk/Documentation/locks.txt
@@ -65,20 +65,3 @@ The default is to disallow mandatory locking. The intention is that
mandatory locking only be enabled on a local filesystem as the specific need
arises.
-Until an updated version of mount(8) becomes available you may have to apply
-this patch to the mount sources (based on the version distributed with Rick
-Faith's util-linux-2.5 package):
-
-*** mount.c.orig Sat Jun 8 09:14:31 1996
---- mount.c Sat Jun 8 09:13:02 1996
-***************
-*** 100,105 ****
---- 100,107 ----
- { "noauto", 0, MS_NOAUTO }, /* Can only be mounted explicitly */
- { "user", 0, MS_USER }, /* Allow ordinary user to mount */
- { "nouser", 1, MS_USER }, /* Forbid ordinary user to mount */
-+ { "mand", 0, MS_MANDLOCK }, /* Allow mandatory locks on this FS */
-+ { "nomand", 1, MS_MANDLOCK }, /* Forbid mandatory locks on this FS */
- /* add new options here */
- #ifdef MS_NOSUB
- { "sub", 1, MS_NOSUB }, /* allow submounts */
diff --git a/trunk/Documentation/oops-tracing.txt b/trunk/Documentation/oops-tracing.txt
index 05960f8a748e..2503404ae5c2 100644
--- a/trunk/Documentation/oops-tracing.txt
+++ b/trunk/Documentation/oops-tracing.txt
@@ -41,11 +41,9 @@ the disk is not available then you have three options :-
run a null modem to a second machine and capture the output there
using your favourite communication program. Minicom works well.
-(3) Patch the kernel with one of the crash dump patches. These save
- data to a floppy disk or video rom or a swap partition. None of
- these are standard kernel patches so you have to find and apply
- them yourself. Search kernel archives for kmsgdump, lkcd and
- oops+smram.
+(3) Use Kdump (see Documentation/kdump/kdump.txt),
+ extract the kernel ring buffer from old memory with using dmesg
+ gdbmacro in Documentation/kdump/gdbmacros.txt.
Full Information
diff --git a/trunk/Documentation/stable_kernel_rules.txt b/trunk/Documentation/stable_kernel_rules.txt
index 2c81305090df..e409e5d07486 100644
--- a/trunk/Documentation/stable_kernel_rules.txt
+++ b/trunk/Documentation/stable_kernel_rules.txt
@@ -1,58 +1,56 @@
Everything you ever wanted to know about Linux 2.6 -stable releases.
-Rules on what kind of patches are accepted, and what ones are not, into
-the "-stable" tree:
+Rules on what kind of patches are accepted, and which ones are not, into the
+"-stable" tree:
- It must be obviously correct and tested.
- - It can not bigger than 100 lines, with context.
+ - It can not be bigger than 100 lines, with context.
- It must fix only one thing.
- It must fix a real bug that bothers people (not a, "This could be a
- problem..." type thing.)
+ problem..." type thing).
- It must fix a problem that causes a build error (but not for things
marked CONFIG_BROKEN), an oops, a hang, data corruption, a real
- security issue, or some "oh, that's not good" issue. In short,
- something critical.
- - No "theoretical race condition" issues, unless an explanation of how
- the race can be exploited.
+ security issue, or some "oh, that's not good" issue. In short, something
+ critical.
+ - No "theoretical race condition" issues, unless an explanation of how the
+ race can be exploited is also provided.
- It can not contain any "trivial" fixes in it (spelling changes,
- whitespace cleanups, etc.)
+ whitespace cleanups, etc).
- It must be accepted by the relevant subsystem maintainer.
- - It must follow Documentation/SubmittingPatches rules.
+ - It must follow the Documentation/SubmittingPatches rules.
Procedure for submitting patches to the -stable tree:
- Send the patch, after verifying that it follows the above rules, to
stable@kernel.org.
- - The sender will receive an ack when the patch has been accepted into
- the queue, or a nak if the patch is rejected. This response might
- take a few days, according to the developer's schedules.
- - If accepted, the patch will be added to the -stable queue, for review
- by other developers.
+ - The sender will receive an ACK when the patch has been accepted into the
+ queue, or a NAK if the patch is rejected. This response might take a few
+ days, according to the developer's schedules.
+ - If accepted, the patch will be added to the -stable queue, for review by
+ other developers.
- Security patches should not be sent to this alias, but instead to the
- documented security@kernel.org.
+ documented security@kernel.org address.
Review cycle:
- - When the -stable maintainers decide for a review cycle, the patches
- will be sent to the review committee, and the maintainer of the
- affected area of the patch (unless the submitter is the maintainer of
- the area) and CC: to the linux-kernel mailing list.
- - The review committee has 48 hours in which to ack or nak the patch.
+ - When the -stable maintainers decide for a review cycle, the patches will be
+ sent to the review committee, and the maintainer of the affected area of
+ the patch (unless the submitter is the maintainer of the area) and CC: to
+ the linux-kernel mailing list.
+ - The review committee has 48 hours in which to ACK or NAK the patch.
- If the patch is rejected by a member of the committee, or linux-kernel
- members object to the patch, bringing up issues that the maintainers
- and members did not realize, the patch will be dropped from the
- queue.
- - At the end of the review cycle, the acked patches will be added to
- the latest -stable release, and a new -stable release will happen.
- - Security patches will be accepted into the -stable tree directly from
- the security kernel team, and not go through the normal review cycle.
+ members object to the patch, bringing up issues that the maintainers and
+ members did not realize, the patch will be dropped from the queue.
+ - At the end of the review cycle, the ACKed patches will be added to the
+ latest -stable release, and a new -stable release will happen.
+ - Security patches will be accepted into the -stable tree directly from the
+ security kernel team, and not go through the normal review cycle.
Contact the kernel security team for more details on this procedure.
Review committe:
- - This will be made up of a number of kernel developers who have
- volunteered for this task, and a few that haven't.
-
+ - This is made up of a number of kernel developers who have volunteered for
+ this task, and a few that haven't.
diff --git a/trunk/Documentation/video4linux/CARDLIST.bttv b/trunk/Documentation/video4linux/CARDLIST.bttv
index 74fb085e178b..b72706c58a44 100644
--- a/trunk/Documentation/video4linux/CARDLIST.bttv
+++ b/trunk/Documentation/video4linux/CARDLIST.bttv
@@ -142,3 +142,4 @@
141 -> Asound Skyeye PCTV
142 -> Sabrent TV-FM (bttv version)
143 -> Hauppauge ImpactVCB (bt878) [0070:13eb]
+144 -> MagicTV
diff --git a/trunk/Documentation/video4linux/CARDLIST.cx88 b/trunk/Documentation/video4linux/CARDLIST.cx88
index 34b6e59f2968..56e194f1a0b0 100644
--- a/trunk/Documentation/video4linux/CARDLIST.cx88
+++ b/trunk/Documentation/video4linux/CARDLIST.cx88
@@ -19,7 +19,7 @@
18 -> Hauppauge Nova-T DVB-T [0070:9002,0070:9001]
19 -> Conexant DVB-T reference design [14f1:0187]
20 -> Provideo PV259 [1540:2580]
- 21 -> DViCO FusionHDTV DVB-T Plus [18ac:db10]
+ 21 -> DViCO FusionHDTV DVB-T Plus [18ac:db10,18ac:db11]
22 -> pcHDTV HD3000 HDTV [7063:3000]
23 -> digitalnow DNTV Live! DVB-T [17de:a8a6]
24 -> Hauppauge WinTV 28xxx (Roslyn) models [0070:2801]
diff --git a/trunk/Documentation/x86_64/boot-options.txt b/trunk/Documentation/x86_64/boot-options.txt
index e566affeed7f..72ab9b99b22c 100644
--- a/trunk/Documentation/x86_64/boot-options.txt
+++ b/trunk/Documentation/x86_64/boot-options.txt
@@ -125,7 +125,7 @@ SMP
cpumask=MASK only use cpus with bits set in mask
additional_cpus=NUM Allow NUM more CPUs for hotplug
- (defaults are specified by the BIOS or half the available CPUs)
+ (defaults are specified by the BIOS, see Documentation/x86_64/cpu-hotplug-spec)
NUMA
diff --git a/trunk/Documentation/x86_64/cpu-hotplug-spec b/trunk/Documentation/x86_64/cpu-hotplug-spec
new file mode 100644
index 000000000000..5c0fa345e556
--- /dev/null
+++ b/trunk/Documentation/x86_64/cpu-hotplug-spec
@@ -0,0 +1,21 @@
+Firmware support for CPU hotplug under Linux/x86-64
+---------------------------------------------------
+
+Linux/x86-64 supports CPU hotplug now. For various reasons Linux wants to
+know in advance boot time the maximum number of CPUs that could be plugged
+into the system. ACPI 3.0 currently has no official way to supply
+this information from the firmware to the operating system.
+
+In ACPI each CPU needs an LAPIC object in the MADT table (5.2.11.5 in the
+ACPI 3.0 specification). ACPI already has the concept of disabled LAPIC
+objects by setting the Enabled bit in the LAPIC object to zero.
+
+For CPU hotplug Linux/x86-64 expects now that any possible future hotpluggable
+CPU is already available in the MADT. If the CPU is not available yet
+it should have its LAPIC Enabled bit set to 0. Linux will use the number
+of disabled LAPICs to compute the maximum number of future CPUs.
+
+In the worst case the user can overwrite this choice using a command line
+option (additional_cpus=...), but it is recommended to supply the correct
+number (or a reasonable approximation of it, with erring towards more not less)
+in the MADT to avoid manual configuration.
diff --git a/trunk/Kbuild b/trunk/Kbuild
index 79003918f37f..95d6a00bace0 100644
--- a/trunk/Kbuild
+++ b/trunk/Kbuild
@@ -22,8 +22,6 @@ sed-$(CONFIG_MIPS) := "/^@@@/s///p"
quiet_cmd_offsets = GEN $@
define cmd_offsets
- mkdir -p $(dir $@); \
- cat $< | \
(set -e; \
echo "#ifndef __ASM_OFFSETS_H__"; \
echo "#define __ASM_OFFSETS_H__"; \
@@ -34,7 +32,7 @@ define cmd_offsets
echo " *"; \
echo " */"; \
echo ""; \
- sed -ne $(sed-y); \
+ sed -ne $(sed-y) $<; \
echo ""; \
echo "#endif" ) > $@
endef
@@ -45,5 +43,6 @@ arch/$(ARCH)/kernel/asm-offsets.s: arch/$(ARCH)/kernel/asm-offsets.c FORCE
$(call if_changed_dep,cc_s_c)
$(obj)/$(offsets-file): arch/$(ARCH)/kernel/asm-offsets.s Kbuild
+ $(Q)mkdir -p $(dir $@)
$(call cmd,offsets)
diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS
index 07420161e669..0db72a36e245 100644
--- a/trunk/MAINTAINERS
+++ b/trunk/MAINTAINERS
@@ -804,6 +804,7 @@ S: Maintained
DOCBOOK FOR DOCUMENTATION
P: Martin Waitz
M: tali@admingilde.org
+T: git http://tali.admingilde.org/git/linux-docbook.git
S: Maintained
DOUBLETALK DRIVER
@@ -2358,13 +2359,6 @@ P: Nicolas Pitre
M: nico@cam.org
S: Maintained
-SNA NETWORK LAYER
-P: Jay Schulist
-M: jschlst@samba.org
-L: linux-sna@turbolinux.com
-W: http://www.linux-sna.org
-S: Supported
-
SOFTWARE RAID (Multiple Disks) SUPPORT
P: Ingo Molnar
M: mingo@redhat.com
@@ -2486,7 +2480,7 @@ P: Paul Mundt
M: lethal@linux-sh.org
P: Kazumoto Kojima
M: kkojima@rr.iij4u.or.jp
-L: linux-sh@m17n.org
+L: linuxsh-dev@lists.sourceforge.net
W: http://www.linux-sh.org
W: http://www.m17n.org/linux-sh/
W: http://www.rr.iij4u.or.jp/~kkojima/linux-sh4.html
diff --git a/trunk/Makefile b/trunk/Makefile
index fb497ea8bc74..deedaf79cdca 100644
--- a/trunk/Makefile
+++ b/trunk/Makefile
@@ -141,24 +141,6 @@ VPATH := $(srctree)
export srctree objtree VPATH TOPDIR
-nullstring :=
-space := $(nullstring) # end of line
-
-# Take the contents of any files called localversion* and the config
-# variable CONFIG_LOCALVERSION and append them to KERNELRELEASE. Be
-# careful not to include files twice if building in the source
-# directory. LOCALVERSION from the command line override all of this
-
-localver := $(objtree)/localversion* $(srctree)/localversion*
-localver := $(sort $(wildcard $(localver)))
-# skip backup files (containing '~')
-localver := $(foreach f, $(localver), $(if $(findstring ~, $(f)),,$(f)))
-
-LOCALVERSION = $(subst $(space),, \
- $(shell cat /dev/null $(localver)) \
- $(patsubst "%",%,$(CONFIG_LOCALVERSION)))
-
-KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)$(LOCALVERSION)
# SUBARCH tells the usermode build what the underlying arch is. That is set
# first, and if a usermode build is happening, the "ARCH=um" on the command
@@ -353,7 +335,10 @@ CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-ffreestanding
AFLAGS := -D__ASSEMBLY__
-export VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION LOCALVERSION KERNELRELEASE \
+# Read KERNELRELEASE from .kernelrelease (if it exists)
+KERNELRELEASE = $(shell cat .kernelrelease 2> /dev/null)
+
+export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE \
ARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC \
CPP AR NM STRIP OBJCOPY OBJDUMP MAKE AWK GENKSYMS PERL UTS_MACHINE \
HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
@@ -551,26 +536,6 @@ export KBUILD_IMAGE ?= vmlinux
# images. Default is /boot, but you can set it to other values
export INSTALL_PATH ?= /boot
-# If CONFIG_LOCALVERSION_AUTO is set, we automatically perform some tests
-# and try to determine if the current source tree is a release tree, of any sort,
-# or if is a pure development tree.
-#
-# A 'release tree' is any tree with a git TAG associated
-# with it. The primary goal of this is to make it safe for a native
-# git/CVS/SVN user to build a release tree (i.e, 2.6.9) and also to
-# continue developing against the current Linus tree, without having the Linus
-# tree overwrite the 2.6.9 tree when installed.
-#
-# Currently, only git is supported.
-# Other SCMs can edit scripts/setlocalversion and add the appropriate
-# checks as needed.
-
-
-ifdef CONFIG_LOCALVERSION_AUTO
- localversion-auto := $(shell $(PERL) $(srctree)/scripts/setlocalversion $(srctree))
- LOCALVERSION := $(LOCALVERSION)$(localversion-auto)
-endif
-
#
# INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory
# relocations required by build roots. This is not defined in the
@@ -782,6 +747,50 @@ $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ;
$(vmlinux-dirs): prepare scripts
$(Q)$(MAKE) $(build)=$@
+# Build the kernel release string
+# The KERNELRELEASE is stored in a file named .kernelrelease
+# to be used when executing for example make install or make modules_install
+#
+# Take the contents of any files called localversion* and the config
+# variable CONFIG_LOCALVERSION and append them to KERNELRELEASE.
+# LOCALVERSION from the command line override all of this
+
+nullstring :=
+space := $(nullstring) # end of line
+
+___localver = $(objtree)/localversion* $(srctree)/localversion*
+__localver = $(sort $(wildcard $(___localver)))
+# skip backup files (containing '~')
+_localver = $(foreach f, $(__localver), $(if $(findstring ~, $(f)),,$(f)))
+
+localver = $(subst $(space),, \
+ $(shell cat /dev/null $(_localver)) \
+ $(patsubst "%",%,$(CONFIG_LOCALVERSION)))
+
+# If CONFIG_LOCALVERSION_AUTO is set scripts/setlocalversion is called
+# and if the SCM is know a tag from the SCM is appended.
+# The appended tag is determinded by the SCM used.
+#
+# Currently, only git is supported.
+# Other SCMs can edit scripts/setlocalversion and add the appropriate
+# checks as needed.
+ifdef CONFIG_LOCALVERSION_AUTO
+ _localver-auto = $(shell $(CONFIG_SHELL) \
+ $(srctree)/scripts/setlocalversion $(srctree))
+ localver-auto = $(LOCALVERSION)$(_localver-auto)
+endif
+
+localver-full = $(localver)$(localver-auto)
+
+# Store (new) KERNELRELASE string in .kernelrelease
+kernelrelease = \
+ $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)$(localver-full)
+.kernelrelease: FORCE
+ $(Q)rm -f .kernelrelease
+ $(Q)echo $(kernelrelease) > .kernelrelease
+ $(Q)echo " Building kernel $(kernelrelease)"
+
+
# Things we need to do before we recursively start building the kernel
# or the modules are listed in "prepare".
# A multi level approach is used. prepareN is processed before prepareN-1.
@@ -798,8 +807,7 @@ $(vmlinux-dirs): prepare scripts
# and if so do:
# 1) Check that make has not been executed in the kernel src $(srctree)
# 2) Create the include2 directory, used for the second asm symlink
-
-prepare3:
+prepare3: .kernelrelease
ifneq ($(KBUILD_SRC),)
@echo ' Using $(srctree) as source for kernel'
$(Q)if [ -f $(srctree)/.config ]; then \
@@ -984,9 +992,9 @@ CLEAN_FILES += vmlinux System.map \
# Directories & files removed with 'make mrproper'
MRPROPER_DIRS += include/config include2
-MRPROPER_FILES += .config .config.old include/asm .version \
+MRPROPER_FILES += .config .config.old include/asm .version .old_version \
include/linux/autoconf.h include/linux/version.h \
- Module.symvers tags TAGS cscope*
+ .kernelrelease Module.symvers tags TAGS cscope*
# clean - Delete most, but leave enough to build external modules
#
@@ -1072,6 +1080,7 @@ help:
@echo ' tags/TAGS - Generate tags file for editors'
@echo ' cscope - Generate cscope index'
@echo ' kernelrelease - Output the release version string'
+ @echo ' kernelversion - Output the version stored in Makefile'
@echo ''
@echo 'Static analysers'
@echo ' buildcheck - List dangling references to vmlinux discarded sections'
@@ -1293,6 +1302,8 @@ checkstack:
kernelrelease:
@echo $(KERNELRELEASE)
+kernelversion:
+ @echo $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
# FIXME Should go into a make.lib or something
# ===========================================================================
diff --git a/trunk/arch/alpha/kernel/alpha_ksyms.c b/trunk/arch/alpha/kernel/alpha_ksyms.c
index f3e98f837784..1898ea79d0e2 100644
--- a/trunk/arch/alpha/kernel/alpha_ksyms.c
+++ b/trunk/arch/alpha/kernel/alpha_ksyms.c
@@ -40,7 +40,6 @@
#include
extern struct hwrpb_struct *hwrpb;
-extern void dump_thread(struct pt_regs *, struct user *);
extern spinlock_t rtc_lock;
/* these are C runtime functions with special calling conventions: */
diff --git a/trunk/arch/alpha/kernel/pci-noop.c b/trunk/arch/alpha/kernel/pci-noop.c
index 9903e3a79102..fff5cf93e816 100644
--- a/trunk/arch/alpha/kernel/pci-noop.c
+++ b/trunk/arch/alpha/kernel/pci-noop.c
@@ -7,6 +7,7 @@
#include
#include
#include
+#include
#include
#include
#include
diff --git a/trunk/arch/alpha/kernel/process.c b/trunk/arch/alpha/kernel/process.c
index abb739b88ed1..9924fd07743a 100644
--- a/trunk/arch/alpha/kernel/process.c
+++ b/trunk/arch/alpha/kernel/process.c
@@ -276,7 +276,7 @@ copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
{
extern void ret_from_fork(void);
- struct thread_info *childti = p->thread_info;
+ struct thread_info *childti = task_thread_info(p);
struct pt_regs * childregs;
struct switch_stack * childstack, *stack;
unsigned long stack_offset, settls;
@@ -285,7 +285,7 @@ copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
if (!(regs->ps & 8))
stack_offset = (PAGE_SIZE-1) & (unsigned long) regs;
childregs = (struct pt_regs *)
- (stack_offset + PAGE_SIZE + (long) childti);
+ (stack_offset + PAGE_SIZE + task_stack_page(p));
*childregs = *regs;
settls = regs->r20;
@@ -428,30 +428,15 @@ dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt, struct thread_info *ti)
int
dump_elf_task(elf_greg_t *dest, struct task_struct *task)
{
- struct thread_info *ti;
- struct pt_regs *pt;
-
- ti = task->thread_info;
- pt = (struct pt_regs *)((unsigned long)ti + 2*PAGE_SIZE) - 1;
-
- dump_elf_thread(dest, pt, ti);
-
+ dump_elf_thread(dest, task_pt_regs(task), task_thread_info(task));
return 1;
}
int
dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task)
{
- struct thread_info *ti;
- struct pt_regs *pt;
- struct switch_stack *sw;
-
- ti = task->thread_info;
- pt = (struct pt_regs *)((unsigned long)ti + 2*PAGE_SIZE) - 1;
- sw = (struct switch_stack *)pt - 1;
-
+ struct switch_stack *sw = (struct switch_stack *)task_pt_regs(task) - 1;
memcpy(dest, sw->fp, 32 * 8);
-
return 1;
}
@@ -492,8 +477,8 @@ do_sys_execve(char __user *ufilename, char __user * __user *argv,
unsigned long
thread_saved_pc(task_t *t)
{
- unsigned long base = (unsigned long)t->thread_info;
- unsigned long fp, sp = t->thread_info->pcb.ksp;
+ unsigned long base = (unsigned long)task_stack_page(t);
+ unsigned long fp, sp = task_thread_info(t)->pcb.ksp;
if (sp > base && sp+6*8 < base + 16*1024) {
fp = ((unsigned long*)sp)[6];
@@ -523,7 +508,7 @@ get_wchan(struct task_struct *p)
pc = thread_saved_pc(p);
if (in_sched_functions(pc)) {
- schedule_frame = ((unsigned long *)p->thread_info->pcb.ksp)[6];
+ schedule_frame = ((unsigned long *)task_thread_info(p)->pcb.ksp)[6];
return ((unsigned long *)schedule_frame)[12];
}
return pc;
diff --git a/trunk/arch/alpha/kernel/ptrace.c b/trunk/arch/alpha/kernel/ptrace.c
index 9969d212e94d..0cd060598f9a 100644
--- a/trunk/arch/alpha/kernel/ptrace.c
+++ b/trunk/arch/alpha/kernel/ptrace.c
@@ -72,6 +72,13 @@ enum {
REG_R0 = 0, REG_F0 = 32, REG_FPCR = 63, REG_PC = 64
};
+#define PT_REG(reg) \
+ (PAGE_SIZE*2 - sizeof(struct pt_regs) + offsetof(struct pt_regs, reg))
+
+#define SW_REG(reg) \
+ (PAGE_SIZE*2 - sizeof(struct pt_regs) - sizeof(struct switch_stack) \
+ + offsetof(struct switch_stack, reg))
+
static int regoff[] = {
PT_REG( r0), PT_REG( r1), PT_REG( r2), PT_REG( r3),
PT_REG( r4), PT_REG( r5), PT_REG( r6), PT_REG( r7),
@@ -103,14 +110,14 @@ get_reg_addr(struct task_struct * task, unsigned long regno)
unsigned long *addr;
if (regno == 30) {
- addr = &task->thread_info->pcb.usp;
+ addr = &task_thread_info(task)->pcb.usp;
} else if (regno == 65) {
- addr = &task->thread_info->pcb.unique;
+ addr = &task_thread_info(task)->pcb.unique;
} else if (regno == 31 || regno > 65) {
zero = 0;
addr = &zero;
} else {
- addr = (void *)task->thread_info + regoff[regno];
+ addr = task_stack_page(task) + regoff[regno];
}
return addr;
}
@@ -125,7 +132,7 @@ get_reg(struct task_struct * task, unsigned long regno)
if (regno == 63) {
unsigned long fpcr = *get_reg_addr(task, regno);
unsigned long swcr
- = task->thread_info->ieee_state & IEEE_SW_MASK;
+ = task_thread_info(task)->ieee_state & IEEE_SW_MASK;
swcr = swcr_update_status(swcr, fpcr);
return fpcr | swcr;
}
@@ -139,8 +146,8 @@ static int
put_reg(struct task_struct *task, unsigned long regno, unsigned long data)
{
if (regno == 63) {
- task->thread_info->ieee_state
- = ((task->thread_info->ieee_state & ~IEEE_SW_MASK)
+ task_thread_info(task)->ieee_state
+ = ((task_thread_info(task)->ieee_state & ~IEEE_SW_MASK)
| (data & IEEE_SW_MASK));
data = (data & FPCR_DYN_MASK) | ieee_swcr_to_fpcr(data);
}
@@ -188,35 +195,35 @@ ptrace_set_bpt(struct task_struct * child)
* branch (emulation can be tricky for fp branches).
*/
displ = ((s32)(insn << 11)) >> 9;
- child->thread_info->bpt_addr[nsaved++] = pc + 4;
+ task_thread_info(child)->bpt_addr[nsaved++] = pc + 4;
if (displ) /* guard against unoptimized code */
- child->thread_info->bpt_addr[nsaved++]
+ task_thread_info(child)->bpt_addr[nsaved++]
= pc + 4 + displ;
DBG(DBG_BPT, ("execing branch\n"));
} else if (op_code == 0x1a) {
reg_b = (insn >> 16) & 0x1f;
- child->thread_info->bpt_addr[nsaved++] = get_reg(child, reg_b);
+ task_thread_info(child)->bpt_addr[nsaved++] = get_reg(child, reg_b);
DBG(DBG_BPT, ("execing jump\n"));
} else {
- child->thread_info->bpt_addr[nsaved++] = pc + 4;
+ task_thread_info(child)->bpt_addr[nsaved++] = pc + 4;
DBG(DBG_BPT, ("execing normal insn\n"));
}
/* install breakpoints: */
for (i = 0; i < nsaved; ++i) {
- res = read_int(child, child->thread_info->bpt_addr[i],
+ res = read_int(child, task_thread_info(child)->bpt_addr[i],
(int *) &insn);
if (res < 0)
return res;
- child->thread_info->bpt_insn[i] = insn;
+ task_thread_info(child)->bpt_insn[i] = insn;
DBG(DBG_BPT, (" -> next_pc=%lx\n",
- child->thread_info->bpt_addr[i]));
- res = write_int(child, child->thread_info->bpt_addr[i],
+ task_thread_info(child)->bpt_addr[i]));
+ res = write_int(child, task_thread_info(child)->bpt_addr[i],
BREAKINST);
if (res < 0)
return res;
}
- child->thread_info->bpt_nsaved = nsaved;
+ task_thread_info(child)->bpt_nsaved = nsaved;
return 0;
}
@@ -227,9 +234,9 @@ ptrace_set_bpt(struct task_struct * child)
int
ptrace_cancel_bpt(struct task_struct * child)
{
- int i, nsaved = child->thread_info->bpt_nsaved;
+ int i, nsaved = task_thread_info(child)->bpt_nsaved;
- child->thread_info->bpt_nsaved = 0;
+ task_thread_info(child)->bpt_nsaved = 0;
if (nsaved > 2) {
printk("ptrace_cancel_bpt: bogus nsaved: %d!\n", nsaved);
@@ -237,8 +244,8 @@ ptrace_cancel_bpt(struct task_struct * child)
}
for (i = 0; i < nsaved; ++i) {
- write_int(child, child->thread_info->bpt_addr[i],
- child->thread_info->bpt_insn[i]);
+ write_int(child, task_thread_info(child)->bpt_addr[i],
+ task_thread_info(child)->bpt_insn[i]);
}
return (nsaved != 0);
}
@@ -355,7 +362,7 @@ do_sys_ptrace(long request, long pid, long addr, long data,
if (!valid_signal(data))
break;
/* Mark single stepping. */
- child->thread_info->bpt_nsaved = -1;
+ task_thread_info(child)->bpt_nsaved = -1;
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
child->exit_code = data;
wake_up_process(child);
diff --git a/trunk/arch/alpha/kernel/smp.c b/trunk/arch/alpha/kernel/smp.c
index da0be3465791..4b873527ce1c 100644
--- a/trunk/arch/alpha/kernel/smp.c
+++ b/trunk/arch/alpha/kernel/smp.c
@@ -302,7 +302,7 @@ secondary_cpu_start(int cpuid, struct task_struct *idle)
+ hwrpb->processor_offset
+ cpuid * hwrpb->processor_size);
hwpcb = (struct pcb_struct *) cpu->hwpcb;
- ipcb = &idle->thread_info->pcb;
+ ipcb = &task_thread_info(idle)->pcb;
/* Initialize the CPU's HWPCB to something just good enough for
us to get started. Immediately after starting, we'll swpctx
diff --git a/trunk/arch/alpha/mm/init.c b/trunk/arch/alpha/mm/init.c
index 90752f6d8867..486d7945583d 100644
--- a/trunk/arch/alpha/mm/init.c
+++ b/trunk/arch/alpha/mm/init.c
@@ -7,6 +7,7 @@
/* 2.3.x zone allocator, 1999 Andrea Arcangeli */
#include
+#include
#include
#include
#include
diff --git a/trunk/arch/arm/common/rtctime.c b/trunk/arch/arm/common/rtctime.c
index 00f6278f42b8..48b1e19b131f 100644
--- a/trunk/arch/arm/common/rtctime.c
+++ b/trunk/arch/arm/common/rtctime.c
@@ -17,6 +17,7 @@
#include
#include
#include
+#include
#include
#include
diff --git a/trunk/arch/arm/kernel/apm.c b/trunk/arch/arm/kernel/apm.c
index b9df1b782bb1..766b6c05c6db 100644
--- a/trunk/arch/arm/kernel/apm.c
+++ b/trunk/arch/arm/kernel/apm.c
@@ -18,6 +18,7 @@
#include
#include
#include
+#include
#include
#include
#include
diff --git a/trunk/arch/arm/kernel/process.c b/trunk/arch/arm/kernel/process.c
index 54a21bdcba5c..4b4e4cf79c80 100644
--- a/trunk/arch/arm/kernel/process.c
+++ b/trunk/arch/arm/kernel/process.c
@@ -342,10 +342,10 @@ void flush_thread(void)
void release_thread(struct task_struct *dead_task)
{
#if defined(CONFIG_VFP)
- vfp_release_thread(&dead_task->thread_info->vfpstate);
+ vfp_release_thread(&task_thread_info(dead_task)->vfpstate);
#endif
#if defined(CONFIG_IWMMXT)
- iwmmxt_task_release(dead_task->thread_info);
+ iwmmxt_task_release(task_thread_info(dead_task));
#endif
}
@@ -355,10 +355,9 @@ int
copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start,
unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs)
{
- struct thread_info *thread = p->thread_info;
- struct pt_regs *childregs;
+ struct thread_info *thread = task_thread_info(p);
+ struct pt_regs *childregs = task_pt_regs(p);
- childregs = (void *)thread + THREAD_START_SP - sizeof(*regs);
*childregs = *regs;
childregs->ARM_r0 = 0;
childregs->ARM_sp = stack_start;
@@ -460,8 +459,8 @@ unsigned long get_wchan(struct task_struct *p)
if (!p || p == current || p->state == TASK_RUNNING)
return 0;
- stack_start = (unsigned long)(p->thread_info + 1);
- stack_end = ((unsigned long)p->thread_info) + THREAD_SIZE;
+ stack_start = (unsigned long)end_of_stack(p);
+ stack_end = (unsigned long)task_stack_page(p) + THREAD_SIZE;
fp = thread_saved_fp(p);
do {
diff --git a/trunk/arch/arm/kernel/ptrace.c b/trunk/arch/arm/kernel/ptrace.c
index 2b84f78d7b0f..e591f72bcdeb 100644
--- a/trunk/arch/arm/kernel/ptrace.c
+++ b/trunk/arch/arm/kernel/ptrace.c
@@ -54,23 +54,6 @@
#define BREAKINST_THUMB 0xde01
#endif
-/*
- * Get the address of the live pt_regs for the specified task.
- * These are saved onto the top kernel stack when the process
- * is not running.
- *
- * Note: if a user thread is execve'd from kernel space, the
- * kernel stack will not be empty on entry to the kernel, so
- * ptracing these tasks will fail.
- */
-static inline struct pt_regs *
-get_user_regs(struct task_struct *task)
-{
- return (struct pt_regs *)
- ((unsigned long)task->thread_info + THREAD_SIZE -
- 8 - sizeof(struct pt_regs));
-}
-
/*
* this routine will get a word off of the processes privileged stack.
* the offset is how far from the base addr as stored in the THREAD.
@@ -79,7 +62,7 @@ get_user_regs(struct task_struct *task)
*/
static inline long get_user_reg(struct task_struct *task, int offset)
{
- return get_user_regs(task)->uregs[offset];
+ return task_pt_regs(task)->uregs[offset];
}
/*
@@ -91,7 +74,7 @@ static inline long get_user_reg(struct task_struct *task, int offset)
static inline int
put_user_reg(struct task_struct *task, int offset, long data)
{
- struct pt_regs newregs, *regs = get_user_regs(task);
+ struct pt_regs newregs, *regs = task_pt_regs(task);
int ret = -EINVAL;
newregs = *regs;
@@ -421,7 +404,7 @@ void ptrace_set_bpt(struct task_struct *child)
u32 insn;
int res;
- regs = get_user_regs(child);
+ regs = task_pt_regs(child);
pc = instruction_pointer(regs);
if (thumb_mode(regs)) {
@@ -572,7 +555,7 @@ static int ptrace_write_user(struct task_struct *tsk, unsigned long off,
*/
static int ptrace_getregs(struct task_struct *tsk, void __user *uregs)
{
- struct pt_regs *regs = get_user_regs(tsk);
+ struct pt_regs *regs = task_pt_regs(tsk);
return copy_to_user(uregs, regs, sizeof(struct pt_regs)) ? -EFAULT : 0;
}
@@ -587,7 +570,7 @@ static int ptrace_setregs(struct task_struct *tsk, void __user *uregs)
ret = -EFAULT;
if (copy_from_user(&newregs, uregs, sizeof(struct pt_regs)) == 0) {
- struct pt_regs *regs = get_user_regs(tsk);
+ struct pt_regs *regs = task_pt_regs(tsk);
ret = -EINVAL;
if (valid_user_regs(&newregs)) {
@@ -604,7 +587,7 @@ static int ptrace_setregs(struct task_struct *tsk, void __user *uregs)
*/
static int ptrace_getfpregs(struct task_struct *tsk, void __user *ufp)
{
- return copy_to_user(ufp, &tsk->thread_info->fpstate,
+ return copy_to_user(ufp, &task_thread_info(tsk)->fpstate,
sizeof(struct user_fp)) ? -EFAULT : 0;
}
@@ -613,7 +596,7 @@ static int ptrace_getfpregs(struct task_struct *tsk, void __user *ufp)
*/
static int ptrace_setfpregs(struct task_struct *tsk, void __user *ufp)
{
- struct thread_info *thread = tsk->thread_info;
+ struct thread_info *thread = task_thread_info(tsk);
thread->used_cp[1] = thread->used_cp[2] = 1;
return copy_from_user(&thread->fpstate, ufp,
sizeof(struct user_fp)) ? -EFAULT : 0;
@@ -626,7 +609,7 @@ static int ptrace_setfpregs(struct task_struct *tsk, void __user *ufp)
*/
static int ptrace_getwmmxregs(struct task_struct *tsk, void __user *ufp)
{
- struct thread_info *thread = tsk->thread_info;
+ struct thread_info *thread = task_thread_info(tsk);
void *ptr = &thread->fpstate;
if (!test_ti_thread_flag(thread, TIF_USING_IWMMXT))
@@ -643,7 +626,7 @@ static int ptrace_getwmmxregs(struct task_struct *tsk, void __user *ufp)
*/
static int ptrace_setwmmxregs(struct task_struct *tsk, void __user *ufp)
{
- struct thread_info *thread = tsk->thread_info;
+ struct thread_info *thread = task_thread_info(tsk);
void *ptr = &thread->fpstate;
if (!test_ti_thread_flag(thread, TIF_USING_IWMMXT))
@@ -779,7 +762,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
#endif
case PTRACE_GET_THREAD_AREA:
- ret = put_user(child->thread_info->tp_value,
+ ret = put_user(task_thread_info(child)->tp_value,
(unsigned long __user *) data);
break;
diff --git a/trunk/arch/arm/kernel/smp.c b/trunk/arch/arm/kernel/smp.c
index 373c0959bc2f..7338948bd7d3 100644
--- a/trunk/arch/arm/kernel/smp.c
+++ b/trunk/arch/arm/kernel/smp.c
@@ -114,7 +114,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
* We need to tell the secondary core where to find
* its stack and the page tables.
*/
- secondary_data.stack = (void *)idle->thread_info + THREAD_START_SP;
+ secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
secondary_data.pgdir = virt_to_phys(pgd);
wmb();
@@ -245,7 +245,7 @@ void __cpuexit cpu_die(void)
__asm__("mov sp, %0\n"
" b secondary_start_kernel"
:
- : "r" ((void *)current->thread_info + THREAD_SIZE - 8));
+ : "r" (task_stack_page(current) + THREAD_SIZE - 8));
}
#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/trunk/arch/arm/kernel/traps.c b/trunk/arch/arm/kernel/traps.c
index c9fe6f5f7ee3..93cfd3ffcc72 100644
--- a/trunk/arch/arm/kernel/traps.c
+++ b/trunk/arch/arm/kernel/traps.c
@@ -164,7 +164,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
} else if (verify_stack(fp)) {
printk("invalid frame pointer 0x%08x", fp);
ok = 0;
- } else if (fp < (unsigned long)(tsk->thread_info + 1))
+ } else if (fp < (unsigned long)end_of_stack(tsk))
printk("frame pointer underflow");
printk("\n");
@@ -210,7 +210,7 @@ static void __die(const char *str, int err, struct thread_info *thread, struct p
if (!user_mode(regs) || in_interrupt()) {
dump_mem("Stack: ", regs->ARM_sp,
- THREAD_SIZE + (unsigned long)tsk->thread_info);
+ THREAD_SIZE + (unsigned long)task_stack_page(tsk));
dump_backtrace(regs, tsk);
dump_instr(regs);
}
diff --git a/trunk/arch/arm/plat-omap/dma.c b/trunk/arch/arm/plat-omap/dma.c
index f5cc21ad0956..a4e5ac77f6df 100644
--- a/trunk/arch/arm/plat-omap/dma.c
+++ b/trunk/arch/arm/plat-omap/dma.c
@@ -64,7 +64,7 @@ static int dma_chan_count;
static spinlock_t dma_chan_lock;
static struct omap_dma_lch dma_chan[OMAP_LOGICAL_DMA_CH_COUNT];
-const static u8 omap1_dma_irq[OMAP_LOGICAL_DMA_CH_COUNT] = {
+static const u8 omap1_dma_irq[OMAP_LOGICAL_DMA_CH_COUNT] = {
INT_DMA_CH0_6, INT_DMA_CH1_7, INT_DMA_CH2_8, INT_DMA_CH3,
INT_DMA_CH4, INT_DMA_CH5, INT_1610_DMA_CH6, INT_1610_DMA_CH7,
INT_1610_DMA_CH8, INT_1610_DMA_CH9, INT_1610_DMA_CH10,
diff --git a/trunk/arch/arm26/kernel/armksyms.c b/trunk/arch/arm26/kernel/armksyms.c
index 35514b398e2e..811a6376c624 100644
--- a/trunk/arch/arm26/kernel/armksyms.c
+++ b/trunk/arch/arm26/kernel/armksyms.c
@@ -35,7 +35,6 @@
#include
#include
-extern void dump_thread(struct pt_regs *, struct user *);
extern int dump_fpu(struct pt_regs *, struct user_fp_struct *);
extern void inswb(unsigned int port, void *to, int len);
extern void outswb(unsigned int port, const void *to, int len);
diff --git a/trunk/arch/arm26/kernel/process.c b/trunk/arch/arm26/kernel/process.c
index 15833a0057dd..386305659171 100644
--- a/trunk/arch/arm26/kernel/process.c
+++ b/trunk/arch/arm26/kernel/process.c
@@ -277,10 +277,9 @@ int
copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start,
unsigned long unused, struct task_struct *p, struct pt_regs *regs)
{
- struct thread_info *thread = p->thread_info;
- struct pt_regs *childregs;
+ struct thread_info *thread = task_thread_info(p);
+ struct pt_regs *childregs = task_pt_regs(p);
- childregs = __get_user_regs(thread);
*childregs = *regs;
childregs->ARM_r0 = 0;
childregs->ARM_sp = stack_start;
diff --git a/trunk/arch/arm26/kernel/ptrace.c b/trunk/arch/arm26/kernel/ptrace.c
index 4e6b7356a722..3c3371d4683e 100644
--- a/trunk/arch/arm26/kernel/ptrace.c
+++ b/trunk/arch/arm26/kernel/ptrace.c
@@ -39,21 +39,6 @@
*/
#define BREAKINST_ARM 0xef9f0001
-/*
- * Get the address of the live pt_regs for the specified task.
- * These are saved onto the top kernel stack when the process
- * is not running.
- *
- * Note: if a user thread is execve'd from kernel space, the
- * kernel stack will not be empty on entry to the kernel, so
- * ptracing these tasks will fail.
- */
-static inline struct pt_regs *
-get_user_regs(struct task_struct *task)
-{
- return __get_user_regs(task->thread_info);
-}
-
/*
* this routine will get a word off of the processes privileged stack.
* the offset is how far from the base addr as stored in the THREAD.
@@ -62,7 +47,7 @@ get_user_regs(struct task_struct *task)
*/
static inline long get_user_reg(struct task_struct *task, int offset)
{
- return get_user_regs(task)->uregs[offset];
+ return task_pt_regs(task)->uregs[offset];
}
/*
@@ -74,7 +59,7 @@ static inline long get_user_reg(struct task_struct *task, int offset)
static inline int
put_user_reg(struct task_struct *task, int offset, long data)
{
- struct pt_regs newregs, *regs = get_user_regs(task);
+ struct pt_regs newregs, *regs = task_pt_regs(task);
int ret = -EINVAL;
newregs = *regs;
@@ -377,7 +362,7 @@ void ptrace_set_bpt(struct task_struct *child)
u32 insn;
int res;
- regs = get_user_regs(child);
+ regs = task_pt_regs(child);
pc = instruction_pointer(regs);
res = read_instr(child, pc, &insn);
@@ -500,7 +485,7 @@ static int ptrace_write_user(struct task_struct *tsk, unsigned long off,
*/
static int ptrace_getregs(struct task_struct *tsk, void *uregs)
{
- struct pt_regs *regs = get_user_regs(tsk);
+ struct pt_regs *regs = task_pt_regs(tsk);
return copy_to_user(uregs, regs, sizeof(struct pt_regs)) ? -EFAULT : 0;
}
@@ -515,7 +500,7 @@ static int ptrace_setregs(struct task_struct *tsk, void *uregs)
ret = -EFAULT;
if (copy_from_user(&newregs, uregs, sizeof(struct pt_regs)) == 0) {
- struct pt_regs *regs = get_user_regs(tsk);
+ struct pt_regs *regs = task_pt_regs(tsk);
ret = -EINVAL;
if (valid_user_regs(&newregs)) {
@@ -532,7 +517,7 @@ static int ptrace_setregs(struct task_struct *tsk, void *uregs)
*/
static int ptrace_getfpregs(struct task_struct *tsk, void *ufp)
{
- return copy_to_user(ufp, &tsk->thread_info->fpstate,
+ return copy_to_user(ufp, &task_thread_info(tsk)->fpstate,
sizeof(struct user_fp)) ? -EFAULT : 0;
}
@@ -542,7 +527,7 @@ static int ptrace_getfpregs(struct task_struct *tsk, void *ufp)
static int ptrace_setfpregs(struct task_struct *tsk, void *ufp)
{
set_stopped_child_used_math(tsk);
- return copy_from_user(&tsk->thread_info->fpstate, ufp,
+ return copy_from_user(&task_threas_info(tsk)->fpstate, ufp,
sizeof(struct user_fp)) ? -EFAULT : 0;
}
diff --git a/trunk/arch/arm26/kernel/traps.c b/trunk/arch/arm26/kernel/traps.c
index f64f59022392..5847ea5d7747 100644
--- a/trunk/arch/arm26/kernel/traps.c
+++ b/trunk/arch/arm26/kernel/traps.c
@@ -132,7 +132,7 @@ static void dump_instr(struct pt_regs *regs)
/*static*/ void __dump_stack(struct task_struct *tsk, unsigned long sp)
{
- dump_mem("Stack: ", sp, 8192+(unsigned long)tsk->thread_info);
+ dump_mem("Stack: ", sp, 8192+(unsigned long)task_stack_page(tsk));
}
void dump_stack(void)
@@ -158,7 +158,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
} else if (verify_stack(fp)) {
printk("invalid frame pointer 0x%08x", fp);
ok = 0;
- } else if (fp < (unsigned long)(tsk->thread_info + 1))
+ } else if (fp < (unsigned long)end_of_stack(tsk))
printk("frame pointer underflow");
printk("\n");
@@ -168,7 +168,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
/* FIXME - this is probably wrong.. */
void show_stack(struct task_struct *task, unsigned long *sp) {
- dump_mem("Stack: ", (unsigned long)sp, 8192+(unsigned long)task->thread_info);
+ dump_mem("Stack: ", (unsigned long)sp, 8192+(unsigned long)task_stack_page(task));
}
DEFINE_SPINLOCK(die_lock);
@@ -187,7 +187,7 @@ NORET_TYPE void die(const char *str, struct pt_regs *regs, int err)
printk("CPU: %d\n", smp_processor_id());
show_regs(regs);
printk("Process %s (pid: %d, stack limit = 0x%p)\n",
- current->comm, current->pid, tsk->thread_info + 1);
+ current->comm, current->pid, end_of_stack(tsk));
if (!user_mode(regs) || in_interrupt()) {
__dump_stack(tsk, (unsigned long)(regs + 1));
diff --git a/trunk/arch/cris/arch-v10/drivers/ds1302.c b/trunk/arch/cris/arch-v10/drivers/ds1302.c
index 10795f67f687..b100f26497c4 100644
--- a/trunk/arch/cris/arch-v10/drivers/ds1302.c
+++ b/trunk/arch/cris/arch-v10/drivers/ds1302.c
@@ -148,6 +148,7 @@
#include
#include
#include
+#include
#include
#include
diff --git a/trunk/arch/cris/arch-v10/drivers/pcf8563.c b/trunk/arch/cris/arch-v10/drivers/pcf8563.c
index f2c55742e90c..af517c210383 100644
--- a/trunk/arch/cris/arch-v10/drivers/pcf8563.c
+++ b/trunk/arch/cris/arch-v10/drivers/pcf8563.c
@@ -28,6 +28,7 @@
#include
#include
#include
+#include
#include
#include
diff --git a/trunk/arch/cris/arch-v10/kernel/process.c b/trunk/arch/cris/arch-v10/kernel/process.c
index 69e28b4057e8..0a675ce9e099 100644
--- a/trunk/arch/cris/arch-v10/kernel/process.c
+++ b/trunk/arch/cris/arch-v10/kernel/process.c
@@ -79,7 +79,7 @@ void hard_reset_now (void)
*/
unsigned long thread_saved_pc(struct task_struct *t)
{
- return (unsigned long)user_regs(t->thread_info)->irp;
+ return task_pt_regs(t)->irp;
}
static void kernel_thread_helper(void* dummy, int (*fn)(void *), void * arg)
@@ -128,7 +128,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
* remember that the task_struct doubles as the kernel stack for the task
*/
- childregs = user_regs(p->thread_info);
+ childregs = task_pt_regs(p);
*childregs = *regs; /* struct copy of pt_regs */
diff --git a/trunk/arch/cris/arch-v10/kernel/ptrace.c b/trunk/arch/cris/arch-v10/kernel/ptrace.c
index 6cbd34a27b90..f214f74f264e 100644
--- a/trunk/arch/cris/arch-v10/kernel/ptrace.c
+++ b/trunk/arch/cris/arch-v10/kernel/ptrace.c
@@ -37,7 +37,7 @@ inline long get_reg(struct task_struct *task, unsigned int regno)
if (regno == PT_USP)
return task->thread.usp;
else if (regno < PT_MAX)
- return ((unsigned long *)user_regs(task->thread_info))[regno];
+ return ((unsigned long *)task_pt_regs(task))[regno];
else
return 0;
}
@@ -51,7 +51,7 @@ inline int put_reg(struct task_struct *task, unsigned int regno,
if (regno == PT_USP)
task->thread.usp = data;
else if (regno < PT_MAX)
- ((unsigned long *)user_regs(task->thread_info))[regno] = data;
+ ((unsigned long *)task_pt_regs(task))[regno] = data;
else
return -1;
return 0;
diff --git a/trunk/arch/cris/arch-v32/kernel/process.c b/trunk/arch/cris/arch-v32/kernel/process.c
index 882be42114f7..843513102d3c 100644
--- a/trunk/arch/cris/arch-v32/kernel/process.c
+++ b/trunk/arch/cris/arch-v32/kernel/process.c
@@ -96,7 +96,7 @@ hard_reset_now(void)
*/
unsigned long thread_saved_pc(struct task_struct *t)
{
- return (unsigned long)user_regs(t->thread_info)->erp;
+ return task_pt_regs(t)->erp;
}
static void
@@ -148,7 +148,7 @@ copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
* fix it up. Note: the task_struct doubles as the kernel stack for the
* task.
*/
- childregs = user_regs(p->thread_info);
+ childregs = task_pt_regs(p);
*childregs = *regs; /* Struct copy of pt_regs. */
p->set_child_tid = p->clear_child_tid = NULL;
childregs->r10 = 0; /* Child returns 0 after a fork/clone. */
@@ -157,7 +157,7 @@ copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
* The TLS is in $mof beacuse it is the 5th argument to sys_clone.
*/
if (p->mm && (clone_flags & CLONE_SETTLS)) {
- p->thread_info->tls = regs->mof;
+ task_thread_info(p)->tls = regs->mof;
}
/* Put the switch stack right below the pt_regs. */
diff --git a/trunk/arch/cris/arch-v32/kernel/ptrace.c b/trunk/arch/cris/arch-v32/kernel/ptrace.c
index 5528b83a622b..82cf2e3624a4 100644
--- a/trunk/arch/cris/arch-v32/kernel/ptrace.c
+++ b/trunk/arch/cris/arch-v32/kernel/ptrace.c
@@ -46,7 +46,7 @@ long get_reg(struct task_struct *task, unsigned int regno)
unsigned long ret;
if (regno <= PT_EDA)
- ret = ((unsigned long *)user_regs(task->thread_info))[regno];
+ ret = ((unsigned long *)task_pt_regs(task))[regno];
else if (regno == PT_USP)
ret = task->thread.usp;
else if (regno == PT_PPC)
@@ -65,13 +65,13 @@ long get_reg(struct task_struct *task, unsigned int regno)
int put_reg(struct task_struct *task, unsigned int regno, unsigned long data)
{
if (regno <= PT_EDA)
- ((unsigned long *)user_regs(task->thread_info))[regno] = data;
+ ((unsigned long *)task_pt_regs(task))[regno] = data;
else if (regno == PT_USP)
task->thread.usp = data;
else if (regno == PT_PPC) {
/* Write pseudo-PC to ERP only if changed. */
if (data != get_pseudo_pc(task))
- ((unsigned long *)user_regs(task->thread_info))[PT_ERP] = data;
+ task_pt_regs(task)->erp = data;
} else if (regno <= PT_MAX)
return put_debugreg(task->pid, regno, data);
else
diff --git a/trunk/arch/cris/arch-v32/kernel/smp.c b/trunk/arch/cris/arch-v32/kernel/smp.c
index 13867f4fad16..da40d19a151e 100644
--- a/trunk/arch/cris/arch-v32/kernel/smp.c
+++ b/trunk/arch/cris/arch-v32/kernel/smp.c
@@ -113,10 +113,10 @@ smp_boot_one_cpu(int cpuid)
if (IS_ERR(idle))
panic("SMP: fork failed for CPU:%d", cpuid);
- idle->thread_info->cpu = cpuid;
+ task_thread_info(idle)->cpu = cpuid;
/* Information to the CPU that is about to boot */
- smp_init_current_idle_thread = idle->thread_info;
+ smp_init_current_idle_thread = task_thread_info(idle);
cpu_now_booting = cpuid;
/* Wait for CPU to come online */
diff --git a/trunk/arch/cris/arch-v32/mm/tlb.c b/trunk/arch/cris/arch-v32/mm/tlb.c
index b08a28bb58ab..9d75d7692303 100644
--- a/trunk/arch/cris/arch-v32/mm/tlb.c
+++ b/trunk/arch/cris/arch-v32/mm/tlb.c
@@ -198,9 +198,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
per_cpu(current_pgd, cpu) = next->pgd;
/* Switch context in the MMU. */
- if (tsk && tsk->thread_info)
+ if (tsk && task_thread_info(tsk))
{
- SPEC_REG_WR(SPEC_REG_PID, next->context.page_id | tsk->thread_info->tls);
+ SPEC_REG_WR(SPEC_REG_PID, next->context.page_id | task_thread_info(tsk)->tls);
}
else
{
diff --git a/trunk/arch/cris/kernel/crisksyms.c b/trunk/arch/cris/kernel/crisksyms.c
index 85833d704ebb..de39725da920 100644
--- a/trunk/arch/cris/kernel/crisksyms.c
+++ b/trunk/arch/cris/kernel/crisksyms.c
@@ -21,7 +21,6 @@
#include
#include
-extern void dump_thread(struct pt_regs *, struct user *);
extern unsigned long get_cmos_time(void);
extern void __Udiv(void);
extern void __Umod(void);
@@ -33,7 +32,6 @@ extern void __lshrdi3(void);
extern void iounmap(volatile void * __iomem);
/* Platform dependent support */
-EXPORT_SYMBOL(dump_thread);
EXPORT_SYMBOL(kernel_thread);
EXPORT_SYMBOL(get_cmos_time);
EXPORT_SYMBOL(loops_per_usec);
diff --git a/trunk/arch/cris/kernel/process.c b/trunk/arch/cris/kernel/process.c
index 7c80afb10460..4ab3e87115b6 100644
--- a/trunk/arch/cris/kernel/process.c
+++ b/trunk/arch/cris/kernel/process.c
@@ -257,34 +257,6 @@ void flush_thread(void)
{
}
-/*
- * fill in the user structure for a core dump..
- */
-void dump_thread(struct pt_regs * regs, struct user * dump)
-{
-#if 0
- int i;
-
- /* changed the size calculations - should hopefully work better. lbt */
- dump->magic = CMAGIC;
- dump->start_code = 0;
- dump->start_stack = regs->esp & ~(PAGE_SIZE - 1);
- dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
- dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
- dump->u_dsize -= dump->u_tsize;
- dump->u_ssize = 0;
- for (i = 0; i < 8; i++)
- dump->u_debugreg[i] = current->debugreg[i];
-
- if (dump->start_stack < TASK_SIZE)
- dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
-
- dump->regs = *regs;
-
- dump->u_fpvalid = dump_fpu (regs, &dump->i387);
-#endif
-}
-
/* Fill in the fpu structure for a core dump. */
int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
{
diff --git a/trunk/arch/frv/Kconfig b/trunk/arch/frv/Kconfig
index 61261b78ced7..60a617aff8ba 100644
--- a/trunk/arch/frv/Kconfig
+++ b/trunk/arch/frv/Kconfig
@@ -6,10 +6,6 @@ config FRV
bool
default y
-config UID16
- bool
- default y
-
config RWSEM_GENERIC_SPINLOCK
bool
default y
diff --git a/trunk/arch/frv/boot/Makefile b/trunk/arch/frv/boot/Makefile
index d75e0d771366..5dfc93fd945a 100644
--- a/trunk/arch/frv/boot/Makefile
+++ b/trunk/arch/frv/boot/Makefile
@@ -57,10 +57,10 @@ initrd:
# installation
#
install: $(CONFIGURE) Image
- sh ./install.sh $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) Image $(TOPDIR)/System.map "$(INSTALL_PATH)"
+ sh ./install.sh $(KERNELRELEASE) Image $(TOPDIR)/System.map "$(INSTALL_PATH)"
zinstall: $(CONFIGURE) zImage
- sh ./install.sh $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) zImage $(TOPDIR)/System.map "$(INSTALL_PATH)"
+ sh ./install.sh $(KERNELRELEASE) zImage $(TOPDIR)/System.map "$(INSTALL_PATH)"
#
# miscellany
diff --git a/trunk/arch/frv/kernel/frv_ksyms.c b/trunk/arch/frv/kernel/frv_ksyms.c
index 5f118c89d091..0f1c6cbc4f50 100644
--- a/trunk/arch/frv/kernel/frv_ksyms.c
+++ b/trunk/arch/frv/kernel/frv_ksyms.c
@@ -18,7 +18,6 @@
#include
#include
-extern void dump_thread(struct pt_regs *, struct user *);
extern long __memcpy_user(void *dst, const void *src, size_t count);
extern long __memset_user(void *dst, const void *src, size_t count);
@@ -27,7 +26,6 @@ extern long __memset_user(void *dst, const void *src, size_t count);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(iounmap);
-EXPORT_SYMBOL(dump_thread);
EXPORT_SYMBOL(strnlen);
EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(strstr);
diff --git a/trunk/arch/frv/kernel/process.c b/trunk/arch/frv/kernel/process.c
index 54a452136f00..0fff8a61ef2a 100644
--- a/trunk/arch/frv/kernel/process.c
+++ b/trunk/arch/frv/kernel/process.c
@@ -204,7 +204,7 @@ int copy_thread(int nr, unsigned long clone_flags,
regs0 = __kernel_frame0_ptr;
childregs0 = (struct pt_regs *)
- ((unsigned long) p->thread_info + THREAD_SIZE - USER_CONTEXT_SIZE);
+ (task_stack_page(p) + THREAD_SIZE - USER_CONTEXT_SIZE);
childregs = childregs0;
/* set up the userspace frame (the only place that the USP is stored) */
@@ -220,7 +220,7 @@ int copy_thread(int nr, unsigned long clone_flags,
*childregs = *regs;
childregs->sp = (unsigned long) childregs0;
childregs->next_frame = childregs0;
- childregs->gr15 = (unsigned long) p->thread_info;
+ childregs->gr15 = (unsigned long) task_thread_info(p);
childregs->gr29 = (unsigned long) p;
}
@@ -243,28 +243,6 @@ int copy_thread(int nr, unsigned long clone_flags,
return 0;
} /* end copy_thread() */
-/*
- * fill in the user structure for a core dump..
- */
-void dump_thread(struct pt_regs *regs, struct user *dump)
-{
-#if 0
- /* changed the size calculations - should hopefully work better. lbt */
- dump->magic = CMAGIC;
- dump->start_code = 0;
- dump->start_stack = user_stack(regs) & ~(PAGE_SIZE - 1);
- dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
- dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
- dump->u_dsize -= dump->u_tsize;
- dump->u_ssize = 0;
-
- if (dump->start_stack < TASK_SIZE)
- dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
-
- dump->regs = *(struct user_context *) regs;
-#endif
-}
-
/*
* sys_execve() executes a new program.
*/
diff --git a/trunk/arch/h8300/kernel/gpio.c b/trunk/arch/h8300/kernel/gpio.c
index 795682b873e2..d195568ca8a2 100644
--- a/trunk/arch/h8300/kernel/gpio.c
+++ b/trunk/arch/h8300/kernel/gpio.c
@@ -122,7 +122,7 @@ int h8300_get_gpio_dir(int port_bit)
static char *port_status(int portno)
{
static char result[10];
- const static char io[2]={'I','O'};
+ static const char io[2]={'I','O'};
char *rp;
int c;
unsigned char used,ddr;
@@ -143,7 +143,7 @@ static int gpio_proc_read(char *buf, char **start, off_t offset,
int len, int *unused_i, void *unused_v)
{
int c,outlen;
- const static char port_name[]="123456789ABCDEFGH";
+ static const char port_name[]="123456789ABCDEFGH";
outlen = 0;
for (c = 0; c < MAX_PORT; c++) {
if (ddrs[c] == NULL)
diff --git a/trunk/arch/h8300/kernel/h8300_ksyms.c b/trunk/arch/h8300/kernel/h8300_ksyms.c
index 5a630233112f..5cc76efaf7aa 100644
--- a/trunk/arch/h8300/kernel/h8300_ksyms.c
+++ b/trunk/arch/h8300/kernel/h8300_ksyms.c
@@ -22,11 +22,8 @@
//asmlinkage long long __lshrdi3 (long long, int);
extern char h8300_debug_device[];
-extern void dump_thread(struct pt_regs *, struct user *);
-
/* platform dependent support */
-EXPORT_SYMBOL(dump_thread);
EXPORT_SYMBOL(strnlen);
EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(strstr);
@@ -103,10 +100,6 @@ EXPORT_SYMBOL(__udivsi3);
EXPORT_SYMBOL(__umoddi3);
EXPORT_SYMBOL(__umodsi3);
-#ifdef MAGIC_ROM_PTR
-EXPORT_SYMBOL(is_in_rom);
-#endif
-
EXPORT_SYMBOL(h8300_reserved_gpio);
EXPORT_SYMBOL(h8300_free_gpio);
EXPORT_SYMBOL(h8300_set_gpio_dir);
diff --git a/trunk/arch/h8300/kernel/process.c b/trunk/arch/h8300/kernel/process.c
index fe21adf3e75e..ed79ae20e88d 100644
--- a/trunk/arch/h8300/kernel/process.c
+++ b/trunk/arch/h8300/kernel/process.c
@@ -195,7 +195,7 @@ int copy_thread(int nr, unsigned long clone_flags,
{
struct pt_regs * childregs;
- childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
+ childregs = (struct pt_regs *) (THREAD_SIZE + task_stack_page(p)) - 1;
*childregs = *regs;
childregs->retpc = (unsigned long) ret_from_fork;
@@ -207,34 +207,6 @@ int copy_thread(int nr, unsigned long clone_flags,
return 0;
}
-/*
- * fill in the user structure for a core dump..
- */
-void dump_thread(struct pt_regs * regs, struct user * dump)
-{
-/* changed the size calculations - should hopefully work better. lbt */
- dump->magic = CMAGIC;
- dump->start_code = 0;
- dump->start_stack = rdusp() & ~(PAGE_SIZE - 1);
- dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
- dump->u_dsize = ((unsigned long) (current->mm->brk +
- (PAGE_SIZE-1))) >> PAGE_SHIFT;
- dump->u_dsize -= dump->u_tsize;
- dump->u_ssize = 0;
-
- dump->u_ar0 = (struct user_regs_struct *)(((int)(&dump->regs)) -((int)(dump)));
- dump->regs.er0 = regs->er0;
- dump->regs.er1 = regs->er1;
- dump->regs.er2 = regs->er2;
- dump->regs.er3 = regs->er3;
- dump->regs.er4 = regs->er4;
- dump->regs.er5 = regs->er5;
- dump->regs.er6 = regs->er6;
- dump->regs.orig_er0 = regs->orig_er0;
- dump->regs.ccr = regs->ccr;
- dump->regs.pc = regs->pc;
-}
-
/*
* sys_execve() executes a new program.
*/
diff --git a/trunk/arch/h8300/mm/memory.c b/trunk/arch/h8300/mm/memory.c
index f4ddece3216f..81eace93f867 100644
--- a/trunk/arch/h8300/mm/memory.c
+++ b/trunk/arch/h8300/mm/memory.c
@@ -55,16 +55,3 @@ unsigned long kernel_map(unsigned long paddr, unsigned long size,
return paddr;
}
-#ifdef MAGIC_ROM_PTR
-
-int is_in_rom(unsigned long addr)
-{
- /* Anything not in operational RAM is returned as in rom! */
- if (addr < _ramstart || addr >= _ramend)
- return 1;
- else
- return 0;
-}
-
-#endif
-
diff --git a/trunk/arch/h8300/platform/h8300h/ptrace_h8300h.c b/trunk/arch/h8300/platform/h8300h/ptrace_h8300h.c
index 6ac93c05a1ae..746b1ae672a1 100644
--- a/trunk/arch/h8300/platform/h8300h/ptrace_h8300h.c
+++ b/trunk/arch/h8300/platform/h8300h/ptrace_h8300h.c
@@ -98,7 +98,7 @@ struct optable {
.type = jmp, \
}
-const static struct optable optable_0[] = {
+static const struct optable optable_0[] = {
OPTABLE(0x00,0xff, 1,none), /* 0x00 */
OPTABLE(0x01,0xff,-1,none), /* 0x01 */
OPTABLE(0x02,0xfe, 1,none), /* 0x02-0x03 */
@@ -131,31 +131,31 @@ const static struct optable optable_0[] = {
OPTABLE(0x80,0x80, 1,none), /* 0x80-0xff */
};
-const static struct optable optable_1[] = {
+static const struct optable optable_1[] = {
OPTABLE(0x00,0xff,-3,none), /* 0x0100 */
OPTABLE(0x40,0xf0,-3,none), /* 0x0140-0x14f */
OPTABLE(0x80,0xf0, 1,none), /* 0x0180-0x018f */
OPTABLE(0xc0,0xc0, 2,none), /* 0x01c0-0x01ff */
};
-const static struct optable optable_2[] = {
+static const struct optable optable_2[] = {
OPTABLE(0x00,0x20, 2,none), /* 0x6a0?/0x6a8?/0x6b0?/0x6b8? */
OPTABLE(0x20,0x20, 3,none), /* 0x6a2?/0x6aa?/0x6b2?/0x6ba? */
};
-const static struct optable optable_3[] = {
+static const struct optable optable_3[] = {
OPTABLE(0x69,0xfb, 2,none), /* 0x010069/0x01006d/014069/0x01406d */
OPTABLE(0x6b,0xff,-4,none), /* 0x01006b/0x01406b */
OPTABLE(0x6f,0xff, 3,none), /* 0x01006f/0x01406f */
OPTABLE(0x78,0xff, 5,none), /* 0x010078/0x014078 */
};
-const static struct optable optable_4[] = {
+static const struct optable optable_4[] = {
OPTABLE(0x00,0x78, 3,none), /* 0x0100690?/0x01006d0?/0140690/0x01406d0?/0x0100698?/0x01006d8?/0140698?/0x01406d8? */
OPTABLE(0x20,0x78, 4,none), /* 0x0100692?/0x01006d2?/0140692/0x01406d2?/0x010069a?/0x01006da?/014069a?/0x01406da? */
};
-const static struct optables_list {
+static const struct optables_list {
const struct optable *ptr;
int size;
} optables[] = {
diff --git a/trunk/arch/h8300/platform/h8s/ints.c b/trunk/arch/h8300/platform/h8s/ints.c
index 5441cdd12a39..f6ed663bdde0 100644
--- a/trunk/arch/h8300/platform/h8s/ints.c
+++ b/trunk/arch/h8300/platform/h8s/ints.c
@@ -52,7 +52,7 @@ struct irq_pins {
unsigned char bit_no;
};
/* ISTR = 0 */
-const static struct irq_pins irq_assign_table0[16]={
+static const struct irq_pins irq_assign_table0[16]={
{H8300_GPIO_P5,H8300_GPIO_B0},{H8300_GPIO_P5,H8300_GPIO_B1},
{H8300_GPIO_P5,H8300_GPIO_B2},{H8300_GPIO_P5,H8300_GPIO_B3},
{H8300_GPIO_P5,H8300_GPIO_B4},{H8300_GPIO_P5,H8300_GPIO_B5},
@@ -63,7 +63,7 @@ const static struct irq_pins irq_assign_table0[16]={
{H8300_GPIO_PF,H8300_GPIO_B1},{H8300_GPIO_PF,H8300_GPIO_B2},
};
/* ISTR = 1 */
-const static struct irq_pins irq_assign_table1[16]={
+static const struct irq_pins irq_assign_table1[16]={
{H8300_GPIO_P8,H8300_GPIO_B0},{H8300_GPIO_P8,H8300_GPIO_B1},
{H8300_GPIO_P8,H8300_GPIO_B2},{H8300_GPIO_P8,H8300_GPIO_B3},
{H8300_GPIO_P8,H8300_GPIO_B4},{H8300_GPIO_P8,H8300_GPIO_B5},
diff --git a/trunk/arch/h8300/platform/h8s/ints_h8s.c b/trunk/arch/h8300/platform/h8s/ints_h8s.c
index f53de493e3e8..8268dfd12f1f 100644
--- a/trunk/arch/h8300/platform/h8s/ints_h8s.c
+++ b/trunk/arch/h8300/platform/h8s/ints_h8s.c
@@ -42,7 +42,7 @@ struct irq_pins {
unsigned char bit_no;
} __attribute__((aligned(1),packed));
/* ISTR = 0 */
-const static struct irq_pins irq_assign_table0[16]={
+static const struct irq_pins irq_assign_table0[16]={
{H8300_GPIO_P5,H8300_GPIO_B0},{H8300_GPIO_P5,H8300_GPIO_B1},
{H8300_GPIO_P5,H8300_GPIO_B2},{H8300_GPIO_P5,H8300_GPIO_B3},
{H8300_GPIO_P5,H8300_GPIO_B4},{H8300_GPIO_P5,H8300_GPIO_B5},
@@ -53,7 +53,7 @@ const static struct irq_pins irq_assign_table0[16]={
{H8300_GPIO_PF,H8300_GPIO_B1},{H8300_GPIO_PF,H8300_GPIO_B2},
};
/* ISTR = 1 */
-const static struct irq_pins irq_assign_table1[16]={
+static const struct irq_pins irq_assign_table1[16]={
{H8300_GPIO_P8,H8300_GPIO_B0},{H8300_GPIO_P8,H8300_GPIO_B1},
{H8300_GPIO_P8,H8300_GPIO_B2},{H8300_GPIO_P8,H8300_GPIO_B3},
{H8300_GPIO_P8,H8300_GPIO_B4},{H8300_GPIO_P8,H8300_GPIO_B5},
diff --git a/trunk/arch/i386/Kconfig b/trunk/arch/i386/Kconfig
index d849c6870e3a..d5d0df7f04fc 100644
--- a/trunk/arch/i386/Kconfig
+++ b/trunk/arch/i386/Kconfig
@@ -41,8 +41,21 @@ config ARCH_MAY_HAVE_PC_FDC
bool
default y
+config DMI
+ bool
+ default y
+
source "init/Kconfig"
+config DOUBLEFAULT
+ default y
+ bool "Enable doublefault exception handler" if EMBEDDED
+ help
+ This option allows trapping of rare doublefault exceptions that
+ would otherwise cause a system to silently reboot. Disabling this
+ option saves about 4k and might cause you much additional grey
+ hair.
+
menu "Processor type and features"
choice
@@ -645,17 +658,6 @@ config SECCOMP
source kernel/Kconfig.hz
-config PHYSICAL_START
- hex "Physical address where the kernel is loaded" if EMBEDDED
- default "0x100000"
- help
- This gives the physical address where the kernel is loaded.
- Primarily used in the case of kexec on panic where the
- fail safe kernel needs to run at a different address than
- the panic-ed kernel.
-
- Don't change this unless you know what you are doing.
-
config KEXEC
bool "kexec system call (EXPERIMENTAL)"
depends on EXPERIMENTAL
@@ -675,11 +677,31 @@ config KEXEC
config CRASH_DUMP
bool "kernel crash dumps (EXPERIMENTAL)"
- depends on EMBEDDED
depends on EXPERIMENTAL
depends on HIGHMEM
help
Generate crash dump after being started by kexec.
+
+config PHYSICAL_START
+ hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
+
+ default "0x1000000" if CRASH_DUMP
+ default "0x100000"
+ help
+ This gives the physical address where the kernel is loaded. Normally
+ for regular kernels this value is 0x100000 (1MB). But in the case
+ of kexec on panic the fail safe kernel needs to run at a different
+ address than the panic-ed kernel. This option is used to set the load
+ address for kernels used to capture crash dump on being kexec'ed
+ after panic. The default value for crash dump kernels is
+ 0x1000000 (16MB). This can also be set based on the "X" value as
+ specified in the "crashkernel=YM@XM" command line boot parameter
+ passed to the panic-ed kernel. Typically this parameter is set as
+ crashkernel=64M@16M. Please take a look at
+ Documentation/kdump/kdump.txt for more details about crash dumps.
+
+ Don't change this unless you know what you are doing.
+
endmenu
@@ -1051,3 +1073,7 @@ config X86_TRAMPOLINE
bool
depends on X86_SMP || (X86_VOYAGER && SMP)
default y
+
+config KTIME_SCALAR
+ bool
+ default y
diff --git a/trunk/arch/i386/Makefile b/trunk/arch/i386/Makefile
index b84119f9cc63..d3c0409d201c 100644
--- a/trunk/arch/i386/Makefile
+++ b/trunk/arch/i386/Makefile
@@ -37,7 +37,10 @@ CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
# CPU-specific tuning. Anything which can be shared with UML should go here.
include $(srctree)/arch/i386/Makefile.cpu
-cflags-$(CONFIG_REGPARM) += -mregparm=3
+# -mregparm=3 works ok on gcc-3.0 and later
+#
+cflags-$(CONFIG_REGPARM) += $(shell if [ $(call cc-version) -ge 0300 ] ; then \
+ echo "-mregparm=3"; fi ;)
# Disable unit-at-a-time mode, it makes gcc use a lot more stack
# due to the lack of sharing of stacklots.
@@ -100,7 +103,7 @@ AFLAGS += $(mflags-y)
boot := arch/i386/boot
.PHONY: zImage bzImage compressed zlilo bzlilo \
- zdisk bzdisk fdimage fdimage144 fdimage288 install kernel_install
+ zdisk bzdisk fdimage fdimage144 fdimage288 install
all: bzImage
@@ -122,8 +125,7 @@ zdisk bzdisk: vmlinux
fdimage fdimage144 fdimage288: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
-install: vmlinux
-install kernel_install:
+install:
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
archclean:
diff --git a/trunk/arch/i386/boot/Makefile b/trunk/arch/i386/boot/Makefile
index 1e71382d413a..f136752563b1 100644
--- a/trunk/arch/i386/boot/Makefile
+++ b/trunk/arch/i386/boot/Makefile
@@ -100,5 +100,5 @@ zlilo: $(BOOTIMAGE)
cp System.map $(INSTALL_PATH)/
if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
-install: $(BOOTIMAGE)
- sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $< System.map "$(INSTALL_PATH)"
+install:
+ sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)"
diff --git a/trunk/arch/i386/boot/install.sh b/trunk/arch/i386/boot/install.sh
index f17b40dfc0f4..5e44c736eea8 100644
--- a/trunk/arch/i386/boot/install.sh
+++ b/trunk/arch/i386/boot/install.sh
@@ -19,6 +19,20 @@
# $4 - default install path (blank if root directory)
#
+verify () {
+ if [ ! -f "$1" ]; then
+ echo "" 1>&2
+ echo " *** Missing file: $1" 1>&2
+ echo ' *** You need to run "make" before "make install".' 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+}
+
+# Make sure the files actually exist
+verify "$2"
+verify "$3"
+
# User may have a custom install script
if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
diff --git a/trunk/arch/i386/boot/video.S b/trunk/arch/i386/boot/video.S
index 92f669470142..2ac40c8244c4 100644
--- a/trunk/arch/i386/boot/video.S
+++ b/trunk/arch/i386/boot/video.S
@@ -97,7 +97,6 @@
#define PARAM_VESAPM_OFF 0x30
#define PARAM_LFB_PAGES 0x32
#define PARAM_VESA_ATTRIB 0x34
-#define PARAM_CAPABILITIES 0x36
/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */
#ifdef CONFIG_VIDEO_RETAIN
@@ -234,10 +233,6 @@ mopar_gr:
movw 18(%di), %ax
movl %eax, %fs:(PARAM_LFB_SIZE)
-# store mode capabilities
- movl 10(%di), %eax
- movl %eax, %fs:(PARAM_CAPABILITIES)
-
# switching the DAC to 8-bit is for <= 8 bpp only
movw %fs:(PARAM_LFB_DEPTH), %ax
cmpw $8, %ax
diff --git a/trunk/arch/i386/kernel/Makefile b/trunk/arch/i386/kernel/Makefile
index be1880bb75b4..60c3f76dfca4 100644
--- a/trunk/arch/i386/kernel/Makefile
+++ b/trunk/arch/i386/kernel/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
+obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_X86_NUMAQ) += numaq.o
obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o
obj-$(CONFIG_KPROBES) += kprobes.o
diff --git a/trunk/arch/i386/kernel/acpi/boot.c b/trunk/arch/i386/kernel/acpi/boot.c
index 447fa9e33ffb..2111529dea77 100644
--- a/trunk/arch/i386/kernel/acpi/boot.c
+++ b/trunk/arch/i386/kernel/acpi/boot.c
@@ -108,7 +108,7 @@ char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
if (!phys_addr || !size)
return NULL;
- if (phys_addr < (end_pfn_map << PAGE_SHIFT))
+ if (phys_addr+size <= (end_pfn_map << PAGE_SHIFT) + PAGE_SIZE)
return __va(phys_addr);
return NULL;
diff --git a/trunk/arch/i386/kernel/apic.c b/trunk/arch/i386/kernel/apic.c
index d8f94e78de8a..acd3f1e34ca6 100644
--- a/trunk/arch/i386/kernel/apic.c
+++ b/trunk/arch/i386/kernel/apic.c
@@ -26,6 +26,7 @@
#include
#include
#include
+#include
#include
#include
@@ -37,9 +38,16 @@
#include
#include
+#include
#include "io_ports.h"
+/*
+ * cpu_mask that denotes the CPUs that needs timer interrupt coming in as
+ * IPIs in place of local APIC timers
+ */
+static cpumask_t timer_bcast_ipi;
+
/*
* Knob to control our willingness to enable the local APIC.
*/
@@ -92,10 +100,6 @@ void __init apic_intr_init(void)
/* Using APIC to generate smp_local_timer_interrupt? */
int using_apic_timer = 0;
-static DEFINE_PER_CPU(int, prof_multiplier) = 1;
-static DEFINE_PER_CPU(int, prof_old_multiplier) = 1;
-static DEFINE_PER_CPU(int, prof_counter) = 1;
-
static int enabled_via_apicbase;
void enable_NMI_through_LVT0 (void * dummy)
@@ -935,11 +939,16 @@ void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound;
static void __setup_APIC_LVTT(unsigned int clocks)
{
unsigned int lvtt_value, tmp_value, ver;
+ int cpu = smp_processor_id();
ver = GET_APIC_VERSION(apic_read(APIC_LVR));
lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
if (!APIC_INTEGRATED(ver))
lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
+
+ if (cpu_isset(cpu, timer_bcast_ipi))
+ lvtt_value |= APIC_LVT_MASKED;
+
apic_write_around(APIC_LVTT, lvtt_value);
/*
@@ -1072,7 +1081,7 @@ void __devinit setup_secondary_APIC_clock(void)
setup_APIC_timer(calibration_result);
}
-void __devinit disable_APIC_timer(void)
+void disable_APIC_timer(void)
{
if (using_apic_timer) {
unsigned long v;
@@ -1084,7 +1093,10 @@ void __devinit disable_APIC_timer(void)
void enable_APIC_timer(void)
{
- if (using_apic_timer) {
+ int cpu = smp_processor_id();
+
+ if (using_apic_timer &&
+ !cpu_isset(cpu, timer_bcast_ipi)) {
unsigned long v;
v = apic_read(APIC_LVTT);
@@ -1092,33 +1104,31 @@ void enable_APIC_timer(void)
}
}
-/*
- * the frequency of the profiling timer can be changed
- * by writing a multiplier value into /proc/profile.
- */
-int setup_profiling_timer(unsigned int multiplier)
+void switch_APIC_timer_to_ipi(void *cpumask)
{
- int i;
+ cpumask_t mask = *(cpumask_t *)cpumask;
+ int cpu = smp_processor_id();
- /*
- * Sanity check. [at least 500 APIC cycles should be
- * between APIC interrupts as a rule of thumb, to avoid
- * irqs flooding us]
- */
- if ( (!multiplier) || (calibration_result/multiplier < 500))
- return -EINVAL;
-
- /*
- * Set the new multiplier for each CPU. CPUs don't start using the
- * new values until the next timer interrupt in which they do process
- * accounting. At that time they also adjust their APIC timers
- * accordingly.
- */
- for (i = 0; i < NR_CPUS; ++i)
- per_cpu(prof_multiplier, i) = multiplier;
+ if (cpu_isset(cpu, mask) &&
+ !cpu_isset(cpu, timer_bcast_ipi)) {
+ disable_APIC_timer();
+ cpu_set(cpu, timer_bcast_ipi);
+ }
+}
+EXPORT_SYMBOL(switch_APIC_timer_to_ipi);
- return 0;
+void switch_ipi_to_APIC_timer(void *cpumask)
+{
+ cpumask_t mask = *(cpumask_t *)cpumask;
+ int cpu = smp_processor_id();
+
+ if (cpu_isset(cpu, mask) &&
+ cpu_isset(cpu, timer_bcast_ipi)) {
+ cpu_clear(cpu, timer_bcast_ipi);
+ enable_APIC_timer();
+ }
}
+EXPORT_SYMBOL(switch_ipi_to_APIC_timer);
#undef APIC_DIVISOR
@@ -1134,32 +1144,10 @@ int setup_profiling_timer(unsigned int multiplier)
inline void smp_local_timer_interrupt(struct pt_regs * regs)
{
- int cpu = smp_processor_id();
-
profile_tick(CPU_PROFILING, regs);
- if (--per_cpu(prof_counter, cpu) <= 0) {
- /*
- * The multiplier may have changed since the last time we got
- * to this point as a result of the user writing to
- * /proc/profile. In this case we need to adjust the APIC
- * timer accordingly.
- *
- * Interrupts are already masked off at this point.
- */
- per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu);
- if (per_cpu(prof_counter, cpu) !=
- per_cpu(prof_old_multiplier, cpu)) {
- __setup_APIC_LVTT(
- calibration_result/
- per_cpu(prof_counter, cpu));
- per_cpu(prof_old_multiplier, cpu) =
- per_cpu(prof_counter, cpu);
- }
-
#ifdef CONFIG_SMP
- update_process_times(user_mode_vm(regs));
+ update_process_times(user_mode_vm(regs));
#endif
- }
/*
* We take the 'long' return path, and there every subsystem
@@ -1206,6 +1194,43 @@ fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
irq_exit();
}
+#ifndef CONFIG_SMP
+static void up_apic_timer_interrupt_call(struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+
+ /*
+ * the NMI deadlock-detector uses this.
+ */
+ per_cpu(irq_stat, cpu).apic_timer_irqs++;
+
+ smp_local_timer_interrupt(regs);
+}
+#endif
+
+void smp_send_timer_broadcast_ipi(struct pt_regs *regs)
+{
+ cpumask_t mask;
+
+ cpus_and(mask, cpu_online_map, timer_bcast_ipi);
+ if (!cpus_empty(mask)) {
+#ifdef CONFIG_SMP
+ send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
+#else
+ /*
+ * We can directly call the apic timer interrupt handler
+ * in UP case. Minus all irq related functions
+ */
+ up_apic_timer_interrupt_call(regs);
+#endif
+ }
+}
+
+int setup_profiling_timer(unsigned int multiplier)
+{
+ return -EINVAL;
+}
+
/*
* This interrupt should _never_ happen with our APIC/SMP architecture
*/
diff --git a/trunk/arch/i386/kernel/apm.c b/trunk/arch/i386/kernel/apm.c
index 9d8827156e54..05312a8abb8b 100644
--- a/trunk/arch/i386/kernel/apm.c
+++ b/trunk/arch/i386/kernel/apm.c
@@ -219,6 +219,7 @@
#include
#include
#include
+#include
#include
#include
#include
diff --git a/trunk/arch/i386/kernel/cpu/amd.c b/trunk/arch/i386/kernel/cpu/amd.c
index e7697e077f6b..333578a4e91a 100644
--- a/trunk/arch/i386/kernel/cpu/amd.c
+++ b/trunk/arch/i386/kernel/cpu/amd.c
@@ -216,6 +216,12 @@ static void __init init_amd(struct cpuinfo_x86 *c)
c->x86_max_cores = 1;
}
+ if (cpuid_eax(0x80000000) >= 0x80000007) {
+ c->x86_power = cpuid_edx(0x80000007);
+ if (c->x86_power & (1<<8))
+ set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+ }
+
#ifdef CONFIG_X86_HT
/*
* On a AMD dual core setup the lower bits of the APIC id
@@ -233,6 +239,7 @@ static void __init init_amd(struct cpuinfo_x86 *c)
cpu, c->x86_max_cores, cpu_core_id[cpu]);
}
#endif
+
}
static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
diff --git a/trunk/arch/i386/kernel/cpu/common.c b/trunk/arch/i386/kernel/cpu/common.c
index 170400879f44..15aee26ec2b6 100644
--- a/trunk/arch/i386/kernel/cpu/common.c
+++ b/trunk/arch/i386/kernel/cpu/common.c
@@ -204,7 +204,10 @@ static int __devinit have_cpuid_p(void)
/* Do minimum CPU detection early.
Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
- The others are not touched to avoid unwanted side effects. */
+ The others are not touched to avoid unwanted side effects.
+
+ WARNING: this function is only called on the BP. Don't add code here
+ that is supposed to run on all CPUs. */
static void __init early_cpu_detect(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -236,12 +239,6 @@ static void __init early_cpu_detect(void)
if (cap0 & (1<<19))
c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
}
-
- early_intel_workaround(c);
-
-#ifdef CONFIG_X86_HT
- phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
-#endif
}
void __devinit generic_identify(struct cpuinfo_x86 * c)
@@ -289,6 +286,12 @@ void __devinit generic_identify(struct cpuinfo_x86 * c)
get_model_name(c); /* Default name */
}
}
+
+ early_intel_workaround(c);
+
+#ifdef CONFIG_X86_HT
+ phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
+#endif
}
static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
diff --git a/trunk/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/trunk/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index 871366b83b3f..7975e79d5fa4 100644
--- a/trunk/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/trunk/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -40,8 +40,6 @@
#include
#include
-#include "speedstep-est-common.h"
-
#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
@@ -367,6 +365,7 @@ acpi_cpufreq_cpu_init (
unsigned int cpu = policy->cpu;
struct cpufreq_acpi_io *data;
unsigned int result = 0;
+ struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
union acpi_object arg0 = {ACPI_TYPE_BUFFER};
u32 arg0_buf[3];
@@ -390,7 +389,7 @@ acpi_cpufreq_cpu_init (
if (result)
goto err_free;
- if (is_const_loops_cpu(cpu)) {
+ if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
}
diff --git a/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
index edb9873e27e3..9a826cde4fd1 100644
--- a/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -35,8 +35,6 @@
#include
#include
-#include "speedstep-est-common.h"
-
#define PFX "speedstep-centrino: "
#define MAINTAINER "Jeremy Fitzhardinge "
@@ -493,12 +491,13 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
unsigned l, h;
int ret;
int i;
+ struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
/* Only Intel makes Enhanced Speedstep-capable CPUs */
if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST))
return -ENODEV;
- if (is_const_loops_cpu(policy->cpu)) {
+ if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
centrino_driver.flags |= CPUFREQ_CONST_LOOPS;
}
diff --git a/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h b/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h
deleted file mode 100644
index 5ce995c9d866..000000000000
--- a/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Routines common for drivers handling Enhanced Speedstep Technology
- * Copyright (C) 2004 Venkatesh Pallipadi
- *
- * Licensed under the terms of the GNU GPL License version 2 -- see
- * COPYING for details.
- */
-
-static inline int is_const_loops_cpu(unsigned int cpu)
-{
- struct cpuinfo_x86 *c = cpu_data + cpu;
-
- if (c->x86_vendor != X86_VENDOR_INTEL || !cpu_has(c, X86_FEATURE_EST))
- return 0;
-
- /*
- * on P-4s, the TSC runs with constant frequency independent of cpu freq
- * when we use EST
- */
- if (c->x86 == 0xf)
- return 1;
-
- return 0;
-}
-
diff --git a/trunk/arch/i386/kernel/cpu/intel.c b/trunk/arch/i386/kernel/cpu/intel.c
index 5e2da704f0fa..8c0120186b9f 100644
--- a/trunk/arch/i386/kernel/cpu/intel.c
+++ b/trunk/arch/i386/kernel/cpu/intel.c
@@ -183,10 +183,13 @@ static void __devinit init_intel(struct cpuinfo_x86 *c)
}
#endif
- if (c->x86 == 15)
+ if (c->x86 == 15)
set_bit(X86_FEATURE_P4, c->x86_capability);
if (c->x86 == 6)
set_bit(X86_FEATURE_P3, c->x86_capability);
+ if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
+ (c->x86 == 0x6 && c->x86_model >= 0x0e))
+ set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
}
diff --git a/trunk/arch/i386/kernel/cpu/mtrr/if.c b/trunk/arch/i386/kernel/cpu/mtrr/if.c
index cf39e205d33c..5ac051bb9d55 100644
--- a/trunk/arch/i386/kernel/cpu/mtrr/if.c
+++ b/trunk/arch/i386/kernel/cpu/mtrr/if.c
@@ -1,5 +1,6 @@
#include
#include
+#include
#include
#include
#include
diff --git a/trunk/arch/i386/kernel/cpu/proc.c b/trunk/arch/i386/kernel/cpu/proc.c
index 6d91b274589c..89a85af33d28 100644
--- a/trunk/arch/i386/kernel/cpu/proc.c
+++ b/trunk/arch/i386/kernel/cpu/proc.c
@@ -29,7 +29,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
- NULL, "fxsr_opt", NULL, NULL, NULL, "lm", "3dnowext", "3dnow",
+ NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow",
/* Transmeta-defined */
"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
@@ -40,7 +40,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
/* Other (Linux-defined) */
"cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ "constant_tsc", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -57,11 +57,21 @@ static int show_cpuinfo(struct seq_file *m, void *v)
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* AMD-defined (#2) */
- "lahf_lm", "cmp_legacy", NULL, NULL, NULL, NULL, NULL, NULL,
+ "lahf_lm", "cmp_legacy", "svm", NULL, "cr8legacy", NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
};
+ static char *x86_power_flags[] = {
+ "ts", /* temperature sensor */
+ "fid", /* frequency id control */
+ "vid", /* voltage id control */
+ "ttp", /* thermal trip */
+ "tm",
+ "stc",
+ NULL,
+ /* nothing */ /* constant_tsc - moved to flags */
+ };
struct cpuinfo_x86 *c = v;
int i, n = c - cpu_data;
int fpu_exception;
@@ -131,6 +141,17 @@ static int show_cpuinfo(struct seq_file *m, void *v)
x86_cap_flags[i] != NULL )
seq_printf(m, " %s", x86_cap_flags[i]);
+ for (i = 0; i < 32; i++)
+ if (c->x86_power & (1 << i)) {
+ if (i < ARRAY_SIZE(x86_power_flags) &&
+ x86_power_flags[i])
+ seq_printf(m, "%s%s",
+ x86_power_flags[i][0]?" ":"",
+ x86_power_flags[i]);
+ else
+ seq_printf(m, " [%d]", i);
+ }
+
seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n",
c->loops_per_jiffy/(500000/HZ),
(c->loops_per_jiffy/(5000/HZ)) % 100);
diff --git a/trunk/arch/i386/kernel/crash.c b/trunk/arch/i386/kernel/crash.c
index 0248e084017c..d49dbe8dc96b 100644
--- a/trunk/arch/i386/kernel/crash.c
+++ b/trunk/arch/i386/kernel/crash.c
@@ -25,7 +25,6 @@
#include
-note_buf_t crash_notes[NR_CPUS];
/* This keeps a track of which one is crashing cpu. */
static int crashing_cpu;
@@ -72,7 +71,9 @@ static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
* squirrelled away. ELF notes happen to provide
* all of that that no need to invent something new.
*/
- buf = &crash_notes[cpu][0];
+ buf = (u32*)per_cpu_ptr(crash_notes, cpu);
+ if (!buf)
+ return;
memset(&prstatus, 0, sizeof(prstatus));
prstatus.pr_pid = current->pid;
elf_core_copy_regs(&prstatus.pr_reg, regs);
@@ -81,51 +82,12 @@ static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
final_note(buf);
}
-static void crash_get_current_regs(struct pt_regs *regs)
-{
- __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx));
- __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx));
- __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx));
- __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi));
- __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi));
- __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp));
- __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax));
- __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp));
- __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss));
- __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs));
- __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds));
- __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes));
- __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags));
-
- regs->eip = (unsigned long)current_text_addr();
-}
-
-/* CPU does not save ss and esp on stack if execution is already
- * running in kernel mode at the time of NMI occurrence. This code
- * fixes it.
- */
-static void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs)
-{
- memcpy(newregs, oldregs, sizeof(*newregs));
- newregs->esp = (unsigned long)&(oldregs->esp);
- __asm__ __volatile__("xorl %eax, %eax;");
- __asm__ __volatile__ ("movw %%ss, %%ax;" :"=a"(newregs->xss));
-}
-
-/* We may have saved_regs from where the error came from
- * or it is NULL if via a direct panic().
- */
-static void crash_save_self(struct pt_regs *saved_regs)
+static void crash_save_self(struct pt_regs *regs)
{
- struct pt_regs regs;
int cpu;
cpu = smp_processor_id();
- if (saved_regs)
- crash_setup_regs(®s, saved_regs);
- else
- crash_get_current_regs(®s);
- crash_save_this_cpu(®s, cpu);
+ crash_save_this_cpu(regs, cpu);
}
#ifdef CONFIG_SMP
@@ -144,7 +106,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
local_irq_disable();
if (!user_mode(regs)) {
- crash_setup_regs(&fixed_regs, regs);
+ crash_fixup_ss_esp(&fixed_regs, regs);
regs = &fixed_regs;
}
crash_save_this_cpu(regs, cpu);
diff --git a/trunk/arch/i386/kernel/crash_dump.c b/trunk/arch/i386/kernel/crash_dump.c
new file mode 100644
index 000000000000..3f532df488bc
--- /dev/null
+++ b/trunk/arch/i386/kernel/crash_dump.c
@@ -0,0 +1,74 @@
+/*
+ * kernel/crash_dump.c - Memory preserving reboot related code.
+ *
+ * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
+ * Copyright (C) IBM Corporation, 2004. All rights reserved
+ */
+
+#include
+#include
+#include
+
+#include
+
+static void *kdump_buf_page;
+
+/**
+ * copy_oldmem_page - copy one page from "oldmem"
+ * @pfn: page frame number to be copied
+ * @buf: target memory address for the copy; this can be in kernel address
+ * space or user address space (see @userbuf)
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page (based on pfn) to begin the copy
+ * @userbuf: if set, @buf is in user address space, use copy_to_user(),
+ * otherwise @buf is in kernel address space, use memcpy().
+ *
+ * Copy a page from "oldmem". For this page, there is no pte mapped
+ * in the current kernel. We stitch up a pte, similar to kmap_atomic.
+ *
+ * Calling copy_to_user() in atomic context is not desirable. Hence first
+ * copying the data to a pre-allocated kernel page and then copying to user
+ * space in non-atomic context.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+ size_t csize, unsigned long offset, int userbuf)
+{
+ void *vaddr;
+
+ if (!csize)
+ return 0;
+
+ vaddr = kmap_atomic_pfn(pfn, KM_PTE0);
+
+ if (!userbuf) {
+ memcpy(buf, (vaddr + offset), csize);
+ kunmap_atomic(vaddr, KM_PTE0);
+ } else {
+ if (!kdump_buf_page) {
+ printk(KERN_WARNING "Kdump: Kdump buffer page not"
+ " allocated\n");
+ return -EFAULT;
+ }
+ copy_page(kdump_buf_page, vaddr);
+ kunmap_atomic(vaddr, KM_PTE0);
+ if (copy_to_user(buf, (kdump_buf_page + offset), csize))
+ return -EFAULT;
+ }
+
+ return csize;
+}
+
+static int __init kdump_buf_page_init(void)
+{
+ int ret = 0;
+
+ kdump_buf_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!kdump_buf_page) {
+ printk(KERN_WARNING "Kdump: Failed to allocate kdump buffer"
+ " page\n");
+ ret = -ENOMEM;
+ }
+
+ return ret;
+}
+arch_initcall(kdump_buf_page_init);
diff --git a/trunk/arch/i386/kernel/dmi_scan.c b/trunk/arch/i386/kernel/dmi_scan.c
index 58516e2ac172..6a93d75db431 100644
--- a/trunk/arch/i386/kernel/dmi_scan.c
+++ b/trunk/arch/i386/kernel/dmi_scan.c
@@ -4,7 +4,7 @@
#include
#include
#include
-
+#include
static char * __init dmi_string(struct dmi_header *dm, u8 s)
{
@@ -19,7 +19,7 @@ static char * __init dmi_string(struct dmi_header *dm, u8 s)
}
if (*bp != 0) {
- str = alloc_bootmem(strlen(bp) + 1);
+ str = dmi_alloc(strlen(bp) + 1);
if (str != NULL)
strcpy(str, bp);
else
@@ -40,7 +40,7 @@ static int __init dmi_table(u32 base, int len, int num,
u8 *buf, *data;
int i = 0;
- buf = bt_ioremap(base, len);
+ buf = dmi_ioremap(base, len);
if (buf == NULL)
return -1;
@@ -65,7 +65,7 @@ static int __init dmi_table(u32 base, int len, int num,
data += 2;
i++;
}
- bt_iounmap(buf, len);
+ dmi_iounmap(buf, len);
return 0;
}
@@ -112,7 +112,7 @@ static void __init dmi_save_devices(struct dmi_header *dm)
if ((*d & 0x80) == 0)
continue;
- dev = alloc_bootmem(sizeof(*dev));
+ dev = dmi_alloc(sizeof(*dev));
if (!dev) {
printk(KERN_ERR "dmi_save_devices: out of memory.\n");
break;
@@ -131,7 +131,7 @@ static void __init dmi_save_ipmi_device(struct dmi_header *dm)
struct dmi_device *dev;
void * data;
- data = alloc_bootmem(dm->length);
+ data = dmi_alloc(dm->length);
if (data == NULL) {
printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n");
return;
@@ -139,7 +139,7 @@ static void __init dmi_save_ipmi_device(struct dmi_header *dm)
memcpy(data, dm, dm->length);
- dev = alloc_bootmem(sizeof(*dev));
+ dev = dmi_alloc(sizeof(*dev));
if (!dev) {
printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n");
return;
@@ -221,7 +221,7 @@ void __init dmi_scan_machine(void)
}
}
-out: printk(KERN_INFO "DMI not present.\n");
+out: printk(KERN_INFO "DMI not present or invalid.\n");
}
diff --git a/trunk/arch/i386/kernel/io_apic.c b/trunk/arch/i386/kernel/io_apic.c
index 7554f8fd874a..f2dd218d88cb 100644
--- a/trunk/arch/i386/kernel/io_apic.c
+++ b/trunk/arch/i386/kernel/io_apic.c
@@ -1649,7 +1649,7 @@ static void __init enable_IO_APIC(void)
for(apic = 0; apic < nr_ioapics; apic++) {
int pin;
/* See if any of the pins is in ExtINT mode */
- for(pin = 0; pin < nr_ioapic_registers[i]; pin++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
struct IO_APIC_route_entry entry;
spin_lock_irqsave(&ioapic_lock, flags);
*(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
diff --git a/trunk/arch/i386/kernel/ioport.c b/trunk/arch/i386/kernel/ioport.c
index b59a34dbe262..79026f026b85 100644
--- a/trunk/arch/i386/kernel/ioport.c
+++ b/trunk/arch/i386/kernel/ioport.c
@@ -7,6 +7,7 @@
#include
#include
+#include
#include
#include
#include
diff --git a/trunk/arch/i386/kernel/kprobes.c b/trunk/arch/i386/kernel/kprobes.c
index 19edcd526ba4..6483eeb1a4e8 100644
--- a/trunk/arch/i386/kernel/kprobes.c
+++ b/trunk/arch/i386/kernel/kprobes.c
@@ -57,14 +57,10 @@ static inline int is_IF_modifier(kprobe_opcode_t opcode)
}
int __kprobes arch_prepare_kprobe(struct kprobe *p)
-{
- return 0;
-}
-
-void __kprobes arch_copy_kprobe(struct kprobe *p)
{
memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
p->opcode = *p->addr;
+ return 0;
}
void __kprobes arch_arm_kprobe(struct kprobe *p)
@@ -81,10 +77,6 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
(unsigned long) p->addr + sizeof(kprobe_opcode_t));
}
-void __kprobes arch_remove_kprobe(struct kprobe *p)
-{
-}
-
static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
{
kcb->prev_kprobe.kp = kprobe_running();
@@ -196,6 +188,19 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
kcb->kprobe_status = KPROBE_REENTER;
return 1;
} else {
+ if (regs->eflags & VM_MASK) {
+ /* We are in virtual-8086 mode. Return 0 */
+ goto no_kprobe;
+ }
+ if (*addr != BREAKPOINT_INSTRUCTION) {
+ /* The breakpoint instruction was removed by
+ * another cpu right after we hit, no further
+ * handling of this interrupt is appropriate
+ */
+ regs->eip -= sizeof(kprobe_opcode_t);
+ ret = 1;
+ goto no_kprobe;
+ }
p = __get_cpu_var(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
goto ss_probe;
diff --git a/trunk/arch/i386/kernel/microcode.c b/trunk/arch/i386/kernel/microcode.c
index 165f13158c60..d3fdf0057d82 100644
--- a/trunk/arch/i386/kernel/microcode.c
+++ b/trunk/arch/i386/kernel/microcode.c
@@ -70,6 +70,7 @@
*/
//#define DEBUG /* pr_debug */
+#include
#include
#include
#include
@@ -165,7 +166,7 @@ static void collect_cpu_info (void *unused)
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* see notes above for revision 1.07. Apparent chip bug */
- serialize_cpu();
+ sync_core();
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev);
pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
@@ -379,7 +380,7 @@ static void do_update_one (void * unused)
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* see notes above for revision 1.07. Apparent chip bug */
- serialize_cpu();
+ sync_core();
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
diff --git a/trunk/arch/i386/kernel/process.c b/trunk/arch/i386/kernel/process.c
index 035928f3f6c1..2185377fdde1 100644
--- a/trunk/arch/i386/kernel/process.c
+++ b/trunk/arch/i386/kernel/process.c
@@ -424,18 +424,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
struct task_struct *tsk;
int err;
- childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
- /*
- * The below -8 is to reserve 8 bytes on top of the ring0 stack.
- * This is necessary to guarantee that the entire "struct pt_regs"
- * is accessable even if the CPU haven't stored the SS/ESP registers
- * on the stack (interrupt gate does not save these registers
- * when switching to the same priv ring).
- * Therefore beware: accessing the xss/esp fields of the
- * "struct pt_regs" is possible, but they may contain the
- * completely wrong values.
- */
- childregs = (struct pt_regs *) ((unsigned long) childregs - 8);
+ childregs = task_pt_regs(p);
*childregs = *regs;
childregs->eax = 0;
childregs->esp = esp;
@@ -540,12 +529,7 @@ EXPORT_SYMBOL(dump_thread);
*/
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
- struct pt_regs ptregs;
-
- ptregs = *(struct pt_regs *)
- ((unsigned long)tsk->thread_info +
- /* see comments in copy_thread() about -8 */
- THREAD_SIZE - sizeof(ptregs) - 8);
+ struct pt_regs ptregs = *task_pt_regs(tsk);
ptregs.xcs &= 0xffff;
ptregs.xds &= 0xffff;
ptregs.xes &= 0xffff;
@@ -601,8 +585,8 @@ static inline void disable_tsc(struct task_struct *prev_p,
* gcc should eliminate the ->thread_info dereference if
* has_secure_computing returns 0 at compile time (SECCOMP=n).
*/
- prev = prev_p->thread_info;
- next = next_p->thread_info;
+ prev = task_thread_info(prev_p);
+ next = task_thread_info(next_p);
if (has_secure_computing(prev) || has_secure_computing(next)) {
/* slow path here */
@@ -787,7 +771,7 @@ unsigned long get_wchan(struct task_struct *p)
int count = 0;
if (!p || p == current || p->state == TASK_RUNNING)
return 0;
- stack_page = (unsigned long)p->thread_info;
+ stack_page = (unsigned long)task_stack_page(p);
esp = p->thread.esp;
if (!stack_page || esp < stack_page || esp > top_esp+stack_page)
return 0;
diff --git a/trunk/arch/i386/kernel/reboot.c b/trunk/arch/i386/kernel/reboot.c
index 2fa5803a759d..d207242976d3 100644
--- a/trunk/arch/i386/kernel/reboot.c
+++ b/trunk/arch/i386/kernel/reboot.c
@@ -12,6 +12,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -355,10 +356,10 @@ void machine_halt(void)
void machine_power_off(void)
{
- machine_shutdown();
-
- if (pm_power_off)
+ if (pm_power_off) {
+ machine_shutdown();
pm_power_off();
+ }
}
diff --git a/trunk/arch/i386/kernel/setup.c b/trunk/arch/i386/kernel/setup.c
index 27c956db0461..51e513b4f72d 100644
--- a/trunk/arch/i386/kernel/setup.c
+++ b/trunk/arch/i386/kernel/setup.c
@@ -45,6 +45,7 @@
#include
#include
#include
+#include
#include