diff --git a/[refs] b/[refs]
index 846dde5d8870..c9c36177ecfc 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
---
-refs/heads/master: ef38ff9d372d4fe69e415370939a0f1fb5783af1
+refs/heads/master: 8e8a4603b5422c9145880e73b23bc4c2c4de0098
diff --git a/trunk/Documentation/00-INDEX b/trunk/Documentation/00-INDEX
index f7923a42e769..e8fb24671967 100644
--- a/trunk/Documentation/00-INDEX
+++ b/trunk/Documentation/00-INDEX
@@ -167,8 +167,10 @@ highuid.txt
- notes on the change from 16 bit to 32 bit user/group IDs.
hpet.txt
- High Precision Event Timer Driver for Linux.
-timers/
- - info on the timer related topics
+hrtimer/
+ - info on the timer_stats debugging facility for timer (ab)use.
+hrtimers/
+ - info on the hrtimers subsystem for high-resolution kernel timers.
hw_random.txt
- info on Linux support for random number generator in i8xx chipsets.
hwmon/
diff --git a/trunk/Documentation/DocBook/Makefile b/trunk/Documentation/DocBook/Makefile
index e471bc466a7e..300e1707893f 100644
--- a/trunk/Documentation/DocBook/Makefile
+++ b/trunk/Documentation/DocBook/Makefile
@@ -9,7 +9,7 @@
DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
procfs-guide.xml writing_usb_driver.xml networking.xml \
- kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \
+ kernel-api.xml filesystems.xml lsm.xml usb.xml \
gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml
diff --git a/trunk/Documentation/DocBook/kgdb.tmpl b/trunk/Documentation/DocBook/kgdb.tmpl
deleted file mode 100644
index 97618bed4d65..000000000000
--- a/trunk/Documentation/DocBook/kgdb.tmpl
+++ /dev/null
@@ -1,447 +0,0 @@
-
-
-
-
-
- Using kgdb and the kgdb Internals
-
-
-
- Jason
- Wessel
-
-
- jason.wessel@windriver.com
-
-
-
-
-
-
-
- Tom
- Rini
-
-
- trini@kernel.crashing.org
-
-
-
-
-
-
-
- Amit S.
- Kale
-
-
- amitkale@linsyssoft.com
-
-
-
-
-
-
- 2008
- Wind River Systems, Inc.
-
-
- 2004-2005
- MontaVista Software, Inc.
-
-
- 2004
- Amit S. Kale
-
-
-
-
- This file is licensed under the terms of the GNU General Public License
- version 2. This program is licensed "as is" without any warranty of any
- kind, whether express or implied.
-
-
-
-
-
-
-
- Introduction
-
- kgdb is a source level debugger for linux kernel. It is used along
- with gdb to debug a linux kernel. The expectation is that gdb can
- be used to "break in" to the kernel to inspect memory, variables
- and look through call stack information similar to what an
- application developer would use gdb for. It is possible to place
- breakpoints in kernel code and perform some limited execution
- stepping.
-
-
- Two machines are required for using kgdb. One of these machines is a
- development machine and the other is a test machine. The kernel
- to be debugged runs on the test machine. The development machine
- runs an instance of gdb against the vmlinux file which contains
- the symbols (not a boot image such as bzImage, zImage, uImage...).
- In gdb the developer specifies the connection parameters and
- connects to kgdb. Depending on which kgdb I/O modules exist in
- the kernel for a given architecture, it may be possible to debug
- the test machine's kernel with the development machine using a
- rs232 or ethernet connection.
-
-
-
- Compiling a kernel
-
- To enable CONFIG_KGDB, look under the "Kernel debugging" menu
- and then select "KGDB: kernel debugging with remote gdb".
-
-
- Next you should choose one or more I/O drivers to interconnect debugging
- host and debugged target. Early boot debugging requires a KGDB
- I/O driver that supports early debugging and the driver must be
- built into the kernel directly. Kgdb I/O driver configuration
- takes place via kernel or module parameters, see following
- chapter.
-
-
- The kgdb test compile options are described in the kgdb test suite chapter.
-
-
-
-
- Enable kgdb for debugging
-
- In order to use kgdb you must activate it by passing configuration
- information to one of the kgdb I/O drivers. If you do not pass any
- configuration information kgdb will not do anything at all. Kgdb
- will only actively hook up to the kernel trap hooks if a kgdb I/O
- driver is loaded and configured. If you unconfigure a kgdb I/O
- driver, kgdb will unregister all the kernel hook points.
-
-
- All drivers can be reconfigured at run time, if
- CONFIG_SYSFS and CONFIG_MODULES
- are enabled, by echo'ing a new config string to
- /sys/module/<driver>/parameter/<option>.
- The driver can be unconfigured by passing an empty string. You cannot
- change the configuration while the debugger is attached. Make sure
- to detach the debugger with the detach command
- prior to trying to unconfigure a kgdb I/O driver.
-
-
- Kernel parameter: kgdbwait
-
- The Kernel command line option kgdbwait makes
- kgdb wait for a debugger connection during booting of a kernel. You
- can only use this option if you compiled a kgdb I/O driver into the
- kernel and you specified the I/O driver configuration as a kernel
- command line option. The kgdbwait parameter should always follow the
- configuration parameter for the kgdb I/O driver in the kernel
- command line else the I/O driver will not be configured prior to
- asking the kernel to use it to wait.
-
-
- The kernel will stop and wait as early as the I/O driver and
- architecture will allow when you use this option. If you build the
- kgdb I/O driver as a kernel module kgdbwait will not do anything.
-
-
-
- Kernel parameter: kgdboc
-
- The kgdboc driver was originally an abbreviation meant to stand for
- "kgdb over console". Kgdboc is designed to work with a single
- serial port. It was meant to cover the circumstance
- where you wanted to use a serial console as your primary console as
- well as using it to perform kernel debugging. Of course you can
- also use kgdboc without assigning a console to the same port.
-
-
- Using kgdboc
-
- You can configure kgdboc via sysfs or a module or kernel boot line
- parameter depending on if you build with CONFIG_KGDBOC as a module
- or built-in.
-
- From the module load or built-in
- kgdboc=<tty-device>,[baud]
-
- For example, if your console port is typically ttyS0, you would use something like kgdboc=ttyS0,115200; on the ARM Versatile AB you would likely use kgdboc=ttyAMA0,115200
-
-
- From sysfs
- echo ttyS0 > /sys/module/kgdboc/parameters/kgdboc
-
-
-
-
- NOTE: Kgdboc does not support interrupting the target via the
- gdb remote protocol. You must manually send a sysrq-g unless you
- have a proxy that splits console output to a terminal program and
- has a separate port for the debugger to connect to that sends the
- sysrq-g for you.
-
- When using kgdboc with no debugger proxy, you can end up
- connecting the debugger at one of two entry points. If an
- exception occurs after you have loaded kgdboc, a message should print
- on the console stating it is waiting for the debugger. In this case
- you disconnect your terminal program and then connect the debugger in
- its place. If you want to interrupt the target system and forcibly
- enter a debug session you have to issue a Sysrq sequence and then
- type the letter g. Then you disconnect the
- terminal session and connect gdb. Your options if you don't like
- this are to hack gdb to send the sysrq-g for you as well as on the
- initial connect, or to use a debugger proxy that allows an
- unmodified gdb to do the debugging.
-
-
-
-
- Kernel parameter: kgdbcon
-
- Kgdb supports using the gdb serial protocol to send console messages
- to the debugger when the debugger is connected and running. There
- are two ways to activate this feature.
-
- Activate with the kernel command line option:
- kgdbcon
-
- Use sysfs before configuring an I/O driver
-
- echo 1 > /sys/module/kgdb/parameters/kgdb_use_con
-
-
- NOTE: If you do this after you configure the kgdb I/O driver, the
- setting will not take effect until the next time the I/O driver is
- reconfigured.
-
-
-
-
-
- IMPORTANT NOTE: Using this option with kgdb over the console
- (kgdboc) or kgdb over ethernet (kgdboe) is not supported.
-
-
-
-
- Connecting gdb
-
- If you are using kgdboc, then before you can connect gdb you need to
- have used kgdbwait as a boot argument, issued a sysrq-g, or the system
- you are going to debug must already have taken an exception and be
- waiting for the debugger to attach.
-
-
- If you are using a kgdb I/O driver other than kgdboc, you should be
- able to connect and the target will automatically respond.
-
-
- Example (using a serial port):
-
-
- % gdb ./vmlinux
- (gdb) set remotebaud 115200
- (gdb) target remote /dev/ttyS0
-
-
- Example (kgdb to a terminal server):
-
-
- % gdb ./vmlinux
- (gdb) target remote tcp:192.168.2.2:6443
-
-
- Example (kgdb over ethernet):
-
-
- % gdb ./vmlinux
- (gdb) target remote udp:192.168.2.2:6443
-
-
- Once connected, you can debug a kernel the way you would debug an
- application program.
-
-
- If you are having problems connecting or something is going
- seriously wrong while debugging, it will most often be the case
- that you want to enable gdb to be verbose about its target
- communications. You do this prior to issuing the target
- remote command by typing in: set debug remote 1
-
-
-
- kgdb Test Suite
-
- When kgdb is enabled in the kernel config you can also elect to
- enable the config parameter KGDB_TESTS. Turning this on will
- enable a special kgdb I/O module which is designed to test the
- kgdb internal functions.
-
-
- The kgdb tests are mainly intended for developers to test the kgdb
- internals as well as a tool for developing a new kgdb architecture
- specific implementation. These tests are not really for end users
- of the Linux kernel. The primary source of documentation would be
- to look in the drivers/misc/kgdbts.c file.
-
-
- The kgdb test suite can also be configured at compile time to run
- the core set of tests by setting the kernel config parameter
- KGDB_TESTS_ON_BOOT. This particular option is aimed at automated
- regression testing and does not require modifying the kernel boot
- config arguments. If this is turned on, the kgdb test suite can
- be disabled by specifying "kgdbts=" as a kernel boot argument.
-
-
-
- KGDB Internals
-
- Architecture Specifics
-
- Kgdb is organized into three basic components:
-
- kgdb core
-
- The kgdb core is found in kernel/kgdb.c. It contains:
-
- All the logic to implement the gdb serial protocol
- A generic OS exception handler which includes sync'ing the processors into a stopped state on a multi-cpu system.
- The API to talk to the kgdb I/O drivers
- The API to make calls to the arch specific kgdb implementation
- The logic to perform safe memory reads and writes to memory while using the debugger
- A full implementation for software breakpoints unless overridden by the arch
-
-
-
- kgdb arch specific implementation
-
- This implementation is generally found in arch/*/kernel/kgdb.c.
- As an example, arch/x86/kernel/kgdb.c contains the specifics to
- implement HW breakpoints as well as the initialization to
- dynamically register and unregister for the trap handlers on
- this architecture. The arch specific portion implements:
-
- contains an arch specific trap catcher which
- invokes kgdb_handle_exception() to start kgdb doing its
- work
- translation between the gdb-specific packet format and pt_regs
- Registration and unregistration of architecture specific trap hooks
- Any special exception handling and cleanup
- NMI exception handling and cleanup
- (optional) HW breakpoints
-
-
-
- kgdb I/O driver
-
- Each kgdb I/O driver has to provide an implementation for the following:
-
- configuration via builtin or module
- dynamic configuration and kgdb hook registration calls
- read and write character interface
- A cleanup handler for unconfiguring from the kgdb core
- (optional) Early debug methodology
-
- Any given kgdb I/O driver has to operate very closely with the
- hardware and must do it in such a way that does not enable
- interrupts or change other parts of the system context without
- completely restoring them. The kgdb core will repeatedly "poll"
- a kgdb I/O driver for characters when it needs input. The I/O
- driver is expected to return immediately if there is no data
- available. Doing so allows for the future possibility to touch
- watch dog hardware in such a way as to have a target system not
- reset when these are enabled.
-
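To make the above requirements concrete, here is a minimal sketch of what such a driver can look like. This is a hedged illustration only, assuming the struct kgdb_io layout and the kgdb_register_io_module()/kgdb_unregister_io_module() helpers from include/linux/kgdb.h of this kernel version; the my_uart_* helpers are hypothetical placeholders for the real hardware access:

#include <linux/kgdb.h>
#include <linux/module.h>

/* Hypothetical low-level helpers -- placeholders, not a real driver. */
extern int  my_uart_init(void);
extern int  my_uart_poll_rx(void);       /* fetch one character */
extern void my_uart_poll_tx(u8 chr);     /* send one character */

static int kgdb_my_init(void)
{
        /* Configuration hook: set the hardware up for polled I/O. */
        return my_uart_init();
}

static int kgdb_my_read_char(void)
{
        /* Polled by the kgdb core for input; must not sleep or
         * enable interrupts. */
        return my_uart_poll_rx();
}

static void kgdb_my_write_char(u8 chr)
{
        my_uart_poll_tx(chr);
}

static struct kgdb_io kgdb_my_io_ops = {
        .name           = "kgdb_my_uart",
        .read_char      = kgdb_my_read_char,
        .write_char     = kgdb_my_write_char,
        .init           = kgdb_my_init,
};

static int __init kgdb_my_register(void)
{
        return kgdb_register_io_module(&kgdb_my_io_ops);
}

static void __exit kgdb_my_unregister(void)
{
        kgdb_unregister_io_module(&kgdb_my_io_ops);
}

module_init(kgdb_my_register);
module_exit(kgdb_my_unregister);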
-
-
-
-
- If you are intent on adding kgdb architecture specific support
- for a new architecture, the architecture should define
- HAVE_ARCH_KGDB in the architecture specific
- Kconfig file. This will enable kgdb for the architecture, and
- at that point you must create an architecture specific kgdb
- implementation.
-
-
- There are a few flags which must be set on every architecture in
- their <asm/kgdb.h> file. These are:
-
-
-
- NUMREGBYTES: The size in bytes of all of the registers, so
- that we can ensure they will all fit into a packet.
-
-
- BUFMAX: The size in bytes of the buffer GDB will read into.
- This must be larger than NUMREGBYTES.
-
-
- CACHE_FLUSH_IS_SAFE: Set to 1 if it is always safe to call
- flush_cache_range or flush_icache_range. On some architectures,
- these functions may not be safe to call on SMP since we keep other
- CPUs in a holding pattern.
-
-
-
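As an illustration only (the values below are made up for the example and not taken from any real architecture), the corresponding <asm/kgdb.h> fragment might read:

/* Illustrative <asm/kgdb.h> fragment -- the numbers are hypothetical. */
#define NUMREGBYTES             (64 * sizeof(unsigned long)) /* all registers */
#define BUFMAX                  1024    /* must be larger than NUMREGBYTES */
#define CACHE_FLUSH_IS_SAFE     1       /* cache flushes are SMP-safe here */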
-
-
- There are also the following functions for the common backend,
- found in kernel/kgdb.c, that must be supplied by the
- architecture-specific backend unless marked as (optional), in
- which case a default function may be used if the architecture
- does not need to provide a specific implementation.
-
-!Iinclude/linux/kgdb.h
-
-
- kgdboc internals
-
- The kgdboc driver is actually a very thin driver that relies on the
- underlying low-level hardware driver having "polling hooks" to
- which the tty driver is attached. In the initial
- implementation of kgdboc, the serial_core was changed to expose a
- low level uart hook for doing polled mode reading and writing of a
- single character while in an atomic context. When kgdb makes an I/O
- request to the debugger, kgdboc invokes a call back in the serial
- core which in turn uses the call back in the uart driver. It is
- certainly possible to extend kgdboc to work with non-uart based
- consoles in the future.
-
-
- When using kgdboc with a uart, the uart driver must implement two callbacks in the struct uart_ops. Example from drivers/serial/8250.c:
-#ifdef CONFIG_CONSOLE_POLL
- .poll_get_char = serial8250_get_poll_char,
- .poll_put_char = serial8250_put_poll_char,
-#endif
-
- Any implementation specifics around creating a polling driver should be
- guarded by #ifdef CONFIG_CONSOLE_POLL, as shown above.
- Keep in mind that polling hooks have to be implemented in such a way
- that they can be called from an atomic context and have to restore
- the state of the uart chip on return such that the system can return
- to normal when the debugger detaches. You need to be very careful
- with any kind of lock you consider, because failing here is most
- likely going to mean pressing the reset button.
-
-
-
-
- Credits
-
- The following people have contributed to this document:
-
- Amit Kale <amitkale@linsyssoft.com>
- Tom Rini <trini@kernel.crashing.org>
-
- In March 2008 this document was completely rewritten by:
-
- Jason Wessel <jason.wessel@windriver.com>
-
-
-
-
-
diff --git a/trunk/Documentation/feature-removal-schedule.txt b/trunk/Documentation/feature-removal-schedule.txt
index 164c89394cff..bf0e3df8e7a1 100644
--- a/trunk/Documentation/feature-removal-schedule.txt
+++ b/trunk/Documentation/feature-removal-schedule.txt
@@ -212,7 +212,7 @@ Who: Stephen Hemminger
---------------------------
What: i386/x86_64 bzImage symlinks
-When: April 2010
+When: April 2008
Why: The i386/x86_64 merge provides a symlink to the old bzImage
location so not yet updated user space tools, e.g. package
diff --git a/trunk/Documentation/filesystems/xfs.txt b/trunk/Documentation/filesystems/xfs.txt
index 0a1668ba2600..74aeb142ae5f 100644
--- a/trunk/Documentation/filesystems/xfs.txt
+++ b/trunk/Documentation/filesystems/xfs.txt
@@ -52,15 +52,16 @@ When mounting an XFS filesystem, the following options are accepted.
and also gets the setgid bit set if it is a directory itself.
ihashsize=value
- In memory inode hashes have been removed, so this option has
- no function as of August 2007. Option is deprecated.
+ Sets the number of hash buckets available for hashing the
+ in-memory inodes of the specified mount point. If a value
+ of zero is used, the value selected by the default algorithm
+ will be displayed in /proc/mounts.
ikeep/noikeep
- When ikeep is specified, XFS does not delete empty inode clusters
- and keeps them around on disk. ikeep is the traditional XFS
- behaviour. When noikeep is specified, empty inode clusters
- are returned to the free space pool. The default is noikeep for
- non-DMAPI mounts, while ikeep is the default when DMAPI is in use.
+ When inode clusters are emptied of inodes, keep them around
+ on the disk (ikeep) - this is the traditional XFS behaviour
+ and is still the default for now. Using the noikeep option,
+ inode clusters are returned to the free space pool.
inode64
Indicates that XFS is allowed to create inodes at any location
diff --git a/trunk/Documentation/timers/timer_stats.txt b/trunk/Documentation/hrtimer/timer_stats.txt
similarity index 100%
rename from trunk/Documentation/timers/timer_stats.txt
rename to trunk/Documentation/hrtimer/timer_stats.txt
diff --git a/trunk/Documentation/timers/highres.txt b/trunk/Documentation/hrtimers/highres.txt
similarity index 100%
rename from trunk/Documentation/timers/highres.txt
rename to trunk/Documentation/hrtimers/highres.txt
diff --git a/trunk/Documentation/timers/hrtimers.txt b/trunk/Documentation/hrtimers/hrtimers.txt
similarity index 100%
rename from trunk/Documentation/timers/hrtimers.txt
rename to trunk/Documentation/hrtimers/hrtimers.txt
diff --git a/trunk/Documentation/i386/boot.txt b/trunk/Documentation/i386/boot.txt
index 2eb16100bb3f..fc49b79bc1ab 100644
--- a/trunk/Documentation/i386/boot.txt
+++ b/trunk/Documentation/i386/boot.txt
@@ -170,8 +170,6 @@ Offset Proto Name Meaning
0238/4 2.06+ cmdline_size Maximum size of the kernel command line
023C/4 2.07+ hardware_subarch Hardware subarchitecture
0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data
-0248/4 2.08+ payload_offset Offset of kernel payload
-024C/4 2.08+ payload_length Length of kernel payload
(1) For backwards compatibility, if the setup_sects field contains 0, the
real value is 4.
@@ -514,32 +512,6 @@ Protocol: 2.07+
A pointer to data that is specific to hardware subarch
-Field name: payload_offset
-Type: read
-Offset/size: 0x248/4
-Protocol: 2.08+
-
- If non-zero then this field contains the offset from the end of the
- real-mode code to the payload.
-
- The payload may be compressed. The format of both the compressed and
- uncompressed data should be determined using the standard magic
- numbers. Currently only gzip compressed ELF is used.
-
-Field name: payload_length
-Type: read
-Offset/size: 0x24c/4
-Protocol: 2.08+
-
- The length of the payload.
-
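For illustration, a hedged sketch of how a boot loader might use these two fields to locate the payload in a kernel image loaded into memory (the field offsets are taken from the table above; the function name and everything else is an assumption of this example):

#include <stdint.h>

/* Sketch only: 'image' points at the start of the kernel image file. */
static const uint8_t *find_payload(const uint8_t *image, uint32_t *length)
{
        uint8_t  setup_sects    = image[0x1f1];
        uint32_t payload_offset = *(const uint32_t *)(image + 0x248);

        if (setup_sects == 0)           /* backwards compatibility, see (1) */
                setup_sects = 4;

        *length = *(const uint32_t *)(image + 0x24c);

        /* The real-mode code occupies (setup_sects + 1) * 512 bytes, and
         * payload_offset is relative to the end of that code. */
        return image + (setup_sects + 1) * 512 + payload_offset;
}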
-**** THE IMAGE CHECKSUM
-
-From boot protocol version 2.08 onwards the CRC-32 is calculated over
-the entire file using the characteristic polynomial 0x04C11DB7 and an
-initial remainder of 0xffffffff. The checksum is appended to the
-file; therefore the CRC of the file up to the limit specified in the
-syssize field of the header is always 0.
**** THE KERNEL COMMAND LINE
diff --git a/trunk/Documentation/ide/ide.txt b/trunk/Documentation/ide/ide.txt
index 486c699f4aea..818676aad45a 100644
--- a/trunk/Documentation/ide/ide.txt
+++ b/trunk/Documentation/ide/ide.txt
@@ -71,6 +71,29 @@ This driver automatically probes for most IDE interfaces (including all PCI
ones), for the drives/geometries attached to those interfaces, and for the IRQ
lines being used by the interfaces (normally 14, 15 for ide0/ide1).
+For special cases, interfaces may be specified using kernel "command line"
+options. For example,
+
+ ide3=0x168,0x36e,10 /* ioports 0x168-0x16f,0x36e, irq 10 */
+
+Normally the irq number need not be specified, as ide.c will probe for it:
+
+ ide3=0x168,0x36e /* ioports 0x168-0x16f,0x36e */
+
+The standard port and irq values are these:
+
+ ide0=0x1f0,0x3f6,14
+ ide1=0x170,0x376,15
+ ide2=0x1e8,0x3ee,11
+ ide3=0x168,0x36e,10
+
+Note that the first parameter reserves 8 contiguous ioports, whereas the
+second value denotes a single ioport. If in doubt, do a 'cat /proc/ioports'.
+
+In all probability the device uses these ports and IRQs if it is attached
+to the appropriate ide channel. Pass the parameter for the correct ide
+channel to the kernel, as explained above.
+
Any number of interfaces may share a single IRQ if necessary, at a slight
performance penalty, whether on separate cards or a single VLB card.
The IDE driver automatically detects and handles this. However, this may
@@ -161,6 +184,13 @@ provided it is mounted with the default block size of 1024 (as above).
Please pass on any feedback on any of this stuff to the maintainer,
whose address can be found in linux/MAINTAINERS.
+Note that if BOTH hd.c and ide.c are configured into the kernel,
+hd.c will normally be allowed to control the primary IDE interface.
+This is useful for older hardware that may be incompatible with ide.c,
+and still allows newer hardware to run on the 2nd/3rd/4th IDE ports
+under control of ide.c. To have ide.c also "take over" the primary
+IDE port in this situation, use the "command line" parameter: ide0=0x1f0
+
The IDE driver is modularized. The high level disk/CD-ROM/tape/floppy
drivers can always be compiled as loadable modules, the chipset drivers
can only be compiled into the kernel, and the core code (ide.c) can be
@@ -176,7 +206,7 @@ When ide.c is used as a module, you can pass command line parameters to the
driver using the "options=" keyword to insmod, while replacing any ',' with
';'. For example:
- insmod ide.o options="hda=nodma hdb=nodma"
+ insmod ide.o options="ide0=serialize ide1=serialize ide2=0x1e8;0x3ee;11"
================================================================================
@@ -217,11 +247,21 @@ Summary of ide driver parameters for kernel command line
As for VLB, it is safest to not specify it.
Bigger values are safer than smaller ones.
+ "idex=base" : probe for an interface at the addr specified,
+ where "base" is usually 0x1f0 or 0x170
+ and "ctl" is assumed to be "base"+0x206
+
+ "idex=base,ctl" : specify both base and ctl
+
+ "idex=base,ctl,irq" : specify base, ctl, and irq number
+
"idex=serialize" : do not overlap operations on idex. Please note
that you will have to specify this option for
both the respective primary and secondary channel
to take effect.
+ "idex=four" : four drives on idex and ide(x^1) share same ports
+
"idex=reset" : reset interface after probe
"idex=ata66" : informs the interface that it has an 80c cable
@@ -229,6 +269,8 @@ Summary of ide driver parameters for kernel command line
ability to bit test for detection is currently
unknown.
+ "ide=reverse" : formerly called to pci sub-system, but now local.
+
"ide=doubler" : probe/support IDE doublers on Amiga
There may be more options than shown -- use the source, Luke!
@@ -248,9 +290,6 @@ Also for legacy CMD640 host driver (cmd640) you need to use "probe_vlb"
kernel parameter to enable probing for VLB version of the chipset (PCI ones
are detected automatically).
-You also need to use "probe" kernel parameter for ide-4drives driver
-(support for IDE generic chipset with four drives on one port).
-
================================================================================
Some Terminology
diff --git a/trunk/Documentation/ide/warm-plug-howto.txt b/trunk/Documentation/ide/warm-plug-howto.txt
deleted file mode 100644
index d5885468b072..000000000000
--- a/trunk/Documentation/ide/warm-plug-howto.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-
-IDE warm-plug HOWTO
-===================
-
-To warm-plug devices on a port 'idex':
-
-# echo -n "1" > /sys/class/ide_port/idex/delete_devices
-
-unplug old device(s) and plug new device(s)
-
-# echo -n "1" > /sys/class/ide_port/idex/scan
-
-done
diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt
index 256a2162503c..dafd001bf833 100644
--- a/trunk/Documentation/kernel-parameters.txt
+++ b/trunk/Documentation/kernel-parameters.txt
@@ -763,11 +763,11 @@ and is between 256 and 4096 characters. It is defined in the file
Format: [,[,[,]]]
ide= [HW] (E)IDE subsystem
- Format: ide=nodma or ide=doubler
+ Format: ide=nodma or ide=doubler or ide=reverse
See Documentation/ide/ide.txt.
ide?= [HW] (E)IDE subsystem
- Format: ide?=ata66 or chipset specific parameters.
+ Format: ide?=noprobe or chipset specific parameters.
See Documentation/ide/ide.txt.
idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed
@@ -812,19 +812,6 @@ and is between 256 and 4096 characters. It is defined in the file
inttest= [IA64]
- iommu= [x86]
- off
- force
- noforce
- biomerge
- panic
- nopanic
- merge
- nomerge
- forcesac
- soft
-
-
intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option
off
Disable intel iommu driver.
@@ -941,11 +928,6 @@ and is between 256 and 4096 characters. It is defined in the file
kstack=N [X86-32,X86-64] Print N words from the kernel stack
in oops dumps.
- kgdboc= [HW] kgdb over consoles.
- Requires a tty driver that supports console polling.
- (only serial supported for now)
- Format: <serial_device>[,baud]
-
l2cr= [PPC]
lapic [X86-32,APIC] Enable the local APIC even if BIOS
@@ -1152,11 +1134,6 @@ and is between 256 and 4096 characters. It is defined in the file
or
memmap=0x10000$0x18690000
- memtest= [KNL,X86_64] Enable memtest
- Format:
- range: 0,4 : pattern number
- default : 0
-
meye.*= [HW] Set MotionEye Camera parameters
See Documentation/video4linux/meye.txt.
@@ -1362,10 +1339,6 @@ and is between 256 and 4096 characters. It is defined in the file
nowb [ARM]
- nptcg= [IA64] Override max number of concurrent global TLB
- purges which is reported from either PAL_VM_SUMMARY or
- SAL PALO.
-
numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
one of ['zone', 'node', 'default'] can be specified
This can be set from sysctl after boot.
diff --git a/trunk/Documentation/s390/s390dbf.txt b/trunk/Documentation/s390/s390dbf.txt
index e05420973698..0eb7c58916de 100644
--- a/trunk/Documentation/s390/s390dbf.txt
+++ b/trunk/Documentation/s390/s390dbf.txt
@@ -115,27 +115,6 @@ Return Value: Handle for generated debug area
Description: Allocates memory for a debug log
Must not be called within an interrupt handler
-----------------------------------------------------------------------------
-debug_info_t *debug_register_mode(char *name, int pages, int nr_areas,
- int buf_size, mode_t mode, uid_t uid,
- gid_t gid);
-
-Parameter: name: Name of debug log (e.g. used for debugfs entry)
- pages: Number of pages, which will be allocated per area
- nr_areas: Number of debug areas
- buf_size: Size of data area in each debug entry
- mode: File mode for debugfs files. E.g. S_IRWXUGO
- uid: User ID for debugfs files. Currently only 0 is
- supported.
- gid: Group ID for debugfs files. Currently only 0 is
- supported.
-
-Return Value: Handle for generated debug area
- NULL if register failed
-
-Description: Allocates memory for a debug log
- Must not be called within an interrupt handler
-
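A hedged usage sketch for the call described above (the log name, sizes and file mode are arbitrary example values, not taken from any real driver):

#include <linux/init.h>
#include <linux/module.h>
#include <linux/stat.h>
#include <asm/debug.h>

static debug_info_t *my_dbf;

static int __init my_init(void)
{
        /* 4 pages per area, 1 area, 16 bytes of data per entry,
         * world-readable debugfs files owned by root (uid/gid 0 are
         * the only supported values). */
        my_dbf = debug_register_mode("my_dbf", 4, 1, 16, S_IRUGO, 0, 0);
        if (!my_dbf)
                return -ENOMEM;
        return 0;
}

static void __exit my_exit(void)
{
        debug_unregister(my_dbf);
}

module_init(my_init);
module_exit(my_exit);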
---------------------------------------------------------------------------
void debug_unregister (debug_info_t * id);
diff --git a/trunk/Documentation/x86/pat.txt b/trunk/Documentation/x86/pat.txt
deleted file mode 100644
index 17965f927c15..000000000000
--- a/trunk/Documentation/x86/pat.txt
+++ /dev/null
@@ -1,100 +0,0 @@
-
-PAT (Page Attribute Table)
-
-x86 Page Attribute Table (PAT) allows for setting the memory attribute at the
-page level granularity. PAT is complementary to the MTRR settings which allows
-for setting of memory types over physical address ranges. However, PAT is
-more flexible than MTRR due to its capability to set attributes at page level
-and also due to the fact that there are no hardware limitations on number of
-such attribute settings allowed. Added flexibility comes with guidelines for
-not having memory type aliasing for the same physical memory with multiple
-virtual addresses.
-
-PAT allows for different types of memory attributes. The most commonly used
-ones that will be supported at this time are Write-back, Uncached,
-Write-combined and Uncached Minus.
-
-There are many different APIs in the kernel that allow setting of memory
-attributes at the page level. In order to avoid aliasing, these interfaces
-should be used thoughtfully. Below is a table of interfaces available,
-their intended usage and their memory attribute relationships. Internally,
-these APIs use a reserve_memtype()/free_memtype() interface on the physical
-address range to avoid any aliasing.
-
-
--------------------------------------------------------------------
-API | RAM | ACPI,... | Reserved/Holes |
------------------------|----------|------------|------------------|
- | | | |
-ioremap | -- | UC | UC |
- | | | |
-ioremap_cache | -- | WB | WB |
- | | | |
-ioremap_nocache | -- | UC | UC |
- | | | |
-ioremap_wc | -- | -- | WC |
- | | | |
-set_memory_uc | UC | -- | -- |
- set_memory_wb | | | |
- | | | |
-set_memory_wc | WC | -- | -- |
- set_memory_wb | | | |
- | | | |
-pci sysfs resource | -- | -- | UC |
- | | | |
-pci sysfs resource_wc | -- | -- | WC |
- is IORESOURCE_PREFETCH| | | |
- | | | |
-pci proc | -- | -- | UC |
- !PCIIOC_WRITE_COMBINE | | | |
- | | | |
-pci proc | -- | -- | WC |
- PCIIOC_WRITE_COMBINE | | | |
- | | | |
-/dev/mem | -- | UC | UC |
- read-write | | | |
- | | | |
-/dev/mem | -- | UC | UC |
- mmap SYNC flag | | | |
- | | | |
-/dev/mem | -- | WB/WC/UC | WB/WC/UC |
- mmap !SYNC flag | |(from exist-| (from exist- |
- and | | ing alias)| ing alias) |
- any alias to this area| | | |
- | | | |
-/dev/mem | -- | WB | WB |
- mmap !SYNC flag | | | |
- no alias to this area | | | |
- and | | | |
- MTRR says WB | | | |
- | | | |
-/dev/mem | -- | -- | UC_MINUS |
- mmap !SYNC flag | | | |
- no alias to this area | | | |
- and | | | |
- MTRR says !WB | | | |
- | | | |
--------------------------------------------------------------------
-
-Notes:
-
--- in the above table mean "Not suggested usage for the API". Some of the --'s
-are strictly enforced by the kernel. Some others are not really enforced
-today, but may be enforced in future.
-
-For ioremap and pci access through /sys or /proc - The actual type returned
-can be more restrictive, in case of any existing aliasing for that address.
-For example: If there is an existing uncached mapping, a new ioremap_wc can
-return uncached mapping in place of write-combine requested.
-
-set_memory_[uc|wc] and set_memory_wb should be used in pairs, where the driver will
-first make a region uc or wc and switch it back to wb after use.
-
-Over time writes to /proc/mtrr will be deprecated in favor of using PAT based
-interfaces. Users writing to /proc/mtrr are suggested to use above interfaces.
-
-Drivers should use ioremap_[uc|wc] to access PCI BARs with [uc|wc] access
-types.
-
-Drivers should use set_memory_[uc|wc] to set access type for RAM ranges.
-
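As a hedged illustration of that pairing rule (the buffer size and names below are made-up example values; on x86 set_memory_uc()/set_memory_wb() take a kernel virtual address and a page count):

#include <linux/errno.h>
#include <linux/gfp.h>
#include <asm/cacheflush.h>     /* set_memory_uc(), set_memory_wb() */

#define MY_ORDER  2             /* 2^2 = 4 pages */
#define MY_NPAGES (1 << MY_ORDER)

static unsigned long my_buf;

static int my_map_uncached(void)
{
        my_buf = __get_free_pages(GFP_KERNEL, MY_ORDER);
        if (!my_buf)
                return -ENOMEM;

        /* First switch the RAM range to uncached... */
        if (set_memory_uc(my_buf, MY_NPAGES)) {
                free_pages(my_buf, MY_ORDER);
                return -EIO;
        }
        return 0;
}

static void my_unmap_uncached(void)
{
        /* ...and always switch it back to write-back before freeing. */
        set_memory_wb(my_buf, MY_NPAGES);
        free_pages(my_buf, MY_ORDER);
}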
diff --git a/trunk/Documentation/x86_64/boot-options.txt b/trunk/Documentation/x86_64/boot-options.txt
index b0c7b6c4abda..34abae4e9442 100644
--- a/trunk/Documentation/x86_64/boot-options.txt
+++ b/trunk/Documentation/x86_64/boot-options.txt
@@ -307,8 +307,3 @@ Debugging
stuck (default)
Miscellaneous
-
- nogbpages
- Do not use GB pages for kernel direct mappings.
- gbpages
- Use GB pages for kernel direct mappings.
diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS
index 3eceebb48c92..e46775868019 100644
--- a/trunk/MAINTAINERS
+++ b/trunk/MAINTAINERS
@@ -2319,12 +2319,6 @@ L: linux-kernel@vger.kernel.org
L: kexec@lists.infradead.org
S: Maintained
-KGDB
-P: Jason Wessel
-M: jason.wessel@windriver.com
-L: kgdb-bugreport@lists.sourceforge.net
-S: Maintained
-
KPROBES
P: Ananth N Mavinakayanahalli
M: ananth@in.ibm.com
diff --git a/trunk/arch/alpha/kernel/Makefile b/trunk/arch/alpha/kernel/Makefile
index ac706c1d7ada..dccf05245d4d 100644
--- a/trunk/arch/alpha/kernel/Makefile
+++ b/trunk/arch/alpha/kernel/Makefile
@@ -7,7 +7,7 @@ EXTRA_AFLAGS := $(KBUILD_CFLAGS)
EXTRA_CFLAGS := -Werror -Wno-sign-compare
obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \
- irq_alpha.o signal.o setup.o ptrace.o time.o \
+ irq_alpha.o signal.o setup.o ptrace.o time.o semaphore.o \
alpha_ksyms.o systbls.o err_common.o io.o
obj-$(CONFIG_VGA_HOSE) += console.o
diff --git a/trunk/arch/alpha/kernel/alpha_ksyms.c b/trunk/arch/alpha/kernel/alpha_ksyms.c
index d96e742d4dc2..e9762a33b043 100644
--- a/trunk/arch/alpha/kernel/alpha_ksyms.c
+++ b/trunk/arch/alpha/kernel/alpha_ksyms.c
@@ -77,6 +77,15 @@ EXPORT_SYMBOL(__do_clear_user);
EXPORT_SYMBOL(__strncpy_from_user);
EXPORT_SYMBOL(__strnlen_user);
+/* Semaphore helper functions. */
+EXPORT_SYMBOL(__down_failed);
+EXPORT_SYMBOL(__down_failed_interruptible);
+EXPORT_SYMBOL(__up_wakeup);
+EXPORT_SYMBOL(down);
+EXPORT_SYMBOL(down_interruptible);
+EXPORT_SYMBOL(down_trylock);
+EXPORT_SYMBOL(up);
+
/*
* SMP-specific symbols.
*/
diff --git a/trunk/arch/alpha/kernel/semaphore.c b/trunk/arch/alpha/kernel/semaphore.c
new file mode 100644
index 000000000000..8d2982aa1b8d
--- /dev/null
+++ b/trunk/arch/alpha/kernel/semaphore.c
@@ -0,0 +1,224 @@
+/*
+ * Alpha semaphore implementation.
+ *
+ * (C) Copyright 1996 Linus Torvalds
+ * (C) Copyright 1999, 2000 Richard Henderson
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+
+/*
+ * This is basically the PPC semaphore scheme ported to use
+ * the Alpha ll/sc sequences, so see the PPC code for
+ * credits.
+ */
+
+/*
+ * Atomically update sem->count.
+ * This does the equivalent of the following:
+ *
+ * old_count = sem->count;
+ * tmp = MAX(old_count, 0) + incr;
+ * sem->count = tmp;
+ * return old_count;
+ */
+static inline int __sem_update_count(struct semaphore *sem, int incr)
+{
+ long old_count, tmp = 0;
+
+ __asm__ __volatile__(
+ "1: ldl_l %0,%2\n"
+ " cmovgt %0,%0,%1\n"
+ " addl %1,%3,%1\n"
+ " stl_c %1,%2\n"
+ " beq %1,2f\n"
+ " mb\n"
+ ".subsection 2\n"
+ "2: br 1b\n"
+ ".previous"
+ : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
+ : "Ir" (incr), "1" (tmp), "m" (sem->count));
+
+ return old_count;
+}
+
+/*
+ * Perform the "down" function. Return zero for semaphore acquired,
+ * return negative for signalled out of the function.
+ *
+ * If called from down, the return is ignored and the wait loop is
+ * not interruptible. This means that a task waiting on a semaphore
+ * using "down()" cannot be killed until someone does an "up()" on
+ * the semaphore.
+ *
+ * If called from down_interruptible, the return value gets checked
+ * upon return. If the return value is negative then the task continues
+ * with the negative value in the return register (it can be tested by
+ * the caller).
+ *
+ * Either form may be used in conjunction with "up()".
+ */
+
+void __sched
+__down_failed(struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+#ifdef CONFIG_DEBUG_SEMAPHORE
+ printk("%s(%d): down failed(%p)\n",
+ tsk->comm, task_pid_nr(tsk), sem);
+#endif
+
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ wmb();
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ /*
+ * Try to get the semaphore. If the count is > 0, then we've
+ * got the semaphore; we decrement count and exit the loop.
+ * If the count is 0 or negative, we set it to -1, indicating
+ * that we are asleep, and then sleep.
+ */
+ while (__sem_update_count(sem, -1) <= 0) {
+ schedule();
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+
+ /*
+ * If there are any more sleepers, wake one of them up so
+ * that it can either get the semaphore, or set count to -1
+ * indicating that there are still processes sleeping.
+ */
+ wake_up(&sem->wait);
+
+#ifdef CONFIG_DEBUG_SEMAPHORE
+ printk("%s(%d): down acquired(%p)\n",
+ tsk->comm, task_pid_nr(tsk), sem);
+#endif
+}
+
+int __sched
+__down_failed_interruptible(struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ long ret = 0;
+
+#ifdef CONFIG_DEBUG_SEMAPHORE
+ printk("%s(%d): down failed(%p)\n",
+ tsk->comm, task_pid_nr(tsk), sem);
+#endif
+
+ tsk->state = TASK_INTERRUPTIBLE;
+ wmb();
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ while (__sem_update_count(sem, -1) <= 0) {
+ if (signal_pending(current)) {
+ /*
+ * A signal is pending - give up trying.
+ * Set sem->count to 0 if it is negative,
+ * since we are no longer sleeping.
+ */
+ __sem_update_count(sem, 0);
+ ret = -EINTR;
+ break;
+ }
+ schedule();
+ set_task_state(tsk, TASK_INTERRUPTIBLE);
+ }
+
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+ wake_up(&sem->wait);
+
+#ifdef CONFIG_DEBUG_SEMAPHORE
+ printk("%s(%d): down %s(%p)\n",
+ current->comm, task_pid_nr(current),
+ (ret < 0 ? "interrupted" : "acquired"), sem);
+#endif
+ return ret;
+}
+
+void
+__up_wakeup(struct semaphore *sem)
+{
+ /*
+ * Note that we incremented count in up() before we came here,
+ * but that was ineffective since the result was <= 0, and
+ * any negative value of count is equivalent to 0.
+ * This ends up setting count to 1, unless count is now > 0
+ * (i.e. because some other cpu has called up() in the meantime),
+ * in which case we just increment count.
+ */
+ __sem_update_count(sem, 1);
+ wake_up(&sem->wait);
+}
+
+void __sched
+down(struct semaphore *sem)
+{
+#ifdef WAITQUEUE_DEBUG
+ CHECK_MAGIC(sem->__magic);
+#endif
+#ifdef CONFIG_DEBUG_SEMAPHORE
+ printk("%s(%d): down(%p) from %p\n",
+ current->comm, task_pid_nr(current), sem,
+ atomic_read(&sem->count), __builtin_return_address(0));
+#endif
+ __down(sem);
+}
+
+int __sched
+down_interruptible(struct semaphore *sem)
+{
+#ifdef WAITQUEUE_DEBUG
+ CHECK_MAGIC(sem->__magic);
+#endif
+#ifdef CONFIG_DEBUG_SEMAPHORE
+ printk("%s(%d): down(%p) from %p\n",
+ current->comm, task_pid_nr(current), sem,
+ atomic_read(&sem->count), __builtin_return_address(0));
+#endif
+ return __down_interruptible(sem);
+}
+
+int
+down_trylock(struct semaphore *sem)
+{
+ int ret;
+
+#ifdef WAITQUEUE_DEBUG
+ CHECK_MAGIC(sem->__magic);
+#endif
+
+ ret = __down_trylock(sem);
+
+#ifdef CONFIG_DEBUG_SEMAPHORE
+ printk("%s(%d): down_trylock %s from %p\n",
+ current->comm, task_pid_nr(current),
+ ret ? "failed" : "acquired",
+ __builtin_return_address(0));
+#endif
+
+ return ret;
+}
+
+void
+up(struct semaphore *sem)
+{
+#ifdef WAITQUEUE_DEBUG
+ CHECK_MAGIC(sem->__magic);
+#endif
+#ifdef CONFIG_DEBUG_SEMAPHORE
+ printk("%s(%d): up(%p) from %p\n",
+ current->comm, task_pid_nr(current), sem,
+ atomic_read(&sem->count), __builtin_return_address(0));
+#endif
+ __up(sem);
+}
diff --git a/trunk/arch/arm/kernel/Makefile b/trunk/arch/arm/kernel/Makefile
index 6235f72a14f0..00d44c6fbfe9 100644
--- a/trunk/arch/arm/kernel/Makefile
+++ b/trunk/arch/arm/kernel/Makefile
@@ -7,7 +7,7 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
# Object file lists.
obj-y := compat.o entry-armv.o entry-common.o irq.o \
- process.o ptrace.o setup.o signal.o \
+ process.o ptrace.o semaphore.o setup.o signal.o \
sys_arm.o stacktrace.o time.o traps.o
obj-$(CONFIG_ISA_DMA_API) += dma.o
diff --git a/trunk/arch/arm/kernel/semaphore.c b/trunk/arch/arm/kernel/semaphore.c
new file mode 100644
index 000000000000..981fe5c6ccbe
--- /dev/null
+++ b/trunk/arch/arm/kernel/semaphore.c
@@ -0,0 +1,221 @@
+/*
+ * ARM semaphore implementation, taken from
+ *
+ * i386 semaphore implementation.
+ *
+ * (C) Copyright 1999 Linus Torvalds
+ *
+ * Modified for ARM by Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+
+#include <asm/semaphore.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleeping"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation.
+ *
+ * "sleeping" and the contention routine ordering is
+ * protected by the semaphore spinlock.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+
+/*
+ * Logic:
+ * - only on a boundary condition do we need to care. When we go
+ * from a negative count to a non-negative, we wake people up.
+ * - when we go from a non-negative count to a negative do we
+ * (a) synchronize with the "sleeper" count and (b) make sure
+ * that we're on the wakeup list before we synchronize so that
+ * we cannot lose wakeup events.
+ */
+
+void __up(struct semaphore *sem)
+{
+ wake_up(&sem->wait);
+}
+
+static DEFINE_SPINLOCK(semaphore_lock);
+
+void __sched __down(struct semaphore * sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ spin_lock_irq(&semaphore_lock);
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irq(&semaphore_lock);
+
+ schedule();
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ spin_lock_irq(&semaphore_lock);
+ }
+ spin_unlock_irq(&semaphore_lock);
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+ wake_up(&sem->wait);
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ tsk->state = TASK_INTERRUPTIBLE;
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ spin_lock_irq(&semaphore_lock);
+ sem->sleepers ++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * With signals pending, this turns into
+ * the trylock failure case - we won't be
+ * sleeping, and we can't get the lock as
+ * it has contention. Just correct the count
+ * and exit.
+ */
+ if (signal_pending(current)) {
+ retval = -EINTR;
+ sem->sleepers = 0;
+ atomic_add(sleepers, &sem->count);
+ break;
+ }
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock. The
+ * "-1" is because we're still hoping to get
+ * the lock.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irq(&semaphore_lock);
+
+ schedule();
+ tsk->state = TASK_INTERRUPTIBLE;
+ spin_lock_irq(&semaphore_lock);
+ }
+ spin_unlock_irq(&semaphore_lock);
+ tsk->state = TASK_RUNNING;
+ remove_wait_queue(&sem->wait, &wait);
+ wake_up(&sem->wait);
+ return retval;
+}
+
+/*
+ * Trylock failed - make sure we correct for
+ * having decremented the count.
+ *
+ * We could have done the trylock with a
+ * single "cmpxchg" without failure cases,
+ * but then it wouldn't work on a 386.
+ */
+int __down_trylock(struct semaphore * sem)
+{
+ int sleepers;
+ unsigned long flags;
+
+ spin_lock_irqsave(&semaphore_lock, flags);
+ sleepers = sem->sleepers + 1;
+ sem->sleepers = 0;
+
+ /*
+ * Add "everybody else" and us into it. They aren't
+ * playing, because we own the spinlock.
+ */
+ if (!atomic_add_negative(sleepers, &sem->count))
+ wake_up(&sem->wait);
+
+ spin_unlock_irqrestore(&semaphore_lock, flags);
+ return 1;
+}
+
+/*
+ * The semaphore operations have a special calling sequence that
+ * allow us to do a simpler in-line version of them. These routines
+ * need to convert that sequence back into the C sequence when
+ * there is contention on the semaphore.
+ *
+ * ip contains the semaphore pointer on entry. Save the C-clobbered
+ * registers (r0 to r3 and lr), but not ip, as we use it as a return
+ * value in some cases..
+ * To remain AAPCS compliant (64-bit stack align) we save r4 as well.
+ */
+asm(" .section .sched.text,\"ax\",%progbits \n\
+ .align 5 \n\
+ .globl __down_failed \n\
+__down_failed: \n\
+ stmfd sp!, {r0 - r4, lr} \n\
+ mov r0, ip \n\
+ bl __down \n\
+ ldmfd sp!, {r0 - r4, pc} \n\
+ \n\
+ .align 5 \n\
+ .globl __down_interruptible_failed \n\
+__down_interruptible_failed: \n\
+ stmfd sp!, {r0 - r4, lr} \n\
+ mov r0, ip \n\
+ bl __down_interruptible \n\
+ mov ip, r0 \n\
+ ldmfd sp!, {r0 - r4, pc} \n\
+ \n\
+ .align 5 \n\
+ .globl __down_trylock_failed \n\
+__down_trylock_failed: \n\
+ stmfd sp!, {r0 - r4, lr} \n\
+ mov r0, ip \n\
+ bl __down_trylock \n\
+ mov ip, r0 \n\
+ ldmfd sp!, {r0 - r4, pc} \n\
+ \n\
+ .align 5 \n\
+ .globl __up_wakeup \n\
+__up_wakeup: \n\
+ stmfd sp!, {r0 - r4, lr} \n\
+ mov r0, ip \n\
+ bl __up \n\
+ ldmfd sp!, {r0 - r4, pc} \n\
+ ");
+
+EXPORT_SYMBOL(__down_failed);
+EXPORT_SYMBOL(__down_interruptible_failed);
+EXPORT_SYMBOL(__down_trylock_failed);
+EXPORT_SYMBOL(__up_wakeup);
diff --git a/trunk/arch/avr32/kernel/Makefile b/trunk/arch/avr32/kernel/Makefile
index 18229d0d1861..e4b6d122b033 100644
--- a/trunk/arch/avr32/kernel/Makefile
+++ b/trunk/arch/avr32/kernel/Makefile
@@ -6,7 +6,7 @@ extra-y := head.o vmlinux.lds
obj-$(CONFIG_SUBARCH_AVR32B) += entry-avr32b.o
obj-y += syscall_table.o syscall-stubs.o irq.o
-obj-y += setup.o traps.o ocd.o ptrace.o
+obj-y += setup.o traps.o semaphore.o ocd.o ptrace.o
obj-y += signal.o sys_avr32.o process.o time.o
obj-y += init_task.o switch_to.o cpu.o
obj-$(CONFIG_MODULES) += module.o avr32_ksyms.o
diff --git a/trunk/arch/avr32/kernel/semaphore.c b/trunk/arch/avr32/kernel/semaphore.c
new file mode 100644
index 000000000000..1e2705a05016
--- /dev/null
+++ b/trunk/arch/avr32/kernel/semaphore.c
@@ -0,0 +1,148 @@
+/*
+ * AVR32 semaphore implementation.
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on linux/arch/i386/kernel/semaphore.c
+ * Copyright (C) 1999 Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+
+#include <asm/semaphore.h>
+#include <asm/atomic.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleeping"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation.
+ *
+ * "sleeping" and the contention routine ordering is protected
+ * by the spinlock in the semaphore's waitqueue head.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+
+/*
+ * Logic:
+ * - only on a boundary condition do we need to care. When we go
+ * from a negative count to a non-negative, we wake people up.
+ * - when we go from a non-negative count to a negative do we
+ * (a) synchronize with the "sleeper" count and (b) make sure
+ * that we're on the wakeup list before we synchronize so that
+ * we cannot lose wakeup events.
+ */
+
+void __up(struct semaphore *sem)
+{
+ wake_up(&sem->wait);
+}
+EXPORT_SYMBOL(__up);
+
+void __sched __down(struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ unsigned long flags;
+
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock in
+ * the wait_queue_head.
+ */
+ if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ schedule();
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ }
+ remove_wait_queue_locked(&sem->wait, &wait);
+ wake_up_locked(&sem->wait);
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+ tsk->state = TASK_RUNNING;
+}
+EXPORT_SYMBOL(__down);
+
+int __sched __down_interruptible(struct semaphore *sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ unsigned long flags;
+
+ tsk->state = TASK_INTERRUPTIBLE;
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * With signals pending, this turns into the trylock
+ * failure case - we won't be sleeping, and we can't
+ * get the lock as it has contention. Just correct the
+ * count and exit.
+ */
+ if (signal_pending(current)) {
+ retval = -EINTR;
+ sem->sleepers = 0;
+ atomic_add(sleepers, &sem->count);
+ break;
+ }
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock in
+ * the wait_queue_head.
+ */
+ if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ schedule();
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ tsk->state = TASK_INTERRUPTIBLE;
+ }
+ remove_wait_queue_locked(&sem->wait, &wait);
+ wake_up_locked(&sem->wait);
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ tsk->state = TASK_RUNNING;
+ return retval;
+}
+EXPORT_SYMBOL(__down_interruptible);
diff --git a/trunk/arch/blackfin/Kconfig b/trunk/arch/blackfin/Kconfig
index 2dd1f300a5cf..589c6aca4803 100644
--- a/trunk/arch/blackfin/Kconfig
+++ b/trunk/arch/blackfin/Kconfig
@@ -31,6 +31,10 @@ config ZONE_DMA
bool
default y
+config SEMAPHORE_SLEEPERS
+ bool
+ default y
+
config GENERIC_FIND_NEXT_BIT
bool
default y
diff --git a/trunk/arch/blackfin/kernel/bfin_ksyms.c b/trunk/arch/blackfin/kernel/bfin_ksyms.c
index 053edff6c0d8..0bfbb269e350 100644
--- a/trunk/arch/blackfin/kernel/bfin_ksyms.c
+++ b/trunk/arch/blackfin/kernel/bfin_ksyms.c
@@ -42,6 +42,11 @@ EXPORT_SYMBOL(ip_fast_csum);
EXPORT_SYMBOL(kernel_thread);
+EXPORT_SYMBOL(__up);
+EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__down_trylock);
+EXPORT_SYMBOL(__down_interruptible);
+
EXPORT_SYMBOL(is_in_rom);
EXPORT_SYMBOL(bfin_return_from_exception);
diff --git a/trunk/arch/cris/kernel/Makefile b/trunk/arch/cris/kernel/Makefile
index ee7bcd4d20b2..c8e8ea570989 100644
--- a/trunk/arch/cris/kernel/Makefile
+++ b/trunk/arch/cris/kernel/Makefile
@@ -5,7 +5,8 @@
extra-y := vmlinux.lds
-obj-y := process.o traps.o irq.o ptrace.o setup.o time.o sys_cris.o
+obj-y := process.o traps.o irq.o ptrace.o setup.o \
+ time.o sys_cris.o semaphore.o
obj-$(CONFIG_MODULES) += crisksyms.o
obj-$(CONFIG_MODULES) += module.o
diff --git a/trunk/arch/cris/kernel/crisksyms.c b/trunk/arch/cris/kernel/crisksyms.c
index 7ac000f6a888..62f0e752915a 100644
--- a/trunk/arch/cris/kernel/crisksyms.c
+++ b/trunk/arch/cris/kernel/crisksyms.c
@@ -9,6 +9,7 @@
#include
#include
+#include
#include
#include
#include
@@ -48,6 +49,12 @@ EXPORT_SYMBOL(__negdi2);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(iounmap);
+/* Semaphore functions */
+EXPORT_SYMBOL(__up);
+EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__down_interruptible);
+EXPORT_SYMBOL(__down_trylock);
+
/* Userspace access functions */
EXPORT_SYMBOL(__copy_user_zeroing);
EXPORT_SYMBOL(__copy_user);
diff --git a/trunk/arch/cris/kernel/semaphore.c b/trunk/arch/cris/kernel/semaphore.c
new file mode 100644
index 000000000000..f137a439041f
--- /dev/null
+++ b/trunk/arch/cris/kernel/semaphore.c
@@ -0,0 +1,129 @@
+/*
+ * Generic semaphore code. Buyer beware. Do your own
+ * specific changes in <asm/semaphore-helper.h>
+ */
+
+#include <linux/sched.h>
+#include <asm/semaphore-helper.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to sleep, while the "waking" variable is
+ * incremented when the "up()" code goes to wake up waiting
+ * processes.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation.
+ *
+ * waking_non_zero() (from asm/semaphore.h) must execute
+ * atomically.
+ *
+ * When __up() is called, the count was negative before
+ * incrementing it, and we need to wake up somebody.
+ *
+ * This routine adds one to the count of processes that need to
+ * wake up and exit. ALL waiting processes actually wake up but
+ * only the one that gets to the "waking" field first will gate
+ * through and acquire the semaphore. The others will go back
+ * to sleep.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+void __up(struct semaphore *sem)
+{
+ wake_one_more(sem);
+ wake_up(&sem->wait);
+}
+
+/*
+ * Perform the "down" function. Return zero for semaphore acquired,
+ * return negative for signalled out of the function.
+ *
+ * If called from __down, the return is ignored and the wait loop is
+ * not interruptible. This means that a task waiting on a semaphore
+ * using "down()" cannot be killed until someone does an "up()" on
+ * the semaphore.
+ *
+ * If called from __down_interruptible, the return value gets checked
+ * upon return. If the return value is negative then the task continues
+ * with the negative value in the return register (it can be tested by
+ * the caller).
+ *
+ * Either form may be used in conjunction with "up()".
+ *
+ */
+
+#define DOWN_VAR \
+ struct task_struct *tsk = current; \
+ wait_queue_t wait; \
+ init_waitqueue_entry(&wait, tsk);
+
+#define DOWN_HEAD(task_state) \
+ \
+ \
+ tsk->state = (task_state); \
+ add_wait_queue(&sem->wait, &wait); \
+ \
+ /* \
+ * Ok, we're set up. sem->count is known to be less than zero \
+ * so we must wait. \
+ * \
+ * We can let go the lock for purposes of waiting. \
+ * We re-acquire it after awaking so as to protect \
+ * all semaphore operations. \
+ * \
+ * If "up()" is called before we call waking_non_zero() then \
+ * we will catch it right away. If it is called later then \
+ * we will have to go through a wakeup cycle to catch it. \
+ * \
+ * Multiple waiters contend for the semaphore lock to see \
+ * who gets to gate through and who has to wait some more. \
+ */ \
+ for (;;) {
+
+#define DOWN_TAIL(task_state) \
+ tsk->state = (task_state); \
+ } \
+ tsk->state = TASK_RUNNING; \
+ remove_wait_queue(&sem->wait, &wait);
+
+void __sched __down(struct semaphore * sem)
+{
+ DOWN_VAR
+ DOWN_HEAD(TASK_UNINTERRUPTIBLE)
+ if (waking_non_zero(sem))
+ break;
+ schedule();
+ DOWN_TAIL(TASK_UNINTERRUPTIBLE)
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ int ret = 0;
+ DOWN_VAR
+ DOWN_HEAD(TASK_INTERRUPTIBLE)
+
+ ret = waking_non_zero_interruptible(sem, tsk);
+ if (ret)
+ {
+ if (ret == 1)
+ /* ret != 0 only if we get interrupted -arca */
+ ret = 0;
+ break;
+ }
+ schedule();
+ DOWN_TAIL(TASK_INTERRUPTIBLE)
+ return ret;
+}
+
+int __down_trylock(struct semaphore * sem)
+{
+ return waking_non_zero_trylock(sem);
+}
diff --git a/trunk/arch/frv/kernel/Makefile b/trunk/arch/frv/kernel/Makefile
index c36f70b6699a..e8f73ed28b52 100644
--- a/trunk/arch/frv/kernel/Makefile
+++ b/trunk/arch/frv/kernel/Makefile
@@ -9,7 +9,7 @@ extra-y:= head.o init_task.o vmlinux.lds
obj-y := $(heads-y) entry.o entry-table.o break.o switch_to.o kernel_thread.o \
kernel_execve.o process.o traps.o ptrace.o signal.o dma.o \
- sys_frv.o time.o setup.o frv_ksyms.o \
+ sys_frv.o time.o semaphore.o setup.o frv_ksyms.o \
debug-stub.o irq.o sleep.o uaccess.o
obj-$(CONFIG_GDBSTUB) += gdb-stub.o gdb-io.o
diff --git a/trunk/arch/frv/kernel/frv_ksyms.c b/trunk/arch/frv/kernel/frv_ksyms.c
index 0316b3c50eff..f772704b3d28 100644
--- a/trunk/arch/frv/kernel/frv_ksyms.c
+++ b/trunk/arch/frv/kernel/frv_ksyms.c
@@ -12,6 +12,7 @@
#include
#include
#include
+#include
#include
#include
#include
diff --git a/trunk/arch/frv/kernel/semaphore.c b/trunk/arch/frv/kernel/semaphore.c
new file mode 100644
index 000000000000..7ee3a147b471
--- /dev/null
+++ b/trunk/arch/frv/kernel/semaphore.c
@@ -0,0 +1,155 @@
+/* semaphore.c: FR-V semaphores
+ *
+ * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from lib/rwsem-spinlock.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <asm/semaphore.h>
+
+struct sem_waiter {
+ struct list_head list;
+ struct task_struct *task;
+};
+
+#ifdef CONFIG_DEBUG_SEMAPHORE
+void semtrace(struct semaphore *sem, const char *str)
+{
+ if (sem->debug)
+ printk("[%d] %s({%d,%d})\n",
+ current->pid,
+ str,
+ sem->counter,
+ list_empty(&sem->wait_list) ? 0 : 1);
+}
+#else
+#define semtrace(SEM,STR) do { } while(0)
+#endif
+
+/*
+ * wait for a token to be granted from a semaphore
+ * - entered with lock held and interrupts disabled
+ */
+void __down(struct semaphore *sem, unsigned long flags)
+{
+ struct task_struct *tsk = current;
+ struct sem_waiter waiter;
+
+ semtrace(sem, "Entering __down");
+
+ /* set up my own style of waitqueue */
+ waiter.task = tsk;
+ get_task_struct(tsk);
+
+ list_add_tail(&waiter.list, &sem->wait_list);
+
+ /* we don't need to touch the semaphore struct anymore */
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ /* wait to be given the semaphore */
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+
+ for (;;) {
+ if (list_empty(&waiter.list))
+ break;
+ schedule();
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ }
+
+ tsk->state = TASK_RUNNING;
+ semtrace(sem, "Leaving __down");
+}
+
+EXPORT_SYMBOL(__down);
+
+/*
+ * interruptibly wait for a token to be granted from a semaphore
+ * - entered with lock held and interrupts disabled
+ */
+int __down_interruptible(struct semaphore *sem, unsigned long flags)
+{
+ struct task_struct *tsk = current;
+ struct sem_waiter waiter;
+ int ret;
+
+ semtrace(sem,"Entering __down_interruptible");
+
+ /* set up my own style of waitqueue */
+ waiter.task = tsk;
+ get_task_struct(tsk);
+
+ list_add_tail(&waiter.list, &sem->wait_list);
+
+ /* we don't need to touch the semaphore struct anymore */
+ set_task_state(tsk, TASK_INTERRUPTIBLE);
+
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ /* wait to be given the semaphore */
+ ret = 0;
+ for (;;) {
+ if (list_empty(&waiter.list))
+ break;
+ if (unlikely(signal_pending(current)))
+ goto interrupted;
+ schedule();
+ set_task_state(tsk, TASK_INTERRUPTIBLE);
+ }
+
+ out:
+ tsk->state = TASK_RUNNING;
+ semtrace(sem, "Leaving __down_interruptible");
+ return ret;
+
+ interrupted:
+ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ if (!list_empty(&waiter.list)) {
+ list_del(&waiter.list);
+ ret = -EINTR;
+ }
+
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+ if (ret == -EINTR)
+ put_task_struct(current);
+ goto out;
+}
+
+EXPORT_SYMBOL(__down_interruptible);
+
+/*
+ * release a single token back to a semaphore
+ * - entered with lock held and interrupts disabled
+ */
+void __up(struct semaphore *sem)
+{
+ struct task_struct *tsk;
+ struct sem_waiter *waiter;
+
+ semtrace(sem,"Entering __up");
+
+ /* grant the token to the process at the front of the queue */
+ waiter = list_entry(sem->wait_list.next, struct sem_waiter, list);
+
+ /* We must be careful not to touch 'waiter' after we set ->task = NULL.
+ * It is allocated on the waiter's stack and may become invalid at
+ * any time after that point (due to a wakeup from another source).
+ */
+ list_del_init(&waiter->list);
+ tsk = waiter->task;
+ mb();
+ waiter->task = NULL;
+ wake_up_process(tsk);
+ put_task_struct(tsk);
+
+ semtrace(sem,"Leaving __up");
+}
+
+EXPORT_SYMBOL(__up);
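
The FR-V implementation added above takes a different approach from the generic
count/waking code: a contended __down() queues an on-stack sem_waiter node in FIFO
order and sleeps until __up() unlinks that particular node and wakes exactly that
task, so ordering is strictly first-come first-served and there is no thundering
herd.  Below is a rough pthread model of the hand-off; handoff_sem, handoff_down
and handoff_up are invented names, and a broadcast on a condition variable plus a
per-waiter flag stands in for the kernel's targeted wake_up_process() and the
get_task_struct()/mb() lifetime handling.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct waiter {
	struct waiter *next;
	bool granted;                  /* role of "our node was unlinked"       */
};

struct handoff_sem {
	int count;                     /* tokens currently available            */
	struct waiter *head, *tail;    /* FIFO of sleeping waiters              */
	pthread_mutex_t lock;          /* stands in for sem->wait_lock          */
	pthread_cond_t wake;           /* stands in for wake_up_process()       */
};

static struct handoff_sem sem = {
	.count = 1,
	.lock  = PTHREAD_MUTEX_INITIALIZER,
	.wake  = PTHREAD_COND_INITIALIZER,
};

static void handoff_down(struct handoff_sem *s)
{
	struct waiter self = { .next = NULL, .granted = false };

	pthread_mutex_lock(&s->lock);
	if (s->count > 0) {
		s->count--;                        /* uncontended fast path        */
	} else {
		/* queue at the tail, like list_add_tail(&waiter.list, ...) */
		if (s->tail)
			s->tail->next = &self;
		else
			s->head = &self;
		s->tail = &self;

		while (!self.granted)              /* "while our node is queued"   */
			pthread_cond_wait(&s->wake, &s->lock);
	}
	pthread_mutex_unlock(&s->lock);
}

static void handoff_up(struct handoff_sem *s)
{
	pthread_mutex_lock(&s->lock);
	if (s->head) {
		struct waiter *w = s->head;        /* grant to the oldest waiter   */
		s->head = w->next;
		if (!s->head)
			s->tail = NULL;
		w->granted = true;                 /* like list_del_init + wakeup  */
		pthread_cond_broadcast(&s->wake);  /* condvars lack a targeted wake */
	} else {
		s->count++;                        /* nobody waiting               */
	}
	pthread_mutex_unlock(&s->lock);
}

static void *worker(void *arg)
{
	handoff_down(&sem);
	printf("worker %d holds the semaphore\n", *(int *)arg);
	handoff_up(&sem);
	return NULL;
}

int main(void)
{
	pthread_t t[3];
	int id[3] = { 0, 1, 2 };

	for (int i = 0; i < 3; i++)
		pthread_create(&t[i], NULL, worker, &id[i]);
	for (int i = 0; i < 3; i++)
		pthread_join(t[i], NULL);
	return 0;
}

The granted flag plays the role of list_empty(&waiter.list) in the kernel loop:
a woken thread only proceeds once the releaser has unlinked its node.
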
diff --git a/trunk/arch/h8300/kernel/Makefile b/trunk/arch/h8300/kernel/Makefile
index 6c248c3c5c3b..874f6aefee65 100644
--- a/trunk/arch/h8300/kernel/Makefile
+++ b/trunk/arch/h8300/kernel/Makefile
@@ -5,7 +5,7 @@
extra-y := vmlinux.lds
obj-y := process.o traps.o ptrace.o irq.o \
- sys_h8300.o time.o signal.o \
+ sys_h8300.o time.o semaphore.o signal.o \
setup.o gpio.o init_task.o syscalls.o \
entry.o
diff --git a/trunk/arch/h8300/kernel/h8300_ksyms.c b/trunk/arch/h8300/kernel/h8300_ksyms.c
index 6866bd9c7fb4..d1b15267ac81 100644
--- a/trunk/arch/h8300/kernel/h8300_ksyms.c
+++ b/trunk/arch/h8300/kernel/h8300_ksyms.c
@@ -12,6 +12,7 @@
#include
#include
#include
+#include <asm/semaphore.h>
#include
#include
#include
diff --git a/trunk/arch/h8300/kernel/semaphore.c b/trunk/arch/h8300/kernel/semaphore.c
new file mode 100644
index 000000000000..d12cbbfe6ebd
--- /dev/null
+++ b/trunk/arch/h8300/kernel/semaphore.c
@@ -0,0 +1,132 @@
+/*
+ * Generic semaphore code. Buyer beware. Do your own
+ * specific changes in <asm/semaphore-helper.h>
+ */
+
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <asm/semaphore-helper.h>
+
+#ifndef CONFIG_RMW_INSNS
+spinlock_t semaphore_wake_lock;
+#endif
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to sleep, while the "waking" variable is
+ * incremented when the "up()" code goes to wake up waiting
+ * processes.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * waking_non_zero() (from asm/semaphore.h) must execute
+ * atomically.
+ *
+ * When __up() is called, the count was negative before
+ * incrementing it, and we need to wake up somebody.
+ *
+ * This routine adds one to the count of processes that need to
+ * wake up and exit. ALL waiting processes actually wake up but
+ * only the one that gets to the "waking" field first will gate
+ * through and acquire the semaphore. The others will go back
+ * to sleep.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+void __up(struct semaphore *sem)
+{
+ wake_one_more(sem);
+ wake_up(&sem->wait);
+}
+
+/*
+ * Perform the "down" function. Return zero for semaphore acquired,
+ * return negative for signalled out of the function.
+ *
+ * If called from __down, the return is ignored and the wait loop is
+ * not interruptible. This means that a task waiting on a semaphore
+ * using "down()" cannot be killed until someone does an "up()" on
+ * the semaphore.
+ *
+ * If called from __down_interruptible, the return value gets checked
+ * upon return. If the return value is negative then the task continues
+ * with the negative value in the return register (it can be tested by
+ * the caller).
+ *
+ * Either form may be used in conjunction with "up()".
+ *
+ */
+
+
+#define DOWN_HEAD(task_state) \
+ \
+ \
+ current->state = (task_state); \
+ add_wait_queue(&sem->wait, &wait); \
+ \
+ /* \
+ * Ok, we're set up. sem->count is known to be less than zero \
+ * so we must wait. \
+ * \
+ * We can let go the lock for purposes of waiting. \
+ * We re-acquire it after awaking so as to protect \
+ * all semaphore operations. \
+ * \
+ * If "up()" is called before we call waking_non_zero() then \
+ * we will catch it right away. If it is called later then \
+ * we will have to go through a wakeup cycle to catch it. \
+ * \
+ * Multiple waiters contend for the semaphore lock to see \
+ * who gets to gate through and who has to wait some more. \
+ */ \
+ for (;;) {
+
+#define DOWN_TAIL(task_state) \
+ current->state = (task_state); \
+ } \
+ current->state = TASK_RUNNING; \
+ remove_wait_queue(&sem->wait, &wait);
+
+void __sched __down(struct semaphore * sem)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ DOWN_HEAD(TASK_UNINTERRUPTIBLE)
+ if (waking_non_zero(sem))
+ break;
+ schedule();
+ DOWN_TAIL(TASK_UNINTERRUPTIBLE)
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ DECLARE_WAITQUEUE(wait, current);
+ int ret = 0;
+
+ DOWN_HEAD(TASK_INTERRUPTIBLE)
+
+ ret = waking_non_zero_interruptible(sem, current);
+ if (ret)
+ {
+ if (ret == 1)
+ /* ret != 0 only if we get interrupted -arca */
+ ret = 0;
+ break;
+ }
+ schedule();
+ DOWN_TAIL(TASK_INTERRUPTIBLE)
+ return ret;
+}
+
+int __down_trylock(struct semaphore * sem)
+{
+ return waking_non_zero_trylock(sem);
+}
diff --git a/trunk/arch/ia64/Kconfig b/trunk/arch/ia64/Kconfig
index ed21737a00c5..8fa3faf5ef1b 100644
--- a/trunk/arch/ia64/Kconfig
+++ b/trunk/arch/ia64/Kconfig
@@ -283,17 +283,6 @@ config FORCE_MAX_ZONEORDER
default "17" if HUGETLB_PAGE
default "11"
-config VIRT_CPU_ACCOUNTING
- bool "Deterministic task and CPU time accounting"
- default n
- help
- Select this option to enable more accurate task and CPU time
- accounting. This is done by reading a CPU counter on each
- kernel entry and exit and on transitions within the kernel
- between system, softirq and hardirq state, so there is a
- small performance impact.
- If in doubt, say N here.
-
config SMP
bool "Symmetric multi-processing support"
help
@@ -622,9 +611,6 @@ config IRQ_PER_CPU
bool
default y
-config IOMMU_HELPER
- def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC)
-
source "arch/ia64/hp/sim/Kconfig"
source "arch/ia64/Kconfig.debug"
diff --git a/trunk/arch/ia64/hp/common/sba_iommu.c b/trunk/arch/ia64/hp/common/sba_iommu.c
index 9409de5c9441..523eae6d3e49 100644
--- a/trunk/arch/ia64/hp/common/sba_iommu.c
+++ b/trunk/arch/ia64/hp/common/sba_iommu.c
@@ -35,7 +35,6 @@
#include
#include /* hweight64() */
#include
-#include <linux/iommu-helper.h>
#include /* ia64_get_itc() */
#include
@@ -461,13 +460,6 @@ get_iovp_order (unsigned long size)
return order;
}
-static unsigned long ptr_to_pide(struct ioc *ioc, unsigned long *res_ptr,
- unsigned int bitshiftcnt)
-{
- return (((unsigned long)res_ptr - (unsigned long)ioc->res_map) << 3)
- + bitshiftcnt;
-}
-
/**
* sba_search_bitmap - find free space in IO PDIR resource bitmap
* @ioc: IO MMU structure which owns the pdir we are interested in.
@@ -479,25 +471,15 @@ static unsigned long ptr_to_pide(struct ioc *ioc, unsigned long *res_ptr,
* Cool perf optimization: search for log2(size) bits at a time.
*/
static SBA_INLINE unsigned long
-sba_search_bitmap(struct ioc *ioc, struct device *dev,
- unsigned long bits_wanted, int use_hint)
+sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
{
unsigned long *res_ptr;
unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
- unsigned long flags, pide = ~0UL, tpide;
- unsigned long boundary_size;
- unsigned long shift;
- int ret;
+ unsigned long flags, pide = ~0UL;
ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
ASSERT(res_ptr < res_end);
- boundary_size = (unsigned long long)dma_get_seg_boundary(dev) + 1;
- boundary_size = ALIGN(boundary_size, 1ULL << iovp_shift) >> iovp_shift;
-
- BUG_ON(ioc->ibase & ~iovp_mask);
- shift = ioc->ibase >> iovp_shift;
-
spin_lock_irqsave(&ioc->res_lock, flags);
/* Allow caller to force a search through the entire resource space */
@@ -522,7 +504,9 @@ sba_search_bitmap(struct ioc *ioc, struct device *dev,
if (likely(*res_ptr != ~0UL)) {
bitshiftcnt = ffz(*res_ptr);
*res_ptr |= (1UL << bitshiftcnt);
- pide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
+ pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
+ pide <<= 3; /* convert to bit address */
+ pide += bitshiftcnt;
ioc->res_bitshift = bitshiftcnt + bits_wanted;
goto found_it;
}
@@ -551,13 +535,11 @@ sba_search_bitmap(struct ioc *ioc, struct device *dev,
DBG_RES(" %p %lx %lx\n", res_ptr, mask, *res_ptr);
ASSERT(0 != mask);
for (; mask ; mask <<= o, bitshiftcnt += o) {
- tpide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
- ret = iommu_is_span_boundary(tpide, bits_wanted,
- shift,
- boundary_size);
- if ((0 == ((*res_ptr) & mask)) && !ret) {
+ if(0 == ((*res_ptr) & mask)) {
*res_ptr |= mask; /* mark resources busy! */
- pide = tpide;
+ pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
+ pide <<= 3; /* convert to bit address */
+ pide += bitshiftcnt;
ioc->res_bitshift = bitshiftcnt + bits_wanted;
goto found_it;
}
@@ -578,11 +560,6 @@ sba_search_bitmap(struct ioc *ioc, struct device *dev,
end = res_end - qwords;
for (; res_ptr < end; res_ptr++) {
- tpide = ptr_to_pide(ioc, res_ptr, 0);
- ret = iommu_is_span_boundary(tpide, bits_wanted,
- shift, boundary_size);
- if (ret)
- goto next_ptr;
for (i = 0 ; i < qwords ; i++) {
if (res_ptr[i] != 0)
goto next_ptr;
@@ -595,7 +572,8 @@ sba_search_bitmap(struct ioc *ioc, struct device *dev,
res_ptr[i] = ~0UL;
res_ptr[i] |= RESMAP_MASK(bits);
- pide = tpide;
+ pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
+ pide <<= 3; /* convert to bit address */
res_ptr += qwords;
ioc->res_bitshift = bits;
goto found_it;
@@ -627,7 +605,7 @@ sba_search_bitmap(struct ioc *ioc, struct device *dev,
* resource bit map.
*/
static int
-sba_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
+sba_alloc_range(struct ioc *ioc, size_t size)
{
unsigned int pages_needed = size >> iovp_shift;
#ifdef PDIR_SEARCH_TIMING
@@ -644,9 +622,9 @@ sba_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
/*
** "seek and ye shall find"...praying never hurts either...
*/
- pide = sba_search_bitmap(ioc, dev, pages_needed, 1);
+ pide = sba_search_bitmap(ioc, pages_needed, 1);
if (unlikely(pide >= (ioc->res_size << 3))) {
- pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
+ pide = sba_search_bitmap(ioc, pages_needed, 0);
if (unlikely(pide >= (ioc->res_size << 3))) {
#if DELAYED_RESOURCE_CNT > 0
unsigned long flags;
@@ -675,7 +653,7 @@ sba_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
}
spin_unlock_irqrestore(&ioc->saved_lock, flags);
- pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
+ pide = sba_search_bitmap(ioc, pages_needed, 0);
if (unlikely(pide >= (ioc->res_size << 3)))
panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
ioc->ioc_hpa);
@@ -958,7 +936,7 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir)
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
- pide = sba_alloc_range(ioc, dev, size);
+ pide = sba_alloc_range(ioc, size);
iovp = (dma_addr_t) pide << iovp_shift;
@@ -1395,7 +1373,7 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask;
ASSERT(dma_len <= DMA_CHUNK_SIZE);
dma_sg->dma_address = (dma_addr_t) (PIDE_FLAG
- | (sba_alloc_range(ioc, dev, dma_len) << iovp_shift)
+ | (sba_alloc_range(ioc, dma_len) << iovp_shift)
| dma_offset);
n_mappings++;
}
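
The sba_search_bitmap() hunks above drop the ptr_to_pide() helper and open-code
the conversion from a resource-map word pointer plus a bit offset into a PDIR
index: the byte offset of the word within ioc->res_map is shifted left by 3
(bytes to bits) and the free bit's position inside that word is added.  A tiny
stand-alone check of that arithmetic, with illustrative names:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned long res_map[4] = { 0 };      /* stand-in for ioc->res_map     */
	unsigned long *res_ptr = &res_map[2];  /* word where the search stopped */
	unsigned int bitshiftcnt = 5;          /* free bit found in that word   */

	/* byte offset into the map, times 8 bits per byte, plus the bit index */
	unsigned long pide = (((unsigned long)res_ptr -
	                       (unsigned long)res_map) << 3) + bitshiftcnt;

	assert(pide == 2 * 8 * sizeof(unsigned long) + 5);
	printf("pide = %lu\n", pide);
	return 0;
}
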
diff --git a/trunk/arch/ia64/ia32/elfcore32.h b/trunk/arch/ia64/ia32/elfcore32.h
index 9a3abf58cea3..446c9aac924d 100644
--- a/trunk/arch/ia64/ia32/elfcore32.h
+++ b/trunk/arch/ia64/ia32/elfcore32.h
@@ -30,19 +30,7 @@ struct elf_siginfo
int si_errno; /* errno */
};
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-/*
- * Hacks are here since types between compat_timeval (= pair of s32) and
- * ia64-native timeval (= pair of s64) are not compatible, at least a file
- * arch/ia64/ia32/../../../fs/binfmt_elf.c will get warnings from compiler on
- * use of cputime_to_timeval(), which usually an alias of jiffies_to_timeval().
- */
-#define cputime_to_timeval(a,b) \
- do { (b)->tv_usec = 0; (b)->tv_sec = (a)/NSEC_PER_SEC; } while(0)
-#else
-#define jiffies_to_timeval(a,b) \
- do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; } while(0)
-#endif
+#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
struct elf_prstatus
{
diff --git a/trunk/arch/ia64/ia32/sys_ia32.c b/trunk/arch/ia64/ia32/sys_ia32.c
index 7e028ceb93ba..b1bf51fe97b4 100644
--- a/trunk/arch/ia64/ia32/sys_ia32.c
+++ b/trunk/arch/ia64/ia32/sys_ia32.c
@@ -38,7 +38,6 @@
#include
#include
#include
-#include <linux/regset.h>
#include
#include
#include
@@ -2388,45 +2387,16 @@ get_free_idx (void)
return -ESRCH;
}
-static void set_tls_desc(struct task_struct *p, int idx,
- const struct ia32_user_desc *info, int n)
-{
- struct thread_struct *t = &p->thread;
- struct desc_struct *desc = &t->tls_array[idx - GDT_ENTRY_TLS_MIN];
- int cpu;
-
- /*
- * We must not get preempted while modifying the TLS.
- */
- cpu = get_cpu();
-
- while (n-- > 0) {
- if (LDT_empty(info)) {
- desc->a = 0;
- desc->b = 0;
- } else {
- desc->a = LDT_entry_a(info);
- desc->b = LDT_entry_b(info);
- }
-
- ++info;
- ++desc;
- }
-
- if (t == &current->thread)
- load_TLS(t, cpu);
-
- put_cpu();
-}
-
/*
* Set a given TLS descriptor:
*/
asmlinkage int
sys32_set_thread_area (struct ia32_user_desc __user *u_info)
{
+ struct thread_struct *t = &current->thread;
struct ia32_user_desc info;
- int idx;
+ struct desc_struct *desc;
+ int cpu, idx;
if (copy_from_user(&info, u_info, sizeof(info)))
return -EFAULT;
@@ -2446,7 +2416,18 @@ sys32_set_thread_area (struct ia32_user_desc __user *u_info)
if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;
- set_tls_desc(current, idx, &info, 1);
+ desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
+
+ cpu = smp_processor_id();
+
+ if (LDT_empty(&info)) {
+ desc->a = 0;
+ desc->b = 0;
+ } else {
+ desc->a = LDT_entry_a(&info);
+ desc->b = LDT_entry_b(&info);
+ }
+ load_TLS(t, cpu);
return 0;
}
@@ -2470,20 +2451,6 @@ sys32_set_thread_area (struct ia32_user_desc __user *u_info)
#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
-static void fill_user_desc(struct ia32_user_desc *info, int idx,
- const struct desc_struct *desc)
-{
- info->entry_number = idx;
- info->base_addr = GET_BASE(desc);
- info->limit = GET_LIMIT(desc);
- info->seg_32bit = GET_32BIT(desc);
- info->contents = GET_CONTENTS(desc);
- info->read_exec_only = !GET_WRITABLE(desc);
- info->limit_in_pages = GET_LIMIT_PAGES(desc);
- info->seg_not_present = !GET_PRESENT(desc);
- info->useable = GET_USEABLE(desc);
-}
-
asmlinkage int
sys32_get_thread_area (struct ia32_user_desc __user *u_info)
{
@@ -2497,588 +2464,22 @@ sys32_get_thread_area (struct ia32_user_desc __user *u_info)
return -EINVAL;
desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
- fill_user_desc(&info, idx, desc);
-
- if (copy_to_user(u_info, &info, sizeof(info)))
- return -EFAULT;
- return 0;
-}
-
-struct regset_get {
- void *kbuf;
- void __user *ubuf;
-};
-
-struct regset_set {
- const void *kbuf;
- const void __user *ubuf;
-};
-
-struct regset_getset {
- struct task_struct *target;
- const struct user_regset *regset;
- union {
- struct regset_get get;
- struct regset_set set;
- } u;
- unsigned int pos;
- unsigned int count;
- int ret;
-};
-
-static void getfpreg(struct task_struct *task, int regno, int *val)
-{
- switch (regno / sizeof(int)) {
- case 0:
- *val = task->thread.fcr & 0xffff;
- break;
- case 1:
- *val = task->thread.fsr & 0xffff;
- break;
- case 2:
- *val = (task->thread.fsr>>16) & 0xffff;
- break;
- case 3:
- *val = task->thread.fir;
- break;
- case 4:
- *val = (task->thread.fir>>32) & 0xffff;
- break;
- case 5:
- *val = task->thread.fdr;
- break;
- case 6:
- *val = (task->thread.fdr >> 32) & 0xffff;
- break;
- }
-}
-
-static void setfpreg(struct task_struct *task, int regno, int val)
-{
- switch (regno / sizeof(int)) {
- case 0:
- task->thread.fcr = (task->thread.fcr & (~0x1f3f))
- | (val & 0x1f3f);
- break;
- case 1:
- task->thread.fsr = (task->thread.fsr & (~0xffff)) | val;
- break;
- case 2:
- task->thread.fsr = (task->thread.fsr & (~0xffff0000))
- | (val << 16);
- break;
- case 3:
- task->thread.fir = (task->thread.fir & (~0xffffffff)) | val;
- break;
- case 5:
- task->thread.fdr = (task->thread.fdr & (~0xffffffff)) | val;
- break;
- }
-}
-
-static void access_fpreg_ia32(int regno, void *reg,
- struct pt_regs *pt, struct switch_stack *sw,
- int tos, int write)
-{
- void *f;
-
- if ((regno += tos) >= 8)
- regno -= 8;
- if (regno < 4)
- f = &pt->f8 + regno;
- else if (regno <= 7)
- f = &sw->f12 + (regno - 4);
- else {
- printk(KERN_ERR "regno must be less than 7 \n");
- return;
- }
-
- if (write)
- memcpy(f, reg, sizeof(struct _fpreg_ia32));
- else
- memcpy(reg, f, sizeof(struct _fpreg_ia32));
-}
-
-static void do_fpregs_get(struct unw_frame_info *info, void *arg)
-{
- struct regset_getset *dst = arg;
- struct task_struct *task = dst->target;
- struct pt_regs *pt;
- int start, end, tos;
- char buf[80];
-
- if (dst->count == 0 || unw_unwind_to_user(info) < 0)
- return;
- if (dst->pos < 7 * sizeof(int)) {
- end = min((dst->pos + dst->count),
- (unsigned int)(7 * sizeof(int)));
- for (start = dst->pos; start < end; start += sizeof(int))
- getfpreg(task, start, (int *)(buf + start));
- dst->ret = user_regset_copyout(&dst->pos, &dst->count,
- &dst->u.get.kbuf, &dst->u.get.ubuf, buf,
- 0, 7 * sizeof(int));
- if (dst->ret || dst->count == 0)
- return;
- }
- if (dst->pos < sizeof(struct ia32_user_i387_struct)) {
- pt = task_pt_regs(task);
- tos = (task->thread.fsr >> 11) & 7;
- end = min(dst->pos + dst->count,
- (unsigned int)(sizeof(struct ia32_user_i387_struct)));
- start = (dst->pos - 7 * sizeof(int)) /
- sizeof(struct _fpreg_ia32);
- end = (end - 7 * sizeof(int)) / sizeof(struct _fpreg_ia32);
- for (; start < end; start++)
- access_fpreg_ia32(start,
- (struct _fpreg_ia32 *)buf + start,
- pt, info->sw, tos, 0);
- dst->ret = user_regset_copyout(&dst->pos, &dst->count,
- &dst->u.get.kbuf, &dst->u.get.ubuf,
- buf, 7 * sizeof(int),
- sizeof(struct ia32_user_i387_struct));
- if (dst->ret || dst->count == 0)
- return;
- }
-}
-
-static void do_fpregs_set(struct unw_frame_info *info, void *arg)
-{
- struct regset_getset *dst = arg;
- struct task_struct *task = dst->target;
- struct pt_regs *pt;
- char buf[80];
- int end, start, tos;
-
- if (dst->count == 0 || unw_unwind_to_user(info) < 0)
- return;
-
- if (dst->pos < 7 * sizeof(int)) {
- start = dst->pos;
- dst->ret = user_regset_copyin(&dst->pos, &dst->count,
- &dst->u.set.kbuf, &dst->u.set.ubuf, buf,
- 0, 7 * sizeof(int));
- if (dst->ret)
- return;
- for (; start < dst->pos; start += sizeof(int))
- setfpreg(task, start, *((int *)(buf + start)));
- if (dst->count == 0)
- return;
- }
- if (dst->pos < sizeof(struct ia32_user_i387_struct)) {
- start = (dst->pos - 7 * sizeof(int)) /
- sizeof(struct _fpreg_ia32);
- dst->ret = user_regset_copyin(&dst->pos, &dst->count,
- &dst->u.set.kbuf, &dst->u.set.ubuf,
- buf, 7 * sizeof(int),
- sizeof(struct ia32_user_i387_struct));
- if (dst->ret)
- return;
- pt = task_pt_regs(task);
- tos = (task->thread.fsr >> 11) & 7;
- end = (dst->pos - 7 * sizeof(int)) / sizeof(struct _fpreg_ia32);
- for (; start < end; start++)
- access_fpreg_ia32(start,
- (struct _fpreg_ia32 *)buf + start,
- pt, info->sw, tos, 1);
- if (dst->count == 0)
- return;
- }
-}
-
-#define OFFSET(member) ((int)(offsetof(struct ia32_user_fxsr_struct, member)))
-static void getfpxreg(struct task_struct *task, int start, int end, char *buf)
-{
- int min_val;
-
- min_val = min(end, OFFSET(fop));
- while (start < min_val) {
- if (start == OFFSET(cwd))
- *((short *)buf) = task->thread.fcr & 0xffff;
- else if (start == OFFSET(swd))
- *((short *)buf) = task->thread.fsr & 0xffff;
- else if (start == OFFSET(twd))
- *((short *)buf) = (task->thread.fsr>>16) & 0xffff;
- buf += 2;
- start += 2;
- }
- /* skip fop element */
- if (start == OFFSET(fop)) {
- start += 2;
- buf += 2;
- }
- while (start < end) {
- if (start == OFFSET(fip))
- *((int *)buf) = task->thread.fir;
- else if (start == OFFSET(fcs))
- *((int *)buf) = (task->thread.fir>>32) & 0xffff;
- else if (start == OFFSET(foo))
- *((int *)buf) = task->thread.fdr;
- else if (start == OFFSET(fos))
- *((int *)buf) = (task->thread.fdr>>32) & 0xffff;
- else if (start == OFFSET(mxcsr))
- *((int *)buf) = ((task->thread.fcr>>32) & 0xff80)
- | ((task->thread.fsr>>32) & 0x3f);
- buf += 4;
- start += 4;
- }
-}
-
-static void setfpxreg(struct task_struct *task, int start, int end, char *buf)
-{
- int min_val, num32;
- short num;
- unsigned long num64;
-
- min_val = min(end, OFFSET(fop));
- while (start < min_val) {
- num = *((short *)buf);
- if (start == OFFSET(cwd)) {
- task->thread.fcr = (task->thread.fcr & (~0x1f3f))
- | (num & 0x1f3f);
- } else if (start == OFFSET(swd)) {
- task->thread.fsr = (task->thread.fsr & (~0xffff)) | num;
- } else if (start == OFFSET(twd)) {
- task->thread.fsr = (task->thread.fsr & (~0xffff0000))
- | (((int)num) << 16);
- }
- buf += 2;
- start += 2;
- }
- /* skip fop element */
- if (start == OFFSET(fop)) {
- start += 2;
- buf += 2;
- }
- while (start < end) {
- num32 = *((int *)buf);
- if (start == OFFSET(fip))
- task->thread.fir = (task->thread.fir & (~0xffffffff))
- | num32;
- else if (start == OFFSET(foo))
- task->thread.fdr = (task->thread.fdr & (~0xffffffff))
- | num32;
- else if (start == OFFSET(mxcsr)) {
- num64 = num32 & 0xff10;
- task->thread.fcr = (task->thread.fcr &
- (~0xff1000000000UL)) | (num64<<32);
- num64 = num32 & 0x3f;
- task->thread.fsr = (task->thread.fsr &
- (~0x3f00000000UL)) | (num64<<32);
- }
- buf += 4;
- start += 4;
- }
-}
-
-static void do_fpxregs_get(struct unw_frame_info *info, void *arg)
-{
- struct regset_getset *dst = arg;
- struct task_struct *task = dst->target;
- struct pt_regs *pt;
- char buf[128];
- int start, end, tos;
-
- if (dst->count == 0 || unw_unwind_to_user(info) < 0)
- return;
- if (dst->pos < OFFSET(st_space[0])) {
- end = min(dst->pos + dst->count, (unsigned int)32);
- getfpxreg(task, dst->pos, end, buf);
- dst->ret = user_regset_copyout(&dst->pos, &dst->count,
- &dst->u.get.kbuf, &dst->u.get.ubuf, buf,
- 0, OFFSET(st_space[0]));
- if (dst->ret || dst->count == 0)
- return;
- }
- if (dst->pos < OFFSET(xmm_space[0])) {
- pt = task_pt_regs(task);
- tos = (task->thread.fsr >> 11) & 7;
- end = min(dst->pos + dst->count,
- (unsigned int)OFFSET(xmm_space[0]));
- start = (dst->pos - OFFSET(st_space[0])) / 16;
- end = (end - OFFSET(st_space[0])) / 16;
- for (; start < end; start++)
- access_fpreg_ia32(start, buf + 16 * start, pt,
- info->sw, tos, 0);
- dst->ret = user_regset_copyout(&dst->pos, &dst->count,
- &dst->u.get.kbuf, &dst->u.get.ubuf,
- buf, OFFSET(st_space[0]), OFFSET(xmm_space[0]));
- if (dst->ret || dst->count == 0)
- return;
- }
- if (dst->pos < OFFSET(padding[0]))
- dst->ret = user_regset_copyout(&dst->pos, &dst->count,
- &dst->u.get.kbuf, &dst->u.get.ubuf,
- &info->sw->f16, OFFSET(xmm_space[0]),
- OFFSET(padding[0]));
-}
-
-static void do_fpxregs_set(struct unw_frame_info *info, void *arg)
-{
- struct regset_getset *dst = arg;
- struct task_struct *task = dst->target;
- char buf[128];
- int start, end;
-
- if (dst->count == 0 || unw_unwind_to_user(info) < 0)
- return;
-
- if (dst->pos < OFFSET(st_space[0])) {
- start = dst->pos;
- dst->ret = user_regset_copyin(&dst->pos, &dst->count,
- &dst->u.set.kbuf, &dst->u.set.ubuf,
- buf, 0, OFFSET(st_space[0]));
- if (dst->ret)
- return;
- setfpxreg(task, start, dst->pos, buf);
- if (dst->count == 0)
- return;
- }
- if (dst->pos < OFFSET(xmm_space[0])) {
- struct pt_regs *pt;
- int tos;
- pt = task_pt_regs(task);
- tos = (task->thread.fsr >> 11) & 7;
- start = (dst->pos - OFFSET(st_space[0])) / 16;
- dst->ret = user_regset_copyin(&dst->pos, &dst->count,
- &dst->u.set.kbuf, &dst->u.set.ubuf,
- buf, OFFSET(st_space[0]), OFFSET(xmm_space[0]));
- if (dst->ret)
- return;
- end = (dst->pos - OFFSET(st_space[0])) / 16;
- for (; start < end; start++)
- access_fpreg_ia32(start, buf + 16 * start, pt, info->sw,
- tos, 1);
- if (dst->count == 0)
- return;
- }
- if (dst->pos < OFFSET(padding[0]))
- dst->ret = user_regset_copyin(&dst->pos, &dst->count,
- &dst->u.set.kbuf, &dst->u.set.ubuf,
- &info->sw->f16, OFFSET(xmm_space[0]),
- OFFSET(padding[0]));
-}
-#undef OFFSET
-
-static int do_regset_call(void (*call)(struct unw_frame_info *, void *),
- struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- struct regset_getset info = { .target = target, .regset = regset,
- .pos = pos, .count = count,
- .u.set = { .kbuf = kbuf, .ubuf = ubuf },
- .ret = 0 };
-
- if (target == current)
- unw_init_running(call, &info);
- else {
- struct unw_frame_info ufi;
- memset(&ufi, 0, sizeof(ufi));
- unw_init_from_blocked_task(&ufi, target);
- (*call)(&ufi, &info);
- }
-
- return info.ret;
-}
-
-static int ia32_fpregs_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- return do_regset_call(do_fpregs_get, target, regset, pos, count,
- kbuf, ubuf);
-}
-
-static int ia32_fpregs_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- return do_regset_call(do_fpregs_set, target, regset, pos, count,
- kbuf, ubuf);
-}
-
-static int ia32_fpxregs_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- return do_regset_call(do_fpxregs_get, target, regset, pos, count,
- kbuf, ubuf);
-}
-
-static int ia32_fpxregs_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- return do_regset_call(do_fpxregs_set, target, regset, pos, count,
- kbuf, ubuf);
-}
-
-static int ia32_genregs_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- if (kbuf) {
- u32 *kp = kbuf;
- while (count > 0) {
- *kp++ = getreg(target, pos);
- pos += 4;
- count -= 4;
- }
- } else {
- u32 __user *up = ubuf;
- while (count > 0) {
- if (__put_user(getreg(target, pos), up++))
- return -EFAULT;
- pos += 4;
- count -= 4;
- }
- }
- return 0;
-}
-
-static int ia32_genregs_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret = 0;
- if (kbuf) {
- const u32 *kp = kbuf;
- while (!ret && count > 0) {
- putreg(target, pos, *kp++);
- pos += 4;
- count -= 4;
- }
- } else {
- const u32 __user *up = ubuf;
- u32 val;
- while (!ret && count > 0) {
- ret = __get_user(val, up++);
- if (!ret)
- putreg(target, pos, val);
- pos += 4;
- count -= 4;
- }
- }
- return ret;
-}
+ info.entry_number = idx;
+ info.base_addr = GET_BASE(desc);
+ info.limit = GET_LIMIT(desc);
+ info.seg_32bit = GET_32BIT(desc);
+ info.contents = GET_CONTENTS(desc);
+ info.read_exec_only = !GET_WRITABLE(desc);
+ info.limit_in_pages = GET_LIMIT_PAGES(desc);
+ info.seg_not_present = !GET_PRESENT(desc);
+ info.useable = GET_USEABLE(desc);
-static int ia32_tls_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- struct thread_struct *t = &target->thread;
- int n = GDT_ENTRY_TLS_ENTRIES;
- while (n > 0 && desc_empty(&t->tls_array[n -1]))
- --n;
- return n;
-}
-
-static int ia32_tls_get(struct task_struct *target,
- const struct user_regset *regset, unsigned int pos,
- unsigned int count, void *kbuf, void __user *ubuf)
-{
- const struct desc_struct *tls;
-
- if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct ia32_user_desc) ||
- (pos % sizeof(struct ia32_user_desc)) != 0 ||
- (count % sizeof(struct ia32_user_desc)) != 0)
- return -EINVAL;
-
- pos /= sizeof(struct ia32_user_desc);
- count /= sizeof(struct ia32_user_desc);
-
- tls = &target->thread.tls_array[pos];
-
- if (kbuf) {
- struct ia32_user_desc *info = kbuf;
- while (count-- > 0)
- fill_user_desc(info++, GDT_ENTRY_TLS_MIN + pos++,
- tls++);
- } else {
- struct ia32_user_desc __user *u_info = ubuf;
- while (count-- > 0) {
- struct ia32_user_desc info;
- fill_user_desc(&info, GDT_ENTRY_TLS_MIN + pos++, tls++);
- if (__copy_to_user(u_info++, &info, sizeof(info)))
- return -EFAULT;
- }
- }
-
- return 0;
-}
-
-static int ia32_tls_set(struct task_struct *target,
- const struct user_regset *regset, unsigned int pos,
- unsigned int count, const void *kbuf, const void __user *ubuf)
-{
- struct ia32_user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
- const struct ia32_user_desc *info;
-
- if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct ia32_user_desc) ||
- (pos % sizeof(struct ia32_user_desc)) != 0 ||
- (count % sizeof(struct ia32_user_desc)) != 0)
- return -EINVAL;
-
- if (kbuf)
- info = kbuf;
- else if (__copy_from_user(infobuf, ubuf, count))
+ if (copy_to_user(u_info, &info, sizeof(info)))
return -EFAULT;
- else
- info = infobuf;
-
- set_tls_desc(target,
- GDT_ENTRY_TLS_MIN + (pos / sizeof(struct ia32_user_desc)),
- info, count / sizeof(struct ia32_user_desc));
-
return 0;
}
-/*
- * This should match arch/i386/kernel/ptrace.c:native_regsets.
- * XXX ioperm? vm86?
- */
-static const struct user_regset ia32_regsets[] = {
- {
- .core_note_type = NT_PRSTATUS,
- .n = sizeof(struct user_regs_struct32)/4,
- .size = 4, .align = 4,
- .get = ia32_genregs_get, .set = ia32_genregs_set
- },
- {
- .core_note_type = NT_PRFPREG,
- .n = sizeof(struct ia32_user_i387_struct) / 4,
- .size = 4, .align = 4,
- .get = ia32_fpregs_get, .set = ia32_fpregs_set
- },
- {
- .core_note_type = NT_PRXFPREG,
- .n = sizeof(struct ia32_user_fxsr_struct) / 4,
- .size = 4, .align = 4,
- .get = ia32_fpxregs_get, .set = ia32_fpxregs_set
- },
- {
- .core_note_type = NT_386_TLS,
- .n = GDT_ENTRY_TLS_ENTRIES,
- .bias = GDT_ENTRY_TLS_MIN,
- .size = sizeof(struct ia32_user_desc),
- .align = sizeof(struct ia32_user_desc),
- .active = ia32_tls_active,
- .get = ia32_tls_get, .set = ia32_tls_set,
- },
-};
-
-const struct user_regset_view user_ia32_view = {
- .name = "i386", .e_machine = EM_386,
- .regsets = ia32_regsets, .n = ARRAY_SIZE(ia32_regsets)
-};
-
long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
__u32 len_low, __u32 len_high, int advice)
{
diff --git a/trunk/arch/ia64/kernel/Makefile b/trunk/arch/ia64/kernel/Makefile
index 13fd10e8699e..33e5a598672d 100644
--- a/trunk/arch/ia64/kernel/Makefile
+++ b/trunk/arch/ia64/kernel/Makefile
@@ -6,7 +6,7 @@ extra-y := head.o init_task.o vmlinux.lds
obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \
- salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
+ salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
unwind.o mca.o mca_asm.o topology.o
obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o
diff --git a/trunk/arch/ia64/kernel/acpi.c b/trunk/arch/ia64/kernel/acpi.c
index c7467f863c7a..78f28d825f30 100644
--- a/trunk/arch/ia64/kernel/acpi.c
+++ b/trunk/arch/ia64/kernel/acpi.c
@@ -423,7 +423,6 @@ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag))
#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag))
static struct acpi_table_slit __initdata *slit_table;
-cpumask_t early_cpu_possible_map = CPU_MASK_NONE;
static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
{
@@ -483,7 +482,6 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
(pa->apic_id << 8) | (pa->local_sapic_eid);
/* nid should be overridden as logical node id later */
node_cpuid[srat_num_cpus].nid = pxm;
- cpu_set(srat_num_cpus, early_cpu_possible_map);
srat_num_cpus++;
}
@@ -561,7 +559,7 @@ void __init acpi_numa_arch_fixup(void)
}
/* set logical node id in cpu structure */
- for_each_possible_early_cpu(i)
+ for (i = 0; i < srat_num_cpus; i++)
node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);
printk(KERN_INFO "Number of logical nodes in system = %d\n",
diff --git a/trunk/arch/ia64/kernel/asm-offsets.c b/trunk/arch/ia64/kernel/asm-offsets.c
index 230a6f92367f..0aebc6f79e95 100644
--- a/trunk/arch/ia64/kernel/asm-offsets.c
+++ b/trunk/arch/ia64/kernel/asm-offsets.c
@@ -7,7 +7,6 @@
#define ASM_OFFSETS_C 1
#include
-#include <linux/pid.h>
#include
#include
@@ -35,29 +34,17 @@ void foo(void)
DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
- BUILD_BUG_ON(sizeof(struct upid) != 32);
- DEFINE(IA64_UPID_SHIFT, 5);
-
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
- DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
- DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
- DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime));
-#endif
BLANK();
DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
- DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
- DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
- DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
diff --git a/trunk/arch/ia64/kernel/crash.c b/trunk/arch/ia64/kernel/crash.c
index 90ef338cf46f..fbe742ad2fde 100644
--- a/trunk/arch/ia64/kernel/crash.c
+++ b/trunk/arch/ia64/kernel/crash.c
@@ -24,7 +24,6 @@ int kdump_status[NR_CPUS];
static atomic_t kdump_cpu_frozen;
atomic_t kdump_in_progress;
static int kdump_on_init = 1;
-static int kdump_on_fatal_mca = 1;
static inline Elf64_Word
*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
@@ -119,7 +118,6 @@ machine_crash_shutdown(struct pt_regs *pt)
static void
machine_kdump_on_init(void)
{
- crash_save_vmcoreinfo();
local_irq_disable();
kexec_disable_iosapic();
machine_kexec(ia64_kimage);
@@ -150,7 +148,7 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
struct ia64_mca_notify_die *nd;
struct die_args *args = data;
- if (!kdump_on_init && !kdump_on_fatal_mca)
+ if (!kdump_on_init)
return NOTIFY_DONE;
if (!ia64_kimage) {
@@ -175,38 +173,32 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
return NOTIFY_DONE;
switch (val) {
- case DIE_INIT_MONARCH_PROCESS:
- if (kdump_on_init) {
+ case DIE_INIT_MONARCH_PROCESS:
atomic_set(&kdump_in_progress, 1);
*(nd->monarch_cpu) = -1;
- }
- break;
- case DIE_INIT_MONARCH_LEAVE:
- if (kdump_on_init)
+ break;
+ case DIE_INIT_MONARCH_LEAVE:
machine_kdump_on_init();
- break;
- case DIE_INIT_SLAVE_LEAVE:
- if (atomic_read(&kdump_in_progress))
- unw_init_running(kdump_cpu_freeze, NULL);
- break;
- case DIE_MCA_RENDZVOUS_LEAVE:
- if (atomic_read(&kdump_in_progress))
- unw_init_running(kdump_cpu_freeze, NULL);
- break;
- case DIE_MCA_MONARCH_LEAVE:
- /* die_register->signr indicate if MCA is recoverable */
- if (kdump_on_fatal_mca && !args->signr) {
- atomic_set(&kdump_in_progress, 1);
- *(nd->monarch_cpu) = -1;
- machine_kdump_on_init();
- }
- break;
+ break;
+ case DIE_INIT_SLAVE_LEAVE:
+ if (atomic_read(&kdump_in_progress))
+ unw_init_running(kdump_cpu_freeze, NULL);
+ break;
+ case DIE_MCA_RENDZVOUS_LEAVE:
+ if (atomic_read(&kdump_in_progress))
+ unw_init_running(kdump_cpu_freeze, NULL);
+ break;
+ case DIE_MCA_MONARCH_LEAVE:
+ /* die_register->signr indicate if MCA is recoverable */
+ if (!args->signr)
+ machine_kdump_on_init();
+ break;
}
return NOTIFY_DONE;
}
#ifdef CONFIG_SYSCTL
-static ctl_table kdump_ctl_table[] = {
+static ctl_table kdump_on_init_table[] = {
{
.ctl_name = CTL_UNNUMBERED,
.procname = "kdump_on_init",
@@ -215,14 +207,6 @@ static ctl_table kdump_ctl_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
- {
- .ctl_name = CTL_UNNUMBERED,
- .procname = "kdump_on_fatal_mca",
- .data = &kdump_on_fatal_mca,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
{ .ctl_name = 0 }
};
@@ -231,7 +215,7 @@ static ctl_table sys_table[] = {
.ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
- .child = kdump_ctl_table,
+ .child = kdump_on_init_table,
},
{ .ctl_name = 0 }
};
diff --git a/trunk/arch/ia64/kernel/efi.c b/trunk/arch/ia64/kernel/efi.c
index d45f215bc8fc..728d7247a1a6 100644
--- a/trunk/arch/ia64/kernel/efi.c
+++ b/trunk/arch/ia64/kernel/efi.c
@@ -37,7 +37,6 @@
#include
#include
#include
-#include
#define EFI_DEBUG 0
@@ -404,41 +403,6 @@ efi_get_pal_addr (void)
return NULL;
}
-
-static u8 __init palo_checksum(u8 *buffer, u32 length)
-{
- u8 sum = 0;
- u8 *end = buffer + length;
-
- while (buffer < end)
- sum = (u8) (sum + *(buffer++));
-
- return sum;
-}
-
-/*
- * Parse and handle PALO table which is published at:
- * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf
- */
-static void __init handle_palo(unsigned long palo_phys)
-{
- struct palo_table *palo = __va(palo_phys);
- u8 checksum;
-
- if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) {
- printk(KERN_INFO "PALO signature incorrect.\n");
- return;
- }
-
- checksum = palo_checksum((u8 *)palo, palo->length);
- if (checksum) {
- printk(KERN_INFO "PALO checksum incorrect.\n");
- return;
- }
-
- setup_ptcg_sem(palo->max_tlb_purges, NPTCG_FROM_PALO);
-}
-
void
efi_map_pal_code (void)
{
@@ -468,7 +432,6 @@ efi_init (void)
u64 efi_desc_size;
char *cp, vendor[100] = "unknown";
int i;
- unsigned long palo_phys;
/*
* It's too early to be able to use the standard kernel command line
@@ -533,8 +496,6 @@ efi_init (void)
efi.hcdp = EFI_INVALID_TABLE_ADDR;
efi.uga = EFI_INVALID_TABLE_ADDR;
- palo_phys = EFI_INVALID_TABLE_ADDR;
-
for (i = 0; i < (int) efi.systab->nr_tables; i++) {
if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
efi.mps = config_tables[i].table;
@@ -554,17 +515,10 @@ efi_init (void)
} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
efi.hcdp = config_tables[i].table;
printk(" HCDP=0x%lx", config_tables[i].table);
- } else if (efi_guidcmp(config_tables[i].guid,
- PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID) == 0) {
- palo_phys = config_tables[i].table;
- printk(" PALO=0x%lx", config_tables[i].table);
}
}
printk("\n");
- if (palo_phys != EFI_INVALID_TABLE_ADDR)
- handle_palo(palo_phys);
-
runtime = __va(efi.systab->runtime);
efi.get_time = phys_get_time;
efi.set_time = phys_set_time;
diff --git a/trunk/arch/ia64/kernel/entry.S b/trunk/arch/ia64/kernel/entry.S
index b0be4a280174..3c331c464b40 100644
--- a/trunk/arch/ia64/kernel/entry.S
+++ b/trunk/arch/ia64/kernel/entry.S
@@ -710,16 +710,6 @@ ENTRY(ia64_leave_syscall)
(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
#endif
.work_processed_syscall:
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- adds r2=PT(LOADRS)+16,r12
-(pUStk) mov.m r22=ar.itc // fetch time at leave
- adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
- ;;
-(p6) ld4 r31=[r18] // load current_thread_info()->flags
- ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
- adds r3=PT(AR_BSPSTORE)+16,r12 // deferred
- ;;
-#else
adds r2=PT(LOADRS)+16,r12
adds r3=PT(AR_BSPSTORE)+16,r12
adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
@@ -728,7 +718,6 @@ ENTRY(ia64_leave_syscall)
ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
nop.i 0
;;
-#endif
mov r16=ar.bsp // M2 get existing backing store pointer
ld8 r18=[r2],PT(R9)-PT(B6) // load b6
(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
@@ -748,21 +737,12 @@ ENTRY(ia64_leave_syscall)
ld8 r29=[r2],16 // M0|1 load cr.ipsr
ld8 r28=[r3],16 // M0|1 load cr.iip
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
- ;;
- ld8 r30=[r2],16 // M0|1 load cr.ifs
- ld8 r25=[r3],16 // M0|1 load ar.unat
-(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
- ;;
-#else
mov r22=r0 // A clear r22
;;
ld8 r30=[r2],16 // M0|1 load cr.ifs
ld8 r25=[r3],16 // M0|1 load ar.unat
(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
;;
-#endif
ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
nop 0
@@ -779,11 +759,7 @@ ENTRY(ia64_leave_syscall)
ld8.fill r1=[r3],16 // M0|1 load r1
(pUStk) mov r17=1 // A
;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-(pUStk) st1 [r15]=r17 // M2|3
-#else
(pUStk) st1 [r14]=r17 // M2|3
-#endif
ld8.fill r13=[r3],16 // M0|1
mov f8=f0 // F clear f8
;;
@@ -799,22 +775,12 @@ ENTRY(ia64_leave_syscall)
shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
cover // B add current frame into dirty partition & set cr.ifs
;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- mov r19=ar.bsp // M2 get new backing store pointer
- st8 [r14]=r22 // M save time at leave
- mov f10=f0 // F clear f10
-
- mov r22=r0 // A clear r22
- movl r14=__kernel_syscall_via_epc // X
- ;;
-#else
mov r19=ar.bsp // M2 get new backing store pointer
mov f10=f0 // F clear f10
nop.m 0
movl r14=__kernel_syscall_via_epc // X
;;
-#endif
mov.m ar.csd=r0 // M2 clear ar.csd
mov.m ar.ccv=r0 // M2 clear ar.ccv
mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc)
@@ -947,18 +913,10 @@ GLOBAL_ENTRY(ia64_leave_kernel)
adds r16=PT(CR_IPSR)+16,r12
adds r17=PT(CR_IIP)+16,r12
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- .pred.rel.mutex pUStk,pKStk
-(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
-(pUStk) mov.m r22=ar.itc // M fetch time at leave
- nop.i 0
- ;;
-#else
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
nop.i 0
nop.i 0
;;
-#endif
ld8 r29=[r16],16 // load cr.ipsr
ld8 r28=[r17],16 // load cr.iip
;;
@@ -980,37 +938,15 @@ GLOBAL_ENTRY(ia64_leave_kernel)
;;
ld8.fill r12=[r16],16
ld8.fill r13=[r17],16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-(pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
-#else
(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
-#endif
;;
ld8 r20=[r16],16 // ar.fpsr
ld8.fill r15=[r17],16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred
-#endif
;;
ld8.fill r14=[r16],16
ld8.fill r2=[r17]
(pUStk) mov r17=1
;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- // mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;;
- // mib : mov add br -> mib : ld8 add br
- // bbb_ : br nop cover;; mbb_ : mov br cover;;
- //
- // no one require bsp in r16 if (pKStk) branch is selected.
-(pUStk) st8 [r3]=r22 // save time at leave
-(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack
- shr.u r18=r19,16 // get byte size of existing "dirty" partition
- ;;
- ld8.fill r3=[r16] // deferred
- LOAD_PHYS_STACK_REG_SIZE(r17)
-(pKStk) br.cond.dpnt skip_rbs_switch
- mov r16=ar.bsp // get existing backing store pointer
-#else
ld8.fill r3=[r16]
(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack
shr.u r18=r19,16 // get byte size of existing "dirty" partition
@@ -1018,7 +954,6 @@ GLOBAL_ENTRY(ia64_leave_kernel)
mov r16=ar.bsp // get existing backing store pointer
LOAD_PHYS_STACK_REG_SIZE(r17)
(pKStk) br.cond.dpnt skip_rbs_switch
-#endif
/*
* Restore user backing store.
diff --git a/trunk/arch/ia64/kernel/fsys.S b/trunk/arch/ia64/kernel/fsys.S
index c1625c7e1779..44841971f077 100644
--- a/trunk/arch/ia64/kernel/fsys.S
+++ b/trunk/arch/ia64/kernel/fsys.S
@@ -61,29 +61,13 @@ ENTRY(fsys_getpid)
.prologue
.altrp b6
.body
- add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
- ;;
- ld8 r17=[r17] // r17 = current->group_leader
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;;
ld4 r9=[r9]
- add r17=IA64_TASK_TGIDLINK_OFFSET,r17
+ add r8=IA64_TASK_TGID_OFFSET,r16
;;
and r9=TIF_ALLWORK_MASK,r9
- ld8 r17=[r17] // r17 = current->group_leader->pids[PIDTYPE_PID].pid
- ;;
- add r8=IA64_PID_LEVEL_OFFSET,r17
- ;;
- ld4 r8=[r8] // r8 = pid->level
- add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
- ;;
- shl r8=r8,IA64_UPID_SHIFT
- ;;
- add r17=r17,r8 // r17 = &pid->numbers[pid->level]
- ;;
- ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
- ;;
- mov r17=0
+ ld4 r8=[r8] // r8 = current->tgid
;;
cmp.ne p8,p0=0,r9
(p8) br.spnt.many fsys_fallback_syscall
@@ -142,25 +126,15 @@ ENTRY(fsys_set_tid_address)
.altrp b6
.body
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
- add r17=IA64_TASK_TGIDLINK_OFFSET,r16
;;
ld4 r9=[r9]
tnat.z p6,p7=r32 // check argument register for being NaT
- ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid
;;
and r9=TIF_ALLWORK_MASK,r9
- add r8=IA64_PID_LEVEL_OFFSET,r17
+ add r8=IA64_TASK_PID_OFFSET,r16
add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
;;
- ld4 r8=[r8] // r8 = pid->level
- add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
- ;;
- shl r8=r8,IA64_UPID_SHIFT
- ;;
- add r17=r17,r8 // r17 = &pid->numbers[pid->level]
- ;;
- ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
- ;;
+ ld4 r8=[r8]
cmp.ne p8,p0=0,r9
mov r17=-1
;;
@@ -236,25 +210,27 @@ ENTRY(fsys_gettimeofday)
// Note that instructions are optimized for McKinley. McKinley can
// process two bundles simultaneously and therefore we continuously
// try to feed the CPU two bundles and then a stop.
-
+ //
+ // Additional note that code has changed a lot. Optimization is TBD.
+ // Comments begin with "?" are maybe outdated.
+ tnat.nz p6,p0 = r31 // ? branch deferred to fit later bundle
+ mov pr = r30,0xc000 // Set predicates according to function
add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
- tnat.nz p6,p0 = r31 // guard against Nat argument
-(p6) br.cond.spnt.few .fail_einval
movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
;;
- ld4 r2 = [r2] // process work pending flags
movl r29 = itc_jitter_data // itc_jitter
add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time
- add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
- mov pr = r30,0xc000 // Set predicates according to function
+ ld4 r2 = [r2] // process work pending flags
;;
- and r2 = TIF_ALLWORK_MASK,r2
- add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
+ add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
+ add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+ and r2 = TIF_ALLWORK_MASK,r2
+(p6) br.cond.spnt.few .fail_einval // ? deferred branch
;;
- add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
+ add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
-(p6) br.cond.spnt.many fsys_fallback_syscall
+(p6) br.cond.spnt.many fsys_fallback_syscall
;;
// Begin critical section
.time_redo:
@@ -282,6 +258,7 @@ ENTRY(fsys_gettimeofday)
(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues..
(p13) ld8 r25 = [r19] // get itc_lastcycle value
+ ;; // ? could be removed by moving the last add upward
ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec
;;
ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec
@@ -308,12 +285,13 @@ ENTRY(fsys_gettimeofday)
EX(.fail_efault, probe.w.fault r31, 3)
xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
;;
+ // ? simulate tbit.nz.or p7,p0 = r28,0
getf.sig r2 = f8
mf
;;
ld4 r10 = [r20] // gtod_lock.sequence
shr.u r2 = r2,r23 // shift by factor
- ;;
+ ;; // ? overloaded 3 bundles!
add r8 = r8,r2 // Add xtime.nsecs
cmp4.ne p7,p0 = r28,r10
(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo
@@ -341,9 +319,9 @@ EX(.fail_efault, probe.w.fault r31, 3)
EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it
;;
+ mov r8 = r0
(p14) getf.sig r2 = f8
;;
- mov r8 = r0
(p14) shr.u r21 = r2, 4
;;
EX(.fail_efault, st8 [r31] = r9)
@@ -682,11 +660,7 @@ GLOBAL_ENTRY(fsys_bubble_down)
nop.i 0
;;
mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- mov.m r30=ar.itc // M get cycle for accounting
-#else
nop.m 0
-#endif
nop.i 0
;;
mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore
@@ -708,28 +682,6 @@ GLOBAL_ENTRY(fsys_bubble_down)
cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
br.call.sptk.many b7=ia64_syscall_setup // B
;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- // mov.m r30=ar.itc is called in advance
- add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
- add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
- ;;
- ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel
- ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel
- ;;
- ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime
- ld8 r21=[r17] // cumulated utime
- sub r22=r19,r18 // stime before leave kernel
- ;;
- st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp
- sub r18=r30,r19 // elapsed time in user mode
- ;;
- add r20=r20,r22 // sum stime
- add r21=r21,r18 // sum utime
- ;;
- st8 [r16]=r20 // update stime
- st8 [r17]=r21 // update utime
- ;;
-#endif
mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
mov rp=r14 // I0 set the real return addr
and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A
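
The fsys_gettimeofday path above is essentially a lock-free sequence-counter
read: it samples gtod_lock.sequence, reads the time data, and branches back to
.time_redo if the sequence changed in the meantime.  The same retry idiom as a
small stand-alone C11 sketch; gtod_model and read_time are invented names, and
this models the idiom only, not the kernel's seqlock API:

#include <stdatomic.h>
#include <stdio.h>

struct gtod_model {
	atomic_uint seq;              /* even = stable, odd = writer active */
	long tv_sec;
	long tv_nsec;
};

/* reader: retry while a writer was active or the data changed under us */
static void read_time(struct gtod_model *g, long *sec, long *nsec)
{
	for (;;) {
		unsigned int s = atomic_load_explicit(&g->seq, memory_order_acquire);

		if (s & 1)
			continue;                 /* writer in progress, try again */
		*sec  = g->tv_sec;
		*nsec = g->tv_nsec;
		atomic_thread_fence(memory_order_acquire);
		if (atomic_load_explicit(&g->seq, memory_order_relaxed) == s)
			return;                   /* snapshot was consistent       */
	}
}

int main(void)
{
	struct gtod_model g = { .seq = 0, .tv_sec = 1, .tv_nsec = 500 };
	long sec, nsec;

	read_time(&g, &sec, &nsec);
	printf("%ld.%09ld\n", sec, nsec);
	return 0;
}
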
diff --git a/trunk/arch/ia64/kernel/head.S b/trunk/arch/ia64/kernel/head.S
index ddeab4e36fd5..d3a41d5f8d12 100644
--- a/trunk/arch/ia64/kernel/head.S
+++ b/trunk/arch/ia64/kernel/head.S
@@ -1002,26 +1002,6 @@ GLOBAL_ENTRY(sched_clock)
br.ret.sptk.many rp
END(sched_clock)
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-GLOBAL_ENTRY(cycle_to_cputime)
- alloc r16=ar.pfs,1,0,0,0
- addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
- ;;
- ldf8 f8=[r8]
- ;;
- setf.sig f9=r32
- ;;
- xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc)
- xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product
- ;;
- getf.sig r8=f10 // (5 cyc)
- getf.sig r9=f11
- ;;
- shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
- br.ret.sptk.many rp
-END(cycle_to_cputime)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
-
GLOBAL_ENTRY(start_kernel_thread)
.prologue
.save rp, r0 // this is the end of the call-chain
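
The cycle_to_cputime() routine removed above converts a cycle count into
nanoseconds by multiplying with a pre-scaled nanoseconds-per-cycle factor in a
64x64->128-bit product (xmpy.lu/xmpy.hu) and then shifting the scale factor back
out (shrp).  The same arithmetic in C, using the GCC/Clang __int128 extension;
the shift of 30 and the 1.6 GHz clock are assumptions made purely for the
illustration:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_CYC_SHIFT 30   /* assumed fixed-point scale for this sketch */

int main(void)
{
	uint64_t cycles = 1600000000ULL;   /* e.g. one second at 1.6 GHz */

	/* nanoseconds per cycle as a fixed-point value: (1e9/1.6e9) * 2^30 */
	uint64_t nsec_per_cyc = (uint64_t)((1000000000.0 / 1600000000.0) *
	                                   (double)(1ULL << NSEC_PER_CYC_SHIFT));

	/* 64x64 -> 128 bit multiply, then shift the scale factor back out */
	unsigned __int128 prod = (unsigned __int128)cycles * nsec_per_cyc;
	uint64_t nsec = (uint64_t)(prod >> NSEC_PER_CYC_SHIFT);

	printf("%llu cycles ~= %llu ns\n",
	       (unsigned long long)cycles, (unsigned long long)nsec);
	return 0;
}
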
diff --git a/trunk/arch/ia64/kernel/ia64_ksyms.c b/trunk/arch/ia64/kernel/ia64_ksyms.c
index 6da1f20d7372..8e7193d55528 100644
--- a/trunk/arch/ia64/kernel/ia64_ksyms.c
+++ b/trunk/arch/ia64/kernel/ia64_ksyms.c
@@ -19,6 +19,12 @@ EXPORT_SYMBOL_GPL(empty_zero_page);
EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */
EXPORT_SYMBOL(csum_ipv6_magic);
+#include <asm/semaphore.h>
+EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__down_interruptible);
+EXPORT_SYMBOL(__down_trylock);
+EXPORT_SYMBOL(__up);
+
#include
EXPORT_SYMBOL(clear_page);
diff --git a/trunk/arch/ia64/kernel/irq_ia64.c b/trunk/arch/ia64/kernel/irq_ia64.c
index 5538471e8d68..d8be23fbe6bc 100644
--- a/trunk/arch/ia64/kernel/irq_ia64.c
+++ b/trunk/arch/ia64/kernel/irq_ia64.c
@@ -472,7 +472,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
static unsigned char count;
static long last_time;
- if (time_after(jiffies, last_time + 5 * HZ))
+ if (jiffies - last_time > 5*HZ)
count = 0;
if (++count < 5) {
last_time = jiffies;
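
One side of the hunk above uses the time_after() macro, whose signed-difference
trick stays correct even when the jiffies counter wraps around.  A tiny
demonstration of the trick; model_time_after mirrors the core of the kernel
macro (minus the typecheck):

#include <stdio.h>

/* true if a is after b, correct across an unsigned-counter wrap */
#define model_time_after(a, b)  ((long)((b) - (a)) < 0)

int main(void)
{
	unsigned long hz = 250;
	unsigned long last_time = (unsigned long)-3;   /* just before the wrap */
	unsigned long now = 5 * hz + 10;               /* a bit later, wrapped */

	printf("more than 5 seconds elapsed? %s\n",
	       model_time_after(now, last_time + 5 * hz) ? "yes" : "no");
	return 0;
}
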
diff --git a/trunk/arch/ia64/kernel/ivt.S b/trunk/arch/ia64/kernel/ivt.S
index 6678c49daba3..34f44d8be00d 100644
--- a/trunk/arch/ia64/kernel/ivt.S
+++ b/trunk/arch/ia64/kernel/ivt.S
@@ -805,13 +805,8 @@ ENTRY(break_fault)
(p8) adds r28=16,r28 // A switch cr.iip to next bundle
(p9) adds r8=1,r8 // A increment ei to next slot
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- ;;
- mov b6=r30 // I0 setup syscall handler branch reg early
-#else
nop.i 0
;;
-#endif
mov.m r25=ar.unat // M2 (5 cyc)
dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr
@@ -822,11 +817,7 @@ ENTRY(break_fault)
//
///////////////////////////////////////////////////////////////////////
st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- mov.m r30=ar.itc // M get cycle for accounting
-#else
mov b6=r30 // I0 setup syscall handler branch reg early
-#endif
cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already?
and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit
@@ -838,30 +829,6 @@ ENTRY(break_fault)
cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited?
br.call.sptk.many b7=ia64_syscall_setup // B
1:
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- // mov.m r30=ar.itc is called in advance, and r13 is current
- add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A
- add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A
-(pKStk) br.cond.spnt .skip_accounting // B unlikely skip
- ;;
- ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // M get last stamp
- ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // M time at leave
- ;;
- ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // M cumulated stime
- ld8 r21=[r17] // M cumulated utime
- sub r22=r19,r18 // A stime before leave
- ;;
- st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // M update stamp
- sub r18=r30,r19 // A elapsed time in user
- ;;
- add r20=r20,r22 // A sum stime
- add r21=r21,r18 // A sum utime
- ;;
- st8 [r16]=r20 // M update stime
- st8 [r17]=r21 // M update utime
- ;;
-.skip_accounting:
-#endif
mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
nop 0
bsw.1 // B (6 cyc) regs are saved, switch to bank 1
@@ -961,7 +928,6 @@ END(interrupt)
* - r27: saved ar.rsc
* - r28: saved cr.iip
* - r29: saved cr.ipsr
- * - r30: ar.itc for accounting (don't touch)
* - r31: saved pr
* - b0: original contents (to be saved)
* On exit:
@@ -1124,41 +1090,6 @@ END(dispatch_illegal_op_fault)
DBG_FAULT(16)
FAULT(16)
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- /*
- * There is no particular reason for this code to be here, other than
- * that there happens to be space here that would go unused otherwise.
- * If this fault ever gets "unreserved", simply move the following
- * code to a more suitable spot...
- *
- * account_sys_enter is called from SAVE_MIN* macros if accounting is
- * enabled and if the macro is entered from user mode.
- */
-ENTRY(account_sys_enter)
- // mov.m r20=ar.itc is called in advance, and r13 is current
- add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13
- add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13
- ;;
- ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel
- ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at left from kernel
- ;;
- ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime
- ld8 r21=[r17] // cumulated utime
- sub r22=r19,r18 // stime before leave kernel
- ;;
- st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP // update stamp
- sub r18=r20,r19 // elapsed time in user mode
- ;;
- add r23=r23,r22 // sum stime
- add r21=r21,r18 // sum utime
- ;;
- st8 [r16]=r23 // update stime
- st8 [r17]=r21 // update utime
- ;;
- br.ret.sptk.many rp
-END(account_sys_enter)
-#endif
-
.org ia64_ivt+0x4400
/////////////////////////////////////////////////////////////////////////////////////////
// 0x4400 Entry 17 (size 64 bundles) Reserved
diff --git a/trunk/arch/ia64/kernel/kprobes.c b/trunk/arch/ia64/kernel/kprobes.c
index 233434f4f88f..8d9a446a0d17 100644
--- a/trunk/arch/ia64/kernel/kprobes.c
+++ b/trunk/arch/ia64/kernel/kprobes.c
@@ -78,20 +78,6 @@ static enum instruction_type bundle_encoding[32][3] = {
{ u, u, u }, /* 1F */
};
-/* Insert a long branch code */
-static void __kprobes set_brl_inst(void *from, void *to)
-{
- s64 rel = ((s64) to - (s64) from) >> 4;
- bundle_t *brl;
- brl = (bundle_t *) ((u64) from & ~0xf);
- brl->quad0.template = 0x05; /* [MLX](stop) */
- brl->quad0.slot0 = NOP_M_INST; /* nop.m 0x0 */
- brl->quad0.slot1_p0 = ((rel >> 20) & 0x7fffffffff) << 2;
- brl->quad1.slot1_p1 = (((rel >> 20) & 0x7fffffffff) << 2) >> (64 - 46);
- /* brl.cond.sptk.many.clr rel<<4 (qp=0) */
- brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff);
-}
-
/*
* In this function we check to see if the instruction
* is IP relative instruction and update the kprobe
@@ -510,77 +496,6 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
}
-/* Check the instruction in the slot is break */
-static int __kprobes __is_ia64_break_inst(bundle_t *bundle, uint slot)
-{
- unsigned int major_opcode;
- unsigned int template = bundle->quad0.template;
- unsigned long kprobe_inst;
-
- /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
- if (slot == 1 && bundle_encoding[template][1] == L)
- slot++;
-
- /* Get Kprobe probe instruction at given slot*/
- get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode);
-
- /* For break instruction,
- * Bits 37:40 Major opcode to be zero
- * Bits 27:32 X6 to be zero
- * Bits 32:35 X3 to be zero
- */
- if (major_opcode || ((kprobe_inst >> 27) & 0x1FF)) {
- /* Not a break instruction */
- return 0;
- }
-
- /* Is a break instruction */
- return 1;
-}
-
-/*
- * In this function, we check whether the target bundle modifies IP or
- * it triggers an exception. If so, it cannot be boostable.
- */
-static int __kprobes can_boost(bundle_t *bundle, uint slot,
- unsigned long bundle_addr)
-{
- unsigned int template = bundle->quad0.template;
-
- do {
- if (search_exception_tables(bundle_addr + slot) ||
- __is_ia64_break_inst(bundle, slot))
- return 0; /* exception may occur in this bundle*/
- } while ((++slot) < 3);
- template &= 0x1e;
- if (template >= 0x10 /* including B unit */ ||
- template == 0x04 /* including X unit */ ||
- template == 0x06) /* undefined */
- return 0;
-
- return 1;
-}
-
-/* Prepare long jump bundle and disables other boosters if need */
-static void __kprobes prepare_booster(struct kprobe *p)
-{
- unsigned long addr = (unsigned long)p->addr & ~0xFULL;
- unsigned int slot = (unsigned long)p->addr & 0xf;
- struct kprobe *other_kp;
-
- if (can_boost(&p->ainsn.insn[0].bundle, slot, addr)) {
- set_brl_inst(&p->ainsn.insn[1].bundle, (bundle_t *)addr + 1);
- p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE;
- }
-
- /* disables boosters in previous slots */
- for (; addr < (unsigned long)p->addr; addr++) {
- other_kp = get_kprobe((void *)addr);
- if (other_kp)
- other_kp->ainsn.inst_flag &= ~INST_FLAG_BOOSTABLE;
- }
-}
-
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
unsigned long addr = (unsigned long) p->addr;
@@ -615,8 +530,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
prepare_break_inst(template, slot, major_opcode, kprobe_inst, p, qp);
- prepare_booster(p);
-
return 0;
}
@@ -630,9 +543,7 @@ void __kprobes arch_arm_kprobe(struct kprobe *p)
src = &p->opcode.bundle;
flush_icache_range((unsigned long)p->ainsn.insn,
- (unsigned long)p->ainsn.insn +
- sizeof(kprobe_opcode_t) * MAX_INSN_SIZE);
-
+ (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
switch (p->ainsn.slot) {
case 0:
dest->quad0.slot0 = src->quad0.slot0;
@@ -673,13 +584,13 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
mutex_lock(&kprobe_mutex);
- free_insn_slot(p->ainsn.insn, p->ainsn.inst_flag & INST_FLAG_BOOSTABLE);
+ free_insn_slot(p->ainsn.insn, 0);
mutex_unlock(&kprobe_mutex);
}
/*
* We are resuming execution after a single step fault, so the pt_regs
* structure reflects the register state after we executed the instruction
- * located in the kprobe (p->ainsn.insn->bundle). We still need to adjust
+ * located in the kprobe (p->ainsn.insn.bundle). We still need to adjust
* the ip to point back to the original stack address. To set the IP address
* to original stack address, handle the case where we need to fixup the
* relative IP address and/or fixup branch register.
@@ -696,7 +607,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
if (slot == 1 && bundle_encoding[template][1] == L)
slot = 2;
- if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) {
+ if (p->ainsn.inst_flag) {
if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) {
/* Fix relative IP address */
@@ -775,12 +686,33 @@ static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs)
static int __kprobes is_ia64_break_inst(struct pt_regs *regs)
{
unsigned int slot = ia64_psr(regs)->ri;
+ unsigned int template, major_opcode;
+ unsigned long kprobe_inst;
unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip;
bundle_t bundle;
memcpy(&bundle, kprobe_addr, sizeof(bundle_t));
+ template = bundle.quad0.template;
+
+ /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
+ if (slot == 1 && bundle_encoding[template][1] == L)
+ slot++;
- return __is_ia64_break_inst(&bundle, slot);
+ /* Get Kprobe probe instruction at given slot*/
+ get_kprobe_inst(&bundle, slot, &kprobe_inst, &major_opcode);
+
+ /* For break instruction,
+ * Bits 37:40 Major opcode to be zero
+ * Bits 27:32 X6 to be zero
+ * Bits 32:35 X3 to be zero
+ */
+ if (major_opcode || ((kprobe_inst >> 27) & 0x1FF) ) {
+ /* Not a break instruction */
+ return 0;
+ }
+
+ /* Is a break instruction */
+ return 1;
}
static int __kprobes pre_kprobes_handler(struct die_args *args)
@@ -870,19 +802,6 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
return 1;
ss_probe:
-#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
- if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) {
- /* Boost up -- we can execute copied instructions directly */
- ia64_psr(regs)->ri = p->ainsn.slot;
- regs->cr_iip = (unsigned long)&p->ainsn.insn->bundle & ~0xFULL;
- /* turn single stepping off */
- ia64_psr(regs)->ss = 0;
-
- reset_current_kprobe();
- preempt_enable_no_resched();
- return 1;
- }
-#endif
prepare_ss(p, regs);
kcb->kprobe_status = KPROBE_HIT_SS;
return 1;
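
For reference, the break-instruction test that this hunk folds back into is_ia64_break_inst() looks at two fields of the 41-bit slot image: the major opcode (bits 37-40) must be zero and the X6/X3 subfields (bits 27-35) must be zero. A minimal sketch that mirrors the check above; the helper name is illustrative:

	/* non-zero when the decoded slot is an IA-64 "break" instruction */
	static int slot_is_break(unsigned long slot_inst, unsigned int major_opcode)
	{
		return !(major_opcode || ((slot_inst >> 27) & 0x1FF));
	}
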
diff --git a/trunk/arch/ia64/kernel/mca.c b/trunk/arch/ia64/kernel/mca.c
index e51bced3b0fa..6c18221dba36 100644
--- a/trunk/arch/ia64/kernel/mca.c
+++ b/trunk/arch/ia64/kernel/mca.c
@@ -69,7 +69,6 @@
* 2007-04-27 Russ Anderson
* Support multiple cpus going through OS_MCA in the same event.
*/
-#include
#include
#include
#include
@@ -98,7 +97,6 @@
#include
#include
-#include
#include "mca_drv.h"
#include "entry.h"
@@ -114,7 +112,6 @@ DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */
DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */
-DEFINE_PER_CPU(u64, ia64_mca_tr_reload); /* Flag for TR reload */
unsigned long __per_cpu_mca[NR_CPUS];
@@ -296,8 +293,7 @@ static void ia64_mlogbuf_dump_from_init(void)
if (mlogbuf_finished)
return;
- if (mlogbuf_timestamp &&
- time_before(jiffies, mlogbuf_timestamp + 30 * HZ)) {
+ if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) {
printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT "
" and the system seems to be messed up.\n");
ia64_mlogbuf_finish(0);
@@ -1186,49 +1182,6 @@ ia64_wait_for_slaves(int monarch, const char *type)
return;
}
-/* mca_insert_tr
- *
- * Switch rid when a TR reload is needed.
- * iord: 1: itr, 2: dtr;
- *
-*/
-static void mca_insert_tr(u64 iord)
-{
-
- int i;
- u64 old_rr;
- struct ia64_tr_entry *p;
- unsigned long psr;
- int cpu = smp_processor_id();
-
- psr = ia64_clear_ic();
- for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) {
- p = &__per_cpu_idtrs[cpu][iord-1][i];
- if (p->pte & 0x1) {
- old_rr = ia64_get_rr(p->ifa);
- if (old_rr != p->rr) {
- ia64_set_rr(p->ifa, p->rr);
- ia64_srlz_d();
- }
- ia64_ptr(iord, p->ifa, p->itir >> 2);
- ia64_srlz_i();
- if (iord & 0x1) {
- ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2);
- ia64_srlz_i();
- }
- if (iord & 0x2) {
- ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2);
- ia64_srlz_i();
- }
- if (old_rr != p->rr) {
- ia64_set_rr(p->ifa, old_rr);
- ia64_srlz_d();
- }
- }
- }
- ia64_set_psr(psr);
-}
-
/*
* ia64_mca_handler
*
@@ -1313,17 +1266,16 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
} else {
/* Dump buffered message to console */
ia64_mlogbuf_finish(1);
+#ifdef CONFIG_KEXEC
+ atomic_set(&kdump_in_progress, 1);
+ monarch_cpu = -1;
+#endif
}
-
- if (__get_cpu_var(ia64_mca_tr_reload)) {
- mca_insert_tr(0x1); /*Reload dynamic itrs*/
- mca_insert_tr(0x2); /*Reload dynamic dtrs*/
- }
-
if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover)
== NOTIFY_STOP)
ia64_mca_spin(__func__);
+
if (atomic_dec_return(&mca_count) > 0) {
int i;
diff --git a/trunk/arch/ia64/kernel/mca_asm.S b/trunk/arch/ia64/kernel/mca_asm.S
index a06d46548ff9..8bc7d259e0c6 100644
--- a/trunk/arch/ia64/kernel/mca_asm.S
+++ b/trunk/arch/ia64/kernel/mca_asm.S
@@ -219,13 +219,8 @@ ia64_reload_tr:
mov r20=IA64_TR_CURRENT_STACK
;;
itr.d dtr[r20]=r16
- GET_THIS_PADDR(r2, ia64_mca_tr_reload)
- mov r18 = 1
;;
srlz.d
- ;;
- st8 [r2] =r18
- ;;
done_tlb_purge_and_reload:
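
The mca.c and mca_asm.S hunks above remove the two halves of one per-CPU handshake: the assembly TLB purge/reload path set a flag after re-inserting the kernel's pinned translation registers, and the C-level MCA handler consumed it to re-insert the dynamically registered TRs. Condensed from the removed code:

	/* flag written from the mca_asm.S reload path */
	DEFINE_PER_CPU(u64, ia64_mca_tr_reload);

	/* consumed on the ia64_mca_handler() exit path */
	if (__get_cpu_var(ia64_mca_tr_reload)) {
		mca_insert_tr(0x1);	/* reload dynamic itrs */
		mca_insert_tr(0x2);	/* reload dynamic dtrs */
	}
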
diff --git a/trunk/arch/ia64/kernel/minstate.h b/trunk/arch/ia64/kernel/minstate.h
index 7c548ac52bbc..c9ac8bada786 100644
--- a/trunk/arch/ia64/kernel/minstate.h
+++ b/trunk/arch/ia64/kernel/minstate.h
@@ -3,18 +3,6 @@
#include "entry.h"
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-/* read ar.itc in advance, and use it before leaving bank 0 */
-#define ACCOUNT_GET_STAMP \
-(pUStk) mov.m r20=ar.itc;
-#define ACCOUNT_SYS_ENTER \
-(pUStk) br.call.spnt rp=account_sys_enter \
- ;;
-#else
-#define ACCOUNT_GET_STAMP
-#define ACCOUNT_SYS_ENTER
-#endif
-
/*
* DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
* the minimum state necessary that allows us to turn psr.ic back
@@ -134,13 +122,11 @@
;; \
.mem.offset 0,0; st8.spill [r16]=r2,16; \
.mem.offset 8,0; st8.spill [r17]=r3,16; \
- ACCOUNT_GET_STAMP \
adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
;; \
EXTRA; \
movl r1=__gp; /* establish kernel global pointer */ \
;; \
- ACCOUNT_SYS_ENTER \
bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
;;
diff --git a/trunk/arch/ia64/kernel/numa.c b/trunk/arch/ia64/kernel/numa.c
index c93420c97409..a78b45f5fe2f 100644
--- a/trunk/arch/ia64/kernel/numa.c
+++ b/trunk/arch/ia64/kernel/numa.c
@@ -73,7 +73,7 @@ void __init build_cpu_to_node_map(void)
for(node=0; node < MAX_NUMNODES; node++)
cpus_clear(node_to_cpu_mask[node]);
- for_each_possible_early_cpu(cpu) {
+ for(cpu = 0; cpu < NR_CPUS; ++cpu) {
node = -1;
for (i = 0; i < NR_CPUS; ++i)
if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
diff --git a/trunk/arch/ia64/kernel/patch.c b/trunk/arch/ia64/kernel/patch.c
index e0dca8743dbb..2cb9425e0421 100644
--- a/trunk/arch/ia64/kernel/patch.c
+++ b/trunk/arch/ia64/kernel/patch.c
@@ -135,10 +135,10 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
while (offp < (s32 *) end) {
wp = (u64 *) ia64_imva((char *) offp + *offp);
- wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
- wp[1] = 0x0084006880000200UL;
- wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
- wp[3] = 0x0004000000000200UL;
+ wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
+ wp[1] = 0x0004000000000200UL;
+ wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
+ wp[3] = 0x0084006880000200UL;
ia64_fc(wp); ia64_fc(wp + 2);
++offp;
}
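
Context for the word swap in ia64_patch_mckinley_e9(): an IA-64 bundle is 128 bits stored as two consecutive 64-bit words, so wp[0]/wp[1] form the first patched bundle and wp[2]/wp[3] the second, and ia64_fc() flushes one bundle each. A layout sketch only (field boundaries are approximate, struct name illustrative):

	/* one 128-bit instruction bundle as it sits in memory */
	struct ia64_bundle_words {
		u64 lo;		/* 5-bit template, slot 0, low bits of slot 1 */
		u64 hi;		/* high bits of slot 1, slot 2 */
	};
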
diff --git a/trunk/arch/ia64/kernel/perfmon.c b/trunk/arch/ia64/kernel/perfmon.c
index d1d24f4598da..a2aabfdc80d9 100644
--- a/trunk/arch/ia64/kernel/perfmon.c
+++ b/trunk/arch/ia64/kernel/perfmon.c
@@ -4204,10 +4204,10 @@ pfm_check_task_exist(pfm_context_t *ctx)
do_each_thread (g, t) {
if (t->thread.pfm_context == ctx) {
ret = 0;
- goto out;
+ break;
}
} while_each_thread (g, t);
-out:
+
read_unlock(&tasklist_lock);
DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx));
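
Worth noting for the hunk above: do_each_thread()/while_each_thread() expand to a nested loop (an outer walk over thread-group leaders with an inner walk over their threads), so a bare break only leaves the inner loop, which is why the other variant of this code uses a goto to leave the whole walk. Roughly, the macros of this era look like:

	#define do_each_thread(g, t) \
		for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do

	#define while_each_thread(g, t) \
		while ((t = next_thread(t)) != g)
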
diff --git a/trunk/arch/ia64/kernel/process.c b/trunk/arch/ia64/kernel/process.c
index a5ea817cbcbf..49937a383b23 100644
--- a/trunk/arch/ia64/kernel/process.c
+++ b/trunk/arch/ia64/kernel/process.c
@@ -625,12 +625,42 @@ do_dump_fpu (struct unw_frame_info *info, void *arg)
do_dump_task_fpu(current, info, arg);
}
+int
+dump_task_regs(struct task_struct *task, elf_gregset_t *regs)
+{
+ struct unw_frame_info tcore_info;
+
+ if (current == task) {
+ unw_init_running(do_copy_regs, regs);
+ } else {
+ memset(&tcore_info, 0, sizeof(tcore_info));
+ unw_init_from_blocked_task(&tcore_info, task);
+ do_copy_task_regs(task, &tcore_info, regs);
+ }
+ return 1;
+}
+
void
ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
{
unw_init_running(do_copy_regs, dst);
}
+int
+dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst)
+{
+ struct unw_frame_info tcore_info;
+
+ if (current == task) {
+ unw_init_running(do_dump_fpu, dst);
+ } else {
+ memset(&tcore_info, 0, sizeof(tcore_info));
+ unw_init_from_blocked_task(&tcore_info, task);
+ do_dump_task_fpu(task, &tcore_info, dst);
+ }
+ return 1;
+}
+
int
dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
{
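
The two helpers added above are the per-arch ELF core-dump hooks: for the current task they unwind the live frame with unw_init_running(), while for a blocked task the unwinder is seeded from the task's switch stack. A hedged usage sketch (caller-side variable names are illustrative; both helpers return 1 on success):

	elf_gregset_t gregs;
	elf_fpregset_t fpregs;
	int have_gregs, have_fpregs;

	/* works for any task, not just current */
	have_gregs  = dump_task_regs(task, &gregs);
	have_fpregs = dump_task_fpu(task, &fpregs);
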
diff --git a/trunk/arch/ia64/kernel/ptrace.c b/trunk/arch/ia64/kernel/ptrace.c
index 2a9943b5947f..ab784ec4319d 100644
--- a/trunk/arch/ia64/kernel/ptrace.c
+++ b/trunk/arch/ia64/kernel/ptrace.c
@@ -3,9 +3,6 @@
*
* Copyright (C) 1999-2005 Hewlett-Packard Co
* David Mosberger-Tang
- * Copyright (C) 2006 Intel Co
- * 2006-08-12 - IA64 Native Utrace implementation support added by
- * Anil S Keshavamurthy
*
* Derived from the x86 and Alpha versions.
*/
@@ -20,8 +17,6 @@
#include
#include
#include
-#include
-#include
#include
#include
@@ -745,6 +740,25 @@ ia64_sync_fph (struct task_struct *task)
psr->dfh = 1;
}
+static int
+access_fr (struct unw_frame_info *info, int regnum, int hi,
+ unsigned long *data, int write_access)
+{
+ struct ia64_fpreg fpval;
+ int ret;
+
+ ret = unw_get_fr(info, regnum, &fpval);
+ if (ret < 0)
+ return ret;
+
+ if (write_access) {
+ fpval.u.bits[hi] = *data;
+ ret = unw_set_fr(info, regnum, fpval);
+ } else
+ *data = fpval.u.bits[hi];
+ return ret;
+}
+
/*
* Change the machine-state of CHILD such that it will return via the normal
* kernel exit-path, rather than the syscall-exit path.
@@ -846,7 +860,309 @@ access_nat_bits (struct task_struct *child, struct pt_regs *pt,
static int
access_uarea (struct task_struct *child, unsigned long addr,
- unsigned long *data, int write_access);
+ unsigned long *data, int write_access)
+{
+ unsigned long *ptr, regnum, urbs_end, cfm;
+ struct switch_stack *sw;
+ struct pt_regs *pt;
+# define pt_reg_addr(pt, reg) ((void *) \
+ ((unsigned long) (pt) \
+ + offsetof(struct pt_regs, reg)))
+
+
+ pt = task_pt_regs(child);
+ sw = (struct switch_stack *) (child->thread.ksp + 16);
+
+ if ((addr & 0x7) != 0) {
+ dprintk("ptrace: unaligned register address 0x%lx\n", addr);
+ return -1;
+ }
+
+ if (addr < PT_F127 + 16) {
+ /* accessing fph */
+ if (write_access)
+ ia64_sync_fph(child);
+ else
+ ia64_flush_fph(child);
+ ptr = (unsigned long *)
+ ((unsigned long) &child->thread.fph + addr);
+ } else if ((addr >= PT_F10) && (addr < PT_F11 + 16)) {
+ /* scratch registers untouched by kernel (saved in pt_regs) */
+ ptr = pt_reg_addr(pt, f10) + (addr - PT_F10);
+ } else if (addr >= PT_F12 && addr < PT_F15 + 16) {
+ /*
+ * Scratch registers untouched by kernel (saved in
+ * switch_stack).
+ */
+ ptr = (unsigned long *) ((long) sw
+ + (addr - PT_NAT_BITS - 32));
+ } else if (addr < PT_AR_LC + 8) {
+ /* preserved state: */
+ struct unw_frame_info info;
+ char nat = 0;
+ int ret;
+
+ unw_init_from_blocked_task(&info, child);
+ if (unw_unwind_to_user(&info) < 0)
+ return -1;
+
+ switch (addr) {
+ case PT_NAT_BITS:
+ return access_nat_bits(child, pt, &info,
+ data, write_access);
+
+ case PT_R4: case PT_R5: case PT_R6: case PT_R7:
+ if (write_access) {
+ /* read NaT bit first: */
+ unsigned long dummy;
+
+ ret = unw_get_gr(&info, (addr - PT_R4)/8 + 4,
+ &dummy, &nat);
+ if (ret < 0)
+ return ret;
+ }
+ return unw_access_gr(&info, (addr - PT_R4)/8 + 4, data,
+ &nat, write_access);
+
+ case PT_B1: case PT_B2: case PT_B3:
+ case PT_B4: case PT_B5:
+ return unw_access_br(&info, (addr - PT_B1)/8 + 1, data,
+ write_access);
+
+ case PT_AR_EC:
+ return unw_access_ar(&info, UNW_AR_EC, data,
+ write_access);
+
+ case PT_AR_LC:
+ return unw_access_ar(&info, UNW_AR_LC, data,
+ write_access);
+
+ default:
+ if (addr >= PT_F2 && addr < PT_F5 + 16)
+ return access_fr(&info, (addr - PT_F2)/16 + 2,
+ (addr & 8) != 0, data,
+ write_access);
+ else if (addr >= PT_F16 && addr < PT_F31 + 16)
+ return access_fr(&info,
+ (addr - PT_F16)/16 + 16,
+ (addr & 8) != 0,
+ data, write_access);
+ else {
+ dprintk("ptrace: rejecting access to register "
+ "address 0x%lx\n", addr);
+ return -1;
+ }
+ }
+ } else if (addr < PT_F9+16) {
+ /* scratch state */
+ switch (addr) {
+ case PT_AR_BSP:
+ /*
+ * By convention, we use PT_AR_BSP to refer to
+ * the end of the user-level backing store.
+ * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
+ * to get the real value of ar.bsp at the time
+ * the kernel was entered.
+ *
+ * Furthermore, when changing the contents of
+ * PT_AR_BSP (or PT_CFM) while the task is
+ * blocked in a system call, convert the state
+ * so that the non-system-call exit
+ * path is used. This ensures that the proper
+ * state will be picked up when resuming
+ * execution. However, it *also* means that
+ * once we write PT_AR_BSP/PT_CFM, it won't be
+ * possible to modify the syscall arguments of
+ * the pending system call any longer. This
+ * shouldn't be an issue because modifying
+ * PT_AR_BSP/PT_CFM generally implies that
+ * we're either abandoning the pending system
+ * call or that we defer its re-execution
+ * (e.g., due to GDB doing an inferior
+ * function call).
+ */
+ urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
+ if (write_access) {
+ if (*data != urbs_end) {
+ if (in_syscall(pt))
+ convert_to_non_syscall(child,
+ pt,
+ cfm);
+ /*
+ * Simulate user-level write
+ * of ar.bsp:
+ */
+ pt->loadrs = 0;
+ pt->ar_bspstore = *data;
+ }
+ } else
+ *data = urbs_end;
+ return 0;
+
+ case PT_CFM:
+ urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
+ if (write_access) {
+ if (((cfm ^ *data) & PFM_MASK) != 0) {
+ if (in_syscall(pt))
+ convert_to_non_syscall(child,
+ pt,
+ cfm);
+ pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
+ | (*data & PFM_MASK));
+ }
+ } else
+ *data = cfm;
+ return 0;
+
+ case PT_CR_IPSR:
+ if (write_access) {
+ unsigned long tmp = *data;
+ /* psr.ri==3 is a reserved value: SDM 2:25 */
+ if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
+ tmp &= ~IA64_PSR_RI;
+ pt->cr_ipsr = ((tmp & IPSR_MASK)
+ | (pt->cr_ipsr & ~IPSR_MASK));
+ } else
+ *data = (pt->cr_ipsr & IPSR_MASK);
+ return 0;
+
+ case PT_AR_RSC:
+ if (write_access)
+ pt->ar_rsc = *data | (3 << 2); /* force PL3 */
+ else
+ *data = pt->ar_rsc;
+ return 0;
+
+ case PT_AR_RNAT:
+ ptr = pt_reg_addr(pt, ar_rnat);
+ break;
+ case PT_R1:
+ ptr = pt_reg_addr(pt, r1);
+ break;
+ case PT_R2: case PT_R3:
+ ptr = pt_reg_addr(pt, r2) + (addr - PT_R2);
+ break;
+ case PT_R8: case PT_R9: case PT_R10: case PT_R11:
+ ptr = pt_reg_addr(pt, r8) + (addr - PT_R8);
+ break;
+ case PT_R12: case PT_R13:
+ ptr = pt_reg_addr(pt, r12) + (addr - PT_R12);
+ break;
+ case PT_R14:
+ ptr = pt_reg_addr(pt, r14);
+ break;
+ case PT_R15:
+ ptr = pt_reg_addr(pt, r15);
+ break;
+ case PT_R16: case PT_R17: case PT_R18: case PT_R19:
+ case PT_R20: case PT_R21: case PT_R22: case PT_R23:
+ case PT_R24: case PT_R25: case PT_R26: case PT_R27:
+ case PT_R28: case PT_R29: case PT_R30: case PT_R31:
+ ptr = pt_reg_addr(pt, r16) + (addr - PT_R16);
+ break;
+ case PT_B0:
+ ptr = pt_reg_addr(pt, b0);
+ break;
+ case PT_B6:
+ ptr = pt_reg_addr(pt, b6);
+ break;
+ case PT_B7:
+ ptr = pt_reg_addr(pt, b7);
+ break;
+ case PT_F6: case PT_F6+8: case PT_F7: case PT_F7+8:
+ case PT_F8: case PT_F8+8: case PT_F9: case PT_F9+8:
+ ptr = pt_reg_addr(pt, f6) + (addr - PT_F6);
+ break;
+ case PT_AR_BSPSTORE:
+ ptr = pt_reg_addr(pt, ar_bspstore);
+ break;
+ case PT_AR_UNAT:
+ ptr = pt_reg_addr(pt, ar_unat);
+ break;
+ case PT_AR_PFS:
+ ptr = pt_reg_addr(pt, ar_pfs);
+ break;
+ case PT_AR_CCV:
+ ptr = pt_reg_addr(pt, ar_ccv);
+ break;
+ case PT_AR_FPSR:
+ ptr = pt_reg_addr(pt, ar_fpsr);
+ break;
+ case PT_CR_IIP:
+ ptr = pt_reg_addr(pt, cr_iip);
+ break;
+ case PT_PR:
+ ptr = pt_reg_addr(pt, pr);
+ break;
+ /* scratch register */
+
+ default:
+ /* disallow accessing anything else... */
+ dprintk("ptrace: rejecting access to register "
+ "address 0x%lx\n", addr);
+ return -1;
+ }
+ } else if (addr <= PT_AR_SSD) {
+ ptr = pt_reg_addr(pt, ar_csd) + (addr - PT_AR_CSD);
+ } else {
+ /* access debug registers */
+
+ if (addr >= PT_IBR) {
+ regnum = (addr - PT_IBR) >> 3;
+ ptr = &child->thread.ibr[0];
+ } else {
+ regnum = (addr - PT_DBR) >> 3;
+ ptr = &child->thread.dbr[0];
+ }
+
+ if (regnum >= 8) {
+ dprintk("ptrace: rejecting access to register "
+ "address 0x%lx\n", addr);
+ return -1;
+ }
+#ifdef CONFIG_PERFMON
+ /*
+ * Check if debug registers are used by perfmon. This
+ * test must be done once we know that we can do the
+ * operation, i.e. the arguments are all valid, but
+ * before we start modifying the state.
+ *
+ * Perfmon needs to keep a count of how many processes
+ * are trying to modify the debug registers for system
+ * wide monitoring sessions.
+ *
+ * We also include read access here, because they may
+ * cause the PMU-installed debug register state
+ * (dbr[], ibr[]) to be reset. The two arrays are also
+ * used by perfmon, but we do not use
+ * IA64_THREAD_DBG_VALID. The registers are restored
+ * by the PMU context switch code.
+ */
+ if (pfm_use_debug_registers(child)) return -1;
+#endif
+
+ if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
+ child->thread.flags |= IA64_THREAD_DBG_VALID;
+ memset(child->thread.dbr, 0,
+ sizeof(child->thread.dbr));
+ memset(child->thread.ibr, 0,
+ sizeof(child->thread.ibr));
+ }
+
+ ptr += regnum;
+
+ if ((regnum & 1) && write_access) {
+ /* don't let the user set kernel-level breakpoints: */
+ *ptr = *data & ~(7UL << 56);
+ return 0;
+ }
+ }
+ if (write_access)
+ *ptr = *data;
+ else
+ *data = *ptr;
+ return 0;
+}
static long
ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
@@ -1310,892 +1626,3 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
if (test_thread_flag(TIF_RESTORE_RSE))
ia64_sync_krbs();
}
-
-/* Utrace implementation starts here */
-struct regset_get {
- void *kbuf;
- void __user *ubuf;
-};
-
-struct regset_set {
- const void *kbuf;
- const void __user *ubuf;
-};
-
-struct regset_getset {
- struct task_struct *target;
- const struct user_regset *regset;
- union {
- struct regset_get get;
- struct regset_set set;
- } u;
- unsigned int pos;
- unsigned int count;
- int ret;
-};
-
-static int
-access_elf_gpreg(struct task_struct *target, struct unw_frame_info *info,
- unsigned long addr, unsigned long *data, int write_access)
-{
- struct pt_regs *pt;
- unsigned long *ptr = NULL;
- int ret;
- char nat = 0;
-
- pt = task_pt_regs(target);
- switch (addr) {
- case ELF_GR_OFFSET(1):
- ptr = &pt->r1;
- break;
- case ELF_GR_OFFSET(2):
- case ELF_GR_OFFSET(3):
- ptr = (void *)&pt->r2 + (addr - ELF_GR_OFFSET(2));
- break;
- case ELF_GR_OFFSET(4) ... ELF_GR_OFFSET(7):
- if (write_access) {
- /* read NaT bit first: */
- unsigned long dummy;
-
- ret = unw_get_gr(info, addr/8, &dummy, &nat);
- if (ret < 0)
- return ret;
- }
- return unw_access_gr(info, addr/8, data, &nat, write_access);
- case ELF_GR_OFFSET(8) ... ELF_GR_OFFSET(11):
- ptr = (void *)&pt->r8 + addr - ELF_GR_OFFSET(8);
- break;
- case ELF_GR_OFFSET(12):
- case ELF_GR_OFFSET(13):
- ptr = (void *)&pt->r12 + addr - ELF_GR_OFFSET(12);
- break;
- case ELF_GR_OFFSET(14):
- ptr = &pt->r14;
- break;
- case ELF_GR_OFFSET(15):
- ptr = &pt->r15;
- }
- if (write_access)
- *ptr = *data;
- else
- *data = *ptr;
- return 0;
-}
-
-static int
-access_elf_breg(struct task_struct *target, struct unw_frame_info *info,
- unsigned long addr, unsigned long *data, int write_access)
-{
- struct pt_regs *pt;
- unsigned long *ptr = NULL;
-
- pt = task_pt_regs(target);
- switch (addr) {
- case ELF_BR_OFFSET(0):
- ptr = &pt->b0;
- break;
- case ELF_BR_OFFSET(1) ... ELF_BR_OFFSET(5):
- return unw_access_br(info, (addr - ELF_BR_OFFSET(0))/8,
- data, write_access);
- case ELF_BR_OFFSET(6):
- ptr = &pt->b6;
- break;
- case ELF_BR_OFFSET(7):
- ptr = &pt->b7;
- }
- if (write_access)
- *ptr = *data;
- else
- *data = *ptr;
- return 0;
-}
-
-static int
-access_elf_areg(struct task_struct *target, struct unw_frame_info *info,
- unsigned long addr, unsigned long *data, int write_access)
-{
- struct pt_regs *pt;
- unsigned long cfm, urbs_end;
- unsigned long *ptr = NULL;
-
- pt = task_pt_regs(target);
- if (addr >= ELF_AR_RSC_OFFSET && addr <= ELF_AR_SSD_OFFSET) {
- switch (addr) {
- case ELF_AR_RSC_OFFSET:
- /* force PL3 */
- if (write_access)
- pt->ar_rsc = *data | (3 << 2);
- else
- *data = pt->ar_rsc;
- return 0;
- case ELF_AR_BSP_OFFSET:
- /*
- * By convention, we use PT_AR_BSP to refer to
- * the end of the user-level backing store.
- * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
- * to get the real value of ar.bsp at the time
- * the kernel was entered.
- *
- * Furthermore, when changing the contents of
- * PT_AR_BSP (or PT_CFM) while the task is
- * blocked in a system call, convert the state
- * so that the non-system-call exit
- * path is used. This ensures that the proper
- * state will be picked up when resuming
- * execution. However, it *also* means that
- * once we write PT_AR_BSP/PT_CFM, it won't be
- * possible to modify the syscall arguments of
- * the pending system call any longer. This
- * shouldn't be an issue because modifying
- * PT_AR_BSP/PT_CFM generally implies that
- * we're either abandoning the pending system
- * call or that we defer its re-execution
- * (e.g., due to GDB doing an inferior
- * function call).
- */
- urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
- if (write_access) {
- if (*data != urbs_end) {
- if (in_syscall(pt))
- convert_to_non_syscall(target,
- pt,
- cfm);
- /*
- * Simulate user-level write
- * of ar.bsp:
- */
- pt->loadrs = 0;
- pt->ar_bspstore = *data;
- }
- } else
- *data = urbs_end;
- return 0;
- case ELF_AR_BSPSTORE_OFFSET:
- ptr = &pt->ar_bspstore;
- break;
- case ELF_AR_RNAT_OFFSET:
- ptr = &pt->ar_rnat;
- break;
- case ELF_AR_CCV_OFFSET:
- ptr = &pt->ar_ccv;
- break;
- case ELF_AR_UNAT_OFFSET:
- ptr = &pt->ar_unat;
- break;
- case ELF_AR_FPSR_OFFSET:
- ptr = &pt->ar_fpsr;
- break;
- case ELF_AR_PFS_OFFSET:
- ptr = &pt->ar_pfs;
- break;
- case ELF_AR_LC_OFFSET:
- return unw_access_ar(info, UNW_AR_LC, data,
- write_access);
- case ELF_AR_EC_OFFSET:
- return unw_access_ar(info, UNW_AR_EC, data,
- write_access);
- case ELF_AR_CSD_OFFSET:
- ptr = &pt->ar_csd;
- break;
- case ELF_AR_SSD_OFFSET:
- ptr = &pt->ar_ssd;
- }
- } else if (addr >= ELF_CR_IIP_OFFSET && addr <= ELF_CR_IPSR_OFFSET) {
- switch (addr) {
- case ELF_CR_IIP_OFFSET:
- ptr = &pt->cr_iip;
- break;
- case ELF_CFM_OFFSET:
- urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
- if (write_access) {
- if (((cfm ^ *data) & PFM_MASK) != 0) {
- if (in_syscall(pt))
- convert_to_non_syscall(target,
- pt,
- cfm);
- pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
- | (*data & PFM_MASK));
- }
- } else
- *data = cfm;
- return 0;
- case ELF_CR_IPSR_OFFSET:
- if (write_access) {
- unsigned long tmp = *data;
- /* psr.ri==3 is a reserved value: SDM 2:25 */
- if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
- tmp &= ~IA64_PSR_RI;
- pt->cr_ipsr = ((tmp & IPSR_MASK)
- | (pt->cr_ipsr & ~IPSR_MASK));
- } else
- *data = (pt->cr_ipsr & IPSR_MASK);
- return 0;
- }
- } else if (addr == ELF_NAT_OFFSET)
- return access_nat_bits(target, pt, info,
- data, write_access);
- else if (addr == ELF_PR_OFFSET)
- ptr = &pt->pr;
- else
- return -1;
-
- if (write_access)
- *ptr = *data;
- else
- *data = *ptr;
-
- return 0;
-}
-
-static int
-access_elf_reg(struct task_struct *target, struct unw_frame_info *info,
- unsigned long addr, unsigned long *data, int write_access)
-{
- if (addr >= ELF_GR_OFFSET(1) && addr <= ELF_GR_OFFSET(15))
- return access_elf_gpreg(target, info, addr, data, write_access);
- else if (addr >= ELF_BR_OFFSET(0) && addr <= ELF_BR_OFFSET(7))
- return access_elf_breg(target, info, addr, data, write_access);
- else
- return access_elf_areg(target, info, addr, data, write_access);
-}
-
-void do_gpregs_get(struct unw_frame_info *info, void *arg)
-{
- struct pt_regs *pt;
- struct regset_getset *dst = arg;
- elf_greg_t tmp[16];
- unsigned int i, index, min_copy;
-
- if (unw_unwind_to_user(info) < 0)
- return;
-
- /*
- * coredump format:
- * r0-r31
- * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
- * predicate registers (p0-p63)
- * b0-b7
- * ip cfm user-mask
- * ar.rsc ar.bsp ar.bspstore ar.rnat
- * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
- */
-
-
- /* Skip r0 */
- if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
- dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
- &dst->u.get.kbuf,
- &dst->u.get.ubuf,
- 0, ELF_GR_OFFSET(1));
- if (dst->ret || dst->count == 0)
- return;
- }
-
- /* gr1 - gr15 */
- if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
- index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
- min_copy = ELF_GR_OFFSET(16) > (dst->pos + dst->count) ?
- (dst->pos + dst->count) : ELF_GR_OFFSET(16);
- for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
- index++)
- if (access_elf_reg(dst->target, info, i,
- &tmp[index], 0) < 0) {
- dst->ret = -EIO;
- return;
- }
- dst->ret = user_regset_copyout(&dst->pos, &dst->count,
- &dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
- ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
- if (dst->ret || dst->count == 0)
- return;
- }
-
- /* r16-r31 */
- if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
- pt = task_pt_regs(dst->target);
- dst->ret = user_regset_copyout(&dst->pos, &dst->count,
- &dst->u.get.kbuf, &dst->u.get.ubuf, &pt->r16,
- ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
- if (dst->ret || dst->count == 0)
- return;
- }
-
- /* nat, pr, b0 - b7 */
- if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
- index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
- min_copy = ELF_CR_IIP_OFFSET > (dst->pos + dst->count) ?
- (dst->pos + dst->count) : ELF_CR_IIP_OFFSET;
- for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
- index++)
- if (access_elf_reg(dst->target, info, i,
- &tmp[index], 0) < 0) {
- dst->ret = -EIO;
- return;
- }
- dst->ret = user_regset_copyout(&dst->pos, &dst->count,
- &dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
- ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
- if (dst->ret || dst->count == 0)
- return;
- }
-
- /* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
- * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
- */
- if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
- index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
- min_copy = ELF_AR_END_OFFSET > (dst->pos + dst->count) ?
- (dst->pos + dst->count) : ELF_AR_END_OFFSET;
- for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
- index++)
- if (access_elf_reg(dst->target, info, i,
- &tmp[index], 0) < 0) {
- dst->ret = -EIO;
- return;
- }
- dst->ret = user_regset_copyout(&dst->pos, &dst->count,
- &dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
- ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
- }
-}
-
-void do_gpregs_set(struct unw_frame_info *info, void *arg)
-{
- struct pt_regs *pt;
- struct regset_getset *dst = arg;
- elf_greg_t tmp[16];
- unsigned int i, index;
-
- if (unw_unwind_to_user(info) < 0)
- return;
-
- /* Skip r0 */
- if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
- dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
- &dst->u.set.kbuf,
- &dst->u.set.ubuf,
- 0, ELF_GR_OFFSET(1));
- if (dst->ret || dst->count == 0)
- return;
- }
-
- /* gr1-gr15 */
- if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
- i = dst->pos;
- index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
- dst->ret = user_regset_copyin(&dst->pos, &dst->count,
- &dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
- ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
- if (dst->ret)
- return;
- for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
- if (access_elf_reg(dst->target, info, i,
- &tmp[index], 1) < 0) {
- dst->ret = -EIO;
- return;
- }
- if (dst->count == 0)
- return;
- }
-
- /* gr16-gr31 */
- if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
- pt = task_pt_regs(dst->target);
- dst->ret = user_regset_copyin(&dst->pos, &dst->count,
- &dst->u.set.kbuf, &dst->u.set.ubuf, &pt->r16,
- ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
- if (dst->ret || dst->count == 0)
- return;
- }
-
- /* nat, pr, b0 - b7 */
- if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
- i = dst->pos;
- index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
- dst->ret = user_regset_copyin(&dst->pos, &dst->count,
- &dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
- ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
- if (dst->ret)
- return;
- for (; i < dst->pos; i += sizeof(elf_greg_t), index++)
- if (access_elf_reg(dst->target, info, i,
- &tmp[index], 1) < 0) {
- dst->ret = -EIO;
- return;
- }
- if (dst->count == 0)
- return;
- }
-
- /* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
- * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
- */
- if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
- i = dst->pos;
- index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
- dst->ret = user_regset_copyin(&dst->pos, &dst->count,
- &dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
- ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
- if (dst->ret)
- return;
- for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
- if (access_elf_reg(dst->target, info, i,
- &tmp[index], 1) < 0) {
- dst->ret = -EIO;
- return;
- }
- }
-}
-
-#define ELF_FP_OFFSET(i) (i * sizeof(elf_fpreg_t))
-
-void do_fpregs_get(struct unw_frame_info *info, void *arg)
-{
- struct regset_getset *dst = arg;
- struct task_struct *task = dst->target;
- elf_fpreg_t tmp[30];
- int index, min_copy, i;
-
- if (unw_unwind_to_user(info) < 0)
- return;
-
- /* Skip pos 0 and 1 */
- if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
- dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
- &dst->u.get.kbuf,
- &dst->u.get.ubuf,
- 0, ELF_FP_OFFSET(2));
- if (dst->count == 0 || dst->ret)
- return;
- }
-
- /* fr2-fr31 */
- if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
- index = (dst->pos - ELF_FP_OFFSET(2)) / sizeof(elf_fpreg_t);
-
- min_copy = min(((unsigned int)ELF_FP_OFFSET(32)),
- dst->pos + dst->count);
- for (i = dst->pos; i < min_copy; i += sizeof(elf_fpreg_t),
- index++)
- if (unw_get_fr(info, i / sizeof(elf_fpreg_t),
- &tmp[index])) {
- dst->ret = -EIO;
- return;
- }
- dst->ret = user_regset_copyout(&dst->pos, &dst->count,
- &dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
- ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
- if (dst->count == 0 || dst->ret)
- return;
- }
-
- /* fph */
- if (dst->count > 0) {
- ia64_flush_fph(dst->target);
- if (task->thread.flags & IA64_THREAD_FPH_VALID)
- dst->ret = user_regset_copyout(
- &dst->pos, &dst->count,
- &dst->u.get.kbuf, &dst->u.get.ubuf,
- &dst->target->thread.fph,
- ELF_FP_OFFSET(32), -1);
- else
- /* Zero fill instead. */
- dst->ret = user_regset_copyout_zero(
- &dst->pos, &dst->count,
- &dst->u.get.kbuf, &dst->u.get.ubuf,
- ELF_FP_OFFSET(32), -1);
- }
-}
-
-void do_fpregs_set(struct unw_frame_info *info, void *arg)
-{
- struct regset_getset *dst = arg;
- elf_fpreg_t fpreg, tmp[30];
- int index, start, end;
-
- if (unw_unwind_to_user(info) < 0)
- return;
-
- /* Skip pos 0 and 1 */
- if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
- dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
- &dst->u.set.kbuf,
- &dst->u.set.ubuf,
- 0, ELF_FP_OFFSET(2));
- if (dst->count == 0 || dst->ret)
- return;
- }
-
- /* fr2-fr31 */
- if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
- start = dst->pos;
- end = min(((unsigned int)ELF_FP_OFFSET(32)),
- dst->pos + dst->count);
- dst->ret = user_regset_copyin(&dst->pos, &dst->count,
- &dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
- ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
- if (dst->ret)
- return;
-
- if (start & 0xF) { /* only write high part */
- if (unw_get_fr(info, start / sizeof(elf_fpreg_t),
- &fpreg)) {
- dst->ret = -EIO;
- return;
- }
- tmp[start / sizeof(elf_fpreg_t) - 2].u.bits[0]
- = fpreg.u.bits[0];
- start &= ~0xFUL;
- }
- if (end & 0xF) { /* only write low part */
- if (unw_get_fr(info, end / sizeof(elf_fpreg_t),
- &fpreg)) {
- dst->ret = -EIO;
- return;
- }
- tmp[end / sizeof(elf_fpreg_t) - 2].u.bits[1]
- = fpreg.u.bits[1];
- end = (end + 0xF) & ~0xFUL;
- }
-
- for ( ; start < end ; start += sizeof(elf_fpreg_t)) {
- index = start / sizeof(elf_fpreg_t);
- if (unw_set_fr(info, index, tmp[index - 2])) {
- dst->ret = -EIO;
- return;
- }
- }
- if (dst->ret || dst->count == 0)
- return;
- }
-
- /* fph */
- if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(128)) {
- ia64_sync_fph(dst->target);
- dst->ret = user_regset_copyin(&dst->pos, &dst->count,
- &dst->u.set.kbuf,
- &dst->u.set.ubuf,
- &dst->target->thread.fph,
- ELF_FP_OFFSET(32), -1);
- }
-}
-
-static int
-do_regset_call(void (*call)(struct unw_frame_info *, void *),
- struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- struct regset_getset info = { .target = target, .regset = regset,
- .pos = pos, .count = count,
- .u.set = { .kbuf = kbuf, .ubuf = ubuf },
- .ret = 0 };
-
- if (target == current)
- unw_init_running(call, &info);
- else {
- struct unw_frame_info ufi;
- memset(&ufi, 0, sizeof(ufi));
- unw_init_from_blocked_task(&ufi, target);
- (*call)(&ufi, &info);
- }
-
- return info.ret;
-}
-
-static int
-gpregs_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- return do_regset_call(do_gpregs_get, target, regset, pos, count,
- kbuf, ubuf);
-}
-
-static int gpregs_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- return do_regset_call(do_gpregs_set, target, regset, pos, count,
- kbuf, ubuf);
-}
-
-static void do_gpregs_writeback(struct unw_frame_info *info, void *arg)
-{
- do_sync_rbs(info, ia64_sync_user_rbs);
-}
-
-/*
- * This is called to write back the register backing store.
- * ptrace does this before it stops, so that a tracer reading the user
- * memory after the thread stops will get the current register data.
- */
-static int
-gpregs_writeback(struct task_struct *target,
- const struct user_regset *regset,
- int now)
-{
- if (test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE))
- return 0;
- tsk_set_notify_resume(target);
- return do_regset_call(do_gpregs_writeback, target, regset, 0, 0,
- NULL, NULL);
-}
-
-static int
-fpregs_active(struct task_struct *target, const struct user_regset *regset)
-{
- return (target->thread.flags & IA64_THREAD_FPH_VALID) ? 128 : 32;
-}
-
-static int fpregs_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- return do_regset_call(do_fpregs_get, target, regset, pos, count,
- kbuf, ubuf);
-}
-
-static int fpregs_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- return do_regset_call(do_fpregs_set, target, regset, pos, count,
- kbuf, ubuf);
-}
-
-static int
-access_uarea(struct task_struct *child, unsigned long addr,
- unsigned long *data, int write_access)
-{
- unsigned int pos = -1; /* an invalid value */
- int ret;
- unsigned long *ptr, regnum;
-
- if ((addr & 0x7) != 0) {
- dprintk("ptrace: unaligned register address 0x%lx\n", addr);
- return -1;
- }
- if ((addr >= PT_NAT_BITS + 8 && addr < PT_F2) ||
- (addr >= PT_R7 + 8 && addr < PT_B1) ||
- (addr >= PT_AR_LC + 8 && addr < PT_CR_IPSR) ||
- (addr >= PT_AR_SSD + 8 && addr < PT_DBR)) {
- dprintk("ptrace: rejecting access to register "
- "address 0x%lx\n", addr);
- return -1;
- }
-
- switch (addr) {
- case PT_F32 ... (PT_F127 + 15):
- pos = addr - PT_F32 + ELF_FP_OFFSET(32);
- break;
- case PT_F2 ... (PT_F5 + 15):
- pos = addr - PT_F2 + ELF_FP_OFFSET(2);
- break;
- case PT_F10 ... (PT_F31 + 15):
- pos = addr - PT_F10 + ELF_FP_OFFSET(10);
- break;
- case PT_F6 ... (PT_F9 + 15):
- pos = addr - PT_F6 + ELF_FP_OFFSET(6);
- break;
- }
-
- if (pos != -1) {
- if (write_access)
- ret = fpregs_set(child, NULL, pos,
- sizeof(unsigned long), data, NULL);
- else
- ret = fpregs_get(child, NULL, pos,
- sizeof(unsigned long), data, NULL);
- if (ret != 0)
- return -1;
- return 0;
- }
-
- switch (addr) {
- case PT_NAT_BITS:
- pos = ELF_NAT_OFFSET;
- break;
- case PT_R4 ... PT_R7:
- pos = addr - PT_R4 + ELF_GR_OFFSET(4);
- break;
- case PT_B1 ... PT_B5:
- pos = addr - PT_B1 + ELF_BR_OFFSET(1);
- break;
- case PT_AR_EC:
- pos = ELF_AR_EC_OFFSET;
- break;
- case PT_AR_LC:
- pos = ELF_AR_LC_OFFSET;
- break;
- case PT_CR_IPSR:
- pos = ELF_CR_IPSR_OFFSET;
- break;
- case PT_CR_IIP:
- pos = ELF_CR_IIP_OFFSET;
- break;
- case PT_CFM:
- pos = ELF_CFM_OFFSET;
- break;
- case PT_AR_UNAT:
- pos = ELF_AR_UNAT_OFFSET;
- break;
- case PT_AR_PFS:
- pos = ELF_AR_PFS_OFFSET;
- break;
- case PT_AR_RSC:
- pos = ELF_AR_RSC_OFFSET;
- break;
- case PT_AR_RNAT:
- pos = ELF_AR_RNAT_OFFSET;
- break;
- case PT_AR_BSPSTORE:
- pos = ELF_AR_BSPSTORE_OFFSET;
- break;
- case PT_PR:
- pos = ELF_PR_OFFSET;
- break;
- case PT_B6:
- pos = ELF_BR_OFFSET(6);
- break;
- case PT_AR_BSP:
- pos = ELF_AR_BSP_OFFSET;
- break;
- case PT_R1 ... PT_R3:
- pos = addr - PT_R1 + ELF_GR_OFFSET(1);
- break;
- case PT_R12 ... PT_R15:
- pos = addr - PT_R12 + ELF_GR_OFFSET(12);
- break;
- case PT_R8 ... PT_R11:
- pos = addr - PT_R8 + ELF_GR_OFFSET(8);
- break;
- case PT_R16 ... PT_R31:
- pos = addr - PT_R16 + ELF_GR_OFFSET(16);
- break;
- case PT_AR_CCV:
- pos = ELF_AR_CCV_OFFSET;
- break;
- case PT_AR_FPSR:
- pos = ELF_AR_FPSR_OFFSET;
- break;
- case PT_B0:
- pos = ELF_BR_OFFSET(0);
- break;
- case PT_B7:
- pos = ELF_BR_OFFSET(7);
- break;
- case PT_AR_CSD:
- pos = ELF_AR_CSD_OFFSET;
- break;
- case PT_AR_SSD:
- pos = ELF_AR_SSD_OFFSET;
- break;
- }
-
- if (pos != -1) {
- if (write_access)
- ret = gpregs_set(child, NULL, pos,
- sizeof(unsigned long), data, NULL);
- else
- ret = gpregs_get(child, NULL, pos,
- sizeof(unsigned long), data, NULL);
- if (ret != 0)
- return -1;
- return 0;
- }
-
- /* access debug registers */
- if (addr >= PT_IBR) {
- regnum = (addr - PT_IBR) >> 3;
- ptr = &child->thread.ibr[0];
- } else {
- regnum = (addr - PT_DBR) >> 3;
- ptr = &child->thread.dbr[0];
- }
-
- if (regnum >= 8) {
- dprintk("ptrace: rejecting access to register "
- "address 0x%lx\n", addr);
- return -1;
- }
-#ifdef CONFIG_PERFMON
- /*
- * Check if debug registers are used by perfmon. This
- * test must be done once we know that we can do the
- * operation, i.e. the arguments are all valid, but
- * before we start modifying the state.
- *
- * Perfmon needs to keep a count of how many processes
- * are trying to modify the debug registers for system
- * wide monitoring sessions.
- *
- * We also include read access here, because they may
- * cause the PMU-installed debug register state
- * (dbr[], ibr[]) to be reset. The two arrays are also
- * used by perfmon, but we do not use
- * IA64_THREAD_DBG_VALID. The registers are restored
- * by the PMU context switch code.
- */
- if (pfm_use_debug_registers(child))
- return -1;
-#endif
-
- if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
- child->thread.flags |= IA64_THREAD_DBG_VALID;
- memset(child->thread.dbr, 0,
- sizeof(child->thread.dbr));
- memset(child->thread.ibr, 0,
- sizeof(child->thread.ibr));
- }
-
- ptr += regnum;
-
- if ((regnum & 1) && write_access) {
- /* don't let the user set kernel-level breakpoints: */
- *ptr = *data & ~(7UL << 56);
- return 0;
- }
- if (write_access)
- *ptr = *data;
- else
- *data = *ptr;
- return 0;
-}
-
-static const struct user_regset native_regsets[] = {
- {
- .core_note_type = NT_PRSTATUS,
- .n = ELF_NGREG,
- .size = sizeof(elf_greg_t), .align = sizeof(elf_greg_t),
- .get = gpregs_get, .set = gpregs_set,
- .writeback = gpregs_writeback
- },
- {
- .core_note_type = NT_PRFPREG,
- .n = ELF_NFPREG,
- .size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t),
- .get = fpregs_get, .set = fpregs_set, .active = fpregs_active
- },
-};
-
-static const struct user_regset_view user_ia64_view = {
- .name = "ia64",
- .e_machine = EM_IA_64,
- .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
-};
-
-const struct user_regset_view *task_user_regset_view(struct task_struct *tsk)
-{
-#ifdef CONFIG_IA32_SUPPORT
- extern const struct user_regset_view user_ia32_view;
- if (IS_IA32_PROCESS(task_pt_regs(tsk)))
- return &user_ia32_view;
-#endif
- return &user_ia64_view;
-}
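
The restored access_uarea() above is where PTRACE_PEEKUSER/PTRACE_POKEUSER requests end up, with addr being one of the PT_* byte offsets (on ia64 these come from asm/ptrace_offsets.h). A hedged user-space sketch of reading a traced child's ar.bsp; header availability and error handling are simplified:

	#include <errno.h>
	#include <sys/ptrace.h>
	#include <asm/ptrace_offsets.h>

	errno = 0;
	unsigned long bsp = ptrace(PTRACE_PEEKUSER, pid, (void *)PT_AR_BSP, 0);
	if (bsp == (unsigned long)-1 && errno)
		perror("PTRACE_PEEKUSER");
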
diff --git a/trunk/arch/ia64/kernel/semaphore.c b/trunk/arch/ia64/kernel/semaphore.c
new file mode 100644
index 000000000000..2724ef3fbae2
--- /dev/null
+++ b/trunk/arch/ia64/kernel/semaphore.c
@@ -0,0 +1,165 @@
+/*
+ * IA-64 semaphore implementation (derived from x86 version).
+ *
+ * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co
+ * David Mosberger-Tang
+ */
+
+/*
+ * Semaphores are implemented using a two-way counter: The "count"
+ * variable is decremented for each process that tries to acquire the
+ * semaphore, while the "sleepers" variable is a count of such
+ * acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can efficiently
+ * test if they need to do any extra work (up needs to do something
+ * only if count was negative before the increment operation).
+ *
+ * "sleeping" and the contention routine ordering is protected
+ * by the spinlock in the semaphore's waitqueue head.
+ *
+ * Note that these functions are only called when there is contention
+ * on the lock, and as such all this is the "non-critical" part of the
+ * whole semaphore business. The critical part is the inline stuff in
+ * where we want to avoid any extra jumps and calls.
+ */
+#include
+#include
+
+#include
+#include
+
+/*
+ * Logic:
+ * - Only on a boundary condition do we need to care. When we go
+ * from a negative count to a non-negative, we wake people up.
+ * - When we go from a non-negative count to a negative do we
+ * (a) synchronize with the "sleepers" count and (b) make sure
+ * that we're on the wakeup list before we synchronize so that
+ * we cannot lose wakeup events.
+ */
+
+void
+__up (struct semaphore *sem)
+{
+ wake_up(&sem->wait);
+}
+
+void __sched __down (struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ unsigned long flags;
+
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock in
+ * the wait_queue_head.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ schedule();
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ }
+ remove_wait_queue_locked(&sem->wait, &wait);
+ wake_up_locked(&sem->wait);
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+ tsk->state = TASK_RUNNING;
+}
+
+int __sched __down_interruptible (struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ unsigned long flags;
+
+ tsk->state = TASK_INTERRUPTIBLE;
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+ sem->sleepers ++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * With signals pending, this turns into
+ * the trylock failure case - we won't be
+ * sleeping, and we can't get the lock as
+ * it has contention. Just correct the count
+ * and exit.
+ */
+ if (signal_pending(current)) {
+ retval = -EINTR;
+ sem->sleepers = 0;
+ atomic_add(sleepers, &sem->count);
+ break;
+ }
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock in
+ * wait_queue_head. The "-1" is because we're
+ * still hoping to get the semaphore.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ schedule();
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ tsk->state = TASK_INTERRUPTIBLE;
+ }
+ remove_wait_queue_locked(&sem->wait, &wait);
+ wake_up_locked(&sem->wait);
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ tsk->state = TASK_RUNNING;
+ return retval;
+}
+
+/*
+ * Trylock failed - make sure we correct for having decremented the
+ * count.
+ */
+int
+__down_trylock (struct semaphore *sem)
+{
+ unsigned long flags;
+ int sleepers;
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ sleepers = sem->sleepers + 1;
+ sem->sleepers = 0;
+
+ /*
+ * Add "everybody else" and us into it. They aren't
+ * playing, because we own the spinlock in the
+ * wait_queue_head.
+ */
+ if (!atomic_add_negative(sleepers, &sem->count)) {
+ wake_up_locked(&sem->wait);
+ }
+
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+ return 1;
+}
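
The new file above only provides the contended slow paths; the uncontended down()/up() fast paths stay inline in asm/semaphore.h and drop into these routines when the count goes negative. A short usage sketch of the counting-semaphore API this serves (function and variable names are illustrative):

	static DECLARE_MUTEX(my_sem);		/* semaphore initialized to a count of 1 */

	static int do_work(void)
	{
		if (down_interruptible(&my_sem))	/* may sleep in __down_interruptible() */
			return -ERESTARTSYS;		/* woken by a signal */
		/* ... critical section ... */
		up(&my_sem);				/* calls __up() only if someone is waiting */
		return 0;
	}
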
diff --git a/trunk/arch/ia64/kernel/setup.c b/trunk/arch/ia64/kernel/setup.c
index 5015ca1275ca..4aa9eaea76c3 100644
--- a/trunk/arch/ia64/kernel/setup.c
+++ b/trunk/arch/ia64/kernel/setup.c
@@ -59,7 +59,6 @@
#include
#include
#include
-#include
#include
#include
@@ -177,29 +176,6 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
return 0;
}
-/*
- * Similar to "filter_rsvd_memory()", but the reserved memory ranges
- * are not filtered out.
- */
-int __init
-filter_memory(unsigned long start, unsigned long end, void *arg)
-{
- void (*func)(unsigned long, unsigned long, int);
-
-#if IGNORE_PFN0
- if (start == PAGE_OFFSET) {
- printk(KERN_WARNING "warning: skipping physical page 0\n");
- start += PAGE_SIZE;
- if (start >= end)
- return 0;
- }
-#endif
- func = arg;
- if (start < end)
- call_pernode_memory(__pa(start), end - start, func);
- return 0;
-}
-
static void __init
sort_regions (struct rsvd_region *rsvd_region, int max)
{
@@ -517,8 +493,6 @@ setup_arch (char **cmdline_p)
acpi_table_init();
# ifdef CONFIG_ACPI_NUMA
acpi_numa_init();
- per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ?
- 32 : cpus_weight(early_cpu_possible_map)), additional_cpus);
# endif
#else
# ifdef CONFIG_SMP
@@ -972,10 +946,9 @@ cpu_init (void)
#endif
/* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
- if (ia64_pal_vm_summary(NULL, &vmi) == 0) {
+ if (ia64_pal_vm_summary(NULL, &vmi) == 0)
max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
- setup_ptcg_sem(vmi.pal_vm_info_2_s.max_purges, NPTCG_FROM_PAL);
- } else {
+ else {
printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
max_ctx = (1U << 15) - 1; /* use architected minimum */
}
diff --git a/trunk/arch/ia64/kernel/smp.c b/trunk/arch/ia64/kernel/smp.c
index 9a9d4c489330..4e446aa5f4ac 100644
--- a/trunk/arch/ia64/kernel/smp.c
+++ b/trunk/arch/ia64/kernel/smp.c
@@ -209,19 +209,6 @@ send_IPI_allbutself (int op)
}
}
-/*
- * Called with preemption disabled.
- */
-static inline void
-send_IPI_mask(cpumask_t mask, int op)
-{
- unsigned int cpu;
-
- for_each_cpu_mask(cpu, mask) {
- send_IPI_single(cpu, op);
- }
-}
-
/*
* Called with preemption disabled.
*/
@@ -414,75 +401,6 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int
}
EXPORT_SYMBOL(smp_call_function_single);
-/**
- * smp_call_function_mask(): Run a function on a set of other CPUs.
- * @mask: The set of cpus to run on. Must not include the current cpu.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @wait: If true, wait (atomically) until function
- *        has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int smp_call_function_mask(cpumask_t mask,
- void (*func)(void *), void *info,
- int wait)
-{
- struct call_data_struct data;
- cpumask_t allbutself;
- int cpus;
-
- spin_lock(&call_lock);
- allbutself = cpu_online_map;
- cpu_clear(smp_processor_id(), allbutself);
-
- cpus_and(mask, mask, allbutself);
- cpus = cpus_weight(mask);
- if (!cpus) {
- spin_unlock(&call_lock);
- return 0;
- }
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC*/
-
- /* Send a message to other CPUs */
- if (cpus_equal(mask, allbutself))
- send_IPI_allbutself(IPI_CALL_FUNC);
- else
- send_IPI_mask(mask, IPI_CALL_FUNC);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
- call_data = NULL;
-
- spin_unlock(&call_lock);
- return 0;
-
-}
-EXPORT_SYMBOL(smp_call_function_mask);
-
/*
* this function sends a 'generic call function' IPI to all other CPUs
* in the system.
diff --git a/trunk/arch/ia64/kernel/smpboot.c b/trunk/arch/ia64/kernel/smpboot.c
index 16483be18c0b..32ee5979a042 100644
--- a/trunk/arch/ia64/kernel/smpboot.c
+++ b/trunk/arch/ia64/kernel/smpboot.c
@@ -400,9 +400,9 @@ smp_callin (void)
/* Setup the per cpu irq handling data structures */
__setup_vector_irq(cpuid);
cpu_set(cpuid, cpu_online_map);
+ unlock_ipi_calllock();
per_cpu(cpu_state, cpuid) = CPU_ONLINE;
spin_unlock(&vector_lock);
- unlock_ipi_calllock();
smp_setup_percpu_timer();
diff --git a/trunk/arch/ia64/kernel/time.c b/trunk/arch/ia64/kernel/time.c
index 48e15a51782f..17fda5293c67 100644
--- a/trunk/arch/ia64/kernel/time.c
+++ b/trunk/arch/ia64/kernel/time.c
@@ -59,84 +59,6 @@ static struct clocksource clocksource_itc = {
};
static struct clocksource *itc_clocksource;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-
-#include
-
-extern cputime_t cycle_to_cputime(u64 cyc);
-
-/*
- * Called from the context switch with interrupts disabled, to charge all
- * accumulated times to the current process, and to prepare accounting on
- * the next process.
- */
-void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
-{
- struct thread_info *pi = task_thread_info(prev);
- struct thread_info *ni = task_thread_info(next);
- cputime_t delta_stime, delta_utime;
- __u64 now;
-
- now = ia64_get_itc();
-
- delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
- account_system_time(prev, 0, delta_stime);
- account_system_time_scaled(prev, delta_stime);
-
- if (pi->ac_utime) {
- delta_utime = cycle_to_cputime(pi->ac_utime);
- account_user_time(prev, delta_utime);
- account_user_time_scaled(prev, delta_utime);
- }
-
- pi->ac_stamp = ni->ac_stamp = now;
- ni->ac_stime = ni->ac_utime = 0;
-}
-
-/*
- * Account time for a transition between system, hard irq or soft irq state.
- * Note that this function is called with interrupts enabled.
- */
-void account_system_vtime(struct task_struct *tsk)
-{
- struct thread_info *ti = task_thread_info(tsk);
- unsigned long flags;
- cputime_t delta_stime;
- __u64 now;
-
- local_irq_save(flags);
-
- now = ia64_get_itc();
-
- delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
- account_system_time(tsk, 0, delta_stime);
- account_system_time_scaled(tsk, delta_stime);
- ti->ac_stime = 0;
-
- ti->ac_stamp = now;
-
- local_irq_restore(flags);
-}
-
-/*
- * Called from the timer interrupt handler to charge accumulated user time
- * to the current process. Must be called with interrupts disabled.
- */
-void account_process_tick(struct task_struct *p, int user_tick)
-{
- struct thread_info *ti = task_thread_info(p);
- cputime_t delta_utime;
-
- if (ti->ac_utime) {
- delta_utime = cycle_to_cputime(ti->ac_utime);
- account_user_time(p, delta_utime);
- account_user_time_scaled(p, delta_utime);
- ti->ac_utime = 0;
- }
-}
-
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
-
static irqreturn_t
timer_interrupt (int irq, void *dev_id)
{
diff --git a/trunk/arch/ia64/kernel/unaligned.c b/trunk/arch/ia64/kernel/unaligned.c
index ff0e7c10faa7..6903361d11a5 100644
--- a/trunk/arch/ia64/kernel/unaligned.c
+++ b/trunk/arch/ia64/kernel/unaligned.c
@@ -13,7 +13,6 @@
* 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes.
* 2001/01/17 Add support emulation of unaligned kernel accesses.
*/
-#include
#include
#include
#include
@@ -1291,7 +1290,7 @@ within_logging_rate_limit (void)
{
static unsigned long count, last_time;
- if (time_after(jiffies, last_time + 5 * HZ))
+ if (jiffies - last_time > 5*HZ)
count = 0;
if (count < 5) {
last_time = jiffies;
diff --git a/trunk/arch/ia64/mm/contig.c b/trunk/arch/ia64/mm/contig.c
index 798bf9835a51..344f64eca7a9 100644
--- a/trunk/arch/ia64/mm/contig.c
+++ b/trunk/arch/ia64/mm/contig.c
@@ -45,6 +45,8 @@ void show_mem(void)
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
+ printk(KERN_INFO "Free swap: %6ldkB\n",
+ nr_swap_pages<<(PAGE_SHIFT-10));
printk(KERN_INFO "Node memory in pages:\n");
for_each_online_pgdat(pgdat) {
unsigned long present;
@@ -253,7 +255,7 @@ paging_init (void)
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_VIRTUAL_MEM_MAP
- efi_memmap_walk(filter_memory, register_active_ranges);
+ efi_memmap_walk(register_active_ranges, NULL);
efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
if (max_gap < LARGE_GAP) {
vmem_map = (struct page *) 0;
diff --git a/trunk/arch/ia64/mm/discontig.c b/trunk/arch/ia64/mm/discontig.c
index 544dc420c65e..ee5e68b2af94 100644
--- a/trunk/arch/ia64/mm/discontig.c
+++ b/trunk/arch/ia64/mm/discontig.c
@@ -104,7 +104,7 @@ static int __meminit early_nr_cpus_node(int node)
{
int cpu, n = 0;
- for_each_possible_early_cpu(cpu)
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
if (node == node_cpuid[cpu].nid)
n++;
@@ -124,7 +124,6 @@ static unsigned long __meminit compute_pernodesize(int node)
pernodesize += node * L1_CACHE_BYTES;
pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
- pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
pernodesize = PAGE_ALIGN(pernodesize);
return pernodesize;
}
@@ -143,7 +142,7 @@ static void *per_cpu_node_setup(void *cpu_data, int node)
#ifdef CONFIG_SMP
int cpu;
- for_each_possible_early_cpu(cpu) {
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
if (node == node_cpuid[cpu].nid) {
memcpy(__va(cpu_data), __phys_per_cpu_start,
__per_cpu_end - __per_cpu_start);
@@ -346,7 +345,7 @@ static void __init initialize_pernode_data(void)
#ifdef CONFIG_SMP
/* Set the node_data pointer for each per-cpu struct */
- for_each_possible_early_cpu(cpu) {
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
node = node_cpuid[cpu].nid;
per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
}
@@ -445,7 +444,7 @@ void __init find_memory(void)
mem_data[node].min_pfn = ~0UL;
}
- efi_memmap_walk(filter_memory, register_active_ranges);
+ efi_memmap_walk(register_active_ranges, NULL);
/*
* Initialize the boot memory maps in reverse order since that's
@@ -494,9 +493,13 @@ void __cpuinit *per_cpu_init(void)
int cpu;
static int first_time = 1;
+
+ if (smp_processor_id() != 0)
+ return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
+
if (first_time) {
first_time = 0;
- for_each_possible_early_cpu(cpu)
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
}
@@ -519,6 +522,8 @@ void show_mem(void)
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
+ printk(KERN_INFO "Free swap: %6ldkB\n",
+ nr_swap_pages<<(PAGE_SHIFT-10));
printk(KERN_INFO "Node memory in pages:\n");
for_each_online_pgdat(pgdat) {
unsigned long present;
diff --git a/trunk/arch/ia64/mm/init.c b/trunk/arch/ia64/mm/init.c
index 5c1de53c8c1c..a4ca657c72c6 100644
--- a/trunk/arch/ia64/mm/init.c
+++ b/trunk/arch/ia64/mm/init.c
@@ -58,6 +58,7 @@ __ia64_sync_icache_dcache (pte_t pte)
{
unsigned long addr;
struct page *page;
+ unsigned long order;
page = pte_page(pte);
addr = (unsigned long) page_address(page);
@@ -65,7 +66,12 @@ __ia64_sync_icache_dcache (pte_t pte)
if (test_bit(PG_arch_1, &page->flags))
return; /* i-cache is already coherent with d-cache */
- flush_icache_range(addr, addr + (PAGE_SIZE << compound_order(page)));
+ if (PageCompound(page)) {
+ order = compound_order(page);
+ flush_icache_range(addr, addr + (1UL << order << PAGE_SHIFT));
+ }
+ else
+ flush_icache_range(addr, addr + PAGE_SIZE);
set_bit(PG_arch_1, &page->flags); /* mark page as clean */
}
@@ -547,10 +553,12 @@ find_largest_hole (u64 start, u64 end, void *arg)
#endif /* CONFIG_VIRTUAL_MEM_MAP */
int __init
-register_active_ranges(u64 start, u64 len, int nid)
+register_active_ranges(u64 start, u64 end, void *arg)
{
- u64 end = start + len;
+ int nid = paddr_to_nid(__pa(start));
+ if (nid < 0)
+ nid = 0;
#ifdef CONFIG_KEXEC
if (start > crashk_res.start && start < crashk_res.end)
start = crashk_res.end;
diff --git a/trunk/arch/ia64/mm/numa.c b/trunk/arch/ia64/mm/numa.c
index b73bf1838e57..7807fc5c0422 100644
--- a/trunk/arch/ia64/mm/numa.c
+++ b/trunk/arch/ia64/mm/numa.c
@@ -27,9 +27,7 @@
*/
int num_node_memblks;
struct node_memblk_s node_memblk[NR_NODE_MEMBLKS];
-struct node_cpuid_s node_cpuid[NR_CPUS] =
- { [0 ... NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } };
-
+struct node_cpuid_s node_cpuid[NR_CPUS];
/*
* This is a matrix with "distances" between nodes, they should be
* proportional to the memory access latency ratios.
diff --git a/trunk/arch/ia64/mm/tlb.c b/trunk/arch/ia64/mm/tlb.c
index d52ec4e83409..655da240d13c 100644
--- a/trunk/arch/ia64/mm/tlb.c
+++ b/trunk/arch/ia64/mm/tlb.c
@@ -11,9 +11,6 @@
* Rohit Seth
* Ken Chen
* Christophe de Dinechin : Avoid ptc.e on memory allocation
- * Copyright (C) 2007 Intel Corp
- * Fenghua Yu
- * Add multiple ptc.g/ptc.ga instruction support in global tlb purge.
*/
#include
#include
@@ -29,9 +26,6 @@
#include
#include
#include
-#include
-#include
-#include
static struct {
unsigned long mask; /* mask of supported purge page-sizes */
@@ -45,10 +39,6 @@ struct ia64_ctx ia64_ctx = {
};
DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
-DEFINE_PER_CPU(u8, ia64_tr_num); /*Number of TR slots in current processor*/
-DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/
-
-struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX];
/*
* Initializes the ia64_ctx.bitmap array based on max_ctx+1.
@@ -94,139 +84,13 @@ wrap_mmu_context (struct mm_struct *mm)
local_flush_tlb_all();
}
-/*
- * Implement "spinaphores" ... like counting semaphores, but they
- * spin instead of sleeping. If there are ever any other users for
- * this primitive it can be moved up to a spinaphore.h header.
- */
-struct spinaphore {
- atomic_t cur;
-};
-
-static inline void spinaphore_init(struct spinaphore *ss, int val)
-{
- atomic_set(&ss->cur, val);
-}
-
-static inline void down_spin(struct spinaphore *ss)
-{
- while (unlikely(!atomic_add_unless(&ss->cur, -1, 0)))
- while (atomic_read(&ss->cur) == 0)
- cpu_relax();
-}
-
-static inline void up_spin(struct spinaphore *ss)
-{
- atomic_add(1, &ss->cur);
-}
-
-static struct spinaphore ptcg_sem;
-static u16 nptcg = 1;
-static int need_ptcg_sem = 1;
-static int toolatetochangeptcgsem = 0;
-
-/*
- * Kernel parameter "nptcg=" overrides max number of concurrent global TLB
- * purges which is reported from either PAL or SAL PALO.
- *
- * We don't sanity-check the nptcg value. It is the user's responsibility to
- * supply a valid nptcg value for the platform; otherwise the kernel may hang
- * in some cases.
- */
-static int __init
-set_nptcg(char *str)
-{
- int value = 0;
-
- get_option(&str, &value);
- setup_ptcg_sem(value, NPTCG_FROM_KERNEL_PARAMETER);
-
- return 1;
-}
-
-__setup("nptcg=", set_nptcg);
-
-/*
- * Maximum number of simultaneous ptc.g purges in the system can
- * be defined by PAL_VM_SUMMARY (in which case we should take
- * the smallest value for any cpu in the system) or by the PAL
- * override table (in which case we should ignore the value from
- * PAL_VM_SUMMARY).
- *
- * Kernel parameter "nptcg=" overrides maximum number of simultanesous ptc.g
- * purges defined in either PAL_VM_SUMMARY or PAL override table. In this case,
- * we should ignore the value from either PAL_VM_SUMMARY or PAL override table.
- *
- * Complicating the logic here is the fact that num_possible_cpus()
- * isn't fully setup until we start bringing cpus online.
- */
-void
-setup_ptcg_sem(int max_purges, int nptcg_from)
-{
- static int kp_override;
- static int palo_override;
- static int firstcpu = 1;
-
- if (toolatetochangeptcgsem) {
- BUG_ON(max_purges < nptcg);
- return;
- }
-
- if (nptcg_from == NPTCG_FROM_KERNEL_PARAMETER) {
- kp_override = 1;
- nptcg = max_purges;
- goto resetsema;
- }
- if (kp_override) {
- need_ptcg_sem = num_possible_cpus() > nptcg;
- return;
- }
-
- if (nptcg_from == NPTCG_FROM_PALO) {
- palo_override = 1;
-
- /* In PALO max_purges == 0 really means it! */
- if (max_purges == 0)
- panic("Whoa! Platform does not support global TLB purges.\n");
- nptcg = max_purges;
- if (nptcg == PALO_MAX_TLB_PURGES) {
- need_ptcg_sem = 0;
- return;
- }
- goto resetsema;
- }
- if (palo_override) {
- if (nptcg != PALO_MAX_TLB_PURGES)
- need_ptcg_sem = (num_possible_cpus() > nptcg);
- return;
- }
-
- /* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */
- if (max_purges == 0) max_purges = 1;
-
- if (firstcpu) {
- nptcg = max_purges;
- firstcpu = 0;
- }
- if (max_purges < nptcg)
- nptcg = max_purges;
- if (nptcg == PAL_MAX_PURGES) {
- need_ptcg_sem = 0;
- return;
- } else
- need_ptcg_sem = (num_possible_cpus() > nptcg);
-
-resetsema:
- spinaphore_init(&ptcg_sem, max_purges);
-}
-
void
ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned long nbits)
{
- struct mm_struct *active_mm = current->active_mm;
+ static DEFINE_SPINLOCK(ptcg_lock);
- toolatetochangeptcgsem = 1;
+ struct mm_struct *active_mm = current->active_mm;
if (mm != active_mm) {
/* Restore region IDs for mm */
@@ -238,20 +102,19 @@ ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
}
}
- if (need_ptcg_sem)
- down_spin(&ptcg_sem);
-
- do {
- /*
- * Flush ALAT entries also.
- */
- ia64_ptcga(start, (nbits << 2));
- ia64_srlz_i();
- start += (1UL << nbits);
- } while (start < end);
-
- if (need_ptcg_sem)
- up_spin(&ptcg_sem);
+ /* HW requires global serialization of ptc.ga. */
+ spin_lock(&ptcg_lock);
+ {
+ do {
+ /*
+ * Flush ALAT entries also.
+ */
+ ia64_ptcga(start, (nbits<<2));
+ ia64_srlz_i();
+ start += (1UL << nbits);
+ } while (start < end);
+ }
+ spin_unlock(&ptcg_lock);
if (mm != active_mm) {
activate_context(active_mm);
@@ -327,9 +190,6 @@ ia64_tlb_init (void)
ia64_ptce_info_t uninitialized_var(ptce_info); /* GCC be quiet */
unsigned long tr_pgbits;
long status;
- pal_vm_info_1_u_t vm_info_1;
- pal_vm_info_2_u_t vm_info_2;
- int cpu = smp_processor_id();
if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) {
printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; "
@@ -346,191 +206,4 @@ ia64_tlb_init (void)
local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
local_flush_tlb_all(); /* nuke left overs from bootstrapping... */
- status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2);
-
- if (status) {
- printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
- per_cpu(ia64_tr_num, cpu) = 8;
- return;
- }
- per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1;
- if (per_cpu(ia64_tr_num, cpu) >
- (vm_info_1.pal_vm_info_1_s.max_dtr_entry+1))
- per_cpu(ia64_tr_num, cpu) =
- vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
- if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) {
- per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX;
- printk(KERN_DEBUG "TR register number exceeds IA64_TR_ALLOC_MAX!"
- "IA64_TR_ALLOC_MAX should be extended\n");
- }
-}
-
-/*
- * is_tr_overlap
- *
- * Check overlap with inserted TRs.
- */
-static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size)
-{
- u64 tr_log_size;
- u64 tr_end;
- u64 va_rr = ia64_get_rr(va);
- u64 va_rid = RR_TO_RID(va_rr);
- u64 va_end = va + (1<<log_size) - 1;
-
- if (va_rid != RR_TO_RID(p->rr))
- return 0;
- tr_log_size = (p->itir & 0xff) >> 2;
- tr_end = p->ifa + (1<<tr_log_size) - 1;
-
- if (va > tr_end || p->ifa > va_end)
- return 0;
- return 1;
-
-}
-
-/*
- * ia64_insert_tr in virtual mode. Allocate a TR slot
- *
- * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr
- *
- * va : virtual address.
- * pte : pte entries inserted.
- * log_size: range to be covered.
- *
- * Return value: <0 : error No.
- *
- * >=0 : slot number allocated for TR.
- * Must be called with preemption disabled.
- */
-int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
-{
- int i, r;
- unsigned long psr;
- struct ia64_tr_entry *p;
- int cpu = smp_processor_id();
-
- r = -EINVAL;
- /*Check overlap with existing TR entries*/
- if (target_mask & 0x1) {
- p = &__per_cpu_idtrs[cpu][0][0];
- for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
- i++, p++) {
- if (p->pte & 0x1)
- if (is_tr_overlap(p, va, log_size)) {
- printk(KERN_DEBUG "Overlapped Entry"
- "Inserted for TR Reigster!!\n");
- goto out;
- }
- }
- }
- if (target_mask & 0x2) {
- p = &__per_cpu_idtrs[cpu][1][0];
- for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
- i++, p++) {
- if (p->pte & 0x1)
- if (is_tr_overlap(p, va, log_size)) {
- printk(KERN_DEBUG "Overlapped Entry"
- "Inserted for TR Reigster!!\n");
- goto out;
- }
- }
- }
-
- for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) {
- switch (target_mask & 0x3) {
- case 1:
- if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1))
- goto found;
- continue;
- case 2:
- if (!(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
- goto found;
- continue;
- case 3:
- if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1) &&
- !(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
- goto found;
- continue;
- default:
- r = -EINVAL;
- goto out;
- }
- }
-found:
- if (i >= per_cpu(ia64_tr_num, cpu))
- return -EBUSY;
-
- /*Record tr info for mca hander use!*/
- if (i > per_cpu(ia64_tr_used, cpu))
- per_cpu(ia64_tr_used, cpu) = i;
-
- psr = ia64_clear_ic();
- if (target_mask & 0x1) {
- ia64_itr(0x1, i, va, pte, log_size);
- ia64_srlz_i();
- p = &__per_cpu_idtrs[cpu][0][i];
- p->ifa = va;
- p->pte = pte;
- p->itir = log_size << 2;
- p->rr = ia64_get_rr(va);
- }
- if (target_mask & 0x2) {
- ia64_itr(0x2, i, va, pte, log_size);
- ia64_srlz_i();
- p = &__per_cpu_idtrs[cpu][1][i];
- p->ifa = va;
- p->pte = pte;
- p->itir = log_size << 2;
- p->rr = ia64_get_rr(va);
- }
- ia64_set_psr(psr);
- r = i;
-out:
- return r;
-}
-EXPORT_SYMBOL_GPL(ia64_itr_entry);
-
-/*
- * ia64_purge_tr
- *
- * target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr.
- * slot: slot number to be freed.
- *
- * Must be called with preemption disabled.
- */
-void ia64_ptr_entry(u64 target_mask, int slot)
-{
- int cpu = smp_processor_id();
- int i;
- struct ia64_tr_entry *p;
-
- if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu))
- return;
-
- if (target_mask & 0x1) {
- p = &__per_cpu_idtrs[cpu][0][slot];
- if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
- p->pte = 0;
- ia64_ptr(0x1, p->ifa, p->itir>>2);
- ia64_srlz_i();
- }
- }
-
- if (target_mask & 0x2) {
- p = &__per_cpu_idtrs[cpu][1][slot];
- if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
- p->pte = 0;
- ia64_ptr(0x2, p->ifa, p->itir>>2);
- ia64_srlz_i();
- }
- }
-
- for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) {
- if ((__per_cpu_idtrs[cpu][0][i].pte & 0x1) ||
- (__per_cpu_idtrs[cpu][1][i].pte & 0x1))
- break;
- }
- per_cpu(ia64_tr_used, cpu) = i;
}
-EXPORT_SYMBOL_GPL(ia64_ptr_entry);
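The "spinaphore" removed from tlb.c above is a counting primitive that spins rather than sleeps: down_spin() succeeds only once it can decrement a positive count, and up_spin() returns a token. A rough user-space model of the same idea, using C11 atomics instead of the kernel's atomic_t and cpu_relax() (so the names and headers here are not the kernel's), is:

	#include <stdatomic.h>
	#include <stdbool.h>

	struct spinaphore { atomic_int cur; };

	static void spinaphore_init(struct spinaphore *ss, int val)
	{
		atomic_init(&ss->cur, val);
	}

	/* Decrement cur unless it is already 0, mirroring atomic_add_unless(&cur, -1, 0). */
	static bool spinaphore_try_take(struct spinaphore *ss)
	{
		int c = atomic_load(&ss->cur);

		while (c != 0)
			if (atomic_compare_exchange_weak(&ss->cur, &c, c - 1))
				return true;
		return false;
	}

	static void down_spin(struct spinaphore *ss)
	{
		while (!spinaphore_try_take(ss))
			while (atomic_load(&ss->cur) == 0)
				;	/* busy-wait; the kernel uses cpu_relax() here */
	}

	static void up_spin(struct spinaphore *ss)
	{
		atomic_fetch_add(&ss->cur, 1);
	}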
diff --git a/trunk/arch/ia64/sn/kernel/xpc_main.c b/trunk/arch/ia64/sn/kernel/xpc_main.c
index 9e0b164da9c2..81785b78bc1e 100644
--- a/trunk/arch/ia64/sn/kernel/xpc_main.c
+++ b/trunk/arch/ia64/sn/kernel/xpc_main.c
@@ -199,7 +199,7 @@ xpc_timeout_partition_disengage_request(unsigned long data)
struct xpc_partition *part = (struct xpc_partition *) data;
- DBUG_ON(time_before(jiffies, part->disengage_request_timeout));
+ DBUG_ON(jiffies < part->disengage_request_timeout);
(void) xpc_partition_disengaged(part);
@@ -230,7 +230,7 @@ xpc_hb_beater(unsigned long dummy)
{
xpc_vars->heartbeat++;
- if (time_after_eq(jiffies, xpc_hb_check_timeout)) {
+ if (jiffies >= xpc_hb_check_timeout) {
wake_up_interruptible(&xpc_act_IRQ_wq);
}
@@ -270,7 +270,7 @@ xpc_hb_checker(void *ignore)
/* checking of remote heartbeats is skewed by IRQ handling */
- if (time_after_eq(jiffies, xpc_hb_check_timeout)) {
+ if (jiffies >= xpc_hb_check_timeout) {
dev_dbg(xpc_part, "checking remote heartbeats\n");
xpc_check_remote_hb();
@@ -305,7 +305,7 @@ xpc_hb_checker(void *ignore)
/* wait for IRQ or timeout */
(void) wait_event_interruptible(xpc_act_IRQ_wq,
(last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
- time_after_eq(jiffies, xpc_hb_check_timeout) ||
+ jiffies >= xpc_hb_check_timeout ||
(volatile int) xpc_exiting));
}
diff --git a/trunk/arch/ia64/sn/kernel/xpc_partition.c b/trunk/arch/ia64/sn/kernel/xpc_partition.c
index 9e97c2684832..7ba403232cb8 100644
--- a/trunk/arch/ia64/sn/kernel/xpc_partition.c
+++ b/trunk/arch/ia64/sn/kernel/xpc_partition.c
@@ -877,7 +877,7 @@ xpc_partition_disengaged(struct xpc_partition *part)
disengaged = (xpc_partition_engaged(1UL << partid) == 0);
if (part->disengage_request_timeout) {
if (!disengaged) {
- if (time_before(jiffies, part->disengage_request_timeout)) {
+ if (jiffies < part->disengage_request_timeout) {
/* timelimit hasn't been reached yet */
return 0;
}
diff --git a/trunk/arch/m32r/kernel/Makefile b/trunk/arch/m32r/kernel/Makefile
index 09200d4886e3..e97e26e87c9e 100644
--- a/trunk/arch/m32r/kernel/Makefile
+++ b/trunk/arch/m32r/kernel/Makefile
@@ -5,7 +5,7 @@
extra-y := head.o init_task.o vmlinux.lds
obj-y := process.o entry.o traps.o align.o irq.o setup.o time.o \
- m32r_ksyms.o sys_m32r.o signal.o ptrace.o
+ m32r_ksyms.o sys_m32r.o semaphore.o signal.o ptrace.o
obj-$(CONFIG_SMP) += smp.o smpboot.o
obj-$(CONFIG_MODULES) += module.o
diff --git a/trunk/arch/m32r/kernel/m32r_ksyms.c b/trunk/arch/m32r/kernel/m32r_ksyms.c
index e6709fe950ba..41a4c95e06d6 100644
--- a/trunk/arch/m32r/kernel/m32r_ksyms.c
+++ b/trunk/arch/m32r/kernel/m32r_ksyms.c
@@ -7,6 +7,7 @@
#include
#include
+#include
#include
#include
#include
@@ -21,6 +22,10 @@ EXPORT_SYMBOL(dump_fpu);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(kernel_thread);
+EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__down_interruptible);
+EXPORT_SYMBOL(__up);
+EXPORT_SYMBOL(__down_trylock);
/* Networking helper routines. */
/* Delay loops */
diff --git a/trunk/arch/m32r/kernel/semaphore.c b/trunk/arch/m32r/kernel/semaphore.c
new file mode 100644
index 000000000000..940c2d37cfd1
--- /dev/null
+++ b/trunk/arch/m32r/kernel/semaphore.c
@@ -0,0 +1,185 @@
+/*
+ * linux/arch/m32r/semaphore.c
+ * orig : i386 2.6.4
+ *
+ * M32R semaphore implementation.
+ *
+ * Copyright (c) 2002 - 2004 Hitoshi Yamamoto
+ */
+
+/*
+ * i386 semaphore implementation.
+ *
+ * (C) Copyright 1999 Linus Torvalds
+ *
+ * Portions Copyright 1999 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * rw semaphores implemented November 1999 by Benjamin LaHaise
+ */
+#include
+#include
+#include
+#include
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleeping"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * "sleeping" and the contention routine ordering is protected
+ * by the spinlock in the semaphore's waitqueue head.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in
+ * where we want to avoid any extra jumps and calls.
+ */
+
+/*
+ * Logic:
+ * - only on a boundary condition do we need to care. When we go
+ * from a negative count to a non-negative, we wake people up.
+ * - when we go from a non-negative count to a negative do we
+ * (a) synchronize with the "sleeper" count and (b) make sure
+ * that we're on the wakeup list before we synchronize so that
+ * we cannot lose wakeup events.
+ */
+
+asmlinkage void __up(struct semaphore *sem)
+{
+ wake_up(&sem->wait);
+}
+
+asmlinkage void __sched __down(struct semaphore * sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ unsigned long flags;
+
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock in
+ * the wait_queue_head.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ schedule();
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ }
+ remove_wait_queue_locked(&sem->wait, &wait);
+ wake_up_locked(&sem->wait);
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+ tsk->state = TASK_RUNNING;
+}
+
+asmlinkage int __sched __down_interruptible(struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ unsigned long flags;
+
+ tsk->state = TASK_INTERRUPTIBLE;
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * With signals pending, this turns into
+ * the trylock failure case - we won't be
+ * sleeping, and we can't get the lock as
+ * it has contention. Just correct the count
+ * and exit.
+ */
+ if (signal_pending(current)) {
+ retval = -EINTR;
+ sem->sleepers = 0;
+ atomic_add(sleepers, &sem->count);
+ break;
+ }
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock in
+ * wait_queue_head. The "-1" is because we're
+ * still hoping to get the semaphore.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ schedule();
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ tsk->state = TASK_INTERRUPTIBLE;
+ }
+ remove_wait_queue_locked(&sem->wait, &wait);
+ wake_up_locked(&sem->wait);
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ tsk->state = TASK_RUNNING;
+ return retval;
+}
+
+/*
+ * Trylock failed - make sure we correct for
+ * having decremented the count.
+ *
+ * We could have done the trylock with a
+ * single "cmpxchg" without failure cases,
+ * but then it wouldn't work on a 386.
+ */
+asmlinkage int __down_trylock(struct semaphore * sem)
+{
+ int sleepers;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ sleepers = sem->sleepers + 1;
+ sem->sleepers = 0;
+
+ /*
+ * Add "everybody else" and us into it. They aren't
+ * playing, because we own the spinlock in the
+ * wait_queue_head.
+ */
+ if (!atomic_add_negative(sleepers, &sem->count)) {
+ wake_up_locked(&sem->wait);
+ }
+
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+ return 1;
+}
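As a worked trace of the count/sleepers bookkeeping in the __down() added above, assume the semaphore starts with count == 1: a holder takes the fast path (count 1 -> 0); contender A decrements count to -1, enters __down(), bumps sleepers to 1, adds sleepers - 1 == 0 to count (still -1, negative), sets sleepers back to 1 and sleeps; contender B decrements count to -2, bumps sleepers to 2, adds 1 back (count -1), sets sleepers to 1 and sleeps. When the holder's up() raises count to 0, __up() wakes one waiter, which now finds the sum non-negative, clears sleepers, and owns the semaphore with count == 0. The "everybody else" correction thus keeps exactly one outstanding decrement per unsatisfied waiter, so no wakeups are lost.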
diff --git a/trunk/arch/m68k/kernel/Makefile b/trunk/arch/m68k/kernel/Makefile
index 7a62a718143b..a806208c7fb5 100644
--- a/trunk/arch/m68k/kernel/Makefile
+++ b/trunk/arch/m68k/kernel/Makefile
@@ -10,7 +10,7 @@ endif
extra-y += vmlinux.lds
obj-y := entry.o process.o traps.o ints.o signal.o ptrace.o module.o \
- sys_m68k.o time.o setup.o m68k_ksyms.o devres.o
+ sys_m68k.o time.o semaphore.o setup.o m68k_ksyms.o devres.o
devres-y = ../../../kernel/irq/devres.o
diff --git a/trunk/arch/m68k/kernel/m68k_ksyms.c b/trunk/arch/m68k/kernel/m68k_ksyms.c
index d900e77e5363..6fc69c74fe2e 100644
--- a/trunk/arch/m68k/kernel/m68k_ksyms.c
+++ b/trunk/arch/m68k/kernel/m68k_ksyms.c
@@ -1,4 +1,5 @@
#include
+#include
asmlinkage long long __ashldi3 (long long, int);
asmlinkage long long __ashrdi3 (long long, int);
@@ -14,3 +15,8 @@ EXPORT_SYMBOL(__ashrdi3);
EXPORT_SYMBOL(__lshrdi3);
EXPORT_SYMBOL(__muldi3);
+EXPORT_SYMBOL(__down_failed);
+EXPORT_SYMBOL(__down_failed_interruptible);
+EXPORT_SYMBOL(__down_failed_trylock);
+EXPORT_SYMBOL(__up_wakeup);
+
diff --git a/trunk/arch/m68k/kernel/semaphore.c b/trunk/arch/m68k/kernel/semaphore.c
new file mode 100644
index 000000000000..d12cbbfe6ebd
--- /dev/null
+++ b/trunk/arch/m68k/kernel/semaphore.c
@@ -0,0 +1,132 @@
+/*
+ * Generic semaphore code. Buyer beware. Do your own
+ * specific changes in
+ */
+
+#include
+#include
+#include
+
+#ifndef CONFIG_RMW_INSNS
+spinlock_t semaphore_wake_lock;
+#endif
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to sleep, while the "waking" variable is
+ * incremented when the "up()" code goes to wake up waiting
+ * processes.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation.
+ *
+ * waking_non_zero() (from asm/semaphore.h) must execute
+ * atomically.
+ *
+ * When __up() is called, the count was negative before
+ * incrementing it, and we need to wake up somebody.
+ *
+ * This routine adds one to the count of processes that need to
+ * wake up and exit. ALL waiting processes actually wake up but
+ * only the one that gets to the "waking" field first will gate
+ * through and acquire the semaphore. The others will go back
+ * to sleep.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in
+ * where we want to avoid any extra jumps and calls.
+ */
+void __up(struct semaphore *sem)
+{
+ wake_one_more(sem);
+ wake_up(&sem->wait);
+}
+
+/*
+ * Perform the "down" function. Return zero for semaphore acquired,
+ * return negative for signalled out of the function.
+ *
+ * If called from __down, the return is ignored and the wait loop is
+ * not interruptible. This means that a task waiting on a semaphore
+ * using "down()" cannot be killed until someone does an "up()" on
+ * the semaphore.
+ *
+ * If called from __down_interruptible, the return value gets checked
+ * upon return. If the return value is negative then the task continues
+ * with the negative value in the return register (it can be tested by
+ * the caller).
+ *
+ * Either form may be used in conjunction with "up()".
+ *
+ */
+
+
+#define DOWN_HEAD(task_state) \
+ \
+ \
+ current->state = (task_state); \
+ add_wait_queue(&sem->wait, &wait); \
+ \
+ /* \
+ * Ok, we're set up. sem->count is known to be less than zero \
+ * so we must wait. \
+ * \
+ * We can let go the lock for purposes of waiting. \
+ * We re-acquire it after awaking so as to protect \
+ * all semaphore operations. \
+ * \
+ * If "up()" is called before we call waking_non_zero() then \
+ * we will catch it right away. If it is called later then \
+ * we will have to go through a wakeup cycle to catch it. \
+ * \
+ * Multiple waiters contend for the semaphore lock to see \
+ * who gets to gate through and who has to wait some more. \
+ */ \
+ for (;;) {
+
+#define DOWN_TAIL(task_state) \
+ current->state = (task_state); \
+ } \
+ current->state = TASK_RUNNING; \
+ remove_wait_queue(&sem->wait, &wait);
+
+void __sched __down(struct semaphore * sem)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ DOWN_HEAD(TASK_UNINTERRUPTIBLE)
+ if (waking_non_zero(sem))
+ break;
+ schedule();
+ DOWN_TAIL(TASK_UNINTERRUPTIBLE)
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ DECLARE_WAITQUEUE(wait, current);
+ int ret = 0;
+
+ DOWN_HEAD(TASK_INTERRUPTIBLE)
+
+ ret = waking_non_zero_interruptible(sem, current);
+ if (ret)
+ {
+ if (ret == 1)
+ /* ret != 0 only if we get interrupted -arca */
+ ret = 0;
+ break;
+ }
+ schedule();
+ DOWN_TAIL(TASK_INTERRUPTIBLE)
+ return ret;
+}
+
+int __down_trylock(struct semaphore * sem)
+{
+ return waking_non_zero_trylock(sem);
+}
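The comments above describe the older "waking" scheme: __up() posts a wakeup token and wakes everyone, but only the waiter that first consumes a token in waking_non_zero() gates through; the rest go back to sleep. waking_non_zero() itself lives in the per-arch semaphore helper headers; a rough model of what it has to do atomically (the struct and names below are illustrative, not the kernel's) is:

	struct sem_model {
		spinlock_t lock;
		int waking;		/* tokens posted by __up(), not yet consumed */
	};

	/* Model of the gate: consume one token if any is available. */
	static int waking_non_zero_model(struct sem_model *sem)
	{
		unsigned long flags;
		int granted = 0;

		spin_lock_irqsave(&sem->lock, flags);
		if (sem->waking > 0) {
			sem->waking--;	/* this waiter wins and acquires the semaphore */
			granted = 1;
		}
		spin_unlock_irqrestore(&sem->lock, flags);
		return granted;		/* 0: lost the race, go back to sleep */
	}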
diff --git a/trunk/arch/m68k/lib/Makefile b/trunk/arch/m68k/lib/Makefile
index a18af095cd7c..6bbf19f96007 100644
--- a/trunk/arch/m68k/lib/Makefile
+++ b/trunk/arch/m68k/lib/Makefile
@@ -5,4 +5,4 @@
EXTRA_AFLAGS := -traditional
lib-y := ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
- checksum.o string.o uaccess.o
+ checksum.o string.o semaphore.o uaccess.o
diff --git a/trunk/arch/m68k/lib/semaphore.S b/trunk/arch/m68k/lib/semaphore.S
new file mode 100644
index 000000000000..0215624c1602
--- /dev/null
+++ b/trunk/arch/m68k/lib/semaphore.S
@@ -0,0 +1,53 @@
+/*
+ * linux/arch/m68k/lib/semaphore.S
+ *
+ * Copyright (C) 1996 Linus Torvalds
+ *
+ * m68k version by Andreas Schwab
+ */
+
+#include
+#include
+
+/*
+ * The semaphore operations have a special calling sequence that
+ * allow us to do a simpler in-line version of them. These routines
+ * need to convert that sequence back into the C sequence when
+ * there is contention on the semaphore.
+ */
+ENTRY(__down_failed)
+ moveml %a0/%d0/%d1,-(%sp)
+ movel %a1,-(%sp)
+ jbsr __down
+ movel (%sp)+,%a1
+ moveml (%sp)+,%a0/%d0/%d1
+ rts
+
+ENTRY(__down_failed_interruptible)
+ movel %a0,-(%sp)
+ movel %d1,-(%sp)
+ movel %a1,-(%sp)
+ jbsr __down_interruptible
+ movel (%sp)+,%a1
+ movel (%sp)+,%d1
+ movel (%sp)+,%a0
+ rts
+
+ENTRY(__down_failed_trylock)
+ movel %a0,-(%sp)
+ movel %d1,-(%sp)
+ movel %a1,-(%sp)
+ jbsr __down_trylock
+ movel (%sp)+,%a1
+ movel (%sp)+,%d1
+ movel (%sp)+,%a0
+ rts
+
+ENTRY(__up_wakeup)
+ moveml %a0/%d0/%d1,-(%sp)
+ movel %a1,-(%sp)
+ jbsr __up
+ movel (%sp)+,%a1
+ moveml (%sp)+,%a0/%d0/%d1
+ rts
+
diff --git a/trunk/arch/m68knommu/kernel/Makefile b/trunk/arch/m68knommu/kernel/Makefile
index f0eab3dedb5a..1524b39ad63f 100644
--- a/trunk/arch/m68knommu/kernel/Makefile
+++ b/trunk/arch/m68knommu/kernel/Makefile
@@ -5,7 +5,7 @@
extra-y := vmlinux.lds
obj-y += dma.o entry.o init_task.o irq.o m68k_ksyms.o process.o ptrace.o \
- setup.o signal.o syscalltable.o sys_m68k.o time.o traps.o
+ semaphore.o setup.o signal.o syscalltable.o sys_m68k.o time.o traps.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_COMEMPCI) += comempci.o
diff --git a/trunk/arch/m68knommu/kernel/m68k_ksyms.c b/trunk/arch/m68knommu/kernel/m68k_ksyms.c
index 39fe0a7aec32..53fad1490282 100644
--- a/trunk/arch/m68knommu/kernel/m68k_ksyms.c
+++ b/trunk/arch/m68knommu/kernel/m68k_ksyms.c
@@ -13,6 +13,7 @@
#include
#include
#include
+#include
#include
#include
@@ -38,6 +39,11 @@ EXPORT_SYMBOL(csum_partial_copy_nocheck);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(__down_failed);
+EXPORT_SYMBOL(__down_failed_interruptible);
+EXPORT_SYMBOL(__down_failed_trylock);
+EXPORT_SYMBOL(__up_wakeup);
+
/*
* libgcc functions - functions that are used internally by the
* compiler... (prototypes are not correct though, but that
diff --git a/trunk/arch/m68knommu/kernel/semaphore.c b/trunk/arch/m68knommu/kernel/semaphore.c
new file mode 100644
index 000000000000..bce2bc7d87c6
--- /dev/null
+++ b/trunk/arch/m68knommu/kernel/semaphore.c
@@ -0,0 +1,133 @@
+/*
+ * Generic semaphore code. Buyer beware. Do your own
+ * specific changes in
+ */
+
+#include
+#include
+#include
+#include
+
+#ifndef CONFIG_RMW_INSNS
+spinlock_t semaphore_wake_lock;
+#endif
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to sleep, while the "waking" variable is
+ * incremented when the "up()" code goes to wake up waiting
+ * processes.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * waking_non_zero() (from asm/semaphore.h) must execute
+ * atomically.
+ *
+ * When __up() is called, the count was negative before
+ * incrementing it, and we need to wake up somebody.
+ *
+ * This routine adds one to the count of processes that need to
+ * wake up and exit. ALL waiting processes actually wake up but
+ * only the one that gets to the "waking" field first will gate
+ * through and acquire the semaphore. The others will go back
+ * to sleep.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in
+ * where we want to avoid any extra jumps and calls.
+ */
+void __up(struct semaphore *sem)
+{
+ wake_one_more(sem);
+ wake_up(&sem->wait);
+}
+
+/*
+ * Perform the "down" function. Return zero for semaphore acquired,
+ * return negative for signalled out of the function.
+ *
+ * If called from __down, the return is ignored and the wait loop is
+ * not interruptible. This means that a task waiting on a semaphore
+ * using "down()" cannot be killed until someone does an "up()" on
+ * the semaphore.
+ *
+ * If called from __down_interruptible, the return value gets checked
+ * upon return. If the return value is negative then the task continues
+ * with the negative value in the return register (it can be tested by
+ * the caller).
+ *
+ * Either form may be used in conjunction with "up()".
+ *
+ */
+
+
+#define DOWN_HEAD(task_state) \
+ \
+ \
+ current->state = (task_state); \
+ add_wait_queue(&sem->wait, &wait); \
+ \
+ /* \
+ * Ok, we're set up. sem->count is known to be less than zero \
+ * so we must wait. \
+ * \
+ * We can let go the lock for purposes of waiting. \
+ * We re-acquire it after awaking so as to protect \
+ * all semaphore operations. \
+ * \
+ * If "up()" is called before we call waking_non_zero() then \
+ * we will catch it right away. If it is called later then \
+ * we will have to go through a wakeup cycle to catch it. \
+ * \
+ * Multiple waiters contend for the semaphore lock to see \
+ * who gets to gate through and who has to wait some more. \
+ */ \
+ for (;;) {
+
+#define DOWN_TAIL(task_state) \
+ current->state = (task_state); \
+ } \
+ current->state = TASK_RUNNING; \
+ remove_wait_queue(&sem->wait, &wait);
+
+void __sched __down(struct semaphore * sem)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ DOWN_HEAD(TASK_UNINTERRUPTIBLE)
+ if (waking_non_zero(sem))
+ break;
+ schedule();
+ DOWN_TAIL(TASK_UNINTERRUPTIBLE)
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ DECLARE_WAITQUEUE(wait, current);
+ int ret = 0;
+
+ DOWN_HEAD(TASK_INTERRUPTIBLE)
+
+ ret = waking_non_zero_interruptible(sem, current);
+ if (ret)
+ {
+ if (ret == 1)
+ /* ret != 0 only if we get interrupted -arca */
+ ret = 0;
+ break;
+ }
+ schedule();
+ DOWN_TAIL(TASK_INTERRUPTIBLE)
+ return ret;
+}
+
+int __down_trylock(struct semaphore * sem)
+{
+ return waking_non_zero_trylock(sem);
+}
diff --git a/trunk/arch/m68knommu/lib/Makefile b/trunk/arch/m68knommu/lib/Makefile
index d94d709665aa..e051a7913987 100644
--- a/trunk/arch/m68knommu/lib/Makefile
+++ b/trunk/arch/m68knommu/lib/Makefile
@@ -4,4 +4,4 @@
lib-y := ashldi3.o ashrdi3.o lshrdi3.o \
muldi3.o mulsi3.o divsi3.o udivsi3.o modsi3.o umodsi3.o \
- checksum.o memcpy.o memset.o delay.o
+ checksum.o semaphore.o memcpy.o memset.o delay.o
diff --git a/trunk/arch/m68knommu/lib/semaphore.S b/trunk/arch/m68knommu/lib/semaphore.S
new file mode 100644
index 000000000000..87c746034376
--- /dev/null
+++ b/trunk/arch/m68knommu/lib/semaphore.S
@@ -0,0 +1,66 @@
+/*
+ * linux/arch/m68k/lib/semaphore.S
+ *
+ * Copyright (C) 1996 Linus Torvalds
+ *
+ * m68k version by Andreas Schwab
+ *
+ * MAR/1999 -- modified to support ColdFire (gerg@snapgear.com)
+ */
+
+#include
+#include
+
+/*
+ * "down_failed" is called with the eventual return address
+ * in %a0, and the address of the semaphore in %a1. We need
+ * to increment the number of waiters on the semaphore,
+ * call "__down()", and then eventually return to try again.
+ */
+ENTRY(__down_failed)
+#ifdef CONFIG_COLDFIRE
+ subl #12,%sp
+ moveml %a0/%d0/%d1,(%sp)
+#else
+ moveml %a0/%d0/%d1,-(%sp)
+#endif
+ movel %a1,-(%sp)
+ jbsr __down
+ movel (%sp)+,%a1
+ movel (%sp)+,%d0
+ movel (%sp)+,%d1
+ rts
+
+ENTRY(__down_failed_interruptible)
+ movel %a0,-(%sp)
+ movel %d1,-(%sp)
+ movel %a1,-(%sp)
+ jbsr __down_interruptible
+ movel (%sp)+,%a1
+ movel (%sp)+,%d1
+ rts
+
+ENTRY(__up_wakeup)
+#ifdef CONFIG_COLDFIRE
+ subl #12,%sp
+ moveml %a0/%d0/%d1,(%sp)
+#else
+ moveml %a0/%d0/%d1,-(%sp)
+#endif
+ movel %a1,-(%sp)
+ jbsr __up
+ movel (%sp)+,%a1
+ movel (%sp)+,%d0
+ movel (%sp)+,%d1
+ rts
+
+ENTRY(__down_failed_trylock)
+ movel %a0,-(%sp)
+ movel %d1,-(%sp)
+ movel %a1,-(%sp)
+ jbsr __down_trylock
+ movel (%sp)+,%a1
+ movel (%sp)+,%d1
+ movel (%sp)+,%a0
+ rts
+
diff --git a/trunk/arch/mips/kernel/Makefile b/trunk/arch/mips/kernel/Makefile
index 6fcdb6fda2e2..9e78e1a4ca17 100644
--- a/trunk/arch/mips/kernel/Makefile
+++ b/trunk/arch/mips/kernel/Makefile
@@ -5,7 +5,7 @@
extra-y := head.o init_task.o vmlinux.lds
obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \
- ptrace.o reset.o setup.o signal.o syscall.o \
+ ptrace.o reset.o semaphore.o setup.o signal.o syscall.o \
time.o topology.o traps.o unaligned.o
obj-$(CONFIG_CEVT_BCM1480) += cevt-bcm1480.o
diff --git a/trunk/arch/mips/kernel/semaphore.c b/trunk/arch/mips/kernel/semaphore.c
new file mode 100644
index 000000000000..1265358cdca1
--- /dev/null
+++ b/trunk/arch/mips/kernel/semaphore.c
@@ -0,0 +1,168 @@
+/*
+ * MIPS-specific semaphore code.
+ *
+ * Copyright (C) 1999 Cort Dougan
+ * Copyright (C) 2004 Ralf Baechle
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * April 2001 - Reworked by Paul Mackerras
+ * to eliminate the SMP races in the old version between the updates
+ * of `count' and `waking'. Now we use negative `count' values to
+ * indicate that some process(es) are waiting for the semaphore.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+/*
+ * Atomically update sem->count.
+ * This does the equivalent of the following:
+ *
+ * old_count = sem->count;
+ * tmp = MAX(old_count, 0) + incr;
+ * sem->count = tmp;
+ * return old_count;
+ *
+ * On machines without lld/scd we need a spinlock to make the manipulation of
+ * sem->count and sem->waking atomic. Scalability isn't an issue because
+ * this lock is used on UP only so it's just an empty variable.
+ */
+static inline int __sem_update_count(struct semaphore *sem, int incr)
+{
+ int old_count, tmp;
+
+ if (cpu_has_llsc && R10000_LLSC_WAR) {
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: ll %0, %2 # __sem_update_count \n"
+ " sra %1, %0, 31 \n"
+ " not %1 \n"
+ " and %1, %0, %1 \n"
+ " addu %1, %1, %3 \n"
+ " sc %1, %2 \n"
+ " beqzl %1, 1b \n"
+ " .set mips0 \n"
+ : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
+ : "r" (incr), "m" (sem->count));
+ } else if (cpu_has_llsc) {
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: ll %0, %2 # __sem_update_count \n"
+ " sra %1, %0, 31 \n"
+ " not %1 \n"
+ " and %1, %0, %1 \n"
+ " addu %1, %1, %3 \n"
+ " sc %1, %2 \n"
+ " beqz %1, 1b \n"
+ " .set mips0 \n"
+ : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
+ : "r" (incr), "m" (sem->count));
+ } else {
+ static DEFINE_SPINLOCK(semaphore_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&semaphore_lock, flags);
+ old_count = atomic_read(&sem->count);
+ tmp = max_t(int, old_count, 0) + incr;
+ atomic_set(&sem->count, tmp);
+ spin_unlock_irqrestore(&semaphore_lock, flags);
+ }
+
+ return old_count;
+}
+
+void __up(struct semaphore *sem)
+{
+ /*
+ * Note that we incremented count in up() before we came here,
+ * but that was ineffective since the result was <= 0, and
+ * any negative value of count is equivalent to 0.
+ * This ends up setting count to 1, unless count is now > 0
+ * (i.e. because some other cpu has called up() in the meantime),
+ * in which case we just increment count.
+ */
+ __sem_update_count(sem, 1);
+ wake_up(&sem->wait);
+}
+
+EXPORT_SYMBOL(__up);
+
+/*
+ * Note that when we come in to __down or __down_interruptible,
+ * we have already decremented count, but that decrement was
+ * ineffective since the result was < 0, and any negative value
+ * of count is equivalent to 0.
+ * Thus it is only when we decrement count from some value > 0
+ * that we have actually got the semaphore.
+ */
+void __sched __down(struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ /*
+ * Try to get the semaphore. If the count is > 0, then we've
+ * got the semaphore; we decrement count and exit the loop.
+ * If the count is 0 or negative, we set it to -1, indicating
+ * that we are asleep, and then sleep.
+ */
+ while (__sem_update_count(sem, -1) <= 0) {
+ schedule();
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ __set_task_state(tsk, TASK_RUNNING);
+
+ /*
+ * If there are any more sleepers, wake one of them up so
+ * that it can either get the semaphore, or set count to -1
+ * indicating that there are still processes sleeping.
+ */
+ wake_up(&sem->wait);
+}
+
+EXPORT_SYMBOL(__down);
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ __set_task_state(tsk, TASK_INTERRUPTIBLE);
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ while (__sem_update_count(sem, -1) <= 0) {
+ if (signal_pending(current)) {
+ /*
+ * A signal is pending - give up trying.
+ * Set sem->count to 0 if it is negative,
+ * since we are no longer sleeping.
+ */
+ __sem_update_count(sem, 0);
+ retval = -EINTR;
+ break;
+ }
+ schedule();
+ set_task_state(tsk, TASK_INTERRUPTIBLE);
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ __set_task_state(tsk, TASK_RUNNING);
+
+ wake_up(&sem->wait);
+ return retval;
+}
+
+EXPORT_SYMBOL(__down_interruptible);
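The ll/sc loops in __sem_update_count() above implement old = count; count = max(old, 0) + incr; return old, which is what makes every negative count behave like 0. As a short trace, assuming a single holder and one or two sleepers: with one task sleeping, count sits at -1; up() raises it to 0 and, since the result is <= 0, calls __up(), whose __sem_update_count(sem, 1) sets count to max(0, 0) + 1 == 1 and wakes the queue. The woken task's __sem_update_count(sem, -1) returns the old value 1 (> 0), so it owns the semaphore and count drops to 0; any other sleeper gets old <= 0 back, leaves count at -1, and keeps sleeping.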
diff --git a/trunk/arch/mn10300/kernel/Makefile b/trunk/arch/mn10300/kernel/Makefile
index 23f2ab67574c..ef07c956170a 100644
--- a/trunk/arch/mn10300/kernel/Makefile
+++ b/trunk/arch/mn10300/kernel/Makefile
@@ -3,7 +3,7 @@
#
extra-y := head.o init_task.o vmlinux.lds
-obj-y := process.o signal.o entry.o fpu.o traps.o irq.o \
+obj-y := process.o semaphore.o signal.o entry.o fpu.o traps.o irq.o \
ptrace.o setup.o time.o sys_mn10300.o io.o kthread.o \
switch_to.o mn10300_ksyms.o kernel_execve.o
diff --git a/trunk/arch/mn10300/kernel/semaphore.c b/trunk/arch/mn10300/kernel/semaphore.c
new file mode 100644
index 000000000000..9153c4039fd2
--- /dev/null
+++ b/trunk/arch/mn10300/kernel/semaphore.c
@@ -0,0 +1,149 @@
+/* MN10300 Semaphore implementation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include
+#include
+#include
+
+struct sem_waiter {
+ struct list_head list;
+ struct task_struct *task;
+};
+
+#if SEMAPHORE_DEBUG
+void semtrace(struct semaphore *sem, const char *str)
+{
+ if (sem->debug)
+ printk(KERN_DEBUG "[%d] %s({%d,%d})\n",
+ current->pid,
+ str,
+ atomic_read(&sem->count),
+ list_empty(&sem->wait_list) ? 0 : 1);
+}
+#else
+#define semtrace(SEM, STR) do { } while (0)
+#endif
+
+/*
+ * wait for a token to be granted from a semaphore
+ * - entered with lock held and interrupts disabled
+ */
+void __down(struct semaphore *sem, unsigned long flags)
+{
+ struct task_struct *tsk = current;
+ struct sem_waiter waiter;
+
+ semtrace(sem, "Entering __down");
+
+ /* set up my own style of waitqueue */
+ waiter.task = tsk;
+ get_task_struct(tsk);
+
+ list_add_tail(&waiter.list, &sem->wait_list);
+
+ /* we don't need to touch the semaphore struct anymore */
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ /* wait to be given the semaphore */
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+
+ for (;;) {
+ if (!waiter.task)
+ break;
+ schedule();
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ }
+
+ tsk->state = TASK_RUNNING;
+ semtrace(sem, "Leaving __down");
+}
+EXPORT_SYMBOL(__down);
+
+/*
+ * interruptibly wait for a token to be granted from a semaphore
+ * - entered with lock held and interrupts disabled
+ */
+int __down_interruptible(struct semaphore *sem, unsigned long flags)
+{
+ struct task_struct *tsk = current;
+ struct sem_waiter waiter;
+ int ret;
+
+ semtrace(sem, "Entering __down_interruptible");
+
+ /* set up my own style of waitqueue */
+ waiter.task = tsk;
+ get_task_struct(tsk);
+
+ list_add_tail(&waiter.list, &sem->wait_list);
+
+ /* we don't need to touch the semaphore struct anymore */
+ set_task_state(tsk, TASK_INTERRUPTIBLE);
+
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ /* wait to be given the semaphore */
+ ret = 0;
+ for (;;) {
+ if (!waiter.task)
+ break;
+ if (unlikely(signal_pending(current)))
+ goto interrupted;
+ schedule();
+ set_task_state(tsk, TASK_INTERRUPTIBLE);
+ }
+
+ out:
+ tsk->state = TASK_RUNNING;
+ semtrace(sem, "Leaving __down_interruptible");
+ return ret;
+
+ interrupted:
+ spin_lock_irqsave(&sem->wait_lock, flags);
+ list_del(&waiter.list);
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ ret = 0;
+ if (!waiter.task) {
+ put_task_struct(current);
+ ret = -EINTR;
+ }
+ goto out;
+}
+EXPORT_SYMBOL(__down_interruptible);
+
+/*
+ * release a single token back to a semaphore
+ * - entered with lock held and interrupts disabled
+ */
+void __up(struct semaphore *sem)
+{
+ struct task_struct *tsk;
+ struct sem_waiter *waiter;
+
+ semtrace(sem, "Entering __up");
+
+ /* grant the token to the process at the front of the queue */
+ waiter = list_entry(sem->wait_list.next, struct sem_waiter, list);
+
+ /* We must be careful not to touch 'waiter' after we set ->task = NULL.
+ * It is allocated on the waiter's stack and may become invalid at
+ * any time after that point (due to a wakeup from another source).
+ */
+ list_del_init(&waiter->list);
+ tsk = waiter->task;
+ smp_mb();
+ waiter->task = NULL;
+ wake_up_process(tsk);
+ put_task_struct(tsk);
+
+ semtrace(sem, "Leaving __up");
+}
+EXPORT_SYMBOL(__up);
diff --git a/trunk/arch/parisc/kernel/Makefile b/trunk/arch/parisc/kernel/Makefile
index 1f6585a56f97..27827bc3717e 100644
--- a/trunk/arch/parisc/kernel/Makefile
+++ b/trunk/arch/parisc/kernel/Makefile
@@ -9,7 +9,7 @@ AFLAGS_pacache.o := -traditional
obj-y := cache.o pacache.o setup.o traps.o time.o irq.o \
pa7300lc.o syscall.o entry.o sys_parisc.o firmware.o \
- ptrace.o hardware.o inventory.o drivers.o \
+ ptrace.o hardware.o inventory.o drivers.o semaphore.o \
signal.o hpmc.o real2.o parisc_ksyms.o unaligned.o \
process.o processor.o pdc_cons.o pdc_chassis.o unwind.o \
topology.o
diff --git a/trunk/arch/parisc/kernel/parisc_ksyms.c b/trunk/arch/parisc/kernel/parisc_ksyms.c
index 5b7fc4aa044d..7aca704e96f0 100644
--- a/trunk/arch/parisc/kernel/parisc_ksyms.c
+++ b/trunk/arch/parisc/kernel/parisc_ksyms.c
@@ -69,6 +69,11 @@ EXPORT_SYMBOL(memcpy_toio);
EXPORT_SYMBOL(memcpy_fromio);
EXPORT_SYMBOL(memset_io);
+#include
+EXPORT_SYMBOL(__up);
+EXPORT_SYMBOL(__down_interruptible);
+EXPORT_SYMBOL(__down);
+
extern void $$divI(void);
extern void $$divU(void);
extern void $$remI(void);
diff --git a/trunk/arch/parisc/kernel/semaphore.c b/trunk/arch/parisc/kernel/semaphore.c
new file mode 100644
index 000000000000..ee806bcc3726
--- /dev/null
+++ b/trunk/arch/parisc/kernel/semaphore.c
@@ -0,0 +1,102 @@
+/*
+ * Semaphore implementation Copyright (c) 2001 Matthew Wilcox, Hewlett-Packard
+ */
+
+#include
+#include
+#include
+#include
+
+/*
+ * Semaphores are complex as we wish to avoid using two variables.
+ * `count' has multiple roles, depending on its value. If it is positive
+ * or zero, there are no waiters. The functions here will never be
+ * called; see
+ *
+ * When count is -1 it indicates there is at least one task waiting
+ * for the semaphore.
+ *
+ * When count is less than that, there are '- count - 1' wakeups
+ * pending. ie if it has value -3, there are 2 wakeups pending.
+ *
+ * Note that these functions are only called when there is contention
+ * on the lock, and as such all this is the "non-critical" part of the
+ * whole semaphore business. The critical part is the inline stuff in
+ * where we want to avoid any extra jumps and calls.
+ */
+void __up(struct semaphore *sem)
+{
+ sem->count--;
+ wake_up(&sem->wait);
+}
+
+#define wakers(count) (-1 - count)
+
+#define DOWN_HEAD \
+ int ret = 0; \
+ DECLARE_WAITQUEUE(wait, current); \
+ \
+ /* Note that someone is waiting */ \
+ if (sem->count == 0) \
+ sem->count = -1; \
+ \
+ /* protected by the sentry still -- use unlocked version */ \
+ wait.flags = WQ_FLAG_EXCLUSIVE; \
+ __add_wait_queue_tail(&sem->wait, &wait); \
+ lost_race: \
+ spin_unlock_irq(&sem->sentry); \
+
+#define DOWN_TAIL \
+ spin_lock_irq(&sem->sentry); \
+ if (wakers(sem->count) == 0 && ret == 0) \
+ goto lost_race; /* Someone stole our wakeup */ \
+ __remove_wait_queue(&sem->wait, &wait); \
+ current->state = TASK_RUNNING; \
+ if (!waitqueue_active(&sem->wait) && (sem->count < 0)) \
+ sem->count = wakers(sem->count);
+
+#define UPDATE_COUNT \
+ sem->count += (sem->count < 0) ? 1 : - 1;
+
+
+void __sched __down(struct semaphore * sem)
+{
+ DOWN_HEAD
+
+ for(;;) {
+ set_task_state(current, TASK_UNINTERRUPTIBLE);
+ /* we can _read_ this without the sentry */
+ if (sem->count != -1)
+ break;
+ schedule();
+ }
+
+ DOWN_TAIL
+ UPDATE_COUNT
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ DOWN_HEAD
+
+ for(;;) {
+ set_task_state(current, TASK_INTERRUPTIBLE);
+ /* we can _read_ this without the sentry */
+ if (sem->count != -1)
+ break;
+
+ if (signal_pending(current)) {
+ ret = -EINTR;
+ break;
+ }
+ schedule();
+ }
+
+ DOWN_TAIL
+
+ if (!ret) {
+ UPDATE_COUNT
+ }
+
+ return ret;
+}
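To make the encoding above concrete: count >= 0 means no waiters, count == -1 means waiters but no pending wakeups, and wakers(count) == -1 - count gives the number of pending wakeups, so count == -3 means two. Each contended __up() moves count one step more negative (one more pending wakeup) and wakes the queue; a waiter that finds wakers(sem->count) non-zero in DOWN_TAIL consumes one via UPDATE_COUNT (count += 1 while count is negative), while one that finds it zero jumps back to lost_race and sleeps again. Once the wait queue empties, DOWN_TAIL folds any leftover pending wakeups back into an ordinary non-negative count with sem->count = wakers(sem->count).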
diff --git a/trunk/arch/powerpc/kernel/Makefile b/trunk/arch/powerpc/kernel/Makefile
index b9dbfff9afe9..c1baf9d5903f 100644
--- a/trunk/arch/powerpc/kernel/Makefile
+++ b/trunk/arch/powerpc/kernel/Makefile
@@ -12,7 +12,7 @@ CFLAGS_prom_init.o += -fPIC
CFLAGS_btext.o += -fPIC
endif
-obj-y := cputable.o ptrace.o syscalls.o \
+obj-y := semaphore.o cputable.o ptrace.o syscalls.o \
irq.o align.o signal_32.o pmc.o vdso.o \
init_task.o process.o systbl.o idle.o \
signal.o
diff --git a/trunk/arch/powerpc/kernel/ppc_ksyms.c b/trunk/arch/powerpc/kernel/ppc_ksyms.c
index 65d14e6ddc3c..9c98424277a8 100644
--- a/trunk/arch/powerpc/kernel/ppc_ksyms.c
+++ b/trunk/arch/powerpc/kernel/ppc_ksyms.c
@@ -15,6 +15,7 @@
#include
#include
+#include
#include
#include
#include
diff --git a/trunk/arch/powerpc/kernel/semaphore.c b/trunk/arch/powerpc/kernel/semaphore.c
new file mode 100644
index 000000000000..2f8c3c951394
--- /dev/null
+++ b/trunk/arch/powerpc/kernel/semaphore.c
@@ -0,0 +1,135 @@
+/*
+ * PowerPC-specific semaphore code.
+ *
+ * Copyright (C) 1999 Cort Dougan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * April 2001 - Reworked by Paul Mackerras
+ * to eliminate the SMP races in the old version between the updates
+ * of `count' and `waking'. Now we use negative `count' values to
+ * indicate that some process(es) are waiting for the semaphore.
+ */
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+/*
+ * Atomically update sem->count.
+ * This does the equivalent of the following:
+ *
+ * old_count = sem->count;
+ * tmp = MAX(old_count, 0) + incr;
+ * sem->count = tmp;
+ * return old_count;
+ */
+static inline int __sem_update_count(struct semaphore *sem, int incr)
+{
+ int old_count, tmp;
+
+ __asm__ __volatile__("\n"
+"1: lwarx %0,0,%3\n"
+" srawi %1,%0,31\n"
+" andc %1,%0,%1\n"
+" add %1,%1,%4\n"
+ PPC405_ERR77(0,%3)
+" stwcx. %1,0,%3\n"
+" bne 1b"
+ : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
+ : "r" (&sem->count), "r" (incr), "m" (sem->count)
+ : "cc");
+
+ return old_count;
+}
+
+void __up(struct semaphore *sem)
+{
+ /*
+ * Note that we incremented count in up() before we came here,
+ * but that was ineffective since the result was <= 0, and
+ * any negative value of count is equivalent to 0.
+ * This ends up setting count to 1, unless count is now > 0
+ * (i.e. because some other cpu has called up() in the meantime),
+ * in which case we just increment count.
+ */
+ __sem_update_count(sem, 1);
+ wake_up(&sem->wait);
+}
+EXPORT_SYMBOL(__up);
+
+/*
+ * Note that when we come in to __down or __down_interruptible,
+ * we have already decremented count, but that decrement was
+ * ineffective since the result was < 0, and any negative value
+ * of count is equivalent to 0.
+ * Thus it is only when we decrement count from some value > 0
+ * that we have actually got the semaphore.
+ */
+void __sched __down(struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ /*
+ * Try to get the semaphore. If the count is > 0, then we've
+ * got the semaphore; we decrement count and exit the loop.
+ * If the count is 0 or negative, we set it to -1, indicating
+ * that we are asleep, and then sleep.
+ */
+ while (__sem_update_count(sem, -1) <= 0) {
+ schedule();
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ __set_task_state(tsk, TASK_RUNNING);
+
+ /*
+ * If there are any more sleepers, wake one of them up so
+ * that it can either get the semaphore, or set count to -1
+ * indicating that there are still processes sleeping.
+ */
+ wake_up(&sem->wait);
+}
+EXPORT_SYMBOL(__down);
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ __set_task_state(tsk, TASK_INTERRUPTIBLE);
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ while (__sem_update_count(sem, -1) <= 0) {
+ if (signal_pending(current)) {
+ /*
+ * A signal is pending - give up trying.
+ * Set sem->count to 0 if it is negative,
+ * since we are no longer sleeping.
+ */
+ __sem_update_count(sem, 0);
+ retval = -EINTR;
+ break;
+ }
+ schedule();
+ set_task_state(tsk, TASK_INTERRUPTIBLE);
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ __set_task_state(tsk, TASK_RUNNING);
+
+ wake_up(&sem->wait);
+ return retval;
+}
+EXPORT_SYMBOL(__down_interruptible);
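The lwarx/stwcx. reservation loop in __sem_update_count() above clamps the old count at zero, adds incr, and hands back the previous value in one atomic step. A portable C model of the same update, using a C11 compare-and-swap loop rather than the kernel's reservation instructions (so this is an illustration, not the patch's code):

	#include <stdatomic.h>

	/* Model of __sem_update_count(): clamp the old value at 0, add incr,
	 * publish the result, and return what was there before. */
	static int sem_update_count_model(atomic_int *count, int incr)
	{
		int old = atomic_load(count);
		int tmp;

		do {
			tmp = (old > 0 ? old : 0) + incr;
		} while (!atomic_compare_exchange_weak(count, &old, tmp));

		return old;
	}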
diff --git a/trunk/arch/powerpc/kernel/setup_32.c b/trunk/arch/powerpc/kernel/setup_32.c
index 06d918d94dd1..cd870a823d18 100644
--- a/trunk/arch/powerpc/kernel/setup_32.c
+++ b/trunk/arch/powerpc/kernel/setup_32.c
@@ -10,6 +10,9 @@
#include
#include
#include
+#if defined(CONFIG_IDE) || defined(CONFIG_IDE_MODULE)
+#include <linux/ide.h>
+#endif
#include
#include
#include
@@ -48,6 +51,11 @@
extern void bootx_init(unsigned long r4, unsigned long phys);
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+struct ide_machdep_calls ppc_ide_md;
+EXPORT_SYMBOL(ppc_ide_md);
+#endif
+
int boot_cpuid;
EXPORT_SYMBOL_GPL(boot_cpuid);
int boot_cpuid_phys;
diff --git a/trunk/arch/powerpc/platforms/powermac/pci.c b/trunk/arch/powerpc/platforms/powermac/pci.c
index bcf50d7056e9..1c58db9d42cb 100644
--- a/trunk/arch/powerpc/platforms/powermac/pci.c
+++ b/trunk/arch/powerpc/platforms/powermac/pci.c
@@ -1144,6 +1144,28 @@ void __init pmac_pcibios_after_init(void)
{
struct device_node* nd;
+#ifdef CONFIG_BLK_DEV_IDE
+ struct pci_dev *dev = NULL;
+
+ /* OF fails to initialize IDE controllers on macs
+ * (and maybe other machines)
+ *
+ * Ideally, this should be moved to the IDE layer, but we need
+ * to check specifically with Andre Hedrick how to do it cleanly
+ * since the common IDE code seems to care about the fact that the
+ * BIOS may have disabled a controller.
+ *
+ * -- BenH
+ */
+ for_each_pci_dev(dev) {
+ if ((dev->class >> 16) != PCI_BASE_CLASS_STORAGE)
+ continue;
+ if (pci_enable_device(dev))
+ printk(KERN_WARNING
+ "pci: Failed to enable %s\n", pci_name(dev));
+ }
+#endif /* CONFIG_BLK_DEV_IDE */
+
for_each_node_by_name(nd, "firewire") {
if (nd->parent && (of_device_is_compatible(nd, "pci106b,18") ||
of_device_is_compatible(nd, "pci106b,30") ||
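The class test in the hunk above relies on the layout of dev->class, where the base class, sub-class and programming interface are packed into one 24-bit word. A small sketch of that decomposition follows; the helper name is made up, while PCI_BASE_CLASS_STORAGE is the real constant from the PCI ID headers.

	/* Sketch of the dev->class layout assumed by "(dev->class >> 16)":
	 * bits 23-16 base class, 15-8 sub-class, 7-0 programming interface. */
	static int is_storage_device(unsigned int class_word)
	{
		unsigned int base_class = class_word >> 16;

		return base_class == PCI_BASE_CLASS_STORAGE;	/* 0x01 */
	}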
diff --git a/trunk/arch/powerpc/platforms/powermac/pmac.h b/trunk/arch/powerpc/platforms/powermac/pmac.h
index 3362e781b6a7..b3abaaf61eb4 100644
--- a/trunk/arch/powerpc/platforms/powermac/pmac.h
+++ b/trunk/arch/powerpc/platforms/powermac/pmac.h
@@ -2,6 +2,7 @@
#define __PMAC_H__
#include
+#include <linux/ide.h>
#include
/*
@@ -34,6 +35,10 @@ extern void pmac_check_ht_link(void);
extern void pmac_setup_smp(void);
+extern unsigned long pmac_ide_get_base(int index);
+extern void pmac_ide_init_hwif_ports(hw_regs_t *hw,
+ unsigned long data_port, unsigned long ctrl_port, int *irq);
+
extern int pmac_nvram_init(void);
extern void pmac_pic_init(void);
diff --git a/trunk/arch/powerpc/platforms/powermac/setup.c b/trunk/arch/powerpc/platforms/powermac/setup.c
index 2693fc371eab..36ff1b6b7fac 100644
--- a/trunk/arch/powerpc/platforms/powermac/setup.c
+++ b/trunk/arch/powerpc/platforms/powermac/setup.c
@@ -574,6 +574,14 @@ static int __init pmac_probe(void)
ISA_DMA_THRESHOLD = ~0L;
DMA_MODE_READ = 1;
DMA_MODE_WRITE = 2;
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+#ifdef CONFIG_BLK_DEV_IDE_PMAC
+ ppc_ide_md.ide_init_hwif = pmac_ide_init_hwif_ports;
+ ppc_ide_md.default_io_base = pmac_ide_get_base;
+#endif /* CONFIG_BLK_DEV_IDE_PMAC */
+#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */
+
#endif /* CONFIG_PPC32 */
#ifdef CONFIG_PMAC_SMU
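Both setup_32.c and the pmac_probe() hunk above populate the global ppc_ide_md structure so that the common IDE layer can call back into platform code. Only the two fields used in these hunks, ide_init_hwif and default_io_base, are assumed in the sketch below; the board name, addresses and register layout are hypothetical, following the old hw_regs_t convention of io_ports[0..7] for the taskfile registers and io_ports[8] for device control.

	/* Hypothetical platform hook-up mirroring the pmac_probe() hunk above. */
	static unsigned long myboard_ide_get_base(int index)
	{
		/* Base address of IDE interface 'index'; 0 means "not present". */
		return index == 0 ? 0xf8000000UL : 0;
	}

	static void myboard_ide_init_hwif_ports(hw_regs_t *hw,
			unsigned long data_port, unsigned long ctrl_port, int *irq)
	{
		int i;

		for (i = 0; i <= 7; i++)
			hw->io_ports[i] = data_port + i;	/* taskfile registers */
		hw->io_ports[8] = ctrl_port;			/* device control */
		if (irq)
			*irq = 0;				/* let the core probe the IRQ */
	}

	static void __init myboard_setup_arch(void)
	{
		ppc_ide_md.ide_init_hwif   = myboard_ide_init_hwif_ports;
		ppc_ide_md.default_io_base = myboard_ide_get_base;
	}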
diff --git a/trunk/arch/ppc/configs/sandpoint_defconfig b/trunk/arch/ppc/configs/sandpoint_defconfig
index 9525e34138fc..fb493a67c60d 100644
--- a/trunk/arch/ppc/configs/sandpoint_defconfig
+++ b/trunk/arch/ppc/configs/sandpoint_defconfig
@@ -189,7 +189,7 @@ CONFIG_IDE_TASKFILE_IO=y
#
# IDE chipset support/bugfixes
#
-CONFIG_BLK_DEV_SL82C105=y
+CONFIG_IDE_GENERIC=y
# CONFIG_BLK_DEV_IDEPCI is not set
# CONFIG_BLK_DEV_IDEDMA is not set
# CONFIG_IDEDMA_AUTO is not set
diff --git a/trunk/arch/ppc/kernel/ppc_ksyms.c b/trunk/arch/ppc/kernel/ppc_ksyms.c
index 2ba659f401be..c35350250cfc 100644
--- a/trunk/arch/ppc/kernel/ppc_ksyms.c
+++ b/trunk/arch/ppc/kernel/ppc_ksyms.c
@@ -12,6 +12,7 @@
#include
#include
#include
+#include <linux/ide.h>
#include
#include
@@ -123,6 +124,10 @@ EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+EXPORT_SYMBOL(ppc_ide_md);
+#endif
+
#ifdef CONFIG_PCI
EXPORT_SYMBOL(isa_io_base);
EXPORT_SYMBOL(isa_mem_base);
diff --git a/trunk/arch/ppc/kernel/semaphore.c b/trunk/arch/ppc/kernel/semaphore.c
new file mode 100644
index 000000000000..2fe429b27c14
--- /dev/null
+++ b/trunk/arch/ppc/kernel/semaphore.c
@@ -0,0 +1,131 @@
+/*
+ * PowerPC-specific semaphore code.
+ *
+ * Copyright (C) 1999 Cort Dougan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * April 2001 - Reworked by Paul Mackerras
+ * to eliminate the SMP races in the old version between the updates
+ * of `count' and `waking'. Now we use negative `count' values to
+ * indicate that some process(es) are waiting for the semaphore.
+ */
+
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+#include <asm/errno.h>
+
+/*
+ * Atomically update sem->count.
+ * This does the equivalent of the following:
+ *
+ * old_count = sem->count;
+ * tmp = MAX(old_count, 0) + incr;
+ * sem->count = tmp;
+ * return old_count;
+ */
+static inline int __sem_update_count(struct semaphore *sem, int incr)
+{
+ int old_count, tmp;
+
+ __asm__ __volatile__("\n"
+"1: lwarx %0,0,%3\n"
+" srawi %1,%0,31\n"
+" andc %1,%0,%1\n"
+" add %1,%1,%4\n"
+ PPC405_ERR77(0,%3)
+" stwcx. %1,0,%3\n"
+" bne 1b"
+ : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
+ : "r" (&sem->count), "r" (incr), "m" (sem->count)
+ : "cc");
+
+ return old_count;
+}
+
+void __up(struct semaphore *sem)
+{
+ /*
+ * Note that we incremented count in up() before we came here,
+ * but that was ineffective since the result was <= 0, and
+ * any negative value of count is equivalent to 0.
+ * This ends up setting count to 1, unless count is now > 0
+ * (i.e. because some other cpu has called up() in the meantime),
+ * in which case we just increment count.
+ */
+ __sem_update_count(sem, 1);
+ wake_up(&sem->wait);
+}
+
+/*
+ * Note that when we come in to __down or __down_interruptible,
+ * we have already decremented count, but that decrement was
+ * ineffective since the result was < 0, and any negative value
+ * of count is equivalent to 0.
+ * Thus it is only when we decrement count from some value > 0
+ * that we have actually got the semaphore.
+ */
+void __sched __down(struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ add_wait_queue_exclusive(&sem->wait, &wait);
+ smp_wmb();
+
+ /*
+ * Try to get the semaphore. If the count is > 0, then we've
+ * got the semaphore; we decrement count and exit the loop.
+ * If the count is 0 or negative, we set it to -1, indicating
+ * that we are asleep, and then sleep.
+ */
+ while (__sem_update_count(sem, -1) <= 0) {
+ schedule();
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+
+ /*
+ * If there are any more sleepers, wake one of them up so
+ * that it can either get the semaphore, or set count to -1
+ * indicating that there are still processes sleeping.
+ */
+ wake_up(&sem->wait);
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ tsk->state = TASK_INTERRUPTIBLE;
+ add_wait_queue_exclusive(&sem->wait, &wait);
+ smp_wmb();
+
+ while (__sem_update_count(sem, -1) <= 0) {
+ if (signal_pending(current)) {
+ /*
+ * A signal is pending - give up trying.
+ * Set sem->count to 0 if it is negative,
+ * since we are no longer sleeping.
+ */
+ __sem_update_count(sem, 0);
+ retval = -EINTR;
+ break;
+ }
+ schedule();
+ tsk->state = TASK_INTERRUPTIBLE;
+ }
+ tsk->state = TASK_RUNNING;
+ remove_wait_queue(&sem->wait, &wait);
+ wake_up(&sem->wait);
+ return retval;
+}
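For context, __down() and __down_interruptible() are only the contended slow paths; ordinary code calls down(), down_interruptible() and up(), and must handle the -EINTR that the interruptible variant propagates from the loop above. A minimal, hypothetical caller might look like this (DECLARE_MUTEX initialises the semaphore to a count of 1):

	static DECLARE_MUTEX(my_sem);	/* hypothetical semaphore, initial count 1 */

	static int do_guarded_work(void)
	{
		if (down_interruptible(&my_sem))
			return -ERESTARTSYS;	/* a signal interrupted the sleep */

		/* ... critical section protected by my_sem ... */

		up(&my_sem);	/* may enter __up() above and wake another sleeper */
		return 0;
	}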
diff --git a/trunk/arch/ppc/kernel/setup.c b/trunk/arch/ppc/kernel/setup.c
index bfddfdee0b65..294055902f0c 100644
--- a/trunk/arch/ppc/kernel/setup.c
+++ b/trunk/arch/ppc/kernel/setup.c
@@ -10,6 +10,7 @@
#include
#include
#include
+#include <linux/ide.h>
#include
#include
#include
@@ -56,6 +57,7 @@ extern void ppc6xx_idle(void);
extern void power4_idle(void);
extern boot_infos_t *boot_infos;
+struct ide_machdep_calls ppc_ide_md;
/* Used with the BI_MEMSIZE bootinfo parameter to store the memory
size value reported by the boot loader. */
diff --git a/trunk/arch/ppc/platforms/4xx/bamboo.c b/trunk/arch/ppc/platforms/4xx/bamboo.c
index 01f20f4c14fe..017623c9bc4b 100644
--- a/trunk/arch/ppc/platforms/4xx/bamboo.c
+++ b/trunk/arch/ppc/platforms/4xx/bamboo.c
@@ -22,6 +22,7 @@
#include
#include
#include
+#include <linux/ide.h>
#include
#include
#include
diff --git a/trunk/arch/ppc/platforms/4xx/ebony.c b/trunk/arch/ppc/platforms/4xx/ebony.c
index 8027a36fc5bb..453643a0eeea 100644
--- a/trunk/arch/ppc/platforms/4xx/ebony.c
+++ b/trunk/arch/ppc/platforms/4xx/ebony.c
@@ -25,6 +25,7 @@
#include
#include
#include
+#include <linux/ide.h>
#include
#include
#include
diff --git a/trunk/arch/ppc/platforms/4xx/luan.c b/trunk/arch/ppc/platforms/4xx/luan.c
index f6d8c2e8b6b7..b79ebb8a3e6c 100644
--- a/trunk/arch/ppc/platforms/4xx/luan.c
+++ b/trunk/arch/ppc/platforms/4xx/luan.c
@@ -23,6 +23,7 @@
#include
#include
#include
+#include <linux/ide.h>
#include
#include
#include
diff --git a/trunk/arch/ppc/platforms/4xx/ocotea.c b/trunk/arch/ppc/platforms/4xx/ocotea.c
index 308386ef6f77..28a712cd4800 100644
--- a/trunk/arch/ppc/platforms/4xx/ocotea.c
+++ b/trunk/arch/ppc/platforms/4xx/ocotea.c
@@ -23,6 +23,7 @@
#include