diff --git a/[refs] b/[refs]
index b1e5a9fb2d20..4e56dd849da1 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
---
-refs/heads/master: 07fe944e87d79f8d7e1b090913fe9f2ace78f41d
+refs/heads/master: a594eeb1a1d320981fccc29584b6f21fcebd765f
diff --git a/trunk/Documentation/00-INDEX b/trunk/Documentation/00-INDEX
index f7923a42e769..e8fb24671967 100644
--- a/trunk/Documentation/00-INDEX
+++ b/trunk/Documentation/00-INDEX
@@ -167,8 +167,10 @@ highuid.txt
- notes on the change from 16 bit to 32 bit user/group IDs.
hpet.txt
- High Precision Event Timer Driver for Linux.
-timers/
- - info on the timer related topics
+hrtimer/
+ - info on the timer_stats debugging facility for timer (ab)use.
+hrtimers/
+ - info on the hrtimers subsystem for high-resolution kernel timers.
hw_random.txt
- info on Linux support for random number generator in i8xx chipsets.
hwmon/
diff --git a/trunk/Documentation/DocBook/Makefile b/trunk/Documentation/DocBook/Makefile
index e471bc466a7e..300e1707893f 100644
--- a/trunk/Documentation/DocBook/Makefile
+++ b/trunk/Documentation/DocBook/Makefile
@@ -9,7 +9,7 @@
DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
procfs-guide.xml writing_usb_driver.xml networking.xml \
- kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \
+ kernel-api.xml filesystems.xml lsm.xml usb.xml \
gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml
diff --git a/trunk/Documentation/DocBook/kgdb.tmpl b/trunk/Documentation/DocBook/kgdb.tmpl
deleted file mode 100644
index 97618bed4d65..000000000000
--- a/trunk/Documentation/DocBook/kgdb.tmpl
+++ /dev/null
@@ -1,447 +0,0 @@
-
-
-
-
-
- Using kgdb and the kgdb Internals
-
-
-
- Jason
- Wessel
-
-
- jason.wessel@windriver.com
-
-
-
-
-
-
-
- Tom
- Rini
-
-
- trini@kernel.crashing.org
-
-
-
-
-
-
-
- Amit S.
- Kale
-
-
- amitkale@linsyssoft.com
-
-
-
-
-
-
- 2008
- Wind River Systems, Inc.
-
-
- 2004-2005
- MontaVista Software, Inc.
-
-
- 2004
- Amit S. Kale
-
-
-
-
- This file is licensed under the terms of the GNU General Public License
- version 2. This program is licensed "as is" without any warranty of any
- kind, whether express or implied.
-
-
-
-
-
-
-
- Introduction
-
- kgdb is a source level debugger for the Linux kernel. It is used along
- with gdb to debug a Linux kernel. The expectation is that gdb can
- be used to "break in" to the kernel to inspect memory, variables
- and look through call stack information similar to what an
- application developer would use gdb for. It is possible to place
- breakpoints in kernel code and perform some limited execution
- stepping.
-
-
- Two machines are required for using kgdb. One of these machines is a
- development machine and the other is a test machine. The kernel
- to be debugged runs on the test machine. The development machine
- runs an instance of gdb against the vmlinux file which contains
- the symbols (not boot image such as bzImage, zImage, uImage...).
- In gdb the developer specifies the connection parameters and
- connects to kgdb. Depending on which kgdb I/O modules exist in
- the kernel for a given architecture, it may be possible to debug
- the test machine's kernel with the development machine using an
- RS-232 or Ethernet connection.
-
-
-
- Compiling a kernel
-
- To enable CONFIG_KGDB, look under the "Kernel debugging" menu
- and then select "KGDB: kernel debugging with remote gdb".
-
-
- Next you should choose one or more I/O drivers to interconnect the
- debugging host and debugged target. Early boot debugging requires a KGDB
- I/O driver that supports early debugging and the driver must be
- built into the kernel directly. Kgdb I/O driver configuration
- takes place via kernel or module parameters; see the following
- chapter.
-
-
- The kgdb test compile options are described in the kgdb test suite chapter.
-
-
-
-
- Enable kgdb for debugging
-
- In order to use kgdb you must activate it by passing configuration
- information to one of the kgdb I/O drivers. If you do not pass any
- configuration information kgdb will not do anything at all. Kgdb
- will only actively hook up to the kernel trap hooks if a kgdb I/O
- driver is loaded and configured. If you unconfigure a kgdb I/O
- driver, kgdb will unregister all the kernel hook points.
-
-
- All drivers can be reconfigured at run time, if
- CONFIG_SYSFS and CONFIG_MODULES
- are enabled, by echo'ing a new config string to
- /sys/module/<driver>/parameters/<option>.
- The driver can be unconfigured by passing an empty string. You cannot
- change the configuration while the debugger is attached. Make sure
- to detach the debugger with the detach command
- prior to trying to unconfigure a kgdb I/O driver.
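-
- For example, assuming the kgdboc driver described below, a
- reconfigure-then-unconfigure sequence would look like:
- echo ttyS0,115200 > /sys/module/kgdboc/parameters/kgdboc
- echo "" > /sys/module/kgdboc/parameters/kgdboc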
-
-
- Kernel parameter: kgdbwait
-
- The kernel command line option kgdbwait makes
- kgdb wait for a debugger connection during booting of a kernel. You
- can only use this option if you compiled a kgdb I/O driver into the
- kernel and you specified the I/O driver configuration as a kernel
- command line option. The kgdbwait parameter should always follow the
- configuration parameter for the kgdb I/O driver in the kernel
- command line; otherwise the I/O driver will not be configured prior to
- asking the kernel to use it to wait.
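-
- For example, with a built-in kgdboc driver (described in the next
- section), a command line ordering that satisfies this would be:
- kgdboc=ttyS0,115200 kgdbwait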
-
-
- The kernel will stop and wait as early as the I/O driver and
- architecture will allow when you use this option. If you build the
- kgdb I/O driver as a kernel module kgdbwait will not do anything.
-
-
-
- Kernel parameter: kgdboc
-
- The kgdboc driver's name was originally an abbreviation for
- "kgdb over console". Kgdboc is designed to work with a single
- serial port. It was meant to cover the circumstance
- where you wanted to use a serial console as your primary console as
- well as using it to perform kernel debugging. Of course you can
- also use kgdboc without assigning a console to the same port.
-
-
- Using kgdboc
-
- You can configure kgdboc via sysfs or a module or kernel boot line
- parameter, depending on whether you build CONFIG_KGDBOC as a module
- or built-in.
-
- From the module load or built-in
- kgdboc=<tty-device>,[baud]
-
- For example, if your console port is typically ttyS0, you would use something like kgdboc=ttyS0,115200; on the ARM Versatile AB you would likely use kgdboc=ttyAMA0,115200
-
-
- From sysfs
- echo ttyS0 > /sys/module/kgdboc/parameters/kgdboc
-
-
-
-
- NOTE: Kgdboc does not support interrupting the target via the
- gdb remote protocol. You must manually send a sysrq-g unless you
- have a proxy that splits console output to a terminal program and
- has a separate port for the debugger to connect to that sends the
- sysrq-g for you.
-
- When using kgdboc with no debugger proxy, you can end up
- connecting the debugger at one of two entry points. If an
- exception occurs after you have loaded kgdboc, a message should print
- on the console stating it is waiting for the debugger. In this case
- you disconnect your terminal program and then connect the debugger in
- its place. If you want to interrupt the target system and forcibly
- enter a debug session you have to issue a Sysrq sequence and then
- type the letter g. Then you disconnect the
- terminal session and connect gdb. Your options if you don't like
- this are to hack gdb to send the sysrq-g for you as well as on the
- initial connect, or to use a debugger proxy that allows an
- unmodified gdb to do the debugging.
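-
- For example, on a target with magic sysrq support enabled, one way
- to issue the sysrq-g from a shell on the target is:
- echo g > /proc/sysrq-trigger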
-
-
-
-
- Kernel parameter: kgdbcon
-
- Kgdb supports using the gdb serial protocol to send console messages
- to the debugger when the debugger is connected and running. There
- are two ways to activate this feature.
-
- Activate with the kernel command line option:
- kgdbcon
-
- Use sysfs before configuring an I/O driver
-
- echo 1 > /sys/module/kgdb/parameters/kgdb_use_con
-
-
- NOTE: If you do this after you configure the kgdb I/O driver, the
- setting will not take effect until the next time the I/O driver is
- reconfigured.
-
-
-
-
-
- IMPORTANT NOTE: Using this option with kgdb over the console
- (kgdboc) or kgdb over ethernet (kgdboe) is not supported.
-
-
-
-
- Connecting gdb
-
- If you are using kgdboc, then before you can connect gdb you need
- to have used kgdbwait as a boot argument, issued a sysrq-g, or the
- system you are going to debug must already have taken an exception
- and be waiting for the debugger to attach.
-
-
- If you are using a kgdb I/O driver other than kgdboc,
- you should be able to connect and the target will automatically
- respond.
-
-
- Example (using a serial port):
-
-
- % gdb ./vmlinux
- (gdb) set remotebaud 115200
- (gdb) target remote /dev/ttyS0
-
-
- Example (kgdb to a terminal server on TCP port 2012):
-
-
- % gdb ./vmlinux
- (gdb) target remote 192.168.2.2:2012
-
-
- Example (kgdb over ethernet):
-
-
- % gdb ./vmlinux
- (gdb) target remote udp:192.168.2.2:6443
-
-
- Once connected, you can debug a kernel the way you would debug an
- application program.
-
-
- If you are having problems connecting or something is going
- seriously wrong while debugging, it will most often be the case
- that you want to enable gdb to be verbose about its target
- communications. You do this prior to issuing the target
- remote command by typing in: set debug remote 1
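-
- Example (a verbose variant of the serial session shown earlier):
-
- % gdb ./vmlinux
- (gdb) set debug remote 1
- (gdb) target remote /dev/ttyS0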
-
-
-
- kgdb Test Suite
-
- When kgdb is enabled in the kernel config you can also elect to
- enable the config parameter KGDB_TESTS. Turning this on will
- enable a special kgdb I/O module which is designed to test the
- kgdb internal functions.
-
-
- The kgdb tests are mainly intended for developers to test the kgdb
- internals as well as a tool for developing a new kgdb architecture
- specific implementation. These tests are not really for end users
- of the Linux kernel. The primary source of documentation would be
- to look in the drivers/misc/kgdbts.c file.
-
-
- The kgdb test suite can also be configured at compile time to run
- the core set of tests by setting the kernel config parameter
- KGDB_TESTS_ON_BOOT. This particular option is aimed at automated
- regression testing and does not require modifying the kernel boot
- config arguments. If this is turned on, the kgdb test suite can
- be disabled by specifying "kgdbts=" as a kernel boot argument.
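-
- For example, an illustrative config fragment enabling the options
- described above would be:
- CONFIG_KGDB=y
- CONFIG_KGDB_TESTS=y
- CONFIG_KGDB_TESTS_ON_BOOT=y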
-
-
-
- KGDB Internals
-
- Architecture Specifics
-
- Kgdb is organized into three basic components:
-
- kgdb core
-
- The kgdb core is found in kernel/kgdb.c. It contains:
-
- All the logic to implement the gdb serial protocol
- A generic OS exception handler which includes sync'ing the processors into a stopped state on a multi-cpu system.
- The API to talk to the kgdb I/O drivers
- The API to make calls to the arch specific kgdb implementation
- The logic to perform safe memory reads and writes to memory while using the debugger
- A full implementation for software breakpoints unless overridden by the arch
-
-
-
- kgdb arch specific implementation
-
- This implementation is generally found in arch/*/kernel/kgdb.c.
- As an example, arch/x86/kernel/kgdb.c contains the specifics to
- implement HW breakpoints as well as the initialization to
- dynamically register and unregister for the trap handlers on
- this architecture. The arch specific portion implements:
-
- an arch specific trap catcher which
- invokes kgdb_handle_exception() to start kgdb doing its
- work
- translation to and from gdb specific packet format to pt_regs
- Registration and unregistration of architecture specific trap hooks
- Any special exception handling and cleanup
- NMI exception handling and cleanup
- (optional) HW breakpoints
-
-
-
- kgdb I/O driver
-
- Each kgdb I/O driver has to provide an implementation for the following:
-
- configuration via builtin or module
- dynamic configuration and kgdb hook registration calls
- read and write character interface
- A cleanup handler for unconfiguring from the kgdb core
- (optional) Early debug methodology
-
- Any given kgdb I/O driver has to operate very closely with the
- hardware and must do it in such a way that does not enable
- interrupts or change other parts of the system context without
- completely restoring them. The kgdb core will repeatedly "poll"
- a kgdb I/O driver for characters when it needs input. The I/O
- driver is expected to return immediately if there is no data
- available. Doing so allows for the future possibility to touch
- watchdog hardware in such a way as to have a target system not
- reset when these are enabled.
-
-
-
-
-
- If you are intent on adding kgdb architecture specific support
- for a new architecture, the architecture should define
- HAVE_ARCH_KGDB in the architecture specific
- Kconfig file. This will enable kgdb for the architecture, and
- at that point you must create an architecture specific kgdb
- implementation.
-
-
- There are a few flags which must be set on every architecture in
- their <asm/kgdb.h> file. These are:
-
-
-
- NUMREGBYTES: The size in bytes of all of the registers, so
- that we can ensure they will all fit into a packet.
-
-
- BUFMAX: The size in bytes of the buffer GDB will read into.
- This must be larger than NUMREGBYTES.
-
-
- CACHE_FLUSH_IS_SAFE: Set to 1 if it is always safe to call
- flush_cache_range or flush_icache_range. On some architectures,
- these functions may not be safe to call on SMP since we keep other
- CPUs in a holding pattern.
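-
- For example, a hypothetical <asm/kgdb.h> fragment satisfying these
- constraints (the values are illustrative, not from any real port):
- #define NUMREGBYTES 64
- #define BUFMAX (NUMREGBYTES * 2 + 128)
- #define CACHE_FLUSH_IS_SAFE 1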
-
-
-
-
-
- There are also the following functions for the common backend,
- found in kernel/kgdb.c, that must be supplied by the
- architecture-specific backend unless marked as (optional), in
- which case a default function may be used if the architecture
- does not need to provide a specific implementation.
-
-!Iinclude/linux/kgdb.h
-
-
- kgdboc internals
-
- The kgdboc driver is actually a very thin driver that relies on the
- underlying low level hardware driver having "polling hooks" to
- which the tty driver is attached. In the initial
- implementation of kgdboc, the serial core was changed to expose a
- low level uart hook for doing polled mode reading and writing of a
- single character while in an atomic context. When kgdb makes an I/O
- request to the debugger, kgdboc invokes a callback in the serial
- core which in turn uses the callback in the uart driver. It is
- certainly possible to extend kgdboc to work with non-uart based
- consoles in the future.
-
-
- When using kgdboc with a uart, the uart driver must implement two callbacks in the struct uart_ops. Example from drivers/serial/8250.c:
-#ifdef CONFIG_CONSOLE_POLL
- .poll_get_char = serial8250_get_poll_char,
- .poll_put_char = serial8250_put_poll_char,
-#endif
-
- Any implementation specifics around creating a polling driver use the
- #ifdef CONFIG_CONSOLE_POLL, as shown above.
- Keep in mind that polling hooks have to be implemented in such a way
- that they can be called from an atomic context and have to restore
- the state of the uart chip on return such that the system can return
- to normal when the debugger detaches. You need to be very careful
- with any kind of lock you consider, because failing here is most
- likely going to mean pressing the reset button.
-
-
-
-
- Credits
-
- The following people have contributed to this document:
-
- Amit Kale <amitkale@linsyssoft.com>
- Tom Rini <trini@kernel.crashing.org>
-
- In March 2008 this document was completely rewritten by:
-
- Jason Wessel <jason.wessel@windriver.com>
-
-
-
-
-
diff --git a/trunk/Documentation/feature-removal-schedule.txt b/trunk/Documentation/feature-removal-schedule.txt
index 164c89394cff..bf0e3df8e7a1 100644
--- a/trunk/Documentation/feature-removal-schedule.txt
+++ b/trunk/Documentation/feature-removal-schedule.txt
@@ -212,7 +212,7 @@ Who: Stephen Hemminger
---------------------------
What: i386/x86_64 bzImage symlinks
-When: April 2010
+When: April 2008
Why: The i386/x86_64 merge provides a symlink to the old bzImage
location so not yet updated user space tools, e.g. package
diff --git a/trunk/Documentation/timers/timer_stats.txt b/trunk/Documentation/hrtimer/timer_stats.txt
similarity index 100%
rename from trunk/Documentation/timers/timer_stats.txt
rename to trunk/Documentation/hrtimer/timer_stats.txt
diff --git a/trunk/Documentation/timers/highres.txt b/trunk/Documentation/hrtimers/highres.txt
similarity index 100%
rename from trunk/Documentation/timers/highres.txt
rename to trunk/Documentation/hrtimers/highres.txt
diff --git a/trunk/Documentation/timers/hrtimers.txt b/trunk/Documentation/hrtimers/hrtimers.txt
similarity index 100%
rename from trunk/Documentation/timers/hrtimers.txt
rename to trunk/Documentation/hrtimers/hrtimers.txt
diff --git a/trunk/Documentation/i386/boot.txt b/trunk/Documentation/i386/boot.txt
index 2eb16100bb3f..fc49b79bc1ab 100644
--- a/trunk/Documentation/i386/boot.txt
+++ b/trunk/Documentation/i386/boot.txt
@@ -170,8 +170,6 @@ Offset Proto Name Meaning
0238/4 2.06+ cmdline_size Maximum size of the kernel command line
023C/4 2.07+ hardware_subarch Hardware subarchitecture
0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data
-0248/4 2.08+ payload_offset Offset of kernel payload
-024C/4 2.08+ payload_length Length of kernel payload
(1) For backwards compatibility, if the setup_sects field contains 0, the
real value is 4.
@@ -514,32 +512,6 @@ Protocol: 2.07+
A pointer to data that is specific to hardware subarch
-Field name: payload_offset
-Type: read
-Offset/size: 0x248/4
-Protocol: 2.08+
-
- If non-zero then this field contains the offset from the end of the
- real-mode code to the payload.
-
- The payload may be compressed. The format of both the compressed and
- uncompressed data should be determined using the standard magic
- numbers. Currently only gzip compressed ELF is used.
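-
- (An illustrative derivation from the definitions above: since the
- real-mode code occupies the first (setup_sects+1)*512 bytes of the
- image, the payload would begin at byte
- (setup_sects+1)*512 + payload_offset.)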
-
-Field name: payload_length
-Type: read
-Offset/size: 0x24c/4
-Protocol: 2.08+
-
- The length of the payload.
-
-**** THE IMAGE CHECKSUM
-
-From boot protocol version 2.08 onwards the CRC-32 is calculated over
-the entire file using the characteristic polynomial 0x04C11DB7 and an
-initial remainder of 0xffffffff. The checksum is appended to the
-file; therefore the CRC of the file up to the limit specified in the
-syssize field of the header is always 0.
**** THE KERNEL COMMAND LINE
diff --git a/trunk/Documentation/ide/ide.txt b/trunk/Documentation/ide/ide.txt
index 818676aad45a..ce7c01af30f6 100644
--- a/trunk/Documentation/ide/ide.txt
+++ b/trunk/Documentation/ide/ide.txt
@@ -269,8 +269,6 @@ Summary of ide driver parameters for kernel command line
ability to bit test for detection is currently
unknown.
- "ide=reverse" : formerly called to pci sub-system, but now local.
-
"ide=doubler" : probe/support IDE doublers on Amiga
There may be more options than shown -- use the source, Luke!
diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt
index c867f506366d..228d85a3f319 100644
--- a/trunk/Documentation/kernel-parameters.txt
+++ b/trunk/Documentation/kernel-parameters.txt
@@ -763,7 +763,7 @@ and is between 256 and 4096 characters. It is defined in the file
	Format: <io>[,<membase>[,<icn_id>[,<icn_id2>]]]
ide= [HW] (E)IDE subsystem
- Format: ide=nodma or ide=doubler or ide=reverse
+ Format: ide=nodma or ide=doubler
See Documentation/ide/ide.txt.
ide?= [HW] (E)IDE subsystem
@@ -812,19 +812,6 @@ and is between 256 and 4096 characters. It is defined in the file
inttest= [IA64]
- iommu= [x86]
- off
- force
- noforce
- biomerge
- panic
- nopanic
- merge
- nomerge
- forcesac
- soft
-
-
intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option
off
Disable intel iommu driver.
@@ -941,11 +928,6 @@ and is between 256 and 4096 characters. It is defined in the file
kstack=N [X86-32,X86-64] Print N words from the kernel stack
in oops dumps.
- kgdboc= [HW] kgdb over consoles.
- Requires a tty driver that supports console polling.
- (only serial supported for now)
- Format: <tty-device>[,baud]
-
l2cr= [PPC]
lapic [X86-32,APIC] Enable the local APIC even if BIOS
@@ -1152,11 +1134,6 @@ and is between 256 and 4096 characters. It is defined in the file
or
memmap=0x10000$0x18690000
- memtest= [KNL,X86_64] Enable memtest
- Format: <integer>
- range: 0,4 : pattern number
- default : 0
-
meye.*= [HW] Set MotionEye Camera parameters
See Documentation/video4linux/meye.txt.
diff --git a/trunk/Documentation/s390/s390dbf.txt b/trunk/Documentation/s390/s390dbf.txt
index e05420973698..0eb7c58916de 100644
--- a/trunk/Documentation/s390/s390dbf.txt
+++ b/trunk/Documentation/s390/s390dbf.txt
@@ -115,27 +115,6 @@ Return Value: Handle for generated debug area
Description: Allocates memory for a debug log
Must not be called within an interrupt handler
-----------------------------------------------------------------------------
-debug_info_t *debug_register_mode(char *name, int pages, int nr_areas,
- int buf_size, mode_t mode, uid_t uid,
- gid_t gid);
-
-Parameter: name: Name of debug log (e.g. used for debugfs entry)
- pages: Number of pages, which will be allocated per area
- nr_areas: Number of debug areas
- buf_size: Size of data area in each debug entry
- mode: File mode for debugfs files. E.g. S_IRWXUGO
- uid: User ID for debugfs files. Currently only 0 is
- supported.
- gid: Group ID for debugfs files. Currently only 0 is
- supported.
-
-Return Value: Handle for generated debug area
- NULL if register failed
-
-Description: Allocates memory for a debug log
- Must not be called within an interrupt handler
-
---------------------------------------------------------------------------
void debug_unregister (debug_info_t * id);
diff --git a/trunk/Documentation/x86/pat.txt b/trunk/Documentation/x86/pat.txt
deleted file mode 100644
index 17965f927c15..000000000000
--- a/trunk/Documentation/x86/pat.txt
+++ /dev/null
@@ -1,100 +0,0 @@
-
-PAT (Page Attribute Table)
-
-x86 Page Attribute Table (PAT) allows for setting the memory attribute at
-page-level granularity. PAT is complementary to the MTRR settings, which
-allow for setting of memory types over physical address ranges. However, PAT
-is more flexible than MTRR due to its capability to set attributes at page
-level and also due to the fact that there are no hardware limitations on the
-number of such attribute settings allowed. The added flexibility comes with
-guidelines for not having memory type aliasing for the same physical memory
-with multiple virtual addresses.
-
-PAT allows for different types of memory attributes. The most commonly used
-ones that will be supported at this time are Write-back, Uncached,
-Write-combined and Uncached Minus.
-
-There are many different APIs in the kernel that allow setting of memory
-attributes at the page level. In order to avoid aliasing, these interfaces
-should be used thoughtfully. Below is a table of interfaces available,
-their intended usage and their memory attribute relationships. Internally,
-these APIs use a reserve_memtype()/free_memtype() interface on the physical
-address range to avoid any aliasing.
-
-
--------------------------------------------------------------------
-API | RAM | ACPI,... | Reserved/Holes |
------------------------|----------|------------|------------------|
- | | | |
-ioremap | -- | UC | UC |
- | | | |
-ioremap_cache | -- | WB | WB |
- | | | |
-ioremap_nocache | -- | UC | UC |
- | | | |
-ioremap_wc | -- | -- | WC |
- | | | |
-set_memory_uc | UC | -- | -- |
- set_memory_wb | | | |
- | | | |
-set_memory_wc | WC | -- | -- |
- set_memory_wb | | | |
- | | | |
-pci sysfs resource | -- | -- | UC |
- | | | |
-pci sysfs resource_wc | -- | -- | WC |
- is IORESOURCE_PREFETCH| | | |
- | | | |
-pci proc | -- | -- | UC |
- !PCIIOC_WRITE_COMBINE | | | |
- | | | |
-pci proc | -- | -- | WC |
- PCIIOC_WRITE_COMBINE | | | |
- | | | |
-/dev/mem | -- | UC | UC |
- read-write | | | |
- | | | |
-/dev/mem | -- | UC | UC |
- mmap SYNC flag | | | |
- | | | |
-/dev/mem | -- | WB/WC/UC | WB/WC/UC |
- mmap !SYNC flag | |(from exist-| (from exist- |
- and | | ing alias)| ing alias) |
- any alias to this area| | | |
- | | | |
-/dev/mem | -- | WB | WB |
- mmap !SYNC flag | | | |
- no alias to this area | | | |
- and | | | |
- MTRR says WB | | | |
- | | | |
-/dev/mem | -- | -- | UC_MINUS |
- mmap !SYNC flag | | | |
- no alias to this area | | | |
- and | | | |
- MTRR says !WB | | | |
- | | | |
--------------------------------------------------------------------
-
-Notes:
-
--- in the above table mean "Not suggested usage for the API". Some of the --'s
-are strictly enforced by the kernel. Some others are not really enforced
-today, but may be enforced in future.
-
-For ioremap and pci access through /sys or /proc - the actual type returned
-can be more restrictive, in case of any existing aliasing for that address.
-For example: if there is an existing uncached mapping, a new ioremap_wc can
-return an uncached mapping in place of the requested write-combine.
-
-set_memory_[uc|wc] and set_memory_wb should be used in pairs, where the driver
-will first make a region uc or wc and switch it back to wb after use.
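-
-For example, an illustrative driver sketch pairing these calls (addr and
-numpages are hypothetical variables):
-
-	set_memory_wc(addr, numpages);	/* make the RAM range write-combined */
-	/* ... stream data through the buffer ... */
-	set_memory_wb(addr, numpages);	/* restore write-back before freeing */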
-
-Over time writes to /proc/mtrr will be deprecated in favor of using PAT based
-interfaces. Users writing to /proc/mtrr are advised to use the above interfaces.
-
-Drivers should use ioremap_[uc|wc] to access PCI BARs with [uc|wc] access
-types.
-
-Drivers should use set_memory_[uc|wc] to set access type for RAM ranges.
-
diff --git a/trunk/Documentation/x86_64/boot-options.txt b/trunk/Documentation/x86_64/boot-options.txt
index b0c7b6c4abda..34abae4e9442 100644
--- a/trunk/Documentation/x86_64/boot-options.txt
+++ b/trunk/Documentation/x86_64/boot-options.txt
@@ -307,8 +307,3 @@ Debugging
stuck (default)
Miscellaneous
-
- nogbpages
- Do not use GB pages for kernel direct mappings.
- gbpages
- Use GB pages for kernel direct mappings.
diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS
index 3eceebb48c92..e46775868019 100644
--- a/trunk/MAINTAINERS
+++ b/trunk/MAINTAINERS
@@ -2319,12 +2319,6 @@ L: linux-kernel@vger.kernel.org
L: kexec@lists.infradead.org
S: Maintained
-KGDB
-P: Jason Wessel
-M: jason.wessel@windriver.com
-L: kgdb-bugreport@lists.sourceforge.net
-S: Maintained
-
KPROBES
P: Ananth N Mavinakayanahalli
M: ananth@in.ibm.com
diff --git a/trunk/arch/alpha/kernel/Makefile b/trunk/arch/alpha/kernel/Makefile
index ac706c1d7ada..dccf05245d4d 100644
--- a/trunk/arch/alpha/kernel/Makefile
+++ b/trunk/arch/alpha/kernel/Makefile
@@ -7,7 +7,7 @@ EXTRA_AFLAGS := $(KBUILD_CFLAGS)
EXTRA_CFLAGS := -Werror -Wno-sign-compare
obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \
- irq_alpha.o signal.o setup.o ptrace.o time.o \
+ irq_alpha.o signal.o setup.o ptrace.o time.o semaphore.o \
alpha_ksyms.o systbls.o err_common.o io.o
obj-$(CONFIG_VGA_HOSE) += console.o
diff --git a/trunk/arch/alpha/kernel/alpha_ksyms.c b/trunk/arch/alpha/kernel/alpha_ksyms.c
index d96e742d4dc2..e9762a33b043 100644
--- a/trunk/arch/alpha/kernel/alpha_ksyms.c
+++ b/trunk/arch/alpha/kernel/alpha_ksyms.c
@@ -77,6 +77,15 @@ EXPORT_SYMBOL(__do_clear_user);
EXPORT_SYMBOL(__strncpy_from_user);
EXPORT_SYMBOL(__strnlen_user);
+/* Semaphore helper functions. */
+EXPORT_SYMBOL(__down_failed);
+EXPORT_SYMBOL(__down_failed_interruptible);
+EXPORT_SYMBOL(__up_wakeup);
+EXPORT_SYMBOL(down);
+EXPORT_SYMBOL(down_interruptible);
+EXPORT_SYMBOL(down_trylock);
+EXPORT_SYMBOL(up);
+
/*
* SMP-specific symbols.
*/
diff --git a/trunk/arch/alpha/kernel/semaphore.c b/trunk/arch/alpha/kernel/semaphore.c
new file mode 100644
index 000000000000..8d2982aa1b8d
--- /dev/null
+++ b/trunk/arch/alpha/kernel/semaphore.c
@@ -0,0 +1,224 @@
+/*
+ * Alpha semaphore implementation.
+ *
+ * (C) Copyright 1996 Linus Torvalds
+ * (C) Copyright 1999, 2000 Richard Henderson
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+
+/*
+ * This is basically the PPC semaphore scheme ported to use
+ * the Alpha ll/sc sequences, so see the PPC code for
+ * credits.
+ */
+
+/*
+ * Atomically update sem->count.
+ * This does the equivalent of the following:
+ *
+ * old_count = sem->count;
+ * tmp = MAX(old_count, 0) + incr;
+ * sem->count = tmp;
+ * return old_count;
+ */
+static inline int __sem_update_count(struct semaphore *sem, int incr)
+{
+ long old_count, tmp = 0;
+
+ __asm__ __volatile__(
+ "1: ldl_l %0,%2\n"
+ " cmovgt %0,%0,%1\n"
+ " addl %1,%3,%1\n"
+ " stl_c %1,%2\n"
+ " beq %1,2f\n"
+ " mb\n"
+ ".subsection 2\n"
+ "2: br 1b\n"
+ ".previous"
+ : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
+ : "Ir" (incr), "1" (tmp), "m" (sem->count));
+
+ return old_count;
+}
+
+/*
+ * Perform the "down" function. Return zero for semaphore acquired,
+ * return negative for signalled out of the function.
+ *
+ * If called from down, the return is ignored and the wait loop is
+ * not interruptible. This means that a task waiting on a semaphore
+ * using "down()" cannot be killed until someone does an "up()" on
+ * the semaphore.
+ *
+ * If called from down_interruptible, the return value gets checked
+ * upon return. If the return value is negative then the task continues
+ * with the negative value in the return register (it can be tested by
+ * the caller).
+ *
+ * Either form may be used in conjunction with "up()".
+ */
+
+void __sched
+__down_failed(struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+#ifdef CONFIG_DEBUG_SEMAPHORE
+ printk("%s(%d): down failed(%p)\n",
+ tsk->comm, task_pid_nr(tsk), sem);
+#endif
+
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ wmb();
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ /*
+ * Try to get the semaphore. If the count is > 0, then we've
+ * got the semaphore; we decrement count and exit the loop.
+ * If the count is 0 or negative, we set it to -1, indicating
+ * that we are asleep, and then sleep.
+ */
+ while (__sem_update_count(sem, -1) <= 0) {
+ schedule();
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+
+ /*
+ * If there are any more sleepers, wake one of them up so
+ * that it can either get the semaphore, or set count to -1
+ * indicating that there are still processes sleeping.
+ */
+ wake_up(&sem->wait);
+
+#ifdef CONFIG_DEBUG_SEMAPHORE
+ printk("%s(%d): down acquired(%p)\n",
+ tsk->comm, task_pid_nr(tsk), sem);
+#endif
+}
+
+int __sched
+__down_failed_interruptible(struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ long ret = 0;
+
+#ifdef CONFIG_DEBUG_SEMAPHORE
+ printk("%s(%d): down failed(%p)\n",
+ tsk->comm, task_pid_nr(tsk), sem);
+#endif
+
+ tsk->state = TASK_INTERRUPTIBLE;
+ wmb();
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ while (__sem_update_count(sem, -1) <= 0) {
+ if (signal_pending(current)) {
+ /*
+ * A signal is pending - give up trying.
+ * Set sem->count to 0 if it is negative,
+ * since we are no longer sleeping.
+ */
+ __sem_update_count(sem, 0);
+ ret = -EINTR;
+ break;
+ }
+ schedule();
+ set_task_state(tsk, TASK_INTERRUPTIBLE);
+ }
+
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+ wake_up(&sem->wait);
+
+#ifdef CONFIG_DEBUG_SEMAPHORE
+ printk("%s(%d): down %s(%p)\n",
+ current->comm, task_pid_nr(current),
+ (ret < 0 ? "interrupted" : "acquired"), sem);
+#endif
+ return ret;
+}
+
+void
+__up_wakeup(struct semaphore *sem)
+{
+ /*
+ * Note that we incremented count in up() before we came here,
+ * but that was ineffective since the result was <= 0, and
+ * any negative value of count is equivalent to 0.
+ * This ends up setting count to 1, unless count is now > 0
+ * (i.e. because some other cpu has called up() in the meantime),
+ * in which case we just increment count.
+ */
+ __sem_update_count(sem, 1);
+ wake_up(&sem->wait);
+}
+
+void __sched
+down(struct semaphore *sem)
+{
+#ifdef WAITQUEUE_DEBUG
+ CHECK_MAGIC(sem->__magic);
+#endif
+#ifdef CONFIG_DEBUG_SEMAPHORE
+ printk("%s(%d): down(%p) from %p\n",
+ current->comm, task_pid_nr(current), sem,
+ atomic_read(&sem->count), __builtin_return_address(0));
+#endif
+ __down(sem);
+}
+
+int __sched
+down_interruptible(struct semaphore *sem)
+{
+#ifdef WAITQUEUE_DEBUG
+ CHECK_MAGIC(sem->__magic);
+#endif
+#ifdef CONFIG_DEBUG_SEMAPHORE
+ printk("%s(%d): down(%p) from %p\n",
+ current->comm, task_pid_nr(current), sem,
+ atomic_read(&sem->count), __builtin_return_address(0));
+#endif
+ return __down_interruptible(sem);
+}
+
+int
+down_trylock(struct semaphore *sem)
+{
+ int ret;
+
+#ifdef WAITQUEUE_DEBUG
+ CHECK_MAGIC(sem->__magic);
+#endif
+
+ ret = __down_trylock(sem);
+
+#ifdef CONFIG_DEBUG_SEMAPHORE
+ printk("%s(%d): down_trylock %s from %p\n",
+ current->comm, task_pid_nr(current),
+ ret ? "failed" : "acquired",
+ __builtin_return_address(0));
+#endif
+
+ return ret;
+}
+
+void
+up(struct semaphore *sem)
+{
+#ifdef WAITQUEUE_DEBUG
+ CHECK_MAGIC(sem->__magic);
+#endif
+#ifdef CONFIG_DEBUG_SEMAPHORE
+ printk("%s(%d): up(%p) from %p\n",
+ current->comm, task_pid_nr(current), sem,
+ atomic_read(&sem->count), __builtin_return_address(0));
+#endif
+ __up(sem);
+}
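+
+/*
+ * A minimal usage sketch, assuming a hypothetical driver-side caller;
+ * foo_sem and the calling context are illustrative only:
+ *
+ *	static DECLARE_MUTEX(foo_sem);		  (count initialized to 1)
+ *
+ *	if (down_interruptible(&foo_sem))	  (may sleep; -EINTR on signal)
+ *		return -ERESTARTSYS;
+ *	... critical section ...
+ *	up(&foo_sem);				  (wakes one sleeper, if any)
+ */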
diff --git a/trunk/arch/arm/kernel/Makefile b/trunk/arch/arm/kernel/Makefile
index 6235f72a14f0..00d44c6fbfe9 100644
--- a/trunk/arch/arm/kernel/Makefile
+++ b/trunk/arch/arm/kernel/Makefile
@@ -7,7 +7,7 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
# Object file lists.
obj-y := compat.o entry-armv.o entry-common.o irq.o \
- process.o ptrace.o setup.o signal.o \
+ process.o ptrace.o semaphore.o setup.o signal.o \
sys_arm.o stacktrace.o time.o traps.o
obj-$(CONFIG_ISA_DMA_API) += dma.o
diff --git a/trunk/arch/arm/kernel/semaphore.c b/trunk/arch/arm/kernel/semaphore.c
new file mode 100644
index 000000000000..981fe5c6ccbe
--- /dev/null
+++ b/trunk/arch/arm/kernel/semaphore.c
@@ -0,0 +1,221 @@
+/*
+ * ARM semaphore implementation, taken from
+ *
+ * i386 semaphore implementation.
+ *
+ * (C) Copyright 1999 Linus Torvalds
+ *
+ * Modified for ARM by Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+
+#include <asm/semaphore.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleeping"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * "sleeping" and the contention routine ordering is
+ * protected by the semaphore spinlock.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+
+/*
+ * Logic:
+ * - only on a boundary condition do we need to care. When we go
+ * from a negative count to a non-negative, we wake people up.
+ * - when we go from a non-negative count to a negative do we
+ * (a) synchronize with the "sleeper" count and (b) make sure
+ * that we're on the wakeup list before we synchronize so that
+ * we cannot lose wakeup events.
+ */
+
+void __up(struct semaphore *sem)
+{
+ wake_up(&sem->wait);
+}
+
+static DEFINE_SPINLOCK(semaphore_lock);
+
+void __sched __down(struct semaphore * sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ spin_lock_irq(&semaphore_lock);
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irq(&semaphore_lock);
+
+ schedule();
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ spin_lock_irq(&semaphore_lock);
+ }
+ spin_unlock_irq(&semaphore_lock);
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+ wake_up(&sem->wait);
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ tsk->state = TASK_INTERRUPTIBLE;
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ spin_lock_irq(&semaphore_lock);
+ sem->sleepers ++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * With signals pending, this turns into
+ * the trylock failure case - we won't be
+	 * sleeping, and we can't get the lock as
+ * it has contention. Just correct the count
+ * and exit.
+ */
+ if (signal_pending(current)) {
+ retval = -EINTR;
+ sem->sleepers = 0;
+ atomic_add(sleepers, &sem->count);
+ break;
+ }
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock. The
+ * "-1" is because we're still hoping to get
+ * the lock.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irq(&semaphore_lock);
+
+ schedule();
+ tsk->state = TASK_INTERRUPTIBLE;
+ spin_lock_irq(&semaphore_lock);
+ }
+ spin_unlock_irq(&semaphore_lock);
+ tsk->state = TASK_RUNNING;
+ remove_wait_queue(&sem->wait, &wait);
+ wake_up(&sem->wait);
+ return retval;
+}
+
+/*
+ * Trylock failed - make sure we correct for
+ * having decremented the count.
+ *
+ * We could have done the trylock with a
+ * single "cmpxchg" without failure cases,
+ * but then it wouldn't work on a 386.
+ */
+int __down_trylock(struct semaphore * sem)
+{
+ int sleepers;
+ unsigned long flags;
+
+ spin_lock_irqsave(&semaphore_lock, flags);
+ sleepers = sem->sleepers + 1;
+ sem->sleepers = 0;
+
+ /*
+ * Add "everybody else" and us into it. They aren't
+ * playing, because we own the spinlock.
+ */
+ if (!atomic_add_negative(sleepers, &sem->count))
+ wake_up(&sem->wait);
+
+ spin_unlock_irqrestore(&semaphore_lock, flags);
+ return 1;
+}
+
+/*
+ * The semaphore operations have a special calling sequence that
+ * allow us to do a simpler in-line version of them. These routines
+ * need to convert that sequence back into the C sequence when
+ * there is contention on the semaphore.
+ *
+ * ip contains the semaphore pointer on entry. Save the C-clobbered
+ * registers (r0 to r3 and lr), but not ip, as we use it as a return
+ * value in some cases..
+ * To remain AAPCS compliant (64-bit stack align) we save r4 as well.
+ */
+asm(" .section .sched.text,\"ax\",%progbits \n\
+ .align 5 \n\
+ .globl __down_failed \n\
+__down_failed: \n\
+ stmfd sp!, {r0 - r4, lr} \n\
+ mov r0, ip \n\
+ bl __down \n\
+ ldmfd sp!, {r0 - r4, pc} \n\
+ \n\
+ .align 5 \n\
+ .globl __down_interruptible_failed \n\
+__down_interruptible_failed: \n\
+ stmfd sp!, {r0 - r4, lr} \n\
+ mov r0, ip \n\
+ bl __down_interruptible \n\
+ mov ip, r0 \n\
+ ldmfd sp!, {r0 - r4, pc} \n\
+ \n\
+ .align 5 \n\
+ .globl __down_trylock_failed \n\
+__down_trylock_failed: \n\
+ stmfd sp!, {r0 - r4, lr} \n\
+ mov r0, ip \n\
+ bl __down_trylock \n\
+ mov ip, r0 \n\
+ ldmfd sp!, {r0 - r4, pc} \n\
+ \n\
+ .align 5 \n\
+ .globl __up_wakeup \n\
+__up_wakeup: \n\
+ stmfd sp!, {r0 - r4, lr} \n\
+ mov r0, ip \n\
+ bl __up \n\
+ ldmfd sp!, {r0 - r4, pc} \n\
+ ");
+
+EXPORT_SYMBOL(__down_failed);
+EXPORT_SYMBOL(__down_interruptible_failed);
+EXPORT_SYMBOL(__down_trylock_failed);
+EXPORT_SYMBOL(__up_wakeup);
diff --git a/trunk/arch/avr32/kernel/Makefile b/trunk/arch/avr32/kernel/Makefile
index 18229d0d1861..e4b6d122b033 100644
--- a/trunk/arch/avr32/kernel/Makefile
+++ b/trunk/arch/avr32/kernel/Makefile
@@ -6,7 +6,7 @@ extra-y := head.o vmlinux.lds
obj-$(CONFIG_SUBARCH_AVR32B) += entry-avr32b.o
obj-y += syscall_table.o syscall-stubs.o irq.o
-obj-y += setup.o traps.o ocd.o ptrace.o
+obj-y += setup.o traps.o semaphore.o ocd.o ptrace.o
obj-y += signal.o sys_avr32.o process.o time.o
obj-y += init_task.o switch_to.o cpu.o
obj-$(CONFIG_MODULES) += module.o avr32_ksyms.o
diff --git a/trunk/arch/avr32/kernel/semaphore.c b/trunk/arch/avr32/kernel/semaphore.c
new file mode 100644
index 000000000000..1e2705a05016
--- /dev/null
+++ b/trunk/arch/avr32/kernel/semaphore.c
@@ -0,0 +1,148 @@
+/*
+ * AVR32 semaphore implementation.
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on linux/arch/i386/kernel/semaphore.c
+ * Copyright (C) 1999 Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+
+#include <asm/semaphore.h>
+#include <asm/atomic.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleeping"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * "sleeping" and the contention routine ordering is protected
+ * by the spinlock in the semaphore's waitqueue head.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+
+/*
+ * Logic:
+ * - only on a boundary condition do we need to care. When we go
+ * from a negative count to a non-negative, we wake people up.
+ * - when we go from a non-negative count to a negative do we
+ * (a) synchronize with the "sleeper" count and (b) make sure
+ * that we're on the wakeup list before we synchronize so that
+ * we cannot lose wakeup events.
+ */
+
+void __up(struct semaphore *sem)
+{
+ wake_up(&sem->wait);
+}
+EXPORT_SYMBOL(__up);
+
+void __sched __down(struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ unsigned long flags;
+
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock in
+ * the wait_queue_head.
+ */
+ if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ schedule();
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ }
+ remove_wait_queue_locked(&sem->wait, &wait);
+ wake_up_locked(&sem->wait);
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+ tsk->state = TASK_RUNNING;
+}
+EXPORT_SYMBOL(__down);
+
+int __sched __down_interruptible(struct semaphore *sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ unsigned long flags;
+
+ tsk->state = TASK_INTERRUPTIBLE;
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * With signals pending, this turns into the trylock
+ * failure case - we won't be sleeping, and we can't
+ * get the lock as it has contention. Just correct the
+ * count and exit.
+ */
+ if (signal_pending(current)) {
+ retval = -EINTR;
+ sem->sleepers = 0;
+ atomic_add(sleepers, &sem->count);
+ break;
+ }
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock in
+ * the wait_queue_head.
+ */
+ if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ schedule();
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ tsk->state = TASK_INTERRUPTIBLE;
+ }
+ remove_wait_queue_locked(&sem->wait, &wait);
+ wake_up_locked(&sem->wait);
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ tsk->state = TASK_RUNNING;
+ return retval;
+}
+EXPORT_SYMBOL(__down_interruptible);
diff --git a/trunk/arch/blackfin/Kconfig b/trunk/arch/blackfin/Kconfig
index 2dd1f300a5cf..589c6aca4803 100644
--- a/trunk/arch/blackfin/Kconfig
+++ b/trunk/arch/blackfin/Kconfig
@@ -31,6 +31,10 @@ config ZONE_DMA
bool
default y
+config SEMAPHORE_SLEEPERS
+ bool
+ default y
+
config GENERIC_FIND_NEXT_BIT
bool
default y
diff --git a/trunk/arch/blackfin/kernel/bfin_ksyms.c b/trunk/arch/blackfin/kernel/bfin_ksyms.c
index 053edff6c0d8..0bfbb269e350 100644
--- a/trunk/arch/blackfin/kernel/bfin_ksyms.c
+++ b/trunk/arch/blackfin/kernel/bfin_ksyms.c
@@ -42,6 +42,11 @@ EXPORT_SYMBOL(ip_fast_csum);
EXPORT_SYMBOL(kernel_thread);
+EXPORT_SYMBOL(__up);
+EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__down_trylock);
+EXPORT_SYMBOL(__down_interruptible);
+
EXPORT_SYMBOL(is_in_rom);
EXPORT_SYMBOL(bfin_return_from_exception);
diff --git a/trunk/arch/cris/kernel/Makefile b/trunk/arch/cris/kernel/Makefile
index ee7bcd4d20b2..c8e8ea570989 100644
--- a/trunk/arch/cris/kernel/Makefile
+++ b/trunk/arch/cris/kernel/Makefile
@@ -5,7 +5,8 @@
extra-y := vmlinux.lds
-obj-y := process.o traps.o irq.o ptrace.o setup.o time.o sys_cris.o
+obj-y := process.o traps.o irq.o ptrace.o setup.o \
+ time.o sys_cris.o semaphore.o
obj-$(CONFIG_MODULES) += crisksyms.o
obj-$(CONFIG_MODULES) += module.o
diff --git a/trunk/arch/cris/kernel/crisksyms.c b/trunk/arch/cris/kernel/crisksyms.c
index 7ac000f6a888..62f0e752915a 100644
--- a/trunk/arch/cris/kernel/crisksyms.c
+++ b/trunk/arch/cris/kernel/crisksyms.c
@@ -9,6 +9,7 @@
#include
#include
+#include <asm/semaphore.h>
#include
#include
#include
@@ -48,6 +49,12 @@ EXPORT_SYMBOL(__negdi2);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(iounmap);
+/* Semaphore functions */
+EXPORT_SYMBOL(__up);
+EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__down_interruptible);
+EXPORT_SYMBOL(__down_trylock);
+
/* Userspace access functions */
EXPORT_SYMBOL(__copy_user_zeroing);
EXPORT_SYMBOL(__copy_user);
diff --git a/trunk/arch/cris/kernel/semaphore.c b/trunk/arch/cris/kernel/semaphore.c
new file mode 100644
index 000000000000..f137a439041f
--- /dev/null
+++ b/trunk/arch/cris/kernel/semaphore.c
@@ -0,0 +1,129 @@
+/*
+ * Generic semaphore code. Buyer beware. Do your own
+ * specific changes in <asm/semaphore-helper.h>
+ */
+
+#include <linux/sched.h>
+#include <asm/semaphore-helper.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to sleep, while the "waking" variable is
+ * incremented when the "up()" code goes to wake up waiting
+ * processes.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * waking_non_zero() (from asm/semaphore.h) must execute
+ * atomically.
+ *
+ * When __up() is called, the count was negative before
+ * incrementing it, and we need to wake up somebody.
+ *
+ * This routine adds one to the count of processes that need to
+ * wake up and exit. ALL waiting processes actually wake up but
+ * only the one that gets to the "waking" field first will gate
+ * through and acquire the semaphore. The others will go back
+ * to sleep.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+void __up(struct semaphore *sem)
+{
+ wake_one_more(sem);
+ wake_up(&sem->wait);
+}
+
+/*
+ * Perform the "down" function. Return zero for semaphore acquired,
+ * return negative for signalled out of the function.
+ *
+ * If called from __down, the return is ignored and the wait loop is
+ * not interruptible. This means that a task waiting on a semaphore
+ * using "down()" cannot be killed until someone does an "up()" on
+ * the semaphore.
+ *
+ * If called from __down_interruptible, the return value gets checked
+ * upon return. If the return value is negative then the task continues
+ * with the negative value in the return register (it can be tested by
+ * the caller).
+ *
+ * Either form may be used in conjunction with "up()".
+ *
+ */
+
+#define DOWN_VAR \
+ struct task_struct *tsk = current; \
+ wait_queue_t wait; \
+ init_waitqueue_entry(&wait, tsk);
+
+#define DOWN_HEAD(task_state) \
+ \
+ \
+ tsk->state = (task_state); \
+ add_wait_queue(&sem->wait, &wait); \
+ \
+ /* \
+ * Ok, we're set up. sem->count is known to be less than zero \
+ * so we must wait. \
+ * \
+ * We can let go the lock for purposes of waiting. \
+ * We re-acquire it after awaking so as to protect \
+ * all semaphore operations. \
+ * \
+ * If "up()" is called before we call waking_non_zero() then \
+ * we will catch it right away. If it is called later then \
+ * we will have to go through a wakeup cycle to catch it. \
+ * \
+ * Multiple waiters contend for the semaphore lock to see \
+ * who gets to gate through and who has to wait some more. \
+ */ \
+ for (;;) {
+
+#define DOWN_TAIL(task_state) \
+ tsk->state = (task_state); \
+ } \
+ tsk->state = TASK_RUNNING; \
+ remove_wait_queue(&sem->wait, &wait);
+
+void __sched __down(struct semaphore * sem)
+{
+ DOWN_VAR
+ DOWN_HEAD(TASK_UNINTERRUPTIBLE)
+ if (waking_non_zero(sem))
+ break;
+ schedule();
+ DOWN_TAIL(TASK_UNINTERRUPTIBLE)
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ int ret = 0;
+ DOWN_VAR
+ DOWN_HEAD(TASK_INTERRUPTIBLE)
+
+ ret = waking_non_zero_interruptible(sem, tsk);
+ if (ret)
+ {
+ if (ret == 1)
+ /* ret != 0 only if we get interrupted -arca */
+ ret = 0;
+ break;
+ }
+ schedule();
+ DOWN_TAIL(TASK_INTERRUPTIBLE)
+ return ret;
+}
+
+int __down_trylock(struct semaphore * sem)
+{
+ return waking_non_zero_trylock(sem);
+}
diff --git a/trunk/arch/frv/kernel/Makefile b/trunk/arch/frv/kernel/Makefile
index c36f70b6699a..e8f73ed28b52 100644
--- a/trunk/arch/frv/kernel/Makefile
+++ b/trunk/arch/frv/kernel/Makefile
@@ -9,7 +9,7 @@ extra-y:= head.o init_task.o vmlinux.lds
obj-y := $(heads-y) entry.o entry-table.o break.o switch_to.o kernel_thread.o \
kernel_execve.o process.o traps.o ptrace.o signal.o dma.o \
- sys_frv.o time.o setup.o frv_ksyms.o \
+ sys_frv.o time.o semaphore.o setup.o frv_ksyms.o \
debug-stub.o irq.o sleep.o uaccess.o
obj-$(CONFIG_GDBSTUB) += gdb-stub.o gdb-io.o
diff --git a/trunk/arch/frv/kernel/frv_ksyms.c b/trunk/arch/frv/kernel/frv_ksyms.c
index 0316b3c50eff..f772704b3d28 100644
--- a/trunk/arch/frv/kernel/frv_ksyms.c
+++ b/trunk/arch/frv/kernel/frv_ksyms.c
@@ -12,6 +12,7 @@
#include
#include
#include
+#include <asm/semaphore.h>
#include
#include
#include
diff --git a/trunk/arch/frv/kernel/semaphore.c b/trunk/arch/frv/kernel/semaphore.c
new file mode 100644
index 000000000000..7ee3a147b471
--- /dev/null
+++ b/trunk/arch/frv/kernel/semaphore.c
@@ -0,0 +1,155 @@
+/* semaphore.c: FR-V semaphores
+ *
+ * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from lib/rwsem-spinlock.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <asm/semaphore.h>
+
+struct sem_waiter {
+ struct list_head list;
+ struct task_struct *task;
+};
+
+#ifdef CONFIG_DEBUG_SEMAPHORE
+void semtrace(struct semaphore *sem, const char *str)
+{
+ if (sem->debug)
+ printk("[%d] %s({%d,%d})\n",
+ current->pid,
+ str,
+ sem->counter,
+ list_empty(&sem->wait_list) ? 0 : 1);
+}
+#else
+#define semtrace(SEM,STR) do { } while(0)
+#endif
+
+/*
+ * wait for a token to be granted from a semaphore
+ * - entered with lock held and interrupts disabled
+ */
+void __down(struct semaphore *sem, unsigned long flags)
+{
+ struct task_struct *tsk = current;
+ struct sem_waiter waiter;
+
+ semtrace(sem, "Entering __down");
+
+ /* set up my own style of waitqueue */
+ waiter.task = tsk;
+ get_task_struct(tsk);
+
+ list_add_tail(&waiter.list, &sem->wait_list);
+
+ /* we don't need to touch the semaphore struct anymore */
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ /* wait to be given the semaphore */
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+
+ for (;;) {
+ if (list_empty(&waiter.list))
+ break;
+ schedule();
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ }
+
+ tsk->state = TASK_RUNNING;
+ semtrace(sem, "Leaving __down");
+}
+
+EXPORT_SYMBOL(__down);
+
+/*
+ * interruptibly wait for a token to be granted from a semaphore
+ * - entered with lock held and interrupts disabled
+ */
+int __down_interruptible(struct semaphore *sem, unsigned long flags)
+{
+ struct task_struct *tsk = current;
+ struct sem_waiter waiter;
+ int ret;
+
+ semtrace(sem,"Entering __down_interruptible");
+
+ /* set up my own style of waitqueue */
+ waiter.task = tsk;
+ get_task_struct(tsk);
+
+ list_add_tail(&waiter.list, &sem->wait_list);
+
+ /* we don't need to touch the semaphore struct anymore */
+ set_task_state(tsk, TASK_INTERRUPTIBLE);
+
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ /* wait to be given the semaphore */
+ ret = 0;
+ for (;;) {
+ if (list_empty(&waiter.list))
+ break;
+ if (unlikely(signal_pending(current)))
+ goto interrupted;
+ schedule();
+ set_task_state(tsk, TASK_INTERRUPTIBLE);
+ }
+
+ out:
+ tsk->state = TASK_RUNNING;
+ semtrace(sem, "Leaving __down_interruptible");
+ return ret;
+
+ interrupted:
+ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ if (!list_empty(&waiter.list)) {
+ list_del(&waiter.list);
+ ret = -EINTR;
+ }
+
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+ if (ret == -EINTR)
+ put_task_struct(current);
+ goto out;
+}
+
+EXPORT_SYMBOL(__down_interruptible);
+
+/*
+ * release a single token back to a semaphore
+ * - entered with lock held and interrupts disabled
+ */
+void __up(struct semaphore *sem)
+{
+ struct task_struct *tsk;
+ struct sem_waiter *waiter;
+
+ semtrace(sem,"Entering __up");
+
+ /* grant the token to the process at the front of the queue */
+ waiter = list_entry(sem->wait_list.next, struct sem_waiter, list);
+
+ /* We must be careful not to touch 'waiter' after we set ->task = NULL.
+ * It is allocated on the waiter's stack and may become invalid at
+ * any time after that point (due to a wakeup from another source).
+ */
+ list_del_init(&waiter->list);
+ tsk = waiter->task;
+ mb();
+ waiter->task = NULL;
+ wake_up_process(tsk);
+ put_task_struct(tsk);
+
+ semtrace(sem,"Leaving __up");
+}
+
+EXPORT_SYMBOL(__up);
diff --git a/trunk/arch/h8300/kernel/Makefile b/trunk/arch/h8300/kernel/Makefile
index 6c248c3c5c3b..874f6aefee65 100644
--- a/trunk/arch/h8300/kernel/Makefile
+++ b/trunk/arch/h8300/kernel/Makefile
@@ -5,7 +5,7 @@
extra-y := vmlinux.lds
obj-y := process.o traps.o ptrace.o irq.o \
- sys_h8300.o time.o signal.o \
+ sys_h8300.o time.o semaphore.o signal.o \
setup.o gpio.o init_task.o syscalls.o \
entry.o
diff --git a/trunk/arch/h8300/kernel/h8300_ksyms.c b/trunk/arch/h8300/kernel/h8300_ksyms.c
index 6866bd9c7fb4..d1b15267ac81 100644
--- a/trunk/arch/h8300/kernel/h8300_ksyms.c
+++ b/trunk/arch/h8300/kernel/h8300_ksyms.c
@@ -12,6 +12,7 @@
#include
#include
#include
+#include <asm/semaphore.h>
#include
#include
#include
diff --git a/trunk/arch/h8300/kernel/semaphore.c b/trunk/arch/h8300/kernel/semaphore.c
new file mode 100644
index 000000000000..d12cbbfe6ebd
--- /dev/null
+++ b/trunk/arch/h8300/kernel/semaphore.c
@@ -0,0 +1,132 @@
+/*
+ * Generic semaphore code. Buyer beware. Do your own
+ * specific changes in <asm/semaphore-helper.h>
+ */
+
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <asm/semaphore-helper.h>
+
+#ifndef CONFIG_RMW_INSNS
+spinlock_t semaphore_wake_lock;
+#endif
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to sleep, while the "waking" variable is
+ * incremented when the "up()" code goes to wake up waiting
+ * processes.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * waking_non_zero() (from asm/semaphore.h) must execute
+ * atomically.
+ *
+ * When __up() is called, the count was negative before
+ * incrementing it, and we need to wake up somebody.
+ *
+ * This routine adds one to the count of processes that need to
+ * wake up and exit. ALL waiting processes actually wake up but
+ * only the one that gets to the "waking" field first will gate
+ * through and acquire the semaphore. The others will go back
+ * to sleep.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+void __up(struct semaphore *sem)
+{
+ wake_one_more(sem);
+ wake_up(&sem->wait);
+}
+
+/*
+ * Perform the "down" function. Return zero for semaphore acquired,
+ * return negative for signalled out of the function.
+ *
+ * If called from __down, the return is ignored and the wait loop is
+ * not interruptible. This means that a task waiting on a semaphore
+ * using "down()" cannot be killed until someone does an "up()" on
+ * the semaphore.
+ *
+ * If called from __down_interruptible, the return value gets checked
+ * upon return. If the return value is negative then the task continues
+ * with the negative value in the return register (it can be tested by
+ * the caller).
+ *
+ * Either form may be used in conjunction with "up()".
+ *
+ */
+
+
+#define DOWN_HEAD(task_state) \
+ \
+ \
+ current->state = (task_state); \
+ add_wait_queue(&sem->wait, &wait); \
+ \
+ /* \
+ * Ok, we're set up. sem->count is known to be less than zero \
+ * so we must wait. \
+ * \
+ * We can let go the lock for purposes of waiting. \
+ * We re-acquire it after awaking so as to protect \
+ * all semaphore operations. \
+ * \
+ * If "up()" is called before we call waking_non_zero() then \
+ * we will catch it right away. If it is called later then \
+ * we will have to go through a wakeup cycle to catch it. \
+ * \
+ * Multiple waiters contend for the semaphore lock to see \
+ * who gets to gate through and who has to wait some more. \
+ */ \
+ for (;;) {
+
+#define DOWN_TAIL(task_state) \
+ current->state = (task_state); \
+ } \
+ current->state = TASK_RUNNING; \
+ remove_wait_queue(&sem->wait, &wait);
+
+void __sched __down(struct semaphore * sem)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ DOWN_HEAD(TASK_UNINTERRUPTIBLE)
+ if (waking_non_zero(sem))
+ break;
+ schedule();
+ DOWN_TAIL(TASK_UNINTERRUPTIBLE)
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ DECLARE_WAITQUEUE(wait, current);
+ int ret = 0;
+
+ DOWN_HEAD(TASK_INTERRUPTIBLE)
+
+ ret = waking_non_zero_interruptible(sem, current);
+ if (ret)
+ {
+ if (ret == 1)
+ /* ret != 0 only if we get interrupted -arca */
+ ret = 0;
+ break;
+ }
+ schedule();
+ DOWN_TAIL(TASK_INTERRUPTIBLE)
+ return ret;
+}
+
+int __down_trylock(struct semaphore * sem)
+{
+ return waking_non_zero_trylock(sem);
+}
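
The scheme the comment block above describes is a two-counter gate: up() banks a wakeup in "waking" via wake_one_more(), and every woken task races through waking_non_zero() to consume it; exactly one wins, the rest go back to sleep. A user-space sketch of that gate, assuming a plain mutex is an acceptable model of the helper's required atomicity (names are ours, not the kernel's):

    #include <pthread.h>
    #include <stdio.h>

    struct demo_sem {
            int count;              /* < 0 while tasks are sleeping */
            int waking;             /* wakeups granted but not yet consumed */
            pthread_mutex_t lock;   /* stands in for semaphore_wake_lock */
    };

    static int waking_non_zero(struct demo_sem *sem)
    {
            int ret = 0;

            pthread_mutex_lock(&sem->lock);
            if (sem->waking > 0) {          /* a token is available... */
                    sem->waking--;          /* ...consume it and gate through */
                    ret = 1;
            }
            pthread_mutex_unlock(&sem->lock);
            return ret;
    }

    static void wake_one_more(struct demo_sem *sem)
    {
            pthread_mutex_lock(&sem->lock);
            sem->waking++;                  /* __up(): bank one wakeup */
            pthread_mutex_unlock(&sem->lock);
    }

    int main(void)
    {
            struct demo_sem s = { .count = -1, .waking = 0,
                                  .lock = PTHREAD_MUTEX_INITIALIZER };

            wake_one_more(&s);                                /* up() side */
            printf("first waiter:  %d\n", waking_non_zero(&s)); /* 1: gates */
            printf("second waiter: %d\n", waking_non_zero(&s)); /* 0: sleeps on */
            return 0;
    }
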
diff --git a/trunk/arch/ia64/kernel/Makefile b/trunk/arch/ia64/kernel/Makefile
index 13fd10e8699e..33e5a598672d 100644
--- a/trunk/arch/ia64/kernel/Makefile
+++ b/trunk/arch/ia64/kernel/Makefile
@@ -6,7 +6,7 @@ extra-y := head.o init_task.o vmlinux.lds
obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \
- salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
+ salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
unwind.o mca.o mca_asm.o topology.o
obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o
diff --git a/trunk/arch/ia64/kernel/ia64_ksyms.c b/trunk/arch/ia64/kernel/ia64_ksyms.c
index 6da1f20d7372..8e7193d55528 100644
--- a/trunk/arch/ia64/kernel/ia64_ksyms.c
+++ b/trunk/arch/ia64/kernel/ia64_ksyms.c
@@ -19,6 +19,12 @@ EXPORT_SYMBOL_GPL(empty_zero_page);
EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */
EXPORT_SYMBOL(csum_ipv6_magic);
+#include <asm/semaphore.h>
+EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__down_interruptible);
+EXPORT_SYMBOL(__down_trylock);
+EXPORT_SYMBOL(__up);
+
#include <asm/page.h>
EXPORT_SYMBOL(clear_page);
diff --git a/trunk/arch/ia64/kernel/semaphore.c b/trunk/arch/ia64/kernel/semaphore.c
new file mode 100644
index 000000000000..2724ef3fbae2
--- /dev/null
+++ b/trunk/arch/ia64/kernel/semaphore.c
@@ -0,0 +1,165 @@
+/*
+ * IA-64 semaphore implementation (derived from x86 version).
+ *
+ * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/*
+ * Semaphores are implemented using a two-way counter: The "count"
+ * variable is decremented for each process that tries to acquire the
+ * semaphore, while the "sleepers" variable is a count of such
+ * acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can efficiently
+ * test if they need to do any extra work (up needs to do something
+ * only if count was negative before the increment operation).
+ *
+ * "sleeping" and the contention routine ordering is protected
+ * by the spinlock in the semaphore's waitqueue head.
+ *
+ * Note that these functions are only called when there is contention
+ * on the lock, and as such all this is the "non-critical" part of the
+ * whole semaphore business. The critical part is the inline stuff in
+ * <asm/semaphore.h> where we want to avoid any extra jumps and calls.
+ */
+#include <linux/sched.h>
+#include <linux/init.h>
+
+#include <asm/errno.h>
+#include <asm/semaphore.h>
+
+/*
+ * Logic:
+ * - Only on a boundary condition do we need to care. When we go
+ * from a negative count to a non-negative, we wake people up.
+ * - When we go from a non-negative count to a negative do we
+ * (a) synchronize with the "sleepers" count and (b) make sure
+ * that we're on the wakeup list before we synchronize so that
+ * we cannot lose wakeup events.
+ */
+
+void
+__up (struct semaphore *sem)
+{
+ wake_up(&sem->wait);
+}
+
+void __sched __down (struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ unsigned long flags;
+
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock in
+ * the wait_queue_head.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ schedule();
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ }
+ remove_wait_queue_locked(&sem->wait, &wait);
+ wake_up_locked(&sem->wait);
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+ tsk->state = TASK_RUNNING;
+}
+
+int __sched __down_interruptible (struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ unsigned long flags;
+
+ tsk->state = TASK_INTERRUPTIBLE;
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+	sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * With signals pending, this turns into
+ * the trylock failure case - we won't be
+		 * sleeping, and we can't get the lock as
+ * it has contention. Just correct the count
+ * and exit.
+ */
+ if (signal_pending(current)) {
+ retval = -EINTR;
+ sem->sleepers = 0;
+ atomic_add(sleepers, &sem->count);
+ break;
+ }
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock in
+ * wait_queue_head. The "-1" is because we're
+ * still hoping to get the semaphore.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ schedule();
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ tsk->state = TASK_INTERRUPTIBLE;
+ }
+ remove_wait_queue_locked(&sem->wait, &wait);
+ wake_up_locked(&sem->wait);
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ tsk->state = TASK_RUNNING;
+ return retval;
+}
+
+/*
+ * Trylock failed - make sure we correct for having decremented the
+ * count.
+ */
+int
+__down_trylock (struct semaphore *sem)
+{
+ unsigned long flags;
+ int sleepers;
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ sleepers = sem->sleepers + 1;
+ sem->sleepers = 0;
+
+ /*
+ * Add "everybody else" and us into it. They aren't
+ * playing, because we own the spinlock in the
+ * wait_queue_head.
+ */
+ if (!atomic_add_negative(sleepers, &sem->count)) {
+ wake_up_locked(&sem->wait);
+ }
+
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+ return 1;
+}
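
The sleepers arithmetic above is easiest to follow with concrete numbers. Below is a single-threaded walk-through of one contended down()/up() round, keeping only the algebra -- plain ints, no locking, purely illustrative:

    #include <stdio.h>

    int main(void)
    {
            int count = 1;          /* free semaphore */
            int sleepers = 0;

            count--;        /* task A: down() fast path succeeds (count 0) */
            count--;        /* task B: down() fast path fails (count -1) */

            /* task B enters __down() and registers itself */
            sleepers++;                             /* sleepers == 1 */
            count += sleepers - 1;                  /* adds 0: count stays -1 */
            printf("B asleep: count=%d sleepers=%d\n", count, sleepers);
            /* atomic_add_negative() saw a negative result, so B resets
             * sleepers to 1 ("us - see -1 above") and sleeps */

            count++;        /* task A: up(), count back to 0, B is woken */

            /* B retries; this time the folded result is not negative */
            if (!(count + (sleepers - 1) < 0)) {
                    count += sleepers - 1;          /* adds 0 again */
                    sleepers = 0;                   /* B owns the semaphore */
            }
            printf("B owns:   count=%d sleepers=%d\n", count, sleepers);
            return 0;
    }
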
diff --git a/trunk/arch/m32r/kernel/Makefile b/trunk/arch/m32r/kernel/Makefile
index 09200d4886e3..e97e26e87c9e 100644
--- a/trunk/arch/m32r/kernel/Makefile
+++ b/trunk/arch/m32r/kernel/Makefile
@@ -5,7 +5,7 @@
extra-y := head.o init_task.o vmlinux.lds
obj-y := process.o entry.o traps.o align.o irq.o setup.o time.o \
- m32r_ksyms.o sys_m32r.o signal.o ptrace.o
+ m32r_ksyms.o sys_m32r.o semaphore.o signal.o ptrace.o
obj-$(CONFIG_SMP) += smp.o smpboot.o
obj-$(CONFIG_MODULES) += module.o
diff --git a/trunk/arch/m32r/kernel/m32r_ksyms.c b/trunk/arch/m32r/kernel/m32r_ksyms.c
index e6709fe950ba..41a4c95e06d6 100644
--- a/trunk/arch/m32r/kernel/m32r_ksyms.c
+++ b/trunk/arch/m32r/kernel/m32r_ksyms.c
@@ -7,6 +7,7 @@
#include
#include
+#include <asm/semaphore.h>
#include
#include
#include
@@ -21,6 +22,10 @@ EXPORT_SYMBOL(dump_fpu);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(kernel_thread);
+EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__down_interruptible);
+EXPORT_SYMBOL(__up);
+EXPORT_SYMBOL(__down_trylock);
/* Networking helper routines. */
/* Delay loops */
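
The EXPORT_SYMBOL() additions above exist because down()/up() are inlines whose contended slow paths land in __down()/__up(); any module that takes a semaphore needs those symbols resolvable at load time. A hypothetical module sketch using the era-appropriate API (DECLARE_MUTEX declared a count-1 semaphore in these kernels); illustrative only, not part of the patch:

    #include <linux/module.h>
    #include <linux/init.h>
    #include <asm/semaphore.h>

    static DECLARE_MUTEX(demo_sem);         /* semaphore initialized to 1 */

    static int __init demo_init(void)
    {
            down(&demo_sem);        /* contended case calls exported __down() */
            up(&demo_sem);          /* wakeup case calls exported __up() */
            return 0;
    }

    static void __exit demo_exit(void)
    {
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
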
diff --git a/trunk/arch/m32r/kernel/semaphore.c b/trunk/arch/m32r/kernel/semaphore.c
new file mode 100644
index 000000000000..940c2d37cfd1
--- /dev/null
+++ b/trunk/arch/m32r/kernel/semaphore.c
@@ -0,0 +1,185 @@
+/*
+ * linux/arch/m32r/semaphore.c
+ * orig : i386 2.6.4
+ *
+ * M32R semaphore implementation.
+ *
+ * Copyright (c) 2002 - 2004 Hitoshi Yamamoto
+ */
+
+/*
+ * i386 semaphore implementation.
+ *
+ * (C) Copyright 1999 Linus Torvalds
+ *
+ * Portions Copyright 1999 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * rw semaphores implemented November 1999 by Benjamin LaHaise
+ */
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <asm/semaphore.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleeping"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * "sleeping" and the contention routine ordering is protected
+ * by the spinlock in the semaphore's waitqueue head.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+
+/*
+ * Logic:
+ * - only on a boundary condition do we need to care. When we go
+ * from a negative count to a non-negative, we wake people up.
+ * - when we go from a non-negative count to a negative do we
+ * (a) synchronize with the "sleeper" count and (b) make sure
+ * that we're on the wakeup list before we synchronize so that
+ * we cannot lose wakeup events.
+ */
+
+asmlinkage void __up(struct semaphore *sem)
+{
+ wake_up(&sem->wait);
+}
+
+asmlinkage void __sched __down(struct semaphore * sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ unsigned long flags;
+
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock in
+ * the wait_queue_head.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ schedule();
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ }
+ remove_wait_queue_locked(&sem->wait, &wait);
+ wake_up_locked(&sem->wait);
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+ tsk->state = TASK_RUNNING;
+}
+
+asmlinkage int __sched __down_interruptible(struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ unsigned long flags;
+
+ tsk->state = TASK_INTERRUPTIBLE;
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * With signals pending, this turns into
+ * the trylock failure case - we won't be
+		 * sleeping, and we can't get the lock as
+ * it has contention. Just correct the count
+ * and exit.
+ */
+ if (signal_pending(current)) {
+ retval = -EINTR;
+ sem->sleepers = 0;
+ atomic_add(sleepers, &sem->count);
+ break;
+ }
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock in
+ * wait_queue_head. The "-1" is because we're
+ * still hoping to get the semaphore.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ schedule();
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ tsk->state = TASK_INTERRUPTIBLE;
+ }
+ remove_wait_queue_locked(&sem->wait, &wait);
+ wake_up_locked(&sem->wait);
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ tsk->state = TASK_RUNNING;
+ return retval;
+}
+
+/*
+ * Trylock failed - make sure we correct for
+ * having decremented the count.
+ *
+ * We could have done the trylock with a
+ * single "cmpxchg" without failure cases,
+ * but then it wouldn't work on a 386.
+ */
+asmlinkage int __down_trylock(struct semaphore * sem)
+{
+ int sleepers;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ sleepers = sem->sleepers + 1;
+ sem->sleepers = 0;
+
+ /*
+ * Add "everybody else" and us into it. They aren't
+ * playing, because we own the spinlock in the
+ * wait_queue_head.
+ */
+ if (!atomic_add_negative(sleepers, &sem->count)) {
+ wake_up_locked(&sem->wait);
+ }
+
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+ return 1;
+}
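
The __down_trylock() comment above notes that a single cmpxchg could do the trylock without failure cases, were 386 support not required. A hedged C11 sketch of that alternative (ours, not what the patch implements): succeed only by moving a positive count down by one.

    #include <stdatomic.h>
    #include <stdio.h>

    static int trylock_cmpxchg(atomic_int *count)
    {
            int old = atomic_load(count);

            while (old > 0) {
                    /* on failure 'old' is reloaded for the retry */
                    if (atomic_compare_exchange_weak(count, &old, old - 1))
                            return 1;       /* acquired */
            }
            return 0;                       /* contended: caller must sleep */
    }

    int main(void)
    {
            atomic_int count = 1;

            printf("%d\n", trylock_cmpxchg(&count));        /* 1 */
            printf("%d\n", trylock_cmpxchg(&count));        /* 0 */
            return 0;
    }
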
diff --git a/trunk/arch/m68k/kernel/Makefile b/trunk/arch/m68k/kernel/Makefile
index 7a62a718143b..a806208c7fb5 100644
--- a/trunk/arch/m68k/kernel/Makefile
+++ b/trunk/arch/m68k/kernel/Makefile
@@ -10,7 +10,7 @@ endif
extra-y += vmlinux.lds
obj-y := entry.o process.o traps.o ints.o signal.o ptrace.o module.o \
- sys_m68k.o time.o setup.o m68k_ksyms.o devres.o
+ sys_m68k.o time.o semaphore.o setup.o m68k_ksyms.o devres.o
devres-y = ../../../kernel/irq/devres.o
diff --git a/trunk/arch/m68k/kernel/m68k_ksyms.c b/trunk/arch/m68k/kernel/m68k_ksyms.c
index d900e77e5363..6fc69c74fe2e 100644
--- a/trunk/arch/m68k/kernel/m68k_ksyms.c
+++ b/trunk/arch/m68k/kernel/m68k_ksyms.c
@@ -1,4 +1,5 @@
#include
+#include <asm/semaphore.h>
asmlinkage long long __ashldi3 (long long, int);
asmlinkage long long __ashrdi3 (long long, int);
@@ -14,3 +15,8 @@ EXPORT_SYMBOL(__ashrdi3);
EXPORT_SYMBOL(__lshrdi3);
EXPORT_SYMBOL(__muldi3);
+EXPORT_SYMBOL(__down_failed);
+EXPORT_SYMBOL(__down_failed_interruptible);
+EXPORT_SYMBOL(__down_failed_trylock);
+EXPORT_SYMBOL(__up_wakeup);
+
diff --git a/trunk/arch/m68k/kernel/semaphore.c b/trunk/arch/m68k/kernel/semaphore.c
new file mode 100644
index 000000000000..d12cbbfe6ebd
--- /dev/null
+++ b/trunk/arch/m68k/kernel/semaphore.c
@@ -0,0 +1,132 @@
+/*
+ * Generic semaphore code. Buyer beware. Do your own
+ * specific changes in <asm/semaphore-helper.h>
+ */
+
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <asm/semaphore-helper.h>
+
+#ifndef CONFIG_RMW_INSNS
+spinlock_t semaphore_wake_lock;
+#endif
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to sleep, while the "waking" variable is
+ * incremented when the "up()" code goes to wake up waiting
+ * processes.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * waking_non_zero() (from asm/semaphore.h) must execute
+ * atomically.
+ *
+ * When __up() is called, the count was negative before
+ * incrementing it, and we need to wake up somebody.
+ *
+ * This routine adds one to the count of processes that need to
+ * wake up and exit. ALL waiting processes actually wake up but
+ * only the one that gets to the "waking" field first will gate
+ * through and acquire the semaphore. The others will go back
+ * to sleep.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+void __up(struct semaphore *sem)
+{
+ wake_one_more(sem);
+ wake_up(&sem->wait);
+}
+
+/*
+ * Perform the "down" function. Return zero for semaphore acquired,
+ * return negative for signalled out of the function.
+ *
+ * If called from __down, the return is ignored and the wait loop is
+ * not interruptible. This means that a task waiting on a semaphore
+ * using "down()" cannot be killed until someone does an "up()" on
+ * the semaphore.
+ *
+ * If called from __down_interruptible, the return value gets checked
+ * upon return. If the return value is negative then the task continues
+ * with the negative value in the return register (it can be tested by
+ * the caller).
+ *
+ * Either form may be used in conjunction with "up()".
+ *
+ */
+
+
+#define DOWN_HEAD(task_state) \
+ \
+ \
+ current->state = (task_state); \
+ add_wait_queue(&sem->wait, &wait); \
+ \
+ /* \
+ * Ok, we're set up. sem->count is known to be less than zero \
+ * so we must wait. \
+ * \
+ * We can let go the lock for purposes of waiting. \
+ * We re-acquire it after awaking so as to protect \
+ * all semaphore operations. \
+ * \
+ * If "up()" is called before we call waking_non_zero() then \
+ * we will catch it right away. If it is called later then \
+ * we will have to go through a wakeup cycle to catch it. \
+ * \
+ * Multiple waiters contend for the semaphore lock to see \
+ * who gets to gate through and who has to wait some more. \
+ */ \
+ for (;;) {
+
+#define DOWN_TAIL(task_state) \
+ current->state = (task_state); \
+ } \
+ current->state = TASK_RUNNING; \
+ remove_wait_queue(&sem->wait, &wait);
+
+void __sched __down(struct semaphore * sem)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ DOWN_HEAD(TASK_UNINTERRUPTIBLE)
+ if (waking_non_zero(sem))
+ break;
+ schedule();
+ DOWN_TAIL(TASK_UNINTERRUPTIBLE)
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ DECLARE_WAITQUEUE(wait, current);
+ int ret = 0;
+
+ DOWN_HEAD(TASK_INTERRUPTIBLE)
+
+ ret = waking_non_zero_interruptible(sem, current);
+ if (ret)
+ {
+ if (ret == 1)
+ /* ret != 0 only if we get interrupted -arca */
+ ret = 0;
+ break;
+ }
+ schedule();
+ DOWN_TAIL(TASK_INTERRUPTIBLE)
+ return ret;
+}
+
+int __down_trylock(struct semaphore * sem)
+{
+ return waking_non_zero_trylock(sem);
+}
diff --git a/trunk/arch/m68k/lib/Makefile b/trunk/arch/m68k/lib/Makefile
index a18af095cd7c..6bbf19f96007 100644
--- a/trunk/arch/m68k/lib/Makefile
+++ b/trunk/arch/m68k/lib/Makefile
@@ -5,4 +5,4 @@
EXTRA_AFLAGS := -traditional
lib-y := ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
- checksum.o string.o uaccess.o
+ checksum.o string.o semaphore.o uaccess.o
diff --git a/trunk/arch/m68k/lib/semaphore.S b/trunk/arch/m68k/lib/semaphore.S
new file mode 100644
index 000000000000..0215624c1602
--- /dev/null
+++ b/trunk/arch/m68k/lib/semaphore.S
@@ -0,0 +1,53 @@
+/*
+ * linux/arch/m68k/lib/semaphore.S
+ *
+ * Copyright (C) 1996 Linus Torvalds
+ *
+ * m68k version by Andreas Schwab
+ */
+
+#include <linux/linkage.h>
+#include <asm/semaphore.h>
+
+/*
+ * The semaphore operations have a special calling sequence that
+ * allows us to do a simpler in-line version of them. These routines
+ * need to convert that sequence back into the C sequence when
+ * there is contention on the semaphore.
+ */
+ENTRY(__down_failed)
+ moveml %a0/%d0/%d1,-(%sp)
+ movel %a1,-(%sp)
+ jbsr __down
+ movel (%sp)+,%a1
+ moveml (%sp)+,%a0/%d0/%d1
+ rts
+
+ENTRY(__down_failed_interruptible)
+ movel %a0,-(%sp)
+ movel %d1,-(%sp)
+ movel %a1,-(%sp)
+ jbsr __down_interruptible
+ movel (%sp)+,%a1
+ movel (%sp)+,%d1
+ movel (%sp)+,%a0
+ rts
+
+ENTRY(__down_failed_trylock)
+ movel %a0,-(%sp)
+ movel %d1,-(%sp)
+ movel %a1,-(%sp)
+ jbsr __down_trylock
+ movel (%sp)+,%a1
+ movel (%sp)+,%d1
+ movel (%sp)+,%a0
+ rts
+
+ENTRY(__up_wakeup)
+ moveml %a0/%d0/%d1,-(%sp)
+ movel %a1,-(%sp)
+ jbsr __up
+ movel (%sp)+,%a1
+ moveml (%sp)+,%a0/%d0/%d1
+ rts
+
diff --git a/trunk/arch/m68knommu/kernel/Makefile b/trunk/arch/m68knommu/kernel/Makefile
index f0eab3dedb5a..1524b39ad63f 100644
--- a/trunk/arch/m68knommu/kernel/Makefile
+++ b/trunk/arch/m68knommu/kernel/Makefile
@@ -5,7 +5,7 @@
extra-y := vmlinux.lds
obj-y += dma.o entry.o init_task.o irq.o m68k_ksyms.o process.o ptrace.o \
- setup.o signal.o syscalltable.o sys_m68k.o time.o traps.o
+ semaphore.o setup.o signal.o syscalltable.o sys_m68k.o time.o traps.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_COMEMPCI) += comempci.o
diff --git a/trunk/arch/m68knommu/kernel/m68k_ksyms.c b/trunk/arch/m68knommu/kernel/m68k_ksyms.c
index 39fe0a7aec32..53fad1490282 100644
--- a/trunk/arch/m68knommu/kernel/m68k_ksyms.c
+++ b/trunk/arch/m68knommu/kernel/m68k_ksyms.c
@@ -13,6 +13,7 @@
#include
#include
#include
+#include <asm/semaphore.h>
#include
#include
@@ -38,6 +39,11 @@ EXPORT_SYMBOL(csum_partial_copy_nocheck);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(__down_failed);
+EXPORT_SYMBOL(__down_failed_interruptible);
+EXPORT_SYMBOL(__down_failed_trylock);
+EXPORT_SYMBOL(__up_wakeup);
+
/*
* libgcc functions - functions that are used internally by the
* compiler... (prototypes are not correct though, but that
diff --git a/trunk/arch/m68knommu/kernel/semaphore.c b/trunk/arch/m68knommu/kernel/semaphore.c
new file mode 100644
index 000000000000..bce2bc7d87c6
--- /dev/null
+++ b/trunk/arch/m68knommu/kernel/semaphore.c
@@ -0,0 +1,133 @@
+/*
+ * Generic semaphore code. Buyer beware. Do your own
+ * specific changes in <asm/semaphore-helper.h>
+ */
+
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <asm/semaphore-helper.h>
+
+#ifndef CONFIG_RMW_INSNS
+spinlock_t semaphore_wake_lock;
+#endif
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to sleep, while the "waking" variable is
+ * incremented when the "up()" code goes to wake up waiting
+ * processes.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * waking_non_zero() (from asm/semaphore.h) must execute
+ * atomically.
+ *
+ * When __up() is called, the count was negative before
+ * incrementing it, and we need to wake up somebody.
+ *
+ * This routine adds one to the count of processes that need to
+ * wake up and exit. ALL waiting processes actually wake up but
+ * only the one that gets to the "waking" field first will gate
+ * through and acquire the semaphore. The others will go back
+ * to sleep.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+void __up(struct semaphore *sem)
+{
+ wake_one_more(sem);
+ wake_up(&sem->wait);
+}
+
+/*
+ * Perform the "down" function. Return zero for semaphore acquired,
+ * return negative for signalled out of the function.
+ *
+ * If called from __down, the return is ignored and the wait loop is
+ * not interruptible. This means that a task waiting on a semaphore
+ * using "down()" cannot be killed until someone does an "up()" on
+ * the semaphore.
+ *
+ * If called from __down_interruptible, the return value gets checked
+ * upon return. If the return value is negative then the task continues
+ * with the negative value in the return register (it can be tested by
+ * the caller).
+ *
+ * Either form may be used in conjunction with "up()".
+ *
+ */
+
+
+#define DOWN_HEAD(task_state) \
+ \
+ \
+ current->state = (task_state); \
+ add_wait_queue(&sem->wait, &wait); \
+ \
+ /* \
+ * Ok, we're set up. sem->count is known to be less than zero \
+ * so we must wait. \
+ * \
+ * We can let go the lock for purposes of waiting. \
+ * We re-acquire it after awaking so as to protect \
+ * all semaphore operations. \
+ * \
+ * If "up()" is called before we call waking_non_zero() then \
+ * we will catch it right away. If it is called later then \
+ * we will have to go through a wakeup cycle to catch it. \
+ * \
+ * Multiple waiters contend for the semaphore lock to see \
+ * who gets to gate through and who has to wait some more. \
+ */ \
+ for (;;) {
+
+#define DOWN_TAIL(task_state) \
+ current->state = (task_state); \
+ } \
+ current->state = TASK_RUNNING; \
+ remove_wait_queue(&sem->wait, &wait);
+
+void __sched __down(struct semaphore * sem)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ DOWN_HEAD(TASK_UNINTERRUPTIBLE)
+ if (waking_non_zero(sem))
+ break;
+ schedule();
+ DOWN_TAIL(TASK_UNINTERRUPTIBLE)
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ DECLARE_WAITQUEUE(wait, current);
+ int ret = 0;
+
+ DOWN_HEAD(TASK_INTERRUPTIBLE)
+
+ ret = waking_non_zero_interruptible(sem, current);
+ if (ret)
+ {
+ if (ret == 1)
+ /* ret != 0 only if we get interrupted -arca */
+ ret = 0;
+ break;
+ }
+ schedule();
+ DOWN_TAIL(TASK_INTERRUPTIBLE)
+ return ret;
+}
+
+int __down_trylock(struct semaphore * sem)
+{
+ return waking_non_zero_trylock(sem);
+}
diff --git a/trunk/arch/m68knommu/lib/Makefile b/trunk/arch/m68knommu/lib/Makefile
index d94d709665aa..e051a7913987 100644
--- a/trunk/arch/m68knommu/lib/Makefile
+++ b/trunk/arch/m68knommu/lib/Makefile
@@ -4,4 +4,4 @@
lib-y := ashldi3.o ashrdi3.o lshrdi3.o \
muldi3.o mulsi3.o divsi3.o udivsi3.o modsi3.o umodsi3.o \
- checksum.o memcpy.o memset.o delay.o
+ checksum.o semaphore.o memcpy.o memset.o delay.o
diff --git a/trunk/arch/m68knommu/lib/semaphore.S b/trunk/arch/m68knommu/lib/semaphore.S
new file mode 100644
index 000000000000..87c746034376
--- /dev/null
+++ b/trunk/arch/m68knommu/lib/semaphore.S
@@ -0,0 +1,66 @@
+/*
+ * linux/arch/m68k/lib/semaphore.S
+ *
+ * Copyright (C) 1996 Linus Torvalds
+ *
+ * m68k version by Andreas Schwab
+ *
+ * MAR/1999 -- modified to support ColdFire (gerg@snapgear.com)
+ */
+
+#include <linux/linkage.h>
+#include <asm/semaphore.h>
+
+/*
+ * "down_failed" is called with the eventual return address
+ * in %a0, and the address of the semaphore in %a1. We need
+ * to increment the number of waiters on the semaphore,
+ * call "__down()", and then eventually return to try again.
+ */
+ENTRY(__down_failed)
+#ifdef CONFIG_COLDFIRE
+ subl #12,%sp
+ moveml %a0/%d0/%d1,(%sp)
+#else
+ moveml %a0/%d0/%d1,-(%sp)
+#endif
+ movel %a1,-(%sp)
+ jbsr __down
+ movel (%sp)+,%a1
+ movel (%sp)+,%d0
+	movel	(%sp)+,%d1
+	movel	(%sp)+,%a0
+	rts
+
+ENTRY(__down_failed_interruptible)
+ movel %a0,-(%sp)
+ movel %d1,-(%sp)
+ movel %a1,-(%sp)
+ jbsr __down_interruptible
+ movel (%sp)+,%a1
+	movel	(%sp)+,%d1
+	movel	(%sp)+,%a0
+	rts
+
+ENTRY(__up_wakeup)
+#ifdef CONFIG_COLDFIRE
+ subl #12,%sp
+ moveml %a0/%d0/%d1,(%sp)
+#else
+ moveml %a0/%d0/%d1,-(%sp)
+#endif
+ movel %a1,-(%sp)
+ jbsr __up
+ movel (%sp)+,%a1
+ movel (%sp)+,%d0
+	movel	(%sp)+,%d1
+	movel	(%sp)+,%a0
+	rts
+
+ENTRY(__down_failed_trylock)
+ movel %a0,-(%sp)
+ movel %d1,-(%sp)
+ movel %a1,-(%sp)
+ jbsr __down_trylock
+ movel (%sp)+,%a1
+ movel (%sp)+,%d1
+ movel (%sp)+,%a0
+ rts
+
diff --git a/trunk/arch/mips/kernel/Makefile b/trunk/arch/mips/kernel/Makefile
index 6fcdb6fda2e2..9e78e1a4ca17 100644
--- a/trunk/arch/mips/kernel/Makefile
+++ b/trunk/arch/mips/kernel/Makefile
@@ -5,7 +5,7 @@
extra-y := head.o init_task.o vmlinux.lds
obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \
- ptrace.o reset.o setup.o signal.o syscall.o \
+ ptrace.o reset.o semaphore.o setup.o signal.o syscall.o \
time.o topology.o traps.o unaligned.o
obj-$(CONFIG_CEVT_BCM1480) += cevt-bcm1480.o
diff --git a/trunk/arch/mips/kernel/semaphore.c b/trunk/arch/mips/kernel/semaphore.c
new file mode 100644
index 000000000000..1265358cdca1
--- /dev/null
+++ b/trunk/arch/mips/kernel/semaphore.c
@@ -0,0 +1,168 @@
+/*
+ * MIPS-specific semaphore code.
+ *
+ * Copyright (C) 1999 Cort Dougan
+ * Copyright (C) 2004 Ralf Baechle
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * April 2001 - Reworked by Paul Mackerras
+ * to eliminate the SMP races in the old version between the updates
+ * of `count' and `waking'. Now we use negative `count' values to
+ * indicate that some process(es) are waiting for the semaphore.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <asm/atomic.h>
+#include <asm/cpu-features.h>
+#include <asm/errno.h>
+#include <asm/semaphore.h>
+#include <asm/war.h>
+/*
+ * Atomically update sem->count.
+ * This does the equivalent of the following:
+ *
+ * old_count = sem->count;
+ * tmp = MAX(old_count, 0) + incr;
+ * sem->count = tmp;
+ * return old_count;
+ *
+ * On machines without lld/scd we need a spinlock to make the manipulation of
+ * sem->count and sem->waking atomic. Scalability isn't an issue because
+ * this lock is used on UP only so it's just an empty variable.
+ */
+static inline int __sem_update_count(struct semaphore *sem, int incr)
+{
+ int old_count, tmp;
+
+ if (cpu_has_llsc && R10000_LLSC_WAR) {
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: ll %0, %2 # __sem_update_count \n"
+ " sra %1, %0, 31 \n"
+ " not %1 \n"
+ " and %1, %0, %1 \n"
+ " addu %1, %1, %3 \n"
+ " sc %1, %2 \n"
+ " beqzl %1, 1b \n"
+ " .set mips0 \n"
+ : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
+ : "r" (incr), "m" (sem->count));
+ } else if (cpu_has_llsc) {
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: ll %0, %2 # __sem_update_count \n"
+ " sra %1, %0, 31 \n"
+ " not %1 \n"
+ " and %1, %0, %1 \n"
+ " addu %1, %1, %3 \n"
+ " sc %1, %2 \n"
+ " beqz %1, 1b \n"
+ " .set mips0 \n"
+ : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
+ : "r" (incr), "m" (sem->count));
+ } else {
+ static DEFINE_SPINLOCK(semaphore_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&semaphore_lock, flags);
+ old_count = atomic_read(&sem->count);
+ tmp = max_t(int, old_count, 0) + incr;
+ atomic_set(&sem->count, tmp);
+ spin_unlock_irqrestore(&semaphore_lock, flags);
+ }
+
+ return old_count;
+}
+
+void __up(struct semaphore *sem)
+{
+ /*
+ * Note that we incremented count in up() before we came here,
+ * but that was ineffective since the result was <= 0, and
+ * any negative value of count is equivalent to 0.
+ * This ends up setting count to 1, unless count is now > 0
+ * (i.e. because some other cpu has called up() in the meantime),
+ * in which case we just increment count.
+ */
+ __sem_update_count(sem, 1);
+ wake_up(&sem->wait);
+}
+
+EXPORT_SYMBOL(__up);
+
+/*
+ * Note that when we come in to __down or __down_interruptible,
+ * we have already decremented count, but that decrement was
+ * ineffective since the result was < 0, and any negative value
+ * of count is equivalent to 0.
+ * Thus it is only when we decrement count from some value > 0
+ * that we have actually got the semaphore.
+ */
+void __sched __down(struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ /*
+ * Try to get the semaphore. If the count is > 0, then we've
+ * got the semaphore; we decrement count and exit the loop.
+ * If the count is 0 or negative, we set it to -1, indicating
+ * that we are asleep, and then sleep.
+ */
+ while (__sem_update_count(sem, -1) <= 0) {
+ schedule();
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ __set_task_state(tsk, TASK_RUNNING);
+
+ /*
+ * If there are any more sleepers, wake one of them up so
+ * that it can either get the semaphore, or set count to -1
+ * indicating that there are still processes sleeping.
+ */
+ wake_up(&sem->wait);
+}
+
+EXPORT_SYMBOL(__down);
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ __set_task_state(tsk, TASK_INTERRUPTIBLE);
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ while (__sem_update_count(sem, -1) <= 0) {
+ if (signal_pending(current)) {
+ /*
+ * A signal is pending - give up trying.
+ * Set sem->count to 0 if it is negative,
+ * since we are no longer sleeping.
+ */
+ __sem_update_count(sem, 0);
+ retval = -EINTR;
+ break;
+ }
+ schedule();
+ set_task_state(tsk, TASK_INTERRUPTIBLE);
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ __set_task_state(tsk, TASK_RUNNING);
+
+ wake_up(&sem->wait);
+ return retval;
+}
+
+EXPORT_SYMBOL(__down_interruptible);
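
All three branches of __sem_update_count() above compute the same thing: old = count; count = max(old, 0) + incr; return old. A user-space rendition of the spinlock fallback branch, with a pthread mutex in place of the spinlock and a short acquisition trace (names are ours):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t semaphore_lock = PTHREAD_MUTEX_INITIALIZER;

    static int sem_update_count(int *count, int incr)
    {
            int old, tmp;

            pthread_mutex_lock(&semaphore_lock);
            old = *count;
            tmp = (old > 0 ? old : 0) + incr;   /* max_t(int, old, 0) + incr */
            *count = tmp;
            pthread_mutex_unlock(&semaphore_lock);
            return old;
    }

    int main(void)
    {
            int count = 1;          /* free semaphore */
            int old;

            old = sem_update_count(&count, -1);     /* first down(): old == 1 */
            printf("old=%d: %s\n", old, old > 0 ? "acquired" : "must sleep");
            old = sem_update_count(&count, -1);     /* second down(): old == 0 */
            printf("old=%d: %s\n", old, old > 0 ? "acquired" : "must sleep");
            return 0;
    }
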
diff --git a/trunk/arch/mn10300/kernel/Makefile b/trunk/arch/mn10300/kernel/Makefile
index 23f2ab67574c..ef07c956170a 100644
--- a/trunk/arch/mn10300/kernel/Makefile
+++ b/trunk/arch/mn10300/kernel/Makefile
@@ -3,7 +3,7 @@
#
extra-y := head.o init_task.o vmlinux.lds
-obj-y := process.o signal.o entry.o fpu.o traps.o irq.o \
+obj-y := process.o semaphore.o signal.o entry.o fpu.o traps.o irq.o \
ptrace.o setup.o time.o sys_mn10300.o io.o kthread.o \
switch_to.o mn10300_ksyms.o kernel_execve.o
diff --git a/trunk/arch/mn10300/kernel/semaphore.c b/trunk/arch/mn10300/kernel/semaphore.c
new file mode 100644
index 000000000000..9153c4039fd2
--- /dev/null
+++ b/trunk/arch/mn10300/kernel/semaphore.c
@@ -0,0 +1,149 @@
+/* MN10300 Semaphore implementation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <asm/semaphore.h>
+
+struct sem_waiter {
+ struct list_head list;
+ struct task_struct *task;
+};
+
+#if SEMAPHORE_DEBUG
+void semtrace(struct semaphore *sem, const char *str)
+{
+ if (sem->debug)
+ printk(KERN_DEBUG "[%d] %s({%d,%d})\n",
+ current->pid,
+ str,
+ atomic_read(&sem->count),
+ list_empty(&sem->wait_list) ? 0 : 1);
+}
+#else
+#define semtrace(SEM, STR) do { } while (0)
+#endif
+
+/*
+ * wait for a token to be granted from a semaphore
+ * - entered with lock held and interrupts disabled
+ */
+void __down(struct semaphore *sem, unsigned long flags)
+{
+ struct task_struct *tsk = current;
+ struct sem_waiter waiter;
+
+ semtrace(sem, "Entering __down");
+
+ /* set up my own style of waitqueue */
+ waiter.task = tsk;
+ get_task_struct(tsk);
+
+ list_add_tail(&waiter.list, &sem->wait_list);
+
+ /* we don't need to touch the semaphore struct anymore */
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ /* wait to be given the semaphore */
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+
+ for (;;) {
+ if (!waiter.task)
+ break;
+ schedule();
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ }
+
+ tsk->state = TASK_RUNNING;
+ semtrace(sem, "Leaving __down");
+}
+EXPORT_SYMBOL(__down);
+
+/*
+ * interruptibly wait for a token to be granted from a semaphore
+ * - entered with lock held and interrupts disabled
+ */
+int __down_interruptible(struct semaphore *sem, unsigned long flags)
+{
+ struct task_struct *tsk = current;
+ struct sem_waiter waiter;
+ int ret;
+
+ semtrace(sem, "Entering __down_interruptible");
+
+ /* set up my own style of waitqueue */
+ waiter.task = tsk;
+ get_task_struct(tsk);
+
+ list_add_tail(&waiter.list, &sem->wait_list);
+
+ /* we don't need to touch the semaphore struct anymore */
+ set_task_state(tsk, TASK_INTERRUPTIBLE);
+
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ /* wait to be given the semaphore */
+ ret = 0;
+ for (;;) {
+ if (!waiter.task)
+ break;
+ if (unlikely(signal_pending(current)))
+ goto interrupted;
+ schedule();
+ set_task_state(tsk, TASK_INTERRUPTIBLE);
+ }
+
+ out:
+ tsk->state = TASK_RUNNING;
+ semtrace(sem, "Leaving __down_interruptible");
+ return ret;
+
+ interrupted:
+	spin_lock_irqsave(&sem->wait_lock, flags);
+
+	if (!list_empty(&waiter.list)) {
+		list_del(&waiter.list);
+		ret = -EINTR;
+	}
+
+	spin_unlock_irqrestore(&sem->wait_lock, flags);
+	if (ret == -EINTR)
+		put_task_struct(current);
+	goto out;
+}
+EXPORT_SYMBOL(__down_interruptible);
+
+/*
+ * release a single token back to a semaphore
+ * - entered with lock held and interrupts disabled
+ */
+void __up(struct semaphore *sem)
+{
+ struct task_struct *tsk;
+ struct sem_waiter *waiter;
+
+ semtrace(sem, "Entering __up");
+
+ /* grant the token to the process at the front of the queue */
+ waiter = list_entry(sem->wait_list.next, struct sem_waiter, list);
+
+ /* We must be careful not to touch 'waiter' after we set ->task = NULL.
+ * It is allocated on the waiter's stack and may become invalid at
+ * any time after that point (due to a wakeup from another source).
+ */
+ list_del_init(&waiter->list);
+ tsk = waiter->task;
+ smp_mb();
+ waiter->task = NULL;
+ wake_up_process(tsk);
+ put_task_struct(tsk);
+
+ semtrace(sem, "Leaving __up");
+}
+EXPORT_SYMBOL(__up);
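
The interrupted path above must settle a race under the wait_lock: either the task is still on sem->wait_list (withdraw, drop our task reference, report -EINTR) or __up() already dequeued it and cleared waiter.task (the token is ours, the reference is already balanced, and the signal must wait). The decision reduced to a pure function, using user-space errno and illustrative names:

    #include <errno.h>
    #include <stdio.h>

    /* still_queued: were we still on the wait list when we took the lock? */
    static int interrupted_outcome(int still_queued)
    {
            if (still_queued)
                    return -EINTR;  /* dequeue ourselves and give up */
            return 0;               /* already granted: keep the token */
    }

    int main(void)
    {
            printf("signal won the race: %d\n", interrupted_outcome(1));
            printf("up() won the race:   %d\n", interrupted_outcome(0));
            return 0;
    }
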
diff --git a/trunk/arch/parisc/kernel/Makefile b/trunk/arch/parisc/kernel/Makefile
index 1f6585a56f97..27827bc3717e 100644
--- a/trunk/arch/parisc/kernel/Makefile
+++ b/trunk/arch/parisc/kernel/Makefile
@@ -9,7 +9,7 @@ AFLAGS_pacache.o := -traditional
obj-y := cache.o pacache.o setup.o traps.o time.o irq.o \
pa7300lc.o syscall.o entry.o sys_parisc.o firmware.o \
- ptrace.o hardware.o inventory.o drivers.o \
+ ptrace.o hardware.o inventory.o drivers.o semaphore.o \
signal.o hpmc.o real2.o parisc_ksyms.o unaligned.o \
process.o processor.o pdc_cons.o pdc_chassis.o unwind.o \
topology.o
diff --git a/trunk/arch/parisc/kernel/parisc_ksyms.c b/trunk/arch/parisc/kernel/parisc_ksyms.c
index 5b7fc4aa044d..7aca704e96f0 100644
--- a/trunk/arch/parisc/kernel/parisc_ksyms.c
+++ b/trunk/arch/parisc/kernel/parisc_ksyms.c
@@ -69,6 +69,11 @@ EXPORT_SYMBOL(memcpy_toio);
EXPORT_SYMBOL(memcpy_fromio);
EXPORT_SYMBOL(memset_io);
+#include <asm/semaphore.h>
+EXPORT_SYMBOL(__up);
+EXPORT_SYMBOL(__down_interruptible);
+EXPORT_SYMBOL(__down);
+
extern void $$divI(void);
extern void $$divU(void);
extern void $$remI(void);
diff --git a/trunk/arch/parisc/kernel/semaphore.c b/trunk/arch/parisc/kernel/semaphore.c
new file mode 100644
index 000000000000..ee806bcc3726
--- /dev/null
+++ b/trunk/arch/parisc/kernel/semaphore.c
@@ -0,0 +1,102 @@
+/*
+ * Semaphore implementation Copyright (c) 2001 Matthew Wilcox, Hewlett-Packard
+ */
+
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+
+/*
+ * Semaphores are complex as we wish to avoid using two variables.
+ * `count' has multiple roles, depending on its value. If it is positive
+ * or zero, there are no waiters. The functions here will never be
+ * called; see
+ * called; see <asm/semaphore.h>.
+ * When count is -1 it indicates there is at least one task waiting
+ * for the semaphore.
+ *
+ * When count is less than that, there are '- count - 1' wakeups
+ * pending, i.e. if it has value -3, there are 2 wakeups pending.
+ *
+ * Note that these functions are only called when there is contention
+ * on the lock, and as such all this is the "non-critical" part of the
+ * whole semaphore business. The critical part is the inline stuff in
+ * <asm/semaphore.h> where we want to avoid any extra jumps and calls.
+ */
+void __up(struct semaphore *sem)
+{
+ sem->count--;
+ wake_up(&sem->wait);
+}
+
+#define wakers(count) (-1 - count)
+
+#define DOWN_HEAD \
+ int ret = 0; \
+ DECLARE_WAITQUEUE(wait, current); \
+ \
+ /* Note that someone is waiting */ \
+ if (sem->count == 0) \
+ sem->count = -1; \
+ \
+ /* protected by the sentry still -- use unlocked version */ \
+ wait.flags = WQ_FLAG_EXCLUSIVE; \
+ __add_wait_queue_tail(&sem->wait, &wait); \
+ lost_race: \
+ spin_unlock_irq(&sem->sentry); \
+
+#define DOWN_TAIL \
+ spin_lock_irq(&sem->sentry); \
+ if (wakers(sem->count) == 0 && ret == 0) \
+ goto lost_race; /* Someone stole our wakeup */ \
+ __remove_wait_queue(&sem->wait, &wait); \
+ current->state = TASK_RUNNING; \
+ if (!waitqueue_active(&sem->wait) && (sem->count < 0)) \
+ sem->count = wakers(sem->count);
+
+#define UPDATE_COUNT \
+	sem->count += (sem->count < 0) ? 1 : -1;
+
+
+void __sched __down(struct semaphore * sem)
+{
+ DOWN_HEAD
+
+ for(;;) {
+ set_task_state(current, TASK_UNINTERRUPTIBLE);
+ /* we can _read_ this without the sentry */
+ if (sem->count != -1)
+ break;
+ schedule();
+ }
+
+ DOWN_TAIL
+ UPDATE_COUNT
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ DOWN_HEAD
+
+ for(;;) {
+ set_task_state(current, TASK_INTERRUPTIBLE);
+ /* we can _read_ this without the sentry */
+ if (sem->count != -1)
+ break;
+
+ if (signal_pending(current)) {
+ ret = -EINTR;
+ break;
+ }
+ schedule();
+ }
+
+ DOWN_TAIL
+
+ if (!ret) {
+ UPDATE_COUNT
+ }
+
+ return ret;
+}
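
The parisc variant packs everything into a single count: positive or zero means no waiters, -1 means waiters but no banked wakeups, and each step below -1 stores one more pending wakeup, exactly as the wakers() macro decodes. A small table generated from the same formula (the demo adds parentheses around the macro argument, which the patch omits):

    #include <stdio.h>

    #define wakers(count) (-1 - (count))

    int main(void)
    {
            int count;

            for (count = 1; count >= -4; count--) {
                    if (count >= 0)
                            printf("count=%2d: no waiters\n", count);
                    else
                            printf("count=%2d: waiters, %d wakeup(s) pending\n",
                                   count, wakers(count));
            }
            return 0;
    }
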
diff --git a/trunk/arch/powerpc/kernel/Makefile b/trunk/arch/powerpc/kernel/Makefile
index b9dbfff9afe9..c1baf9d5903f 100644
--- a/trunk/arch/powerpc/kernel/Makefile
+++ b/trunk/arch/powerpc/kernel/Makefile
@@ -12,7 +12,7 @@ CFLAGS_prom_init.o += -fPIC
CFLAGS_btext.o += -fPIC
endif
-obj-y := cputable.o ptrace.o syscalls.o \
+obj-y := semaphore.o cputable.o ptrace.o syscalls.o \
irq.o align.o signal_32.o pmc.o vdso.o \
init_task.o process.o systbl.o idle.o \
signal.o
diff --git a/trunk/arch/powerpc/kernel/ppc_ksyms.c b/trunk/arch/powerpc/kernel/ppc_ksyms.c
index 65d14e6ddc3c..9c98424277a8 100644
--- a/trunk/arch/powerpc/kernel/ppc_ksyms.c
+++ b/trunk/arch/powerpc/kernel/ppc_ksyms.c
@@ -15,6 +15,7 @@
#include
#include
+#include <asm/semaphore.h>
#include
#include
#include
diff --git a/trunk/arch/powerpc/kernel/semaphore.c b/trunk/arch/powerpc/kernel/semaphore.c
new file mode 100644
index 000000000000..2f8c3c951394
--- /dev/null
+++ b/trunk/arch/powerpc/kernel/semaphore.c
@@ -0,0 +1,135 @@
+/*
+ * PowerPC-specific semaphore code.
+ *
+ * Copyright (C) 1999 Cort Dougan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * April 2001 - Reworked by Paul Mackerras
+ * to eliminate the SMP races in the old version between the updates
+ * of `count' and `waking'. Now we use negative `count' values to
+ * indicate that some process(es) are waiting for the semaphore.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+#include <asm/errno.h>
+
+/*
+ * Atomically update sem->count.
+ * This does the equivalent of the following:
+ *
+ * old_count = sem->count;
+ * tmp = MAX(old_count, 0) + incr;
+ * sem->count = tmp;
+ * return old_count;
+ */
+static inline int __sem_update_count(struct semaphore *sem, int incr)
+{
+ int old_count, tmp;
+
+ __asm__ __volatile__("\n"
+"1: lwarx %0,0,%3\n"
+" srawi %1,%0,31\n"
+" andc %1,%0,%1\n"
+" add %1,%1,%4\n"
+ PPC405_ERR77(0,%3)
+" stwcx. %1,0,%3\n"
+" bne 1b"
+ : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
+ : "r" (&sem->count), "r" (incr), "m" (sem->count)
+ : "cc");
+
+ return old_count;
+}
+
+void __up(struct semaphore *sem)
+{
+ /*
+ * Note that we incremented count in up() before we came here,
+ * but that was ineffective since the result was <= 0, and
+ * any negative value of count is equivalent to 0.
+ * This ends up setting count to 1, unless count is now > 0
+ * (i.e. because some other cpu has called up() in the meantime),
+ * in which case we just increment count.
+ */
+ __sem_update_count(sem, 1);
+ wake_up(&sem->wait);
+}
+EXPORT_SYMBOL(__up);
+
+/*
+ * Note that when we come in to __down or __down_interruptible,
+ * we have already decremented count, but that decrement was
+ * ineffective since the result was < 0, and any negative value
+ * of count is equivalent to 0.
+ * Thus it is only when we decrement count from some value > 0
+ * that we have actually got the semaphore.
+ */
+void __sched __down(struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ /*
+ * Try to get the semaphore. If the count is > 0, then we've
+ * got the semaphore; we decrement count and exit the loop.
+ * If the count is 0 or negative, we set it to -1, indicating
+ * that we are asleep, and then sleep.
+ */
+ while (__sem_update_count(sem, -1) <= 0) {
+ schedule();
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ __set_task_state(tsk, TASK_RUNNING);
+
+ /*
+ * If there are any more sleepers, wake one of them up so
+ * that it can either get the semaphore, or set count to -1
+ * indicating that there are still processes sleeping.
+ */
+ wake_up(&sem->wait);
+}
+EXPORT_SYMBOL(__down);
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ __set_task_state(tsk, TASK_INTERRUPTIBLE);
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ while (__sem_update_count(sem, -1) <= 0) {
+ if (signal_pending(current)) {
+ /*
+ * A signal is pending - give up trying.
+ * Set sem->count to 0 if it is negative,
+ * since we are no longer sleeping.
+ */
+ __sem_update_count(sem, 0);
+ retval = -EINTR;
+ break;
+ }
+ schedule();
+ set_task_state(tsk, TASK_INTERRUPTIBLE);
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ __set_task_state(tsk, TASK_RUNNING);
+
+ wake_up(&sem->wait);
+ return retval;
+}
+EXPORT_SYMBOL(__down_interruptible);
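
The lwarx/stwcx. loop in __sem_update_count() above clamps a negative count to zero (the srawi/andc pair builds and applies a sign mask) before adding incr, retrying whenever the reservation is lost. The same update expressed as a C11 compare-and-swap loop -- a sketch for reading the assembly, not a replacement for it:

    #include <stdatomic.h>
    #include <stdio.h>

    static int sem_update_count(atomic_int *count, int incr)
    {
            int old = atomic_load(count);
            int new;

            do {
                    new = (old < 0 ? 0 : old) + incr;   /* MAX(old, 0) + incr */
            } while (!atomic_compare_exchange_weak(count, &old, new));

            return old;             /* caller tests the pre-update value */
    }

    int main(void)
    {
            atomic_int count = -2;  /* sleepers recorded as a negative count */
            int old;

            /* __up(): any negative pile-up collapses to a single token */
            old = sem_update_count(&count, 1);
            printf("up:   old=%d now=%d\n", old, atomic_load(&count));
            /* __down(): old > 0 means we actually got the semaphore */
            old = sem_update_count(&count, -1);
            printf("down: old=%d now=%d\n", old, atomic_load(&count));
            return 0;
    }
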
diff --git a/trunk/arch/ppc/kernel/semaphore.c b/trunk/arch/ppc/kernel/semaphore.c
new file mode 100644
index 000000000000..2fe429b27c14
--- /dev/null
+++ b/trunk/arch/ppc/kernel/semaphore.c
@@ -0,0 +1,131 @@
+/*
+ * PowerPC-specific semaphore code.
+ *
+ * Copyright (C) 1999 Cort Dougan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * April 2001 - Reworked by Paul Mackerras
+ * to eliminate the SMP races in the old version between the updates
+ * of `count' and `waking'. Now we use negative `count' values to
+ * indicate that some process(es) are waiting for the semaphore.
+ */
+
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+#include <asm/errno.h>
+
+/*
+ * Atomically update sem->count.
+ * This does the equivalent of the following:
+ *
+ * old_count = sem->count;
+ * tmp = MAX(old_count, 0) + incr;
+ * sem->count = tmp;
+ * return old_count;
+ */
+static inline int __sem_update_count(struct semaphore *sem, int incr)
+{
+ int old_count, tmp;
+
+ __asm__ __volatile__("\n"
+"1: lwarx %0,0,%3\n"
+" srawi %1,%0,31\n"
+" andc %1,%0,%1\n"
+" add %1,%1,%4\n"
+ PPC405_ERR77(0,%3)
+" stwcx. %1,0,%3\n"
+" bne 1b"
+ : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
+ : "r" (&sem->count), "r" (incr), "m" (sem->count)
+ : "cc");
+
+ return old_count;
+}
+
+void __up(struct semaphore *sem)
+{
+ /*
+ * Note that we incremented count in up() before we came here,
+ * but that was ineffective since the result was <= 0, and
+ * any negative value of count is equivalent to 0.
+ * This ends up setting count to 1, unless count is now > 0
+ * (i.e. because some other cpu has called up() in the meantime),
+ * in which case we just increment count.
+ */
+ __sem_update_count(sem, 1);
+ wake_up(&sem->wait);
+}
+
+/*
+ * Note that when we come in to __down or __down_interruptible,
+ * we have already decremented count, but that decrement was
+ * ineffective since the result was < 0, and any negative value
+ * of count is equivalent to 0.
+ * Thus it is only when we decrement count from some value > 0
+ * that we have actually got the semaphore.
+ */
+void __sched __down(struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ add_wait_queue_exclusive(&sem->wait, &wait);
+ smp_wmb();
+
+ /*
+ * Try to get the semaphore. If the count is > 0, then we've
+ * got the semaphore; we decrement count and exit the loop.
+ * If the count is 0 or negative, we set it to -1, indicating
+ * that we are asleep, and then sleep.
+ */
+ while (__sem_update_count(sem, -1) <= 0) {
+ schedule();
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+
+ /*
+ * If there are any more sleepers, wake one of them up so
+ * that it can either get the semaphore, or set count to -1
+ * indicating that there are still processes sleeping.
+ */
+ wake_up(&sem->wait);
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ tsk->state = TASK_INTERRUPTIBLE;
+ add_wait_queue_exclusive(&sem->wait, &wait);
+ smp_wmb();
+
+ while (__sem_update_count(sem, -1) <= 0) {
+ if (signal_pending(current)) {
+ /*
+ * A signal is pending - give up trying.
+ * Set sem->count to 0 if it is negative,
+ * since we are no longer sleeping.
+ */
+ __sem_update_count(sem, 0);
+ retval = -EINTR;
+ break;
+ }
+ schedule();
+ tsk->state = TASK_INTERRUPTIBLE;
+ }
+ tsk->state = TASK_RUNNING;
+ remove_wait_queue(&sem->wait, &wait);
+ wake_up(&sem->wait);
+ return retval;
+}
diff --git a/trunk/arch/s390/Kconfig b/trunk/arch/s390/Kconfig
index f6a68e178fc5..1831833c430e 100644
--- a/trunk/arch/s390/Kconfig
+++ b/trunk/arch/s390/Kconfig
@@ -3,10 +3,6 @@
# see Documentation/kbuild/kconfig-language.txt.
#
-config SCHED_MC
- def_bool y
- depends on SMP
-
config MMU
def_bool y
@@ -43,9 +39,6 @@ config GENERIC_HWEIGHT
config GENERIC_TIME
def_bool y
-config GENERIC_CLOCKEVENTS
- def_bool y
-
config GENERIC_BUG
bool
depends on BUG
@@ -76,8 +69,6 @@ menu "Base setup"
comment "Processor type and features"
-source "kernel/time/Kconfig"
-
config 64BIT
bool "64 bit kernel"
help
@@ -310,7 +301,10 @@ config QDIO
tristate "QDIO support"
---help---
This driver provides the Queued Direct I/O base support for
- IBM System z.
+ IBM mainframes.
+
+ For details please refer to the documentation provided by IBM at
+
To compile this driver as a module, choose M here: the
module will be called qdio.
@@ -492,6 +486,25 @@ config APPLDATA_NET_SUM
source kernel/Kconfig.hz
+config NO_IDLE_HZ
+ bool "No HZ timer ticks in idle"
+ help
+ Switches the regular HZ timer off when the system is going idle.
+ This helps z/VM to detect that the Linux system is idle. VM can
+ then "swap-out" this guest which reduces memory usage. It also
+ reduces the overhead of idle systems.
+
+ The HZ timer can be switched on/off via /proc/sys/kernel/hz_timer.
+ hz_timer=0 means HZ timer is disabled. hz_timer=1 means HZ
+ timer is active.
+
+config NO_IDLE_HZ_INIT
+ bool "HZ timer in idle off by default"
+ depends on NO_IDLE_HZ
+ help
+ The HZ timer is switched off in idle by default. That means the
+ HZ timer is already disabled at boot time.
+
config S390_HYPFS_FS
bool "s390 hypervisor file system support"
select SYS_HYPERVISOR
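
The NO_IDLE_HZ help text above mentions the hz_timer sysctl. A small reader for it, under the assumption that it runs on an s390 kernel built with that option (the proc file does not exist elsewhere):

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/sys/kernel/hz_timer", "r");
            int on;

            if (!f) {
                    perror("hz_timer sysctl not available");
                    return 1;
            }
            if (fscanf(f, "%d", &on) == 1)
                    printf("HZ timer in idle: %s\n", on ? "on" : "off");
            fclose(f);
            return 0;
    }
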
diff --git a/trunk/arch/s390/crypto/aes_s390.c b/trunk/arch/s390/crypto/aes_s390.c
index e33f32b54c08..a3f67f8b5427 100644
--- a/trunk/arch/s390/crypto/aes_s390.c
+++ b/trunk/arch/s390/crypto/aes_s390.c
@@ -499,7 +499,7 @@ static struct crypto_alg cbc_aes_alg = {
}
};
-static int __init aes_s390_init(void)
+static int __init aes_init(void)
{
int ret;
@@ -542,15 +542,15 @@ static int __init aes_s390_init(void)
goto out;
}
-static void __exit aes_s390_fini(void)
+static void __exit aes_fini(void)
{
crypto_unregister_alg(&cbc_aes_alg);
crypto_unregister_alg(&ecb_aes_alg);
crypto_unregister_alg(&aes_alg);
}
-module_init(aes_s390_init);
-module_exit(aes_s390_fini);
+module_init(aes_init);
+module_exit(aes_fini);
MODULE_ALIAS("aes");
diff --git a/trunk/arch/s390/crypto/des_s390.c b/trunk/arch/s390/crypto/des_s390.c
index 4aba83b31596..ea22707f435f 100644
--- a/trunk/arch/s390/crypto/des_s390.c
+++ b/trunk/arch/s390/crypto/des_s390.c
@@ -550,7 +550,7 @@ static struct crypto_alg cbc_des3_192_alg = {
}
};
-static int des_s390_init(void)
+static int init(void)
{
int ret = 0;
@@ -612,7 +612,7 @@ static int des_s390_init(void)
goto out;
}
-static void __exit des_s390_fini(void)
+static void __exit fini(void)
{
crypto_unregister_alg(&cbc_des3_192_alg);
crypto_unregister_alg(&ecb_des3_192_alg);
@@ -625,8 +625,8 @@ static void __exit des_s390_fini(void)
crypto_unregister_alg(&des_alg);
}
-module_init(des_s390_init);
-module_exit(des_s390_fini);
+module_init(init);
+module_exit(fini);
MODULE_ALIAS("des");
MODULE_ALIAS("des3_ede");
diff --git a/trunk/arch/s390/crypto/sha1_s390.c b/trunk/arch/s390/crypto/sha1_s390.c
index 9cf9eca22747..5a834f6578ab 100644
--- a/trunk/arch/s390/crypto/sha1_s390.c
+++ b/trunk/arch/s390/crypto/sha1_s390.c
@@ -137,7 +137,7 @@ static struct crypto_alg alg = {
.dia_final = sha1_final } }
};
-static int __init sha1_s390_init(void)
+static int __init init(void)
{
if (!crypt_s390_func_available(KIMD_SHA_1))
return -EOPNOTSUPP;
@@ -145,13 +145,13 @@ static int __init sha1_s390_init(void)
return crypto_register_alg(&alg);
}
-static void __exit sha1_s390_fini(void)
+static void __exit fini(void)
{
crypto_unregister_alg(&alg);
}
-module_init(sha1_s390_init);
-module_exit(sha1_s390_fini);
+module_init(init);
+module_exit(fini);
MODULE_ALIAS("sha1");
diff --git a/trunk/arch/s390/crypto/sha256_s390.c b/trunk/arch/s390/crypto/sha256_s390.c
index 2a3d756b35d4..ccf8633c4f65 100644
--- a/trunk/arch/s390/crypto/sha256_s390.c
+++ b/trunk/arch/s390/crypto/sha256_s390.c
@@ -133,7 +133,7 @@ static struct crypto_alg alg = {
.dia_final = sha256_final } }
};
-static int sha256_s390_init(void)
+static int init(void)
{
if (!crypt_s390_func_available(KIMD_SHA_256))
return -EOPNOTSUPP;
@@ -141,13 +141,13 @@ static int sha256_s390_init(void)
return crypto_register_alg(&alg);
}
-static void __exit sha256_s390_fini(void)
+static void __exit fini(void)
{
crypto_unregister_alg(&alg);
}
-module_init(sha256_s390_init);
-module_exit(sha256_s390_fini);
+module_init(init);
+module_exit(fini);
MODULE_ALIAS("sha256");
diff --git a/trunk/arch/s390/defconfig b/trunk/arch/s390/defconfig
index dcc3ec2ef643..62f6b5a606dd 100644
--- a/trunk/arch/s390/defconfig
+++ b/trunk/arch/s390/defconfig
@@ -3,7 +3,6 @@
# Linux kernel version: 2.6.25-rc4
# Wed Mar 5 11:22:59 2008
#
-CONFIG_SCHED_MC=y
CONFIG_MMU=y
CONFIG_ZONE_DMA=y
CONFIG_LOCKDEP_SUPPORT=y
diff --git a/trunk/arch/s390/kernel/Makefile b/trunk/arch/s390/kernel/Makefile
index 77051cd27925..4d3e38392cb1 100644
--- a/trunk/arch/s390/kernel/Makefile
+++ b/trunk/arch/s390/kernel/Makefile
@@ -11,7 +11,7 @@ CFLAGS_smp.o := -Wno-nonnull
obj-y := bitmap.o traps.o time.o process.o base.o early.o \
setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
- s390_ext.o debug.o irq.o ipl.o dis.o diag.o
+ semaphore.o s390_ext.o debug.o irq.o ipl.o dis.o diag.o
obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o)
obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
@@ -19,7 +19,7 @@ obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
extra-y += head.o init_task.o vmlinux.lds
obj-$(CONFIG_MODULES) += s390_ksyms.o module.o
-obj-$(CONFIG_SMP) += smp.o topology.o
+obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_AUDIT) += audit.o
compat-obj-$(CONFIG_AUDIT) += compat_audit.o
diff --git a/trunk/arch/s390/kernel/compat_linux.h b/trunk/arch/s390/kernel/compat_linux.h
index 20723a062017..e89f8c0c42a0 100644
--- a/trunk/arch/s390/kernel/compat_linux.h
+++ b/trunk/arch/s390/kernel/compat_linux.h
@@ -162,77 +162,4 @@ struct ucontext32 {
compat_sigset_t uc_sigmask; /* mask last for extensibility */
};
-struct __sysctl_args32;
-struct stat64_emu31;
-struct mmap_arg_struct_emu31;
-struct fadvise64_64_args;
-struct old_sigaction32;
-struct old_sigaction32;
-
-long sys32_chown16(const char __user * filename, u16 user, u16 group);
-long sys32_lchown16(const char __user * filename, u16 user, u16 group);
-long sys32_fchown16(unsigned int fd, u16 user, u16 group);
-long sys32_setregid16(u16 rgid, u16 egid);
-long sys32_setgid16(u16 gid);
-long sys32_setreuid16(u16 ruid, u16 euid);
-long sys32_setuid16(u16 uid);
-long sys32_setresuid16(u16 ruid, u16 euid, u16 suid);
-long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid);
-long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid);
-long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid);
-long sys32_setfsuid16(u16 uid);
-long sys32_setfsgid16(u16 gid);
-long sys32_getgroups16(int gidsetsize, u16 __user *grouplist);
-long sys32_setgroups16(int gidsetsize, u16 __user *grouplist);
-long sys32_getuid16(void);
-long sys32_geteuid16(void);
-long sys32_getgid16(void);
-long sys32_getegid16(void);
-long sys32_ipc(u32 call, int first, int second, int third, u32 ptr);
-long sys32_truncate64(const char __user * path, unsigned long high,
- unsigned long low);
-long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low);
-long sys32_sched_rr_get_interval(compat_pid_t pid,
- struct compat_timespec __user *interval);
-long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
- compat_sigset_t __user *oset, size_t sigsetsize);
-long sys32_rt_sigpending(compat_sigset_t __user *set, size_t sigsetsize);
-long sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo);
-long sys32_execve(void);
-long sys32_init_module(void __user *umod, unsigned long len,
- const char __user *uargs);
-long sys32_delete_module(const char __user *name_user, unsigned int flags);
-long sys32_gettimeofday(struct compat_timeval __user *tv,
- struct timezone __user *tz);
-long sys32_settimeofday(struct compat_timeval __user *tv,
- struct timezone __user *tz);
-long sys32_pause(void);
-long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count,
- u32 poshi, u32 poslo);
-long sys32_pwrite64(unsigned int fd, const char __user *ubuf,
- size_t count, u32 poshi, u32 poslo);
-compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count);
-long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset,
- size_t count);
-long sys32_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset,
- s32 count);
-long sys32_sysctl(struct __sysctl_args32 __user *args);
-long sys32_stat64(char __user * filename, struct stat64_emu31 __user * statbuf);
-long sys32_lstat64(char __user * filename,
- struct stat64_emu31 __user * statbuf);
-long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf);
-long sys32_fstatat64(unsigned int dfd, char __user *filename,
- struct stat64_emu31 __user* statbuf, int flag);
-unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg);
-long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg);
-long sys32_read(unsigned int fd, char __user * buf, size_t count);
-long sys32_write(unsigned int fd, char __user * buf, size_t count);
-long sys32_clone(void);
-long sys32_fadvise64(int fd, loff_t offset, size_t len, int advise);
-long sys32_fadvise64_64(struct fadvise64_64_args __user *args);
-long sys32_sigaction(int sig, const struct old_sigaction32 __user *act,
- struct old_sigaction32 __user *oact);
-long sys32_rt_sigaction(int sig, const struct sigaction32 __user *act,
- struct sigaction32 __user *oact, size_t sigsetsize);
-long sys32_sigaltstack(const stack_t32 __user *uss, stack_t32 __user *uoss);
#endif /* _ASM_S390X_S390_H */
diff --git a/trunk/arch/s390/kernel/compat_signal.c b/trunk/arch/s390/kernel/compat_signal.c
index c7f02e777af2..a5692c460bad 100644
--- a/trunk/arch/s390/kernel/compat_signal.c
+++ b/trunk/arch/s390/kernel/compat_signal.c
@@ -29,7 +29,6 @@
#include
#include "compat_linux.h"
#include "compat_ptrace.h"
-#include "entry.h"
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -429,10 +428,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
/* Default to using normal stack */
sp = (unsigned long) A(regs->gprs[15]);
- /* Overflow on alternate signal stack gives SIGSEGV. */
- if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL))
- return (void __user *) -1UL;
-
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
if (! sas_ss_flags(sp))
@@ -466,9 +461,6 @@ static int setup_frame32(int sig, struct k_sigaction *ka,
if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe32)))
goto give_sigsegv;
- if (frame == (void __user *) -1UL)
- goto give_sigsegv;
-
if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32))
goto give_sigsegv;
@@ -522,9 +514,6 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe32)))
goto give_sigsegv;
- if (frame == (void __user *) -1UL)
- goto give_sigsegv;
-
if (copy_siginfo_to_user32(&frame->info, info))
goto give_sigsegv;
diff --git a/trunk/arch/s390/kernel/debug.c b/trunk/arch/s390/kernel/debug.c
index 1e7d4ac7068b..1b2f5ce45320 100644
--- a/trunk/arch/s390/kernel/debug.c
+++ b/trunk/arch/s390/kernel/debug.c
@@ -73,7 +73,7 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf,
static int debug_open(struct inode *inode, struct file *file);
static int debug_close(struct inode *inode, struct file *file);
static debug_info_t* debug_info_create(char *name, int pages_per_area,
- int nr_areas, int buf_size, mode_t mode);
+ int nr_areas, int buf_size);
static void debug_info_get(debug_info_t *);
static void debug_info_put(debug_info_t *);
static int debug_prolog_level_fn(debug_info_t * id,
@@ -157,7 +157,7 @@ struct debug_view debug_sprintf_view = {
};
/* used by dump analysis tools to determine version of debug feature */
-static unsigned int __used debug_feature_version = __DEBUG_FEATURE_VERSION;
+unsigned int debug_feature_version = __DEBUG_FEATURE_VERSION;
/* static globals */
@@ -327,8 +327,7 @@ debug_info_free(debug_info_t* db_info){
*/
static debug_info_t*
-debug_info_create(char *name, int pages_per_area, int nr_areas, int buf_size,
- mode_t mode)
+debug_info_create(char *name, int pages_per_area, int nr_areas, int buf_size)
{
debug_info_t* rc;
@@ -337,8 +336,6 @@ debug_info_create(char *name, int pages_per_area, int nr_areas, int buf_size,
if(!rc)
goto out;
- rc->mode = mode & ~S_IFMT;
-
/* create root directory */
rc->debugfs_root_entry = debugfs_create_dir(rc->name,
debug_debugfs_root_entry);
@@ -679,30 +676,23 @@ debug_close(struct inode *inode, struct file *file)
}
/*
- * debug_register_mode:
- * - Creates and initializes debug area for the caller
- * The mode parameter allows to specify access rights for the s390dbf files
- * - Returns handle for debug area
+ * debug_register:
+ * - creates and initializes debug area for the caller
+ * - returns handle for debug area
*/
-debug_info_t *debug_register_mode(char *name, int pages_per_area, int nr_areas,
- int buf_size, mode_t mode, uid_t uid,
- gid_t gid)
+debug_info_t*
+debug_register (char *name, int pages_per_area, int nr_areas, int buf_size)
{
debug_info_t *rc = NULL;
- /* Since debugfs currently does not support uid/gid other than root, */
- /* we do not allow gid/uid != 0 until we get support for that. */
- if ((uid != 0) || (gid != 0))
- printk(KERN_WARNING "debug: Warning - Currently only uid/gid "
- "= 0 are supported. Using root as owner now!");
if (!initialized)
BUG();
mutex_lock(&debug_mutex);
/* create new debug_info */
- rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode);
+ rc = debug_info_create(name, pages_per_area, nr_areas, buf_size);
if(!rc)
goto out;
debug_register_view(rc, &debug_level_view);
@@ -715,20 +705,6 @@ debug_info_t *debug_register_mode(char *name, int pages_per_area, int nr_areas,
mutex_unlock(&debug_mutex);
return rc;
}
-EXPORT_SYMBOL(debug_register_mode);
-
-/*
- * debug_register:
- * - creates and initializes debug area for the caller
- * - returns handle for debug area
- */
-
-debug_info_t *debug_register(char *name, int pages_per_area, int nr_areas,
- int buf_size)
-{
- return debug_register_mode(name, pages_per_area, nr_areas, buf_size,
- S_IRUSR | S_IWUSR, 0, 0);
-}
/*
* debug_unregister:
@@ -1097,16 +1073,15 @@ debug_register_view(debug_info_t * id, struct debug_view *view)
int rc = 0;
int i;
unsigned long flags;
- mode_t mode;
+ mode_t mode = S_IFREG;
struct dentry *pde;
if (!id)
goto out;
- mode = (id->mode | S_IFREG) & ~S_IXUGO;
- if (!(view->prolog_proc || view->format_proc || view->header_proc))
- mode &= ~(S_IRUSR | S_IRGRP | S_IROTH);
- if (!view->input_proc)
- mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
+ if (view->prolog_proc || view->format_proc || view->header_proc)
+ mode |= S_IRUSR;
+ if (view->input_proc)
+ mode |= S_IWUSR;
pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry,
id , &debug_file_ops);
if (!pde){
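The reverted debug_register_view() derives the debugfs file mode additively from S_IFREG instead of masking bits out of a caller-supplied mode. A small user-space sketch of that derivation, with has_read/has_write standing in for the view callbacks (illustrative only, not kernel code):

#include <stdio.h>
#include <sys/stat.h>

static mode_t mode_additive(int has_read, int has_write)
{
	mode_t mode = S_IFREG;	/* start from a plain regular file */

	if (has_read)
		mode |= S_IRUSR;
	if (has_write)
		mode |= S_IWUSR;
	return mode;
}

int main(void)
{
	printf("read-only view:  %o\n", (unsigned) mode_additive(1, 0));
	printf("read-write view: %o\n", (unsigned) mode_additive(1, 1));
	return 0;
}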
diff --git a/trunk/arch/s390/kernel/early.c b/trunk/arch/s390/kernel/early.c
index 540a67f979b6..01832c440636 100644
--- a/trunk/arch/s390/kernel/early.c
+++ b/trunk/arch/s390/kernel/early.c
@@ -21,7 +21,6 @@
#include
#include
#include
-#include "entry.h"
/*
* Create a Kernel NSS if the SAVESYS= parameter is defined
diff --git a/trunk/arch/s390/kernel/entry.h b/trunk/arch/s390/kernel/entry.h
deleted file mode 100644
index 6b1896345eda..000000000000
--- a/trunk/arch/s390/kernel/entry.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef _ENTRY_H
-#define _ENTRY_H
-
-#include <linux/types.h>
-#include <linux/signal.h>
-#include <asm/ptrace.h>
-
-typedef void pgm_check_handler_t(struct pt_regs *, long);
-extern pgm_check_handler_t *pgm_check_table[128];
-pgm_check_handler_t do_protection_exception;
-pgm_check_handler_t do_dat_exception;
-
-extern int sysctl_userprocess_debug;
-
-void do_single_step(struct pt_regs *regs);
-void syscall_trace(struct pt_regs *regs, int entryexit);
-void kernel_stack_overflow(struct pt_regs * regs);
-void do_signal(struct pt_regs *regs);
-int handle_signal32(unsigned long sig, struct k_sigaction *ka,
- siginfo_t *info, sigset_t *oldset, struct pt_regs *regs);
-
-void do_extint(struct pt_regs *regs, unsigned short code);
-int __cpuinit start_secondary(void *cpuvoid);
-void __init startup_init(void);
-void die(const char * str, struct pt_regs * regs, long err);
-
-struct new_utsname;
-struct mmap_arg_struct;
-struct fadvise64_64_args;
-struct old_sigaction;
-struct sel_arg_struct;
-
-long sys_pipe(unsigned long __user *fildes);
-long sys_mmap2(struct mmap_arg_struct __user *arg);
-long old_mmap(struct mmap_arg_struct __user *arg);
-long sys_ipc(uint call, int first, unsigned long second,
- unsigned long third, void __user *ptr);
-long s390x_newuname(struct new_utsname __user *name);
-long s390x_personality(unsigned long personality);
-long s390_fadvise64(int fd, u32 offset_high, u32 offset_low,
- size_t len, int advice);
-long s390_fadvise64_64(struct fadvise64_64_args __user *args);
-long s390_fallocate(int fd, int mode, loff_t offset, u32 len_high, u32 len_low);
-long sys_fork(void);
-long sys_clone(void);
-long sys_vfork(void);
-void execve_tail(void);
-long sys_execve(void);
-int sys_sigsuspend(int history0, int history1, old_sigset_t mask);
-long sys_sigaction(int sig, const struct old_sigaction __user *act,
- struct old_sigaction __user *oact);
-long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss);
-long sys_sigreturn(void);
-long sys_rt_sigreturn(void);
-long sys32_sigreturn(void);
-long sys32_rt_sigreturn(void);
-long old_select(struct sel_arg_struct __user *arg);
-long sys_ptrace(long request, long pid, long addr, long data);
-
-#endif /* _ENTRY_H */
diff --git a/trunk/arch/s390/kernel/entry64.S b/trunk/arch/s390/kernel/entry64.S
index cd959c0b2e16..efde6e178f6c 100644
--- a/trunk/arch/s390/kernel/entry64.S
+++ b/trunk/arch/s390/kernel/entry64.S
@@ -475,7 +475,6 @@ pgm_check_handler:
pgm_no_vtime:
#endif
lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct
- mvc SP_ARGS(8,%r15),__LC_LAST_BREAK
TRACE_IRQS_OFF
lgf %r3,__LC_PGM_ILC # load program interruption code
lghi %r8,0x7f
@@ -848,7 +847,6 @@ stack_overflow:
je 0f
la %r1,__LC_SAVE_AREA+32
0: mvc SP_R12(32,%r15),0(%r1) # move %r12-%r15 to stack
- mvc SP_ARGS(8,%r15),__LC_LAST_BREAK
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # clear back chain
la %r2,SP_PTREGS(%r15) # load pt_regs
jg kernel_stack_overflow
diff --git a/trunk/arch/s390/kernel/ipl.c b/trunk/arch/s390/kernel/ipl.c
index 532542447d66..375232c46c7a 100644
--- a/trunk/arch/s390/kernel/ipl.c
+++ b/trunk/arch/s390/kernel/ipl.c
@@ -655,7 +655,7 @@ static struct kobj_attribute reipl_type_attr =
static struct kset *reipl_kset;
-static void reipl_run(struct shutdown_trigger *trigger)
+void reipl_run(struct shutdown_trigger *trigger)
{
struct ccw_dev_id devid;
static char buf[100];
diff --git a/trunk/arch/s390/kernel/kprobes.c b/trunk/arch/s390/kernel/kprobes.c
index ed04d1372d5d..c5549a206284 100644
--- a/trunk/arch/s390/kernel/kprobes.c
+++ b/trunk/arch/s390/kernel/kprobes.c
@@ -360,7 +360,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
* - When the probed function returns, this probe
* causes the handlers to fire
*/
-static void __used kretprobe_trampoline_holder(void)
+void kretprobe_trampoline_holder(void)
{
asm volatile(".global kretprobe_trampoline\n"
"kretprobe_trampoline: bcr 0,0\n");
diff --git a/trunk/arch/s390/kernel/process.c b/trunk/arch/s390/kernel/process.c
index c1aff194141d..ce203154d8ce 100644
--- a/trunk/arch/s390/kernel/process.c
+++ b/trunk/arch/s390/kernel/process.c
@@ -36,8 +36,6 @@
#include
#include
#include
-#include
-#include
#include
#include
#include
@@ -46,7 +44,6 @@
#include
#include
#include
-#include "entry.h"
asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -79,7 +76,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
* Need to know about CPUs going idle?
*/
static ATOMIC_NOTIFIER_HEAD(idle_chain);
-DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
int register_idle_notifier(struct notifier_block *nb)
{
@@ -93,33 +89,9 @@ int unregister_idle_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL(unregister_idle_notifier);
-static int s390_idle_enter(void)
-{
- struct s390_idle_data *idle;
- int nr_calls = 0;
- void *hcpu;
- int rc;
-
- hcpu = (void *)(long)smp_processor_id();
- rc = __atomic_notifier_call_chain(&idle_chain, S390_CPU_IDLE, hcpu, -1,
- &nr_calls);
- if (rc == NOTIFY_BAD) {
- nr_calls--;
- __atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
- hcpu, nr_calls, NULL);
- return rc;
- }
- idle = &__get_cpu_var(s390_idle);
- spin_lock(&idle->lock);
- idle->idle_count++;
- idle->in_idle = 1;
- idle->idle_enter = get_clock();
- spin_unlock(&idle->lock);
- return NOTIFY_OK;
-}
-
-void s390_idle_leave(void)
+void do_monitor_call(struct pt_regs *regs, long interruption_code)
{
+#ifdef CONFIG_SMP
struct s390_idle_data *idle;
idle = &__get_cpu_var(s390_idle);
@@ -127,6 +99,10 @@ void s390_idle_leave(void)
idle->idle_time += get_clock() - idle->idle_enter;
idle->in_idle = 0;
spin_unlock(&idle->lock);
+#endif
+ /* disable monitor call class 0 */
+ __ctl_clear_bit(8, 15);
+
atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
(void *)(long) smp_processor_id());
}
@@ -137,30 +113,61 @@ extern void s390_handle_mcck(void);
*/
static void default_idle(void)
{
+ int cpu, rc;
+ int nr_calls = 0;
+ void *hcpu;
+#ifdef CONFIG_SMP
+ struct s390_idle_data *idle;
+#endif
+
/* CPU is going idle. */
+ cpu = smp_processor_id();
+ hcpu = (void *)(long)cpu;
local_irq_disable();
if (need_resched()) {
local_irq_enable();
return;
}
- if (s390_idle_enter() == NOTIFY_BAD) {
+
+ rc = __atomic_notifier_call_chain(&idle_chain, S390_CPU_IDLE, hcpu, -1,
+ &nr_calls);
+ if (rc == NOTIFY_BAD) {
+ nr_calls--;
+ __atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
+ hcpu, nr_calls, NULL);
local_irq_enable();
return;
}
+
+ /* enable monitor call class 0 */
+ __ctl_set_bit(8, 15);
+
#ifdef CONFIG_HOTPLUG_CPU
- if (cpu_is_offline(smp_processor_id())) {
+ if (cpu_is_offline(cpu)) {
preempt_enable_no_resched();
cpu_die();
}
#endif
+
local_mcck_disable();
if (test_thread_flag(TIF_MCCK_PENDING)) {
local_mcck_enable();
- s390_idle_leave();
+ /* disable monitor call class 0 */
+ __ctl_clear_bit(8, 15);
+ atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
+ hcpu);
local_irq_enable();
s390_handle_mcck();
return;
}
+#ifdef CONFIG_SMP
+ idle = &__get_cpu_var(s390_idle);
+ spin_lock(&idle->lock);
+ idle->idle_count++;
+ idle->in_idle = 1;
+ idle->idle_enter = get_clock();
+ spin_unlock(&idle->lock);
+#endif
trace_hardirqs_on();
/* Wait for external, I/O or machine check interrupt. */
__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
@@ -170,10 +177,9 @@ static void default_idle(void)
void cpu_idle(void)
{
for (;;) {
- tick_nohz_stop_sched_tick();
while (!need_resched())
default_idle();
- tick_nohz_restart_sched_tick();
+
preempt_enable_no_resched();
schedule();
preempt_disable();
@@ -195,7 +201,6 @@ void show_regs(struct pt_regs *regs)
/* Show stack backtrace if pt_regs is from kernel mode */
if (!(regs->psw.mask & PSW_MASK_PSTATE))
show_trace(NULL, (unsigned long *) regs->gprs[15]);
- show_last_breaking_event(regs);
}
extern void kernel_thread_starter(void);
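default_idle() shows the notifier rollback pattern restored by this revert: if a callee returns NOTIFY_BAD, the callees that already saw S390_CPU_IDLE are re-notified with S390_CPU_NOT_IDLE, excluding the vetoer. A user-space model with a plain callback array (the names and NOTIFY_* values are modeled here, not the kernel's atomic notifier chain):

#include <stdio.h>

#define NOTIFY_OK	0x0001
#define NOTIFY_BAD	0x8002

enum { CPU_IDLE, CPU_NOT_IDLE };

typedef int (*notifier_fn)(unsigned long action);

/* Call every notifier with CPU_IDLE; if one vetoes, replay
 * CPU_NOT_IDLE to the ones that already ran (not the vetoer). */
static int idle_enter(notifier_fn *chain, int n)
{
	int i, nr_calls = 0;

	for (i = 0; i < n; i++) {
		nr_calls++;
		if (chain[i](CPU_IDLE) == NOTIFY_BAD) {
			nr_calls--;	/* exclude the callee that vetoed */
			for (i = 0; i < nr_calls; i++)
				chain[i](CPU_NOT_IDLE);
			return NOTIFY_BAD;
		}
	}
	return NOTIFY_OK;
}

static int quiet_notifier(unsigned long action)
{
	printf("quiet_notifier saw action %lu\n", action);
	return NOTIFY_OK;
}

static int veto_notifier(unsigned long action)
{
	printf("veto_notifier saw action %lu\n", action);
	return action == CPU_IDLE ? NOTIFY_BAD : NOTIFY_OK;
}

int main(void)
{
	notifier_fn chain[] = { quiet_notifier, veto_notifier };

	if (idle_enter(chain, 2) == NOTIFY_BAD)
		printf("idle entry vetoed; earlier callees rolled back\n");
	return 0;
}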
diff --git a/trunk/arch/s390/kernel/ptrace.c b/trunk/arch/s390/kernel/ptrace.c
index 58a064296987..6e036bae9875 100644
--- a/trunk/arch/s390/kernel/ptrace.c
+++ b/trunk/arch/s390/kernel/ptrace.c
@@ -41,7 +41,6 @@
#include
#include
#include
-#include "entry.h"
#ifdef CONFIG_COMPAT
#include "compat_ptrace.h"
diff --git a/trunk/arch/s390/kernel/s390_ext.c b/trunk/arch/s390/kernel/s390_ext.c
index e019b419efc6..acf93dba7727 100644
--- a/trunk/arch/s390/kernel/s390_ext.c
+++ b/trunk/arch/s390/kernel/s390_ext.c
@@ -13,12 +13,11 @@
#include
#include
#include
-#include
+
#include
#include
#include
#include
-#include "entry.h"
/*
* ext_int_hash[index] is the start of the list for all external interrupts
@@ -120,10 +119,13 @@ void do_extint(struct pt_regs *regs, unsigned short code)
old_regs = set_irq_regs(regs);
irq_enter();
- s390_idle_check();
- if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
- /* Serve timer interrupts first. */
- clock_comparator_work();
+ asm volatile ("mc 0,0");
+ if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer)
+ /*
+ * Make sure that the I/O interrupt did not "overtake"
+ * the last HZ timer interrupt.
+ */
+ account_ticks(S390_lowcore.int_clock);
kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++;
index = ext_hash(code);
for (p = ext_int_hash[index]; p; p = p->next) {
diff --git a/trunk/arch/s390/kernel/s390_ksyms.c b/trunk/arch/s390/kernel/s390_ksyms.c
index 48238a114ce9..7234c737f825 100644
--- a/trunk/arch/s390/kernel/s390_ksyms.c
+++ b/trunk/arch/s390/kernel/s390_ksyms.c
@@ -26,6 +26,13 @@ EXPORT_SYMBOL(_ni_bitmap);
EXPORT_SYMBOL(_zb_findmap);
EXPORT_SYMBOL(_sb_findmap);
+/*
+ * semaphore ops
+ */
+EXPORT_SYMBOL(__up);
+EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__down_interruptible);
+
/*
* binfmt_elf loader
*/
diff --git a/trunk/arch/s390/kernel/semaphore.c b/trunk/arch/s390/kernel/semaphore.c
new file mode 100644
index 000000000000..191303f6c1d8
--- /dev/null
+++ b/trunk/arch/s390/kernel/semaphore.c
@@ -0,0 +1,108 @@
+/*
+ * linux/arch/s390/kernel/semaphore.c
+ *
+ * S390 version
+ * Copyright (C) 1998-2000 IBM Corporation
+ * Author(s): Martin Schwidefsky
+ *
+ * Derived from "linux/arch/i386/kernel/semaphore.c"
+ * Copyright (C) 1999, Linus Torvalds
+ *
+ */
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+
+#include <asm/semaphore.h>
+
+/*
+ * Atomically update sem->count. Equivalent to:
+ * old_val = sem->count.counter;
+ * new_val = ((old_val >= 0) ? old_val : 0) + incr;
+ * sem->count.counter = new_val;
+ * return old_val;
+ */
+static inline int __sem_update_count(struct semaphore *sem, int incr)
+{
+ int old_val, new_val;
+
+ asm volatile(
+ " l %0,0(%3)\n"
+ "0: ltr %1,%0\n"
+ " jhe 1f\n"
+ " lhi %1,0\n"
+ "1: ar %1,%4\n"
+ " cs %0,%1,0(%3)\n"
+ " jl 0b\n"
+ : "=&d" (old_val), "=&d" (new_val), "=m" (sem->count)
+ : "a" (&sem->count), "d" (incr), "m" (sem->count)
+ : "cc");
+ return old_val;
+}
+
+/*
+ * The inline function up() incremented count but the result
+ * was <= 0. This indicates that some process is waiting on
+ * the semaphore. The semaphore is free and we'll wake the
+ * first sleeping process, so we set count to 1 unless some
+ * other cpu has called up in the meantime in which case
+ * we just increment count by 1.
+ */
+void __up(struct semaphore *sem)
+{
+ __sem_update_count(sem, 1);
+ wake_up(&sem->wait);
+}
+
+/*
+ * The inline function down() decremented count and the result
+ * was < 0. The wait loop will atomically test and update the
+ * semaphore counter following the rules:
+ * count > 0: decrement count, wake up queue and exit.
+ * count <= 0: set count to -1, go to sleep.
+ */
+void __sched __down(struct semaphore * sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ add_wait_queue_exclusive(&sem->wait, &wait);
+ while (__sem_update_count(sem, -1) <= 0) {
+ schedule();
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ __set_task_state(tsk, TASK_RUNNING);
+ wake_up(&sem->wait);
+}
+
+/*
+ * Same as __down() with an additional test for signals.
+ * If a signal is pending the count is updated as follows:
+ * count > 0: wake up queue and exit.
+ * count <= 0: set count to 0, wake up queue and exit.
+ */
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ __set_task_state(tsk, TASK_INTERRUPTIBLE);
+ add_wait_queue_exclusive(&sem->wait, &wait);
+ while (__sem_update_count(sem, -1) <= 0) {
+ if (signal_pending(current)) {
+ __sem_update_count(sem, 0);
+ retval = -EINTR;
+ break;
+ }
+ schedule();
+ set_task_state(tsk, TASK_INTERRUPTIBLE);
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ __set_task_state(tsk, TASK_RUNNING);
+ wake_up(&sem->wait);
+ return retval;
+}
+
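The compare-and-swap loop in __sem_update_count() is the core of these semaphores: the new count is max(old, 0) + incr, and the caller inspects the returned old value. A minimal user-space sketch of the same update rule, assuming GCC/Clang __atomic builtins (sem_update_count and main are illustrative names, not kernel API):

#include <stdio.h>

/* new = max(old, 0) + incr, retried until the CAS succeeds;
 * returns the pre-update value, like the s390 "cs" loop. */
static int sem_update_count(int *count, int incr)
{
	int old_val = __atomic_load_n(count, __ATOMIC_RELAXED);
	int new_val;

	do {
		new_val = (old_val >= 0 ? old_val : 0) + incr;
		/* On failure, old_val is refreshed with the current value. */
	} while (!__atomic_compare_exchange_n(count, &old_val, new_val, 0,
					      __ATOMIC_SEQ_CST,
					      __ATOMIC_SEQ_CST));
	return old_val;
}

int main(void)
{
	int count = -1;	/* a sleeper is recorded */
	int old;

	/* __up(): an old value <= 0 tells us a waiter needs waking. */
	old = sem_update_count(&count, 1);
	printf("old=%d, new=%d\n", old, count);
	return 0;
}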
diff --git a/trunk/arch/s390/kernel/setup.c b/trunk/arch/s390/kernel/setup.c
index 7141147e6b63..290e504061a3 100644
--- a/trunk/arch/s390/kernel/setup.c
+++ b/trunk/arch/s390/kernel/setup.c
@@ -39,7 +39,6 @@
#include
#include
#include
-#include
#include
#include
@@ -428,7 +427,7 @@ setup_lowcore(void)
lc->io_new_psw.mask = psw_kernel_bits;
lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
lc->ipl_device = S390_lowcore.ipl_device;
- lc->clock_comparator = -1ULL;
+ lc->jiffy_timer = -1LL;
lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
lc->async_stack = (unsigned long)
__alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;
@@ -688,7 +687,7 @@ static __init unsigned int stfl(void)
return S390_lowcore.stfl_fac_list;
}
-static int __init __stfle(unsigned long long *list, int doublewords)
+static __init int stfle(unsigned long long *list, int doublewords)
{
typedef struct { unsigned long long _[doublewords]; } addrtype;
register unsigned long __nr asm("0") = doublewords - 1;
@@ -698,13 +697,6 @@ static int __init __stfle(unsigned long long *list, int doublewords)
return __nr + 1;
}
-int __init stfle(unsigned long long *list, int doublewords)
-{
- if (!(stfl() & (1UL << 24)))
- return -EOPNOTSUPP;
- return __stfle(list, doublewords);
-}
-
/*
* Setup hardware capabilities.
*/
@@ -749,7 +741,7 @@ static void __init setup_hwcaps(void)
* HWCAP_S390_DFP bit 6.
*/
if ((elf_hwcap & (1UL << 2)) &&
- __stfle(&facility_list_extended, 1) > 0) {
+ stfle(&facility_list_extended, 1) > 0) {
if (facility_list_extended & (1ULL << (64 - 43)))
elf_hwcap |= 1UL << 6;
}
@@ -831,7 +823,6 @@ setup_arch(char **cmdline_p)
cpu_init();
__cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr;
- s390_init_cpu_topology();
/*
* Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
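The facility lists returned by stfl/stfle number bits from the most significant end, which is why the DFP check above shifts by (64 - 43). A sketch of that convention (test_facility_bit is a hypothetical helper, not the kernel's; under this MSB-first numbering the mask 1ULL << (64 - 43) selects bit 42):

#include <stdio.h>

/* Facility bit "nr", counting bit 0 as the leftmost (most
 * significant) bit of the doubleword. */
static int test_facility_bit(unsigned long long list, int nr)
{
	return (int)((list >> (63 - nr)) & 1);
}

int main(void)
{
	unsigned long long list = 1ULL << (64 - 43);

	printf("bit 42 set: %d\n", test_facility_bit(list, 42)); /* 1 */
	return 0;
}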
diff --git a/trunk/arch/s390/kernel/signal.c b/trunk/arch/s390/kernel/signal.c
index b97682040215..4449bf32cbf1 100644
--- a/trunk/arch/s390/kernel/signal.c
+++ b/trunk/arch/s390/kernel/signal.c
@@ -27,7 +27,6 @@
#include
#include
#include
-#include "entry.h"
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -236,10 +235,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
/* Default to using normal stack */
sp = regs->gprs[15];
- /* Overflow on alternate signal stack gives SIGSEGV. */
- if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL))
- return (void __user *) -1UL;
-
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
if (! sas_ss_flags(sp))
@@ -275,9 +270,6 @@ static int setup_frame(int sig, struct k_sigaction *ka,
if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe)))
goto give_sigsegv;
- if (frame == (void __user *) -1UL)
- goto give_sigsegv;
-
if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE))
goto give_sigsegv;
@@ -335,9 +327,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe)))
goto give_sigsegv;
- if (frame == (void __user *) -1UL)
- goto give_sigsegv;
-
if (copy_siginfo_to_user(&frame->info, info))
goto give_sigsegv;
@@ -485,6 +474,11 @@ void do_signal(struct pt_regs *regs)
int ret;
#ifdef CONFIG_COMPAT
if (test_thread_flag(TIF_31BIT)) {
+ extern int handle_signal32(unsigned long sig,
+ struct k_sigaction *ka,
+ siginfo_t *info,
+ sigset_t *oldset,
+ struct pt_regs *regs);
ret = handle_signal32(signr, &ka, &info, oldset, regs);
}
else
diff --git a/trunk/arch/s390/kernel/smp.c b/trunk/arch/s390/kernel/smp.c
index 0dfa988c1b26..8f894d380a62 100644
--- a/trunk/arch/s390/kernel/smp.c
+++ b/trunk/arch/s390/kernel/smp.c
@@ -44,7 +44,6 @@
#include
#include
#include
-#include "entry.h"
/*
* An array with a pointer the lowcore of every CPU.
@@ -68,12 +67,13 @@ enum s390_cpu_state {
CPU_STATE_CONFIGURED,
};
-DEFINE_MUTEX(smp_cpu_state_mutex);
-int smp_cpu_polarization[NR_CPUS];
+#ifdef CONFIG_HOTPLUG_CPU
+static DEFINE_MUTEX(smp_cpu_state_mutex);
+#endif
static int smp_cpu_state[NR_CPUS];
-static int cpu_management;
static DEFINE_PER_CPU(struct cpu, cpu_devices);
+DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
static void smp_ext_bitcall(int, ec_bit_sig);
@@ -298,7 +298,7 @@ static void smp_ext_bitcall(int cpu, ec_bit_sig sig)
/*
* this function sends a 'purge tlb' signal to another CPU.
*/
-static void smp_ptlb_callback(void *info)
+void smp_ptlb_callback(void *info)
{
__tlb_flush_local();
}
@@ -456,7 +456,6 @@ static int smp_rescan_cpus_sigp(cpumask_t avail)
if (cpu_known(cpu_id))
continue;
__cpu_logical_map[logical_cpu] = cpu_id;
- smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN;
if (!cpu_stopped(logical_cpu))
continue;
cpu_set(logical_cpu, cpu_present_map);
@@ -490,7 +489,6 @@ static int smp_rescan_cpus_sclp(cpumask_t avail)
if (cpu_known(cpu_id))
continue;
__cpu_logical_map[logical_cpu] = cpu_id;
- smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN;
cpu_set(logical_cpu, cpu_present_map);
if (cpu >= info->configured)
smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY;
@@ -848,7 +846,6 @@ void __init smp_prepare_boot_cpu(void)
S390_lowcore.percpu_offset = __per_cpu_offset[0];
current_set[0] = current;
smp_cpu_state[0] = CPU_STATE_CONFIGURED;
- smp_cpu_polarization[0] = POLARIZATION_UNKNWN;
spin_lock_init(&(&__get_cpu_var(s390_idle))->lock);
}
@@ -900,19 +897,15 @@ static ssize_t cpu_configure_store(struct sys_device *dev, const char *buf,
case 0:
if (smp_cpu_state[cpu] == CPU_STATE_CONFIGURED) {
rc = sclp_cpu_deconfigure(__cpu_logical_map[cpu]);
- if (!rc) {
+ if (!rc)
smp_cpu_state[cpu] = CPU_STATE_STANDBY;
- smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
- }
}
break;
case 1:
if (smp_cpu_state[cpu] == CPU_STATE_STANDBY) {
rc = sclp_cpu_configure(__cpu_logical_map[cpu]);
- if (!rc) {
+ if (!rc)
smp_cpu_state[cpu] = CPU_STATE_CONFIGURED;
- smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
- }
}
break;
default:
@@ -926,34 +919,6 @@ static ssize_t cpu_configure_store(struct sys_device *dev, const char *buf,
static SYSDEV_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
#endif /* CONFIG_HOTPLUG_CPU */
-static ssize_t cpu_polarization_show(struct sys_device *dev, char *buf)
-{
- int cpu = dev->id;
- ssize_t count;
-
- mutex_lock(&smp_cpu_state_mutex);
- switch (smp_cpu_polarization[cpu]) {
- case POLARIZATION_HRZ:
- count = sprintf(buf, "horizontal\n");
- break;
- case POLARIZATION_VL:
- count = sprintf(buf, "vertical:low\n");
- break;
- case POLARIZATION_VM:
- count = sprintf(buf, "vertical:medium\n");
- break;
- case POLARIZATION_VH:
- count = sprintf(buf, "vertical:high\n");
- break;
- default:
- count = sprintf(buf, "unknown\n");
- break;
- }
- mutex_unlock(&smp_cpu_state_mutex);
- return count;
-}
-static SYSDEV_ATTR(polarization, 0444, cpu_polarization_show, NULL);
-
static ssize_t show_cpu_address(struct sys_device *dev, char *buf)
{
return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]);
@@ -966,7 +931,6 @@ static struct attribute *cpu_common_attrs[] = {
&attr_configure.attr,
#endif
&attr_address.attr,
- &attr_polarization.attr,
NULL,
};
@@ -1111,48 +1075,11 @@ static ssize_t __ref rescan_store(struct sys_device *dev,
out:
put_online_cpus();
mutex_unlock(&smp_cpu_state_mutex);
- if (!cpus_empty(newcpus))
- topology_schedule_update();
return rc ? rc : count;
}
static SYSDEV_ATTR(rescan, 0200, NULL, rescan_store);
#endif /* CONFIG_HOTPLUG_CPU */
-static ssize_t dispatching_show(struct sys_device *dev, char *buf)
-{
- ssize_t count;
-
- mutex_lock(&smp_cpu_state_mutex);
- count = sprintf(buf, "%d\n", cpu_management);
- mutex_unlock(&smp_cpu_state_mutex);
- return count;
-}
-
-static ssize_t dispatching_store(struct sys_device *dev, const char *buf,
- size_t count)
-{
- int val, rc;
- char delim;
-
- if (sscanf(buf, "%d %c", &val, &delim) != 1)
- return -EINVAL;
- if (val != 0 && val != 1)
- return -EINVAL;
- rc = 0;
- mutex_lock(&smp_cpu_state_mutex);
- get_online_cpus();
- if (cpu_management == val)
- goto out;
- rc = topology_set_cpu_management(val);
- if (!rc)
- cpu_management = val;
-out:
- put_online_cpus();
- mutex_unlock(&smp_cpu_state_mutex);
- return rc ? rc : count;
-}
-static SYSDEV_ATTR(dispatching, 0644, dispatching_show, dispatching_store);
-
static int __init topology_init(void)
{
int cpu;
@@ -1166,10 +1093,6 @@ static int __init topology_init(void)
if (rc)
return rc;
#endif
- rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
- &attr_dispatching.attr);
- if (rc)
- return rc;
for_each_present_cpu(cpu) {
rc = smp_add_present_cpu(cpu);
if (rc)
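The removed dispatching_store() uses a common sysfs parsing idiom: sscanf with "%d %c" must convert exactly one item, which rejects input carrying trailing garbage. A user-space sketch of that check (parse_flag is an invented name; the kernel code returns -EINVAL instead of -1):

#include <stdio.h>

static int parse_flag(const char *buf, int *val)
{
	char delim;

	/* Two conversions means something followed the integer. */
	if (sscanf(buf, "%d %c", val, &delim) != 1)
		return -1;
	if (*val != 0 && *val != 1)
		return -1;
	return 0;
}

int main(void)
{
	int v;

	printf("%d\n", parse_flag("1\n", &v));	/*  0: accepted */
	printf("%d\n", parse_flag("1 x", &v));	/* -1: trailing junk */
	return 0;
}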
diff --git a/trunk/arch/s390/kernel/sys_s390.c b/trunk/arch/s390/kernel/sys_s390.c
index 988d0d64c2c8..fefee99f28aa 100644
--- a/trunk/arch/s390/kernel/sys_s390.c
+++ b/trunk/arch/s390/kernel/sys_s390.c
@@ -29,8 +29,8 @@
#include
#include
#include
+
#include
-#include "entry.h"
/*
* sys_pipe() is the normal C calling standard for creating
diff --git a/trunk/arch/s390/kernel/time.c b/trunk/arch/s390/kernel/time.c
index 7aec676fefd5..cb232c155360 100644
--- a/trunk/arch/s390/kernel/time.c
+++ b/trunk/arch/s390/kernel/time.c
@@ -30,7 +30,7 @@
#include
#include
#include
-#include
+
#include
#include
#include
@@ -39,7 +39,6 @@
#include
#include
#include
-#include
/* change this if you have some constant time drift */
#define USECS_PER_JIFFY ((unsigned long) 1000000/HZ)
@@ -58,16 +57,16 @@
static ext_int_info_t ext_int_info_cc;
static ext_int_info_t ext_int_etr_cc;
+static u64 init_timer_cc;
static u64 jiffies_timer_cc;
-
-static DEFINE_PER_CPU(struct clock_event_device, comparators);
+static u64 xtime_cc;
/*
* Scheduler clock - returns current time in nanosec units.
*/
unsigned long long sched_clock(void)
{
- return ((get_clock_xt() - jiffies_timer_cc) * 125) >> 9;
+ return ((get_clock() - jiffies_timer_cc) * 125) >> 9;
}
/*
@@ -96,40 +95,162 @@ void tod_to_timeval(__u64 todval, struct timespec *xtime)
#define s390_do_profile() do { ; } while(0)
#endif /* CONFIG_PROFILING */
-void clock_comparator_work(void)
+/*
+ * Advance the per cpu tick counter up to the time given with the
+ * "time" argument. The per cpu update consists of accounting
+ * the virtual cpu time, calling update_process_times and calling
+ * the profiling hook. If xtime is before time it is advanced as well.
+ */
+void account_ticks(u64 time)
{
- struct clock_event_device *cd;
+ __u32 ticks;
+ __u64 tmp;
+
+ /* Calculate how many ticks have passed. */
+ if (time < S390_lowcore.jiffy_timer)
+ return;
+ tmp = time - S390_lowcore.jiffy_timer;
+ if (tmp >= 2*CLK_TICKS_PER_JIFFY) { /* more than two ticks ? */
+ ticks = __div(tmp, CLK_TICKS_PER_JIFFY) + 1;
+ S390_lowcore.jiffy_timer +=
+ CLK_TICKS_PER_JIFFY * (__u64) ticks;
+ } else if (tmp >= CLK_TICKS_PER_JIFFY) {
+ ticks = 2;
+ S390_lowcore.jiffy_timer += 2*CLK_TICKS_PER_JIFFY;
+ } else {
+ ticks = 1;
+ S390_lowcore.jiffy_timer += CLK_TICKS_PER_JIFFY;
+ }
+
+#ifdef CONFIG_SMP
+ /*
+ * Do not rely on the boot cpu to do the calls to do_timer.
+ * Spread it over all cpus instead.
+ */
+ write_seqlock(&xtime_lock);
+ if (S390_lowcore.jiffy_timer > xtime_cc) {
+ __u32 xticks;
+ tmp = S390_lowcore.jiffy_timer - xtime_cc;
+ if (tmp >= 2*CLK_TICKS_PER_JIFFY) {
+ xticks = __div(tmp, CLK_TICKS_PER_JIFFY);
+ xtime_cc += (__u64) xticks * CLK_TICKS_PER_JIFFY;
+ } else {
+ xticks = 1;
+ xtime_cc += CLK_TICKS_PER_JIFFY;
+ }
+ do_timer(xticks);
+ }
+ write_sequnlock(&xtime_lock);
+#else
+ do_timer(ticks);
+#endif
+
+ while (ticks--)
+ update_process_times(user_mode(get_irq_regs()));
- S390_lowcore.clock_comparator = -1ULL;
- set_clock_comparator(S390_lowcore.clock_comparator);
- cd = &__get_cpu_var(comparators);
- cd->event_handler(cd);
s390_do_profile();
}
+#ifdef CONFIG_NO_IDLE_HZ
+
+#ifdef CONFIG_NO_IDLE_HZ_INIT
+int sysctl_hz_timer = 0;
+#else
+int sysctl_hz_timer = 1;
+#endif
+
+/*
+ * Stop the HZ tick on the current CPU.
+ * Only cpu_idle may call this function.
+ */
+static void stop_hz_timer(void)
+{
+ unsigned long flags;
+ unsigned long seq, next;
+ __u64 timer, todval;
+ int cpu = smp_processor_id();
+
+ if (sysctl_hz_timer != 0)
+ return;
+
+ cpu_set(cpu, nohz_cpu_mask);
+
+ /*
+ * Leave the clock comparator set up for the next timer
+ * tick if either rcu or a softirq is pending.
+ */
+ if (rcu_needs_cpu(cpu) || local_softirq_pending()) {
+ cpu_clear(cpu, nohz_cpu_mask);
+ return;
+ }
+
+ /*
+ * This cpu is going really idle. Set up the clock comparator
+ * for the next event.
+ */
+ next = next_timer_interrupt();
+ do {
+ seq = read_seqbegin_irqsave(&xtime_lock, flags);
+ timer = ((__u64) next) - ((__u64) jiffies) + jiffies_64;
+ } while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
+ todval = -1ULL;
+ /* Be careful about overflows. */
+ if (timer < (-1ULL / CLK_TICKS_PER_JIFFY)) {
+ timer = jiffies_timer_cc + timer * CLK_TICKS_PER_JIFFY;
+ if (timer >= jiffies_timer_cc)
+ todval = timer;
+ }
+ set_clock_comparator(todval);
+}
+
/*
- * Fixup the clock comparator.
+ * Start the HZ tick on the current CPU.
+ * Only cpu_idle may call this function.
*/
-static void fixup_clock_comparator(unsigned long long delta)
+static void start_hz_timer(void)
{
- /* If nobody is waiting there's nothing to fix. */
- if (S390_lowcore.clock_comparator == -1ULL)
+ if (!cpu_isset(smp_processor_id(), nohz_cpu_mask))
return;
- S390_lowcore.clock_comparator += delta;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ account_ticks(get_clock());
+ set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION);
+ cpu_clear(smp_processor_id(), nohz_cpu_mask);
}
-static int s390_next_event(unsigned long delta,
- struct clock_event_device *evt)
+static int nohz_idle_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
{
- S390_lowcore.clock_comparator = get_clock() + delta;
- set_clock_comparator(S390_lowcore.clock_comparator);
- return 0;
+ switch (action) {
+ case S390_CPU_IDLE:
+ stop_hz_timer();
+ break;
+ case S390_CPU_NOT_IDLE:
+ start_hz_timer();
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block nohz_idle_nb = {
+ .notifier_call = nohz_idle_notify,
+};
+
+static void __init nohz_init(void)
+{
+ if (register_idle_notifier(&nohz_idle_nb))
+ panic("Couldn't register idle notifier");
}
-static void s390_set_mode(enum clock_event_mode mode,
- struct clock_event_device *evt)
+#endif
+
+/*
+ * Set up per cpu jiffy timer and set the clock comparator.
+ */
+static void setup_jiffy_timer(void)
{
+ /* Set up clock comparator to next jiffy. */
+ S390_lowcore.jiffy_timer =
+ jiffies_timer_cc + (jiffies_64 + 1) * CLK_TICKS_PER_JIFFY;
+ set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION);
}
/*
@@ -138,26 +259,7 @@ static void s390_set_mode(enum clock_event_mode mode,
*/
void init_cpu_timer(void)
{
- struct clock_event_device *cd;
- int cpu;
-
- S390_lowcore.clock_comparator = -1ULL;
- set_clock_comparator(S390_lowcore.clock_comparator);
-
- cpu = smp_processor_id();
- cd = &per_cpu(comparators, cpu);
- cd->name = "comparator";
- cd->features = CLOCK_EVT_FEAT_ONESHOT;
- cd->mult = 16777;
- cd->shift = 12;
- cd->min_delta_ns = 1;
- cd->max_delta_ns = LONG_MAX;
- cd->rating = 400;
- cd->cpumask = cpumask_of_cpu(cpu);
- cd->set_next_event = s390_next_event;
- cd->set_mode = s390_set_mode;
-
- clockevents_register_device(cd);
+ setup_jiffy_timer();
/* Enable clock comparator timer interrupt. */
__ctl_set_bit(0,11);
@@ -168,6 +270,8 @@ void init_cpu_timer(void)
static void clock_comparator_interrupt(__u16 code)
{
+ /* set clock comparator for next tick */
+ set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION);
}
static void etr_reset(void);
@@ -212,9 +316,8 @@ static struct clocksource clocksource_tod = {
*/
void __init time_init(void)
{
- u64 init_timer_cc;
-
init_timer_cc = reset_tod_clock();
+ xtime_cc = init_timer_cc + CLK_TICKS_PER_JIFFY;
jiffies_timer_cc = init_timer_cc - jiffies_64 * CLK_TICKS_PER_JIFFY;
/* set xtime */
@@ -239,6 +342,10 @@ void __init time_init(void)
/* Enable TOD clock interrupts on the boot cpu. */
init_cpu_timer();
+#ifdef CONFIG_NO_IDLE_HZ
+ nohz_init();
+#endif
+
#ifdef CONFIG_VIRT_TIMER
vtime_init();
#endif
@@ -592,49 +699,53 @@ static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p)
}
/*
- * The time is "clock". old is what we think the time is.
+ * The time is "clock". xtime is what we think the time is.
* Adjust the value by a multiple of jiffies and add the delta to ntp.
* "delay" is an approximation how long the synchronization took. If
* the time correction is positive, then "delay" is subtracted from
* the time difference and only the remaining part is passed to ntp.
*/
-static unsigned long long etr_adjust_time(unsigned long long old,
- unsigned long long clock,
- unsigned long long delay)
+static void etr_adjust_time(unsigned long long clock, unsigned long long delay)
{
unsigned long long delta, ticks;
struct timex adjust;
- if (clock > old) {
+ /*
+ * We don't have to take the xtime lock because the cpu
+ * executing etr_adjust_time is running disabled in
+ * tasklet context and all other cpus are looping in
+ * etr_sync_cpu_start.
+ */
+ if (clock > xtime_cc) {
/* It is later than we thought. */
- delta = ticks = clock - old;
+ delta = ticks = clock - xtime_cc;
delta = ticks = (delta < delay) ? 0 : delta - delay;
delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
+ init_timer_cc = init_timer_cc + delta;
+ jiffies_timer_cc = jiffies_timer_cc + delta;
+ xtime_cc = xtime_cc + delta;
adjust.offset = ticks * (1000000 / HZ);
} else {
/* It is earlier than we thought. */
- delta = ticks = old - clock;
+ delta = ticks = xtime_cc - clock;
delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
- delta = -delta;
+ init_timer_cc = init_timer_cc - delta;
+ jiffies_timer_cc = jiffies_timer_cc - delta;
+ xtime_cc = xtime_cc - delta;
adjust.offset = -ticks * (1000000 / HZ);
}
- jiffies_timer_cc += delta;
if (adjust.offset != 0) {
printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n",
adjust.offset);
adjust.modes = ADJ_OFFSET_SINGLESHOT;
do_adjtimex(&adjust);
}
- return delta;
}
-static struct {
- int in_sync;
- unsigned long long fixup_cc;
-} etr_sync;
-
static void etr_sync_cpu_start(void *dummy)
{
+ int *in_sync = dummy;
+
etr_enable_sync_clock();
/*
* This looks like a busy wait loop but it isn't. etr_sync_cpus
@@ -642,7 +753,7 @@ static void etr_sync_cpu_start(void *dummy)
* __udelay will stop the cpu on an enabled wait psw until the
* TOD is running again.
*/
- while (etr_sync.in_sync == 0) {
+ while (*in_sync == 0) {
__udelay(1);
/*
* A different cpu changes *in_sync. Therefore use
@@ -650,14 +761,14 @@ static void etr_sync_cpu_start(void *dummy)
*/
barrier();
}
- if (etr_sync.in_sync != 1)
+ if (*in_sync != 1)
/* Didn't work. Clear per-cpu in sync bit again. */
etr_disable_sync_clock(NULL);
/*
* This round of TOD syncing is done. Set the clock comparator
* to the next tick and let the processor continue.
*/
- fixup_clock_comparator(etr_sync.fixup_cc);
+ setup_jiffy_timer();
}
static void etr_sync_cpu_end(void *dummy)
@@ -672,8 +783,8 @@ static void etr_sync_cpu_end(void *dummy)
static int etr_sync_clock(struct etr_aib *aib, int port)
{
struct etr_aib *sync_port;
- unsigned long long clock, old_clock, delay, delta;
- int follows;
+ unsigned long long clock, delay;
+ int in_sync, follows;
int rc;
/* Check if the current aib is adjacent to the sync port aib. */
@@ -688,9 +799,9 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
* successfully synced the clock. smp_call_function will
* return after all other cpus are in etr_sync_cpu_start.
*/
- memset(&etr_sync, 0, sizeof(etr_sync));
+ in_sync = 0;
preempt_disable();
- smp_call_function(etr_sync_cpu_start, NULL, 0, 0);
+ smp_call_function(etr_sync_cpu_start, &in_sync, 0, 0);
local_irq_disable();
etr_enable_sync_clock();
@@ -698,7 +809,6 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
__ctl_set_bit(14, 21);
__ctl_set_bit(0, 29);
clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32;
- old_clock = get_clock();
if (set_clock(clock) == 0) {
__udelay(1); /* Wait for the clock to start. */
__ctl_clear_bit(0, 29);
@@ -707,17 +817,16 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
/* Adjust Linux timing variables. */
delay = (unsigned long long)
(aib->edf2.etv - sync_port->edf2.etv) << 32;
- delta = etr_adjust_time(old_clock, clock, delay);
- etr_sync.fixup_cc = delta;
- fixup_clock_comparator(delta);
+ etr_adjust_time(clock, delay);
+ setup_jiffy_timer();
/* Verify that the clock is properly set. */
if (!etr_aib_follows(sync_port, aib, port)) {
/* Didn't work. */
etr_disable_sync_clock(NULL);
- etr_sync.in_sync = -EAGAIN;
+ in_sync = -EAGAIN;
rc = -EAGAIN;
} else {
- etr_sync.in_sync = 1;
+ in_sync = 1;
rc = 0;
}
} else {
@@ -725,7 +834,7 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
__ctl_clear_bit(0, 29);
__ctl_clear_bit(14, 21);
etr_disable_sync_clock(NULL);
- etr_sync.in_sync = -EAGAIN;
+ in_sync = -EAGAIN;
rc = -EAGAIN;
}
local_irq_enable();
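account_ticks() turns a TOD clock delta into a number of HZ ticks and advances the per-cpu jiffy_timer to the next tick boundary. A user-space model of just that arithmetic, assuming the usual 4096 TOD clock units per microsecond and HZ=100 (account_ticks_model is an invented name):

#include <stdio.h>
#include <stdint.h>

#define HZ			100
#define CLK_TICKS_PER_JIFFY	(4096ULL * 1000000 / HZ)

static uint64_t jiffy_timer;	/* stands in for S390_lowcore.jiffy_timer */

static unsigned int account_ticks_model(uint64_t time)
{
	uint64_t tmp;
	unsigned int ticks;

	if (time < jiffy_timer)
		return 0;
	tmp = time - jiffy_timer;
	if (tmp >= 2 * CLK_TICKS_PER_JIFFY) {	/* more than two ticks? */
		ticks = (unsigned int)(tmp / CLK_TICKS_PER_JIFFY) + 1;
		jiffy_timer += CLK_TICKS_PER_JIFFY * (uint64_t)ticks;
	} else if (tmp >= CLK_TICKS_PER_JIFFY) {
		ticks = 2;
		jiffy_timer += 2 * CLK_TICKS_PER_JIFFY;
	} else {
		ticks = 1;
		jiffy_timer += CLK_TICKS_PER_JIFFY;
	}
	return ticks;
}

int main(void)
{
	jiffy_timer = 0;
	/* 3.5 jiffies of clock delta account for 4 ticks. */
	printf("%u\n", account_ticks_model(7 * CLK_TICKS_PER_JIFFY / 2));
	return 0;
}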
diff --git a/trunk/arch/s390/kernel/topology.c b/trunk/arch/s390/kernel/topology.c
deleted file mode 100644
index 12b39b3d9c38..000000000000
--- a/trunk/arch/s390/kernel/topology.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/*
- * Copyright IBM Corp. 2007
- * Author(s): Heiko Carstens
- */
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#define CPU_BITS 64
-#define NR_MAG 6
-
-#define PTF_HORIZONTAL (0UL)
-#define PTF_VERTICAL (1UL)
-#define PTF_CHECK (2UL)
-
-struct tl_cpu {
- unsigned char reserved0[4];
- unsigned char :6;
- unsigned char pp:2;
- unsigned char reserved1;
- unsigned short origin;
- unsigned long mask[CPU_BITS / BITS_PER_LONG];
-};
-
-struct tl_container {
- unsigned char reserved[8];
-};
-
-union tl_entry {
- unsigned char nl;
- struct tl_cpu cpu;
- struct tl_container container;
-};
-
-struct tl_info {
- unsigned char reserved0[2];
- unsigned short length;
- unsigned char mag[NR_MAG];
- unsigned char reserved1;
- unsigned char mnest;
- unsigned char reserved2[4];
- union tl_entry tle[0];
-};
-
-struct core_info {
- struct core_info *next;
- cpumask_t mask;
-};
-
-static void topology_work_fn(struct work_struct *work);
-static struct tl_info *tl_info;
-static struct core_info core_info;
-static int machine_has_topology;
-static int machine_has_topology_irq;
-static struct timer_list topology_timer;
-static void set_topology_timer(void);
-static DECLARE_WORK(topology_work, topology_work_fn);
-
-cpumask_t cpu_coregroup_map(unsigned int cpu)
-{
- struct core_info *core = &core_info;
- cpumask_t mask;
-
- cpus_clear(mask);
- if (!machine_has_topology)
- return cpu_present_map;
- mutex_lock(&smp_cpu_state_mutex);
- while (core) {
- if (cpu_isset(cpu, core->mask)) {
- mask = core->mask;
- break;
- }
- core = core->next;
- }
- mutex_unlock(&smp_cpu_state_mutex);
- if (cpus_empty(mask))
- mask = cpumask_of_cpu(cpu);
- return mask;
-}
-
-static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core)
-{
- unsigned int cpu;
-
- for (cpu = find_first_bit(&tl_cpu->mask[0], CPU_BITS);
- cpu < CPU_BITS;
- cpu = find_next_bit(&tl_cpu->mask[0], CPU_BITS, cpu + 1))
- {
- unsigned int rcpu, lcpu;
-
- rcpu = CPU_BITS - 1 - cpu + tl_cpu->origin;
- for_each_present_cpu(lcpu) {
- if (__cpu_logical_map[lcpu] == rcpu) {
- cpu_set(lcpu, core->mask);
- smp_cpu_polarization[lcpu] = tl_cpu->pp;
- }
- }
- }
-}
-
-static void clear_cores(void)
-{
- struct core_info *core = &core_info;
-
- while (core) {
- cpus_clear(core->mask);
- core = core->next;
- }
-}
-
-static union tl_entry *next_tle(union tl_entry *tle)
-{
- if (tle->nl)
- return (union tl_entry *)((struct tl_container *)tle + 1);
- else
- return (union tl_entry *)((struct tl_cpu *)tle + 1);
-}
-
-static void tl_to_cores(struct tl_info *info)
-{
- union tl_entry *tle, *end;
- struct core_info *core = &core_info;
-
- mutex_lock(&smp_cpu_state_mutex);
- clear_cores();
- tle = info->tle;
- end = (union tl_entry *)((unsigned long)info + info->length);
- while (tle < end) {
- switch (tle->nl) {
- case 5:
- case 4:
- case 3:
- case 2:
- break;
- case 1:
- core = core->next;
- break;
- case 0:
- add_cpus_to_core(&tle->cpu, core);
- break;
- default:
- clear_cores();
- machine_has_topology = 0;
- return;
- }
- tle = next_tle(tle);
- }
- mutex_unlock(&smp_cpu_state_mutex);
-}
-
-static void topology_update_polarization_simple(void)
-{
- int cpu;
-
- mutex_lock(&smp_cpu_state_mutex);
- for_each_present_cpu(cpu)
- smp_cpu_polarization[cpu] = POLARIZATION_HRZ;
- mutex_unlock(&smp_cpu_state_mutex);
-}
-
-static int ptf(unsigned long fc)
-{
- int rc;
-
- asm volatile(
- " .insn rre,0xb9a20000,%1,%1\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (rc)
- : "d" (fc) : "cc");
- return rc;
-}
-
-int topology_set_cpu_management(int fc)
-{
- int cpu;
- int rc;
-
- if (!machine_has_topology)
- return -EOPNOTSUPP;
- if (fc)
- rc = ptf(PTF_VERTICAL);
- else
- rc = ptf(PTF_HORIZONTAL);
- if (rc)
- return -EBUSY;
- for_each_present_cpu(cpu)
- smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
- return rc;
-}
-
-void arch_update_cpu_topology(void)
-{
- struct tl_info *info = tl_info;
- struct sys_device *sysdev;
- int cpu;
-
- if (!machine_has_topology) {
- topology_update_polarization_simple();
- return;
- }
- stsi(info, 15, 1, 2);
- tl_to_cores(info);
- for_each_online_cpu(cpu) {
- sysdev = get_cpu_sysdev(cpu);
- kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
- }
-}
-
-static void topology_work_fn(struct work_struct *work)
-{
- arch_reinit_sched_domains();
-}
-
-void topology_schedule_update(void)
-{
- schedule_work(&topology_work);
-}
-
-static void topology_timer_fn(unsigned long ignored)
-{
- if (ptf(PTF_CHECK))
- topology_schedule_update();
- set_topology_timer();
-}
-
-static void set_topology_timer(void)
-{
- topology_timer.function = topology_timer_fn;
- topology_timer.data = 0;
- topology_timer.expires = jiffies + 60 * HZ;
- add_timer(&topology_timer);
-}
-
-static void topology_interrupt(__u16 code)
-{
- schedule_work(&topology_work);
-}
-
-static int __init init_topology_update(void)
-{
- int rc;
-
- if (!machine_has_topology) {
- topology_update_polarization_simple();
- return 0;
- }
- init_timer_deferrable(&topology_timer);
- if (machine_has_topology_irq) {
- rc = register_external_interrupt(0x2005, topology_interrupt);
- if (rc)
- return rc;
- ctl_set_bit(0, 8);
- }
- else
- set_topology_timer();
- return 0;
-}
-__initcall(init_topology_update);
-
-void __init s390_init_cpu_topology(void)
-{
- unsigned long long facility_bits;
- struct tl_info *info;
- struct core_info *core;
- int nr_cores;
- int i;
-
- if (stfle(&facility_bits, 1) <= 0)
- return;
- if (!(facility_bits & (1ULL << 52)) || !(facility_bits & (1ULL << 61)))
- return;
- machine_has_topology = 1;
-
- if (facility_bits & (1ULL << 51))
- machine_has_topology_irq = 1;
-
- tl_info = alloc_bootmem_pages(PAGE_SIZE);
- if (!tl_info)
- goto error;
- info = tl_info;
- stsi(info, 15, 1, 2);
-
- nr_cores = info->mag[NR_MAG - 2];
- for (i = 0; i < info->mnest - 2; i++)
- nr_cores *= info->mag[NR_MAG - 3 - i];
-
- printk(KERN_INFO "CPU topology:");
- for (i = 0; i < NR_MAG; i++)
- printk(" %d", info->mag[i]);
- printk(" / %d\n", info->mnest);
-
- core = &core_info;
- for (i = 0; i < nr_cores; i++) {
- core->next = alloc_bootmem(sizeof(struct core_info));
- core = core->next;
- if (!core)
- goto error;
- }
- return;
-error:
- machine_has_topology = 0;
- machine_has_topology_irq = 0;
-}
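The deleted s390_init_cpu_topology() sizes its core list from the STSI 15.1.2 magnitude fields: the count at the core level multiplied by the container counts of every higher nesting level. A user-space sketch of that arithmetic (the sample magnitudes are invented):

#include <stdio.h>

#define NR_MAG 6

static int count_cores(const unsigned char mag[NR_MAG], int mnest)
{
	int nr_cores = mag[NR_MAG - 2];	/* entries at the core level */
	int i;

	/* One multiplication per container level above the cores. */
	for (i = 0; i < mnest - 2; i++)
		nr_cores *= mag[NR_MAG - 3 - i];
	return nr_cores;
}

int main(void)
{
	unsigned char mag[NR_MAG] = { 0, 0, 0, 6, 4, 64 };

	/* mnest == 3: 6 containers of 4 cores each -> 24 */
	printf("%d\n", count_cores(mag, 3));
	return 0;
}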
diff --git a/trunk/arch/s390/kernel/traps.c b/trunk/arch/s390/kernel/traps.c
index 57b607b61100..60f728aeaf12 100644
--- a/trunk/arch/s390/kernel/traps.c
+++ b/trunk/arch/s390/kernel/traps.c
@@ -42,8 +42,11 @@
#include
#include
#include
-#include "entry.h"
+/* Called from entry.S only */
+extern void handle_per_exception(struct pt_regs *regs);
+
+typedef void pgm_check_handler_t(struct pt_regs *, long);
pgm_check_handler_t *pgm_check_table[128];
#ifdef CONFIG_SYSCTL
@@ -56,6 +59,7 @@ int sysctl_userprocess_debug = 0;
extern pgm_check_handler_t do_protection_exception;
extern pgm_check_handler_t do_dat_exception;
+extern pgm_check_handler_t do_monitor_call;
extern pgm_check_handler_t do_asce_exception;
#define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; })
@@ -134,6 +138,7 @@ void show_trace(struct task_struct *task, unsigned long *stack)
else
__show_trace(sp, S390_lowcore.thread_info,
S390_lowcore.thread_info + THREAD_SIZE);
+ printk("\n");
if (!task)
task = current;
debug_show_held_locks(task);
@@ -161,15 +166,6 @@ void show_stack(struct task_struct *task, unsigned long *sp)
show_trace(task, sp);
}
-#ifdef CONFIG_64BIT
-void show_last_breaking_event(struct pt_regs *regs)
-{
- printk("Last Breaking-Event-Address:\n");
- printk(" [<%016lx>] ", regs->args[0] & PSW_ADDR_INSN);
- print_symbol("%s\n", regs->args[0] & PSW_ADDR_INSN);
-}
-#endif
-
/*
* The architecture-independent dump_stack generator
*/
@@ -743,5 +739,6 @@ void __init trap_init(void)
pgm_check_table[0x15] = &operand_exception;
pgm_check_table[0x1C] = &space_switch_exception;
pgm_check_table[0x1D] = &hfp_sqrt_exception;
+ pgm_check_table[0x40] = &do_monitor_call;
pfault_irq_init();
}
diff --git a/trunk/arch/s390/lib/delay.c b/trunk/arch/s390/lib/delay.c
index eae21a8ac72d..70f2a862b670 100644
--- a/trunk/arch/s390/lib/delay.c
+++ b/trunk/arch/s390/lib/delay.c
@@ -34,7 +34,7 @@ void __delay(unsigned long loops)
*/
void __udelay(unsigned long usecs)
{
- u64 end, time, old_cc = 0;
+ u64 end, time, jiffy_timer = 0;
unsigned long flags, cr0, mask, dummy;
int irq_context;
@@ -43,8 +43,8 @@ void __udelay(unsigned long usecs)
local_bh_disable();
local_irq_save(flags);
if (raw_irqs_disabled_flags(flags)) {
- old_cc = S390_lowcore.clock_comparator;
- S390_lowcore.clock_comparator = -1ULL;
+ jiffy_timer = S390_lowcore.jiffy_timer;
+ S390_lowcore.jiffy_timer = -1ULL - (4096 << 12);
__ctl_store(cr0, 0, 0);
dummy = (cr0 & 0xffff00e0) | 0x00000800;
__ctl_load(dummy , 0, 0);
@@ -55,8 +55,8 @@ void __udelay(unsigned long usecs)
end = get_clock() + ((u64) usecs << 12);
do {
- time = end < S390_lowcore.clock_comparator ?
- end : S390_lowcore.clock_comparator;
+ time = end < S390_lowcore.jiffy_timer ?
+ end : S390_lowcore.jiffy_timer;
set_clock_comparator(time);
trace_hardirqs_on();
__load_psw_mask(mask);
@@ -65,10 +65,10 @@ void __udelay(unsigned long usecs)
if (raw_irqs_disabled_flags(flags)) {
__ctl_load(cr0, 0, 0);
- S390_lowcore.clock_comparator = old_cc;
+ S390_lowcore.jiffy_timer = jiffy_timer;
}
if (!irq_context)
_local_bh_enable();
- set_clock_comparator(S390_lowcore.clock_comparator);
+ set_clock_comparator(S390_lowcore.jiffy_timer);
local_irq_restore(flags);
}
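__udelay() converts microseconds to TOD units (usecs << 12) and waits in slices, each bounded by the earlier of the delay deadline and the pending tick in jiffy_timer, so a due tick is never overshot. A user-space model of that loop (udelay_model and the fake clock are illustrative, not kernel code):

#include <stdio.h>
#include <stdint.h>

static uint64_t clock_now;	/* fake TOD clock, 4096 units per usec */

static void wait_until(uint64_t t)	/* stands in for the wait PSW */
{
	if (clock_now < t)
		clock_now = t;
}

static void udelay_model(unsigned long usecs, uint64_t jiffy_timer)
{
	uint64_t end = clock_now + ((uint64_t)usecs << 12);

	do {
		/* Sleep only until the earlier of the two events. */
		uint64_t time = end < jiffy_timer ? end : jiffy_timer;

		wait_until(time);
		/* A real tick would be handled here before looping. */
	} while (clock_now < end);
	printf("done at %llu\n", (unsigned long long)clock_now);
}

int main(void)
{
	udelay_model(10, 5 << 12);	/* a tick is due after 5 usec */
	return 0;
}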
diff --git a/trunk/arch/s390/lib/uaccess_pt.c b/trunk/arch/s390/lib/uaccess_pt.c
index d66215b0fde9..5efdfe9f5e76 100644
--- a/trunk/arch/s390/lib/uaccess_pt.c
+++ b/trunk/arch/s390/lib/uaccess_pt.c
@@ -302,10 +302,6 @@ static size_t copy_in_user_pt(size_t n, void __user *to,
pte_t *pte_from, *pte_to;
int write_user;
- if (segment_eq(get_fs(), KERNEL_DS)) {
- memcpy((void __force *) to, (void __force *) from, n);
- return 0;
- }
done = 0;
retry:
spin_lock(&mm->page_table_lock);
@@ -365,10 +361,18 @@ static size_t copy_in_user_pt(size_t n, void __user *to,
: "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
"m" (*uaddr) : "cc" );
-static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
+int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
{
int oldval = 0, newval, ret;
+ spin_lock(&current->mm->page_table_lock);
+ uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
+ if (!uaddr) {
+ spin_unlock(&current->mm->page_table_lock);
+ return -EFAULT;
+ }
+ get_page(virt_to_page(uaddr));
+ spin_unlock(&current->mm->page_table_lock);
switch (op) {
case FUTEX_OP_SET:
__futex_atomic_op("lr %2,%5\n",
@@ -393,17 +397,17 @@ static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
default:
ret = -ENOSYS;
}
- if (ret == 0)
- *old = oldval;
+ put_page(virt_to_page(uaddr));
+ *old = oldval;
return ret;
}
-int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
+int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
{
int ret;
- if (segment_eq(get_fs(), KERNEL_DS))
- return __futex_atomic_op_pt(op, uaddr, oparg, old);
+ if (!current->mm)
+ return -EFAULT;
spin_lock(&current->mm->page_table_lock);
uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
if (!uaddr) {
@@ -412,40 +416,13 @@ int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
}
get_page(virt_to_page(uaddr));
spin_unlock(&current->mm->page_table_lock);
- ret = __futex_atomic_op_pt(op, uaddr, oparg, old);
- put_page(virt_to_page(uaddr));
- return ret;
-}
-
-static int __futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
-{
- int ret;
-
- asm volatile("0: cs %1,%4,0(%5)\n"
- "1: lr %0,%1\n"
- "2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ asm volatile(" cs %1,%4,0(%5)\n"
+ "0: lr %0,%1\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
: "=d" (ret), "+d" (oldval), "=m" (*uaddr)
: "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
: "cc", "memory" );
- return ret;
-}
-
-int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
-{
- int ret;
-
- if (segment_eq(get_fs(), KERNEL_DS))
- return __futex_atomic_cmpxchg_pt(uaddr, oldval, newval);
- spin_lock(&current->mm->page_table_lock);
- uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
- if (!uaddr) {
- spin_unlock(&current->mm->page_table_lock);
- return -EFAULT;
- }
- get_page(virt_to_page(uaddr));
- spin_unlock(&current->mm->page_table_lock);
- ret = __futex_atomic_cmpxchg_pt(uaddr, oldval, newval);
put_page(virt_to_page(uaddr));
return ret;
}
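futex_atomic_cmpxchg_pt() implements compare-and-exchange on a user word: store newval only if the word still holds oldval, and hand back the value actually observed. A user-space model of those semantics using GCC/Clang __atomic builtins (futex_cmpxchg_model is illustrative, not the kernel interface):

#include <stdio.h>

static int futex_cmpxchg_model(int *uaddr, int oldval, int newval)
{
	/* On failure, oldval is overwritten with the observed value,
	 * mirroring what "cs" leaves in its first operand. */
	__atomic_compare_exchange_n(uaddr, &oldval, newval, 0,
				    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
	return oldval;
}

int main(void)
{
	int lock = 0;

	printf("%d\n", futex_cmpxchg_model(&lock, 0, 1)); /* 0: won  */
	printf("%d\n", futex_cmpxchg_model(&lock, 0, 1)); /* 1: lost */
	return 0;
}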
diff --git a/trunk/arch/s390/mm/extmem.c b/trunk/arch/s390/mm/extmem.c
index ed2af0a3303b..880b0ebf894b 100644
--- a/trunk/arch/s390/mm/extmem.c
+++ b/trunk/arch/s390/mm/extmem.c
@@ -289,8 +289,22 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
rc = add_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
- if (rc)
+ switch (rc) {
+ case 0:
+ break;
+ case -ENOSPC:
+ PRINT_WARN("segment_load: not loading segment %s - overlaps "
+ "storage/segment\n", name);
+ goto out_free;
+ case -ERANGE:
+ PRINT_WARN("segment_load: not loading segment %s - exceeds "
+ "kernel mapping range\n", name);
+ goto out_free;
+ default:
+ PRINT_WARN("segment_load: not loading segment %s (rc: %d)\n",
+ name, rc);
goto out_free;
+ }
seg->res = kzalloc(sizeof(struct resource), GFP_KERNEL);
if (seg->res == NULL) {
@@ -568,59 +582,8 @@ segment_save(char *name)
mutex_unlock(&dcss_lock);
}
-/*
- * print appropriate error message for segment_load()/segment_type()
- * return code
- */
-void segment_warning(int rc, char *seg_name)
-{
- switch (rc) {
- case -ENOENT:
- PRINT_WARN("cannot load/query segment %s, "
- "does not exist\n", seg_name);
- break;
- case -ENOSYS:
- PRINT_WARN("cannot load/query segment %s, "
- "not running on VM\n", seg_name);
- break;
- case -EIO:
- PRINT_WARN("cannot load/query segment %s, "
- "hardware error\n", seg_name);
- break;
- case -ENOTSUPP:
- PRINT_WARN("cannot load/query segment %s, "
- "is a multi-part segment\n", seg_name);
- break;
- case -ENOSPC:
- PRINT_WARN("cannot load/query segment %s, "
- "overlaps with storage\n", seg_name);
- break;
- case -EBUSY:
- PRINT_WARN("cannot load/query segment %s, "
- "overlaps with already loaded dcss\n", seg_name);
- break;
- case -EPERM:
- PRINT_WARN("cannot load/query segment %s, "
- "already loaded in incompatible mode\n", seg_name);
- break;
- case -ENOMEM:
- PRINT_WARN("cannot load/query segment %s, "
- "out of memory\n", seg_name);
- break;
- case -ERANGE:
- PRINT_WARN("cannot load/query segment %s, "
- "exceeds kernel mapping range\n", seg_name);
- break;
- default:
- PRINT_WARN("cannot load/query segment %s, "
- "return value %i\n", seg_name, rc);
- break;
- }
-}
-
EXPORT_SYMBOL(segment_load);
EXPORT_SYMBOL(segment_unload);
EXPORT_SYMBOL(segment_save);
EXPORT_SYMBOL(segment_type);
EXPORT_SYMBOL(segment_modify_shared);
-EXPORT_SYMBOL(segment_warning);
diff --git a/trunk/arch/s390/mm/fault.c b/trunk/arch/s390/mm/fault.c
index 2650f46001d0..ed13d429a487 100644
--- a/trunk/arch/s390/mm/fault.c
+++ b/trunk/arch/s390/mm/fault.c
@@ -28,11 +28,11 @@
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
+
#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/s390_ext.h>
#include <asm/mmu_context.h>
-#include "../kernel/entry.h"
#ifndef CONFIG_64BIT
#define __FAIL_ADDR_MASK 0x7ffff000
@@ -50,6 +50,8 @@
extern int sysctl_userprocess_debug;
#endif
+extern void die(const char *,struct pt_regs *,long);
+
#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs, long err)
{
@@ -243,6 +245,11 @@ static void do_sigbus(struct pt_regs *regs, unsigned long error_code,
}
#ifdef CONFIG_S390_EXEC_PROTECT
+extern long sys_sigreturn(struct pt_regs *regs);
+extern long sys_rt_sigreturn(struct pt_regs *regs);
+extern long sys32_sigreturn(struct pt_regs *regs);
+extern long sys32_rt_sigreturn(struct pt_regs *regs);
+
static int signal_return(struct mm_struct *mm, struct pt_regs *regs,
unsigned long address, unsigned long error_code)
{
@@ -263,15 +270,15 @@ static int signal_return(struct mm_struct *mm, struct pt_regs *regs,
#ifdef CONFIG_COMPAT
compat = test_tsk_thread_flag(current, TIF_31BIT);
if (compat && instruction == 0x0a77)
- sys32_sigreturn();
+ sys32_sigreturn(regs);
else if (compat && instruction == 0x0aad)
- sys32_rt_sigreturn();
+ sys32_rt_sigreturn(regs);
else
#endif
if (instruction == 0x0a77)
- sys_sigreturn();
+ sys_sigreturn(regs);
else if (instruction == 0x0aad)
- sys_rt_sigreturn();
+ sys_rt_sigreturn(regs);
else {
current->thread.prot_addr = address;
current->thread.trap_no = error_code;
@@ -417,7 +424,7 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int write)
}
void __kprobes do_protection_exception(struct pt_regs *regs,
- long error_code)
+ unsigned long error_code)
{
/* Protection exception is suppressing, decrement psw address. */
regs->psw.addr -= (error_code >> 16);
@@ -433,7 +440,7 @@ void __kprobes do_protection_exception(struct pt_regs *regs,
do_exception(regs, 4, 1);
}
-void __kprobes do_dat_exception(struct pt_regs *regs, long error_code)
+void __kprobes do_dat_exception(struct pt_regs *regs, unsigned long error_code)
{
do_exception(regs, error_code & 0xff, 0);
}
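
The opcode checks in signal_return() rely on the s390 "svc" encoding: the first byte is 0x0a and the second byte is the system call number, so 0x0a77 is svc 119 (__NR_sigreturn) and 0x0aad is svc 173 (__NR_rt_sigreturn). A minimal sketch of that decode (hypothetical helper, not part of the patch):

	/* Is a 16-bit s390 instruction "svc __NR_sigreturn" or
	 * "svc __NR_rt_sigreturn"? 0x0a00 | nr encodes "svc nr". */
	static int is_sigreturn_svc(unsigned short insn)
	{
		return insn == (0x0a00 | 119) ||	/* svc __NR_sigreturn    */
		       insn == (0x0a00 | 173);	/* svc __NR_rt_sigreturn */
	}
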
diff --git a/trunk/arch/s390/mm/init.c b/trunk/arch/s390/mm/init.c
index 202c952a29b4..8053245fe259 100644
--- a/trunk/arch/s390/mm/init.c
+++ b/trunk/arch/s390/mm/init.c
@@ -50,6 +50,7 @@ void show_mem(void)
printk("Mem-info:\n");
show_free_areas();
+ printk("Free swap: %6ldkB\n", nr_swap_pages << (PAGE_SHIFT - 10));
i = max_mapnr;
while (i-- > 0) {
if (!pfn_valid(i))
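
The added printk converts pages to kilobytes with a shift: a page is 1 << PAGE_SHIFT bytes, i.e. 1 << (PAGE_SHIFT - 10) KiB, so with the usual 4 KiB pages each page counts as 4 kB. A worked instance of the arithmetic (PAGE_SHIFT value assumed for illustration):

	/* pages-to-kB conversion used by the printk, assuming
	 * PAGE_SHIFT == 12 (4 KiB pages): 1000 pages -> 4000 kB. */
	static long swap_kb(long pages)
	{
		return pages << (12 - 10);
	}
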
diff --git a/trunk/arch/sh/kernel/Makefile_32 b/trunk/arch/sh/kernel/Makefile_32
index 4bbdce36b92b..62bf373266f7 100644
--- a/trunk/arch/sh/kernel/Makefile_32
+++ b/trunk/arch/sh/kernel/Makefile_32
@@ -5,7 +5,7 @@
extra-y := head_32.o init_task.o vmlinux.lds
obj-y := debugtraps.o io.o io_generic.o irq.o machvec.o process_32.o \
- ptrace_32.o setup.o signal_32.o sys_sh.o sys_sh32.o \
+ ptrace_32.o semaphore.o setup.o signal_32.o sys_sh.o sys_sh32.o \
syscalls_32.o time_32.o topology.o traps.o traps_32.o
obj-y += cpu/ timers/
diff --git a/trunk/arch/sh/kernel/Makefile_64 b/trunk/arch/sh/kernel/Makefile_64
index 6edf53b93d94..e01283d49cbf 100644
--- a/trunk/arch/sh/kernel/Makefile_64
+++ b/trunk/arch/sh/kernel/Makefile_64
@@ -1,7 +1,7 @@
extra-y := head_64.o init_task.o vmlinux.lds
obj-y := debugtraps.o io.o io_generic.o irq.o machvec.o process_64.o \
- ptrace_64.o setup.o signal_64.o sys_sh.o sys_sh64.o \
+ ptrace_64.o semaphore.o setup.o signal_64.o sys_sh.o sys_sh64.o \
syscalls_64.o time_64.o topology.o traps.o traps_64.o
obj-y += cpu/ timers/
diff --git a/trunk/arch/sh/kernel/semaphore.c b/trunk/arch/sh/kernel/semaphore.c
new file mode 100644
index 000000000000..184119eeae56
--- /dev/null
+++ b/trunk/arch/sh/kernel/semaphore.c
@@ -0,0 +1,139 @@
+/*
+ * Just taken from alpha implementation.
+ * This can't work well, perhaps.
+ */
+/*
+ * Generic semaphore code. Buyer beware. Do your own
+ * specific changes in <asm/semaphore-helper.h>
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/init.h>
+#include <asm/semaphore.h>
+#include <asm/semaphore-helper.h>
+
+DEFINE_SPINLOCK(semaphore_wake_lock);
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to sleep, while the "waking" variable is
+ * incremented when the "up()" code goes to wake up waiting
+ * processes.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * waking_non_zero() (from asm/semaphore.h) must execute
+ * atomically.
+ *
+ * When __up() is called, the count was negative before
+ * incrementing it, and we need to wake up somebody.
+ *
+ * This routine adds one to the count of processes that need to
+ * wake up and exit. ALL waiting processes actually wake up but
+ * only the one that gets to the "waking" field first will gate
+ * through and acquire the semaphore. The others will go back
+ * to sleep.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+void __up(struct semaphore *sem)
+{
+ wake_one_more(sem);
+ wake_up(&sem->wait);
+}
+
+/*
+ * Perform the "down" function. Return zero for semaphore acquired,
+ * return negative for signalled out of the function.
+ *
+ * If called from __down, the return is ignored and the wait loop is
+ * not interruptible. This means that a task waiting on a semaphore
+ * using "down()" cannot be killed until someone does an "up()" on
+ * the semaphore.
+ *
+ * If called from __down_interruptible, the return value gets checked
+ * upon return. If the return value is negative then the task continues
+ * with the negative value in the return register (it can be tested by
+ * the caller).
+ *
+ * Either form may be used in conjunction with "up()".
+ *
+ */
+
+#define DOWN_VAR \
+ struct task_struct *tsk = current; \
+ wait_queue_t wait; \
+ init_waitqueue_entry(&wait, tsk);
+
+#define DOWN_HEAD(task_state) \
+ \
+ \
+ tsk->state = (task_state); \
+ add_wait_queue(&sem->wait, &wait); \
+ \
+ /* \
+ * Ok, we're set up. sem->count is known to be less than zero \
+ * so we must wait. \
+ * \
+ * We can let go the lock for purposes of waiting. \
+ * We re-acquire it after awaking so as to protect \
+ * all semaphore operations. \
+ * \
+ * If "up()" is called before we call waking_non_zero() then \
+ * we will catch it right away. If it is called later then \
+ * we will have to go through a wakeup cycle to catch it. \
+ * \
+ * Multiple waiters contend for the semaphore lock to see \
+ * who gets to gate through and who has to wait some more. \
+ */ \
+ for (;;) {
+
+#define DOWN_TAIL(task_state) \
+ tsk->state = (task_state); \
+ } \
+ tsk->state = TASK_RUNNING; \
+ remove_wait_queue(&sem->wait, &wait);
+
+void __sched __down(struct semaphore * sem)
+{
+ DOWN_VAR
+ DOWN_HEAD(TASK_UNINTERRUPTIBLE)
+ if (waking_non_zero(sem))
+ break;
+ schedule();
+ DOWN_TAIL(TASK_UNINTERRUPTIBLE)
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ int ret = 0;
+ DOWN_VAR
+ DOWN_HEAD(TASK_INTERRUPTIBLE)
+
+ ret = waking_non_zero_interruptible(sem, tsk);
+ if (ret)
+ {
+ if (ret == 1)
+ /* ret != 0 only if we get interrupted -arca */
+ ret = 0;
+ break;
+ }
+ schedule();
+ DOWN_TAIL(TASK_INTERRUPTIBLE)
+ return ret;
+}
+
+int __down_trylock(struct semaphore * sem)
+{
+ return waking_non_zero_trylock(sem);
+}
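
The DOWN_VAR/DOWN_HEAD/DOWN_TAIL macros above assemble a classic sleep-wait loop around the waking_non_zero() helpers from <asm/semaphore-helper.h>. Expanded by hand, __down() comes out roughly as follows (illustrative expansion of the patch's own macros, not additional code):

	/* What __down() looks like with the DOWN_* macros expanded. */
	void __sched __down(struct semaphore *sem)
	{
		struct task_struct *tsk = current;
		wait_queue_t wait;

		init_waitqueue_entry(&wait, tsk);
		tsk->state = TASK_UNINTERRUPTIBLE;
		add_wait_queue(&sem->wait, &wait);
		for (;;) {
			if (waking_non_zero(sem))	/* consume a pending wakeup */
				break;
			schedule();			/* sleep until __up() runs */
			tsk->state = TASK_UNINTERRUPTIBLE;
		}
		tsk->state = TASK_RUNNING;
		remove_wait_queue(&sem->wait, &wait);
	}
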
diff --git a/trunk/arch/sh/kernel/sh_ksyms_32.c b/trunk/arch/sh/kernel/sh_ksyms_32.c
index 6d405462cee8..45bb333fd9ec 100644
--- a/trunk/arch/sh/kernel/sh_ksyms_32.c
+++ b/trunk/arch/sh/kernel/sh_ksyms_32.c
@@ -9,6 +9,7 @@
#include
#include
#include
+#include <asm/semaphore.h>
#include
#include
#include
@@ -47,6 +48,12 @@ EXPORT_SYMBOL(__copy_user);
EXPORT_SYMBOL(get_vm_area);
#endif
+/* semaphore exports */
+EXPORT_SYMBOL(__up);
+EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__down_interruptible);
+EXPORT_SYMBOL(__down_trylock);
+
EXPORT_SYMBOL(__udelay);
EXPORT_SYMBOL(__ndelay);
EXPORT_SYMBOL(__const_udelay);
diff --git a/trunk/arch/sh/kernel/sh_ksyms_64.c b/trunk/arch/sh/kernel/sh_ksyms_64.c
index a310c9707f03..b6410ce4bd1d 100644
--- a/trunk/arch/sh/kernel/sh_ksyms_64.c
+++ b/trunk/arch/sh/kernel/sh_ksyms_64.c
@@ -16,6 +16,7 @@
#include
#include
#include
+#include <asm/semaphore.h>
#include
#include
#include
@@ -36,6 +37,9 @@ EXPORT_SYMBOL(csum_partial_copy_nocheck);
EXPORT_SYMBOL(screen_info);
#endif
+EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__down_trylock);
+EXPORT_SYMBOL(__up);
EXPORT_SYMBOL(__put_user_asm_l);
EXPORT_SYMBOL(__get_user_asm_l);
EXPORT_SYMBOL(copy_page);
diff --git a/trunk/arch/sparc/kernel/Makefile b/trunk/arch/sparc/kernel/Makefile
index 2712bb166f6f..bf1b15d3f6f5 100644
--- a/trunk/arch/sparc/kernel/Makefile
+++ b/trunk/arch/sparc/kernel/Makefile
@@ -12,7 +12,7 @@ obj-y := entry.o wof.o wuf.o etrap.o rtrap.o traps.o $(IRQ_OBJS) \
sys_sparc.o sunos_asm.o systbls.o \
time.o windows.o cpu.o devices.o sclow.o \
tadpole.o tick14.o ptrace.o sys_solaris.o \
- unaligned.o una_asm.o muldiv.o \
+ unaligned.o una_asm.o muldiv.o semaphore.o \
prom.o of_device.o devres.o
devres-y = ../../../kernel/irq/devres.o
diff --git a/trunk/arch/sparc/kernel/semaphore.c b/trunk/arch/sparc/kernel/semaphore.c
new file mode 100644
index 000000000000..0c37c1a7cd7e
--- /dev/null
+++ b/trunk/arch/sparc/kernel/semaphore.c
@@ -0,0 +1,155 @@
+/* $Id: semaphore.c,v 1.7 2001/04/18 21:06:05 davem Exp $ */
+
+/* sparc32 semaphore implementation, based on i386 version */
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+
+#include <asm/semaphore.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleeping"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * "sleeping" and the contention routine ordering is
+ * protected by the semaphore spinlock.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+
+/*
+ * Logic:
+ * - only on a boundary condition do we need to care. When we go
+ * from a negative count to a non-negative, we wake people up.
+ * - when we go from a non-negative count to a negative do we
+ * (a) synchronize with the "sleeper" count and (b) make sure
+ * that we're on the wakeup list before we synchronize so that
+ * we cannot lose wakeup events.
+ */
+
+void __up(struct semaphore *sem)
+{
+ wake_up(&sem->wait);
+}
+
+static DEFINE_SPINLOCK(semaphore_lock);
+
+void __sched __down(struct semaphore * sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ spin_lock_irq(&semaphore_lock);
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock.
+ */
+ if (!atomic24_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irq(&semaphore_lock);
+
+ schedule();
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ spin_lock_irq(&semaphore_lock);
+ }
+ spin_unlock_irq(&semaphore_lock);
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+ wake_up(&sem->wait);
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ tsk->state = TASK_INTERRUPTIBLE;
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ spin_lock_irq(&semaphore_lock);
+ sem->sleepers ++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * With signals pending, this turns into
+ * the trylock failure case - we won't be
+ * sleeping, and we can't get the lock as
+ * it has contention. Just correct the count
+ * and exit.
+ */
+ if (signal_pending(current)) {
+ retval = -EINTR;
+ sem->sleepers = 0;
+ atomic24_add(sleepers, &sem->count);
+ break;
+ }
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock. The
+ * "-1" is because we're still hoping to get
+ * the lock.
+ */
+ if (!atomic24_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irq(&semaphore_lock);
+
+ schedule();
+ tsk->state = TASK_INTERRUPTIBLE;
+ spin_lock_irq(&semaphore_lock);
+ }
+ spin_unlock_irq(&semaphore_lock);
+ tsk->state = TASK_RUNNING;
+ remove_wait_queue(&sem->wait, &wait);
+ wake_up(&sem->wait);
+ return retval;
+}
+
+/*
+ * Trylock failed - make sure we correct for
+ * having decremented the count.
+ */
+int __down_trylock(struct semaphore * sem)
+{
+ int sleepers;
+ unsigned long flags;
+
+ spin_lock_irqsave(&semaphore_lock, flags);
+ sleepers = sem->sleepers + 1;
+ sem->sleepers = 0;
+
+ /*
+ * Add "everybody else" and us into it. They aren't
+ * playing, because we own the spinlock.
+ */
+ if (!atomic24_add_negative(sleepers, &sem->count))
+ wake_up(&sem->wait);
+
+ spin_unlock_irqrestore(&semaphore_lock, flags);
+ return 1;
+}
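
The sleepers bookkeeping above is the i386 trick: every contender has already decremented count once on entry, so inside the loop each task adds sleepers - 1 back, leaving only its own -1 outstanding, and atomic24_add_negative() then answers whether a free unit remained. A toy model of that gate (plain ints, no atomicity or locking; illustration only):

	/* Toy model of the contended-down gate. Returns 1 if the caller
	 * now owns the semaphore, 0 if it must sleep and retry. */
	static int gate(int *count, int *sleepers)
	{
		*count += *sleepers - 1;	/* undo the other waiters' decrements */
		if (*count >= 0) {		/* a unit was free - we got it */
			*sleepers = 0;
			return 1;
		}
		*sleepers = 1;			/* only our own -1 remains */
		return 0;
	}
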
diff --git a/trunk/arch/sparc/kernel/sparc_ksyms.c b/trunk/arch/sparc/kernel/sparc_ksyms.c
index 97b1de0e9094..c1025e551650 100644
--- a/trunk/arch/sparc/kernel/sparc_ksyms.c
+++ b/trunk/arch/sparc/kernel/sparc_ksyms.c
@@ -107,6 +107,11 @@ EXPORT_SYMBOL(___rw_read_try);
EXPORT_SYMBOL(___rw_read_exit);
EXPORT_SYMBOL(___rw_write_enter);
#endif
+/* semaphores */
+EXPORT_SYMBOL(__up);
+EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__down_trylock);
+EXPORT_SYMBOL(__down_interruptible);
EXPORT_SYMBOL(sparc_valid_addr_bitmap);
EXPORT_SYMBOL(phys_base);
diff --git a/trunk/arch/sparc64/kernel/Makefile b/trunk/arch/sparc64/kernel/Makefile
index 459462e80a12..1bf5b187de49 100644
--- a/trunk/arch/sparc64/kernel/Makefile
+++ b/trunk/arch/sparc64/kernel/Makefile
@@ -10,7 +10,7 @@ extra-y := head.o init_task.o vmlinux.lds
obj-y := process.o setup.o cpu.o idprom.o \
traps.o auxio.o una_asm.o sysfs.o iommu.o \
irq.o ptrace.o time.o sys_sparc.o signal.o \
- unaligned.o central.o pci.o starfire.o \
+ unaligned.o central.o pci.o starfire.o semaphore.o \
power.o sbus.o sparc64_ksyms.o chmc.o \
visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o
diff --git a/trunk/arch/sparc64/kernel/semaphore.c b/trunk/arch/sparc64/kernel/semaphore.c
new file mode 100644
index 000000000000..9974a6899551
--- /dev/null
+++ b/trunk/arch/sparc64/kernel/semaphore.c
@@ -0,0 +1,254 @@
+/* semaphore.c: Sparc64 semaphore implementation.
+ *
+ * This is basically the PPC semaphore scheme ported to use
+ * the sparc64 atomic instructions, so see the PPC code for
+ * credits.
+ */
+
+#include <linux/sched.h>