From 995ccf93a07733ca425a9e440e9c4ccaca177846 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 12 Jun 2009 08:49:20 +0100
Subject: [PATCH] --- yaml --- r: 148006 b: refs/heads/master c:
 63997775b795f97ef51f3e56bc3abc9edc04bbb0 h: refs/heads/master v: v3

---
 [refs]                                        |    2 +-
 trunk/Documentation/ide/ide.txt               |    2 -
 trunk/Documentation/kernel-parameters.txt     |    7 +-
 trunk/Documentation/lguest/Makefile           |    3 +-
 trunk/Documentation/lguest/lguest.c           | 1008 +++++++++++------
 trunk/Documentation/lguest/lguest.txt         |    1 +
 trunk/arch/alpha/mm/extable.c                 |   21 -
 trunk/arch/avr32/kernel/module.c              |    2 +
 trunk/arch/cris/kernel/module.c               |    2 +
 trunk/arch/frv/kernel/module.c                |    2 +
 trunk/arch/h8300/kernel/module.c              |    2 +
 trunk/arch/ia64/mm/extable.c                  |   26 -
 trunk/arch/m32r/kernel/module.c               |    2 +
 trunk/arch/m68k/kernel/module.c               |    2 +
 trunk/arch/m68knommu/kernel/module.c          |    2 +
 trunk/arch/mips/kernel/module.c               |    2 +
 trunk/arch/mn10300/kernel/module.c            |    2 +
 trunk/arch/parisc/kernel/module.c             |    2 +
 trunk/arch/powerpc/kernel/module.c            |    2 +
 trunk/arch/s390/kernel/module.c               |    2 +
 trunk/arch/sh/kernel/module.c                 |    2 +
 trunk/arch/sparc/include/asm/uaccess_32.h     |    3 -
 trunk/arch/sparc/kernel/module.c              |    2 +
 trunk/arch/sparc/mm/extable.c                 |   29 -
 trunk/arch/um/include/asm/pgtable.h           |    7 +-
 trunk/arch/um/sys-i386/Makefile               |    2 +-
 trunk/arch/um/sys-x86_64/Makefile             |    4 +-
 trunk/arch/um/sys-x86_64/um_module.c          |   21 +
 trunk/arch/x86/include/asm/lguest.h           |    7 +-
 trunk/arch/x86/include/asm/lguest_hcall.h     |   15 +-
 trunk/arch/x86/include/asm/pgtable_32_types.h |    4 -
 trunk/arch/x86/kernel/Makefile                |    2 +-
 trunk/arch/x86/kernel/asm-offsets_32.c        |    1 -
 trunk/arch/x86/kernel/module_32.c             |  152 +++
 .../arch/x86/kernel/{module.c => module_64.c} |   82 +-
 trunk/arch/x86/kernel/setup.c                 |   15 +-
 trunk/arch/x86/kernel/vmlinux.lds.S           |    2 -
 trunk/arch/x86/lguest/Kconfig                 |    1 +
 trunk/arch/x86/lguest/boot.c                  |  158 +--
 trunk/arch/x86/lguest/i386_head.S             |   60 +-
 trunk/arch/xtensa/kernel/module.c             |    2 +
 trunk/drivers/block/virtio_blk.c              |   10 +-
 trunk/drivers/char/hw_random/virtio-rng.c     |   30 +-
 trunk/drivers/char/virtio_console.c           |   26 +-
 trunk/drivers/ide/at91_ide.c                  |    7 +-
 trunk/drivers/ide/au1xxx-ide.c                |    8 +-
 trunk/drivers/ide/buddha.c                    |    9 +-
 trunk/drivers/ide/cmd640.c                    |    7 +-
 trunk/drivers/ide/cs5520.c                    |    4 +-
 trunk/drivers/ide/delkin_cb.c                 |    6 +-
 trunk/drivers/ide/falconide.c                 |    9 +-
 trunk/drivers/ide/gayle.c                     |    9 +-
 trunk/drivers/ide/hpt366.c                    |   25 +-
 trunk/drivers/ide/icside.c                    |   77 +-
 trunk/drivers/ide/ide-4drives.c               |    6 +-
 trunk/drivers/ide/ide-atapi.c                 |    2 +-
 trunk/drivers/ide/ide-cs.c                    |    6 +-
 trunk/drivers/ide/ide-disk.c                  |   75 +-
 trunk/drivers/ide/ide-dma.c                   |    1 +
 trunk/drivers/ide/ide-eh.c                    |   14 +-
 trunk/drivers/ide/ide-gd.c                    |   14 -
 trunk/drivers/ide/ide-generic.c               |    7 +-
 trunk/drivers/ide/ide-h8300.c                 |   10 +-
 trunk/drivers/ide/ide-io.c                    |   77 +-
 trunk/drivers/ide/ide-iops.c                  |   26 +-
 trunk/drivers/ide/ide-legacy.c                |    7 +-
 trunk/drivers/ide/ide-pnp.c                   |    6 +-
 trunk/drivers/ide/ide-probe.c                 |   95 +-
 trunk/drivers/ide/ide-tape.c                  |   90 +-
 trunk/drivers/ide/ide-taskfile.c              |    3 +-
 trunk/drivers/ide/ide.c                       |   10 -
 trunk/drivers/ide/ide_platform.c              |    9 +-
 trunk/drivers/ide/macide.c                    |    9 +-
 trunk/drivers/ide/palm_bk3710.c               |    6 +-
 trunk/drivers/ide/pdc202xx_new.c              |   26 +
 trunk/drivers/ide/pdc202xx_old.c              |   92 +-
 trunk/drivers/ide/pmac.c                      |   13 +-
 trunk/drivers/ide/q40ide.c                    |   11 +-
 trunk/drivers/ide/rapide.c                    |    8 +-
 trunk/drivers/ide/scc_pata.c                  |    6 +-
 trunk/drivers/ide/setup-pci.c                 |   85 +-
 trunk/drivers/ide/sgiioc4.c                   |    7 +-
 trunk/drivers/ide/siimage.c                   |    4 +-
 trunk/drivers/ide/sl82c105.c                  |    9 +-
 trunk/drivers/ide/tx4938ide.c                 |    5 +-
 trunk/drivers/ide/tx4939ide.c                 |    5 +-
 trunk/drivers/lguest/Kconfig                  |    2 +-
 trunk/drivers/lguest/core.c                   |   30 +-
 trunk/drivers/lguest/hypercalls.c             |   14 -
 trunk/drivers/lguest/interrupts_and_traps.c   |   57 +-
 trunk/drivers/lguest/lg.h                     |   28 +-
 trunk/drivers/lguest/lguest_device.c          |   41 +-
 trunk/drivers/lguest/lguest_user.c            |  127 +--
 trunk/drivers/lguest/page_tables.c            |  396 +------
 trunk/drivers/lguest/segments.c               |    2 +-
 trunk/drivers/net/virtio_net.c                |   45 +-
 trunk/drivers/s390/kvm/kvm_virtio.c           |   43 +-
 trunk/drivers/video/aty/aty128fb.c            |    2 +-
 trunk/drivers/video/cyber2000fb.c             |    9 +-
 trunk/drivers/video/uvesafb.c                 |   10 +-
 trunk/drivers/virtio/virtio.c                 |   29 +-
 trunk/drivers/virtio/virtio_balloon.c         |   27 +-
 trunk/drivers/virtio/virtio_pci.c             |  307 +----
 trunk/drivers/virtio/virtio_ring.c            |  102 +-
 trunk/fs/Kconfig                              |   10 -
 trunk/fs/eventfd.c                            |    3 -
 trunk/fs/fuse/Makefile                        |    1 -
 trunk/fs/fuse/cuse.c                          |  610 ----------
 trunk/fs/fuse/dev.c                           |   15 +-
 trunk/fs/fuse/dir.c                           |   33 +-
 trunk/fs/fuse/file.c                          |  346 +++---
 trunk/fs/fuse/fuse_i.h                        |   47 +-
 trunk/fs/fuse/inode.c                         |  118 +-
 trunk/fs/gfs2/Makefile                        |    1 +
 trunk/fs/gfs2/bmap.c                          |    3 +
 trunk/fs/gfs2/glock.c                         |   12 +-
 trunk/fs/gfs2/log.c                           |    9 +-
 trunk/fs/gfs2/lops.c                          |    3 +
 trunk/fs/gfs2/ops_fstype.c                    |    2 +
 trunk/fs/gfs2/rgrp.c                          |   11 +-
 trunk/fs/gfs2/trace_gfs2.h                    |  407 +++++++
 trunk/fs/partitions/check.c                   |   42 +-
 trunk/include/linux/blkdev.h                  |    2 -
 trunk/include/linux/compiler.h                |    5 -
 trunk/include/linux/fuse.h                    |   31 -
 trunk/include/linux/genhd.h                   |    1 -
 trunk/include/linux/ide.h                     |   46 +-
 trunk/include/linux/lguest.h                  |    4 -
 trunk/include/linux/lguest_launcher.h         |    3 +-
 trunk/include/linux/module.h                  |    1 -
 trunk/include/linux/moduleparam.h             |   40 +-
 trunk/include/linux/virtio.h                  |   15 +-
 trunk/include/linux/virtio_config.h           |   49 +-
 trunk/include/linux/virtio_pci.h              |   10 +-
 trunk/include/linux/virtio_ring.h             |    8 +-
 trunk/kernel/module.c                         |    1 -
 trunk/kernel/params.c                         |   46 +-
 trunk/kernel/sched.c                          |    1 -
 trunk/lib/extable.c                           |   21 +-
 trunk/net/9p/trans_virtio.c                   |    6 +-
 trunk/scripts/mod/file2alias.c                |    2 +-
 141 files changed, 2487 insertions(+), 3385 deletions(-)
 create mode 100644 trunk/arch/um/sys-x86_64/um_module.c
 create mode 100644 trunk/arch/x86/kernel/module_32.c
 rename trunk/arch/x86/kernel/{module.c => module_64.c} (74%)
 delete mode 100644 trunk/fs/fuse/cuse.c
 create mode 100644 trunk/fs/gfs2/trace_gfs2.h

diff --git a/[refs] b/[refs]
index ae71335b4800..b48e1cc0adaf 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 7f3591cfacf2d79c4f42238e46c7d053da8e020d
+refs/heads/master: 63997775b795f97ef51f3e56bc3abc9edc04bbb0
diff --git a/trunk/Documentation/ide/ide.txt b/trunk/Documentation/ide/ide.txt
index e77bebfa7b0d..0c78f4b1d9d9 100644
--- a/trunk/Documentation/ide/ide.txt
+++ b/trunk/Documentation/ide/ide.txt
@@ -216,8 +216,6 @@ Other kernel parameters for ide_core are:
 
 * "noflush=[interface_number.device_number]" to disable flush requests
 
-* "nohpa=[interface_number.device_number]" to disable Host Protected Area
-
 * "noprobe=[interface_number.device_number]" to skip probing
 
 * "nowerr=[interface_number.device_number]" to ignore the WRERR_STAT bit
diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt
index 0bf8a882ee9e..7bcdebffdab3 100644
--- a/trunk/Documentation/kernel-parameters.txt
+++ b/trunk/Documentation/kernel-parameters.txt
@@ -887,8 +887,11 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	ide-core.nodma=	[HW] (E)IDE subsystem
 			Format: =0.0 to prevent dma on hda, =0.1 hdb =1.0 hdc
-			.vlb_clock .pci_clock .noflush .nohpa .noprobe .nowerr
-			.cdrom .chs .ignore_cable are additional options
+			.vlb_clock .pci_clock .noflush .noprobe .nowerr .cdrom
+			.chs .ignore_cable are additional options
+			See Documentation/ide/ide.txt.
+
+	idebus=		[HW] (E)IDE subsystem - VLB/PCI bus speed
 			See Documentation/ide/ide.txt.
 
 	ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
diff --git a/trunk/Documentation/lguest/Makefile b/trunk/Documentation/lguest/Makefile
index 28c8cdfcafd8..1f4f9e888bd1 100644
--- a/trunk/Documentation/lguest/Makefile
+++ b/trunk/Documentation/lguest/Makefile
@@ -1,5 +1,6 @@
 # This creates the demonstration utility "lguest" which runs a Linux guest.
-CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include -U_FORTIFY_SOURCE
+CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include -U_FORTIFY_SOURCE
+LDLIBS:=-lz
 
 all: lguest
 
diff --git a/trunk/Documentation/lguest/lguest.c b/trunk/Documentation/lguest/lguest.c
index 9ebcd6ef361b..d36fcc0f2715 100644
--- a/trunk/Documentation/lguest/lguest.c
+++ b/trunk/Documentation/lguest/lguest.c
@@ -16,7 +16,6 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
-#include <sys/eventfd.h>
 #include <fcntl.h>
 #include <stdbool.h>
 #include <errno.h>
@@ -60,6 +59,7 @@ typedef uint8_t u8;
 /*:*/
 
 #define PAGE_PRESENT 0x7 	/* Present, RW, Execute */
+#define NET_PEERNUM 1
 #define BRIDGE_PFX "bridge:"
 #ifndef SIOCBRADDIF
 #define SIOCBRADDIF	0x89a2		/* add interface to bridge      */
@@ -76,12 +76,19 @@ static bool verbose;
 	do { if (verbose) printf(args); } while(0)
 /*:*/
 
+/* File descriptors for the Waker. */
+struct {
+	int pipe[2];
+	int lguest_fd;
+} waker_fds;
+
 /* The pointer to the start of guest memory. */
 static void *guest_base;
 /* The maximum guest physical address allowed, and maximum possible. */
 static unsigned long guest_limit, guest_max;
-/* The /dev/lguest file descriptor. */
-static int lguest_fd;
+/* The pipe for signal handler to write to. */
+static int timeoutpipe[2];
+static unsigned int timeout_usec = 500;
 
 /* a per-cpu variable indicating whose vcpu is currently running */
 static unsigned int __thread cpu_id;
@@ -89,6 +96,11 @@ static unsigned int __thread cpu_id;
 /* This is our list of devices. */
 struct device_list
 {
+	/* Summary information about the devices in our list: ready to pass to
+	 * select() to ask which need servicing.*/
+	fd_set infds;
+	int max_infd;
+
 	/* Counter to assign interrupt numbers. */
 	unsigned int next_irq;
 
@@ -114,21 +126,22 @@ struct device
 	/* The linked-list pointer. */
 	struct device *next;
 
-	/* The device's descriptor, as mapped into the Guest. */
+	/* This device's descriptor, as mapped into the Guest. */
 	struct lguest_device_desc *desc;
 
-	/* We can't trust desc values once Guest has booted: we use these. */
-	unsigned int feature_len;
-	unsigned int num_vq;
-
 	/* The name of this device, for --verbose. */
 	const char *name;
 
+	/* If handle_input is set, it wants to be called when this file
+	 * descriptor is ready. */
+	int fd;
+	bool (*handle_input)(int fd, struct device *me);
+
 	/* Any queues attached to this device */
 	struct virtqueue *vq;
 
-	/* Is it operational */
-	bool running;
+	/* Handle status being finalized (ie. feature bits stable). */
+	void (*ready)(struct device *me);
 
 	/* Device-specific data. */
 	void *priv;
@@ -151,28 +164,22 @@ struct virtqueue
 	/* Last available index we saw. */
 	u16 last_avail_idx;
 
-	/* How many are used since we sent last irq? */
-	unsigned int pending_used;
+	/* The routine to call when the Guest pings us, or timeout. */
+	void (*handle_output)(int fd, struct virtqueue *me, bool timeout);
 
-	/* Eventfd where Guest notifications arrive. */
-	int eventfd;
+	/* Outstanding buffers */
+	unsigned int inflight;
 
-	/* Function for the thread which is servicing this virtqueue. */
-	void (*service)(struct virtqueue *vq);
-	pid_t thread;
+	/* Is this blocked awaiting a timer? */
+	bool blocked;
 };
 
 /* Remember the arguments to the program so we can "reboot" */
 static char **main_args;
 
-/* The original tty settings to restore on exit. */
-static struct termios orig_term;
-
-/* We have to be careful with barriers: our devices are all run in separate
- * threads and so we need to make sure that changes visible to the Guest happen
- * in precise order. */
-#define wmb() __asm__ __volatile__("" : : : "memory")
-#define mb() __asm__ __volatile__("" : : : "memory")
+/* Since guest is UP and we don't run at the same time, we don't need barriers.
+ * But I include them in the code in case others copy it. */
+#define wmb()
 
 /* Convert an iovec element to the given type.
  *
@@ -238,7 +245,7 @@ static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len)
 static u8 *get_feature_bits(struct device *dev)
 {
 	return (u8 *)(dev->desc + 1)
-		+ dev->num_vq * sizeof(struct lguest_vqconfig);
+		+ dev->desc->num_vq * sizeof(struct lguest_vqconfig);
 }
 
 /*L:100 The Launcher code itself takes us out into userspace, that scary place
@@ -498,19 +505,99 @@ static void concat(char *dst, char *args[])
  * saw the arguments it expects when we looked at initialize() in lguest_user.c:
  * the base of Guest "physical" memory, the top physical page to allow and the
  * entry point for the Guest. */
-static void tell_kernel(unsigned long start)
+static int tell_kernel(unsigned long start)
 {
 	unsigned long args[] = { LHREQ_INITIALIZE,
 				 (unsigned long)guest_base,
 				 guest_limit / getpagesize(), start };
+	int fd;
+
 	verbose("Guest: %p - %p (%#lx)\n",
 		guest_base, guest_base + guest_limit, guest_limit);
-	lguest_fd = open_or_die("/dev/lguest", O_RDWR);
-	if (write(lguest_fd, args, sizeof(args)) < 0)
+	fd = open_or_die("/dev/lguest", O_RDWR);
+	if (write(fd, args, sizeof(args)) < 0)
 		err(1, "Writing to /dev/lguest");
+
+	/* We return the /dev/lguest file descriptor to control this Guest */
+	return fd;
 }
 /*:*/
 
+static void add_device_fd(int fd)
+{
+	FD_SET(fd, &devices.infds);
+	if (fd > devices.max_infd)
+		devices.max_infd = fd;
+}
+
+/*L:200
+ * The Waker.
+ *
+ * With console, block and network devices, we can have lots of input which we
+ * need to process.  We could try to tell the kernel what file descriptors to
+ * watch, but handing a file descriptor mask through to the kernel is fairly
+ * icky.
+ *
+ * Instead, we clone off a thread which watches the file descriptors and writes
+ * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host
+ * to stop running the Guest.  This causes the Launcher to return from the
+ * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset
+ * the LHREQ_BREAK and wake us up again.
+ *
+ * This, of course, is merely a different *kind* of icky.
+ *
+ * Given my well-known antipathy to threads, I'd prefer to use processes.  But
+ * it's easier to share Guest memory with threads, and trivial to share the
+ * devices.infds as the Launcher changes it.
+ */
+static int waker(void *unused)
+{
+	/* Close the write end of the pipe: only the Launcher has it open. */
+	close(waker_fds.pipe[1]);
+
+	for (;;) {
+		fd_set rfds = devices.infds;
+		unsigned long args[] = { LHREQ_BREAK, 1 };
+		unsigned int maxfd = devices.max_infd;
+
+		/* We also listen to the pipe from the Launcher. */
+		FD_SET(waker_fds.pipe[0], &rfds);
+		if (waker_fds.pipe[0] > maxfd)
+			maxfd = waker_fds.pipe[0];
+
+		/* Wait until input is ready from one of the devices. */
+		select(maxfd+1, &rfds, NULL, NULL, NULL);
+
+		/* Message from Launcher? */
+		if (FD_ISSET(waker_fds.pipe[0], &rfds)) {
+			char c;
+			/* If this fails, then assume Launcher has exited.
+			 * Don't do anything on exit: we're just a thread! */
+			if (read(waker_fds.pipe[0], &c, 1) != 1)
+				_exit(0);
+			continue;
+		}
+
+		/* Send LHREQ_BREAK command to snap the Launcher out of it. */
+		pwrite(waker_fds.lguest_fd, args, sizeof(args), cpu_id);
+	}
+	return 0;
+}
+
+/* This routine just sets up a pipe to the Waker process. */
+static void setup_waker(int lguest_fd)
+{
+	/* This pipe is closed when Launcher dies, telling Waker. */
+	if (pipe(waker_fds.pipe) != 0)
+		err(1, "Creating pipe for Waker");
+
+	/* Waker also needs to know the lguest fd */
+	waker_fds.lguest_fd = lguest_fd;
+
+	if (clone(waker, malloc(4096) + 4096, CLONE_VM | SIGCHLD, NULL) == -1)
+		err(1, "Creating Waker");
+}
+
 /*
  * Device Handling.
  *
@@ -536,90 +623,49 @@ static void *_check_pointer(unsigned long addr, unsigned int size,
 /* Each buffer in the virtqueues is actually a chain of descriptors.  This
  * function returns the next descriptor in the chain, or vq->vring.num if we're
  * at the end. */
-static unsigned next_desc(struct vring_desc *desc,
-			  unsigned int i, unsigned int max)
+static unsigned next_desc(struct virtqueue *vq, unsigned int i)
 {
 	unsigned int next;
 
 	/* If this descriptor says it doesn't chain, we're done. */
-	if (!(desc[i].flags & VRING_DESC_F_NEXT))
-		return max;
+	if (!(vq->vring.desc[i].flags & VRING_DESC_F_NEXT))
+		return vq->vring.num;
 
 	/* Check they're not leading us off end of descriptors. */
-	next = desc[i].next;
+	next = vq->vring.desc[i].next;
 	/* Make sure compiler knows to grab that: we don't want it changing! */
 	wmb();
 
-	if (next >= max)
+	if (next >= vq->vring.num)
 		errx(1, "Desc next is %u", next);
 
 	return next;
 }
 
-/* This actually sends the interrupt for this virtqueue */
-static void trigger_irq(struct virtqueue *vq)
-{
-	unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
-
-	/* Don't inform them if nothing used. */
-	if (!vq->pending_used)
-		return;
-	vq->pending_used = 0;
-
-	/* If they don't want an interrupt, don't send one, unless empty. */
-	if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
-	    && lg_last_avail(vq) != vq->vring.avail->idx)
-		return;
-
-	/* Send the Guest an interrupt tell them we used something up. */
-	if (write(lguest_fd, buf, sizeof(buf)) != 0)
-		err(1, "Triggering irq %i", vq->config.irq);
-}
-
 /* This looks in the virtqueue and for the first available buffer, and converts
  * it to an iovec for convenient access.  Since descriptors consist of some
  * number of output then some number of input descriptors, it's actually two
  * iovecs, but we pack them into one and note how many of each there were.
  *
- * This function returns the descriptor number found. */
-static unsigned wait_for_vq_desc(struct virtqueue *vq,
-				 struct iovec iov[],
-				 unsigned int *out_num, unsigned int *in_num)
+ * This function returns the descriptor number found, or vq->vring.num (which
+ * is never a valid descriptor number) if none was found. */
+static unsigned get_vq_desc(struct virtqueue *vq,
+			    struct iovec iov[],
+			    unsigned int *out_num, unsigned int *in_num)
 {
-	unsigned int i, head, max;
-	struct vring_desc *desc;
-	u16 last_avail = lg_last_avail(vq);
-
-	while (last_avail == vq->vring.avail->idx) {
-		u64 event;
-
-		/* OK, tell Guest about progress up to now. */
-		trigger_irq(vq);
-
-		/* OK, now we need to know about added descriptors. */
-		vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
-
-		/* They could have slipped one in as we were doing that: make
-		 * sure it's written, then check again. */
-		mb();
-		if (last_avail != vq->vring.avail->idx) {
-			vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
-			break;
-		}
-
-		/* Nothing new?  Wait for eventfd to tell us they refilled. */
-		if (read(vq->eventfd, &event, sizeof(event)) != sizeof(event))
-			errx(1, "Event read failed?");
-
-		/* We don't need to be notified again. */
-		vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
-	}
+	unsigned int i, head;
+	u16 last_avail;
 
 	/* Check it isn't doing very strange things with descriptor numbers. */
+	last_avail = lg_last_avail(vq);
 	if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num)
 		errx(1, "Guest moved used index from %u to %u",
 		     last_avail, vq->vring.avail->idx);
 
+	/* If there's nothing new since last we looked, return invalid. */
+	if (vq->vring.avail->idx == last_avail)
+		return vq->vring.num;
+
 	/* Grab the next descriptor number they're advertising, and increment
 	 * the index we've seen. */
 	head = vq->vring.avail->ring[last_avail % vq->vring.num];
@@ -632,28 +678,15 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq,
 	/* When we start there are none of either input nor output. */
 	*out_num = *in_num = 0;
 
-	max = vq->vring.num;
-	desc = vq->vring.desc;
 	i = head;
-
-	/* If this is an indirect entry, then this buffer contains a descriptor
-	 * table which we handle as if it's any normal descriptor chain. */
-	if (desc[i].flags & VRING_DESC_F_INDIRECT) {
-		if (desc[i].len % sizeof(struct vring_desc))
-			errx(1, "Invalid size for indirect buffer table");
-
-		max = desc[i].len / sizeof(struct vring_desc);
-		desc = check_pointer(desc[i].addr, desc[i].len);
-		i = 0;
-	}
-
 	do {
 		/* Grab the first descriptor, and check it's OK. */
-		iov[*out_num + *in_num].iov_len = desc[i].len;
+		iov[*out_num + *in_num].iov_len = vq->vring.desc[i].len;
 		iov[*out_num + *in_num].iov_base
-			= check_pointer(desc[i].addr, desc[i].len);
+			= check_pointer(vq->vring.desc[i].addr,
+					vq->vring.desc[i].len);
 		/* If this is an input descriptor, increment that count. */
-		if (desc[i].flags & VRING_DESC_F_WRITE)
+		if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE)
 			(*in_num)++;
 		else {
 			/* If it's an output descriptor, they're all supposed
@@ -664,10 +697,11 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq,
 		}
 
 		/* If we've got too many, that implies a descriptor loop. */
-		if (*out_num + *in_num > max)
+		if (*out_num + *in_num > vq->vring.num)
 			errx(1, "Looped descriptor");
-	} while ((i = next_desc(desc, i, max)) != max);
+	} while ((i = next_desc(vq, i)) != vq->vring.num);
 
+	vq->inflight++;
 	return head;
 }
 
@@ -685,20 +719,44 @@ static void add_used(struct virtqueue *vq, unsigned int head, int len)
 	/* Make sure buffer is written before we update index. */
 	wmb();
 	vq->vring.used->idx++;
-	vq->pending_used++;
+	vq->inflight--;
+}
+
+/* This actually sends the interrupt for this virtqueue */
+static void trigger_irq(int fd, struct virtqueue *vq)
+{
+	unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
+
+	/* If they don't want an interrupt, don't send one, unless empty. */
+	if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
+	    && vq->inflight)
+		return;
+
+	/* Send the Guest an interrupt to tell them we used something up. */
+	if (write(fd, buf, sizeof(buf)) != 0)
+		err(1, "Triggering irq %i", vq->config.irq);
 }
 
 /* And here's the combo meal deal.  Supersize me! */
-static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len)
+static void add_used_and_trigger(int fd, struct virtqueue *vq,
+				 unsigned int head, int len)
 {
 	add_used(vq, head, len);
-	trigger_irq(vq);
+	trigger_irq(fd, vq);
 }
 
 /*
  * The Console
  *
- * We associate some data with the console for our exit hack. */
+ * Here is the input terminal setting we save, and the routine to restore them
+ * on exit so the user gets their terminal back. */
+static struct termios orig_term;
+static void restore_term(void)
+{
+	tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
+}
+
+/* We associate some data with the console for our exit hack. */
 struct console_abort
 {
 	/* How many times have they hit ^C? */
@@ -708,275 +766,276 @@ struct console_abort
 };
 
 /* This is the routine which handles console input (ie. stdin). */
-static void console_input(struct virtqueue *vq)
+static bool handle_console_input(int fd, struct device *dev)
 {
 	int len;
 	unsigned int head, in_num, out_num;
-	struct console_abort *abort = vq->dev->priv;
-	struct iovec iov[vq->vring.num];
+	struct iovec iov[dev->vq->vring.num];
+	struct console_abort *abort = dev->priv;
+
+	/* First we need a console buffer from the Guest's input virtqueue. */
+	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
+
+	/* If they're not ready for input, stop listening to this file
+	 * descriptor.  We'll start again once they add an input buffer. */
+	if (head == dev->vq->vring.num)
+		return false;
 
-	/* Make sure there's a descriptor waiting. */
-	head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
 	if (out_num)
 		errx(1, "Output buffers in console in queue?");
 
-	/* Read it in. */
-	len = readv(STDIN_FILENO, iov, in_num);
+	/* This is why we convert to iovecs: the readv() call uses them, and so
+	 * it reads straight into the Guest's buffer. */
+	len = readv(dev->fd, iov, in_num);
 	if (len <= 0) {
-		/* Ran out of input? */
+		/* This implies that the console is closed, is /dev/null, or
+		 * something went terribly wrong. */
 		warnx("Failed to get console input, ignoring console.");
-		/* For simplicity, dying threads kill the whole Launcher.  So
-		 * just nap here. */
-		for (;;)
-			pause();
+		/* Put the input terminal back. */
+		restore_term();
+		/* Remove callback from input vq, so it doesn't restart us. */
+		dev->vq->handle_output = NULL;
+		/* Stop listening to this fd: don't call us again. */
+		return false;
 	}
 
-	add_used_and_trigger(vq, head, len);
+	/* Tell the Guest about the new input. */
+	add_used_and_trigger(fd, dev->vq, head, len);
 
 	/* Three ^C within one second?  Exit.
 	 *
-	 * This is such a hack, but works surprisingly well.  Each ^C has to
-	 * be in a buffer by itself, so they can't be too fast.  But we check
-	 * that we get three within about a second, so they can't be too
-	 * slow. */
-	if (len != 1 || ((char *)iov[0].iov_base)[0] != 3) {
+	 * This is such a hack, but works surprisingly well.  Each ^C has to be
+	 * in a buffer by itself, so they can't be too fast.  But we check that
+	 * we get three within about a second, so they can't be too slow. */
+	if (len == 1 && ((char *)iov[0].iov_base)[0] == 3) {
+		if (!abort->count++)
+			gettimeofday(&abort->start, NULL);
+		else if (abort->count == 3) {
+			struct timeval now;
+			gettimeofday(&now, NULL);
+			if (now.tv_sec <= abort->start.tv_sec+1) {
+				unsigned long args[] = { LHREQ_BREAK, 0 };
+				/* Close the fd so Waker will know it has to
+				 * exit. */
+				close(waker_fds.pipe[1]);
+				/* Just in case Waker is blocked in BREAK, send
+				 * unbreak now. */
+				write(fd, args, sizeof(args));
+				exit(2);
+			}
+			abort->count = 0;
+		}
+	} else
+		/* Any other key resets the abort counter. */
 		abort->count = 0;
-		return;
-	}
 
-	abort->count++;
-	if (abort->count == 1)
-		gettimeofday(&abort->start, NULL);
-	else if (abort->count == 3) {
-		struct timeval now;
-		gettimeofday(&now, NULL);
-		/* Kill all Launcher processes with SIGINT, like normal ^C */
-		if (now.tv_sec <= abort->start.tv_sec+1)
-			kill(0, SIGINT);
-		abort->count = 0;
-	}
+	/* Everything went OK! */
+	return true;
 }
 
-/* This is the routine which handles console output (ie. stdout). */
-static void console_output(struct virtqueue *vq)
+/* Handling output for console is simple: we just get all the output buffers
+ * and write them to stdout. */
+static void handle_console_output(int fd, struct virtqueue *vq, bool timeout)
 {
 	unsigned int head, out, in;
+	int len;
 	struct iovec iov[vq->vring.num];
 
-	head = wait_for_vq_desc(vq, iov, &out, &in);
-	if (in)
-		errx(1, "Input buffers in console output queue?");
-	while (!iov_empty(iov, out)) {
-		int len = writev(STDOUT_FILENO, iov, out);
-		if (len <= 0)
-			err(1, "Write to stdout gave %i", len);
-		iov_consume(iov, out, len);
+	/* Keep getting output buffers from the Guest until we run out. */
+	while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {
+		if (in)
+			errx(1, "Input buffers in output queue?");
+		len = writev(STDOUT_FILENO, iov, out);
+		add_used_and_trigger(fd, vq, head, len);
 	}
-	add_used(vq, head, 0);
+}
+
+/* This is called when we no longer want to hear about Guest changes to a
+ * virtqueue.  This is more efficient in high-traffic cases, but it means we
+ * have to set a timer to check if any more changes have occurred. */
+static void block_vq(struct virtqueue *vq)
+{
+	struct itimerval itm;
+
+	vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
+	vq->blocked = true;
+
+	itm.it_interval.tv_sec = 0;
+	itm.it_interval.tv_usec = 0;
+	itm.it_value.tv_sec = 0;
+	itm.it_value.tv_usec = timeout_usec;
+
+	setitimer(ITIMER_REAL, &itm, NULL);
 }
 
 /*
  * The Network
  *
  * Handling output for network is also simple: we get all the output buffers
- * and write them to /dev/net/tun.
+ * and write them (ignoring the first element) to this device's file descriptor
+ * (/dev/net/tun).
  */
-struct net_info {
-	int tunfd;
-};
-
-static void net_output(struct virtqueue *vq)
+static void handle_net_output(int fd, struct virtqueue *vq, bool timeout)
 {
-	struct net_info *net_info = vq->dev->priv;
-	unsigned int head, out, in;
+	unsigned int head, out, in, num = 0;
+	int len;
 	struct iovec iov[vq->vring.num];
+	static int last_timeout_num;
+
+	/* Keep getting output buffers from the Guest until we run out. */
+	while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {
+		if (in)
+			errx(1, "Input buffers in output queue?");
+		len = writev(vq->dev->fd, iov, out);
+		if (len < 0)
+			err(1, "Writing network packet to tun");
+		add_used_and_trigger(fd, vq, head, len);
+		num++;
+	}
 
-	head = wait_for_vq_desc(vq, iov, &out, &in);
-	if (in)
-		errx(1, "Input buffers in net output queue?");
-	if (writev(net_info->tunfd, iov, out) < 0)
-		errx(1, "Write to tun failed?");
-	add_used(vq, head, 0);
-}
-
-/* Will reading from this file descriptor block? */
-static bool will_block(int fd)
-{
-	fd_set fdset;
-	struct timeval zero = { 0, 0 };
-	FD_ZERO(&fdset);
-	FD_SET(fd, &fdset);
-	return select(fd+1, &fdset, NULL, NULL, &zero) != 1;
+	/* Block further kicks and set up a timer if we saw anything. */
+	if (!timeout && num)
+		block_vq(vq);
+
+	/* We never quite know how long we should wait before we check the
+	 * queue again for more packets.  We start at 500 microseconds, and if
+	 * we get fewer packets than last time, we assume we made the timeout
+	 * too small and increase it by 10 microseconds.  Otherwise, we drop it
+	 * by one microsecond every time.  It seems to work well enough. */
+	if (timeout) {
+		if (num < last_timeout_num)
+			timeout_usec += 10;
+		else if (timeout_usec > 1)
+			timeout_usec--;
+		last_timeout_num = num;
+	}
 }
 
-/* This is where we handle packets coming in from the tun device to our
+/* This is where we handle a packet coming in from the tun device to our
  * Guest. */
-static void net_input(struct virtqueue *vq)
+static bool handle_tun_input(int fd, struct device *dev)
 {
+	unsigned int head, in_num, out_num;
 	int len;
-	unsigned int head, out, in;
-	struct iovec iov[vq->vring.num];
-	struct net_info *net_info = vq->dev->priv;
-
-	head = wait_for_vq_desc(vq, iov, &out, &in);
-	if (out)
-		errx(1, "Output buffers in net input queue?");
-
-	/* Deliver interrupt now, since we're about to sleep. */
-	if (vq->pending_used && will_block(net_info->tunfd))
-		trigger_irq(vq);
-
-	len = readv(net_info->tunfd, iov, in);
+	struct iovec iov[dev->vq->vring.num];
+
+	/* First we need a network buffer from the Guest's recv virtqueue. */
+	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
+	if (head == dev->vq->vring.num) {
+		/* Now, it's expected that if we try to send a packet too
+		 * early, the Guest won't be ready yet.  Wait until the device
+		 * status says it's ready. */
+		/* FIXME: Actually want DRIVER_ACTIVE here. */
+
+		/* Now tell it we want to know if new things appear. */
+		dev->vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
+		wmb();
+
+		/* We'll turn this back on if input buffers are registered. */
+		return false;
+	} else if (out_num)
+		errx(1, "Output buffers in network recv queue?");
+
+	/* Read the packet from the device directly into the Guest's buffer. */
+	len = readv(dev->fd, iov, in_num);
 	if (len <= 0)
-		err(1, "Failed to read from tun.");
-	add_used(vq, head, len);
-}
+		err(1, "reading network");
 
-/* This is the helper to create threads. */
-static int do_thread(void *_vq)
-{
-	struct virtqueue *vq = _vq;
+	/* Tell the Guest about the new packet. */
+	add_used_and_trigger(fd, dev->vq, head, len);
 
-	for (;;)
-		vq->service(vq);
-	return 0;
-}
+	verbose("tun input packet len %i [%02x %02x] (%s)\n", len,
+		((u8 *)iov[1].iov_base)[0], ((u8 *)iov[1].iov_base)[1],
+		head != dev->vq->vring.num ? "sent" : "discarded");
 
-/* When a child dies, we kill our entire process group with SIGTERM.  This
- * also has the side effect that the shell restores the console for us! */
-static void kill_launcher(int signal)
-{
-	kill(0, SIGTERM);
+	/* All good. */
+	return true;
 }
 
-static void reset_device(struct device *dev)
+/*L:215 This is the callback attached to the network and console input
+ * virtqueues: it ensures we try again, in case we stopped console or net
+ * delivery because Guest didn't have any buffers. */
+static void enable_fd(int fd, struct virtqueue *vq, bool timeout)
 {
-	struct virtqueue *vq;
-
-	verbose("Resetting device %s\n", dev->name);
-
-	/* Clear any features they've acked. */
-	memset(get_feature_bits(dev) + dev->feature_len, 0, dev->feature_len);
-
-	/* We're going to be explicitly killing threads, so ignore them. */
-	signal(SIGCHLD, SIG_IGN);
-
-	/* Zero out the virtqueues, get rid of their threads */
-	for (vq = dev->vq; vq; vq = vq->next) {
-		if (vq->thread != (pid_t)-1) {
-			kill(vq->thread, SIGTERM);
-			waitpid(vq->thread, NULL, 0);
-			vq->thread = (pid_t)-1;
-		}
-		memset(vq->vring.desc, 0,
-		       vring_size(vq->config.num, LGUEST_VRING_ALIGN));
-		lg_last_avail(vq) = 0;
-	}
-	dev->running = false;
-
-	/* Now we care if threads die. */
-	signal(SIGCHLD, (void *)kill_launcher);
+	add_device_fd(vq->dev->fd);
+	/* Snap the Waker out of its select loop. */
+	write(waker_fds.pipe[1], "", 1);
 }
 
-static void create_thread(struct virtqueue *vq)
+static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout)
 {
-	/* Create stack for thread and run it.  Since stack grows
-	 * upwards, we point the stack pointer to the end of this
-	 * region. */
-	char *stack = malloc(32768);
-	unsigned long args[] = { LHREQ_EVENTFD,
-				 vq->config.pfn*getpagesize(), 0 };
-
-	/* Create a zero-initialized eventfd. */
-	vq->eventfd = eventfd(0, 0);
-	if (vq->eventfd < 0)
-		err(1, "Creating eventfd");
-	args[2] = vq->eventfd;
-
-	/* Attach an eventfd to this virtqueue: it will go off
-	 * when the Guest does an LHCALL_NOTIFY for this vq. */
-	if (write(lguest_fd, &args, sizeof(args)) != 0)
-		err(1, "Attaching eventfd");
-
-	/* CLONE_VM: because it has to access the Guest memory, and
-	 * SIGCHLD so we get a signal if it dies. */
-	vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq);
-	if (vq->thread == (pid_t)-1)
-		err(1, "Creating clone");
-	/* We close our local copy, now the child has it. */
-	close(vq->eventfd);
+	/* We don't need to know again when Guest refills receive buffer. */
+	vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
+	enable_fd(fd, vq, timeout);
 }
 
-static void start_device(struct device *dev)
+/* When the Guest tells us they updated the status field, we handle it. */
+static void update_device_status(struct device *dev)
 {
-	unsigned int i;
 	struct virtqueue *vq;
 
-	verbose("Device %s OK: offered", dev->name);
-	for (i = 0; i < dev->feature_len; i++)
-		verbose(" %02x", get_feature_bits(dev)[i]);
-	verbose(", accepted");
-	for (i = 0; i < dev->feature_len; i++)
-		verbose(" %02x", get_feature_bits(dev)
-			[dev->feature_len+i]);
-
-	for (vq = dev->vq; vq; vq = vq->next) {
-		if (vq->service)
-			create_thread(vq);
-	}
-	dev->running = true;
-}
+	/* This is a reset. */
+	if (dev->desc->status == 0) {
+		verbose("Resetting device %s\n", dev->name);
 
-static void cleanup_devices(void)
-{
-	struct device *dev;
-
-	for (dev = devices.dev; dev; dev = dev->next)
-		reset_device(dev);
-
-	/* If we saved off the original terminal settings, restore them now. */
-	if (orig_term.c_lflag & (ISIG|ICANON|ECHO))
-		tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
-}
+		/* Clear any features they've acked. */
+		memset(get_feature_bits(dev) + dev->desc->feature_len, 0,
+		       dev->desc->feature_len);
 
-/* When the Guest tells us they updated the status field, we handle it. */
-static void update_device_status(struct device *dev)
-{
-	/* A zero status is a reset, otherwise it's a set of flags. */
-	if (dev->desc->status == 0)
-		reset_device(dev);
-	else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
+		/* Zero out the virtqueues. */
+		for (vq = dev->vq; vq; vq = vq->next) {
+			memset(vq->vring.desc, 0,
+			       vring_size(vq->config.num, LGUEST_VRING_ALIGN));
+			lg_last_avail(vq) = 0;
+		}
+	} else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
 		warnx("Device %s configuration FAILED", dev->name);
-		if (dev->running)
-			reset_device(dev);
 	} else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
-		if (!dev->running)
-			start_device(dev);
+		unsigned int i;
+
+		verbose("Device %s OK: offered", dev->name);
+		for (i = 0; i < dev->desc->feature_len; i++)
+			verbose(" %02x", get_feature_bits(dev)[i]);
+		verbose(", accepted");
+		for (i = 0; i < dev->desc->feature_len; i++)
+			verbose(" %02x", get_feature_bits(dev)
+				[dev->desc->feature_len+i]);
+
+		if (dev->ready)
+			dev->ready(dev);
 	}
 }
 
 /* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */
-static void handle_output(unsigned long addr)
+static void handle_output(int fd, unsigned long addr)
 {
 	struct device *i;
+	struct virtqueue *vq;
 
-	/* Check each device. */
+	/* Check each device and virtqueue. */
 	for (i = devices.dev; i; i = i->next) {
-		struct virtqueue *vq;
-
 		/* Notifications to device descriptors update device status. */
 		if (from_guest_phys(addr) == i->desc) {
 			update_device_status(i);
 			return;
 		}
 
-		/* Devices *can* be used before status is set to DRIVER_OK. */
+		/* Notifications to virtqueues mean output has occurred. */
 		for (vq = i->vq; vq; vq = vq->next) {
-			if (addr != vq->config.pfn*getpagesize())
+			if (vq->config.pfn != addr/getpagesize())
 				continue;
-			if (i->running)
-				errx(1, "Notification on running %s", i->name);
-			start_device(i);
+
+			/* Guest should acknowledge (and set features!)  before
+			 * using the device. */
+			if (i->desc->status == 0) {
+				warnx("%s gave early output", i->name);
+				return;
+			}
+
+			if (strcmp(vq->dev->name, "console") != 0)
+				verbose("Output to %s\n", vq->dev->name);
+			if (vq->handle_output)
+				vq->handle_output(fd, vq, false);
 			return;
 		}
 	}
@@ -990,6 +1049,71 @@ static void handle_output(unsigned long addr)
 	      strnlen(from_guest_phys(addr), guest_limit - addr));
 }
 
+static void handle_timeout(int fd)
+{
+	char buf[32];
+	struct device *i;
+	struct virtqueue *vq;
+
+	/* Clear the pipe */
+	read(timeoutpipe[0], buf, sizeof(buf));
+
+	/* Check each device and virtqueue: flush blocked ones. */
+	for (i = devices.dev; i; i = i->next) {
+		for (vq = i->vq; vq; vq = vq->next) {
+			if (!vq->blocked)
+				continue;
+
+			vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
+			vq->blocked = false;
+			if (vq->handle_output)
+				vq->handle_output(fd, vq, true);
+		}
+	}
+}
+
+/* This is called when the Waker wakes us up: check for incoming file
+ * descriptors. */
+static void handle_input(int fd)
+{
+	/* select() wants a zeroed timeval to mean "don't wait". */
+	struct timeval poll = { .tv_sec = 0, .tv_usec = 0 };
+
+	for (;;) {
+		struct device *i;
+		fd_set fds = devices.infds;
+		int num;
+
+		num = select(devices.max_infd+1, &fds, NULL, NULL, &poll);
+		/* Could get interrupted */
+		if (num < 0)
+			continue;
+		/* If nothing is ready, we're done. */
+		if (num == 0)
+			break;
+
+		/* Otherwise, call the device(s) which have readable file
+		 * descriptors and a method of handling them.  */
+		for (i = devices.dev; i; i = i->next) {
+			if (i->handle_input && FD_ISSET(i->fd, &fds)) {
+				if (i->handle_input(fd, i))
+					continue;
+
+				/* If handle_input() returns false, it means we
+				 * should no longer service it.  Networking and
+				 * console do this when there are no input
+				 * buffers to deliver into.  Console also uses
+				 * it when it discovers that stdin is closed. */
+				FD_CLR(i->fd, &devices.infds);
+			}
+		}
+
+		/* Is this the timeout fd? */
+		if (FD_ISSET(timeoutpipe[0], &fds))
+			handle_timeout(fd);
+	}
+}
+
 /*L:190
  * Device Setup
  *
@@ -1005,8 +1129,8 @@ static void handle_output(unsigned long addr)
 static u8 *device_config(const struct device *dev)
 {
 	return (void *)(dev->desc + 1)
-		+ dev->num_vq * sizeof(struct lguest_vqconfig)
-		+ dev->feature_len * 2;
+		+ dev->desc->num_vq * sizeof(struct lguest_vqconfig)
+		+ dev->desc->feature_len * 2;
 }
 
 /* This routine allocates a new "struct lguest_device_desc" from descriptor
@@ -1035,7 +1159,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type)
 /* Each device descriptor is followed by the description of its virtqueues.  We
  * specify how many descriptors the virtqueue is to have. */
 static void add_virtqueue(struct device *dev, unsigned int num_descs,
-			  void (*service)(struct virtqueue *))
+			  void (*handle_output)(int, struct virtqueue *, bool))
 {
 	unsigned int pages;
 	struct virtqueue **i, *vq = malloc(sizeof(*vq));
@@ -1050,8 +1174,8 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
 	vq->next = NULL;
 	vq->last_avail_idx = 0;
 	vq->dev = dev;
-	vq->service = service;
-	vq->thread = (pid_t)-1;
+	vq->inflight = 0;
+	vq->blocked = false;
 
 	/* Initialize the configuration. */
 	vq->config.num = num_descs;
@@ -1067,7 +1191,6 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
 	 * yet, otherwise we'd be overwriting them. */
 	assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0);
 	memcpy(device_config(dev), &vq->config, sizeof(vq->config));
-	dev->num_vq++;
 	dev->desc->num_vq++;
 
 	verbose("Virtqueue page %#lx\n", to_guest_phys(p));
@@ -1076,6 +1199,15 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
 	 * second.  */
 	for (i = &dev->vq; *i; i = &(*i)->next);
 	*i = vq;
+
+	/* Set the routine to call when the Guest does something to this
+	 * virtqueue. */
+	vq->handle_output = handle_output;
+
+	/* As an optimization, set the advisory "Don't Notify Me" flag if we
+	 * don't have a handler */
+	if (!handle_output)
+		vq->vring.used->flags = VRING_USED_F_NO_NOTIFY;
 }
 
 /* The first half of the feature bitmask is for us to advertise features.  The
@@ -1087,7 +1219,7 @@ static void add_feature(struct device *dev, unsigned bit)
 	/* We can't extend the feature bits once we've added config bytes */
 	if (dev->desc->feature_len <= bit / CHAR_BIT) {
 		assert(dev->desc->config_len == 0);
-		dev->feature_len = dev->desc->feature_len = (bit/CHAR_BIT) + 1;
+		dev->desc->feature_len = (bit / CHAR_BIT) + 1;
 	}
 
 	features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT));
@@ -1111,17 +1243,22 @@ static void set_config(struct device *dev, unsigned len, const void *conf)
  * calling new_dev_desc() to allocate the descriptor and device memory.
  *
  * See what I mean about userspace being boring? */
-static struct device *new_device(const char *name, u16 type)
+static struct device *new_device(const char *name, u16 type, int fd,
+				 bool (*handle_input)(int, struct device *))
 {
 	struct device *dev = malloc(sizeof(*dev));
 
 	/* Now we populate the fields one at a time. */
+	dev->fd = fd;
+	/* If we have an input handler for this file descriptor, then we add it
+	 * to the device_list's fdset and maxfd. */
+	if (handle_input)
+		add_device_fd(dev->fd);
 	dev->desc = new_dev_desc(type);
+	dev->handle_input = handle_input;
 	dev->name = name;
 	dev->vq = NULL;
-	dev->feature_len = 0;
-	dev->num_vq = 0;
-	dev->running = false;
+	dev->ready = NULL;
 
 	/* Append to device list.  Prepending to a single-linked list is
 	 * easier, but the user expects the devices to be arranged on the bus
@@ -1149,10 +1286,13 @@ static void setup_console(void)
 		 * raw input stream to the Guest. */
 		term.c_lflag &= ~(ISIG|ICANON|ECHO);
 		tcsetattr(STDIN_FILENO, TCSANOW, &term);
+		/* If we exit gracefully, the original settings will be
+		 * restored so the user can see what they're typing. */
+		atexit(restore_term);
 	}
 
-	dev = new_device("console", VIRTIO_ID_CONSOLE);
-
+	dev = new_device("console", VIRTIO_ID_CONSOLE,
+			 STDIN_FILENO, handle_console_input);
 	/* We store the console state in dev->priv, and initialize it. */
 	dev->priv = malloc(sizeof(struct console_abort));
 	((struct console_abort *)dev->priv)->count = 0;
@@ -1161,13 +1301,31 @@ static void setup_console(void)
 	 * they put something the input queue, we make sure we're listening to
 	 * stdin.  When they put something in the output queue, we write it to
 	 * stdout. */
-	add_virtqueue(dev, VIRTQUEUE_NUM, console_input);
-	add_virtqueue(dev, VIRTQUEUE_NUM, console_output);
+	add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd);
+	add_virtqueue(dev, VIRTQUEUE_NUM, handle_console_output);
 
-	verbose("device %u: console\n", ++devices.device_num);
+	verbose("device %u: console\n", devices.device_num++);
 }
 /*:*/
 
+static void timeout_alarm(int sig)
+{
+	write(timeoutpipe[1], "", 1);
+}
+
+static void setup_timeout(void)
+{
+	if (pipe(timeoutpipe) != 0)
+		err(1, "Creating timeout pipe");
+
+	if (fcntl(timeoutpipe[1], F_SETFL,
+		  fcntl(timeoutpipe[1], F_GETFL) | O_NONBLOCK) != 0)
+		err(1, "Making timeout pipe nonblocking");
+
+	add_device_fd(timeoutpipe[0]);
+	signal(SIGALRM, timeout_alarm);
+}
+
 /*M:010 Inter-guest networking is an interesting area.  Simplest is to have a
  * --sharenet=<name> option which opens or creates a named pipe.  This can be
  * used to send packets to another guest in a 1:1 manner.
@@ -1289,23 +1447,21 @@ static int get_tun_device(char tapif[IFNAMSIZ])
 static void setup_tun_net(char *arg)
 {
 	struct device *dev;
-	struct net_info *net_info = malloc(sizeof(*net_info));
-	int ipfd;
+	int netfd, ipfd;
 	u32 ip = INADDR_ANY;
 	bool bridging = false;
 	char tapif[IFNAMSIZ], *p;
 	struct virtio_net_config conf;
 
-	net_info->tunfd = get_tun_device(tapif);
+	netfd = get_tun_device(tapif);
 
 	/* First we create a new network device. */
-	dev = new_device("net", VIRTIO_ID_NET);
-	dev->priv = net_info;
+	dev = new_device("net", VIRTIO_ID_NET, netfd, handle_tun_input);
 
 	/* Network devices need a receive and a send queue, just like
 	 * console. */
-	add_virtqueue(dev, VIRTQUEUE_NUM, net_input);
-	add_virtqueue(dev, VIRTQUEUE_NUM, net_output);
+	add_virtqueue(dev, VIRTQUEUE_NUM, net_enable_fd);
+	add_virtqueue(dev, VIRTQUEUE_NUM, handle_net_output);
 
 	/* We need a socket to perform the magic network ioctls to bring up the
 	 * tap interface, connect to the bridge etc.  Any socket will do! */
@@ -1346,8 +1502,6 @@ static void setup_tun_net(char *arg)
 	add_feature(dev, VIRTIO_NET_F_HOST_TSO4);
 	add_feature(dev, VIRTIO_NET_F_HOST_TSO6);
 	add_feature(dev, VIRTIO_NET_F_HOST_ECN);
-	/* We handle indirect ring entries */
-	add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC);
 	set_config(dev, sizeof(conf), &conf);
 
 	/* We don't need the socket any more; setup is done. */
@@ -1396,18 +1550,20 @@ struct vblk_info
  * Remember that the block device is handled by a separate I/O thread.  We head
  * straight into the core of that thread here:
  */
-static void blk_request(struct virtqueue *vq)
+static bool service_io(struct device *dev)
 {
-	struct vblk_info *vblk = vq->dev->priv;
+	struct vblk_info *vblk = dev->priv;
 	unsigned int head, out_num, in_num, wlen;
 	int ret;
 	u8 *in;
 	struct virtio_blk_outhdr *out;
-	struct iovec iov[vq->vring.num];
+	struct iovec iov[dev->vq->vring.num];
 	off64_t off;
 
-	/* Get the next request. */
-	head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
+	/* See if there's a request waiting.  If not, nothing to do. */
+	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
+	if (head == dev->vq->vring.num)
+		return false;
 
 	/* Every block request should contain at least one output buffer
 	 * (detailing the location on disk and the type of request) and one
@@ -1481,21 +1637,83 @@ static void blk_request(struct virtqueue *vq)
 	if (out->type & VIRTIO_BLK_T_BARRIER)
 		fdatasync(vblk->fd);
 
-	add_used(vq, head, wlen);
+	/* We can't trigger an IRQ, because we're not the Launcher.  It does
+	 * that when we tell it we're done. */
+	add_used(dev->vq, head, wlen);
+	return true;
+}
+
+/* This is the thread which actually services the I/O. */
+static int io_thread(void *_dev)
+{
+	struct device *dev = _dev;
+	struct vblk_info *vblk = dev->priv;
+	char c;
+
+	/* Close other side of workpipe so we get 0 read when main dies. */
+	close(vblk->workpipe[1]);
+	/* Close the other side of the done_fd pipe. */
+	close(dev->fd);
+
+	/* When this read fails, it means Launcher died, so we follow. */
+	while (read(vblk->workpipe[0], &c, 1) == 1) {
+		/* We acknowledge each request immediately to reduce latency,
+		 * rather than waiting until we've done them all.  I haven't
+		 * measured to see if it makes any difference.
+		 *
+		 * That would be an interesting test, wouldn't it?  You could
+		 * also try having more than one I/O thread. */
+		while (service_io(dev))
+			write(vblk->done_fd, &c, 1);
+	}
+	return 0;
+}
+
+/* Now we've seen the I/O thread, we return to the Launcher to see what happens
+ * when that thread tells us it's completed some I/O. */
+static bool handle_io_finish(int fd, struct device *dev)
+{
+	char c;
+
+	/* If the I/O thread died, presumably it printed the error, so we
+	 * simply exit. */
+	if (read(dev->fd, &c, 1) != 1)
+		exit(1);
+
+	/* It did some work, so trigger the irq. */
+	trigger_irq(fd, dev->vq);
+	return true;
+}
+
+/* When the Guest submits some I/O, we just need to wake the I/O thread. */
+static void handle_virtblk_output(int fd, struct virtqueue *vq, bool timeout)
+{
+	struct vblk_info *vblk = vq->dev->priv;
+	char c = 0;
+
+	/* Wake up I/O thread and tell it to go to work! */
+	if (write(vblk->workpipe[1], &c, 1) != 1)
+		/* Presumably it indicated why it died. */
+		exit(1);
 }
 
 /*L:198 This actually sets up a virtual block device. */
 static void setup_block_file(const char *filename)
 {
+	int p[2];
 	struct device *dev;
 	struct vblk_info *vblk;
+	void *stack;
 	struct virtio_blk_config conf;
 
+	/* This is the pipe the I/O thread will use to tell us I/O is done. */
+	pipe(p);
+
 	/* The device responds to return from I/O thread. */
-	dev = new_device("block", VIRTIO_ID_BLOCK);
+	dev = new_device("block", VIRTIO_ID_BLOCK, p[0], handle_io_finish);
 
 	/* The device has one virtqueue, where the Guest places requests. */
-	add_virtqueue(dev, VIRTQUEUE_NUM, blk_request);
+	add_virtqueue(dev, VIRTQUEUE_NUM, handle_virtblk_output);
 
 	/* Allocate the room for our own bookkeeping */
 	vblk = dev->priv = malloc(sizeof(*vblk));
@@ -1517,29 +1735,49 @@ static void setup_block_file(const char *filename)
 
 	set_config(dev, sizeof(conf), &conf);
 
+	/* The I/O thread writes to this end of the pipe when done. */
+	vblk->done_fd = p[1];
+
+	/* This is the second pipe, which is how we tell the I/O thread about
+	 * more work. */
+	pipe(vblk->workpipe);
+
+	/* Create stack for thread and run it.  Since stack grows downwards, we
+	 * point the stack pointer to the end of this region. */
+	stack = malloc(32768);
+	/* SIGCHLD - We don't "wait" for our cloned thread, so prevent it from
+	 * becoming a zombie. */
+	if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1)
+		err(1, "Creating clone");
+
+	/* We don't need to keep the I/O thread's end of the pipes open. */
+	close(vblk->done_fd);
+	close(vblk->workpipe[0]);
+
 	verbose("device %u: virtblock %llu sectors\n",
-		++devices.device_num, le64_to_cpu(conf.capacity));
+		devices.device_num, le64_to_cpu(conf.capacity));
 }
 
-struct rng_info {
-	int rfd;
-};
-
 /* Our random number generator device reads from /dev/random into the Guest's
  * input buffers.  The usual case is that the Guest doesn't want random numbers
  * and so has no buffers although /dev/random is still readable, whereas
  * console is the reverse.
  *
  * The same logic applies, however. */
-static void rng_input(struct virtqueue *vq)
+static bool handle_rng_input(int fd, struct device *dev)
 {
 	int len;
 	unsigned int head, in_num, out_num, totlen = 0;
-	struct rng_info *rng_info = vq->dev->priv;
-	struct iovec iov[vq->vring.num];
+	struct iovec iov[dev->vq->vring.num];
 
 	/* First we need a buffer from the Guests's virtqueue. */
-	head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
+	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
+
+	/* If they're not ready for input, stop listening to this file
+	 * descriptor.  We'll start again once they add an input buffer. */
+	if (head == dev->vq->vring.num)
+		return false;
+
 	if (out_num)
 		errx(1, "Output buffers in rng?");
 
@@ -1547,7 +1785,7 @@ static void rng_input(struct virtqueue *vq)
 	 * it reads straight into the Guest's buffer.  We loop to make sure we
 	 * fill it. */
 	while (!iov_empty(iov, in_num)) {
-		len = readv(rng_info->rfd, iov, in_num);
+		len = readv(dev->fd, iov, in_num);
 		if (len <= 0)
 			err(1, "Read from /dev/random gave %i", len);
 		iov_consume(iov, in_num, len);
@@ -1555,23 +1793,25 @@ static void rng_input(struct virtqueue *vq)
 	}
 
 	/* Tell the Guest about the new input. */
-	add_used(vq, head, totlen);
+	add_used_and_trigger(fd, dev->vq, head, totlen);
+
+	/* Everything went OK! */
+	return true;
 }
 
 /* And this creates a "hardware" random number device for the Guest. */
 static void setup_rng(void)
 {
 	struct device *dev;
-	struct rng_info *rng_info = malloc(sizeof(*rng_info));
+	int fd;
 
-	rng_info->rfd = open_or_die("/dev/random", O_RDONLY);
+	fd = open_or_die("/dev/random", O_RDONLY);
 
 	/* The device responds to return from I/O thread. */
-	dev = new_device("rng", VIRTIO_ID_RNG);
-	dev->priv = rng_info;
+	dev = new_device("rng", VIRTIO_ID_RNG, fd, handle_rng_input);
 
 	/* The device has one virtqueue, where the Guest places inbufs. */
-	add_virtqueue(dev, VIRTQUEUE_NUM, rng_input);
+	add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd);
 
 	verbose("device %u: rng\n", devices.device_num++);
 }
@@ -1587,18 +1827,17 @@ static void __attribute__((noreturn)) restart_guest(void)
 	for (i = 3; i < FD_SETSIZE; i++)
 		close(i);
 
-	/* Reset all the devices (kills all threads). */
-	cleanup_devices();
-
+	/* The exec automatically gets rid of the I/O and Waker threads. */
 	execv(main_args[0], main_args);
 	err(1, "Could not exec %s", main_args[0]);
 }
 
 /*L:220 Finally we reach the core of the Launcher which runs the Guest, serves
  * its input and output, and finally, lays it to rest. */
-static void __attribute__((noreturn)) run_guest(void)
+static void __attribute__((noreturn)) run_guest(int lguest_fd)
 {
 	for (;;) {
+		unsigned long args[] = { LHREQ_BREAK, 0 };
 		unsigned long notify_addr;
 		int readval;
 
@@ -1609,7 +1848,8 @@ static void __attribute__((noreturn)) run_guest(void)
 		/* One unsigned long means the Guest did HCALL_NOTIFY */
 		if (readval == sizeof(notify_addr)) {
 			verbose("Notify on address %#lx\n", notify_addr);
-			handle_output(notify_addr);
+			handle_output(lguest_fd, notify_addr);
+			continue;
 		/* ENOENT means the Guest died.  Reading tells us why. */
 		} else if (errno == ENOENT) {
 			char reason[1024] = { 0 };
@@ -1618,9 +1858,19 @@ static void __attribute__((noreturn)) run_guest(void)
 		/* ERESTART means that we need to reboot the guest */
 		} else if (errno == ERESTART) {
 			restart_guest();
-		/* Anything else means a bug or incompatible change. */
-		} else
+		/* EAGAIN means a signal (timeout).
+		 * Anything else means a bug or incompatible change. */
+		} else if (errno != EAGAIN)
 			err(1, "Running guest failed");
+
+		/* Only service input on thread for CPU 0. */
+		if (cpu_id != 0)
+			continue;
+
+		/* Service input, then unset the BREAK to release the Waker. */
+		handle_input(lguest_fd);
+		if (pwrite(lguest_fd, args, sizeof(args), cpu_id) < 0)
+			err(1, "Resetting break");
 	}
 }
 /*L:240
@@ -1654,8 +1904,8 @@ int main(int argc, char *argv[])
 	/* Memory, top-level pagetable, code startpoint and size of the
 	 * (optional) initrd. */
 	unsigned long mem = 0, start, initrd_size = 0;
-	/* Two temporaries. */
-	int i, c;
+	/* Two temporaries and the /dev/lguest file descriptor. */
+	int i, c, lguest_fd;
 	/* The boot information for the Guest. */
 	struct boot_params *boot;
 	/* If they specify an initrd file to load. */
@@ -1663,10 +1913,18 @@ int main(int argc, char *argv[])
 
 	/* Save the args: we "reboot" by execing ourselves again. */
 	main_args = argv;
+	/* We don't "wait" for the children, so prevent them from becoming
+	 * zombies. */
+	signal(SIGCHLD, SIG_IGN);
 
-	/* First we initialize the device list.  We keep a pointer to the last
-	 * device, and the next interrupt number to use for devices (1:
-	 * remember that 0 is used by the timer). */
+	/* First we initialize the device list.  Since console and network
+	 * device receive input from a file descriptor, we keep an fdset
+	 * (infds) and the maximum fd number (max_infd) with the head of the
+	 * list.  We also keep a pointer to the last device.  Finally, we keep
+	 * the next interrupt number to use for devices (1: remember that 0 is
+	 * used by the timer). */
+	FD_ZERO(&devices.infds);
+	devices.max_infd = -1;
 	devices.lastdev = NULL;
 	devices.next_irq = 1;
 
@@ -1724,6 +1982,9 @@ int main(int argc, char *argv[])
 	/* We always have a console device */
 	setup_console();
 
+	/* We can timeout waiting for Guest network transmit. */
+	setup_timeout();
+
 	/* Now we load the kernel */
 	start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));
 
@@ -1762,16 +2023,15 @@ int main(int argc, char *argv[])
 
 	/* We tell the kernel to initialize the Guest: this returns the open
 	 * /dev/lguest file descriptor. */
-	tell_kernel(start);
-
-	/* Ensure that we terminate if a child dies. */
-	signal(SIGCHLD, kill_launcher);
+	lguest_fd = tell_kernel(start);
 
-	/* If we exit via err(), this kills all the threads, restores tty. */
-	atexit(cleanup_devices);
+	/* We clone off a thread, which wakes the Launcher whenever one of the
+	 * input file descriptors needs attention.  We call this the Waker, and
+	 * we'll cover it in a moment. */
+	setup_waker(lguest_fd);
 
 	/* Finally, run the Guest.  This doesn't return. */
-	run_guest();
+	run_guest(lguest_fd);
 }
 /*:*/
 
diff --git a/trunk/Documentation/lguest/lguest.txt b/trunk/Documentation/lguest/lguest.txt
index efb3a6a045a2..28c747362f95 100644
--- a/trunk/Documentation/lguest/lguest.txt
+++ b/trunk/Documentation/lguest/lguest.txt
@@ -37,6 +37,7 @@ Running Lguest:
      "Paravirtualized guest support" = Y
         "Lguest guest support" = Y
      "High Memory Support" = off/4GB
+     "PAE (Physical Address Extension) Support" = N
      "Alignment value to which kernel should be aligned" = 0x100000
         (CONFIG_PARAVIRT=y, CONFIG_LGUEST_GUEST=y, CONFIG_HIGHMEM64G=n and
          CONFIG_PHYSICAL_ALIGN=0x100000)
diff --git a/trunk/arch/alpha/mm/extable.c b/trunk/arch/alpha/mm/extable.c
index 813c9b63c0e1..62dc379d301a 100644
--- a/trunk/arch/alpha/mm/extable.c
+++ b/trunk/arch/alpha/mm/extable.c
@@ -48,27 +48,6 @@ void sort_extable(struct exception_table_entry *start,
 	     cmp_ex, swap_ex);
 }
 
-#ifdef CONFIG_MODULES
-/*
- * Any entry referring to the module init will be at the beginning or
- * the end.
- */
-void trim_init_extable(struct module *m)
-{
-	/*trim the beginning*/
-	while (m->num_exentries &&
-	       within_module_init(ex_to_addr(&m->extable[0]), m)) {
-		m->extable++;
-		m->num_exentries--;
-	}
-	/*trim the end*/
-	while (m->num_exentries &&
-	       within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]),
-				  m))
-		m->num_exentries--;
-}
-#endif /* CONFIG_MODULES */
-
 const struct exception_table_entry *
 search_extable(const struct exception_table_entry *first,
 	       const struct exception_table_entry *last,
diff --git a/trunk/arch/avr32/kernel/module.c b/trunk/arch/avr32/kernel/module.c
index 98f94d041d9c..1167fe9cf6c4 100644
--- a/trunk/arch/avr32/kernel/module.c
+++ b/trunk/arch/avr32/kernel/module.c
@@ -32,6 +32,8 @@ void module_free(struct module *mod, void *module_region)
 	mod->arch.syminfo = NULL;
 
 	vfree(module_region);
+	/* FIXME: if module_region == mod->init_region, trim exception
+	 * table entries. */
 }
 
 static inline int check_rela(Elf32_Rela *rela, struct module *module,
diff --git a/trunk/arch/cris/kernel/module.c b/trunk/arch/cris/kernel/module.c
index abc13e368b90..a187833febc8 100644
--- a/trunk/arch/cris/kernel/module.c
+++ b/trunk/arch/cris/kernel/module.c
@@ -48,6 +48,8 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	FREE_MODULE(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+	   table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/trunk/arch/frv/kernel/module.c b/trunk/arch/frv/kernel/module.c
index 711763c8a6f3..850d168f69fc 100644
--- a/trunk/arch/frv/kernel/module.c
+++ b/trunk/arch/frv/kernel/module.c
@@ -35,6 +35,8 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+           table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/trunk/arch/h8300/kernel/module.c b/trunk/arch/h8300/kernel/module.c
index 0865e291c20d..cfc9127d2ced 100644
--- a/trunk/arch/h8300/kernel/module.c
+++ b/trunk/arch/h8300/kernel/module.c
@@ -23,6 +23,8 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+           table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/trunk/arch/ia64/mm/extable.c b/trunk/arch/ia64/mm/extable.c
index e95d5ad9285d..71c50dd8f870 100644
--- a/trunk/arch/ia64/mm/extable.c
+++ b/trunk/arch/ia64/mm/extable.c
@@ -53,32 +53,6 @@ void sort_extable (struct exception_table_entry *start,
 	     cmp_ex, swap_ex);
 }
 
-static inline unsigned long ex_to_addr(const struct exception_table_entry *x)
-{
-	return (unsigned long)&x->insn + x->insn;
-}
-
-#ifdef CONFIG_MODULES
-/*
- * Any entry referring to the module init will be at the beginning or
- * the end.
- */
-void trim_init_extable(struct module *m)
-{
-	/*trim the beginning*/
-	while (m->num_exentries &&
-	       within_module_init(ex_to_addr(&m->extable[0]), m)) {
-		m->extable++;
-		m->num_exentries--;
-	}
-	/*trim the end*/
-	while (m->num_exentries &&
-	       within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]),
-				  m))
-		m->num_exentries--;
-}
-#endif /* CONFIG_MODULES */
-
 const struct exception_table_entry *
 search_extable (const struct exception_table_entry *first,
 		const struct exception_table_entry *last,
diff --git a/trunk/arch/m32r/kernel/module.c b/trunk/arch/m32r/kernel/module.c
index cb5f37d78d49..8d4205794380 100644
--- a/trunk/arch/m32r/kernel/module.c
+++ b/trunk/arch/m32r/kernel/module.c
@@ -44,6 +44,8 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+           table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/trunk/arch/m68k/kernel/module.c b/trunk/arch/m68k/kernel/module.c
index cd6bcb1c957e..774862bc6977 100644
--- a/trunk/arch/m68k/kernel/module.c
+++ b/trunk/arch/m68k/kernel/module.c
@@ -31,6 +31,8 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+           table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/trunk/arch/m68knommu/kernel/module.c b/trunk/arch/m68knommu/kernel/module.c
index d11ffae7956a..3b1a2ff61ddc 100644
--- a/trunk/arch/m68knommu/kernel/module.c
+++ b/trunk/arch/m68knommu/kernel/module.c
@@ -23,6 +23,8 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+           table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/trunk/arch/mips/kernel/module.c b/trunk/arch/mips/kernel/module.c
index 3e9100dcc12d..1f60e27523d9 100644
--- a/trunk/arch/mips/kernel/module.c
+++ b/trunk/arch/mips/kernel/module.c
@@ -68,6 +68,8 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+           table entries. */
 }
 
 int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
diff --git a/trunk/arch/mn10300/kernel/module.c b/trunk/arch/mn10300/kernel/module.c
index 4fa0e3648d8e..6b287f2e8e84 100644
--- a/trunk/arch/mn10300/kernel/module.c
+++ b/trunk/arch/mn10300/kernel/module.c
@@ -48,6 +48,8 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+	 * table entries. */
 }
 
 /*
diff --git a/trunk/arch/parisc/kernel/module.c b/trunk/arch/parisc/kernel/module.c
index ef5caf2e6ed0..ecd1c5024447 100644
--- a/trunk/arch/parisc/kernel/module.c
+++ b/trunk/arch/parisc/kernel/module.c
@@ -267,6 +267,8 @@ void module_free(struct module *mod, void *module_region)
 	mod->arch.section = NULL;
 
 	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+           table entries. */
 }
 
 /* Additional bytes needed in front of individual sections */
diff --git a/trunk/arch/powerpc/kernel/module.c b/trunk/arch/powerpc/kernel/module.c
index 477c663e0140..43e7e3a7f130 100644
--- a/trunk/arch/powerpc/kernel/module.c
+++ b/trunk/arch/powerpc/kernel/module.c
@@ -43,6 +43,8 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+           table entries. */
 }
 
 static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
diff --git a/trunk/arch/s390/kernel/module.c b/trunk/arch/s390/kernel/module.c
index ab2e3ed28abc..eed4a00cb676 100644
--- a/trunk/arch/s390/kernel/module.c
+++ b/trunk/arch/s390/kernel/module.c
@@ -56,6 +56,8 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+           table entries. */
 }
 
 static void
diff --git a/trunk/arch/sh/kernel/module.c b/trunk/arch/sh/kernel/module.c
index c2efdcde266f..c19b0f7d2cc1 100644
--- a/trunk/arch/sh/kernel/module.c
+++ b/trunk/arch/sh/kernel/module.c
@@ -46,6 +46,8 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+           table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/trunk/arch/sparc/include/asm/uaccess_32.h b/trunk/arch/sparc/include/asm/uaccess_32.h
index 8303ac481034..47d5619d43fa 100644
--- a/trunk/arch/sparc/include/asm/uaccess_32.h
+++ b/trunk/arch/sparc/include/asm/uaccess_32.h
@@ -17,9 +17,6 @@
 
 #ifndef __ASSEMBLY__
 
-#define ARCH_HAS_SORT_EXTABLE
-#define ARCH_HAS_SEARCH_EXTABLE
-
 /* Sparc is not segmented, however we need to be able to fool access_ok()
  * when doing system calls from kernel mode legitimately.
  *
diff --git a/trunk/arch/sparc/kernel/module.c b/trunk/arch/sparc/kernel/module.c
index 0ee642f63234..90273765e81f 100644
--- a/trunk/arch/sparc/kernel/module.c
+++ b/trunk/arch/sparc/kernel/module.c
@@ -75,6 +75,8 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+           table entries. */
 }
 
 /* Make generic code ignore STT_REGISTER dummy undefined symbols.  */
diff --git a/trunk/arch/sparc/mm/extable.c b/trunk/arch/sparc/mm/extable.c
index a61c349448e1..16cc28935e39 100644
--- a/trunk/arch/sparc/mm/extable.c
+++ b/trunk/arch/sparc/mm/extable.c
@@ -28,10 +28,6 @@ search_extable(const struct exception_table_entry *start,
 	 *	word 3: last insn address + 4 bytes
 	 *	word 4: fixup code address
 	 *
-	 * Deleted entries are encoded as:
-	 *	word 1: unused
-	 *	word 2: -1
-	 *
 	 * See asm/uaccess.h for more details.
 	 */
 
@@ -43,10 +39,6 @@ search_extable(const struct exception_table_entry *start,
 			continue;
 		}
 
-		/* A deleted entry; see trim_init_extable */
-		if (walk->fixup == -1)
-			continue;
-
 		if (walk->insn == value)
 			return walk;
 	}
@@ -65,27 +57,6 @@ search_extable(const struct exception_table_entry *start,
         return NULL;
 }
 
-#ifdef CONFIG_MODULES
-/* We could memmove them around; easier to mark the trimmed ones. */
-void trim_init_extable(struct module *m)
-{
-	unsigned int i;
-	bool range;
-
-	for (i = 0; i < m->num_exentries; i += range ? 2 : 1) {
-		range = m->extable[i].fixup == 0;
-
-		if (within_module_init(m->extable[i].insn, m)) {
-			m->extable[i].fixup = -1;
-			if (range)
-				m->extable[i+1].fixup = -1;
-		}
-		if (range)
-			i++;
-	}
-}
-#endif /* CONFIG_MODULES */
-
 /* Special extable search, which handles ranges.  Returns fixup */
 unsigned long search_extables_range(unsigned long addr, unsigned long *g2)
 {
diff --git a/trunk/arch/um/include/asm/pgtable.h b/trunk/arch/um/include/asm/pgtable.h
index 9ce3f165111a..58da2480a7f4 100644
--- a/trunk/arch/um/include/asm/pgtable.h
+++ b/trunk/arch/um/include/asm/pgtable.h
@@ -53,21 +53,16 @@ extern unsigned long end_iomem;
 #else
 # define VMALLOC_END	(FIXADDR_START-2*PAGE_SIZE)
 #endif
-#define MODULES_VADDR	VMALLOC_START
-#define MODULES_END	VMALLOC_END
-#define MODULES_LEN	(MODULES_VADDR - MODULES_END)
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define __PAGE_KERNEL_EXEC                                              \
-	 (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+
 #define PAGE_NONE	__pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
 #define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
 #define PAGE_COPY	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
 #define PAGE_READONLY	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
 #define PAGE_KERNEL	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
-#define PAGE_KERNEL_EXEC	__pgprot(__PAGE_KERNEL_EXEC)
 
 /*
  * The i386 can't do page protection for execute, and considers that the same
diff --git a/trunk/arch/um/sys-i386/Makefile b/trunk/arch/um/sys-i386/Makefile
index 1b549bca4645..598b5c1903af 100644
--- a/trunk/arch/um/sys-i386/Makefile
+++ b/trunk/arch/um/sys-i386/Makefile
@@ -8,7 +8,7 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
 
 subarch-obj-y = lib/semaphore_32.o lib/string_32.o
 subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o
-subarch-obj-$(CONFIG_MODULES) += kernel/module.o
+subarch-obj-$(CONFIG_MODULES) += kernel/module_32.o
 
 USER_OBJS := bugs.o ptrace_user.o fault.o
 
diff --git a/trunk/arch/um/sys-x86_64/Makefile b/trunk/arch/um/sys-x86_64/Makefile
index 2201e9c20e4a..c8b4cce9cfe1 100644
--- a/trunk/arch/um/sys-x86_64/Makefile
+++ b/trunk/arch/um/sys-x86_64/Makefile
@@ -8,8 +8,10 @@ obj-y = bug.o bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \
 	setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \
 	sysrq.o ksyms.o tls.o
 
+obj-$(CONFIG_MODULES) += um_module.o
+
 subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o
-subarch-obj-$(CONFIG_MODULES) += kernel/module.o
+subarch-obj-$(CONFIG_MODULES) += kernel/module_64.o
 
 ldt-y = ../sys-i386/ldt.o
 
diff --git a/trunk/arch/um/sys-x86_64/um_module.c b/trunk/arch/um/sys-x86_64/um_module.c
new file mode 100644
index 000000000000..3dead392a415
--- /dev/null
+++ b/trunk/arch/um/sys-x86_64/um_module.c
@@ -0,0 +1,21 @@
+#include <linux/vmalloc.h>
+#include <linux/moduleloader.h>
+
+/* Copied from i386 arch/i386/kernel/module.c */
+void *module_alloc(unsigned long size)
+{
+	if (size == 0)
+		return NULL;
+	return vmalloc_exec(size);
+}
+
+/* Free memory returned from module_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+	vfree(module_region);
+	/*
+	 * FIXME: If module_region == mod->init_region, trim exception
+	 * table entries.
+	 */
+}
+
diff --git a/trunk/arch/x86/include/asm/lguest.h b/trunk/arch/x86/include/asm/lguest.h
index 313389cd50d2..1caf57628b9c 100644
--- a/trunk/arch/x86/include/asm/lguest.h
+++ b/trunk/arch/x86/include/asm/lguest.h
@@ -17,13 +17,8 @@
 /* Pages for switcher itself, then two pages per cpu */
 #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids)
 
-/* We map at -4M (-2M when PAE is activated) for ease of mapping
- * into the guest (one PTE page). */
-#ifdef CONFIG_X86_PAE
-#define SWITCHER_ADDR 0xFFE00000
-#else
+/* We map at -4M for ease of mapping into the guest (one PTE page). */
 #define SWITCHER_ADDR 0xFFC00000
-#endif
 
 /* Found in switcher.S */
 extern unsigned long default_idt_entries[];
diff --git a/trunk/arch/x86/include/asm/lguest_hcall.h b/trunk/arch/x86/include/asm/lguest_hcall.h
index d31c4a684078..faae1996487b 100644
--- a/trunk/arch/x86/include/asm/lguest_hcall.h
+++ b/trunk/arch/x86/include/asm/lguest_hcall.h
@@ -12,13 +12,11 @@
 #define LHCALL_TS		8
 #define LHCALL_SET_CLOCKEVENT	9
 #define LHCALL_HALT		10
-#define LHCALL_SET_PMD		13
 #define LHCALL_SET_PTE		14
-#define LHCALL_SET_PGD		15
+#define LHCALL_SET_PMD		15
 #define LHCALL_LOAD_TLS		16
 #define LHCALL_NOTIFY		17
 #define LHCALL_LOAD_GDT_ENTRY	18
-#define LHCALL_SEND_INTERRUPTS	19
 
 #define LGUEST_TRAP_ENTRY 0x1F
 
@@ -34,10 +32,10 @@
  * operations?  There are two ways: the direct way is to make a "hypercall",
  * to make requests of the Host Itself.
  *
- * We use the KVM hypercall mechanism. Seventeen hypercalls are
+ * We use the KVM hypercall mechanism. Eighteen hypercalls are
  * available: the hypercall number is put in the %eax register, and the
- * arguments (when required) are placed in %ebx, %ecx, %edx and %esi.
- * If a return value makes sense, it's returned in %eax.
+ * arguments (when required) are placed in %ebx, %ecx and %edx.  If a return
+ * value makes sense, it's returned in %eax.
  *
  * Grossly invalid calls result in Sudden Death at the hands of the vengeful
  * Host, rather than returning failure.  This reflects Winston Churchill's
@@ -49,9 +47,8 @@
 
 #define LHCALL_RING_SIZE 64
 struct hcall_args {
-	/* These map directly onto eax, ebx, ecx, edx and esi
-	 * in struct lguest_regs */
-	unsigned long arg0, arg1, arg2, arg3, arg4;
+	/* These map directly onto eax, ebx, ecx, edx in struct lguest_regs */
+	unsigned long arg0, arg1, arg2, arg3;
 };
 
 #endif /* !__ASSEMBLY__ */
diff --git a/trunk/arch/x86/include/asm/pgtable_32_types.h b/trunk/arch/x86/include/asm/pgtable_32_types.h
index 5e67c1532314..2733fad45f98 100644
--- a/trunk/arch/x86/include/asm/pgtable_32_types.h
+++ b/trunk/arch/x86/include/asm/pgtable_32_types.h
@@ -46,10 +46,6 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
 # define VMALLOC_END	(FIXADDR_START - 2 * PAGE_SIZE)
 #endif
 
-#define MODULES_VADDR	VMALLOC_START
-#define MODULES_END	VMALLOC_END
-#define MODULES_LEN	(MODULES_VADDR - MODULES_END)
-
 #define MAXMEM	(VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
 
 #endif /* _ASM_X86_PGTABLE_32_DEFS_H */
diff --git a/trunk/arch/x86/kernel/Makefile b/trunk/arch/x86/kernel/Makefile
index f3477bb84566..4f78bd682125 100644
--- a/trunk/arch/x86/kernel/Makefile
+++ b/trunk/arch/x86/kernel/Makefile
@@ -73,7 +73,7 @@ obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
-obj-$(CONFIG_MODULES)		+= module.o
+obj-$(CONFIG_MODULES)		+= module_$(BITS).o
 obj-$(CONFIG_EFI) 		+= efi.o efi_$(BITS).o efi_stub_$(BITS).o
 obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault_32.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
diff --git a/trunk/arch/x86/kernel/asm-offsets_32.c b/trunk/arch/x86/kernel/asm-offsets_32.c
index dfdbf6403895..1a830cbd7015 100644
--- a/trunk/arch/x86/kernel/asm-offsets_32.c
+++ b/trunk/arch/x86/kernel/asm-offsets_32.c
@@ -126,7 +126,6 @@ void foo(void)
 #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
 	BLANK();
 	OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
-	OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending);
 	OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir);
 
 	BLANK();
diff --git a/trunk/arch/x86/kernel/module_32.c b/trunk/arch/x86/kernel/module_32.c
new file mode 100644
index 000000000000..0edd819050e7
--- /dev/null
+++ b/trunk/arch/x86/kernel/module_32.c
@@ -0,0 +1,152 @@
+/*  Kernel module help for i386.
+    Copyright (C) 2001 Rusty Russell.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt...)
+#endif
+
+void *module_alloc(unsigned long size)
+{
+	if (size == 0)
+		return NULL;
+	return vmalloc_exec(size);
+}
+
+
+/* Free memory returned from module_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+	   table entries. */
+}
+
+/* We don't need anything special. */
+int module_frob_arch_sections(Elf_Ehdr *hdr,
+			      Elf_Shdr *sechdrs,
+			      char *secstrings,
+			      struct module *mod)
+{
+	return 0;
+}
+
+int apply_relocate(Elf32_Shdr *sechdrs,
+		   const char *strtab,
+		   unsigned int symindex,
+		   unsigned int relsec,
+		   struct module *me)
+{
+	unsigned int i;
+	Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr;
+	Elf32_Sym *sym;
+	uint32_t *location;
+
+	DEBUGP("Applying relocate section %u to %u\n", relsec,
+	       sechdrs[relsec].sh_info);
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+		/* This is where to make the change */
+		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rel[i].r_offset;
+		/* This is the symbol it is referring to.  Note that all
+		   undefined symbols have been resolved.  */
+		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
+			+ ELF32_R_SYM(rel[i].r_info);
+
+		switch (ELF32_R_TYPE(rel[i].r_info)) {
+		case R_386_32:
+			/* We add the value into the location given */
+			*location += sym->st_value;
+			break;
+		case R_386_PC32:
+			/* Add the value, subtract its position */
+			*location += sym->st_value - (uint32_t)location;
+			break;
+		default:
+			printk(KERN_ERR "module %s: Unknown relocation: %u\n",
+			       me->name, ELF32_R_TYPE(rel[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+}
+
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *me)
+{
+	printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n",
+	       me->name);
+	return -ENOEXEC;
+}
+
+int module_finalize(const Elf_Ehdr *hdr,
+		    const Elf_Shdr *sechdrs,
+		    struct module *me)
+{
+	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
+		*para = NULL;
+	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+		if (!strcmp(".text", secstrings + s->sh_name))
+			text = s;
+		if (!strcmp(".altinstructions", secstrings + s->sh_name))
+			alt = s;
+		if (!strcmp(".smp_locks", secstrings + s->sh_name))
+			locks = s;
+		if (!strcmp(".parainstructions", secstrings + s->sh_name))
+			para = s;
+	}
+
+	if (alt) {
+		/* patch .altinstructions */
+		void *aseg = (void *)alt->sh_addr;
+		apply_alternatives(aseg, aseg + alt->sh_size);
+	}
+	if (locks && text) {
+		void *lseg = (void *)locks->sh_addr;
+		void *tseg = (void *)text->sh_addr;
+		alternatives_smp_module_add(me, me->name,
+					    lseg, lseg + locks->sh_size,
+					    tseg, tseg + text->sh_size);
+	}
+
+	if (para) {
+		void *pseg = (void *)para->sh_addr;
+		apply_paravirt(pseg, pseg + para->sh_size);
+	}
+
+	return module_bug_finalize(hdr, sechdrs, me);
+}
+
+void module_arch_cleanup(struct module *mod)
+{
+	alternatives_smp_module_del(mod);
+	module_bug_cleanup(mod);
+}
diff --git a/trunk/arch/x86/kernel/module.c b/trunk/arch/x86/kernel/module_64.c
similarity index 74%
rename from trunk/arch/x86/kernel/module.c
rename to trunk/arch/x86/kernel/module_64.c
index 89f386f044e4..c23880b90b5c 100644
--- a/trunk/arch/x86/kernel/module.c
+++ b/trunk/arch/x86/kernel/module_64.c
@@ -1,5 +1,6 @@
-/*  Kernel module help for x86.
+/*  Kernel module help for x86-64
     Copyright (C) 2001 Rusty Russell.
+    Copyright (C) 2002,2003 Andi Kleen, SuSE Labs.
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -21,18 +22,23 @@
 #include <linux/fs.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
-#include <linux/bug.h>
 #include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/bug.h>
 
 #include <asm/system.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
-#if 0
-#define DEBUGP printk
-#else
 #define DEBUGP(fmt...)
-#endif
+
+#ifndef CONFIG_UML
+void module_free(struct module *mod, void *module_region)
+{
+	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+	   table entries. */
+}
 
 void *module_alloc(unsigned long size)
 {
@@ -48,15 +54,9 @@ void *module_alloc(unsigned long size)
 	if (!area)
 		return NULL;
 
-	return __vmalloc_area(area, GFP_KERNEL | __GFP_HIGHMEM,
-					PAGE_KERNEL_EXEC);
-}
-
-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
-{
-	vfree(module_region);
+	return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL_EXEC);
 }
+#endif
 
 /* We don't need anything special. */
 int module_frob_arch_sections(Elf_Ehdr *hdr,
@@ -67,58 +67,6 @@ int module_frob_arch_sections(Elf_Ehdr *hdr,
 	return 0;
 }
 
-#ifdef CONFIG_X86_32
-int apply_relocate(Elf32_Shdr *sechdrs,
-		   const char *strtab,
-		   unsigned int symindex,
-		   unsigned int relsec,
-		   struct module *me)
-{
-	unsigned int i;
-	Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr;
-	Elf32_Sym *sym;
-	uint32_t *location;
-
-	DEBUGP("Applying relocate section %u to %u\n", relsec,
-	       sechdrs[relsec].sh_info);
-	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
-		/* This is where to make the change */
-		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
-			+ rel[i].r_offset;
-		/* This is the symbol it is referring to.  Note that all
-		   undefined symbols have been resolved.  */
-		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
-			+ ELF32_R_SYM(rel[i].r_info);
-
-		switch (ELF32_R_TYPE(rel[i].r_info)) {
-		case R_386_32:
-			/* We add the value into the location given */
-			*location += sym->st_value;
-			break;
-		case R_386_PC32:
-			/* Add the value, subtract its postition */
-			*location += sym->st_value - (uint32_t)location;
-			break;
-		default:
-			printk(KERN_ERR "module %s: Unknown relocation: %u\n",
-			       me->name, ELF32_R_TYPE(rel[i].r_info));
-			return -ENOEXEC;
-		}
-	}
-	return 0;
-}
-
-int apply_relocate_add(Elf32_Shdr *sechdrs,
-		       const char *strtab,
-		       unsigned int symindex,
-		       unsigned int relsec,
-		       struct module *me)
-{
-	printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n",
-	       me->name);
-	return -ENOEXEC;
-}
-#else /*X86_64*/
 int apply_relocate_add(Elf64_Shdr *sechdrs,
 		   const char *strtab,
 		   unsigned int symindex,
@@ -199,8 +147,6 @@ int apply_relocate(Elf_Shdr *sechdrs,
 	return -ENOSYS;
 }
 
-#endif
-
 int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs,
 		    struct module *me)
diff --git a/trunk/arch/x86/kernel/setup.c b/trunk/arch/x86/kernel/setup.c
index be5ae80f897f..d1c636bf31a7 100644
--- a/trunk/arch/x86/kernel/setup.c
+++ b/trunk/arch/x86/kernel/setup.c
@@ -301,13 +301,15 @@ static void __init reserve_brk(void)
 
 #ifdef CONFIG_BLK_DEV_INITRD
 
+#ifdef CONFIG_X86_32
+
 #define MAX_MAP_CHUNK	(NR_FIX_BTMAPS << PAGE_SHIFT)
 static void __init relocate_initrd(void)
 {
 
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
-	u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
+	u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
 	u64 ramdisk_here;
 	unsigned long slop, clen, mapaddr;
 	char *p, *q;
@@ -363,13 +365,14 @@ static void __init relocate_initrd(void)
 		ramdisk_image, ramdisk_image + ramdisk_size - 1,
 		ramdisk_here, ramdisk_here + ramdisk_size - 1);
 }
+#endif
 
 static void __init reserve_initrd(void)
 {
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
 	u64 ramdisk_end   = ramdisk_image + ramdisk_size;
-	u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
+	u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
 
 	if (!boot_params.hdr.type_of_loader ||
 	    !ramdisk_image || !ramdisk_size)
@@ -399,8 +402,14 @@ static void __init reserve_initrd(void)
 		return;
 	}
 
+#ifdef CONFIG_X86_32
 	relocate_initrd();
-
+#else
+	printk(KERN_ERR "initrd extends beyond end of memory "
+	       "(0x%08llx > 0x%08llx)\ndisabling initrd\n",
+	       ramdisk_end, end_of_lowmem);
+	initrd_start = 0;
+#endif
 	free_early(ramdisk_image, ramdisk_end);
 }
 #else
diff --git a/trunk/arch/x86/kernel/vmlinux.lds.S b/trunk/arch/x86/kernel/vmlinux.lds.S
index 367e87882041..4c85b2e2bb65 100644
--- a/trunk/arch/x86/kernel/vmlinux.lds.S
+++ b/trunk/arch/x86/kernel/vmlinux.lds.S
@@ -108,8 +108,6 @@ SECTIONS
 	/* Data */
 	. = ALIGN(PAGE_SIZE);
 	.data : AT(ADDR(.data) - LOAD_OFFSET) {
-		/* Start of data section */
-		_sdata = .;
 		DATA_DATA
 		CONSTRUCTORS
 
diff --git a/trunk/arch/x86/lguest/Kconfig b/trunk/arch/x86/lguest/Kconfig
index 38718041efc3..8dab8f7844d3 100644
--- a/trunk/arch/x86/lguest/Kconfig
+++ b/trunk/arch/x86/lguest/Kconfig
@@ -2,6 +2,7 @@ config LGUEST_GUEST
 	bool "Lguest guest support"
 	select PARAVIRT
 	depends on X86_32
+	depends on !X86_PAE
 	select VIRTIO
 	select VIRTIO_RING
 	select VIRTIO_CONSOLE
diff --git a/trunk/arch/x86/lguest/boot.c b/trunk/arch/x86/lguest/boot.c
index 7bc65f0f62c4..4e0c26559395 100644
--- a/trunk/arch/x86/lguest/boot.c
+++ b/trunk/arch/x86/lguest/boot.c
@@ -87,7 +87,7 @@ struct lguest_data lguest_data = {
 
 /*G:037 async_hcall() is pretty simple: I'm quite proud of it really.  We have a
  * ring buffer of stored hypercalls which the Host will run though next time we
- * do a normal hypercall.  Each entry in the ring has 5 slots for the hypercall
+ * do a normal hypercall.  Each entry in the ring has 4 slots for the hypercall
  * arguments, and a "hcall_status" word which is 0 if the call is ready to go,
  * and 255 once the Host has finished with it.
  *
@@ -96,8 +96,7 @@ struct lguest_data lguest_data = {
  * effect of causing the Host to run all the stored calls in the ring buffer
  * which empties it for next time! */
 static void async_hcall(unsigned long call, unsigned long arg1,
-			unsigned long arg2, unsigned long arg3,
-			unsigned long arg4)
+			unsigned long arg2, unsigned long arg3)
 {
 	/* Note: This code assumes we're uniprocessor. */
 	static unsigned int next_call;
@@ -109,13 +108,12 @@ static void async_hcall(unsigned long call, unsigned long arg1,
 	local_irq_save(flags);
 	if (lguest_data.hcall_status[next_call] != 0xFF) {
 		/* Table full, so do normal hcall which will flush table. */
-		kvm_hypercall4(call, arg1, arg2, arg3, arg4);
+		kvm_hypercall3(call, arg1, arg2, arg3);
 	} else {
 		lguest_data.hcalls[next_call].arg0 = call;
 		lguest_data.hcalls[next_call].arg1 = arg1;
 		lguest_data.hcalls[next_call].arg2 = arg2;
 		lguest_data.hcalls[next_call].arg3 = arg3;
-		lguest_data.hcalls[next_call].arg4 = arg4;
 		/* Arguments must all be written before we mark it to go */
 		wmb();
 		lguest_data.hcall_status[next_call] = 0;
@@ -143,7 +141,7 @@ static void lazy_hcall1(unsigned long call,
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
 		kvm_hypercall1(call, arg1);
 	else
-		async_hcall(call, arg1, 0, 0, 0);
+		async_hcall(call, arg1, 0, 0);
 }
 
 static void lazy_hcall2(unsigned long call,
@@ -153,7 +151,7 @@ static void lazy_hcall2(unsigned long call,
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
 		kvm_hypercall2(call, arg1, arg2);
 	else
-		async_hcall(call, arg1, arg2, 0, 0);
+		async_hcall(call, arg1, arg2, 0);
 }
 
 static void lazy_hcall3(unsigned long call,
@@ -164,23 +162,9 @@ static void lazy_hcall3(unsigned long call,
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
 		kvm_hypercall3(call, arg1, arg2, arg3);
 	else
-		async_hcall(call, arg1, arg2, arg3, 0);
+		async_hcall(call, arg1, arg2, arg3);
 }
 
-#ifdef CONFIG_X86_PAE
-static void lazy_hcall4(unsigned long call,
-		       unsigned long arg1,
-		       unsigned long arg2,
-		       unsigned long arg3,
-		       unsigned long arg4)
-{
-	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
-		kvm_hypercall4(call, arg1, arg2, arg3, arg4);
-	else
-		async_hcall(call, arg1, arg2, arg3, arg4);
-}
-#endif
-
 /* When lazy mode is turned off reset the per-cpu lazy mode variable and then
  * issue the do-nothing hypercall to flush any stored calls. */
 static void lguest_leave_lazy_mmu_mode(void)
@@ -195,7 +179,7 @@ static void lguest_end_context_switch(struct task_struct *next)
 	paravirt_end_context_switch(next);
 }
 
-/*G:032
+/*G:033
  * After that diversion we return to our first native-instruction
  * replacements: four functions for interrupt control.
  *
@@ -215,28 +199,30 @@ static unsigned long save_fl(void)
 {
 	return lguest_data.irq_enabled;
 }
+PV_CALLEE_SAVE_REGS_THUNK(save_fl);
+
+/* restore_fl() just sets the flags back to the value given. */
+static void restore_fl(unsigned long flags)
+{
+	lguest_data.irq_enabled = flags;
+}
+PV_CALLEE_SAVE_REGS_THUNK(restore_fl);
 
 /* Interrupts go off... */
 static void irq_disable(void)
 {
 	lguest_data.irq_enabled = 0;
 }
-
-/* Let's pause a moment.  Remember how I said these are called so often?
- * Jeremy Fitzhardinge optimized them so hard early in 2009 that he had to
- * break some rules.  In particular, these functions are assumed to save their
- * own registers if they need to: normal C functions assume they can trash the
- * eax register.  To use normal C functions, we use
- * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the
- * C function, then restores it. */
-PV_CALLEE_SAVE_REGS_THUNK(save_fl);
 PV_CALLEE_SAVE_REGS_THUNK(irq_disable);
-/*:*/
 
-/* These are in i386_head.S */
-extern void lg_irq_enable(void);
-extern void lg_restore_fl(unsigned long flags);
+/* Interrupts go on... */
+static void irq_enable(void)
+{
+	lguest_data.irq_enabled = X86_EFLAGS_IF;
+}
+PV_CALLEE_SAVE_REGS_THUNK(irq_enable);
 
+/*:*/
 /*M:003 Note that we don't check for outstanding interrupts when we re-enable
  * them (or when we unmask an interrupt).  This seems to work for the moment,
  * since interrupts are rare and we'll just get the interrupt on the next timer
@@ -382,8 +368,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
 	case 1:	/* Basic feature request. */
 		/* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */
 		*cx &= 0x00002201;
-		/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU, PAE. */
-		*dx &= 0x07808151;
+		/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */
+		*dx &= 0x07808111;
 		/* The Host can do a nice optimization if it knows that the
 		 * kernel mappings (addresses above 0xC0000000 or whatever
 		 * PAGE_OFFSET is set to) haven't changed.  But Linux calls
@@ -402,11 +388,6 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
 		if (*ax > 0x80000008)
 			*ax = 0x80000008;
 		break;
-	case 0x80000001:
-		/* Here we should fix nx cap depending on host. */
-		/* For this version of PAE, we just clear NX bit. */
-		*dx &= ~(1 << 20);
-		break;
 	}
 }
 
@@ -540,52 +521,25 @@ static void lguest_write_cr4(unsigned long val)
 static void lguest_pte_update(struct mm_struct *mm, unsigned long addr,
 			       pte_t *ptep)
 {
-#ifdef CONFIG_X86_PAE
-	lazy_hcall4(LHCALL_SET_PTE, __pa(mm->pgd), addr,
-		    ptep->pte_low, ptep->pte_high);
-#else
 	lazy_hcall3(LHCALL_SET_PTE, __pa(mm->pgd), addr, ptep->pte_low);
-#endif
 }
 
 static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pteval)
 {
-	native_set_pte(ptep, pteval);
+	*ptep = pteval;
 	lguest_pte_update(mm, addr, ptep);
 }
 
-/* The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd
- * to set a middle-level entry when PAE is activated.
- * Again, we set the entry then tell the Host which page we changed,
- * and the index of the entry we changed. */
-#ifdef CONFIG_X86_PAE
-static void lguest_set_pud(pud_t *pudp, pud_t pudval)
-{
-	native_set_pud(pudp, pudval);
-
-	/* 32 bytes aligned pdpt address and the index. */
-	lazy_hcall2(LHCALL_SET_PGD, __pa(pudp) & 0xFFFFFFE0,
-		   (__pa(pudp) & 0x1F) / sizeof(pud_t));
-}
-
+/* The Guest calls this to set a top-level entry.  Again, we set the entry then
+ * tell the Host which top-level page we changed, and the index of the entry we
+ * changed. */
 static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
-	native_set_pmd(pmdp, pmdval);
+	*pmdp = pmdval;
 	lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) & PAGE_MASK,
-		   (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
+		   (__pa(pmdp) & (PAGE_SIZE - 1)) / 4);
 }
-#else
-
-/* The Guest calls lguest_set_pmd to set a top-level entry when PAE is not
- * activated. */
-static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
-{
-	native_set_pmd(pmdp, pmdval);
-	lazy_hcall2(LHCALL_SET_PGD, __pa(pmdp) & PAGE_MASK,
-		   (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
-}
-#endif
 
 /* There are a couple of legacy places where the kernel sets a PTE, but we
  * don't know the top level any more.  This is useless for us, since we don't
@@ -598,31 +552,11 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
  * which brings boot back to 0.25 seconds. */
 static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 {
-	native_set_pte(ptep, pteval);
-	if (cr3_changed)
-		lazy_hcall1(LHCALL_FLUSH_TLB, 1);
-}
-
-#ifdef CONFIG_X86_PAE
-static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte)
-{
-	native_set_pte_atomic(ptep, pte);
+	*ptep = pteval;
 	if (cr3_changed)
 		lazy_hcall1(LHCALL_FLUSH_TLB, 1);
 }
 
-void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
-	native_pte_clear(mm, addr, ptep);
-	lguest_pte_update(mm, addr, ptep);
-}
-
-void lguest_pmd_clear(pmd_t *pmdp)
-{
-	lguest_set_pmd(pmdp, __pmd(0));
-}
-#endif
-
 /* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on
  * native page table operations.  On native hardware you can set a new page
  * table entry whenever you want, but if you want to remove one you have to do
@@ -694,12 +628,13 @@ static void __init lguest_init_IRQ(void)
 {
 	unsigned int i;
 
-	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
+	for (i = 0; i < LGUEST_IRQS; i++) {
+		int vector = FIRST_EXTERNAL_VECTOR + i;
 		/* Some systems map "vectors" to interrupts weirdly.  Lguest has
 		 * a straightforward 1 to 1 mapping, so force that here. */
-		__get_cpu_var(vector_irq)[i] = i - FIRST_EXTERNAL_VECTOR;
-		if (i != SYSCALL_VECTOR)
-			set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
+		__get_cpu_var(vector_irq)[vector] = i;
+		if (vector != SYSCALL_VECTOR)
+			set_intr_gate(vector, interrupt[i]);
 	}
 	/* This call is required to set up for 4k stacks, where we have
 	 * separate stacks for hard and soft interrupts. */
@@ -1038,10 +973,10 @@ static void lguest_restart(char *reason)
  *
  * Our current solution is to allow the paravirt back end to optionally patch
  * over the indirect calls to replace them with something more efficient.  We
- * patch two of the simplest of the most commonly called functions: disable
- * interrupts and save interrupts.  We usually have 6 or 10 bytes to patch
- * into: the Guest versions of these operations are small enough that we can
- * fit comfortably.
+ * patch the four most commonly called functions: disable interrupts, enable
+ * interrupts, restore interrupts and save interrupts.  We usually have 6 or 10
+ * bytes to patch into: the Guest versions of these operations are small enough
+ * that we can fit comfortably.
  *
  * First we need assembly templates of each of the patchable Guest operations,
  * and these are in i386_head.S. */
@@ -1052,6 +987,8 @@ static const struct lguest_insns
 	const char *start, *end;
 } lguest_insns[] = {
 	[PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli },
+	[PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti },
+	[PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf },
 	[PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf },
 };
 
@@ -1089,7 +1026,6 @@ __init void lguest_init(void)
 	pv_info.name = "lguest";
 	pv_info.paravirt_enabled = 1;
 	pv_info.kernel_rpl = 1;
-	pv_info.shared_kernel_pmd = 1;
 
 	/* We set up all the lguest overrides for sensitive operations.  These
 	 * are detailed with the operations themselves. */
@@ -1097,9 +1033,9 @@ __init void lguest_init(void)
 	/* interrupt-related operations */
 	pv_irq_ops.init_IRQ = lguest_init_IRQ;
 	pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl);
-	pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl);
+	pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl);
 	pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable);
-	pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable);
+	pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable);
 	pv_irq_ops.safe_halt = lguest_safe_halt;
 
 	/* init-time operations */
@@ -1135,12 +1071,6 @@ __init void lguest_init(void)
 	pv_mmu_ops.set_pte = lguest_set_pte;
 	pv_mmu_ops.set_pte_at = lguest_set_pte_at;
 	pv_mmu_ops.set_pmd = lguest_set_pmd;
-#ifdef CONFIG_X86_PAE
-	pv_mmu_ops.set_pte_atomic = lguest_set_pte_atomic;
-	pv_mmu_ops.pte_clear = lguest_pte_clear;
-	pv_mmu_ops.pmd_clear = lguest_pmd_clear;
-	pv_mmu_ops.set_pud = lguest_set_pud;
-#endif
 	pv_mmu_ops.read_cr2 = lguest_read_cr2;
 	pv_mmu_ops.read_cr3 = lguest_read_cr3;
 	pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
diff --git a/trunk/arch/x86/lguest/i386_head.S b/trunk/arch/x86/lguest/i386_head.S
index a9c8cfe61cd4..f79541989471 100644
--- a/trunk/arch/x86/lguest/i386_head.S
+++ b/trunk/arch/x86/lguest/i386_head.S
@@ -46,64 +46,10 @@ ENTRY(lguest_entry)
 	.globl lgstart_##name; .globl lgend_##name
 
 LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
+LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled)
+LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled)
 LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
-
-/*G:033 But using those wrappers is inefficient (we'll see why that doesn't
- * matter for save_fl and irq_disable later).  If we write our routines
- * carefully in assembler, we can avoid clobbering any registers and avoid
- * jumping through the wrapper functions.
- *
- * I skipped over our first piece of assembler, but this one is worth studying
- * in a bit more detail so I'll describe in easy stages.  First, the routine
- * to enable interrupts: */
-ENTRY(lg_irq_enable)
-	/* The reverse of irq_disable, this sets lguest_data.irq_enabled to
-	 * X86_EFLAGS_IF (ie. "Interrupts enabled"). */
-	movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled
-	/* But now we need to check if the Host wants to know: there might have
-	 * been interrupts waiting to be delivered, in which case it will have
-	 * set lguest_data.irq_pending to X86_EFLAGS_IF.  If it's not zero, we
-	 * jump to send_interrupts, otherwise we're done. */
-	testl $0, lguest_data+LGUEST_DATA_irq_pending
-	jnz send_interrupts
-	/* One cool thing about x86 is that you can do many things without using
-	 * a register.  In this case, the normal path hasn't needed to save or
-	 * restore any registers at all! */
-	ret
-send_interrupts:
-	/* OK, now we need a register: eax is used for the hypercall number,
-	 * which is LHCALL_SEND_INTERRUPTS.
-	 *
-	 * We used not to bother with this pending detection at all, which was
-	 * much simpler.  Sooner or later the Host would realize it had to
-	 * send us an interrupt.  But that turns out to make performance 7
-	 * times worse on a simple tcp benchmark.  So now we do this the hard
-	 * way. */
-	pushl %eax
-	movl $LHCALL_SEND_INTERRUPTS, %eax
-	/* This is a vmcall instruction (same thing that KVM uses).  Older
-	 * assembler versions might not know the "vmcall" instruction, so we
-	 * create one manually here. */
-	.byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */
-	popl %eax
-	ret
-
-/* Finally, the "popf" or "restore flags" routine.  The %eax register holds the
- * flags (in practice, either X86_EFLAGS_IF or 0): if it's X86_EFLAGS_IF we're
- * enabling interrupts again, if it's 0 we're leaving them off. */
-ENTRY(lg_restore_fl)
-	/* This is just "lguest_data.irq_enabled = flags;" */
-	movl %eax, lguest_data+LGUEST_DATA_irq_enabled
-	/* Now, if the %eax value has enabled interrupts and
-	 * lguest_data.irq_pending is set, we want to tell the Host so it can
-	 * deliver any outstanding interrupts.  Fortunately, both values will
-	 * be X86_EFLAGS_IF (ie. 512) in that case, and the "testl"
-	 * instruction will AND them together for us.  If both are set, we
-	 * jump to send_interrupts. */
-	testl lguest_data+LGUEST_DATA_irq_pending, %eax
-	jnz send_interrupts
-	/* Again, the normal path has used no extra registers.  Clever, huh? */
-	ret
+/*:*/
 
 /* These demark the EIP range where host should never deliver interrupts. */
 .global lguest_noirq_start
diff --git a/trunk/arch/xtensa/kernel/module.c b/trunk/arch/xtensa/kernel/module.c
index c1accea8cb56..3981a466c779 100644
--- a/trunk/arch/xtensa/kernel/module.c
+++ b/trunk/arch/xtensa/kernel/module.c
@@ -34,6 +34,8 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+	   table entries. */
 }
 
 int module_frob_arch_sections(Elf32_Ehdr *hdr,
diff --git a/trunk/drivers/block/virtio_blk.c b/trunk/drivers/block/virtio_blk.c
index 43db3ea15b54..c0facaa55cf4 100644
--- a/trunk/drivers/block/virtio_blk.c
+++ b/trunk/drivers/block/virtio_blk.c
@@ -254,7 +254,7 @@ static int index_to_minor(int index)
 	return index << PART_BITS;
 }
 
-static int __devinit virtblk_probe(struct virtio_device *vdev)
+static int virtblk_probe(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk;
 	int err;
@@ -288,7 +288,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 	sg_init_table(vblk->sg, vblk->sg_elems);
 
 	/* We expect one virtqueue, for output. */
-	vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
+	vblk->vq = vdev->config->find_vq(vdev, 0, blk_done);
 	if (IS_ERR(vblk->vq)) {
 		err = PTR_ERR(vblk->vq);
 		goto out_free_vblk;
@@ -388,14 +388,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 out_mempool:
 	mempool_destroy(vblk->pool);
 out_free_vq:
-	vdev->config->del_vqs(vdev);
+	vdev->config->del_vq(vblk->vq);
 out_free_vblk:
 	kfree(vblk);
 out:
 	return err;
 }
 
-static void __devexit virtblk_remove(struct virtio_device *vdev)
+static void virtblk_remove(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk = vdev->priv;
 
@@ -409,7 +409,7 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
 	blk_cleanup_queue(vblk->disk->queue);
 	put_disk(vblk->disk);
 	mempool_destroy(vblk->pool);
-	vdev->config->del_vqs(vdev);
+	vdev->config->del_vq(vblk->vq);
 	kfree(vblk);
 }
 
diff --git a/trunk/drivers/char/hw_random/virtio-rng.c b/trunk/drivers/char/hw_random/virtio-rng.c
index 32216b623248..86e83f883139 100644
--- a/trunk/drivers/char/hw_random/virtio-rng.c
+++ b/trunk/drivers/char/hw_random/virtio-rng.c
@@ -35,13 +35,13 @@ static DECLARE_COMPLETION(have_data);
 
 static void random_recv_done(struct virtqueue *vq)
 {
-	unsigned int len;
+	int len;
 
 	/* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */
 	if (!vq->vq_ops->get_buf(vq, &len))
 		return;
 
-	data_left += len;
+	data_left = len / sizeof(random_data[0]);
 	complete(&have_data);
 }
 
@@ -49,7 +49,7 @@ static void register_buffer(void)
 {
 	struct scatterlist sg;
 
-	sg_init_one(&sg, random_data+data_left, RANDOM_DATA_SIZE-data_left);
+	sg_init_one(&sg, random_data, RANDOM_DATA_SIZE);
 	/* There should always be room for one buffer. */
 	if (vq->vq_ops->add_buf(vq, &sg, 0, 1, random_data) != 0)
 		BUG();
@@ -59,32 +59,24 @@ static void register_buffer(void)
 /* At least we don't udelay() in a loop like some other drivers. */
 static int virtio_data_present(struct hwrng *rng, int wait)
 {
-	if (data_left >= sizeof(u32))
+	if (data_left)
 		return 1;
 
-again:
 	if (!wait)
 		return 0;
 
 	wait_for_completion(&have_data);
-
-	/* Not enough?  Re-register. */
-	if (unlikely(data_left < sizeof(u32))) {
-		register_buffer();
-		goto again;
-	}
-
 	return 1;
 }
 
 /* virtio_data_present() must have succeeded before this is called. */
 static int virtio_data_read(struct hwrng *rng, u32 *data)
 {
-	BUG_ON(data_left < sizeof(u32));
-	data_left -= sizeof(u32);
-	*data = random_data[data_left / 4];
+	BUG_ON(!data_left);
+
+	*data = random_data[--data_left];
 
-	if (data_left < sizeof(u32)) {
+	if (!data_left) {
 		init_completion(&have_data);
 		register_buffer();
 	}
@@ -102,13 +94,13 @@ static int virtrng_probe(struct virtio_device *vdev)
 	int err;
 
 	/* We expect a single virtqueue. */
-	vq = virtio_find_single_vq(vdev, random_recv_done, "input");
+	vq = vdev->config->find_vq(vdev, 0, random_recv_done);
 	if (IS_ERR(vq))
 		return PTR_ERR(vq);
 
 	err = hwrng_register(&virtio_hwrng);
 	if (err) {
-		vdev->config->del_vqs(vdev);
+		vdev->config->del_vq(vq);
 		return err;
 	}
 
@@ -120,7 +112,7 @@ static void virtrng_remove(struct virtio_device *vdev)
 {
 	vdev->config->reset(vdev);
 	hwrng_unregister(&virtio_hwrng);
-	vdev->config->del_vqs(vdev);
+	vdev->config->del_vq(vq);
 }
 
 static struct virtio_device_id id_table[] = {
diff --git a/trunk/drivers/char/virtio_console.c b/trunk/drivers/char/virtio_console.c
index c74dacfa6795..ff6f5a4b58fb 100644
--- a/trunk/drivers/char/virtio_console.c
+++ b/trunk/drivers/char/virtio_console.c
@@ -188,9 +188,6 @@ static void hvc_handle_input(struct virtqueue *vq)
  * Finally we put our input buffer in the input queue, ready to receive. */
 static int __devinit virtcons_probe(struct virtio_device *dev)
 {
-	vq_callback_t *callbacks[] = { hvc_handle_input, NULL};
-	const char *names[] = { "input", "output" };
-	struct virtqueue *vqs[2];
 	int err;
 
 	vdev = dev;
@@ -202,15 +199,20 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
 		goto fail;
 	}
 
-	/* Find the queues. */
+	/* Find the input queue. */
 	/* FIXME: This is why we want to wean off hvc: we do nothing
 	 * when input comes in. */
-	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
-	if (err)
+	in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input);
+	if (IS_ERR(in_vq)) {
+		err = PTR_ERR(in_vq);
 		goto free;
+	}
 
-	in_vq = vqs[0];
-	out_vq = vqs[1];
+	out_vq = vdev->config->find_vq(vdev, 1, NULL);
+	if (IS_ERR(out_vq)) {
+		err = PTR_ERR(out_vq);
+		goto free_in_vq;
+	}
 
 	/* Start using the new console output. */
 	virtio_cons.get_chars = get_chars;
@@ -231,15 +233,17 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
 	hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE);
 	if (IS_ERR(hvc)) {
 		err = PTR_ERR(hvc);
-		goto free_vqs;
+		goto free_out_vq;
 	}
 
 	/* Register the input buffer the first time. */
 	add_inbuf();
 	return 0;
 
-free_vqs:
-	vdev->config->del_vqs(vdev);
+free_out_vq:
+	vdev->config->del_vq(out_vq);
+free_in_vq:
+	vdev->config->del_vq(in_vq);
 free:
 	kfree(inbuf);
 fail:
diff --git a/trunk/drivers/ide/at91_ide.c b/trunk/drivers/ide/at91_ide.c
index fc0949a8cfde..403d0e4265db 100644
--- a/trunk/drivers/ide/at91_ide.c
+++ b/trunk/drivers/ide/at91_ide.c
@@ -216,7 +216,6 @@ static const struct ide_port_info at91_ide_port_info __initdata = {
 	.host_flags 	= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA | IDE_HFLAG_SINGLE |
 			  IDE_HFLAG_NO_IO_32BIT | IDE_HFLAG_UNMASK_IRQS,
 	.pio_mask 	= ATA_PIO6,
-	.chipset	= ide_generic,
 };
 
 /*
@@ -247,7 +246,8 @@ irqreturn_t at91_irq_handler(int irq, void *dev_id)
 static int __init at91_ide_probe(struct platform_device *pdev)
 {
 	int ret;
-	struct ide_hw hw, *hws[] = { &hw };
+	hw_regs_t hw;
+	hw_regs_t *hws[] = { &hw, NULL, NULL, NULL };
 	struct ide_host *host;
 	struct resource *res;
 	unsigned long tf_base = 0, ctl_base = 0;
@@ -304,9 +304,10 @@ static int __init at91_ide_probe(struct platform_device *pdev)
 		ide_std_init_ports(&hw, tf_base, ctl_base + 6);
 
 	hw.irq = board->irq_pin;
+	hw.chipset = ide_generic;
 	hw.dev = &pdev->dev;
 
-	host = ide_host_alloc(&at91_ide_port_info, hws, 1);
+	host = ide_host_alloc(&at91_ide_port_info, hws);
 	if (!host) {
 		perr("failed to allocate ide host\n");
 		return -ENOMEM;
diff --git a/trunk/drivers/ide/au1xxx-ide.c b/trunk/drivers/ide/au1xxx-ide.c
index 58121bd6c115..46013644c965 100644
--- a/trunk/drivers/ide/au1xxx-ide.c
+++ b/trunk/drivers/ide/au1xxx-ide.c
@@ -449,7 +449,7 @@ static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d)
 }
 #endif
 
-static void auide_setup_ports(struct ide_hw *hw, _auide_hwif *ahwif)
+static void auide_setup_ports(hw_regs_t *hw, _auide_hwif *ahwif)
 {
 	int i;
 	unsigned long *ata_regs = hw->io_ports_array;
@@ -499,7 +499,6 @@ static const struct ide_port_info au1xxx_port_info = {
 #ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
 	.mwdma_mask		= ATA_MWDMA2,
 #endif
-	.chipset		= ide_au1xxx,
 };
 
 static int au_ide_probe(struct platform_device *dev)
@@ -508,7 +507,7 @@ static int au_ide_probe(struct platform_device *dev)
 	struct resource *res;
 	struct ide_host *host;
 	int ret = 0;
-	struct ide_hw hw, *hws[] = { &hw };
+	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 
 #if defined(CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA)
 	char *mode = "MWDMA2";
@@ -549,8 +548,9 @@ static int au_ide_probe(struct platform_device *dev)
 	auide_setup_ports(&hw, ahwif);
 	hw.irq = ahwif->irq;
 	hw.dev = &dev->dev;
+	hw.chipset = ide_au1xxx;
 
-	ret = ide_host_add(&au1xxx_port_info, hws, 1, &host);
+	ret = ide_host_add(&au1xxx_port_info, hws, &host);
 	if (ret)
 		goto out;
 
diff --git a/trunk/drivers/ide/buddha.c b/trunk/drivers/ide/buddha.c
index e3c6a5913305..d028f8864bc1 100644
--- a/trunk/drivers/ide/buddha.c
+++ b/trunk/drivers/ide/buddha.c
@@ -121,7 +121,7 @@ static int xsurf_ack_intr(ide_hwif_t *hwif)
     return 1;
 }
 
-static void __init buddha_setup_ports(struct ide_hw *hw, unsigned long base,
+static void __init buddha_setup_ports(hw_regs_t *hw, unsigned long base,
 				      unsigned long ctl, unsigned long irq_port,
 				      ide_ack_intr_t *ack_intr)
 {
@@ -139,12 +139,13 @@ static void __init buddha_setup_ports(struct ide_hw *hw, unsigned long base,
 
 	hw->irq = IRQ_AMIGA_PORTS;
 	hw->ack_intr = ack_intr;
+
+	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info buddha_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
-	.chipset		= ide_generic,
 };
 
     /*
@@ -160,7 +161,7 @@ static int __init buddha_init(void)
 
 	while ((z = zorro_find_device(ZORRO_WILDCARD, z))) {
 		unsigned long board;
-		struct ide_hw hw[MAX_NUM_HWIFS], *hws[MAX_NUM_HWIFS];
+		hw_regs_t hw[MAX_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL };
 
 		if (z->id == ZORRO_PROD_INDIVIDUAL_COMPUTERS_BUDDHA) {
 			buddha_num_hwifs = BUDDHA_NUM_HWIFS;
@@ -224,7 +225,7 @@ static int __init buddha_init(void)
 			hws[i] = &hw[i];
 		}
 
-		ide_host_add(&buddha_port_info, hws, i, NULL);
+		ide_host_add(&buddha_port_info, hws, NULL);
 	}
 
 	return 0;
diff --git a/trunk/drivers/ide/cmd640.c b/trunk/drivers/ide/cmd640.c
index 1683ed5c7329..8890276fef7f 100644
--- a/trunk/drivers/ide/cmd640.c
+++ b/trunk/drivers/ide/cmd640.c
@@ -708,7 +708,7 @@ static int __init cmd640x_init(void)
 	int second_port_cmd640 = 0, rc;
 	const char *bus_type, *port2;
 	u8 b, cfr;
-	struct ide_hw hw[2], *hws[2];
+	hw_regs_t hw[2], *hws[] = { NULL, NULL, NULL, NULL };
 
 	if (cmd640_vlb && probe_for_cmd640_vlb()) {
 		bus_type = "VLB";
@@ -762,9 +762,11 @@ static int __init cmd640x_init(void)
 
 	ide_std_init_ports(&hw[0], 0x1f0, 0x3f6);
 	hw[0].irq = 14;
+	hw[0].chipset = ide_cmd640;
 
 	ide_std_init_ports(&hw[1], 0x170, 0x376);
 	hw[1].irq = 15;
+	hw[1].chipset = ide_cmd640;
 
 	printk(KERN_INFO "cmd640: buggy cmd640%c interface on %s, config=0x%02x"
 			 "\n", 'a' + cmd640_chip_version - 1, bus_type, cfr);
@@ -822,8 +824,7 @@ static int __init cmd640x_init(void)
 	cmd640_dump_regs();
 #endif
 
-	return ide_host_add(&cmd640_port_info, hws, second_port_cmd640 ? 2 : 1,
-			    NULL);
+	return ide_host_add(&cmd640_port_info, hws, NULL);
 }
 
 module_param_named(probe_vlb, cmd640_vlb, bool, 0);
diff --git a/trunk/drivers/ide/cs5520.c b/trunk/drivers/ide/cs5520.c
index bd066bb9d611..87987a7d36c9 100644
--- a/trunk/drivers/ide/cs5520.c
+++ b/trunk/drivers/ide/cs5520.c
@@ -110,7 +110,7 @@ static const struct ide_port_info cyrix_chipset __devinitdata = {
 static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	const struct ide_port_info *d = &cyrix_chipset;
-	struct ide_hw hw[2], *hws[] = { NULL, NULL };
+	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
 
 	ide_setup_pci_noise(dev, d);
 
@@ -136,7 +136,7 @@ static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_devic
 	ide_pci_setup_ports(dev, d, &hw[0], &hws[0]);
 	hw[0].irq = 14;
 
-	return ide_host_add(d, hws, 2, NULL);
+	return ide_host_add(d, hws, NULL);
 }
 
 static const struct pci_device_id cs5520_pci_tbl[] = {
diff --git a/trunk/drivers/ide/delkin_cb.c b/trunk/drivers/ide/delkin_cb.c
index 1e10eba62ceb..f153b95619bb 100644
--- a/trunk/drivers/ide/delkin_cb.c
+++ b/trunk/drivers/ide/delkin_cb.c
@@ -68,7 +68,6 @@ static const struct ide_port_info delkin_cb_port_info = {
 				  IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
 	.init_chipset		= delkin_cb_init_chipset,
-	.chipset		= ide_pci,
 };
 
 static int __devinit
@@ -77,7 +76,7 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 	struct ide_host *host;
 	unsigned long base;
 	int rc;
-	struct ide_hw hw, *hws[] = { &hw };
+	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 
 	rc = pci_enable_device(dev);
 	if (rc) {
@@ -98,8 +97,9 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 	ide_std_init_ports(&hw, base + 0x10, base + 0x1e);
 	hw.irq = dev->irq;
 	hw.dev = &dev->dev;
+	hw.chipset = ide_pci;		/* this enables IRQ sharing */
 
-	rc = ide_host_add(&delkin_cb_port_info, hws, 1, &host);
+	rc = ide_host_add(&delkin_cb_port_info, hws, &host);
 	if (rc)
 		goto out_disable;
 
diff --git a/trunk/drivers/ide/falconide.c b/trunk/drivers/ide/falconide.c
index 22fa27389c3b..0e2df6755ec9 100644
--- a/trunk/drivers/ide/falconide.c
+++ b/trunk/drivers/ide/falconide.c
@@ -111,10 +111,9 @@ static const struct ide_port_info falconide_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_SERIALIZE |
 				  IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
-	.chipset		= ide_generic,
 };
 
-static void __init falconide_setup_ports(struct ide_hw *hw)
+static void __init falconide_setup_ports(hw_regs_t *hw)
 {
 	int i;
 
@@ -129,6 +128,8 @@ static void __init falconide_setup_ports(struct ide_hw *hw)
 
 	hw->irq = IRQ_MFP_IDE;
 	hw->ack_intr = NULL;
+
+	hw->chipset = ide_generic;
 }
 
     /*
@@ -138,7 +139,7 @@ static void __init falconide_setup_ports(struct ide_hw *hw)
 static int __init falconide_init(void)
 {
 	struct ide_host *host;
-	struct ide_hw hw, *hws[] = { &hw };
+	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 	int rc;
 
 	if (!MACH_IS_ATARI || !ATARIHW_PRESENT(IDE))
@@ -153,7 +154,7 @@ static int __init falconide_init(void)
 
 	falconide_setup_ports(&hw);
 
-	host = ide_host_alloc(&falconide_port_info, hws, 1);
+	host = ide_host_alloc(&falconide_port_info, hws);
 	if (host == NULL) {
 		rc = -ENOMEM;
 		goto err;
diff --git a/trunk/drivers/ide/gayle.c b/trunk/drivers/ide/gayle.c
index 4451a6a5dfe0..c7119516c5a7 100644
--- a/trunk/drivers/ide/gayle.c
+++ b/trunk/drivers/ide/gayle.c
@@ -88,7 +88,7 @@ static int gayle_ack_intr_a1200(ide_hwif_t *hwif)
     return 1;
 }
 
-static void __init gayle_setup_ports(struct ide_hw *hw, unsigned long base,
+static void __init gayle_setup_ports(hw_regs_t *hw, unsigned long base,
 				     unsigned long ctl, unsigned long irq_port,
 				     ide_ack_intr_t *ack_intr)
 {
@@ -106,13 +106,14 @@ static void __init gayle_setup_ports(struct ide_hw *hw, unsigned long base,
 
 	hw->irq = IRQ_AMIGA_PORTS;
 	hw->ack_intr = ack_intr;
+
+	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info gayle_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_SERIALIZE |
 				  IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
-	.chipset		= ide_generic,
 };
 
     /*
@@ -125,7 +126,7 @@ static int __init gayle_init(void)
     unsigned long base, ctrlport, irqport;
     ide_ack_intr_t *ack_intr;
     int a4000, i, rc;
-    struct ide_hw hw[GAYLE_NUM_HWIFS], *hws[GAYLE_NUM_HWIFS];
+    hw_regs_t hw[GAYLE_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL };
 
     if (!MACH_IS_AMIGA)
 	return -ENODEV;
@@ -170,7 +171,7 @@ static int __init gayle_init(void)
 	hws[i] = &hw[i];
     }
 
-    rc = ide_host_add(&gayle_port_info, hws, i, NULL);
+    rc = ide_host_add(&gayle_port_info, hws, NULL);
     if (rc)
 	release_mem_region(res_start, res_n);
 
diff --git a/trunk/drivers/ide/hpt366.c b/trunk/drivers/ide/hpt366.c
index 7ce68ef6b904..0feb66c720e1 100644
--- a/trunk/drivers/ide/hpt366.c
+++ b/trunk/drivers/ide/hpt366.c
@@ -138,6 +138,14 @@
 #undef	HPT_RESET_STATE_ENGINE
 #undef	HPT_DELAY_INTERRUPT
 
+static const char *quirk_drives[] = {
+	"QUANTUM FIREBALLlct08 08",
+	"QUANTUM FIREBALLP KA6.4",
+	"QUANTUM FIREBALLP LM20.4",
+	"QUANTUM FIREBALLP LM20.5",
+	NULL
+};
+
 static const char *bad_ata100_5[] = {
 	"IBM-DTLA-307075",
 	"IBM-DTLA-307060",
@@ -721,13 +729,27 @@ static void hpt3xx_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	hpt3xx_set_mode(drive, XFER_PIO_0 + pio);
 }
 
+static void hpt3xx_quirkproc(ide_drive_t *drive)
+{
+	char *m			= (char *)&drive->id[ATA_ID_PROD];
+	const  char **list	= quirk_drives;
+
+	while (*list)
+		if (strstr(m, *list++)) {
+			drive->quirk_list = 1;
+			return;
+		}
+
+	drive->quirk_list = 0;
+}
+
 static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
 {
 	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev	*dev	= to_pci_dev(hwif->dev);
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 
-	if ((drive->dev_flags & IDE_DFLAG_NIEN_QUIRK) == 0)
+	if (drive->quirk_list == 0)
 		return;
 
 	if (info->chip_type >= HPT370) {
@@ -1382,6 +1404,7 @@ static int __devinit hpt36x_init(struct pci_dev *dev, struct pci_dev *dev2)
 static const struct ide_port_ops hpt3xx_port_ops = {
 	.set_pio_mode		= hpt3xx_set_pio_mode,
 	.set_dma_mode		= hpt3xx_set_mode,
+	.quirkproc		= hpt3xx_quirkproc,
 	.maskproc		= hpt3xx_maskproc,
 	.mdma_filter		= hpt3xx_mdma_filter,
 	.udma_filter		= hpt3xx_udma_filter,
diff --git a/trunk/drivers/ide/icside.c b/trunk/drivers/ide/icside.c
index 5af3d0ffaf0a..36da913cc553 100644
--- a/trunk/drivers/ide/icside.c
+++ b/trunk/drivers/ide/icside.c
@@ -65,6 +65,8 @@ static struct cardinfo icside_cardinfo_v6_2 = {
 };
 
 struct icside_state {
+	unsigned int channel;
+	unsigned int enabled;
 	void __iomem *irq_port;
 	void __iomem *ioc_base;
 	unsigned int sel;
@@ -114,11 +116,18 @@ static void icside_irqenable_arcin_v6 (struct expansion_card *ec, int irqnr)
 	struct icside_state *state = ec->irq_data;
 	void __iomem *base = state->irq_port;
 
-	writeb(0, base + ICS_ARCIN_V6_INTROFFSET_1);
-	readb(base + ICS_ARCIN_V6_INTROFFSET_2);
+	state->enabled = 1;
 
-	writeb(0, base + ICS_ARCIN_V6_INTROFFSET_2);
-	readb(base + ICS_ARCIN_V6_INTROFFSET_1);
+	switch (state->channel) {
+	case 0:
+		writeb(0, base + ICS_ARCIN_V6_INTROFFSET_1);
+		readb(base + ICS_ARCIN_V6_INTROFFSET_2);
+		break;
+	case 1:
+		writeb(0, base + ICS_ARCIN_V6_INTROFFSET_2);
+		readb(base + ICS_ARCIN_V6_INTROFFSET_1);
+		break;
+	}
 }
 
 /* Prototype: icside_irqdisable_arcin_v6 (struct expansion_card *ec, int irqnr)
@@ -128,6 +137,8 @@ static void icside_irqdisable_arcin_v6 (struct expansion_card *ec, int irqnr)
 {
 	struct icside_state *state = ec->irq_data;
 
+	state->enabled = 0;
+
 	readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
 	readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
 }
@@ -149,6 +160,44 @@ static const expansioncard_ops_t icside_ops_arcin_v6 = {
 	.irqpending	= icside_irqpending_arcin_v6,
 };
 
+/*
+ * Handle routing of interrupts.  This is called before
+ * we write the command to the drive.
+ */
+static void icside_maskproc(ide_drive_t *drive, int mask)
+{
+	ide_hwif_t *hwif = drive->hwif;
+	struct expansion_card *ec = ECARD_DEV(hwif->dev);
+	struct icside_state *state = ecard_get_drvdata(ec);
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	state->channel = hwif->channel;
+
+	if (state->enabled && !mask) {
+		switch (hwif->channel) {
+		case 0:
+			writeb(0, state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
+			readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
+			break;
+		case 1:
+			writeb(0, state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
+			readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
+			break;
+		}
+	} else {
+		readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
+		readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
+	}
+
+	local_irq_restore(flags);
+}
+
+static const struct ide_port_ops icside_v6_no_dma_port_ops = {
+	.maskproc		= icside_maskproc,
+};
+
 #ifdef CONFIG_BLK_DEV_IDEDMA_ICS
 /*
  * SG-DMA support.
@@ -226,6 +275,7 @@ static void icside_set_dma_mode(ide_drive_t *drive, const u8 xfer_mode)
 
 static const struct ide_port_ops icside_v6_port_ops = {
 	.set_dma_mode		= icside_set_dma_mode,
+	.maskproc		= icside_maskproc,
 };
 
 static void icside_dma_host_set(ide_drive_t *drive, int on)
@@ -269,6 +319,11 @@ static int icside_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd)
 	 */
 	BUG_ON(dma_channel_active(ec->dma));
 
+	/*
+	 * Ensure that we have the right interrupt routed.
+	 */
+	icside_maskproc(drive, 0);
+
 	/*
 	 * Route the DMA signals to the correct interface.
 	 */
@@ -326,7 +381,7 @@ static int icside_dma_off_init(ide_hwif_t *hwif, const struct ide_port_info *d)
 	return -EOPNOTSUPP;
 }
 
-static void icside_setup_ports(struct ide_hw *hw, void __iomem *base,
+static void icside_setup_ports(hw_regs_t *hw, void __iomem *base,
 			       struct cardinfo *info, struct expansion_card *ec)
 {
 	unsigned long port = (unsigned long)base + info->dataoffset;
@@ -343,11 +398,11 @@ static void icside_setup_ports(struct ide_hw *hw, void __iomem *base,
 
 	hw->irq = ec->irq;
 	hw->dev = &ec->dev;
+	hw->chipset = ide_acorn;
 }
 
 static const struct ide_port_info icside_v5_port_info = {
 	.host_flags		= IDE_HFLAG_NO_DMA,
-	.chipset		= ide_acorn,
 };
 
 static int __devinit
@@ -355,7 +410,7 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec)
 {
 	void __iomem *base;
 	struct ide_host *host;
-	struct ide_hw hw, *hws[] = { &hw };
+	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 	int ret;
 
 	base = ecardm_iomap(ec, ECARD_RES_MEMC, 0, 0);
@@ -376,7 +431,7 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec)
 
 	icside_setup_ports(&hw, base, &icside_cardinfo_v5, ec);
 
-	host = ide_host_alloc(&icside_v5_port_info, hws, 1);
+	host = ide_host_alloc(&icside_v5_port_info, hws);
 	if (host == NULL)
 		return -ENODEV;
 
@@ -397,11 +452,11 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec)
 
 static const struct ide_port_info icside_v6_port_info __initdata = {
 	.init_dma		= icside_dma_off_init,
+	.port_ops		= &icside_v6_no_dma_port_ops,
 	.dma_ops		= &icside_v6_dma_ops,
 	.host_flags		= IDE_HFLAG_SERIALIZE | IDE_HFLAG_MMIO,
 	.mwdma_mask		= ATA_MWDMA2,
 	.swdma_mask		= ATA_SWDMA2,
-	.chipset		= ide_acorn,
 };
 
 static int __devinit
@@ -411,7 +466,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec)
 	struct ide_host *host;
 	unsigned int sel = 0;
 	int ret;
-	struct ide_hw hw[2], *hws[] = { &hw[0], &hw[1] };
+	hw_regs_t hw[2], *hws[] = { &hw[0], &hw[1], NULL, NULL };
 	struct ide_port_info d = icside_v6_port_info;
 
 	ioc_base = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0);
@@ -451,7 +506,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec)
 	icside_setup_ports(&hw[0], easi_base, &icside_cardinfo_v6_1, ec);
 	icside_setup_ports(&hw[1], easi_base, &icside_cardinfo_v6_2, ec);
 
-	host = ide_host_alloc(&d, hws, 2);
+	host = ide_host_alloc(&d, hws);
 	if (host == NULL)
 		return -ENODEV;
 
diff --git a/trunk/drivers/ide/ide-4drives.c b/trunk/drivers/ide/ide-4drives.c
index 979d342c338a..78aca75a2c48 100644
--- a/trunk/drivers/ide/ide-4drives.c
+++ b/trunk/drivers/ide/ide-4drives.c
@@ -25,13 +25,12 @@ static const struct ide_port_info ide_4drives_port_info = {
 	.port_ops		= &ide_4drives_port_ops,
 	.host_flags		= IDE_HFLAG_SERIALIZE | IDE_HFLAG_NO_DMA |
 				  IDE_HFLAG_4DRIVES,
-	.chipset		= ide_4drives,
 };
 
 static int __init ide_4drives_init(void)
 {
 	unsigned long base = 0x1f0, ctl = 0x3f6;
-	struct ide_hw hw, *hws[] = { &hw, &hw };
+	hw_regs_t hw, *hws[] = { &hw, &hw, NULL, NULL };
 
 	if (probe_4drives == 0)
 		return -ENODEV;
@@ -53,8 +52,9 @@ static int __init ide_4drives_init(void)
 
 	ide_std_init_ports(&hw, base, ctl);
 	hw.irq = 14;
+	hw.chipset = ide_4drives;
 
-	return ide_host_add(&ide_4drives_port_info, hws, 2, NULL);
+	return ide_host_add(&ide_4drives_port_info, hws, NULL);
 }
 
 module_init(ide_4drives_init);
diff --git a/trunk/drivers/ide/ide-atapi.c b/trunk/drivers/ide/ide-atapi.c
index bbdd2547f12a..757e5956b132 100644
--- a/trunk/drivers/ide/ide-atapi.c
+++ b/trunk/drivers/ide/ide-atapi.c
@@ -259,7 +259,7 @@ void ide_retry_pc(ide_drive_t *drive)
 	pc->req_xfer = blk_rq_bytes(sense_rq);
 
 	if (drive->media == ide_tape)
-		drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
+		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
 
 	/*
 	 * Push back the failed request and put request sense on top
diff --git a/trunk/drivers/ide/ide-cs.c b/trunk/drivers/ide/ide-cs.c
index 527908ff298c..9e47f3529d55 100644
--- a/trunk/drivers/ide/ide-cs.c
+++ b/trunk/drivers/ide/ide-cs.c
@@ -155,7 +155,6 @@ static const struct ide_port_info idecs_port_info = {
 	.port_ops		= &idecs_port_ops,
 	.host_flags		= IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
-	.chipset		= ide_pci,
 };
 
 static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
@@ -164,7 +163,7 @@ static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
     struct ide_host *host;
     ide_hwif_t *hwif;
     int i, rc;
-    struct ide_hw hw, *hws[] = { &hw };
+    hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 
     if (!request_region(io, 8, DRV_NAME)) {
 	printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX not free.\n",
@@ -182,9 +181,10 @@ static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
     memset(&hw, 0, sizeof(hw));
     ide_std_init_ports(&hw, io, ctl);
     hw.irq = irq;
+    hw.chipset = ide_pci;
     hw.dev = &handle->dev;
 
-    rc = ide_host_add(&idecs_port_info, hws, 1, &host);
+    rc = ide_host_add(&idecs_port_info, hws, &host);
     if (rc)
 	goto out_release;
 
diff --git a/trunk/drivers/ide/ide-disk.c b/trunk/drivers/ide/ide-disk.c
index 6a1de2169709..c6f7fcfb9d67 100644
--- a/trunk/drivers/ide/ide-disk.c
+++ b/trunk/drivers/ide/ide-disk.c
@@ -302,12 +302,14 @@ static const struct drive_list_entry hpa_list[] = {
 	{ NULL,		NULL }
 };
 
-static u64 ide_disk_hpa_get_native_capacity(ide_drive_t *drive, int lba48)
+static void idedisk_check_hpa(ide_drive_t *drive)
 {
-	u64 capacity, set_max;
+	unsigned long long capacity, set_max;
+	int lba48 = ata_id_lba48_enabled(drive->id);
 
 	capacity = drive->capacity64;
-	set_max  = idedisk_read_native_max_address(drive, lba48);
+
+	set_max = idedisk_read_native_max_address(drive, lba48);
 
 	if (ide_in_drive_list(drive->id, hpa_list)) {
 		/*
@@ -318,31 +320,9 @@ static u64 ide_disk_hpa_get_native_capacity(ide_drive_t *drive, int lba48)
 			set_max--;
 	}
 
-	return set_max;
-}
-
-static u64 ide_disk_hpa_set_capacity(ide_drive_t *drive, u64 set_max, int lba48)
-{
-	set_max = idedisk_set_max_address(drive, set_max, lba48);
-	if (set_max)
-		drive->capacity64 = set_max;
-
-	return set_max;
-}
-
-static void idedisk_check_hpa(ide_drive_t *drive)
-{
-	u64 capacity, set_max;
-	int lba48 = ata_id_lba48_enabled(drive->id);
-
-	capacity = drive->capacity64;
-	set_max  = ide_disk_hpa_get_native_capacity(drive, lba48);
-
 	if (set_max <= capacity)
 		return;
 
-	drive->probed_capacity = set_max;
-
 	printk(KERN_INFO "%s: Host Protected Area detected.\n"
 			 "\tcurrent capacity is %llu sectors (%llu MB)\n"
 			 "\tnative  capacity is %llu sectors (%llu MB)\n",
@@ -350,13 +330,13 @@ static void idedisk_check_hpa(ide_drive_t *drive)
 			 capacity, sectors_to_MB(capacity),
 			 set_max, sectors_to_MB(set_max));
 
-	if ((drive->dev_flags & IDE_DFLAG_NOHPA) == 0)
-		return;
+	set_max = idedisk_set_max_address(drive, set_max, lba48);
 
-	set_max = ide_disk_hpa_set_capacity(drive, set_max, lba48);
-	if (set_max)
+	if (set_max) {
+		drive->capacity64 = set_max;
 		printk(KERN_INFO "%s: Host Protected Area disabled.\n",
 				 drive->name);
+	}
 }
 
 static int ide_disk_get_capacity(ide_drive_t *drive)
@@ -378,8 +358,6 @@ static int ide_disk_get_capacity(ide_drive_t *drive)
 		drive->capacity64 = drive->cyl * drive->head * drive->sect;
 	}
 
-	drive->probed_capacity = drive->capacity64;
-
 	if (lba) {
 		drive->dev_flags |= IDE_DFLAG_LBA;
 
@@ -398,7 +376,7 @@ static int ide_disk_get_capacity(ide_drive_t *drive)
 		       "%llu sectors (%llu MB)\n",
 		       drive->name, (unsigned long long)drive->capacity64,
 		       sectors_to_MB(drive->capacity64));
-		drive->probed_capacity = drive->capacity64 = 1ULL << 28;
+		drive->capacity64 = 1ULL << 28;
 	}
 
 	if ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) &&
@@ -414,34 +392,6 @@ static int ide_disk_get_capacity(ide_drive_t *drive)
 	return 0;
 }
 
-static u64 ide_disk_set_capacity(ide_drive_t *drive, u64 capacity)
-{
-	u64 set = min(capacity, drive->probed_capacity);
-	u16 *id = drive->id;
-	int lba48 = ata_id_lba48_enabled(id);
-
-	if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 ||
-	    ata_id_hpa_enabled(id) == 0)
-		goto out;
-
-	/*
-	 * according to the spec the SET MAX ADDRESS command shall be
-	 * immediately preceded by a READ NATIVE MAX ADDRESS command
-	 */
-	capacity = ide_disk_hpa_get_native_capacity(drive, lba48);
-	if (capacity == 0)
-		goto out;
-
-	set = ide_disk_hpa_set_capacity(drive, set, lba48);
-	if (set) {
-		/* needed for ->resume to disable HPA */
-		drive->dev_flags |= IDE_DFLAG_NOHPA;
-		return set;
-	}
-out:
-	return drive->capacity64;
-}
-
 static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
 {
 	ide_drive_t *drive = q->queuedata;
@@ -478,14 +428,14 @@ static int set_multcount(ide_drive_t *drive, int arg)
 	if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff))
 		return -EINVAL;
 
-	if (drive->special_flags & IDE_SFLAG_SET_MULTMODE)
+	if (drive->special.b.set_multmode)
 		return -EBUSY;
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
 
 	drive->mult_req = arg;
-	drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
+	drive->special.b.set_multmode = 1;
 	error = blk_execute_rq(drive->queue, NULL, rq, 0);
 	blk_put_request(rq);
 
@@ -790,7 +740,6 @@ static int ide_disk_set_doorlock(ide_drive_t *drive, struct gendisk *disk,
 
 const struct ide_disk_ops ide_ata_disk_ops = {
 	.check		= ide_disk_check,
-	.set_capacity	= ide_disk_set_capacity,
 	.get_capacity	= ide_disk_get_capacity,
 	.setup		= ide_disk_setup,
 	.flush		= ide_disk_flush,
diff --git a/trunk/drivers/ide/ide-dma.c b/trunk/drivers/ide/ide-dma.c
index 219e6fb78dc6..001f68f0bb28 100644
--- a/trunk/drivers/ide/ide-dma.c
+++ b/trunk/drivers/ide/ide-dma.c
@@ -347,6 +347,7 @@ u8 ide_find_dma_mode(ide_drive_t *drive, u8 req_mode)
 
 	return mode;
 }
+EXPORT_SYMBOL_GPL(ide_find_dma_mode);
 
 static int ide_tune_dma(ide_drive_t *drive)
 {
diff --git a/trunk/drivers/ide/ide-eh.c b/trunk/drivers/ide/ide-eh.c
index 2b9141979613..5d5fb961b5ce 100644
--- a/trunk/drivers/ide/ide-eh.c
+++ b/trunk/drivers/ide/ide-eh.c
@@ -52,7 +52,7 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq,
 	}
 
 	if ((rq->errors & ERROR_RECAL) == ERROR_RECAL)
-		drive->special_flags |= IDE_SFLAG_RECALIBRATE;
+		drive->special.b.recalibrate = 1;
 
 	++rq->errors;
 
@@ -268,8 +268,9 @@ static void ide_disk_pre_reset(ide_drive_t *drive)
 {
 	int legacy = (drive->id[ATA_ID_CFS_ENABLE_2] & 0x0400) ? 0 : 1;
 
-	drive->special_flags =
-		legacy ? (IDE_SFLAG_SET_GEOMETRY | IDE_SFLAG_RECALIBRATE) : 0;
+	drive->special.all = 0;
+	drive->special.b.set_geometry = legacy;
+	drive->special.b.recalibrate  = legacy;
 
 	drive->mult_count = 0;
 	drive->dev_flags &= ~IDE_DFLAG_PARKED;
@@ -279,7 +280,7 @@ static void ide_disk_pre_reset(ide_drive_t *drive)
 		drive->mult_req = 0;
 
 	if (drive->mult_req != drive->mult_count)
-		drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
+		drive->special.b.set_multmode = 1;
 }
 
 static void pre_reset(ide_drive_t *drive)
@@ -407,9 +408,8 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi)
 	/* more than enough time */
 	udelay(10);
 	/* clear SRST, leave nIEN (unless device is on the quirk list) */
-	tp_ops->write_devctl(hwif,
-		((drive->dev_flags & IDE_DFLAG_NIEN_QUIRK) ? 0 : ATA_NIEN) |
-		 ATA_DEVCTL_OBS);
+	tp_ops->write_devctl(hwif, (drive->quirk_list == 2 ? 0 : ATA_NIEN) |
+			     ATA_DEVCTL_OBS);
 	/* more than enough time */
 	udelay(10);
 	hwif->poll_timeout = jiffies + WAIT_WORSTCASE;
diff --git a/trunk/drivers/ide/ide-gd.c b/trunk/drivers/ide/ide-gd.c
index 214119026b3f..4b6b71e2cdf5 100644
--- a/trunk/drivers/ide/ide-gd.c
+++ b/trunk/drivers/ide/ide-gd.c
@@ -287,19 +287,6 @@ static int ide_gd_media_changed(struct gendisk *disk)
 	return ret;
 }
 
-static unsigned long long ide_gd_set_capacity(struct gendisk *disk,
-					      unsigned long long capacity)
-{
-	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
-	ide_drive_t *drive = idkp->drive;
-	const struct ide_disk_ops *disk_ops = drive->disk_ops;
-
-	if (disk_ops->set_capacity)
-		return disk_ops->set_capacity(drive, capacity);
-
-	return drive->capacity64;
-}
-
 static int ide_gd_revalidate_disk(struct gendisk *disk)
 {
 	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
@@ -328,7 +315,6 @@ static struct block_device_operations ide_gd_ops = {
 	.locked_ioctl		= ide_gd_ioctl,
 	.getgeo			= ide_gd_getgeo,
 	.media_changed		= ide_gd_media_changed,
-	.set_capacity		= ide_gd_set_capacity,
 	.revalidate_disk	= ide_gd_revalidate_disk
 };
 
diff --git a/trunk/drivers/ide/ide-generic.c b/trunk/drivers/ide/ide-generic.c
index 54d7c4685d23..7812ca0be13b 100644
--- a/trunk/drivers/ide/ide-generic.c
+++ b/trunk/drivers/ide/ide-generic.c
@@ -29,7 +29,6 @@ MODULE_PARM_DESC(probe_mask, "probe mask for legacy ISA IDE ports");
 
 static const struct ide_port_info ide_generic_port_info = {
 	.host_flags		= IDE_HFLAG_NO_DMA,
-	.chipset		= ide_generic,
 };
 
 #ifdef CONFIG_ARM
@@ -86,7 +85,7 @@ static void ide_generic_check_pci_legacy_iobases(int *primary, int *secondary)
 
 static int __init ide_generic_init(void)
 {
-	struct ide_hw hw, *hws[] = { &hw };
+	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 	unsigned long io_addr;
 	int i, rc = 0, primary = 0, secondary = 0;
 
@@ -133,7 +132,9 @@ static int __init ide_generic_init(void)
 #else
 			hw.irq = legacy_irqs[i];
 #endif
-			rc = ide_host_add(&ide_generic_port_info, hws, 1, NULL);
+			hw.chipset = ide_generic;
+
+			rc = ide_host_add(&ide_generic_port_info, hws, NULL);
 			if (rc) {
 				release_region(io_addr + 0x206, 1);
 				release_region(io_addr, 8);
diff --git a/trunk/drivers/ide/ide-h8300.c b/trunk/drivers/ide/ide-h8300.c
index 520f42c5445a..c06ebdc4a130 100644
--- a/trunk/drivers/ide/ide-h8300.c
+++ b/trunk/drivers/ide/ide-h8300.c
@@ -64,26 +64,26 @@ static const struct ide_tp_ops h8300_tp_ops = {
 
 #define H8300_IDE_GAP (2)
 
-static inline void hw_setup(struct ide_hw *hw)
+static inline void hw_setup(hw_regs_t *hw)
 {
 	int i;
 
-	memset(hw, 0, sizeof(*hw));
+	memset(hw, 0, sizeof(hw_regs_t));
 	for (i = 0; i <= 7; i++)
 		hw->io_ports_array[i] = CONFIG_H8300_IDE_BASE + H8300_IDE_GAP*i;
 	hw->io_ports.ctl_addr = CONFIG_H8300_IDE_ALT;
 	hw->irq = EXT_IRQ0 + CONFIG_H8300_IDE_IRQ;
+	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info h8300_port_info = {
 	.tp_ops			= &h8300_tp_ops,
 	.host_flags		= IDE_HFLAG_NO_IO_32BIT | IDE_HFLAG_NO_DMA,
-	.chipset		= ide_generic,
 };
 
 static int __init h8300_ide_init(void)
 {
-	struct ide_hw hw, *hws[] = { &hw };
+	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 
 	printk(KERN_INFO DRV_NAME ": H8/300 generic IDE interface\n");
 
@@ -96,7 +96,7 @@ static int __init h8300_ide_init(void)
 
 	hw_setup(&hw);
 
-	return ide_host_add(&h8300_port_info, hws, 1, NULL);
+	return ide_host_add(&h8300_port_info, hws, NULL);
 
 out_busy:
 	printk(KERN_ERR "ide-h8300: IDE I/F resource already used.\n");
diff --git a/trunk/drivers/ide/ide-io.c b/trunk/drivers/ide/ide-io.c
index 272cc38f6dbe..bba4297f2f03 100644
--- a/trunk/drivers/ide/ide-io.c
+++ b/trunk/drivers/ide/ide-io.c
@@ -184,42 +184,29 @@ static void ide_tf_set_setmult_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
 	tf->command = ATA_CMD_SET_MULTI;
 }
 
-/**
- *	do_special		-	issue some special commands
- *	@drive: drive the command is for
- *
- *	do_special() is used to issue ATA_CMD_INIT_DEV_PARAMS,
- *	ATA_CMD_RESTORE and ATA_CMD_SET_MULTI commands to a drive.
- */
-
-static ide_startstop_t do_special(ide_drive_t *drive)
+static ide_startstop_t ide_disk_special(ide_drive_t *drive)
 {
+	special_t *s = &drive->special;
 	struct ide_cmd cmd;
 
-#ifdef DEBUG
-	printk(KERN_DEBUG "%s: %s: 0x%02x\n", drive->name, __func__,
-		drive->special_flags);
-#endif
-	if (drive->media != ide_disk) {
-		drive->special_flags = 0;
-		drive->mult_req = 0;
-		return ide_stopped;
-	}
-
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.protocol = ATA_PROT_NODATA;
 
-	if (drive->special_flags & IDE_SFLAG_SET_GEOMETRY) {
-		drive->special_flags &= ~IDE_SFLAG_SET_GEOMETRY;
+	if (s->b.set_geometry) {
+		s->b.set_geometry = 0;
 		ide_tf_set_specify_cmd(drive, &cmd.tf);
-	} else if (drive->special_flags & IDE_SFLAG_RECALIBRATE) {
-		drive->special_flags &= ~IDE_SFLAG_RECALIBRATE;
+	} else if (s->b.recalibrate) {
+		s->b.recalibrate = 0;
 		ide_tf_set_restore_cmd(drive, &cmd.tf);
-	} else if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) {
-		drive->special_flags &= ~IDE_SFLAG_SET_MULTMODE;
+	} else if (s->b.set_multmode) {
+		s->b.set_multmode = 0;
 		ide_tf_set_setmult_cmd(drive, &cmd.tf);
-	} else
-		BUG();
+	} else if (s->all) {
+		int special = s->all;
+		s->all = 0;
+		printk(KERN_ERR "%s: bad special flag: 0x%02x\n", drive->name, special);
+		return ide_stopped;
+	}
 
 	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
 	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
@@ -230,6 +217,31 @@ static ide_startstop_t do_special(ide_drive_t *drive)
 	return ide_started;
 }
 
+/**
+ *	do_special		-	issue some special commands
+ *	@drive: drive the command is for
+ *
+ *	do_special() is used to issue ATA_CMD_INIT_DEV_PARAMS,
+ *	ATA_CMD_RESTORE and ATA_CMD_SET_MULTI commands to a drive.
+ *
+ *	It used to do much more, but has been scaled back.
+ */
+
+static ide_startstop_t do_special (ide_drive_t *drive)
+{
+	special_t *s = &drive->special;
+
+#ifdef DEBUG
+	printk("%s: do_special: 0x%02x\n", drive->name, s->all);
+#endif
+	if (drive->media == ide_disk)
+		return ide_disk_special(drive);
+
+	s->all = 0;
+	drive->mult_req = 0;
+	return ide_stopped;
+}
+
 void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd)
 {
 	ide_hwif_t *hwif = drive->hwif;
@@ -339,8 +351,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 		printk(KERN_ERR "%s: drive not ready for command\n", drive->name);
 		return startstop;
 	}
-
-	if (drive->special_flags == 0) {
+	if (!drive->special.all) {
 		struct ide_driver *drv;
 
 		/*
@@ -488,15 +499,11 @@ void do_ide_request(struct request_queue *q)
 
 		if ((hwif->host->host_flags & IDE_HFLAG_SERIALIZE) &&
 		    hwif != prev_port) {
-			ide_drive_t *cur_dev =
-				prev_port ? prev_port->cur_dev : NULL;
-
 			/*
 			 * set nIEN for previous port, drives in the
-			 * quirk list may not like intr setups/cleanups
+			 * quirk_list may not like intr setups/cleanups
 			 */
-			if (cur_dev &&
-			    (cur_dev->dev_flags & IDE_DFLAG_NIEN_QUIRK) == 0)
+			if (prev_port && prev_port->cur_dev->quirk_list == 0)
 				prev_port->tp_ops->write_devctl(prev_port,
 								ATA_NIEN |
 								ATA_DEVCTL_OBS);
diff --git a/trunk/drivers/ide/ide-iops.c b/trunk/drivers/ide/ide-iops.c
index fa047150a1c6..06fe002116ec 100644
--- a/trunk/drivers/ide/ide-iops.c
+++ b/trunk/drivers/ide/ide-iops.c
@@ -282,29 +282,6 @@ u8 eighty_ninty_three(ide_drive_t *drive)
 	return 0;
 }
 
-static const char *nien_quirk_list[] = {
-	"QUANTUM FIREBALLlct08 08",
-	"QUANTUM FIREBALLP KA6.4",
-	"QUANTUM FIREBALLP KA9.1",
-	"QUANTUM FIREBALLP KX13.6",
-	"QUANTUM FIREBALLP KX20.5",
-	"QUANTUM FIREBALLP KX27.3",
-	"QUANTUM FIREBALLP LM20.4",
-	"QUANTUM FIREBALLP LM20.5",
-	NULL
-};
-
-void ide_check_nien_quirk_list(ide_drive_t *drive)
-{
-	const char **list, *m = (char *)&drive->id[ATA_ID_PROD];
-
-	for (list = nien_quirk_list; *list != NULL; list++)
-		if (strstr(m, *list) != NULL) {
-			drive->dev_flags |= IDE_DFLAG_NIEN_QUIRK;
-			return;
-		}
-}
-
 int ide_driveid_update(ide_drive_t *drive)
 {
 	u16 *id;
@@ -334,6 +311,7 @@ int ide_driveid_update(ide_drive_t *drive)
 
 	return 1;
 out_err:
+	SELECT_MASK(drive, 0);
 	if (rc == 2)
 		printk(KERN_ERR "%s: %s: bad status\n", drive->name, __func__);
 	kfree(id);
@@ -387,7 +365,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
 
 	tp_ops->exec_command(hwif, ATA_CMD_SET_FEATURES);
 
-	if (drive->dev_flags & IDE_DFLAG_NIEN_QUIRK)
+	if (drive->quirk_list == 2)
 		tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
 
 	error = __ide_wait_stat(drive, drive->ready_stat,
diff --git a/trunk/drivers/ide/ide-legacy.c b/trunk/drivers/ide/ide-legacy.c
index b9654a7bb7be..8c5dcbf22547 100644
--- a/trunk/drivers/ide/ide-legacy.c
+++ b/trunk/drivers/ide/ide-legacy.c
@@ -1,7 +1,7 @@
 #include <linux/kernel.h>
 #include <linux/ide.h>
 
-static void ide_legacy_init_one(struct ide_hw **hws, struct ide_hw *hw,
+static void ide_legacy_init_one(hw_regs_t **hws, hw_regs_t *hw,
 				u8 port_no, const struct ide_port_info *d,
 				unsigned long config)
 {
@@ -33,6 +33,7 @@ static void ide_legacy_init_one(struct ide_hw **hws, struct ide_hw *hw,
 
 	ide_std_init_ports(hw, base, ctl);
 	hw->irq = irq;
+	hw->chipset = d->chipset;
 	hw->config = config;
 
 	hws[port_no] = hw;
@@ -40,7 +41,7 @@ static void ide_legacy_init_one(struct ide_hw **hws, struct ide_hw *hw,
 
 int ide_legacy_device_add(const struct ide_port_info *d, unsigned long config)
 {
-	struct ide_hw hw[2], *hws[] = { NULL, NULL };
+	hw_regs_t hw[2], *hws[] = { NULL, NULL, NULL, NULL };
 
 	memset(&hw, 0, sizeof(hw));
 
@@ -52,6 +53,6 @@ int ide_legacy_device_add(const struct ide_port_info *d, unsigned long config)
 	    (d->host_flags & IDE_HFLAG_SINGLE))
 		return -ENOENT;
 
-	return ide_host_add(d, hws, 2, NULL);
+	return ide_host_add(d, hws, NULL);
 }
 EXPORT_SYMBOL_GPL(ide_legacy_device_add);
diff --git a/trunk/drivers/ide/ide-pnp.c b/trunk/drivers/ide/ide-pnp.c
index 017b1df3b805..6e80b774e88a 100644
--- a/trunk/drivers/ide/ide-pnp.c
+++ b/trunk/drivers/ide/ide-pnp.c
@@ -29,7 +29,6 @@ static struct pnp_device_id idepnp_devices[] = {
 
 static const struct ide_port_info ide_pnp_port_info = {
 	.host_flags		= IDE_HFLAG_NO_DMA,
-	.chipset		= ide_generic,
 };
 
 static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
@@ -37,7 +36,7 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
 	struct ide_host *host;
 	unsigned long base, ctl;
 	int rc;
-	struct ide_hw hw, *hws[] = { &hw };
+	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 
 	printk(KERN_INFO DRV_NAME ": generic PnP IDE interface\n");
 
@@ -63,8 +62,9 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
 	memset(&hw, 0, sizeof(hw));
 	ide_std_init_ports(&hw, base, ctl);
 	hw.irq = pnp_irq(dev, 0);
+	hw.chipset = ide_generic;
 
-	rc = ide_host_add(&ide_pnp_port_info, hws, 1, &host);
+	rc = ide_host_add(&ide_pnp_port_info, hws, &host);
 	if (rc)
 		goto out;
 
diff --git a/trunk/drivers/ide/ide-probe.c b/trunk/drivers/ide/ide-probe.c
index f371b0de314f..c895ed52b2e8 100644
--- a/trunk/drivers/ide/ide-probe.c
+++ b/trunk/drivers/ide/ide-probe.c
@@ -97,7 +97,7 @@ static void ide_disk_init_mult_count(ide_drive_t *drive)
 		drive->mult_req = id[ATA_ID_MULTSECT] & 0xff;
 
 		if (drive->mult_req)
-			drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
+			drive->special.b.set_multmode = 1;
 	}
 }
 
@@ -465,8 +465,23 @@ static u8 probe_for_drive(ide_drive_t *drive)
 	int rc;
 	u8 cmd;
 
+	/*
+	 *	In order to keep things simple we have an id
+	 *	block for all drives at all times. If the device
+	 *	is pre ATA or refuses ATA/ATAPI identify we
+	 *	will add faked data to this.
+	 *
+	 *	Also note that 0 everywhere means "can't do X"
+	 */
+ 
 	drive->dev_flags &= ~IDE_DFLAG_ID_READ;
 
+	drive->id = kzalloc(SECTOR_SIZE, GFP_KERNEL);
+	if (drive->id == NULL) {
+		printk(KERN_ERR "ide: out of memory for id data.\n");
+		return 0;
+	}
+
 	m = (char *)&drive->id[ATA_ID_PROD];
 	strcpy(m, "UNKNOWN");
 
@@ -482,7 +497,7 @@ static u8 probe_for_drive(ide_drive_t *drive)
 		}
 
 		if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
-			return 0;
+			goto out_free;
 
 		/* identification failed? */
 		if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0) {
@@ -506,7 +521,7 @@ static u8 probe_for_drive(ide_drive_t *drive)
 	}
 
 	if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
-		return 0;
+		goto out_free;
 
 	/* The drive wasn't being helpful. Add generic info only */
 	if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0) {
@@ -520,6 +535,9 @@ static u8 probe_for_drive(ide_drive_t *drive)
 	}
 
 	return 1;
+out_free:
+	kfree(drive->id);
+	return 0;
 }
 
 static void hwif_release_dev(struct device *dev)
@@ -684,14 +702,8 @@ static int ide_probe_port(ide_hwif_t *hwif)
 	if (irqd)
 		disable_irq(hwif->irq);
 
-	rc = ide_port_wait_ready(hwif);
-	if (rc == -ENODEV) {
-		printk(KERN_INFO "%s: no devices on the port\n", hwif->name);
-		goto out;
-	} else if (rc == -EBUSY)
-		printk(KERN_ERR "%s: not ready before the probe\n", hwif->name);
-	else
-		rc = -ENODEV;
+	if (ide_port_wait_ready(hwif) == -EBUSY)
+		printk(KERN_DEBUG "%s: Wait for ready failed before probe !\n", hwif->name);
 
 	/*
 	 * Second drive should only exist if first drive was found,
@@ -702,7 +714,7 @@ static int ide_probe_port(ide_hwif_t *hwif)
 		if (drive->dev_flags & IDE_DFLAG_PRESENT)
 			rc = 0;
 	}
-out:
+
 	/*
 	 * Use cached IRQ number. It might be (and is...) changed by probe
 	 * code above
@@ -720,8 +732,6 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
 	int i;
 
 	ide_port_for_each_present_dev(i, drive, hwif) {
-		ide_check_nien_quirk_list(drive);
-
 		if (port_ops && port_ops->quirkproc)
 			port_ops->quirkproc(drive);
 	}
@@ -807,6 +817,8 @@ static int ide_port_setup_devices(ide_hwif_t *hwif)
 		if (ide_init_queue(drive)) {
 			printk(KERN_ERR "ide: failed to init %s\n",
 					drive->name);
+			kfree(drive->id);
+			drive->id = NULL;
 			drive->dev_flags &= ~IDE_DFLAG_PRESENT;
 			continue;
 		}
@@ -935,6 +947,9 @@ static void drive_release_dev (struct device *dev)
 	blk_cleanup_queue(drive->queue);
 	drive->queue = NULL;
 
+	kfree(drive->id);
+	drive->id = NULL;
+
 	drive->dev_flags &= ~IDE_DFLAG_PRESENT;
 
 	complete(&drive->gendev_rel_comp);
@@ -1020,15 +1035,6 @@ static void ide_port_init_devices(ide_hwif_t *hwif)
 		if (port_ops && port_ops->init_dev)
 			port_ops->init_dev(drive);
 	}
-
-	ide_port_for_each_dev(i, drive, hwif) {
-		/*
-		 * default to PIO Mode 0 before we figure out
-		 * the most suited mode for the attached device
-		 */
-		if (port_ops && port_ops->set_pio_mode)
-			port_ops->set_pio_mode(drive, 0);
-	}
 }
 
 static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
@@ -1036,7 +1042,8 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
 {
 	hwif->channel = port;
 
-	hwif->chipset = d->chipset ? d->chipset : ide_pci;
+	if (d->chipset)
+		hwif->chipset = d->chipset;
 
 	if (d->init_iops)
 		d->init_iops(hwif);
@@ -1117,19 +1124,16 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif)
 
 	ide_port_for_each_dev(i, drive, hwif) {
 		u8 j = (hwif->index * MAX_DRIVES) + i;
-		u16 *saved_id = drive->id;
 
 		memset(drive, 0, sizeof(*drive));
-		memset(saved_id, 0, SECTOR_SIZE);
-		drive->id = saved_id;
 
 		drive->media			= ide_disk;
 		drive->select			= (i << 4) | ATA_DEVICE_OBS;
 		drive->hwif			= hwif;
 		drive->ready_stat		= ATA_DRDY;
 		drive->bad_wstat		= BAD_W_STAT;
-		drive->special_flags		= IDE_SFLAG_RECALIBRATE |
-						  IDE_SFLAG_SET_GEOMETRY;
+		drive->special.b.recalibrate	= 1;
+		drive->special.b.set_geometry	= 1;
 		drive->name[0]			= 'h';
 		drive->name[1]			= 'd';
 		drive->name[2]			= 'a' + j;
@@ -1164,10 +1168,11 @@ static void ide_init_port_data(ide_hwif_t *hwif, unsigned int index)
 	ide_port_init_devices_data(hwif);
 }
 
-static void ide_init_port_hw(ide_hwif_t *hwif, struct ide_hw *hw)
+static void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw)
 {
 	memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports));
 	hwif->irq = hw->irq;
+	hwif->chipset = hw->chipset;
 	hwif->dev = hw->dev;
 	hwif->gendev.parent = hw->parent ? hw->parent : hw->dev;
 	hwif->ack_intr = hw->ack_intr;
@@ -1228,10 +1233,8 @@ static void ide_port_free_devices(ide_hwif_t *hwif)
 	ide_drive_t *drive;
 	int i;
 
-	ide_port_for_each_dev(i, drive, hwif) {
-		kfree(drive->id);
+	ide_port_for_each_dev(i, drive, hwif)
 		kfree(drive);
-	}
 }
 
 static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
@@ -1245,18 +1248,6 @@ static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
 		if (drive == NULL)
 			goto out_nomem;
 
-		/*
-		 * In order to keep things simple we have an id
-		 * block for all drives at all times. If the device
-		 * is pre ATA or refuses ATA/ATAPI identify we
-		 * will add faked data to this.
-		 *
-		 * Also note that 0 everywhere means "can't do X"
-		 */
-		drive->id = kzalloc_node(SECTOR_SIZE, GFP_KERNEL, node);
-		if (drive->id == NULL)
-			goto out_nomem;
-
 		hwif->devices[i] = drive;
 	}
 	return 0;
@@ -1266,8 +1257,7 @@ static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
 	return -ENOMEM;
 }
 
-struct ide_host *ide_host_alloc(const struct ide_port_info *d,
-				struct ide_hw **hws, unsigned int n_ports)
+struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws)
 {
 	struct ide_host *host;
 	struct device *dev = hws[0] ? hws[0]->dev : NULL;
@@ -1278,7 +1268,7 @@ struct ide_host *ide_host_alloc(const struct ide_port_info *d,
 	if (host == NULL)
 		return NULL;
 
-	for (i = 0; i < n_ports; i++) {
+	for (i = 0; i < MAX_HOST_PORTS; i++) {
 		ide_hwif_t *hwif;
 		int idx;
 
@@ -1298,7 +1288,6 @@ struct ide_host *ide_host_alloc(const struct ide_port_info *d,
 		if (idx < 0) {
 			printk(KERN_ERR "%s: no free slot for interface\n",
 					d ? d->name : "ide");
-			ide_port_free_devices(hwif);
 			kfree(hwif);
 			continue;
 		}
@@ -1355,7 +1344,7 @@ static void ide_disable_port(ide_hwif_t *hwif)
 }
 
 int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
-		      struct ide_hw **hws)
+		      hw_regs_t **hws)
 {
 	ide_hwif_t *hwif, *mate = NULL;
 	int i, j = 0;
@@ -1449,13 +1438,13 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
 }
 EXPORT_SYMBOL_GPL(ide_host_register);
 
-int ide_host_add(const struct ide_port_info *d, struct ide_hw **hws,
-		 unsigned int n_ports, struct ide_host **hostp)
+int ide_host_add(const struct ide_port_info *d, hw_regs_t **hws,
+		 struct ide_host **hostp)
 {
 	struct ide_host *host;
 	int rc;
 
-	host = ide_host_alloc(d, hws, n_ports);
+	host = ide_host_alloc(d, hws);
 	if (host == NULL)
 		return -ENOMEM;
 
diff --git a/trunk/drivers/ide/ide-tape.c b/trunk/drivers/ide/ide-tape.c
index 4b447a8a49d4..d9764f0bc82f 100644
--- a/trunk/drivers/ide/ide-tape.c
+++ b/trunk/drivers/ide/ide-tape.c
@@ -240,27 +240,18 @@ static struct class *idetape_sysfs_class;
 
 static void ide_tape_release(struct device *);
 
-static struct ide_tape_obj *idetape_devs[MAX_HWIFS * MAX_DRIVES];
-
-static struct ide_tape_obj *ide_tape_get(struct gendisk *disk, bool cdev,
-					 unsigned int i)
+static struct ide_tape_obj *ide_tape_get(struct gendisk *disk)
 {
 	struct ide_tape_obj *tape = NULL;
 
 	mutex_lock(&idetape_ref_mutex);
-
-	if (cdev)
-		tape = idetape_devs[i];
-	else
-		tape = ide_drv_g(disk, ide_tape_obj);
-
+	tape = ide_drv_g(disk, ide_tape_obj);
 	if (tape) {
 		if (ide_device_get(tape->drive))
 			tape = NULL;
 		else
 			get_device(&tape->dev);
 	}
-
 	mutex_unlock(&idetape_ref_mutex);
 	return tape;
 }
@@ -275,6 +266,24 @@ static void ide_tape_put(struct ide_tape_obj *tape)
 	mutex_unlock(&idetape_ref_mutex);
 }
 
+/*
+ * The variables below are used for the character device interface. Additional
+ * state variables are defined in our ide_drive_t structure.
+ */
+static struct ide_tape_obj *idetape_devs[MAX_HWIFS * MAX_DRIVES];
+
+static struct ide_tape_obj *ide_tape_chrdev_get(unsigned int i)
+{
+	struct ide_tape_obj *tape = NULL;
+
+	mutex_lock(&idetape_ref_mutex);
+	tape = idetape_devs[i];
+	if (tape)
+		get_device(&tape->dev);
+	mutex_unlock(&idetape_ref_mutex);
+	return tape;
+}
+
 /*
  * called on each failed packet command retry to analyze the request sense. We
  * currently do not utilize this information.
@@ -388,8 +397,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
 		if (readpos[0] & 0x4) {
 			printk(KERN_INFO "ide-tape: Block location is unknown"
 					 "to the tape\n");
-			clear_bit(ilog2(IDE_AFLAG_ADDRESS_VALID),
-				  &drive->atapi_flags);
+			clear_bit(IDE_AFLAG_ADDRESS_VALID, &drive->atapi_flags);
 			uptodate = 0;
 			err = IDE_DRV_ERROR_GENERAL;
 		} else {
@@ -398,8 +406,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
 
 			tape->partition = readpos[1];
 			tape->first_frame = be32_to_cpup((__be32 *)&readpos[4]);
-			set_bit(ilog2(IDE_AFLAG_ADDRESS_VALID),
-				&drive->atapi_flags);
+			set_bit(IDE_AFLAG_ADDRESS_VALID, &drive->atapi_flags);
 		}
 	}
 
@@ -649,15 +656,15 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 
 	if ((drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) == 0 &&
 	    (rq->cmd[13] & REQ_IDETAPE_PC2) == 0)
-		drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
+		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
 
 	if (drive->dev_flags & IDE_DFLAG_POST_RESET) {
-		drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
+		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
 		drive->dev_flags &= ~IDE_DFLAG_POST_RESET;
 	}
 
-	if (!(drive->atapi_flags & IDE_AFLAG_IGNORE_DSC) &&
-	    !(stat & ATA_DSC)) {
+	if (!test_and_clear_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags) &&
+	    (stat & ATA_DSC) == 0) {
 		if (postponed_rq == NULL) {
 			tape->dsc_polling_start = jiffies;
 			tape->dsc_poll_freq = tape->best_dsc_rw_freq;
@@ -677,9 +684,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 			tape->dsc_poll_freq = IDETAPE_DSC_MA_SLOW;
 		idetape_postpone_request(drive);
 		return ide_stopped;
-	} else
-		drive->atapi_flags &= ~IDE_AFLAG_IGNORE_DSC;
-
+	}
 	if (rq->cmd[13] & REQ_IDETAPE_READ) {
 		pc = &tape->queued_pc;
 		ide_tape_create_rw_cmd(tape, pc, rq, READ_6);
@@ -739,7 +744,7 @@ static int idetape_wait_ready(ide_drive_t *drive, unsigned long timeout)
 	int load_attempted = 0;
 
 	/* Wait for the tape to become ready */
-	set_bit(ilog2(IDE_AFLAG_MEDIUM_PRESENT), &drive->atapi_flags);
+	set_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags);
 	timeout += jiffies;
 	while (time_before(jiffies, timeout)) {
 		if (ide_do_test_unit_ready(drive, disk) == 0)
@@ -815,7 +820,7 @@ static void __ide_tape_discard_merge_buffer(ide_drive_t *drive)
 	if (tape->chrdev_dir != IDETAPE_DIR_READ)
 		return;
 
-	clear_bit(ilog2(IDE_AFLAG_FILEMARK), &drive->atapi_flags);
+	clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags);
 	tape->valid = 0;
 	if (tape->buf != NULL) {
 		kfree(tape->buf);
@@ -1108,8 +1113,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op,
 
 	if (tape->chrdev_dir == IDETAPE_DIR_READ) {
 		tape->valid = 0;
-		if (test_and_clear_bit(ilog2(IDE_AFLAG_FILEMARK),
-				       &drive->atapi_flags))
+		if (test_and_clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
 			++count;
 		ide_tape_discard_merge_buffer(drive, 0);
 	}
@@ -1164,7 +1168,7 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 	debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count);
 
 	if (tape->chrdev_dir != IDETAPE_DIR_READ) {
-		if (test_bit(ilog2(IDE_AFLAG_DETECT_BS), &drive->atapi_flags))
+		if (test_bit(IDE_AFLAG_DETECT_BS, &drive->atapi_flags))
 			if (count > tape->blk_size &&
 			    (count % tape->blk_size) == 0)
 				tape->user_bs_factor = count / tape->blk_size;
@@ -1180,8 +1184,7 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 		/* refill if staging buffer is empty */
 		if (!tape->valid) {
 			/* If we are at a filemark, nothing more to read */
-			if (test_bit(ilog2(IDE_AFLAG_FILEMARK),
-				     &drive->atapi_flags))
+			if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
 				break;
 			/* read */
 			if (idetape_queue_rw_tail(drive, REQ_IDETAPE_READ,
@@ -1199,7 +1202,7 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 		done += todo;
 	}
 
-	if (!done && test_bit(ilog2(IDE_AFLAG_FILEMARK), &drive->atapi_flags)) {
+	if (!done && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) {
 		debug_log(DBG_SENSE, "%s: spacing over filemark\n", tape->name);
 
 		idetape_space_over_filemarks(drive, MTFSF, 1);
@@ -1333,8 +1336,7 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count)
 		ide_tape_discard_merge_buffer(drive, 0);
 		retval = ide_do_start_stop(drive, disk, !IDETAPE_LU_LOAD_MASK);
 		if (!retval)
-			clear_bit(ilog2(IDE_AFLAG_MEDIUM_PRESENT),
-				  &drive->atapi_flags);
+			clear_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags);
 		return retval;
 	case MTNOP:
 		ide_tape_discard_merge_buffer(drive, 0);
@@ -1356,11 +1358,9 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count)
 			    mt_count % tape->blk_size)
 				return -EIO;
 			tape->user_bs_factor = mt_count / tape->blk_size;
-			clear_bit(ilog2(IDE_AFLAG_DETECT_BS),
-				  &drive->atapi_flags);
+			clear_bit(IDE_AFLAG_DETECT_BS, &drive->atapi_flags);
 		} else
-			set_bit(ilog2(IDE_AFLAG_DETECT_BS),
-				&drive->atapi_flags);
+			set_bit(IDE_AFLAG_DETECT_BS, &drive->atapi_flags);
 		return 0;
 	case MTSEEK:
 		ide_tape_discard_merge_buffer(drive, 0);
@@ -1486,7 +1486,7 @@ static int idetape_chrdev_open(struct inode *inode, struct file *filp)
 		return -ENXIO;
 
 	lock_kernel();
-	tape = ide_tape_get(NULL, true, i);
+	tape = ide_tape_chrdev_get(i);
 	if (!tape) {
 		unlock_kernel();
 		return -ENXIO;
@@ -1505,20 +1505,20 @@ static int idetape_chrdev_open(struct inode *inode, struct file *filp)
 
 	filp->private_data = tape;
 
-	if (test_and_set_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags)) {
+	if (test_and_set_bit(IDE_AFLAG_BUSY, &drive->atapi_flags)) {
 		retval = -EBUSY;
 		goto out_put_tape;
 	}
 
 	retval = idetape_wait_ready(drive, 60 * HZ);
 	if (retval) {
-		clear_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags);
+		clear_bit(IDE_AFLAG_BUSY, &drive->atapi_flags);
 		printk(KERN_ERR "ide-tape: %s: drive not ready\n", tape->name);
 		goto out_put_tape;
 	}
 
 	idetape_read_position(drive);
-	if (!test_bit(ilog2(IDE_AFLAG_ADDRESS_VALID), &drive->atapi_flags))
+	if (!test_bit(IDE_AFLAG_ADDRESS_VALID, &drive->atapi_flags))
 		(void)idetape_rewind_tape(drive);
 
 	/* Read block size and write protect status from drive. */
@@ -1534,7 +1534,7 @@ static int idetape_chrdev_open(struct inode *inode, struct file *filp)
 	if (tape->write_prot) {
 		if ((filp->f_flags & O_ACCMODE) == O_WRONLY ||
 		    (filp->f_flags & O_ACCMODE) == O_RDWR) {
-			clear_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags);
+			clear_bit(IDE_AFLAG_BUSY, &drive->atapi_flags);
 			retval = -EROFS;
 			goto out_put_tape;
 		}
@@ -1591,17 +1591,15 @@ static int idetape_chrdev_release(struct inode *inode, struct file *filp)
 			ide_tape_discard_merge_buffer(drive, 1);
 	}
 
-	if (minor < 128 && test_bit(ilog2(IDE_AFLAG_MEDIUM_PRESENT),
-				    &drive->atapi_flags))
+	if (minor < 128 && test_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags))
 		(void) idetape_rewind_tape(drive);
-
 	if (tape->chrdev_dir == IDETAPE_DIR_NONE) {
 		if (tape->door_locked == DOOR_LOCKED) {
 			if (!ide_set_media_lock(drive, tape->disk, 0))
 				tape->door_locked = DOOR_UNLOCKED;
 		}
 	}
-	clear_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags);
+	clear_bit(IDE_AFLAG_BUSY, &drive->atapi_flags);
 	ide_tape_put(tape);
 	unlock_kernel();
 	return 0;
@@ -1907,7 +1905,7 @@ static const struct file_operations idetape_fops = {
 
 static int idetape_open(struct block_device *bdev, fmode_t mode)
 {
-	struct ide_tape_obj *tape = ide_tape_get(bdev->bd_disk, false, 0);
+	struct ide_tape_obj *tape = ide_tape_get(bdev->bd_disk);
 
 	if (!tape)
 		return -ENXIO;
diff --git a/trunk/drivers/ide/ide-taskfile.c b/trunk/drivers/ide/ide-taskfile.c
index 75b85a8cd2d4..a0c3e1b2f73c 100644
--- a/trunk/drivers/ide/ide-taskfile.c
+++ b/trunk/drivers/ide/ide-taskfile.c
@@ -98,6 +98,7 @@ ide_startstop_t do_rw_taskfile(ide_drive_t *drive, struct ide_cmd *orig_cmd)
 	if ((cmd->tf_flags & IDE_TFLAG_DMA_PIO_FALLBACK) == 0) {
 		ide_tf_dump(drive->name, cmd);
 		tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
+		SELECT_MASK(drive, 0);
 
 		if (cmd->ftf_flags & IDE_FTFLAG_OUT_DATA) {
 			u8 data[2] = { cmd->tf.data, cmd->hob.data };
@@ -165,7 +166,7 @@ static ide_startstop_t task_no_data_intr(ide_drive_t *drive)
 	if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) {
 		if (custom && tf->command == ATA_CMD_SET_MULTI) {
 			drive->mult_req = drive->mult_count = 0;
-			drive->special_flags |= IDE_SFLAG_RECALIBRATE;
+			drive->special.b.recalibrate = 1;
 			(void)ide_dump_status(drive, __func__, stat);
 			return ide_stopped;
 		} else if (custom && tf->command == ATA_CMD_INIT_DEV_PARAMS) {
diff --git a/trunk/drivers/ide/ide.c b/trunk/drivers/ide/ide.c
index 16d056939f9f..92c9b90931e7 100644
--- a/trunk/drivers/ide/ide.c
+++ b/trunk/drivers/ide/ide.c
@@ -211,11 +211,6 @@ static unsigned int ide_noflush;
 module_param_call(noflush, ide_set_dev_param_mask, NULL, &ide_noflush, 0);
 MODULE_PARM_DESC(noflush, "disable flush requests for a device");
 
-static unsigned int ide_nohpa;
-
-module_param_call(nohpa, ide_set_dev_param_mask, NULL, &ide_nohpa, 0);
-MODULE_PARM_DESC(nohpa, "disable Host Protected Area for a device");
-
 static unsigned int ide_noprobe;
 
 module_param_call(noprobe, ide_set_dev_param_mask, NULL, &ide_noprobe, 0);
@@ -286,11 +281,6 @@ static void ide_dev_apply_params(ide_drive_t *drive, u8 unit)
 				 drive->name);
 		drive->dev_flags |= IDE_DFLAG_NOFLUSH;
 	}
-	if (ide_nohpa & (1 << i)) {
-		printk(KERN_INFO "ide: disabling Host Protected Area for %s\n",
-				 drive->name);
-		drive->dev_flags |= IDE_DFLAG_NOHPA;
-	}
 	if (ide_noprobe & (1 << i)) {
 		printk(KERN_INFO "ide: skipping probe for %s\n", drive->name);
 		drive->dev_flags |= IDE_DFLAG_NOPROBE;
diff --git a/trunk/drivers/ide/ide_platform.c b/trunk/drivers/ide/ide_platform.c
index ee9b55ecc62b..051b4ab0f359 100644
--- a/trunk/drivers/ide/ide_platform.c
+++ b/trunk/drivers/ide/ide_platform.c
@@ -21,7 +21,7 @@
 #include <linux/platform_device.h>
 #include <linux/io.h>
 
-static void __devinit plat_ide_setup_ports(struct ide_hw *hw,
+static void __devinit plat_ide_setup_ports(hw_regs_t *hw,
 					   void __iomem *base,
 					   void __iomem *ctrl,
 					   struct pata_platform_info *pdata,
@@ -40,11 +40,12 @@ static void __devinit plat_ide_setup_ports(struct ide_hw *hw,
 	hw->io_ports.ctl_addr = (unsigned long)ctrl;
 
 	hw->irq = irq;
+
+	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info platform_ide_port_info = {
 	.host_flags		= IDE_HFLAG_NO_DMA,
-	.chipset		= ide_generic,
 };
 
 static int __devinit plat_ide_probe(struct platform_device *pdev)
@@ -54,7 +55,7 @@ static int __devinit plat_ide_probe(struct platform_device *pdev)
 	struct pata_platform_info *pdata;
 	struct ide_host *host;
 	int ret = 0, mmio = 0;
-	struct ide_hw hw, *hws[] = { &hw };
+	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 	struct ide_port_info d = platform_ide_port_info;
 
 	pdata = pdev->dev.platform_data;
@@ -98,7 +99,7 @@ static int __devinit plat_ide_probe(struct platform_device *pdev)
 	if (mmio)
 		d.host_flags |= IDE_HFLAG_MMIO;
 
-	ret = ide_host_add(&d, hws, 1, &host);
+	ret = ide_host_add(&d, hws, &host);
 	if (ret)
 		goto out;
 
diff --git a/trunk/drivers/ide/macide.c b/trunk/drivers/ide/macide.c
index 1447c8c90565..4b1718e83283 100644
--- a/trunk/drivers/ide/macide.c
+++ b/trunk/drivers/ide/macide.c
@@ -62,7 +62,7 @@ int macide_ack_intr(ide_hwif_t* hwif)
 	return 0;
 }
 
-static void __init macide_setup_ports(struct ide_hw *hw, unsigned long base,
+static void __init macide_setup_ports(hw_regs_t *hw, unsigned long base,
 				      int irq, ide_ack_intr_t *ack_intr)
 {
 	int i;
@@ -76,12 +76,13 @@ static void __init macide_setup_ports(struct ide_hw *hw, unsigned long base,
 
 	hw->irq = irq;
 	hw->ack_intr = ack_intr;
+
+	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info macide_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
-	.chipset		= ide_generic,
 };
 
 static const char *mac_ide_name[] =
@@ -96,7 +97,7 @@ static int __init macide_init(void)
 	ide_ack_intr_t *ack_intr;
 	unsigned long base;
 	int irq;
-	struct ide_hw hw, *hws[] = { &hw };
+	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 
 	if (!MACH_IS_MAC)
 		return -ENODEV;
@@ -126,7 +127,7 @@ static int __init macide_init(void)
 
 	macide_setup_ports(&hw, base, irq, ack_intr);
 
-	return ide_host_add(&macide_port_info, hws, 1, NULL);
+	return ide_host_add(&macide_port_info, hws, NULL);
 }
 
 module_init(macide_init);
diff --git a/trunk/drivers/ide/palm_bk3710.c b/trunk/drivers/ide/palm_bk3710.c
index 3c1dc0152153..09d813d313f4 100644
--- a/trunk/drivers/ide/palm_bk3710.c
+++ b/trunk/drivers/ide/palm_bk3710.c
@@ -306,7 +306,6 @@ static struct ide_port_info __devinitdata palm_bk3710_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO,
 	.pio_mask		= ATA_PIO4,
 	.mwdma_mask		= ATA_MWDMA2,
-	.chipset		= ide_palm3710,
 };
 
 static int __init palm_bk3710_probe(struct platform_device *pdev)
@@ -316,7 +315,7 @@ static int __init palm_bk3710_probe(struct platform_device *pdev)
 	void __iomem *base;
 	unsigned long rate, mem_size;
 	int i, rc;
-	struct ide_hw hw, *hws[] = { &hw };
+	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 
 	clk = clk_get(&pdev->dev, "IDECLK");
 	if (IS_ERR(clk))
@@ -364,12 +363,13 @@ static int __init palm_bk3710_probe(struct platform_device *pdev)
 			(base + IDE_PALM_ATA_PRI_CTL_OFFSET);
 	hw.irq = irq->start;
 	hw.dev = &pdev->dev;
+	hw.chipset = ide_palm3710;
 
 	palm_bk3710_port_info.udma_mask = rate < 100000000 ? ATA_UDMA4 :
 							     ATA_UDMA5;
 
 	/* Register the IDE interface with Linux */
-	rc = ide_host_add(&palm_bk3710_port_info, hws, 1, NULL);
+	rc = ide_host_add(&palm_bk3710_port_info, hws, NULL);
 	if (rc)
 		goto out;
 
diff --git a/trunk/drivers/ide/pdc202xx_new.c b/trunk/drivers/ide/pdc202xx_new.c
index 65ba8239e7b5..b68906c3c17e 100644
--- a/trunk/drivers/ide/pdc202xx_new.c
+++ b/trunk/drivers/ide/pdc202xx_new.c
@@ -40,6 +40,18 @@
 #define DBG(fmt, args...)
 #endif
 
+static const char *pdc_quirk_drives[] = {	/* names pdcnew_quirkproc() looks for with strstr() */
+	"QUANTUM FIREBALLlct08 08",
+	"QUANTUM FIREBALLP KA6.4",
+	"QUANTUM FIREBALLP KA9.1",
+	"QUANTUM FIREBALLP LM20.4",
+	"QUANTUM FIREBALLP KX13.6",
+	"QUANTUM FIREBALLP KX20.5",
+	"QUANTUM FIREBALLP KX27.3",
+	"QUANTUM FIREBALLP LM20.5",
+	NULL	/* terminator: the quirkproc loop stops at the first NULL entry */
+};
+
 static u8 max_dma_rate(struct pci_dev *pdev)
 {
 	u8 mode;
@@ -188,6 +200,19 @@ static u8 pdcnew_cable_detect(ide_hwif_t *hwif)
 		return ATA_CBL_PATA80;
 }
 
+static void pdcnew_quirkproc(ide_drive_t *drive)
+{
+	const char *model = (char *)&drive->id[ATA_ID_PROD];
+	const char **entry = pdc_quirk_drives;
+
+	/* advance to the first table name contained in the id[] product string */
+	while (*entry != NULL && strstr(model, *entry) == NULL)
+		entry++;
+
+	/* 2 when a table name matched, 0 when the NULL terminator was reached */
+	drive->quirk_list = (*entry != NULL) ? 2 : 0;
+}
+
 static void pdcnew_reset(ide_drive_t *drive)
 {
 	/*
@@ -448,6 +473,7 @@ static struct pci_dev * __devinit pdc20270_get_dev2(struct pci_dev *dev)
 static const struct ide_port_ops pdcnew_port_ops = {
 	.set_pio_mode		= pdcnew_set_pio_mode,
 	.set_dma_mode		= pdcnew_set_dma_mode,
+	.quirkproc		= pdcnew_quirkproc,
 	.resetproc		= pdcnew_reset,
 	.cable_detect		= pdcnew_cable_detect,
 };
diff --git a/trunk/drivers/ide/pdc202xx_old.c b/trunk/drivers/ide/pdc202xx_old.c
index b6abf7e52cac..e24ecc87a9b1 100644
--- a/trunk/drivers/ide/pdc202xx_old.c
+++ b/trunk/drivers/ide/pdc202xx_old.c
@@ -23,6 +23,18 @@
 
 #define PDC202XX_DEBUG_DRIVE_INFO	0
 
+static const char *pdc_quirk_drives[] = {	/* names pdc202xx_quirkproc() looks for with strstr() */
+	"QUANTUM FIREBALLlct08 08",
+	"QUANTUM FIREBALLP KA6.4",
+	"QUANTUM FIREBALLP KA9.1",
+	"QUANTUM FIREBALLP LM20.4",
+	"QUANTUM FIREBALLP KX13.6",
+	"QUANTUM FIREBALLP KX20.5",
+	"QUANTUM FIREBALLP KX27.3",
+	"QUANTUM FIREBALLP LM20.5",
+	NULL	/* terminator: the quirkproc loop stops at the first NULL entry */
+};
+
 static void pdc_old_disable_66MHz_clock(ide_hwif_t *);
 
 static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed)
@@ -139,6 +151,19 @@ static void pdc_old_disable_66MHz_clock(ide_hwif_t *hwif)
 	outb(clock & ~(hwif->channel ? 0x08 : 0x02), clock_reg);
 }
 
+static void pdc202xx_quirkproc(ide_drive_t *drive)
+{
+	const char *model = (char *)&drive->id[ATA_ID_PROD];
+	const char **entry = pdc_quirk_drives;
+
+	/* advance to the first table name contained in the id[] product string */
+	while (*entry != NULL && strstr(model, *entry) == NULL)
+		entry++;
+
+	/* 2 when a table name matched, 0 when the NULL terminator was reached */
+	drive->quirk_list = (*entry != NULL) ? 2 : 0;
+}
+
 static void pdc202xx_dma_start(ide_drive_t *drive)
 {
 	if (drive->current_speed > XFER_UDMA_2)
@@ -178,6 +203,52 @@ static int pdc202xx_dma_end(ide_drive_t *drive)
 	return ide_dma_end(drive);
 }
 
+static int pdc202xx_dma_test_irq(ide_drive_t *drive)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	unsigned long high_16	= hwif->extra_base - 16;
+	u8 dma_stat		= inb(hwif->dma_base + ATA_DMA_STATUS);
+	u8 sc1d			= inb(high_16 + 0x001d);
+	/* NOTE(review): every path below returns (dma_stat & 4) == 4, so sc1d never changes the result - confirm intent */
+	if (hwif->channel) {
+		/* bit7: Error, bit6: Interrupting, bit5: FIFO Full, bit4: FIFO Empty */
+		if ((sc1d & 0x50) == 0x50)
+			goto somebody_else;
+		else if ((sc1d & 0x40) == 0x40)
+			return (dma_stat & 4) == 4;
+	} else {
+		/* bit3: Error, bit2: Interrupting, bit1: FIFO Full, bit0: FIFO Empty */
+		if ((sc1d & 0x05) == 0x05)
+			goto somebody_else;
+		else if ((sc1d & 0x04) == 0x04)
+			return (dma_stat & 4) == 4;
+	}
+somebody_else:
+	return (dma_stat & 4) == 4;	/* return 1 if INTR asserted */
+}
+
+static void pdc202xx_reset(ide_drive_t *drive)
+{
+	/* the port toggled below: (extra_base - 16) OR-ed with 0x1f */
+	unsigned long ctl_port = (drive->hwif->extra_base - 16) | 0x001f;
+	u8 saved = inb(ctl_port);
+
+	printk(KERN_WARNING "PDC202xx: software reset...\n");
+
+	/* pulse bit 4: set, hold 100 ms, clear, then wait a full 2 s */
+	outb(saved | 0x10, ctl_port);
+	mdelay(100);
+	outb(saved & ~0x10, ctl_port);
+	mdelay(2000);
+	ide_set_max_pio(drive);
+}
+
+static void pdc202xx_dma_lost_irq(ide_drive_t *drive)
+{
+	pdc202xx_reset(drive);		/* chip software reset first ... */
+	ide_dma_lost_irq(drive);	/* ... then the common ide_dma_lost_irq() handling */
+}
+
 static int init_chipset_pdc202xx(struct pci_dev *dev)
 {
 	unsigned long dmabase = pci_resource_start(dev, 4);
@@ -231,22 +302,37 @@ static void __devinit pdc202ata4_fixup_irq(struct pci_dev *dev,
 static const struct ide_port_ops pdc20246_port_ops = {
 	.set_pio_mode		= pdc202xx_set_pio_mode,
 	.set_dma_mode		= pdc202xx_set_mode,
+	.quirkproc		= pdc202xx_quirkproc,
 };
 
 static const struct ide_port_ops pdc2026x_port_ops = {
 	.set_pio_mode		= pdc202xx_set_pio_mode,
 	.set_dma_mode		= pdc202xx_set_mode,
+	.quirkproc		= pdc202xx_quirkproc,
+	.resetproc		= pdc202xx_reset,
 	.cable_detect		= pdc2026x_cable_detect,
 };
 
+static const struct ide_dma_ops pdc20246_dma_ops = {	/* referenced from pdc202xx_chipsets[] */
+	.dma_host_set		= ide_dma_host_set,
+	.dma_setup		= ide_dma_setup,
+	.dma_start		= ide_dma_start,
+	.dma_end		= ide_dma_end,
+	.dma_test_irq		= pdc202xx_dma_test_irq,	/* the only pdc202xx_* hook in this table */
+	.dma_lost_irq		= ide_dma_lost_irq,
+	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
+};
+
 static const struct ide_dma_ops pdc2026x_dma_ops = {
 	.dma_host_set		= ide_dma_host_set,
 	.dma_setup		= ide_dma_setup,
 	.dma_start		= pdc202xx_dma_start,
 	.dma_end		= pdc202xx_dma_end,
-	.dma_test_irq		= ide_dma_test_irq,
-	.dma_lost_irq		= ide_dma_lost_irq,
+	.dma_test_irq		= pdc202xx_dma_test_irq,
+	.dma_lost_irq		= pdc202xx_dma_lost_irq,
 	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
+	.dma_clear		= pdc202xx_reset,
 	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
@@ -268,7 +354,7 @@ static const struct ide_port_info pdc202xx_chipsets[] __devinitdata = {
 		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_pdc202xx,
 		.port_ops	= &pdc20246_port_ops,
-		.dma_ops	= &sff_dma_ops,
+		.dma_ops	= &pdc20246_dma_ops,
 		.host_flags	= IDE_HFLAGS_PDC202XX,
 		.pio_mask	= ATA_PIO4,
 		.mwdma_mask	= ATA_MWDMA2,
diff --git a/trunk/drivers/ide/pmac.c b/trunk/drivers/ide/pmac.c
index 97642a7a79c4..f76e4e6b408f 100644
--- a/trunk/drivers/ide/pmac.c
+++ b/trunk/drivers/ide/pmac.c
@@ -1023,14 +1023,13 @@ static const struct ide_port_info pmac_port_info = {
  * Setup, register & probe an IDE channel driven by this driver, this is
  * called by one of the 2 probe functions (macio or PCI).
  */
-static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif,
-					   struct ide_hw *hw)
+static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw)
 {
 	struct device_node *np = pmif->node;
 	const int *bidp;
 	struct ide_host *host;
 	ide_hwif_t *hwif;
-	struct ide_hw *hws[] = { hw };
+	hw_regs_t *hws[] = { hw, NULL, NULL, NULL };
 	struct ide_port_info d = pmac_port_info;
 	int rc;
 
@@ -1078,7 +1077,7 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif,
 	/* Make sure we have sane timings */
 	sanitize_timings(pmif);
 
-	host = ide_host_alloc(&d, hws, 1);
+	host = ide_host_alloc(&d, hws);
 	if (host == NULL)
 		return -ENOMEM;
 	hwif = host->ports[0];
@@ -1125,7 +1124,7 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif,
 	return 0;
 }
 
-static void __devinit pmac_ide_init_ports(struct ide_hw *hw, unsigned long base)
+static void __devinit pmac_ide_init_ports(hw_regs_t *hw, unsigned long base)
 {
 	int i;
 
@@ -1145,7 +1144,7 @@ pmac_ide_macio_attach(struct macio_dev *mdev, const struct of_device_id *match)
 	unsigned long regbase;
 	pmac_ide_hwif_t *pmif;
 	int irq, rc;
-	struct ide_hw hw;
+	hw_regs_t hw;
 
 	pmif = kzalloc(sizeof(*pmif), GFP_KERNEL);
 	if (pmif == NULL)
@@ -1269,7 +1268,7 @@ pmac_ide_pci_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 	void __iomem *base;
 	unsigned long rbase, rlen;
 	int rc;
-	struct ide_hw hw;
+	hw_regs_t hw;
 
 	np = pci_device_to_OF_node(pdev);
 	if (np == NULL) {
diff --git a/trunk/drivers/ide/q40ide.c b/trunk/drivers/ide/q40ide.c
index ab49a97023d9..c79346679244 100644
--- a/trunk/drivers/ide/q40ide.c
+++ b/trunk/drivers/ide/q40ide.c
@@ -51,11 +51,11 @@ static int q40ide_default_irq(unsigned long base)
 /*
  * Addresses are pretranslated for Q40 ISA access.
  */
-static void q40_ide_setup_ports(struct ide_hw *hw, unsigned long base,
+static void q40_ide_setup_ports(hw_regs_t *hw, unsigned long base,
 			ide_ack_intr_t *ack_intr,
 			int irq)
 {
-	memset(hw, 0, sizeof(*hw));
+	memset(hw, 0, sizeof(hw_regs_t));
 	/* BIG FAT WARNING: 
 	   assumption: only DATA port is ever used in 16 bit mode */
 	hw->io_ports.data_addr = Q40_ISA_IO_W(base);
@@ -70,6 +70,8 @@ static void q40_ide_setup_ports(struct ide_hw *hw, unsigned long base,
 
 	hw->irq = irq;
 	hw->ack_intr = ack_intr;
+
+	hw->chipset = ide_generic;
 }
 
 static void q40ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd,
@@ -117,7 +119,6 @@ static const struct ide_port_info q40ide_port_info = {
 	.tp_ops			= &q40ide_tp_ops,
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
-	.chipset		= ide_generic,
 };
 
 /* 
@@ -135,7 +136,7 @@ static const char *q40_ide_names[Q40IDE_NUM_HWIFS]={
 static int __init q40ide_init(void)
 {
     int i;
-    struct ide_hw hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL };
+    hw_regs_t hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL };
 
     if (!MACH_IS_Q40)
       return -ENODEV;
@@ -162,7 +163,7 @@ static int __init q40ide_init(void)
 	hws[i] = &hw[i];
     }
 
-    return ide_host_add(&q40ide_port_info, hws, Q40IDE_NUM_HWIFS, NULL);
+    return ide_host_add(&q40ide_port_info, hws, NULL);
 }
 
 module_init(q40ide_init);
diff --git a/trunk/drivers/ide/rapide.c b/trunk/drivers/ide/rapide.c
index 00f54248f41f..d5003ca69801 100644
--- a/trunk/drivers/ide/rapide.c
+++ b/trunk/drivers/ide/rapide.c
@@ -13,10 +13,9 @@
 
 static const struct ide_port_info rapide_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
-	.chipset		= ide_generic,
 };
 
-static void rapide_setup_ports(struct ide_hw *hw, void __iomem *base,
+static void rapide_setup_ports(hw_regs_t *hw, void __iomem *base,
 			       void __iomem *ctrl, unsigned int sz, int irq)
 {
 	unsigned long port = (unsigned long)base;
@@ -36,7 +35,7 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id)
 	void __iomem *base;
 	struct ide_host *host;
 	int ret;
-	struct ide_hw hw, *hws[] = { &hw };
+	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 
 	ret = ecard_request_resources(ec);
 	if (ret)
@@ -50,9 +49,10 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id)
 
 	memset(&hw, 0, sizeof(hw));
 	rapide_setup_ports(&hw, base, base + 0x818, 1 << 6, ec->irq);
+	hw.chipset = ide_generic;
 	hw.dev = &ec->dev;
 
-	ret = ide_host_add(&rapide_port_info, hws, 1, &host);
+	ret = ide_host_add(&rapide_port_info, hws, &host);
 	if (ret)
 		goto release;
 
diff --git a/trunk/drivers/ide/scc_pata.c b/trunk/drivers/ide/scc_pata.c
index 1104bb301eb9..5be41f25204f 100644
--- a/trunk/drivers/ide/scc_pata.c
+++ b/trunk/drivers/ide/scc_pata.c
@@ -559,7 +559,7 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev,
 {
 	struct scc_ports *ports = pci_get_drvdata(dev);
 	struct ide_host *host;
-	struct ide_hw hw, *hws[] = { &hw };
+	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 	int i, rc;
 
 	memset(&hw, 0, sizeof(hw));
@@ -567,8 +567,9 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev,
 		hw.io_ports_array[i] = ports->dma + 0x20 + i * 4;
 	hw.irq = dev->irq;
 	hw.dev = &dev->dev;
+	hw.chipset = ide_pci;
 
-	rc = ide_host_add(d, hws, 1, &host);
+	rc = ide_host_add(d, hws, &host);
 	if (rc)
 		return rc;
 
@@ -822,7 +823,6 @@ static const struct ide_port_info scc_chipset __devinitdata = {
 	.host_flags	= IDE_HFLAG_SINGLE,
 	.irq_flags	= IRQF_SHARED,
 	.pio_mask	= ATA_PIO4,
-	.chipset	= ide_pci,
 };
 
 /**
diff --git a/trunk/drivers/ide/setup-pci.c b/trunk/drivers/ide/setup-pci.c
index ab3db61d2ba0..7a3a12d6e638 100644
--- a/trunk/drivers/ide/setup-pci.c
+++ b/trunk/drivers/ide/setup-pci.c
@@ -1,7 +1,7 @@
 /*
  *  Copyright (C) 1998-2000  Andre Hedrick <andre@linux-ide.org>
  *  Copyright (C) 1995-1998  Mark Lord
- *  Copyright (C) 2007-2009  Bartlomiej Zolnierkiewicz
+ *  Copyright (C)      2007  Bartlomiej Zolnierkiewicz
  *
  *  May be copied or modified under the terms of the GNU General Public License
  */
@@ -301,11 +301,11 @@ static int ide_pci_check_iomem(struct pci_dev *dev, const struct ide_port_info *
 }
 
 /**
- *	ide_hw_configure	-	configure a struct ide_hw instance
+ *	ide_hw_configure	-	configure a hw_regs_t instance
  *	@dev: PCI device holding interface
  *	@d: IDE port info
  *	@port: port number
- *	@hw: struct ide_hw instance corresponding to this port
+ *	@hw: hw_regs_t instance corresponding to this port
  *
  *	Perform the initial set up for the hardware interface structure. This
  *	is done per interface port rather than per PCI device. There may be
@@ -315,7 +315,7 @@ static int ide_pci_check_iomem(struct pci_dev *dev, const struct ide_port_info *
  */
 
 static int ide_hw_configure(struct pci_dev *dev, const struct ide_port_info *d,
-			    unsigned int port, struct ide_hw *hw)
+			    unsigned int port, hw_regs_t *hw)
 {
 	unsigned long ctl = 0, base = 0;
 
@@ -344,6 +344,7 @@ static int ide_hw_configure(struct pci_dev *dev, const struct ide_port_info *d,
 
 	memset(hw, 0, sizeof(*hw));
 	hw->dev = &dev->dev;
+	hw->chipset = d->chipset ? d->chipset : ide_pci;
 	ide_std_init_ports(hw, base, ctl | 2);
 
 	return 0;
@@ -445,8 +446,8 @@ static int ide_setup_pci_controller(struct pci_dev *dev,
  *	ide_pci_setup_ports	-	configure ports/devices on PCI IDE
  *	@dev: PCI device
  *	@d: IDE port info
- *	@hw: struct ide_hw instances corresponding to this PCI IDE device
- *	@hws: struct ide_hw pointers table to update
+ *	@hw: hw_regs_t instances corresponding to this PCI IDE device
+ *	@hws: hw_regs_t pointers table to update
  *
  *	Scan the interfaces attached to this device and do any
  *	necessary per port setup. Attach the devices and ask the
@@ -458,7 +459,7 @@ static int ide_setup_pci_controller(struct pci_dev *dev,
  */
 
 void ide_pci_setup_ports(struct pci_dev *dev, const struct ide_port_info *d,
-			 struct ide_hw *hw, struct ide_hw **hws)
+			 hw_regs_t *hw, hw_regs_t **hws)
 {
 	int channels = (d->host_flags & IDE_HFLAG_SINGLE) ? 1 : 2, port;
 	u8 tmp;
@@ -534,15 +535,61 @@ static int do_ide_setup_pci_device(struct pci_dev *dev,
 	return ret;
 }
 
+/* Set up one PCI IDE device as a host; 0 on success, else the failing step's error. */
+int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
+		     void *priv)
+{
+	struct ide_host *host;
+	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
+	int ret;
+
+	ret = ide_setup_pci_controller(dev, d, 1);
+	if (ret < 0)
+		goto out;
+
+	ide_pci_setup_ports(dev, d, &hw[0], &hws[0]);
+
+	host = ide_host_alloc(d, hws);
+	if (host == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	host->dev[0] = &dev->dev;
+	host->host_priv = priv;
+	host->irq_flags = IRQF_SHARED;
+	pci_set_drvdata(dev, host);
+
+	ret = do_ide_setup_pci_device(dev, d, 1);
+	if (ret < 0)
+		goto out_free_host;	/* host is allocated by now: don't leak it */
+
+	/* fixup IRQ */
+	if (ide_pci_is_in_compatibility_mode(dev)) {
+		hw[0].irq = pci_get_legacy_ide_irq(dev, 0);
+		hw[1].irq = pci_get_legacy_ide_irq(dev, 1);
+	} else
+		hw[1].irq = hw[0].irq = ret;	/* ret holds the IRQ here */
+
+	ret = ide_host_register(host, d, hws);
+	if (ret == 0)
+		goto out;
+out_free_host:
+	ide_host_free(host);
+out:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ide_pci_init_one);
+
 int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2,
 		     const struct ide_port_info *d, void *priv)
 {
 	struct pci_dev *pdev[] = { dev1, dev2 };
 	struct ide_host *host;
-	int ret, i, n_ports = dev2 ? 4 : 2;
-	struct ide_hw hw[4], *hws[] = { NULL, NULL, NULL, NULL };
+	int ret, i;
+	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
 
-	for (i = 0; i < n_ports / 2; i++) {
+	for (i = 0; i < 2; i++) {
 		ret = ide_setup_pci_controller(pdev[i], d, !i);
 		if (ret < 0)
 			goto out;
@@ -550,24 +597,23 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2,
 		ide_pci_setup_ports(pdev[i], d, &hw[i*2], &hws[i*2]);
 	}
 
-	host = ide_host_alloc(d, hws, n_ports);
+	host = ide_host_alloc(d, hws);
 	if (host == NULL) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
 	host->dev[0] = &dev1->dev;
-	if (dev2)
-		host->dev[1] = &dev2->dev;
+	host->dev[1] = &dev2->dev;
 
 	host->host_priv = priv;
+
 	host->irq_flags = IRQF_SHARED;
 
 	pci_set_drvdata(pdev[0], host);
-	if (dev2)
-		pci_set_drvdata(pdev[1], host);
+	pci_set_drvdata(pdev[1], host);
 
-	for (i = 0; i < n_ports / 2; i++) {
+	for (i = 0; i < 2; i++) {
 		ret = do_ide_setup_pci_device(pdev[i], d, !i);
 
 		/*
@@ -593,13 +639,6 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2,
 }
 EXPORT_SYMBOL_GPL(ide_pci_init_two);
 
-int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
-		     void *priv)
-{
-	return ide_pci_init_two(dev, NULL, d, priv);
-}
-EXPORT_SYMBOL_GPL(ide_pci_init_one);
-
 void ide_pci_remove(struct pci_dev *dev)
 {
 	struct ide_host *host = pci_get_drvdata(dev);
diff --git a/trunk/drivers/ide/sgiioc4.c b/trunk/drivers/ide/sgiioc4.c
index 5f37f168f944..e5d2a48a84de 100644
--- a/trunk/drivers/ide/sgiioc4.c
+++ b/trunk/drivers/ide/sgiioc4.c
@@ -91,7 +91,7 @@ typedef struct {
 
 
 static void
-sgiioc4_init_hwif_ports(struct ide_hw *hw, unsigned long data_port,
+sgiioc4_init_hwif_ports(hw_regs_t * hw, unsigned long data_port,
 			unsigned long ctrl_port, unsigned long irq_port)
 {
 	unsigned long reg = data_port;
@@ -546,7 +546,7 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
 	unsigned long cmd_base, irqport;
 	unsigned long bar0, cmd_phys_base, ctl;
 	void __iomem *virt_base;
-	struct ide_hw hw, *hws[] = { &hw };
+	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 	int rc;
 
 	/*  Get the CmdBlk and CtrlBlk Base Registers */
@@ -575,12 +575,13 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
 	memset(&hw, 0, sizeof(hw));
 	sgiioc4_init_hwif_ports(&hw, cmd_base, ctl, irqport);
 	hw.irq = dev->irq;
+	hw.chipset = ide_pci;
 	hw.dev = &dev->dev;
 
 	/* Initializing chipset IRQ Registers */
 	writel(0x03, (void __iomem *)(irqport + IOC4_INTR_SET * 4));
 
-	rc = ide_host_add(&sgiioc4_port_info, hws, 1, NULL);
+	rc = ide_host_add(&sgiioc4_port_info, hws, NULL);
 	if (!rc)
 		return 0;
 
diff --git a/trunk/drivers/ide/siimage.c b/trunk/drivers/ide/siimage.c
index bd82d228608c..e4973cd1fba9 100644
--- a/trunk/drivers/ide/siimage.c
+++ b/trunk/drivers/ide/siimage.c
@@ -451,8 +451,8 @@ static int sil_sata_reset_poll(ide_drive_t *drive)
 static void sil_sata_pre_reset(ide_drive_t *drive)
 {
 	if (drive->media == ide_disk) {
-		drive->special_flags &=
-			~(IDE_SFLAG_SET_GEOMETRY | IDE_SFLAG_RECALIBRATE);
+		drive->special.b.set_geometry = 0;
+		drive->special.b.recalibrate = 0;
 	}
 }
 
diff --git a/trunk/drivers/ide/sl82c105.c b/trunk/drivers/ide/sl82c105.c
index 0924abff52ff..b0a460625335 100644
--- a/trunk/drivers/ide/sl82c105.c
+++ b/trunk/drivers/ide/sl82c105.c
@@ -10,7 +10,7 @@
  * with the timing registers setup.
  *  -- Benjamin Herrenschmidt (01/11/03) benh@kernel.crashing.org
  *
- * Copyright (C) 2006-2007,2009 MontaVista Software, Inc. <source@mvista.com>
+ * Copyright (C) 2006-2007 MontaVista Software, Inc. <source@mvista.com>
  * Copyright (C)      2007 Bartlomiej Zolnierkiewicz
  */
 
@@ -146,15 +146,14 @@ static void sl82c105_dma_lost_irq(ide_drive_t *drive)
 	u32 val, mask		= hwif->channel ? CTRL_IDE_IRQB : CTRL_IDE_IRQA;
 	u8 dma_cmd;
 
-	printk(KERN_WARNING "sl82c105: lost IRQ, resetting host\n");
+	printk("sl82c105: lost IRQ, resetting host\n");
 
 	/*
 	 * Check the raw interrupt from the drive.
 	 */
 	pci_read_config_dword(dev, 0x40, &val);
 	if (val & mask)
-		printk(KERN_INFO "sl82c105: drive was requesting IRQ, "
-		       "but host lost it\n");
+		printk("sl82c105: drive was requesting IRQ, but host lost it\n");
 
 	/*
 	 * Was DMA enabled?  If so, disable it - we're resetting the
@@ -163,7 +162,7 @@ static void sl82c105_dma_lost_irq(ide_drive_t *drive)
 	dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD);
 	if (dma_cmd & 1) {
 		outb(dma_cmd & ~1, hwif->dma_base + ATA_DMA_CMD);
-		printk(KERN_INFO "sl82c105: DMA was enabled\n");
+		printk("sl82c105: DMA was enabled\n");
 	}
 
 	sl82c105_reset_host(dev);
diff --git a/trunk/drivers/ide/tx4938ide.c b/trunk/drivers/ide/tx4938ide.c
index ea89fddeed91..e33d764e2945 100644
--- a/trunk/drivers/ide/tx4938ide.c
+++ b/trunk/drivers/ide/tx4938ide.c
@@ -130,7 +130,8 @@ static const struct ide_port_info tx4938ide_port_info __initdata = {
 
 static int __init tx4938ide_probe(struct platform_device *pdev)
 {
-	struct ide_hw hw, *hws[] = { &hw };
+	hw_regs_t hw;
+	hw_regs_t *hws[] = { &hw, NULL, NULL, NULL };
 	struct ide_host *host;
 	struct resource *res;
 	struct tx4938ide_platform_info *pdata = pdev->dev.platform_data;
@@ -182,7 +183,7 @@ static int __init tx4938ide_probe(struct platform_device *pdev)
 		tx4938ide_tune_ebusc(pdata->ebus_ch, pdata->gbus_clock, 0);
 	else
 		d.port_ops = NULL;
-	ret = ide_host_add(&d, hws, 1, &host);
+	ret = ide_host_add(&d, hws, &host);
 	if (!ret)
 		platform_set_drvdata(pdev, host);
 	return ret;
diff --git a/trunk/drivers/ide/tx4939ide.c b/trunk/drivers/ide/tx4939ide.c
index 64b58ecc3f0e..5ca76224f6d1 100644
--- a/trunk/drivers/ide/tx4939ide.c
+++ b/trunk/drivers/ide/tx4939ide.c
@@ -537,7 +537,8 @@ static const struct ide_port_info tx4939ide_port_info __initdata = {
 
 static int __init tx4939ide_probe(struct platform_device *pdev)
 {
-	struct ide_hw hw, *hws[] = { &hw };
+	hw_regs_t hw;
+	hw_regs_t *hws[] = { &hw, NULL, NULL, NULL };
 	struct ide_host *host;
 	struct resource *res;
 	int irq, ret;
@@ -580,7 +581,7 @@ static int __init tx4939ide_probe(struct platform_device *pdev)
 	hw.dev = &pdev->dev;
 
 	pr_info("TX4939 IDE interface (base %#lx, irq %d)\n", mapbase, irq);
-	host = ide_host_alloc(&tx4939ide_port_info, hws, 1);
+	host = ide_host_alloc(&tx4939ide_port_info, hws);
 	if (!host)
 		return -ENOMEM;
 	/* use extra_base for base address of the all registers */
diff --git a/trunk/drivers/lguest/Kconfig b/trunk/drivers/lguest/Kconfig
index 0aaa0597a622..a3d3cbab359a 100644
--- a/trunk/drivers/lguest/Kconfig
+++ b/trunk/drivers/lguest/Kconfig
@@ -1,6 +1,6 @@
 config LGUEST
 	tristate "Linux hypervisor example code"
-	depends on X86_32 && EXPERIMENTAL && EVENTFD
+	depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX
 	select HVC_DRIVER
 	---help---
 	  This is a very simple module which allows you to run
diff --git a/trunk/drivers/lguest/core.c b/trunk/drivers/lguest/core.c
index a6974e9b8ebf..4845fb3cf74b 100644
--- a/trunk/drivers/lguest/core.c
+++ b/trunk/drivers/lguest/core.c
@@ -95,7 +95,7 @@ static __init int map_switcher(void)
 	 * array of struct pages.  It increments that pointer, but we don't
 	 * care. */
 	pagep = switcher_page;
-	err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep);
+	err = map_vm_area(switcher_vma, PAGE_KERNEL, &pagep);
 	if (err) {
 		printk("lguest: map_vm_area failed: %i\n", err);
 		goto free_vma;
@@ -188,9 +188,6 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 {
 	/* We stop running once the Guest is dead. */
 	while (!cpu->lg->dead) {
-		unsigned int irq;
-		bool more;
-
 		/* First we run any hypercalls the Guest wants done. */
 		if (cpu->hcall)
 			do_hypercalls(cpu);
@@ -198,23 +195,23 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 		/* It's possible the Guest did a NOTIFY hypercall to the
 		 * Launcher, in which case we return from the read() now. */
 		if (cpu->pending_notify) {
-			if (!send_notify_to_eventfd(cpu)) {
-				if (put_user(cpu->pending_notify, user))
-					return -EFAULT;
-				return sizeof(cpu->pending_notify);
-			}
+			if (put_user(cpu->pending_notify, user))
+				return -EFAULT;
+			return sizeof(cpu->pending_notify);
 		}
 
 		/* Check for signals */
 		if (signal_pending(current))
 			return -ERESTARTSYS;
 
+		/* If Waker set break_out, return to Launcher. */
+		if (cpu->break_out)
+			return -EAGAIN;
+
 		/* Check if there are any interrupts which can be delivered now:
 		 * if so, this sets up the hander to be executed when we next
 		 * run the Guest. */
-		irq = interrupt_pending(cpu, &more);
-		if (irq < LGUEST_IRQS)
-			try_deliver_interrupt(cpu, irq, more);
+		maybe_do_interrupt(cpu);
 
 		/* All long-lived kernel loops need to check with this horrible
 		 * thing called the freezer.  If the Host is trying to suspend,
@@ -227,15 +224,10 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 			break;
 
 		/* If the Guest asked to be stopped, we sleep.  The Guest's
-		 * clock timer will wake us. */
+		 * clock timer or LHREQ_BREAK from the Waker will wake us. */
 		if (cpu->halted) {
 			set_current_state(TASK_INTERRUPTIBLE);
-			/* Just before we sleep, make sure no interrupt snuck in
-			 * which we should be doing. */
-			if (interrupt_pending(cpu, &more) < LGUEST_IRQS)
-				set_current_state(TASK_RUNNING);
-			else
-				schedule();
+			schedule();
 			continue;
 		}
 
diff --git a/trunk/drivers/lguest/hypercalls.c b/trunk/drivers/lguest/hypercalls.c
index c29ffa19cb74..54d66f05fefa 100644
--- a/trunk/drivers/lguest/hypercalls.c
+++ b/trunk/drivers/lguest/hypercalls.c
@@ -37,10 +37,6 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
 		/* This call does nothing, except by breaking out of the Guest
 		 * it makes us process all the asynchronous hypercalls. */
 		break;
-	case LHCALL_SEND_INTERRUPTS:
-		/* This call does nothing too, but by breaking out of the Guest
-		 * it makes us process any pending interrupts. */
-		break;
 	case LHCALL_LGUEST_INIT:
 		/* You can't get here unless you're already initialized.  Don't
 		 * do that. */
@@ -77,21 +73,11 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
 		guest_set_stack(cpu, args->arg1, args->arg2, args->arg3);
 		break;
 	case LHCALL_SET_PTE:
-#ifdef CONFIG_X86_PAE
-		guest_set_pte(cpu, args->arg1, args->arg2,
-				__pte(args->arg3 | (u64)args->arg4 << 32));
-#else
 		guest_set_pte(cpu, args->arg1, args->arg2, __pte(args->arg3));
-#endif
-		break;
-	case LHCALL_SET_PGD:
-		guest_set_pgd(cpu->lg, args->arg1, args->arg2);
 		break;
-#ifdef CONFIG_X86_PAE
 	case LHCALL_SET_PMD:
 		guest_set_pmd(cpu->lg, args->arg1, args->arg2);
 		break;
-#endif
 	case LHCALL_SET_CLOCKEVENT:
 		guest_set_clockevent(cpu, args->arg1);
 		break;
diff --git a/trunk/drivers/lguest/interrupts_and_traps.c b/trunk/drivers/lguest/interrupts_and_traps.c
index 0e9067b0d507..6e99adbe1946 100644
--- a/trunk/drivers/lguest/interrupts_and_traps.c
+++ b/trunk/drivers/lguest/interrupts_and_traps.c
@@ -128,39 +128,30 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
 /*H:205
  * Virtual Interrupts.
  *
- * interrupt_pending() returns the first pending interrupt which isn't blocked
- * by the Guest.  It is called before every entry to the Guest, and just before
- * we go to sleep when the Guest has halted itself. */
-unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more)
+ * maybe_do_interrupt() gets called before every entry to the Guest, to see if
+ * we should divert the Guest to running an interrupt handler. */
+void maybe_do_interrupt(struct lg_cpu *cpu)
 {
 	unsigned int irq;
 	DECLARE_BITMAP(blk, LGUEST_IRQS);
+	struct desc_struct *idt;
 
 	/* If the Guest hasn't even initialized yet, we can do nothing. */
 	if (!cpu->lg->lguest_data)
-		return LGUEST_IRQS;
+		return;
 
 	/* Take our "irqs_pending" array and remove any interrupts the Guest
 	 * wants blocked: the result ends up in "blk". */
 	if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts,
 			   sizeof(blk)))
-		return LGUEST_IRQS;
+		return;
 	bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS);
 
 	/* Find the first interrupt. */
 	irq = find_first_bit(blk, LGUEST_IRQS);
-	*more = find_next_bit(blk, LGUEST_IRQS, irq+1);
-
-	return irq;
-}
-
-/* This actually diverts the Guest to running an interrupt handler, once an
- * interrupt has been identified by interrupt_pending(). */
-void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
-{
-	struct desc_struct *idt;
-
-	BUG_ON(irq >= LGUEST_IRQS);
+	/* None?  Nothing to do */
+	if (irq >= LGUEST_IRQS)
+		return;
 
 	/* They may be in the middle of an iret, where they asked us never to
 	 * deliver interrupts. */
@@ -179,12 +170,8 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
 		u32 irq_enabled;
 		if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled))
 			irq_enabled = 0;
-		if (!irq_enabled) {
-			/* Make sure they know an IRQ is pending. */
-			put_user(X86_EFLAGS_IF,
-				 &cpu->lg->lguest_data->irq_pending);
+		if (!irq_enabled)
 			return;
-		}
 	}
 
 	/* Look at the IDT entry the Guest gave us for this interrupt.  The
@@ -207,25 +194,6 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
 	 * here is a compromise which means at least it gets updated every
 	 * timer interrupt. */
 	write_timestamp(cpu);
-
-	/* If there are no other interrupts we want to deliver, clear
-	 * the pending flag. */
-	if (!more)
-		put_user(0, &cpu->lg->lguest_data->irq_pending);
-}
-
-/* And this is the routine when we want to set an interrupt for the Guest. */
-void set_interrupt(struct lg_cpu *cpu, unsigned int irq)
-{
-	/* Next time the Guest runs, the core code will see if it can deliver
-	 * this interrupt. */
-	set_bit(irq, cpu->irqs_pending);
-
-	/* Make sure it sees it; it might be asleep (eg. halted), or
-	 * running the Guest right now, in which case kick_process()
-	 * will knock it out. */
-	if (!wake_up_process(cpu->tsk))
-		kick_process(cpu->tsk);
 }
 /*:*/
 
@@ -542,7 +510,10 @@ static enum hrtimer_restart clockdev_fn(struct hrtimer *timer)
 	struct lg_cpu *cpu = container_of(timer, struct lg_cpu, hrt);
 
 	/* Remember the first interrupt is the timer interrupt. */
-	set_interrupt(cpu, 0);
+	set_bit(0, cpu->irqs_pending);
+	/* If the Guest is actually stopped, we need to wake it up. */
+	if (cpu->halted)
+		wake_up_process(cpu->tsk);
 	return HRTIMER_NORESTART;
 }
 
diff --git a/trunk/drivers/lguest/lg.h b/trunk/drivers/lguest/lg.h
index d4e8979735cb..af92a176697f 100644
--- a/trunk/drivers/lguest/lg.h
+++ b/trunk/drivers/lguest/lg.h
@@ -49,7 +49,7 @@ struct lg_cpu {
 	u32 cr2;
 	int ts;
 	u32 esp1;
-	u16 ss1;
+	u8 ss1;
 
 	/* Bitmap of what has changed: see CHANGED_* above. */
 	int changed;
@@ -71,7 +71,9 @@ struct lg_cpu {
 	/* Virtual clock device */
 	struct hrtimer hrt;
 
-	/* Did the Guest tell us to halt? */
+	/* Do we need to stop what we're doing and return to userspace? */
+	int break_out;
+	wait_queue_head_t break_wq;
 	int halted;
 
 	/* Pending virtual interrupts */
@@ -80,16 +82,6 @@ struct lg_cpu {
 	struct lg_cpu_arch arch;
 };
 
-struct lg_eventfd {
-	unsigned long addr;
-	struct file *event;
-};
-
-struct lg_eventfd_map {
-	unsigned int num;
-	struct lg_eventfd map[];
-};
-
 /* The private info the thread maintains about the guest. */
 struct lguest
 {
@@ -110,8 +102,6 @@ struct lguest
 	unsigned int stack_pages;
 	u32 tsc_khz;
 
-	struct lg_eventfd_map *eventfds;
-
 	/* Dead? */
 	const char *dead;
 };
@@ -147,13 +137,9 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user);
  * in the kernel. */
 #define pgd_flags(x)	(pgd_val(x) & ~PAGE_MASK)
 #define pgd_pfn(x)	(pgd_val(x) >> PAGE_SHIFT)
-#define pmd_flags(x)    (pmd_val(x) & ~PAGE_MASK)
-#define pmd_pfn(x)	(pmd_val(x) >> PAGE_SHIFT)
 
 /* interrupts_and_traps.c: */
-unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more);
-void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more);
-void set_interrupt(struct lg_cpu *cpu, unsigned int irq);
+void maybe_do_interrupt(struct lg_cpu *cpu);
 bool deliver_trap(struct lg_cpu *cpu, unsigned int num);
 void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int i,
 			  u32 low, u32 hi);
@@ -164,7 +150,6 @@ void setup_default_idt_entries(struct lguest_ro_state *state,
 void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt,
 		const unsigned long *def);
 void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta);
-bool send_notify_to_eventfd(struct lg_cpu *cpu);
 void init_clockdev(struct lg_cpu *cpu);
 bool check_syscall_vector(struct lguest *lg);
 int init_interrupts(void);
@@ -183,10 +168,7 @@ void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt);
 int init_guest_pagetable(struct lguest *lg);
 void free_guest_pagetable(struct lguest *lg);
 void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable);
-void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 i);
-#ifdef CONFIG_X86_PAE
 void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i);
-#endif
 void guest_pagetable_clear_all(struct lg_cpu *cpu);
 void guest_pagetable_flush_user(struct lg_cpu *cpu);
 void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir,
diff --git a/trunk/drivers/lguest/lguest_device.c b/trunk/drivers/lguest/lguest_device.c
index e082cdac88b4..df44d962626d 100644
--- a/trunk/drivers/lguest/lguest_device.c
+++ b/trunk/drivers/lguest/lguest_device.c
@@ -228,8 +228,7 @@ extern void lguest_setup_irq(unsigned int irq);
  * function. */
 static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 				    unsigned index,
-				    void (*callback)(struct virtqueue *vq),
-				    const char *name)
+				    void (*callback)(struct virtqueue *vq))
 {
 	struct lguest_device *ldev = to_lgdev(vdev);
 	struct lguest_vq_info *lvq;
@@ -264,7 +263,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 	/* OK, tell virtio_ring.c to set up a virtqueue now we know its size
 	 * and we've got a pointer to its pages. */
 	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN,
-				 vdev, lvq->pages, lg_notify, callback, name);
+				 vdev, lvq->pages, lg_notify, callback);
 	if (!vq) {
 		err = -ENOMEM;
 		goto unmap;
@@ -313,38 +312,6 @@ static void lg_del_vq(struct virtqueue *vq)
 	kfree(lvq);
 }
 
-static void lg_del_vqs(struct virtio_device *vdev)
-{
-	struct virtqueue *vq, *n;
-
-	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
-		lg_del_vq(vq);
-}
-
-static int lg_find_vqs(struct virtio_device *vdev, unsigned nvqs,
-		       struct virtqueue *vqs[],
-		       vq_callback_t *callbacks[],
-		       const char *names[])
-{
-	struct lguest_device *ldev = to_lgdev(vdev);
-	int i;
-
-	/* We must have this many virtqueues. */
-	if (nvqs > ldev->desc->num_vq)
-		return -ENOENT;
-
-	for (i = 0; i < nvqs; ++i) {
-		vqs[i] = lg_find_vq(vdev, i, callbacks[i], names[i]);
-		if (IS_ERR(vqs[i]))
-			goto error;
-	}
-	return 0;
-
-error:
-	lg_del_vqs(vdev);
-	return PTR_ERR(vqs[i]);
-}
-
 /* The ops structure which hooks everything together. */
 static struct virtio_config_ops lguest_config_ops = {
 	.get_features = lg_get_features,
@@ -354,8 +321,8 @@ static struct virtio_config_ops lguest_config_ops = {
 	.get_status = lg_get_status,
 	.set_status = lg_set_status,
 	.reset = lg_reset,
-	.find_vqs = lg_find_vqs,
-	.del_vqs = lg_del_vqs,
+	.find_vq = lg_find_vq,
+	.del_vq = lg_del_vq,
 };
 
 /* The root device for the lguest virtio devices.  This makes them appear as
diff --git a/trunk/drivers/lguest/lguest_user.c b/trunk/drivers/lguest/lguest_user.c
index 32e297121058..b8ee103eed5f 100644
--- a/trunk/drivers/lguest/lguest_user.c
+++ b/trunk/drivers/lguest/lguest_user.c
@@ -7,83 +7,32 @@
 #include <linux/miscdevice.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
-#include <linux/eventfd.h>
-#include <linux/file.h>
 #include "lg.h"
 
-bool send_notify_to_eventfd(struct lg_cpu *cpu)
+/*L:055 When something happens, the Waker process needs a way to stop the
+ * kernel running the Guest and return to the Launcher.  So the Waker writes
+ * LHREQ_BREAK and the value "1" to /dev/lguest to do this.  Once the Launcher
+ * has done whatever needs attention, it writes LHREQ_BREAK and "0" to release
+ * the Waker. */
+static int break_guest_out(struct lg_cpu *cpu, const unsigned long __user*input)
 {
-	unsigned int i;
-	struct lg_eventfd_map *map;
-
-	/* lg->eventfds is RCU-protected */
-	rcu_read_lock();
-	map = rcu_dereference(cpu->lg->eventfds);
-	for (i = 0; i < map->num; i++) {
-		if (map->map[i].addr == cpu->pending_notify) {
-			eventfd_signal(map->map[i].event, 1);
-			cpu->pending_notify = 0;
-			break;
-		}
-	}
-	rcu_read_unlock();
-	return cpu->pending_notify == 0;
-}
-
-static int add_eventfd(struct lguest *lg, unsigned long addr, int fd)
-{
-	struct lg_eventfd_map *new, *old = lg->eventfds;
-
-	if (!addr)
-		return -EINVAL;
-
-	/* Replace the old array with the new one, carefully: others can
-	 * be accessing it at the same time */
-	new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1),
-		      GFP_KERNEL);
-	if (!new)
-		return -ENOMEM;
-
-	/* First make identical copy. */
-	memcpy(new->map, old->map, sizeof(old->map[0]) * old->num);
-	new->num = old->num;
-
-	/* Now append new entry. */
-	new->map[new->num].addr = addr;
-	new->map[new->num].event = eventfd_fget(fd);
-	if (IS_ERR(new->map[new->num].event)) {
-		kfree(new);
-		return PTR_ERR(new->map[new->num].event);
-	}
-	new->num++;
-
-	/* Now put new one in place. */
-	rcu_assign_pointer(lg->eventfds, new);
-
-	/* We're not in a big hurry.  Wait until noone's looking at old
-	 * version, then delete it. */
-	synchronize_rcu();
-	kfree(old);
-
-	return 0;
-}
-
-static int attach_eventfd(struct lguest *lg, const unsigned long __user *input)
-{
-	unsigned long addr, fd;
-	int err;
+	unsigned long on;
 
-	if (get_user(addr, input) != 0)
-		return -EFAULT;
-	input++;
-	if (get_user(fd, input) != 0)
+	/* Fetch whether they're turning break on or off. */
+	if (get_user(on, input) != 0)
 		return -EFAULT;
 
-	mutex_lock(&lguest_lock);
-	err = add_eventfd(lg, addr, fd);
-	mutex_unlock(&lguest_lock);
-
-	return 0;
+	if (on) {
+		cpu->break_out = 1;
+		/* Pop it out of the Guest (may be running on different CPU) */
+		wake_up_process(cpu->tsk);
+		/* Wait for them to reset it */
+		return wait_event_interruptible(cpu->break_wq, !cpu->break_out);
+	} else {
+		cpu->break_out = 0;
+		wake_up(&cpu->break_wq);
+		return 0;
+	}
 }
 
 /*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt
@@ -96,8 +45,9 @@ static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input)
 		return -EFAULT;
 	if (irq >= LGUEST_IRQS)
 		return -EINVAL;
-
-	set_interrupt(cpu, irq);
+	/* Next time the Guest runs, the core code will see if it can deliver
+	 * this interrupt. */
+	set_bit(irq, cpu->irqs_pending);
 	return 0;
 }
 
@@ -176,6 +126,9 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip)
 	 * address. */
 	lguest_arch_setup_regs(cpu, start_ip);
 
+	/* Initialize the queue for the Waker to wait on */
+	init_waitqueue_head(&cpu->break_wq);
+
 	/* We keep a pointer to the Launcher task (ie. current task) for when
 	 * other Guests want to wake this one (eg. console input). */
 	cpu->tsk = current;
@@ -232,13 +185,6 @@ static int initialize(struct file *file, const unsigned long __user *input)
 		goto unlock;
 	}
 
-	lg->eventfds = kmalloc(sizeof(*lg->eventfds), GFP_KERNEL);
-	if (!lg->eventfds) {
-		err = -ENOMEM;
-		goto free_lg;
-	}
-	lg->eventfds->num = 0;
-
 	/* Populate the easy fields of our "struct lguest" */
 	lg->mem_base = (void __user *)args[0];
 	lg->pfn_limit = args[1];
@@ -246,7 +192,7 @@ static int initialize(struct file *file, const unsigned long __user *input)
 	/* This is the first cpu (cpu 0) and it will start booting at args[2] */
 	err = lg_cpu_start(&lg->cpus[0], 0, args[2]);
 	if (err)
-		goto free_eventfds;
+		goto release_guest;
 
 	/* Initialize the Guest's shadow page tables, using the toplevel
 	 * address the Launcher gave us.  This allocates memory, so can fail. */
@@ -265,9 +211,7 @@ static int initialize(struct file *file, const unsigned long __user *input)
 free_regs:
 	/* FIXME: This should be in free_vcpu */
 	free_page(lg->cpus[0].regs_page);
-free_eventfds:
-	kfree(lg->eventfds);
-free_lg:
+release_guest:
 	kfree(lg);
 unlock:
 	mutex_unlock(&lguest_lock);
@@ -308,6 +252,11 @@ static ssize_t write(struct file *file, const char __user *in,
 		/* Once the Guest is dead, you can only read() why it died. */
 		if (lg->dead)
 			return -ENOENT;
+
+		/* If you're not the task which owns the Guest, all you can do
+		 * is break the Launcher out of running the Guest. */
+		if (current != cpu->tsk && req != LHREQ_BREAK)
+			return -EPERM;
 	}
 
 	switch (req) {
@@ -315,8 +264,8 @@ static ssize_t write(struct file *file, const char __user *in,
 		return initialize(file, input);
 	case LHREQ_IRQ:
 		return user_send_irq(cpu, input);
-	case LHREQ_EVENTFD:
-		return attach_eventfd(lg, input);
+	case LHREQ_BREAK:
+		return break_guest_out(cpu, input);
 	default:
 		return -EINVAL;
 	}
@@ -354,12 +303,6 @@ static int close(struct inode *inode, struct file *file)
 		 * the Launcher's memory management structure. */
 		mmput(lg->cpus[i].mm);
 	}
-
-	/* Release any eventfds they registered. */
-	for (i = 0; i < lg->eventfds->num; i++)
-		fput(lg->eventfds->map[i].event);
-	kfree(lg->eventfds);
-
 	/* If lg->dead doesn't contain an error code it will be NULL or a
 	 * kmalloc()ed string, either of which is ok to hand to kfree(). */
 	if (!IS_ERR(lg->dead))
diff --git a/trunk/drivers/lguest/page_tables.c b/trunk/drivers/lguest/page_tables.c
index a6fe1abda240..a059cf9980f7 100644
--- a/trunk/drivers/lguest/page_tables.c
+++ b/trunk/drivers/lguest/page_tables.c
@@ -53,17 +53,6 @@
  * page.  */
 #define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1)
 
-/* For PAE we need the PMD index as well. We use the last 2MB, so we
- * will need the last pmd entry of the last pmd page.  */
-#ifdef CONFIG_X86_PAE
-#define SWITCHER_PMD_INDEX 	(PTRS_PER_PMD - 1)
-#define RESERVE_MEM 		2U
-#define CHECK_GPGD_MASK		_PAGE_PRESENT
-#else
-#define RESERVE_MEM 		4U
-#define CHECK_GPGD_MASK		_PAGE_TABLE
-#endif
-
 /* We actually need a separate PTE page for each CPU.  Remember that after the
  * Switcher code itself comes two pages for each CPU, and we don't want this
  * CPU's guest to see the pages of any other CPU. */
@@ -84,59 +73,24 @@ static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr)
 {
 	unsigned int index = pgd_index(vaddr);
 
-#ifndef CONFIG_X86_PAE
 	/* We kill any Guest trying to touch the Switcher addresses. */
 	if (index >= SWITCHER_PGD_INDEX) {
 		kill_guest(cpu, "attempt to access switcher pages");
 		index = 0;
 	}
-#endif
 	/* Return a pointer index'th pgd entry for the i'th page table. */
 	return &cpu->lg->pgdirs[i].pgdir[index];
 }
 
-#ifdef CONFIG_X86_PAE
-/* This routine then takes the PGD entry given above, which contains the
- * address of the PMD page.  It then returns a pointer to the PMD entry for the
- * given address. */
-static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
-{
-	unsigned int index = pmd_index(vaddr);
-	pmd_t *page;
-
-	/* We kill any Guest trying to touch the Switcher addresses. */
-	if (pgd_index(vaddr) == SWITCHER_PGD_INDEX &&
-					index >= SWITCHER_PMD_INDEX) {
-		kill_guest(cpu, "attempt to access switcher pages");
-		index = 0;
-	}
-
-	/* You should never call this if the PGD entry wasn't valid */
-	BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
-	page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
-
-	return &page[index];
-}
-#endif
-
 /* This routine then takes the page directory entry returned above, which
  * contains the address of the page table entry (PTE) page.  It then returns a
  * pointer to the PTE entry for the given address. */
-static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
+static pte_t *spte_addr(pgd_t spgd, unsigned long vaddr)
 {
-#ifdef CONFIG_X86_PAE
-	pmd_t *pmd = spmd_addr(cpu, spgd, vaddr);
-	pte_t *page = __va(pmd_pfn(*pmd) << PAGE_SHIFT);
-
-	/* You should never call this if the PMD entry wasn't valid */
-	BUG_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT));
-#else
 	pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
 	/* You should never call this if the PGD entry wasn't valid */
 	BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
-#endif
-
-	return &page[pte_index(vaddr)];
+	return &page[(vaddr >> PAGE_SHIFT) % PTRS_PER_PTE];
 }
 
 /* These two functions just like the above two, except they access the Guest
@@ -147,32 +101,12 @@ static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr)
 	return cpu->lg->pgdirs[cpu->cpu_pgd].gpgdir + index * sizeof(pgd_t);
 }
 
-#ifdef CONFIG_X86_PAE
-static unsigned long gpmd_addr(pgd_t gpgd, unsigned long vaddr)
-{
-	unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
-	BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
-	return gpage + pmd_index(vaddr) * sizeof(pmd_t);
-}
-
-static unsigned long gpte_addr(struct lg_cpu *cpu,
-			       pmd_t gpmd, unsigned long vaddr)
-{
-	unsigned long gpage = pmd_pfn(gpmd) << PAGE_SHIFT;
-
-	BUG_ON(!(pmd_flags(gpmd) & _PAGE_PRESENT));
-	return gpage + pte_index(vaddr) * sizeof(pte_t);
-}
-#else
-static unsigned long gpte_addr(struct lg_cpu *cpu,
-				pgd_t gpgd, unsigned long vaddr)
+static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr)
 {
 	unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
-
 	BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
-	return gpage + pte_index(vaddr) * sizeof(pte_t);
+	return gpage + ((vaddr>>PAGE_SHIFT) % PTRS_PER_PTE) * sizeof(pte_t);
 }
-#endif
 /*:*/
 
 /*M:014 get_pfn is slow: we could probably try to grab batches of pages here as
@@ -237,7 +171,7 @@ static void release_pte(pte_t pte)
 	/* Remember that get_user_pages_fast() took a reference to the page, in
 	 * get_pfn()?  We have to put it back now. */
 	if (pte_flags(pte) & _PAGE_PRESENT)
-		put_page(pte_page(pte));
+		put_page(pfn_to_page(pte_pfn(pte)));
 }
 /*:*/
 
@@ -250,20 +184,11 @@ static void check_gpte(struct lg_cpu *cpu, pte_t gpte)
 
 static void check_gpgd(struct lg_cpu *cpu, pgd_t gpgd)
 {
-	if ((pgd_flags(gpgd) & ~CHECK_GPGD_MASK) ||
+	if ((pgd_flags(gpgd) & ~_PAGE_TABLE) ||
 	   (pgd_pfn(gpgd) >= cpu->lg->pfn_limit))
 		kill_guest(cpu, "bad page directory entry");
 }
 
-#ifdef CONFIG_X86_PAE
-static void check_gpmd(struct lg_cpu *cpu, pmd_t gpmd)
-{
-	if ((pmd_flags(gpmd) & ~_PAGE_TABLE) ||
-	   (pmd_pfn(gpmd) >= cpu->lg->pfn_limit))
-		kill_guest(cpu, "bad page middle directory entry");
-}
-#endif
-
 /*H:330
  * (i) Looking up a page table entry when the Guest faults.
  *
@@ -282,11 +207,6 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 	pte_t gpte;
 	pte_t *spte;
 
-#ifdef CONFIG_X86_PAE
-	pmd_t *spmd;
-	pmd_t gpmd;
-#endif
-
 	/* First step: get the top-level Guest page table entry. */
 	gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
 	/* Toplevel not present?  We can't map it in. */
@@ -308,45 +228,12 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 		check_gpgd(cpu, gpgd);
 		/* And we copy the flags to the shadow PGD entry.  The page
 		 * number in the shadow PGD is the page we just allocated. */
-		set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags(gpgd)));
+		*spgd = __pgd(__pa(ptepage) | pgd_flags(gpgd));
 	}
 
-#ifdef CONFIG_X86_PAE
-	gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
-	/* middle level not present?  We can't map it in. */
-	if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
-		return false;
-
-	/* Now look at the matching shadow entry. */
-	spmd = spmd_addr(cpu, *spgd, vaddr);
-
-	if (!(pmd_flags(*spmd) & _PAGE_PRESENT)) {
-		/* No shadow entry: allocate a new shadow PTE page. */
-		unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
-
-		/* This is not really the Guest's fault, but killing it is
-		* simple for this corner case. */
-		if (!ptepage) {
-			kill_guest(cpu, "out of memory allocating pte page");
-			return false;
-		}
-
-		/* We check that the Guest pmd is OK. */
-		check_gpmd(cpu, gpmd);
-
-		/* And we copy the flags to the shadow PMD entry.  The page
-		 * number in the shadow PMD is the page we just allocated. */
-		native_set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd)));
-	}
-
-	/* OK, now we look at the lower level in the Guest page table: keep its
-	 * address, because we might update it later. */
-	gpte_ptr = gpte_addr(cpu, gpmd, vaddr);
-#else
 	/* OK, now we look at the lower level in the Guest page table: keep its
 	 * address, because we might update it later. */
-	gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
-#endif
+	gpte_ptr = gpte_addr(gpgd, vaddr);
 	gpte = lgread(cpu, gpte_ptr, pte_t);
 
 	/* If this page isn't in the Guest page tables, we can't page it in. */
@@ -372,7 +259,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 		gpte = pte_mkdirty(gpte);
 
 	/* Get the pointer to the shadow PTE entry we're going to set. */
-	spte = spte_addr(cpu, *spgd, vaddr);
+	spte = spte_addr(*spgd, vaddr);
 	/* If there was a valid shadow PTE entry here before, we release it.
 	 * This can happen with a write to a previously read-only entry. */
 	release_pte(*spte);
@@ -386,7 +273,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 		 * table entry, even if the Guest says it's writable.  That way
 		 * we will come back here when a write does actually occur, so
 		 * we can update the Guest's _PAGE_DIRTY flag. */
-		native_set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0));
+		*spte = gpte_to_spte(cpu, pte_wrprotect(gpte), 0);
 
 	/* Finally, we write the Guest PTE entry back: we've set the
 	 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */
@@ -414,23 +301,14 @@ static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr)
 	pgd_t *spgd;
 	unsigned long flags;
 
-#ifdef CONFIG_X86_PAE
-	pmd_t *spmd;
-#endif
 	/* Look at the current top level entry: is it present? */
 	spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
 	if (!(pgd_flags(*spgd) & _PAGE_PRESENT))
 		return false;
 
-#ifdef CONFIG_X86_PAE
-	spmd = spmd_addr(cpu, *spgd, vaddr);
-	if (!(pmd_flags(*spmd) & _PAGE_PRESENT))
-		return false;
-#endif
-
 	/* Check the flags on the pte entry itself: it must be present and
 	 * writable. */
-	flags = pte_flags(*(spte_addr(cpu, *spgd, vaddr)));
+	flags = pte_flags(*(spte_addr(*spgd, vaddr)));
 
 	return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
 }
@@ -444,43 +322,8 @@ void pin_page(struct lg_cpu *cpu, unsigned long vaddr)
 		kill_guest(cpu, "bad stack page %#lx", vaddr);
 }
 
-#ifdef CONFIG_X86_PAE
-static void release_pmd(pmd_t *spmd)
-{
-	/* If the entry's not present, there's nothing to release. */
-	if (pmd_flags(*spmd) & _PAGE_PRESENT) {
-		unsigned int i;
-		pte_t *ptepage = __va(pmd_pfn(*spmd) << PAGE_SHIFT);
-		/* For each entry in the page, we might need to release it. */
-		for (i = 0; i < PTRS_PER_PTE; i++)
-			release_pte(ptepage[i]);
-		/* Now we can free the page of PTEs */
-		free_page((long)ptepage);
-		/* And zero out the PMD entry so we never release it twice. */
-		native_set_pmd(spmd, __pmd(0));
-	}
-}
-
-static void release_pgd(pgd_t *spgd)
-{
-	/* If the entry's not present, there's nothing to release. */
-	if (pgd_flags(*spgd) & _PAGE_PRESENT) {
-		unsigned int i;
-		pmd_t *pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
-
-		for (i = 0; i < PTRS_PER_PMD; i++)
-			release_pmd(&pmdpage[i]);
-
-		/* Now we can free the page of PMDs */
-		free_page((long)pmdpage);
-		/* And zero out the PGD entry so we never release it twice. */
-		set_pgd(spgd, __pgd(0));
-	}
-}
-
-#else /* !CONFIG_X86_PAE */
 /*H:450 If we chase down the release_pgd() code, it looks like this: */
-static void release_pgd(pgd_t *spgd)
+static void release_pgd(struct lguest *lg, pgd_t *spgd)
 {
 	/* If the entry's not present, there's nothing to release. */
 	if (pgd_flags(*spgd) & _PAGE_PRESENT) {
@@ -498,7 +341,7 @@ static void release_pgd(pgd_t *spgd)
 		*spgd = __pgd(0);
 	}
 }
-#endif
+
 /*H:445 We saw flush_user_mappings() twice: once from the flush_user_mappings()
  * hypercall and once in new_pgdir() when we re-used a top-level pgdir page.
  * It simply releases every PTE page from 0 up to the Guest's kernel address. */
@@ -507,7 +350,7 @@ static void flush_user_mappings(struct lguest *lg, int idx)
 	unsigned int i;
 	/* Release every pgd entry up to the kernel's address. */
 	for (i = 0; i < pgd_index(lg->kernel_address); i++)
-		release_pgd(lg->pgdirs[idx].pgdir + i);
+		release_pgd(lg, lg->pgdirs[idx].pgdir + i);
 }
 
 /*H:440 (v) Flushing (throwing away) page tables,
@@ -526,9 +369,7 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
 {
 	pgd_t gpgd;
 	pte_t gpte;
-#ifdef CONFIG_X86_PAE
-	pmd_t gpmd;
-#endif
+
 	/* First step: get the top-level Guest page table entry. */
 	gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
 	/* Toplevel not present?  We can't map it in. */
@@ -537,14 +378,7 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
 		return -1UL;
 	}
 
-#ifdef CONFIG_X86_PAE
-	gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
-	if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
-		kill_guest(cpu, "Bad address %#lx", vaddr);
-	gpte = lgread(cpu, gpte_addr(cpu, gpmd, vaddr), pte_t);
-#else
-	gpte = lgread(cpu, gpte_addr(cpu, gpgd, vaddr), pte_t);
-#endif
+	gpte = lgread(cpu, gpte_addr(gpgd, vaddr), pte_t);
 	if (!(pte_flags(gpte) & _PAGE_PRESENT))
 		kill_guest(cpu, "Bad address %#lx", vaddr);
 
@@ -571,9 +405,6 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
 			      int *blank_pgdir)
 {
 	unsigned int next;
-#ifdef CONFIG_X86_PAE
-	pmd_t *pmd_table;
-#endif
 
 	/* We pick one entry at random to throw out.  Choosing the Least
 	 * Recently Used might be better, but this is easy. */
@@ -585,27 +416,10 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
 		/* If the allocation fails, just keep using the one we have */
 		if (!cpu->lg->pgdirs[next].pgdir)
 			next = cpu->cpu_pgd;
-		else {
-#ifdef CONFIG_X86_PAE
-			/* In PAE mode, allocate a pmd page and populate the
-			 * last pgd entry. */
-			pmd_table = (pmd_t *)get_zeroed_page(GFP_KERNEL);
-			if (!pmd_table) {
-				free_page((long)cpu->lg->pgdirs[next].pgdir);
-				set_pgd(cpu->lg->pgdirs[next].pgdir, __pgd(0));
-				next = cpu->cpu_pgd;
-			} else {
-				set_pgd(cpu->lg->pgdirs[next].pgdir +
-					SWITCHER_PGD_INDEX,
-					__pgd(__pa(pmd_table) | _PAGE_PRESENT));
-				/* This is a blank page, so there are no kernel
-				 * mappings: caller must map the stack! */
-				*blank_pgdir = 1;
-			}
-#else
+		else
+			/* This is a blank page, so there are no kernel
+			 * mappings: caller must map the stack! */
 			*blank_pgdir = 1;
-#endif
-		}
 	}
 	/* Record which Guest toplevel this shadows. */
 	cpu->lg->pgdirs[next].gpgdir = gpgdir;
@@ -617,7 +431,7 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
 
 /*H:430 (iv) Switching page tables
  *
- * Now we've seen all the page table setting and manipulation, let's see
+ * Now we've seen all the page table setting and manipulation, let's see
  * what happens when the Guest changes page tables (ie. changes the top-level
  * pgdir).  This occurs on almost every context switch. */
 void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
@@ -646,25 +460,10 @@ static void release_all_pagetables(struct lguest *lg)
 
 	/* Every shadow pagetable this Guest has */
 	for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
-		if (lg->pgdirs[i].pgdir) {
-#ifdef CONFIG_X86_PAE
-			pgd_t *spgd;
-			pmd_t *pmdpage;
-			unsigned int k;
-
-			/* Get the last pmd page. */
-			spgd = lg->pgdirs[i].pgdir + SWITCHER_PGD_INDEX;
-			pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
-
-			/* And release the pmd entries of that pmd page,
-			 * except for the switcher pmd. */
-			for (k = 0; k < SWITCHER_PMD_INDEX; k++)
-				release_pmd(&pmdpage[k]);
-#endif
+		if (lg->pgdirs[i].pgdir)
 			/* Every PGD entry except the Switcher at the top */
 			for (j = 0; j < SWITCHER_PGD_INDEX; j++)
-				release_pgd(lg->pgdirs[i].pgdir + j);
-		}
+				release_pgd(lg, lg->pgdirs[i].pgdir + j);
 }
 
 /* We also throw away everything when a Guest tells us it's changed a kernel
@@ -705,37 +504,24 @@ static void do_set_pte(struct lg_cpu *cpu, int idx,
 {
 	/* Look up the matching shadow page directory entry. */
 	pgd_t *spgd = spgd_addr(cpu, idx, vaddr);
-#ifdef CONFIG_X86_PAE
-	pmd_t *spmd;
-#endif
 
 	/* If the top level isn't present, there's no entry to update. */
 	if (pgd_flags(*spgd) & _PAGE_PRESENT) {
-#ifdef CONFIG_X86_PAE
-		spmd = spmd_addr(cpu, *spgd, vaddr);
-		if (pmd_flags(*spmd) & _PAGE_PRESENT) {
-#endif
-			/* Otherwise, we start by releasing
-			 * the existing entry. */
-			pte_t *spte = spte_addr(cpu, *spgd, vaddr);
-			release_pte(*spte);
-
-			/* If they're setting this entry as dirty or accessed,
-			 * we might as well put that entry they've given us
-			 * in now.  This shaves 10% off a
-			 * copy-on-write micro-benchmark. */
-			if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
-				check_gpte(cpu, gpte);
-				native_set_pte(spte,
-						gpte_to_spte(cpu, gpte,
-						pte_flags(gpte) & _PAGE_DIRTY));
-			} else
-				/* Otherwise kill it and we can demand_page()
-				 * it in later. */
-				native_set_pte(spte, __pte(0));
-#ifdef CONFIG_X86_PAE
-		}
-#endif
+		/* Otherwise, we start by releasing the existing entry. */
+		pte_t *spte = spte_addr(*spgd, vaddr);
+		release_pte(*spte);
+
+		/* If they're setting this entry as dirty or accessed, we might
+		 * as well put that entry they've given us in now.  This shaves
+		 * 10% off a copy-on-write micro-benchmark. */
+		if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
+			check_gpte(cpu, gpte);
+			*spte = gpte_to_spte(cpu, gpte,
+					     pte_flags(gpte) & _PAGE_DIRTY);
+		} else
+			/* Otherwise kill it and we can demand_page() it in
+			 * later. */
+			*spte = __pte(0);
 	}
 }
 
@@ -782,10 +568,12 @@ void guest_set_pte(struct lg_cpu *cpu,
  *
  * So with that in mind here's our code to to update a (top-level) PGD entry:
  */
-void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx)
+void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx)
 {
 	int pgdir;
 
+	/* The kernel seems to try to initialize this early on: we ignore its
+	 * attempts to map over the Switcher. */
 	if (idx >= SWITCHER_PGD_INDEX)
 		return;
 
@@ -793,14 +581,8 @@ void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx)
 	pgdir = find_pgdir(lg, gpgdir);
 	if (pgdir < ARRAY_SIZE(lg->pgdirs))
 		/* ... throw it away. */
-		release_pgd(lg->pgdirs[pgdir].pgdir + idx);
+		release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx);
 }
-#ifdef CONFIG_X86_PAE
-void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx)
-{
-	guest_pagetable_clear_all(&lg->cpus[0]);
-}
-#endif
 
 /* Once we know how much memory we have we can construct simple identity
  * (which set virtual == physical) and linear mappings
@@ -814,16 +596,8 @@ static unsigned long setup_pagetables(struct lguest *lg,
 {
 	pgd_t __user *pgdir;
 	pte_t __user *linear;
+	unsigned int mapped_pages, i, linear_pages, phys_linear;
 	unsigned long mem_base = (unsigned long)lg->mem_base;
-	unsigned int mapped_pages, i, linear_pages;
-#ifdef CONFIG_X86_PAE
-	pmd_t __user *pmds;
-	unsigned int j;
-	pgd_t pgd;
-	pmd_t pmd;
-#else
-	unsigned int phys_linear;
-#endif
 
 	/* We have mapped_pages frames to map, so we need
 	 * linear_pages page tables to map them. */
@@ -836,9 +610,6 @@ static unsigned long setup_pagetables(struct lguest *lg,
 	/* Now we use the next linear_pages pages as pte pages */
 	linear = (void *)pgdir - linear_pages * PAGE_SIZE;
 
-#ifdef CONFIG_X86_PAE
-	pmds = (void *)linear - PAGE_SIZE;
-#endif
 	/* Linear mapping is easy: put every page's address into the
 	 * mapping in order. */
 	for (i = 0; i < mapped_pages; i++) {
@@ -850,22 +621,6 @@ static unsigned long setup_pagetables(struct lguest *lg,
 
 	/* The top level points to the linear page table pages above.
 	 * We setup the identity and linear mappings here. */
-#ifdef CONFIG_X86_PAE
-	for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD;
-	     i += PTRS_PER_PTE, j++) {
-		native_set_pmd(&pmd, __pmd(((unsigned long)(linear + i)
-		- mem_base) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
-
-		if (copy_to_user(&pmds[j], &pmd, sizeof(pmd)) != 0)
-			return -EFAULT;
-	}
-
-	set_pgd(&pgd, __pgd(((u32)pmds - mem_base) | _PAGE_PRESENT));
-	if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0)
-		return -EFAULT;
-	if (copy_to_user(&pgdir[3], &pgd, sizeof(pgd)) != 0)
-		return -EFAULT;
-#else
 	phys_linear = (unsigned long)linear - mem_base;
 	for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) {
 		pgd_t pgd;
@@ -878,7 +633,6 @@ static unsigned long setup_pagetables(struct lguest *lg,
 				    &pgd, sizeof(pgd)))
 			return -EFAULT;
 	}
-#endif
 
 	/* We return the top level (guest-physical) address: remember where
 	 * this is. */
@@ -894,10 +648,7 @@ int init_guest_pagetable(struct lguest *lg)
 	u64 mem;
 	u32 initrd_size;
 	struct boot_params __user *boot = (struct boot_params *)lg->mem_base;
-#ifdef CONFIG_X86_PAE
-	pgd_t *pgd;
-	pmd_t *pmd_table;
-#endif
+
 	/* Get the Guest memory size and the ramdisk size from the boot header
 	 * located at lg->mem_base (Guest address 0). */
 	if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem))
@@ -912,15 +663,6 @@ int init_guest_pagetable(struct lguest *lg)
 	lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
 	if (!lg->pgdirs[0].pgdir)
 		return -ENOMEM;
-#ifdef CONFIG_X86_PAE
-	pgd = lg->pgdirs[0].pgdir;
-	pmd_table = (pmd_t *) get_zeroed_page(GFP_KERNEL);
-	if (!pmd_table)
-		return -ENOMEM;
-
-	set_pgd(pgd + SWITCHER_PGD_INDEX,
-		__pgd(__pa(pmd_table) | _PAGE_PRESENT));
-#endif
 	lg->cpus[0].cpu_pgd = 0;
 	return 0;
 }
@@ -930,24 +672,17 @@ void page_table_guest_data_init(struct lg_cpu *cpu)
 {
 	/* We get the kernel address: above this is all kernel memory. */
 	if (get_user(cpu->lg->kernel_address,
-		&cpu->lg->lguest_data->kernel_address)
-		/* We tell the Guest that it can't use the top 2 or 4 MB
-		 * of virtual addresses used by the Switcher. */
-		|| put_user(RESERVE_MEM * 1024 * 1024,
-			&cpu->lg->lguest_data->reserve_mem)
-		|| put_user(cpu->lg->pgdirs[0].gpgdir,
-			&cpu->lg->lguest_data->pgdir))
+		     &cpu->lg->lguest_data->kernel_address)
+	    /* We tell the Guest that it can't use the top 4MB of virtual
+	     * addresses used by the Switcher. */
+	    || put_user(4U*1024*1024, &cpu->lg->lguest_data->reserve_mem)
+	    || put_user(cpu->lg->pgdirs[0].gpgdir, &cpu->lg->lguest_data->pgdir))
 		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
 
 	/* In flush_user_mappings() we loop from 0 to
 	 * "pgd_index(lg->kernel_address)".  This assumes it won't hit the
 	 * Switcher mappings, so check that now. */
-#ifdef CONFIG_X86_PAE
-	if (pgd_index(cpu->lg->kernel_address) == SWITCHER_PGD_INDEX &&
-		pmd_index(cpu->lg->kernel_address) == SWITCHER_PMD_INDEX)
-#else
 	if (pgd_index(cpu->lg->kernel_address) >= SWITCHER_PGD_INDEX)
-#endif
 		kill_guest(cpu, "bad kernel address %#lx",
 				 cpu->lg->kernel_address);
 }
@@ -973,30 +708,16 @@ void free_guest_pagetable(struct lguest *lg)
 void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
 {
 	pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
+	pgd_t switcher_pgd;
 	pte_t regs_pte;
 	unsigned long pfn;
 
-#ifdef CONFIG_X86_PAE
-	pmd_t switcher_pmd;
-	pmd_t *pmd_table;
-
-	native_set_pmd(&switcher_pmd, pfn_pmd(__pa(switcher_pte_page) >>
-		       PAGE_SHIFT, PAGE_KERNEL_EXEC));
-
-	pmd_table = __va(pgd_pfn(cpu->lg->
-			pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX])
-								<< PAGE_SHIFT);
-	native_set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd);
-#else
-	pgd_t switcher_pgd;
-
 	/* Make the last PGD entry for this Guest point to the Switcher's PTE
 	 * page for this CPU (with appropriate flags). */
-	switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC);
+	switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL);
 
 	cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
 
-#endif
 	/* We also change the Switcher PTE page.  When we're running the Guest,
 	 * we want the Guest's "regs" page to appear where the first Switcher
 	 * page for this CPU is.  This is an optimization: when the Switcher
@@ -1005,9 +726,8 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
 	 * page is already mapped there, we don't have to copy them out
 	 * again. */
 	pfn = __pa(cpu->regs_page) >> PAGE_SHIFT;
-	native_set_pte(&regs_pte, pfn_pte(pfn, PAGE_KERNEL));
-	native_set_pte(&switcher_pte_page[pte_index((unsigned long)pages)],
-			regs_pte);
+	regs_pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL));
+	switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte;
 }
 /*:*/
 
@@ -1032,21 +752,21 @@ static __init void populate_switcher_pte_page(unsigned int cpu,
 
 	/* The first entries are easy: they map the Switcher code. */
 	for (i = 0; i < pages; i++) {
-		native_set_pte(&pte[i], mk_pte(switcher_page[i],
-				__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
+		pte[i] = mk_pte(switcher_page[i],
+				__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED));
 	}
 
 	/* The only other thing we map is this CPU's pair of pages. */
 	i = pages + cpu*2;
 
 	/* First page (Guest registers) is writable from the Guest */
-	native_set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]),
-			 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW)));
+	pte[i] = pfn_pte(page_to_pfn(switcher_page[i]),
+			 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW));
 
 	/* The second page contains the "struct lguest_ro_state", and is
 	 * read-only. */
-	native_set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]),
-			   __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
+	pte[i+1] = pfn_pte(page_to_pfn(switcher_page[i+1]),
+			   __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED));
 }
 
 /* We've made it through the page table code.  Perhaps our tired brains are
diff --git a/trunk/drivers/lguest/segments.c b/trunk/drivers/lguest/segments.c
index 482ed5a18750..7ede64ffeef9 100644
--- a/trunk/drivers/lguest/segments.c
+++ b/trunk/drivers/lguest/segments.c
@@ -150,7 +150,7 @@ void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi)
 {
 	/* We assume the Guest has the same number of GDT entries as the
 	 * Host, otherwise we'd have to dynamically allocate the Guest GDT. */
-	if (num >= ARRAY_SIZE(cpu->arch.gdt))
+	if (num >= ARRAY_SIZE(cpu->arch.gdt))
 		kill_guest(cpu, "too many gdt entries %i", num);
 
 	/* Set it up, then fix it. */
diff --git a/trunk/drivers/net/virtio_net.c b/trunk/drivers/net/virtio_net.c
index 7fa620ddeb21..4d1d47953fc6 100644
--- a/trunk/drivers/net/virtio_net.c
+++ b/trunk/drivers/net/virtio_net.c
@@ -845,10 +845,6 @@ static int virtnet_probe(struct virtio_device *vdev)
 	int err;
 	struct net_device *dev;
 	struct virtnet_info *vi;
-	struct virtqueue *vqs[3];
-	vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
-	const char *names[] = { "input", "output", "control" };
-	int nvqs;
 
 	/* Allocate ourselves a network device with room for our info */
 	dev = alloc_etherdev(sizeof(struct virtnet_info));
@@ -909,19 +905,25 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
 		vi->mergeable_rx_bufs = true;
 
-	/* We expect two virtqueues, receive then send,
-	 * and optionally control. */
-	nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
-
-	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
-	if (err)
+	/* We expect two virtqueues, receive then send. */
+	vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
+	if (IS_ERR(vi->rvq)) {
+		err = PTR_ERR(vi->rvq);
 		goto free;
+	}
 
-	vi->rvq = vqs[0];
-	vi->svq = vqs[1];
+	vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done);
+	if (IS_ERR(vi->svq)) {
+		err = PTR_ERR(vi->svq);
+		goto free_recv;
+	}
 
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
-		vi->cvq = vqs[2];
+		vi->cvq = vdev->config->find_vq(vdev, 2, NULL);
+		if (IS_ERR(vi->cvq)) {
+			err = PTR_ERR(vi->cvq);
+			goto free_send;
+		}
 
 		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
 			dev->features |= NETIF_F_HW_VLAN_FILTER;
@@ -939,7 +941,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	err = register_netdev(dev);
 	if (err) {
 		pr_debug("virtio_net: registering device failed\n");
-		goto free_vqs;
+		goto free_ctrl;
 	}
 
 	/* Last of all, set up some receive buffers. */
@@ -960,8 +962,13 @@ static int virtnet_probe(struct virtio_device *vdev)
 
 unregister:
 	unregister_netdev(dev);
-free_vqs:
-	vdev->config->del_vqs(vdev);
+free_ctrl:
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ))
+		vdev->config->del_vq(vi->cvq);
+free_send:
+	vdev->config->del_vq(vi->svq);
+free_recv:
+	vdev->config->del_vq(vi->rvq);
 free:
 	free_netdev(dev);
 	return err;
@@ -987,10 +994,12 @@ static void virtnet_remove(struct virtio_device *vdev)
 
 	BUG_ON(vi->num != 0);
 
+	vdev->config->del_vq(vi->svq);
+	vdev->config->del_vq(vi->rvq);
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ))
+		vdev->config->del_vq(vi->cvq);
 	unregister_netdev(vi->dev);
 
-	vdev->config->del_vqs(vi->vdev);
-
 	while (vi->pages)
 		__free_pages(get_a_page(vi, GFP_KERNEL), 0);
 
diff --git a/trunk/drivers/s390/kvm/kvm_virtio.c b/trunk/drivers/s390/kvm/kvm_virtio.c
index e38e5d306faf..cbc8566fab70 100644
--- a/trunk/drivers/s390/kvm/kvm_virtio.c
+++ b/trunk/drivers/s390/kvm/kvm_virtio.c
@@ -173,9 +173,8 @@ static void kvm_notify(struct virtqueue *vq)
  * this device and sets it up.
  */
 static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
-				     unsigned index,
-				     void (*callback)(struct virtqueue *vq),
-				     const char *name)
+				    unsigned index,
+				    void (*callback)(struct virtqueue *vq))
 {
 	struct kvm_device *kdev = to_kvmdev(vdev);
 	struct kvm_vqconfig *config;
@@ -195,7 +194,7 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
 
 	vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN,
 				 vdev, (void *) config->address,
-				 kvm_notify, callback, name);
+				 kvm_notify, callback);
 	if (!vq) {
 		err = -ENOMEM;
 		goto unmap;
@@ -227,38 +226,6 @@ static void kvm_del_vq(struct virtqueue *vq)
 				       KVM_S390_VIRTIO_RING_ALIGN));
 }
 
-static void kvm_del_vqs(struct virtio_device *vdev)
-{
-	struct virtqueue *vq, *n;
-
-	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
-		kvm_del_vq(vq);
-}
-
-static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
-			struct virtqueue *vqs[],
-			vq_callback_t *callbacks[],
-			const char *names[])
-{
-	struct kvm_device *kdev = to_kvmdev(vdev);
-	int i;
-
-	/* We must have this many virtqueues. */
-	if (nvqs > kdev->desc->num_vq)
-		return -ENOENT;
-
-	for (i = 0; i < nvqs; ++i) {
-		vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i]);
-		if (IS_ERR(vqs[i]))
-			goto error;
-	}
-	return 0;
-
-error:
-	kvm_del_vqs(vdev);
-	return PTR_ERR(vqs[i]);
-}
-
 /*
  * The config ops structure as defined by virtio config
  */
@@ -270,8 +237,8 @@ static struct virtio_config_ops kvm_vq_configspace_ops = {
 	.get_status = kvm_get_status,
 	.set_status = kvm_set_status,
 	.reset = kvm_reset,
-	.find_vqs = kvm_find_vqs,
-	.del_vqs = kvm_del_vqs,
+	.find_vq = kvm_find_vq,
+	.del_vq = kvm_del_vq,
 };
 
 /*
diff --git a/trunk/drivers/video/aty/aty128fb.c b/trunk/drivers/video/aty/aty128fb.c
index e4e4d433b007..35e8eb02b9e9 100644
--- a/trunk/drivers/video/aty/aty128fb.c
+++ b/trunk/drivers/video/aty/aty128fb.c
@@ -354,7 +354,7 @@ static int default_crt_on __devinitdata = 0;
 static int default_lcd_on __devinitdata = 1;
 
 #ifdef CONFIG_MTRR
-static bool mtrr = true;
+static int mtrr = 1;
 #endif
 
 #ifdef CONFIG_PMAC_BACKLIGHT
diff --git a/trunk/drivers/video/cyber2000fb.c b/trunk/drivers/video/cyber2000fb.c
index da7c01b39be2..83c5cefc266c 100644
--- a/trunk/drivers/video/cyber2000fb.c
+++ b/trunk/drivers/video/cyber2000fb.c
@@ -1736,8 +1736,10 @@ static int __init cyber2000fb_init(void)
 
 #ifdef CONFIG_ARCH_SHARK
 	err = cyberpro_vl_probe();
-	if (!err)
+	if (!err) {
 		ret = 0;
+		__module_get(THIS_MODULE);
+	}
 #endif
 #ifdef CONFIG_PCI
 	err = pci_register_driver(&cyberpro_driver);
@@ -1747,15 +1749,14 @@ static int __init cyber2000fb_init(void)
 
 	return ret ? err : 0;
 }
-module_init(cyber2000fb_init);
 
-#ifndef CONFIG_ARCH_SHARK
 static void __exit cyberpro_exit(void)
 {
 	pci_unregister_driver(&cyberpro_driver);
 }
+
+module_init(cyber2000fb_init);
 module_exit(cyberpro_exit);
-#endif
 
 MODULE_AUTHOR("Russell King");
 MODULE_DESCRIPTION("CyberPro 2000, 2010 and 5000 framebuffer driver");
diff --git a/trunk/drivers/video/uvesafb.c b/trunk/drivers/video/uvesafb.c
index ca5b4643a401..421770b5e6ab 100644
--- a/trunk/drivers/video/uvesafb.c
+++ b/trunk/drivers/video/uvesafb.c
@@ -45,7 +45,7 @@ static struct fb_fix_screeninfo uvesafb_fix __devinitdata = {
 static int mtrr		__devinitdata = 3; /* enable mtrr by default */
 static int blank	= 1;		   /* enable blanking by default */
 static int ypan		= 1; 		 /* 0: scroll, 1: ypan, 2: ywrap */
-static bool pmi_setpal	__devinitdata = true; /* use PMI for palette changes */
+static int pmi_setpal	__devinitdata = 1; /* use PMI for palette changes */
 static int nocrtc	__devinitdata; /* ignore CRTC settings */
 static int noedid	__devinitdata; /* don't try DDC transfers */
 static int vram_remap	__devinitdata; /* set amt. of memory to be used */
@@ -2002,7 +2002,11 @@ static void __devexit uvesafb_exit(void)
 
 module_exit(uvesafb_exit);
 
-#define param_get_scroll NULL
+static int param_get_scroll(char *buffer, struct kernel_param *kp)
+{
+	return 0;
+}
+
 static int param_set_scroll(const char *val, struct kernel_param *kp)
 {
 	ypan = 0;
@@ -2013,8 +2017,6 @@ static int param_set_scroll(const char *val, struct kernel_param *kp)
 		ypan = 1;
 	else if (!strcmp(val, "ywrap"))
 		ypan = 2;
-	else
-		return -EINVAL;
 
 	return 0;
 }
diff --git a/trunk/drivers/virtio/virtio.c b/trunk/drivers/virtio/virtio.c
index 3a43ebf83a49..018c070a357f 100644
--- a/trunk/drivers/virtio/virtio.c
+++ b/trunk/drivers/virtio/virtio.c
@@ -31,37 +31,21 @@ static ssize_t modalias_show(struct device *_d,
 	return sprintf(buf, "virtio:d%08Xv%08X\n",
 		       dev->id.device, dev->id.vendor);
 }
-static ssize_t features_show(struct device *_d,
-			     struct device_attribute *attr, char *buf)
-{
-	struct virtio_device *dev = container_of(_d, struct virtio_device, dev);
-	unsigned int i;
-	ssize_t len = 0;
-
-	/* We actually represent this as a bitstring, as it could be
-	 * arbitrary length in future. */
-	for (i = 0; i < ARRAY_SIZE(dev->features)*BITS_PER_LONG; i++)
-		len += sprintf(buf+len, "%c",
-			       test_bit(i, dev->features) ? '1' : '0');
-	len += sprintf(buf+len, "\n");
-	return len;
-}
 static struct device_attribute virtio_dev_attrs[] = {
 	__ATTR_RO(device),
 	__ATTR_RO(vendor),
 	__ATTR_RO(status),
 	__ATTR_RO(modalias),
-	__ATTR_RO(features),
 	__ATTR_NULL
 };
 
 static inline int virtio_id_match(const struct virtio_device *dev,
 				  const struct virtio_device_id *id)
 {
-	if (id->device != dev->id.device && id->device != VIRTIO_DEV_ANY_ID)
+	if (id->device != dev->id.device)
 		return 0;
 
-	return id->vendor == VIRTIO_DEV_ANY_ID || id->vendor == dev->id.vendor;
+	return id->vendor == VIRTIO_DEV_ANY_ID || id->vendor == dev->id.vendor;
 }
 
 /* This looks through all the IDs a driver claims to support.  If any of them
@@ -134,14 +118,13 @@ static int virtio_dev_probe(struct device *_d)
 		if (device_features & (1 << i))
 			set_bit(i, dev->features);
 
-	dev->config->finalize_features(dev);
-
 	err = drv->probe(dev);
 	if (err)
 		add_status(dev, VIRTIO_CONFIG_S_FAILED);
-	else
+	else {
+		dev->config->finalize_features(dev);
 		add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
-
+	}
 	return err;
 }
 
@@ -202,8 +185,6 @@ int register_virtio_device(struct virtio_device *dev)
 	/* Acknowledge that we've seen the device. */
 	add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
 
-	INIT_LIST_HEAD(&dev->vqs);
-
 	/* device_register() causes the bus infrastructure to look for a
 	 * matching driver. */
 	err = device_register(&dev->dev);
diff --git a/trunk/drivers/virtio/virtio_balloon.c b/trunk/drivers/virtio/virtio_balloon.c
index 26b278264796..9c76a061a04d 100644
--- a/trunk/drivers/virtio/virtio_balloon.c
+++ b/trunk/drivers/virtio/virtio_balloon.c
@@ -204,9 +204,6 @@ static int balloon(void *_vballoon)
 static int virtballoon_probe(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb;
-	struct virtqueue *vqs[2];
-	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
-	const char *names[] = { "inflate", "deflate" };
 	int err;
 
 	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
@@ -221,17 +218,22 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	vb->vdev = vdev;
 
 	/* We expect two virtqueues. */
-	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
-	if (err)
+	vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack);
+	if (IS_ERR(vb->inflate_vq)) {
+		err = PTR_ERR(vb->inflate_vq);
 		goto out_free_vb;
+	}
 
-	vb->inflate_vq = vqs[0];
-	vb->deflate_vq = vqs[1];
+	vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack);
+	if (IS_ERR(vb->deflate_vq)) {
+		err = PTR_ERR(vb->deflate_vq);
+		goto out_del_inflate_vq;
+	}
 
 	vb->thread = kthread_run(balloon, vb, "vballoon");
 	if (IS_ERR(vb->thread)) {
 		err = PTR_ERR(vb->thread);
-		goto out_del_vqs;
+		goto out_del_deflate_vq;
 	}
 
 	vb->tell_host_first
@@ -239,8 +241,10 @@ static int virtballoon_probe(struct virtio_device *vdev)
 
 	return 0;
 
-out_del_vqs:
-	vdev->config->del_vqs(vdev);
+out_del_deflate_vq:
+	vdev->config->del_vq(vb->deflate_vq);
+out_del_inflate_vq:
+	vdev->config->del_vq(vb->inflate_vq);
 out_free_vb:
 	kfree(vb);
 out:
@@ -260,7 +264,8 @@ static void virtballoon_remove(struct virtio_device *vdev)
 	/* Now we reset the device so we can clean up the queues. */
 	vdev->config->reset(vdev);
 
-	vdev->config->del_vqs(vdev);
+	vdev->config->del_vq(vb->deflate_vq);
+	vdev->config->del_vq(vb->inflate_vq);
 	kfree(vb);
 }
 
diff --git a/trunk/drivers/virtio/virtio_pci.c b/trunk/drivers/virtio/virtio_pci.c
index 193c8f0e5cc5..330aacbdec1f 100644
--- a/trunk/drivers/virtio/virtio_pci.c
+++ b/trunk/drivers/virtio/virtio_pci.c
@@ -42,26 +42,6 @@ struct virtio_pci_device
 	/* a list of queues so we can dispatch IRQs */
 	spinlock_t lock;
 	struct list_head virtqueues;
-
-	/* MSI-X support */
-	int msix_enabled;
-	int intx_enabled;
-	struct msix_entry *msix_entries;
-	/* Name strings for interrupts. This size should be enough,
-	 * and I'm too lazy to allocate each name separately. */
-	char (*msix_names)[256];
-	/* Number of available vectors */
-	unsigned msix_vectors;
-	/* Vectors allocated */
-	unsigned msix_used_vectors;
-};
-
-/* Constants for MSI-X */
-/* Use first vector for configuration changes, second and the rest for
- * virtqueues Thus, we need at least 2 vectors for MSI. */
-enum {
-	VP_MSIX_CONFIG_VECTOR = 0,
-	VP_MSIX_VQ_VECTOR = 1,
 };
 
 struct virtio_pci_vq_info
@@ -80,9 +60,6 @@ struct virtio_pci_vq_info
 
 	/* the list node for the virtqueues list */
 	struct list_head node;
-
-	/* MSI-X vector (or none) */
-	unsigned vector;
 };
 
 /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
@@ -132,8 +109,7 @@ static void vp_get(struct virtio_device *vdev, unsigned offset,
 		   void *buf, unsigned len)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	void __iomem *ioaddr = vp_dev->ioaddr +
-				VIRTIO_PCI_CONFIG(vp_dev) + offset;
+	void __iomem *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset;
 	u8 *ptr = buf;
 	int i;
 
@@ -147,8 +123,7 @@ static void vp_set(struct virtio_device *vdev, unsigned offset,
 		   const void *buf, unsigned len)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	void __iomem *ioaddr = vp_dev->ioaddr +
-				VIRTIO_PCI_CONFIG(vp_dev) + offset;
+	void __iomem *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset;
 	const u8 *ptr = buf;
 	int i;
 
@@ -189,37 +164,6 @@ static void vp_notify(struct virtqueue *vq)
 	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
 }
 
-/* Handle a configuration change: Tell driver if it wants to know. */
-static irqreturn_t vp_config_changed(int irq, void *opaque)
-{
-	struct virtio_pci_device *vp_dev = opaque;
-	struct virtio_driver *drv;
-	drv = container_of(vp_dev->vdev.dev.driver,
-			   struct virtio_driver, driver);
-
-	if (drv && drv->config_changed)
-		drv->config_changed(&vp_dev->vdev);
-	return IRQ_HANDLED;
-}
-
-/* Notify all virtqueues on an interrupt. */
-static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
-{
-	struct virtio_pci_device *vp_dev = opaque;
-	struct virtio_pci_vq_info *info;
-	irqreturn_t ret = IRQ_NONE;
-	unsigned long flags;
-
-	spin_lock_irqsave(&vp_dev->lock, flags);
-	list_for_each_entry(info, &vp_dev->virtqueues, node) {
-		if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
-			ret = IRQ_HANDLED;
-	}
-	spin_unlock_irqrestore(&vp_dev->lock, flags);
-
-	return ret;
-}
-
 /* A small wrapper to also acknowledge the interrupt when it's handled.
  * I really need an EIO hook for the vring so I can ack the interrupt once we
  * know that we'll be handling the IRQ but before we invoke the callback since
@@ -229,6 +173,9 @@ static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
 static irqreturn_t vp_interrupt(int irq, void *opaque)
 {
 	struct virtio_pci_device *vp_dev = opaque;
+	struct virtio_pci_vq_info *info;
+	irqreturn_t ret = IRQ_NONE;
+	unsigned long flags;
 	u8 isr;
 
 	/* reading the ISR has the effect of also clearing it so it's very
@@ -240,137 +187,34 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
 		return IRQ_NONE;
 
 	/* Configuration change?  Tell driver if it wants to know. */
-	if (isr & VIRTIO_PCI_ISR_CONFIG)
-		vp_config_changed(irq, opaque);
+	if (isr & VIRTIO_PCI_ISR_CONFIG) {
+		struct virtio_driver *drv;
+		drv = container_of(vp_dev->vdev.dev.driver,
+				   struct virtio_driver, driver);
 
-	return vp_vring_interrupt(irq, opaque);
-}
-
-static void vp_free_vectors(struct virtio_device *vdev)
-{
-	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	int i;
-
-	if (vp_dev->intx_enabled) {
-		free_irq(vp_dev->pci_dev->irq, vp_dev);
-		vp_dev->intx_enabled = 0;
-	}
-
-	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
-		free_irq(vp_dev->msix_entries[i].vector, vp_dev);
-	vp_dev->msix_used_vectors = 0;
-
-	if (vp_dev->msix_enabled) {
-		/* Disable the vector used for configuration */
-		iowrite16(VIRTIO_MSI_NO_VECTOR,
-			  vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
-		/* Flush the write out to device */
-		ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
-
-		vp_dev->msix_enabled = 0;
-		pci_disable_msix(vp_dev->pci_dev);
+		if (drv && drv->config_changed)
+			drv->config_changed(&vp_dev->vdev);
 	}
-}
-
-static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
-			  int *options, int noptions)
-{
-	int i;
-	for (i = 0; i < noptions; ++i)
-		if (!pci_enable_msix(dev, entries, options[i]))
-			return options[i];
-	return -EBUSY;
-}
 
-static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs)
-{
-	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	const char *name = dev_name(&vp_dev->vdev.dev);
-	unsigned i, v;
-	int err = -ENOMEM;
-	/* We want at most one vector per queue and one for config changes.
-	 * Fallback to separate vectors for config and a shared for queues.
-	 * Finally fall back to regular interrupts. */
-	int options[] = { max_vqs + 1, 2 };
-	int nvectors = max(options[0], options[1]);
-
-	vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
-				       GFP_KERNEL);
-	if (!vp_dev->msix_entries)
-		goto error_entries;
-	vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
-				     GFP_KERNEL);
-	if (!vp_dev->msix_names)
-		goto error_names;
-
-	for (i = 0; i < nvectors; ++i)
-		vp_dev->msix_entries[i].entry = i;
-
-	err = vp_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries,
-			     options, ARRAY_SIZE(options));
-	if (err < 0) {
-		/* Can't allocate enough MSI-X vectors, use regular interrupt */
-		vp_dev->msix_vectors = 0;
-		err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
-				  IRQF_SHARED, name, vp_dev);
-		if (err)
-			goto error_irq;
-		vp_dev->intx_enabled = 1;
-	} else {
-		vp_dev->msix_vectors = err;
-		vp_dev->msix_enabled = 1;
-
-		/* Set the vector used for configuration */
-		v = vp_dev->msix_used_vectors;
-		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
-			 "%s-config", name);
-		err = request_irq(vp_dev->msix_entries[v].vector,
-				  vp_config_changed, 0, vp_dev->msix_names[v],
-				  vp_dev);
-		if (err)
-			goto error_irq;
-		++vp_dev->msix_used_vectors;
-
-		iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
-		/* Verify we had enough resources to assign the vector */
-		v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
-		if (v == VIRTIO_MSI_NO_VECTOR) {
-			err = -EBUSY;
-			goto error_irq;
-		}
+	spin_lock_irqsave(&vp_dev->lock, flags);
+	list_for_each_entry(info, &vp_dev->virtqueues, node) {
+		if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
+			ret = IRQ_HANDLED;
 	}
+	spin_unlock_irqrestore(&vp_dev->lock, flags);
 
-	if (vp_dev->msix_vectors && vp_dev->msix_vectors != max_vqs + 1) {
-		/* Shared vector for all VQs */
-		v = vp_dev->msix_used_vectors;
-		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
-			 "%s-virtqueues", name);
-		err = request_irq(vp_dev->msix_entries[v].vector,
-				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
-				  vp_dev);
-		if (err)
-			goto error_irq;
-		++vp_dev->msix_used_vectors;
-	}
-	return 0;
-error_irq:
-	vp_free_vectors(vdev);
-	kfree(vp_dev->msix_names);
-error_names:
-	kfree(vp_dev->msix_entries);
-error_entries:
-	return err;
+	return ret;
 }
 
+/* the config->find_vq() implementation */
 static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
-				    void (*callback)(struct virtqueue *vq),
-				    const char *name)
+				    void (*callback)(struct virtqueue *vq))
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	struct virtio_pci_vq_info *info;
 	struct virtqueue *vq;
 	unsigned long flags, size;
-	u16 num, vector;
+	u16 num;
 	int err;
 
 	/* Select the queue we're interested in */
@@ -389,7 +233,6 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 
 	info->queue_index = index;
 	info->num = num;
-	info->vector = VIRTIO_MSI_NO_VECTOR;
 
 	size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
 	info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
@@ -404,7 +247,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 
 	/* create the vring */
 	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
-				 vdev, info->queue, vp_notify, callback, name);
+				 vdev, info->queue, vp_notify, callback);
 	if (!vq) {
 		err = -ENOMEM;
 		goto out_activate_queue;
@@ -413,43 +256,12 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 	vq->priv = info;
 	info->vq = vq;
 
-	/* allocate per-vq vector if available and necessary */
-	if (callback && vp_dev->msix_used_vectors < vp_dev->msix_vectors) {
-		vector = vp_dev->msix_used_vectors;
-		snprintf(vp_dev->msix_names[vector], sizeof *vp_dev->msix_names,
-			 "%s-%s", dev_name(&vp_dev->vdev.dev), name);
-		err = request_irq(vp_dev->msix_entries[vector].vector,
-				  vring_interrupt, 0,
-				  vp_dev->msix_names[vector], vq);
-		if (err)
-			goto out_request_irq;
-		info->vector = vector;
-		++vp_dev->msix_used_vectors;
-	} else
-		vector = VP_MSIX_VQ_VECTOR;
-
-	 if (callback && vp_dev->msix_enabled) {
-		iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
-		vector = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
-		if (vector == VIRTIO_MSI_NO_VECTOR) {
-			err = -EBUSY;
-			goto out_assign;
-		}
-	}
-
 	spin_lock_irqsave(&vp_dev->lock, flags);
 	list_add(&info->node, &vp_dev->virtqueues);
 	spin_unlock_irqrestore(&vp_dev->lock, flags);
 
 	return vq;
 
-out_assign:
-	if (info->vector != VIRTIO_MSI_NO_VECTOR) {
-		free_irq(vp_dev->msix_entries[info->vector].vector, vq);
-		--vp_dev->msix_used_vectors;
-	}
-out_request_irq:
-	vring_del_virtqueue(vq);
 out_activate_queue:
 	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 	free_pages_exact(info->queue, size);
@@ -458,27 +270,21 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 	return ERR_PTR(err);
 }
 
+/* the config->del_vq() implementation */
 static void vp_del_vq(struct virtqueue *vq)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
 	struct virtio_pci_vq_info *info = vq->priv;
-	unsigned long size;
-
-	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
-
-	if (info->vector != VIRTIO_MSI_NO_VECTOR)
-		free_irq(vp_dev->msix_entries[info->vector].vector, vq);
+	unsigned long flags, size;
 
-	if (vp_dev->msix_enabled) {
-		iowrite16(VIRTIO_MSI_NO_VECTOR,
-			  vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
-		/* Flush the write out to device */
-		ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
-	}
+	spin_lock_irqsave(&vp_dev->lock, flags);
+	list_del(&info->node);
+	spin_unlock_irqrestore(&vp_dev->lock, flags);
 
 	vring_del_virtqueue(vq);
 
 	/* Select and deactivate the queue */
+	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
 	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN));
@@ -486,57 +292,14 @@ static void vp_del_vq(struct virtqueue *vq)
 	kfree(info);
 }
 
-/* the config->del_vqs() implementation */
-static void vp_del_vqs(struct virtio_device *vdev)
-{
-	struct virtqueue *vq, *n;
-
-	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
-		vp_del_vq(vq);
-
-	vp_free_vectors(vdev);
-}
-
-/* the config->find_vqs() implementation */
-static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
-		       struct virtqueue *vqs[],
-		       vq_callback_t *callbacks[],
-		       const char *names[])
-{
-	int vectors = 0;
-	int i, err;
-
-	/* How many vectors would we like? */
-	for (i = 0; i < nvqs; ++i)
-		if (callbacks[i])
-			++vectors;
-
-	err = vp_request_vectors(vdev, vectors);
-	if (err)
-		goto error_request;
-
-	for (i = 0; i < nvqs; ++i) {
-		vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i]);
-		if (IS_ERR(vqs[i]))
-			goto error_find;
-	}
-	return 0;
-
-error_find:
-	vp_del_vqs(vdev);
-
-error_request:
-	return PTR_ERR(vqs[i]);
-}
-
 static struct virtio_config_ops virtio_pci_config_ops = {
 	.get		= vp_get,
 	.set		= vp_set,
 	.get_status	= vp_get_status,
 	.set_status	= vp_set_status,
 	.reset		= vp_reset,
-	.find_vqs	= vp_find_vqs,
-	.del_vqs	= vp_del_vqs,
+	.find_vq	= vp_find_vq,
+	.del_vq		= vp_del_vq,
 	.get_features	= vp_get_features,
 	.finalize_features = vp_finalize_features,
 };
@@ -547,7 +310,7 @@ static void virtio_pci_release_dev(struct device *_d)
 	struct virtio_pci_device *vp_dev = to_vp_device(dev);
 	struct pci_dev *pci_dev = vp_dev->pci_dev;
 
-	vp_del_vqs(dev);
+	free_irq(pci_dev->irq, vp_dev);
 	pci_set_drvdata(pci_dev, NULL);
 	pci_iounmap(pci_dev, vp_dev->ioaddr);
 	pci_release_regions(pci_dev);
@@ -606,13 +369,21 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,
 	vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
 	vp_dev->vdev.id.device = pci_dev->subsystem_device;
 
+	/* register a handler for the queue with the PCI device's interrupt */
+	err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
+			  dev_name(&vp_dev->vdev.dev), vp_dev);
+	if (err)
+		goto out_set_drvdata;
+
 	/* finally register the virtio device */
 	err = register_virtio_device(&vp_dev->vdev);
 	if (err)
-		goto out_set_drvdata;
+		goto out_req_irq;
 
 	return 0;
 
+out_req_irq:
+	free_irq(pci_dev->irq, vp_dev);
 out_set_drvdata:
 	pci_set_drvdata(pci_dev, NULL);
 	pci_iounmap(pci_dev, vp_dev->ioaddr);
diff --git a/trunk/drivers/virtio/virtio_ring.c b/trunk/drivers/virtio/virtio_ring.c
index a882f2606515..5c52369ab9bb 100644
--- a/trunk/drivers/virtio/virtio_ring.c
+++ b/trunk/drivers/virtio/virtio_ring.c
@@ -23,30 +23,21 @@
 
 #ifdef DEBUG
 /* For development, we want to crash whenever the ring is screwed. */
-#define BAD_RING(_vq, fmt, args...)				\
-	do {							\
-		dev_err(&(_vq)->vq.vdev->dev,			\
-			"%s:"fmt, (_vq)->vq.name, ##args);	\
-		BUG();						\
-	} while (0)
+#define BAD_RING(_vq, fmt...)			\
+	do { dev_err(&(_vq)->vq.vdev->dev, fmt); BUG(); } while(0)
 /* Caller is supposed to guarantee no reentry. */
 #define START_USE(_vq)						\
 	do {							\
 		if ((_vq)->in_use)				\
-			panic("%s:in_use = %i\n",		\
-			      (_vq)->vq.name, (_vq)->in_use);	\
+			panic("in_use = %i\n", (_vq)->in_use);	\
 		(_vq)->in_use = __LINE__;			\
 		mb();						\
-	} while (0)
+	} while(0)
 #define END_USE(_vq) \
 	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; mb(); } while(0)
 #else
-#define BAD_RING(_vq, fmt, args...)				\
-	do {							\
-		dev_err(&_vq->vq.vdev->dev,			\
-			"%s:"fmt, (_vq)->vq.name, ##args);	\
-		(_vq)->broken = true;				\
-	} while (0)
+#define BAD_RING(_vq, fmt...)			\
+	do { dev_err(&_vq->vq.vdev->dev, fmt); (_vq)->broken = true; } while(0)
 #define START_USE(vq)
 #define END_USE(vq)
 #endif
@@ -61,9 +52,6 @@ struct vring_virtqueue
 	/* Other side has made a mess, don't try any more. */
 	bool broken;
 
-	/* Host supports indirect buffers */
-	bool indirect;
-
 	/* Number of free buffers */
 	unsigned int num_free;
 	/* Head of free buffer list. */
@@ -88,55 +76,6 @@ struct vring_virtqueue
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
-/* Set up an indirect table of descriptors and add it to the queue. */
-static int vring_add_indirect(struct vring_virtqueue *vq,
-			      struct scatterlist sg[],
-			      unsigned int out,
-			      unsigned int in)
-{
-	struct vring_desc *desc;
-	unsigned head;
-	int i;
-
-	desc = kmalloc((out + in) * sizeof(struct vring_desc), GFP_ATOMIC);
-	if (!desc)
-		return vq->vring.num;
-
-	/* Transfer entries from the sg list into the indirect page */
-	for (i = 0; i < out; i++) {
-		desc[i].flags = VRING_DESC_F_NEXT;
-		desc[i].addr = sg_phys(sg);
-		desc[i].len = sg->length;
-		desc[i].next = i+1;
-		sg++;
-	}
-	for (; i < (out + in); i++) {
-		desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
-		desc[i].addr = sg_phys(sg);
-		desc[i].len = sg->length;
-		desc[i].next = i+1;
-		sg++;
-	}
-
-	/* Last one doesn't continue. */
-	desc[i-1].flags &= ~VRING_DESC_F_NEXT;
-	desc[i-1].next = 0;
-
-	/* We're about to use a buffer */
-	vq->num_free--;
-
-	/* Use a single buffer which doesn't continue */
-	head = vq->free_head;
-	vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
-	vq->vring.desc[head].addr = virt_to_phys(desc);
-	vq->vring.desc[head].len = i * sizeof(struct vring_desc);
-
-	/* Update free pointer */
-	vq->free_head = vq->vring.desc[head].next;
-
-	return head;
-}
-
 static int vring_add_buf(struct virtqueue *_vq,
 			 struct scatterlist sg[],
 			 unsigned int out,
@@ -146,21 +85,12 @@ static int vring_add_buf(struct virtqueue *_vq,
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	unsigned int i, avail, head, uninitialized_var(prev);
 
-	START_USE(vq);
-
 	BUG_ON(data == NULL);
-
-	/* If the host supports indirect descriptor tables, and we have multiple
-	 * buffers, then go indirect. FIXME: tune this threshold */
-	if (vq->indirect && (out + in) > 1 && vq->num_free) {
-		head = vring_add_indirect(vq, sg, out, in);
-		if (head != vq->vring.num)
-			goto add_head;
-	}
-
 	BUG_ON(out + in > vq->vring.num);
 	BUG_ON(out + in == 0);
 
+	START_USE(vq);
+
 	if (vq->num_free < out + in) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
 			 out + in, vq->num_free);
@@ -197,7 +127,6 @@ static int vring_add_buf(struct virtqueue *_vq,
 	/* Update free pointer */
 	vq->free_head = i;
 
-add_head:
 	/* Set token. */
 	vq->data[head] = data;
 
@@ -241,11 +170,6 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 
 	/* Put back on free list: find end */
 	i = head;
-
-	/* Free the indirect table */
-	if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)
-		kfree(phys_to_virt(vq->vring.desc[i].addr));
-
 	while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
 		i = vq->vring.desc[i].next;
 		vq->num_free++;
@@ -360,8 +284,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      struct virtio_device *vdev,
 				      void *pages,
 				      void (*notify)(struct virtqueue *),
-				      void (*callback)(struct virtqueue *),
-				      const char *name)
+				      void (*callback)(struct virtqueue *))
 {
 	struct vring_virtqueue *vq;
 	unsigned int i;
@@ -380,18 +303,14 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 	vq->vq.callback = callback;
 	vq->vq.vdev = vdev;
 	vq->vq.vq_ops = &vring_vq_ops;
-	vq->vq.name = name;
 	vq->notify = notify;
 	vq->broken = false;
 	vq->last_used_idx = 0;
 	vq->num_added = 0;
-	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
 	vq->in_use = false;
 #endif
 
-	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
-
 	/* No callback?  Tell other side not to bother us. */
 	if (!callback)
 		vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
@@ -408,7 +327,6 @@ EXPORT_SYMBOL_GPL(vring_new_virtqueue);
 
 void vring_del_virtqueue(struct virtqueue *vq)
 {
-	list_del(&vq->list);
 	kfree(to_vvq(vq));
 }
 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
@@ -420,8 +338,6 @@ void vring_transport_features(struct virtio_device *vdev)
 
 	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
 		switch (i) {
-		case VIRTIO_RING_F_INDIRECT_DESC:
-			break;
 		default:
 			/* We don't understand this bit. */
 			clear_bit(i, vdev->features);
diff --git a/trunk/fs/Kconfig b/trunk/fs/Kconfig
index 525da2e8f73b..9f7270f36b2a 100644
--- a/trunk/fs/Kconfig
+++ b/trunk/fs/Kconfig
@@ -62,16 +62,6 @@ source "fs/autofs/Kconfig"
 source "fs/autofs4/Kconfig"
 source "fs/fuse/Kconfig"
 
-config CUSE
-	tristate "Character device in Userpace support"
-	depends on FUSE_FS
-	help
-	  This FUSE extension allows character devices to be
-	  implemented in userspace.
-
-	  If you want to develop or use userspace character device
-	  based on CUSE, answer Y or M.
-
 config GENERIC_ACL
 	bool
 	select FS_POSIX_ACL
diff --git a/trunk/fs/eventfd.c b/trunk/fs/eventfd.c
index 3f0e1974abdc..2a701d593d35 100644
--- a/trunk/fs/eventfd.c
+++ b/trunk/fs/eventfd.c
@@ -16,7 +16,6 @@
 #include <linux/anon_inodes.h>
 #include <linux/eventfd.h>
 #include <linux/syscalls.h>
-#include <linux/module.h>
 
 struct eventfd_ctx {
 	wait_queue_head_t wqh;
@@ -57,7 +56,6 @@ int eventfd_signal(struct file *file, int n)
 
 	return n;
 }
-EXPORT_SYMBOL_GPL(eventfd_signal);
 
 static int eventfd_release(struct inode *inode, struct file *file)
 {
@@ -199,7 +197,6 @@ struct file *eventfd_fget(int fd)
 
 	return file;
 }
-EXPORT_SYMBOL_GPL(eventfd_fget);
 
 SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
 {
diff --git a/trunk/fs/fuse/Makefile b/trunk/fs/fuse/Makefile
index e95eeb445e58..72437065f6ad 100644
--- a/trunk/fs/fuse/Makefile
+++ b/trunk/fs/fuse/Makefile
@@ -3,6 +3,5 @@
 #
 
 obj-$(CONFIG_FUSE_FS) += fuse.o
-obj-$(CONFIG_CUSE) += cuse.o
 
 fuse-objs := dev.o dir.o file.o inode.o control.o
diff --git a/trunk/fs/fuse/cuse.c b/trunk/fs/fuse/cuse.c
deleted file mode 100644
index de792dcf3274..000000000000
--- a/trunk/fs/fuse/cuse.c
+++ /dev/null
@@ -1,610 +0,0 @@
-/*
- * CUSE: Character device in Userspace
- *
- * Copyright (C) 2008-2009  SUSE Linux Products GmbH
- * Copyright (C) 2008-2009  Tejun Heo <tj@kernel.org>
- *
- * This file is released under the GPLv2.
- *
- * CUSE enables character devices to be implemented from userland much
- * like FUSE allows filesystems.  On initialization /dev/cuse is
- * created.  By opening the file and replying to the CUSE_INIT request
- * userland CUSE server can create a character device.  After that the
- * operation is very similar to FUSE.
- *
- * A CUSE instance involves the following objects.
- *
- * cuse_conn	: contains fuse_conn and serves as bonding structure
- * channel	: file handle connected to the userland CUSE server
- * cdev		: the implemented character device
- * dev		: generic device for cdev
- *
- * Note that 'channel' is what 'dev' is in FUSE.  As CUSE deals with
- * devices, it's called 'channel' to reduce confusion.
- *
- * channel determines when the character device dies.  When channel is
- * closed, everything begins to destruct.  The cuse_conn is taken off
- * the lookup table preventing further access from cdev, cdev and
- * generic device are removed and the base reference of cuse_conn is
- * put.
- *
- * On each open, the matching cuse_conn is looked up and if found an
- * additional reference is taken which is released when the file is
- * closed.
- */
-
-#include <linux/fuse.h>
-#include <linux/cdev.h>
-#include <linux/device.h>
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/kdev_t.h>
-#include <linux/kthread.h>
-#include <linux/list.h>
-#include <linux/magic.h>
-#include <linux/miscdevice.h>
-#include <linux/mutex.h>
-#include <linux/spinlock.h>
-#include <linux/stat.h>
-
-#include "fuse_i.h"
-
-#define CUSE_CONNTBL_LEN	64
-
-struct cuse_conn {
-	struct list_head	list;	/* linked on cuse_conntbl */
-	struct fuse_conn	fc;	/* fuse connection */
-	struct cdev		*cdev;	/* associated character device */
-	struct device		*dev;	/* device representing @cdev */
-
-	/* init parameters, set once during initialization */
-	bool			unrestricted_ioctl;
-};
-
-static DEFINE_SPINLOCK(cuse_lock);		/* protects cuse_conntbl */
-static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN];
-static struct class *cuse_class;
-
-static struct cuse_conn *fc_to_cc(struct fuse_conn *fc)
-{
-	return container_of(fc, struct cuse_conn, fc);
-}
-
-static struct list_head *cuse_conntbl_head(dev_t devt)
-{
-	return &cuse_conntbl[(MAJOR(devt) + MINOR(devt)) % CUSE_CONNTBL_LEN];
-}
-
-
-/**************************************************************************
- * CUSE frontend operations
- *
- * These are file operations for the character device.
- *
- * On open, CUSE opens a file from the FUSE mnt and stores it to
- * private_data of the open file.  All other ops call FUSE ops on the
- * FUSE file.
- */
-
-static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
-			 loff_t *ppos)
-{
-	loff_t pos = 0;
-
-	return fuse_direct_io(file, buf, count, &pos, 0);
-}
-
-static ssize_t cuse_write(struct file *file, const char __user *buf,
-			  size_t count, loff_t *ppos)
-{
-	loff_t pos = 0;
-	/*
-	 * No locking or generic_write_checks(), the server is
-	 * responsible for locking and sanity checks.
-	 */
-	return fuse_direct_io(file, buf, count, &pos, 1);
-}
-
-static int cuse_open(struct inode *inode, struct file *file)
-{
-	dev_t devt = inode->i_cdev->dev;
-	struct cuse_conn *cc = NULL, *pos;
-	int rc;
-
-	/* look up and get the connection */
-	spin_lock(&cuse_lock);
-	list_for_each_entry(pos, cuse_conntbl_head(devt), list)
-		if (pos->dev->devt == devt) {
-			fuse_conn_get(&pos->fc);
-			cc = pos;
-			break;
-		}
-	spin_unlock(&cuse_lock);
-
-	/* dead? */
-	if (!cc)
-		return -ENODEV;
-
-	/*
-	 * Generic permission check is already done against the chrdev
-	 * file, proceed to open.
-	 */
-	rc = fuse_do_open(&cc->fc, 0, file, 0);
-	if (rc)
-		fuse_conn_put(&cc->fc);
-	return rc;
-}
-
-static int cuse_release(struct inode *inode, struct file *file)
-{
-	struct fuse_file *ff = file->private_data;
-	struct fuse_conn *fc = ff->fc;
-
-	fuse_sync_release(ff, file->f_flags);
-	fuse_conn_put(fc);
-
-	return 0;
-}
-
-static long cuse_file_ioctl(struct file *file, unsigned int cmd,
-			    unsigned long arg)
-{
-	struct fuse_file *ff = file->private_data;
-	struct cuse_conn *cc = fc_to_cc(ff->fc);
-	unsigned int flags = 0;
-
-	if (cc->unrestricted_ioctl)
-		flags |= FUSE_IOCTL_UNRESTRICTED;
-
-	return fuse_do_ioctl(file, cmd, arg, flags);
-}
-
-static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
-				   unsigned long arg)
-{
-	struct fuse_file *ff = file->private_data;
-	struct cuse_conn *cc = fc_to_cc(ff->fc);
-	unsigned int flags = FUSE_IOCTL_COMPAT;
-
-	if (cc->unrestricted_ioctl)
-		flags |= FUSE_IOCTL_UNRESTRICTED;
-
-	return fuse_do_ioctl(file, cmd, arg, flags);
-}
-
-static const struct file_operations cuse_frontend_fops = {
-	.owner			= THIS_MODULE,
-	.read			= cuse_read,
-	.write			= cuse_write,
-	.open			= cuse_open,
-	.release		= cuse_release,
-	.unlocked_ioctl		= cuse_file_ioctl,
-	.compat_ioctl		= cuse_file_compat_ioctl,
-	.poll			= fuse_file_poll,
-};
-
-
-/**************************************************************************
- * CUSE channel initialization and destruction
- */
-
-struct cuse_devinfo {
-	const char		*name;
-};
-
-/**
- * cuse_parse_one - parse one key=value pair
- * @pp: i/o parameter for the current position
- * @end: points to one past the end of the packed string
- * @keyp: out parameter for key
- * @valp: out parameter for value
- *
- * *@pp points to packed strings - "key0=val0\0key1=val1\0" which ends
- * at @end - 1.  This function parses one pair and set *@keyp to the
- * start of the key and *@valp to the start of the value.  Note that
- * the original string is modified such that the key string is
- * terminated with '\0'.  *@pp is updated to point to the next string.
- *
- * RETURNS:
- * 1 on successful parse, 0 on EOF, -errno on failure.
- */
-static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
-{
-	char *p = *pp;
-	char *key, *val;
-
-	while (p < end && *p == '\0')
-		p++;
-	if (p == end)
-		return 0;
-
-	if (end[-1] != '\0') {
-		printk(KERN_ERR "CUSE: info not properly terminated\n");
-		return -EINVAL;
-	}
-
-	key = val = p;
-	p += strlen(p);
-
-	if (valp) {
-		strsep(&val, "=");
-		if (!val)
-			val = key + strlen(key);
-		key = strstrip(key);
-		val = strstrip(val);
-	} else
-		key = strstrip(key);
-
-	if (!strlen(key)) {
-		printk(KERN_ERR "CUSE: zero length info key specified\n");
-		return -EINVAL;
-	}
-
-	*pp = p;
-	*keyp = key;
-	if (valp)
-		*valp = val;
-
-	return 1;
-}
-
-/**
- * cuse_parse_dev_info - parse device info
- * @p: device info string
- * @len: length of device info string
- * @devinfo: out parameter for parsed device info
- *
- * Parse @p to extract device info and store it into @devinfo.  String
- * pointed to by @p is modified by parsing and @devinfo points into
- * them, so @p shouldn't be freed while @devinfo is in use.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo)
-{
-	char *end = p + len;
-	char *key, *val;
-	int rc;
-
-	while (true) {
-		rc = cuse_parse_one(&p, end, &key, &val);
-		if (rc < 0)
-			return rc;
-		if (!rc)
-			break;
-		if (strcmp(key, "DEVNAME") == 0)
-			devinfo->name = val;
-		else
-			printk(KERN_WARNING "CUSE: unknown device info \"%s\"\n",
-			       key);
-	}
-
-	if (!devinfo->name || !strlen(devinfo->name)) {
-		printk(KERN_ERR "CUSE: DEVNAME unspecified\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static void cuse_gendev_release(struct device *dev)
-{
-	kfree(dev);
-}
-
-/**
- * cuse_process_init_reply - finish initializing CUSE channel
- *
- * This function creates the character device and sets up all the
- * required data structures for it.  Please read the comment at the
- * top of this file for high level overview.
- */
-static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
-{
-	struct cuse_conn *cc = fc_to_cc(fc);
-	struct cuse_init_out *arg = &req->misc.cuse_init_out;
-	struct page *page = req->pages[0];
-	struct cuse_devinfo devinfo = { };
-	struct device *dev;
-	struct cdev *cdev;
-	dev_t devt;
-	int rc;
-
-	if (req->out.h.error ||
-	    arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
-		goto err;
-	}
-
-	fc->minor = arg->minor;
-	fc->max_read = max_t(unsigned, arg->max_read, 4096);
-	fc->max_write = max_t(unsigned, arg->max_write, 4096);
-
-	/* parse init reply */
-	cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL;
-
-	rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size,
-				&devinfo);
-	if (rc)
-		goto err;
-
-	/* determine and reserve devt */
-	devt = MKDEV(arg->dev_major, arg->dev_minor);
-	if (!MAJOR(devt))
-		rc = alloc_chrdev_region(&devt, MINOR(devt), 1, devinfo.name);
-	else
-		rc = register_chrdev_region(devt, 1, devinfo.name);
-	if (rc) {
-		printk(KERN_ERR "CUSE: failed to register chrdev region\n");
-		goto err;
-	}
-
-	/* devt determined, create device */
-	rc = -ENOMEM;
-	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-	if (!dev)
-		goto err_region;
-
-	device_initialize(dev);
-	dev_set_uevent_suppress(dev, 1);
-	dev->class = cuse_class;
-	dev->devt = devt;
-	dev->release = cuse_gendev_release;
-	dev_set_drvdata(dev, cc);
-	dev_set_name(dev, "%s", devinfo.name);
-
-	rc = device_add(dev);
-	if (rc)
-		goto err_device;
-
-	/* register cdev */
-	rc = -ENOMEM;
-	cdev = cdev_alloc();
-	if (!cdev)
-		goto err_device;
-
-	cdev->owner = THIS_MODULE;
-	cdev->ops = &cuse_frontend_fops;
-
-	rc = cdev_add(cdev, devt, 1);
-	if (rc)
-		goto err_cdev;
-
-	cc->dev = dev;
-	cc->cdev = cdev;
-
-	/* make the device available */
-	spin_lock(&cuse_lock);
-	list_add(&cc->list, cuse_conntbl_head(devt));
-	spin_unlock(&cuse_lock);
-
-	/* announce device availability */
-	dev_set_uevent_suppress(dev, 0);
-	kobject_uevent(&dev->kobj, KOBJ_ADD);
-out:
-	__free_page(page);
-	return;
-
-err_cdev:
-	cdev_del(cdev);
-err_device:
-	put_device(dev);
-err_region:
-	unregister_chrdev_region(devt, 1);
-err:
-	fc->conn_error = 1;
-	goto out;
-}
-
-static int cuse_send_init(struct cuse_conn *cc)
-{
-	int rc;
-	struct fuse_req *req;
-	struct page *page;
-	struct fuse_conn *fc = &cc->fc;
-	struct cuse_init_in *arg;
-
-	BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
-
-	req = fuse_get_req(fc);
-	if (IS_ERR(req)) {
-		rc = PTR_ERR(req);
-		goto err;
-	}
-
-	rc = -ENOMEM;
-	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!page)
-		goto err_put_req;
-
-	arg = &req->misc.cuse_init_in;
-	arg->major = FUSE_KERNEL_VERSION;
-	arg->minor = FUSE_KERNEL_MINOR_VERSION;
-	arg->flags |= CUSE_UNRESTRICTED_IOCTL;
-	req->in.h.opcode = CUSE_INIT;
-	req->in.numargs = 1;
-	req->in.args[0].size = sizeof(struct cuse_init_in);
-	req->in.args[0].value = arg;
-	req->out.numargs = 2;
-	req->out.args[0].size = sizeof(struct cuse_init_out);
-	req->out.args[0].value = &req->misc.cuse_init_out;
-	req->out.args[1].size = CUSE_INIT_INFO_MAX;
-	req->out.argvar = 1;
-	req->out.argpages = 1;
-	req->pages[0] = page;
-	req->num_pages = 1;
-	req->end = cuse_process_init_reply;
-	fuse_request_send_background(fc, req);
-
-	return 0;
-
-err_put_req:
-	fuse_put_request(fc, req);
-err:
-	return rc;
-}
-
-static void cuse_fc_release(struct fuse_conn *fc)
-{
-	struct cuse_conn *cc = fc_to_cc(fc);
-	kfree(cc);
-}
-
-/**
- * cuse_channel_open - open method for /dev/cuse
- * @inode: inode for /dev/cuse
- * @file: file struct being opened
- *
- * Userland CUSE server can create a CUSE device by opening /dev/cuse
- * and replying to the initilaization request kernel sends.  This
- * function is responsible for handling CUSE device initialization.
- * Because the fd opened by this function is used during
- * initialization, this function only creates cuse_conn and sends
- * init.  The rest is delegated to a kthread.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-static int cuse_channel_open(struct inode *inode, struct file *file)
-{
-	struct cuse_conn *cc;
-	int rc;
-
-	/* set up cuse_conn */
-	cc = kzalloc(sizeof(*cc), GFP_KERNEL);
-	if (!cc)
-		return -ENOMEM;
-
-	fuse_conn_init(&cc->fc);
-
-	INIT_LIST_HEAD(&cc->list);
-	cc->fc.release = cuse_fc_release;
-
-	cc->fc.connected = 1;
-	cc->fc.blocked = 0;
-	rc = cuse_send_init(cc);
-	if (rc) {
-		fuse_conn_put(&cc->fc);
-		return rc;
-	}
-	file->private_data = &cc->fc;	/* channel owns base reference to cc */
-
-	return 0;
-}
-
-/**
- * cuse_channel_release - release method for /dev/cuse
- * @inode: inode for /dev/cuse
- * @file: file struct being closed
- *
- * Disconnect the channel, deregister CUSE device and initiate
- * destruction by putting the default reference.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-static int cuse_channel_release(struct inode *inode, struct file *file)
-{
-	struct cuse_conn *cc = fc_to_cc(file->private_data);
-	int rc;
-
-	/* remove from the conntbl, no more access from this point on */
-	spin_lock(&cuse_lock);
-	list_del_init(&cc->list);
-	spin_unlock(&cuse_lock);
-
-	/* remove device */
-	if (cc->dev)
-		device_unregister(cc->dev);
-	if (cc->cdev) {
-		unregister_chrdev_region(cc->cdev->dev, 1);
-		cdev_del(cc->cdev);
-	}
-
-	/* kill connection and shutdown channel */
-	fuse_conn_kill(&cc->fc);
-	rc = fuse_dev_release(inode, file);	/* puts the base reference */
-
-	return rc;
-}
-
-static struct file_operations cuse_channel_fops; /* initialized during init */
-
-
-/**************************************************************************
- * Misc stuff and module initializatiion
- *
- * CUSE exports the same set of attributes to sysfs as fusectl.
- */
-
-static ssize_t cuse_class_waiting_show(struct device *dev,
-				       struct device_attribute *attr, char *buf)
-{
-	struct cuse_conn *cc = dev_get_drvdata(dev);
-
-	return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting));
-}
-
-static ssize_t cuse_class_abort_store(struct device *dev,
-				      struct device_attribute *attr,
-				      const char *buf, size_t count)
-{
-	struct cuse_conn *cc = dev_get_drvdata(dev);
-
-	fuse_abort_conn(&cc->fc);
-	return count;
-}
-
-static struct device_attribute cuse_class_dev_attrs[] = {
-	__ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL),
-	__ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store),
-	{ }
-};
-
-static struct miscdevice cuse_miscdev = {
-	.minor		= MISC_DYNAMIC_MINOR,
-	.name		= "cuse",
-	.fops		= &cuse_channel_fops,
-};
-
-static int __init cuse_init(void)
-{
-	int i, rc;
-
-	/* init conntbl */
-	for (i = 0; i < CUSE_CONNTBL_LEN; i++)
-		INIT_LIST_HEAD(&cuse_conntbl[i]);
-
-	/* inherit and extend fuse_dev_operations */
-	cuse_channel_fops		= fuse_dev_operations;
-	cuse_channel_fops.owner		= THIS_MODULE;
-	cuse_channel_fops.open		= cuse_channel_open;
-	cuse_channel_fops.release	= cuse_channel_release;
-
-	cuse_class = class_create(THIS_MODULE, "cuse");
-	if (IS_ERR(cuse_class))
-		return PTR_ERR(cuse_class);
-
-	cuse_class->dev_attrs = cuse_class_dev_attrs;
-
-	rc = misc_register(&cuse_miscdev);
-	if (rc) {
-		class_destroy(cuse_class);
-		return rc;
-	}
-
-	return 0;
-}
-
-static void __exit cuse_exit(void)
-{
-	misc_deregister(&cuse_miscdev);
-	class_destroy(cuse_class);
-}
-
-module_init(cuse_init);
-module_exit(cuse_exit);
-
-MODULE_AUTHOR("Tejun Heo <tj@kernel.org>");
-MODULE_DESCRIPTION("Character device in Userspace");
-MODULE_LICENSE("GPL");
diff --git a/trunk/fs/fuse/dev.c b/trunk/fs/fuse/dev.c
index 8fed2ed12f38..ba76b68c52ff 100644
--- a/trunk/fs/fuse/dev.c
+++ b/trunk/fs/fuse/dev.c
@@ -46,7 +46,6 @@ struct fuse_req *fuse_request_alloc(void)
 		fuse_request_init(req);
 	return req;
 }
-EXPORT_SYMBOL_GPL(fuse_request_alloc);
 
 struct fuse_req *fuse_request_alloc_nofs(void)
 {
@@ -125,7 +124,6 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
 	atomic_dec(&fc->num_waiting);
 	return ERR_PTR(err);
 }
-EXPORT_SYMBOL_GPL(fuse_get_req);
 
 /*
  * Return request in fuse_file->reserved_req.  However that may
@@ -210,7 +208,6 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 			fuse_request_free(req);
 	}
 }
-EXPORT_SYMBOL_GPL(fuse_put_request);
 
 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
 {
@@ -285,7 +282,7 @@ __releases(&fc->lock)
 			wake_up_all(&fc->blocked_waitq);
 		}
 		if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
-		    fc->connected && fc->bdi_initialized) {
+		    fc->connected) {
 			clear_bdi_congested(&fc->bdi, READ);
 			clear_bdi_congested(&fc->bdi, WRITE);
 		}
@@ -403,7 +400,6 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 	}
 	spin_unlock(&fc->lock);
 }
-EXPORT_SYMBOL_GPL(fuse_request_send);
 
 static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
 					    struct fuse_req *req)
@@ -412,8 +408,7 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
 	fc->num_background++;
 	if (fc->num_background == FUSE_MAX_BACKGROUND)
 		fc->blocked = 1;
-	if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
-	    fc->bdi_initialized) {
+	if (fc->num_background == FUSE_CONGESTION_THRESHOLD) {
 		set_bdi_congested(&fc->bdi, READ);
 		set_bdi_congested(&fc->bdi, WRITE);
 	}
@@ -444,7 +439,6 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
 	req->isreply = 1;
 	fuse_request_send_nowait(fc, req);
 }
-EXPORT_SYMBOL_GPL(fuse_request_send_background);
 
 /*
  * Called under fc->lock
@@ -1111,9 +1105,8 @@ void fuse_abort_conn(struct fuse_conn *fc)
 	}
 	spin_unlock(&fc->lock);
 }
-EXPORT_SYMBOL_GPL(fuse_abort_conn);
 
-int fuse_dev_release(struct inode *inode, struct file *file)
+static int fuse_dev_release(struct inode *inode, struct file *file)
 {
 	struct fuse_conn *fc = fuse_get_conn(file);
 	if (fc) {
@@ -1127,7 +1120,6 @@ int fuse_dev_release(struct inode *inode, struct file *file)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(fuse_dev_release);
 
 static int fuse_dev_fasync(int fd, struct file *file, int on)
 {
@@ -1150,7 +1142,6 @@ const struct file_operations fuse_dev_operations = {
 	.release	= fuse_dev_release,
 	.fasync		= fuse_dev_fasync,
 };
-EXPORT_SYMBOL_GPL(fuse_dev_operations);
 
 static struct miscdevice fuse_miscdevice = {
 	.minor = FUSE_MINOR,
diff --git a/trunk/fs/fuse/dir.c b/trunk/fs/fuse/dir.c
index b3089a083d30..8b8eebc5614b 100644
--- a/trunk/fs/fuse/dir.c
+++ b/trunk/fs/fuse/dir.c
@@ -361,6 +361,19 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 	return ERR_PTR(err);
 }
 
+/*
+ * Synchronous release for the case when something goes wrong in CREATE_OPEN
+ */
+static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff,
+			      u64 nodeid, int flags)
+{
+	fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE);
+	ff->reserved_req->force = 1;
+	fuse_request_send(fc, ff->reserved_req);
+	fuse_put_request(fc, ff->reserved_req);
+	kfree(ff);
+}
+
 /*
  * Atomic create+open operation
  *
@@ -432,14 +445,12 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 		goto out_free_ff;
 
 	fuse_put_request(fc, req);
-	ff->fh = outopen.fh;
-	ff->nodeid = outentry.nodeid;
-	ff->open_flags = outopen.open_flags;
 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
 			  &outentry.attr, entry_attr_timeout(&outentry), 0);
 	if (!inode) {
 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
-		fuse_sync_release(ff, flags);
+		ff->fh = outopen.fh;
+		fuse_sync_release(fc, ff, outentry.nodeid, flags);
 		fuse_send_forget(fc, forget_req, outentry.nodeid, 1);
 		return -ENOMEM;
 	}
@@ -449,11 +460,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 	fuse_invalidate_attr(dir);
 	file = lookup_instantiate_filp(nd, entry, generic_file_open);
 	if (IS_ERR(file)) {
-		fuse_sync_release(ff, flags);
+		ff->fh = outopen.fh;
+		fuse_sync_release(fc, ff, outentry.nodeid, flags);
 		return PTR_ERR(file);
 	}
-	file->private_data = fuse_file_get(ff);
-	fuse_finish_open(inode, file);
+	fuse_finish_open(inode, file, ff, &outopen);
 	return 0;
 
  out_free_ff:
@@ -1024,7 +1035,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
 	req->out.argpages = 1;
 	req->num_pages = 1;
 	req->pages[0] = page;
-	fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR);
+	fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR);
 	fuse_request_send(fc, req);
 	nbytes = req->out.args[0].size;
 	err = req->out.h.error;
@@ -1090,14 +1101,12 @@ static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
 
 static int fuse_dir_open(struct inode *inode, struct file *file)
 {
-	return fuse_open_common(inode, file, true);
+	return fuse_open_common(inode, file, 1);
 }
 
 static int fuse_dir_release(struct inode *inode, struct file *file)
 {
-	fuse_release_common(file, FUSE_RELEASEDIR);
-
-	return 0;
+	return fuse_release_common(inode, file, 1);
 }
 
 static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
diff --git a/trunk/fs/fuse/file.c b/trunk/fs/fuse/file.c
index fce6ce694fde..06f30e965676 100644
--- a/trunk/fs/fuse/file.c
+++ b/trunk/fs/fuse/file.c
@@ -12,13 +12,13 @@
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/module.h>
 
 static const struct file_operations fuse_direct_io_file_operations;
 
-static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
-			  int opcode, struct fuse_open_out *outargp)
+static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
+			  struct fuse_open_out *outargp)
 {
+	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_open_in inarg;
 	struct fuse_req *req;
 	int err;
@@ -31,8 +31,8 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
 	inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
 	if (!fc->atomic_o_trunc)
 		inarg.flags &= ~O_TRUNC;
-	req->in.h.opcode = opcode;
-	req->in.h.nodeid = nodeid;
+	req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
+	req->in.h.nodeid = get_node_id(inode);
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(inarg);
 	req->in.args[0].value = &inarg;
@@ -49,27 +49,22 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
 struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
 {
 	struct fuse_file *ff;
-
 	ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
-	if (unlikely(!ff))
-		return NULL;
-
-	ff->fc = fc;
-	ff->reserved_req = fuse_request_alloc();
-	if (unlikely(!ff->reserved_req)) {
-		kfree(ff);
-		return NULL;
+	if (ff) {
+		ff->reserved_req = fuse_request_alloc();
+		if (!ff->reserved_req) {
+			kfree(ff);
+			return NULL;
+		} else {
+			INIT_LIST_HEAD(&ff->write_entry);
+			atomic_set(&ff->count, 0);
+			spin_lock(&fc->lock);
+			ff->kh = ++fc->khctr;
+			spin_unlock(&fc->lock);
+		}
+		RB_CLEAR_NODE(&ff->polled_node);
+		init_waitqueue_head(&ff->poll_wait);
 	}
-
-	INIT_LIST_HEAD(&ff->write_entry);
-	atomic_set(&ff->count, 0);
-	RB_CLEAR_NODE(&ff->polled_node);
-	init_waitqueue_head(&ff->poll_wait);
-
-	spin_lock(&fc->lock);
-	ff->kh = ++fc->khctr;
-	spin_unlock(&fc->lock);
-
 	return ff;
 }
 
@@ -79,7 +74,7 @@ void fuse_file_free(struct fuse_file *ff)
 	kfree(ff);
 }
 
-struct fuse_file *fuse_file_get(struct fuse_file *ff)
+static struct fuse_file *fuse_file_get(struct fuse_file *ff)
 {
 	atomic_inc(&ff->count);
 	return ff;
@@ -87,65 +82,40 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
 
 static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
 {
-	path_put(&req->misc.release.path);
+	dput(req->misc.release.dentry);
+	mntput(req->misc.release.vfsmount);
 }
 
 static void fuse_file_put(struct fuse_file *ff)
 {
 	if (atomic_dec_and_test(&ff->count)) {
 		struct fuse_req *req = ff->reserved_req;
-
+		struct inode *inode = req->misc.release.dentry->d_inode;
+		struct fuse_conn *fc = get_fuse_conn(inode);
 		req->end = fuse_release_end;
-		fuse_request_send_background(ff->fc, req);
+		fuse_request_send_background(fc, req);
 		kfree(ff);
 	}
 }
 
-int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
-		 bool isdir)
+void fuse_finish_open(struct inode *inode, struct file *file,
+		      struct fuse_file *ff, struct fuse_open_out *outarg)
 {
-	struct fuse_open_out outarg;
-	struct fuse_file *ff;
-	int err;
-	int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
-
-	ff = fuse_file_alloc(fc);
-	if (!ff)
-		return -ENOMEM;
-
-	err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
-	if (err) {
-		fuse_file_free(ff);
-		return err;
-	}
-
-	if (isdir)
-		outarg.open_flags &= ~FOPEN_DIRECT_IO;
-
-	ff->fh = outarg.fh;
-	ff->nodeid = nodeid;
-	ff->open_flags = outarg.open_flags;
-	file->private_data = fuse_file_get(ff);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(fuse_do_open);
-
-void fuse_finish_open(struct inode *inode, struct file *file)
-{
-	struct fuse_file *ff = file->private_data;
-
-	if (ff->open_flags & FOPEN_DIRECT_IO)
+	if (outarg->open_flags & FOPEN_DIRECT_IO)
 		file->f_op = &fuse_direct_io_file_operations;
-	if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+	if (!(outarg->open_flags & FOPEN_KEEP_CACHE))
 		invalidate_inode_pages2(inode->i_mapping);
-	if (ff->open_flags & FOPEN_NONSEEKABLE)
+	if (outarg->open_flags & FOPEN_NONSEEKABLE)
 		nonseekable_open(inode, file);
+	ff->fh = outarg->fh;
+	file->private_data = fuse_file_get(ff);
 }
 
-int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
+int fuse_open_common(struct inode *inode, struct file *file, int isdir)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_open_out outarg;
+	struct fuse_file *ff;
 	int err;
 
 	/* VFS checks this, but only _after_ ->open() */
@@ -156,85 +126,78 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
 	if (err)
 		return err;
 
-	err = fuse_do_open(fc, get_node_id(inode), file, isdir);
-	if (err)
-		return err;
+	ff = fuse_file_alloc(fc);
+	if (!ff)
+		return -ENOMEM;
 
-	fuse_finish_open(inode, file);
+	err = fuse_send_open(inode, file, isdir, &outarg);
+	if (err)
+		fuse_file_free(ff);
+	else {
+		if (isdir)
+			outarg.open_flags &= ~FOPEN_DIRECT_IO;
+		fuse_finish_open(inode, file, ff, &outarg);
+	}
 
-	return 0;
+	return err;
 }
 
-static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
+void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode)
 {
-	struct fuse_conn *fc = ff->fc;
 	struct fuse_req *req = ff->reserved_req;
 	struct fuse_release_in *inarg = &req->misc.release.in;
 
-	spin_lock(&fc->lock);
-	list_del(&ff->write_entry);
-	if (!RB_EMPTY_NODE(&ff->polled_node))
-		rb_erase(&ff->polled_node, &fc->polled_files);
-	spin_unlock(&fc->lock);
-
-	wake_up_interruptible_sync(&ff->poll_wait);
-
 	inarg->fh = ff->fh;
 	inarg->flags = flags;
 	req->in.h.opcode = opcode;
-	req->in.h.nodeid = ff->nodeid;
+	req->in.h.nodeid = nodeid;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(struct fuse_release_in);
 	req->in.args[0].value = inarg;
 }
 
-void fuse_release_common(struct file *file, int opcode)
+int fuse_release_common(struct inode *inode, struct file *file, int isdir)
 {
-	struct fuse_file *ff;
-	struct fuse_req *req;
+	struct fuse_file *ff = file->private_data;
+	if (ff) {
+		struct fuse_conn *fc = get_fuse_conn(inode);
+		struct fuse_req *req = ff->reserved_req;
 
-	ff = file->private_data;
-	if (unlikely(!ff))
-		return;
+		fuse_release_fill(ff, get_node_id(inode), file->f_flags,
+				  isdir ? FUSE_RELEASEDIR : FUSE_RELEASE);
 
-	req = ff->reserved_req;
-	fuse_prepare_release(ff, file->f_flags, opcode);
+		/* Hold vfsmount and dentry until release is finished */
+		req->misc.release.vfsmount = mntget(file->f_path.mnt);
+		req->misc.release.dentry = dget(file->f_path.dentry);
+
+		spin_lock(&fc->lock);
+		list_del(&ff->write_entry);
+		if (!RB_EMPTY_NODE(&ff->polled_node))
+			rb_erase(&ff->polled_node, &fc->polled_files);
+		spin_unlock(&fc->lock);
 
-	/* Hold vfsmount and dentry until release is finished */
-	path_get(&file->f_path);
-	req->misc.release.path = file->f_path;
+		wake_up_interruptible_sync(&ff->poll_wait);
+		/*
+		 * Normally this will send the RELEASE request;
+		 * however, if some asynchronous READ or WRITE requests
+		 * are outstanding, the sending will be delayed.
+		 */
+		fuse_file_put(ff);
+	}
 
-	/*
-	 * Normally this will send the RELEASE request, however if
-	 * some asynchronous READ or WRITE requests are outstanding,
-	 * the sending will be delayed.
-	 */
-	fuse_file_put(ff);
+	/* Return value is ignored by VFS */
+	return 0;
 }
 
 static int fuse_open(struct inode *inode, struct file *file)
 {
-	return fuse_open_common(inode, file, false);
+	return fuse_open_common(inode, file, 0);
 }
 
 static int fuse_release(struct inode *inode, struct file *file)
 {
-	fuse_release_common(file, FUSE_RELEASE);
-
-	/* return value is ignored by VFS */
-	return 0;
-}
-
-void fuse_sync_release(struct fuse_file *ff, int flags)
-{
-	WARN_ON(atomic_read(&ff->count) > 1);
-	fuse_prepare_release(ff, flags, FUSE_RELEASE);
-	ff->reserved_req->force = 1;
-	fuse_request_send(ff->fc, ff->reserved_req);
-	fuse_put_request(ff->fc, ff->reserved_req);
-	kfree(ff);
+	return fuse_release_common(inode, file, 0);
 }
-EXPORT_SYMBOL_GPL(fuse_sync_release);
 
 /*
  * Scramble the ID space with XTEA, so that the value of the files_struct
@@ -408,8 +371,8 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
 	return fuse_fsync_common(file, de, datasync, 0);
 }
 
-void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
-		    size_t count, int opcode)
+void fuse_read_fill(struct fuse_req *req, struct file *file,
+		    struct inode *inode, loff_t pos, size_t count, int opcode)
 {
 	struct fuse_read_in *inarg = &req->misc.read.in;
 	struct fuse_file *ff = file->private_data;
@@ -419,7 +382,7 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
 	inarg->size = count;
 	inarg->flags = file->f_flags;
 	req->in.h.opcode = opcode;
-	req->in.h.nodeid = ff->nodeid;
+	req->in.h.nodeid = get_node_id(inode);
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(struct fuse_read_in);
 	req->in.args[0].value = inarg;
@@ -429,12 +392,12 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
 }
 
 static size_t fuse_send_read(struct fuse_req *req, struct file *file,
-			     loff_t pos, size_t count, fl_owner_t owner)
+			     struct inode *inode, loff_t pos, size_t count,
+			     fl_owner_t owner)
 {
-	struct fuse_file *ff = file->private_data;
-	struct fuse_conn *fc = ff->fc;
+	struct fuse_conn *fc = get_fuse_conn(inode);
 
-	fuse_read_fill(req, file, pos, count, FUSE_READ);
+	fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
 	if (owner != NULL) {
 		struct fuse_read_in *inarg = &req->misc.read.in;
 
@@ -492,7 +455,7 @@ static int fuse_readpage(struct file *file, struct page *page)
 	req->out.argpages = 1;
 	req->num_pages = 1;
 	req->pages[0] = page;
-	num_read = fuse_send_read(req, file, pos, count, NULL);
+	num_read = fuse_send_read(req, file, inode, pos, count, NULL);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 
@@ -541,18 +504,19 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
 		fuse_file_put(req->ff);
 }
 
-static void fuse_send_readpages(struct fuse_req *req, struct file *file)
+static void fuse_send_readpages(struct fuse_req *req, struct file *file,
+				struct inode *inode)
 {
-	struct fuse_file *ff = file->private_data;
-	struct fuse_conn *fc = ff->fc;
+	struct fuse_conn *fc = get_fuse_conn(inode);
 	loff_t pos = page_offset(req->pages[0]);
 	size_t count = req->num_pages << PAGE_CACHE_SHIFT;
 
 	req->out.argpages = 1;
 	req->out.page_zeroing = 1;
-	fuse_read_fill(req, file, pos, count, FUSE_READ);
+	fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
 	req->misc.read.attr_ver = fuse_get_attr_version(fc);
 	if (fc->async_read) {
+		struct fuse_file *ff = file->private_data;
 		req->ff = fuse_file_get(ff);
 		req->end = fuse_readpages_end;
 		fuse_request_send_background(fc, req);
@@ -582,7 +546,7 @@ static int fuse_readpages_fill(void *_data, struct page *page)
 	    (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
 	     (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
 	     req->pages[req->num_pages - 1]->index + 1 != page->index)) {
-		fuse_send_readpages(req, data->file);
+		fuse_send_readpages(req, data->file, inode);
 		data->req = req = fuse_get_req(fc);
 		if (IS_ERR(req)) {
 			unlock_page(page);
@@ -616,7 +580,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
 	err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
 	if (!err) {
 		if (data.req->num_pages)
-			fuse_send_readpages(data.req, file);
+			fuse_send_readpages(data.req, file, inode);
 		else
 			fuse_put_request(fc, data.req);
 	}
@@ -643,19 +607,24 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 	return generic_file_aio_read(iocb, iov, nr_segs, pos);
 }
 
-static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
-			    loff_t pos, size_t count)
+static void fuse_write_fill(struct fuse_req *req, struct file *file,
+			    struct fuse_file *ff, struct inode *inode,
+			    loff_t pos, size_t count, int writepage)
 {
+	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_write_in *inarg = &req->misc.write.in;
 	struct fuse_write_out *outarg = &req->misc.write.out;
 
+	memset(inarg, 0, sizeof(struct fuse_write_in));
 	inarg->fh = ff->fh;
 	inarg->offset = pos;
 	inarg->size = count;
+	inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0;
+	inarg->flags = file ? file->f_flags : 0;
 	req->in.h.opcode = FUSE_WRITE;
-	req->in.h.nodeid = ff->nodeid;
+	req->in.h.nodeid = get_node_id(inode);
 	req->in.numargs = 2;
-	if (ff->fc->minor < 9)
+	if (fc->minor < 9)
 		req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
 	else
 		req->in.args[0].size = sizeof(struct fuse_write_in);
@@ -667,15 +636,13 @@ static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
 }
 
 static size_t fuse_send_write(struct fuse_req *req, struct file *file,
-			      loff_t pos, size_t count, fl_owner_t owner)
+			      struct inode *inode, loff_t pos, size_t count,
+			      fl_owner_t owner)
 {
-	struct fuse_file *ff = file->private_data;
-	struct fuse_conn *fc = ff->fc;
-	struct fuse_write_in *inarg = &req->misc.write.in;
-
-	fuse_write_fill(req, ff, pos, count);
-	inarg->flags = file->f_flags;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	fuse_write_fill(req, file, file->private_data, inode, pos, count, 0);
 	if (owner != NULL) {
+		struct fuse_write_in *inarg = &req->misc.write.in;
 		inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
 		inarg->lock_owner = fuse_lock_owner_id(fc, owner);
 	}
@@ -733,7 +700,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
 	req->num_pages = 1;
 	req->pages[0] = page;
 	req->page_offset = offset;
-	nres = fuse_send_write(req, file, pos, count, NULL);
+	nres = fuse_send_write(req, file, inode, pos, count, NULL);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (!err && !nres)
@@ -774,7 +741,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
 	for (i = 0; i < req->num_pages; i++)
 		fuse_wait_on_page_writeback(inode, req->pages[i]->index);
 
-	res = fuse_send_write(req, file, pos, count, NULL);
+	res = fuse_send_write(req, file, inode, pos, count, NULL);
 
 	offset = req->page_offset;
 	count = res;
@@ -1012,23 +979,25 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
 	return 0;
 }
 
-ssize_t fuse_direct_io(struct file *file, const char __user *buf,
-		       size_t count, loff_t *ppos, int write)
+static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
+			      size_t count, loff_t *ppos, int write)
 {
-	struct fuse_file *ff = file->private_data;
-	struct fuse_conn *fc = ff->fc;
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
 	size_t nmax = write ? fc->max_write : fc->max_read;
 	loff_t pos = *ppos;
 	ssize_t res = 0;
 	struct fuse_req *req;
 
+	if (is_bad_inode(inode))
+		return -EIO;
+
 	req = fuse_get_req(fc);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
 	while (count) {
 		size_t nres;
-		fl_owner_t owner = current->files;
 		size_t nbytes = min(count, nmax);
 		int err = fuse_get_user_pages(req, buf, &nbytes, write);
 		if (err) {
@@ -1037,10 +1006,11 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 		}
 
 		if (write)
-			nres = fuse_send_write(req, file, pos, nbytes, owner);
+			nres = fuse_send_write(req, file, inode, pos, nbytes,
+					       current->files);
 		else
-			nres = fuse_send_read(req, file, pos, nbytes, owner);
-
+			nres = fuse_send_read(req, file, inode, pos, nbytes,
+					      current->files);
 		fuse_release_user_pages(req, !write);
 		if (req->out.h.error) {
 			if (!res)
@@ -1064,27 +1034,20 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 		}
 	}
 	fuse_put_request(fc, req);
-	if (res > 0)
+	if (res > 0) {
+		if (write)
+			fuse_write_update_size(inode, pos);
 		*ppos = pos;
+	}
+	fuse_invalidate_attr(inode);
 
 	return res;
 }
-EXPORT_SYMBOL_GPL(fuse_direct_io);
 
 static ssize_t fuse_direct_read(struct file *file, char __user *buf,
 				     size_t count, loff_t *ppos)
 {
-	ssize_t res;
-	struct inode *inode = file->f_path.dentry->d_inode;
-
-	if (is_bad_inode(inode))
-		return -EIO;
-
-	res = fuse_direct_io(file, buf, count, ppos, 0);
-
-	fuse_invalidate_attr(inode);
-
-	return res;
+	return fuse_direct_io(file, buf, count, ppos, 0);
 }
 
 static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
@@ -1092,22 +1055,12 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
 {
 	struct inode *inode = file->f_path.dentry->d_inode;
 	ssize_t res;
-
-	if (is_bad_inode(inode))
-		return -EIO;
-
 	/* Don't allow parallel writes to the same file */
 	mutex_lock(&inode->i_mutex);
 	res = generic_write_checks(file, ppos, &count, 0);
-	if (!res) {
+	if (!res)
 		res = fuse_direct_io(file, buf, count, ppos, 1);
-		if (res > 0)
-			fuse_write_update_size(inode, *ppos);
-	}
 	mutex_unlock(&inode->i_mutex);
-
-	fuse_invalidate_attr(inode);
-
 	return res;
 }
 
@@ -1224,10 +1177,9 @@ static int fuse_writepage_locked(struct page *page)
 	req->ff = fuse_file_get(ff);
 	spin_unlock(&fc->lock);
 
-	fuse_write_fill(req, ff, page_offset(page), 0);
+	fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1);
 
 	copy_highpage(tmp_page, page);
-	req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
 	req->in.argpages = 1;
 	req->num_pages = 1;
 	req->pages[0] = tmp_page;
@@ -1651,11 +1603,12 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
  * limits ioctl data transfers to well-formed ioctls and is the forced
  * behavior for all FUSE servers.
  */
-long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
-		   unsigned int flags)
+static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
+			       unsigned long arg, unsigned int flags)
 {
+	struct inode *inode = file->f_dentry->d_inode;
 	struct fuse_file *ff = file->private_data;
-	struct fuse_conn *fc = ff->fc;
+	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_ioctl_in inarg = {
 		.fh = ff->fh,
 		.cmd = cmd,
@@ -1674,6 +1627,13 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 	/* assume all the iovs returned by client always fits in a page */
 	BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
 
+	if (!fuse_allow_task(fc, current))
+		return -EACCES;
+
+	err = -EIO;
+	if (is_bad_inode(inode))
+		goto out;
+
 	err = -ENOMEM;
 	pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
 	iov_page = alloc_page(GFP_KERNEL);
@@ -1734,7 +1694,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 
 	/* okay, let's send it to the client */
 	req->in.h.opcode = FUSE_IOCTL;
-	req->in.h.nodeid = ff->nodeid;
+	req->in.h.nodeid = get_node_id(inode);
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(inarg);
 	req->in.args[0].value = &inarg;
@@ -1817,33 +1777,17 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 
 	return err ? err : outarg.result;
 }
-EXPORT_SYMBOL_GPL(fuse_do_ioctl);
-
-static long fuse_file_ioctl_common(struct file *file, unsigned int cmd,
-				   unsigned long arg, unsigned int flags)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct fuse_conn *fc = get_fuse_conn(inode);
-
-	if (!fuse_allow_task(fc, current))
-		return -EACCES;
-
-	if (is_bad_inode(inode))
-		return -EIO;
-
-	return fuse_do_ioctl(file, cmd, arg, flags);
-}
 
 static long fuse_file_ioctl(struct file *file, unsigned int cmd,
 			    unsigned long arg)
 {
-	return fuse_file_ioctl_common(file, cmd, arg, 0);
+	return fuse_file_do_ioctl(file, cmd, arg, 0);
 }
 
 static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
 				   unsigned long arg)
 {
-	return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
+	return fuse_file_do_ioctl(file, cmd, arg, FUSE_IOCTL_COMPAT);
 }
 
 /*
@@ -1897,10 +1841,11 @@ static void fuse_register_polled_file(struct fuse_conn *fc,
 	spin_unlock(&fc->lock);
 }
 
-unsigned fuse_file_poll(struct file *file, poll_table *wait)
+static unsigned fuse_file_poll(struct file *file, poll_table *wait)
 {
+	struct inode *inode = file->f_dentry->d_inode;
 	struct fuse_file *ff = file->private_data;
-	struct fuse_conn *fc = ff->fc;
+	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
 	struct fuse_poll_out outarg;
 	struct fuse_req *req;
@@ -1925,7 +1870,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
 		return PTR_ERR(req);
 
 	req->in.h.opcode = FUSE_POLL;
-	req->in.h.nodeid = ff->nodeid;
+	req->in.h.nodeid = get_node_id(inode);
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(inarg);
 	req->in.args[0].value = &inarg;
@@ -1944,7 +1889,6 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
 	}
 	return POLLERR;
 }
-EXPORT_SYMBOL_GPL(fuse_file_poll);
 
 /*
  * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and
diff --git a/trunk/fs/fuse/fuse_i.h b/trunk/fs/fuse/fuse_i.h
index aaf2f9ff970e..6fc5aedaa0d5 100644
--- a/trunk/fs/fuse/fuse_i.h
+++ b/trunk/fs/fuse/fuse_i.h
@@ -97,13 +97,8 @@ struct fuse_inode {
 	struct list_head writepages;
 };
 
-struct fuse_conn;
-
 /** FUSE specific file data */
 struct fuse_file {
-	/** Fuse connection for this file */
-	struct fuse_conn *fc;
-
 	/** Request reserved for flush and release */
 	struct fuse_req *reserved_req;
 
@@ -113,15 +108,9 @@ struct fuse_file {
 	/** File handle used by userspace */
 	u64 fh;
 
-	/** Node id of this file */
-	u64 nodeid;
-
 	/** Refcount */
 	atomic_t count;
 
-	/** FOPEN_* flags returned by open */
-	u32 open_flags;
-
 	/** Entry on inode's write_files list */
 	struct list_head write_entry;
 
@@ -196,6 +185,8 @@ enum fuse_req_state {
 	FUSE_REQ_FINISHED
 };
 
+struct fuse_conn;
+
 /**
  * A request to the client
  */
@@ -257,12 +248,11 @@ struct fuse_req {
 		struct fuse_forget_in forget_in;
 		struct {
 			struct fuse_release_in in;
-			struct path path;
+			struct vfsmount *vfsmount;
+			struct dentry *dentry;
 		} release;
 		struct fuse_init_in init_in;
 		struct fuse_init_out init_out;
-		struct cuse_init_in cuse_init_in;
-		struct cuse_init_out cuse_init_out;
 		struct {
 			struct fuse_read_in in;
 			u64 attr_ver;
@@ -396,9 +386,6 @@ struct fuse_conn {
 	/** Filesystem supports NFS exporting.  Only set in INIT */
 	unsigned export_support:1;
 
-	/** Set if bdi is valid */
-	unsigned bdi_initialized:1;
-
 	/*
 	 * The following bitfields are only for optimization purposes
 	 * and hence races in setting them will not cause malfunction
@@ -528,24 +515,25 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
  * Initialize READ or READDIR request
  */
 void fuse_read_fill(struct fuse_req *req, struct file *file,
-		    loff_t pos, size_t count, int opcode);
+		    struct inode *inode, loff_t pos, size_t count, int opcode);
 
 /**
  * Send OPEN or OPENDIR request
  */
-int fuse_open_common(struct inode *inode, struct file *file, bool isdir);
+int fuse_open_common(struct inode *inode, struct file *file, int isdir);
 
 struct fuse_file *fuse_file_alloc(struct fuse_conn *fc);
-struct fuse_file *fuse_file_get(struct fuse_file *ff);
 void fuse_file_free(struct fuse_file *ff);
-void fuse_finish_open(struct inode *inode, struct file *file);
+void fuse_finish_open(struct inode *inode, struct file *file,
+		      struct fuse_file *ff, struct fuse_open_out *outarg);
 
-void fuse_sync_release(struct fuse_file *ff, int flags);
+/** Fill in ff->reserved_req with a RELEASE request */
+void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode);
 
 /**
  * Send RELEASE or RELEASEDIR request
  */
-void fuse_release_common(struct file *file, int opcode);
+int fuse_release_common(struct inode *inode, struct file *file, int isdir);
 
 /**
  * Send FSYNC or FSYNCDIR request
@@ -664,12 +652,10 @@ void fuse_invalidate_entry_cache(struct dentry *entry);
  */
 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
 
-void fuse_conn_kill(struct fuse_conn *fc);
-
 /**
  * Initialize fuse_conn
  */
-void fuse_conn_init(struct fuse_conn *fc);
+int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb);
 
 /**
  * Release reference to fuse_conn
@@ -708,13 +694,4 @@ void fuse_release_nowrite(struct inode *inode);
 
 u64 fuse_get_attr_version(struct fuse_conn *fc);
 
-int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
-		 bool isdir);
-ssize_t fuse_direct_io(struct file *file, const char __user *buf,
-		       size_t count, loff_t *ppos, int write);
-long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
-		   unsigned int flags);
-unsigned fuse_file_poll(struct file *file, poll_table *wait);
-int fuse_dev_release(struct inode *inode, struct file *file);
-
 #endif /* _FS_FUSE_I_H */
diff --git a/trunk/fs/fuse/inode.c b/trunk/fs/fuse/inode.c
index f0df55a52929..91f7c85f1ffd 100644
--- a/trunk/fs/fuse/inode.c
+++ b/trunk/fs/fuse/inode.c
@@ -277,14 +277,11 @@ static void fuse_send_destroy(struct fuse_conn *fc)
 	}
 }
 
-static void fuse_bdi_destroy(struct fuse_conn *fc)
+static void fuse_put_super(struct super_block *sb)
 {
-	if (fc->bdi_initialized)
-		bdi_destroy(&fc->bdi);
-}
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
 
-void fuse_conn_kill(struct fuse_conn *fc)
-{
+	fuse_send_destroy(fc);
 	spin_lock(&fc->lock);
 	fc->connected = 0;
 	fc->blocked = 0;
@@ -298,16 +295,7 @@ void fuse_conn_kill(struct fuse_conn *fc)
 	list_del(&fc->entry);
 	fuse_ctl_remove_conn(fc);
 	mutex_unlock(&fuse_mutex);
-	fuse_bdi_destroy(fc);
-}
-EXPORT_SYMBOL_GPL(fuse_conn_kill);
-
-static void fuse_put_super(struct super_block *sb)
-{
-	struct fuse_conn *fc = get_fuse_conn_super(sb);
-
-	fuse_send_destroy(fc);
-	fuse_conn_kill(fc);
+	bdi_destroy(&fc->bdi);
 	fuse_conn_put(fc);
 }
 
@@ -478,8 +466,10 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
 	return 0;
 }
 
-void fuse_conn_init(struct fuse_conn *fc)
+int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb)
 {
+	int err;
+
 	memset(fc, 0, sizeof(*fc));
 	spin_lock_init(&fc->lock);
 	mutex_init(&fc->inst_mutex);
@@ -494,12 +484,49 @@ void fuse_conn_init(struct fuse_conn *fc)
 	INIT_LIST_HEAD(&fc->bg_queue);
 	INIT_LIST_HEAD(&fc->entry);
 	atomic_set(&fc->num_waiting, 0);
+	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.unplug_io_fn = default_unplug_io_fn;
+	/* fuse does its own writeback accounting */
+	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
 	fc->khctr = 0;
 	fc->polled_files = RB_ROOT;
+	fc->dev = sb->s_dev;
+	err = bdi_init(&fc->bdi);
+	if (err)
+		goto error_mutex_destroy;
+	if (sb->s_bdev) {
+		err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
+				   MAJOR(fc->dev), MINOR(fc->dev));
+	} else {
+		err = bdi_register_dev(&fc->bdi, fc->dev);
+	}
+	if (err)
+		goto error_bdi_destroy;
+	/*
+	 * For a single fuse filesystem use max 1% of dirty +
+	 * writeback threshold.
+	 *
+	 * This gives about 1M of write buffer for memory maps on a
+	 * machine with 1G and 10% dirty_ratio, which should be more
+	 * than enough.
+	 *
+	 * Privileged users can raise it by writing to
+	 *
+	 *    /sys/class/bdi/<bdi>/max_ratio
+	 */
+	bdi_set_max_ratio(&fc->bdi, 1);
 	fc->reqctr = 0;
 	fc->blocked = 1;
 	fc->attr_version = 1;
 	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
+
+	return 0;
+
+ error_bdi_destroy:
+	bdi_destroy(&fc->bdi);
+ error_mutex_destroy:
+	mutex_destroy(&fc->inst_mutex);
+	return err;
 }
 EXPORT_SYMBOL_GPL(fuse_conn_init);
 
@@ -512,14 +539,12 @@ void fuse_conn_put(struct fuse_conn *fc)
 		fc->release(fc);
 	}
 }
-EXPORT_SYMBOL_GPL(fuse_conn_put);
 
 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
 {
 	atomic_inc(&fc->count);
 	return fc;
 }
-EXPORT_SYMBOL_GPL(fuse_conn_get);
 
 static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
 {
@@ -772,48 +797,6 @@ static void fuse_free_conn(struct fuse_conn *fc)
 	kfree(fc);
 }
 
-static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
-{
-	int err;
-
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
-	fc->bdi.unplug_io_fn = default_unplug_io_fn;
-	/* fuse does it's own writeback accounting */
-	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
-
-	err = bdi_init(&fc->bdi);
-	if (err)
-		return err;
-
-	fc->bdi_initialized = 1;
-
-	if (sb->s_bdev) {
-		err =  bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
-				    MAJOR(fc->dev), MINOR(fc->dev));
-	} else {
-		err = bdi_register_dev(&fc->bdi, fc->dev);
-	}
-
-	if (err)
-		return err;
-
-	/*
-	 * For a single fuse filesystem use max 1% of dirty +
-	 * writeback threshold.
-	 *
-	 * This gives about 1M of write buffer for memory maps on a
-	 * machine with 1G and 10% dirty_ratio, which should be more
-	 * than enough.
-	 *
-	 * Privileged users can raise it by writing to
-	 *
-	 *    /sys/class/bdi/<bdi>/max_ratio
-	 */
-	bdi_set_max_ratio(&fc->bdi, 1);
-
-	return 0;
-}
-
 static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct fuse_conn *fc;
@@ -860,12 +843,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	if (!fc)
 		goto err_fput;
 
-	fuse_conn_init(fc);
-
-	fc->dev = sb->s_dev;
-	err = fuse_bdi_init(fc, sb);
-	if (err)
-		goto err_put_conn;
+	err = fuse_conn_init(fc, sb);
+	if (err) {
+		kfree(fc);
+		goto err_fput;
+	}
 
 	fc->release = fuse_free_conn;
 	fc->flags = d.flags;
@@ -929,7 +911,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
  err_put_root:
 	dput(root_dentry);
  err_put_conn:
-	fuse_bdi_destroy(fc);
+	bdi_destroy(&fc->bdi);
 	fuse_conn_put(fc);
  err_fput:
 	fput(file);
diff --git a/trunk/fs/gfs2/Makefile b/trunk/fs/gfs2/Makefile
index d53a9bea1c2f..3da2f1f4f738 100644
--- a/trunk/fs/gfs2/Makefile
+++ b/trunk/fs/gfs2/Makefile
@@ -1,3 +1,4 @@
+EXTRA_CFLAGS := -I$(src)
 obj-$(CONFIG_GFS2_FS) += gfs2.o
 gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \
 	glops.o inode.o log.o lops.o main.o meta_io.o \
diff --git a/trunk/fs/gfs2/bmap.c b/trunk/fs/gfs2/bmap.c
index 329763530dc0..6d47379e794b 100644
--- a/trunk/fs/gfs2/bmap.c
+++ b/trunk/fs/gfs2/bmap.c
@@ -25,6 +25,7 @@
 #include "trans.h"
 #include "dir.h"
 #include "util.h"
+#include "trace_gfs2.h"
 
 /* This doesn't need to be that large as max 64 bit pointers in a 4k
  * block is 512, so __u16 is fine for that. It saves stack space to
@@ -589,6 +590,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
 	clear_buffer_mapped(bh_map);
 	clear_buffer_new(bh_map);
 	clear_buffer_boundary(bh_map);
+	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
 	if (gfs2_is_dir(ip)) {
 		bsize = sdp->sd_jbsize;
 		arr = sdp->sd_jheightsize;
@@ -623,6 +625,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
 	ret = 0;
 out:
 	release_metapath(&mp);
+	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
 	bmap_unlock(ip, create);
 	return ret;
 
diff --git a/trunk/fs/gfs2/glock.c b/trunk/fs/gfs2/glock.c
index 2bf62bcc5181..297421c0427a 100644
--- a/trunk/fs/gfs2/glock.c
+++ b/trunk/fs/gfs2/glock.c
@@ -39,6 +39,8 @@
 #include "super.h"
 #include "util.h"
 #include "bmap.h"
+#define CREATE_TRACE_POINTS
+#include "trace_gfs2.h"
 
 struct gfs2_gl_hash_bucket {
         struct hlist_head hb_list;
@@ -155,7 +157,7 @@ static void glock_free(struct gfs2_glock *gl)
 
 	if (aspace)
 		gfs2_aspace_put(aspace);
-
+	trace_gfs2_glock_put(gl);
 	sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl);
 }
 
@@ -317,14 +319,17 @@ __acquires(&gl->gl_spin)
 						return 2;
 					gh->gh_error = ret;
 					list_del_init(&gh->gh_list);
+					trace_gfs2_glock_queue(gh, 0);
 					gfs2_holder_wake(gh);
 					goto restart;
 				}
 				set_bit(HIF_HOLDER, &gh->gh_iflags);
+				trace_gfs2_promote(gh, 1);
 				gfs2_holder_wake(gh);
 				goto restart;
 			}
 			set_bit(HIF_HOLDER, &gh->gh_iflags);
+			trace_gfs2_promote(gh, 0);
 			gfs2_holder_wake(gh);
 			continue;
 		}
@@ -354,6 +359,7 @@ static inline void do_error(struct gfs2_glock *gl, const int ret)
 		else
 			continue;
 		list_del_init(&gh->gh_list);
+		trace_gfs2_glock_queue(gh, 0);
 		gfs2_holder_wake(gh);
 	}
 }
@@ -422,6 +428,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
 	int rv;
 
 	spin_lock(&gl->gl_spin);
+	trace_gfs2_glock_state_change(gl, state);
 	state_change(gl, state);
 	gh = find_first_waiter(gl);
 
@@ -851,6 +858,7 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
 			gl->gl_demote_state != state) {
 		gl->gl_demote_state = LM_ST_UNLOCKED;
 	}
+	trace_gfs2_demote_rq(gl);
 }
 
 /**
@@ -936,6 +944,7 @@ __acquires(&gl->gl_spin)
 			goto do_cancel;
 		return;
 	}
+	trace_gfs2_glock_queue(gh, 1);
 	list_add_tail(&gh->gh_list, insert_pt);
 do_cancel:
 	gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
@@ -1032,6 +1041,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 		    !test_bit(GLF_DEMOTE, &gl->gl_flags))
 			fast_path = 1;
 	}
+	trace_gfs2_glock_queue(gh, 0);
 	spin_unlock(&gl->gl_spin);
 	if (likely(fast_path))
 		return;
diff --git a/trunk/fs/gfs2/log.c b/trunk/fs/gfs2/log.c
index f2e449c595b4..13c6237c5f67 100644
--- a/trunk/fs/gfs2/log.c
+++ b/trunk/fs/gfs2/log.c
@@ -28,6 +28,7 @@
 #include "meta_io.h"
 #include "util.h"
 #include "dir.h"
+#include "trace_gfs2.h"
 
 #define PULL 1
 
@@ -313,6 +314,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 		gfs2_log_lock(sdp);
 	}
 	atomic_sub(blks, &sdp->sd_log_blks_free);
+	trace_gfs2_log_blocks(sdp, -blks);
 	gfs2_log_unlock(sdp);
 	mutex_unlock(&sdp->sd_log_reserve_mutex);
 
@@ -333,6 +335,7 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
 
 	gfs2_log_lock(sdp);
 	atomic_add(blks, &sdp->sd_log_blks_free);
+	trace_gfs2_log_blocks(sdp, blks);
 	gfs2_assert_withdraw(sdp,
 			     atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
 	gfs2_log_unlock(sdp);
@@ -558,6 +561,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
 
 	gfs2_log_lock(sdp);
 	atomic_add(dist, &sdp->sd_log_blks_free);
+	trace_gfs2_log_blocks(sdp, dist);
 	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
 	gfs2_log_unlock(sdp);
 
@@ -715,6 +719,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 		up_write(&sdp->sd_log_flush_lock);
 		return;
 	}
+	trace_gfs2_log_flush(sdp, 1);
 
 	ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
 	INIT_LIST_HEAD(&ai->ai_ail1_list);
@@ -746,6 +751,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 	else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
 		gfs2_log_lock(sdp);
 		atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
+		trace_gfs2_log_blocks(sdp, -1);
 		gfs2_log_unlock(sdp);
 		log_write_header(sdp, 0, PULL);
 	}
@@ -763,7 +769,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 		ai = NULL;
 	}
 	gfs2_log_unlock(sdp);
-
+	trace_gfs2_log_flush(sdp, 0);
 	up_write(&sdp->sd_log_flush_lock);
 
 	kfree(ai);
@@ -787,6 +793,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved);
 	unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved;
 	atomic_add(unused, &sdp->sd_log_blks_free);
+	trace_gfs2_log_blocks(sdp, unused);
 	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
 			     sdp->sd_jdesc->jd_blocks);
 	sdp->sd_log_blks_reserved = reserved;
diff --git a/trunk/fs/gfs2/lops.c b/trunk/fs/gfs2/lops.c
index 00315f50fa46..9969ff062c5b 100644
--- a/trunk/fs/gfs2/lops.c
+++ b/trunk/fs/gfs2/lops.c
@@ -27,6 +27,7 @@
 #include "rgrp.h"
 #include "trans.h"
 #include "util.h"
+#include "trace_gfs2.h"
 
 /**
  * gfs2_pin - Pin a buffer in memory
@@ -53,6 +54,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
 	if (bd->bd_ail)
 		list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
 	get_bh(bh);
+	trace_gfs2_pin(bd, 1);
 }
 
 /**
@@ -89,6 +91,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
 	bd->bd_ail = ai;
 	list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
 	clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
+	trace_gfs2_pin(bd, 0);
 	gfs2_log_unlock(sdp);
 	unlock_buffer(bh);
 }
diff --git a/trunk/fs/gfs2/ops_fstype.c b/trunk/fs/gfs2/ops_fstype.c
index cc34f271b3e7..7bc3c45cd676 100644
--- a/trunk/fs/gfs2/ops_fstype.c
+++ b/trunk/fs/gfs2/ops_fstype.c
@@ -33,6 +33,7 @@
 #include "log.h"
 #include "quota.h"
 #include "dir.h"
+#include "trace_gfs2.h"
 
 #define DO 0
 #define UNDO 1
@@ -775,6 +776,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
 		/* Map the extents for this journal's blocks */
 		map_journal_extents(sdp);
 	}
+	trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free));
 
 	if (sdp->sd_lockstruct.ls_first) {
 		unsigned int x;
diff --git a/trunk/fs/gfs2/rgrp.c b/trunk/fs/gfs2/rgrp.c
index de3239731db8..daa4ae341a29 100644
--- a/trunk/fs/gfs2/rgrp.c
+++ b/trunk/fs/gfs2/rgrp.c
@@ -29,6 +29,7 @@
 #include "util.h"
 #include "log.h"
 #include "inode.h"
+#include "trace_gfs2.h"
 
 #define BFITNOENT ((u32)~0)
 #define NO_BLOCK ((u64)~0)
@@ -1519,7 +1520,7 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
 	spin_lock(&sdp->sd_rindex_spin);
 	rgd->rd_free_clone -= *n;
 	spin_unlock(&sdp->sd_rindex_spin);
-
+	trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED);
 	*bn = block;
 	return 0;
 
@@ -1571,7 +1572,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
 	spin_lock(&sdp->sd_rindex_spin);
 	rgd->rd_free_clone--;
 	spin_unlock(&sdp->sd_rindex_spin);
-
+	trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE);
 	return block;
 }
 
@@ -1591,7 +1592,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
 	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
 	if (!rgd)
 		return;
-
+	trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
 	rgd->rd_free += blen;
 
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1619,7 +1620,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
 	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
 	if (!rgd)
 		return;
-
+	trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
 	rgd->rd_free += blen;
 
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1642,6 +1643,7 @@ void gfs2_unlink_di(struct inode *inode)
 	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
 	if (!rgd)
 		return;
+	trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED);
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
 	gfs2_trans_add_rg(rgd);
@@ -1673,6 +1675,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
 	gfs2_free_uninit_di(rgd, ip->i_no_addr);
+	trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE);
 	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
 	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
 }
diff --git a/trunk/fs/gfs2/trace_gfs2.h b/trunk/fs/gfs2/trace_gfs2.h
new file mode 100644
index 000000000000..98d6ef1c1dc0
--- /dev/null
+++ b/trunk/fs/gfs2/trace_gfs2.h
@@ -0,0 +1,407 @@
+#if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_GFS2_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gfs2
+#define TRACE_INCLUDE_FILE trace_gfs2
+
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/dlmconstants.h>
+#include <linux/gfs2_ondisk.h>
+#include "incore.h"
+#include "glock.h"
+
+#define dlm_state_name(nn) { DLM_LOCK_##nn, #nn }
+#define glock_trace_name(x) __print_symbolic(x,		\
+			    dlm_state_name(IV),		\
+			    dlm_state_name(NL),		\
+			    dlm_state_name(CR),		\
+			    dlm_state_name(CW),		\
+			    dlm_state_name(PR),		\
+			    dlm_state_name(PW),		\
+			    dlm_state_name(EX))
+
+#define block_state_name(x) __print_symbolic(x,			\
+			    { GFS2_BLKST_FREE, "free" },	\
+			    { GFS2_BLKST_USED, "used" },	\
+			    { GFS2_BLKST_DINODE, "dinode" },	\
+			    { GFS2_BLKST_UNLINKED, "unlinked" })
+
+#define show_glock_flags(flags) __print_flags(flags, "",	\
+	{(1UL << GLF_LOCK),			"l" },		\
+	{(1UL << GLF_DEMOTE),			"D" },		\
+	{(1UL << GLF_PENDING_DEMOTE),		"d" },		\
+	{(1UL << GLF_DEMOTE_IN_PROGRESS),	"p" },		\
+	{(1UL << GLF_DIRTY),			"y" },		\
+	{(1UL << GLF_LFLUSH),			"f" },		\
+	{(1UL << GLF_INVALIDATE_IN_PROGRESS),	"i" },		\
+	{(1UL << GLF_REPLY_PENDING),		"r" },		\
+	{(1UL << GLF_INITIAL),			"I" },		\
+	{(1UL << GLF_FROZEN),			"F" })
+
+#ifndef NUMPTY	/* define the helper only once, even when this header is read again */
+#define NUMPTY
+static inline u8 glock_trace_state(unsigned int state)
+{	/* translate an LM_ST_* glock state into the DLM_LOCK_* value stored in trace entries */
+	switch(state) {
+	case LM_ST_SHARED:
+		return DLM_LOCK_PR;
+	case LM_ST_DEFERRED:
+		return DLM_LOCK_CW;
+	case LM_ST_EXCLUSIVE:
+		return DLM_LOCK_EX;
+	}
+	return DLM_LOCK_NL;	/* every state not listed above is reported as NL */
+}
+#endif
+
+/* Section 1 - Locking
+ *
+ * Objectives:
+ * Latency: Remote demote request to state change
+ * Latency: Local lock request to state change
+ * Latency: State change to lock grant
+ * Correctness: Ordering of local lock state vs. I/O requests
+ * Correctness: Responses to remote demote requests
+ */
+
+/* Glock state change (DLM lock request completes): logs cur/new/target/demote state */
+TRACE_EVENT(gfs2_glock_state_change,
+
+	TP_PROTO(const struct gfs2_glock *gl, unsigned int new_state),
+
+	TP_ARGS(gl, new_state),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	u64,	glnum			)
+		__field(	u32,	gltype			)
+		__field(	u8,	cur_state		)
+		__field(	u8,	new_state		)
+		__field(	u8,	dmt_state		)
+		__field(	u8,	tgt_state		)
+		__field(	unsigned long,	flags		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= gl->gl_sbd->sd_vfs->s_dev;
+		__entry->glnum		= gl->gl_name.ln_number;
+		__entry->gltype		= gl->gl_name.ln_type;
+		__entry->cur_state	= glock_trace_state(gl->gl_state);
+		__entry->new_state	= glock_trace_state(new_state);
+		__entry->tgt_state	= glock_trace_state(gl->gl_target);
+		__entry->dmt_state	= glock_trace_state(gl->gl_demote_state);
+		__entry->flags		= gl->gl_flags;
+	),
+	/* gltype is u32 and glnum is passed as unsigned long long: print both unsigned */
+	TP_printk("%u,%u glock %u:%llu state %s to %s tgt:%s dmt:%s flags:%s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+		  (unsigned long long)__entry->glnum,
+		  glock_trace_name(__entry->cur_state),
+		  glock_trace_name(__entry->new_state),
+		  glock_trace_name(__entry->tgt_state),
+		  glock_trace_name(__entry->dmt_state),
+		  show_glock_flags(__entry->flags))
+);
+
+/* State change -> unlocked, glock is being deallocated */
+TRACE_EVENT(gfs2_glock_put,
+
+	TP_PROTO(const struct gfs2_glock *gl),
+
+	TP_ARGS(gl),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	glnum			)
+		__field(	u32,	gltype			)
+		__field(	u8,	cur_state		)
+		__field(	unsigned long,	flags		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= gl->gl_sbd->sd_vfs->s_dev;
+		__entry->gltype		= gl->gl_name.ln_type;
+		__entry->glnum		= gl->gl_name.ln_number;
+		__entry->cur_state	= glock_trace_state(gl->gl_state);
+		__entry->flags		= gl->gl_flags;
+	),
+	/* gltype/glnum are unsigned; the destination state is always printed as IV */
+	TP_printk("%u,%u glock %u:%llu state %s => %s flags:%s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->gltype, (unsigned long long)__entry->glnum,
+		  glock_trace_name(__entry->cur_state),
+		  glock_trace_name(DLM_LOCK_IV),
+		  show_glock_flags(__entry->flags))
+
+);
+
+/* Callback (local or remote) requesting lock demotion */
+TRACE_EVENT(gfs2_demote_rq,
+
+	TP_PROTO(const struct gfs2_glock *gl),
+
+	TP_ARGS(gl),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	glnum			)
+		__field(	u32,	gltype			)
+		__field(	u8,	cur_state		)
+		__field(	u8,	dmt_state		)
+		__field(	unsigned long,	flags		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= gl->gl_sbd->sd_vfs->s_dev;
+		__entry->gltype		= gl->gl_name.ln_type;
+		__entry->glnum		= gl->gl_name.ln_number;
+		__entry->cur_state	= glock_trace_state(gl->gl_state);
+		__entry->dmt_state	= glock_trace_state(gl->gl_demote_state);
+		__entry->flags		= gl->gl_flags;
+	),
+	/* gltype is u32 and glnum is passed as unsigned long long: print both unsigned */
+	TP_printk("%u,%u glock %u:%llu demote %s to %s flags:%s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+		  (unsigned long long)__entry->glnum,
+		  glock_trace_name(__entry->cur_state),
+		  glock_trace_name(__entry->dmt_state),
+		  show_glock_flags(__entry->flags))
+
+);
+
+/* Promotion/grant of a glock to a holder; "first" picks the "first"/"other" tag printed */
+TRACE_EVENT(gfs2_promote,
+
+	TP_PROTO(const struct gfs2_holder *gh, int first),
+
+	TP_ARGS(gh, first),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	glnum			)
+		__field(	u32,	gltype			)
+		__field(	int,	first			)
+		__field(	u8,	state			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= gh->gh_gl->gl_sbd->sd_vfs->s_dev;
+		__entry->glnum	= gh->gh_gl->gl_name.ln_number;
+		__entry->gltype	= gh->gh_gl->gl_name.ln_type;
+		__entry->first	= first;
+		__entry->state	= glock_trace_state(gh->gh_state);
+	),
+
+	TP_printk("%u,%u glock %u:%llu promote %s %s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+		  (unsigned long long)__entry->glnum,
+		  __entry->first ? "first": "other",
+		  glock_trace_name(__entry->state))
+);
+
+/* Queue/dequeue a lock request: non-zero "queue" prints "queue", zero prints "dequeue" */
+TRACE_EVENT(gfs2_glock_queue,
+
+	TP_PROTO(const struct gfs2_holder *gh, int queue),
+
+	TP_ARGS(gh, queue),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	glnum			)
+		__field(	u32,	gltype			)
+		__field(	int,	queue			)
+		__field(	u8,	state			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= gh->gh_gl->gl_sbd->sd_vfs->s_dev;
+		__entry->glnum	= gh->gh_gl->gl_name.ln_number;
+		__entry->gltype	= gh->gh_gl->gl_name.ln_type;
+		__entry->queue	= queue;
+		__entry->state	= glock_trace_state(gh->gh_state);
+	),
+
+	TP_printk("%u,%u glock %u:%llu %squeue %s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+		  (unsigned long long)__entry->glnum,
+		  __entry->queue ? "" : "de",
+		  glock_trace_name(__entry->state))
+);
+
+/* Section 2 - Log/journal
+ *
+ * Objectives:
+ * Latency: Log flush time
+ * Correctness: pin/unpin vs. disk I/O ordering
+ * Performance: Log usage stats
+ */
+
+/* Pin/unpin a block in the log; "ino" holds the number of the buffer's glock (bd_gl) */
+TRACE_EVENT(gfs2_pin,
+
+	TP_PROTO(const struct gfs2_bufdata *bd, int pin),
+
+	TP_ARGS(bd, pin),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	int,	pin			)
+		__field(	u32,	len			)
+		__field(	sector_t,	block		)
+		__field(	u64,	ino			)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bd->bd_gl->gl_sbd->sd_vfs->s_dev;
+		__entry->pin		= pin;
+		__entry->len		= bd->bd_bh->b_size;
+		__entry->block		= bd->bd_bh->b_blocknr;
+		__entry->ino		= bd->bd_gl->gl_name.ln_number;
+	),
+
+	TP_printk("%u,%u log %s %llu/%lu inode %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->pin ? "pin" : "unpin",
+		  (unsigned long long)__entry->block,
+		  (unsigned long)__entry->len,
+		  (unsigned long long)__entry->ino)
+);
+
+/* Flushing the log: non-zero "start" is printed as "start", zero as "end" */
+TRACE_EVENT(gfs2_log_flush,
+
+	TP_PROTO(const struct gfs2_sbd *sdp, int start),
+
+	TP_ARGS(sdp, start),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	int,	start			)
+		__field(	u64,	log_seq			)
+	),
+
+	TP_fast_assign(
+		__entry->dev            = sdp->sd_vfs->s_dev;
+		__entry->start		= start;
+		__entry->log_seq	= sdp->sd_log_sequence;
+	),
+
+	TP_printk("%u,%u log flush %s %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->start ? "start" : "end",
+		  (unsigned long long)__entry->log_seq)
+);
+
+/* Reserving/releasing blocks in the log; "blocks" is a signed count shown after "log reserve" */
+TRACE_EVENT(gfs2_log_blocks,
+
+	TP_PROTO(const struct gfs2_sbd *sdp, int blocks),
+
+	TP_ARGS(sdp, blocks),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	int,	blocks			)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= sdp->sd_vfs->s_dev;
+		__entry->blocks		= blocks;
+	),
+
+	TP_printk("%u,%u log reserve %d", MAJOR(__entry->dev),
+		  MINOR(__entry->dev), __entry->blocks)
+);
+
+/* Section 3 - bmap
+ *
+ * Objectives:
+ * Latency: Bmap request time
+ * Performance: Block allocator tracing
+ * Correctness: Test of discard generation vs. blocks allocated
+ */
+
+/* Map an extent of blocks, possibly a new allocation; pblock is 0 if bh is not mapped */
+TRACE_EVENT(gfs2_bmap,
+
+	TP_PROTO(const struct gfs2_inode *ip, const struct buffer_head *bh,
+		sector_t lblock, int create, int errno),
+
+	TP_ARGS(ip, bh, lblock, create, errno),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	sector_t, lblock		)
+		__field(	sector_t, pblock		)
+		__field(	u64,	inum			)
+		__field(	unsigned long, state		)
+		__field(	u32,	len			)
+		__field(	int,	create			)
+		__field(	int,	errno			)
+	),
+
+	TP_fast_assign(
+		__entry->dev            = ip->i_gl->gl_sbd->sd_vfs->s_dev;
+		__entry->lblock		= lblock;
+		__entry->pblock		= buffer_mapped(bh) ?  bh->b_blocknr : 0;
+		__entry->inum		= ip->i_no_addr;
+		__entry->state		= bh->b_state;
+		__entry->len		= bh->b_size;
+		__entry->create		= create;
+		__entry->errno		= errno;
+	),
+
+	TP_printk("%u,%u bmap %llu map %llu/%lu to %llu flags:%08lx %s %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long long)__entry->inum,
+		  (unsigned long long)__entry->lblock,
+		  (unsigned long)__entry->len,
+		  (unsigned long long)__entry->pblock,
+		  __entry->state, __entry->create ? "create " : "nocreate",
+		  __entry->errno)
+);
+
+/* Keep track of blocks as they are allocated/freed; block_state is a GFS2_BLKST_* value */
+TRACE_EVENT(gfs2_block_alloc,
+
+	TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len,
+		u8 block_state),
+
+	TP_ARGS(ip, block, len, block_state),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	start			)
+		__field(	u64,	inum			)
+		__field(	u32,	len			)
+		__field(	u8,	block_state		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= ip->i_gl->gl_sbd->sd_vfs->s_dev;
+		__entry->start		= block;
+		__entry->inum		= ip->i_no_addr;
+		__entry->len		= len;
+		__entry->block_state	= block_state;
+	),
+
+	TP_printk("%u,%u bmap %llu alloc %llu/%lu %s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long long)__entry->inum,
+		  (unsigned long long)__entry->start,
+		  (unsigned long)__entry->len,
+		  block_state_name(__entry->block_state))
+);
+
+#endif /* _TRACE_GFS2_H */
+
+/* This part must be outside the include guard above */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
+
diff --git a/trunk/fs/partitions/check.c b/trunk/fs/partitions/check.c
index 1a9c7878f864..0af36085eb28 100644
--- a/trunk/fs/partitions/check.c
+++ b/trunk/fs/partitions/check.c
@@ -556,49 +556,27 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 
 	/* add partitions */
 	for (p = 1; p < state->limit; p++) {
-		sector_t size, from;
-try_scan:
-		size = state->parts[p].size;
+		sector_t size = state->parts[p].size;
+		sector_t from = state->parts[p].from;
 		if (!size)
 			continue;
-
-		from = state->parts[p].from;
 		if (from >= get_capacity(disk)) {
 			printk(KERN_WARNING
 			       "%s: p%d ignored, start %llu is behind the end of the disk\n",
 			       disk->disk_name, p, (unsigned long long) from);
 			continue;
 		}
-
 		if (from + size > get_capacity(disk)) {
-			struct block_device_operations *bdops = disk->fops;
-			unsigned long long capacity;
-
+			/*
+			 * we can not ignore partitions of broken tables
+			 * created by for example camera firmware, but we
+			 * limit them to the end of the disk to avoid
+			 * creating invalid block devices
+			 */
 			printk(KERN_WARNING
-			       "%s: p%d size %llu exceeds device capacity, ",
+			       "%s: p%d size %llu limited to end of disk\n",
 			       disk->disk_name, p, (unsigned long long) size);
-
-			if (bdops->set_capacity &&
-			    (disk->flags & GENHD_FL_NATIVE_CAPACITY) == 0) {
-				printk(KERN_CONT "enabling native capacity\n");
-				capacity = bdops->set_capacity(disk, ~0ULL);
-				disk->flags |= GENHD_FL_NATIVE_CAPACITY;
-				if (capacity > get_capacity(disk)) {
-					set_capacity(disk, capacity);
-					check_disk_size_change(disk, bdev);
-					bdev->bd_invalidated = 0;
-				}
-				goto try_scan;
-			} else {
-				/*
-				 * we can not ignore partitions of broken tables
-				 * created by for example camera firmware, but
-				 * we limit them to the end of the disk to avoid
-				 * creating invalid block devices
-				 */
-				printk(KERN_CONT "limited to end of disk\n");
-				size = get_capacity(disk) - from;
-			}
+			size = get_capacity(disk) - from;
 		}
 		part = add_partition(disk, p, from, size,
 				     state->parts[p].flags);
diff --git a/trunk/include/linux/blkdev.h b/trunk/include/linux/blkdev.h
index 0b1a6cae9de1..ebdfde8fe556 100644
--- a/trunk/include/linux/blkdev.h
+++ b/trunk/include/linux/blkdev.h
@@ -1226,8 +1226,6 @@ struct block_device_operations {
 	int (*direct_access) (struct block_device *, sector_t,
 						void **, unsigned long *);
 	int (*media_changed) (struct gendisk *);
-	unsigned long long (*set_capacity) (struct gendisk *,
-						unsigned long long);
 	int (*revalidate_disk) (struct gendisk *);
 	int (*getgeo)(struct block_device *, struct hd_geometry *);
 	struct module *owner;
diff --git a/trunk/include/linux/compiler.h b/trunk/include/linux/compiler.h
index 04fb5135b4e1..37bcb50a4d7c 100644
--- a/trunk/include/linux/compiler.h
+++ b/trunk/include/linux/compiler.h
@@ -261,11 +261,6 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # define __section(S) __attribute__ ((__section__(#S)))
 #endif
 
-/* Are two types/vars the same type (ignoring qualifiers)? */
-#ifndef __same_type
-# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
-#endif
-
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
  * is also forbidden from reordering successive instances of ACCESS_ONCE(),
diff --git a/trunk/include/linux/fuse.h b/trunk/include/linux/fuse.h
index d41ed593f79f..162e5defe683 100644
--- a/trunk/include/linux/fuse.h
+++ b/trunk/include/linux/fuse.h
@@ -120,13 +120,6 @@ struct fuse_file_lock {
 #define FUSE_EXPORT_SUPPORT	(1 << 4)
 #define FUSE_BIG_WRITES		(1 << 5)
 
-/**
- * CUSE INIT request/reply flags
- *
- * CUSE_UNRESTRICTED_IOCTL:  use unrestricted ioctl
- */
-#define CUSE_UNRESTRICTED_IOCTL	(1 << 0)
-
 /**
  * Release flags
  */
@@ -217,9 +210,6 @@ enum fuse_opcode {
 	FUSE_DESTROY       = 38,
 	FUSE_IOCTL         = 39,
 	FUSE_POLL          = 40,
-
-	/* CUSE specific operations */
-	CUSE_INIT          = 4096,
 };
 
 enum fuse_notify_code {
@@ -411,27 +401,6 @@ struct fuse_init_out {
 	__u32	max_write;
 };
 
-#define CUSE_INIT_INFO_MAX 4096
-
-struct cuse_init_in {
-	__u32	major;
-	__u32	minor;
-	__u32	unused;
-	__u32	flags;
-};
-
-struct cuse_init_out {
-	__u32	major;
-	__u32	minor;
-	__u32	unused;
-	__u32	flags;
-	__u32	max_read;
-	__u32	max_write;
-	__u32	dev_major;		/* chardev major */
-	__u32	dev_minor;		/* chardev minor */
-	__u32	spare[10];
-};
-
 struct fuse_interrupt_in {
 	__u64	unique;
 };
diff --git a/trunk/include/linux/genhd.h b/trunk/include/linux/genhd.h
index 7cbd38d363a2..149fda264c86 100644
--- a/trunk/include/linux/genhd.h
+++ b/trunk/include/linux/genhd.h
@@ -114,7 +114,6 @@ struct hd_struct {
 #define GENHD_FL_UP				16
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	32
 #define GENHD_FL_EXT_DEVT			64 /* allow extended devt */
-#define GENHD_FL_NATIVE_CAPACITY		128
 
 #define BLK_SCSI_MAX_CMDS	(256)
 #define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
diff --git a/trunk/include/linux/ide.h b/trunk/include/linux/ide.h
index a6c6a2fad7c8..867cb68d8461 100644
--- a/trunk/include/linux/ide.h
+++ b/trunk/include/linux/ide.h
@@ -178,7 +178,7 @@ typedef u8 hwif_chipset_t;
 /*
  * Structure to hold all information about the location of this port
  */
-struct ide_hw {
+typedef struct hw_regs_s {
 	union {
 		struct ide_io_ports	io_ports;
 		unsigned long		io_ports_array[IDE_NR_PORTS];
@@ -186,11 +186,12 @@ struct ide_hw {
 
 	int		irq;			/* our irq number */
 	ide_ack_intr_t	*ack_intr;		/* acknowledge interrupt */
+	hwif_chipset_t  chipset;
 	struct device	*dev, *parent;
 	unsigned long	config;
-};
+} hw_regs_t;
 
-static inline void ide_std_init_ports(struct ide_hw *hw,
+static inline void ide_std_init_ports(hw_regs_t *hw,
 				      unsigned long io_addr,
 				      unsigned long ctl_addr)
 {
@@ -217,12 +218,21 @@ static inline void ide_std_init_ports(struct ide_hw *hw,
 
 /*
  * Special Driver Flags
+ *
+ * set_geometry	: respecify drive geometry
+ * recalibrate	: seek to cyl 0
+ * set_multmode	: set multmode count
+ * reserved	: unused
  */
-enum {
-	IDE_SFLAG_SET_GEOMETRY		= (1 << 0),
-	IDE_SFLAG_RECALIBRATE		= (1 << 1),
-	IDE_SFLAG_SET_MULTMODE		= (1 << 2),
-};
+typedef union {
+	unsigned all			: 8;
+	struct {
+		unsigned set_geometry	: 1;
+		unsigned recalibrate	: 1;
+		unsigned set_multmode	: 1;
+		unsigned reserved	: 5;
+	} b;
+} special_t;
 
 /*
  * Status returned from various ide_ functions
@@ -381,7 +391,6 @@ struct ide_drive_s;
 struct ide_disk_ops {
 	int		(*check)(struct ide_drive_s *, const char *);
 	int		(*get_capacity)(struct ide_drive_s *);
-	u64		(*set_capacity)(struct ide_drive_s *, u64);
 	void		(*setup)(struct ide_drive_s *);
 	void		(*flush)(struct ide_drive_s *);
 	int		(*init_media)(struct ide_drive_s *, struct gendisk *);
@@ -459,8 +468,6 @@ enum {
 	IDE_DFLAG_NICE1			= (1 << 5),
 	/* device is physically present */
 	IDE_DFLAG_PRESENT		= (1 << 6),
-	/* disable Host Protected Area */
-	IDE_DFLAG_NOHPA			= (1 << 7),
 	/* id read from device (synthetic if not set) */
 	IDE_DFLAG_ID_READ		= (1 << 8),
 	IDE_DFLAG_NOPROBE		= (1 << 9),
@@ -499,7 +506,6 @@ enum {
 	/* write protect */
 	IDE_DFLAG_WP			= (1 << 29),
 	IDE_DFLAG_FORMAT_IN_PROGRESS	= (1 << 30),
-	IDE_DFLAG_NIEN_QUIRK		= (1 << 31),
 };
 
 struct ide_drive_s {
@@ -524,13 +530,14 @@ struct ide_drive_s {
 	unsigned long sleep;		/* sleep until this time */
 	unsigned long timeout;		/* max time to wait for irq */
 
-	u8	special_flags;		/* special action flags */
+	special_t	special;	/* special action flags */
 
 	u8	select;			/* basic drive/head select reg value */
 	u8	retry_pio;		/* retrying dma capable host in pio */
 	u8	waiting_for_dma;	/* dma currently in progress */
 	u8	dma;			/* atapi dma flag */
 
+        u8	quirk_list;	/* considered quirky, set for a specific host */
         u8	init_speed;	/* transfer rate set at boot */
         u8	current_speed;	/* current transfer rate set */
 	u8	desired_speed;	/* desired transfer rate set */
@@ -555,7 +562,8 @@ struct ide_drive_s {
 	unsigned int	drive_data;	/* used by set_pio_mode/dev_select() */
 	unsigned int	failures;	/* current failure count */
 	unsigned int	max_failures;	/* maximum allowed failure count */
-	u64		probed_capacity;/* initial/native media capacity */
+	u64		probed_capacity;/* initial reported media capacity (ide-cd only currently) */
+
 	u64		capacity64;	/* total number of sectors */
 
 	int		lun;		/* logical unit */
@@ -1214,7 +1222,7 @@ static inline int ide_pci_is_in_compatibility_mode(struct pci_dev *dev)
 }
 
 void ide_pci_setup_ports(struct pci_dev *, const struct ide_port_info *,
-			 struct ide_hw *, struct ide_hw **);
+			 hw_regs_t *, hw_regs_t **);
 void ide_setup_pci_noise(struct pci_dev *, const struct ide_port_info *);
 
 #ifdef CONFIG_BLK_DEV_IDEDMA_PCI
@@ -1453,18 +1461,16 @@ static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {}
 void ide_register_region(struct gendisk *);
 void ide_unregister_region(struct gendisk *);
 
-void ide_check_nien_quirk_list(ide_drive_t *);
 void ide_undecoded_slave(ide_drive_t *);
 
 void ide_port_apply_params(ide_hwif_t *);
 int ide_sysfs_register_port(ide_hwif_t *);
 
-struct ide_host *ide_host_alloc(const struct ide_port_info *, struct ide_hw **,
-				unsigned int);
+struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **);
 void ide_host_free(struct ide_host *);
 int ide_host_register(struct ide_host *, const struct ide_port_info *,
-		      struct ide_hw **);
-int ide_host_add(const struct ide_port_info *, struct ide_hw **, unsigned int,
+		      hw_regs_t **);
+int ide_host_add(const struct ide_port_info *, hw_regs_t **,
 		 struct ide_host **);
 void ide_host_remove(struct ide_host *);
 int ide_legacy_device_add(const struct ide_port_info *, unsigned long);
diff --git a/trunk/include/linux/lguest.h b/trunk/include/linux/lguest.h
index 7bc1440fc473..175e63f4a8c0 100644
--- a/trunk/include/linux/lguest.h
+++ b/trunk/include/linux/lguest.h
@@ -30,10 +30,6 @@ struct lguest_data
 	/* Wallclock time set by the Host. */
 	struct timespec time;
 
-	/* Interrupt pending set by the Host.  The Guest should do a hypercall
-	 * if it re-enables interrupts and sees this set (to X86_EFLAGS_IF). */
-	int irq_pending;
-
 	/* Async hypercall ring.  Instead of directly making hypercalls, we can
 	 * place them in here for processing the next time the Host wants.
 	 * This batching can be quite efficient. */
diff --git a/trunk/include/linux/lguest_launcher.h b/trunk/include/linux/lguest_launcher.h
index bfefbdf7498a..a53407a4165c 100644
--- a/trunk/include/linux/lguest_launcher.h
+++ b/trunk/include/linux/lguest_launcher.h
@@ -57,8 +57,7 @@ enum lguest_req
 	LHREQ_INITIALIZE, /* + base, pfnlimit, start */
 	LHREQ_GETDMA, /* No longer used */
 	LHREQ_IRQ, /* + irq */
-	LHREQ_BREAK, /* No longer used */
-	LHREQ_EVENTFD, /* + address, fd. */
+	LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */
 };
 
 /* The alignment to use between consumer and producer parts of vring.
diff --git a/trunk/include/linux/module.h b/trunk/include/linux/module.h
index a7bc6e7b43a7..a8f2c0aa4c32 100644
--- a/trunk/include/linux/module.h
+++ b/trunk/include/linux/module.h
@@ -77,7 +77,6 @@ search_extable(const struct exception_table_entry *first,
 void sort_extable(struct exception_table_entry *start,
 		  struct exception_table_entry *finish);
 void sort_main_extable(void);
-void trim_init_extable(struct module *m);
 
 #ifdef MODULE
 #define MODULE_GENERIC_TABLE(gtype,name)			\
diff --git a/trunk/include/linux/moduleparam.h b/trunk/include/linux/moduleparam.h
index 6547c3cdbc4c..a4f0b931846c 100644
--- a/trunk/include/linux/moduleparam.h
+++ b/trunk/include/linux/moduleparam.h
@@ -36,14 +36,9 @@ typedef int (*param_set_fn)(const char *val, struct kernel_param *kp);
 /* Returns length written or -errno.  Buffer is 4k (ie. be short!) */
 typedef int (*param_get_fn)(char *buffer, struct kernel_param *kp);
 
-/* Flag bits for kernel_param.flags */
-#define KPARAM_KMALLOCED	1
-#define KPARAM_ISBOOL		2
-
 struct kernel_param {
 	const char *name;
-	u16 perm;
-	u16 flags;
+	unsigned int perm;
 	param_set_fn set;
 	param_get_fn get;
 	union {
@@ -84,7 +79,7 @@ struct kparam_array
    parameters.  perm sets the visibility in sysfs: 000 means it's
    not there, read bits mean it's readable, write bits mean it's
    writable. */
-#define __module_param_call(prefix, name, set, get, arg, isbool, perm)	\
+#define __module_param_call(prefix, name, set, get, arg, perm)		\
 	/* Default value instead of permissions? */			\
 	static int __param_perm_check_##name __attribute__((unused)) =	\
 	BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2))	\
@@ -93,13 +88,10 @@ struct kparam_array
 	static struct kernel_param __moduleparam_const __param_##name	\
 	__used								\
     __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \
-	= { __param_str_##name, perm, isbool ? KPARAM_ISBOOL : 0,	\
-	    set, get, { arg } }
+	= { __param_str_##name, perm, set, get, { arg } }
 
 #define module_param_call(name, set, get, arg, perm)			      \
-	__module_param_call(MODULE_PARAM_PREFIX,			      \
-			    name, set, get, arg,			      \
-			    __same_type(*(arg), bool), perm)
+	__module_param_call(MODULE_PARAM_PREFIX, name, set, get, arg, perm)
 
 /* Helper functions: type is byte, short, ushort, int, uint, long,
    ulong, charp, bool or invbool, or XXX if you define param_get_XXX,
@@ -128,16 +120,15 @@ struct kparam_array
 #define core_param(name, var, type, perm)				\
 	param_check_##type(name, &(var));				\
 	__module_param_call("", name, param_set_##type, param_get_##type, \
-			    &var, __same_type(var, bool), perm)
+			    &var, perm)
 #endif /* !MODULE */
 
 /* Actually copy string: maxlen param is usually sizeof(string). */
 #define module_param_string(name, string, len, perm)			\
 	static const struct kparam_string __param_string_##name		\
 		= { len, string };					\
-	__module_param_call(MODULE_PARAM_PREFIX, name,			\
-			    param_set_copystring, param_get_string,	\
-			    .str = &__param_string_##name, 0, perm);	\
+	module_param_call(name, param_set_copystring, param_get_string,	\
+			  .str = &__param_string_##name, perm);		\
 	__MODULE_PARM_TYPE(name, "string")
 
 /* Called on module insert or kernel boot */
@@ -195,30 +186,21 @@ extern int param_set_charp(const char *val, struct kernel_param *kp);
 extern int param_get_charp(char *buffer, struct kernel_param *kp);
 #define param_check_charp(name, p) __param_check(name, p, char *)
 
-/* For historical reasons "bool" parameters can be (unsigned) "int". */
 extern int param_set_bool(const char *val, struct kernel_param *kp);
 extern int param_get_bool(char *buffer, struct kernel_param *kp);
-#define param_check_bool(name, p)					\
-	static inline void __check_##name(void)				\
-	{								\
-		BUILD_BUG_ON(!__same_type(*(p), bool) &&		\
-			     !__same_type(*(p), unsigned int) &&	\
-			     !__same_type(*(p), int));			\
-	}
+#define param_check_bool(name, p) __param_check(name, p, int)
 
 extern int param_set_invbool(const char *val, struct kernel_param *kp);
 extern int param_get_invbool(char *buffer, struct kernel_param *kp);
-#define param_check_invbool(name, p) __param_check(name, p, bool)
+#define param_check_invbool(name, p) __param_check(name, p, int)
 
 /* Comma-separated array: *nump is set to number they actually specified. */
 #define module_param_array_named(name, array, type, nump, perm)		\
 	static const struct kparam_array __param_arr_##name		\
 	= { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type,\
 	    sizeof(array[0]), array };					\
-	__module_param_call(MODULE_PARAM_PREFIX, name,			\
-			    param_array_set, param_array_get,		\
-			    .arr = &__param_arr_##name,			\
-			    __same_type(array[0], bool), perm);		\
+	module_param_call(name, param_array_set, param_array_get, 	\
+			  .arr = &__param_arr_##name, perm);		\
 	__MODULE_PARM_TYPE(name, "array of " #type)
 
 #define module_param_array(name, type, nump, perm)		\
diff --git a/trunk/include/linux/virtio.h b/trunk/include/linux/virtio.h
index 4fca4f5440ba..06005fa9e982 100644
--- a/trunk/include/linux/virtio.h
+++ b/trunk/include/linux/virtio.h
@@ -10,17 +10,14 @@
 
 /**
  * virtqueue - a queue to register buffers for sending or receiving.
- * @list: the chain of virtqueues for this device
  * @callback: the function to call when buffers are consumed (can be NULL).
- * @name: the name of this virtqueue (mainly for debugging)
  * @vdev: the virtio device this queue was created for.
  * @vq_ops: the operations for this virtqueue (see below).
  * @priv: a pointer for the virtqueue implementation to use.
  */
-struct virtqueue {
-	struct list_head list;
+struct virtqueue
+{
 	void (*callback)(struct virtqueue *vq);
-	const char *name;
 	struct virtio_device *vdev;
 	struct virtqueue_ops *vq_ops;
 	void *priv;
@@ -79,16 +76,15 @@ struct virtqueue_ops {
  * @dev: underlying device.
  * @id: the device type identification (used to match it with a driver).
  * @config: the configuration ops for this device.
- * @vqs: the list of virtqueues for this device.
  * @features: the features supported by both driver and device.
  * @priv: private pointer for the driver's use.
  */
-struct virtio_device {
+struct virtio_device
+{
 	int index;
 	struct device dev;
 	struct virtio_device_id id;
 	struct virtio_config_ops *config;
-	struct list_head vqs;
 	/* Note that this is a Linux set_bit-style bitmap. */
 	unsigned long features[1];
 	void *priv;
@@ -103,7 +99,8 @@ void unregister_virtio_device(struct virtio_device *dev);
  * @id_table: the ids serviced by this driver.
  * @feature_table: an array of feature numbers supported by this device.
  * @feature_table_size: number of entries in the feature table array.
- * @probe: the function to call when a device is found.  Returns 0 or -errno.
+ * @probe: the function to call when a device is found.  Returns 0 on
+ *    success or a negative errno on failure.
  * @remove: the function when a device is removed.
  * @config_changed: optional function to call when the device configuration
  *    changes; may be called in interrupt context.
diff --git a/trunk/include/linux/virtio_config.h b/trunk/include/linux/virtio_config.h
index 99f514575f6a..bf8ec283b232 100644
--- a/trunk/include/linux/virtio_config.h
+++ b/trunk/include/linux/virtio_config.h
@@ -29,7 +29,6 @@
 #define VIRTIO_F_NOTIFY_ON_EMPTY	24
 
 #ifdef __KERNEL__
-#include <linux/err.h>
 #include <linux/virtio.h>
 
 /**
@@ -50,26 +49,15 @@
  * @set_status: write the status byte
  *	vdev: the virtio_device
  *	status: the new status byte
- * @request_vqs: request the specified number of virtqueues
- *	vdev: the virtio_device
- *	max_vqs: the max number of virtqueues we want
- *      If supplied, must call before any virtqueues are instantiated.
- *      To modify the max number of virtqueues after request_vqs has been
- *      called, call free_vqs and then request_vqs with a new value.
- * @free_vqs: cleanup resources allocated by request_vqs
- *	vdev: the virtio_device
- *      If supplied, must call after all virtqueues have been deleted.
  * @reset: reset the device
  *	vdev: the virtio device
  *	After this, status and feature negotiation must be done again
- * @find_vqs: find virtqueues and instantiate them.
+ * @find_vq: find a virtqueue and instantiate it.
  *	vdev: the virtio_device
- *	nvqs: the number of virtqueues to find
- *	vqs: on success, includes new virtqueues
- *	callbacks: array of callbacks, for each virtqueue
- *	names: array of virtqueue names (mainly for debugging)
- *	Returns 0 on success or error status
- * @del_vqs: free virtqueues found by find_vqs().
+ *	index: the 0-based virtqueue number in case there's more than one.
+ *	callback: the virtqueue callback
+ *	Returns the new virtqueue or ERR_PTR() (eg. -ENOENT).
+ * @del_vq: free a virtqueue found by find_vq().
  * @get_features: get the array of feature bits for this device.
  *	vdev: the virtio_device
  *	Returns the first 32 feature bits (all we currently need).
@@ -78,7 +66,6 @@
  *	This gives the final feature bits for the device: it can change
  *	the dev->feature bits if it wants.
  */
-typedef void vq_callback_t(struct virtqueue *);
 struct virtio_config_ops
 {
 	void (*get)(struct virtio_device *vdev, unsigned offset,
@@ -88,11 +75,10 @@ struct virtio_config_ops
 	u8 (*get_status)(struct virtio_device *vdev);
 	void (*set_status)(struct virtio_device *vdev, u8 status);
 	void (*reset)(struct virtio_device *vdev);
-	int (*find_vqs)(struct virtio_device *, unsigned nvqs,
-			struct virtqueue *vqs[],
-			vq_callback_t *callbacks[],
-			const char *names[]);
-	void (*del_vqs)(struct virtio_device *);
+	struct virtqueue *(*find_vq)(struct virtio_device *vdev,
+				     unsigned index,
+				     void (*callback)(struct virtqueue *));
+	void (*del_vq)(struct virtqueue *vq);
 	u32 (*get_features)(struct virtio_device *vdev);
 	void (*finalize_features)(struct virtio_device *vdev);
 };
@@ -113,9 +99,7 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
 	if (__builtin_constant_p(fbit))
 		BUILD_BUG_ON(fbit >= 32);
 
-	if (fbit < VIRTIO_TRANSPORT_F_START)
-		virtio_check_driver_offered_feature(vdev, fbit);
-
+	virtio_check_driver_offered_feature(vdev, fbit);
 	return test_bit(fbit, vdev->features);
 }
 
@@ -142,18 +126,5 @@ static inline int virtio_config_buf(struct virtio_device *vdev,
 	vdev->config->get(vdev, offset, buf, len);
 	return 0;
 }
-
-static inline
-struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
-					vq_callback_t *c, const char *n)
-{
-	vq_callback_t *callbacks[] = { c };
-	const char *names[] = { n };
-	struct virtqueue *vq;
-	int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names);
-	if (err < 0)
-		return ERR_PTR(err);
-	return vq;
-}
 #endif /* __KERNEL__ */
 #endif /* _LINUX_VIRTIO_CONFIG_H */
diff --git a/trunk/include/linux/virtio_pci.h b/trunk/include/linux/virtio_pci.h
index 9a3d7c48c622..cd0fd5d181a6 100644
--- a/trunk/include/linux/virtio_pci.h
+++ b/trunk/include/linux/virtio_pci.h
@@ -47,17 +47,9 @@
 /* The bit of the ISR which indicates a device configuration change. */
 #define VIRTIO_PCI_ISR_CONFIG		0x2
 
-/* MSI-X registers: only enabled if MSI-X is enabled. */
-/* A 16-bit vector for configuration changes. */
-#define VIRTIO_MSI_CONFIG_VECTOR        20
-/* A 16-bit vector for selected queue notifications. */
-#define VIRTIO_MSI_QUEUE_VECTOR         22
-/* Vector value used to disable MSI for queue */
-#define VIRTIO_MSI_NO_VECTOR            0xffff
-
 /* The remaining space is defined by each driver as the per-driver
  * configuration space */
-#define VIRTIO_PCI_CONFIG(dev)		((dev)->msix_enabled ? 24 : 20)
+#define VIRTIO_PCI_CONFIG		20
 
 /* Virtio ABI version, this must match exactly */
 #define VIRTIO_PCI_ABI_VERSION		0
diff --git a/trunk/include/linux/virtio_ring.h b/trunk/include/linux/virtio_ring.h
index 693e0ec5afa6..71e03722fb59 100644
--- a/trunk/include/linux/virtio_ring.h
+++ b/trunk/include/linux/virtio_ring.h
@@ -14,8 +14,6 @@
 #define VRING_DESC_F_NEXT	1
 /* This marks a buffer as write-only (otherwise read-only). */
 #define VRING_DESC_F_WRITE	2
-/* This means the buffer contains a list of buffer descriptors. */
-#define VRING_DESC_F_INDIRECT	4
 
 /* The Host uses this in used->flags to advise the Guest: don't kick me when
  * you add a buffer.  It's unreliable, so it's simply an optimization.  Guest
@@ -26,9 +24,6 @@
  * optimization.  */
 #define VRING_AVAIL_F_NO_INTERRUPT	1
 
-/* We support indirect buffer descriptors */
-#define VIRTIO_RING_F_INDIRECT_DESC	28
-
 /* Virtio ring descriptors: 16 bytes.  These can chain together via "next". */
 struct vring_desc
 {
@@ -124,8 +119,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      struct virtio_device *vdev,
 				      void *pages,
 				      void (*notify)(struct virtqueue *vq),
-				      void (*callback)(struct virtqueue *vq),
-				      const char *name);
+				      void (*callback)(struct virtqueue *vq));
 void vring_del_virtqueue(struct virtqueue *vq);
 /* Filter out transport-specific feature bits. */
 void vring_transport_features(struct virtio_device *vdev);
diff --git a/trunk/kernel/module.c b/trunk/kernel/module.c
index e4ab36ce7672..35f7de00bf0d 100644
--- a/trunk/kernel/module.c
+++ b/trunk/kernel/module.c
@@ -2455,7 +2455,6 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
 	mutex_lock(&module_mutex);
 	/* Drop initial reference. */
 	module_put(mod);
-	trim_init_extable(mod);
 	module_free(mod, mod->module_init);
 	mod->module_init = NULL;
 	mod->init_size = 0;
diff --git a/trunk/kernel/params.c b/trunk/kernel/params.c
index 7f6912ced2ba..de273ec85bd2 100644
--- a/trunk/kernel/params.c
+++ b/trunk/kernel/params.c
@@ -24,6 +24,9 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 
+/* We abuse the top bit of "perm" to record whether we kmalloc'ed. */
+#define KPARAM_KMALLOCED	0x80000000
+
 #if 0
 #define DEBUGP printk
 #else
@@ -217,13 +220,13 @@ int param_set_charp(const char *val, struct kernel_param *kp)
 		return -ENOSPC;
 	}
 
-	if (kp->flags & KPARAM_KMALLOCED)
+	if (kp->perm & KPARAM_KMALLOCED)
 		kfree(*(char **)kp->arg);
 
 	/* This is a hack.  We can't need to strdup in early boot, and we
 	 * don't need to; this mangled commandline is preserved. */
 	if (slab_is_available()) {
-		kp->flags |= KPARAM_KMALLOCED;
+		kp->perm |= KPARAM_KMALLOCED;
 		*(char **)kp->arg = kstrdup(val, GFP_KERNEL);
 		if (!kp->arg)
 			return -ENOMEM;
@@ -238,63 +241,44 @@ int param_get_charp(char *buffer, struct kernel_param *kp)
 	return sprintf(buffer, "%s", *((char **)kp->arg));
 }
 
-/* Actually could be a bool or an int, for historical reasons. */
 int param_set_bool(const char *val, struct kernel_param *kp)
 {
-	bool v;
-
 	/* No equals means "set"... */
 	if (!val) val = "1";
 
 	/* One of =[yYnN01] */
 	switch (val[0]) {
 	case 'y': case 'Y': case '1':
-		v = true;
-		break;
+		*(int *)kp->arg = 1;
+		return 0;
 	case 'n': case 'N': case '0':
-		v = false;
-		break;
-	default:
-		return -EINVAL;
+		*(int *)kp->arg = 0;
+		return 0;
 	}
-
-	if (kp->flags & KPARAM_ISBOOL)
-		*(bool *)kp->arg = v;
-	else
-		*(int *)kp->arg = v;
-	return 0;
+	return -EINVAL;
 }
 
 int param_get_bool(char *buffer, struct kernel_param *kp)
 {
-	bool val;
-	if (kp->flags & KPARAM_ISBOOL)
-		val = *(bool *)kp->arg;
-	else
-		val = *(int *)kp->arg;
-
 	/* Y and N chosen as being relatively non-coder friendly */
-	return sprintf(buffer, "%c", val ? 'Y' : 'N');
+	return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'Y' : 'N');
 }
 
-/* This one must be bool. */
 int param_set_invbool(const char *val, struct kernel_param *kp)
 {
-	int ret;
-	bool boolval;
+	int boolval, ret;
 	struct kernel_param dummy;
 
 	dummy.arg = &boolval;
-	dummy.flags = KPARAM_ISBOOL;
 	ret = param_set_bool(val, &dummy);
 	if (ret == 0)
-		*(bool *)kp->arg = !boolval;
+		*(int *)kp->arg = !boolval;
 	return ret;
 }
 
 int param_get_invbool(char *buffer, struct kernel_param *kp)
 {
-	return sprintf(buffer, "%c", (*(bool *)kp->arg) ? 'N' : 'Y');
+	return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'N' : 'Y');
 }
 
 /* We break the rule and mangle the string. */
@@ -607,7 +591,7 @@ void destroy_params(const struct kernel_param *params, unsigned num)
 	unsigned int i;
 
 	for (i = 0; i < num; i++)
-		if (params[i].flags & KPARAM_KMALLOCED)
+		if (params[i].perm & KPARAM_KMALLOCED)
 			kfree(*(char **)params[i].arg);
 }
 
diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c
index 8ec9d13140be..f04aa9664504 100644
--- a/trunk/kernel/sched.c
+++ b/trunk/kernel/sched.c
@@ -2192,7 +2192,6 @@ void kick_process(struct task_struct *p)
 		smp_send_reschedule(cpu);
 	preempt_enable();
 }
-EXPORT_SYMBOL_GPL(kick_process);
 
 /*
  * Return a low guess at the load of a migration-source cpu weighted
diff --git a/trunk/lib/extable.c b/trunk/lib/extable.c
index 4cac81ec225e..179c08745595 100644
--- a/trunk/lib/extable.c
+++ b/trunk/lib/extable.c
@@ -39,26 +39,7 @@ void sort_extable(struct exception_table_entry *start,
 	sort(start, finish - start, sizeof(struct exception_table_entry),
 	     cmp_ex, NULL);
 }
-
-#ifdef CONFIG_MODULES
-/*
- * If the exception table is sorted, any referring to the module init
- * will be at the beginning or the end.
- */
-void trim_init_extable(struct module *m)
-{
-	/*trim the beginning*/
-	while (m->num_exentries && within_module_init(m->extable[0].insn, m)) {
-		m->extable++;
-		m->num_exentries--;
-	}
-	/*trim the end*/
-	while (m->num_exentries &&
-		within_module_init(m->extable[m->num_exentries-1].insn, m))
-		m->num_exentries--;
-}
-#endif /* CONFIG_MODULES */
-#endif /* !ARCH_HAS_SORT_EXTABLE */
+#endif
 
 #ifndef ARCH_HAS_SEARCH_EXTABLE
 /*
diff --git a/trunk/net/9p/trans_virtio.c b/trunk/net/9p/trans_virtio.c
index a49484e67e1d..bb8579a141a8 100644
--- a/trunk/net/9p/trans_virtio.c
+++ b/trunk/net/9p/trans_virtio.c
@@ -246,7 +246,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 	chan->vdev = vdev;
 
 	/* We expect one virtqueue, for requests. */
-	chan->vq = virtio_find_single_vq(vdev, req_done, "requests");
+	chan->vq = vdev->config->find_vq(vdev, 0, req_done);
 	if (IS_ERR(chan->vq)) {
 		err = PTR_ERR(chan->vq);
 		goto out_free_vq;
@@ -261,7 +261,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 	return 0;
 
 out_free_vq:
-	vdev->config->del_vqs(vdev);
+	vdev->config->del_vq(chan->vq);
 fail:
 	mutex_lock(&virtio_9p_lock);
 	chan_index--;
@@ -332,7 +332,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
 	BUG_ON(chan->inuse);
 
 	if (chan->initialized) {
-		vdev->config->del_vqs(vdev);
+		vdev->config->del_vq(chan->vq);
 		chan->initialized = false;
 	}
 }
diff --git a/trunk/scripts/mod/file2alias.c b/trunk/scripts/mod/file2alias.c
index 40e0045876ee..a3344285ccf4 100644
--- a/trunk/scripts/mod/file2alias.c
+++ b/trunk/scripts/mod/file2alias.c
@@ -641,7 +641,7 @@ static int do_virtio_entry(const char *filename, struct virtio_device_id *id,
 	id->vendor = TO_NATIVE(id->vendor);
 
 	strcpy(alias, "virtio:");
-	ADD(alias, "d", id->device != VIRTIO_DEV_ANY_ID, id->device);
+	ADD(alias, "d", 1, id->device);
 	ADD(alias, "v", id->vendor != VIRTIO_DEV_ANY_ID, id->vendor);
 
 	add_wildcard(alias);