From fa9d71e573cc0d2b3a7c21f3e5aa1860c9e1b541 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@iki.fi>
Date: Sun, 26 Jun 2011 19:36:46 +0300
Subject: [PATCH] --- yaml --- r: 255285 b: refs/heads/master c:
 e0377e25206328998d036cafddcd00a7c3252e3e h: refs/heads/master i:   255283:
 c4e999213ac45a4813c364fbacfeba8c5e3c7095 v: v3

---
 [refs]                                        |    2 +-
 trunk/Documentation/filesystems/ubifs.txt     |   28 +
 trunk/Documentation/mmc/00-INDEX              |    2 -
 trunk/Documentation/mmc/mmc-async-req.txt     |   87 --
 trunk/Documentation/virtual/lguest/lguest.c   |   10 +-
 trunk/MAINTAINERS                             |   13 +-
 trunk/arch/arm/configs/mmp2_defconfig         |    9 +-
 .../arm/mach-at91/include/mach}/at91_mci.h    |    2 +-
 trunk/arch/arm/mach-mmp/brownstone.c          |   10 +-
 trunk/arch/arm/mach-mmp/include/mach/mmp2.h   |    2 +-
 trunk/arch/arm/mach-mmp/jasper.c              |    2 +-
 trunk/arch/arm/mach-mmp/mmp2.c                |   16 +-
 trunk/arch/arm/plat-pxa/include/plat/sdhci.h  |   35 +
 trunk/arch/x86/include/asm/xen/pci.h          |    5 +-
 trunk/arch/x86/pci/xen.c                      |  371 +++--
 trunk/arch/x86/xen/Makefile                   |    2 +-
 trunk/arch/x86/xen/enlighten.c                |    8 -
 trunk/arch/x86/xen/platform-pci-unplug.c      |    2 +-
 trunk/arch/x86/xen/vga.c                      |   67 -
 trunk/arch/x86/xen/xen-ops.h                  |   11 -
 trunk/drivers/block/xen-blkback/xenbus.c      |    2 +-
 trunk/drivers/mmc/card/block.c                |  681 +++-----
 trunk/drivers/mmc/card/mmc_test.c             |  498 +-----
 trunk/drivers/mmc/card/queue.c                |  217 +--
 trunk/drivers/mmc/card/queue.h                |   33 +-
 trunk/drivers/mmc/core/core.c                 |  197 +--
 trunk/drivers/mmc/core/sd.c                   |   68 +-
 trunk/drivers/mmc/core/sdio_bus.c             |    8 +-
 trunk/drivers/mmc/host/Kconfig                |   84 +-
 trunk/drivers/mmc/host/Makefile               |   25 +-
 trunk/drivers/mmc/host/at91_mci.c             |    3 +-
 trunk/drivers/mmc/host/atmel-mci.c            |   63 -
 trunk/drivers/mmc/host/dw_mmc.c               |  446 ++----
 trunk/drivers/mmc/host/dw_mmc.h               |   17 +-
 trunk/drivers/mmc/host/mmci.c                 |  147 +-
 trunk/drivers/mmc/host/mmci.h                 |    8 -
 trunk/drivers/mmc/host/mxs-mmc.c              |   30 +-
 trunk/drivers/mmc/host/omap_hsmmc.c           |  671 +++++---
 trunk/drivers/mmc/host/sdhci-cns3xxx.c        |   44 +-
 trunk/drivers/mmc/host/sdhci-dove.c           |   43 +-
 trunk/drivers/mmc/host/sdhci-esdhc-imx.c      |  125 +-
 trunk/drivers/mmc/host/sdhci-of-core.c        |  253 +++
 trunk/drivers/mmc/host/sdhci-of-esdhc.c       |   86 +-
 trunk/drivers/mmc/host/sdhci-of-hlwd.c        |   67 +-
 trunk/drivers/mmc/host/sdhci-of.h             |   42 +
 trunk/drivers/mmc/host/sdhci-pci.c            |   54 +-
 trunk/drivers/mmc/host/sdhci-pltfm.c          |  216 +--
 trunk/drivers/mmc/host/sdhci-pltfm.h          |   90 +-
 trunk/drivers/mmc/host/sdhci-pxa.c            |  303 ++++
 trunk/drivers/mmc/host/sdhci-pxav2.c          |  244 ---
 trunk/drivers/mmc/host/sdhci-pxav3.c          |  289 ----
 trunk/drivers/mmc/host/sdhci-s3c.c            |    6 +-
 trunk/drivers/mmc/host/sdhci-tegra.c          |  117 +-
 trunk/drivers/mmc/host/sdhci.c                |   34 +-
 trunk/drivers/mmc/host/sh_mmcif.c             |   27 +-
 trunk/drivers/mmc/host/sh_mobile_sdhi.c       |   36 -
 trunk/drivers/mmc/host/tmio_mmc.h             |   53 +-
 trunk/drivers/mmc/host/tmio_mmc_dma.c         |    7 +-
 trunk/drivers/mmc/host/tmio_mmc_pio.c         |  205 +--
 trunk/drivers/mtd/ubi/build.c                 |   28 +-
 trunk/drivers/mtd/ubi/debug.c                 |  269 +---
 trunk/drivers/mtd/ubi/debug.h                 |  113 +-
 trunk/drivers/mtd/ubi/io.c                    |   20 +-
 trunk/drivers/mtd/ubi/scan.c                  |    2 +-
 trunk/drivers/mtd/ubi/ubi.h                   |    8 +-
 trunk/drivers/mtd/ubi/vmt.c                   |    2 +-
 trunk/drivers/mtd/ubi/vtbl.c                  |   18 +-
 trunk/drivers/mtd/ubi/wl.c                    |   42 +-
 trunk/drivers/pci/quirks.c                    |   23 -
 trunk/drivers/xen/Kconfig                     |   46 -
 trunk/drivers/xen/Makefile                    |    4 +-
 trunk/drivers/xen/events.c                    |    7 +-
 trunk/drivers/xen/tmem.c                      |  170 +-
 trunk/drivers/xen/xen-balloon.c               |    2 -
 trunk/drivers/xen/xen-pciback/Makefile        |    7 -
 trunk/drivers/xen/xen-pciback/conf_space.c    |  438 ------
 trunk/drivers/xen/xen-pciback/conf_space.h    |  126 --
 .../xen/xen-pciback/conf_space_capability.c   |  207 ---
 .../xen/xen-pciback/conf_space_header.c       |  386 -----
 .../xen/xen-pciback/conf_space_quirks.c       |  140 --
 .../xen/xen-pciback/conf_space_quirks.h       |   33 -
 trunk/drivers/xen/xen-pciback/passthrough.c   |  194 ---
 trunk/drivers/xen/xen-pciback/pci_stub.c      | 1376 -----------------
 trunk/drivers/xen/xen-pciback/pciback.h       |  183 ---
 trunk/drivers/xen/xen-pciback/pciback_ops.c   |  384 -----
 trunk/drivers/xen/xen-pciback/vpci.c          |  259 ----
 trunk/drivers/xen/xen-pciback/xenbus.c        |  749 ---------
 trunk/drivers/xen/xen-selfballoon.c           |  485 ------
 trunk/drivers/xen/xenbus/xenbus_probe.c       |   44 +-
 trunk/drivers/xen/xenbus/xenbus_probe.h       |    2 -
 .../drivers/xen/xenbus/xenbus_probe_backend.c |    9 +-
 .../xen/xenbus/xenbus_probe_frontend.c        |    6 +-
 trunk/fs/dlm/ast.c                            |  265 ++--
 trunk/fs/dlm/ast.h                            |   15 +-
 trunk/fs/dlm/config.c                         |   75 +-
 trunk/fs/dlm/config.h                         |    2 +-
 trunk/fs/dlm/dlm_internal.h                   |   29 +-
 trunk/fs/dlm/lock.c                           |  225 ++-
 trunk/fs/dlm/lockspace.c                      |  177 ++-
 trunk/fs/dlm/lowcomms.c                       |    9 +-
 trunk/fs/dlm/memory.c                         |   22 +-
 trunk/fs/dlm/memory.h                         |    2 +-
 trunk/fs/dlm/recoverd.c                       |   12 +-
 trunk/fs/dlm/user.c                           |   12 +-
 trunk/fs/gfs2/bmap.c                          |   12 +-
 trunk/fs/gfs2/dir.c                           |  221 +--
 trunk/fs/gfs2/dir.h                           |    1 -
 trunk/fs/gfs2/file.c                          |    4 +-
 trunk/fs/gfs2/glock.c                         |   39 +-
 trunk/fs/gfs2/glock.h                         |    6 -
 trunk/fs/gfs2/glops.c                         |    7 +-
 trunk/fs/gfs2/incore.h                        |    3 +-
 trunk/fs/gfs2/main.c                          |    1 -
 trunk/fs/gfs2/ops_fstype.c                    |    1 -
 trunk/fs/gfs2/rgrp.c                          |   52 +-
 trunk/fs/gfs2/rgrp.h                          |    4 +-
 trunk/fs/gfs2/super.c                         |    2 +-
 trunk/fs/hfsplus/brec.c                       |    4 -
 trunk/fs/hfsplus/catalog.c                    |   14 +-
 trunk/fs/hfsplus/dir.c                        |    8 +-
 trunk/fs/hfsplus/extents.c                    |   50 +-
 trunk/fs/hfsplus/hfsplus_fs.h                 |   18 +-
 trunk/fs/hfsplus/inode.c                      |   12 +-
 trunk/fs/hfsplus/part_tbl.c                   |   32 +-
 trunk/fs/hfsplus/super.c                      |   43 +-
 trunk/fs/hfsplus/unicode.c                    |   35 +-
 trunk/fs/hfsplus/wrapper.c                    |   92 +-
 trunk/fs/ubifs/commit.c                       |    8 +-
 trunk/fs/ubifs/debug.c                        |  762 ++++-----
 trunk/fs/ubifs/debug.h                        |  241 ++-
 trunk/fs/ubifs/dir.c                          |   16 +-
 trunk/fs/ubifs/file.c                         |    2 +-
 trunk/fs/ubifs/io.c                           |  168 +-
 trunk/fs/ubifs/log.c                          |    6 +-
 trunk/fs/ubifs/lprops.c                       |    8 +-
 trunk/fs/ubifs/lpt.c                          |   37 +-
 trunk/fs/ubifs/lpt_commit.c                   |   40 +-
 trunk/fs/ubifs/misc.h                         |  103 +-
 trunk/fs/ubifs/orphan.c                       |    2 +-
 trunk/fs/ubifs/recovery.c                     |   43 +-
 trunk/fs/ubifs/replay.c                       |    3 +-
 trunk/fs/ubifs/sb.c                           |    6 +-
 trunk/fs/ubifs/scan.c                         |    4 +-
 trunk/fs/ubifs/super.c                        |    6 +-
 trunk/fs/ubifs/tnc.c                          |   26 +-
 trunk/fs/ubifs/tnc_commit.c                   |  145 +-
 trunk/fs/ubifs/ubifs.h                        |   21 +-
 trunk/fs/xfs/Makefile                         |    2 +
 trunk/fs/xfs/linux-2.6/xfs_acl.c              |    2 +-
 trunk/fs/xfs/linux-2.6/xfs_aops.c             |   20 +-
 trunk/fs/xfs/linux-2.6/xfs_buf.c              |   79 +-
 trunk/fs/xfs/linux-2.6/xfs_buf.h              |   64 +-
 trunk/fs/xfs/linux-2.6/xfs_export.c           |    4 +-
 trunk/fs/xfs/linux-2.6/xfs_file.c             |    2 +-
 trunk/fs/xfs/linux-2.6/xfs_iops.c             |  433 +-----
 trunk/fs/xfs/linux-2.6/xfs_linux.h            |    7 +-
 trunk/fs/xfs/linux-2.6/xfs_super.c            |   36 +-
 trunk/fs/xfs/linux-2.6/xfs_sync.c             |   10 +-
 trunk/fs/xfs/linux-2.6/xfs_sync.h             |    8 +
 trunk/fs/xfs/linux-2.6/xfs_trace.h            |   60 +-
 trunk/fs/xfs/quota/xfs_dquot.c                |   48 +-
 trunk/fs/xfs/quota/xfs_dquot.h                |    6 +
 trunk/fs/xfs/quota/xfs_qm.c                   |   49 +-
 trunk/fs/xfs/quota/xfs_qm.h                   |    6 +
 trunk/fs/xfs/quota/xfs_qm_syscalls.c          |  355 ++++-
 trunk/fs/xfs/quota/xfs_trans_dquot.c          |   15 +-
 trunk/fs/xfs/xfs.h                            |    1 +
 trunk/fs/xfs/xfs_alloc.c                      |   14 +-
 trunk/fs/xfs/xfs_alloc_btree.c                |   84 +-
 trunk/fs/xfs/xfs_arch.h                       |  136 ++
 trunk/fs/xfs/xfs_attr.c                       |   41 +-
 trunk/fs/xfs/xfs_attr_leaf.c                  |   60 +-
 trunk/fs/xfs/xfs_bmap.c                       |   41 +-
 trunk/fs/xfs/xfs_bmap_btree.c                 |  106 +-
 trunk/fs/xfs/xfs_btree.c                      |   29 +-
 trunk/fs/xfs/xfs_btree.h                      |   38 +-
 trunk/fs/xfs/xfs_btree_trace.c                |  249 +++
 trunk/fs/xfs/xfs_btree_trace.h                |   99 ++
 trunk/fs/xfs/xfs_buf_item.c                   |   75 +-
 trunk/fs/xfs/xfs_da_btree.c                   |  272 ++--
 trunk/fs/xfs/xfs_da_btree.h                   |   13 +-
 trunk/fs/xfs/xfs_dir2.c                       |  140 +-
 trunk/fs/xfs/xfs_dir2.h                       |   54 +-
 trunk/fs/xfs/xfs_dir2_block.c                 |  253 +--
 trunk/fs/xfs/xfs_dir2_block.h                 |   92 ++
 trunk/fs/xfs/xfs_dir2_data.c                  |  327 ++--
 trunk/fs/xfs/xfs_dir2_data.h                  |  184 +++
 trunk/fs/xfs/xfs_dir2_format.h                |  597 -------
 trunk/fs/xfs/xfs_dir2_leaf.c                  |  417 +++--
 trunk/fs/xfs/xfs_dir2_leaf.h                  |  253 +++
 trunk/fs/xfs/xfs_dir2_node.c                  |  201 ++-
 trunk/fs/xfs/xfs_dir2_node.h                  |  100 ++
 trunk/fs/xfs/xfs_dir2_priv.h                  |  135 --
 trunk/fs/xfs/xfs_dir2_sf.c                    |  338 ++--
 trunk/fs/xfs/xfs_dir2_sf.h                    |  171 ++
 trunk/fs/xfs/xfs_fs.h                         |    5 -
 trunk/fs/xfs/xfs_ialloc.c                     |   14 +-
 trunk/fs/xfs/xfs_ialloc_btree.c               |   75 +
 trunk/fs/xfs/xfs_iget.c                       |    1 +
 trunk/fs/xfs/xfs_inode.c                      |  537 +++++--
 trunk/fs/xfs/xfs_inode.h                      |   25 +-
 trunk/fs/xfs/xfs_inode_item.c                 |   17 +-
 trunk/fs/xfs/xfs_inum.h                       |   11 +
 trunk/fs/xfs/xfs_log.c                        |   64 +-
 trunk/fs/xfs/xfs_log_recover.c                |   38 +-
 trunk/fs/xfs/xfs_mount.c                      |   71 +-
 trunk/fs/xfs/xfs_mount.h                      |    2 +-
 trunk/fs/xfs/xfs_trans.c                      |   27 +-
 trunk/fs/xfs/xfs_trans_ail.c                  |  214 ++-
 trunk/fs/xfs/xfs_trans_buf.c                  |  118 +-
 trunk/fs/xfs/xfs_trans_inode.c                |    9 +
 trunk/fs/xfs/xfs_trans_priv.h                 |   14 +-
 trunk/fs/xfs/xfs_vnodeops.c                   |  479 +++++-
 trunk/fs/xfs/xfs_vnodeops.h                   |    3 +-
 trunk/include/linux/mfd/tmio.h                |    8 -
 trunk/include/linux/mmc/boot.h                |    6 +-
 trunk/include/linux/mmc/card.h                |    2 +-
 trunk/include/linux/mmc/core.h                |    9 +-
 trunk/include/linux/mmc/dw_mmc.h              |   28 +-
 trunk/include/linux/mmc/host.h                |   26 +-
 trunk/include/linux/mmc/ioctl.h               |    2 +-
 trunk/include/linux/mmc/mmc.h                 |   17 +-
 trunk/include/linux/mmc/pm.h                  |    2 +-
 trunk/include/linux/mmc/sd.h                  |    7 +-
 trunk/include/linux/mmc/sdhci-pltfm.h         |   35 +
 trunk/include/linux/mmc/sdhci-spear.h         |    6 +-
 trunk/include/linux/mmc/sdhci.h               |    6 +-
 trunk/include/linux/mmc/sdio.h                |    7 +-
 trunk/include/linux/mmc/sdio_func.h           |    7 +-
 trunk/include/linux/mmc/sdio_ids.h            |    6 +-
 trunk/include/linux/mmc/sh_mmcif.h            |    6 +-
 trunk/include/linux/mmc/sh_mobile_sdhi.h      |    6 +-
 trunk/include/linux/mmc/tmio.h                |    8 +-
 trunk/include/linux/mtd/ubi.h                 |   14 +-
 trunk/include/linux/platform_data/pxa_sdhci.h |   60 -
 trunk/include/linux/slab.h                    |   20 -
 trunk/include/linux/slab_def.h                |   52 +-
 trunk/include/linux/slob_def.h                |   10 +
 trunk/include/linux/slub_def.h                |   23 +-
 trunk/include/xen/balloon.h                   |   10 -
 trunk/include/xen/events.h                    |    2 +
 trunk/include/xen/hvc-console.h               |    4 +-
 trunk/include/xen/interface/xen.h             |   39 -
 trunk/include/xen/tmem.h                      |    5 -
 trunk/include/xen/xenbus.h                    |    2 -
 trunk/mm/slab.c                               |   17 +-
 trunk/mm/slob.c                               |    6 -
 trunk/mm/slub.c                               |  105 +-
 248 files changed, 8551 insertions(+), 14620 deletions(-)
 delete mode 100644 trunk/Documentation/mmc/mmc-async-req.txt
 rename trunk/{drivers/mmc/host => arch/arm/mach-at91/include/mach}/at91_mci.h (99%)
 create mode 100644 trunk/arch/arm/plat-pxa/include/plat/sdhci.h
 delete mode 100644 trunk/arch/x86/xen/vga.c
 create mode 100644 trunk/drivers/mmc/host/sdhci-of-core.c
 create mode 100644 trunk/drivers/mmc/host/sdhci-of.h
 create mode 100644 trunk/drivers/mmc/host/sdhci-pxa.c
 delete mode 100644 trunk/drivers/mmc/host/sdhci-pxav2.c
 delete mode 100644 trunk/drivers/mmc/host/sdhci-pxav3.c
 delete mode 100644 trunk/drivers/xen/xen-pciback/Makefile
 delete mode 100644 trunk/drivers/xen/xen-pciback/conf_space.c
 delete mode 100644 trunk/drivers/xen/xen-pciback/conf_space.h
 delete mode 100644 trunk/drivers/xen/xen-pciback/conf_space_capability.c
 delete mode 100644 trunk/drivers/xen/xen-pciback/conf_space_header.c
 delete mode 100644 trunk/drivers/xen/xen-pciback/conf_space_quirks.c
 delete mode 100644 trunk/drivers/xen/xen-pciback/conf_space_quirks.h
 delete mode 100644 trunk/drivers/xen/xen-pciback/passthrough.c
 delete mode 100644 trunk/drivers/xen/xen-pciback/pci_stub.c
 delete mode 100644 trunk/drivers/xen/xen-pciback/pciback.h
 delete mode 100644 trunk/drivers/xen/xen-pciback/pciback_ops.c
 delete mode 100644 trunk/drivers/xen/xen-pciback/vpci.c
 delete mode 100644 trunk/drivers/xen/xen-pciback/xenbus.c
 delete mode 100644 trunk/drivers/xen/xen-selfballoon.c
 create mode 100644 trunk/fs/xfs/xfs_arch.h
 create mode 100644 trunk/fs/xfs/xfs_btree_trace.c
 create mode 100644 trunk/fs/xfs/xfs_btree_trace.h
 create mode 100644 trunk/fs/xfs/xfs_dir2_block.h
 create mode 100644 trunk/fs/xfs/xfs_dir2_data.h
 delete mode 100644 trunk/fs/xfs/xfs_dir2_format.h
 create mode 100644 trunk/fs/xfs/xfs_dir2_leaf.h
 create mode 100644 trunk/fs/xfs/xfs_dir2_node.h
 delete mode 100644 trunk/fs/xfs/xfs_dir2_priv.h
 create mode 100644 trunk/fs/xfs/xfs_dir2_sf.h
 create mode 100644 trunk/include/linux/mmc/sdhci-pltfm.h
 delete mode 100644 trunk/include/linux/platform_data/pxa_sdhci.h
 delete mode 100644 trunk/include/xen/tmem.h

diff --git a/[refs] b/[refs]
index 528498dbb708..367c7065b14a 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 111ad119d1765b1bbef2629a5f2bd825caeb7e74
+refs/heads/master: e0377e25206328998d036cafddcd00a7c3252e3e
diff --git a/trunk/Documentation/filesystems/ubifs.txt b/trunk/Documentation/filesystems/ubifs.txt
index a0a61d2f389f..8e4fab639d9c 100644
--- a/trunk/Documentation/filesystems/ubifs.txt
+++ b/trunk/Documentation/filesystems/ubifs.txt
@@ -111,6 +111,34 @@ The following is an example of the kernel boot arguments to attach mtd0
 to UBI and mount volume "rootfs":
 ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs
 
+
+Module Parameters for Debugging
+===============================
+
+When UBIFS has been compiled with debugging enabled, two module parameters
+are available to control aspects of testing and debugging.
+
+debug_chks	Selects extra checks that UBIFS can do while running:
+
+		Check					Flag value
+
+		General checks				1
+		Check Tree Node Cache (TNC)		2
+		Check indexing tree size		4
+		Check orphan area			8
+		Check old indexing tree			16
+		Check LEB properties (lprops)		32
+		Check leaf nodes and inodes		64
+
+debug_tsts	Selects a mode of testing, as follows:
+
+		Test mode				Flag value
+
+		Failure mode for recovery testing	4
+
+For example, set debug_chks to 3 to enable general and TNC checks.
+
+
 References
 ==========
 
diff --git a/trunk/Documentation/mmc/00-INDEX b/trunk/Documentation/mmc/00-INDEX
index a9ba6720ffdf..93dd7a714075 100644
--- a/trunk/Documentation/mmc/00-INDEX
+++ b/trunk/Documentation/mmc/00-INDEX
@@ -4,5 +4,3 @@ mmc-dev-attrs.txt
         - info on SD and MMC device attributes
 mmc-dev-parts.txt
         - info on SD and MMC device partitions
-mmc-async-req.txt
-        - info on mmc asynchronous requests
diff --git a/trunk/Documentation/mmc/mmc-async-req.txt b/trunk/Documentation/mmc/mmc-async-req.txt
deleted file mode 100644
index ae1907b10e4a..000000000000
--- a/trunk/Documentation/mmc/mmc-async-req.txt
+++ /dev/null
@@ -1,87 +0,0 @@
-Rationale
-=========
-
-How significant is the cache maintenance overhead?
-It depends. Fast eMMC and multiple cache levels with speculative cache
-pre-fetch makes the cache overhead relatively significant. If the DMA
-preparations for the next request are done in parallel with the current
-transfer, the DMA preparation overhead would not affect the MMC performance.
-The intention of non-blocking (asynchronous) MMC requests is to minimize the
-time between when an MMC request ends and another MMC request begins.
-Using mmc_wait_for_req(), the MMC controller is idle while dma_map_sg and
-dma_unmap_sg are processing. Using non-blocking MMC requests makes it
-possible to prepare the caches for next job in parallel with an active
-MMC request.
-
-MMC block driver
-================
-
-The mmc_blk_issue_rw_rq() in the MMC block driver is made non-blocking.
-The increase in throughput is proportional to the time it takes to
-prepare (major part of preparations are dma_map_sg() and dma_unmap_sg())
-a request and how fast the memory is. The faster the MMC/SD is the
-more significant the prepare request time becomes. Roughly the expected
-performance gain is 5% for large writes and 10% on large reads on a L2 cache
-platform. In power save mode, when clocks run on a lower frequency, the DMA
-preparation may cost even more. As long as these slower preparations are run
-in parallel with the transfer performance won't be affected.
-
-Details on measurements from IOZone and mmc_test
-================================================
-
-https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req
-
-MMC core API extension
-======================
-
-There is one new public function mmc_start_req().
-It starts a new MMC command request for a host. The function isn't
-truly non-blocking. If there is an ongoing async request it waits
-for completion of that request and starts the new one and returns. It
-doesn't wait for the new request to complete. If there is no ongoing
-request it starts the new request and returns immediately.
-
-MMC host extensions
-===================
-
-There are two optional members in the mmc_host_ops -- pre_req() and
-post_req() -- that the host driver may implement in order to move work
-to before and after the actual mmc_host_ops.request() function is called.
-In the DMA case pre_req() may do dma_map_sg() and prepare the DMA
-descriptor, and post_req() runs the dma_unmap_sg().
-
-Optimize for the first request
-==============================
-
-The first request in a series of requests can't be prepared in parallel
-with the previous transfer, since there is no previous request.
-The argument is_first_req in pre_req() indicates that there is no previous
-request. The host driver may optimize for this scenario to minimize
-the performance loss. A way to optimize for this is to split the current
-request in two chunks, prepare the first chunk and start the request,
-and finally prepare the second chunk and start the transfer.
-
-Pseudocode to handle is_first_req scenario with minimal prepare overhead:
-
-if (is_first_req && req->size > threshold)
-   /* start MMC transfer for the complete transfer size */
-   mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE);
-
-   /*
-    * Begin to prepare DMA while cmd is being processed by MMC.
-    * The first chunk of the request should take the same time
-    * to prepare as the "MMC process command time".
-    * If prepare time exceeds MMC cmd time
-    * the transfer is delayed, guesstimate max 4k as first chunk size.
-    */
-    prepare_1st_chunk_for_dma(req);
-    /* flush pending desc to the DMAC (dmaengine.h) */
-    dma_issue_pending(req->dma_desc);
-
-    prepare_2nd_chunk_for_dma(req);
-    /*
-     * The second issue_pending should be called before MMC runs out
-     * of the first chunk. If the MMC runs out of the first data chunk
-     * before this call, the transfer is delayed.
-     */
-    dma_issue_pending(req->dma_desc);
diff --git a/trunk/Documentation/virtual/lguest/lguest.c b/trunk/Documentation/virtual/lguest/lguest.c
index cd9d6af61d07..e3b9bb7a644a 100644
--- a/trunk/Documentation/virtual/lguest/lguest.c
+++ b/trunk/Documentation/virtual/lguest/lguest.c
@@ -861,8 +861,10 @@ static void console_output(struct virtqueue *vq)
 	/* writev can return a partial write, so we loop here. */
 	while (!iov_empty(iov, out)) {
 		int len = writev(STDOUT_FILENO, iov, out);
-		if (len <= 0)
-			err(1, "Write to stdout gave %i", len);
+		if (len <= 0) {
+			warn("Write to stdout gave %i (%d)", len, errno);
+			break;
+		}
 		iov_consume(iov, out, len);
 	}
 
@@ -898,7 +900,7 @@ static void net_output(struct virtqueue *vq)
 	 * same format: what a coincidence!
 	 */
 	if (writev(net_info->tunfd, iov, out) < 0)
-		errx(1, "Write to tun failed?");
+		warnx("Write to tun failed (%d)?", errno);
 
 	/*
 	 * Done with that one; wait_for_vq_desc() will send the interrupt if
@@ -955,7 +957,7 @@ static void net_input(struct virtqueue *vq)
 	 */
 	len = readv(net_info->tunfd, iov, in);
 	if (len <= 0)
-		err(1, "Failed to read from tun.");
+		warn("Failed to read from tun (%d).", errno);
 
 	/*
 	 * Mark that packet buffer as used, but don't interrupt here.  We want
diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS
index 789fed662140..187282da9213 100644
--- a/trunk/MAINTAINERS
+++ b/trunk/MAINTAINERS
@@ -1,5 +1,4 @@
 
-
 	List of maintainers and how to submit kernel changes
 
 Please try to follow the guidelines below.  This will make things
@@ -4585,8 +4584,9 @@ S:	Maintained
 F:	drivers/mmc/host/omap.c
 
 OMAP HS MMC SUPPORT
+M:	Madhusudhan Chikkature <madhu.cr@ti.com>
 L:	linux-omap@vger.kernel.org
-S:	Orphan
+S:	Maintained
 F:	drivers/mmc/host/omap_hsmmc.c
 
 OMAP RANDOM NUMBER GENERATOR SUPPORT
@@ -6242,14 +6242,9 @@ F:	drivers/char/toshiba.c
 F:	include/linux/toshiba.h
 
 TMIO MMC DRIVER
-M:	Guennadi Liakhovetski <g.liakhovetski@gmx.de>
 M:	Ian Molton <ian@mnementh.co.uk>
-L:	linux-mmc@vger.kernel.org
 S:	Maintained
-F:	drivers/mmc/host/tmio_mmc*
-F:	drivers/mmc/host/sh_mobile_sdhi.c
-F:	include/linux/mmc/tmio.h
-F:	include/linux/mmc/sh_mobile_sdhi.h
+F:	drivers/mmc/host/tmio_mmc.*
 
 TMPFS (SHMEM FILESYSTEM)
 M:	Hugh Dickins <hughd@google.com>
@@ -6326,7 +6321,7 @@ F:	drivers/scsi/u14-34f.c
 
 UBI FILE SYSTEM (UBIFS)
 M:	Artem Bityutskiy <dedekind1@gmail.com>
-M:	Adrian Hunter <adrian.hunter@intel.com>
+M:	Adrian Hunter <adrian.hunter@nokia.com>
 L:	linux-mtd@lists.infradead.org
 T:	git git://git.infradead.org/ubifs-2.6.git
 W:	http://www.linux-mtd.infradead.org/doc/ubifs.html
diff --git a/trunk/arch/arm/configs/mmp2_defconfig b/trunk/arch/arm/configs/mmp2_defconfig
index 5a584520db2f..47ad3b1a4fee 100644
--- a/trunk/arch/arm/configs/mmp2_defconfig
+++ b/trunk/arch/arm/configs/mmp2_defconfig
@@ -8,7 +8,6 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_ARCH_MMP=y
-CONFIG_MACH_BROWNSTONE=y
 CONFIG_MACH_FLINT=y
 CONFIG_MACH_MARVELL_JASPER=y
 CONFIG_HIGH_RES_TIMERS=y
@@ -64,16 +63,10 @@ CONFIG_BACKLIGHT_MAX8925=y
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_MAX8925=y
-CONFIG_MMC=y
 # CONFIG_DNOTIFY is not set
 CONFIG_INOTIFY=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT4_FS=y
-CONFIG_MSDOS_FS=y
-CONFIG_FAT_DEFAULT_CODEPAGE=437
 CONFIG_JFFS2_FS=y
 CONFIG_CRAMFS=y
 CONFIG_NFS_FS=y
@@ -88,7 +81,7 @@ CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_PREEMPT is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
-# CONFIG_DYNAMIC_DEBUG is not set
+CONFIG_DYNAMIC_DEBUG=y
 CONFIG_DEBUG_USER=y
 CONFIG_DEBUG_ERRORS=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/trunk/drivers/mmc/host/at91_mci.h b/trunk/arch/arm/mach-at91/include/mach/at91_mci.h
similarity index 99%
rename from trunk/drivers/mmc/host/at91_mci.h
rename to trunk/arch/arm/mach-at91/include/mach/at91_mci.h
index eec3a6b1c2bc..02182c16a022 100644
--- a/trunk/drivers/mmc/host/at91_mci.h
+++ b/trunk/arch/arm/mach-at91/include/mach/at91_mci.h
@@ -1,5 +1,5 @@
 /*
- * drivers/mmc/host/at91_mci.h
+ * arch/arm/mach-at91/include/mach/at91_mci.h
  *
  * Copyright (C) 2005 Ivan Kokshaysky
  * Copyright (C) SAN People
diff --git a/trunk/arch/arm/mach-mmp/brownstone.c b/trunk/arch/arm/mach-mmp/brownstone.c
index c79162a50f28..7bb78fd5a2a6 100644
--- a/trunk/arch/arm/mach-mmp/brownstone.c
+++ b/trunk/arch/arm/mach-mmp/brownstone.c
@@ -177,16 +177,9 @@ static struct i2c_board_info brownstone_twsi1_info[] = {
 };
 
 static struct sdhci_pxa_platdata mmp2_sdh_platdata_mmc0 = {
-	.clk_delay_cycles = 0x1f,
+	.max_speed	= 25000000,
 };
 
-static struct sdhci_pxa_platdata mmp2_sdh_platdata_mmc2 = {
-	.clk_delay_cycles = 0x1f,
-	.flags = PXA_FLAG_CARD_PERMANENT
-		| PXA_FLAG_SD_8_BIT_CAPABLE_SLOT,
-};
-
-
 static void __init brownstone_init(void)
 {
 	mfp_config(ARRAY_AND_SIZE(brownstone_pin_config));
@@ -196,7 +189,6 @@ static void __init brownstone_init(void)
 	mmp2_add_uart(3);
 	mmp2_add_twsi(1, NULL, ARRAY_AND_SIZE(brownstone_twsi1_info));
 	mmp2_add_sdhost(0, &mmp2_sdh_platdata_mmc0); /* SD/MMC */
-	mmp2_add_sdhost(2, &mmp2_sdh_platdata_mmc2); /* eMMC */
 
 	/* enable 5v regulator */
 	platform_device_register(&brownstone_v_5vp_device);
diff --git a/trunk/arch/arm/mach-mmp/include/mach/mmp2.h b/trunk/arch/arm/mach-mmp/include/mach/mmp2.h
index de7b88826ad7..2cbf6df09b82 100644
--- a/trunk/arch/arm/mach-mmp/include/mach/mmp2.h
+++ b/trunk/arch/arm/mach-mmp/include/mach/mmp2.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_MACH_MMP2_H
 #define __ASM_MACH_MMP2_H
 
-#include <linux/platform_data/pxa_sdhci.h>
+#include <plat/sdhci.h>
 
 struct sys_timer;
 
diff --git a/trunk/arch/arm/mach-mmp/jasper.c b/trunk/arch/arm/mach-mmp/jasper.c
index 5d6421d63254..24172a0aad59 100644
--- a/trunk/arch/arm/mach-mmp/jasper.c
+++ b/trunk/arch/arm/mach-mmp/jasper.c
@@ -154,7 +154,7 @@ static struct i2c_board_info jasper_twsi1_info[] = {
 };
 
 static struct sdhci_pxa_platdata mmp2_sdh_platdata_mmc0 = {
-	.clk_delay_cycles = 0x1f,
+	.max_speed	= 25000000,
 };
 
 static void __init jasper_init(void)
diff --git a/trunk/arch/arm/mach-mmp/mmp2.c b/trunk/arch/arm/mach-mmp/mmp2.c
index 079c18861d5c..8e6c3ac7f7c1 100644
--- a/trunk/arch/arm/mach-mmp/mmp2.c
+++ b/trunk/arch/arm/mach-mmp/mmp2.c
@@ -168,10 +168,10 @@ static struct clk_lookup mmp2_clkregs[] = {
 	INIT_CLKREG(&clk_twsi5, "pxa2xx-i2c.4", NULL),
 	INIT_CLKREG(&clk_twsi6, "pxa2xx-i2c.5", NULL),
 	INIT_CLKREG(&clk_nand, "pxa3xx-nand", NULL),
-	INIT_CLKREG(&clk_sdh0, "sdhci-pxav3.0", "PXA-SDHCLK"),
-	INIT_CLKREG(&clk_sdh1, "sdhci-pxav3.1", "PXA-SDHCLK"),
-	INIT_CLKREG(&clk_sdh2, "sdhci-pxav3.2", "PXA-SDHCLK"),
-	INIT_CLKREG(&clk_sdh3, "sdhci-pxav3.3", "PXA-SDHCLK"),
+	INIT_CLKREG(&clk_sdh0, "sdhci-pxa.0", "PXA-SDHCLK"),
+	INIT_CLKREG(&clk_sdh1, "sdhci-pxa.1", "PXA-SDHCLK"),
+	INIT_CLKREG(&clk_sdh2, "sdhci-pxa.2", "PXA-SDHCLK"),
+	INIT_CLKREG(&clk_sdh3, "sdhci-pxa.3", "PXA-SDHCLK"),
 };
 
 static int __init mmp2_init(void)
@@ -222,8 +222,8 @@ MMP2_DEVICE(twsi4, "pxa2xx-i2c", 3, TWSI4, 0xd4033000, 0x70);
 MMP2_DEVICE(twsi5, "pxa2xx-i2c", 4, TWSI5, 0xd4033800, 0x70);
 MMP2_DEVICE(twsi6, "pxa2xx-i2c", 5, TWSI6, 0xd4034000, 0x70);
 MMP2_DEVICE(nand, "pxa3xx-nand", -1, NAND, 0xd4283000, 0x100, 28, 29);
-MMP2_DEVICE(sdh0, "sdhci-pxav3", 0, MMC, 0xd4280000, 0x120);
-MMP2_DEVICE(sdh1, "sdhci-pxav3", 1, MMC2, 0xd4280800, 0x120);
-MMP2_DEVICE(sdh2, "sdhci-pxav3", 2, MMC3, 0xd4281000, 0x120);
-MMP2_DEVICE(sdh3, "sdhci-pxav3", 3, MMC4, 0xd4281800, 0x120);
+MMP2_DEVICE(sdh0, "sdhci-pxa", 0, MMC, 0xd4280000, 0x120);
+MMP2_DEVICE(sdh1, "sdhci-pxa", 1, MMC2, 0xd4280800, 0x120);
+MMP2_DEVICE(sdh2, "sdhci-pxa", 2, MMC3, 0xd4281000, 0x120);
+MMP2_DEVICE(sdh3, "sdhci-pxa", 3, MMC4, 0xd4281800, 0x120);
 
diff --git a/trunk/arch/arm/plat-pxa/include/plat/sdhci.h b/trunk/arch/arm/plat-pxa/include/plat/sdhci.h
new file mode 100644
index 000000000000..1ab332e37d7d
--- /dev/null
+++ b/trunk/arch/arm/plat-pxa/include/plat/sdhci.h
@@ -0,0 +1,35 @@
+/* linux/arch/arm/plat-pxa/include/plat/sdhci.h
+ *
+ * Copyright 2010 Marvell
+ *	Zhangfei Gao <zhangfei.gao@marvell.com>
+ *
+ * PXA Platform - SDHCI platform data definitions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __PLAT_PXA_SDHCI_H
+#define __PLAT_PXA_SDHCI_H
+
+/* pxa specific flag */
+/* Require clock free running */
+#define PXA_FLAG_DISABLE_CLOCK_GATING (1<<0)
+
+/* Board design supports 8-bit data on SD/SDIO BUS */
+#define PXA_FLAG_SD_8_BIT_CAPABLE_SLOT (1<<2)
+
+/*
+ * struct sdhci_pxa_platdata - Platform device data for PXA SDHCI
+ * @max_speed: the maximum speed supported
+ * @quirks: quirks of specific device
+ * @flags: flags for platform requirement (PXA_FLAG_* bits)
+ */
+struct sdhci_pxa_platdata {
+	unsigned int	max_speed;
+	unsigned int	quirks;
+	unsigned int	flags;
+};
+
+#endif /* __PLAT_PXA_SDHCI_H */
diff --git a/trunk/arch/x86/include/asm/xen/pci.h b/trunk/arch/x86/include/asm/xen/pci.h
index 968d57dd54c9..4fbda9a3f339 100644
--- a/trunk/arch/x86/include/asm/xen/pci.h
+++ b/trunk/arch/x86/include/asm/xen/pci.h
@@ -14,14 +14,13 @@ static inline int pci_xen_hvm_init(void)
 }
 #endif
 #if defined(CONFIG_XEN_DOM0)
-int __init pci_xen_initial_domain(void);
+void __init xen_setup_pirqs(void);
 int xen_find_device_domain_owner(struct pci_dev *dev);
 int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
 int xen_unregister_device_domain_owner(struct pci_dev *dev);
 #else
-static inline int __init pci_xen_initial_domain(void)
+static inline void __init xen_setup_pirqs(void)
 {
-	return -1;
 }
 static inline int xen_find_device_domain_owner(struct pci_dev *dev)
 {
diff --git a/trunk/arch/x86/pci/xen.c b/trunk/arch/x86/pci/xen.c
index 1017c7bee388..f567965c0620 100644
--- a/trunk/arch/x86/pci/xen.c
+++ b/trunk/arch/x86/pci/xen.c
@@ -1,13 +1,8 @@
 /*
- * Xen PCI - handle PCI (INTx) and MSI infrastructure calls for PV, HVM and
- * initial domain support. We also handle the DSDT _PRT callbacks for GSI's
- * used in HVM and initial domain mode (PV does not parse ACPI, so it has no
- * concept of GSIs). Under PV we hook under the pnbbios API for IRQs and
- * 0xcf8 PCI configuration read/write.
+ * Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux
+ *			   x86 PCI core to support the Xen PCI Frontend
  *
  *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
- *           Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
- *           Stefano Stabellini <stefano.stabellini@eu.citrix.com>
  */
 #include <linux/module.h>
 #include <linux/init.h>
@@ -24,53 +19,22 @@
 #include <xen/events.h>
 #include <asm/xen/pci.h>
 
-static int xen_pcifront_enable_irq(struct pci_dev *dev)
-{
-	int rc;
-	int share = 1;
-	int pirq;
-	u8 gsi;
-
-	rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
-	if (rc < 0) {
-		dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
-			 rc);
-		return rc;
-	}
-	/* In PV DomU the Xen PCI backend puts the PIRQ in the interrupt line.*/
-	pirq = gsi;
-
-	if (gsi < NR_IRQS_LEGACY)
-		share = 0;
-
-	rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront");
-	if (rc < 0) {
-		dev_warn(&dev->dev, "Xen PCI: failed to bind GSI%d (PIRQ%d) to IRQ: %d\n",
-			 gsi, pirq, rc);
-		return rc;
-	}
-
-	dev->irq = rc;
-	dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
-	return 0;
-}
-
 #ifdef CONFIG_ACPI
-static int xen_register_pirq(u32 gsi, int gsi_override, int triggering,
-			     bool set_pirq)
+static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
+				 int trigger, int polarity)
 {
-	int rc, pirq = -1, irq = -1;
+	int rc, irq;
 	struct physdev_map_pirq map_irq;
 	int shareable = 0;
 	char *name;
 
-	if (set_pirq)
-		pirq = gsi;
+	if (!xen_hvm_domain())
+		return -1;
 
 	map_irq.domid = DOMID_SELF;
 	map_irq.type = MAP_PIRQ_TYPE_GSI;
 	map_irq.index = gsi;
-	map_irq.pirq = pirq;
+	map_irq.pirq = -1;
 
 	rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
 	if (rc) {
@@ -78,7 +42,7 @@ static int xen_register_pirq(u32 gsi, int gsi_override, int triggering,
 		return -1;
 	}
 
-	if (triggering == ACPI_EDGE_SENSITIVE) {
+	if (trigger == ACPI_EDGE_SENSITIVE) {
 		shareable = 0;
 		name = "ioapic-edge";
 	} else {
@@ -86,63 +50,12 @@ static int xen_register_pirq(u32 gsi, int gsi_override, int triggering,
 		name = "ioapic-level";
 	}
 
-	if (gsi_override >= 0)
-		gsi = gsi_override;
-
 	irq = xen_bind_pirq_gsi_to_irq(gsi, map_irq.pirq, shareable, name);
-	if (irq < 0)
-		goto out;
-
-	printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d (gsi=%d)\n", map_irq.pirq, irq, gsi);
-out:
-	return irq;
-}
-
-static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
-				     int trigger, int polarity)
-{
-	if (!xen_hvm_domain())
-		return -1;
 
-	return xen_register_pirq(gsi, -1 /* no GSI override */, trigger,
-				 false /* no mapping of GSI to PIRQ */);
-}
-
-#ifdef CONFIG_XEN_DOM0
-static int xen_register_gsi(u32 gsi, int gsi_override, int triggering, int polarity)
-{
-	int rc, irq;
-	struct physdev_setup_gsi setup_gsi;
-
-	if (!xen_pv_domain())
-		return -1;
-
-	printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n",
-			gsi, triggering, polarity);
-
-	irq = xen_register_pirq(gsi, gsi_override, triggering, true);
-
-	setup_gsi.gsi = gsi;
-	setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1);
-	setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
-
-	rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
-	if (rc == -EEXIST)
-		printk(KERN_INFO "Already setup the GSI :%d\n", gsi);
-	else if (rc) {
-		printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n",
-				gsi, rc);
-	}
+	printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
 
 	return irq;
 }
-
-static int acpi_register_gsi_xen(struct device *dev, u32 gsi,
-				 int trigger, int polarity)
-{
-	return xen_register_gsi(gsi, -1 /* no GSI override */, trigger, polarity);
-}
-#endif
 #endif
 
 #if defined(CONFIG_PCI_MSI)
@@ -152,43 +65,6 @@ static int acpi_register_gsi_xen(struct device *dev, u32 gsi,
 struct xen_pci_frontend_ops *xen_pci_frontend;
 EXPORT_SYMBOL_GPL(xen_pci_frontend);
 
-static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-	int irq, ret, i;
-	struct msi_desc *msidesc;
-	int *v;
-
-	v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL);
-	if (!v)
-		return -ENOMEM;
-
-	if (type == PCI_CAP_ID_MSIX)
-		ret = xen_pci_frontend_enable_msix(dev, v, nvec);
-	else
-		ret = xen_pci_frontend_enable_msi(dev, v);
-	if (ret)
-		goto error;
-	i = 0;
-	list_for_each_entry(msidesc, &dev->msi_list, list) {
-		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
-					       (type == PCI_CAP_ID_MSIX) ?
-					       "pcifront-msi-x" :
-					       "pcifront-msi",
-						DOMID_SELF);
-		if (irq < 0)
-			goto free;
-		i++;
-	}
-	kfree(v);
-	return 0;
-
-error:
-	dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
-free:
-	kfree(v);
-	return ret;
-}
-
 #define XEN_PIRQ_MSI_DATA  (MSI_DATA_TRIGGER_EDGE | \
 		MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0))
 
@@ -247,6 +123,67 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 	return -ENODEV;
 }
 
+/*
+ * For MSI interrupts we have to use drivers/xen/events.c functions to
+ * allocate an irq_desc and set up the right PIRQ-to-IRQ binding. */
+
+
+static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	int irq, ret, i;
+	struct msi_desc *msidesc;
+	int *v;
+
+	v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL);
+	if (!v)
+		return -ENOMEM;
+
+	if (type == PCI_CAP_ID_MSIX)
+		ret = xen_pci_frontend_enable_msix(dev, v, nvec);
+	else
+		ret = xen_pci_frontend_enable_msi(dev, v);
+	if (ret)
+		goto error;
+	i = 0;
+	list_for_each_entry(msidesc, &dev->msi_list, list) {
+		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
+					       (type == PCI_CAP_ID_MSIX) ?
+					       "pcifront-msi-x" :
+					       "pcifront-msi",
+						DOMID_SELF);
+		if (irq < 0)
+			goto free;
+		i++;
+	}
+	kfree(v);
+	return 0;
+
+error:
+	dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+free:
+	kfree(v);
+	return ret;
+}
+
+static void xen_teardown_msi_irqs(struct pci_dev *dev)
+{
+	struct msi_desc *msidesc;
+
+	msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
+	if (msidesc->msi_attrib.is_msix)
+		xen_pci_frontend_disable_msix(dev);
+	else
+		xen_pci_frontend_disable_msi(dev);
+
+	/* Free the IRQ's and the msidesc using the generic code. */
+	default_teardown_msi_irqs(dev);
+}
+
+static void xen_teardown_msi_irq(unsigned int irq)
+{
+	xen_destroy_irq(irq);
+}
+
 #ifdef CONFIG_XEN_DOM0
 static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
@@ -305,27 +242,44 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 	return ret;
 }
 #endif
+#endif
 
-static void xen_teardown_msi_irqs(struct pci_dev *dev)
+static int xen_pcifront_enable_irq(struct pci_dev *dev)
 {
-	struct msi_desc *msidesc;
+	int rc;
+	int share = 1;
+	int pirq;
+	u8 gsi;
 
-	msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
-	if (msidesc->msi_attrib.is_msix)
-		xen_pci_frontend_disable_msix(dev);
-	else
-		xen_pci_frontend_disable_msi(dev);
+	rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
+	if (rc < 0) {
+		dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
+			 rc);
+		return rc;
+	}
 
-	/* Free the IRQ's and the msidesc using the generic code. */
-	default_teardown_msi_irqs(dev);
-}
+	rc = xen_allocate_pirq_gsi(gsi);
+	if (rc < 0) {
+		dev_warn(&dev->dev, "Xen PCI: failed to allocate a PIRQ for GSI%d: %d\n",
+			 gsi, rc);
+		return rc;
+	}
+	pirq = rc;
 
-static void xen_teardown_msi_irq(unsigned int irq)
-{
-	xen_destroy_irq(irq);
-}
+	if (gsi < NR_IRQS_LEGACY)
+		share = 0;
 
-#endif
+	rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront");
+	if (rc < 0) {
+		dev_warn(&dev->dev, "Xen PCI: failed to bind GSI%d (PIRQ%d) to IRQ: %d\n",
+			 gsi, pirq, rc);
+		return rc;
+	}
+
+	dev->irq = rc;
+	dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
+	return 0;
+}
 
 int __init pci_xen_init(void)
 {
@@ -373,6 +327,79 @@ int __init pci_xen_hvm_init(void)
 }
 
 #ifdef CONFIG_XEN_DOM0
+static int xen_register_pirq(u32 gsi, int gsi_override, int triggering)
+{
+	int rc, pirq, irq = -1;
+	struct physdev_map_pirq map_irq;
+	int shareable = 0;
+	char *name;
+
+	if (!xen_pv_domain())
+		return -1;
+
+	if (triggering == ACPI_EDGE_SENSITIVE) {
+		shareable = 0;
+		name = "ioapic-edge";
+	} else {
+		shareable = 1;
+		name = "ioapic-level";
+	}
+	pirq = xen_allocate_pirq_gsi(gsi);
+	if (pirq < 0)
+		goto out;
+
+	if (gsi_override >= 0)
+		irq = xen_bind_pirq_gsi_to_irq(gsi_override, pirq, shareable, name);
+	else
+		irq = xen_bind_pirq_gsi_to_irq(gsi, pirq, shareable, name);
+	if (irq < 0)
+		goto out;
+
+	printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d (gsi=%d)\n", pirq, irq, gsi);
+
+	map_irq.domid = DOMID_SELF;
+	map_irq.type = MAP_PIRQ_TYPE_GSI;
+	map_irq.index = gsi;
+	map_irq.pirq = pirq;
+
+	rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+	if (rc) {
+		printk(KERN_WARNING "xen map irq failed %d\n", rc);
+		return -1;
+	}
+
+out:
+	return irq;
+}
+
+static int xen_register_gsi(u32 gsi, int gsi_override, int triggering, int polarity)
+{
+	int rc, irq;
+	struct physdev_setup_gsi setup_gsi;
+
+	if (!xen_pv_domain())
+		return -1;
+
+	printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n",
+			gsi, triggering, polarity);
+
+	irq = xen_register_pirq(gsi, gsi_override, triggering);
+
+	setup_gsi.gsi = gsi;
+	setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1);
+	setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+
+	rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
+	if (rc == -EEXIST)
+		printk(KERN_INFO "Already setup the GSI :%d\n", gsi);
+	else if (rc) {
+		printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n",
+				gsi, rc);
+	}
+
+	return irq;
+}
+
 static __init void xen_setup_acpi_sci(void)
 {
 	int rc;
@@ -392,7 +419,7 @@ static __init void xen_setup_acpi_sci(void)
 	}
 	trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
 	polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
-
+	
 	printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d "
 			"polarity=%d\n", gsi, trigger, polarity);
 
@@ -407,9 +434,10 @@ static __init void xen_setup_acpi_sci(void)
 	 * the ACPI interpreter and keels over since IRQ 9 has not been
 	 * setup as we had setup IRQ 20 for it).
 	 */
+	/* Check whether the GSI != IRQ */
 	if (acpi_gsi_to_irq(gsi, &irq) == 0) {
-		/* Use the provided value if it's valid. */
-		if (irq >= 0)
+		if (irq >= 0 && irq != gsi)
+			/* Bugger, we MUST have that IRQ. */
 			gsi_override = irq;
 	}
 
@@ -419,16 +447,41 @@ static __init void xen_setup_acpi_sci(void)
 	return;
 }
 
-int __init pci_xen_initial_domain(void)
+static int acpi_register_gsi_xen(struct device *dev, u32 gsi,
+				 int trigger, int polarity)
 {
-	int irq;
+	return xen_register_gsi(gsi, -1 /* no GSI override */, trigger, polarity);
+}
 
+static int __init pci_xen_initial_domain(void)
+{
 #ifdef CONFIG_PCI_MSI
 	x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
 	x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
 #endif
 	xen_setup_acpi_sci();
 	__acpi_register_gsi = acpi_register_gsi_xen;
+
+	return 0;
+}
+
+void __init xen_setup_pirqs(void)
+{
+	int pirq, irq;
+
+	pci_xen_initial_domain();
+
+	if (0 == nr_ioapics) {
+		for (irq = 0; irq < NR_IRQS_LEGACY; irq++) {
+			pirq = xen_allocate_pirq_gsi(irq);
+			if (WARN(pirq < 0,
+				 "Could not allocate PIRQ for legacy interrupt\n"))
+				break;
+			irq = xen_bind_pirq_gsi_to_irq(irq, pirq, 0, "xt-pic");
+		}
+		return;
+	}
+
 	/* Pre-allocate legacy irqs */
 	for (irq = 0; irq < NR_IRQS_LEGACY; irq++) {
 		int trigger, polarity;
@@ -437,16 +490,12 @@ int __init pci_xen_initial_domain(void)
 			continue;
 
 		xen_register_pirq(irq, -1 /* no GSI override */,
-			trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE,
-			true /* Map GSI to PIRQ */);
+			trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE);
 	}
-	if (0 == nr_ioapics) {
-		for (irq = 0; irq < NR_IRQS_LEGACY; irq++)
-			xen_bind_pirq_gsi_to_irq(irq, irq, 0, "xt-pic");
-	}
-	return 0;
 }
+#endif
 
+#ifdef CONFIG_XEN_DOM0
 struct xen_device_domain_owner {
 	domid_t domain;
 	struct pci_dev *dev;
diff --git a/trunk/arch/x86/xen/Makefile b/trunk/arch/x86/xen/Makefile
index a6575b949b11..17c565de3d64 100644
--- a/trunk/arch/x86/xen/Makefile
+++ b/trunk/arch/x86/xen/Makefile
@@ -18,5 +18,5 @@ obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
 obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
-obj-$(CONFIG_XEN_DOM0)		+= vga.o
+
 obj-$(CONFIG_SWIOTLB_XEN)	+= pci-swiotlb-xen.o
diff --git a/trunk/arch/x86/xen/enlighten.c b/trunk/arch/x86/xen/enlighten.c
index 53257421082b..5525163a0398 100644
--- a/trunk/arch/x86/xen/enlighten.c
+++ b/trunk/arch/x86/xen/enlighten.c
@@ -1248,14 +1248,6 @@ asmlinkage void __init xen_start_kernel(void)
 		if (pci_xen)
 			x86_init.pci.arch_init = pci_xen_init;
 	} else {
-		const struct dom0_vga_console_info *info =
-			(void *)((char *)xen_start_info +
-				 xen_start_info->console.dom0.info_off);
-
-		xen_init_vga(info, xen_start_info->console.dom0.info_size);
-		xen_start_info->console.domU.mfn = 0;
-		xen_start_info->console.domU.evtchn = 0;
-
 		/* Make sure ACS will be enabled */
 		pci_request_acs();
 	}
diff --git a/trunk/arch/x86/xen/platform-pci-unplug.c b/trunk/arch/x86/xen/platform-pci-unplug.c
index ffcf2615640b..25c52f94a27c 100644
--- a/trunk/arch/x86/xen/platform-pci-unplug.c
+++ b/trunk/arch/x86/xen/platform-pci-unplug.c
@@ -35,7 +35,7 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
 #ifdef CONFIG_XEN_PVHVM
 static int xen_emul_unplug;
 
-static int check_platform_magic(void)
+static int __init check_platform_magic(void)
 {
 	short magic;
 	char protocol;
diff --git a/trunk/arch/x86/xen/vga.c b/trunk/arch/x86/xen/vga.c
deleted file mode 100644
index 1cd7f4d11e29..000000000000
--- a/trunk/arch/x86/xen/vga.c
+++ /dev/null
@@ -1,67 +0,0 @@
-#include <linux/screen_info.h>
-#include <linux/init.h>
-
-#include <asm/bootparam.h>
-#include <asm/setup.h>
-
-#include <xen/interface/xen.h>
-
-#include "xen-ops.h"
-
-void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
-{
-	struct screen_info *screen_info = &boot_params.screen_info;
-
-	/* This is drawn from a dump from vgacon:startup in
-	 * standard Linux. */
-	screen_info->orig_video_mode = 3;
-	screen_info->orig_video_isVGA = 1;
-	screen_info->orig_video_lines = 25;
-	screen_info->orig_video_cols = 80;
-	screen_info->orig_video_ega_bx = 3;
-	screen_info->orig_video_points = 16;
-	screen_info->orig_y = screen_info->orig_video_lines - 1;
-
-	switch (info->video_type) {
-	case XEN_VGATYPE_TEXT_MODE_3:
-		if (size < offsetof(struct dom0_vga_console_info, u.text_mode_3)
-		    + sizeof(info->u.text_mode_3))
-			break;
-		screen_info->orig_video_lines = info->u.text_mode_3.rows;
-		screen_info->orig_video_cols = info->u.text_mode_3.columns;
-		screen_info->orig_x = info->u.text_mode_3.cursor_x;
-		screen_info->orig_y = info->u.text_mode_3.cursor_y;
-		screen_info->orig_video_points =
-			info->u.text_mode_3.font_height;
-		break;
-
-	case XEN_VGATYPE_VESA_LFB:
-		if (size < offsetof(struct dom0_vga_console_info,
-				    u.vesa_lfb.gbl_caps))
-			break;
-		screen_info->orig_video_isVGA = VIDEO_TYPE_VLFB;
-		screen_info->lfb_width = info->u.vesa_lfb.width;
-		screen_info->lfb_height = info->u.vesa_lfb.height;
-		screen_info->lfb_depth = info->u.vesa_lfb.bits_per_pixel;
-		screen_info->lfb_base = info->u.vesa_lfb.lfb_base;
-		screen_info->lfb_size = info->u.vesa_lfb.lfb_size;
-		screen_info->lfb_linelength = info->u.vesa_lfb.bytes_per_line;
-		screen_info->red_size = info->u.vesa_lfb.red_size;
-		screen_info->red_pos = info->u.vesa_lfb.red_pos;
-		screen_info->green_size = info->u.vesa_lfb.green_size;
-		screen_info->green_pos = info->u.vesa_lfb.green_pos;
-		screen_info->blue_size = info->u.vesa_lfb.blue_size;
-		screen_info->blue_pos = info->u.vesa_lfb.blue_pos;
-		screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size;
-		screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos;
-		if (size >= offsetof(struct dom0_vga_console_info,
-				     u.vesa_lfb.gbl_caps)
-		    + sizeof(info->u.vesa_lfb.gbl_caps))
-			screen_info->capabilities = info->u.vesa_lfb.gbl_caps;
-		if (size >= offsetof(struct dom0_vga_console_info,
-				     u.vesa_lfb.mode_attrs)
-		    + sizeof(info->u.vesa_lfb.mode_attrs))
-			screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs;
-		break;
-	}
-}
diff --git a/trunk/arch/x86/xen/xen-ops.h b/trunk/arch/x86/xen/xen-ops.h
index b095739ccd4c..97dfdc8757b3 100644
--- a/trunk/arch/x86/xen/xen-ops.h
+++ b/trunk/arch/x86/xen/xen-ops.h
@@ -88,17 +88,6 @@ static inline void xen_uninit_lock_cpu(int cpu)
 }
 #endif
 
-struct dom0_vga_console_info;
-
-#ifdef CONFIG_XEN_DOM0
-void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
-#else
-static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
-				       size_t size)
-{
-}
-#endif
-
 /* Declare an asm function, along with symbols needed to make it
    inlineable */
 #define DECL_ASM(ret, name, ...)		\
diff --git a/trunk/drivers/block/xen-blkback/xenbus.c b/trunk/drivers/block/xen-blkback/xenbus.c
index 3f129b45451a..6cc0db1bf522 100644
--- a/trunk/drivers/block/xen-blkback/xenbus.c
+++ b/trunk/drivers/block/xen-blkback/xenbus.c
@@ -684,7 +684,7 @@ static void connect(struct backend_info *be)
 
 	err = xenbus_switch_state(dev, XenbusStateConnected);
 	if (err)
-		xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
+		xenbus_dev_fatal(dev, err, "switching to Connected state",
 				 dev->nodename);
 
 	return;
diff --git a/trunk/drivers/mmc/card/block.c b/trunk/drivers/mmc/card/block.c
index 1ff5486213fb..f85e42224559 100644
--- a/trunk/drivers/mmc/card/block.c
+++ b/trunk/drivers/mmc/card/block.c
@@ -106,16 +106,6 @@ struct mmc_blk_data {
 
 static DEFINE_MUTEX(open_lock);
 
-enum mmc_blk_status {
-	MMC_BLK_SUCCESS = 0,
-	MMC_BLK_PARTIAL,
-	MMC_BLK_RETRY,
-	MMC_BLK_RETRY_SINGLE,
-	MMC_BLK_DATA_ERR,
-	MMC_BLK_CMD_ERR,
-	MMC_BLK_ABORT,
-};
-
 module_param(perdev_minors, int, 0444);
 MODULE_PARM_DESC(perdev_minors, "Minors numbers to allocate per device");
 
@@ -437,6 +427,14 @@ static const struct block_device_operations mmc_bdops = {
 #endif
 };
 
+struct mmc_blk_request {
+	struct mmc_request	mrq;
+	struct mmc_command	sbc;
+	struct mmc_command	cmd;
+	struct mmc_command	stop;
+	struct mmc_data		data;
+};
+
 static inline int mmc_blk_part_switch(struct mmc_card *card,
 				      struct mmc_blk_data *md)
 {
@@ -527,20 +525,7 @@ static u32 mmc_sd_num_wr_blocks(struct mmc_card *card)
 	return result;
 }
 
-static int send_stop(struct mmc_card *card, u32 *status)
-{
-	struct mmc_command cmd = {0};
-	int err;
-
-	cmd.opcode = MMC_STOP_TRANSMISSION;
-	cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
-	err = mmc_wait_for_cmd(card->host, &cmd, 5);
-	if (err == 0)
-		*status = cmd.resp[0];
-	return err;
-}
-
-static int get_card_status(struct mmc_card *card, u32 *status, int retries)
+static u32 get_card_status(struct mmc_card *card, struct request *req)
 {
 	struct mmc_command cmd = {0};
 	int err;
@@ -549,141 +534,11 @@ static int get_card_status(struct mmc_card *card, u32 *status, int retries)
 	if (!mmc_host_is_spi(card->host))
 		cmd.arg = card->rca << 16;
 	cmd.flags = MMC_RSP_SPI_R2 | MMC_RSP_R1 | MMC_CMD_AC;
-	err = mmc_wait_for_cmd(card->host, &cmd, retries);
-	if (err == 0)
-		*status = cmd.resp[0];
-	return err;
-}
-
-#define ERR_RETRY	2
-#define ERR_ABORT	1
-#define ERR_CONTINUE	0
-
-static int mmc_blk_cmd_error(struct request *req, const char *name, int error,
-	bool status_valid, u32 status)
-{
-	switch (error) {
-	case -EILSEQ:
-		/* response crc error, retry the r/w cmd */
-		pr_err("%s: %s sending %s command, card status %#x\n",
-			req->rq_disk->disk_name, "response CRC error",
-			name, status);
-		return ERR_RETRY;
-
-	case -ETIMEDOUT:
-		pr_err("%s: %s sending %s command, card status %#x\n",
-			req->rq_disk->disk_name, "timed out", name, status);
-
-		/* If the status cmd initially failed, retry the r/w cmd */
-		if (!status_valid)
-			return ERR_RETRY;
-
-		/*
-		 * If it was a r/w cmd crc error, or illegal command
-		 * (eg, issued in wrong state) then retry - we should
-		 * have corrected the state problem above.
-		 */
-		if (status & (R1_COM_CRC_ERROR | R1_ILLEGAL_COMMAND))
-			return ERR_RETRY;
-
-		/* Otherwise abort the command */
-		return ERR_ABORT;
-
-	default:
-		/* We don't understand the error code the driver gave us */
-		pr_err("%s: unknown error %d sending read/write command, card status %#x\n",
-		       req->rq_disk->disk_name, error, status);
-		return ERR_ABORT;
-	}
-}
-
-/*
- * Initial r/w and stop cmd error recovery.
- * We don't know whether the card received the r/w cmd or not, so try to
- * restore things back to a sane state.  Essentially, we do this as follows:
- * - Obtain card status.  If the first attempt to obtain card status fails,
- *   the status word will reflect the failed status cmd, not the failed
- *   r/w cmd.  If we fail to obtain card status, it suggests we can no
- *   longer communicate with the card.
- * - Check the card state.  If the card received the cmd but there was a
- *   transient problem with the response, it might still be in a data transfer
- *   mode.  Try to send it a stop command.  If this fails, we can't recover.
- * - If the r/w cmd failed due to a response CRC error, it was probably
- *   transient, so retry the cmd.
- * - If the r/w cmd timed out, but we didn't get the r/w cmd status, retry.
- * - If the r/w cmd timed out, and the r/w cmd failed due to CRC error or
- *   illegal cmd, retry.
- * Otherwise we don't understand what happened, so abort.
- */
-static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req,
-	struct mmc_blk_request *brq)
-{
-	bool prev_cmd_status_valid = true;
-	u32 status, stop_status = 0;
-	int err, retry;
-
-	/*
-	 * Try to get card status which indicates both the card state
-	 * and why there was no response.  If the first attempt fails,
-	 * we can't be sure the returned status is for the r/w command.
-	 */
-	for (retry = 2; retry >= 0; retry--) {
-		err = get_card_status(card, &status, 0);
-		if (!err)
-			break;
-
-		prev_cmd_status_valid = false;
-		pr_err("%s: error %d sending status command, %sing\n",
-		       req->rq_disk->disk_name, err, retry ? "retry" : "abort");
-	}
-
-	/* We couldn't get a response from the card.  Give up. */
+	err = mmc_wait_for_cmd(card->host, &cmd, 0);
 	if (err)
-		return ERR_ABORT;
-
-	/*
-	 * Check the current card state.  If it is in some data transfer
-	 * mode, tell it to stop (and hopefully transition back to TRAN.)
-	 */
-	if (R1_CURRENT_STATE(status) == R1_STATE_DATA ||
-	    R1_CURRENT_STATE(status) == R1_STATE_RCV) {
-		err = send_stop(card, &stop_status);
-		if (err)
-			pr_err("%s: error %d sending stop command\n",
-			       req->rq_disk->disk_name, err);
-
-		/*
-		 * If the stop cmd also timed out, the card is probably
-		 * not present, so abort.  Other errors are bad news too.
-		 */
-		if (err)
-			return ERR_ABORT;
-	}
-
-	/* Check for set block count errors */
-	if (brq->sbc.error)
-		return mmc_blk_cmd_error(req, "SET_BLOCK_COUNT", brq->sbc.error,
-				prev_cmd_status_valid, status);
-
-	/* Check for r/w command errors */
-	if (brq->cmd.error)
-		return mmc_blk_cmd_error(req, "r/w cmd", brq->cmd.error,
-				prev_cmd_status_valid, status);
-
-	/* Now for stop errors.  These aren't fatal to the transfer. */
-	pr_err("%s: error %d sending stop command, original cmd response %#x, card status %#x\n",
-	       req->rq_disk->disk_name, brq->stop.error,
-	       brq->cmd.resp[0], status);
-
-	/*
-	 * Subsitute in our own stop status as this will give the error
-	 * state which happened during the execution of the r/w command.
-	 */
-	if (stop_status) {
-		brq->stop.resp[0] = stop_status;
-		brq->stop.error = 0;
-	}
-	return ERR_CONTINUE;
+		printk(KERN_ERR "%s: error %d sending status command",
+		       req->rq_disk->disk_name, err);
+	return cmd.resp[0];
 }
 
 static int mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
@@ -814,114 +669,12 @@ static inline void mmc_apply_rel_rw(struct mmc_blk_request *brq,
 	}
 }
 
-#define CMD_ERRORS							\
-	(R1_OUT_OF_RANGE |	/* Command argument out of range */	\
-	 R1_ADDRESS_ERROR |	/* Misaligned address */		\
-	 R1_BLOCK_LEN_ERROR |	/* Transferred block length incorrect */\
-	 R1_WP_VIOLATION |	/* Tried to write to protected block */	\
-	 R1_CC_ERROR |		/* Card controller error */		\
-	 R1_ERROR)		/* General/unknown error */
-
-static int mmc_blk_err_check(struct mmc_card *card,
-			     struct mmc_async_req *areq)
-{
-	enum mmc_blk_status ret = MMC_BLK_SUCCESS;
-	struct mmc_queue_req *mq_mrq = container_of(areq, struct mmc_queue_req,
-						    mmc_active);
-	struct mmc_blk_request *brq = &mq_mrq->brq;
-	struct request *req = mq_mrq->req;
-
-	/*
-	 * sbc.error indicates a problem with the set block count
-	 * command.  No data will have been transferred.
-	 *
-	 * cmd.error indicates a problem with the r/w command.  No
-	 * data will have been transferred.
-	 *
-	 * stop.error indicates a problem with the stop command.  Data
-	 * may have been transferred, or may still be transferring.
-	 */
-	if (brq->sbc.error || brq->cmd.error || brq->stop.error) {
-		switch (mmc_blk_cmd_recovery(card, req, brq)) {
-		case ERR_RETRY:
-			return MMC_BLK_RETRY;
-		case ERR_ABORT:
-			return MMC_BLK_ABORT;
-		case ERR_CONTINUE:
-			break;
-		}
-	}
-
-	/*
-	 * Check for errors relating to the execution of the
-	 * initial command - such as address errors.  No data
-	 * has been transferred.
-	 */
-	if (brq->cmd.resp[0] & CMD_ERRORS) {
-		pr_err("%s: r/w command failed, status = %#x\n",
-		       req->rq_disk->disk_name, brq->cmd.resp[0]);
-		return MMC_BLK_ABORT;
-	}
-
-	/*
-	 * Everything else is either success, or a data error of some
-	 * kind.  If it was a write, we may have transitioned to
-	 * program mode, which we have to wait for it to complete.
-	 */
-	if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) {
-		u32 status;
-		do {
-			int err = get_card_status(card, &status, 5);
-			if (err) {
-				printk(KERN_ERR "%s: error %d requesting status\n",
-				       req->rq_disk->disk_name, err);
-				return MMC_BLK_CMD_ERR;
-			}
-			/*
-			 * Some cards mishandle the status bits,
-			 * so make sure to check both the busy
-			 * indication and the card state.
-			 */
-		} while (!(status & R1_READY_FOR_DATA) ||
-			 (R1_CURRENT_STATE(status) == R1_STATE_PRG));
-	}
-
-	if (brq->data.error) {
-		pr_err("%s: error %d transferring data, sector %u, nr %u, cmd response %#x, card status %#x\n",
-		       req->rq_disk->disk_name, brq->data.error,
-		       (unsigned)blk_rq_pos(req),
-		       (unsigned)blk_rq_sectors(req),
-		       brq->cmd.resp[0], brq->stop.resp[0]);
-
-		if (rq_data_dir(req) == READ) {
-			if (brq->data.blocks > 1) {
-				/* Redo read one sector at a time */
-				pr_warning("%s: retrying using single block read\n",
-					   req->rq_disk->disk_name);
-				return MMC_BLK_RETRY_SINGLE;
-			}
-			return MMC_BLK_DATA_ERR;
-		} else {
-			return MMC_BLK_CMD_ERR;
-		}
-	}
-
-	if (ret == MMC_BLK_SUCCESS &&
-	    blk_rq_bytes(req) != brq->data.bytes_xfered)
-		ret = MMC_BLK_PARTIAL;
-
-	return ret;
-}
-
-static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
-			       struct mmc_card *card,
-			       int disable_multi,
-			       struct mmc_queue *mq)
+static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
 {
-	u32 readcmd, writecmd;
-	struct mmc_blk_request *brq = &mqrq->brq;
-	struct request *req = mqrq->req;
 	struct mmc_blk_data *md = mq->data;
+	struct mmc_card *card = md->queue.card;
+	struct mmc_blk_request brq;
+	int ret = 1, disable_multi = 0;
 
 	/*
 	 * Reliable writes are used to implement Forced Unit Access and
@@ -932,206 +685,224 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
 		(rq_data_dir(req) == WRITE) &&
 		(md->flags & MMC_BLK_REL_WR);
 
-	memset(brq, 0, sizeof(struct mmc_blk_request));
-	brq->mrq.cmd = &brq->cmd;
-	brq->mrq.data = &brq->data;
+	do {
+		struct mmc_command cmd = {0};
+		u32 readcmd, writecmd, status = 0;
+
+		memset(&brq, 0, sizeof(struct mmc_blk_request));
+		brq.mrq.cmd = &brq.cmd;
+		brq.mrq.data = &brq.data;
+
+		brq.cmd.arg = blk_rq_pos(req);
+		if (!mmc_card_blockaddr(card))
+			brq.cmd.arg <<= 9;
+		brq.cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
+		brq.data.blksz = 512;
+		brq.stop.opcode = MMC_STOP_TRANSMISSION;
+		brq.stop.arg = 0;
+		brq.stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
+		brq.data.blocks = blk_rq_sectors(req);
 
-	brq->cmd.arg = blk_rq_pos(req);
-	if (!mmc_card_blockaddr(card))
-		brq->cmd.arg <<= 9;
-	brq->cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
-	brq->data.blksz = 512;
-	brq->stop.opcode = MMC_STOP_TRANSMISSION;
-	brq->stop.arg = 0;
-	brq->stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
-	brq->data.blocks = blk_rq_sectors(req);
+		/*
+		 * The block layer doesn't support all sector count
+		 * restrictions, so we need to be prepared for too big
+		 * requests.
+		 */
+		if (brq.data.blocks > card->host->max_blk_count)
+			brq.data.blocks = card->host->max_blk_count;
 
-	/*
-	 * The block layer doesn't support all sector count
-	 * restrictions, so we need to be prepared for too big
-	 * requests.
-	 */
-	if (brq->data.blocks > card->host->max_blk_count)
-		brq->data.blocks = card->host->max_blk_count;
+		/*
+		 * After a read error, we redo the request one sector at a time
+		 * in order to accurately determine which sectors can be read
+		 * successfully.
+		 */
+		if (disable_multi && brq.data.blocks > 1)
+			brq.data.blocks = 1;
 
-	/*
-	 * After a read error, we redo the request one sector at a time
-	 * in order to accurately determine which sectors can be read
-	 * successfully.
-	 */
-	if (disable_multi && brq->data.blocks > 1)
-		brq->data.blocks = 1;
+		if (brq.data.blocks > 1 || do_rel_wr) {
+			/* SPI multiblock writes terminate using a special
+			 * token, not a STOP_TRANSMISSION request.
+			 */
+			if (!mmc_host_is_spi(card->host) ||
+			    rq_data_dir(req) == READ)
+				brq.mrq.stop = &brq.stop;
+			readcmd = MMC_READ_MULTIPLE_BLOCK;
+			writecmd = MMC_WRITE_MULTIPLE_BLOCK;
+		} else {
+			brq.mrq.stop = NULL;
+			readcmd = MMC_READ_SINGLE_BLOCK;
+			writecmd = MMC_WRITE_BLOCK;
+		}
+		if (rq_data_dir(req) == READ) {
+			brq.cmd.opcode = readcmd;
+			brq.data.flags |= MMC_DATA_READ;
+		} else {
+			brq.cmd.opcode = writecmd;
+			brq.data.flags |= MMC_DATA_WRITE;
+		}
 
-	if (brq->data.blocks > 1 || do_rel_wr) {
-		/* SPI multiblock writes terminate using a special
-		 * token, not a STOP_TRANSMISSION request.
+		if (do_rel_wr)
+			mmc_apply_rel_rw(&brq, card, req);
+
+		/*
+		 * Pre-defined multi-block transfers are preferable to
+		 * open-ended ones (and necessary for reliable writes).
+		 * However, it is not sufficient to just send CMD23,
+		 * and avoid the final CMD12, as on an error condition
+		 * CMD12 (stop) needs to be sent anyway. This, coupled
+		 * with Auto-CMD23 enhancements provided by some
+		 * hosts, means that the complexity of dealing
+		 * with this is best left to the host. If CMD23 is
+		 * supported by card and host, we'll fill sbc in and let
+		 * the host deal with handling it correctly. This means
+		 * that for hosts that don't expose MMC_CAP_CMD23, no
+		 * change of behavior will be observed.
+		 *
+		 * N.B: Some MMC cards experience perf degradation.
+		 * We'll avoid using CMD23-bounded multiblock writes for
+		 * these, while retaining features like reliable writes.
 		 */
-		if (!mmc_host_is_spi(card->host) ||
-		    rq_data_dir(req) == READ)
-			brq->mrq.stop = &brq->stop;
-		readcmd = MMC_READ_MULTIPLE_BLOCK;
-		writecmd = MMC_WRITE_MULTIPLE_BLOCK;
-	} else {
-		brq->mrq.stop = NULL;
-		readcmd = MMC_READ_SINGLE_BLOCK;
-		writecmd = MMC_WRITE_BLOCK;
-	}
-	if (rq_data_dir(req) == READ) {
-		brq->cmd.opcode = readcmd;
-		brq->data.flags |= MMC_DATA_READ;
-	} else {
-		brq->cmd.opcode = writecmd;
-		brq->data.flags |= MMC_DATA_WRITE;
-	}
 
-	if (do_rel_wr)
-		mmc_apply_rel_rw(brq, card, req);
+		if ((md->flags & MMC_BLK_CMD23) &&
+		    mmc_op_multi(brq.cmd.opcode) &&
+		    (do_rel_wr || !(card->quirks & MMC_QUIRK_BLK_NO_CMD23))) {
+			brq.sbc.opcode = MMC_SET_BLOCK_COUNT;
+			brq.sbc.arg = brq.data.blocks |
+				(do_rel_wr ? (1 << 31) : 0);
+			brq.sbc.flags = MMC_RSP_R1 | MMC_CMD_AC;
+			brq.mrq.sbc = &brq.sbc;
+		}
 
-	/*
-	 * Pre-defined multi-block transfers are preferable to
-	 * open ended-ones (and necessary for reliable writes).
-	 * However, it is not sufficient to just send CMD23,
-	 * and avoid the final CMD12, as on an error condition
-	 * CMD12 (stop) needs to be sent anyway. This, coupled
-	 * with Auto-CMD23 enhancements provided by some
-	 * hosts, means that the complexity of dealing
-	 * with this is best left to the host. If CMD23 is
-	 * supported by card and host, we'll fill sbc in and let
-	 * the host deal with handling it correctly. This means
-	 * that for hosts that don't expose MMC_CAP_CMD23, no
-	 * change of behavior will be observed.
-	 *
-	 * N.B: Some MMC cards experience perf degradation.
-	 * We'll avoid using CMD23-bounded multiblock writes for
-	 * these, while retaining features like reliable writes.
-	 */
+		mmc_set_data_timeout(&brq.data, card);
 
-	if ((md->flags & MMC_BLK_CMD23) &&
-	    mmc_op_multi(brq->cmd.opcode) &&
-	    (do_rel_wr || !(card->quirks & MMC_QUIRK_BLK_NO_CMD23))) {
-		brq->sbc.opcode = MMC_SET_BLOCK_COUNT;
-		brq->sbc.arg = brq->data.blocks |
-			(do_rel_wr ? (1 << 31) : 0);
-		brq->sbc.flags = MMC_RSP_R1 | MMC_CMD_AC;
-		brq->mrq.sbc = &brq->sbc;
-	}
+		brq.data.sg = mq->sg;
+		brq.data.sg_len = mmc_queue_map_sg(mq);
 
-	mmc_set_data_timeout(&brq->data, card);
+		/*
+		 * Adjust the sg list so it is the same size as the
+		 * request.
+		 */
+		if (brq.data.blocks != blk_rq_sectors(req)) {
+			int i, data_size = brq.data.blocks << 9;
+			struct scatterlist *sg;
+
+			for_each_sg(brq.data.sg, sg, brq.data.sg_len, i) {
+				data_size -= sg->length;
+				if (data_size <= 0) {
+					sg->length += data_size;
+					i++;
+					break;
+				}
+			}
+			brq.data.sg_len = i;
+		}
 
-	brq->data.sg = mqrq->sg;
-	brq->data.sg_len = mmc_queue_map_sg(mq, mqrq);
+		mmc_queue_bounce_pre(mq);
 
-	/*
-	 * Adjust the sg list so it is the same size as the
-	 * request.
-	 */
-	if (brq->data.blocks != blk_rq_sectors(req)) {
-		int i, data_size = brq->data.blocks << 9;
-		struct scatterlist *sg;
-
-		for_each_sg(brq->data.sg, sg, brq->data.sg_len, i) {
-			data_size -= sg->length;
-			if (data_size <= 0) {
-				sg->length += data_size;
-				i++;
-				break;
+		mmc_wait_for_req(card->host, &brq.mrq);
+
+		mmc_queue_bounce_post(mq);
+
+		/*
+		 * Check for errors here, but don't jump to cmd_err
+		 * until later as we need to wait for the card to leave
+		 * programming mode even when things go wrong.
+		 */
+		if (brq.sbc.error || brq.cmd.error ||
+		    brq.data.error || brq.stop.error) {
+			if (brq.data.blocks > 1 && rq_data_dir(req) == READ) {
+				/* Redo read one sector at a time */
+				printk(KERN_WARNING "%s: retrying using single "
+				       "block read\n", req->rq_disk->disk_name);
+				disable_multi = 1;
+				continue;
 			}
+			status = get_card_status(card, req);
 		}
-		brq->data.sg_len = i;
-	}
 
-	mqrq->mmc_active.mrq = &brq->mrq;
-	mqrq->mmc_active.err_check = mmc_blk_err_check;
+		if (brq.sbc.error) {
+			printk(KERN_ERR "%s: error %d sending SET_BLOCK_COUNT "
+			       "command, response %#x, card status %#x\n",
+			       req->rq_disk->disk_name, brq.sbc.error,
+			       brq.sbc.resp[0], status);
+		}
 
-	mmc_queue_bounce_pre(mqrq);
-}
+		if (brq.cmd.error) {
+			printk(KERN_ERR "%s: error %d sending read/write "
+			       "command, response %#x, card status %#x\n",
+			       req->rq_disk->disk_name, brq.cmd.error,
+			       brq.cmd.resp[0], status);
+		}
 
-static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
-{
-	struct mmc_blk_data *md = mq->data;
-	struct mmc_card *card = md->queue.card;
-	struct mmc_blk_request *brq = &mq->mqrq_cur->brq;
-	int ret = 1, disable_multi = 0, retry = 0;
-	enum mmc_blk_status status;
-	struct mmc_queue_req *mq_rq;
-	struct request *req;
-	struct mmc_async_req *areq;
-
-	if (!rqc && !mq->mqrq_prev->req)
-		return 0;
+		if (brq.data.error) {
+			if (brq.data.error == -ETIMEDOUT && brq.mrq.stop)
+				/* 'Stop' response contains card status */
+				status = brq.mrq.stop->resp[0];
+			printk(KERN_ERR "%s: error %d transferring data,"
+			       " sector %u, nr %u, card status %#x\n",
+			       req->rq_disk->disk_name, brq.data.error,
+			       (unsigned)blk_rq_pos(req),
+			       (unsigned)blk_rq_sectors(req), status);
+		}
 
-	do {
-		if (rqc) {
-			mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq);
-			areq = &mq->mqrq_cur->mmc_active;
-		} else
-			areq = NULL;
-		areq = mmc_start_req(card->host, areq, (int *) &status);
-		if (!areq)
-			return 0;
-
-		mq_rq = container_of(areq, struct mmc_queue_req, mmc_active);
-		brq = &mq_rq->brq;
-		req = mq_rq->req;
-		mmc_queue_bounce_post(mq_rq);
-
-		switch (status) {
-		case MMC_BLK_SUCCESS:
-		case MMC_BLK_PARTIAL:
-			/*
-			 * A block was successfully transferred.
-			 */
-			spin_lock_irq(&md->lock);
-			ret = __blk_end_request(req, 0,
-						brq->data.bytes_xfered);
-			spin_unlock_irq(&md->lock);
-			if (status == MMC_BLK_SUCCESS && ret) {
+		if (brq.stop.error) {
+			printk(KERN_ERR "%s: error %d sending stop command, "
+			       "response %#x, card status %#x\n",
+			       req->rq_disk->disk_name, brq.stop.error,
+			       brq.stop.resp[0], status);
+		}
+
+		if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) {
+			do {
+				int err;
+
+				cmd.opcode = MMC_SEND_STATUS;
+				cmd.arg = card->rca << 16;
+				cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
+				err = mmc_wait_for_cmd(card->host, &cmd, 5);
+				if (err) {
+					printk(KERN_ERR "%s: error %d requesting status\n",
+					       req->rq_disk->disk_name, err);
+					goto cmd_err;
+				}
 				/*
-				 * The blk_end_request has returned non zero
-				 * even though all data is transfered and no
-				 * erros returned by host.
-				 * If this happen it's a bug.
+				 * Some cards mishandle the status bits,
+				 * so make sure to check both the busy
+				 * indication and the card state.
 				 */
-				printk(KERN_ERR "%s BUG rq_tot %d d_xfer %d\n",
-				       __func__, blk_rq_bytes(req),
-				       brq->data.bytes_xfered);
-				rqc = NULL;
-				goto cmd_abort;
+			} while (!(cmd.resp[0] & R1_READY_FOR_DATA) ||
+				(R1_CURRENT_STATE(cmd.resp[0]) == 7));
+
+#if 0
+			if (cmd.resp[0] & ~0x00000900)
+				printk(KERN_ERR "%s: status = %08x\n",
+				       req->rq_disk->disk_name, cmd.resp[0]);
+			if (mmc_decode_status(cmd.resp))
+				goto cmd_err;
+#endif
+		}
+
+		if (brq.cmd.error || brq.stop.error || brq.data.error) {
+			if (rq_data_dir(req) == READ) {
+				/*
+				 * After an error, we redo I/O one sector at a
+				 * time, so we only reach here after trying to
+				 * read a single sector.
+				 */
+				spin_lock_irq(&md->lock);
+				ret = __blk_end_request(req, -EIO, brq.data.blksz);
+				spin_unlock_irq(&md->lock);
+				continue;
 			}
-			break;
-		case MMC_BLK_CMD_ERR:
 			goto cmd_err;
-		case MMC_BLK_RETRY_SINGLE:
-			disable_multi = 1;
-			break;
-		case MMC_BLK_RETRY:
-			if (retry++ < 5)
-				break;
-		case MMC_BLK_ABORT:
-			goto cmd_abort;
-		case MMC_BLK_DATA_ERR:
-			/*
-			 * After an error, we redo I/O one sector at a
-			 * time, so we only reach here after trying to
-			 * read a single sector.
-			 */
-			spin_lock_irq(&md->lock);
-			ret = __blk_end_request(req, -EIO,
-						brq->data.blksz);
-			spin_unlock_irq(&md->lock);
-			if (!ret)
-				goto start_new_req;
-			break;
 		}
 
-		if (ret) {
-			/*
-			 * In case of a none complete request
-			 * prepare it again and resend.
-			 */
-			mmc_blk_rw_rq_prep(mq_rq, card, disable_multi, mq);
-			mmc_start_req(card->host, &mq_rq->mmc_active, NULL);
-		}
+		/*
+		 * A block was successfully transferred.
+		 */
+		spin_lock_irq(&md->lock);
+		ret = __blk_end_request(req, 0, brq.data.bytes_xfered);
+		spin_unlock_irq(&md->lock);
 	} while (ret);
 
 	return 1;
@@ -1156,22 +927,15 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 		}
 	} else {
 		spin_lock_irq(&md->lock);
-		ret = __blk_end_request(req, 0, brq->data.bytes_xfered);
+		ret = __blk_end_request(req, 0, brq.data.bytes_xfered);
 		spin_unlock_irq(&md->lock);
 	}
 
- cmd_abort:
 	spin_lock_irq(&md->lock);
 	while (ret)
 		ret = __blk_end_request(req, -EIO, blk_rq_cur_bytes(req));
 	spin_unlock_irq(&md->lock);
 
- start_new_req:
-	if (rqc) {
-		mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq);
-		mmc_start_req(card->host, &mq->mqrq_cur->mmc_active, NULL);
-	}
-
 	return 0;
 }
 
@@ -1181,37 +945,26 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 	struct mmc_blk_data *md = mq->data;
 	struct mmc_card *card = md->queue.card;
 
-	if (req && !mq->mqrq_prev->req)
-		/* claim host only for the first request */
-		mmc_claim_host(card->host);
-
+	mmc_claim_host(card->host);
 	ret = mmc_blk_part_switch(card, md);
 	if (ret) {
 		ret = 0;
 		goto out;
 	}
 
-	if (req && req->cmd_flags & REQ_DISCARD) {
-		/* complete ongoing async transfer before issuing discard */
-		if (card->host->areq)
-			mmc_blk_issue_rw_rq(mq, NULL);
+	if (req->cmd_flags & REQ_DISCARD) {
 		if (req->cmd_flags & REQ_SECURE)
 			ret = mmc_blk_issue_secdiscard_rq(mq, req);
 		else
 			ret = mmc_blk_issue_discard_rq(mq, req);
-	} else if (req && req->cmd_flags & REQ_FLUSH) {
-		/* complete ongoing async transfer before issuing flush */
-		if (card->host->areq)
-			mmc_blk_issue_rw_rq(mq, NULL);
+	} else if (req->cmd_flags & REQ_FLUSH) {
 		ret = mmc_blk_issue_flush(mq, req);
 	} else {
 		ret = mmc_blk_issue_rw_rq(mq, req);
 	}
 
 out:
-	if (!req)
-		/* release host only when there are no more requests */
-		mmc_release_host(card->host);
+	mmc_release_host(card->host);
 	return ret;
 }
 
diff --git a/trunk/drivers/mmc/card/mmc_test.c b/trunk/drivers/mmc/card/mmc_test.c
index 006a5e9f8ab8..233cdfae92f4 100644
--- a/trunk/drivers/mmc/card/mmc_test.c
+++ b/trunk/drivers/mmc/card/mmc_test.c
@@ -148,27 +148,6 @@ struct mmc_test_card {
 	struct mmc_test_general_result	*gr;
 };
 
-enum mmc_test_prep_media {
-	MMC_TEST_PREP_NONE = 0,
-	MMC_TEST_PREP_WRITE_FULL = 1 << 0,
-	MMC_TEST_PREP_ERASE = 1 << 1,
-};
-
-struct mmc_test_multiple_rw {
-	unsigned int *sg_len;
-	unsigned int *bs;
-	unsigned int len;
-	unsigned int size;
-	bool do_write;
-	bool do_nonblock_req;
-	enum mmc_test_prep_media prepare;
-};
-
-struct mmc_test_async_req {
-	struct mmc_async_req areq;
-	struct mmc_test_card *test;
-};
-
 /*******************************************************************/
 /*  General helper functions                                       */
 /*******************************************************************/
@@ -388,26 +367,21 @@ static struct mmc_test_mem *mmc_test_alloc_mem(unsigned long min_sz,
  * Map memory into a scatterlist.  Optionally allow the same memory to be
  * mapped more than once.
  */
-static int mmc_test_map_sg(struct mmc_test_mem *mem, unsigned long size,
+static int mmc_test_map_sg(struct mmc_test_mem *mem, unsigned long sz,
 			   struct scatterlist *sglist, int repeat,
 			   unsigned int max_segs, unsigned int max_seg_sz,
-			   unsigned int *sg_len, int min_sg_len)
+			   unsigned int *sg_len)
 {
 	struct scatterlist *sg = NULL;
 	unsigned int i;
-	unsigned long sz = size;
 
 	sg_init_table(sglist, max_segs);
-	if (min_sg_len > max_segs)
-		min_sg_len = max_segs;
 
 	*sg_len = 0;
 	do {
 		for (i = 0; i < mem->cnt; i++) {
 			unsigned long len = PAGE_SIZE << mem->arr[i].order;
 
-			if (min_sg_len && (size / min_sg_len < len))
-				len = ALIGN(size / min_sg_len, 512);
 			if (len > sz)
 				len = sz;
 			if (len > max_seg_sz)
@@ -580,12 +554,11 @@ static void mmc_test_print_avg_rate(struct mmc_test_card *test, uint64_t bytes,
 
 	printk(KERN_INFO "%s: Transfer of %u x %u sectors (%u x %u%s KiB) took "
 			 "%lu.%09lu seconds (%u kB/s, %u KiB/s, "
-			 "%u.%02u IOPS, sg_len %d)\n",
+			 "%u.%02u IOPS)\n",
 			 mmc_hostname(test->card->host), count, sectors, count,
 			 sectors >> 1, (sectors & 1 ? ".5" : ""),
 			 (unsigned long)ts.tv_sec, (unsigned long)ts.tv_nsec,
-			 rate / 1000, rate / 1024, iops / 100, iops % 100,
-			 test->area.sg_len);
+			 rate / 1000, rate / 1024, iops / 100, iops % 100);
 
 	mmc_test_save_transfer_result(test, count, sectors, ts, rate, iops);
 }
@@ -688,7 +661,7 @@ static void mmc_test_prepare_broken_mrq(struct mmc_test_card *test,
  * Checks that a normal transfer didn't have any errors
  */
 static int mmc_test_check_result(struct mmc_test_card *test,
-				 struct mmc_request *mrq)
+	struct mmc_request *mrq)
 {
 	int ret;
 
@@ -712,17 +685,6 @@ static int mmc_test_check_result(struct mmc_test_card *test,
 	return ret;
 }
 
-static int mmc_test_check_result_async(struct mmc_card *card,
-				       struct mmc_async_req *areq)
-{
-	struct mmc_test_async_req *test_async =
-		container_of(areq, struct mmc_test_async_req, areq);
-
-	mmc_test_wait_busy(test_async->test);
-
-	return mmc_test_check_result(test_async->test, areq->mrq);
-}
-
 /*
  * Checks that a "short transfer" behaved as expected
  */
@@ -757,85 +719,6 @@ static int mmc_test_check_broken_result(struct mmc_test_card *test,
 	return ret;
 }
 
-/*
- * Tests nonblock transfer with certain parameters
- */
-static void mmc_test_nonblock_reset(struct mmc_request *mrq,
-				    struct mmc_command *cmd,
-				    struct mmc_command *stop,
-				    struct mmc_data *data)
-{
-	memset(mrq, 0, sizeof(struct mmc_request));
-	memset(cmd, 0, sizeof(struct mmc_command));
-	memset(data, 0, sizeof(struct mmc_data));
-	memset(stop, 0, sizeof(struct mmc_command));
-
-	mrq->cmd = cmd;
-	mrq->data = data;
-	mrq->stop = stop;
-}
-static int mmc_test_nonblock_transfer(struct mmc_test_card *test,
-				      struct scatterlist *sg, unsigned sg_len,
-				      unsigned dev_addr, unsigned blocks,
-				      unsigned blksz, int write, int count)
-{
-	struct mmc_request mrq1;
-	struct mmc_command cmd1;
-	struct mmc_command stop1;
-	struct mmc_data data1;
-
-	struct mmc_request mrq2;
-	struct mmc_command cmd2;
-	struct mmc_command stop2;
-	struct mmc_data data2;
-
-	struct mmc_test_async_req test_areq[2];
-	struct mmc_async_req *done_areq;
-	struct mmc_async_req *cur_areq = &test_areq[0].areq;
-	struct mmc_async_req *other_areq = &test_areq[1].areq;
-	int i;
-	int ret;
-
-	test_areq[0].test = test;
-	test_areq[1].test = test;
-
-	mmc_test_nonblock_reset(&mrq1, &cmd1, &stop1, &data1);
-	mmc_test_nonblock_reset(&mrq2, &cmd2, &stop2, &data2);
-
-	cur_areq->mrq = &mrq1;
-	cur_areq->err_check = mmc_test_check_result_async;
-	other_areq->mrq = &mrq2;
-	other_areq->err_check = mmc_test_check_result_async;
-
-	for (i = 0; i < count; i++) {
-		mmc_test_prepare_mrq(test, cur_areq->mrq, sg, sg_len, dev_addr,
-				     blocks, blksz, write);
-		done_areq = mmc_start_req(test->card->host, cur_areq, &ret);
-
-		if (ret || (!done_areq && i > 0))
-			goto err;
-
-		if (done_areq) {
-			if (done_areq->mrq == &mrq2)
-				mmc_test_nonblock_reset(&mrq2, &cmd2,
-							&stop2, &data2);
-			else
-				mmc_test_nonblock_reset(&mrq1, &cmd1,
-							&stop1, &data1);
-		}
-		done_areq = cur_areq;
-		cur_areq = other_areq;
-		other_areq = done_areq;
-		dev_addr += blocks;
-	}
-
-	done_areq = mmc_start_req(test->card->host, NULL, &ret);
-
-	return ret;
-err:
-	return ret;
-}
-
 /*
  * Tests a basic transfer with certain parameters
  */
@@ -1419,7 +1302,7 @@ static int mmc_test_no_highmem(struct mmc_test_card *test)
  * Map sz bytes so that it can be transferred.
  */
 static int mmc_test_area_map(struct mmc_test_card *test, unsigned long sz,
-			     int max_scatter, int min_sg_len)
+			     int max_scatter)
 {
 	struct mmc_test_area *t = &test->area;
 	int err;
@@ -1432,7 +1315,7 @@ static int mmc_test_area_map(struct mmc_test_card *test, unsigned long sz,
 				       &t->sg_len);
 	} else {
 		err = mmc_test_map_sg(t->mem, sz, t->sg, 1, t->max_segs,
-				      t->max_seg_sz, &t->sg_len, min_sg_len);
+				      t->max_seg_sz, &t->sg_len);
 	}
 	if (err)
 		printk(KERN_INFO "%s: Failed to map sg list\n",
@@ -1453,17 +1336,14 @@ static int mmc_test_area_transfer(struct mmc_test_card *test,
 }
 
 /*
- * Map and transfer bytes for multiple transfers.
+ * Map and transfer bytes.
  */
-static int mmc_test_area_io_seq(struct mmc_test_card *test, unsigned long sz,
-				unsigned int dev_addr, int write,
-				int max_scatter, int timed, int count,
-				bool nonblock, int min_sg_len)
+static int mmc_test_area_io(struct mmc_test_card *test, unsigned long sz,
+			    unsigned int dev_addr, int write, int max_scatter,
+			    int timed)
 {
 	struct timespec ts1, ts2;
-	int ret = 0;
-	int i;
-	struct mmc_test_area *t = &test->area;
+	int ret;
 
 	/*
 	 * In the case of a maximally scattered transfer, the maximum transfer
@@ -1481,21 +1361,14 @@ static int mmc_test_area_io_seq(struct mmc_test_card *test, unsigned long sz,
 			sz = max_tfr;
 	}
 
-	ret = mmc_test_area_map(test, sz, max_scatter, min_sg_len);
+	ret = mmc_test_area_map(test, sz, max_scatter);
 	if (ret)
 		return ret;
 
 	if (timed)
 		getnstimeofday(&ts1);
-	if (nonblock)
-		ret = mmc_test_nonblock_transfer(test, t->sg, t->sg_len,
-				 dev_addr, t->blocks, 512, write, count);
-	else
-		for (i = 0; i < count && ret == 0; i++) {
-			ret = mmc_test_area_transfer(test, dev_addr, write);
-			dev_addr += sz >> 9;
-		}
 
+	ret = mmc_test_area_transfer(test, dev_addr, write);
 	if (ret)
 		return ret;
 
@@ -1503,19 +1376,11 @@ static int mmc_test_area_io_seq(struct mmc_test_card *test, unsigned long sz,
 		getnstimeofday(&ts2);
 
 	if (timed)
-		mmc_test_print_avg_rate(test, sz, count, &ts1, &ts2);
+		mmc_test_print_rate(test, sz, &ts1, &ts2);
 
 	return 0;
 }
 
-static int mmc_test_area_io(struct mmc_test_card *test, unsigned long sz,
-			    unsigned int dev_addr, int write, int max_scatter,
-			    int timed)
-{
-	return mmc_test_area_io_seq(test, sz, dev_addr, write, max_scatter,
-				    timed, 1, false, 0);
-}
-
 /*
  * Write the test area entirely.
  */
@@ -2089,245 +1954,6 @@ static int mmc_test_large_seq_write_perf(struct mmc_test_card *test)
 	return mmc_test_large_seq_perf(test, 1);
 }
 
-static int mmc_test_rw_multiple(struct mmc_test_card *test,
-				struct mmc_test_multiple_rw *tdata,
-				unsigned int reqsize, unsigned int size,
-				int min_sg_len)
-{
-	unsigned int dev_addr;
-	struct mmc_test_area *t = &test->area;
-	int ret = 0;
-
-	/* Set up test area */
-	if (size > mmc_test_capacity(test->card) / 2 * 512)
-		size = mmc_test_capacity(test->card) / 2 * 512;
-	if (reqsize > t->max_tfr)
-		reqsize = t->max_tfr;
-	dev_addr = mmc_test_capacity(test->card) / 4;
-	if ((dev_addr & 0xffff0000))
-		dev_addr &= 0xffff0000; /* Round to 64MiB boundary */
-	else
-		dev_addr &= 0xfffff800; /* Round to 1MiB boundary */
-	if (!dev_addr)
-		goto err;
-
-	if (reqsize > size)
-		return 0;
-
-	/* prepare test area */
-	if (mmc_can_erase(test->card) &&
-	    tdata->prepare & MMC_TEST_PREP_ERASE) {
-		ret = mmc_erase(test->card, dev_addr,
-				size / 512, MMC_SECURE_ERASE_ARG);
-		if (ret)
-			ret = mmc_erase(test->card, dev_addr,
-					size / 512, MMC_ERASE_ARG);
-		if (ret)
-			goto err;
-	}
-
-	/* Run test */
-	ret = mmc_test_area_io_seq(test, reqsize, dev_addr,
-				   tdata->do_write, 0, 1, size / reqsize,
-				   tdata->do_nonblock_req, min_sg_len);
-	if (ret)
-		goto err;
-
-	return ret;
- err:
-	printk(KERN_INFO "[%s] error\n", __func__);
-	return ret;
-}
-
-static int mmc_test_rw_multiple_size(struct mmc_test_card *test,
-				     struct mmc_test_multiple_rw *rw)
-{
-	int ret = 0;
-	int i;
-	void *pre_req = test->card->host->ops->pre_req;
-	void *post_req = test->card->host->ops->post_req;
-
-	if (rw->do_nonblock_req &&
-	    ((!pre_req && post_req) || (pre_req && !post_req))) {
-		printk(KERN_INFO "error: only one of pre/post is defined\n");
-		return -EINVAL;
-	}
-
-	for (i = 0 ; i < rw->len && ret == 0; i++) {
-		ret = mmc_test_rw_multiple(test, rw, rw->bs[i], rw->size, 0);
-		if (ret)
-			break;
-	}
-	return ret;
-}
-
-static int mmc_test_rw_multiple_sg_len(struct mmc_test_card *test,
-				       struct mmc_test_multiple_rw *rw)
-{
-	int ret = 0;
-	int i;
-
-	for (i = 0 ; i < rw->len && ret == 0; i++) {
-		ret = mmc_test_rw_multiple(test, rw, 512*1024, rw->size,
-					   rw->sg_len[i]);
-		if (ret)
-			break;
-	}
-	return ret;
-}
-
-/*
- * Multiple blocking write 4k to 4 MB chunks
- */
-static int mmc_test_profile_mult_write_blocking_perf(struct mmc_test_card *test)
-{
-	unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16,
-			     1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22};
-	struct mmc_test_multiple_rw test_data = {
-		.bs = bs,
-		.size = TEST_AREA_MAX_SIZE,
-		.len = ARRAY_SIZE(bs),
-		.do_write = true,
-		.do_nonblock_req = false,
-		.prepare = MMC_TEST_PREP_ERASE,
-	};
-
-	return mmc_test_rw_multiple_size(test, &test_data);
-};
-
-/*
- * Multiple non-blocking write 4k to 4 MB chunks
- */
-static int mmc_test_profile_mult_write_nonblock_perf(struct mmc_test_card *test)
-{
-	unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16,
-			     1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22};
-	struct mmc_test_multiple_rw test_data = {
-		.bs = bs,
-		.size = TEST_AREA_MAX_SIZE,
-		.len = ARRAY_SIZE(bs),
-		.do_write = true,
-		.do_nonblock_req = true,
-		.prepare = MMC_TEST_PREP_ERASE,
-	};
-
-	return mmc_test_rw_multiple_size(test, &test_data);
-}
-
-/*
- * Multiple blocking read 4k to 4 MB chunks
- */
-static int mmc_test_profile_mult_read_blocking_perf(struct mmc_test_card *test)
-{
-	unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16,
-			     1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22};
-	struct mmc_test_multiple_rw test_data = {
-		.bs = bs,
-		.size = TEST_AREA_MAX_SIZE,
-		.len = ARRAY_SIZE(bs),
-		.do_write = false,
-		.do_nonblock_req = false,
-		.prepare = MMC_TEST_PREP_NONE,
-	};
-
-	return mmc_test_rw_multiple_size(test, &test_data);
-}
-
-/*
- * Multiple non-blocking read 4k to 4 MB chunks
- */
-static int mmc_test_profile_mult_read_nonblock_perf(struct mmc_test_card *test)
-{
-	unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16,
-			     1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22};
-	struct mmc_test_multiple_rw test_data = {
-		.bs = bs,
-		.size = TEST_AREA_MAX_SIZE,
-		.len = ARRAY_SIZE(bs),
-		.do_write = false,
-		.do_nonblock_req = true,
-		.prepare = MMC_TEST_PREP_NONE,
-	};
-
-	return mmc_test_rw_multiple_size(test, &test_data);
-}
-
-/*
- * Multiple blocking write 1 to 512 sg elements
- */
-static int mmc_test_profile_sglen_wr_blocking_perf(struct mmc_test_card *test)
-{
-	unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6,
-				 1 << 7, 1 << 8, 1 << 9};
-	struct mmc_test_multiple_rw test_data = {
-		.sg_len = sg_len,
-		.size = TEST_AREA_MAX_SIZE,
-		.len = ARRAY_SIZE(sg_len),
-		.do_write = true,
-		.do_nonblock_req = false,
-		.prepare = MMC_TEST_PREP_ERASE,
-	};
-
-	return mmc_test_rw_multiple_sg_len(test, &test_data);
-};
-
-/*
- * Multiple non-blocking write 1 to 512 sg elements
- */
-static int mmc_test_profile_sglen_wr_nonblock_perf(struct mmc_test_card *test)
-{
-	unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6,
-				 1 << 7, 1 << 8, 1 << 9};
-	struct mmc_test_multiple_rw test_data = {
-		.sg_len = sg_len,
-		.size = TEST_AREA_MAX_SIZE,
-		.len = ARRAY_SIZE(sg_len),
-		.do_write = true,
-		.do_nonblock_req = true,
-		.prepare = MMC_TEST_PREP_ERASE,
-	};
-
-	return mmc_test_rw_multiple_sg_len(test, &test_data);
-}
-
-/*
- * Multiple blocking read 1 to 512 sg elements
- */
-static int mmc_test_profile_sglen_r_blocking_perf(struct mmc_test_card *test)
-{
-	unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6,
-				 1 << 7, 1 << 8, 1 << 9};
-	struct mmc_test_multiple_rw test_data = {
-		.sg_len = sg_len,
-		.size = TEST_AREA_MAX_SIZE,
-		.len = ARRAY_SIZE(sg_len),
-		.do_write = false,
-		.do_nonblock_req = false,
-		.prepare = MMC_TEST_PREP_NONE,
-	};
-
-	return mmc_test_rw_multiple_sg_len(test, &test_data);
-}
-
-/*
- * Multiple non-blocking read 1 to 512 sg elements
- */
-static int mmc_test_profile_sglen_r_nonblock_perf(struct mmc_test_card *test)
-{
-	unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6,
-				 1 << 7, 1 << 8, 1 << 9};
-	struct mmc_test_multiple_rw test_data = {
-		.sg_len = sg_len,
-		.size = TEST_AREA_MAX_SIZE,
-		.len = ARRAY_SIZE(sg_len),
-		.do_write = false,
-		.do_nonblock_req = true,
-		.prepare = MMC_TEST_PREP_NONE,
-	};
-
-	return mmc_test_rw_multiple_sg_len(test, &test_data);
-}
-
 static const struct mmc_test_case mmc_test_cases[] = {
 	{
 		.name = "Basic write (no data verification)",
@@ -2595,61 +2221,6 @@ static const struct mmc_test_case mmc_test_cases[] = {
 		.cleanup = mmc_test_area_cleanup,
 	},
 
-	{
-		.name = "Write performance with blocking req 4k to 4MB",
-		.prepare = mmc_test_area_prepare,
-		.run = mmc_test_profile_mult_write_blocking_perf,
-		.cleanup = mmc_test_area_cleanup,
-	},
-
-	{
-		.name = "Write performance with non-blocking req 4k to 4MB",
-		.prepare = mmc_test_area_prepare,
-		.run = mmc_test_profile_mult_write_nonblock_perf,
-		.cleanup = mmc_test_area_cleanup,
-	},
-
-	{
-		.name = "Read performance with blocking req 4k to 4MB",
-		.prepare = mmc_test_area_prepare,
-		.run = mmc_test_profile_mult_read_blocking_perf,
-		.cleanup = mmc_test_area_cleanup,
-	},
-
-	{
-		.name = "Read performance with non-blocking req 4k to 4MB",
-		.prepare = mmc_test_area_prepare,
-		.run = mmc_test_profile_mult_read_nonblock_perf,
-		.cleanup = mmc_test_area_cleanup,
-	},
-
-	{
-		.name = "Write performance blocking req 1 to 512 sg elems",
-		.prepare = mmc_test_area_prepare,
-		.run = mmc_test_profile_sglen_wr_blocking_perf,
-		.cleanup = mmc_test_area_cleanup,
-	},
-
-	{
-		.name = "Write performance non-blocking req 1 to 512 sg elems",
-		.prepare = mmc_test_area_prepare,
-		.run = mmc_test_profile_sglen_wr_nonblock_perf,
-		.cleanup = mmc_test_area_cleanup,
-	},
-
-	{
-		.name = "Read performance blocking req 1 to 512 sg elems",
-		.prepare = mmc_test_area_prepare,
-		.run = mmc_test_profile_sglen_r_blocking_perf,
-		.cleanup = mmc_test_area_cleanup,
-	},
-
-	{
-		.name = "Read performance non-blocking req 1 to 512 sg elems",
-		.prepare = mmc_test_area_prepare,
-		.run = mmc_test_profile_sglen_r_nonblock_perf,
-		.cleanup = mmc_test_area_cleanup,
-	},
 };
 
 static DEFINE_MUTEX(mmc_test_lock);
@@ -2874,32 +2445,6 @@ static const struct file_operations mmc_test_fops_test = {
 	.release	= single_release,
 };
 
-static int mtf_testlist_show(struct seq_file *sf, void *data)
-{
-	int i;
-
-	mutex_lock(&mmc_test_lock);
-
-	for (i = 0; i < ARRAY_SIZE(mmc_test_cases); i++)
-		seq_printf(sf, "%d:\t%s\n", i+1, mmc_test_cases[i].name);
-
-	mutex_unlock(&mmc_test_lock);
-
-	return 0;
-}
-
-static int mtf_testlist_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, mtf_testlist_show, inode->i_private);
-}
-
-static const struct file_operations mmc_test_fops_testlist = {
-	.open		= mtf_testlist_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static void mmc_test_free_file_test(struct mmc_card *card)
 {
 	struct mmc_test_dbgfs_file *df, *dfs;
@@ -2931,18 +2476,7 @@ static int mmc_test_register_file_test(struct mmc_card *card)
 
 	if (IS_ERR_OR_NULL(file)) {
 		dev_err(&card->dev,
-			"Can't create test. Perhaps debugfs is disabled.\n");
-		ret = -ENODEV;
-		goto err;
-	}
-
-	if (card->debugfs_root)
-		file = debugfs_create_file("testlist", S_IRUGO,
-			card->debugfs_root, card, &mmc_test_fops_testlist);
-
-	if (IS_ERR_OR_NULL(file)) {
-		dev_err(&card->dev,
-			"Can't create testlist. Perhaps debugfs is disabled.\n");
+			"Can't create file. Perhaps debugfs is disabled.\n");
 		ret = -ENODEV;
 		goto err;
 	}
diff --git a/trunk/drivers/mmc/card/queue.c b/trunk/drivers/mmc/card/queue.c
index 45fb362e3f01..6413afa318d2 100644
--- a/trunk/drivers/mmc/card/queue.c
+++ b/trunk/drivers/mmc/card/queue.c
@@ -52,18 +52,14 @@ static int mmc_queue_thread(void *d)
 	down(&mq->thread_sem);
 	do {
 		struct request *req = NULL;
-		struct mmc_queue_req *tmp;
 
 		spin_lock_irq(q->queue_lock);
 		set_current_state(TASK_INTERRUPTIBLE);
 		req = blk_fetch_request(q);
-		mq->mqrq_cur->req = req;
+		mq->req = req;
 		spin_unlock_irq(q->queue_lock);
 
-		if (req || mq->mqrq_prev->req) {
-			set_current_state(TASK_RUNNING);
-			mq->issue_fn(mq, req);
-		} else {
+		if (!req) {
 			if (kthread_should_stop()) {
 				set_current_state(TASK_RUNNING);
 				break;
@@ -71,14 +67,11 @@ static int mmc_queue_thread(void *d)
 			up(&mq->thread_sem);
 			schedule();
 			down(&mq->thread_sem);
+			continue;
 		}
+		set_current_state(TASK_RUNNING);
 
-		/* Current request becomes previous request and vice versa. */
-		mq->mqrq_prev->brq.mrq.data = NULL;
-		mq->mqrq_prev->req = NULL;
-		tmp = mq->mqrq_prev;
-		mq->mqrq_prev = mq->mqrq_cur;
-		mq->mqrq_cur = tmp;
+		mq->issue_fn(mq, req);
 	} while (1);
 	up(&mq->thread_sem);
 
@@ -104,46 +97,10 @@ static void mmc_request(struct request_queue *q)
 		return;
 	}
 
-	if (!mq->mqrq_cur->req && !mq->mqrq_prev->req)
+	if (!mq->req)
 		wake_up_process(mq->thread);
 }
 
-struct scatterlist *mmc_alloc_sg(int sg_len, int *err)
-{
-	struct scatterlist *sg;
-
-	sg = kmalloc(sizeof(struct scatterlist)*sg_len, GFP_KERNEL);
-	if (!sg)
-		*err = -ENOMEM;
-	else {
-		*err = 0;
-		sg_init_table(sg, sg_len);
-	}
-
-	return sg;
-}
-
-static void mmc_queue_setup_discard(struct request_queue *q,
-				    struct mmc_card *card)
-{
-	unsigned max_discard;
-
-	max_discard = mmc_calc_max_discard(card);
-	if (!max_discard)
-		return;
-
-	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
-	q->limits.max_discard_sectors = max_discard;
-	if (card->erased_byte == 0)
-		q->limits.discard_zeroes_data = 1;
-	q->limits.discard_granularity = card->pref_erase << 9;
-	/* granularity must not be greater than max. discard */
-	if (card->pref_erase > max_discard)
-		q->limits.discard_granularity = 0;
-	if (mmc_can_secure_erase_trim(card))
-		queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
-}
-
 /**
  * mmc_init_queue - initialise a queue structure.
  * @mq: mmc queue
@@ -159,8 +116,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 	struct mmc_host *host = card->host;
 	u64 limit = BLK_BOUNCE_HIGH;
 	int ret;
-	struct mmc_queue_req *mqrq_cur = &mq->mqrq[0];
-	struct mmc_queue_req *mqrq_prev = &mq->mqrq[1];
 
 	if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
 		limit = *mmc_dev(host)->dma_mask;
@@ -170,16 +125,21 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 	if (!mq->queue)
 		return -ENOMEM;
 
-	memset(&mq->mqrq_cur, 0, sizeof(mq->mqrq_cur));
-	memset(&mq->mqrq_prev, 0, sizeof(mq->mqrq_prev));
-	mq->mqrq_cur = mqrq_cur;
-	mq->mqrq_prev = mqrq_prev;
 	mq->queue->queuedata = mq;
+	mq->req = NULL;
 
 	blk_queue_prep_rq(mq->queue, mmc_prep_request);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
-	if (mmc_can_erase(card))
-		mmc_queue_setup_discard(mq->queue, card);
+	if (mmc_can_erase(card)) {
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue);
+		mq->queue->limits.max_discard_sectors = UINT_MAX;
+		if (card->erased_byte == 0)
+			mq->queue->limits.discard_zeroes_data = 1;
+		mq->queue->limits.discard_granularity = card->pref_erase << 9;
+		if (mmc_can_secure_erase_trim(card))
+			queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD,
+						mq->queue);
+	}
 
 #ifdef CONFIG_MMC_BLOCK_BOUNCE
 	if (host->max_segs == 1) {
@@ -195,64 +155,53 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 			bouncesz = host->max_blk_count * 512;
 
 		if (bouncesz > 512) {
-			mqrq_cur->bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
-			if (!mqrq_cur->bounce_buf) {
-				printk(KERN_WARNING "%s: unable to "
-					"allocate bounce cur buffer\n",
-					mmc_card_name(card));
-			}
-			mqrq_prev->bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
-			if (!mqrq_prev->bounce_buf) {
+			mq->bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
+			if (!mq->bounce_buf) {
 				printk(KERN_WARNING "%s: unable to "
-					"allocate bounce prev buffer\n",
+					"allocate bounce buffer\n",
 					mmc_card_name(card));
-				kfree(mqrq_cur->bounce_buf);
-				mqrq_cur->bounce_buf = NULL;
 			}
 		}
 
-		if (mqrq_cur->bounce_buf && mqrq_prev->bounce_buf) {
+		if (mq->bounce_buf) {
 			blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
 			blk_queue_max_hw_sectors(mq->queue, bouncesz / 512);
 			blk_queue_max_segments(mq->queue, bouncesz / 512);
 			blk_queue_max_segment_size(mq->queue, bouncesz);
 
-			mqrq_cur->sg = mmc_alloc_sg(1, &ret);
-			if (ret)
-				goto cleanup_queue;
-
-			mqrq_cur->bounce_sg =
-				mmc_alloc_sg(bouncesz / 512, &ret);
-			if (ret)
-				goto cleanup_queue;
-
-			mqrq_prev->sg = mmc_alloc_sg(1, &ret);
-			if (ret)
+			mq->sg = kmalloc(sizeof(struct scatterlist),
+				GFP_KERNEL);
+			if (!mq->sg) {
+				ret = -ENOMEM;
 				goto cleanup_queue;
+			}
+			sg_init_table(mq->sg, 1);
 
-			mqrq_prev->bounce_sg =
-				mmc_alloc_sg(bouncesz / 512, &ret);
-			if (ret)
+			mq->bounce_sg = kmalloc(sizeof(struct scatterlist) *
+				bouncesz / 512, GFP_KERNEL);
+			if (!mq->bounce_sg) {
+				ret = -ENOMEM;
 				goto cleanup_queue;
+			}
+			sg_init_table(mq->bounce_sg, bouncesz / 512);
 		}
 	}
 #endif
 
-	if (!mqrq_cur->bounce_buf && !mqrq_prev->bounce_buf) {
+	if (!mq->bounce_buf) {
 		blk_queue_bounce_limit(mq->queue, limit);
 		blk_queue_max_hw_sectors(mq->queue,
 			min(host->max_blk_count, host->max_req_size / 512));
 		blk_queue_max_segments(mq->queue, host->max_segs);
 		blk_queue_max_segment_size(mq->queue, host->max_seg_size);
 
-		mqrq_cur->sg = mmc_alloc_sg(host->max_segs, &ret);
-		if (ret)
-			goto cleanup_queue;
-
-
-		mqrq_prev->sg = mmc_alloc_sg(host->max_segs, &ret);
-		if (ret)
+		mq->sg = kmalloc(sizeof(struct scatterlist) *
+			host->max_segs, GFP_KERNEL);
+		if (!mq->sg) {
+			ret = -ENOMEM;
 			goto cleanup_queue;
+		}
+		sg_init_table(mq->sg, host->max_segs);
 	}
 
 	sema_init(&mq->thread_sem, 1);
@@ -267,22 +216,16 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 
 	return 0;
  free_bounce_sg:
-	kfree(mqrq_cur->bounce_sg);
-	mqrq_cur->bounce_sg = NULL;
-	kfree(mqrq_prev->bounce_sg);
-	mqrq_prev->bounce_sg = NULL;
-
+ 	if (mq->bounce_sg)
+ 		kfree(mq->bounce_sg);
+ 	mq->bounce_sg = NULL;
  cleanup_queue:
-	kfree(mqrq_cur->sg);
-	mqrq_cur->sg = NULL;
-	kfree(mqrq_cur->bounce_buf);
-	mqrq_cur->bounce_buf = NULL;
-
-	kfree(mqrq_prev->sg);
-	mqrq_prev->sg = NULL;
-	kfree(mqrq_prev->bounce_buf);
-	mqrq_prev->bounce_buf = NULL;
-
+ 	if (mq->sg)
+		kfree(mq->sg);
+	mq->sg = NULL;
+	if (mq->bounce_buf)
+		kfree(mq->bounce_buf);
+	mq->bounce_buf = NULL;
 	blk_cleanup_queue(mq->queue);
 	return ret;
 }
@@ -291,8 +234,6 @@ void mmc_cleanup_queue(struct mmc_queue *mq)
 {
 	struct request_queue *q = mq->queue;
 	unsigned long flags;
-	struct mmc_queue_req *mqrq_cur = mq->mqrq_cur;
-	struct mmc_queue_req *mqrq_prev = mq->mqrq_prev;
 
 	/* Make sure the queue isn't suspended, as that will deadlock */
 	mmc_queue_resume(mq);
@@ -306,23 +247,16 @@ void mmc_cleanup_queue(struct mmc_queue *mq)
 	blk_start_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	kfree(mqrq_cur->bounce_sg);
-	mqrq_cur->bounce_sg = NULL;
+ 	if (mq->bounce_sg)
+ 		kfree(mq->bounce_sg);
+ 	mq->bounce_sg = NULL;
 
-	kfree(mqrq_cur->sg);
-	mqrq_cur->sg = NULL;
+	kfree(mq->sg);
+	mq->sg = NULL;
 
-	kfree(mqrq_cur->bounce_buf);
-	mqrq_cur->bounce_buf = NULL;
-
-	kfree(mqrq_prev->bounce_sg);
-	mqrq_prev->bounce_sg = NULL;
-
-	kfree(mqrq_prev->sg);
-	mqrq_prev->sg = NULL;
-
-	kfree(mqrq_prev->bounce_buf);
-	mqrq_prev->bounce_buf = NULL;
+	if (mq->bounce_buf)
+		kfree(mq->bounce_buf);
+	mq->bounce_buf = NULL;
 
 	mq->card = NULL;
 }
@@ -375,27 +309,27 @@ void mmc_queue_resume(struct mmc_queue *mq)
 /*
  * Prepare the sg list(s) to be handed of to the host driver
  */
-unsigned int mmc_queue_map_sg(struct mmc_queue *mq, struct mmc_queue_req *mqrq)
+unsigned int mmc_queue_map_sg(struct mmc_queue *mq)
 {
 	unsigned int sg_len;
 	size_t buflen;
 	struct scatterlist *sg;
 	int i;
 
-	if (!mqrq->bounce_buf)
-		return blk_rq_map_sg(mq->queue, mqrq->req, mqrq->sg);
+	if (!mq->bounce_buf)
+		return blk_rq_map_sg(mq->queue, mq->req, mq->sg);
 
-	BUG_ON(!mqrq->bounce_sg);
+	BUG_ON(!mq->bounce_sg);
 
-	sg_len = blk_rq_map_sg(mq->queue, mqrq->req, mqrq->bounce_sg);
+	sg_len = blk_rq_map_sg(mq->queue, mq->req, mq->bounce_sg);
 
-	mqrq->bounce_sg_len = sg_len;
+	mq->bounce_sg_len = sg_len;
 
 	buflen = 0;
-	for_each_sg(mqrq->bounce_sg, sg, sg_len, i)
+	for_each_sg(mq->bounce_sg, sg, sg_len, i)
 		buflen += sg->length;
 
-	sg_init_one(mqrq->sg, mqrq->bounce_buf, buflen);
+	sg_init_one(mq->sg, mq->bounce_buf, buflen);
 
 	return 1;
 }
@@ -404,30 +338,31 @@ unsigned int mmc_queue_map_sg(struct mmc_queue *mq, struct mmc_queue_req *mqrq)
  * If writing, bounce the data to the buffer before the request
  * is sent to the host driver
  */
-void mmc_queue_bounce_pre(struct mmc_queue_req *mqrq)
+void mmc_queue_bounce_pre(struct mmc_queue *mq)
 {
-	if (!mqrq->bounce_buf)
+	if (!mq->bounce_buf)
 		return;
 
-	if (rq_data_dir(mqrq->req) != WRITE)
+	if (rq_data_dir(mq->req) != WRITE)
 		return;
 
-	sg_copy_to_buffer(mqrq->bounce_sg, mqrq->bounce_sg_len,
-		mqrq->bounce_buf, mqrq->sg[0].length);
+	sg_copy_to_buffer(mq->bounce_sg, mq->bounce_sg_len,
+		mq->bounce_buf, mq->sg[0].length);
 }
 
 /*
  * If reading, bounce the data from the buffer after the request
  * has been handled by the host driver
  */
-void mmc_queue_bounce_post(struct mmc_queue_req *mqrq)
+void mmc_queue_bounce_post(struct mmc_queue *mq)
 {
-	if (!mqrq->bounce_buf)
+	if (!mq->bounce_buf)
 		return;
 
-	if (rq_data_dir(mqrq->req) != READ)
+	if (rq_data_dir(mq->req) != READ)
 		return;
 
-	sg_copy_from_buffer(mqrq->bounce_sg, mqrq->bounce_sg_len,
-		mqrq->bounce_buf, mqrq->sg[0].length);
+	sg_copy_from_buffer(mq->bounce_sg, mq->bounce_sg_len,
+		mq->bounce_buf, mq->sg[0].length);
 }
+
diff --git a/trunk/drivers/mmc/card/queue.h b/trunk/drivers/mmc/card/queue.h
index d2a1eb4b9f9f..6223ef8dc9cd 100644
--- a/trunk/drivers/mmc/card/queue.h
+++ b/trunk/drivers/mmc/card/queue.h
@@ -4,35 +4,19 @@
 struct request;
 struct task_struct;
 
-struct mmc_blk_request {
-	struct mmc_request	mrq;
-	struct mmc_command	sbc;
-	struct mmc_command	cmd;
-	struct mmc_command	stop;
-	struct mmc_data		data;
-};
-
-struct mmc_queue_req {
-	struct request		*req;
-	struct mmc_blk_request	brq;
-	struct scatterlist	*sg;
-	char			*bounce_buf;
-	struct scatterlist	*bounce_sg;
-	unsigned int		bounce_sg_len;
-	struct mmc_async_req	mmc_active;
-};
-
 struct mmc_queue {
 	struct mmc_card		*card;
 	struct task_struct	*thread;
 	struct semaphore	thread_sem;
 	unsigned int		flags;
+	struct request		*req;
 	int			(*issue_fn)(struct mmc_queue *, struct request *);
 	void			*data;
 	struct request_queue	*queue;
-	struct mmc_queue_req	mqrq[2];
-	struct mmc_queue_req	*mqrq_cur;
-	struct mmc_queue_req	*mqrq_prev;
+	struct scatterlist	*sg;
+	char			*bounce_buf;
+	struct scatterlist	*bounce_sg;
+	unsigned int		bounce_sg_len;
 };
 
 extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
@@ -41,9 +25,8 @@ extern void mmc_cleanup_queue(struct mmc_queue *);
 extern void mmc_queue_suspend(struct mmc_queue *);
 extern void mmc_queue_resume(struct mmc_queue *);
 
-extern unsigned int mmc_queue_map_sg(struct mmc_queue *,
-				     struct mmc_queue_req *);
-extern void mmc_queue_bounce_pre(struct mmc_queue_req *);
-extern void mmc_queue_bounce_post(struct mmc_queue_req *);
+extern unsigned int mmc_queue_map_sg(struct mmc_queue *);
+extern void mmc_queue_bounce_pre(struct mmc_queue *);
+extern void mmc_queue_bounce_post(struct mmc_queue *);
 
 #endif
diff --git a/trunk/drivers/mmc/core/core.c b/trunk/drivers/mmc/core/core.c
index f091b43d00c4..7843efe22359 100644
--- a/trunk/drivers/mmc/core/core.c
+++ b/trunk/drivers/mmc/core/core.c
@@ -198,109 +198,9 @@ mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
 
 static void mmc_wait_done(struct mmc_request *mrq)
 {
-	complete(&mrq->completion);
+	complete(mrq->done_data);
 }
 
-static void __mmc_start_req(struct mmc_host *host, struct mmc_request *mrq)
-{
-	init_completion(&mrq->completion);
-	mrq->done = mmc_wait_done;
-	mmc_start_request(host, mrq);
-}
-
-static void mmc_wait_for_req_done(struct mmc_host *host,
-				  struct mmc_request *mrq)
-{
-	wait_for_completion(&mrq->completion);
-}
-
-/**
- *	mmc_pre_req - Prepare for a new request
- *	@host: MMC host to prepare command
- *	@mrq: MMC request to prepare for
- *	@is_first_req: true if there is no previous started request
- *                     that may run in parellel to this call, otherwise false
- *
- *	mmc_pre_req() is called in prior to mmc_start_req() to let
- *	host prepare for the new request. Preparation of a request may be
- *	performed while another request is running on the host.
- */
-static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq,
-		 bool is_first_req)
-{
-	if (host->ops->pre_req)
-		host->ops->pre_req(host, mrq, is_first_req);
-}
-
-/**
- *	mmc_post_req - Post process a completed request
- *	@host: MMC host to post process command
- *	@mrq: MMC request to post process for
- *	@err: Error, if non zero, clean up any resources made in pre_req
- *
- *	Let the host post process a completed request. Post processing of
- *	a request may be performed while another reuqest is running.
- */
-static void mmc_post_req(struct mmc_host *host, struct mmc_request *mrq,
-			 int err)
-{
-	if (host->ops->post_req)
-		host->ops->post_req(host, mrq, err);
-}
-
-/**
- *	mmc_start_req - start a non-blocking request
- *	@host: MMC host to start command
- *	@areq: async request to start
- *	@error: out parameter returns 0 for success, otherwise non zero
- *
- *	Start a new MMC custom command request for a host.
- *	If there is on ongoing async request wait for completion
- *	of that request and start the new one and return.
- *	Does not wait for the new request to complete.
- *
- *      Returns the completed request, NULL in case of none completed.
- *	Wait for the an ongoing request (previoulsy started) to complete and
- *	return the completed request. If there is no ongoing request, NULL
- *	is returned without waiting. NULL is not an error condition.
- */
-struct mmc_async_req *mmc_start_req(struct mmc_host *host,
-				    struct mmc_async_req *areq, int *error)
-{
-	int err = 0;
-	struct mmc_async_req *data = host->areq;
-
-	/* Prepare a new request */
-	if (areq)
-		mmc_pre_req(host, areq->mrq, !host->areq);
-
-	if (host->areq) {
-		mmc_wait_for_req_done(host, host->areq->mrq);
-		err = host->areq->err_check(host->card, host->areq);
-		if (err) {
-			mmc_post_req(host, host->areq->mrq, 0);
-			if (areq)
-				mmc_post_req(host, areq->mrq, -EINVAL);
-
-			host->areq = NULL;
-			goto out;
-		}
-	}
-
-	if (areq)
-		__mmc_start_req(host, areq->mrq);
-
-	if (host->areq)
-		mmc_post_req(host, host->areq->mrq, 0);
-
-	host->areq = areq;
- out:
-	if (error)
-		*error = err;
-	return data;
-}
-EXPORT_SYMBOL(mmc_start_req);
-
 /**
  *	mmc_wait_for_req - start a request and wait for completion
  *	@host: MMC host to start command
@@ -312,9 +212,16 @@ EXPORT_SYMBOL(mmc_start_req);
  */
 void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq)
 {
-	__mmc_start_req(host, mrq);
-	mmc_wait_for_req_done(host, mrq);
+	DECLARE_COMPLETION_ONSTACK(complete);
+
+	mrq->done_data = &complete;
+	mrq->done = mmc_wait_done;
+
+	mmc_start_request(host, mrq);
+
+	wait_for_completion(&complete);
 }
+
 EXPORT_SYMBOL(mmc_wait_for_req);
 
 /**
@@ -1609,82 +1516,6 @@ int mmc_erase_group_aligned(struct mmc_card *card, unsigned int from,
 }
 EXPORT_SYMBOL(mmc_erase_group_aligned);
 
-static unsigned int mmc_do_calc_max_discard(struct mmc_card *card,
-					    unsigned int arg)
-{
-	struct mmc_host *host = card->host;
-	unsigned int max_discard, x, y, qty = 0, max_qty, timeout;
-	unsigned int last_timeout = 0;
-
-	if (card->erase_shift)
-		max_qty = UINT_MAX >> card->erase_shift;
-	else if (mmc_card_sd(card))
-		max_qty = UINT_MAX;
-	else
-		max_qty = UINT_MAX / card->erase_size;
-
-	/* Find the largest qty with an OK timeout */
-	do {
-		y = 0;
-		for (x = 1; x && x <= max_qty && max_qty - x >= qty; x <<= 1) {
-			timeout = mmc_erase_timeout(card, arg, qty + x);
-			if (timeout > host->max_discard_to)
-				break;
-			if (timeout < last_timeout)
-				break;
-			last_timeout = timeout;
-			y = x;
-		}
-		qty += y;
-	} while (y);
-
-	if (!qty)
-		return 0;
-
-	if (qty == 1)
-		return 1;
-
-	/* Convert qty to sectors */
-	if (card->erase_shift)
-		max_discard = --qty << card->erase_shift;
-	else if (mmc_card_sd(card))
-		max_discard = qty;
-	else
-		max_discard = --qty * card->erase_size;
-
-	return max_discard;
-}
-
-unsigned int mmc_calc_max_discard(struct mmc_card *card)
-{
-	struct mmc_host *host = card->host;
-	unsigned int max_discard, max_trim;
-
-	if (!host->max_discard_to)
-		return UINT_MAX;
-
-	/*
-	 * Without erase_group_def set, MMC erase timeout depends on clock
-	 * frequence which can change.  In that case, the best choice is
-	 * just the preferred erase size.
-	 */
-	if (mmc_card_mmc(card) && !(card->ext_csd.erase_group_def & 1))
-		return card->pref_erase;
-
-	max_discard = mmc_do_calc_max_discard(card, MMC_ERASE_ARG);
-	if (mmc_can_trim(card)) {
-		max_trim = mmc_do_calc_max_discard(card, MMC_TRIM_ARG);
-		if (max_trim < max_discard)
-			max_discard = max_trim;
-	} else if (max_discard < card->erase_size) {
-		max_discard = 0;
-	}
-	pr_debug("%s: calculated max. discard sectors %u for timeout %u ms\n",
-		 mmc_hostname(host), max_discard, host->max_discard_to);
-	return max_discard;
-}
-EXPORT_SYMBOL(mmc_calc_max_discard);
-
 int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen)
 {
 	struct mmc_command cmd = {0};
@@ -1832,10 +1663,6 @@ int mmc_power_save_host(struct mmc_host *host)
 {
 	int ret = 0;
 
-#ifdef CONFIG_MMC_DEBUG
-	pr_info("%s: %s: powering down\n", mmc_hostname(host), __func__);
-#endif
-
 	mmc_bus_get(host);
 
 	if (!host->bus_ops || host->bus_dead || !host->bus_ops->power_restore) {
@@ -1858,10 +1685,6 @@ int mmc_power_restore_host(struct mmc_host *host)
 {
 	int ret;
 
-#ifdef CONFIG_MMC_DEBUG
-	pr_info("%s: %s: powering up\n", mmc_hostname(host), __func__);
-#endif
-
 	mmc_bus_get(host);
 
 	if (!host->bus_ops || host->bus_dead || !host->bus_ops->power_restore) {
diff --git a/trunk/drivers/mmc/core/sd.c b/trunk/drivers/mmc/core/sd.c
index 633975ff2bb3..ff2774128aa9 100644
--- a/trunk/drivers/mmc/core/sd.c
+++ b/trunk/drivers/mmc/core/sd.c
@@ -409,62 +409,52 @@ int mmc_sd_switch_hs(struct mmc_card *card)
 
 static int sd_select_driver_type(struct mmc_card *card, u8 *status)
 {
-	int host_drv_type = SD_DRIVER_TYPE_B;
-	int card_drv_type = SD_DRIVER_TYPE_B;
-	int drive_strength;
+	int host_drv_type = 0, card_drv_type = 0;
 	int err;
 
 	/*
 	 * If the host doesn't support any of the Driver Types A,C or D,
-	 * or there is no board specific handler then default Driver
-	 * Type B is used.
+	 * default Driver Type B is used.
 	 */
 	if (!(card->host->caps & (MMC_CAP_DRIVER_TYPE_A | MMC_CAP_DRIVER_TYPE_C
 	    | MMC_CAP_DRIVER_TYPE_D)))
 		return 0;
 
-	if (!card->host->ops->select_drive_strength)
-		return 0;
-
-	if (card->host->caps & MMC_CAP_DRIVER_TYPE_A)
-		host_drv_type |= SD_DRIVER_TYPE_A;
-
-	if (card->host->caps & MMC_CAP_DRIVER_TYPE_C)
-		host_drv_type |= SD_DRIVER_TYPE_C;
-
-	if (card->host->caps & MMC_CAP_DRIVER_TYPE_D)
-		host_drv_type |= SD_DRIVER_TYPE_D;
-
-	if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_A)
-		card_drv_type |= SD_DRIVER_TYPE_A;
-
-	if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
-		card_drv_type |= SD_DRIVER_TYPE_C;
-
-	if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_D)
-		card_drv_type |= SD_DRIVER_TYPE_D;
-
-	/*
-	 * The drive strength that the hardware can support
-	 * depends on the board design.  Pass the appropriate
-	 * information and let the hardware specific code
-	 * return what is possible given the options
-	 */
-	drive_strength = card->host->ops->select_drive_strength(
-		card->sw_caps.uhs_max_dtr,
-		host_drv_type, card_drv_type);
+	if (card->host->caps & MMC_CAP_DRIVER_TYPE_A) {
+		host_drv_type = MMC_SET_DRIVER_TYPE_A;
+		if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_A)
+			card_drv_type = MMC_SET_DRIVER_TYPE_A;
+		else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_B)
+			card_drv_type = MMC_SET_DRIVER_TYPE_B;
+		else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
+			card_drv_type = MMC_SET_DRIVER_TYPE_C;
+	} else if (card->host->caps & MMC_CAP_DRIVER_TYPE_C) {
+		host_drv_type = MMC_SET_DRIVER_TYPE_C;
+		if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
+			card_drv_type = MMC_SET_DRIVER_TYPE_C;
+	} else if (!(card->host->caps & MMC_CAP_DRIVER_TYPE_D)) {
+		/*
+		 * If we are here, that means only the default driver type
+		 * B is supported by the host.
+		 */
+		host_drv_type = MMC_SET_DRIVER_TYPE_B;
+		if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_B)
+			card_drv_type = MMC_SET_DRIVER_TYPE_B;
+		else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
+			card_drv_type = MMC_SET_DRIVER_TYPE_C;
+	}
 
-	err = mmc_sd_switch(card, 1, 2, drive_strength, status);
+	err = mmc_sd_switch(card, 1, 2, card_drv_type, status);
 	if (err)
 		return err;
 
-	if ((status[15] & 0xF) != drive_strength) {
-		printk(KERN_WARNING "%s: Problem setting drive strength!\n",
+	if ((status[15] & 0xF) != card_drv_type) {
+		printk(KERN_WARNING "%s: Problem setting driver strength!\n",
 			mmc_hostname(card->host));
 		return 0;
 	}
 
-	mmc_set_driver_type(card->host, drive_strength);
+	mmc_set_driver_type(card->host, host_drv_type);
 
 	return 0;
 }
diff --git a/trunk/drivers/mmc/core/sdio_bus.c b/trunk/drivers/mmc/core/sdio_bus.c
index e4e6822d09e3..d2565df8a7fb 100644
--- a/trunk/drivers/mmc/core/sdio_bus.c
+++ b/trunk/drivers/mmc/core/sdio_bus.c
@@ -167,8 +167,11 @@ static int sdio_bus_remove(struct device *dev)
 	int ret = 0;
 
 	/* Make sure card is powered before invoking ->remove() */
-	if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD)
-		pm_runtime_get_sync(dev);
+	if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) {
+		ret = pm_runtime_get_sync(dev);
+		if (ret < 0)
+			goto out;
+	}
 
 	drv->remove(func);
 
@@ -188,6 +191,7 @@ static int sdio_bus_remove(struct device *dev)
 	if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD)
 		pm_runtime_put_sync(dev);
 
+out:
 	return ret;
 }
 
diff --git a/trunk/drivers/mmc/host/Kconfig b/trunk/drivers/mmc/host/Kconfig
index 8c87096531e9..56dbf3f6ad08 100644
--- a/trunk/drivers/mmc/host/Kconfig
+++ b/trunk/drivers/mmc/host/Kconfig
@@ -81,32 +81,28 @@ config MMC_RICOH_MMC
 
 	  If unsure, say Y.
 
-config MMC_SDHCI_PLTFM
-	tristate "SDHCI platform and OF driver helper"
-	depends on MMC_SDHCI
+config MMC_SDHCI_OF
+	tristate "SDHCI support on OpenFirmware platforms"
+	depends on MMC_SDHCI && OF
 	help
-	  This selects the common helper functions support for Secure Digital
-	  Host Controller Interface based platform and OF drivers.
-
-	  If you have a controller with this interface, say Y or M here.
+	  This selects the OF support for Secure Digital Host Controller
+	  Interfaces.
 
 	  If unsure, say N.
 
 config MMC_SDHCI_OF_ESDHC
-	tristate "SDHCI OF support for the Freescale eSDHC controller"
-	depends on MMC_SDHCI_PLTFM
+	bool "SDHCI OF support for the Freescale eSDHC controller"
+	depends on MMC_SDHCI_OF
 	depends on PPC_OF
 	select MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER
 	help
 	  This selects the Freescale eSDHC controller support.
 
-	  If you have a controller with this interface, say Y or M here.
-
 	  If unsure, say N.
 
 config MMC_SDHCI_OF_HLWD
-	tristate "SDHCI OF support for the Nintendo Wii SDHCI controllers"
-	depends on MMC_SDHCI_PLTFM
+	bool "SDHCI OF support for the Nintendo Wii SDHCI controllers"
+	depends on MMC_SDHCI_OF
 	depends on PPC_OF
 	select MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER
 	help
@@ -114,36 +110,40 @@ config MMC_SDHCI_OF_HLWD
 	  found in the "Hollywood" chipset of the Nintendo Wii video game
 	  console.
 
+	  If unsure, say N.
+
+config MMC_SDHCI_PLTFM
+	tristate "SDHCI support on the platform specific bus"
+	depends on MMC_SDHCI
+	help
+	  This selects the platform specific bus support for Secure Digital Host
+	  Controller Interface.
+
 	  If you have a controller with this interface, say Y or M here.
 
 	  If unsure, say N.
 
 config MMC_SDHCI_CNS3XXX
-	tristate "SDHCI support on the Cavium Networks CNS3xxx SoC"
+	bool "SDHCI support on the Cavium Networks CNS3xxx SoC"
 	depends on ARCH_CNS3XXX
 	depends on MMC_SDHCI_PLTFM
 	help
 	  This selects the SDHCI support for CNS3xxx System-on-Chip devices.
 
-	  If you have a controller with this interface, say Y or M here.
-
 	  If unsure, say N.
 
 config MMC_SDHCI_ESDHC_IMX
-	tristate "SDHCI platform support for the Freescale eSDHC i.MX controller"
-	depends on ARCH_MX25 || ARCH_MX35 || ARCH_MX5
-	depends on MMC_SDHCI_PLTFM
+	bool "SDHCI platform support for the Freescale eSDHC i.MX controller"
+	depends on MMC_SDHCI_PLTFM && (ARCH_MX25 || ARCH_MX35 || ARCH_MX5)
 	select MMC_SDHCI_IO_ACCESSORS
 	help
 	  This selects the Freescale eSDHC controller support on the platform
 	  bus, found on platforms like mx35/51.
 
-	  If you have a controller with this interface, say Y or M here.
-
 	  If unsure, say N.
 
 config MMC_SDHCI_DOVE
-	tristate "SDHCI support on Marvell's Dove SoC"
+	bool "SDHCI support on Marvell's Dove SoC"
 	depends on ARCH_DOVE
 	depends on MMC_SDHCI_PLTFM
 	select MMC_SDHCI_IO_ACCESSORS
@@ -151,14 +151,11 @@ config MMC_SDHCI_DOVE
 	  This selects the Secure Digital Host Controller Interface in
 	  Marvell's Dove SoC.
 
-	  If you have a controller with this interface, say Y or M here.
-
 	  If unsure, say N.
 
 config MMC_SDHCI_TEGRA
-	tristate "SDHCI platform support for the Tegra SD/MMC Controller"
-	depends on ARCH_TEGRA
-	depends on MMC_SDHCI_PLTFM
+	bool "SDHCI platform support for the Tegra SD/MMC Controller"
+	depends on MMC_SDHCI_PLTFM && ARCH_TEGRA
 	select MMC_SDHCI_IO_ACCESSORS
 	help
 	  This selects the Tegra SD/MMC controller. If you have a Tegra
@@ -181,28 +178,14 @@ config MMC_SDHCI_S3C
 
 	  If unsure, say N.
 
-config MMC_SDHCI_PXAV3
-	tristate "Marvell MMP2 SD Host Controller support (PXAV3)"
-	depends on CLKDEV_LOOKUP
+config MMC_SDHCI_PXA
+	tristate "Marvell PXA168/PXA910/MMP2 SD Host Controller support"
+	depends on ARCH_PXA || ARCH_MMP
 	select MMC_SDHCI
-	select MMC_SDHCI_PLTFM
-	default CPU_MMP2
-	help
-	  This selects the Marvell(R) PXAV3 SD Host Controller.
-	  If you have a MMP2 platform with SD Host Controller
-	  and a card slot, say Y or M here.
-
-	  If unsure, say N.
-
-config MMC_SDHCI_PXAV2
-	tristate "Marvell PXA9XX SD Host Controller support (PXAV2)"
-	depends on CLKDEV_LOOKUP
-	select MMC_SDHCI
-	select MMC_SDHCI_PLTFM
-	default CPU_PXA910
+	select MMC_SDHCI_IO_ACCESSORS
 	help
-	  This selects the Marvell(R) PXAV2 SD Host Controller.
-	  If you have a PXA9XX platform with SD Host Controller
+	  This selects the Marvell(R) PXA168/PXA910/MMP2 SD Host Controller.
+	  If you have a PXA168/PXA910/MMP2 platform with SD Host Controller
 	  and a card slot, say Y or M here.
 
 	  If unsure, say N.
@@ -298,12 +281,13 @@ config MMC_ATMELMCI
 endchoice
 
 config MMC_ATMELMCI_DMA
-	bool "Atmel MCI DMA support"
-	depends on MMC_ATMELMCI && (AVR32 || ARCH_AT91SAM9G45) && DMA_ENGINE
+	bool "Atmel MCI DMA support (EXPERIMENTAL)"
+	depends on MMC_ATMELMCI && (AVR32 || ARCH_AT91SAM9G45) && DMA_ENGINE && EXPERIMENTAL
 	help
 	  Say Y here to have the Atmel MCI driver use a DMA engine to
 	  do data transfers and thus increase the throughput and
-	  reduce the CPU utilization.
+	  reduce the CPU utilization. Note that this is highly
+	  experimental and may cause the driver to lock up.
 
 	  If unsure, say N.
 
diff --git a/trunk/drivers/mmc/host/Makefile b/trunk/drivers/mmc/host/Makefile
index b4b83f302e32..58a5cf73d6e9 100644
--- a/trunk/drivers/mmc/host/Makefile
+++ b/trunk/drivers/mmc/host/Makefile
@@ -9,8 +9,7 @@ obj-$(CONFIG_MMC_MXC)		+= mxcmmc.o
 obj-$(CONFIG_MMC_MXS)		+= mxs-mmc.o
 obj-$(CONFIG_MMC_SDHCI)		+= sdhci.o
 obj-$(CONFIG_MMC_SDHCI_PCI)	+= sdhci-pci.o
-obj-$(CONFIG_MMC_SDHCI_PXAV3)	+= sdhci-pxav3.o
-obj-$(CONFIG_MMC_SDHCI_PXAV2)	+= sdhci-pxav2.o
+obj-$(CONFIG_MMC_SDHCI_PXA)	+= sdhci-pxa.o
 obj-$(CONFIG_MMC_SDHCI_S3C)	+= sdhci-s3c.o
 obj-$(CONFIG_MMC_SDHCI_SPEAR)	+= sdhci-spear.o
 obj-$(CONFIG_MMC_WBSD)		+= wbsd.o
@@ -32,7 +31,9 @@ obj-$(CONFIG_MMC_SDRICOH_CS)	+= sdricoh_cs.o
 obj-$(CONFIG_MMC_TMIO)		+= tmio_mmc.o
 obj-$(CONFIG_MMC_TMIO_CORE)	+= tmio_mmc_core.o
 tmio_mmc_core-y			:= tmio_mmc_pio.o
-tmio_mmc_core-$(subst m,y,$(CONFIG_MMC_SDHI))	+= tmio_mmc_dma.o
+ifneq ($(CONFIG_MMC_SDHI),n)
+tmio_mmc_core-y			+= tmio_mmc_dma.o
+endif
 obj-$(CONFIG_MMC_SDHI)		+= sh_mobile_sdhi.o
 obj-$(CONFIG_MMC_CB710)		+= cb710-mmc.o
 obj-$(CONFIG_MMC_VIA_SDMMC)	+= via-sdmmc.o
@@ -43,13 +44,17 @@ obj-$(CONFIG_MMC_JZ4740)	+= jz4740_mmc.o
 obj-$(CONFIG_MMC_VUB300)	+= vub300.o
 obj-$(CONFIG_MMC_USHC)		+= ushc.o
 
-obj-$(CONFIG_MMC_SDHCI_PLTFM)		+= sdhci-pltfm.o
-obj-$(CONFIG_MMC_SDHCI_CNS3XXX)		+= sdhci-cns3xxx.o
-obj-$(CONFIG_MMC_SDHCI_ESDHC_IMX)	+= sdhci-esdhc-imx.o
-obj-$(CONFIG_MMC_SDHCI_DOVE)		+= sdhci-dove.o
-obj-$(CONFIG_MMC_SDHCI_TEGRA)		+= sdhci-tegra.o
-obj-$(CONFIG_MMC_SDHCI_OF_ESDHC)	+= sdhci-of-esdhc.o
-obj-$(CONFIG_MMC_SDHCI_OF_HLWD)		+= sdhci-of-hlwd.o
+obj-$(CONFIG_MMC_SDHCI_PLTFM)			+= sdhci-platform.o
+sdhci-platform-y				:= sdhci-pltfm.o
+sdhci-platform-$(CONFIG_MMC_SDHCI_CNS3XXX)	+= sdhci-cns3xxx.o
+sdhci-platform-$(CONFIG_MMC_SDHCI_ESDHC_IMX)	+= sdhci-esdhc-imx.o
+sdhci-platform-$(CONFIG_MMC_SDHCI_DOVE)		+= sdhci-dove.o
+sdhci-platform-$(CONFIG_MMC_SDHCI_TEGRA)	+= sdhci-tegra.o
+
+obj-$(CONFIG_MMC_SDHCI_OF)	+= sdhci-of.o
+sdhci-of-y				:= sdhci-of-core.o
+sdhci-of-$(CONFIG_MMC_SDHCI_OF_ESDHC)	+= sdhci-of-esdhc.o
+sdhci-of-$(CONFIG_MMC_SDHCI_OF_HLWD)	+= sdhci-of-hlwd.o
 
 ifeq ($(CONFIG_CB710_DEBUG),y)
 	CFLAGS-cb710-mmc	+= -DDEBUG
diff --git a/trunk/drivers/mmc/host/at91_mci.c b/trunk/drivers/mmc/host/at91_mci.c
index a4aa3af86fed..d3e6a962f423 100644
--- a/trunk/drivers/mmc/host/at91_mci.c
+++ b/trunk/drivers/mmc/host/at91_mci.c
@@ -77,8 +77,7 @@
 
 #include <mach/board.h>
 #include <mach/cpu.h>
-
-#include "at91_mci.h"
+#include <mach/at91_mci.h>
 
 #define DRIVER_NAME "at91_mci"
 
diff --git a/trunk/drivers/mmc/host/atmel-mci.c b/trunk/drivers/mmc/host/atmel-mci.c
index fa8cae1d7005..aa8039f473c4 100644
--- a/trunk/drivers/mmc/host/atmel-mci.c
+++ b/trunk/drivers/mmc/host/atmel-mci.c
@@ -203,7 +203,6 @@ struct atmel_mci_slot {
 #define ATMCI_CARD_PRESENT	0
 #define ATMCI_CARD_NEED_INIT	1
 #define ATMCI_SHUTDOWN		2
-#define ATMCI_SUSPENDED		3
 
 	int			detect_pin;
 	int			wp_pin;
@@ -1879,72 +1878,10 @@ static int __exit atmci_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int atmci_suspend(struct device *dev)
-{
-	struct atmel_mci *host = dev_get_drvdata(dev);
-	int i;
-
-	 for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) {
-		struct atmel_mci_slot *slot = host->slot[i];
-		int ret;
-
-		if (!slot)
-			continue;
-		ret = mmc_suspend_host(slot->mmc);
-		if (ret < 0) {
-			while (--i >= 0) {
-				slot = host->slot[i];
-				if (slot
-				&& test_bit(ATMCI_SUSPENDED, &slot->flags)) {
-					mmc_resume_host(host->slot[i]->mmc);
-					clear_bit(ATMCI_SUSPENDED, &slot->flags);
-				}
-			}
-			return ret;
-		} else {
-			set_bit(ATMCI_SUSPENDED, &slot->flags);
-		}
-	}
-
-	return 0;
-}
-
-static int atmci_resume(struct device *dev)
-{
-	struct atmel_mci *host = dev_get_drvdata(dev);
-	int i;
-	int ret = 0;
-
-	for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) {
-		struct atmel_mci_slot *slot = host->slot[i];
-		int err;
-
-		slot = host->slot[i];
-		if (!slot)
-			continue;
-		if (!test_bit(ATMCI_SUSPENDED, &slot->flags))
-			continue;
-		err = mmc_resume_host(slot->mmc);
-		if (err < 0)
-			ret = err;
-		else
-			clear_bit(ATMCI_SUSPENDED, &slot->flags);
-	}
-
-	return ret;
-}
-static SIMPLE_DEV_PM_OPS(atmci_pm, atmci_suspend, atmci_resume);
-#define ATMCI_PM_OPS	(&atmci_pm)
-#else
-#define ATMCI_PM_OPS	NULL
-#endif
-
 static struct platform_driver atmci_driver = {
 	.remove		= __exit_p(atmci_remove),
 	.driver		= {
 		.name		= "atmel_mci",
-		.pm		= ATMCI_PM_OPS,
 	},
 };
 
diff --git a/trunk/drivers/mmc/host/dw_mmc.c b/trunk/drivers/mmc/host/dw_mmc.c
index 0c839d3338db..66dcddb9c205 100644
--- a/trunk/drivers/mmc/host/dw_mmc.c
+++ b/trunk/drivers/mmc/host/dw_mmc.c
@@ -33,7 +33,6 @@
 #include <linux/mmc/dw_mmc.h>
 #include <linux/bitops.h>
 #include <linux/regulator/consumer.h>
-#include <linux/workqueue.h>
 
 #include "dw_mmc.h"
 
@@ -101,8 +100,6 @@ struct dw_mci_slot {
 	int			last_detect_state;
 };
 
-static struct workqueue_struct *dw_mci_card_workqueue;
-
 #if defined(CONFIG_DEBUG_FS)
 static int dw_mci_req_show(struct seq_file *s, void *v)
 {
@@ -287,7 +284,7 @@ static void send_stop_cmd(struct dw_mci *host, struct mmc_data *data)
 /* DMA interface functions */
 static void dw_mci_stop_dma(struct dw_mci *host)
 {
-	if (host->using_dma) {
+	if (host->use_dma) {
 		host->dma_ops->stop(host);
 		host->dma_ops->cleanup(host);
 	} else {
@@ -435,8 +432,6 @@ static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data)
 	unsigned int i, direction, sg_len;
 	u32 temp;
 
-	host->using_dma = 0;
-
 	/* If we don't have a channel, we can't do DMA */
 	if (!host->use_dma)
 		return -ENODEV;
@@ -456,8 +451,6 @@ static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data)
 			return -EINVAL;
 	}
 
-	host->using_dma = 1;
-
 	if (data->flags & MMC_DATA_READ)
 		direction = DMA_FROM_DEVICE;
 	else
@@ -496,18 +489,14 @@ static void dw_mci_submit_data(struct dw_mci *host, struct mmc_data *data)
 	host->sg = NULL;
 	host->data = data;
 
-	if (data->flags & MMC_DATA_READ)
-		host->dir_status = DW_MCI_RECV_STATUS;
-	else
-		host->dir_status = DW_MCI_SEND_STATUS;
-
 	if (dw_mci_submit_data_dma(host, data)) {
 		host->sg = data->sg;
 		host->pio_offset = 0;
-		host->part_buf_start = 0;
-		host->part_buf_count = 0;
+		if (data->flags & MMC_DATA_READ)
+			host->dir_status = DW_MCI_RECV_STATUS;
+		else
+			host->dir_status = DW_MCI_SEND_STATUS;
 
-		mci_writel(host, RINTSTS, SDMMC_INT_TXDR | SDMMC_INT_RXDR);
 		temp = mci_readl(host, INTMASK);
 		temp |= SDMMC_INT_TXDR | SDMMC_INT_RXDR;
 		mci_writel(host, INTMASK, temp);
@@ -585,7 +574,7 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot)
 	}
 
 	/* Set the current slot bus width */
-	mci_writel(host, CTYPE, (slot->ctype << slot->id));
+	mci_writel(host, CTYPE, slot->ctype);
 }
 
 static void dw_mci_start_request(struct dw_mci *host,
@@ -635,13 +624,13 @@ static void dw_mci_start_request(struct dw_mci *host,
 		host->stop_cmdr = dw_mci_prepare_command(slot->mmc, mrq->stop);
 }
 
-/* must be called with host->lock held */
 static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot,
 				 struct mmc_request *mrq)
 {
 	dev_vdbg(&slot->mmc->class_dev, "queue request: state=%d\n",
 		 host->state);
 
+	spin_lock_bh(&host->lock);
 	slot->mrq = mrq;
 
 	if (host->state == STATE_IDLE) {
@@ -650,6 +639,8 @@ static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot,
 	} else {
 		list_add_tail(&slot->queue_node, &host->queue);
 	}
+
+	spin_unlock_bh(&host->lock);
 }
 
 static void dw_mci_request(struct mmc_host *mmc, struct mmc_request *mrq)
@@ -659,23 +650,14 @@ static void dw_mci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 
 	WARN_ON(slot->mrq);
 
-	/*
-	 * The check for card presence and queueing of the request must be
-	 * atomic, otherwise the card could be removed in between and the
-	 * request wouldn't fail until another card was inserted.
-	 */
-	spin_lock_bh(&host->lock);
-
 	if (!test_bit(DW_MMC_CARD_PRESENT, &slot->flags)) {
-		spin_unlock_bh(&host->lock);
 		mrq->cmd->error = -ENOMEDIUM;
 		mmc_request_done(mmc, mrq);
 		return;
 	}
 
+	/* We don't support multiple blocks of weird lengths. */
 	dw_mci_queue_request(host, slot, mrq);
-
-	spin_unlock_bh(&host->lock);
 }
 
 static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
@@ -849,7 +831,7 @@ static void dw_mci_tasklet_func(unsigned long priv)
 	struct mmc_command *cmd;
 	enum dw_mci_state state;
 	enum dw_mci_state prev_state;
-	u32 status, ctrl;
+	u32 status;
 
 	spin_lock(&host->lock);
 
@@ -909,19 +891,13 @@ static void dw_mci_tasklet_func(unsigned long priv)
 
 			if (status & DW_MCI_DATA_ERROR_FLAGS) {
 				if (status & SDMMC_INT_DTO) {
+					dev_err(&host->pdev->dev,
+						"data timeout error\n");
 					data->error = -ETIMEDOUT;
 				} else if (status & SDMMC_INT_DCRC) {
+					dev_err(&host->pdev->dev,
+						"data CRC error\n");
 					data->error = -EILSEQ;
-				} else if (status & SDMMC_INT_EBE &&
-					   host->dir_status ==
-							DW_MCI_SEND_STATUS) {
-					/*
-					 * No data CRC status was returned.
-					 * The number of bytes transferred will
-					 * be exaggerated in PIO mode.
-					 */
-					data->bytes_xfered = 0;
-					data->error = -ETIMEDOUT;
 				} else {
 					dev_err(&host->pdev->dev,
 						"data FIFO error "
@@ -929,16 +905,6 @@ static void dw_mci_tasklet_func(unsigned long priv)
 						status);
 					data->error = -EIO;
 				}
-				/*
-				 * After an error, there may be data lingering
-				 * in the FIFO, so reset it - doing so
-				 * generates a block interrupt, hence setting
-				 * the scatter-gather pointer to NULL.
-				 */
-				host->sg = NULL;
-				ctrl = mci_readl(host, CTRL);
-				ctrl |= SDMMC_CTRL_FIFO_RESET;
-				mci_writel(host, CTRL, ctrl);
 			} else {
 				data->bytes_xfered = data->blocks * data->blksz;
 				data->error = 0;
@@ -980,278 +946,84 @@ static void dw_mci_tasklet_func(unsigned long priv)
 
 }
 
-/* push final bytes to part_buf, only use during push */
-static void dw_mci_set_part_bytes(struct dw_mci *host, void *buf, int cnt)
+static void dw_mci_push_data16(struct dw_mci *host, void *buf, int cnt)
 {
-	memcpy((void *)&host->part_buf, buf, cnt);
-	host->part_buf_count = cnt;
-}
+	u16 *pdata = (u16 *)buf;
 
-/* append bytes to part_buf, only use during push */
-static int dw_mci_push_part_bytes(struct dw_mci *host, void *buf, int cnt)
-{
-	cnt = min(cnt, (1 << host->data_shift) - host->part_buf_count);
-	memcpy((void *)&host->part_buf + host->part_buf_count, buf, cnt);
-	host->part_buf_count += cnt;
-	return cnt;
-}
+	WARN_ON(cnt % 2 != 0);
 
-/* pull first bytes from part_buf, only use during pull */
-static int dw_mci_pull_part_bytes(struct dw_mci *host, void *buf, int cnt)
-{
-	cnt = min(cnt, (int)host->part_buf_count);
-	if (cnt) {
-		memcpy(buf, (void *)&host->part_buf + host->part_buf_start,
-		       cnt);
-		host->part_buf_count -= cnt;
-		host->part_buf_start += cnt;
+	cnt = cnt >> 1;
+	while (cnt > 0) {
+		mci_writew(host, DATA, *pdata++);
+		cnt--;
 	}
-	return cnt;
 }
 
-/* pull final bytes from the part_buf, assuming it's just been filled */
-static void dw_mci_pull_final_bytes(struct dw_mci *host, void *buf, int cnt)
+static void dw_mci_pull_data16(struct dw_mci *host, void *buf, int cnt)
 {
-	memcpy(buf, &host->part_buf, cnt);
-	host->part_buf_start = cnt;
-	host->part_buf_count = (1 << host->data_shift) - cnt;
-}
+	u16 *pdata = (u16 *)buf;
 
-static void dw_mci_push_data16(struct dw_mci *host, void *buf, int cnt)
-{
-	/* try and push anything in the part_buf */
-	if (unlikely(host->part_buf_count)) {
-		int len = dw_mci_push_part_bytes(host, buf, cnt);
-		buf += len;
-		cnt -= len;
-		if (!sg_next(host->sg) || host->part_buf_count == 2) {
-			mci_writew(host, DATA, host->part_buf16);
-			host->part_buf_count = 0;
-		}
-	}
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-	if (unlikely((unsigned long)buf & 0x1)) {
-		while (cnt >= 2) {
-			u16 aligned_buf[64];
-			int len = min(cnt & -2, (int)sizeof(aligned_buf));
-			int items = len >> 1;
-			int i;
-			/* memcpy from input buffer into aligned buffer */
-			memcpy(aligned_buf, buf, len);
-			buf += len;
-			cnt -= len;
-			/* push data from aligned buffer into fifo */
-			for (i = 0; i < items; ++i)
-				mci_writew(host, DATA, aligned_buf[i]);
-		}
-	} else
-#endif
-	{
-		u16 *pdata = buf;
-		for (; cnt >= 2; cnt -= 2)
-			mci_writew(host, DATA, *pdata++);
-		buf = pdata;
-	}
-	/* put anything remaining in the part_buf */
-	if (cnt) {
-		dw_mci_set_part_bytes(host, buf, cnt);
-		if (!sg_next(host->sg))
-			mci_writew(host, DATA, host->part_buf16);
-	}
-}
+	WARN_ON(cnt % 2 != 0);
 
-static void dw_mci_pull_data16(struct dw_mci *host, void *buf, int cnt)
-{
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-	if (unlikely((unsigned long)buf & 0x1)) {
-		while (cnt >= 2) {
-			/* pull data from fifo into aligned buffer */
-			u16 aligned_buf[64];
-			int len = min(cnt & -2, (int)sizeof(aligned_buf));
-			int items = len >> 1;
-			int i;
-			for (i = 0; i < items; ++i)
-				aligned_buf[i] = mci_readw(host, DATA);
-			/* memcpy from aligned buffer into output buffer */
-			memcpy(buf, aligned_buf, len);
-			buf += len;
-			cnt -= len;
-		}
-	} else
-#endif
-	{
-		u16 *pdata = buf;
-		for (; cnt >= 2; cnt -= 2)
-			*pdata++ = mci_readw(host, DATA);
-		buf = pdata;
-	}
-	if (cnt) {
-		host->part_buf16 = mci_readw(host, DATA);
-		dw_mci_pull_final_bytes(host, buf, cnt);
+	cnt = cnt >> 1;
+	while (cnt > 0) {
+		*pdata++ = mci_readw(host, DATA);
+		cnt--;
 	}
 }
 
 static void dw_mci_push_data32(struct dw_mci *host, void *buf, int cnt)
 {
-	/* try and push anything in the part_buf */
-	if (unlikely(host->part_buf_count)) {
-		int len = dw_mci_push_part_bytes(host, buf, cnt);
-		buf += len;
-		cnt -= len;
-		if (!sg_next(host->sg) || host->part_buf_count == 4) {
-			mci_writel(host, DATA, host->part_buf32);
-			host->part_buf_count = 0;
-		}
-	}
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-	if (unlikely((unsigned long)buf & 0x3)) {
-		while (cnt >= 4) {
-			u32 aligned_buf[32];
-			int len = min(cnt & -4, (int)sizeof(aligned_buf));
-			int items = len >> 2;
-			int i;
-			/* memcpy from input buffer into aligned buffer */
-			memcpy(aligned_buf, buf, len);
-			buf += len;
-			cnt -= len;
-			/* push data from aligned buffer into fifo */
-			for (i = 0; i < items; ++i)
-				mci_writel(host, DATA, aligned_buf[i]);
-		}
-	} else
-#endif
-	{
-		u32 *pdata = buf;
-		for (; cnt >= 4; cnt -= 4)
-			mci_writel(host, DATA, *pdata++);
-		buf = pdata;
-	}
-	/* put anything remaining in the part_buf */
-	if (cnt) {
-		dw_mci_set_part_bytes(host, buf, cnt);
-		if (!sg_next(host->sg))
-			mci_writel(host, DATA, host->part_buf32);
+	u32 *pdata = (u32 *)buf;
+
+	WARN_ON(cnt % 4 != 0);
+	WARN_ON((unsigned long)pdata & 0x3);
+
+	cnt = cnt >> 2;
+	while (cnt > 0) {
+		mci_writel(host, DATA, *pdata++);
+		cnt--;
 	}
 }
 
 static void dw_mci_pull_data32(struct dw_mci *host, void *buf, int cnt)
 {
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-	if (unlikely((unsigned long)buf & 0x3)) {
-		while (cnt >= 4) {
-			/* pull data from fifo into aligned buffer */
-			u32 aligned_buf[32];
-			int len = min(cnt & -4, (int)sizeof(aligned_buf));
-			int items = len >> 2;
-			int i;
-			for (i = 0; i < items; ++i)
-				aligned_buf[i] = mci_readl(host, DATA);
-			/* memcpy from aligned buffer into output buffer */
-			memcpy(buf, aligned_buf, len);
-			buf += len;
-			cnt -= len;
-		}
-	} else
-#endif
-	{
-		u32 *pdata = buf;
-		for (; cnt >= 4; cnt -= 4)
-			*pdata++ = mci_readl(host, DATA);
-		buf = pdata;
-	}
-	if (cnt) {
-		host->part_buf32 = mci_readl(host, DATA);
-		dw_mci_pull_final_bytes(host, buf, cnt);
+	u32 *pdata = (u32 *)buf;
+
+	WARN_ON(cnt % 4 != 0);
+	WARN_ON((unsigned long)pdata & 0x3);
+
+	cnt = cnt >> 2;
+	while (cnt > 0) {
+		*pdata++ = mci_readl(host, DATA);
+		cnt--;
 	}
 }
 
 static void dw_mci_push_data64(struct dw_mci *host, void *buf, int cnt)
 {
-	/* try and push anything in the part_buf */
-	if (unlikely(host->part_buf_count)) {
-		int len = dw_mci_push_part_bytes(host, buf, cnt);
-		buf += len;
-		cnt -= len;
-		if (!sg_next(host->sg) || host->part_buf_count == 8) {
-			mci_writew(host, DATA, host->part_buf);
-			host->part_buf_count = 0;
-		}
-	}
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-	if (unlikely((unsigned long)buf & 0x7)) {
-		while (cnt >= 8) {
-			u64 aligned_buf[16];
-			int len = min(cnt & -8, (int)sizeof(aligned_buf));
-			int items = len >> 3;
-			int i;
-			/* memcpy from input buffer into aligned buffer */
-			memcpy(aligned_buf, buf, len);
-			buf += len;
-			cnt -= len;
-			/* push data from aligned buffer into fifo */
-			for (i = 0; i < items; ++i)
-				mci_writeq(host, DATA, aligned_buf[i]);
-		}
-	} else
-#endif
-	{
-		u64 *pdata = buf;
-		for (; cnt >= 8; cnt -= 8)
-			mci_writeq(host, DATA, *pdata++);
-		buf = pdata;
-	}
-	/* put anything remaining in the part_buf */
-	if (cnt) {
-		dw_mci_set_part_bytes(host, buf, cnt);
-		if (!sg_next(host->sg))
-			mci_writeq(host, DATA, host->part_buf);
-	}
-}
+	u64 *pdata = (u64 *)buf;
 
-static void dw_mci_pull_data64(struct dw_mci *host, void *buf, int cnt)
-{
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-	if (unlikely((unsigned long)buf & 0x7)) {
-		while (cnt >= 8) {
-			/* pull data from fifo into aligned buffer */
-			u64 aligned_buf[16];
-			int len = min(cnt & -8, (int)sizeof(aligned_buf));
-			int items = len >> 3;
-			int i;
-			for (i = 0; i < items; ++i)
-				aligned_buf[i] = mci_readq(host, DATA);
-			/* memcpy from aligned buffer into output buffer */
-			memcpy(buf, aligned_buf, len);
-			buf += len;
-			cnt -= len;
-		}
-	} else
-#endif
-	{
-		u64 *pdata = buf;
-		for (; cnt >= 8; cnt -= 8)
-			*pdata++ = mci_readq(host, DATA);
-		buf = pdata;
-	}
-	if (cnt) {
-		host->part_buf = mci_readq(host, DATA);
-		dw_mci_pull_final_bytes(host, buf, cnt);
+	WARN_ON(cnt % 8 != 0);
+
+	cnt = cnt >> 3;
+	while (cnt > 0) {
+		mci_writeq(host, DATA, *pdata++);
+		cnt--;
 	}
 }
 
-static void dw_mci_pull_data(struct dw_mci *host, void *buf, int cnt)
+static void dw_mci_pull_data64(struct dw_mci *host, void *buf, int cnt)
 {
-	int len;
+	u64 *pdata = (u64 *)buf;
 
-	/* get remaining partial bytes */
-	len = dw_mci_pull_part_bytes(host, buf, cnt);
-	if (unlikely(len == cnt))
-		return;
-	buf += len;
-	cnt -= len;
+	WARN_ON(cnt % 8 != 0);
 
-	/* get the rest of the data */
-	host->pull_data(host, buf, cnt);
+	cnt = cnt >> 3;
+	while (cnt > 0) {
+		*pdata++ = mci_readq(host, DATA);
+		cnt--;
+	}
 }
 
 static void dw_mci_read_data_pio(struct dw_mci *host)
@@ -1265,10 +1037,9 @@ static void dw_mci_read_data_pio(struct dw_mci *host)
 	unsigned int nbytes = 0, len;
 
 	do {
-		len = host->part_buf_count +
-			(SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift);
+		len = SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift;
 		if (offset + len <= sg->length) {
-			dw_mci_pull_data(host, (void *)(buf + offset), len);
+			host->pull_data(host, (void *)(buf + offset), len);
 
 			offset += len;
 			nbytes += len;
@@ -1284,8 +1055,8 @@ static void dw_mci_read_data_pio(struct dw_mci *host)
 			}
 		} else {
 			unsigned int remaining = sg->length - offset;
-			dw_mci_pull_data(host, (void *)(buf + offset),
-					 remaining);
+			host->pull_data(host, (void *)(buf + offset),
+					remaining);
 			nbytes += remaining;
 
 			flush_dcache_page(sg_page(sg));
@@ -1295,7 +1066,7 @@ static void dw_mci_read_data_pio(struct dw_mci *host)
 
 			offset = len - remaining;
 			buf = sg_virt(sg);
-			dw_mci_pull_data(host, buf, offset);
+			host->pull_data(host, buf, offset);
 			nbytes += offset;
 		}
 
@@ -1312,6 +1083,7 @@ static void dw_mci_read_data_pio(struct dw_mci *host)
 			return;
 		}
 	} while (status & SDMMC_INT_RXDR); /*if the RXDR is ready read again*/
+	len = SDMMC_GET_FCNT(mci_readl(host, STATUS));
 	host->pio_offset = offset;
 	data->bytes_xfered += nbytes;
 	return;
@@ -1333,9 +1105,8 @@ static void dw_mci_write_data_pio(struct dw_mci *host)
 	unsigned int nbytes = 0, len;
 
 	do {
-		len = ((host->fifo_depth -
-			SDMMC_GET_FCNT(mci_readl(host, STATUS))) << shift)
-			- host->part_buf_count;
+		len = SDMMC_FIFO_SZ -
+			(SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift);
 		if (offset + len <= sg->length) {
 			host->push_data(host, (void *)(buf + offset), len);
 
@@ -1380,8 +1151,10 @@ static void dw_mci_write_data_pio(struct dw_mci *host)
 			return;
 		}
 	} while (status & SDMMC_INT_TXDR); /* if TXDR write again */
+
 	host->pio_offset = offset;
 	data->bytes_xfered += nbytes;
+
 	return;
 
 done:
@@ -1429,6 +1202,7 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id)
 			host->cmd_status = status;
 			smp_wmb();
 			set_bit(EVENT_CMD_COMPLETE, &host->pending_events);
+			tasklet_schedule(&host->tasklet);
 		}
 
 		if (pending & DW_MCI_DATA_ERROR_FLAGS) {
@@ -1437,9 +1211,7 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id)
 			host->data_status = status;
 			smp_wmb();
 			set_bit(EVENT_DATA_ERROR, &host->pending_events);
-			if (!(pending & (SDMMC_INT_DTO | SDMMC_INT_DCRC |
-					 SDMMC_INT_SBE | SDMMC_INT_EBE)))
-				tasklet_schedule(&host->tasklet);
+			tasklet_schedule(&host->tasklet);
 		}
 
 		if (pending & SDMMC_INT_DATA_OVER) {
@@ -1457,13 +1229,13 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id)
 
 		if (pending & SDMMC_INT_RXDR) {
 			mci_writel(host, RINTSTS, SDMMC_INT_RXDR);
-			if (host->dir_status == DW_MCI_RECV_STATUS && host->sg)
+			if (host->sg)
 				dw_mci_read_data_pio(host);
 		}
 
 		if (pending & SDMMC_INT_TXDR) {
 			mci_writel(host, RINTSTS, SDMMC_INT_TXDR);
-			if (host->dir_status == DW_MCI_SEND_STATUS && host->sg)
+			if (host->sg)
 				dw_mci_write_data_pio(host);
 		}
 
@@ -1474,7 +1246,7 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id)
 
 		if (pending & SDMMC_INT_CD) {
 			mci_writel(host, RINTSTS, SDMMC_INT_CD);
-			queue_work(dw_mci_card_workqueue, &host->card_work);
+			tasklet_schedule(&host->card_tasklet);
 		}
 
 	} while (pass_count++ < 5);
@@ -1493,9 +1265,9 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static void dw_mci_work_routine_card(struct work_struct *work)
+static void dw_mci_tasklet_card(unsigned long data)
 {
-	struct dw_mci *host = container_of(work, struct dw_mci, card_work);
+	struct dw_mci *host = (struct dw_mci *)data;
 	int i;
 
 	for (i = 0; i < host->num_slots; i++) {
@@ -1507,21 +1279,22 @@ static void dw_mci_work_routine_card(struct work_struct *work)
 
 		present = dw_mci_get_cd(mmc);
 		while (present != slot->last_detect_state) {
+			spin_lock(&host->lock);
+
 			dev_dbg(&slot->mmc->class_dev, "card %s\n",
 				present ? "inserted" : "removed");
 
-			/* Power up slot (before spin_lock, may sleep) */
-			if (present != 0 && host->pdata->setpower)
-				host->pdata->setpower(slot->id, mmc->ocr_avail);
-
-			spin_lock_bh(&host->lock);
-
 			/* Card change detected */
 			slot->last_detect_state = present;
 
-			/* Mark card as present if applicable */
-			if (present != 0)
+			/* Power up slot */
+			if (present != 0) {
+				if (host->pdata->setpower)
+					host->pdata->setpower(slot->id,
+							      mmc->ocr_avail);
+
 				set_bit(DW_MMC_CARD_PRESENT, &slot->flags);
+			}
 
 			/* Clean up queue if present */
 			mrq = slot->mrq;
@@ -1571,6 +1344,8 @@ static void dw_mci_work_routine_card(struct work_struct *work)
 
 			/* Power down slot */
 			if (present == 0) {
+				if (host->pdata->setpower)
+					host->pdata->setpower(slot->id, 0);
 				clear_bit(DW_MMC_CARD_PRESENT, &slot->flags);
 
 				/*
@@ -1592,12 +1367,7 @@ static void dw_mci_work_routine_card(struct work_struct *work)
 
 			}
 
-			spin_unlock_bh(&host->lock);
-
-			/* Power down slot (after spin_unlock, may sleep) */
-			if (present == 0 && host->pdata->setpower)
-				host->pdata->setpower(slot->id, 0);
-
+			spin_unlock(&host->lock);
 			present = dw_mci_get_cd(mmc);
 		}
 
@@ -1697,7 +1467,7 @@ static int __init dw_mci_init_slot(struct dw_mci *host, unsigned int id)
 	 * Card may have been plugged in prior to boot so we
 	 * need to run the detect tasklet
 	 */
-	queue_work(dw_mci_card_workqueue, &host->card_work);
+	tasklet_schedule(&host->card_tasklet);
 
 	return 0;
 }
@@ -1875,19 +1645,8 @@ static int dw_mci_probe(struct platform_device *pdev)
 	 * FIFO threshold settings  RxMark  = fifo_size / 2 - 1,
 	 *                          Tx Mark = fifo_size / 2 DMA Size = 8
 	 */
-	if (!host->pdata->fifo_depth) {
-		/*
-		 * Power-on value of RX_WMark is FIFO_DEPTH-1, but this may
-		 * have been overwritten by the bootloader, just like we're
-		 * about to do, so if you know the value for your hardware, you
-		 * should put it in the platform data.
-		 */
-		fifo_size = mci_readl(host, FIFOTH);
-		fifo_size = 1 + ((fifo_size >> 16) & 0x7ff);
-	} else {
-		fifo_size = host->pdata->fifo_depth;
-	}
-	host->fifo_depth = fifo_size;
+	fifo_size = mci_readl(host, FIFOTH);
+	fifo_size = (fifo_size >> 16) & 0x7ff;
 	host->fifoth_val = ((0x2 << 28) | ((fifo_size/2 - 1) << 16) |
 			((fifo_size/2) << 0));
 	mci_writel(host, FIFOTH, host->fifoth_val);
@@ -1897,15 +1656,12 @@ static int dw_mci_probe(struct platform_device *pdev)
 	mci_writel(host, CLKSRC, 0);
 
 	tasklet_init(&host->tasklet, dw_mci_tasklet_func, (unsigned long)host);
-	dw_mci_card_workqueue = alloc_workqueue("dw-mci-card",
-			WQ_MEM_RECLAIM | WQ_NON_REENTRANT, 1);
-	if (!dw_mci_card_workqueue)
-		goto err_dmaunmap;
-	INIT_WORK(&host->card_work, dw_mci_work_routine_card);
+	tasklet_init(&host->card_tasklet,
+		     dw_mci_tasklet_card, (unsigned long)host);
 
 	ret = request_irq(irq, dw_mci_interrupt, 0, "dw-mci", host);
 	if (ret)
-		goto err_workqueue;
+		goto err_dmaunmap;
 
 	platform_set_drvdata(pdev, host);
 
@@ -1934,9 +1690,7 @@ static int dw_mci_probe(struct platform_device *pdev)
 	mci_writel(host, CTRL, SDMMC_CTRL_INT_ENABLE); /* Enable mci interrupt */
 
 	dev_info(&pdev->dev, "DW MMC controller at irq %d, "
-		 "%d bit host data width, "
-		 "%u deep fifo\n",
-		 irq, width, fifo_size);
+		 "%d bit host data width\n", irq, width);
 	if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO)
 		dev_info(&pdev->dev, "Internal DMAC interrupt fix enabled.\n");
 
@@ -1951,9 +1705,6 @@ static int dw_mci_probe(struct platform_device *pdev)
 	}
 	free_irq(irq, host);
 
-err_workqueue:
-	destroy_workqueue(dw_mci_card_workqueue);
-
 err_dmaunmap:
 	if (host->use_dma && host->dma_ops->exit)
 		host->dma_ops->exit(host);
@@ -1993,7 +1744,6 @@ static int __exit dw_mci_remove(struct platform_device *pdev)
 	mci_writel(host, CLKSRC, 0);
 
 	free_irq(platform_get_irq(pdev, 0), host);
-	destroy_workqueue(dw_mci_card_workqueue);
 	dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma);
 
 	if (host->use_dma && host->dma_ops->exit)
diff --git a/trunk/drivers/mmc/host/dw_mmc.h b/trunk/drivers/mmc/host/dw_mmc.h
index 027d37735394..23c662af5616 100644
--- a/trunk/drivers/mmc/host/dw_mmc.h
+++ b/trunk/drivers/mmc/host/dw_mmc.h
@@ -118,6 +118,7 @@
 #define SDMMC_CMD_INDX(n)		((n) & 0x1F)
 /* Status register defines */
 #define SDMMC_GET_FCNT(x)		(((x)>>17) & 0x1FF)
+#define SDMMC_FIFO_SZ			32
 /* Internal DMAC interrupt defines */
 #define SDMMC_IDMAC_INT_AI		BIT(9)
 #define SDMMC_IDMAC_INT_NI		BIT(8)
@@ -133,22 +134,22 @@
 
 /* Register access macros */
 #define mci_readl(dev, reg)			\
-	__raw_readl((dev)->regs + SDMMC_##reg)
+	__raw_readl(dev->regs + SDMMC_##reg)
 #define mci_writel(dev, reg, value)			\
-	__raw_writel((value), (dev)->regs + SDMMC_##reg)
+	__raw_writel((value), dev->regs + SDMMC_##reg)
 
 /* 16-bit FIFO access macros */
 #define mci_readw(dev, reg)			\
-	__raw_readw((dev)->regs + SDMMC_##reg)
+	__raw_readw(dev->regs + SDMMC_##reg)
 #define mci_writew(dev, reg, value)			\
-	__raw_writew((value), (dev)->regs + SDMMC_##reg)
+	__raw_writew((value), dev->regs + SDMMC_##reg)
 
 /* 64-bit FIFO access macros */
 #ifdef readq
 #define mci_readq(dev, reg)			\
-	__raw_readq((dev)->regs + SDMMC_##reg)
+	__raw_readq(dev->regs + SDMMC_##reg)
 #define mci_writeq(dev, reg, value)			\
-	__raw_writeq((value), (dev)->regs + SDMMC_##reg)
+	__raw_writeq((value), dev->regs + SDMMC_##reg)
 #else
 /*
  * Dummy readq implementation for architectures that don't define it.
@@ -159,9 +160,9 @@
  * rest of the code free from ifdefs.
  */
 #define mci_readq(dev, reg)			\
-	(*(volatile u64 __force *)((dev)->regs + SDMMC_##reg))
+	(*(volatile u64 __force *)(dev->regs + SDMMC_##reg))
 #define mci_writeq(dev, reg, value)			\
-	(*(volatile u64 __force *)((dev)->regs + SDMMC_##reg) = (value))
+	(*(volatile u64 __force *)(dev->regs + SDMMC_##reg) = value)
 #endif
 
 #endif /* _DW_MMC_H_ */
diff --git a/trunk/drivers/mmc/host/mmci.c b/trunk/drivers/mmc/host/mmci.c
index fef7140eb1d0..fe140724a02e 100644
--- a/trunk/drivers/mmc/host/mmci.c
+++ b/trunk/drivers/mmc/host/mmci.c
@@ -226,9 +226,6 @@ static void __devinit mmci_dma_setup(struct mmci_host *host)
 		return;
 	}
 
-	/* initialize pre request cookie */
-	host->next_data.cookie = 1;
-
 	/* Try to acquire a generic DMA engine slave channel */
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
@@ -338,8 +335,7 @@ static void mmci_dma_unmap(struct mmci_host *host, struct mmc_data *data)
 		dir = DMA_FROM_DEVICE;
 	}
 
-	if (!data->host_cookie)
-		dma_unmap_sg(chan->device->dev, data->sg, data->sg_len, dir);
+	dma_unmap_sg(chan->device->dev, data->sg, data->sg_len, dir);
 
 	/*
 	 * Use of DMA with scatter-gather is impossible.
@@ -357,8 +353,7 @@ static void mmci_dma_data_error(struct mmci_host *host)
 	dmaengine_terminate_all(host->dma_current);
 }
 
-static int mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data,
-			      struct mmci_host_next *next)
+static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl)
 {
 	struct variant_data *variant = host->variant;
 	struct dma_slave_config conf = {
@@ -369,20 +364,13 @@ static int mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data,
 		.src_maxburst = variant->fifohalfsize >> 2, /* # of words */
 		.dst_maxburst = variant->fifohalfsize >> 2, /* # of words */
 	};
+	struct mmc_data *data = host->data;
 	struct dma_chan *chan;
 	struct dma_device *device;
 	struct dma_async_tx_descriptor *desc;
 	int nr_sg;
 
-	/* Check if next job is already prepared */
-	if (data->host_cookie && !next &&
-	    host->dma_current && host->dma_desc_current)
-		return 0;
-
-	if (!next) {
-		host->dma_current = NULL;
-		host->dma_desc_current = NULL;
-	}
+	host->dma_current = NULL;
 
 	if (data->flags & MMC_DATA_READ) {
 		conf.direction = DMA_FROM_DEVICE;
@@ -397,7 +385,7 @@ static int mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data,
 		return -EINVAL;
 
 	/* If less than or equal to the fifo size, don't bother with DMA */
-	if (data->blksz * data->blocks <= variant->fifosize)
+	if (host->size <= variant->fifosize)
 		return -EINVAL;
 
 	device = chan->device;
@@ -411,38 +399,14 @@ static int mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data,
 	if (!desc)
 		goto unmap_exit;
 
-	if (next) {
-		next->dma_chan = chan;
-		next->dma_desc = desc;
-	} else {
-		host->dma_current = chan;
-		host->dma_desc_current = desc;
-	}
-
-	return 0;
-
- unmap_exit:
-	if (!next)
-		dmaengine_terminate_all(chan);
-	dma_unmap_sg(device->dev, data->sg, data->sg_len, conf.direction);
-	return -ENOMEM;
-}
-
-static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl)
-{
-	int ret;
-	struct mmc_data *data = host->data;
-
-	ret = mmci_dma_prep_data(host, host->data, NULL);
-	if (ret)
-		return ret;
-
 	/* Okay, go for it. */
+	host->dma_current = chan;
+
 	dev_vdbg(mmc_dev(host->mmc),
 		 "Submit MMCI DMA job, sglen %d blksz %04x blks %04x flags %08x\n",
 		 data->sg_len, data->blksz, data->blocks, data->flags);
-	dmaengine_submit(host->dma_desc_current);
-	dma_async_issue_pending(host->dma_current);
+	dmaengine_submit(desc);
+	dma_async_issue_pending(chan);
 
 	datactrl |= MCI_DPSM_DMAENABLE;
 
@@ -457,90 +421,14 @@ static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl)
 	writel(readl(host->base + MMCIMASK0) | MCI_DATAENDMASK,
 	       host->base + MMCIMASK0);
 	return 0;
-}
-
-static void mmci_get_next_data(struct mmci_host *host, struct mmc_data *data)
-{
-	struct mmci_host_next *next = &host->next_data;
 
-	if (data->host_cookie && data->host_cookie != next->cookie) {
-		printk(KERN_WARNING "[%s] invalid cookie: data->host_cookie %d"
-		       " host->next_data.cookie %d\n",
-		       __func__, data->host_cookie, host->next_data.cookie);
-		data->host_cookie = 0;
-	}
-
-	if (!data->host_cookie)
-		return;
-
-	host->dma_desc_current = next->dma_desc;
-	host->dma_current = next->dma_chan;
-
-	next->dma_desc = NULL;
-	next->dma_chan = NULL;
-}
-
-static void mmci_pre_request(struct mmc_host *mmc, struct mmc_request *mrq,
-			     bool is_first_req)
-{
-	struct mmci_host *host = mmc_priv(mmc);
-	struct mmc_data *data = mrq->data;
-	struct mmci_host_next *nd = &host->next_data;
-
-	if (!data)
-		return;
-
-	if (data->host_cookie) {
-		data->host_cookie = 0;
-		return;
-	}
-
-	/* if config for dma */
-	if (((data->flags & MMC_DATA_WRITE) && host->dma_tx_channel) ||
-	    ((data->flags & MMC_DATA_READ) && host->dma_rx_channel)) {
-		if (mmci_dma_prep_data(host, data, nd))
-			data->host_cookie = 0;
-		else
-			data->host_cookie = ++nd->cookie < 0 ? 1 : nd->cookie;
-	}
-}
-
-static void mmci_post_request(struct mmc_host *mmc, struct mmc_request *mrq,
-			      int err)
-{
-	struct mmci_host *host = mmc_priv(mmc);
-	struct mmc_data *data = mrq->data;
-	struct dma_chan *chan;
-	enum dma_data_direction dir;
-
-	if (!data)
-		return;
-
-	if (data->flags & MMC_DATA_READ) {
-		dir = DMA_FROM_DEVICE;
-		chan = host->dma_rx_channel;
-	} else {
-		dir = DMA_TO_DEVICE;
-		chan = host->dma_tx_channel;
-	}
-
-
-	/* if config for dma */
-	if (chan) {
-		if (err)
-			dmaengine_terminate_all(chan);
-		if (err || data->host_cookie)
-			dma_unmap_sg(mmc_dev(host->mmc), data->sg,
-				     data->sg_len, dir);
-		mrq->data->host_cookie = 0;
-	}
+unmap_exit:
+	dmaengine_terminate_all(chan);
+	dma_unmap_sg(device->dev, data->sg, data->sg_len, conf.direction);
+	return -ENOMEM;
 }
-
 #else
 /* Blank functions if the DMA engine is not available */
-static void mmci_get_next_data(struct mmci_host *host, struct mmc_data *data)
-{
-}
 static inline void mmci_dma_setup(struct mmci_host *host)
 {
 }
@@ -561,10 +449,6 @@ static inline int mmci_dma_start_data(struct mmci_host *host, unsigned int datac
 {
 	return -ENOSYS;
 }
-
-#define mmci_pre_request NULL
-#define mmci_post_request NULL
-
 #endif
 
 static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
@@ -988,9 +872,6 @@ static void mmci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 
 	host->mrq = mrq;
 
-	if (mrq->data)
-		mmci_get_next_data(host, mrq->data);
-
 	if (mrq->data && mrq->data->flags & MMC_DATA_READ)
 		mmci_start_data(host, mrq->data);
 
@@ -1105,8 +986,6 @@ static irqreturn_t mmci_cd_irq(int irq, void *dev_id)
 
 static const struct mmc_host_ops mmci_ops = {
 	.request	= mmci_request,
-	.pre_req	= mmci_pre_request,
-	.post_req	= mmci_post_request,
 	.set_ios	= mmci_set_ios,
 	.get_ro		= mmci_get_ro,
 	.get_cd		= mmci_get_cd,
diff --git a/trunk/drivers/mmc/host/mmci.h b/trunk/drivers/mmc/host/mmci.h
index 79e4143ab9df..2164e8c6476c 100644
--- a/trunk/drivers/mmc/host/mmci.h
+++ b/trunk/drivers/mmc/host/mmci.h
@@ -166,12 +166,6 @@ struct clk;
 struct variant_data;
 struct dma_chan;
 
-struct mmci_host_next {
-	struct dma_async_tx_descriptor	*dma_desc;
-	struct dma_chan			*dma_chan;
-	s32				cookie;
-};
-
 struct mmci_host {
 	phys_addr_t		phybase;
 	void __iomem		*base;
@@ -209,8 +203,6 @@ struct mmci_host {
 	struct dma_chan		*dma_current;
 	struct dma_chan		*dma_rx_channel;
 	struct dma_chan		*dma_tx_channel;
-	struct dma_async_tx_descriptor	*dma_desc_current;
-	struct mmci_host_next	next_data;
 
 #define dma_inprogress(host)	((host)->dma_current)
 #else
diff --git a/trunk/drivers/mmc/host/mxs-mmc.c b/trunk/drivers/mmc/host/mxs-mmc.c
index d513d47364d0..99d39a6a1032 100644
--- a/trunk/drivers/mmc/host/mxs-mmc.c
+++ b/trunk/drivers/mmc/host/mxs-mmc.c
@@ -564,38 +564,40 @@ static void mxs_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
 
 static void mxs_mmc_set_clk_rate(struct mxs_mmc_host *host, unsigned int rate)
 {
-	unsigned int ssp_clk, ssp_sck;
-	u32 clock_divide, clock_rate;
+	unsigned int ssp_rate, bit_rate;
+	u32 div1, div2;
 	u32 val;
 
-	ssp_clk = clk_get_rate(host->clk);
+	ssp_rate = clk_get_rate(host->clk);
 
-	for (clock_divide = 2; clock_divide <= 254; clock_divide += 2) {
-		clock_rate = DIV_ROUND_UP(ssp_clk, rate * clock_divide);
-		clock_rate = (clock_rate > 0) ? clock_rate - 1 : 0;
-		if (clock_rate <= 255)
+	for (div1 = 2; div1 < 254; div1 += 2) {
+		div2 = ssp_rate / rate / div1;
+		if (div2 < 0x100)
 			break;
 	}
 
-	if (clock_divide > 254) {
+	if (div1 >= 254) {
 		dev_err(mmc_dev(host->mmc),
 			"%s: cannot set clock to %d\n", __func__, rate);
 		return;
 	}
 
-	ssp_sck = ssp_clk / clock_divide / (1 + clock_rate);
+	/* div2 - 1 is written below; keep a zero quotient from wrapping */
+	if (div2 == 0)
+		div2 = 1;
+	bit_rate = ssp_rate / div1 / div2;
 
 	val = readl(host->base + HW_SSP_TIMING);
 	val &= ~(BM_SSP_TIMING_CLOCK_DIVIDE | BM_SSP_TIMING_CLOCK_RATE);
-	val |= BF_SSP(clock_divide, TIMING_CLOCK_DIVIDE);
-	val |= BF_SSP(clock_rate, TIMING_CLOCK_RATE);
+	val |= BF_SSP(div1, TIMING_CLOCK_DIVIDE);
+	val |= BF_SSP(div2 - 1, TIMING_CLOCK_RATE);
 	writel(val, host->base + HW_SSP_TIMING);
 
-	host->clk_rate = ssp_sck;
+	host->clk_rate = bit_rate;
 
 	dev_dbg(mmc_dev(host->mmc),
-		"%s: clock_divide %d, clock_rate %d, ssp_clk %d, rate_actual %d, rate_requested %d\n",
-		__func__, clock_divide, clock_rate, ssp_clk, ssp_sck, rate);
+		"%s: div1 %d, div2 %d, ssp %d, bit %d, rate %d\n",
+		__func__, div1, div2, ssp_rate, bit_rate, rate);
 }
 
 static void mxs_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
diff --git a/trunk/drivers/mmc/host/omap_hsmmc.c b/trunk/drivers/mmc/host/omap_hsmmc.c
index 21e4a799df48..dedf3dab8a3b 100644
--- a/trunk/drivers/mmc/host/omap_hsmmc.c
+++ b/trunk/drivers/mmc/host/omap_hsmmc.c
@@ -17,7 +17,6 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/kernel.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
@@ -34,7 +33,6 @@
 #include <linux/semaphore.h>
 #include <linux/gpio.h>
 #include <linux/regulator/consumer.h>
-#include <linux/pm_runtime.h>
 #include <plat/dma.h>
 #include <mach/hardware.h>
 #include <plat/board.h>
@@ -118,13 +116,15 @@
 #define OMAP_MMC4_DEVID		3
 #define OMAP_MMC5_DEVID		4
 
-#define MMC_AUTOSUSPEND_DELAY	100
 #define MMC_TIMEOUT_MS		20
 #define OMAP_MMC_MASTER_CLOCK	96000000
-#define OMAP_MMC_MIN_CLOCK	400000
-#define OMAP_MMC_MAX_CLOCK	52000000
 #define DRIVER_NAME		"omap_hsmmc"
 
+/* Timeouts for entering power saving states on inactivity, msec */
+#define OMAP_MMC_DISABLED_TIMEOUT	100
+#define OMAP_MMC_SLEEP_TIMEOUT		1000
+#define OMAP_MMC_OFF_TIMEOUT		8000
+
 /*
  * One controller can have multiple slots, like on some omap boards using
  * omap.c controller driver. Luckily this is not currently done on any known
@@ -141,11 +141,6 @@
 #define OMAP_HSMMC_WRITE(base, reg, val) \
 	__raw_writel((val), (base) + OMAP_HSMMC_##reg)
 
-struct omap_hsmmc_next {
-	unsigned int	dma_len;
-	s32		cookie;
-};
-
 struct omap_hsmmc_host {
 	struct	device		*dev;
 	struct	mmc_host	*mmc;
@@ -153,6 +148,7 @@ struct omap_hsmmc_host {
 	struct	mmc_command	*cmd;
 	struct	mmc_data	*data;
 	struct	clk		*fclk;
+	struct	clk		*iclk;
 	struct	clk		*dbclk;
 	/*
 	 * vcc == configured supply
@@ -188,7 +184,6 @@ struct omap_hsmmc_host {
 	int			reqs_blocked;
 	int			use_reg;
 	int			req_in_progress;
-	struct omap_hsmmc_next	next_data;
 
 	struct	omap_mmc_platform_data	*pdata;
 };
@@ -552,15 +547,6 @@ static void omap_hsmmc_gpio_free(struct omap_mmc_platform_data *pdata)
 		gpio_free(pdata->slots[0].switch_pin);
 }
 
-/*
- * Start clock to the card
- */
-static void omap_hsmmc_start_clock(struct omap_hsmmc_host *host)
-{
-	OMAP_HSMMC_WRITE(host->base, SYSCTL,
-		OMAP_HSMMC_READ(host->base, SYSCTL) | CEN);
-}
-
 /*
  * Stop clock to the card
  */
@@ -598,81 +584,6 @@ static void omap_hsmmc_disable_irq(struct omap_hsmmc_host *host)
 	OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
 }
 
-/* Calculate divisor for the given clock frequency */
-static u16 calc_divisor(struct mmc_ios *ios)
-{
-	u16 dsor = 0;
-
-	if (ios->clock) {
-		dsor = DIV_ROUND_UP(OMAP_MMC_MASTER_CLOCK, ios->clock);
-		if (dsor > 250)
-			dsor = 250;
-	}
-
-	return dsor;
-}
-
-static void omap_hsmmc_set_clock(struct omap_hsmmc_host *host)
-{
-	struct mmc_ios *ios = &host->mmc->ios;
-	unsigned long regval;
-	unsigned long timeout;
-
-	dev_dbg(mmc_dev(host->mmc), "Set clock to %uHz\n", ios->clock);
-
-	omap_hsmmc_stop_clock(host);
-
-	regval = OMAP_HSMMC_READ(host->base, SYSCTL);
-	regval = regval & ~(CLKD_MASK | DTO_MASK);
-	regval = regval | (calc_divisor(ios) << 6) | (DTO << 16);
-	OMAP_HSMMC_WRITE(host->base, SYSCTL, regval);
-	OMAP_HSMMC_WRITE(host->base, SYSCTL,
-		OMAP_HSMMC_READ(host->base, SYSCTL) | ICE);
-
-	/* Wait till the ICS bit is set */
-	timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS);
-	while ((OMAP_HSMMC_READ(host->base, SYSCTL) & ICS) != ICS
-		&& time_before(jiffies, timeout))
-		cpu_relax();
-
-	omap_hsmmc_start_clock(host);
-}
-
-static void omap_hsmmc_set_bus_width(struct omap_hsmmc_host *host)
-{
-	struct mmc_ios *ios = &host->mmc->ios;
-	u32 con;
-
-	con = OMAP_HSMMC_READ(host->base, CON);
-	switch (ios->bus_width) {
-	case MMC_BUS_WIDTH_8:
-		OMAP_HSMMC_WRITE(host->base, CON, con | DW8);
-		break;
-	case MMC_BUS_WIDTH_4:
-		OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8);
-		OMAP_HSMMC_WRITE(host->base, HCTL,
-			OMAP_HSMMC_READ(host->base, HCTL) | FOUR_BIT);
-		break;
-	case MMC_BUS_WIDTH_1:
-		OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8);
-		OMAP_HSMMC_WRITE(host->base, HCTL,
-			OMAP_HSMMC_READ(host->base, HCTL) & ~FOUR_BIT);
-		break;
-	}
-}
-
-static void omap_hsmmc_set_bus_mode(struct omap_hsmmc_host *host)
-{
-	struct mmc_ios *ios = &host->mmc->ios;
-	u32 con;
-
-	con = OMAP_HSMMC_READ(host->base, CON);
-	if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN)
-		OMAP_HSMMC_WRITE(host->base, CON, con | OD);
-	else
-		OMAP_HSMMC_WRITE(host->base, CON, con & ~OD);
-}
-
 #ifdef CONFIG_PM
 
 /*
@@ -684,7 +595,8 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host)
 	struct mmc_ios *ios = &host->mmc->ios;
 	struct omap_mmc_platform_data *pdata = host->pdata;
 	int context_loss = 0;
-	u32 hctl, capa;
+	u32 hctl, capa, con;
+	u16 dsor = 0;
 	unsigned long timeout;
 
 	if (pdata->get_context_loss_count) {
@@ -746,12 +658,54 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host)
 	if (host->power_mode == MMC_POWER_OFF)
 		goto out;
 
-	omap_hsmmc_set_bus_width(host);
+	con = OMAP_HSMMC_READ(host->base, CON);
+	switch (ios->bus_width) {
+	case MMC_BUS_WIDTH_8:
+		OMAP_HSMMC_WRITE(host->base, CON, con | DW8);
+		break;
+	case MMC_BUS_WIDTH_4:
+		OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8);
+		OMAP_HSMMC_WRITE(host->base, HCTL,
+			OMAP_HSMMC_READ(host->base, HCTL) | FOUR_BIT);
+		break;
+	case MMC_BUS_WIDTH_1:
+		OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8);
+		OMAP_HSMMC_WRITE(host->base, HCTL,
+			OMAP_HSMMC_READ(host->base, HCTL) & ~FOUR_BIT);
+		break;
+	}
+
+	if (ios->clock) {
+		dsor = OMAP_MMC_MASTER_CLOCK / ios->clock;
+		if (dsor < 1)
+			dsor = 1;
+
+		if (OMAP_MMC_MASTER_CLOCK / dsor > ios->clock)
+			dsor++;
+
+		if (dsor > 250)
+			dsor = 250;
+	}
+
+	OMAP_HSMMC_WRITE(host->base, SYSCTL,
+		OMAP_HSMMC_READ(host->base, SYSCTL) & ~CEN);
+	OMAP_HSMMC_WRITE(host->base, SYSCTL, (dsor << 6) | (DTO << 16));
+	OMAP_HSMMC_WRITE(host->base, SYSCTL,
+		OMAP_HSMMC_READ(host->base, SYSCTL) | ICE);
 
-	omap_hsmmc_set_clock(host);
+	timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS);
+	while ((OMAP_HSMMC_READ(host->base, SYSCTL) & ICS) != ICS
+		&& time_before(jiffies, timeout))
+		;
 
-	omap_hsmmc_set_bus_mode(host);
+	OMAP_HSMMC_WRITE(host->base, SYSCTL,
+		OMAP_HSMMC_READ(host->base, SYSCTL) | CEN);
 
+	con = OMAP_HSMMC_READ(host->base, CON);
+	if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN)
+		OMAP_HSMMC_WRITE(host->base, CON, con | OD);
+	else
+		OMAP_HSMMC_WRITE(host->base, CON, con & ~OD);
 out:
 	host->context_loss = context_loss;
 
@@ -1019,14 +973,14 @@ static void omap_hsmmc_dma_cleanup(struct omap_hsmmc_host *host, int errno)
  * Readable error output
  */
 #ifdef CONFIG_MMC_DEBUG
-static void omap_hsmmc_dbg_report_irq(struct omap_hsmmc_host *host, u32 status)
+static void omap_hsmmc_report_irq(struct omap_hsmmc_host *host, u32 status)
 {
 	/* --- means reserved bit without definition at documentation */
 	static const char *omap_hsmmc_status_bits[] = {
-		"CC"  , "TC"  , "BGE", "---", "BWR" , "BRR" , "---" , "---" ,
-		"CIRQ",	"OBI" , "---", "---", "---" , "---" , "---" , "ERRI",
-		"CTO" , "CCRC", "CEB", "CIE", "DTO" , "DCRC", "DEB" , "---" ,
-		"ACE" , "---" , "---", "---", "CERR", "BADA", "---" , "---"
+		"CC", "TC", "BGE", "---", "BWR", "BRR", "---", "---", "CIRQ",
+		"OBI", "---", "---", "---", "---", "---", "ERRI", "CTO", "CCRC",
+		"CEB", "CIE", "DTO", "DCRC", "DEB", "---", "ACE", "---",
+		"---", "---", "---", "CERR", "CERR", "BADA", "---", "---", "---"
 	};
 	char res[256];
 	char *buf = res;
@@ -1043,11 +997,6 @@ static void omap_hsmmc_dbg_report_irq(struct omap_hsmmc_host *host, u32 status)
 
 	dev_dbg(mmc_dev(host->mmc), "%s\n", res);
 }
-#else
-static inline void omap_hsmmc_dbg_report_irq(struct omap_hsmmc_host *host,
-					     u32 status)
-{
-}
 #endif  /* CONFIG_MMC_DEBUG */
 
 /*
@@ -1106,7 +1055,9 @@ static void omap_hsmmc_do_irq(struct omap_hsmmc_host *host, int status)
 	dev_dbg(mmc_dev(host->mmc), "IRQ Status is %x\n", status);
 
 	if (status & ERR) {
-		omap_hsmmc_dbg_report_irq(host, status);
+#ifdef CONFIG_MMC_DEBUG
+		omap_hsmmc_report_irq(host, status);
+#endif
 		if ((status & CMD_TIMEOUT) ||
 			(status & CMD_CRC)) {
 			if (host->cmd) {
@@ -1204,7 +1155,8 @@ static int omap_hsmmc_switch_opcond(struct omap_hsmmc_host *host, int vdd)
 	int ret;
 
 	/* Disable the clocks */
-	pm_runtime_put_sync(host->dev);
+	clk_disable(host->fclk);
+	clk_disable(host->iclk);
 	if (host->got_dbclk)
 		clk_disable(host->dbclk);
 
@@ -1215,7 +1167,8 @@ static int omap_hsmmc_switch_opcond(struct omap_hsmmc_host *host, int vdd)
 	if (!ret)
 		ret = mmc_slot(host).set_power(host->dev, host->slot_id, 1,
 					       vdd);
-	pm_runtime_get_sync(host->dev);
+	clk_enable(host->iclk);
+	clk_enable(host->fclk);
 	if (host->got_dbclk)
 		clk_enable(host->dbclk);
 
@@ -1369,7 +1322,7 @@ static void omap_hsmmc_config_dma_params(struct omap_hsmmc_host *host,
 static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data)
 {
 	struct omap_hsmmc_host *host = cb_data;
-	struct mmc_data *data;
+	struct mmc_data *data = host->mrq->data;
 	int dma_ch, req_in_progress;
 
 	if (!(ch_status & OMAP_DMA_BLOCK_IRQ)) {
@@ -1384,7 +1337,6 @@ static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data)
 		return;
 	}
 
-	data = host->mrq->data;
 	host->dma_sg_idx++;
 	if (host->dma_sg_idx < host->dma_len) {
 		/* Fire up the next transfer. */
@@ -1394,9 +1346,8 @@ static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data)
 		return;
 	}
 
-	if (!data->host_cookie)
-		dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-			     omap_hsmmc_get_dma_dir(host, data));
+	dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+		omap_hsmmc_get_dma_dir(host, data));
 
 	req_in_progress = host->req_in_progress;
 	dma_ch = host->dma_ch;
@@ -1414,45 +1365,6 @@ static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data)
 	}
 }
 
-static int omap_hsmmc_pre_dma_transfer(struct omap_hsmmc_host *host,
-				       struct mmc_data *data,
-				       struct omap_hsmmc_next *next)
-{
-	int dma_len;
-
-	if (!next && data->host_cookie &&
-	    data->host_cookie != host->next_data.cookie) {
-		printk(KERN_WARNING "[%s] invalid cookie: data->host_cookie %d"
-		       " host->next_data.cookie %d\n",
-		       __func__, data->host_cookie, host->next_data.cookie);
-		data->host_cookie = 0;
-	}
-
-	/* Check if next job is already prepared */
-	if (next ||
-	    (!next && data->host_cookie != host->next_data.cookie)) {
-		dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg,
-				     data->sg_len,
-				     omap_hsmmc_get_dma_dir(host, data));
-
-	} else {
-		dma_len = host->next_data.dma_len;
-		host->next_data.dma_len = 0;
-	}
-
-
-	if (dma_len == 0)
-		return -EINVAL;
-
-	if (next) {
-		next->dma_len = dma_len;
-		data->host_cookie = ++next->cookie < 0 ? 1 : next->cookie;
-	} else
-		host->dma_len = dma_len;
-
-	return 0;
-}
-
 /*
  * Routine to configure and start DMA for the MMC card
  */
@@ -1486,10 +1398,9 @@ static int omap_hsmmc_start_dma_transfer(struct omap_hsmmc_host *host,
 			mmc_hostname(host->mmc), ret);
 		return ret;
 	}
-	ret = omap_hsmmc_pre_dma_transfer(host, data, NULL);
-	if (ret)
-		return ret;
 
+	host->dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg,
+			data->sg_len, omap_hsmmc_get_dma_dir(host, data));
 	host->dma_ch = dma_ch;
 	host->dma_sg_idx = 0;
 
@@ -1569,35 +1480,6 @@ omap_hsmmc_prepare_data(struct omap_hsmmc_host *host, struct mmc_request *req)
 	return 0;
 }
 
-static void omap_hsmmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
-				int err)
-{
-	struct omap_hsmmc_host *host = mmc_priv(mmc);
-	struct mmc_data *data = mrq->data;
-
-	if (host->use_dma) {
-		dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-			     omap_hsmmc_get_dma_dir(host, data));
-		data->host_cookie = 0;
-	}
-}
-
-static void omap_hsmmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
-			       bool is_first_req)
-{
-	struct omap_hsmmc_host *host = mmc_priv(mmc);
-
-	if (mrq->data->host_cookie) {
-		mrq->data->host_cookie = 0;
-		return ;
-	}
-
-	if (host->use_dma)
-		if (omap_hsmmc_pre_dma_transfer(host, mrq->data,
-						&host->next_data))
-			mrq->data->host_cookie = 0;
-}
-
 /*
  * Request function. for read/write operation
  */
@@ -1646,9 +1528,13 @@ static void omap_hsmmc_request(struct mmc_host *mmc, struct mmc_request *req)
 static void omap_hsmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
 	struct omap_hsmmc_host *host = mmc_priv(mmc);
+	u16 dsor = 0;
+	unsigned long regval;
+	unsigned long timeout;
+	u32 con;
 	int do_send_init_stream = 0;
 
-	pm_runtime_get_sync(host->dev);
+	mmc_host_enable(host->mmc);
 
 	if (ios->power_mode != host->power_mode) {
 		switch (ios->power_mode) {
@@ -1671,7 +1557,22 @@ static void omap_hsmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 
 	/* FIXME: set registers based only on changes to ios */
 
-	omap_hsmmc_set_bus_width(host);
+	con = OMAP_HSMMC_READ(host->base, CON);
+	switch (mmc->ios.bus_width) {
+	case MMC_BUS_WIDTH_8:
+		OMAP_HSMMC_WRITE(host->base, CON, con | DW8);
+		break;
+	case MMC_BUS_WIDTH_4:
+		OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8);
+		OMAP_HSMMC_WRITE(host->base, HCTL,
+			OMAP_HSMMC_READ(host->base, HCTL) | FOUR_BIT);
+		break;
+	case MMC_BUS_WIDTH_1:
+		OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8);
+		OMAP_HSMMC_WRITE(host->base, HCTL,
+			OMAP_HSMMC_READ(host->base, HCTL) & ~FOUR_BIT);
+		break;
+	}
 
 	if (host->pdata->controller_flags & OMAP_HSMMC_SUPPORTS_DUAL_VOLT) {
 		/* Only MMC1 can interface at 3V without some flavor
@@ -1691,14 +1592,47 @@ static void omap_hsmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		}
 	}
 
-	omap_hsmmc_set_clock(host);
+	if (ios->clock) {
+		dsor = OMAP_MMC_MASTER_CLOCK / ios->clock;
+		if (dsor < 1)
+			dsor = 1;
+
+		if (OMAP_MMC_MASTER_CLOCK / dsor > ios->clock)
+			dsor++;
+
+		if (dsor > 250)
+			dsor = 250;
+	}
+	omap_hsmmc_stop_clock(host);
+	regval = OMAP_HSMMC_READ(host->base, SYSCTL);
+	regval = regval & ~(CLKD_MASK);
+	regval = regval | (dsor << 6) | (DTO << 16);
+	OMAP_HSMMC_WRITE(host->base, SYSCTL, regval);
+	OMAP_HSMMC_WRITE(host->base, SYSCTL,
+		OMAP_HSMMC_READ(host->base, SYSCTL) | ICE);
+
+	/* Wait till the ICS bit is set */
+	timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS);
+	while ((OMAP_HSMMC_READ(host->base, SYSCTL) & ICS) != ICS
+		&& time_before(jiffies, timeout))
+		msleep(1);
+
+	OMAP_HSMMC_WRITE(host->base, SYSCTL,
+		OMAP_HSMMC_READ(host->base, SYSCTL) | CEN);
 
 	if (do_send_init_stream)
 		send_init_stream(host);
 
-	omap_hsmmc_set_bus_mode(host);
+	con = OMAP_HSMMC_READ(host->base, CON);
+	if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN)
+		OMAP_HSMMC_WRITE(host->base, CON, con | OD);
+	else
+		OMAP_HSMMC_WRITE(host->base, CON, con & ~OD);
 
-	pm_runtime_put_autosuspend(host->dev);
+	if (host->power_mode == MMC_POWER_OFF)
+		mmc_host_disable(host->mmc);
+	else
+		mmc_host_lazy_disable(host->mmc);
 }
 
 static int omap_hsmmc_get_cd(struct mmc_host *mmc)
@@ -1754,12 +1688,230 @@ static void omap_hsmmc_conf_bus_power(struct omap_hsmmc_host *host)
 	set_sd_bus_power(host);
 }
 
-static int omap_hsmmc_enable_fclk(struct mmc_host *mmc)
+/*
+ * Dynamic power saving handling, FSM:
+ *   ENABLED -> DISABLED -> CARDSLEEP / REGSLEEP -> OFF
+ *     ^___________|          |                      |
+ *     |______________________|______________________|
+ *
+ * ENABLED:   mmc host is fully functional
+ * DISABLED:  fclk is off
+ * CARDSLEEP: fclk is off, card is asleep, voltage regulator is asleep
+ * REGSLEEP:  fclk is off, voltage regulator is asleep
+ * OFF:       fclk is off, voltage regulator is off
+ *
+ * Transition handlers return the timeout for the next state transition
+ * or negative error.
+ */
+
+enum {ENABLED = 0, DISABLED, CARDSLEEP, REGSLEEP, OFF};
+
+/* Handler for [ENABLED -> DISABLED] transition */
+static int omap_hsmmc_enabled_to_disabled(struct omap_hsmmc_host *host)
+{
+	omap_hsmmc_context_save(host);
+	clk_disable(host->fclk);
+	host->dpm_state = DISABLED;
+
+	dev_dbg(mmc_dev(host->mmc), "ENABLED -> DISABLED\n");
+
+	if (host->power_mode == MMC_POWER_OFF)
+		return 0;
+
+	return OMAP_MMC_SLEEP_TIMEOUT;
+}
+
+/* Handler for [DISABLED -> REGSLEEP / CARDSLEEP] transition */
+static int omap_hsmmc_disabled_to_sleep(struct omap_hsmmc_host *host)
+{
+	int err, new_state;
+
+	if (!mmc_try_claim_host(host->mmc))
+		return 0;
+
+	clk_enable(host->fclk);
+	omap_hsmmc_context_restore(host);
+	if (mmc_card_can_sleep(host->mmc)) {
+		err = mmc_card_sleep(host->mmc);
+		if (err < 0) {
+			clk_disable(host->fclk);
+			mmc_release_host(host->mmc);
+			return err;
+		}
+		new_state = CARDSLEEP;
+	} else {
+		new_state = REGSLEEP;
+	}
+	if (mmc_slot(host).set_sleep)
+		mmc_slot(host).set_sleep(host->dev, host->slot_id, 1, 0,
+					 new_state == CARDSLEEP);
+	/* FIXME: turn off bus power and perhaps interrupts too */
+	clk_disable(host->fclk);
+	host->dpm_state = new_state;
+
+	mmc_release_host(host->mmc);
+
+	dev_dbg(mmc_dev(host->mmc), "DISABLED -> %s\n",
+		host->dpm_state == CARDSLEEP ? "CARDSLEEP" : "REGSLEEP");
+
+	if (mmc_slot(host).no_off)
+		return 0;
+
+	if ((host->mmc->caps & MMC_CAP_NONREMOVABLE) ||
+	    mmc_slot(host).card_detect ||
+	    (mmc_slot(host).get_cover_state &&
+	     mmc_slot(host).get_cover_state(host->dev, host->slot_id)))
+		return OMAP_MMC_OFF_TIMEOUT;
+
+	return 0;
+}
+
+/* Handler for [REGSLEEP / CARDSLEEP -> OFF] transition; always returns 0 */
+static int omap_hsmmc_sleep_to_off(struct omap_hsmmc_host *host)
+{
+	/* Check no_off before claiming, so no return leaks the claim */
+	if (mmc_slot(host).no_off)
+		return 0;
+	if (!mmc_try_claim_host(host->mmc))
+		return 0;
+
+	if (!((host->mmc->caps & MMC_CAP_NONREMOVABLE) ||
+	      mmc_slot(host).card_detect ||
+	      (mmc_slot(host).get_cover_state &&
+	       mmc_slot(host).get_cover_state(host->dev, host->slot_id)))) {
+		mmc_release_host(host->mmc);
+		return 0;
+	}
+
+	mmc_slot(host).set_power(host->dev, host->slot_id, 0, 0);
+	host->vdd = 0;
+	host->power_mode = MMC_POWER_OFF;
+
+	dev_dbg(mmc_dev(host->mmc), "%s -> OFF\n",
+		host->dpm_state == CARDSLEEP ? "CARDSLEEP" : "REGSLEEP");
+
+	host->dpm_state = OFF;
+
+	mmc_release_host(host->mmc);
+
+	return 0;
+}
+
+/* Handler for [DISABLED -> ENABLED] transition */
+static int omap_hsmmc_disabled_to_enabled(struct omap_hsmmc_host *host)
+{
+	int err;
+
+	err = clk_enable(host->fclk);
+	if (err < 0)
+		return err;
+
+	omap_hsmmc_context_restore(host);
+	host->dpm_state = ENABLED;
+
+	dev_dbg(mmc_dev(host->mmc), "DISABLED -> ENABLED\n");
+
+	return 0;
+}
+
+/* Handler for [SLEEP -> ENABLED] transition */
+static int omap_hsmmc_sleep_to_enabled(struct omap_hsmmc_host *host)
+{
+	if (!mmc_try_claim_host(host->mmc))
+		return 0;
+
+	clk_enable(host->fclk);
+	omap_hsmmc_context_restore(host);
+	if (mmc_slot(host).set_sleep)
+		mmc_slot(host).set_sleep(host->dev, host->slot_id, 0,
+			 host->vdd, host->dpm_state == CARDSLEEP);
+	if (mmc_card_can_sleep(host->mmc))
+		mmc_card_awake(host->mmc);
+
+	dev_dbg(mmc_dev(host->mmc), "%s -> ENABLED\n",
+		host->dpm_state == CARDSLEEP ? "CARDSLEEP" : "REGSLEEP");
+
+	host->dpm_state = ENABLED;
+
+	mmc_release_host(host->mmc);
+
+	return 0;
+}
+
+/* Handler for [OFF -> ENABLED] transition */
+static int omap_hsmmc_off_to_enabled(struct omap_hsmmc_host *host)
+{
+	clk_enable(host->fclk);
+
+	omap_hsmmc_context_restore(host);
+	omap_hsmmc_conf_bus_power(host);
+	mmc_power_restore_host(host->mmc);
+
+	host->dpm_state = ENABLED;
+
+	dev_dbg(mmc_dev(host->mmc), "OFF -> ENABLED\n");
+
+	return 0;
+}
+
+/*
+ * Bring MMC host to ENABLED from any other PM state.
+ */
+static int omap_hsmmc_enable(struct mmc_host *mmc)
+{
+	struct omap_hsmmc_host *host = mmc_priv(mmc);
+
+	switch (host->dpm_state) {
+	case DISABLED:
+		return omap_hsmmc_disabled_to_enabled(host);
+	case CARDSLEEP:
+	case REGSLEEP:
+		return omap_hsmmc_sleep_to_enabled(host);
+	case OFF:
+		return omap_hsmmc_off_to_enabled(host);
+	default:
+		dev_dbg(mmc_dev(host->mmc), "UNKNOWN state\n");
+		return -EINVAL;
+	}
+}
+
+/*
+ * Bring MMC host in PM state (one level deeper).
+ */
+static int omap_hsmmc_disable(struct mmc_host *mmc, int lazy)
 {
 	struct omap_hsmmc_host *host = mmc_priv(mmc);
 
-	pm_runtime_get_sync(host->dev);
+	switch (host->dpm_state) {
+	case ENABLED: {
+		int delay;
+
+		delay = omap_hsmmc_enabled_to_disabled(host);
+		if (lazy || delay < 0)
+			return delay;
+		return 0;
+	}
+	case DISABLED:
+		return omap_hsmmc_disabled_to_sleep(host);
+	case CARDSLEEP:
+	case REGSLEEP:
+		return omap_hsmmc_sleep_to_off(host);
+	default:
+		dev_dbg(mmc_dev(host->mmc), "UNKNOWN state\n");
+		return -EINVAL;
+	}
+}
+
+static int omap_hsmmc_enable_fclk(struct mmc_host *mmc)
+{
+	struct omap_hsmmc_host *host = mmc_priv(mmc);
+	int err;
 
+	err = clk_enable(host->fclk);
+	if (err)
+		return err;
+	dev_dbg(mmc_dev(host->mmc), "mmc_fclk: enabled\n");
+	omap_hsmmc_context_restore(host);
 	return 0;
 }
 
@@ -1767,17 +1919,26 @@ static int omap_hsmmc_disable_fclk(struct mmc_host *mmc, int lazy)
 {
 	struct omap_hsmmc_host *host = mmc_priv(mmc);
 
-	pm_runtime_mark_last_busy(host->dev);
-	pm_runtime_put_autosuspend(host->dev);
-
+	omap_hsmmc_context_save(host);
+	clk_disable(host->fclk);
+	dev_dbg(mmc_dev(host->mmc), "mmc_fclk: disabled\n");
 	return 0;
 }
 
 static const struct mmc_host_ops omap_hsmmc_ops = {
 	.enable = omap_hsmmc_enable_fclk,
 	.disable = omap_hsmmc_disable_fclk,
-	.post_req = omap_hsmmc_post_req,
-	.pre_req = omap_hsmmc_pre_req,
+	.request = omap_hsmmc_request,
+	.set_ios = omap_hsmmc_set_ios,
+	.get_cd = omap_hsmmc_get_cd,
+	.get_ro = omap_hsmmc_get_ro,
+	.init_card = omap_hsmmc_init_card,
+	/* NYET -- enable_sdio_irq */
+};
+
+static const struct mmc_host_ops omap_hsmmc_ps_ops = {
+	.enable = omap_hsmmc_enable,
+	.disable = omap_hsmmc_disable,
 	.request = omap_hsmmc_request,
 	.set_ios = omap_hsmmc_set_ios,
 	.get_cd = omap_hsmmc_get_cd,
@@ -1807,12 +1968,15 @@ static int omap_hsmmc_regs_show(struct seq_file *s, void *data)
 			host->dpm_state, mmc->nesting_cnt,
 			host->context_loss, context_loss);
 
-	if (host->suspended) {
+	if (host->suspended || host->dpm_state == OFF) {
 		seq_printf(s, "host suspended, can't read registers\n");
 		return 0;
 	}
 
-	pm_runtime_get_sync(host->dev);
+	if (clk_enable(host->fclk) != 0) {
+		seq_printf(s, "can't read the regs\n");
+		return 0;
+	}
 
 	seq_printf(s, "SYSCONFIG:\t0x%08x\n",
 			OMAP_HSMMC_READ(host->base, SYSCONFIG));
@@ -1829,8 +1993,7 @@ static int omap_hsmmc_regs_show(struct seq_file *s, void *data)
 	seq_printf(s, "CAPA:\t\t0x%08x\n",
 			OMAP_HSMMC_READ(host->base, CAPA));
 
-	pm_runtime_mark_last_busy(host->dev);
-	pm_runtime_put_autosuspend(host->dev);
+	clk_disable(host->fclk);
 
 	return 0;
 }
@@ -1914,12 +2077,14 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev)
 	host->mapbase	= res->start;
 	host->base	= ioremap(host->mapbase, SZ_4K);
 	host->power_mode = MMC_POWER_OFF;
-	host->next_data.cookie = 1;
 
 	platform_set_drvdata(pdev, host);
 	INIT_WORK(&host->mmc_carddetect_work, omap_hsmmc_detect);
 
-	mmc->ops	= &omap_hsmmc_ops;
+	if (mmc_slot(host).power_saving)
+		mmc->ops	= &omap_hsmmc_ps_ops;
+	else
+		mmc->ops	= &omap_hsmmc_ops;
 
 	/*
 	 * If regulator_disable can only put vcc_aux to sleep then there is
@@ -1928,26 +2093,44 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev)
 	if (mmc_slot(host).vcc_aux_disable_is_sleep)
 		mmc_slot(host).no_off = 1;
 
-	mmc->f_min	= OMAP_MMC_MIN_CLOCK;
-	mmc->f_max	= OMAP_MMC_MAX_CLOCK;
+	mmc->f_min	= 400000;
+	mmc->f_max	= 52000000;
 
 	spin_lock_init(&host->irq_lock);
 
+	host->iclk = clk_get(&pdev->dev, "ick");
+	if (IS_ERR(host->iclk)) {
+		ret = PTR_ERR(host->iclk);
+		host->iclk = NULL;
+		goto err1;
+	}
 	host->fclk = clk_get(&pdev->dev, "fck");
 	if (IS_ERR(host->fclk)) {
 		ret = PTR_ERR(host->fclk);
 		host->fclk = NULL;
+		clk_put(host->iclk);
 		goto err1;
 	}
 
 	omap_hsmmc_context_save(host);
 
 	mmc->caps |= MMC_CAP_DISABLE;
+	mmc_set_disable_delay(mmc, OMAP_MMC_DISABLED_TIMEOUT);
+	/* we start off in DISABLED state */
+	host->dpm_state = DISABLED;
 
-	pm_runtime_enable(host->dev);
-	pm_runtime_get_sync(host->dev);
-	pm_runtime_set_autosuspend_delay(host->dev, MMC_AUTOSUSPEND_DELAY);
-	pm_runtime_use_autosuspend(host->dev);
+	if (clk_enable(host->iclk) != 0) {
+		clk_put(host->iclk);
+		clk_put(host->fclk);
+		goto err1;
+	}
+
+	if (mmc_host_enable(host->mmc) != 0) {
+		clk_disable(host->iclk);
+		clk_put(host->iclk);
+		clk_put(host->fclk);
+		goto err1;
+	}
 
 	if (cpu_is_omap2430()) {
 		host->dbclk = clk_get(&pdev->dev, "mmchsdb_fck");
@@ -2057,6 +2240,8 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev)
 
 	omap_hsmmc_disable_irq(host);
 
+	mmc_host_lazy_disable(host->mmc);
+
 	omap_hsmmc_protect_card(host);
 
 	mmc_add_host(mmc);
@@ -2074,8 +2259,6 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev)
 	}
 
 	omap_hsmmc_debugfs(mmc);
-	pm_runtime_mark_last_busy(host->dev);
-	pm_runtime_put_autosuspend(host->dev);
 
 	return 0;
 
@@ -2091,9 +2274,10 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev)
 err_irq_cd_init:
 	free_irq(host->irq, host);
 err_irq:
-	pm_runtime_mark_last_busy(host->dev);
-	pm_runtime_put_autosuspend(host->dev);
+	mmc_host_disable(host->mmc);
+	clk_disable(host->iclk);
 	clk_put(host->fclk);
+	clk_put(host->iclk);
 	if (host->got_dbclk) {
 		clk_disable(host->dbclk);
 		clk_put(host->dbclk);
@@ -2115,7 +2299,7 @@ static int omap_hsmmc_remove(struct platform_device *pdev)
 	struct resource *res;
 
 	if (host) {
-		pm_runtime_get_sync(host->dev);
+		mmc_host_enable(host->mmc);
 		mmc_remove_host(host->mmc);
 		if (host->use_reg)
 			omap_hsmmc_reg_put(host);
@@ -2126,9 +2310,10 @@ static int omap_hsmmc_remove(struct platform_device *pdev)
 			free_irq(mmc_slot(host).card_detect_irq, host);
 		flush_work_sync(&host->mmc_carddetect_work);
 
-		pm_runtime_put_sync(host->dev);
-		pm_runtime_disable(host->dev);
+		mmc_host_disable(host->mmc);
+		clk_disable(host->iclk);
 		clk_put(host->fclk);
+		clk_put(host->iclk);
 		if (host->got_dbclk) {
 			clk_disable(host->dbclk);
 			clk_put(host->dbclk);
@@ -2158,7 +2343,6 @@ static int omap_hsmmc_suspend(struct device *dev)
 		return 0;
 
 	if (host) {
-		pm_runtime_get_sync(host->dev);
 		host->suspended = 1;
 		if (host->pdata->suspend) {
 			ret = host->pdata->suspend(&pdev->dev,
@@ -2173,11 +2357,13 @@ static int omap_hsmmc_suspend(struct device *dev)
 		}
 		cancel_work_sync(&host->mmc_carddetect_work);
 		ret = mmc_suspend_host(host->mmc);
-
+		mmc_host_enable(host->mmc);
 		if (ret == 0) {
 			omap_hsmmc_disable_irq(host);
 			OMAP_HSMMC_WRITE(host->base, HCTL,
 				OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP);
+			mmc_host_disable(host->mmc);
+			clk_disable(host->iclk);
 			if (host->got_dbclk)
 				clk_disable(host->dbclk);
 		} else {
@@ -2189,8 +2375,9 @@ static int omap_hsmmc_suspend(struct device *dev)
 					dev_dbg(mmc_dev(host->mmc),
 						"Unmask interrupt failed\n");
 			}
+			mmc_host_disable(host->mmc);
 		}
-		pm_runtime_put_sync(host->dev);
+
 	}
 	return ret;
 }
@@ -2206,7 +2393,14 @@ static int omap_hsmmc_resume(struct device *dev)
 		return 0;
 
 	if (host) {
-		pm_runtime_get_sync(host->dev);
+		ret = clk_enable(host->iclk);
+		if (ret)
+			goto clk_en_err;
+
+		if (mmc_host_enable(host->mmc) != 0) {
+			clk_disable(host->iclk);
+			goto clk_en_err;
+		}
 
 		if (host->got_dbclk)
 			clk_enable(host->dbclk);
@@ -2227,12 +2421,15 @@ static int omap_hsmmc_resume(struct device *dev)
 		if (ret == 0)
 			host->suspended = 0;
 
-		pm_runtime_mark_last_busy(host->dev);
-		pm_runtime_put_autosuspend(host->dev);
+		mmc_host_lazy_disable(host->mmc);
 	}
 
 	return ret;
 
+clk_en_err:
+	dev_dbg(mmc_dev(host->mmc),
+		"Failed to enable MMC clocks during resume\n");
+	return ret;
 }
 
 #else
@@ -2240,33 +2437,9 @@ static int omap_hsmmc_resume(struct device *dev)
 #define omap_hsmmc_resume		NULL
 #endif
 
-static int omap_hsmmc_runtime_suspend(struct device *dev)
-{
-	struct omap_hsmmc_host *host;
-
-	host = platform_get_drvdata(to_platform_device(dev));
-	omap_hsmmc_context_save(host);
-	dev_dbg(mmc_dev(host->mmc), "disabled\n");
-
-	return 0;
-}
-
-static int omap_hsmmc_runtime_resume(struct device *dev)
-{
-	struct omap_hsmmc_host *host;
-
-	host = platform_get_drvdata(to_platform_device(dev));
-	omap_hsmmc_context_restore(host);
-	dev_dbg(mmc_dev(host->mmc), "enabled\n");
-
-	return 0;
-}
-
 static struct dev_pm_ops omap_hsmmc_dev_pm_ops = {
 	.suspend	= omap_hsmmc_suspend,
 	.resume		= omap_hsmmc_resume,
-	.runtime_suspend = omap_hsmmc_runtime_suspend,
-	.runtime_resume = omap_hsmmc_runtime_resume,
 };
 
 static struct platform_driver omap_hsmmc_driver = {
diff --git a/trunk/drivers/mmc/host/sdhci-cns3xxx.c b/trunk/drivers/mmc/host/sdhci-cns3xxx.c
index 4b920b7621cf..9ebd1d7759dc 100644
--- a/trunk/drivers/mmc/host/sdhci-cns3xxx.c
+++ b/trunk/drivers/mmc/host/sdhci-cns3xxx.c
@@ -15,7 +15,9 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/mmc/host.h>
+#include <linux/mmc/sdhci-pltfm.h>
 #include <mach/cns3xxx.h>
+#include "sdhci.h"
 #include "sdhci-pltfm.h"
 
 static unsigned int sdhci_cns3xxx_get_max_clk(struct sdhci_host *host)
@@ -84,7 +86,7 @@ static struct sdhci_ops sdhci_cns3xxx_ops = {
 	.set_clock	= sdhci_cns3xxx_set_clock,
 };
 
-static struct sdhci_pltfm_data sdhci_cns3xxx_pdata = {
+struct sdhci_pltfm_data sdhci_cns3xxx_pdata = {
 	.ops = &sdhci_cns3xxx_ops,
 	.quirks = SDHCI_QUIRK_BROKEN_DMA |
 		  SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
@@ -93,43 +95,3 @@ static struct sdhci_pltfm_data sdhci_cns3xxx_pdata = {
 		  SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
 		  SDHCI_QUIRK_NONSTANDARD_CLOCK,
 };
-
-static int __devinit sdhci_cns3xxx_probe(struct platform_device *pdev)
-{
-	return sdhci_pltfm_register(pdev, &sdhci_cns3xxx_pdata);
-}
-
-static int __devexit sdhci_cns3xxx_remove(struct platform_device *pdev)
-{
-	return sdhci_pltfm_unregister(pdev);
-}
-
-static struct platform_driver sdhci_cns3xxx_driver = {
-	.driver		= {
-		.name	= "sdhci-cns3xxx",
-		.owner	= THIS_MODULE,
-	},
-	.probe		= sdhci_cns3xxx_probe,
-	.remove		= __devexit_p(sdhci_cns3xxx_remove),
-#ifdef CONFIG_PM
-	.suspend	= sdhci_pltfm_suspend,
-	.resume		= sdhci_pltfm_resume,
-#endif
-};
-
-static int __init sdhci_cns3xxx_init(void)
-{
-	return platform_driver_register(&sdhci_cns3xxx_driver);
-}
-module_init(sdhci_cns3xxx_init);
-
-static void __exit sdhci_cns3xxx_exit(void)
-{
-	platform_driver_unregister(&sdhci_cns3xxx_driver);
-}
-module_exit(sdhci_cns3xxx_exit);
-
-MODULE_DESCRIPTION("SDHCI driver for CNS3xxx");
-MODULE_AUTHOR("Scott Shu, "
-	      "Anton Vorontsov <avorontsov@mvista.com>");
-MODULE_LICENSE("GPL v2");
diff --git a/trunk/drivers/mmc/host/sdhci-dove.c b/trunk/drivers/mmc/host/sdhci-dove.c
index f2d29dca4420..2aeef4ffed8c 100644
--- a/trunk/drivers/mmc/host/sdhci-dove.c
+++ b/trunk/drivers/mmc/host/sdhci-dove.c
@@ -22,6 +22,7 @@
 #include <linux/io.h>
 #include <linux/mmc/host.h>
 
+#include "sdhci.h"
 #include "sdhci-pltfm.h"
 
 static u16 sdhci_dove_readw(struct sdhci_host *host, int reg)
@@ -60,50 +61,10 @@ static struct sdhci_ops sdhci_dove_ops = {
 	.read_l	= sdhci_dove_readl,
 };
 
-static struct sdhci_pltfm_data sdhci_dove_pdata = {
+struct sdhci_pltfm_data sdhci_dove_pdata = {
 	.ops	= &sdhci_dove_ops,
 	.quirks	= SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER |
 		  SDHCI_QUIRK_NO_BUSY_IRQ |
 		  SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
 		  SDHCI_QUIRK_FORCE_DMA,
 };
-
-static int __devinit sdhci_dove_probe(struct platform_device *pdev)
-{
-	return sdhci_pltfm_register(pdev, &sdhci_dove_pdata);
-}
-
-static int __devexit sdhci_dove_remove(struct platform_device *pdev)
-{
-	return sdhci_pltfm_unregister(pdev);
-}
-
-static struct platform_driver sdhci_dove_driver = {
-	.driver		= {
-		.name	= "sdhci-dove",
-		.owner	= THIS_MODULE,
-	},
-	.probe		= sdhci_dove_probe,
-	.remove		= __devexit_p(sdhci_dove_remove),
-#ifdef CONFIG_PM
-	.suspend	= sdhci_pltfm_suspend,
-	.resume		= sdhci_pltfm_resume,
-#endif
-};
-
-static int __init sdhci_dove_init(void)
-{
-	return platform_driver_register(&sdhci_dove_driver);
-}
-module_init(sdhci_dove_init);
-
-static void __exit sdhci_dove_exit(void)
-{
-	platform_driver_unregister(&sdhci_dove_driver);
-}
-module_exit(sdhci_dove_exit);
-
-MODULE_DESCRIPTION("SDHCI driver for Dove");
-MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>, "
-	      "Mike Rapoport <mike@compulab.co.il>");
-MODULE_LICENSE("GPL v2");
diff --git a/trunk/drivers/mmc/host/sdhci-esdhc-imx.c b/trunk/drivers/mmc/host/sdhci-esdhc-imx.c
index 710b706f4fcf..a19967d0bfc4 100644
--- a/trunk/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/trunk/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -18,10 +18,12 @@
 #include <linux/gpio.h>
 #include <linux/slab.h>
 #include <linux/mmc/host.h>
+#include <linux/mmc/sdhci-pltfm.h>
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/sdio.h>
 #include <mach/hardware.h>
 #include <mach/esdhc.h>
+#include "sdhci.h"
 #include "sdhci-pltfm.h"
 #include "sdhci-esdhc.h"
 
@@ -29,7 +31,7 @@
 #define SDHCI_VENDOR_SPEC		0xC0
 #define  SDHCI_VENDOR_SPEC_SDIO_QUIRK	0x00000002
 
-#define ESDHC_FLAG_GPIO_FOR_CD		(1 << 0)
+#define ESDHC_FLAG_GPIO_FOR_CD_WP	(1 << 0)
 /*
  * The CMDTYPE of the CMD register (offset 0xE) should be set to
  * "11" when the STOP CMD12 is issued on imx53 to abort one
@@ -65,14 +67,14 @@ static u32 esdhc_readl_le(struct sdhci_host *host, int reg)
 	u32 val = readl(host->ioaddr + reg);
 
 	if (unlikely((reg == SDHCI_PRESENT_STATE)
-			&& (imx_data->flags & ESDHC_FLAG_GPIO_FOR_CD))) {
+			&& (imx_data->flags & ESDHC_FLAG_GPIO_FOR_CD_WP))) {
 		struct esdhc_platform_data *boarddata =
 				host->mmc->parent->platform_data;
 
 		if (boarddata && gpio_is_valid(boarddata->cd_gpio)
 				&& gpio_get_value(boarddata->cd_gpio))
 			/* no card, if a valid gpio says so... */
 			val &= ~SDHCI_CARD_PRESENT;
 		else
 			/* ... in all other cases assume card is present */
 			val |= SDHCI_CARD_PRESENT;
@@ -87,7 +89,7 @@ static void esdhc_writel_le(struct sdhci_host *host, u32 val, int reg)
 	struct pltfm_imx_data *imx_data = pltfm_host->priv;
 
 	if (unlikely((reg == SDHCI_INT_ENABLE || reg == SDHCI_SIGNAL_ENABLE)
-			&& (imx_data->flags & ESDHC_FLAG_GPIO_FOR_CD)))
+			&& (imx_data->flags & ESDHC_FLAG_GPIO_FOR_CD_WP)))
 		/*
 		 * these interrupts won't work with a custom card_detect gpio
 		 * (only applied to mx25/35)
@@ -189,6 +191,16 @@ static unsigned int esdhc_pltfm_get_min_clock(struct sdhci_host *host)
 	return clk_get_rate(pltfm_host->clk) / 256 / 16;
 }
 
+static unsigned int esdhc_pltfm_get_ro(struct sdhci_host *host)
+{
+	struct esdhc_platform_data *boarddata = host->mmc->parent->platform_data;
+
+	if (boarddata && gpio_is_valid(boarddata->wp_gpio))
+		return gpio_get_value(boarddata->wp_gpio);
+	else
+		return -ENOSYS;
+}
+
 static struct sdhci_ops sdhci_esdhc_ops = {
 	.read_l = esdhc_readl_le,
 	.read_w = esdhc_readw_le,
@@ -200,24 +212,6 @@ static struct sdhci_ops sdhci_esdhc_ops = {
 	.get_min_clock = esdhc_pltfm_get_min_clock,
 };
 
-static struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = {
-	.quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_ADMA
-			| SDHCI_QUIRK_BROKEN_CARD_DETECTION,
-	/* ADMA has issues. Might be fixable */
-	.ops = &sdhci_esdhc_ops,
-};
-
-static unsigned int esdhc_pltfm_get_ro(struct sdhci_host *host)
-{
-	struct esdhc_platform_data *boarddata =
-			host->mmc->parent->platform_data;
-
-	if (boarddata && gpio_is_valid(boarddata->wp_gpio))
-		return gpio_get_value(boarddata->wp_gpio);
-	else
-		return -ENOSYS;
-}
-
 static irqreturn_t cd_irq(int irq, void *data)
 {
 	struct sdhci_host *sdhost = (struct sdhci_host *)data;
@@ -226,35 +220,30 @@ static irqreturn_t cd_irq(int irq, void *data)
 	return IRQ_HANDLED;
 };
 
-static int __devinit sdhci_esdhc_imx_probe(struct platform_device *pdev)
+static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pdata)
 {
-	struct sdhci_pltfm_host *pltfm_host;
-	struct sdhci_host *host;
-	struct esdhc_platform_data *boarddata;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct esdhc_platform_data *boarddata = host->mmc->parent->platform_data;
 	struct clk *clk;
 	int err;
 	struct pltfm_imx_data *imx_data;
 
-	host = sdhci_pltfm_init(pdev, &sdhci_esdhc_imx_pdata);
-	if (IS_ERR(host))
-		return PTR_ERR(host);
-
-	pltfm_host = sdhci_priv(host);
-
-	imx_data = kzalloc(sizeof(struct pltfm_imx_data), GFP_KERNEL);
-	if (!imx_data)
-		return -ENOMEM;
-	pltfm_host->priv = imx_data;
-
 	clk = clk_get(mmc_dev(host->mmc), NULL);
 	if (IS_ERR(clk)) {
 		dev_err(mmc_dev(host->mmc), "clk err\n");
-		err = PTR_ERR(clk);
-		goto err_clk_get;
+		return PTR_ERR(clk);
 	}
 	clk_enable(clk);
 	pltfm_host->clk = clk;
 
+	imx_data = kzalloc(sizeof(struct pltfm_imx_data), GFP_KERNEL);
+	if (!imx_data) {
+		clk_disable(pltfm_host->clk);
+		clk_put(pltfm_host->clk);
+		return -ENOMEM;
+	}
+	pltfm_host->priv = imx_data;
+
 	if (!cpu_is_mx25())
 		host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL;
 
@@ -268,7 +257,6 @@ static int __devinit sdhci_esdhc_imx_probe(struct platform_device *pdev)
 	if (!(cpu_is_mx25() || cpu_is_mx35() || cpu_is_mx51()))
 		imx_data->flags |= ESDHC_FLAG_MULTIBLK_NO_INT;
 
-	boarddata = host->mmc->parent->platform_data;
 	if (boarddata) {
 		err = gpio_request_one(boarddata->wp_gpio, GPIOF_IN, "ESDHC_WP");
 		if (err) {
@@ -296,15 +284,11 @@ static int __devinit sdhci_esdhc_imx_probe(struct platform_device *pdev)
 			goto no_card_detect_irq;
 		}
 
-		imx_data->flags |= ESDHC_FLAG_GPIO_FOR_CD;
+		imx_data->flags |= ESDHC_FLAG_GPIO_FOR_CD_WP;
 		/* Now we have a working card_detect again */
 		host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION;
 	}
 
-	err = sdhci_add_host(host);
-	if (err)
-		goto err_add_host;
-
 	return 0;
 
  no_card_detect_irq:
@@ -313,23 +297,14 @@ static int __devinit sdhci_esdhc_imx_probe(struct platform_device *pdev)
 	boarddata->cd_gpio = err;
  not_supported:
-	kfree(imx_data);
- err_add_host:
-	clk_disable(pltfm_host->clk);
-	clk_put(pltfm_host->clk);
- err_clk_get:
-	sdhci_pltfm_free(pdev);
-	return err;
+	/* imx_data stays live: ->priv users and esdhc_pltfm_exit() need it */
+	return 0;
 }
 
-static int __devexit sdhci_esdhc_imx_remove(struct platform_device *pdev)
+static void esdhc_pltfm_exit(struct sdhci_host *host)
 {
-	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct esdhc_platform_data *boarddata = host->mmc->parent->platform_data;
 	struct pltfm_imx_data *imx_data = pltfm_host->priv;
-	int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff);
-
-	sdhci_remove_host(host, dead);
 
 	if (boarddata && gpio_is_valid(boarddata->wp_gpio))
 		gpio_free(boarddata->wp_gpio);
@@ -344,37 +319,13 @@ static int __devexit sdhci_esdhc_imx_remove(struct platform_device *pdev)
 	clk_disable(pltfm_host->clk);
 	clk_put(pltfm_host->clk);
 	kfree(imx_data);
-
-	sdhci_pltfm_free(pdev);
-
-	return 0;
 }
 
-static struct platform_driver sdhci_esdhc_imx_driver = {
-	.driver		= {
-		.name	= "sdhci-esdhc-imx",
-		.owner	= THIS_MODULE,
-	},
-	.probe		= sdhci_esdhc_imx_probe,
-	.remove		= __devexit_p(sdhci_esdhc_imx_remove),
-#ifdef CONFIG_PM
-	.suspend	= sdhci_pltfm_suspend,
-	.resume		= sdhci_pltfm_resume,
-#endif
+struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = {
+	.quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_ADMA
+			| SDHCI_QUIRK_BROKEN_CARD_DETECTION,
+	/* ADMA has issues. Might be fixable */
+	.ops = &sdhci_esdhc_ops,
+	.init = esdhc_pltfm_init,
+	.exit = esdhc_pltfm_exit,
 };
-
-static int __init sdhci_esdhc_imx_init(void)
-{
-	return platform_driver_register(&sdhci_esdhc_imx_driver);
-}
-module_init(sdhci_esdhc_imx_init);
-
-static void __exit sdhci_esdhc_imx_exit(void)
-{
-	platform_driver_unregister(&sdhci_esdhc_imx_driver);
-}
-module_exit(sdhci_esdhc_imx_exit);
-
-MODULE_DESCRIPTION("SDHCI driver for Freescale i.MX eSDHC");
-MODULE_AUTHOR("Wolfram Sang <w.sang@pengutronix.de>");
-MODULE_LICENSE("GPL v2");
diff --git a/trunk/drivers/mmc/host/sdhci-of-core.c b/trunk/drivers/mmc/host/sdhci-of-core.c
new file mode 100644
index 000000000000..60e4186a4345
--- /dev/null
+++ b/trunk/drivers/mmc/host/sdhci-of-core.c
@@ -0,0 +1,253 @@
+/*
+ * OpenFirmware bindings for Secure Digital Host Controller Interface.
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ * Copyright (c) 2009 MontaVista Software, Inc.
+ *
+ * Authors: Xiaobo Xie <X.Xie@freescale.com>
+ *	    Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/mmc/host.h>
+#ifdef CONFIG_PPC
+#include <asm/machdep.h>
+#endif
+#include "sdhci-of.h"
+#include "sdhci.h"
+
+#ifdef CONFIG_MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER
+
+/*
+ * These accessors are designed for big endian hosts doing I/O to
+ * little endian controllers incorporating a 32-bit hardware byte swapper.
+ */
+
+u32 sdhci_be32bs_readl(struct sdhci_host *host, int reg)
+{
+	return in_be32(host->ioaddr + reg);
+}
+
+u16 sdhci_be32bs_readw(struct sdhci_host *host, int reg)
+{
+	return in_be16(host->ioaddr + (reg ^ 0x2));
+}
+
+u8 sdhci_be32bs_readb(struct sdhci_host *host, int reg)
+{
+	return in_8(host->ioaddr + (reg ^ 0x3));
+}
+
+void sdhci_be32bs_writel(struct sdhci_host *host, u32 val, int reg)
+{
+	out_be32(host->ioaddr + reg, val);
+}
+
+void sdhci_be32bs_writew(struct sdhci_host *host, u16 val, int reg)
+{
+	struct sdhci_of_host *of_host = sdhci_priv(host);
+	int base = reg & ~0x3;
+	int shift = (reg & 0x2) * 8;
+
+	switch (reg) {
+	case SDHCI_TRANSFER_MODE:
+		/*
+		 * Postpone this write, we must do it together with a
+		 * command write that is down below.
+		 */
+		of_host->xfer_mode_shadow = val;
+		return;
+	case SDHCI_COMMAND:
+		sdhci_be32bs_writel(host, val << 16 | of_host->xfer_mode_shadow,
+				    SDHCI_TRANSFER_MODE);
+		return;
+	}
+	clrsetbits_be32(host->ioaddr + base, 0xffff << shift, val << shift);
+}
+
+void sdhci_be32bs_writeb(struct sdhci_host *host, u8 val, int reg)
+{
+	int base = reg & ~0x3;
+	int shift = (reg & 0x3) * 8;
+
+	clrsetbits_be32(host->ioaddr + base , 0xff << shift, val << shift);
+}
+#endif /* CONFIG_MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER */
+
+#ifdef CONFIG_PM
+
+static int sdhci_of_suspend(struct platform_device *ofdev, pm_message_t state)
+{
+	struct sdhci_host *host = dev_get_drvdata(&ofdev->dev);
+
+	return mmc_suspend_host(host->mmc);
+}
+
+static int sdhci_of_resume(struct platform_device *ofdev)
+{
+	struct sdhci_host *host = dev_get_drvdata(&ofdev->dev);
+
+	return mmc_resume_host(host->mmc);
+}
+
+#else
+
+#define sdhci_of_suspend NULL
+#define sdhci_of_resume NULL
+
+#endif
+
+static bool __devinit sdhci_of_wp_inverted(struct device_node *np)
+{
+	if (of_get_property(np, "sdhci,wp-inverted", NULL))
+		return true;
+
+	/* Old device trees don't have the wp-inverted property. */
+#ifdef CONFIG_PPC
+	return machine_is(mpc837x_rdb) || machine_is(mpc837x_mds);
+#else
+	return false;
+#endif
+}
+
+static const struct of_device_id sdhci_of_match[];
+static int __devinit sdhci_of_probe(struct platform_device *ofdev)
+{
+	const struct of_device_id *match;
+	struct device_node *np = ofdev->dev.of_node;
+	struct sdhci_of_data *sdhci_of_data;
+	struct sdhci_host *host;
+	struct sdhci_of_host *of_host;
+	const __be32 *clk;
+	int size;
+	int ret;
+
+	match = of_match_device(sdhci_of_match, &ofdev->dev);
+	if (!match)
+		return -EINVAL;
+	sdhci_of_data = match->data;
+
+	if (!of_device_is_available(np))
+		return -ENODEV;
+
+	host = sdhci_alloc_host(&ofdev->dev, sizeof(*of_host));
+	if (IS_ERR(host))
+		return -ENOMEM;
+
+	of_host = sdhci_priv(host);
+	dev_set_drvdata(&ofdev->dev, host);
+
+	host->ioaddr = of_iomap(np, 0);
+	if (!host->ioaddr) {
+		ret = -ENOMEM;
+		goto err_addr_map;
+	}
+
+	host->irq = irq_of_parse_and_map(np, 0);
+	if (!host->irq) {
+		ret = -EINVAL;
+		goto err_no_irq;
+	}
+
+	host->hw_name = dev_name(&ofdev->dev);
+	if (sdhci_of_data) {
+		host->quirks = sdhci_of_data->quirks;
+		host->ops = &sdhci_of_data->ops;
+	}
+
+	if (of_get_property(np, "sdhci,auto-cmd12", NULL))
+		host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12;
+
+
+	if (of_get_property(np, "sdhci,1-bit-only", NULL))
+		host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA;
+
+	if (sdhci_of_wp_inverted(np))
+		host->quirks |= SDHCI_QUIRK_INVERTED_WRITE_PROTECT;
+
+	clk = of_get_property(np, "clock-frequency", &size);
+	if (clk && size == sizeof(*clk) && *clk)
+		of_host->clock = be32_to_cpup(clk);
+
+	ret = sdhci_add_host(host);
+	if (ret)
+		goto err_add_host;
+
+	return 0;
+
+err_add_host:
+	irq_dispose_mapping(host->irq);
+err_no_irq:
+	iounmap(host->ioaddr);
+err_addr_map:
+	sdhci_free_host(host);
+	return ret;
+}
+
+static int __devexit sdhci_of_remove(struct platform_device *ofdev)
+{
+	struct sdhci_host *host = dev_get_drvdata(&ofdev->dev);
+
+	sdhci_remove_host(host, 0);
+	irq_dispose_mapping(host->irq);
+	iounmap(host->ioaddr);
+	sdhci_free_host(host);	/* last, mirroring the probe unwind order */
+	return 0;
+}
+
+static const struct of_device_id sdhci_of_match[] = {
+#ifdef CONFIG_MMC_SDHCI_OF_ESDHC
+	{ .compatible = "fsl,mpc8379-esdhc", .data = &sdhci_esdhc, },
+	{ .compatible = "fsl,mpc8536-esdhc", .data = &sdhci_esdhc, },
+	{ .compatible = "fsl,esdhc", .data = &sdhci_esdhc, },
+#endif
+#ifdef CONFIG_MMC_SDHCI_OF_HLWD
+	{ .compatible = "nintendo,hollywood-sdhci", .data = &sdhci_hlwd, },
+#endif
+	{ .compatible = "generic-sdhci", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, sdhci_of_match);
+
+static struct platform_driver sdhci_of_driver = {
+	.driver = {
+		.name = "sdhci-of",
+		.owner = THIS_MODULE,
+		.of_match_table = sdhci_of_match,
+	},
+	.probe = sdhci_of_probe,
+	.remove = __devexit_p(sdhci_of_remove),
+	.suspend = sdhci_of_suspend,
+	.resume	= sdhci_of_resume,
+};
+
+static int __init sdhci_of_init(void)
+{
+	return platform_driver_register(&sdhci_of_driver);
+}
+module_init(sdhci_of_init);
+
+static void __exit sdhci_of_exit(void)
+{
+	platform_driver_unregister(&sdhci_of_driver);
+}
+module_exit(sdhci_of_exit);
+
+MODULE_DESCRIPTION("Secure Digital Host Controller Interface OF driver");
+MODULE_AUTHOR("Xiaobo Xie <X.Xie@freescale.com>, "
+	      "Anton Vorontsov <avorontsov@ru.mvista.com>");
+MODULE_LICENSE("GPL");
diff --git a/trunk/drivers/mmc/host/sdhci-of-esdhc.c b/trunk/drivers/mmc/host/sdhci-of-esdhc.c
index fe604df65011..ba40d6d035c7 100644
--- a/trunk/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/trunk/drivers/mmc/host/sdhci-of-esdhc.c
@@ -16,7 +16,8 @@
 #include <linux/io.h>
 #include <linux/delay.h>
 #include <linux/mmc/host.h>
-#include "sdhci-pltfm.h"
+#include "sdhci-of.h"
+#include "sdhci.h"
 #include "sdhci-esdhc.h"
 
 static u16 esdhc_readw(struct sdhci_host *host, int reg)
@@ -59,83 +60,32 @@ static int esdhc_of_enable_dma(struct sdhci_host *host)
 
 static unsigned int esdhc_of_get_max_clock(struct sdhci_host *host)
 {
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_of_host *of_host = sdhci_priv(host);
 
-	return pltfm_host->clock;
+	return of_host->clock;
 }
 
 static unsigned int esdhc_of_get_min_clock(struct sdhci_host *host)
 {
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_of_host *of_host = sdhci_priv(host);
 
-	return pltfm_host->clock / 256 / 16;
+	return of_host->clock / 256 / 16;
 }
 
-static struct sdhci_ops sdhci_esdhc_ops = {
-	.read_l = sdhci_be32bs_readl,
-	.read_w = esdhc_readw,
-	.read_b = sdhci_be32bs_readb,
-	.write_l = sdhci_be32bs_writel,
-	.write_w = esdhc_writew,
-	.write_b = esdhc_writeb,
-	.set_clock = esdhc_set_clock,
-	.enable_dma = esdhc_of_enable_dma,
-	.get_max_clock = esdhc_of_get_max_clock,
-	.get_min_clock = esdhc_of_get_min_clock,
-};
-
-static struct sdhci_pltfm_data sdhci_esdhc_pdata = {
+struct sdhci_of_data sdhci_esdhc = {
 	/* card detection could be handled via GPIO */
 	.quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_CARD_DETECTION
 		| SDHCI_QUIRK_NO_CARD_NO_RESET,
-	.ops = &sdhci_esdhc_ops,
-};
-
-static int __devinit sdhci_esdhc_probe(struct platform_device *pdev)
-{
-	return sdhci_pltfm_register(pdev, &sdhci_esdhc_pdata);
-}
-
-static int __devexit sdhci_esdhc_remove(struct platform_device *pdev)
-{
-	return sdhci_pltfm_unregister(pdev);
-}
-
-static const struct of_device_id sdhci_esdhc_of_match[] = {
-	{ .compatible = "fsl,mpc8379-esdhc" },
-	{ .compatible = "fsl,mpc8536-esdhc" },
-	{ .compatible = "fsl,esdhc" },
-	{ }
-};
-MODULE_DEVICE_TABLE(of, sdhci_esdhc_of_match);
-
-static struct platform_driver sdhci_esdhc_driver = {
-	.driver = {
-		.name = "sdhci-esdhc",
-		.owner = THIS_MODULE,
-		.of_match_table = sdhci_esdhc_of_match,
+	.ops = {
+		.read_l = sdhci_be32bs_readl,
+		.read_w = esdhc_readw,
+		.read_b = sdhci_be32bs_readb,
+		.write_l = sdhci_be32bs_writel,
+		.write_w = esdhc_writew,
+		.write_b = esdhc_writeb,
+		.set_clock = esdhc_set_clock,
+		.enable_dma = esdhc_of_enable_dma,
+		.get_max_clock = esdhc_of_get_max_clock,
+		.get_min_clock = esdhc_of_get_min_clock,
 	},
-	.probe = sdhci_esdhc_probe,
-	.remove = __devexit_p(sdhci_esdhc_remove),
-#ifdef CONFIG_PM
-	.suspend = sdhci_pltfm_suspend,
-	.resume = sdhci_pltfm_resume,
-#endif
 };
-
-static int __init sdhci_esdhc_init(void)
-{
-	return platform_driver_register(&sdhci_esdhc_driver);
-}
-module_init(sdhci_esdhc_init);
-
-static void __exit sdhci_esdhc_exit(void)
-{
-	platform_driver_unregister(&sdhci_esdhc_driver);
-}
-module_exit(sdhci_esdhc_exit);
-
-MODULE_DESCRIPTION("SDHCI OF driver for Freescale MPC eSDHC");
-MODULE_AUTHOR("Xiaobo Xie <X.Xie@freescale.com>, "
-	      "Anton Vorontsov <avorontsov@ru.mvista.com>");
-MODULE_LICENSE("GPL v2");
diff --git a/trunk/drivers/mmc/host/sdhci-of-hlwd.c b/trunk/drivers/mmc/host/sdhci-of-hlwd.c
index 735be131dca9..68ddb7546ae2 100644
--- a/trunk/drivers/mmc/host/sdhci-of-hlwd.c
+++ b/trunk/drivers/mmc/host/sdhci-of-hlwd.c
@@ -21,7 +21,8 @@
 
 #include <linux/delay.h>
 #include <linux/mmc/host.h>
-#include "sdhci-pltfm.h"
+#include "sdhci-of.h"
+#include "sdhci.h"
 
 /*
  * Ops and quirks for the Nintendo Wii SDHCI controllers.
@@ -50,63 +51,15 @@ static void sdhci_hlwd_writeb(struct sdhci_host *host, u8 val, int reg)
 	udelay(SDHCI_HLWD_WRITE_DELAY);
 }
 
-static struct sdhci_ops sdhci_hlwd_ops = {
-	.read_l = sdhci_be32bs_readl,
-	.read_w = sdhci_be32bs_readw,
-	.read_b = sdhci_be32bs_readb,
-	.write_l = sdhci_hlwd_writel,
-	.write_w = sdhci_hlwd_writew,
-	.write_b = sdhci_hlwd_writeb,
-};
-
-static struct sdhci_pltfm_data sdhci_hlwd_pdata = {
+struct sdhci_of_data sdhci_hlwd = {
 	.quirks = SDHCI_QUIRK_32BIT_DMA_ADDR |
 		  SDHCI_QUIRK_32BIT_DMA_SIZE,
-	.ops = &sdhci_hlwd_ops,
-};
-
-static int __devinit sdhci_hlwd_probe(struct platform_device *pdev)
-{
-	return sdhci_pltfm_register(pdev, &sdhci_hlwd_pdata);
-}
-
-static int __devexit sdhci_hlwd_remove(struct platform_device *pdev)
-{
-	return sdhci_pltfm_unregister(pdev);
-}
-
-static const struct of_device_id sdhci_hlwd_of_match[] = {
-	{ .compatible = "nintendo,hollywood-sdhci" },
-	{ }
-};
-MODULE_DEVICE_TABLE(of, sdhci_hlwd_of_match);
-
-static struct platform_driver sdhci_hlwd_driver = {
-	.driver = {
-		.name = "sdhci-hlwd",
-		.owner = THIS_MODULE,
-		.of_match_table = sdhci_hlwd_of_match,
+	.ops = {
+		.read_l = sdhci_be32bs_readl,
+		.read_w = sdhci_be32bs_readw,
+		.read_b = sdhci_be32bs_readb,
+		.write_l = sdhci_hlwd_writel,
+		.write_w = sdhci_hlwd_writew,
+		.write_b = sdhci_hlwd_writeb,
 	},
-	.probe = sdhci_hlwd_probe,
-	.remove = __devexit_p(sdhci_hlwd_remove),
-#ifdef CONFIG_PM
-	.suspend = sdhci_pltfm_suspend,
-	.resume = sdhci_pltfm_resume,
-#endif
 };
-
-static int __init sdhci_hlwd_init(void)
-{
-	return platform_driver_register(&sdhci_hlwd_driver);
-}
-module_init(sdhci_hlwd_init);
-
-static void __exit sdhci_hlwd_exit(void)
-{
-	platform_driver_unregister(&sdhci_hlwd_driver);
-}
-module_exit(sdhci_hlwd_exit);
-
-MODULE_DESCRIPTION("Nintendo Wii SDHCI OF driver");
-MODULE_AUTHOR("The GameCube Linux Team, Albert Herranz");
-MODULE_LICENSE("GPL v2");
diff --git a/trunk/drivers/mmc/host/sdhci-of.h b/trunk/drivers/mmc/host/sdhci-of.h
new file mode 100644
index 000000000000..ad09ad9915d8
--- /dev/null
+++ b/trunk/drivers/mmc/host/sdhci-of.h
@@ -0,0 +1,42 @@
+/*
+ * OpenFirmware bindings for Secure Digital Host Controller Interface.
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ * Copyright (c) 2009 MontaVista Software, Inc.
+ *
+ * Authors: Xiaobo Xie <X.Xie@freescale.com>
+ *	    Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __SDHCI_OF_H
+#define __SDHCI_OF_H
+
+#include <linux/types.h>
+#include "sdhci.h"
+
+struct sdhci_of_data {
+	unsigned int quirks;
+	struct sdhci_ops ops;
+};
+
+struct sdhci_of_host {
+	unsigned int clock;
+	u16 xfer_mode_shadow;
+};
+
+extern u32 sdhci_be32bs_readl(struct sdhci_host *host, int reg);
+extern u16 sdhci_be32bs_readw(struct sdhci_host *host, int reg);
+extern u8 sdhci_be32bs_readb(struct sdhci_host *host, int reg);
+extern void sdhci_be32bs_writel(struct sdhci_host *host, u32 val, int reg);
+extern void sdhci_be32bs_writew(struct sdhci_host *host, u16 val, int reg);
+extern void sdhci_be32bs_writeb(struct sdhci_host *host, u8 val, int reg);
+
+extern struct sdhci_of_data sdhci_esdhc;
+extern struct sdhci_of_data sdhci_hlwd;
+
+#endif /* __SDHCI_OF_H */
diff --git a/trunk/drivers/mmc/host/sdhci-pci.c b/trunk/drivers/mmc/host/sdhci-pci.c
index 26c528648f3c..936bbca19c0a 100644
--- a/trunk/drivers/mmc/host/sdhci-pci.c
+++ b/trunk/drivers/mmc/host/sdhci-pci.c
@@ -143,12 +143,6 @@ static const struct sdhci_pci_fixes sdhci_cafe = {
 			  SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
 };
 
-static int mrst_hc_probe_slot(struct sdhci_pci_slot *slot)
-{
-	slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA;
-	return 0;
-}
-
 /*
  * ADMA operation is disabled for Moorestown platform due to
  * hardware bugs.
@@ -163,15 +157,8 @@ static int mrst_hc_probe(struct sdhci_pci_chip *chip)
 	return 0;
 }
 
-static int mfd_emmc_probe_slot(struct sdhci_pci_slot *slot)
-{
-	slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA;
-	return 0;
-}
-
 static const struct sdhci_pci_fixes sdhci_intel_mrst_hc0 = {
 	.quirks		= SDHCI_QUIRK_BROKEN_ADMA | SDHCI_QUIRK_NO_HISPD_BIT,
-	.probe_slot	= mrst_hc_probe_slot,
 };
 
 static const struct sdhci_pci_fixes sdhci_intel_mrst_hc1_hc2 = {
@@ -183,13 +170,8 @@ static const struct sdhci_pci_fixes sdhci_intel_mfd_sd = {
 	.quirks		= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
 };
 
-static const struct sdhci_pci_fixes sdhci_intel_mfd_sdio = {
-	.quirks		= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
-};
-
-static const struct sdhci_pci_fixes sdhci_intel_mfd_emmc = {
+static const struct sdhci_pci_fixes sdhci_intel_mfd_emmc_sdio = {
 	.quirks		= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
-	.probe_slot	= mfd_emmc_probe_slot,
 };
 
 /* O2Micro extra registers */
@@ -700,7 +682,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = {
 		.device		= PCI_DEVICE_ID_INTEL_MFD_SDIO1,
 		.subvendor	= PCI_ANY_ID,
 		.subdevice	= PCI_ANY_ID,
-		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_sdio,
+		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio,
 	},
 
 	{
@@ -708,7 +690,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = {
 		.device		= PCI_DEVICE_ID_INTEL_MFD_SDIO2,
 		.subvendor	= PCI_ANY_ID,
 		.subdevice	= PCI_ANY_ID,
-		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_sdio,
+		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio,
 	},
 
 	{
@@ -716,7 +698,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = {
 		.device		= PCI_DEVICE_ID_INTEL_MFD_EMMC0,
 		.subvendor	= PCI_ANY_ID,
 		.subdevice	= PCI_ANY_ID,
-		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_emmc,
+		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio,
 	},
 
 	{
@@ -724,7 +706,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = {
 		.device		= PCI_DEVICE_ID_INTEL_MFD_EMMC1,
 		.subvendor	= PCI_ANY_ID,
 		.subdevice	= PCI_ANY_ID,
-		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_emmc,
+		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio,
 	},
 
 	{
@@ -807,34 +789,8 @@ static int sdhci_pci_enable_dma(struct sdhci_host *host)
 	return 0;
 }
 
-static int sdhci_pci_8bit_width(struct sdhci_host *host, int width)
-{
-	u8 ctrl;
-
-	ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
-
-	switch (width) {
-	case MMC_BUS_WIDTH_8:
-		ctrl |= SDHCI_CTRL_8BITBUS;
-		ctrl &= ~SDHCI_CTRL_4BITBUS;
-		break;
-	case MMC_BUS_WIDTH_4:
-		ctrl |= SDHCI_CTRL_4BITBUS;
-		ctrl &= ~SDHCI_CTRL_8BITBUS;
-		break;
-	default:
-		ctrl &= ~(SDHCI_CTRL_8BITBUS | SDHCI_CTRL_4BITBUS);
-		break;
-	}
-
-	sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
-
-	return 0;
-}
-
 static struct sdhci_ops sdhci_pci_ops = {
 	.enable_dma	= sdhci_pci_enable_dma,
-	.platform_8bit_width	= sdhci_pci_8bit_width,
 };
 
 /*****************************************************************************\
diff --git a/trunk/drivers/mmc/host/sdhci-pltfm.c b/trunk/drivers/mmc/host/sdhci-pltfm.c
index 71c0ce1f6db0..dbab0407f4b6 100644
--- a/trunk/drivers/mmc/host/sdhci-pltfm.c
+++ b/trunk/drivers/mmc/host/sdhci-pltfm.c
@@ -2,12 +2,6 @@
  * sdhci-pltfm.c Support for SDHCI platform devices
  * Copyright (c) 2009 Intel Corporation
  *
- * Copyright (c) 2007 Freescale Semiconductor, Inc.
- * Copyright (c) 2009 MontaVista Software, Inc.
- *
- * Authors: Xiaobo Xie <X.Xie@freescale.com>
- *	    Anton Vorontsov <avorontsov@ru.mvista.com>
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -28,66 +22,48 @@
  * Inspired by sdhci-pci.c, by Pierre Ossman
  */
 
-#include <linux/err.h>
-#include <linux/of.h>
-#ifdef CONFIG_PPC
-#include <asm/machdep.h>
-#endif
-#include "sdhci-pltfm.h"
-
-static struct sdhci_ops sdhci_pltfm_ops = {
-};
+#include <linux/delay.h>
+#include <linux/highmem.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
 
-#ifdef CONFIG_OF
-static bool sdhci_of_wp_inverted(struct device_node *np)
-{
-	if (of_get_property(np, "sdhci,wp-inverted", NULL))
-		return true;
+#include <linux/mmc/host.h>
 
-	/* Old device trees don't have the wp-inverted property. */
-#ifdef CONFIG_PPC
-	return machine_is(mpc837x_rdb) || machine_is(mpc837x_mds);
-#else
-	return false;
-#endif /* CONFIG_PPC */
-}
+#include <linux/io.h>
+#include <linux/mmc/sdhci-pltfm.h>
 
-void sdhci_get_of_property(struct platform_device *pdev)
-{
-	struct device_node *np = pdev->dev.of_node;
-	struct sdhci_host *host = platform_get_drvdata(pdev);
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	const __be32 *clk;
-	int size;
+#include "sdhci.h"
+#include "sdhci-pltfm.h"
 
-	if (of_device_is_available(np)) {
-		if (of_get_property(np, "sdhci,auto-cmd12", NULL))
-			host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12;
+/*****************************************************************************\
+ *                                                                           *
+ * SDHCI core callbacks                                                      *
+ *                                                                           *
+\*****************************************************************************/
 
-		if (of_get_property(np, "sdhci,1-bit-only", NULL))
-			host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA;
+static struct sdhci_ops sdhci_pltfm_ops = {
+};
 
-		if (sdhci_of_wp_inverted(np))
-			host->quirks |= SDHCI_QUIRK_INVERTED_WRITE_PROTECT;
+/*****************************************************************************\
+ *                                                                           *
+ * Device probing/removal                                                    *
+ *                                                                           *
+\*****************************************************************************/
 
-		clk = of_get_property(np, "clock-frequency", &size);
-		if (clk && size == sizeof(*clk) && *clk)
-			pltfm_host->clock = be32_to_cpup(clk);
-	}
-}
-#else
-void sdhci_get_of_property(struct platform_device *pdev) {}
-#endif /* CONFIG_OF */
-EXPORT_SYMBOL_GPL(sdhci_get_of_property);
-
-struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev,
-				    struct sdhci_pltfm_data *pdata)
+static int __devinit sdhci_pltfm_probe(struct platform_device *pdev)
 {
+	const struct platform_device_id *platid = platform_get_device_id(pdev);
+	struct sdhci_pltfm_data *pdata;
 	struct sdhci_host *host;
 	struct sdhci_pltfm_host *pltfm_host;
 	struct resource *iomem;
 	int ret;
 
+	if (platid && platid->driver_data)
+		pdata = (void *)platid->driver_data;
+	else
+		pdata = pdev->dev.platform_data;
+
 	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!iomem) {
 		ret = -ENOMEM;
@@ -95,7 +71,8 @@ struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev,
 	}
 
 	if (resource_size(iomem) < 0x100)
-		dev_err(&pdev->dev, "Invalid iomem size!\n");
+		dev_err(&pdev->dev, "Invalid iomem size. You may "
+			"experience problems.\n");
 
 	/* Some PCI-based MFD need the parent here */
 	if (pdev->dev.parent != &platform_bus)
@@ -110,7 +87,7 @@ struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev,
 
 	pltfm_host = sdhci_priv(host);
 
-	host->hw_name = dev_name(&pdev->dev);
+	host->hw_name = "platform";
 	if (pdata && pdata->ops)
 		host->ops = pdata->ops;
 	else
@@ -133,95 +110,126 @@ struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev,
 		goto err_remap;
 	}
 
+	if (pdata && pdata->init) {
+		ret = pdata->init(host, pdata);
+		if (ret)
+			goto err_plat_init;
+	}
+
+	ret = sdhci_add_host(host);
+	if (ret)
+		goto err_add_host;
+
 	platform_set_drvdata(pdev, host);
 
-	return host;
+	return 0;
 
+err_add_host:
+	if (pdata && pdata->exit)
+		pdata->exit(host);
+err_plat_init:
+	iounmap(host->ioaddr);
 err_remap:
 	release_mem_region(iomem->start, resource_size(iomem));
 err_request:
 	sdhci_free_host(host);
 err:
-	dev_err(&pdev->dev, "%s failed %d\n", __func__, ret);
-	return ERR_PTR(ret);
+	printk(KERN_ERR"Probing of sdhci-pltfm failed: %d\n", ret);
+	return ret;
 }
-EXPORT_SYMBOL_GPL(sdhci_pltfm_init);
 
-void sdhci_pltfm_free(struct platform_device *pdev)
+/* Unregister the host and release what sdhci_pltfm_probe() acquired. */
+static int __devexit sdhci_pltfm_remove(struct platform_device *pdev)
 {
+	const struct platform_device_id *platid = platform_get_device_id(pdev);
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	struct sdhci_pltfm_data *pdata = pdev->dev.platform_data;
+	int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == (u32)-1);
+
+	/* Use the pdata probe chose, so ->exit() pairs with ->init(). */
+	if (platid && platid->driver_data)
+		pdata = (void *)platid->driver_data;
 
+	sdhci_remove_host(host, dead);
+	if (pdata && pdata->exit)
+		pdata->exit(host);
 	iounmap(host->ioaddr);
 	release_mem_region(iomem->start, resource_size(iomem));
 	sdhci_free_host(host);
 	platform_set_drvdata(pdev, NULL);
-}
-EXPORT_SYMBOL_GPL(sdhci_pltfm_free);
-
-int sdhci_pltfm_register(struct platform_device *pdev,
-			 struct sdhci_pltfm_data *pdata)
-{
-	struct sdhci_host *host;
-	int ret = 0;
-
-	host = sdhci_pltfm_init(pdev, pdata);
-	if (IS_ERR(host))
-		return PTR_ERR(host);
-
-	sdhci_get_of_property(pdev);
-
-	ret = sdhci_add_host(host);
-	if (ret)
-		sdhci_pltfm_free(pdev);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(sdhci_pltfm_register);
-
-int sdhci_pltfm_unregister(struct platform_device *pdev)
-{
-	struct sdhci_host *host = platform_get_drvdata(pdev);
-	int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff);
-
-	sdhci_remove_host(host, dead);
-	sdhci_pltfm_free(pdev);
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(sdhci_pltfm_unregister);
+
+static const struct platform_device_id sdhci_pltfm_ids[] = {
+	{ "sdhci", },	/* no driver_data: probe uses dev.platform_data */
+#ifdef CONFIG_MMC_SDHCI_CNS3XXX
+	{ "sdhci-cns3xxx", (kernel_ulong_t)&sdhci_cns3xxx_pdata },
+#endif
+#ifdef CONFIG_MMC_SDHCI_ESDHC_IMX
+	{ "sdhci-esdhc-imx", (kernel_ulong_t)&sdhci_esdhc_imx_pdata },
+#endif
+#ifdef CONFIG_MMC_SDHCI_DOVE
+	{ "sdhci-dove", (kernel_ulong_t)&sdhci_dove_pdata },
+#endif
+#ifdef CONFIG_MMC_SDHCI_TEGRA
+	{ "sdhci-tegra", (kernel_ulong_t)&sdhci_tegra_pdata },
+#endif
+	{ },	/* terminating empty entry */
+};
+MODULE_DEVICE_TABLE(platform, sdhci_pltfm_ids);
 
 #ifdef CONFIG_PM
-int sdhci_pltfm_suspend(struct platform_device *dev, pm_message_t state)
+static int sdhci_pltfm_suspend(struct platform_device *dev, pm_message_t state)
 {
 	struct sdhci_host *host = platform_get_drvdata(dev);
 
 	return sdhci_suspend_host(host, state);
 }
-EXPORT_SYMBOL_GPL(sdhci_pltfm_suspend);
 
-int sdhci_pltfm_resume(struct platform_device *dev)
+static int sdhci_pltfm_resume(struct platform_device *dev)
 {
 	struct sdhci_host *host = platform_get_drvdata(dev);
 
 	return sdhci_resume_host(host);
 }
-EXPORT_SYMBOL_GPL(sdhci_pltfm_resume);
+#else
+#define sdhci_pltfm_suspend	NULL
+#define sdhci_pltfm_resume	NULL
 #endif	/* CONFIG_PM */
 
-static int __init sdhci_pltfm_drv_init(void)
-{
-	pr_info("sdhci-pltfm: SDHCI platform and OF driver helper\n");
+static struct platform_driver sdhci_pltfm_driver = {
+	.driver = {
+		.name	= "sdhci",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= sdhci_pltfm_probe,
+	.remove		= __devexit_p(sdhci_pltfm_remove),
+	.id_table	= sdhci_pltfm_ids,
+	.suspend	= sdhci_pltfm_suspend,	/* NULL without CONFIG_PM */
+	.resume		= sdhci_pltfm_resume,	/* NULL without CONFIG_PM */
+};
 
-	return 0;
+/*****************************************************************************\
+ *                                                                           *
+ * Driver init/exit                                                          *
+ *                                                                           *
+\*****************************************************************************/
+
+static int __init sdhci_drv_init(void)
+{
+	return platform_driver_register(&sdhci_pltfm_driver);
 }
-module_init(sdhci_pltfm_drv_init);
 
-static void __exit sdhci_pltfm_drv_exit(void)
+static void __exit sdhci_drv_exit(void)
 {
+	platform_driver_unregister(&sdhci_pltfm_driver);
 }
-module_exit(sdhci_pltfm_drv_exit);
 
-MODULE_DESCRIPTION("SDHCI platform and OF driver helper");
-MODULE_AUTHOR("Intel Corporation");
+module_init(sdhci_drv_init);
+module_exit(sdhci_drv_exit);
+
+MODULE_DESCRIPTION("Secure Digital Host Controller Interface platform driver");
+MODULE_AUTHOR("Mocean Laboratories <info@mocean-labs.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/trunk/drivers/mmc/host/sdhci-pltfm.h b/trunk/drivers/mmc/host/sdhci-pltfm.h
index 3a9fc3f40840..2b37016ad0ac 100644
--- a/trunk/drivers/mmc/host/sdhci-pltfm.h
+++ b/trunk/drivers/mmc/host/sdhci-pltfm.h
@@ -12,95 +12,17 @@
 #define _DRIVERS_MMC_SDHCI_PLTFM_H
 
 #include <linux/clk.h>
-#include <linux/platform_device.h>
-#include "sdhci.h"
-
-struct sdhci_pltfm_data {
-	struct sdhci_ops *ops;
-	unsigned int quirks;
-};
+#include <linux/types.h>
+#include <linux/mmc/sdhci-pltfm.h>
 
 struct sdhci_pltfm_host {
 	struct clk *clk;
 	void *priv; /* to handle quirks across io-accessor calls */
-
-	/* migrate from sdhci_of_host */
-	unsigned int clock;
-	u16 xfer_mode_shadow;
 };
 
-#ifdef CONFIG_MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER
-/*
- * These accessors are designed for big endian hosts doing I/O to
- * little endian controllers incorporating a 32-bit hardware byte swapper.
- */
-static inline u32 sdhci_be32bs_readl(struct sdhci_host *host, int reg)
-{
-	return in_be32(host->ioaddr + reg);
-}
-
-static inline u16 sdhci_be32bs_readw(struct sdhci_host *host, int reg)
-{
-	return in_be16(host->ioaddr + (reg ^ 0x2));
-}
-
-static inline u8 sdhci_be32bs_readb(struct sdhci_host *host, int reg)
-{
-	return in_8(host->ioaddr + (reg ^ 0x3));
-}
-
-static inline void sdhci_be32bs_writel(struct sdhci_host *host,
-				       u32 val, int reg)
-{
-	out_be32(host->ioaddr + reg, val);
-}
-
-static inline void sdhci_be32bs_writew(struct sdhci_host *host,
-				       u16 val, int reg)
-{
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	int base = reg & ~0x3;
-	int shift = (reg & 0x2) * 8;
-
-	switch (reg) {
-	case SDHCI_TRANSFER_MODE:
-		/*
-		 * Postpone this write, we must do it together with a
-		 * command write that is down below.
-		 */
-		pltfm_host->xfer_mode_shadow = val;
-		return;
-	case SDHCI_COMMAND:
-		sdhci_be32bs_writel(host,
-				    val << 16 | pltfm_host->xfer_mode_shadow,
-				    SDHCI_TRANSFER_MODE);
-		return;
-	}
-	clrsetbits_be32(host->ioaddr + base, 0xffff << shift, val << shift);
-}
-
-static inline void sdhci_be32bs_writeb(struct sdhci_host *host, u8 val, int reg)
-{
-	int base = reg & ~0x3;
-	int shift = (reg & 0x3) * 8;
-
-	clrsetbits_be32(host->ioaddr + base , 0xff << shift, val << shift);
-}
-#endif /* CONFIG_MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER */
-
-extern void sdhci_get_of_property(struct platform_device *pdev);
-
-extern struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev,
-					   struct sdhci_pltfm_data *pdata);
-extern void sdhci_pltfm_free(struct platform_device *pdev);
-
-extern int sdhci_pltfm_register(struct platform_device *pdev,
-				struct sdhci_pltfm_data *pdata);
-extern int sdhci_pltfm_unregister(struct platform_device *pdev);
-
-#ifdef CONFIG_PM
-extern int sdhci_pltfm_suspend(struct platform_device *dev, pm_message_t state);
-extern int sdhci_pltfm_resume(struct platform_device *dev);
-#endif
+extern struct sdhci_pltfm_data sdhci_cns3xxx_pdata;
+extern struct sdhci_pltfm_data sdhci_esdhc_imx_pdata;
+extern struct sdhci_pltfm_data sdhci_dove_pdata;
+extern struct sdhci_pltfm_data sdhci_tegra_pdata;
 
 #endif /* _DRIVERS_MMC_SDHCI_PLTFM_H */
diff --git a/trunk/drivers/mmc/host/sdhci-pxa.c b/trunk/drivers/mmc/host/sdhci-pxa.c
new file mode 100644
index 000000000000..089c9a68b7b1
--- /dev/null
+++ b/trunk/drivers/mmc/host/sdhci-pxa.c
@@ -0,0 +1,303 @@
+/* linux/drivers/mmc/host/sdhci-pxa.c
+ *
+ * Copyright (C) 2010 Marvell International Ltd.
+ *		Zhangfei Gao <zhangfei.gao@marvell.com>
+ *		Kevin Wang <dwang4@marvell.com>
+ *		Mingwei Wang <mwwang@marvell.com>
+ *		Philip Rakity <prakity@marvell.com>
+ *		Mark Brown <markb@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Supports:
+ * SDHCI support for MMP2/PXA910/PXA168
+ *
+ * Refer to sdhci-s3c.c.
+ */
+
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/mmc/host.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <plat/sdhci.h>
+#include "sdhci.h"
+
+#define DRIVER_NAME	"sdhci-pxa"
+
+#define SD_FIFO_PARAM		0x104
+#define DIS_PAD_SD_CLK_GATE	0x400
+
+struct sdhci_pxa {
+	struct sdhci_host		*host;	/* host this data is private to */
+	struct sdhci_pxa_platdata	*pdata;	/* pdev->dev.platform_data */
+	struct clk			*clk;	/* "PXA-SDHCLK" from clk_get() */
+	struct resource			*res;	/* requested register region */
+
+	u8 clk_enable;	/* 1 while set_clock() has clk enabled, else 0 */
+};
+
+/*****************************************************************************\
+ *                                                                           *
+ * SDHCI core callbacks                                                      *
+ *                                                                           *
+\*****************************************************************************/
+/* Gate the controller clock per @clock (0 = off); no-op if already so. */
+static void set_clock(struct sdhci_host *host, unsigned int clock)
+{
+	struct sdhci_pxa *pxa = sdhci_priv(host);
+	u32 fifo_param;
+
+	if (!clock == !pxa->clk_enable)
+		return;
+	if (!clock) {
+		clk_disable(pxa->clk);
+		pxa->clk_enable = 0;
+		return;
+	}
+
+	if (pxa->pdata->flags & PXA_FLAG_DISABLE_CLOCK_GATING) {
+		fifo_param = readl(host->ioaddr + SD_FIFO_PARAM);
+		fifo_param |= DIS_PAD_SD_CLK_GATE;
+		writel(fifo_param, host->ioaddr + SD_FIFO_PARAM);
+	}
+	clk_enable(pxa->clk);
+	pxa->clk_enable = 1;
+}
+
+static int set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
+{
+	u16 ctrl_2;
+
+	/*
+	 * SDR50/SDR104/DDR50 also set V18_EN (SDHCI_CTRL_VDD_180): UHS modes
+	 * do not work without it; it does not change the signaling voltage.
+	 */
+	ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+
+	/* Select Bus Speed Mode for host */
+	ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
+	switch (uhs) {	/* any other timing leaves the mode field cleared */
+	case MMC_TIMING_UHS_SDR12:
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR12;
+		break;
+	case MMC_TIMING_UHS_SDR25:
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR25;
+		break;
+	case MMC_TIMING_UHS_SDR50:
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR50 | SDHCI_CTRL_VDD_180;
+		break;
+	case MMC_TIMING_UHS_SDR104:
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR104 | SDHCI_CTRL_VDD_180;
+		break;
+	case MMC_TIMING_UHS_DDR50:
+		ctrl_2 |= SDHCI_CTRL_UHS_DDR50 | SDHCI_CTRL_VDD_180;
+		break;
+	}
+
+	sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
+	pr_debug("%s:%s uhs = %d, ctrl_2 = %04X\n",
+		__func__, mmc_hostname(host->mmc), uhs, ctrl_2);
+
+	return 0;	/* unconditionally reports success */
+}
+
+static struct sdhci_ops sdhci_pxa_ops = {	/* set as host->ops in probe */
+	.set_uhs_signaling = set_uhs_signaling,
+	.set_clock = set_clock,
+};
+
+/*****************************************************************************\
+ *                                                                           *
+ * Device probing/removal                                                    *
+ *                                                                           *
+\*****************************************************************************/
+
+/*
+ * Claim IRQ, clock and register window, apply platform data and register
+ * the host.  Returns 0, or a negative errno after undoing what was acquired.
+ */
+static int __devinit sdhci_pxa_probe(struct platform_device *pdev)
+{
+	struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data;
+	struct device *dev = &pdev->dev;
+	struct sdhci_host *host;
+	struct resource *iomem;
+	struct sdhci_pxa *pxa;
+	int ret, irq;
+
+	if (!pdata) {	/* dereferenced below and by set_clock() */
+		dev_err(dev, "no platform data specified\n");
+		return -EINVAL;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "no irq specified\n");
+		return irq;
+	}
+
+	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!iomem) {
+		dev_err(dev, "no memory specified\n");
+		return -ENOENT;
+	}
+
+	host = sdhci_alloc_host(&pdev->dev, sizeof(struct sdhci_pxa));
+	if (IS_ERR(host)) {
+		dev_err(dev, "failed to alloc host\n");
+		return PTR_ERR(host);
+	}
+
+	pxa = sdhci_priv(host);
+	pxa->host = host;
+	pxa->pdata = pdata;
+	pxa->clk_enable = 0;
+
+	pxa->clk = clk_get(dev, "PXA-SDHCLK");
+	if (IS_ERR(pxa->clk)) {
+		dev_err(dev, "failed to get io clock\n");
+		ret = PTR_ERR(pxa->clk);
+		goto err_free_host;	/* ERR_PTR clk must not be put */
+	}
+
+	pxa->res = request_mem_region(iomem->start, resource_size(iomem),
+				      mmc_hostname(host->mmc));
+	if (!pxa->res) {
+		dev_err(&pdev->dev, "cannot request region\n");
+		ret = -EBUSY;
+		goto err_put_clk;
+	}
+
+	host->ioaddr = ioremap(iomem->start, resource_size(iomem));
+	if (!host->ioaddr) {
+		dev_err(&pdev->dev, "failed to remap registers\n");
+		ret = -ENOMEM;
+		goto err_release;
+	}
+
+	host->hw_name = "MMC";
+	host->ops = &sdhci_pxa_ops;
+	host->irq = irq;
+	host->quirks = SDHCI_QUIRK_BROKEN_ADMA
+		| SDHCI_QUIRK_BROKEN_TIMEOUT_VAL
+		| SDHCI_QUIRK_32BIT_DMA_ADDR
+		| SDHCI_QUIRK_32BIT_DMA_SIZE
+		| SDHCI_QUIRK_32BIT_ADMA_SIZE
+		| SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC | pdata->quirks;
+
+	/* enable 1.8V DDR capable */
+	host->mmc->caps |= MMC_CAP_1_8V_DDR;
+	/* If slot design supports 8 bit data, indicate this to MMC. */
+	if (pdata->flags & PXA_FLAG_SD_8_BIT_CAPABLE_SLOT)
+		host->mmc->caps |= MMC_CAP_8_BIT_DATA;
+
+	ret = sdhci_add_host(host);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to add host\n");
+		goto err_unmap;
+	}
+
+	if (pdata->max_speed)
+		host->mmc->f_max = pdata->max_speed;
+	platform_set_drvdata(pdev, host);
+	return 0;
+err_unmap:
+	iounmap(host->ioaddr);
+err_release:
+	release_mem_region(pxa->res->start, resource_size(pxa->res));
+err_put_clk:
+	clk_put(pxa->clk);
+err_free_host:
+	sdhci_free_host(host);
+	return ret;
+}
+
+/*
+ * Tear down a probed controller: unregister the host, then release the
+ * register mapping, memory region and clock held in struct sdhci_pxa.
+ */
+static int __devexit sdhci_pxa_remove(struct platform_device *pdev)
+{
+	struct sdhci_host *host = platform_get_drvdata(pdev);
+	struct sdhci_pxa *pxa;
+	int dead;
+
+	if (!host)
+		return 0;
+	pxa = sdhci_priv(host);
+
+	/* An all-ones status read is passed on as the "dead" flag. */
+	dead = readl(host->ioaddr + SDHCI_INT_STATUS) == (u32)-1;
+	sdhci_remove_host(host, dead);
+
+	if (host->ioaddr)
+		iounmap(host->ioaddr);
+	if (pxa->res)
+		release_mem_region(pxa->res->start, resource_size(pxa->res));
+	if (pxa->clk_enable) {
+		clk_disable(pxa->clk);
+		pxa->clk_enable = 0;
+	}
+	clk_put(pxa->clk);
+	sdhci_free_host(host);
+	platform_set_drvdata(pdev, NULL);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+/* Platform-driver suspend hook: forwards to sdhci_suspend_host(). */
+static int sdhci_pxa_suspend(struct platform_device *dev, pm_message_t state)
+{
+	/* drvdata holds the sdhci_host stored by probe. */
+	return sdhci_suspend_host(platform_get_drvdata(dev), state);
+}
+
+/* Platform-driver resume hook: forwards to sdhci_resume_host(). */
+static int sdhci_pxa_resume(struct platform_device *dev)
+{
+	/* drvdata holds the sdhci_host stored by probe. */
+	return sdhci_resume_host(platform_get_drvdata(dev));
+}
+#else
+#define sdhci_pxa_suspend	NULL
+#define sdhci_pxa_resume	NULL
+#endif
+
+static struct platform_driver sdhci_pxa_driver = {
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= DRIVER_NAME,
+	},
+	.probe		= sdhci_pxa_probe,
+	.remove		= __devexit_p(sdhci_pxa_remove),
+	.resume		= sdhci_pxa_resume,	/* NULL without CONFIG_PM */
+	.suspend	= sdhci_pxa_suspend,	/* NULL without CONFIG_PM */
+};
+
+/*****************************************************************************\
+ *                                                                           *
+ * Driver init/exit                                                          *
+ *                                                                           *
+\*****************************************************************************/
+
+static int __init sdhci_pxa_init(void)
+{
+	return platform_driver_register(&sdhci_pxa_driver);
+}
+
+static void __exit sdhci_pxa_exit(void)
+{
+	platform_driver_unregister(&sdhci_pxa_driver);
+}
+
+module_init(sdhci_pxa_init);
+module_exit(sdhci_pxa_exit);
+
+MODULE_DESCRIPTION("SDH controller driver for PXA168/PXA910/MMP2");
+MODULE_AUTHOR("Zhangfei Gao <zhangfei.gao@marvell.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/trunk/drivers/mmc/host/sdhci-pxav2.c b/trunk/drivers/mmc/host/sdhci-pxav2.c
deleted file mode 100644
index 38f58994f79a..000000000000
--- a/trunk/drivers/mmc/host/sdhci-pxav2.c
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Copyright (C) 2010 Marvell International Ltd.
- *		Zhangfei Gao <zhangfei.gao@marvell.com>
- *		Kevin Wang <dwang4@marvell.com>
- *		Jun Nie <njun@marvell.com>
- *		Qiming Wu <wuqm@marvell.com>
- *		Philip Rakity <prakity@marvell.com>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-#include <linux/gpio.h>
-#include <linux/mmc/card.h>
-#include <linux/mmc/host.h>
-#include <linux/platform_data/pxa_sdhci.h>
-#include <linux/slab.h>
-#include "sdhci.h"
-#include "sdhci-pltfm.h"
-
-#define SD_FIFO_PARAM		0xe0
-#define DIS_PAD_SD_CLK_GATE	0x0400 /* Turn on/off Dynamic SD Clock Gating */
-#define CLK_GATE_ON		0x0200 /* Disable/enable Clock Gate */
-#define CLK_GATE_CTL		0x0100 /* Clock Gate Control */
-#define CLK_GATE_SETTING_BITS	(DIS_PAD_SD_CLK_GATE | \
-		CLK_GATE_ON | CLK_GATE_CTL)
-
-#define SD_CLOCK_BURST_SIZE_SETUP	0xe6
-#define SDCLK_SEL_SHIFT		8
-#define SDCLK_SEL_MASK		0x3
-#define SDCLK_DELAY_SHIFT	10
-#define SDCLK_DELAY_MASK	0x3c
-
-#define SD_CE_ATA_2		0xea
-#define MMC_CARD		0x1000
-#define MMC_WIDTH		0x0100
-
-static void pxav2_set_private_registers(struct sdhci_host *host, u8 mask)
-{
-	struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
-	struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data;
-
-	if (mask == SDHCI_RESET_ALL) {
-		u16 tmp = 0;
-
-		/*
-		 * tune timing of read data/command when crc error happen
-		 * no performance impact
-		 */
-		if (pdata->clk_delay_sel == 1) {
-			tmp = readw(host->ioaddr + SD_CLOCK_BURST_SIZE_SETUP);
-
-			tmp &= ~(SDCLK_DELAY_MASK << SDCLK_DELAY_SHIFT);
-			tmp |= (pdata->clk_delay_cycles & SDCLK_DELAY_MASK)
-				<< SDCLK_DELAY_SHIFT;
-			tmp &= ~(SDCLK_SEL_MASK << SDCLK_SEL_SHIFT);
-			tmp |= (1 & SDCLK_SEL_MASK) << SDCLK_SEL_SHIFT;
-
-			writew(tmp, host->ioaddr + SD_CLOCK_BURST_SIZE_SETUP);
-		}
-
-		if (pdata->flags & PXA_FLAG_ENABLE_CLOCK_GATING) {
-			tmp = readw(host->ioaddr + SD_FIFO_PARAM);
-			tmp &= ~CLK_GATE_SETTING_BITS;
-			writew(tmp, host->ioaddr + SD_FIFO_PARAM);
-		} else {
-			tmp = readw(host->ioaddr + SD_FIFO_PARAM);
-			tmp &= ~CLK_GATE_SETTING_BITS;
-			tmp |= CLK_GATE_SETTING_BITS;
-			writew(tmp, host->ioaddr + SD_FIFO_PARAM);
-		}
-	}
-}
-
-static int pxav2_mmc_set_width(struct sdhci_host *host, int width)
-{
-	u8 ctrl;
-	u16 tmp;
-
-	ctrl = readb(host->ioaddr + SDHCI_HOST_CONTROL);
-	tmp = readw(host->ioaddr + SD_CE_ATA_2);
-	if (width == MMC_BUS_WIDTH_8) {
-		ctrl &= ~SDHCI_CTRL_4BITBUS;
-		tmp |= MMC_CARD | MMC_WIDTH;
-	} else {
-		tmp &= ~(MMC_CARD | MMC_WIDTH);
-		if (width == MMC_BUS_WIDTH_4)
-			ctrl |= SDHCI_CTRL_4BITBUS;
-		else
-			ctrl &= ~SDHCI_CTRL_4BITBUS;
-	}
-	writew(tmp, host->ioaddr + SD_CE_ATA_2);
-	writeb(ctrl, host->ioaddr + SDHCI_HOST_CONTROL);
-
-	return 0;
-}
-
-static u32 pxav2_get_max_clock(struct sdhci_host *host)
-{
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-
-	return clk_get_rate(pltfm_host->clk);
-}
-
-static struct sdhci_ops pxav2_sdhci_ops = {
-	.get_max_clock = pxav2_get_max_clock,
-	.platform_reset_exit = pxav2_set_private_registers,
-	.platform_8bit_width = pxav2_mmc_set_width,
-};
-
-static int __devinit sdhci_pxav2_probe(struct platform_device *pdev)
-{
-	struct sdhci_pltfm_host *pltfm_host;
-	struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data;
-	struct device *dev = &pdev->dev;
-	struct sdhci_host *host = NULL;
-	struct sdhci_pxa *pxa = NULL;
-	int ret;
-	struct clk *clk;
-
-	pxa = kzalloc(sizeof(struct sdhci_pxa), GFP_KERNEL);
-	if (!pxa)
-		return -ENOMEM;
-
-	host = sdhci_pltfm_init(pdev, NULL);
-	if (IS_ERR(host)) {
-		kfree(pxa);
-		return PTR_ERR(host);
-	}
-	pltfm_host = sdhci_priv(host);
-	pltfm_host->priv = pxa;
-
-	clk = clk_get(dev, "PXA-SDHCLK");
-	if (IS_ERR(clk)) {
-		dev_err(dev, "failed to get io clock\n");
-		ret = PTR_ERR(clk);
-		goto err_clk_get;
-	}
-	pltfm_host->clk = clk;
-	clk_enable(clk);
-
-	host->quirks = SDHCI_QUIRK_BROKEN_ADMA
-		| SDHCI_QUIRK_BROKEN_TIMEOUT_VAL
-		| SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN;
-
-	if (pdata) {
-		if (pdata->flags & PXA_FLAG_CARD_PERMANENT) {
-			/* on-chip device */
-			host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION;
-			host->mmc->caps |= MMC_CAP_NONREMOVABLE;
-		}
-
-		/* If slot design supports 8 bit data, indicate this to MMC. */
-		if (pdata->flags & PXA_FLAG_SD_8_BIT_CAPABLE_SLOT)
-			host->mmc->caps |= MMC_CAP_8_BIT_DATA;
-
-		if (pdata->quirks)
-			host->quirks |= pdata->quirks;
-		if (pdata->host_caps)
-			host->mmc->caps |= pdata->host_caps;
-		if (pdata->pm_caps)
-			host->mmc->pm_caps |= pdata->pm_caps;
-	}
-
-	host->ops = &pxav2_sdhci_ops;
-
-	ret = sdhci_add_host(host);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to add host\n");
-		goto err_add_host;
-	}
-
-	platform_set_drvdata(pdev, host);
-
-	return 0;
-
-err_add_host:
-	clk_disable(clk);
-	clk_put(clk);
-err_clk_get:
-	sdhci_pltfm_free(pdev);
-	kfree(pxa);
-	return ret;
-}
-
-static int __devexit sdhci_pxav2_remove(struct platform_device *pdev)
-{
-	struct sdhci_host *host = platform_get_drvdata(pdev);
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_pxa *pxa = pltfm_host->priv;
-
-	sdhci_remove_host(host, 1);
-
-	clk_disable(pltfm_host->clk);
-	clk_put(pltfm_host->clk);
-	sdhci_pltfm_free(pdev);
-	kfree(pxa);
-
-	platform_set_drvdata(pdev, NULL);
-
-	return 0;
-}
-
-static struct platform_driver sdhci_pxav2_driver = {
-	.driver		= {
-		.name	= "sdhci-pxav2",
-		.owner	= THIS_MODULE,
-	},
-	.probe		= sdhci_pxav2_probe,
-	.remove		= __devexit_p(sdhci_pxav2_remove),
-#ifdef CONFIG_PM
-	.suspend	= sdhci_pltfm_suspend,
-	.resume		= sdhci_pltfm_resume,
-#endif
-};
-static int __init sdhci_pxav2_init(void)
-{
-	return platform_driver_register(&sdhci_pxav2_driver);
-}
-
-static void __exit sdhci_pxav2_exit(void)
-{
-	platform_driver_unregister(&sdhci_pxav2_driver);
-}
-
-module_init(sdhci_pxav2_init);
-module_exit(sdhci_pxav2_exit);
-
-MODULE_DESCRIPTION("SDHCI driver for pxav2");
-MODULE_AUTHOR("Marvell International Ltd.");
-MODULE_LICENSE("GPL v2");
-
diff --git a/trunk/drivers/mmc/host/sdhci-pxav3.c b/trunk/drivers/mmc/host/sdhci-pxav3.c
deleted file mode 100644
index 4198dbbc5c20..000000000000
--- a/trunk/drivers/mmc/host/sdhci-pxav3.c
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- * Copyright (C) 2010 Marvell International Ltd.
- *		Zhangfei Gao <zhangfei.gao@marvell.com>
- *		Kevin Wang <dwang4@marvell.com>
- *		Mingwei Wang <mwwang@marvell.com>
- *		Philip Rakity <prakity@marvell.com>
- *		Mark Brown <markb@marvell.com>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-#include <linux/gpio.h>
-#include <linux/mmc/card.h>
-#include <linux/mmc/host.h>
-#include <linux/platform_data/pxa_sdhci.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include "sdhci.h"
-#include "sdhci-pltfm.h"
-
-#define SD_CLOCK_BURST_SIZE_SETUP		0x10A
-#define SDCLK_SEL	0x100
-#define SDCLK_DELAY_SHIFT	9
-#define SDCLK_DELAY_MASK	0x1f
-
-#define SD_CFG_FIFO_PARAM       0x100
-#define SDCFG_GEN_PAD_CLK_ON	(1<<6)
-#define SDCFG_GEN_PAD_CLK_CNT_MASK	0xFF
-#define SDCFG_GEN_PAD_CLK_CNT_SHIFT	24
-
-#define SD_SPI_MODE          0x108
-#define SD_CE_ATA_1          0x10C
-
-#define SD_CE_ATA_2          0x10E
-#define SDCE_MISC_INT		(1<<2)
-#define SDCE_MISC_INT_EN	(1<<1)
-
-static void pxav3_set_private_registers(struct sdhci_host *host, u8 mask)
-{
-	struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
-	struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data;
-
-	if (mask == SDHCI_RESET_ALL) {
-		/*
-		 * tune timing of read data/command when crc error happen
-		 * no performance impact
-		 */
-		if (pdata && 0 != pdata->clk_delay_cycles) {
-			u16 tmp;
-
-			tmp = readw(host->ioaddr + SD_CLOCK_BURST_SIZE_SETUP);
-			tmp |= (pdata->clk_delay_cycles & SDCLK_DELAY_MASK)
-				<< SDCLK_DELAY_SHIFT;
-			tmp |= SDCLK_SEL;
-			writew(tmp, host->ioaddr + SD_CLOCK_BURST_SIZE_SETUP);
-		}
-	}
-}
-
-#define MAX_WAIT_COUNT 5
-static void pxav3_gen_init_74_clocks(struct sdhci_host *host, u8 power_mode)
-{
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_pxa *pxa = pltfm_host->priv;
-	u16 tmp;
-	int count;
-
-	if (pxa->power_mode == MMC_POWER_UP
-			&& power_mode == MMC_POWER_ON) {
-
-		dev_dbg(mmc_dev(host->mmc),
-				"%s: slot->power_mode = %d,"
-				"ios->power_mode = %d\n",
-				__func__,
-				pxa->power_mode,
-				power_mode);
-
-		/* set we want notice of when 74 clocks are sent */
-		tmp = readw(host->ioaddr + SD_CE_ATA_2);
-		tmp |= SDCE_MISC_INT_EN;
-		writew(tmp, host->ioaddr + SD_CE_ATA_2);
-
-		/* start sending the 74 clocks */
-		tmp = readw(host->ioaddr + SD_CFG_FIFO_PARAM);
-		tmp |= SDCFG_GEN_PAD_CLK_ON;
-		writew(tmp, host->ioaddr + SD_CFG_FIFO_PARAM);
-
-		/* slowest speed is about 100KHz or 10usec per clock */
-		udelay(740);
-		count = 0;
-
-		while (count++ < MAX_WAIT_COUNT) {
-			if ((readw(host->ioaddr + SD_CE_ATA_2)
-						& SDCE_MISC_INT) == 0)
-				break;
-			udelay(10);
-		}
-
-		if (count == MAX_WAIT_COUNT)
-			dev_warn(mmc_dev(host->mmc), "74 clock interrupt not cleared\n");
-
-		/* clear the interrupt bit if posted */
-		tmp = readw(host->ioaddr + SD_CE_ATA_2);
-		tmp |= SDCE_MISC_INT;
-		writew(tmp, host->ioaddr + SD_CE_ATA_2);
-	}
-	pxa->power_mode = power_mode;
-}
-
-static int pxav3_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
-{
-	u16 ctrl_2;
-
-	/*
-	 * Set V18_EN -- UHS modes do not work without this.
-	 * does not change signaling voltage
-	 */
-	ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
-
-	/* Select Bus Speed Mode for host */
-	ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
-	switch (uhs) {
-	case MMC_TIMING_UHS_SDR12:
-		ctrl_2 |= SDHCI_CTRL_UHS_SDR12;
-		break;
-	case MMC_TIMING_UHS_SDR25:
-		ctrl_2 |= SDHCI_CTRL_UHS_SDR25;
-		break;
-	case MMC_TIMING_UHS_SDR50:
-		ctrl_2 |= SDHCI_CTRL_UHS_SDR50 | SDHCI_CTRL_VDD_180;
-		break;
-	case MMC_TIMING_UHS_SDR104:
-		ctrl_2 |= SDHCI_CTRL_UHS_SDR104 | SDHCI_CTRL_VDD_180;
-		break;
-	case MMC_TIMING_UHS_DDR50:
-		ctrl_2 |= SDHCI_CTRL_UHS_DDR50 | SDHCI_CTRL_VDD_180;
-		break;
-	}
-
-	sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
-	dev_dbg(mmc_dev(host->mmc),
-		"%s uhs = %d, ctrl_2 = %04X\n",
-		__func__, uhs, ctrl_2);
-
-	return 0;
-}
-
-static struct sdhci_ops pxav3_sdhci_ops = {
-	.platform_reset_exit = pxav3_set_private_registers,
-	.set_uhs_signaling = pxav3_set_uhs_signaling,
-	.platform_send_init_74_clocks = pxav3_gen_init_74_clocks,
-};
-
-static int __devinit sdhci_pxav3_probe(struct platform_device *pdev)
-{
-	struct sdhci_pltfm_host *pltfm_host;
-	struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data;
-	struct device *dev = &pdev->dev;
-	struct sdhci_host *host = NULL;
-	struct sdhci_pxa *pxa = NULL;
-	int ret;
-	struct clk *clk;
-
-	pxa = kzalloc(sizeof(struct sdhci_pxa), GFP_KERNEL);
-	if (!pxa)
-		return -ENOMEM;
-
-	host = sdhci_pltfm_init(pdev, NULL);
-	if (IS_ERR(host)) {
-		kfree(pxa);
-		return PTR_ERR(host);
-	}
-	pltfm_host = sdhci_priv(host);
-	pltfm_host->priv = pxa;
-
-	clk = clk_get(dev, "PXA-SDHCLK");
-	if (IS_ERR(clk)) {
-		dev_err(dev, "failed to get io clock\n");
-		ret = PTR_ERR(clk);
-		goto err_clk_get;
-	}
-	pltfm_host->clk = clk;
-	clk_enable(clk);
-
-	host->quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL
-		| SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC;
-
-	/* enable 1/8V DDR capable */
-	host->mmc->caps |= MMC_CAP_1_8V_DDR;
-
-	if (pdata) {
-		if (pdata->flags & PXA_FLAG_CARD_PERMANENT) {
-			/* on-chip device */
-			host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION;
-			host->mmc->caps |= MMC_CAP_NONREMOVABLE;
-		}
-
-		/* If slot design supports 8 bit data, indicate this to MMC. */
-		if (pdata->flags & PXA_FLAG_SD_8_BIT_CAPABLE_SLOT)
-			host->mmc->caps |= MMC_CAP_8_BIT_DATA;
-
-		if (pdata->quirks)
-			host->quirks |= pdata->quirks;
-		if (pdata->host_caps)
-			host->mmc->caps |= pdata->host_caps;
-		if (pdata->pm_caps)
-			host->mmc->pm_caps |= pdata->pm_caps;
-	}
-
-	host->ops = &pxav3_sdhci_ops;
-
-	ret = sdhci_add_host(host);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to add host\n");
-		goto err_add_host;
-	}
-
-	platform_set_drvdata(pdev, host);
-
-	return 0;
-
-err_add_host:
-	clk_disable(clk);
-	clk_put(clk);
-err_clk_get:
-	sdhci_pltfm_free(pdev);
-	kfree(pxa);
-	return ret;
-}
-
-static int __devexit sdhci_pxav3_remove(struct platform_device *pdev)
-{
-	struct sdhci_host *host = platform_get_drvdata(pdev);
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_pxa *pxa = pltfm_host->priv;
-
-	sdhci_remove_host(host, 1);
-
-	clk_disable(pltfm_host->clk);
-	clk_put(pltfm_host->clk);
-	sdhci_pltfm_free(pdev);
-	kfree(pxa);
-
-	platform_set_drvdata(pdev, NULL);
-
-	return 0;
-}
-
-static struct platform_driver sdhci_pxav3_driver = {
-	.driver		= {
-		.name	= "sdhci-pxav3",
-		.owner	= THIS_MODULE,
-	},
-	.probe		= sdhci_pxav3_probe,
-	.remove		= __devexit_p(sdhci_pxav3_remove),
-#ifdef CONFIG_PM
-	.suspend	= sdhci_pltfm_suspend,
-	.resume		= sdhci_pltfm_resume,
-#endif
-};
-static int __init sdhci_pxav3_init(void)
-{
-	return platform_driver_register(&sdhci_pxav3_driver);
-}
-
-static void __exit sdhci_pxav3_exit(void)
-{
-	platform_driver_unregister(&sdhci_pxav3_driver);
-}
-
-module_init(sdhci_pxav3_init);
-module_exit(sdhci_pxav3_exit);
-
-MODULE_DESCRIPTION("SDHCI driver for pxav3");
-MODULE_AUTHOR("Marvell International Ltd.");
-MODULE_LICENSE("GPL v2");
-
diff --git a/trunk/drivers/mmc/host/sdhci-s3c.c b/trunk/drivers/mmc/host/sdhci-s3c.c
index 460ffaf0f6d7..69e3ee321eb5 100644
--- a/trunk/drivers/mmc/host/sdhci-s3c.c
+++ b/trunk/drivers/mmc/host/sdhci-s3c.c
@@ -612,14 +612,16 @@ static int sdhci_s3c_suspend(struct platform_device *dev, pm_message_t pm)
 {
 	struct sdhci_host *host = platform_get_drvdata(dev);
 
-	return sdhci_suspend_host(host, pm);
+	sdhci_suspend_host(host, pm);
+	return 0;
 }
 
 static int sdhci_s3c_resume(struct platform_device *dev)
 {
 	struct sdhci_host *host = platform_get_drvdata(dev);
 
-	return sdhci_resume_host(host);
+	sdhci_resume_host(host);
+	return 0;
 }
 
 #else
diff --git a/trunk/drivers/mmc/host/sdhci-tegra.c b/trunk/drivers/mmc/host/sdhci-tegra.c
index 18b0bd31de78..343c97edba32 100644
--- a/trunk/drivers/mmc/host/sdhci-tegra.c
+++ b/trunk/drivers/mmc/host/sdhci-tegra.c
@@ -24,6 +24,7 @@
 #include <mach/gpio.h>
 #include <mach/sdhci.h>
 
+#include "sdhci.h"
 #include "sdhci-pltfm.h"
 
 static u32 tegra_sdhci_readl(struct sdhci_host *host, int reg)
@@ -115,42 +116,20 @@ static int tegra_sdhci_8bit(struct sdhci_host *host, int bus_width)
 	return 0;
 }
 
-static struct sdhci_ops tegra_sdhci_ops = {
-	.get_ro     = tegra_sdhci_get_ro,
-	.read_l     = tegra_sdhci_readl,
-	.read_w     = tegra_sdhci_readw,
-	.write_l    = tegra_sdhci_writel,
-	.platform_8bit_width = tegra_sdhci_8bit,
-};
-
-static struct sdhci_pltfm_data sdhci_tegra_pdata = {
-	.quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
-		  SDHCI_QUIRK_SINGLE_POWER_WRITE |
-		  SDHCI_QUIRK_NO_HISPD_BIT |
-		  SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC,
-	.ops  = &tegra_sdhci_ops,
-};
 
-static int __devinit sdhci_tegra_probe(struct platform_device *pdev)
+static int tegra_sdhci_pltfm_init(struct sdhci_host *host,
+				  struct sdhci_pltfm_data *pdata)
 {
-	struct sdhci_pltfm_host *pltfm_host;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
 	struct tegra_sdhci_platform_data *plat;
-	struct sdhci_host *host;
 	struct clk *clk;
 	int rc;
 
-	host = sdhci_pltfm_init(pdev, &sdhci_tegra_pdata);
-	if (IS_ERR(host))
-		return PTR_ERR(host);
-
-	pltfm_host = sdhci_priv(host);
-
 	plat = pdev->dev.platform_data;
-
 	if (plat == NULL) {
 		dev_err(mmc_dev(host->mmc), "missing platform data\n");
-		rc = -ENXIO;
-		goto err_no_plat;
+		return -ENXIO;
 	}
 
 	if (gpio_is_valid(plat->power_gpio)) {
@@ -158,7 +137,7 @@ static int __devinit sdhci_tegra_probe(struct platform_device *pdev)
 		if (rc) {
 			dev_err(mmc_dev(host->mmc),
 				"failed to allocate power gpio\n");
-			goto err_power_req;
+			goto out;
 		}
 		tegra_gpio_enable(plat->power_gpio);
 		gpio_direction_output(plat->power_gpio, 1);
@@ -169,7 +148,7 @@ static int __devinit sdhci_tegra_probe(struct platform_device *pdev)
 		if (rc) {
 			dev_err(mmc_dev(host->mmc),
 				"failed to allocate cd gpio\n");
-			goto err_cd_req;
+			goto out_power;
 		}
 		tegra_gpio_enable(plat->cd_gpio);
 		gpio_direction_input(plat->cd_gpio);
@@ -180,7 +159,7 @@ static int __devinit sdhci_tegra_probe(struct platform_device *pdev)
 
 		if (rc)	{
 			dev_err(mmc_dev(host->mmc), "request irq error\n");
-			goto err_cd_irq_req;
+			goto out_cd;
 		}
 
 	}
@@ -190,7 +169,7 @@ static int __devinit sdhci_tegra_probe(struct platform_device *pdev)
 		if (rc) {
 			dev_err(mmc_dev(host->mmc),
 				"failed to allocate wp gpio\n");
-			goto err_wp_req;
+			goto out_irq;
 		}
 		tegra_gpio_enable(plat->wp_gpio);
 		gpio_direction_input(plat->wp_gpio);
@@ -200,7 +179,7 @@ static int __devinit sdhci_tegra_probe(struct platform_device *pdev)
 	if (IS_ERR(clk)) {
 		dev_err(mmc_dev(host->mmc), "clk err\n");
 		rc = PTR_ERR(clk);
-		goto err_clk_get;
+		goto out_wp;
 	}
 	clk_enable(clk);
 	pltfm_host->clk = clk;
@@ -210,47 +189,38 @@ static int __devinit sdhci_tegra_probe(struct platform_device *pdev)
 	if (plat->is_8bit)
 		host->mmc->caps |= MMC_CAP_8_BIT_DATA;
 
-	rc = sdhci_add_host(host);
-	if (rc)
-		goto err_add_host;
-
 	return 0;
 
-err_add_host:
-	clk_disable(pltfm_host->clk);
-	clk_put(pltfm_host->clk);
-err_clk_get:
+out_wp:
 	if (gpio_is_valid(plat->wp_gpio)) {
 		tegra_gpio_disable(plat->wp_gpio);
 		gpio_free(plat->wp_gpio);
 	}
-err_wp_req:
+
+out_irq:
 	if (gpio_is_valid(plat->cd_gpio))
 		free_irq(gpio_to_irq(plat->cd_gpio), host);
-err_cd_irq_req:
+out_cd:
 	if (gpio_is_valid(plat->cd_gpio)) {
 		tegra_gpio_disable(plat->cd_gpio);
 		gpio_free(plat->cd_gpio);
 	}
-err_cd_req:
+
+out_power:
 	if (gpio_is_valid(plat->power_gpio)) {
 		tegra_gpio_disable(plat->power_gpio);
 		gpio_free(plat->power_gpio);
 	}
-err_power_req:
-err_no_plat:
-	sdhci_pltfm_free(pdev);
+
+out:
 	return rc;
 }
 
-static int __devexit sdhci_tegra_remove(struct platform_device *pdev)
+static void tegra_sdhci_pltfm_exit(struct sdhci_host *host)
 {
-	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
 	struct tegra_sdhci_platform_data *plat;
-	int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff);
-
-	sdhci_remove_host(host, dead);
 
 	plat = pdev->dev.platform_data;
 
@@ -272,37 +242,22 @@ static int __devexit sdhci_tegra_remove(struct platform_device *pdev)
 
 	clk_disable(pltfm_host->clk);
 	clk_put(pltfm_host->clk);
-
-	sdhci_pltfm_free(pdev);
-
-	return 0;
 }
 
-static struct platform_driver sdhci_tegra_driver = {
-	.driver		= {
-		.name	= "sdhci-tegra",
-		.owner	= THIS_MODULE,
-	},
-	.probe		= sdhci_tegra_probe,
-	.remove		= __devexit_p(sdhci_tegra_remove),
-#ifdef CONFIG_PM
-	.suspend	= sdhci_pltfm_suspend,
-	.resume		= sdhci_pltfm_resume,
-#endif
+static struct sdhci_ops tegra_sdhci_ops = {
+	.get_ro     = tegra_sdhci_get_ro,
+	.read_l     = tegra_sdhci_readl,
+	.read_w     = tegra_sdhci_readw,
+	.write_l    = tegra_sdhci_writel,
+	.platform_8bit_width = tegra_sdhci_8bit,
 };
 
-static int __init sdhci_tegra_init(void)
-{
-	return platform_driver_register(&sdhci_tegra_driver);
-}
-module_init(sdhci_tegra_init);
-
-static void __exit sdhci_tegra_exit(void)
-{
-	platform_driver_unregister(&sdhci_tegra_driver);
-}
-module_exit(sdhci_tegra_exit);
-
-MODULE_DESCRIPTION("SDHCI driver for Tegra");
-MODULE_AUTHOR(" Google, Inc.");
-MODULE_LICENSE("GPL v2");
+struct sdhci_pltfm_data sdhci_tegra_pdata = {
+	.quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
+		  SDHCI_QUIRK_SINGLE_POWER_WRITE |
+		  SDHCI_QUIRK_NO_HISPD_BIT |
+		  SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC,
+	.ops  = &tegra_sdhci_ops,
+	.init = tegra_sdhci_pltfm_init,
+	.exit = tegra_sdhci_pltfm_exit,
+};
diff --git a/trunk/drivers/mmc/host/sdhci.c b/trunk/drivers/mmc/host/sdhci.c
index c31a3343340d..58d5436ff649 100644
--- a/trunk/drivers/mmc/host/sdhci.c
+++ b/trunk/drivers/mmc/host/sdhci.c
@@ -127,15 +127,11 @@ static void sdhci_mask_irqs(struct sdhci_host *host, u32 irqs)
 
 static void sdhci_set_card_detection(struct sdhci_host *host, bool enable)
 {
-	u32 present, irqs;
+	u32 irqs = SDHCI_INT_CARD_REMOVE | SDHCI_INT_CARD_INSERT;
 
 	if (host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION)
 		return;
 
-	present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
-			      SDHCI_CARD_PRESENT;
-	irqs = present ? SDHCI_INT_CARD_REMOVE : SDHCI_INT_CARD_INSERT;
-
 	if (enable)
 		sdhci_unmask_irqs(host, irqs);
 	else
@@ -2158,30 +2154,13 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
 		mmc_hostname(host->mmc), intmask);
 
 	if (intmask & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) {
-		u32 present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
-			      SDHCI_CARD_PRESENT;
-
-		/*
-		 * There is a observation on i.mx esdhc.  INSERT bit will be
-		 * immediately set again when it gets cleared, if a card is
-		 * inserted.  We have to mask the irq to prevent interrupt
-		 * storm which will freeze the system.  And the REMOVE gets
-		 * the same situation.
-		 *
-		 * More testing are needed here to ensure it works for other
-		 * platforms though.
-		 */
-		sdhci_mask_irqs(host, present ? SDHCI_INT_CARD_INSERT :
-						SDHCI_INT_CARD_REMOVE);
-		sdhci_unmask_irqs(host, present ? SDHCI_INT_CARD_REMOVE :
-						  SDHCI_INT_CARD_INSERT);
-
 		sdhci_writel(host, intmask & (SDHCI_INT_CARD_INSERT |
-			     SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS);
-		intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE);
+			SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS);
 		tasklet_schedule(&host->card_tasklet);
 	}
 
+	intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE);
+
 	if (intmask & SDHCI_INT_CMD_MASK) {
 		sdhci_writel(host, intmask & SDHCI_INT_CMD_MASK,
 			SDHCI_INT_STATUS);
@@ -2509,11 +2488,6 @@ int sdhci_add_host(struct sdhci_host *host)
 	} else
 		mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200;
 
-	if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK)
-		mmc->max_discard_to = (1 << 27) / (mmc->f_max / 1000);
-	else
-		mmc->max_discard_to = (1 << 27) / host->timeout_clk;
-
 	mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23;
 
 	if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12)
diff --git a/trunk/drivers/mmc/host/sh_mmcif.c b/trunk/drivers/mmc/host/sh_mmcif.c
index 557886bee9ce..14f8edbaa195 100644
--- a/trunk/drivers/mmc/host/sh_mmcif.c
+++ b/trunk/drivers/mmc/host/sh_mmcif.c
@@ -175,7 +175,6 @@ struct sh_mmcif_host {
 	enum mmcif_state state;
 	spinlock_t lock;
 	bool power;
-	bool card_present;
 
 	/* DMA support */
 	struct dma_chan		*chan_rx;
@@ -878,23 +877,23 @@ static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	spin_unlock_irqrestore(&host->lock, flags);
 
 	if (ios->power_mode == MMC_POWER_UP) {
-		if (!host->card_present) {
+		if (p->set_pwr)
+			p->set_pwr(host->pd, ios->power_mode);
+		if (!host->power) {
 			/* See if we also get DMA */
 			sh_mmcif_request_dma(host, host->pd->dev.platform_data);
-			host->card_present = true;
+			pm_runtime_get_sync(&host->pd->dev);
+			host->power = true;
 		}
 	} else if (ios->power_mode == MMC_POWER_OFF || !ios->clock) {
 		/* clock stop */
 		sh_mmcif_clock_control(host, 0);
 		if (ios->power_mode == MMC_POWER_OFF) {
-			if (host->card_present) {
+			if (host->power) {
+				pm_runtime_put(&host->pd->dev);
 				sh_mmcif_release_dma(host);
-				host->card_present = false;
+				host->power = false;
 			}
-		}
-		if (host->power) {
-			pm_runtime_put(&host->pd->dev);
-			host->power = false;
 			if (p->down_pwr)
 				p->down_pwr(host->pd);
 		}
@@ -902,16 +901,8 @@ static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		return;
 	}
 
-	if (ios->clock) {
-		if (!host->power) {
-			if (p->set_pwr)
-				p->set_pwr(host->pd, ios->power_mode);
-			pm_runtime_get_sync(&host->pd->dev);
-			host->power = true;
-			sh_mmcif_sync_reset(host);
-		}
+	if (ios->clock)
 		sh_mmcif_clock_control(host, ios->clock);
-	}
 
 	host->bus_width = ios->bus_width;
 	host->state = STATE_IDLE;
diff --git a/trunk/drivers/mmc/host/sh_mobile_sdhi.c b/trunk/drivers/mmc/host/sh_mobile_sdhi.c
index 774f6439d7ce..ce500f03df85 100644
--- a/trunk/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/trunk/drivers/mmc/host/sh_mobile_sdhi.c
@@ -26,7 +26,6 @@
 #include <linux/mmc/sh_mobile_sdhi.h>
 #include <linux/mfd/tmio.h>
 #include <linux/sh_dma.h>
-#include <linux/delay.h>
 
 #include "tmio_mmc.h"
 
@@ -56,39 +55,6 @@ static int sh_mobile_sdhi_get_cd(struct platform_device *pdev)
 		return -ENOSYS;
 }
 
-static int sh_mobile_sdhi_wait_idle(struct tmio_mmc_host *host)
-{
-	int timeout = 1000;
-
-	while (--timeout && !(sd_ctrl_read16(host, CTL_STATUS2) & (1 << 13)))
-		udelay(1);
-
-	if (!timeout) {
-		dev_warn(host->pdata->dev, "timeout waiting for SD bus idle\n");
-		return -EBUSY;
-	}
-
-	return 0;
-}
-
-static int sh_mobile_sdhi_write16_hook(struct tmio_mmc_host *host, int addr)
-{
-	switch (addr)
-	{
-	case CTL_SD_CMD:
-	case CTL_STOP_INTERNAL_ACTION:
-	case CTL_XFER_BLK_COUNT:
-	case CTL_SD_CARD_CLK_CTL:
-	case CTL_SD_XFER_LEN:
-	case CTL_SD_MEM_CARD_OPT:
-	case CTL_TRANSACTION_CTL:
-	case CTL_DMA_ENABLE:
-		return sh_mobile_sdhi_wait_idle(host);
-	}
-
-	return 0;
-}
-
 static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev)
 {
 	struct sh_mobile_sdhi *priv;
@@ -120,8 +86,6 @@ static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev)
 	mmc_data->hclk = clk_get_rate(priv->clk);
 	mmc_data->set_pwr = sh_mobile_sdhi_set_pwr;
 	mmc_data->get_cd = sh_mobile_sdhi_get_cd;
-	if (mmc_data->flags & TMIO_MMC_HAS_IDLE_WAIT)
-		mmc_data->write16_hook = sh_mobile_sdhi_write16_hook;
 	mmc_data->capabilities = MMC_CAP_MMC_HIGHSPEED;
 	if (p) {
 		mmc_data->flags = p->tmio_flags;
diff --git a/trunk/drivers/mmc/host/tmio_mmc.h b/trunk/drivers/mmc/host/tmio_mmc.h
index 087d88023ba1..8260bc2c34e3 100644
--- a/trunk/drivers/mmc/host/tmio_mmc.h
+++ b/trunk/drivers/mmc/host/tmio_mmc.h
@@ -18,7 +18,6 @@
 
 #include <linux/highmem.h>
 #include <linux/mmc/tmio.h>
-#include <linux/mutex.h>
 #include <linux/pagemap.h>
 #include <linux/spinlock.h>
 
@@ -53,8 +52,6 @@ struct tmio_mmc_host {
 	void (*set_clk_div)(struct platform_device *host, int state);
 
 	int			pm_error;
-	/* recognise system-wide suspend in runtime PM methods */
-	bool			pm_global;
 
 	/* pio related stuff */
 	struct scatterlist      *sg_ptr;
@@ -76,11 +73,8 @@ struct tmio_mmc_host {
 
 	/* Track lost interrupts */
 	struct delayed_work	delayed_reset_work;
-	struct work_struct	done;
-
-	spinlock_t		lock;		/* protect host private data */
+	spinlock_t		lock;
 	unsigned long		last_req_ts;
-	struct mutex		ios_lock;	/* protect set_ios() context */
 };
 
 int tmio_mmc_host_probe(struct tmio_mmc_host **host,
@@ -109,7 +103,6 @@ static inline void tmio_mmc_kunmap_atomic(struct scatterlist *sg,
 
 #if defined(CONFIG_MMC_SDHI) || defined(CONFIG_MMC_SDHI_MODULE)
 void tmio_mmc_start_dma(struct tmio_mmc_host *host, struct mmc_data *data);
-void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable);
 void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdata);
 void tmio_mmc_release_dma(struct tmio_mmc_host *host);
 #else
@@ -118,10 +111,6 @@ static inline void tmio_mmc_start_dma(struct tmio_mmc_host *host,
 {
 }
 
-static inline void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable)
-{
-}
-
 static inline void tmio_mmc_request_dma(struct tmio_mmc_host *host,
 				 struct tmio_mmc_data *pdata)
 {
@@ -145,44 +134,4 @@ int tmio_mmc_host_resume(struct device *dev);
 int tmio_mmc_host_runtime_suspend(struct device *dev);
 int tmio_mmc_host_runtime_resume(struct device *dev);
 
-static inline u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr)
-{
-	return readw(host->ctl + (addr << host->bus_shift));
-}
-
-static inline void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr,
-		u16 *buf, int count)
-{
-	readsw(host->ctl + (addr << host->bus_shift), buf, count);
-}
-
-static inline u32 sd_ctrl_read32(struct tmio_mmc_host *host, int addr)
-{
-	return readw(host->ctl + (addr << host->bus_shift)) |
-	       readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16;
-}
-
-static inline void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, u16 val)
-{
-	/* If there is a hook and it returns non-zero then there
-	 * is an error and the write should be skipped
-	 */
-	if (host->pdata->write16_hook && host->pdata->write16_hook(host, addr))
-		return;
-	writew(val, host->ctl + (addr << host->bus_shift));
-}
-
-static inline void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr,
-		u16 *buf, int count)
-{
-	writesw(host->ctl + (addr << host->bus_shift), buf, count);
-}
-
-static inline void sd_ctrl_write32(struct tmio_mmc_host *host, int addr, u32 val)
-{
-	writew(val, host->ctl + (addr << host->bus_shift));
-	writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift));
-}
-
-
 #endif
diff --git a/trunk/drivers/mmc/host/tmio_mmc_dma.c b/trunk/drivers/mmc/host/tmio_mmc_dma.c
index 2aa616dec32d..25f1ad6cbe09 100644
--- a/trunk/drivers/mmc/host/tmio_mmc_dma.c
+++ b/trunk/drivers/mmc/host/tmio_mmc_dma.c
@@ -22,14 +22,11 @@
 
 #define TMIO_MMC_MIN_DMA_LEN 8
 
-void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable)
+static void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable)
 {
-	if (!host->chan_tx || !host->chan_rx)
-		return;
-
 #if defined(CONFIG_SUPERH) || defined(CONFIG_ARCH_SHMOBILE)
 	/* Switch DMA mode on or off - SuperH specific? */
-	sd_ctrl_write16(host, CTL_DMA_ENABLE, enable ? 2 : 0);
+	writew(enable ? 2 : 0, host->ctl + (0xd8 << host->bus_shift));
 #endif
 }
 
diff --git a/trunk/drivers/mmc/host/tmio_mmc_pio.c b/trunk/drivers/mmc/host/tmio_mmc_pio.c
index 1f16357e7301..0b09e8239aa0 100644
--- a/trunk/drivers/mmc/host/tmio_mmc_pio.c
+++ b/trunk/drivers/mmc/host/tmio_mmc_pio.c
@@ -46,6 +46,40 @@
 
 #include "tmio_mmc.h"
 
+static u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr)
+{
+	return readw(host->ctl + (addr << host->bus_shift));
+}
+
+static void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr,
+		u16 *buf, int count)
+{
+	readsw(host->ctl + (addr << host->bus_shift), buf, count);
+}
+
+static u32 sd_ctrl_read32(struct tmio_mmc_host *host, int addr)
+{
+	return readw(host->ctl + (addr << host->bus_shift)) |
+	       readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16;
+}
+
+static void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, u16 val)
+{
+	writew(val, host->ctl + (addr << host->bus_shift));
+}
+
+static void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr,
+		u16 *buf, int count)
+{
+	writesw(host->ctl + (addr << host->bus_shift), buf, count);
+}
+
+static void sd_ctrl_write32(struct tmio_mmc_host *host, int addr, u32 val)
+{
+	writew(val, host->ctl + (addr << host->bus_shift));
+	writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift));
+}
+
 void tmio_mmc_enable_mmc_irqs(struct tmio_mmc_host *host, u32 i)
 {
 	u32 mask = sd_ctrl_read32(host, CTL_IRQ_MASK) & ~(i & TMIO_MASK_IRQ);
@@ -250,16 +284,10 @@ static void tmio_mmc_reset_work(struct work_struct *work)
 /* called with host->lock held, interrupts disabled */
 static void tmio_mmc_finish_request(struct tmio_mmc_host *host)
 {
-	struct mmc_request *mrq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&host->lock, flags);
+	struct mmc_request *mrq = host->mrq;
 
-	mrq = host->mrq;
-	if (IS_ERR_OR_NULL(mrq)) {
-		spin_unlock_irqrestore(&host->lock, flags);
+	if (!mrq)
 		return;
-	}
 
 	host->cmd = NULL;
 	host->data = NULL;
@@ -268,18 +296,11 @@ static void tmio_mmc_finish_request(struct tmio_mmc_host *host)
 	cancel_delayed_work(&host->delayed_reset_work);
 
 	host->mrq = NULL;
-	spin_unlock_irqrestore(&host->lock, flags);
 
+	/* FIXME: mmc_request_done() can schedule! */
 	mmc_request_done(host->mmc, mrq);
 }
 
-static void tmio_mmc_done_work(struct work_struct *work)
-{
-	struct tmio_mmc_host *host = container_of(work, struct tmio_mmc_host,
-						  done);
-	tmio_mmc_finish_request(host);
-}
-
 /* These are the bitmasks the tmio chip requires to implement the MMC response
  * types. Note that R1 and R6 are the same in this scheme. */
 #define APP_CMD        0x0040
@@ -446,7 +467,7 @@ void tmio_mmc_do_data_irq(struct tmio_mmc_host *host)
 			BUG();
 	}
 
-	schedule_work(&host->done);
+	tmio_mmc_finish_request(host);
 }
 
 static void tmio_mmc_data_irq(struct tmio_mmc_host *host)
@@ -536,7 +557,7 @@ static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host,
 				tasklet_schedule(&host->dma_issue);
 		}
 	} else {
-		schedule_work(&host->done);
+		tmio_mmc_finish_request(host);
 	}
 
 out:
@@ -546,7 +567,6 @@ static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host,
 irqreturn_t tmio_mmc_irq(int irq, void *devid)
 {
 	struct tmio_mmc_host *host = devid;
-	struct mmc_host *mmc = host->mmc;
 	struct tmio_mmc_data *pdata = host->pdata;
 	unsigned int ireg, irq_mask, status;
 	unsigned int sdio_ireg, sdio_irq_mask, sdio_status;
@@ -568,13 +588,13 @@ irqreturn_t tmio_mmc_irq(int irq, void *devid)
 		if (sdio_ireg && !host->sdio_irq_enabled) {
 			pr_warning("tmio_mmc: Spurious SDIO IRQ, disabling! 0x%04x 0x%04x 0x%04x\n",
 				   sdio_status, sdio_irq_mask, sdio_ireg);
-			tmio_mmc_enable_sdio_irq(mmc, 0);
+			tmio_mmc_enable_sdio_irq(host->mmc, 0);
 			goto out;
 		}
 
-		if (mmc->caps & MMC_CAP_SDIO_IRQ &&
+		if (host->mmc->caps & MMC_CAP_SDIO_IRQ &&
 			sdio_ireg & TMIO_SDIO_STAT_IOIRQ)
-			mmc_signal_sdio_irq(mmc);
+			mmc_signal_sdio_irq(host->mmc);
 
 		if (sdio_ireg)
 			goto out;
@@ -583,49 +603,58 @@ irqreturn_t tmio_mmc_irq(int irq, void *devid)
 	pr_debug_status(status);
 	pr_debug_status(ireg);
 
-	/* Card insert / remove attempts */
-	if (ireg & (TMIO_STAT_CARD_INSERT | TMIO_STAT_CARD_REMOVE)) {
-		tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_CARD_INSERT |
-			TMIO_STAT_CARD_REMOVE);
-		if ((((ireg & TMIO_STAT_CARD_REMOVE) && mmc->card) ||
-		     ((ireg & TMIO_STAT_CARD_INSERT) && !mmc->card)) &&
-		    !work_pending(&mmc->detect.work))
-			mmc_detect_change(host->mmc, msecs_to_jiffies(100));
+	if (!ireg) {
+		tmio_mmc_disable_mmc_irqs(host, status & ~irq_mask);
+
+		pr_warning("tmio_mmc: Spurious irq, disabling! "
+			"0x%08x 0x%08x 0x%08x\n", status, irq_mask, ireg);
+		pr_debug_status(status);
+
 		goto out;
 	}
 
-	/* CRC and other errors */
-/*	if (ireg & TMIO_STAT_ERR_IRQ)
- *		handled |= tmio_error_irq(host, irq, stat);
+	while (ireg) {
+		/* Card insert / remove attempts */
+		if (ireg & (TMIO_STAT_CARD_INSERT | TMIO_STAT_CARD_REMOVE)) {
+			tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_CARD_INSERT |
+				TMIO_STAT_CARD_REMOVE);
+			mmc_detect_change(host->mmc, msecs_to_jiffies(100));
+		}
+
+		/* CRC and other errors */
+/*		if (ireg & TMIO_STAT_ERR_IRQ)
+ *			handled |= tmio_error_irq(host, irq, stat);
  */
 
-	/* Command completion */
-	if (ireg & (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT)) {
-		tmio_mmc_ack_mmc_irqs(host,
-			     TMIO_STAT_CMDRESPEND |
-			     TMIO_STAT_CMDTIMEOUT);
-		tmio_mmc_cmd_irq(host, status);
-		goto out;
-	}
+		/* Command completion */
+		if (ireg & (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT)) {
+			tmio_mmc_ack_mmc_irqs(host,
+				     TMIO_STAT_CMDRESPEND |
+				     TMIO_STAT_CMDTIMEOUT);
+			tmio_mmc_cmd_irq(host, status);
+		}
 
-	/* Data transfer */
-	if (ireg & (TMIO_STAT_RXRDY | TMIO_STAT_TXRQ)) {
-		tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_RXRDY | TMIO_STAT_TXRQ);
-		tmio_mmc_pio_irq(host);
-		goto out;
-	}
+		/* Data transfer */
+		if (ireg & (TMIO_STAT_RXRDY | TMIO_STAT_TXRQ)) {
+			tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_RXRDY | TMIO_STAT_TXRQ);
+			tmio_mmc_pio_irq(host);
+		}
 
-	/* Data transfer completion */
-	if (ireg & TMIO_STAT_DATAEND) {
-		tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_DATAEND);
-		tmio_mmc_data_irq(host);
-		goto out;
-	}
+		/* Data transfer completion */
+		if (ireg & TMIO_STAT_DATAEND) {
+			tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_DATAEND);
+			tmio_mmc_data_irq(host);
+		}
 
-	pr_warning("tmio_mmc: Spurious irq, disabling! "
-		"0x%08x 0x%08x 0x%08x\n", status, irq_mask, ireg);
-	pr_debug_status(status);
-	tmio_mmc_disable_mmc_irqs(host, status & ~irq_mask);
+		/* Check status - keep going until we've handled it all */
+		status = sd_ctrl_read32(host, CTL_STATUS);
+		irq_mask = sd_ctrl_read32(host, CTL_IRQ_MASK);
+		ireg = status & TMIO_MASK_IRQ & ~irq_mask;
+
+		pr_debug("Status at end of loop: %08x\n", status);
+		pr_debug_status(status);
+	}
+	pr_debug("MMC IRQ end\n");
 
 out:
 	return IRQ_HANDLED;
@@ -720,8 +749,6 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	struct tmio_mmc_data *pdata = host->pdata;
 	unsigned long flags;
 
-	mutex_lock(&host->ios_lock);
-
 	spin_lock_irqsave(&host->lock, flags);
 	if (host->mrq) {
 		if (IS_ERR(host->mrq)) {
@@ -737,8 +764,6 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 				host->mrq->cmd->opcode, host->last_req_ts, jiffies);
 		}
 		spin_unlock_irqrestore(&host->lock, flags);
-
-		mutex_unlock(&host->ios_lock);
 		return;
 	}
 
@@ -746,30 +771,33 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 
 	spin_unlock_irqrestore(&host->lock, flags);
 
-	/*
-	 * pdata->power == false only if COLD_CD is available, otherwise only
-	 * in short time intervals during probing or resuming
-	 */
-	if (ios->power_mode == MMC_POWER_ON && ios->clock) {
-		if (!pdata->power) {
+	if (ios->clock)
+		tmio_mmc_set_clock(host, ios->clock);
+
+	/* Power sequence - OFF -> UP -> ON */
+	if (ios->power_mode == MMC_POWER_UP) {
+		if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) && !pdata->power) {
 			pm_runtime_get_sync(&host->pdev->dev);
 			pdata->power = true;
 		}
-		tmio_mmc_set_clock(host, ios->clock);
 		/* power up SD bus */
 		if (host->set_pwr)
 			host->set_pwr(host->pdev, 1);
-		/* start bus clock */
-		tmio_mmc_clk_start(host);
-	} else if (ios->power_mode != MMC_POWER_UP) {
-		if (host->set_pwr)
-			host->set_pwr(host->pdev, 0);
-		if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) &&
-		    pdata->power) {
-			pdata->power = false;
-			pm_runtime_put(&host->pdev->dev);
+	} else if (ios->power_mode == MMC_POWER_OFF || !ios->clock) {
+		/* power down SD bus */
+		if (ios->power_mode == MMC_POWER_OFF) {
+			if (host->set_pwr)
+				host->set_pwr(host->pdev, 0);
+			if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) &&
+			    pdata->power) {
+				pdata->power = false;
+				pm_runtime_put(&host->pdev->dev);
+			}
 		}
 		tmio_mmc_clk_stop(host);
+	} else {
+		/* start bus clock */
+		tmio_mmc_clk_start(host);
 	}
 
 	switch (ios->bus_width) {
@@ -789,8 +817,6 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 			current->comm, task_pid_nr(current),
 			ios->clock, ios->power_mode);
 	host->mrq = NULL;
-
-	mutex_unlock(&host->ios_lock);
 }
 
 static int tmio_mmc_get_ro(struct mmc_host *mmc)
@@ -887,20 +913,16 @@ int __devinit tmio_mmc_host_probe(struct tmio_mmc_host **host,
 		tmio_mmc_enable_sdio_irq(mmc, 0);
 
 	spin_lock_init(&_host->lock);
-	mutex_init(&_host->ios_lock);
 
 	/* Init delayed work for request timeouts */
 	INIT_DELAYED_WORK(&_host->delayed_reset_work, tmio_mmc_reset_work);
-	INIT_WORK(&_host->done, tmio_mmc_done_work);
 
 	/* See if we also get DMA */
 	tmio_mmc_request_dma(_host, pdata);
 
 	/* We have to keep the device powered for its card detection to work */
-	if (!(pdata->flags & TMIO_MMC_HAS_COLD_CD)) {
-		pdata->power = true;
+	if (!(pdata->flags & TMIO_MMC_HAS_COLD_CD))
 		pm_runtime_get_noresume(&pdev->dev);
-	}
 
 	mmc_add_host(mmc);
 
@@ -941,7 +963,6 @@ void tmio_mmc_host_remove(struct tmio_mmc_host *host)
 		pm_runtime_get_sync(&pdev->dev);
 
 	mmc_remove_host(host->mmc);
-	cancel_work_sync(&host->done);
 	cancel_delayed_work_sync(&host->delayed_reset_work);
 	tmio_mmc_release_dma(host);
 
@@ -977,16 +998,11 @@ int tmio_mmc_host_resume(struct device *dev)
 	/* The MMC core will perform the complete set up */
 	host->pdata->power = false;
 
-	host->pm_global = true;
 	if (!host->pm_error)
 		pm_runtime_get_sync(dev);
 
-	if (host->pm_global) {
-		/* Runtime PM resume callback didn't run */
-		tmio_mmc_reset(host);
-		tmio_mmc_enable_dma(host, true);
-		host->pm_global = false;
-	}
+	tmio_mmc_reset(mmc_priv(mmc));
+	tmio_mmc_request_dma(host, host->pdata);
 
 	return mmc_resume_host(mmc);
 }
@@ -1007,15 +1023,12 @@ int tmio_mmc_host_runtime_resume(struct device *dev)
 	struct tmio_mmc_data *pdata = host->pdata;
 
 	tmio_mmc_reset(host);
-	tmio_mmc_enable_dma(host, true);
 
 	if (pdata->power) {
 		/* Only entered after a card-insert interrupt */
-		if (!mmc->card)
-			tmio_mmc_set_ios(mmc, &mmc->ios);
+		tmio_mmc_set_ios(mmc, &mmc->ios);
 		mmc_detect_change(mmc, msecs_to_jiffies(100));
 	}
-	host->pm_global = false;
 
 	return 0;
 }
diff --git a/trunk/drivers/mtd/ubi/build.c b/trunk/drivers/mtd/ubi/build.c
index 6c3fb5ab20f5..65626c1c446d 100644
--- a/trunk/drivers/mtd/ubi/build.c
+++ b/trunk/drivers/mtd/ubi/build.c
@@ -953,14 +953,10 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
 	if (!ubi->peb_buf2)
 		goto out_free;
 
-	err = ubi_debugging_init_dev(ubi);
-	if (err)
-		goto out_free;
-
 	err = attach_by_scanning(ubi);
 	if (err) {
 		dbg_err("failed to attach by scanning, error %d", err);
-		goto out_debugging;
+		goto out_free;
 	}
 
 	if (ubi->autoresize_vol_id != -1) {
@@ -973,16 +969,12 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
 	if (err)
 		goto out_detach;
 
-	err = ubi_debugfs_init_dev(ubi);
-	if (err)
-		goto out_uif;
-
 	ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name);
 	if (IS_ERR(ubi->bgt_thread)) {
 		err = PTR_ERR(ubi->bgt_thread);
 		ubi_err("cannot spawn \"%s\", error %d", ubi->bgt_name,
 			err);
-		goto out_debugfs;
+		goto out_uif;
 	}
 
 	ubi_msg("attached mtd%d to ubi%d", mtd->index, ubi_num);
@@ -1016,18 +1008,12 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
 	ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL);
 	return ubi_num;
 
-out_debugfs:
-	ubi_debugfs_exit_dev(ubi);
 out_uif:
-	get_device(&ubi->dev);
-	ubi_assert(ref);
 	uif_close(ubi);
 out_detach:
 	ubi_wl_close(ubi);
 	free_internal_volumes(ubi);
 	vfree(ubi->vtbl);
-out_debugging:
-	ubi_debugging_exit_dev(ubi);
 out_free:
 	vfree(ubi->peb_buf1);
 	vfree(ubi->peb_buf2);
@@ -1094,13 +1080,11 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway)
 	 */
 	get_device(&ubi->dev);
 
-	ubi_debugfs_exit_dev(ubi);
 	uif_close(ubi);
 	ubi_wl_close(ubi);
 	free_internal_volumes(ubi);
 	vfree(ubi->vtbl);
 	put_mtd_device(ubi->mtd);
-	ubi_debugging_exit_dev(ubi);
 	vfree(ubi->peb_buf1);
 	vfree(ubi->peb_buf2);
 	ubi_msg("mtd%d is detached from ubi%d", ubi->mtd->index, ubi->ubi_num);
@@ -1215,11 +1199,6 @@ static int __init ubi_init(void)
 	if (!ubi_wl_entry_slab)
 		goto out_dev_unreg;
 
-	err = ubi_debugfs_init();
-	if (err)
-		goto out_slab;
-
-
 	/* Attach MTD devices */
 	for (i = 0; i < mtd_devs; i++) {
 		struct mtd_dev_param *p = &mtd_dev_param[i];
@@ -1268,8 +1247,6 @@ static int __init ubi_init(void)
 			ubi_detach_mtd_dev(ubi_devices[k]->ubi_num, 1);
 			mutex_unlock(&ubi_devices_mutex);
 		}
-	ubi_debugfs_exit();
-out_slab:
 	kmem_cache_destroy(ubi_wl_entry_slab);
 out_dev_unreg:
 	misc_deregister(&ubi_ctrl_cdev);
@@ -1293,7 +1270,6 @@ static void __exit ubi_exit(void)
 			ubi_detach_mtd_dev(ubi_devices[i]->ubi_num, 1);
 			mutex_unlock(&ubi_devices_mutex);
 		}
-	ubi_debugfs_exit();
 	kmem_cache_destroy(ubi_wl_entry_slab);
 	misc_deregister(&ubi_ctrl_cdev);
 	class_remove_file(ubi_class, &ubi_version);
diff --git a/trunk/drivers/mtd/ubi/debug.c b/trunk/drivers/mtd/ubi/debug.c
index ab80c0debac8..2224cbe41ddf 100644
--- a/trunk/drivers/mtd/ubi/debug.c
+++ b/trunk/drivers/mtd/ubi/debug.c
@@ -27,9 +27,17 @@
 #ifdef CONFIG_MTD_UBI_DEBUG
 
 #include "ubi.h"
-#include <linux/debugfs.h>
-#include <linux/uaccess.h>
 #include <linux/module.h>
+#include <linux/moduleparam.h>
+
+unsigned int ubi_chk_flags;	/* set via module parameter "debug_chks" */
+unsigned int ubi_tst_flags;	/* set via module parameter "debug_tsts" */
+
+module_param_named(debug_chks, ubi_chk_flags, uint, S_IRUGO | S_IWUSR);
+module_param_named(debug_tsts, ubi_tst_flags, uint, S_IRUGO | S_IWUSR);
+
+MODULE_PARM_DESC(debug_chks, "Debug check flags");
+MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
 
 /**
  * ubi_dbg_dump_ec_hdr - dump an erase counter header.
@@ -231,261 +239,4 @@ void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len)
 	return;
 }
 
-/**
- * ubi_debugging_init_dev - initialize debugging for an UBI device.
- * @ubi: UBI device description object
- *
- * This function initializes debugging-related data for UBI device @ubi.
- * Returns zero in case of success and a negative error code in case of
- * failure.
- */
-int ubi_debugging_init_dev(struct ubi_device *ubi)
-{
-	ubi->dbg = kzalloc(sizeof(struct ubi_debug_info), GFP_KERNEL);
-	if (!ubi->dbg)
-		return -ENOMEM;
-
-	return 0;
-}
-
-/**
- * ubi_debugging_exit_dev - free debugging data for an UBI device.
- * @ubi: UBI device description object
- */
-void ubi_debugging_exit_dev(struct ubi_device *ubi)
-{
-	kfree(ubi->dbg);
-}
-
-/*
- * Root directory for UBI stuff in debugfs. Contains sub-directories which
- * contain the stuff specific to particular UBI devices.
- */
-static struct dentry *dfs_rootdir;
-
-/**
- * ubi_debugfs_init - create UBI debugfs directory.
- *
- * Create UBI debugfs directory. Returns zero in case of success and a negative
- * error code in case of failure.
- */
-int ubi_debugfs_init(void)
-{
-	dfs_rootdir = debugfs_create_dir("ubi", NULL);
-	if (IS_ERR_OR_NULL(dfs_rootdir)) {
-		int err = dfs_rootdir ? -ENODEV : PTR_ERR(dfs_rootdir);
-
-		ubi_err("cannot create \"ubi\" debugfs directory, error %d\n",
-			err);
-		return err;
-	}
-
-	return 0;
-}
-
-/**
- * ubi_debugfs_exit - remove UBI debugfs directory.
- */
-void ubi_debugfs_exit(void)
-{
-	debugfs_remove(dfs_rootdir);
-}
-
-/* Read an UBI debugfs file */
-static ssize_t dfs_file_read(struct file *file, char __user *user_buf,
-			     size_t count, loff_t *ppos)
-{
-	unsigned long ubi_num = (unsigned long)file->private_data;
-	struct dentry *dent = file->f_path.dentry;
-	struct ubi_device *ubi;
-	struct ubi_debug_info *d;
-	char buf[3];
-	int val;
-
-	ubi = ubi_get_device(ubi_num);
-	if (!ubi)
-		return -ENODEV;
-	d = ubi->dbg;
-
-	if (dent == d->dfs_chk_gen)
-		val = d->chk_gen;
-	else if (dent == d->dfs_chk_io)
-		val = d->chk_io;
-	else if (dent == d->dfs_disable_bgt)
-		val = d->disable_bgt;
-	else if (dent == d->dfs_emulate_bitflips)
-		val = d->emulate_bitflips;
-	else if (dent == d->dfs_emulate_io_failures)
-		val = d->emulate_io_failures;
-	else {
-		count = -EINVAL;
-		goto out;
-	}
-
-	if (val)
-		buf[0] = '1';
-	else
-		buf[0] = '0';
-	buf[1] = '\n';
-	buf[2] = 0x00;
-
-	count = simple_read_from_buffer(user_buf, count, ppos, buf, 2);
-
-out:
-	ubi_put_device(ubi);
-	return count;
-}
-
-/* Write an UBI debugfs file */
-static ssize_t dfs_file_write(struct file *file, const char __user *user_buf,
-			      size_t count, loff_t *ppos)
-{
-	unsigned long ubi_num = (unsigned long)file->private_data;
-	struct dentry *dent = file->f_path.dentry;
-	struct ubi_device *ubi;
-	struct ubi_debug_info *d;
-	size_t buf_size;
-	char buf[8];
-	int val;
-
-	ubi = ubi_get_device(ubi_num);
-	if (!ubi)
-		return -ENODEV;
-	d = ubi->dbg;
-
-	buf_size = min_t(size_t, count, (sizeof(buf) - 1));
-	if (copy_from_user(buf, user_buf, buf_size)) {
-		count = -EFAULT;
-		goto out;
-	}
-
-	if (buf[0] == '1')
-		val = 1;
-	else if (buf[0] == '0')
-		val = 0;
-	else {
-		count = -EINVAL;
-		goto out;
-	}
-
-	if (dent == d->dfs_chk_gen)
-		d->chk_gen = val;
-	else if (dent == d->dfs_chk_io)
-		d->chk_io = val;
-	else if (dent == d->dfs_disable_bgt)
-		d->disable_bgt = val;
-	else if (dent == d->dfs_emulate_bitflips)
-		d->emulate_bitflips = val;
-	else if (dent == d->dfs_emulate_io_failures)
-		d->emulate_io_failures = val;
-	else
-		count = -EINVAL;
-
-out:
-	ubi_put_device(ubi);
-	return count;
-}
-
-static int default_open(struct inode *inode, struct file *file)
-{
-	if (inode->i_private)
-		file->private_data = inode->i_private;
-
-	return 0;
-}
-
-/* File operations for all UBI debugfs files */
-static const struct file_operations dfs_fops = {
-	.read   = dfs_file_read,
-	.write  = dfs_file_write,
-	.open   = default_open,
-	.llseek = no_llseek,
-	.owner  = THIS_MODULE,
-};
-
-/**
- * ubi_debugfs_init_dev - initialize debugfs for an UBI device.
- * @ubi: UBI device description object
- *
- * This function creates all debugfs files for UBI device @ubi. Returns zero in
- * case of success and a negative error code in case of failure.
- */
-int ubi_debugfs_init_dev(struct ubi_device *ubi)
-{
-	int err, n;
-	unsigned long ubi_num = ubi->ubi_num;
-	const char *fname;
-	struct dentry *dent;
-	struct ubi_debug_info *d = ubi->dbg;
-
-	n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN + 1, UBI_DFS_DIR_NAME,
-		     ubi->ubi_num);
-	if (n == UBI_DFS_DIR_LEN) {
-		/* The array size is too small */
-		fname = UBI_DFS_DIR_NAME;
-		dent = ERR_PTR(-EINVAL);
-		goto out;
-	}
-
-	fname = d->dfs_dir_name;
-	dent = debugfs_create_dir(fname, dfs_rootdir);
-	if (IS_ERR_OR_NULL(dent))
-		goto out;
-	d->dfs_dir = dent;
-
-	fname = "chk_gen";
-	dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num,
-				   &dfs_fops);
-	if (IS_ERR_OR_NULL(dent))
-		goto out_remove;
-	d->dfs_chk_gen = dent;
-
-	fname = "chk_io";
-	dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num,
-				   &dfs_fops);
-	if (IS_ERR_OR_NULL(dent))
-		goto out_remove;
-	d->dfs_chk_io = dent;
-
-	fname = "tst_disable_bgt";
-	dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num,
-				   &dfs_fops);
-	if (IS_ERR_OR_NULL(dent))
-		goto out_remove;
-	d->dfs_disable_bgt = dent;
-
-	fname = "tst_emulate_bitflips";
-	dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num,
-				   &dfs_fops);
-	if (IS_ERR_OR_NULL(dent))
-		goto out_remove;
-	d->dfs_emulate_bitflips = dent;
-
-	fname = "tst_emulate_io_failures";
-	dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num,
-				   &dfs_fops);
-	if (IS_ERR_OR_NULL(dent))
-		goto out_remove;
-	d->dfs_emulate_io_failures = dent;
-
-	return 0;
-
-out_remove:
-	debugfs_remove_recursive(d->dfs_dir);
-out:
-	err = dent ? PTR_ERR(dent) : -ENODEV;
-	ubi_err("cannot create \"%s\" debugfs file or directory, error %d\n",
-		fname, err);
-	return err;
-}
-
-/**
- * dbg_debug_exit_dev - free all debugfs files corresponding to device @ubi
- * @ubi: UBI device description object
- */
-void ubi_debugfs_exit_dev(struct ubi_device *ubi)
-{
-	debugfs_remove_recursive(ubi->dbg->dfs_dir);
-}
-
 #endif /* CONFIG_MTD_UBI_DEBUG */
diff --git a/trunk/drivers/mtd/ubi/debug.h b/trunk/drivers/mtd/ubi/debug.h
index 65b5b76cc379..3f1a09c5c438 100644
--- a/trunk/drivers/mtd/ubi/debug.h
+++ b/trunk/drivers/mtd/ubi/debug.h
@@ -21,6 +21,14 @@
 #ifndef __UBI_DEBUG_H__
 #define __UBI_DEBUG_H__
 
+struct ubi_ec_hdr;
+struct ubi_vid_hdr;
+struct ubi_volume;
+struct ubi_vtbl_record;
+struct ubi_scan_volume;
+struct ubi_scan_leb;
+struct ubi_mkvol_req;
+
 #ifdef CONFIG_MTD_UBI_DEBUG
 #include <linux/random.h>
 
@@ -63,103 +71,86 @@ void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv);
 void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type);
 void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req);
 void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len);
+
+extern unsigned int ubi_chk_flags;
+
+/*
+ * Debugging check flags.
+ *
+ * UBI_CHK_GEN: general checks
+ * UBI_CHK_IO: check writes and erases
+ */
+enum {
+	UBI_CHK_GEN = 0x1,
+	UBI_CHK_IO  = 0x2,
+};
+
 int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len);
 int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum,
 			int offset, int len);
-int ubi_debugging_init_dev(struct ubi_device *ubi);
-void ubi_debugging_exit_dev(struct ubi_device *ubi);
-int ubi_debugfs_init(void);
-void ubi_debugfs_exit(void);
-int ubi_debugfs_init_dev(struct ubi_device *ubi);
-void ubi_debugfs_exit_dev(struct ubi_device *ubi);
 
-/*
- * The UBI debugfs directory name pattern and maximum name length (3 for "ubi"
- * + 2 for the number plus 1 for the trailing zero byte.
- */
-#define UBI_DFS_DIR_NAME "ubi%d"
-#define UBI_DFS_DIR_LEN  (3 + 2 + 1)
+extern unsigned int ubi_tst_flags;
 
-/**
- * struct ubi_debug_info - debugging information for an UBI device.
+/*
+ * Special testing flags.
  *
- * @chk_gen: if UBI general extra checks are enabled
- * @chk_io: if UBI I/O extra checks are enabled
- * @disable_bgt: disable the background task for testing purposes
- * @emulate_bitflips: emulate bit-flips for testing purposes
- * @emulate_io_failures: emulate write/erase failures for testing purposes
- * @dfs_dir_name: name of debugfs directory containing files of this UBI device
- * @dfs_dir: direntry object of the UBI device debugfs directory
- * @dfs_chk_gen: debugfs knob to enable UBI general extra checks
- * @dfs_chk_io: debugfs knob to enable UBI I/O extra checks
- * @dfs_disable_bgt: debugfs knob to disable the background task
- * @dfs_emulate_bitflips: debugfs knob to emulate bit-flips
- * @dfs_emulate_io_failures: debugfs knob to emulate write/erase failures
+ * UBI_TST_DISABLE_BGT: disable the background thread
+ * UBI_TST_EMULATE_BITFLIPS: emulate bit-flips
+ * UBI_TST_EMULATE_WRITE_FAILURES: emulate write failures
+ * UBI_TST_EMULATE_ERASE_FAILURES: emulate erase failures
  */
-struct ubi_debug_info {
-	unsigned int chk_gen:1;
-	unsigned int chk_io:1;
-	unsigned int disable_bgt:1;
-	unsigned int emulate_bitflips:1;
-	unsigned int emulate_io_failures:1;
-	char dfs_dir_name[UBI_DFS_DIR_LEN + 1];
-	struct dentry *dfs_dir;
-	struct dentry *dfs_chk_gen;
-	struct dentry *dfs_chk_io;
-	struct dentry *dfs_disable_bgt;
-	struct dentry *dfs_emulate_bitflips;
-	struct dentry *dfs_emulate_io_failures;
+enum {
+	UBI_TST_DISABLE_BGT            = 0x1,
+	UBI_TST_EMULATE_BITFLIPS       = 0x2,
+	UBI_TST_EMULATE_WRITE_FAILURES = 0x4,
+	UBI_TST_EMULATE_ERASE_FAILURES = 0x8,
 };
 
 /**
  * ubi_dbg_is_bgt_disabled - if the background thread is disabled.
- * @ubi: UBI device description object
  *
  * Returns non-zero if the UBI background thread is disabled for testing
  * purposes.
  */
-static inline int ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi)
+static inline int ubi_dbg_is_bgt_disabled(void)
 {
-	return ubi->dbg->disable_bgt;
+	return ubi_tst_flags & UBI_TST_DISABLE_BGT;
 }
 
 /**
  * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip.
- * @ubi: UBI device description object
  *
  * Returns non-zero if a bit-flip should be emulated, otherwise returns zero.
  */
-static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi)
+static inline int ubi_dbg_is_bitflip(void)
 {
-	if (ubi->dbg->emulate_bitflips)
+	if (ubi_tst_flags & UBI_TST_EMULATE_BITFLIPS)
 		return !(random32() % 200);
 	return 0;
 }
 
 /**
  * ubi_dbg_is_write_failure - if it is time to emulate a write failure.
- * @ubi: UBI device description object
  *
  * Returns non-zero if a write failure should be emulated, otherwise returns
  * zero.
  */
-static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi)
+static inline int ubi_dbg_is_write_failure(void)
 {
-	if (ubi->dbg->emulate_io_failures)
+	if (ubi_tst_flags & UBI_TST_EMULATE_WRITE_FAILURES)
 		return !(random32() % 500);
 	return 0;
 }
 
 /**
  * ubi_dbg_is_erase_failure - if its time to emulate an erase failure.
- * @ubi: UBI device description object
  *
  * Returns non-zero if an erase failure should be emulated, otherwise returns
  * zero.
  */
-static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi)
+static inline int ubi_dbg_is_erase_failure(void)
 {
-	if (ubi->dbg->emulate_io_failures)
+	if (ubi_tst_flags & UBI_TST_EMULATE_ERASE_FAILURES)
 		return !(random32() % 400);
 	return 0;
 }
@@ -210,6 +201,11 @@ static inline void ubi_dbg_dump_flash(struct ubi_device *ubi,
 static inline void
 ubi_dbg_print_hex_dump(const char *l, const char *ps, int pt, int r,
 		       int g, const void *b, size_t len, bool a)     { return; }
+
+static inline int ubi_dbg_is_bgt_disabled(void)                    { return 0; }
+static inline int ubi_dbg_is_bitflip(void)                         { return 0; }
+static inline int ubi_dbg_is_write_failure(void)                   { return 0; }
+static inline int ubi_dbg_is_erase_failure(void)                   { return 0; }
 static inline int ubi_dbg_check_all_ff(struct ubi_device *ubi,
 				       int pnum, int offset,
 				       int len)                    { return 0; }
@@ -217,20 +213,5 @@ static inline int ubi_dbg_check_write(struct ubi_device *ubi,
 				      const void *buf, int pnum,
 				      int offset, int len)         { return 0; }
 
-static inline int ubi_debugging_init_dev(struct ubi_device *ubi)   { return 0; }
-static inline void ubi_debugging_exit_dev(struct ubi_device *ubi)  { return; }
-static inline int ubi_debugfs_init(void)                           { return 0; }
-static inline void ubi_debugfs_exit(void)                          { return; }
-static inline int ubi_debugfs_init_dev(struct ubi_device *ubi)     { return 0; }
-static inline void ubi_debugfs_exit_dev(struct ubi_device *ubi)    { return; }
-
-static inline int
-ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi)              { return 0; }
-static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) { return 0; }
-static inline int
-ubi_dbg_is_write_failure(const struct ubi_device *ubi)             { return 0; }
-static inline int
-ubi_dbg_is_erase_failure(const struct ubi_device *ubi)             { return 0; }
-
 #endif /* !CONFIG_MTD_UBI_DEBUG */
 #endif /* !__UBI_DEBUG_H__ */
diff --git a/trunk/drivers/mtd/ubi/io.c b/trunk/drivers/mtd/ubi/io.c
index 6ba55c235873..8c1b1c7bc4a7 100644
--- a/trunk/drivers/mtd/ubi/io.c
+++ b/trunk/drivers/mtd/ubi/io.c
@@ -212,7 +212,7 @@ int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset,
 	} else {
 		ubi_assert(len == read);
 
-		if (ubi_dbg_is_bitflip(ubi)) {
+		if (ubi_dbg_is_bitflip()) {
 			dbg_gen("bit-flip (emulated)");
 			err = UBI_IO_BITFLIPS;
 		}
@@ -281,7 +281,7 @@ int ubi_io_write(struct ubi_device *ubi, const void *buf, int pnum, int offset,
 			return err;
 	}
 
-	if (ubi_dbg_is_write_failure(ubi)) {
+	if (ubi_dbg_is_write_failure()) {
 		dbg_err("cannot write %d bytes to PEB %d:%d "
 			"(emulated)", len, pnum, offset);
 		ubi_dbg_dump_stack();
@@ -396,7 +396,7 @@ static int do_sync_erase(struct ubi_device *ubi, int pnum)
 	if (err)
 		return err;
 
-	if (ubi_dbg_is_erase_failure(ubi)) {
+	if (ubi_dbg_is_erase_failure()) {
 		dbg_err("cannot erase PEB %d (emulated)", pnum);
 		return -EIO;
 	}
@@ -1146,7 +1146,7 @@ static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum)
 {
 	int err;
 
-	if (!ubi->dbg->chk_io)
+	if (!(ubi_chk_flags & UBI_CHK_IO))
 		return 0;
 
 	err = ubi_io_is_bad(ubi, pnum);
@@ -1173,7 +1173,7 @@ static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum,
 	int err;
 	uint32_t magic;
 
-	if (!ubi->dbg->chk_io)
+	if (!(ubi_chk_flags & UBI_CHK_IO))
 		return 0;
 
 	magic = be32_to_cpu(ec_hdr->magic);
@@ -1211,7 +1211,7 @@ static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum)
 	uint32_t crc, hdr_crc;
 	struct ubi_ec_hdr *ec_hdr;
 
-	if (!ubi->dbg->chk_io)
+	if (!(ubi_chk_flags & UBI_CHK_IO))
 		return 0;
 
 	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
@@ -1255,7 +1255,7 @@ static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum,
 	int err;
 	uint32_t magic;
 
-	if (!ubi->dbg->chk_io)
+	if (!(ubi_chk_flags & UBI_CHK_IO))
 		return 0;
 
 	magic = be32_to_cpu(vid_hdr->magic);
@@ -1296,7 +1296,7 @@ static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum)
 	struct ubi_vid_hdr *vid_hdr;
 	void *p;
 
-	if (!ubi->dbg->chk_io)
+	if (!(ubi_chk_flags & UBI_CHK_IO))
 		return 0;
 
 	vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
@@ -1348,7 +1348,7 @@ int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum,
 	void *buf1;
 	loff_t addr = (loff_t)pnum * ubi->peb_size + offset;
 
-	if (!ubi->dbg->chk_io)
+	if (!(ubi_chk_flags & UBI_CHK_IO))
 		return 0;
 
 	buf1 = __vmalloc(len, GFP_NOFS, PAGE_KERNEL);
@@ -1412,7 +1412,7 @@ int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len)
 	void *buf;
 	loff_t addr = (loff_t)pnum * ubi->peb_size + offset;
 
-	if (!ubi->dbg->chk_io)
+	if (!(ubi_chk_flags & UBI_CHK_IO))
 		return 0;
 
 	buf = __vmalloc(len, GFP_NOFS, PAGE_KERNEL);
diff --git a/trunk/drivers/mtd/ubi/scan.c b/trunk/drivers/mtd/ubi/scan.c
index a3a198f9b98d..2135a53732ff 100644
--- a/trunk/drivers/mtd/ubi/scan.c
+++ b/trunk/drivers/mtd/ubi/scan.c
@@ -1347,7 +1347,7 @@ static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si)
 	struct ubi_scan_leb *seb, *last_seb;
 	uint8_t *buf;
 
-	if (!ubi->dbg->chk_gen)
+	if (!(ubi_chk_flags & UBI_CHK_GEN))
 		return 0;
 
 	/*
diff --git a/trunk/drivers/mtd/ubi/ubi.h b/trunk/drivers/mtd/ubi/ubi.h
index dc64c767fd21..c6c22295898e 100644
--- a/trunk/drivers/mtd/ubi/ubi.h
+++ b/trunk/drivers/mtd/ubi/ubi.h
@@ -44,6 +44,7 @@
 
 #include "ubi-media.h"
 #include "scan.h"
+#include "debug.h"
 
 /* Maximum number of supported UBI devices */
 #define UBI_MAX_DEVICES 32
@@ -389,8 +390,6 @@ struct ubi_wl_entry;
  * @peb_buf2: another buffer of PEB size used for different purposes
  * @buf_mutex: protects @peb_buf1 and @peb_buf2
  * @ckvol_mutex: serializes static volume checking when opening
- *
- * @dbg: debugging information for this UBI device
  */
 struct ubi_device {
 	struct cdev cdev;
@@ -473,12 +472,8 @@ struct ubi_device {
 	void *peb_buf2;
 	struct mutex buf_mutex;
 	struct mutex ckvol_mutex;
-
-	struct ubi_debug_info *dbg;
 };
 
-#include "debug.h"
-
 extern struct kmem_cache *ubi_wl_entry_slab;
 extern const struct file_operations ubi_ctrl_cdev_operations;
 extern const struct file_operations ubi_cdev_operations;
@@ -667,7 +662,6 @@ static inline void ubi_ro_mode(struct ubi_device *ubi)
 	if (!ubi->ro_mode) {
 		ubi->ro_mode = 1;
 		ubi_warn("switch to read-only mode");
-		ubi_dbg_dump_stack();
 	}
 }
 
diff --git a/trunk/drivers/mtd/ubi/vmt.c b/trunk/drivers/mtd/ubi/vmt.c
index 97e093d19672..366eb70219a6 100644
--- a/trunk/drivers/mtd/ubi/vmt.c
+++ b/trunk/drivers/mtd/ubi/vmt.c
@@ -871,7 +871,7 @@ static int paranoid_check_volumes(struct ubi_device *ubi)
 {
 	int i, err = 0;
 
-	if (!ubi->dbg->chk_gen)
+	if (!(ubi_chk_flags & UBI_CHK_GEN))
 		return 0;
 
 	for (i = 0; i < ubi->vtbl_slots; i++) {
diff --git a/trunk/drivers/mtd/ubi/vtbl.c b/trunk/drivers/mtd/ubi/vtbl.c
index 4b50a3029b84..fd3bf770f518 100644
--- a/trunk/drivers/mtd/ubi/vtbl.c
+++ b/trunk/drivers/mtd/ubi/vtbl.c
@@ -307,7 +307,8 @@ static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si,
 {
 	int err, tries = 0;
 	static struct ubi_vid_hdr *vid_hdr;
-	struct ubi_scan_leb *new_seb;
+	struct ubi_scan_volume *sv;
+	struct ubi_scan_leb *new_seb, *old_seb = NULL;
 
 	ubi_msg("create volume table (copy #%d)", copy + 1);
 
@@ -315,6 +316,15 @@ static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si,
 	if (!vid_hdr)
 		return -ENOMEM;
 
+	/*
+	 * Check if a logical eraseblock which would have to contain this
+	 * volume table copy was found during scanning. It has to be wiped
+	 * out.
+	 */
+	sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID);
+	if (sv)
+		old_seb = ubi_scan_find_seb(sv, copy);
+
 retry:
 	new_seb = ubi_scan_get_free_peb(ubi, si);
 	if (IS_ERR(new_seb)) {
@@ -341,8 +351,8 @@ static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si,
 		goto write_error;
 
 	/*
-	 * And add it to the scanning information. Don't delete the old version
-	 * of this LEB as it will be deleted and freed in 'ubi_scan_add_used()'.
+	 * And add it to the scanning information. Don't delete the old
+	 * @old_seb as it will be deleted and freed in 'ubi_scan_add_used()'.
 	 */
 	err = ubi_scan_add_used(ubi, si, new_seb->pnum, new_seb->ec,
 				vid_hdr, 0);
@@ -866,7 +876,7 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si)
  */
 static void paranoid_vtbl_check(const struct ubi_device *ubi)
 {
-	if (!ubi->dbg->chk_gen)
+	if (!(ubi_chk_flags & UBI_CHK_GEN))
 		return;
 
 	if (vtbl_check(ubi, ubi->vtbl)) {
diff --git a/trunk/drivers/mtd/ubi/wl.c b/trunk/drivers/mtd/ubi/wl.c
index 42c684cf3688..ff2c4956eeff 100644
--- a/trunk/drivers/mtd/ubi/wl.c
+++ b/trunk/drivers/mtd/ubi/wl.c
@@ -1,5 +1,4 @@
 /*
- * @ubi: UBI device description object
  * Copyright (c) International Business Machines Corp., 2006
  *
  * This program is free software; you can redistribute it and/or modify
@@ -164,14 +163,12 @@ struct ubi_work {
 
 #ifdef CONFIG_MTD_UBI_DEBUG
 static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec);
-static int paranoid_check_in_wl_tree(const struct ubi_device *ubi,
-				     struct ubi_wl_entry *e,
+static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
 				     struct rb_root *root);
-static int paranoid_check_in_pq(const struct ubi_device *ubi,
-				struct ubi_wl_entry *e);
+static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e);
 #else
 #define paranoid_check_ec(ubi, pnum, ec) 0
-#define paranoid_check_in_wl_tree(ubi, e, root)
+#define paranoid_check_in_wl_tree(e, root)
 #define paranoid_check_in_pq(ubi, e) 0
 #endif
 
@@ -452,7 +449,7 @@ int ubi_wl_get_peb(struct ubi_device *ubi, int dtype)
 		BUG();
 	}
 
-	paranoid_check_in_wl_tree(ubi, e, &ubi->free);
+	paranoid_check_in_wl_tree(e, &ubi->free);
 
 	/*
 	 * Move the physical eraseblock to the protection queue where it will
@@ -616,7 +613,7 @@ static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
 	list_add_tail(&wrk->list, &ubi->works);
 	ubi_assert(ubi->works_count >= 0);
 	ubi->works_count += 1;
-	if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled(ubi))
+	if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled())
 		wake_up_process(ubi->bgt_thread);
 	spin_unlock(&ubi->wl_lock);
 }
@@ -715,7 +712,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 			       e1->ec, e2->ec);
 			goto out_cancel;
 		}
-		paranoid_check_in_wl_tree(ubi, e1, &ubi->used);
+		paranoid_check_in_wl_tree(e1, &ubi->used);
 		rb_erase(&e1->u.rb, &ubi->used);
 		dbg_wl("move PEB %d EC %d to PEB %d EC %d",
 		       e1->pnum, e1->ec, e2->pnum, e2->ec);
@@ -724,12 +721,12 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 		scrubbing = 1;
 		e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb);
 		e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
-		paranoid_check_in_wl_tree(ubi, e1, &ubi->scrub);
+		paranoid_check_in_wl_tree(e1, &ubi->scrub);
 		rb_erase(&e1->u.rb, &ubi->scrub);
 		dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum);
 	}
 
-	paranoid_check_in_wl_tree(ubi, e2, &ubi->free);
+	paranoid_check_in_wl_tree(e2, &ubi->free);
 	rb_erase(&e2->u.rb, &ubi->free);
 	ubi->move_from = e1;
 	ubi->move_to = e2;
@@ -1172,13 +1169,13 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture)
 		return 0;
 	} else {
 		if (in_wl_tree(e, &ubi->used)) {
-			paranoid_check_in_wl_tree(ubi, e, &ubi->used);
+			paranoid_check_in_wl_tree(e, &ubi->used);
 			rb_erase(&e->u.rb, &ubi->used);
 		} else if (in_wl_tree(e, &ubi->scrub)) {
-			paranoid_check_in_wl_tree(ubi, e, &ubi->scrub);
+			paranoid_check_in_wl_tree(e, &ubi->scrub);
 			rb_erase(&e->u.rb, &ubi->scrub);
 		} else if (in_wl_tree(e, &ubi->erroneous)) {
-			paranoid_check_in_wl_tree(ubi, e, &ubi->erroneous);
+			paranoid_check_in_wl_tree(e, &ubi->erroneous);
 			rb_erase(&e->u.rb, &ubi->erroneous);
 			ubi->erroneous_peb_count -= 1;
 			ubi_assert(ubi->erroneous_peb_count >= 0);
@@ -1245,7 +1242,7 @@ int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
 	}
 
 	if (in_wl_tree(e, &ubi->used)) {
-		paranoid_check_in_wl_tree(ubi, e, &ubi->used);
+		paranoid_check_in_wl_tree(e, &ubi->used);
 		rb_erase(&e->u.rb, &ubi->used);
 	} else {
 		int err;
@@ -1367,7 +1364,7 @@ int ubi_thread(void *u)
 
 		spin_lock(&ubi->wl_lock);
 		if (list_empty(&ubi->works) || ubi->ro_mode ||
-		    !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) {
+		    !ubi->thread_enabled || ubi_dbg_is_bgt_disabled()) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			spin_unlock(&ubi->wl_lock);
 			schedule();
@@ -1582,7 +1579,7 @@ static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec)
 	long long read_ec;
 	struct ubi_ec_hdr *ec_hdr;
 
-	if (!ubi->dbg->chk_gen)
+	if (!(ubi_chk_flags & UBI_CHK_GEN))
 		return 0;
 
 	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
@@ -1612,18 +1609,16 @@ static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec)
 
 /**
  * paranoid_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree.
- * @ubi: UBI device description object
  * @e: the wear-leveling entry to check
  * @root: the root of the tree
  *
  * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it
  * is not.
  */
-static int paranoid_check_in_wl_tree(const struct ubi_device *ubi,
-				     struct ubi_wl_entry *e,
+static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
 				     struct rb_root *root)
 {
-	if (!ubi->dbg->chk_gen)
+	if (!(ubi_chk_flags & UBI_CHK_GEN))
 		return 0;
 
 	if (in_wl_tree(e, root))
@@ -1643,13 +1638,12 @@ static int paranoid_check_in_wl_tree(const struct ubi_device *ubi,
  *
  * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not.
  */
-static int paranoid_check_in_pq(const struct ubi_device *ubi,
-				struct ubi_wl_entry *e)
+static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e)
 {
 	struct ubi_wl_entry *p;
 	int i;
 
-	if (!ubi->dbg->chk_gen)
+	if (!(ubi_chk_flags & UBI_CHK_GEN))
 		return 0;
 
 	for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i)
diff --git a/trunk/drivers/pci/quirks.c b/trunk/drivers/pci/quirks.c
index 1196f61a4ab6..02145e9697a9 100644
--- a/trunk/drivers/pci/quirks.c
+++ b/trunk/drivers/pci/quirks.c
@@ -2758,29 +2758,6 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev)
 
 	dev_notice(&dev->dev, "proprietary Ricoh MMC controller disabled (via firewire function)\n");
 	dev_notice(&dev->dev, "MMC cards are now supported by standard SDHCI controller\n");
-
-	/*
-	 * RICOH 0xe823 SD/MMC card reader fails to recognize
-	 * certain types of SD/MMC cards. Lowering the SD base
-	 * clock frequency from 200Mhz to 50Mhz fixes this issue.
-	 *
-	 * 0x150 - SD2.0 mode enable for changing base clock
-	 *	   frequency to 50Mhz
-	 * 0xe1  - Base clock frequency
-	 * 0x32  - 50Mhz new clock frequency
-	 * 0xf9  - Key register for 0x150
-	 * 0xfc  - key register for 0xe1
-	 */
-	if (dev->device == PCI_DEVICE_ID_RICOH_R5CE823) {
-		pci_write_config_byte(dev, 0xf9, 0xfc);
-		pci_write_config_byte(dev, 0x150, 0x10);
-		pci_write_config_byte(dev, 0xf9, 0x00);
-		pci_write_config_byte(dev, 0xfc, 0x01);
-		pci_write_config_byte(dev, 0xe1, 0x32);
-		pci_write_config_byte(dev, 0xfc, 0x00);
-
-		dev_notice(&dev->dev, "MMC controller base frequency changed to 50Mhz.\n");
-	}
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
diff --git a/trunk/drivers/xen/Kconfig b/trunk/drivers/xen/Kconfig
index 03bc471c3eed..a59638b37c1a 100644
--- a/trunk/drivers/xen/Kconfig
+++ b/trunk/drivers/xen/Kconfig
@@ -9,23 +9,6 @@ config XEN_BALLOON
 	  the system to expand the domain's memory allocation, or alternatively
 	  return unneeded memory to the system.
 
-config XEN_SELFBALLOONING
-	bool "Dynamically self-balloon kernel memory to target"
-	depends on XEN && XEN_BALLOON && CLEANCACHE && SWAP
-	default n
-	help
-	  Self-ballooning dynamically balloons available kernel memory driven
-	  by the current usage of anonymous memory ("committed AS") and
-	  controlled by various sysfs-settable parameters.  Configuring
-	  FRONTSWAP is highly recommended; if it is not configured, self-
-	  ballooning is disabled by default but can be enabled with the
-	  'selfballooning' kernel boot parameter.  If FRONTSWAP is configured,
-	  frontswap-selfshrinking is enabled by default but can be disabled
-	  with the 'noselfshrink' kernel boot parameter; and self-ballooning
-	  is enabled by default but can be disabled with the 'noselfballooning'
-	  kernel boot parameter.  Note that systems without a sufficiently
-	  large swap device should not enable self-ballooning.
-
 config XEN_SCRUB_PAGES
 	bool "Scrub pages before returning them to system"
 	depends on XEN_BALLOON
@@ -122,33 +105,4 @@ config SWIOTLB_XEN
 	depends on PCI
 	select SWIOTLB
 
-config XEN_TMEM
-	bool
-	default y if (CLEANCACHE || FRONTSWAP)
-	help
-	  Shim to interface in-kernel Transcendent Memory hooks
-	  (e.g. cleancache and frontswap) to Xen tmem hypercalls.
-
-config XEN_PCIDEV_BACKEND
-	tristate "Xen PCI-device backend driver"
-	depends on PCI && X86 && XEN
-	depends on XEN_BACKEND
-	default m
-	help
-	  The PCI device backend driver allows the kernel to export arbitrary
-	  PCI devices to other guests. If you select this to be a module, you
-	  will need to make sure no other driver has bound to the device(s)
-	  you want to make visible to other guests.
-
-	  The parameter "passthrough" allows you specify how you want the PCI
-	  devices to appear in the guest. You can choose the default (0) where
-	  PCI topology starts at 00.00.0, or (1) for passthrough if you want
-	  the PCI devices topology appear the same as in the host.
-
-	  The "hide" parameter (only applicable if backend driver is compiled
-	  into the kernel) allows you to bind the PCI devices to this module
-	  from the default device drivers. The argument is the list of PCI BDFs:
-	  xen-pciback.hide=(03:00.0)(04:00.0)
-
-	  If in doubt, say m.
 endmenu
diff --git a/trunk/drivers/xen/Makefile b/trunk/drivers/xen/Makefile
index 72bbb27d7a68..bbc18258ecc5 100644
--- a/trunk/drivers/xen/Makefile
+++ b/trunk/drivers/xen/Makefile
@@ -1,5 +1,6 @@
 obj-y	+= grant-table.o features.o events.o manage.o balloon.o
 obj-y	+= xenbus/
+obj-y	+= tmem.o
 
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_features.o			:= $(nostackp)
@@ -8,17 +9,14 @@ obj-$(CONFIG_BLOCK)			+= biomerge.o
 obj-$(CONFIG_HOTPLUG_CPU)		+= cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM)		+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)		+= xen-balloon.o
-obj-$(CONFIG_XEN_SELFBALLOONING)	+= xen-selfballoon.o
 obj-$(CONFIG_XEN_DEV_EVTCHN)		+= xen-evtchn.o
 obj-$(CONFIG_XEN_GNTDEV)		+= xen-gntdev.o
 obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
 obj-$(CONFIG_XENFS)			+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
 obj-$(CONFIG_XEN_PLATFORM_PCI)		+= xen-platform-pci.o
-obj-$(CONFIG_XEN_TMEM)			+= tmem.o
 obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
 obj-$(CONFIG_XEN_DOM0)			+= pci.o
-obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= xen-pciback/
 
 xen-evtchn-y				:= evtchn.o
 xen-gntdev-y				:= gntdev.o
diff --git a/trunk/drivers/xen/events.c b/trunk/drivers/xen/events.c
index da70f5c32eb9..30df85d8fca8 100644
--- a/trunk/drivers/xen/events.c
+++ b/trunk/drivers/xen/events.c
@@ -615,6 +615,11 @@ static int find_irq_by_gsi(unsigned gsi)
 	return -1;
 }
 
+int xen_allocate_pirq_gsi(unsigned gsi)
+{
+	return gsi;
+}
+
 /*
  * Do not make any assumptions regarding the relationship between the
  * IRQ number returned here and the Xen pirq argument.
@@ -1688,6 +1693,6 @@ void __init xen_init_IRQ(void)
 	} else {
 		irq_ctx_init(smp_processor_id());
 		if (xen_initial_domain())
-			pci_xen_initial_domain();
+			xen_setup_pirqs();
 	}
 }
diff --git a/trunk/drivers/xen/tmem.c b/trunk/drivers/xen/tmem.c
index d369965e8f8a..816a44959ef0 100644
--- a/trunk/drivers/xen/tmem.c
+++ b/trunk/drivers/xen/tmem.c
@@ -1,7 +1,7 @@
 /*
  * Xen implementation for transcendent memory (tmem)
  *
- * Copyright (C) 2009-2011 Oracle Corp.  All rights reserved.
+ * Copyright (C) 2009-2010 Oracle Corp.  All rights reserved.
  * Author: Dan Magenheimer
  */
 
@@ -9,14 +9,8 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
-#include <linux/module.h>
 #include <linux/cleancache.h>
 
-/* temporary ifdef until include/linux/frontswap.h is upstream */
-#ifdef CONFIG_FRONTSWAP
-#include <linux/frontswap.h>
-#endif
-
 #include <xen/xen.h>
 #include <xen/interface/xen.h>
 #include <asm/xen/hypercall.h>
@@ -128,8 +122,14 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid)
 	return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0);
 }
 
-int tmem_enabled __read_mostly;
-EXPORT_SYMBOL(tmem_enabled);
+static int xen_tmem_destroy_pool(u32 pool_id)
+{
+	struct tmem_oid oid = { { 0 } };
+
+	return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0);
+}
+
+int tmem_enabled;
 
 static int __init enable_tmem(char *s)
 {
@@ -139,14 +139,6 @@ static int __init enable_tmem(char *s)
 
 __setup("tmem", enable_tmem);
 
-#ifdef CONFIG_CLEANCACHE
-static int xen_tmem_destroy_pool(u32 pool_id)
-{
-	struct tmem_oid oid = { { 0 } };
-
-	return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0);
-}
-
 /* cleancache ops */
 
 static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key,
@@ -248,156 +240,18 @@ static struct cleancache_ops tmem_cleancache_ops = {
 	.init_shared_fs = tmem_cleancache_init_shared_fs,
 	.init_fs = tmem_cleancache_init_fs
 };
-#endif
-
-#ifdef CONFIG_FRONTSWAP
-/* frontswap tmem operations */
-
-/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
-static int tmem_frontswap_poolid;
-
-/*
- * Swizzling increases objects per swaptype, increasing tmem concurrency
- * for heavy swaploads.  Later, larger nr_cpus -> larger SWIZ_BITS
- */
-#define SWIZ_BITS		4
-#define SWIZ_MASK		((1 << SWIZ_BITS) - 1)
-#define _oswiz(_type, _ind)	((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
-#define iswiz(_ind)		(_ind >> SWIZ_BITS)
-
-static inline struct tmem_oid oswiz(unsigned type, u32 ind)
-{
-	struct tmem_oid oid = { .oid = { 0 } };
-	oid.oid[0] = _oswiz(type, ind);
-	return oid;
-}
-
-/* returns 0 if the page was successfully put into frontswap, -1 if not */
-static int tmem_frontswap_put_page(unsigned type, pgoff_t offset,
-				   struct page *page)
-{
-	u64 ind64 = (u64)offset;
-	u32 ind = (u32)offset;
-	unsigned long pfn = page_to_pfn(page);
-	int pool = tmem_frontswap_poolid;
-	int ret;
-
-	if (pool < 0)
-		return -1;
-	if (ind64 != ind)
-		return -1;
-	mb(); /* ensure page is quiescent; tmem may address it with an alias */
-	ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), pfn);
-	/* translate Xen tmem return values to linux semantics */
-	if (ret == 1)
-		return 0;
-	else
-		return -1;
-}
-
-/*
- * returns 0 if the page was successfully gotten from frontswap, -1 if
- * was not present (should never happen!)
- */
-static int tmem_frontswap_get_page(unsigned type, pgoff_t offset,
-				   struct page *page)
-{
-	u64 ind64 = (u64)offset;
-	u32 ind = (u32)offset;
-	unsigned long pfn = page_to_pfn(page);
-	int pool = tmem_frontswap_poolid;
-	int ret;
-
-	if (pool < 0)
-		return -1;
-	if (ind64 != ind)
-		return -1;
-	ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), pfn);
-	/* translate Xen tmem return values to linux semantics */
-	if (ret == 1)
-		return 0;
-	else
-		return -1;
-}
-
-/* flush a single page from frontswap */
-static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset)
-{
-	u64 ind64 = (u64)offset;
-	u32 ind = (u32)offset;
-	int pool = tmem_frontswap_poolid;
-
-	if (pool < 0)
-		return;
-	if (ind64 != ind)
-		return;
-	(void) xen_tmem_flush_page(pool, oswiz(type, ind), iswiz(ind));
-}
-
-/* flush all pages from the passed swaptype */
-static void tmem_frontswap_flush_area(unsigned type)
-{
-	int pool = tmem_frontswap_poolid;
-	int ind;
-
-	if (pool < 0)
-		return;
-	for (ind = SWIZ_MASK; ind >= 0; ind--)
-		(void)xen_tmem_flush_object(pool, oswiz(type, ind));
-}
-
-static void tmem_frontswap_init(unsigned ignored)
-{
-	struct tmem_pool_uuid private = TMEM_POOL_PRIVATE_UUID;
-
-	/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
-	if (tmem_frontswap_poolid < 0)
-		tmem_frontswap_poolid =
-		    xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE);
-}
-
-static int __initdata use_frontswap = 1;
-
-static int __init no_frontswap(char *s)
-{
-	use_frontswap = 0;
-	return 1;
-}
-
-__setup("nofrontswap", no_frontswap);
-
-static struct frontswap_ops tmem_frontswap_ops = {
-	.put_page = tmem_frontswap_put_page,
-	.get_page = tmem_frontswap_get_page,
-	.flush_page = tmem_frontswap_flush_page,
-	.flush_area = tmem_frontswap_flush_area,
-	.init = tmem_frontswap_init
-};
-#endif
 
 static int __init xen_tmem_init(void)
 {
+	struct cleancache_ops old_ops;
+
 	if (!xen_domain())
 		return 0;
-#ifdef CONFIG_FRONTSWAP
-	if (tmem_enabled && use_frontswap) {
-		char *s = "";
-		struct frontswap_ops old_ops =
-			frontswap_register_ops(&tmem_frontswap_ops);
-
-		tmem_frontswap_poolid = -1;
-		if (old_ops.init != NULL)
-			s = " (WARNING: frontswap_ops overridden)";
-		printk(KERN_INFO "frontswap enabled, RAM provided by "
-				 "Xen Transcendent Memory\n");
-	}
-#endif
 #ifdef CONFIG_CLEANCACHE
 	BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
 	if (tmem_enabled && use_cleancache) {
 		char *s = "";
-		struct cleancache_ops old_ops =
-			cleancache_register_ops(&tmem_cleancache_ops);
+		old_ops = cleancache_register_ops(&tmem_cleancache_ops);
 		if (old_ops.init_fs != NULL)
 			s = " (WARNING: cleancache_ops overridden)";
 		printk(KERN_INFO "cleancache enabled, RAM provided by "
diff --git a/trunk/drivers/xen/xen-balloon.c b/trunk/drivers/xen/xen-balloon.c
index 5c9dc43c1e94..a4ff225ee868 100644
--- a/trunk/drivers/xen/xen-balloon.c
+++ b/trunk/drivers/xen/xen-balloon.c
@@ -98,8 +98,6 @@ static int __init balloon_init(void)
 
 	register_balloon(&balloon_sysdev);
 
-	register_xen_selfballooning(&balloon_sysdev);
-
 	target_watch.callback = watch_target;
 	xenstore_notifier.notifier_call = balloon_init_watcher;
 
diff --git a/trunk/drivers/xen/xen-pciback/Makefile b/trunk/drivers/xen/xen-pciback/Makefile
deleted file mode 100644
index ffe0ad3438bd..000000000000
--- a/trunk/drivers/xen/xen-pciback/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o
-
-xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o
-xen-pciback-y += conf_space.o conf_space_header.o \
-		 conf_space_capability.o \
-		 conf_space_quirks.o vpci.o \
-		 passthrough.o
diff --git a/trunk/drivers/xen/xen-pciback/conf_space.c b/trunk/drivers/xen/xen-pciback/conf_space.c
deleted file mode 100644
index a8031445d94e..000000000000
--- a/trunk/drivers/xen/xen-pciback/conf_space.c
+++ /dev/null
@@ -1,438 +0,0 @@
-/*
- * PCI Backend - Functions for creating a virtual configuration space for
- *               exported PCI Devices.
- *               It's dangerous to allow PCI Driver Domains to change their
- *               device's resources (memory, i/o ports, interrupts). We need to
- *               restrict changes to certain PCI Configuration registers:
- *               BARs, INTERRUPT_PIN, most registers in the header...
- *
- * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include "pciback.h"
-#include "conf_space.h"
-#include "conf_space_quirks.h"
-
-#define DRV_NAME	"xen-pciback"
-static int permissive;
-module_param(permissive, bool, 0644);
-
-/* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word,
- * xen_pcibk_write_config_word, and xen_pcibk_write_config_byte are created. */
-#define DEFINE_PCI_CONFIG(op, size, type)			\
-int xen_pcibk_##op##_config_##size				\
-(struct pci_dev *dev, int offset, type value, void *data)	\
-{								\
-	return pci_##op##_config_##size(dev, offset, value);	\
-}
-
-DEFINE_PCI_CONFIG(read, byte, u8 *)
-DEFINE_PCI_CONFIG(read, word, u16 *)
-DEFINE_PCI_CONFIG(read, dword, u32 *)
-
-DEFINE_PCI_CONFIG(write, byte, u8)
-DEFINE_PCI_CONFIG(write, word, u16)
-DEFINE_PCI_CONFIG(write, dword, u32)
-
-static int conf_space_read(struct pci_dev *dev,
-			   const struct config_field_entry *entry,
-			   int offset, u32 *value)
-{
-	int ret = 0;
-	const struct config_field *field = entry->field;
-
-	*value = 0;
-
-	switch (field->size) {
-	case 1:
-		if (field->u.b.read)
-			ret = field->u.b.read(dev, offset, (u8 *) value,
-					      entry->data);
-		break;
-	case 2:
-		if (field->u.w.read)
-			ret = field->u.w.read(dev, offset, (u16 *) value,
-					      entry->data);
-		break;
-	case 4:
-		if (field->u.dw.read)
-			ret = field->u.dw.read(dev, offset, value, entry->data);
-		break;
-	}
-	return ret;
-}
-
-static int conf_space_write(struct pci_dev *dev,
-			    const struct config_field_entry *entry,
-			    int offset, u32 value)
-{
-	int ret = 0;
-	const struct config_field *field = entry->field;
-
-	switch (field->size) {
-	case 1:
-		if (field->u.b.write)
-			ret = field->u.b.write(dev, offset, (u8) value,
-					       entry->data);
-		break;
-	case 2:
-		if (field->u.w.write)
-			ret = field->u.w.write(dev, offset, (u16) value,
-					       entry->data);
-		break;
-	case 4:
-		if (field->u.dw.write)
-			ret = field->u.dw.write(dev, offset, value,
-						entry->data);
-		break;
-	}
-	return ret;
-}
-
-static inline u32 get_mask(int size)
-{
-	if (size == 1)
-		return 0xff;
-	else if (size == 2)
-		return 0xffff;
-	else
-		return 0xffffffff;
-}
-
-static inline int valid_request(int offset, int size)
-{
-	/* Validate request (no un-aligned requests) */
-	if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0)
-		return 1;
-	return 0;
-}
-
-static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
-			      int offset)
-{
-	if (offset >= 0) {
-		new_val_mask <<= (offset * 8);
-		new_val <<= (offset * 8);
-	} else {
-		new_val_mask >>= (offset * -8);
-		new_val >>= (offset * -8);
-	}
-	val = (val & ~new_val_mask) | (new_val & new_val_mask);
-
-	return val;
-}
-
-static int pcibios_err_to_errno(int err)
-{
-	switch (err) {
-	case PCIBIOS_SUCCESSFUL:
-		return XEN_PCI_ERR_success;
-	case PCIBIOS_DEVICE_NOT_FOUND:
-		return XEN_PCI_ERR_dev_not_found;
-	case PCIBIOS_BAD_REGISTER_NUMBER:
-		return XEN_PCI_ERR_invalid_offset;
-	case PCIBIOS_FUNC_NOT_SUPPORTED:
-		return XEN_PCI_ERR_not_implemented;
-	case PCIBIOS_SET_FAILED:
-		return XEN_PCI_ERR_access_denied;
-	}
-	return err;
-}
-
-int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
-			  u32 *ret_val)
-{
-	int err = 0;
-	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
-	const struct config_field_entry *cfg_entry;
-	const struct config_field *field;
-	int req_start, req_end, field_start, field_end;
-	/* if read fails for any reason, return 0
-	 * (as if device didn't respond) */
-	u32 value = 0, tmp_val;
-
-	if (unlikely(verbose_request))
-		printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x\n",
-		       pci_name(dev), size, offset);
-
-	if (!valid_request(offset, size)) {
-		err = XEN_PCI_ERR_invalid_offset;
-		goto out;
-	}
-
-	/* Get the real value first, then modify as appropriate */
-	switch (size) {
-	case 1:
-		err = pci_read_config_byte(dev, offset, (u8 *) &value);
-		break;
-	case 2:
-		err = pci_read_config_word(dev, offset, (u16 *) &value);
-		break;
-	case 4:
-		err = pci_read_config_dword(dev, offset, &value);
-		break;
-	}
-
-	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
-		field = cfg_entry->field;
-
-		req_start = offset;
-		req_end = offset + size;
-		field_start = OFFSET(cfg_entry);
-		field_end = OFFSET(cfg_entry) + field->size;
-
-		if ((req_start >= field_start && req_start < field_end)
-		    || (req_end > field_start && req_end <= field_end)) {
-			err = conf_space_read(dev, cfg_entry, field_start,
-					      &tmp_val);
-			if (err)
-				goto out;
-
-			value = merge_value(value, tmp_val,
-					    get_mask(field->size),
-					    field_start - req_start);
-		}
-	}
-
-out:
-	if (unlikely(verbose_request))
-		printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x = %x\n",
-		       pci_name(dev), size, offset, value);
-
-	*ret_val = value;
-	return pcibios_err_to_errno(err);
-}
-
-int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
-{
-	int err = 0, handled = 0;
-	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
-	const struct config_field_entry *cfg_entry;
-	const struct config_field *field;
-	u32 tmp_val;
-	int req_start, req_end, field_start, field_end;
-
-	if (unlikely(verbose_request))
-		printk(KERN_DEBUG
-		       DRV_NAME ": %s: write request %d bytes at 0x%x = %x\n",
-		       pci_name(dev), size, offset, value);
-
-	if (!valid_request(offset, size))
-		return XEN_PCI_ERR_invalid_offset;
-
-	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
-		field = cfg_entry->field;
-
-		req_start = offset;
-		req_end = offset + size;
-		field_start = OFFSET(cfg_entry);
-		field_end = OFFSET(cfg_entry) + field->size;
-
-		if ((req_start >= field_start && req_start < field_end)
-		    || (req_end > field_start && req_end <= field_end)) {
-			tmp_val = 0;
-
-			err = xen_pcibk_config_read(dev, field_start,
-						  field->size, &tmp_val);
-			if (err)
-				break;
-
-			tmp_val = merge_value(tmp_val, value, get_mask(size),
-					      req_start - field_start);
-
-			err = conf_space_write(dev, cfg_entry, field_start,
-					       tmp_val);
-
-			/* handled is set true here, but not every byte
-			 * may have been written! Properly detecting if
-			 * every byte is handled is unnecessary as the
-			 * flag is used to detect devices that need
-			 * special helpers to work correctly.
-			 */
-			handled = 1;
-		}
-	}
-
-	if (!handled && !err) {
-		/* By default, anything not specificially handled above is
-		 * read-only. The permissive flag changes this behavior so
-		 * that anything not specifically handled above is writable.
-		 * This means that some fields may still be read-only because
-		 * they have entries in the config_field list that intercept
-		 * the write and do nothing. */
-		if (dev_data->permissive || permissive) {
-			switch (size) {
-			case 1:
-				err = pci_write_config_byte(dev, offset,
-							    (u8) value);
-				break;
-			case 2:
-				err = pci_write_config_word(dev, offset,
-							    (u16) value);
-				break;
-			case 4:
-				err = pci_write_config_dword(dev, offset,
-							     (u32) value);
-				break;
-			}
-		} else if (!dev_data->warned_on_write) {
-			dev_data->warned_on_write = 1;
-			dev_warn(&dev->dev, "Driver tried to write to a "
-				 "read-only configuration space field at offset"
-				 " 0x%x, size %d. This may be harmless, but if "
-				 "you have problems with your device:\n"
-				 "1) see permissive attribute in sysfs\n"
-				 "2) report problems to the xen-devel "
-				 "mailing list along with details of your "
-				 "device obtained from lspci.\n", offset, size);
-		}
-	}
-
-	return pcibios_err_to_errno(err);
-}
-
-void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev)
-{
-	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
-	struct config_field_entry *cfg_entry, *t;
-	const struct config_field *field;
-
-	dev_dbg(&dev->dev, "free-ing dynamically allocated virtual "
-			   "configuration space fields\n");
-	if (!dev_data)
-		return;
-
-	list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
-		field = cfg_entry->field;
-
-		if (field->clean) {
-			field->clean((struct config_field *)field);
-
-			kfree(cfg_entry->data);
-
-			list_del(&cfg_entry->list);
-			kfree(cfg_entry);
-		}
-
-	}
-}
-
-void xen_pcibk_config_reset_dev(struct pci_dev *dev)
-{
-	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
-	const struct config_field_entry *cfg_entry;
-	const struct config_field *field;
-
-	dev_dbg(&dev->dev, "resetting virtual configuration space\n");
-	if (!dev_data)
-		return;
-
-	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
-		field = cfg_entry->field;
-
-		if (field->reset)
-			field->reset(dev, OFFSET(cfg_entry), cfg_entry->data);
-	}
-}
-
-void xen_pcibk_config_free_dev(struct pci_dev *dev)
-{
-	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
-	struct config_field_entry *cfg_entry, *t;
-	const struct config_field *field;
-
-	dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
-	if (!dev_data)
-		return;
-
-	list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
-		list_del(&cfg_entry->list);
-
-		field = cfg_entry->field;
-
-		if (field->release)
-			field->release(dev, OFFSET(cfg_entry), cfg_entry->data);
-
-		kfree(cfg_entry);
-	}
-}
-
-int xen_pcibk_config_add_field_offset(struct pci_dev *dev,
-				    const struct config_field *field,
-				    unsigned int base_offset)
-{
-	int err = 0;
-	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
-	struct config_field_entry *cfg_entry;
-	void *tmp;
-
-	cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL);
-	if (!cfg_entry) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	cfg_entry->data = NULL;
-	cfg_entry->field = field;
-	cfg_entry->base_offset = base_offset;
-
-	/* silently ignore duplicate fields */
-	err = xen_pcibk_field_is_dup(dev, OFFSET(cfg_entry));
-	if (err)
-		goto out;
-
-	if (field->init) {
-		tmp = field->init(dev, OFFSET(cfg_entry));
-
-		if (IS_ERR(tmp)) {
-			err = PTR_ERR(tmp);
-			goto out;
-		}
-
-		cfg_entry->data = tmp;
-	}
-
-	dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
-		OFFSET(cfg_entry));
-	list_add_tail(&cfg_entry->list, &dev_data->config_fields);
-
-out:
-	if (err)
-		kfree(cfg_entry);
-
-	return err;
-}
-
-/* This sets up the device's virtual configuration space to keep track of
- * certain registers (like the base address registers (BARs) so that we can
- * keep the client from manipulating them directly.
- */
-int xen_pcibk_config_init_dev(struct pci_dev *dev)
-{
-	int err = 0;
-	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
-
-	dev_dbg(&dev->dev, "initializing virtual configuration space\n");
-
-	INIT_LIST_HEAD(&dev_data->config_fields);
-
-	err = xen_pcibk_config_header_add_fields(dev);
-	if (err)
-		goto out;
-
-	err = xen_pcibk_config_capability_add_fields(dev);
-	if (err)
-		goto out;
-
-	err = xen_pcibk_config_quirks_init(dev);
-
-out:
-	return err;
-}
-
-int xen_pcibk_config_init(void)
-{
-	return xen_pcibk_config_capability_init();
-}
diff --git a/trunk/drivers/xen/xen-pciback/conf_space.h b/trunk/drivers/xen/xen-pciback/conf_space.h
deleted file mode 100644
index e56c934ad137..000000000000
--- a/trunk/drivers/xen/xen-pciback/conf_space.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * PCI Backend - Common data structures for overriding the configuration space
- *
- * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
- */
-
-#ifndef __XEN_PCIBACK_CONF_SPACE_H__
-#define __XEN_PCIBACK_CONF_SPACE_H__
-
-#include <linux/list.h>
-#include <linux/err.h>
-
-/* conf_field_init can return an errno in a ptr with ERR_PTR() */
-typedef void *(*conf_field_init) (struct pci_dev *dev, int offset);
-typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data);
-typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data);
-
-typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value,
-				 void *data);
-typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value,
-				void *data);
-typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value,
-				void *data);
-typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value,
-				void *data);
-typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value,
-			       void *data);
-typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value,
-			       void *data);
-
-/* These are the fields within the configuration space which we
- * are interested in intercepting reads/writes to and changing their
- * values.
- */
-struct config_field {
-	unsigned int offset;
-	unsigned int size;
-	unsigned int mask;
-	conf_field_init init;
-	conf_field_reset reset;
-	conf_field_free release;
-	void (*clean) (struct config_field *field);
-	union {
-		struct {
-			conf_dword_write write;
-			conf_dword_read read;
-		} dw;
-		struct {
-			conf_word_write write;
-			conf_word_read read;
-		} w;
-		struct {
-			conf_byte_write write;
-			conf_byte_read read;
-		} b;
-	} u;
-	struct list_head list;
-};
-
-struct config_field_entry {
-	struct list_head list;
-	const struct config_field *field;
-	unsigned int base_offset;
-	void *data;
-};
-
-#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
-
-/* Add fields to a device - the add_fields macro expects to get a pointer to
- * the first entry in an array (of which the ending is marked by size==0)
- */
-int xen_pcibk_config_add_field_offset(struct pci_dev *dev,
-				    const struct config_field *field,
-				    unsigned int offset);
-
-static inline int xen_pcibk_config_add_field(struct pci_dev *dev,
-					   const struct config_field *field)
-{
-	return xen_pcibk_config_add_field_offset(dev, field, 0);
-}
-
-static inline int xen_pcibk_config_add_fields(struct pci_dev *dev,
-					    const struct config_field *field)
-{
-	int i, err = 0;
-	for (i = 0; field[i].size != 0; i++) {
-		err = xen_pcibk_config_add_field(dev, &field[i]);
-		if (err)
-			break;
-	}
-	return err;
-}
-
-static inline int xen_pcibk_config_add_fields_offset(struct pci_dev *dev,
-					const struct config_field *field,
-					unsigned int offset)
-{
-	int i, err = 0;
-	for (i = 0; field[i].size != 0; i++) {
-		err = xen_pcibk_config_add_field_offset(dev, &field[i], offset);
-		if (err)
-			break;
-	}
-	return err;
-}
-
-/* Read/Write the real configuration space */
-int xen_pcibk_read_config_byte(struct pci_dev *dev, int offset, u8 *value,
-			       void *data);
-int xen_pcibk_read_config_word(struct pci_dev *dev, int offset, u16 *value,
-			       void *data);
-int xen_pcibk_read_config_dword(struct pci_dev *dev, int offset, u32 *value,
-				void *data);
-int xen_pcibk_write_config_byte(struct pci_dev *dev, int offset, u8 value,
-				 void *data);
-int xen_pcibk_write_config_word(struct pci_dev *dev, int offset, u16 value,
-				void *data);
-int xen_pcibk_write_config_dword(struct pci_dev *dev, int offset, u32 value,
-				 void *data);
-
-int xen_pcibk_config_capability_init(void);
-
-int xen_pcibk_config_header_add_fields(struct pci_dev *dev);
-int xen_pcibk_config_capability_add_fields(struct pci_dev *dev);
-
-#endif				/* __XEN_PCIBACK_CONF_SPACE_H__ */
diff --git a/trunk/drivers/xen/xen-pciback/conf_space_capability.c b/trunk/drivers/xen/xen-pciback/conf_space_capability.c
deleted file mode 100644
index 7f83e9083e9d..000000000000
--- a/trunk/drivers/xen/xen-pciback/conf_space_capability.c
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * PCI Backend - Handles the virtual fields found on the capability lists
- *               in the configuration space.
- *
- * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include "pciback.h"
-#include "conf_space.h"
-
-static LIST_HEAD(capabilities);
-struct xen_pcibk_config_capability {
-	struct list_head cap_list;
-
-	int capability;
-
-	/* If the device has the capability found above, add these fields */
-	const struct config_field *fields;
-};
-
-static const struct config_field caplist_header[] = {
-	{
-	 .offset    = PCI_CAP_LIST_ID,
-	 .size      = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
-	 .u.w.read  = xen_pcibk_read_config_word,
-	 .u.w.write = NULL,
-	},
-	{}
-};
-
-static inline void register_capability(struct xen_pcibk_config_capability *cap)
-{
-	list_add_tail(&cap->cap_list, &capabilities);
-}
-
-int xen_pcibk_config_capability_add_fields(struct pci_dev *dev)
-{
-	int err = 0;
-	struct xen_pcibk_config_capability *cap;
-	int cap_offset;
-
-	list_for_each_entry(cap, &capabilities, cap_list) {
-		cap_offset = pci_find_capability(dev, cap->capability);
-		if (cap_offset) {
-			dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
-				cap->capability, cap_offset);
-
-			err = xen_pcibk_config_add_fields_offset(dev,
-							       caplist_header,
-							       cap_offset);
-			if (err)
-				goto out;
-			err = xen_pcibk_config_add_fields_offset(dev,
-							       cap->fields,
-							       cap_offset);
-			if (err)
-				goto out;
-		}
-	}
-
-out:
-	return err;
-}
-
-static int vpd_address_write(struct pci_dev *dev, int offset, u16 value,
-			     void *data)
-{
-	/* Disallow writes to the vital product data */
-	if (value & PCI_VPD_ADDR_F)
-		return PCIBIOS_SET_FAILED;
-	else
-		return pci_write_config_word(dev, offset, value);
-}
-
-static const struct config_field caplist_vpd[] = {
-	{
-	 .offset    = PCI_VPD_ADDR,
-	 .size      = 2,
-	 .u.w.read  = xen_pcibk_read_config_word,
-	 .u.w.write = vpd_address_write,
-	 },
-	{
-	 .offset     = PCI_VPD_DATA,
-	 .size       = 4,
-	 .u.dw.read  = xen_pcibk_read_config_dword,
-	 .u.dw.write = NULL,
-	 },
-	{}
-};
-
-static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
-			void *data)
-{
-	int err;
-	u16 real_value;
-
-	err = pci_read_config_word(dev, offset, &real_value);
-	if (err)
-		goto out;
-
-	*value = real_value & ~PCI_PM_CAP_PME_MASK;
-
-out:
-	return err;
-}
-
-/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
- * Can't allow driver domain to enable PMEs - they're shared */
-#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
-
-static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
-			 void *data)
-{
-	int err;
-	u16 old_value;
-	pci_power_t new_state, old_state;
-
-	err = pci_read_config_word(dev, offset, &old_value);
-	if (err)
-		goto out;
-
-	old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK);
-	new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
-
-	new_value &= PM_OK_BITS;
-	if ((old_value & PM_OK_BITS) != new_value) {
-		new_value = (old_value & ~PM_OK_BITS) | new_value;
-		err = pci_write_config_word(dev, offset, new_value);
-		if (err)
-			goto out;
-	}
-
-	/* Let pci core handle the power management change */
-	dev_dbg(&dev->dev, "set power state to %x\n", new_state);
-	err = pci_set_power_state(dev, new_state);
-	if (err) {
-		err = PCIBIOS_SET_FAILED;
-		goto out;
-	}
-
- out:
-	return err;
-}
-
-/* Ensure PMEs are disabled */
-static void *pm_ctrl_init(struct pci_dev *dev, int offset)
-{
-	int err;
-	u16 value;
-
-	err = pci_read_config_word(dev, offset, &value);
-	if (err)
-		goto out;
-
-	if (value & PCI_PM_CTRL_PME_ENABLE) {
-		value &= ~PCI_PM_CTRL_PME_ENABLE;
-		err = pci_write_config_word(dev, offset, value);
-	}
-
-out:
-	return ERR_PTR(err);
-}
-
-static const struct config_field caplist_pm[] = {
-	{
-		.offset     = PCI_PM_PMC,
-		.size       = 2,
-		.u.w.read   = pm_caps_read,
-	},
-	{
-		.offset     = PCI_PM_CTRL,
-		.size       = 2,
-		.init       = pm_ctrl_init,
-		.u.w.read   = xen_pcibk_read_config_word,
-		.u.w.write  = pm_ctrl_write,
-	},
-	{
-		.offset     = PCI_PM_PPB_EXTENSIONS,
-		.size       = 1,
-		.u.b.read   = xen_pcibk_read_config_byte,
-	},
-	{
-		.offset     = PCI_PM_DATA_REGISTER,
-		.size       = 1,
-		.u.b.read   = xen_pcibk_read_config_byte,
-	},
-	{}
-};
-
-static struct xen_pcibk_config_capability xen_pcibk_config_capability_pm = {
-	.capability = PCI_CAP_ID_PM,
-	.fields = caplist_pm,
-};
-static struct xen_pcibk_config_capability xen_pcibk_config_capability_vpd = {
-	.capability = PCI_CAP_ID_VPD,
-	.fields = caplist_vpd,
-};
-
-int xen_pcibk_config_capability_init(void)
-{
-	register_capability(&xen_pcibk_config_capability_vpd);
-	register_capability(&xen_pcibk_config_capability_pm);
-
-	return 0;
-}
diff --git a/trunk/drivers/xen/xen-pciback/conf_space_header.c b/trunk/drivers/xen/xen-pciback/conf_space_header.c
deleted file mode 100644
index da3cbdfcb5dc..000000000000
--- a/trunk/drivers/xen/xen-pciback/conf_space_header.c
+++ /dev/null
@@ -1,386 +0,0 @@
-/*
- * PCI Backend - Handles the virtual fields in the configuration space headers.
- *
- * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include "pciback.h"
-#include "conf_space.h"
-
-struct pci_bar_info {
-	u32 val;
-	u32 len_val;
-	int which;
-};
-
-#define DRV_NAME	"xen-pciback"
-#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
-#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
-
-static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
-{
-	int i;
-	int ret;
-
-	ret = xen_pcibk_read_config_word(dev, offset, value, data);
-	if (!atomic_read(&dev->enable_cnt))
-		return ret;
-
-	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
-		if (dev->resource[i].flags & IORESOURCE_IO)
-			*value |= PCI_COMMAND_IO;
-		if (dev->resource[i].flags & IORESOURCE_MEM)
-			*value |= PCI_COMMAND_MEMORY;
-	}
-
-	return ret;
-}
-
-static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
-{
-	struct xen_pcibk_dev_data *dev_data;
-	int err;
-
-	dev_data = pci_get_drvdata(dev);
-	if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
-		if (unlikely(verbose_request))
-			printk(KERN_DEBUG DRV_NAME ": %s: enable\n",
-			       pci_name(dev));
-		err = pci_enable_device(dev);
-		if (err)
-			return err;
-		if (dev_data)
-			dev_data->enable_intx = 1;
-	} else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
-		if (unlikely(verbose_request))
-			printk(KERN_DEBUG DRV_NAME ": %s: disable\n",
-			       pci_name(dev));
-		pci_disable_device(dev);
-		if (dev_data)
-			dev_data->enable_intx = 0;
-	}
-
-	if (!dev->is_busmaster && is_master_cmd(value)) {
-		if (unlikely(verbose_request))
-			printk(KERN_DEBUG DRV_NAME ": %s: set bus master\n",
-			       pci_name(dev));
-		pci_set_master(dev);
-	}
-
-	if (value & PCI_COMMAND_INVALIDATE) {
-		if (unlikely(verbose_request))
-			printk(KERN_DEBUG
-			       DRV_NAME ": %s: enable memory-write-invalidate\n",
-			       pci_name(dev));
-		err = pci_set_mwi(dev);
-		if (err) {
-			printk(KERN_WARNING
-			       DRV_NAME ": %s: cannot enable "
-			       "memory-write-invalidate (%d)\n",
-			       pci_name(dev), err);
-			value &= ~PCI_COMMAND_INVALIDATE;
-		}
-	}
-
-	return pci_write_config_word(dev, offset, value);
-}
-
-static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
-{
-	struct pci_bar_info *bar = data;
-
-	if (unlikely(!bar)) {
-		printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n",
-		       pci_name(dev));
-		return XEN_PCI_ERR_op_failed;
-	}
-
-	/* A write to obtain the length must happen as a 32-bit write.
-	 * This does not (yet) support writing individual bytes
-	 */
-	if (value == ~PCI_ROM_ADDRESS_ENABLE)
-		bar->which = 1;
-	else {
-		u32 tmpval;
-		pci_read_config_dword(dev, offset, &tmpval);
-		if (tmpval != bar->val && value == bar->val) {
-			/* Allow restoration of bar value. */
-			pci_write_config_dword(dev, offset, bar->val);
-		}
-		bar->which = 0;
-	}
-
-	/* Do we need to support enabling/disabling the rom address here? */
-
-	return 0;
-}
-
-/* For the BARs, only allow writes which write ~0 or
- * the correct resource information
- * (Needed for when the driver probes the resource usage)
- */
-static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
-{
-	struct pci_bar_info *bar = data;
-
-	if (unlikely(!bar)) {
-		printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n",
-		       pci_name(dev));
-		return XEN_PCI_ERR_op_failed;
-	}
-
-	/* A write to obtain the length must happen as a 32-bit write.
-	 * This does not (yet) support writing individual bytes
-	 */
-	if (value == ~0)
-		bar->which = 1;
-	else {
-		u32 tmpval;
-		pci_read_config_dword(dev, offset, &tmpval);
-		if (tmpval != bar->val && value == bar->val) {
-			/* Allow restoration of bar value. */
-			pci_write_config_dword(dev, offset, bar->val);
-		}
-		bar->which = 0;
-	}
-
-	return 0;
-}
-
-static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data)
-{
-	struct pci_bar_info *bar = data;
-
-	if (unlikely(!bar)) {
-		printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n",
-		       pci_name(dev));
-		return XEN_PCI_ERR_op_failed;
-	}
-
-	*value = bar->which ? bar->len_val : bar->val;
-
-	return 0;
-}
-
-static inline void read_dev_bar(struct pci_dev *dev,
-				struct pci_bar_info *bar_info, int offset,
-				u32 len_mask)
-{
-	int	pos;
-	struct resource	*res = dev->resource;
-
-	if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1)
-		pos = PCI_ROM_RESOURCE;
-	else {
-		pos = (offset - PCI_BASE_ADDRESS_0) / 4;
-		if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE |
-				PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
-			   (PCI_BASE_ADDRESS_SPACE_MEMORY |
-				PCI_BASE_ADDRESS_MEM_TYPE_64))) {
-			bar_info->val = res[pos - 1].start >> 32;
-			bar_info->len_val = res[pos - 1].end >> 32;
-			return;
-		}
-	}
-
-	bar_info->val = res[pos].start |
-			(res[pos].flags & PCI_REGION_FLAG_MASK);
-	bar_info->len_val = res[pos].end - res[pos].start + 1;
-}
-
-static void *bar_init(struct pci_dev *dev, int offset)
-{
-	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
-
-	if (!bar)
-		return ERR_PTR(-ENOMEM);
-
-	read_dev_bar(dev, bar, offset, ~0);
-	bar->which = 0;
-
-	return bar;
-}
-
-static void *rom_init(struct pci_dev *dev, int offset)
-{
-	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
-
-	if (!bar)
-		return ERR_PTR(-ENOMEM);
-
-	read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
-	bar->which = 0;
-
-	return bar;
-}
-
-static void bar_reset(struct pci_dev *dev, int offset, void *data)
-{
-	struct pci_bar_info *bar = data;
-
-	bar->which = 0;
-}
-
-static void bar_release(struct pci_dev *dev, int offset, void *data)
-{
-	kfree(data);
-}
-
-static int xen_pcibk_read_vendor(struct pci_dev *dev, int offset,
-			       u16 *value, void *data)
-{
-	*value = dev->vendor;
-
-	return 0;
-}
-
-static int xen_pcibk_read_device(struct pci_dev *dev, int offset,
-			       u16 *value, void *data)
-{
-	*value = dev->device;
-
-	return 0;
-}
-
-static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
-			  void *data)
-{
-	*value = (u8) dev->irq;
-
-	return 0;
-}
-
-static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
-{
-	u8 cur_value;
-	int err;
-
-	err = pci_read_config_byte(dev, offset, &cur_value);
-	if (err)
-		goto out;
-
-	if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START)
-	    || value == PCI_BIST_START)
-		err = pci_write_config_byte(dev, offset, value);
-
-out:
-	return err;
-}
-
-static const struct config_field header_common[] = {
-	{
-	 .offset    = PCI_VENDOR_ID,
-	 .size      = 2,
-	 .u.w.read  = xen_pcibk_read_vendor,
-	},
-	{
-	 .offset    = PCI_DEVICE_ID,
-	 .size      = 2,
-	 .u.w.read  = xen_pcibk_read_device,
-	},
-	{
-	 .offset    = PCI_COMMAND,
-	 .size      = 2,
-	 .u.w.read  = command_read,
-	 .u.w.write = command_write,
-	},
-	{
-	 .offset    = PCI_INTERRUPT_LINE,
-	 .size      = 1,
-	 .u.b.read  = interrupt_read,
-	},
-	{
-	 .offset    = PCI_INTERRUPT_PIN,
-	 .size      = 1,
-	 .u.b.read  = xen_pcibk_read_config_byte,
-	},
-	{
-	 /* Any side effects of letting driver domain control cache line? */
-	 .offset    = PCI_CACHE_LINE_SIZE,
-	 .size      = 1,
-	 .u.b.read  = xen_pcibk_read_config_byte,
-	 .u.b.write = xen_pcibk_write_config_byte,
-	},
-	{
-	 .offset    = PCI_LATENCY_TIMER,
-	 .size      = 1,
-	 .u.b.read  = xen_pcibk_read_config_byte,
-	},
-	{
-	 .offset    = PCI_BIST,
-	 .size      = 1,
-	 .u.b.read  = xen_pcibk_read_config_byte,
-	 .u.b.write = bist_write,
-	},
-	{}
-};
-
-#define CFG_FIELD_BAR(reg_offset)			\
-	{						\
-	.offset     = reg_offset,			\
-	.size       = 4,				\
-	.init       = bar_init,				\
-	.reset      = bar_reset,			\
-	.release    = bar_release,			\
-	.u.dw.read  = bar_read,				\
-	.u.dw.write = bar_write,			\
-	}
-
-#define CFG_FIELD_ROM(reg_offset)			\
-	{						\
-	.offset     = reg_offset,			\
-	.size       = 4,				\
-	.init       = rom_init,				\
-	.reset      = bar_reset,			\
-	.release    = bar_release,			\
-	.u.dw.read  = bar_read,				\
-	.u.dw.write = rom_write,			\
-	}
-
-static const struct config_field header_0[] = {
-	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
-	CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
-	CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
-	CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
-	CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
-	CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
-	CFG_FIELD_ROM(PCI_ROM_ADDRESS),
-	{}
-};
-
-static const struct config_field header_1[] = {
-	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
-	CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
-	CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
-	{}
-};
-
-int xen_pcibk_config_header_add_fields(struct pci_dev *dev)
-{
-	int err;
-
-	err = xen_pcibk_config_add_fields(dev, header_common);
-	if (err)
-		goto out;
-
-	switch (dev->hdr_type) {
-	case PCI_HEADER_TYPE_NORMAL:
-		err = xen_pcibk_config_add_fields(dev, header_0);
-		break;
-
-	case PCI_HEADER_TYPE_BRIDGE:
-		err = xen_pcibk_config_add_fields(dev, header_1);
-		break;
-
-	default:
-		err = -EINVAL;
-		printk(KERN_ERR DRV_NAME ": %s: Unsupported header type %d!\n",
-		       pci_name(dev), dev->hdr_type);
-		break;
-	}
-
-out:
-	return err;
-}
diff --git a/trunk/drivers/xen/xen-pciback/conf_space_quirks.c b/trunk/drivers/xen/xen-pciback/conf_space_quirks.c
deleted file mode 100644
index 921a889e65eb..000000000000
--- a/trunk/drivers/xen/xen-pciback/conf_space_quirks.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * PCI Backend - Handle special overlays for broken devices.
- *
- * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
- * Author: Chris Bookholt <hap10@epoch.ncsc.mil>
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include "pciback.h"
-#include "conf_space.h"
-#include "conf_space_quirks.h"
-
-LIST_HEAD(xen_pcibk_quirks);
-#define	DRV_NAME	"xen-pciback"
-static inline const struct pci_device_id *
-match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
-{
-	if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
-	    (id->device == PCI_ANY_ID || id->device == dev->device) &&
-	    (id->subvendor == PCI_ANY_ID ||
-				id->subvendor == dev->subsystem_vendor) &&
-	    (id->subdevice == PCI_ANY_ID ||
-				id->subdevice == dev->subsystem_device) &&
-	    !((id->class ^ dev->class) & id->class_mask))
-		return id;
-	return NULL;
-}
-
-static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev)
-{
-	struct xen_pcibk_config_quirk *tmp_quirk;
-
-	list_for_each_entry(tmp_quirk, &xen_pcibk_quirks, quirks_list)
-		if (match_one_device(&tmp_quirk->devid, dev) != NULL)
-			goto out;
-	tmp_quirk = NULL;
-	printk(KERN_DEBUG DRV_NAME
-	       ":quirk didn't match any device xen_pciback knows about\n");
-out:
-	return tmp_quirk;
-}
-
-static inline void register_quirk(struct xen_pcibk_config_quirk *quirk)
-{
-	list_add_tail(&quirk->quirks_list, &xen_pcibk_quirks);
-}
-
-int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg)
-{
-	int ret = 0;
-	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
-	struct config_field_entry *cfg_entry;
-
-	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
-		if (OFFSET(cfg_entry) == reg) {
-			ret = 1;
-			break;
-		}
-	}
-	return ret;
-}
-
-int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field
-				    *field)
-{
-	int err = 0;
-
-	switch (field->size) {
-	case 1:
-		field->u.b.read = xen_pcibk_read_config_byte;
-		field->u.b.write = xen_pcibk_write_config_byte;
-		break;
-	case 2:
-		field->u.w.read = xen_pcibk_read_config_word;
-		field->u.w.write = xen_pcibk_write_config_word;
-		break;
-	case 4:
-		field->u.dw.read = xen_pcibk_read_config_dword;
-		field->u.dw.write = xen_pcibk_write_config_dword;
-		break;
-	default:
-		err = -EINVAL;
-		goto out;
-	}
-
-	xen_pcibk_config_add_field(dev, field);
-
-out:
-	return err;
-}
-
-int xen_pcibk_config_quirks_init(struct pci_dev *dev)
-{
-	struct xen_pcibk_config_quirk *quirk;
-	int ret = 0;
-
-	quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC);
-	if (!quirk) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	quirk->devid.vendor = dev->vendor;
-	quirk->devid.device = dev->device;
-	quirk->devid.subvendor = dev->subsystem_vendor;
-	quirk->devid.subdevice = dev->subsystem_device;
-	quirk->devid.class = 0;
-	quirk->devid.class_mask = 0;
-	quirk->devid.driver_data = 0UL;
-
-	quirk->pdev = dev;
-
-	register_quirk(quirk);
-out:
-	return ret;
-}
-
-void xen_pcibk_config_field_free(struct config_field *field)
-{
-	kfree(field);
-}
-
-int xen_pcibk_config_quirk_release(struct pci_dev *dev)
-{
-	struct xen_pcibk_config_quirk *quirk;
-	int ret = 0;
-
-	quirk = xen_pcibk_find_quirk(dev);
-	if (!quirk) {
-		ret = -ENXIO;
-		goto out;
-	}
-
-	list_del(&quirk->quirks_list);
-	kfree(quirk);
-
-out:
-	return ret;
-}
diff --git a/trunk/drivers/xen/xen-pciback/conf_space_quirks.h b/trunk/drivers/xen/xen-pciback/conf_space_quirks.h
deleted file mode 100644
index cfcc517e4570..000000000000
--- a/trunk/drivers/xen/xen-pciback/conf_space_quirks.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * PCI Backend - Data structures for special overlays for broken devices.
- *
- * Ryan Wilson <hap9@epoch.ncsc.mil>
- * Chris Bookholt <hap10@epoch.ncsc.mil>
- */
-
-#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
-#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
-
-#include <linux/pci.h>
-#include <linux/list.h>
-
-struct xen_pcibk_config_quirk {
-	struct list_head quirks_list;
-	struct pci_device_id devid;
-	struct pci_dev *pdev;
-};
-
-int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field
-				    *field);
-
-int xen_pcibk_config_quirks_remove_field(struct pci_dev *dev, int reg);
-
-int xen_pcibk_config_quirks_init(struct pci_dev *dev);
-
-void xen_pcibk_config_field_free(struct config_field *field);
-
-int xen_pcibk_config_quirk_release(struct pci_dev *dev);
-
-int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg);
-
-#endif
diff --git a/trunk/drivers/xen/xen-pciback/passthrough.c b/trunk/drivers/xen/xen-pciback/passthrough.c
deleted file mode 100644
index 1d32a9a42c01..000000000000
--- a/trunk/drivers/xen/xen-pciback/passthrough.c
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * PCI Backend - Provides restricted access to the real PCI bus topology
- *               to the frontend
- *
- *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
- */
-
-#include <linux/list.h>
-#include <linux/pci.h>
-#include <linux/spinlock.h>
-#include "pciback.h"
-
-struct passthrough_dev_data {
-	/* Access to dev_list must be protected by lock */
-	struct list_head dev_list;
-	spinlock_t lock;
-};
-
-static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
-					       unsigned int domain,
-					       unsigned int bus,
-					       unsigned int devfn)
-{
-	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
-	struct pci_dev_entry *dev_entry;
-	struct pci_dev *dev = NULL;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev_data->lock, flags);
-
-	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
-		if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
-		    && bus == (unsigned int)dev_entry->dev->bus->number
-		    && devfn == dev_entry->dev->devfn) {
-			dev = dev_entry->dev;
-			break;
-		}
-	}
-
-	spin_unlock_irqrestore(&dev_data->lock, flags);
-
-	return dev;
-}
-
-static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
-				   struct pci_dev *dev,
-				   int devid, publish_pci_dev_cb publish_cb)
-{
-	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
-	struct pci_dev_entry *dev_entry;
-	unsigned long flags;
-	unsigned int domain, bus, devfn;
-	int err;
-
-	dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
-	if (!dev_entry)
-		return -ENOMEM;
-	dev_entry->dev = dev;
-
-	spin_lock_irqsave(&dev_data->lock, flags);
-	list_add_tail(&dev_entry->list, &dev_data->dev_list);
-	spin_unlock_irqrestore(&dev_data->lock, flags);
-
-	/* Publish this device. */
-	domain = (unsigned int)pci_domain_nr(dev->bus);
-	bus = (unsigned int)dev->bus->number;
-	devfn = dev->devfn;
-	err = publish_cb(pdev, domain, bus, devfn, devid);
-
-	return err;
-}
-
-static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
-					struct pci_dev *dev)
-{
-	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
-	struct pci_dev_entry *dev_entry, *t;
-	struct pci_dev *found_dev = NULL;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev_data->lock, flags);
-
-	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
-		if (dev_entry->dev == dev) {
-			list_del(&dev_entry->list);
-			found_dev = dev_entry->dev;
-			kfree(dev_entry);
-		}
-	}
-
-	spin_unlock_irqrestore(&dev_data->lock, flags);
-
-	if (found_dev)
-		pcistub_put_pci_dev(found_dev);
-}
-
-static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
-{
-	struct passthrough_dev_data *dev_data;
-
-	dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
-	if (!dev_data)
-		return -ENOMEM;
-
-	spin_lock_init(&dev_data->lock);
-
-	INIT_LIST_HEAD(&dev_data->dev_list);
-
-	pdev->pci_dev_data = dev_data;
-
-	return 0;
-}
-
-static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
-					 publish_pci_root_cb publish_root_cb)
-{
-	int err = 0;
-	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
-	struct pci_dev_entry *dev_entry, *e, *tmp;
-	struct pci_dev *dev;
-	int found;
-	unsigned int domain, bus;
-
-	spin_lock(&dev_data->lock);
-
-	list_for_each_entry_safe(dev_entry, tmp, &dev_data->dev_list, list) {
-		/* Only publish this device as a root if none of its
-		 * parent bridges are exported
-		 */
-		found = 0;
-		dev = dev_entry->dev->bus->self;
-		for (; !found && dev != NULL; dev = dev->bus->self) {
-			list_for_each_entry(e, &dev_data->dev_list, list) {
-				if (dev == e->dev) {
-					found = 1;
-					break;
-				}
-			}
-		}
-
-		domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
-		bus = (unsigned int)dev_entry->dev->bus->number;
-
-		if (!found) {
-			spin_unlock(&dev_data->lock);
-			err = publish_root_cb(pdev, domain, bus);
-			if (err)
-				break;
-			spin_lock(&dev_data->lock);
-		}
-	}
-
-	if (!err)
-		spin_unlock(&dev_data->lock);
-
-	return err;
-}
-
-static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
-{
-	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
-	struct pci_dev_entry *dev_entry, *t;
-
-	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
-		list_del(&dev_entry->list);
-		pcistub_put_pci_dev(dev_entry->dev);
-		kfree(dev_entry);
-	}
-
-	kfree(dev_data);
-	pdev->pci_dev_data = NULL;
-}
-
-static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
-					struct xen_pcibk_device *pdev,
-					unsigned int *domain, unsigned int *bus,
-					unsigned int *devfn)
-{
-	*domain = pci_domain_nr(pcidev->bus);
-	*bus = pcidev->bus->number;
-	*devfn = pcidev->devfn;
-	return 1;
-}
-
-struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
-	.name           = "passthrough",
-	.init           = __xen_pcibk_init_devices,
-	.free		= __xen_pcibk_release_devices,
-	.find           = __xen_pcibk_get_pcifront_dev,
-	.publish        = __xen_pcibk_publish_pci_roots,
-	.release        = __xen_pcibk_release_pci_dev,
-	.add            = __xen_pcibk_add_pci_dev,
-	.get            = __xen_pcibk_get_pci_dev,
-};
diff --git a/trunk/drivers/xen/xen-pciback/pci_stub.c b/trunk/drivers/xen/xen-pciback/pci_stub.c
deleted file mode 100644
index aec214ac0a14..000000000000
--- a/trunk/drivers/xen/xen-pciback/pci_stub.c
+++ /dev/null
@@ -1,1376 +0,0 @@
-/*
- * PCI Stub Driver - Grabs devices in backend to be exported later
- *
- * Ryan Wilson <hap9@epoch.ncsc.mil>
- * Chris Bookholt <hap10@epoch.ncsc.mil>
- */
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/rwsem.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/kref.h>
-#include <linux/pci.h>
-#include <linux/wait.h>
-#include <linux/sched.h>
-#include <linux/atomic.h>
-#include <xen/events.h>
-#include <asm/xen/pci.h>
-#include <asm/xen/hypervisor.h>
-#include "pciback.h"
-#include "conf_space.h"
-#include "conf_space_quirks.h"
-
-#define DRV_NAME	"xen-pciback"
-
-static char *pci_devs_to_hide;
-wait_queue_head_t xen_pcibk_aer_wait_queue;
-/*Add sem for sync AER handling and xen_pcibk remove/reconfigue ops,
-* We want to avoid in middle of AER ops, xen_pcibk devices is being removed
-*/
-static DECLARE_RWSEM(pcistub_sem);
-module_param_named(hide, pci_devs_to_hide, charp, 0444);
-
-struct pcistub_device_id {
-	struct list_head slot_list;
-	int domain;
-	unsigned char bus;
-	unsigned int devfn;
-};
-static LIST_HEAD(pcistub_device_ids);
-static DEFINE_SPINLOCK(device_ids_lock);
-
-struct pcistub_device {
-	struct kref kref;
-	struct list_head dev_list;
-	spinlock_t lock;
-
-	struct pci_dev *dev;
-	struct xen_pcibk_device *pdev;/* non-NULL if struct pci_dev is in use */
-};
-
-/* Access to pcistub_devices & seized_devices lists and the initialize_devices
- * flag must be locked with pcistub_devices_lock
- */
-static DEFINE_SPINLOCK(pcistub_devices_lock);
-static LIST_HEAD(pcistub_devices);
-
-/* wait for device_initcall before initializing our devices
- * (see pcistub_init_devices_late)
- */
-static int initialize_devices;
-static LIST_HEAD(seized_devices);
-
-static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
-{
-	struct pcistub_device *psdev;
-
-	dev_dbg(&dev->dev, "pcistub_device_alloc\n");
-
-	psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC);
-	if (!psdev)
-		return NULL;
-
-	psdev->dev = pci_dev_get(dev);
-	if (!psdev->dev) {
-		kfree(psdev);
-		return NULL;
-	}
-
-	kref_init(&psdev->kref);
-	spin_lock_init(&psdev->lock);
-
-	return psdev;
-}
-
-/* Don't call this directly as it's called by pcistub_device_put */
-static void pcistub_device_release(struct kref *kref)
-{
-	struct pcistub_device *psdev;
-
-	psdev = container_of(kref, struct pcistub_device, kref);
-
-	dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
-
-	xen_unregister_device_domain_owner(psdev->dev);
-
-	/* Clean-up the device */
-	xen_pcibk_reset_device(psdev->dev);
-	xen_pcibk_config_free_dyn_fields(psdev->dev);
-	xen_pcibk_config_free_dev(psdev->dev);
-	kfree(pci_get_drvdata(psdev->dev));
-	pci_set_drvdata(psdev->dev, NULL);
-
-	pci_dev_put(psdev->dev);
-
-	kfree(psdev);
-}
-
-static inline void pcistub_device_get(struct pcistub_device *psdev)
-{
-	kref_get(&psdev->kref);
-}
-
-static inline void pcistub_device_put(struct pcistub_device *psdev)
-{
-	kref_put(&psdev->kref, pcistub_device_release);
-}
-
-static struct pcistub_device *pcistub_device_find(int domain, int bus,
-						  int slot, int func)
-{
-	struct pcistub_device *psdev = NULL;
-	unsigned long flags;
-
-	spin_lock_irqsave(&pcistub_devices_lock, flags);
-
-	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
-		if (psdev->dev != NULL
-		    && domain == pci_domain_nr(psdev->dev->bus)
-		    && bus == psdev->dev->bus->number
-		    && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
-			pcistub_device_get(psdev);
-			goto out;
-		}
-	}
-
-	/* didn't find it */
-	psdev = NULL;
-
-out:
-	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-	return psdev;
-}
-
-static struct pci_dev *pcistub_device_get_pci_dev(struct xen_pcibk_device *pdev,
-						  struct pcistub_device *psdev)
-{
-	struct pci_dev *pci_dev = NULL;
-	unsigned long flags;
-
-	pcistub_device_get(psdev);
-
-	spin_lock_irqsave(&psdev->lock, flags);
-	if (!psdev->pdev) {
-		psdev->pdev = pdev;
-		pci_dev = psdev->dev;
-	}
-	spin_unlock_irqrestore(&psdev->lock, flags);
-
-	if (!pci_dev)
-		pcistub_device_put(psdev);
-
-	return pci_dev;
-}
-
-struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
-					    int domain, int bus,
-					    int slot, int func)
-{
-	struct pcistub_device *psdev;
-	struct pci_dev *found_dev = NULL;
-	unsigned long flags;
-
-	spin_lock_irqsave(&pcistub_devices_lock, flags);
-
-	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
-		if (psdev->dev != NULL
-		    && domain == pci_domain_nr(psdev->dev->bus)
-		    && bus == psdev->dev->bus->number
-		    && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
-			found_dev = pcistub_device_get_pci_dev(pdev, psdev);
-			break;
-		}
-	}
-
-	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-	return found_dev;
-}
-
-struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
-				    struct pci_dev *dev)
-{
-	struct pcistub_device *psdev;
-	struct pci_dev *found_dev = NULL;
-	unsigned long flags;
-
-	spin_lock_irqsave(&pcistub_devices_lock, flags);
-
-	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
-		if (psdev->dev == dev) {
-			found_dev = pcistub_device_get_pci_dev(pdev, psdev);
-			break;
-		}
-	}
-
-	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-	return found_dev;
-}
-
-void pcistub_put_pci_dev(struct pci_dev *dev)
-{
-	struct pcistub_device *psdev, *found_psdev = NULL;
-	unsigned long flags;
-
-	spin_lock_irqsave(&pcistub_devices_lock, flags);
-
-	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
-		if (psdev->dev == dev) {
-			found_psdev = psdev;
-			break;
-		}
-	}
-
-	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-
-	/*hold this lock for avoiding breaking link between
-	* pcistub and xen_pcibk when AER is in processing
-	*/
-	down_write(&pcistub_sem);
-	/* Cleanup our device
-	 * (so it's ready for the next domain)
-	 */
-	xen_pcibk_reset_device(found_psdev->dev);
-	xen_pcibk_config_free_dyn_fields(found_psdev->dev);
-	xen_pcibk_config_reset_dev(found_psdev->dev);
-
-	spin_lock_irqsave(&found_psdev->lock, flags);
-	found_psdev->pdev = NULL;
-	spin_unlock_irqrestore(&found_psdev->lock, flags);
-
-	pcistub_device_put(found_psdev);
-	up_write(&pcistub_sem);
-}
-
-static int __devinit pcistub_match_one(struct pci_dev *dev,
-				       struct pcistub_device_id *pdev_id)
-{
-	/* Match the specified device by domain, bus, slot, func and also if
-	 * any of the device's parent bridges match.
-	 */
-	for (; dev != NULL; dev = dev->bus->self) {
-		if (pci_domain_nr(dev->bus) == pdev_id->domain
-		    && dev->bus->number == pdev_id->bus
-		    && dev->devfn == pdev_id->devfn)
-			return 1;
-
-		/* Sometimes topmost bridge links to itself. */
-		if (dev == dev->bus->self)
-			break;
-	}
-
-	return 0;
-}
-
-static int __devinit pcistub_match(struct pci_dev *dev)
-{
-	struct pcistub_device_id *pdev_id;
-	unsigned long flags;
-	int found = 0;
-
-	spin_lock_irqsave(&device_ids_lock, flags);
-	list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) {
-		if (pcistub_match_one(dev, pdev_id)) {
-			found = 1;
-			break;
-		}
-	}
-	spin_unlock_irqrestore(&device_ids_lock, flags);
-
-	return found;
-}
-
-static int __devinit pcistub_init_device(struct pci_dev *dev)
-{
-	struct xen_pcibk_dev_data *dev_data;
-	int err = 0;
-
-	dev_dbg(&dev->dev, "initializing...\n");
-
-	/* The PCI backend is not intended to be a module (or to work with
-	 * removable PCI devices (yet). If it were, xen_pcibk_config_free()
-	 * would need to be called somewhere to free the memory allocated
-	 * here and then to call kfree(pci_get_drvdata(psdev->dev)).
-	 */
-	dev_data = kzalloc(sizeof(*dev_data) +  strlen(DRV_NAME "[]")
-				+ strlen(pci_name(dev)) + 1, GFP_ATOMIC);
-	if (!dev_data) {
-		err = -ENOMEM;
-		goto out;
-	}
-	pci_set_drvdata(dev, dev_data);
-
-	/*
-	 * Setup name for fake IRQ handler. It will only be enabled
-	 * once the device is turned on by the guest.
-	 */
-	sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev));
-
-	dev_dbg(&dev->dev, "initializing config\n");
-
-	init_waitqueue_head(&xen_pcibk_aer_wait_queue);
-	err = xen_pcibk_config_init_dev(dev);
-	if (err)
-		goto out;
-
-	/* HACK: Force device (& ACPI) to determine what IRQ it's on - we
-	 * must do this here because pcibios_enable_device may specify
-	 * the pci device's true irq (and possibly its other resources)
-	 * if they differ from what's in the configuration space.
-	 * This makes the assumption that the device's resources won't
-	 * change after this point (otherwise this code may break!)
-	 */
-	dev_dbg(&dev->dev, "enabling device\n");
-	err = pci_enable_device(dev);
-	if (err)
-		goto config_release;
-
-	/* Now disable the device (this also ensures some private device
-	 * data is setup before we export)
-	 */
-	dev_dbg(&dev->dev, "reset device\n");
-	xen_pcibk_reset_device(dev);
-
-	return 0;
-
-config_release:
-	xen_pcibk_config_free_dev(dev);
-
-out:
-	pci_set_drvdata(dev, NULL);
-	kfree(dev_data);
-	return err;
-}
-
-/*
- * Because some initialization still happens on
- * devices during fs_initcall, we need to defer
- * full initialization of our devices until
- * device_initcall.
- */
-static int __init pcistub_init_devices_late(void)
-{
-	struct pcistub_device *psdev;
-	unsigned long flags;
-	int err = 0;
-
-	pr_debug(DRV_NAME ": pcistub_init_devices_late\n");
-
-	spin_lock_irqsave(&pcistub_devices_lock, flags);
-
-	while (!list_empty(&seized_devices)) {
-		psdev = container_of(seized_devices.next,
-				     struct pcistub_device, dev_list);
-		list_del(&psdev->dev_list);
-
-		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-
-		err = pcistub_init_device(psdev->dev);
-		if (err) {
-			dev_err(&psdev->dev->dev,
-				"error %d initializing device\n", err);
-			kfree(psdev);
-			psdev = NULL;
-		}
-
-		spin_lock_irqsave(&pcistub_devices_lock, flags);
-
-		if (psdev)
-			list_add_tail(&psdev->dev_list, &pcistub_devices);
-	}
-
-	initialize_devices = 1;
-
-	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-
-	return 0;
-}
-
-static int __devinit pcistub_seize(struct pci_dev *dev)
-{
-	struct pcistub_device *psdev;
-	unsigned long flags;
-	int err = 0;
-
-	psdev = pcistub_device_alloc(dev);
-	if (!psdev)
-		return -ENOMEM;
-
-	spin_lock_irqsave(&pcistub_devices_lock, flags);
-
-	if (initialize_devices) {
-		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-
-		/* don't want irqs disabled when calling pcistub_init_device */
-		err = pcistub_init_device(psdev->dev);
-
-		spin_lock_irqsave(&pcistub_devices_lock, flags);
-
-		if (!err)
-			list_add(&psdev->dev_list, &pcistub_devices);
-	} else {
-		dev_dbg(&dev->dev, "deferring initialization\n");
-		list_add(&psdev->dev_list, &seized_devices);
-	}
-
-	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-
-	if (err)
-		pcistub_device_put(psdev);
-
-	return err;
-}
-
-static int __devinit pcistub_probe(struct pci_dev *dev,
-				   const struct pci_device_id *id)
-{
-	int err = 0;
-
-	dev_dbg(&dev->dev, "probing...\n");
-
-	if (pcistub_match(dev)) {
-
-		if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL
-		    && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
-			dev_err(&dev->dev, "can't export pci devices that "
-				"don't have a normal (0) or bridge (1) "
-				"header type!\n");
-			err = -ENODEV;
-			goto out;
-		}
-
-		dev_info(&dev->dev, "seizing device\n");
-		err = pcistub_seize(dev);
-	} else
-		/* Didn't find the device */
-		err = -ENODEV;
-
-out:
-	return err;
-}
-
-static void pcistub_remove(struct pci_dev *dev)
-{
-	struct pcistub_device *psdev, *found_psdev = NULL;
-	unsigned long flags;
-
-	dev_dbg(&dev->dev, "removing\n");
-
-	spin_lock_irqsave(&pcistub_devices_lock, flags);
-
-	xen_pcibk_config_quirk_release(dev);
-
-	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
-		if (psdev->dev == dev) {
-			found_psdev = psdev;
-			break;
-		}
-	}
-
-	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-
-	if (found_psdev) {
-		dev_dbg(&dev->dev, "found device to remove - in use? %p\n",
-			found_psdev->pdev);
-
-		if (found_psdev->pdev) {
-			printk(KERN_WARNING DRV_NAME ": ****** removing device "
-			       "%s while still in-use! ******\n",
-			       pci_name(found_psdev->dev));
-			printk(KERN_WARNING DRV_NAME ": ****** driver domain may"
-			       " still access this device's i/o resources!\n");
-			printk(KERN_WARNING DRV_NAME ": ****** shutdown driver "
-			       "domain before binding device\n");
-			printk(KERN_WARNING DRV_NAME ": ****** to other drivers "
-			       "or domains\n");
-
-			xen_pcibk_release_pci_dev(found_psdev->pdev,
-						found_psdev->dev);
-		}
-
-		spin_lock_irqsave(&pcistub_devices_lock, flags);
-		list_del(&found_psdev->dev_list);
-		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-
-		/* the final put for releasing from the list */
-		pcistub_device_put(found_psdev);
-	}
-}
-
-static DEFINE_PCI_DEVICE_TABLE(pcistub_ids) = {
-	{
-	 .vendor = PCI_ANY_ID,
-	 .device = PCI_ANY_ID,
-	 .subvendor = PCI_ANY_ID,
-	 .subdevice = PCI_ANY_ID,
-	 },
-	{0,},
-};
-
-#define PCI_NODENAME_MAX 40
-static void kill_domain_by_device(struct pcistub_device *psdev)
-{
-	struct xenbus_transaction xbt;
-	int err;
-	char nodename[PCI_NODENAME_MAX];
-
-	if (!psdev)
-		dev_err(&psdev->dev->dev,
-			"device is NULL when do AER recovery/kill_domain\n");
-	snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
-		psdev->pdev->xdev->otherend_id);
-	nodename[strlen(nodename)] = '\0';
-
-again:
-	err = xenbus_transaction_start(&xbt);
-	if (err) {
-		dev_err(&psdev->dev->dev,
-			"error %d when start xenbus transaction\n", err);
-		return;
-	}
-	/*PV AER handlers will set this flag*/
-	xenbus_printf(xbt, nodename, "aerState" , "aerfail");
-	err = xenbus_transaction_end(xbt, 0);
-	if (err) {
-		if (err == -EAGAIN)
-			goto again;
-		dev_err(&psdev->dev->dev,
-			"error %d when end xenbus transaction\n", err);
-		return;
-	}
-}
-
-/* For each aer recovery step error_detected, mmio_enabled, etc, front_end and
- * backend need to have cooperation. In xen_pcibk, those steps will do similar
- * jobs: send service request and waiting for front_end response.
-*/
-static pci_ers_result_t common_process(struct pcistub_device *psdev,
-				       pci_channel_state_t state, int aer_cmd,
-				       pci_ers_result_t result)
-{
-	pci_ers_result_t res = result;
-	struct xen_pcie_aer_op *aer_op;
-	int ret;
-
-	/*with PV AER drivers*/
-	aer_op = &(psdev->pdev->sh_info->aer_op);
-	aer_op->cmd = aer_cmd ;
-	/*useful for error_detected callback*/
-	aer_op->err = state;
-	/*pcifront_end BDF*/
-	ret = xen_pcibk_get_pcifront_dev(psdev->dev, psdev->pdev,
-		&aer_op->domain, &aer_op->bus, &aer_op->devfn);
-	if (!ret) {
-		dev_err(&psdev->dev->dev,
-			DRV_NAME ": failed to get pcifront device\n");
-		return PCI_ERS_RESULT_NONE;
-	}
-	wmb();
-
-	dev_dbg(&psdev->dev->dev,
-			DRV_NAME ": aer_op %x dom %x bus %x devfn %x\n",
-			aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
-	/*local flag to mark there's aer request, xen_pcibk callback will use
-	* this flag to judge whether we need to check pci-front give aer
-	* service ack signal
-	*/
-	set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
-
-	/*It is possible that a pcifront conf_read_write ops request invokes
-	* the callback which cause the spurious execution of wake_up.
-	* Yet it is harmless and better than a spinlock here
-	*/
-	set_bit(_XEN_PCIB_active,
-		(unsigned long *)&psdev->pdev->sh_info->flags);
-	wmb();
-	notify_remote_via_irq(psdev->pdev->evtchn_irq);
-
-	ret = wait_event_timeout(xen_pcibk_aer_wait_queue,
-				 !(test_bit(_XEN_PCIB_active, (unsigned long *)
-				 &psdev->pdev->sh_info->flags)), 300*HZ);
-
-	if (!ret) {
-		if (test_bit(_XEN_PCIB_active,
-			(unsigned long *)&psdev->pdev->sh_info->flags)) {
-			dev_err(&psdev->dev->dev,
-				"pcifront aer process not responding!\n");
-			clear_bit(_XEN_PCIB_active,
-			  (unsigned long *)&psdev->pdev->sh_info->flags);
-			aer_op->err = PCI_ERS_RESULT_NONE;
-			return res;
-		}
-	}
-	clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
-
-	if (test_bit(_XEN_PCIF_active,
-		(unsigned long *)&psdev->pdev->sh_info->flags)) {
-		dev_dbg(&psdev->dev->dev,
-			"schedule pci_conf service in xen_pcibk\n");
-		xen_pcibk_test_and_schedule_op(psdev->pdev);
-	}
-
-	res = (pci_ers_result_t)aer_op->err;
-	return res;
-}
-
-/*
-* xen_pcibk_slot_reset: it will send the slot_reset request to  pcifront in case
-* of the device driver could provide this service, and then wait for pcifront
-* ack.
-* @dev: pointer to PCI devices
-* return value is used by aer_core do_recovery policy
-*/
-static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev)
-{
-	struct pcistub_device *psdev;
-	pci_ers_result_t result;
-
-	result = PCI_ERS_RESULT_RECOVERED;
-	dev_dbg(&dev->dev, "xen_pcibk_slot_reset(bus:%x,devfn:%x)\n",
-		dev->bus->number, dev->devfn);
-
-	down_write(&pcistub_sem);
-	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
-				dev->bus->number,
-				PCI_SLOT(dev->devfn),
-				PCI_FUNC(dev->devfn));
-
-	if (!psdev || !psdev->pdev) {
-		dev_err(&dev->dev,
-			DRV_NAME " device is not found/assigned\n");
-		goto end;
-	}
-
-	if (!psdev->pdev->sh_info) {
-		dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
-			" by HVM, kill it\n");
-		kill_domain_by_device(psdev);
-		goto release;
-	}
-
-	if (!test_bit(_XEN_PCIB_AERHANDLER,
-		(unsigned long *)&psdev->pdev->sh_info->flags)) {
-		dev_err(&dev->dev,
-			"guest with no AER driver should have been killed\n");
-		goto release;
-	}
-	result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result);
-
-	if (result == PCI_ERS_RESULT_NONE ||
-		result == PCI_ERS_RESULT_DISCONNECT) {
-		dev_dbg(&dev->dev,
-			"No AER slot_reset service or disconnected!\n");
-		kill_domain_by_device(psdev);
-	}
-release:
-	pcistub_device_put(psdev);
-end:
-	up_write(&pcistub_sem);
-	return result;
-
-}
-
-
-/*xen_pcibk_mmio_enabled: it will send the mmio_enabled request to  pcifront
-* in case of the device driver could provide this service, and then wait
-* for pcifront ack
-* @dev: pointer to PCI devices
-* return value is used by aer_core do_recovery policy
-*/
-
-static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev)
-{
-	struct pcistub_device *psdev;
-	pci_ers_result_t result;
-
-	result = PCI_ERS_RESULT_RECOVERED;
-	dev_dbg(&dev->dev, "xen_pcibk_mmio_enabled(bus:%x,devfn:%x)\n",
-		dev->bus->number, dev->devfn);
-
-	down_write(&pcistub_sem);
-	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
-				dev->bus->number,
-				PCI_SLOT(dev->devfn),
-				PCI_FUNC(dev->devfn));
-
-	if (!psdev || !psdev->pdev) {
-		dev_err(&dev->dev,
-			DRV_NAME " device is not found/assigned\n");
-		goto end;
-	}
-
-	if (!psdev->pdev->sh_info) {
-		dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
-			" by HVM, kill it\n");
-		kill_domain_by_device(psdev);
-		goto release;
-	}
-
-	if (!test_bit(_XEN_PCIB_AERHANDLER,
-		(unsigned long *)&psdev->pdev->sh_info->flags)) {
-		dev_err(&dev->dev,
-			"guest with no AER driver should have been killed\n");
-		goto release;
-	}
-	result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result);
-
-	if (result == PCI_ERS_RESULT_NONE ||
-		result == PCI_ERS_RESULT_DISCONNECT) {
-		dev_dbg(&dev->dev,
-			"No AER mmio_enabled service or disconnected!\n");
-		kill_domain_by_device(psdev);
-	}
-release:
-	pcistub_device_put(psdev);
-end:
-	up_write(&pcistub_sem);
-	return result;
-}
-
-/*xen_pcibk_error_detected: it will send the error_detected request to  pcifront
-* in case of the device driver could provide this service, and then wait
-* for pcifront ack.
-* @dev: pointer to PCI devices
-* @error: the current PCI connection state
-* return value is used by aer_core do_recovery policy
-*/
-
-static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev,
-	pci_channel_state_t error)
-{
-	struct pcistub_device *psdev;
-	pci_ers_result_t result;
-
-	result = PCI_ERS_RESULT_CAN_RECOVER;
-	dev_dbg(&dev->dev, "xen_pcibk_error_detected(bus:%x,devfn:%x)\n",
-		dev->bus->number, dev->devfn);
-
-	down_write(&pcistub_sem);
-	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
-				dev->bus->number,
-				PCI_SLOT(dev->devfn),
-				PCI_FUNC(dev->devfn));
-
-	if (!psdev || !psdev->pdev) {
-		dev_err(&dev->dev,
-			DRV_NAME " device is not found/assigned\n");
-		goto end;
-	}
-
-	if (!psdev->pdev->sh_info) {
-		dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
-			" by HVM, kill it\n");
-		kill_domain_by_device(psdev);
-		goto release;
-	}
-
-	/*Guest owns the device yet no aer handler regiested, kill guest*/
-	if (!test_bit(_XEN_PCIB_AERHANDLER,
-		(unsigned long *)&psdev->pdev->sh_info->flags)) {
-		dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
-		kill_domain_by_device(psdev);
-		goto release;
-	}
-	result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result);
-
-	if (result == PCI_ERS_RESULT_NONE ||
-		result == PCI_ERS_RESULT_DISCONNECT) {
-		dev_dbg(&dev->dev,
-			"No AER error_detected service or disconnected!\n");
-		kill_domain_by_device(psdev);
-	}
-release:
-	pcistub_device_put(psdev);
-end:
-	up_write(&pcistub_sem);
-	return result;
-}
-
-/*xen_pcibk_error_resume: it will send the error_resume request to  pcifront
-* in case of the device driver could provide this service, and then wait
-* for pcifront ack.
-* @dev: pointer to PCI devices
-*/
-
-static void xen_pcibk_error_resume(struct pci_dev *dev)
-{
-	struct pcistub_device *psdev;
-
-	dev_dbg(&dev->dev, "xen_pcibk_error_resume(bus:%x,devfn:%x)\n",
-		dev->bus->number, dev->devfn);
-
-	down_write(&pcistub_sem);
-	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
-				dev->bus->number,
-				PCI_SLOT(dev->devfn),
-				PCI_FUNC(dev->devfn));
-
-	if (!psdev || !psdev->pdev) {
-		dev_err(&dev->dev,
-			DRV_NAME " device is not found/assigned\n");
-		goto end;
-	}
-
-	if (!psdev->pdev->sh_info) {
-		dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
-			" by HVM, kill it\n");
-		kill_domain_by_device(psdev);
-		goto release;
-	}
-
-	if (!test_bit(_XEN_PCIB_AERHANDLER,
-		(unsigned long *)&psdev->pdev->sh_info->flags)) {
-		dev_err(&dev->dev,
-			"guest with no AER driver should have been killed\n");
-		kill_domain_by_device(psdev);
-		goto release;
-	}
-	common_process(psdev, 1, XEN_PCI_OP_aer_resume,
-		       PCI_ERS_RESULT_RECOVERED);
-release:
-	pcistub_device_put(psdev);
-end:
-	up_write(&pcistub_sem);
-	return;
-}
-
-/*add xen_pcibk AER handling*/
-static struct pci_error_handlers xen_pcibk_error_handler = {
-	.error_detected = xen_pcibk_error_detected,
-	.mmio_enabled = xen_pcibk_mmio_enabled,
-	.slot_reset = xen_pcibk_slot_reset,
-	.resume = xen_pcibk_error_resume,
-};
-
-/*
- * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
- * for a normal device. I don't want it to be loaded automatically.
- */
-
-static struct pci_driver xen_pcibk_pci_driver = {
-	/* The name should be xen_pciback, but until the tools are updated
-	 * we will keep it as pciback. */
-	.name = "pciback",
-	.id_table = pcistub_ids,
-	.probe = pcistub_probe,
-	.remove = pcistub_remove,
-	.err_handler = &xen_pcibk_error_handler,
-};
-
-static inline int str_to_slot(const char *buf, int *domain, int *bus,
-			      int *slot, int *func)
-{
-	int err;
-
-	err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);
-	if (err == 4)
-		return 0;
-	else if (err < 0)
-		return -EINVAL;
-
-	/* try again without domain */
-	*domain = 0;
-	err = sscanf(buf, " %x:%x.%x", bus, slot, func);
-	if (err == 3)
-		return 0;
-
-	return -EINVAL;
-}
-
-static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
-			       *slot, int *func, int *reg, int *size, int *mask)
-{
-	int err;
-
-	err =
-	    sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x", domain, bus, slot,
-		   func, reg, size, mask);
-	if (err == 7)
-		return 0;
-	return -EINVAL;
-}
-
-static int pcistub_device_id_add(int domain, int bus, int slot, int func)
-{
-	struct pcistub_device_id *pci_dev_id;
-	unsigned long flags;
-
-	pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
-	if (!pci_dev_id)
-		return -ENOMEM;
-
-	pci_dev_id->domain = domain;
-	pci_dev_id->bus = bus;
-	pci_dev_id->devfn = PCI_DEVFN(slot, func);
-
-	pr_debug(DRV_NAME ": wants to seize %04x:%02x:%02x.%01x\n",
-		 domain, bus, slot, func);
-
-	spin_lock_irqsave(&device_ids_lock, flags);
-	list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids);
-	spin_unlock_irqrestore(&device_ids_lock, flags);
-
-	return 0;
-}
-
-static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
-{
-	struct pcistub_device_id *pci_dev_id, *t;
-	int devfn = PCI_DEVFN(slot, func);
-	int err = -ENOENT;
-	unsigned long flags;
-
-	spin_lock_irqsave(&device_ids_lock, flags);
-	list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids,
-				 slot_list) {
-		if (pci_dev_id->domain == domain
-		    && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
-			/* Don't break; here because it's possible the same
-			 * slot could be in the list more than once
-			 */
-			list_del(&pci_dev_id->slot_list);
-			kfree(pci_dev_id);
-
-			err = 0;
-
-			pr_debug(DRV_NAME ": removed %04x:%02x:%02x.%01x from "
-				 "seize list\n", domain, bus, slot, func);
-		}
-	}
-	spin_unlock_irqrestore(&device_ids_lock, flags);
-
-	return err;
-}
-
-static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
-			   int size, int mask)
-{
-	int err = 0;
-	struct pcistub_device *psdev;
-	struct pci_dev *dev;
-	struct config_field *field;
-
-	psdev = pcistub_device_find(domain, bus, slot, func);
-	if (!psdev || !psdev->dev) {
-		err = -ENODEV;
-		goto out;
-	}
-	dev = psdev->dev;
-
-	field = kzalloc(sizeof(*field), GFP_ATOMIC);
-	if (!field) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	field->offset = reg;
-	field->size = size;
-	field->mask = mask;
-	field->init = NULL;
-	field->reset = NULL;
-	field->release = NULL;
-	field->clean = xen_pcibk_config_field_free;
-
-	err = xen_pcibk_config_quirks_add_field(dev, field);
-	if (err)
-		kfree(field);
-out:
-	return err;
-}
-
-static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
-				size_t count)
-{
-	int domain, bus, slot, func;
-	int err;
-
-	err = str_to_slot(buf, &domain, &bus, &slot, &func);
-	if (err)
-		goto out;
-
-	err = pcistub_device_id_add(domain, bus, slot, func);
-
-out:
-	if (!err)
-		err = count;
-	return err;
-}
-
-DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
-
-static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
-				   size_t count)
-{
-	int domain, bus, slot, func;
-	int err;
-
-	err = str_to_slot(buf, &domain, &bus, &slot, &func);
-	if (err)
-		goto out;
-
-	err = pcistub_device_id_remove(domain, bus, slot, func);
-
-out:
-	if (!err)
-		err = count;
-	return err;
-}
-
-DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
-
-static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
-{
-	struct pcistub_device_id *pci_dev_id;
-	size_t count = 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&device_ids_lock, flags);
-	list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
-		if (count >= PAGE_SIZE)
-			break;
-
-		count += scnprintf(buf + count, PAGE_SIZE - count,
-				   "%04x:%02x:%02x.%01x\n",
-				   pci_dev_id->domain, pci_dev_id->bus,
-				   PCI_SLOT(pci_dev_id->devfn),
-				   PCI_FUNC(pci_dev_id->devfn));
-	}
-	spin_unlock_irqrestore(&device_ids_lock, flags);
-
-	return count;
-}
-
-DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
-
-static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
-{
-	struct pcistub_device *psdev;
-	struct xen_pcibk_dev_data *dev_data;
-	size_t count = 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&pcistub_devices_lock, flags);
-	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
-		if (count >= PAGE_SIZE)
-			break;
-		if (!psdev->dev)
-			continue;
-		dev_data = pci_get_drvdata(psdev->dev);
-		if (!dev_data)
-			continue;
-		count +=
-		    scnprintf(buf + count, PAGE_SIZE - count,
-			      "%s:%s:%sing:%ld\n",
-			      pci_name(psdev->dev),
-			      dev_data->isr_on ? "on" : "off",
-			      dev_data->ack_intr ? "ack" : "not ack",
-			      dev_data->handled);
-	}
-	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-	return count;
-}
-
-DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
-
-static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
-					  const char *buf,
-					  size_t count)
-{
-	struct pcistub_device *psdev;
-	struct xen_pcibk_dev_data *dev_data;
-	int domain, bus, slot, func;
-	int err = -ENOENT;
-
-	err = str_to_slot(buf, &domain, &bus, &slot, &func);
-	if (err)
-		goto out;
-
-	psdev = pcistub_device_find(domain, bus, slot, func);
-
-	if (!psdev)
-		goto out;
-
-	dev_data = pci_get_drvdata(psdev->dev);
-	if (!dev_data)
-		goto out;
-
-	dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n",
-		dev_data->irq_name, dev_data->isr_on,
-		!dev_data->isr_on);
-
-	dev_data->isr_on = !(dev_data->isr_on);
-	if (dev_data->isr_on)
-		dev_data->ack_intr = 1;
-out:
-	if (!err)
-		err = count;
-	return err;
-}
-DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch);
-
-static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
-				 size_t count)
-{
-	int domain, bus, slot, func, reg, size, mask;
-	int err;
-
-	err = str_to_quirk(buf, &domain, &bus, &slot, &func, &reg, &size,
-			   &mask);
-	if (err)
-		goto out;
-
-	err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
-
-out:
-	if (!err)
-		err = count;
-	return err;
-}
-
-static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
-{
-	int count = 0;
-	unsigned long flags;
-	struct xen_pcibk_config_quirk *quirk;
-	struct xen_pcibk_dev_data *dev_data;
-	const struct config_field *field;
-	const struct config_field_entry *cfg_entry;
-
-	spin_lock_irqsave(&device_ids_lock, flags);
-	list_for_each_entry(quirk, &xen_pcibk_quirks, quirks_list) {
-		if (count >= PAGE_SIZE)
-			goto out;
-
-		count += scnprintf(buf + count, PAGE_SIZE - count,
-				   "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
-				   quirk->pdev->bus->number,
-				   PCI_SLOT(quirk->pdev->devfn),
-				   PCI_FUNC(quirk->pdev->devfn),
-				   quirk->devid.vendor, quirk->devid.device,
-				   quirk->devid.subvendor,
-				   quirk->devid.subdevice);
-
-		dev_data = pci_get_drvdata(quirk->pdev);
-
-		list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
-			field = cfg_entry->field;
-			if (count >= PAGE_SIZE)
-				goto out;
-
-			count += scnprintf(buf + count, PAGE_SIZE - count,
-					   "\t\t%08x:%01x:%08x\n",
-					   cfg_entry->base_offset +
-					   field->offset, field->size,
-					   field->mask);
-		}
-	}
-
-out:
-	spin_unlock_irqrestore(&device_ids_lock, flags);
-
-	return count;
-}
-
-DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
-
-static ssize_t permissive_add(struct device_driver *drv, const char *buf,
-			      size_t count)
-{
-	int domain, bus, slot, func;
-	int err;
-	struct pcistub_device *psdev;
-	struct xen_pcibk_dev_data *dev_data;
-	err = str_to_slot(buf, &domain, &bus, &slot, &func);
-	if (err)
-		goto out;
-	psdev = pcistub_device_find(domain, bus, slot, func);
-	if (!psdev) {
-		err = -ENODEV;
-		goto out;
-	}
-	if (!psdev->dev) {
-		err = -ENODEV;
-		goto release;
-	}
-	dev_data = pci_get_drvdata(psdev->dev);
-	/* the driver data for a device should never be null at this point */
-	if (!dev_data) {
-		err = -ENXIO;
-		goto release;
-	}
-	if (!dev_data->permissive) {
-		dev_data->permissive = 1;
-		/* Let user know that what they're doing could be unsafe */
-		dev_warn(&psdev->dev->dev, "enabling permissive mode "
-			 "configuration space accesses!\n");
-		dev_warn(&psdev->dev->dev,
-			 "permissive mode is potentially unsafe!\n");
-	}
-release:
-	pcistub_device_put(psdev);
-out:
-	if (!err)
-		err = count;
-	return err;
-}
-
-static ssize_t permissive_show(struct device_driver *drv, char *buf)
-{
-	struct pcistub_device *psdev;
-	struct xen_pcibk_dev_data *dev_data;
-	size_t count = 0;
-	unsigned long flags;
-	spin_lock_irqsave(&pcistub_devices_lock, flags);
-	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
-		if (count >= PAGE_SIZE)
-			break;
-		if (!psdev->dev)
-			continue;
-		dev_data = pci_get_drvdata(psdev->dev);
-		if (!dev_data || !dev_data->permissive)
-			continue;
-		count +=
-		    scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
-			      pci_name(psdev->dev));
-	}
-	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-	return count;
-}
-
-DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
-
-static void pcistub_exit(void)
-{
-	driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_new_slot);
-	driver_remove_file(&xen_pcibk_pci_driver.driver,
-			   &driver_attr_remove_slot);
-	driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_slots);
-	driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_quirks);
-	driver_remove_file(&xen_pcibk_pci_driver.driver,
-			   &driver_attr_permissive);
-	driver_remove_file(&xen_pcibk_pci_driver.driver,
-			   &driver_attr_irq_handlers);
-	driver_remove_file(&xen_pcibk_pci_driver.driver,
-			   &driver_attr_irq_handler_state);
-	pci_unregister_driver(&xen_pcibk_pci_driver);
-}
-
-static int __init pcistub_init(void)
-{
-	int pos = 0;
-	int err = 0;
-	int domain, bus, slot, func;
-	int parsed;
-
-	if (pci_devs_to_hide && *pci_devs_to_hide) {
-		do {
-			parsed = 0;
-
-			err = sscanf(pci_devs_to_hide + pos,
-				     " (%x:%x:%x.%x) %n",
-				     &domain, &bus, &slot, &func, &parsed);
-			if (err != 4) {
-				domain = 0;
-				err = sscanf(pci_devs_to_hide + pos,
-					     " (%x:%x.%x) %n",
-					     &bus, &slot, &func, &parsed);
-				if (err != 3)
-					goto parse_error;
-			}
-
-			err = pcistub_device_id_add(domain, bus, slot, func);
-			if (err)
-				goto out;
-
-			/* if parsed<=0, we've reached the end of the string */
-			pos += parsed;
-		} while (parsed > 0 && pci_devs_to_hide[pos]);
-	}
-
-	/* If we're the first PCI Device Driver to register, we're the
-	 * first one to get offered PCI devices as they become
-	 * available (and thus we can be the first to grab them)
-	 */
-	err = pci_register_driver(&xen_pcibk_pci_driver);
-	if (err < 0)
-		goto out;
-
-	err = driver_create_file(&xen_pcibk_pci_driver.driver,
-				 &driver_attr_new_slot);
-	if (!err)
-		err = driver_create_file(&xen_pcibk_pci_driver.driver,
-					 &driver_attr_remove_slot);
-	if (!err)
-		err = driver_create_file(&xen_pcibk_pci_driver.driver,
-					 &driver_attr_slots);
-	if (!err)
-		err = driver_create_file(&xen_pcibk_pci_driver.driver,
-					 &driver_attr_quirks);
-	if (!err)
-		err = driver_create_file(&xen_pcibk_pci_driver.driver,
-					 &driver_attr_permissive);
-
-	if (!err)
-		err = driver_create_file(&xen_pcibk_pci_driver.driver,
-					 &driver_attr_irq_handlers);
-	if (!err)
-		err = driver_create_file(&xen_pcibk_pci_driver.driver,
-					&driver_attr_irq_handler_state);
-	if (err)
-		pcistub_exit();
-
-out:
-	return err;
-
-parse_error:
-	printk(KERN_ERR DRV_NAME ": Error parsing pci_devs_to_hide at \"%s\"\n",
-	       pci_devs_to_hide + pos);
-	return -EINVAL;
-}
-
-#ifndef MODULE
-/*
- * fs_initcall happens before device_initcall
- * so xen_pcibk *should* get called first (b/c we
- * want to suck up any device before other drivers
- * get a chance by being the first pci device
- * driver to register)
- */
-fs_initcall(pcistub_init);
-#endif
-
-static int __init xen_pcibk_init(void)
-{
-	int err;
-
-	if (!xen_initial_domain())
-		return -ENODEV;
-
-	err = xen_pcibk_config_init();
-	if (err)
-		return err;
-
-#ifdef MODULE
-	err = pcistub_init();
-	if (err < 0)
-		return err;
-#endif
-
-	pcistub_init_devices_late();
-	err = xen_pcibk_xenbus_register();
-	if (err)
-		pcistub_exit();
-
-	return err;
-}
-
-static void __exit xen_pcibk_cleanup(void)
-{
-	xen_pcibk_xenbus_unregister();
-	pcistub_exit();
-}
-
-module_init(xen_pcibk_init);
-module_exit(xen_pcibk_cleanup);
-
-MODULE_LICENSE("Dual BSD/GPL");
diff --git a/trunk/drivers/xen/xen-pciback/pciback.h b/trunk/drivers/xen/xen-pciback/pciback.h
deleted file mode 100644
index a0e131a81503..000000000000
--- a/trunk/drivers/xen/xen-pciback/pciback.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * PCI Backend Common Data Structures & Function Declarations
- *
- *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
- */
-#ifndef __XEN_PCIBACK_H__
-#define __XEN_PCIBACK_H__
-
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <xen/xenbus.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/workqueue.h>
-#include <linux/atomic.h>
-#include <xen/interface/io/pciif.h>
-
-struct pci_dev_entry {
-	struct list_head list;
-	struct pci_dev *dev;
-};
-
-#define _PDEVF_op_active	(0)
-#define PDEVF_op_active		(1<<(_PDEVF_op_active))
-#define _PCIB_op_pending	(1)
-#define PCIB_op_pending		(1<<(_PCIB_op_pending))
-
-struct xen_pcibk_device {
-	void *pci_dev_data;
-	spinlock_t dev_lock;
-	struct xenbus_device *xdev;
-	struct xenbus_watch be_watch;
-	u8 be_watching;
-	int evtchn_irq;
-	struct xen_pci_sharedinfo *sh_info;
-	unsigned long flags;
-	struct work_struct op_work;
-};
-
-struct xen_pcibk_dev_data {
-	struct list_head config_fields;
-	unsigned int permissive:1;
-	unsigned int warned_on_write:1;
-	unsigned int enable_intx:1;
-	unsigned int isr_on:1; /* Whether the IRQ handler is installed. */
-	unsigned int ack_intr:1; /* .. and ACK-ing */
-	unsigned long handled;
-	unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
-	char irq_name[0]; /* xen-pcibk[000:04:00.0] */
-};
-
-/* Used by XenBus and xen_pcibk_ops.c */
-extern wait_queue_head_t xen_pcibk_aer_wait_queue;
-extern struct workqueue_struct *xen_pcibk_wq;
-/* Used by pcistub.c and conf_space_quirks.c */
-extern struct list_head xen_pcibk_quirks;
-
-/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
-struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
-					    int domain, int bus,
-					    int slot, int func);
-struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
-				    struct pci_dev *dev);
-void pcistub_put_pci_dev(struct pci_dev *dev);
-
-/* Ensure a device is turned off or reset */
-void xen_pcibk_reset_device(struct pci_dev *pdev);
-
-/* Access a virtual configuration space for a PCI device */
-int xen_pcibk_config_init(void);
-int xen_pcibk_config_init_dev(struct pci_dev *dev);
-void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev);
-void xen_pcibk_config_reset_dev(struct pci_dev *dev);
-void xen_pcibk_config_free_dev(struct pci_dev *dev);
-int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
-			  u32 *ret_val);
-int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size,
-			   u32 value);
-
-/* Handle requests for specific devices from the frontend */
-typedef int (*publish_pci_dev_cb) (struct xen_pcibk_device *pdev,
-				   unsigned int domain, unsigned int bus,
-				   unsigned int devfn, unsigned int devid);
-typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev,
-				    unsigned int domain, unsigned int bus);
-
-/* Backend registration for the two types of BDF representation:
- *  vpci - BDFs start at 00
- *  passthrough - BDFs are exactly like in the host.
- */
-struct xen_pcibk_backend {
-	char *name;
-	int (*init)(struct xen_pcibk_device *pdev);
-	void (*free)(struct xen_pcibk_device *pdev);
-	int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev,
-		    unsigned int *domain, unsigned int *bus,
-		    unsigned int *devfn);
-	int (*publish)(struct xen_pcibk_device *pdev, publish_pci_root_cb cb);
-	void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev);
-	int (*add)(struct xen_pcibk_device *pdev, struct pci_dev *dev,
-		   int devid, publish_pci_dev_cb publish_cb);
-	struct pci_dev *(*get)(struct xen_pcibk_device *pdev,
-			       unsigned int domain, unsigned int bus,
-			       unsigned int devfn);
-};
-
-extern struct xen_pcibk_backend xen_pcibk_vpci_backend;
-extern struct xen_pcibk_backend xen_pcibk_passthrough_backend;
-extern struct xen_pcibk_backend *xen_pcibk_backend;
-
-static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
-					struct pci_dev *dev,
-					int devid,
-					publish_pci_dev_cb publish_cb)
-{
-	if (xen_pcibk_backend && xen_pcibk_backend->add)
-		return xen_pcibk_backend->add(pdev, dev, devid, publish_cb);
-	return -1;
-};
-static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
-					     struct pci_dev *dev)
-{
-	if (xen_pcibk_backend && xen_pcibk_backend->free)
-		return xen_pcibk_backend->release(pdev, dev);
-};
-
-static inline struct pci_dev *
-xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
-		      unsigned int bus, unsigned int devfn)
-{
-	if (xen_pcibk_backend && xen_pcibk_backend->get)
-		return xen_pcibk_backend->get(pdev, domain, bus, devfn);
-	return NULL;
-};
-/**
-* Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in xen_pcibk
-* before sending aer request to pcifront, so that guest could identify
-* device, coopearte with xen_pcibk to finish aer recovery job if device driver
-* has the capability
-*/
-static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
-					     struct xen_pcibk_device *pdev,
-					     unsigned int *domain,
-					     unsigned int *bus,
-					     unsigned int *devfn)
-{
-	if (xen_pcibk_backend && xen_pcibk_backend->find)
-		return xen_pcibk_backend->find(pcidev, pdev, domain, bus,
-					       devfn);
-	return -1;
-};
-static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
-{
-	if (xen_pcibk_backend && xen_pcibk_backend->init)
-		return xen_pcibk_backend->init(pdev);
-	return -1;
-};
-static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
-					      publish_pci_root_cb cb)
-{
-	if (xen_pcibk_backend && xen_pcibk_backend->publish)
-		return xen_pcibk_backend->publish(pdev, cb);
-	return -1;
-};
-static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
-{
-	if (xen_pcibk_backend && xen_pcibk_backend->free)
-		return xen_pcibk_backend->free(pdev);
-};
-/* Handles events from front-end */
-irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
-void xen_pcibk_do_op(struct work_struct *data);
-
-int xen_pcibk_xenbus_register(void);
-void xen_pcibk_xenbus_unregister(void);
-
-extern int verbose_request;
-
-void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev);
-#endif
-
-/* Handles shared IRQs that can to device domain and control domain. */
-void xen_pcibk_irq_handler(struct pci_dev *dev, int reset);
diff --git a/trunk/drivers/xen/xen-pciback/pciback_ops.c b/trunk/drivers/xen/xen-pciback/pciback_ops.c
deleted file mode 100644
index 8c95c3415b75..000000000000
--- a/trunk/drivers/xen/xen-pciback/pciback_ops.c
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * PCI Backend Operations - respond to PCI requests from Frontend
- *
- *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
- */
-#include <linux/module.h>
-#include <linux/wait.h>
-#include <linux/bitops.h>
-#include <xen/events.h>
-#include <linux/sched.h>
-#include "pciback.h"
-
-#define DRV_NAME	"xen-pciback"
-int verbose_request;
-module_param(verbose_request, int, 0644);
-
-static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id);
-
-/* Ensure a device is has the fake IRQ handler "turned on/off" and is
- * ready to be exported. This MUST be run after xen_pcibk_reset_device
- * which does the actual PCI device enable/disable.
- */
-static void xen_pcibk_control_isr(struct pci_dev *dev, int reset)
-{
-	struct xen_pcibk_dev_data *dev_data;
-	int rc;
-	int enable = 0;
-
-	dev_data = pci_get_drvdata(dev);
-	if (!dev_data)
-		return;
-
-	/* We don't deal with bridges */
-	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
-		return;
-
-	if (reset) {
-		dev_data->enable_intx = 0;
-		dev_data->ack_intr = 0;
-	}
-	enable =  dev_data->enable_intx;
-
-	/* Asked to disable, but ISR isn't runnig */
-	if (!enable && !dev_data->isr_on)
-		return;
-
-	/* Squirrel away the IRQs in the dev_data. We need this
-	 * b/c when device transitions to MSI, the dev->irq is
-	 * overwritten with the MSI vector.
-	 */
-	if (enable)
-		dev_data->irq = dev->irq;
-
-	/*
-	 * SR-IOV devices in all use MSI-X and have no legacy
-	 * interrupts, so inhibit creating a fake IRQ handler for them.
-	 */
-	if (dev_data->irq == 0)
-		goto out;
-
-	dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
-		dev_data->irq_name,
-		dev_data->irq,
-		pci_is_enabled(dev) ? "on" : "off",
-		dev->msi_enabled ? "MSI" : "",
-		dev->msix_enabled ? "MSI/X" : "",
-		dev_data->isr_on ? "enable" : "disable",
-		enable ? "enable" : "disable");
-
-	if (enable) {
-		rc = request_irq(dev_data->irq,
-				xen_pcibk_guest_interrupt, IRQF_SHARED,
-				dev_data->irq_name, dev);
-		if (rc) {
-			dev_err(&dev->dev, "%s: failed to install fake IRQ " \
-				"handler for IRQ %d! (rc:%d)\n",
-				dev_data->irq_name, dev_data->irq, rc);
-			goto out;
-		}
-	} else {
-		free_irq(dev_data->irq, dev);
-		dev_data->irq = 0;
-	}
-	dev_data->isr_on = enable;
-	dev_data->ack_intr = enable;
-out:
-	dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
-		dev_data->irq_name,
-		dev_data->irq,
-		pci_is_enabled(dev) ? "on" : "off",
-		dev->msi_enabled ? "MSI" : "",
-		dev->msix_enabled ? "MSI/X" : "",
-		enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
-			(dev_data->isr_on ? "failed to disable" : "disabled"));
-}
-
-/* Ensure a device is "turned off" and ready to be exported.
- * (Also see xen_pcibk_config_reset to ensure virtual configuration space is
- * ready to be re-exported)
- */
-void xen_pcibk_reset_device(struct pci_dev *dev)
-{
-	u16 cmd;
-
-	xen_pcibk_control_isr(dev, 1 /* reset device */);
-
-	/* Disable devices (but not bridges) */
-	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
-#ifdef CONFIG_PCI_MSI
-		/* The guest could have been abruptly killed without
-		 * disabling MSI/MSI-X interrupts.*/
-		if (dev->msix_enabled)
-			pci_disable_msix(dev);
-		if (dev->msi_enabled)
-			pci_disable_msi(dev);
-#endif
-		pci_disable_device(dev);
-
-		pci_write_config_word(dev, PCI_COMMAND, 0);
-
-		dev->is_busmaster = 0;
-	} else {
-		pci_read_config_word(dev, PCI_COMMAND, &cmd);
-		if (cmd & (PCI_COMMAND_INVALIDATE)) {
-			cmd &= ~(PCI_COMMAND_INVALIDATE);
-			pci_write_config_word(dev, PCI_COMMAND, cmd);
-
-			dev->is_busmaster = 0;
-		}
-	}
-}
-
-#ifdef CONFIG_PCI_MSI
-static
-int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
-			 struct pci_dev *dev, struct xen_pci_op *op)
-{
-	struct xen_pcibk_dev_data *dev_data;
-	int otherend = pdev->xdev->otherend_id;
-	int status;
-
-	if (unlikely(verbose_request))
-		printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev));
-
-	status = pci_enable_msi(dev);
-
-	if (status) {
-		printk(KERN_ERR "error enable msi for guest %x status %x\n",
-			otherend, status);
-		op->value = 0;
-		return XEN_PCI_ERR_op_failed;
-	}
-
-	/* The value the guest needs is actually the IDT vector, not the
-	 * the local domain's IRQ number. */
-
-	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
-	if (unlikely(verbose_request))
-		printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
-			op->value);
-
-	dev_data = pci_get_drvdata(dev);
-	if (dev_data)
-		dev_data->ack_intr = 0;
-
-	return 0;
-}
-
-static
-int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev,
-			  struct pci_dev *dev, struct xen_pci_op *op)
-{
-	struct xen_pcibk_dev_data *dev_data;
-
-	if (unlikely(verbose_request))
-		printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n",
-		       pci_name(dev));
-	pci_disable_msi(dev);
-
-	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
-	if (unlikely(verbose_request))
-		printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
-			op->value);
-	dev_data = pci_get_drvdata(dev);
-	if (dev_data)
-		dev_data->ack_intr = 1;
-	return 0;
-}
-
-static
-int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
-			  struct pci_dev *dev, struct xen_pci_op *op)
-{
-	struct xen_pcibk_dev_data *dev_data;
-	int i, result;
-	struct msix_entry *entries;
-
-	if (unlikely(verbose_request))
-		printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n",
-		       pci_name(dev));
-	if (op->value > SH_INFO_MAX_VEC)
-		return -EINVAL;
-
-	entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
-	if (entries == NULL)
-		return -ENOMEM;
-
-	for (i = 0; i < op->value; i++) {
-		entries[i].entry = op->msix_entries[i].entry;
-		entries[i].vector = op->msix_entries[i].vector;
-	}
-
-	result = pci_enable_msix(dev, entries, op->value);
-
-	if (result == 0) {
-		for (i = 0; i < op->value; i++) {
-			op->msix_entries[i].entry = entries[i].entry;
-			if (entries[i].vector)
-				op->msix_entries[i].vector =
-					xen_pirq_from_irq(entries[i].vector);
-				if (unlikely(verbose_request))
-					printk(KERN_DEBUG DRV_NAME ": %s: " \
-						"MSI-X[%d]: %d\n",
-						pci_name(dev), i,
-						op->msix_entries[i].vector);
-		}
-	} else {
-		printk(KERN_WARNING DRV_NAME ": %s: failed to enable MSI-X: err %d!\n",
-			pci_name(dev), result);
-	}
-	kfree(entries);
-
-	op->value = result;
-	dev_data = pci_get_drvdata(dev);
-	if (dev_data)
-		dev_data->ack_intr = 0;
-
-	return result;
-}
-
-static
-int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
-			   struct pci_dev *dev, struct xen_pci_op *op)
-{
-	struct xen_pcibk_dev_data *dev_data;
-	if (unlikely(verbose_request))
-		printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n",
-			pci_name(dev));
-	pci_disable_msix(dev);
-
-	/*
-	 * SR-IOV devices (which don't have any legacy IRQ) have
-	 * an undefined IRQ value of zero.
-	 */
-	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
-	if (unlikely(verbose_request))
-		printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev),
-			op->value);
-	dev_data = pci_get_drvdata(dev);
-	if (dev_data)
-		dev_data->ack_intr = 1;
-	return 0;
-}
-#endif
-/*
-* Now the same evtchn is used for both pcifront conf_read_write request
-* as well as pcie aer front end ack. We use a new work_queue to schedule
-* xen_pcibk conf_read_write service for avoiding confict with aer_core
-* do_recovery job which also use the system default work_queue
-*/
-void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
-{
-	/* Check that frontend is requesting an operation and that we are not
-	 * already processing a request */
-	if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
-	    && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
-		queue_work(xen_pcibk_wq, &pdev->op_work);
-	}
-	/*_XEN_PCIB_active should have been cleared by pcifront. And also make
-	sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/
-	if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
-	    && test_bit(_PCIB_op_pending, &pdev->flags)) {
-		wake_up(&xen_pcibk_aer_wait_queue);
-	}
-}
-
-/* Performing the configuration space reads/writes must not be done in atomic
- * context because some of the pci_* functions can sleep (mostly due to ACPI
- * use of semaphores). This function is intended to be called from a work
- * queue in process context taking a struct xen_pcibk_device as a parameter */
-
-void xen_pcibk_do_op(struct work_struct *data)
-{
-	struct xen_pcibk_device *pdev =
-		container_of(data, struct xen_pcibk_device, op_work);
-	struct pci_dev *dev;
-	struct xen_pcibk_dev_data *dev_data = NULL;
-	struct xen_pci_op *op = &pdev->sh_info->op;
-	int test_intx = 0;
-
-	dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
-
-	if (dev == NULL)
-		op->err = XEN_PCI_ERR_dev_not_found;
-	else {
-		dev_data = pci_get_drvdata(dev);
-		if (dev_data)
-			test_intx = dev_data->enable_intx;
-		switch (op->cmd) {
-		case XEN_PCI_OP_conf_read:
-			op->err = xen_pcibk_config_read(dev,
-				  op->offset, op->size, &op->value);
-			break;
-		case XEN_PCI_OP_conf_write:
-			op->err = xen_pcibk_config_write(dev,
-				  op->offset, op->size,	op->value);
-			break;
-#ifdef CONFIG_PCI_MSI
-		case XEN_PCI_OP_enable_msi:
-			op->err = xen_pcibk_enable_msi(pdev, dev, op);
-			break;
-		case XEN_PCI_OP_disable_msi:
-			op->err = xen_pcibk_disable_msi(pdev, dev, op);
-			break;
-		case XEN_PCI_OP_enable_msix:
-			op->err = xen_pcibk_enable_msix(pdev, dev, op);
-			break;
-		case XEN_PCI_OP_disable_msix:
-			op->err = xen_pcibk_disable_msix(pdev, dev, op);
-			break;
-#endif
-		default:
-			op->err = XEN_PCI_ERR_not_implemented;
-			break;
-		}
-	}
-	if (!op->err && dev && dev_data) {
-		/* Transition detected */
-		if ((dev_data->enable_intx != test_intx))
-			xen_pcibk_control_isr(dev, 0 /* no reset */);
-	}
-	/* Tell the driver domain that we're done. */
-	wmb();
-	clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
-	notify_remote_via_irq(pdev->evtchn_irq);
-
-	/* Mark that we're done. */
-	smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
-	clear_bit(_PDEVF_op_active, &pdev->flags);
-	smp_mb__after_clear_bit(); /* /before/ final check for work */
-
-	/* Check to see if the driver domain tried to start another request in
-	 * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
-	*/
-	xen_pcibk_test_and_schedule_op(pdev);
-}
-
-irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id)
-{
-	struct xen_pcibk_device *pdev = dev_id;
-
-	xen_pcibk_test_and_schedule_op(pdev);
-
-	return IRQ_HANDLED;
-}
-static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id)
-{
-	struct pci_dev *dev = (struct pci_dev *)dev_id;
-	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
-
-	if (dev_data->isr_on && dev_data->ack_intr) {
-		dev_data->handled++;
-		if ((dev_data->handled % 1000) == 0) {
-			if (xen_test_irq_shared(irq)) {
-				printk(KERN_INFO "%s IRQ line is not shared "
-					"with other domains. Turning ISR off\n",
-					 dev_data->irq_name);
-				dev_data->ack_intr = 0;
-			}
-		}
-		return IRQ_HANDLED;
-	}
-	return IRQ_NONE;
-}
diff --git a/trunk/drivers/xen/xen-pciback/vpci.c b/trunk/drivers/xen/xen-pciback/vpci.c
deleted file mode 100644
index 4a42cfb0959d..000000000000
--- a/trunk/drivers/xen/xen-pciback/vpci.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * PCI Backend - Provides a Virtual PCI bus (with real devices)
- *               to the frontend
- *
- *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
- */
-
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/spinlock.h>
-#include "pciback.h"
-
-#define PCI_SLOT_MAX 32
-#define DRV_NAME	"xen-pciback"
-
-struct vpci_dev_data {
-	/* Access to dev_list must be protected by lock */
-	struct list_head dev_list[PCI_SLOT_MAX];
-	spinlock_t lock;
-};
-
-static inline struct list_head *list_first(struct list_head *head)
-{
-	return head->next;
-}
-
-static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
-					       unsigned int domain,
-					       unsigned int bus,
-					       unsigned int devfn)
-{
-	struct pci_dev_entry *entry;
-	struct pci_dev *dev = NULL;
-	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-	unsigned long flags;
-
-	if (domain != 0 || bus != 0)
-		return NULL;
-
-	if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
-		spin_lock_irqsave(&vpci_dev->lock, flags);
-
-		list_for_each_entry(entry,
-				    &vpci_dev->dev_list[PCI_SLOT(devfn)],
-				    list) {
-			if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) {
-				dev = entry->dev;
-				break;
-			}
-		}
-
-		spin_unlock_irqrestore(&vpci_dev->lock, flags);
-	}
-	return dev;
-}
-
-static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
-{
-	if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus)
-	    && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn))
-		return 1;
-
-	return 0;
-}
-
-static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
-				   struct pci_dev *dev, int devid,
-				   publish_pci_dev_cb publish_cb)
-{
-	int err = 0, slot, func = -1;
-	struct pci_dev_entry *t, *dev_entry;
-	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-	unsigned long flags;
-
-	if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
-		err = -EFAULT;
-		xenbus_dev_fatal(pdev->xdev, err,
-				 "Can't export bridges on the virtual PCI bus");
-		goto out;
-	}
-
-	dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
-	if (!dev_entry) {
-		err = -ENOMEM;
-		xenbus_dev_fatal(pdev->xdev, err,
-				 "Error adding entry to virtual PCI bus");
-		goto out;
-	}
-
-	dev_entry->dev = dev;
-
-	spin_lock_irqsave(&vpci_dev->lock, flags);
-
-	/* Keep multi-function devices together on the virtual PCI bus */
-	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-		if (!list_empty(&vpci_dev->dev_list[slot])) {
-			t = list_entry(list_first(&vpci_dev->dev_list[slot]),
-				       struct pci_dev_entry, list);
-
-			if (match_slot(dev, t->dev)) {
-				pr_info(DRV_NAME ": vpci: %s: "
-					"assign to virtual slot %d func %d\n",
-					pci_name(dev), slot,
-					PCI_FUNC(dev->devfn));
-				list_add_tail(&dev_entry->list,
-					      &vpci_dev->dev_list[slot]);
-				func = PCI_FUNC(dev->devfn);
-				goto unlock;
-			}
-		}
-	}
-
-	/* Assign to a new slot on the virtual PCI bus */
-	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-		if (list_empty(&vpci_dev->dev_list[slot])) {
-			printk(KERN_INFO DRV_NAME
-			       ": vpci: %s: assign to virtual slot %d\n",
-			       pci_name(dev), slot);
-			list_add_tail(&dev_entry->list,
-				      &vpci_dev->dev_list[slot]);
-			func = PCI_FUNC(dev->devfn);
-			goto unlock;
-		}
-	}
-
-	err = -ENOMEM;
-	xenbus_dev_fatal(pdev->xdev, err,
-			 "No more space on root virtual PCI bus");
-
-unlock:
-	spin_unlock_irqrestore(&vpci_dev->lock, flags);
-
-	/* Publish this device. */
-	if (!err)
-		err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
-
-out:
-	return err;
-}
-
-static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
-					struct pci_dev *dev)
-{
-	int slot;
-	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-	struct pci_dev *found_dev = NULL;
-	unsigned long flags;
-
-	spin_lock_irqsave(&vpci_dev->lock, flags);
-
-	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-		struct pci_dev_entry *e, *tmp;
-		list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
-					 list) {
-			if (e->dev == dev) {
-				list_del(&e->list);
-				found_dev = e->dev;
-				kfree(e);
-				goto out;
-			}
-		}
-	}
-
-out:
-	spin_unlock_irqrestore(&vpci_dev->lock, flags);
-
-	if (found_dev)
-		pcistub_put_pci_dev(found_dev);
-}
-
-static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
-{
-	int slot;
-	struct vpci_dev_data *vpci_dev;
-
-	vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL);
-	if (!vpci_dev)
-		return -ENOMEM;
-
-	spin_lock_init(&vpci_dev->lock);
-
-	for (slot = 0; slot < PCI_SLOT_MAX; slot++)
-		INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
-
-	pdev->pci_dev_data = vpci_dev;
-
-	return 0;
-}
-
-static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
-					 publish_pci_root_cb publish_cb)
-{
-	/* The Virtual PCI bus has only one root */
-	return publish_cb(pdev, 0, 0);
-}
-
-static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
-{
-	int slot;
-	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-
-	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-		struct pci_dev_entry *e, *tmp;
-		list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
-					 list) {
-			list_del(&e->list);
-			pcistub_put_pci_dev(e->dev);
-			kfree(e);
-		}
-	}
-
-	kfree(vpci_dev);
-	pdev->pci_dev_data = NULL;
-}
-
-static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
-					struct xen_pcibk_device *pdev,
-					unsigned int *domain, unsigned int *bus,
-					unsigned int *devfn)
-{
-	struct pci_dev_entry *entry;
-	struct pci_dev *dev = NULL;
-	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-	unsigned long flags;
-	int found = 0, slot;
-
-	spin_lock_irqsave(&vpci_dev->lock, flags);
-	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-		list_for_each_entry(entry,
-			    &vpci_dev->dev_list[slot],
-			    list) {
-			dev = entry->dev;
-			if (dev && dev->bus->number == pcidev->bus->number
-				&& pci_domain_nr(dev->bus) ==
-					pci_domain_nr(pcidev->bus)
-				&& dev->devfn == pcidev->devfn) {
-				found = 1;
-				*domain = 0;
-				*bus = 0;
-				*devfn = PCI_DEVFN(slot,
-					 PCI_FUNC(pcidev->devfn));
-			}
-		}
-	}
-	spin_unlock_irqrestore(&vpci_dev->lock, flags);
-	return found;
-}
-
-struct xen_pcibk_backend xen_pcibk_vpci_backend = {
-	.name		= "vpci",
-	.init		= __xen_pcibk_init_devices,
-	.free		= __xen_pcibk_release_devices,
-	.find		= __xen_pcibk_get_pcifront_dev,
-	.publish	= __xen_pcibk_publish_pci_roots,
-	.release	= __xen_pcibk_release_pci_dev,
-	.add		= __xen_pcibk_add_pci_dev,
-	.get		= __xen_pcibk_get_pci_dev,
-};
diff --git a/trunk/drivers/xen/xen-pciback/xenbus.c b/trunk/drivers/xen/xen-pciback/xenbus.c
deleted file mode 100644
index 206c4ce030bc..000000000000
--- a/trunk/drivers/xen/xen-pciback/xenbus.c
+++ /dev/null
@@ -1,749 +0,0 @@
-/*
- * PCI Backend Xenbus Setup - handles setup with frontend and xend
- *
- *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
- */
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/vmalloc.h>
-#include <linux/workqueue.h>
-#include <xen/xenbus.h>
-#include <xen/events.h>
-#include <asm/xen/pci.h>
-#include <linux/workqueue.h>
-#include "pciback.h"
-
-#define	DRV_NAME	"xen-pciback"
-#define INVALID_EVTCHN_IRQ  (-1)
-struct workqueue_struct *xen_pcibk_wq;
-
-static int __read_mostly passthrough;
-module_param(passthrough, bool, S_IRUGO);
-MODULE_PARM_DESC(passthrough,
-	"Option to specify how to export PCI topology to guest:\n"\
-	" 0 - (default) Hide the true PCI topology and makes the frontend\n"\
-	"   there is a single PCI bus with only the exported devices on it.\n"\
-	"   For example, a device at 03:05.0 will be re-assigned to 00:00.0\n"\
-	"   while second device at 02:1a.1 will be re-assigned to 00:01.1.\n"\
-	" 1 - Passthrough provides a real view of the PCI topology to the\n"\
-	"   frontend (for example, a device at 06:01.b will still appear at\n"\
-	"   06:01.b to the frontend). This is similar to how Xen 2.0.x\n"\
-	"   exposed PCI devices to its driver domains. This may be required\n"\
-	"   for drivers which depend on finding their hardward in certain\n"\
-	"   bus/slot locations.");
-
-static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)
-{
-	struct xen_pcibk_device *pdev;
-
-	pdev = kzalloc(sizeof(struct xen_pcibk_device), GFP_KERNEL);
-	if (pdev == NULL)
-		goto out;
-	dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
-
-	pdev->xdev = xdev;
-	dev_set_drvdata(&xdev->dev, pdev);
-
-	spin_lock_init(&pdev->dev_lock);
-
-	pdev->sh_info = NULL;
-	pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
-	pdev->be_watching = 0;
-
-	INIT_WORK(&pdev->op_work, xen_pcibk_do_op);
-
-	if (xen_pcibk_init_devices(pdev)) {
-		kfree(pdev);
-		pdev = NULL;
-	}
-out:
-	return pdev;
-}
-
-static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
-{
-	spin_lock(&pdev->dev_lock);
-
-	/* Ensure the guest can't trigger our handler before removing devices */
-	if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
-		unbind_from_irqhandler(pdev->evtchn_irq, pdev);
-		pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
-	}
-	spin_unlock(&pdev->dev_lock);
-
-	/* If the driver domain started an op, make sure we complete it
-	 * before releasing the shared memory */
-
-	/* Note, the workqueue does not use spinlocks at all.*/
-	flush_workqueue(xen_pcibk_wq);
-
-	spin_lock(&pdev->dev_lock);
-	if (pdev->sh_info != NULL) {
-		xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
-		pdev->sh_info = NULL;
-	}
-	spin_unlock(&pdev->dev_lock);
-
-}
-
-static void free_pdev(struct xen_pcibk_device *pdev)
-{
-	if (pdev->be_watching) {
-		unregister_xenbus_watch(&pdev->be_watch);
-		pdev->be_watching = 0;
-	}
-
-	xen_pcibk_disconnect(pdev);
-
-	xen_pcibk_release_devices(pdev);
-
-	dev_set_drvdata(&pdev->xdev->dev, NULL);
-	pdev->xdev = NULL;
-
-	kfree(pdev);
-}
-
-static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
-			     int remote_evtchn)
-{
-	int err = 0;
-	void *vaddr;
-
-	dev_dbg(&pdev->xdev->dev,
-		"Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
-		gnt_ref, remote_evtchn);
-
-	err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
-	if (err < 0) {
-		xenbus_dev_fatal(pdev->xdev, err,
-				"Error mapping other domain page in ours.");
-		goto out;
-	}
-
-	spin_lock(&pdev->dev_lock);
-	pdev->sh_info = vaddr;
-	spin_unlock(&pdev->dev_lock);
-
-	err = bind_interdomain_evtchn_to_irqhandler(
-		pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
-		0, DRV_NAME, pdev);
-	if (err < 0) {
-		xenbus_dev_fatal(pdev->xdev, err,
-				 "Error binding event channel to IRQ");
-		goto out;
-	}
-
-	spin_lock(&pdev->dev_lock);
-	pdev->evtchn_irq = err;
-	spin_unlock(&pdev->dev_lock);
-	err = 0;
-
-	dev_dbg(&pdev->xdev->dev, "Attached!\n");
-out:
-	return err;
-}
-
-static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
-{
-	int err = 0;
-	int gnt_ref, remote_evtchn;
-	char *magic = NULL;
-
-
-	/* Make sure we only do this setup once */
-	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
-	    XenbusStateInitialised)
-		goto out;
-
-	/* Wait for frontend to state that it has published the configuration */
-	if (xenbus_read_driver_state(pdev->xdev->otherend) !=
-	    XenbusStateInitialised)
-		goto out;
-
-	dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
-
-	err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
-			    "pci-op-ref", "%u", &gnt_ref,
-			    "event-channel", "%u", &remote_evtchn,
-			    "magic", NULL, &magic, NULL);
-	if (err) {
-		/* If configuration didn't get read correctly, wait longer */
-		xenbus_dev_fatal(pdev->xdev, err,
-				 "Error reading configuration from frontend");
-		goto out;
-	}
-
-	if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
-		xenbus_dev_fatal(pdev->xdev, -EFAULT,
-				 "version mismatch (%s/%s) with pcifront - "
-				 "halting xen_pcibk",
-				 magic, XEN_PCI_MAGIC);
-		goto out;
-	}
-
-	err = xen_pcibk_do_attach(pdev, gnt_ref, remote_evtchn);
-	if (err)
-		goto out;
-
-	dev_dbg(&pdev->xdev->dev, "Connecting...\n");
-
-	err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
-	if (err)
-		xenbus_dev_fatal(pdev->xdev, err,
-				 "Error switching to connected state!");
-
-	dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
-out:
-
-	kfree(magic);
-
-	return err;
-}
-
-static int xen_pcibk_publish_pci_dev(struct xen_pcibk_device *pdev,
-				   unsigned int domain, unsigned int bus,
-				   unsigned int devfn, unsigned int devid)
-{
-	int err;
-	int len;
-	char str[64];
-
-	len = snprintf(str, sizeof(str), "vdev-%d", devid);
-	if (unlikely(len >= (sizeof(str) - 1))) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
-			    "%04x:%02x:%02x.%02x", domain, bus,
-			    PCI_SLOT(devfn), PCI_FUNC(devfn));
-
-out:
-	return err;
-}
-
-static int xen_pcibk_export_device(struct xen_pcibk_device *pdev,
-				 int domain, int bus, int slot, int func,
-				 int devid)
-{
-	struct pci_dev *dev;
-	int err = 0;
-
-	dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
-		domain, bus, slot, func);
-
-	dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
-	if (!dev) {
-		err = -EINVAL;
-		xenbus_dev_fatal(pdev->xdev, err,
-				 "Couldn't locate PCI device "
-				 "(%04x:%02x:%02x.%01x)! "
-				 "perhaps already in-use?",
-				 domain, bus, slot, func);
-		goto out;
-	}
-
-	err = xen_pcibk_add_pci_dev(pdev, dev, devid,
-				    xen_pcibk_publish_pci_dev);
-	if (err)
-		goto out;
-
-	dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
-	if (xen_register_device_domain_owner(dev,
-					     pdev->xdev->otherend_id) != 0) {
-		dev_err(&dev->dev, "device has been assigned to another " \
-			"domain! Over-writting the ownership, but beware.\n");
-		xen_unregister_device_domain_owner(dev);
-		xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
-	}
-
-	/* TODO: It'd be nice to export a bridge and have all of its children
-	 * get exported with it. This may be best done in xend (which will
-	 * have to calculate resource usage anyway) but we probably want to
-	 * put something in here to ensure that if a bridge gets given to a
-	 * driver domain, that all devices under that bridge are not given
-	 * to other driver domains (as he who controls the bridge can disable
-	 * it and stop the other devices from working).
-	 */
-out:
-	return err;
-}
-
-static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,
-				 int domain, int bus, int slot, int func)
-{
-	int err = 0;
-	struct pci_dev *dev;
-
-	dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
-		domain, bus, slot, func);
-
-	dev = xen_pcibk_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
-	if (!dev) {
-		err = -EINVAL;
-		dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
-			"(%04x:%02x:%02x.%01x)! not owned by this domain\n",
-			domain, bus, slot, func);
-		goto out;
-	}
-
-	dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
-	xen_unregister_device_domain_owner(dev);
-
-	xen_pcibk_release_pci_dev(pdev, dev);
-
-out:
-	return err;
-}
-
-static int xen_pcibk_publish_pci_root(struct xen_pcibk_device *pdev,
-				    unsigned int domain, unsigned int bus)
-{
-	unsigned int d, b;
-	int i, root_num, len, err;
-	char str[64];
-
-	dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
-
-	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
-			   "root_num", "%d", &root_num);
-	if (err == 0 || err == -ENOENT)
-		root_num = 0;
-	else if (err < 0)
-		goto out;
-
-	/* Verify that we haven't already published this pci root */
-	for (i = 0; i < root_num; i++) {
-		len = snprintf(str, sizeof(str), "root-%d", i);
-		if (unlikely(len >= (sizeof(str) - 1))) {
-			err = -ENOMEM;
-			goto out;
-		}
-
-		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
-				   str, "%x:%x", &d, &b);
-		if (err < 0)
-			goto out;
-		if (err != 2) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		if (d == domain && b == bus) {
-			err = 0;
-			goto out;
-		}
-	}
-
-	len = snprintf(str, sizeof(str), "root-%d", root_num);
-	if (unlikely(len >= (sizeof(str) - 1))) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
-		root_num, domain, bus);
-
-	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
-			    "%04x:%02x", domain, bus);
-	if (err)
-		goto out;
-
-	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
-			    "root_num", "%d", (root_num + 1));
-
-out:
-	return err;
-}
-
-static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
-{
-	int err = 0;
-	int num_devs;
-	int domain, bus, slot, func;
-	int substate;
-	int i, len;
-	char state_str[64];
-	char dev_str[64];
-
-
-	dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
-
-	/* Make sure we only reconfigure once */
-	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
-	    XenbusStateReconfiguring)
-		goto out;
-
-	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
-			   &num_devs);
-	if (err != 1) {
-		if (err >= 0)
-			err = -EINVAL;
-		xenbus_dev_fatal(pdev->xdev, err,
-				 "Error reading number of devices");
-		goto out;
-	}
-
-	for (i = 0; i < num_devs; i++) {
-		len = snprintf(state_str, sizeof(state_str), "state-%d", i);
-		if (unlikely(len >= (sizeof(state_str) - 1))) {
-			err = -ENOMEM;
-			xenbus_dev_fatal(pdev->xdev, err,
-					 "String overflow while reading "
-					 "configuration");
-			goto out;
-		}
-		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
-				   "%d", &substate);
-		if (err != 1)
-			substate = XenbusStateUnknown;
-
-		switch (substate) {
-		case XenbusStateInitialising:
-			dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);
-
-			len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
-			if (unlikely(len >= (sizeof(dev_str) - 1))) {
-				err = -ENOMEM;
-				xenbus_dev_fatal(pdev->xdev, err,
-						 "String overflow while "
-						 "reading configuration");
-				goto out;
-			}
-			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
-					   dev_str, "%x:%x:%x.%x",
-					   &domain, &bus, &slot, &func);
-			if (err < 0) {
-				xenbus_dev_fatal(pdev->xdev, err,
-						 "Error reading device "
-						 "configuration");
-				goto out;
-			}
-			if (err != 4) {
-				err = -EINVAL;
-				xenbus_dev_fatal(pdev->xdev, err,
-						 "Error parsing pci device "
-						 "configuration");
-				goto out;
-			}
-
-			err = xen_pcibk_export_device(pdev, domain, bus, slot,
-						    func, i);
-			if (err)
-				goto out;
-
-			/* Publish pci roots. */
-			err = xen_pcibk_publish_pci_roots(pdev,
-						xen_pcibk_publish_pci_root);
-			if (err) {
-				xenbus_dev_fatal(pdev->xdev, err,
-						 "Error while publish PCI root"
-						 "buses for frontend");
-				goto out;
-			}
-
-			err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
-					    state_str, "%d",
-					    XenbusStateInitialised);
-			if (err) {
-				xenbus_dev_fatal(pdev->xdev, err,
-						 "Error switching substate of "
-						 "dev-%d\n", i);
-				goto out;
-			}
-			break;
-
-		case XenbusStateClosing:
-			dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);
-
-			len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
-			if (unlikely(len >= (sizeof(dev_str) - 1))) {
-				err = -ENOMEM;
-				xenbus_dev_fatal(pdev->xdev, err,
-						 "String overflow while "
-						 "reading configuration");
-				goto out;
-			}
-			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
-					   dev_str, "%x:%x:%x.%x",
-					   &domain, &bus, &slot, &func);
-			if (err < 0) {
-				xenbus_dev_fatal(pdev->xdev, err,
-						 "Error reading device "
-						 "configuration");
-				goto out;
-			}
-			if (err != 4) {
-				err = -EINVAL;
-				xenbus_dev_fatal(pdev->xdev, err,
-						 "Error parsing pci device "
-						 "configuration");
-				goto out;
-			}
-
-			err = xen_pcibk_remove_device(pdev, domain, bus, slot,
-						    func);
-			if (err)
-				goto out;
-
-			/* TODO: If at some point we implement support for pci
-			 * root hot-remove on pcifront side, we'll need to
-			 * remove unnecessary xenstore nodes of pci roots here.
-			 */
-
-			break;
-
-		default:
-			break;
-		}
-	}
-
-	err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
-	if (err) {
-		xenbus_dev_fatal(pdev->xdev, err,
-				 "Error switching to reconfigured state!");
-		goto out;
-	}
-
-out:
-	return 0;
-}
-
-static void xen_pcibk_frontend_changed(struct xenbus_device *xdev,
-				     enum xenbus_state fe_state)
-{
-	struct xen_pcibk_device *pdev = dev_get_drvdata(&xdev->dev);
-
-	dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
-
-	switch (fe_state) {
-	case XenbusStateInitialised:
-		xen_pcibk_attach(pdev);
-		break;
-
-	case XenbusStateReconfiguring:
-		xen_pcibk_reconfigure(pdev);
-		break;
-
-	case XenbusStateConnected:
-		/* pcifront switched its state from reconfiguring to connected.
-		 * Then switch to connected state.
-		 */
-		xenbus_switch_state(xdev, XenbusStateConnected);
-		break;
-
-	case XenbusStateClosing:
-		xen_pcibk_disconnect(pdev);
-		xenbus_switch_state(xdev, XenbusStateClosing);
-		break;
-
-	case XenbusStateClosed:
-		xen_pcibk_disconnect(pdev);
-		xenbus_switch_state(xdev, XenbusStateClosed);
-		if (xenbus_dev_is_online(xdev))
-			break;
-		/* fall through if not online */
-	case XenbusStateUnknown:
-		dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
-		device_unregister(&xdev->dev);
-		break;
-
-	default:
-		break;
-	}
-}
-
-static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
-{
-	/* Get configuration from xend (if available now) */
-	int domain, bus, slot, func;
-	int err = 0;
-	int i, num_devs;
-	char dev_str[64];
-	char state_str[64];
-
-	/* It's possible we could get the call to setup twice, so make sure
-	 * we're not already connected.
-	 */
-	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
-	    XenbusStateInitWait)
-		goto out;
-
-	dev_dbg(&pdev->xdev->dev, "getting be setup\n");
-
-	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
-			   &num_devs);
-	if (err != 1) {
-		if (err >= 0)
-			err = -EINVAL;
-		xenbus_dev_fatal(pdev->xdev, err,
-				 "Error reading number of devices");
-		goto out;
-	}
-
-	for (i = 0; i < num_devs; i++) {
-		int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
-		if (unlikely(l >= (sizeof(dev_str) - 1))) {
-			err = -ENOMEM;
-			xenbus_dev_fatal(pdev->xdev, err,
-					 "String overflow while reading "
-					 "configuration");
-			goto out;
-		}
-
-		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
-				   "%x:%x:%x.%x", &domain, &bus, &slot, &func);
-		if (err < 0) {
-			xenbus_dev_fatal(pdev->xdev, err,
-					 "Error reading device configuration");
-			goto out;
-		}
-		if (err != 4) {
-			err = -EINVAL;
-			xenbus_dev_fatal(pdev->xdev, err,
-					 "Error parsing pci device "
-					 "configuration");
-			goto out;
-		}
-
-		err = xen_pcibk_export_device(pdev, domain, bus, slot, func, i);
-		if (err)
-			goto out;
-
-		/* Switch substate of this device. */
-		l = snprintf(state_str, sizeof(state_str), "state-%d", i);
-		if (unlikely(l >= (sizeof(state_str) - 1))) {
-			err = -ENOMEM;
-			xenbus_dev_fatal(pdev->xdev, err,
-					 "String overflow while reading "
-					 "configuration");
-			goto out;
-		}
-		err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
-				    "%d", XenbusStateInitialised);
-		if (err) {
-			xenbus_dev_fatal(pdev->xdev, err, "Error switching "
-					 "substate of dev-%d\n", i);
-			goto out;
-		}
-	}
-
-	err = xen_pcibk_publish_pci_roots(pdev, xen_pcibk_publish_pci_root);
-	if (err) {
-		xenbus_dev_fatal(pdev->xdev, err,
-				 "Error while publish PCI root buses "
-				 "for frontend");
-		goto out;
-	}
-
-	err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
-	if (err)
-		xenbus_dev_fatal(pdev->xdev, err,
-				 "Error switching to initialised state!");
-
-out:
-	if (!err)
-		/* see if pcifront is already configured (if not, we'll wait) */
-		xen_pcibk_attach(pdev);
-
-	return err;
-}
-
-static void xen_pcibk_be_watch(struct xenbus_watch *watch,
-			     const char **vec, unsigned int len)
-{
-	struct xen_pcibk_device *pdev =
-	    container_of(watch, struct xen_pcibk_device, be_watch);
-
-	switch (xenbus_read_driver_state(pdev->xdev->nodename)) {
-	case XenbusStateInitWait:
-		xen_pcibk_setup_backend(pdev);
-		break;
-
-	default:
-		break;
-	}
-}
-
-static int xen_pcibk_xenbus_probe(struct xenbus_device *dev,
-				const struct xenbus_device_id *id)
-{
-	int err = 0;
-	struct xen_pcibk_device *pdev = alloc_pdev(dev);
-
-	if (pdev == NULL) {
-		err = -ENOMEM;
-		xenbus_dev_fatal(dev, err,
-				 "Error allocating xen_pcibk_device struct");
-		goto out;
-	}
-
-	/* wait for xend to configure us */
-	err = xenbus_switch_state(dev, XenbusStateInitWait);
-	if (err)
-		goto out;
-
-	/* watch the backend node for backend configuration information */
-	err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
-				xen_pcibk_be_watch);
-	if (err)
-		goto out;
-
-	pdev->be_watching = 1;
-
-	/* We need to force a call to our callback here in case
-	 * xend already configured us!
-	 */
-	xen_pcibk_be_watch(&pdev->be_watch, NULL, 0);
-
-out:
-	return err;
-}
-
-static int xen_pcibk_xenbus_remove(struct xenbus_device *dev)
-{
-	struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev);
-
-	if (pdev != NULL)
-		free_pdev(pdev);
-
-	return 0;
-}
-
-static const struct xenbus_device_id xenpci_ids[] = {
-	{"pci"},
-	{""},
-};
-
-static struct xenbus_driver xenbus_xen_pcibk_driver = {
-	.name			= DRV_NAME,
-	.owner			= THIS_MODULE,
-	.ids			= xenpci_ids,
-	.probe			= xen_pcibk_xenbus_probe,
-	.remove			= xen_pcibk_xenbus_remove,
-	.otherend_changed	= xen_pcibk_frontend_changed,
-};
-
-struct xen_pcibk_backend *xen_pcibk_backend;
-
-int __init xen_pcibk_xenbus_register(void)
-{
-	xen_pcibk_wq = create_workqueue("xen_pciback_workqueue");
-	if (!xen_pcibk_wq) {
-		printk(KERN_ERR "%s: create"
-			"xen_pciback_workqueue failed\n", __func__);
-		return -EFAULT;
-	}
-	xen_pcibk_backend = &xen_pcibk_vpci_backend;
-	if (passthrough)
-		xen_pcibk_backend = &xen_pcibk_passthrough_backend;
-	pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name);
-	return xenbus_register_backend(&xenbus_xen_pcibk_driver);
-}
-
-void __exit xen_pcibk_xenbus_unregister(void)
-{
-	destroy_workqueue(xen_pcibk_wq);
-	xenbus_unregister_driver(&xenbus_xen_pcibk_driver);
-}
diff --git a/trunk/drivers/xen/xen-selfballoon.c b/trunk/drivers/xen/xen-selfballoon.c
deleted file mode 100644
index 010937b5a7c9..000000000000
--- a/trunk/drivers/xen/xen-selfballoon.c
+++ /dev/null
@@ -1,485 +0,0 @@
-/******************************************************************************
- * Xen selfballoon driver (and optional frontswap self-shrinking driver)
- *
- * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
- *
- * This code complements the cleancache and frontswap patchsets to optimize
- * support for Xen Transcendent Memory ("tmem").  The policy it implements
- * is rudimentary and will likely improve over time, but it does work well
- * enough today.
- *
- * Two functionalities are implemented here which both use "control theory"
- * (feedback) to optimize memory utilization. In a virtualized environment
- * such as Xen, RAM is often a scarce resource and we would like to ensure
- * that each of a possibly large number of virtual machines is using RAM
- * efficiently, i.e. using as little as possible when under light load
- * and obtaining as much as possible when memory demands are high.
- * Since RAM needs vary highly dynamically and sometimes dramatically,
- * "hysteresis" is used, that is, memory target is determined not just
- * on current data but also on past data stored in the system.
- *
- * "Selfballooning" creates memory pressure by managing the Xen balloon
- * driver to decrease and increase available kernel memory, driven
- * largely by the target value of "Committed_AS" (see /proc/meminfo).
- * Since Committed_AS does not account for clean mapped pages (i.e. pages
- * in RAM that are identical to pages on disk), selfballooning has the
- * affect of pushing less frequently used clean pagecache pages out of
- * kernel RAM and, presumably using cleancache, into Xen tmem where
- * Xen can more efficiently optimize RAM utilization for such pages.
- *
- * When kernel memory demand unexpectedly increases faster than Xen, via
- * the selfballoon driver, is able to (or chooses to) provide usable RAM,
- * the kernel may invoke swapping.  In most cases, frontswap is able
- * to absorb this swapping into Xen tmem.  However, due to the fact
- * that the kernel swap subsystem assumes swapping occurs to a disk,
- * swapped pages may sit on the disk for a very long time; even if
- * the kernel knows the page will never be used again.  This is because
- * the disk space costs very little and can be overwritten when
- * necessary.  When such stale pages are in frontswap, however, they
- * are taking up valuable real estate.  "Frontswap selfshrinking" works
- * to resolve this:  When frontswap activity is otherwise stable
- * and the guest kernel is not under memory pressure, the "frontswap
- * selfshrinking" accounts for this by providing pressure to remove some
- * pages from frontswap and return them to kernel memory.
- *
- * For both "selfballooning" and "frontswap-selfshrinking", a worker
- * thread is used and sysfs tunables are provided to adjust the frequency
- * and rate of adjustments to achieve the goal, as well as to disable one
- * or both functions independently.
- *
- * While some argue that this functionality can and should be implemented
- * in userspace, it has been observed that bad things happen (e.g. OOMs).
- *
- * System configuration note: Selfballooning should not be enabled on
- * systems without a sufficiently large swap device configured; for best
- * results, it is recommended that total swap be increased by the size
- * of the guest memory.  Also, while technically not required to be
- * configured, it is highly recommended that frontswap also be configured
- * and enabled when selfballooning is running.  So, selfballooning
- * is disabled by default if frontswap is not configured and can only
- * be enabled with the "selfballooning" kernel boot option; similarly
- * selfballooning is enabled by default if frontswap is configured and
- * can be disabled with the "noselfballooning" kernel boot option.  Finally,
- * when frontswap is configured, frontswap-selfshrinking can be disabled
- * with the "noselfshrink" kernel boot option.
- *
- * Selfballooning is disallowed in domain0 and force-disabled.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-
-#include <xen/balloon.h>
-
-#include <xen/tmem.h>
-
-/* Enable/disable with sysfs. */
-static int xen_selfballooning_enabled __read_mostly;
-
-/*
- * Controls rate at which memory target (this iteration) approaches
- * ultimate goal when memory need is increasing (up-hysteresis) or
- * decreasing (down-hysteresis). Higher values of hysteresis cause
- * slower increases/decreases. The default values for the various
- * parameters were deemed reasonable by experimentation, may be
- * workload-dependent, and can all be adjusted via sysfs.
- */
-static unsigned int selfballoon_downhysteresis __read_mostly = 8;
-static unsigned int selfballoon_uphysteresis __read_mostly = 1;
-
-/* In HZ, controls frequency of worker invocation. */
-static unsigned int selfballoon_interval __read_mostly = 5;
-
-static void selfballoon_process(struct work_struct *work);
-static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
-
-#ifdef CONFIG_FRONTSWAP
-#include <linux/frontswap.h>
-
-/* Enable/disable with sysfs. */
-static bool frontswap_selfshrinking __read_mostly;
-
-/* Enable/disable with kernel boot option. */
-static bool use_frontswap_selfshrink __initdata = true;
-
-/*
- * The default values for the following parameters were deemed reasonable
- * by experimentation, may be workload-dependent, and can all be
- * adjusted via sysfs.
- */
-
-/* Control rate for frontswap shrinking. Higher hysteresis is slower. */
-static unsigned int frontswap_hysteresis __read_mostly = 20;
-
-/*
- * Number of selfballoon worker invocations to wait before observing that
- * frontswap selfshrinking should commence. Note that selfshrinking does
- * not use a separate worker thread.
- */
-static unsigned int frontswap_inertia __read_mostly = 3;
-
-/* Countdown to next invocation of frontswap_shrink() */
-static unsigned long frontswap_inertia_counter;
-
-/*
- * Invoked by the selfballoon worker thread, uses current number of pages
- * in frontswap (frontswap_curr_pages()), previous status, and control
- * values (hysteresis and inertia) to determine if frontswap should be
- * shrunk and what the new frontswap size should be.  Note that
- * frontswap_shrink is essentially a partial swapoff that immediately
- * transfers pages from the "swap device" (frontswap) back into kernel
- * RAM; despite the name, frontswap "shrinking" is very different from
- * the "shrinker" interface used by the kernel MM subsystem to reclaim
- * memory.
- */
-static void frontswap_selfshrink(void)
-{
-	static unsigned long cur_frontswap_pages;
-	static unsigned long last_frontswap_pages;
-	static unsigned long tgt_frontswap_pages;
-
-	last_frontswap_pages = cur_frontswap_pages;
-	cur_frontswap_pages = frontswap_curr_pages();
-	if (!cur_frontswap_pages ||
-			(cur_frontswap_pages > last_frontswap_pages)) {
-		frontswap_inertia_counter = frontswap_inertia;
-		return;
-	}
-	if (frontswap_inertia_counter && --frontswap_inertia_counter)
-		return;
-	if (cur_frontswap_pages <= frontswap_hysteresis)
-		tgt_frontswap_pages = 0;
-	else
-		tgt_frontswap_pages = cur_frontswap_pages -
-			(cur_frontswap_pages / frontswap_hysteresis);
-	frontswap_shrink(tgt_frontswap_pages);
-}
-
-static int __init xen_nofrontswap_selfshrink_setup(char *s)
-{
-	use_frontswap_selfshrink = false;
-	return 1;
-}
-
-__setup("noselfshrink", xen_nofrontswap_selfshrink_setup);
-
-/* Disable with kernel boot option. */
-static bool use_selfballooning __initdata = true;
-
-static int __init xen_noselfballooning_setup(char *s)
-{
-	use_selfballooning = false;
-	return 1;
-}
-
-__setup("noselfballooning", xen_noselfballooning_setup);
-#else /* !CONFIG_FRONTSWAP */
-/* Enable with kernel boot option. */
-static bool use_selfballooning __initdata = false;
-
-static int __init xen_selfballooning_setup(char *s)
-{
-	use_selfballooning = true;
-	return 1;
-}
-
-__setup("selfballooning", xen_selfballooning_setup);
-#endif /* CONFIG_FRONTSWAP */
-
-/*
- * Use current balloon size, the goal (vm_committed_as), and hysteresis
- * parameters to set a new target balloon size
- */
-static void selfballoon_process(struct work_struct *work)
-{
-	unsigned long cur_pages, goal_pages, tgt_pages;
-	bool reset_timer = false;
-
-	if (xen_selfballooning_enabled) {
-		cur_pages = balloon_stats.current_pages;
-		tgt_pages = cur_pages; /* default is no change */
-		goal_pages = percpu_counter_read_positive(&vm_committed_as) +
-			balloon_stats.current_pages - totalram_pages;
-#ifdef CONFIG_FRONTSWAP
-		/* allow space for frontswap pages to be repatriated */
-		if (frontswap_selfshrinking && frontswap_enabled)
-			goal_pages += frontswap_curr_pages();
-#endif
-		if (cur_pages > goal_pages)
-			tgt_pages = cur_pages -
-				((cur_pages - goal_pages) /
-				  selfballoon_downhysteresis);
-		else if (cur_pages < goal_pages)
-			tgt_pages = cur_pages +
-				((goal_pages - cur_pages) /
-				  selfballoon_uphysteresis);
-		/* else if cur_pages == goal_pages, no change */
-		balloon_set_new_target(tgt_pages);
-		reset_timer = true;
-	}
-#ifdef CONFIG_FRONTSWAP
-	if (frontswap_selfshrinking && frontswap_enabled) {
-		frontswap_selfshrink();
-		reset_timer = true;
-	}
-#endif
-	if (reset_timer)
-		schedule_delayed_work(&selfballoon_worker,
-			selfballoon_interval * HZ);
-}
-
-#ifdef CONFIG_SYSFS
-
-#include <linux/sysdev.h>
-#include <linux/capability.h>
-
-#define SELFBALLOON_SHOW(name, format, args...)				\
-	static ssize_t show_##name(struct sys_device *dev,	\
-					   struct sysdev_attribute *attr, \
-					   char *buf) \
-	{ \
-		return sprintf(buf, format, ##args); \
-	}
-
-SELFBALLOON_SHOW(selfballooning, "%d\n", xen_selfballooning_enabled);
-
-static ssize_t store_selfballooning(struct sys_device *dev,
-			    struct sysdev_attribute *attr,
-			    const char *buf,
-			    size_t count)
-{
-	bool was_enabled = xen_selfballooning_enabled;
-	unsigned long tmp;
-	int err;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	err = strict_strtoul(buf, 10, &tmp);
-	if (err || ((tmp != 0) && (tmp != 1)))
-		return -EINVAL;
-
-	xen_selfballooning_enabled = !!tmp;
-	if (!was_enabled && xen_selfballooning_enabled)
-		schedule_delayed_work(&selfballoon_worker,
-			selfballoon_interval * HZ);
-
-	return count;
-}
-
-static SYSDEV_ATTR(selfballooning, S_IRUGO | S_IWUSR,
-		   show_selfballooning, store_selfballooning);
-
-SELFBALLOON_SHOW(selfballoon_interval, "%d\n", selfballoon_interval);
-
-static ssize_t store_selfballoon_interval(struct sys_device *dev,
-					  struct sysdev_attribute *attr,
-					  const char *buf,
-					  size_t count)
-{
-	unsigned long val;
-	int err;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
-		return -EINVAL;
-	selfballoon_interval = val;
-	return count;
-}
-
-static SYSDEV_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR,
-		   show_selfballoon_interval, store_selfballoon_interval);
-
-SELFBALLOON_SHOW(selfballoon_downhys, "%d\n", selfballoon_downhysteresis);
-
-static ssize_t store_selfballoon_downhys(struct sys_device *dev,
-					 struct sysdev_attribute *attr,
-					 const char *buf,
-					 size_t count)
-{
-	unsigned long val;
-	int err;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
-		return -EINVAL;
-	selfballoon_downhysteresis = val;
-	return count;
-}
-
-static SYSDEV_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR,
-		   show_selfballoon_downhys, store_selfballoon_downhys);
-
-
-SELFBALLOON_SHOW(selfballoon_uphys, "%d\n", selfballoon_uphysteresis);
-
-static ssize_t store_selfballoon_uphys(struct sys_device *dev,
-				       struct sysdev_attribute *attr,
-				       const char *buf,
-				       size_t count)
-{
-	unsigned long val;
-	int err;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
-		return -EINVAL;
-	selfballoon_uphysteresis = val;
-	return count;
-}
-
-static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR,
-		   show_selfballoon_uphys, store_selfballoon_uphys);
-
-#ifdef CONFIG_FRONTSWAP
-SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking);
-
-static ssize_t store_frontswap_selfshrinking(struct sys_device *dev,
-					     struct sysdev_attribute *attr,
-					     const char *buf,
-					     size_t count)
-{
-	bool was_enabled = frontswap_selfshrinking;
-	unsigned long tmp;
-	int err;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-	err = strict_strtoul(buf, 10, &tmp);
-	if (err || ((tmp != 0) && (tmp != 1)))
-		return -EINVAL;
-	frontswap_selfshrinking = !!tmp;
-	if (!was_enabled && !xen_selfballooning_enabled &&
-	     frontswap_selfshrinking)
-		schedule_delayed_work(&selfballoon_worker,
-			selfballoon_interval * HZ);
-
-	return count;
-}
-
-static SYSDEV_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR,
-		   show_frontswap_selfshrinking, store_frontswap_selfshrinking);
-
-SELFBALLOON_SHOW(frontswap_inertia, "%d\n", frontswap_inertia);
-
-static ssize_t store_frontswap_inertia(struct sys_device *dev,
-				       struct sysdev_attribute *attr,
-				       const char *buf,
-				       size_t count)
-{
-	unsigned long val;
-	int err;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
-		return -EINVAL;
-	frontswap_inertia = val;
-	frontswap_inertia_counter = val;
-	return count;
-}
-
-static SYSDEV_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR,
-		   show_frontswap_inertia, store_frontswap_inertia);
-
-SELFBALLOON_SHOW(frontswap_hysteresis, "%d\n", frontswap_hysteresis);
-
-static ssize_t store_frontswap_hysteresis(struct sys_device *dev,
-					  struct sysdev_attribute *attr,
-					  const char *buf,
-					  size_t count)
-{
-	unsigned long val;
-	int err;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
-		return -EINVAL;
-	frontswap_hysteresis = val;
-	return count;
-}
-
-static SYSDEV_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR,
-		   show_frontswap_hysteresis, store_frontswap_hysteresis);
-
-#endif /* CONFIG_FRONTSWAP */
-
-static struct attribute *selfballoon_attrs[] = {
-	&attr_selfballooning.attr,
-	&attr_selfballoon_interval.attr,
-	&attr_selfballoon_downhysteresis.attr,
-	&attr_selfballoon_uphysteresis.attr,
-#ifdef CONFIG_FRONTSWAP
-	&attr_frontswap_selfshrinking.attr,
-	&attr_frontswap_hysteresis.attr,
-	&attr_frontswap_inertia.attr,
-#endif
-	NULL
-};
-
-static struct attribute_group selfballoon_group = {
-	.name = "selfballoon",
-	.attrs = selfballoon_attrs
-};
-#endif
-
-int register_xen_selfballooning(struct sys_device *sysdev)
-{
-	int error = -1;
-
-#ifdef CONFIG_SYSFS
-	error = sysfs_create_group(&sysdev->kobj, &selfballoon_group);
-#endif
-	return error;
-}
-EXPORT_SYMBOL(register_xen_selfballooning);
-
-static int __init xen_selfballoon_init(void)
-{
-	bool enable = false;
-
-	if (!xen_domain())
-		return -ENODEV;
-
-	if (xen_initial_domain()) {
-		pr_info("xen/balloon: Xen selfballooning driver "
-				"disabled for domain0.\n");
-		return -ENODEV;
-	}
-
-	xen_selfballooning_enabled = tmem_enabled && use_selfballooning;
-	if (xen_selfballooning_enabled) {
-		pr_info("xen/balloon: Initializing Xen "
-					"selfballooning driver.\n");
-		enable = true;
-	}
-#ifdef CONFIG_FRONTSWAP
-	frontswap_selfshrinking = tmem_enabled && use_frontswap_selfshrink;
-	if (frontswap_selfshrinking) {
-		pr_info("xen/balloon: Initializing frontswap "
-					"selfshrinking driver.\n");
-		enable = true;
-	}
-#endif
-	if (!enable)
-		return -ENODEV;
-
-	schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ);
-
-	return 0;
-}
-
-subsys_initcall(xen_selfballoon_init);
-
-MODULE_LICENSE("GPL");
diff --git a/trunk/drivers/xen/xenbus/xenbus_probe.c b/trunk/drivers/xen/xenbus/xenbus_probe.c
index bd2f90c9ac8b..739769551e33 100644
--- a/trunk/drivers/xen/xenbus/xenbus_probe.c
+++ b/trunk/drivers/xen/xenbus/xenbus_probe.c
@@ -378,32 +378,26 @@ static void xenbus_dev_release(struct device *dev)
 		kfree(to_xenbus_device(dev));
 }
 
-static ssize_t nodename_show(struct device *dev,
-			     struct device_attribute *attr, char *buf)
+static ssize_t xendev_show_nodename(struct device *dev,
+				    struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
 }
+static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
 
-static ssize_t devtype_show(struct device *dev,
-			    struct device_attribute *attr, char *buf)
+static ssize_t xendev_show_devtype(struct device *dev,
+				   struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
 }
+static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
 
-static ssize_t modalias_show(struct device *dev,
-			     struct device_attribute *attr, char *buf)
+static ssize_t xendev_show_modalias(struct device *dev,
+				    struct device_attribute *attr, char *buf)
 {
-	return sprintf(buf, "%s:%s\n", dev->bus->name,
-		       to_xenbus_device(dev)->devicetype);
+	return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
 }
-
-struct device_attribute xenbus_dev_attrs[] = {
-	__ATTR_RO(nodename),
-	__ATTR_RO(devtype),
-	__ATTR_RO(modalias),
-	__ATTR_NULL
-};
-EXPORT_SYMBOL_GPL(xenbus_dev_attrs);
+static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
 
 int xenbus_probe_node(struct xen_bus_type *bus,
 		      const char *type,
@@ -455,7 +449,25 @@ int xenbus_probe_node(struct xen_bus_type *bus,
 	if (err)
 		goto fail;
 
+	err = device_create_file(&xendev->dev, &dev_attr_nodename);
+	if (err)
+		goto fail_unregister;
+
+	err = device_create_file(&xendev->dev, &dev_attr_devtype);
+	if (err)
+		goto fail_remove_nodename;
+
+	err = device_create_file(&xendev->dev, &dev_attr_modalias);
+	if (err)
+		goto fail_remove_devtype;
+
 	return 0;
+fail_remove_devtype:
+	device_remove_file(&xendev->dev, &dev_attr_devtype);
+fail_remove_nodename:
+	device_remove_file(&xendev->dev, &dev_attr_nodename);
+fail_unregister:
+	device_unregister(&xendev->dev);
 fail:
 	kfree(xendev);
 	return err;
diff --git a/trunk/drivers/xen/xenbus/xenbus_probe.h b/trunk/drivers/xen/xenbus/xenbus_probe.h
index b814935378c7..888b9900ca08 100644
--- a/trunk/drivers/xen/xenbus/xenbus_probe.h
+++ b/trunk/drivers/xen/xenbus/xenbus_probe.h
@@ -48,8 +48,6 @@ struct xen_bus_type
 	struct bus_type bus;
 };
 
-extern struct device_attribute xenbus_dev_attrs[];
-
 extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
 extern int xenbus_dev_probe(struct device *_dev);
 extern int xenbus_dev_remove(struct device *_dev);
diff --git a/trunk/drivers/xen/xenbus/xenbus_probe_backend.c b/trunk/drivers/xen/xenbus/xenbus_probe_backend.c
index 60adf919d78d..6cf467bf63ec 100644
--- a/trunk/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/trunk/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -107,9 +107,6 @@ static int xenbus_uevent_backend(struct device *dev,
 	if (xdev == NULL)
 		return -ENODEV;
 
-	if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype))
-		return -ENOMEM;
-
 	/* stuff we want to pass to /sbin/hotplug */
 	if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype))
 		return -ENOMEM;
@@ -186,6 +183,10 @@ static void frontend_changed(struct xenbus_watch *watch,
 	xenbus_otherend_changed(watch, vec, len, 0);
 }
 
+static struct device_attribute xenbus_backend_dev_attrs[] = {
+	__ATTR_NULL
+};
+
 static struct xen_bus_type xenbus_backend = {
 	.root = "backend",
 	.levels = 3,		/* backend/type/<frontend>/<id> */
@@ -199,7 +200,7 @@ static struct xen_bus_type xenbus_backend = {
 		.probe		= xenbus_dev_probe,
 		.remove		= xenbus_dev_remove,
 		.shutdown	= xenbus_dev_shutdown,
-		.dev_attrs	= xenbus_dev_attrs,
+		.dev_attrs	= xenbus_backend_dev_attrs,
 	},
 };
 
diff --git a/trunk/drivers/xen/xenbus/xenbus_probe_frontend.c b/trunk/drivers/xen/xenbus/xenbus_probe_frontend.c
index ed2ba474a560..b6a2690c9d49 100644
--- a/trunk/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/trunk/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -81,6 +81,10 @@ static void backend_changed(struct xenbus_watch *watch,
 	xenbus_otherend_changed(watch, vec, len, 1);
 }
 
+static struct device_attribute xenbus_frontend_dev_attrs[] = {
+	__ATTR_NULL
+};
+
 static const struct dev_pm_ops xenbus_pm_ops = {
 	.suspend	= xenbus_dev_suspend,
 	.resume		= xenbus_dev_resume,
@@ -102,7 +106,7 @@ static struct xen_bus_type xenbus_frontend = {
 		.probe		= xenbus_dev_probe,
 		.remove		= xenbus_dev_remove,
 		.shutdown	= xenbus_dev_shutdown,
-		.dev_attrs	= xenbus_dev_attrs,
+		.dev_attrs	= xenbus_frontend_dev_attrs,
 
 		.pm		= &xenbus_pm_ops,
 	},
diff --git a/trunk/fs/dlm/ast.c b/trunk/fs/dlm/ast.c
index 90e5997262ea..abc49f292454 100644
--- a/trunk/fs/dlm/ast.c
+++ b/trunk/fs/dlm/ast.c
@@ -14,9 +14,17 @@
 #include "dlm_internal.h"
 #include "lock.h"
 #include "user.h"
+#include "ast.h"
+
+#define WAKE_ASTS  0
+
+static uint64_t			ast_seq_count;
+static struct list_head		ast_queue;
+static spinlock_t		ast_queue_lock;
+static struct task_struct *	astd_task;
+static unsigned long		astd_wakeflags;
+static struct mutex		astd_running;
 
-static uint64_t			dlm_cb_seq;
-static spinlock_t		dlm_cb_seq_spin;
 
 static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb)
 {
@@ -49,13 +57,21 @@ static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb)
 	}
 }
 
+void dlm_del_ast(struct dlm_lkb *lkb)
+{
+	spin_lock(&ast_queue_lock);
+	if (!list_empty(&lkb->lkb_astqueue))
+		list_del_init(&lkb->lkb_astqueue);
+	spin_unlock(&ast_queue_lock);
+}
+
 int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
 			 int status, uint32_t sbflags, uint64_t seq)
 {
 	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
 	uint64_t prev_seq;
 	int prev_mode;
-	int i, rv;
+	int i;
 
 	for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
 		if (lkb->lkb_callbacks[i].seq)
@@ -84,8 +100,7 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
 					  mode,
 					  (unsigned long long)prev_seq,
 					  prev_mode);
-				rv = 0;
-				goto out;
+				return 0;
 			}
 		}
 
@@ -94,7 +109,6 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
 		lkb->lkb_callbacks[i].mode = mode;
 		lkb->lkb_callbacks[i].sb_status = status;
 		lkb->lkb_callbacks[i].sb_flags = (sbflags & 0x000000FF);
-		rv = 0;
 		break;
 	}
 
@@ -103,24 +117,21 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
 			  lkb->lkb_id, (unsigned long long)seq,
 			  flags, mode, status, sbflags);
 		dlm_dump_lkb_callbacks(lkb);
-		rv = -1;
-		goto out;
+		return -1;
 	}
- out:
-	return rv;
+
+	return 0;
 }
 
 int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
 			 struct dlm_callback *cb, int *resid)
 {
-	int i, rv;
+	int i;
 
 	*resid = 0;
 
-	if (!lkb->lkb_callbacks[0].seq) {
-		rv = -ENOENT;
-		goto out;
-	}
+	if (!lkb->lkb_callbacks[0].seq)
+		return -ENOENT;
 
 	/* oldest undelivered cb is callbacks[0] */
 
@@ -152,8 +163,7 @@ int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
 				  cb->mode,
 				  (unsigned long long)lkb->lkb_last_cast.seq,
 				  lkb->lkb_last_cast.mode);
-			rv = 0;
-			goto out;
+			return 0;
 		}
 	}
 
@@ -166,150 +176,171 @@ int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
 		memcpy(&lkb->lkb_last_bast, cb, sizeof(struct dlm_callback));
 		lkb->lkb_last_bast_time = ktime_get();
 	}
-	rv = 0;
- out:
-	return rv;
+
+	return 0;
 }
 
-void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
-		uint32_t sbflags)
+void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
+		 uint32_t sbflags)
 {
-	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
-	uint64_t new_seq, prev_seq;
+	uint64_t seq;
 	int rv;
 
-	spin_lock(&dlm_cb_seq_spin);
-	new_seq = ++dlm_cb_seq;
-	spin_unlock(&dlm_cb_seq_spin);
+	spin_lock(&ast_queue_lock);
+
+	seq = ++ast_seq_count;
 
 	if (lkb->lkb_flags & DLM_IFL_USER) {
-		dlm_user_add_ast(lkb, flags, mode, status, sbflags, new_seq);
+		spin_unlock(&ast_queue_lock);
+		dlm_user_add_ast(lkb, flags, mode, status, sbflags, seq);
 		return;
 	}
 
-	mutex_lock(&lkb->lkb_cb_mutex);
-	prev_seq = lkb->lkb_callbacks[0].seq;
-
-	rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, new_seq);
-	if (rv < 0)
-		goto out;
+	rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, seq);
+	if (rv < 0) {
+		spin_unlock(&ast_queue_lock);
+		return;
+	}
 
-	if (!prev_seq) {
+	if (list_empty(&lkb->lkb_astqueue)) {
 		kref_get(&lkb->lkb_ref);
-
-		if (test_bit(LSFL_CB_DELAY, &ls->ls_flags)) {
-			mutex_lock(&ls->ls_cb_mutex);
-			list_add(&lkb->lkb_cb_list, &ls->ls_cb_delay);
-			mutex_unlock(&ls->ls_cb_mutex);
-		} else {
-			queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
-		}
+		list_add_tail(&lkb->lkb_astqueue, &ast_queue);
 	}
- out:
-	mutex_unlock(&lkb->lkb_cb_mutex);
+	spin_unlock(&ast_queue_lock);
+
+	set_bit(WAKE_ASTS, &astd_wakeflags);
+	wake_up_process(astd_task);
 }
 
-void dlm_callback_work(struct work_struct *work)
+static void process_asts(void)
 {
-	struct dlm_lkb *lkb = container_of(work, struct dlm_lkb, lkb_cb_work);
-	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+	struct dlm_ls *ls = NULL;
+	struct dlm_rsb *r = NULL;
+	struct dlm_lkb *lkb;
 	void (*castfn) (void *astparam);
 	void (*bastfn) (void *astparam, int mode);
 	struct dlm_callback callbacks[DLM_CALLBACKS_SIZE];
 	int i, rv, resid;
 
-	memset(&callbacks, 0, sizeof(callbacks));
+repeat:
+	spin_lock(&ast_queue_lock);
+	list_for_each_entry(lkb, &ast_queue, lkb_astqueue) {
+		r = lkb->lkb_resource;
+		ls = r->res_ls;
 
-	mutex_lock(&lkb->lkb_cb_mutex);
-	if (!lkb->lkb_callbacks[0].seq) {
-		/* no callback work exists, shouldn't happen */
-		log_error(ls, "dlm_callback_work %x no work", lkb->lkb_id);
-		dlm_print_lkb(lkb);
-		dlm_dump_lkb_callbacks(lkb);
-	}
+		if (dlm_locking_stopped(ls))
+			continue;
 
-	for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
-		rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid);
-		if (rv < 0)
-			break;
-	}
+		/* we remove from astqueue list and remove everything in
+		   lkb_callbacks before releasing the spinlock so empty
+		   lkb_astqueue is always consistent with empty lkb_callbacks */
 
-	if (resid) {
-		/* cbs remain, loop should have removed all, shouldn't happen */
-		log_error(ls, "dlm_callback_work %x resid %d", lkb->lkb_id,
-			  resid);
-		dlm_print_lkb(lkb);
-		dlm_dump_lkb_callbacks(lkb);
-	}
-	mutex_unlock(&lkb->lkb_cb_mutex);
+		list_del_init(&lkb->lkb_astqueue);
 
-	castfn = lkb->lkb_astfn;
-	bastfn = lkb->lkb_bastfn;
+		castfn = lkb->lkb_astfn;
+		bastfn = lkb->lkb_bastfn;
 
-	for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
-		if (!callbacks[i].seq)
-			break;
-		if (callbacks[i].flags & DLM_CB_SKIP) {
-			continue;
-		} else if (callbacks[i].flags & DLM_CB_BAST) {
-			bastfn(lkb->lkb_astparam, callbacks[i].mode);
-		} else if (callbacks[i].flags & DLM_CB_CAST) {
-			lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
-			lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
-			castfn(lkb->lkb_astparam);
+		memset(&callbacks, 0, sizeof(callbacks));
+
+		for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
+			rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid);
+			if (rv < 0)
+				break;
 		}
+		spin_unlock(&ast_queue_lock);
+
+		if (resid) {
+			/* shouldn't happen, for loop should have removed all */
+			log_error(ls, "callback resid %d lkb %x",
+				  resid, lkb->lkb_id);
+		}
+
+		for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
+			if (!callbacks[i].seq)
+				break;
+			if (callbacks[i].flags & DLM_CB_SKIP) {
+				continue;
+			} else if (callbacks[i].flags & DLM_CB_BAST) {
+				bastfn(lkb->lkb_astparam, callbacks[i].mode);
+			} else if (callbacks[i].flags & DLM_CB_CAST) {
+				lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
+				lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
+				castfn(lkb->lkb_astparam);
+			}
+		}
+
+		/* removes ref for ast_queue, may cause lkb to be freed */
+		dlm_put_lkb(lkb);
+
+		cond_resched();
+		goto repeat;
 	}
+	spin_unlock(&ast_queue_lock);
+}
+
+static inline int no_asts(void)
+{
+	int ret;
 
-	/* undo kref_get from dlm_add_callback, may cause lkb to be freed */
-	dlm_put_lkb(lkb);
+	spin_lock(&ast_queue_lock);
+	ret = list_empty(&ast_queue);
+	spin_unlock(&ast_queue_lock);
+	return ret;
 }
 
-int dlm_callback_start(struct dlm_ls *ls)
+static int dlm_astd(void *data)
 {
-	ls->ls_callback_wq = alloc_workqueue("dlm_callback",
-					     WQ_UNBOUND |
-					     WQ_MEM_RECLAIM |
-					     WQ_NON_REENTRANT,
-					     0);
-	if (!ls->ls_callback_wq) {
-		log_print("can't start dlm_callback workqueue");
-		return -ENOMEM;
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!test_bit(WAKE_ASTS, &astd_wakeflags))
+			schedule();
+		set_current_state(TASK_RUNNING);
+
+		mutex_lock(&astd_running);
+		if (test_and_clear_bit(WAKE_ASTS, &astd_wakeflags))
+			process_asts();
+		mutex_unlock(&astd_running);
 	}
 	return 0;
 }
 
-void dlm_callback_stop(struct dlm_ls *ls)
+void dlm_astd_wake(void)
 {
-	if (ls->ls_callback_wq)
-		destroy_workqueue(ls->ls_callback_wq);
+	if (!no_asts()) {
+		set_bit(WAKE_ASTS, &astd_wakeflags);
+		wake_up_process(astd_task);
+	}
 }
 
-void dlm_callback_suspend(struct dlm_ls *ls)
+int dlm_astd_start(void)
 {
-	set_bit(LSFL_CB_DELAY, &ls->ls_flags);
-
-	if (ls->ls_callback_wq)
-		flush_workqueue(ls->ls_callback_wq);
+	struct task_struct *p;
+	int error = 0;
+
+	INIT_LIST_HEAD(&ast_queue);
+	spin_lock_init(&ast_queue_lock);
+	mutex_init(&astd_running);
+
+	p = kthread_run(dlm_astd, NULL, "dlm_astd");
+	if (IS_ERR(p))
+		error = PTR_ERR(p);
+	else
+		astd_task = p;
+	return error;
 }
 
-void dlm_callback_resume(struct dlm_ls *ls)
+void dlm_astd_stop(void)
 {
-	struct dlm_lkb *lkb, *safe;
-	int count = 0;
-
-	clear_bit(LSFL_CB_DELAY, &ls->ls_flags);
-
-	if (!ls->ls_callback_wq)
-		return;
+	kthread_stop(astd_task);
+}
 
-	mutex_lock(&ls->ls_cb_mutex);
-	list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) {
-		list_del_init(&lkb->lkb_cb_list);
-		queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
-		count++;
-	}
-	mutex_unlock(&ls->ls_cb_mutex);
+void dlm_astd_suspend(void)
+{	/* Hold off callback delivery until dlm_astd_resume() is called. */
+	mutex_lock(&astd_running);	/* dlm_astd() takes this around process_asts() */
+}
 
-	log_debug(ls, "dlm_callback_resume %d", count);
+void dlm_astd_resume(void)
+{	/* Counterpart of dlm_astd_suspend(): lets dlm_astd() run process_asts() again. */
+	mutex_unlock(&astd_running);	/* must pair with the lock taken in dlm_astd_suspend() */
 }
 
diff --git a/trunk/fs/dlm/ast.h b/trunk/fs/dlm/ast.h
index 757b551c6820..8aa89c9b5611 100644
--- a/trunk/fs/dlm/ast.h
+++ b/trunk/fs/dlm/ast.h
@@ -18,15 +18,14 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
                          int status, uint32_t sbflags, uint64_t seq);
 int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
                          struct dlm_callback *cb, int *resid);
-void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
-                uint32_t sbflags);
+void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
+		 uint32_t sbflags);
 
-void dlm_callback_work(struct work_struct *work);
-int dlm_callback_start(struct dlm_ls *ls);
-void dlm_callback_stop(struct dlm_ls *ls);
-void dlm_callback_suspend(struct dlm_ls *ls);
-void dlm_callback_resume(struct dlm_ls *ls);
+void dlm_astd_wake(void);
+int dlm_astd_start(void);
+void dlm_astd_stop(void);
+void dlm_astd_suspend(void);
+void dlm_astd_resume(void);
 
 #endif
 
-
diff --git a/trunk/fs/dlm/config.c b/trunk/fs/dlm/config.c
index 6cf72fcc0d0c..9b026ea8baa9 100644
--- a/trunk/fs/dlm/config.c
+++ b/trunk/fs/dlm/config.c
@@ -28,8 +28,7 @@
  * /config/dlm/<cluster>/spaces/<space>/nodes/<node>/weight
  * /config/dlm/<cluster>/comms/<comm>/nodeid
  * /config/dlm/<cluster>/comms/<comm>/local
- * /config/dlm/<cluster>/comms/<comm>/addr      (write only)
- * /config/dlm/<cluster>/comms/<comm>/addr_list (read only)
+ * /config/dlm/<cluster>/comms/<comm>/addr
  * The <cluster> level is useless, but I haven't figured out how to avoid it.
  */
 
@@ -81,7 +80,6 @@ static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf,
 				size_t len);
 static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf,
 				size_t len);
-static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf);
 static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf);
 static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf,
 				size_t len);
@@ -94,6 +92,7 @@ struct dlm_cluster {
 	unsigned int cl_tcp_port;
 	unsigned int cl_buffer_size;
 	unsigned int cl_rsbtbl_size;
+	unsigned int cl_lkbtbl_size;
 	unsigned int cl_dirtbl_size;
 	unsigned int cl_recover_timer;
 	unsigned int cl_toss_secs;
@@ -102,13 +101,13 @@ struct dlm_cluster {
 	unsigned int cl_protocol;
 	unsigned int cl_timewarn_cs;
 	unsigned int cl_waitwarn_us;
-	unsigned int cl_new_rsb_count;
 };
 
 enum {
 	CLUSTER_ATTR_TCP_PORT = 0,
 	CLUSTER_ATTR_BUFFER_SIZE,
 	CLUSTER_ATTR_RSBTBL_SIZE,
+	CLUSTER_ATTR_LKBTBL_SIZE,
 	CLUSTER_ATTR_DIRTBL_SIZE,
 	CLUSTER_ATTR_RECOVER_TIMER,
 	CLUSTER_ATTR_TOSS_SECS,
@@ -117,7 +116,6 @@ enum {
 	CLUSTER_ATTR_PROTOCOL,
 	CLUSTER_ATTR_TIMEWARN_CS,
 	CLUSTER_ATTR_WAITWARN_US,
-	CLUSTER_ATTR_NEW_RSB_COUNT,
 };
 
 struct cluster_attribute {
@@ -162,6 +160,7 @@ __CONFIGFS_ATTR(name, 0644, name##_read, name##_write)
 CLUSTER_ATTR(tcp_port, 1);
 CLUSTER_ATTR(buffer_size, 1);
 CLUSTER_ATTR(rsbtbl_size, 1);
+CLUSTER_ATTR(lkbtbl_size, 1);
 CLUSTER_ATTR(dirtbl_size, 1);
 CLUSTER_ATTR(recover_timer, 1);
 CLUSTER_ATTR(toss_secs, 1);
@@ -170,12 +169,12 @@ CLUSTER_ATTR(log_debug, 0);
 CLUSTER_ATTR(protocol, 0);
 CLUSTER_ATTR(timewarn_cs, 1);
 CLUSTER_ATTR(waitwarn_us, 0);
-CLUSTER_ATTR(new_rsb_count, 0);
 
 static struct configfs_attribute *cluster_attrs[] = {
 	[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
 	[CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr,
 	[CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr,
+	[CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr,
 	[CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr,
 	[CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr,
 	[CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr,
@@ -184,7 +183,6 @@ static struct configfs_attribute *cluster_attrs[] = {
 	[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
 	[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
 	[CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
-	[CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr,
 	NULL,
 };
 
@@ -192,7 +190,6 @@ enum {
 	COMM_ATTR_NODEID = 0,
 	COMM_ATTR_LOCAL,
 	COMM_ATTR_ADDR,
-	COMM_ATTR_ADDR_LIST,
 };
 
 struct comm_attribute {
@@ -220,22 +217,14 @@ static struct comm_attribute comm_attr_local = {
 static struct comm_attribute comm_attr_addr = {
 	.attr   = { .ca_owner = THIS_MODULE,
                     .ca_name = "addr",
-                    .ca_mode = S_IWUSR },
+                    .ca_mode = S_IRUGO | S_IWUSR },
 	.store  = comm_addr_write,
 };
 
-static struct comm_attribute comm_attr_addr_list = {
-	.attr   = { .ca_owner = THIS_MODULE,
-                    .ca_name = "addr_list",
-                    .ca_mode = S_IRUGO },
-	.show   = comm_addr_list_read,
-};
-
 static struct configfs_attribute *comm_attrs[] = {
 	[COMM_ATTR_NODEID] = &comm_attr_nodeid.attr,
 	[COMM_ATTR_LOCAL] = &comm_attr_local.attr,
 	[COMM_ATTR_ADDR] = &comm_attr_addr.attr,
-	[COMM_ATTR_ADDR_LIST] = &comm_attr_addr_list.attr,
 	NULL,
 };
 
@@ -446,6 +435,7 @@ static struct config_group *make_cluster(struct config_group *g,
 	cl->cl_tcp_port = dlm_config.ci_tcp_port;
 	cl->cl_buffer_size = dlm_config.ci_buffer_size;
 	cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size;
+	cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size;
 	cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size;
 	cl->cl_recover_timer = dlm_config.ci_recover_timer;
 	cl->cl_toss_secs = dlm_config.ci_toss_secs;
@@ -454,7 +444,6 @@ static struct config_group *make_cluster(struct config_group *g,
 	cl->cl_protocol = dlm_config.ci_protocol;
 	cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
 	cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
-	cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count;
 
 	space_list = &sps->ss_group;
 	comm_list = &cms->cs_group;
@@ -731,50 +720,6 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
 	return len;
 }
 
-static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf)
-{
-	ssize_t s;
-	ssize_t allowance;
-	int i;
-	struct sockaddr_storage *addr;
-	struct sockaddr_in *addr_in;
-	struct sockaddr_in6 *addr_in6;
-	
-	/* Taken from ip6_addr_string() defined in lib/vsprintf.c */
-	char buf0[sizeof("AF_INET6	xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255\n")];
-	
-
-	/* Derived from SIMPLE_ATTR_SIZE of fs/configfs/file.c */
-	allowance = 4096;
-	buf[0] = '\0';
-
-	for (i = 0; i < cm->addr_count; i++) {
-		addr = cm->addr[i];
-
-		switch(addr->ss_family) {
-		case AF_INET:
-			addr_in = (struct sockaddr_in *)addr;
-			s = sprintf(buf0, "AF_INET	%pI4\n", &addr_in->sin_addr.s_addr);
-			break;
-		case AF_INET6:
-			addr_in6 = (struct sockaddr_in6 *)addr;
-			s = sprintf(buf0, "AF_INET6	%pI6\n", &addr_in6->sin6_addr);
-			break;
-		default:
-			s = sprintf(buf0, "%s\n", "<UNKNOWN>");
-			break;
-		}
-		allowance -= s;
-		if (allowance >= 0)
-			strcat(buf, buf0);
-		else {
-			allowance += s;
-			break;
-		}
-	}
-	return 4096 - allowance;
-}
-
 static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
 			 char *buf)
 {
@@ -1038,6 +983,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
 #define DEFAULT_TCP_PORT       21064
 #define DEFAULT_BUFFER_SIZE     4096
 #define DEFAULT_RSBTBL_SIZE     1024
+#define DEFAULT_LKBTBL_SIZE     1024
 #define DEFAULT_DIRTBL_SIZE     1024
 #define DEFAULT_RECOVER_TIMER      5
 #define DEFAULT_TOSS_SECS         10
@@ -1046,12 +992,12 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
 #define DEFAULT_PROTOCOL           0
 #define DEFAULT_TIMEWARN_CS      500 /* 5 sec = 500 centiseconds */
 #define DEFAULT_WAITWARN_US	   0
-#define DEFAULT_NEW_RSB_COUNT    128
 
 struct dlm_config_info dlm_config = {
 	.ci_tcp_port = DEFAULT_TCP_PORT,
 	.ci_buffer_size = DEFAULT_BUFFER_SIZE,
 	.ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE,
+	.ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE,
 	.ci_dirtbl_size = DEFAULT_DIRTBL_SIZE,
 	.ci_recover_timer = DEFAULT_RECOVER_TIMER,
 	.ci_toss_secs = DEFAULT_TOSS_SECS,
@@ -1059,7 +1005,6 @@ struct dlm_config_info dlm_config = {
 	.ci_log_debug = DEFAULT_LOG_DEBUG,
 	.ci_protocol = DEFAULT_PROTOCOL,
 	.ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
-	.ci_waitwarn_us = DEFAULT_WAITWARN_US,
-	.ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT
+	.ci_waitwarn_us = DEFAULT_WAITWARN_US
 };
 
diff --git a/trunk/fs/dlm/config.h b/trunk/fs/dlm/config.h
index 3099d0dd26c0..dd0ce24d5a80 100644
--- a/trunk/fs/dlm/config.h
+++ b/trunk/fs/dlm/config.h
@@ -20,6 +20,7 @@ struct dlm_config_info {
 	int ci_tcp_port;
 	int ci_buffer_size;
 	int ci_rsbtbl_size;
+	int ci_lkbtbl_size;
 	int ci_dirtbl_size;
 	int ci_recover_timer;
 	int ci_toss_secs;
@@ -28,7 +29,6 @@ struct dlm_config_info {
 	int ci_protocol;
 	int ci_timewarn_cs;
 	int ci_waitwarn_us;
-	int ci_new_rsb_count;
 };
 
 extern struct dlm_config_info dlm_config;
diff --git a/trunk/fs/dlm/dlm_internal.h b/trunk/fs/dlm/dlm_internal.h
index fe2860c02449..0262451eb9c6 100644
--- a/trunk/fs/dlm/dlm_internal.h
+++ b/trunk/fs/dlm/dlm_internal.h
@@ -37,7 +37,6 @@
 #include <linux/jhash.h>
 #include <linux/miscdevice.h>
 #include <linux/mutex.h>
-#include <linux/idr.h>
 #include <asm/uaccess.h>
 
 #include <linux/dlm.h>
@@ -53,6 +52,7 @@ struct dlm_ls;
 struct dlm_lkb;
 struct dlm_rsb;
 struct dlm_member;
+struct dlm_lkbtable;
 struct dlm_rsbtable;
 struct dlm_dirtable;
 struct dlm_direntry;
@@ -108,6 +108,11 @@ struct dlm_rsbtable {
 	spinlock_t		lock;
 };
 
+struct dlm_lkbtable {	/* one bucket of a lockspace's lkb id table (ls_lkbtbl) */
+	struct list_head	list;		/* lkbs in this bucket, linked via lkb_idtbl_list */
+	rwlock_t		lock;		/* read for lookup, write for add/remove/counter */
+	uint16_t		counter;	/* low 16 bits of the next lkb id to try; may wrap */
+};
 
 /*
  * Lockspace member (per node in a ls)
@@ -243,18 +248,17 @@ struct dlm_lkb {
 	int8_t			lkb_wait_count;
 	int			lkb_wait_nodeid; /* for debugging */
 
+	struct list_head	lkb_idtbl_list;	/* lockspace lkbtbl */
 	struct list_head	lkb_statequeue;	/* rsb g/c/w list */
 	struct list_head	lkb_rsb_lookup;	/* waiting for rsb lookup */
 	struct list_head	lkb_wait_reply;	/* waiting for remote reply */
+	struct list_head	lkb_astqueue;	/* need ast to be sent */
 	struct list_head	lkb_ownqueue;	/* list of locks for a process */
 	struct list_head	lkb_time_list;
 	ktime_t			lkb_timestamp;
 	ktime_t			lkb_wait_time;
 	unsigned long		lkb_timeout_cs;
 
-	struct mutex		lkb_cb_mutex;
-	struct work_struct	lkb_cb_work;
-	struct list_head	lkb_cb_list; /* for ls_cb_delay or proc->asts */
 	struct dlm_callback	lkb_callbacks[DLM_CALLBACKS_SIZE];
 	struct dlm_callback	lkb_last_cast;
 	struct dlm_callback	lkb_last_bast;
@@ -295,7 +299,7 @@ struct dlm_rsb {
 	int			res_recover_locks_count;
 
 	char			*res_lvbptr;
-	char			res_name[DLM_RESNAME_MAXLEN+1];
+	char			res_name[1];
 };
 
 /* find_rsb() flags */
@@ -461,12 +465,12 @@ struct dlm_ls {
 	unsigned long		ls_scan_time;
 	struct kobject		ls_kobj;
 
-	struct idr		ls_lkbidr;
-	spinlock_t		ls_lkbidr_spin;
-
 	struct dlm_rsbtable	*ls_rsbtbl;
 	uint32_t		ls_rsbtbl_size;
 
+	struct dlm_lkbtable	*ls_lkbtbl;
+	uint32_t		ls_lkbtbl_size;
+
 	struct dlm_dirtable	*ls_dirtbl;
 	uint32_t		ls_dirtbl_size;
 
@@ -479,10 +483,6 @@ struct dlm_ls {
 	struct mutex		ls_timeout_mutex;
 	struct list_head	ls_timeout;
 
-	spinlock_t		ls_new_rsb_spin;
-	int			ls_new_rsb_count;
-	struct list_head	ls_new_rsb;	/* new rsb structs */
-
 	struct list_head	ls_nodes;	/* current nodes in ls */
 	struct list_head	ls_nodes_gone;	/* dead node list, recovery */
 	int			ls_num_nodes;	/* number of nodes in ls */
@@ -506,12 +506,8 @@ struct dlm_ls {
 
 	struct miscdevice       ls_device;
 
-	struct workqueue_struct	*ls_callback_wq;
-
 	/* recovery related */
 
-	struct mutex		ls_cb_mutex;
-	struct list_head	ls_cb_delay; /* save for queue_work later */
 	struct timer_list	ls_timer;
 	struct task_struct	*ls_recoverd_task;
 	struct mutex		ls_recoverd_active;
@@ -548,7 +544,6 @@ struct dlm_ls {
 #define LSFL_RCOM_WAIT		4
 #define LSFL_UEVENT_WAIT	5
 #define LSFL_TIMEWARN		6
-#define LSFL_CB_DELAY		7
 
 /* much of this is just saving user space pointers associated with the
    lock that we pass back to the user lib with an ast */
diff --git a/trunk/fs/dlm/lock.c b/trunk/fs/dlm/lock.c
index 83b5e32514e1..f71d0b5abd95 100644
--- a/trunk/fs/dlm/lock.c
+++ b/trunk/fs/dlm/lock.c
@@ -305,7 +305,7 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
 		rv = -EDEADLK;
 	}
 
-	dlm_add_cb(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags);
+	dlm_add_ast(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags);
 }
 
 static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -319,7 +319,7 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
 	if (is_master_copy(lkb)) {
 		send_bast(r, lkb, rqmode);
 	} else {
-		dlm_add_cb(lkb, DLM_CB_BAST, rqmode, 0, 0);
+		dlm_add_ast(lkb, DLM_CB_BAST, rqmode, 0, 0);
 	}
 }
 
@@ -327,68 +327,19 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
  * Basic operations on rsb's and lkb's
  */
 
-static int pre_rsb_struct(struct dlm_ls *ls)
-{
-	struct dlm_rsb *r1, *r2;
-	int count = 0;
-
-	spin_lock(&ls->ls_new_rsb_spin);
-	if (ls->ls_new_rsb_count > dlm_config.ci_new_rsb_count / 2) {
-		spin_unlock(&ls->ls_new_rsb_spin);
-		return 0;
-	}
-	spin_unlock(&ls->ls_new_rsb_spin);
-
-	r1 = dlm_allocate_rsb(ls);
-	r2 = dlm_allocate_rsb(ls);
-
-	spin_lock(&ls->ls_new_rsb_spin);
-	if (r1) {
-		list_add(&r1->res_hashchain, &ls->ls_new_rsb);
-		ls->ls_new_rsb_count++;
-	}
-	if (r2) {
-		list_add(&r2->res_hashchain, &ls->ls_new_rsb);
-		ls->ls_new_rsb_count++;
-	}
-	count = ls->ls_new_rsb_count;
-	spin_unlock(&ls->ls_new_rsb_spin);
-
-	if (!count)
-		return -ENOMEM;
-	return 0;
-}
-
-/* If ls->ls_new_rsb is empty, return -EAGAIN, so the caller can
-   unlock any spinlocks, go back and call pre_rsb_struct again.
-   Otherwise, take an rsb off the list and return it. */
-
-static int get_rsb_struct(struct dlm_ls *ls, char *name, int len,
-			  struct dlm_rsb **r_ret)
+static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len)
 {
 	struct dlm_rsb *r;
-	int count;
 
-	spin_lock(&ls->ls_new_rsb_spin);
-	if (list_empty(&ls->ls_new_rsb)) {
-		count = ls->ls_new_rsb_count;
-		spin_unlock(&ls->ls_new_rsb_spin);
-		log_debug(ls, "find_rsb retry %d %d %s",
-			  count, dlm_config.ci_new_rsb_count, name);
-		return -EAGAIN;
-	}
-
-	r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain);
-	list_del(&r->res_hashchain);
-	ls->ls_new_rsb_count--;
-	spin_unlock(&ls->ls_new_rsb_spin);
+	r = dlm_allocate_rsb(ls, len);
+	if (!r)
+		return NULL;
 
 	r->res_ls = ls;
 	r->res_length = len;
 	memcpy(r->res_name, name, len);
 	mutex_init(&r->res_mutex);
 
-	INIT_LIST_HEAD(&r->res_hashchain);
 	INIT_LIST_HEAD(&r->res_lookup);
 	INIT_LIST_HEAD(&r->res_grantqueue);
 	INIT_LIST_HEAD(&r->res_convertqueue);
@@ -396,8 +347,7 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len,
 	INIT_LIST_HEAD(&r->res_root_list);
 	INIT_LIST_HEAD(&r->res_recover_list);
 
-	*r_ret = r;
-	return 0;
+	return r;
 }
 
 static int search_rsb_list(struct list_head *head, char *name, int len,
@@ -455,6 +405,16 @@ static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b,
 	return error;
 }
 
+static int search_rsb(struct dlm_ls *ls, char *name, int len, int b,
+		      unsigned int flags, struct dlm_rsb **r_ret)
+{	/* _search_rsb() wrapper that holds bucket b's spinlock around the lookup */
+	int error;	/* _search_rsb() result, returned unchanged */
+	spin_lock(&ls->ls_rsbtbl[b].lock);	/* b indexes ls->ls_rsbtbl */
+	error = _search_rsb(ls, name, len, b, flags, r_ret);
+	spin_unlock(&ls->ls_rsbtbl[b].lock);
+	return error;
+}
+
 /*
  * Find rsb in rsbtbl and potentially create/add one
  *
@@ -472,48 +432,35 @@ static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b,
 static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
 		    unsigned int flags, struct dlm_rsb **r_ret)
 {
-	struct dlm_rsb *r = NULL;
+	struct dlm_rsb *r = NULL, *tmp;
 	uint32_t hash, bucket;
-	int error;
+	int error = -EINVAL;
 
-	if (namelen > DLM_RESNAME_MAXLEN) {
-		error = -EINVAL;
+	if (namelen > DLM_RESNAME_MAXLEN)
 		goto out;
-	}
 
 	if (dlm_no_directory(ls))
 		flags |= R_CREATE;
 
+	error = 0;
 	hash = jhash(name, namelen, 0);
 	bucket = hash & (ls->ls_rsbtbl_size - 1);
 
- retry:
-	if (flags & R_CREATE) {
-		error = pre_rsb_struct(ls);
-		if (error < 0)
-			goto out;
-	}
-
-	spin_lock(&ls->ls_rsbtbl[bucket].lock);
-
-	error = _search_rsb(ls, name, namelen, bucket, flags, &r);
+	error = search_rsb(ls, name, namelen, bucket, flags, &r);
 	if (!error)
-		goto out_unlock;
+		goto out;
 
 	if (error == -EBADR && !(flags & R_CREATE))
-		goto out_unlock;
+		goto out;
 
 	/* the rsb was found but wasn't a master copy */
 	if (error == -ENOTBLK)
-		goto out_unlock;
+		goto out;
 
-	error = get_rsb_struct(ls, name, namelen, &r);
-	if (error == -EAGAIN) {
-		spin_unlock(&ls->ls_rsbtbl[bucket].lock);
-		goto retry;
-	}
-	if (error)
-		goto out_unlock;
+	error = -ENOMEM;
+	r = create_rsb(ls, name, namelen);
+	if (!r)
+		goto out;
 
 	r->res_hash = hash;
 	r->res_bucket = bucket;
@@ -527,10 +474,18 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
 			nodeid = 0;
 		r->res_nodeid = nodeid;
 	}
+
+	spin_lock(&ls->ls_rsbtbl[bucket].lock);
+	error = _search_rsb(ls, name, namelen, bucket, 0, &tmp);
+	if (!error) {
+		spin_unlock(&ls->ls_rsbtbl[bucket].lock);
+		dlm_free_rsb(r);
+		r = tmp;
+		goto out;
+	}
 	list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list);
-	error = 0;
- out_unlock:
 	spin_unlock(&ls->ls_rsbtbl[bucket].lock);
+	error = 0;
  out:
 	*r_ret = r;
 	return error;
@@ -625,8 +580,9 @@ static void detach_lkb(struct dlm_lkb *lkb)
 
 static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
 {
-	struct dlm_lkb *lkb;
-	int rv, id;
+	struct dlm_lkb *lkb, *tmp;
+	uint32_t lkid = 0;
+	uint16_t bucket;
 
 	lkb = dlm_allocate_lkb(ls);
 	if (!lkb)
@@ -638,42 +594,60 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
 	INIT_LIST_HEAD(&lkb->lkb_ownqueue);
 	INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
 	INIT_LIST_HEAD(&lkb->lkb_time_list);
-	INIT_LIST_HEAD(&lkb->lkb_cb_list);
-	mutex_init(&lkb->lkb_cb_mutex);
-	INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work);
+	INIT_LIST_HEAD(&lkb->lkb_astqueue);
 
- retry:
-	rv = idr_pre_get(&ls->ls_lkbidr, GFP_NOFS);
-	if (!rv)
-		return -ENOMEM;
+	get_random_bytes(&bucket, sizeof(bucket));
+	bucket &= (ls->ls_lkbtbl_size - 1);
+
+	write_lock(&ls->ls_lkbtbl[bucket].lock);
 
-	spin_lock(&ls->ls_lkbidr_spin);
-	rv = idr_get_new_above(&ls->ls_lkbidr, lkb, 1, &id);
-	if (!rv)
-		lkb->lkb_id = id;
-	spin_unlock(&ls->ls_lkbidr_spin);
+	/* counter can roll over so we must verify lkid is not in use */
 
-	if (rv == -EAGAIN)
-		goto retry;
+	while (lkid == 0) {
+		lkid = ((uint32_t)bucket << 16) |	/* widen first: no signed-int overflow */
+			ls->ls_lkbtbl[bucket].counter++;
-	if (rv < 0) {
-		log_error(ls, "create_lkb idr error %d", rv);
-		return rv;
+		list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list,
+				    lkb_idtbl_list) {
+			if (tmp->lkb_id != lkid)
+				continue;
+			lkid = 0;
+			break;
+		}
 	}
 
+	lkb->lkb_id = lkid;
+	list_add(&lkb->lkb_idtbl_list, &ls->ls_lkbtbl[bucket].list);
+	write_unlock(&ls->ls_lkbtbl[bucket].lock);
+
 	*lkb_ret = lkb;
 	return 0;
 }
 
+static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid)
+{	/* no locking or bucket range check here; find_lkb() does both before calling */
+	struct dlm_lkb *lkb;
+	uint16_t bucket = (lkid >> 16);	/* high 16 bits of the id select the bucket */
+
+	list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) {
+		if (lkb->lkb_id == lkid)	/* ids are compared in full, not just the low bits */
+			return lkb;	/* no reference is taken here */
+	}
+	return NULL;	/* no lkb with this id in the bucket */
+}
+
 static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret)
 {
 	struct dlm_lkb *lkb;
+	uint16_t bucket = (lkid >> 16);
+
+	if (bucket >= ls->ls_lkbtbl_size)
+		return -EBADSLT;
 
-	spin_lock(&ls->ls_lkbidr_spin);
-	lkb = idr_find(&ls->ls_lkbidr, lkid);
+	read_lock(&ls->ls_lkbtbl[bucket].lock);
+	lkb = __find_lkb(ls, lkid);
 	if (lkb)
 		kref_get(&lkb->lkb_ref);
-	spin_unlock(&ls->ls_lkbidr_spin);
+	read_unlock(&ls->ls_lkbtbl[bucket].lock);
 
 	*lkb_ret = lkb;
 	return lkb ? 0 : -ENOENT;
@@ -694,12 +668,12 @@ static void kill_lkb(struct kref *kref)
 
 static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
 {
-	uint32_t lkid = lkb->lkb_id;
+	uint16_t bucket = (lkb->lkb_id >> 16);
 
-	spin_lock(&ls->ls_lkbidr_spin);
+	write_lock(&ls->ls_lkbtbl[bucket].lock);
 	if (kref_put(&lkb->lkb_ref, kill_lkb)) {
-		idr_remove(&ls->ls_lkbidr, lkid);
-		spin_unlock(&ls->ls_lkbidr_spin);
+		list_del(&lkb->lkb_idtbl_list);
+		write_unlock(&ls->ls_lkbtbl[bucket].lock);
 
 		detach_lkb(lkb);
 
@@ -709,7 +683,7 @@ static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
 		dlm_free_lkb(lkb);
 		return 1;
 	} else {
-		spin_unlock(&ls->ls_lkbidr_spin);
+		write_unlock(&ls->ls_lkbtbl[bucket].lock);
 		return 0;
 	}
 }
@@ -875,7 +849,9 @@ void dlm_scan_waiters(struct dlm_ls *ls)
 
 		if (!num_nodes) {
 			num_nodes = ls->ls_num_nodes;
-			warned = kzalloc(num_nodes * sizeof(int), GFP_KERNEL);
+			warned = kmalloc(num_nodes * sizeof(int), GFP_KERNEL);
+			if (warned)	/* start with every per-node slot cleared */
+				memset(warned, 0, num_nodes * sizeof(int));
 		}
 		if (!warned)
 			continue;
@@ -887,7 +863,9 @@ void dlm_scan_waiters(struct dlm_ls *ls)
 			  dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
 	}
 	mutex_unlock(&ls->ls_waiters_mutex);
-	kfree(warned);
+
+	/* kfree(NULL) is a no-op, so no guard is needed */
+	kfree(warned);
 
 	if (debug_expired)
 		log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
@@ -2423,6 +2401,9 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
 
 	if (deadlk) {
 		/* it's left on the granted queue */
+		log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s",
+			  lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status,
+			  lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name);
 		revert_lock(r, lkb);
 		queue_cast(r, lkb, -EDEADLK);
 		error = -EDEADLK;
@@ -4012,6 +3993,8 @@ static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms)
 	default:
 		log_error(ls, "unknown message type %d", ms->m_type);
 	}
+
+	dlm_astd_wake();
 }
 
 /* If the lockspace is in recovery mode (locking stopped), then normal
@@ -4150,7 +4133,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
 	struct dlm_message *ms_stub;
 	int wait_type, stub_unlock_result, stub_cancel_result;
 
-	ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL);
+	ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL);
 	if (!ms_stub) {
 		log_error(ls, "dlm_recover_waiters_pre no mem");
 		return;
@@ -4826,7 +4809,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 		goto out_put;
 
 	spin_lock(&ua->proc->locks_spin);
-	/* dlm_user_add_cb() may have already taken lkb off the proc list */
+	/* dlm_user_add_ast() may have already taken lkb off the proc list */
 	if (!list_empty(&lkb->lkb_ownqueue))
 		list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
 	spin_unlock(&ua->proc->locks_spin);
@@ -4963,7 +4946,7 @@ static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
 
 /* We have to release clear_proc_locks mutex before calling unlock_proc_lock()
    (which does lock_rsb) due to deadlock with receiving a message that does
-   lock_rsb followed by dlm_user_add_cb() */
+   lock_rsb followed by dlm_user_add_ast() */
 
 static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
 				     struct dlm_user_proc *proc)
@@ -4986,7 +4969,7 @@ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
 	return lkb;
 }
 
-/* The ls_clear_proc_locks mutex protects against dlm_user_add_cb() which
+/* The ls_clear_proc_locks mutex protects against dlm_user_add_ast() which
    1) references lkb->ua which we free here and 2) adds lkbs to proc->asts,
    which we clear here. */
 
@@ -5028,10 +5011,10 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 		dlm_put_lkb(lkb);
 	}
 
-	list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) {
+	list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
 		memset(&lkb->lkb_callbacks, 0,
 		       sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE);
-		list_del_init(&lkb->lkb_cb_list);
+		list_del_init(&lkb->lkb_astqueue);
 		dlm_put_lkb(lkb);
 	}
 
@@ -5070,10 +5053,10 @@ static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 	spin_unlock(&proc->locks_spin);
 
 	spin_lock(&proc->asts_spin);
-	list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) {
+	list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
 		memset(&lkb->lkb_callbacks, 0,
 		       sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE);
-		list_del_init(&lkb->lkb_cb_list);
+		list_del_init(&lkb->lkb_astqueue);
 		dlm_put_lkb(lkb);
 	}
 	spin_unlock(&proc->asts_spin);
diff --git a/trunk/fs/dlm/lockspace.c b/trunk/fs/dlm/lockspace.c
index a1d8f1af144b..14cbf4099753 100644
--- a/trunk/fs/dlm/lockspace.c
+++ b/trunk/fs/dlm/lockspace.c
@@ -15,6 +15,7 @@
 #include "lockspace.h"
 #include "member.h"
 #include "recoverd.h"
+#include "ast.h"
 #include "dir.h"
 #include "lowcomms.h"
 #include "config.h"
@@ -23,7 +24,6 @@
 #include "recover.h"
 #include "requestqueue.h"
 #include "user.h"
-#include "ast.h"
 
 static int			ls_count;
 static struct mutex		ls_lock;
@@ -359,10 +359,17 @@ static int threads_start(void)
 {
 	int error;
 
+	/* Thread which processes lock requests for all lockspaces */
+	error = dlm_astd_start();
+	if (error) {
+		log_print("cannot start dlm_astd thread %d", error);
+		goto fail;
+	}
+
 	error = dlm_scand_start();
 	if (error) {
 		log_print("cannot start dlm_scand thread %d", error);
-		goto fail;
+		goto astd_fail;
 	}
 
 	/* Thread for sending/receiving messages for all lockspace's */
@@ -376,6 +383,8 @@ static int threads_start(void)
 
  scand_fail:
 	dlm_scand_stop();
+ astd_fail:
+	dlm_astd_stop();
  fail:
 	return error;
 }
@@ -384,6 +393,7 @@ static void threads_stop(void)
 {
 	dlm_scand_stop();
 	dlm_lowcomms_stop();
+	dlm_astd_stop();
 }
 
 static int new_lockspace(const char *name, int namelen, void **lockspace,
@@ -453,7 +463,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
 	size = dlm_config.ci_rsbtbl_size;
 	ls->ls_rsbtbl_size = size;
 
-	ls->ls_rsbtbl = vmalloc(sizeof(struct dlm_rsbtable) * size);
+	ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_NOFS);
 	if (!ls->ls_rsbtbl)
 		goto out_lsfree;
 	for (i = 0; i < size; i++) {
@@ -462,13 +472,22 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
 		spin_lock_init(&ls->ls_rsbtbl[i].lock);
 	}
 
-	idr_init(&ls->ls_lkbidr);
-	spin_lock_init(&ls->ls_lkbidr_spin);
+	size = dlm_config.ci_lkbtbl_size;
+	ls->ls_lkbtbl_size = size;
+
+	ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_NOFS);
+	if (!ls->ls_lkbtbl)
+		goto out_rsbfree;
+	for (i = 0; i < size; i++) {
+		INIT_LIST_HEAD(&ls->ls_lkbtbl[i].list);
+		rwlock_init(&ls->ls_lkbtbl[i].lock);
+		ls->ls_lkbtbl[i].counter = 1;
+	}
 
 	size = dlm_config.ci_dirtbl_size;
 	ls->ls_dirtbl_size = size;
 
-	ls->ls_dirtbl = vmalloc(sizeof(struct dlm_dirtable) * size);
+	ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_NOFS);
 	if (!ls->ls_dirtbl)
 		goto out_lkbfree;
 	for (i = 0; i < size; i++) {
@@ -483,9 +502,6 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
 	INIT_LIST_HEAD(&ls->ls_timeout);
 	mutex_init(&ls->ls_timeout_mutex);
 
-	INIT_LIST_HEAD(&ls->ls_new_rsb);
-	spin_lock_init(&ls->ls_new_rsb_spin);
-
 	INIT_LIST_HEAD(&ls->ls_nodes);
 	INIT_LIST_HEAD(&ls->ls_nodes_gone);
 	ls->ls_num_nodes = 0;
@@ -504,9 +520,6 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
 	init_completion(&ls->ls_members_done);
 	ls->ls_members_result = -1;
 
-	mutex_init(&ls->ls_cb_mutex);
-	INIT_LIST_HEAD(&ls->ls_cb_delay);
-
 	ls->ls_recoverd_task = NULL;
 	mutex_init(&ls->ls_recoverd_active);
 	spin_lock_init(&ls->ls_recover_lock);
@@ -540,26 +553,18 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
 	list_add(&ls->ls_list, &lslist);
 	spin_unlock(&lslist_lock);
 
-	if (flags & DLM_LSFL_FS) {
-		error = dlm_callback_start(ls);
-		if (error) {
-			log_error(ls, "can't start dlm_callback %d", error);
-			goto out_delist;
-		}
-	}
-
 	/* needs to find ls in lslist */
 	error = dlm_recoverd_start(ls);
 	if (error) {
 		log_error(ls, "can't start dlm_recoverd %d", error);
-		goto out_callback;
+		goto out_delist;
 	}
 
 	ls->ls_kobj.kset = dlm_kset;
 	error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
 				     "%s", ls->ls_name);
 	if (error)
-		goto out_recoverd;
+		goto out_stop;
 	kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
 
 	/* let kobject handle freeing of ls if there's an error */
@@ -573,7 +578,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
 
 	error = do_uevent(ls, 1);
 	if (error)
-		goto out_recoverd;
+		goto out_stop;
 
 	wait_for_completion(&ls->ls_members_done);
 	error = ls->ls_members_result;
@@ -590,20 +595,19 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
 	do_uevent(ls, 0);
 	dlm_clear_members(ls);
 	kfree(ls->ls_node_array);
- out_recoverd:
+ out_stop:
 	dlm_recoverd_stop(ls);
- out_callback:
-	dlm_callback_stop(ls);
  out_delist:
 	spin_lock(&lslist_lock);
 	list_del(&ls->ls_list);
 	spin_unlock(&lslist_lock);
 	kfree(ls->ls_recover_buf);
  out_dirfree:
-	vfree(ls->ls_dirtbl);
+	kfree(ls->ls_dirtbl);
  out_lkbfree:
-	idr_destroy(&ls->ls_lkbidr);
-	vfree(ls->ls_rsbtbl);
+	kfree(ls->ls_lkbtbl);
+ out_rsbfree:
+	kfree(ls->ls_rsbtbl);
  out_lsfree:
 	if (do_unreg)
 		kobject_put(&ls->ls_kobj);
@@ -637,64 +641,50 @@ int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
 	return error;
 }
 
-static int lkb_idr_is_local(int id, void *p, void *data)
-{
-	struct dlm_lkb *lkb = p;
-
-	if (!lkb->lkb_nodeid)
-		return 1;
-	return 0;
-}
-
-static int lkb_idr_is_any(int id, void *p, void *data)
-{
-	return 1;
-}
-
-static int lkb_idr_free(int id, void *p, void *data)
-{
-	struct dlm_lkb *lkb = p;
-
-	if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
-		dlm_free_lvb(lkb->lkb_lvbptr);
-
-	dlm_free_lkb(lkb);
-	return 0;
-}
-
-/* NOTE: We check the lkbidr here rather than the resource table.
-   This is because there may be LKBs queued as ASTs that have been unlinked
-   from their RSBs and are pending deletion once the AST has been delivered */
-
-static int lockspace_busy(struct dlm_ls *ls, int force)
-{
-	int rv;
-
-	spin_lock(&ls->ls_lkbidr_spin);
-	if (force == 0) {
-		rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls);
-	} else if (force == 1) {
-		rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls);
-	} else {
-		rv = 0;
+/* Return 1 if the lockspace still has active remote locks,
+ *        2 if the lockspace still has active local locks.
+ */
+static int lockspace_busy(struct dlm_ls *ls)
+{
+	int i, lkb_found = 0;
+	struct dlm_lkb *lkb;
+
+	/* NOTE: We check the lkbtbl here rather than the resource table.
+	   This is because there may be LKBs queued as ASTs that have been
+	   unlinked from their RSBs and are pending deletion once the AST has
+	   been delivered */
+
+	for (i = 0; i < ls->ls_lkbtbl_size; i++) {
+		read_lock(&ls->ls_lkbtbl[i].lock);
+		if (!list_empty(&ls->ls_lkbtbl[i].list)) {
+			lkb_found = 1;
+			list_for_each_entry(lkb, &ls->ls_lkbtbl[i].list,
+					    lkb_idtbl_list) {
+				if (!lkb->lkb_nodeid) {
+					read_unlock(&ls->ls_lkbtbl[i].lock);
+					return 2;
+				}
+			}
+		}
+		read_unlock(&ls->ls_lkbtbl[i].lock);
 	}
-	spin_unlock(&ls->ls_lkbidr_spin);
-	return rv;
+	return lkb_found;
 }
 
 static int release_lockspace(struct dlm_ls *ls, int force)
 {
+	struct dlm_lkb *lkb;
 	struct dlm_rsb *rsb;
 	struct list_head *head;
 	int i, busy, rv;
 
-	busy = lockspace_busy(ls, force);
+	busy = lockspace_busy(ls);
 
 	spin_lock(&lslist_lock);
 	if (ls->ls_create_count == 1) {
-		if (busy) {
+		if (busy > force)
 			rv = -EBUSY;
-		} else {
+		else {
 			/* remove_lockspace takes ls off lslist */
 			ls->ls_create_count = 0;
 			rv = 0;
@@ -718,12 +708,12 @@ static int release_lockspace(struct dlm_ls *ls, int force)
 
 	dlm_recoverd_stop(ls);
 
-	dlm_callback_stop(ls);
-
 	remove_lockspace(ls);
 
 	dlm_delete_debug_file(ls);
 
+	dlm_astd_suspend();
+
 	kfree(ls->ls_recover_buf);
 
 	/*
@@ -731,15 +721,31 @@ static int release_lockspace(struct dlm_ls *ls, int force)
 	 */
 
 	dlm_dir_clear(ls);
-	vfree(ls->ls_dirtbl);
+	kfree(ls->ls_dirtbl);
 
 	/*
-	 * Free all lkb's in idr
+	 * Free all lkb's on lkbtbl[] lists.
 	 */
 
-	idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
-	idr_remove_all(&ls->ls_lkbidr);
-	idr_destroy(&ls->ls_lkbidr);
+	for (i = 0; i < ls->ls_lkbtbl_size; i++) {
+		head = &ls->ls_lkbtbl[i].list;
+		while (!list_empty(head)) {
+			lkb = list_entry(head->next, struct dlm_lkb,
+					 lkb_idtbl_list);
+
+			list_del(&lkb->lkb_idtbl_list);
+
+			dlm_del_ast(lkb);
+
+			if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
+				dlm_free_lvb(lkb->lkb_lvbptr);
+
+			dlm_free_lkb(lkb);
+		}
+	}
+	dlm_astd_resume();
+
+	kfree(ls->ls_lkbtbl);
 
 	/*
 	 * Free all rsb's on rsbtbl[] lists
@@ -764,14 +770,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
 		}
 	}
 
-	vfree(ls->ls_rsbtbl);
-
-	while (!list_empty(&ls->ls_new_rsb)) {
-		rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb,
-				       res_hashchain);
-		list_del(&rsb->res_hashchain);
-		dlm_free_rsb(rsb);
-	}
+	kfree(ls->ls_rsbtbl);
 
 	/*
 	 * Free structures on any other lists
diff --git a/trunk/fs/dlm/lowcomms.c b/trunk/fs/dlm/lowcomms.c
index 990626e7da80..5e2c71f05e46 100644
--- a/trunk/fs/dlm/lowcomms.c
+++ b/trunk/fs/dlm/lowcomms.c
@@ -512,10 +512,12 @@ static void process_sctp_notification(struct connection *con,
 			}
 			make_sockaddr(&prim.ssp_addr, 0, &addr_len);
 			if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
+				int i;
 				unsigned char *b=(unsigned char *)&prim.ssp_addr;
 				log_print("reject connect from unknown addr");
-				print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, 
-						     b, sizeof(struct sockaddr_storage));
+				for (i=0; i<sizeof(struct sockaddr_storage);i++)
+					printk("%02x ", b[i]);
+				printk("\n");
 				sctp_send_shutdown(prim.ssp_assoc_id);
 				return;
 			}
@@ -746,10 +748,7 @@ static int tcp_accept_from_sock(struct connection *con)
 	/* Get the new node's NODEID */
 	make_sockaddr(&peeraddr, 0, &len);
 	if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
-		unsigned char *b=(unsigned char *)&peeraddr;
 		log_print("connect from non cluster node");
-		print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, 
-				     b, sizeof(struct sockaddr_storage));
 		sock_release(newsock);
 		mutex_unlock(&con->sock_mutex);
 		return -1;
diff --git a/trunk/fs/dlm/memory.c b/trunk/fs/dlm/memory.c
index da64df7576e1..8e0d00db004f 100644
--- a/trunk/fs/dlm/memory.c
+++ b/trunk/fs/dlm/memory.c
@@ -16,7 +16,6 @@
 #include "memory.h"
 
 static struct kmem_cache *lkb_cache;
-static struct kmem_cache *rsb_cache;
 
 
 int __init dlm_memory_init(void)
@@ -27,14 +26,6 @@ int __init dlm_memory_init(void)
 				__alignof__(struct dlm_lkb), 0, NULL);
 	if (!lkb_cache)
 		ret = -ENOMEM;
-
-	rsb_cache = kmem_cache_create("dlm_rsb", sizeof(struct dlm_rsb),
-				__alignof__(struct dlm_rsb), 0, NULL);
-	if (!rsb_cache) {
-		kmem_cache_destroy(lkb_cache);
-		ret = -ENOMEM;
-	}
-
 	return ret;
 }
 
@@ -42,8 +33,6 @@ void dlm_memory_exit(void)
 {
 	if (lkb_cache)
 		kmem_cache_destroy(lkb_cache);
-	if (rsb_cache)
-		kmem_cache_destroy(rsb_cache);
 }
 
 char *dlm_allocate_lvb(struct dlm_ls *ls)
@@ -59,11 +48,16 @@ void dlm_free_lvb(char *p)
 	kfree(p);
 }
 
-struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls)
+/* FIXME: have some minimal space built-in to rsb for the name and
+   kmalloc a separate name if needed, like dentries are done */
+
+struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen)
 {
 	struct dlm_rsb *r;
 
-	r = kmem_cache_zalloc(rsb_cache, GFP_NOFS);
+	DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,);
+
+	r = kzalloc(sizeof(*r) + namelen, GFP_NOFS);
 	return r;
 }
 
@@ -71,7 +65,7 @@ void dlm_free_rsb(struct dlm_rsb *r)
 {
 	if (r->res_lvbptr)
 		dlm_free_lvb(r->res_lvbptr);
-	kmem_cache_free(rsb_cache, r);
+	kfree(r);
 }
 
 struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls)
diff --git a/trunk/fs/dlm/memory.h b/trunk/fs/dlm/memory.h
index 177c11cbb0a6..485fb29143bd 100644
--- a/trunk/fs/dlm/memory.h
+++ b/trunk/fs/dlm/memory.h
@@ -16,7 +16,7 @@
 
 int dlm_memory_init(void);
 void dlm_memory_exit(void);
-struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls);
+struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen);
 void dlm_free_rsb(struct dlm_rsb *r);
 struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls);
 void dlm_free_lkb(struct dlm_lkb *l);
diff --git a/trunk/fs/dlm/recoverd.c b/trunk/fs/dlm/recoverd.c
index 774da3cf92c6..fd677c8c3d3b 100644
--- a/trunk/fs/dlm/recoverd.c
+++ b/trunk/fs/dlm/recoverd.c
@@ -58,7 +58,13 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 
 	mutex_lock(&ls->ls_recoverd_active);
 
-	dlm_callback_suspend(ls);
+	/*
+	 * Suspending and resuming dlm_astd ensures that no lkb's from this ls
+	 * will be processed by dlm_astd during recovery.
+	 */
+
+	dlm_astd_suspend();
+	dlm_astd_resume();
 
 	/*
 	 * Free non-master tossed rsb's.  Master rsb's are kept on toss
@@ -196,8 +202,6 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 
 	dlm_adjust_timeouts(ls);
 
-	dlm_callback_resume(ls);
-
 	error = enable_locking(ls, rv->seq);
 	if (error) {
 		log_debug(ls, "enable_locking failed %d", error);
@@ -218,6 +222,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 
 	dlm_grant_after_purge(ls);
 
+	dlm_astd_wake();
+
 	log_debug(ls, "recover %llx done: %u ms",
 		  (unsigned long long)rv->seq,
 		  jiffies_to_msecs(jiffies - start));
diff --git a/trunk/fs/dlm/user.c b/trunk/fs/dlm/user.c
index d8ea60756403..e96bf3e9be88 100644
--- a/trunk/fs/dlm/user.c
+++ b/trunk/fs/dlm/user.c
@@ -213,9 +213,9 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode,
 		goto out;
 	}
 
-	if (list_empty(&lkb->lkb_cb_list)) {
+	if (list_empty(&lkb->lkb_astqueue)) {
 		kref_get(&lkb->lkb_ref);
-		list_add_tail(&lkb->lkb_cb_list, &proc->asts);
+		list_add_tail(&lkb->lkb_astqueue, &proc->asts);
 		wake_up_interruptible(&proc->wait);
 	}
 	spin_unlock(&proc->asts_spin);
@@ -832,24 +832,24 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
 	}
 
 	/* if we empty lkb_callbacks, we don't want to unlock the spinlock
-	   without removing lkb_cb_list; so empty lkb_cb_list is always
+	   without removing lkb_astqueue; so empty lkb_astqueue is always
 	   consistent with empty lkb_callbacks */
 
-	lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_cb_list);
+	lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue);
 
 	rv = dlm_rem_lkb_callback(lkb->lkb_resource->res_ls, lkb, &cb, &resid);
 	if (rv < 0) {
 		/* this shouldn't happen; lkb should have been removed from
 		   list when resid was zero */
 		log_print("dlm_rem_lkb_callback empty %x", lkb->lkb_id);
-		list_del_init(&lkb->lkb_cb_list);
+		list_del_init(&lkb->lkb_astqueue);
 		spin_unlock(&proc->asts_spin);
 		/* removes ref for proc->asts, may cause lkb to be freed */
 		dlm_put_lkb(lkb);
 		goto try_another;
 	}
 	if (!resid)
-		list_del_init(&lkb->lkb_cb_list);
+		list_del_init(&lkb->lkb_astqueue);
 	spin_unlock(&proc->asts_spin);
 
 	if (cb.flags & DLM_CB_SKIP) {
diff --git a/trunk/fs/gfs2/bmap.c b/trunk/fs/gfs2/bmap.c
index 42e477f31223..e65493a8ac00 100644
--- a/trunk/fs/gfs2/bmap.c
+++ b/trunk/fs/gfs2/bmap.c
@@ -854,7 +854,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 			blen++;
 		else {
 			if (bstart) {
-				__gfs2_free_blocks(ip, bstart, blen, metadata);
+				if (metadata)
+					__gfs2_free_meta(ip, bstart, blen);
+				else
+					__gfs2_free_data(ip, bstart, blen);
+
 				btotal += blen;
 			}
 
@@ -866,7 +870,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 		gfs2_add_inode_blocks(&ip->i_inode, -1);
 	}
 	if (bstart) {
-		__gfs2_free_blocks(ip, bstart, blen, metadata);
+		if (metadata)
+			__gfs2_free_meta(ip, bstart, blen);
+		else
+			__gfs2_free_data(ip, bstart, blen);
+
 		btotal += blen;
 	}
 
diff --git a/trunk/fs/gfs2/dir.c b/trunk/fs/gfs2/dir.c
index 1cc2f8ec52a2..091ee4779538 100644
--- a/trunk/fs/gfs2/dir.c
+++ b/trunk/fs/gfs2/dir.c
@@ -339,67 +339,6 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
 	return (copied) ? copied : error;
 }
 
-/**
- * gfs2_dir_get_hash_table - Get pointer to the dir hash table
- * @ip: The inode in question
- *
- * Returns: The hash table or an error
- */
-
-static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
-{
-	struct inode *inode = &ip->i_inode;
-	int ret;
-	u32 hsize;
-	__be64 *hc;
-
-	BUG_ON(!(ip->i_diskflags & GFS2_DIF_EXHASH));
-
-	hc = ip->i_hash_cache;
-	if (hc)
-		return hc;
-
-	hsize = 1 << ip->i_depth;
-	hsize *= sizeof(__be64);
-	if (hsize != i_size_read(&ip->i_inode)) {
-		gfs2_consist_inode(ip);
-		return ERR_PTR(-EIO);
-	}
-
-	hc = kmalloc(hsize, GFP_NOFS);
-	ret = -ENOMEM;
-	if (hc == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	ret = gfs2_dir_read_data(ip, (char *)hc, 0, hsize, 1);
-	if (ret < 0) {
-		kfree(hc);
-		return ERR_PTR(ret);
-	}
-
-	spin_lock(&inode->i_lock);
-	if (ip->i_hash_cache)
-		kfree(hc);
-	else
-		ip->i_hash_cache = hc;
-	spin_unlock(&inode->i_lock);
-
-	return ip->i_hash_cache;
-}
-
-/**
- * gfs2_dir_hash_inval - Invalidate dir hash
- * @ip: The directory inode
- *
- * Must be called with an exclusive glock, or during glock invalidation.
- */
-void gfs2_dir_hash_inval(struct gfs2_inode *ip)
-{
-	__be64 *hc = ip->i_hash_cache;
-	ip->i_hash_cache = NULL;
-	kfree(hc);
-}
-
 static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
 {
 	return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0;
@@ -747,12 +686,17 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no,
 static int get_leaf_nr(struct gfs2_inode *dip, u32 index,
 		       u64 *leaf_out)
 {
-	__be64 *hash;
+	__be64 leaf_no;
+	int error;
+
+	error = gfs2_dir_read_data(dip, (char *)&leaf_no,
+				    index * sizeof(__be64),
+				    sizeof(__be64), 0);
+	if (error != sizeof(u64))
+		return (error < 0) ? error : -EIO;
+
+	*leaf_out = be64_to_cpu(leaf_no);
 
-	hash = gfs2_dir_get_hash_table(dip);
-	if (IS_ERR(hash))
-		return PTR_ERR(hash);
-	*leaf_out = be64_to_cpu(*(hash + index));
 	return 0;
 }
 
@@ -1022,8 +966,6 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
 	for (x = 0; x < half_len; x++)
 		lp[x] = cpu_to_be64(bn);
 
-	gfs2_dir_hash_inval(dip);
-
 	error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64),
 				    half_len * sizeof(u64));
 	if (error != half_len * sizeof(u64)) {
@@ -1110,54 +1052,70 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
 
 static int dir_double_exhash(struct gfs2_inode *dip)
 {
+	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct buffer_head *dibh;
 	u32 hsize;
-	u32 hsize_bytes;
-	__be64 *hc;
-	__be64 *hc2, *h;
+	u64 *buf;
+	u64 *from, *to;
+	u64 block;
+	u64 disksize = i_size_read(&dip->i_inode);
 	int x;
 	int error = 0;
 
 	hsize = 1 << dip->i_depth;
-	hsize_bytes = hsize * sizeof(__be64);
+	if (hsize * sizeof(u64) != disksize) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
 
-	hc = gfs2_dir_get_hash_table(dip);
-	if (IS_ERR(hc))
-		return PTR_ERR(hc);
+	/*  Allocate both the "from" and "to" buffers in one big chunk  */
 
-	h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS);
-	if (!hc2)
+	buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS);
+	if (!buf)
 		return -ENOMEM;
 
-	error = gfs2_meta_inode_buffer(dip, &dibh);
-	if (error)
-		goto out_kfree;
+	for (block = disksize >> sdp->sd_hash_bsize_shift; block--;) {
+		error = gfs2_dir_read_data(dip, (char *)buf,
+					    block * sdp->sd_hash_bsize,
+					    sdp->sd_hash_bsize, 1);
+		if (error != sdp->sd_hash_bsize) {
+			if (error >= 0)
+				error = -EIO;
+			goto fail;
+		}
+
+		from = buf;
+		to = (u64 *)((char *)buf + sdp->sd_hash_bsize);
 
-	for (x = 0; x < hsize; x++) {
-		*h++ = *hc;
-		*h++ = *hc;
-		hc++;
+		for (x = sdp->sd_hash_ptrs; x--; from++) {
+			*to++ = *from;	/*  No endianness worries  */
+			*to++ = *from;
+		}
+
+		error = gfs2_dir_write_data(dip,
+					     (char *)buf + sdp->sd_hash_bsize,
+					     block * sdp->sd_sb.sb_bsize,
+					     sdp->sd_sb.sb_bsize);
+		if (error != sdp->sd_sb.sb_bsize) {
+			if (error >= 0)
+				error = -EIO;
+			goto fail;
+		}
 	}
 
-	error = gfs2_dir_write_data(dip, (char *)hc2, 0, hsize_bytes * 2);
-	if (error != (hsize_bytes * 2))
-		goto fail;
+	kfree(buf);
 
-	gfs2_dir_hash_inval(dip);
-	dip->i_hash_cache = hc2;
-	dip->i_depth++;
-	gfs2_dinode_out(dip, dibh->b_data);
-	brelse(dibh);
-	return 0;
+	error = gfs2_meta_inode_buffer(dip, &dibh);
+	if (!gfs2_assert_withdraw(sdp, !error)) {
+		dip->i_depth++;
+		gfs2_dinode_out(dip, dibh->b_data);
+		brelse(dibh);
+	}
+
+	return error;
 
 fail:
-	/* Replace original hash table & size */
-	gfs2_dir_write_data(dip, (char *)hc, 0, hsize_bytes);
-	i_size_write(&dip->i_inode, hsize_bytes);
-	gfs2_dinode_out(dip, dibh->b_data);
-	brelse(dibh);
-out_kfree:
-	kfree(hc2);
+	kfree(buf);
 	return error;
 }
 
@@ -1390,7 +1348,6 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
 	return error;
 }
 
-
 /**
  * dir_e_read - Reads the entries from a directory into a filldir buffer
  * @dip: dinode pointer
@@ -1405,7 +1362,9 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
 		      filldir_t filldir)
 {
 	struct gfs2_inode *dip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	u32 hsize, len = 0;
+	u32 ht_offset, lp_offset, ht_offset_cur = -1;
 	u32 hash, index;
 	__be64 *lp;
 	int copied = 0;
@@ -1413,17 +1372,37 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
 	unsigned depth = 0;
 
 	hsize = 1 << dip->i_depth;
+	if (hsize * sizeof(u64) != i_size_read(inode)) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
+
 	hash = gfs2_dir_offset2hash(*offset);
 	index = hash >> (32 - dip->i_depth);
 
-	lp = gfs2_dir_get_hash_table(dip);
-	if (IS_ERR(lp))
-		return PTR_ERR(lp);
+	lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
+	if (!lp)
+		return -ENOMEM;
 
 	while (index < hsize) {
+		lp_offset = index & (sdp->sd_hash_ptrs - 1);
+		ht_offset = index - lp_offset;
+
+		if (ht_offset_cur != ht_offset) {
+			error = gfs2_dir_read_data(dip, (char *)lp,
+						ht_offset * sizeof(__be64),
+						sdp->sd_hash_bsize, 1);
+			if (error != sdp->sd_hash_bsize) {
+				if (error >= 0)
+					error = -EIO;
+				goto out;
+			}
+			ht_offset_cur = ht_offset;
+		}
+
 		error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
 					   &copied, &depth,
-					   be64_to_cpu(lp[index]));
+					   be64_to_cpu(lp[lp_offset]));
 		if (error)
 			break;
 
@@ -1431,6 +1410,8 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
 		index = (index & ~(len - 1)) + len;
 	}
 
+out:
+	kfree(lp);
 	if (error > 0)
 		error = 0;
 	return error;
@@ -1933,22 +1914,43 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
 
 int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
 {
+	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct buffer_head *bh;
 	struct gfs2_leaf *leaf;
 	u32 hsize, len;
+	u32 ht_offset, lp_offset, ht_offset_cur = -1;
 	u32 index = 0, next_index;
 	__be64 *lp;
 	u64 leaf_no;
 	int error = 0, last;
 
 	hsize = 1 << dip->i_depth;
+	if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
 
-	lp = gfs2_dir_get_hash_table(dip);
-	if (IS_ERR(lp))
-		return PTR_ERR(lp);
+	lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
+	if (!lp)
+		return -ENOMEM;
 
 	while (index < hsize) {
-		leaf_no = be64_to_cpu(lp[index]);
+		lp_offset = index & (sdp->sd_hash_ptrs - 1);
+		ht_offset = index - lp_offset;
+
+		if (ht_offset_cur != ht_offset) {
+			error = gfs2_dir_read_data(dip, (char *)lp,
+						ht_offset * sizeof(__be64),
+						sdp->sd_hash_bsize, 1);
+			if (error != sdp->sd_hash_bsize) {
+				if (error >= 0)
+					error = -EIO;
+				goto out;
+			}
+			ht_offset_cur = ht_offset;
+		}
+
+		leaf_no = be64_to_cpu(lp[lp_offset]);
 		if (leaf_no) {
 			error = get_leaf(dip, leaf_no, &bh);
 			if (error)
@@ -1974,6 +1976,7 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
 	}
 
 out:
+	kfree(lp);
 
 	return error;
 }
diff --git a/trunk/fs/gfs2/dir.h b/trunk/fs/gfs2/dir.h
index ff5772fbf024..e686af11becd 100644
--- a/trunk/fs/gfs2/dir.h
+++ b/trunk/fs/gfs2/dir.h
@@ -35,7 +35,6 @@ extern int gfs2_diradd_alloc_required(struct inode *dir,
 				      const struct qstr *filename);
 extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
 				   struct buffer_head **bhp);
-extern void gfs2_dir_hash_inval(struct gfs2_inode *ip);
 
 static inline u32 gfs2_disk_hash(const char *data, int len)
 {
diff --git a/trunk/fs/gfs2/file.c b/trunk/fs/gfs2/file.c
index bc2590ef5fc1..a9f5cbe45cd9 100644
--- a/trunk/fs/gfs2/file.c
+++ b/trunk/fs/gfs2/file.c
@@ -174,9 +174,7 @@ void gfs2_set_inode_flags(struct inode *inode)
 	struct gfs2_inode *ip = GFS2_I(inode);
 	unsigned int flags = inode->i_flags;
 
-	flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC);
-	if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode))
-		inode->i_flags |= S_NOSEC;
+	flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
 	if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
 		flags |= S_IMMUTABLE;
 	if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
diff --git a/trunk/fs/gfs2/glock.c b/trunk/fs/gfs2/glock.c
index 88e8a23d0026..1c1336e7b3b2 100644
--- a/trunk/fs/gfs2/glock.c
+++ b/trunk/fs/gfs2/glock.c
@@ -409,10 +409,6 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
 	if (held1 && held2 && list_empty(&gl->gl_holders))
 		clear_bit(GLF_QUEUED, &gl->gl_flags);
 
-	if (new_state != gl->gl_target)
-		/* shorten our minimum hold time */
-		gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
-				       GL_GLOCK_MIN_HOLD);
 	gl->gl_state = new_state;
 	gl->gl_tchange = jiffies;
 }
@@ -672,7 +668,7 @@ static void glock_work_func(struct work_struct *work)
 	    gl->gl_demote_state != LM_ST_EXCLUSIVE) {
 		unsigned long holdtime, now = jiffies;
 
-		holdtime = gl->gl_tchange + gl->gl_hold_time;
+		holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
 		if (time_before(now, holdtime))
 			delay = holdtime - now;
 
@@ -683,14 +679,9 @@ static void glock_work_func(struct work_struct *work)
 	}
 	run_queue(gl, 0);
 	spin_unlock(&gl->gl_spin);
-	if (!delay)
+	if (!delay ||
+	    queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
 		gfs2_glock_put(gl);
-	else {
-		if (gl->gl_name.ln_type != LM_TYPE_INODE)
-			delay = 0;
-		if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
-			gfs2_glock_put(gl);
-	}
 	if (drop_ref)
 		gfs2_glock_put(gl);
 }
@@ -752,7 +743,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 	gl->gl_tchange = jiffies;
 	gl->gl_object = NULL;
 	gl->gl_sbd = sdp;
-	gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
 	INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
 	INIT_WORK(&gl->gl_delete, delete_work_func);
 
@@ -865,15 +855,8 @@ static int gfs2_glock_demote_wait(void *word)
 
 static void wait_on_holder(struct gfs2_holder *gh)
 {
-	unsigned long time1 = jiffies;
-
 	might_sleep();
 	wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE);
-	if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */
-		/* Lengthen the minimum hold time. */
-		gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
-					      GL_GLOCK_HOLD_INCR,
-					      GL_GLOCK_MAX_HOLD);
 }
 
 static void wait_on_demote(struct gfs2_glock *gl)
@@ -1110,9 +1093,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 
 	gfs2_glock_hold(gl);
 	if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
-	    !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
-	    gl->gl_name.ln_type == LM_TYPE_INODE)
-		delay = gl->gl_hold_time;
+	    !test_bit(GLF_DEMOTE, &gl->gl_flags))
+		delay = gl->gl_ops->go_min_hold_time;
 	if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
 		gfs2_glock_put(gl);
 }
@@ -1291,13 +1273,12 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
 	unsigned long now = jiffies;
 
 	gfs2_glock_hold(gl);
-	holdtime = gl->gl_tchange + gl->gl_hold_time;
-	if (test_bit(GLF_QUEUED, &gl->gl_flags) &&
-	    gl->gl_name.ln_type == LM_TYPE_INODE) {
+	holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
+	if (test_bit(GLF_QUEUED, &gl->gl_flags)) {
 		if (time_before(now, holdtime))
 			delay = holdtime - now;
 		if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
-			delay = gl->gl_hold_time;
+			delay = gl->gl_ops->go_min_hold_time;
 	}
 
 	spin_lock(&gl->gl_spin);
@@ -1686,7 +1667,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
 	dtime *= 1000000/HZ; /* demote time in uSec */
 	if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
 		dtime = 0;
-	gfs2_print_dbg(seq, "G:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d m:%ld\n",
+	gfs2_print_dbg(seq, "G:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n",
 		  state2str(gl->gl_state),
 		  gl->gl_name.ln_type,
 		  (unsigned long long)gl->gl_name.ln_number,
@@ -1695,7 +1676,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
 		  state2str(gl->gl_demote_state), dtime,
 		  atomic_read(&gl->gl_ail_count),
 		  atomic_read(&gl->gl_revokes),
-		  atomic_read(&gl->gl_ref), gl->gl_hold_time);
+		  atomic_read(&gl->gl_ref));
 
 	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
 		error = dump_holder(seq, gh);
diff --git a/trunk/fs/gfs2/glock.h b/trunk/fs/gfs2/glock.h
index 66707118af25..6b2f757b9281 100644
--- a/trunk/fs/gfs2/glock.h
+++ b/trunk/fs/gfs2/glock.h
@@ -113,12 +113,6 @@ enum {
 
 #define GLR_TRYFAILED		13
 
-#define GL_GLOCK_MAX_HOLD        (long)(HZ / 5)
-#define GL_GLOCK_DFT_HOLD        (long)(HZ / 5)
-#define GL_GLOCK_MIN_HOLD        (long)(10)
-#define GL_GLOCK_HOLD_INCR       (long)(HZ / 20)
-#define GL_GLOCK_HOLD_DECR       (long)(HZ / 40)
-
 struct lm_lockops {
 	const char *lm_proto_name;
 	int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
diff --git a/trunk/fs/gfs2/glops.c b/trunk/fs/gfs2/glops.c
index da21ecaafcc2..2cca29316bd6 100644
--- a/trunk/fs/gfs2/glops.c
+++ b/trunk/fs/gfs2/glops.c
@@ -26,7 +26,6 @@
 #include "rgrp.h"
 #include "util.h"
 #include "trans.h"
-#include "dir.h"
 
 /**
  * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
@@ -219,7 +218,6 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
 		if (ip) {
 			set_bit(GIF_INVALID, &ip->i_flags);
 			forget_all_cached_acls(&ip->i_inode);
-			gfs2_dir_hash_inval(ip);
 		}
 	}
 
@@ -318,8 +316,6 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	ip->i_generation = be64_to_cpu(str->di_generation);
 
 	ip->i_diskflags = be32_to_cpu(str->di_flags);
-	ip->i_eattr = be64_to_cpu(str->di_eattr);
-	/* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */
 	gfs2_set_inode_flags(&ip->i_inode);
 	height = be16_to_cpu(str->di_height);
 	if (unlikely(height > GFS2_MAX_META_HEIGHT))
@@ -332,6 +328,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	ip->i_depth = (u8)depth;
 	ip->i_entries = be32_to_cpu(str->di_entries);
 
+	ip->i_eattr = be64_to_cpu(str->di_eattr);
 	if (S_ISREG(ip->i_inode.i_mode))
 		gfs2_set_aops(&ip->i_inode);
 
@@ -552,6 +549,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
 	.go_lock = inode_go_lock,
 	.go_dump = inode_go_dump,
 	.go_type = LM_TYPE_INODE,
+	.go_min_hold_time = HZ / 5,
 	.go_flags = GLOF_ASPACE,
 };
 
@@ -562,6 +560,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
 	.go_unlock = rgrp_go_unlock,
 	.go_dump = gfs2_rgrp_dump,
 	.go_type = LM_TYPE_RGRP,
+	.go_min_hold_time = HZ / 5,
 	.go_flags = GLOF_ASPACE,
 };
 
diff --git a/trunk/fs/gfs2/incore.h b/trunk/fs/gfs2/incore.h
index 892ac37de8ae..81206e70cbf6 100644
--- a/trunk/fs/gfs2/incore.h
+++ b/trunk/fs/gfs2/incore.h
@@ -163,6 +163,7 @@ struct gfs2_glock_operations {
 	int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
 	void (*go_callback) (struct gfs2_glock *gl);
 	const int go_type;
+	const unsigned long go_min_hold_time;
 	const unsigned long go_flags;
 #define GLOF_ASPACE 1
 };
@@ -220,7 +221,6 @@ struct gfs2_glock {
 
 	unsigned int gl_hash;
 	unsigned long gl_demote_time; /* time of first demote request */
-	long gl_hold_time;
 	struct list_head gl_holders;
 
 	const struct gfs2_glock_operations *gl_ops;
@@ -285,7 +285,6 @@ struct gfs2_inode {
 	u64 i_goal;	/* goal block for allocations */
 	struct rw_semaphore i_rw_mutex;
 	struct list_head i_trunc_list;
-	__be64 *i_hash_cache;
 	u32 i_entries;
 	u32 i_diskflags;
 	u8 i_height;
diff --git a/trunk/fs/gfs2/main.c b/trunk/fs/gfs2/main.c
index 29e1ace7953d..c2b34cd2abe0 100644
--- a/trunk/fs/gfs2/main.c
+++ b/trunk/fs/gfs2/main.c
@@ -41,7 +41,6 @@ static void gfs2_init_inode_once(void *foo)
 	init_rwsem(&ip->i_rw_mutex);
 	INIT_LIST_HEAD(&ip->i_trunc_list);
 	ip->i_alloc = NULL;
-	ip->i_hash_cache = NULL;
 }
 
 static void gfs2_init_glock_once(void *foo)
diff --git a/trunk/fs/gfs2/ops_fstype.c b/trunk/fs/gfs2/ops_fstype.c
index 516516e0c2a2..2a77071fb7b6 100644
--- a/trunk/fs/gfs2/ops_fstype.c
+++ b/trunk/fs/gfs2/ops_fstype.c
@@ -1094,7 +1094,6 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
 	if (sdp->sd_args.ar_nobarrier)
 		set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
 
-	sb->s_flags |= MS_NOSEC;
 	sb->s_magic = GFS2_MAGIC;
 	sb->s_op = &gfs2_super_ops;
 	sb->s_d_op = &gfs2_dops;
diff --git a/trunk/fs/gfs2/rgrp.c b/trunk/fs/gfs2/rgrp.c
index 7f8af1eb02de..9b780df3fd54 100644
--- a/trunk/fs/gfs2/rgrp.c
+++ b/trunk/fs/gfs2/rgrp.c
@@ -1607,15 +1607,14 @@ int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation)
 }
 
 /**
- * __gfs2_free_blocks - free a contiguous run of block(s)
+ * __gfs2_free_data - free a contiguous run of data block(s)
  * @ip: the inode these blocks are being freed from
  * @bstart: first block of a run of contiguous blocks
  * @blen: the length of the block run
- * @meta: 1 if the blocks represent metadata
  *
  */
 
-void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
+void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrpd *rgd;
@@ -1632,10 +1631,53 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
 	gfs2_trans_add_rg(rgd);
 
 	/* Directories keep their data in the metadata address space */
-	if (meta || ip->i_depth)
+	if (ip->i_depth)
 		gfs2_meta_wipe(ip, bstart, blen);
 }
 
+/**
+ * gfs2_free_data - free a contiguous run of data block(s)
+ * @ip: the inode these blocks are being freed from
+ * @bstart: first block of a run of contiguous blocks
+ * @blen: the length of the block run
+ *
+ */
+
+void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+
+	__gfs2_free_data(ip, bstart, blen);
+	gfs2_statfs_change(sdp, 0, +blen, 0);
+	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
+}
+
+/**
+ * __gfs2_free_meta - free a contiguous run of metadata block(s)
+ * @ip: the inode these blocks are being freed from
+ * @bstart: first block of a run of contiguous blocks
+ * @blen: the length of the block run
+ *
+ */
+
+void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	struct gfs2_rgrpd *rgd;
+
+	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
+	if (!rgd)
+		return;
+	trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
+	rgd->rd_free += blen;
+
+	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
+	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+
+	gfs2_trans_add_rg(rgd);
+	gfs2_meta_wipe(ip, bstart, blen);
+}
+
 /**
  * gfs2_free_meta - free a contiguous run of data block(s)
  * @ip: the inode these blocks are being freed from
@@ -1648,7 +1690,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 
-	__gfs2_free_blocks(ip, bstart, blen, 1);
+	__gfs2_free_meta(ip, bstart, blen);
 	gfs2_statfs_change(sdp, 0, +blen, 0);
 	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
 }
diff --git a/trunk/fs/gfs2/rgrp.h b/trunk/fs/gfs2/rgrp.h
index d253f9a8c70e..a80e3034ac47 100644
--- a/trunk/fs/gfs2/rgrp.h
+++ b/trunk/fs/gfs2/rgrp.h
@@ -52,7 +52,9 @@ extern int gfs2_ri_update(struct gfs2_inode *ip);
 extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
 extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
 
-extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
+extern void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
+extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
+extern void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
 extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
 extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
 extern void gfs2_unlink_di(struct inode *inode);
diff --git a/trunk/fs/gfs2/super.c b/trunk/fs/gfs2/super.c
index b7beadd9ba4c..fb0edf735483 100644
--- a/trunk/fs/gfs2/super.c
+++ b/trunk/fs/gfs2/super.c
@@ -1533,7 +1533,7 @@ static void gfs2_evict_inode(struct inode *inode)
 	/* Case 3 starts here */
 	truncate_inode_pages(&inode->i_data, 0);
 	end_writeback(inode);
-	gfs2_dir_hash_inval(ip);
+
 	ip->i_gl->gl_object = NULL;
 	gfs2_glock_add_to_lru(ip->i_gl);
 	gfs2_glock_put(ip->i_gl);
diff --git a/trunk/fs/hfsplus/brec.c b/trunk/fs/hfsplus/brec.c
index 2a734cfccc92..2312de34bd42 100644
--- a/trunk/fs/hfsplus/brec.c
+++ b/trunk/fs/hfsplus/brec.c
@@ -43,10 +43,6 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec)
 			node->tree->node_size - (rec + 1) * 2);
 		if (!recoff)
 			return 0;
-		if (recoff > node->tree->node_size - 2) {
-			printk(KERN_ERR "hfs: recoff %d too large\n", recoff);
-			return 0;
-		}
 
 		retval = hfs_bnode_read_u16(node, recoff) + 2;
 		if (retval > node->tree->max_key_len + 2) {
diff --git a/trunk/fs/hfsplus/catalog.c b/trunk/fs/hfsplus/catalog.c
index 4dfbfec357e8..b4ba1b319333 100644
--- a/trunk/fs/hfsplus/catalog.c
+++ b/trunk/fs/hfsplus/catalog.c
@@ -212,9 +212,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
 
 	dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n",
 		str->name, cnid, inode->i_nlink);
-	err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
-	if (err)
-		return err;
+	hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
 
 	hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
 	entry_size = hfsplus_fill_cat_thread(sb, &entry,
@@ -271,9 +269,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
 
 	dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n",
 		str ? str->name : NULL, cnid);
-	err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
-	if (err)
-		return err;
+	hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
 
 	if (!str) {
 		int len;
@@ -351,14 +347,12 @@ int hfsplus_rename_cat(u32 cnid,
 	struct hfs_find_data src_fd, dst_fd;
 	hfsplus_cat_entry entry;
 	int entry_size, type;
-	int err;
+	int err = 0;
 
 	dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n",
 		cnid, src_dir->i_ino, src_name->name,
 		dst_dir->i_ino, dst_name->name);
-	err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd);
-	if (err)
-		return err;
+	hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd);
 	dst_fd = src_fd;
 
 	/* find the old dir entry and read the data */
diff --git a/trunk/fs/hfsplus/dir.c b/trunk/fs/hfsplus/dir.c
index 25b2443a004c..4df5059c25da 100644
--- a/trunk/fs/hfsplus/dir.c
+++ b/trunk/fs/hfsplus/dir.c
@@ -38,9 +38,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
 	sb = dir->i_sb;
 
 	dentry->d_fsdata = NULL;
-	err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
-	if (err)
-		return ERR_PTR(err);
+	hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
 	hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
 again:
 	err = hfs_brec_read(&fd, &entry, sizeof(entry));
@@ -134,9 +132,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	if (filp->f_pos >= inode->i_size)
 		return 0;
 
-	err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
-	if (err)
-		return err;
+	hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
 	hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
 	err = hfs_brec_find(&fd);
 	if (err)
diff --git a/trunk/fs/hfsplus/extents.c b/trunk/fs/hfsplus/extents.c
index 5849e3ef35cc..b1991a2a08e0 100644
--- a/trunk/fs/hfsplus/extents.c
+++ b/trunk/fs/hfsplus/extents.c
@@ -119,31 +119,22 @@ static void __hfsplus_ext_write_extent(struct inode *inode,
 	set_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags);
 }
 
-static int hfsplus_ext_write_extent_locked(struct inode *inode)
+static void hfsplus_ext_write_extent_locked(struct inode *inode)
 {
-	int res;
-
 	if (HFSPLUS_I(inode)->extent_state & HFSPLUS_EXT_DIRTY) {
 		struct hfs_find_data fd;
 
-		res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd);
-		if (res)
-			return res;
+		hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd);
 		__hfsplus_ext_write_extent(inode, &fd);
 		hfs_find_exit(&fd);
 	}
-	return 0;
 }
 
-int hfsplus_ext_write_extent(struct inode *inode)
+void hfsplus_ext_write_extent(struct inode *inode)
 {
-	int res;
-
 	mutex_lock(&HFSPLUS_I(inode)->extents_lock);
-	res = hfsplus_ext_write_extent_locked(inode);
+	hfsplus_ext_write_extent_locked(inode);
 	mutex_unlock(&HFSPLUS_I(inode)->extents_lock);
-
-	return res;
 }
 
 static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd,
@@ -203,11 +194,9 @@ static int hfsplus_ext_read_extent(struct inode *inode, u32 block)
 	    block < hip->cached_start + hip->cached_blocks)
 		return 0;
 
-	res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd);
-	if (!res) {
-		res = __hfsplus_ext_cache_extent(&fd, inode, block);
-		hfs_find_exit(&fd);
-	}
+	hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd);
+	res = __hfsplus_ext_cache_extent(&fd, inode, block);
+	hfs_find_exit(&fd);
 	return res;
 }
 
@@ -220,7 +209,6 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
 	struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
 	int res = -EIO;
 	u32 ablock, dblock, mask;
-	sector_t sector;
 	int was_dirty = 0;
 	int shift;
 
@@ -267,12 +255,10 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
 done:
 	dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n",
 		inode->i_ino, (long long)iblock, dblock);
-
 	mask = (1 << sbi->fs_shift) - 1;
-	sector = ((sector_t)dblock << sbi->fs_shift) +
-		  sbi->blockoffset + (iblock & mask);
-	map_bh(bh_result, sb, sector);
-
+	map_bh(bh_result, sb,
+		(dblock << sbi->fs_shift) + sbi->blockoffset +
+			(iblock & mask));
 	if (create) {
 		set_buffer_new(bh_result);
 		hip->phys_size += sb->s_blocksize;
@@ -385,9 +371,7 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid,
 	if (total_blocks == blocks)
 		return 0;
 
-	res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd);
-	if (res)
-		return res;
+	hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd);
 	do {
 		res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid,
 						total_blocks, type);
@@ -485,9 +469,7 @@ int hfsplus_file_extend(struct inode *inode)
 
 insert_extent:
 	dprint(DBG_EXTENT, "insert new extent\n");
-	res = hfsplus_ext_write_extent_locked(inode);
-	if (res)
-		goto out;
+	hfsplus_ext_write_extent_locked(inode);
 
 	memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec));
 	hip->cached_extents[0].start_block = cpu_to_be32(start);
@@ -518,6 +500,7 @@ void hfsplus_file_truncate(struct inode *inode)
 		struct page *page;
 		void *fsdata;
 		u32 size = inode->i_size;
+		int res;
 
 		res = pagecache_write_begin(NULL, mapping, size, 0,
 						AOP_FLAG_UNINTERRUPTIBLE,
@@ -540,12 +523,7 @@ void hfsplus_file_truncate(struct inode *inode)
 		goto out;
 
 	mutex_lock(&hip->extents_lock);
-	res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd);
-	if (res) {
-		mutex_unlock(&hip->extents_lock);
-		/* XXX: We lack error handling of hfsplus_file_truncate() */
-		return;
-	}
+	hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd);
 	while (1) {
 		if (alloc_cnt == hip->first_blocks) {
 			hfsplus_free_extents(sb, hip->first_extents,
diff --git a/trunk/fs/hfsplus/hfsplus_fs.h b/trunk/fs/hfsplus/hfsplus_fs.h
index 81dfd1e495e3..d6857523336d 100644
--- a/trunk/fs/hfsplus/hfsplus_fs.h
+++ b/trunk/fs/hfsplus/hfsplus_fs.h
@@ -13,7 +13,6 @@
 #include <linux/fs.h>
 #include <linux/mutex.h>
 #include <linux/buffer_head.h>
-#include <linux/blkdev.h>
 #include "hfsplus_raw.h"
 
 #define DBG_BNODE_REFS	0x00000001
@@ -111,9 +110,7 @@ struct hfsplus_vh;
 struct hfs_btree;
 
 struct hfsplus_sb_info {
-	void *s_vhdr_buf;
 	struct hfsplus_vh *s_vhdr;
-	void *s_backup_vhdr_buf;
 	struct hfsplus_vh *s_backup_vhdr;
 	struct hfs_btree *ext_tree;
 	struct hfs_btree *cat_tree;
@@ -261,15 +258,6 @@ struct hfsplus_readdir_data {
 	struct hfsplus_cat_key key;
 };
 
-/*
- * Find minimum acceptible I/O size for an hfsplus sb.
- */
-static inline unsigned short hfsplus_min_io_size(struct super_block *sb)
-{
-	return max_t(unsigned short, bdev_logical_block_size(sb->s_bdev),
-		     HFSPLUS_SECTOR_SIZE);
-}
-
 #define hfs_btree_open hfsplus_btree_open
 #define hfs_btree_close hfsplus_btree_close
 #define hfs_btree_write hfsplus_btree_write
@@ -386,7 +374,7 @@ extern const struct file_operations hfsplus_dir_operations;
 
 /* extents.c */
 int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
-int hfsplus_ext_write_extent(struct inode *);
+void hfsplus_ext_write_extent(struct inode *);
 int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int);
 int hfsplus_free_fork(struct super_block *, u32,
 		struct hfsplus_fork_raw *, int);
@@ -448,8 +436,8 @@ int hfsplus_compare_dentry(const struct dentry *parent,
 /* wrapper.c */
 int hfsplus_read_wrapper(struct super_block *);
 int hfs_part_find(struct super_block *, sector_t *, sector_t *);
-int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
-		void *buf, void **data, int rw);
+int hfsplus_submit_bio(struct block_device *bdev, sector_t sector,
+		void *data, int rw);
 
 /* time macros */
 #define __hfsp_mt2ut(t)		(be32_to_cpu(t) - 2082844800U)
diff --git a/trunk/fs/hfsplus/inode.c b/trunk/fs/hfsplus/inode.c
index 010cd363d085..b248a6cfcad9 100644
--- a/trunk/fs/hfsplus/inode.c
+++ b/trunk/fs/hfsplus/inode.c
@@ -195,13 +195,11 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir,
 	hip->flags = 0;
 	set_bit(HFSPLUS_I_RSRC, &hip->flags);
 
-	err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
-	if (!err) {
-		err = hfsplus_find_cat(sb, dir->i_ino, &fd);
-		if (!err)
-			err = hfsplus_cat_read_inode(inode, &fd);
-		hfs_find_exit(&fd);
-	}
+	hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
+	err = hfsplus_find_cat(sb, dir->i_ino, &fd);
+	if (!err)
+		err = hfsplus_cat_read_inode(inode, &fd);
+	hfs_find_exit(&fd);
 	if (err) {
 		iput(inode);
 		return ERR_PTR(err);
diff --git a/trunk/fs/hfsplus/part_tbl.c b/trunk/fs/hfsplus/part_tbl.c
index eb355d81e279..40ad88c12c64 100644
--- a/trunk/fs/hfsplus/part_tbl.c
+++ b/trunk/fs/hfsplus/part_tbl.c
@@ -88,12 +88,11 @@ static int hfs_parse_old_pmap(struct super_block *sb, struct old_pmap *pm,
 	return -ENOENT;
 }
 
-static int hfs_parse_new_pmap(struct super_block *sb, void *buf,
-		struct new_pmap *pm, sector_t *part_start, sector_t *part_size)
+static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm,
+		sector_t *part_start, sector_t *part_size)
 {
 	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
 	int size = be32_to_cpu(pm->pmMapBlkCnt);
-	int buf_size = hfsplus_min_io_size(sb);
 	int res;
 	int i = 0;
 
@@ -108,14 +107,11 @@ static int hfs_parse_new_pmap(struct super_block *sb, void *buf,
 		if (++i >= size)
 			return -ENOENT;
 
-		pm = (struct new_pmap *)((u8 *)pm + HFSPLUS_SECTOR_SIZE);
-		if ((u8 *)pm - (u8 *)buf >= buf_size) {
-			res = hfsplus_submit_bio(sb,
-						 *part_start + HFS_PMAP_BLK + i,
-						 buf, (void **)&pm, READ);
-			if (res)
-				return res;
-		}
+		res = hfsplus_submit_bio(sb->s_bdev,
+					 *part_start + HFS_PMAP_BLK + i,
+					 pm, READ);
+		if (res)
+			return res;
 	} while (pm->pmSig == cpu_to_be16(HFS_NEW_PMAP_MAGIC));
 
 	return -ENOENT;
@@ -128,15 +124,15 @@ static int hfs_parse_new_pmap(struct super_block *sb, void *buf,
 int hfs_part_find(struct super_block *sb,
 		sector_t *part_start, sector_t *part_size)
 {
-	void *buf, *data;
+	void *data;
 	int res;
 
-	buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
-	if (!buf)
+	data = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL);
+	if (!data)
 		return -ENOMEM;
 
-	res = hfsplus_submit_bio(sb, *part_start + HFS_PMAP_BLK,
-				 buf, &data, READ);
+	res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK,
+				 data, READ);
 	if (res)
 		goto out;
 
@@ -145,13 +141,13 @@ int hfs_part_find(struct super_block *sb,
 		res = hfs_parse_old_pmap(sb, data, part_start, part_size);
 		break;
 	case HFS_NEW_PMAP_MAGIC:
-		res = hfs_parse_new_pmap(sb, buf, data, part_start, part_size);
+		res = hfs_parse_new_pmap(sb, data, part_start, part_size);
 		break;
 	default:
 		res = -ENOENT;
 		break;
 	}
 out:
-	kfree(buf);
+	kfree(data);
 	return res;
 }
diff --git a/trunk/fs/hfsplus/super.c b/trunk/fs/hfsplus/super.c
index c106ca22e812..84a47b709f51 100644
--- a/trunk/fs/hfsplus/super.c
+++ b/trunk/fs/hfsplus/super.c
@@ -73,13 +73,11 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino)
 
 	if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID ||
 	    inode->i_ino == HFSPLUS_ROOT_CNID) {
-		err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
-		if (!err) {
-			err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
-			if (!err)
-				err = hfsplus_cat_read_inode(inode, &fd);
-			hfs_find_exit(&fd);
-		}
+		hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
+		err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
+		if (!err)
+			err = hfsplus_cat_read_inode(inode, &fd);
+		hfs_find_exit(&fd);
 	} else {
 		err = hfsplus_system_read_inode(inode);
 	}
@@ -135,13 +133,9 @@ static int hfsplus_system_write_inode(struct inode *inode)
 static int hfsplus_write_inode(struct inode *inode,
 		struct writeback_control *wbc)
 {
-	int err;
-
 	dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino);
 
-	err = hfsplus_ext_write_extent(inode);
-	if (err)
-		return err;
+	hfsplus_ext_write_extent(inode);
 
 	if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID ||
 	    inode->i_ino == HFSPLUS_ROOT_CNID)
@@ -203,17 +197,17 @@ int hfsplus_sync_fs(struct super_block *sb, int wait)
 		write_backup = 1;
 	}
 
-	error2 = hfsplus_submit_bio(sb,
+	error2 = hfsplus_submit_bio(sb->s_bdev,
 				   sbi->part_start + HFSPLUS_VOLHEAD_SECTOR,
-				   sbi->s_vhdr_buf, NULL, WRITE_SYNC);
+				   sbi->s_vhdr, WRITE_SYNC);
 	if (!error)
 		error = error2;
 	if (!write_backup)
 		goto out;
 
-	error2 = hfsplus_submit_bio(sb,
+	error2 = hfsplus_submit_bio(sb->s_bdev,
 				  sbi->part_start + sbi->sect_count - 2,
-				  sbi->s_backup_vhdr_buf, NULL, WRITE_SYNC);
+				  sbi->s_backup_vhdr, WRITE_SYNC);
 	if (!error)
 		error2 = error;
 out:
@@ -257,8 +251,8 @@ static void hfsplus_put_super(struct super_block *sb)
 	hfs_btree_close(sbi->ext_tree);
 	iput(sbi->alloc_file);
 	iput(sbi->hidden_dir);
-	kfree(sbi->s_vhdr_buf);
-	kfree(sbi->s_backup_vhdr_buf);
+	kfree(sbi->s_vhdr);
+	kfree(sbi->s_backup_vhdr);
 	unload_nls(sbi->nls);
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
@@ -399,13 +393,6 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 	if (!sbi->rsrc_clump_blocks)
 		sbi->rsrc_clump_blocks = 1;
 
-	err = generic_check_addressable(sbi->alloc_blksz_shift,
-					sbi->total_blocks);
-	if (err) {
-		printk(KERN_ERR "hfs: filesystem size too large.\n");
-		goto out_free_vhdr;
-	}
-
 	/* Set up operations so we can load metadata */
 	sb->s_op = &hfsplus_sops;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -430,8 +417,6 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 		sb->s_flags |= MS_RDONLY;
 	}
 
-	err = -EINVAL;
-
 	/* Load metadata objects (B*Trees) */
 	sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID);
 	if (!sbi->ext_tree) {
@@ -462,9 +447,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 
 	str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
 	str.name = HFSP_HIDDENDIR_NAME;
-	err = hfs_find_init(sbi->cat_tree, &fd);
-	if (err)
-		goto out_put_root;
+	hfs_find_init(sbi->cat_tree, &fd);
 	hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
 	if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
 		hfs_find_exit(&fd);
diff --git a/trunk/fs/hfsplus/unicode.c b/trunk/fs/hfsplus/unicode.c
index a32998f29f0b..a3f0bfcc881e 100644
--- a/trunk/fs/hfsplus/unicode.c
+++ b/trunk/fs/hfsplus/unicode.c
@@ -142,11 +142,7 @@ int hfsplus_uni2asc(struct super_block *sb,
 		/* search for single decomposed char */
 		if (likely(compose))
 			ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
-		if (ce1)
-			cc = ce1[0];
-		else
-			cc = 0;
-		if (cc) {
+		if (ce1 && (cc = ce1[0])) {
 			/* start of a possibly decomposed Hangul char */
 			if (cc != 0xffff)
 				goto done;
@@ -213,8 +209,7 @@ int hfsplus_uni2asc(struct super_block *sb,
 				i++;
 				ce2 = ce1;
 			}
-			cc = ce2[0];
-			if (cc) {
+			if ((cc = ce2[0])) {
 				ip += i;
 				ustrlen -= i;
 				goto done;
@@ -306,11 +301,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
 	while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
 		size = asc2unichar(sb, astr, len, &c);
 
-		if (decompose)
-			dstr = decompose_unichar(c, &dsize);
-		else
-			dstr = NULL;
-		if (dstr) {
+		if (decompose && (dstr = decompose_unichar(c, &dsize))) {
 			if (outlen + dsize > HFSPLUS_MAX_STRLEN)
 				break;
 			do {
@@ -355,23 +346,15 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
 		astr += size;
 		len -= size;
 
-		if (decompose)
-			dstr = decompose_unichar(c, &dsize);
-		else
-			dstr = NULL;
-		if (dstr) {
+		if (decompose && (dstr = decompose_unichar(c, &dsize))) {
 			do {
 				c2 = *dstr++;
-				if (casefold)
-					c2 = case_fold(c2);
-				if (!casefold || c2)
+				if (!casefold || (c2 = case_fold(c2)))
 					hash = partial_name_hash(c2, hash);
 			} while (--dsize > 0);
 		} else {
 			c2 = c;
-			if (casefold)
-				c2 = case_fold(c2);
-			if (!casefold || c2)
+			if (!casefold || (c2 = case_fold(c2)))
 				hash = partial_name_hash(c2, hash);
 		}
 	}
@@ -439,14 +422,12 @@ int hfsplus_compare_dentry(const struct dentry *parent,
 		c1 = *dstr1;
 		c2 = *dstr2;
 		if (casefold) {
-			c1 = case_fold(c1);
-			if (!c1) {
+			if  (!(c1 = case_fold(c1))) {
 				dstr1++;
 				dsize1--;
 				continue;
 			}
-			c2 = case_fold(c2);
-			if (!c2) {
+			if (!(c2 = case_fold(c2))) {
 				dstr2++;
 				dsize2--;
 				continue;
diff --git a/trunk/fs/hfsplus/wrapper.c b/trunk/fs/hfsplus/wrapper.c
index 10e515a0d452..4ac88ff79aa6 100644
--- a/trunk/fs/hfsplus/wrapper.c
+++ b/trunk/fs/hfsplus/wrapper.c
@@ -31,67 +31,25 @@ static void hfsplus_end_io_sync(struct bio *bio, int err)
 	complete(bio->bi_private);
 }
 
-/*
- * hfsplus_submit_bio - Perfrom block I/O
- * @sb: super block of volume for I/O
- * @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes
- * @buf: buffer for I/O
- * @data: output pointer for location of requested data
- * @rw: direction of I/O
- *
- * The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than
- * HFSPLUS_SECTOR_SIZE, and @buf must be sized accordingly. On reads
- * @data will return a pointer to the start of the requested sector,
- * which may not be the same location as @buf.
- *
- * If @sector is not aligned to the bdev logical block size it will
- * be rounded down. For writes this means that @buf should contain data
- * that starts at the rounded-down address. As long as the data was
- * read using hfsplus_submit_bio() and the same buffer is used things
- * will work correctly.
- */
-int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
-		void *buf, void **data, int rw)
+int hfsplus_submit_bio(struct block_device *bdev, sector_t sector,
+		void *data, int rw)
 {
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct bio *bio;
 	int ret = 0;
-	unsigned int io_size;
-	loff_t start;
-	int offset;
-
-	/*
-	 * Align sector to hardware sector size and find offset. We
-	 * assume that io_size is a power of two, which _should_
-	 * be true.
-	 */
-	io_size = hfsplus_min_io_size(sb);
-	start = (loff_t)sector << HFSPLUS_SECTOR_SHIFT;
-	offset = start & (io_size - 1);
-	sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1);
 
 	bio = bio_alloc(GFP_NOIO, 1);
 	bio->bi_sector = sector;
-	bio->bi_bdev = sb->s_bdev;
+	bio->bi_bdev = bdev;
 	bio->bi_end_io = hfsplus_end_io_sync;
 	bio->bi_private = &wait;
 
-	if (!(rw & WRITE) && data)
-		*data = (u8 *)buf + offset;
-
-	while (io_size > 0) {
-		unsigned int page_offset = offset_in_page(buf);
-		unsigned int len = min_t(unsigned int, PAGE_SIZE - page_offset,
-					 io_size);
-
-		ret = bio_add_page(bio, virt_to_page(buf), len, page_offset);
-		if (ret != len) {
-			ret = -EIO;
-			goto out;
-		}
-		io_size -= len;
-		buf = (u8 *)buf + len;
-	}
+	/*
+	 * We always submit one sector at a time, so bio_add_page must not fail.
+	 */
+	if (bio_add_page(bio, virt_to_page(data), HFSPLUS_SECTOR_SIZE,
+			 offset_in_page(data)) != HFSPLUS_SECTOR_SIZE)
+		BUG();
 
 	submit_bio(rw, bio);
 	wait_for_completion(&wait);
@@ -99,9 +57,8 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
 	if (!bio_flagged(bio, BIO_UPTODATE))
 		ret = -EIO;
 
-out:
 	bio_put(bio);
-	return ret < 0 ? ret : 0;
+	return ret;
 }
 
 static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
@@ -184,19 +141,23 @@ int hfsplus_read_wrapper(struct super_block *sb)
 
 	if (hfsplus_get_last_session(sb, &part_start, &part_size))
 		goto out;
+	if ((u64)part_start + part_size > 0x100000000ULL) {
+		pr_err("hfs: volumes larger than 2TB are not supported yet\n");
+		goto out;
+	}
 
 	error = -ENOMEM;
-	sbi->s_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
-	if (!sbi->s_vhdr_buf)
+	sbi->s_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL);
+	if (!sbi->s_vhdr)
 		goto out;
-	sbi->s_backup_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
-	if (!sbi->s_backup_vhdr_buf)
+	sbi->s_backup_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL);
+	if (!sbi->s_backup_vhdr)
 		goto out_free_vhdr;
 
 reread:
-	error = hfsplus_submit_bio(sb, part_start + HFSPLUS_VOLHEAD_SECTOR,
-				   sbi->s_vhdr_buf, (void **)&sbi->s_vhdr,
-				   READ);
+	error = hfsplus_submit_bio(sb->s_bdev,
+				   part_start + HFSPLUS_VOLHEAD_SECTOR,
+				   sbi->s_vhdr, READ);
 	if (error)
 		goto out_free_backup_vhdr;
 
@@ -211,9 +172,8 @@ int hfsplus_read_wrapper(struct super_block *sb)
 		if (!hfsplus_read_mdb(sbi->s_vhdr, &wd))
 			goto out_free_backup_vhdr;
 		wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT;
-		part_start += (sector_t)wd.ablk_start +
-			       (sector_t)wd.embed_start * wd.ablk_size;
-		part_size = (sector_t)wd.embed_count * wd.ablk_size;
+		part_start += wd.ablk_start + wd.embed_start * wd.ablk_size;
+		part_size = wd.embed_count * wd.ablk_size;
 		goto reread;
 	default:
 		/*
@@ -226,9 +186,9 @@ int hfsplus_read_wrapper(struct super_block *sb)
 		goto reread;
 	}
 
-	error = hfsplus_submit_bio(sb, part_start + part_size - 2,
-				   sbi->s_backup_vhdr_buf,
-				   (void **)&sbi->s_backup_vhdr, READ);
+	error = hfsplus_submit_bio(sb->s_bdev,
+				   part_start + part_size - 2,
+				   sbi->s_backup_vhdr, READ);
 	if (error)
 		goto out_free_backup_vhdr;
 
diff --git a/trunk/fs/ubifs/commit.c b/trunk/fs/ubifs/commit.c
index fb3b5c813a30..87cd0ead8633 100644
--- a/trunk/fs/ubifs/commit.c
+++ b/trunk/fs/ubifs/commit.c
@@ -78,7 +78,7 @@ static int nothing_to_commit(struct ubifs_info *c)
 	 * If the root TNC node is dirty, we definitely have something to
 	 * commit.
 	 */
-	if (c->zroot.znode && ubifs_zn_dirty(c->zroot.znode))
+	if (c->zroot.znode && test_bit(DIRTY_ZNODE, &c->zroot.znode->flags))
 		return 0;
 
 	/*
@@ -418,7 +418,7 @@ int ubifs_run_commit(struct ubifs_info *c)
 
 	spin_lock(&c->cs_lock);
 	if (c->cmt_state == COMMIT_BROKEN) {
-		err = -EROFS;
+		err = -EINVAL;
 		goto out;
 	}
 
@@ -444,7 +444,7 @@ int ubifs_run_commit(struct ubifs_info *c)
 	 * re-check it.
 	 */
 	if (c->cmt_state == COMMIT_BROKEN) {
-		err = -EROFS;
+		err = -EINVAL;
 		goto out_cmt_unlock;
 	}
 
@@ -576,7 +576,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
 	struct idx_node *i;
 	size_t sz;
 
-	if (!dbg_is_chk_index(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX))
 		return 0;
 
 	INIT_LIST_HEAD(&list);
diff --git a/trunk/fs/ubifs/debug.c b/trunk/fs/ubifs/debug.c
index eef109a1a927..0bb2bcef0de9 100644
--- a/trunk/fs/ubifs/debug.c
+++ b/trunk/fs/ubifs/debug.c
@@ -27,12 +27,13 @@
  * various local functions of those subsystems.
  */
 
+#define UBIFS_DBG_PRESERVE_UBI
+
+#include "ubifs.h"
 #include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/debugfs.h>
 #include <linux/math64.h>
-#include <linux/uaccess.h>
-#include <linux/random.h>
-#include "ubifs.h"
 
 #ifdef CONFIG_UBIFS_FS_DEBUG
 
@@ -41,6 +42,15 @@ DEFINE_SPINLOCK(dbg_lock);
 static char dbg_key_buf0[128];
 static char dbg_key_buf1[128];
 
+unsigned int ubifs_chk_flags;
+unsigned int ubifs_tst_flags;
+
+module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR);
+module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR);
+
+MODULE_PARM_DESC(debug_chks, "Debug check flags");
+MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
+
 static const char *get_key_fmt(int fmt)
 {
 	switch (fmt) {
@@ -81,28 +91,6 @@ static const char *get_key_type(int type)
 	}
 }
 
-static const char *get_dent_type(int type)
-{
-	switch (type) {
-	case UBIFS_ITYPE_REG:
-		return "file";
-	case UBIFS_ITYPE_DIR:
-		return "dir";
-	case UBIFS_ITYPE_LNK:
-		return "symlink";
-	case UBIFS_ITYPE_BLK:
-		return "blkdev";
-	case UBIFS_ITYPE_CHR:
-		return "char dev";
-	case UBIFS_ITYPE_FIFO:
-		return "fifo";
-	case UBIFS_ITYPE_SOCK:
-		return "socket";
-	default:
-		return "unknown/invalid type";
-	}
-}
-
 static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key,
 			char *buffer)
 {
@@ -246,13 +234,9 @@ static void dump_ch(const struct ubifs_ch *ch)
 	printk(KERN_DEBUG "\tlen            %u\n", le32_to_cpu(ch->len));
 }
 
-void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode)
+void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode)
 {
 	const struct ubifs_inode *ui = ubifs_inode(inode);
-	struct qstr nm = { .name = NULL };
-	union ubifs_key key;
-	struct ubifs_dent_node *dent, *pdent = NULL;
-	int count = 2;
 
 	printk(KERN_DEBUG "Dump in-memory inode:");
 	printk(KERN_DEBUG "\tinode          %lu\n", inode->i_ino);
@@ -286,32 +270,6 @@ void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode)
 	printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read);
 	printk(KERN_DEBUG "\tread_in_a_row  %lu\n", ui->read_in_a_row);
 	printk(KERN_DEBUG "\tdata_len       %d\n", ui->data_len);
-
-	if (!S_ISDIR(inode->i_mode))
-		return;
-
-	printk(KERN_DEBUG "List of directory entries:\n");
-	ubifs_assert(!mutex_is_locked(&c->tnc_mutex));
-
-	lowest_dent_key(c, &key, inode->i_ino);
-	while (1) {
-		dent = ubifs_tnc_next_ent(c, &key, &nm);
-		if (IS_ERR(dent)) {
-			if (PTR_ERR(dent) != -ENOENT)
-				printk(KERN_DEBUG "error %ld\n", PTR_ERR(dent));
-			break;
-		}
-
-		printk(KERN_DEBUG "\t%d: %s (%s)\n",
-		       count++, dent->name, get_dent_type(dent->type));
-
-		nm.name = dent->name;
-		nm.len = le16_to_cpu(dent->nlen);
-		kfree(pdent);
-		pdent = dent;
-		key_read(c, &dent->key, &key);
-	}
-	kfree(pdent);
 }
 
 void dbg_dump_node(const struct ubifs_info *c, const void *node)
@@ -320,7 +278,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
 	union ubifs_key key;
 	const struct ubifs_ch *ch = node;
 
-	if (dbg_is_tst_rcvry(c))
+	if (dbg_failure_mode)
 		return;
 
 	/* If the magic is incorrect, just hexdump the first bytes */
@@ -876,7 +834,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
 	struct ubifs_scan_node *snod;
 	void *buf;
 
-	if (dbg_is_tst_rcvry(c))
+	if (dbg_failure_mode)
 		return;
 
 	printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
@@ -1122,7 +1080,6 @@ int dbg_check_space_info(struct ubifs_info *c)
 
 /**
  * dbg_check_synced_i_size - check synchronized inode size.
- * @c: UBIFS file-system description object
  * @inode: inode to check
  *
  * If inode is clean, synchronized inode size has to be equivalent to current
@@ -1130,12 +1087,12 @@ int dbg_check_space_info(struct ubifs_info *c)
  * has to be locked). Returns %0 if synchronized inode size if correct, and
  * %-EINVAL if not.
  */
-int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode)
+int dbg_check_synced_i_size(struct inode *inode)
 {
 	int err = 0;
 	struct ubifs_inode *ui = ubifs_inode(inode);
 
-	if (!dbg_is_chk_gen(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
 		return 0;
 	if (!S_ISREG(inode->i_mode))
 		return 0;
@@ -1168,7 +1125,7 @@ int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode)
  * Note, it is good idea to make sure the @dir->i_mutex is locked before
  * calling this function.
  */
-int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
+int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir)
 {
 	unsigned int nlink = 2;
 	union ubifs_key key;
@@ -1176,7 +1133,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
 	struct qstr nm = { .name = NULL };
 	loff_t size = UBIFS_INO_NODE_SZ;
 
-	if (!dbg_is_chk_gen(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
 		return 0;
 
 	if (!S_ISDIR(dir->i_mode))
@@ -1210,14 +1167,12 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
 			  "but calculated size is %llu", dir->i_ino,
 			  (unsigned long long)i_size_read(dir),
 			  (unsigned long long)size);
-		dbg_dump_inode(c, dir);
 		dump_stack();
 		return -EINVAL;
 	}
 	if (dir->i_nlink != nlink) {
 		ubifs_err("directory inode %lu has nlink %u, but calculated "
 			  "nlink is %u", dir->i_ino, dir->i_nlink, nlink);
-		dbg_dump_inode(c, dir);
 		dump_stack();
 		return -EINVAL;
 	}
@@ -1534,7 +1489,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra)
 	long clean_cnt = 0, dirty_cnt = 0;
 	int err, last;
 
-	if (!dbg_is_chk_index(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_TNC))
 		return 0;
 
 	ubifs_assert(mutex_is_locked(&c->tnc_mutex));
@@ -1781,7 +1736,7 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size)
 	int err;
 	long long calc = 0;
 
-	if (!dbg_is_chk_index(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ))
 		return 0;
 
 	err = dbg_walk_index(c, NULL, add_size, &calc);
@@ -2357,7 +2312,7 @@ int dbg_check_filesystem(struct ubifs_info *c)
 	int err;
 	struct fsck_data fsckd;
 
-	if (!dbg_is_chk_fs(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_FS))
 		return 0;
 
 	fsckd.inodes = RB_ROOT;
@@ -2392,7 +2347,7 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
 	struct list_head *cur;
 	struct ubifs_scan_node *sa, *sb;
 
-	if (!dbg_is_chk_gen(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
 		return 0;
 
 	for (cur = head->next; cur->next != head; cur = cur->next) {
@@ -2459,7 +2414,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
 	struct list_head *cur;
 	struct ubifs_scan_node *sa, *sb;
 
-	if (!dbg_is_chk_gen(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
 		return 0;
 
 	for (cur = head->next; cur->next != head; cur = cur->next) {
@@ -2536,141 +2491,214 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
 	return 0;
 }
 
-static inline int chance(unsigned int n, unsigned int out_of)
+int dbg_force_in_the_gaps(void)
 {
-	return !!((random32() % out_of) + 1 <= n);
+	if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+		return 0;
 
+	return !(random32() & 7);
 }
 
-static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
+/* Failure mode for recovery testing */
+
+#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d))
+
+struct failure_mode_info {
+	struct list_head list;
+	struct ubifs_info *c;
+};
+
+static LIST_HEAD(fmi_list);
+static DEFINE_SPINLOCK(fmi_lock);
+
+static unsigned int next;
+
+static int simple_rand(void)
 {
-	struct ubifs_debug_info *d = c->dbg;
+	if (next == 0)
+		next = current->pid;
+	next = next * 1103515245 + 12345;
+	return (next >> 16) & 32767;
+}
+
+static void failure_mode_init(struct ubifs_info *c)
+{
+	struct failure_mode_info *fmi;
 
-	ubifs_assert(dbg_is_tst_rcvry(c));
+	fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS);
+	if (!fmi) {
+		ubifs_err("Failed to register failure mode - no memory");
+		return;
+	}
+	fmi->c = c;
+	spin_lock(&fmi_lock);
+	list_add_tail(&fmi->list, &fmi_list);
+	spin_unlock(&fmi_lock);
+}
+
+static void failure_mode_exit(struct ubifs_info *c)
+{
+	struct failure_mode_info *fmi, *tmp;
 
-	if (!d->pc_cnt) {
-		/* First call - decide delay to the power cut */
+	spin_lock(&fmi_lock);
+	list_for_each_entry_safe(fmi, tmp, &fmi_list, list)
+		if (fmi->c == c) {
+			list_del(&fmi->list);
+			kfree(fmi);
+		}
+	spin_unlock(&fmi_lock);
+}
+
+static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc)
+{
+	struct failure_mode_info *fmi;
+
+	spin_lock(&fmi_lock);
+	list_for_each_entry(fmi, &fmi_list, list)
+		if (fmi->c->ubi == desc) {
+			struct ubifs_info *c = fmi->c;
+
+			spin_unlock(&fmi_lock);
+			return c;
+		}
+	spin_unlock(&fmi_lock);
+	return NULL;
+}
+
+static int in_failure_mode(struct ubi_volume_desc *desc)
+{
+	struct ubifs_info *c = dbg_find_info(desc);
+
+	if (c && dbg_failure_mode)
+		return c->dbg->failure_mode;
+	return 0;
+}
+
+static int do_fail(struct ubi_volume_desc *desc, int lnum, int write)
+{
+	struct ubifs_info *c = dbg_find_info(desc);
+	struct ubifs_debug_info *d;
+
+	if (!c || !dbg_failure_mode)
+		return 0;
+	d = c->dbg;
+	if (d->failure_mode)
+		return 1;
+	if (!d->fail_cnt) {
+		/* First call - decide delay to failure */
 		if (chance(1, 2)) {
-			unsigned long delay;
+			unsigned int delay = 1 << (simple_rand() >> 11);
 
 			if (chance(1, 2)) {
-				d->pc_delay = 1;
-				/* Fail withing 1 minute */
-				delay = random32() % 60000;
-				d->pc_timeout = jiffies;
-				d->pc_timeout += msecs_to_jiffies(delay);
-				ubifs_warn("failing after %lums", delay);
+				d->fail_delay = 1;
+				d->fail_timeout = jiffies +
+						  msecs_to_jiffies(delay);
+				dbg_rcvry("failing after %ums", delay);
 			} else {
-				d->pc_delay = 2;
-				delay = random32() % 10000;
-				/* Fail within 10000 operations */
-				d->pc_cnt_max = delay;
-				ubifs_warn("failing after %lu calls", delay);
+				d->fail_delay = 2;
+				d->fail_cnt_max = delay;
+				dbg_rcvry("failing after %u calls", delay);
 			}
 		}
-
-		d->pc_cnt += 1;
+		d->fail_cnt += 1;
 	}
-
 	/* Determine if failure delay has expired */
-	if (d->pc_delay == 1 && time_before(jiffies, d->pc_timeout))
+	if (d->fail_delay == 1) {
+		if (time_before(jiffies, d->fail_timeout))
 			return 0;
-	if (d->pc_delay == 2 && d->pc_cnt++ < d->pc_cnt_max)
+	} else if (d->fail_delay == 2)
+		if (d->fail_cnt++ < d->fail_cnt_max)
 			return 0;
-
 	if (lnum == UBIFS_SB_LNUM) {
-		if (write && chance(1, 2))
-			return 0;
-		if (chance(19, 20))
+		if (write) {
+			if (chance(1, 2))
+				return 0;
+		} else if (chance(19, 20))
 			return 0;
-		ubifs_warn("failing in super block LEB %d", lnum);
+		dbg_rcvry("failing in super block LEB %d", lnum);
 	} else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) {
 		if (chance(19, 20))
 			return 0;
-		ubifs_warn("failing in master LEB %d", lnum);
+		dbg_rcvry("failing in master LEB %d", lnum);
 	} else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) {
-		if (write && chance(99, 100))
+		if (write) {
+			if (chance(99, 100))
+				return 0;
+		} else if (chance(399, 400))
 			return 0;
-		if (chance(399, 400))
-			return 0;
-		ubifs_warn("failing in log LEB %d", lnum);
+		dbg_rcvry("failing in log LEB %d", lnum);
 	} else if (lnum >= c->lpt_first && lnum <= c->lpt_last) {
-		if (write && chance(7, 8))
+		if (write) {
+			if (chance(7, 8))
+				return 0;
+		} else if (chance(19, 20))
 			return 0;
-		if (chance(19, 20))
-			return 0;
-		ubifs_warn("failing in LPT LEB %d", lnum);
+		dbg_rcvry("failing in LPT LEB %d", lnum);
 	} else if (lnum >= c->orph_first && lnum <= c->orph_last) {
-		if (write && chance(1, 2))
-			return 0;
-		if (chance(9, 10))
+		if (write) {
+			if (chance(1, 2))
+				return 0;
+		} else if (chance(9, 10))
 			return 0;
-		ubifs_warn("failing in orphan LEB %d", lnum);
+		dbg_rcvry("failing in orphan LEB %d", lnum);
 	} else if (lnum == c->ihead_lnum) {
 		if (chance(99, 100))
 			return 0;
-		ubifs_warn("failing in index head LEB %d", lnum);
+		dbg_rcvry("failing in index head LEB %d", lnum);
 	} else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) {
 		if (chance(9, 10))
 			return 0;
-		ubifs_warn("failing in GC head LEB %d", lnum);
+		dbg_rcvry("failing in GC head LEB %d", lnum);
 	} else if (write && !RB_EMPTY_ROOT(&c->buds) &&
 		   !ubifs_search_bud(c, lnum)) {
 		if (chance(19, 20))
 			return 0;
-		ubifs_warn("failing in non-bud LEB %d", lnum);
+		dbg_rcvry("failing in non-bud LEB %d", lnum);
 	} else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND ||
 		   c->cmt_state == COMMIT_RUNNING_REQUIRED) {
 		if (chance(999, 1000))
 			return 0;
-		ubifs_warn("failing in bud LEB %d commit running", lnum);
+		dbg_rcvry("failing in bud LEB %d commit running", lnum);
 	} else {
 		if (chance(9999, 10000))
 			return 0;
-		ubifs_warn("failing in bud LEB %d commit not running", lnum);
+		dbg_rcvry("failing in bud LEB %d commit not running", lnum);
 	}
-
-	d->pc_happened = 1;
-	ubifs_warn("========== Power cut emulated ==========");
+	ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum);
+	d->failure_mode = 1;
 	dump_stack();
 	return 1;
 }
 
-static void cut_data(const void *buf, unsigned int len)
+static void cut_data(const void *buf, int len)
 {
-	unsigned int from, to, i, ffs = chance(1, 2);
+	int flen, i;
 	unsigned char *p = (void *)buf;
 
-	from = random32() % (len + 1);
-	if (chance(1, 2))
-		to = random32() % (len - from + 1);
-	else
-		to = len;
-
-	if (from < to)
-		ubifs_warn("filled bytes %u-%u with %s", from, to - 1,
-			   ffs ? "0xFFs" : "random data");
+	flen = (len * (long long)simple_rand()) >> 15;
+	for (i = flen; i < len; i++)
+		p[i] = 0xff;
+}
 
-	if (ffs)
-		for (i = from; i < to; i++)
-			p[i] = 0xFF;
-	else
-		for (i = from; i < to; i++)
-			p[i] = random32() % 0x100;
+int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
+		 int len, int check)
+{
+	if (in_failure_mode(desc))
+		return -EROFS;
+	return ubi_leb_read(desc, lnum, buf, offset, len, check);
 }
 
-int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf,
-		  int offs, int len, int dtype)
+int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
+		  int offset, int len, int dtype)
 {
 	int err, failing;
 
-	if (c->dbg->pc_happened)
+	if (in_failure_mode(desc))
 		return -EROFS;
-
-	failing = power_cut_emulated(c, lnum, 1);
+	failing = do_fail(desc, lnum, 1);
 	if (failing)
 		cut_data(buf, len);
-	err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
+	err = ubi_leb_write(desc, lnum, buf, offset, len, dtype);
 	if (err)
 		return err;
 	if (failing)
@@ -2678,207 +2706,162 @@ int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf,
 	return 0;
 }
 
-int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf,
+int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
 		   int len, int dtype)
 {
 	int err;
 
-	if (c->dbg->pc_happened)
+	if (do_fail(desc, lnum, 1))
 		return -EROFS;
-	if (power_cut_emulated(c, lnum, 1))
-		return -EROFS;
-	err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
+	err = ubi_leb_change(desc, lnum, buf, len, dtype);
 	if (err)
 		return err;
-	if (power_cut_emulated(c, lnum, 1))
+	if (do_fail(desc, lnum, 1))
 		return -EROFS;
 	return 0;
 }
 
-int dbg_leb_unmap(struct ubifs_info *c, int lnum)
+int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum)
 {
 	int err;
 
-	if (c->dbg->pc_happened)
-		return -EROFS;
-	if (power_cut_emulated(c, lnum, 0))
+	if (do_fail(desc, lnum, 0))
 		return -EROFS;
-	err = ubi_leb_unmap(c->ubi, lnum);
+	err = ubi_leb_erase(desc, lnum);
 	if (err)
 		return err;
-	if (power_cut_emulated(c, lnum, 0))
+	if (do_fail(desc, lnum, 0))
 		return -EROFS;
 	return 0;
 }
 
-int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype)
+int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum)
 {
 	int err;
 
-	if (c->dbg->pc_happened)
+	if (do_fail(desc, lnum, 0))
 		return -EROFS;
-	if (power_cut_emulated(c, lnum, 0))
-		return -EROFS;
-	err = ubi_leb_map(c->ubi, lnum, dtype);
+	err = ubi_leb_unmap(desc, lnum);
 	if (err)
 		return err;
-	if (power_cut_emulated(c, lnum, 0))
+	if (do_fail(desc, lnum, 0))
 		return -EROFS;
 	return 0;
 }
 
-/*
- * Root directory for UBIFS stuff in debugfs. Contains sub-directories which
- * contain the stuff specific to particular file-system mounts.
- */
-static struct dentry *dfs_rootdir;
+int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum)
+{
+	if (in_failure_mode(desc))
+		return -EROFS;
+	return ubi_is_mapped(desc, lnum);
+}
 
-static int dfs_file_open(struct inode *inode, struct file *file)
+int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
 {
-	file->private_data = inode->i_private;
-	return nonseekable_open(inode, file);
+	int err;
+
+	if (do_fail(desc, lnum, 0))
+		return -EROFS;
+	err = ubi_leb_map(desc, lnum, dtype);
+	if (err)
+		return err;
+	if (do_fail(desc, lnum, 0))
+		return -EROFS;
+	return 0;
 }
 
 /**
- * provide_user_output - provide output to the user reading a debugfs file.
- * @val: boolean value for the answer
- * @u: the buffer to store the answer at
- * @count: size of the buffer
- * @ppos: position in the @u output buffer
+ * ubifs_debugging_init - initialize UBIFS debugging.
+ * @c: UBIFS file-system description object
  *
- * This is a simple helper function which stores @val boolean value in the user
- * buffer when the user reads one of UBIFS debugfs files. Returns amount of
- * bytes written to @u in case of success and a negative error code in case of
+ * This function initializes debugging-related data for the file system.
+ * Returns zero in case of success and a negative error code in case of
  * failure.
  */
-static int provide_user_output(int val, char __user *u, size_t count,
-			       loff_t *ppos)
+int ubifs_debugging_init(struct ubifs_info *c)
 {
-	char buf[3];
-
-	if (val)
-		buf[0] = '1';
-	else
-		buf[0] = '0';
-	buf[1] = '\n';
-	buf[2] = 0x00;
+	c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL);
+	if (!c->dbg)
+		return -ENOMEM;
 
-	return simple_read_from_buffer(u, count, ppos, buf, 2);
+	failure_mode_init(c);
+	return 0;
 }
 
-static ssize_t dfs_file_read(struct file *file, char __user *u, size_t count,
-			     loff_t *ppos)
+/**
+ * ubifs_debugging_exit - free debugging data.
+ * @c: UBIFS file-system description object
+ */
+void ubifs_debugging_exit(struct ubifs_info *c)
 {
-	struct dentry *dent = file->f_path.dentry;
-	struct ubifs_info *c = file->private_data;
-	struct ubifs_debug_info *d = c->dbg;
-	int val;
-
-	if (dent == d->dfs_chk_gen)
-		val = d->chk_gen;
-	else if (dent == d->dfs_chk_index)
-		val = d->chk_index;
-	else if (dent == d->dfs_chk_orph)
-		val = d->chk_orph;
-	else if (dent == d->dfs_chk_lprops)
-		val = d->chk_lprops;
-	else if (dent == d->dfs_chk_fs)
-		val = d->chk_fs;
-	else if (dent == d->dfs_tst_rcvry)
-		val = d->tst_rcvry;
-	else
-		return -EINVAL;
-
-	return provide_user_output(val, u, count, ppos);
+	failure_mode_exit(c);
+	kfree(c->dbg);
 }
 
+/*
+ * Root directory for UBIFS stuff in debugfs. Contains sub-directories which
+ * contain the stuff specific to particular file-system mounts.
+ */
+static struct dentry *dfs_rootdir;
+
 /**
- * interpret_user_input - interpret user debugfs file input.
- * @u: user-provided buffer with the input
- * @count: buffer size
+ * dbg_debugfs_init - initialize debugfs file-system.
  *
- * This is a helper function which interpret user input to a boolean UBIFS
- * debugfs file. Returns %0 or %1 in case of success and a negative error code
- * in case of failure.
+ * UBIFS uses debugfs file-system to expose various debugging knobs to
+ * user-space. This function creates "ubifs" directory in the debugfs
+ * file-system. Returns zero in case of success and a negative error code in
+ * case of failure.
  */
-static int interpret_user_input(const char __user *u, size_t count)
+int dbg_debugfs_init(void)
 {
-	size_t buf_size;
-	char buf[8];
+	dfs_rootdir = debugfs_create_dir("ubifs", NULL);
+	if (IS_ERR(dfs_rootdir)) {
+		int err = PTR_ERR(dfs_rootdir);
+		ubifs_err("cannot create \"ubifs\" debugfs directory, "
+			  "error %d\n", err);
+		return err;
+	}
 
-	buf_size = min_t(size_t, count, (sizeof(buf) - 1));
-	if (copy_from_user(buf, u, buf_size))
-		return -EFAULT;
+	return 0;
+}
 
-	if (buf[0] == '1')
-		return 1;
-	else if (buf[0] == '0')
-		return 0;
+/**
+ * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system.
+ */
+void dbg_debugfs_exit(void)
+{
+	debugfs_remove(dfs_rootdir);
+}
 
-	return -EINVAL;
+static int open_debugfs_file(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return nonseekable_open(inode, file);
 }
 
-static ssize_t dfs_file_write(struct file *file, const char __user *u,
-			      size_t count, loff_t *ppos)
+static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
+				  size_t count, loff_t *ppos)
 {
 	struct ubifs_info *c = file->private_data;
 	struct ubifs_debug_info *d = c->dbg;
-	struct dentry *dent = file->f_path.dentry;
-	int val;
 
-	/*
-	 * TODO: this is racy - the file-system might have already been
-	 * unmounted and we'd oops in this case. The plan is to fix it with
-	 * help of 'iterate_supers_type()' which we should have in v3.0: when
-	 * a debugfs opened, we rember FS's UUID in file->private_data. Then
-	 * whenever we access the FS via a debugfs file, we iterate all UBIFS
-	 * superblocks and fine the one with the same UUID, and take the
-	 * locking right.
-	 *
-	 * The other way to go suggested by Al Viro is to create a separate
-	 * 'ubifs-debug' file-system instead.
-	 */
-	if (file->f_path.dentry == d->dfs_dump_lprops) {
+	if (file->f_path.dentry == d->dfs_dump_lprops)
 		dbg_dump_lprops(c);
-		return count;
-	}
-	if (file->f_path.dentry == d->dfs_dump_budg) {
+	else if (file->f_path.dentry == d->dfs_dump_budg)
 		dbg_dump_budg(c, &c->bi);
-		return count;
-	}
-	if (file->f_path.dentry == d->dfs_dump_tnc) {
+	else if (file->f_path.dentry == d->dfs_dump_tnc) {
 		mutex_lock(&c->tnc_mutex);
 		dbg_dump_tnc(c);
 		mutex_unlock(&c->tnc_mutex);
-		return count;
-	}
-
-	val = interpret_user_input(u, count);
-	if (val < 0)
-		return val;
-
-	if (dent == d->dfs_chk_gen)
-		d->chk_gen = val;
-	else if (dent == d->dfs_chk_index)
-		d->chk_index = val;
-	else if (dent == d->dfs_chk_orph)
-		d->chk_orph = val;
-	else if (dent == d->dfs_chk_lprops)
-		d->chk_lprops = val;
-	else if (dent == d->dfs_chk_fs)
-		d->chk_fs = val;
-	else if (dent == d->dfs_tst_rcvry)
-		d->tst_rcvry = val;
-	else
+	} else
 		return -EINVAL;
 
 	return count;
 }
 
 static const struct file_operations dfs_fops = {
-	.open = dfs_file_open,
-	.read = dfs_file_read,
-	.write = dfs_file_write,
+	.open = open_debugfs_file,
+	.write = write_debugfs_file,
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 };
@@ -2897,20 +2880,12 @@ static const struct file_operations dfs_fops = {
  */
 int dbg_debugfs_init_fs(struct ubifs_info *c)
 {
-	int err, n;
+	int err;
 	const char *fname;
 	struct dentry *dent;
 	struct ubifs_debug_info *d = c->dbg;
 
-	n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME,
-		     c->vi.ubi_num, c->vi.vol_id);
-	if (n == UBIFS_DFS_DIR_LEN) {
-		/* The array size is too small */
-		fname = UBIFS_DFS_DIR_NAME;
-		dent = ERR_PTR(-EINVAL);
-		goto out;
-	}
-
+	sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
 	fname = d->dfs_dir_name;
 	dent = debugfs_create_dir(fname, dfs_rootdir);
 	if (IS_ERR_OR_NULL(dent))
@@ -2935,55 +2910,13 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
 		goto out_remove;
 	d->dfs_dump_tnc = dent;
 
-	fname = "chk_general";
-	dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
-				   &dfs_fops);
-	if (IS_ERR_OR_NULL(dent))
-		goto out_remove;
-	d->dfs_chk_gen = dent;
-
-	fname = "chk_index";
-	dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
-				   &dfs_fops);
-	if (IS_ERR_OR_NULL(dent))
-		goto out_remove;
-	d->dfs_chk_index = dent;
-
-	fname = "chk_orphans";
-	dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
-				   &dfs_fops);
-	if (IS_ERR_OR_NULL(dent))
-		goto out_remove;
-	d->dfs_chk_orph = dent;
-
-	fname = "chk_lprops";
-	dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
-				   &dfs_fops);
-	if (IS_ERR_OR_NULL(dent))
-		goto out_remove;
-	d->dfs_chk_lprops = dent;
-
-	fname = "chk_fs";
-	dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
-				   &dfs_fops);
-	if (IS_ERR_OR_NULL(dent))
-		goto out_remove;
-	d->dfs_chk_fs = dent;
-
-	fname = "tst_recovery";
-	dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
-				   &dfs_fops);
-	if (IS_ERR_OR_NULL(dent))
-		goto out_remove;
-	d->dfs_tst_rcvry = dent;
-
 	return 0;
 
 out_remove:
 	debugfs_remove_recursive(d->dfs_dir);
 out:
 	err = dent ? PTR_ERR(dent) : -ENODEV;
-	ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n",
+	ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
 		  fname, err);
 	return err;
 }
@@ -2997,179 +2930,4 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c)
 	debugfs_remove_recursive(c->dbg->dfs_dir);
 }
 
-struct ubifs_global_debug_info ubifs_dbg;
-
-static struct dentry *dfs_chk_gen;
-static struct dentry *dfs_chk_index;
-static struct dentry *dfs_chk_orph;
-static struct dentry *dfs_chk_lprops;
-static struct dentry *dfs_chk_fs;
-static struct dentry *dfs_tst_rcvry;
-
-static ssize_t dfs_global_file_read(struct file *file, char __user *u,
-				    size_t count, loff_t *ppos)
-{
-	struct dentry *dent = file->f_path.dentry;
-	int val;
-
-	if (dent == dfs_chk_gen)
-		val = ubifs_dbg.chk_gen;
-	else if (dent == dfs_chk_index)
-		val = ubifs_dbg.chk_index;
-	else if (dent == dfs_chk_orph)
-		val = ubifs_dbg.chk_orph;
-	else if (dent == dfs_chk_lprops)
-		val = ubifs_dbg.chk_lprops;
-	else if (dent == dfs_chk_fs)
-		val = ubifs_dbg.chk_fs;
-	else if (dent == dfs_tst_rcvry)
-		val = ubifs_dbg.tst_rcvry;
-	else
-		return -EINVAL;
-
-	return provide_user_output(val, u, count, ppos);
-}
-
-static ssize_t dfs_global_file_write(struct file *file, const char __user *u,
-				     size_t count, loff_t *ppos)
-{
-	struct dentry *dent = file->f_path.dentry;
-	int val;
-
-	val = interpret_user_input(u, count);
-	if (val < 0)
-		return val;
-
-	if (dent == dfs_chk_gen)
-		ubifs_dbg.chk_gen = val;
-	else if (dent == dfs_chk_index)
-		ubifs_dbg.chk_index = val;
-	else if (dent == dfs_chk_orph)
-		ubifs_dbg.chk_orph = val;
-	else if (dent == dfs_chk_lprops)
-		ubifs_dbg.chk_lprops = val;
-	else if (dent == dfs_chk_fs)
-		ubifs_dbg.chk_fs = val;
-	else if (dent == dfs_tst_rcvry)
-		ubifs_dbg.tst_rcvry = val;
-	else
-		return -EINVAL;
-
-	return count;
-}
-
-static const struct file_operations dfs_global_fops = {
-	.read = dfs_global_file_read,
-	.write = dfs_global_file_write,
-	.owner = THIS_MODULE,
-	.llseek = no_llseek,
-};
-
-/**
- * dbg_debugfs_init - initialize debugfs file-system.
- *
- * UBIFS uses debugfs file-system to expose various debugging knobs to
- * user-space. This function creates "ubifs" directory in the debugfs
- * file-system. Returns zero in case of success and a negative error code in
- * case of failure.
- */
-int dbg_debugfs_init(void)
-{
-	int err;
-	const char *fname;
-	struct dentry *dent;
-
-	fname = "ubifs";
-	dent = debugfs_create_dir(fname, NULL);
-	if (IS_ERR_OR_NULL(dent))
-		goto out;
-	dfs_rootdir = dent;
-
-	fname = "chk_general";
-	dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
-				   &dfs_global_fops);
-	if (IS_ERR_OR_NULL(dent))
-		goto out_remove;
-	dfs_chk_gen = dent;
-
-	fname = "chk_index";
-	dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
-				   &dfs_global_fops);
-	if (IS_ERR_OR_NULL(dent))
-		goto out_remove;
-	dfs_chk_index = dent;
-
-	fname = "chk_orphans";
-	dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
-				   &dfs_global_fops);
-	if (IS_ERR_OR_NULL(dent))
-		goto out_remove;
-	dfs_chk_orph = dent;
-
-	fname = "chk_lprops";
-	dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
-				   &dfs_global_fops);
-	if (IS_ERR_OR_NULL(dent))
-		goto out_remove;
-	dfs_chk_lprops = dent;
-
-	fname = "chk_fs";
-	dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
-				   &dfs_global_fops);
-	if (IS_ERR_OR_NULL(dent))
-		goto out_remove;
-	dfs_chk_fs = dent;
-
-	fname = "tst_recovery";
-	dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
-				   &dfs_global_fops);
-	if (IS_ERR_OR_NULL(dent))
-		goto out_remove;
-	dfs_tst_rcvry = dent;
-
-	return 0;
-
-out_remove:
-	debugfs_remove_recursive(dfs_rootdir);
-out:
-	err = dent ? PTR_ERR(dent) : -ENODEV;
-	ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n",
-		  fname, err);
-	return err;
-}
-
-/**
- * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system.
- */
-void dbg_debugfs_exit(void)
-{
-	debugfs_remove_recursive(dfs_rootdir);
-}
-
-/**
- * ubifs_debugging_init - initialize UBIFS debugging.
- * @c: UBIFS file-system description object
- *
- * This function initializes debugging-related data for the file system.
- * Returns zero in case of success and a negative error code in case of
- * failure.
- */
-int ubifs_debugging_init(struct ubifs_info *c)
-{
-	c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL);
-	if (!c->dbg)
-		return -ENOMEM;
-
-	return 0;
-}
-
-/**
- * ubifs_debugging_exit - free debugging data.
- * @c: UBIFS file-system description object
- */
-void ubifs_debugging_exit(struct ubifs_info *c)
-{
-	kfree(c->dbg);
-}
-
 #endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/trunk/fs/ubifs/debug.h b/trunk/fs/ubifs/debug.h
index 45174b534377..a811ac4a26bb 100644
--- a/trunk/fs/ubifs/debug.h
+++ b/trunk/fs/ubifs/debug.h
@@ -31,25 +31,18 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
 
 #ifdef CONFIG_UBIFS_FS_DEBUG
 
-/*
- * The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi"
- * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte.
- */
-#define UBIFS_DFS_DIR_NAME "ubi%d_%d"
-#define UBIFS_DFS_DIR_LEN  (3 + 1 + 2*2 + 1)
+#include <linux/random.h>
 
 /**
  * ubifs_debug_info - per-FS debugging information.
  * @old_zroot: old index root - used by 'dbg_check_old_index()'
  * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
  * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
- *
- * @pc_happened: non-zero if an emulated power cut happened
- * @pc_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
- * @pc_timeout: time in jiffies when delay of failure mode expires
- * @pc_cnt: current number of calls to failure mode I/O functions
- * @pc_cnt_max: number of calls by which to delay failure mode
- *
+ * @failure_mode: failure mode for recovery testing
+ * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
+ * @fail_timeout: time in jiffies when delay of failure mode expires
+ * @fail_cnt: current number of calls to failure mode I/O functions
+ * @fail_cnt_max: number of calls by which to delay failure mode
  * @chk_lpt_sz: used by LPT tree size checker
  * @chk_lpt_sz2: used by LPT tree size checker
  * @chk_lpt_wastage: used by LPT tree size checker
@@ -63,36 +56,21 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
  * @saved_free: saved amount of free space
  * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt
  *
- * @chk_gen: if general extra checks are enabled
- * @chk_index: if index xtra checks are enabled
- * @chk_orph: if orphans extra checks are enabled
- * @chk_lprops: if lprops extra checks are enabled
- * @chk_fs: if UBIFS contents extra checks are enabled
- * @tst_rcvry: if UBIFS recovery testing mode enabled
- *
  * @dfs_dir_name: name of debugfs directory containing this file-system's files
  * @dfs_dir: direntry object of the file-system debugfs directory
  * @dfs_dump_lprops: "dump lprops" debugfs knob
  * @dfs_dump_budg: "dump budgeting information" debugfs knob
  * @dfs_dump_tnc: "dump TNC" debugfs knob
- * @dfs_chk_gen: debugfs knob to enable UBIFS general extra checks
- * @dfs_chk_index: debugfs knob to enable UBIFS index extra checks
- * @dfs_chk_orph: debugfs knob to enable UBIFS orphans extra checks
- * @dfs_chk_lprops: debugfs knob to enable UBIFS LEP properties extra checks
- * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks
- * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing
  */
 struct ubifs_debug_info {
 	struct ubifs_zbranch old_zroot;
 	int old_zroot_level;
 	unsigned long long old_zroot_sqnum;
-
-	int pc_happened;
-	int pc_delay;
-	unsigned long pc_timeout;
-	unsigned int pc_cnt;
-	unsigned int pc_cnt_max;
-
+	int failure_mode;
+	int fail_delay;
+	unsigned long fail_timeout;
+	unsigned int fail_cnt;
+	unsigned int fail_cnt_max;
 	long long chk_lpt_sz;
 	long long chk_lpt_sz2;
 	long long chk_lpt_wastage;
@@ -106,43 +84,11 @@ struct ubifs_debug_info {
 	long long saved_free;
 	int saved_idx_gc_cnt;
 
-	unsigned int chk_gen:1;
-	unsigned int chk_index:1;
-	unsigned int chk_orph:1;
-	unsigned int chk_lprops:1;
-	unsigned int chk_fs:1;
-	unsigned int tst_rcvry:1;
-
-	char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1];
+	char dfs_dir_name[100];
 	struct dentry *dfs_dir;
 	struct dentry *dfs_dump_lprops;
 	struct dentry *dfs_dump_budg;
 	struct dentry *dfs_dump_tnc;
-	struct dentry *dfs_chk_gen;
-	struct dentry *dfs_chk_index;
-	struct dentry *dfs_chk_orph;
-	struct dentry *dfs_chk_lprops;
-	struct dentry *dfs_chk_fs;
-	struct dentry *dfs_tst_rcvry;
-};
-
-/**
- * ubifs_global_debug_info - global (not per-FS) UBIFS debugging information.
- *
- * @chk_gen: if general extra checks are enabled
- * @chk_index: if index xtra checks are enabled
- * @chk_orph: if orphans extra checks are enabled
- * @chk_lprops: if lprops extra checks are enabled
- * @chk_fs: if UBIFS contents extra checks are enabled
- * @tst_rcvry: if UBIFS recovery testing mode enabled
- */
-struct ubifs_global_debug_info {
-	unsigned int chk_gen:1;
-	unsigned int chk_index:1;
-	unsigned int chk_orph:1;
-	unsigned int chk_lprops:1;
-	unsigned int chk_fs:1;
-	unsigned int tst_rcvry:1;
 };
 
 #define ubifs_assert(expr) do {                                                \
@@ -181,8 +127,6 @@ const char *dbg_key_str1(const struct ubifs_info *c,
 #define DBGKEY(key) dbg_key_str0(c, (key))
 #define DBGKEY1(key) dbg_key_str1(c, (key))
 
-extern spinlock_t dbg_lock;
-
 #define ubifs_dbg_msg(type, fmt, ...) do {                        \
 	spin_lock(&dbg_lock);                                     \
 	pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \
@@ -218,36 +162,41 @@ extern spinlock_t dbg_lock;
 /* Additional recovery messages */
 #define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
 
-extern struct ubifs_global_debug_info ubifs_dbg;
+/*
+ * Debugging check flags.
+ *
+ * UBIFS_CHK_GEN: general checks
+ * UBIFS_CHK_TNC: check TNC
+ * UBIFS_CHK_IDX_SZ: check index size
+ * UBIFS_CHK_ORPH: check orphans
+ * UBIFS_CHK_OLD_IDX: check the old index
+ * UBIFS_CHK_LPROPS: check lprops
+ * UBIFS_CHK_FS: check the file-system
+ */
+enum {
+	UBIFS_CHK_GEN     = 0x1,
+	UBIFS_CHK_TNC     = 0x2,
+	UBIFS_CHK_IDX_SZ  = 0x4,
+	UBIFS_CHK_ORPH    = 0x8,
+	UBIFS_CHK_OLD_IDX = 0x10,
+	UBIFS_CHK_LPROPS  = 0x20,
+	UBIFS_CHK_FS      = 0x40,
+};
 
-static inline int dbg_is_chk_gen(const struct ubifs_info *c)
-{
-	return !!(ubifs_dbg.chk_gen || c->dbg->chk_gen);
-}
-static inline int dbg_is_chk_index(const struct ubifs_info *c)
-{
-	return !!(ubifs_dbg.chk_index || c->dbg->chk_index);
-}
-static inline int dbg_is_chk_orph(const struct ubifs_info *c)
-{
-	return !!(ubifs_dbg.chk_orph || c->dbg->chk_orph);
-}
-static inline int dbg_is_chk_lprops(const struct ubifs_info *c)
-{
-	return !!(ubifs_dbg.chk_lprops || c->dbg->chk_lprops);
-}
-static inline int dbg_is_chk_fs(const struct ubifs_info *c)
-{
-	return !!(ubifs_dbg.chk_fs || c->dbg->chk_fs);
-}
-static inline int dbg_is_tst_rcvry(const struct ubifs_info *c)
-{
-	return !!(ubifs_dbg.tst_rcvry || c->dbg->tst_rcvry);
-}
-static inline int dbg_is_power_cut(const struct ubifs_info *c)
-{
-	return !!c->dbg->pc_happened;
-}
+/*
+ * Special testing flags.
+ *
+ * UBIFS_TST_RCVRY: failure mode for recovery testing
+ */
+enum {
+	UBIFS_TST_RCVRY             = 0x4,
+};
+
+extern spinlock_t dbg_lock;
+
+extern unsigned int ubifs_msg_flags;
+extern unsigned int ubifs_chk_flags;
+extern unsigned int ubifs_tst_flags;
 
 int ubifs_debugging_init(struct ubifs_info *c);
 void ubifs_debugging_exit(struct ubifs_info *c);
@@ -258,7 +207,7 @@ const char *dbg_cstate(int cmt_state);
 const char *dbg_jhead(int jhead);
 const char *dbg_get_key_dump(const struct ubifs_info *c,
 			     const union ubifs_key *key);
-void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode);
+void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode);
 void dbg_dump_node(const struct ubifs_info *c, const void *node);
 void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum,
 		       int offs);
@@ -291,8 +240,8 @@ int dbg_check_cats(struct ubifs_info *c);
 int dbg_check_ltab(struct ubifs_info *c);
 int dbg_chk_lpt_free_spc(struct ubifs_info *c);
 int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len);
-int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode);
-int dbg_check_dir(struct ubifs_info *c, const struct inode *dir);
+int dbg_check_synced_i_size(struct inode *inode);
+int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir);
 int dbg_check_tnc(struct ubifs_info *c, int extra);
 int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
 int dbg_check_filesystem(struct ubifs_info *c);
@@ -305,12 +254,54 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
 int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head);
 int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head);
 
-int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
-		  int len, int dtype);
-int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len,
-		   int dtype);
-int dbg_leb_unmap(struct ubifs_info *c, int lnum);
-int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype);
+/* Force the use of the in-the-gaps method for testing */
+static inline int dbg_force_in_the_gaps_enabled(void)
+{
+	return ubifs_chk_flags & UBIFS_CHK_GEN;
+}
+int dbg_force_in_the_gaps(void);
+
+/* Failure mode for recovery testing */
+#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
+
+#ifndef UBIFS_DBG_PRESERVE_UBI
+#define ubi_leb_read   dbg_leb_read
+#define ubi_leb_write  dbg_leb_write
+#define ubi_leb_change dbg_leb_change
+#define ubi_leb_erase  dbg_leb_erase
+#define ubi_leb_unmap  dbg_leb_unmap
+#define ubi_is_mapped  dbg_is_mapped
+#define ubi_leb_map    dbg_leb_map
+#endif
+
+int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
+		 int len, int check);
+int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
+		  int offset, int len, int dtype);
+int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
+		   int len, int dtype);
+int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum);
+int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum);
+int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum);
+int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype);
+
+static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf,
+			   int offset, int len)
+{
+	return dbg_leb_read(desc, lnum, buf, offset, len, 0);
+}
+
+static inline int dbg_write(struct ubi_volume_desc *desc, int lnum,
+			    const void *buf, int offset, int len)
+{
+	return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN);
+}
+
+static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
+				    const void *buf, int len)
+{
+	return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN);
+}
 
 /* Debugfs-related stuff */
 int dbg_debugfs_init(void);
@@ -322,7 +313,7 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
 
 /* Use "if (0)" to make compiler check arguments even if debugging is off */
 #define ubifs_assert(expr)  do {                                               \
-	if (0)                                                                 \
+	if (0 && (expr))                                                       \
 		printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \
 		       __func__, __LINE__, current->pid);                      \
 } while (0)
@@ -332,9 +323,6 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
 		ubifs_err(fmt, ##__VA_ARGS__);     \
 } while (0)
 
-#define DBGKEY(key)  ((char *)(key))
-#define DBGKEY1(key) ((char *)(key))
-
 #define ubifs_dbg_msg(fmt, ...) do {               \
 	if (0)                                     \
 		pr_debug(fmt "\n", ##__VA_ARGS__); \
@@ -358,6 +346,9 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
 #define dbg_scan(fmt, ...)  ubifs_dbg_msg(fmt, ##__VA_ARGS__)
 #define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
 
+#define DBGKEY(key)  ((char *)(key))
+#define DBGKEY1(key) ((char *)(key))
+
 static inline int ubifs_debugging_init(struct ubifs_info *c)      { return 0; }
 static inline void ubifs_debugging_exit(struct ubifs_info *c)     { return; }
 static inline const char *dbg_ntype(int type)                     { return ""; }
@@ -366,7 +357,7 @@ static inline const char *dbg_jhead(int jhead)                    { return ""; }
 static inline const char *
 dbg_get_key_dump(const struct ubifs_info *c,
 		 const union ubifs_key *key)                      { return ""; }
-static inline void dbg_dump_inode(struct ubifs_info *c,
+static inline void dbg_dump_inode(const struct ubifs_info *c,
 				  const struct inode *inode)      { return; }
 static inline void dbg_dump_node(const struct ubifs_info *c,
 				 const void *node)                { return; }
@@ -418,11 +409,9 @@ static inline int dbg_check_ltab(struct ubifs_info *c)            { return 0; }
 static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c)      { return 0; }
 static inline int dbg_chk_lpt_sz(struct ubifs_info *c,
 				 int action, int len)             { return 0; }
-static inline int
-dbg_check_synced_i_size(const struct ubifs_info *c,
-			struct inode *inode)                      { return 0; }
-static inline int dbg_check_dir(struct ubifs_info *c,
-				const struct inode *dir)          { return 0; }
+static inline int dbg_check_synced_i_size(struct inode *inode)    { return 0; }
+static inline int dbg_check_dir_size(struct ubifs_info *c,
+				     const struct inode *dir)     { return 0; }
 static inline int dbg_check_tnc(struct ubifs_info *c, int extra)  { return 0; }
 static inline int dbg_check_idx_size(struct ubifs_info *c,
 				     long long idx_size)          { return 0; }
@@ -442,23 +431,9 @@ static inline int
 dbg_check_nondata_nodes_order(struct ubifs_info *c,
 			      struct list_head *head)             { return 0; }
 
-static inline int dbg_leb_write(struct ubifs_info *c, int lnum,
-				const void *buf, int offset,
-				int len, int dtype)               { return 0; }
-static inline int dbg_leb_change(struct ubifs_info *c, int lnum,
-				 const void *buf, int len,
-				 int dtype)                       { return 0; }
-static inline int dbg_leb_unmap(struct ubifs_info *c, int lnum)   { return 0; }
-static inline int dbg_leb_map(struct ubifs_info *c, int lnum,
-			      int dtype)                          { return 0; }
-
-static inline int dbg_is_chk_gen(const struct ubifs_info *c)      { return 0; }
-static inline int dbg_is_chk_index(const struct ubifs_info *c)    { return 0; }
-static inline int dbg_is_chk_orph(const struct ubifs_info *c)     { return 0; }
-static inline int dbg_is_chk_lprops(const struct ubifs_info *c)   { return 0; }
-static inline int dbg_is_chk_fs(const struct ubifs_info *c)       { return 0; }
-static inline int dbg_is_tst_rcvry(const struct ubifs_info *c)    { return 0; }
-static inline int dbg_is_power_cut(const struct ubifs_info *c)    { return 0; }
+static inline int dbg_force_in_the_gaps(void)                     { return 0; }
+#define dbg_force_in_the_gaps_enabled() 0
+#define dbg_failure_mode                0
 
 static inline int dbg_debugfs_init(void)                          { return 0; }
 static inline void dbg_debugfs_exit(void)                         { return; }
diff --git a/trunk/fs/ubifs/dir.c b/trunk/fs/ubifs/dir.c
index 683492043317..ef5abd38f0bf 100644
--- a/trunk/fs/ubifs/dir.c
+++ b/trunk/fs/ubifs/dir.c
@@ -102,7 +102,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
 	 * UBIFS has to fully control "clean <-> dirty" transitions of inodes
 	 * to make budgeting work.
 	 */
-	inode->i_flags |= S_NOCMTIME;
+	inode->i_flags |= (S_NOCMTIME);
 
 	inode_init_owner(inode, dir, mode);
 	inode->i_mtime = inode->i_atime = inode->i_ctime =
@@ -172,11 +172,9 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
 
 #ifdef CONFIG_UBIFS_FS_DEBUG
 
-static int dbg_check_name(const struct ubifs_info *c,
-			  const struct ubifs_dent_node *dent,
-			  const struct qstr *nm)
+static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm)
 {
-	if (!dbg_is_chk_gen(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
 		return 0;
 	if (le16_to_cpu(dent->nlen) != nm->len)
 		return -EINVAL;
@@ -187,7 +185,7 @@ static int dbg_check_name(const struct ubifs_info *c,
 
 #else
 
-#define dbg_check_name(c, dent, nm) 0
+#define dbg_check_name(dent, nm) 0
 
 #endif
 
@@ -221,7 +219,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
 		goto out;
 	}
 
-	if (dbg_check_name(c, dent, &dentry->d_name)) {
+	if (dbg_check_name(dent, &dentry->d_name)) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -524,7 +522,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
 	ubifs_assert(mutex_is_locked(&dir->i_mutex));
 	ubifs_assert(mutex_is_locked(&inode->i_mutex));
 
-	err = dbg_check_synced_i_size(c, inode);
+	err = dbg_check_synced_i_size(inode);
 	if (err)
 		return err;
 
@@ -579,7 +577,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
 		inode->i_nlink, dir->i_ino);
 	ubifs_assert(mutex_is_locked(&dir->i_mutex));
 	ubifs_assert(mutex_is_locked(&inode->i_mutex));
-	err = dbg_check_synced_i_size(c, inode);
+	err = dbg_check_synced_i_size(inode);
 	if (err)
 		return err;
 
diff --git a/trunk/fs/ubifs/file.c b/trunk/fs/ubifs/file.c
index 7cf738a4544d..5e7fccfc4b29 100644
--- a/trunk/fs/ubifs/file.c
+++ b/trunk/fs/ubifs/file.c
@@ -1263,7 +1263,7 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
 	if (err)
 		return err;
 
-	err = dbg_check_synced_i_size(c, inode);
+	err = dbg_check_synced_i_size(inode);
 	if (err)
 		return err;
 
diff --git a/trunk/fs/ubifs/io.c b/trunk/fs/ubifs/io.c
index 9228950a658f..3be645e012c9 100644
--- a/trunk/fs/ubifs/io.c
+++ b/trunk/fs/ubifs/io.c
@@ -86,125 +86,8 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
 		c->no_chk_data_crc = 0;
 		c->vfs_sb->s_flags |= MS_RDONLY;
 		ubifs_warn("switched to read-only mode, error %d", err);
-		dump_stack();
-	}
-}
-
-/*
- * Below are simple wrappers over UBI I/O functions which include some
- * additional checks and UBIFS debugging stuff. See corresponding UBI function
- * for more information.
- */
-
-int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
-		   int len, int even_ebadmsg)
-{
-	int err;
-
-	err = ubi_read(c->ubi, lnum, buf, offs, len);
-	/*
-	 * In case of %-EBADMSG print the error message only if the
-	 * @even_ebadmsg is true.
-	 */
-	if (err && (err != -EBADMSG || even_ebadmsg)) {
-		ubifs_err("reading %d bytes from LEB %d:%d failed, error %d",
-			  len, lnum, offs, err);
-		dbg_dump_stack();
-	}
-	return err;
-}
-
-int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
-		    int len, int dtype)
-{
-	int err;
-
-	ubifs_assert(!c->ro_media && !c->ro_mount);
-	if (c->ro_error)
-		return -EROFS;
-	if (!dbg_is_tst_rcvry(c))
-		err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
-	else
-		err = dbg_leb_write(c, lnum, buf, offs, len, dtype);
-	if (err) {
-		ubifs_err("writing %d bytes to LEB %d:%d failed, error %d",
-			  len, lnum, offs, err);
-		ubifs_ro_mode(c, err);
-		dbg_dump_stack();
-	}
-	return err;
-}
-
-int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len,
-		     int dtype)
-{
-	int err;
-
-	ubifs_assert(!c->ro_media && !c->ro_mount);
-	if (c->ro_error)
-		return -EROFS;
-	if (!dbg_is_tst_rcvry(c))
-		err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
-	else
-		err = dbg_leb_change(c, lnum, buf, len, dtype);
-	if (err) {
-		ubifs_err("changing %d bytes in LEB %d failed, error %d",
-			  len, lnum, err);
-		ubifs_ro_mode(c, err);
-		dbg_dump_stack();
-	}
-	return err;
-}
-
-int ubifs_leb_unmap(struct ubifs_info *c, int lnum)
-{
-	int err;
-
-	ubifs_assert(!c->ro_media && !c->ro_mount);
-	if (c->ro_error)
-		return -EROFS;
-	if (!dbg_is_tst_rcvry(c))
-		err = ubi_leb_unmap(c->ubi, lnum);
-	else
-		err = dbg_leb_unmap(c, lnum);
-	if (err) {
-		ubifs_err("unmap LEB %d failed, error %d", lnum, err);
-		ubifs_ro_mode(c, err);
-		dbg_dump_stack();
-	}
-	return err;
-}
-
-int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype)
-{
-	int err;
-
-	ubifs_assert(!c->ro_media && !c->ro_mount);
-	if (c->ro_error)
-		return -EROFS;
-	if (!dbg_is_tst_rcvry(c))
-		err = ubi_leb_map(c->ubi, lnum, dtype);
-	else
-		err = dbg_leb_map(c, lnum, dtype);
-	if (err) {
-		ubifs_err("mapping LEB %d failed, error %d", lnum, err);
-		ubifs_ro_mode(c, err);
-		dbg_dump_stack();
-	}
-	return err;
-}
-
-int ubifs_is_mapped(const struct ubifs_info *c, int lnum)
-{
-	int err;
-
-	err = ubi_is_mapped(c->ubi, lnum);
-	if (err < 0) {
-		ubifs_err("ubi_is_mapped failed for LEB %d, error %d",
-			  lnum, err);
 		dbg_dump_stack();
 	}
-	return err;
 }
 
 /**
@@ -523,10 +406,14 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
 	dirt = sync_len - wbuf->used;
 	if (dirt)
 		ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
-	err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, sync_len,
-			      wbuf->dtype);
-	if (err)
+	err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
+			    sync_len, wbuf->dtype);
+	if (err) {
+		ubifs_err("cannot write %d bytes to LEB %d:%d",
+			  sync_len, wbuf->lnum, wbuf->offs);
+		dbg_dump_stack();
 		return err;
+	}
 
 	spin_lock(&wbuf->lock);
 	wbuf->offs += sync_len;
@@ -718,9 +605,9 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
 		if (aligned_len == wbuf->avail) {
 			dbg_io("flush jhead %s wbuf to LEB %d:%d",
 			       dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
-			err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf,
-					      wbuf->offs, wbuf->size,
-					      wbuf->dtype);
+			err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
+					    wbuf->offs, wbuf->size,
+					    wbuf->dtype);
 			if (err)
 				goto out;
 
@@ -755,8 +642,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
 		dbg_io("flush jhead %s wbuf to LEB %d:%d",
 		       dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
 		memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
-		err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs,
-				      wbuf->size, wbuf->dtype);
+		err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
+				    wbuf->size, wbuf->dtype);
 		if (err)
 			goto out;
 
@@ -774,8 +661,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
 		 */
 		dbg_io("write %d bytes to LEB %d:%d",
 		       wbuf->size, wbuf->lnum, wbuf->offs);
-		err = ubifs_leb_write(c, wbuf->lnum, buf, wbuf->offs,
-				      wbuf->size, wbuf->dtype);
+		err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs,
+				    wbuf->size, wbuf->dtype);
 		if (err)
 			goto out;
 
@@ -796,8 +683,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
 		n <<= c->max_write_shift;
 		dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
 		       wbuf->offs);
-		err = ubifs_leb_write(c, wbuf->lnum, buf + written,
-				      wbuf->offs, n, wbuf->dtype);
+		err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written,
+				    wbuf->offs, n, wbuf->dtype);
 		if (err)
 			goto out;
 		wbuf->offs += n;
@@ -879,9 +766,13 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
 		return -EROFS;
 
 	ubifs_prepare_node(c, buf, len, 1);
-	err = ubifs_leb_write(c, lnum, buf, offs, buf_len, dtype);
-	if (err)
+	err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype);
+	if (err) {
+		ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
+			  buf_len, lnum, offs, err);
 		dbg_dump_node(c, buf);
+		dbg_dump_stack();
+	}
 
 	return err;
 }
@@ -933,9 +824,13 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
 
 	if (rlen > 0) {
 		/* Read everything that goes before write-buffer */
-		err = ubifs_leb_read(c, lnum, buf, offs, rlen, 0);
-		if (err && err != -EBADMSG)
+		err = ubi_read(c->ubi, lnum, buf, offs, rlen);
+		if (err && err != -EBADMSG) {
+			ubifs_err("failed to read node %d from LEB %d:%d, "
+				  "error %d", type, lnum, offs, err);
+			dbg_dump_stack();
 			return err;
+		}
 	}
 
 	if (type != ch->node_type) {
@@ -990,9 +885,12 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
 	ubifs_assert(!(offs & 7) && offs < c->leb_size);
 	ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
 
-	err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
-	if (err && err != -EBADMSG)
+	err = ubi_read(c->ubi, lnum, buf, offs, len);
+	if (err && err != -EBADMSG) {
+		ubifs_err("cannot read node %d from LEB %d:%d, error %d",
+			  type, lnum, offs, err);
 		return err;
+	}
 
 	if (type != ch->node_type) {
 		ubifs_err("bad node type (%d but expected %d)",
diff --git a/trunk/fs/ubifs/log.c b/trunk/fs/ubifs/log.c
index f9fd068d1ae0..affea9494ae2 100644
--- a/trunk/fs/ubifs/log.c
+++ b/trunk/fs/ubifs/log.c
@@ -262,7 +262,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
 		 * an unclean reboot, because the target LEB might have been
 		 * unmapped, but not yet physically erased.
 		 */
-		err = ubifs_leb_map(c, bud->lnum, UBI_SHORTTERM);
+		err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM);
 		if (err)
 			goto out_unlock;
 	}
@@ -283,6 +283,8 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
 	return 0;
 
 out_unlock:
+	if (err != -EAGAIN)
+		ubifs_ro_mode(c, err);
 	mutex_unlock(&c->log_mutex);
 	kfree(ref);
 	kfree(bud);
@@ -750,7 +752,7 @@ static int dbg_check_bud_bytes(struct ubifs_info *c)
 	struct ubifs_bud *bud;
 	long long bud_bytes = 0;
 
-	if (!dbg_is_chk_gen(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
 		return 0;
 
 	spin_lock(&c->buds_lock);
diff --git a/trunk/fs/ubifs/lprops.c b/trunk/fs/ubifs/lprops.c
index f8a181e647cc..667884f4a615 100644
--- a/trunk/fs/ubifs/lprops.c
+++ b/trunk/fs/ubifs/lprops.c
@@ -504,7 +504,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops)
 	pnode = (struct ubifs_pnode *)container_of(lprops - pos,
 						   struct ubifs_pnode,
 						   lprops[0]);
-	return !test_bit(COW_CNODE, &pnode->flags) &&
+	return !test_bit(COW_ZNODE, &pnode->flags) &&
 	       test_bit(DIRTY_CNODE, &pnode->flags);
 }
 
@@ -860,7 +860,7 @@ int dbg_check_cats(struct ubifs_info *c)
 	struct list_head *pos;
 	int i, cat;
 
-	if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c))
+	if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS)))
 		return 0;
 
 	list_for_each_entry(lprops, &c->empty_list, list) {
@@ -958,7 +958,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
 {
 	int i = 0, j, err = 0;
 
-	if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c))
+	if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS)))
 		return;
 
 	for (i = 0; i < heap->cnt; i++) {
@@ -1262,7 +1262,7 @@ int dbg_check_lprops(struct ubifs_info *c)
 	int i, err;
 	struct ubifs_lp_stats lst;
 
-	if (!dbg_is_chk_lprops(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
 		return 0;
 
 	/*
diff --git a/trunk/fs/ubifs/lpt.c b/trunk/fs/ubifs/lpt.c
index 6189c74d97f0..ef5155e109a2 100644
--- a/trunk/fs/ubifs/lpt.c
+++ b/trunk/fs/ubifs/lpt.c
@@ -701,8 +701,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
 			alen = ALIGN(len, c->min_io_size);
 			set_ltab(c, lnum, c->leb_size - alen, alen - len);
 			memset(p, 0xff, alen - len);
-			err = ubifs_leb_change(c, lnum++, buf, alen,
-					       UBI_SHORTTERM);
+			err = ubi_leb_change(c->ubi, lnum++, buf, alen,
+					     UBI_SHORTTERM);
 			if (err)
 				goto out;
 			p = buf;
@@ -732,8 +732,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
 				set_ltab(c, lnum, c->leb_size - alen,
 					    alen - len);
 				memset(p, 0xff, alen - len);
-				err = ubifs_leb_change(c, lnum++, buf, alen,
-						       UBI_SHORTTERM);
+				err = ubi_leb_change(c->ubi, lnum++, buf, alen,
+						     UBI_SHORTTERM);
 				if (err)
 					goto out;
 				p = buf;
@@ -780,8 +780,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
 			alen = ALIGN(len, c->min_io_size);
 			set_ltab(c, lnum, c->leb_size - alen, alen - len);
 			memset(p, 0xff, alen - len);
-			err = ubifs_leb_change(c, lnum++, buf, alen,
-					       UBI_SHORTTERM);
+			err = ubi_leb_change(c->ubi, lnum++, buf, alen,
+					     UBI_SHORTTERM);
 			if (err)
 				goto out;
 			p = buf;
@@ -806,7 +806,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
 		alen = ALIGN(len, c->min_io_size);
 		set_ltab(c, lnum, c->leb_size - alen, alen - len);
 		memset(p, 0xff, alen - len);
-		err = ubifs_leb_change(c, lnum++, buf, alen, UBI_SHORTTERM);
+		err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM);
 		if (err)
 			goto out;
 		p = buf;
@@ -826,7 +826,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
 
 	/* Write remaining buffer */
 	memset(p, 0xff, alen - len);
-	err = ubifs_leb_change(c, lnum, buf, alen, UBI_SHORTTERM);
+	err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM);
 	if (err)
 		goto out;
 
@@ -1222,7 +1222,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
 		if (c->big_lpt)
 			nnode->num = calc_nnode_num_from_parent(c, parent, iip);
 	} else {
-		err = ubifs_leb_read(c, lnum, buf, offs, c->nnode_sz, 1);
+		err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz);
 		if (err)
 			goto out;
 		err = ubifs_unpack_nnode(c, buf, nnode);
@@ -1247,7 +1247,6 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
 
 out:
 	ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs);
-	dbg_dump_stack();
 	kfree(nnode);
 	return err;
 }
@@ -1291,7 +1290,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
 			lprops->flags = ubifs_categorize_lprops(c, lprops);
 		}
 	} else {
-		err = ubifs_leb_read(c, lnum, buf, offs, c->pnode_sz, 1);
+		err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz);
 		if (err)
 			goto out;
 		err = unpack_pnode(c, buf, pnode);
@@ -1313,7 +1312,6 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
 out:
 	ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs);
 	dbg_dump_pnode(c, pnode, parent, iip);
-	dbg_dump_stack();
 	dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
 	kfree(pnode);
 	return err;
@@ -1333,7 +1331,7 @@ static int read_ltab(struct ubifs_info *c)
 	buf = vmalloc(c->ltab_sz);
 	if (!buf)
 		return -ENOMEM;
-	err = ubifs_leb_read(c, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz, 1);
+	err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz);
 	if (err)
 		goto out;
 	err = unpack_ltab(c, buf);
@@ -1356,8 +1354,7 @@ static int read_lsave(struct ubifs_info *c)
 	buf = vmalloc(c->lsave_sz);
 	if (!buf)
 		return -ENOMEM;
-	err = ubifs_leb_read(c, c->lsave_lnum, buf, c->lsave_offs,
-			     c->lsave_sz, 1);
+	err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz);
 	if (err)
 		goto out;
 	err = unpack_lsave(c, buf);
@@ -1817,8 +1814,8 @@ static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c,
 		if (c->big_lpt)
 			nnode->num = calc_nnode_num_from_parent(c, parent, iip);
 	} else {
-		err = ubifs_leb_read(c, branch->lnum, buf, branch->offs,
-				     c->nnode_sz, 1);
+		err = ubi_read(c->ubi, branch->lnum, buf, branch->offs,
+			       c->nnode_sz);
 		if (err)
 			return ERR_PTR(err);
 		err = ubifs_unpack_nnode(c, buf, nnode);
@@ -1886,8 +1883,8 @@ static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c,
 		ubifs_assert(branch->lnum >= c->lpt_first &&
 			     branch->lnum <= c->lpt_last);
 		ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size);
-		err = ubifs_leb_read(c, branch->lnum, buf, branch->offs,
-				     c->pnode_sz, 1);
+		err = ubi_read(c->ubi, branch->lnum, buf, branch->offs,
+			       c->pnode_sz);
 		if (err)
 			return ERR_PTR(err);
 		err = unpack_pnode(c, buf, pnode);
@@ -2227,7 +2224,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
 	struct ubifs_cnode *cn;
 	int num, iip = 0, err;
 
-	if (!dbg_is_chk_lprops(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
 		return 0;
 
 	while (cnode) {
diff --git a/trunk/fs/ubifs/lpt_commit.c b/trunk/fs/ubifs/lpt_commit.c
index cddd6bd214f4..dfcb5748a7dc 100644
--- a/trunk/fs/ubifs/lpt_commit.c
+++ b/trunk/fs/ubifs/lpt_commit.c
@@ -27,7 +27,6 @@
 
 #include <linux/crc16.h>
 #include <linux/slab.h>
-#include <linux/random.h>
 #include "ubifs.h"
 
 #ifdef CONFIG_UBIFS_FS_DEBUG
@@ -117,8 +116,8 @@ static int get_cnodes_to_commit(struct ubifs_info *c)
 		return 0;
 	cnt += 1;
 	while (1) {
-		ubifs_assert(!test_bit(COW_CNODE, &cnode->flags));
-		__set_bit(COW_CNODE, &cnode->flags);
+		ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags));
+		__set_bit(COW_ZNODE, &cnode->flags);
 		cnext = next_dirty_cnode(cnode);
 		if (!cnext) {
 			cnode->cnext = c->lpt_cnext;
@@ -466,7 +465,7 @@ static int write_cnodes(struct ubifs_info *c)
 		 */
 		clear_bit(DIRTY_CNODE, &cnode->flags);
 		smp_mb__before_clear_bit();
-		clear_bit(COW_CNODE, &cnode->flags);
+		clear_bit(COW_ZNODE, &cnode->flags);
 		smp_mb__after_clear_bit();
 		offs += len;
 		dbg_chk_lpt_sz(c, 1, len);
@@ -1161,11 +1160,11 @@ static int lpt_gc_lnum(struct ubifs_info *c, int lnum)
 	void *buf = c->lpt_buf;
 
 	dbg_lp("LEB %d", lnum);
-
-	err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1);
-	if (err)
+	err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
+	if (err) {
+		ubifs_err("cannot read LEB %d, error %d", lnum, err);
 		return err;
-
+	}
 	while (1) {
 		if (!is_a_node(c, buf, len)) {
 			int pad_len;
@@ -1641,7 +1640,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
 	int ret;
 	void *buf, *p;
 
-	if (!dbg_is_chk_lprops(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
 		return 0;
 
 	buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
@@ -1651,11 +1650,11 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
 	}
 
 	dbg_lp("LEB %d", lnum);
-
-	err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1);
-	if (err)
+	err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
+	if (err) {
+		dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err);
 		goto out;
-
+	}
 	while (1) {
 		if (!is_a_node(c, p, len)) {
 			int i, pad_len;
@@ -1712,7 +1711,7 @@ int dbg_check_ltab(struct ubifs_info *c)
 {
 	int lnum, err, i, cnt;
 
-	if (!dbg_is_chk_lprops(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
 		return 0;
 
 	/* Bring the entire tree into memory */
@@ -1755,7 +1754,7 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
 	long long free = 0;
 	int i;
 
-	if (!dbg_is_chk_lprops(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
 		return 0;
 
 	for (i = 0; i < c->lpt_lebs; i++) {
@@ -1797,7 +1796,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
 	long long chk_lpt_sz, lpt_sz;
 	int err = 0;
 
-	if (!dbg_is_chk_lprops(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
 		return 0;
 
 	switch (action) {
@@ -1902,10 +1901,11 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
 		return;
 	}
 
-	err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1);
-	if (err)
+	err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
+	if (err) {
+		ubifs_err("cannot read LEB %d, error %d", lnum, err);
 		goto out;
-
+	}
 	while (1) {
 		offs = c->leb_size - len;
 		if (!is_a_node(c, p, len)) {
@@ -2019,7 +2019,7 @@ static int dbg_populate_lsave(struct ubifs_info *c)
 	struct ubifs_lpt_heap *heap;
 	int i;
 
-	if (!dbg_is_chk_gen(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
 		return 0;
 	if (random32() & 3)
 		return 0;
diff --git a/trunk/fs/ubifs/misc.h b/trunk/fs/ubifs/misc.h
index ee7cb5ebb6e8..0b5296a9a4c5 100644
--- a/trunk/fs/ubifs/misc.h
+++ b/trunk/fs/ubifs/misc.h
@@ -38,29 +38,6 @@ static inline int ubifs_zn_dirty(const struct ubifs_znode *znode)
 	return !!test_bit(DIRTY_ZNODE, &znode->flags);
 }
 
-/**
- * ubifs_zn_obsolete - check if znode is obsolete.
- * @znode: znode to check
- *
- * This helper function returns %1 if @znode is obsolete and %0 otherwise.
- */
-static inline int ubifs_zn_obsolete(const struct ubifs_znode *znode)
-{
-	return !!test_bit(OBSOLETE_ZNODE, &znode->flags);
-}
-
-/**
- * ubifs_zn_cow - check if znode has to be copied on write.
- * @znode: znode to check
- *
- * This helper function returns %1 if @znode is has COW flag set and %0
- * otherwise.
- */
-static inline int ubifs_zn_cow(const struct ubifs_znode *znode)
-{
-	return !!test_bit(COW_ZNODE, &znode->flags);
-}
-
 /**
  * ubifs_wake_up_bgt - wake up background thread.
  * @c: UBIFS file-system description object
@@ -144,6 +121,86 @@ static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf)
 	return err;
 }
 
+/**
+ * ubifs_leb_unmap - unmap an LEB.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number to unmap
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum)
+{
+	int err;
+
+	ubifs_assert(!c->ro_media && !c->ro_mount);
+	if (c->ro_error)
+		return -EROFS;
+	err = ubi_leb_unmap(c->ubi, lnum);
+	if (err) {
+		ubifs_err("unmap LEB %d failed, error %d", lnum, err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ubifs_leb_write - write to a LEB.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number to write
+ * @buf: buffer to write from
+ * @offs: offset within LEB to write to
+ * @len: length to write
+ * @dtype: data type
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum,
+				  const void *buf, int offs, int len, int dtype)
+{
+	int err;
+
+	ubifs_assert(!c->ro_media && !c->ro_mount);
+	if (c->ro_error)
+		return -EROFS;
+	err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
+	if (err) {
+		ubifs_err("writing %d bytes at %d:%d, error %d",
+			  len, lnum, offs, err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ubifs_leb_change - atomic LEB change.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number to write
+ * @buf: buffer to write from
+ * @len: length to write
+ * @dtype: data type
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum,
+				   const void *buf, int len, int dtype)
+{
+	int err;
+
+	ubifs_assert(!c->ro_media && !c->ro_mount);
+	if (c->ro_error)
+		return -EROFS;
+	err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
+	if (err) {
+		ubifs_err("changing %d bytes in LEB %d, error %d",
+			  len, lnum, err);
+		return err;
+	}
+
+	return 0;
+}
+
 /**
  * ubifs_encode_dev - encode device node IDs.
  * @dev: UBIFS device node information
diff --git a/trunk/fs/ubifs/orphan.c b/trunk/fs/ubifs/orphan.c
index c542c73cfa3c..a5422fffbd69 100644
--- a/trunk/fs/ubifs/orphan.c
+++ b/trunk/fs/ubifs/orphan.c
@@ -929,7 +929,7 @@ static int dbg_check_orphans(struct ubifs_info *c)
 	struct check_info ci;
 	int err;
 
-	if (!dbg_is_chk_orph(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_ORPH))
 		return 0;
 
 	ci.last_ino = 0;
diff --git a/trunk/fs/ubifs/recovery.c b/trunk/fs/ubifs/recovery.c
index af02790d9328..783d8e0beb76 100644
--- a/trunk/fs/ubifs/recovery.c
+++ b/trunk/fs/ubifs/recovery.c
@@ -117,7 +117,7 @@ static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf,
 	if (!sbuf)
 		return -ENOMEM;
 
-	err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0);
+	err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size);
 	if (err && err != -EBADMSG)
 		goto out_free;
 
@@ -213,10 +213,10 @@ static int write_rcvrd_mst_node(struct ubifs_info *c,
 	mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);
 
 	ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1);
-	err = ubifs_leb_change(c, lnum, mst, sz, UBI_SHORTTERM);
+	err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM);
 	if (err)
 		goto out;
-	err = ubifs_leb_change(c, lnum + 1, mst, sz, UBI_SHORTTERM);
+	err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM);
 	if (err)
 		goto out;
 out:
@@ -274,8 +274,7 @@ int ubifs_recover_master_node(struct ubifs_info *c)
 				if (cor1)
 					goto out_err;
 				mst = mst1;
-			} else if (offs1 == 0 &&
-				   c->leb_size - offs2 - sz < sz) {
+			} else if (offs1 == 0 && offs2 + sz >= c->leb_size) {
 				/* 1st LEB was unmapped and written, 2nd not */
 				if (cor1)
 					goto out_err;
@@ -540,8 +539,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 			int len = ALIGN(endpt, c->min_io_size);
 
 			if (start) {
-				err = ubifs_leb_read(c, lnum, sleb->buf, 0,
-						     start, 1);
+				err = ubi_read(c->ubi, lnum, sleb->buf, 0,
+					       start);
 				if (err)
 					return err;
 			}
@@ -555,8 +554,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 					ubifs_pad(c, buf, pad_len);
 				}
 			}
-			err = ubifs_leb_change(c, lnum, sleb->buf, len,
-					       UBI_UNKNOWN);
+			err = ubi_leb_change(c->ubi, lnum, sleb->buf, len,
+					     UBI_UNKNOWN);
 			if (err)
 				return err;
 		}
@@ -820,8 +819,7 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
 		return -ENOMEM;
 	if (c->leb_size - offs < UBIFS_CS_NODE_SZ)
 		goto out_err;
-	err = ubifs_leb_read(c, lnum, (void *)cs_node, offs,
-			     UBIFS_CS_NODE_SZ, 0);
+	err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ);
 	if (err && err != -EBADMSG)
 		goto out_free;
 	ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
@@ -921,7 +919,8 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
  *
  * This function returns %0 on success and a negative error code on failure.
  */
-static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf)
+static int recover_head(const struct ubifs_info *c, int lnum, int offs,
+			void *sbuf)
 {
 	int len = c->max_write_size, err;
 
@@ -932,15 +931,15 @@ static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf)
 		return 0;
 
 	/* Read at the head location and check it is empty flash */
-	err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1);
+	err = ubi_read(c->ubi, lnum, sbuf, offs, len);
 	if (err || !is_empty(sbuf, len)) {
 		dbg_rcvry("cleaning head at %d:%d", lnum, offs);
 		if (offs == 0)
 			return ubifs_leb_unmap(c, lnum);
-		err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1);
+		err = ubi_read(c->ubi, lnum, sbuf, 0, offs);
 		if (err)
 			return err;
-		return ubifs_leb_change(c, lnum, sbuf, offs, UBI_UNKNOWN);
+		return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN);
 	}
 
 	return 0;
@@ -963,7 +962,7 @@ static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf)
  *
  * This function returns %0 on success and a negative error code on failure.
  */
-int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf)
+int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf)
 {
 	int err;
 
@@ -994,7 +993,7 @@ int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf)
  *
  * This function returns %0 on success and a negative error code on failure.
  */
-static int clean_an_unclean_leb(struct ubifs_info *c,
+static int clean_an_unclean_leb(const struct ubifs_info *c,
 				struct ubifs_unclean_leb *ucleb, void *sbuf)
 {
 	int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
@@ -1010,7 +1009,7 @@ static int clean_an_unclean_leb(struct ubifs_info *c,
 		return 0;
 	}
 
-	err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
+	err = ubi_read(c->ubi, lnum, buf, offs, len);
 	if (err && err != -EBADMSG)
 		return err;
 
@@ -1070,7 +1069,7 @@ static int clean_an_unclean_leb(struct ubifs_info *c,
 	}
 
 	/* Write back the LEB atomically */
-	err = ubifs_leb_change(c, lnum, sbuf, len, UBI_UNKNOWN);
+	err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN);
 	if (err)
 		return err;
 
@@ -1090,7 +1089,7 @@ static int clean_an_unclean_leb(struct ubifs_info *c,
  *
  * This function returns %0 on success and a negative error code on failure.
  */
-int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf)
+int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
 {
 	dbg_rcvry("recovery");
 	while (!list_empty(&c->unclean_leb_list)) {
@@ -1455,7 +1454,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
 	if (i_size >= e->d_size)
 		return 0;
 	/* Read the LEB */
-	err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1);
+	err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size);
 	if (err)
 		goto out;
 	/* Change the size field and recalculate the CRC */
@@ -1471,7 +1470,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
 		len -= 1;
 	len = ALIGN(len + 1, c->min_io_size);
 	/* Atomically write the fixed LEB back again */
-	err = ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN);
+	err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
 	if (err)
 		goto out;
 	dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
diff --git a/trunk/fs/ubifs/replay.c b/trunk/fs/ubifs/replay.c
index ccabaf1164b3..5e97161ce4d3 100644
--- a/trunk/fs/ubifs/replay.c
+++ b/trunk/fs/ubifs/replay.c
@@ -523,7 +523,8 @@ static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
 	if (!list_is_last(&next->list, &jh->buds_list))
 		return 0;
 
-	err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1);
+	err = ubi_read(c->ubi, next->lnum, (char *)&data,
+		       next->start, 4);
 	if (err)
 		return 0;
 
diff --git a/trunk/fs/ubifs/sb.c b/trunk/fs/ubifs/sb.c
index 93d938ad3d2a..c606f010e8df 100644
--- a/trunk/fs/ubifs/sb.c
+++ b/trunk/fs/ubifs/sb.c
@@ -674,15 +674,15 @@ static int fixup_leb(struct ubifs_info *c, int lnum, int len)
 
 	if (len == 0) {
 		dbg_mnt("unmap empty LEB %d", lnum);
-		return ubifs_leb_unmap(c, lnum);
+		return ubi_leb_unmap(c->ubi, lnum);
 	}
 
 	dbg_mnt("fixup LEB %d, data len %d", lnum, len);
-	err = ubifs_leb_read(c, lnum, c->sbuf, 0, len, 1);
+	err = ubi_read(c->ubi, lnum, c->sbuf, 0, len);
 	if (err)
 		return err;
 
-	return ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN);
+	return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
 }
 
 /**
diff --git a/trunk/fs/ubifs/scan.c b/trunk/fs/ubifs/scan.c
index 37383e8011b1..36216b46f772 100644
--- a/trunk/fs/ubifs/scan.c
+++ b/trunk/fs/ubifs/scan.c
@@ -148,7 +148,7 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
 	INIT_LIST_HEAD(&sleb->nodes);
 	sleb->buf = sbuf;
 
-	err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0);
+	err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs);
 	if (err && err != -EBADMSG) {
 		ubifs_err("cannot read %d bytes from LEB %d:%d,"
 			  " error %d", c->leb_size - offs, lnum, offs, err);
@@ -240,7 +240,7 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
 	int len;
 
 	ubifs_err("corruption at LEB %d:%d", lnum, offs);
-	if (dbg_is_tst_rcvry(c))
+	if (dbg_failure_mode)
 		return;
 	len = c->leb_size - offs;
 	if (len > 8192)
diff --git a/trunk/fs/ubifs/super.c b/trunk/fs/ubifs/super.c
index b28121278d46..529be0582029 100644
--- a/trunk/fs/ubifs/super.c
+++ b/trunk/fs/ubifs/super.c
@@ -85,7 +85,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
 	if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA)
 		return 4;
 
-	if (ui->xattr && !S_ISREG(inode->i_mode))
+	if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG)
 		return 5;
 
 	if (!ubifs_compr_present(ui->compr_type)) {
@@ -94,7 +94,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
 			   ubifs_compr_name(ui->compr_type));
 	}
 
-	err = dbg_check_dir(c, inode);
+	err = dbg_check_dir_size(c, inode);
 	return err;
 }
 
@@ -914,7 +914,7 @@ static int check_volume_empty(struct ubifs_info *c)
 
 	c->empty = 1;
 	for (lnum = 0; lnum < c->leb_cnt; lnum++) {
-		err = ubifs_is_mapped(c, lnum);
+		err = ubi_is_mapped(c->ubi, lnum);
 		if (unlikely(err < 0))
 			return err;
 		if (err == 1) {
diff --git a/trunk/fs/ubifs/tnc.c b/trunk/fs/ubifs/tnc.c
index 066738647685..91b4213dde84 100644
--- a/trunk/fs/ubifs/tnc.c
+++ b/trunk/fs/ubifs/tnc.c
@@ -223,7 +223,7 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c,
 	__set_bit(DIRTY_ZNODE, &zn->flags);
 	__clear_bit(COW_ZNODE, &zn->flags);
 
-	ubifs_assert(!ubifs_zn_obsolete(znode));
+	ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags));
 	__set_bit(OBSOLETE_ZNODE, &znode->flags);
 
 	if (znode->level != 0) {
@@ -271,7 +271,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
 	struct ubifs_znode *zn;
 	int err;
 
-	if (!ubifs_zn_cow(znode)) {
+	if (!test_bit(COW_ZNODE, &znode->flags)) {
 		/* znode is not being committed */
 		if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) {
 			atomic_long_inc(&c->dirty_zn_cnt);
@@ -462,7 +462,7 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
 
 	dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
 
-	err = ubifs_leb_read(c, lnum, buf, offs, len, 1);
+	err = ubi_read(c->ubi, lnum, buf, offs, len);
 	if (err) {
 		ubifs_err("cannot read node type %d from LEB %d:%d, error %d",
 			  type, lnum, offs, err);
@@ -1666,7 +1666,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum,
 	if (!overlap) {
 		/* We may safely unlock the write-buffer and read the data */
 		spin_unlock(&wbuf->lock);
-		return ubifs_leb_read(c, lnum, buf, offs, len, 0);
+		return ubi_read(c->ubi, lnum, buf, offs, len);
 	}
 
 	/* Don't read under wbuf */
@@ -1680,7 +1680,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum,
 
 	if (rlen > 0)
 		/* Read everything that goes before write-buffer */
-		return ubifs_leb_read(c, lnum, buf, offs, rlen, 0);
+		return ubi_read(c->ubi, lnum, buf, offs, rlen);
 
 	return 0;
 }
@@ -1767,7 +1767,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu)
 	if (wbuf)
 		err = read_wbuf(wbuf, bu->buf, len, lnum, offs);
 	else
-		err = ubifs_leb_read(c, lnum, bu->buf, offs, len, 0);
+		err = ubi_read(c->ubi, lnum, bu->buf, offs, len);
 
 	/* Check for a race with GC */
 	if (maybe_leb_gced(c, lnum, bu->gc_seq))
@@ -2423,7 +2423,7 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
 	 */
 
 	do {
-		ubifs_assert(!ubifs_zn_obsolete(znode));
+		ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags));
 		ubifs_assert(ubifs_zn_dirty(znode));
 
 		zp = znode->parent;
@@ -2479,8 +2479,9 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
 			c->zroot.offs = zbr->offs;
 			c->zroot.len = zbr->len;
 			c->zroot.znode = znode;
-			ubifs_assert(!ubifs_zn_obsolete(zp));
-			ubifs_assert(ubifs_zn_dirty(zp));
+			ubifs_assert(!test_bit(OBSOLETE_ZNODE,
+				     &zp->flags));
+			ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags));
 			atomic_long_dec(&c->dirty_zn_cnt);
 
 			if (zp->cnext) {
@@ -2864,7 +2865,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c)
 		struct ubifs_znode *znode = cnext;
 
 		cnext = cnext->cnext;
-		if (ubifs_zn_obsolete(znode))
+		if (test_bit(OBSOLETE_ZNODE, &znode->flags))
 			kfree(znode);
 	} while (cnext && cnext != c->cnext);
 }
@@ -3300,7 +3301,7 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
 
 	if (!S_ISREG(inode->i_mode))
 		return 0;
-	if (!dbg_is_chk_gen(c))
+	if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
 		return 0;
 
 	block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
@@ -3336,10 +3337,9 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
 	ubifs_err("inode %lu has size %lld, but there are data at offset %lld "
 		  "(data key %s)", (unsigned long)inode->i_ino, size,
 		  ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key));
-	mutex_unlock(&c->tnc_mutex);
 	dbg_dump_inode(c, inode);
 	dbg_dump_stack();
-	return -EINVAL;
+	err = -EINVAL;
 
 out_unlock:
 	mutex_unlock(&c->tnc_mutex);
diff --git a/trunk/fs/ubifs/tnc_commit.c b/trunk/fs/ubifs/tnc_commit.c
index 4c15f07a8bb2..41920f357bbf 100644
--- a/trunk/fs/ubifs/tnc_commit.c
+++ b/trunk/fs/ubifs/tnc_commit.c
@@ -22,7 +22,6 @@
 
 /* This file implements TNC functions for committing */
 
-#include <linux/random.h>
 #include "ubifs.h"
 
 /**
@@ -88,12 +87,8 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
 	atomic_long_dec(&c->dirty_zn_cnt);
 
 	ubifs_assert(ubifs_zn_dirty(znode));
-	ubifs_assert(ubifs_zn_cow(znode));
+	ubifs_assert(test_bit(COW_ZNODE, &znode->flags));
 
-	/*
-	 * Note, unlike 'write_index()' we do not add memory barriers here
-	 * because this function is called with @c->tnc_mutex locked.
-	 */
 	__clear_bit(DIRTY_ZNODE, &znode->flags);
 	__clear_bit(COW_ZNODE, &znode->flags);
 
@@ -382,7 +377,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
 				c->gap_lebs = NULL;
 				return err;
 			}
-			if (!dbg_is_chk_index(c)) {
+			if (dbg_force_in_the_gaps_enabled()) {
 				/*
 				 * Do not print scary warnings if the debugging
 				 * option which forces in-the-gaps is enabled.
@@ -496,6 +491,25 @@ static int layout_in_empty_space(struct ubifs_info *c)
 		else
 			next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
 
+		if (c->min_io_size == 1) {
+			buf_offs += ALIGN(len, 8);
+			if (next_len) {
+				if (buf_offs + next_len <= c->leb_size)
+					continue;
+				err = ubifs_update_one_lp(c, lnum, 0,
+						c->leb_size - buf_offs, 0, 0);
+				if (err)
+					return err;
+				lnum = -1;
+				continue;
+			}
+			err = ubifs_update_one_lp(c, lnum,
+					c->leb_size - buf_offs, 0, 0, 0);
+			if (err)
+				return err;
+			break;
+		}
+
 		/* Update buffer positions */
 		wlen = used + len;
 		used += ALIGN(len, 8);
@@ -644,7 +658,7 @@ static int get_znodes_to_commit(struct ubifs_info *c)
 	}
 	cnt += 1;
 	while (1) {
-		ubifs_assert(!ubifs_zn_cow(znode));
+		ubifs_assert(!test_bit(COW_ZNODE, &znode->flags));
 		__set_bit(COW_ZNODE, &znode->flags);
 		znode->alt = 0;
 		cnext = find_next_dirty(znode);
@@ -690,7 +704,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt)
 		c->ilebs[c->ileb_cnt++] = lnum;
 		dbg_cmt("LEB %d", lnum);
 	}
-	if (dbg_is_chk_index(c) && !(random32() & 7))
+	if (dbg_force_in_the_gaps())
 		return -ENOSPC;
 	return 0;
 }
@@ -816,7 +830,7 @@ static int write_index(struct ubifs_info *c)
 	struct ubifs_idx_node *idx;
 	struct ubifs_znode *znode, *cnext;
 	int i, lnum, offs, len, next_len, buf_len, buf_offs, used;
-	int avail, wlen, err, lnum_pos = 0, blen, nxt_offs;
+	int avail, wlen, err, lnum_pos = 0;
 
 	cnext = c->enext;
 	if (!cnext)
@@ -893,7 +907,7 @@ static int write_index(struct ubifs_info *c)
 		cnext = znode->cnext;
 
 		ubifs_assert(ubifs_zn_dirty(znode));
-		ubifs_assert(ubifs_zn_cow(znode));
+		ubifs_assert(test_bit(COW_ZNODE, &znode->flags));
 
 		/*
 		 * It is important that other threads should see %DIRTY_ZNODE
@@ -908,28 +922,6 @@ static int write_index(struct ubifs_info *c)
 		clear_bit(COW_ZNODE, &znode->flags);
 		smp_mb__after_clear_bit();
 
-		/*
-		 * We have marked the znode as clean but have not updated the
-		 * @c->clean_zn_cnt counter. If this znode becomes dirty again
-		 * before 'free_obsolete_znodes()' is called, then
-		 * @c->clean_zn_cnt will be decremented before it gets
-		 * incremented (resulting in 2 decrements for the same znode).
-		 * This means that @c->clean_zn_cnt may become negative for a
-		 * while.
-		 *
-		 * Q: why we cannot increment @c->clean_zn_cnt?
-		 * A: because we do not have the @c->tnc_mutex locked, and the
-		 *    following code would be racy and buggy:
-		 *
-		 *    if (!ubifs_zn_obsolete(znode)) {
-		 *            atomic_long_inc(&c->clean_zn_cnt);
-		 *            atomic_long_inc(&ubifs_clean_zn_cnt);
-		 *    }
-		 *
-		 *    Thus, we just delay the @c->clean_zn_cnt update until we
-		 *    have the mutex locked.
-		 */
-
 		/* Do not access znode from this point on */
 
 		/* Update buffer positions */
@@ -946,38 +938,65 @@ static int write_index(struct ubifs_info *c)
 		else
 			next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
 
-		nxt_offs = buf_offs + used + next_len;
-		if (next_len && nxt_offs <= c->leb_size) {
-			if (avail > 0)
+		if (c->min_io_size == 1) {
+			/*
+			 * Write the prepared index node immediately if there is
+			 * no minimum IO size
+			 */
+			err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
+					      wlen, UBI_SHORTTERM);
+			if (err)
+				return err;
+			buf_offs += ALIGN(wlen, 8);
+			if (next_len) {
+				used = 0;
+				avail = buf_len;
+				if (buf_offs + next_len > c->leb_size) {
+					err = ubifs_update_one_lp(c, lnum,
+						LPROPS_NC, 0, 0, LPROPS_TAKEN);
+					if (err)
+						return err;
+					lnum = -1;
+				}
 				continue;
-			else
-				blen = buf_len;
+			}
 		} else {
-			wlen = ALIGN(wlen, 8);
-			blen = ALIGN(wlen, c->min_io_size);
-			ubifs_pad(c, c->cbuf + wlen, blen - wlen);
-		}
-
-		/* The buffer is full or there are no more znodes to do */
-		err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, blen,
-				      UBI_SHORTTERM);
-		if (err)
-			return err;
-		buf_offs += blen;
-		if (next_len) {
-			if (nxt_offs > c->leb_size) {
-				err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0,
-							  0, LPROPS_TAKEN);
-				if (err)
-					return err;
-				lnum = -1;
+			int blen, nxt_offs = buf_offs + used + next_len;
+
+			if (next_len && nxt_offs <= c->leb_size) {
+				if (avail > 0)
+					continue;
+				else
+					blen = buf_len;
+			} else {
+				wlen = ALIGN(wlen, 8);
+				blen = ALIGN(wlen, c->min_io_size);
+				ubifs_pad(c, c->cbuf + wlen, blen - wlen);
+			}
+			/*
+			 * The buffer is full or there are no more znodes
+			 * to do
+			 */
+			err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
+					      blen, UBI_SHORTTERM);
+			if (err)
+				return err;
+			buf_offs += blen;
+			if (next_len) {
+				if (nxt_offs > c->leb_size) {
+					err = ubifs_update_one_lp(c, lnum,
+						LPROPS_NC, 0, 0, LPROPS_TAKEN);
+					if (err)
+						return err;
+					lnum = -1;
+				}
+				used -= blen;
+				if (used < 0)
+					used = 0;
+				avail = buf_len - used;
+				memmove(c->cbuf, c->cbuf + blen, used);
+				continue;
 			}
-			used -= blen;
-			if (used < 0)
-				used = 0;
-			avail = buf_len - used;
-			memmove(c->cbuf, c->cbuf + blen, used);
-			continue;
 		}
 		break;
 	}
@@ -1010,7 +1029,7 @@ static void free_obsolete_znodes(struct ubifs_info *c)
 	do {
 		znode = cnext;
 		cnext = znode->cnext;
-		if (ubifs_zn_obsolete(znode))
+		if (test_bit(OBSOLETE_ZNODE, &znode->flags))
 			kfree(znode);
 		else {
 			znode->cnext = NULL;
diff --git a/trunk/fs/ubifs/ubifs.h b/trunk/fs/ubifs/ubifs.h
index 702b79258e30..f79983d6f860 100644
--- a/trunk/fs/ubifs/ubifs.h
+++ b/trunk/fs/ubifs/ubifs.h
@@ -230,14 +230,14 @@ enum {
  * LPT cnode flag bits.
  *
  * DIRTY_CNODE: cnode is dirty
- * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted),
- *                 so it can (and must) be freed when the commit is finished
  * COW_CNODE: cnode is being committed and must be copied before writing
+ * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted),
+ * so it can (and must) be freed when the commit is finished
  */
 enum {
 	DIRTY_CNODE    = 0,
-	OBSOLETE_CNODE = 1,
-	COW_CNODE      = 2,
+	COW_CNODE      = 1,
+	OBSOLETE_CNODE = 2,
 };
 
 /*
@@ -1468,15 +1468,6 @@ extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
 
 /* io.c */
 void ubifs_ro_mode(struct ubifs_info *c, int err);
-int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
-		   int len, int even_ebadmsg);
-int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
-		    int len, int dtype);
-int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len,
-		     int dtype);
-int ubifs_leb_unmap(struct ubifs_info *c, int lnum);
-int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype);
-int ubifs_is_mapped(const struct ubifs_info *c, int lnum);
 int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len);
 int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
 			   int dtype);
@@ -1756,8 +1747,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
 					 int offs, void *sbuf, int jhead);
 struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
 					     int offs, void *sbuf);
-int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf);
-int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf);
+int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf);
+int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf);
 int ubifs_rcvry_gc_commit(struct ubifs_info *c);
 int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
 			     int deletion, loff_t new_size);
diff --git a/trunk/fs/xfs/Makefile b/trunk/fs/xfs/Makefile
index 75bb316529dd..284a7c89697e 100644
--- a/trunk/fs/xfs/Makefile
+++ b/trunk/fs/xfs/Makefile
@@ -88,6 +88,8 @@ xfs-y				+= xfs_alloc.o \
 				   xfs_vnodeops.o \
 				   xfs_rw.o
 
+xfs-$(CONFIG_XFS_TRACE)		+= xfs_btree_trace.o
+
 # Objects in linux/
 xfs-y				+= $(addprefix $(XFS_LINUX)/, \
 				   kmem.o \
diff --git a/trunk/fs/xfs/linux-2.6/xfs_acl.c b/trunk/fs/xfs/linux-2.6/xfs_acl.c
index 115ac6919533..39f4f809bb68 100644
--- a/trunk/fs/xfs/linux-2.6/xfs_acl.c
+++ b/trunk/fs/xfs/linux-2.6/xfs_acl.c
@@ -264,7 +264,7 @@ xfs_set_mode(struct inode *inode, mode_t mode)
 		iattr.ia_mode = mode;
 		iattr.ia_ctime = current_fs_time(inode->i_sb);
 
-		error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
+		error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
 	}
 
 	return error;
diff --git a/trunk/fs/xfs/linux-2.6/xfs_aops.c b/trunk/fs/xfs/linux-2.6/xfs_aops.c
index 26384fe3f26d..79ce38be15a1 100644
--- a/trunk/fs/xfs/linux-2.6/xfs_aops.c
+++ b/trunk/fs/xfs/linux-2.6/xfs_aops.c
@@ -181,7 +181,6 @@ xfs_setfilesize(
 
 	isize = xfs_ioend_new_eof(ioend);
 	if (isize) {
-		trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
 		ip->i_d.di_size = isize;
 		xfs_mark_inode_dirty(ip);
 	}
@@ -895,6 +894,11 @@ xfs_aops_discard_page(
  * For unwritten space on the page we need to start the conversion to
  * regular allocated space.
  * For any other dirty buffer heads on the page we should flush them.
+ *
+ * If we detect that a transaction would be required to flush the page, we
+ * have to check the process flags first, if we are already in a transaction
+ * or disk I/O during allocations is off, we need to fail the writepage and
+ * redirty the page.
  */
 STATIC int
 xfs_vm_writepage(
@@ -902,6 +906,7 @@ xfs_vm_writepage(
 	struct writeback_control *wbc)
 {
 	struct inode		*inode = page->mapping->host;
+	int			delalloc, unwritten;
 	struct buffer_head	*bh, *head;
 	struct xfs_bmbt_irec	imap;
 	xfs_ioend_t		*ioend = NULL, *iohead = NULL;
@@ -933,10 +938,15 @@ xfs_vm_writepage(
 		goto redirty;
 
 	/*
-	 * Given that we do not allow direct reclaim to call us, we should
-	 * never be called while in a filesystem transaction.
+	 * We need a transaction if there are delalloc or unwritten buffers
+	 * on the page.
+	 *
+	 * If we need a transaction and the process flags say we are already
+	 * in a transaction, or no IO is allowed then mark the page dirty
+	 * again and leave the page as is.
 	 */
-	if (WARN_ON(current->flags & PF_FSTRANS))
+	xfs_count_page_state(page, &delalloc, &unwritten);
+	if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
 		goto redirty;
 
 	/* Is this page beyond the end of the file? */
@@ -960,7 +970,7 @@ xfs_vm_writepage(
 	offset = page_offset(page);
 	type = IO_OVERWRITE;
 
-	if (wbc->sync_mode == WB_SYNC_NONE)
+	if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
 		nonblocking = 1;
 
 	do {
diff --git a/trunk/fs/xfs/linux-2.6/xfs_buf.c b/trunk/fs/xfs/linux-2.6/xfs_buf.c
index b2b411985591..5e68099db2a5 100644
--- a/trunk/fs/xfs/linux-2.6/xfs_buf.c
+++ b/trunk/fs/xfs/linux-2.6/xfs_buf.c
@@ -499,14 +499,16 @@ _xfs_buf_find(
 	spin_unlock(&pag->pag_buf_lock);
 	xfs_perag_put(pag);
 
-	if (!xfs_buf_trylock(bp)) {
-		if (flags & XBF_TRYLOCK) {
+	if (xfs_buf_cond_lock(bp)) {
+		/* failed, so wait for the lock if requested. */
+		if (!(flags & XBF_TRYLOCK)) {
+			xfs_buf_lock(bp);
+			XFS_STATS_INC(xb_get_locked_waited);
+		} else {
 			xfs_buf_rele(bp);
 			XFS_STATS_INC(xb_busy_locked);
 			return NULL;
 		}
-		xfs_buf_lock(bp);
-		XFS_STATS_INC(xb_get_locked_waited);
 	}
 
 	/*
@@ -592,8 +594,10 @@ _xfs_buf_read(
 	ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
 	ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
 
-	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD);
-	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
+	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
+			XBF_READ_AHEAD | _XBF_RUN_QUEUES);
+	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \
+			XBF_READ_AHEAD | _XBF_RUN_QUEUES);
 
 	status = xfs_buf_iorequest(bp);
 	if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC))
@@ -677,6 +681,7 @@ xfs_buf_read_uncached(
 		return NULL;
 
 	/* set up the buffer for a read IO */
+	xfs_buf_lock(bp);
 	XFS_BUF_SET_ADDR(bp, daddr);
 	XFS_BUF_READ(bp);
 	XFS_BUF_BUSY(bp);
@@ -811,6 +816,8 @@ xfs_buf_get_uncached(
 		goto fail_free_mem;
 	}
 
+	xfs_buf_unlock(bp);
+
 	trace_xfs_buf_get_uncached(bp, _RET_IP_);
 	return bp;
 
@@ -889,8 +896,8 @@ xfs_buf_rele(
  *	to push on stale inode buffers.
  */
 int
-xfs_buf_trylock(
-	struct xfs_buf		*bp)
+xfs_buf_cond_lock(
+	xfs_buf_t		*bp)
 {
 	int			locked;
 
@@ -900,8 +907,15 @@ xfs_buf_trylock(
 	else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
 		xfs_log_force(bp->b_target->bt_mount, 0);
 
-	trace_xfs_buf_trylock(bp, _RET_IP_);
-	return locked;
+	trace_xfs_buf_cond_lock(bp, _RET_IP_);
+	return locked ? 0 : -EBUSY;
+}
+
+int
+xfs_buf_lock_value(
+	xfs_buf_t		*bp)
+{
+	return bp->b_sema.count;
 }
 
 /*
@@ -915,7 +929,7 @@ xfs_buf_trylock(
  */
 void
 xfs_buf_lock(
-	struct xfs_buf		*bp)
+	xfs_buf_t		*bp)
 {
 	trace_xfs_buf_lock(bp, _RET_IP_);
 
@@ -936,7 +950,7 @@ xfs_buf_lock(
  */
 void
 xfs_buf_unlock(
-	struct xfs_buf		*bp)
+	xfs_buf_t		*bp)
 {
 	if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
 		atomic_inc(&bp->b_hold);
@@ -1107,7 +1121,7 @@ xfs_bioerror_relse(
 	XFS_BUF_UNDELAYWRITE(bp);
 	XFS_BUF_DONE(bp);
 	XFS_BUF_STALE(bp);
-	bp->b_iodone = NULL;
+	XFS_BUF_CLR_IODONE_FUNC(bp);
 	if (!(fl & XBF_ASYNC)) {
 		/*
 		 * Mark b_error and B_ERROR _both_.
@@ -1209,21 +1223,23 @@ _xfs_buf_ioapply(
 	total_nr_pages = bp->b_page_count;
 	map_i = 0;
 
-	if (bp->b_flags & XBF_WRITE) {
-		if (bp->b_flags & XBF_SYNCIO)
-			rw = WRITE_SYNC;
-		else
-			rw = WRITE;
-		if (bp->b_flags & XBF_FUA)
-			rw |= REQ_FUA;
-		if (bp->b_flags & XBF_FLUSH)
-			rw |= REQ_FLUSH;
-	} else if (bp->b_flags & XBF_READ_AHEAD) {
-		rw = READA;
+	if (bp->b_flags & XBF_ORDERED) {
+		ASSERT(!(bp->b_flags & XBF_READ));
+		rw = WRITE_FLUSH_FUA;
+	} else if (bp->b_flags & XBF_LOG_BUFFER) {
+		ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
+		bp->b_flags &= ~_XBF_RUN_QUEUES;
+		rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
+	} else if (bp->b_flags & _XBF_RUN_QUEUES) {
+		ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
+		bp->b_flags &= ~_XBF_RUN_QUEUES;
+		rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META;
 	} else {
-		rw = READ;
+		rw = (bp->b_flags & XBF_WRITE) ? WRITE :
+		     (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
 	}
 
+
 next_chunk:
 	atomic_inc(&bp->b_io_remaining);
 	nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
@@ -1678,14 +1694,15 @@ xfs_buf_delwri_split(
 	list_for_each_entry_safe(bp, n, dwq, b_list) {
 		ASSERT(bp->b_flags & XBF_DELWRI);
 
-		if (!XFS_BUF_ISPINNED(bp) && xfs_buf_trylock(bp)) {
+		if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
 			if (!force &&
 			    time_before(jiffies, bp->b_queuetime + age)) {
 				xfs_buf_unlock(bp);
 				break;
 			}
 
-			bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q);
+			bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
+					 _XBF_RUN_QUEUES);
 			bp->b_flags |= XBF_WRITE;
 			list_move_tail(&bp->b_list, list);
 			trace_xfs_buf_delwri_split(bp, _RET_IP_);
@@ -1721,6 +1738,14 @@ xfs_buf_cmp(
 	return 0;
 }
 
+void
+xfs_buf_delwri_sort(
+	xfs_buftarg_t	*target,
+	struct list_head *list)
+{
+	list_sort(NULL, list, xfs_buf_cmp);
+}
+
 STATIC int
 xfsbufd(
 	void		*data)
diff --git a/trunk/fs/xfs/linux-2.6/xfs_buf.h b/trunk/fs/xfs/linux-2.6/xfs_buf.h
index 6a83b46b4bcf..50a7d5fb3b73 100644
--- a/trunk/fs/xfs/linux-2.6/xfs_buf.h
+++ b/trunk/fs/xfs/linux-2.6/xfs_buf.h
@@ -46,46 +46,43 @@ typedef enum {
 
 #define XBF_READ	(1 << 0) /* buffer intended for reading from device */
 #define XBF_WRITE	(1 << 1) /* buffer intended for writing to device */
-#define XBF_READ_AHEAD	(1 << 2) /* asynchronous read-ahead */
-#define XBF_MAPPED	(1 << 3) /* buffer mapped (b_addr valid) */
+#define XBF_MAPPED	(1 << 2) /* buffer mapped (b_addr valid) */
 #define XBF_ASYNC	(1 << 4) /* initiator will not wait for completion */
 #define XBF_DONE	(1 << 5) /* all pages in the buffer uptodate */
 #define XBF_DELWRI	(1 << 6) /* buffer has dirty pages */
 #define XBF_STALE	(1 << 7) /* buffer has been staled, do not find it */
-
-/* I/O hints for the BIO layer */
-#define XBF_SYNCIO	(1 << 10)/* treat this buffer as synchronous I/O */
-#define XBF_FUA		(1 << 11)/* force cache write through mode */
-#define XBF_FLUSH	(1 << 12)/* flush the disk cache before a write */
+#define XBF_ORDERED	(1 << 11)/* use ordered writes */
+#define XBF_READ_AHEAD	(1 << 12)/* asynchronous read-ahead */
+#define XBF_LOG_BUFFER	(1 << 13)/* this is a buffer used for the log */
 
 /* flags used only as arguments to access routines */
-#define XBF_LOCK	(1 << 15)/* lock requested */
-#define XBF_TRYLOCK	(1 << 16)/* lock requested, but do not wait */
-#define XBF_DONT_BLOCK	(1 << 17)/* do not block in current thread */
+#define XBF_LOCK	(1 << 14)/* lock requested */
+#define XBF_TRYLOCK	(1 << 15)/* lock requested, but do not wait */
+#define XBF_DONT_BLOCK	(1 << 16)/* do not block in current thread */
 
 /* flags used only internally */
-#define _XBF_PAGES	(1 << 20)/* backed by refcounted pages */
-#define _XBF_KMEM	(1 << 21)/* backed by heap memory */
-#define _XBF_DELWRI_Q	(1 << 22)/* buffer on delwri queue */
+#define _XBF_PAGES	(1 << 18)/* backed by refcounted pages */
+#define	_XBF_RUN_QUEUES	(1 << 19)/* run block device task queue	*/
+#define	_XBF_KMEM	(1 << 20)/* backed by heap memory */
+#define _XBF_DELWRI_Q	(1 << 21)/* buffer on delwri queue */
 
 typedef unsigned int xfs_buf_flags_t;
 
 #define XFS_BUF_FLAGS \
 	{ XBF_READ,		"READ" }, \
 	{ XBF_WRITE,		"WRITE" }, \
-	{ XBF_READ_AHEAD,	"READ_AHEAD" }, \
 	{ XBF_MAPPED,		"MAPPED" }, \
 	{ XBF_ASYNC,		"ASYNC" }, \
 	{ XBF_DONE,		"DONE" }, \
 	{ XBF_DELWRI,		"DELWRI" }, \
 	{ XBF_STALE,		"STALE" }, \
-	{ XBF_SYNCIO,		"SYNCIO" }, \
-	{ XBF_FUA,		"FUA" }, \
-	{ XBF_FLUSH,		"FLUSH" }, \
+	{ XBF_ORDERED,		"ORDERED" }, \
+	{ XBF_READ_AHEAD,	"READ_AHEAD" }, \
 	{ XBF_LOCK,		"LOCK" },  	/* should never be set */\
 	{ XBF_TRYLOCK,		"TRYLOCK" }, 	/* ditto */\
 	{ XBF_DONT_BLOCK,	"DONT_BLOCK" },	/* ditto */\
 	{ _XBF_PAGES,		"PAGES" }, \
+	{ _XBF_RUN_QUEUES,	"RUN_QUEUES" }, \
 	{ _XBF_KMEM,		"KMEM" }, \
 	{ _XBF_DELWRI_Q,	"DELWRI_Q" }
 
@@ -94,6 +91,11 @@ typedef enum {
 	XBT_FORCE_FLUSH = 1,
 } xfs_buftarg_flags_t;
 
+typedef struct xfs_bufhash {	/* NOTE(review): no user visible in this patch */
+	struct list_head	bh_list;	/* list head */
+	spinlock_t		bh_lock;	/* presumably guards bh_list - confirm */
+} xfs_bufhash_t;
+
 typedef struct xfs_buftarg {
 	dev_t			bt_dev;
 	struct block_device	*bt_bdev;
@@ -149,7 +151,7 @@ typedef struct xfs_buf {
 	xfs_buf_iodone_t	b_iodone;	/* I/O completion function */
 	struct completion	b_iowait;	/* queue for I/O waiters */
 	void			*b_fspriv;
-	struct xfs_trans	*b_transp;
+	void			*b_fspriv2;
 	struct page		**b_pages;	/* array of page pointers */
 	struct page		*b_page_array[XB_PAGES]; /* inline pages */
 	unsigned long		b_queuetime;	/* time buffer was queued */
@@ -190,11 +192,10 @@ extern void xfs_buf_free(xfs_buf_t *);
 extern void xfs_buf_rele(xfs_buf_t *);
 
 /* Locking and Unlocking Buffers */
-extern int xfs_buf_trylock(xfs_buf_t *);
+extern int xfs_buf_cond_lock(xfs_buf_t *);
+extern int xfs_buf_lock_value(xfs_buf_t *);
 extern void xfs_buf_lock(xfs_buf_t *);
 extern void xfs_buf_unlock(xfs_buf_t *);
-#define xfs_buf_islocked(bp) \
-	((bp)->b_sema.count <= 0)
 
 /* Buffer Read and Write Routines */
 extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
@@ -233,9 +234,8 @@ extern void xfs_buf_terminate(void);
 
 
 #define XFS_BUF_BFLAGS(bp)	((bp)->b_flags)
-#define XFS_BUF_ZEROFLAGS(bp) \
-	((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
-			    XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
+#define XFS_BUF_ZEROFLAGS(bp)	((bp)->b_flags &= \
+		~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
 
 void xfs_buf_stale(struct xfs_buf *bp);
 #define XFS_BUF_STALE(bp)	xfs_buf_stale(bp);
@@ -267,6 +267,10 @@ void xfs_buf_stale(struct xfs_buf *bp);
 #define XFS_BUF_UNASYNC(bp)	((bp)->b_flags &= ~XBF_ASYNC)
 #define XFS_BUF_ISASYNC(bp)	((bp)->b_flags & XBF_ASYNC)
 
+#define XFS_BUF_ORDERED(bp)	((bp)->b_flags |= XBF_ORDERED)
+#define XFS_BUF_UNORDERED(bp)	((bp)->b_flags &= ~XBF_ORDERED)
+#define XFS_BUF_ISORDERED(bp)	((bp)->b_flags & XBF_ORDERED)
+
 #define XFS_BUF_HOLD(bp)	xfs_buf_hold(bp)
 #define XFS_BUF_READ(bp)	((bp)->b_flags |= XBF_READ)
 #define XFS_BUF_UNREAD(bp)	((bp)->b_flags &= ~XBF_READ)
@@ -276,6 +280,14 @@ void xfs_buf_stale(struct xfs_buf *bp);
 #define XFS_BUF_UNWRITE(bp)	((bp)->b_flags &= ~XBF_WRITE)
 #define XFS_BUF_ISWRITE(bp)	((bp)->b_flags & XBF_WRITE)
 
+#define XFS_BUF_IODONE_FUNC(bp)			((bp)->b_iodone)
+#define XFS_BUF_SET_IODONE_FUNC(bp, func)	((bp)->b_iodone = (func))
+#define XFS_BUF_CLR_IODONE_FUNC(bp)		((bp)->b_iodone = NULL)
+
+#define XFS_BUF_FSPRIVATE(bp, type)		((type)(bp)->b_fspriv)
+#define XFS_BUF_SET_FSPRIVATE(bp, val)		((bp)->b_fspriv = (void*)(val))
+#define XFS_BUF_FSPRIVATE2(bp, type)		((type)(bp)->b_fspriv2)
+#define XFS_BUF_SET_FSPRIVATE2(bp, val)		((bp)->b_fspriv2 = (void*)(val))
 #define XFS_BUF_SET_START(bp)			do { } while (0)
 
 #define XFS_BUF_PTR(bp)			(xfs_caddr_t)((bp)->b_addr)
@@ -301,6 +313,10 @@ xfs_buf_set_ref(
 
 #define XFS_BUF_ISPINNED(bp)	atomic_read(&((bp)->b_pin_count))
 
+#define XFS_BUF_VALUSEMA(bp)	xfs_buf_lock_value(bp)
+#define XFS_BUF_CPSEMA(bp)	(xfs_buf_cond_lock(bp) == 0) /* 1 iff it gave 0 */
+#define XFS_BUF_VSEMA(bp)	xfs_buf_unlock(bp)
+#define XFS_BUF_PSEMA(bp,x)	xfs_buf_lock(bp)	/* x is ignored */
 #define XFS_BUF_FINISH_IOWAIT(bp)	complete(&bp->b_iowait);
 
 #define XFS_BUF_SET_TARGET(bp, target)	((bp)->b_target = (target))
diff --git a/trunk/fs/xfs/linux-2.6/xfs_export.c b/trunk/fs/xfs/linux-2.6/xfs_export.c
index 75e5d322e48f..f4f878fc0083 100644
--- a/trunk/fs/xfs/linux-2.6/xfs_export.c
+++ b/trunk/fs/xfs/linux-2.6/xfs_export.c
@@ -151,14 +151,14 @@ xfs_nfs_get_inode(
 		 * We don't use ESTALE directly down the chain to not
 		 * confuse applications using bulkstat that expect EINVAL.
 		 */
-		if (error == EINVAL || error == ENOENT)
+		if (error == EINVAL)
 			error = ESTALE;
 		return ERR_PTR(-error);
 	}
 
 	if (ip->i_d.di_gen != generation) {
 		IRELE(ip);
-		return ERR_PTR(-ESTALE);
+		return ERR_PTR(-ENOENT);
 	}
 
 	return VFS_I(ip);
diff --git a/trunk/fs/xfs/linux-2.6/xfs_file.c b/trunk/fs/xfs/linux-2.6/xfs_file.c
index 8073f61efb8e..7f782af286bf 100644
--- a/trunk/fs/xfs/linux-2.6/xfs_file.c
+++ b/trunk/fs/xfs/linux-2.6/xfs_file.c
@@ -944,7 +944,7 @@ xfs_file_fallocate(
 
 		iattr.ia_valid = ATTR_SIZE;
 		iattr.ia_size = new_size;
-		error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK);
+		error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
 	}
 
 out_unlock:
diff --git a/trunk/fs/xfs/linux-2.6/xfs_iops.c b/trunk/fs/xfs/linux-2.6/xfs_iops.c
index 501e4f630548..d44d92cd12b1 100644
--- a/trunk/fs/xfs/linux-2.6/xfs_iops.c
+++ b/trunk/fs/xfs/linux-2.6/xfs_iops.c
@@ -39,7 +39,6 @@
 #include "xfs_buf_item.h"
 #include "xfs_utils.h"
 #include "xfs_vnodeops.h"
-#include "xfs_inode_item.h"
 #include "xfs_trace.h"
 
 #include <linux/capability.h>
@@ -498,442 +497,12 @@ xfs_vn_getattr(
 	return 0;
 }
 
-int
-xfs_setattr_nonsize(
-	struct xfs_inode	*ip,
-	struct iattr		*iattr,
-	int			flags)
-{
-	xfs_mount_t		*mp = ip->i_mount;
-	struct inode		*inode = VFS_I(ip);
-	int			mask = iattr->ia_valid;
-	xfs_trans_t		*tp;
-	int			error;
-	uid_t			uid = 0, iuid = 0;
-	gid_t			gid = 0, igid = 0;
-	struct xfs_dquot	*udqp = NULL, *gdqp = NULL;
-	struct xfs_dquot	*olddquot1 = NULL, *olddquot2 = NULL;
-
-	trace_xfs_setattr(ip);
-
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
-		return XFS_ERROR(EROFS);
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
-
-	error = -inode_change_ok(inode, iattr);
-	if (error)
-		return XFS_ERROR(error);
-
-	ASSERT((mask & ATTR_SIZE) == 0);
-
-	/*
-	 * If disk quotas is on, we make sure that the dquots do exist on disk,
-	 * before we start any other transactions. Trying to do this later
-	 * is messy. We don't care to take a readlock to look at the ids
-	 * in inode here, because we can't hold it across the trans_reserve.
-	 * If the IDs do change before we take the ilock, we're covered
-	 * because the i_*dquot fields will get updated anyway.
-	 */
-	if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
-		uint	qflags = 0;
-
-		if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
-			uid = iattr->ia_uid;
-			qflags |= XFS_QMOPT_UQUOTA;
-		} else {
-			uid = ip->i_d.di_uid;
-		}
-		if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
-			gid = iattr->ia_gid;
-			qflags |= XFS_QMOPT_GQUOTA;
-		}  else {
-			gid = ip->i_d.di_gid;
-		}
-
-		/*
-		 * We take a reference when we initialize udqp and gdqp,
-		 * so it is important that we never blindly double trip on
-		 * the same variable. See xfs_create() for an example.
-		 */
-		ASSERT(udqp == NULL);
-		ASSERT(gdqp == NULL);
-		error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
-					 qflags, &udqp, &gdqp);
-		if (error)
-			return error;
-	}
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
-	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
-	if (error)
-		goto out_dqrele;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	/*
-	 * Change file ownership.  Must be the owner or privileged.
-	 */
-	if (mask & (ATTR_UID|ATTR_GID)) {
-		/*
-		 * These IDs could have changed since we last looked at them.
-		 * But, we're assured that if the ownership did change
-		 * while we didn't have the inode locked, inode's dquot(s)
-		 * would have changed also.
-		 */
-		iuid = ip->i_d.di_uid;
-		igid = ip->i_d.di_gid;
-		gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
-		uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
-
-		/*
-		 * Do a quota reservation only if uid/gid is actually
-		 * going to change.
-		 */
-		if (XFS_IS_QUOTA_RUNNING(mp) &&
-		    ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
-		     (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
-			ASSERT(tp);
-			error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
-						capable(CAP_FOWNER) ?
-						XFS_QMOPT_FORCE_RES : 0);
-			if (error)	/* out of quota */
-				goto out_trans_cancel;
-		}
-	}
-
-	xfs_trans_ijoin(tp, ip);
-
-	/*
-	 * Change file ownership.  Must be the owner or privileged.
-	 */
-	if (mask & (ATTR_UID|ATTR_GID)) {
-		/*
-		 * CAP_FSETID overrides the following restrictions:
-		 *
-		 * The set-user-ID and set-group-ID bits of a file will be
-		 * cleared upon successful return from chown()
-		 */
-		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
-		    !capable(CAP_FSETID))
-			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
-
-		/*
-		 * Change the ownerships and register quota modifications
-		 * in the transaction.
-		 */
-		if (iuid != uid) {
-			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
-				ASSERT(mask & ATTR_UID);
-				ASSERT(udqp);
-				olddquot1 = xfs_qm_vop_chown(tp, ip,
-							&ip->i_udquot, udqp);
-			}
-			ip->i_d.di_uid = uid;
-			inode->i_uid = uid;
-		}
-		if (igid != gid) {
-			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
-				ASSERT(!XFS_IS_PQUOTA_ON(mp));
-				ASSERT(mask & ATTR_GID);
-				ASSERT(gdqp);
-				olddquot2 = xfs_qm_vop_chown(tp, ip,
-							&ip->i_gdquot, gdqp);
-			}
-			ip->i_d.di_gid = gid;
-			inode->i_gid = gid;
-		}
-	}
-
-	/*
-	 * Change file access modes.
-	 */
-	if (mask & ATTR_MODE) {
-		umode_t mode = iattr->ia_mode;
-
-		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
-			mode &= ~S_ISGID;
-
-		ip->i_d.di_mode &= S_IFMT;
-		ip->i_d.di_mode |= mode & ~S_IFMT;
-
-		inode->i_mode &= S_IFMT;
-		inode->i_mode |= mode & ~S_IFMT;
-	}
-
-	/*
-	 * Change file access or modified times.
-	 */
-	if (mask & ATTR_ATIME) {
-		inode->i_atime = iattr->ia_atime;
-		ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
-		ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
-		ip->i_update_core = 1;
-	}
-	if (mask & ATTR_CTIME) {
-		inode->i_ctime = iattr->ia_ctime;
-		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
-		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
-		ip->i_update_core = 1;
-	}
-	if (mask & ATTR_MTIME) {
-		inode->i_mtime = iattr->ia_mtime;
-		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
-		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
-		ip->i_update_core = 1;
-	}
-
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-	XFS_STATS_INC(xs_ig_attrchg);
-
-	if (mp->m_flags & XFS_MOUNT_WSYNC)
-		xfs_trans_set_sync(tp);
-	error = xfs_trans_commit(tp, 0);
-
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-	/*
-	 * Release any dquot(s) the inode had kept before chown.
-	 */
-	xfs_qm_dqrele(olddquot1);
-	xfs_qm_dqrele(olddquot2);
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
-
-	if (error)
-		return XFS_ERROR(error);
-
-	/*
-	 * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
-	 * 	     update.  We could avoid this with linked transactions
-	 * 	     and passing down the transaction pointer all the way
-	 *	     to attr_set.  No previous user of the generic
-	 * 	     Posix ACL code seems to care about this issue either.
-	 */
-	if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
-		error = -xfs_acl_chmod(inode);
-		if (error)
-			return XFS_ERROR(error);
-	}
-
-	return 0;
-
-out_trans_cancel:
-	xfs_trans_cancel(tp, 0);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-out_dqrele:
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
-	return error;
-}
-
-/*
- * Truncate file.  Must have write permission and not be a directory.
- */
-int
-xfs_setattr_size(
-	struct xfs_inode	*ip,
-	struct iattr		*iattr,
-	int			flags)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	struct inode		*inode = VFS_I(ip);
-	int			mask = iattr->ia_valid;
-	struct xfs_trans	*tp;
-	int			error;
-	uint			lock_flags;
-	uint			commit_flags = 0;
-
-	trace_xfs_setattr(ip);
-
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
-		return XFS_ERROR(EROFS);
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
-
-	error = -inode_change_ok(inode, iattr);
-	if (error)
-		return XFS_ERROR(error);
-
-	ASSERT(S_ISREG(ip->i_d.di_mode));
-	ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
-			ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
-			ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
-
-	lock_flags = XFS_ILOCK_EXCL;
-	if (!(flags & XFS_ATTR_NOLOCK))
-		lock_flags |= XFS_IOLOCK_EXCL;
-	xfs_ilock(ip, lock_flags);
-
-	/*
-	 * Short circuit the truncate case for zero length files.
-	 */
-	if (iattr->ia_size == 0 &&
-	    ip->i_size == 0 && ip->i_d.di_nextents == 0) {
-		if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
-			goto out_unlock;
-
-		/*
-		 * Use the regular setattr path to update the timestamps.
-		 */
-		xfs_iunlock(ip, lock_flags);
-		iattr->ia_valid &= ~ATTR_SIZE;
-		return xfs_setattr_nonsize(ip, iattr, 0);
-	}
-
-	/*
-	 * Make sure that the dquots are attached to the inode.
-	 */
-	error = xfs_qm_dqattach_locked(ip, 0);
-	if (error)
-		goto out_unlock;
-
-	/*
-	 * Now we can make the changes.  Before we join the inode to the
-	 * transaction, take care of the part of the truncation that must be
-	 * done without the inode lock.  This needs to be done before joining
-	 * the inode to the transaction, because the inode cannot be unlocked
-	 * once it is a part of the transaction.
-	 */
-	if (iattr->ia_size > ip->i_size) {
-		/*
-		 * Do the first part of growing a file: zero any data in the
-		 * last block that is beyond the old EOF.  We need to do this
-		 * before the inode is joined to the transaction to modify
-		 * i_size.
-		 */
-		error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
-		if (error)
-			goto out_unlock;
-	}
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	lock_flags &= ~XFS_ILOCK_EXCL;
-
-	/*
-	 * We are going to log the inode size change in this transaction so
-	 * any previous writes that are beyond the on disk EOF and the new
-	 * EOF that have not been written out need to be written here.  If we
-	 * do not write the data out, we expose ourselves to the null files
-	 * problem.
-	 *
-	 * Only flush from the on disk size to the smaller of the in memory
-	 * file size or the new size as that's the range we really care about
-	 * here and prevents waiting for other data not within the range we
-	 * care about here.
-	 */
-	if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
-		error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size,
-					XBF_ASYNC, FI_NONE);
-		if (error)
-			goto out_unlock;
-	}
-
-	/*
-	 * Wait for all I/O to complete.
-	 */
-	xfs_ioend_wait(ip);
-
-	error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
-				     xfs_get_blocks);
-	if (error)
-		goto out_unlock;
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
-	error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
-				 XFS_TRANS_PERM_LOG_RES,
-				 XFS_ITRUNCATE_LOG_COUNT);
-	if (error)
-		goto out_trans_cancel;
-
-	truncate_setsize(inode, iattr->ia_size);
-
-	commit_flags = XFS_TRANS_RELEASE_LOG_RES;
-	lock_flags |= XFS_ILOCK_EXCL;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	xfs_trans_ijoin(tp, ip);
-
-	/*
-	 * Only change the c/mtime if we are changing the size or we are
-	 * explicitly asked to change it.  This handles the semantic difference
-	 * between truncate() and ftruncate() as implemented in the VFS.
-	 *
-	 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
-	 * special case where we need to update the times despite not having
-	 * these flags set.  For all other operations the VFS set these flags
-	 * explicitly if it wants a timestamp update.
-	 */
-	if (iattr->ia_size != ip->i_size &&
-	    (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
-		iattr->ia_ctime = iattr->ia_mtime =
-			current_fs_time(inode->i_sb);
-		mask |= ATTR_CTIME | ATTR_MTIME;
-	}
-
-	if (iattr->ia_size > ip->i_size) {
-		ip->i_d.di_size = iattr->ia_size;
-		ip->i_size = iattr->ia_size;
-	} else if (iattr->ia_size <= ip->i_size ||
-		   (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
-		error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
-		if (error)
-			goto out_trans_abort;
-
-		/*
-		 * Truncated "down", so we're removing references to old data
-		 * here - if we delay flushing for a long time, we expose
-		 * ourselves unduly to the notorious NULL files problem.  So,
-		 * we mark this inode and flush it when the file is closed,
-		 * and do not wait the usual (long) time for writeout.
-		 */
-		xfs_iflags_set(ip, XFS_ITRUNCATED);
-	}
-
-	if (mask & ATTR_CTIME) {
-		inode->i_ctime = iattr->ia_ctime;
-		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
-		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
-		ip->i_update_core = 1;
-	}
-	if (mask & ATTR_MTIME) {
-		inode->i_mtime = iattr->ia_mtime;
-		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
-		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
-		ip->i_update_core = 1;
-	}
-
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-	XFS_STATS_INC(xs_ig_attrchg);
-
-	if (mp->m_flags & XFS_MOUNT_WSYNC)
-		xfs_trans_set_sync(tp);
-
-	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-out_unlock:
-	if (lock_flags)
-		xfs_iunlock(ip, lock_flags);
-	return error;
-
-out_trans_abort:
-	commit_flags |= XFS_TRANS_ABORT;
-out_trans_cancel:
-	xfs_trans_cancel(tp, commit_flags);
-	goto out_unlock;
-}
-
 STATIC int
 xfs_vn_setattr(
 	struct dentry	*dentry,
 	struct iattr	*iattr)
 {
-	if (iattr->ia_valid & ATTR_SIZE)
-		return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0);
-	return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
+	return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
 }
 
 #define XFS_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
diff --git a/trunk/fs/xfs/linux-2.6/xfs_linux.h b/trunk/fs/xfs/linux-2.6/xfs_linux.h
index d42f814e4d35..8633521b3b2e 100644
--- a/trunk/fs/xfs/linux-2.6/xfs_linux.h
+++ b/trunk/fs/xfs/linux-2.6/xfs_linux.h
@@ -33,6 +33,7 @@
 #endif
 
 #include <xfs_types.h>
+#include <xfs_arch.h>
 
 #include <kmem.h>
 #include <mrlock.h>
@@ -87,12 +88,6 @@
 #include <xfs_buf.h>
 #include <xfs_message.h>
 
-#ifdef __BIG_ENDIAN
-#define XFS_NATIVE_HOST 1
-#else
-#undef XFS_NATIVE_HOST
-#endif
-
 /*
  * Feature macros (disable/enable)
  */
diff --git a/trunk/fs/xfs/linux-2.6/xfs_super.c b/trunk/fs/xfs/linux-2.6/xfs_super.c
index 25fd2cd6c8b0..a1a881e68a9a 100644
--- a/trunk/fs/xfs/linux-2.6/xfs_super.c
+++ b/trunk/fs/xfs/linux-2.6/xfs_super.c
@@ -33,6 +33,7 @@
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
+#include "xfs_btree_trace.h"
 #include "xfs_ialloc.h"
 #include "xfs_bmap.h"
 #include "xfs_rtalloc.h"
@@ -1411,35 +1412,37 @@ xfs_fs_fill_super(
 	sb->s_time_gran = 1;
 	set_posix_acl_flag(sb);
 
-	xfs_inode_shrinker_register(mp);
-
-	error = xfs_mountfs(mp);
+	error = xfs_syncd_init(mp);
 	if (error)
 		goto out_filestream_unmount;
 
-	error = xfs_syncd_init(mp);
+	xfs_inode_shrinker_register(mp);
+
+	error = xfs_mountfs(mp);
 	if (error)
-		goto out_unmount;
+		goto out_syncd_stop;
 
 	root = igrab(VFS_I(mp->m_rootip));
 	if (!root) {
 		error = ENOENT;
-		goto out_syncd_stop;
+		goto fail_unmount;
 	}
 	if (is_bad_inode(root)) {
 		error = EINVAL;
-		goto out_syncd_stop;
+		goto fail_vnrele;
 	}
 	sb->s_root = d_alloc_root(root);
 	if (!sb->s_root) {
 		error = ENOMEM;
-		goto out_iput;
+		goto fail_vnrele;
 	}
 
 	return 0;
 
- out_filestream_unmount:
+ out_syncd_stop:
 	xfs_inode_shrinker_unregister(mp);
+	xfs_syncd_stop(mp);
+ out_filestream_unmount:
 	xfs_filestream_unmount(mp);
  out_free_sb:
 	xfs_freesb(mp);
@@ -1453,12 +1456,17 @@ xfs_fs_fill_super(
  out:
 	return -error;
 
- out_iput:
-	iput(root);
- out_syncd_stop:
-	xfs_syncd_stop(mp);
- out_unmount:
+ fail_vnrele:
+	if (sb->s_root) {
+		dput(sb->s_root);
+		sb->s_root = NULL;
+	} else {
+		iput(root);
+	}
+
+ fail_unmount:
 	xfs_inode_shrinker_unregister(mp);
+	xfs_syncd_stop(mp);
 
 	/*
 	 * Blow away any referenced inode in the filestreams cache.
diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.c b/trunk/fs/xfs/linux-2.6/xfs_sync.c
index 5cc158e52d4c..8ecad5ff9f9b 100644
--- a/trunk/fs/xfs/linux-2.6/xfs_sync.c
+++ b/trunk/fs/xfs/linux-2.6/xfs_sync.c
@@ -359,11 +359,13 @@ xfs_quiesce_data(
 {
 	int			error, error2 = 0;
 
+	/* push non-blocking */
+	xfs_sync_data(mp, 0);
 	xfs_qm_sync(mp, SYNC_TRYLOCK);
-	xfs_qm_sync(mp, SYNC_WAIT);
 
-	/* force out the newly dirtied log buffers */
-	xfs_log_force(mp, XFS_LOG_SYNC);
+	/* push and block till complete */
+	xfs_sync_data(mp, SYNC_WAIT);
+	xfs_qm_sync(mp, SYNC_WAIT);
 
 	/* write superblock and hoover up shutdown errors */
 	error = xfs_sync_fsdata(mp);
@@ -434,7 +436,7 @@ xfs_quiesce_attr(
 	WARN_ON(atomic_read(&mp->m_active_trans) != 0);
 
 	/* Push the superblock and write an unmount record */
-	error = xfs_log_sbcount(mp);
+	error = xfs_log_sbcount(mp, 1);
 	if (error)
 		xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
 				"Frozen image may not be consistent.");
diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.h b/trunk/fs/xfs/linux-2.6/xfs_sync.h
index e914fd621746..e3a6ad27415f 100644
--- a/trunk/fs/xfs/linux-2.6/xfs_sync.h
+++ b/trunk/fs/xfs/linux-2.6/xfs_sync.h
@@ -21,6 +21,14 @@
 struct xfs_mount;
 struct xfs_perag;
 
+typedef struct xfs_sync_work {	/* NOTE(review): one queued sync request? */
+	struct list_head	w_list;		/* list linkage */
+	struct xfs_mount	*w_mount;	/* mount for w_syncer? confirm */
+	void			*w_data;	/* syncer routine argument */
+	void			(*w_syncer)(struct xfs_mount *, void *);
+	struct completion	*w_completion;	/* NOTE(review): done signal? */
+} xfs_sync_work_t;
+
 #define SYNC_WAIT		0x0001	/* wait for i/o to complete */
 #define SYNC_TRYLOCK		0x0002  /* only try to lock inodes */
 
diff --git a/trunk/fs/xfs/linux-2.6/xfs_trace.h b/trunk/fs/xfs/linux-2.6/xfs_trace.h
index fda0708ef2ea..d48b7a579ae1 100644
--- a/trunk/fs/xfs/linux-2.6/xfs_trace.h
+++ b/trunk/fs/xfs/linux-2.6/xfs_trace.h
@@ -293,7 +293,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
 		__entry->buffer_length = bp->b_buffer_length;
 		__entry->hold = atomic_read(&bp->b_hold);
 		__entry->pincount = atomic_read(&bp->b_pin_count);
-		__entry->lockval = bp->b_sema.count;
+		__entry->lockval = xfs_buf_lock_value(bp);
 		__entry->flags = bp->b_flags;
 		__entry->caller_ip = caller_ip;
 	),
@@ -323,7 +323,7 @@ DEFINE_BUF_EVENT(xfs_buf_bawrite);
 DEFINE_BUF_EVENT(xfs_buf_bdwrite);
 DEFINE_BUF_EVENT(xfs_buf_lock);
 DEFINE_BUF_EVENT(xfs_buf_lock_done);
-DEFINE_BUF_EVENT(xfs_buf_trylock);
+DEFINE_BUF_EVENT(xfs_buf_cond_lock);
 DEFINE_BUF_EVENT(xfs_buf_unlock);
 DEFINE_BUF_EVENT(xfs_buf_iowait);
 DEFINE_BUF_EVENT(xfs_buf_iowait_done);
@@ -366,7 +366,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
 		__entry->flags = flags;
 		__entry->hold = atomic_read(&bp->b_hold);
 		__entry->pincount = atomic_read(&bp->b_pin_count);
-		__entry->lockval = bp->b_sema.count;
+		__entry->lockval = xfs_buf_lock_value(bp);
 		__entry->caller_ip = caller_ip;
 	),
 	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
@@ -409,7 +409,7 @@ TRACE_EVENT(xfs_buf_ioerror,
 		__entry->buffer_length = bp->b_buffer_length;
 		__entry->hold = atomic_read(&bp->b_hold);
 		__entry->pincount = atomic_read(&bp->b_pin_count);
-		__entry->lockval = bp->b_sema.count;
+		__entry->lockval = xfs_buf_lock_value(bp);
 		__entry->error = error;
 		__entry->flags = bp->b_flags;
 		__entry->caller_ip = caller_ip;
@@ -454,7 +454,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
 		__entry->buf_flags = bip->bli_buf->b_flags;
 		__entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
 		__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
-		__entry->buf_lockval = bip->bli_buf->b_sema.count;
+		__entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf);
 		__entry->li_desc = bip->bli_item.li_desc;
 		__entry->li_flags = bip->bli_item.li_flags;
 	),
@@ -998,8 +998,7 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(xfs_ino_t, ino)
-		__field(loff_t, isize)
-		__field(loff_t, disize)
+		__field(loff_t, size)
 		__field(loff_t, new_size)
 		__field(loff_t, offset)
 		__field(size_t, count)
@@ -1007,18 +1006,16 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
 	TP_fast_assign(
 		__entry->dev = VFS_I(ip)->i_sb->s_dev;
 		__entry->ino = ip->i_ino;
-		__entry->isize = ip->i_size;
-		__entry->disize = ip->i_d.di_size;
+		__entry->size = ip->i_d.di_size;
 		__entry->new_size = ip->i_new_size;
 		__entry->offset = offset;
 		__entry->count = count;
 	),
-	TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx "
+	TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
 		  "offset 0x%llx count %zd",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
-		  __entry->isize,
-		  __entry->disize,
+		  __entry->size,
 		  __entry->new_size,
 		  __entry->offset,
 		  __entry->count)
@@ -1031,7 +1028,40 @@ DEFINE_EVENT(xfs_simple_io_class, name,	\
 DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
 DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
 DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
-DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
+
+
+TRACE_EVENT(xfs_itruncate_start, /* logs inode, sizes, toss range and flag */
+	TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size, int flag,
+		 xfs_off_t toss_start, xfs_off_t toss_finish),
+	TP_ARGS(ip, new_size, flag, toss_start, toss_finish),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(xfs_fsize_t, size)
+		__field(xfs_fsize_t, new_size)
+		__field(xfs_off_t, toss_start)
+		__field(xfs_off_t, toss_finish)
+		__field(int, flag)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->size = ip->i_d.di_size;	/* taken from i_d, not i_size */
+		__entry->new_size = new_size;
+		__entry->toss_start = toss_start;
+		__entry->toss_finish = toss_finish;
+		__entry->flag = flag;
+	),
+	TP_printk("dev %d:%d ino 0x%llx %s size 0x%llx new_size 0x%llx "
+		  "toss start 0x%llx toss finish 0x%llx",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __print_flags(__entry->flag, "|", XFS_ITRUNC_FLAGS),
+		  __entry->size,
+		  __entry->new_size,
+		  __entry->toss_start,
+		  __entry->toss_finish)
+);
 
 DECLARE_EVENT_CLASS(xfs_itrunc_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
@@ -1059,8 +1089,8 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class,
 DEFINE_EVENT(xfs_itrunc_class, name, \
 	TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
 	TP_ARGS(ip, new_size))
-DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start);
-DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end);
+DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start);
+DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end);
 
 TRACE_EVENT(xfs_pagecache_inval,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
diff --git a/trunk/fs/xfs/quota/xfs_dquot.c b/trunk/fs/xfs/quota/xfs_dquot.c
index 837f31158d43..6fa214603819 100644
--- a/trunk/fs/xfs/quota/xfs_dquot.c
+++ b/trunk/fs/xfs/quota/xfs_dquot.c
@@ -220,7 +220,7 @@ xfs_qm_adjust_dqtimers(
 {
 	ASSERT(d->d_id);
 
-#ifdef DEBUG
+#ifdef QUOTADEBUG
 	if (d->d_blk_hardlimit)
 		ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
 		       be64_to_cpu(d->d_blk_hardlimit));
@@ -231,7 +231,6 @@ xfs_qm_adjust_dqtimers(
 		ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
 		       be64_to_cpu(d->d_rtb_hardlimit));
 #endif
-
 	if (!d->d_btimer) {
 		if ((d->d_blk_softlimit &&
 		     (be64_to_cpu(d->d_bcount) >=
@@ -319,7 +318,7 @@ xfs_qm_init_dquot_blk(
 
 	ASSERT(tp);
 	ASSERT(XFS_BUF_ISBUSY(bp));
-	ASSERT(xfs_buf_islocked(bp));
+	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
 
 	d = (xfs_dqblk_t *)XFS_BUF_PTR(bp);
 
@@ -535,7 +534,7 @@ xfs_qm_dqtobp(
 	}
 
 	ASSERT(XFS_BUF_ISBUSY(bp));
-	ASSERT(xfs_buf_islocked(bp));
+	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
 
 	/*
 	 * calculate the location of the dquot inside the buffer.
@@ -623,7 +622,7 @@ xfs_qm_dqread(
 	 * brelse it because we have the changes incore.
 	 */
 	ASSERT(XFS_BUF_ISBUSY(bp));
-	ASSERT(xfs_buf_islocked(bp));
+	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
 	xfs_trans_brelse(tp, bp);
 
 	return (error);
@@ -1424,6 +1423,45 @@ xfs_qm_dqpurge(
 }
 
 
+#ifdef QUOTADEBUG
+void	/* Dump one dquot's fields through xfs_debug(). */
+xfs_qm_dqprint(xfs_dquot_t *dqp)
+{
+	struct xfs_mount	*mp = dqp->q_mount;
+
+	xfs_debug(mp, "-----------KERNEL DQUOT----------------");
+	xfs_debug(mp, "---- dquotID =  %d",
+		(int)be32_to_cpu(dqp->q_core.d_id));
+	xfs_debug(mp, "---- type    =  %s", DQFLAGTO_TYPESTR(dqp));
+	xfs_debug(mp, "---- fs      =  0x%p", dqp->q_mount);
+	xfs_debug(mp, "---- blkno   =  0x%x", (int) dqp->q_blkno);
+	xfs_debug(mp, "---- boffset =  0x%x", (int) dqp->q_bufoffset);
+	xfs_debug(mp, "---- blkhlimit =  %Lu (0x%x)",	/* hex = (int) cast */
+		be64_to_cpu(dqp->q_core.d_blk_hardlimit),
+		(int)be64_to_cpu(dqp->q_core.d_blk_hardlimit));
+	xfs_debug(mp, "---- blkslimit =  %Lu (0x%x)",
+		be64_to_cpu(dqp->q_core.d_blk_softlimit),
+		(int)be64_to_cpu(dqp->q_core.d_blk_softlimit));
+	xfs_debug(mp, "---- inohlimit =  %Lu (0x%x)",
+		be64_to_cpu(dqp->q_core.d_ino_hardlimit),
+		(int)be64_to_cpu(dqp->q_core.d_ino_hardlimit));
+	xfs_debug(mp, "---- inoslimit =  %Lu (0x%x)",
+		be64_to_cpu(dqp->q_core.d_ino_softlimit),
+		(int)be64_to_cpu(dqp->q_core.d_ino_softlimit));
+	xfs_debug(mp, "---- bcount  =  %Lu (0x%x)",
+		be64_to_cpu(dqp->q_core.d_bcount),
+		(int)be64_to_cpu(dqp->q_core.d_bcount));
+	xfs_debug(mp, "---- icount  =  %Lu (0x%x)",
+		be64_to_cpu(dqp->q_core.d_icount),
+		(int)be64_to_cpu(dqp->q_core.d_icount));
+	xfs_debug(mp, "---- btimer  =  %d",
+		(int)be32_to_cpu(dqp->q_core.d_btimer));
+	xfs_debug(mp, "---- itimer  =  %d",
+		(int)be32_to_cpu(dqp->q_core.d_itimer));
+	xfs_debug(mp, "---------------------------");
+}
+#endif
+
 /*
  * Give the buffer a little push if it is incore and
  * wait on the flush lock.
diff --git a/trunk/fs/xfs/quota/xfs_dquot.h b/trunk/fs/xfs/quota/xfs_dquot.h
index 34b7e945dbfa..5da3a23b820d 100644
--- a/trunk/fs/xfs/quota/xfs_dquot.h
+++ b/trunk/fs/xfs/quota/xfs_dquot.h
@@ -116,6 +116,12 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
 				     (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
 				     (XFS_IS_OQUOTA_ON((d)->q_mount))))
 
+#ifdef QUOTADEBUG
+extern void		xfs_qm_dqprint(xfs_dquot_t *);
+#else
+#define xfs_qm_dqprint(a)
+#endif
+
 extern void		xfs_qm_dqdestroy(xfs_dquot_t *);
 extern int		xfs_qm_dqflush(xfs_dquot_t *, uint);
 extern int		xfs_qm_dqpurge(xfs_dquot_t *);
diff --git a/trunk/fs/xfs/quota/xfs_qm.c b/trunk/fs/xfs/quota/xfs_qm.c
index 46e54ad9a2dc..b94dace4e785 100644
--- a/trunk/fs/xfs/quota/xfs_qm.c
+++ b/trunk/fs/xfs/quota/xfs_qm.c
@@ -67,6 +67,32 @@ static struct shrinker xfs_qm_shaker = {
 	.seeks = DEFAULT_SEEKS,
 };
 
+#ifdef DEBUG
+extern struct mutex	qcheck_lock;
+#endif
+
+#ifdef QUOTADEBUG
+/* Log one line for every dquot linked on this mount's qi_dqlist. */
+static void
+xfs_qm_dquot_list_print(struct xfs_mount *mp)
+{
+	xfs_dquot_t	*dqp;
+	int		i = 0;
+
+	list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist, q_mplist) {
+		xfs_debug(mp, "   %d. \"%d (%s)\"   "
+				  "bcnt = %lld, icnt = %lld, refs = %d",
+			i++, be32_to_cpu(dqp->q_core.d_id),
+			DQFLAGTO_TYPESTR(dqp),
+			(long long)be64_to_cpu(dqp->q_core.d_bcount),
+			(long long)be64_to_cpu(dqp->q_core.d_icount),
+			dqp->q_nrefs);
+	}
+}
+#else
+static void xfs_qm_dquot_list_print(struct xfs_mount *mp) { }
+#endif
+
 /*
  * Initialize the XQM structure.
  * Note that there is not one quota manager per file system.
@@ -139,6 +165,9 @@ xfs_Gqm_init(void)
 	atomic_set(&xqm->qm_totaldquots, 0);
 	xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
 	xqm->qm_nrefs = 0;
+#ifdef DEBUG
+	mutex_init(&qcheck_lock);
+#endif
 	return xqm;
 
  out_free_udqhash:
@@ -175,6 +204,9 @@ xfs_qm_destroy(
 	mutex_lock(&xqm->qm_dqfrlist_lock);
 	list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
 		xfs_dqlock(dqp);
+#ifdef QUOTADEBUG
+		xfs_debug(dqp->q_mount, "FREELIST destroy 0x%p", dqp);
+#endif
 		list_del_init(&dqp->q_freelist);
 		xfs_Gqm->qm_dqfrlist_cnt--;
 		xfs_dqunlock(dqp);
@@ -182,6 +214,9 @@ xfs_qm_destroy(
 	}
 	mutex_unlock(&xqm->qm_dqfrlist_lock);
 	mutex_destroy(&xqm->qm_dqfrlist_lock);
+#ifdef DEBUG
+	mutex_destroy(&qcheck_lock);
+#endif
 	kmem_free(xqm);
 }
 
@@ -374,6 +409,11 @@ xfs_qm_mount_quotas(
 		xfs_warn(mp, "Failed to initialize disk quotas.");
 		return;
 	}
+
+#ifdef QUOTADEBUG
+	if (XFS_IS_QUOTA_ON(mp))
+		xfs_qm_internalqcheck(mp);
+#endif
 }
 
 /*
@@ -826,8 +866,8 @@ xfs_qm_dqattach_locked(
 	}
 
  done:
-#ifdef DEBUG
-	if (!error) {
+#ifdef QUOTADEBUG
+	if (! error) {
 		if (XFS_IS_UQUOTA_ON(mp))
 			ASSERT(ip->i_udquot);
 		if (XFS_IS_OQUOTA_ON(mp))
@@ -1693,6 +1733,8 @@ xfs_qm_quotacheck(
 	mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
 	mp->m_qflags |= flags;
 
+	xfs_qm_dquot_list_print(mp);
+
  error_return:
 	if (error) {
 		xfs_warn(mp,
@@ -2054,6 +2096,9 @@ xfs_qm_write_sb_changes(
 	xfs_trans_t	*tp;
 	int		error;
 
+#ifdef QUOTADEBUG
+	xfs_notice(mp, "Writing superblock quota changes");
+#endif
 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
 	if ((error = xfs_trans_reserve(tp, 0,
 				      mp->m_sb.sb_sectsize + 128, 0,
diff --git a/trunk/fs/xfs/quota/xfs_qm.h b/trunk/fs/xfs/quota/xfs_qm.h
index 43b9abe1052c..567b29b9f1b3 100644
--- a/trunk/fs/xfs/quota/xfs_qm.h
+++ b/trunk/fs/xfs/quota/xfs_qm.h
@@ -163,4 +163,10 @@ extern int		xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
 extern int		xfs_qm_scall_quotaon(xfs_mount_t *, uint);
 extern int		xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
 
+#ifdef DEBUG
+extern int		xfs_qm_internalqcheck(xfs_mount_t *);
+#else
+#define xfs_qm_internalqcheck(mp)	(0)
+#endif
+
 #endif /* __XFS_QM_H__ */
diff --git a/trunk/fs/xfs/quota/xfs_qm_syscalls.c b/trunk/fs/xfs/quota/xfs_qm_syscalls.c
index 609246f42e6c..2dadb15d5ca9 100644
--- a/trunk/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/trunk/fs/xfs/quota/xfs_qm_syscalls.c
@@ -263,7 +263,7 @@ xfs_qm_scall_trunc_qfile(
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip);
 
-	error = xfs_itruncate_data(&tp, ip, 0);
+	error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, 1);
 	if (error) {
 		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
 				     XFS_TRANS_ABORT);
@@ -622,6 +622,7 @@ xfs_qm_scall_setqlim(
 	xfs_trans_log_dquot(tp, dqp);
 
 	error = xfs_trans_commit(tp, 0);
+	xfs_qm_dqprint(dqp);
 	xfs_qm_dqrele(dqp);
 
  out_unlock:
@@ -656,6 +657,7 @@ xfs_qm_scall_getquota(
 		xfs_qm_dqput(dqp);
 		return XFS_ERROR(ENOENT);
 	}
+	/* xfs_qm_dqprint(dqp); */
 	/*
 	 * Convert the disk dquot to the exportable format
 	 */
@@ -904,3 +906,354 @@ xfs_qm_dqrele_all_inodes(
 	ASSERT(mp->m_quotainfo);
 	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
 }
+
+/*------------------------------------------------------------------------*/
+#ifdef DEBUG
+/*
+ * This contains all the test functions for XFS disk quotas.
+ * Currently it does a quota accounting check. ie. it walks through
+ * all inodes in the file system, calculating the dquot accounting fields,
+ * and prints out any inconsistencies.
+ */
+xfs_dqhash_t *qmtest_udqtab;
+xfs_dqhash_t *qmtest_gdqtab;
+int	      qmtest_hashmask;
+int	      qmtest_nfails;
+struct mutex  qcheck_lock;
+
+#define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
+				 (__psunsigned_t)(id)) & \
+				(qmtest_hashmask - 1))
+
+#define DQTEST_HASH(mp, id, type)   ((type & XFS_DQ_USER) ? \
+				     (qmtest_udqtab + \
+				      DQTEST_HASHVAL(mp, id)) : \
+				     (qmtest_gdqtab + \
+				      DQTEST_HASHVAL(mp, id)))
+
+#define DQTEST_LIST_PRINT(l, NXT, title) \
+{ \
+	  xfs_dqtest_t	*dqp; int i = 0;\
+	  xfs_debug(NULL, "%s (#%d)", title, (int) (l)->qh_nelems); \
+	  for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \
+	       dqp = (xfs_dqtest_t *)dqp->NXT) { \
+		xfs_debug(dqp->q_mount,		\
+			"  %d. \"%d (%s)\"  bcnt = %d, icnt = %d", \
+			 ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp),	     \
+			 dqp->d_bcount, dqp->d_icount); } \
+}
+
+typedef struct dqtest {
+	uint		 dq_flags;	/* various flags (XFS_DQ_*) */
+	struct list_head q_hashlist;
+	xfs_dqhash_t	*q_hash;	/* the hashchain header */
+	xfs_mount_t	*q_mount;	/* filesystem this relates to */
+	xfs_dqid_t	d_id;		/* user id or group id */
+	xfs_qcnt_t	d_bcount;	/* # disk blocks owned by the user */
+	xfs_qcnt_t	d_icount;	/* # inodes owned by the user */
+} xfs_dqtest_t;
+
+STATIC void	/* link a test dquot into hash bucket h and count it */
+xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp)
+{
+	h->qh_nelems += 1;
+	h->qh_version += 1;
+	list_add(&dqp->q_hashlist, &h->qh_list);
+}
+
+/* Dump the id, mount and accumulated counters of a quotacheck test dquot. */
+STATIC void
+xfs_qm_dqtest_print(struct xfs_mount *mp, struct dqtest *d)
+{
+	xfs_debug(mp, "-----------DQTEST DQUOT----------------");
+	xfs_debug(mp, "---- dquot ID = %d", d->d_id);
+	xfs_debug(mp, "---- fs       = 0x%p", d->q_mount);
+	xfs_debug(mp, "---- bcount   = %Lu (0x%x)",
+		d->d_bcount, (int)d->d_bcount);
+	xfs_debug(mp, "---- icount   = %Lu (0x%x)",
+		d->d_icount, (int)d->d_icount);
+	xfs_debug(mp, "---------------------------");
+}
+
+STATIC void
+xfs_qm_dqtest_failed(
+	xfs_dqtest_t	*d,		/* locally accumulated counters */
+	xfs_dquot_t	*dqp,		/* incore dquot, NULL if dqget failed */
+	char		*reason,	/* short description of the failure */
+	xfs_qcnt_t	a,		/* incore value that mismatched */
+	xfs_qcnt_t	b,		/* recomputed value it was compared to */
+	int		error)		/* lookup error, 0 for a mismatch */
+{
+	struct xfs_mount *mp = d->q_mount;	/* dqp may be NULL; never use it */
+
+	qmtest_nfails++;
+	if (error)
+		xfs_debug(mp, "quotacheck failed id=%d, err=%d\nreason: %s",
+			d->d_id, error, reason);
+	else
+		xfs_debug(mp, "quotacheck failed id=%d (%s) [%d != %d]",
+			d->d_id, reason, (int)a, (int)b);
+	xfs_qm_dqtest_print(mp, d);
+	if (dqp)
+		xfs_qm_dqprint(dqp);
+}
+
+/*
+ * Compare the counters accumulated in test dquot d against the incore dquot
+ * dqp, and check that a timer is running wherever a soft limit is exceeded.
+ * Returns the number of problems found.
+ */
+STATIC int
+xfs_dqtest_cmp2(
+	xfs_dqtest_t	*d,
+	xfs_dquot_t	*dqp)
+{
+	xfs_qcnt_t	icount = be64_to_cpu(dqp->q_core.d_icount);
+	xfs_qcnt_t	bcount = be64_to_cpu(dqp->q_core.d_bcount);
+	int		nbad = 0;
+
+	if (icount != d->d_icount) {
+		xfs_qm_dqtest_failed(d, dqp, "icount mismatch",
+			icount, d->d_icount, 0);
+		nbad++;
+	}
+	if (bcount != d->d_bcount) {
+		xfs_qm_dqtest_failed(d, dqp, "bcount mismatch",
+			bcount, d->d_bcount, 0);
+		nbad++;
+	}
+	/* Over a soft limit: every id except 0 must have its timer started. */
+	if (dqp->q_core.d_blk_softlimit && dqp->q_core.d_id &&
+	    !dqp->q_core.d_btimer &&
+	    bcount >= be64_to_cpu(dqp->q_core.d_blk_softlimit)) {
+		xfs_debug(dqp->q_mount, "%d [%s] BLK TIMER NOT STARTED",
+			d->d_id, DQFLAGTO_TYPESTR(d));
+		nbad++;
+	}
+	if (dqp->q_core.d_ino_softlimit && dqp->q_core.d_id &&
+	    !dqp->q_core.d_itimer &&
+	    icount >= be64_to_cpu(dqp->q_core.d_ino_softlimit)) {
+		xfs_debug(dqp->q_mount, "%d [%s] INO TIMER NOT STARTED",
+			d->d_id, DQFLAGTO_TYPESTR(d));
+		nbad++;
+	}
+#ifdef QUOTADEBUG
+	if (!nbad)
+		xfs_debug(dqp->q_mount, "%d [%s] qchecked",
+			d->d_id, DQFLAGTO_TYPESTR(d));
+#endif
+	return nbad;
+}
+
+/* Look up the incore dquot matching test dquot d and cross-check the two. */
+STATIC void
+xfs_dqtest_cmp(xfs_dqtest_t *d)
+{
+	xfs_dquot_t	*dqp;
+	int		error;
+
+	error = xfs_qm_dqget(d->q_mount, NULL, d->d_id, d->dq_flags, 0, &dqp);
+	if (error) {
+		/* Nothing to compare against; just record the failure. */
+		xfs_qm_dqtest_failed(d, NULL, "dqget failed", 0, 0, error);
+		return;
+	}
+	xfs_dqtest_cmp2(d, dqp);
+	xfs_qm_dqput(dqp);
+}
+
+/* Find the qcheck test dquot for (mp, id, type), creating it on first use. */
+STATIC int
+xfs_qm_internalqcheck_dqget(
+	xfs_mount_t	*mp,
+	xfs_dqid_t	id,
+	uint		type,
+	xfs_dqtest_t	**O_dq)
+{
+	xfs_dqhash_t	*h = DQTEST_HASH(mp, id, type);
+	xfs_dqtest_t	*d;
+
+	list_for_each_entry(d, &h->qh_list, q_hashlist) {
+		if (d->d_id == id && mp == d->q_mount)
+			goto found;
+	}
+	/* No entry for this id yet: start a zeroed one in bucket h. */
+	d = kmem_zalloc(sizeof(xfs_dqtest_t), KM_SLEEP);
+	d->dq_flags = type;
+	d->d_id = id;
+	d->q_mount = mp;
+	d->q_hash = h;
+	INIT_LIST_HEAD(&d->q_hashlist);
+	xfs_qm_hashinsert(h, d);
+ found:
+	*O_dq = d;
+	return 0;
+}
+
+STATIC void
+xfs_qm_internalqcheck_get_dquots(
+	xfs_mount_t	*mp,		/* filesystem being checked */
+	xfs_dqid_t	uid,		/* owner id, used if user quota is on */
+	xfs_dqid_t	projid,		/* project id, used if group quota is off */
+	xfs_dqid_t	gid,		/* group id, used if group quota is on */
+	xfs_dqtest_t	**ud,		/* out: untouched if user quota is off */
+	xfs_dqtest_t	**gd)		/* out: group, else project, test dquot */
+{
+	if (XFS_IS_UQUOTA_ON(mp))
+		xfs_qm_internalqcheck_dqget(mp, uid, XFS_DQ_USER, ud);
+	if (XFS_IS_GQUOTA_ON(mp))	/* group wins over project for *gd */
+		xfs_qm_internalqcheck_dqget(mp, gid, XFS_DQ_GROUP, gd);
+	else if (XFS_IS_PQUOTA_ON(mp))
+		xfs_qm_internalqcheck_dqget(mp, projid, XFS_DQ_PROJ, gd);
+}
+
+
+STATIC void
+xfs_qm_internalqcheck_dqadjust(
+	xfs_inode_t		*ip,	/* inode being accounted */
+	xfs_dqtest_t		*d)	/* test dquot charged for it */
+{
+	d->d_bcount += (xfs_qcnt_t)ip->i_d.di_nblocks;
+	d->d_icount += 1;
+}
+
+STATIC int
+xfs_qm_internalqcheck_adjust(
+	xfs_mount_t	*mp,		/* mount point for filesystem */
+	xfs_ino_t	ino,		/* inode number to get data for */
+	void		__user *buffer,	/* not used */
+	int		ubsize,		/* not used */
+	int		*ubused,	/* not used */
+	int		*res)		/* bulkstat result code */
+{
+	xfs_inode_t		*ip;
+	xfs_dqtest_t		*ud, *gd;	/* set only for enabled quota types */
+	uint			lock_flags;
+	boolean_t		ipreleased;
+	int			error;
+
+	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+	if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
+		*res = BULKSTAT_RV_NOTHING;
+		xfs_debug(mp, "%s: ino=%llu, uqino=%llu, gqino=%llu\n",
+			__func__, (unsigned long long) ino,
+			(unsigned long long) mp->m_sb.sb_uquotino,
+			(unsigned long long) mp->m_sb.sb_gquotino);
+		return XFS_ERROR(EINVAL);
+	}
+	ipreleased = B_FALSE;
+ again:
+	lock_flags = XFS_ILOCK_SHARED;
+	if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) {
+		*res = BULKSTAT_RV_NOTHING;
+		return (error);
+	}
+
+	/*
+	 * This inode can have blocks after EOF which get released when it
+	 * goes inactive.  Since we don't check the dquot until after all
+	 * our calculations are done, we must get rid of those now: drop
+	 * this first reference and look the inode up a second time.
+	 */
+	if (! ipreleased) {
+		xfs_iunlock(ip, lock_flags);
+		IRELE(ip);
+		ipreleased = B_TRUE;
+		goto again;
+	}
+	xfs_qm_internalqcheck_get_dquots(mp,
+					(xfs_dqid_t) ip->i_d.di_uid,
+					(xfs_dqid_t) xfs_get_projid(ip),
+					(xfs_dqid_t) ip->i_d.di_gid,
+					&ud, &gd);
+	if (XFS_IS_UQUOTA_ON(mp)) {
+		ASSERT(ud);
+		xfs_qm_internalqcheck_dqadjust(ip, ud);
+	}
+	if (XFS_IS_OQUOTA_ON(mp)) {
+		ASSERT(gd);
+		xfs_qm_internalqcheck_dqadjust(ip, gd);
+	}
+	xfs_iunlock(ip, lock_flags);
+	IRELE(ip);
+	*res = BULKSTAT_RV_DIDONE;
+	return (0);
+}
+
+
+/*
+ * PRIVATE, debugging.  Recount the quota usage of every inode and compare it
+ * with the incore dquots; returns the failure count (ESRCH if quota is off).
+ */
+int
+xfs_qm_internalqcheck(
+	xfs_mount_t	*mp)
+{
+	xfs_ino_t	lastino = 0;
+	int		done = 0, count = 5;
+	int		i, error, nfails;
+
+	if (! XFS_IS_QUOTA_ON(mp))
+		return XFS_ERROR(ESRCH);
+
+	xfs_log_force(mp, XFS_LOG_SYNC);
+	XFS_bflush(mp->m_ddev_targp);
+	xfs_log_force(mp, XFS_LOG_SYNC);
+	XFS_bflush(mp->m_ddev_targp);
+
+	/* There must be no other quota activity while this is going on. */
+	mutex_lock(&qcheck_lock);
+	qmtest_hashmask = 32;
+	qmtest_nfails = 0;
+	qmtest_udqtab = kmem_zalloc(qmtest_hashmask *
+				    sizeof(xfs_dqhash_t), KM_SLEEP);
+	qmtest_gdqtab = kmem_zalloc(qmtest_hashmask *
+				    sizeof(xfs_dqhash_t), KM_SLEEP);
+	/* A zeroed list_head is not an empty list: initialise every bucket. */
+	for (i = 0; i < qmtest_hashmask; i++) {
+		INIT_LIST_HEAD(&qmtest_udqtab[i].qh_list);
+		INIT_LIST_HEAD(&qmtest_gdqtab[i].qh_list);
+	}
+	do {
+		/* Walk all inodes, charging each one to its test dquots. */
+		error = xfs_bulkstat(mp, &lastino, &count,
+				 xfs_qm_internalqcheck_adjust,
+				 0, NULL, &done);
+		if (error) {
+			xfs_debug(mp, "Bulkstat returned error 0x%x", error);
+			break;
+		}
+	} while (!done);
+
+	xfs_debug(mp, "Checking results against system dquots");
+	for (i = 0; i < qmtest_hashmask; i++) {
+		xfs_dqtest_t	*d, *n;
+		xfs_dqhash_t	*h;
+
+		h = &qmtest_udqtab[i];
+		list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
+			xfs_dqtest_cmp(d);
+			kmem_free(d);
+		}
+		h = &qmtest_gdqtab[i];
+		list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
+			xfs_dqtest_cmp(d);
+			kmem_free(d);
+		}
+	}
+
+	if (qmtest_nfails) {
+		xfs_debug(mp, "******** quotacheck failed  ********");
+		xfs_debug(mp, "failures = %d", qmtest_nfails);
+	} else {
+		xfs_debug(mp, "******** quotacheck successful! ********");
+	}
+	kmem_free(qmtest_udqtab);
+	kmem_free(qmtest_gdqtab);
+	nfails = qmtest_nfails;	/* sample the global before dropping the lock */
+	mutex_unlock(&qcheck_lock);
+	return nfails;
+}
+
+#endif /* DEBUG */
diff --git a/trunk/fs/xfs/quota/xfs_trans_dquot.c b/trunk/fs/xfs/quota/xfs_trans_dquot.c
index 4d00ee67792d..2a3648731331 100644
--- a/trunk/fs/xfs/quota/xfs_trans_dquot.c
+++ b/trunk/fs/xfs/quota/xfs_trans_dquot.c
@@ -59,7 +59,7 @@ xfs_trans_dqjoin(
 	xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
 
 	/*
-	 * Initialize d_transp so we can later determine if this dquot is
+	 * Initialize q_transp so we can later determine if this dquot is
 	 * associated with this transaction.
 	 */
 	dqp->q_transp = tp;
@@ -387,18 +387,18 @@ xfs_trans_apply_dquot_deltas(
 				qtrx->qt_delbcnt_delta;
 			totalrtbdelta = qtrx->qt_rtbcount_delta +
 				qtrx->qt_delrtb_delta;
-#ifdef DEBUG
+#ifdef QUOTADEBUG
 			if (totalbdelta < 0)
 				ASSERT(be64_to_cpu(d->d_bcount) >=
-				       -totalbdelta);
+				       (xfs_qcnt_t) -totalbdelta);
 
 			if (totalrtbdelta < 0)
 				ASSERT(be64_to_cpu(d->d_rtbcount) >=
-				       -totalrtbdelta);
+				       (xfs_qcnt_t) -totalrtbdelta);
 
 			if (qtrx->qt_icount_delta < 0)
 				ASSERT(be64_to_cpu(d->d_icount) >=
-				       -qtrx->qt_icount_delta);
+				       (xfs_qcnt_t) -qtrx->qt_icount_delta);
 #endif
 			if (totalbdelta)
 				be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta);
@@ -642,6 +642,11 @@ xfs_trans_dqresv(
 	    ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) ||
 	     (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
 	      (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
+#ifdef QUOTADEBUG
+		xfs_debug(mp,
+			"BLK Res: nblks=%ld + resbcount=%Ld > hardlimit=%Ld?",
+			nblks, *resbcountp, hardlimit);
+#endif
 		if (nblks > 0) {
 			/*
 			 * dquot is locked already. See if we'd go over the
diff --git a/trunk/fs/xfs/xfs.h b/trunk/fs/xfs/xfs.h
index 53ec3ea9a625..5ad8ad3a1dcd 100644
--- a/trunk/fs/xfs/xfs.h
+++ b/trunk/fs/xfs/xfs.h
@@ -22,6 +22,7 @@
 #define STATIC
 #define DEBUG 1
 #define XFS_BUF_LOCK_TRACKING 1
+/* #define QUOTADEBUG 1 */
 #endif
 
 #include <linux-2.6/xfs_linux.h>
diff --git a/trunk/fs/xfs/xfs_alloc.c b/trunk/fs/xfs/xfs_alloc.c
index 1e00b3ef6274..95862bbff56b 100644
--- a/trunk/fs/xfs/xfs_alloc.c
+++ b/trunk/fs/xfs/xfs_alloc.c
@@ -570,7 +570,9 @@ xfs_alloc_ag_vextent_exact(
 	xfs_agblock_t	tbno;	/* start block of trimmed extent */
 	xfs_extlen_t	tlen;	/* length of trimmed extent */
 	xfs_agblock_t	tend;	/* end block of trimmed extent */
+	xfs_agblock_t	end;	/* end of allocated extent */
 	int		i;	/* success/failure of operation */
+	xfs_extlen_t	rlen;	/* length of returned extent */
 
 	ASSERT(args->alignment == 1);
 
@@ -623,16 +625,18 @@ xfs_alloc_ag_vextent_exact(
 	 *
 	 * Fix the length according to mod and prod if given.
 	 */
-	args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
-						- args->agbno;
+	end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen);
+	args->len = end - args->agbno;
 	xfs_alloc_fix_len(args);
 	if (!xfs_alloc_fix_minleft(args))
 		goto not_found;
 
-	ASSERT(args->agbno + args->len <= tend);
+	rlen = args->len;
+	ASSERT(args->agbno + rlen <= tend);
+	end = args->agbno + rlen;
 
 	/*
-	 * We are allocating agbno for args->len
+	 * We are allocating agbno for rlen [agbno .. end]
 	 * Allocate/initialize a cursor for the by-size btree.
 	 */
 	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
@@ -2123,7 +2127,7 @@ xfs_read_agf(
 	 * Validate the magic number of the agf block.
 	 */
 	agf_ok =
-		agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
+		be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC &&
 		XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
 		be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
 		be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
diff --git a/trunk/fs/xfs/xfs_alloc_btree.c b/trunk/fs/xfs/xfs_alloc_btree.c
index ffb3386e45c1..2b3518826a69 100644
--- a/trunk/fs/xfs/xfs_alloc_btree.c
+++ b/trunk/fs/xfs/xfs_alloc_btree.c
@@ -31,6 +31,7 @@
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
+#include "xfs_btree_trace.h"
 #include "xfs_alloc.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
@@ -310,6 +311,72 @@ xfs_allocbt_recs_inorder(
 }
 #endif	/* DEBUG */
 
+#ifdef XFS_BTREE_TRACE
+ktrace_t	*xfs_allocbt_trace_buf;
+
+STATIC void
+xfs_allocbt_trace_enter(
+	struct xfs_btree_cur	*cur,	/* cursor, recorded as a raw pointer */
+	const char		*func,	/* recorded as a raw pointer */
+	char			*s,	/* recorded as a raw pointer */
+	int			type,	/* first value of the trace entry */
+	int			line,	/* accepted but not recorded */
+	__psunsigned_t		a0,
+	__psunsigned_t		a1,
+	__psunsigned_t		a2,
+	__psunsigned_t		a3,
+	__psunsigned_t		a4,
+	__psunsigned_t		a5,
+	__psunsigned_t		a6,
+	__psunsigned_t		a7,
+	__psunsigned_t		a8,
+	__psunsigned_t		a9,
+	__psunsigned_t		a10)
+{
+	/* Values after the buffer: type, func, s, NULL, cur, then a0..a10. */
+	ktrace_enter(xfs_allocbt_trace_buf, (void *)(__psint_t)type,
+		(void *)func, (void *)s, NULL, (void *)cur,
+		(void *)a0, (void *)a1, (void *)a2, (void *)a3,
+		(void *)a4, (void *)a5, (void *)a6, (void *)a7,
+		(void *)a8, (void *)a9, (void *)a10);
+}
+
+STATIC void
+xfs_allocbt_trace_cursor(
+	struct xfs_btree_cur	*cur,	/* cursor to describe */
+	__uint32_t		*s0,	/* out: bc_private.a.agno */
+	__uint64_t		*l0,	/* out: ar_startblock of bc_rec */
+	__uint64_t		*l1)	/* out: ar_blockcount of bc_rec */
+{
+	*s0 = cur->bc_private.a.agno;
+	*l0 = cur->bc_rec.a.ar_startblock;	/* copied without byte swap */
+	*l1 = cur->bc_rec.a.ar_blockcount;
+}
+
+STATIC void
+xfs_allocbt_trace_key(
+	struct xfs_btree_cur	*cur,	/* not used */
+	union xfs_btree_key	*key,	/* key holding big-endian fields */
+	__uint64_t		*l0,	/* out: start block, CPU order */
+	__uint64_t		*l1)	/* out: block count, CPU order */
+{
+	*l0 = be32_to_cpu(key->alloc.ar_startblock);
+	*l1 = be32_to_cpu(key->alloc.ar_blockcount);
+}
+
+STATIC void
+xfs_allocbt_trace_record(
+	struct xfs_btree_cur	*cur,	/* not used */
+	union xfs_btree_rec	*rec,	/* record holding big-endian fields */
+	__uint64_t		*l0,	/* out: start block, CPU order */
+	__uint64_t		*l1,	/* out: block count, CPU order */
+	__uint64_t		*l2)	/* out: always set to 0 */
+{
+	*l0 = be32_to_cpu(rec->alloc.ar_startblock);
+	*l1 = be32_to_cpu(rec->alloc.ar_blockcount);
+	*l2 = 0;
+}
+#endif /* XFS_BTREE_TRACE */
+
 static const struct xfs_btree_ops xfs_allocbt_ops = {
 	.rec_len		= sizeof(xfs_alloc_rec_t),
 	.key_len		= sizeof(xfs_alloc_key_t),
@@ -326,10 +393,18 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
 	.init_rec_from_cur	= xfs_allocbt_init_rec_from_cur,
 	.init_ptr_from_cur	= xfs_allocbt_init_ptr_from_cur,
 	.key_diff		= xfs_allocbt_key_diff,
+
 #ifdef DEBUG
 	.keys_inorder		= xfs_allocbt_keys_inorder,
 	.recs_inorder		= xfs_allocbt_recs_inorder,
 #endif
+
+#ifdef XFS_BTREE_TRACE
+	.trace_enter		= xfs_allocbt_trace_enter,
+	.trace_cursor		= xfs_allocbt_trace_cursor,
+	.trace_key		= xfs_allocbt_trace_key,
+	.trace_record		= xfs_allocbt_trace_record,
+#endif
 };
 
 /*
@@ -352,16 +427,13 @@ xfs_allocbt_init_cursor(
 
 	cur->bc_tp = tp;
 	cur->bc_mp = mp;
+	cur->bc_nlevels = be32_to_cpu(agf->agf_levels[btnum]);
 	cur->bc_btnum = btnum;
 	cur->bc_blocklog = mp->m_sb.sb_blocklog;
-	cur->bc_ops = &xfs_allocbt_ops;
 
-	if (btnum == XFS_BTNUM_CNT) {
-		cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
+	cur->bc_ops = &xfs_allocbt_ops;
+	if (btnum == XFS_BTNUM_CNT)
 		cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
-	} else {
-		cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
-	}
 
 	cur->bc_private.a.agbp = agbp;
 	cur->bc_private.a.agno = agno;
diff --git a/trunk/fs/xfs/xfs_arch.h b/trunk/fs/xfs/xfs_arch.h
new file mode 100644
index 000000000000..0902249354a0
--- /dev/null
+++ b/trunk/fs/xfs/xfs_arch.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_ARCH_H__
+#define __XFS_ARCH_H__
+
+#ifndef XFS_BIG_INUMS
+# error XFS_BIG_INUMS must be defined true or false
+#endif
+
+#ifdef __KERNEL__
+
+#include <asm/byteorder.h>
+
+#ifdef __BIG_ENDIAN
+#define	XFS_NATIVE_HOST	1
+#else
+#undef XFS_NATIVE_HOST
+#endif
+
+#else /* __KERNEL__ */
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define	XFS_NATIVE_HOST	1
+#else
+#undef XFS_NATIVE_HOST
+#endif
+
+#ifdef XFS_NATIVE_HOST
+#define cpu_to_be16(val)	((__force __be16)(__u16)(val))
+#define cpu_to_be32(val)	((__force __be32)(__u32)(val))
+#define cpu_to_be64(val)	((__force __be64)(__u64)(val))
+#define be16_to_cpu(val)	((__force __u16)(__be16)(val))
+#define be32_to_cpu(val)	((__force __u32)(__be32)(val))
+#define be64_to_cpu(val)	((__force __u64)(__be64)(val))
+#else
+#define cpu_to_be16(val)	((__force __be16)__swab16((__u16)(val)))
+#define cpu_to_be32(val)	((__force __be32)__swab32((__u32)(val)))
+#define cpu_to_be64(val)	((__force __be64)__swab64((__u64)(val)))
+#define be16_to_cpu(val)	(__swab16((__force __u16)(__be16)(val)))
+#define be32_to_cpu(val)	(__swab32((__force __u32)(__be32)(val)))
+#define be64_to_cpu(val)	(__swab64((__force __u64)(__be64)(val)))
+#endif
+
+static inline void be16_add_cpu(__be16 *a, __s16 b)
+{
+	*a = cpu_to_be16(be16_to_cpu(*a) + b);
+}
+
+static inline void be32_add_cpu(__be32 *a, __s32 b)
+{
+	*a = cpu_to_be32(be32_to_cpu(*a) + b);
+}
+
+static inline void be64_add_cpu(__be64 *a, __s64 b)
+{
+	*a = cpu_to_be64(be64_to_cpu(*a) + b);
+}
+
+#endif	/* __KERNEL__ */
+
+/*
+ * get and set integers from potentially unaligned locations
+ */
+
+#define INT_GET_UNALIGNED_16_BE(pointer) \
+   ((__u16)((((__u8*)(pointer))[0] << 8) | (((__u8*)(pointer))[1])))
+#define INT_SET_UNALIGNED_16_BE(pointer,value) \
+    do {	/* one statement, so "if (x) M(p, v); else ..." parses */ \
+	((__u8*)(pointer))[0] = (((value) >> 8) & 0xff); \
+	((__u8*)(pointer))[1] = (((value)     ) & 0xff); \
+    } while (0)
+
+/*
+ * In directories inode numbers are stored as unaligned arrays of unsigned
+ * 8bit integers on disk.
+ *
+ * For v1 directories or v2 directories that contain inode numbers that
+ * do not fit into 32bit the array has eight members, but the first member
+ * is always zero:
+ *
+ *  |unused|48-55|40-47|32-39|24-31|16-23| 8-15| 0- 7|
+ *
+ * For v2 directories that only contain entries with inode numbers that fit
+ * into 32bits a four-member array is used:
+ *
+ *  |24-31|16-23| 8-15| 0- 7|
+ */ 
+
+#define XFS_GET_DIR_INO4(di) \
+	(((__u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
+
+#define XFS_PUT_DIR_INO4(from, di) \
+do { \
+	(di).i[0] = (((from) & 0xff000000ULL) >> 24); \
+	(di).i[1] = (((from) & 0x00ff0000ULL) >> 16); \
+	(di).i[2] = (((from) & 0x0000ff00ULL) >> 8); \
+	(di).i[3] = ((from) & 0x000000ffULL); \
+} while (0)
+
+#define XFS_DI_HI(di) \
+	(((__u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
+#define XFS_DI_LO(di) \
+	(((__u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7]))
+
+#define XFS_GET_DIR_INO8(di)        \
+	(((xfs_ino_t)XFS_DI_LO(di) & 0xffffffffULL) | \
+	 ((xfs_ino_t)XFS_DI_HI(di) << 32))
+
+#define XFS_PUT_DIR_INO8(from, di) \
+do { \
+	(di).i[0] = 0; \
+	(di).i[1] = (((from) & 0x00ff000000000000ULL) >> 48); \
+	(di).i[2] = (((from) & 0x0000ff0000000000ULL) >> 40); \
+	(di).i[3] = (((from) & 0x000000ff00000000ULL) >> 32); \
+	(di).i[4] = (((from) & 0x00000000ff000000ULL) >> 24); \
+	(di).i[5] = (((from) & 0x0000000000ff0000ULL) >> 16); \
+	(di).i[6] = (((from) & 0x000000000000ff00ULL) >> 8); \
+	(di).i[7] = ((from) & 0x00000000000000ffULL); \
+} while (0)
+	
+#endif	/* __XFS_ARCH_H__ */
diff --git a/trunk/fs/xfs/xfs_attr.c b/trunk/fs/xfs/xfs_attr.c
index cbae424fe1ba..01d2072fb6d4 100644
--- a/trunk/fs/xfs/xfs_attr.c
+++ b/trunk/fs/xfs/xfs_attr.c
@@ -822,21 +822,17 @@ xfs_attr_inactive(xfs_inode_t *dp)
 	error = xfs_attr_root_inactive(&trans, dp);
 	if (error)
 		goto out;
-
 	/*
-	 * Signal synchronous inactive transactions unless this is a
-	 * synchronous mount filesystem in which case we know that we're here
-	 * because we've been called out of xfs_inactive which means that the
-	 * last reference is gone and the unlink transaction has already hit
-	 * the disk so async inactive transactions are safe.
+	 * signal synchronous inactive transactions unless this
+	 * is a synchronous mount filesystem in which case we
+	 * know that we're here because we've been called out of
+	 * xfs_inactive which means that the last reference is gone
+	 * and the unlink transaction has already hit the disk so
+	 * async inactive transactions are safe.
 	 */
-	if (!(mp->m_flags & XFS_MOUNT_WSYNC)) {
-		if (dp->i_d.di_anextents > 0)
-			xfs_trans_set_sync(trans);
-	}
-
-	error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
-	if (error)
+	if ((error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK,
+				(!(mp->m_flags & XFS_MOUNT_WSYNC)
+				 ? 1 : 0))))
 		goto out;
 
 	/*
@@ -1203,7 +1199,7 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
 		return XFS_ERROR(error);
 	ASSERT(bp != NULL);
 	leaf = bp->data;
-	if (unlikely(leaf->hdr.info.magic != cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) {
+	if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) {
 		XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW,
 				     context->dp->i_mount, leaf);
 		xfs_da_brelse(NULL, bp);
@@ -1610,8 +1606,9 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 						     XFS_ATTR_FORK);
 		if (error)
 			goto out;
-		ASSERT((((xfs_attr_leafblock_t *)bp->data)->hdr.info.magic) ==
-		       cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+		ASSERT(be16_to_cpu(((xfs_attr_leafblock_t *)
+				      bp->data)->hdr.info.magic)
+						       == XFS_ATTR_LEAF_MAGIC);
 
 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
 			xfs_bmap_init(args->flist, args->firstblock);
@@ -1876,11 +1873,11 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 				return(XFS_ERROR(EFSCORRUPTED));
 			}
 			node = bp->data;
-			if (node->hdr.info.magic ==
-			    cpu_to_be16(XFS_ATTR_LEAF_MAGIC))
+			if (be16_to_cpu(node->hdr.info.magic)
+							== XFS_ATTR_LEAF_MAGIC)
 				break;
-			if (unlikely(node->hdr.info.magic !=
-				     cpu_to_be16(XFS_DA_NODE_MAGIC))) {
+			if (unlikely(be16_to_cpu(node->hdr.info.magic)
+							!= XFS_DA_NODE_MAGIC)) {
 				XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
 						     XFS_ERRLEVEL_LOW,
 						     context->dp->i_mount,
@@ -1915,8 +1912,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 	 */
 	for (;;) {
 		leaf = bp->data;
-		if (unlikely(leaf->hdr.info.magic !=
-			     cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) {
+		if (unlikely(be16_to_cpu(leaf->hdr.info.magic)
+						!= XFS_ATTR_LEAF_MAGIC)) {
 			XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)",
 					     XFS_ERRLEVEL_LOW,
 					     context->dp->i_mount, leaf);
diff --git a/trunk/fs/xfs/xfs_attr_leaf.c b/trunk/fs/xfs/xfs_attr_leaf.c
index 8fad9602542b..71e90dc2aeb1 100644
--- a/trunk/fs/xfs/xfs_attr_leaf.c
+++ b/trunk/fs/xfs/xfs_attr_leaf.c
@@ -731,7 +731,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
 	int bytes, i;
 
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 
 	entry = &leaf->entries[0];
 	bytes = sizeof(struct xfs_attr_sf_hdr);
@@ -777,7 +777,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
 	ASSERT(bp != NULL);
 	memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
 	leaf = (xfs_attr_leafblock_t *)tmpbuffer;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
 
 	/*
@@ -872,7 +872,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
 		goto out;
 	node = bp1->data;
 	leaf = bp2->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	/* both on-disk, don't endian-flip twice */
 	node->btree[0].hashval =
 		leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval;
@@ -997,7 +997,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	int tablesize, entsize, sum, tmp, i;
 
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	ASSERT((args->index >= 0)
 		&& (args->index <= be16_to_cpu(leaf->hdr.count)));
 	hdr = &leaf->hdr;
@@ -1070,7 +1070,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 	int tmp, i;
 
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	hdr = &leaf->hdr;
 	ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE));
 	ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(hdr->count)));
@@ -1256,8 +1256,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
 	leaf1 = blk1->bp->data;
 	leaf2 = blk2->bp->data;
-	ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
-	ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	args = state->args;
 
 	/*
@@ -1533,7 +1533,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
 	 */
 	blk = &state->path.blk[ state->path.active-1 ];
 	info = blk->bp->data;
-	ASSERT(info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC);
 	leaf = (xfs_attr_leafblock_t *)info;
 	count = be16_to_cpu(leaf->hdr.count);
 	bytes = sizeof(xfs_attr_leaf_hdr_t) +
@@ -1596,7 +1596,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
 		bytes  = state->blocksize - (state->blocksize>>2);
 		bytes -= be16_to_cpu(leaf->hdr.usedbytes);
 		leaf = bp->data;
-		ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+		ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 		count += be16_to_cpu(leaf->hdr.count);
 		bytes -= be16_to_cpu(leaf->hdr.usedbytes);
 		bytes -= count * sizeof(xfs_attr_leaf_entry_t);
@@ -1650,7 +1650,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	xfs_mount_t *mp;
 
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	hdr = &leaf->hdr;
 	mp = args->trans->t_mountp;
 	ASSERT((be16_to_cpu(hdr->count) > 0)
@@ -1813,8 +1813,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC);
 	drop_leaf = drop_blk->bp->data;
 	save_leaf = save_blk->bp->data;
-	ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
-	ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	drop_hdr = &drop_leaf->hdr;
 	save_hdr = &save_leaf->hdr;
 
@@ -1915,7 +1915,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	xfs_dahash_t hashval;
 
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	ASSERT(be16_to_cpu(leaf->hdr.count)
 					< (XFS_LBSIZE(args->dp->i_mount)/8));
 
@@ -2019,7 +2019,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	xfs_attr_leaf_name_remote_t *name_rmt;
 
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	ASSERT(be16_to_cpu(leaf->hdr.count)
 					< (XFS_LBSIZE(args->dp->i_mount)/8));
 	ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
@@ -2087,8 +2087,8 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 	/*
 	 * Set up environment.
 	 */
-	ASSERT(leaf_s->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
-	ASSERT(leaf_d->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	hdr_s = &leaf_s->hdr;
 	hdr_d = &leaf_d->hdr;
 	ASSERT((be16_to_cpu(hdr_s->count) > 0) &&
@@ -2222,8 +2222,8 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
 
 	leaf1 = leaf1_bp->data;
 	leaf2 = leaf2_bp->data;
-	ASSERT((leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) &&
-	       (leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)));
+	ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC) &&
+	       (be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC));
 	if ((be16_to_cpu(leaf1->hdr.count) > 0) &&
 	    (be16_to_cpu(leaf2->hdr.count) > 0) &&
 	    ((be32_to_cpu(leaf2->entries[0].hashval) <
@@ -2246,7 +2246,7 @@ xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count)
 	xfs_attr_leafblock_t *leaf;
 
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	if (count)
 		*count = be16_to_cpu(leaf->hdr.count);
 	if (!leaf->hdr.count)
@@ -2265,7 +2265,7 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)
 	xfs_attr_leaf_name_remote_t *name_rmt;
 	int size;
 
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	if (leaf->entries[index].flags & XFS_ATTR_LOCAL) {
 		name_loc = xfs_attr_leaf_name_local(leaf, index);
 		size = xfs_attr_leaf_entsize_local(name_loc->namelen,
@@ -2451,7 +2451,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
 	ASSERT(bp != NULL);
 
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
 	ASSERT(args->index >= 0);
 	entry = &leaf->entries[ args->index ];
@@ -2515,7 +2515,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
 	ASSERT(bp != NULL);
 
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
 	ASSERT(args->index >= 0);
 	entry = &leaf->entries[ args->index ];
@@ -2585,13 +2585,13 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
 	}
 
 	leaf1 = bp1->data;
-	ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	ASSERT(args->index < be16_to_cpu(leaf1->hdr.count));
 	ASSERT(args->index >= 0);
 	entry1 = &leaf1->entries[ args->index ];
 
 	leaf2 = bp2->data;
-	ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count));
 	ASSERT(args->index2 >= 0);
 	entry2 = &leaf2->entries[ args->index2 ];
@@ -2689,9 +2689,9 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
 	 * This is a depth-first traversal!
 	 */
 	info = bp->data;
-	if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
+	if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) {
 		error = xfs_attr_node_inactive(trans, dp, bp, 1);
-	} else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) {
+	} else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) {
 		error = xfs_attr_leaf_inactive(trans, dp, bp);
 	} else {
 		error = XFS_ERROR(EIO);
@@ -2739,7 +2739,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
 	}
 
 	node = bp->data;
-	ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 	parent_blkno = xfs_da_blkno(bp);	/* save for re-read later */
 	count = be16_to_cpu(node->hdr.count);
 	if (!count) {
@@ -2773,10 +2773,10 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
 			 * Invalidate the subtree, however we have to.
 			 */
 			info = child_bp->data;
-			if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
+			if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) {
 				error = xfs_attr_node_inactive(trans, dp,
 						child_bp, level+1);
-			} else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) {
+			} else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) {
 				error = xfs_attr_leaf_inactive(trans, dp,
 						child_bp);
 			} else {
@@ -2836,7 +2836,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
 	int error, count, size, tmp, i;
 
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 
 	/*
 	 * Count the number of "remote" value extents.
diff --git a/trunk/fs/xfs/xfs_bmap.c b/trunk/fs/xfs/xfs_bmap.c
index c51a3f903633..e546a33214c9 100644
--- a/trunk/fs/xfs/xfs_bmap.c
+++ b/trunk/fs/xfs/xfs_bmap.c
@@ -29,11 +29,15 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
+#include "xfs_dir2_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_mount.h"
 #include "xfs_itable.h"
+#include "xfs_dir2_data.h"
+#include "xfs_dir2_leaf.h"
+#include "xfs_dir2_block.h"
 #include "xfs_inode_item.h"
 #include "xfs_extfree_item.h"
 #include "xfs_alloc.h"
@@ -90,7 +94,6 @@ xfs_bmap_add_attrfork_local(
  */
 STATIC int				/* error */
 xfs_bmap_add_extent_delay_real(
-	struct xfs_trans	*tp,	/* transaction pointer */
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	xfs_extnum_t		*idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
@@ -436,7 +439,6 @@ xfs_bmap_add_attrfork_local(
  */
 STATIC int				/* error */
 xfs_bmap_add_extent(
-	struct xfs_trans	*tp,	/* transaction pointer */
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	xfs_extnum_t		*idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
@@ -522,7 +524,7 @@ xfs_bmap_add_extent(
 				if (cur)
 					ASSERT(cur->bc_private.b.flags &
 						XFS_BTCUR_BPRV_WASDEL);
-				error = xfs_bmap_add_extent_delay_real(tp, ip,
+				error = xfs_bmap_add_extent_delay_real(ip,
 						idx, &cur, new, &da_new,
 						first, flist, &logflags);
 			} else {
@@ -559,7 +561,7 @@ xfs_bmap_add_extent(
 		int	tmp_logflags;	/* partial log flag return val */
 
 		ASSERT(cur == NULL);
-		error = xfs_bmap_extents_to_btree(tp, ip, first,
+		error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first,
 			flist, &cur, da_old > 0, &tmp_logflags, whichfork);
 		logflags |= tmp_logflags;
 		if (error)
@@ -602,7 +604,6 @@ xfs_bmap_add_extent(
  */
 STATIC int				/* error */
 xfs_bmap_add_extent_delay_real(
-	struct xfs_trans	*tp,	/* transaction pointer */
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	xfs_extnum_t		*idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
@@ -900,7 +901,7 @@ xfs_bmap_add_extent_delay_real(
 		}
 		if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
 		    ip->i_d.di_nextents > ip->i_df.if_ext_max) {
-			error = xfs_bmap_extents_to_btree(tp, ip,
+			error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
 					first, flist, &cur, 1, &tmp_rval,
 					XFS_DATA_FORK);
 			rval |= tmp_rval;
@@ -983,7 +984,7 @@ xfs_bmap_add_extent_delay_real(
 		}
 		if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
 		    ip->i_d.di_nextents > ip->i_df.if_ext_max) {
-			error = xfs_bmap_extents_to_btree(tp, ip,
+			error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
 				first, flist, &cur, 1, &tmp_rval,
 				XFS_DATA_FORK);
 			rval |= tmp_rval;
@@ -1051,7 +1052,7 @@ xfs_bmap_add_extent_delay_real(
 		}
 		if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
 		    ip->i_d.di_nextents > ip->i_df.if_ext_max) {
-			error = xfs_bmap_extents_to_btree(tp, ip,
+			error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
 					first, flist, &cur, 1, &tmp_rval,
 					XFS_DATA_FORK);
 			rval |= tmp_rval;
@@ -2870,8 +2871,8 @@ xfs_bmap_del_extent(
 			len = del->br_blockcount;
 			do_div(bno, mp->m_sb.sb_rextsize);
 			do_div(len, mp->m_sb.sb_rextsize);
-			error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
-			if (error)
+			if ((error = xfs_rtfree_extent(ip->i_transp, bno,
+					(xfs_extlen_t)len)))
 				goto done;
 			do_fx = 0;
 			nblks = len * mp->m_sb.sb_rextsize;
@@ -4079,7 +4080,7 @@ xfs_bmap_sanity_check(
 {
 	struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
 
-	if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) ||
+	if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC ||
 	    be16_to_cpu(block->bb_level) != level ||
 	    be16_to_cpu(block->bb_numrecs) == 0 ||
 	    be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
@@ -4661,7 +4662,7 @@ xfs_bmapi(
 				if (!wasdelay && (flags & XFS_BMAPI_PREALLOC))
 					got.br_state = XFS_EXT_UNWRITTEN;
 			}
-			error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, &got,
+			error = xfs_bmap_add_extent(ip, &lastx, &cur, &got,
 				firstblock, flist, &tmp_logflags,
 				whichfork);
 			logflags |= tmp_logflags;
@@ -4762,7 +4763,7 @@ xfs_bmapi(
 			mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
 						? XFS_EXT_NORM
 						: XFS_EXT_UNWRITTEN;
-			error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, mval,
+			error = xfs_bmap_add_extent(ip, &lastx, &cur, mval,
 				firstblock, flist, &tmp_logflags,
 				whichfork);
 			logflags |= tmp_logflags;
@@ -5116,7 +5117,7 @@ xfs_bunmapi(
 				del.br_blockcount = mod;
 			}
 			del.br_state = XFS_EXT_UNWRITTEN;
-			error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, &del,
+			error = xfs_bmap_add_extent(ip, &lastx, &cur, &del,
 				firstblock, flist, &logflags,
 				XFS_DATA_FORK);
 			if (error)
@@ -5174,18 +5175,18 @@ xfs_bunmapi(
 				}
 				prev.br_state = XFS_EXT_UNWRITTEN;
 				lastx--;
-				error = xfs_bmap_add_extent(tp, ip, &lastx,
-						&cur, &prev, firstblock, flist,
-						&logflags, XFS_DATA_FORK);
+				error = xfs_bmap_add_extent(ip, &lastx, &cur,
+					&prev, firstblock, flist, &logflags,
+					XFS_DATA_FORK);
 				if (error)
 					goto error0;
 				goto nodelete;
 			} else {
 				ASSERT(del.br_state == XFS_EXT_NORM);
 				del.br_state = XFS_EXT_UNWRITTEN;
-				error = xfs_bmap_add_extent(tp, ip, &lastx,
-						&cur, &del, firstblock, flist,
-						&logflags, XFS_DATA_FORK);
+				error = xfs_bmap_add_extent(ip, &lastx, &cur,
+					&del, firstblock, flist, &logflags,
+					XFS_DATA_FORK);
 				if (error)
 					goto error0;
 				goto nodelete;
diff --git a/trunk/fs/xfs/xfs_bmap_btree.c b/trunk/fs/xfs/xfs_bmap_btree.c
index e2f5d59cbeaf..87d3c10b6954 100644
--- a/trunk/fs/xfs/xfs_bmap_btree.c
+++ b/trunk/fs/xfs/xfs_bmap_btree.c
@@ -33,6 +33,7 @@
 #include "xfs_inode_item.h"
 #include "xfs_alloc.h"
 #include "xfs_btree.h"
+#include "xfs_btree_trace.h"
 #include "xfs_itable.h"
 #include "xfs_bmap.h"
 #include "xfs_error.h"
@@ -424,10 +425,10 @@ xfs_bmbt_to_bmdr(
 	xfs_bmbt_key_t		*tkp;
 	__be64			*tpp;
 
-	ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
-	ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO));
-	ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO));
-	ASSERT(rblock->bb_level != 0);
+	ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC);
+	ASSERT(be64_to_cpu(rblock->bb_u.l.bb_leftsib) == NULLDFSBNO);
+	ASSERT(be64_to_cpu(rblock->bb_u.l.bb_rightsib) == NULLDFSBNO);
+	ASSERT(be16_to_cpu(rblock->bb_level) > 0);
 	dblock->bb_level = rblock->bb_level;
 	dblock->bb_numrecs = rblock->bb_numrecs;
 	dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
@@ -731,6 +732,95 @@ xfs_bmbt_recs_inorder(
 }
 #endif	/* DEBUG */
 
+#ifdef XFS_BTREE_TRACE
+ktrace_t	*xfs_bmbt_trace_buf;	/* filled by xfs_bmbt_trace_enter() */
+
+STATIC void				/* .trace_enter hook for bmap btrees */
+xfs_bmbt_trace_enter(
+	struct xfs_btree_cur	*cur,	/* cursor; supplies inode and fork */
+	const char		*func,	/* passed on as a (void *) word */
+	char			*s,	/* passed on as a (void *) word */
+	int			type,	/* OR-ed into word 0 unshifted */
+	int			line,	/* OR-ed into word 0 shifted by 16 */
+	__psunsigned_t		a0,	/* a0..a10: caller-defined payload, */
+	__psunsigned_t		a1,	/* each passed on as one (void *) */
+	__psunsigned_t		a2,
+	__psunsigned_t		a3,
+	__psunsigned_t		a4,
+	__psunsigned_t		a5,
+	__psunsigned_t		a6,
+	__psunsigned_t		a7,
+	__psunsigned_t		a8,
+	__psunsigned_t		a9,
+	__psunsigned_t		a10)
+{
+	struct xfs_inode	*ip = cur->bc_private.b.ip;
+	int			whichfork = cur->bc_private.b.whichfork;
+
+	ktrace_enter(xfs_bmbt_trace_buf,	/* word 0: type|fork<<8|line<<16 */
+		(void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
+		(void *)func, (void *)s, (void *)ip, (void *)cur,
+		(void *)a0, (void *)a1, (void *)a2, (void *)a3,
+		(void *)a4, (void *)a5, (void *)a6, (void *)a7,
+		(void *)a8, (void *)a9, (void *)a10);
+}
+
+STATIC void				/* .trace_cursor hook for bmap btrees */
+xfs_bmbt_trace_cursor(
+	struct xfs_btree_cur	*cur,	/* cursor to summarise */
+	__uint32_t		*s0,	/* out: nlevels, flags, allocated */
+	__uint64_t		*l0,	/* out: first word of cursor record */
+	__uint64_t		*l1)	/* out: second word of cursor record */
+{
+	struct xfs_bmbt_rec_host r;	/* bc_rec.b as set by xfs_bmbt_set_all */
+
+	xfs_bmbt_set_all(&r, &cur->bc_rec.b);
+
+	*s0 = (cur->bc_nlevels << 24) |		/* fields are OR-ed unmasked */
+	      (cur->bc_private.b.flags << 16) |
+	       cur->bc_private.b.allocated;
+	*l0 = r.l0;
+	*l1 = r.l1;
+}
+
+STATIC void				/* .trace_key hook for bmap btrees */
+xfs_bmbt_trace_key(
+	struct xfs_btree_cur	*cur,	/* not used here */
+	union xfs_btree_key	*key,	/* key; bmbt.br_startoff is read */
+	__uint64_t		*l0,	/* out: br_startoff, CPU byte order */
+	__uint64_t		*l1)	/* out: always 0 */
+{
+	*l0 = be64_to_cpu(key->bmbt.br_startoff);
+	*l1 = 0;
+}
+
+/* Endian flipping version of the bmbt extraction: disk record r -> irec s */
+STATIC void
+xfs_bmbt_disk_get_all(
+	xfs_bmbt_rec_t	*r,	/* big-endian record, read via get_unaligned */
+	xfs_bmbt_irec_t *s)	/* out: filled in by __xfs_bmbt_get_all() */
+{
+	__xfs_bmbt_get_all(get_unaligned_be64(&r->l0),
+				get_unaligned_be64(&r->l1), s);
+}
+
+STATIC void				/* .trace_record hook for bmap btrees */
+xfs_bmbt_trace_record(
+	struct xfs_btree_cur	*cur,	/* not used here */
+	union xfs_btree_rec	*rec,	/* record; rec->bmbt is decoded */
+	__uint64_t		*l0,	/* out: br_startoff */
+	__uint64_t		*l1,	/* out: br_startblock */
+	__uint64_t		*l2)	/* out: br_blockcount */
+{
+	struct xfs_bmbt_irec	irec;	/* decoded copy of the record */
+
+	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
+	*l0 = irec.br_startoff;
+	*l1 = irec.br_startblock;
+	*l2 = irec.br_blockcount;
+}
+#endif /* XFS_BTREE_TRACE */
+
 static const struct xfs_btree_ops xfs_bmbt_ops = {
 	.rec_len		= sizeof(xfs_bmbt_rec_t),
 	.key_len		= sizeof(xfs_bmbt_key_t),
@@ -747,10 +837,18 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
 	.init_rec_from_cur	= xfs_bmbt_init_rec_from_cur,
 	.init_ptr_from_cur	= xfs_bmbt_init_ptr_from_cur,
 	.key_diff		= xfs_bmbt_key_diff,
+
 #ifdef DEBUG
 	.keys_inorder		= xfs_bmbt_keys_inorder,
 	.recs_inorder		= xfs_bmbt_recs_inorder,
 #endif
+
+#ifdef XFS_BTREE_TRACE
+	.trace_enter		= xfs_bmbt_trace_enter,
+	.trace_cursor		= xfs_bmbt_trace_cursor,
+	.trace_key		= xfs_bmbt_trace_key,
+	.trace_record		= xfs_bmbt_trace_record,
+#endif
 };
 
 /*
diff --git a/trunk/fs/xfs/xfs_btree.c b/trunk/fs/xfs/xfs_btree.c
index cabf4b5604aa..2f9e97c128a0 100644
--- a/trunk/fs/xfs/xfs_btree.c
+++ b/trunk/fs/xfs/xfs_btree.c
@@ -32,6 +32,7 @@
 #include "xfs_inode.h"
 #include "xfs_inode_item.h"
 #include "xfs_btree.h"
+#include "xfs_btree_trace.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
 
@@ -65,11 +66,11 @@ xfs_btree_check_lblock(
 		be16_to_cpu(block->bb_numrecs) <=
 			cur->bc_ops->get_maxrecs(cur, level) &&
 		block->bb_u.l.bb_leftsib &&
-		(block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
+		(be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO ||
 		 XFS_FSB_SANITY_CHECK(mp,
 		 	be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
 		block->bb_u.l.bb_rightsib &&
-		(block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
+		(be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO ||
 		 XFS_FSB_SANITY_CHECK(mp,
 		 	be64_to_cpu(block->bb_u.l.bb_rightsib)));
 	if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
@@ -104,10 +105,10 @@ xfs_btree_check_sblock(
 		be16_to_cpu(block->bb_level) == level &&
 		be16_to_cpu(block->bb_numrecs) <=
 			cur->bc_ops->get_maxrecs(cur, level) &&
-		(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
+		(be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK ||
 		 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
 		block->bb_u.s.bb_leftsib &&
-		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
+		(be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK ||
 		 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
 		block->bb_u.s.bb_rightsib;
 	if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
@@ -510,9 +511,9 @@ xfs_btree_islastblock(
 	block = xfs_btree_get_block(cur, level, &bp);
 	xfs_btree_check_block(cur, block, level, bp);
 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-		return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
+		return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
 	else
-		return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
+		return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
 }
 
 /*
@@ -776,14 +777,14 @@ xfs_btree_setbuf(
 
 	b = XFS_BUF_TO_BLOCK(bp);
 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-		if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
+		if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
 			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
-		if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
+		if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
 			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
 	} else {
-		if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
+		if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
 			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
-		if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
+		if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
 			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
 	}
 }
@@ -794,9 +795,9 @@ xfs_btree_ptr_is_null(
 	union xfs_btree_ptr	*ptr)
 {
 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-		return ptr->l == cpu_to_be64(NULLDFSBNO);
+		return be64_to_cpu(ptr->l) == NULLDFSBNO;
 	else
-		return ptr->s == cpu_to_be32(NULLAGBLOCK);
+		return be32_to_cpu(ptr->s) == NULLAGBLOCK;
 }
 
 STATIC void
@@ -922,12 +923,12 @@ xfs_btree_ptr_to_daddr(
 	union xfs_btree_ptr	*ptr)
 {
 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-		ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
+		ASSERT(be64_to_cpu(ptr->l) != NULLDFSBNO);
 
 		return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
 	} else {
 		ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
-		ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
+		ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
 
 		return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
 					be32_to_cpu(ptr->s));
diff --git a/trunk/fs/xfs/xfs_btree.h b/trunk/fs/xfs/xfs_btree.h
index 8d05a6a46ce3..82fafc66bd1f 100644
--- a/trunk/fs/xfs/xfs_btree.h
+++ b/trunk/fs/xfs/xfs_btree.h
@@ -199,6 +199,25 @@ struct xfs_btree_ops {
 				union xfs_btree_rec *r1,
 				union xfs_btree_rec *r2);
 #endif
+
+	/* btree tracing */
+#ifdef XFS_BTREE_TRACE
+	void		(*trace_enter)(struct xfs_btree_cur *, const char *,
+				       char *, int, int, __psunsigned_t,
+				       __psunsigned_t, __psunsigned_t,
+				       __psunsigned_t, __psunsigned_t,
+				       __psunsigned_t, __psunsigned_t,
+				       __psunsigned_t, __psunsigned_t,
+				       __psunsigned_t, __psunsigned_t);
+	void		(*trace_cursor)(struct xfs_btree_cur *, __uint32_t *,
+					__uint64_t *, __uint64_t *);
+	void		(*trace_key)(struct xfs_btree_cur *,
+				     union xfs_btree_key *, __uint64_t *,
+				     __uint64_t *);
+	void		(*trace_record)(struct xfs_btree_cur *,
+					union xfs_btree_rec *, __uint64_t *,
+					__uint64_t *, __uint64_t *);
+#endif
 };
 
 /*
@@ -433,23 +452,4 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
 	(XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
 		XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks)
 
-/*
- * Trace hooks.  Currently not implemented as they need to be ported
- * over to the generic tracing functionality, which is some effort.
- *
- * i,j = integer (32 bit)
- * b = btree block buffer (xfs_buf_t)
- * p = btree ptr
- * r = btree record
- * k = btree key
- */
-#define	XFS_BTREE_TRACE_ARGBI(c, b, i)
-#define	XFS_BTREE_TRACE_ARGBII(c, b, i, j)
-#define	XFS_BTREE_TRACE_ARGI(c, i)
-#define	XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
-#define	XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
-#define	XFS_BTREE_TRACE_ARGIK(c, i, k)
-#define XFS_BTREE_TRACE_ARGR(c, r)
-#define	XFS_BTREE_TRACE_CURSOR(c, t)
-
 #endif	/* __XFS_BTREE_H__ */
diff --git a/trunk/fs/xfs/xfs_btree_trace.c b/trunk/fs/xfs/xfs_btree_trace.c
new file mode 100644
index 000000000000..44ff942a0fda
--- /dev/null
+++ b/trunk/fs/xfs/xfs_btree_trace.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_types.h"
+#include "xfs_inum.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_btree_trace.h"
+
+STATIC void				/* split a btree ptr into two trace words */
+xfs_btree_trace_ptr(
+	struct xfs_btree_cur	*cur,	/* XFS_BTREE_LONG_PTRS selects ptr.l */
+	union xfs_btree_ptr	ptr,	/* big-endian btree pointer */
+	__psunsigned_t		*high,	/* out: upper 32 bits, 0 if short ptr */
+	__psunsigned_t		*low)	/* out: lower 32 bits, zero-extended */
+{
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+		__u64 val = be64_to_cpu(ptr.l);
+		*high = val >> 32;
+		*low = (__uint32_t)val;	/* (int) would sign-extend bit 31 */
+	} else {
+		*high = 0;
+		*low = be32_to_cpu(ptr.s);
+	}
+}
+
+/*
+ * Add a trace buffer entry tagged "args", for a buffer & 1 integer arg.
+ */
+void
+xfs_btree_trace_argbi(
+	const char		*func,	/* name of the calling function */
+	struct xfs_btree_cur	*cur,	/* cursor whose trace_enter op is used */
+	struct xfs_buf		*b,	/* buffer; its address is logged */
+	int			i,	/* integer argument */
+	int			line)	/* source line of the call */
+{
+	cur->bc_ops->trace_enter(cur, func, "args", XFS_BTREE_KTRACE_ARGBI,
+				 line, (__psunsigned_t)b, i, 0, 0, 0, 0, 0,
+				 0, 0, 0, 0);	/* not XBT_ARGS: 0 is NULL */
+}
+
+/*
+ * Add a trace buffer entry tagged "args", for a buffer & 2 integer args.
+ */
+void
+xfs_btree_trace_argbii(
+	const char		*func,	/* name of the calling function */
+	struct xfs_btree_cur	*cur,	/* cursor whose trace_enter op is used */
+	struct xfs_buf		*b,	/* buffer; its address is logged */
+	int			i0,	/* first integer argument */
+	int			i1,	/* second integer argument */
+	int			line)	/* source line of the call */
+{
+	cur->bc_ops->trace_enter(cur, func, "args", XFS_BTREE_KTRACE_ARGBII,
+				 line, (__psunsigned_t)b, i0, i1, 0, 0, 0, 0,
+				 0, 0, 0, 0);	/* not XBT_ARGS: 0 is NULL */
+}
+
+/*
+ * Add a trace buffer entry tagged "args", for 3 block-length args
+ * (each logged as zero-extended high/low 32-bit halves) and an integer arg.
+ */
+void
+xfs_btree_trace_argfffi(
+	const char		*func,	/* name of the calling function */
+	struct xfs_btree_cur	*cur,	/* cursor whose trace_enter op is used */
+	xfs_dfiloff_t		o,	/* first value, split at bit 32 */
+	xfs_dfsbno_t		b,	/* second value, split at bit 32 */
+	xfs_dfilblks_t		i,	/* third value, split at bit 32 */
+	int			j,	/* integer argument */
+	int			line)	/* source line of the call */
+{
+	cur->bc_ops->trace_enter(cur, func, "args", XFS_BTREE_KTRACE_ARGFFFI,
+				 line,	/* tag is "args", not XBT_ARGS (0) */
+				 o >> 32, (__uint32_t)o,
+				 b >> 32, (__uint32_t)b,
+				 i >> 32, (__uint32_t)i,
+				 (int)j, 0, 0, 0, 0);
+}
+
+/*
+ * Add an "args" trace entry for one integer arg (XBT_ARGS is 0, not a tag).
+ */
+void
+xfs_btree_trace_argi(
+	const char		*func,	/* name of the calling function */
+	struct xfs_btree_cur	*cur,	/* cursor whose trace_enter op is used */
+	int			i,	/* integer argument */
+	int			line)	/* source line of the call */
+{
+	cur->bc_ops->trace_enter(cur, func, "args", XFS_BTREE_KTRACE_ARGI,
+				 line, i, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+}
+
+/*
+ * Add a trace buffer entry tagged "args", for int, fsblock, key.
+ */
+void
+xfs_btree_trace_argipk(
+	const char		*func,	/* name of the calling function */
+	struct xfs_btree_cur	*cur,	/* cursor whose bc_ops hooks are used */
+	int			i,	/* integer argument */
+	union xfs_btree_ptr	ptr,	/* btree pointer, logged as high/low */
+	union xfs_btree_key	*key,	/* key, decoded by bc_ops->trace_key */
+	int			line)	/* source line of the call */
+{
+	__psunsigned_t		high, low;	/* halves of ptr */
+	__uint64_t		l0, l1;		/* key words from trace_key */
+
+	xfs_btree_trace_ptr(cur, ptr, &high, &low);
+	cur->bc_ops->trace_key(cur, key, &l0, &l1);
+	cur->bc_ops->trace_enter(cur, func, "args", XFS_BTREE_KTRACE_ARGIPK,
+				 line, i, high, low,
+				 l0 >> 32, (__uint32_t)l0,	/* zero-extend */
+				 l1 >> 32, (__uint32_t)l1,	/* low halves */
+				 0, 0, 0, 0);	/* not XBT_ARGS: 0 is NULL */
+}
+
+/*
+ * Add a trace buffer entry tagged "args", for int, fsblock, rec.
+ */
+void
+xfs_btree_trace_argipr(
+	const char		*func,	/* name of the calling function */
+	struct xfs_btree_cur	*cur,	/* cursor whose bc_ops hooks are used */
+	int			i,	/* integer argument */
+	union xfs_btree_ptr	ptr,	/* btree pointer, logged as high/low */
+	union xfs_btree_rec	*rec,	/* record, decoded by trace_record */
+	int			line)	/* source line of the call */
+{
+	__psunsigned_t		high, low;	/* halves of ptr */
+	__uint64_t		l0, l1, l2;	/* words from trace_record */
+
+	xfs_btree_trace_ptr(cur, ptr, &high, &low);
+	cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
+	cur->bc_ops->trace_enter(cur, func, "args", XFS_BTREE_KTRACE_ARGIPR,
+			      line, i,
+			      high, low,
+			      l0 >> 32, (__uint32_t)l0,	/* low halves are */
+			      l1 >> 32, (__uint32_t)l1,	/* zero-extended, */
+			      l2 >> 32, (__uint32_t)l2,	/* not sign-extended */
+			      0, 0);	/* tag: not XBT_ARGS, 0 is NULL */
+}
+
+/*
+ * Add a trace buffer entry tagged "args", for int, key.
+ */
+void
+xfs_btree_trace_argik(
+	const char		*func,	/* name of the calling function */
+	struct xfs_btree_cur	*cur,	/* cursor whose bc_ops hooks are used */
+	int			i,	/* integer argument */
+	union xfs_btree_key	*key,	/* key, decoded by bc_ops->trace_key */
+	int			line)	/* source line of the call */
+{
+	__uint64_t		l0, l1;		/* key words from trace_key */
+
+	cur->bc_ops->trace_key(cur, key, &l0, &l1);
+	cur->bc_ops->trace_enter(cur, func, "args", XFS_BTREE_KTRACE_ARGIK,
+				 line, i,	/* not XBT_ARGS: 0 is NULL */
+				 l0 >> 32, (__uint32_t)l0,	/* zero-extend */
+				 l1 >> 32, (__uint32_t)l1,	/* low halves */
+				 0, 0, 0, 0, 0, 0);
+}
+
+/*
+ * Add a trace buffer entry tagged "args", for record.
+ */
+void
+xfs_btree_trace_argr(
+	const char		*func,	/* name of the calling function */
+	struct xfs_btree_cur	*cur,	/* cursor whose bc_ops hooks are used */
+	union xfs_btree_rec	*rec,	/* record, decoded by trace_record */
+	int			line)	/* source line of the call */
+{
+	__uint64_t		l0, l1, l2;	/* words from trace_record */
+
+	cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
+	cur->bc_ops->trace_enter(cur, func, "args", XFS_BTREE_KTRACE_ARGR,
+			      line,	/* tag: not XBT_ARGS, 0 is NULL */
+			      l0 >> 32, (__uint32_t)l0,	/* low halves are */
+			      l1 >> 32, (__uint32_t)l1,	/* zero-extended, */
+			      l2 >> 32, (__uint32_t)l2,	/* not sign-extended */
+			      0, 0, 0, 0, 0);
+}
+
+/*
+ * Add a trace buffer entry for the cursor/operation (levels 0-3 only).
+ */
+void
+xfs_btree_trace_cursor(
+	const char		*func,	/* name of the calling function */
+	struct xfs_btree_cur	*cur,	/* cursor to log */
+	int			type,	/* XBT_* sub-type, logged by name */
+	int			line)	/* source line of the call */
+{
+	__uint32_t		s0;	/* summary word from trace_cursor */
+	__uint64_t		l0, l1;	/* record words from trace_cursor */
+	char			*s;	/* printable name of type */
+
+	switch (type) {
+	case XBT_ARGS:
+		s = "args";
+		break;
+	case XBT_ENTRY:
+		s = "entry";
+		break;
+	case XBT_ERROR:
+		s = "error";
+		break;
+	case XBT_EXIT:
+		s = "exit";
+		break;
+	default:
+		s = "unknown";
+		break;
+	}
+
+	cur->bc_ops->trace_cursor(cur, &s0, &l0, &l1);
+	cur->bc_ops->trace_enter(cur, func, s, XFS_BTREE_KTRACE_CUR, line,
+				 s0,
+				 l0 >> 32, (__uint32_t)l0,	/* zero-extend */
+				 l1 >> 32, (__uint32_t)l1,	/* low halves */
+				 (__psunsigned_t)cur->bc_bufs[0],
+				 (__psunsigned_t)cur->bc_bufs[1],
+				 (__psunsigned_t)cur->bc_bufs[2],
+				 (__psunsigned_t)cur->bc_bufs[3],
+				 (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1],
+				 (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]);
+}
diff --git a/trunk/fs/xfs/xfs_btree_trace.h b/trunk/fs/xfs/xfs_btree_trace.h
new file mode 100644
index 000000000000..2d8a309873ea
--- /dev/null
+++ b/trunk/fs/xfs/xfs_btree_trace.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_BTREE_TRACE_H__
+#define	__XFS_BTREE_TRACE_H__
+
+struct xfs_btree_cur;
+struct xfs_buf;
+
+
+/*
+ * Trace hooks.
+ * i,j = integer (32 bit)
+ * b = btree block buffer (xfs_buf_t)
+ * p = btree ptr
+ * r = btree record
+ * k = btree key
+ */
+
+#ifdef XFS_BTREE_TRACE
+
+/*
+ * Trace buffer entry types, one per public xfs_btree_trace_*() function.
+ */
+#define XFS_BTREE_KTRACE_ARGBI   1	/* buffer, int */
+#define XFS_BTREE_KTRACE_ARGBII  2	/* buffer, int, int */
+#define XFS_BTREE_KTRACE_ARGFFFI 3	/* 3 split values, int; no prototype below */
+#define XFS_BTREE_KTRACE_ARGI    4	/* int */
+#define XFS_BTREE_KTRACE_ARGIPK  5	/* int, btree ptr, key */
+#define XFS_BTREE_KTRACE_ARGIPR  6	/* int, btree ptr, record */
+#define XFS_BTREE_KTRACE_ARGIK   7	/* int, key */
+#define XFS_BTREE_KTRACE_ARGR	 8	/* record */
+#define XFS_BTREE_KTRACE_CUR     9	/* cursor state */
+
+/*
+ * Sub-types for cursor traces; xfs_btree_trace_cursor() maps them to names.
+ */
+#define XBT_ARGS	0	/* "args" */
+#define XBT_ENTRY	1	/* "entry" */
+#define XBT_ERROR	2	/* "error" */
+#define XBT_EXIT	3	/* "exit" */
+
+void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *,
+		struct xfs_buf *, int, int);
+void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *,
+		struct xfs_buf *, int, int, int);
+void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int);
+void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int,
+		union xfs_btree_ptr, union xfs_btree_key *, int);
+void xfs_btree_trace_argipr(const char *, struct xfs_btree_cur *, int,
+		union xfs_btree_ptr, union xfs_btree_rec *, int);
+void xfs_btree_trace_argik(const char *, struct xfs_btree_cur *, int,
+		union xfs_btree_key *, int);
+void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *,
+		union xfs_btree_rec *, int);
+void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int);
+
+#define	XFS_BTREE_TRACE_ARGBI(c, b, i)	\
+	xfs_btree_trace_argbi(__func__, c, b, i, __LINE__)
+#define	XFS_BTREE_TRACE_ARGBII(c, b, i, j)	\
+	xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__)
+#define	XFS_BTREE_TRACE_ARGI(c, i)	\
+	xfs_btree_trace_argi(__func__, c, i, __LINE__)
+#define	XFS_BTREE_TRACE_ARGIPK(c, i, p, k)	\
+	xfs_btree_trace_argipk(__func__, c, i, p, k, __LINE__)
+#define	XFS_BTREE_TRACE_ARGIPR(c, i, p, r)	\
+	xfs_btree_trace_argipr(__func__, c, i, p, r, __LINE__)
+#define	XFS_BTREE_TRACE_ARGIK(c, i, k)	\
+	xfs_btree_trace_argik(__func__, c, i, k, __LINE__)
+#define XFS_BTREE_TRACE_ARGR(c, r)	\
+	xfs_btree_trace_argr(__func__, c, r, __LINE__)
+#define	XFS_BTREE_TRACE_CURSOR(c, t)	\
+	xfs_btree_trace_cursor(__func__, c, t, __LINE__)
+#else	/* !XFS_BTREE_TRACE: every hook expands to nothing */
+#define	XFS_BTREE_TRACE_ARGBI(c, b, i)
+#define	XFS_BTREE_TRACE_ARGBII(c, b, i, j)
+#define	XFS_BTREE_TRACE_ARGI(c, i)
+#define	XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
+#define	XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
+#define	XFS_BTREE_TRACE_ARGIK(c, i, k)
+#define XFS_BTREE_TRACE_ARGR(c, r)
+#define	XFS_BTREE_TRACE_CURSOR(c, t)
+#endif	/* XFS_BTREE_TRACE */
+
+#endif /* __XFS_BTREE_TRACE_H__ */
diff --git a/trunk/fs/xfs/xfs_buf_item.c b/trunk/fs/xfs/xfs_buf_item.c
index 88492916c3dc..7b7e005e3dcc 100644
--- a/trunk/fs/xfs/xfs_buf_item.c
+++ b/trunk/fs/xfs/xfs_buf_item.c
@@ -90,11 +90,13 @@ xfs_buf_item_flush_log_debug(
 	uint		first,
 	uint		last)
 {
-	xfs_buf_log_item_t	*bip = bp->b_fspriv;
+	xfs_buf_log_item_t	*bip;
 	uint			nbytes;
 
-	if (bip == NULL || (bip->bli_item.li_type != XFS_LI_BUF))
+	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
+	if ((bip == NULL) || (bip->bli_item.li_type != XFS_LI_BUF)) {
 		return;
+	}
 
 	ASSERT(bip->bli_logged != NULL);
 	nbytes = last - first + 1;
@@ -406,7 +408,7 @@ xfs_buf_item_unpin(
 	int		stale = bip->bli_flags & XFS_BLI_STALE;
 	int		freed;
 
-	ASSERT(bp->b_fspriv == bip);
+	ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip);
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
 
 	trace_xfs_buf_item_unpin(bip);
@@ -418,7 +420,7 @@ xfs_buf_item_unpin(
 
 	if (freed && stale) {
 		ASSERT(bip->bli_flags & XFS_BLI_STALE);
-		ASSERT(xfs_buf_islocked(bp));
+		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
 		ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
 		ASSERT(XFS_BUF_ISSTALE(bp));
 		ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
@@ -441,7 +443,7 @@ xfs_buf_item_unpin(
 			 * Since the transaction no longer refers to the buffer,
 			 * the buffer should no longer refer to the transaction.
 			 */
-			bp->b_transp = NULL;
+			XFS_BUF_SET_FSPRIVATE2(bp, NULL);
 		}
 
 		/*
@@ -452,13 +454,13 @@ xfs_buf_item_unpin(
 		 */
 		if (bip->bli_flags & XFS_BLI_STALE_INODE) {
 			xfs_buf_do_callbacks(bp);
-			bp->b_fspriv = NULL;
-			bp->b_iodone = NULL;
+			XFS_BUF_SET_FSPRIVATE(bp, NULL);
+			XFS_BUF_CLR_IODONE_FUNC(bp);
 		} else {
 			spin_lock(&ailp->xa_lock);
 			xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
 			xfs_buf_item_relse(bp);
-			ASSERT(bp->b_fspriv == NULL);
+			ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL);
 		}
 		xfs_buf_relse(bp);
 	}
@@ -481,7 +483,7 @@ xfs_buf_item_trylock(
 
 	if (XFS_BUF_ISPINNED(bp))
 		return XFS_ITEM_PINNED;
-	if (!xfs_buf_trylock(bp))
+	if (!XFS_BUF_CPSEMA(bp))
 		return XFS_ITEM_LOCKED;
 
 	/* take a reference to the buffer.  */
@@ -523,7 +525,7 @@ xfs_buf_item_unlock(
 	uint			hold;
 
 	/* Clear the buffer's association with this transaction. */
-	bp->b_transp = NULL;
+	XFS_BUF_SET_FSPRIVATE2(bp, NULL);
 
 	/*
 	 * If this is a transaction abort, don't return early.  Instead, allow
@@ -682,7 +684,7 @@ xfs_buf_item_init(
 	xfs_buf_t	*bp,
 	xfs_mount_t	*mp)
 {
-	xfs_log_item_t		*lip = bp->b_fspriv;
+	xfs_log_item_t		*lip;
 	xfs_buf_log_item_t	*bip;
 	int			chunks;
 	int			map_size;
@@ -694,8 +696,12 @@ xfs_buf_item_init(
 	 * nothing to do here so return.
 	 */
 	ASSERT(bp->b_target->bt_mount == mp);
-	if (lip != NULL && lip->li_type == XFS_LI_BUF)
-		return;
+	if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
+		lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+		if (lip->li_type == XFS_LI_BUF) {
+			return;
+		}
+	}
 
 	/*
 	 * chunks is the number of XFS_BLF_CHUNK size pieces
@@ -734,9 +740,11 @@ xfs_buf_item_init(
 	 * Put the buf item into the list of items attached to the
 	 * buffer at the front.
 	 */
-	if (bp->b_fspriv)
-		bip->bli_item.li_bio_list = bp->b_fspriv;
-	bp->b_fspriv = bip;
+	if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
+		bip->bli_item.li_bio_list =
+				XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+	}
+	XFS_BUF_SET_FSPRIVATE(bp, bip);
 }
 
 
@@ -868,11 +876,12 @@ xfs_buf_item_relse(
 
 	trace_xfs_buf_item_relse(bp, _RET_IP_);
 
-	bip = bp->b_fspriv;
-	bp->b_fspriv = bip->bli_item.li_bio_list;
-	if (bp->b_fspriv == NULL)
-		bp->b_iodone = NULL;
-
+	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
+	XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list);
+	if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) &&
+	    (XFS_BUF_IODONE_FUNC(bp) != NULL)) {
+		XFS_BUF_CLR_IODONE_FUNC(bp);
+	}
 	xfs_buf_rele(bp);
 	xfs_buf_item_free(bip);
 }
@@ -896,20 +905,20 @@ xfs_buf_attach_iodone(
 	xfs_log_item_t	*head_lip;
 
 	ASSERT(XFS_BUF_ISBUSY(bp));
-	ASSERT(xfs_buf_islocked(bp));
+	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
 
 	lip->li_cb = cb;
-	head_lip = bp->b_fspriv;
-	if (head_lip) {
+	if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
+		head_lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
 		lip->li_bio_list = head_lip->li_bio_list;
 		head_lip->li_bio_list = lip;
 	} else {
-		bp->b_fspriv = lip;
+		XFS_BUF_SET_FSPRIVATE(bp, lip);
 	}
 
-	ASSERT(bp->b_iodone == NULL ||
-	       bp->b_iodone == xfs_buf_iodone_callbacks);
-	bp->b_iodone = xfs_buf_iodone_callbacks;
+	ASSERT((XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks) ||
+	       (XFS_BUF_IODONE_FUNC(bp) == NULL));
+	XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
 }
 
 /*
@@ -930,8 +939,8 @@ xfs_buf_do_callbacks(
 {
 	struct xfs_log_item	*lip;
 
-	while ((lip = bp->b_fspriv) != NULL) {
-		bp->b_fspriv = lip->li_bio_list;
+	while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) {
+		XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list);
 		ASSERT(lip->li_cb != NULL);
 		/*
 		 * Clear the next pointer so we don't have any
@@ -998,7 +1007,7 @@ xfs_buf_iodone_callbacks(
 			XFS_BUF_DONE(bp);
 			XFS_BUF_SET_START(bp);
 		}
-		ASSERT(bp->b_iodone != NULL);
+		ASSERT(XFS_BUF_IODONE_FUNC(bp));
 		trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
 		xfs_buf_relse(bp);
 		return;
@@ -1017,8 +1026,8 @@ xfs_buf_iodone_callbacks(
 
 do_callbacks:
 	xfs_buf_do_callbacks(bp);
-	bp->b_fspriv = NULL;
-	bp->b_iodone = NULL;
+	XFS_BUF_SET_FSPRIVATE(bp, NULL);
+	XFS_BUF_CLR_IODONE_FUNC(bp);
 	xfs_buf_ioend(bp, 0);
 }
 
diff --git a/trunk/fs/xfs/xfs_da_btree.c b/trunk/fs/xfs/xfs_da_btree.c
index 2925726529f8..6102ac6d1dff 100644
--- a/trunk/fs/xfs/xfs_da_btree.c
+++ b/trunk/fs/xfs/xfs_da_btree.c
@@ -24,12 +24,11 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
+#include "xfs_dir2.h"
 #include "xfs_mount.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_format.h"
-#include "xfs_dir2_priv.h"
+#include "xfs_dir2_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_inode_item.h"
@@ -37,6 +36,10 @@
 #include "xfs_bmap.h"
 #include "xfs_attr.h"
 #include "xfs_attr_leaf.h"
+#include "xfs_dir2_data.h"
+#include "xfs_dir2_leaf.h"
+#include "xfs_dir2_block.h"
+#include "xfs_dir2_node.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
 
@@ -86,7 +89,7 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
  */
 STATIC uint	xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
 STATIC int	xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
-STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps);
+STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra);
 STATIC int	xfs_da_blk_unlink(xfs_da_state_t *state,
 				  xfs_da_state_blk_t *drop_blk,
 				  xfs_da_state_blk_t *save_blk);
@@ -318,11 +321,11 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	ASSERT(bp != NULL);
 	node = bp->data;
 	oldroot = blk1->bp->data;
-	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
+	if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) {
 		size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
 			     (char *)oldroot);
 	} else {
-		ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+		ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 		leaf = (xfs_dir2_leaf_t *)oldroot;
 		size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] -
 			     (char *)leaf);
@@ -349,7 +352,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	node->hdr.count = cpu_to_be16(2);
 
 #ifdef DEBUG
-	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
+	if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) {
 		ASSERT(blk1->blkno >= mp->m_dirleafblk &&
 		       blk1->blkno < mp->m_dirfreeblk);
 		ASSERT(blk2->blkno >= mp->m_dirleafblk &&
@@ -381,7 +384,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 	int useextra;
 
 	node = oldblk->bp->data;
-	ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 
 	/*
 	 * With V2 dirs the extra block is data or freespace.
@@ -480,8 +483,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		node1 = node2;
 		node2 = tmpnode;
 	}
-	ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
-	ASSERT(node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+	ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+	ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 	count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2;
 	if (count == 0)
 		return;
@@ -575,7 +578,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 	int tmp;
 
 	node = oldblk->bp->data;
-	ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 	ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
 	ASSERT(newblk->blkno != 0);
 	if (state->args->whichfork == XFS_DATA_FORK)
@@ -711,7 +714,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
 	ASSERT(args != NULL);
 	ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
 	oldroot = root_blk->bp->data;
-	ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+	ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 	ASSERT(!oldroot->hdr.info.forw);
 	ASSERT(!oldroot->hdr.info.back);
 
@@ -734,10 +737,10 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
 	ASSERT(bp != NULL);
 	blkinfo = bp->data;
 	if (be16_to_cpu(oldroot->hdr.level) == 1) {
-		ASSERT(blkinfo->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
-		       blkinfo->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+		ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIR2_LEAFN_MAGIC ||
+		       be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC);
 	} else {
-		ASSERT(blkinfo->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+		ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC);
 	}
 	ASSERT(!blkinfo->forw);
 	ASSERT(!blkinfo->back);
@@ -773,7 +776,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
 	 */
 	blk = &state->path.blk[ state->path.active-1 ];
 	info = blk->bp->data;
-	ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+	ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC);
 	node = (xfs_da_intnode_t *)info;
 	count = be16_to_cpu(node->hdr.count);
 	if (count > (state->node_ents >> 1)) {
@@ -833,7 +836,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
 		count -= state->node_ents >> 2;
 		count -= be16_to_cpu(node->hdr.count);
 		node = bp->data;
-		ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 		count -= be16_to_cpu(node->hdr.count);
 		xfs_da_brelse(state->args->trans, bp);
 		if (count >= 0)
@@ -908,7 +911,7 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
 	}
 	for (blk--, level--; level >= 0; blk--, level--) {
 		node = blk->bp->data;
-		ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 		btree = &node->btree[ blk->index ];
 		if (be32_to_cpu(btree->hashval) == lasthash)
 			break;
@@ -976,8 +979,8 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 
 	drop_node = drop_blk->bp->data;
 	save_node = save_blk->bp->data;
-	ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
-	ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+	ASSERT(be16_to_cpu(drop_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+	ASSERT(be16_to_cpu(save_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 	tp = state->args->trans;
 
 	/*
@@ -1275,8 +1278,8 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
 
 	node1 = node1_bp->data;
 	node2 = node2_bp->data;
-	ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) &&
-	       node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+	ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) &&
+	       (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC));
 	if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
 	    ((be32_to_cpu(node2->btree[0].hashval) <
 	      be32_to_cpu(node1->btree[0].hashval)) ||
@@ -1296,7 +1299,7 @@ xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
 	xfs_da_intnode_t *node;
 
 	node = bp->data;
-	ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 	if (count)
 		*count = be16_to_cpu(node->hdr.count);
 	if (!node->hdr.count)
@@ -1409,7 +1412,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 	for (blk = &path->blk[level]; level >= 0; blk--, level--) {
 		ASSERT(blk->bp != NULL);
 		node = blk->bp->data;
-		ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 		if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) {
 			blk->index++;
 			blkno = be32_to_cpu(node->btree[blk->index].before);
@@ -1448,9 +1451,9 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 			return(error);
 		ASSERT(blk->bp != NULL);
 		info = blk->bp->data;
-		ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
-		       info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
-		       info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+		ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC ||
+		       be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC ||
+		       be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC);
 		blk->magic = be16_to_cpu(info->magic);
 		if (blk->magic == XFS_DA_NODE_MAGIC) {
 			node = (xfs_da_intnode_t *)info;
@@ -1543,62 +1546,79 @@ const struct xfs_nameops xfs_default_nameops = {
 	.compname	= xfs_da_compname
 };
 
+/*
+ * Add a block to the btree ahead of the file.
+ * Return the new block number to the caller.
+ */
 int
-xfs_da_grow_inode_int(
-	struct xfs_da_args	*args,
-	xfs_fileoff_t		*bno,
-	int			count)
+xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
 {
-	struct xfs_trans	*tp = args->trans;
-	struct xfs_inode	*dp = args->dp;
-	int			w = args->whichfork;
-	xfs_drfsbno_t		nblks = dp->i_d.di_nblocks;
-	struct xfs_bmbt_irec	map, *mapp;
-	int			nmap, error, got, i, mapi;
+	xfs_fileoff_t bno, b;
+	xfs_bmbt_irec_t map;
+	xfs_bmbt_irec_t	*mapp;
+	xfs_inode_t *dp;
+	int nmap, error, w, count, c, got, i, mapi;
+	xfs_trans_t *tp;
+	xfs_mount_t *mp;
+	xfs_drfsbno_t	nblks;
 
+	dp = args->dp;
+	mp = dp->i_mount;
+	w = args->whichfork;
+	tp = args->trans;
+	nblks = dp->i_d.di_nblocks;
+
+	/*
+	 * For new directories adjust the file offset and block count.
+	 */
+	if (w == XFS_DATA_FORK) {
+		bno = mp->m_dirleafblk;
+		count = mp->m_dirblkfsbs;
+	} else {
+		bno = 0;
+		count = 1;
+	}
 	/*
 	 * Find a spot in the file space to put the new block.
 	 */
-	error = xfs_bmap_first_unused(tp, dp, count, bno, w);
-	if (error)
+	if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w)))
 		return error;
-
+	if (w == XFS_DATA_FORK)
+		ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
 	/*
 	 * Try mapping it in one filesystem block.
 	 */
 	nmap = 1;
 	ASSERT(args->firstblock != NULL);
-	error = xfs_bmapi(tp, dp, *bno, count,
+	if ((error = xfs_bmapi(tp, dp, bno, count,
 			xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
 			XFS_BMAPI_CONTIG,
 			args->firstblock, args->total, &map, &nmap,
-			args->flist);
-	if (error)
+			args->flist))) {
 		return error;
-
+	}
 	ASSERT(nmap <= 1);
 	if (nmap == 1) {
 		mapp = &map;
 		mapi = 1;
-	} else if (nmap == 0 && count > 1) {
-		xfs_fileoff_t		b;
-		int			c;
-
-		/*
-		 * If we didn't get it and the block might work if fragmented,
-		 * try without the CONTIG flag.  Loop until we get it all.
-		 */
+	}
+	/*
+	 * If we didn't get it and the block might work if fragmented,
+	 * try without the CONTIG flag.  Loop until we get it all.
+	 */
+	else if (nmap == 0 && count > 1) {
 		mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
-		for (b = *bno, mapi = 0; b < *bno + count; ) {
+		for (b = bno, mapi = 0; b < bno + count; ) {
 			nmap = MIN(XFS_BMAP_MAX_NMAP, count);
-			c = (int)(*bno + count - b);
-			error = xfs_bmapi(tp, dp, b, c,
+			c = (int)(bno + count - b);
+			if ((error = xfs_bmapi(tp, dp, b, c,
 					xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|
 					XFS_BMAPI_METADATA,
 					args->firstblock, args->total,
-					&mapp[mapi], &nmap, args->flist);
-			if (error)
-				goto out_free_map;
+					&mapp[mapi], &nmap, args->flist))) {
+				kmem_free(mapp);
+				return error;
+			}
 			if (nmap < 1)
 				break;
 			mapi += nmap;
@@ -1609,53 +1629,24 @@ xfs_da_grow_inode_int(
 		mapi = 0;
 		mapp = NULL;
 	}
-
 	/*
 	 * Count the blocks we got, make sure it matches the total.
 	 */
 	for (i = 0, got = 0; i < mapi; i++)
 		got += mapp[i].br_blockcount;
-	if (got != count || mapp[0].br_startoff != *bno ||
+	if (got != count || mapp[0].br_startoff != bno ||
 	    mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
-	    *bno + count) {
-		error = XFS_ERROR(ENOSPC);
-		goto out_free_map;
+	    bno + count) {
+		if (mapp != &map)
+			kmem_free(mapp);
+		return XFS_ERROR(ENOSPC);
 	}
-
-	/* account for newly allocated blocks in reserved blocks total */
-	args->total -= dp->i_d.di_nblocks - nblks;
-
-out_free_map:
 	if (mapp != &map)
 		kmem_free(mapp);
-	return error;
-}
-
-/*
- * Add a block to the btree ahead of the file.
- * Return the new block number to the caller.
- */
-int
-xfs_da_grow_inode(
-	struct xfs_da_args	*args,
-	xfs_dablk_t		*new_blkno)
-{
-	xfs_fileoff_t		bno;
-	int			count;
-	int			error;
-
-	if (args->whichfork == XFS_DATA_FORK) {
-		bno = args->dp->i_mount->m_dirleafblk;
-		count = args->dp->i_mount->m_dirblkfsbs;
-	} else {
-		bno = 0;
-		count = 1;
-	}
-
-	error = xfs_da_grow_inode_int(args, &bno, count);
-	if (!error)
-		*new_blkno = (xfs_dablk_t)bno;
-	return error;
+	/* account for newly allocated blocks in reserved blocks total */
+	args->total -= dp->i_d.di_nblocks - nblks;
+	*new_blkno = (xfs_dablk_t)bno;
+	return 0;
 }
 
 /*
@@ -1713,12 +1704,12 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	/*
 	 * Get values from the moved block.
 	 */
-	if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
+	if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) {
 		dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
 		dead_level = 0;
 		dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval);
 	} else {
-		ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+		ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC);
 		dead_node = (xfs_da_intnode_t *)dead_info;
 		dead_level = be16_to_cpu(dead_node->hdr.level);
 		dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval);
@@ -1777,8 +1768,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 		if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
 			goto done;
 		par_node = par_buf->data;
-		if (unlikely(par_node->hdr.info.magic !=
-		    cpu_to_be16(XFS_DA_NODE_MAGIC) ||
+		if (unlikely(
+		    be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC ||
 		    (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
 					 XFS_ERRLEVEL_LOW, mp);
@@ -1829,7 +1820,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 		par_node = par_buf->data;
 		if (unlikely(
 		    be16_to_cpu(par_node->hdr.level) != level ||
-		    par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) {
+		    be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) {
 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
 					 XFS_ERRLEVEL_LOW, mp);
 			error = XFS_ERROR(EFSCORRUPTED);
@@ -1939,7 +1930,8 @@ xfs_da_do_buf(
 	xfs_daddr_t	*mappedbnop,
 	xfs_dabuf_t	**bpp,
 	int		whichfork,
-	int		caller)
+	int		caller,
+	inst_t		*ra)
 {
 	xfs_buf_t	*bp = NULL;
 	xfs_buf_t	**bplist;
@@ -2078,22 +2070,25 @@ xfs_da_do_buf(
 	 * Build a dabuf structure.
 	 */
 	if (bplist) {
-		rbp = xfs_da_buf_make(nbplist, bplist);
+		rbp = xfs_da_buf_make(nbplist, bplist, ra);
 	} else if (bp)
-		rbp = xfs_da_buf_make(1, &bp);
+		rbp = xfs_da_buf_make(1, &bp, ra);
 	else
 		rbp = NULL;
 	/*
 	 * For read_buf, check the magic number.
 	 */
 	if (caller == 1) {
-		xfs_dir2_data_hdr_t	*hdr = rbp->data;
-		xfs_dir2_free_t		*free = rbp->data;
-		xfs_da_blkinfo_t	*info = rbp->data;
+		xfs_dir2_data_t		*data;
+		xfs_dir2_free_t		*free;
+		xfs_da_blkinfo_t	*info;
 		uint			magic, magic1;
 
+		info = rbp->data;
+		data = rbp->data;
+		free = rbp->data;
 		magic = be16_to_cpu(info->magic);
-		magic1 = be32_to_cpu(hdr->magic);
+		magic1 = be32_to_cpu(data->hdr.magic);
 		if (unlikely(
 		    XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
 				   (magic != XFS_ATTR_LEAF_MAGIC) &&
@@ -2101,7 +2096,7 @@ xfs_da_do_buf(
 				   (magic != XFS_DIR2_LEAFN_MAGIC) &&
 				   (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
 				   (magic1 != XFS_DIR2_DATA_MAGIC) &&
-				   (free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)),
+				   (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC),
 				mp, XFS_ERRTAG_DA_READ_BUF,
 				XFS_RANDOM_DA_READ_BUF))) {
 			trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_);
@@ -2148,7 +2143,8 @@ xfs_da_get_buf(
 	xfs_dabuf_t	**bpp,
 	int		whichfork)
 {
-	return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0);
+	return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0,
+						 (inst_t *)__return_address);
 }
 
 /*
@@ -2163,7 +2159,8 @@ xfs_da_read_buf(
 	xfs_dabuf_t	**bpp,
 	int		whichfork)
 {
-	return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1);
+	return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1,
+		(inst_t *)__return_address);
 }
 
 /*
@@ -2179,7 +2176,8 @@ xfs_da_reada_buf(
 	xfs_daddr_t		rval;
 
 	rval = -1;
-	if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3))
+	if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3,
+			(inst_t *)__return_address))
 		return -1;
 	else
 		return rval;
@@ -2237,12 +2235,17 @@ xfs_da_state_free(xfs_da_state_t *state)
 	kmem_zone_free(xfs_da_state_zone, state);
 }
 
+#ifdef XFS_DABUF_DEBUG
+xfs_dabuf_t	*xfs_dabuf_global_list;
+static DEFINE_SPINLOCK(xfs_dabuf_global_lock);
+#endif
+
 /*
  * Create a dabuf.
  */
 /* ARGSUSED */
 STATIC xfs_dabuf_t *
-xfs_da_buf_make(int nbuf, xfs_buf_t **bps)
+xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
 {
 	xfs_buf_t	*bp;
 	xfs_dabuf_t	*dabuf;
@@ -2254,6 +2257,11 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps)
 	else
 		dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
 	dabuf->dirty = 0;
+#ifdef XFS_DABUF_DEBUG
+	dabuf->ra = ra;
+	dabuf->target = XFS_BUF_TARGET(bps[0]);
+	dabuf->blkno = XFS_BUF_ADDR(bps[0]);
+#endif
 	if (nbuf == 1) {
 		dabuf->nbuf = 1;
 		bp = bps[0];
@@ -2273,6 +2281,23 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps)
 				XFS_BUF_COUNT(bp));
 		}
 	}
+#ifdef XFS_DABUF_DEBUG
+	{
+		xfs_dabuf_t	*p;
+
+		spin_lock(&xfs_dabuf_global_lock);
+		for (p = xfs_dabuf_global_list; p; p = p->next) {
+			ASSERT(p->blkno != dabuf->blkno ||
+			       p->target != dabuf->target);
+		}
+		dabuf->prev = NULL;
+		if (xfs_dabuf_global_list)
+			xfs_dabuf_global_list->prev = dabuf;
+		dabuf->next = xfs_dabuf_global_list;
+		xfs_dabuf_global_list = dabuf;
+		spin_unlock(&xfs_dabuf_global_lock);
+	}
+#endif
 	return dabuf;
 }
 
@@ -2308,12 +2333,25 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
 	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
 	if (dabuf->dirty)
 		xfs_da_buf_clean(dabuf);
-	if (dabuf->nbuf > 1) {
+	if (dabuf->nbuf > 1)
 		kmem_free(dabuf->data);
-		kmem_free(dabuf);
-	} else {
-		kmem_zone_free(xfs_dabuf_zone, dabuf);
+#ifdef XFS_DABUF_DEBUG
+	{
+		spin_lock(&xfs_dabuf_global_lock);
+		if (dabuf->prev)
+			dabuf->prev->next = dabuf->next;
+		else
+			xfs_dabuf_global_list = dabuf->next;
+		if (dabuf->next)
+			dabuf->next->prev = dabuf->prev;
+		spin_unlock(&xfs_dabuf_global_lock);
 	}
+	memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf));
+#endif
+	if (dabuf->nbuf == 1)
+		kmem_zone_free(xfs_dabuf_zone, dabuf);
+	else
+		kmem_free(dabuf);
 }
 
 /*
diff --git a/trunk/fs/xfs/xfs_da_btree.h b/trunk/fs/xfs/xfs_da_btree.h
index dbf7c074ae73..fe9f5a8c1d2a 100644
--- a/trunk/fs/xfs/xfs_da_btree.h
+++ b/trunk/fs/xfs/xfs_da_btree.h
@@ -145,11 +145,22 @@ typedef struct xfs_dabuf {
 	short		dirty;		/* data needs to be copied back */
 	short		bbcount;	/* how large is data in bbs */
 	void		*data;		/* pointer for buffers' data */
+#ifdef XFS_DABUF_DEBUG
+	inst_t		*ra;		/* return address of caller to make */
+	struct xfs_dabuf *next;		/* next in global chain */
+	struct xfs_dabuf *prev;		/* previous in global chain */
+	struct xfs_buftarg *target;	/* device for buffer */
+	xfs_daddr_t	blkno;		/* daddr first in bps[0] */
+#endif
 	struct xfs_buf	*bps[1];	/* actually nbuf of these */
 } xfs_dabuf_t;
 #define	XFS_DA_BUF_SIZE(n)	\
 	(sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1))
 
+#ifdef XFS_DABUF_DEBUG
+extern xfs_dabuf_t	*xfs_dabuf_global_list;
+#endif
+
 /*
  * Storage for holding state during Btree searches and split/join ops.
  *
@@ -237,8 +248,6 @@ int	xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
  * Utility routines.
  */
 int	xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno);
-int	xfs_da_grow_inode_int(struct xfs_da_args *args, xfs_fileoff_t *bno,
-			      int count);
 int	xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp,
 			      xfs_dablk_t bno, xfs_daddr_t mappedbno,
 			      xfs_dabuf_t **bp, int whichfork);
diff --git a/trunk/fs/xfs/xfs_dir2.c b/trunk/fs/xfs/xfs_dir2.c
index 4580ce00aeb4..dba7a71cedf3 100644
--- a/trunk/fs/xfs/xfs_dir2.c
+++ b/trunk/fs/xfs/xfs_dir2.c
@@ -24,17 +24,20 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
+#include "xfs_dir2.h"
 #include "xfs_mount.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
+#include "xfs_dir2_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_inode_item.h"
 #include "xfs_bmap.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_format.h"
-#include "xfs_dir2_priv.h"
+#include "xfs_dir2_data.h"
+#include "xfs_dir2_leaf.h"
+#include "xfs_dir2_block.h"
+#include "xfs_dir2_node.h"
 #include "xfs_error.h"
 #include "xfs_vnodeops.h"
 #include "xfs_trace.h"
@@ -119,15 +122,15 @@ int
 xfs_dir_isempty(
 	xfs_inode_t	*dp)
 {
-	xfs_dir2_sf_hdr_t	*sfp;
+	xfs_dir2_sf_t	*sfp;
 
 	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
 	if (dp->i_d.di_size == 0)	/* might happen during shutdown. */
 		return 1;
 	if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
 		return 0;
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-	return !sfp->count;
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	return !sfp->hdr.count;
 }
 
 /*
@@ -497,34 +500,129 @@ xfs_dir_canenter(
 
 /*
  * Add a block to the directory.
- *
- * This routine is for data and free blocks, not leaf/node blocks which are
- * handled by xfs_da_grow_inode.
+ * This routine is for data and free blocks, not leaf/node blocks
+ * which are handled by xfs_da_grow_inode.
  */
 int
 xfs_dir2_grow_inode(
-	struct xfs_da_args	*args,
-	int			space,	/* v2 dir's space XFS_DIR2_xxx_SPACE */
-	xfs_dir2_db_t		*dbp)	/* out: block number added */
+	xfs_da_args_t	*args,
+	int		space,		/* v2 dir's space XFS_DIR2_xxx_SPACE */
+	xfs_dir2_db_t	*dbp)		/* out: block number added */
 {
-	struct xfs_inode	*dp = args->dp;
-	struct xfs_mount	*mp = dp->i_mount;
-	xfs_fileoff_t		bno;	/* directory offset of new block */
-	int			count;	/* count of filesystem blocks */
-	int			error;
+	xfs_fileoff_t	bno;		/* directory offset of new block */
+	int		count;		/* count of filesystem blocks */
+	xfs_inode_t	*dp;		/* incore directory inode */
+	int		error;
+	int		got;		/* blocks actually mapped */
+	int		i;
+	xfs_bmbt_irec_t	map;		/* single structure for bmap */
+	int		mapi;		/* mapping index */
+	xfs_bmbt_irec_t	*mapp;		/* bmap mapping structure(s) */
+	xfs_mount_t	*mp;
+	int		nmap;		/* number of bmap entries */
+	xfs_trans_t	*tp;
+	xfs_drfsbno_t	nblks;
 
 	trace_xfs_dir2_grow_inode(args, space);
 
+	dp = args->dp;
+	tp = args->trans;
+	mp = dp->i_mount;
+	nblks = dp->i_d.di_nblocks;
 	/*
 	 * Set lowest possible block in the space requested.
 	 */
 	bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE);
 	count = mp->m_dirblkfsbs;
-
-	error = xfs_da_grow_inode_int(args, &bno, count);
-	if (error)
+	/*
+	 * Find the first hole for our block.
+	 */
+	if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK)))
 		return error;
+	nmap = 1;
+	ASSERT(args->firstblock != NULL);
+	/*
+	 * Try mapping the new block contiguously (one extent).
+	 */
+	if ((error = xfs_bmapi(tp, dp, bno, count,
+			XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
+			args->firstblock, args->total, &map, &nmap,
+			args->flist)))
+		return error;
+	ASSERT(nmap <= 1);
+	if (nmap == 1) {
+		mapp = &map;
+		mapi = 1;
+	}
+	/*
+	 * Didn't work and this is a multiple-fsb directory block.
+	 * Try again with contiguous flag turned off.
+	 */
+	else if (nmap == 0 && count > 1) {
+		xfs_fileoff_t	b;	/* current file offset */
+
+		/*
+		 * Space for maximum number of mappings.
+		 */
+		mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
+		/*
+		 * Iterate until we get to the end of our block.
+		 */
+		for (b = bno, mapi = 0; b < bno + count; ) {
+			int	c;	/* current fsb count */
+
+			/*
+			 * Can't map more than MAX_NMAP at once.
+			 */
+			nmap = MIN(XFS_BMAP_MAX_NMAP, count);
+			c = (int)(bno + count - b);
+			if ((error = xfs_bmapi(tp, dp, b, c,
+					XFS_BMAPI_WRITE|XFS_BMAPI_METADATA,
+					args->firstblock, args->total,
+					&mapp[mapi], &nmap, args->flist))) {
+				kmem_free(mapp);
+				return error;
+			}
+			if (nmap < 1)
+				break;
+			/*
+			 * Add this bunch into our table, go to the next offset.
+			 */
+			mapi += nmap;
+			b = mapp[mapi - 1].br_startoff +
+			    mapp[mapi - 1].br_blockcount;
+		}
+	}
+	/*
+	 * Didn't work.
+	 */
+	else {
+		mapi = 0;
+		mapp = NULL;
+	}
+	/*
+	 * See how many fsb's we got.
+	 */
+	for (i = 0, got = 0; i < mapi; i++)
+		got += mapp[i].br_blockcount;
+	/*
+	 * Didn't get enough fsb's, or the first/last blocks are wrong.
+	 */
+	if (got != count || mapp[0].br_startoff != bno ||
+	    mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
+	    bno + count) {
+		if (mapp != &map)
+			kmem_free(mapp);
+		return XFS_ERROR(ENOSPC);
+	}
+	/*
+	 * Done with the temporary mapping table.
+	 */
+	if (mapp != &map)
+		kmem_free(mapp);
 
+	/* account for newly allocated blocks in reserved blocks total */
+	args->total -= dp->i_d.di_nblocks - nblks;
 	*dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
 
 	/*
@@ -536,7 +634,7 @@ xfs_dir2_grow_inode(
 		size = XFS_FSB_TO_B(mp, bno + count);
 		if (size > dp->i_d.di_size) {
 			dp->i_d.di_size = size;
-			xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
+			xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
 		}
 	}
 	return 0;
diff --git a/trunk/fs/xfs/xfs_dir2.h b/trunk/fs/xfs/xfs_dir2.h
index e937d9991c18..74a3b1057685 100644
--- a/trunk/fs/xfs/xfs_dir2.h
+++ b/trunk/fs/xfs/xfs_dir2.h
@@ -16,14 +16,49 @@
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_DIR2_H__
-#define __XFS_DIR2_H__
+#define	__XFS_DIR2_H__
 
-struct xfs_bmap_free;
+struct uio;
+struct xfs_dabuf;
 struct xfs_da_args;
+struct xfs_dir2_put_args;
+struct xfs_bmap_free;
 struct xfs_inode;
 struct xfs_mount;
 struct xfs_trans;
 
+/*
+ * Directory version 2.
+ * There are 4 possible formats:
+ *	shortform
+ *	single block - data with embedded leaf at the end
+ *	multiple data blocks, single leaf+freeindex block
+ *	data blocks, node&leaf blocks (btree), freeindex blocks
+ *
+ *	The shortform format is in xfs_dir2_sf.h.
+ *	The single block format is in xfs_dir2_block.h.
+ *	The data block format is in xfs_dir2_data.h.
+ *	The leaf and freeindex block formats are in xfs_dir2_leaf.h.
+ *	Node blocks are the same as the other version, in xfs_da_btree.h.
+ */
+
+/*
+ * Byte offset in data block and shortform entry.
+ */
+typedef	__uint16_t	xfs_dir2_data_off_t;
+#define	NULLDATAOFF	0xffffU
+typedef uint		xfs_dir2_data_aoff_t;	/* argument form */
+
+/*
+ * Directory block number (logical dirblk in file)
+ */
+typedef	__uint32_t	xfs_dir2_db_t;
+
+/*
+ * Byte offset in a directory.
+ */
+typedef	xfs_off_t	xfs_dir2_off_t;
+
 extern struct xfs_name	xfs_name_dotdot;
 
 /*
@@ -51,10 +86,21 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_bmap_free *flist, xfs_extlen_t tot);
 extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_name *name, uint resblks);
+extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
 
 /*
- * Direct call from the bmap code, bypassing the generic directory layer.
+ * Utility routines for v2 directories.
  */
-extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
+extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
+				xfs_dir2_db_t *dbp);
+extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp,
+				int *vp);
+extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp,
+				int *vp);
+extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
+				struct xfs_dabuf *bp);
+
+extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
+				const unsigned char *name, int len);
 
 #endif	/* __XFS_DIR2_H__ */
diff --git a/trunk/fs/xfs/xfs_dir2_block.c b/trunk/fs/xfs/xfs_dir2_block.c
index 9245e029b8ea..580d99cef9e7 100644
--- a/trunk/fs/xfs/xfs_dir2_block.c
+++ b/trunk/fs/xfs/xfs_dir2_block.c
@@ -23,14 +23,17 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
+#include "xfs_dir2.h"
 #include "xfs_mount.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_dir2_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_inode_item.h"
-#include "xfs_dir2_format.h"
-#include "xfs_dir2_priv.h"
+#include "xfs_dir2_data.h"
+#include "xfs_dir2_leaf.h"
+#include "xfs_dir2_block.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
 
@@ -64,7 +67,7 @@ xfs_dir2_block_addname(
 	xfs_da_args_t		*args)		/* directory op arguments */
 {
 	xfs_dir2_data_free_t	*bf;		/* bestfree table in block */
-	xfs_dir2_data_hdr_t	*hdr;		/* block header */
+	xfs_dir2_block_t	*block;		/* directory block structure */
 	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
 	xfs_dabuf_t		*bp;		/* buffer for block */
 	xfs_dir2_block_tail_t	*btp;		/* block tail */
@@ -102,13 +105,13 @@ xfs_dir2_block_addname(
 		return error;
 	}
 	ASSERT(bp != NULL);
-	hdr = bp->data;
+	block = bp->data;
 	/*
 	 * Check the magic number, corrupted if wrong.
 	 */
-	if (unlikely(hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))) {
+	if (unlikely(be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC)) {
 		XFS_CORRUPTION_ERROR("xfs_dir2_block_addname",
-				     XFS_ERRLEVEL_LOW, mp, hdr);
+				     XFS_ERRLEVEL_LOW, mp, block);
 		xfs_da_brelse(tp, bp);
 		return XFS_ERROR(EFSCORRUPTED);
 	}
@@ -116,8 +119,8 @@ xfs_dir2_block_addname(
 	/*
 	 * Set up pointers to parts of the block.
 	 */
-	bf = hdr->bestfree;
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	bf = block->hdr.bestfree;
+	btp = xfs_dir2_block_tail_p(mp, block);
 	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * No stale entries?  Need space for entry and new leaf.
@@ -130,7 +133,7 @@ xfs_dir2_block_addname(
 		/*
 		 * Data object just before the first leaf entry.
 		 */
-		enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
+		enddup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp));
 		/*
 		 * If it's not free then can't do this add without cleaning up:
 		 * the space before the first leaf entry needs to be free so it
@@ -143,7 +146,7 @@ xfs_dir2_block_addname(
 		 */
 		else {
 			dup = (xfs_dir2_data_unused_t *)
-			      ((char *)hdr + be16_to_cpu(bf[0].offset));
+			      ((char *)block + be16_to_cpu(bf[0].offset));
 			if (dup == enddup) {
 				/*
 				 * It is the biggest freespace, is it too small
@@ -156,7 +159,7 @@ xfs_dir2_block_addname(
 					 */
 					if (be16_to_cpu(bf[1].length) >= len)
 						dup = (xfs_dir2_data_unused_t *)
-						      ((char *)hdr +
+						      ((char *)block +
 						       be16_to_cpu(bf[1].offset));
 					else
 						dup = NULL;
@@ -179,7 +182,7 @@ xfs_dir2_block_addname(
 	 */
 	else if (be16_to_cpu(bf[0].length) >= len) {
 		dup = (xfs_dir2_data_unused_t *)
-		      ((char *)hdr + be16_to_cpu(bf[0].offset));
+		      ((char *)block + be16_to_cpu(bf[0].offset));
 		compact = 0;
 	}
 	/*
@@ -193,7 +196,7 @@ xfs_dir2_block_addname(
 		/*
 		 * Data object just before the first leaf entry.
 		 */
-		dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
+		dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp));
 		/*
 		 * If it's not free then the data will go where the
 		 * leaf data starts now, if it works at all.
@@ -252,8 +255,7 @@ xfs_dir2_block_addname(
 			highstale = lfloghigh = -1;
 		     fromidx >= 0;
 		     fromidx--) {
-			if (blp[fromidx].address ==
-			    cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
+			if (be32_to_cpu(blp[fromidx].address) == XFS_DIR2_NULL_DATAPTR) {
 				if (highstale == -1)
 					highstale = toidx;
 				else {
@@ -270,7 +272,7 @@ xfs_dir2_block_addname(
 		lfloghigh -= be32_to_cpu(btp->stale) - 1;
 		be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1));
 		xfs_dir2_data_make_free(tp, bp,
-			(xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
+			(xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
 			(xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
 			&needlog, &needscan);
 		blp += be32_to_cpu(btp->stale) - 1;
@@ -280,7 +282,7 @@ xfs_dir2_block_addname(
 		 * This needs to happen before the next call to use_free.
 		 */
 		if (needscan) {
-			xfs_dir2_data_freescan(mp, hdr, &needlog);
+			xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
 			needscan = 0;
 		}
 	}
@@ -316,7 +318,7 @@ xfs_dir2_block_addname(
 		 */
 		xfs_dir2_data_use_free(tp, bp, enddup,
 			(xfs_dir2_data_aoff_t)
-			((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) -
+			((char *)enddup - (char *)block + be16_to_cpu(enddup->length) -
 			 sizeof(*blp)),
 			(xfs_dir2_data_aoff_t)sizeof(*blp),
 			&needlog, &needscan);
@@ -329,7 +331,8 @@ xfs_dir2_block_addname(
 		 * This needs to happen before the next call to use_free.
 		 */
 		if (needscan) {
-			xfs_dir2_data_freescan(mp, hdr, &needlog);
+			xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block,
+				&needlog);
 			needscan = 0;
 		}
 		/*
@@ -350,14 +353,12 @@ xfs_dir2_block_addname(
 	else {
 		for (lowstale = mid;
 		     lowstale >= 0 &&
-			blp[lowstale].address !=
-			cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
+			be32_to_cpu(blp[lowstale].address) != XFS_DIR2_NULL_DATAPTR;
 		     lowstale--)
 			continue;
 		for (highstale = mid + 1;
 		     highstale < be32_to_cpu(btp->count) &&
-			blp[highstale].address !=
-			cpu_to_be32(XFS_DIR2_NULL_DATAPTR) &&
+			be32_to_cpu(blp[highstale].address) != XFS_DIR2_NULL_DATAPTR &&
 			(lowstale < 0 || mid - lowstale > highstale - mid);
 		     highstale++)
 			continue;
@@ -396,13 +397,13 @@ xfs_dir2_block_addname(
 	 */
 	blp[mid].hashval = cpu_to_be32(args->hashval);
 	blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
-				(char *)dep - (char *)hdr));
+				(char *)dep - (char *)block));
 	xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
 	/*
 	 * Mark space for the data entry used.
 	 */
 	xfs_dir2_data_use_free(tp, bp, dup,
-		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
+		(xfs_dir2_data_aoff_t)((char *)dup - (char *)block),
 		(xfs_dir2_data_aoff_t)len, &needlog, &needscan);
 	/*
 	 * Create the new data entry.
@@ -411,12 +412,12 @@ xfs_dir2_block_addname(
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, args->namelen);
 	tagp = xfs_dir2_data_entry_tag_p(dep);
-	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
+	*tagp = cpu_to_be16((char *)dep - (char *)block);
 	/*
 	 * Clean up the bestfree array and log the header, tail, and entry.
 	 */
 	if (needscan)
-		xfs_dir2_data_freescan(mp, hdr, &needlog);
+		xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
 	if (needlog)
 		xfs_dir2_data_log_header(tp, bp);
 	xfs_dir2_block_log_tail(tp, bp);
@@ -436,7 +437,7 @@ xfs_dir2_block_getdents(
 	xfs_off_t		*offset,
 	filldir_t		filldir)
 {
-	xfs_dir2_data_hdr_t	*hdr;		/* block header */
+	xfs_dir2_block_t	*block;		/* directory block structure */
 	xfs_dabuf_t		*bp;		/* buffer for block */
 	xfs_dir2_block_tail_t	*btp;		/* block tail */
 	xfs_dir2_data_entry_t	*dep;		/* block data entry */
@@ -469,13 +470,13 @@ xfs_dir2_block_getdents(
 	 * We'll skip entries before this.
 	 */
 	wantoff = xfs_dir2_dataptr_to_off(mp, *offset);
-	hdr = bp->data;
+	block = bp->data;
 	xfs_dir2_data_check(dp, bp);
 	/*
 	 * Set up values for the loop.
 	 */
-	btp = xfs_dir2_block_tail_p(mp, hdr);
-	ptr = (char *)(hdr + 1);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	ptr = (char *)block->u;
 	endptr = (char *)xfs_dir2_block_leaf_p(btp);
 
 	/*
@@ -501,11 +502,11 @@ xfs_dir2_block_getdents(
 		/*
 		 * The entry is before the desired starting point, skip it.
 		 */
-		if ((char *)dep - (char *)hdr < wantoff)
+		if ((char *)dep - (char *)block < wantoff)
 			continue;
 
 		cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
-					    (char *)dep - (char *)hdr);
+					    (char *)dep - (char *)block);
 
 		/*
 		 * If it didn't fit, set the final offset to here & return.
@@ -539,14 +540,17 @@ xfs_dir2_block_log_leaf(
 	int			first,		/* index of first logged leaf */
 	int			last)		/* index of last logged leaf */
 {
-	xfs_dir2_data_hdr_t	*hdr = bp->data;
-	xfs_dir2_leaf_entry_t	*blp;
-	xfs_dir2_block_tail_t	*btp;
+	xfs_dir2_block_t	*block;		/* directory block structure */
+	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
+	xfs_dir2_block_tail_t	*btp;		/* block tail */
+	xfs_mount_t		*mp;		/* filesystem mount point */
 
-	btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
+	mp = tp->t_mountp;
+	block = bp->data;
+	btp = xfs_dir2_block_tail_p(mp, block);
 	blp = xfs_dir2_block_leaf_p(btp);
-	xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr),
-		(uint)((char *)&blp[last + 1] - (char *)hdr - 1));
+	xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block),
+		(uint)((char *)&blp[last + 1] - (char *)block - 1));
 }
 
 /*
@@ -557,12 +561,15 @@ xfs_dir2_block_log_tail(
 	xfs_trans_t		*tp,		/* transaction structure */
 	xfs_dabuf_t		*bp)		/* block buffer */
 {
-	xfs_dir2_data_hdr_t	*hdr = bp->data;
-	xfs_dir2_block_tail_t	*btp;
+	xfs_dir2_block_t	*block;		/* directory block structure */
+	xfs_dir2_block_tail_t	*btp;		/* block tail */
+	xfs_mount_t		*mp;		/* filesystem mount point */
 
-	btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
-	xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr),
-		(uint)((char *)(btp + 1) - (char *)hdr - 1));
+	mp = tp->t_mountp;
+	block = bp->data;
+	btp = xfs_dir2_block_tail_p(mp, block);
+	xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block),
+		(uint)((char *)(btp + 1) - (char *)block - 1));
 }
 
 /*
@@ -573,7 +580,7 @@ int						/* error */
 xfs_dir2_block_lookup(
 	xfs_da_args_t		*args)		/* dir lookup arguments */
 {
-	xfs_dir2_data_hdr_t	*hdr;		/* block header */
+	xfs_dir2_block_t	*block;		/* block structure */
 	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
 	xfs_dabuf_t		*bp;		/* block buffer */
 	xfs_dir2_block_tail_t	*btp;		/* block tail */
@@ -593,14 +600,14 @@ xfs_dir2_block_lookup(
 		return error;
 	dp = args->dp;
 	mp = dp->i_mount;
-	hdr = bp->data;
+	block = bp->data;
 	xfs_dir2_data_check(dp, bp);
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	btp = xfs_dir2_block_tail_p(mp, block);
 	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Get the offset from the leaf entry, to point to the data.
 	 */
-	dep = (xfs_dir2_data_entry_t *)((char *)hdr +
+	dep = (xfs_dir2_data_entry_t *)((char *)block +
 		xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
 	/*
 	 * Fill in inode number, CI name if appropriate, release the block.
@@ -621,7 +628,7 @@ xfs_dir2_block_lookup_int(
 	int			*entno)		/* returned entry number */
 {
 	xfs_dir2_dataptr_t	addr;		/* data entry address */
-	xfs_dir2_data_hdr_t	*hdr;		/* block header */
+	xfs_dir2_block_t	*block;		/* block structure */
 	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
 	xfs_dabuf_t		*bp;		/* block buffer */
 	xfs_dir2_block_tail_t	*btp;		/* block tail */
@@ -647,9 +654,9 @@ xfs_dir2_block_lookup_int(
 		return error;
 	}
 	ASSERT(bp != NULL);
-	hdr = bp->data;
+	block = bp->data;
 	xfs_dir2_data_check(dp, bp);
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	btp = xfs_dir2_block_tail_p(mp, block);
 	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Loop doing a binary search for our hash value.
@@ -687,7 +694,7 @@ xfs_dir2_block_lookup_int(
 		 * Get pointer to the entry from the leaf.
 		 */
 		dep = (xfs_dir2_data_entry_t *)
-			((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr));
+			((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
 		/*
 		 * Compare name and if it's an exact match, return the index
 		 * and buffer. If it's the first case-insensitive match, store
@@ -726,7 +733,7 @@ int						/* error */
 xfs_dir2_block_removename(
 	xfs_da_args_t		*args)		/* directory operation args */
 {
-	xfs_dir2_data_hdr_t	*hdr;		/* block header */
+	xfs_dir2_block_t	*block;		/* block structure */
 	xfs_dir2_leaf_entry_t	*blp;		/* block leaf pointer */
 	xfs_dabuf_t		*bp;		/* block buffer */
 	xfs_dir2_block_tail_t	*btp;		/* block tail */
@@ -753,20 +760,20 @@ xfs_dir2_block_removename(
 	dp = args->dp;
 	tp = args->trans;
 	mp = dp->i_mount;
-	hdr = bp->data;
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	block = bp->data;
+	btp = xfs_dir2_block_tail_p(mp, block);
 	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Point to the data entry using the leaf entry.
 	 */
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+	      ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
 	/*
 	 * Mark the data entry's space free.
 	 */
 	needlog = needscan = 0;
 	xfs_dir2_data_make_free(tp, bp,
-		(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
+		(xfs_dir2_data_aoff_t)((char *)dep - (char *)block),
 		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
 	/*
 	 * Fix up the block tail.
@@ -782,15 +789,15 @@ xfs_dir2_block_removename(
 	 * Fix up bestfree, log the header if necessary.
 	 */
 	if (needscan)
-		xfs_dir2_data_freescan(mp, hdr, &needlog);
+		xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
 	if (needlog)
 		xfs_dir2_data_log_header(tp, bp);
 	xfs_dir2_data_check(dp, bp);
 	/*
 	 * See if the size as a shortform is good enough.
 	 */
-	size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
-	if (size > XFS_IFORK_DSIZE(dp)) {
+	if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) >
+	    XFS_IFORK_DSIZE(dp)) {
 		xfs_da_buf_done(bp);
 		return 0;
 	}
@@ -808,7 +815,7 @@ int						/* error */
 xfs_dir2_block_replace(
 	xfs_da_args_t		*args)		/* directory operation args */
 {
-	xfs_dir2_data_hdr_t	*hdr;		/* block header */
+	xfs_dir2_block_t	*block;		/* block structure */
 	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
 	xfs_dabuf_t		*bp;		/* block buffer */
 	xfs_dir2_block_tail_t	*btp;		/* block tail */
@@ -829,14 +836,14 @@ xfs_dir2_block_replace(
 	}
 	dp = args->dp;
 	mp = dp->i_mount;
-	hdr = bp->data;
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	block = bp->data;
+	btp = xfs_dir2_block_tail_p(mp, block);
 	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Point to the data entry we need to change.
 	 */
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+	      ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
 	ASSERT(be64_to_cpu(dep->inumber) != args->inumber);
 	/*
 	 * Change the inode number to the new value.
@@ -875,7 +882,7 @@ xfs_dir2_leaf_to_block(
 	xfs_dabuf_t		*dbp)		/* data buffer */
 {
 	__be16			*bestsp;	/* leaf bests table */
-	xfs_dir2_data_hdr_t	*hdr;		/* block header */
+	xfs_dir2_block_t	*block;		/* block structure */
 	xfs_dir2_block_tail_t	*btp;		/* block tail */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	xfs_dir2_data_unused_t	*dup;		/* unused data entry */
@@ -899,7 +906,7 @@ xfs_dir2_leaf_to_block(
 	tp = args->trans;
 	mp = dp->i_mount;
 	leaf = lbp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
 	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	/*
 	 * If there are data blocks other than the first one, take this
@@ -910,7 +917,7 @@ xfs_dir2_leaf_to_block(
 	while (dp->i_d.di_size > mp->m_dirblksize) {
 		bestsp = xfs_dir2_leaf_bests_p(ltp);
 		if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
-		    mp->m_dirblksize - (uint)sizeof(*hdr)) {
+		    mp->m_dirblksize - (uint)sizeof(block->hdr)) {
 			if ((error =
 			    xfs_dir2_leaf_trim_data(args, lbp,
 				    (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1))))
@@ -928,18 +935,18 @@ xfs_dir2_leaf_to_block(
 		    XFS_DATA_FORK))) {
 		goto out;
 	}
-	hdr = dbp->data;
-	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
+	block = dbp->data;
+	ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_DATA_MAGIC);
 	/*
 	 * Size of the "leaf" area in the block.
 	 */
-	size = (uint)sizeof(xfs_dir2_block_tail_t) +
+	size = (uint)sizeof(block->tail) +
 	       (uint)sizeof(*lep) * (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
 	/*
 	 * Look at the last data entry.
 	 */
-	tagp = (__be16 *)((char *)hdr + mp->m_dirblksize) - 1;
-	dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
+	tagp = (__be16 *)((char *)block + mp->m_dirblksize) - 1;
+	dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp));
 	/*
 	 * If it's not free or is too short we can't do it.
 	 */
@@ -951,7 +958,7 @@ xfs_dir2_leaf_to_block(
 	/*
 	 * Start converting it to block form.
 	 */
-	hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
+	block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
 	needlog = 1;
 	needscan = 0;
 	/*
@@ -962,7 +969,7 @@ xfs_dir2_leaf_to_block(
 	/*
 	 * Initialize the block tail.
 	 */
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	btp = xfs_dir2_block_tail_p(mp, block);
 	btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
 	btp->stale = 0;
 	xfs_dir2_block_log_tail(tp, dbp);
@@ -971,8 +978,7 @@ xfs_dir2_leaf_to_block(
 	 */
 	lep = xfs_dir2_block_leaf_p(btp);
 	for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
-		if (leaf->ents[from].address ==
-		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+		if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR)
 			continue;
 		lep[to++] = leaf->ents[from];
 	}
@@ -982,7 +988,7 @@ xfs_dir2_leaf_to_block(
 	 * Scan the bestfree if we need it and log the data block header.
 	 */
 	if (needscan)
-		xfs_dir2_data_freescan(mp, hdr, &needlog);
+		xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
 	if (needlog)
 		xfs_dir2_data_log_header(tp, dbp);
 	/*
@@ -996,8 +1002,8 @@ xfs_dir2_leaf_to_block(
 	/*
 	 * Now see if the resulting block can be shrunken to shortform.
 	 */
-	size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
-	if (size > XFS_IFORK_DSIZE(dp)) {
+	if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) >
+	    XFS_IFORK_DSIZE(dp)) {
 		error = 0;
 		goto out;
 	}
@@ -1018,10 +1024,12 @@ xfs_dir2_sf_to_block(
 	xfs_da_args_t		*args)		/* operation arguments */
 {
 	xfs_dir2_db_t		blkno;		/* dir-relative block # (0) */
-	xfs_dir2_data_hdr_t	*hdr;		/* block header */
+	xfs_dir2_block_t	*block;		/* block structure */
 	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
 	xfs_dabuf_t		*bp;		/* block buffer */
 	xfs_dir2_block_tail_t	*btp;		/* block tail pointer */
+	char			*buf;		/* sf buffer */
+	int			buf_len;
 	xfs_dir2_data_entry_t	*dep;		/* data entry pointer */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			dummy;		/* trash */
@@ -1035,8 +1043,7 @@ xfs_dir2_sf_to_block(
 	int			newoffset;	/* offset from current entry */
 	int			offset;		/* target block offset */
 	xfs_dir2_sf_entry_t	*sfep;		/* sf entry pointer */
-	xfs_dir2_sf_hdr_t	*oldsfp;	/* old shortform header  */
-	xfs_dir2_sf_hdr_t	*sfp;		/* shortform header  */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
 	__be16			*tagp;		/* end of data entry */
 	xfs_trans_t		*tp;		/* transaction pointer */
 	struct xfs_name		name;
@@ -1054,30 +1061,32 @@ xfs_dir2_sf_to_block(
 		ASSERT(XFS_FORCED_SHUTDOWN(mp));
 		return XFS_ERROR(EIO);
 	}
-
-	oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-
 	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
-	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count));
-
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 	/*
-	 * Copy the directory into a temporary buffer.
+	 * Copy the directory into a temporary buffer.
 	 * Then pitch the incore inode data so we can make extents.
 	 */
-	sfp = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP);
-	memcpy(sfp, oldsfp, dp->i_df.if_bytes);
 
-	xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK);
+	buf_len = dp->i_df.if_bytes;
+	buf = kmem_alloc(buf_len, KM_SLEEP);
+
+	memcpy(buf, sfp, buf_len);
+	xfs_idata_realloc(dp, -buf_len, XFS_DATA_FORK);
 	dp->i_d.di_size = 0;
 	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
-
+	/*
+	 * Reset pointer - old sfp is gone.
+	 */
+	sfp = (xfs_dir2_sf_t *)buf;
 	/*
 	 * Add block 0 to the inode.
 	 */
 	error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
 	if (error) {
-		kmem_free(sfp);
+		kmem_free(buf);
 		return error;
 	}
 	/*
@@ -1085,21 +1094,21 @@ xfs_dir2_sf_to_block(
 	 */
 	error = xfs_dir2_data_init(args, blkno, &bp);
 	if (error) {
-		kmem_free(sfp);
+		kmem_free(buf);
 		return error;
 	}
-	hdr = bp->data;
-	hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
+	block = bp->data;
+	block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
 	/*
 	 * Compute size of block "tail" area.
 	 */
 	i = (uint)sizeof(*btp) +
-	    (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
+	    (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
 	/*
 	 * The whole thing is initialized to free by the init routine.
 	 * Say we're using the leaf and tail area.
 	 */
-	dup = (xfs_dir2_data_unused_t *)(hdr + 1);
+	dup = (xfs_dir2_data_unused_t *)block->u;
 	needlog = needscan = 0;
 	xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog,
 		&needscan);
@@ -1107,51 +1116,50 @@ xfs_dir2_sf_to_block(
 	/*
 	 * Fill in the tail.
 	 */
-	btp = xfs_dir2_block_tail_p(mp, hdr);
-	btp->count = cpu_to_be32(sfp->count + 2);	/* ., .. */
+	btp = xfs_dir2_block_tail_p(mp, block);
+	btp->count = cpu_to_be32(sfp->hdr.count + 2);	/* ., .. */
 	btp->stale = 0;
 	blp = xfs_dir2_block_leaf_p(btp);
-	endoffset = (uint)((char *)blp - (char *)hdr);
+	endoffset = (uint)((char *)blp - (char *)block);
 	/*
 	 * Remove the freespace, we'll manage it.
 	 */
 	xfs_dir2_data_use_free(tp, bp, dup,
-		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
+		(xfs_dir2_data_aoff_t)((char *)dup - (char *)block),
 		be16_to_cpu(dup->length), &needlog, &needscan);
 	/*
 	 * Create entry for .
 	 */
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)hdr + XFS_DIR2_DATA_DOT_OFFSET);
+	      ((char *)block + XFS_DIR2_DATA_DOT_OFFSET);
 	dep->inumber = cpu_to_be64(dp->i_ino);
 	dep->namelen = 1;
 	dep->name[0] = '.';
 	tagp = xfs_dir2_data_entry_tag_p(dep);
-	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
+	*tagp = cpu_to_be16((char *)dep - (char *)block);
 	xfs_dir2_data_log_entry(tp, bp, dep);
 	blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
 	blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
-				(char *)dep - (char *)hdr));
+				(char *)dep - (char *)block));
 	/*
 	 * Create entry for ..
 	 */
 	dep = (xfs_dir2_data_entry_t *)
-		((char *)hdr + XFS_DIR2_DATA_DOTDOT_OFFSET);
-	dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp));
+		((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET);
+	dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
 	dep->namelen = 2;
 	dep->name[0] = dep->name[1] = '.';
 	tagp = xfs_dir2_data_entry_tag_p(dep);
-	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
+	*tagp = cpu_to_be16((char *)dep - (char *)block);
 	xfs_dir2_data_log_entry(tp, bp, dep);
 	blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
 	blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
-				(char *)dep - (char *)hdr));
+				(char *)dep - (char *)block));
 	offset = XFS_DIR2_DATA_FIRST_OFFSET;
 	/*
 	 * Loop over existing entries, stuff them in.
 	 */
-	i = 0;
-	if (!sfp->count)
+	if ((i = 0) == sfp->hdr.count)
 		sfep = NULL;
 	else
 		sfep = xfs_dir2_sf_firstentry(sfp);
@@ -1171,40 +1179,43 @@ xfs_dir2_sf_to_block(
 		 * There should be a hole here, make one.
 		 */
 		if (offset < newoffset) {
-			dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
+			dup = (xfs_dir2_data_unused_t *)
+			      ((char *)block + offset);
 			dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 			dup->length = cpu_to_be16(newoffset - offset);
 			*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
-				((char *)dup - (char *)hdr));
+				((char *)dup - (char *)block));
 			xfs_dir2_data_log_unused(tp, bp, dup);
-			xfs_dir2_data_freeinsert(hdr, dup, &dummy);
+			(void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block,
+				dup, &dummy);
 			offset += be16_to_cpu(dup->length);
 			continue;
 		}
 		/*
 		 * Copy a real entry.
 		 */
-		dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset);
-		dep->inumber = cpu_to_be64(xfs_dir2_sfe_get_ino(sfp, sfep));
+		dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset);
+		dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp,
+				xfs_dir2_sf_inumberp(sfep)));
 		dep->namelen = sfep->namelen;
 		memcpy(dep->name, sfep->name, dep->namelen);
 		tagp = xfs_dir2_data_entry_tag_p(dep);
-		*tagp = cpu_to_be16((char *)dep - (char *)hdr);
+		*tagp = cpu_to_be16((char *)dep - (char *)block);
 		xfs_dir2_data_log_entry(tp, bp, dep);
 		name.name = sfep->name;
 		name.len = sfep->namelen;
 		blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
 							hashname(&name));
 		blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
-						 (char *)dep - (char *)hdr));
-		offset = (int)((char *)(tagp + 1) - (char *)hdr);
-		if (++i == sfp->count)
+						 (char *)dep - (char *)block));
+		offset = (int)((char *)(tagp + 1) - (char *)block);
+		if (++i == sfp->hdr.count)
 			sfep = NULL;
 		else
 			sfep = xfs_dir2_sf_nextentry(sfp, sfep);
 	}
 	/* Done with the temporary buffer */
-	kmem_free(sfp);
+	kmem_free(buf);
 	/*
 	 * Sort the leaf entries by hash value.
 	 */
diff --git a/trunk/fs/xfs/xfs_dir2_block.h b/trunk/fs/xfs/xfs_dir2_block.h
new file mode 100644
index 000000000000..10e689676382
--- /dev/null
+++ b/trunk/fs/xfs/xfs_dir2_block.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DIR2_BLOCK_H__
+#define	__XFS_DIR2_BLOCK_H__
+
+/*
+ * xfs_dir2_block.h
+ * Directory version 2, single block format structures
+ */
+
+struct uio;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_dir2_data_hdr;
+struct xfs_dir2_leaf_entry;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * The single block format is as follows:
+ * xfs_dir2_data_hdr_t structure
+ * xfs_dir2_data_entry_t and xfs_dir2_data_unused_t structures
+ * xfs_dir2_leaf_entry_t structures
+ * xfs_dir2_block_tail_t structure
+ */
+
+#define	XFS_DIR2_BLOCK_MAGIC	0x58443242	/* XD2B: for one block dirs */
+
+typedef struct xfs_dir2_block_tail {
+	__be32		count;			/* leaf entries, incl. stale */
+	__be32		stale;			/* count of stale lf entries */
+} xfs_dir2_block_tail_t;
+
+/*
+ * Generic single-block structure, for xfs_db.
+ */
+typedef struct xfs_dir2_block {
+	xfs_dir2_data_hdr_t	hdr;		/* magic XFS_DIR2_BLOCK_MAGIC */
+	xfs_dir2_data_union_t	u[1];		/* first data/unused entry */
+	xfs_dir2_leaf_entry_t	leaf[1];	/* see xfs_dir2_block_leaf_p */
+	xfs_dir2_block_tail_t	tail;		/* see xfs_dir2_block_tail_p */
+} xfs_dir2_block_t;
+
+/*
+ * Pointer to the block tail, the last structure in a 1-block-format block
+ */
+static inline xfs_dir2_block_tail_t *
+xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
+{
+	return (((xfs_dir2_block_tail_t *)
+		((char *)(block) + (mp)->m_dirblksize)) - 1);
+}
+
+/*
+ * Pointer to the first of the btp->count leaf entries that end at the tail
+ */
+static inline struct xfs_dir2_leaf_entry *
+xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp)
+{
+	return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
+}
+
+/*
+ * Function declarations.
+ */
+extern int xfs_dir2_block_addname(struct xfs_da_args *args);
+extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent,
+				   xfs_off_t *offset, filldir_t filldir);
+extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_block_removename(struct xfs_da_args *args);
+extern int xfs_dir2_block_replace(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
+				  struct xfs_dabuf *lbp, struct xfs_dabuf *dbp);
+extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
+
+#endif	/* __XFS_DIR2_BLOCK_H__ */
diff --git a/trunk/fs/xfs/xfs_dir2_data.c b/trunk/fs/xfs/xfs_dir2_data.c
index 5bbe2a8a023f..921595b84f5b 100644
--- a/trunk/fs/xfs/xfs_dir2_data.c
+++ b/trunk/fs/xfs/xfs_dir2_data.c
@@ -23,18 +23,18 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
+#include "xfs_dir2.h"
 #include "xfs_mount.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_dir2_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
-#include "xfs_dir2_format.h"
-#include "xfs_dir2_priv.h"
+#include "xfs_dir2_data.h"
+#include "xfs_dir2_leaf.h"
+#include "xfs_dir2_block.h"
 #include "xfs_error.h"
 
-STATIC xfs_dir2_data_free_t *
-xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup);
-
 #ifdef DEBUG
 /*
  * Check the consistency of the data block.
@@ -50,7 +50,7 @@ xfs_dir2_data_check(
 	xfs_dir2_data_free_t	*bf;		/* bestfree table */
 	xfs_dir2_block_tail_t	*btp=NULL;	/* block tail */
 	int			count;		/* count of entries found */
-	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
+	xfs_dir2_data_t		*d;		/* data block pointer */
 	xfs_dir2_data_entry_t	*dep;		/* data entry */
 	xfs_dir2_data_free_t	*dfp;		/* bestfree entry */
 	xfs_dir2_data_unused_t	*dup;		/* unused entry */
@@ -66,19 +66,17 @@ xfs_dir2_data_check(
 	struct xfs_name		name;
 
 	mp = dp->i_mount;
-	hdr = bp->data;
-	bf = hdr->bestfree;
-	p = (char *)(hdr + 1);
-
-	if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
-		btp = xfs_dir2_block_tail_p(mp, hdr);
+	d = bp->data;
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+	bf = d->hdr.bestfree;
+	p = (char *)d->u;
+	if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
+		btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
 		lep = xfs_dir2_block_leaf_p(btp);
 		endp = (char *)lep;
-	} else {
-		ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
-		endp = (char *)hdr + mp->m_dirblksize;
-	}
-
+	} else
+		endp = (char *)d + mp->m_dirblksize;
 	count = lastfree = freeseen = 0;
 	/*
 	 * Account for zero bestfree entries.
@@ -110,8 +108,8 @@ xfs_dir2_data_check(
 		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
 			ASSERT(lastfree == 0);
 			ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
-			       (char *)dup - (char *)hdr);
-			dfp = xfs_dir2_data_freefind(hdr, dup);
+			       (char *)dup - (char *)d);
+			dfp = xfs_dir2_data_freefind(d, dup);
 			if (dfp) {
 				i = (int)(dfp - bf);
 				ASSERT((freeseen & (1 << i)) == 0);
@@ -134,13 +132,13 @@ xfs_dir2_data_check(
 		ASSERT(dep->namelen != 0);
 		ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0);
 		ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) ==
-		       (char *)dep - (char *)hdr);
+		       (char *)dep - (char *)d);
 		count++;
 		lastfree = 0;
-		if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
+		if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
 			addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 				(xfs_dir2_data_aoff_t)
-				((char *)dep - (char *)hdr));
+				((char *)dep - (char *)d));
 			name.name = dep->name;
 			name.len = dep->namelen;
 			hash = mp->m_dirnameops->hashname(&name);
@@ -157,10 +155,9 @@ xfs_dir2_data_check(
 	 * Need to have seen all the entries and all the bestfree slots.
 	 */
 	ASSERT(freeseen == 7);
-	if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
+	if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
 		for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
-			if (lep[i].address ==
-			    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+			if (be32_to_cpu(lep[i].address) == XFS_DIR2_NULL_DATAPTR)
 				stale++;
 			if (i > 0)
 				ASSERT(be32_to_cpu(lep[i].hashval) >= be32_to_cpu(lep[i - 1].hashval));
@@ -175,9 +172,9 @@ xfs_dir2_data_check(
  * Given a data block and an unused entry from that block,
  * return the bestfree entry if any that corresponds to it.
  */
-STATIC xfs_dir2_data_free_t *
+xfs_dir2_data_free_t *
 xfs_dir2_data_freefind(
-	xfs_dir2_data_hdr_t	*hdr,		/* data block */
+	xfs_dir2_data_t		*d,		/* data block */
 	xfs_dir2_data_unused_t	*dup)		/* data unused entry */
 {
 	xfs_dir2_data_free_t	*dfp;		/* bestfree entry */
@@ -187,17 +184,17 @@ xfs_dir2_data_freefind(
 	int			seenzero;	/* saw a 0 bestfree entry */
 #endif
 
-	off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
+	off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)d);
 #if defined(DEBUG) && defined(__KERNEL__)
 	/*
 	 * Validate some consistency in the bestfree table.
 	 * Check order, non-overlapping entries, and if we find the
 	 * one we're looking for it has to be exact.
 	 */
-	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
-	for (dfp = &hdr->bestfree[0], seenzero = matched = 0;
-	     dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT];
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+	for (dfp = &d->hdr.bestfree[0], seenzero = matched = 0;
+	     dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT];
 	     dfp++) {
 		if (!dfp->offset) {
 			ASSERT(!dfp->length);
@@ -213,7 +210,7 @@ xfs_dir2_data_freefind(
 		else
 			ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off);
 		ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length));
-		if (dfp > &hdr->bestfree[0])
+		if (dfp > &d->hdr.bestfree[0])
 			ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length));
 	}
 #endif
@@ -222,13 +219,13 @@ xfs_dir2_data_freefind(
 	 * it can't be there since they're sorted.
 	 */
 	if (be16_to_cpu(dup->length) <
-	    be16_to_cpu(hdr->bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length))
+	    be16_to_cpu(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length))
 		return NULL;
 	/*
 	 * Look at the three bestfree entries for our guy.
 	 */
-	for (dfp = &hdr->bestfree[0];
-	     dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT];
+	for (dfp = &d->hdr.bestfree[0];
+	     dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT];
 	     dfp++) {
 		if (!dfp->offset)
 			return NULL;
@@ -246,7 +243,7 @@ xfs_dir2_data_freefind(
  */
 xfs_dir2_data_free_t *				/* entry inserted */
 xfs_dir2_data_freeinsert(
-	xfs_dir2_data_hdr_t	*hdr,		/* data block pointer */
+	xfs_dir2_data_t		*d,		/* data block pointer */
 	xfs_dir2_data_unused_t	*dup,		/* unused space */
 	int			*loghead)	/* log the data header (out) */
 {
@@ -254,13 +251,12 @@ xfs_dir2_data_freeinsert(
 	xfs_dir2_data_free_t	new;		/* new bestfree entry */
 
 #ifdef __KERNEL__
-	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
 #endif
-	dfp = hdr->bestfree;
+	dfp = d->hdr.bestfree;
 	new.length = dup->length;
-	new.offset = cpu_to_be16((char *)dup - (char *)hdr);
-
+	new.offset = cpu_to_be16((char *)dup - (char *)d);
 	/*
 	 * Insert at position 0, 1, or 2; or not at all.
 	 */
@@ -290,36 +286,36 @@ xfs_dir2_data_freeinsert(
  */
 STATIC void
 xfs_dir2_data_freeremove(
-	xfs_dir2_data_hdr_t	*hdr,		/* data block header */
+	xfs_dir2_data_t		*d,		/* data block pointer */
 	xfs_dir2_data_free_t	*dfp,		/* bestfree entry pointer */
 	int			*loghead)	/* out: log data header */
 {
 #ifdef __KERNEL__
-	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
 #endif
 	/*
 	 * It's the first entry, slide the next 2 up.
 	 */
-	if (dfp == &hdr->bestfree[0]) {
-		hdr->bestfree[0] = hdr->bestfree[1];
-		hdr->bestfree[1] = hdr->bestfree[2];
+	if (dfp == &d->hdr.bestfree[0]) {
+		d->hdr.bestfree[0] = d->hdr.bestfree[1];
+		d->hdr.bestfree[1] = d->hdr.bestfree[2];
 	}
 	/*
 	 * It's the second entry, slide the 3rd entry up.
 	 */
-	else if (dfp == &hdr->bestfree[1])
-		hdr->bestfree[1] = hdr->bestfree[2];
+	else if (dfp == &d->hdr.bestfree[1])
+		d->hdr.bestfree[1] = d->hdr.bestfree[2];
 	/*
 	 * Must be the last entry.
 	 */
 	else
-		ASSERT(dfp == &hdr->bestfree[2]);
+		ASSERT(dfp == &d->hdr.bestfree[2]);
 	/*
 	 * Clear the 3rd entry, must be zero now.
 	 */
-	hdr->bestfree[2].length = 0;
-	hdr->bestfree[2].offset = 0;
+	d->hdr.bestfree[2].length = 0;
+	d->hdr.bestfree[2].offset = 0;
 	*loghead = 1;
 }
 
@@ -329,7 +325,7 @@ xfs_dir2_data_freeremove(
 void
 xfs_dir2_data_freescan(
 	xfs_mount_t		*mp,		/* filesystem mount point */
-	xfs_dir2_data_hdr_t	*hdr,		/* data block header */
+	xfs_dir2_data_t		*d,		/* data block pointer */
 	int			*loghead)	/* out: log data header */
 {
 	xfs_dir2_block_tail_t	*btp;		/* block tail */
@@ -339,23 +335,23 @@ xfs_dir2_data_freescan(
 	char			*p;		/* current entry pointer */
 
 #ifdef __KERNEL__
-	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
 #endif
 	/*
 	 * Start by clearing the table.
 	 */
-	memset(hdr->bestfree, 0, sizeof(hdr->bestfree));
+	memset(d->hdr.bestfree, 0, sizeof(d->hdr.bestfree));
 	*loghead = 1;
 	/*
 	 * Set up pointers.
 	 */
-	p = (char *)(hdr + 1);
-	if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
-		btp = xfs_dir2_block_tail_p(mp, hdr);
+	p = (char *)d->u;
+	if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
+		btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
 		endp = (char *)xfs_dir2_block_leaf_p(btp);
 	} else
-		endp = (char *)hdr + mp->m_dirblksize;
+		endp = (char *)d + mp->m_dirblksize;
 	/*
 	 * Loop over the block's entries.
 	 */
@@ -365,9 +361,9 @@ xfs_dir2_data_freescan(
 		 * If it's a free entry, insert it.
 		 */
 		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
-			ASSERT((char *)dup - (char *)hdr ==
+			ASSERT((char *)dup - (char *)d ==
 			       be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
-			xfs_dir2_data_freeinsert(hdr, dup, loghead);
+			xfs_dir2_data_freeinsert(d, dup, loghead);
 			p += be16_to_cpu(dup->length);
 		}
 		/*
@@ -375,7 +371,7 @@ xfs_dir2_data_freescan(
 		 */
 		else {
 			dep = (xfs_dir2_data_entry_t *)p;
-			ASSERT((char *)dep - (char *)hdr ==
+			ASSERT((char *)dep - (char *)d ==
 			       be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)));
 			p += xfs_dir2_data_entsize(dep->namelen);
 		}
@@ -393,7 +389,7 @@ xfs_dir2_data_init(
 	xfs_dabuf_t		**bpp)		/* output block buffer */
 {
 	xfs_dabuf_t		*bp;		/* block buffer */
-	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
+	xfs_dir2_data_t		*d;		/* pointer to block */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	xfs_dir2_data_unused_t	*dup;		/* unused entry pointer */
 	int			error;		/* error return value */
@@ -414,28 +410,26 @@ xfs_dir2_data_init(
 		return error;
 	}
 	ASSERT(bp != NULL);
-
 	/*
 	 * Initialize the header.
 	 */
-	hdr = bp->data;
-	hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
-	hdr->bestfree[0].offset = cpu_to_be16(sizeof(*hdr));
+	d = bp->data;
+	d->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
+	d->hdr.bestfree[0].offset = cpu_to_be16(sizeof(d->hdr));
 	for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
-		hdr->bestfree[i].length = 0;
-		hdr->bestfree[i].offset = 0;
+		d->hdr.bestfree[i].length = 0;
+		d->hdr.bestfree[i].offset = 0;
 	}
-
 	/*
 	 * Set up an unused entry for the block's body.
 	 */
-	dup = (xfs_dir2_data_unused_t *)(hdr + 1);
+	dup = &d->u[0].unused;
 	dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 
-	t = mp->m_dirblksize - (uint)sizeof(*hdr);
-	hdr->bestfree[0].length = cpu_to_be16(t);
+	t=mp->m_dirblksize - (uint)sizeof(d->hdr);
+	d->hdr.bestfree[0].length = cpu_to_be16(t);
 	dup->length = cpu_to_be16(t);
-	*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
+	*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)d);
 	/*
 	 * Log it and return it.
 	 */
@@ -454,14 +448,14 @@ xfs_dir2_data_log_entry(
 	xfs_dabuf_t		*bp,		/* block buffer */
 	xfs_dir2_data_entry_t	*dep)		/* data entry pointer */
 {
-	xfs_dir2_data_hdr_t	*hdr = bp->data;
-
-	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
+	xfs_dir2_data_t		*d;		/* data block pointer */
 
-	xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
+	d = bp->data;
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+	xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d),
 		(uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
-		       (char *)hdr - 1));
+		       (char *)d - 1));
 }
 
 /*
@@ -472,12 +466,13 @@ xfs_dir2_data_log_header(
 	xfs_trans_t		*tp,		/* transaction pointer */
 	xfs_dabuf_t		*bp)		/* block buffer */
 {
-	xfs_dir2_data_hdr_t	*hdr = bp->data;
+	xfs_dir2_data_t		*d;		/* data block pointer */
 
-	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
-
-	xfs_da_log_buf(tp, bp, 0, sizeof(*hdr) - 1);
+	d = bp->data;
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+	xfs_da_log_buf(tp, bp, (uint)((char *)&d->hdr - (char *)d),
+		(uint)(sizeof(d->hdr) - 1));
 }
 
 /*
@@ -489,23 +484,23 @@ xfs_dir2_data_log_unused(
 	xfs_dabuf_t		*bp,		/* block buffer */
 	xfs_dir2_data_unused_t	*dup)		/* data unused pointer */
 {
-	xfs_dir2_data_hdr_t	*hdr = bp->data;
-
-	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
+	xfs_dir2_data_t		*d;		/* data block pointer */
 
+	d = bp->data;
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
 	/*
 	 * Log the first part of the unused entry.
 	 */
-	xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr),
+	xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)d),
 		(uint)((char *)&dup->length + sizeof(dup->length) -
-		       1 - (char *)hdr));
+		       1 - (char *)d));
 	/*
 	 * Log the end (tag) of the unused entry.
 	 */
 	xfs_da_log_buf(tp, bp,
-		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr),
-		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr +
+		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d),
+		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d +
 		       sizeof(xfs_dir2_data_off_t) - 1));
 }
 
@@ -522,7 +517,7 @@ xfs_dir2_data_make_free(
 	int			*needlogp,	/* out: log header */
 	int			*needscanp)	/* out: regen bestfree */
 {
-	xfs_dir2_data_hdr_t	*hdr;		/* data block pointer */
+	xfs_dir2_data_t		*d;		/* data block pointer */
 	xfs_dir2_data_free_t	*dfp;		/* bestfree pointer */
 	char			*endptr;	/* end of data area */
 	xfs_mount_t		*mp;		/* filesystem mount point */
@@ -532,29 +527,28 @@ xfs_dir2_data_make_free(
 	xfs_dir2_data_unused_t	*prevdup;	/* unused entry before us */
 
 	mp = tp->t_mountp;
-	hdr = bp->data;
-
+	d = bp->data;
 	/*
 	 * Figure out where the end of the data area is.
 	 */
-	if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC))
-		endptr = (char *)hdr + mp->m_dirblksize;
+	if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC)
+		endptr = (char *)d + mp->m_dirblksize;
 	else {
 		xfs_dir2_block_tail_t	*btp;	/* block tail */
 
-		ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
-		btp = xfs_dir2_block_tail_p(mp, hdr);
+		ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+		btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
 		endptr = (char *)xfs_dir2_block_leaf_p(btp);
 	}
 	/*
 	 * If this isn't the start of the block, then back up to
 	 * the previous entry and see if it's free.
 	 */
-	if (offset > sizeof(*hdr)) {
+	if (offset > sizeof(d->hdr)) {
 		__be16			*tagp;	/* tag just before us */
 
-		tagp = (__be16 *)((char *)hdr + offset) - 1;
-		prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
+		tagp = (__be16 *)((char *)d + offset) - 1;
+		prevdup = (xfs_dir2_data_unused_t *)((char *)d + be16_to_cpu(*tagp));
 		if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
 			prevdup = NULL;
 	} else
@@ -563,9 +557,9 @@ xfs_dir2_data_make_free(
 	 * If this isn't the end of the block, see if the entry after
 	 * us is free.
 	 */
-	if ((char *)hdr + offset + len < endptr) {
+	if ((char *)d + offset + len < endptr) {
 		postdup =
-			(xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
+			(xfs_dir2_data_unused_t *)((char *)d + offset + len);
 		if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
 			postdup = NULL;
 	} else
@@ -582,21 +576,21 @@ xfs_dir2_data_make_free(
 		/*
 		 * See if prevdup and/or postdup are in bestfree table.
 		 */
-		dfp = xfs_dir2_data_freefind(hdr, prevdup);
-		dfp2 = xfs_dir2_data_freefind(hdr, postdup);
+		dfp = xfs_dir2_data_freefind(d, prevdup);
+		dfp2 = xfs_dir2_data_freefind(d, postdup);
 		/*
 		 * We need a rescan unless there are exactly 2 free entries
 		 * namely our two.  Then we know what's happening, otherwise
 		 * since the third bestfree is there, there might be more
 		 * entries.
 		 */
-		needscan = (hdr->bestfree[2].length != 0);
+		needscan = (d->hdr.bestfree[2].length != 0);
 		/*
 		 * Fix up the new big freespace.
 		 */
 		be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length));
 		*xfs_dir2_data_unused_tag_p(prevdup) =
-			cpu_to_be16((char *)prevdup - (char *)hdr);
+			cpu_to_be16((char *)prevdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, prevdup);
 		if (!needscan) {
 			/*
@@ -606,18 +600,18 @@ xfs_dir2_data_make_free(
 			 * Remove entry 1 first then entry 0.
 			 */
 			ASSERT(dfp && dfp2);
-			if (dfp == &hdr->bestfree[1]) {
-				dfp = &hdr->bestfree[0];
+			if (dfp == &d->hdr.bestfree[1]) {
+				dfp = &d->hdr.bestfree[0];
 				ASSERT(dfp2 == dfp);
-				dfp2 = &hdr->bestfree[1];
+				dfp2 = &d->hdr.bestfree[1];
 			}
-			xfs_dir2_data_freeremove(hdr, dfp2, needlogp);
-			xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+			xfs_dir2_data_freeremove(d, dfp2, needlogp);
+			xfs_dir2_data_freeremove(d, dfp, needlogp);
 			/*
 			 * Now insert the new entry.
 			 */
-			dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp);
-			ASSERT(dfp == &hdr->bestfree[0]);
+			dfp = xfs_dir2_data_freeinsert(d, prevdup, needlogp);
+			ASSERT(dfp == &d->hdr.bestfree[0]);
 			ASSERT(dfp->length == prevdup->length);
 			ASSERT(!dfp[1].length);
 			ASSERT(!dfp[2].length);
@@ -627,10 +621,10 @@ xfs_dir2_data_make_free(
 	 * The entry before us is free, merge with it.
 	 */
 	else if (prevdup) {
-		dfp = xfs_dir2_data_freefind(hdr, prevdup);
+		dfp = xfs_dir2_data_freefind(d, prevdup);
 		be16_add_cpu(&prevdup->length, len);
 		*xfs_dir2_data_unused_tag_p(prevdup) =
-			cpu_to_be16((char *)prevdup - (char *)hdr);
+			cpu_to_be16((char *)prevdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, prevdup);
 		/*
 		 * If the previous entry was in the table, the new entry
@@ -638,27 +632,27 @@ xfs_dir2_data_make_free(
 		 * the old one and add the new one.
 		 */
 		if (dfp) {
-			xfs_dir2_data_freeremove(hdr, dfp, needlogp);
-			xfs_dir2_data_freeinsert(hdr, prevdup, needlogp);
+			xfs_dir2_data_freeremove(d, dfp, needlogp);
+			(void)xfs_dir2_data_freeinsert(d, prevdup, needlogp);
 		}
 		/*
 		 * Otherwise we need a scan if the new entry is big enough.
 		 */
 		else {
 			needscan = be16_to_cpu(prevdup->length) >
-				   be16_to_cpu(hdr->bestfree[2].length);
+				   be16_to_cpu(d->hdr.bestfree[2].length);
 		}
 	}
 	/*
 	 * The following entry is free, merge with it.
 	 */
 	else if (postdup) {
-		dfp = xfs_dir2_data_freefind(hdr, postdup);
-		newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
+		dfp = xfs_dir2_data_freefind(d, postdup);
+		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
 		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 		newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
 		*xfs_dir2_data_unused_tag_p(newdup) =
-			cpu_to_be16((char *)newdup - (char *)hdr);
+			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		/*
 		 * If the following entry was in the table, the new entry
@@ -666,28 +660,28 @@ xfs_dir2_data_make_free(
 		 * the old one and add the new one.
 		 */
 		if (dfp) {
-			xfs_dir2_data_freeremove(hdr, dfp, needlogp);
-			xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
+			xfs_dir2_data_freeremove(d, dfp, needlogp);
+			(void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
 		}
 		/*
 		 * Otherwise we need a scan if the new entry is big enough.
 		 */
 		else {
 			needscan = be16_to_cpu(newdup->length) >
-				   be16_to_cpu(hdr->bestfree[2].length);
+				   be16_to_cpu(d->hdr.bestfree[2].length);
 		}
 	}
 	/*
 	 * Neither neighbor is free.  Make a new entry.
 	 */
 	else {
-		newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
+		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
 		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 		newdup->length = cpu_to_be16(len);
 		*xfs_dir2_data_unused_tag_p(newdup) =
-			cpu_to_be16((char *)newdup - (char *)hdr);
+			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
-		xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
+		(void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
 	}
 	*needscanp = needscan;
 }
@@ -705,7 +699,7 @@ xfs_dir2_data_use_free(
 	int			*needlogp,	/* out: need to log header */
 	int			*needscanp)	/* out: need regen bestfree */
 {
-	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
+	xfs_dir2_data_t		*d;		/* data block */
 	xfs_dir2_data_free_t	*dfp;		/* bestfree pointer */
 	int			matchback;	/* matches end of freespace */
 	int			matchfront;	/* matches start of freespace */
@@ -714,24 +708,24 @@ xfs_dir2_data_use_free(
 	xfs_dir2_data_unused_t	*newdup2;	/* another new unused entry */
 	int			oldlen;		/* old unused entry's length */
 
-	hdr = bp->data;
-	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
+	d = bp->data;
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
 	ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
-	ASSERT(offset >= (char *)dup - (char *)hdr);
-	ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr);
-	ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
+	ASSERT(offset >= (char *)dup - (char *)d);
+	ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d);
+	ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
 	/*
 	 * Look up the entry in the bestfree table.
 	 */
-	dfp = xfs_dir2_data_freefind(hdr, dup);
+	dfp = xfs_dir2_data_freefind(d, dup);
 	oldlen = be16_to_cpu(dup->length);
-	ASSERT(dfp || oldlen <= be16_to_cpu(hdr->bestfree[2].length));
+	ASSERT(dfp || oldlen <= be16_to_cpu(d->hdr.bestfree[2].length));
 	/*
 	 * Check for alignment with front and back of the entry.
 	 */
-	matchfront = (char *)dup - (char *)hdr == offset;
-	matchback = (char *)dup + oldlen - (char *)hdr == offset + len;
+	matchfront = (char *)dup - (char *)d == offset;
+	matchback = (char *)dup + oldlen - (char *)d == offset + len;
 	ASSERT(*needscanp == 0);
 	needscan = 0;
 	/*
@@ -740,9 +734,9 @@ xfs_dir2_data_use_free(
 	 */
 	if (matchfront && matchback) {
 		if (dfp) {
-			needscan = (hdr->bestfree[2].offset != 0);
+			needscan = (d->hdr.bestfree[2].offset != 0);
 			if (!needscan)
-				xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+				xfs_dir2_data_freeremove(d, dfp, needlogp);
 		}
 	}
 	/*
@@ -750,27 +744,27 @@ xfs_dir2_data_use_free(
 	 * Make a new entry with the remaining freespace.
 	 */
 	else if (matchfront) {
-		newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
+		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
 		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 		newdup->length = cpu_to_be16(oldlen - len);
 		*xfs_dir2_data_unused_tag_p(newdup) =
-			cpu_to_be16((char *)newdup - (char *)hdr);
+			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		/*
 		 * If it was in the table, remove it and add the new one.
 		 */
 		if (dfp) {
-			xfs_dir2_data_freeremove(hdr, dfp, needlogp);
-			dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
+			xfs_dir2_data_freeremove(d, dfp, needlogp);
+			dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp);
 			ASSERT(dfp != NULL);
 			ASSERT(dfp->length == newdup->length);
-			ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
+			ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d);
 			/*
 			 * If we got inserted at the last slot,
 			 * that means we don't know if there was a better
 			 * choice for the last slot, or not.  Rescan.
 			 */
-			needscan = dfp == &hdr->bestfree[2];
+			needscan = dfp == &d->hdr.bestfree[2];
 		}
 	}
 	/*
@@ -779,25 +773,25 @@ xfs_dir2_data_use_free(
 	 */
 	else if (matchback) {
 		newdup = dup;
-		newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
+		newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
 		*xfs_dir2_data_unused_tag_p(newdup) =
-			cpu_to_be16((char *)newdup - (char *)hdr);
+			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		/*
 		 * If it was in the table, remove it and add the new one.
 		 */
 		if (dfp) {
-			xfs_dir2_data_freeremove(hdr, dfp, needlogp);
-			dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
+			xfs_dir2_data_freeremove(d, dfp, needlogp);
+			dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp);
 			ASSERT(dfp != NULL);
 			ASSERT(dfp->length == newdup->length);
-			ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
+			ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d);
 			/*
 			 * If we got inserted at the last slot,
 			 * that means we don't know if there was a better
 			 * choice for the last slot, or not.  Rescan.
 			 */
-			needscan = dfp == &hdr->bestfree[2];
+			needscan = dfp == &d->hdr.bestfree[2];
 		}
 	}
 	/*
@@ -806,15 +800,15 @@ xfs_dir2_data_use_free(
 	 */
 	else {
 		newdup = dup;
-		newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
+		newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
 		*xfs_dir2_data_unused_tag_p(newdup) =
-			cpu_to_be16((char *)newdup - (char *)hdr);
+			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
-		newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
+		newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
 		newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 		newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
 		*xfs_dir2_data_unused_tag_p(newdup2) =
-			cpu_to_be16((char *)newdup2 - (char *)hdr);
+			cpu_to_be16((char *)newdup2 - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup2);
 		/*
 		 * If the old entry was in the table, we need to scan
@@ -825,12 +819,13 @@ xfs_dir2_data_use_free(
 		 * the 2 new will work.
 		 */
 		if (dfp) {
-			needscan = (hdr->bestfree[2].length != 0);
+			needscan = (d->hdr.bestfree[2].length != 0);
 			if (!needscan) {
-				xfs_dir2_data_freeremove(hdr, dfp, needlogp);
-				xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
-				xfs_dir2_data_freeinsert(hdr, newdup2,
-							 needlogp);
+				xfs_dir2_data_freeremove(d, dfp, needlogp);
+				(void)xfs_dir2_data_freeinsert(d, newdup,
+					needlogp);
+				(void)xfs_dir2_data_freeinsert(d, newdup2,
+					needlogp);
 			}
 		}
 	}
diff --git a/trunk/fs/xfs/xfs_dir2_data.h b/trunk/fs/xfs/xfs_dir2_data.h
new file mode 100644
index 000000000000..efbc290c7fec
--- /dev/null
+++ b/trunk/fs/xfs/xfs_dir2_data.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DIR2_DATA_H__
+#define	__XFS_DIR2_DATA_H__
+
+/*
+ * Directory format 2, data block structures.
+ */
+
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_inode;
+struct xfs_trans;
+
+/*
+ * Constants.
+ */
+#define	XFS_DIR2_DATA_MAGIC	0x58443244	/* XD2D: for multiblock dirs */
+#define	XFS_DIR2_DATA_ALIGN_LOG	3		/* i.e., 8 bytes */
+#define	XFS_DIR2_DATA_ALIGN	(1 << XFS_DIR2_DATA_ALIGN_LOG)
+#define	XFS_DIR2_DATA_FREE_TAG	0xffff
+#define	XFS_DIR2_DATA_FD_COUNT	3
+
+/*
+ * The directory address space is divided into sections,
+ * with section starts spaced 32GB (XFS_DIR2_SPACE_SIZE) apart.
+ */
+#define	XFS_DIR2_SPACE_SIZE	(1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
+#define	XFS_DIR2_DATA_SPACE	0
+#define	XFS_DIR2_DATA_OFFSET	(XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
+#define	XFS_DIR2_DATA_FIRSTDB(mp)	\
+	xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
+
+/*
+ * Offsets of . and .. in data space (always block 0)
+ */
+#define	XFS_DIR2_DATA_DOT_OFFSET	\
+	((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t))
+#define	XFS_DIR2_DATA_DOTDOT_OFFSET	\
+	(XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
+#define	XFS_DIR2_DATA_FIRST_OFFSET		\
+	(XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
+
+/*
+ * Structures.
+ */
+
+/*
+ * Describe a free area in the data block.
+ * The freespace will be formatted as a xfs_dir2_data_unused_t.
+ */
+typedef struct xfs_dir2_data_free {
+	__be16			offset;		/* start of freespace */
+	__be16			length;		/* length of freespace */
+} xfs_dir2_data_free_t;
+
+/*
+ * Header for the data blocks.
+ * Always at the beginning of a directory-sized block.
+ * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
+ */
+typedef struct xfs_dir2_data_hdr {
+	__be32			magic;		/* XFS_DIR2_DATA_MAGIC */
+						/* or XFS_DIR2_BLOCK_MAGIC */
+	xfs_dir2_data_free_t	bestfree[XFS_DIR2_DATA_FD_COUNT];
+} xfs_dir2_data_hdr_t;
+
+/*
+ * Active entry in a data block.  Aligned to 8 bytes.
+ * Tag appears as the last 2 bytes.
+ */
+typedef struct xfs_dir2_data_entry {
+	__be64			inumber;	/* inode number */
+	__u8			namelen;	/* name length */
+	__u8			name[1];	/* name bytes, no null */
+						/* variable offset */
+	__be16			tag;		/* starting offset of us */
+} xfs_dir2_data_entry_t;
+
+/*
+ * Unused entry in a data block.  Aligned to 8 bytes.
+ * Tag appears as the last 2 bytes.
+ */
+typedef struct xfs_dir2_data_unused {
+	__be16			freetag;	/* XFS_DIR2_DATA_FREE_TAG */
+	__be16			length;		/* total free length */
+						/* variable offset */
+	__be16			tag;		/* starting offset of us */
+} xfs_dir2_data_unused_t;
+
+typedef union {
+	xfs_dir2_data_entry_t	entry;
+	xfs_dir2_data_unused_t	unused;
+} xfs_dir2_data_union_t;
+
+/*
+ * Generic data block structure, for xfs_db.
+ */
+typedef struct xfs_dir2_data {
+	xfs_dir2_data_hdr_t	hdr;		/* magic XFS_DIR2_DATA_MAGIC */
+	xfs_dir2_data_union_t	u[1];
+} xfs_dir2_data_t;
+
+/*
+ * Inline helper functions.
+ */
+
+/*
+ * Size of a data entry with an n-byte name, rounded up to XFS_DIR2_DATA_ALIGN.
+ */
+static inline int xfs_dir2_data_entsize(int n)
+{
+	return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \
+		 (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN);
+}
+
+/*
+ * Pointer to an entry's tag word.
+ */
+static inline __be16 *
+xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep)
+{
+	return (__be16 *)((char *)dep +
+		xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
+}
+
+/*
+ * Pointer to a freespace's tag word.
+ */
+static inline __be16 *
+xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup)
+{
+	return (__be16 *)((char *)dup +
+			be16_to_cpu(dup->length) - sizeof(__be16));
+}
+
+/*
+ * Function declarations.
+ */
+#ifdef DEBUG
+extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp);
+#else
+#define	xfs_dir2_data_check(dp,bp)
+#endif
+extern xfs_dir2_data_free_t *xfs_dir2_data_freefind(xfs_dir2_data_t *d,
+				xfs_dir2_data_unused_t *dup);
+extern xfs_dir2_data_free_t *xfs_dir2_data_freeinsert(xfs_dir2_data_t *d,
+				xfs_dir2_data_unused_t *dup, int *loghead);
+extern void xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d,
+				int *loghead);
+extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
+				struct xfs_dabuf **bpp);
+extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp,
+				xfs_dir2_data_entry_t *dep);
+extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
+				struct xfs_dabuf *bp);
+extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp,
+				xfs_dir2_data_unused_t *dup);
+extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
+				xfs_dir2_data_aoff_t offset,
+				xfs_dir2_data_aoff_t len, int *needlogp,
+				int *needscanp);
+extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
+			       xfs_dir2_data_unused_t *dup,
+			       xfs_dir2_data_aoff_t offset,
+			       xfs_dir2_data_aoff_t len, int *needlogp,
+			       int *needscanp);
+
+#endif	/* __XFS_DIR2_DATA_H__ */
diff --git a/trunk/fs/xfs/xfs_dir2_format.h b/trunk/fs/xfs/xfs_dir2_format.h
deleted file mode 100644
index 07270981f48f..000000000000
--- a/trunk/fs/xfs/xfs_dir2_format.h
+++ /dev/null
@@ -1,597 +0,0 @@
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DIR2_FORMAT_H__
-#define __XFS_DIR2_FORMAT_H__
-
-/*
- * Directory version 2.
- *
- * There are 4 possible formats:
- *  - shortform - embedded into the inode
- *  - single block - data with embedded leaf at the end
- *  - multiple data blocks, single leaf+freeindex block
- *  - data blocks, node and leaf blocks (btree), freeindex blocks
- *
- * Note: many node blocks structures and constants are shared with the attr
- * code and defined in xfs_da_btree.h.
- */
-
-#define	XFS_DIR2_BLOCK_MAGIC	0x58443242	/* XD2B: single block dirs */
-#define	XFS_DIR2_DATA_MAGIC	0x58443244	/* XD2D: multiblock dirs */
-#define	XFS_DIR2_FREE_MAGIC	0x58443246	/* XD2F: free index blocks */
-
-/*
- * Byte offset in data block and shortform entry.
- */
-typedef	__uint16_t	xfs_dir2_data_off_t;
-#define	NULLDATAOFF	0xffffU
-typedef uint		xfs_dir2_data_aoff_t;	/* argument form */
-
-/*
- * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
- * Only need 16 bits, this is the byte offset into the single block form.
- */
-typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
-
-/*
- * Offset in data space of a data entry.
- */
-typedef	__uint32_t	xfs_dir2_dataptr_t;
-#define	XFS_DIR2_MAX_DATAPTR	((xfs_dir2_dataptr_t)0xffffffff)
-#define	XFS_DIR2_NULL_DATAPTR	((xfs_dir2_dataptr_t)0)
-
-/*
- * Byte offset in a directory.
- */
-typedef	xfs_off_t	xfs_dir2_off_t;
-
-/*
- * Directory block number (logical dirblk in file)
- */
-typedef	__uint32_t	xfs_dir2_db_t;
-
-/*
- * Inode number stored as 8 8-bit values.
- */
-typedef	struct { __uint8_t i[8]; } xfs_dir2_ino8_t;
-
-/*
- * Inode number stored as 4 8-bit values.
- * Works a lot of the time, when all the inode numbers in a directory
- * fit in 32 bits.
- */
-typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t;
-
-typedef union {
-	xfs_dir2_ino8_t	i8;
-	xfs_dir2_ino4_t	i4;
-} xfs_dir2_inou_t;
-#define	XFS_DIR2_MAX_SHORT_INUM	((xfs_ino_t)0xffffffffULL)
-
-/*
- * Directory layout when stored internal to an inode.
- *
- * Small directories are packed as tightly as possible so as to fit into the
- * literal area of the inode.  These "shortform" directories consist of a
- * single xfs_dir2_sf_hdr header followed by zero or more xfs_dir2_sf_entry
- * structures.  Due the different inode number storage size and the variable
- * length name field in the xfs_dir2_sf_entry all these structure are
- * variable length, and the accessors in this file should be used to iterate
- * over them.
- */
-typedef struct xfs_dir2_sf_hdr {
-	__uint8_t		count;		/* count of entries */
-	__uint8_t		i8count;	/* count of 8-byte inode #s */
-	xfs_dir2_inou_t		parent;		/* parent dir inode number */
-} __arch_pack xfs_dir2_sf_hdr_t;
-
-typedef struct xfs_dir2_sf_entry {
-	__u8			namelen;	/* actual name length */
-	xfs_dir2_sf_off_t	offset;		/* saved offset */
-	__u8			name[];		/* name, variable size */
-	/*
-	 * A xfs_dir2_ino8_t or xfs_dir2_ino4_t follows here, at a
-	 * variable offset after the name.
-	 */
-} __arch_pack xfs_dir2_sf_entry_t;
-
-static inline int xfs_dir2_sf_hdr_size(int i8count)
-{
-	return sizeof(struct xfs_dir2_sf_hdr) -
-		(i8count == 0) *
-		(sizeof(xfs_dir2_ino8_t) - sizeof(xfs_dir2_ino4_t));
-}
-
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
-{
-	return get_unaligned_be16(&sfep->offset.i);
-}
-
-static inline void
-xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
-{
-	put_unaligned_be16(off, &sfep->offset.i);
-}
-
-static inline int
-xfs_dir2_sf_entsize(struct xfs_dir2_sf_hdr *hdr, int len)
-{
-	return sizeof(struct xfs_dir2_sf_entry) +	/* namelen + offset */
-		len +					/* name */
-		(hdr->i8count ?				/* ino */
-		 sizeof(xfs_dir2_ino8_t) :
-		 sizeof(xfs_dir2_ino4_t));
-}
-
-static inline struct xfs_dir2_sf_entry *
-xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
-{
-	return (struct xfs_dir2_sf_entry *)
-		((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count));
-}
-
-static inline struct xfs_dir2_sf_entry *
-xfs_dir2_sf_nextentry(struct xfs_dir2_sf_hdr *hdr,
-		struct xfs_dir2_sf_entry *sfep)
-{
-	return (struct xfs_dir2_sf_entry *)
-		((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen));
-}
-
-
-/*
- * Data block structures.
- *
- * A pure data block looks like the following drawing on disk:
- *
- *    +-------------------------------------------------+
- *    | xfs_dir2_data_hdr_t                             |
- *    +-------------------------------------------------+
- *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
- *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
- *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
- *    | ...                                             |
- *    +-------------------------------------------------+
- *    | unused space                                    |
- *    +-------------------------------------------------+
- *
- * As all the entries are variable size structures the accessors below should
- * be used to iterate over them.
- *
- * In addition to the pure data blocks for the data and node formats,
- * most structures are also used for the combined data/freespace "block"
- * format below.
- */
-
-#define	XFS_DIR2_DATA_ALIGN_LOG	3		/* i.e., 8 bytes */
-#define	XFS_DIR2_DATA_ALIGN	(1 << XFS_DIR2_DATA_ALIGN_LOG)
-#define	XFS_DIR2_DATA_FREE_TAG	0xffff
-#define	XFS_DIR2_DATA_FD_COUNT	3
-
-/*
- * Directory address space divided into sections,
- * spaces separated by 32GB.
- */
-#define	XFS_DIR2_SPACE_SIZE	(1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
-#define	XFS_DIR2_DATA_SPACE	0
-#define	XFS_DIR2_DATA_OFFSET	(XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
-#define	XFS_DIR2_DATA_FIRSTDB(mp)	\
-	xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
-
-/*
- * Offsets of . and .. in data space (always block 0)
- */
-#define	XFS_DIR2_DATA_DOT_OFFSET	\
-	((xfs_dir2_data_aoff_t)sizeof(struct xfs_dir2_data_hdr))
-#define	XFS_DIR2_DATA_DOTDOT_OFFSET	\
-	(XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
-#define	XFS_DIR2_DATA_FIRST_OFFSET		\
-	(XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
-
-/*
- * Describe a free area in the data block.
- *
- * The freespace will be formatted as a xfs_dir2_data_unused_t.
- */
-typedef struct xfs_dir2_data_free {
-	__be16			offset;		/* start of freespace */
-	__be16			length;		/* length of freespace */
-} xfs_dir2_data_free_t;
-
-/*
- * Header for the data blocks.
- *
- * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
- */
-typedef struct xfs_dir2_data_hdr {
-	__be32			magic;		/* XFS_DIR2_DATA_MAGIC or */
-						/* XFS_DIR2_BLOCK_MAGIC */
-	xfs_dir2_data_free_t	bestfree[XFS_DIR2_DATA_FD_COUNT];
-} xfs_dir2_data_hdr_t;
-
-/*
- * Active entry in a data block.
- *
- * Aligned to 8 bytes.  After the variable length name field there is a
- * 2 byte tag field, which can be accessed using xfs_dir2_data_entry_tag_p.
- */
-typedef struct xfs_dir2_data_entry {
-	__be64			inumber;	/* inode number */
-	__u8			namelen;	/* name length */
-	__u8			name[];		/* name bytes, no null */
-     /*	__be16                  tag; */		/* starting offset of us */
-} xfs_dir2_data_entry_t;
-
-/*
- * Unused entry in a data block.
- *
- * Aligned to 8 bytes.  Tag appears as the last 2 bytes and must be accessed
- * using xfs_dir2_data_unused_tag_p.
- */
-typedef struct xfs_dir2_data_unused {
-	__be16			freetag;	/* XFS_DIR2_DATA_FREE_TAG */
-	__be16			length;		/* total free length */
-						/* variable offset */
-	__be16			tag;		/* starting offset of us */
-} xfs_dir2_data_unused_t;
-
-/*
- * Size of a data entry.
- */
-static inline int xfs_dir2_data_entsize(int n)
-{
-	return (int)roundup(offsetof(struct xfs_dir2_data_entry, name[0]) + n +
-		 (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN);
-}
-
-/*
- * Pointer to an entry's tag word.
- */
-static inline __be16 *
-xfs_dir2_data_entry_tag_p(struct xfs_dir2_data_entry *dep)
-{
-	return (__be16 *)((char *)dep +
-		xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
-}
-
-/*
- * Pointer to a freespace's tag word.
- */
-static inline __be16 *
-xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup)
-{
-	return (__be16 *)((char *)dup +
-			be16_to_cpu(dup->length) - sizeof(__be16));
-}
-
-/*
- * Leaf block structures.
- *
- * A pure leaf block looks like the following drawing on disk:
- *
- *    +---------------------------+
- *    | xfs_dir2_leaf_hdr_t       |
- *    +---------------------------+
- *    | xfs_dir2_leaf_entry_t     |
- *    | xfs_dir2_leaf_entry_t     |
- *    | xfs_dir2_leaf_entry_t     |
- *    | xfs_dir2_leaf_entry_t     |
- *    | ...                       |
- *    +---------------------------+
- *    | xfs_dir2_data_off_t       |
- *    | xfs_dir2_data_off_t       |
- *    | xfs_dir2_data_off_t       |
- *    | ...                       |
- *    +---------------------------+
- *    | xfs_dir2_leaf_tail_t      |
- *    +---------------------------+
- *
- * The xfs_dir2_data_off_t members (bests) and tail are at the end of the block
- * for single-leaf (magic = XFS_DIR2_LEAF1_MAGIC) blocks only, but not present
- * for directories with separate leaf nodes and free space blocks
- * (magic = XFS_DIR2_LEAFN_MAGIC).
- *
- * As all the entries are variable size structures the accessors below should
- * be used to iterate over them.
- */
-
-/*
- * Offset of the leaf/node space.  First block in this space
- * is the btree root.
- */
-#define	XFS_DIR2_LEAF_SPACE	1
-#define	XFS_DIR2_LEAF_OFFSET	(XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
-#define	XFS_DIR2_LEAF_FIRSTDB(mp)	\
-	xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
-
-/*
- * Leaf block header.
- */
-typedef struct xfs_dir2_leaf_hdr {
-	xfs_da_blkinfo_t	info;		/* header for da routines */
-	__be16			count;		/* count of entries */
-	__be16			stale;		/* count of stale entries */
-} xfs_dir2_leaf_hdr_t;
-
-/*
- * Leaf block entry.
- */
-typedef struct xfs_dir2_leaf_entry {
-	__be32			hashval;	/* hash value of name */
-	__be32			address;	/* address of data entry */
-} xfs_dir2_leaf_entry_t;
-
-/*
- * Leaf block tail.
- */
-typedef struct xfs_dir2_leaf_tail {
-	__be32			bestcount;
-} xfs_dir2_leaf_tail_t;
-
-/*
- * Leaf block.
- */
-typedef struct xfs_dir2_leaf {
-	xfs_dir2_leaf_hdr_t	hdr;		/* leaf header */
-	xfs_dir2_leaf_entry_t	ents[];		/* entries */
-} xfs_dir2_leaf_t;
-
-/*
- * DB blocks here are logical directory block numbers, not filesystem blocks.
- */
-
-static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
-{
-	return (mp->m_dirblksize - (uint)sizeof(struct xfs_dir2_leaf_hdr)) /
-		(uint)sizeof(struct xfs_dir2_leaf_entry);
-}
-
-/*
- * Get address of the bestcount field in the single-leaf block.
- */
-static inline struct xfs_dir2_leaf_tail *
-xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp)
-{
-	return (struct xfs_dir2_leaf_tail *)
-		((char *)lp + mp->m_dirblksize -
-		  sizeof(struct xfs_dir2_leaf_tail));
-}
-
-/*
- * Get address of the bests array in the single-leaf block.
- */
-static inline __be16 *
-xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)
-{
-	return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
-}
-
-/*
- * Convert dataptr to byte in file space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
-	return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG;
-}
-
-/*
- * Convert byte in file space to dataptr.  It had better be aligned.
- */
-static inline xfs_dir2_dataptr_t
-xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
-	return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG);
-}
-
-/*
- * Convert byte in space to (DB) block
- */
-static inline xfs_dir2_db_t
-xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
-	return (xfs_dir2_db_t)
-		(by >> (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog));
-}
-
-/*
- * Convert dataptr to a block number
- */
-static inline xfs_dir2_db_t
-xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
-	return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
-}
-
-/*
- * Convert byte in space to offset in a block
- */
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
-	return (xfs_dir2_data_aoff_t)(by &
-		((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) - 1));
-}
-
-/*
- * Convert dataptr to a byte offset in a block
- */
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
-	return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
-}
-
-/*
- * Convert block and offset to byte in space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
-			xfs_dir2_data_aoff_t o)
-{
-	return ((xfs_dir2_off_t)db <<
-		(mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) + o;
-}
-
-/*
- * Convert block (DB) to block (dablk)
- */
-static inline xfs_dablk_t
-xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
-	return (xfs_dablk_t)(db << mp->m_sb.sb_dirblklog);
-}
-
-/*
- * Convert byte in space to (DA) block
- */
-static inline xfs_dablk_t
-xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
-	return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
-}
-
-/*
- * Convert block and offset to dataptr
- */
-static inline xfs_dir2_dataptr_t
-xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
-			   xfs_dir2_data_aoff_t o)
-{
-	return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
-}
-
-/*
- * Convert block (dablk) to block (DB)
- */
-static inline xfs_dir2_db_t
-xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
-{
-	return (xfs_dir2_db_t)(da >> mp->m_sb.sb_dirblklog);
-}
-
-/*
- * Convert block (dablk) to byte offset in space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
-{
-	return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
-}
-
-/*
- * Free space block defintions for the node format.
- */
-
-/*
- * Offset of the freespace index.
- */
-#define	XFS_DIR2_FREE_SPACE	2
-#define	XFS_DIR2_FREE_OFFSET	(XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
-#define	XFS_DIR2_FREE_FIRSTDB(mp)	\
-	xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
-
-typedef	struct xfs_dir2_free_hdr {
-	__be32			magic;		/* XFS_DIR2_FREE_MAGIC */
-	__be32			firstdb;	/* db of first entry */
-	__be32			nvalid;		/* count of valid entries */
-	__be32			nused;		/* count of used entries */
-} xfs_dir2_free_hdr_t;
-
-typedef struct xfs_dir2_free {
-	xfs_dir2_free_hdr_t	hdr;		/* block header */
-	__be16			bests[];	/* best free counts */
-						/* unused entries are -1 */
-} xfs_dir2_free_t;
-
-static inline int xfs_dir2_free_max_bests(struct xfs_mount *mp)
-{
-	return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) /
-		sizeof(xfs_dir2_data_off_t);
-}
-
-/*
- * Convert data space db to the corresponding free db.
- */
-static inline xfs_dir2_db_t
-xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
-	return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp);
-}
-
-/*
- * Convert data space db to the corresponding index in a free db.
- */
-static inline int
-xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
-	return db % xfs_dir2_free_max_bests(mp);
-}
-
-/*
- * Single block format.
- *
- * The single block format looks like the following drawing on disk:
- *
- *    +-------------------------------------------------+
- *    | xfs_dir2_data_hdr_t                             |
- *    +-------------------------------------------------+
- *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
- *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
- *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t :
- *    | ...                                             |
- *    +-------------------------------------------------+
- *    | unused space                                    |
- *    +-------------------------------------------------+
- *    | ...                                             |
- *    | xfs_dir2_leaf_entry_t                           |
- *    | xfs_dir2_leaf_entry_t                           |
- *    +-------------------------------------------------+
- *    | xfs_dir2_block_tail_t                           |
- *    +-------------------------------------------------+
- *
- * As all the entries are variable size structures the accessors below should
- * be used to iterate over them.
- */
-
-typedef struct xfs_dir2_block_tail {
-	__be32		count;			/* count of leaf entries */
-	__be32		stale;			/* count of stale lf entries */
-} xfs_dir2_block_tail_t;
-
-/*
- * Pointer to the leaf header embedded in a data block (1-block format)
- */
-static inline struct xfs_dir2_block_tail *
-xfs_dir2_block_tail_p(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr)
-{
-	return ((struct xfs_dir2_block_tail *)
-		((char *)hdr + mp->m_dirblksize)) - 1;
-}
-
-/*
- * Pointer to the leaf entries embedded in a data block (1-block format)
- */
-static inline struct xfs_dir2_leaf_entry *
-xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
-{
-	return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
-}
-
-#endif /* __XFS_DIR2_FORMAT_H__ */
diff --git a/trunk/fs/xfs/xfs_dir2_leaf.c b/trunk/fs/xfs/xfs_dir2_leaf.c
index ca2386d82cdf..ae891223be90 100644
--- a/trunk/fs/xfs/xfs_dir2_leaf.c
+++ b/trunk/fs/xfs/xfs_dir2_leaf.c
@@ -24,14 +24,18 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
+#include "xfs_dir2.h"
 #include "xfs_mount.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_dir2_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
-#include "xfs_dir2_format.h"
-#include "xfs_dir2_priv.h"
+#include "xfs_dir2_data.h"
+#include "xfs_dir2_leaf.h"
+#include "xfs_dir2_block.h"
+#include "xfs_dir2_node.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
 
@@ -60,7 +64,7 @@ xfs_dir2_block_to_leaf(
 {
 	__be16			*bestsp;	/* leaf's bestsp entries */
 	xfs_dablk_t		blkno;		/* leaf block's bno */
-	xfs_dir2_data_hdr_t	*hdr;		/* block header */
+	xfs_dir2_block_t	*block;		/* block structure */
 	xfs_dir2_leaf_entry_t	*blp;		/* block's leaf entries */
 	xfs_dir2_block_tail_t	*btp;		/* block's tail */
 	xfs_inode_t		*dp;		/* incore directory inode */
@@ -97,9 +101,9 @@ xfs_dir2_block_to_leaf(
 	}
 	ASSERT(lbp != NULL);
 	leaf = lbp->data;
-	hdr = dbp->data;
+	block = dbp->data;
 	xfs_dir2_data_check(dp, dbp);
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	btp = xfs_dir2_block_tail_p(mp, block);
 	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Set the counts in the leaf header.
@@ -119,23 +123,23 @@ xfs_dir2_block_to_leaf(
 	 * tail be free.
 	 */
 	xfs_dir2_data_make_free(tp, dbp,
-		(xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
-		(xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize -
+		(xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
+		(xfs_dir2_data_aoff_t)((char *)block + mp->m_dirblksize -
 				       (char *)blp),
 		&needlog, &needscan);
 	/*
 	 * Fix up the block header, make it a data block.
 	 */
-	hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
+	block->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
 	if (needscan)
-		xfs_dir2_data_freescan(mp, hdr, &needlog);
+		xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
 	/*
 	 * Set up leaf tail and bests table.
 	 */
 	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	ltp->bestcount = cpu_to_be32(1);
 	bestsp = xfs_dir2_leaf_bests_p(ltp);
-	bestsp[0] =  hdr->bestfree[0].length;
+	bestsp[0] =  block->hdr.bestfree[0].length;
 	/*
 	 * Log the data header and leaf bests table.
 	 */
@@ -148,131 +152,6 @@ xfs_dir2_block_to_leaf(
 	return 0;
 }
 
-STATIC void
-xfs_dir2_leaf_find_stale(
-	struct xfs_dir2_leaf	*leaf,
-	int			index,
-	int			*lowstale,
-	int			*highstale)
-{
-	/*
-	 * Find the first stale entry before our index, if any.
-	 */
-	for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) {
-		if (leaf->ents[*lowstale].address ==
-		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
-			break;
-	}
-
-	/*
-	 * Find the first stale entry at or after our index, if any.
-	 * Stop if the result would require moving more entries than using
-	 * lowstale.
-	 */
-	for (*highstale = index;
-	     *highstale < be16_to_cpu(leaf->hdr.count);
-	     ++*highstale) {
-		if (leaf->ents[*highstale].address ==
-		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
-			break;
-		if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
-			break;
-	}
-}
-
-struct xfs_dir2_leaf_entry *
-xfs_dir2_leaf_find_entry(
-	xfs_dir2_leaf_t		*leaf,		/* leaf structure */
-	int			index,		/* leaf table position */
-	int			compact,	/* need to compact leaves */
-	int			lowstale,	/* index of prev stale leaf */
-	int			highstale,	/* index of next stale leaf */
-	int			*lfloglow,	/* low leaf logging index */
-	int			*lfloghigh)	/* high leaf logging index */
-{
-	if (!leaf->hdr.stale) {
-		xfs_dir2_leaf_entry_t	*lep;	/* leaf entry table pointer */
-
-		/*
-		 * Now we need to make room to insert the leaf entry.
-		 *
-		 * If there are no stale entries, just insert a hole at index.
-		 */
-		lep = &leaf->ents[index];
-		if (index < be16_to_cpu(leaf->hdr.count))
-			memmove(lep + 1, lep,
-				(be16_to_cpu(leaf->hdr.count) - index) *
-				 sizeof(*lep));
-
-		/*
-		 * Record low and high logging indices for the leaf.
-		 */
-		*lfloglow = index;
-		*lfloghigh = be16_to_cpu(leaf->hdr.count);
-		be16_add_cpu(&leaf->hdr.count, 1);
-		return lep;
-	}
-
-	/*
-	 * There are stale entries.
-	 *
-	 * We will use one of them for the new entry.  It's probably not at
-	 * the right location, so we'll have to shift some up or down first.
-	 *
-	 * If we didn't compact before, we need to find the nearest stale
-	 * entries before and after our insertion point.
-	 */
-	if (compact == 0)
-		xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
-
-	/*
-	 * If the low one is better, use it.
-	 */
-	if (lowstale >= 0 &&
-	    (highstale == be16_to_cpu(leaf->hdr.count) ||
-	     index - lowstale - 1 < highstale - index)) {
-		ASSERT(index - lowstale - 1 >= 0);
-		ASSERT(leaf->ents[lowstale].address ==
-		       cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
-
-		/*
-		 * Copy entries up to cover the stale entry and make room
-		 * for the new entry.
-		 */
-		if (index - lowstale - 1 > 0) {
-			memmove(&leaf->ents[lowstale],
-				&leaf->ents[lowstale + 1],
-				(index - lowstale - 1) *
-				sizeof(xfs_dir2_leaf_entry_t));
-		}
-		*lfloglow = MIN(lowstale, *lfloglow);
-		*lfloghigh = MAX(index - 1, *lfloghigh);
-		be16_add_cpu(&leaf->hdr.stale, -1);
-		return &leaf->ents[index - 1];
-	}
-
-	/*
-	 * The high one is better, so use that one.
-	 */
-	ASSERT(highstale - index >= 0);
-	ASSERT(leaf->ents[highstale].address ==
-	       cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
-
-	/*
-	 * Copy entries down to cover the stale entry and make room for the
-	 * new entry.
-	 */
-	if (highstale - index > 0) {
-		memmove(&leaf->ents[index + 1],
-			&leaf->ents[index],
-			(highstale - index) * sizeof(xfs_dir2_leaf_entry_t));
-	}
-	*lfloglow = MIN(index, *lfloglow);
-	*lfloghigh = MAX(highstale, *lfloghigh);
-	be16_add_cpu(&leaf->hdr.stale, -1);
-	return &leaf->ents[index];
-}
-
 /*
  * Add an entry to a leaf form directory.
  */
@@ -282,7 +161,7 @@ xfs_dir2_leaf_addname(
 {
 	__be16			*bestsp;	/* freespace table in leaf */
 	int			compact;	/* need to compact leaves */
-	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
+	xfs_dir2_data_t		*data;		/* data block structure */
 	xfs_dabuf_t		*dbp;		/* data block buffer */
 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
 	xfs_inode_t		*dp;		/* incore directory inode */
@@ -346,7 +225,7 @@ xfs_dir2_leaf_addname(
 			continue;
 		i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
 		ASSERT(i < be32_to_cpu(ltp->bestcount));
-		ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));
+		ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF);
 		if (be16_to_cpu(bestsp[i]) >= length) {
 			use_block = i;
 			break;
@@ -360,8 +239,7 @@ xfs_dir2_leaf_addname(
 			/*
 			 * Remember a block we see that's missing.
 			 */
-			if (bestsp[i] == cpu_to_be16(NULLDATAOFF) &&
-			    use_block == -1)
+			if (be16_to_cpu(bestsp[i]) == NULLDATAOFF && use_block == -1)
 				use_block = i;
 			else if (be16_to_cpu(bestsp[i]) >= length) {
 				use_block = i;
@@ -372,17 +250,14 @@ xfs_dir2_leaf_addname(
 	/*
 	 * How many bytes do we need in the leaf block?
 	 */
-	needbytes = 0;
-	if (!leaf->hdr.stale)
-		needbytes += sizeof(xfs_dir2_leaf_entry_t);
-	if (use_block == -1)
-		needbytes += sizeof(xfs_dir2_data_off_t);
-
+	needbytes =
+		(leaf->hdr.stale ? 0 : (uint)sizeof(leaf->ents[0])) +
+		(use_block != -1 ? 0 : (uint)sizeof(leaf->bests[0]));
 	/*
 	 * Now kill use_block if it refers to a missing block, so we
 	 * can use it as an indication of allocation needed.
 	 */
-	if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF))
+	if (use_block != -1 && be16_to_cpu(bestsp[use_block]) == NULLDATAOFF)
 		use_block = -1;
 	/*
 	 * If we don't have enough free bytes but we can make enough
@@ -494,8 +369,8 @@ xfs_dir2_leaf_addname(
 		 */
 		else
 			xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
-		hdr = dbp->data;
-		bestsp[use_block] = hdr->bestfree[0].length;
+		data = dbp->data;
+		bestsp[use_block] = data->hdr.bestfree[0].length;
 		grown = 1;
 	}
 	/*
@@ -509,7 +384,7 @@ xfs_dir2_leaf_addname(
 			xfs_da_brelse(tp, lbp);
 			return error;
 		}
-		hdr = dbp->data;
+		data = dbp->data;
 		grown = 0;
 	}
 	xfs_dir2_data_check(dp, dbp);
@@ -517,14 +392,14 @@ xfs_dir2_leaf_addname(
 	 * Point to the biggest freespace in our data block.
 	 */
 	dup = (xfs_dir2_data_unused_t *)
-	      ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset));
+	      ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset));
 	ASSERT(be16_to_cpu(dup->length) >= length);
 	needscan = needlog = 0;
 	/*
 	 * Mark the initial part of our freespace in use for the new entry.
 	 */
 	xfs_dir2_data_use_free(tp, dbp, dup,
-		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
+		(xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length,
 		&needlog, &needscan);
 	/*
 	 * Initialize our new entry (at last).
@@ -534,12 +409,12 @@ xfs_dir2_leaf_addname(
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, dep->namelen);
 	tagp = xfs_dir2_data_entry_tag_p(dep);
-	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
+	*tagp = cpu_to_be16((char *)dep - (char *)data);
 	/*
 	 * Need to scan fix up the bestfree table.
 	 */
 	if (needscan)
-		xfs_dir2_data_freescan(mp, hdr, &needlog);
+		xfs_dir2_data_freescan(mp, data, &needlog);
 	/*
 	 * Need to log the data block's header.
 	 */
@@ -550,15 +425,107 @@ xfs_dir2_leaf_addname(
 	 * If the bests table needs to be changed, do it.
 	 * Log the change unless we've already done that.
 	 */
-	if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(hdr->bestfree[0].length)) {
-		bestsp[use_block] = hdr->bestfree[0].length;
+	if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(data->hdr.bestfree[0].length)) {
+		bestsp[use_block] = data->hdr.bestfree[0].length;
 		if (!grown)
 			xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
 	}
-
-	lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale,
-				       highstale, &lfloglow, &lfloghigh);
-
+	/*
+	 * Now we need to make room to insert the leaf entry.
+	 * If there are no stale entries, we just insert a hole at index.
+	 */
+	if (!leaf->hdr.stale) {
+		/*
+		 * lep is still good as the index leaf entry.
+		 */
+		if (index < be16_to_cpu(leaf->hdr.count))
+			memmove(lep + 1, lep,
+				(be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep));
+		/*
+		 * Record low and high logging indices for the leaf.
+		 */
+		lfloglow = index;
+		lfloghigh = be16_to_cpu(leaf->hdr.count);
+		be16_add_cpu(&leaf->hdr.count, 1);
+	}
+	/*
+	 * There are stale entries.
+	 * We will use one of them for the new entry.
+	 * It's probably not at the right location, so we'll have to
+	 * shift some up or down first.
+	 */
+	else {
+		/*
+		 * If we didn't compact before, we need to find the nearest
+		 * stale entries before and after our insertion point.
+		 */
+		if (compact == 0) {
+			/*
+			 * Find the first stale entry before the insertion
+			 * point, if any.
+			 */
+			for (lowstale = index - 1;
+			     lowstale >= 0 &&
+				be32_to_cpu(leaf->ents[lowstale].address) !=
+				XFS_DIR2_NULL_DATAPTR;
+			     lowstale--)
+				continue;
+			/*
+			 * Find the next stale entry at or after the insertion
+			 * point, if any.   Stop if we go so far that the
+			 * lowstale entry would be better.
+			 */
+			for (highstale = index;
+			     highstale < be16_to_cpu(leaf->hdr.count) &&
+				be32_to_cpu(leaf->ents[highstale].address) !=
+				XFS_DIR2_NULL_DATAPTR &&
+				(lowstale < 0 ||
+				 index - lowstale - 1 >= highstale - index);
+			     highstale++)
+				continue;
+		}
+		/*
+		 * If the low one is better, use it.
+		 */
+		if (lowstale >= 0 &&
+		    (highstale == be16_to_cpu(leaf->hdr.count) ||
+		     index - lowstale - 1 < highstale - index)) {
+			ASSERT(index - lowstale - 1 >= 0);
+			ASSERT(be32_to_cpu(leaf->ents[lowstale].address) ==
+			       XFS_DIR2_NULL_DATAPTR);
+			/*
+			 * Copy entries up to cover the stale entry
+			 * and make room for the new entry.
+			 */
+			if (index - lowstale - 1 > 0)
+				memmove(&leaf->ents[lowstale],
+					&leaf->ents[lowstale + 1],
+					(index - lowstale - 1) * sizeof(*lep));
+			lep = &leaf->ents[index - 1];
+			lfloglow = MIN(lowstale, lfloglow);
+			lfloghigh = MAX(index - 1, lfloghigh);
+		}
+		/*
+		 * The high one is better, so use that one.
+		 */
+		else {
+			ASSERT(highstale - index >= 0);
+			ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
+			       XFS_DIR2_NULL_DATAPTR);
+			/*
+			 * Copy entries down to cover the stale entry
+			 * and make room for the new entry.
+			 */
+			if (highstale - index > 0)
+				memmove(&leaf->ents[index + 1],
+					&leaf->ents[index],
+					(highstale - index) * sizeof(*lep));
+			lep = &leaf->ents[index];
+			lfloglow = MIN(index, lfloglow);
+			lfloghigh = MAX(highstale, lfloghigh);
+		}
+		be16_add_cpu(&leaf->hdr.stale, -1);
+	}
 	/*
 	 * Fill in the new leaf entry.
 	 */
@@ -595,7 +562,7 @@ xfs_dir2_leaf_check(
 
 	leaf = bp->data;
 	mp = dp->i_mount;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
 	/*
 	 * This value is not restrictive enough.
 	 * Should factor in the size of the bests table as well.
@@ -615,7 +582,7 @@ xfs_dir2_leaf_check(
 		if (i + 1 < be16_to_cpu(leaf->hdr.count))
 			ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
 			       be32_to_cpu(leaf->ents[i + 1].hashval));
-		if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+		if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR)
 			stale++;
 	}
 	ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
@@ -644,8 +611,7 @@ xfs_dir2_leaf_compact(
 	 * Compress out the stale entries in place.
 	 */
 	for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) {
-		if (leaf->ents[from].address ==
-		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+		if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR)
 			continue;
 		/*
 		 * Only actually copy the entries that are different.
@@ -697,9 +663,24 @@ xfs_dir2_leaf_compact_x1(
 	leaf = bp->data;
 	ASSERT(be16_to_cpu(leaf->hdr.stale) > 1);
 	index = *indexp;
-
-	xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
-
+	/*
+	 * Find the first stale entry before our index, if any.
+	 */
+	for (lowstale = index - 1;
+	     lowstale >= 0 &&
+		be32_to_cpu(leaf->ents[lowstale].address) != XFS_DIR2_NULL_DATAPTR;
+	     lowstale--)
+		continue;
+	/*
+	 * Find the first stale entry at or after our index, if any.
+	 * Stop if the answer would be worse than lowstale.
+	 */
+	for (highstale = index;
+	     highstale < be16_to_cpu(leaf->hdr.count) &&
+		be32_to_cpu(leaf->ents[highstale].address) != XFS_DIR2_NULL_DATAPTR &&
+		(lowstale < 0 || index - lowstale > highstale - index);
+	     highstale++)
+		continue;
 	/*
 	 * Pick the better of lowstale and highstale.
 	 */
@@ -720,8 +701,7 @@ xfs_dir2_leaf_compact_x1(
 		if (index == from)
 			newindex = to;
 		if (from != keepstale &&
-		    leaf->ents[from].address ==
-		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
+		    be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) {
 			if (from == to)
 				*lowlogp = to;
 			continue;
@@ -780,7 +760,7 @@ xfs_dir2_leaf_getdents(
 	int			byteoff;	/* offset in current block */
 	xfs_dir2_db_t		curdb;		/* db for current block */
 	xfs_dir2_off_t		curoff;		/* current overall offset */
-	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
+	xfs_dir2_data_t		*data;		/* data block structure */
 	xfs_dir2_data_entry_t	*dep;		/* data entry */
 	xfs_dir2_data_unused_t	*dup;		/* unused entry */
 	int			error = 0;	/* error return value */
@@ -1038,23 +1018,23 @@ xfs_dir2_leaf_getdents(
 			else if (curoff > newoff)
 				ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
 				       curdb);
-			hdr = bp->data;
+			data = bp->data;
 			xfs_dir2_data_check(dp, bp);
 			/*
 			 * Find our position in the block.
 			 */
-			ptr = (char *)(hdr + 1);
+			ptr = (char *)&data->u;
 			byteoff = xfs_dir2_byte_to_off(mp, curoff);
 			/*
 			 * Skip past the header.
 			 */
 			if (byteoff == 0)
-				curoff += (uint)sizeof(*hdr);
+				curoff += (uint)sizeof(data->hdr);
 			/*
 			 * Skip past entries until we reach our offset.
 			 */
 			else {
-				while ((char *)ptr - (char *)hdr < byteoff) {
+				while ((char *)ptr - (char *)data < byteoff) {
 					dup = (xfs_dir2_data_unused_t *)ptr;
 
 					if (be16_to_cpu(dup->freetag)
@@ -1075,8 +1055,8 @@ xfs_dir2_leaf_getdents(
 				curoff =
 					xfs_dir2_db_off_to_byte(mp,
 					    xfs_dir2_byte_to_db(mp, curoff),
-					    (char *)ptr - (char *)hdr);
-				if (ptr >= (char *)hdr + mp->m_dirblksize) {
+					    (char *)ptr - (char *)data);
+				if (ptr >= (char *)data + mp->m_dirblksize) {
 					continue;
 				}
 			}
@@ -1199,7 +1179,7 @@ xfs_dir2_leaf_log_bests(
 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
 
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
 	ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
 	firstb = xfs_dir2_leaf_bests_p(ltp) + first;
 	lastb = xfs_dir2_leaf_bests_p(ltp) + last;
@@ -1222,8 +1202,8 @@ xfs_dir2_leaf_log_ents(
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
-	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC ||
+	       be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 	firstlep = &leaf->ents[first];
 	lastlep = &leaf->ents[last];
 	xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
@@ -1241,8 +1221,8 @@ xfs_dir2_leaf_log_header(
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
-	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC ||
+	       be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 	xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
 		(uint)(sizeof(leaf->hdr) - 1));
 }
@@ -1261,7 +1241,7 @@ xfs_dir2_leaf_log_tail(
 
 	mp = tp->t_mountp;
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
 	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
 		(uint)(mp->m_dirblksize - 1));
@@ -1457,7 +1437,7 @@ xfs_dir2_leaf_removename(
 	xfs_da_args_t		*args)		/* operation arguments */
 {
 	__be16			*bestsp;	/* leaf block best freespace */
-	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
+	xfs_dir2_data_t		*data;		/* data block structure */
 	xfs_dir2_db_t		db;		/* data block number */
 	xfs_dabuf_t		*dbp;		/* data block buffer */
 	xfs_dir2_data_entry_t	*dep;		/* data entry structure */
@@ -1487,7 +1467,7 @@ xfs_dir2_leaf_removename(
 	tp = args->trans;
 	mp = dp->i_mount;
 	leaf = lbp->data;
-	hdr = dbp->data;
+	data = dbp->data;
 	xfs_dir2_data_check(dp, dbp);
 	/*
 	 * Point to the leaf entry, use that to point to the data entry.
@@ -1495,9 +1475,9 @@ xfs_dir2_leaf_removename(
 	lep = &leaf->ents[index];
 	db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+	      ((char *)data + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
 	needscan = needlog = 0;
-	oldbest = be16_to_cpu(hdr->bestfree[0].length);
+	oldbest = be16_to_cpu(data->hdr.bestfree[0].length);
 	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	bestsp = xfs_dir2_leaf_bests_p(ltp);
 	ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
@@ -1505,7 +1485,7 @@ xfs_dir2_leaf_removename(
 	 * Mark the former data entry unused.
 	 */
 	xfs_dir2_data_make_free(tp, dbp,
-		(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
+		(xfs_dir2_data_aoff_t)((char *)dep - (char *)data),
 		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
 	/*
 	 * We just mark the leaf entry stale by putting a null in it.
@@ -1519,23 +1499,23 @@ xfs_dir2_leaf_removename(
 	 * log the data block header if necessary.
 	 */
 	if (needscan)
-		xfs_dir2_data_freescan(mp, hdr, &needlog);
+		xfs_dir2_data_freescan(mp, data, &needlog);
 	if (needlog)
 		xfs_dir2_data_log_header(tp, dbp);
 	/*
 	 * If the longest freespace in the data block has changed,
 	 * put the new value in the bests table and log that.
 	 */
-	if (be16_to_cpu(hdr->bestfree[0].length) != oldbest) {
-		bestsp[db] = hdr->bestfree[0].length;
+	if (be16_to_cpu(data->hdr.bestfree[0].length) != oldbest) {
+		bestsp[db] = data->hdr.bestfree[0].length;
 		xfs_dir2_leaf_log_bests(tp, lbp, db, db);
 	}
 	xfs_dir2_data_check(dp, dbp);
 	/*
 	 * If the data block is now empty then get rid of the data block.
 	 */
-	if (be16_to_cpu(hdr->bestfree[0].length) ==
-	    mp->m_dirblksize - (uint)sizeof(*hdr)) {
+	if (be16_to_cpu(data->hdr.bestfree[0].length) ==
+	    mp->m_dirblksize - (uint)sizeof(data->hdr)) {
 		ASSERT(db != mp->m_dirdatablk);
 		if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
 			/*
@@ -1562,7 +1542,7 @@ xfs_dir2_leaf_removename(
 			 * Look for the last active entry (i).
 			 */
 			for (i = db - 1; i > 0; i--) {
-				if (bestsp[i] != cpu_to_be16(NULLDATAOFF))
+				if (be16_to_cpu(bestsp[i]) != NULLDATAOFF)
 					break;
 			}
 			/*
@@ -1706,6 +1686,9 @@ xfs_dir2_leaf_trim_data(
 	xfs_dir2_db_t		db)		/* data block number */
 {
 	__be16			*bestsp;	/* leaf bests table */
+#ifdef DEBUG
+	xfs_dir2_data_t		*data;		/* data block structure */
+#endif
 	xfs_dabuf_t		*dbp;		/* data block buffer */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			error;		/* error return value */
@@ -1724,21 +1707,20 @@ xfs_dir2_leaf_trim_data(
 			XFS_DATA_FORK))) {
 		return error;
 	}
+#ifdef DEBUG
+	data = dbp->data;
+	ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
+#endif
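+	/* NOTE(review): data is declared and assigned only under DEBUG, but
+	 * the ASSERT below reads it unconditionally; that builds only if
+	 * ASSERT() expands to nothing without DEBUG -- confirm. RMC 09/08/1999
+	 */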
 
 	leaf = lbp->data;
 	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
-
-#ifdef DEBUG
-{
-	struct xfs_dir2_data_hdr *hdr = dbp->data;
-
-	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
-	ASSERT(be16_to_cpu(hdr->bestfree[0].length) ==
-	       mp->m_dirblksize - (uint)sizeof(*hdr));
+	ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) ==
+	       mp->m_dirblksize - (uint)sizeof(data->hdr));
 	ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
-}
-#endif
-
 	/*
 	 * Get rid of the data block.
 	 */
@@ -1758,20 +1740,6 @@ xfs_dir2_leaf_trim_data(
 	return 0;
 }
 
-static inline size_t
-xfs_dir2_leaf_size(
-	struct xfs_dir2_leaf_hdr	*hdr,
-	int				counts)
-{
-	int			entries;
-
-	entries = be16_to_cpu(hdr->count) - be16_to_cpu(hdr->stale);
-	return sizeof(xfs_dir2_leaf_hdr_t) +
-	    entries * sizeof(xfs_dir2_leaf_entry_t) +
-	    counts * sizeof(xfs_dir2_data_off_t) +
-	    sizeof(xfs_dir2_leaf_tail_t);
-}
-
 /*
  * Convert node form directory to leaf form directory.
  * The root of the node form dir needs to already be a LEAFN block.
@@ -1842,7 +1810,7 @@ xfs_dir2_node_to_leaf(
 		return 0;
 	lbp = state->path.blk[0].bp;
 	leaf = lbp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 	/*
 	 * Read the freespace block.
 	 */
@@ -1851,19 +1819,20 @@ xfs_dir2_node_to_leaf(
 		return error;
 	}
 	free = fbp->data;
-	ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
+	ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 	ASSERT(!free->hdr.firstdb);
-
 	/*
 	 * Now see if the leafn and free data will fit in a leaf1.
 	 * If not, release the buffer and give up.
 	 */
-	if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) >
-			mp->m_dirblksize) {
+	if ((uint)sizeof(leaf->hdr) +
+	    (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)) * (uint)sizeof(leaf->ents[0]) +
+	    be32_to_cpu(free->hdr.nvalid) * (uint)sizeof(leaf->bests[0]) +
+	    (uint)sizeof(leaf->tail) >
+	    mp->m_dirblksize) {
 		xfs_da_brelse(tp, fbp);
 		return 0;
 	}
-
 	/*
 	 * If the leaf has any stale entries in it, compress them out.
 	 * The compact routine will log the header.
@@ -1882,7 +1851,7 @@ xfs_dir2_node_to_leaf(
 	 * Set up the leaf bests table.
 	 */
 	memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests,
-		be32_to_cpu(ltp->bestcount) * sizeof(xfs_dir2_data_off_t));
+		be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0]));
 	xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
 	xfs_dir2_leaf_log_tail(tp, lbp);
 	xfs_dir2_leaf_check(dp, lbp);
diff --git a/trunk/fs/xfs/xfs_dir2_leaf.h b/trunk/fs/xfs/xfs_dir2_leaf.h
new file mode 100644
index 000000000000..6c9539f06987
--- /dev/null
+++ b/trunk/fs/xfs/xfs_dir2_leaf.h
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DIR2_LEAF_H__
+#define	__XFS_DIR2_LEAF_H__
+
+struct uio;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * Offset of the leaf/node space.  First block in this space
+ * is the btree root.
+ */
+#define	XFS_DIR2_LEAF_SPACE	1
+#define	XFS_DIR2_LEAF_OFFSET	(XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
+#define	XFS_DIR2_LEAF_FIRSTDB(mp)	\
+	xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
+
+/*
+ * Offset in data space of a data entry.
+ */
+typedef	__uint32_t	xfs_dir2_dataptr_t;
+#define	XFS_DIR2_MAX_DATAPTR	((xfs_dir2_dataptr_t)0xffffffff)
+#define	XFS_DIR2_NULL_DATAPTR	((xfs_dir2_dataptr_t)0)
+
+/*
+ * Leaf block header: first member of xfs_dir2_leaf_t; counts are big-endian.
+ */
+typedef struct xfs_dir2_leaf_hdr {
+	xfs_da_blkinfo_t	info;		/* header for da routines */
+	__be16			count;		/* all entries, incl. stale */
+	__be16			stale;		/* count of stale entries */
+} xfs_dir2_leaf_hdr_t;
+
+/* Leaf block entry.  A leaf's ents[] is kept in non-decreasing hashval
+ * order; an address equal to XFS_DIR2_NULL_DATAPTR marks the entry stale.
+ */
+typedef struct xfs_dir2_leaf_entry {
+	__be32			hashval;	/* hash value of name */
+	__be32			address;	/* dataptr of data entry */
+} xfs_dir2_leaf_entry_t;
+
+/*
+ * Leaf block tail; xfs_dir2_leaf_tail_p() finds it at the end of the block.
+ */
+typedef struct xfs_dir2_leaf_tail {
+	__be32			bestcount;	/* number of bests entries */
+} xfs_dir2_leaf_tail_t;
+
+/*
+ * Leaf block.  The [1] array sizes are placeholders; code indexes ents[] up
+ * to hdr.count.  bests and tail are at the end of the block for single-leaf
+ * only (magic = XFS_DIR2_LEAF1_MAGIC not XFS_DIR2_LEAFN_MAGIC).
+ */
+typedef struct xfs_dir2_leaf {
+	xfs_dir2_leaf_hdr_t	hdr;		/* leaf header */
+	xfs_dir2_leaf_entry_t	ents[1];	/* entries */
+						/* ... */
+	xfs_dir2_data_off_t	bests[1];	/* best free counts */
+	xfs_dir2_leaf_tail_t	tail;		/* leaf tail */
+} xfs_dir2_leaf_t;
+
+/*
+ * DB blocks here are logical directory block numbers, not filesystem blocks.
+ */
+
+static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
+{
+	return (int)((mp->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) /
+		     (uint)sizeof(xfs_dir2_leaf_entry_t));
+}
+
+/* Get the tail (bestcount holder) of the single-leaf block at lp: the last
+ * sizeof(xfs_dir2_leaf_tail_t) bytes of the mp->m_dirblksize-byte block.
+ */
+static inline xfs_dir2_leaf_tail_t *
+xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
+{
+	return (xfs_dir2_leaf_tail_t *)
+		((char *)lp + mp->m_dirblksize -
+		 (uint)sizeof(xfs_dir2_leaf_tail_t));
+}
+
+/* Get address of the bests array in the single-leaf block: the bestcount
+ * __be16 slots that end immediately before the tail at ltp.
+ */
+static inline __be16 *
+xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
+{
+	return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
+}
+
+/* Convert dataptr to byte in file space.  A dataptr counts units of
+ * (1 << XFS_DIR2_DATA_ALIGN_LOG) bytes; mp is accepted but not used.
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
+{
+	return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG;
+}
+
+/* Convert byte in file space to dataptr.  The low XFS_DIR2_DATA_ALIGN_LOG
+ * bits are shifted out, so the byte offset had better be aligned.
+ */
+static inline xfs_dir2_dataptr_t
+xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
+{
+	return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG);
+}
+
+/* Convert byte in space to (DB) block: shift the byte offset right by
+ * (sb_blocklog + sb_dirblklog) bits.
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
+{
+	return (xfs_dir2_db_t)(by >>
+		(mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog));
+}
+
+/* Convert dataptr to a (DB) block number by composing
+ * xfs_dir2_dataptr_to_byte() and xfs_dir2_byte_to_db().
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
+{
+	return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
+}
+
+/* Convert byte in space to offset in a block: keep only the low
+ * (sb_blocklog + sb_dirblklog) bits of the byte offset.
+ */
+static inline xfs_dir2_data_aoff_t
+xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
+{
+	return (xfs_dir2_data_aoff_t)(by &
+		((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) - 1));
+}
+
+/* Convert dataptr to a byte offset in a block by composing
+ * xfs_dir2_dataptr_to_byte() and xfs_dir2_byte_to_off().
+ */
+static inline xfs_dir2_data_aoff_t
+xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
+{
+	return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
+}
+
+/* Convert block and offset to byte in space: db shifted left by
+ * (sb_blocklog + sb_dirblklog) bits, plus the offset o within the block.
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
+			xfs_dir2_data_aoff_t o)
+{
+	return ((xfs_dir2_off_t)db <<
+		(mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) + o;
+}
+
+/* Convert block (DB) to block (dablk): shift the DB block number left by
+ * sb_dirblklog bits.
+ */
+static inline xfs_dablk_t
+xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
+{
+	return (xfs_dablk_t)(db << mp->m_sb.sb_dirblklog);
+}
+
+/* Convert byte in space to (DA) block by composing
+ * xfs_dir2_byte_to_db() and xfs_dir2_db_to_da().
+ */
+static inline xfs_dablk_t
+xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
+{
+	return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
+}
+
+/* Convert block and offset to dataptr by composing
+ * xfs_dir2_db_off_to_byte() and xfs_dir2_byte_to_dataptr().
+ */
+static inline xfs_dir2_dataptr_t
+xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
+			   xfs_dir2_data_aoff_t o)
+{
+	return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
+}
+
+/* Convert block (dablk) to block (DB): shift the dablk number right by
+ * sb_dirblklog bits, dropping the low bits.
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
+{
+	return (xfs_dir2_db_t)(da >> mp->m_sb.sb_dirblklog);
+}
+
+/* Convert block (dablk) to byte offset in space: the byte at offset 0 of
+ * the (DB) block given by xfs_dir2_da_to_db().
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
+{
+	return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
+}
+
+/* Function declarations.  NOTE(review): struct xfs_da_state is not forward-
+ * declared in this header, so it must be declared before inclusion -- confirm.
+ */
+extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
+				  struct xfs_dabuf *dbp);
+extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
+extern void xfs_dir2_leaf_compact(struct xfs_da_args *args,
+				  struct xfs_dabuf *bp);
+extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp,
+				     int *lowstalep, int *highstalep,
+				     int *lowlogp, int *highlogp);
+extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent,
+				  size_t bufsize, xfs_off_t *offset,
+				  filldir_t filldir);
+extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
+			      struct xfs_dabuf **bpp, int magic);
+extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp,
+				   int first, int last);
+extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp,
+				     struct xfs_dabuf *bp);
+extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_replace(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
+				     struct xfs_dabuf *lbp);
+extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
+				   struct xfs_dabuf *lbp, xfs_dir2_db_t db);
+extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
+
+#endif	/* __XFS_DIR2_LEAF_H__ */
diff --git a/trunk/fs/xfs/xfs_dir2_node.c b/trunk/fs/xfs/xfs_dir2_node.c
index 084b3247d636..a0aab7d3294f 100644
--- a/trunk/fs/xfs/xfs_dir2_node.c
+++ b/trunk/fs/xfs/xfs_dir2_node.c
@@ -23,14 +23,18 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
+#include "xfs_dir2.h"
 #include "xfs_mount.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_dir2_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
-#include "xfs_dir2_format.h"
-#include "xfs_dir2_priv.h"
+#include "xfs_dir2_data.h"
+#include "xfs_dir2_leaf.h"
+#include "xfs_dir2_block.h"
+#include "xfs_dir2_node.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
 
@@ -69,7 +73,7 @@ xfs_dir2_free_log_bests(
 	xfs_dir2_free_t		*free;		/* freespace structure */
 
 	free = bp->data;
-	ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
+	ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 	xfs_da_log_buf(tp, bp,
 		(uint)((char *)&free->bests[first] - (char *)free),
 		(uint)((char *)&free->bests[last] - (char *)free +
@@ -87,7 +91,7 @@ xfs_dir2_free_log_header(
 	xfs_dir2_free_t		*free;		/* freespace structure */
 
 	free = bp->data;
-	ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
+	ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 	xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free),
 		(uint)(sizeof(xfs_dir2_free_hdr_t) - 1));
 }
@@ -240,13 +244,89 @@ xfs_dir2_leafn_add(
 		lfloglow = be16_to_cpu(leaf->hdr.count);
 		lfloghigh = -1;
 	}
-
+	/*
+	 * No stale entries, just insert a space for the new entry.
+	 */
+	if (!leaf->hdr.stale) {
+		lep = &leaf->ents[index];
+		if (index < be16_to_cpu(leaf->hdr.count))
+			memmove(lep + 1, lep,
+				(be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep));
+		lfloglow = index;
+		lfloghigh = be16_to_cpu(leaf->hdr.count);
+		be16_add_cpu(&leaf->hdr.count, 1);
+	}
+	/*
+	 * There are stale entries.  We'll use one for the new entry.
+	 */
+	else {
+		/*
+		 * If we didn't do a compact then we need to figure out
+		 * which stale entry will be used.
+		 */
+		if (compact == 0) {
+			/*
+			 * Find first stale entry before our insertion point.
+			 */
+			for (lowstale = index - 1;
+			     lowstale >= 0 &&
+				be32_to_cpu(leaf->ents[lowstale].address) !=
+				XFS_DIR2_NULL_DATAPTR;
+			     lowstale--)
+				continue;
+			/*
+			 * Find next stale entry after insertion point.
+			 * Stop looking if the answer would be worse than
+			 * lowstale already found.
+			 */
+			for (highstale = index;
+			     highstale < be16_to_cpu(leaf->hdr.count) &&
+				be32_to_cpu(leaf->ents[highstale].address) !=
+				XFS_DIR2_NULL_DATAPTR &&
+				(lowstale < 0 ||
+				 index - lowstale - 1 >= highstale - index);
+			     highstale++)
+				continue;
+		}
+		/*
+		 * Using the low stale entry.
+		 * Shift entries up toward the stale slot.
+		 */
+		if (lowstale >= 0 &&
+		    (highstale == be16_to_cpu(leaf->hdr.count) ||
+		     index - lowstale - 1 < highstale - index)) {
+			ASSERT(be32_to_cpu(leaf->ents[lowstale].address) ==
+			       XFS_DIR2_NULL_DATAPTR);
+			ASSERT(index - lowstale - 1 >= 0);
+			if (index - lowstale - 1 > 0)
+				memmove(&leaf->ents[lowstale],
+					&leaf->ents[lowstale + 1],
+					(index - lowstale - 1) * sizeof(*lep));
+			lep = &leaf->ents[index - 1];
+			lfloglow = MIN(lowstale, lfloglow);
+			lfloghigh = MAX(index - 1, lfloghigh);
+		}
+		/*
+		 * Using the high stale entry.
+		 * Shift entries down toward the stale slot.
+		 */
+		else {
+			ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
+			       XFS_DIR2_NULL_DATAPTR);
+			ASSERT(highstale - index >= 0);
+			if (highstale - index > 0)
+				memmove(&leaf->ents[index + 1],
+					&leaf->ents[index],
+					(highstale - index) * sizeof(*lep));
+			lep = &leaf->ents[index];
+			lfloglow = MIN(index, lfloglow);
+			lfloghigh = MAX(highstale, lfloghigh);
+		}
+		be16_add_cpu(&leaf->hdr.stale, -1);
+	}
 	/*
 	 * Insert the new entry, log everything.
 	 */
-	lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale,
-				       highstale, &lfloglow, &lfloghigh);
-
 	lep->hashval = cpu_to_be32(args->hashval);
 	lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp,
 				args->blkno, args->index));
@@ -272,14 +352,14 @@ xfs_dir2_leafn_check(
 
 	leaf = bp->data;
 	mp = dp->i_mount;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 	ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
 	for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
 		if (i + 1 < be16_to_cpu(leaf->hdr.count)) {
 			ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
 			       be32_to_cpu(leaf->ents[i + 1].hashval));
 		}
-		if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+		if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR)
 			stale++;
 	}
 	ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
@@ -298,7 +378,7 @@ xfs_dir2_leafn_lasthash(
 	xfs_dir2_leaf_t	*leaf;			/* leaf structure */
 
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 	if (count)
 		*count = be16_to_cpu(leaf->hdr.count);
 	if (!leaf->hdr.count)
@@ -337,7 +417,7 @@ xfs_dir2_leafn_lookup_for_addname(
 	tp = args->trans;
 	mp = dp->i_mount;
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 #ifdef __KERNEL__
 	ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
 #endif
@@ -354,7 +434,7 @@ xfs_dir2_leafn_lookup_for_addname(
 		curbp = state->extrablk.bp;
 		curfdb = state->extrablk.blkno;
 		free = curbp->data;
-		ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
+		ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 	}
 	length = xfs_dir2_data_entsize(args->namelen);
 	/*
@@ -408,7 +488,7 @@ xfs_dir2_leafn_lookup_for_addname(
 				ASSERT(be32_to_cpu(free->hdr.magic) ==
 					XFS_DIR2_FREE_MAGIC);
 				ASSERT((be32_to_cpu(free->hdr.firstdb) %
-					xfs_dir2_free_max_bests(mp)) == 0);
+					XFS_DIR2_MAX_FREE_BESTS(mp)) == 0);
 				ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
 				ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) +
 					be32_to_cpu(free->hdr.nvalid));
@@ -420,8 +500,7 @@ xfs_dir2_leafn_lookup_for_addname(
 			/*
 			 * If it has room, return it.
 			 */
-			if (unlikely(free->bests[fi] ==
-			    cpu_to_be16(NULLDATAOFF))) {
+			if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
 				XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
 							XFS_ERRLEVEL_LOW, mp);
 				if (curfdb != newfdb)
@@ -482,7 +561,7 @@ xfs_dir2_leafn_lookup_for_entry(
 	tp = args->trans;
 	mp = dp->i_mount;
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 #ifdef __KERNEL__
 	ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
 #endif
@@ -663,8 +742,7 @@ xfs_dir2_leafn_moveents(
 		int	i;			/* temp leaf index */
 
 		for (i = start_s, stale = 0; i < start_s + count; i++) {
-			if (leaf_s->ents[i].address ==
-			    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+			if (be32_to_cpu(leaf_s->ents[i].address) == XFS_DIR2_NULL_DATAPTR)
 				stale++;
 		}
 	} else
@@ -711,8 +789,8 @@ xfs_dir2_leafn_order(
 
 	leaf1 = leaf1_bp->data;
 	leaf2 = leaf2_bp->data;
-	ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
-	ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+	ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 	if (be16_to_cpu(leaf1->hdr.count) > 0 &&
 	    be16_to_cpu(leaf2->hdr.count) > 0 &&
 	    (be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) ||
@@ -840,7 +918,7 @@ xfs_dir2_leafn_remove(
 	xfs_da_state_blk_t	*dblk,		/* data block */
 	int			*rval)		/* resulting block needs join */
 {
-	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
+	xfs_dir2_data_t		*data;		/* data block structure */
 	xfs_dir2_db_t		db;		/* data block number */
 	xfs_dabuf_t		*dbp;		/* data block buffer */
 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
@@ -860,7 +938,7 @@ xfs_dir2_leafn_remove(
 	tp = args->trans;
 	mp = dp->i_mount;
 	leaf = bp->data;
-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 	/*
 	 * Point to the entry we're removing.
 	 */
@@ -885,9 +963,9 @@ xfs_dir2_leafn_remove(
 	 * in the data block in case it changes.
 	 */
 	dbp = dblk->bp;
-	hdr = dbp->data;
-	dep = (xfs_dir2_data_entry_t *)((char *)hdr + off);
-	longest = be16_to_cpu(hdr->bestfree[0].length);
+	data = dbp->data;
+	dep = (xfs_dir2_data_entry_t *)((char *)data + off);
+	longest = be16_to_cpu(data->hdr.bestfree[0].length);
 	needlog = needscan = 0;
 	xfs_dir2_data_make_free(tp, dbp, off,
 		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
@@ -896,7 +974,7 @@ xfs_dir2_leafn_remove(
 	 * Log the data block header if needed.
 	 */
 	if (needscan)
-		xfs_dir2_data_freescan(mp, hdr, &needlog);
+		xfs_dir2_data_freescan(mp, data, &needlog);
 	if (needlog)
 		xfs_dir2_data_log_header(tp, dbp);
 	xfs_dir2_data_check(dp, dbp);
@@ -904,7 +982,7 @@ xfs_dir2_leafn_remove(
 	 * If the longest data block freespace changes, need to update
 	 * the corresponding freeblock entry.
 	 */
-	if (longest < be16_to_cpu(hdr->bestfree[0].length)) {
+	if (longest < be16_to_cpu(data->hdr.bestfree[0].length)) {
 		int		error;		/* error return value */
 		xfs_dabuf_t	*fbp;		/* freeblock buffer */
 		xfs_dir2_db_t	fdb;		/* freeblock block number */
@@ -922,27 +1000,27 @@ xfs_dir2_leafn_remove(
 			return error;
 		}
 		free = fbp->data;
-		ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
+		ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 		ASSERT(be32_to_cpu(free->hdr.firstdb) ==
-		       xfs_dir2_free_max_bests(mp) *
+		       XFS_DIR2_MAX_FREE_BESTS(mp) *
 		       (fdb - XFS_DIR2_FREE_FIRSTDB(mp)));
 		/*
 		 * Calculate which entry we need to fix.
 		 */
 		findex = xfs_dir2_db_to_fdindex(mp, db);
-		longest = be16_to_cpu(hdr->bestfree[0].length);
+		longest = be16_to_cpu(data->hdr.bestfree[0].length);
 		/*
 		 * If the data block is now empty we can get rid of it
 		 * (usually).
 		 */
-		if (longest == mp->m_dirblksize - (uint)sizeof(*hdr)) {
+		if (longest == mp->m_dirblksize - (uint)sizeof(data->hdr)) {
 			/*
 			 * Try to punch out the data block.
 			 */
 			error = xfs_dir2_shrink_inode(args, db, dbp);
 			if (error == 0) {
 				dblk->bp = NULL;
-				hdr = NULL;
+				data = NULL;
 			}
 			/*
 			 * We can get ENOSPC if there's no space reservation.
@@ -958,7 +1036,7 @@ xfs_dir2_leafn_remove(
 		 * If we got rid of the data block, we can eliminate that entry
 		 * in the free block.
 		 */
-		if (hdr == NULL) {
+		if (data == NULL) {
 			/*
 			 * One less used entry in the free table.
 			 */
@@ -974,8 +1052,7 @@ xfs_dir2_leafn_remove(
 				int	i;		/* free entry index */
 
 				for (i = findex - 1;
-				     i >= 0 &&
-				     free->bests[i] == cpu_to_be16(NULLDATAOFF);
+				     i >= 0 && be16_to_cpu(free->bests[i]) == NULLDATAOFF;
 				     i--)
 					continue;
 				free->hdr.nvalid = cpu_to_be32(i + 1);
@@ -1132,7 +1209,7 @@ xfs_dir2_leafn_toosmall(
 	 */
 	blk = &state->path.blk[state->path.active - 1];
 	info = blk->bp->data;
-	ASSERT(info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+	ASSERT(be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC);
 	leaf = (xfs_dir2_leaf_t *)info;
 	count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
 	bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]);
@@ -1191,7 +1268,7 @@ xfs_dir2_leafn_toosmall(
 		count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
 		bytes = state->blocksize - (state->blocksize >> 2);
 		leaf = bp->data;
-		ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+		ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 		count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
 		bytes -= count * (uint)sizeof(leaf->ents[0]);
 		/*
@@ -1250,8 +1327,8 @@ xfs_dir2_leafn_unbalance(
 	ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC);
 	drop_leaf = drop_blk->bp->data;
 	save_leaf = save_blk->bp->data;
-	ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
-	ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+	ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 	/*
 	 * If there are any stale leaf entries, take this opportunity
 	 * to purge them.
@@ -1355,7 +1432,7 @@ xfs_dir2_node_addname_int(
 	xfs_da_args_t		*args,		/* operation arguments */
 	xfs_da_state_blk_t	*fblk)		/* optional freespace block */
 {
-	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
+	xfs_dir2_data_t		*data;		/* data block structure */
 	xfs_dir2_db_t		dbno;		/* data block number */
 	xfs_dabuf_t		*dbp;		/* data block buffer */
 	xfs_dir2_data_entry_t	*dep;		/* data entry pointer */
@@ -1392,7 +1469,7 @@ xfs_dir2_node_addname_int(
 		 */
 		ifbno = fblk->blkno;
 		free = fbp->data;
-		ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
+		ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 		findex = fblk->index;
 		/*
 		 * This means the free entry showed that the data block had
@@ -1476,7 +1553,7 @@ xfs_dir2_node_addname_int(
 				continue;
 			}
 			free = fbp->data;
-			ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
+			ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 			findex = 0;
 		}
 		/*
@@ -1603,12 +1680,12 @@ xfs_dir2_node_addname_int(
 			free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC);
 			free->hdr.firstdb = cpu_to_be32(
 				(fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
-				xfs_dir2_free_max_bests(mp));
+				XFS_DIR2_MAX_FREE_BESTS(mp));
 			free->hdr.nvalid = 0;
 			free->hdr.nused = 0;
 		} else {
 			free = fbp->data;
-			ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
+			ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 		}
 
 		/*
@@ -1620,7 +1697,7 @@ xfs_dir2_node_addname_int(
 		 * freespace block, extend that table.
 		 */
 		if (findex >= be32_to_cpu(free->hdr.nvalid)) {
-			ASSERT(findex < xfs_dir2_free_max_bests(mp));
+			ASSERT(findex < XFS_DIR2_MAX_FREE_BESTS(mp));
 			free->hdr.nvalid = cpu_to_be32(findex + 1);
 			/*
 			 * Tag new entry so nused will go up.
@@ -1631,7 +1708,7 @@ xfs_dir2_node_addname_int(
 		 * If this entry was for an empty data block
 		 * (this should always be true) then update the header.
 		 */
-		if (free->bests[findex] == cpu_to_be16(NULLDATAOFF)) {
+		if (be16_to_cpu(free->bests[findex]) == NULLDATAOFF) {
 			be32_add_cpu(&free->hdr.nused, 1);
 			xfs_dir2_free_log_header(tp, fbp);
 		}
@@ -1640,8 +1717,8 @@ xfs_dir2_node_addname_int(
 		 * We haven't allocated the data entry yet so this will
 		 * change again.
 		 */
-		hdr = dbp->data;
-		free->bests[findex] = hdr->bestfree[0].length;
+		data = dbp->data;
+		free->bests[findex] = data->hdr.bestfree[0].length;
 		logfree = 1;
 	}
 	/*
@@ -1666,21 +1743,21 @@ xfs_dir2_node_addname_int(
 				xfs_da_buf_done(fbp);
 			return error;
 		}
-		hdr = dbp->data;
+		data = dbp->data;
 		logfree = 0;
 	}
-	ASSERT(be16_to_cpu(hdr->bestfree[0].length) >= length);
+	ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) >= length);
 	/*
 	 * Point to the existing unused space.
 	 */
 	dup = (xfs_dir2_data_unused_t *)
-	      ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset));
+	      ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset));
 	needscan = needlog = 0;
 	/*
 	 * Mark the first part of the unused space, inuse for us.
 	 */
 	xfs_dir2_data_use_free(tp, dbp, dup,
-		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
+		(xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length,
 		&needlog, &needscan);
 	/*
 	 * Fill in the new entry and log it.
@@ -1690,13 +1767,13 @@ xfs_dir2_node_addname_int(
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, dep->namelen);
 	tagp = xfs_dir2_data_entry_tag_p(dep);
-	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
+	*tagp = cpu_to_be16((char *)dep - (char *)data);
 	xfs_dir2_data_log_entry(tp, dbp, dep);
 	/*
 	 * Rescan the block for bestfree if needed.
 	 */
 	if (needscan)
-		xfs_dir2_data_freescan(mp, hdr, &needlog);
+		xfs_dir2_data_freescan(mp, data, &needlog);
 	/*
 	 * Log the data block header if needed.
 	 */
@@ -1705,8 +1782,8 @@ xfs_dir2_node_addname_int(
 	/*
 	 * If the freespace entry is now wrong, update it.
 	 */
-	if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(hdr->bestfree[0].length)) {
-		free->bests[findex] = hdr->bestfree[0].length;
+	if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(data->hdr.bestfree[0].length)) {
+		free->bests[findex] = data->hdr.bestfree[0].length;
 		logfree = 1;
 	}
 	/*
@@ -1856,7 +1933,7 @@ xfs_dir2_node_replace(
 	xfs_da_args_t		*args)		/* operation arguments */
 {
 	xfs_da_state_blk_t	*blk;		/* leaf block */
-	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
+	xfs_dir2_data_t		*data;		/* data block structure */
 	xfs_dir2_data_entry_t	*dep;		/* data entry changed */
 	int			error;		/* error return value */
 	int			i;		/* btree level */
@@ -1900,10 +1977,10 @@ xfs_dir2_node_replace(
 		/*
 		 * Point to the data entry.
 		 */
-		hdr = state->extrablk.bp->data;
-		ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
+		data = state->extrablk.bp->data;
+		ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
 		dep = (xfs_dir2_data_entry_t *)
-		      ((char *)hdr +
+		      ((char *)data +
 		       xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address)));
 		ASSERT(inum != be64_to_cpu(dep->inumber));
 		/*
@@ -1967,7 +2044,7 @@ xfs_dir2_node_trim_free(
 		return 0;
 	}
 	free = bp->data;
-	ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
+	ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 	/*
 	 * If there are used entries, there's nothing to do.
 	 */
diff --git a/trunk/fs/xfs/xfs_dir2_node.h b/trunk/fs/xfs/xfs_dir2_node.h
new file mode 100644
index 000000000000..82dfe7147195
--- /dev/null
+++ b/trunk/fs/xfs/xfs_dir2_node.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DIR2_NODE_H__
+#define	__XFS_DIR2_NODE_H__
+
+/*
+ * Directory version 2, btree node format structures
+ */
+
+struct uio;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_da_state;
+struct xfs_da_state_blk;
+struct xfs_inode;
+struct xfs_trans;
+
+/*
+ * Offset of the freespace index, and the db (block number) at that offset.
+ */
+#define	XFS_DIR2_FREE_SPACE	2
+#define	XFS_DIR2_FREE_OFFSET	(XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
+#define	XFS_DIR2_FREE_FIRSTDB(mp)	\
+	xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
+
+#define	XFS_DIR2_FREE_MAGIC	0x58443246	/* XD2F */
+
+typedef	struct xfs_dir2_free_hdr {
+	__be32			magic;		/* XFS_DIR2_FREE_MAGIC */
+	__be32			firstdb;	/* db of first entry */
+	__be32			nvalid;		/* count of valid entries */
+	__be32			nused;		/* count of used entries */
+} xfs_dir2_free_hdr_t;
+
+typedef struct xfs_dir2_free {
+	xfs_dir2_free_hdr_t	hdr;		/* block header */
+	__be16			bests[1];	/* bestfree[0].length per db */
+						/* unused entries are -1 (NULLDATAOFF) */
+} xfs_dir2_free_t;
+
+#define	XFS_DIR2_MAX_FREE_BESTS(mp)	\
+	(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_free_hdr_t)) / \
+	 (uint)sizeof(xfs_dir2_data_off_t))	/* bests[] slots per block */
+
+/*
+ * Map a data space db to the free space db that holds its bests[] entry.
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
+{
+	return (XFS_DIR2_FREE_FIRSTDB(mp) + (db) / XFS_DIR2_MAX_FREE_BESTS(mp));
+}
+
+/*
+ * Map a data space db to the index of its bests[] entry within its free db.
+ */
+static inline int
+xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
+{
+	return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp));
+}
+
+extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
+				 struct xfs_dabuf *lbp);
+extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
+extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp,
+				     struct xfs_da_args *args, int *indexp,
+				     struct xfs_da_state *state);
+extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp,
+				struct xfs_dabuf *leaf2_bp);
+extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
+				struct xfs_da_state_blk *oldblk,
+				struct xfs_da_state_blk *newblk);
+extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
+extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
+				     struct xfs_da_state_blk *drop_blk,
+				     struct xfs_da_state_blk *save_blk);
+extern int xfs_dir2_node_addname(struct xfs_da_args *args);
+extern int xfs_dir2_node_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_node_removename(struct xfs_da_args *args);
+extern int xfs_dir2_node_replace(struct xfs_da_args *args);
+extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
+				   int *rvalp);
+
+#endif	/* __XFS_DIR2_NODE_H__ */
diff --git a/trunk/fs/xfs/xfs_dir2_priv.h b/trunk/fs/xfs/xfs_dir2_priv.h
deleted file mode 100644
index 067f403ecf8a..000000000000
--- a/trunk/fs/xfs/xfs_dir2_priv.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DIR2_PRIV_H__
-#define __XFS_DIR2_PRIV_H__
-
-/* xfs_dir2.c */
-extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
-extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
-extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
-extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
-				xfs_dir2_db_t *dbp);
-extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
-				struct xfs_dabuf *bp);
-extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
-				const unsigned char *name, int len);
-
-/* xfs_dir2_block.c */
-extern int xfs_dir2_block_addname(struct xfs_da_args *args);
-extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent,
-		xfs_off_t *offset, filldir_t filldir);
-extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_block_removename(struct xfs_da_args *args);
-extern int xfs_dir2_block_replace(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
-		struct xfs_dabuf *lbp, struct xfs_dabuf *dbp);
-
-/* xfs_dir2_data.c */
-#ifdef DEBUG
-extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp);
-#else
-#define	xfs_dir2_data_check(dp,bp)
-#endif
-extern struct xfs_dir2_data_free *
-xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
-		struct xfs_dir2_data_unused *dup, int *loghead);
-extern void xfs_dir2_data_freescan(struct xfs_mount *mp,
-		struct xfs_dir2_data_hdr *hdr, int *loghead);
-extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
-		struct xfs_dabuf **bpp);
-extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp,
-		struct xfs_dir2_data_entry *dep);
-extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
-		struct xfs_dabuf *bp);
-extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp,
-		struct xfs_dir2_data_unused *dup);
-extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
-		xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
-		int *needlogp, int *needscanp);
-extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
-		struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset,
-		xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
-
-/* xfs_dir2_leaf.c */
-extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
-		struct xfs_dabuf *dbp);
-extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
-extern void xfs_dir2_leaf_compact(struct xfs_da_args *args,
-		struct xfs_dabuf *bp);
-extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp,
-		int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
-extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent,
-		size_t bufsize, xfs_off_t *offset, filldir_t filldir);
-extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
-		struct xfs_dabuf **bpp, int magic);
-extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp,
-		int first, int last);
-extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp,
-		struct xfs_dabuf *bp);
-extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_replace(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
-		struct xfs_dabuf *lbp);
-extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
-		struct xfs_dabuf *lbp, xfs_dir2_db_t db);
-extern struct xfs_dir2_leaf_entry *
-xfs_dir2_leaf_find_entry(struct xfs_dir2_leaf *leaf, int index, int compact,
-		int lowstale, int highstale,
-		int *lfloglow, int *lfloghigh);
-extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
-
-/* xfs_dir2_node.c */
-extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
-		struct xfs_dabuf *lbp);
-extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
-extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp,
-		struct xfs_da_args *args, int *indexp,
-		struct xfs_da_state *state);
-extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp,
-		struct xfs_dabuf *leaf2_bp);
-extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
-	struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk);
-extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
-extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
-		struct xfs_da_state_blk *drop_blk,
-		struct xfs_da_state_blk *save_blk);
-extern int xfs_dir2_node_addname(struct xfs_da_args *args);
-extern int xfs_dir2_node_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_node_removename(struct xfs_da_args *args);
-extern int xfs_dir2_node_replace(struct xfs_da_args *args);
-extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
-		int *rvalp);
-
-/* xfs_dir2_sf.c */
-extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp);
-extern xfs_ino_t xfs_dir2_sfe_get_ino(struct xfs_dir2_sf_hdr *sfp,
-		struct xfs_dir2_sf_entry *sfep);
-extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
-		struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp);
-extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp,
-		int size, xfs_dir2_sf_hdr_t *sfhp);
-extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
-extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
-extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent,
-		xfs_off_t *offset, filldir_t filldir);
-extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
-extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
-
-#endif /* __XFS_DIR2_PRIV_H__ */
diff --git a/trunk/fs/xfs/xfs_dir2_sf.c b/trunk/fs/xfs/xfs_dir2_sf.c
index 79d05e84e296..b1bae6b1eed9 100644
--- a/trunk/fs/xfs/xfs_dir2_sf.c
+++ b/trunk/fs/xfs/xfs_dir2_sf.c
@@ -23,16 +23,18 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
+#include "xfs_dir2.h"
 #include "xfs_mount.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_dir2_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_inode_item.h"
 #include "xfs_error.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_format.h"
-#include "xfs_dir2_priv.h"
+#include "xfs_dir2_data.h"
+#include "xfs_dir2_leaf.h"
+#include "xfs_dir2_block.h"
 #include "xfs_trace.h"
 
 /*
@@ -57,82 +59,6 @@ static void xfs_dir2_sf_toino4(xfs_da_args_t *args);
 static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
 #endif /* XFS_BIG_INUMS */
 
-/*
- * Inode numbers in short-form directories can come in two versions,
- * either 4 bytes or 8 bytes wide.  These helpers deal with the
- * two forms transparently by looking at the headers i8count field.
- *
- * For 64-bit inode number the most significant byte must be zero.
- */
-static xfs_ino_t
-xfs_dir2_sf_get_ino(
-	struct xfs_dir2_sf_hdr	*hdr,
-	xfs_dir2_inou_t		*from)
-{
-	if (hdr->i8count)
-		return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL;
-	else
-		return get_unaligned_be32(&from->i4.i);
-}
-
-static void
-xfs_dir2_sf_put_ino(
-	struct xfs_dir2_sf_hdr	*hdr,
-	xfs_dir2_inou_t		*to,
-	xfs_ino_t		ino)
-{
-	ASSERT((ino & 0xff00000000000000ULL) == 0);
-
-	if (hdr->i8count)
-		put_unaligned_be64(ino, &to->i8.i);
-	else
-		put_unaligned_be32(ino, &to->i4.i);
-}
-
-xfs_ino_t
-xfs_dir2_sf_get_parent_ino(
-	struct xfs_dir2_sf_hdr	*hdr)
-{
-	return xfs_dir2_sf_get_ino(hdr, &hdr->parent);
-}
-
-static void
-xfs_dir2_sf_put_parent_ino(
-	struct xfs_dir2_sf_hdr	*hdr,
-	xfs_ino_t		ino)
-{
-	xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino);
-}
-
-/*
- * In short-form directory entries the inode numbers are stored at variable
- * offset behind the entry name.  The inode numbers may only be accessed
- * through the helpers below.
- */
-static xfs_dir2_inou_t *
-xfs_dir2_sfe_inop(
-	struct xfs_dir2_sf_entry *sfep)
-{
-	return (xfs_dir2_inou_t *)&sfep->name[sfep->namelen];
-}
-
-xfs_ino_t
-xfs_dir2_sfe_get_ino(
-	struct xfs_dir2_sf_hdr	*hdr,
-	struct xfs_dir2_sf_entry *sfep)
-{
-	return xfs_dir2_sf_get_ino(hdr, xfs_dir2_sfe_inop(sfep));
-}
-
-static void
-xfs_dir2_sfe_put_ino(
-	struct xfs_dir2_sf_hdr	*hdr,
-	struct xfs_dir2_sf_entry *sfep,
-	xfs_ino_t		ino)
-{
-	xfs_dir2_sf_put_ino(hdr, xfs_dir2_sfe_inop(sfep), ino);
-}
-
 /*
  * Given a block directory (dp/block), calculate its size as a shortform (sf)
  * directory and a header for the sf directory, if it will fit it the
@@ -142,7 +68,7 @@ xfs_dir2_sfe_put_ino(
 int						/* size for sf form */
 xfs_dir2_block_sfsize(
 	xfs_inode_t		*dp,		/* incore inode pointer */
-	xfs_dir2_data_hdr_t	*hdr,		/* block directory data */
+	xfs_dir2_block_t	*block,		/* block directory data */
 	xfs_dir2_sf_hdr_t	*sfhp)		/* output: header for sf form */
 {
 	xfs_dir2_dataptr_t	addr;		/* data entry address */
@@ -162,7 +88,7 @@ xfs_dir2_block_sfsize(
 	mp = dp->i_mount;
 
 	count = i8count = namelen = 0;
-	btp = xfs_dir2_block_tail_p(mp, hdr);
+	btp = xfs_dir2_block_tail_p(mp, block);
 	blp = xfs_dir2_block_leaf_p(btp);
 
 	/*
@@ -175,7 +101,7 @@ xfs_dir2_block_sfsize(
 		 * Calculate the pointer to the entry at hand.
 		 */
 		dep = (xfs_dir2_data_entry_t *)
-		      ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr));
+		      ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
 		/*
 		 * Detect . and .., so we can special-case them.
 		 * . is not included in sf directories.
@@ -212,7 +138,7 @@ xfs_dir2_block_sfsize(
 	 */
 	sfhp->count = count;
 	sfhp->i8count = i8count;
-	xfs_dir2_sf_put_parent_ino(sfhp, parent);
+	xfs_dir2_sf_put_inumber((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent);
 	return size;
 }
 
@@ -227,7 +153,7 @@ xfs_dir2_block_to_sf(
 	int			size,		/* shortform directory size */
 	xfs_dir2_sf_hdr_t	*sfhp)		/* shortform directory hdr */
 {
-	xfs_dir2_data_hdr_t	*hdr;		/* block header */
+	xfs_dir2_block_t	*block;		/* block structure */
 	xfs_dir2_block_tail_t	*btp;		/* block tail pointer */
 	xfs_dir2_data_entry_t	*dep;		/* data entry pointer */
 	xfs_inode_t		*dp;		/* incore directory inode */
@@ -238,7 +164,8 @@ xfs_dir2_block_to_sf(
 	xfs_mount_t		*mp;		/* filesystem mount point */
 	char			*ptr;		/* current data pointer */
 	xfs_dir2_sf_entry_t	*sfep;		/* shortform entry */
-	xfs_dir2_sf_hdr_t	*sfp;		/* shortform directory header */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
+	xfs_ino_t               temp;
 
 	trace_xfs_dir2_block_to_sf(args);
 
@@ -249,14 +176,13 @@ xfs_dir2_block_to_sf(
 	 * Make a copy of the block data, so we can shrink the inode
 	 * and add local data.
 	 */
-	hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP);
-	memcpy(hdr, bp->data, mp->m_dirblksize);
+	block = kmem_alloc(mp->m_dirblksize, KM_SLEEP);
+	memcpy(block, bp->data, mp->m_dirblksize);
 	logflags = XFS_ILOG_CORE;
 	if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) {
 		ASSERT(error != ENOSPC);
 		goto out;
 	}
-
 	/*
 	 * The buffer is now unconditionally gone, whether
 	 * xfs_dir2_shrink_inode worked or not.
@@ -272,14 +198,14 @@ xfs_dir2_block_to_sf(
 	/*
 	 * Copy the header into the newly allocate local space.
 	 */
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 	memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count));
 	dp->i_d.di_size = size;
 	/*
 	 * Set up to loop over the block's entries.
 	 */
-	btp = xfs_dir2_block_tail_p(mp, hdr);
-	ptr = (char *)(hdr + 1);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	ptr = (char *)block->u;
 	endptr = (char *)xfs_dir2_block_leaf_p(btp);
 	sfep = xfs_dir2_sf_firstentry(sfp);
 	/*
@@ -307,7 +233,7 @@ xfs_dir2_block_to_sf(
 		else if (dep->namelen == 2 &&
 			 dep->name[0] == '.' && dep->name[1] == '.')
 			ASSERT(be64_to_cpu(dep->inumber) ==
-			       xfs_dir2_sf_get_parent_ino(sfp));
+			       xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
 		/*
 		 * Normal entry, copy it into shortform.
 		 */
@@ -315,11 +241,11 @@ xfs_dir2_block_to_sf(
 			sfep->namelen = dep->namelen;
 			xfs_dir2_sf_put_offset(sfep,
 				(xfs_dir2_data_aoff_t)
-				((char *)dep - (char *)hdr));
+				((char *)dep - (char *)block));
 			memcpy(sfep->name, dep->name, dep->namelen);
-			xfs_dir2_sfe_put_ino(sfp, sfep,
-					     be64_to_cpu(dep->inumber));
-
+			temp = be64_to_cpu(dep->inumber);
+			xfs_dir2_sf_put_inumber(sfp, &temp,
+				xfs_dir2_sf_inumberp(sfep));
 			sfep = xfs_dir2_sf_nextentry(sfp, sfep);
 		}
 		ptr += xfs_dir2_data_entsize(dep->namelen);
@@ -328,7 +254,7 @@ xfs_dir2_block_to_sf(
 	xfs_dir2_sf_check(args);
 out:
 	xfs_trans_log_inode(args->trans, dp, logflags);
-	kmem_free(hdr);
+	kmem_free(block);
 	return error;
 }
 
@@ -351,7 +277,7 @@ xfs_dir2_sf_addname(
 	xfs_dir2_data_aoff_t	offset = 0;	/* offset for new entry */
 	int			old_isize;	/* di_size before adding name */
 	int			pick;		/* which algorithm to use */
-	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
 	xfs_dir2_sf_entry_t	*sfep = NULL;	/* shortform entry */
 
 	trace_xfs_dir2_sf_addname(args);
@@ -368,19 +294,19 @@ xfs_dir2_sf_addname(
 	}
 	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 	/*
 	 * Compute entry (and change in) size.
 	 */
-	add_entsize = xfs_dir2_sf_entsize(sfp, args->namelen);
+	add_entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
 	incr_isize = add_entsize;
 	objchange = 0;
 #if XFS_BIG_INUMS
 	/*
 	 * Do we have to change to 8 byte inodes?
 	 */
-	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
+	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) {
 		/*
 		 * Yes, adjust the entry size and the total size.
 		 */
@@ -388,7 +314,7 @@ xfs_dir2_sf_addname(
 			(uint)sizeof(xfs_dir2_ino8_t) -
 			(uint)sizeof(xfs_dir2_ino4_t);
 		incr_isize +=
-			(sfp->count + 2) *
+			(sfp->hdr.count + 2) *
 			((uint)sizeof(xfs_dir2_ino8_t) -
 			 (uint)sizeof(xfs_dir2_ino4_t));
 		objchange = 1;
@@ -458,21 +384,21 @@ xfs_dir2_sf_addname_easy(
 {
 	int			byteoff;	/* byte offset in sf dir */
 	xfs_inode_t		*dp;		/* incore directory inode */
-	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
 
 	dp = args->dp;
 
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 	byteoff = (int)((char *)sfep - (char *)sfp);
 	/*
 	 * Grow the in-inode space.
 	 */
-	xfs_idata_realloc(dp, xfs_dir2_sf_entsize(sfp, args->namelen),
+	xfs_idata_realloc(dp, xfs_dir2_sf_entsize_byname(sfp, args->namelen),
 		XFS_DATA_FORK);
 	/*
 	 * Need to set up again due to realloc of the inode data.
 	 */
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 	sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff);
 	/*
 	 * Fill in the new entry.
@@ -480,14 +406,15 @@ xfs_dir2_sf_addname_easy(
 	sfep->namelen = args->namelen;
 	xfs_dir2_sf_put_offset(sfep, offset);
 	memcpy(sfep->name, args->name, sfep->namelen);
-	xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
+	xfs_dir2_sf_put_inumber(sfp, &args->inumber,
+		xfs_dir2_sf_inumberp(sfep));
 	/*
 	 * Update the header and inode.
 	 */
-	sfp->count++;
+	sfp->hdr.count++;
 #if XFS_BIG_INUMS
 	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM)
-		sfp->i8count++;
+		sfp->hdr.i8count++;
 #endif
 	dp->i_d.di_size = new_isize;
 	xfs_dir2_sf_check(args);
@@ -517,19 +444,19 @@ xfs_dir2_sf_addname_hard(
 	xfs_dir2_data_aoff_t	offset;		/* current offset value */
 	int			old_isize;	/* previous di_size */
 	xfs_dir2_sf_entry_t	*oldsfep;	/* entry in original dir */
-	xfs_dir2_sf_hdr_t	*oldsfp;	/* original shortform dir */
+	xfs_dir2_sf_t		*oldsfp;	/* original shortform dir */
 	xfs_dir2_sf_entry_t	*sfep;		/* entry in new dir */
-	xfs_dir2_sf_hdr_t	*sfp;		/* new shortform dir */
+	xfs_dir2_sf_t		*sfp;		/* new shortform dir */
 
 	/*
 	 * Copy the old directory to the stack buffer.
 	 */
 	dp = args->dp;
 
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 	old_isize = (int)dp->i_d.di_size;
 	buf = kmem_alloc(old_isize, KM_SLEEP);
-	oldsfp = (xfs_dir2_sf_hdr_t *)buf;
+	oldsfp = (xfs_dir2_sf_t *)buf;
 	memcpy(oldsfp, sfp, old_isize);
 	/*
 	 * Loop over the old directory finding the place we're going
@@ -558,7 +485,7 @@ xfs_dir2_sf_addname_hard(
 	/*
 	 * Reset the pointer since the buffer was reallocated.
 	 */
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 	/*
 	 * Copy the first part of the directory, including the header.
 	 */
@@ -571,11 +498,12 @@ xfs_dir2_sf_addname_hard(
 	sfep->namelen = args->namelen;
 	xfs_dir2_sf_put_offset(sfep, offset);
 	memcpy(sfep->name, args->name, sfep->namelen);
-	xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
-	sfp->count++;
+	xfs_dir2_sf_put_inumber(sfp, &args->inumber,
+		xfs_dir2_sf_inumberp(sfep));
+	sfp->hdr.count++;
 #if XFS_BIG_INUMS
 	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
-		sfp->i8count++;
+		sfp->hdr.i8count++;
 #endif
 	/*
 	 * If there's more left to copy, do that.
@@ -609,14 +537,14 @@ xfs_dir2_sf_addname_pick(
 	xfs_mount_t		*mp;		/* filesystem mount point */
 	xfs_dir2_data_aoff_t	offset;		/* data block offset */
 	xfs_dir2_sf_entry_t	*sfep;		/* shortform entry */
-	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
 	int			size;		/* entry's data size */
 	int			used;		/* data bytes used */
 
 	dp = args->dp;
 	mp = dp->i_mount;
 
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 	size = xfs_dir2_data_entsize(args->namelen);
 	offset = XFS_DIR2_DATA_FIRST_OFFSET;
 	sfep = xfs_dir2_sf_firstentry(sfp);
@@ -626,7 +554,7 @@ xfs_dir2_sf_addname_pick(
 	 * Keep track of data offset and whether we've seen a place
 	 * to insert the new entry.
 	 */
-	for (i = 0; i < sfp->count; i++) {
+	for (i = 0; i < sfp->hdr.count; i++) {
 		if (!holefit)
 			holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
 		offset = xfs_dir2_sf_get_offset(sfep) +
@@ -638,7 +566,7 @@ xfs_dir2_sf_addname_pick(
 	 * was a data block (block form directory).
 	 */
 	used = offset +
-	       (sfp->count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+	       (sfp->hdr.count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
 	       (uint)sizeof(xfs_dir2_block_tail_t);
 	/*
 	 * If it won't fit in a block form then we can't insert it,
@@ -684,30 +612,30 @@ xfs_dir2_sf_check(
 	xfs_ino_t		ino;		/* entry inode number */
 	int			offset;		/* data offset */
 	xfs_dir2_sf_entry_t	*sfep;		/* shortform dir entry */
-	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
 
 	dp = args->dp;
 
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 	offset = XFS_DIR2_DATA_FIRST_OFFSET;
-	ino = xfs_dir2_sf_get_parent_ino(sfp);
+	ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
 	i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
 
 	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
-	     i < sfp->count;
+	     i < sfp->hdr.count;
 	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
 		ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset);
-		ino = xfs_dir2_sfe_get_ino(sfp, sfep);
+		ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
 		i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
 		offset =
 			xfs_dir2_sf_get_offset(sfep) +
 			xfs_dir2_data_entsize(sfep->namelen);
 	}
-	ASSERT(i8count == sfp->i8count);
+	ASSERT(i8count == sfp->hdr.i8count);
 	ASSERT(XFS_BIG_INUMS || i8count == 0);
 	ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);
 	ASSERT(offset +
-	       (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+	       (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
 	       (uint)sizeof(xfs_dir2_block_tail_t) <=
 	       dp->i_mount->m_dirblksize);
 }
@@ -723,7 +651,7 @@ xfs_dir2_sf_create(
 {
 	xfs_inode_t	*dp;		/* incore directory inode */
 	int		i8count;	/* parent inode is an 8-byte number */
-	xfs_dir2_sf_hdr_t *sfp;		/* shortform structure */
+	xfs_dir2_sf_t	*sfp;		/* shortform structure */
 	int		size;		/* directory size */
 
 	trace_xfs_dir2_sf_create(args);
@@ -753,13 +681,13 @@ xfs_dir2_sf_create(
 	/*
 	 * Fill in the header,
 	 */
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-	sfp->i8count = i8count;
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	sfp->hdr.i8count = i8count;
 	/*
 	 * Now can put in the inode number, since i8count is set.
 	 */
-	xfs_dir2_sf_put_parent_ino(sfp, pino);
-	sfp->count = 0;
+	xfs_dir2_sf_put_inumber(sfp, &pino, &sfp->hdr.parent);
+	sfp->hdr.count = 0;
 	dp->i_d.di_size = size;
 	xfs_dir2_sf_check(args);
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
@@ -777,7 +705,7 @@ xfs_dir2_sf_getdents(
 	xfs_mount_t		*mp;		/* filesystem mount point */
 	xfs_dir2_dataptr_t	off;		/* current entry's offset */
 	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
-	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
 	xfs_dir2_dataptr_t	dot_offset;
 	xfs_dir2_dataptr_t	dotdot_offset;
 	xfs_ino_t		ino;
@@ -796,9 +724,9 @@ xfs_dir2_sf_getdents(
 	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
 
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 
-	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
+	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 
 	/*
 	 * If the block number in the offset is out of range, we're done.
@@ -831,7 +759,7 @@ xfs_dir2_sf_getdents(
 	 * Put .. entry unless we're starting past it.
 	 */
 	if (*offset <= dotdot_offset) {
-		ino = xfs_dir2_sf_get_parent_ino(sfp);
+		ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
 		if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) {
 			*offset = dotdot_offset & 0x7fffffff;
 			return 0;
@@ -842,7 +770,7 @@ xfs_dir2_sf_getdents(
 	 * Loop while there are more entries and put'ing works.
 	 */
 	sfep = xfs_dir2_sf_firstentry(sfp);
-	for (i = 0; i < sfp->count; i++) {
+	for (i = 0; i < sfp->hdr.count; i++) {
 		off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 				xfs_dir2_sf_get_offset(sfep));
 
@@ -851,7 +779,7 @@ xfs_dir2_sf_getdents(
 			continue;
 		}
 
-		ino = xfs_dir2_sfe_get_ino(sfp, sfep);
+		ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
 		if (filldir(dirent, (char *)sfep->name, sfep->namelen,
 			    off & 0x7fffffff, ino, DT_UNKNOWN)) {
 			*offset = off & 0x7fffffff;
@@ -877,7 +805,7 @@ xfs_dir2_sf_lookup(
 	int			i;		/* entry index */
 	int			error;
 	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
-	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
 	enum xfs_dacmp		cmp;		/* comparison result */
 	xfs_dir2_sf_entry_t	*ci_sfep;	/* case-insens. entry */
 
@@ -896,8 +824,8 @@ xfs_dir2_sf_lookup(
 	}
 	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 	/*
 	 * Special case for .
 	 */
@@ -911,7 +839,7 @@ xfs_dir2_sf_lookup(
 	 */
 	if (args->namelen == 2 &&
 	    args->name[0] == '.' && args->name[1] == '.') {
-		args->inumber = xfs_dir2_sf_get_parent_ino(sfp);
+		args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
 		args->cmpresult = XFS_CMP_EXACT;
 		return XFS_ERROR(EEXIST);
 	}
@@ -919,7 +847,7 @@ xfs_dir2_sf_lookup(
 	 * Loop over all the entries trying to match ours.
 	 */
 	ci_sfep = NULL;
-	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
+	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
 				i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
 		/*
 		 * Compare name and if it's an exact match, return the inode
@@ -930,7 +858,8 @@ xfs_dir2_sf_lookup(
 								sfep->namelen);
 		if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
 			args->cmpresult = cmp;
-			args->inumber = xfs_dir2_sfe_get_ino(sfp, sfep);
+			args->inumber = xfs_dir2_sf_get_inumber(sfp,
+						xfs_dir2_sf_inumberp(sfep));
 			if (cmp == XFS_CMP_EXACT)
 				return XFS_ERROR(EEXIST);
 			ci_sfep = sfep;
@@ -962,7 +891,7 @@ xfs_dir2_sf_removename(
 	int			newsize;	/* new inode size */
 	int			oldsize;	/* old inode size */
 	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
-	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
 
 	trace_xfs_dir2_sf_removename(args);
 
@@ -979,31 +908,32 @@ xfs_dir2_sf_removename(
 	}
 	ASSERT(dp->i_df.if_bytes == oldsize);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-	ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->i8count));
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 	/*
 	 * Loop over the old directory entries.
 	 * Find the one we're deleting.
 	 */
-	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
+	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
 				i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
 		if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
 								XFS_CMP_EXACT) {
-			ASSERT(xfs_dir2_sfe_get_ino(sfp, sfep) ==
-			       args->inumber);
+			ASSERT(xfs_dir2_sf_get_inumber(sfp,
+						xfs_dir2_sf_inumberp(sfep)) ==
+								args->inumber);
 			break;
 		}
 	}
 	/*
 	 * Didn't find it.
 	 */
-	if (i == sfp->count)
+	if (i == sfp->hdr.count)
 		return XFS_ERROR(ENOENT);
 	/*
 	 * Calculate sizes.
 	 */
 	byteoff = (int)((char *)sfep - (char *)sfp);
-	entsize = xfs_dir2_sf_entsize(sfp, args->namelen);
+	entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
 	newsize = oldsize - entsize;
 	/*
 	 * Copy the part if any after the removed entry, sliding it down.
@@ -1014,22 +944,22 @@ xfs_dir2_sf_removename(
 	/*
 	 * Fix up the header and file size.
 	 */
-	sfp->count--;
+	sfp->hdr.count--;
 	dp->i_d.di_size = newsize;
 	/*
 	 * Reallocate, making it smaller.
 	 */
 	xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK);
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 #if XFS_BIG_INUMS
 	/*
 	 * Are we changing inode number size?
 	 */
 	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) {
-		if (sfp->i8count == 1)
+		if (sfp->hdr.i8count == 1)
 			xfs_dir2_sf_toino4(args);
 		else
-			sfp->i8count--;
+			sfp->hdr.i8count--;
 	}
 #endif
 	xfs_dir2_sf_check(args);
@@ -1053,7 +983,7 @@ xfs_dir2_sf_replace(
 	int			i8elevated;	/* sf_toino8 set i8count=1 */
 #endif
 	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
-	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
 
 	trace_xfs_dir2_sf_replace(args);
 
@@ -1069,19 +999,19 @@ xfs_dir2_sf_replace(
 	}
 	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 #if XFS_BIG_INUMS
 	/*
 	 * New inode number is large, and need to convert to 8-byte inodes.
 	 */
-	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
+	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) {
 		int	error;			/* error return value */
 		int	newsize;		/* new inode size */
 
 		newsize =
 			dp->i_df.if_bytes +
-			(sfp->count + 1) *
+			(sfp->hdr.count + 1) *
 			((uint)sizeof(xfs_dir2_ino8_t) -
 			 (uint)sizeof(xfs_dir2_ino4_t));
 		/*
@@ -1099,7 +1029,7 @@ xfs_dir2_sf_replace(
 		 */
 		xfs_dir2_sf_toino8(args);
 		i8elevated = 1;
-		sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+		sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 	} else
 		i8elevated = 0;
 #endif
@@ -1110,32 +1040,34 @@ xfs_dir2_sf_replace(
 	if (args->namelen == 2 &&
 	    args->name[0] == '.' && args->name[1] == '.') {
 #if XFS_BIG_INUMS || defined(DEBUG)
-		ino = xfs_dir2_sf_get_parent_ino(sfp);
+		ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
 		ASSERT(args->inumber != ino);
 #endif
-		xfs_dir2_sf_put_parent_ino(sfp, args->inumber);
+		xfs_dir2_sf_put_inumber(sfp, &args->inumber, &sfp->hdr.parent);
 	}
 	/*
 	 * Normal entry, look for the name.
 	 */
 	else {
 		for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
-				i < sfp->count;
+				i < sfp->hdr.count;
 				i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
 			if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
 								XFS_CMP_EXACT) {
 #if XFS_BIG_INUMS || defined(DEBUG)
-				ino = xfs_dir2_sfe_get_ino(sfp, sfep);
+				ino = xfs_dir2_sf_get_inumber(sfp,
+					xfs_dir2_sf_inumberp(sfep));
 				ASSERT(args->inumber != ino);
 #endif
-				xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
+				xfs_dir2_sf_put_inumber(sfp, &args->inumber,
+					xfs_dir2_sf_inumberp(sfep));
 				break;
 			}
 		}
 		/*
 		 * Didn't find it.
 		 */
-		if (i == sfp->count) {
+		if (i == sfp->hdr.count) {
 			ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
 #if XFS_BIG_INUMS
 			if (i8elevated)
@@ -1153,10 +1085,10 @@ xfs_dir2_sf_replace(
 		/*
 		 * And the old count was one, so need to convert to small.
 		 */
-		if (sfp->i8count == 1)
+		if (sfp->hdr.i8count == 1)
 			xfs_dir2_sf_toino4(args);
 		else
-			sfp->i8count--;
+			sfp->hdr.i8count--;
 	}
 	/*
 	 * See if the old number was small, the new number is large.
@@ -1167,9 +1099,9 @@ xfs_dir2_sf_replace(
 		 * add to the i8count unless we just converted to 8-byte
 		 * inodes (which does an implied i8count = 1)
 		 */
-		ASSERT(sfp->i8count != 0);
+		ASSERT(sfp->hdr.i8count != 0);
 		if (!i8elevated)
-			sfp->i8count++;
+			sfp->hdr.i8count++;
 	}
 #endif
 	xfs_dir2_sf_check(args);
@@ -1189,12 +1121,13 @@ xfs_dir2_sf_toino4(
 	char			*buf;		/* old dir's buffer */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			i;		/* entry index */
+	xfs_ino_t		ino;		/* entry inode number */
 	int			newsize;	/* new inode size */
 	xfs_dir2_sf_entry_t	*oldsfep;	/* old sf entry */
-	xfs_dir2_sf_hdr_t	*oldsfp;	/* old sf directory */
+	xfs_dir2_sf_t		*oldsfp;	/* old sf directory */
 	int			oldsize;	/* old inode size */
 	xfs_dir2_sf_entry_t	*sfep;		/* new sf entry */
-	xfs_dir2_sf_hdr_t	*sfp;		/* new sf directory */
+	xfs_dir2_sf_t		*sfp;		/* new sf directory */
 
 	trace_xfs_dir2_sf_toino4(args);
 
@@ -1207,42 +1140,44 @@ xfs_dir2_sf_toino4(
 	 */
 	oldsize = dp->i_df.if_bytes;
 	buf = kmem_alloc(oldsize, KM_SLEEP);
-	oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-	ASSERT(oldsfp->i8count == 1);
+	oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	ASSERT(oldsfp->hdr.i8count == 1);
 	memcpy(buf, oldsfp, oldsize);
 	/*
 	 * Compute the new inode size.
 	 */
 	newsize =
 		oldsize -
-		(oldsfp->count + 1) *
+		(oldsfp->hdr.count + 1) *
 		((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
 	xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
 	xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
 	/*
 	 * Reset our pointers, the data has moved.
 	 */
-	oldsfp = (xfs_dir2_sf_hdr_t *)buf;
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+	oldsfp = (xfs_dir2_sf_t *)buf;
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 	/*
 	 * Fill in the new header.
 	 */
-	sfp->count = oldsfp->count;
-	sfp->i8count = 0;
-	xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp));
+	sfp->hdr.count = oldsfp->hdr.count;
+	sfp->hdr.i8count = 0;
+	ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
+	xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
 	/*
 	 * Copy the entries field by field.
 	 */
 	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
 		    oldsfep = xfs_dir2_sf_firstentry(oldsfp);
-	     i < sfp->count;
+	     i < sfp->hdr.count;
 	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
 		  oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
 		sfep->namelen = oldsfep->namelen;
 		sfep->offset = oldsfep->offset;
 		memcpy(sfep->name, oldsfep->name, sfep->namelen);
-		xfs_dir2_sfe_put_ino(sfp, sfep,
-			xfs_dir2_sfe_get_ino(oldsfp, oldsfep));
+		ino = xfs_dir2_sf_get_inumber(oldsfp,
+			xfs_dir2_sf_inumberp(oldsfep));
+		xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
 	}
 	/*
 	 * Clean up the inode.
@@ -1264,12 +1199,13 @@ xfs_dir2_sf_toino8(
 	char			*buf;		/* old dir's buffer */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			i;		/* entry index */
+	xfs_ino_t		ino;		/* entry inode number */
 	int			newsize;	/* new inode size */
 	xfs_dir2_sf_entry_t	*oldsfep;	/* old sf entry */
-	xfs_dir2_sf_hdr_t	*oldsfp;	/* old sf directory */
+	xfs_dir2_sf_t		*oldsfp;	/* old sf directory */
 	int			oldsize;	/* old inode size */
 	xfs_dir2_sf_entry_t	*sfep;		/* new sf entry */
-	xfs_dir2_sf_hdr_t	*sfp;		/* new sf directory */
+	xfs_dir2_sf_t		*sfp;		/* new sf directory */
 
 	trace_xfs_dir2_sf_toino8(args);
 
@@ -1282,42 +1218,44 @@ xfs_dir2_sf_toino8(
 	 */
 	oldsize = dp->i_df.if_bytes;
 	buf = kmem_alloc(oldsize, KM_SLEEP);
-	oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-	ASSERT(oldsfp->i8count == 0);
+	oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	ASSERT(oldsfp->hdr.i8count == 0);
 	memcpy(buf, oldsfp, oldsize);
 	/*
 	 * Compute the new inode size.
 	 */
 	newsize =
 		oldsize +
-		(oldsfp->count + 1) *
+		(oldsfp->hdr.count + 1) *
 		((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
 	xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
 	xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
 	/*
 	 * Reset our pointers, the data has moved.
 	 */
-	oldsfp = (xfs_dir2_sf_hdr_t *)buf;
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+	oldsfp = (xfs_dir2_sf_t *)buf;
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 	/*
 	 * Fill in the new header.
 	 */
-	sfp->count = oldsfp->count;
-	sfp->i8count = 1;
-	xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp));
+	sfp->hdr.count = oldsfp->hdr.count;
+	sfp->hdr.i8count = 1;
+	ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
+	xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
 	/*
 	 * Copy the entries field by field.
 	 */
 	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
 		    oldsfep = xfs_dir2_sf_firstentry(oldsfp);
-	     i < sfp->count;
+	     i < sfp->hdr.count;
 	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
 		  oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
 		sfep->namelen = oldsfep->namelen;
 		sfep->offset = oldsfep->offset;
 		memcpy(sfep->name, oldsfep->name, sfep->namelen);
-		xfs_dir2_sfe_put_ino(sfp, sfep,
-			xfs_dir2_sfe_get_ino(oldsfp, oldsfep));
+		ino = xfs_dir2_sf_get_inumber(oldsfp,
+			xfs_dir2_sf_inumberp(oldsfep));
+		xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
 	}
 	/*
 	 * Clean up the inode.
diff --git a/trunk/fs/xfs/xfs_dir2_sf.h b/trunk/fs/xfs/xfs_dir2_sf.h
new file mode 100644
index 000000000000..6ac44b550d39
--- /dev/null
+++ b/trunk/fs/xfs/xfs_dir2_sf.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DIR2_SF_H__
+#define	__XFS_DIR2_SF_H__
+
+/*
+ * Directory layout when stored internal to an inode.
+ *
+ * Small directories are packed as tightly as possible so as to
+ * fit into the literal area of the inode.
+ */
+
+struct uio;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_dir2_block;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * Inode number stored as 8 8-bit values (used when hdr.i8count != 0).
+ */
+typedef	struct { __uint8_t i[8]; } xfs_dir2_ino8_t;
+
+/*
+ * Inode number stored as 4 8-bit values (used when hdr.i8count == 0).
+ * Works a lot of the time, when all the inode numbers in a directory
+ * fit in 32 bits, i.e. none exceeds XFS_DIR2_MAX_SHORT_INUM.
+ */
+typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t;
+
+typedef union {
+	xfs_dir2_ino8_t	i8;	/* 8-byte form */
+	xfs_dir2_ino4_t	i4;	/* 4-byte form */
+} xfs_dir2_inou_t;
+#define	XFS_DIR2_MAX_SHORT_INUM	((xfs_ino_t)0xffffffffULL)
+
+/*
+ * Normalized data block offset of the entry (really xfs_dir2_data_off_t): the
+ * 16-bit byte offset into the single block form; see xfs_dir2_sf_get_offset().
+ */
+typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
+
+/*
+ * The parent directory ("..") has a dedicated field, and the self-pointer
+ * (".") must be calculated on the fly.
+ *
+ * Entries are packed toward the top as tightly as possible.  The header
+ * and the elements must be memcpy'd out into a work area to get correct
+ * alignment for the inode number fields.
+ */
+typedef struct xfs_dir2_sf_hdr {
+	__uint8_t		count;		/* count of entries */
+	__uint8_t		i8count;	/* count of 8-byte inode #s */
+	xfs_dir2_inou_t		parent;		/* parent ino, i4 if !i8count */
+} __arch_pack xfs_dir2_sf_hdr_t;
+
+typedef struct xfs_dir2_sf_entry {
+	__uint8_t		namelen;	/* actual name length */
+	xfs_dir2_sf_off_t	offset;		/* saved offset */
+	__uint8_t		name[1];	/* namelen bytes of name */
+	xfs_dir2_inou_t		inumber;	/* really at name[namelen] */
+} __arch_pack xfs_dir2_sf_entry_t;
+
+typedef struct xfs_dir2_sf {
+	xfs_dir2_sf_hdr_t	hdr;		/* shortform header */
+	xfs_dir2_sf_entry_t	list[1];	/* use xfs_dir2_sf_firstentry() */
+} xfs_dir2_sf_t;
+
+/* Header bytes in use: the parent inumber is 4 bytes when i8count == 0. */
+static inline int xfs_dir2_sf_hdr_size(int i8count)
+{
+	return (uint)sizeof(xfs_dir2_sf_hdr_t) - (i8count ? 0 :
+		(uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
+}
+
+static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep)
+{	/* the inode number is stored directly after the namelen name bytes */
+	return (xfs_dir2_inou_t *)(sfep->name + sfep->namelen);
+}
+
+/* Decode the inumber at @from: 4 bytes if hdr.i8count == 0, else 8 bytes. */
+static inline xfs_intino_t
+xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
+{
+	return sfp->hdr.i8count ? (xfs_intino_t)XFS_GET_DIR_INO8(from->i8) :
+				  (xfs_intino_t)XFS_GET_DIR_INO4(from->i4);
+}
+
+static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
+						xfs_dir2_inou_t *to)
+{
+	if (sfp->hdr.i8count)	/* directory stores 8-byte inode numbers */
+		XFS_PUT_DIR_INO8(*from, to->i8);
+	else			/* no 8-byte inode numbers: 4-byte form */
+		XFS_PUT_DIR_INO4(*from, to->i4);
+}
+
+static inline xfs_dir2_data_aoff_t	/* offset decoded from sfep->offset.i */
+xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
+{
+	return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i);
+}
+
+static inline void			/* stores off into sfep->offset.i */
+xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
+{
+	INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off);
+}
+
+/* Entry size for a @len-byte name; inumber is 4 bytes when i8count == 0. */
+static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
+{
+	return (uint)sizeof(xfs_dir2_sf_entry_t) - 1 + len - (sfp->hdr.i8count ? 0 :
+		(uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
+}
+
+/* Size of the existing entry @sfep, i.e. sized by its own name length. */
+static inline int
+xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
+{
+	/* same arithmetic as the by-name variant, with len = sfep->namelen */
+	return xfs_dir2_sf_entsize_byname(sfp, sfep->namelen);
+}
+
+/* The first entry starts right after the variable-size header. */
+static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp)
+{
+	return (void *)((char *)sfp + xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+}
+
+/* Step over @sfep; its size comes from xfs_dir2_sf_entsize_byentry(). */
+static inline xfs_dir2_sf_entry_t *
+xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
+{
+	return (void *)((char *)sfep + xfs_dir2_sf_entsize_byentry(sfp, sfep));
+}
+
+/*
+ * Functions.
+ */
+extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
+				 struct xfs_dir2_block *block,
+				 xfs_dir2_sf_hdr_t *sfhp);
+extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp,
+				int size, xfs_dir2_sf_hdr_t *sfhp);
+extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
+extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
+extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent,
+				xfs_off_t *offset, filldir_t filldir);
+extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
+extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
+
+#endif	/* __XFS_DIR2_SF_H__ */
diff --git a/trunk/fs/xfs/xfs_fs.h b/trunk/fs/xfs/xfs_fs.h
index c13fed8c394a..8f6fc1a96386 100644
--- a/trunk/fs/xfs/xfs_fs.h
+++ b/trunk/fs/xfs/xfs_fs.h
@@ -249,11 +249,6 @@ typedef struct xfs_fsop_resblks {
 #define XFS_MAX_LOG_BYTES \
 	((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
 
-/* Used for sanity checks on superblock */
-#define XFS_MAX_DBLOCKS(s) ((xfs_drfsbno_t)(s)->sb_agcount * (s)->sb_agblocks)
-#define XFS_MIN_DBLOCKS(s) ((xfs_drfsbno_t)((s)->sb_agcount - 1) *	\
-			 (s)->sb_agblocks + XFS_MIN_AG_BLOCKS)
-
 /*
  * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT
  */
diff --git a/trunk/fs/xfs/xfs_ialloc.c b/trunk/fs/xfs/xfs_ialloc.c
index dd5628bd8d0b..84ebeec16642 100644
--- a/trunk/fs/xfs/xfs_ialloc.c
+++ b/trunk/fs/xfs/xfs_ialloc.c
@@ -683,7 +683,7 @@ xfs_dialloc(
 			return 0;
 		}
 		agi = XFS_BUF_TO_AGI(agbp);
-		ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
+		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
 	} else {
 		/*
 		 * Continue where we left off before.  In this case, we
@@ -691,7 +691,7 @@ xfs_dialloc(
 		 */
 		agbp = *IO_agbp;
 		agi = XFS_BUF_TO_AGI(agbp);
-		ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
+		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
 		ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
 	}
 	mp = tp->t_mountp;
@@ -775,7 +775,7 @@ xfs_dialloc(
 		if (error)
 			goto nextag;
 		agi = XFS_BUF_TO_AGI(agbp);
-		ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
+		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
 	}
 	/*
 	 * Here with an allocation group that has a free inode.
@@ -944,7 +944,7 @@ xfs_dialloc(
 	 * See if the most recently allocated block has any free.
 	 */
 newino:
-	if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
+	if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
 		error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
 					 XFS_LOOKUP_EQ, &i);
 		if (error)
@@ -1085,7 +1085,7 @@ xfs_difree(
 		return error;
 	}
 	agi = XFS_BUF_TO_AGI(agbp);
-	ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
+	ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
 	ASSERT(agbno < be32_to_cpu(agi->agi_length));
 	/*
 	 * Initialize the cursor.
@@ -1438,7 +1438,7 @@ xfs_ialloc_log_agi(
 	xfs_agi_t		*agi;	/* allocation group header */
 
 	agi = XFS_BUF_TO_AGI(bp);
-	ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
+	ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
 #endif
 	/*
 	 * Compute byte offsets for the first and last fields.
@@ -1492,7 +1492,7 @@ xfs_read_agi(
 	/*
 	 * Validate the magic number of the agi block.
 	 */
-	agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) &&
+	agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
 		XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) &&
 		be32_to_cpu(agi->agi_seqno) == agno;
 	if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
diff --git a/trunk/fs/xfs/xfs_ialloc_btree.c b/trunk/fs/xfs/xfs_ialloc_btree.c
index c6a75815aea0..16921f55c542 100644
--- a/trunk/fs/xfs/xfs_ialloc_btree.c
+++ b/trunk/fs/xfs/xfs_ialloc_btree.c
@@ -31,6 +31,7 @@
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
+#include "xfs_btree_trace.h"
 #include "xfs_ialloc.h"
 #include "xfs_alloc.h"
 #include "xfs_error.h"
@@ -204,6 +205,72 @@ xfs_inobt_recs_inorder(
 }
 #endif	/* DEBUG */
 
+#ifdef XFS_BTREE_TRACE
+ktrace_t	*xfs_inobt_trace_buf;	/* buffer xfs_inobt_trace_enter() logs to */
+
+STATIC void
+xfs_inobt_trace_enter(
+	struct xfs_btree_cur	*cur,	/* cursor, recorded in the entry */
+	const char		*func,	/* recorded in the entry */
+	char			*s,	/* recorded in the entry */
+	int			type,	/* recorded as the first value */
+	int			line,	/* not passed to ktrace_enter() */
+	__psunsigned_t		a0,	/* a0-a10: recorded as (void *) */
+	__psunsigned_t		a1,
+	__psunsigned_t		a2,
+	__psunsigned_t		a3,
+	__psunsigned_t		a4,
+	__psunsigned_t		a5,
+	__psunsigned_t		a6,
+	__psunsigned_t		a7,
+	__psunsigned_t		a8,
+	__psunsigned_t		a9,
+	__psunsigned_t		a10)
+{
+	ktrace_enter(xfs_inobt_trace_buf, (void *)(__psint_t)type,
+		(void *)func, (void *)s, NULL, (void *)cur,
+		(void *)a0, (void *)a1, (void *)a2, (void *)a3,
+		(void *)a4, (void *)a5, (void *)a6, (void *)a7,
+		(void *)a8, (void *)a9, (void *)a10);
+}
+
+STATIC void
+xfs_inobt_trace_cursor(
+	struct xfs_btree_cur	*cur,	/* cursor to describe */
+	__uint32_t		*s0,	/* out: cur->bc_private.a.agno */
+	__uint64_t		*l0,	/* out: cur->bc_rec.i.ir_startino */
+	__uint64_t		*l1)	/* out: cur->bc_rec.i.ir_free */
+{
+	*s0 = cur->bc_private.a.agno;
+	*l0 = cur->bc_rec.i.ir_startino;
+	*l1 = cur->bc_rec.i.ir_free;
+}
+
+STATIC void
+xfs_inobt_trace_key(
+	struct xfs_btree_cur	*cur,	/* not used here */
+	union xfs_btree_key	*key,	/* key to describe */
+	__uint64_t		*l0,	/* out: be32_to_cpu(ir_startino) */
+	__uint64_t		*l1)	/* out: always 0 */
+{
+	*l0 = be32_to_cpu(key->inobt.ir_startino);
+	*l1 = 0;
+}
+
+STATIC void
+xfs_inobt_trace_record(
+	struct xfs_btree_cur	*cur,	/* not used here */
+	union xfs_btree_rec	*rec,	/* record to describe */
+	__uint64_t		*l0,	/* out: be32_to_cpu(ir_startino) */
+	__uint64_t		*l1,	/* out: be32_to_cpu(ir_freecount) */
+	__uint64_t		*l2)	/* out: be64_to_cpu(ir_free) */
+{
+	*l0 = be32_to_cpu(rec->inobt.ir_startino);
+	*l1 = be32_to_cpu(rec->inobt.ir_freecount);
+	*l2 = be64_to_cpu(rec->inobt.ir_free);
+}
+#endif /* XFS_BTREE_TRACE */
+
 static const struct xfs_btree_ops xfs_inobt_ops = {
 	.rec_len		= sizeof(xfs_inobt_rec_t),
 	.key_len		= sizeof(xfs_inobt_key_t),
@@ -219,10 +286,18 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
 	.init_rec_from_cur	= xfs_inobt_init_rec_from_cur,
 	.init_ptr_from_cur	= xfs_inobt_init_ptr_from_cur,
 	.key_diff		= xfs_inobt_key_diff,
+
 #ifdef DEBUG
 	.keys_inorder		= xfs_inobt_keys_inorder,
 	.recs_inorder		= xfs_inobt_recs_inorder,
 #endif
+
+#ifdef XFS_BTREE_TRACE
+	.trace_enter		= xfs_inobt_trace_enter,
+	.trace_cursor		= xfs_inobt_trace_cursor,
+	.trace_key		= xfs_inobt_trace_key,
+	.trace_record		= xfs_inobt_trace_record,
+#endif
 };
 
 /*
diff --git a/trunk/fs/xfs/xfs_iget.c b/trunk/fs/xfs/xfs_iget.c
index 7759812c1bbe..3631783b2b53 100644
--- a/trunk/fs/xfs/xfs_iget.c
+++ b/trunk/fs/xfs/xfs_iget.c
@@ -38,6 +38,7 @@
 #include "xfs_trans_priv.h"
 #include "xfs_inode_item.h"
 #include "xfs_bmap.h"
+#include "xfs_btree_trace.h"
 #include "xfs_trace.h"
 
 
diff --git a/trunk/fs/xfs/xfs_inode.c b/trunk/fs/xfs/xfs_inode.c
index 3cc21ddf9f7e..a098a20ca63e 100644
--- a/trunk/fs/xfs/xfs_inode.c
+++ b/trunk/fs/xfs/xfs_inode.c
@@ -37,6 +37,7 @@
 #include "xfs_buf_item.h"
 #include "xfs_inode_item.h"
 #include "xfs_btree.h"
+#include "xfs_btree_trace.h"
 #include "xfs_alloc.h"
 #include "xfs_ialloc.h"
 #include "xfs_bmap.h"
@@ -51,7 +52,7 @@ kmem_zone_t *xfs_ifork_zone;
 kmem_zone_t *xfs_inode_zone;
 
 /*
- * Used in xfs_itruncate_extents().  This is the maximum number of extents
+ * Used in xfs_itruncate().  This is the maximum number of extents
  * freed from a file in a single transaction.
  */
 #define	XFS_ITRUNC_MAX_EXTENTS	2
@@ -166,7 +167,7 @@ xfs_imap_to_bp(
 
 		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
 					(i << mp->m_sb.sb_inodelog));
-		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
+		di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC &&
 			    XFS_DINODE_GOOD_VERSION(dip->di_version);
 		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
 						XFS_ERRTAG_ITOBP_INOTOBP,
@@ -801,7 +802,7 @@ xfs_iread(
 	 * If we got something that isn't an inode it means someone
 	 * (nfs or dmi) has a stale handle.
 	 */
-	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {
+	if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) {
 #ifdef DEBUG
 		xfs_alert(mp,
 			"%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
@@ -1178,15 +1179,15 @@ xfs_ialloc(
  * at least do it for regular files.
  */
 #ifdef DEBUG
-STATIC void
+void
 xfs_isize_check(
-	struct xfs_inode	*ip,
-	xfs_fsize_t		isize)
+	xfs_mount_t	*mp,
+	xfs_inode_t	*ip,
+	xfs_fsize_t	isize)
 {
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		map_first;
-	int			nimaps;
-	xfs_bmbt_irec_t		imaps[2];
+	xfs_fileoff_t	map_first;
+	int		nimaps;
+	xfs_bmbt_irec_t	imaps[2];
 
 	if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
 		return;
@@ -1213,14 +1214,168 @@ xfs_isize_check(
 	ASSERT(nimaps == 1);
 	ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
 }
-#else	/* DEBUG */
-#define xfs_isize_check(ip, isize)
 #endif	/* DEBUG */
 
 /*
- * Free up the underlying blocks past new_size.  The new size must be smaller
- * than the current size.  This routine can be used both for the attribute and
- * data fork, and does not modify the inode size, which is left to the caller.
+ * Calculate the last possible buffered byte in a file.  This must
+ * include data that was buffered beyond the EOF by the write code.
+ * This also needs to deal with overflowing the xfs_fsize_t type
+ * which can happen for sizes near the limit.
+ *
+ * We also need to take into account any blocks beyond the EOF.  It
+ * may be the case that they were buffered by a write which failed.
+ * In that case the pages will still be in memory, but the inode size
+ * will never have been updated.
+ */
+STATIC xfs_fsize_t
+xfs_file_last_byte(
+	xfs_inode_t	*ip)
+{
+	xfs_mount_t	*mp = ip->i_mount;
+	xfs_fileoff_t	end_fsb = 0;	/* stays 0 unless the lookup succeeds */
+	xfs_fileoff_t	eof_fsb;	/* inode size rounded up to blocks */
+	xfs_fsize_t	end_byte;
+
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
+
+	/*
+	 * Only check for blocks beyond the EOF if the extents have been
+	 * read in; that saves us from looking when it really isn't
+	 * necessary.  The inode lock is taken shared just around the
+	 * lookup, and a failed lookup is treated the same as finding
+	 * no blocks at all.
+	 */
+	if (ip->i_df.if_flags & XFS_IFEXTENTS) {
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
+		if (xfs_bmap_last_offset(NULL, ip, &end_fsb, XFS_DATA_FORK))
+			end_fsb = 0;
+		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	}
+
+	/* Whichever reaches further: the extent lookup or the inode size. */
+	eof_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size);
+	end_fsb = XFS_FILEOFF_MAX(end_fsb, eof_fsb);
+
+	/*
+	 * Convert to bytes, then add (1 << m_writeio_log) bytes to cover
+	 * data buffered beyond that point.  If either step overflows
+	 * xfs_fsize_t (seen as a negative value), return
+	 * XFS_MAXIOFFSET(mp) instead.
+	 */
+	end_byte = XFS_FSB_TO_B(mp, end_fsb);
+	if (end_byte >= 0)
+		end_byte += (1 << mp->m_writeio_log);
+
+	if (end_byte < 0)
+		return XFS_MAXIOFFSET(mp);
+	return end_byte;
+}
+
+/*
+ * Start the truncation of the file to new_size.  The new size
+ * must be smaller than the current size.  This routine will
+ * clear the buffer and page caches of file data in the removed
+ * range, and xfs_itruncate_finish() will remove the underlying
+ * disk blocks.
+ *
+ * The inode must have its I/O lock locked EXCLUSIVELY, and it
+ * must NOT have the inode lock held at all.  This is because we're
+ * calling into the buffer/page cache code and we can't hold the
+ * inode lock when we do so.
+ *
+ * We need to wait for any direct I/Os in flight to complete before we
+ * proceed with the truncate. This is needed to prevent the extents
+ * being read or written by the direct I/Os from being removed while the
+ * I/O is in flight as there is no other method of synchronising
+ * direct I/O with the truncate operation.  Also, because we hold
+ * the IOLOCK in exclusive mode, we prevent new direct I/Os from being
+ * started until the truncate completes and drops the lock. Essentially,
+ * the xfs_ioend_wait() call forms an I/O barrier that provides strict
+ * ordering between direct I/Os and the truncate operation.
+ *
+ * The flags parameter can have either the value XFS_ITRUNC_DEFINITE
+ * or XFS_ITRUNC_MAYBE.  The XFS_ITRUNC_MAYBE value should be used
+ * in the case that the caller is locking things out of order and
+ * may not be able to call xfs_itruncate_finish() with the inode lock
+ * held without dropping the I/O lock.  If the caller must drop the
+ * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start()
+ * must be called again with all the same restrictions as the initial
+ * call.
+ */
+int
+xfs_itruncate_start(
+	xfs_inode_t	*ip,
+	uint		flags,
+	xfs_fsize_t	new_size)
+{
+	xfs_mount_t	*mp = ip->i_mount;
+	xfs_off_t	toss_from;	/* new_size rounded up to a block */
+	xfs_fsize_t	cached_end;	/* from xfs_file_last_byte() */
+	int		error = 0;
+
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+	ASSERT((new_size == 0) || (new_size <= ip->i_size));
+	ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
+	       (flags == XFS_ITRUNC_MAYBE));
+
+	/* wait for the completion of any pending DIOs */
+	if (new_size < ip->i_size || new_size == 0)
+		xfs_ioend_wait(ip);
+
+	/*
+	 * Round new_size up to a block boundary so that we don't toss
+	 * things on the same block as new_size but before it.
+	 */
+	toss_from = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
+	toss_from = XFS_FSB_TO_B(mp, toss_from);
+	if (toss_from < 0) {
+		/*
+		 * The place to start tossing is beyond our maximum
+		 * file size, so there is no way that the data extended
+		 * out there.
+		 */
+		return 0;
+	}
+
+	/*
+	 * Make sure to catch any pages brought in by buffers overlapping
+	 * the EOF: xfs_file_last_byte() searches out beyond the isize.
+	 */
+	cached_end = xfs_file_last_byte(ip);
+	trace_xfs_itruncate_start(ip, new_size, flags, toss_from, cached_end);
+	if (cached_end > toss_from) {
+		/*
+		 * Get rid of pages overlapping the region being removed.
+		 * We have to use the less efficient flushinval_pages when
+		 * the caller may not be able to finish the truncate without
+		 * dropping the inode's I/O lock (XFS_ITRUNC_MAYBE).
+		 *
+		 * Before calling toss_pages or flushinval_pages, make sure
+		 * to call remapf() over the same region if the file is
+		 * mapped.  This frees up mapped file references to the
+		 * pages in the given range and for the flushinval_pages
+		 * case it ensures that we get the latest mapped changes
+		 * flushed out.
+		 */
+		if (!(flags & XFS_ITRUNC_DEFINITE))
+			error = xfs_flushinval_pages(ip, toss_from, -1,
+						     FI_REMAPF_LOCKED);
+		else
+			xfs_tosspages(ip, toss_from, -1, FI_REMAPF_LOCKED);
+	}
+
+#ifdef DEBUG
+	if (new_size == 0) {
+		ASSERT(VN_CACHED(VFS_I(ip)) == 0);
+	}
+#endif
+	return error;
+}
+
+/*
+ * Shrink the file to the given new_size.  The new size must be smaller than
+ * the current size.  This will free up the underlying blocks in the removed
+ * range after a call to xfs_itruncate_start() or xfs_atruncate_start().
  *
  * The transaction passed to this routine must have made a permanent log
  * reservation of at least XFS_ITRUNCATE_LOG_RES.  This routine may commit the
@@ -1232,6 +1387,31 @@ xfs_isize_check(
  * will be "held" within the returned transaction.  This routine does NOT
  * require any disk space to be reserved for it within the transaction.
  *
+ * The fork parameter must be either XFS_ATTR_FORK or XFS_DATA_FORK, and it
+ * indicates the fork which is to be truncated.  For the attribute fork we only
+ * support truncation to size 0.
+ *
+ * We use the sync parameter to indicate whether or not the first transaction
+ * we perform might have to be synchronous.  For the attr fork, it needs to be
+ * so if the unlink of the inode is not yet known to be permanent in the log.
+ * This keeps us from freeing and reusing the blocks of the attribute fork
+ * before the unlink of the inode becomes permanent.
+ *
+ * For the data fork, we normally have to run synchronously if we're being
+ * called out of the inactive path or we're being called out of the create path
+ * where we're truncating an existing file.  Either way, the truncate needs to
+ * be sync so blocks don't reappear in the file with altered data in case of a
+ * crash.  wsync filesystems can run the first case async because anything that
+ * shrinks the inode has to run sync so by the time we're called here from
+ * inactive, the inode size is permanently set to 0.
+ *
+ * Calls from the truncate path always need to be sync unless we're in a wsync
+ * filesystem and the file has already been unlinked.
+ *
+ * The caller is responsible for correctly setting the sync parameter.  It gets
+ * too hard for us to guess here which path we're being called out of just
+ * based on inode state.
+ *
  * If we get an error, we must return with the inode locked and linked into the
  * current transaction. This keeps things simple for the higher level code,
  * because it always knows that the inode is locked and held in the transaction
@@ -1239,30 +1419,124 @@ xfs_isize_check(
  * dirty on error so that transactions can be easily aborted if possible.
  */
 int
-xfs_itruncate_extents(
-	struct xfs_trans	**tpp,
-	struct xfs_inode	*ip,
-	int			whichfork,
-	xfs_fsize_t		new_size)
+xfs_itruncate_finish(
+	xfs_trans_t	**tp,
+	xfs_inode_t	*ip,
+	xfs_fsize_t	new_size,
+	int		fork,
+	int		sync)
 {
-	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_trans	*tp = *tpp;
-	struct xfs_trans	*ntp;
-	xfs_bmap_free_t		free_list;
-	xfs_fsblock_t		first_block;
-	xfs_fileoff_t		first_unmap_block;
-	xfs_fileoff_t		last_block;
-	xfs_filblks_t		unmap_len;
-	int			committed;
-	int			error = 0;
-	int			done = 0;
+	xfs_fsblock_t	first_block;
+	xfs_fileoff_t	first_unmap_block;
+	xfs_fileoff_t	last_block;
+	xfs_filblks_t	unmap_len=0;
+	xfs_mount_t	*mp;
+	xfs_trans_t	*ntp;
+	int		done;
+	int		committed;
+	xfs_bmap_free_t	free_list;
+	int		error;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-	ASSERT(new_size <= ip->i_size);
-	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+	ASSERT((new_size == 0) || (new_size <= ip->i_size));
+	ASSERT(*tp != NULL);
+	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
+	ASSERT(ip->i_transp == *tp);
 	ASSERT(ip->i_itemp != NULL);
 	ASSERT(ip->i_itemp->ili_lock_flags == 0);
-	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
+
+
+	ntp = *tp;
+	mp = (ntp)->t_mountp;
+	ASSERT(! XFS_NOT_DQATTACHED(mp, ip));
+
+	/*
+	 * We only support truncating the entire attribute fork.
+	 */
+	if (fork == XFS_ATTR_FORK) {
+		new_size = 0LL;
+	}
+	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
+	trace_xfs_itruncate_finish_start(ip, new_size);
+
+	/*
+	 * The first thing we do is set the size to new_size permanently
+	 * on disk.  This way we don't have to worry about anyone ever
+	 * being able to look at the data being freed even in the face
+	 * of a crash.  What we're getting around here is the case where
+	 * we free a block, it is allocated to another file, it is written
+	 * to, and then we crash.  If the new data gets written to the
+	 * file but the log buffers containing the free and reallocation
+	 * don't, then we'd end up with garbage in the blocks being freed.
+	 * As long as we make the new_size permanent before actually
+	 * freeing any blocks it doesn't matter if they get written to.
+	 *
+	 * The callers must signal into us whether or not the size
+	 * setting here must be synchronous.  There are a few cases
+	 * where it doesn't have to be synchronous.  Those cases
+	 * occur if the file is unlinked and we know the unlink is
+	 * permanent or if the blocks being truncated are guaranteed
+	 * to be beyond the inode eof (regardless of the link count)
+	 * and the eof value is permanent.  Both of these cases occur
+	 * only on wsync-mounted filesystems.  In those cases, we're
+	 * guaranteed that no user will ever see the data in the blocks
+	 * that are being truncated so the truncate can run async.
+	 * In the free beyond eof case, the file may wind up with
+	 * more blocks allocated to it than it needs if we crash
+	 * and that won't get fixed until the next time the file
+	 * is re-opened and closed but that's ok as that shouldn't
+	 * be too many blocks.
+	 *
+	 * However, we can't just make all wsync xactions run async
+	 * because there's one call out of the create path that needs
+	 * to run sync where it's truncating an existing file to size
+	 * 0 whose size is > 0.
+	 *
+	 * It's probably possible to come up with a test in this
+	 * routine that would correctly distinguish all the above
+	 * cases from the values of the function parameters and the
+	 * inode state but for sanity's sake, I've decided to let the
+	 * layers above just tell us.  It's simpler to correctly figure
+	 * out in the layer above exactly under what conditions we
+	 * can run async and I think it's easier for others to read and
+	 * follow the logic in case something has to be changed.
+	 * cscope is your friend -- rcc.
+	 *
+	 * The attribute fork is much simpler.
+	 *
+	 * For the attribute fork we allow the caller to tell us whether
+	 * the unlink of the inode that led to this call is yet permanent
+	 * in the on disk log.  If it is not and we will be freeing extents
+	 * in this inode then we make the first transaction synchronous
+	 * to make sure that the unlink is permanent by the time we free
+	 * the blocks.
+	 */
+	if (fork == XFS_DATA_FORK) {
+		if (ip->i_d.di_nextents > 0) {
+			/*
+			 * If we are not changing the file size then do
+			 * not update the on-disk file size - we may be
+			 * called from xfs_inactive_free_eofblocks().  If we
+			 * update the on-disk file size and then the system
+			 * crashes before the contents of the file are
+			 * flushed to disk then the files may be full of
+			 * holes (ie NULL files bug).
+			 */
+			if (ip->i_size != new_size) {
+				ip->i_d.di_size = new_size;
+				ip->i_size = new_size;
+				xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
+			}
+		}
+	} else if (sync) {
+		ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC));
+		if (ip->i_d.di_anextents > 0)
+			xfs_trans_set_sync(ntp);
+	}
+	ASSERT(fork == XFS_DATA_FORK ||
+		(fork == XFS_ATTR_FORK &&
+			((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) ||
+			 (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC)))));
 
 	/*
 	 * Since it is possible for space to become allocated beyond
@@ -1273,142 +1547,128 @@ xfs_itruncate_extents(
 	 * beyond the maximum file size (ie it is the same as last_block),
 	 * then there is nothing to do.
 	 */
-	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
 	last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
-	if (first_unmap_block == last_block)
-		return 0;
-
-	ASSERT(first_unmap_block < last_block);
-	unmap_len = last_block - first_unmap_block + 1;
+	ASSERT(first_unmap_block <= last_block);
+	done = 0;
+	if (last_block == first_unmap_block) {
+		done = 1;
+	} else {
+		unmap_len = last_block - first_unmap_block + 1;
+	}
 	while (!done) {
+		/*
+		 * Free up to XFS_ITRUNC_MAX_EXTENTS extents.  xfs_bunmapi()
+		 * will tell us whether it freed the entire range or
+		 * not.  If this is a synchronous mount (wsync),
+		 * then we can tell bunmapi to keep all the
+		 * transactions asynchronous since the unlink
+		 * transaction that made this inode inactive has
+		 * already hit the disk.  There's no danger of
+		 * the freed blocks being reused, there being a
+		 * crash, and the reused blocks suddenly reappearing
+		 * in this file with garbage in them once recovery
+		 * runs.
+		 */
 		xfs_bmap_init(&free_list, &first_block);
-		error = xfs_bunmapi(tp, ip,
+		error = xfs_bunmapi(ntp, ip,
 				    first_unmap_block, unmap_len,
-				    xfs_bmapi_aflag(whichfork),
+				    xfs_bmapi_aflag(fork),
 				    XFS_ITRUNC_MAX_EXTENTS,
 				    &first_block, &free_list,
 				    &done);
-		if (error)
-			goto out_bmap_cancel;
+		if (error) {
+			/*
+			 * If the bunmapi call encounters an error,
+			 * return to the caller where the transaction
+			 * can be properly aborted.  We just need to
+			 * make sure we're not holding any resources
+			 * that we were not when we came in.
+			 */
+			xfs_bmap_cancel(&free_list);
+			return error;
+		}
 
 		/*
 		 * Duplicate the transaction that has the permanent
 		 * reservation and commit the old transaction.
 		 */
-		error = xfs_bmap_finish(&tp, &free_list, &committed);
+		error = xfs_bmap_finish(tp, &free_list, &committed);
+		ntp = *tp;
 		if (committed)
-			xfs_trans_ijoin(tp, ip);
-		if (error)
-			goto out_bmap_cancel;
+			xfs_trans_ijoin(ntp, ip);
+
+		if (error) {
+			/*
+			 * If the bmap finish call encounters an error, return
+			 * to the caller where the transaction can be properly
+			 * aborted.  We just need to make sure we're not
+			 * holding any resources that we were not when we came
+			 * in.
+			 *
+			 * Aborting from this point might lose some blocks in
+			 * the file system, but oh well.
+			 */
+			xfs_bmap_cancel(&free_list);
+			return error;
+		}
 
 		if (committed) {
 			/*
 			 * Mark the inode dirty so it will be logged and
 			 * moved forward in the log as part of every commit.
 			 */
-			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+			xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
 		}
 
-		ntp = xfs_trans_dup(tp);
-		error = xfs_trans_commit(tp, 0);
-		tp = ntp;
+		ntp = xfs_trans_dup(ntp);
+		error = xfs_trans_commit(*tp, 0);
+		*tp = ntp;
 
-		xfs_trans_ijoin(tp, ip);
+		xfs_trans_ijoin(ntp, ip);
 
 		if (error)
-			goto out;
-
+			return error;
 		/*
-		 * Transaction commit worked ok so we can drop the extra ticket
+		 * transaction commit worked ok so we can drop the extra ticket
 		 * reference that we gained in xfs_trans_dup()
 		 */
-		xfs_log_ticket_put(tp->t_ticket);
-		error = xfs_trans_reserve(tp, 0,
+		xfs_log_ticket_put(ntp->t_ticket);
+		error = xfs_trans_reserve(ntp, 0,
 					XFS_ITRUNCATE_LOG_RES(mp), 0,
 					XFS_TRANS_PERM_LOG_RES,
 					XFS_ITRUNCATE_LOG_COUNT);
 		if (error)
-			goto out;
+			return error;
 	}
-
-out:
-	*tpp = tp;
-	return error;
-out_bmap_cancel:
 	/*
-	 * If the bunmapi call encounters an error, return to the caller where
-	 * the transaction can be properly aborted.  We just need to make sure
-	 * we're not holding any resources that we were not when we came in.
+	 * Only update the size in the case of the data fork, but
+	 * always re-log the inode so that our permanent transaction
+	 * can keep on rolling it forward in the log.
 	 */
-	xfs_bmap_cancel(&free_list);
-	goto out;
-}
-
-int
-xfs_itruncate_data(
-	struct xfs_trans	**tpp,
-	struct xfs_inode	*ip,
-	xfs_fsize_t		new_size)
-{
-	int			error;
-
-	trace_xfs_itruncate_data_start(ip, new_size);
-
-	/*
-	 * The first thing we do is set the size to new_size permanently on
-	 * disk.  This way we don't have to worry about anyone ever being able
-	 * to look at the data being freed even in the face of a crash.
-	 * What we're getting around here is the case where we free a block, it
-	 * is allocated to another file, it is written to, and then we crash.
-	 * If the new data gets written to the file but the log buffers
-	 * containing the free and reallocation don't, then we'd end up with
-	 * garbage in the blocks being freed.  As long as we make the new_size
-	 * permanent before actually freeing any blocks it doesn't matter if
-	 * they get written to.
-	 */
-	if (ip->i_d.di_nextents > 0) {
+	if (fork == XFS_DATA_FORK) {
+		xfs_isize_check(mp, ip, new_size);
 		/*
-		 * If we are not changing the file size then do not update
-		 * the on-disk file size - we may be called from
-		 * xfs_inactive_free_eofblocks().  If we update the on-disk
-		 * file size and then the system crashes before the contents
-		 * of the file are flushed to disk then the files may be
-		 * full of holes (ie NULL files bug).
+		 * If we are not changing the file size then do
+		 * not update the on-disk file size - we may be
+		 * called from xfs_inactive_free_eofblocks().  If we
+		 * update the on-disk file size and then the system
+		 * crashes before the contents of the file are
+		 * flushed to disk then the files may be full of
+		 * holes (ie NULL files bug).
 		 */
 		if (ip->i_size != new_size) {
 			ip->i_d.di_size = new_size;
 			ip->i_size = new_size;
-			xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
 		}
 	}
-
-	error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size);
-	if (error)
-		return error;
-
-	/*
-	 * If we are not changing the file size then do not update the on-disk
-	 * file size - we may be called from xfs_inactive_free_eofblocks().
-	 * If we update the on-disk file size and then the system crashes
-	 * before the contents of the file are flushed to disk then the files
-	 * may be full of holes (ie NULL files bug).
-	 */
-	xfs_isize_check(ip, new_size);
-	if (ip->i_size != new_size) {
-		ip->i_d.di_size = new_size;
-		ip->i_size = new_size;
-	}
-
-	ASSERT(new_size != 0 || ip->i_delayed_blks == 0);
-	ASSERT(new_size != 0 || ip->i_d.di_nextents == 0);
-
-	/*
-	 * Always re-log the inode so that our permanent transaction can keep
-	 * on rolling it forward in the log.
-	 */
-	xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
-
-	trace_xfs_itruncate_data_end(ip, new_size);
+	xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
+	ASSERT((new_size != 0) ||
+	       (fork == XFS_ATTR_FORK) ||
+	       (ip->i_delayed_blks == 0));
+	ASSERT((new_size != 0) ||
+	       (fork == XFS_ATTR_FORK) ||
+	       (ip->i_d.di_nextents == 0));
+	trace_xfs_itruncate_finish_end(ip, new_size);
 	return 0;
 }
 
@@ -1434,6 +1694,7 @@ xfs_iunlink(
 
 	ASSERT(ip->i_d.di_nlink == 0);
 	ASSERT(ip->i_d.di_mode != 0);
+	ASSERT(ip->i_transp == tp);
 
 	mp = tp->t_mountp;
 
@@ -1456,7 +1717,7 @@ xfs_iunlink(
 	ASSERT(agi->agi_unlinked[bucket_index]);
 	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
 
-	if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
+	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) {
 		/*
 		 * There is already another inode in the bucket we need
 		 * to add ourselves to.  Add us at the front of the list.
@@ -1467,7 +1728,8 @@ xfs_iunlink(
 		if (error)
 			return error;
 
-		ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
+		ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO);
+		/* both on-disk, don't endian flip twice */
 		dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
 		offset = ip->i_imap.im_boffset +
 			offsetof(xfs_dinode_t, di_next_unlinked);
@@ -1532,7 +1794,7 @@ xfs_iunlink_remove(
 	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
 	ASSERT(agino != 0);
 	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
-	ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO));
+	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO);
 	ASSERT(agi->agi_unlinked[bucket_index]);
 
 	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
@@ -1697,7 +1959,7 @@ xfs_ifree_cluster(
 		 * stale first, we will not attempt to lock them in the loop
 		 * below as the XFS_ISTALE flag will be set.
 		 */
-		lip = bp->b_fspriv;
+		lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
 		while (lip) {
 			if (lip->li_type == XFS_LI_INODE) {
 				iip = (xfs_inode_log_item_t *)lip;
@@ -1824,6 +2086,7 @@ xfs_ifree(
 	xfs_buf_t       	*ibp;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+	ASSERT(ip->i_transp == tp);
 	ASSERT(ip->i_d.di_nlink == 0);
 	ASSERT(ip->i_d.di_nextents == 0);
 	ASSERT(ip->i_d.di_anextents == 0);
@@ -2470,7 +2733,7 @@ xfs_iflush_cluster(
 		 * mark the buffer as an error and call them.  Otherwise
 		 * mark it as stale and brelse.
 		 */
-		if (bp->b_iodone) {
+		if (XFS_BUF_IODONE_FUNC(bp)) {
 			XFS_BUF_UNDONE(bp);
 			XFS_BUF_STALE(bp);
 			XFS_BUF_ERROR(bp,EIO);
@@ -2657,7 +2920,7 @@ xfs_iflush_int(
 	 */
 	xfs_synchronize_times(ip);
 
-	if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
+	if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC,
 			       mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 			"%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
@@ -2810,8 +3073,8 @@ xfs_iflush_int(
 		 */
 		xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
 
-		ASSERT(bp->b_fspriv != NULL);
-		ASSERT(bp->b_iodone != NULL);
+		ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+		ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL);
 	} else {
 		/*
 		 * We're flushing an inode which is not in the AIL and has
diff --git a/trunk/fs/xfs/xfs_inode.h b/trunk/fs/xfs/xfs_inode.h
index a97644ab945a..964cfea77686 100644
--- a/trunk/fs/xfs/xfs_inode.h
+++ b/trunk/fs/xfs/xfs_inode.h
@@ -241,6 +241,7 @@ typedef struct xfs_inode {
 	xfs_ifork_t		i_df;		/* data fork */
 
 	/* Transaction and locking information. */
+	struct xfs_trans	*i_transp;	/* ptr to owning transaction*/
 	struct xfs_inode_log_item *i_itemp;	/* logging information */
 	mrlock_t		i_lock;		/* inode lock */
 	mrlock_t		i_iolock;	/* inode IO lock */
@@ -456,6 +457,16 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
 
 extern struct lock_class_key xfs_iolock_reclaimable;
 
+/*
+ * Flags for xfs_itruncate_start().
+ */
+#define	XFS_ITRUNC_DEFINITE	0x1
+#define	XFS_ITRUNC_MAYBE	0x2
+
+#define XFS_ITRUNC_FLAGS \
+	{ XFS_ITRUNC_DEFINITE,	"DEFINITE" }, \
+	{ XFS_ITRUNC_MAYBE,	"MAYBE" }
+
 /*
  * For multiple groups support: if S_ISGID bit is set in the parent
  * directory, group of new file is set to that of the parent, and
@@ -490,10 +501,9 @@ uint		xfs_ip2xflags(struct xfs_inode *);
 uint		xfs_dic2xflags(struct xfs_dinode *);
 int		xfs_ifree(struct xfs_trans *, xfs_inode_t *,
 			   struct xfs_bmap_free *);
-int		xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
-				      int, xfs_fsize_t);
-int		xfs_itruncate_data(struct xfs_trans **, struct xfs_inode *,
-				   xfs_fsize_t);
+int		xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
+int		xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
+				     xfs_fsize_t, int, int);
 int		xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
 
 void		xfs_iext_realloc(xfs_inode_t *, int, int);
@@ -569,6 +579,13 @@ void		xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int);
 
 #define xfs_ipincount(ip)	((unsigned int) atomic_read(&ip->i_pincount))
 
+#ifdef DEBUG
+void		xfs_isize_check(struct xfs_mount *, struct xfs_inode *,
+				xfs_fsize_t);
+#else	/* DEBUG */
+#define xfs_isize_check(mp, ip, isize)
+#endif	/* DEBUG */
+
 #if defined(DEBUG)
 void		xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
 #else
diff --git a/trunk/fs/xfs/xfs_inode_item.c b/trunk/fs/xfs/xfs_inode_item.c
index 588406dc6a35..b1e88d56069c 100644
--- a/trunk/fs/xfs/xfs_inode_item.c
+++ b/trunk/fs/xfs/xfs_inode_item.c
@@ -632,8 +632,13 @@ xfs_inode_item_unlock(
 	struct xfs_inode	*ip = iip->ili_inode;
 	unsigned short		lock_flags;
 
-	ASSERT(ip->i_itemp != NULL);
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+	ASSERT(iip->ili_inode->i_itemp != NULL);
+	ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
+
+	/*
+	 * Clear the transaction pointer in the inode.
+	 */
+	ip->i_transp = NULL;
 
 	/*
 	 * If the inode needed a separate buffer with which to log
@@ -659,8 +664,8 @@ xfs_inode_item_unlock(
 	lock_flags = iip->ili_lock_flags;
 	iip->ili_lock_flags = 0;
 	if (lock_flags) {
-		xfs_iunlock(ip, lock_flags);
-		IRELE(ip);
+		xfs_iunlock(iip->ili_inode, lock_flags);
+		IRELE(iip->ili_inode);
 	}
 }
 
@@ -874,7 +879,7 @@ xfs_iflush_done(
 	 * Scan the buffer IO completions for other inodes being completed and
 	 * attach them to the current inode log item.
 	 */
-	blip = bp->b_fspriv;
+	blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
 	prev = NULL;
 	while (blip != NULL) {
 		if (lip->li_cb != xfs_iflush_done) {
@@ -886,7 +891,7 @@ xfs_iflush_done(
 		/* remove from list */
 		next = blip->li_bio_list;
 		if (!prev) {
-			bp->b_fspriv = next;
+			XFS_BUF_SET_FSPRIVATE(bp, next);
 		} else {
 			prev->li_bio_list = next;
 		}
diff --git a/trunk/fs/xfs/xfs_inum.h b/trunk/fs/xfs/xfs_inum.h
index b253c0ea5bec..b8e4ee4e89a4 100644
--- a/trunk/fs/xfs/xfs_inum.h
+++ b/trunk/fs/xfs/xfs_inum.h
@@ -28,6 +28,17 @@
 
 typedef	__uint32_t	xfs_agino_t;	/* within allocation grp inode number */
 
+/*
+ * Useful inode bits for this kernel.
+ * Used in some places where having 64-bits in the 32-bit kernels
+ * costs too much.
+ */
+#if XFS_BIG_INUMS
+typedef	xfs_ino_t	xfs_intino_t;
+#else
+typedef	__uint32_t	xfs_intino_t;
+#endif
+
 #define	NULLFSINO	((xfs_ino_t)-1)
 #define	NULLAGINO	((xfs_agino_t)-1)
 
diff --git a/trunk/fs/xfs/xfs_log.c b/trunk/fs/xfs/xfs_log.c
index 06ff8437ed8e..41d5b8f2bf92 100644
--- a/trunk/fs/xfs/xfs_log.c
+++ b/trunk/fs/xfs/xfs_log.c
@@ -871,9 +871,15 @@ xlog_space_left(
 void
 xlog_iodone(xfs_buf_t *bp)
 {
-	xlog_in_core_t	*iclog = bp->b_fspriv;
-	xlog_t		*l = iclog->ic_log;
-	int		aborted = 0;
+	xlog_in_core_t	*iclog;
+	xlog_t		*l;
+	int		aborted;
+
+	iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2);
+	XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
+	aborted = 0;
+	l = iclog->ic_log;
 
 	/*
 	 * Race to shutdown the filesystem if we see an error.
@@ -1050,9 +1056,10 @@ xlog_alloc_log(xfs_mount_t	*mp,
 	bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
 	if (!bp)
 		goto out_free_log;
-	bp->b_iodone = xlog_iodone;
+	XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
+	XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
 	ASSERT(XFS_BUF_ISBUSY(bp));
-	ASSERT(xfs_buf_islocked(bp));
+	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
 	log->l_xbuf = bp;
 
 	spin_lock_init(&log->l_icloglock);
@@ -1083,8 +1090,10 @@ xlog_alloc_log(xfs_mount_t	*mp,
 						log->l_iclog_size, 0);
 		if (!bp)
 			goto out_free_iclog;
-
-		bp->b_iodone = xlog_iodone;
+		if (!XFS_BUF_CPSEMA(bp))
+			ASSERT(0);
+		XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
+		XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
 		iclog->ic_bp = bp;
 		iclog->ic_data = bp->b_addr;
 #ifdef DEBUG
@@ -1109,7 +1118,7 @@ xlog_alloc_log(xfs_mount_t	*mp,
 		iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
 
 		ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
-		ASSERT(xfs_buf_islocked(iclog->ic_bp));
+		ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
 		init_waitqueue_head(&iclog->ic_force_wait);
 		init_waitqueue_head(&iclog->ic_write_wait);
 
@@ -1245,8 +1254,9 @@ STATIC int
 xlog_bdstrat(
 	struct xfs_buf		*bp)
 {
-	struct xlog_in_core	*iclog = bp->b_fspriv;
+	struct xlog_in_core	*iclog;
 
+	iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
 	if (iclog->ic_state & XLOG_STATE_IOERROR) {
 		XFS_BUF_ERROR(bp, EIO);
 		XFS_BUF_STALE(bp);
@@ -1259,6 +1269,7 @@ xlog_bdstrat(
 		return 0;
 	}
 
+	bp->b_flags |= _XBF_RUN_QUEUES;
 	xfs_buf_iorequest(bp);
 	return 0;
 }
@@ -1340,6 +1351,8 @@ xlog_sync(xlog_t		*log,
 	}
 
 	bp = iclog->ic_bp;
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1);
+	XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2);
 	XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
 
 	XFS_STATS_ADD(xs_log_blocks, BTOBB(count));
@@ -1353,28 +1366,22 @@ xlog_sync(xlog_t		*log,
 		iclog->ic_bwritecnt = 1;
 	}
 	XFS_BUF_SET_COUNT(bp, count);
-	bp->b_fspriv = iclog;
+	XFS_BUF_SET_FSPRIVATE(bp, iclog);	/* save for later */
 	XFS_BUF_ZEROFLAGS(bp);
 	XFS_BUF_BUSY(bp);
 	XFS_BUF_ASYNC(bp);
-	bp->b_flags |= XBF_SYNCIO;
+	bp->b_flags |= XBF_LOG_BUFFER;
 
 	if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
-		bp->b_flags |= XBF_FUA;
-
 		/*
-		 * Flush the data device before flushing the log to make
-		 * sure all meta data written back from the AIL actually made
-		 * it to disk before stamping the new log tail LSN into the
-		 * log buffer.  For an external log we need to issue the
-		 * flush explicitly, and unfortunately synchronously here;
-		 * for an internal log we can simply use the block layer
-		 * state machine for preflushes.
+		 * If we have an external log device, flush the data device
+		 * before flushing the log to make sure all meta data
+		 * written back from the AIL actually made it to disk
+		 * before writing out the new log tail LSN in the log buffer.
 		 */
 		if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
 			xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
-		else
-			bp->b_flags |= XBF_FLUSH;
+		XFS_BUF_ORDERED(bp);
 	}
 
 	ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
@@ -1397,16 +1404,19 @@ xlog_sync(xlog_t		*log,
 	}
 	if (split) {
 		bp = iclog->ic_log->l_xbuf;
+		ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) ==
+							(unsigned long)1);
+		XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2);
 		XFS_BUF_SET_ADDR(bp, 0);	     /* logical 0 */
 		XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+
 					    (__psint_t)count), split);
-		bp->b_fspriv = iclog;
+		XFS_BUF_SET_FSPRIVATE(bp, iclog);
 		XFS_BUF_ZEROFLAGS(bp);
 		XFS_BUF_BUSY(bp);
 		XFS_BUF_ASYNC(bp);
-		bp->b_flags |= XBF_SYNCIO;
+		bp->b_flags |= XBF_LOG_BUFFER;
 		if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
-			bp->b_flags |= XBF_FUA;
+			XFS_BUF_ORDERED(bp);
 		dptr = XFS_BUF_PTR(bp);
 		/*
 		 * Bump the cycle numbers at the start of each block
@@ -3511,13 +3521,13 @@ xlog_verify_iclog(xlog_t	 *log,
 	spin_unlock(&log->l_icloglock);
 
 	/* check log magic numbers */
-	if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
+	if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM)
 		xfs_emerg(log->l_mp, "%s: invalid magic num", __func__);
 
 	ptr = (xfs_caddr_t) &iclog->ic_header;
 	for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count;
 	     ptr += BBSIZE) {
-		if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
+		if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM)
 			xfs_emerg(log->l_mp, "%s: unexpected magic num",
 				__func__);
 	}
diff --git a/trunk/fs/xfs/xfs_log_recover.c b/trunk/fs/xfs/xfs_log_recover.c
index 8fe4206de057..04142caedb2b 100644
--- a/trunk/fs/xfs/xfs_log_recover.c
+++ b/trunk/fs/xfs/xfs_log_recover.c
@@ -91,8 +91,6 @@ xlog_get_bp(
 	xlog_t		*log,
 	int		nbblks)
 {
-	struct xfs_buf	*bp;
-
 	if (!xlog_buf_bbcount_valid(log, nbblks)) {
 		xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
 			nbblks);
@@ -120,10 +118,8 @@ xlog_get_bp(
 		nbblks += log->l_sectBBsize;
 	nbblks = round_up(nbblks, log->l_sectBBsize);
 
-	bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, BBTOB(nbblks), 0);
-	if (bp)
-		xfs_buf_unlock(bp);
-	return bp;
+	return xfs_buf_get_uncached(log->l_mp->m_logdev_targp,
+					BBTOB(nbblks), 0);
 }
 
 STATIC void
@@ -268,7 +264,7 @@ xlog_bwrite(
 	XFS_BUF_ZEROFLAGS(bp);
 	XFS_BUF_BUSY(bp);
 	XFS_BUF_HOLD(bp);
-	xfs_buf_lock(bp);
+	XFS_BUF_PSEMA(bp, PRIBIO);
 	XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
 	XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
 
@@ -304,14 +300,14 @@ xlog_header_check_recover(
 	xfs_mount_t		*mp,
 	xlog_rec_header_t	*head)
 {
-	ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
+	ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM);
 
 	/*
 	 * IRIX doesn't write the h_fmt field and leaves it zeroed
 	 * (XLOG_FMT_UNKNOWN). This stops us from trying to recover
 	 * a dirty log created in IRIX.
 	 */
-	if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) {
+	if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) {
 		xfs_warn(mp,
 	"dirty log written in incompatible format - can't recover");
 		xlog_header_check_dump(mp, head);
@@ -337,7 +333,7 @@ xlog_header_check_mount(
 	xfs_mount_t		*mp,
 	xlog_rec_header_t	*head)
 {
-	ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
+	ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM);
 
 	if (uuid_is_nil(&head->h_fs_uuid)) {
 		/*
@@ -371,7 +367,7 @@ xlog_recover_iodone(
 		xfs_force_shutdown(bp->b_target->bt_mount,
 					SHUTDOWN_META_IO_ERROR);
 	}
-	bp->b_iodone = NULL;
+	XFS_BUF_CLR_IODONE_FUNC(bp);
 	xfs_buf_ioend(bp, 0);
 }
 
@@ -538,7 +534,7 @@ xlog_find_verify_log_record(
 
 		head = (xlog_rec_header_t *)offset;
 
-		if (head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
+		if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(head->h_magicno))
 			break;
 
 		if (!smallmem)
@@ -920,7 +916,7 @@ xlog_find_tail(
 		if (error)
 			goto done;
 
-		if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+		if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) {
 			found = 1;
 			break;
 		}
@@ -937,8 +933,8 @@ xlog_find_tail(
 			if (error)
 				goto done;
 
-			if (*(__be32 *)offset ==
-			    cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+			if (XLOG_HEADER_MAGIC_NUM ==
+			    be32_to_cpu(*(__be32 *)offset)) {
 				found = 2;
 				break;
 			}
@@ -1951,7 +1947,7 @@ xfs_qm_dqcheck(
 	 * This is all fine; things are still consistent, and we haven't lost
 	 * any quota information. Just don't complain about bad dquot blks.
 	 */
-	if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) {
+	if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) {
 		if (flags & XFS_QMOPT_DOWARN)
 			xfs_alert(mp,
 			"%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
@@ -2178,7 +2174,7 @@ xlog_recover_buffer_pass2(
 		error = xfs_bwrite(mp, bp);
 	} else {
 		ASSERT(bp->b_target->bt_mount == mp);
-		bp->b_iodone = xlog_recover_iodone;
+		XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
 		xfs_bdwrite(mp, bp);
 	}
 
@@ -2242,7 +2238,7 @@ xlog_recover_inode_pass2(
 	 * Make sure the place we're flushing out to really looks
 	 * like an inode!
 	 */
-	if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) {
+	if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) {
 		xfs_buf_relse(bp);
 		xfs_alert(mp,
 	"%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld",
@@ -2438,7 +2434,7 @@ xlog_recover_inode_pass2(
 
 write_inode_buffer:
 	ASSERT(bp->b_target->bt_mount == mp);
-	bp->b_iodone = xlog_recover_iodone;
+	XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
 	xfs_bdwrite(mp, bp);
 error:
 	if (need_free)
@@ -2560,7 +2556,7 @@ xlog_recover_dquot_pass2(
 
 	ASSERT(dq_f->qlf_size == 2);
 	ASSERT(bp->b_target->bt_mount == mp);
-	bp->b_iodone = xlog_recover_iodone;
+	XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
 	xfs_bdwrite(mp, bp);
 
 	return (0);
@@ -3299,7 +3295,7 @@ xlog_valid_rec_header(
 {
 	int			hlen;
 
-	if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) {
+	if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) {
 		XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
 				XFS_ERRLEVEL_LOW, log->l_mp);
 		return XFS_ERROR(EFSCORRUPTED);
diff --git a/trunk/fs/xfs/xfs_mount.c b/trunk/fs/xfs/xfs_mount.c
index 7f25245da289..b49b82363d20 100644
--- a/trunk/fs/xfs/xfs_mount.c
+++ b/trunk/fs/xfs/xfs_mount.c
@@ -348,7 +348,7 @@ xfs_mount_validate_sb(
 	}
 
 	/*
-	 * More sanity checking.  Most of these were stolen directly from
+	 * More sanity checking. These were stolen directly from
 	 * xfs_repair.
 	 */
 	if (unlikely(
@@ -371,13 +371,23 @@ xfs_mount_validate_sb(
 	    (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)	||
 	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)	||
 	    (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)	||
-	    (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */)	||
-	    sbp->sb_dblocks == 0					||
-	    sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp)			||
-	    sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) {
+	    (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
 		if (loud)
-			XFS_CORRUPTION_ERROR("SB sanity check failed",
-				XFS_ERRLEVEL_LOW, mp, sbp);
+			xfs_warn(mp, "SB sanity check 1 failed");
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	/*
+	 * Sanity check AG count, size fields against data size field
+	 */
+	if (unlikely(
+	    sbp->sb_dblocks == 0 ||
+	    sbp->sb_dblocks >
+	     (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks ||
+	    sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
+			      sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
+		if (loud)
+			xfs_warn(mp, "SB sanity check 2 failed");
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 
@@ -854,8 +864,7 @@ xfs_update_alignment(xfs_mount_t *mp)
 		if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
 		    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
 			if (mp->m_flags & XFS_MOUNT_RETERR) {
-				xfs_warn(mp, "alignment check failed: "
-					 "(sunit/swidth vs. blocksize)");
+				xfs_warn(mp, "alignment check 1 failed");
 				return XFS_ERROR(EINVAL);
 			}
 			mp->m_dalign = mp->m_swidth = 0;
@@ -866,8 +875,6 @@ xfs_update_alignment(xfs_mount_t *mp)
 			mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
 			if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
 				if (mp->m_flags & XFS_MOUNT_RETERR) {
-					xfs_warn(mp, "alignment check failed: "
-						 "(sunit/swidth vs. ag size)");
 					return XFS_ERROR(EINVAL);
 				}
 				xfs_warn(mp,
@@ -882,8 +889,8 @@ xfs_update_alignment(xfs_mount_t *mp)
 				mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
 			} else {
 				if (mp->m_flags & XFS_MOUNT_RETERR) {
-					xfs_warn(mp, "alignment check failed: "
-						"sunit(%d) less than bsize(%d)",
+					xfs_warn(mp,
+		"stripe alignment turned off: sunit(%d) less than bsize(%d)",
 						mp->m_dalign,
 						mp->m_blockmask +1);
 					return XFS_ERROR(EINVAL);
@@ -1089,6 +1096,10 @@ xfs_mount_reset_sbqflags(
 	if (mp->m_flags & XFS_MOUNT_RDONLY)
 		return 0;
 
+#ifdef QUOTADEBUG
+	xfs_notice(mp, "Writing superblock quota changes");
+#endif
+
 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
 	error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
 				      XFS_DEFAULT_LOG_COUNT);
@@ -1521,7 +1532,7 @@ xfs_unmountfs(
 		xfs_warn(mp, "Unable to free reserved block pool. "
 				"Freespace may not be correct on next mount.");
 
-	error = xfs_log_sbcount(mp);
+	error = xfs_log_sbcount(mp, 1);
 	if (error)
 		xfs_warn(mp, "Unable to update superblock counters. "
 				"Freespace may not be correct on next mount.");
@@ -1557,14 +1568,18 @@ xfs_fs_writable(xfs_mount_t *mp)
 /*
  * xfs_log_sbcount
  *
- * Sync the superblock counters to disk.
+ * Called either periodically to keep the on disk superblock values
+ * roughly up to date or from unmount to make sure the values are
+ * correct on a clean unmount.
  *
  * Note this code can be called during the process of freezing, so
- * we may need to use the transaction allocator which does not
+ * we may need to use the transaction allocator which does not
  * block when the transaction subsystem is in its frozen state.
  */
 int
-xfs_log_sbcount(xfs_mount_t *mp)
+xfs_log_sbcount(
+	xfs_mount_t	*mp,
+	uint		sync)
 {
 	xfs_trans_t	*tp;
 	int		error;
@@ -1590,7 +1605,8 @@ xfs_log_sbcount(xfs_mount_t *mp)
 	}
 
 	xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS);
-	xfs_trans_set_sync(tp);
+	if (sync)
+		xfs_trans_set_sync(tp);
 	error = xfs_trans_commit(tp, 0);
 	return error;
 }
@@ -1925,19 +1941,22 @@ xfs_mod_incore_sb_batch(
  * the superblock buffer if it can be locked without sleeping.
  * If it can't then we'll return NULL.
  */
-struct xfs_buf *
+xfs_buf_t *
 xfs_getsb(
-	struct xfs_mount	*mp,
-	int			flags)
+	xfs_mount_t	*mp,
+	int		flags)
 {
-	struct xfs_buf		*bp = mp->m_sb_bp;
+	xfs_buf_t	*bp;
 
-	if (!xfs_buf_trylock(bp)) {
-		if (flags & XBF_TRYLOCK)
+	ASSERT(mp->m_sb_bp != NULL);
+	bp = mp->m_sb_bp;
+	if (flags & XBF_TRYLOCK) {
+		if (!XFS_BUF_CPSEMA(bp)) {
 			return NULL;
-		xfs_buf_lock(bp);
+		}
+	} else {
+		XFS_BUF_PSEMA(bp, PRIBIO);
 	}
-
 	XFS_BUF_HOLD(bp);
 	ASSERT(XFS_BUF_ISDONE(bp));
 	return bp;
diff --git a/trunk/fs/xfs/xfs_mount.h b/trunk/fs/xfs/xfs_mount.h
index bb24dac42a25..3d68bb267c5f 100644
--- a/trunk/fs/xfs/xfs_mount.h
+++ b/trunk/fs/xfs/xfs_mount.h
@@ -371,7 +371,7 @@ typedef struct xfs_mod_sb {
 	int64_t		msb_delta;	/* Change to make to specified field */
 } xfs_mod_sb_t;
 
-extern int	xfs_log_sbcount(xfs_mount_t *);
+extern int	xfs_log_sbcount(xfs_mount_t *, uint);
 extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
 extern int	xfs_mountfs(xfs_mount_t *mp);
 
diff --git a/trunk/fs/xfs/xfs_trans.c b/trunk/fs/xfs/xfs_trans.c
index efc147f0e9b6..c83f63b33aae 100644
--- a/trunk/fs/xfs/xfs_trans.c
+++ b/trunk/fs/xfs/xfs_trans.c
@@ -1426,7 +1426,6 @@ xfs_trans_committed(
 static inline void
 xfs_log_item_batch_insert(
 	struct xfs_ail		*ailp,
-	struct xfs_ail_cursor	*cur,
 	struct xfs_log_item	**log_items,
 	int			nr_items,
 	xfs_lsn_t		commit_lsn)
@@ -1435,7 +1434,7 @@ xfs_log_item_batch_insert(
 
 	spin_lock(&ailp->xa_lock);
 	/* xfs_trans_ail_update_bulk drops ailp->xa_lock */
-	xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
+	xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn);
 
 	for (i = 0; i < nr_items; i++)
 		IOP_UNPIN(log_items[i], 0);
@@ -1453,13 +1452,6 @@ xfs_log_item_batch_insert(
  * as an iclog write error even though we haven't started any IO yet. Hence in
  * this case all we need to do is IOP_COMMITTED processing, followed by an
  * IOP_UNPIN(aborted) call.
- *
- * The AIL cursor is used to optimise the insert process. If commit_lsn is not
- * at the end of the AIL, the insert cursor avoids the need to walk
- * the AIL to find the insertion point on every xfs_log_item_batch_insert()
- * call. This saves a lot of needless list walking and is a net win, even
- * though it slightly increases that amount of AIL lock traffic to set it up
- * and tear it down.
  */
 void
 xfs_trans_committed_bulk(
@@ -1471,13 +1463,8 @@ xfs_trans_committed_bulk(
 #define LOG_ITEM_BATCH_SIZE	32
 	struct xfs_log_item	*log_items[LOG_ITEM_BATCH_SIZE];
 	struct xfs_log_vec	*lv;
-	struct xfs_ail_cursor	cur;
 	int			i = 0;
 
-	spin_lock(&ailp->xa_lock);
-	xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
-	spin_unlock(&ailp->xa_lock);
-
 	/* unpin all the log items */
 	for (lv = log_vector; lv; lv = lv->lv_next ) {
 		struct xfs_log_item	*lip = lv->lv_item;
@@ -1506,9 +1493,7 @@ xfs_trans_committed_bulk(
 			/*
 			 * Not a bulk update option due to unusual item_lsn.
 			 * Push into AIL immediately, rechecking the lsn once
-			 * we have the ail lock. Then unpin the item. This does
-			 * not affect the AIL cursor the bulk insert path is
-			 * using.
+			 * we have the ail lock. Then unpin the item.
 			 */
 			spin_lock(&ailp->xa_lock);
 			if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
@@ -1522,7 +1507,7 @@ xfs_trans_committed_bulk(
 		/* Item is a candidate for bulk AIL insert.  */
 		log_items[i++] = lv->lv_item;
 		if (i >= LOG_ITEM_BATCH_SIZE) {
-			xfs_log_item_batch_insert(ailp, &cur, log_items,
+			xfs_log_item_batch_insert(ailp, log_items,
 					LOG_ITEM_BATCH_SIZE, commit_lsn);
 			i = 0;
 		}
@@ -1530,11 +1515,7 @@ xfs_trans_committed_bulk(
 
 	/* make sure we insert the remainder! */
 	if (i)
-		xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
-
-	spin_lock(&ailp->xa_lock);
-	xfs_trans_ail_cursor_done(ailp, &cur);
-	spin_unlock(&ailp->xa_lock);
+		xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn);
 }
 
 /*
diff --git a/trunk/fs/xfs/xfs_trans_ail.c b/trunk/fs/xfs/xfs_trans_ail.c
index 43233e92f0f6..5fc2380092c8 100644
--- a/trunk/fs/xfs/xfs_trans_ail.c
+++ b/trunk/fs/xfs/xfs_trans_ail.c
@@ -163,11 +163,17 @@ xfs_ail_max_lsn(
 }
 
 /*
- * The cursor keeps track of where our current traversal is up to by tracking
- * the next item in the list for us. However, for this to be safe, removing an
- * object from the AIL needs to invalidate any cursor that points to it. hence
- * the traversal cursor needs to be linked to the struct xfs_ail so that
- * deletion can search all the active cursors for invalidation.
+ * AIL traversal cursor initialisation.
+ *
+ * The cursor keeps track of where our current traversal is up
+ * to by tracking the next item in the list for us. However, for
+ * this to be safe, removing an object from the AIL needs to invalidate
+ * any cursor that points to it. hence the traversal cursor needs to
+ * be linked to the struct xfs_ail so that deletion can search all the
+ * active cursors for invalidation.
+ *
+ * We don't link the push cursor because it is embedded in the struct
+ * xfs_ail and hence easily findable.
  */
 STATIC void
 xfs_trans_ail_cursor_init(
@@ -175,12 +181,31 @@ xfs_trans_ail_cursor_init(
 	struct xfs_ail_cursor	*cur)
 {
 	cur->item = NULL;
-	list_add_tail(&cur->list, &ailp->xa_cursors);
+	if (cur == &ailp->xa_cursors)
+		return;
+
+	cur->next = ailp->xa_cursors.next;
+	ailp->xa_cursors.next = cur;
+}
+
+/*
+ * Set the cursor to the next item, because when we look
+ * up the cursor the current item may have been freed.
+ */
+STATIC void
+xfs_trans_ail_cursor_set(
+	struct xfs_ail		*ailp,
+	struct xfs_ail_cursor	*cur,
+	struct xfs_log_item	*lip)
+{
+	if (lip)
+		cur->item = xfs_ail_next(ailp, lip);
 }
 
 /*
- * Get the next item in the traversal and advance the cursor.  If the cursor
- * was invalidated (indicated by a lip of 1), restart the traversal.
+ * Get the next item in the traversal and advance the cursor.
+ * If the cursor was invalidated (indicated by a lip of 1),
+ * restart the traversal.
  */
 struct xfs_log_item *
 xfs_trans_ail_cursor_next(
@@ -191,31 +216,45 @@ xfs_trans_ail_cursor_next(
 
 	if ((__psint_t)lip & 1)
 		lip = xfs_ail_min(ailp);
-	if (lip)
-		cur->item = xfs_ail_next(ailp, lip);
+	xfs_trans_ail_cursor_set(ailp, cur, lip);
 	return lip;
 }
 
 /*
- * When the traversal is complete, we need to remove the cursor from the list
- * of traversing cursors.
+ * Now that the traversal is complete, we need to remove the cursor
+ * from the list of traversing cursors. Avoid removing the embedded
+ * push cursor, but use the fact it is always present to make the
+ * list deletion simple.
  */
 void
 xfs_trans_ail_cursor_done(
 	struct xfs_ail		*ailp,
-	struct xfs_ail_cursor	*cur)
+	struct xfs_ail_cursor	*done)
 {
-	cur->item = NULL;
-	list_del_init(&cur->list);
+	struct xfs_ail_cursor	*prev = NULL;
+	struct xfs_ail_cursor	*cur;
+
+	done->item = NULL;
+	if (done == &ailp->xa_cursors)
+		return;
+	prev = &ailp->xa_cursors;
+	for (cur = prev->next; cur; prev = cur, cur = prev->next) {
+		if (cur == done) {
+			prev->next = cur->next;
+			break;
+		}
+	}
+	ASSERT(cur);
 }
 
 /*
- * Invalidate any cursor that is pointing to this item. This is called when an
- * item is removed from the AIL. Any cursor pointing to this object is now
- * invalid and the traversal needs to be terminated so it doesn't reference a
- * freed object. We set the low bit of the cursor item pointer so we can
- * distinguish between an invalidation and the end of the list when getting the
- * next item from the cursor.
+ * Invalidate any cursor that is pointing to this item. This is
+ * called when an item is removed from the AIL. Any cursor pointing
+ * to this object is now invalid and the traversal needs to be
+ * terminated so it doesn't reference a freed object. We set the
+ * low bit of the cursor item pointer so we can distinguish between an
+ * invalidation and the end of the list when getting the next item
+ * from the cursor.
  */
 STATIC void
 xfs_trans_ail_cursor_clear(
@@ -224,7 +263,8 @@ xfs_trans_ail_cursor_clear(
 {
 	struct xfs_ail_cursor	*cur;
 
-	list_for_each_entry(cur, &ailp->xa_cursors, list) {
+	/* need to search all cursors */
+	for (cur = &ailp->xa_cursors; cur; cur = cur->next) {
 		if (cur->item == lip)
 			cur->item = (struct xfs_log_item *)
 					((__psint_t)cur->item | 1);
@@ -232,10 +272,9 @@ xfs_trans_ail_cursor_clear(
 }
 
 /*
- * Find the first item in the AIL with the given @lsn by searching in ascending
- * LSN order and initialise the cursor to point to the next item for a
- * ascending traversal.  Pass a @lsn of zero to initialise the cursor to the
- * first item in the AIL. Returns NULL if the list is empty.
+ * Initialise the cursor and return the first item in the AIL whose LSN
+ * is at or after @lsn (the first item in the AIL when @lsn is zero).
+ * Returns NULL if there is no such item.
  */
 xfs_log_item_t *
 xfs_trans_ail_cursor_first(
@@ -246,112 +285,46 @@ xfs_trans_ail_cursor_first(
 	xfs_log_item_t		*lip;
 
 	xfs_trans_ail_cursor_init(ailp, cur);
-
-	if (lsn == 0) {
-		lip = xfs_ail_min(ailp);
+	lip = xfs_ail_min(ailp);
+	if (lsn == 0)
 		goto out;
-	}
 
 	list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
 		if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0)
 			goto out;
 	}
-	return NULL;
-
+	lip = NULL;
 out:
-	if (lip)
-		cur->item = xfs_ail_next(ailp, lip);
+	xfs_trans_ail_cursor_set(ailp, cur, lip);
 	return lip;
 }
 
-static struct xfs_log_item *
-__xfs_trans_ail_cursor_last(
-	struct xfs_ail		*ailp,
-	xfs_lsn_t		lsn)
-{
-	xfs_log_item_t		*lip;
-
-	list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) {
-		if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
-			return lip;
-	}
-	return NULL;
-}
-
-/*
- * Find the last item in the AIL with the given @lsn by searching in descending
- * LSN order and initialise the cursor to point to that item.  If there is no
- * item with the value of @lsn, then it sets the cursor to the last item with an
- * LSN lower than @lsn.  Returns NULL if the list is empty.
- */
-struct xfs_log_item *
-xfs_trans_ail_cursor_last(
-	struct xfs_ail		*ailp,
-	struct xfs_ail_cursor	*cur,
-	xfs_lsn_t		lsn)
-{
-	xfs_trans_ail_cursor_init(ailp, cur);
-	cur->item = __xfs_trans_ail_cursor_last(ailp, lsn);
-	return cur->item;
-}
-
 /*
- * Splice the log item list into the AIL at the given LSN. We splice to the
- * tail of the given LSN to maintain insert order for push traversals. The
- * cursor is optional, allowing repeated updates to the same LSN to avoid
- * repeated traversals.
+ * splice the log item list into the AIL at the given LSN.
  */
 static void
 xfs_ail_splice(
-	struct xfs_ail		*ailp,
-	struct xfs_ail_cursor	*cur,
-	struct list_head	*list,
-	xfs_lsn_t		lsn)
+	struct xfs_ail  *ailp,
+	struct list_head *list,
+	xfs_lsn_t       lsn)
 {
-	struct xfs_log_item	*lip = cur ? cur->item : NULL;
-	struct xfs_log_item	*next_lip;
+	xfs_log_item_t  *next_lip;
 
-	/*
-	 * Get a new cursor if we don't have a placeholder or the existing one
-	 * has been invalidated.
-	 */
-	if (!lip || (__psint_t)lip & 1) {
-		lip = __xfs_trans_ail_cursor_last(ailp, lsn);
-
-		if (!lip) {
-			/* The list is empty, so just splice and return.  */
-			if (cur)
-				cur->item = NULL;
-			list_splice(list, &ailp->xa_ail);
-			return;
-		}
+	/* If the list is empty, just insert the item.  */
+	if (list_empty(&ailp->xa_ail)) {
+		list_splice(list, &ailp->xa_ail);
+		return;
 	}
 
-	/*
-	 * Our cursor points to the item we want to insert _after_, so we have
-	 * to update the cursor to point to the end of the list we are splicing
-	 * in so that it points to the correct location for the next splice.
-	 * i.e. before the splice
-	 *
-	 *  lsn -> lsn -> lsn + x -> lsn + x ...
-	 *          ^
-	 *          | cursor points here
-	 *
-	 * After the splice we have:
-	 *
-	 *  lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ...
-	 *          ^                            ^
-	 *          | cursor points here         | needs to move here
-	 *
-	 * So we set the cursor to the last item in the list to be spliced
-	 * before we execute the splice, resulting in the cursor pointing to
-	 * the correct item after the splice occurs.
-	 */
-	if (cur) {
-		next_lip = list_entry(list->prev, struct xfs_log_item, li_ail);
-		cur->item = next_lip;
+	list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
+		if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
+			break;
 	}
-	list_splice(list, &lip->li_ail);
+
+	ASSERT(&next_lip->li_ail == &ailp->xa_ail ||
+	       XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0);
+
+	list_splice_init(list, &next_lip->li_ail);
 }
 
 /*
@@ -378,7 +351,7 @@ xfs_ail_worker(
 	struct xfs_ail		*ailp = container_of(to_delayed_work(work),
 					struct xfs_ail, xa_work);
 	xfs_mount_t		*mp = ailp->xa_mount;
-	struct xfs_ail_cursor	cur;
+	struct xfs_ail_cursor	*cur = &ailp->xa_cursors;
 	xfs_log_item_t		*lip;
 	xfs_lsn_t		lsn;
 	xfs_lsn_t		target;
@@ -390,12 +363,13 @@ xfs_ail_worker(
 
 	spin_lock(&ailp->xa_lock);
 	target = ailp->xa_target;
-	lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn);
+	xfs_trans_ail_cursor_init(ailp, cur);
+	lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn);
 	if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
 		/*
 		 * AIL is empty or our push has reached the end.
 		 */
-		xfs_trans_ail_cursor_done(ailp, &cur);
+		xfs_trans_ail_cursor_done(ailp, cur);
 		spin_unlock(&ailp->xa_lock);
 		goto out_done;
 	}
@@ -483,12 +457,12 @@ xfs_ail_worker(
 		if (stuck > 100)
 			break;
 
-		lip = xfs_trans_ail_cursor_next(ailp, &cur);
+		lip = xfs_trans_ail_cursor_next(ailp, cur);
 		if (lip == NULL)
 			break;
 		lsn = lip->li_lsn;
 	}
-	xfs_trans_ail_cursor_done(ailp, &cur);
+	xfs_trans_ail_cursor_done(ailp, cur);
 	spin_unlock(&ailp->xa_lock);
 
 	if (flush_log) {
@@ -671,7 +645,6 @@ xfs_trans_unlocked_item(
 void
 xfs_trans_ail_update_bulk(
 	struct xfs_ail		*ailp,
-	struct xfs_ail_cursor	*cur,
 	struct xfs_log_item	**log_items,
 	int			nr_items,
 	xfs_lsn_t		lsn) __releases(ailp->xa_lock)
@@ -701,7 +674,7 @@ xfs_trans_ail_update_bulk(
 		list_add(&lip->li_ail, &tmp);
 	}
 
-	xfs_ail_splice(ailp, cur, &tmp, lsn);
+	xfs_ail_splice(ailp, &tmp, lsn);
 
 	if (!mlip_changed) {
 		spin_unlock(&ailp->xa_lock);
@@ -820,7 +793,6 @@ xfs_trans_ail_init(
 
 	ailp->xa_mount = mp;
 	INIT_LIST_HEAD(&ailp->xa_ail);
-	INIT_LIST_HEAD(&ailp->xa_cursors);
 	spin_lock_init(&ailp->xa_lock);
 	INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker);
 	mp->m_ail = ailp;
diff --git a/trunk/fs/xfs/xfs_trans_buf.c b/trunk/fs/xfs/xfs_trans_buf.c
index 15584fc3ed7d..03b3b7f85a3b 100644
--- a/trunk/fs/xfs/xfs_trans_buf.c
+++ b/trunk/fs/xfs/xfs_trans_buf.c
@@ -81,7 +81,7 @@ _xfs_trans_bjoin(
 	struct xfs_buf_log_item	*bip;
 
 	ASSERT(XFS_BUF_ISBUSY(bp));
-	ASSERT(bp->b_transp == NULL);
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
 
 	/*
 	 * The xfs_buf_log_item pointer is stored in b_fsprivate.  If
@@ -89,7 +89,7 @@ _xfs_trans_bjoin(
 	 * The checks to see if one is there are in xfs_buf_item_init().
 	 */
 	xfs_buf_item_init(bp, tp->t_mountp);
-	bip = bp->b_fspriv;
+	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
 	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
 	ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
 	ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
@@ -110,7 +110,7 @@ _xfs_trans_bjoin(
 	 * Initialize b_fsprivate2 so we can find it with incore_match()
 	 * in xfs_trans_get_buf() and friends above.
 	 */
-	bp->b_transp = tp;
+	XFS_BUF_SET_FSPRIVATE2(bp, tp);
 
 }
 
@@ -160,7 +160,7 @@ xfs_trans_get_buf(xfs_trans_t	*tp,
 	 */
 	bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
 	if (bp != NULL) {
-		ASSERT(xfs_buf_islocked(bp));
+		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
 		if (XFS_FORCED_SHUTDOWN(tp->t_mountp))
 			XFS_BUF_SUPER_STALE(bp);
 
@@ -172,8 +172,8 @@ xfs_trans_get_buf(xfs_trans_t	*tp,
 		else if (XFS_BUF_ISSTALE(bp))
 			ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
 
-		ASSERT(bp->b_transp == tp);
-		bip = bp->b_fspriv;
+		ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+		bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
 		ASSERT(bip != NULL);
 		ASSERT(atomic_read(&bip->bli_refcount) > 0);
 		bip->bli_recur++;
@@ -232,8 +232,8 @@ xfs_trans_getsb(xfs_trans_t	*tp,
 	 * recursion count and return the buffer to the caller.
 	 */
 	bp = mp->m_sb_bp;
-	if (bp->b_transp == tp) {
-		bip = bp->b_fspriv;
+	if (XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp) {
+		bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
 		ASSERT(bip != NULL);
 		ASSERT(atomic_read(&bip->bli_refcount) > 0);
 		bip->bli_recur++;
@@ -327,9 +327,9 @@ xfs_trans_read_buf(
 	 */
 	bp = xfs_trans_buf_item_match(tp, target, blkno, len);
 	if (bp != NULL) {
-		ASSERT(xfs_buf_islocked(bp));
-		ASSERT(bp->b_transp == tp);
-		ASSERT(bp->b_fspriv != NULL);
+		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+		ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+		ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
 		ASSERT((XFS_BUF_ISERROR(bp)) == 0);
 		if (!(XFS_BUF_ISDONE(bp))) {
 			trace_xfs_trans_read_buf_io(bp, _RET_IP_);
@@ -363,7 +363,7 @@ xfs_trans_read_buf(
 		}
 
 
-		bip = bp->b_fspriv;
+		bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
 		bip->bli_recur++;
 
 		ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -460,30 +460,32 @@ xfs_trans_brelse(xfs_trans_t	*tp,
 		 xfs_buf_t	*bp)
 {
 	xfs_buf_log_item_t	*bip;
+	xfs_log_item_t		*lip;
 
 	/*
 	 * Default to a normal brelse() call if the tp is NULL.
 	 */
 	if (tp == NULL) {
-		struct xfs_log_item	*lip = bp->b_fspriv;
-
-		ASSERT(bp->b_transp == NULL);
-
+		ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
 		/*
 		 * If there's a buf log item attached to the buffer,
 		 * then let the AIL know that the buffer is being
 		 * unlocked.
 		 */
-		if (lip != NULL && lip->li_type == XFS_LI_BUF) {
-			bip = bp->b_fspriv;
-			xfs_trans_unlocked_item(bip->bli_item.li_ailp, lip);
+		if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
+			lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+			if (lip->li_type == XFS_LI_BUF) {
+				bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
+				xfs_trans_unlocked_item(bip->bli_item.li_ailp,
+							lip);
+			}
 		}
 		xfs_buf_relse(bp);
 		return;
 	}
 
-	ASSERT(bp->b_transp == tp);
-	bip = bp->b_fspriv;
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
 	ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
 	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
 	ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
@@ -554,7 +556,7 @@ xfs_trans_brelse(xfs_trans_t	*tp,
 		xfs_buf_item_relse(bp);
 		bip = NULL;
 	}
-	bp->b_transp = NULL;
+	XFS_BUF_SET_FSPRIVATE2(bp, NULL);
 
 	/*
 	 * If we've still got a buf log item on the buffer, then
@@ -579,15 +581,16 @@ void
 xfs_trans_bhold(xfs_trans_t	*tp,
 		xfs_buf_t	*bp)
 {
-	xfs_buf_log_item_t	*bip = bp->b_fspriv;
+	xfs_buf_log_item_t	*bip;
 
 	ASSERT(XFS_BUF_ISBUSY(bp));
-	ASSERT(bp->b_transp == tp);
-	ASSERT(bip != NULL);
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+
+	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
 	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
 	ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
-
 	bip->bli_flags |= XFS_BLI_HOLD;
 	trace_xfs_trans_bhold(bip);
 }
@@ -600,17 +603,19 @@ void
 xfs_trans_bhold_release(xfs_trans_t	*tp,
 			xfs_buf_t	*bp)
 {
-	xfs_buf_log_item_t	*bip = bp->b_fspriv;
+	xfs_buf_log_item_t	*bip;
 
 	ASSERT(XFS_BUF_ISBUSY(bp));
-	ASSERT(bp->b_transp == tp);
-	ASSERT(bip != NULL);
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+
+	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
 	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
 	ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
 	ASSERT(bip->bli_flags & XFS_BLI_HOLD);
-
 	bip->bli_flags &= ~XFS_BLI_HOLD;
+
 	trace_xfs_trans_bhold_release(bip);
 }
 
@@ -629,14 +634,14 @@ xfs_trans_log_buf(xfs_trans_t	*tp,
 		  uint		first,
 		  uint		last)
 {
-	xfs_buf_log_item_t	*bip = bp->b_fspriv;
+	xfs_buf_log_item_t	*bip;
 
 	ASSERT(XFS_BUF_ISBUSY(bp));
-	ASSERT(bp->b_transp == tp);
-	ASSERT(bip != NULL);
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
 	ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp)));
-	ASSERT(bp->b_iodone == NULL ||
-	       bp->b_iodone == xfs_buf_iodone_callbacks);
+	ASSERT((XFS_BUF_IODONE_FUNC(bp) == NULL) ||
+	       (XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks));
 
 	/*
 	 * Mark the buffer as needing to be written out eventually,
@@ -651,8 +656,9 @@ xfs_trans_log_buf(xfs_trans_t	*tp,
 	XFS_BUF_DELAYWRITE(bp);
 	XFS_BUF_DONE(bp);
 
+	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
-	bp->b_iodone = xfs_buf_iodone_callbacks;
+	XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
 	bip->bli_item.li_cb = xfs_buf_iodone;
 
 	trace_xfs_trans_log_buf(bip);
@@ -700,11 +706,13 @@ xfs_trans_binval(
 	xfs_trans_t	*tp,
 	xfs_buf_t	*bp)
 {
-	xfs_buf_log_item_t	*bip = bp->b_fspriv;
+	xfs_buf_log_item_t	*bip;
 
 	ASSERT(XFS_BUF_ISBUSY(bp));
-	ASSERT(bp->b_transp == tp);
-	ASSERT(bip != NULL);
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+
+	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
 
 	trace_xfs_trans_binval(bip);
@@ -772,11 +780,13 @@ xfs_trans_inode_buf(
 	xfs_trans_t	*tp,
 	xfs_buf_t	*bp)
 {
-	xfs_buf_log_item_t	*bip = bp->b_fspriv;
+	xfs_buf_log_item_t	*bip;
 
 	ASSERT(XFS_BUF_ISBUSY(bp));
-	ASSERT(bp->b_transp == tp);
-	ASSERT(bip != NULL);
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+
+	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
 
 	bip->bli_flags |= XFS_BLI_INODE_BUF;
@@ -796,11 +806,13 @@ xfs_trans_stale_inode_buf(
 	xfs_trans_t	*tp,
 	xfs_buf_t	*bp)
 {
-	xfs_buf_log_item_t	*bip = bp->b_fspriv;
+	xfs_buf_log_item_t	*bip;
 
 	ASSERT(XFS_BUF_ISBUSY(bp));
-	ASSERT(bp->b_transp == tp);
-	ASSERT(bip != NULL);
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+
+	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
 
 	bip->bli_flags |= XFS_BLI_STALE_INODE;
@@ -821,11 +833,13 @@ xfs_trans_inode_alloc_buf(
 	xfs_trans_t	*tp,
 	xfs_buf_t	*bp)
 {
-	xfs_buf_log_item_t	*bip = bp->b_fspriv;
+	xfs_buf_log_item_t	*bip;
 
 	ASSERT(XFS_BUF_ISBUSY(bp));
-	ASSERT(bp->b_transp == tp);
-	ASSERT(bip != NULL);
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+
+	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
 
 	bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
@@ -849,14 +863,16 @@ xfs_trans_dquot_buf(
 	xfs_buf_t	*bp,
 	uint		type)
 {
-	xfs_buf_log_item_t	*bip = bp->b_fspriv;
+	xfs_buf_log_item_t	*bip;
 
 	ASSERT(XFS_BUF_ISBUSY(bp));
-	ASSERT(bp->b_transp == tp);
-	ASSERT(bip != NULL);
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
 	ASSERT(type == XFS_BLF_UDQUOT_BUF ||
 	       type == XFS_BLF_PDQUOT_BUF ||
 	       type == XFS_BLF_GDQUOT_BUF);
+
+	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
 
 	bip->bli_format.blf_flags |= type;
diff --git a/trunk/fs/xfs/xfs_trans_inode.c b/trunk/fs/xfs/xfs_trans_inode.c
index c8dea2fd7e68..048b0c689d3e 100644
--- a/trunk/fs/xfs/xfs_trans_inode.c
+++ b/trunk/fs/xfs/xfs_trans_inode.c
@@ -55,6 +55,7 @@ xfs_trans_ijoin(
 {
 	xfs_inode_log_item_t	*iip;
 
+	ASSERT(ip->i_transp == NULL);
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 	if (ip->i_itemp == NULL)
 		xfs_inode_item_init(ip, ip->i_mount);
@@ -67,6 +68,12 @@ xfs_trans_ijoin(
 	xfs_trans_add_item(tp, &iip->ili_item);
 
 	xfs_trans_inode_broot_debug(ip);
+
+	/*
+	 * Initialize i_transp so we can find it with xfs_inode_incore()
+	 * in xfs_trans_iget() above.
+	 */
+	ip->i_transp = tp;
 }
 
 /*
@@ -104,6 +111,7 @@ xfs_trans_ichgtime(
 
 	ASSERT(tp);
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+	ASSERT(ip->i_transp == tp);
 
 	tv = current_fs_time(inode->i_sb);
 
@@ -132,6 +140,7 @@ xfs_trans_log_inode(
 	xfs_inode_t	*ip,
 	uint		flags)
 {
+	ASSERT(ip->i_transp == tp);
 	ASSERT(ip->i_itemp != NULL);
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
diff --git a/trunk/fs/xfs/xfs_trans_priv.h b/trunk/fs/xfs/xfs_trans_priv.h
index 212946b97239..6b164e9e9a1f 100644
--- a/trunk/fs/xfs/xfs_trans_priv.h
+++ b/trunk/fs/xfs/xfs_trans_priv.h
@@ -53,7 +53,7 @@ void	xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
  * of the list to trigger traversal restarts.
  */
 struct xfs_ail_cursor {
-	struct list_head	list;
+	struct xfs_ail_cursor	*next;
 	struct xfs_log_item	*item;
 };
 
@@ -66,7 +66,7 @@ struct xfs_ail {
 	struct xfs_mount	*xa_mount;
 	struct list_head	xa_ail;
 	xfs_lsn_t		xa_target;
-	struct list_head	xa_cursors;
+	struct xfs_ail_cursor	xa_cursors;
 	spinlock_t		xa_lock;
 	struct delayed_work	xa_work;
 	xfs_lsn_t		xa_last_pushed_lsn;
@@ -82,7 +82,6 @@ struct xfs_ail {
 extern struct workqueue_struct	*xfs_ail_wq;	/* AIL workqueue */
 
 void	xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
-				struct xfs_ail_cursor *cur,
 				struct xfs_log_item **log_items, int nr_items,
 				xfs_lsn_t lsn) __releases(ailp->xa_lock);
 static inline void
@@ -91,7 +90,7 @@ xfs_trans_ail_update(
 	struct xfs_log_item	*lip,
 	xfs_lsn_t		lsn) __releases(ailp->xa_lock)
 {
-	xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
+	xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn);
 }
 
 void	xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
@@ -112,13 +111,10 @@ xfs_lsn_t		xfs_ail_min_lsn(struct xfs_ail *ailp);
 void			xfs_trans_unlocked_item(struct xfs_ail *,
 					xfs_log_item_t *);
 
-struct xfs_log_item *	xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
+struct xfs_log_item	*xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
 					struct xfs_ail_cursor *cur,
 					xfs_lsn_t lsn);
-struct xfs_log_item *	xfs_trans_ail_cursor_last(struct xfs_ail *ailp,
-					struct xfs_ail_cursor *cur,
-					xfs_lsn_t lsn);
-struct xfs_log_item *	xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
+struct xfs_log_item	*xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
 					struct xfs_ail_cursor *cur);
 void			xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
 					struct xfs_ail_cursor *cur);
diff --git a/trunk/fs/xfs/xfs_vnodeops.c b/trunk/fs/xfs/xfs_vnodeops.c
index 88d121486c52..619720705bc6 100644
--- a/trunk/fs/xfs/xfs_vnodeops.c
+++ b/trunk/fs/xfs/xfs_vnodeops.c
@@ -50,6 +50,430 @@
 #include "xfs_vnodeops.h"
 #include "xfs_trace.h"
 
+int
+xfs_setattr(
+	struct xfs_inode	*ip,
+	struct iattr		*iattr,
+	int			flags)
+{
+	xfs_mount_t		*mp = ip->i_mount;
+	struct inode		*inode = VFS_I(ip);
+	int			mask = iattr->ia_valid;
+	xfs_trans_t		*tp;
+	int			code;
+	uint			lock_flags;
+	uint			commit_flags=0;
+	uid_t			uid=0, iuid=0;
+	gid_t			gid=0, igid=0;
+	struct xfs_dquot	*udqp, *gdqp, *olddquot1, *olddquot2;
+	int			need_iolock = 1;
+
+	trace_xfs_setattr(ip);
+
+	if (mp->m_flags & XFS_MOUNT_RDONLY)
+		return XFS_ERROR(EROFS);
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	code = -inode_change_ok(inode, iattr);
+	if (code)
+		return code;
+
+	olddquot1 = olddquot2 = NULL;
+	udqp = gdqp = NULL;
+
+	/*
+	 * If disk quotas are on, make sure that the dquots exist on disk
+	 * before we start any other transactions. Trying to do this later
+	 * is messy. We don't bother taking a readlock to look at the ids
+	 * in the inode here, because we can't hold it across trans_reserve.
+	 * If the IDs do change before we take the ilock, we're covered
+	 * because the i_*dquot fields will get updated anyway.
+	 */
+	if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
+		uint	qflags = 0;
+
+		if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
+			uid = iattr->ia_uid;
+			qflags |= XFS_QMOPT_UQUOTA;
+		} else {
+			uid = ip->i_d.di_uid;
+		}
+		if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
+			gid = iattr->ia_gid;
+			qflags |= XFS_QMOPT_GQUOTA;
+		}  else {
+			gid = ip->i_d.di_gid;
+		}
+
+		/*
+		 * We take a reference when we initialize udqp and gdqp,
+		 * so it is important that we never blindly double trip on
+		 * the same variable. See xfs_create() for an example.
+		 */
+		ASSERT(udqp == NULL);
+		ASSERT(gdqp == NULL);
+		code = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
+					 qflags, &udqp, &gdqp);
+		if (code)
+			return code;
+	}
+
+	/*
+	 * For the other attributes, take the inode lock and do an error
+	 * checking pass first; only a non-size change has a transaction yet.
+	 */
+	tp = NULL;
+	lock_flags = XFS_ILOCK_EXCL;
+	if (flags & XFS_ATTR_NOLOCK)
+		need_iolock = 0;
+	if (!(mask & ATTR_SIZE)) {
+		tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+		commit_flags = 0;
+		code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp),
+					 0, 0, 0);
+		if (code) {
+			lock_flags = 0;	/* ilock not taken yet */
+			goto error_return;
+		}
+	} else {
+		if (need_iolock)
+			lock_flags |= XFS_IOLOCK_EXCL;
+	}
+
+	xfs_ilock(ip, lock_flags);
+
+	/*
+	 * Ownership change, pass 1: work out the new ids, reserve quota.
+	 */
+	if (mask & (ATTR_UID|ATTR_GID)) {
+		/*
+		 * These IDs could have changed since we last looked at them.
+		 * But, we're assured that if the ownership did change
+		 * while we didn't have the inode locked, inode's dquot(s)
+		 * would have changed also.
+		 */
+		iuid = ip->i_d.di_uid;
+		igid = ip->i_d.di_gid;
+		gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
+		uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
+
+		/*
+		 * Do a quota reservation only if uid/gid is actually
+		 * going to change.
+		 */
+		if (XFS_IS_QUOTA_RUNNING(mp) &&
+		    ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+		     (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
+			ASSERT(tp);
+			code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
+						capable(CAP_FOWNER) ?
+						XFS_QMOPT_FORCE_RES : 0);
+			if (code)	/* out of quota */
+				goto error_return;
+		}
+	}
+
+	/*
+	 * Truncate file.  Must have write permission and not be a directory.
+	 */
+	if (mask & ATTR_SIZE) {
+		/* Short circuit the truncate case for zero length files */
+		if (iattr->ia_size == 0 &&
+		    ip->i_size == 0 && ip->i_d.di_nextents == 0) {
+			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+			lock_flags &= ~XFS_ILOCK_EXCL;
+			if (mask & ATTR_CTIME) {
+				inode->i_mtime = inode->i_ctime =
+						current_fs_time(inode->i_sb);
+				xfs_mark_inode_dirty_sync(ip);
+			}
+			code = 0;
+			goto error_return;
+		}
+
+		if (S_ISDIR(ip->i_d.di_mode)) {
+			code = XFS_ERROR(EISDIR);
+			goto error_return;
+		} else if (!S_ISREG(ip->i_d.di_mode)) {
+			code = XFS_ERROR(EINVAL);
+			goto error_return;
+		}
+
+		/*
+		 * Make sure that the dquots are attached to the inode.
+		 */
+		code = xfs_qm_dqattach_locked(ip, 0);
+		if (code)
+			goto error_return;
+
+		/*
+		 * Now we can make the changes.  Before we join the inode
+		 * to the transaction, if ATTR_SIZE is set then take care of
+		 * the part of the truncation that must be done without the
+		 * inode lock.  This needs to be done before joining the inode
+		 * to the transaction, because the inode cannot be unlocked
+		 * once it is a part of the transaction.
+		 */
+		if (iattr->ia_size > ip->i_size) {
+			/*
+			 * Do the first part of growing a file: zero any data
+			 * in the last block that is beyond the old EOF.  We
+			 * need to do this before the inode is joined to the
+			 * transaction to modify the i_size.
+			 */
+			code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
+			if (code)
+				goto error_return;
+		}
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		lock_flags &= ~XFS_ILOCK_EXCL;
+
+		/*
+		 * We are going to log the inode size change in this
+		 * transaction so any previous writes that are beyond the on
+		 * disk EOF and the new EOF that have not been written out need
+		 * to be written here. If we do not write the data out, we
+		 * expose ourselves to the null files problem.
+		 *
+		 * Only flush from the on disk size to the smaller of the in
+		 * memory file size or the new size as that's the range we
+		 * really care about here and prevents waiting for other data
+		 * not within the range we care about here.
+		 */
+		if (ip->i_size != ip->i_d.di_size &&
+		    iattr->ia_size > ip->i_d.di_size) {
+			code = xfs_flush_pages(ip,
+					ip->i_d.di_size, iattr->ia_size,
+					XBF_ASYNC, FI_NONE);
+			if (code)
+				goto error_return;
+		}
+
+		/* wait for all I/O to complete */
+		xfs_ioend_wait(ip);
+
+		code = -block_truncate_page(inode->i_mapping, iattr->ia_size,
+					    xfs_get_blocks);
+		if (code)
+			goto error_return;
+
+		tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
+		code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
+					 XFS_TRANS_PERM_LOG_RES,
+					 XFS_ITRUNCATE_LOG_COUNT);
+		if (code)
+			goto error_return;
+
+		truncate_setsize(inode, iattr->ia_size);
+
+		commit_flags = XFS_TRANS_RELEASE_LOG_RES;
+		lock_flags |= XFS_ILOCK_EXCL;
+
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+		xfs_trans_ijoin(tp, ip);
+
+		/*
+		 * Only change the c/mtime if we are changing the size
+		 * or we are explicitly asked to change it. This handles
+		 * the semantic difference between truncate() and ftruncate()
+		 * as implemented in the VFS.
+		 *
+		 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME
+		 * is a special case where we need to update the times despite
+		 * not having these flags set.  For all other operations the
+		 * VFS sets these flags explicitly if it wants a timestamp
+		 * update.
+		 */
+		if (iattr->ia_size != ip->i_size &&
+		    (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
+			iattr->ia_ctime = iattr->ia_mtime =
+				current_fs_time(inode->i_sb);
+			mask |= ATTR_CTIME | ATTR_MTIME;
+		}
+
+		if (iattr->ia_size > ip->i_size) {
+			ip->i_d.di_size = iattr->ia_size;
+			ip->i_size = iattr->ia_size;
+			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+		} else if (iattr->ia_size <= ip->i_size ||
+			   (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
+			/*
+			 * Taken whenever the file is not growing.  Signal a
+			 * sync transaction unless we're truncating an already
+			 * unlinked file on a wsync filesystem.
+			 */
+			code = xfs_itruncate_finish(&tp, ip, iattr->ia_size,
+					    XFS_DATA_FORK,
+					    ((ip->i_d.di_nlink != 0 ||
+					      !(mp->m_flags & XFS_MOUNT_WSYNC))
+					     ? 1 : 0));
+			if (code)
+				goto abort_return;
+			/*
+			 * Truncated "down", so we're removing references
+			 * to old data here - if we now delay flushing for
+			 * a long time, we expose ourselves unduly to the
+			 * notorious NULL files problem.  So, we mark this
+			 * vnode and flush it when the file is closed, and
+			 * do not wait the usual (long) time for writeout.
+			 */
+			xfs_iflags_set(ip, XFS_ITRUNCATED);
+		}
+	} else if (tp) {
+		xfs_trans_ijoin(tp, ip);
+	}
+
+	/*
+	 * Ownership change, pass 2: apply the new ids.
+	 */
+	if (mask & (ATTR_UID|ATTR_GID)) {
+		/*
+		 * CAP_FSETID overrides the following restrictions:
+		 *
+		 * The set-user-ID and set-group-ID bits of a file will be
+		 * cleared upon successful return from chown()
+		 */
+		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+		    !capable(CAP_FSETID)) {
+			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+		}
+
+		/*
+		 * Change the ownerships and register quota modifications
+		 * in the transaction.
+		 */
+		if (iuid != uid) {
+			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
+				ASSERT(mask & ATTR_UID);
+				ASSERT(udqp);
+				olddquot1 = xfs_qm_vop_chown(tp, ip,
+							&ip->i_udquot, udqp);
+			}
+			ip->i_d.di_uid = uid;
+			inode->i_uid = uid;
+		}
+		if (igid != gid) {
+			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
+				ASSERT(!XFS_IS_PQUOTA_ON(mp));
+				ASSERT(mask & ATTR_GID);
+				ASSERT(gdqp);
+				olddquot2 = xfs_qm_vop_chown(tp, ip,
+							&ip->i_gdquot, gdqp);
+			}
+			ip->i_d.di_gid = gid;
+			inode->i_gid = gid;
+		}
+	}
+
+	/*
+	 * Change file access modes.
+	 */
+	if (mask & ATTR_MODE) {
+		umode_t mode = iattr->ia_mode;
+
+		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+			mode &= ~S_ISGID;
+
+		ip->i_d.di_mode &= S_IFMT;
+		ip->i_d.di_mode |= mode & ~S_IFMT;
+
+		inode->i_mode &= S_IFMT;
+		inode->i_mode |= mode & ~S_IFMT;
+	}
+
+	/*
+	 * Change file access or modified times.
+	 */
+	if (mask & ATTR_ATIME) {
+		inode->i_atime = iattr->ia_atime;
+		ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
+		ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
+		ip->i_update_core = 1;
+	}
+	if (mask & ATTR_CTIME) {
+		inode->i_ctime = iattr->ia_ctime;
+		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
+		ip->i_update_core = 1;
+	}
+	if (mask & ATTR_MTIME) {
+		inode->i_mtime = iattr->ia_mtime;
+		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
+		ip->i_update_core = 1;
+	}
+
+	/*
+	 * And finally, log the inode core if any attribute in it
+	 * has been changed.
+	 */
+	if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE|
+		    ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
+		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+	XFS_STATS_INC(xs_ig_attrchg);
+
+	/*
+	 * If this is a synchronous mount, make sure that the
+	 * transaction goes to disk before returning to the user.
+	 * This is slightly sub-optimal in that truncates require
+	 * two sync transactions instead of one for wsync filesystems.
+	 * One for the truncate and one for the timestamps since we
+	 * don't want to change the timestamps unless we're sure the
+	 * truncate worked.  Truncates are less than 1% of the laddis
+	 * mix so this probably isn't worth the trouble to optimize.
+	 */
+	code = 0;	/* NOTE(review): dead store, reassigned by the commit below */
+	if (mp->m_flags & XFS_MOUNT_WSYNC)
+		xfs_trans_set_sync(tp);
+
+	code = xfs_trans_commit(tp, commit_flags);
+
+	xfs_iunlock(ip, lock_flags);
+
+	/*
+	 * Release any dquot(s) the inode had kept before chown, plus udqp/gdqp.
+	 */
+	xfs_qm_dqrele(olddquot1);
+	xfs_qm_dqrele(olddquot2);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
+
+	if (code)
+		return code;
+
+	/*
+	 * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
+	 *	     update.  We could avoid this with linked transactions
+	 *	     and passing down the transaction pointer all the way
+	 *	     to attr_set.  No previous user of the generic
+	 *	     Posix ACL code seems to care about this issue either.
+	 */
+	if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
+		code = -xfs_acl_chmod(inode);
+		if (code)
+			return XFS_ERROR(code);
+	}
+
+	return 0;
+
+ abort_return:
+	commit_flags |= XFS_TRANS_ABORT;
+ error_return:
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
+	if (tp) {
+		xfs_trans_cancel(tp, commit_flags);
+	}
+	if (lock_flags != 0) {
+		xfs_iunlock(ip, lock_flags);
+	}
+	return code;
+}
+
 /*
  * The maximum pathlen is 1024 bytes. Since the minimum file system
  * blocksize is 512 bytes, we can get a max of 2 extents back from
@@ -197,6 +621,13 @@ xfs_free_eofblocks(
 		 */
 		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
 
+		/*
+		 * Do the xfs_itruncate_start() call before
+		 * reserving any log space, because
+		 * xfs_itruncate_start() will call into the
+		 * buffer cache and we can't do that within
+		 * a transaction.
+		 */
 		if (flags & XFS_FREE_EOF_TRYLOCK) {
 			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
 				xfs_trans_cancel(tp, 0);
@@ -205,6 +636,13 @@ xfs_free_eofblocks(
 		} else {
 			xfs_ilock(ip, XFS_IOLOCK_EXCL);
 		}
+		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
+				    ip->i_size);
+		if (error) {
+			xfs_trans_cancel(tp, 0);
+			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+			return error;
+		}
 
 		error = xfs_trans_reserve(tp, 0,
 					  XFS_ITRUNCATE_LOG_RES(mp),
@@ -220,12 +658,15 @@ xfs_free_eofblocks(
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		xfs_trans_ijoin(tp, ip);
 
-		error = xfs_itruncate_data(&tp, ip, ip->i_size);
+		error = xfs_itruncate_finish(&tp, ip,
+					     ip->i_size,
+					     XFS_DATA_FORK,
+					     0);
+		/*
+		 * If we get an error at this point we
+		 * simply don't bother truncating the file.
+		 */
 		if (error) {
-			/*
-			 * If we get an error at this point we simply don't
-			 * bother truncating the file.
-			 */
 			xfs_trans_cancel(tp,
 					 (XFS_TRANS_RELEASE_LOG_RES |
 					  XFS_TRANS_ABORT));
@@ -643,9 +1084,20 @@ xfs_inactive(
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
 	if (truncate) {
+		/*
+		 * Do the xfs_itruncate_start() call before
+		 * reserving any log space because itruncate_start
+		 * will call into the buffer cache and we can't
+		 * do that within a transaction.
+		 */
 		xfs_ilock(ip, XFS_IOLOCK_EXCL);
 
-		xfs_ioend_wait(ip);
+		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
+		if (error) {
+			xfs_trans_cancel(tp, 0);
+			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+			return VN_INACTIVE_CACHE;
+		}
 
 		error = xfs_trans_reserve(tp, 0,
 					  XFS_ITRUNCATE_LOG_RES(mp),
@@ -662,7 +1114,16 @@ xfs_inactive(
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		xfs_trans_ijoin(tp, ip);
 
-		error = xfs_itruncate_data(&tp, ip, 0);
+		/*
+		 * Normally we have to run xfs_itruncate_finish() sync.
+		 * But if the filesystem is wsync and we're in the inactive
+		 * path, then we know that nlink == 0, and that the
+		 * transaction that made nlink == 0 is permanently committed
+		 * since xfs_remove runs as a synchronous transaction.
+		 */
+		error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK,
+				(!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0));
+
 		if (error) {
 			xfs_trans_cancel(tp,
 				XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -1969,8 +2430,6 @@ xfs_zero_remaining_bytes(
 	if (!bp)
 		return XFS_ERROR(ENOMEM);
 
-	xfs_buf_unlock(bp);
-
 	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
 		offset_fsb = XFS_B_TO_FSBT(mp, offset);
 		nimap = 1;
@@ -2325,7 +2784,7 @@ xfs_change_file_space(
 		iattr.ia_valid = ATTR_SIZE;
 		iattr.ia_size = startoffset;
 
-		error = xfs_setattr_size(ip, &iattr, attr_flags);
+		error = xfs_setattr(ip, &iattr, attr_flags);
 
 		if (error)
 			return error;
diff --git a/trunk/fs/xfs/xfs_vnodeops.h b/trunk/fs/xfs/xfs_vnodeops.h
index 35d3d513e1e9..3bcd23353d6c 100644
--- a/trunk/fs/xfs/xfs_vnodeops.h
+++ b/trunk/fs/xfs/xfs_vnodeops.h
@@ -13,8 +13,7 @@ struct xfs_inode;
 struct xfs_iomap;
 
 
-int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags);
-int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap, int flags);
+int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
 #define	XFS_ATTR_DMI		0x01	/* invocation from a DMI function */
 #define	XFS_ATTR_NONBLOCK	0x02	/* return EAGAIN if operation would block */
 #define XFS_ATTR_NOLOCK		0x04	/* Don't grab any conflicting locks */
diff --git a/trunk/include/linux/mfd/tmio.h b/trunk/include/linux/mfd/tmio.h
index 0dc98044d8b7..5a90266c3a5a 100644
--- a/trunk/include/linux/mfd/tmio.h
+++ b/trunk/include/linux/mfd/tmio.h
@@ -68,11 +68,6 @@
  * controller and report the event to the driver.
  */
 #define TMIO_MMC_HAS_COLD_CD		(1 << 3)
-/*
- * Some controllers require waiting for the SD bus to become
- * idle before writing to some registers.
- */
-#define TMIO_MMC_HAS_IDLE_WAIT		(1 << 4)
 
 int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base);
 int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base);
@@ -85,8 +80,6 @@ struct tmio_mmc_dma {
 	int alignment_shift;
 };
 
-struct tmio_mmc_host;
-
 /*
  * data for the MMC controller
  */
@@ -101,7 +94,6 @@ struct tmio_mmc_data {
 	void (*set_pwr)(struct platform_device *host, int state);
 	void (*set_clk_div)(struct platform_device *host, int state);
 	int (*get_cd)(struct platform_device *host);
-	int (*write16_hook)(struct tmio_mmc_host *host, int addr);
 };
 
 static inline void tmio_mmc_cd_wakeup(struct tmio_mmc_data *pdata)
diff --git a/trunk/include/linux/mmc/boot.h b/trunk/include/linux/mmc/boot.h
index 23acc3baa07d..39d787c229cb 100644
--- a/trunk/include/linux/mmc/boot.h
+++ b/trunk/include/linux/mmc/boot.h
@@ -1,7 +1,7 @@
-#ifndef LINUX_MMC_BOOT_H
-#define LINUX_MMC_BOOT_H
+#ifndef MMC_BOOT_H
+#define MMC_BOOT_H
 
 enum { MMC_PROGRESS_ENTER, MMC_PROGRESS_INIT,
        MMC_PROGRESS_LOAD, MMC_PROGRESS_DONE };
 
-#endif /* LINUX_MMC_BOOT_H */
+#endif
diff --git a/trunk/include/linux/mmc/card.h b/trunk/include/linux/mmc/card.h
index b460fc2af8a1..6ad43554ac05 100644
--- a/trunk/include/linux/mmc/card.h
+++ b/trunk/include/linux/mmc/card.h
@@ -403,4 +403,4 @@ extern void mmc_unregister_driver(struct mmc_driver *);
 extern void mmc_fixup_device(struct mmc_card *card,
 			     const struct mmc_fixup *table);
 
-#endif /* LINUX_MMC_CARD_H */
+#endif
diff --git a/trunk/include/linux/mmc/core.h b/trunk/include/linux/mmc/core.h
index b8b1b7a311f1..b6718e549a51 100644
--- a/trunk/include/linux/mmc/core.h
+++ b/trunk/include/linux/mmc/core.h
@@ -117,7 +117,6 @@ struct mmc_data {
 
 	unsigned int		sg_len;		/* size of scatter list */
 	struct scatterlist	*sg;		/* I/O scatter list */
-	s32			host_cookie;	/* host private data */
 };
 
 struct mmc_request {
@@ -126,16 +125,13 @@ struct mmc_request {
 	struct mmc_data		*data;
 	struct mmc_command	*stop;
 
-	struct completion	completion;
+	void			*done_data;	/* completion data */
 	void			(*done)(struct mmc_request *);/* completion function */
 };
 
 struct mmc_host;
 struct mmc_card;
-struct mmc_async_req;
 
-extern struct mmc_async_req *mmc_start_req(struct mmc_host *,
-					   struct mmc_async_req *, int *);
 extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *);
 extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int);
 extern int mmc_app_cmd(struct mmc_host *, struct mmc_card *);
@@ -159,7 +155,6 @@ extern int mmc_can_trim(struct mmc_card *card);
 extern int mmc_can_secure_erase_trim(struct mmc_card *card);
 extern int mmc_erase_group_aligned(struct mmc_card *card, unsigned int from,
 				   unsigned int nr);
-extern unsigned int mmc_calc_max_discard(struct mmc_card *card);
 
 extern int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen);
 
@@ -184,4 +179,4 @@ static inline void mmc_claim_host(struct mmc_host *host)
 
 extern u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max);
 
-#endif /* LINUX_MMC_CORE_H */
+#endif
diff --git a/trunk/include/linux/mmc/dw_mmc.h b/trunk/include/linux/mmc/dw_mmc.h
index 6b46819705d1..bdd7ceeb99e4 100644
--- a/trunk/include/linux/mmc/dw_mmc.h
+++ b/trunk/include/linux/mmc/dw_mmc.h
@@ -11,8 +11,8 @@
  * (at your option) any later version.
  */
 
-#ifndef LINUX_MMC_DW_MMC_H
-#define LINUX_MMC_DW_MMC_H
+#ifndef _LINUX_MMC_DW_MMC_H_
+#define _LINUX_MMC_DW_MMC_H_
 
 #define MAX_MCI_SLOTS	2
 
@@ -48,7 +48,6 @@ struct mmc_data;
  * @data: The data currently being transferred, or NULL if no data
  *	transfer is in progress.
  * @use_dma: Whether DMA channel is initialized or not.
- * @using_dma: Whether DMA is in use for the current transfer.
  * @sg_dma: Bus address of DMA buffer.
  * @sg_cpu: Virtual address of DMA buffer.
  * @dma_ops: Pointer to platform-specific DMA callbacks.
@@ -75,11 +74,7 @@ struct mmc_data;
  * @pdev: Platform device associated with the MMC controller.
  * @pdata: Platform data associated with the MMC controller.
  * @slot: Slots sharing this MMC controller.
- * @fifo_depth: depth of FIFO.
  * @data_shift: log2 of FIFO item size.
- * @part_buf_start: Start index in part_buf.
- * @part_buf_count: Bytes of partial data in part_buf.
- * @part_buf: Simple buffer for partial fifo reads/writes.
  * @push_data: Pointer to FIFO push function.
  * @pull_data: Pointer to FIFO pull function.
  * @quirks: Set of quirks that apply to specific versions of the IP.
@@ -122,7 +117,6 @@ struct dw_mci {
 
 	/* DMA interface members*/
 	int			use_dma;
-	int			using_dma;
 
 	dma_addr_t		sg_dma;
 	void			*sg_cpu;
@@ -137,7 +131,7 @@ struct dw_mci {
 	u32			stop_cmdr;
 	u32			dir_status;
 	struct tasklet_struct	tasklet;
-	struct work_struct	card_work;
+	struct tasklet_struct	card_tasklet;
 	unsigned long		pending_events;
 	unsigned long		completed_events;
 	enum dw_mci_state	state;
@@ -152,15 +146,7 @@ struct dw_mci {
 	struct dw_mci_slot	*slot[MAX_MCI_SLOTS];
 
 	/* FIFO push and pull */
-	int			fifo_depth;
 	int			data_shift;
-	u8			part_buf_start;
-	u8			part_buf_count;
-	union {
-		u16		part_buf16;
-		u32		part_buf32;
-		u64		part_buf;
-	};
 	void (*push_data)(struct dw_mci *host, void *buf, int cnt);
 	void (*pull_data)(struct dw_mci *host, void *buf, int cnt);
 
@@ -210,12 +196,6 @@ struct dw_mci_board {
 	unsigned int bus_hz; /* Bus speed */
 
 	unsigned int caps;	/* Capabilities */
-	/*
-	 * Override fifo depth. If 0, autodetect it from the FIFOTH register,
-	 * but note that this may not be reliable after a bootloader has used
-	 * it.
-	 */
-	unsigned int fifo_depth;
 
 	/* delay in mS before detecting cards after interrupt */
 	u32 detect_delay_ms;
@@ -239,4 +219,4 @@ struct dw_mci_board {
 	struct block_settings *blk_settings;
 };
 
-#endif /* LINUX_MMC_DW_MMC_H */
+#endif /* _LINUX_MMC_DW_MMC_H_ */
diff --git a/trunk/include/linux/mmc/host.h b/trunk/include/linux/mmc/host.h
index 0f83858147a6..1ee4424462eb 100644
--- a/trunk/include/linux/mmc/host.h
+++ b/trunk/include/linux/mmc/host.h
@@ -106,15 +106,6 @@ struct mmc_host_ops {
 	 */
 	int (*enable)(struct mmc_host *host);
 	int (*disable)(struct mmc_host *host, int lazy);
-	/*
-	 * It is optional for the host to implement pre_req and post_req in
-	 * order to support double buffering of requests (prepare one
-	 * request while another request is active).
-	 */
-	void	(*post_req)(struct mmc_host *host, struct mmc_request *req,
-			    int err);
-	void	(*pre_req)(struct mmc_host *host, struct mmc_request *req,
-			   bool is_first_req);
 	void	(*request)(struct mmc_host *host, struct mmc_request *req);
 	/*
 	 * Avoid calling these three functions too often or in a "fast path",
@@ -148,22 +139,11 @@ struct mmc_host_ops {
 	int	(*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios);
 	int	(*execute_tuning)(struct mmc_host *host);
 	void	(*enable_preset_value)(struct mmc_host *host, bool enable);
-	int	(*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv);
 };
 
 struct mmc_card;
 struct device;
 
-struct mmc_async_req {
-	/* active mmc request */
-	struct mmc_request	*mrq;
-	/*
-	 * Check error status of completed mmc request.
-	 * Returns 0 if success otherwise non zero.
-	 */
-	int (*err_check) (struct mmc_card *, struct mmc_async_req *);
-};
-
 struct mmc_host {
 	struct device		*parent;
 	struct device		class_dev;
@@ -251,7 +231,6 @@ struct mmc_host {
 	unsigned int		max_req_size;	/* maximum number of bytes in one req */
 	unsigned int		max_blk_size;	/* maximum size of one mmc block */
 	unsigned int		max_blk_count;	/* maximum number of blocks in one req */
-	unsigned int		max_discard_to;	/* max. discard timeout in ms */
 
 	/* private data */
 	spinlock_t		lock;		/* lock for claim and bus ops */
@@ -302,8 +281,6 @@ struct mmc_host {
 
 	struct dentry		*debugfs_root;
 
-	struct mmc_async_req	*areq;		/* active async req */
-
 	unsigned long		private[0] ____cacheline_aligned;
 };
 
@@ -396,4 +373,5 @@ static inline int mmc_host_cmd23(struct mmc_host *host)
 {
 	return host->caps & MMC_CAP_CMD23;
 }
-#endif /* LINUX_MMC_HOST_H */
+#endif
+
diff --git a/trunk/include/linux/mmc/ioctl.h b/trunk/include/linux/mmc/ioctl.h
index 8fa5bc5f8059..5baf2983a12f 100644
--- a/trunk/include/linux/mmc/ioctl.h
+++ b/trunk/include/linux/mmc/ioctl.h
@@ -51,4 +51,4 @@ struct mmc_ioc_cmd {
  * block device operations.
  */
 #define MMC_IOC_MAX_BYTES  (512L * 256)
-#endif /* LINUX_MMC_IOCTL_H */
+#endif  /* LINUX_MMC_IOCTL_H */
diff --git a/trunk/include/linux/mmc/mmc.h b/trunk/include/linux/mmc/mmc.h
index 5a794cb503ea..ac26a685cca8 100644
--- a/trunk/include/linux/mmc/mmc.h
+++ b/trunk/include/linux/mmc/mmc.h
@@ -21,8 +21,8 @@
  *          15 May 2002
  */
 
-#ifndef LINUX_MMC_MMC_H
-#define LINUX_MMC_MMC_H
+#ifndef MMC_MMC_H
+#define MMC_MMC_H
 
 /* Standard MMC commands (4.1)           type  argument     response */
    /* class 1 */
@@ -140,16 +140,6 @@ static inline bool mmc_op_multi(u32 opcode)
 #define R1_SWITCH_ERROR		(1 << 7)	/* sx, c */
 #define R1_APP_CMD		(1 << 5)	/* sr, c */
 
-#define R1_STATE_IDLE	0
-#define R1_STATE_READY	1
-#define R1_STATE_IDENT	2
-#define R1_STATE_STBY	3
-#define R1_STATE_TRAN	4
-#define R1_STATE_DATA	5
-#define R1_STATE_RCV	6
-#define R1_STATE_PRG	7
-#define R1_STATE_DIS	8
-
 /*
  * MMC/SD in SPI mode reports R1 status always, and R2 for SEND_STATUS
  * R1 is the low order byte; R2 is the next highest byte, when present.
@@ -337,4 +327,5 @@ struct _mmc_csd {
 #define MMC_SWITCH_MODE_CLEAR_BITS	0x02	/* Clear bits which are 1 in value */
 #define MMC_SWITCH_MODE_WRITE_BYTE	0x03	/* Set target to value */
 
-#endif /* LINUX_MMC_MMC_H */
+#endif  /* MMC_MMC_H */
+
diff --git a/trunk/include/linux/mmc/pm.h b/trunk/include/linux/mmc/pm.h
index 4a139204c20c..d37aac49cf9a 100644
--- a/trunk/include/linux/mmc/pm.h
+++ b/trunk/include/linux/mmc/pm.h
@@ -27,4 +27,4 @@ typedef unsigned int mmc_pm_flag_t;
 #define MMC_PM_KEEP_POWER	(1 << 0)	/* preserve card power during suspend */
 #define MMC_PM_WAKE_SDIO_IRQ	(1 << 1)	/* wake up host system on SDIO IRQ assertion */
 
-#endif /* LINUX_MMC_PM_H */
+#endif
diff --git a/trunk/include/linux/mmc/sd.h b/trunk/include/linux/mmc/sd.h
index 1ebcf9ba1256..7d35d52c3df3 100644
--- a/trunk/include/linux/mmc/sd.h
+++ b/trunk/include/linux/mmc/sd.h
@@ -9,8 +9,8 @@
  * your option) any later version.
  */
 
-#ifndef LINUX_MMC_SD_H
-#define LINUX_MMC_SD_H
+#ifndef MMC_SD_H
+#define MMC_SD_H
 
 /* SD commands                           type  argument     response */
   /* class 0 */
@@ -91,4 +91,5 @@
 #define SD_SWITCH_ACCESS_DEF	0
 #define SD_SWITCH_ACCESS_HS	1
 
-#endif /* LINUX_MMC_SD_H */
+#endif
+
diff --git a/trunk/include/linux/mmc/sdhci-pltfm.h b/trunk/include/linux/mmc/sdhci-pltfm.h
new file mode 100644
index 000000000000..548d59d404cb
--- /dev/null
+++ b/trunk/include/linux/mmc/sdhci-pltfm.h
@@ -0,0 +1,35 @@
+/*
+ * Platform data declarations for the sdhci-pltfm driver.
+ *
+ * Copyright (c) 2010 MontaVista Software, LLC.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef _SDHCI_PLTFM_H
+#define _SDHCI_PLTFM_H
+
+struct sdhci_ops;
+struct sdhci_host;
+
+/**
+ * struct sdhci_pltfm_data - SDHCI platform-specific information & hooks
+ * @ops: optional pointer to the platform-provided SDHCI ops
+ * @quirks: optional SDHCI quirks
+ * @init: optional hook that is called during device probe, before the
+ *        driver tries to access any SDHCI registers
+ * @exit: optional hook that is called during device removal
+ */
+struct sdhci_pltfm_data {
+	struct sdhci_ops *ops;
+	unsigned int quirks;
+	int (*init)(struct sdhci_host *host, struct sdhci_pltfm_data *pdata);
+	void (*exit)(struct sdhci_host *host);
+};
+
+#endif /* _SDHCI_PLTFM_H */
diff --git a/trunk/include/linux/mmc/sdhci-spear.h b/trunk/include/linux/mmc/sdhci-spear.h
index 5cdc96da9dd5..9188c973f3e1 100644
--- a/trunk/include/linux/mmc/sdhci-spear.h
+++ b/trunk/include/linux/mmc/sdhci-spear.h
@@ -11,8 +11,8 @@
  * warranty of any kind, whether express or implied.
  */
 
-#ifndef LINUX_MMC_SDHCI_SPEAR_H
-#define LINUX_MMC_SDHCI_SPEAR_H
+#ifndef MMC_SDHCI_SPEAR_H
+#define MMC_SDHCI_SPEAR_H
 
 #include <linux/platform_device.h>
 /*
@@ -39,4 +39,4 @@ sdhci_set_plat_data(struct platform_device *pdev, struct sdhci_plat_data *data)
 	pdev->dev.platform_data = data;
 }
 
-#endif /* LINUX_MMC_SDHCI_SPEAR_H */
+#endif /* MMC_SDHCI_SPEAR_H */
diff --git a/trunk/include/linux/mmc/sdhci.h b/trunk/include/linux/mmc/sdhci.h
index 5666f3abfab7..6a68c4eb4e44 100644
--- a/trunk/include/linux/mmc/sdhci.h
+++ b/trunk/include/linux/mmc/sdhci.h
@@ -8,8 +8,8 @@
  * the Free Software Foundation; either version 2 of the License, or (at
  * your option) any later version.
  */
-#ifndef LINUX_MMC_SDHCI_H
-#define LINUX_MMC_SDHCI_H
+#ifndef __SDHCI_H
+#define __SDHCI_H
 
 #include <linux/scatterlist.h>
 #include <linux/compiler.h>
@@ -162,4 +162,4 @@ struct sdhci_host {
 
 	unsigned long private[0] ____cacheline_aligned;
 };
-#endif /* LINUX_MMC_SDHCI_H */
+#endif /* __SDHCI_H */
diff --git a/trunk/include/linux/mmc/sdio.h b/trunk/include/linux/mmc/sdio.h
index 2a2e9905a247..245cdacee544 100644
--- a/trunk/include/linux/mmc/sdio.h
+++ b/trunk/include/linux/mmc/sdio.h
@@ -9,8 +9,8 @@
  * your option) any later version.
  */
 
-#ifndef LINUX_MMC_SDIO_H
-#define LINUX_MMC_SDIO_H
+#ifndef MMC_SDIO_H
+#define MMC_SDIO_H
 
 /* SDIO commands                         type  argument     response */
 #define SD_IO_SEND_OP_COND          5 /* bcr  [23:0] OCR         R4  */
@@ -161,4 +161,5 @@
 
 #define SDIO_FBR_BLKSIZE	0x10	/* block size (2 bytes) */
 
-#endif /* LINUX_MMC_SDIO_H */
+#endif
+
diff --git a/trunk/include/linux/mmc/sdio_func.h b/trunk/include/linux/mmc/sdio_func.h
index 50f0bc952328..31baaf82f458 100644
--- a/trunk/include/linux/mmc/sdio_func.h
+++ b/trunk/include/linux/mmc/sdio_func.h
@@ -9,8 +9,8 @@
  * your option) any later version.
  */
 
-#ifndef LINUX_MMC_SDIO_FUNC_H
-#define LINUX_MMC_SDIO_FUNC_H
+#ifndef MMC_SDIO_FUNC_H
+#define MMC_SDIO_FUNC_H
 
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
@@ -161,4 +161,5 @@ extern void sdio_f0_writeb(struct sdio_func *func, unsigned char b,
 extern mmc_pm_flag_t sdio_get_host_pm_caps(struct sdio_func *func);
 extern int sdio_set_host_pm_flags(struct sdio_func *func, mmc_pm_flag_t flags);
 
-#endif /* LINUX_MMC_SDIO_FUNC_H */
+#endif
+
diff --git a/trunk/include/linux/mmc/sdio_ids.h b/trunk/include/linux/mmc/sdio_ids.h
index 9f03feedc8e7..a36ab3bc7b03 100644
--- a/trunk/include/linux/mmc/sdio_ids.h
+++ b/trunk/include/linux/mmc/sdio_ids.h
@@ -2,8 +2,8 @@
  * SDIO Classes, Interface Types, Manufacturer IDs, etc.
  */
 
-#ifndef LINUX_MMC_SDIO_IDS_H
-#define LINUX_MMC_SDIO_IDS_H
+#ifndef MMC_SDIO_IDS_H
+#define MMC_SDIO_IDS_H
 
 /*
  * Standard SDIO Function Interfaces
@@ -44,4 +44,4 @@
 #define SDIO_DEVICE_ID_SIANO_NOVA_A0		0x1100
 #define SDIO_DEVICE_ID_SIANO_STELLAR 		0x5347
 
-#endif /* LINUX_MMC_SDIO_IDS_H */
+#endif
diff --git a/trunk/include/linux/mmc/sh_mmcif.h b/trunk/include/linux/mmc/sh_mmcif.h
index 0222cd8ebe76..9eb9b4b96f55 100644
--- a/trunk/include/linux/mmc/sh_mmcif.h
+++ b/trunk/include/linux/mmc/sh_mmcif.h
@@ -11,8 +11,8 @@
  *
  */
 
-#ifndef LINUX_MMC_SH_MMCIF_H
-#define LINUX_MMC_SH_MMCIF_H
+#ifndef __SH_MMCIF_H__
+#define __SH_MMCIF_H__
 
 #include <linux/io.h>
 #include <linux/platform_device.h>
@@ -220,4 +220,4 @@ static inline void sh_mmcif_boot_init(void __iomem *base)
 	sh_mmcif_boot_cmd(base, 0x03400040, 0x00010000);
 }
 
-#endif /* LINUX_MMC_SH_MMCIF_H */
+#endif /* __SH_MMCIF_H__ */
diff --git a/trunk/include/linux/mmc/sh_mobile_sdhi.h b/trunk/include/linux/mmc/sh_mobile_sdhi.h
index bd50b365167f..faf32b6ec185 100644
--- a/trunk/include/linux/mmc/sh_mobile_sdhi.h
+++ b/trunk/include/linux/mmc/sh_mobile_sdhi.h
@@ -1,5 +1,5 @@
-#ifndef LINUX_MMC_SH_MOBILE_SDHI_H
-#define LINUX_MMC_SH_MOBILE_SDHI_H
+#ifndef __SH_MOBILE_SDHI_H__
+#define __SH_MOBILE_SDHI_H__
 
 #include <linux/types.h>
 
@@ -17,4 +17,4 @@ struct sh_mobile_sdhi_info {
 	int (*get_cd)(struct platform_device *pdev);
 };
 
-#endif /* LINUX_MMC_SH_MOBILE_SDHI_H */
+#endif /* __SH_MOBILE_SDHI_H__ */
diff --git a/trunk/include/linux/mmc/tmio.h b/trunk/include/linux/mmc/tmio.h
index a1c1f321e519..19490b942db0 100644
--- a/trunk/include/linux/mmc/tmio.h
+++ b/trunk/include/linux/mmc/tmio.h
@@ -12,8 +12,8 @@
  *
  * TC6393XB TC6391XB TC6387XB T7L66XB ASIC3
  */
-#ifndef LINUX_MMC_TMIO_H
-#define LINUX_MMC_TMIO_H
+#ifndef _LINUX_MMC_TMIO_H_
+#define _LINUX_MMC_TMIO_H_
 
 #define CTL_SD_CMD 0x00
 #define CTL_ARG_REG 0x04
@@ -21,7 +21,6 @@
 #define CTL_XFER_BLK_COUNT 0xa
 #define CTL_RESPONSE 0x0c
 #define CTL_STATUS 0x1c
-#define CTL_STATUS2 0x1e
 #define CTL_IRQ_MASK 0x20
 #define CTL_SD_CARD_CLK_CTL 0x24
 #define CTL_SD_XFER_LEN 0x26
@@ -31,7 +30,6 @@
 #define CTL_TRANSACTION_CTL 0x34
 #define CTL_SDIO_STATUS 0x36
 #define CTL_SDIO_IRQ_MASK 0x38
-#define CTL_DMA_ENABLE 0xd8
 #define CTL_RESET_SD 0xe0
 #define CTL_SDIO_REGS 0x100
 #define CTL_CLK_AND_WAIT_CTL 0x138
@@ -62,4 +60,4 @@
 
 #define TMIO_BBS		512		/* Boot block size */
 
-#endif /* LINUX_MMC_TMIO_H */
+#endif /* _LINUX_MMC_TMIO_H_ */
diff --git a/trunk/include/linux/mtd/ubi.h b/trunk/include/linux/mtd/ubi.h
index db4836bed514..15da0e99f48a 100644
--- a/trunk/include/linux/mtd/ubi.h
+++ b/trunk/include/linux/mtd/ubi.h
@@ -155,14 +155,12 @@ struct ubi_device_info {
 };
 
 /*
- * Volume notification types.
- * @UBI_VOLUME_ADDED: a volume has been added (an UBI device was attached or a
- *                    volume was created)
- * @UBI_VOLUME_REMOVED: a volume has been removed (an UBI device was detached
- *			or a volume was removed)
- * @UBI_VOLUME_RESIZED: a volume has been re-sized
- * @UBI_VOLUME_RENAMED: a volume has been re-named
- * @UBI_VOLUME_UPDATED: data has been written to a volume
+ * enum - volume notification types.
+ * @UBI_VOLUME_ADDED: volume has been added
+ * @UBI_VOLUME_REMOVED: volume has been removed
+ * @UBI_VOLUME_RESIZED: volume size has been re-sized
+ * @UBI_VOLUME_RENAMED: volume name has been re-named
+ * @UBI_VOLUME_UPDATED: data has been written to the volume
  *
  * These constants define which type of event has happened when a volume
  * notification function is invoked.
diff --git a/trunk/include/linux/platform_data/pxa_sdhci.h b/trunk/include/linux/platform_data/pxa_sdhci.h
deleted file mode 100644
index 51ad0995abac..000000000000
--- a/trunk/include/linux/platform_data/pxa_sdhci.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * include/linux/platform_data/pxa_sdhci.h
- *
- * Copyright 2010 Marvell
- *	Zhangfei Gao <zhangfei.gao@marvell.com>
- *
- * PXA Platform - SDHCI platform data definitions
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _PXA_SDHCI_H_
-#define _PXA_SDHCI_H_
-
-/* pxa specific flag */
-/* Require clock free running */
-#define PXA_FLAG_ENABLE_CLOCK_GATING (1<<0)
-/* card always wired to host, like on-chip emmc */
-#define PXA_FLAG_CARD_PERMANENT	(1<<1)
-/* Board design supports 8-bit data on SD/SDIO BUS */
-#define PXA_FLAG_SD_8_BIT_CAPABLE_SLOT (1<<2)
-
-/*
- * struct pxa_sdhci_platdata() - Platform device data for PXA SDHCI
- * @flags: flags for platform requirement
- * @clk_delay_cycles:
- *	mmp2: each step is roughly 100ps, 5bits width
- *	pxa910: each step is 1ns, 4bits width
- * @clk_delay_sel: select clk_delay, used on pxa910
- *	0: choose feedback clk
- *	1: choose feedback clk + delay value
- *	2: choose internal clk
- * @clk_delay_enable: enable clk_delay or not, used on pxa910
- * @ext_cd_gpio: gpio pin used for external CD line
- * @ext_cd_gpio_invert: invert values for external CD gpio line
- * @max_speed: the maximum speed supported
- * @host_caps: Standard MMC host capabilities bit field.
- * @quirks: quirks of platfrom
- * @pm_caps: pm_caps of platfrom
- */
-struct sdhci_pxa_platdata {
-	unsigned int	flags;
-	unsigned int	clk_delay_cycles;
-	unsigned int	clk_delay_sel;
-	bool		clk_delay_enable;
-	unsigned int	ext_cd_gpio;
-	bool		ext_cd_gpio_invert;
-	unsigned int	max_speed;
-	unsigned int	host_caps;
-	unsigned int	quirks;
-	unsigned int	pm_caps;
-};
-
-struct sdhci_pxa {
-	u8	clk_enable;
-	u8	power_mode;
-};
-#endif /* _PXA_SDHCI_H_ */
diff --git a/trunk/include/linux/slab.h b/trunk/include/linux/slab.h
index 573c809c33d9..ad4dd1c8d30a 100644
--- a/trunk/include/linux/slab.h
+++ b/trunk/include/linux/slab.h
@@ -133,26 +133,6 @@ unsigned int kmem_cache_size(struct kmem_cache *);
 #define KMALLOC_MAX_SIZE	(1UL << KMALLOC_SHIFT_HIGH)
 #define KMALLOC_MAX_ORDER	(KMALLOC_SHIFT_HIGH - PAGE_SHIFT)
 
-/*
- * Some archs want to perform DMA into kmalloc caches and need a guaranteed
- * alignment larger than the alignment of a 64-bit integer.
- * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that.
- */
-#ifdef ARCH_DMA_MINALIGN
-#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
-#else
-#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
-#endif
-
-/*
- * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
- * Intended for arches that get misalignment faults even for 64 bit integer
- * aligned buffers.
- */
-#ifndef ARCH_SLAB_MINALIGN
-#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
-#endif
-
 /*
  * Common kmalloc functions provided by all allocators
  */
diff --git a/trunk/include/linux/slab_def.h b/trunk/include/linux/slab_def.h
index d00e0bacda93..83203ae9390b 100644
--- a/trunk/include/linux/slab_def.h
+++ b/trunk/include/linux/slab_def.h
@@ -17,6 +17,32 @@
 
 #include <trace/events/kmem.h>
 
+/*
+ * Enforce a minimum alignment for the kmalloc caches.
+ * Usually, the kmalloc caches are cache_line_size() aligned, except when
+ * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
+ * Some archs want to perform DMA into kmalloc caches and need a guaranteed
+ * alignment larger than the alignment of a 64-bit integer.
+ * ARCH_KMALLOC_MINALIGN allows that.
+ * Note that increasing this value may disable some debug features.
+ */
+#ifdef ARCH_DMA_MINALIGN
+#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
+#else
+#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
+#endif
+
+#ifndef ARCH_SLAB_MINALIGN
+/*
+ * Enforce a minimum alignment for all caches.
+ * Intended for archs that get misalignment faults even for BYTES_PER_WORD
+ * aligned buffers. Includes ARCH_KMALLOC_MINALIGN.
+ * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
+ * some debug features.
+ */
+#define ARCH_SLAB_MINALIGN 0
+#endif
+
 /*
  * struct kmem_cache
  *
@@ -24,19 +50,21 @@
  */
 
 struct kmem_cache {
-/* 1) Cache tunables. Protected by cache_chain_mutex */
+/* 1) per-cpu data, touched during every alloc/free */
+	struct array_cache *array[NR_CPUS];
+/* 2) Cache tunables. Protected by cache_chain_mutex */
 	unsigned int batchcount;
 	unsigned int limit;
 	unsigned int shared;
 
 	unsigned int buffer_size;
 	u32 reciprocal_buffer_size;
-/* 2) touched by every alloc & free from the backend */
+/* 3) touched by every alloc & free from the backend */
 
 	unsigned int flags;		/* constant flags */
 	unsigned int num;		/* # of objs per slab */
 
-/* 3) cache_grow/shrink */
+/* 4) cache_grow/shrink */
 	/* order of pgs per slab (2^n) */
 	unsigned int gfporder;
 
@@ -52,11 +80,11 @@ struct kmem_cache {
 	/* constructor func */
 	void (*ctor)(void *obj);
 
-/* 4) cache creation/removal */
+/* 5) cache creation/removal */
 	const char *name;
 	struct list_head next;
 
-/* 5) statistics */
+/* 6) statistics */
 #ifdef CONFIG_DEBUG_SLAB
 	unsigned long num_active;
 	unsigned long num_allocations;
@@ -83,18 +111,16 @@ struct kmem_cache {
 	int obj_size;
 #endif /* CONFIG_DEBUG_SLAB */
 
-/* 6) per-cpu/per-node data, touched during every alloc/free */
 	/*
-	 * We put array[] at the end of kmem_cache, because we want to size
-	 * this array to nr_cpu_ids slots instead of NR_CPUS
+	 * We put nodelists[] at the end of kmem_cache, because we want to size
+	 * this array to nr_node_ids slots instead of MAX_NUMNODES
 	 * (see kmem_cache_init())
-	 * We still use [NR_CPUS] and not [1] or [0] because cache_cache
-	 * is statically defined, so we reserve the max number of cpus.
+	 * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
+	 * is statically defined, so we reserve the max number of nodes.
 	 */
-	struct kmem_list3 **nodelists;
-	struct array_cache *array[NR_CPUS];
+	struct kmem_list3 *nodelists[MAX_NUMNODES];
 	/*
-	 * Do not add fields after array[]
+	 * Do not add fields after nodelists[]
 	 */
 };
 
diff --git a/trunk/include/linux/slob_def.h b/trunk/include/linux/slob_def.h
index 0ec00b39d006..4382db09df4f 100644
--- a/trunk/include/linux/slob_def.h
+++ b/trunk/include/linux/slob_def.h
@@ -1,6 +1,16 @@
 #ifndef __LINUX_SLOB_DEF_H
 #define __LINUX_SLOB_DEF_H
 
+#ifdef ARCH_DMA_MINALIGN
+#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
+#else
+#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
+#endif
+
+#ifndef ARCH_SLAB_MINALIGN
+#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
+#endif
+
 void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 
 static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep,
diff --git a/trunk/include/linux/slub_def.h b/trunk/include/linux/slub_def.h
index 4b35c06dfbc5..c8668d161dd8 100644
--- a/trunk/include/linux/slub_def.h
+++ b/trunk/include/linux/slub_def.h
@@ -113,6 +113,16 @@ struct kmem_cache {
 
 #define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
 
+#ifdef ARCH_DMA_MINALIGN
+#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
+#else
+#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
+#endif
+
+#ifndef ARCH_SLAB_MINALIGN
+#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
+#endif
+
 /*
  * Maximum kmalloc object size handled by SLUB. Larger object allocations
  * are passed through to the page allocator. The page allocator "fastpath"
@@ -218,19 +228,6 @@ kmalloc_order(size_t size, gfp_t flags, unsigned int order)
 	return ret;
 }
 
-/**
- * Calling this on allocated memory will check that the memory
- * is expected to be in use, and print warnings if not.
- */
-#ifdef CONFIG_SLUB_DEBUG
-extern bool verify_mem_not_deleted(const void *x);
-#else
-static inline bool verify_mem_not_deleted(const void *x)
-{
-	return true;
-}
-#endif
-
 #ifdef CONFIG_TRACING
 extern void *
 kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size);
diff --git a/trunk/include/xen/balloon.h b/trunk/include/xen/balloon.h
index 4076ed72afbd..a2b22f01a51d 100644
--- a/trunk/include/xen/balloon.h
+++ b/trunk/include/xen/balloon.h
@@ -23,13 +23,3 @@ void balloon_set_new_target(unsigned long target);
 
 int alloc_xenballooned_pages(int nr_pages, struct page** pages);
 void free_xenballooned_pages(int nr_pages, struct page** pages);
-
-struct sys_device;
-#ifdef CONFIG_XEN_SELFBALLOONING
-extern int register_xen_selfballooning(struct sys_device *sysdev);
-#else
-static inline int register_xen_selfballooning(struct sys_device *sysdev)
-{
-	return -ENOSYS;
-}
-#endif
diff --git a/trunk/include/xen/events.h b/trunk/include/xen/events.h
index d287997d3eab..9af21e19545a 100644
--- a/trunk/include/xen/events.h
+++ b/trunk/include/xen/events.h
@@ -74,6 +74,8 @@ int xen_set_callback_via(uint64_t via);
 void xen_evtchn_do_upcall(struct pt_regs *regs);
 void xen_hvm_evtchn_do_upcall(void);
 
+/* Allocate a pirq for a physical interrupt, given a gsi. */
+int xen_allocate_pirq_gsi(unsigned gsi);
 /* Bind a pirq for a physical interrupt to an irq. */
 int xen_bind_pirq_gsi_to_irq(unsigned gsi,
 			     unsigned pirq, int shareable, char *name);
diff --git a/trunk/include/xen/hvc-console.h b/trunk/include/xen/hvc-console.h
index 901724dc528d..c3adde32669b 100644
--- a/trunk/include/xen/hvc-console.h
+++ b/trunk/include/xen/hvc-console.h
@@ -6,13 +6,11 @@ extern struct console xenboot_console;
 #ifdef CONFIG_HVC_XEN
 void xen_console_resume(void);
 void xen_raw_console_write(const char *str);
-__attribute__((format(printf, 1, 2)))
 void xen_raw_printk(const char *fmt, ...);
 #else
 static inline void xen_console_resume(void) { }
 static inline void xen_raw_console_write(const char *str) { }
-static inline __attribute__((format(printf, 1, 2)))
-void xen_raw_printk(const char *fmt, ...) { }
+static inline void xen_raw_printk(const char *fmt, ...) { }
 #endif
 
 #endif	/* XEN_HVC_CONSOLE_H */
diff --git a/trunk/include/xen/interface/xen.h b/trunk/include/xen/interface/xen.h
index 6acd9cefd517..70213b4515eb 100644
--- a/trunk/include/xen/interface/xen.h
+++ b/trunk/include/xen/interface/xen.h
@@ -450,45 +450,6 @@ struct start_info {
 	int8_t cmd_line[MAX_GUEST_CMDLINE];
 };
 
-struct dom0_vga_console_info {
-	uint8_t video_type;
-#define XEN_VGATYPE_TEXT_MODE_3 0x03
-#define XEN_VGATYPE_VESA_LFB    0x23
-
-	union {
-		struct {
-			/* Font height, in pixels. */
-			uint16_t font_height;
-			/* Cursor location (column, row). */
-			uint16_t cursor_x, cursor_y;
-			/* Number of rows and columns (dimensions in characters). */
-			uint16_t rows, columns;
-		} text_mode_3;
-
-		struct {
-			/* Width and height, in pixels. */
-			uint16_t width, height;
-			/* Bytes per scan line. */
-			uint16_t bytes_per_line;
-			/* Bits per pixel. */
-			uint16_t bits_per_pixel;
-			/* LFB physical address, and size (in units of 64kB). */
-			uint32_t lfb_base;
-			uint32_t lfb_size;
-			/* RGB mask offsets and sizes, as defined by VBE 1.2+ */
-			uint8_t  red_pos, red_size;
-			uint8_t  green_pos, green_size;
-			uint8_t  blue_pos, blue_size;
-			uint8_t  rsvd_pos, rsvd_size;
-
-			/* VESA capabilities (offset 0xa, VESA command 0x4f00). */
-			uint32_t gbl_caps;
-			/* Mode attributes (offset 0x0, VESA command 0x4f01). */
-			uint16_t mode_attrs;
-		} vesa_lfb;
-	} u;
-};
-
 /* These flags are passed in the 'flags' field of start_info_t. */
 #define SIF_PRIVILEGED    (1<<0)  /* Is the domain privileged? */
 #define SIF_INITDOMAIN    (1<<1)  /* Is this the initial control domain? */
diff --git a/trunk/include/xen/tmem.h b/trunk/include/xen/tmem.h
deleted file mode 100644
index 82e2c83a32f5..000000000000
--- a/trunk/include/xen/tmem.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifndef _XEN_TMEM_H
-#define _XEN_TMEM_H
-/* defined in drivers/xen/tmem.c */
-extern int tmem_enabled;
-#endif /* _XEN_TMEM_H */
diff --git a/trunk/include/xen/xenbus.h b/trunk/include/xen/xenbus.h
index aceeca799fd7..5467369e0889 100644
--- a/trunk/include/xen/xenbus.h
+++ b/trunk/include/xen/xenbus.h
@@ -223,9 +223,7 @@ int xenbus_free_evtchn(struct xenbus_device *dev, int port);
 
 enum xenbus_state xenbus_read_driver_state(const char *path);
 
-__attribute__((format(printf, 3, 4)))
 void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...);
-__attribute__((format(printf, 3, 4)))
 void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...);
 
 const char *xenbus_strstate(enum xenbus_state state);
diff --git a/trunk/mm/slab.c b/trunk/mm/slab.c
index 1e523ed47c61..d96e223de775 100644
--- a/trunk/mm/slab.c
+++ b/trunk/mm/slab.c
@@ -574,9 +574,7 @@ static struct arraycache_init initarray_generic =
     { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 
 /* internal cache of cache description objs */
-static struct kmem_list3 *cache_cache_nodelists[MAX_NUMNODES];
 static struct kmem_cache cache_cache = {
-	.nodelists = cache_cache_nodelists,
 	.batchcount = 1,
 	.limit = BOOT_CPUCACHE_ENTRIES,
 	.shared = 1,
@@ -1494,10 +1492,11 @@ void __init kmem_cache_init(void)
 	cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
 
 	/*
-	 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
+	 * struct kmem_cache size depends on nr_node_ids, which
+	 * can be less than MAX_NUMNODES.
 	 */
-	cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
-				  nr_node_ids * sizeof(struct kmem_list3 *);
+	cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
+				 nr_node_ids * sizeof(struct kmem_list3 *);
 #if DEBUG
 	cache_cache.obj_size = cache_cache.buffer_size;
 #endif
@@ -2309,7 +2308,6 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	if (!cachep)
 		goto oops;
 
-	cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
 #if DEBUG
 	cachep->obj_size = size;
 
@@ -3155,11 +3153,12 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
 	objp += obj_offset(cachep);
 	if (cachep->ctor && cachep->flags & SLAB_POISON)
 		cachep->ctor(objp);
-	if (ARCH_SLAB_MINALIGN &&
-	    ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
+#if ARCH_SLAB_MINALIGN
+	if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
 		printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
-		       objp, (int)ARCH_SLAB_MINALIGN);
+		       objp, ARCH_SLAB_MINALIGN);
 	}
+#endif
 	return objp;
 }
 #else
diff --git a/trunk/mm/slob.c b/trunk/mm/slob.c
index 0ae881831ae2..46e0aee33a23 100644
--- a/trunk/mm/slob.c
+++ b/trunk/mm/slob.c
@@ -482,8 +482,6 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node)
 	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
 	void *ret;
 
-	gfp &= gfp_allowed_mask;
-
 	lockdep_trace_alloc(gfp);
 
 	if (size < PAGE_SIZE - align) {
@@ -610,10 +608,6 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 {
 	void *b;
 
-	flags &= gfp_allowed_mask;
-
-	lockdep_trace_alloc(flags);
-
 	if (c->size < PAGE_SIZE) {
 		b = slob_alloc(c->size, flags, c->align, node);
 		trace_kmem_cache_alloc_node(_RET_IP_, b, c->size,
diff --git a/trunk/mm/slub.c b/trunk/mm/slub.c
index ba83f3fd0757..35f351f26193 100644
--- a/trunk/mm/slub.c
+++ b/trunk/mm/slub.c
@@ -27,7 +27,6 @@
 #include <linux/memory.h>
 #include <linux/math64.h>
 #include <linux/fault-inject.h>
-#include <linux/stacktrace.h>
 
 #include <trace/events/kmem.h>
 
@@ -192,12 +191,8 @@ static LIST_HEAD(slab_caches);
 /*
  * Tracking user of a slab.
  */
-#define TRACK_ADDRS_COUNT 16
 struct track {
 	unsigned long addr;	/* Called from address */
-#ifdef CONFIG_STACKTRACE
-	unsigned long addrs[TRACK_ADDRS_COUNT];	/* Called from address */
-#endif
 	int cpu;		/* Was running on cpu */
 	int pid;		/* Pid context */
 	unsigned long when;	/* When did the operation occur */
@@ -425,24 +420,6 @@ static void set_track(struct kmem_cache *s, void *object,
 	struct track *p = get_track(s, object, alloc);
 
 	if (addr) {
-#ifdef CONFIG_STACKTRACE
-		struct stack_trace trace;
-		int i;
-
-		trace.nr_entries = 0;
-		trace.max_entries = TRACK_ADDRS_COUNT;
-		trace.entries = p->addrs;
-		trace.skip = 3;
-		save_stack_trace(&trace);
-
-		/* See rant in lockdep.c */
-		if (trace.nr_entries != 0 &&
-		    trace.entries[trace.nr_entries - 1] == ULONG_MAX)
-			trace.nr_entries--;
-
-		for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
-			p->addrs[i] = 0;
-#endif
 		p->addr = addr;
 		p->cpu = smp_processor_id();
 		p->pid = current->pid;
@@ -467,16 +444,6 @@ static void print_track(const char *s, struct track *t)
 
 	printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
 		s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
-#ifdef CONFIG_STACKTRACE
-	{
-		int i;
-		for (i = 0; i < TRACK_ADDRS_COUNT; i++)
-			if (t->addrs[i])
-				printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
-			else
-				break;
-	}
-#endif
 }
 
 static void print_tracking(struct kmem_cache *s, void *object)
@@ -590,10 +557,10 @@ static void init_object(struct kmem_cache *s, void *object, u8 val)
 		memset(p + s->objsize, val, s->inuse - s->objsize);
 }
 
-static u8 *check_bytes8(u8 *start, u8 value, unsigned int bytes)
+static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
 {
 	while (bytes) {
-		if (*start != value)
+		if (*start != (u8)value)
 			return start;
 		start++;
 		bytes--;
@@ -601,38 +568,6 @@ static u8 *check_bytes8(u8 *start, u8 value, unsigned int bytes)
 	return NULL;
 }
 
-static u8 *check_bytes(u8 *start, u8 value, unsigned int bytes)
-{
-	u64 value64;
-	unsigned int words, prefix;
-
-	if (bytes <= 16)
-		return check_bytes8(start, value, bytes);
-
-	value64 = value | value << 8 | value << 16 | value << 24;
-	value64 = value64 | value64 << 32;
-	prefix = 8 - ((unsigned long)start) % 8;
-
-	if (prefix) {
-		u8 *r = check_bytes8(start, value, prefix);
-		if (r)
-			return r;
-		start += prefix;
-		bytes -= prefix;
-	}
-
-	words = bytes / 8;
-
-	while (words) {
-		if (*(u64 *)start != value64)
-			return check_bytes8(start, value, 8);
-		start += 8;
-		words--;
-	}
-
-	return check_bytes8(start, value, bytes % 8);
-}
-
 static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
 						void *from, void *to)
 {
@@ -2993,42 +2928,6 @@ size_t ksize(const void *object)
 }
 EXPORT_SYMBOL(ksize);
 
-#ifdef CONFIG_SLUB_DEBUG
-bool verify_mem_not_deleted(const void *x)
-{
-	struct page *page;
-	void *object = (void *)x;
-	unsigned long flags;
-	bool rv;
-
-	if (unlikely(ZERO_OR_NULL_PTR(x)))
-		return false;
-
-	local_irq_save(flags);
-
-	page = virt_to_head_page(x);
-	if (unlikely(!PageSlab(page))) {
-		/* maybe it was from stack? */
-		rv = true;
-		goto out_unlock;
-	}
-
-	slab_lock(page);
-	if (on_freelist(page->slab, page, object)) {
-		object_err(page->slab, page, object, "Object is on free-list");
-		rv = false;
-	} else {
-		rv = true;
-	}
-	slab_unlock(page);
-
-out_unlock:
-	local_irq_restore(flags);
-	return rv;
-}
-EXPORT_SYMBOL(verify_mem_not_deleted);
-#endif
-
 void kfree(const void *x)
 {
 	struct page *page;