From fa9d71e573cc0d2b3a7c21f3e5aa1860c9e1b541 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Sun, 26 Jun 2011 19:36:46 +0300 Subject: [PATCH] --- yaml --- r: 255285 b: refs/heads/master c: e0377e25206328998d036cafddcd00a7c3252e3e h: refs/heads/master i: 255283: c4e999213ac45a4813c364fbacfeba8c5e3c7095 v: v3 --- [refs] | 2 +- trunk/Documentation/filesystems/ubifs.txt | 28 + trunk/Documentation/mmc/00-INDEX | 2 - trunk/Documentation/mmc/mmc-async-req.txt | 87 -- trunk/Documentation/virtual/lguest/lguest.c | 10 +- trunk/MAINTAINERS | 13 +- trunk/arch/arm/configs/mmp2_defconfig | 9 +- .../arm/mach-at91/include/mach}/at91_mci.h | 2 +- trunk/arch/arm/mach-mmp/brownstone.c | 10 +- trunk/arch/arm/mach-mmp/include/mach/mmp2.h | 2 +- trunk/arch/arm/mach-mmp/jasper.c | 2 +- trunk/arch/arm/mach-mmp/mmp2.c | 16 +- trunk/arch/arm/plat-pxa/include/plat/sdhci.h | 35 + trunk/arch/x86/include/asm/xen/pci.h | 5 +- trunk/arch/x86/pci/xen.c | 371 +++-- trunk/arch/x86/xen/Makefile | 2 +- trunk/arch/x86/xen/enlighten.c | 8 - trunk/arch/x86/xen/platform-pci-unplug.c | 2 +- trunk/arch/x86/xen/vga.c | 67 - trunk/arch/x86/xen/xen-ops.h | 11 - trunk/drivers/block/xen-blkback/xenbus.c | 2 +- trunk/drivers/mmc/card/block.c | 681 +++----- trunk/drivers/mmc/card/mmc_test.c | 498 +----- trunk/drivers/mmc/card/queue.c | 217 +-- trunk/drivers/mmc/card/queue.h | 33 +- trunk/drivers/mmc/core/core.c | 197 +-- trunk/drivers/mmc/core/sd.c | 68 +- trunk/drivers/mmc/core/sdio_bus.c | 8 +- trunk/drivers/mmc/host/Kconfig | 84 +- trunk/drivers/mmc/host/Makefile | 25 +- trunk/drivers/mmc/host/at91_mci.c | 3 +- trunk/drivers/mmc/host/atmel-mci.c | 63 - trunk/drivers/mmc/host/dw_mmc.c | 446 ++---- trunk/drivers/mmc/host/dw_mmc.h | 17 +- trunk/drivers/mmc/host/mmci.c | 147 +- trunk/drivers/mmc/host/mmci.h | 8 - trunk/drivers/mmc/host/mxs-mmc.c | 30 +- trunk/drivers/mmc/host/omap_hsmmc.c | 671 +++++--- trunk/drivers/mmc/host/sdhci-cns3xxx.c | 44 +- trunk/drivers/mmc/host/sdhci-dove.c | 43 +- 
trunk/drivers/mmc/host/sdhci-esdhc-imx.c | 125 +- trunk/drivers/mmc/host/sdhci-of-core.c | 253 +++ trunk/drivers/mmc/host/sdhci-of-esdhc.c | 86 +- trunk/drivers/mmc/host/sdhci-of-hlwd.c | 67 +- trunk/drivers/mmc/host/sdhci-of.h | 42 + trunk/drivers/mmc/host/sdhci-pci.c | 54 +- trunk/drivers/mmc/host/sdhci-pltfm.c | 216 +-- trunk/drivers/mmc/host/sdhci-pltfm.h | 90 +- trunk/drivers/mmc/host/sdhci-pxa.c | 303 ++++ trunk/drivers/mmc/host/sdhci-pxav2.c | 244 --- trunk/drivers/mmc/host/sdhci-pxav3.c | 289 ---- trunk/drivers/mmc/host/sdhci-s3c.c | 6 +- trunk/drivers/mmc/host/sdhci-tegra.c | 117 +- trunk/drivers/mmc/host/sdhci.c | 34 +- trunk/drivers/mmc/host/sh_mmcif.c | 27 +- trunk/drivers/mmc/host/sh_mobile_sdhi.c | 36 - trunk/drivers/mmc/host/tmio_mmc.h | 53 +- trunk/drivers/mmc/host/tmio_mmc_dma.c | 7 +- trunk/drivers/mmc/host/tmio_mmc_pio.c | 205 +-- trunk/drivers/mtd/ubi/build.c | 28 +- trunk/drivers/mtd/ubi/debug.c | 269 +--- trunk/drivers/mtd/ubi/debug.h | 113 +- trunk/drivers/mtd/ubi/io.c | 20 +- trunk/drivers/mtd/ubi/scan.c | 2 +- trunk/drivers/mtd/ubi/ubi.h | 8 +- trunk/drivers/mtd/ubi/vmt.c | 2 +- trunk/drivers/mtd/ubi/vtbl.c | 18 +- trunk/drivers/mtd/ubi/wl.c | 42 +- trunk/drivers/pci/quirks.c | 23 - trunk/drivers/xen/Kconfig | 46 - trunk/drivers/xen/Makefile | 4 +- trunk/drivers/xen/events.c | 7 +- trunk/drivers/xen/tmem.c | 170 +- trunk/drivers/xen/xen-balloon.c | 2 - trunk/drivers/xen/xen-pciback/Makefile | 7 - trunk/drivers/xen/xen-pciback/conf_space.c | 438 ------ trunk/drivers/xen/xen-pciback/conf_space.h | 126 -- .../xen/xen-pciback/conf_space_capability.c | 207 --- .../xen/xen-pciback/conf_space_header.c | 386 ----- .../xen/xen-pciback/conf_space_quirks.c | 140 -- .../xen/xen-pciback/conf_space_quirks.h | 33 - trunk/drivers/xen/xen-pciback/passthrough.c | 194 --- trunk/drivers/xen/xen-pciback/pci_stub.c | 1376 ----------------- trunk/drivers/xen/xen-pciback/pciback.h | 183 --- trunk/drivers/xen/xen-pciback/pciback_ops.c | 384 ----- 
trunk/drivers/xen/xen-pciback/vpci.c | 259 ---- trunk/drivers/xen/xen-pciback/xenbus.c | 749 --------- trunk/drivers/xen/xen-selfballoon.c | 485 ------ trunk/drivers/xen/xenbus/xenbus_probe.c | 44 +- trunk/drivers/xen/xenbus/xenbus_probe.h | 2 - .../drivers/xen/xenbus/xenbus_probe_backend.c | 9 +- .../xen/xenbus/xenbus_probe_frontend.c | 6 +- trunk/fs/dlm/ast.c | 265 ++-- trunk/fs/dlm/ast.h | 15 +- trunk/fs/dlm/config.c | 75 +- trunk/fs/dlm/config.h | 2 +- trunk/fs/dlm/dlm_internal.h | 29 +- trunk/fs/dlm/lock.c | 225 ++- trunk/fs/dlm/lockspace.c | 177 ++- trunk/fs/dlm/lowcomms.c | 9 +- trunk/fs/dlm/memory.c | 22 +- trunk/fs/dlm/memory.h | 2 +- trunk/fs/dlm/recoverd.c | 12 +- trunk/fs/dlm/user.c | 12 +- trunk/fs/gfs2/bmap.c | 12 +- trunk/fs/gfs2/dir.c | 221 +-- trunk/fs/gfs2/dir.h | 1 - trunk/fs/gfs2/file.c | 4 +- trunk/fs/gfs2/glock.c | 39 +- trunk/fs/gfs2/glock.h | 6 - trunk/fs/gfs2/glops.c | 7 +- trunk/fs/gfs2/incore.h | 3 +- trunk/fs/gfs2/main.c | 1 - trunk/fs/gfs2/ops_fstype.c | 1 - trunk/fs/gfs2/rgrp.c | 52 +- trunk/fs/gfs2/rgrp.h | 4 +- trunk/fs/gfs2/super.c | 2 +- trunk/fs/hfsplus/brec.c | 4 - trunk/fs/hfsplus/catalog.c | 14 +- trunk/fs/hfsplus/dir.c | 8 +- trunk/fs/hfsplus/extents.c | 50 +- trunk/fs/hfsplus/hfsplus_fs.h | 18 +- trunk/fs/hfsplus/inode.c | 12 +- trunk/fs/hfsplus/part_tbl.c | 32 +- trunk/fs/hfsplus/super.c | 43 +- trunk/fs/hfsplus/unicode.c | 35 +- trunk/fs/hfsplus/wrapper.c | 92 +- trunk/fs/ubifs/commit.c | 8 +- trunk/fs/ubifs/debug.c | 762 ++++----- trunk/fs/ubifs/debug.h | 241 ++- trunk/fs/ubifs/dir.c | 16 +- trunk/fs/ubifs/file.c | 2 +- trunk/fs/ubifs/io.c | 168 +- trunk/fs/ubifs/log.c | 6 +- trunk/fs/ubifs/lprops.c | 8 +- trunk/fs/ubifs/lpt.c | 37 +- trunk/fs/ubifs/lpt_commit.c | 40 +- trunk/fs/ubifs/misc.h | 103 +- trunk/fs/ubifs/orphan.c | 2 +- trunk/fs/ubifs/recovery.c | 43 +- trunk/fs/ubifs/replay.c | 3 +- trunk/fs/ubifs/sb.c | 6 +- trunk/fs/ubifs/scan.c | 4 +- trunk/fs/ubifs/super.c | 6 +- trunk/fs/ubifs/tnc.c | 26 +- 
trunk/fs/ubifs/tnc_commit.c | 145 +- trunk/fs/ubifs/ubifs.h | 21 +- trunk/fs/xfs/Makefile | 2 + trunk/fs/xfs/linux-2.6/xfs_acl.c | 2 +- trunk/fs/xfs/linux-2.6/xfs_aops.c | 20 +- trunk/fs/xfs/linux-2.6/xfs_buf.c | 79 +- trunk/fs/xfs/linux-2.6/xfs_buf.h | 64 +- trunk/fs/xfs/linux-2.6/xfs_export.c | 4 +- trunk/fs/xfs/linux-2.6/xfs_file.c | 2 +- trunk/fs/xfs/linux-2.6/xfs_iops.c | 433 +----- trunk/fs/xfs/linux-2.6/xfs_linux.h | 7 +- trunk/fs/xfs/linux-2.6/xfs_super.c | 36 +- trunk/fs/xfs/linux-2.6/xfs_sync.c | 10 +- trunk/fs/xfs/linux-2.6/xfs_sync.h | 8 + trunk/fs/xfs/linux-2.6/xfs_trace.h | 60 +- trunk/fs/xfs/quota/xfs_dquot.c | 48 +- trunk/fs/xfs/quota/xfs_dquot.h | 6 + trunk/fs/xfs/quota/xfs_qm.c | 49 +- trunk/fs/xfs/quota/xfs_qm.h | 6 + trunk/fs/xfs/quota/xfs_qm_syscalls.c | 355 ++++- trunk/fs/xfs/quota/xfs_trans_dquot.c | 15 +- trunk/fs/xfs/xfs.h | 1 + trunk/fs/xfs/xfs_alloc.c | 14 +- trunk/fs/xfs/xfs_alloc_btree.c | 84 +- trunk/fs/xfs/xfs_arch.h | 136 ++ trunk/fs/xfs/xfs_attr.c | 41 +- trunk/fs/xfs/xfs_attr_leaf.c | 60 +- trunk/fs/xfs/xfs_bmap.c | 41 +- trunk/fs/xfs/xfs_bmap_btree.c | 106 +- trunk/fs/xfs/xfs_btree.c | 29 +- trunk/fs/xfs/xfs_btree.h | 38 +- trunk/fs/xfs/xfs_btree_trace.c | 249 +++ trunk/fs/xfs/xfs_btree_trace.h | 99 ++ trunk/fs/xfs/xfs_buf_item.c | 75 +- trunk/fs/xfs/xfs_da_btree.c | 272 ++-- trunk/fs/xfs/xfs_da_btree.h | 13 +- trunk/fs/xfs/xfs_dir2.c | 140 +- trunk/fs/xfs/xfs_dir2.h | 54 +- trunk/fs/xfs/xfs_dir2_block.c | 253 +-- trunk/fs/xfs/xfs_dir2_block.h | 92 ++ trunk/fs/xfs/xfs_dir2_data.c | 327 ++-- trunk/fs/xfs/xfs_dir2_data.h | 184 +++ trunk/fs/xfs/xfs_dir2_format.h | 597 ------- trunk/fs/xfs/xfs_dir2_leaf.c | 417 +++-- trunk/fs/xfs/xfs_dir2_leaf.h | 253 +++ trunk/fs/xfs/xfs_dir2_node.c | 201 ++- trunk/fs/xfs/xfs_dir2_node.h | 100 ++ trunk/fs/xfs/xfs_dir2_priv.h | 135 -- trunk/fs/xfs/xfs_dir2_sf.c | 338 ++-- trunk/fs/xfs/xfs_dir2_sf.h | 171 ++ trunk/fs/xfs/xfs_fs.h | 5 - trunk/fs/xfs/xfs_ialloc.c | 14 +- trunk/fs/xfs/xfs_ialloc_btree.c | 
75 + trunk/fs/xfs/xfs_iget.c | 1 + trunk/fs/xfs/xfs_inode.c | 537 +++++-- trunk/fs/xfs/xfs_inode.h | 25 +- trunk/fs/xfs/xfs_inode_item.c | 17 +- trunk/fs/xfs/xfs_inum.h | 11 + trunk/fs/xfs/xfs_log.c | 64 +- trunk/fs/xfs/xfs_log_recover.c | 38 +- trunk/fs/xfs/xfs_mount.c | 71 +- trunk/fs/xfs/xfs_mount.h | 2 +- trunk/fs/xfs/xfs_trans.c | 27 +- trunk/fs/xfs/xfs_trans_ail.c | 214 ++- trunk/fs/xfs/xfs_trans_buf.c | 118 +- trunk/fs/xfs/xfs_trans_inode.c | 9 + trunk/fs/xfs/xfs_trans_priv.h | 14 +- trunk/fs/xfs/xfs_vnodeops.c | 479 +++++- trunk/fs/xfs/xfs_vnodeops.h | 3 +- trunk/include/linux/mfd/tmio.h | 8 - trunk/include/linux/mmc/boot.h | 6 +- trunk/include/linux/mmc/card.h | 2 +- trunk/include/linux/mmc/core.h | 9 +- trunk/include/linux/mmc/dw_mmc.h | 28 +- trunk/include/linux/mmc/host.h | 26 +- trunk/include/linux/mmc/ioctl.h | 2 +- trunk/include/linux/mmc/mmc.h | 17 +- trunk/include/linux/mmc/pm.h | 2 +- trunk/include/linux/mmc/sd.h | 7 +- trunk/include/linux/mmc/sdhci-pltfm.h | 35 + trunk/include/linux/mmc/sdhci-spear.h | 6 +- trunk/include/linux/mmc/sdhci.h | 6 +- trunk/include/linux/mmc/sdio.h | 7 +- trunk/include/linux/mmc/sdio_func.h | 7 +- trunk/include/linux/mmc/sdio_ids.h | 6 +- trunk/include/linux/mmc/sh_mmcif.h | 6 +- trunk/include/linux/mmc/sh_mobile_sdhi.h | 6 +- trunk/include/linux/mmc/tmio.h | 8 +- trunk/include/linux/mtd/ubi.h | 14 +- trunk/include/linux/platform_data/pxa_sdhci.h | 60 - trunk/include/linux/slab.h | 20 - trunk/include/linux/slab_def.h | 52 +- trunk/include/linux/slob_def.h | 10 + trunk/include/linux/slub_def.h | 23 +- trunk/include/xen/balloon.h | 10 - trunk/include/xen/events.h | 2 + trunk/include/xen/hvc-console.h | 4 +- trunk/include/xen/interface/xen.h | 39 - trunk/include/xen/tmem.h | 5 - trunk/include/xen/xenbus.h | 2 - trunk/mm/slab.c | 17 +- trunk/mm/slob.c | 6 - trunk/mm/slub.c | 105 +- 248 files changed, 8551 insertions(+), 14620 deletions(-) delete mode 100644 trunk/Documentation/mmc/mmc-async-req.txt rename 
trunk/{drivers/mmc/host => arch/arm/mach-at91/include/mach}/at91_mci.h (99%) create mode 100644 trunk/arch/arm/plat-pxa/include/plat/sdhci.h delete mode 100644 trunk/arch/x86/xen/vga.c create mode 100644 trunk/drivers/mmc/host/sdhci-of-core.c create mode 100644 trunk/drivers/mmc/host/sdhci-of.h create mode 100644 trunk/drivers/mmc/host/sdhci-pxa.c delete mode 100644 trunk/drivers/mmc/host/sdhci-pxav2.c delete mode 100644 trunk/drivers/mmc/host/sdhci-pxav3.c delete mode 100644 trunk/drivers/xen/xen-pciback/Makefile delete mode 100644 trunk/drivers/xen/xen-pciback/conf_space.c delete mode 100644 trunk/drivers/xen/xen-pciback/conf_space.h delete mode 100644 trunk/drivers/xen/xen-pciback/conf_space_capability.c delete mode 100644 trunk/drivers/xen/xen-pciback/conf_space_header.c delete mode 100644 trunk/drivers/xen/xen-pciback/conf_space_quirks.c delete mode 100644 trunk/drivers/xen/xen-pciback/conf_space_quirks.h delete mode 100644 trunk/drivers/xen/xen-pciback/passthrough.c delete mode 100644 trunk/drivers/xen/xen-pciback/pci_stub.c delete mode 100644 trunk/drivers/xen/xen-pciback/pciback.h delete mode 100644 trunk/drivers/xen/xen-pciback/pciback_ops.c delete mode 100644 trunk/drivers/xen/xen-pciback/vpci.c delete mode 100644 trunk/drivers/xen/xen-pciback/xenbus.c delete mode 100644 trunk/drivers/xen/xen-selfballoon.c create mode 100644 trunk/fs/xfs/xfs_arch.h create mode 100644 trunk/fs/xfs/xfs_btree_trace.c create mode 100644 trunk/fs/xfs/xfs_btree_trace.h create mode 100644 trunk/fs/xfs/xfs_dir2_block.h create mode 100644 trunk/fs/xfs/xfs_dir2_data.h delete mode 100644 trunk/fs/xfs/xfs_dir2_format.h create mode 100644 trunk/fs/xfs/xfs_dir2_leaf.h create mode 100644 trunk/fs/xfs/xfs_dir2_node.h delete mode 100644 trunk/fs/xfs/xfs_dir2_priv.h create mode 100644 trunk/fs/xfs/xfs_dir2_sf.h create mode 100644 trunk/include/linux/mmc/sdhci-pltfm.h delete mode 100644 trunk/include/linux/platform_data/pxa_sdhci.h delete mode 100644 trunk/include/xen/tmem.h diff --git 
a/[refs] b/[refs] index 528498dbb708..367c7065b14a 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 111ad119d1765b1bbef2629a5f2bd825caeb7e74 +refs/heads/master: e0377e25206328998d036cafddcd00a7c3252e3e diff --git a/trunk/Documentation/filesystems/ubifs.txt b/trunk/Documentation/filesystems/ubifs.txt index a0a61d2f389f..8e4fab639d9c 100644 --- a/trunk/Documentation/filesystems/ubifs.txt +++ b/trunk/Documentation/filesystems/ubifs.txt @@ -111,6 +111,34 @@ The following is an example of the kernel boot arguments to attach mtd0 to UBI and mount volume "rootfs": ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs + +Module Parameters for Debugging +=============================== + +When UBIFS has been compiled with debugging enabled, there are 2 module +parameters that are available to control aspects of testing and debugging. + +debug_chks Selects extra checks that UBIFS can do while running: + + Check Flag value + + General checks 1 + Check Tree Node Cache (TNC) 2 + Check indexing tree size 4 + Check orphan area 8 + Check old indexing tree 16 + Check LEB properties (lprops) 32 + Check leaf nodes and inodes 64 + +debug_tsts Selects a mode of testing, as follows: + + Test mode Flag value + + Failure mode for recovery testing 4 + +For example, set debug_chks to 3 to enable general and TNC checks. 
+ + References ========== diff --git a/trunk/Documentation/mmc/00-INDEX b/trunk/Documentation/mmc/00-INDEX index a9ba6720ffdf..93dd7a714075 100644 --- a/trunk/Documentation/mmc/00-INDEX +++ b/trunk/Documentation/mmc/00-INDEX @@ -4,5 +4,3 @@ mmc-dev-attrs.txt - info on SD and MMC device attributes mmc-dev-parts.txt - info on SD and MMC device partitions -mmc-async-req.txt - - info on mmc asynchronous requests diff --git a/trunk/Documentation/mmc/mmc-async-req.txt b/trunk/Documentation/mmc/mmc-async-req.txt deleted file mode 100644 index ae1907b10e4a..000000000000 --- a/trunk/Documentation/mmc/mmc-async-req.txt +++ /dev/null @@ -1,87 +0,0 @@ -Rationale -========= - -How significant is the cache maintenance overhead? -It depends. Fast eMMC and multiple cache levels with speculative cache -pre-fetch makes the cache overhead relatively significant. If the DMA -preparations for the next request are done in parallel with the current -transfer, the DMA preparation overhead would not affect the MMC performance. -The intention of non-blocking (asynchronous) MMC requests is to minimize the -time between when an MMC request ends and another MMC request begins. -Using mmc_wait_for_req(), the MMC controller is idle while dma_map_sg and -dma_unmap_sg are processing. Using non-blocking MMC requests makes it -possible to prepare the caches for next job in parallel with an active -MMC request. - -MMC block driver -================ - -The mmc_blk_issue_rw_rq() in the MMC block driver is made non-blocking. -The increase in throughput is proportional to the time it takes to -prepare (major part of preparations are dma_map_sg() and dma_unmap_sg()) -a request and how fast the memory is. The faster the MMC/SD is the -more significant the prepare request time becomes. Roughly the expected -performance gain is 5% for large writes and 10% on large reads on a L2 cache -platform. In power save mode, when clocks run on a lower frequency, the DMA -preparation may cost even more. 
As long as these slower preparations are run -in parallel with the transfer performance won't be affected. - -Details on measurements from IOZone and mmc_test -================================================ - -https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req - -MMC core API extension -====================== - -There is one new public function mmc_start_req(). -It starts a new MMC command request for a host. The function isn't -truly non-blocking. If there is an ongoing async request it waits -for completion of that request and starts the new one and returns. It -doesn't wait for the new request to complete. If there is no ongoing -request it starts the new request and returns immediately. - -MMC host extensions -=================== - -There are two optional members in the mmc_host_ops -- pre_req() and -post_req() -- that the host driver may implement in order to move work -to before and after the actual mmc_host_ops.request() function is called. -In the DMA case pre_req() may do dma_map_sg() and prepare the DMA -descriptor, and post_req() runs the dma_unmap_sg(). - -Optimize for the first request -============================== - -The first request in a series of requests can't be prepared in parallel -with the previous transfer, since there is no previous request. -The argument is_first_req in pre_req() indicates that there is no previous -request. The host driver may optimize for this scenario to minimize -the performance loss. A way to optimize for this is to split the current -request in two chunks, prepare the first chunk and start the request, -and finally prepare the second chunk and start the transfer. - -Pseudocode to handle is_first_req scenario with minimal prepare overhead: - -if (is_first_req && req->size > threshold) - /* start MMC transfer for the complete transfer size */ - mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE); - - /* - * Begin to prepare DMA while cmd is being processed by MMC. 
- * The first chunk of the request should take the same time - * to prepare as the "MMC process command time". - * If prepare time exceeds MMC cmd time - * the transfer is delayed, guesstimate max 4k as first chunk size. - */ - prepare_1st_chunk_for_dma(req); - /* flush pending desc to the DMAC (dmaengine.h) */ - dma_issue_pending(req->dma_desc); - - prepare_2nd_chunk_for_dma(req); - /* - * The second issue_pending should be called before MMC runs out - * of the first chunk. If the MMC runs out of the first data chunk - * before this call, the transfer is delayed. - */ - dma_issue_pending(req->dma_desc); diff --git a/trunk/Documentation/virtual/lguest/lguest.c b/trunk/Documentation/virtual/lguest/lguest.c index cd9d6af61d07..e3b9bb7a644a 100644 --- a/trunk/Documentation/virtual/lguest/lguest.c +++ b/trunk/Documentation/virtual/lguest/lguest.c @@ -861,8 +861,10 @@ static void console_output(struct virtqueue *vq) /* writev can return a partial write, so we loop here. */ while (!iov_empty(iov, out)) { int len = writev(STDOUT_FILENO, iov, out); - if (len <= 0) - err(1, "Write to stdout gave %i", len); + if (len <= 0) { + warn("Write to stdout gave %i (%d)", len, errno); + break; + } iov_consume(iov, out, len); } @@ -898,7 +900,7 @@ static void net_output(struct virtqueue *vq) * same format: what a coincidence! */ if (writev(net_info->tunfd, iov, out) < 0) - errx(1, "Write to tun failed?"); + warnx("Write to tun failed (%d)?", errno); /* * Done with that one; wait_for_vq_desc() will send the interrupt if @@ -955,7 +957,7 @@ static void net_input(struct virtqueue *vq) */ len = readv(net_info->tunfd, iov, in); if (len <= 0) - err(1, "Failed to read from tun."); + warn("Failed to read from tun (%d).", errno); /* * Mark that packet buffer as used, but don't interrupt here. 
We want diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index 789fed662140..187282da9213 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -1,5 +1,4 @@ - List of maintainers and how to submit kernel changes Please try to follow the guidelines below. This will make things @@ -4585,8 +4584,9 @@ S: Maintained F: drivers/mmc/host/omap.c OMAP HS MMC SUPPORT +M: Madhusudhan Chikkature L: linux-omap@vger.kernel.org -S: Orphan +S: Maintained F: drivers/mmc/host/omap_hsmmc.c OMAP RANDOM NUMBER GENERATOR SUPPORT @@ -6242,14 +6242,9 @@ F: drivers/char/toshiba.c F: include/linux/toshiba.h TMIO MMC DRIVER -M: Guennadi Liakhovetski M: Ian Molton -L: linux-mmc@vger.kernel.org S: Maintained -F: drivers/mmc/host/tmio_mmc* -F: drivers/mmc/host/sh_mobile_sdhi.c -F: include/linux/mmc/tmio.h -F: include/linux/mmc/sh_mobile_sdhi.h +F: drivers/mmc/host/tmio_mmc.* TMPFS (SHMEM FILESYSTEM) M: Hugh Dickins @@ -6326,7 +6321,7 @@ F: drivers/scsi/u14-34f.c UBI FILE SYSTEM (UBIFS) M: Artem Bityutskiy -M: Adrian Hunter +M: Adrian Hunter L: linux-mtd@lists.infradead.org T: git git://git.infradead.org/ubifs-2.6.git W: http://www.linux-mtd.infradead.org/doc/ubifs.html diff --git a/trunk/arch/arm/configs/mmp2_defconfig b/trunk/arch/arm/configs/mmp2_defconfig index 5a584520db2f..47ad3b1a4fee 100644 --- a/trunk/arch/arm/configs/mmp2_defconfig +++ b/trunk/arch/arm/configs/mmp2_defconfig @@ -8,7 +8,6 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_ARCH_MMP=y -CONFIG_MACH_BROWNSTONE=y CONFIG_MACH_FLINT=y CONFIG_MACH_MARVELL_JASPER=y CONFIG_HIGH_RES_TIMERS=y @@ -64,16 +63,10 @@ CONFIG_BACKLIGHT_MAX8925=y # CONFIG_USB_SUPPORT is not set CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_MAX8925=y -CONFIG_MMC=y # CONFIG_DNOTIFY is not set CONFIG_INOTIFY=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y -CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -CONFIG_EXT4_FS=y -CONFIG_MSDOS_FS=y -CONFIG_FAT_DEFAULT_CODEPAGE=437 CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y @@ -88,7 +81,7 @@ 
CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_PREEMPT is not set CONFIG_DEBUG_INFO=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set -# CONFIG_DYNAMIC_DEBUG is not set +CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_USER=y CONFIG_DEBUG_ERRORS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/trunk/drivers/mmc/host/at91_mci.h b/trunk/arch/arm/mach-at91/include/mach/at91_mci.h similarity index 99% rename from trunk/drivers/mmc/host/at91_mci.h rename to trunk/arch/arm/mach-at91/include/mach/at91_mci.h index eec3a6b1c2bc..02182c16a022 100644 --- a/trunk/drivers/mmc/host/at91_mci.h +++ b/trunk/arch/arm/mach-at91/include/mach/at91_mci.h @@ -1,5 +1,5 @@ /* - * drivers/mmc/host/at91_mci.h + * arch/arm/mach-at91/include/mach/at91_mci.h * * Copyright (C) 2005 Ivan Kokshaysky * Copyright (C) SAN People diff --git a/trunk/arch/arm/mach-mmp/brownstone.c b/trunk/arch/arm/mach-mmp/brownstone.c index c79162a50f28..7bb78fd5a2a6 100644 --- a/trunk/arch/arm/mach-mmp/brownstone.c +++ b/trunk/arch/arm/mach-mmp/brownstone.c @@ -177,16 +177,9 @@ static struct i2c_board_info brownstone_twsi1_info[] = { }; static struct sdhci_pxa_platdata mmp2_sdh_platdata_mmc0 = { - .clk_delay_cycles = 0x1f, + .max_speed = 25000000, }; -static struct sdhci_pxa_platdata mmp2_sdh_platdata_mmc2 = { - .clk_delay_cycles = 0x1f, - .flags = PXA_FLAG_CARD_PERMANENT - | PXA_FLAG_SD_8_BIT_CAPABLE_SLOT, -}; - - static void __init brownstone_init(void) { mfp_config(ARRAY_AND_SIZE(brownstone_pin_config)); @@ -196,7 +189,6 @@ static void __init brownstone_init(void) mmp2_add_uart(3); mmp2_add_twsi(1, NULL, ARRAY_AND_SIZE(brownstone_twsi1_info)); mmp2_add_sdhost(0, &mmp2_sdh_platdata_mmc0); /* SD/MMC */ - mmp2_add_sdhost(2, &mmp2_sdh_platdata_mmc2); /* eMMC */ /* enable 5v regulator */ platform_device_register(&brownstone_v_5vp_device); diff --git a/trunk/arch/arm/mach-mmp/include/mach/mmp2.h b/trunk/arch/arm/mach-mmp/include/mach/mmp2.h index de7b88826ad7..2cbf6df09b82 100644 --- a/trunk/arch/arm/mach-mmp/include/mach/mmp2.h +++ 
b/trunk/arch/arm/mach-mmp/include/mach/mmp2.h @@ -1,7 +1,7 @@ #ifndef __ASM_MACH_MMP2_H #define __ASM_MACH_MMP2_H -#include +#include struct sys_timer; diff --git a/trunk/arch/arm/mach-mmp/jasper.c b/trunk/arch/arm/mach-mmp/jasper.c index 5d6421d63254..24172a0aad59 100644 --- a/trunk/arch/arm/mach-mmp/jasper.c +++ b/trunk/arch/arm/mach-mmp/jasper.c @@ -154,7 +154,7 @@ static struct i2c_board_info jasper_twsi1_info[] = { }; static struct sdhci_pxa_platdata mmp2_sdh_platdata_mmc0 = { - .clk_delay_cycles = 0x1f, + .max_speed = 25000000, }; static void __init jasper_init(void) diff --git a/trunk/arch/arm/mach-mmp/mmp2.c b/trunk/arch/arm/mach-mmp/mmp2.c index 079c18861d5c..8e6c3ac7f7c1 100644 --- a/trunk/arch/arm/mach-mmp/mmp2.c +++ b/trunk/arch/arm/mach-mmp/mmp2.c @@ -168,10 +168,10 @@ static struct clk_lookup mmp2_clkregs[] = { INIT_CLKREG(&clk_twsi5, "pxa2xx-i2c.4", NULL), INIT_CLKREG(&clk_twsi6, "pxa2xx-i2c.5", NULL), INIT_CLKREG(&clk_nand, "pxa3xx-nand", NULL), - INIT_CLKREG(&clk_sdh0, "sdhci-pxav3.0", "PXA-SDHCLK"), - INIT_CLKREG(&clk_sdh1, "sdhci-pxav3.1", "PXA-SDHCLK"), - INIT_CLKREG(&clk_sdh2, "sdhci-pxav3.2", "PXA-SDHCLK"), - INIT_CLKREG(&clk_sdh3, "sdhci-pxav3.3", "PXA-SDHCLK"), + INIT_CLKREG(&clk_sdh0, "sdhci-pxa.0", "PXA-SDHCLK"), + INIT_CLKREG(&clk_sdh1, "sdhci-pxa.1", "PXA-SDHCLK"), + INIT_CLKREG(&clk_sdh2, "sdhci-pxa.2", "PXA-SDHCLK"), + INIT_CLKREG(&clk_sdh3, "sdhci-pxa.3", "PXA-SDHCLK"), }; static int __init mmp2_init(void) @@ -222,8 +222,8 @@ MMP2_DEVICE(twsi4, "pxa2xx-i2c", 3, TWSI4, 0xd4033000, 0x70); MMP2_DEVICE(twsi5, "pxa2xx-i2c", 4, TWSI5, 0xd4033800, 0x70); MMP2_DEVICE(twsi6, "pxa2xx-i2c", 5, TWSI6, 0xd4034000, 0x70); MMP2_DEVICE(nand, "pxa3xx-nand", -1, NAND, 0xd4283000, 0x100, 28, 29); -MMP2_DEVICE(sdh0, "sdhci-pxav3", 0, MMC, 0xd4280000, 0x120); -MMP2_DEVICE(sdh1, "sdhci-pxav3", 1, MMC2, 0xd4280800, 0x120); -MMP2_DEVICE(sdh2, "sdhci-pxav3", 2, MMC3, 0xd4281000, 0x120); -MMP2_DEVICE(sdh3, "sdhci-pxav3", 3, MMC4, 0xd4281800, 0x120); 
+MMP2_DEVICE(sdh0, "sdhci-pxa", 0, MMC, 0xd4280000, 0x120); +MMP2_DEVICE(sdh1, "sdhci-pxa", 1, MMC2, 0xd4280800, 0x120); +MMP2_DEVICE(sdh2, "sdhci-pxa", 2, MMC3, 0xd4281000, 0x120); +MMP2_DEVICE(sdh3, "sdhci-pxa", 3, MMC4, 0xd4281800, 0x120); diff --git a/trunk/arch/arm/plat-pxa/include/plat/sdhci.h b/trunk/arch/arm/plat-pxa/include/plat/sdhci.h new file mode 100644 index 000000000000..1ab332e37d7d --- /dev/null +++ b/trunk/arch/arm/plat-pxa/include/plat/sdhci.h @@ -0,0 +1,35 @@ +/* linux/arch/arm/plat-pxa/include/plat/sdhci.h + * + * Copyright 2010 Marvell + * Zhangfei Gao + * + * PXA Platform - SDHCI platform data definitions + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __PLAT_PXA_SDHCI_H +#define __PLAT_PXA_SDHCI_H + +/* pxa specific flag */ +/* Require clock free running */ +#define PXA_FLAG_DISABLE_CLOCK_GATING (1<<0) + +/* Board design supports 8-bit data on SD/SDIO BUS */ +#define PXA_FLAG_SD_8_BIT_CAPABLE_SLOT (1<<2) + +/* + * struct pxa_sdhci_platdata() - Platform device data for PXA SDHCI + * @max_speed: the maximum speed supported + * @quirks: quirks of specific device + * @flags: flags for platform requirement + */ +struct sdhci_pxa_platdata { + unsigned int max_speed; + unsigned int quirks; + unsigned int flags; +}; + +#endif /* __PLAT_PXA_SDHCI_H */ diff --git a/trunk/arch/x86/include/asm/xen/pci.h b/trunk/arch/x86/include/asm/xen/pci.h index 968d57dd54c9..4fbda9a3f339 100644 --- a/trunk/arch/x86/include/asm/xen/pci.h +++ b/trunk/arch/x86/include/asm/xen/pci.h @@ -14,14 +14,13 @@ static inline int pci_xen_hvm_init(void) } #endif #if defined(CONFIG_XEN_DOM0) -int __init pci_xen_initial_domain(void); +void __init xen_setup_pirqs(void); int xen_find_device_domain_owner(struct pci_dev *dev); int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); int 
xen_unregister_device_domain_owner(struct pci_dev *dev); #else -static inline int __init pci_xen_initial_domain(void) +static inline void __init xen_setup_pirqs(void) { - return -1; } static inline int xen_find_device_domain_owner(struct pci_dev *dev) { diff --git a/trunk/arch/x86/pci/xen.c b/trunk/arch/x86/pci/xen.c index 1017c7bee388..f567965c0620 100644 --- a/trunk/arch/x86/pci/xen.c +++ b/trunk/arch/x86/pci/xen.c @@ -1,13 +1,8 @@ /* - * Xen PCI - handle PCI (INTx) and MSI infrastructure calls for PV, HVM and - * initial domain support. We also handle the DSDT _PRT callbacks for GSI's - * used in HVM and initial domain mode (PV does not parse ACPI, so it has no - * concept of GSIs). Under PV we hook under the pnbbios API for IRQs and - * 0xcf8 PCI configuration read/write. + * Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux + * x86 PCI core to support the Xen PCI Frontend * * Author: Ryan Wilson - * Konrad Rzeszutek Wilk - * Stefano Stabellini */ #include #include @@ -24,53 +19,22 @@ #include #include -static int xen_pcifront_enable_irq(struct pci_dev *dev) -{ - int rc; - int share = 1; - int pirq; - u8 gsi; - - rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi); - if (rc < 0) { - dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n", - rc); - return rc; - } - /* In PV DomU the Xen PCI backend puts the PIRQ in the interrupt line.*/ - pirq = gsi; - - if (gsi < NR_IRQS_LEGACY) - share = 0; - - rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront"); - if (rc < 0) { - dev_warn(&dev->dev, "Xen PCI: failed to bind GSI%d (PIRQ%d) to IRQ: %d\n", - gsi, pirq, rc); - return rc; - } - - dev->irq = rc; - dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq); - return 0; -} - #ifdef CONFIG_ACPI -static int xen_register_pirq(u32 gsi, int gsi_override, int triggering, - bool set_pirq) +static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, + int trigger, int polarity) { - int rc, pirq = -1, irq = -1; + int 
rc, irq; struct physdev_map_pirq map_irq; int shareable = 0; char *name; - if (set_pirq) - pirq = gsi; + if (!xen_hvm_domain()) + return -1; map_irq.domid = DOMID_SELF; map_irq.type = MAP_PIRQ_TYPE_GSI; map_irq.index = gsi; - map_irq.pirq = pirq; + map_irq.pirq = -1; rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); if (rc) { @@ -78,7 +42,7 @@ static int xen_register_pirq(u32 gsi, int gsi_override, int triggering, return -1; } - if (triggering == ACPI_EDGE_SENSITIVE) { + if (trigger == ACPI_EDGE_SENSITIVE) { shareable = 0; name = "ioapic-edge"; } else { @@ -86,63 +50,12 @@ static int xen_register_pirq(u32 gsi, int gsi_override, int triggering, name = "ioapic-level"; } - if (gsi_override >= 0) - gsi = gsi_override; - irq = xen_bind_pirq_gsi_to_irq(gsi, map_irq.pirq, shareable, name); - if (irq < 0) - goto out; - - printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d (gsi=%d)\n", map_irq.pirq, irq, gsi); -out: - return irq; -} - -static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, - int trigger, int polarity) -{ - if (!xen_hvm_domain()) - return -1; - return xen_register_pirq(gsi, -1 /* no GSI override */, trigger, - false /* no mapping of GSI to PIRQ */); -} - -#ifdef CONFIG_XEN_DOM0 -static int xen_register_gsi(u32 gsi, int gsi_override, int triggering, int polarity) -{ - int rc, irq; - struct physdev_setup_gsi setup_gsi; - - if (!xen_pv_domain()) - return -1; - - printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n", - gsi, triggering, polarity); - - irq = xen_register_pirq(gsi, gsi_override, triggering, true); - - setup_gsi.gsi = gsi; - setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1); - setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 
0 : 1); - - rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi); - if (rc == -EEXIST) - printk(KERN_INFO "Already setup the GSI :%d\n", gsi); - else if (rc) { - printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n", - gsi, rc); - } + printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); return irq; } - -static int acpi_register_gsi_xen(struct device *dev, u32 gsi, - int trigger, int polarity) -{ - return xen_register_gsi(gsi, -1 /* no GSI override */, trigger, polarity); -} -#endif #endif #if defined(CONFIG_PCI_MSI) @@ -152,43 +65,6 @@ static int acpi_register_gsi_xen(struct device *dev, u32 gsi, struct xen_pci_frontend_ops *xen_pci_frontend; EXPORT_SYMBOL_GPL(xen_pci_frontend); -static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - int irq, ret, i; - struct msi_desc *msidesc; - int *v; - - v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL); - if (!v) - return -ENOMEM; - - if (type == PCI_CAP_ID_MSIX) - ret = xen_pci_frontend_enable_msix(dev, v, nvec); - else - ret = xen_pci_frontend_enable_msi(dev, v); - if (ret) - goto error; - i = 0; - list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0, - (type == PCI_CAP_ID_MSIX) ? 
- "pcifront-msi-x" : - "pcifront-msi", - DOMID_SELF); - if (irq < 0) - goto free; - i++; - } - kfree(v); - return 0; - -error: - dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n"); -free: - kfree(v); - return ret; -} - #define XEN_PIRQ_MSI_DATA (MSI_DATA_TRIGGER_EDGE | \ MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0)) @@ -247,6 +123,67 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return -ENODEV; } +/* + * For MSI interrupts we have to use drivers/xen/event.s functions to + * allocate an irq_desc and setup the right */ + + +static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, ret, i; + struct msi_desc *msidesc; + int *v; + + v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL); + if (!v) + return -ENOMEM; + + if (type == PCI_CAP_ID_MSIX) + ret = xen_pci_frontend_enable_msix(dev, v, nvec); + else + ret = xen_pci_frontend_enable_msi(dev, v); + if (ret) + goto error; + i = 0; + list_for_each_entry(msidesc, &dev->msi_list, list) { + irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0, + (type == PCI_CAP_ID_MSIX) ? + "pcifront-msi-x" : + "pcifront-msi", + DOMID_SELF); + if (irq < 0) + goto free; + i++; + } + kfree(v); + return 0; + +error: + dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n"); +free: + kfree(v); + return ret; +} + +static void xen_teardown_msi_irqs(struct pci_dev *dev) +{ + struct msi_desc *msidesc; + + msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); + if (msidesc->msi_attrib.is_msix) + xen_pci_frontend_disable_msix(dev); + else + xen_pci_frontend_disable_msi(dev); + + /* Free the IRQ's and the msidesc using the generic code. 
*/ + default_teardown_msi_irqs(dev); +} + +static void xen_teardown_msi_irq(unsigned int irq) +{ + xen_destroy_irq(irq); +} + #ifdef CONFIG_XEN_DOM0 static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { @@ -305,27 +242,44 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return ret; } #endif +#endif -static void xen_teardown_msi_irqs(struct pci_dev *dev) +static int xen_pcifront_enable_irq(struct pci_dev *dev) { - struct msi_desc *msidesc; + int rc; + int share = 1; + int pirq; + u8 gsi; - msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); - if (msidesc->msi_attrib.is_msix) - xen_pci_frontend_disable_msix(dev); - else - xen_pci_frontend_disable_msi(dev); + rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi); + if (rc < 0) { + dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n", + rc); + return rc; + } - /* Free the IRQ's and the msidesc using the generic code. */ - default_teardown_msi_irqs(dev); -} + rc = xen_allocate_pirq_gsi(gsi); + if (rc < 0) { + dev_warn(&dev->dev, "Xen PCI: failed to allocate a PIRQ for GSI%d: %d\n", + gsi, rc); + return rc; + } + pirq = rc; -static void xen_teardown_msi_irq(unsigned int irq) -{ - xen_destroy_irq(irq); -} + if (gsi < NR_IRQS_LEGACY) + share = 0; -#endif + rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront"); + if (rc < 0) { + dev_warn(&dev->dev, "Xen PCI: failed to bind GSI%d (PIRQ%d) to IRQ: %d\n", + gsi, pirq, rc); + return rc; + } + + dev->irq = rc; + dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq); + return 0; +} int __init pci_xen_init(void) { @@ -373,6 +327,79 @@ int __init pci_xen_hvm_init(void) } #ifdef CONFIG_XEN_DOM0 +static int xen_register_pirq(u32 gsi, int gsi_override, int triggering) +{ + int rc, pirq, irq = -1; + struct physdev_map_pirq map_irq; + int shareable = 0; + char *name; + + if (!xen_pv_domain()) + return -1; + + if (triggering == ACPI_EDGE_SENSITIVE) { + shareable = 0; + name = 
"ioapic-edge"; + } else { + shareable = 1; + name = "ioapic-level"; + } + pirq = xen_allocate_pirq_gsi(gsi); + if (pirq < 0) + goto out; + + if (gsi_override >= 0) + irq = xen_bind_pirq_gsi_to_irq(gsi_override, pirq, shareable, name); + else + irq = xen_bind_pirq_gsi_to_irq(gsi, pirq, shareable, name); + if (irq < 0) + goto out; + + printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d (gsi=%d)\n", pirq, irq, gsi); + + map_irq.domid = DOMID_SELF; + map_irq.type = MAP_PIRQ_TYPE_GSI; + map_irq.index = gsi; + map_irq.pirq = pirq; + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); + if (rc) { + printk(KERN_WARNING "xen map irq failed %d\n", rc); + return -1; + } + +out: + return irq; +} + +static int xen_register_gsi(u32 gsi, int gsi_override, int triggering, int polarity) +{ + int rc, irq; + struct physdev_setup_gsi setup_gsi; + + if (!xen_pv_domain()) + return -1; + + printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n", + gsi, triggering, polarity); + + irq = xen_register_pirq(gsi, gsi_override, triggering); + + setup_gsi.gsi = gsi; + setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1); + setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1); + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi); + if (rc == -EEXIST) + printk(KERN_INFO "Already setup the GSI :%d\n", gsi); + else if (rc) { + printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n", + gsi, rc); + } + + return irq; +} + static __init void xen_setup_acpi_sci(void) { int rc; @@ -392,7 +419,7 @@ static __init void xen_setup_acpi_sci(void) } trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; - + printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d " "polarity=%d\n", gsi, trigger, polarity); @@ -407,9 +434,10 @@ static __init void xen_setup_acpi_sci(void) * the ACPI interpreter and keels over since IRQ 9 has not been * setup as we had setup IRQ 20 for it). 
*/ + /* Check whether the GSI != IRQ */ if (acpi_gsi_to_irq(gsi, &irq) == 0) { - /* Use the provided value if it's valid. */ - if (irq >= 0) + if (irq >= 0 && irq != gsi) + /* Bugger, we MUST have that IRQ. */ gsi_override = irq; } @@ -419,16 +447,41 @@ static __init void xen_setup_acpi_sci(void) return; } -int __init pci_xen_initial_domain(void) +static int acpi_register_gsi_xen(struct device *dev, u32 gsi, + int trigger, int polarity) { - int irq; + return xen_register_gsi(gsi, -1 /* no GSI override */, trigger, polarity); +} +static int __init pci_xen_initial_domain(void) +{ #ifdef CONFIG_PCI_MSI x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs; x86_msi.teardown_msi_irq = xen_teardown_msi_irq; #endif xen_setup_acpi_sci(); __acpi_register_gsi = acpi_register_gsi_xen; + + return 0; +} + +void __init xen_setup_pirqs(void) +{ + int pirq, irq; + + pci_xen_initial_domain(); + + if (0 == nr_ioapics) { + for (irq = 0; irq < NR_IRQS_LEGACY; irq++) { + pirq = xen_allocate_pirq_gsi(irq); + if (WARN(pirq < 0, + "Could not allocate PIRQ for legacy interrupt\n")) + break; + irq = xen_bind_pirq_gsi_to_irq(irq, pirq, 0, "xt-pic"); + } + return; + } + /* Pre-allocate legacy irqs */ for (irq = 0; irq < NR_IRQS_LEGACY; irq++) { int trigger, polarity; @@ -437,16 +490,12 @@ int __init pci_xen_initial_domain(void) continue; xen_register_pirq(irq, -1 /* no GSI override */, - trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE, - true /* Map GSI to PIRQ */); + trigger ? 
ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE); } - if (0 == nr_ioapics) { - for (irq = 0; irq < NR_IRQS_LEGACY; irq++) - xen_bind_pirq_gsi_to_irq(irq, irq, 0, "xt-pic"); - } - return 0; } +#endif +#ifdef CONFIG_XEN_DOM0 struct xen_device_domain_owner { domid_t domain; struct pci_dev *dev; diff --git a/trunk/arch/x86/xen/Makefile b/trunk/arch/x86/xen/Makefile index a6575b949b11..17c565de3d64 100644 --- a/trunk/arch/x86/xen/Makefile +++ b/trunk/arch/x86/xen/Makefile @@ -18,5 +18,5 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o -obj-$(CONFIG_XEN_DOM0) += vga.o + obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o diff --git a/trunk/arch/x86/xen/enlighten.c b/trunk/arch/x86/xen/enlighten.c index 53257421082b..5525163a0398 100644 --- a/trunk/arch/x86/xen/enlighten.c +++ b/trunk/arch/x86/xen/enlighten.c @@ -1248,14 +1248,6 @@ asmlinkage void __init xen_start_kernel(void) if (pci_xen) x86_init.pci.arch_init = pci_xen_init; } else { - const struct dom0_vga_console_info *info = - (void *)((char *)xen_start_info + - xen_start_info->console.dom0.info_off); - - xen_init_vga(info, xen_start_info->console.dom0.info_size); - xen_start_info->console.domU.mfn = 0; - xen_start_info->console.domU.evtchn = 0; - /* Make sure ACS will be enabled */ pci_request_acs(); } diff --git a/trunk/arch/x86/xen/platform-pci-unplug.c b/trunk/arch/x86/xen/platform-pci-unplug.c index ffcf2615640b..25c52f94a27c 100644 --- a/trunk/arch/x86/xen/platform-pci-unplug.c +++ b/trunk/arch/x86/xen/platform-pci-unplug.c @@ -35,7 +35,7 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug); #ifdef CONFIG_XEN_PVHVM static int xen_emul_unplug; -static int check_platform_magic(void) +static int __init check_platform_magic(void) { short magic; char protocol; diff --git a/trunk/arch/x86/xen/vga.c b/trunk/arch/x86/xen/vga.c deleted file mode 100644 index 1cd7f4d11e29..000000000000 --- a/trunk/arch/x86/xen/vga.c 
+++ /dev/null @@ -1,67 +0,0 @@ -#include -#include - -#include -#include - -#include - -#include "xen-ops.h" - -void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) -{ - struct screen_info *screen_info = &boot_params.screen_info; - - /* This is drawn from a dump from vgacon:startup in - * standard Linux. */ - screen_info->orig_video_mode = 3; - screen_info->orig_video_isVGA = 1; - screen_info->orig_video_lines = 25; - screen_info->orig_video_cols = 80; - screen_info->orig_video_ega_bx = 3; - screen_info->orig_video_points = 16; - screen_info->orig_y = screen_info->orig_video_lines - 1; - - switch (info->video_type) { - case XEN_VGATYPE_TEXT_MODE_3: - if (size < offsetof(struct dom0_vga_console_info, u.text_mode_3) - + sizeof(info->u.text_mode_3)) - break; - screen_info->orig_video_lines = info->u.text_mode_3.rows; - screen_info->orig_video_cols = info->u.text_mode_3.columns; - screen_info->orig_x = info->u.text_mode_3.cursor_x; - screen_info->orig_y = info->u.text_mode_3.cursor_y; - screen_info->orig_video_points = - info->u.text_mode_3.font_height; - break; - - case XEN_VGATYPE_VESA_LFB: - if (size < offsetof(struct dom0_vga_console_info, - u.vesa_lfb.gbl_caps)) - break; - screen_info->orig_video_isVGA = VIDEO_TYPE_VLFB; - screen_info->lfb_width = info->u.vesa_lfb.width; - screen_info->lfb_height = info->u.vesa_lfb.height; - screen_info->lfb_depth = info->u.vesa_lfb.bits_per_pixel; - screen_info->lfb_base = info->u.vesa_lfb.lfb_base; - screen_info->lfb_size = info->u.vesa_lfb.lfb_size; - screen_info->lfb_linelength = info->u.vesa_lfb.bytes_per_line; - screen_info->red_size = info->u.vesa_lfb.red_size; - screen_info->red_pos = info->u.vesa_lfb.red_pos; - screen_info->green_size = info->u.vesa_lfb.green_size; - screen_info->green_pos = info->u.vesa_lfb.green_pos; - screen_info->blue_size = info->u.vesa_lfb.blue_size; - screen_info->blue_pos = info->u.vesa_lfb.blue_pos; - screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size; - 
screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos; - if (size >= offsetof(struct dom0_vga_console_info, - u.vesa_lfb.gbl_caps) - + sizeof(info->u.vesa_lfb.gbl_caps)) - screen_info->capabilities = info->u.vesa_lfb.gbl_caps; - if (size >= offsetof(struct dom0_vga_console_info, - u.vesa_lfb.mode_attrs) - + sizeof(info->u.vesa_lfb.mode_attrs)) - screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs; - break; - } -} diff --git a/trunk/arch/x86/xen/xen-ops.h b/trunk/arch/x86/xen/xen-ops.h index b095739ccd4c..97dfdc8757b3 100644 --- a/trunk/arch/x86/xen/xen-ops.h +++ b/trunk/arch/x86/xen/xen-ops.h @@ -88,17 +88,6 @@ static inline void xen_uninit_lock_cpu(int cpu) } #endif -struct dom0_vga_console_info; - -#ifdef CONFIG_XEN_DOM0 -void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size); -#else -static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, - size_t size) -{ -} -#endif - /* Declare an asm function, along with symbols needed to make it inlineable */ #define DECL_ASM(ret, name, ...) 
\ diff --git a/trunk/drivers/block/xen-blkback/xenbus.c b/trunk/drivers/block/xen-blkback/xenbus.c index 3f129b45451a..6cc0db1bf522 100644 --- a/trunk/drivers/block/xen-blkback/xenbus.c +++ b/trunk/drivers/block/xen-blkback/xenbus.c @@ -684,7 +684,7 @@ static void connect(struct backend_info *be) err = xenbus_switch_state(dev, XenbusStateConnected); if (err) - xenbus_dev_fatal(dev, err, "%s: switching to Connected state", + xenbus_dev_fatal(dev, err, "switching to Connected state", dev->nodename); return; diff --git a/trunk/drivers/mmc/card/block.c b/trunk/drivers/mmc/card/block.c index 1ff5486213fb..f85e42224559 100644 --- a/trunk/drivers/mmc/card/block.c +++ b/trunk/drivers/mmc/card/block.c @@ -106,16 +106,6 @@ struct mmc_blk_data { static DEFINE_MUTEX(open_lock); -enum mmc_blk_status { - MMC_BLK_SUCCESS = 0, - MMC_BLK_PARTIAL, - MMC_BLK_RETRY, - MMC_BLK_RETRY_SINGLE, - MMC_BLK_DATA_ERR, - MMC_BLK_CMD_ERR, - MMC_BLK_ABORT, -}; - module_param(perdev_minors, int, 0444); MODULE_PARM_DESC(perdev_minors, "Minors numbers to allocate per device"); @@ -437,6 +427,14 @@ static const struct block_device_operations mmc_bdops = { #endif }; +struct mmc_blk_request { + struct mmc_request mrq; + struct mmc_command sbc; + struct mmc_command cmd; + struct mmc_command stop; + struct mmc_data data; +}; + static inline int mmc_blk_part_switch(struct mmc_card *card, struct mmc_blk_data *md) { @@ -527,20 +525,7 @@ static u32 mmc_sd_num_wr_blocks(struct mmc_card *card) return result; } -static int send_stop(struct mmc_card *card, u32 *status) -{ - struct mmc_command cmd = {0}; - int err; - - cmd.opcode = MMC_STOP_TRANSMISSION; - cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC; - err = mmc_wait_for_cmd(card->host, &cmd, 5); - if (err == 0) - *status = cmd.resp[0]; - return err; -} - -static int get_card_status(struct mmc_card *card, u32 *status, int retries) +static u32 get_card_status(struct mmc_card *card, struct request *req) { struct mmc_command cmd = {0}; int err; @@ 
-549,141 +534,11 @@ static int get_card_status(struct mmc_card *card, u32 *status, int retries) if (!mmc_host_is_spi(card->host)) cmd.arg = card->rca << 16; cmd.flags = MMC_RSP_SPI_R2 | MMC_RSP_R1 | MMC_CMD_AC; - err = mmc_wait_for_cmd(card->host, &cmd, retries); - if (err == 0) - *status = cmd.resp[0]; - return err; -} - -#define ERR_RETRY 2 -#define ERR_ABORT 1 -#define ERR_CONTINUE 0 - -static int mmc_blk_cmd_error(struct request *req, const char *name, int error, - bool status_valid, u32 status) -{ - switch (error) { - case -EILSEQ: - /* response crc error, retry the r/w cmd */ - pr_err("%s: %s sending %s command, card status %#x\n", - req->rq_disk->disk_name, "response CRC error", - name, status); - return ERR_RETRY; - - case -ETIMEDOUT: - pr_err("%s: %s sending %s command, card status %#x\n", - req->rq_disk->disk_name, "timed out", name, status); - - /* If the status cmd initially failed, retry the r/w cmd */ - if (!status_valid) - return ERR_RETRY; - - /* - * If it was a r/w cmd crc error, or illegal command - * (eg, issued in wrong state) then retry - we should - * have corrected the state problem above. - */ - if (status & (R1_COM_CRC_ERROR | R1_ILLEGAL_COMMAND)) - return ERR_RETRY; - - /* Otherwise abort the command */ - return ERR_ABORT; - - default: - /* We don't understand the error code the driver gave us */ - pr_err("%s: unknown error %d sending read/write command, card status %#x\n", - req->rq_disk->disk_name, error, status); - return ERR_ABORT; - } -} - -/* - * Initial r/w and stop cmd error recovery. - * We don't know whether the card received the r/w cmd or not, so try to - * restore things back to a sane state. Essentially, we do this as follows: - * - Obtain card status. If the first attempt to obtain card status fails, - * the status word will reflect the failed status cmd, not the failed - * r/w cmd. If we fail to obtain card status, it suggests we can no - * longer communicate with the card. - * - Check the card state. 
If the card received the cmd but there was a - * transient problem with the response, it might still be in a data transfer - * mode. Try to send it a stop command. If this fails, we can't recover. - * - If the r/w cmd failed due to a response CRC error, it was probably - * transient, so retry the cmd. - * - If the r/w cmd timed out, but we didn't get the r/w cmd status, retry. - * - If the r/w cmd timed out, and the r/w cmd failed due to CRC error or - * illegal cmd, retry. - * Otherwise we don't understand what happened, so abort. - */ -static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req, - struct mmc_blk_request *brq) -{ - bool prev_cmd_status_valid = true; - u32 status, stop_status = 0; - int err, retry; - - /* - * Try to get card status which indicates both the card state - * and why there was no response. If the first attempt fails, - * we can't be sure the returned status is for the r/w command. - */ - for (retry = 2; retry >= 0; retry--) { - err = get_card_status(card, &status, 0); - if (!err) - break; - - prev_cmd_status_valid = false; - pr_err("%s: error %d sending status command, %sing\n", - req->rq_disk->disk_name, err, retry ? "retry" : "abort"); - } - - /* We couldn't get a response from the card. Give up. */ + err = mmc_wait_for_cmd(card->host, &cmd, 0); if (err) - return ERR_ABORT; - - /* - * Check the current card state. If it is in some data transfer - * mode, tell it to stop (and hopefully transition back to TRAN.) - */ - if (R1_CURRENT_STATE(status) == R1_STATE_DATA || - R1_CURRENT_STATE(status) == R1_STATE_RCV) { - err = send_stop(card, &stop_status); - if (err) - pr_err("%s: error %d sending stop command\n", - req->rq_disk->disk_name, err); - - /* - * If the stop cmd also timed out, the card is probably - * not present, so abort. Other errors are bad news too. 
- */ - if (err) - return ERR_ABORT; - } - - /* Check for set block count errors */ - if (brq->sbc.error) - return mmc_blk_cmd_error(req, "SET_BLOCK_COUNT", brq->sbc.error, - prev_cmd_status_valid, status); - - /* Check for r/w command errors */ - if (brq->cmd.error) - return mmc_blk_cmd_error(req, "r/w cmd", brq->cmd.error, - prev_cmd_status_valid, status); - - /* Now for stop errors. These aren't fatal to the transfer. */ - pr_err("%s: error %d sending stop command, original cmd response %#x, card status %#x\n", - req->rq_disk->disk_name, brq->stop.error, - brq->cmd.resp[0], status); - - /* - * Subsitute in our own stop status as this will give the error - * state which happened during the execution of the r/w command. - */ - if (stop_status) { - brq->stop.resp[0] = stop_status; - brq->stop.error = 0; - } - return ERR_CONTINUE; + printk(KERN_ERR "%s: error %d sending status command", + req->rq_disk->disk_name, err); + return cmd.resp[0]; } static int mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) @@ -814,114 +669,12 @@ static inline void mmc_apply_rel_rw(struct mmc_blk_request *brq, } } -#define CMD_ERRORS \ - (R1_OUT_OF_RANGE | /* Command argument out of range */ \ - R1_ADDRESS_ERROR | /* Misaligned address */ \ - R1_BLOCK_LEN_ERROR | /* Transferred block length incorrect */\ - R1_WP_VIOLATION | /* Tried to write to protected block */ \ - R1_CC_ERROR | /* Card controller error */ \ - R1_ERROR) /* General/unknown error */ - -static int mmc_blk_err_check(struct mmc_card *card, - struct mmc_async_req *areq) -{ - enum mmc_blk_status ret = MMC_BLK_SUCCESS; - struct mmc_queue_req *mq_mrq = container_of(areq, struct mmc_queue_req, - mmc_active); - struct mmc_blk_request *brq = &mq_mrq->brq; - struct request *req = mq_mrq->req; - - /* - * sbc.error indicates a problem with the set block count - * command. No data will have been transferred. - * - * cmd.error indicates a problem with the r/w command. No - * data will have been transferred. 
- * - * stop.error indicates a problem with the stop command. Data - * may have been transferred, or may still be transferring. - */ - if (brq->sbc.error || brq->cmd.error || brq->stop.error) { - switch (mmc_blk_cmd_recovery(card, req, brq)) { - case ERR_RETRY: - return MMC_BLK_RETRY; - case ERR_ABORT: - return MMC_BLK_ABORT; - case ERR_CONTINUE: - break; - } - } - - /* - * Check for errors relating to the execution of the - * initial command - such as address errors. No data - * has been transferred. - */ - if (brq->cmd.resp[0] & CMD_ERRORS) { - pr_err("%s: r/w command failed, status = %#x\n", - req->rq_disk->disk_name, brq->cmd.resp[0]); - return MMC_BLK_ABORT; - } - - /* - * Everything else is either success, or a data error of some - * kind. If it was a write, we may have transitioned to - * program mode, which we have to wait for it to complete. - */ - if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) { - u32 status; - do { - int err = get_card_status(card, &status, 5); - if (err) { - printk(KERN_ERR "%s: error %d requesting status\n", - req->rq_disk->disk_name, err); - return MMC_BLK_CMD_ERR; - } - /* - * Some cards mishandle the status bits, - * so make sure to check both the busy - * indication and the card state. 
- */ - } while (!(status & R1_READY_FOR_DATA) || - (R1_CURRENT_STATE(status) == R1_STATE_PRG)); - } - - if (brq->data.error) { - pr_err("%s: error %d transferring data, sector %u, nr %u, cmd response %#x, card status %#x\n", - req->rq_disk->disk_name, brq->data.error, - (unsigned)blk_rq_pos(req), - (unsigned)blk_rq_sectors(req), - brq->cmd.resp[0], brq->stop.resp[0]); - - if (rq_data_dir(req) == READ) { - if (brq->data.blocks > 1) { - /* Redo read one sector at a time */ - pr_warning("%s: retrying using single block read\n", - req->rq_disk->disk_name); - return MMC_BLK_RETRY_SINGLE; - } - return MMC_BLK_DATA_ERR; - } else { - return MMC_BLK_CMD_ERR; - } - } - - if (ret == MMC_BLK_SUCCESS && - blk_rq_bytes(req) != brq->data.bytes_xfered) - ret = MMC_BLK_PARTIAL; - - return ret; -} - -static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, - struct mmc_card *card, - int disable_multi, - struct mmc_queue *mq) +static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) { - u32 readcmd, writecmd; - struct mmc_blk_request *brq = &mqrq->brq; - struct request *req = mqrq->req; struct mmc_blk_data *md = mq->data; + struct mmc_card *card = md->queue.card; + struct mmc_blk_request brq; + int ret = 1, disable_multi = 0; /* * Reliable writes are used to implement Forced Unit Access and @@ -932,206 +685,224 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, (rq_data_dir(req) == WRITE) && (md->flags & MMC_BLK_REL_WR); - memset(brq, 0, sizeof(struct mmc_blk_request)); - brq->mrq.cmd = &brq->cmd; - brq->mrq.data = &brq->data; + do { + struct mmc_command cmd = {0}; + u32 readcmd, writecmd, status = 0; + + memset(&brq, 0, sizeof(struct mmc_blk_request)); + brq.mrq.cmd = &brq.cmd; + brq.mrq.data = &brq.data; + + brq.cmd.arg = blk_rq_pos(req); + if (!mmc_card_blockaddr(card)) + brq.cmd.arg <<= 9; + brq.cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC; + brq.data.blksz = 512; + brq.stop.opcode = MMC_STOP_TRANSMISSION; + brq.stop.arg = 0; + 
brq.stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC; + brq.data.blocks = blk_rq_sectors(req); - brq->cmd.arg = blk_rq_pos(req); - if (!mmc_card_blockaddr(card)) - brq->cmd.arg <<= 9; - brq->cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC; - brq->data.blksz = 512; - brq->stop.opcode = MMC_STOP_TRANSMISSION; - brq->stop.arg = 0; - brq->stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC; - brq->data.blocks = blk_rq_sectors(req); + /* + * The block layer doesn't support all sector count + * restrictions, so we need to be prepared for too big + * requests. + */ + if (brq.data.blocks > card->host->max_blk_count) + brq.data.blocks = card->host->max_blk_count; - /* - * The block layer doesn't support all sector count - * restrictions, so we need to be prepared for too big - * requests. - */ - if (brq->data.blocks > card->host->max_blk_count) - brq->data.blocks = card->host->max_blk_count; + /* + * After a read error, we redo the request one sector at a time + * in order to accurately determine which sectors can be read + * successfully. + */ + if (disable_multi && brq.data.blocks > 1) + brq.data.blocks = 1; - /* - * After a read error, we redo the request one sector at a time - * in order to accurately determine which sectors can be read - * successfully. - */ - if (disable_multi && brq->data.blocks > 1) - brq->data.blocks = 1; + if (brq.data.blocks > 1 || do_rel_wr) { + /* SPI multiblock writes terminate using a special + * token, not a STOP_TRANSMISSION request. 
+ */ + if (!mmc_host_is_spi(card->host) || + rq_data_dir(req) == READ) + brq.mrq.stop = &brq.stop; + readcmd = MMC_READ_MULTIPLE_BLOCK; + writecmd = MMC_WRITE_MULTIPLE_BLOCK; + } else { + brq.mrq.stop = NULL; + readcmd = MMC_READ_SINGLE_BLOCK; + writecmd = MMC_WRITE_BLOCK; + } + if (rq_data_dir(req) == READ) { + brq.cmd.opcode = readcmd; + brq.data.flags |= MMC_DATA_READ; + } else { + brq.cmd.opcode = writecmd; + brq.data.flags |= MMC_DATA_WRITE; + } - if (brq->data.blocks > 1 || do_rel_wr) { - /* SPI multiblock writes terminate using a special - * token, not a STOP_TRANSMISSION request. + if (do_rel_wr) + mmc_apply_rel_rw(&brq, card, req); + + /* + * Pre-defined multi-block transfers are preferable to + * open ended-ones (and necessary for reliable writes). + * However, it is not sufficient to just send CMD23, + * and avoid the final CMD12, as on an error condition + * CMD12 (stop) needs to be sent anyway. This, coupled + * with Auto-CMD23 enhancements provided by some + * hosts, means that the complexity of dealing + * with this is best left to the host. If CMD23 is + * supported by card and host, we'll fill sbc in and let + * the host deal with handling it correctly. This means + * that for hosts that don't expose MMC_CAP_CMD23, no + * change of behavior will be observed. + * + * N.B: Some MMC cards experience perf degradation. + * We'll avoid using CMD23-bounded multiblock writes for + * these, while retaining features like reliable writes. 
*/ - if (!mmc_host_is_spi(card->host) || - rq_data_dir(req) == READ) - brq->mrq.stop = &brq->stop; - readcmd = MMC_READ_MULTIPLE_BLOCK; - writecmd = MMC_WRITE_MULTIPLE_BLOCK; - } else { - brq->mrq.stop = NULL; - readcmd = MMC_READ_SINGLE_BLOCK; - writecmd = MMC_WRITE_BLOCK; - } - if (rq_data_dir(req) == READ) { - brq->cmd.opcode = readcmd; - brq->data.flags |= MMC_DATA_READ; - } else { - brq->cmd.opcode = writecmd; - brq->data.flags |= MMC_DATA_WRITE; - } - if (do_rel_wr) - mmc_apply_rel_rw(brq, card, req); + if ((md->flags & MMC_BLK_CMD23) && + mmc_op_multi(brq.cmd.opcode) && + (do_rel_wr || !(card->quirks & MMC_QUIRK_BLK_NO_CMD23))) { + brq.sbc.opcode = MMC_SET_BLOCK_COUNT; + brq.sbc.arg = brq.data.blocks | + (do_rel_wr ? (1 << 31) : 0); + brq.sbc.flags = MMC_RSP_R1 | MMC_CMD_AC; + brq.mrq.sbc = &brq.sbc; + } - /* - * Pre-defined multi-block transfers are preferable to - * open ended-ones (and necessary for reliable writes). - * However, it is not sufficient to just send CMD23, - * and avoid the final CMD12, as on an error condition - * CMD12 (stop) needs to be sent anyway. This, coupled - * with Auto-CMD23 enhancements provided by some - * hosts, means that the complexity of dealing - * with this is best left to the host. If CMD23 is - * supported by card and host, we'll fill sbc in and let - * the host deal with handling it correctly. This means - * that for hosts that don't expose MMC_CAP_CMD23, no - * change of behavior will be observed. - * - * N.B: Some MMC cards experience perf degradation. - * We'll avoid using CMD23-bounded multiblock writes for - * these, while retaining features like reliable writes. - */ + mmc_set_data_timeout(&brq.data, card); - if ((md->flags & MMC_BLK_CMD23) && - mmc_op_multi(brq->cmd.opcode) && - (do_rel_wr || !(card->quirks & MMC_QUIRK_BLK_NO_CMD23))) { - brq->sbc.opcode = MMC_SET_BLOCK_COUNT; - brq->sbc.arg = brq->data.blocks | - (do_rel_wr ? 
(1 << 31) : 0); - brq->sbc.flags = MMC_RSP_R1 | MMC_CMD_AC; - brq->mrq.sbc = &brq->sbc; - } + brq.data.sg = mq->sg; + brq.data.sg_len = mmc_queue_map_sg(mq); - mmc_set_data_timeout(&brq->data, card); + /* + * Adjust the sg list so it is the same size as the + * request. + */ + if (brq.data.blocks != blk_rq_sectors(req)) { + int i, data_size = brq.data.blocks << 9; + struct scatterlist *sg; + + for_each_sg(brq.data.sg, sg, brq.data.sg_len, i) { + data_size -= sg->length; + if (data_size <= 0) { + sg->length += data_size; + i++; + break; + } + } + brq.data.sg_len = i; + } - brq->data.sg = mqrq->sg; - brq->data.sg_len = mmc_queue_map_sg(mq, mqrq); + mmc_queue_bounce_pre(mq); - /* - * Adjust the sg list so it is the same size as the - * request. - */ - if (brq->data.blocks != blk_rq_sectors(req)) { - int i, data_size = brq->data.blocks << 9; - struct scatterlist *sg; - - for_each_sg(brq->data.sg, sg, brq->data.sg_len, i) { - data_size -= sg->length; - if (data_size <= 0) { - sg->length += data_size; - i++; - break; + mmc_wait_for_req(card->host, &brq.mrq); + + mmc_queue_bounce_post(mq); + + /* + * Check for errors here, but don't jump to cmd_err + * until later as we need to wait for the card to leave + * programming mode even when things go wrong. 
+ */ + if (brq.sbc.error || brq.cmd.error || + brq.data.error || brq.stop.error) { + if (brq.data.blocks > 1 && rq_data_dir(req) == READ) { + /* Redo read one sector at a time */ + printk(KERN_WARNING "%s: retrying using single " + "block read\n", req->rq_disk->disk_name); + disable_multi = 1; + continue; } + status = get_card_status(card, req); } - brq->data.sg_len = i; - } - mqrq->mmc_active.mrq = &brq->mrq; - mqrq->mmc_active.err_check = mmc_blk_err_check; + if (brq.sbc.error) { + printk(KERN_ERR "%s: error %d sending SET_BLOCK_COUNT " + "command, response %#x, card status %#x\n", + req->rq_disk->disk_name, brq.sbc.error, + brq.sbc.resp[0], status); + } - mmc_queue_bounce_pre(mqrq); -} + if (brq.cmd.error) { + printk(KERN_ERR "%s: error %d sending read/write " + "command, response %#x, card status %#x\n", + req->rq_disk->disk_name, brq.cmd.error, + brq.cmd.resp[0], status); + } -static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) -{ - struct mmc_blk_data *md = mq->data; - struct mmc_card *card = md->queue.card; - struct mmc_blk_request *brq = &mq->mqrq_cur->brq; - int ret = 1, disable_multi = 0, retry = 0; - enum mmc_blk_status status; - struct mmc_queue_req *mq_rq; - struct request *req; - struct mmc_async_req *areq; - - if (!rqc && !mq->mqrq_prev->req) - return 0; + if (brq.data.error) { + if (brq.data.error == -ETIMEDOUT && brq.mrq.stop) + /* 'Stop' response contains card status */ + status = brq.mrq.stop->resp[0]; + printk(KERN_ERR "%s: error %d transferring data," + " sector %u, nr %u, card status %#x\n", + req->rq_disk->disk_name, brq.data.error, + (unsigned)blk_rq_pos(req), + (unsigned)blk_rq_sectors(req), status); + } - do { - if (rqc) { - mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq); - areq = &mq->mqrq_cur->mmc_active; - } else - areq = NULL; - areq = mmc_start_req(card->host, areq, (int *) &status); - if (!areq) - return 0; - - mq_rq = container_of(areq, struct mmc_queue_req, mmc_active); - brq = &mq_rq->brq; - req = mq_rq->req; 
- mmc_queue_bounce_post(mq_rq); - - switch (status) { - case MMC_BLK_SUCCESS: - case MMC_BLK_PARTIAL: - /* - * A block was successfully transferred. - */ - spin_lock_irq(&md->lock); - ret = __blk_end_request(req, 0, - brq->data.bytes_xfered); - spin_unlock_irq(&md->lock); - if (status == MMC_BLK_SUCCESS && ret) { + if (brq.stop.error) { + printk(KERN_ERR "%s: error %d sending stop command, " + "response %#x, card status %#x\n", + req->rq_disk->disk_name, brq.stop.error, + brq.stop.resp[0], status); + } + + if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) { + do { + int err; + + cmd.opcode = MMC_SEND_STATUS; + cmd.arg = card->rca << 16; + cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; + err = mmc_wait_for_cmd(card->host, &cmd, 5); + if (err) { + printk(KERN_ERR "%s: error %d requesting status\n", + req->rq_disk->disk_name, err); + goto cmd_err; + } /* - * The blk_end_request has returned non zero - * even though all data is transfered and no - * erros returned by host. - * If this happen it's a bug. + * Some cards mishandle the status bits, + * so make sure to check both the busy + * indication and the card state. */ - printk(KERN_ERR "%s BUG rq_tot %d d_xfer %d\n", - __func__, blk_rq_bytes(req), - brq->data.bytes_xfered); - rqc = NULL; - goto cmd_abort; + } while (!(cmd.resp[0] & R1_READY_FOR_DATA) || + (R1_CURRENT_STATE(cmd.resp[0]) == 7)); + +#if 0 + if (cmd.resp[0] & ~0x00000900) + printk(KERN_ERR "%s: status = %08x\n", + req->rq_disk->disk_name, cmd.resp[0]); + if (mmc_decode_status(cmd.resp)) + goto cmd_err; +#endif + } + + if (brq.cmd.error || brq.stop.error || brq.data.error) { + if (rq_data_dir(req) == READ) { + /* + * After an error, we redo I/O one sector at a + * time, so we only reach here after trying to + * read a single sector. 
+ */ + spin_lock_irq(&md->lock); + ret = __blk_end_request(req, -EIO, brq.data.blksz); + spin_unlock_irq(&md->lock); + continue; } - break; - case MMC_BLK_CMD_ERR: goto cmd_err; - case MMC_BLK_RETRY_SINGLE: - disable_multi = 1; - break; - case MMC_BLK_RETRY: - if (retry++ < 5) - break; - case MMC_BLK_ABORT: - goto cmd_abort; - case MMC_BLK_DATA_ERR: - /* - * After an error, we redo I/O one sector at a - * time, so we only reach here after trying to - * read a single sector. - */ - spin_lock_irq(&md->lock); - ret = __blk_end_request(req, -EIO, - brq->data.blksz); - spin_unlock_irq(&md->lock); - if (!ret) - goto start_new_req; - break; } - if (ret) { - /* - * In case of a none complete request - * prepare it again and resend. - */ - mmc_blk_rw_rq_prep(mq_rq, card, disable_multi, mq); - mmc_start_req(card->host, &mq_rq->mmc_active, NULL); - } + /* + * A block was successfully transferred. + */ + spin_lock_irq(&md->lock); + ret = __blk_end_request(req, 0, brq.data.bytes_xfered); + spin_unlock_irq(&md->lock); } while (ret); return 1; @@ -1156,22 +927,15 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) } } else { spin_lock_irq(&md->lock); - ret = __blk_end_request(req, 0, brq->data.bytes_xfered); + ret = __blk_end_request(req, 0, brq.data.bytes_xfered); spin_unlock_irq(&md->lock); } - cmd_abort: spin_lock_irq(&md->lock); while (ret) ret = __blk_end_request(req, -EIO, blk_rq_cur_bytes(req)); spin_unlock_irq(&md->lock); - start_new_req: - if (rqc) { - mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq); - mmc_start_req(card->host, &mq->mqrq_cur->mmc_active, NULL); - } - return 0; } @@ -1181,37 +945,26 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) struct mmc_blk_data *md = mq->data; struct mmc_card *card = md->queue.card; - if (req && !mq->mqrq_prev->req) - /* claim host only for the first request */ - mmc_claim_host(card->host); - + mmc_claim_host(card->host); ret = mmc_blk_part_switch(card, md); if (ret) { ret = 0; goto 
out; } - if (req && req->cmd_flags & REQ_DISCARD) { - /* complete ongoing async transfer before issuing discard */ - if (card->host->areq) - mmc_blk_issue_rw_rq(mq, NULL); + if (req->cmd_flags & REQ_DISCARD) { if (req->cmd_flags & REQ_SECURE) ret = mmc_blk_issue_secdiscard_rq(mq, req); else ret = mmc_blk_issue_discard_rq(mq, req); - } else if (req && req->cmd_flags & REQ_FLUSH) { - /* complete ongoing async transfer before issuing flush */ - if (card->host->areq) - mmc_blk_issue_rw_rq(mq, NULL); + } else if (req->cmd_flags & REQ_FLUSH) { ret = mmc_blk_issue_flush(mq, req); } else { ret = mmc_blk_issue_rw_rq(mq, req); } out: - if (!req) - /* release host only when there are no more requests */ - mmc_release_host(card->host); + mmc_release_host(card->host); return ret; } diff --git a/trunk/drivers/mmc/card/mmc_test.c b/trunk/drivers/mmc/card/mmc_test.c index 006a5e9f8ab8..233cdfae92f4 100644 --- a/trunk/drivers/mmc/card/mmc_test.c +++ b/trunk/drivers/mmc/card/mmc_test.c @@ -148,27 +148,6 @@ struct mmc_test_card { struct mmc_test_general_result *gr; }; -enum mmc_test_prep_media { - MMC_TEST_PREP_NONE = 0, - MMC_TEST_PREP_WRITE_FULL = 1 << 0, - MMC_TEST_PREP_ERASE = 1 << 1, -}; - -struct mmc_test_multiple_rw { - unsigned int *sg_len; - unsigned int *bs; - unsigned int len; - unsigned int size; - bool do_write; - bool do_nonblock_req; - enum mmc_test_prep_media prepare; -}; - -struct mmc_test_async_req { - struct mmc_async_req areq; - struct mmc_test_card *test; -}; - /*******************************************************************/ /* General helper functions */ /*******************************************************************/ @@ -388,26 +367,21 @@ static struct mmc_test_mem *mmc_test_alloc_mem(unsigned long min_sz, * Map memory into a scatterlist. Optionally allow the same memory to be * mapped more than once. 
*/ -static int mmc_test_map_sg(struct mmc_test_mem *mem, unsigned long size, +static int mmc_test_map_sg(struct mmc_test_mem *mem, unsigned long sz, struct scatterlist *sglist, int repeat, unsigned int max_segs, unsigned int max_seg_sz, - unsigned int *sg_len, int min_sg_len) + unsigned int *sg_len) { struct scatterlist *sg = NULL; unsigned int i; - unsigned long sz = size; sg_init_table(sglist, max_segs); - if (min_sg_len > max_segs) - min_sg_len = max_segs; *sg_len = 0; do { for (i = 0; i < mem->cnt; i++) { unsigned long len = PAGE_SIZE << mem->arr[i].order; - if (min_sg_len && (size / min_sg_len < len)) - len = ALIGN(size / min_sg_len, 512); if (len > sz) len = sz; if (len > max_seg_sz) @@ -580,12 +554,11 @@ static void mmc_test_print_avg_rate(struct mmc_test_card *test, uint64_t bytes, printk(KERN_INFO "%s: Transfer of %u x %u sectors (%u x %u%s KiB) took " "%lu.%09lu seconds (%u kB/s, %u KiB/s, " - "%u.%02u IOPS, sg_len %d)\n", + "%u.%02u IOPS)\n", mmc_hostname(test->card->host), count, sectors, count, sectors >> 1, (sectors & 1 ? 
".5" : ""), (unsigned long)ts.tv_sec, (unsigned long)ts.tv_nsec, - rate / 1000, rate / 1024, iops / 100, iops % 100, - test->area.sg_len); + rate / 1000, rate / 1024, iops / 100, iops % 100); mmc_test_save_transfer_result(test, count, sectors, ts, rate, iops); } @@ -688,7 +661,7 @@ static void mmc_test_prepare_broken_mrq(struct mmc_test_card *test, * Checks that a normal transfer didn't have any errors */ static int mmc_test_check_result(struct mmc_test_card *test, - struct mmc_request *mrq) + struct mmc_request *mrq) { int ret; @@ -712,17 +685,6 @@ static int mmc_test_check_result(struct mmc_test_card *test, return ret; } -static int mmc_test_check_result_async(struct mmc_card *card, - struct mmc_async_req *areq) -{ - struct mmc_test_async_req *test_async = - container_of(areq, struct mmc_test_async_req, areq); - - mmc_test_wait_busy(test_async->test); - - return mmc_test_check_result(test_async->test, areq->mrq); -} - /* * Checks that a "short transfer" behaved as expected */ @@ -757,85 +719,6 @@ static int mmc_test_check_broken_result(struct mmc_test_card *test, return ret; } -/* - * Tests nonblock transfer with certain parameters - */ -static void mmc_test_nonblock_reset(struct mmc_request *mrq, - struct mmc_command *cmd, - struct mmc_command *stop, - struct mmc_data *data) -{ - memset(mrq, 0, sizeof(struct mmc_request)); - memset(cmd, 0, sizeof(struct mmc_command)); - memset(data, 0, sizeof(struct mmc_data)); - memset(stop, 0, sizeof(struct mmc_command)); - - mrq->cmd = cmd; - mrq->data = data; - mrq->stop = stop; -} -static int mmc_test_nonblock_transfer(struct mmc_test_card *test, - struct scatterlist *sg, unsigned sg_len, - unsigned dev_addr, unsigned blocks, - unsigned blksz, int write, int count) -{ - struct mmc_request mrq1; - struct mmc_command cmd1; - struct mmc_command stop1; - struct mmc_data data1; - - struct mmc_request mrq2; - struct mmc_command cmd2; - struct mmc_command stop2; - struct mmc_data data2; - - struct mmc_test_async_req test_areq[2]; 
- struct mmc_async_req *done_areq; - struct mmc_async_req *cur_areq = &test_areq[0].areq; - struct mmc_async_req *other_areq = &test_areq[1].areq; - int i; - int ret; - - test_areq[0].test = test; - test_areq[1].test = test; - - mmc_test_nonblock_reset(&mrq1, &cmd1, &stop1, &data1); - mmc_test_nonblock_reset(&mrq2, &cmd2, &stop2, &data2); - - cur_areq->mrq = &mrq1; - cur_areq->err_check = mmc_test_check_result_async; - other_areq->mrq = &mrq2; - other_areq->err_check = mmc_test_check_result_async; - - for (i = 0; i < count; i++) { - mmc_test_prepare_mrq(test, cur_areq->mrq, sg, sg_len, dev_addr, - blocks, blksz, write); - done_areq = mmc_start_req(test->card->host, cur_areq, &ret); - - if (ret || (!done_areq && i > 0)) - goto err; - - if (done_areq) { - if (done_areq->mrq == &mrq2) - mmc_test_nonblock_reset(&mrq2, &cmd2, - &stop2, &data2); - else - mmc_test_nonblock_reset(&mrq1, &cmd1, - &stop1, &data1); - } - done_areq = cur_areq; - cur_areq = other_areq; - other_areq = done_areq; - dev_addr += blocks; - } - - done_areq = mmc_start_req(test->card->host, NULL, &ret); - - return ret; -err: - return ret; -} - /* * Tests a basic transfer with certain parameters */ @@ -1419,7 +1302,7 @@ static int mmc_test_no_highmem(struct mmc_test_card *test) * Map sz bytes so that it can be transferred. */ static int mmc_test_area_map(struct mmc_test_card *test, unsigned long sz, - int max_scatter, int min_sg_len) + int max_scatter) { struct mmc_test_area *t = &test->area; int err; @@ -1432,7 +1315,7 @@ static int mmc_test_area_map(struct mmc_test_card *test, unsigned long sz, &t->sg_len); } else { err = mmc_test_map_sg(t->mem, sz, t->sg, 1, t->max_segs, - t->max_seg_sz, &t->sg_len, min_sg_len); + t->max_seg_sz, &t->sg_len); } if (err) printk(KERN_INFO "%s: Failed to map sg list\n", @@ -1453,17 +1336,14 @@ static int mmc_test_area_transfer(struct mmc_test_card *test, } /* - * Map and transfer bytes for multiple transfers. + * Map and transfer bytes. 
*/ -static int mmc_test_area_io_seq(struct mmc_test_card *test, unsigned long sz, - unsigned int dev_addr, int write, - int max_scatter, int timed, int count, - bool nonblock, int min_sg_len) +static int mmc_test_area_io(struct mmc_test_card *test, unsigned long sz, + unsigned int dev_addr, int write, int max_scatter, + int timed) { struct timespec ts1, ts2; - int ret = 0; - int i; - struct mmc_test_area *t = &test->area; + int ret; /* * In the case of a maximally scattered transfer, the maximum transfer @@ -1481,21 +1361,14 @@ static int mmc_test_area_io_seq(struct mmc_test_card *test, unsigned long sz, sz = max_tfr; } - ret = mmc_test_area_map(test, sz, max_scatter, min_sg_len); + ret = mmc_test_area_map(test, sz, max_scatter); if (ret) return ret; if (timed) getnstimeofday(&ts1); - if (nonblock) - ret = mmc_test_nonblock_transfer(test, t->sg, t->sg_len, - dev_addr, t->blocks, 512, write, count); - else - for (i = 0; i < count && ret == 0; i++) { - ret = mmc_test_area_transfer(test, dev_addr, write); - dev_addr += sz >> 9; - } + ret = mmc_test_area_transfer(test, dev_addr, write); if (ret) return ret; @@ -1503,19 +1376,11 @@ static int mmc_test_area_io_seq(struct mmc_test_card *test, unsigned long sz, getnstimeofday(&ts2); if (timed) - mmc_test_print_avg_rate(test, sz, count, &ts1, &ts2); + mmc_test_print_rate(test, sz, &ts1, &ts2); return 0; } -static int mmc_test_area_io(struct mmc_test_card *test, unsigned long sz, - unsigned int dev_addr, int write, int max_scatter, - int timed) -{ - return mmc_test_area_io_seq(test, sz, dev_addr, write, max_scatter, - timed, 1, false, 0); -} - /* * Write the test area entirely. 
*/ @@ -2089,245 +1954,6 @@ static int mmc_test_large_seq_write_perf(struct mmc_test_card *test) return mmc_test_large_seq_perf(test, 1); } -static int mmc_test_rw_multiple(struct mmc_test_card *test, - struct mmc_test_multiple_rw *tdata, - unsigned int reqsize, unsigned int size, - int min_sg_len) -{ - unsigned int dev_addr; - struct mmc_test_area *t = &test->area; - int ret = 0; - - /* Set up test area */ - if (size > mmc_test_capacity(test->card) / 2 * 512) - size = mmc_test_capacity(test->card) / 2 * 512; - if (reqsize > t->max_tfr) - reqsize = t->max_tfr; - dev_addr = mmc_test_capacity(test->card) / 4; - if ((dev_addr & 0xffff0000)) - dev_addr &= 0xffff0000; /* Round to 64MiB boundary */ - else - dev_addr &= 0xfffff800; /* Round to 1MiB boundary */ - if (!dev_addr) - goto err; - - if (reqsize > size) - return 0; - - /* prepare test area */ - if (mmc_can_erase(test->card) && - tdata->prepare & MMC_TEST_PREP_ERASE) { - ret = mmc_erase(test->card, dev_addr, - size / 512, MMC_SECURE_ERASE_ARG); - if (ret) - ret = mmc_erase(test->card, dev_addr, - size / 512, MMC_ERASE_ARG); - if (ret) - goto err; - } - - /* Run test */ - ret = mmc_test_area_io_seq(test, reqsize, dev_addr, - tdata->do_write, 0, 1, size / reqsize, - tdata->do_nonblock_req, min_sg_len); - if (ret) - goto err; - - return ret; - err: - printk(KERN_INFO "[%s] error\n", __func__); - return ret; -} - -static int mmc_test_rw_multiple_size(struct mmc_test_card *test, - struct mmc_test_multiple_rw *rw) -{ - int ret = 0; - int i; - void *pre_req = test->card->host->ops->pre_req; - void *post_req = test->card->host->ops->post_req; - - if (rw->do_nonblock_req && - ((!pre_req && post_req) || (pre_req && !post_req))) { - printk(KERN_INFO "error: only one of pre/post is defined\n"); - return -EINVAL; - } - - for (i = 0 ; i < rw->len && ret == 0; i++) { - ret = mmc_test_rw_multiple(test, rw, rw->bs[i], rw->size, 0); - if (ret) - break; - } - return ret; -} - -static int mmc_test_rw_multiple_sg_len(struct 
mmc_test_card *test, - struct mmc_test_multiple_rw *rw) -{ - int ret = 0; - int i; - - for (i = 0 ; i < rw->len && ret == 0; i++) { - ret = mmc_test_rw_multiple(test, rw, 512*1024, rw->size, - rw->sg_len[i]); - if (ret) - break; - } - return ret; -} - -/* - * Multiple blocking write 4k to 4 MB chunks - */ -static int mmc_test_profile_mult_write_blocking_perf(struct mmc_test_card *test) -{ - unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, - 1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22}; - struct mmc_test_multiple_rw test_data = { - .bs = bs, - .size = TEST_AREA_MAX_SIZE, - .len = ARRAY_SIZE(bs), - .do_write = true, - .do_nonblock_req = false, - .prepare = MMC_TEST_PREP_ERASE, - }; - - return mmc_test_rw_multiple_size(test, &test_data); -}; - -/* - * Multiple non-blocking write 4k to 4 MB chunks - */ -static int mmc_test_profile_mult_write_nonblock_perf(struct mmc_test_card *test) -{ - unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, - 1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22}; - struct mmc_test_multiple_rw test_data = { - .bs = bs, - .size = TEST_AREA_MAX_SIZE, - .len = ARRAY_SIZE(bs), - .do_write = true, - .do_nonblock_req = true, - .prepare = MMC_TEST_PREP_ERASE, - }; - - return mmc_test_rw_multiple_size(test, &test_data); -} - -/* - * Multiple blocking read 4k to 4 MB chunks - */ -static int mmc_test_profile_mult_read_blocking_perf(struct mmc_test_card *test) -{ - unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, - 1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22}; - struct mmc_test_multiple_rw test_data = { - .bs = bs, - .size = TEST_AREA_MAX_SIZE, - .len = ARRAY_SIZE(bs), - .do_write = false, - .do_nonblock_req = false, - .prepare = MMC_TEST_PREP_NONE, - }; - - return mmc_test_rw_multiple_size(test, &test_data); -} - -/* - * Multiple non-blocking read 4k to 4 MB chunks - */ -static int mmc_test_profile_mult_read_nonblock_perf(struct mmc_test_card *test) -{ - unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 
1 << 16, - 1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22}; - struct mmc_test_multiple_rw test_data = { - .bs = bs, - .size = TEST_AREA_MAX_SIZE, - .len = ARRAY_SIZE(bs), - .do_write = false, - .do_nonblock_req = true, - .prepare = MMC_TEST_PREP_NONE, - }; - - return mmc_test_rw_multiple_size(test, &test_data); -} - -/* - * Multiple blocking write 1 to 512 sg elements - */ -static int mmc_test_profile_sglen_wr_blocking_perf(struct mmc_test_card *test) -{ - unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6, - 1 << 7, 1 << 8, 1 << 9}; - struct mmc_test_multiple_rw test_data = { - .sg_len = sg_len, - .size = TEST_AREA_MAX_SIZE, - .len = ARRAY_SIZE(sg_len), - .do_write = true, - .do_nonblock_req = false, - .prepare = MMC_TEST_PREP_ERASE, - }; - - return mmc_test_rw_multiple_sg_len(test, &test_data); -}; - -/* - * Multiple non-blocking write 1 to 512 sg elements - */ -static int mmc_test_profile_sglen_wr_nonblock_perf(struct mmc_test_card *test) -{ - unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6, - 1 << 7, 1 << 8, 1 << 9}; - struct mmc_test_multiple_rw test_data = { - .sg_len = sg_len, - .size = TEST_AREA_MAX_SIZE, - .len = ARRAY_SIZE(sg_len), - .do_write = true, - .do_nonblock_req = true, - .prepare = MMC_TEST_PREP_ERASE, - }; - - return mmc_test_rw_multiple_sg_len(test, &test_data); -} - -/* - * Multiple blocking read 1 to 512 sg elements - */ -static int mmc_test_profile_sglen_r_blocking_perf(struct mmc_test_card *test) -{ - unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6, - 1 << 7, 1 << 8, 1 << 9}; - struct mmc_test_multiple_rw test_data = { - .sg_len = sg_len, - .size = TEST_AREA_MAX_SIZE, - .len = ARRAY_SIZE(sg_len), - .do_write = false, - .do_nonblock_req = false, - .prepare = MMC_TEST_PREP_NONE, - }; - - return mmc_test_rw_multiple_sg_len(test, &test_data); -} - -/* - * Multiple non-blocking read 1 to 512 sg elements - */ -static int mmc_test_profile_sglen_r_nonblock_perf(struct mmc_test_card *test) -{ - unsigned int sg_len[] = {1, 
1 << 3, 1 << 4, 1 << 5, 1 << 6, - 1 << 7, 1 << 8, 1 << 9}; - struct mmc_test_multiple_rw test_data = { - .sg_len = sg_len, - .size = TEST_AREA_MAX_SIZE, - .len = ARRAY_SIZE(sg_len), - .do_write = false, - .do_nonblock_req = true, - .prepare = MMC_TEST_PREP_NONE, - }; - - return mmc_test_rw_multiple_sg_len(test, &test_data); -} - static const struct mmc_test_case mmc_test_cases[] = { { .name = "Basic write (no data verification)", @@ -2595,61 +2221,6 @@ static const struct mmc_test_case mmc_test_cases[] = { .cleanup = mmc_test_area_cleanup, }, - { - .name = "Write performance with blocking req 4k to 4MB", - .prepare = mmc_test_area_prepare, - .run = mmc_test_profile_mult_write_blocking_perf, - .cleanup = mmc_test_area_cleanup, - }, - - { - .name = "Write performance with non-blocking req 4k to 4MB", - .prepare = mmc_test_area_prepare, - .run = mmc_test_profile_mult_write_nonblock_perf, - .cleanup = mmc_test_area_cleanup, - }, - - { - .name = "Read performance with blocking req 4k to 4MB", - .prepare = mmc_test_area_prepare, - .run = mmc_test_profile_mult_read_blocking_perf, - .cleanup = mmc_test_area_cleanup, - }, - - { - .name = "Read performance with non-blocking req 4k to 4MB", - .prepare = mmc_test_area_prepare, - .run = mmc_test_profile_mult_read_nonblock_perf, - .cleanup = mmc_test_area_cleanup, - }, - - { - .name = "Write performance blocking req 1 to 512 sg elems", - .prepare = mmc_test_area_prepare, - .run = mmc_test_profile_sglen_wr_blocking_perf, - .cleanup = mmc_test_area_cleanup, - }, - - { - .name = "Write performance non-blocking req 1 to 512 sg elems", - .prepare = mmc_test_area_prepare, - .run = mmc_test_profile_sglen_wr_nonblock_perf, - .cleanup = mmc_test_area_cleanup, - }, - - { - .name = "Read performance blocking req 1 to 512 sg elems", - .prepare = mmc_test_area_prepare, - .run = mmc_test_profile_sglen_r_blocking_perf, - .cleanup = mmc_test_area_cleanup, - }, - - { - .name = "Read performance non-blocking req 1 to 512 sg elems", - .prepare = 
mmc_test_area_prepare, - .run = mmc_test_profile_sglen_r_nonblock_perf, - .cleanup = mmc_test_area_cleanup, - }, }; static DEFINE_MUTEX(mmc_test_lock); @@ -2874,32 +2445,6 @@ static const struct file_operations mmc_test_fops_test = { .release = single_release, }; -static int mtf_testlist_show(struct seq_file *sf, void *data) -{ - int i; - - mutex_lock(&mmc_test_lock); - - for (i = 0; i < ARRAY_SIZE(mmc_test_cases); i++) - seq_printf(sf, "%d:\t%s\n", i+1, mmc_test_cases[i].name); - - mutex_unlock(&mmc_test_lock); - - return 0; -} - -static int mtf_testlist_open(struct inode *inode, struct file *file) -{ - return single_open(file, mtf_testlist_show, inode->i_private); -} - -static const struct file_operations mmc_test_fops_testlist = { - .open = mtf_testlist_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void mmc_test_free_file_test(struct mmc_card *card) { struct mmc_test_dbgfs_file *df, *dfs; @@ -2931,18 +2476,7 @@ static int mmc_test_register_file_test(struct mmc_card *card) if (IS_ERR_OR_NULL(file)) { dev_err(&card->dev, - "Can't create test. Perhaps debugfs is disabled.\n"); - ret = -ENODEV; - goto err; - } - - if (card->debugfs_root) - file = debugfs_create_file("testlist", S_IRUGO, - card->debugfs_root, card, &mmc_test_fops_testlist); - - if (IS_ERR_OR_NULL(file)) { - dev_err(&card->dev, - "Can't create testlist. Perhaps debugfs is disabled.\n"); + "Can't create file. 
Perhaps debugfs is disabled.\n"); ret = -ENODEV; goto err; } diff --git a/trunk/drivers/mmc/card/queue.c b/trunk/drivers/mmc/card/queue.c index 45fb362e3f01..6413afa318d2 100644 --- a/trunk/drivers/mmc/card/queue.c +++ b/trunk/drivers/mmc/card/queue.c @@ -52,18 +52,14 @@ static int mmc_queue_thread(void *d) down(&mq->thread_sem); do { struct request *req = NULL; - struct mmc_queue_req *tmp; spin_lock_irq(q->queue_lock); set_current_state(TASK_INTERRUPTIBLE); req = blk_fetch_request(q); - mq->mqrq_cur->req = req; + mq->req = req; spin_unlock_irq(q->queue_lock); - if (req || mq->mqrq_prev->req) { - set_current_state(TASK_RUNNING); - mq->issue_fn(mq, req); - } else { + if (!req) { if (kthread_should_stop()) { set_current_state(TASK_RUNNING); break; @@ -71,14 +67,11 @@ static int mmc_queue_thread(void *d) up(&mq->thread_sem); schedule(); down(&mq->thread_sem); + continue; } + set_current_state(TASK_RUNNING); - /* Current request becomes previous request and vice versa. */ - mq->mqrq_prev->brq.mrq.data = NULL; - mq->mqrq_prev->req = NULL; - tmp = mq->mqrq_prev; - mq->mqrq_prev = mq->mqrq_cur; - mq->mqrq_cur = tmp; + mq->issue_fn(mq, req); } while (1); up(&mq->thread_sem); @@ -104,46 +97,10 @@ static void mmc_request(struct request_queue *q) return; } - if (!mq->mqrq_cur->req && !mq->mqrq_prev->req) + if (!mq->req) wake_up_process(mq->thread); } -struct scatterlist *mmc_alloc_sg(int sg_len, int *err) -{ - struct scatterlist *sg; - - sg = kmalloc(sizeof(struct scatterlist)*sg_len, GFP_KERNEL); - if (!sg) - *err = -ENOMEM; - else { - *err = 0; - sg_init_table(sg, sg_len); - } - - return sg; -} - -static void mmc_queue_setup_discard(struct request_queue *q, - struct mmc_card *card) -{ - unsigned max_discard; - - max_discard = mmc_calc_max_discard(card); - if (!max_discard) - return; - - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); - q->limits.max_discard_sectors = max_discard; - if (card->erased_byte == 0) - q->limits.discard_zeroes_data = 1; - 
q->limits.discard_granularity = card->pref_erase << 9; - /* granularity must not be greater than max. discard */ - if (card->pref_erase > max_discard) - q->limits.discard_granularity = 0; - if (mmc_can_secure_erase_trim(card)) - queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q); -} - /** * mmc_init_queue - initialise a queue structure. * @mq: mmc queue @@ -159,8 +116,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, struct mmc_host *host = card->host; u64 limit = BLK_BOUNCE_HIGH; int ret; - struct mmc_queue_req *mqrq_cur = &mq->mqrq[0]; - struct mmc_queue_req *mqrq_prev = &mq->mqrq[1]; if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask) limit = *mmc_dev(host)->dma_mask; @@ -170,16 +125,21 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, if (!mq->queue) return -ENOMEM; - memset(&mq->mqrq_cur, 0, sizeof(mq->mqrq_cur)); - memset(&mq->mqrq_prev, 0, sizeof(mq->mqrq_prev)); - mq->mqrq_cur = mqrq_cur; - mq->mqrq_prev = mqrq_prev; mq->queue->queuedata = mq; + mq->req = NULL; blk_queue_prep_rq(mq->queue, mmc_prep_request); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue); - if (mmc_can_erase(card)) - mmc_queue_setup_discard(mq->queue, card); + if (mmc_can_erase(card)) { + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue); + mq->queue->limits.max_discard_sectors = UINT_MAX; + if (card->erased_byte == 0) + mq->queue->limits.discard_zeroes_data = 1; + mq->queue->limits.discard_granularity = card->pref_erase << 9; + if (mmc_can_secure_erase_trim(card)) + queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, + mq->queue); + } #ifdef CONFIG_MMC_BLOCK_BOUNCE if (host->max_segs == 1) { @@ -195,64 +155,53 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, bouncesz = host->max_blk_count * 512; if (bouncesz > 512) { - mqrq_cur->bounce_buf = kmalloc(bouncesz, GFP_KERNEL); - if (!mqrq_cur->bounce_buf) { - printk(KERN_WARNING "%s: unable to " - "allocate bounce cur buffer\n", - mmc_card_name(card)); - } - mqrq_prev->bounce_buf 
= kmalloc(bouncesz, GFP_KERNEL); - if (!mqrq_prev->bounce_buf) { + mq->bounce_buf = kmalloc(bouncesz, GFP_KERNEL); + if (!mq->bounce_buf) { printk(KERN_WARNING "%s: unable to " - "allocate bounce prev buffer\n", + "allocate bounce buffer\n", mmc_card_name(card)); - kfree(mqrq_cur->bounce_buf); - mqrq_cur->bounce_buf = NULL; } } - if (mqrq_cur->bounce_buf && mqrq_prev->bounce_buf) { + if (mq->bounce_buf) { blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY); blk_queue_max_hw_sectors(mq->queue, bouncesz / 512); blk_queue_max_segments(mq->queue, bouncesz / 512); blk_queue_max_segment_size(mq->queue, bouncesz); - mqrq_cur->sg = mmc_alloc_sg(1, &ret); - if (ret) - goto cleanup_queue; - - mqrq_cur->bounce_sg = - mmc_alloc_sg(bouncesz / 512, &ret); - if (ret) - goto cleanup_queue; - - mqrq_prev->sg = mmc_alloc_sg(1, &ret); - if (ret) + mq->sg = kmalloc(sizeof(struct scatterlist), + GFP_KERNEL); + if (!mq->sg) { + ret = -ENOMEM; goto cleanup_queue; + } + sg_init_table(mq->sg, 1); - mqrq_prev->bounce_sg = - mmc_alloc_sg(bouncesz / 512, &ret); - if (ret) + mq->bounce_sg = kmalloc(sizeof(struct scatterlist) * + bouncesz / 512, GFP_KERNEL); + if (!mq->bounce_sg) { + ret = -ENOMEM; goto cleanup_queue; + } + sg_init_table(mq->bounce_sg, bouncesz / 512); } } #endif - if (!mqrq_cur->bounce_buf && !mqrq_prev->bounce_buf) { + if (!mq->bounce_buf) { blk_queue_bounce_limit(mq->queue, limit); blk_queue_max_hw_sectors(mq->queue, min(host->max_blk_count, host->max_req_size / 512)); blk_queue_max_segments(mq->queue, host->max_segs); blk_queue_max_segment_size(mq->queue, host->max_seg_size); - mqrq_cur->sg = mmc_alloc_sg(host->max_segs, &ret); - if (ret) - goto cleanup_queue; - - - mqrq_prev->sg = mmc_alloc_sg(host->max_segs, &ret); - if (ret) + mq->sg = kmalloc(sizeof(struct scatterlist) * + host->max_segs, GFP_KERNEL); + if (!mq->sg) { + ret = -ENOMEM; goto cleanup_queue; + } + sg_init_table(mq->sg, host->max_segs); } sema_init(&mq->thread_sem, 1); @@ -267,22 +216,16 @@ int 
mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, return 0; free_bounce_sg: - kfree(mqrq_cur->bounce_sg); - mqrq_cur->bounce_sg = NULL; - kfree(mqrq_prev->bounce_sg); - mqrq_prev->bounce_sg = NULL; - + if (mq->bounce_sg) + kfree(mq->bounce_sg); + mq->bounce_sg = NULL; cleanup_queue: - kfree(mqrq_cur->sg); - mqrq_cur->sg = NULL; - kfree(mqrq_cur->bounce_buf); - mqrq_cur->bounce_buf = NULL; - - kfree(mqrq_prev->sg); - mqrq_prev->sg = NULL; - kfree(mqrq_prev->bounce_buf); - mqrq_prev->bounce_buf = NULL; - + if (mq->sg) + kfree(mq->sg); + mq->sg = NULL; + if (mq->bounce_buf) + kfree(mq->bounce_buf); + mq->bounce_buf = NULL; blk_cleanup_queue(mq->queue); return ret; } @@ -291,8 +234,6 @@ void mmc_cleanup_queue(struct mmc_queue *mq) { struct request_queue *q = mq->queue; unsigned long flags; - struct mmc_queue_req *mqrq_cur = mq->mqrq_cur; - struct mmc_queue_req *mqrq_prev = mq->mqrq_prev; /* Make sure the queue isn't suspended, as that will deadlock */ mmc_queue_resume(mq); @@ -306,23 +247,16 @@ void mmc_cleanup_queue(struct mmc_queue *mq) blk_start_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); - kfree(mqrq_cur->bounce_sg); - mqrq_cur->bounce_sg = NULL; + if (mq->bounce_sg) + kfree(mq->bounce_sg); + mq->bounce_sg = NULL; - kfree(mqrq_cur->sg); - mqrq_cur->sg = NULL; + kfree(mq->sg); + mq->sg = NULL; - kfree(mqrq_cur->bounce_buf); - mqrq_cur->bounce_buf = NULL; - - kfree(mqrq_prev->bounce_sg); - mqrq_prev->bounce_sg = NULL; - - kfree(mqrq_prev->sg); - mqrq_prev->sg = NULL; - - kfree(mqrq_prev->bounce_buf); - mqrq_prev->bounce_buf = NULL; + if (mq->bounce_buf) + kfree(mq->bounce_buf); + mq->bounce_buf = NULL; mq->card = NULL; } @@ -375,27 +309,27 @@ void mmc_queue_resume(struct mmc_queue *mq) /* * Prepare the sg list(s) to be handed of to the host driver */ -unsigned int mmc_queue_map_sg(struct mmc_queue *mq, struct mmc_queue_req *mqrq) +unsigned int mmc_queue_map_sg(struct mmc_queue *mq) { unsigned int sg_len; size_t buflen; struct scatterlist *sg; 
int i; - if (!mqrq->bounce_buf) - return blk_rq_map_sg(mq->queue, mqrq->req, mqrq->sg); + if (!mq->bounce_buf) + return blk_rq_map_sg(mq->queue, mq->req, mq->sg); - BUG_ON(!mqrq->bounce_sg); + BUG_ON(!mq->bounce_sg); - sg_len = blk_rq_map_sg(mq->queue, mqrq->req, mqrq->bounce_sg); + sg_len = blk_rq_map_sg(mq->queue, mq->req, mq->bounce_sg); - mqrq->bounce_sg_len = sg_len; + mq->bounce_sg_len = sg_len; buflen = 0; - for_each_sg(mqrq->bounce_sg, sg, sg_len, i) + for_each_sg(mq->bounce_sg, sg, sg_len, i) buflen += sg->length; - sg_init_one(mqrq->sg, mqrq->bounce_buf, buflen); + sg_init_one(mq->sg, mq->bounce_buf, buflen); return 1; } @@ -404,30 +338,31 @@ unsigned int mmc_queue_map_sg(struct mmc_queue *mq, struct mmc_queue_req *mqrq) * If writing, bounce the data to the buffer before the request * is sent to the host driver */ -void mmc_queue_bounce_pre(struct mmc_queue_req *mqrq) +void mmc_queue_bounce_pre(struct mmc_queue *mq) { - if (!mqrq->bounce_buf) + if (!mq->bounce_buf) return; - if (rq_data_dir(mqrq->req) != WRITE) + if (rq_data_dir(mq->req) != WRITE) return; - sg_copy_to_buffer(mqrq->bounce_sg, mqrq->bounce_sg_len, - mqrq->bounce_buf, mqrq->sg[0].length); + sg_copy_to_buffer(mq->bounce_sg, mq->bounce_sg_len, + mq->bounce_buf, mq->sg[0].length); } /* * If reading, bounce the data from the buffer after the request * has been handled by the host driver */ -void mmc_queue_bounce_post(struct mmc_queue_req *mqrq) +void mmc_queue_bounce_post(struct mmc_queue *mq) { - if (!mqrq->bounce_buf) + if (!mq->bounce_buf) return; - if (rq_data_dir(mqrq->req) != READ) + if (rq_data_dir(mq->req) != READ) return; - sg_copy_from_buffer(mqrq->bounce_sg, mqrq->bounce_sg_len, - mqrq->bounce_buf, mqrq->sg[0].length); + sg_copy_from_buffer(mq->bounce_sg, mq->bounce_sg_len, + mq->bounce_buf, mq->sg[0].length); } + diff --git a/trunk/drivers/mmc/card/queue.h b/trunk/drivers/mmc/card/queue.h index d2a1eb4b9f9f..6223ef8dc9cd 100644 --- a/trunk/drivers/mmc/card/queue.h +++ 
b/trunk/drivers/mmc/card/queue.h @@ -4,35 +4,19 @@ struct request; struct task_struct; -struct mmc_blk_request { - struct mmc_request mrq; - struct mmc_command sbc; - struct mmc_command cmd; - struct mmc_command stop; - struct mmc_data data; -}; - -struct mmc_queue_req { - struct request *req; - struct mmc_blk_request brq; - struct scatterlist *sg; - char *bounce_buf; - struct scatterlist *bounce_sg; - unsigned int bounce_sg_len; - struct mmc_async_req mmc_active; -}; - struct mmc_queue { struct mmc_card *card; struct task_struct *thread; struct semaphore thread_sem; unsigned int flags; + struct request *req; int (*issue_fn)(struct mmc_queue *, struct request *); void *data; struct request_queue *queue; - struct mmc_queue_req mqrq[2]; - struct mmc_queue_req *mqrq_cur; - struct mmc_queue_req *mqrq_prev; + struct scatterlist *sg; + char *bounce_buf; + struct scatterlist *bounce_sg; + unsigned int bounce_sg_len; }; extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *, @@ -41,9 +25,8 @@ extern void mmc_cleanup_queue(struct mmc_queue *); extern void mmc_queue_suspend(struct mmc_queue *); extern void mmc_queue_resume(struct mmc_queue *); -extern unsigned int mmc_queue_map_sg(struct mmc_queue *, - struct mmc_queue_req *); -extern void mmc_queue_bounce_pre(struct mmc_queue_req *); -extern void mmc_queue_bounce_post(struct mmc_queue_req *); +extern unsigned int mmc_queue_map_sg(struct mmc_queue *); +extern void mmc_queue_bounce_pre(struct mmc_queue *); +extern void mmc_queue_bounce_post(struct mmc_queue *); #endif diff --git a/trunk/drivers/mmc/core/core.c b/trunk/drivers/mmc/core/core.c index f091b43d00c4..7843efe22359 100644 --- a/trunk/drivers/mmc/core/core.c +++ b/trunk/drivers/mmc/core/core.c @@ -198,109 +198,9 @@ mmc_start_request(struct mmc_host *host, struct mmc_request *mrq) static void mmc_wait_done(struct mmc_request *mrq) { - complete(&mrq->completion); + complete(mrq->done_data); } -static void __mmc_start_req(struct mmc_host *host, 
struct mmc_request *mrq) -{ - init_completion(&mrq->completion); - mrq->done = mmc_wait_done; - mmc_start_request(host, mrq); -} - -static void mmc_wait_for_req_done(struct mmc_host *host, - struct mmc_request *mrq) -{ - wait_for_completion(&mrq->completion); -} - -/** - * mmc_pre_req - Prepare for a new request - * @host: MMC host to prepare command - * @mrq: MMC request to prepare for - * @is_first_req: true if there is no previous started request - * that may run in parellel to this call, otherwise false - * - * mmc_pre_req() is called in prior to mmc_start_req() to let - * host prepare for the new request. Preparation of a request may be - * performed while another request is running on the host. - */ -static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq, - bool is_first_req) -{ - if (host->ops->pre_req) - host->ops->pre_req(host, mrq, is_first_req); -} - -/** - * mmc_post_req - Post process a completed request - * @host: MMC host to post process command - * @mrq: MMC request to post process for - * @err: Error, if non zero, clean up any resources made in pre_req - * - * Let the host post process a completed request. Post processing of - * a request may be performed while another reuqest is running. - */ -static void mmc_post_req(struct mmc_host *host, struct mmc_request *mrq, - int err) -{ - if (host->ops->post_req) - host->ops->post_req(host, mrq, err); -} - -/** - * mmc_start_req - start a non-blocking request - * @host: MMC host to start command - * @areq: async request to start - * @error: out parameter returns 0 for success, otherwise non zero - * - * Start a new MMC custom command request for a host. - * If there is on ongoing async request wait for completion - * of that request and start the new one and return. - * Does not wait for the new request to complete. - * - * Returns the completed request, NULL in case of none completed. 
- * Wait for the an ongoing request (previoulsy started) to complete and - * return the completed request. If there is no ongoing request, NULL - * is returned without waiting. NULL is not an error condition. - */ -struct mmc_async_req *mmc_start_req(struct mmc_host *host, - struct mmc_async_req *areq, int *error) -{ - int err = 0; - struct mmc_async_req *data = host->areq; - - /* Prepare a new request */ - if (areq) - mmc_pre_req(host, areq->mrq, !host->areq); - - if (host->areq) { - mmc_wait_for_req_done(host, host->areq->mrq); - err = host->areq->err_check(host->card, host->areq); - if (err) { - mmc_post_req(host, host->areq->mrq, 0); - if (areq) - mmc_post_req(host, areq->mrq, -EINVAL); - - host->areq = NULL; - goto out; - } - } - - if (areq) - __mmc_start_req(host, areq->mrq); - - if (host->areq) - mmc_post_req(host, host->areq->mrq, 0); - - host->areq = areq; - out: - if (error) - *error = err; - return data; -} -EXPORT_SYMBOL(mmc_start_req); - /** * mmc_wait_for_req - start a request and wait for completion * @host: MMC host to start command @@ -312,9 +212,16 @@ EXPORT_SYMBOL(mmc_start_req); */ void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq) { - __mmc_start_req(host, mrq); - mmc_wait_for_req_done(host, mrq); + DECLARE_COMPLETION_ONSTACK(complete); + + mrq->done_data = &complete; + mrq->done = mmc_wait_done; + + mmc_start_request(host, mrq); + + wait_for_completion(&complete); } + EXPORT_SYMBOL(mmc_wait_for_req); /** @@ -1609,82 +1516,6 @@ int mmc_erase_group_aligned(struct mmc_card *card, unsigned int from, } EXPORT_SYMBOL(mmc_erase_group_aligned); -static unsigned int mmc_do_calc_max_discard(struct mmc_card *card, - unsigned int arg) -{ - struct mmc_host *host = card->host; - unsigned int max_discard, x, y, qty = 0, max_qty, timeout; - unsigned int last_timeout = 0; - - if (card->erase_shift) - max_qty = UINT_MAX >> card->erase_shift; - else if (mmc_card_sd(card)) - max_qty = UINT_MAX; - else - max_qty = UINT_MAX / card->erase_size; - 
- /* Find the largest qty with an OK timeout */ - do { - y = 0; - for (x = 1; x && x <= max_qty && max_qty - x >= qty; x <<= 1) { - timeout = mmc_erase_timeout(card, arg, qty + x); - if (timeout > host->max_discard_to) - break; - if (timeout < last_timeout) - break; - last_timeout = timeout; - y = x; - } - qty += y; - } while (y); - - if (!qty) - return 0; - - if (qty == 1) - return 1; - - /* Convert qty to sectors */ - if (card->erase_shift) - max_discard = --qty << card->erase_shift; - else if (mmc_card_sd(card)) - max_discard = qty; - else - max_discard = --qty * card->erase_size; - - return max_discard; -} - -unsigned int mmc_calc_max_discard(struct mmc_card *card) -{ - struct mmc_host *host = card->host; - unsigned int max_discard, max_trim; - - if (!host->max_discard_to) - return UINT_MAX; - - /* - * Without erase_group_def set, MMC erase timeout depends on clock - * frequence which can change. In that case, the best choice is - * just the preferred erase size. - */ - if (mmc_card_mmc(card) && !(card->ext_csd.erase_group_def & 1)) - return card->pref_erase; - - max_discard = mmc_do_calc_max_discard(card, MMC_ERASE_ARG); - if (mmc_can_trim(card)) { - max_trim = mmc_do_calc_max_discard(card, MMC_TRIM_ARG); - if (max_trim < max_discard) - max_discard = max_trim; - } else if (max_discard < card->erase_size) { - max_discard = 0; - } - pr_debug("%s: calculated max. 
discard sectors %u for timeout %u ms\n", - mmc_hostname(host), max_discard, host->max_discard_to); - return max_discard; -} -EXPORT_SYMBOL(mmc_calc_max_discard); - int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen) { struct mmc_command cmd = {0}; @@ -1832,10 +1663,6 @@ int mmc_power_save_host(struct mmc_host *host) { int ret = 0; -#ifdef CONFIG_MMC_DEBUG - pr_info("%s: %s: powering down\n", mmc_hostname(host), __func__); -#endif - mmc_bus_get(host); if (!host->bus_ops || host->bus_dead || !host->bus_ops->power_restore) { @@ -1858,10 +1685,6 @@ int mmc_power_restore_host(struct mmc_host *host) { int ret; -#ifdef CONFIG_MMC_DEBUG - pr_info("%s: %s: powering up\n", mmc_hostname(host), __func__); -#endif - mmc_bus_get(host); if (!host->bus_ops || host->bus_dead || !host->bus_ops->power_restore) { diff --git a/trunk/drivers/mmc/core/sd.c b/trunk/drivers/mmc/core/sd.c index 633975ff2bb3..ff2774128aa9 100644 --- a/trunk/drivers/mmc/core/sd.c +++ b/trunk/drivers/mmc/core/sd.c @@ -409,62 +409,52 @@ int mmc_sd_switch_hs(struct mmc_card *card) static int sd_select_driver_type(struct mmc_card *card, u8 *status) { - int host_drv_type = SD_DRIVER_TYPE_B; - int card_drv_type = SD_DRIVER_TYPE_B; - int drive_strength; + int host_drv_type = 0, card_drv_type = 0; int err; /* * If the host doesn't support any of the Driver Types A,C or D, - * or there is no board specific handler then default Driver - * Type B is used. + * default Driver Type B is used. 
*/ if (!(card->host->caps & (MMC_CAP_DRIVER_TYPE_A | MMC_CAP_DRIVER_TYPE_C | MMC_CAP_DRIVER_TYPE_D))) return 0; - if (!card->host->ops->select_drive_strength) - return 0; - - if (card->host->caps & MMC_CAP_DRIVER_TYPE_A) - host_drv_type |= SD_DRIVER_TYPE_A; - - if (card->host->caps & MMC_CAP_DRIVER_TYPE_C) - host_drv_type |= SD_DRIVER_TYPE_C; - - if (card->host->caps & MMC_CAP_DRIVER_TYPE_D) - host_drv_type |= SD_DRIVER_TYPE_D; - - if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_A) - card_drv_type |= SD_DRIVER_TYPE_A; - - if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C) - card_drv_type |= SD_DRIVER_TYPE_C; - - if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_D) - card_drv_type |= SD_DRIVER_TYPE_D; - - /* - * The drive strength that the hardware can support - * depends on the board design. Pass the appropriate - * information and let the hardware specific code - * return what is possible given the options - */ - drive_strength = card->host->ops->select_drive_strength( - card->sw_caps.uhs_max_dtr, - host_drv_type, card_drv_type); + if (card->host->caps & MMC_CAP_DRIVER_TYPE_A) { + host_drv_type = MMC_SET_DRIVER_TYPE_A; + if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_A) + card_drv_type = MMC_SET_DRIVER_TYPE_A; + else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_B) + card_drv_type = MMC_SET_DRIVER_TYPE_B; + else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C) + card_drv_type = MMC_SET_DRIVER_TYPE_C; + } else if (card->host->caps & MMC_CAP_DRIVER_TYPE_C) { + host_drv_type = MMC_SET_DRIVER_TYPE_C; + if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C) + card_drv_type = MMC_SET_DRIVER_TYPE_C; + } else if (!(card->host->caps & MMC_CAP_DRIVER_TYPE_D)) { + /* + * If we are here, that means only the default driver type + * B is supported by the host. 
+ */ + host_drv_type = MMC_SET_DRIVER_TYPE_B; + if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_B) + card_drv_type = MMC_SET_DRIVER_TYPE_B; + else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C) + card_drv_type = MMC_SET_DRIVER_TYPE_C; + } - err = mmc_sd_switch(card, 1, 2, drive_strength, status); + err = mmc_sd_switch(card, 1, 2, card_drv_type, status); if (err) return err; - if ((status[15] & 0xF) != drive_strength) { - printk(KERN_WARNING "%s: Problem setting drive strength!\n", + if ((status[15] & 0xF) != card_drv_type) { + printk(KERN_WARNING "%s: Problem setting driver strength!\n", mmc_hostname(card->host)); return 0; } - mmc_set_driver_type(card->host, drive_strength); + mmc_set_driver_type(card->host, host_drv_type); return 0; } diff --git a/trunk/drivers/mmc/core/sdio_bus.c b/trunk/drivers/mmc/core/sdio_bus.c index e4e6822d09e3..d2565df8a7fb 100644 --- a/trunk/drivers/mmc/core/sdio_bus.c +++ b/trunk/drivers/mmc/core/sdio_bus.c @@ -167,8 +167,11 @@ static int sdio_bus_remove(struct device *dev) int ret = 0; /* Make sure card is powered before invoking ->remove() */ - if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) - pm_runtime_get_sync(dev); + if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) { + ret = pm_runtime_get_sync(dev); + if (ret < 0) + goto out; + } drv->remove(func); @@ -188,6 +191,7 @@ static int sdio_bus_remove(struct device *dev) if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) pm_runtime_put_sync(dev); +out: return ret; } diff --git a/trunk/drivers/mmc/host/Kconfig b/trunk/drivers/mmc/host/Kconfig index 8c87096531e9..56dbf3f6ad08 100644 --- a/trunk/drivers/mmc/host/Kconfig +++ b/trunk/drivers/mmc/host/Kconfig @@ -81,32 +81,28 @@ config MMC_RICOH_MMC If unsure, say Y. 
-config MMC_SDHCI_PLTFM - tristate "SDHCI platform and OF driver helper" - depends on MMC_SDHCI +config MMC_SDHCI_OF + tristate "SDHCI support on OpenFirmware platforms" + depends on MMC_SDHCI && OF help - This selects the common helper functions support for Secure Digital - Host Controller Interface based platform and OF drivers. - - If you have a controller with this interface, say Y or M here. + This selects the OF support for Secure Digital Host Controller + Interfaces. If unsure, say N. config MMC_SDHCI_OF_ESDHC - tristate "SDHCI OF support for the Freescale eSDHC controller" - depends on MMC_SDHCI_PLTFM + bool "SDHCI OF support for the Freescale eSDHC controller" + depends on MMC_SDHCI_OF depends on PPC_OF select MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER help This selects the Freescale eSDHC controller support. - If you have a controller with this interface, say Y or M here. - If unsure, say N. config MMC_SDHCI_OF_HLWD - tristate "SDHCI OF support for the Nintendo Wii SDHCI controllers" - depends on MMC_SDHCI_PLTFM + bool "SDHCI OF support for the Nintendo Wii SDHCI controllers" + depends on MMC_SDHCI_OF depends on PPC_OF select MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER help @@ -114,36 +110,40 @@ config MMC_SDHCI_OF_HLWD found in the "Hollywood" chipset of the Nintendo Wii video game console. + If unsure, say N. + +config MMC_SDHCI_PLTFM + tristate "SDHCI support on the platform specific bus" + depends on MMC_SDHCI + help + This selects the platform specific bus support for Secure Digital Host + Controller Interface. + If you have a controller with this interface, say Y or M here. If unsure, say N. config MMC_SDHCI_CNS3XXX - tristate "SDHCI support on the Cavium Networks CNS3xxx SoC" + bool "SDHCI support on the Cavium Networks CNS3xxx SoC" depends on ARCH_CNS3XXX depends on MMC_SDHCI_PLTFM help This selects the SDHCI support for CNS3xxx System-on-Chip devices. - If you have a controller with this interface, say Y or M here. - If unsure, say N. 
config MMC_SDHCI_ESDHC_IMX - tristate "SDHCI platform support for the Freescale eSDHC i.MX controller" - depends on ARCH_MX25 || ARCH_MX35 || ARCH_MX5 - depends on MMC_SDHCI_PLTFM + bool "SDHCI platform support for the Freescale eSDHC i.MX controller" + depends on MMC_SDHCI_PLTFM && (ARCH_MX25 || ARCH_MX35 || ARCH_MX5) select MMC_SDHCI_IO_ACCESSORS help This selects the Freescale eSDHC controller support on the platform bus, found on platforms like mx35/51. - If you have a controller with this interface, say Y or M here. - If unsure, say N. config MMC_SDHCI_DOVE - tristate "SDHCI support on Marvell's Dove SoC" + bool "SDHCI support on Marvell's Dove SoC" depends on ARCH_DOVE depends on MMC_SDHCI_PLTFM select MMC_SDHCI_IO_ACCESSORS @@ -151,14 +151,11 @@ config MMC_SDHCI_DOVE This selects the Secure Digital Host Controller Interface in Marvell's Dove SoC. - If you have a controller with this interface, say Y or M here. - If unsure, say N. config MMC_SDHCI_TEGRA - tristate "SDHCI platform support for the Tegra SD/MMC Controller" - depends on ARCH_TEGRA - depends on MMC_SDHCI_PLTFM + bool "SDHCI platform support for the Tegra SD/MMC Controller" + depends on MMC_SDHCI_PLTFM && ARCH_TEGRA select MMC_SDHCI_IO_ACCESSORS help This selects the Tegra SD/MMC controller. If you have a Tegra @@ -181,28 +178,14 @@ config MMC_SDHCI_S3C If unsure, say N. -config MMC_SDHCI_PXAV3 - tristate "Marvell MMP2 SD Host Controller support (PXAV3)" - depends on CLKDEV_LOOKUP +config MMC_SDHCI_PXA + tristate "Marvell PXA168/PXA910/MMP2 SD Host Controller support" + depends on ARCH_PXA || ARCH_MMP select MMC_SDHCI - select MMC_SDHCI_PLTFM - default CPU_MMP2 - help - This selects the Marvell(R) PXAV3 SD Host Controller. - If you have a MMP2 platform with SD Host Controller - and a card slot, say Y or M here. - - If unsure, say N. 
- -config MMC_SDHCI_PXAV2 - tristate "Marvell PXA9XX SD Host Controller support (PXAV2)" - depends on CLKDEV_LOOKUP - select MMC_SDHCI - select MMC_SDHCI_PLTFM - default CPU_PXA910 + select MMC_SDHCI_IO_ACCESSORS help - This selects the Marvell(R) PXAV2 SD Host Controller. - If you have a PXA9XX platform with SD Host Controller + This selects the Marvell(R) PXA168/PXA910/MMP2 SD Host Controller. + If you have a PXA168/PXA910/MMP2 platform with SD Host Controller and a card slot, say Y or M here. If unsure, say N. @@ -298,12 +281,13 @@ config MMC_ATMELMCI endchoice config MMC_ATMELMCI_DMA - bool "Atmel MCI DMA support" - depends on MMC_ATMELMCI && (AVR32 || ARCH_AT91SAM9G45) && DMA_ENGINE + bool "Atmel MCI DMA support (EXPERIMENTAL)" + depends on MMC_ATMELMCI && (AVR32 || ARCH_AT91SAM9G45) && DMA_ENGINE && EXPERIMENTAL help Say Y here to have the Atmel MCI driver use a DMA engine to do data transfers and thus increase the throughput and - reduce the CPU utilization. + reduce the CPU utilization. Note that this is highly + experimental and may cause the driver to lock up. If unsure, say N. 
diff --git a/trunk/drivers/mmc/host/Makefile b/trunk/drivers/mmc/host/Makefile index b4b83f302e32..58a5cf73d6e9 100644 --- a/trunk/drivers/mmc/host/Makefile +++ b/trunk/drivers/mmc/host/Makefile @@ -9,8 +9,7 @@ obj-$(CONFIG_MMC_MXC) += mxcmmc.o obj-$(CONFIG_MMC_MXS) += mxs-mmc.o obj-$(CONFIG_MMC_SDHCI) += sdhci.o obj-$(CONFIG_MMC_SDHCI_PCI) += sdhci-pci.o -obj-$(CONFIG_MMC_SDHCI_PXAV3) += sdhci-pxav3.o -obj-$(CONFIG_MMC_SDHCI_PXAV2) += sdhci-pxav2.o +obj-$(CONFIG_MMC_SDHCI_PXA) += sdhci-pxa.o obj-$(CONFIG_MMC_SDHCI_S3C) += sdhci-s3c.o obj-$(CONFIG_MMC_SDHCI_SPEAR) += sdhci-spear.o obj-$(CONFIG_MMC_WBSD) += wbsd.o @@ -32,7 +31,9 @@ obj-$(CONFIG_MMC_SDRICOH_CS) += sdricoh_cs.o obj-$(CONFIG_MMC_TMIO) += tmio_mmc.o obj-$(CONFIG_MMC_TMIO_CORE) += tmio_mmc_core.o tmio_mmc_core-y := tmio_mmc_pio.o -tmio_mmc_core-$(subst m,y,$(CONFIG_MMC_SDHI)) += tmio_mmc_dma.o +ifneq ($(CONFIG_MMC_SDHI),n) +tmio_mmc_core-y += tmio_mmc_dma.o +endif obj-$(CONFIG_MMC_SDHI) += sh_mobile_sdhi.o obj-$(CONFIG_MMC_CB710) += cb710-mmc.o obj-$(CONFIG_MMC_VIA_SDMMC) += via-sdmmc.o @@ -43,13 +44,17 @@ obj-$(CONFIG_MMC_JZ4740) += jz4740_mmc.o obj-$(CONFIG_MMC_VUB300) += vub300.o obj-$(CONFIG_MMC_USHC) += ushc.o -obj-$(CONFIG_MMC_SDHCI_PLTFM) += sdhci-pltfm.o -obj-$(CONFIG_MMC_SDHCI_CNS3XXX) += sdhci-cns3xxx.o -obj-$(CONFIG_MMC_SDHCI_ESDHC_IMX) += sdhci-esdhc-imx.o -obj-$(CONFIG_MMC_SDHCI_DOVE) += sdhci-dove.o -obj-$(CONFIG_MMC_SDHCI_TEGRA) += sdhci-tegra.o -obj-$(CONFIG_MMC_SDHCI_OF_ESDHC) += sdhci-of-esdhc.o -obj-$(CONFIG_MMC_SDHCI_OF_HLWD) += sdhci-of-hlwd.o +obj-$(CONFIG_MMC_SDHCI_PLTFM) += sdhci-platform.o +sdhci-platform-y := sdhci-pltfm.o +sdhci-platform-$(CONFIG_MMC_SDHCI_CNS3XXX) += sdhci-cns3xxx.o +sdhci-platform-$(CONFIG_MMC_SDHCI_ESDHC_IMX) += sdhci-esdhc-imx.o +sdhci-platform-$(CONFIG_MMC_SDHCI_DOVE) += sdhci-dove.o +sdhci-platform-$(CONFIG_MMC_SDHCI_TEGRA) += sdhci-tegra.o + +obj-$(CONFIG_MMC_SDHCI_OF) += sdhci-of.o +sdhci-of-y := sdhci-of-core.o +sdhci-of-$(CONFIG_MMC_SDHCI_OF_ESDHC) += 
sdhci-of-esdhc.o +sdhci-of-$(CONFIG_MMC_SDHCI_OF_HLWD) += sdhci-of-hlwd.o ifeq ($(CONFIG_CB710_DEBUG),y) CFLAGS-cb710-mmc += -DDEBUG diff --git a/trunk/drivers/mmc/host/at91_mci.c b/trunk/drivers/mmc/host/at91_mci.c index a4aa3af86fed..d3e6a962f423 100644 --- a/trunk/drivers/mmc/host/at91_mci.c +++ b/trunk/drivers/mmc/host/at91_mci.c @@ -77,8 +77,7 @@ #include #include - -#include "at91_mci.h" +#include #define DRIVER_NAME "at91_mci" diff --git a/trunk/drivers/mmc/host/atmel-mci.c b/trunk/drivers/mmc/host/atmel-mci.c index fa8cae1d7005..aa8039f473c4 100644 --- a/trunk/drivers/mmc/host/atmel-mci.c +++ b/trunk/drivers/mmc/host/atmel-mci.c @@ -203,7 +203,6 @@ struct atmel_mci_slot { #define ATMCI_CARD_PRESENT 0 #define ATMCI_CARD_NEED_INIT 1 #define ATMCI_SHUTDOWN 2 -#define ATMCI_SUSPENDED 3 int detect_pin; int wp_pin; @@ -1879,72 +1878,10 @@ static int __exit atmci_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int atmci_suspend(struct device *dev) -{ - struct atmel_mci *host = dev_get_drvdata(dev); - int i; - - for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) { - struct atmel_mci_slot *slot = host->slot[i]; - int ret; - - if (!slot) - continue; - ret = mmc_suspend_host(slot->mmc); - if (ret < 0) { - while (--i >= 0) { - slot = host->slot[i]; - if (slot - && test_bit(ATMCI_SUSPENDED, &slot->flags)) { - mmc_resume_host(host->slot[i]->mmc); - clear_bit(ATMCI_SUSPENDED, &slot->flags); - } - } - return ret; - } else { - set_bit(ATMCI_SUSPENDED, &slot->flags); - } - } - - return 0; -} - -static int atmci_resume(struct device *dev) -{ - struct atmel_mci *host = dev_get_drvdata(dev); - int i; - int ret = 0; - - for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) { - struct atmel_mci_slot *slot = host->slot[i]; - int err; - - slot = host->slot[i]; - if (!slot) - continue; - if (!test_bit(ATMCI_SUSPENDED, &slot->flags)) - continue; - err = mmc_resume_host(slot->mmc); - if (err < 0) - ret = err; - else - clear_bit(ATMCI_SUSPENDED, &slot->flags); - } - - 
return ret; -} -static SIMPLE_DEV_PM_OPS(atmci_pm, atmci_suspend, atmci_resume); -#define ATMCI_PM_OPS (&atmci_pm) -#else -#define ATMCI_PM_OPS NULL -#endif - static struct platform_driver atmci_driver = { .remove = __exit_p(atmci_remove), .driver = { .name = "atmel_mci", - .pm = ATMCI_PM_OPS, }, }; diff --git a/trunk/drivers/mmc/host/dw_mmc.c b/trunk/drivers/mmc/host/dw_mmc.c index 0c839d3338db..66dcddb9c205 100644 --- a/trunk/drivers/mmc/host/dw_mmc.c +++ b/trunk/drivers/mmc/host/dw_mmc.c @@ -33,7 +33,6 @@ #include #include #include -#include #include "dw_mmc.h" @@ -101,8 +100,6 @@ struct dw_mci_slot { int last_detect_state; }; -static struct workqueue_struct *dw_mci_card_workqueue; - #if defined(CONFIG_DEBUG_FS) static int dw_mci_req_show(struct seq_file *s, void *v) { @@ -287,7 +284,7 @@ static void send_stop_cmd(struct dw_mci *host, struct mmc_data *data) /* DMA interface functions */ static void dw_mci_stop_dma(struct dw_mci *host) { - if (host->using_dma) { + if (host->use_dma) { host->dma_ops->stop(host); host->dma_ops->cleanup(host); } else { @@ -435,8 +432,6 @@ static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data) unsigned int i, direction, sg_len; u32 temp; - host->using_dma = 0; - /* If we don't have a channel, we can't do DMA */ if (!host->use_dma) return -ENODEV; @@ -456,8 +451,6 @@ static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data) return -EINVAL; } - host->using_dma = 1; - if (data->flags & MMC_DATA_READ) direction = DMA_FROM_DEVICE; else @@ -496,18 +489,14 @@ static void dw_mci_submit_data(struct dw_mci *host, struct mmc_data *data) host->sg = NULL; host->data = data; - if (data->flags & MMC_DATA_READ) - host->dir_status = DW_MCI_RECV_STATUS; - else - host->dir_status = DW_MCI_SEND_STATUS; - if (dw_mci_submit_data_dma(host, data)) { host->sg = data->sg; host->pio_offset = 0; - host->part_buf_start = 0; - host->part_buf_count = 0; + if (data->flags & MMC_DATA_READ) + host->dir_status = 
DW_MCI_RECV_STATUS; + else + host->dir_status = DW_MCI_SEND_STATUS; - mci_writel(host, RINTSTS, SDMMC_INT_TXDR | SDMMC_INT_RXDR); temp = mci_readl(host, INTMASK); temp |= SDMMC_INT_TXDR | SDMMC_INT_RXDR; mci_writel(host, INTMASK, temp); @@ -585,7 +574,7 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot) } /* Set the current slot bus width */ - mci_writel(host, CTYPE, (slot->ctype << slot->id)); + mci_writel(host, CTYPE, slot->ctype); } static void dw_mci_start_request(struct dw_mci *host, @@ -635,13 +624,13 @@ static void dw_mci_start_request(struct dw_mci *host, host->stop_cmdr = dw_mci_prepare_command(slot->mmc, mrq->stop); } -/* must be called with host->lock held */ static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot, struct mmc_request *mrq) { dev_vdbg(&slot->mmc->class_dev, "queue request: state=%d\n", host->state); + spin_lock_bh(&host->lock); slot->mrq = mrq; if (host->state == STATE_IDLE) { @@ -650,6 +639,8 @@ static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot, } else { list_add_tail(&slot->queue_node, &host->queue); } + + spin_unlock_bh(&host->lock); } static void dw_mci_request(struct mmc_host *mmc, struct mmc_request *mrq) @@ -659,23 +650,14 @@ static void dw_mci_request(struct mmc_host *mmc, struct mmc_request *mrq) WARN_ON(slot->mrq); - /* - * The check for card presence and queueing of the request must be - * atomic, otherwise the card could be removed in between and the - * request wouldn't fail until another card was inserted. - */ - spin_lock_bh(&host->lock); - if (!test_bit(DW_MMC_CARD_PRESENT, &slot->flags)) { - spin_unlock_bh(&host->lock); mrq->cmd->error = -ENOMEDIUM; mmc_request_done(mmc, mrq); return; } + /* We don't support multiple blocks of weird lengths. 
*/ dw_mci_queue_request(host, slot, mrq); - - spin_unlock_bh(&host->lock); } static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) @@ -849,7 +831,7 @@ static void dw_mci_tasklet_func(unsigned long priv) struct mmc_command *cmd; enum dw_mci_state state; enum dw_mci_state prev_state; - u32 status, ctrl; + u32 status; spin_lock(&host->lock); @@ -909,19 +891,13 @@ static void dw_mci_tasklet_func(unsigned long priv) if (status & DW_MCI_DATA_ERROR_FLAGS) { if (status & SDMMC_INT_DTO) { + dev_err(&host->pdev->dev, + "data timeout error\n"); data->error = -ETIMEDOUT; } else if (status & SDMMC_INT_DCRC) { + dev_err(&host->pdev->dev, + "data CRC error\n"); data->error = -EILSEQ; - } else if (status & SDMMC_INT_EBE && - host->dir_status == - DW_MCI_SEND_STATUS) { - /* - * No data CRC status was returned. - * The number of bytes transferred will - * be exaggerated in PIO mode. - */ - data->bytes_xfered = 0; - data->error = -ETIMEDOUT; } else { dev_err(&host->pdev->dev, "data FIFO error " @@ -929,16 +905,6 @@ static void dw_mci_tasklet_func(unsigned long priv) status); data->error = -EIO; } - /* - * After an error, there may be data lingering - * in the FIFO, so reset it - doing so - * generates a block interrupt, hence setting - * the scatter-gather pointer to NULL. 
- */ - host->sg = NULL; - ctrl = mci_readl(host, CTRL); - ctrl |= SDMMC_CTRL_FIFO_RESET; - mci_writel(host, CTRL, ctrl); } else { data->bytes_xfered = data->blocks * data->blksz; data->error = 0; @@ -980,278 +946,84 @@ static void dw_mci_tasklet_func(unsigned long priv) } -/* push final bytes to part_buf, only use during push */ -static void dw_mci_set_part_bytes(struct dw_mci *host, void *buf, int cnt) +static void dw_mci_push_data16(struct dw_mci *host, void *buf, int cnt) { - memcpy((void *)&host->part_buf, buf, cnt); - host->part_buf_count = cnt; -} + u16 *pdata = (u16 *)buf; -/* append bytes to part_buf, only use during push */ -static int dw_mci_push_part_bytes(struct dw_mci *host, void *buf, int cnt) -{ - cnt = min(cnt, (1 << host->data_shift) - host->part_buf_count); - memcpy((void *)&host->part_buf + host->part_buf_count, buf, cnt); - host->part_buf_count += cnt; - return cnt; -} + WARN_ON(cnt % 2 != 0); -/* pull first bytes from part_buf, only use during pull */ -static int dw_mci_pull_part_bytes(struct dw_mci *host, void *buf, int cnt) -{ - cnt = min(cnt, (int)host->part_buf_count); - if (cnt) { - memcpy(buf, (void *)&host->part_buf + host->part_buf_start, - cnt); - host->part_buf_count -= cnt; - host->part_buf_start += cnt; + cnt = cnt >> 1; + while (cnt > 0) { + mci_writew(host, DATA, *pdata++); + cnt--; } - return cnt; } -/* pull final bytes from the part_buf, assuming it's just been filled */ -static void dw_mci_pull_final_bytes(struct dw_mci *host, void *buf, int cnt) +static void dw_mci_pull_data16(struct dw_mci *host, void *buf, int cnt) { - memcpy(buf, &host->part_buf, cnt); - host->part_buf_start = cnt; - host->part_buf_count = (1 << host->data_shift) - cnt; -} + u16 *pdata = (u16 *)buf; -static void dw_mci_push_data16(struct dw_mci *host, void *buf, int cnt) -{ - /* try and push anything in the part_buf */ - if (unlikely(host->part_buf_count)) { - int len = dw_mci_push_part_bytes(host, buf, cnt); - buf += len; - cnt -= len; - if 
(!sg_next(host->sg) || host->part_buf_count == 2) { - mci_writew(host, DATA, host->part_buf16); - host->part_buf_count = 0; - } - } -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (unlikely((unsigned long)buf & 0x1)) { - while (cnt >= 2) { - u16 aligned_buf[64]; - int len = min(cnt & -2, (int)sizeof(aligned_buf)); - int items = len >> 1; - int i; - /* memcpy from input buffer into aligned buffer */ - memcpy(aligned_buf, buf, len); - buf += len; - cnt -= len; - /* push data from aligned buffer into fifo */ - for (i = 0; i < items; ++i) - mci_writew(host, DATA, aligned_buf[i]); - } - } else -#endif - { - u16 *pdata = buf; - for (; cnt >= 2; cnt -= 2) - mci_writew(host, DATA, *pdata++); - buf = pdata; - } - /* put anything remaining in the part_buf */ - if (cnt) { - dw_mci_set_part_bytes(host, buf, cnt); - if (!sg_next(host->sg)) - mci_writew(host, DATA, host->part_buf16); - } -} + WARN_ON(cnt % 2 != 0); -static void dw_mci_pull_data16(struct dw_mci *host, void *buf, int cnt) -{ -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (unlikely((unsigned long)buf & 0x1)) { - while (cnt >= 2) { - /* pull data from fifo into aligned buffer */ - u16 aligned_buf[64]; - int len = min(cnt & -2, (int)sizeof(aligned_buf)); - int items = len >> 1; - int i; - for (i = 0; i < items; ++i) - aligned_buf[i] = mci_readw(host, DATA); - /* memcpy from aligned buffer into output buffer */ - memcpy(buf, aligned_buf, len); - buf += len; - cnt -= len; - } - } else -#endif - { - u16 *pdata = buf; - for (; cnt >= 2; cnt -= 2) - *pdata++ = mci_readw(host, DATA); - buf = pdata; - } - if (cnt) { - host->part_buf16 = mci_readw(host, DATA); - dw_mci_pull_final_bytes(host, buf, cnt); + cnt = cnt >> 1; + while (cnt > 0) { + *pdata++ = mci_readw(host, DATA); + cnt--; } } static void dw_mci_push_data32(struct dw_mci *host, void *buf, int cnt) { - /* try and push anything in the part_buf */ - if (unlikely(host->part_buf_count)) { - int len = dw_mci_push_part_bytes(host, buf, cnt); - buf += len; - 
cnt -= len; - if (!sg_next(host->sg) || host->part_buf_count == 4) { - mci_writel(host, DATA, host->part_buf32); - host->part_buf_count = 0; - } - } -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (unlikely((unsigned long)buf & 0x3)) { - while (cnt >= 4) { - u32 aligned_buf[32]; - int len = min(cnt & -4, (int)sizeof(aligned_buf)); - int items = len >> 2; - int i; - /* memcpy from input buffer into aligned buffer */ - memcpy(aligned_buf, buf, len); - buf += len; - cnt -= len; - /* push data from aligned buffer into fifo */ - for (i = 0; i < items; ++i) - mci_writel(host, DATA, aligned_buf[i]); - } - } else -#endif - { - u32 *pdata = buf; - for (; cnt >= 4; cnt -= 4) - mci_writel(host, DATA, *pdata++); - buf = pdata; - } - /* put anything remaining in the part_buf */ - if (cnt) { - dw_mci_set_part_bytes(host, buf, cnt); - if (!sg_next(host->sg)) - mci_writel(host, DATA, host->part_buf32); + u32 *pdata = (u32 *)buf; + + WARN_ON(cnt % 4 != 0); + WARN_ON((unsigned long)pdata & 0x3); + + cnt = cnt >> 2; + while (cnt > 0) { + mci_writel(host, DATA, *pdata++); + cnt--; } } static void dw_mci_pull_data32(struct dw_mci *host, void *buf, int cnt) { -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (unlikely((unsigned long)buf & 0x3)) { - while (cnt >= 4) { - /* pull data from fifo into aligned buffer */ - u32 aligned_buf[32]; - int len = min(cnt & -4, (int)sizeof(aligned_buf)); - int items = len >> 2; - int i; - for (i = 0; i < items; ++i) - aligned_buf[i] = mci_readl(host, DATA); - /* memcpy from aligned buffer into output buffer */ - memcpy(buf, aligned_buf, len); - buf += len; - cnt -= len; - } - } else -#endif - { - u32 *pdata = buf; - for (; cnt >= 4; cnt -= 4) - *pdata++ = mci_readl(host, DATA); - buf = pdata; - } - if (cnt) { - host->part_buf32 = mci_readl(host, DATA); - dw_mci_pull_final_bytes(host, buf, cnt); + u32 *pdata = (u32 *)buf; + + WARN_ON(cnt % 4 != 0); + WARN_ON((unsigned long)pdata & 0x3); + + cnt = cnt >> 2; + while (cnt > 0) { + *pdata++ = 
mci_readl(host, DATA); + cnt--; } } static void dw_mci_push_data64(struct dw_mci *host, void *buf, int cnt) { - /* try and push anything in the part_buf */ - if (unlikely(host->part_buf_count)) { - int len = dw_mci_push_part_bytes(host, buf, cnt); - buf += len; - cnt -= len; - if (!sg_next(host->sg) || host->part_buf_count == 8) { - mci_writew(host, DATA, host->part_buf); - host->part_buf_count = 0; - } - } -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (unlikely((unsigned long)buf & 0x7)) { - while (cnt >= 8) { - u64 aligned_buf[16]; - int len = min(cnt & -8, (int)sizeof(aligned_buf)); - int items = len >> 3; - int i; - /* memcpy from input buffer into aligned buffer */ - memcpy(aligned_buf, buf, len); - buf += len; - cnt -= len; - /* push data from aligned buffer into fifo */ - for (i = 0; i < items; ++i) - mci_writeq(host, DATA, aligned_buf[i]); - } - } else -#endif - { - u64 *pdata = buf; - for (; cnt >= 8; cnt -= 8) - mci_writeq(host, DATA, *pdata++); - buf = pdata; - } - /* put anything remaining in the part_buf */ - if (cnt) { - dw_mci_set_part_bytes(host, buf, cnt); - if (!sg_next(host->sg)) - mci_writeq(host, DATA, host->part_buf); - } -} + u64 *pdata = (u64 *)buf; -static void dw_mci_pull_data64(struct dw_mci *host, void *buf, int cnt) -{ -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (unlikely((unsigned long)buf & 0x7)) { - while (cnt >= 8) { - /* pull data from fifo into aligned buffer */ - u64 aligned_buf[16]; - int len = min(cnt & -8, (int)sizeof(aligned_buf)); - int items = len >> 3; - int i; - for (i = 0; i < items; ++i) - aligned_buf[i] = mci_readq(host, DATA); - /* memcpy from aligned buffer into output buffer */ - memcpy(buf, aligned_buf, len); - buf += len; - cnt -= len; - } - } else -#endif - { - u64 *pdata = buf; - for (; cnt >= 8; cnt -= 8) - *pdata++ = mci_readq(host, DATA); - buf = pdata; - } - if (cnt) { - host->part_buf = mci_readq(host, DATA); - dw_mci_pull_final_bytes(host, buf, cnt); + WARN_ON(cnt % 8 != 0); + + cnt = cnt 
>> 3; + while (cnt > 0) { + mci_writeq(host, DATA, *pdata++); + cnt--; } } -static void dw_mci_pull_data(struct dw_mci *host, void *buf, int cnt) +static void dw_mci_pull_data64(struct dw_mci *host, void *buf, int cnt) { - int len; + u64 *pdata = (u64 *)buf; - /* get remaining partial bytes */ - len = dw_mci_pull_part_bytes(host, buf, cnt); - if (unlikely(len == cnt)) - return; - buf += len; - cnt -= len; + WARN_ON(cnt % 8 != 0); - /* get the rest of the data */ - host->pull_data(host, buf, cnt); + cnt = cnt >> 3; + while (cnt > 0) { + *pdata++ = mci_readq(host, DATA); + cnt--; + } } static void dw_mci_read_data_pio(struct dw_mci *host) @@ -1265,10 +1037,9 @@ static void dw_mci_read_data_pio(struct dw_mci *host) unsigned int nbytes = 0, len; do { - len = host->part_buf_count + - (SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift); + len = SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift; if (offset + len <= sg->length) { - dw_mci_pull_data(host, (void *)(buf + offset), len); + host->pull_data(host, (void *)(buf + offset), len); offset += len; nbytes += len; @@ -1284,8 +1055,8 @@ static void dw_mci_read_data_pio(struct dw_mci *host) } } else { unsigned int remaining = sg->length - offset; - dw_mci_pull_data(host, (void *)(buf + offset), - remaining); + host->pull_data(host, (void *)(buf + offset), + remaining); nbytes += remaining; flush_dcache_page(sg_page(sg)); @@ -1295,7 +1066,7 @@ static void dw_mci_read_data_pio(struct dw_mci *host) offset = len - remaining; buf = sg_virt(sg); - dw_mci_pull_data(host, buf, offset); + host->pull_data(host, buf, offset); nbytes += offset; } @@ -1312,6 +1083,7 @@ static void dw_mci_read_data_pio(struct dw_mci *host) return; } } while (status & SDMMC_INT_RXDR); /*if the RXDR is ready read again*/ + len = SDMMC_GET_FCNT(mci_readl(host, STATUS)); host->pio_offset = offset; data->bytes_xfered += nbytes; return; @@ -1333,9 +1105,8 @@ static void dw_mci_write_data_pio(struct dw_mci *host) unsigned int nbytes = 0, len; do { - len = 
((host->fifo_depth - - SDMMC_GET_FCNT(mci_readl(host, STATUS))) << shift) - - host->part_buf_count; + len = SDMMC_FIFO_SZ - + (SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift); if (offset + len <= sg->length) { host->push_data(host, (void *)(buf + offset), len); @@ -1380,8 +1151,10 @@ static void dw_mci_write_data_pio(struct dw_mci *host) return; } } while (status & SDMMC_INT_TXDR); /* if TXDR write again */ + host->pio_offset = offset; data->bytes_xfered += nbytes; + return; done: @@ -1429,6 +1202,7 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) host->cmd_status = status; smp_wmb(); set_bit(EVENT_CMD_COMPLETE, &host->pending_events); + tasklet_schedule(&host->tasklet); } if (pending & DW_MCI_DATA_ERROR_FLAGS) { @@ -1437,9 +1211,7 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) host->data_status = status; smp_wmb(); set_bit(EVENT_DATA_ERROR, &host->pending_events); - if (!(pending & (SDMMC_INT_DTO | SDMMC_INT_DCRC | - SDMMC_INT_SBE | SDMMC_INT_EBE))) - tasklet_schedule(&host->tasklet); + tasklet_schedule(&host->tasklet); } if (pending & SDMMC_INT_DATA_OVER) { @@ -1457,13 +1229,13 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) if (pending & SDMMC_INT_RXDR) { mci_writel(host, RINTSTS, SDMMC_INT_RXDR); - if (host->dir_status == DW_MCI_RECV_STATUS && host->sg) + if (host->sg) dw_mci_read_data_pio(host); } if (pending & SDMMC_INT_TXDR) { mci_writel(host, RINTSTS, SDMMC_INT_TXDR); - if (host->dir_status == DW_MCI_SEND_STATUS && host->sg) + if (host->sg) dw_mci_write_data_pio(host); } @@ -1474,7 +1246,7 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) if (pending & SDMMC_INT_CD) { mci_writel(host, RINTSTS, SDMMC_INT_CD); - queue_work(dw_mci_card_workqueue, &host->card_work); + tasklet_schedule(&host->card_tasklet); } } while (pass_count++ < 5); @@ -1493,9 +1265,9 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void dw_mci_work_routine_card(struct work_struct *work) 
+static void dw_mci_tasklet_card(unsigned long data) { - struct dw_mci *host = container_of(work, struct dw_mci, card_work); + struct dw_mci *host = (struct dw_mci *)data; int i; for (i = 0; i < host->num_slots; i++) { @@ -1507,21 +1279,22 @@ static void dw_mci_work_routine_card(struct work_struct *work) present = dw_mci_get_cd(mmc); while (present != slot->last_detect_state) { + spin_lock(&host->lock); + dev_dbg(&slot->mmc->class_dev, "card %s\n", present ? "inserted" : "removed"); - /* Power up slot (before spin_lock, may sleep) */ - if (present != 0 && host->pdata->setpower) - host->pdata->setpower(slot->id, mmc->ocr_avail); - - spin_lock_bh(&host->lock); - /* Card change detected */ slot->last_detect_state = present; - /* Mark card as present if applicable */ - if (present != 0) + /* Power up slot */ + if (present != 0) { + if (host->pdata->setpower) + host->pdata->setpower(slot->id, + mmc->ocr_avail); + set_bit(DW_MMC_CARD_PRESENT, &slot->flags); + } /* Clean up queue if present */ mrq = slot->mrq; @@ -1571,6 +1344,8 @@ static void dw_mci_work_routine_card(struct work_struct *work) /* Power down slot */ if (present == 0) { + if (host->pdata->setpower) + host->pdata->setpower(slot->id, 0); clear_bit(DW_MMC_CARD_PRESENT, &slot->flags); /* @@ -1592,12 +1367,7 @@ static void dw_mci_work_routine_card(struct work_struct *work) } - spin_unlock_bh(&host->lock); - - /* Power down slot (after spin_unlock, may sleep) */ - if (present == 0 && host->pdata->setpower) - host->pdata->setpower(slot->id, 0); - + spin_unlock(&host->lock); present = dw_mci_get_cd(mmc); } @@ -1697,7 +1467,7 @@ static int __init dw_mci_init_slot(struct dw_mci *host, unsigned int id) * Card may have been plugged in prior to boot so we * need to run the detect tasklet */ - queue_work(dw_mci_card_workqueue, &host->card_work); + tasklet_schedule(&host->card_tasklet); return 0; } @@ -1875,19 +1645,8 @@ static int dw_mci_probe(struct platform_device *pdev) * FIFO threshold settings RxMark = fifo_size / 2 
- 1, * Tx Mark = fifo_size / 2 DMA Size = 8 */ - if (!host->pdata->fifo_depth) { - /* - * Power-on value of RX_WMark is FIFO_DEPTH-1, but this may - * have been overwritten by the bootloader, just like we're - * about to do, so if you know the value for your hardware, you - * should put it in the platform data. - */ - fifo_size = mci_readl(host, FIFOTH); - fifo_size = 1 + ((fifo_size >> 16) & 0x7ff); - } else { - fifo_size = host->pdata->fifo_depth; - } - host->fifo_depth = fifo_size; + fifo_size = mci_readl(host, FIFOTH); + fifo_size = (fifo_size >> 16) & 0x7ff; host->fifoth_val = ((0x2 << 28) | ((fifo_size/2 - 1) << 16) | ((fifo_size/2) << 0)); mci_writel(host, FIFOTH, host->fifoth_val); @@ -1897,15 +1656,12 @@ static int dw_mci_probe(struct platform_device *pdev) mci_writel(host, CLKSRC, 0); tasklet_init(&host->tasklet, dw_mci_tasklet_func, (unsigned long)host); - dw_mci_card_workqueue = alloc_workqueue("dw-mci-card", - WQ_MEM_RECLAIM | WQ_NON_REENTRANT, 1); - if (!dw_mci_card_workqueue) - goto err_dmaunmap; - INIT_WORK(&host->card_work, dw_mci_work_routine_card); + tasklet_init(&host->card_tasklet, + dw_mci_tasklet_card, (unsigned long)host); ret = request_irq(irq, dw_mci_interrupt, 0, "dw-mci", host); if (ret) - goto err_workqueue; + goto err_dmaunmap; platform_set_drvdata(pdev, host); @@ -1934,9 +1690,7 @@ static int dw_mci_probe(struct platform_device *pdev) mci_writel(host, CTRL, SDMMC_CTRL_INT_ENABLE); /* Enable mci interrupt */ dev_info(&pdev->dev, "DW MMC controller at irq %d, " - "%d bit host data width, " - "%u deep fifo\n", - irq, width, fifo_size); + "%d bit host data width\n", irq, width); if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO) dev_info(&pdev->dev, "Internal DMAC interrupt fix enabled.\n"); @@ -1951,9 +1705,6 @@ static int dw_mci_probe(struct platform_device *pdev) } free_irq(irq, host); -err_workqueue: - destroy_workqueue(dw_mci_card_workqueue); - err_dmaunmap: if (host->use_dma && host->dma_ops->exit) host->dma_ops->exit(host); @@ -1993,7 
+1744,6 @@ static int __exit dw_mci_remove(struct platform_device *pdev) mci_writel(host, CLKSRC, 0); free_irq(platform_get_irq(pdev, 0), host); - destroy_workqueue(dw_mci_card_workqueue); dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma); if (host->use_dma && host->dma_ops->exit) diff --git a/trunk/drivers/mmc/host/dw_mmc.h b/trunk/drivers/mmc/host/dw_mmc.h index 027d37735394..23c662af5616 100644 --- a/trunk/drivers/mmc/host/dw_mmc.h +++ b/trunk/drivers/mmc/host/dw_mmc.h @@ -118,6 +118,7 @@ #define SDMMC_CMD_INDX(n) ((n) & 0x1F) /* Status register defines */ #define SDMMC_GET_FCNT(x) (((x)>>17) & 0x1FF) +#define SDMMC_FIFO_SZ 32 /* Internal DMAC interrupt defines */ #define SDMMC_IDMAC_INT_AI BIT(9) #define SDMMC_IDMAC_INT_NI BIT(8) @@ -133,22 +134,22 @@ /* Register access macros */ #define mci_readl(dev, reg) \ - __raw_readl((dev)->regs + SDMMC_##reg) + __raw_readl(dev->regs + SDMMC_##reg) #define mci_writel(dev, reg, value) \ - __raw_writel((value), (dev)->regs + SDMMC_##reg) + __raw_writel((value), dev->regs + SDMMC_##reg) /* 16-bit FIFO access macros */ #define mci_readw(dev, reg) \ - __raw_readw((dev)->regs + SDMMC_##reg) + __raw_readw(dev->regs + SDMMC_##reg) #define mci_writew(dev, reg, value) \ - __raw_writew((value), (dev)->regs + SDMMC_##reg) + __raw_writew((value), dev->regs + SDMMC_##reg) /* 64-bit FIFO access macros */ #ifdef readq #define mci_readq(dev, reg) \ - __raw_readq((dev)->regs + SDMMC_##reg) + __raw_readq(dev->regs + SDMMC_##reg) #define mci_writeq(dev, reg, value) \ - __raw_writeq((value), (dev)->regs + SDMMC_##reg) + __raw_writeq((value), dev->regs + SDMMC_##reg) #else /* * Dummy readq implementation for architectures that don't define it. @@ -159,9 +160,9 @@ * rest of the code free from ifdefs. 
*/ #define mci_readq(dev, reg) \ - (*(volatile u64 __force *)((dev)->regs + SDMMC_##reg)) + (*(volatile u64 __force *)(dev->regs + SDMMC_##reg)) #define mci_writeq(dev, reg, value) \ - (*(volatile u64 __force *)((dev)->regs + SDMMC_##reg) = (value)) + (*(volatile u64 __force *)(dev->regs + SDMMC_##reg) = value) #endif #endif /* _DW_MMC_H_ */ diff --git a/trunk/drivers/mmc/host/mmci.c b/trunk/drivers/mmc/host/mmci.c index fef7140eb1d0..fe140724a02e 100644 --- a/trunk/drivers/mmc/host/mmci.c +++ b/trunk/drivers/mmc/host/mmci.c @@ -226,9 +226,6 @@ static void __devinit mmci_dma_setup(struct mmci_host *host) return; } - /* initialize pre request cookie */ - host->next_data.cookie = 1; - /* Try to acquire a generic DMA engine slave channel */ dma_cap_zero(mask); dma_cap_set(DMA_SLAVE, mask); @@ -338,8 +335,7 @@ static void mmci_dma_unmap(struct mmci_host *host, struct mmc_data *data) dir = DMA_FROM_DEVICE; } - if (!data->host_cookie) - dma_unmap_sg(chan->device->dev, data->sg, data->sg_len, dir); + dma_unmap_sg(chan->device->dev, data->sg, data->sg_len, dir); /* * Use of DMA with scatter-gather is impossible. 
@@ -357,8 +353,7 @@ static void mmci_dma_data_error(struct mmci_host *host) dmaengine_terminate_all(host->dma_current); } -static int mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data, - struct mmci_host_next *next) +static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl) { struct variant_data *variant = host->variant; struct dma_slave_config conf = { @@ -369,20 +364,13 @@ static int mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data, .src_maxburst = variant->fifohalfsize >> 2, /* # of words */ .dst_maxburst = variant->fifohalfsize >> 2, /* # of words */ }; + struct mmc_data *data = host->data; struct dma_chan *chan; struct dma_device *device; struct dma_async_tx_descriptor *desc; int nr_sg; - /* Check if next job is already prepared */ - if (data->host_cookie && !next && - host->dma_current && host->dma_desc_current) - return 0; - - if (!next) { - host->dma_current = NULL; - host->dma_desc_current = NULL; - } + host->dma_current = NULL; if (data->flags & MMC_DATA_READ) { conf.direction = DMA_FROM_DEVICE; @@ -397,7 +385,7 @@ static int mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data, return -EINVAL; /* If less than or equal to the fifo size, don't bother with DMA */ - if (data->blksz * data->blocks <= variant->fifosize) + if (host->size <= variant->fifosize) return -EINVAL; device = chan->device; @@ -411,38 +399,14 @@ static int mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data, if (!desc) goto unmap_exit; - if (next) { - next->dma_chan = chan; - next->dma_desc = desc; - } else { - host->dma_current = chan; - host->dma_desc_current = desc; - } - - return 0; - - unmap_exit: - if (!next) - dmaengine_terminate_all(chan); - dma_unmap_sg(device->dev, data->sg, data->sg_len, conf.direction); - return -ENOMEM; -} - -static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl) -{ - int ret; - struct mmc_data *data = host->data; - - ret = mmci_dma_prep_data(host, host->data, 
NULL); - if (ret) - return ret; - /* Okay, go for it. */ + host->dma_current = chan; + dev_vdbg(mmc_dev(host->mmc), "Submit MMCI DMA job, sglen %d blksz %04x blks %04x flags %08x\n", data->sg_len, data->blksz, data->blocks, data->flags); - dmaengine_submit(host->dma_desc_current); - dma_async_issue_pending(host->dma_current); + dmaengine_submit(desc); + dma_async_issue_pending(chan); datactrl |= MCI_DPSM_DMAENABLE; @@ -457,90 +421,14 @@ static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl) writel(readl(host->base + MMCIMASK0) | MCI_DATAENDMASK, host->base + MMCIMASK0); return 0; -} - -static void mmci_get_next_data(struct mmci_host *host, struct mmc_data *data) -{ - struct mmci_host_next *next = &host->next_data; - if (data->host_cookie && data->host_cookie != next->cookie) { - printk(KERN_WARNING "[%s] invalid cookie: data->host_cookie %d" - " host->next_data.cookie %d\n", - __func__, data->host_cookie, host->next_data.cookie); - data->host_cookie = 0; - } - - if (!data->host_cookie) - return; - - host->dma_desc_current = next->dma_desc; - host->dma_current = next->dma_chan; - - next->dma_desc = NULL; - next->dma_chan = NULL; -} - -static void mmci_pre_request(struct mmc_host *mmc, struct mmc_request *mrq, - bool is_first_req) -{ - struct mmci_host *host = mmc_priv(mmc); - struct mmc_data *data = mrq->data; - struct mmci_host_next *nd = &host->next_data; - - if (!data) - return; - - if (data->host_cookie) { - data->host_cookie = 0; - return; - } - - /* if config for dma */ - if (((data->flags & MMC_DATA_WRITE) && host->dma_tx_channel) || - ((data->flags & MMC_DATA_READ) && host->dma_rx_channel)) { - if (mmci_dma_prep_data(host, data, nd)) - data->host_cookie = 0; - else - data->host_cookie = ++nd->cookie < 0 ? 
1 : nd->cookie; - } -} - -static void mmci_post_request(struct mmc_host *mmc, struct mmc_request *mrq, - int err) -{ - struct mmci_host *host = mmc_priv(mmc); - struct mmc_data *data = mrq->data; - struct dma_chan *chan; - enum dma_data_direction dir; - - if (!data) - return; - - if (data->flags & MMC_DATA_READ) { - dir = DMA_FROM_DEVICE; - chan = host->dma_rx_channel; - } else { - dir = DMA_TO_DEVICE; - chan = host->dma_tx_channel; - } - - - /* if config for dma */ - if (chan) { - if (err) - dmaengine_terminate_all(chan); - if (err || data->host_cookie) - dma_unmap_sg(mmc_dev(host->mmc), data->sg, - data->sg_len, dir); - mrq->data->host_cookie = 0; - } +unmap_exit: + dmaengine_terminate_all(chan); + dma_unmap_sg(device->dev, data->sg, data->sg_len, conf.direction); + return -ENOMEM; } - #else /* Blank functions if the DMA engine is not available */ -static void mmci_get_next_data(struct mmci_host *host, struct mmc_data *data) -{ -} static inline void mmci_dma_setup(struct mmci_host *host) { } @@ -561,10 +449,6 @@ static inline int mmci_dma_start_data(struct mmci_host *host, unsigned int datac { return -ENOSYS; } - -#define mmci_pre_request NULL -#define mmci_post_request NULL - #endif static void mmci_start_data(struct mmci_host *host, struct mmc_data *data) @@ -988,9 +872,6 @@ static void mmci_request(struct mmc_host *mmc, struct mmc_request *mrq) host->mrq = mrq; - if (mrq->data) - mmci_get_next_data(host, mrq->data); - if (mrq->data && mrq->data->flags & MMC_DATA_READ) mmci_start_data(host, mrq->data); @@ -1105,8 +986,6 @@ static irqreturn_t mmci_cd_irq(int irq, void *dev_id) static const struct mmc_host_ops mmci_ops = { .request = mmci_request, - .pre_req = mmci_pre_request, - .post_req = mmci_post_request, .set_ios = mmci_set_ios, .get_ro = mmci_get_ro, .get_cd = mmci_get_cd, diff --git a/trunk/drivers/mmc/host/mmci.h b/trunk/drivers/mmc/host/mmci.h index 79e4143ab9df..2164e8c6476c 100644 --- a/trunk/drivers/mmc/host/mmci.h +++ b/trunk/drivers/mmc/host/mmci.h 
@@ -166,12 +166,6 @@ struct clk; struct variant_data; struct dma_chan; -struct mmci_host_next { - struct dma_async_tx_descriptor *dma_desc; - struct dma_chan *dma_chan; - s32 cookie; -}; - struct mmci_host { phys_addr_t phybase; void __iomem *base; @@ -209,8 +203,6 @@ struct mmci_host { struct dma_chan *dma_current; struct dma_chan *dma_rx_channel; struct dma_chan *dma_tx_channel; - struct dma_async_tx_descriptor *dma_desc_current; - struct mmci_host_next next_data; #define dma_inprogress(host) ((host)->dma_current) #else diff --git a/trunk/drivers/mmc/host/mxs-mmc.c b/trunk/drivers/mmc/host/mxs-mmc.c index d513d47364d0..99d39a6a1032 100644 --- a/trunk/drivers/mmc/host/mxs-mmc.c +++ b/trunk/drivers/mmc/host/mxs-mmc.c @@ -564,38 +564,40 @@ static void mxs_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq) static void mxs_mmc_set_clk_rate(struct mxs_mmc_host *host, unsigned int rate) { - unsigned int ssp_clk, ssp_sck; - u32 clock_divide, clock_rate; + unsigned int ssp_rate, bit_rate; + u32 div1, div2; u32 val; - ssp_clk = clk_get_rate(host->clk); + ssp_rate = clk_get_rate(host->clk); - for (clock_divide = 2; clock_divide <= 254; clock_divide += 2) { - clock_rate = DIV_ROUND_UP(ssp_clk, rate * clock_divide); - clock_rate = (clock_rate > 0) ? 
clock_rate - 1 : 0; - if (clock_rate <= 255) + for (div1 = 2; div1 < 254; div1 += 2) { + div2 = ssp_rate / rate / div1; + if (div2 < 0x100) break; } - if (clock_divide > 254) { + if (div1 >= 254) { dev_err(mmc_dev(host->mmc), "%s: cannot set clock to %d\n", __func__, rate); return; } - ssp_sck = ssp_clk / clock_divide / (1 + clock_rate); + if (div2 == 0) + bit_rate = ssp_rate / div1; + else + bit_rate = ssp_rate / div1 / div2; val = readl(host->base + HW_SSP_TIMING); val &= ~(BM_SSP_TIMING_CLOCK_DIVIDE | BM_SSP_TIMING_CLOCK_RATE); - val |= BF_SSP(clock_divide, TIMING_CLOCK_DIVIDE); - val |= BF_SSP(clock_rate, TIMING_CLOCK_RATE); + val |= BF_SSP(div1, TIMING_CLOCK_DIVIDE); + val |= BF_SSP(div2 - 1, TIMING_CLOCK_RATE); writel(val, host->base + HW_SSP_TIMING); - host->clk_rate = ssp_sck; + host->clk_rate = bit_rate; dev_dbg(mmc_dev(host->mmc), - "%s: clock_divide %d, clock_rate %d, ssp_clk %d, rate_actual %d, rate_requested %d\n", - __func__, clock_divide, clock_rate, ssp_clk, ssp_sck, rate); + "%s: div1 %d, div2 %d, ssp %d, bit %d, rate %d\n", + __func__, div1, div2, ssp_rate, bit_rate, rate); } static void mxs_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) diff --git a/trunk/drivers/mmc/host/omap_hsmmc.c b/trunk/drivers/mmc/host/omap_hsmmc.c index 21e4a799df48..dedf3dab8a3b 100644 --- a/trunk/drivers/mmc/host/omap_hsmmc.c +++ b/trunk/drivers/mmc/host/omap_hsmmc.c @@ -17,7 +17,6 @@ #include #include -#include #include #include #include @@ -34,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -118,13 +116,15 @@ #define OMAP_MMC4_DEVID 3 #define OMAP_MMC5_DEVID 4 -#define MMC_AUTOSUSPEND_DELAY 100 #define MMC_TIMEOUT_MS 20 #define OMAP_MMC_MASTER_CLOCK 96000000 -#define OMAP_MMC_MIN_CLOCK 400000 -#define OMAP_MMC_MAX_CLOCK 52000000 #define DRIVER_NAME "omap_hsmmc" +/* Timeouts for entering power saving states on inactivity, msec */ +#define OMAP_MMC_DISABLED_TIMEOUT 100 +#define OMAP_MMC_SLEEP_TIMEOUT 1000 +#define 
OMAP_MMC_OFF_TIMEOUT 8000 + /* * One controller can have multiple slots, like on some omap boards using * omap.c controller driver. Luckily this is not currently done on any known @@ -141,11 +141,6 @@ #define OMAP_HSMMC_WRITE(base, reg, val) \ __raw_writel((val), (base) + OMAP_HSMMC_##reg) -struct omap_hsmmc_next { - unsigned int dma_len; - s32 cookie; -}; - struct omap_hsmmc_host { struct device *dev; struct mmc_host *mmc; @@ -153,6 +148,7 @@ struct omap_hsmmc_host { struct mmc_command *cmd; struct mmc_data *data; struct clk *fclk; + struct clk *iclk; struct clk *dbclk; /* * vcc == configured supply @@ -188,7 +184,6 @@ struct omap_hsmmc_host { int reqs_blocked; int use_reg; int req_in_progress; - struct omap_hsmmc_next next_data; struct omap_mmc_platform_data *pdata; }; @@ -552,15 +547,6 @@ static void omap_hsmmc_gpio_free(struct omap_mmc_platform_data *pdata) gpio_free(pdata->slots[0].switch_pin); } -/* - * Start clock to the card - */ -static void omap_hsmmc_start_clock(struct omap_hsmmc_host *host) -{ - OMAP_HSMMC_WRITE(host->base, SYSCTL, - OMAP_HSMMC_READ(host->base, SYSCTL) | CEN); -} - /* * Stop clock to the card */ @@ -598,81 +584,6 @@ static void omap_hsmmc_disable_irq(struct omap_hsmmc_host *host) OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); } -/* Calculate divisor for the given clock frequency */ -static u16 calc_divisor(struct mmc_ios *ios) -{ - u16 dsor = 0; - - if (ios->clock) { - dsor = DIV_ROUND_UP(OMAP_MMC_MASTER_CLOCK, ios->clock); - if (dsor > 250) - dsor = 250; - } - - return dsor; -} - -static void omap_hsmmc_set_clock(struct omap_hsmmc_host *host) -{ - struct mmc_ios *ios = &host->mmc->ios; - unsigned long regval; - unsigned long timeout; - - dev_dbg(mmc_dev(host->mmc), "Set clock to %uHz\n", ios->clock); - - omap_hsmmc_stop_clock(host); - - regval = OMAP_HSMMC_READ(host->base, SYSCTL); - regval = regval & ~(CLKD_MASK | DTO_MASK); - regval = regval | (calc_divisor(ios) << 6) | (DTO << 16); - OMAP_HSMMC_WRITE(host->base, SYSCTL, regval); - 
OMAP_HSMMC_WRITE(host->base, SYSCTL, - OMAP_HSMMC_READ(host->base, SYSCTL) | ICE); - - /* Wait till the ICS bit is set */ - timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS); - while ((OMAP_HSMMC_READ(host->base, SYSCTL) & ICS) != ICS - && time_before(jiffies, timeout)) - cpu_relax(); - - omap_hsmmc_start_clock(host); -} - -static void omap_hsmmc_set_bus_width(struct omap_hsmmc_host *host) -{ - struct mmc_ios *ios = &host->mmc->ios; - u32 con; - - con = OMAP_HSMMC_READ(host->base, CON); - switch (ios->bus_width) { - case MMC_BUS_WIDTH_8: - OMAP_HSMMC_WRITE(host->base, CON, con | DW8); - break; - case MMC_BUS_WIDTH_4: - OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8); - OMAP_HSMMC_WRITE(host->base, HCTL, - OMAP_HSMMC_READ(host->base, HCTL) | FOUR_BIT); - break; - case MMC_BUS_WIDTH_1: - OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8); - OMAP_HSMMC_WRITE(host->base, HCTL, - OMAP_HSMMC_READ(host->base, HCTL) & ~FOUR_BIT); - break; - } -} - -static void omap_hsmmc_set_bus_mode(struct omap_hsmmc_host *host) -{ - struct mmc_ios *ios = &host->mmc->ios; - u32 con; - - con = OMAP_HSMMC_READ(host->base, CON); - if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN) - OMAP_HSMMC_WRITE(host->base, CON, con | OD); - else - OMAP_HSMMC_WRITE(host->base, CON, con & ~OD); -} - #ifdef CONFIG_PM /* @@ -684,7 +595,8 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host) struct mmc_ios *ios = &host->mmc->ios; struct omap_mmc_platform_data *pdata = host->pdata; int context_loss = 0; - u32 hctl, capa; + u32 hctl, capa, con; + u16 dsor = 0; unsigned long timeout; if (pdata->get_context_loss_count) { @@ -746,12 +658,54 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host) if (host->power_mode == MMC_POWER_OFF) goto out; - omap_hsmmc_set_bus_width(host); + con = OMAP_HSMMC_READ(host->base, CON); + switch (ios->bus_width) { + case MMC_BUS_WIDTH_8: + OMAP_HSMMC_WRITE(host->base, CON, con | DW8); + break; + case MMC_BUS_WIDTH_4: + OMAP_HSMMC_WRITE(host->base, CON, con & 
~DW8); + OMAP_HSMMC_WRITE(host->base, HCTL, + OMAP_HSMMC_READ(host->base, HCTL) | FOUR_BIT); + break; + case MMC_BUS_WIDTH_1: + OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8); + OMAP_HSMMC_WRITE(host->base, HCTL, + OMAP_HSMMC_READ(host->base, HCTL) & ~FOUR_BIT); + break; + } + + if (ios->clock) { + dsor = OMAP_MMC_MASTER_CLOCK / ios->clock; + if (dsor < 1) + dsor = 1; + + if (OMAP_MMC_MASTER_CLOCK / dsor > ios->clock) + dsor++; + + if (dsor > 250) + dsor = 250; + } + + OMAP_HSMMC_WRITE(host->base, SYSCTL, + OMAP_HSMMC_READ(host->base, SYSCTL) & ~CEN); + OMAP_HSMMC_WRITE(host->base, SYSCTL, (dsor << 6) | (DTO << 16)); + OMAP_HSMMC_WRITE(host->base, SYSCTL, + OMAP_HSMMC_READ(host->base, SYSCTL) | ICE); - omap_hsmmc_set_clock(host); + timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS); + while ((OMAP_HSMMC_READ(host->base, SYSCTL) & ICS) != ICS + && time_before(jiffies, timeout)) + ; - omap_hsmmc_set_bus_mode(host); + OMAP_HSMMC_WRITE(host->base, SYSCTL, + OMAP_HSMMC_READ(host->base, SYSCTL) | CEN); + con = OMAP_HSMMC_READ(host->base, CON); + if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN) + OMAP_HSMMC_WRITE(host->base, CON, con | OD); + else + OMAP_HSMMC_WRITE(host->base, CON, con & ~OD); out: host->context_loss = context_loss; @@ -1019,14 +973,14 @@ static void omap_hsmmc_dma_cleanup(struct omap_hsmmc_host *host, int errno) * Readable error output */ #ifdef CONFIG_MMC_DEBUG -static void omap_hsmmc_dbg_report_irq(struct omap_hsmmc_host *host, u32 status) +static void omap_hsmmc_report_irq(struct omap_hsmmc_host *host, u32 status) { /* --- means reserved bit without definition at documentation */ static const char *omap_hsmmc_status_bits[] = { - "CC" , "TC" , "BGE", "---", "BWR" , "BRR" , "---" , "---" , - "CIRQ", "OBI" , "---", "---", "---" , "---" , "---" , "ERRI", - "CTO" , "CCRC", "CEB", "CIE", "DTO" , "DCRC", "DEB" , "---" , - "ACE" , "---" , "---", "---", "CERR", "BADA", "---" , "---" + "CC", "TC", "BGE", "---", "BWR", "BRR", "---", "---", "CIRQ", + "OBI", "---", 
"---", "---", "---", "---", "ERRI", "CTO", "CCRC", + "CEB", "CIE", "DTO", "DCRC", "DEB", "---", "ACE", "---", + "---", "---", "---", "CERR", "CERR", "BADA", "---", "---", "---" }; char res[256]; char *buf = res; @@ -1043,11 +997,6 @@ static void omap_hsmmc_dbg_report_irq(struct omap_hsmmc_host *host, u32 status) dev_dbg(mmc_dev(host->mmc), "%s\n", res); } -#else -static inline void omap_hsmmc_dbg_report_irq(struct omap_hsmmc_host *host, - u32 status) -{ -} #endif /* CONFIG_MMC_DEBUG */ /* @@ -1106,7 +1055,9 @@ static void omap_hsmmc_do_irq(struct omap_hsmmc_host *host, int status) dev_dbg(mmc_dev(host->mmc), "IRQ Status is %x\n", status); if (status & ERR) { - omap_hsmmc_dbg_report_irq(host, status); +#ifdef CONFIG_MMC_DEBUG + omap_hsmmc_report_irq(host, status); +#endif if ((status & CMD_TIMEOUT) || (status & CMD_CRC)) { if (host->cmd) { @@ -1204,7 +1155,8 @@ static int omap_hsmmc_switch_opcond(struct omap_hsmmc_host *host, int vdd) int ret; /* Disable the clocks */ - pm_runtime_put_sync(host->dev); + clk_disable(host->fclk); + clk_disable(host->iclk); if (host->got_dbclk) clk_disable(host->dbclk); @@ -1215,7 +1167,8 @@ static int omap_hsmmc_switch_opcond(struct omap_hsmmc_host *host, int vdd) if (!ret) ret = mmc_slot(host).set_power(host->dev, host->slot_id, 1, vdd); - pm_runtime_get_sync(host->dev); + clk_enable(host->iclk); + clk_enable(host->fclk); if (host->got_dbclk) clk_enable(host->dbclk); @@ -1369,7 +1322,7 @@ static void omap_hsmmc_config_dma_params(struct omap_hsmmc_host *host, static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data) { struct omap_hsmmc_host *host = cb_data; - struct mmc_data *data; + struct mmc_data *data = host->mrq->data; int dma_ch, req_in_progress; if (!(ch_status & OMAP_DMA_BLOCK_IRQ)) { @@ -1384,7 +1337,6 @@ static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data) return; } - data = host->mrq->data; host->dma_sg_idx++; if (host->dma_sg_idx < host->dma_len) { /* Fire up the next transfer. 
*/ @@ -1394,9 +1346,8 @@ static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data) return; } - if (!data->host_cookie) - dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, - omap_hsmmc_get_dma_dir(host, data)); + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + omap_hsmmc_get_dma_dir(host, data)); req_in_progress = host->req_in_progress; dma_ch = host->dma_ch; @@ -1414,45 +1365,6 @@ static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data) } } -static int omap_hsmmc_pre_dma_transfer(struct omap_hsmmc_host *host, - struct mmc_data *data, - struct omap_hsmmc_next *next) -{ - int dma_len; - - if (!next && data->host_cookie && - data->host_cookie != host->next_data.cookie) { - printk(KERN_WARNING "[%s] invalid cookie: data->host_cookie %d" - " host->next_data.cookie %d\n", - __func__, data->host_cookie, host->next_data.cookie); - data->host_cookie = 0; - } - - /* Check if next job is already prepared */ - if (next || - (!next && data->host_cookie != host->next_data.cookie)) { - dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg, - data->sg_len, - omap_hsmmc_get_dma_dir(host, data)); - - } else { - dma_len = host->next_data.dma_len; - host->next_data.dma_len = 0; - } - - - if (dma_len == 0) - return -EINVAL; - - if (next) { - next->dma_len = dma_len; - data->host_cookie = ++next->cookie < 0 ? 
1 : next->cookie; - } else - host->dma_len = dma_len; - - return 0; -} - /* * Routine to configure and start DMA for the MMC card */ @@ -1486,10 +1398,9 @@ static int omap_hsmmc_start_dma_transfer(struct omap_hsmmc_host *host, mmc_hostname(host->mmc), ret); return ret; } - ret = omap_hsmmc_pre_dma_transfer(host, data, NULL); - if (ret) - return ret; + host->dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg, + data->sg_len, omap_hsmmc_get_dma_dir(host, data)); host->dma_ch = dma_ch; host->dma_sg_idx = 0; @@ -1569,35 +1480,6 @@ omap_hsmmc_prepare_data(struct omap_hsmmc_host *host, struct mmc_request *req) return 0; } -static void omap_hsmmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq, - int err) -{ - struct omap_hsmmc_host *host = mmc_priv(mmc); - struct mmc_data *data = mrq->data; - - if (host->use_dma) { - dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, - omap_hsmmc_get_dma_dir(host, data)); - data->host_cookie = 0; - } -} - -static void omap_hsmmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq, - bool is_first_req) -{ - struct omap_hsmmc_host *host = mmc_priv(mmc); - - if (mrq->data->host_cookie) { - mrq->data->host_cookie = 0; - return ; - } - - if (host->use_dma) - if (omap_hsmmc_pre_dma_transfer(host, mrq->data, - &host->next_data)) - mrq->data->host_cookie = 0; -} - /* * Request function. 
for read/write operation */ @@ -1646,9 +1528,13 @@ static void omap_hsmmc_request(struct mmc_host *mmc, struct mmc_request *req) static void omap_hsmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct omap_hsmmc_host *host = mmc_priv(mmc); + u16 dsor = 0; + unsigned long regval; + unsigned long timeout; + u32 con; int do_send_init_stream = 0; - pm_runtime_get_sync(host->dev); + mmc_host_enable(host->mmc); if (ios->power_mode != host->power_mode) { switch (ios->power_mode) { @@ -1671,7 +1557,22 @@ static void omap_hsmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) /* FIXME: set registers based only on changes to ios */ - omap_hsmmc_set_bus_width(host); + con = OMAP_HSMMC_READ(host->base, CON); + switch (mmc->ios.bus_width) { + case MMC_BUS_WIDTH_8: + OMAP_HSMMC_WRITE(host->base, CON, con | DW8); + break; + case MMC_BUS_WIDTH_4: + OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8); + OMAP_HSMMC_WRITE(host->base, HCTL, + OMAP_HSMMC_READ(host->base, HCTL) | FOUR_BIT); + break; + case MMC_BUS_WIDTH_1: + OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8); + OMAP_HSMMC_WRITE(host->base, HCTL, + OMAP_HSMMC_READ(host->base, HCTL) & ~FOUR_BIT); + break; + } if (host->pdata->controller_flags & OMAP_HSMMC_SUPPORTS_DUAL_VOLT) { /* Only MMC1 can interface at 3V without some flavor @@ -1691,14 +1592,47 @@ static void omap_hsmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } } - omap_hsmmc_set_clock(host); + if (ios->clock) { + dsor = OMAP_MMC_MASTER_CLOCK / ios->clock; + if (dsor < 1) + dsor = 1; + + if (OMAP_MMC_MASTER_CLOCK / dsor > ios->clock) + dsor++; + + if (dsor > 250) + dsor = 250; + } + omap_hsmmc_stop_clock(host); + regval = OMAP_HSMMC_READ(host->base, SYSCTL); + regval = regval & ~(CLKD_MASK); + regval = regval | (dsor << 6) | (DTO << 16); + OMAP_HSMMC_WRITE(host->base, SYSCTL, regval); + OMAP_HSMMC_WRITE(host->base, SYSCTL, + OMAP_HSMMC_READ(host->base, SYSCTL) | ICE); + + /* Wait till the ICS bit is set */ + timeout = jiffies + 
msecs_to_jiffies(MMC_TIMEOUT_MS); + while ((OMAP_HSMMC_READ(host->base, SYSCTL) & ICS) != ICS + && time_before(jiffies, timeout)) + msleep(1); + + OMAP_HSMMC_WRITE(host->base, SYSCTL, + OMAP_HSMMC_READ(host->base, SYSCTL) | CEN); if (do_send_init_stream) send_init_stream(host); - omap_hsmmc_set_bus_mode(host); + con = OMAP_HSMMC_READ(host->base, CON); + if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN) + OMAP_HSMMC_WRITE(host->base, CON, con | OD); + else + OMAP_HSMMC_WRITE(host->base, CON, con & ~OD); - pm_runtime_put_autosuspend(host->dev); + if (host->power_mode == MMC_POWER_OFF) + mmc_host_disable(host->mmc); + else + mmc_host_lazy_disable(host->mmc); } static int omap_hsmmc_get_cd(struct mmc_host *mmc) @@ -1754,12 +1688,230 @@ static void omap_hsmmc_conf_bus_power(struct omap_hsmmc_host *host) set_sd_bus_power(host); } -static int omap_hsmmc_enable_fclk(struct mmc_host *mmc) +/* + * Dynamic power saving handling, FSM: + * ENABLED -> DISABLED -> CARDSLEEP / REGSLEEP -> OFF + * ^___________| | | + * |______________________|______________________| + * + * ENABLED: mmc host is fully functional + * DISABLED: fclk is off + * CARDSLEEP: fclk is off, card is asleep, voltage regulator is asleep + * REGSLEEP: fclk is off, voltage regulator is asleep + * OFF: fclk is off, voltage regulator is off + * + * Transition handlers return the timeout for the next state transition + * or negative error. 
+ */ + +enum {ENABLED = 0, DISABLED, CARDSLEEP, REGSLEEP, OFF}; + +/* Handler for [ENABLED -> DISABLED] transition */ +static int omap_hsmmc_enabled_to_disabled(struct omap_hsmmc_host *host) +{ + omap_hsmmc_context_save(host); + clk_disable(host->fclk); + host->dpm_state = DISABLED; + + dev_dbg(mmc_dev(host->mmc), "ENABLED -> DISABLED\n"); + + if (host->power_mode == MMC_POWER_OFF) + return 0; + + return OMAP_MMC_SLEEP_TIMEOUT; +} + +/* Handler for [DISABLED -> REGSLEEP / CARDSLEEP] transition */ +static int omap_hsmmc_disabled_to_sleep(struct omap_hsmmc_host *host) +{ + int err, new_state; + + if (!mmc_try_claim_host(host->mmc)) + return 0; + + clk_enable(host->fclk); + omap_hsmmc_context_restore(host); + if (mmc_card_can_sleep(host->mmc)) { + err = mmc_card_sleep(host->mmc); + if (err < 0) { + clk_disable(host->fclk); + mmc_release_host(host->mmc); + return err; + } + new_state = CARDSLEEP; + } else { + new_state = REGSLEEP; + } + if (mmc_slot(host).set_sleep) + mmc_slot(host).set_sleep(host->dev, host->slot_id, 1, 0, + new_state == CARDSLEEP); + /* FIXME: turn off bus power and perhaps interrupts too */ + clk_disable(host->fclk); + host->dpm_state = new_state; + + mmc_release_host(host->mmc); + + dev_dbg(mmc_dev(host->mmc), "DISABLED -> %s\n", + host->dpm_state == CARDSLEEP ? 
"CARDSLEEP" : "REGSLEEP"); + + if (mmc_slot(host).no_off) + return 0; + + if ((host->mmc->caps & MMC_CAP_NONREMOVABLE) || + mmc_slot(host).card_detect || + (mmc_slot(host).get_cover_state && + mmc_slot(host).get_cover_state(host->dev, host->slot_id))) + return OMAP_MMC_OFF_TIMEOUT; + + return 0; +} + +/* Handler for [REGSLEEP / CARDSLEEP -> OFF] transition */ +static int omap_hsmmc_sleep_to_off(struct omap_hsmmc_host *host) +{ + if (!mmc_try_claim_host(host->mmc)) + return 0; + + if (mmc_slot(host).no_off) + return 0; + + if (!((host->mmc->caps & MMC_CAP_NONREMOVABLE) || + mmc_slot(host).card_detect || + (mmc_slot(host).get_cover_state && + mmc_slot(host).get_cover_state(host->dev, host->slot_id)))) { + mmc_release_host(host->mmc); + return 0; + } + + mmc_slot(host).set_power(host->dev, host->slot_id, 0, 0); + host->vdd = 0; + host->power_mode = MMC_POWER_OFF; + + dev_dbg(mmc_dev(host->mmc), "%s -> OFF\n", + host->dpm_state == CARDSLEEP ? "CARDSLEEP" : "REGSLEEP"); + + host->dpm_state = OFF; + + mmc_release_host(host->mmc); + + return 0; +} + +/* Handler for [DISABLED -> ENABLED] transition */ +static int omap_hsmmc_disabled_to_enabled(struct omap_hsmmc_host *host) +{ + int err; + + err = clk_enable(host->fclk); + if (err < 0) + return err; + + omap_hsmmc_context_restore(host); + host->dpm_state = ENABLED; + + dev_dbg(mmc_dev(host->mmc), "DISABLED -> ENABLED\n"); + + return 0; +} + +/* Handler for [SLEEP -> ENABLED] transition */ +static int omap_hsmmc_sleep_to_enabled(struct omap_hsmmc_host *host) +{ + if (!mmc_try_claim_host(host->mmc)) + return 0; + + clk_enable(host->fclk); + omap_hsmmc_context_restore(host); + if (mmc_slot(host).set_sleep) + mmc_slot(host).set_sleep(host->dev, host->slot_id, 0, + host->vdd, host->dpm_state == CARDSLEEP); + if (mmc_card_can_sleep(host->mmc)) + mmc_card_awake(host->mmc); + + dev_dbg(mmc_dev(host->mmc), "%s -> ENABLED\n", + host->dpm_state == CARDSLEEP ? 
"CARDSLEEP" : "REGSLEEP"); + + host->dpm_state = ENABLED; + + mmc_release_host(host->mmc); + + return 0; +} + +/* Handler for [OFF -> ENABLED] transition */ +static int omap_hsmmc_off_to_enabled(struct omap_hsmmc_host *host) +{ + clk_enable(host->fclk); + + omap_hsmmc_context_restore(host); + omap_hsmmc_conf_bus_power(host); + mmc_power_restore_host(host->mmc); + + host->dpm_state = ENABLED; + + dev_dbg(mmc_dev(host->mmc), "OFF -> ENABLED\n"); + + return 0; +} + +/* + * Bring MMC host to ENABLED from any other PM state. + */ +static int omap_hsmmc_enable(struct mmc_host *mmc) +{ + struct omap_hsmmc_host *host = mmc_priv(mmc); + + switch (host->dpm_state) { + case DISABLED: + return omap_hsmmc_disabled_to_enabled(host); + case CARDSLEEP: + case REGSLEEP: + return omap_hsmmc_sleep_to_enabled(host); + case OFF: + return omap_hsmmc_off_to_enabled(host); + default: + dev_dbg(mmc_dev(host->mmc), "UNKNOWN state\n"); + return -EINVAL; + } +} + +/* + * Bring MMC host in PM state (one level deeper). 
+ */ +static int omap_hsmmc_disable(struct mmc_host *mmc, int lazy) { struct omap_hsmmc_host *host = mmc_priv(mmc); - pm_runtime_get_sync(host->dev); + switch (host->dpm_state) { + case ENABLED: { + int delay; + + delay = omap_hsmmc_enabled_to_disabled(host); + if (lazy || delay < 0) + return delay; + return 0; + } + case DISABLED: + return omap_hsmmc_disabled_to_sleep(host); + case CARDSLEEP: + case REGSLEEP: + return omap_hsmmc_sleep_to_off(host); + default: + dev_dbg(mmc_dev(host->mmc), "UNKNOWN state\n"); + return -EINVAL; + } +} + +static int omap_hsmmc_enable_fclk(struct mmc_host *mmc) +{ + struct omap_hsmmc_host *host = mmc_priv(mmc); + int err; + err = clk_enable(host->fclk); + if (err) + return err; + dev_dbg(mmc_dev(host->mmc), "mmc_fclk: enabled\n"); + omap_hsmmc_context_restore(host); return 0; } @@ -1767,17 +1919,26 @@ static int omap_hsmmc_disable_fclk(struct mmc_host *mmc, int lazy) { struct omap_hsmmc_host *host = mmc_priv(mmc); - pm_runtime_mark_last_busy(host->dev); - pm_runtime_put_autosuspend(host->dev); - + omap_hsmmc_context_save(host); + clk_disable(host->fclk); + dev_dbg(mmc_dev(host->mmc), "mmc_fclk: disabled\n"); return 0; } static const struct mmc_host_ops omap_hsmmc_ops = { .enable = omap_hsmmc_enable_fclk, .disable = omap_hsmmc_disable_fclk, - .post_req = omap_hsmmc_post_req, - .pre_req = omap_hsmmc_pre_req, + .request = omap_hsmmc_request, + .set_ios = omap_hsmmc_set_ios, + .get_cd = omap_hsmmc_get_cd, + .get_ro = omap_hsmmc_get_ro, + .init_card = omap_hsmmc_init_card, + /* NYET -- enable_sdio_irq */ +}; + +static const struct mmc_host_ops omap_hsmmc_ps_ops = { + .enable = omap_hsmmc_enable, + .disable = omap_hsmmc_disable, .request = omap_hsmmc_request, .set_ios = omap_hsmmc_set_ios, .get_cd = omap_hsmmc_get_cd, @@ -1807,12 +1968,15 @@ static int omap_hsmmc_regs_show(struct seq_file *s, void *data) host->dpm_state, mmc->nesting_cnt, host->context_loss, context_loss); - if (host->suspended) { + if (host->suspended || host->dpm_state == 
OFF) { seq_printf(s, "host suspended, can't read registers\n"); return 0; } - pm_runtime_get_sync(host->dev); + if (clk_enable(host->fclk) != 0) { + seq_printf(s, "can't read the regs\n"); + return 0; + } seq_printf(s, "SYSCONFIG:\t0x%08x\n", OMAP_HSMMC_READ(host->base, SYSCONFIG)); @@ -1829,8 +1993,7 @@ static int omap_hsmmc_regs_show(struct seq_file *s, void *data) seq_printf(s, "CAPA:\t\t0x%08x\n", OMAP_HSMMC_READ(host->base, CAPA)); - pm_runtime_mark_last_busy(host->dev); - pm_runtime_put_autosuspend(host->dev); + clk_disable(host->fclk); return 0; } @@ -1914,12 +2077,14 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev) host->mapbase = res->start; host->base = ioremap(host->mapbase, SZ_4K); host->power_mode = MMC_POWER_OFF; - host->next_data.cookie = 1; platform_set_drvdata(pdev, host); INIT_WORK(&host->mmc_carddetect_work, omap_hsmmc_detect); - mmc->ops = &omap_hsmmc_ops; + if (mmc_slot(host).power_saving) + mmc->ops = &omap_hsmmc_ps_ops; + else + mmc->ops = &omap_hsmmc_ops; /* * If regulator_disable can only put vcc_aux to sleep then there is @@ -1928,26 +2093,44 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev) if (mmc_slot(host).vcc_aux_disable_is_sleep) mmc_slot(host).no_off = 1; - mmc->f_min = OMAP_MMC_MIN_CLOCK; - mmc->f_max = OMAP_MMC_MAX_CLOCK; + mmc->f_min = 400000; + mmc->f_max = 52000000; spin_lock_init(&host->irq_lock); + host->iclk = clk_get(&pdev->dev, "ick"); + if (IS_ERR(host->iclk)) { + ret = PTR_ERR(host->iclk); + host->iclk = NULL; + goto err1; + } host->fclk = clk_get(&pdev->dev, "fck"); if (IS_ERR(host->fclk)) { ret = PTR_ERR(host->fclk); host->fclk = NULL; + clk_put(host->iclk); goto err1; } omap_hsmmc_context_save(host); mmc->caps |= MMC_CAP_DISABLE; + mmc_set_disable_delay(mmc, OMAP_MMC_DISABLED_TIMEOUT); + /* we start off in DISABLED state */ + host->dpm_state = DISABLED; - pm_runtime_enable(host->dev); - pm_runtime_get_sync(host->dev); - pm_runtime_set_autosuspend_delay(host->dev, 
MMC_AUTOSUSPEND_DELAY); - pm_runtime_use_autosuspend(host->dev); + if (clk_enable(host->iclk) != 0) { + clk_put(host->iclk); + clk_put(host->fclk); + goto err1; + } + + if (mmc_host_enable(host->mmc) != 0) { + clk_disable(host->iclk); + clk_put(host->iclk); + clk_put(host->fclk); + goto err1; + } if (cpu_is_omap2430()) { host->dbclk = clk_get(&pdev->dev, "mmchsdb_fck"); @@ -2057,6 +2240,8 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev) omap_hsmmc_disable_irq(host); + mmc_host_lazy_disable(host->mmc); + omap_hsmmc_protect_card(host); mmc_add_host(mmc); @@ -2074,8 +2259,6 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev) } omap_hsmmc_debugfs(mmc); - pm_runtime_mark_last_busy(host->dev); - pm_runtime_put_autosuspend(host->dev); return 0; @@ -2091,9 +2274,10 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev) err_irq_cd_init: free_irq(host->irq, host); err_irq: - pm_runtime_mark_last_busy(host->dev); - pm_runtime_put_autosuspend(host->dev); + mmc_host_disable(host->mmc); + clk_disable(host->iclk); clk_put(host->fclk); + clk_put(host->iclk); if (host->got_dbclk) { clk_disable(host->dbclk); clk_put(host->dbclk); @@ -2115,7 +2299,7 @@ static int omap_hsmmc_remove(struct platform_device *pdev) struct resource *res; if (host) { - pm_runtime_get_sync(host->dev); + mmc_host_enable(host->mmc); mmc_remove_host(host->mmc); if (host->use_reg) omap_hsmmc_reg_put(host); @@ -2126,9 +2310,10 @@ static int omap_hsmmc_remove(struct platform_device *pdev) free_irq(mmc_slot(host).card_detect_irq, host); flush_work_sync(&host->mmc_carddetect_work); - pm_runtime_put_sync(host->dev); - pm_runtime_disable(host->dev); + mmc_host_disable(host->mmc); + clk_disable(host->iclk); clk_put(host->fclk); + clk_put(host->iclk); if (host->got_dbclk) { clk_disable(host->dbclk); clk_put(host->dbclk); @@ -2158,7 +2343,6 @@ static int omap_hsmmc_suspend(struct device *dev) return 0; if (host) { - pm_runtime_get_sync(host->dev); host->suspended = 1; if 
(host->pdata->suspend) { ret = host->pdata->suspend(&pdev->dev, @@ -2173,11 +2357,13 @@ static int omap_hsmmc_suspend(struct device *dev) } cancel_work_sync(&host->mmc_carddetect_work); ret = mmc_suspend_host(host->mmc); - + mmc_host_enable(host->mmc); if (ret == 0) { omap_hsmmc_disable_irq(host); OMAP_HSMMC_WRITE(host->base, HCTL, OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP); + mmc_host_disable(host->mmc); + clk_disable(host->iclk); if (host->got_dbclk) clk_disable(host->dbclk); } else { @@ -2189,8 +2375,9 @@ static int omap_hsmmc_suspend(struct device *dev) dev_dbg(mmc_dev(host->mmc), "Unmask interrupt failed\n"); } + mmc_host_disable(host->mmc); } - pm_runtime_put_sync(host->dev); + } return ret; } @@ -2206,7 +2393,14 @@ static int omap_hsmmc_resume(struct device *dev) return 0; if (host) { - pm_runtime_get_sync(host->dev); + ret = clk_enable(host->iclk); + if (ret) + goto clk_en_err; + + if (mmc_host_enable(host->mmc) != 0) { + clk_disable(host->iclk); + goto clk_en_err; + } if (host->got_dbclk) clk_enable(host->dbclk); @@ -2227,12 +2421,15 @@ static int omap_hsmmc_resume(struct device *dev) if (ret == 0) host->suspended = 0; - pm_runtime_mark_last_busy(host->dev); - pm_runtime_put_autosuspend(host->dev); + mmc_host_lazy_disable(host->mmc); } return ret; +clk_en_err: + dev_dbg(mmc_dev(host->mmc), + "Failed to enable MMC clocks during resume\n"); + return ret; } #else @@ -2240,33 +2437,9 @@ static int omap_hsmmc_resume(struct device *dev) #define omap_hsmmc_resume NULL #endif -static int omap_hsmmc_runtime_suspend(struct device *dev) -{ - struct omap_hsmmc_host *host; - - host = platform_get_drvdata(to_platform_device(dev)); - omap_hsmmc_context_save(host); - dev_dbg(mmc_dev(host->mmc), "disabled\n"); - - return 0; -} - -static int omap_hsmmc_runtime_resume(struct device *dev) -{ - struct omap_hsmmc_host *host; - - host = platform_get_drvdata(to_platform_device(dev)); - omap_hsmmc_context_restore(host); - dev_dbg(mmc_dev(host->mmc), "enabled\n"); - - return 0; -} 
- static struct dev_pm_ops omap_hsmmc_dev_pm_ops = { .suspend = omap_hsmmc_suspend, .resume = omap_hsmmc_resume, - .runtime_suspend = omap_hsmmc_runtime_suspend, - .runtime_resume = omap_hsmmc_runtime_resume, }; static struct platform_driver omap_hsmmc_driver = { diff --git a/trunk/drivers/mmc/host/sdhci-cns3xxx.c b/trunk/drivers/mmc/host/sdhci-cns3xxx.c index 4b920b7621cf..9ebd1d7759dc 100644 --- a/trunk/drivers/mmc/host/sdhci-cns3xxx.c +++ b/trunk/drivers/mmc/host/sdhci-cns3xxx.c @@ -15,7 +15,9 @@ #include #include #include +#include #include +#include "sdhci.h" #include "sdhci-pltfm.h" static unsigned int sdhci_cns3xxx_get_max_clk(struct sdhci_host *host) @@ -84,7 +86,7 @@ static struct sdhci_ops sdhci_cns3xxx_ops = { .set_clock = sdhci_cns3xxx_set_clock, }; -static struct sdhci_pltfm_data sdhci_cns3xxx_pdata = { +struct sdhci_pltfm_data sdhci_cns3xxx_pdata = { .ops = &sdhci_cns3xxx_ops, .quirks = SDHCI_QUIRK_BROKEN_DMA | SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | @@ -93,43 +95,3 @@ static struct sdhci_pltfm_data sdhci_cns3xxx_pdata = { SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | SDHCI_QUIRK_NONSTANDARD_CLOCK, }; - -static int __devinit sdhci_cns3xxx_probe(struct platform_device *pdev) -{ - return sdhci_pltfm_register(pdev, &sdhci_cns3xxx_pdata); -} - -static int __devexit sdhci_cns3xxx_remove(struct platform_device *pdev) -{ - return sdhci_pltfm_unregister(pdev); -} - -static struct platform_driver sdhci_cns3xxx_driver = { - .driver = { - .name = "sdhci-cns3xxx", - .owner = THIS_MODULE, - }, - .probe = sdhci_cns3xxx_probe, - .remove = __devexit_p(sdhci_cns3xxx_remove), -#ifdef CONFIG_PM - .suspend = sdhci_pltfm_suspend, - .resume = sdhci_pltfm_resume, -#endif -}; - -static int __init sdhci_cns3xxx_init(void) -{ - return platform_driver_register(&sdhci_cns3xxx_driver); -} -module_init(sdhci_cns3xxx_init); - -static void __exit sdhci_cns3xxx_exit(void) -{ - platform_driver_unregister(&sdhci_cns3xxx_driver); -} -module_exit(sdhci_cns3xxx_exit); - -MODULE_DESCRIPTION("SDHCI driver 
for CNS3xxx"); -MODULE_AUTHOR("Scott Shu, " - "Anton Vorontsov "); -MODULE_LICENSE("GPL v2"); diff --git a/trunk/drivers/mmc/host/sdhci-dove.c b/trunk/drivers/mmc/host/sdhci-dove.c index f2d29dca4420..2aeef4ffed8c 100644 --- a/trunk/drivers/mmc/host/sdhci-dove.c +++ b/trunk/drivers/mmc/host/sdhci-dove.c @@ -22,6 +22,7 @@ #include #include +#include "sdhci.h" #include "sdhci-pltfm.h" static u16 sdhci_dove_readw(struct sdhci_host *host, int reg) @@ -60,50 +61,10 @@ static struct sdhci_ops sdhci_dove_ops = { .read_l = sdhci_dove_readl, }; -static struct sdhci_pltfm_data sdhci_dove_pdata = { +struct sdhci_pltfm_data sdhci_dove_pdata = { .ops = &sdhci_dove_ops, .quirks = SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER | SDHCI_QUIRK_NO_BUSY_IRQ | SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | SDHCI_QUIRK_FORCE_DMA, }; - -static int __devinit sdhci_dove_probe(struct platform_device *pdev) -{ - return sdhci_pltfm_register(pdev, &sdhci_dove_pdata); -} - -static int __devexit sdhci_dove_remove(struct platform_device *pdev) -{ - return sdhci_pltfm_unregister(pdev); -} - -static struct platform_driver sdhci_dove_driver = { - .driver = { - .name = "sdhci-dove", - .owner = THIS_MODULE, - }, - .probe = sdhci_dove_probe, - .remove = __devexit_p(sdhci_dove_remove), -#ifdef CONFIG_PM - .suspend = sdhci_pltfm_suspend, - .resume = sdhci_pltfm_resume, -#endif -}; - -static int __init sdhci_dove_init(void) -{ - return platform_driver_register(&sdhci_dove_driver); -} -module_init(sdhci_dove_init); - -static void __exit sdhci_dove_exit(void) -{ - platform_driver_unregister(&sdhci_dove_driver); -} -module_exit(sdhci_dove_exit); - -MODULE_DESCRIPTION("SDHCI driver for Dove"); -MODULE_AUTHOR("Saeed Bishara , " - "Mike Rapoport "); -MODULE_LICENSE("GPL v2"); diff --git a/trunk/drivers/mmc/host/sdhci-esdhc-imx.c b/trunk/drivers/mmc/host/sdhci-esdhc-imx.c index 710b706f4fcf..a19967d0bfc4 100644 --- a/trunk/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/trunk/drivers/mmc/host/sdhci-esdhc-imx.c @@ -18,10 +18,12 @@ #include 
#include #include +#include #include #include #include #include +#include "sdhci.h" #include "sdhci-pltfm.h" #include "sdhci-esdhc.h" @@ -29,7 +31,7 @@ #define SDHCI_VENDOR_SPEC 0xC0 #define SDHCI_VENDOR_SPEC_SDIO_QUIRK 0x00000002 -#define ESDHC_FLAG_GPIO_FOR_CD (1 << 0) +#define ESDHC_FLAG_GPIO_FOR_CD_WP (1 << 0) /* * The CMDTYPE of the CMD register (offset 0xE) should be set to * "11" when the STOP CMD12 is issued on imx53 to abort one @@ -65,14 +67,14 @@ static u32 esdhc_readl_le(struct sdhci_host *host, int reg) u32 val = readl(host->ioaddr + reg); if (unlikely((reg == SDHCI_PRESENT_STATE) - && (imx_data->flags & ESDHC_FLAG_GPIO_FOR_CD))) { + && (imx_data->flags & ESDHC_FLAG_GPIO_FOR_CD_WP))) { struct esdhc_platform_data *boarddata = host->mmc->parent->platform_data; if (boarddata && gpio_is_valid(boarddata->cd_gpio) && gpio_get_value(boarddata->cd_gpio)) /* no card, if a valid gpio says so... */ - val &= ~SDHCI_CARD_PRESENT; + val &= SDHCI_CARD_PRESENT; else /* ... in all other cases assume card is present */ val |= SDHCI_CARD_PRESENT; @@ -87,7 +89,7 @@ static void esdhc_writel_le(struct sdhci_host *host, u32 val, int reg) struct pltfm_imx_data *imx_data = pltfm_host->priv; if (unlikely((reg == SDHCI_INT_ENABLE || reg == SDHCI_SIGNAL_ENABLE) - && (imx_data->flags & ESDHC_FLAG_GPIO_FOR_CD))) + && (imx_data->flags & ESDHC_FLAG_GPIO_FOR_CD_WP))) /* * these interrupts won't work with a custom card_detect gpio * (only applied to mx25/35) @@ -189,6 +191,16 @@ static unsigned int esdhc_pltfm_get_min_clock(struct sdhci_host *host) return clk_get_rate(pltfm_host->clk) / 256 / 16; } +static unsigned int esdhc_pltfm_get_ro(struct sdhci_host *host) +{ + struct esdhc_platform_data *boarddata = host->mmc->parent->platform_data; + + if (boarddata && gpio_is_valid(boarddata->wp_gpio)) + return gpio_get_value(boarddata->wp_gpio); + else + return -ENOSYS; +} + static struct sdhci_ops sdhci_esdhc_ops = { .read_l = esdhc_readl_le, .read_w = esdhc_readw_le, @@ -200,24 +212,6 @@ 
static struct sdhci_ops sdhci_esdhc_ops = { .get_min_clock = esdhc_pltfm_get_min_clock, }; -static struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = { - .quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_ADMA - | SDHCI_QUIRK_BROKEN_CARD_DETECTION, - /* ADMA has issues. Might be fixable */ - .ops = &sdhci_esdhc_ops, -}; - -static unsigned int esdhc_pltfm_get_ro(struct sdhci_host *host) -{ - struct esdhc_platform_data *boarddata = - host->mmc->parent->platform_data; - - if (boarddata && gpio_is_valid(boarddata->wp_gpio)) - return gpio_get_value(boarddata->wp_gpio); - else - return -ENOSYS; -} - static irqreturn_t cd_irq(int irq, void *data) { struct sdhci_host *sdhost = (struct sdhci_host *)data; @@ -226,35 +220,30 @@ static irqreturn_t cd_irq(int irq, void *data) return IRQ_HANDLED; }; -static int __devinit sdhci_esdhc_imx_probe(struct platform_device *pdev) +static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pdata) { - struct sdhci_pltfm_host *pltfm_host; - struct sdhci_host *host; - struct esdhc_platform_data *boarddata; + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct esdhc_platform_data *boarddata = host->mmc->parent->platform_data; struct clk *clk; int err; struct pltfm_imx_data *imx_data; - host = sdhci_pltfm_init(pdev, &sdhci_esdhc_imx_pdata); - if (IS_ERR(host)) - return PTR_ERR(host); - - pltfm_host = sdhci_priv(host); - - imx_data = kzalloc(sizeof(struct pltfm_imx_data), GFP_KERNEL); - if (!imx_data) - return -ENOMEM; - pltfm_host->priv = imx_data; - clk = clk_get(mmc_dev(host->mmc), NULL); if (IS_ERR(clk)) { dev_err(mmc_dev(host->mmc), "clk err\n"); - err = PTR_ERR(clk); - goto err_clk_get; + return PTR_ERR(clk); } clk_enable(clk); pltfm_host->clk = clk; + imx_data = kzalloc(sizeof(struct pltfm_imx_data), GFP_KERNEL); + if (!imx_data) { + clk_disable(pltfm_host->clk); + clk_put(pltfm_host->clk); + return -ENOMEM; + } + pltfm_host->priv = imx_data; + if (!cpu_is_mx25()) host->quirks |= 
SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; @@ -268,7 +257,6 @@ static int __devinit sdhci_esdhc_imx_probe(struct platform_device *pdev) if (!(cpu_is_mx25() || cpu_is_mx35() || cpu_is_mx51())) imx_data->flags |= ESDHC_FLAG_MULTIBLK_NO_INT; - boarddata = host->mmc->parent->platform_data; if (boarddata) { err = gpio_request_one(boarddata->wp_gpio, GPIOF_IN, "ESDHC_WP"); if (err) { @@ -296,15 +284,11 @@ static int __devinit sdhci_esdhc_imx_probe(struct platform_device *pdev) goto no_card_detect_irq; } - imx_data->flags |= ESDHC_FLAG_GPIO_FOR_CD; + imx_data->flags |= ESDHC_FLAG_GPIO_FOR_CD_WP; /* Now we have a working card_detect again */ host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION; } - err = sdhci_add_host(host); - if (err) - goto err_add_host; - return 0; no_card_detect_irq: @@ -313,23 +297,14 @@ static int __devinit sdhci_esdhc_imx_probe(struct platform_device *pdev) boarddata->cd_gpio = err; not_supported: kfree(imx_data); - err_add_host: - clk_disable(pltfm_host->clk); - clk_put(pltfm_host->clk); - err_clk_get: - sdhci_pltfm_free(pdev); - return err; + return 0; } -static int __devexit sdhci_esdhc_imx_remove(struct platform_device *pdev) +static void esdhc_pltfm_exit(struct sdhci_host *host) { - struct sdhci_host *host = platform_get_drvdata(pdev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct esdhc_platform_data *boarddata = host->mmc->parent->platform_data; struct pltfm_imx_data *imx_data = pltfm_host->priv; - int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff); - - sdhci_remove_host(host, dead); if (boarddata && gpio_is_valid(boarddata->wp_gpio)) gpio_free(boarddata->wp_gpio); @@ -344,37 +319,13 @@ static int __devexit sdhci_esdhc_imx_remove(struct platform_device *pdev) clk_disable(pltfm_host->clk); clk_put(pltfm_host->clk); kfree(imx_data); - - sdhci_pltfm_free(pdev); - - return 0; } -static struct platform_driver sdhci_esdhc_imx_driver = { - .driver = { - .name = "sdhci-esdhc-imx", - .owner = THIS_MODULE, - }, - .probe = 
sdhci_esdhc_imx_probe, - .remove = __devexit_p(sdhci_esdhc_imx_remove), -#ifdef CONFIG_PM - .suspend = sdhci_pltfm_suspend, - .resume = sdhci_pltfm_resume, -#endif +struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = { + .quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_ADMA + | SDHCI_QUIRK_BROKEN_CARD_DETECTION, + /* ADMA has issues. Might be fixable */ + .ops = &sdhci_esdhc_ops, + .init = esdhc_pltfm_init, + .exit = esdhc_pltfm_exit, }; - -static int __init sdhci_esdhc_imx_init(void) -{ - return platform_driver_register(&sdhci_esdhc_imx_driver); -} -module_init(sdhci_esdhc_imx_init); - -static void __exit sdhci_esdhc_imx_exit(void) -{ - platform_driver_unregister(&sdhci_esdhc_imx_driver); -} -module_exit(sdhci_esdhc_imx_exit); - -MODULE_DESCRIPTION("SDHCI driver for Freescale i.MX eSDHC"); -MODULE_AUTHOR("Wolfram Sang "); -MODULE_LICENSE("GPL v2"); diff --git a/trunk/drivers/mmc/host/sdhci-of-core.c b/trunk/drivers/mmc/host/sdhci-of-core.c new file mode 100644 index 000000000000..60e4186a4345 --- /dev/null +++ b/trunk/drivers/mmc/host/sdhci-of-core.c @@ -0,0 +1,253 @@ +/* + * OpenFirmware bindings for Secure Digital Host Controller Interface. + * + * Copyright (c) 2007 Freescale Semiconductor, Inc. + * Copyright (c) 2009 MontaVista Software, Inc. + * + * Authors: Xiaobo Xie + * Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PPC +#include +#endif +#include "sdhci-of.h" +#include "sdhci.h" + +#ifdef CONFIG_MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER + +/* + * These accessors are designed for big endian hosts doing I/O to + * little endian controllers incorporating a 32-bit hardware byte swapper. 
+ */ + +u32 sdhci_be32bs_readl(struct sdhci_host *host, int reg) +{ + return in_be32(host->ioaddr + reg); +} + +u16 sdhci_be32bs_readw(struct sdhci_host *host, int reg) +{ + return in_be16(host->ioaddr + (reg ^ 0x2)); +} + +u8 sdhci_be32bs_readb(struct sdhci_host *host, int reg) +{ + return in_8(host->ioaddr + (reg ^ 0x3)); +} + +void sdhci_be32bs_writel(struct sdhci_host *host, u32 val, int reg) +{ + out_be32(host->ioaddr + reg, val); +} + +void sdhci_be32bs_writew(struct sdhci_host *host, u16 val, int reg) +{ + struct sdhci_of_host *of_host = sdhci_priv(host); + int base = reg & ~0x3; + int shift = (reg & 0x2) * 8; + + switch (reg) { + case SDHCI_TRANSFER_MODE: + /* + * Postpone this write, we must do it together with a + * command write that is down below. + */ + of_host->xfer_mode_shadow = val; + return; + case SDHCI_COMMAND: + sdhci_be32bs_writel(host, val << 16 | of_host->xfer_mode_shadow, + SDHCI_TRANSFER_MODE); + return; + } + clrsetbits_be32(host->ioaddr + base, 0xffff << shift, val << shift); +} + +void sdhci_be32bs_writeb(struct sdhci_host *host, u8 val, int reg) +{ + int base = reg & ~0x3; + int shift = (reg & 0x3) * 8; + + clrsetbits_be32(host->ioaddr + base , 0xff << shift, val << shift); +} +#endif /* CONFIG_MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER */ + +#ifdef CONFIG_PM + +static int sdhci_of_suspend(struct platform_device *ofdev, pm_message_t state) +{ + struct sdhci_host *host = dev_get_drvdata(&ofdev->dev); + + return mmc_suspend_host(host->mmc); +} + +static int sdhci_of_resume(struct platform_device *ofdev) +{ + struct sdhci_host *host = dev_get_drvdata(&ofdev->dev); + + return mmc_resume_host(host->mmc); +} + +#else + +#define sdhci_of_suspend NULL +#define sdhci_of_resume NULL + +#endif + +static bool __devinit sdhci_of_wp_inverted(struct device_node *np) +{ + if (of_get_property(np, "sdhci,wp-inverted", NULL)) + return true; + + /* Old device trees don't have the wp-inverted property. 
*/ +#ifdef CONFIG_PPC + return machine_is(mpc837x_rdb) || machine_is(mpc837x_mds); +#else + return false; +#endif +} + +static const struct of_device_id sdhci_of_match[]; +static int __devinit sdhci_of_probe(struct platform_device *ofdev) +{ + const struct of_device_id *match; + struct device_node *np = ofdev->dev.of_node; + struct sdhci_of_data *sdhci_of_data; + struct sdhci_host *host; + struct sdhci_of_host *of_host; + const __be32 *clk; + int size; + int ret; + + match = of_match_device(sdhci_of_match, &ofdev->dev); + if (!match) + return -EINVAL; + sdhci_of_data = match->data; + + if (!of_device_is_available(np)) + return -ENODEV; + + host = sdhci_alloc_host(&ofdev->dev, sizeof(*of_host)); + if (IS_ERR(host)) + return -ENOMEM; + + of_host = sdhci_priv(host); + dev_set_drvdata(&ofdev->dev, host); + + host->ioaddr = of_iomap(np, 0); + if (!host->ioaddr) { + ret = -ENOMEM; + goto err_addr_map; + } + + host->irq = irq_of_parse_and_map(np, 0); + if (!host->irq) { + ret = -EINVAL; + goto err_no_irq; + } + + host->hw_name = dev_name(&ofdev->dev); + if (sdhci_of_data) { + host->quirks = sdhci_of_data->quirks; + host->ops = &sdhci_of_data->ops; + } + + if (of_get_property(np, "sdhci,auto-cmd12", NULL)) + host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12; + + + if (of_get_property(np, "sdhci,1-bit-only", NULL)) + host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA; + + if (sdhci_of_wp_inverted(np)) + host->quirks |= SDHCI_QUIRK_INVERTED_WRITE_PROTECT; + + clk = of_get_property(np, "clock-frequency", &size); + if (clk && size == sizeof(*clk) && *clk) + of_host->clock = be32_to_cpup(clk); + + ret = sdhci_add_host(host); + if (ret) + goto err_add_host; + + return 0; + +err_add_host: + irq_dispose_mapping(host->irq); +err_no_irq: + iounmap(host->ioaddr); +err_addr_map: + sdhci_free_host(host); + return ret; +} + +static int __devexit sdhci_of_remove(struct platform_device *ofdev) +{ + struct sdhci_host *host = dev_get_drvdata(&ofdev->dev); + + sdhci_remove_host(host, 0); + 
sdhci_free_host(host); + irq_dispose_mapping(host->irq); + iounmap(host->ioaddr); + return 0; +} + +static const struct of_device_id sdhci_of_match[] = { +#ifdef CONFIG_MMC_SDHCI_OF_ESDHC + { .compatible = "fsl,mpc8379-esdhc", .data = &sdhci_esdhc, }, + { .compatible = "fsl,mpc8536-esdhc", .data = &sdhci_esdhc, }, + { .compatible = "fsl,esdhc", .data = &sdhci_esdhc, }, +#endif +#ifdef CONFIG_MMC_SDHCI_OF_HLWD + { .compatible = "nintendo,hollywood-sdhci", .data = &sdhci_hlwd, }, +#endif + { .compatible = "generic-sdhci", }, + {}, +}; +MODULE_DEVICE_TABLE(of, sdhci_of_match); + +static struct platform_driver sdhci_of_driver = { + .driver = { + .name = "sdhci-of", + .owner = THIS_MODULE, + .of_match_table = sdhci_of_match, + }, + .probe = sdhci_of_probe, + .remove = __devexit_p(sdhci_of_remove), + .suspend = sdhci_of_suspend, + .resume = sdhci_of_resume, +}; + +static int __init sdhci_of_init(void) +{ + return platform_driver_register(&sdhci_of_driver); +} +module_init(sdhci_of_init); + +static void __exit sdhci_of_exit(void) +{ + platform_driver_unregister(&sdhci_of_driver); +} +module_exit(sdhci_of_exit); + +MODULE_DESCRIPTION("Secure Digital Host Controller Interface OF driver"); +MODULE_AUTHOR("Xiaobo Xie , " + "Anton Vorontsov "); +MODULE_LICENSE("GPL"); diff --git a/trunk/drivers/mmc/host/sdhci-of-esdhc.c b/trunk/drivers/mmc/host/sdhci-of-esdhc.c index fe604df65011..ba40d6d035c7 100644 --- a/trunk/drivers/mmc/host/sdhci-of-esdhc.c +++ b/trunk/drivers/mmc/host/sdhci-of-esdhc.c @@ -16,7 +16,8 @@ #include #include #include -#include "sdhci-pltfm.h" +#include "sdhci-of.h" +#include "sdhci.h" #include "sdhci-esdhc.h" static u16 esdhc_readw(struct sdhci_host *host, int reg) @@ -59,83 +60,32 @@ static int esdhc_of_enable_dma(struct sdhci_host *host) static unsigned int esdhc_of_get_max_clock(struct sdhci_host *host) { - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_of_host *of_host = sdhci_priv(host); - return pltfm_host->clock; + return 
of_host->clock; } static unsigned int esdhc_of_get_min_clock(struct sdhci_host *host) { - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_of_host *of_host = sdhci_priv(host); - return pltfm_host->clock / 256 / 16; + return of_host->clock / 256 / 16; } -static struct sdhci_ops sdhci_esdhc_ops = { - .read_l = sdhci_be32bs_readl, - .read_w = esdhc_readw, - .read_b = sdhci_be32bs_readb, - .write_l = sdhci_be32bs_writel, - .write_w = esdhc_writew, - .write_b = esdhc_writeb, - .set_clock = esdhc_set_clock, - .enable_dma = esdhc_of_enable_dma, - .get_max_clock = esdhc_of_get_max_clock, - .get_min_clock = esdhc_of_get_min_clock, -}; - -static struct sdhci_pltfm_data sdhci_esdhc_pdata = { +struct sdhci_of_data sdhci_esdhc = { /* card detection could be handled via GPIO */ .quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_CARD_DETECTION | SDHCI_QUIRK_NO_CARD_NO_RESET, - .ops = &sdhci_esdhc_ops, -}; - -static int __devinit sdhci_esdhc_probe(struct platform_device *pdev) -{ - return sdhci_pltfm_register(pdev, &sdhci_esdhc_pdata); -} - -static int __devexit sdhci_esdhc_remove(struct platform_device *pdev) -{ - return sdhci_pltfm_unregister(pdev); -} - -static const struct of_device_id sdhci_esdhc_of_match[] = { - { .compatible = "fsl,mpc8379-esdhc" }, - { .compatible = "fsl,mpc8536-esdhc" }, - { .compatible = "fsl,esdhc" }, - { } -}; -MODULE_DEVICE_TABLE(of, sdhci_esdhc_of_match); - -static struct platform_driver sdhci_esdhc_driver = { - .driver = { - .name = "sdhci-esdhc", - .owner = THIS_MODULE, - .of_match_table = sdhci_esdhc_of_match, + .ops = { + .read_l = sdhci_be32bs_readl, + .read_w = esdhc_readw, + .read_b = sdhci_be32bs_readb, + .write_l = sdhci_be32bs_writel, + .write_w = esdhc_writew, + .write_b = esdhc_writeb, + .set_clock = esdhc_set_clock, + .enable_dma = esdhc_of_enable_dma, + .get_max_clock = esdhc_of_get_max_clock, + .get_min_clock = esdhc_of_get_min_clock, }, - .probe = sdhci_esdhc_probe, - .remove = __devexit_p(sdhci_esdhc_remove), 
-#ifdef CONFIG_PM - .suspend = sdhci_pltfm_suspend, - .resume = sdhci_pltfm_resume, -#endif }; - -static int __init sdhci_esdhc_init(void) -{ - return platform_driver_register(&sdhci_esdhc_driver); -} -module_init(sdhci_esdhc_init); - -static void __exit sdhci_esdhc_exit(void) -{ - platform_driver_unregister(&sdhci_esdhc_driver); -} -module_exit(sdhci_esdhc_exit); - -MODULE_DESCRIPTION("SDHCI OF driver for Freescale MPC eSDHC"); -MODULE_AUTHOR("Xiaobo Xie , " - "Anton Vorontsov "); -MODULE_LICENSE("GPL v2"); diff --git a/trunk/drivers/mmc/host/sdhci-of-hlwd.c b/trunk/drivers/mmc/host/sdhci-of-hlwd.c index 735be131dca9..68ddb7546ae2 100644 --- a/trunk/drivers/mmc/host/sdhci-of-hlwd.c +++ b/trunk/drivers/mmc/host/sdhci-of-hlwd.c @@ -21,7 +21,8 @@ #include #include -#include "sdhci-pltfm.h" +#include "sdhci-of.h" +#include "sdhci.h" /* * Ops and quirks for the Nintendo Wii SDHCI controllers. @@ -50,63 +51,15 @@ static void sdhci_hlwd_writeb(struct sdhci_host *host, u8 val, int reg) udelay(SDHCI_HLWD_WRITE_DELAY); } -static struct sdhci_ops sdhci_hlwd_ops = { - .read_l = sdhci_be32bs_readl, - .read_w = sdhci_be32bs_readw, - .read_b = sdhci_be32bs_readb, - .write_l = sdhci_hlwd_writel, - .write_w = sdhci_hlwd_writew, - .write_b = sdhci_hlwd_writeb, -}; - -static struct sdhci_pltfm_data sdhci_hlwd_pdata = { +struct sdhci_of_data sdhci_hlwd = { .quirks = SDHCI_QUIRK_32BIT_DMA_ADDR | SDHCI_QUIRK_32BIT_DMA_SIZE, - .ops = &sdhci_hlwd_ops, -}; - -static int __devinit sdhci_hlwd_probe(struct platform_device *pdev) -{ - return sdhci_pltfm_register(pdev, &sdhci_hlwd_pdata); -} - -static int __devexit sdhci_hlwd_remove(struct platform_device *pdev) -{ - return sdhci_pltfm_unregister(pdev); -} - -static const struct of_device_id sdhci_hlwd_of_match[] = { - { .compatible = "nintendo,hollywood-sdhci" }, - { } -}; -MODULE_DEVICE_TABLE(of, sdhci_hlwd_of_match); - -static struct platform_driver sdhci_hlwd_driver = { - .driver = { - .name = "sdhci-hlwd", - .owner = THIS_MODULE, - 
.of_match_table = sdhci_hlwd_of_match, + .ops = { + .read_l = sdhci_be32bs_readl, + .read_w = sdhci_be32bs_readw, + .read_b = sdhci_be32bs_readb, + .write_l = sdhci_hlwd_writel, + .write_w = sdhci_hlwd_writew, + .write_b = sdhci_hlwd_writeb, }, - .probe = sdhci_hlwd_probe, - .remove = __devexit_p(sdhci_hlwd_remove), -#ifdef CONFIG_PM - .suspend = sdhci_pltfm_suspend, - .resume = sdhci_pltfm_resume, -#endif }; - -static int __init sdhci_hlwd_init(void) -{ - return platform_driver_register(&sdhci_hlwd_driver); -} -module_init(sdhci_hlwd_init); - -static void __exit sdhci_hlwd_exit(void) -{ - platform_driver_unregister(&sdhci_hlwd_driver); -} -module_exit(sdhci_hlwd_exit); - -MODULE_DESCRIPTION("Nintendo Wii SDHCI OF driver"); -MODULE_AUTHOR("The GameCube Linux Team, Albert Herranz"); -MODULE_LICENSE("GPL v2"); diff --git a/trunk/drivers/mmc/host/sdhci-of.h b/trunk/drivers/mmc/host/sdhci-of.h new file mode 100644 index 000000000000..ad09ad9915d8 --- /dev/null +++ b/trunk/drivers/mmc/host/sdhci-of.h @@ -0,0 +1,42 @@ +/* + * OpenFirmware bindings for Secure Digital Host Controller Interface. + * + * Copyright (c) 2007 Freescale Semiconductor, Inc. + * Copyright (c) 2009 MontaVista Software, Inc. + * + * Authors: Xiaobo Xie + * Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. 
+ */ + +#ifndef __SDHCI_OF_H +#define __SDHCI_OF_H + +#include +#include "sdhci.h" + +struct sdhci_of_data { + unsigned int quirks; + struct sdhci_ops ops; +}; + +struct sdhci_of_host { + unsigned int clock; + u16 xfer_mode_shadow; +}; + +extern u32 sdhci_be32bs_readl(struct sdhci_host *host, int reg); +extern u16 sdhci_be32bs_readw(struct sdhci_host *host, int reg); +extern u8 sdhci_be32bs_readb(struct sdhci_host *host, int reg); +extern void sdhci_be32bs_writel(struct sdhci_host *host, u32 val, int reg); +extern void sdhci_be32bs_writew(struct sdhci_host *host, u16 val, int reg); +extern void sdhci_be32bs_writeb(struct sdhci_host *host, u8 val, int reg); + +extern struct sdhci_of_data sdhci_esdhc; +extern struct sdhci_of_data sdhci_hlwd; + +#endif /* __SDHCI_OF_H */ diff --git a/trunk/drivers/mmc/host/sdhci-pci.c b/trunk/drivers/mmc/host/sdhci-pci.c index 26c528648f3c..936bbca19c0a 100644 --- a/trunk/drivers/mmc/host/sdhci-pci.c +++ b/trunk/drivers/mmc/host/sdhci-pci.c @@ -143,12 +143,6 @@ static const struct sdhci_pci_fixes sdhci_cafe = { SDHCI_QUIRK_BROKEN_TIMEOUT_VAL, }; -static int mrst_hc_probe_slot(struct sdhci_pci_slot *slot) -{ - slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA; - return 0; -} - /* * ADMA operation is disabled for Moorestown platform due to * hardware bugs. 
@@ -163,15 +157,8 @@ static int mrst_hc_probe(struct sdhci_pci_chip *chip) return 0; } -static int mfd_emmc_probe_slot(struct sdhci_pci_slot *slot) -{ - slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA; - return 0; -} - static const struct sdhci_pci_fixes sdhci_intel_mrst_hc0 = { .quirks = SDHCI_QUIRK_BROKEN_ADMA | SDHCI_QUIRK_NO_HISPD_BIT, - .probe_slot = mrst_hc_probe_slot, }; static const struct sdhci_pci_fixes sdhci_intel_mrst_hc1_hc2 = { @@ -183,13 +170,8 @@ static const struct sdhci_pci_fixes sdhci_intel_mfd_sd = { .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, }; -static const struct sdhci_pci_fixes sdhci_intel_mfd_sdio = { - .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, -}; - -static const struct sdhci_pci_fixes sdhci_intel_mfd_emmc = { +static const struct sdhci_pci_fixes sdhci_intel_mfd_emmc_sdio = { .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, - .probe_slot = mfd_emmc_probe_slot, }; /* O2Micro extra registers */ @@ -700,7 +682,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_INTEL_MFD_SDIO1, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_sdio, + .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio, }, { @@ -708,7 +690,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_INTEL_MFD_SDIO2, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_sdio, + .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio, }, { @@ -716,7 +698,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_INTEL_MFD_EMMC0, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_emmc, + .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio, }, { @@ -724,7 +706,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_INTEL_MFD_EMMC1, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - 
.driver_data = (kernel_ulong_t)&sdhci_intel_mfd_emmc, + .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio, }, { @@ -807,34 +789,8 @@ static int sdhci_pci_enable_dma(struct sdhci_host *host) return 0; } -static int sdhci_pci_8bit_width(struct sdhci_host *host, int width) -{ - u8 ctrl; - - ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); - - switch (width) { - case MMC_BUS_WIDTH_8: - ctrl |= SDHCI_CTRL_8BITBUS; - ctrl &= ~SDHCI_CTRL_4BITBUS; - break; - case MMC_BUS_WIDTH_4: - ctrl |= SDHCI_CTRL_4BITBUS; - ctrl &= ~SDHCI_CTRL_8BITBUS; - break; - default: - ctrl &= ~(SDHCI_CTRL_8BITBUS | SDHCI_CTRL_4BITBUS); - break; - } - - sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL); - - return 0; -} - static struct sdhci_ops sdhci_pci_ops = { .enable_dma = sdhci_pci_enable_dma, - .platform_8bit_width = sdhci_pci_8bit_width, }; /*****************************************************************************\ diff --git a/trunk/drivers/mmc/host/sdhci-pltfm.c b/trunk/drivers/mmc/host/sdhci-pltfm.c index 71c0ce1f6db0..dbab0407f4b6 100644 --- a/trunk/drivers/mmc/host/sdhci-pltfm.c +++ b/trunk/drivers/mmc/host/sdhci-pltfm.c @@ -2,12 +2,6 @@ * sdhci-pltfm.c Support for SDHCI platform devices * Copyright (c) 2009 Intel Corporation * - * Copyright (c) 2007 Freescale Semiconductor, Inc. - * Copyright (c) 2009 MontaVista Software, Inc. - * - * Authors: Xiaobo Xie - * Anton Vorontsov - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
@@ -28,66 +22,48 @@ * Inspired by sdhci-pci.c, by Pierre Ossman */ -#include -#include -#ifdef CONFIG_PPC -#include -#endif -#include "sdhci-pltfm.h" - -static struct sdhci_ops sdhci_pltfm_ops = { -}; +#include +#include +#include +#include -#ifdef CONFIG_OF -static bool sdhci_of_wp_inverted(struct device_node *np) -{ - if (of_get_property(np, "sdhci,wp-inverted", NULL)) - return true; +#include - /* Old device trees don't have the wp-inverted property. */ -#ifdef CONFIG_PPC - return machine_is(mpc837x_rdb) || machine_is(mpc837x_mds); -#else - return false; -#endif /* CONFIG_PPC */ -} +#include +#include -void sdhci_get_of_property(struct platform_device *pdev) -{ - struct device_node *np = pdev->dev.of_node; - struct sdhci_host *host = platform_get_drvdata(pdev); - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - const __be32 *clk; - int size; +#include "sdhci.h" +#include "sdhci-pltfm.h" - if (of_device_is_available(np)) { - if (of_get_property(np, "sdhci,auto-cmd12", NULL)) - host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12; +/*****************************************************************************\ + * * + * SDHCI core callbacks * + * * +\*****************************************************************************/ - if (of_get_property(np, "sdhci,1-bit-only", NULL)) - host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA; +static struct sdhci_ops sdhci_pltfm_ops = { +}; - if (sdhci_of_wp_inverted(np)) - host->quirks |= SDHCI_QUIRK_INVERTED_WRITE_PROTECT; +/*****************************************************************************\ + * * + * Device probing/removal * + * * +\*****************************************************************************/ - clk = of_get_property(np, "clock-frequency", &size); - if (clk && size == sizeof(*clk) && *clk) - pltfm_host->clock = be32_to_cpup(clk); - } -} -#else -void sdhci_get_of_property(struct platform_device *pdev) {} -#endif /* CONFIG_OF */ -EXPORT_SYMBOL_GPL(sdhci_get_of_property); - -struct sdhci_host 
*sdhci_pltfm_init(struct platform_device *pdev, - struct sdhci_pltfm_data *pdata) +static int __devinit sdhci_pltfm_probe(struct platform_device *pdev) { + const struct platform_device_id *platid = platform_get_device_id(pdev); + struct sdhci_pltfm_data *pdata; struct sdhci_host *host; struct sdhci_pltfm_host *pltfm_host; struct resource *iomem; int ret; + if (platid && platid->driver_data) + pdata = (void *)platid->driver_data; + else + pdata = pdev->dev.platform_data; + iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!iomem) { ret = -ENOMEM; @@ -95,7 +71,8 @@ struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev, } if (resource_size(iomem) < 0x100) - dev_err(&pdev->dev, "Invalid iomem size!\n"); + dev_err(&pdev->dev, "Invalid iomem size. You may " + "experience problems.\n"); /* Some PCI-based MFD need the parent here */ if (pdev->dev.parent != &platform_bus) @@ -110,7 +87,7 @@ struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev, pltfm_host = sdhci_priv(host); - host->hw_name = dev_name(&pdev->dev); + host->hw_name = "platform"; if (pdata && pdata->ops) host->ops = pdata->ops; else @@ -133,95 +110,126 @@ struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev, goto err_remap; } + if (pdata && pdata->init) { + ret = pdata->init(host, pdata); + if (ret) + goto err_plat_init; + } + + ret = sdhci_add_host(host); + if (ret) + goto err_add_host; + platform_set_drvdata(pdev, host); - return host; + return 0; +err_add_host: + if (pdata && pdata->exit) + pdata->exit(host); +err_plat_init: + iounmap(host->ioaddr); err_remap: release_mem_region(iomem->start, resource_size(iomem)); err_request: sdhci_free_host(host); err: - dev_err(&pdev->dev, "%s failed %d\n", __func__, ret); - return ERR_PTR(ret); + printk(KERN_ERR"Probing of sdhci-pltfm failed: %d\n", ret); + return ret; } -EXPORT_SYMBOL_GPL(sdhci_pltfm_init); -void sdhci_pltfm_free(struct platform_device *pdev) +static int __devexit sdhci_pltfm_remove(struct platform_device 
*pdev) { + struct sdhci_pltfm_data *pdata = pdev->dev.platform_data; struct sdhci_host *host = platform_get_drvdata(pdev); struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + int dead; + u32 scratch; + + dead = 0; + scratch = readl(host->ioaddr + SDHCI_INT_STATUS); + if (scratch == (u32)-1) + dead = 1; + sdhci_remove_host(host, dead); + if (pdata && pdata->exit) + pdata->exit(host); iounmap(host->ioaddr); release_mem_region(iomem->start, resource_size(iomem)); sdhci_free_host(host); platform_set_drvdata(pdev, NULL); -} -EXPORT_SYMBOL_GPL(sdhci_pltfm_free); - -int sdhci_pltfm_register(struct platform_device *pdev, - struct sdhci_pltfm_data *pdata) -{ - struct sdhci_host *host; - int ret = 0; - - host = sdhci_pltfm_init(pdev, pdata); - if (IS_ERR(host)) - return PTR_ERR(host); - - sdhci_get_of_property(pdev); - - ret = sdhci_add_host(host); - if (ret) - sdhci_pltfm_free(pdev); - - return ret; -} -EXPORT_SYMBOL_GPL(sdhci_pltfm_register); - -int sdhci_pltfm_unregister(struct platform_device *pdev) -{ - struct sdhci_host *host = platform_get_drvdata(pdev); - int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff); - - sdhci_remove_host(host, dead); - sdhci_pltfm_free(pdev); return 0; } -EXPORT_SYMBOL_GPL(sdhci_pltfm_unregister); + +static const struct platform_device_id sdhci_pltfm_ids[] = { + { "sdhci", }, +#ifdef CONFIG_MMC_SDHCI_CNS3XXX + { "sdhci-cns3xxx", (kernel_ulong_t)&sdhci_cns3xxx_pdata }, +#endif +#ifdef CONFIG_MMC_SDHCI_ESDHC_IMX + { "sdhci-esdhc-imx", (kernel_ulong_t)&sdhci_esdhc_imx_pdata }, +#endif +#ifdef CONFIG_MMC_SDHCI_DOVE + { "sdhci-dove", (kernel_ulong_t)&sdhci_dove_pdata }, +#endif +#ifdef CONFIG_MMC_SDHCI_TEGRA + { "sdhci-tegra", (kernel_ulong_t)&sdhci_tegra_pdata }, +#endif + { }, +}; +MODULE_DEVICE_TABLE(platform, sdhci_pltfm_ids); #ifdef CONFIG_PM -int sdhci_pltfm_suspend(struct platform_device *dev, pm_message_t state) +static int sdhci_pltfm_suspend(struct platform_device *dev, pm_message_t state) { struct 
sdhci_host *host = platform_get_drvdata(dev); return sdhci_suspend_host(host, state); } -EXPORT_SYMBOL_GPL(sdhci_pltfm_suspend); -int sdhci_pltfm_resume(struct platform_device *dev) +static int sdhci_pltfm_resume(struct platform_device *dev) { struct sdhci_host *host = platform_get_drvdata(dev); return sdhci_resume_host(host); } -EXPORT_SYMBOL_GPL(sdhci_pltfm_resume); +#else +#define sdhci_pltfm_suspend NULL +#define sdhci_pltfm_resume NULL #endif /* CONFIG_PM */ -static int __init sdhci_pltfm_drv_init(void) -{ - pr_info("sdhci-pltfm: SDHCI platform and OF driver helper\n"); +static struct platform_driver sdhci_pltfm_driver = { + .driver = { + .name = "sdhci", + .owner = THIS_MODULE, + }, + .probe = sdhci_pltfm_probe, + .remove = __devexit_p(sdhci_pltfm_remove), + .id_table = sdhci_pltfm_ids, + .suspend = sdhci_pltfm_suspend, + .resume = sdhci_pltfm_resume, +}; - return 0; +/*****************************************************************************\ + * * + * Driver init/exit * + * * +\*****************************************************************************/ + +static int __init sdhci_drv_init(void) +{ + return platform_driver_register(&sdhci_pltfm_driver); } -module_init(sdhci_pltfm_drv_init); -static void __exit sdhci_pltfm_drv_exit(void) +static void __exit sdhci_drv_exit(void) { + platform_driver_unregister(&sdhci_pltfm_driver); } -module_exit(sdhci_pltfm_drv_exit); -MODULE_DESCRIPTION("SDHCI platform and OF driver helper"); -MODULE_AUTHOR("Intel Corporation"); +module_init(sdhci_drv_init); +module_exit(sdhci_drv_exit); + +MODULE_DESCRIPTION("Secure Digital Host Controller Interface platform driver"); +MODULE_AUTHOR("Mocean Laboratories "); MODULE_LICENSE("GPL v2"); diff --git a/trunk/drivers/mmc/host/sdhci-pltfm.h b/trunk/drivers/mmc/host/sdhci-pltfm.h index 3a9fc3f40840..2b37016ad0ac 100644 --- a/trunk/drivers/mmc/host/sdhci-pltfm.h +++ b/trunk/drivers/mmc/host/sdhci-pltfm.h @@ -12,95 +12,17 @@ #define _DRIVERS_MMC_SDHCI_PLTFM_H #include -#include 
-#include "sdhci.h" - -struct sdhci_pltfm_data { - struct sdhci_ops *ops; - unsigned int quirks; -}; +#include +#include struct sdhci_pltfm_host { struct clk *clk; void *priv; /* to handle quirks across io-accessor calls */ - - /* migrate from sdhci_of_host */ - unsigned int clock; - u16 xfer_mode_shadow; }; -#ifdef CONFIG_MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER -/* - * These accessors are designed for big endian hosts doing I/O to - * little endian controllers incorporating a 32-bit hardware byte swapper. - */ -static inline u32 sdhci_be32bs_readl(struct sdhci_host *host, int reg) -{ - return in_be32(host->ioaddr + reg); -} - -static inline u16 sdhci_be32bs_readw(struct sdhci_host *host, int reg) -{ - return in_be16(host->ioaddr + (reg ^ 0x2)); -} - -static inline u8 sdhci_be32bs_readb(struct sdhci_host *host, int reg) -{ - return in_8(host->ioaddr + (reg ^ 0x3)); -} - -static inline void sdhci_be32bs_writel(struct sdhci_host *host, - u32 val, int reg) -{ - out_be32(host->ioaddr + reg, val); -} - -static inline void sdhci_be32bs_writew(struct sdhci_host *host, - u16 val, int reg) -{ - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - int base = reg & ~0x3; - int shift = (reg & 0x2) * 8; - - switch (reg) { - case SDHCI_TRANSFER_MODE: - /* - * Postpone this write, we must do it together with a - * command write that is down below. 
- */ - pltfm_host->xfer_mode_shadow = val; - return; - case SDHCI_COMMAND: - sdhci_be32bs_writel(host, - val << 16 | pltfm_host->xfer_mode_shadow, - SDHCI_TRANSFER_MODE); - return; - } - clrsetbits_be32(host->ioaddr + base, 0xffff << shift, val << shift); -} - -static inline void sdhci_be32bs_writeb(struct sdhci_host *host, u8 val, int reg) -{ - int base = reg & ~0x3; - int shift = (reg & 0x3) * 8; - - clrsetbits_be32(host->ioaddr + base , 0xff << shift, val << shift); -} -#endif /* CONFIG_MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER */ - -extern void sdhci_get_of_property(struct platform_device *pdev); - -extern struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev, - struct sdhci_pltfm_data *pdata); -extern void sdhci_pltfm_free(struct platform_device *pdev); - -extern int sdhci_pltfm_register(struct platform_device *pdev, - struct sdhci_pltfm_data *pdata); -extern int sdhci_pltfm_unregister(struct platform_device *pdev); - -#ifdef CONFIG_PM -extern int sdhci_pltfm_suspend(struct platform_device *dev, pm_message_t state); -extern int sdhci_pltfm_resume(struct platform_device *dev); -#endif +extern struct sdhci_pltfm_data sdhci_cns3xxx_pdata; +extern struct sdhci_pltfm_data sdhci_esdhc_imx_pdata; +extern struct sdhci_pltfm_data sdhci_dove_pdata; +extern struct sdhci_pltfm_data sdhci_tegra_pdata; #endif /* _DRIVERS_MMC_SDHCI_PLTFM_H */ diff --git a/trunk/drivers/mmc/host/sdhci-pxa.c b/trunk/drivers/mmc/host/sdhci-pxa.c new file mode 100644 index 000000000000..089c9a68b7b1 --- /dev/null +++ b/trunk/drivers/mmc/host/sdhci-pxa.c @@ -0,0 +1,303 @@ +/* linux/drivers/mmc/host/sdhci-pxa.c + * + * Copyright (C) 2010 Marvell International Ltd. + * Zhangfei Gao + * Kevin Wang + * Mingwei Wang + * Philip Rakity + * Mark Brown + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* Supports: + * SDHCI support for MMP2/PXA910/PXA168 + * + * Refer to sdhci-s3c.c. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "sdhci.h" + +#define DRIVER_NAME "sdhci-pxa" + +#define SD_FIFO_PARAM 0x104 +#define DIS_PAD_SD_CLK_GATE 0x400 + +struct sdhci_pxa { + struct sdhci_host *host; + struct sdhci_pxa_platdata *pdata; + struct clk *clk; + struct resource *res; + + u8 clk_enable; +}; + +/*****************************************************************************\ + * * + * SDHCI core callbacks * + * * +\*****************************************************************************/ +static void set_clock(struct sdhci_host *host, unsigned int clock) +{ + struct sdhci_pxa *pxa = sdhci_priv(host); + u32 tmp = 0; + + if (clock == 0) { + if (pxa->clk_enable) { + clk_disable(pxa->clk); + pxa->clk_enable = 0; + } + } else { + if (0 == pxa->clk_enable) { + if (pxa->pdata->flags & PXA_FLAG_DISABLE_CLOCK_GATING) { + tmp = readl(host->ioaddr + SD_FIFO_PARAM); + tmp |= DIS_PAD_SD_CLK_GATE; + writel(tmp, host->ioaddr + SD_FIFO_PARAM); + } + clk_enable(pxa->clk); + pxa->clk_enable = 1; + } + } +} + +static int set_uhs_signaling(struct sdhci_host *host, unsigned int uhs) +{ + u16 ctrl_2; + + /* + * Set V18_EN -- UHS modes do not work without this. 
+ * does not change signaling voltage + */ + ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); + + /* Select Bus Speed Mode for host */ + ctrl_2 &= ~SDHCI_CTRL_UHS_MASK; + switch (uhs) { + case MMC_TIMING_UHS_SDR12: + ctrl_2 |= SDHCI_CTRL_UHS_SDR12; + break; + case MMC_TIMING_UHS_SDR25: + ctrl_2 |= SDHCI_CTRL_UHS_SDR25; + break; + case MMC_TIMING_UHS_SDR50: + ctrl_2 |= SDHCI_CTRL_UHS_SDR50 | SDHCI_CTRL_VDD_180; + break; + case MMC_TIMING_UHS_SDR104: + ctrl_2 |= SDHCI_CTRL_UHS_SDR104 | SDHCI_CTRL_VDD_180; + break; + case MMC_TIMING_UHS_DDR50: + ctrl_2 |= SDHCI_CTRL_UHS_DDR50 | SDHCI_CTRL_VDD_180; + break; + } + + sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); + pr_debug("%s:%s uhs = %d, ctrl_2 = %04X\n", + __func__, mmc_hostname(host->mmc), uhs, ctrl_2); + + return 0; +} + +static struct sdhci_ops sdhci_pxa_ops = { + .set_uhs_signaling = set_uhs_signaling, + .set_clock = set_clock, +}; + +/*****************************************************************************\ + * * + * Device probing/removal * + * * +\*****************************************************************************/ + +static int __devinit sdhci_pxa_probe(struct platform_device *pdev) +{ + struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data; + struct device *dev = &pdev->dev; + struct sdhci_host *host = NULL; + struct resource *iomem = NULL; + struct sdhci_pxa *pxa = NULL; + int ret, irq; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(dev, "no irq specified\n"); + return irq; + } + + iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!iomem) { + dev_err(dev, "no memory specified\n"); + return -ENOENT; + } + + host = sdhci_alloc_host(&pdev->dev, sizeof(struct sdhci_pxa)); + if (IS_ERR(host)) { + dev_err(dev, "failed to alloc host\n"); + return PTR_ERR(host); + } + + pxa = sdhci_priv(host); + pxa->host = host; + pxa->pdata = pdata; + pxa->clk_enable = 0; + + pxa->clk = clk_get(dev, "PXA-SDHCLK"); + if (IS_ERR(pxa->clk)) { + dev_err(dev, "failed to get io 
clock\n"); + ret = PTR_ERR(pxa->clk); + goto out; + } + + pxa->res = request_mem_region(iomem->start, resource_size(iomem), + mmc_hostname(host->mmc)); + if (!pxa->res) { + dev_err(&pdev->dev, "cannot request region\n"); + ret = -EBUSY; + goto out; + } + + host->ioaddr = ioremap(iomem->start, resource_size(iomem)); + if (!host->ioaddr) { + dev_err(&pdev->dev, "failed to remap registers\n"); + ret = -ENOMEM; + goto out; + } + + host->hw_name = "MMC"; + host->ops = &sdhci_pxa_ops; + host->irq = irq; + host->quirks = SDHCI_QUIRK_BROKEN_ADMA + | SDHCI_QUIRK_BROKEN_TIMEOUT_VAL + | SDHCI_QUIRK_32BIT_DMA_ADDR + | SDHCI_QUIRK_32BIT_DMA_SIZE + | SDHCI_QUIRK_32BIT_ADMA_SIZE + | SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC; + + if (pdata->quirks) + host->quirks |= pdata->quirks; + + /* enable 1/8V DDR capable */ + host->mmc->caps |= MMC_CAP_1_8V_DDR; + + /* If slot design supports 8 bit data, indicate this to MMC. */ + if (pdata->flags & PXA_FLAG_SD_8_BIT_CAPABLE_SLOT) + host->mmc->caps |= MMC_CAP_8_BIT_DATA; + + ret = sdhci_add_host(host); + if (ret) { + dev_err(&pdev->dev, "failed to add host\n"); + goto out; + } + + if (pxa->pdata->max_speed) + host->mmc->f_max = pxa->pdata->max_speed; + + platform_set_drvdata(pdev, host); + + return 0; +out: + if (host) { + clk_put(pxa->clk); + if (host->ioaddr) + iounmap(host->ioaddr); + if (pxa->res) + release_mem_region(pxa->res->start, + resource_size(pxa->res)); + sdhci_free_host(host); + } + + return ret; +} + +static int __devexit sdhci_pxa_remove(struct platform_device *pdev) +{ + struct sdhci_host *host = platform_get_drvdata(pdev); + struct sdhci_pxa *pxa = sdhci_priv(host); + int dead = 0; + u32 scratch; + + if (host) { + scratch = readl(host->ioaddr + SDHCI_INT_STATUS); + if (scratch == (u32)-1) + dead = 1; + + sdhci_remove_host(host, dead); + + if (host->ioaddr) + iounmap(host->ioaddr); + if (pxa->res) + release_mem_region(pxa->res->start, + resource_size(pxa->res)); + if (pxa->clk_enable) { + clk_disable(pxa->clk); + pxa->clk_enable = 
0; + } + clk_put(pxa->clk); + + sdhci_free_host(host); + platform_set_drvdata(pdev, NULL); + } + + return 0; +} + +#ifdef CONFIG_PM +static int sdhci_pxa_suspend(struct platform_device *dev, pm_message_t state) +{ + struct sdhci_host *host = platform_get_drvdata(dev); + + return sdhci_suspend_host(host, state); +} + +static int sdhci_pxa_resume(struct platform_device *dev) +{ + struct sdhci_host *host = platform_get_drvdata(dev); + + return sdhci_resume_host(host); +} +#else +#define sdhci_pxa_suspend NULL +#define sdhci_pxa_resume NULL +#endif + +static struct platform_driver sdhci_pxa_driver = { + .probe = sdhci_pxa_probe, + .remove = __devexit_p(sdhci_pxa_remove), + .suspend = sdhci_pxa_suspend, + .resume = sdhci_pxa_resume, + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE, + }, +}; + +/*****************************************************************************\ + * * + * Driver init/exit * + * * +\*****************************************************************************/ + +static int __init sdhci_pxa_init(void) +{ + return platform_driver_register(&sdhci_pxa_driver); +} + +static void __exit sdhci_pxa_exit(void) +{ + platform_driver_unregister(&sdhci_pxa_driver); +} + +module_init(sdhci_pxa_init); +module_exit(sdhci_pxa_exit); + +MODULE_DESCRIPTION("SDH controller driver for PXA168/PXA910/MMP2"); +MODULE_AUTHOR("Zhangfei Gao "); +MODULE_LICENSE("GPL v2"); diff --git a/trunk/drivers/mmc/host/sdhci-pxav2.c b/trunk/drivers/mmc/host/sdhci-pxav2.c deleted file mode 100644 index 38f58994f79a..000000000000 --- a/trunk/drivers/mmc/host/sdhci-pxav2.c +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Copyright (C) 2010 Marvell International Ltd. - * Zhangfei Gao - * Kevin Wang - * Jun Nie - * Qiming Wu - * Philip Rakity - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "sdhci.h" -#include "sdhci-pltfm.h" - -#define SD_FIFO_PARAM 0xe0 -#define DIS_PAD_SD_CLK_GATE 0x0400 /* Turn on/off Dynamic SD Clock Gating */ -#define CLK_GATE_ON 0x0200 /* Disable/enable Clock Gate */ -#define CLK_GATE_CTL 0x0100 /* Clock Gate Control */ -#define CLK_GATE_SETTING_BITS (DIS_PAD_SD_CLK_GATE | \ - CLK_GATE_ON | CLK_GATE_CTL) - -#define SD_CLOCK_BURST_SIZE_SETUP 0xe6 -#define SDCLK_SEL_SHIFT 8 -#define SDCLK_SEL_MASK 0x3 -#define SDCLK_DELAY_SHIFT 10 -#define SDCLK_DELAY_MASK 0x3c - -#define SD_CE_ATA_2 0xea -#define MMC_CARD 0x1000 -#define MMC_WIDTH 0x0100 - -static void pxav2_set_private_registers(struct sdhci_host *host, u8 mask) -{ - struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc)); - struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data; - - if (mask == SDHCI_RESET_ALL) { - u16 tmp = 0; - - /* - * tune timing of read data/command when crc error happen - * no performance impact - */ - if (pdata->clk_delay_sel == 1) { - tmp = readw(host->ioaddr + SD_CLOCK_BURST_SIZE_SETUP); - - tmp &= ~(SDCLK_DELAY_MASK << SDCLK_DELAY_SHIFT); - tmp |= (pdata->clk_delay_cycles & SDCLK_DELAY_MASK) - << SDCLK_DELAY_SHIFT; - tmp &= ~(SDCLK_SEL_MASK << SDCLK_SEL_SHIFT); - tmp |= (1 & SDCLK_SEL_MASK) << SDCLK_SEL_SHIFT; - - writew(tmp, host->ioaddr + SD_CLOCK_BURST_SIZE_SETUP); - } - - if (pdata->flags & PXA_FLAG_ENABLE_CLOCK_GATING) { - tmp = readw(host->ioaddr + SD_FIFO_PARAM); - tmp &= ~CLK_GATE_SETTING_BITS; - writew(tmp, host->ioaddr + SD_FIFO_PARAM); - } else { - tmp = readw(host->ioaddr + SD_FIFO_PARAM); - tmp &= ~CLK_GATE_SETTING_BITS; - tmp |= CLK_GATE_SETTING_BITS; 
- writew(tmp, host->ioaddr + SD_FIFO_PARAM); - } - } -} - -static int pxav2_mmc_set_width(struct sdhci_host *host, int width) -{ - u8 ctrl; - u16 tmp; - - ctrl = readb(host->ioaddr + SDHCI_HOST_CONTROL); - tmp = readw(host->ioaddr + SD_CE_ATA_2); - if (width == MMC_BUS_WIDTH_8) { - ctrl &= ~SDHCI_CTRL_4BITBUS; - tmp |= MMC_CARD | MMC_WIDTH; - } else { - tmp &= ~(MMC_CARD | MMC_WIDTH); - if (width == MMC_BUS_WIDTH_4) - ctrl |= SDHCI_CTRL_4BITBUS; - else - ctrl &= ~SDHCI_CTRL_4BITBUS; - } - writew(tmp, host->ioaddr + SD_CE_ATA_2); - writeb(ctrl, host->ioaddr + SDHCI_HOST_CONTROL); - - return 0; -} - -static u32 pxav2_get_max_clock(struct sdhci_host *host) -{ - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - - return clk_get_rate(pltfm_host->clk); -} - -static struct sdhci_ops pxav2_sdhci_ops = { - .get_max_clock = pxav2_get_max_clock, - .platform_reset_exit = pxav2_set_private_registers, - .platform_8bit_width = pxav2_mmc_set_width, -}; - -static int __devinit sdhci_pxav2_probe(struct platform_device *pdev) -{ - struct sdhci_pltfm_host *pltfm_host; - struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data; - struct device *dev = &pdev->dev; - struct sdhci_host *host = NULL; - struct sdhci_pxa *pxa = NULL; - int ret; - struct clk *clk; - - pxa = kzalloc(sizeof(struct sdhci_pxa), GFP_KERNEL); - if (!pxa) - return -ENOMEM; - - host = sdhci_pltfm_init(pdev, NULL); - if (IS_ERR(host)) { - kfree(pxa); - return PTR_ERR(host); - } - pltfm_host = sdhci_priv(host); - pltfm_host->priv = pxa; - - clk = clk_get(dev, "PXA-SDHCLK"); - if (IS_ERR(clk)) { - dev_err(dev, "failed to get io clock\n"); - ret = PTR_ERR(clk); - goto err_clk_get; - } - pltfm_host->clk = clk; - clk_enable(clk); - - host->quirks = SDHCI_QUIRK_BROKEN_ADMA - | SDHCI_QUIRK_BROKEN_TIMEOUT_VAL - | SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN; - - if (pdata) { - if (pdata->flags & PXA_FLAG_CARD_PERMANENT) { - /* on-chip device */ - host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION; - host->mmc->caps |= 
MMC_CAP_NONREMOVABLE; - } - - /* If slot design supports 8 bit data, indicate this to MMC. */ - if (pdata->flags & PXA_FLAG_SD_8_BIT_CAPABLE_SLOT) - host->mmc->caps |= MMC_CAP_8_BIT_DATA; - - if (pdata->quirks) - host->quirks |= pdata->quirks; - if (pdata->host_caps) - host->mmc->caps |= pdata->host_caps; - if (pdata->pm_caps) - host->mmc->pm_caps |= pdata->pm_caps; - } - - host->ops = &pxav2_sdhci_ops; - - ret = sdhci_add_host(host); - if (ret) { - dev_err(&pdev->dev, "failed to add host\n"); - goto err_add_host; - } - - platform_set_drvdata(pdev, host); - - return 0; - -err_add_host: - clk_disable(clk); - clk_put(clk); -err_clk_get: - sdhci_pltfm_free(pdev); - kfree(pxa); - return ret; -} - -static int __devexit sdhci_pxav2_remove(struct platform_device *pdev) -{ - struct sdhci_host *host = platform_get_drvdata(pdev); - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct sdhci_pxa *pxa = pltfm_host->priv; - - sdhci_remove_host(host, 1); - - clk_disable(pltfm_host->clk); - clk_put(pltfm_host->clk); - sdhci_pltfm_free(pdev); - kfree(pxa); - - platform_set_drvdata(pdev, NULL); - - return 0; -} - -static struct platform_driver sdhci_pxav2_driver = { - .driver = { - .name = "sdhci-pxav2", - .owner = THIS_MODULE, - }, - .probe = sdhci_pxav2_probe, - .remove = __devexit_p(sdhci_pxav2_remove), -#ifdef CONFIG_PM - .suspend = sdhci_pltfm_suspend, - .resume = sdhci_pltfm_resume, -#endif -}; -static int __init sdhci_pxav2_init(void) -{ - return platform_driver_register(&sdhci_pxav2_driver); -} - -static void __exit sdhci_pxav2_exit(void) -{ - platform_driver_unregister(&sdhci_pxav2_driver); -} - -module_init(sdhci_pxav2_init); -module_exit(sdhci_pxav2_exit); - -MODULE_DESCRIPTION("SDHCI driver for pxav2"); -MODULE_AUTHOR("Marvell International Ltd."); -MODULE_LICENSE("GPL v2"); - diff --git a/trunk/drivers/mmc/host/sdhci-pxav3.c b/trunk/drivers/mmc/host/sdhci-pxav3.c deleted file mode 100644 index 4198dbbc5c20..000000000000 --- 
a/trunk/drivers/mmc/host/sdhci-pxav3.c +++ /dev/null @@ -1,289 +0,0 @@ -/* - * Copyright (C) 2010 Marvell International Ltd. - * Zhangfei Gao - * Kevin Wang - * Mingwei Wang - * Philip Rakity - * Mark Brown - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "sdhci.h" -#include "sdhci-pltfm.h" - -#define SD_CLOCK_BURST_SIZE_SETUP 0x10A -#define SDCLK_SEL 0x100 -#define SDCLK_DELAY_SHIFT 9 -#define SDCLK_DELAY_MASK 0x1f - -#define SD_CFG_FIFO_PARAM 0x100 -#define SDCFG_GEN_PAD_CLK_ON (1<<6) -#define SDCFG_GEN_PAD_CLK_CNT_MASK 0xFF -#define SDCFG_GEN_PAD_CLK_CNT_SHIFT 24 - -#define SD_SPI_MODE 0x108 -#define SD_CE_ATA_1 0x10C - -#define SD_CE_ATA_2 0x10E -#define SDCE_MISC_INT (1<<2) -#define SDCE_MISC_INT_EN (1<<1) - -static void pxav3_set_private_registers(struct sdhci_host *host, u8 mask) -{ - struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc)); - struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data; - - if (mask == SDHCI_RESET_ALL) { - /* - * tune timing of read data/command when crc error happen - * no performance impact - */ - if (pdata && 0 != pdata->clk_delay_cycles) { - u16 tmp; - - tmp = readw(host->ioaddr + SD_CLOCK_BURST_SIZE_SETUP); - tmp |= (pdata->clk_delay_cycles & SDCLK_DELAY_MASK) - << SDCLK_DELAY_SHIFT; - tmp |= SDCLK_SEL; - writew(tmp, host->ioaddr + SD_CLOCK_BURST_SIZE_SETUP); - } - } -} - -#define MAX_WAIT_COUNT 5 -static void pxav3_gen_init_74_clocks(struct sdhci_host *host, u8 
power_mode) -{ - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct sdhci_pxa *pxa = pltfm_host->priv; - u16 tmp; - int count; - - if (pxa->power_mode == MMC_POWER_UP - && power_mode == MMC_POWER_ON) { - - dev_dbg(mmc_dev(host->mmc), - "%s: slot->power_mode = %d," - "ios->power_mode = %d\n", - __func__, - pxa->power_mode, - power_mode); - - /* set we want notice of when 74 clocks are sent */ - tmp = readw(host->ioaddr + SD_CE_ATA_2); - tmp |= SDCE_MISC_INT_EN; - writew(tmp, host->ioaddr + SD_CE_ATA_2); - - /* start sending the 74 clocks */ - tmp = readw(host->ioaddr + SD_CFG_FIFO_PARAM); - tmp |= SDCFG_GEN_PAD_CLK_ON; - writew(tmp, host->ioaddr + SD_CFG_FIFO_PARAM); - - /* slowest speed is about 100KHz or 10usec per clock */ - udelay(740); - count = 0; - - while (count++ < MAX_WAIT_COUNT) { - if ((readw(host->ioaddr + SD_CE_ATA_2) - & SDCE_MISC_INT) == 0) - break; - udelay(10); - } - - if (count == MAX_WAIT_COUNT) - dev_warn(mmc_dev(host->mmc), "74 clock interrupt not cleared\n"); - - /* clear the interrupt bit if posted */ - tmp = readw(host->ioaddr + SD_CE_ATA_2); - tmp |= SDCE_MISC_INT; - writew(tmp, host->ioaddr + SD_CE_ATA_2); - } - pxa->power_mode = power_mode; -} - -static int pxav3_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs) -{ - u16 ctrl_2; - - /* - * Set V18_EN -- UHS modes do not work without this. 
- * does not change signaling voltage - */ - ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); - - /* Select Bus Speed Mode for host */ - ctrl_2 &= ~SDHCI_CTRL_UHS_MASK; - switch (uhs) { - case MMC_TIMING_UHS_SDR12: - ctrl_2 |= SDHCI_CTRL_UHS_SDR12; - break; - case MMC_TIMING_UHS_SDR25: - ctrl_2 |= SDHCI_CTRL_UHS_SDR25; - break; - case MMC_TIMING_UHS_SDR50: - ctrl_2 |= SDHCI_CTRL_UHS_SDR50 | SDHCI_CTRL_VDD_180; - break; - case MMC_TIMING_UHS_SDR104: - ctrl_2 |= SDHCI_CTRL_UHS_SDR104 | SDHCI_CTRL_VDD_180; - break; - case MMC_TIMING_UHS_DDR50: - ctrl_2 |= SDHCI_CTRL_UHS_DDR50 | SDHCI_CTRL_VDD_180; - break; - } - - sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); - dev_dbg(mmc_dev(host->mmc), - "%s uhs = %d, ctrl_2 = %04X\n", - __func__, uhs, ctrl_2); - - return 0; -} - -static struct sdhci_ops pxav3_sdhci_ops = { - .platform_reset_exit = pxav3_set_private_registers, - .set_uhs_signaling = pxav3_set_uhs_signaling, - .platform_send_init_74_clocks = pxav3_gen_init_74_clocks, -}; - -static int __devinit sdhci_pxav3_probe(struct platform_device *pdev) -{ - struct sdhci_pltfm_host *pltfm_host; - struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data; - struct device *dev = &pdev->dev; - struct sdhci_host *host = NULL; - struct sdhci_pxa *pxa = NULL; - int ret; - struct clk *clk; - - pxa = kzalloc(sizeof(struct sdhci_pxa), GFP_KERNEL); - if (!pxa) - return -ENOMEM; - - host = sdhci_pltfm_init(pdev, NULL); - if (IS_ERR(host)) { - kfree(pxa); - return PTR_ERR(host); - } - pltfm_host = sdhci_priv(host); - pltfm_host->priv = pxa; - - clk = clk_get(dev, "PXA-SDHCLK"); - if (IS_ERR(clk)) { - dev_err(dev, "failed to get io clock\n"); - ret = PTR_ERR(clk); - goto err_clk_get; - } - pltfm_host->clk = clk; - clk_enable(clk); - - host->quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL - | SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC; - - /* enable 1/8V DDR capable */ - host->mmc->caps |= MMC_CAP_1_8V_DDR; - - if (pdata) { - if (pdata->flags & PXA_FLAG_CARD_PERMANENT) { - /* on-chip device */ - 
host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION; - host->mmc->caps |= MMC_CAP_NONREMOVABLE; - } - - /* If slot design supports 8 bit data, indicate this to MMC. */ - if (pdata->flags & PXA_FLAG_SD_8_BIT_CAPABLE_SLOT) - host->mmc->caps |= MMC_CAP_8_BIT_DATA; - - if (pdata->quirks) - host->quirks |= pdata->quirks; - if (pdata->host_caps) - host->mmc->caps |= pdata->host_caps; - if (pdata->pm_caps) - host->mmc->pm_caps |= pdata->pm_caps; - } - - host->ops = &pxav3_sdhci_ops; - - ret = sdhci_add_host(host); - if (ret) { - dev_err(&pdev->dev, "failed to add host\n"); - goto err_add_host; - } - - platform_set_drvdata(pdev, host); - - return 0; - -err_add_host: - clk_disable(clk); - clk_put(clk); -err_clk_get: - sdhci_pltfm_free(pdev); - kfree(pxa); - return ret; -} - -static int __devexit sdhci_pxav3_remove(struct platform_device *pdev) -{ - struct sdhci_host *host = platform_get_drvdata(pdev); - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct sdhci_pxa *pxa = pltfm_host->priv; - - sdhci_remove_host(host, 1); - - clk_disable(pltfm_host->clk); - clk_put(pltfm_host->clk); - sdhci_pltfm_free(pdev); - kfree(pxa); - - platform_set_drvdata(pdev, NULL); - - return 0; -} - -static struct platform_driver sdhci_pxav3_driver = { - .driver = { - .name = "sdhci-pxav3", - .owner = THIS_MODULE, - }, - .probe = sdhci_pxav3_probe, - .remove = __devexit_p(sdhci_pxav3_remove), -#ifdef CONFIG_PM - .suspend = sdhci_pltfm_suspend, - .resume = sdhci_pltfm_resume, -#endif -}; -static int __init sdhci_pxav3_init(void) -{ - return platform_driver_register(&sdhci_pxav3_driver); -} - -static void __exit sdhci_pxav3_exit(void) -{ - platform_driver_unregister(&sdhci_pxav3_driver); -} - -module_init(sdhci_pxav3_init); -module_exit(sdhci_pxav3_exit); - -MODULE_DESCRIPTION("SDHCI driver for pxav3"); -MODULE_AUTHOR("Marvell International Ltd."); -MODULE_LICENSE("GPL v2"); - diff --git a/trunk/drivers/mmc/host/sdhci-s3c.c b/trunk/drivers/mmc/host/sdhci-s3c.c index 
460ffaf0f6d7..69e3ee321eb5 100644 --- a/trunk/drivers/mmc/host/sdhci-s3c.c +++ b/trunk/drivers/mmc/host/sdhci-s3c.c @@ -612,14 +612,16 @@ static int sdhci_s3c_suspend(struct platform_device *dev, pm_message_t pm) { struct sdhci_host *host = platform_get_drvdata(dev); - return sdhci_suspend_host(host, pm); + sdhci_suspend_host(host, pm); + return 0; } static int sdhci_s3c_resume(struct platform_device *dev) { struct sdhci_host *host = platform_get_drvdata(dev); - return sdhci_resume_host(host); + sdhci_resume_host(host); + return 0; } #else diff --git a/trunk/drivers/mmc/host/sdhci-tegra.c b/trunk/drivers/mmc/host/sdhci-tegra.c index 18b0bd31de78..343c97edba32 100644 --- a/trunk/drivers/mmc/host/sdhci-tegra.c +++ b/trunk/drivers/mmc/host/sdhci-tegra.c @@ -24,6 +24,7 @@ #include #include +#include "sdhci.h" #include "sdhci-pltfm.h" static u32 tegra_sdhci_readl(struct sdhci_host *host, int reg) @@ -115,42 +116,20 @@ static int tegra_sdhci_8bit(struct sdhci_host *host, int bus_width) return 0; } -static struct sdhci_ops tegra_sdhci_ops = { - .get_ro = tegra_sdhci_get_ro, - .read_l = tegra_sdhci_readl, - .read_w = tegra_sdhci_readw, - .write_l = tegra_sdhci_writel, - .platform_8bit_width = tegra_sdhci_8bit, -}; - -static struct sdhci_pltfm_data sdhci_tegra_pdata = { - .quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | - SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_NO_HISPD_BIT | - SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC, - .ops = &tegra_sdhci_ops, -}; -static int __devinit sdhci_tegra_probe(struct platform_device *pdev) +static int tegra_sdhci_pltfm_init(struct sdhci_host *host, + struct sdhci_pltfm_data *pdata) { - struct sdhci_pltfm_host *pltfm_host; + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc)); struct tegra_sdhci_platform_data *plat; - struct sdhci_host *host; struct clk *clk; int rc; - host = sdhci_pltfm_init(pdev, &sdhci_tegra_pdata); - if (IS_ERR(host)) - return PTR_ERR(host); - - 
pltfm_host = sdhci_priv(host); - plat = pdev->dev.platform_data; - if (plat == NULL) { dev_err(mmc_dev(host->mmc), "missing platform data\n"); - rc = -ENXIO; - goto err_no_plat; + return -ENXIO; } if (gpio_is_valid(plat->power_gpio)) { @@ -158,7 +137,7 @@ static int __devinit sdhci_tegra_probe(struct platform_device *pdev) if (rc) { dev_err(mmc_dev(host->mmc), "failed to allocate power gpio\n"); - goto err_power_req; + goto out; } tegra_gpio_enable(plat->power_gpio); gpio_direction_output(plat->power_gpio, 1); @@ -169,7 +148,7 @@ static int __devinit sdhci_tegra_probe(struct platform_device *pdev) if (rc) { dev_err(mmc_dev(host->mmc), "failed to allocate cd gpio\n"); - goto err_cd_req; + goto out_power; } tegra_gpio_enable(plat->cd_gpio); gpio_direction_input(plat->cd_gpio); @@ -180,7 +159,7 @@ static int __devinit sdhci_tegra_probe(struct platform_device *pdev) if (rc) { dev_err(mmc_dev(host->mmc), "request irq error\n"); - goto err_cd_irq_req; + goto out_cd; } } @@ -190,7 +169,7 @@ static int __devinit sdhci_tegra_probe(struct platform_device *pdev) if (rc) { dev_err(mmc_dev(host->mmc), "failed to allocate wp gpio\n"); - goto err_wp_req; + goto out_irq; } tegra_gpio_enable(plat->wp_gpio); gpio_direction_input(plat->wp_gpio); @@ -200,7 +179,7 @@ static int __devinit sdhci_tegra_probe(struct platform_device *pdev) if (IS_ERR(clk)) { dev_err(mmc_dev(host->mmc), "clk err\n"); rc = PTR_ERR(clk); - goto err_clk_get; + goto out_wp; } clk_enable(clk); pltfm_host->clk = clk; @@ -210,47 +189,38 @@ static int __devinit sdhci_tegra_probe(struct platform_device *pdev) if (plat->is_8bit) host->mmc->caps |= MMC_CAP_8_BIT_DATA; - rc = sdhci_add_host(host); - if (rc) - goto err_add_host; - return 0; -err_add_host: - clk_disable(pltfm_host->clk); - clk_put(pltfm_host->clk); -err_clk_get: +out_wp: if (gpio_is_valid(plat->wp_gpio)) { tegra_gpio_disable(plat->wp_gpio); gpio_free(plat->wp_gpio); } -err_wp_req: + +out_irq: if (gpio_is_valid(plat->cd_gpio)) 
free_irq(gpio_to_irq(plat->cd_gpio), host); -err_cd_irq_req: +out_cd: if (gpio_is_valid(plat->cd_gpio)) { tegra_gpio_disable(plat->cd_gpio); gpio_free(plat->cd_gpio); } -err_cd_req: + +out_power: if (gpio_is_valid(plat->power_gpio)) { tegra_gpio_disable(plat->power_gpio); gpio_free(plat->power_gpio); } -err_power_req: -err_no_plat: - sdhci_pltfm_free(pdev); + +out: return rc; } -static int __devexit sdhci_tegra_remove(struct platform_device *pdev) +static void tegra_sdhci_pltfm_exit(struct sdhci_host *host) { - struct sdhci_host *host = platform_get_drvdata(pdev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc)); struct tegra_sdhci_platform_data *plat; - int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff); - - sdhci_remove_host(host, dead); plat = pdev->dev.platform_data; @@ -272,37 +242,22 @@ static int __devexit sdhci_tegra_remove(struct platform_device *pdev) clk_disable(pltfm_host->clk); clk_put(pltfm_host->clk); - - sdhci_pltfm_free(pdev); - - return 0; } -static struct platform_driver sdhci_tegra_driver = { - .driver = { - .name = "sdhci-tegra", - .owner = THIS_MODULE, - }, - .probe = sdhci_tegra_probe, - .remove = __devexit_p(sdhci_tegra_remove), -#ifdef CONFIG_PM - .suspend = sdhci_pltfm_suspend, - .resume = sdhci_pltfm_resume, -#endif +static struct sdhci_ops tegra_sdhci_ops = { + .get_ro = tegra_sdhci_get_ro, + .read_l = tegra_sdhci_readl, + .read_w = tegra_sdhci_readw, + .write_l = tegra_sdhci_writel, + .platform_8bit_width = tegra_sdhci_8bit, }; -static int __init sdhci_tegra_init(void) -{ - return platform_driver_register(&sdhci_tegra_driver); -} -module_init(sdhci_tegra_init); - -static void __exit sdhci_tegra_exit(void) -{ - platform_driver_unregister(&sdhci_tegra_driver); -} -module_exit(sdhci_tegra_exit); - -MODULE_DESCRIPTION("SDHCI driver for Tegra"); -MODULE_AUTHOR(" Google, Inc."); -MODULE_LICENSE("GPL v2"); +struct sdhci_pltfm_data sdhci_tegra_pdata = 
{ + .quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | + SDHCI_QUIRK_SINGLE_POWER_WRITE | + SDHCI_QUIRK_NO_HISPD_BIT | + SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC, + .ops = &tegra_sdhci_ops, + .init = tegra_sdhci_pltfm_init, + .exit = tegra_sdhci_pltfm_exit, +}; diff --git a/trunk/drivers/mmc/host/sdhci.c b/trunk/drivers/mmc/host/sdhci.c index c31a3343340d..58d5436ff649 100644 --- a/trunk/drivers/mmc/host/sdhci.c +++ b/trunk/drivers/mmc/host/sdhci.c @@ -127,15 +127,11 @@ static void sdhci_mask_irqs(struct sdhci_host *host, u32 irqs) static void sdhci_set_card_detection(struct sdhci_host *host, bool enable) { - u32 present, irqs; + u32 irqs = SDHCI_INT_CARD_REMOVE | SDHCI_INT_CARD_INSERT; if (host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) return; - present = sdhci_readl(host, SDHCI_PRESENT_STATE) & - SDHCI_CARD_PRESENT; - irqs = present ? SDHCI_INT_CARD_REMOVE : SDHCI_INT_CARD_INSERT; - if (enable) sdhci_unmask_irqs(host, irqs); else @@ -2158,30 +2154,13 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id) mmc_hostname(host->mmc), intmask); if (intmask & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) { - u32 present = sdhci_readl(host, SDHCI_PRESENT_STATE) & - SDHCI_CARD_PRESENT; - - /* - * There is a observation on i.mx esdhc. INSERT bit will be - * immediately set again when it gets cleared, if a card is - * inserted. We have to mask the irq to prevent interrupt - * storm which will freeze the system. And the REMOVE gets - * the same situation. - * - * More testing are needed here to ensure it works for other - * platforms though. - */ - sdhci_mask_irqs(host, present ? SDHCI_INT_CARD_INSERT : - SDHCI_INT_CARD_REMOVE); - sdhci_unmask_irqs(host, present ? 
SDHCI_INT_CARD_REMOVE : - SDHCI_INT_CARD_INSERT); - sdhci_writel(host, intmask & (SDHCI_INT_CARD_INSERT | - SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS); - intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE); + SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS); tasklet_schedule(&host->card_tasklet); } + intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE); + if (intmask & SDHCI_INT_CMD_MASK) { sdhci_writel(host, intmask & SDHCI_INT_CMD_MASK, SDHCI_INT_STATUS); @@ -2509,11 +2488,6 @@ int sdhci_add_host(struct sdhci_host *host) } else mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200; - if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK) - mmc->max_discard_to = (1 << 27) / (mmc->f_max / 1000); - else - mmc->max_discard_to = (1 << 27) / host->timeout_clk; - mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23; if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12) diff --git a/trunk/drivers/mmc/host/sh_mmcif.c b/trunk/drivers/mmc/host/sh_mmcif.c index 557886bee9ce..14f8edbaa195 100644 --- a/trunk/drivers/mmc/host/sh_mmcif.c +++ b/trunk/drivers/mmc/host/sh_mmcif.c @@ -175,7 +175,6 @@ struct sh_mmcif_host { enum mmcif_state state; spinlock_t lock; bool power; - bool card_present; /* DMA support */ struct dma_chan *chan_rx; @@ -878,23 +877,23 @@ static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) spin_unlock_irqrestore(&host->lock, flags); if (ios->power_mode == MMC_POWER_UP) { - if (!host->card_present) { + if (p->set_pwr) + p->set_pwr(host->pd, ios->power_mode); + if (!host->power) { /* See if we also get DMA */ sh_mmcif_request_dma(host, host->pd->dev.platform_data); - host->card_present = true; + pm_runtime_get_sync(&host->pd->dev); + host->power = true; } } else if (ios->power_mode == MMC_POWER_OFF || !ios->clock) { /* clock stop */ sh_mmcif_clock_control(host, 0); if (ios->power_mode == MMC_POWER_OFF) { - if (host->card_present) { + if (host->power) { + pm_runtime_put(&host->pd->dev); sh_mmcif_release_dma(host); - 
host->card_present = false; + host->power = false; } - } - if (host->power) { - pm_runtime_put(&host->pd->dev); - host->power = false; if (p->down_pwr) p->down_pwr(host->pd); } @@ -902,16 +901,8 @@ static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) return; } - if (ios->clock) { - if (!host->power) { - if (p->set_pwr) - p->set_pwr(host->pd, ios->power_mode); - pm_runtime_get_sync(&host->pd->dev); - host->power = true; - sh_mmcif_sync_reset(host); - } + if (ios->clock) sh_mmcif_clock_control(host, ios->clock); - } host->bus_width = ios->bus_width; host->state = STATE_IDLE; diff --git a/trunk/drivers/mmc/host/sh_mobile_sdhi.c b/trunk/drivers/mmc/host/sh_mobile_sdhi.c index 774f6439d7ce..ce500f03df85 100644 --- a/trunk/drivers/mmc/host/sh_mobile_sdhi.c +++ b/trunk/drivers/mmc/host/sh_mobile_sdhi.c @@ -26,7 +26,6 @@ #include #include #include -#include #include "tmio_mmc.h" @@ -56,39 +55,6 @@ static int sh_mobile_sdhi_get_cd(struct platform_device *pdev) return -ENOSYS; } -static int sh_mobile_sdhi_wait_idle(struct tmio_mmc_host *host) -{ - int timeout = 1000; - - while (--timeout && !(sd_ctrl_read16(host, CTL_STATUS2) & (1 << 13))) - udelay(1); - - if (!timeout) { - dev_warn(host->pdata->dev, "timeout waiting for SD bus idle\n"); - return -EBUSY; - } - - return 0; -} - -static int sh_mobile_sdhi_write16_hook(struct tmio_mmc_host *host, int addr) -{ - switch (addr) - { - case CTL_SD_CMD: - case CTL_STOP_INTERNAL_ACTION: - case CTL_XFER_BLK_COUNT: - case CTL_SD_CARD_CLK_CTL: - case CTL_SD_XFER_LEN: - case CTL_SD_MEM_CARD_OPT: - case CTL_TRANSACTION_CTL: - case CTL_DMA_ENABLE: - return sh_mobile_sdhi_wait_idle(host); - } - - return 0; -} - static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev) { struct sh_mobile_sdhi *priv; @@ -120,8 +86,6 @@ static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev) mmc_data->hclk = clk_get_rate(priv->clk); mmc_data->set_pwr = sh_mobile_sdhi_set_pwr; mmc_data->get_cd = 
sh_mobile_sdhi_get_cd; - if (mmc_data->flags & TMIO_MMC_HAS_IDLE_WAIT) - mmc_data->write16_hook = sh_mobile_sdhi_write16_hook; mmc_data->capabilities = MMC_CAP_MMC_HIGHSPEED; if (p) { mmc_data->flags = p->tmio_flags; diff --git a/trunk/drivers/mmc/host/tmio_mmc.h b/trunk/drivers/mmc/host/tmio_mmc.h index 087d88023ba1..8260bc2c34e3 100644 --- a/trunk/drivers/mmc/host/tmio_mmc.h +++ b/trunk/drivers/mmc/host/tmio_mmc.h @@ -18,7 +18,6 @@ #include #include -#include #include #include @@ -53,8 +52,6 @@ struct tmio_mmc_host { void (*set_clk_div)(struct platform_device *host, int state); int pm_error; - /* recognise system-wide suspend in runtime PM methods */ - bool pm_global; /* pio related stuff */ struct scatterlist *sg_ptr; @@ -76,11 +73,8 @@ struct tmio_mmc_host { /* Track lost interrupts */ struct delayed_work delayed_reset_work; - struct work_struct done; - - spinlock_t lock; /* protect host private data */ + spinlock_t lock; unsigned long last_req_ts; - struct mutex ios_lock; /* protect set_ios() context */ }; int tmio_mmc_host_probe(struct tmio_mmc_host **host, @@ -109,7 +103,6 @@ static inline void tmio_mmc_kunmap_atomic(struct scatterlist *sg, #if defined(CONFIG_MMC_SDHI) || defined(CONFIG_MMC_SDHI_MODULE) void tmio_mmc_start_dma(struct tmio_mmc_host *host, struct mmc_data *data); -void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable); void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdata); void tmio_mmc_release_dma(struct tmio_mmc_host *host); #else @@ -118,10 +111,6 @@ static inline void tmio_mmc_start_dma(struct tmio_mmc_host *host, { } -static inline void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable) -{ -} - static inline void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdata) { @@ -145,44 +134,4 @@ int tmio_mmc_host_resume(struct device *dev); int tmio_mmc_host_runtime_suspend(struct device *dev); int tmio_mmc_host_runtime_resume(struct device *dev); -static inline u16 
sd_ctrl_read16(struct tmio_mmc_host *host, int addr) -{ - return readw(host->ctl + (addr << host->bus_shift)); -} - -static inline void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr, - u16 *buf, int count) -{ - readsw(host->ctl + (addr << host->bus_shift), buf, count); -} - -static inline u32 sd_ctrl_read32(struct tmio_mmc_host *host, int addr) -{ - return readw(host->ctl + (addr << host->bus_shift)) | - readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16; -} - -static inline void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, u16 val) -{ - /* If there is a hook and it returns non-zero then there - * is an error and the write should be skipped - */ - if (host->pdata->write16_hook && host->pdata->write16_hook(host, addr)) - return; - writew(val, host->ctl + (addr << host->bus_shift)); -} - -static inline void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr, - u16 *buf, int count) -{ - writesw(host->ctl + (addr << host->bus_shift), buf, count); -} - -static inline void sd_ctrl_write32(struct tmio_mmc_host *host, int addr, u32 val) -{ - writew(val, host->ctl + (addr << host->bus_shift)); - writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift)); -} - - #endif diff --git a/trunk/drivers/mmc/host/tmio_mmc_dma.c b/trunk/drivers/mmc/host/tmio_mmc_dma.c index 2aa616dec32d..25f1ad6cbe09 100644 --- a/trunk/drivers/mmc/host/tmio_mmc_dma.c +++ b/trunk/drivers/mmc/host/tmio_mmc_dma.c @@ -22,14 +22,11 @@ #define TMIO_MMC_MIN_DMA_LEN 8 -void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable) +static void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable) { - if (!host->chan_tx || !host->chan_rx) - return; - #if defined(CONFIG_SUPERH) || defined(CONFIG_ARCH_SHMOBILE) /* Switch DMA mode on or off - SuperH specific? */ - sd_ctrl_write16(host, CTL_DMA_ENABLE, enable ? 2 : 0); + writew(enable ? 
2 : 0, host->ctl + (0xd8 << host->bus_shift)); #endif } diff --git a/trunk/drivers/mmc/host/tmio_mmc_pio.c b/trunk/drivers/mmc/host/tmio_mmc_pio.c index 1f16357e7301..0b09e8239aa0 100644 --- a/trunk/drivers/mmc/host/tmio_mmc_pio.c +++ b/trunk/drivers/mmc/host/tmio_mmc_pio.c @@ -46,6 +46,40 @@ #include "tmio_mmc.h" +static u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr) +{ + return readw(host->ctl + (addr << host->bus_shift)); +} + +static void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr, + u16 *buf, int count) +{ + readsw(host->ctl + (addr << host->bus_shift), buf, count); +} + +static u32 sd_ctrl_read32(struct tmio_mmc_host *host, int addr) +{ + return readw(host->ctl + (addr << host->bus_shift)) | + readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16; +} + +static void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, u16 val) +{ + writew(val, host->ctl + (addr << host->bus_shift)); +} + +static void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr, + u16 *buf, int count) +{ + writesw(host->ctl + (addr << host->bus_shift), buf, count); +} + +static void sd_ctrl_write32(struct tmio_mmc_host *host, int addr, u32 val) +{ + writew(val, host->ctl + (addr << host->bus_shift)); + writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift)); +} + void tmio_mmc_enable_mmc_irqs(struct tmio_mmc_host *host, u32 i) { u32 mask = sd_ctrl_read32(host, CTL_IRQ_MASK) & ~(i & TMIO_MASK_IRQ); @@ -250,16 +284,10 @@ static void tmio_mmc_reset_work(struct work_struct *work) /* called with host->lock held, interrupts disabled */ static void tmio_mmc_finish_request(struct tmio_mmc_host *host) { - struct mmc_request *mrq; - unsigned long flags; - - spin_lock_irqsave(&host->lock, flags); + struct mmc_request *mrq = host->mrq; - mrq = host->mrq; - if (IS_ERR_OR_NULL(mrq)) { - spin_unlock_irqrestore(&host->lock, flags); + if (!mrq) return; - } host->cmd = NULL; host->data = NULL; @@ -268,18 +296,11 @@ static void tmio_mmc_finish_request(struct 
tmio_mmc_host *host) cancel_delayed_work(&host->delayed_reset_work); host->mrq = NULL; - spin_unlock_irqrestore(&host->lock, flags); + /* FIXME: mmc_request_done() can schedule! */ mmc_request_done(host->mmc, mrq); } -static void tmio_mmc_done_work(struct work_struct *work) -{ - struct tmio_mmc_host *host = container_of(work, struct tmio_mmc_host, - done); - tmio_mmc_finish_request(host); -} - /* These are the bitmasks the tmio chip requires to implement the MMC response * types. Note that R1 and R6 are the same in this scheme. */ #define APP_CMD 0x0040 @@ -446,7 +467,7 @@ void tmio_mmc_do_data_irq(struct tmio_mmc_host *host) BUG(); } - schedule_work(&host->done); + tmio_mmc_finish_request(host); } static void tmio_mmc_data_irq(struct tmio_mmc_host *host) @@ -536,7 +557,7 @@ static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host, tasklet_schedule(&host->dma_issue); } } else { - schedule_work(&host->done); + tmio_mmc_finish_request(host); } out: @@ -546,7 +567,6 @@ static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host, irqreturn_t tmio_mmc_irq(int irq, void *devid) { struct tmio_mmc_host *host = devid; - struct mmc_host *mmc = host->mmc; struct tmio_mmc_data *pdata = host->pdata; unsigned int ireg, irq_mask, status; unsigned int sdio_ireg, sdio_irq_mask, sdio_status; @@ -568,13 +588,13 @@ irqreturn_t tmio_mmc_irq(int irq, void *devid) if (sdio_ireg && !host->sdio_irq_enabled) { pr_warning("tmio_mmc: Spurious SDIO IRQ, disabling! 
0x%04x 0x%04x 0x%04x\n", sdio_status, sdio_irq_mask, sdio_ireg); - tmio_mmc_enable_sdio_irq(mmc, 0); + tmio_mmc_enable_sdio_irq(host->mmc, 0); goto out; } - if (mmc->caps & MMC_CAP_SDIO_IRQ && + if (host->mmc->caps & MMC_CAP_SDIO_IRQ && sdio_ireg & TMIO_SDIO_STAT_IOIRQ) - mmc_signal_sdio_irq(mmc); + mmc_signal_sdio_irq(host->mmc); if (sdio_ireg) goto out; @@ -583,49 +603,58 @@ irqreturn_t tmio_mmc_irq(int irq, void *devid) pr_debug_status(status); pr_debug_status(ireg); - /* Card insert / remove attempts */ - if (ireg & (TMIO_STAT_CARD_INSERT | TMIO_STAT_CARD_REMOVE)) { - tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_CARD_INSERT | - TMIO_STAT_CARD_REMOVE); - if ((((ireg & TMIO_STAT_CARD_REMOVE) && mmc->card) || - ((ireg & TMIO_STAT_CARD_INSERT) && !mmc->card)) && - !work_pending(&mmc->detect.work)) - mmc_detect_change(host->mmc, msecs_to_jiffies(100)); + if (!ireg) { + tmio_mmc_disable_mmc_irqs(host, status & ~irq_mask); + + pr_warning("tmio_mmc: Spurious irq, disabling! " + "0x%08x 0x%08x 0x%08x\n", status, irq_mask, ireg); + pr_debug_status(status); + goto out; } - /* CRC and other errors */ -/* if (ireg & TMIO_STAT_ERR_IRQ) - * handled |= tmio_error_irq(host, irq, stat); + while (ireg) { + /* Card insert / remove attempts */ + if (ireg & (TMIO_STAT_CARD_INSERT | TMIO_STAT_CARD_REMOVE)) { + tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_CARD_INSERT | + TMIO_STAT_CARD_REMOVE); + mmc_detect_change(host->mmc, msecs_to_jiffies(100)); + } + + /* CRC and other errors */ +/* if (ireg & TMIO_STAT_ERR_IRQ) + * handled |= tmio_error_irq(host, irq, stat); */ - /* Command completion */ - if (ireg & (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT)) { - tmio_mmc_ack_mmc_irqs(host, - TMIO_STAT_CMDRESPEND | - TMIO_STAT_CMDTIMEOUT); - tmio_mmc_cmd_irq(host, status); - goto out; - } + /* Command completion */ + if (ireg & (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT)) { + tmio_mmc_ack_mmc_irqs(host, + TMIO_STAT_CMDRESPEND | + TMIO_STAT_CMDTIMEOUT); + tmio_mmc_cmd_irq(host, status); + } - /* Data 
transfer */ - if (ireg & (TMIO_STAT_RXRDY | TMIO_STAT_TXRQ)) { - tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_RXRDY | TMIO_STAT_TXRQ); - tmio_mmc_pio_irq(host); - goto out; - } + /* Data transfer */ + if (ireg & (TMIO_STAT_RXRDY | TMIO_STAT_TXRQ)) { + tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_RXRDY | TMIO_STAT_TXRQ); + tmio_mmc_pio_irq(host); + } - /* Data transfer completion */ - if (ireg & TMIO_STAT_DATAEND) { - tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_DATAEND); - tmio_mmc_data_irq(host); - goto out; - } + /* Data transfer completion */ + if (ireg & TMIO_STAT_DATAEND) { + tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_DATAEND); + tmio_mmc_data_irq(host); + } - pr_warning("tmio_mmc: Spurious irq, disabling! " - "0x%08x 0x%08x 0x%08x\n", status, irq_mask, ireg); - pr_debug_status(status); - tmio_mmc_disable_mmc_irqs(host, status & ~irq_mask); + /* Check status - keep going until we've handled it all */ + status = sd_ctrl_read32(host, CTL_STATUS); + irq_mask = sd_ctrl_read32(host, CTL_IRQ_MASK); + ireg = status & TMIO_MASK_IRQ & ~irq_mask; + + pr_debug("Status at end of loop: %08x\n", status); + pr_debug_status(status); + } + pr_debug("MMC IRQ end\n"); out: return IRQ_HANDLED; @@ -720,8 +749,6 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) struct tmio_mmc_data *pdata = host->pdata; unsigned long flags; - mutex_lock(&host->ios_lock); - spin_lock_irqsave(&host->lock, flags); if (host->mrq) { if (IS_ERR(host->mrq)) { @@ -737,8 +764,6 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->mrq->cmd->opcode, host->last_req_ts, jiffies); } spin_unlock_irqrestore(&host->lock, flags); - - mutex_unlock(&host->ios_lock); return; } @@ -746,30 +771,33 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) spin_unlock_irqrestore(&host->lock, flags); - /* - * pdata->power == false only if COLD_CD is available, otherwise only - * in short time intervals during probing or resuming - */ - if (ios->power_mode == MMC_POWER_ON && 
ios->clock) { - if (!pdata->power) { + if (ios->clock) + tmio_mmc_set_clock(host, ios->clock); + + /* Power sequence - OFF -> UP -> ON */ + if (ios->power_mode == MMC_POWER_UP) { + if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) && !pdata->power) { pm_runtime_get_sync(&host->pdev->dev); pdata->power = true; } - tmio_mmc_set_clock(host, ios->clock); /* power up SD bus */ if (host->set_pwr) host->set_pwr(host->pdev, 1); - /* start bus clock */ - tmio_mmc_clk_start(host); - } else if (ios->power_mode != MMC_POWER_UP) { - if (host->set_pwr) - host->set_pwr(host->pdev, 0); - if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) && - pdata->power) { - pdata->power = false; - pm_runtime_put(&host->pdev->dev); + } else if (ios->power_mode == MMC_POWER_OFF || !ios->clock) { + /* power down SD bus */ + if (ios->power_mode == MMC_POWER_OFF) { + if (host->set_pwr) + host->set_pwr(host->pdev, 0); + if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) && + pdata->power) { + pdata->power = false; + pm_runtime_put(&host->pdev->dev); + } } tmio_mmc_clk_stop(host); + } else { + /* start bus clock */ + tmio_mmc_clk_start(host); } switch (ios->bus_width) { @@ -789,8 +817,6 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) current->comm, task_pid_nr(current), ios->clock, ios->power_mode); host->mrq = NULL; - - mutex_unlock(&host->ios_lock); } static int tmio_mmc_get_ro(struct mmc_host *mmc) @@ -887,20 +913,16 @@ int __devinit tmio_mmc_host_probe(struct tmio_mmc_host **host, tmio_mmc_enable_sdio_irq(mmc, 0); spin_lock_init(&_host->lock); - mutex_init(&_host->ios_lock); /* Init delayed work for request timeouts */ INIT_DELAYED_WORK(&_host->delayed_reset_work, tmio_mmc_reset_work); - INIT_WORK(&_host->done, tmio_mmc_done_work); /* See if we also get DMA */ tmio_mmc_request_dma(_host, pdata); /* We have to keep the device powered for its card detection to work */ - if (!(pdata->flags & TMIO_MMC_HAS_COLD_CD)) { - pdata->power = true; + if (!(pdata->flags & TMIO_MMC_HAS_COLD_CD)) 
pm_runtime_get_noresume(&pdev->dev); - } mmc_add_host(mmc); @@ -941,7 +963,6 @@ void tmio_mmc_host_remove(struct tmio_mmc_host *host) pm_runtime_get_sync(&pdev->dev); mmc_remove_host(host->mmc); - cancel_work_sync(&host->done); cancel_delayed_work_sync(&host->delayed_reset_work); tmio_mmc_release_dma(host); @@ -977,16 +998,11 @@ int tmio_mmc_host_resume(struct device *dev) /* The MMC core will perform the complete set up */ host->pdata->power = false; - host->pm_global = true; if (!host->pm_error) pm_runtime_get_sync(dev); - if (host->pm_global) { - /* Runtime PM resume callback didn't run */ - tmio_mmc_reset(host); - tmio_mmc_enable_dma(host, true); - host->pm_global = false; - } + tmio_mmc_reset(mmc_priv(mmc)); + tmio_mmc_request_dma(host, host->pdata); return mmc_resume_host(mmc); } @@ -1007,15 +1023,12 @@ int tmio_mmc_host_runtime_resume(struct device *dev) struct tmio_mmc_data *pdata = host->pdata; tmio_mmc_reset(host); - tmio_mmc_enable_dma(host, true); if (pdata->power) { /* Only entered after a card-insert interrupt */ - if (!mmc->card) - tmio_mmc_set_ios(mmc, &mmc->ios); + tmio_mmc_set_ios(mmc, &mmc->ios); mmc_detect_change(mmc, msecs_to_jiffies(100)); } - host->pm_global = false; return 0; } diff --git a/trunk/drivers/mtd/ubi/build.c b/trunk/drivers/mtd/ubi/build.c index 6c3fb5ab20f5..65626c1c446d 100644 --- a/trunk/drivers/mtd/ubi/build.c +++ b/trunk/drivers/mtd/ubi/build.c @@ -953,14 +953,10 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) if (!ubi->peb_buf2) goto out_free; - err = ubi_debugging_init_dev(ubi); - if (err) - goto out_free; - err = attach_by_scanning(ubi); if (err) { dbg_err("failed to attach by scanning, error %d", err); - goto out_debugging; + goto out_free; } if (ubi->autoresize_vol_id != -1) { @@ -973,16 +969,12 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) if (err) goto out_detach; - err = ubi_debugfs_init_dev(ubi); - if (err) - goto out_uif; - ubi->bgt_thread = 
kthread_create(ubi_thread, ubi, ubi->bgt_name); if (IS_ERR(ubi->bgt_thread)) { err = PTR_ERR(ubi->bgt_thread); ubi_err("cannot spawn \"%s\", error %d", ubi->bgt_name, err); - goto out_debugfs; + goto out_uif; } ubi_msg("attached mtd%d to ubi%d", mtd->index, ubi_num); @@ -1016,18 +1008,12 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL); return ubi_num; -out_debugfs: - ubi_debugfs_exit_dev(ubi); out_uif: - get_device(&ubi->dev); - ubi_assert(ref); uif_close(ubi); out_detach: ubi_wl_close(ubi); free_internal_volumes(ubi); vfree(ubi->vtbl); -out_debugging: - ubi_debugging_exit_dev(ubi); out_free: vfree(ubi->peb_buf1); vfree(ubi->peb_buf2); @@ -1094,13 +1080,11 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) */ get_device(&ubi->dev); - ubi_debugfs_exit_dev(ubi); uif_close(ubi); ubi_wl_close(ubi); free_internal_volumes(ubi); vfree(ubi->vtbl); put_mtd_device(ubi->mtd); - ubi_debugging_exit_dev(ubi); vfree(ubi->peb_buf1); vfree(ubi->peb_buf2); ubi_msg("mtd%d is detached from ubi%d", ubi->mtd->index, ubi->ubi_num); @@ -1215,11 +1199,6 @@ static int __init ubi_init(void) if (!ubi_wl_entry_slab) goto out_dev_unreg; - err = ubi_debugfs_init(); - if (err) - goto out_slab; - - /* Attach MTD devices */ for (i = 0; i < mtd_devs; i++) { struct mtd_dev_param *p = &mtd_dev_param[i]; @@ -1268,8 +1247,6 @@ static int __init ubi_init(void) ubi_detach_mtd_dev(ubi_devices[k]->ubi_num, 1); mutex_unlock(&ubi_devices_mutex); } - ubi_debugfs_exit(); -out_slab: kmem_cache_destroy(ubi_wl_entry_slab); out_dev_unreg: misc_deregister(&ubi_ctrl_cdev); @@ -1293,7 +1270,6 @@ static void __exit ubi_exit(void) ubi_detach_mtd_dev(ubi_devices[i]->ubi_num, 1); mutex_unlock(&ubi_devices_mutex); } - ubi_debugfs_exit(); kmem_cache_destroy(ubi_wl_entry_slab); misc_deregister(&ubi_ctrl_cdev); class_remove_file(ubi_class, &ubi_version); diff --git a/trunk/drivers/mtd/ubi/debug.c b/trunk/drivers/mtd/ubi/debug.c index 
ab80c0debac8..2224cbe41ddf 100644 --- a/trunk/drivers/mtd/ubi/debug.c +++ b/trunk/drivers/mtd/ubi/debug.c @@ -27,9 +27,17 @@ #ifdef CONFIG_MTD_UBI_DEBUG #include "ubi.h" -#include -#include #include +#include + +unsigned int ubi_chk_flags; +unsigned int ubi_tst_flags; + +module_param_named(debug_chks, ubi_chk_flags, uint, S_IRUGO | S_IWUSR); +module_param_named(debug_tsts, ubi_chk_flags, uint, S_IRUGO | S_IWUSR); + +MODULE_PARM_DESC(debug_chks, "Debug check flags"); +MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); /** * ubi_dbg_dump_ec_hdr - dump an erase counter header. @@ -231,261 +239,4 @@ void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len) return; } -/** - * ubi_debugging_init_dev - initialize debugging for an UBI device. - * @ubi: UBI device description object - * - * This function initializes debugging-related data for UBI device @ubi. - * Returns zero in case of success and a negative error code in case of - * failure. - */ -int ubi_debugging_init_dev(struct ubi_device *ubi) -{ - ubi->dbg = kzalloc(sizeof(struct ubi_debug_info), GFP_KERNEL); - if (!ubi->dbg) - return -ENOMEM; - - return 0; -} - -/** - * ubi_debugging_exit_dev - free debugging data for an UBI device. - * @ubi: UBI device description object - */ -void ubi_debugging_exit_dev(struct ubi_device *ubi) -{ - kfree(ubi->dbg); -} - -/* - * Root directory for UBI stuff in debugfs. Contains sub-directories which - * contain the stuff specific to particular UBI devices. - */ -static struct dentry *dfs_rootdir; - -/** - * ubi_debugfs_init - create UBI debugfs directory. - * - * Create UBI debugfs directory. Returns zero in case of success and a negative - * error code in case of failure. - */ -int ubi_debugfs_init(void) -{ - dfs_rootdir = debugfs_create_dir("ubi", NULL); - if (IS_ERR_OR_NULL(dfs_rootdir)) { - int err = dfs_rootdir ? 
-ENODEV : PTR_ERR(dfs_rootdir); - - ubi_err("cannot create \"ubi\" debugfs directory, error %d\n", - err); - return err; - } - - return 0; -} - -/** - * ubi_debugfs_exit - remove UBI debugfs directory. - */ -void ubi_debugfs_exit(void) -{ - debugfs_remove(dfs_rootdir); -} - -/* Read an UBI debugfs file */ -static ssize_t dfs_file_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - unsigned long ubi_num = (unsigned long)file->private_data; - struct dentry *dent = file->f_path.dentry; - struct ubi_device *ubi; - struct ubi_debug_info *d; - char buf[3]; - int val; - - ubi = ubi_get_device(ubi_num); - if (!ubi) - return -ENODEV; - d = ubi->dbg; - - if (dent == d->dfs_chk_gen) - val = d->chk_gen; - else if (dent == d->dfs_chk_io) - val = d->chk_io; - else if (dent == d->dfs_disable_bgt) - val = d->disable_bgt; - else if (dent == d->dfs_emulate_bitflips) - val = d->emulate_bitflips; - else if (dent == d->dfs_emulate_io_failures) - val = d->emulate_io_failures; - else { - count = -EINVAL; - goto out; - } - - if (val) - buf[0] = '1'; - else - buf[0] = '0'; - buf[1] = '\n'; - buf[2] = 0x00; - - count = simple_read_from_buffer(user_buf, count, ppos, buf, 2); - -out: - ubi_put_device(ubi); - return count; -} - -/* Write an UBI debugfs file */ -static ssize_t dfs_file_write(struct file *file, const char __user *user_buf, - size_t count, loff_t *ppos) -{ - unsigned long ubi_num = (unsigned long)file->private_data; - struct dentry *dent = file->f_path.dentry; - struct ubi_device *ubi; - struct ubi_debug_info *d; - size_t buf_size; - char buf[8]; - int val; - - ubi = ubi_get_device(ubi_num); - if (!ubi) - return -ENODEV; - d = ubi->dbg; - - buf_size = min_t(size_t, count, (sizeof(buf) - 1)); - if (copy_from_user(buf, user_buf, buf_size)) { - count = -EFAULT; - goto out; - } - - if (buf[0] == '1') - val = 1; - else if (buf[0] == '0') - val = 0; - else { - count = -EINVAL; - goto out; - } - - if (dent == d->dfs_chk_gen) - d->chk_gen = val; - else if 
(dent == d->dfs_chk_io) - d->chk_io = val; - else if (dent == d->dfs_disable_bgt) - d->disable_bgt = val; - else if (dent == d->dfs_emulate_bitflips) - d->emulate_bitflips = val; - else if (dent == d->dfs_emulate_io_failures) - d->emulate_io_failures = val; - else - count = -EINVAL; - -out: - ubi_put_device(ubi); - return count; -} - -static int default_open(struct inode *inode, struct file *file) -{ - if (inode->i_private) - file->private_data = inode->i_private; - - return 0; -} - -/* File operations for all UBI debugfs files */ -static const struct file_operations dfs_fops = { - .read = dfs_file_read, - .write = dfs_file_write, - .open = default_open, - .llseek = no_llseek, - .owner = THIS_MODULE, -}; - -/** - * ubi_debugfs_init_dev - initialize debugfs for an UBI device. - * @ubi: UBI device description object - * - * This function creates all debugfs files for UBI device @ubi. Returns zero in - * case of success and a negative error code in case of failure. - */ -int ubi_debugfs_init_dev(struct ubi_device *ubi) -{ - int err, n; - unsigned long ubi_num = ubi->ubi_num; - const char *fname; - struct dentry *dent; - struct ubi_debug_info *d = ubi->dbg; - - n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN + 1, UBI_DFS_DIR_NAME, - ubi->ubi_num); - if (n == UBI_DFS_DIR_LEN) { - /* The array size is too small */ - fname = UBI_DFS_DIR_NAME; - dent = ERR_PTR(-EINVAL); - goto out; - } - - fname = d->dfs_dir_name; - dent = debugfs_create_dir(fname, dfs_rootdir); - if (IS_ERR_OR_NULL(dent)) - goto out; - d->dfs_dir = dent; - - fname = "chk_gen"; - dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_gen = dent; - - fname = "chk_io"; - dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_io = dent; - - fname = "tst_disable_bgt"; - dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void 
*)ubi_num, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_disable_bgt = dent; - - fname = "tst_emulate_bitflips"; - dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_emulate_bitflips = dent; - - fname = "tst_emulate_io_failures"; - dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_emulate_io_failures = dent; - - return 0; - -out_remove: - debugfs_remove_recursive(d->dfs_dir); -out: - err = dent ? PTR_ERR(dent) : -ENODEV; - ubi_err("cannot create \"%s\" debugfs file or directory, error %d\n", - fname, err); - return err; -} - -/** - * dbg_debug_exit_dev - free all debugfs files corresponding to device @ubi - * @ubi: UBI device description object - */ -void ubi_debugfs_exit_dev(struct ubi_device *ubi) -{ - debugfs_remove_recursive(ubi->dbg->dfs_dir); -} - #endif /* CONFIG_MTD_UBI_DEBUG */ diff --git a/trunk/drivers/mtd/ubi/debug.h b/trunk/drivers/mtd/ubi/debug.h index 65b5b76cc379..3f1a09c5c438 100644 --- a/trunk/drivers/mtd/ubi/debug.h +++ b/trunk/drivers/mtd/ubi/debug.h @@ -21,6 +21,14 @@ #ifndef __UBI_DEBUG_H__ #define __UBI_DEBUG_H__ +struct ubi_ec_hdr; +struct ubi_vid_hdr; +struct ubi_volume; +struct ubi_vtbl_record; +struct ubi_scan_volume; +struct ubi_scan_leb; +struct ubi_mkvol_req; + #ifdef CONFIG_MTD_UBI_DEBUG #include @@ -63,103 +71,86 @@ void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv); void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type); void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req); void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len); + +extern unsigned int ubi_chk_flags; + +/* + * Debugging check flags. 
+ * + * UBI_CHK_GEN: general checks + * UBI_CHK_IO: check writes and erases + */ +enum { + UBI_CHK_GEN = 0x1, + UBI_CHK_IO = 0x2, +}; + int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len); int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, int len); -int ubi_debugging_init_dev(struct ubi_device *ubi); -void ubi_debugging_exit_dev(struct ubi_device *ubi); -int ubi_debugfs_init(void); -void ubi_debugfs_exit(void); -int ubi_debugfs_init_dev(struct ubi_device *ubi); -void ubi_debugfs_exit_dev(struct ubi_device *ubi); -/* - * The UBI debugfs directory name pattern and maximum name length (3 for "ubi" - * + 2 for the number plus 1 for the trailing zero byte. - */ -#define UBI_DFS_DIR_NAME "ubi%d" -#define UBI_DFS_DIR_LEN (3 + 2 + 1) +extern unsigned int ubi_tst_flags; -/** - * struct ubi_debug_info - debugging information for an UBI device. +/* + * Special testing flags. * - * @chk_gen: if UBI general extra checks are enabled - * @chk_io: if UBI I/O extra checks are enabled - * @disable_bgt: disable the background task for testing purposes - * @emulate_bitflips: emulate bit-flips for testing purposes - * @emulate_io_failures: emulate write/erase failures for testing purposes - * @dfs_dir_name: name of debugfs directory containing files of this UBI device - * @dfs_dir: direntry object of the UBI device debugfs directory - * @dfs_chk_gen: debugfs knob to enable UBI general extra checks - * @dfs_chk_io: debugfs knob to enable UBI I/O extra checks - * @dfs_disable_bgt: debugfs knob to disable the background task - * @dfs_emulate_bitflips: debugfs knob to emulate bit-flips - * @dfs_emulate_io_failures: debugfs knob to emulate write/erase failures + * UBI_TST_DISABLE_BGT: disable the background thread + * UBI_TST_EMULATE_BITFLIPS: emulate bit-flips + * UBI_TST_EMULATE_WRITE_FAILURES: emulate write failures + * UBI_TST_EMULATE_ERASE_FAILURES: emulate erase failures */ -struct ubi_debug_info { - unsigned int
chk_gen:1; - unsigned int chk_io:1; - unsigned int disable_bgt:1; - unsigned int emulate_bitflips:1; - unsigned int emulate_io_failures:1; - char dfs_dir_name[UBI_DFS_DIR_LEN + 1]; - struct dentry *dfs_dir; - struct dentry *dfs_chk_gen; - struct dentry *dfs_chk_io; - struct dentry *dfs_disable_bgt; - struct dentry *dfs_emulate_bitflips; - struct dentry *dfs_emulate_io_failures; +enum { + UBI_TST_DISABLE_BGT = 0x1, + UBI_TST_EMULATE_BITFLIPS = 0x2, + UBI_TST_EMULATE_WRITE_FAILURES = 0x4, + UBI_TST_EMULATE_ERASE_FAILURES = 0x8, }; /** * ubi_dbg_is_bgt_disabled - if the background thread is disabled. - * @ubi: UBI device description object * * Returns non-zero if the UBI background thread is disabled for testing * purposes. */ -static inline int ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi) +static inline int ubi_dbg_is_bgt_disabled(void) { - return ubi->dbg->disable_bgt; + return ubi_tst_flags & UBI_TST_DISABLE_BGT; } /** * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip. - * @ubi: UBI device description object * * Returns non-zero if a bit-flip should be emulated, otherwise returns zero. */ -static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) +static inline int ubi_dbg_is_bitflip(void) { - if (ubi->dbg->emulate_bitflips) + if (ubi_tst_flags & UBI_TST_EMULATE_BITFLIPS) return !(random32() % 200); return 0; } /** * ubi_dbg_is_write_failure - if it is time to emulate a write failure. - * @ubi: UBI device description object * * Returns non-zero if a write failure should be emulated, otherwise returns * zero. */ -static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi) +static inline int ubi_dbg_is_write_failure(void) { - if (ubi->dbg->emulate_io_failures) + if (ubi_tst_flags & UBI_TST_EMULATE_WRITE_FAILURES) return !(random32() % 500); return 0; } /** * ubi_dbg_is_erase_failure - if its time to emulate an erase failure. 
- * @ubi: UBI device description object * * Returns non-zero if an erase failure should be emulated, otherwise returns * zero. */ -static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi) +static inline int ubi_dbg_is_erase_failure(void) { - if (ubi->dbg->emulate_io_failures) + if (ubi_tst_flags & UBI_TST_EMULATE_ERASE_FAILURES) return !(random32() % 400); return 0; } @@ -210,6 +201,11 @@ static inline void ubi_dbg_dump_flash(struct ubi_device *ubi, static inline void ubi_dbg_print_hex_dump(const char *l, const char *ps, int pt, int r, int g, const void *b, size_t len, bool a) { return; } + +static inline int ubi_dbg_is_bgt_disabled(void) { return 0; } +static inline int ubi_dbg_is_bitflip(void) { return 0; } +static inline int ubi_dbg_is_write_failure(void) { return 0; } +static inline int ubi_dbg_is_erase_failure(void) { return 0; } static inline int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len) { return 0; } @@ -217,20 +213,5 @@ static inline int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, int len) { return 0; } -static inline int ubi_debugging_init_dev(struct ubi_device *ubi) { return 0; } -static inline void ubi_debugging_exit_dev(struct ubi_device *ubi) { return; } -static inline int ubi_debugfs_init(void) { return 0; } -static inline void ubi_debugfs_exit(void) { return; } -static inline int ubi_debugfs_init_dev(struct ubi_device *ubi) { return 0; } -static inline void ubi_debugfs_exit_dev(struct ubi_device *ubi) { return; } - -static inline int -ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi) { return 0; } -static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) { return 0; } -static inline int -ubi_dbg_is_write_failure(const struct ubi_device *ubi) { return 0; } -static inline int -ubi_dbg_is_erase_failure(const struct ubi_device *ubi) { return 0; } - #endif /* !CONFIG_MTD_UBI_DEBUG */ #endif /* !__UBI_DEBUG_H__ */ diff --git a/trunk/drivers/mtd/ubi/io.c 
b/trunk/drivers/mtd/ubi/io.c index 6ba55c235873..8c1b1c7bc4a7 100644 --- a/trunk/drivers/mtd/ubi/io.c +++ b/trunk/drivers/mtd/ubi/io.c @@ -212,7 +212,7 @@ int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, } else { ubi_assert(len == read); - if (ubi_dbg_is_bitflip(ubi)) { + if (ubi_dbg_is_bitflip()) { dbg_gen("bit-flip (emulated)"); err = UBI_IO_BITFLIPS; } @@ -281,7 +281,7 @@ int ubi_io_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, return err; } - if (ubi_dbg_is_write_failure(ubi)) { + if (ubi_dbg_is_write_failure()) { dbg_err("cannot write %d bytes to PEB %d:%d " "(emulated)", len, pnum, offset); ubi_dbg_dump_stack(); @@ -396,7 +396,7 @@ static int do_sync_erase(struct ubi_device *ubi, int pnum) if (err) return err; - if (ubi_dbg_is_erase_failure(ubi)) { + if (ubi_dbg_is_erase_failure()) { dbg_err("cannot erase PEB %d (emulated)", pnum); return -EIO; } @@ -1146,7 +1146,7 @@ static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum) { int err; - if (!ubi->dbg->chk_io) + if (!(ubi_chk_flags & UBI_CHK_IO)) return 0; err = ubi_io_is_bad(ubi, pnum); @@ -1173,7 +1173,7 @@ static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum, int err; uint32_t magic; - if (!ubi->dbg->chk_io) + if (!(ubi_chk_flags & UBI_CHK_IO)) return 0; magic = be32_to_cpu(ec_hdr->magic); @@ -1211,7 +1211,7 @@ static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum) uint32_t crc, hdr_crc; struct ubi_ec_hdr *ec_hdr; - if (!ubi->dbg->chk_io) + if (!(ubi_chk_flags & UBI_CHK_IO)) return 0; ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); @@ -1255,7 +1255,7 @@ static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum, int err; uint32_t magic; - if (!ubi->dbg->chk_io) + if (!(ubi_chk_flags & UBI_CHK_IO)) return 0; magic = be32_to_cpu(vid_hdr->magic); @@ -1296,7 +1296,7 @@ static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum) struct ubi_vid_hdr *vid_hdr; void *p; - if 
(!ubi->dbg->chk_io) + if (!(ubi_chk_flags & UBI_CHK_IO)) return 0; vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); @@ -1348,7 +1348,7 @@ int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum, void *buf1; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; - if (!ubi->dbg->chk_io) + if (!(ubi_chk_flags & UBI_CHK_IO)) return 0; buf1 = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); @@ -1412,7 +1412,7 @@ int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len) void *buf; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; - if (!ubi->dbg->chk_io) + if (!(ubi_chk_flags & UBI_CHK_IO)) return 0; buf = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); diff --git a/trunk/drivers/mtd/ubi/scan.c b/trunk/drivers/mtd/ubi/scan.c index a3a198f9b98d..2135a53732ff 100644 --- a/trunk/drivers/mtd/ubi/scan.c +++ b/trunk/drivers/mtd/ubi/scan.c @@ -1347,7 +1347,7 @@ static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si) struct ubi_scan_leb *seb, *last_seb; uint8_t *buf; - if (!ubi->dbg->chk_gen) + if (!(ubi_chk_flags & UBI_CHK_GEN)) return 0; /* diff --git a/trunk/drivers/mtd/ubi/ubi.h b/trunk/drivers/mtd/ubi/ubi.h index dc64c767fd21..c6c22295898e 100644 --- a/trunk/drivers/mtd/ubi/ubi.h +++ b/trunk/drivers/mtd/ubi/ubi.h @@ -44,6 +44,7 @@ #include "ubi-media.h" #include "scan.h" +#include "debug.h" /* Maximum number of supported UBI devices */ #define UBI_MAX_DEVICES 32 @@ -389,8 +390,6 @@ struct ubi_wl_entry; * @peb_buf2: another buffer of PEB size used for different purposes * @buf_mutex: protects @peb_buf1 and @peb_buf2 * @ckvol_mutex: serializes static volume checking when opening - * - * @dbg: debugging information for this UBI device */ struct ubi_device { struct cdev cdev; @@ -473,12 +472,8 @@ struct ubi_device { void *peb_buf2; struct mutex buf_mutex; struct mutex ckvol_mutex; - - struct ubi_debug_info *dbg; }; -#include "debug.h" - extern struct kmem_cache *ubi_wl_entry_slab; extern const struct file_operations 
ubi_ctrl_cdev_operations; extern const struct file_operations ubi_cdev_operations; @@ -667,7 +662,6 @@ static inline void ubi_ro_mode(struct ubi_device *ubi) if (!ubi->ro_mode) { ubi->ro_mode = 1; ubi_warn("switch to read-only mode"); - ubi_dbg_dump_stack(); } } diff --git a/trunk/drivers/mtd/ubi/vmt.c b/trunk/drivers/mtd/ubi/vmt.c index 97e093d19672..366eb70219a6 100644 --- a/trunk/drivers/mtd/ubi/vmt.c +++ b/trunk/drivers/mtd/ubi/vmt.c @@ -871,7 +871,7 @@ static int paranoid_check_volumes(struct ubi_device *ubi) { int i, err = 0; - if (!ubi->dbg->chk_gen) + if (!(ubi_chk_flags & UBI_CHK_GEN)) return 0; for (i = 0; i < ubi->vtbl_slots; i++) { diff --git a/trunk/drivers/mtd/ubi/vtbl.c b/trunk/drivers/mtd/ubi/vtbl.c index 4b50a3029b84..fd3bf770f518 100644 --- a/trunk/drivers/mtd/ubi/vtbl.c +++ b/trunk/drivers/mtd/ubi/vtbl.c @@ -307,7 +307,8 @@ static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si, { int err, tries = 0; static struct ubi_vid_hdr *vid_hdr; - struct ubi_scan_leb *new_seb; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *new_seb, *old_seb = NULL; ubi_msg("create volume table (copy #%d)", copy + 1); @@ -315,6 +316,15 @@ static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si, if (!vid_hdr) return -ENOMEM; + /* + * Check whether a logical eraseblock which would have to contain + * this volume table copy was found during scanning. It has to be wiped + * out. + */ + sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID); + if (sv) + old_seb = ubi_scan_find_seb(sv, copy); + retry: new_seb = ubi_scan_get_free_peb(ubi, si); if (IS_ERR(new_seb)) { @@ -341,8 +351,8 @@ static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si, goto write_error; /* - * And add it to the scanning information. Don't delete the old version - * of this LEB as it will be deleted and freed in 'ubi_scan_add_used()'. + * And add it to the scanning information.
Don't delete the old + * @old_seb as it will be deleted and freed in 'ubi_scan_add_used()'. */ err = ubi_scan_add_used(ubi, si, new_seb->pnum, new_seb->ec, vid_hdr, 0); @@ -866,7 +876,7 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si) */ static void paranoid_vtbl_check(const struct ubi_device *ubi) { - if (!ubi->dbg->chk_gen) + if (!(ubi_chk_flags & UBI_CHK_GEN)) return; if (vtbl_check(ubi, ubi->vtbl)) { diff --git a/trunk/drivers/mtd/ubi/wl.c b/trunk/drivers/mtd/ubi/wl.c index 42c684cf3688..ff2c4956eeff 100644 --- a/trunk/drivers/mtd/ubi/wl.c +++ b/trunk/drivers/mtd/ubi/wl.c @@ -1,5 +1,4 @@ /* - * @ubi: UBI device description object * Copyright (c) International Business Machines Corp., 2006 * * This program is free software; you can redistribute it and/or modify @@ -164,14 +163,12 @@ struct ubi_work { #ifdef CONFIG_MTD_UBI_DEBUG static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec); -static int paranoid_check_in_wl_tree(const struct ubi_device *ubi, - struct ubi_wl_entry *e, +static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root); -static int paranoid_check_in_pq(const struct ubi_device *ubi, - struct ubi_wl_entry *e); +static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e); #else #define paranoid_check_ec(ubi, pnum, ec) 0 -#define paranoid_check_in_wl_tree(ubi, e, root) +#define paranoid_check_in_wl_tree(e, root) #define paranoid_check_in_pq(ubi, e) 0 #endif @@ -452,7 +449,7 @@ int ubi_wl_get_peb(struct ubi_device *ubi, int dtype) BUG(); } - paranoid_check_in_wl_tree(ubi, e, &ubi->free); + paranoid_check_in_wl_tree(e, &ubi->free); /* * Move the physical eraseblock to the protection queue where it will @@ -616,7 +613,7 @@ static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) list_add_tail(&wrk->list, &ubi->works); ubi_assert(ubi->works_count >= 0); ubi->works_count += 1; - if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled(ubi)) + if 
(ubi->thread_enabled && !ubi_dbg_is_bgt_disabled()) wake_up_process(ubi->bgt_thread); spin_unlock(&ubi->wl_lock); } @@ -715,7 +712,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, e1->ec, e2->ec); goto out_cancel; } - paranoid_check_in_wl_tree(ubi, e1, &ubi->used); + paranoid_check_in_wl_tree(e1, &ubi->used); rb_erase(&e1->u.rb, &ubi->used); dbg_wl("move PEB %d EC %d to PEB %d EC %d", e1->pnum, e1->ec, e2->pnum, e2->ec); @@ -724,12 +721,12 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, scrubbing = 1; e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb); e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); - paranoid_check_in_wl_tree(ubi, e1, &ubi->scrub); + paranoid_check_in_wl_tree(e1, &ubi->scrub); rb_erase(&e1->u.rb, &ubi->scrub); dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); } - paranoid_check_in_wl_tree(ubi, e2, &ubi->free); + paranoid_check_in_wl_tree(e2, &ubi->free); rb_erase(&e2->u.rb, &ubi->free); ubi->move_from = e1; ubi->move_to = e2; @@ -1172,13 +1169,13 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) return 0; } else { if (in_wl_tree(e, &ubi->used)) { - paranoid_check_in_wl_tree(ubi, e, &ubi->used); + paranoid_check_in_wl_tree(e, &ubi->used); rb_erase(&e->u.rb, &ubi->used); } else if (in_wl_tree(e, &ubi->scrub)) { - paranoid_check_in_wl_tree(ubi, e, &ubi->scrub); + paranoid_check_in_wl_tree(e, &ubi->scrub); rb_erase(&e->u.rb, &ubi->scrub); } else if (in_wl_tree(e, &ubi->erroneous)) { - paranoid_check_in_wl_tree(ubi, e, &ubi->erroneous); + paranoid_check_in_wl_tree(e, &ubi->erroneous); rb_erase(&e->u.rb, &ubi->erroneous); ubi->erroneous_peb_count -= 1; ubi_assert(ubi->erroneous_peb_count >= 0); @@ -1245,7 +1242,7 @@ int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum) } if (in_wl_tree(e, &ubi->used)) { - paranoid_check_in_wl_tree(ubi, e, &ubi->used); + paranoid_check_in_wl_tree(e, &ubi->used); rb_erase(&e->u.rb, &ubi->used); } else { int err; @@ 
-1367,7 +1364,7 @@ int ubi_thread(void *u) spin_lock(&ubi->wl_lock); if (list_empty(&ubi->works) || ubi->ro_mode || - !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) { + !ubi->thread_enabled || ubi_dbg_is_bgt_disabled()) { set_current_state(TASK_INTERRUPTIBLE); spin_unlock(&ubi->wl_lock); schedule(); @@ -1582,7 +1579,7 @@ static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec) long long read_ec; struct ubi_ec_hdr *ec_hdr; - if (!ubi->dbg->chk_gen) + if (!(ubi_chk_flags & UBI_CHK_GEN)) return 0; ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); @@ -1612,18 +1609,16 @@ static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec) /** * paranoid_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree. - * @ubi: UBI device description object * @e: the wear-leveling entry to check * @root: the root of the tree * * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it * is not. */ -static int paranoid_check_in_wl_tree(const struct ubi_device *ubi, - struct ubi_wl_entry *e, +static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root) { - if (!ubi->dbg->chk_gen) + if (!(ubi_chk_flags & UBI_CHK_GEN)) return 0; if (in_wl_tree(e, root)) @@ -1643,13 +1638,12 @@ static int paranoid_check_in_wl_tree(const struct ubi_device *ubi, * * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not. 
*/ -static int paranoid_check_in_pq(const struct ubi_device *ubi, - struct ubi_wl_entry *e) +static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e) { struct ubi_wl_entry *p; int i; - if (!ubi->dbg->chk_gen) + if (!(ubi_chk_flags & UBI_CHK_GEN)) return 0; for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) diff --git a/trunk/drivers/pci/quirks.c b/trunk/drivers/pci/quirks.c index 1196f61a4ab6..02145e9697a9 100644 --- a/trunk/drivers/pci/quirks.c +++ b/trunk/drivers/pci/quirks.c @@ -2758,29 +2758,6 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev) dev_notice(&dev->dev, "proprietary Ricoh MMC controller disabled (via firewire function)\n"); dev_notice(&dev->dev, "MMC cards are now supported by standard SDHCI controller\n"); - - /* - * RICOH 0xe823 SD/MMC card reader fails to recognize - * certain types of SD/MMC cards. Lowering the SD base - * clock frequency from 200Mhz to 50Mhz fixes this issue. - * - * 0x150 - SD2.0 mode enable for changing base clock - * frequency to 50Mhz - * 0xe1 - Base clock frequency - * 0x32 - 50Mhz new clock frequency - * 0xf9 - Key register for 0x150 - * 0xfc - key register for 0xe1 - */ - if (dev->device == PCI_DEVICE_ID_RICOH_R5CE823) { - pci_write_config_byte(dev, 0xf9, 0xfc); - pci_write_config_byte(dev, 0x150, 0x10); - pci_write_config_byte(dev, 0xf9, 0x00); - pci_write_config_byte(dev, 0xfc, 0x01); - pci_write_config_byte(dev, 0xe1, 0x32); - pci_write_config_byte(dev, 0xfc, 0x00); - - dev_notice(&dev->dev, "MMC controller base frequency changed to 50Mhz.\n"); - } } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832); DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832); diff --git a/trunk/drivers/xen/Kconfig b/trunk/drivers/xen/Kconfig index 03bc471c3eed..a59638b37c1a 100644 --- a/trunk/drivers/xen/Kconfig +++ b/trunk/drivers/xen/Kconfig @@ -9,23 +9,6 @@ config XEN_BALLOON the system to expand the domain's memory 
allocation, or alternatively return unneeded memory to the system. -config XEN_SELFBALLOONING - bool "Dynamically self-balloon kernel memory to target" - depends on XEN && XEN_BALLOON && CLEANCACHE && SWAP - default n - help - Self-ballooning dynamically balloons available kernel memory driven - by the current usage of anonymous memory ("committed AS") and - controlled by various sysfs-settable parameters. Configuring - FRONTSWAP is highly recommended; if it is not configured, self- - ballooning is disabled by default but can be enabled with the - 'selfballooning' kernel boot parameter. If FRONTSWAP is configured, - frontswap-selfshrinking is enabled by default but can be disabled - with the 'noselfshrink' kernel boot parameter; and self-ballooning - is enabled by default but can be disabled with the 'noselfballooning' - kernel boot parameter. Note that systems without a sufficiently - large swap device should not enable self-ballooning. - config XEN_SCRUB_PAGES bool "Scrub pages before returning them to system" depends on XEN_BALLOON @@ -122,33 +105,4 @@ config SWIOTLB_XEN depends on PCI select SWIOTLB -config XEN_TMEM - bool - default y if (CLEANCACHE || FRONTSWAP) - help - Shim to interface in-kernel Transcendent Memory hooks - (e.g. cleancache and frontswap) to Xen tmem hypercalls. - -config XEN_PCIDEV_BACKEND - tristate "Xen PCI-device backend driver" - depends on PCI && X86 && XEN - depends on XEN_BACKEND - default m - help - The PCI device backend driver allows the kernel to export arbitrary - PCI devices to other guests. If you select this to be a module, you - will need to make sure no other driver has bound to the device(s) - you want to make visible to other guests. - - The parameter "passthrough" allows you specify how you want the PCI - devices to appear in the guest. You can choose the default (0) where - PCI topology starts at 00.00.0, or (1) for passthrough if you want - the PCI devices topology appear the same as in the host. 
- - The "hide" parameter (only applicable if backend driver is compiled - into the kernel) allows you to bind the PCI devices to this module - from the default device drivers. The argument is the list of PCI BDFs: - xen-pciback.hide=(03:00.0)(04:00.0) - - If in doubt, say m. endmenu diff --git a/trunk/drivers/xen/Makefile b/trunk/drivers/xen/Makefile index 72bbb27d7a68..bbc18258ecc5 100644 --- a/trunk/drivers/xen/Makefile +++ b/trunk/drivers/xen/Makefile @@ -1,5 +1,6 @@ obj-y += grant-table.o features.o events.o manage.o balloon.o obj-y += xenbus/ +obj-y += tmem.o nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_features.o := $(nostackp) @@ -8,17 +9,14 @@ obj-$(CONFIG_BLOCK) += biomerge.o obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o -obj-$(CONFIG_XEN_SELFBALLOONING) += xen-selfballoon.o obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o -obj-$(CONFIG_XEN_TMEM) += tmem.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o obj-$(CONFIG_XEN_DOM0) += pci.o -obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ xen-evtchn-y := evtchn.o xen-gntdev-y := gntdev.o diff --git a/trunk/drivers/xen/events.c b/trunk/drivers/xen/events.c index da70f5c32eb9..30df85d8fca8 100644 --- a/trunk/drivers/xen/events.c +++ b/trunk/drivers/xen/events.c @@ -615,6 +615,11 @@ static int find_irq_by_gsi(unsigned gsi) return -1; } +int xen_allocate_pirq_gsi(unsigned gsi) +{ + return gsi; +} + /* * Do not make any assumptions regarding the relationship between the * IRQ number returned here and the Xen pirq argument. 
@@ -1688,6 +1693,6 @@ void __init xen_init_IRQ(void) } else { irq_ctx_init(smp_processor_id()); if (xen_initial_domain()) - pci_xen_initial_domain(); + xen_setup_pirqs(); } } diff --git a/trunk/drivers/xen/tmem.c b/trunk/drivers/xen/tmem.c index d369965e8f8a..816a44959ef0 100644 --- a/trunk/drivers/xen/tmem.c +++ b/trunk/drivers/xen/tmem.c @@ -1,7 +1,7 @@ /* * Xen implementation for transcendent memory (tmem) * - * Copyright (C) 2009-2011 Oracle Corp. All rights reserved. + * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. * Author: Dan Magenheimer */ @@ -9,14 +9,8 @@ #include #include #include -#include #include -/* temporary ifdef until include/linux/frontswap.h is upstream */ -#ifdef CONFIG_FRONTSWAP -#include -#endif - #include #include #include @@ -128,8 +122,14 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid) return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); } -int tmem_enabled __read_mostly; -EXPORT_SYMBOL(tmem_enabled); +static int xen_tmem_destroy_pool(u32 pool_id) +{ + struct tmem_oid oid = { { 0 } }; + + return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); +} + +int tmem_enabled; static int __init enable_tmem(char *s) { @@ -139,14 +139,6 @@ static int __init enable_tmem(char *s) __setup("tmem", enable_tmem); -#ifdef CONFIG_CLEANCACHE -static int xen_tmem_destroy_pool(u32 pool_id) -{ - struct tmem_oid oid = { { 0 } }; - - return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); -} - /* cleancache ops */ static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, @@ -248,156 +240,18 @@ static struct cleancache_ops tmem_cleancache_ops = { .init_shared_fs = tmem_cleancache_init_shared_fs, .init_fs = tmem_cleancache_init_fs }; -#endif - -#ifdef CONFIG_FRONTSWAP -/* frontswap tmem operations */ - -/* a single tmem poolid is used for all frontswap "types" (swapfiles) */ -static int tmem_frontswap_poolid; - -/* - * Swizzling increases objects per swaptype, increasing 
tmem concurrency - * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS - */ -#define SWIZ_BITS 4 -#define SWIZ_MASK ((1 << SWIZ_BITS) - 1) -#define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK)) -#define iswiz(_ind) (_ind >> SWIZ_BITS) - -static inline struct tmem_oid oswiz(unsigned type, u32 ind) -{ - struct tmem_oid oid = { .oid = { 0 } }; - oid.oid[0] = _oswiz(type, ind); - return oid; -} - -/* returns 0 if the page was successfully put into frontswap, -1 if not */ -static int tmem_frontswap_put_page(unsigned type, pgoff_t offset, - struct page *page) -{ - u64 ind64 = (u64)offset; - u32 ind = (u32)offset; - unsigned long pfn = page_to_pfn(page); - int pool = tmem_frontswap_poolid; - int ret; - - if (pool < 0) - return -1; - if (ind64 != ind) - return -1; - mb(); /* ensure page is quiescent; tmem may address it with an alias */ - ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), pfn); - /* translate Xen tmem return values to linux semantics */ - if (ret == 1) - return 0; - else - return -1; -} - -/* - * returns 0 if the page was successfully gotten from frontswap, -1 if - * was not present (should never happen!) 
- */ -static int tmem_frontswap_get_page(unsigned type, pgoff_t offset, - struct page *page) -{ - u64 ind64 = (u64)offset; - u32 ind = (u32)offset; - unsigned long pfn = page_to_pfn(page); - int pool = tmem_frontswap_poolid; - int ret; - - if (pool < 0) - return -1; - if (ind64 != ind) - return -1; - ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), pfn); - /* translate Xen tmem return values to linux semantics */ - if (ret == 1) - return 0; - else - return -1; -} - -/* flush a single page from frontswap */ -static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset) -{ - u64 ind64 = (u64)offset; - u32 ind = (u32)offset; - int pool = tmem_frontswap_poolid; - - if (pool < 0) - return; - if (ind64 != ind) - return; - (void) xen_tmem_flush_page(pool, oswiz(type, ind), iswiz(ind)); -} - -/* flush all pages from the passed swaptype */ -static void tmem_frontswap_flush_area(unsigned type) -{ - int pool = tmem_frontswap_poolid; - int ind; - - if (pool < 0) - return; - for (ind = SWIZ_MASK; ind >= 0; ind--) - (void)xen_tmem_flush_object(pool, oswiz(type, ind)); -} - -static void tmem_frontswap_init(unsigned ignored) -{ - struct tmem_pool_uuid private = TMEM_POOL_PRIVATE_UUID; - - /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ - if (tmem_frontswap_poolid < 0) - tmem_frontswap_poolid = - xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE); -} - -static int __initdata use_frontswap = 1; - -static int __init no_frontswap(char *s) -{ - use_frontswap = 0; - return 1; -} - -__setup("nofrontswap", no_frontswap); - -static struct frontswap_ops tmem_frontswap_ops = { - .put_page = tmem_frontswap_put_page, - .get_page = tmem_frontswap_get_page, - .flush_page = tmem_frontswap_flush_page, - .flush_area = tmem_frontswap_flush_area, - .init = tmem_frontswap_init -}; -#endif static int __init xen_tmem_init(void) { + struct cleancache_ops old_ops; + if (!xen_domain()) return 0; -#ifdef CONFIG_FRONTSWAP - if (tmem_enabled && use_frontswap) { 
- char *s = ""; - struct frontswap_ops old_ops = - frontswap_register_ops(&tmem_frontswap_ops); - - tmem_frontswap_poolid = -1; - if (old_ops.init != NULL) - s = " (WARNING: frontswap_ops overridden)"; - printk(KERN_INFO "frontswap enabled, RAM provided by " - "Xen Transcendent Memory\n"); - } -#endif #ifdef CONFIG_CLEANCACHE BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); if (tmem_enabled && use_cleancache) { char *s = ""; - struct cleancache_ops old_ops = - cleancache_register_ops(&tmem_cleancache_ops); + old_ops = cleancache_register_ops(&tmem_cleancache_ops); if (old_ops.init_fs != NULL) s = " (WARNING: cleancache_ops overridden)"; printk(KERN_INFO "cleancache enabled, RAM provided by " diff --git a/trunk/drivers/xen/xen-balloon.c b/trunk/drivers/xen/xen-balloon.c index 5c9dc43c1e94..a4ff225ee868 100644 --- a/trunk/drivers/xen/xen-balloon.c +++ b/trunk/drivers/xen/xen-balloon.c @@ -98,8 +98,6 @@ static int __init balloon_init(void) register_balloon(&balloon_sysdev); - register_xen_selfballooning(&balloon_sysdev); - target_watch.callback = watch_target; xenstore_notifier.notifier_call = balloon_init_watcher; diff --git a/trunk/drivers/xen/xen-pciback/Makefile b/trunk/drivers/xen/xen-pciback/Makefile deleted file mode 100644 index ffe0ad3438bd..000000000000 --- a/trunk/drivers/xen/xen-pciback/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o - -xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o -xen-pciback-y += conf_space.o conf_space_header.o \ - conf_space_capability.o \ - conf_space_quirks.o vpci.o \ - passthrough.o diff --git a/trunk/drivers/xen/xen-pciback/conf_space.c b/trunk/drivers/xen/xen-pciback/conf_space.c deleted file mode 100644 index a8031445d94e..000000000000 --- a/trunk/drivers/xen/xen-pciback/conf_space.c +++ /dev/null @@ -1,438 +0,0 @@ -/* - * PCI Backend - Functions for creating a virtual configuration space for - * exported PCI Devices. 
- * It's dangerous to allow PCI Driver Domains to change their - * device's resources (memory, i/o ports, interrupts). We need to - * restrict changes to certain PCI Configuration registers: - * BARs, INTERRUPT_PIN, most registers in the header... - * - * Author: Ryan Wilson - */ - -#include -#include -#include "pciback.h" -#include "conf_space.h" -#include "conf_space_quirks.h" - -#define DRV_NAME "xen-pciback" -static int permissive; -module_param(permissive, bool, 0644); - -/* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word, - * xen_pcibk_write_config_word, and xen_pcibk_write_config_byte are created. */ -#define DEFINE_PCI_CONFIG(op, size, type) \ -int xen_pcibk_##op##_config_##size \ -(struct pci_dev *dev, int offset, type value, void *data) \ -{ \ - return pci_##op##_config_##size(dev, offset, value); \ -} - -DEFINE_PCI_CONFIG(read, byte, u8 *) -DEFINE_PCI_CONFIG(read, word, u16 *) -DEFINE_PCI_CONFIG(read, dword, u32 *) - -DEFINE_PCI_CONFIG(write, byte, u8) -DEFINE_PCI_CONFIG(write, word, u16) -DEFINE_PCI_CONFIG(write, dword, u32) - -static int conf_space_read(struct pci_dev *dev, - const struct config_field_entry *entry, - int offset, u32 *value) -{ - int ret = 0; - const struct config_field *field = entry->field; - - *value = 0; - - switch (field->size) { - case 1: - if (field->u.b.read) - ret = field->u.b.read(dev, offset, (u8 *) value, - entry->data); - break; - case 2: - if (field->u.w.read) - ret = field->u.w.read(dev, offset, (u16 *) value, - entry->data); - break; - case 4: - if (field->u.dw.read) - ret = field->u.dw.read(dev, offset, value, entry->data); - break; - } - return ret; -} - -static int conf_space_write(struct pci_dev *dev, - const struct config_field_entry *entry, - int offset, u32 value) -{ - int ret = 0; - const struct config_field *field = entry->field; - - switch (field->size) { - case 1: - if (field->u.b.write) - ret = field->u.b.write(dev, offset, (u8) value, - entry->data); - break; - case 2: - if 
(field->u.w.write) - ret = field->u.w.write(dev, offset, (u16) value, - entry->data); - break; - case 4: - if (field->u.dw.write) - ret = field->u.dw.write(dev, offset, value, - entry->data); - break; - } - return ret; -} - -static inline u32 get_mask(int size) -{ - if (size == 1) - return 0xff; - else if (size == 2) - return 0xffff; - else - return 0xffffffff; -} - -static inline int valid_request(int offset, int size) -{ - /* Validate request (no un-aligned requests) */ - if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0) - return 1; - return 0; -} - -static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask, - int offset) -{ - if (offset >= 0) { - new_val_mask <<= (offset * 8); - new_val <<= (offset * 8); - } else { - new_val_mask >>= (offset * -8); - new_val >>= (offset * -8); - } - val = (val & ~new_val_mask) | (new_val & new_val_mask); - - return val; -} - -static int pcibios_err_to_errno(int err) -{ - switch (err) { - case PCIBIOS_SUCCESSFUL: - return XEN_PCI_ERR_success; - case PCIBIOS_DEVICE_NOT_FOUND: - return XEN_PCI_ERR_dev_not_found; - case PCIBIOS_BAD_REGISTER_NUMBER: - return XEN_PCI_ERR_invalid_offset; - case PCIBIOS_FUNC_NOT_SUPPORTED: - return XEN_PCI_ERR_not_implemented; - case PCIBIOS_SET_FAILED: - return XEN_PCI_ERR_access_denied; - } - return err; -} - -int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size, - u32 *ret_val) -{ - int err = 0; - struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); - const struct config_field_entry *cfg_entry; - const struct config_field *field; - int req_start, req_end, field_start, field_end; - /* if read fails for any reason, return 0 - * (as if device didn't respond) */ - u32 value = 0, tmp_val; - - if (unlikely(verbose_request)) - printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x\n", - pci_name(dev), size, offset); - - if (!valid_request(offset, size)) { - err = XEN_PCI_ERR_invalid_offset; - goto out; - } - - /* Get the real value first, then modify as 
appropriate */ - switch (size) { - case 1: - err = pci_read_config_byte(dev, offset, (u8 *) &value); - break; - case 2: - err = pci_read_config_word(dev, offset, (u16 *) &value); - break; - case 4: - err = pci_read_config_dword(dev, offset, &value); - break; - } - - list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { - field = cfg_entry->field; - - req_start = offset; - req_end = offset + size; - field_start = OFFSET(cfg_entry); - field_end = OFFSET(cfg_entry) + field->size; - - if ((req_start >= field_start && req_start < field_end) - || (req_end > field_start && req_end <= field_end)) { - err = conf_space_read(dev, cfg_entry, field_start, - &tmp_val); - if (err) - goto out; - - value = merge_value(value, tmp_val, - get_mask(field->size), - field_start - req_start); - } - } - -out: - if (unlikely(verbose_request)) - printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x = %x\n", - pci_name(dev), size, offset, value); - - *ret_val = value; - return pcibios_err_to_errno(err); -} - -int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value) -{ - int err = 0, handled = 0; - struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); - const struct config_field_entry *cfg_entry; - const struct config_field *field; - u32 tmp_val; - int req_start, req_end, field_start, field_end; - - if (unlikely(verbose_request)) - printk(KERN_DEBUG - DRV_NAME ": %s: write request %d bytes at 0x%x = %x\n", - pci_name(dev), size, offset, value); - - if (!valid_request(offset, size)) - return XEN_PCI_ERR_invalid_offset; - - list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { - field = cfg_entry->field; - - req_start = offset; - req_end = offset + size; - field_start = OFFSET(cfg_entry); - field_end = OFFSET(cfg_entry) + field->size; - - if ((req_start >= field_start && req_start < field_end) - || (req_end > field_start && req_end <= field_end)) { - tmp_val = 0; - - err = xen_pcibk_config_read(dev, field_start, - field->size, &tmp_val); - if 
(err) - break; - - tmp_val = merge_value(tmp_val, value, get_mask(size), - req_start - field_start); - - err = conf_space_write(dev, cfg_entry, field_start, - tmp_val); - - /* handled is set true here, but not every byte - * may have been written! Properly detecting if - * every byte is handled is unnecessary as the - * flag is used to detect devices that need - * special helpers to work correctly. - */ - handled = 1; - } - } - - if (!handled && !err) { - /* By default, anything not specificially handled above is - * read-only. The permissive flag changes this behavior so - * that anything not specifically handled above is writable. - * This means that some fields may still be read-only because - * they have entries in the config_field list that intercept - * the write and do nothing. */ - if (dev_data->permissive || permissive) { - switch (size) { - case 1: - err = pci_write_config_byte(dev, offset, - (u8) value); - break; - case 2: - err = pci_write_config_word(dev, offset, - (u16) value); - break; - case 4: - err = pci_write_config_dword(dev, offset, - (u32) value); - break; - } - } else if (!dev_data->warned_on_write) { - dev_data->warned_on_write = 1; - dev_warn(&dev->dev, "Driver tried to write to a " - "read-only configuration space field at offset" - " 0x%x, size %d. 
This may be harmless, but if " - "you have problems with your device:\n" - "1) see permissive attribute in sysfs\n" - "2) report problems to the xen-devel " - "mailing list along with details of your " - "device obtained from lspci.\n", offset, size); - } - } - - return pcibios_err_to_errno(err); -} - -void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev) -{ - struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); - struct config_field_entry *cfg_entry, *t; - const struct config_field *field; - - dev_dbg(&dev->dev, "free-ing dynamically allocated virtual " - "configuration space fields\n"); - if (!dev_data) - return; - - list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) { - field = cfg_entry->field; - - if (field->clean) { - field->clean((struct config_field *)field); - - kfree(cfg_entry->data); - - list_del(&cfg_entry->list); - kfree(cfg_entry); - } - - } -} - -void xen_pcibk_config_reset_dev(struct pci_dev *dev) -{ - struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); - const struct config_field_entry *cfg_entry; - const struct config_field *field; - - dev_dbg(&dev->dev, "resetting virtual configuration space\n"); - if (!dev_data) - return; - - list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { - field = cfg_entry->field; - - if (field->reset) - field->reset(dev, OFFSET(cfg_entry), cfg_entry->data); - } -} - -void xen_pcibk_config_free_dev(struct pci_dev *dev) -{ - struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); - struct config_field_entry *cfg_entry, *t; - const struct config_field *field; - - dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n"); - if (!dev_data) - return; - - list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) { - list_del(&cfg_entry->list); - - field = cfg_entry->field; - - if (field->release) - field->release(dev, OFFSET(cfg_entry), cfg_entry->data); - - kfree(cfg_entry); - } -} - -int xen_pcibk_config_add_field_offset(struct pci_dev *dev, - 
const struct config_field *field, - unsigned int base_offset) -{ - int err = 0; - struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); - struct config_field_entry *cfg_entry; - void *tmp; - - cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL); - if (!cfg_entry) { - err = -ENOMEM; - goto out; - } - - cfg_entry->data = NULL; - cfg_entry->field = field; - cfg_entry->base_offset = base_offset; - - /* silently ignore duplicate fields */ - err = xen_pcibk_field_is_dup(dev, OFFSET(cfg_entry)); - if (err) - goto out; - - if (field->init) { - tmp = field->init(dev, OFFSET(cfg_entry)); - - if (IS_ERR(tmp)) { - err = PTR_ERR(tmp); - goto out; - } - - cfg_entry->data = tmp; - } - - dev_dbg(&dev->dev, "added config field at offset 0x%02x\n", - OFFSET(cfg_entry)); - list_add_tail(&cfg_entry->list, &dev_data->config_fields); - -out: - if (err) - kfree(cfg_entry); - - return err; -} - -/* This sets up the device's virtual configuration space to keep track of - * certain registers (like the base address registers (BARs) so that we can - * keep the client from manipulating them directly. 
- */ -int xen_pcibk_config_init_dev(struct pci_dev *dev) -{ - int err = 0; - struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); - - dev_dbg(&dev->dev, "initializing virtual configuration space\n"); - - INIT_LIST_HEAD(&dev_data->config_fields); - - err = xen_pcibk_config_header_add_fields(dev); - if (err) - goto out; - - err = xen_pcibk_config_capability_add_fields(dev); - if (err) - goto out; - - err = xen_pcibk_config_quirks_init(dev); - -out: - return err; -} - -int xen_pcibk_config_init(void) -{ - return xen_pcibk_config_capability_init(); -} diff --git a/trunk/drivers/xen/xen-pciback/conf_space.h b/trunk/drivers/xen/xen-pciback/conf_space.h deleted file mode 100644 index e56c934ad137..000000000000 --- a/trunk/drivers/xen/xen-pciback/conf_space.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * PCI Backend - Common data structures for overriding the configuration space - * - * Author: Ryan Wilson - */ - -#ifndef __XEN_PCIBACK_CONF_SPACE_H__ -#define __XEN_PCIBACK_CONF_SPACE_H__ - -#include -#include - -/* conf_field_init can return an errno in a ptr with ERR_PTR() */ -typedef void *(*conf_field_init) (struct pci_dev *dev, int offset); -typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data); -typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data); - -typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value, - void *data); -typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value, - void *data); -typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value, - void *data); -typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value, - void *data); -typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value, - void *data); -typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value, - void *data); - -/* These are the fields within the configuration space which we - * are interested in intercepting reads/writes to and 
changing their - * values. - */ -struct config_field { - unsigned int offset; - unsigned int size; - unsigned int mask; - conf_field_init init; - conf_field_reset reset; - conf_field_free release; - void (*clean) (struct config_field *field); - union { - struct { - conf_dword_write write; - conf_dword_read read; - } dw; - struct { - conf_word_write write; - conf_word_read read; - } w; - struct { - conf_byte_write write; - conf_byte_read read; - } b; - } u; - struct list_head list; -}; - -struct config_field_entry { - struct list_head list; - const struct config_field *field; - unsigned int base_offset; - void *data; -}; - -#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset) - -/* Add fields to a device - the add_fields macro expects to get a pointer to - * the first entry in an array (of which the ending is marked by size==0) - */ -int xen_pcibk_config_add_field_offset(struct pci_dev *dev, - const struct config_field *field, - unsigned int offset); - -static inline int xen_pcibk_config_add_field(struct pci_dev *dev, - const struct config_field *field) -{ - return xen_pcibk_config_add_field_offset(dev, field, 0); -} - -static inline int xen_pcibk_config_add_fields(struct pci_dev *dev, - const struct config_field *field) -{ - int i, err = 0; - for (i = 0; field[i].size != 0; i++) { - err = xen_pcibk_config_add_field(dev, &field[i]); - if (err) - break; - } - return err; -} - -static inline int xen_pcibk_config_add_fields_offset(struct pci_dev *dev, - const struct config_field *field, - unsigned int offset) -{ - int i, err = 0; - for (i = 0; field[i].size != 0; i++) { - err = xen_pcibk_config_add_field_offset(dev, &field[i], offset); - if (err) - break; - } - return err; -} - -/* Read/Write the real configuration space */ -int xen_pcibk_read_config_byte(struct pci_dev *dev, int offset, u8 *value, - void *data); -int xen_pcibk_read_config_word(struct pci_dev *dev, int offset, u16 *value, - void *data); -int xen_pcibk_read_config_dword(struct 
pci_dev *dev, int offset, u32 *value, - void *data); -int xen_pcibk_write_config_byte(struct pci_dev *dev, int offset, u8 value, - void *data); -int xen_pcibk_write_config_word(struct pci_dev *dev, int offset, u16 value, - void *data); -int xen_pcibk_write_config_dword(struct pci_dev *dev, int offset, u32 value, - void *data); - -int xen_pcibk_config_capability_init(void); - -int xen_pcibk_config_header_add_fields(struct pci_dev *dev); -int xen_pcibk_config_capability_add_fields(struct pci_dev *dev); - -#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */ diff --git a/trunk/drivers/xen/xen-pciback/conf_space_capability.c b/trunk/drivers/xen/xen-pciback/conf_space_capability.c deleted file mode 100644 index 7f83e9083e9d..000000000000 --- a/trunk/drivers/xen/xen-pciback/conf_space_capability.c +++ /dev/null @@ -1,207 +0,0 @@ -/* - * PCI Backend - Handles the virtual fields found on the capability lists - * in the configuration space. - * - * Author: Ryan Wilson - */ - -#include -#include -#include "pciback.h" -#include "conf_space.h" - -static LIST_HEAD(capabilities); -struct xen_pcibk_config_capability { - struct list_head cap_list; - - int capability; - - /* If the device has the capability found above, add these fields */ - const struct config_field *fields; -}; - -static const struct config_field caplist_header[] = { - { - .offset = PCI_CAP_LIST_ID, - .size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */ - .u.w.read = xen_pcibk_read_config_word, - .u.w.write = NULL, - }, - {} -}; - -static inline void register_capability(struct xen_pcibk_config_capability *cap) -{ - list_add_tail(&cap->cap_list, &capabilities); -} - -int xen_pcibk_config_capability_add_fields(struct pci_dev *dev) -{ - int err = 0; - struct xen_pcibk_config_capability *cap; - int cap_offset; - - list_for_each_entry(cap, &capabilities, cap_list) { - cap_offset = pci_find_capability(dev, cap->capability); - if (cap_offset) { - dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n", - cap->capability, 
cap_offset); - - err = xen_pcibk_config_add_fields_offset(dev, - caplist_header, - cap_offset); - if (err) - goto out; - err = xen_pcibk_config_add_fields_offset(dev, - cap->fields, - cap_offset); - if (err) - goto out; - } - } - -out: - return err; -} - -static int vpd_address_write(struct pci_dev *dev, int offset, u16 value, - void *data) -{ - /* Disallow writes to the vital product data */ - if (value & PCI_VPD_ADDR_F) - return PCIBIOS_SET_FAILED; - else - return pci_write_config_word(dev, offset, value); -} - -static const struct config_field caplist_vpd[] = { - { - .offset = PCI_VPD_ADDR, - .size = 2, - .u.w.read = xen_pcibk_read_config_word, - .u.w.write = vpd_address_write, - }, - { - .offset = PCI_VPD_DATA, - .size = 4, - .u.dw.read = xen_pcibk_read_config_dword, - .u.dw.write = NULL, - }, - {} -}; - -static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value, - void *data) -{ - int err; - u16 real_value; - - err = pci_read_config_word(dev, offset, &real_value); - if (err) - goto out; - - *value = real_value & ~PCI_PM_CAP_PME_MASK; - -out: - return err; -} - -/* PM_OK_BITS specifies the bits that the driver domain is allowed to change. 
- * Can't allow driver domain to enable PMEs - they're shared */ -#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK) - -static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value, - void *data) -{ - int err; - u16 old_value; - pci_power_t new_state, old_state; - - err = pci_read_config_word(dev, offset, &old_value); - if (err) - goto out; - - old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK); - new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK); - - new_value &= PM_OK_BITS; - if ((old_value & PM_OK_BITS) != new_value) { - new_value = (old_value & ~PM_OK_BITS) | new_value; - err = pci_write_config_word(dev, offset, new_value); - if (err) - goto out; - } - - /* Let pci core handle the power management change */ - dev_dbg(&dev->dev, "set power state to %x\n", new_state); - err = pci_set_power_state(dev, new_state); - if (err) { - err = PCIBIOS_SET_FAILED; - goto out; - } - - out: - return err; -} - -/* Ensure PMEs are disabled */ -static void *pm_ctrl_init(struct pci_dev *dev, int offset) -{ - int err; - u16 value; - - err = pci_read_config_word(dev, offset, &value); - if (err) - goto out; - - if (value & PCI_PM_CTRL_PME_ENABLE) { - value &= ~PCI_PM_CTRL_PME_ENABLE; - err = pci_write_config_word(dev, offset, value); - } - -out: - return ERR_PTR(err); -} - -static const struct config_field caplist_pm[] = { - { - .offset = PCI_PM_PMC, - .size = 2, - .u.w.read = pm_caps_read, - }, - { - .offset = PCI_PM_CTRL, - .size = 2, - .init = pm_ctrl_init, - .u.w.read = xen_pcibk_read_config_word, - .u.w.write = pm_ctrl_write, - }, - { - .offset = PCI_PM_PPB_EXTENSIONS, - .size = 1, - .u.b.read = xen_pcibk_read_config_byte, - }, - { - .offset = PCI_PM_DATA_REGISTER, - .size = 1, - .u.b.read = xen_pcibk_read_config_byte, - }, - {} -}; - -static struct xen_pcibk_config_capability xen_pcibk_config_capability_pm = { - .capability = PCI_CAP_ID_PM, - .fields = caplist_pm, -}; -static struct xen_pcibk_config_capability 
xen_pcibk_config_capability_vpd = { - .capability = PCI_CAP_ID_VPD, - .fields = caplist_vpd, -}; - -int xen_pcibk_config_capability_init(void) -{ - register_capability(&xen_pcibk_config_capability_vpd); - register_capability(&xen_pcibk_config_capability_pm); - - return 0; -} diff --git a/trunk/drivers/xen/xen-pciback/conf_space_header.c b/trunk/drivers/xen/xen-pciback/conf_space_header.c deleted file mode 100644 index da3cbdfcb5dc..000000000000 --- a/trunk/drivers/xen/xen-pciback/conf_space_header.c +++ /dev/null @@ -1,386 +0,0 @@ -/* - * PCI Backend - Handles the virtual fields in the configuration space headers. - * - * Author: Ryan Wilson - */ - -#include -#include -#include "pciback.h" -#include "conf_space.h" - -struct pci_bar_info { - u32 val; - u32 len_val; - int which; -}; - -#define DRV_NAME "xen-pciback" -#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO)) -#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER) - -static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data) -{ - int i; - int ret; - - ret = xen_pcibk_read_config_word(dev, offset, value, data); - if (!atomic_read(&dev->enable_cnt)) - return ret; - - for (i = 0; i < PCI_ROM_RESOURCE; i++) { - if (dev->resource[i].flags & IORESOURCE_IO) - *value |= PCI_COMMAND_IO; - if (dev->resource[i].flags & IORESOURCE_MEM) - *value |= PCI_COMMAND_MEMORY; - } - - return ret; -} - -static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) -{ - struct xen_pcibk_dev_data *dev_data; - int err; - - dev_data = pci_get_drvdata(dev); - if (!pci_is_enabled(dev) && is_enable_cmd(value)) { - if (unlikely(verbose_request)) - printk(KERN_DEBUG DRV_NAME ": %s: enable\n", - pci_name(dev)); - err = pci_enable_device(dev); - if (err) - return err; - if (dev_data) - dev_data->enable_intx = 1; - } else if (pci_is_enabled(dev) && !is_enable_cmd(value)) { - if (unlikely(verbose_request)) - printk(KERN_DEBUG DRV_NAME ": %s: disable\n", - pci_name(dev)); - 
pci_disable_device(dev); - if (dev_data) - dev_data->enable_intx = 0; - } - - if (!dev->is_busmaster && is_master_cmd(value)) { - if (unlikely(verbose_request)) - printk(KERN_DEBUG DRV_NAME ": %s: set bus master\n", - pci_name(dev)); - pci_set_master(dev); - } - - if (value & PCI_COMMAND_INVALIDATE) { - if (unlikely(verbose_request)) - printk(KERN_DEBUG - DRV_NAME ": %s: enable memory-write-invalidate\n", - pci_name(dev)); - err = pci_set_mwi(dev); - if (err) { - printk(KERN_WARNING - DRV_NAME ": %s: cannot enable " - "memory-write-invalidate (%d)\n", - pci_name(dev), err); - value &= ~PCI_COMMAND_INVALIDATE; - } - } - - return pci_write_config_word(dev, offset, value); -} - -static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data) -{ - struct pci_bar_info *bar = data; - - if (unlikely(!bar)) { - printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n", - pci_name(dev)); - return XEN_PCI_ERR_op_failed; - } - - /* A write to obtain the length must happen as a 32-bit write. - * This does not (yet) support writing individual bytes - */ - if (value == ~PCI_ROM_ADDRESS_ENABLE) - bar->which = 1; - else { - u32 tmpval; - pci_read_config_dword(dev, offset, &tmpval); - if (tmpval != bar->val && value == bar->val) { - /* Allow restoration of bar value. */ - pci_write_config_dword(dev, offset, bar->val); - } - bar->which = 0; - } - - /* Do we need to support enabling/disabling the rom address here? */ - - return 0; -} - -/* For the BARs, only allow writes which write ~0 or - * the correct resource information - * (Needed for when the driver probes the resource usage) - */ -static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data) -{ - struct pci_bar_info *bar = data; - - if (unlikely(!bar)) { - printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n", - pci_name(dev)); - return XEN_PCI_ERR_op_failed; - } - - /* A write to obtain the length must happen as a 32-bit write. 
- * This does not (yet) support writing individual bytes - */ - if (value == ~0) - bar->which = 1; - else { - u32 tmpval; - pci_read_config_dword(dev, offset, &tmpval); - if (tmpval != bar->val && value == bar->val) { - /* Allow restoration of bar value. */ - pci_write_config_dword(dev, offset, bar->val); - } - bar->which = 0; - } - - return 0; -} - -static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data) -{ - struct pci_bar_info *bar = data; - - if (unlikely(!bar)) { - printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n", - pci_name(dev)); - return XEN_PCI_ERR_op_failed; - } - - *value = bar->which ? bar->len_val : bar->val; - - return 0; -} - -static inline void read_dev_bar(struct pci_dev *dev, - struct pci_bar_info *bar_info, int offset, - u32 len_mask) -{ - int pos; - struct resource *res = dev->resource; - - if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1) - pos = PCI_ROM_RESOURCE; - else { - pos = (offset - PCI_BASE_ADDRESS_0) / 4; - if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE | - PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == - (PCI_BASE_ADDRESS_SPACE_MEMORY | - PCI_BASE_ADDRESS_MEM_TYPE_64))) { - bar_info->val = res[pos - 1].start >> 32; - bar_info->len_val = res[pos - 1].end >> 32; - return; - } - } - - bar_info->val = res[pos].start | - (res[pos].flags & PCI_REGION_FLAG_MASK); - bar_info->len_val = res[pos].end - res[pos].start + 1; -} - -static void *bar_init(struct pci_dev *dev, int offset) -{ - struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL); - - if (!bar) - return ERR_PTR(-ENOMEM); - - read_dev_bar(dev, bar, offset, ~0); - bar->which = 0; - - return bar; -} - -static void *rom_init(struct pci_dev *dev, int offset) -{ - struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL); - - if (!bar) - return ERR_PTR(-ENOMEM); - - read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE); - bar->which = 0; - - return bar; -} - -static void bar_reset(struct pci_dev *dev, int offset, void *data) -{ - 
struct pci_bar_info *bar = data; - - bar->which = 0; -} - -static void bar_release(struct pci_dev *dev, int offset, void *data) -{ - kfree(data); -} - -static int xen_pcibk_read_vendor(struct pci_dev *dev, int offset, - u16 *value, void *data) -{ - *value = dev->vendor; - - return 0; -} - -static int xen_pcibk_read_device(struct pci_dev *dev, int offset, - u16 *value, void *data) -{ - *value = dev->device; - - return 0; -} - -static int interrupt_read(struct pci_dev *dev, int offset, u8 * value, - void *data) -{ - *value = (u8) dev->irq; - - return 0; -} - -static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data) -{ - u8 cur_value; - int err; - - err = pci_read_config_byte(dev, offset, &cur_value); - if (err) - goto out; - - if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START) - || value == PCI_BIST_START) - err = pci_write_config_byte(dev, offset, value); - -out: - return err; -} - -static const struct config_field header_common[] = { - { - .offset = PCI_VENDOR_ID, - .size = 2, - .u.w.read = xen_pcibk_read_vendor, - }, - { - .offset = PCI_DEVICE_ID, - .size = 2, - .u.w.read = xen_pcibk_read_device, - }, - { - .offset = PCI_COMMAND, - .size = 2, - .u.w.read = command_read, - .u.w.write = command_write, - }, - { - .offset = PCI_INTERRUPT_LINE, - .size = 1, - .u.b.read = interrupt_read, - }, - { - .offset = PCI_INTERRUPT_PIN, - .size = 1, - .u.b.read = xen_pcibk_read_config_byte, - }, - { - /* Any side effects of letting driver domain control cache line? 
*/ - .offset = PCI_CACHE_LINE_SIZE, - .size = 1, - .u.b.read = xen_pcibk_read_config_byte, - .u.b.write = xen_pcibk_write_config_byte, - }, - { - .offset = PCI_LATENCY_TIMER, - .size = 1, - .u.b.read = xen_pcibk_read_config_byte, - }, - { - .offset = PCI_BIST, - .size = 1, - .u.b.read = xen_pcibk_read_config_byte, - .u.b.write = bist_write, - }, - {} -}; - -#define CFG_FIELD_BAR(reg_offset) \ - { \ - .offset = reg_offset, \ - .size = 4, \ - .init = bar_init, \ - .reset = bar_reset, \ - .release = bar_release, \ - .u.dw.read = bar_read, \ - .u.dw.write = bar_write, \ - } - -#define CFG_FIELD_ROM(reg_offset) \ - { \ - .offset = reg_offset, \ - .size = 4, \ - .init = rom_init, \ - .reset = bar_reset, \ - .release = bar_release, \ - .u.dw.read = bar_read, \ - .u.dw.write = rom_write, \ - } - -static const struct config_field header_0[] = { - CFG_FIELD_BAR(PCI_BASE_ADDRESS_0), - CFG_FIELD_BAR(PCI_BASE_ADDRESS_1), - CFG_FIELD_BAR(PCI_BASE_ADDRESS_2), - CFG_FIELD_BAR(PCI_BASE_ADDRESS_3), - CFG_FIELD_BAR(PCI_BASE_ADDRESS_4), - CFG_FIELD_BAR(PCI_BASE_ADDRESS_5), - CFG_FIELD_ROM(PCI_ROM_ADDRESS), - {} -}; - -static const struct config_field header_1[] = { - CFG_FIELD_BAR(PCI_BASE_ADDRESS_0), - CFG_FIELD_BAR(PCI_BASE_ADDRESS_1), - CFG_FIELD_ROM(PCI_ROM_ADDRESS1), - {} -}; - -int xen_pcibk_config_header_add_fields(struct pci_dev *dev) -{ - int err; - - err = xen_pcibk_config_add_fields(dev, header_common); - if (err) - goto out; - - switch (dev->hdr_type) { - case PCI_HEADER_TYPE_NORMAL: - err = xen_pcibk_config_add_fields(dev, header_0); - break; - - case PCI_HEADER_TYPE_BRIDGE: - err = xen_pcibk_config_add_fields(dev, header_1); - break; - - default: - err = -EINVAL; - printk(KERN_ERR DRV_NAME ": %s: Unsupported header type %d!\n", - pci_name(dev), dev->hdr_type); - break; - } - -out: - return err; -} diff --git a/trunk/drivers/xen/xen-pciback/conf_space_quirks.c b/trunk/drivers/xen/xen-pciback/conf_space_quirks.c deleted file mode 100644 index 921a889e65eb..000000000000 --- 
a/trunk/drivers/xen/xen-pciback/conf_space_quirks.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * PCI Backend - Handle special overlays for broken devices. - * - * Author: Ryan Wilson - * Author: Chris Bookholt - */ - -#include -#include -#include "pciback.h" -#include "conf_space.h" -#include "conf_space_quirks.h" - -LIST_HEAD(xen_pcibk_quirks); -#define DRV_NAME "xen-pciback" -static inline const struct pci_device_id * -match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) -{ - if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) && - (id->device == PCI_ANY_ID || id->device == dev->device) && - (id->subvendor == PCI_ANY_ID || - id->subvendor == dev->subsystem_vendor) && - (id->subdevice == PCI_ANY_ID || - id->subdevice == dev->subsystem_device) && - !((id->class ^ dev->class) & id->class_mask)) - return id; - return NULL; -} - -static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev) -{ - struct xen_pcibk_config_quirk *tmp_quirk; - - list_for_each_entry(tmp_quirk, &xen_pcibk_quirks, quirks_list) - if (match_one_device(&tmp_quirk->devid, dev) != NULL) - goto out; - tmp_quirk = NULL; - printk(KERN_DEBUG DRV_NAME - ":quirk didn't match any device xen_pciback knows about\n"); -out: - return tmp_quirk; -} - -static inline void register_quirk(struct xen_pcibk_config_quirk *quirk) -{ - list_add_tail(&quirk->quirks_list, &xen_pcibk_quirks); -} - -int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg) -{ - int ret = 0; - struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); - struct config_field_entry *cfg_entry; - - list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { - if (OFFSET(cfg_entry) == reg) { - ret = 1; - break; - } - } - return ret; -} - -int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field - *field) -{ - int err = 0; - - switch (field->size) { - case 1: - field->u.b.read = xen_pcibk_read_config_byte; - field->u.b.write = xen_pcibk_write_config_byte; - break; - 
case 2: - field->u.w.read = xen_pcibk_read_config_word; - field->u.w.write = xen_pcibk_write_config_word; - break; - case 4: - field->u.dw.read = xen_pcibk_read_config_dword; - field->u.dw.write = xen_pcibk_write_config_dword; - break; - default: - err = -EINVAL; - goto out; - } - - xen_pcibk_config_add_field(dev, field); - -out: - return err; -} - -int xen_pcibk_config_quirks_init(struct pci_dev *dev) -{ - struct xen_pcibk_config_quirk *quirk; - int ret = 0; - - quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC); - if (!quirk) { - ret = -ENOMEM; - goto out; - } - - quirk->devid.vendor = dev->vendor; - quirk->devid.device = dev->device; - quirk->devid.subvendor = dev->subsystem_vendor; - quirk->devid.subdevice = dev->subsystem_device; - quirk->devid.class = 0; - quirk->devid.class_mask = 0; - quirk->devid.driver_data = 0UL; - - quirk->pdev = dev; - - register_quirk(quirk); -out: - return ret; -} - -void xen_pcibk_config_field_free(struct config_field *field) -{ - kfree(field); -} - -int xen_pcibk_config_quirk_release(struct pci_dev *dev) -{ - struct xen_pcibk_config_quirk *quirk; - int ret = 0; - - quirk = xen_pcibk_find_quirk(dev); - if (!quirk) { - ret = -ENXIO; - goto out; - } - - list_del(&quirk->quirks_list); - kfree(quirk); - -out: - return ret; -} diff --git a/trunk/drivers/xen/xen-pciback/conf_space_quirks.h b/trunk/drivers/xen/xen-pciback/conf_space_quirks.h deleted file mode 100644 index cfcc517e4570..000000000000 --- a/trunk/drivers/xen/xen-pciback/conf_space_quirks.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * PCI Backend - Data structures for special overlays for broken devices. 
- * - * Ryan Wilson - * Chris Bookholt - */ - -#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__ -#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__ - -#include -#include - -struct xen_pcibk_config_quirk { - struct list_head quirks_list; - struct pci_device_id devid; - struct pci_dev *pdev; -}; - -int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field - *field); - -int xen_pcibk_config_quirks_remove_field(struct pci_dev *dev, int reg); - -int xen_pcibk_config_quirks_init(struct pci_dev *dev); - -void xen_pcibk_config_field_free(struct config_field *field); - -int xen_pcibk_config_quirk_release(struct pci_dev *dev); - -int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg); - -#endif diff --git a/trunk/drivers/xen/xen-pciback/passthrough.c b/trunk/drivers/xen/xen-pciback/passthrough.c deleted file mode 100644 index 1d32a9a42c01..000000000000 --- a/trunk/drivers/xen/xen-pciback/passthrough.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - * PCI Backend - Provides restricted access to the real PCI bus topology - * to the frontend - * - * Author: Ryan Wilson - */ - -#include -#include -#include -#include "pciback.h" - -struct passthrough_dev_data { - /* Access to dev_list must be protected by lock */ - struct list_head dev_list; - spinlock_t lock; -}; - -static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, - unsigned int domain, - unsigned int bus, - unsigned int devfn) -{ - struct passthrough_dev_data *dev_data = pdev->pci_dev_data; - struct pci_dev_entry *dev_entry; - struct pci_dev *dev = NULL; - unsigned long flags; - - spin_lock_irqsave(&dev_data->lock, flags); - - list_for_each_entry(dev_entry, &dev_data->dev_list, list) { - if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus) - && bus == (unsigned int)dev_entry->dev->bus->number - && devfn == dev_entry->dev->devfn) { - dev = dev_entry->dev; - break; - } - } - - spin_unlock_irqrestore(&dev_data->lock, flags); - - return dev; -} - -static int 
__xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, - struct pci_dev *dev, - int devid, publish_pci_dev_cb publish_cb) -{ - struct passthrough_dev_data *dev_data = pdev->pci_dev_data; - struct pci_dev_entry *dev_entry; - unsigned long flags; - unsigned int domain, bus, devfn; - int err; - - dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL); - if (!dev_entry) - return -ENOMEM; - dev_entry->dev = dev; - - spin_lock_irqsave(&dev_data->lock, flags); - list_add_tail(&dev_entry->list, &dev_data->dev_list); - spin_unlock_irqrestore(&dev_data->lock, flags); - - /* Publish this device. */ - domain = (unsigned int)pci_domain_nr(dev->bus); - bus = (unsigned int)dev->bus->number; - devfn = dev->devfn; - err = publish_cb(pdev, domain, bus, devfn, devid); - - return err; -} - -static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, - struct pci_dev *dev) -{ - struct passthrough_dev_data *dev_data = pdev->pci_dev_data; - struct pci_dev_entry *dev_entry, *t; - struct pci_dev *found_dev = NULL; - unsigned long flags; - - spin_lock_irqsave(&dev_data->lock, flags); - - list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) { - if (dev_entry->dev == dev) { - list_del(&dev_entry->list); - found_dev = dev_entry->dev; - kfree(dev_entry); - } - } - - spin_unlock_irqrestore(&dev_data->lock, flags); - - if (found_dev) - pcistub_put_pci_dev(found_dev); -} - -static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev) -{ - struct passthrough_dev_data *dev_data; - - dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL); - if (!dev_data) - return -ENOMEM; - - spin_lock_init(&dev_data->lock); - - INIT_LIST_HEAD(&dev_data->dev_list); - - pdev->pci_dev_data = dev_data; - - return 0; -} - -static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, - publish_pci_root_cb publish_root_cb) -{ - int err = 0; - struct passthrough_dev_data *dev_data = pdev->pci_dev_data; - struct pci_dev_entry *dev_entry, *e, *tmp; - struct pci_dev *dev; - int found; - 
unsigned int domain, bus; - - spin_lock(&dev_data->lock); - - list_for_each_entry_safe(dev_entry, tmp, &dev_data->dev_list, list) { - /* Only publish this device as a root if none of its - * parent bridges are exported - */ - found = 0; - dev = dev_entry->dev->bus->self; - for (; !found && dev != NULL; dev = dev->bus->self) { - list_for_each_entry(e, &dev_data->dev_list, list) { - if (dev == e->dev) { - found = 1; - break; - } - } - } - - domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus); - bus = (unsigned int)dev_entry->dev->bus->number; - - if (!found) { - spin_unlock(&dev_data->lock); - err = publish_root_cb(pdev, domain, bus); - if (err) - break; - spin_lock(&dev_data->lock); - } - } - - if (!err) - spin_unlock(&dev_data->lock); - - return err; -} - -static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev) -{ - struct passthrough_dev_data *dev_data = pdev->pci_dev_data; - struct pci_dev_entry *dev_entry, *t; - - list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) { - list_del(&dev_entry->list); - pcistub_put_pci_dev(dev_entry->dev); - kfree(dev_entry); - } - - kfree(dev_data); - pdev->pci_dev_data = NULL; -} - -static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, - struct xen_pcibk_device *pdev, - unsigned int *domain, unsigned int *bus, - unsigned int *devfn) -{ - *domain = pci_domain_nr(pcidev->bus); - *bus = pcidev->bus->number; - *devfn = pcidev->devfn; - return 1; -} - -struct xen_pcibk_backend xen_pcibk_passthrough_backend = { - .name = "passthrough", - .init = __xen_pcibk_init_devices, - .free = __xen_pcibk_release_devices, - .find = __xen_pcibk_get_pcifront_dev, - .publish = __xen_pcibk_publish_pci_roots, - .release = __xen_pcibk_release_pci_dev, - .add = __xen_pcibk_add_pci_dev, - .get = __xen_pcibk_get_pci_dev, -}; diff --git a/trunk/drivers/xen/xen-pciback/pci_stub.c b/trunk/drivers/xen/xen-pciback/pci_stub.c deleted file mode 100644 index aec214ac0a14..000000000000 --- 
a/trunk/drivers/xen/xen-pciback/pci_stub.c +++ /dev/null @@ -1,1376 +0,0 @@ -/* - * PCI Stub Driver - Grabs devices in backend to be exported later - * - * Ryan Wilson - * Chris Bookholt - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "pciback.h" -#include "conf_space.h" -#include "conf_space_quirks.h" - -#define DRV_NAME "xen-pciback" - -static char *pci_devs_to_hide; -wait_queue_head_t xen_pcibk_aer_wait_queue; -/*Add sem for sync AER handling and xen_pcibk remove/reconfigue ops, -* We want to avoid in middle of AER ops, xen_pcibk devices is being removed -*/ -static DECLARE_RWSEM(pcistub_sem); -module_param_named(hide, pci_devs_to_hide, charp, 0444); - -struct pcistub_device_id { - struct list_head slot_list; - int domain; - unsigned char bus; - unsigned int devfn; -}; -static LIST_HEAD(pcistub_device_ids); -static DEFINE_SPINLOCK(device_ids_lock); - -struct pcistub_device { - struct kref kref; - struct list_head dev_list; - spinlock_t lock; - - struct pci_dev *dev; - struct xen_pcibk_device *pdev;/* non-NULL if struct pci_dev is in use */ -}; - -/* Access to pcistub_devices & seized_devices lists and the initialize_devices - * flag must be locked with pcistub_devices_lock - */ -static DEFINE_SPINLOCK(pcistub_devices_lock); -static LIST_HEAD(pcistub_devices); - -/* wait for device_initcall before initializing our devices - * (see pcistub_init_devices_late) - */ -static int initialize_devices; -static LIST_HEAD(seized_devices); - -static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev) -{ - struct pcistub_device *psdev; - - dev_dbg(&dev->dev, "pcistub_device_alloc\n"); - - psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC); - if (!psdev) - return NULL; - - psdev->dev = pci_dev_get(dev); - if (!psdev->dev) { - kfree(psdev); - return NULL; - } - - kref_init(&psdev->kref); - spin_lock_init(&psdev->lock); - - return psdev; -} - -/* Don't call this directly as 
it's called by pcistub_device_put */ -static void pcistub_device_release(struct kref *kref) -{ - struct pcistub_device *psdev; - - psdev = container_of(kref, struct pcistub_device, kref); - - dev_dbg(&psdev->dev->dev, "pcistub_device_release\n"); - - xen_unregister_device_domain_owner(psdev->dev); - - /* Clean-up the device */ - xen_pcibk_reset_device(psdev->dev); - xen_pcibk_config_free_dyn_fields(psdev->dev); - xen_pcibk_config_free_dev(psdev->dev); - kfree(pci_get_drvdata(psdev->dev)); - pci_set_drvdata(psdev->dev, NULL); - - pci_dev_put(psdev->dev); - - kfree(psdev); -} - -static inline void pcistub_device_get(struct pcistub_device *psdev) -{ - kref_get(&psdev->kref); -} - -static inline void pcistub_device_put(struct pcistub_device *psdev) -{ - kref_put(&psdev->kref, pcistub_device_release); -} - -static struct pcistub_device *pcistub_device_find(int domain, int bus, - int slot, int func) -{ - struct pcistub_device *psdev = NULL; - unsigned long flags; - - spin_lock_irqsave(&pcistub_devices_lock, flags); - - list_for_each_entry(psdev, &pcistub_devices, dev_list) { - if (psdev->dev != NULL - && domain == pci_domain_nr(psdev->dev->bus) - && bus == psdev->dev->bus->number - && PCI_DEVFN(slot, func) == psdev->dev->devfn) { - pcistub_device_get(psdev); - goto out; - } - } - - /* didn't find it */ - psdev = NULL; - -out: - spin_unlock_irqrestore(&pcistub_devices_lock, flags); - return psdev; -} - -static struct pci_dev *pcistub_device_get_pci_dev(struct xen_pcibk_device *pdev, - struct pcistub_device *psdev) -{ - struct pci_dev *pci_dev = NULL; - unsigned long flags; - - pcistub_device_get(psdev); - - spin_lock_irqsave(&psdev->lock, flags); - if (!psdev->pdev) { - psdev->pdev = pdev; - pci_dev = psdev->dev; - } - spin_unlock_irqrestore(&psdev->lock, flags); - - if (!pci_dev) - pcistub_device_put(psdev); - - return pci_dev; -} - -struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev, - int domain, int bus, - int slot, int func) -{ - struct 
pcistub_device *psdev; - struct pci_dev *found_dev = NULL; - unsigned long flags; - - spin_lock_irqsave(&pcistub_devices_lock, flags); - - list_for_each_entry(psdev, &pcistub_devices, dev_list) { - if (psdev->dev != NULL - && domain == pci_domain_nr(psdev->dev->bus) - && bus == psdev->dev->bus->number - && PCI_DEVFN(slot, func) == psdev->dev->devfn) { - found_dev = pcistub_device_get_pci_dev(pdev, psdev); - break; - } - } - - spin_unlock_irqrestore(&pcistub_devices_lock, flags); - return found_dev; -} - -struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev, - struct pci_dev *dev) -{ - struct pcistub_device *psdev; - struct pci_dev *found_dev = NULL; - unsigned long flags; - - spin_lock_irqsave(&pcistub_devices_lock, flags); - - list_for_each_entry(psdev, &pcistub_devices, dev_list) { - if (psdev->dev == dev) { - found_dev = pcistub_device_get_pci_dev(pdev, psdev); - break; - } - } - - spin_unlock_irqrestore(&pcistub_devices_lock, flags); - return found_dev; -} - -void pcistub_put_pci_dev(struct pci_dev *dev) -{ - struct pcistub_device *psdev, *found_psdev = NULL; - unsigned long flags; - - spin_lock_irqsave(&pcistub_devices_lock, flags); - - list_for_each_entry(psdev, &pcistub_devices, dev_list) { - if (psdev->dev == dev) { - found_psdev = psdev; - break; - } - } - - spin_unlock_irqrestore(&pcistub_devices_lock, flags); - - /*hold this lock for avoiding breaking link between - * pcistub and xen_pcibk when AER is in processing - */ - down_write(&pcistub_sem); - /* Cleanup our device - * (so it's ready for the next domain) - */ - xen_pcibk_reset_device(found_psdev->dev); - xen_pcibk_config_free_dyn_fields(found_psdev->dev); - xen_pcibk_config_reset_dev(found_psdev->dev); - - spin_lock_irqsave(&found_psdev->lock, flags); - found_psdev->pdev = NULL; - spin_unlock_irqrestore(&found_psdev->lock, flags); - - pcistub_device_put(found_psdev); - up_write(&pcistub_sem); -} - -static int __devinit pcistub_match_one(struct pci_dev *dev, - struct pcistub_device_id 
*pdev_id) -{ - /* Match the specified device by domain, bus, slot, func and also if - * any of the device's parent bridges match. - */ - for (; dev != NULL; dev = dev->bus->self) { - if (pci_domain_nr(dev->bus) == pdev_id->domain - && dev->bus->number == pdev_id->bus - && dev->devfn == pdev_id->devfn) - return 1; - - /* Sometimes topmost bridge links to itself. */ - if (dev == dev->bus->self) - break; - } - - return 0; -} - -static int __devinit pcistub_match(struct pci_dev *dev) -{ - struct pcistub_device_id *pdev_id; - unsigned long flags; - int found = 0; - - spin_lock_irqsave(&device_ids_lock, flags); - list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) { - if (pcistub_match_one(dev, pdev_id)) { - found = 1; - break; - } - } - spin_unlock_irqrestore(&device_ids_lock, flags); - - return found; -} - -static int __devinit pcistub_init_device(struct pci_dev *dev) -{ - struct xen_pcibk_dev_data *dev_data; - int err = 0; - - dev_dbg(&dev->dev, "initializing...\n"); - - /* The PCI backend is not intended to be a module (or to work with - * removable PCI devices (yet). If it were, xen_pcibk_config_free() - * would need to be called somewhere to free the memory allocated - * here and then to call kfree(pci_get_drvdata(psdev->dev)). - */ - dev_data = kzalloc(sizeof(*dev_data) + strlen(DRV_NAME "[]") - + strlen(pci_name(dev)) + 1, GFP_ATOMIC); - if (!dev_data) { - err = -ENOMEM; - goto out; - } - pci_set_drvdata(dev, dev_data); - - /* - * Setup name for fake IRQ handler. It will only be enabled - * once the device is turned on by the guest. 
- */ - sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev)); - - dev_dbg(&dev->dev, "initializing config\n"); - - init_waitqueue_head(&xen_pcibk_aer_wait_queue); - err = xen_pcibk_config_init_dev(dev); - if (err) - goto out; - - /* HACK: Force device (& ACPI) to determine what IRQ it's on - we - * must do this here because pcibios_enable_device may specify - * the pci device's true irq (and possibly its other resources) - * if they differ from what's in the configuration space. - * This makes the assumption that the device's resources won't - * change after this point (otherwise this code may break!) - */ - dev_dbg(&dev->dev, "enabling device\n"); - err = pci_enable_device(dev); - if (err) - goto config_release; - - /* Now disable the device (this also ensures some private device - * data is setup before we export) - */ - dev_dbg(&dev->dev, "reset device\n"); - xen_pcibk_reset_device(dev); - - return 0; - -config_release: - xen_pcibk_config_free_dev(dev); - -out: - pci_set_drvdata(dev, NULL); - kfree(dev_data); - return err; -} - -/* - * Because some initialization still happens on - * devices during fs_initcall, we need to defer - * full initialization of our devices until - * device_initcall. 
- */ -static int __init pcistub_init_devices_late(void) -{ - struct pcistub_device *psdev; - unsigned long flags; - int err = 0; - - pr_debug(DRV_NAME ": pcistub_init_devices_late\n"); - - spin_lock_irqsave(&pcistub_devices_lock, flags); - - while (!list_empty(&seized_devices)) { - psdev = container_of(seized_devices.next, - struct pcistub_device, dev_list); - list_del(&psdev->dev_list); - - spin_unlock_irqrestore(&pcistub_devices_lock, flags); - - err = pcistub_init_device(psdev->dev); - if (err) { - dev_err(&psdev->dev->dev, - "error %d initializing device\n", err); - kfree(psdev); - psdev = NULL; - } - - spin_lock_irqsave(&pcistub_devices_lock, flags); - - if (psdev) - list_add_tail(&psdev->dev_list, &pcistub_devices); - } - - initialize_devices = 1; - - spin_unlock_irqrestore(&pcistub_devices_lock, flags); - - return 0; -} - -static int __devinit pcistub_seize(struct pci_dev *dev) -{ - struct pcistub_device *psdev; - unsigned long flags; - int err = 0; - - psdev = pcistub_device_alloc(dev); - if (!psdev) - return -ENOMEM; - - spin_lock_irqsave(&pcistub_devices_lock, flags); - - if (initialize_devices) { - spin_unlock_irqrestore(&pcistub_devices_lock, flags); - - /* don't want irqs disabled when calling pcistub_init_device */ - err = pcistub_init_device(psdev->dev); - - spin_lock_irqsave(&pcistub_devices_lock, flags); - - if (!err) - list_add(&psdev->dev_list, &pcistub_devices); - } else { - dev_dbg(&dev->dev, "deferring initialization\n"); - list_add(&psdev->dev_list, &seized_devices); - } - - spin_unlock_irqrestore(&pcistub_devices_lock, flags); - - if (err) - pcistub_device_put(psdev); - - return err; -} - -static int __devinit pcistub_probe(struct pci_dev *dev, - const struct pci_device_id *id) -{ - int err = 0; - - dev_dbg(&dev->dev, "probing...\n"); - - if (pcistub_match(dev)) { - - if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL - && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { - dev_err(&dev->dev, "can't export pci devices that " - "don't have a normal (0) or 
bridge (1) " - "header type!\n"); - err = -ENODEV; - goto out; - } - - dev_info(&dev->dev, "seizing device\n"); - err = pcistub_seize(dev); - } else - /* Didn't find the device */ - err = -ENODEV; - -out: - return err; -} - -static void pcistub_remove(struct pci_dev *dev) -{ - struct pcistub_device *psdev, *found_psdev = NULL; - unsigned long flags; - - dev_dbg(&dev->dev, "removing\n"); - - spin_lock_irqsave(&pcistub_devices_lock, flags); - - xen_pcibk_config_quirk_release(dev); - - list_for_each_entry(psdev, &pcistub_devices, dev_list) { - if (psdev->dev == dev) { - found_psdev = psdev; - break; - } - } - - spin_unlock_irqrestore(&pcistub_devices_lock, flags); - - if (found_psdev) { - dev_dbg(&dev->dev, "found device to remove - in use? %p\n", - found_psdev->pdev); - - if (found_psdev->pdev) { - printk(KERN_WARNING DRV_NAME ": ****** removing device " - "%s while still in-use! ******\n", - pci_name(found_psdev->dev)); - printk(KERN_WARNING DRV_NAME ": ****** driver domain may" - " still access this device's i/o resources!\n"); - printk(KERN_WARNING DRV_NAME ": ****** shutdown driver " - "domain before binding device\n"); - printk(KERN_WARNING DRV_NAME ": ****** to other drivers " - "or domains\n"); - - xen_pcibk_release_pci_dev(found_psdev->pdev, - found_psdev->dev); - } - - spin_lock_irqsave(&pcistub_devices_lock, flags); - list_del(&found_psdev->dev_list); - spin_unlock_irqrestore(&pcistub_devices_lock, flags); - - /* the final put for releasing from the list */ - pcistub_device_put(found_psdev); - } -} - -static DEFINE_PCI_DEVICE_TABLE(pcistub_ids) = { - { - .vendor = PCI_ANY_ID, - .device = PCI_ANY_ID, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - }, - {0,}, -}; - -#define PCI_NODENAME_MAX 40 -static void kill_domain_by_device(struct pcistub_device *psdev) -{ - struct xenbus_transaction xbt; - int err; - char nodename[PCI_NODENAME_MAX]; - - if (!psdev) - dev_err(&psdev->dev->dev, - "device is NULL when do AER recovery/kill_domain\n"); - 
snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0", - psdev->pdev->xdev->otherend_id); - nodename[strlen(nodename)] = '\0'; - -again: - err = xenbus_transaction_start(&xbt); - if (err) { - dev_err(&psdev->dev->dev, - "error %d when start xenbus transaction\n", err); - return; - } - /*PV AER handlers will set this flag*/ - xenbus_printf(xbt, nodename, "aerState" , "aerfail"); - err = xenbus_transaction_end(xbt, 0); - if (err) { - if (err == -EAGAIN) - goto again; - dev_err(&psdev->dev->dev, - "error %d when end xenbus transaction\n", err); - return; - } -} - -/* For each aer recovery step error_detected, mmio_enabled, etc, front_end and - * backend need to have cooperation. In xen_pcibk, those steps will do similar - * jobs: send service request and waiting for front_end response. -*/ -static pci_ers_result_t common_process(struct pcistub_device *psdev, - pci_channel_state_t state, int aer_cmd, - pci_ers_result_t result) -{ - pci_ers_result_t res = result; - struct xen_pcie_aer_op *aer_op; - int ret; - - /*with PV AER drivers*/ - aer_op = &(psdev->pdev->sh_info->aer_op); - aer_op->cmd = aer_cmd ; - /*useful for error_detected callback*/ - aer_op->err = state; - /*pcifront_end BDF*/ - ret = xen_pcibk_get_pcifront_dev(psdev->dev, psdev->pdev, - &aer_op->domain, &aer_op->bus, &aer_op->devfn); - if (!ret) { - dev_err(&psdev->dev->dev, - DRV_NAME ": failed to get pcifront device\n"); - return PCI_ERS_RESULT_NONE; - } - wmb(); - - dev_dbg(&psdev->dev->dev, - DRV_NAME ": aer_op %x dom %x bus %x devfn %x\n", - aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn); - /*local flag to mark there's aer request, xen_pcibk callback will use - * this flag to judge whether we need to check pci-front give aer - * service ack signal - */ - set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags); - - /*It is possible that a pcifront conf_read_write ops request invokes - * the callback which cause the spurious execution of wake_up. 
- * Yet it is harmless and better than a spinlock here - */ - set_bit(_XEN_PCIB_active, - (unsigned long *)&psdev->pdev->sh_info->flags); - wmb(); - notify_remote_via_irq(psdev->pdev->evtchn_irq); - - ret = wait_event_timeout(xen_pcibk_aer_wait_queue, - !(test_bit(_XEN_PCIB_active, (unsigned long *) - &psdev->pdev->sh_info->flags)), 300*HZ); - - if (!ret) { - if (test_bit(_XEN_PCIB_active, - (unsigned long *)&psdev->pdev->sh_info->flags)) { - dev_err(&psdev->dev->dev, - "pcifront aer process not responding!\n"); - clear_bit(_XEN_PCIB_active, - (unsigned long *)&psdev->pdev->sh_info->flags); - aer_op->err = PCI_ERS_RESULT_NONE; - return res; - } - } - clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags); - - if (test_bit(_XEN_PCIF_active, - (unsigned long *)&psdev->pdev->sh_info->flags)) { - dev_dbg(&psdev->dev->dev, - "schedule pci_conf service in xen_pcibk\n"); - xen_pcibk_test_and_schedule_op(psdev->pdev); - } - - res = (pci_ers_result_t)aer_op->err; - return res; -} - -/* -* xen_pcibk_slot_reset: it will send the slot_reset request to pcifront in case -* of the device driver could provide this service, and then wait for pcifront -* ack. 
-* @dev: pointer to PCI devices -* return value is used by aer_core do_recovery policy -*/ -static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev) -{ - struct pcistub_device *psdev; - pci_ers_result_t result; - - result = PCI_ERS_RESULT_RECOVERED; - dev_dbg(&dev->dev, "xen_pcibk_slot_reset(bus:%x,devfn:%x)\n", - dev->bus->number, dev->devfn); - - down_write(&pcistub_sem); - psdev = pcistub_device_find(pci_domain_nr(dev->bus), - dev->bus->number, - PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn)); - - if (!psdev || !psdev->pdev) { - dev_err(&dev->dev, - DRV_NAME " device is not found/assigned\n"); - goto end; - } - - if (!psdev->pdev->sh_info) { - dev_err(&dev->dev, DRV_NAME " device is not connected or owned" - " by HVM, kill it\n"); - kill_domain_by_device(psdev); - goto release; - } - - if (!test_bit(_XEN_PCIB_AERHANDLER, - (unsigned long *)&psdev->pdev->sh_info->flags)) { - dev_err(&dev->dev, - "guest with no AER driver should have been killed\n"); - goto release; - } - result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result); - - if (result == PCI_ERS_RESULT_NONE || - result == PCI_ERS_RESULT_DISCONNECT) { - dev_dbg(&dev->dev, - "No AER slot_reset service or disconnected!\n"); - kill_domain_by_device(psdev); - } -release: - pcistub_device_put(psdev); -end: - up_write(&pcistub_sem); - return result; - -} - - -/*xen_pcibk_mmio_enabled: it will send the mmio_enabled request to pcifront -* in case of the device driver could provide this service, and then wait -* for pcifront ack -* @dev: pointer to PCI devices -* return value is used by aer_core do_recovery policy -*/ - -static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev) -{ - struct pcistub_device *psdev; - pci_ers_result_t result; - - result = PCI_ERS_RESULT_RECOVERED; - dev_dbg(&dev->dev, "xen_pcibk_mmio_enabled(bus:%x,devfn:%x)\n", - dev->bus->number, dev->devfn); - - down_write(&pcistub_sem); - psdev = pcistub_device_find(pci_domain_nr(dev->bus), - dev->bus->number, - 
PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn)); - - if (!psdev || !psdev->pdev) { - dev_err(&dev->dev, - DRV_NAME " device is not found/assigned\n"); - goto end; - } - - if (!psdev->pdev->sh_info) { - dev_err(&dev->dev, DRV_NAME " device is not connected or owned" - " by HVM, kill it\n"); - kill_domain_by_device(psdev); - goto release; - } - - if (!test_bit(_XEN_PCIB_AERHANDLER, - (unsigned long *)&psdev->pdev->sh_info->flags)) { - dev_err(&dev->dev, - "guest with no AER driver should have been killed\n"); - goto release; - } - result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result); - - if (result == PCI_ERS_RESULT_NONE || - result == PCI_ERS_RESULT_DISCONNECT) { - dev_dbg(&dev->dev, - "No AER mmio_enabled service or disconnected!\n"); - kill_domain_by_device(psdev); - } -release: - pcistub_device_put(psdev); -end: - up_write(&pcistub_sem); - return result; -} - -/*xen_pcibk_error_detected: it will send the error_detected request to pcifront -* in case of the device driver could provide this service, and then wait -* for pcifront ack. 
-* @dev: pointer to PCI devices -* @error: the current PCI connection state -* return value is used by aer_core do_recovery policy -*/ - -static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev, - pci_channel_state_t error) -{ - struct pcistub_device *psdev; - pci_ers_result_t result; - - result = PCI_ERS_RESULT_CAN_RECOVER; - dev_dbg(&dev->dev, "xen_pcibk_error_detected(bus:%x,devfn:%x)\n", - dev->bus->number, dev->devfn); - - down_write(&pcistub_sem); - psdev = pcistub_device_find(pci_domain_nr(dev->bus), - dev->bus->number, - PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn)); - - if (!psdev || !psdev->pdev) { - dev_err(&dev->dev, - DRV_NAME " device is not found/assigned\n"); - goto end; - } - - if (!psdev->pdev->sh_info) { - dev_err(&dev->dev, DRV_NAME " device is not connected or owned" - " by HVM, kill it\n"); - kill_domain_by_device(psdev); - goto release; - } - - /*Guest owns the device yet no aer handler regiested, kill guest*/ - if (!test_bit(_XEN_PCIB_AERHANDLER, - (unsigned long *)&psdev->pdev->sh_info->flags)) { - dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n"); - kill_domain_by_device(psdev); - goto release; - } - result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result); - - if (result == PCI_ERS_RESULT_NONE || - result == PCI_ERS_RESULT_DISCONNECT) { - dev_dbg(&dev->dev, - "No AER error_detected service or disconnected!\n"); - kill_domain_by_device(psdev); - } -release: - pcistub_device_put(psdev); -end: - up_write(&pcistub_sem); - return result; -} - -/*xen_pcibk_error_resume: it will send the error_resume request to pcifront -* in case of the device driver could provide this service, and then wait -* for pcifront ack. 
-* @dev: pointer to PCI devices -*/ - -static void xen_pcibk_error_resume(struct pci_dev *dev) -{ - struct pcistub_device *psdev; - - dev_dbg(&dev->dev, "xen_pcibk_error_resume(bus:%x,devfn:%x)\n", - dev->bus->number, dev->devfn); - - down_write(&pcistub_sem); - psdev = pcistub_device_find(pci_domain_nr(dev->bus), - dev->bus->number, - PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn)); - - if (!psdev || !psdev->pdev) { - dev_err(&dev->dev, - DRV_NAME " device is not found/assigned\n"); - goto end; - } - - if (!psdev->pdev->sh_info) { - dev_err(&dev->dev, DRV_NAME " device is not connected or owned" - " by HVM, kill it\n"); - kill_domain_by_device(psdev); - goto release; - } - - if (!test_bit(_XEN_PCIB_AERHANDLER, - (unsigned long *)&psdev->pdev->sh_info->flags)) { - dev_err(&dev->dev, - "guest with no AER driver should have been killed\n"); - kill_domain_by_device(psdev); - goto release; - } - common_process(psdev, 1, XEN_PCI_OP_aer_resume, - PCI_ERS_RESULT_RECOVERED); -release: - pcistub_device_put(psdev); -end: - up_write(&pcistub_sem); - return; -} - -/*add xen_pcibk AER handling*/ -static struct pci_error_handlers xen_pcibk_error_handler = { - .error_detected = xen_pcibk_error_detected, - .mmio_enabled = xen_pcibk_mmio_enabled, - .slot_reset = xen_pcibk_slot_reset, - .resume = xen_pcibk_error_resume, -}; - -/* - * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't - * for a normal device. I don't want it to be loaded automatically. - */ - -static struct pci_driver xen_pcibk_pci_driver = { - /* The name should be xen_pciback, but until the tools are updated - * we will keep it as pciback. 
*/ - .name = "pciback", - .id_table = pcistub_ids, - .probe = pcistub_probe, - .remove = pcistub_remove, - .err_handler = &xen_pcibk_error_handler, -}; - -static inline int str_to_slot(const char *buf, int *domain, int *bus, - int *slot, int *func) -{ - int err; - - err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func); - if (err == 4) - return 0; - else if (err < 0) - return -EINVAL; - - /* try again without domain */ - *domain = 0; - err = sscanf(buf, " %x:%x.%x", bus, slot, func); - if (err == 3) - return 0; - - return -EINVAL; -} - -static inline int str_to_quirk(const char *buf, int *domain, int *bus, int - *slot, int *func, int *reg, int *size, int *mask) -{ - int err; - - err = - sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x", domain, bus, slot, - func, reg, size, mask); - if (err == 7) - return 0; - return -EINVAL; -} - -static int pcistub_device_id_add(int domain, int bus, int slot, int func) -{ - struct pcistub_device_id *pci_dev_id; - unsigned long flags; - - pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL); - if (!pci_dev_id) - return -ENOMEM; - - pci_dev_id->domain = domain; - pci_dev_id->bus = bus; - pci_dev_id->devfn = PCI_DEVFN(slot, func); - - pr_debug(DRV_NAME ": wants to seize %04x:%02x:%02x.%01x\n", - domain, bus, slot, func); - - spin_lock_irqsave(&device_ids_lock, flags); - list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids); - spin_unlock_irqrestore(&device_ids_lock, flags); - - return 0; -} - -static int pcistub_device_id_remove(int domain, int bus, int slot, int func) -{ - struct pcistub_device_id *pci_dev_id, *t; - int devfn = PCI_DEVFN(slot, func); - int err = -ENOENT; - unsigned long flags; - - spin_lock_irqsave(&device_ids_lock, flags); - list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, - slot_list) { - if (pci_dev_id->domain == domain - && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) { - /* Don't break; here because it's possible the same - * slot could be in the list more than once - */ - 
list_del(&pci_dev_id->slot_list); - kfree(pci_dev_id); - - err = 0; - - pr_debug(DRV_NAME ": removed %04x:%02x:%02x.%01x from " - "seize list\n", domain, bus, slot, func); - } - } - spin_unlock_irqrestore(&device_ids_lock, flags); - - return err; -} - -static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg, - int size, int mask) -{ - int err = 0; - struct pcistub_device *psdev; - struct pci_dev *dev; - struct config_field *field; - - psdev = pcistub_device_find(domain, bus, slot, func); - if (!psdev || !psdev->dev) { - err = -ENODEV; - goto out; - } - dev = psdev->dev; - - field = kzalloc(sizeof(*field), GFP_ATOMIC); - if (!field) { - err = -ENOMEM; - goto out; - } - - field->offset = reg; - field->size = size; - field->mask = mask; - field->init = NULL; - field->reset = NULL; - field->release = NULL; - field->clean = xen_pcibk_config_field_free; - - err = xen_pcibk_config_quirks_add_field(dev, field); - if (err) - kfree(field); -out: - return err; -} - -static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf, - size_t count) -{ - int domain, bus, slot, func; - int err; - - err = str_to_slot(buf, &domain, &bus, &slot, &func); - if (err) - goto out; - - err = pcistub_device_id_add(domain, bus, slot, func); - -out: - if (!err) - err = count; - return err; -} - -DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add); - -static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf, - size_t count) -{ - int domain, bus, slot, func; - int err; - - err = str_to_slot(buf, &domain, &bus, &slot, &func); - if (err) - goto out; - - err = pcistub_device_id_remove(domain, bus, slot, func); - -out: - if (!err) - err = count; - return err; -} - -DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove); - -static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf) -{ - struct pcistub_device_id *pci_dev_id; - size_t count = 0; - unsigned long flags; - - spin_lock_irqsave(&device_ids_lock, flags); - 
list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) { - if (count >= PAGE_SIZE) - break; - - count += scnprintf(buf + count, PAGE_SIZE - count, - "%04x:%02x:%02x.%01x\n", - pci_dev_id->domain, pci_dev_id->bus, - PCI_SLOT(pci_dev_id->devfn), - PCI_FUNC(pci_dev_id->devfn)); - } - spin_unlock_irqrestore(&device_ids_lock, flags); - - return count; -} - -DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL); - -static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf) -{ - struct pcistub_device *psdev; - struct xen_pcibk_dev_data *dev_data; - size_t count = 0; - unsigned long flags; - - spin_lock_irqsave(&pcistub_devices_lock, flags); - list_for_each_entry(psdev, &pcistub_devices, dev_list) { - if (count >= PAGE_SIZE) - break; - if (!psdev->dev) - continue; - dev_data = pci_get_drvdata(psdev->dev); - if (!dev_data) - continue; - count += - scnprintf(buf + count, PAGE_SIZE - count, - "%s:%s:%sing:%ld\n", - pci_name(psdev->dev), - dev_data->isr_on ? "on" : "off", - dev_data->ack_intr ? 
"ack" : "not ack", - dev_data->handled); - } - spin_unlock_irqrestore(&pcistub_devices_lock, flags); - return count; -} - -DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL); - -static ssize_t pcistub_irq_handler_switch(struct device_driver *drv, - const char *buf, - size_t count) -{ - struct pcistub_device *psdev; - struct xen_pcibk_dev_data *dev_data; - int domain, bus, slot, func; - int err = -ENOENT; - - err = str_to_slot(buf, &domain, &bus, &slot, &func); - if (err) - goto out; - - psdev = pcistub_device_find(domain, bus, slot, func); - - if (!psdev) - goto out; - - dev_data = pci_get_drvdata(psdev->dev); - if (!dev_data) - goto out; - - dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n", - dev_data->irq_name, dev_data->isr_on, - !dev_data->isr_on); - - dev_data->isr_on = !(dev_data->isr_on); - if (dev_data->isr_on) - dev_data->ack_intr = 1; -out: - if (!err) - err = count; - return err; -} -DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch); - -static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf, - size_t count) -{ - int domain, bus, slot, func, reg, size, mask; - int err; - - err = str_to_quirk(buf, &domain, &bus, &slot, &func, ®, &size, - &mask); - if (err) - goto out; - - err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask); - -out: - if (!err) - err = count; - return err; -} - -static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf) -{ - int count = 0; - unsigned long flags; - struct xen_pcibk_config_quirk *quirk; - struct xen_pcibk_dev_data *dev_data; - const struct config_field *field; - const struct config_field_entry *cfg_entry; - - spin_lock_irqsave(&device_ids_lock, flags); - list_for_each_entry(quirk, &xen_pcibk_quirks, quirks_list) { - if (count >= PAGE_SIZE) - goto out; - - count += scnprintf(buf + count, PAGE_SIZE - count, - "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n", - quirk->pdev->bus->number, - PCI_SLOT(quirk->pdev->devfn), - 
PCI_FUNC(quirk->pdev->devfn), - quirk->devid.vendor, quirk->devid.device, - quirk->devid.subvendor, - quirk->devid.subdevice); - - dev_data = pci_get_drvdata(quirk->pdev); - - list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { - field = cfg_entry->field; - if (count >= PAGE_SIZE) - goto out; - - count += scnprintf(buf + count, PAGE_SIZE - count, - "\t\t%08x:%01x:%08x\n", - cfg_entry->base_offset + - field->offset, field->size, - field->mask); - } - } - -out: - spin_unlock_irqrestore(&device_ids_lock, flags); - - return count; -} - -DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add); - -static ssize_t permissive_add(struct device_driver *drv, const char *buf, - size_t count) -{ - int domain, bus, slot, func; - int err; - struct pcistub_device *psdev; - struct xen_pcibk_dev_data *dev_data; - err = str_to_slot(buf, &domain, &bus, &slot, &func); - if (err) - goto out; - psdev = pcistub_device_find(domain, bus, slot, func); - if (!psdev) { - err = -ENODEV; - goto out; - } - if (!psdev->dev) { - err = -ENODEV; - goto release; - } - dev_data = pci_get_drvdata(psdev->dev); - /* the driver data for a device should never be null at this point */ - if (!dev_data) { - err = -ENXIO; - goto release; - } - if (!dev_data->permissive) { - dev_data->permissive = 1; - /* Let user know that what they're doing could be unsafe */ - dev_warn(&psdev->dev->dev, "enabling permissive mode " - "configuration space accesses!\n"); - dev_warn(&psdev->dev->dev, - "permissive mode is potentially unsafe!\n"); - } -release: - pcistub_device_put(psdev); -out: - if (!err) - err = count; - return err; -} - -static ssize_t permissive_show(struct device_driver *drv, char *buf) -{ - struct pcistub_device *psdev; - struct xen_pcibk_dev_data *dev_data; - size_t count = 0; - unsigned long flags; - spin_lock_irqsave(&pcistub_devices_lock, flags); - list_for_each_entry(psdev, &pcistub_devices, dev_list) { - if (count >= PAGE_SIZE) - break; - if (!psdev->dev) - continue; 
- dev_data = pci_get_drvdata(psdev->dev); - if (!dev_data || !dev_data->permissive) - continue; - count += - scnprintf(buf + count, PAGE_SIZE - count, "%s\n", - pci_name(psdev->dev)); - } - spin_unlock_irqrestore(&pcistub_devices_lock, flags); - return count; -} - -DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add); - -static void pcistub_exit(void) -{ - driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_new_slot); - driver_remove_file(&xen_pcibk_pci_driver.driver, - &driver_attr_remove_slot); - driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_slots); - driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_quirks); - driver_remove_file(&xen_pcibk_pci_driver.driver, - &driver_attr_permissive); - driver_remove_file(&xen_pcibk_pci_driver.driver, - &driver_attr_irq_handlers); - driver_remove_file(&xen_pcibk_pci_driver.driver, - &driver_attr_irq_handler_state); - pci_unregister_driver(&xen_pcibk_pci_driver); -} - -static int __init pcistub_init(void) -{ - int pos = 0; - int err = 0; - int domain, bus, slot, func; - int parsed; - - if (pci_devs_to_hide && *pci_devs_to_hide) { - do { - parsed = 0; - - err = sscanf(pci_devs_to_hide + pos, - " (%x:%x:%x.%x) %n", - &domain, &bus, &slot, &func, &parsed); - if (err != 4) { - domain = 0; - err = sscanf(pci_devs_to_hide + pos, - " (%x:%x.%x) %n", - &bus, &slot, &func, &parsed); - if (err != 3) - goto parse_error; - } - - err = pcistub_device_id_add(domain, bus, slot, func); - if (err) - goto out; - - /* if parsed<=0, we've reached the end of the string */ - pos += parsed; - } while (parsed > 0 && pci_devs_to_hide[pos]); - } - - /* If we're the first PCI Device Driver to register, we're the - * first one to get offered PCI devices as they become - * available (and thus we can be the first to grab them) - */ - err = pci_register_driver(&xen_pcibk_pci_driver); - if (err < 0) - goto out; - - err = driver_create_file(&xen_pcibk_pci_driver.driver, - &driver_attr_new_slot); - if 
(!err) - err = driver_create_file(&xen_pcibk_pci_driver.driver, - &driver_attr_remove_slot); - if (!err) - err = driver_create_file(&xen_pcibk_pci_driver.driver, - &driver_attr_slots); - if (!err) - err = driver_create_file(&xen_pcibk_pci_driver.driver, - &driver_attr_quirks); - if (!err) - err = driver_create_file(&xen_pcibk_pci_driver.driver, - &driver_attr_permissive); - - if (!err) - err = driver_create_file(&xen_pcibk_pci_driver.driver, - &driver_attr_irq_handlers); - if (!err) - err = driver_create_file(&xen_pcibk_pci_driver.driver, - &driver_attr_irq_handler_state); - if (err) - pcistub_exit(); - -out: - return err; - -parse_error: - printk(KERN_ERR DRV_NAME ": Error parsing pci_devs_to_hide at \"%s\"\n", - pci_devs_to_hide + pos); - return -EINVAL; -} - -#ifndef MODULE -/* - * fs_initcall happens before device_initcall - * so xen_pcibk *should* get called first (b/c we - * want to suck up any device before other drivers - * get a chance by being the first pci device - * driver to register) - */ -fs_initcall(pcistub_init); -#endif - -static int __init xen_pcibk_init(void) -{ - int err; - - if (!xen_initial_domain()) - return -ENODEV; - - err = xen_pcibk_config_init(); - if (err) - return err; - -#ifdef MODULE - err = pcistub_init(); - if (err < 0) - return err; -#endif - - pcistub_init_devices_late(); - err = xen_pcibk_xenbus_register(); - if (err) - pcistub_exit(); - - return err; -} - -static void __exit xen_pcibk_cleanup(void) -{ - xen_pcibk_xenbus_unregister(); - pcistub_exit(); -} - -module_init(xen_pcibk_init); -module_exit(xen_pcibk_cleanup); - -MODULE_LICENSE("Dual BSD/GPL"); diff --git a/trunk/drivers/xen/xen-pciback/pciback.h b/trunk/drivers/xen/xen-pciback/pciback.h deleted file mode 100644 index a0e131a81503..000000000000 --- a/trunk/drivers/xen/xen-pciback/pciback.h +++ /dev/null @@ -1,183 +0,0 @@ -/* - * PCI Backend Common Data Structures & Function Declarations - * - * Author: Ryan Wilson - */ -#ifndef __XEN_PCIBACK_H__ -#define 
__XEN_PCIBACK_H__ - -#include -#include -#include -#include -#include -#include -#include -#include - -struct pci_dev_entry { - struct list_head list; - struct pci_dev *dev; -}; - -#define _PDEVF_op_active (0) -#define PDEVF_op_active (1<<(_PDEVF_op_active)) -#define _PCIB_op_pending (1) -#define PCIB_op_pending (1<<(_PCIB_op_pending)) - -struct xen_pcibk_device { - void *pci_dev_data; - spinlock_t dev_lock; - struct xenbus_device *xdev; - struct xenbus_watch be_watch; - u8 be_watching; - int evtchn_irq; - struct xen_pci_sharedinfo *sh_info; - unsigned long flags; - struct work_struct op_work; -}; - -struct xen_pcibk_dev_data { - struct list_head config_fields; - unsigned int permissive:1; - unsigned int warned_on_write:1; - unsigned int enable_intx:1; - unsigned int isr_on:1; /* Whether the IRQ handler is installed. */ - unsigned int ack_intr:1; /* .. and ACK-ing */ - unsigned long handled; - unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */ - char irq_name[0]; /* xen-pcibk[000:04:00.0] */ -}; - -/* Used by XenBus and xen_pcibk_ops.c */ -extern wait_queue_head_t xen_pcibk_aer_wait_queue; -extern struct workqueue_struct *xen_pcibk_wq; -/* Used by pcistub.c and conf_space_quirks.c */ -extern struct list_head xen_pcibk_quirks; - -/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */ -struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev, - int domain, int bus, - int slot, int func); -struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev, - struct pci_dev *dev); -void pcistub_put_pci_dev(struct pci_dev *dev); - -/* Ensure a device is turned off or reset */ -void xen_pcibk_reset_device(struct pci_dev *pdev); - -/* Access a virtual configuration space for a PCI device */ -int xen_pcibk_config_init(void); -int xen_pcibk_config_init_dev(struct pci_dev *dev); -void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev); -void xen_pcibk_config_reset_dev(struct pci_dev *dev); -void 
xen_pcibk_config_free_dev(struct pci_dev *dev); -int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size, - u32 *ret_val); -int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, - u32 value); - -/* Handle requests for specific devices from the frontend */ -typedef int (*publish_pci_dev_cb) (struct xen_pcibk_device *pdev, - unsigned int domain, unsigned int bus, - unsigned int devfn, unsigned int devid); -typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev, - unsigned int domain, unsigned int bus); - -/* Backend registration for the two types of BDF representation: - * vpci - BDFs start at 00 - * passthrough - BDFs are exactly like in the host. - */ -struct xen_pcibk_backend { - char *name; - int (*init)(struct xen_pcibk_device *pdev); - void (*free)(struct xen_pcibk_device *pdev); - int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev, - unsigned int *domain, unsigned int *bus, - unsigned int *devfn); - int (*publish)(struct xen_pcibk_device *pdev, publish_pci_root_cb cb); - void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev); - int (*add)(struct xen_pcibk_device *pdev, struct pci_dev *dev, - int devid, publish_pci_dev_cb publish_cb); - struct pci_dev *(*get)(struct xen_pcibk_device *pdev, - unsigned int domain, unsigned int bus, - unsigned int devfn); -}; - -extern struct xen_pcibk_backend xen_pcibk_vpci_backend; -extern struct xen_pcibk_backend xen_pcibk_passthrough_backend; -extern struct xen_pcibk_backend *xen_pcibk_backend; - -static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, - struct pci_dev *dev, - int devid, - publish_pci_dev_cb publish_cb) -{ - if (xen_pcibk_backend && xen_pcibk_backend->add) - return xen_pcibk_backend->add(pdev, dev, devid, publish_cb); - return -1; -}; -static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, - struct pci_dev *dev) -{ - if (xen_pcibk_backend && xen_pcibk_backend->free) - return xen_pcibk_backend->release(pdev, 
dev); -}; - -static inline struct pci_dev * -xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain, - unsigned int bus, unsigned int devfn) -{ - if (xen_pcibk_backend && xen_pcibk_backend->get) - return xen_pcibk_backend->get(pdev, domain, bus, devfn); - return NULL; -}; -/** -* Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in xen_pcibk -* before sending aer request to pcifront, so that guest could identify -* device, coopearte with xen_pcibk to finish aer recovery job if device driver -* has the capability -*/ -static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, - struct xen_pcibk_device *pdev, - unsigned int *domain, - unsigned int *bus, - unsigned int *devfn) -{ - if (xen_pcibk_backend && xen_pcibk_backend->find) - return xen_pcibk_backend->find(pcidev, pdev, domain, bus, - devfn); - return -1; -}; -static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev) -{ - if (xen_pcibk_backend && xen_pcibk_backend->init) - return xen_pcibk_backend->init(pdev); - return -1; -}; -static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, - publish_pci_root_cb cb) -{ - if (xen_pcibk_backend && xen_pcibk_backend->publish) - return xen_pcibk_backend->publish(pdev, cb); - return -1; -}; -static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) -{ - if (xen_pcibk_backend && xen_pcibk_backend->free) - return xen_pcibk_backend->free(pdev); -}; -/* Handles events from front-end */ -irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id); -void xen_pcibk_do_op(struct work_struct *data); - -int xen_pcibk_xenbus_register(void); -void xen_pcibk_xenbus_unregister(void); - -extern int verbose_request; - -void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev); -#endif - -/* Handles shared IRQs that can to device domain and control domain. 
*/ -void xen_pcibk_irq_handler(struct pci_dev *dev, int reset); diff --git a/trunk/drivers/xen/xen-pciback/pciback_ops.c b/trunk/drivers/xen/xen-pciback/pciback_ops.c deleted file mode 100644 index 8c95c3415b75..000000000000 --- a/trunk/drivers/xen/xen-pciback/pciback_ops.c +++ /dev/null @@ -1,384 +0,0 @@ -/* - * PCI Backend Operations - respond to PCI requests from Frontend - * - * Author: Ryan Wilson - */ -#include -#include -#include -#include -#include -#include "pciback.h" - -#define DRV_NAME "xen-pciback" -int verbose_request; -module_param(verbose_request, int, 0644); - -static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id); - -/* Ensure a device is has the fake IRQ handler "turned on/off" and is - * ready to be exported. This MUST be run after xen_pcibk_reset_device - * which does the actual PCI device enable/disable. - */ -static void xen_pcibk_control_isr(struct pci_dev *dev, int reset) -{ - struct xen_pcibk_dev_data *dev_data; - int rc; - int enable = 0; - - dev_data = pci_get_drvdata(dev); - if (!dev_data) - return; - - /* We don't deal with bridges */ - if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) - return; - - if (reset) { - dev_data->enable_intx = 0; - dev_data->ack_intr = 0; - } - enable = dev_data->enable_intx; - - /* Asked to disable, but ISR isn't runnig */ - if (!enable && !dev_data->isr_on) - return; - - /* Squirrel away the IRQs in the dev_data. We need this - * b/c when device transitions to MSI, the dev->irq is - * overwritten with the MSI vector. - */ - if (enable) - dev_data->irq = dev->irq; - - /* - * SR-IOV devices in all use MSI-X and have no legacy - * interrupts, so inhibit creating a fake IRQ handler for them. - */ - if (dev_data->irq == 0) - goto out; - - dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n", - dev_data->irq_name, - dev_data->irq, - pci_is_enabled(dev) ? "on" : "off", - dev->msi_enabled ? "MSI" : "", - dev->msix_enabled ? "MSI/X" : "", - dev_data->isr_on ? "enable" : "disable", - enable ? 
"enable" : "disable"); - - if (enable) { - rc = request_irq(dev_data->irq, - xen_pcibk_guest_interrupt, IRQF_SHARED, - dev_data->irq_name, dev); - if (rc) { - dev_err(&dev->dev, "%s: failed to install fake IRQ " \ - "handler for IRQ %d! (rc:%d)\n", - dev_data->irq_name, dev_data->irq, rc); - goto out; - } - } else { - free_irq(dev_data->irq, dev); - dev_data->irq = 0; - } - dev_data->isr_on = enable; - dev_data->ack_intr = enable; -out: - dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n", - dev_data->irq_name, - dev_data->irq, - pci_is_enabled(dev) ? "on" : "off", - dev->msi_enabled ? "MSI" : "", - dev->msix_enabled ? "MSI/X" : "", - enable ? (dev_data->isr_on ? "enabled" : "failed to enable") : - (dev_data->isr_on ? "failed to disable" : "disabled")); -} - -/* Ensure a device is "turned off" and ready to be exported. - * (Also see xen_pcibk_config_reset to ensure virtual configuration space is - * ready to be re-exported) - */ -void xen_pcibk_reset_device(struct pci_dev *dev) -{ - u16 cmd; - - xen_pcibk_control_isr(dev, 1 /* reset device */); - - /* Disable devices (but not bridges) */ - if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { -#ifdef CONFIG_PCI_MSI - /* The guest could have been abruptly killed without - * disabling MSI/MSI-X interrupts.*/ - if (dev->msix_enabled) - pci_disable_msix(dev); - if (dev->msi_enabled) - pci_disable_msi(dev); -#endif - pci_disable_device(dev); - - pci_write_config_word(dev, PCI_COMMAND, 0); - - dev->is_busmaster = 0; - } else { - pci_read_config_word(dev, PCI_COMMAND, &cmd); - if (cmd & (PCI_COMMAND_INVALIDATE)) { - cmd &= ~(PCI_COMMAND_INVALIDATE); - pci_write_config_word(dev, PCI_COMMAND, cmd); - - dev->is_busmaster = 0; - } - } -} - -#ifdef CONFIG_PCI_MSI -static -int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev, - struct pci_dev *dev, struct xen_pci_op *op) -{ - struct xen_pcibk_dev_data *dev_data; - int otherend = pdev->xdev->otherend_id; - int status; - - if (unlikely(verbose_request)) - printk(KERN_DEBUG DRV_NAME ": %s: 
enable MSI\n", pci_name(dev)); - - status = pci_enable_msi(dev); - - if (status) { - printk(KERN_ERR "error enable msi for guest %x status %x\n", - otherend, status); - op->value = 0; - return XEN_PCI_ERR_op_failed; - } - - /* The value the guest needs is actually the IDT vector, not the - * the local domain's IRQ number. */ - - op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; - if (unlikely(verbose_request)) - printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), - op->value); - - dev_data = pci_get_drvdata(dev); - if (dev_data) - dev_data->ack_intr = 0; - - return 0; -} - -static -int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev, - struct pci_dev *dev, struct xen_pci_op *op) -{ - struct xen_pcibk_dev_data *dev_data; - - if (unlikely(verbose_request)) - printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n", - pci_name(dev)); - pci_disable_msi(dev); - - op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; - if (unlikely(verbose_request)) - printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), - op->value); - dev_data = pci_get_drvdata(dev); - if (dev_data) - dev_data->ack_intr = 1; - return 0; -} - -static -int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, - struct pci_dev *dev, struct xen_pci_op *op) -{ - struct xen_pcibk_dev_data *dev_data; - int i, result; - struct msix_entry *entries; - - if (unlikely(verbose_request)) - printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n", - pci_name(dev)); - if (op->value > SH_INFO_MAX_VEC) - return -EINVAL; - - entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); - if (entries == NULL) - return -ENOMEM; - - for (i = 0; i < op->value; i++) { - entries[i].entry = op->msix_entries[i].entry; - entries[i].vector = op->msix_entries[i].vector; - } - - result = pci_enable_msix(dev, entries, op->value); - - if (result == 0) { - for (i = 0; i < op->value; i++) { - op->msix_entries[i].entry = entries[i].entry; - if (entries[i].vector) - op->msix_entries[i].vector = - 
xen_pirq_from_irq(entries[i].vector); - if (unlikely(verbose_request)) - printk(KERN_DEBUG DRV_NAME ": %s: " \ - "MSI-X[%d]: %d\n", - pci_name(dev), i, - op->msix_entries[i].vector); - } - } else { - printk(KERN_WARNING DRV_NAME ": %s: failed to enable MSI-X: err %d!\n", - pci_name(dev), result); - } - kfree(entries); - - op->value = result; - dev_data = pci_get_drvdata(dev); - if (dev_data) - dev_data->ack_intr = 0; - - return result; -} - -static -int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, - struct pci_dev *dev, struct xen_pci_op *op) -{ - struct xen_pcibk_dev_data *dev_data; - if (unlikely(verbose_request)) - printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n", - pci_name(dev)); - pci_disable_msix(dev); - - /* - * SR-IOV devices (which don't have any legacy IRQ) have - * an undefined IRQ value of zero. - */ - op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; - if (unlikely(verbose_request)) - printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev), - op->value); - dev_data = pci_get_drvdata(dev); - if (dev_data) - dev_data->ack_intr = 1; - return 0; -} -#endif -/* -* Now the same evtchn is used for both pcifront conf_read_write request -* as well as pcie aer front end ack. We use a new work_queue to schedule -* xen_pcibk conf_read_write service for avoiding confict with aer_core -* do_recovery job which also use the system default work_queue -*/ -void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) -{ - /* Check that frontend is requesting an operation and that we are not - * already processing a request */ - if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) - && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) { - queue_work(xen_pcibk_wq, &pdev->op_work); - } - /*_XEN_PCIB_active should have been cleared by pcifront. 
And also make - sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/ - if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) - && test_bit(_PCIB_op_pending, &pdev->flags)) { - wake_up(&xen_pcibk_aer_wait_queue); - } -} - -/* Performing the configuration space reads/writes must not be done in atomic - * context because some of the pci_* functions can sleep (mostly due to ACPI - * use of semaphores). This function is intended to be called from a work - * queue in process context taking a struct xen_pcibk_device as a parameter */ - -void xen_pcibk_do_op(struct work_struct *data) -{ - struct xen_pcibk_device *pdev = - container_of(data, struct xen_pcibk_device, op_work); - struct pci_dev *dev; - struct xen_pcibk_dev_data *dev_data = NULL; - struct xen_pci_op *op = &pdev->sh_info->op; - int test_intx = 0; - - dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn); - - if (dev == NULL) - op->err = XEN_PCI_ERR_dev_not_found; - else { - dev_data = pci_get_drvdata(dev); - if (dev_data) - test_intx = dev_data->enable_intx; - switch (op->cmd) { - case XEN_PCI_OP_conf_read: - op->err = xen_pcibk_config_read(dev, - op->offset, op->size, &op->value); - break; - case XEN_PCI_OP_conf_write: - op->err = xen_pcibk_config_write(dev, - op->offset, op->size, op->value); - break; -#ifdef CONFIG_PCI_MSI - case XEN_PCI_OP_enable_msi: - op->err = xen_pcibk_enable_msi(pdev, dev, op); - break; - case XEN_PCI_OP_disable_msi: - op->err = xen_pcibk_disable_msi(pdev, dev, op); - break; - case XEN_PCI_OP_enable_msix: - op->err = xen_pcibk_enable_msix(pdev, dev, op); - break; - case XEN_PCI_OP_disable_msix: - op->err = xen_pcibk_disable_msix(pdev, dev, op); - break; -#endif - default: - op->err = XEN_PCI_ERR_not_implemented; - break; - } - } - if (!op->err && dev && dev_data) { - /* Transition detected */ - if ((dev_data->enable_intx != test_intx)) - xen_pcibk_control_isr(dev, 0 /* no reset */); - } - /* Tell the driver domain that we're done. 
*/ - wmb(); - clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); - notify_remote_via_irq(pdev->evtchn_irq); - - /* Mark that we're done. */ - smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */ - clear_bit(_PDEVF_op_active, &pdev->flags); - smp_mb__after_clear_bit(); /* /before/ final check for work */ - - /* Check to see if the driver domain tried to start another request in - * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. - */ - xen_pcibk_test_and_schedule_op(pdev); -} - -irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id) -{ - struct xen_pcibk_device *pdev = dev_id; - - xen_pcibk_test_and_schedule_op(pdev); - - return IRQ_HANDLED; -} -static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id) -{ - struct pci_dev *dev = (struct pci_dev *)dev_id; - struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); - - if (dev_data->isr_on && dev_data->ack_intr) { - dev_data->handled++; - if ((dev_data->handled % 1000) == 0) { - if (xen_test_irq_shared(irq)) { - printk(KERN_INFO "%s IRQ line is not shared " - "with other domains. 
Turning ISR off\n", - dev_data->irq_name); - dev_data->ack_intr = 0; - } - } - return IRQ_HANDLED; - } - return IRQ_NONE; -} diff --git a/trunk/drivers/xen/xen-pciback/vpci.c b/trunk/drivers/xen/xen-pciback/vpci.c deleted file mode 100644 index 4a42cfb0959d..000000000000 --- a/trunk/drivers/xen/xen-pciback/vpci.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * PCI Backend - Provides a Virtual PCI bus (with real devices) - * to the frontend - * - * Author: Ryan Wilson - */ - -#include -#include -#include -#include -#include "pciback.h" - -#define PCI_SLOT_MAX 32 -#define DRV_NAME "xen-pciback" - -struct vpci_dev_data { - /* Access to dev_list must be protected by lock */ - struct list_head dev_list[PCI_SLOT_MAX]; - spinlock_t lock; -}; - -static inline struct list_head *list_first(struct list_head *head) -{ - return head->next; -} - -static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, - unsigned int domain, - unsigned int bus, - unsigned int devfn) -{ - struct pci_dev_entry *entry; - struct pci_dev *dev = NULL; - struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; - unsigned long flags; - - if (domain != 0 || bus != 0) - return NULL; - - if (PCI_SLOT(devfn) < PCI_SLOT_MAX) { - spin_lock_irqsave(&vpci_dev->lock, flags); - - list_for_each_entry(entry, - &vpci_dev->dev_list[PCI_SLOT(devfn)], - list) { - if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) { - dev = entry->dev; - break; - } - } - - spin_unlock_irqrestore(&vpci_dev->lock, flags); - } - return dev; -} - -static inline int match_slot(struct pci_dev *l, struct pci_dev *r) -{ - if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus) - && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn)) - return 1; - - return 0; -} - -static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, - struct pci_dev *dev, int devid, - publish_pci_dev_cb publish_cb) -{ - int err = 0, slot, func = -1; - struct pci_dev_entry *t, *dev_entry; - struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; - 
unsigned long flags; - - if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) { - err = -EFAULT; - xenbus_dev_fatal(pdev->xdev, err, - "Can't export bridges on the virtual PCI bus"); - goto out; - } - - dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL); - if (!dev_entry) { - err = -ENOMEM; - xenbus_dev_fatal(pdev->xdev, err, - "Error adding entry to virtual PCI bus"); - goto out; - } - - dev_entry->dev = dev; - - spin_lock_irqsave(&vpci_dev->lock, flags); - - /* Keep multi-function devices together on the virtual PCI bus */ - for (slot = 0; slot < PCI_SLOT_MAX; slot++) { - if (!list_empty(&vpci_dev->dev_list[slot])) { - t = list_entry(list_first(&vpci_dev->dev_list[slot]), - struct pci_dev_entry, list); - - if (match_slot(dev, t->dev)) { - pr_info(DRV_NAME ": vpci: %s: " - "assign to virtual slot %d func %d\n", - pci_name(dev), slot, - PCI_FUNC(dev->devfn)); - list_add_tail(&dev_entry->list, - &vpci_dev->dev_list[slot]); - func = PCI_FUNC(dev->devfn); - goto unlock; - } - } - } - - /* Assign to a new slot on the virtual PCI bus */ - for (slot = 0; slot < PCI_SLOT_MAX; slot++) { - if (list_empty(&vpci_dev->dev_list[slot])) { - printk(KERN_INFO DRV_NAME - ": vpci: %s: assign to virtual slot %d\n", - pci_name(dev), slot); - list_add_tail(&dev_entry->list, - &vpci_dev->dev_list[slot]); - func = PCI_FUNC(dev->devfn); - goto unlock; - } - } - - err = -ENOMEM; - xenbus_dev_fatal(pdev->xdev, err, - "No more space on root virtual PCI bus"); - -unlock: - spin_unlock_irqrestore(&vpci_dev->lock, flags); - - /* Publish this device. 
*/ - if (!err) - err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid); - -out: - return err; -} - -static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, - struct pci_dev *dev) -{ - int slot; - struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; - struct pci_dev *found_dev = NULL; - unsigned long flags; - - spin_lock_irqsave(&vpci_dev->lock, flags); - - for (slot = 0; slot < PCI_SLOT_MAX; slot++) { - struct pci_dev_entry *e, *tmp; - list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot], - list) { - if (e->dev == dev) { - list_del(&e->list); - found_dev = e->dev; - kfree(e); - goto out; - } - } - } - -out: - spin_unlock_irqrestore(&vpci_dev->lock, flags); - - if (found_dev) - pcistub_put_pci_dev(found_dev); -} - -static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev) -{ - int slot; - struct vpci_dev_data *vpci_dev; - - vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL); - if (!vpci_dev) - return -ENOMEM; - - spin_lock_init(&vpci_dev->lock); - - for (slot = 0; slot < PCI_SLOT_MAX; slot++) - INIT_LIST_HEAD(&vpci_dev->dev_list[slot]); - - pdev->pci_dev_data = vpci_dev; - - return 0; -} - -static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, - publish_pci_root_cb publish_cb) -{ - /* The Virtual PCI bus has only one root */ - return publish_cb(pdev, 0, 0); -} - -static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev) -{ - int slot; - struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; - - for (slot = 0; slot < PCI_SLOT_MAX; slot++) { - struct pci_dev_entry *e, *tmp; - list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot], - list) { - list_del(&e->list); - pcistub_put_pci_dev(e->dev); - kfree(e); - } - } - - kfree(vpci_dev); - pdev->pci_dev_data = NULL; -} - -static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, - struct xen_pcibk_device *pdev, - unsigned int *domain, unsigned int *bus, - unsigned int *devfn) -{ - struct pci_dev_entry *entry; - struct pci_dev *dev = NULL; - struct 
vpci_dev_data *vpci_dev = pdev->pci_dev_data; - unsigned long flags; - int found = 0, slot; - - spin_lock_irqsave(&vpci_dev->lock, flags); - for (slot = 0; slot < PCI_SLOT_MAX; slot++) { - list_for_each_entry(entry, - &vpci_dev->dev_list[slot], - list) { - dev = entry->dev; - if (dev && dev->bus->number == pcidev->bus->number - && pci_domain_nr(dev->bus) == - pci_domain_nr(pcidev->bus) - && dev->devfn == pcidev->devfn) { - found = 1; - *domain = 0; - *bus = 0; - *devfn = PCI_DEVFN(slot, - PCI_FUNC(pcidev->devfn)); - } - } - } - spin_unlock_irqrestore(&vpci_dev->lock, flags); - return found; -} - -struct xen_pcibk_backend xen_pcibk_vpci_backend = { - .name = "vpci", - .init = __xen_pcibk_init_devices, - .free = __xen_pcibk_release_devices, - .find = __xen_pcibk_get_pcifront_dev, - .publish = __xen_pcibk_publish_pci_roots, - .release = __xen_pcibk_release_pci_dev, - .add = __xen_pcibk_add_pci_dev, - .get = __xen_pcibk_get_pci_dev, -}; diff --git a/trunk/drivers/xen/xen-pciback/xenbus.c b/trunk/drivers/xen/xen-pciback/xenbus.c deleted file mode 100644 index 206c4ce030bc..000000000000 --- a/trunk/drivers/xen/xen-pciback/xenbus.c +++ /dev/null @@ -1,749 +0,0 @@ -/* - * PCI Backend Xenbus Setup - handles setup with frontend and xend - * - * Author: Ryan Wilson - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "pciback.h" - -#define DRV_NAME "xen-pciback" -#define INVALID_EVTCHN_IRQ (-1) -struct workqueue_struct *xen_pcibk_wq; - -static int __read_mostly passthrough; -module_param(passthrough, bool, S_IRUGO); -MODULE_PARM_DESC(passthrough, - "Option to specify how to export PCI topology to guest:\n"\ - " 0 - (default) Hide the true PCI topology and makes the frontend\n"\ - " there is a single PCI bus with only the exported devices on it.\n"\ - " For example, a device at 03:05.0 will be re-assigned to 00:00.0\n"\ - " while second device at 02:1a.1 will be re-assigned to 00:01.1.\n"\ - " 1 - Passthrough provides a real 
view of the PCI topology to the\n"\ - " frontend (for example, a device at 06:01.b will still appear at\n"\ - " 06:01.b to the frontend). This is similar to how Xen 2.0.x\n"\ - " exposed PCI devices to its driver domains. This may be required\n"\ - " for drivers which depend on finding their hardward in certain\n"\ - " bus/slot locations."); - -static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev) -{ - struct xen_pcibk_device *pdev; - - pdev = kzalloc(sizeof(struct xen_pcibk_device), GFP_KERNEL); - if (pdev == NULL) - goto out; - dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev); - - pdev->xdev = xdev; - dev_set_drvdata(&xdev->dev, pdev); - - spin_lock_init(&pdev->dev_lock); - - pdev->sh_info = NULL; - pdev->evtchn_irq = INVALID_EVTCHN_IRQ; - pdev->be_watching = 0; - - INIT_WORK(&pdev->op_work, xen_pcibk_do_op); - - if (xen_pcibk_init_devices(pdev)) { - kfree(pdev); - pdev = NULL; - } -out: - return pdev; -} - -static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev) -{ - spin_lock(&pdev->dev_lock); - - /* Ensure the guest can't trigger our handler before removing devices */ - if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) { - unbind_from_irqhandler(pdev->evtchn_irq, pdev); - pdev->evtchn_irq = INVALID_EVTCHN_IRQ; - } - spin_unlock(&pdev->dev_lock); - - /* If the driver domain started an op, make sure we complete it - * before releasing the shared memory */ - - /* Note, the workqueue does not use spinlocks at all.*/ - flush_workqueue(xen_pcibk_wq); - - spin_lock(&pdev->dev_lock); - if (pdev->sh_info != NULL) { - xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info); - pdev->sh_info = NULL; - } - spin_unlock(&pdev->dev_lock); - -} - -static void free_pdev(struct xen_pcibk_device *pdev) -{ - if (pdev->be_watching) { - unregister_xenbus_watch(&pdev->be_watch); - pdev->be_watching = 0; - } - - xen_pcibk_disconnect(pdev); - - xen_pcibk_release_devices(pdev); - - dev_set_drvdata(&pdev->xdev->dev, NULL); - pdev->xdev = NULL; - - kfree(pdev); -} - 
-static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, - int remote_evtchn) -{ - int err = 0; - void *vaddr; - - dev_dbg(&pdev->xdev->dev, - "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n", - gnt_ref, remote_evtchn); - - err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr); - if (err < 0) { - xenbus_dev_fatal(pdev->xdev, err, - "Error mapping other domain page in ours."); - goto out; - } - - spin_lock(&pdev->dev_lock); - pdev->sh_info = vaddr; - spin_unlock(&pdev->dev_lock); - - err = bind_interdomain_evtchn_to_irqhandler( - pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event, - 0, DRV_NAME, pdev); - if (err < 0) { - xenbus_dev_fatal(pdev->xdev, err, - "Error binding event channel to IRQ"); - goto out; - } - - spin_lock(&pdev->dev_lock); - pdev->evtchn_irq = err; - spin_unlock(&pdev->dev_lock); - err = 0; - - dev_dbg(&pdev->xdev->dev, "Attached!\n"); -out: - return err; -} - -static int xen_pcibk_attach(struct xen_pcibk_device *pdev) -{ - int err = 0; - int gnt_ref, remote_evtchn; - char *magic = NULL; - - - /* Make sure we only do this setup once */ - if (xenbus_read_driver_state(pdev->xdev->nodename) != - XenbusStateInitialised) - goto out; - - /* Wait for frontend to state that it has published the configuration */ - if (xenbus_read_driver_state(pdev->xdev->otherend) != - XenbusStateInitialised) - goto out; - - dev_dbg(&pdev->xdev->dev, "Reading frontend config\n"); - - err = xenbus_gather(XBT_NIL, pdev->xdev->otherend, - "pci-op-ref", "%u", &gnt_ref, - "event-channel", "%u", &remote_evtchn, - "magic", NULL, &magic, NULL); - if (err) { - /* If configuration didn't get read correctly, wait longer */ - xenbus_dev_fatal(pdev->xdev, err, - "Error reading configuration from frontend"); - goto out; - } - - if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) { - xenbus_dev_fatal(pdev->xdev, -EFAULT, - "version mismatch (%s/%s) with pcifront - " - "halting xen_pcibk", - magic, XEN_PCI_MAGIC); - goto out; - } - - err = 
xen_pcibk_do_attach(pdev, gnt_ref, remote_evtchn); - if (err) - goto out; - - dev_dbg(&pdev->xdev->dev, "Connecting...\n"); - - err = xenbus_switch_state(pdev->xdev, XenbusStateConnected); - if (err) - xenbus_dev_fatal(pdev->xdev, err, - "Error switching to connected state!"); - - dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err); -out: - - kfree(magic); - - return err; -} - -static int xen_pcibk_publish_pci_dev(struct xen_pcibk_device *pdev, - unsigned int domain, unsigned int bus, - unsigned int devfn, unsigned int devid) -{ - int err; - int len; - char str[64]; - - len = snprintf(str, sizeof(str), "vdev-%d", devid); - if (unlikely(len >= (sizeof(str) - 1))) { - err = -ENOMEM; - goto out; - } - - err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, - "%04x:%02x:%02x.%02x", domain, bus, - PCI_SLOT(devfn), PCI_FUNC(devfn)); - -out: - return err; -} - -static int xen_pcibk_export_device(struct xen_pcibk_device *pdev, - int domain, int bus, int slot, int func, - int devid) -{ - struct pci_dev *dev; - int err = 0; - - dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n", - domain, bus, slot, func); - - dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func); - if (!dev) { - err = -EINVAL; - xenbus_dev_fatal(pdev->xdev, err, - "Couldn't locate PCI device " - "(%04x:%02x:%02x.%01x)! " - "perhaps already in-use?", - domain, bus, slot, func); - goto out; - } - - err = xen_pcibk_add_pci_dev(pdev, dev, devid, - xen_pcibk_publish_pci_dev); - if (err) - goto out; - - dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id); - if (xen_register_device_domain_owner(dev, - pdev->xdev->otherend_id) != 0) { - dev_err(&dev->dev, "device has been assigned to another " \ - "domain! Over-writting the ownership, but beware.\n"); - xen_unregister_device_domain_owner(dev); - xen_register_device_domain_owner(dev, pdev->xdev->otherend_id); - } - - /* TODO: It'd be nice to export a bridge and have all of its children - * get exported with it. 
This may be best done in xend (which will - * have to calculate resource usage anyway) but we probably want to - * put something in here to ensure that if a bridge gets given to a - * driver domain, that all devices under that bridge are not given - * to other driver domains (as he who controls the bridge can disable - * it and stop the other devices from working). - */ -out: - return err; -} - -static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev, - int domain, int bus, int slot, int func) -{ - int err = 0; - struct pci_dev *dev; - - dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n", - domain, bus, slot, func); - - dev = xen_pcibk_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func)); - if (!dev) { - err = -EINVAL; - dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device " - "(%04x:%02x:%02x.%01x)! not owned by this domain\n", - domain, bus, slot, func); - goto out; - } - - dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id); - xen_unregister_device_domain_owner(dev); - - xen_pcibk_release_pci_dev(pdev, dev); - -out: - return err; -} - -static int xen_pcibk_publish_pci_root(struct xen_pcibk_device *pdev, - unsigned int domain, unsigned int bus) -{ - unsigned int d, b; - int i, root_num, len, err; - char str[64]; - - dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n"); - - err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, - "root_num", "%d", &root_num); - if (err == 0 || err == -ENOENT) - root_num = 0; - else if (err < 0) - goto out; - - /* Verify that we haven't already published this pci root */ - for (i = 0; i < root_num; i++) { - len = snprintf(str, sizeof(str), "root-%d", i); - if (unlikely(len >= (sizeof(str) - 1))) { - err = -ENOMEM; - goto out; - } - - err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, - str, "%x:%x", &d, &b); - if (err < 0) - goto out; - if (err != 2) { - err = -EINVAL; - goto out; - } - - if (d == domain && b == bus) { - err = 0; - goto out; - } - } - - len = snprintf(str, sizeof(str), 
"root-%d", root_num); - if (unlikely(len >= (sizeof(str) - 1))) { - err = -ENOMEM; - goto out; - } - - dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n", - root_num, domain, bus); - - err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, - "%04x:%02x", domain, bus); - if (err) - goto out; - - err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, - "root_num", "%d", (root_num + 1)); - -out: - return err; -} - -static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev) -{ - int err = 0; - int num_devs; - int domain, bus, slot, func; - int substate; - int i, len; - char state_str[64]; - char dev_str[64]; - - - dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n"); - - /* Make sure we only reconfigure once */ - if (xenbus_read_driver_state(pdev->xdev->nodename) != - XenbusStateReconfiguring) - goto out; - - err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", - &num_devs); - if (err != 1) { - if (err >= 0) - err = -EINVAL; - xenbus_dev_fatal(pdev->xdev, err, - "Error reading number of devices"); - goto out; - } - - for (i = 0; i < num_devs; i++) { - len = snprintf(state_str, sizeof(state_str), "state-%d", i); - if (unlikely(len >= (sizeof(state_str) - 1))) { - err = -ENOMEM; - xenbus_dev_fatal(pdev->xdev, err, - "String overflow while reading " - "configuration"); - goto out; - } - err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str, - "%d", &substate); - if (err != 1) - substate = XenbusStateUnknown; - - switch (substate) { - case XenbusStateInitialising: - dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i); - - len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i); - if (unlikely(len >= (sizeof(dev_str) - 1))) { - err = -ENOMEM; - xenbus_dev_fatal(pdev->xdev, err, - "String overflow while " - "reading configuration"); - goto out; - } - err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, - dev_str, "%x:%x:%x.%x", - &domain, &bus, &slot, &func); - if (err < 0) { - xenbus_dev_fatal(pdev->xdev, err, - "Error reading device " - 
"configuration"); - goto out; - } - if (err != 4) { - err = -EINVAL; - xenbus_dev_fatal(pdev->xdev, err, - "Error parsing pci device " - "configuration"); - goto out; - } - - err = xen_pcibk_export_device(pdev, domain, bus, slot, - func, i); - if (err) - goto out; - - /* Publish pci roots. */ - err = xen_pcibk_publish_pci_roots(pdev, - xen_pcibk_publish_pci_root); - if (err) { - xenbus_dev_fatal(pdev->xdev, err, - "Error while publish PCI root" - "buses for frontend"); - goto out; - } - - err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, - state_str, "%d", - XenbusStateInitialised); - if (err) { - xenbus_dev_fatal(pdev->xdev, err, - "Error switching substate of " - "dev-%d\n", i); - goto out; - } - break; - - case XenbusStateClosing: - dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i); - - len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i); - if (unlikely(len >= (sizeof(dev_str) - 1))) { - err = -ENOMEM; - xenbus_dev_fatal(pdev->xdev, err, - "String overflow while " - "reading configuration"); - goto out; - } - err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, - dev_str, "%x:%x:%x.%x", - &domain, &bus, &slot, &func); - if (err < 0) { - xenbus_dev_fatal(pdev->xdev, err, - "Error reading device " - "configuration"); - goto out; - } - if (err != 4) { - err = -EINVAL; - xenbus_dev_fatal(pdev->xdev, err, - "Error parsing pci device " - "configuration"); - goto out; - } - - err = xen_pcibk_remove_device(pdev, domain, bus, slot, - func); - if (err) - goto out; - - /* TODO: If at some point we implement support for pci - * root hot-remove on pcifront side, we'll need to - * remove unnecessary xenstore nodes of pci roots here. 
- */ - - break; - - default: - break; - } - } - - err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured); - if (err) { - xenbus_dev_fatal(pdev->xdev, err, - "Error switching to reconfigured state!"); - goto out; - } - -out: - return 0; -} - -static void xen_pcibk_frontend_changed(struct xenbus_device *xdev, - enum xenbus_state fe_state) -{ - struct xen_pcibk_device *pdev = dev_get_drvdata(&xdev->dev); - - dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state); - - switch (fe_state) { - case XenbusStateInitialised: - xen_pcibk_attach(pdev); - break; - - case XenbusStateReconfiguring: - xen_pcibk_reconfigure(pdev); - break; - - case XenbusStateConnected: - /* pcifront switched its state from reconfiguring to connected. - * Then switch to connected state. - */ - xenbus_switch_state(xdev, XenbusStateConnected); - break; - - case XenbusStateClosing: - xen_pcibk_disconnect(pdev); - xenbus_switch_state(xdev, XenbusStateClosing); - break; - - case XenbusStateClosed: - xen_pcibk_disconnect(pdev); - xenbus_switch_state(xdev, XenbusStateClosed); - if (xenbus_dev_is_online(xdev)) - break; - /* fall through if not online */ - case XenbusStateUnknown: - dev_dbg(&xdev->dev, "frontend is gone! unregister device\n"); - device_unregister(&xdev->dev); - break; - - default: - break; - } -} - -static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev) -{ - /* Get configuration from xend (if available now) */ - int domain, bus, slot, func; - int err = 0; - int i, num_devs; - char dev_str[64]; - char state_str[64]; - - /* It's possible we could get the call to setup twice, so make sure - * we're not already connected. 
- */ - if (xenbus_read_driver_state(pdev->xdev->nodename) != - XenbusStateInitWait) - goto out; - - dev_dbg(&pdev->xdev->dev, "getting be setup\n"); - - err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", - &num_devs); - if (err != 1) { - if (err >= 0) - err = -EINVAL; - xenbus_dev_fatal(pdev->xdev, err, - "Error reading number of devices"); - goto out; - } - - for (i = 0; i < num_devs; i++) { - int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i); - if (unlikely(l >= (sizeof(dev_str) - 1))) { - err = -ENOMEM; - xenbus_dev_fatal(pdev->xdev, err, - "String overflow while reading " - "configuration"); - goto out; - } - - err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str, - "%x:%x:%x.%x", &domain, &bus, &slot, &func); - if (err < 0) { - xenbus_dev_fatal(pdev->xdev, err, - "Error reading device configuration"); - goto out; - } - if (err != 4) { - err = -EINVAL; - xenbus_dev_fatal(pdev->xdev, err, - "Error parsing pci device " - "configuration"); - goto out; - } - - err = xen_pcibk_export_device(pdev, domain, bus, slot, func, i); - if (err) - goto out; - - /* Switch substate of this device. 
*/ - l = snprintf(state_str, sizeof(state_str), "state-%d", i); - if (unlikely(l >= (sizeof(state_str) - 1))) { - err = -ENOMEM; - xenbus_dev_fatal(pdev->xdev, err, - "String overflow while reading " - "configuration"); - goto out; - } - err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str, - "%d", XenbusStateInitialised); - if (err) { - xenbus_dev_fatal(pdev->xdev, err, "Error switching " - "substate of dev-%d\n", i); - goto out; - } - } - - err = xen_pcibk_publish_pci_roots(pdev, xen_pcibk_publish_pci_root); - if (err) { - xenbus_dev_fatal(pdev->xdev, err, - "Error while publish PCI root buses " - "for frontend"); - goto out; - } - - err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised); - if (err) - xenbus_dev_fatal(pdev->xdev, err, - "Error switching to initialised state!"); - -out: - if (!err) - /* see if pcifront is already configured (if not, we'll wait) */ - xen_pcibk_attach(pdev); - - return err; -} - -static void xen_pcibk_be_watch(struct xenbus_watch *watch, - const char **vec, unsigned int len) -{ - struct xen_pcibk_device *pdev = - container_of(watch, struct xen_pcibk_device, be_watch); - - switch (xenbus_read_driver_state(pdev->xdev->nodename)) { - case XenbusStateInitWait: - xen_pcibk_setup_backend(pdev); - break; - - default: - break; - } -} - -static int xen_pcibk_xenbus_probe(struct xenbus_device *dev, - const struct xenbus_device_id *id) -{ - int err = 0; - struct xen_pcibk_device *pdev = alloc_pdev(dev); - - if (pdev == NULL) { - err = -ENOMEM; - xenbus_dev_fatal(dev, err, - "Error allocating xen_pcibk_device struct"); - goto out; - } - - /* wait for xend to configure us */ - err = xenbus_switch_state(dev, XenbusStateInitWait); - if (err) - goto out; - - /* watch the backend node for backend configuration information */ - err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch, - xen_pcibk_be_watch); - if (err) - goto out; - - pdev->be_watching = 1; - - /* We need to force a call to our callback here in case - * xend already 
configured us! - */ - xen_pcibk_be_watch(&pdev->be_watch, NULL, 0); - -out: - return err; -} - -static int xen_pcibk_xenbus_remove(struct xenbus_device *dev) -{ - struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev); - - if (pdev != NULL) - free_pdev(pdev); - - return 0; -} - -static const struct xenbus_device_id xenpci_ids[] = { - {"pci"}, - {""}, -}; - -static struct xenbus_driver xenbus_xen_pcibk_driver = { - .name = DRV_NAME, - .owner = THIS_MODULE, - .ids = xenpci_ids, - .probe = xen_pcibk_xenbus_probe, - .remove = xen_pcibk_xenbus_remove, - .otherend_changed = xen_pcibk_frontend_changed, -}; - -struct xen_pcibk_backend *xen_pcibk_backend; - -int __init xen_pcibk_xenbus_register(void) -{ - xen_pcibk_wq = create_workqueue("xen_pciback_workqueue"); - if (!xen_pcibk_wq) { - printk(KERN_ERR "%s: create" - "xen_pciback_workqueue failed\n", __func__); - return -EFAULT; - } - xen_pcibk_backend = &xen_pcibk_vpci_backend; - if (passthrough) - xen_pcibk_backend = &xen_pcibk_passthrough_backend; - pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name); - return xenbus_register_backend(&xenbus_xen_pcibk_driver); -} - -void __exit xen_pcibk_xenbus_unregister(void) -{ - destroy_workqueue(xen_pcibk_wq); - xenbus_unregister_driver(&xenbus_xen_pcibk_driver); -} diff --git a/trunk/drivers/xen/xen-selfballoon.c b/trunk/drivers/xen/xen-selfballoon.c deleted file mode 100644 index 010937b5a7c9..000000000000 --- a/trunk/drivers/xen/xen-selfballoon.c +++ /dev/null @@ -1,485 +0,0 @@ -/****************************************************************************** - * Xen selfballoon driver (and optional frontswap self-shrinking driver) - * - * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp. - * - * This code complements the cleancache and frontswap patchsets to optimize - * support for Xen Transcendent Memory ("tmem"). The policy it implements - * is rudimentary and will likely improve over time, but it does work well - * enough today. 
- * - * Two functionalities are implemented here which both use "control theory" - * (feedback) to optimize memory utilization. In a virtualized environment - * such as Xen, RAM is often a scarce resource and we would like to ensure - * that each of a possibly large number of virtual machines is using RAM - * efficiently, i.e. using as little as possible when under light load - * and obtaining as much as possible when memory demands are high. - * Since RAM needs vary highly dynamically and sometimes dramatically, - * "hysteresis" is used, that is, memory target is determined not just - * on current data but also on past data stored in the system. - * - * "Selfballooning" creates memory pressure by managing the Xen balloon - * driver to decrease and increase available kernel memory, driven - * largely by the target value of "Committed_AS" (see /proc/meminfo). - * Since Committed_AS does not account for clean mapped pages (i.e. pages - * in RAM that are identical to pages on disk), selfballooning has the - * affect of pushing less frequently used clean pagecache pages out of - * kernel RAM and, presumably using cleancache, into Xen tmem where - * Xen can more efficiently optimize RAM utilization for such pages. - * - * When kernel memory demand unexpectedly increases faster than Xen, via - * the selfballoon driver, is able to (or chooses to) provide usable RAM, - * the kernel may invoke swapping. In most cases, frontswap is able - * to absorb this swapping into Xen tmem. However, due to the fact - * that the kernel swap subsystem assumes swapping occurs to a disk, - * swapped pages may sit on the disk for a very long time; even if - * the kernel knows the page will never be used again. This is because - * the disk space costs very little and can be overwritten when - * necessary. When such stale pages are in frontswap, however, they - * are taking up valuable real estate. 
"Frontswap selfshrinking" works - * to resolve this: When frontswap activity is otherwise stable - * and the guest kernel is not under memory pressure, the "frontswap - * selfshrinking" accounts for this by providing pressure to remove some - * pages from frontswap and return them to kernel memory. - * - * For both "selfballooning" and "frontswap-selfshrinking", a worker - * thread is used and sysfs tunables are provided to adjust the frequency - * and rate of adjustments to achieve the goal, as well as to disable one - * or both functions independently. - * - * While some argue that this functionality can and should be implemented - * in userspace, it has been observed that bad things happen (e.g. OOMs). - * - * System configuration note: Selfballooning should not be enabled on - * systems without a sufficiently large swap device configured; for best - * results, it is recommended that total swap be increased by the size - * of the guest memory. Also, while technically not required to be - * configured, it is highly recommended that frontswap also be configured - * and enabled when selfballooning is running. So, selfballooning - * is disabled by default if frontswap is not configured and can only - * be enabled with the "selfballooning" kernel boot option; similarly - * selfballooning is enabled by default if frontswap is configured and - * can be disabled with the "noselfballooning" kernel boot option. Finally, - * when frontswap is configured, frontswap-selfshrinking can be disabled - * with the "noselfshrink" kernel boot option. - * - * Selfballooning is disallowed in domain0 and force-disabled. - * - */ - -#include -#include -#include - -#include - -#include - -/* Enable/disable with sysfs. */ -static int xen_selfballooning_enabled __read_mostly; - -/* - * Controls rate at which memory target (this iteration) approaches - * ultimate goal when memory need is increasing (up-hysteresis) or - * decreasing (down-hysteresis). 
Higher values of hysteresis cause - * slower increases/decreases. The default values for the various - * parameters were deemed reasonable by experimentation, may be - * workload-dependent, and can all be adjusted via sysfs. - */ -static unsigned int selfballoon_downhysteresis __read_mostly = 8; -static unsigned int selfballoon_uphysteresis __read_mostly = 1; - -/* In HZ, controls frequency of worker invocation. */ -static unsigned int selfballoon_interval __read_mostly = 5; - -static void selfballoon_process(struct work_struct *work); -static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process); - -#ifdef CONFIG_FRONTSWAP -#include - -/* Enable/disable with sysfs. */ -static bool frontswap_selfshrinking __read_mostly; - -/* Enable/disable with kernel boot option. */ -static bool use_frontswap_selfshrink __initdata = true; - -/* - * The default values for the following parameters were deemed reasonable - * by experimentation, may be workload-dependent, and can all be - * adjusted via sysfs. - */ - -/* Control rate for frontswap shrinking. Higher hysteresis is slower. */ -static unsigned int frontswap_hysteresis __read_mostly = 20; - -/* - * Number of selfballoon worker invocations to wait before observing that - * frontswap selfshrinking should commence. Note that selfshrinking does - * not use a separate worker thread. - */ -static unsigned int frontswap_inertia __read_mostly = 3; - -/* Countdown to next invocation of frontswap_shrink() */ -static unsigned long frontswap_inertia_counter; - -/* - * Invoked by the selfballoon worker thread, uses current number of pages - * in frontswap (frontswap_curr_pages()), previous status, and control - * values (hysteresis and inertia) to determine if frontswap should be - * shrunk and what the new frontswap size should be. 
Note that - * frontswap_shrink is essentially a partial swapoff that immediately - * transfers pages from the "swap device" (frontswap) back into kernel - * RAM; despite the name, frontswap "shrinking" is very different from - * the "shrinker" interface used by the kernel MM subsystem to reclaim - * memory. - */ -static void frontswap_selfshrink(void) -{ - static unsigned long cur_frontswap_pages; - static unsigned long last_frontswap_pages; - static unsigned long tgt_frontswap_pages; - - last_frontswap_pages = cur_frontswap_pages; - cur_frontswap_pages = frontswap_curr_pages(); - if (!cur_frontswap_pages || - (cur_frontswap_pages > last_frontswap_pages)) { - frontswap_inertia_counter = frontswap_inertia; - return; - } - if (frontswap_inertia_counter && --frontswap_inertia_counter) - return; - if (cur_frontswap_pages <= frontswap_hysteresis) - tgt_frontswap_pages = 0; - else - tgt_frontswap_pages = cur_frontswap_pages - - (cur_frontswap_pages / frontswap_hysteresis); - frontswap_shrink(tgt_frontswap_pages); -} - -static int __init xen_nofrontswap_selfshrink_setup(char *s) -{ - use_frontswap_selfshrink = false; - return 1; -} - -__setup("noselfshrink", xen_nofrontswap_selfshrink_setup); - -/* Disable with kernel boot option. */ -static bool use_selfballooning __initdata = true; - -static int __init xen_noselfballooning_setup(char *s) -{ - use_selfballooning = false; - return 1; -} - -__setup("noselfballooning", xen_noselfballooning_setup); -#else /* !CONFIG_FRONTSWAP */ -/* Enable with kernel boot option. 
*/ -static bool use_selfballooning __initdata = false; - -static int __init xen_selfballooning_setup(char *s) -{ - use_selfballooning = true; - return 1; -} - -__setup("selfballooning", xen_selfballooning_setup); -#endif /* CONFIG_FRONTSWAP */ - -/* - * Use current balloon size, the goal (vm_committed_as), and hysteresis - * parameters to set a new target balloon size - */ -static void selfballoon_process(struct work_struct *work) -{ - unsigned long cur_pages, goal_pages, tgt_pages; - bool reset_timer = false; - - if (xen_selfballooning_enabled) { - cur_pages = balloon_stats.current_pages; - tgt_pages = cur_pages; /* default is no change */ - goal_pages = percpu_counter_read_positive(&vm_committed_as) + - balloon_stats.current_pages - totalram_pages; -#ifdef CONFIG_FRONTSWAP - /* allow space for frontswap pages to be repatriated */ - if (frontswap_selfshrinking && frontswap_enabled) - goal_pages += frontswap_curr_pages(); -#endif - if (cur_pages > goal_pages) - tgt_pages = cur_pages - - ((cur_pages - goal_pages) / - selfballoon_downhysteresis); - else if (cur_pages < goal_pages) - tgt_pages = cur_pages + - ((goal_pages - cur_pages) / - selfballoon_uphysteresis); - /* else if cur_pages == goal_pages, no change */ - balloon_set_new_target(tgt_pages); - reset_timer = true; - } -#ifdef CONFIG_FRONTSWAP - if (frontswap_selfshrinking && frontswap_enabled) { - frontswap_selfshrink(); - reset_timer = true; - } -#endif - if (reset_timer) - schedule_delayed_work(&selfballoon_worker, - selfballoon_interval * HZ); -} - -#ifdef CONFIG_SYSFS - -#include -#include - -#define SELFBALLOON_SHOW(name, format, args...) 
\ - static ssize_t show_##name(struct sys_device *dev, \ - struct sysdev_attribute *attr, \ - char *buf) \ - { \ - return sprintf(buf, format, ##args); \ - } - -SELFBALLOON_SHOW(selfballooning, "%d\n", xen_selfballooning_enabled); - -static ssize_t store_selfballooning(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, - size_t count) -{ - bool was_enabled = xen_selfballooning_enabled; - unsigned long tmp; - int err; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - err = strict_strtoul(buf, 10, &tmp); - if (err || ((tmp != 0) && (tmp != 1))) - return -EINVAL; - - xen_selfballooning_enabled = !!tmp; - if (!was_enabled && xen_selfballooning_enabled) - schedule_delayed_work(&selfballoon_worker, - selfballoon_interval * HZ); - - return count; -} - -static SYSDEV_ATTR(selfballooning, S_IRUGO | S_IWUSR, - show_selfballooning, store_selfballooning); - -SELFBALLOON_SHOW(selfballoon_interval, "%d\n", selfballoon_interval); - -static ssize_t store_selfballoon_interval(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, - size_t count) -{ - unsigned long val; - int err; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) - return -EINVAL; - selfballoon_interval = val; - return count; -} - -static SYSDEV_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR, - show_selfballoon_interval, store_selfballoon_interval); - -SELFBALLOON_SHOW(selfballoon_downhys, "%d\n", selfballoon_downhysteresis); - -static ssize_t store_selfballoon_downhys(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, - size_t count) -{ - unsigned long val; - int err; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) - return -EINVAL; - selfballoon_downhysteresis = val; - return count; -} - -static SYSDEV_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR, - show_selfballoon_downhys, store_selfballoon_downhys); - - 
-SELFBALLOON_SHOW(selfballoon_uphys, "%d\n", selfballoon_uphysteresis); - -static ssize_t store_selfballoon_uphys(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, - size_t count) -{ - unsigned long val; - int err; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) - return -EINVAL; - selfballoon_uphysteresis = val; - return count; -} - -static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR, - show_selfballoon_uphys, store_selfballoon_uphys); - -#ifdef CONFIG_FRONTSWAP -SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking); - -static ssize_t store_frontswap_selfshrinking(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, - size_t count) -{ - bool was_enabled = frontswap_selfshrinking; - unsigned long tmp; - int err; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = strict_strtoul(buf, 10, &tmp); - if (err || ((tmp != 0) && (tmp != 1))) - return -EINVAL; - frontswap_selfshrinking = !!tmp; - if (!was_enabled && !xen_selfballooning_enabled && - frontswap_selfshrinking) - schedule_delayed_work(&selfballoon_worker, - selfballoon_interval * HZ); - - return count; -} - -static SYSDEV_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR, - show_frontswap_selfshrinking, store_frontswap_selfshrinking); - -SELFBALLOON_SHOW(frontswap_inertia, "%d\n", frontswap_inertia); - -static ssize_t store_frontswap_inertia(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, - size_t count) -{ - unsigned long val; - int err; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) - return -EINVAL; - frontswap_inertia = val; - frontswap_inertia_counter = val; - return count; -} - -static SYSDEV_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR, - show_frontswap_inertia, store_frontswap_inertia); - -SELFBALLOON_SHOW(frontswap_hysteresis, "%d\n", frontswap_hysteresis); - -static 
ssize_t store_frontswap_hysteresis(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, - size_t count) -{ - unsigned long val; - int err; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) - return -EINVAL; - frontswap_hysteresis = val; - return count; -} - -static SYSDEV_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR, - show_frontswap_hysteresis, store_frontswap_hysteresis); - -#endif /* CONFIG_FRONTSWAP */ - -static struct attribute *selfballoon_attrs[] = { - &attr_selfballooning.attr, - &attr_selfballoon_interval.attr, - &attr_selfballoon_downhysteresis.attr, - &attr_selfballoon_uphysteresis.attr, -#ifdef CONFIG_FRONTSWAP - &attr_frontswap_selfshrinking.attr, - &attr_frontswap_hysteresis.attr, - &attr_frontswap_inertia.attr, -#endif - NULL -}; - -static struct attribute_group selfballoon_group = { - .name = "selfballoon", - .attrs = selfballoon_attrs -}; -#endif - -int register_xen_selfballooning(struct sys_device *sysdev) -{ - int error = -1; - -#ifdef CONFIG_SYSFS - error = sysfs_create_group(&sysdev->kobj, &selfballoon_group); -#endif - return error; -} -EXPORT_SYMBOL(register_xen_selfballooning); - -static int __init xen_selfballoon_init(void) -{ - bool enable = false; - - if (!xen_domain()) - return -ENODEV; - - if (xen_initial_domain()) { - pr_info("xen/balloon: Xen selfballooning driver " - "disabled for domain0.\n"); - return -ENODEV; - } - - xen_selfballooning_enabled = tmem_enabled && use_selfballooning; - if (xen_selfballooning_enabled) { - pr_info("xen/balloon: Initializing Xen " - "selfballooning driver.\n"); - enable = true; - } -#ifdef CONFIG_FRONTSWAP - frontswap_selfshrinking = tmem_enabled && use_frontswap_selfshrink; - if (frontswap_selfshrinking) { - pr_info("xen/balloon: Initializing frontswap " - "selfshrinking driver.\n"); - enable = true; - } -#endif - if (!enable) - return -ENODEV; - - schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ); 
- - return 0; -} - -subsys_initcall(xen_selfballoon_init); - -MODULE_LICENSE("GPL"); diff --git a/trunk/drivers/xen/xenbus/xenbus_probe.c b/trunk/drivers/xen/xenbus/xenbus_probe.c index bd2f90c9ac8b..739769551e33 100644 --- a/trunk/drivers/xen/xenbus/xenbus_probe.c +++ b/trunk/drivers/xen/xenbus/xenbus_probe.c @@ -378,32 +378,26 @@ static void xenbus_dev_release(struct device *dev) kfree(to_xenbus_device(dev)); } -static ssize_t nodename_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t xendev_show_nodename(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); } +static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL); -static ssize_t devtype_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t xendev_show_devtype(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); } +static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); -static ssize_t modalias_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t xendev_show_modalias(struct device *dev, + struct device_attribute *attr, char *buf) { - return sprintf(buf, "%s:%s\n", dev->bus->name, - to_xenbus_device(dev)->devicetype); + return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype); } - -struct device_attribute xenbus_dev_attrs[] = { - __ATTR_RO(nodename), - __ATTR_RO(devtype), - __ATTR_RO(modalias), - __ATTR_NULL -}; -EXPORT_SYMBOL_GPL(xenbus_dev_attrs); +static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL); int xenbus_probe_node(struct xen_bus_type *bus, const char *type, @@ -455,7 +449,25 @@ int xenbus_probe_node(struct xen_bus_type *bus, if (err) goto fail; + err = device_create_file(&xendev->dev, &dev_attr_nodename); + if (err) + goto fail_unregister; + + err = 
device_create_file(&xendev->dev, &dev_attr_devtype); + if (err) + goto fail_remove_nodename; + + err = device_create_file(&xendev->dev, &dev_attr_modalias); + if (err) + goto fail_remove_devtype; + return 0; +fail_remove_devtype: + device_remove_file(&xendev->dev, &dev_attr_devtype); +fail_remove_nodename: + device_remove_file(&xendev->dev, &dev_attr_nodename); +fail_unregister: + device_unregister(&xendev->dev); fail: kfree(xendev); return err; diff --git a/trunk/drivers/xen/xenbus/xenbus_probe.h b/trunk/drivers/xen/xenbus/xenbus_probe.h index b814935378c7..888b9900ca08 100644 --- a/trunk/drivers/xen/xenbus/xenbus_probe.h +++ b/trunk/drivers/xen/xenbus/xenbus_probe.h @@ -48,8 +48,6 @@ struct xen_bus_type struct bus_type bus; }; -extern struct device_attribute xenbus_dev_attrs[]; - extern int xenbus_match(struct device *_dev, struct device_driver *_drv); extern int xenbus_dev_probe(struct device *_dev); extern int xenbus_dev_remove(struct device *_dev); diff --git a/trunk/drivers/xen/xenbus/xenbus_probe_backend.c b/trunk/drivers/xen/xenbus/xenbus_probe_backend.c index 60adf919d78d..6cf467bf63ec 100644 --- a/trunk/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/trunk/drivers/xen/xenbus/xenbus_probe_backend.c @@ -107,9 +107,6 @@ static int xenbus_uevent_backend(struct device *dev, if (xdev == NULL) return -ENODEV; - if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype)) - return -ENOMEM; - /* stuff we want to pass to /sbin/hotplug */ if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype)) return -ENOMEM; @@ -186,6 +183,10 @@ static void frontend_changed(struct xenbus_watch *watch, xenbus_otherend_changed(watch, vec, len, 0); } +static struct device_attribute xenbus_backend_dev_attrs[] = { + __ATTR_NULL +}; + static struct xen_bus_type xenbus_backend = { .root = "backend", .levels = 3, /* backend/type// */ @@ -199,7 +200,7 @@ static struct xen_bus_type xenbus_backend = { .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, .shutdown = 
xenbus_dev_shutdown, - .dev_attrs = xenbus_dev_attrs, + .dev_attrs = xenbus_backend_dev_attrs, }, }; diff --git a/trunk/drivers/xen/xenbus/xenbus_probe_frontend.c b/trunk/drivers/xen/xenbus/xenbus_probe_frontend.c index ed2ba474a560..b6a2690c9d49 100644 --- a/trunk/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/trunk/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -81,6 +81,10 @@ static void backend_changed(struct xenbus_watch *watch, xenbus_otherend_changed(watch, vec, len, 1); } +static struct device_attribute xenbus_frontend_dev_attrs[] = { + __ATTR_NULL +}; + static const struct dev_pm_ops xenbus_pm_ops = { .suspend = xenbus_dev_suspend, .resume = xenbus_dev_resume, @@ -102,7 +106,7 @@ static struct xen_bus_type xenbus_frontend = { .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, .shutdown = xenbus_dev_shutdown, - .dev_attrs = xenbus_dev_attrs, + .dev_attrs = xenbus_frontend_dev_attrs, .pm = &xenbus_pm_ops, }, diff --git a/trunk/fs/dlm/ast.c b/trunk/fs/dlm/ast.c index 90e5997262ea..abc49f292454 100644 --- a/trunk/fs/dlm/ast.c +++ b/trunk/fs/dlm/ast.c @@ -14,9 +14,17 @@ #include "dlm_internal.h" #include "lock.h" #include "user.h" +#include "ast.h" + +#define WAKE_ASTS 0 + +static uint64_t ast_seq_count; +static struct list_head ast_queue; +static spinlock_t ast_queue_lock; +static struct task_struct * astd_task; +static unsigned long astd_wakeflags; +static struct mutex astd_running; -static uint64_t dlm_cb_seq; -static spinlock_t dlm_cb_seq_spin; static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb) { @@ -49,13 +57,21 @@ static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb) } } +void dlm_del_ast(struct dlm_lkb *lkb) +{ + spin_lock(&ast_queue_lock); + if (!list_empty(&lkb->lkb_astqueue)) + list_del_init(&lkb->lkb_astqueue); + spin_unlock(&ast_queue_lock); +} + int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, uint32_t sbflags, uint64_t seq) { struct dlm_ls *ls = lkb->lkb_resource->res_ls; uint64_t prev_seq; int 
prev_mode; - int i, rv; + int i; for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { if (lkb->lkb_callbacks[i].seq) @@ -84,8 +100,7 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, mode, (unsigned long long)prev_seq, prev_mode); - rv = 0; - goto out; + return 0; } } @@ -94,7 +109,6 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, lkb->lkb_callbacks[i].mode = mode; lkb->lkb_callbacks[i].sb_status = status; lkb->lkb_callbacks[i].sb_flags = (sbflags & 0x000000FF); - rv = 0; break; } @@ -103,24 +117,21 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, lkb->lkb_id, (unsigned long long)seq, flags, mode, status, sbflags); dlm_dump_lkb_callbacks(lkb); - rv = -1; - goto out; + return -1; } - out: - return rv; + + return 0; } int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_callback *cb, int *resid) { - int i, rv; + int i; *resid = 0; - if (!lkb->lkb_callbacks[0].seq) { - rv = -ENOENT; - goto out; - } + if (!lkb->lkb_callbacks[0].seq) + return -ENOENT; /* oldest undelivered cb is callbacks[0] */ @@ -152,8 +163,7 @@ int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb, cb->mode, (unsigned long long)lkb->lkb_last_cast.seq, lkb->lkb_last_cast.mode); - rv = 0; - goto out; + return 0; } } @@ -166,150 +176,171 @@ int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb, memcpy(&lkb->lkb_last_bast, cb, sizeof(struct dlm_callback)); lkb->lkb_last_bast_time = ktime_get(); } - rv = 0; - out: - return rv; + + return 0; } -void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, - uint32_t sbflags) +void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, + uint32_t sbflags) { - struct dlm_ls *ls = lkb->lkb_resource->res_ls; - uint64_t new_seq, prev_seq; + uint64_t seq; int rv; - spin_lock(&dlm_cb_seq_spin); - new_seq = ++dlm_cb_seq; - spin_unlock(&dlm_cb_seq_spin); + spin_lock(&ast_queue_lock); + + seq = ++ast_seq_count; if 
(lkb->lkb_flags & DLM_IFL_USER) { - dlm_user_add_ast(lkb, flags, mode, status, sbflags, new_seq); + spin_unlock(&ast_queue_lock); + dlm_user_add_ast(lkb, flags, mode, status, sbflags, seq); return; } - mutex_lock(&lkb->lkb_cb_mutex); - prev_seq = lkb->lkb_callbacks[0].seq; - - rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, new_seq); - if (rv < 0) - goto out; + rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, seq); + if (rv < 0) { + spin_unlock(&ast_queue_lock); + return; + } - if (!prev_seq) { + if (list_empty(&lkb->lkb_astqueue)) { kref_get(&lkb->lkb_ref); - - if (test_bit(LSFL_CB_DELAY, &ls->ls_flags)) { - mutex_lock(&ls->ls_cb_mutex); - list_add(&lkb->lkb_cb_list, &ls->ls_cb_delay); - mutex_unlock(&ls->ls_cb_mutex); - } else { - queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work); - } + list_add_tail(&lkb->lkb_astqueue, &ast_queue); } - out: - mutex_unlock(&lkb->lkb_cb_mutex); + spin_unlock(&ast_queue_lock); + + set_bit(WAKE_ASTS, &astd_wakeflags); + wake_up_process(astd_task); } -void dlm_callback_work(struct work_struct *work) +static void process_asts(void) { - struct dlm_lkb *lkb = container_of(work, struct dlm_lkb, lkb_cb_work); - struct dlm_ls *ls = lkb->lkb_resource->res_ls; + struct dlm_ls *ls = NULL; + struct dlm_rsb *r = NULL; + struct dlm_lkb *lkb; void (*castfn) (void *astparam); void (*bastfn) (void *astparam, int mode); struct dlm_callback callbacks[DLM_CALLBACKS_SIZE]; int i, rv, resid; - memset(&callbacks, 0, sizeof(callbacks)); +repeat: + spin_lock(&ast_queue_lock); + list_for_each_entry(lkb, &ast_queue, lkb_astqueue) { + r = lkb->lkb_resource; + ls = r->res_ls; - mutex_lock(&lkb->lkb_cb_mutex); - if (!lkb->lkb_callbacks[0].seq) { - /* no callback work exists, shouldn't happen */ - log_error(ls, "dlm_callback_work %x no work", lkb->lkb_id); - dlm_print_lkb(lkb); - dlm_dump_lkb_callbacks(lkb); - } + if (dlm_locking_stopped(ls)) + continue; - for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { - rv = dlm_rem_lkb_callback(ls, lkb, 
&callbacks[i], &resid); - if (rv < 0) - break; - } + /* we remove from astqueue list and remove everything in + lkb_callbacks before releasing the spinlock so empty + lkb_astqueue is always consistent with empty lkb_callbacks */ - if (resid) { - /* cbs remain, loop should have removed all, shouldn't happen */ - log_error(ls, "dlm_callback_work %x resid %d", lkb->lkb_id, - resid); - dlm_print_lkb(lkb); - dlm_dump_lkb_callbacks(lkb); - } - mutex_unlock(&lkb->lkb_cb_mutex); + list_del_init(&lkb->lkb_astqueue); - castfn = lkb->lkb_astfn; - bastfn = lkb->lkb_bastfn; + castfn = lkb->lkb_astfn; + bastfn = lkb->lkb_bastfn; - for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { - if (!callbacks[i].seq) - break; - if (callbacks[i].flags & DLM_CB_SKIP) { - continue; - } else if (callbacks[i].flags & DLM_CB_BAST) { - bastfn(lkb->lkb_astparam, callbacks[i].mode); - } else if (callbacks[i].flags & DLM_CB_CAST) { - lkb->lkb_lksb->sb_status = callbacks[i].sb_status; - lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags; - castfn(lkb->lkb_astparam); + memset(&callbacks, 0, sizeof(callbacks)); + + for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { + rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid); + if (rv < 0) + break; } + spin_unlock(&ast_queue_lock); + + if (resid) { + /* shouldn't happen, for loop should have removed all */ + log_error(ls, "callback resid %d lkb %x", + resid, lkb->lkb_id); + } + + for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { + if (!callbacks[i].seq) + break; + if (callbacks[i].flags & DLM_CB_SKIP) { + continue; + } else if (callbacks[i].flags & DLM_CB_BAST) { + bastfn(lkb->lkb_astparam, callbacks[i].mode); + } else if (callbacks[i].flags & DLM_CB_CAST) { + lkb->lkb_lksb->sb_status = callbacks[i].sb_status; + lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags; + castfn(lkb->lkb_astparam); + } + } + + /* removes ref for ast_queue, may cause lkb to be freed */ + dlm_put_lkb(lkb); + + cond_resched(); + goto repeat; } + spin_unlock(&ast_queue_lock); +} + +static inline int 
no_asts(void) +{ + int ret; - /* undo kref_get from dlm_add_callback, may cause lkb to be freed */ - dlm_put_lkb(lkb); + spin_lock(&ast_queue_lock); + ret = list_empty(&ast_queue); + spin_unlock(&ast_queue_lock); + return ret; } -int dlm_callback_start(struct dlm_ls *ls) +static int dlm_astd(void *data) { - ls->ls_callback_wq = alloc_workqueue("dlm_callback", - WQ_UNBOUND | - WQ_MEM_RECLAIM | - WQ_NON_REENTRANT, - 0); - if (!ls->ls_callback_wq) { - log_print("can't start dlm_callback workqueue"); - return -ENOMEM; + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + if (!test_bit(WAKE_ASTS, &astd_wakeflags)) + schedule(); + set_current_state(TASK_RUNNING); + + mutex_lock(&astd_running); + if (test_and_clear_bit(WAKE_ASTS, &astd_wakeflags)) + process_asts(); + mutex_unlock(&astd_running); } return 0; } -void dlm_callback_stop(struct dlm_ls *ls) +void dlm_astd_wake(void) { - if (ls->ls_callback_wq) - destroy_workqueue(ls->ls_callback_wq); + if (!no_asts()) { + set_bit(WAKE_ASTS, &astd_wakeflags); + wake_up_process(astd_task); + } } -void dlm_callback_suspend(struct dlm_ls *ls) +int dlm_astd_start(void) { - set_bit(LSFL_CB_DELAY, &ls->ls_flags); - - if (ls->ls_callback_wq) - flush_workqueue(ls->ls_callback_wq); + struct task_struct *p; + int error = 0; + + INIT_LIST_HEAD(&ast_queue); + spin_lock_init(&ast_queue_lock); + mutex_init(&astd_running); + + p = kthread_run(dlm_astd, NULL, "dlm_astd"); + if (IS_ERR(p)) + error = PTR_ERR(p); + else + astd_task = p; + return error; } -void dlm_callback_resume(struct dlm_ls *ls) +void dlm_astd_stop(void) { - struct dlm_lkb *lkb, *safe; - int count = 0; - - clear_bit(LSFL_CB_DELAY, &ls->ls_flags); - - if (!ls->ls_callback_wq) - return; + kthread_stop(astd_task); +} - mutex_lock(&ls->ls_cb_mutex); - list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) { - list_del_init(&lkb->lkb_cb_list); - queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work); - count++; - } - mutex_unlock(&ls->ls_cb_mutex); 
+void dlm_astd_suspend(void) +{ + mutex_lock(&astd_running); +} - log_debug(ls, "dlm_callback_resume %d", count); +void dlm_astd_resume(void) +{ + mutex_unlock(&astd_running); } diff --git a/trunk/fs/dlm/ast.h b/trunk/fs/dlm/ast.h index 757b551c6820..8aa89c9b5611 100644 --- a/trunk/fs/dlm/ast.h +++ b/trunk/fs/dlm/ast.h @@ -18,15 +18,14 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, uint32_t sbflags, uint64_t seq); int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_callback *cb, int *resid); -void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, - uint32_t sbflags); +void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, + uint32_t sbflags); -void dlm_callback_work(struct work_struct *work); -int dlm_callback_start(struct dlm_ls *ls); -void dlm_callback_stop(struct dlm_ls *ls); -void dlm_callback_suspend(struct dlm_ls *ls); -void dlm_callback_resume(struct dlm_ls *ls); +void dlm_astd_wake(void); +int dlm_astd_start(void); +void dlm_astd_stop(void); +void dlm_astd_suspend(void); +void dlm_astd_resume(void); #endif - diff --git a/trunk/fs/dlm/config.c b/trunk/fs/dlm/config.c index 6cf72fcc0d0c..9b026ea8baa9 100644 --- a/trunk/fs/dlm/config.c +++ b/trunk/fs/dlm/config.c @@ -28,8 +28,7 @@ * /config/dlm//spaces//nodes//weight * /config/dlm//comms//nodeid * /config/dlm//comms//local - * /config/dlm//comms//addr (write only) - * /config/dlm//comms//addr_list (read only) + * /config/dlm//comms//addr * The level is useless, but I haven't figured out how to avoid it. 
*/ @@ -81,7 +80,6 @@ static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, size_t len); static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len); -static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf); static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf); static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, size_t len); @@ -94,6 +92,7 @@ struct dlm_cluster { unsigned int cl_tcp_port; unsigned int cl_buffer_size; unsigned int cl_rsbtbl_size; + unsigned int cl_lkbtbl_size; unsigned int cl_dirtbl_size; unsigned int cl_recover_timer; unsigned int cl_toss_secs; @@ -102,13 +101,13 @@ struct dlm_cluster { unsigned int cl_protocol; unsigned int cl_timewarn_cs; unsigned int cl_waitwarn_us; - unsigned int cl_new_rsb_count; }; enum { CLUSTER_ATTR_TCP_PORT = 0, CLUSTER_ATTR_BUFFER_SIZE, CLUSTER_ATTR_RSBTBL_SIZE, + CLUSTER_ATTR_LKBTBL_SIZE, CLUSTER_ATTR_DIRTBL_SIZE, CLUSTER_ATTR_RECOVER_TIMER, CLUSTER_ATTR_TOSS_SECS, @@ -117,7 +116,6 @@ enum { CLUSTER_ATTR_PROTOCOL, CLUSTER_ATTR_TIMEWARN_CS, CLUSTER_ATTR_WAITWARN_US, - CLUSTER_ATTR_NEW_RSB_COUNT, }; struct cluster_attribute { @@ -162,6 +160,7 @@ __CONFIGFS_ATTR(name, 0644, name##_read, name##_write) CLUSTER_ATTR(tcp_port, 1); CLUSTER_ATTR(buffer_size, 1); CLUSTER_ATTR(rsbtbl_size, 1); +CLUSTER_ATTR(lkbtbl_size, 1); CLUSTER_ATTR(dirtbl_size, 1); CLUSTER_ATTR(recover_timer, 1); CLUSTER_ATTR(toss_secs, 1); @@ -170,12 +169,12 @@ CLUSTER_ATTR(log_debug, 0); CLUSTER_ATTR(protocol, 0); CLUSTER_ATTR(timewarn_cs, 1); CLUSTER_ATTR(waitwarn_us, 0); -CLUSTER_ATTR(new_rsb_count, 0); static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr, [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr, + [CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr, [CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr, [CLUSTER_ATTR_RECOVER_TIMER] 
= &cluster_attr_recover_timer.attr, [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr, @@ -184,7 +183,6 @@ static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr, - [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr, NULL, }; @@ -192,7 +190,6 @@ enum { COMM_ATTR_NODEID = 0, COMM_ATTR_LOCAL, COMM_ATTR_ADDR, - COMM_ATTR_ADDR_LIST, }; struct comm_attribute { @@ -220,22 +217,14 @@ static struct comm_attribute comm_attr_local = { static struct comm_attribute comm_attr_addr = { .attr = { .ca_owner = THIS_MODULE, .ca_name = "addr", - .ca_mode = S_IWUSR }, + .ca_mode = S_IRUGO | S_IWUSR }, .store = comm_addr_write, }; -static struct comm_attribute comm_attr_addr_list = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "addr_list", - .ca_mode = S_IRUGO }, - .show = comm_addr_list_read, -}; - static struct configfs_attribute *comm_attrs[] = { [COMM_ATTR_NODEID] = &comm_attr_nodeid.attr, [COMM_ATTR_LOCAL] = &comm_attr_local.attr, [COMM_ATTR_ADDR] = &comm_attr_addr.attr, - [COMM_ATTR_ADDR_LIST] = &comm_attr_addr_list.attr, NULL, }; @@ -446,6 +435,7 @@ static struct config_group *make_cluster(struct config_group *g, cl->cl_tcp_port = dlm_config.ci_tcp_port; cl->cl_buffer_size = dlm_config.ci_buffer_size; cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size; + cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size; cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size; cl->cl_recover_timer = dlm_config.ci_recover_timer; cl->cl_toss_secs = dlm_config.ci_toss_secs; @@ -454,7 +444,6 @@ static struct config_group *make_cluster(struct config_group *g, cl->cl_protocol = dlm_config.ci_protocol; cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us; - cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count; space_list = &sps->ss_group; comm_list = &cms->cs_group; @@ -731,50 
+720,6 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) return len; } -static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf) -{ - ssize_t s; - ssize_t allowance; - int i; - struct sockaddr_storage *addr; - struct sockaddr_in *addr_in; - struct sockaddr_in6 *addr_in6; - - /* Taken from ip6_addr_string() defined in lib/vsprintf.c */ - char buf0[sizeof("AF_INET6 xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255\n")]; - - - /* Derived from SIMPLE_ATTR_SIZE of fs/configfs/file.c */ - allowance = 4096; - buf[0] = '\0'; - - for (i = 0; i < cm->addr_count; i++) { - addr = cm->addr[i]; - - switch(addr->ss_family) { - case AF_INET: - addr_in = (struct sockaddr_in *)addr; - s = sprintf(buf0, "AF_INET %pI4\n", &addr_in->sin_addr.s_addr); - break; - case AF_INET6: - addr_in6 = (struct sockaddr_in6 *)addr; - s = sprintf(buf0, "AF_INET6 %pI6\n", &addr_in6->sin6_addr); - break; - default: - s = sprintf(buf0, "%s\n", ""); - break; - } - allowance -= s; - if (allowance >= 0) - strcat(buf, buf0); - else { - allowance += s; - break; - } - } - return 4096 - allowance; -} - static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, char *buf) { @@ -1038,6 +983,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) #define DEFAULT_TCP_PORT 21064 #define DEFAULT_BUFFER_SIZE 4096 #define DEFAULT_RSBTBL_SIZE 1024 +#define DEFAULT_LKBTBL_SIZE 1024 #define DEFAULT_DIRTBL_SIZE 1024 #define DEFAULT_RECOVER_TIMER 5 #define DEFAULT_TOSS_SECS 10 @@ -1046,12 +992,12 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) #define DEFAULT_PROTOCOL 0 #define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ #define DEFAULT_WAITWARN_US 0 -#define DEFAULT_NEW_RSB_COUNT 128 struct dlm_config_info dlm_config = { .ci_tcp_port = DEFAULT_TCP_PORT, .ci_buffer_size = DEFAULT_BUFFER_SIZE, .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE, + .ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE, .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE, .ci_recover_timer = 
DEFAULT_RECOVER_TIMER, .ci_toss_secs = DEFAULT_TOSS_SECS, @@ -1059,7 +1005,6 @@ struct dlm_config_info dlm_config = { .ci_log_debug = DEFAULT_LOG_DEBUG, .ci_protocol = DEFAULT_PROTOCOL, .ci_timewarn_cs = DEFAULT_TIMEWARN_CS, - .ci_waitwarn_us = DEFAULT_WAITWARN_US, - .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT + .ci_waitwarn_us = DEFAULT_WAITWARN_US }; diff --git a/trunk/fs/dlm/config.h b/trunk/fs/dlm/config.h index 3099d0dd26c0..dd0ce24d5a80 100644 --- a/trunk/fs/dlm/config.h +++ b/trunk/fs/dlm/config.h @@ -20,6 +20,7 @@ struct dlm_config_info { int ci_tcp_port; int ci_buffer_size; int ci_rsbtbl_size; + int ci_lkbtbl_size; int ci_dirtbl_size; int ci_recover_timer; int ci_toss_secs; @@ -28,7 +29,6 @@ struct dlm_config_info { int ci_protocol; int ci_timewarn_cs; int ci_waitwarn_us; - int ci_new_rsb_count; }; extern struct dlm_config_info dlm_config; diff --git a/trunk/fs/dlm/dlm_internal.h b/trunk/fs/dlm/dlm_internal.h index fe2860c02449..0262451eb9c6 100644 --- a/trunk/fs/dlm/dlm_internal.h +++ b/trunk/fs/dlm/dlm_internal.h @@ -37,7 +37,6 @@ #include #include #include -#include #include #include @@ -53,6 +52,7 @@ struct dlm_ls; struct dlm_lkb; struct dlm_rsb; struct dlm_member; +struct dlm_lkbtable; struct dlm_rsbtable; struct dlm_dirtable; struct dlm_direntry; @@ -108,6 +108,11 @@ struct dlm_rsbtable { spinlock_t lock; }; +struct dlm_lkbtable { + struct list_head list; + rwlock_t lock; + uint16_t counter; +}; /* * Lockspace member (per node in a ls) @@ -243,18 +248,17 @@ struct dlm_lkb { int8_t lkb_wait_count; int lkb_wait_nodeid; /* for debugging */ + struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ struct list_head lkb_statequeue; /* rsb g/c/w list */ struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */ struct list_head lkb_wait_reply; /* waiting for remote reply */ + struct list_head lkb_astqueue; /* need ast to be sent */ struct list_head lkb_ownqueue; /* list of locks for a process */ struct list_head lkb_time_list; ktime_t lkb_timestamp; 
ktime_t lkb_wait_time; unsigned long lkb_timeout_cs; - struct mutex lkb_cb_mutex; - struct work_struct lkb_cb_work; - struct list_head lkb_cb_list; /* for ls_cb_delay or proc->asts */ struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE]; struct dlm_callback lkb_last_cast; struct dlm_callback lkb_last_bast; @@ -295,7 +299,7 @@ struct dlm_rsb { int res_recover_locks_count; char *res_lvbptr; - char res_name[DLM_RESNAME_MAXLEN+1]; + char res_name[1]; }; /* find_rsb() flags */ @@ -461,12 +465,12 @@ struct dlm_ls { unsigned long ls_scan_time; struct kobject ls_kobj; - struct idr ls_lkbidr; - spinlock_t ls_lkbidr_spin; - struct dlm_rsbtable *ls_rsbtbl; uint32_t ls_rsbtbl_size; + struct dlm_lkbtable *ls_lkbtbl; + uint32_t ls_lkbtbl_size; + struct dlm_dirtable *ls_dirtbl; uint32_t ls_dirtbl_size; @@ -479,10 +483,6 @@ struct dlm_ls { struct mutex ls_timeout_mutex; struct list_head ls_timeout; - spinlock_t ls_new_rsb_spin; - int ls_new_rsb_count; - struct list_head ls_new_rsb; /* new rsb structs */ - struct list_head ls_nodes; /* current nodes in ls */ struct list_head ls_nodes_gone; /* dead node list, recovery */ int ls_num_nodes; /* number of nodes in ls */ @@ -506,12 +506,8 @@ struct dlm_ls { struct miscdevice ls_device; - struct workqueue_struct *ls_callback_wq; - /* recovery related */ - struct mutex ls_cb_mutex; - struct list_head ls_cb_delay; /* save for queue_work later */ struct timer_list ls_timer; struct task_struct *ls_recoverd_task; struct mutex ls_recoverd_active; @@ -548,7 +544,6 @@ struct dlm_ls { #define LSFL_RCOM_WAIT 4 #define LSFL_UEVENT_WAIT 5 #define LSFL_TIMEWARN 6 -#define LSFL_CB_DELAY 7 /* much of this is just saving user space pointers associated with the lock that we pass back to the user lib with an ast */ diff --git a/trunk/fs/dlm/lock.c b/trunk/fs/dlm/lock.c index 83b5e32514e1..f71d0b5abd95 100644 --- a/trunk/fs/dlm/lock.c +++ b/trunk/fs/dlm/lock.c @@ -305,7 +305,7 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) rv = 
-EDEADLK; } - dlm_add_cb(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags); + dlm_add_ast(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags); } static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb) @@ -319,7 +319,7 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) if (is_master_copy(lkb)) { send_bast(r, lkb, rqmode); } else { - dlm_add_cb(lkb, DLM_CB_BAST, rqmode, 0, 0); + dlm_add_ast(lkb, DLM_CB_BAST, rqmode, 0, 0); } } @@ -327,68 +327,19 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) * Basic operations on rsb's and lkb's */ -static int pre_rsb_struct(struct dlm_ls *ls) -{ - struct dlm_rsb *r1, *r2; - int count = 0; - - spin_lock(&ls->ls_new_rsb_spin); - if (ls->ls_new_rsb_count > dlm_config.ci_new_rsb_count / 2) { - spin_unlock(&ls->ls_new_rsb_spin); - return 0; - } - spin_unlock(&ls->ls_new_rsb_spin); - - r1 = dlm_allocate_rsb(ls); - r2 = dlm_allocate_rsb(ls); - - spin_lock(&ls->ls_new_rsb_spin); - if (r1) { - list_add(&r1->res_hashchain, &ls->ls_new_rsb); - ls->ls_new_rsb_count++; - } - if (r2) { - list_add(&r2->res_hashchain, &ls->ls_new_rsb); - ls->ls_new_rsb_count++; - } - count = ls->ls_new_rsb_count; - spin_unlock(&ls->ls_new_rsb_spin); - - if (!count) - return -ENOMEM; - return 0; -} - -/* If ls->ls_new_rsb is empty, return -EAGAIN, so the caller can - unlock any spinlocks, go back and call pre_rsb_struct again. - Otherwise, take an rsb off the list and return it. 
*/ - -static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, - struct dlm_rsb **r_ret) +static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len) { struct dlm_rsb *r; - int count; - spin_lock(&ls->ls_new_rsb_spin); - if (list_empty(&ls->ls_new_rsb)) { - count = ls->ls_new_rsb_count; - spin_unlock(&ls->ls_new_rsb_spin); - log_debug(ls, "find_rsb retry %d %d %s", - count, dlm_config.ci_new_rsb_count, name); - return -EAGAIN; - } - - r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain); - list_del(&r->res_hashchain); - ls->ls_new_rsb_count--; - spin_unlock(&ls->ls_new_rsb_spin); + r = dlm_allocate_rsb(ls, len); + if (!r) + return NULL; r->res_ls = ls; r->res_length = len; memcpy(r->res_name, name, len); mutex_init(&r->res_mutex); - INIT_LIST_HEAD(&r->res_hashchain); INIT_LIST_HEAD(&r->res_lookup); INIT_LIST_HEAD(&r->res_grantqueue); INIT_LIST_HEAD(&r->res_convertqueue); @@ -396,8 +347,7 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, INIT_LIST_HEAD(&r->res_root_list); INIT_LIST_HEAD(&r->res_recover_list); - *r_ret = r; - return 0; + return r; } static int search_rsb_list(struct list_head *head, char *name, int len, @@ -455,6 +405,16 @@ static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, return error; } +static int search_rsb(struct dlm_ls *ls, char *name, int len, int b, + unsigned int flags, struct dlm_rsb **r_ret) +{ + int error; + spin_lock(&ls->ls_rsbtbl[b].lock); + error = _search_rsb(ls, name, len, b, flags, r_ret); + spin_unlock(&ls->ls_rsbtbl[b].lock); + return error; +} + /* * Find rsb in rsbtbl and potentially create/add one * @@ -472,48 +432,35 @@ static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, static int find_rsb(struct dlm_ls *ls, char *name, int namelen, unsigned int flags, struct dlm_rsb **r_ret) { - struct dlm_rsb *r = NULL; + struct dlm_rsb *r = NULL, *tmp; uint32_t hash, bucket; - int error; + int error = -EINVAL; - if (namelen > 
DLM_RESNAME_MAXLEN) { - error = -EINVAL; + if (namelen > DLM_RESNAME_MAXLEN) goto out; - } if (dlm_no_directory(ls)) flags |= R_CREATE; + error = 0; hash = jhash(name, namelen, 0); bucket = hash & (ls->ls_rsbtbl_size - 1); - retry: - if (flags & R_CREATE) { - error = pre_rsb_struct(ls); - if (error < 0) - goto out; - } - - spin_lock(&ls->ls_rsbtbl[bucket].lock); - - error = _search_rsb(ls, name, namelen, bucket, flags, &r); + error = search_rsb(ls, name, namelen, bucket, flags, &r); if (!error) - goto out_unlock; + goto out; if (error == -EBADR && !(flags & R_CREATE)) - goto out_unlock; + goto out; /* the rsb was found but wasn't a master copy */ if (error == -ENOTBLK) - goto out_unlock; + goto out; - error = get_rsb_struct(ls, name, namelen, &r); - if (error == -EAGAIN) { - spin_unlock(&ls->ls_rsbtbl[bucket].lock); - goto retry; - } - if (error) - goto out_unlock; + error = -ENOMEM; + r = create_rsb(ls, name, namelen); + if (!r) + goto out; r->res_hash = hash; r->res_bucket = bucket; @@ -527,10 +474,18 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen, nodeid = 0; r->res_nodeid = nodeid; } + + spin_lock(&ls->ls_rsbtbl[bucket].lock); + error = _search_rsb(ls, name, namelen, bucket, 0, &tmp); + if (!error) { + spin_unlock(&ls->ls_rsbtbl[bucket].lock); + dlm_free_rsb(r); + r = tmp; + goto out; + } list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list); - error = 0; - out_unlock: spin_unlock(&ls->ls_rsbtbl[bucket].lock); + error = 0; out: *r_ret = r; return error; @@ -625,8 +580,9 @@ static void detach_lkb(struct dlm_lkb *lkb) static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) { - struct dlm_lkb *lkb; - int rv, id; + struct dlm_lkb *lkb, *tmp; + uint32_t lkid = 0; + uint16_t bucket; lkb = dlm_allocate_lkb(ls); if (!lkb) @@ -638,42 +594,60 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) INIT_LIST_HEAD(&lkb->lkb_ownqueue); INIT_LIST_HEAD(&lkb->lkb_rsb_lookup); INIT_LIST_HEAD(&lkb->lkb_time_list); - 
INIT_LIST_HEAD(&lkb->lkb_cb_list); - mutex_init(&lkb->lkb_cb_mutex); - INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work); + INIT_LIST_HEAD(&lkb->lkb_astqueue); - retry: - rv = idr_pre_get(&ls->ls_lkbidr, GFP_NOFS); - if (!rv) - return -ENOMEM; + get_random_bytes(&bucket, sizeof(bucket)); + bucket &= (ls->ls_lkbtbl_size - 1); + + write_lock(&ls->ls_lkbtbl[bucket].lock); - spin_lock(&ls->ls_lkbidr_spin); - rv = idr_get_new_above(&ls->ls_lkbidr, lkb, 1, &id); - if (!rv) - lkb->lkb_id = id; - spin_unlock(&ls->ls_lkbidr_spin); + /* counter can roll over so we must verify lkid is not in use */ - if (rv == -EAGAIN) - goto retry; + while (lkid == 0) { + lkid = (bucket << 16) | ls->ls_lkbtbl[bucket].counter++; - if (rv < 0) { - log_error(ls, "create_lkb idr error %d", rv); - return rv; + list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list, + lkb_idtbl_list) { + if (tmp->lkb_id != lkid) + continue; + lkid = 0; + break; + } } + lkb->lkb_id = lkid; + list_add(&lkb->lkb_idtbl_list, &ls->ls_lkbtbl[bucket].list); + write_unlock(&ls->ls_lkbtbl[bucket].lock); + *lkb_ret = lkb; return 0; } +static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid) +{ + struct dlm_lkb *lkb; + uint16_t bucket = (lkid >> 16); + + list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) { + if (lkb->lkb_id == lkid) + return lkb; + } + return NULL; +} + static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret) { struct dlm_lkb *lkb; + uint16_t bucket = (lkid >> 16); + + if (bucket >= ls->ls_lkbtbl_size) + return -EBADSLT; - spin_lock(&ls->ls_lkbidr_spin); - lkb = idr_find(&ls->ls_lkbidr, lkid); + read_lock(&ls->ls_lkbtbl[bucket].lock); + lkb = __find_lkb(ls, lkid); if (lkb) kref_get(&lkb->lkb_ref); - spin_unlock(&ls->ls_lkbidr_spin); + read_unlock(&ls->ls_lkbtbl[bucket].lock); *lkb_ret = lkb; return lkb ? 
0 : -ENOENT; @@ -694,12 +668,12 @@ static void kill_lkb(struct kref *kref) static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) { - uint32_t lkid = lkb->lkb_id; + uint16_t bucket = (lkb->lkb_id >> 16); - spin_lock(&ls->ls_lkbidr_spin); + write_lock(&ls->ls_lkbtbl[bucket].lock); if (kref_put(&lkb->lkb_ref, kill_lkb)) { - idr_remove(&ls->ls_lkbidr, lkid); - spin_unlock(&ls->ls_lkbidr_spin); + list_del(&lkb->lkb_idtbl_list); + write_unlock(&ls->ls_lkbtbl[bucket].lock); detach_lkb(lkb); @@ -709,7 +683,7 @@ static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) dlm_free_lkb(lkb); return 1; } else { - spin_unlock(&ls->ls_lkbidr_spin); + write_unlock(&ls->ls_lkbtbl[bucket].lock); return 0; } } @@ -875,7 +849,9 @@ void dlm_scan_waiters(struct dlm_ls *ls) if (!num_nodes) { num_nodes = ls->ls_num_nodes; - warned = kzalloc(num_nodes * sizeof(int), GFP_KERNEL); + warned = kmalloc(GFP_KERNEL, num_nodes * sizeof(int)); + if (warned) + memset(warned, 0, num_nodes * sizeof(int)); } if (!warned) continue; @@ -887,7 +863,9 @@ void dlm_scan_waiters(struct dlm_ls *ls) dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid); } mutex_unlock(&ls->ls_waiters_mutex); - kfree(warned); + + if (warned) + kfree(warned); if (debug_expired) log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us", @@ -2423,6 +2401,9 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) if (deadlk) { /* it's left on the granted queue */ + log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s", + lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status, + lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name); revert_lock(r, lkb); queue_cast(r, lkb, -EDEADLK); error = -EDEADLK; @@ -4012,6 +3993,8 @@ static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms) default: log_error(ls, "unknown message type %d", ms->m_type); } + + dlm_astd_wake(); } /* If the lockspace is in recovery mode (locking stopped), then normal @@ -4150,7 +4133,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) struct 
dlm_message *ms_stub; int wait_type, stub_unlock_result, stub_cancel_result; - ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL); + ms_stub = kmalloc(GFP_KERNEL, sizeof(struct dlm_message)); if (!ms_stub) { log_error(ls, "dlm_recover_waiters_pre no mem"); return; @@ -4826,7 +4809,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, goto out_put; spin_lock(&ua->proc->locks_spin); - /* dlm_user_add_cb() may have already taken lkb off the proc list */ + /* dlm_user_add_ast() may have already taken lkb off the proc list */ if (!list_empty(&lkb->lkb_ownqueue)) list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking); spin_unlock(&ua->proc->locks_spin); @@ -4963,7 +4946,7 @@ static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) /* We have to release clear_proc_locks mutex before calling unlock_proc_lock() (which does lock_rsb) due to deadlock with receiving a message that does - lock_rsb followed by dlm_user_add_cb() */ + lock_rsb followed by dlm_user_add_ast() */ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls, struct dlm_user_proc *proc) @@ -4986,7 +4969,7 @@ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls, return lkb; } -/* The ls_clear_proc_locks mutex protects against dlm_user_add_cb() which +/* The ls_clear_proc_locks mutex protects against dlm_user_add_asts() which 1) references lkb->ua which we free here and 2) adds lkbs to proc->asts, which we clear here. 
*/ @@ -5028,10 +5011,10 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) dlm_put_lkb(lkb); } - list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) { + list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { memset(&lkb->lkb_callbacks, 0, sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE); - list_del_init(&lkb->lkb_cb_list); + list_del_init(&lkb->lkb_astqueue); dlm_put_lkb(lkb); } @@ -5070,10 +5053,10 @@ static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) spin_unlock(&proc->locks_spin); spin_lock(&proc->asts_spin); - list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) { + list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { memset(&lkb->lkb_callbacks, 0, sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE); - list_del_init(&lkb->lkb_cb_list); + list_del_init(&lkb->lkb_astqueue); dlm_put_lkb(lkb); } spin_unlock(&proc->asts_spin); diff --git a/trunk/fs/dlm/lockspace.c b/trunk/fs/dlm/lockspace.c index a1d8f1af144b..14cbf4099753 100644 --- a/trunk/fs/dlm/lockspace.c +++ b/trunk/fs/dlm/lockspace.c @@ -15,6 +15,7 @@ #include "lockspace.h" #include "member.h" #include "recoverd.h" +#include "ast.h" #include "dir.h" #include "lowcomms.h" #include "config.h" @@ -23,7 +24,6 @@ #include "recover.h" #include "requestqueue.h" #include "user.h" -#include "ast.h" static int ls_count; static struct mutex ls_lock; @@ -359,10 +359,17 @@ static int threads_start(void) { int error; + /* Thread which process lock requests for all lockspace's */ + error = dlm_astd_start(); + if (error) { + log_print("cannot start dlm_astd thread %d", error); + goto fail; + } + error = dlm_scand_start(); if (error) { log_print("cannot start dlm_scand thread %d", error); - goto fail; + goto astd_fail; } /* Thread for sending/receiving messages for all lockspace's */ @@ -376,6 +383,8 @@ static int threads_start(void) scand_fail: dlm_scand_stop(); + astd_fail: + dlm_astd_stop(); fail: return error; } @@ -384,6 +393,7 @@ 
static void threads_stop(void) { dlm_scand_stop(); dlm_lowcomms_stop(); + dlm_astd_stop(); } static int new_lockspace(const char *name, int namelen, void **lockspace, @@ -453,7 +463,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, size = dlm_config.ci_rsbtbl_size; ls->ls_rsbtbl_size = size; - ls->ls_rsbtbl = vmalloc(sizeof(struct dlm_rsbtable) * size); + ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_NOFS); if (!ls->ls_rsbtbl) goto out_lsfree; for (i = 0; i < size; i++) { @@ -462,13 +472,22 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, spin_lock_init(&ls->ls_rsbtbl[i].lock); } - idr_init(&ls->ls_lkbidr); - spin_lock_init(&ls->ls_lkbidr_spin); + size = dlm_config.ci_lkbtbl_size; + ls->ls_lkbtbl_size = size; + + ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_NOFS); + if (!ls->ls_lkbtbl) + goto out_rsbfree; + for (i = 0; i < size; i++) { + INIT_LIST_HEAD(&ls->ls_lkbtbl[i].list); + rwlock_init(&ls->ls_lkbtbl[i].lock); + ls->ls_lkbtbl[i].counter = 1; + } size = dlm_config.ci_dirtbl_size; ls->ls_dirtbl_size = size; - ls->ls_dirtbl = vmalloc(sizeof(struct dlm_dirtable) * size); + ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_NOFS); if (!ls->ls_dirtbl) goto out_lkbfree; for (i = 0; i < size; i++) { @@ -483,9 +502,6 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, INIT_LIST_HEAD(&ls->ls_timeout); mutex_init(&ls->ls_timeout_mutex); - INIT_LIST_HEAD(&ls->ls_new_rsb); - spin_lock_init(&ls->ls_new_rsb_spin); - INIT_LIST_HEAD(&ls->ls_nodes); INIT_LIST_HEAD(&ls->ls_nodes_gone); ls->ls_num_nodes = 0; @@ -504,9 +520,6 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, init_completion(&ls->ls_members_done); ls->ls_members_result = -1; - mutex_init(&ls->ls_cb_mutex); - INIT_LIST_HEAD(&ls->ls_cb_delay); - ls->ls_recoverd_task = NULL; mutex_init(&ls->ls_recoverd_active); spin_lock_init(&ls->ls_recover_lock); @@ -540,26 
+553,18 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, list_add(&ls->ls_list, &lslist); spin_unlock(&lslist_lock); - if (flags & DLM_LSFL_FS) { - error = dlm_callback_start(ls); - if (error) { - log_error(ls, "can't start dlm_callback %d", error); - goto out_delist; - } - } - /* needs to find ls in lslist */ error = dlm_recoverd_start(ls); if (error) { log_error(ls, "can't start dlm_recoverd %d", error); - goto out_callback; + goto out_delist; } ls->ls_kobj.kset = dlm_kset; error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, "%s", ls->ls_name); if (error) - goto out_recoverd; + goto out_stop; kobject_uevent(&ls->ls_kobj, KOBJ_ADD); /* let kobject handle freeing of ls if there's an error */ @@ -573,7 +578,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, error = do_uevent(ls, 1); if (error) - goto out_recoverd; + goto out_stop; wait_for_completion(&ls->ls_members_done); error = ls->ls_members_result; @@ -590,20 +595,19 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, do_uevent(ls, 0); dlm_clear_members(ls); kfree(ls->ls_node_array); - out_recoverd: + out_stop: dlm_recoverd_stop(ls); - out_callback: - dlm_callback_stop(ls); out_delist: spin_lock(&lslist_lock); list_del(&ls->ls_list); spin_unlock(&lslist_lock); kfree(ls->ls_recover_buf); out_dirfree: - vfree(ls->ls_dirtbl); + kfree(ls->ls_dirtbl); out_lkbfree: - idr_destroy(&ls->ls_lkbidr); - vfree(ls->ls_rsbtbl); + kfree(ls->ls_lkbtbl); + out_rsbfree: + kfree(ls->ls_rsbtbl); out_lsfree: if (do_unreg) kobject_put(&ls->ls_kobj); @@ -637,64 +641,50 @@ int dlm_new_lockspace(const char *name, int namelen, void **lockspace, return error; } -static int lkb_idr_is_local(int id, void *p, void *data) -{ - struct dlm_lkb *lkb = p; - - if (!lkb->lkb_nodeid) - return 1; - return 0; -} - -static int lkb_idr_is_any(int id, void *p, void *data) -{ - return 1; -} - -static int lkb_idr_free(int id, void *p, void *data) -{ - struct dlm_lkb 
*lkb = p; - - if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY) - dlm_free_lvb(lkb->lkb_lvbptr); - - dlm_free_lkb(lkb); - return 0; -} - -/* NOTE: We check the lkbidr here rather than the resource table. - This is because there may be LKBs queued as ASTs that have been unlinked - from their RSBs and are pending deletion once the AST has been delivered */ - -static int lockspace_busy(struct dlm_ls *ls, int force) -{ - int rv; - - spin_lock(&ls->ls_lkbidr_spin); - if (force == 0) { - rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls); - } else if (force == 1) { - rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls); - } else { - rv = 0; +/* Return 1 if the lockspace still has active remote locks, + * 2 if the lockspace still has active local locks. + */ +static int lockspace_busy(struct dlm_ls *ls) +{ + int i, lkb_found = 0; + struct dlm_lkb *lkb; + + /* NOTE: We check the lockidtbl here rather than the resource table. + This is because there may be LKBs queued as ASTs that have been + unlinked from their RSBs and are pending deletion once the AST has + been delivered */ + + for (i = 0; i < ls->ls_lkbtbl_size; i++) { + read_lock(&ls->ls_lkbtbl[i].lock); + if (!list_empty(&ls->ls_lkbtbl[i].list)) { + lkb_found = 1; + list_for_each_entry(lkb, &ls->ls_lkbtbl[i].list, + lkb_idtbl_list) { + if (!lkb->lkb_nodeid) { + read_unlock(&ls->ls_lkbtbl[i].lock); + return 2; + } + } + } + read_unlock(&ls->ls_lkbtbl[i].lock); } - spin_unlock(&ls->ls_lkbidr_spin); - return rv; + return lkb_found; } static int release_lockspace(struct dlm_ls *ls, int force) { + struct dlm_lkb *lkb; struct dlm_rsb *rsb; struct list_head *head; int i, busy, rv; - busy = lockspace_busy(ls, force); + busy = lockspace_busy(ls); spin_lock(&lslist_lock); if (ls->ls_create_count == 1) { - if (busy) { + if (busy > force) rv = -EBUSY; - } else { + else { /* remove_lockspace takes ls off lslist */ ls->ls_create_count = 0; rv = 0; @@ -718,12 +708,12 @@ static int release_lockspace(struct dlm_ls *ls, 
int force) dlm_recoverd_stop(ls); - dlm_callback_stop(ls); - remove_lockspace(ls); dlm_delete_debug_file(ls); + dlm_astd_suspend(); + kfree(ls->ls_recover_buf); /* @@ -731,15 +721,31 @@ static int release_lockspace(struct dlm_ls *ls, int force) */ dlm_dir_clear(ls); - vfree(ls->ls_dirtbl); + kfree(ls->ls_dirtbl); /* - * Free all lkb's in idr + * Free all lkb's on lkbtbl[] lists. */ - idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls); - idr_remove_all(&ls->ls_lkbidr); - idr_destroy(&ls->ls_lkbidr); + for (i = 0; i < ls->ls_lkbtbl_size; i++) { + head = &ls->ls_lkbtbl[i].list; + while (!list_empty(head)) { + lkb = list_entry(head->next, struct dlm_lkb, + lkb_idtbl_list); + + list_del(&lkb->lkb_idtbl_list); + + dlm_del_ast(lkb); + + if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY) + dlm_free_lvb(lkb->lkb_lvbptr); + + dlm_free_lkb(lkb); + } + } + dlm_astd_resume(); + + kfree(ls->ls_lkbtbl); /* * Free all rsb's on rsbtbl[] lists @@ -764,14 +770,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) } } - vfree(ls->ls_rsbtbl); - - while (!list_empty(&ls->ls_new_rsb)) { - rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, - res_hashchain); - list_del(&rsb->res_hashchain); - dlm_free_rsb(rsb); - } + kfree(ls->ls_rsbtbl); /* * Free structures on any other lists diff --git a/trunk/fs/dlm/lowcomms.c b/trunk/fs/dlm/lowcomms.c index 990626e7da80..5e2c71f05e46 100644 --- a/trunk/fs/dlm/lowcomms.c +++ b/trunk/fs/dlm/lowcomms.c @@ -512,10 +512,12 @@ static void process_sctp_notification(struct connection *con, } make_sockaddr(&prim.ssp_addr, 0, &addr_len); if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) { + int i; unsigned char *b=(unsigned char *)&prim.ssp_addr; log_print("reject connect from unknown addr"); - print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, - b, sizeof(struct sockaddr_storage)); + for (i=0; isock_mutex); return -1; diff --git a/trunk/fs/dlm/memory.c b/trunk/fs/dlm/memory.c index da64df7576e1..8e0d00db004f 100644 --- a/trunk/fs/dlm/memory.c +++ 
b/trunk/fs/dlm/memory.c @@ -16,7 +16,6 @@ #include "memory.h" static struct kmem_cache *lkb_cache; -static struct kmem_cache *rsb_cache; int __init dlm_memory_init(void) @@ -27,14 +26,6 @@ int __init dlm_memory_init(void) __alignof__(struct dlm_lkb), 0, NULL); if (!lkb_cache) ret = -ENOMEM; - - rsb_cache = kmem_cache_create("dlm_rsb", sizeof(struct dlm_rsb), - __alignof__(struct dlm_rsb), 0, NULL); - if (!rsb_cache) { - kmem_cache_destroy(lkb_cache); - ret = -ENOMEM; - } - return ret; } @@ -42,8 +33,6 @@ void dlm_memory_exit(void) { if (lkb_cache) kmem_cache_destroy(lkb_cache); - if (rsb_cache) - kmem_cache_destroy(rsb_cache); } char *dlm_allocate_lvb(struct dlm_ls *ls) @@ -59,11 +48,16 @@ void dlm_free_lvb(char *p) kfree(p); } -struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls) +/* FIXME: have some minimal space built-in to rsb for the name and + kmalloc a separate name if needed, like dentries are done */ + +struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen) { struct dlm_rsb *r; - r = kmem_cache_zalloc(rsb_cache, GFP_NOFS); + DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,); + + r = kzalloc(sizeof(*r) + namelen, GFP_NOFS); return r; } @@ -71,7 +65,7 @@ void dlm_free_rsb(struct dlm_rsb *r) { if (r->res_lvbptr) dlm_free_lvb(r->res_lvbptr); - kmem_cache_free(rsb_cache, r); + kfree(r); } struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls) diff --git a/trunk/fs/dlm/memory.h b/trunk/fs/dlm/memory.h index 177c11cbb0a6..485fb29143bd 100644 --- a/trunk/fs/dlm/memory.h +++ b/trunk/fs/dlm/memory.h @@ -16,7 +16,7 @@ int dlm_memory_init(void); void dlm_memory_exit(void); -struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls); +struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen); void dlm_free_rsb(struct dlm_rsb *r); struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls); void dlm_free_lkb(struct dlm_lkb *l); diff --git a/trunk/fs/dlm/recoverd.c b/trunk/fs/dlm/recoverd.c index 774da3cf92c6..fd677c8c3d3b 100644 --- a/trunk/fs/dlm/recoverd.c +++ 
b/trunk/fs/dlm/recoverd.c @@ -58,7 +58,13 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) mutex_lock(&ls->ls_recoverd_active); - dlm_callback_suspend(ls); + /* + * Suspending and resuming dlm_astd ensures that no lkb's from this ls + * will be processed by dlm_astd during recovery. + */ + + dlm_astd_suspend(); + dlm_astd_resume(); /* * Free non-master tossed rsb's. Master rsb's are kept on toss @@ -196,8 +202,6 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_adjust_timeouts(ls); - dlm_callback_resume(ls); - error = enable_locking(ls, rv->seq); if (error) { log_debug(ls, "enable_locking failed %d", error); @@ -218,6 +222,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_grant_after_purge(ls); + dlm_astd_wake(); + log_debug(ls, "recover %llx done: %u ms", (unsigned long long)rv->seq, jiffies_to_msecs(jiffies - start)); diff --git a/trunk/fs/dlm/user.c b/trunk/fs/dlm/user.c index d8ea60756403..e96bf3e9be88 100644 --- a/trunk/fs/dlm/user.c +++ b/trunk/fs/dlm/user.c @@ -213,9 +213,9 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, goto out; } - if (list_empty(&lkb->lkb_cb_list)) { + if (list_empty(&lkb->lkb_astqueue)) { kref_get(&lkb->lkb_ref); - list_add_tail(&lkb->lkb_cb_list, &proc->asts); + list_add_tail(&lkb->lkb_astqueue, &proc->asts); wake_up_interruptible(&proc->wait); } spin_unlock(&proc->asts_spin); @@ -832,24 +832,24 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count, } /* if we empty lkb_callbacks, we don't want to unlock the spinlock - without removing lkb_cb_list; so empty lkb_cb_list is always + without removing lkb_astqueue; so empty lkb_astqueue is always consistent with empty lkb_callbacks */ - lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_cb_list); + lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue); rv = dlm_rem_lkb_callback(lkb->lkb_resource->res_ls, lkb, &cb, &resid); if (rv < 0) { /* this shouldn't happen; 
lkb should have been removed from list when resid was zero */ log_print("dlm_rem_lkb_callback empty %x", lkb->lkb_id); - list_del_init(&lkb->lkb_cb_list); + list_del_init(&lkb->lkb_astqueue); spin_unlock(&proc->asts_spin); /* removes ref for proc->asts, may cause lkb to be freed */ dlm_put_lkb(lkb); goto try_another; } if (!resid) - list_del_init(&lkb->lkb_cb_list); + list_del_init(&lkb->lkb_astqueue); spin_unlock(&proc->asts_spin); if (cb.flags & DLM_CB_SKIP) { diff --git a/trunk/fs/gfs2/bmap.c b/trunk/fs/gfs2/bmap.c index 42e477f31223..e65493a8ac00 100644 --- a/trunk/fs/gfs2/bmap.c +++ b/trunk/fs/gfs2/bmap.c @@ -854,7 +854,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, blen++; else { if (bstart) { - __gfs2_free_blocks(ip, bstart, blen, metadata); + if (metadata) + __gfs2_free_meta(ip, bstart, blen); + else + __gfs2_free_data(ip, bstart, blen); + btotal += blen; } @@ -866,7 +870,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, gfs2_add_inode_blocks(&ip->i_inode, -1); } if (bstart) { - __gfs2_free_blocks(ip, bstart, blen, metadata); + if (metadata) + __gfs2_free_meta(ip, bstart, blen); + else + __gfs2_free_data(ip, bstart, blen); + btotal += blen; } diff --git a/trunk/fs/gfs2/dir.c b/trunk/fs/gfs2/dir.c index 1cc2f8ec52a2..091ee4779538 100644 --- a/trunk/fs/gfs2/dir.c +++ b/trunk/fs/gfs2/dir.c @@ -339,67 +339,6 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset, return (copied) ? 
copied : error; } -/** - * gfs2_dir_get_hash_table - Get pointer to the dir hash table - * @ip: The inode in question - * - * Returns: The hash table or an error - */ - -static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip) -{ - struct inode *inode = &ip->i_inode; - int ret; - u32 hsize; - __be64 *hc; - - BUG_ON(!(ip->i_diskflags & GFS2_DIF_EXHASH)); - - hc = ip->i_hash_cache; - if (hc) - return hc; - - hsize = 1 << ip->i_depth; - hsize *= sizeof(__be64); - if (hsize != i_size_read(&ip->i_inode)) { - gfs2_consist_inode(ip); - return ERR_PTR(-EIO); - } - - hc = kmalloc(hsize, GFP_NOFS); - ret = -ENOMEM; - if (hc == NULL) - return ERR_PTR(-ENOMEM); - - ret = gfs2_dir_read_data(ip, (char *)hc, 0, hsize, 1); - if (ret < 0) { - kfree(hc); - return ERR_PTR(ret); - } - - spin_lock(&inode->i_lock); - if (ip->i_hash_cache) - kfree(hc); - else - ip->i_hash_cache = hc; - spin_unlock(&inode->i_lock); - - return ip->i_hash_cache; -} - -/** - * gfs2_dir_hash_inval - Invalidate dir hash - * @ip: The directory inode - * - * Must be called with an exclusive glock, or during glock invalidation. - */ -void gfs2_dir_hash_inval(struct gfs2_inode *ip) -{ - __be64 *hc = ip->i_hash_cache; - ip->i_hash_cache = NULL; - kfree(hc); -} - static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent) { return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0; @@ -747,12 +686,17 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no, static int get_leaf_nr(struct gfs2_inode *dip, u32 index, u64 *leaf_out) { - __be64 *hash; + __be64 leaf_no; + int error; + + error = gfs2_dir_read_data(dip, (char *)&leaf_no, + index * sizeof(__be64), + sizeof(__be64), 0); + if (error != sizeof(u64)) + return (error < 0) ? 
error : -EIO; + + *leaf_out = be64_to_cpu(leaf_no); - hash = gfs2_dir_get_hash_table(dip); - if (IS_ERR(hash)) - return PTR_ERR(hash); - *leaf_out = be64_to_cpu(*(hash + index)); return 0; } @@ -1022,8 +966,6 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) for (x = 0; x < half_len; x++) lp[x] = cpu_to_be64(bn); - gfs2_dir_hash_inval(dip); - error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64), half_len * sizeof(u64)); if (error != half_len * sizeof(u64)) { @@ -1110,54 +1052,70 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) static int dir_double_exhash(struct gfs2_inode *dip) { + struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct buffer_head *dibh; u32 hsize; - u32 hsize_bytes; - __be64 *hc; - __be64 *hc2, *h; + u64 *buf; + u64 *from, *to; + u64 block; + u64 disksize = i_size_read(&dip->i_inode); int x; int error = 0; hsize = 1 << dip->i_depth; - hsize_bytes = hsize * sizeof(__be64); + if (hsize * sizeof(u64) != disksize) { + gfs2_consist_inode(dip); + return -EIO; + } - hc = gfs2_dir_get_hash_table(dip); - if (IS_ERR(hc)) - return PTR_ERR(hc); + /* Allocate both the "from" and "to" buffers in one big chunk */ - h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS); - if (!hc2) + buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS); + if (!buf) return -ENOMEM; - error = gfs2_meta_inode_buffer(dip, &dibh); - if (error) - goto out_kfree; + for (block = disksize >> sdp->sd_hash_bsize_shift; block--;) { + error = gfs2_dir_read_data(dip, (char *)buf, + block * sdp->sd_hash_bsize, + sdp->sd_hash_bsize, 1); + if (error != sdp->sd_hash_bsize) { + if (error >= 0) + error = -EIO; + goto fail; + } + + from = buf; + to = (u64 *)((char *)buf + sdp->sd_hash_bsize); - for (x = 0; x < hsize; x++) { - *h++ = *hc; - *h++ = *hc; - hc++; + for (x = sdp->sd_hash_ptrs; x--; from++) { + *to++ = *from; /* No endianess worries */ + *to++ = *from; + } + + error = gfs2_dir_write_data(dip, + (char *)buf + sdp->sd_hash_bsize, + block * 
sdp->sd_sb.sb_bsize, + sdp->sd_sb.sb_bsize); + if (error != sdp->sd_sb.sb_bsize) { + if (error >= 0) + error = -EIO; + goto fail; + } } - error = gfs2_dir_write_data(dip, (char *)hc2, 0, hsize_bytes * 2); - if (error != (hsize_bytes * 2)) - goto fail; + kfree(buf); - gfs2_dir_hash_inval(dip); - dip->i_hash_cache = hc2; - dip->i_depth++; - gfs2_dinode_out(dip, dibh->b_data); - brelse(dibh); - return 0; + error = gfs2_meta_inode_buffer(dip, &dibh); + if (!gfs2_assert_withdraw(sdp, !error)) { + dip->i_depth++; + gfs2_dinode_out(dip, dibh->b_data); + brelse(dibh); + } + + return error; fail: - /* Replace original hash table & size */ - gfs2_dir_write_data(dip, (char *)hc, 0, hsize_bytes); - i_size_write(&dip->i_inode, hsize_bytes); - gfs2_dinode_out(dip, dibh->b_data); - brelse(dibh); -out_kfree: - kfree(hc2); + kfree(buf); return error; } @@ -1390,7 +1348,6 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, return error; } - /** * dir_e_read - Reads the entries from a directory into a filldir buffer * @dip: dinode pointer @@ -1405,7 +1362,9 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, filldir_t filldir) { struct gfs2_inode *dip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); u32 hsize, len = 0; + u32 ht_offset, lp_offset, ht_offset_cur = -1; u32 hash, index; __be64 *lp; int copied = 0; @@ -1413,17 +1372,37 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, unsigned depth = 0; hsize = 1 << dip->i_depth; + if (hsize * sizeof(u64) != i_size_read(inode)) { + gfs2_consist_inode(dip); + return -EIO; + } + hash = gfs2_dir_offset2hash(*offset); index = hash >> (32 - dip->i_depth); - lp = gfs2_dir_get_hash_table(dip); - if (IS_ERR(lp)) - return PTR_ERR(lp); + lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); + if (!lp) + return -ENOMEM; while (index < hsize) { + lp_offset = index & (sdp->sd_hash_ptrs - 1); + ht_offset = index - lp_offset; + + if (ht_offset_cur != ht_offset) { + error = 
gfs2_dir_read_data(dip, (char *)lp, + ht_offset * sizeof(__be64), + sdp->sd_hash_bsize, 1); + if (error != sdp->sd_hash_bsize) { + if (error >= 0) + error = -EIO; + goto out; + } + ht_offset_cur = ht_offset; + } + error = gfs2_dir_read_leaf(inode, offset, opaque, filldir, &copied, &depth, - be64_to_cpu(lp[index])); + be64_to_cpu(lp[lp_offset])); if (error) break; @@ -1431,6 +1410,8 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, index = (index & ~(len - 1)) + len; } +out: + kfree(lp); if (error > 0) error = 0; return error; @@ -1933,22 +1914,43 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) { + struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct buffer_head *bh; struct gfs2_leaf *leaf; u32 hsize, len; + u32 ht_offset, lp_offset, ht_offset_cur = -1; u32 index = 0, next_index; __be64 *lp; u64 leaf_no; int error = 0, last; hsize = 1 << dip->i_depth; + if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) { + gfs2_consist_inode(dip); + return -EIO; + } - lp = gfs2_dir_get_hash_table(dip); - if (IS_ERR(lp)) - return PTR_ERR(lp); + lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); + if (!lp) + return -ENOMEM; while (index < hsize) { - leaf_no = be64_to_cpu(lp[index]); + lp_offset = index & (sdp->sd_hash_ptrs - 1); + ht_offset = index - lp_offset; + + if (ht_offset_cur != ht_offset) { + error = gfs2_dir_read_data(dip, (char *)lp, + ht_offset * sizeof(__be64), + sdp->sd_hash_bsize, 1); + if (error != sdp->sd_hash_bsize) { + if (error >= 0) + error = -EIO; + goto out; + } + ht_offset_cur = ht_offset; + } + + leaf_no = be64_to_cpu(lp[lp_offset]); if (leaf_no) { error = get_leaf(dip, leaf_no, &bh); if (error) @@ -1974,6 +1976,7 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) } out: + kfree(lp); return error; } diff --git a/trunk/fs/gfs2/dir.h b/trunk/fs/gfs2/dir.h index ff5772fbf024..e686af11becd 100644 --- a/trunk/fs/gfs2/dir.h +++ b/trunk/fs/gfs2/dir.h @@ -35,7 +35,6 @@ 
extern int gfs2_diradd_alloc_required(struct inode *dir, const struct qstr *filename); extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, struct buffer_head **bhp); -extern void gfs2_dir_hash_inval(struct gfs2_inode *ip); static inline u32 gfs2_disk_hash(const char *data, int len) { diff --git a/trunk/fs/gfs2/file.c b/trunk/fs/gfs2/file.c index bc2590ef5fc1..a9f5cbe45cd9 100644 --- a/trunk/fs/gfs2/file.c +++ b/trunk/fs/gfs2/file.c @@ -174,9 +174,7 @@ void gfs2_set_inode_flags(struct inode *inode) struct gfs2_inode *ip = GFS2_I(inode); unsigned int flags = inode->i_flags; - flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC); - if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode)) - inode->i_flags |= S_NOSEC; + flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); if (ip->i_diskflags & GFS2_DIF_IMMUTABLE) flags |= S_IMMUTABLE; if (ip->i_diskflags & GFS2_DIF_APPENDONLY) diff --git a/trunk/fs/gfs2/glock.c b/trunk/fs/gfs2/glock.c index 88e8a23d0026..1c1336e7b3b2 100644 --- a/trunk/fs/gfs2/glock.c +++ b/trunk/fs/gfs2/glock.c @@ -409,10 +409,6 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) if (held1 && held2 && list_empty(&gl->gl_holders)) clear_bit(GLF_QUEUED, &gl->gl_flags); - if (new_state != gl->gl_target) - /* shorten our minimum hold time */ - gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR, - GL_GLOCK_MIN_HOLD); gl->gl_state = new_state; gl->gl_tchange = jiffies; } @@ -672,7 +668,7 @@ static void glock_work_func(struct work_struct *work) gl->gl_demote_state != LM_ST_EXCLUSIVE) { unsigned long holdtime, now = jiffies; - holdtime = gl->gl_tchange + gl->gl_hold_time; + holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; if (time_before(now, holdtime)) delay = holdtime - now; @@ -683,14 +679,9 @@ static void glock_work_func(struct work_struct *work) } run_queue(gl, 0); spin_unlock(&gl->gl_spin); - if (!delay) + if (!delay || + queue_delayed_work(glock_workqueue, &gl->gl_work, delay) 
== 0) gfs2_glock_put(gl); - else { - if (gl->gl_name.ln_type != LM_TYPE_INODE) - delay = 0; - if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) - gfs2_glock_put(gl); - } if (drop_ref) gfs2_glock_put(gl); } @@ -752,7 +743,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_tchange = jiffies; gl->gl_object = NULL; gl->gl_sbd = sdp; - gl->gl_hold_time = GL_GLOCK_DFT_HOLD; INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); INIT_WORK(&gl->gl_delete, delete_work_func); @@ -865,15 +855,8 @@ static int gfs2_glock_demote_wait(void *word) static void wait_on_holder(struct gfs2_holder *gh) { - unsigned long time1 = jiffies; - might_sleep(); wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE); - if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */ - /* Lengthen the minimum hold time. */ - gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + - GL_GLOCK_HOLD_INCR, - GL_GLOCK_MAX_HOLD); } static void wait_on_demote(struct gfs2_glock *gl) @@ -1110,9 +1093,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh) gfs2_glock_hold(gl); if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && - !test_bit(GLF_DEMOTE, &gl->gl_flags) && - gl->gl_name.ln_type == LM_TYPE_INODE) - delay = gl->gl_hold_time; + !test_bit(GLF_DEMOTE, &gl->gl_flags)) + delay = gl->gl_ops->go_min_hold_time; if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); } @@ -1291,13 +1273,12 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) unsigned long now = jiffies; gfs2_glock_hold(gl); - holdtime = gl->gl_tchange + gl->gl_hold_time; - if (test_bit(GLF_QUEUED, &gl->gl_flags) && - gl->gl_name.ln_type == LM_TYPE_INODE) { + holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; + if (test_bit(GLF_QUEUED, &gl->gl_flags)) { if (time_before(now, holdtime)) delay = holdtime - now; if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) - delay = gl->gl_hold_time; + delay = gl->gl_ops->go_min_hold_time; } 
spin_lock(&gl->gl_spin); @@ -1686,7 +1667,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) dtime *= 1000000/HZ; /* demote time in uSec */ if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) dtime = 0; - gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d m:%ld\n", + gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n", state2str(gl->gl_state), gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, @@ -1695,7 +1676,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) state2str(gl->gl_demote_state), dtime, atomic_read(&gl->gl_ail_count), atomic_read(&gl->gl_revokes), - atomic_read(&gl->gl_ref), gl->gl_hold_time); + atomic_read(&gl->gl_ref)); list_for_each_entry(gh, &gl->gl_holders, gh_list) { error = dump_holder(seq, gh); diff --git a/trunk/fs/gfs2/glock.h b/trunk/fs/gfs2/glock.h index 66707118af25..6b2f757b9281 100644 --- a/trunk/fs/gfs2/glock.h +++ b/trunk/fs/gfs2/glock.h @@ -113,12 +113,6 @@ enum { #define GLR_TRYFAILED 13 -#define GL_GLOCK_MAX_HOLD (long)(HZ / 5) -#define GL_GLOCK_DFT_HOLD (long)(HZ / 5) -#define GL_GLOCK_MIN_HOLD (long)(10) -#define GL_GLOCK_HOLD_INCR (long)(HZ / 20) -#define GL_GLOCK_HOLD_DECR (long)(HZ / 40) - struct lm_lockops { const char *lm_proto_name; int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); diff --git a/trunk/fs/gfs2/glops.c b/trunk/fs/gfs2/glops.c index da21ecaafcc2..2cca29316bd6 100644 --- a/trunk/fs/gfs2/glops.c +++ b/trunk/fs/gfs2/glops.c @@ -26,7 +26,6 @@ #include "rgrp.h" #include "util.h" #include "trans.h" -#include "dir.h" /** * __gfs2_ail_flush - remove all buffers for a given lock from the AIL @@ -219,7 +218,6 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) if (ip) { set_bit(GIF_INVALID, &ip->i_flags); forget_all_cached_acls(&ip->i_inode); - gfs2_dir_hash_inval(ip); } } @@ -318,8 +316,6 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_generation = 
be64_to_cpu(str->di_generation); ip->i_diskflags = be32_to_cpu(str->di_flags); - ip->i_eattr = be64_to_cpu(str->di_eattr); - /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */ gfs2_set_inode_flags(&ip->i_inode); height = be16_to_cpu(str->di_height); if (unlikely(height > GFS2_MAX_META_HEIGHT)) @@ -332,6 +328,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_depth = (u8)depth; ip->i_entries = be32_to_cpu(str->di_entries); + ip->i_eattr = be64_to_cpu(str->di_eattr); if (S_ISREG(ip->i_inode.i_mode)) gfs2_set_aops(&ip->i_inode); @@ -552,6 +549,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_lock = inode_go_lock, .go_dump = inode_go_dump, .go_type = LM_TYPE_INODE, + .go_min_hold_time = HZ / 5, .go_flags = GLOF_ASPACE, }; @@ -562,6 +560,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_unlock = rgrp_go_unlock, .go_dump = gfs2_rgrp_dump, .go_type = LM_TYPE_RGRP, + .go_min_hold_time = HZ / 5, .go_flags = GLOF_ASPACE, }; diff --git a/trunk/fs/gfs2/incore.h b/trunk/fs/gfs2/incore.h index 892ac37de8ae..81206e70cbf6 100644 --- a/trunk/fs/gfs2/incore.h +++ b/trunk/fs/gfs2/incore.h @@ -163,6 +163,7 @@ struct gfs2_glock_operations { int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); void (*go_callback) (struct gfs2_glock *gl); const int go_type; + const unsigned long go_min_hold_time; const unsigned long go_flags; #define GLOF_ASPACE 1 }; @@ -220,7 +221,6 @@ struct gfs2_glock { unsigned int gl_hash; unsigned long gl_demote_time; /* time of first demote request */ - long gl_hold_time; struct list_head gl_holders; const struct gfs2_glock_operations *gl_ops; @@ -285,7 +285,6 @@ struct gfs2_inode { u64 i_goal; /* goal block for allocations */ struct rw_semaphore i_rw_mutex; struct list_head i_trunc_list; - __be64 *i_hash_cache; u32 i_entries; u32 i_diskflags; u8 i_height; diff --git a/trunk/fs/gfs2/main.c b/trunk/fs/gfs2/main.c index 29e1ace7953d..c2b34cd2abe0 100644 --- 
a/trunk/fs/gfs2/main.c +++ b/trunk/fs/gfs2/main.c @@ -41,7 +41,6 @@ static void gfs2_init_inode_once(void *foo) init_rwsem(&ip->i_rw_mutex); INIT_LIST_HEAD(&ip->i_trunc_list); ip->i_alloc = NULL; - ip->i_hash_cache = NULL; } static void gfs2_init_glock_once(void *foo) diff --git a/trunk/fs/gfs2/ops_fstype.c b/trunk/fs/gfs2/ops_fstype.c index 516516e0c2a2..2a77071fb7b6 100644 --- a/trunk/fs/gfs2/ops_fstype.c +++ b/trunk/fs/gfs2/ops_fstype.c @@ -1094,7 +1094,6 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent if (sdp->sd_args.ar_nobarrier) set_bit(SDF_NOBARRIERS, &sdp->sd_flags); - sb->s_flags |= MS_NOSEC; sb->s_magic = GFS2_MAGIC; sb->s_op = &gfs2_super_ops; sb->s_d_op = &gfs2_dops; diff --git a/trunk/fs/gfs2/rgrp.c b/trunk/fs/gfs2/rgrp.c index 7f8af1eb02de..9b780df3fd54 100644 --- a/trunk/fs/gfs2/rgrp.c +++ b/trunk/fs/gfs2/rgrp.c @@ -1607,15 +1607,14 @@ int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation) } /** - * __gfs2_free_blocks - free a contiguous run of block(s) + * gfs2_free_data - free a contiguous run of data block(s) * @ip: the inode these blocks are being freed from * @bstart: first block of a run of contiguous blocks * @blen: the length of the block run - * @meta: 1 if the blocks represent metadata * */ -void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) +void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd; @@ -1632,10 +1631,53 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) gfs2_trans_add_rg(rgd); /* Directories keep their data in the metadata address space */ - if (meta || ip->i_depth) + if (ip->i_depth) gfs2_meta_wipe(ip, bstart, blen); } +/** + * gfs2_free_data - free a contiguous run of data block(s) + * @ip: the inode these blocks are being freed from + * @bstart: first block of a run of contiguous blocks + * @blen: the length of the block run + * + */ 
+ +void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + + __gfs2_free_data(ip, bstart, blen); + gfs2_statfs_change(sdp, 0, +blen, 0); + gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); +} + +/** + * gfs2_free_meta - free a contiguous run of data block(s) + * @ip: the inode these blocks are being freed from + * @bstart: first block of a run of contiguous blocks + * @blen: the length of the block run + * + */ + +void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct gfs2_rgrpd *rgd; + + rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); + if (!rgd) + return; + trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); + rgd->rd_free += blen; + + gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); + gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + + gfs2_trans_add_rg(rgd); + gfs2_meta_wipe(ip, bstart, blen); +} + /** * gfs2_free_meta - free a contiguous run of data block(s) * @ip: the inode these blocks are being freed from @@ -1648,7 +1690,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - __gfs2_free_blocks(ip, bstart, blen, 1); + __gfs2_free_meta(ip, bstart, blen); gfs2_statfs_change(sdp, 0, +blen, 0); gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); } diff --git a/trunk/fs/gfs2/rgrp.h b/trunk/fs/gfs2/rgrp.h index d253f9a8c70e..a80e3034ac47 100644 --- a/trunk/fs/gfs2/rgrp.h +++ b/trunk/fs/gfs2/rgrp.h @@ -52,7 +52,9 @@ extern int gfs2_ri_update(struct gfs2_inode *ip); extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); -extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); +extern void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); +extern void 
gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); +extern void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); extern void gfs2_unlink_di(struct inode *inode); diff --git a/trunk/fs/gfs2/super.c b/trunk/fs/gfs2/super.c index b7beadd9ba4c..fb0edf735483 100644 --- a/trunk/fs/gfs2/super.c +++ b/trunk/fs/gfs2/super.c @@ -1533,7 +1533,7 @@ static void gfs2_evict_inode(struct inode *inode) /* Case 3 starts here */ truncate_inode_pages(&inode->i_data, 0); end_writeback(inode); - gfs2_dir_hash_inval(ip); + ip->i_gl->gl_object = NULL; gfs2_glock_add_to_lru(ip->i_gl); gfs2_glock_put(ip->i_gl); diff --git a/trunk/fs/hfsplus/brec.c b/trunk/fs/hfsplus/brec.c index 2a734cfccc92..2312de34bd42 100644 --- a/trunk/fs/hfsplus/brec.c +++ b/trunk/fs/hfsplus/brec.c @@ -43,10 +43,6 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) node->tree->node_size - (rec + 1) * 2); if (!recoff) return 0; - if (recoff > node->tree->node_size - 2) { - printk(KERN_ERR "hfs: recoff %d too large\n", recoff); - return 0; - } retval = hfs_bnode_read_u16(node, recoff) + 2; if (retval > node->tree->max_key_len + 2) { diff --git a/trunk/fs/hfsplus/catalog.c b/trunk/fs/hfsplus/catalog.c index 4dfbfec357e8..b4ba1b319333 100644 --- a/trunk/fs/hfsplus/catalog.c +++ b/trunk/fs/hfsplus/catalog.c @@ -212,9 +212,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); - err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); - if (err) - return err; + hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); entry_size = hfsplus_fill_cat_thread(sb, &entry, @@ -271,9 +269,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? 
str->name : NULL, cnid); - err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); - if (err) - return err; + hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); if (!str) { int len; @@ -351,14 +347,12 @@ int hfsplus_rename_cat(u32 cnid, struct hfs_find_data src_fd, dst_fd; hfsplus_cat_entry entry; int entry_size, type; - int err; + int err = 0; dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, dst_dir->i_ino, dst_name->name); - err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); - if (err) - return err; + hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); dst_fd = src_fd; /* find the old dir entry and read the data */ diff --git a/trunk/fs/hfsplus/dir.c b/trunk/fs/hfsplus/dir.c index 25b2443a004c..4df5059c25da 100644 --- a/trunk/fs/hfsplus/dir.c +++ b/trunk/fs/hfsplus/dir.c @@ -38,9 +38,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, sb = dir->i_sb; dentry->d_fsdata = NULL; - err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); - if (err) - return ERR_PTR(err); + hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); again: err = hfs_brec_read(&fd, &entry, sizeof(entry)); @@ -134,9 +132,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) if (filp->f_pos >= inode->i_size) return 0; - err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); - if (err) - return err; + hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); err = hfs_brec_find(&fd); if (err) diff --git a/trunk/fs/hfsplus/extents.c b/trunk/fs/hfsplus/extents.c index 5849e3ef35cc..b1991a2a08e0 100644 --- a/trunk/fs/hfsplus/extents.c +++ b/trunk/fs/hfsplus/extents.c @@ -119,31 +119,22 @@ static void __hfsplus_ext_write_extent(struct inode *inode, set_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags); } -static int hfsplus_ext_write_extent_locked(struct inode *inode) +static void 
hfsplus_ext_write_extent_locked(struct inode *inode) { - int res; - if (HFSPLUS_I(inode)->extent_state & HFSPLUS_EXT_DIRTY) { struct hfs_find_data fd; - res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); - if (res) - return res; + hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); __hfsplus_ext_write_extent(inode, &fd); hfs_find_exit(&fd); } - return 0; } -int hfsplus_ext_write_extent(struct inode *inode) +void hfsplus_ext_write_extent(struct inode *inode) { - int res; - mutex_lock(&HFSPLUS_I(inode)->extents_lock); - res = hfsplus_ext_write_extent_locked(inode); + hfsplus_ext_write_extent_locked(inode); mutex_unlock(&HFSPLUS_I(inode)->extents_lock); - - return res; } static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, @@ -203,11 +194,9 @@ static int hfsplus_ext_read_extent(struct inode *inode, u32 block) block < hip->cached_start + hip->cached_blocks) return 0; - res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); - if (!res) { - res = __hfsplus_ext_cache_extent(&fd, inode, block); - hfs_find_exit(&fd); - } + hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); + res = __hfsplus_ext_cache_extent(&fd, inode, block); + hfs_find_exit(&fd); return res; } @@ -220,7 +209,6 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, struct hfsplus_inode_info *hip = HFSPLUS_I(inode); int res = -EIO; u32 ablock, dblock, mask; - sector_t sector; int was_dirty = 0; int shift; @@ -267,12 +255,10 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, done: dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); - mask = (1 << sbi->fs_shift) - 1; - sector = ((sector_t)dblock << sbi->fs_shift) + - sbi->blockoffset + (iblock & mask); - map_bh(bh_result, sb, sector); - + map_bh(bh_result, sb, + (dblock << sbi->fs_shift) + sbi->blockoffset + + (iblock & mask)); if (create) { set_buffer_new(bh_result); hip->phys_size += sb->s_blocksize; @@ -385,9 +371,7 @@ int hfsplus_free_fork(struct 
super_block *sb, u32 cnid, if (total_blocks == blocks) return 0; - res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); - if (res) - return res; + hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); do { res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid, total_blocks, type); @@ -485,9 +469,7 @@ int hfsplus_file_extend(struct inode *inode) insert_extent: dprint(DBG_EXTENT, "insert new extent\n"); - res = hfsplus_ext_write_extent_locked(inode); - if (res) - goto out; + hfsplus_ext_write_extent_locked(inode); memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); hip->cached_extents[0].start_block = cpu_to_be32(start); @@ -518,6 +500,7 @@ void hfsplus_file_truncate(struct inode *inode) struct page *page; void *fsdata; u32 size = inode->i_size; + int res; res = pagecache_write_begin(NULL, mapping, size, 0, AOP_FLAG_UNINTERRUPTIBLE, @@ -540,12 +523,7 @@ void hfsplus_file_truncate(struct inode *inode) goto out; mutex_lock(&hip->extents_lock); - res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); - if (res) { - mutex_unlock(&hip->extents_lock); - /* XXX: We lack error handling of hfsplus_file_truncate() */ - return; - } + hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); while (1) { if (alloc_cnt == hip->first_blocks) { hfsplus_free_extents(sb, hip->first_extents, diff --git a/trunk/fs/hfsplus/hfsplus_fs.h b/trunk/fs/hfsplus/hfsplus_fs.h index 81dfd1e495e3..d6857523336d 100644 --- a/trunk/fs/hfsplus/hfsplus_fs.h +++ b/trunk/fs/hfsplus/hfsplus_fs.h @@ -13,7 +13,6 @@ #include #include #include -#include #include "hfsplus_raw.h" #define DBG_BNODE_REFS 0x00000001 @@ -111,9 +110,7 @@ struct hfsplus_vh; struct hfs_btree; struct hfsplus_sb_info { - void *s_vhdr_buf; struct hfsplus_vh *s_vhdr; - void *s_backup_vhdr_buf; struct hfsplus_vh *s_backup_vhdr; struct hfs_btree *ext_tree; struct hfs_btree *cat_tree; @@ -261,15 +258,6 @@ struct hfsplus_readdir_data { struct hfsplus_cat_key key; }; -/* - * Find minimum acceptible I/O size for an hfsplus sb. 
- */ -static inline unsigned short hfsplus_min_io_size(struct super_block *sb) -{ - return max_t(unsigned short, bdev_logical_block_size(sb->s_bdev), - HFSPLUS_SECTOR_SIZE); -} - #define hfs_btree_open hfsplus_btree_open #define hfs_btree_close hfsplus_btree_close #define hfs_btree_write hfsplus_btree_write @@ -386,7 +374,7 @@ extern const struct file_operations hfsplus_dir_operations; /* extents.c */ int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); -int hfsplus_ext_write_extent(struct inode *); +void hfsplus_ext_write_extent(struct inode *); int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int); int hfsplus_free_fork(struct super_block *, u32, struct hfsplus_fork_raw *, int); @@ -448,8 +436,8 @@ int hfsplus_compare_dentry(const struct dentry *parent, /* wrapper.c */ int hfsplus_read_wrapper(struct super_block *); int hfs_part_find(struct super_block *, sector_t *, sector_t *); -int hfsplus_submit_bio(struct super_block *sb, sector_t sector, - void *buf, void **data, int rw); +int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, + void *data, int rw); /* time macros */ #define __hfsp_mt2ut(t) (be32_to_cpu(t) - 2082844800U) diff --git a/trunk/fs/hfsplus/inode.c b/trunk/fs/hfsplus/inode.c index 010cd363d085..b248a6cfcad9 100644 --- a/trunk/fs/hfsplus/inode.c +++ b/trunk/fs/hfsplus/inode.c @@ -195,13 +195,11 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, hip->flags = 0; set_bit(HFSPLUS_I_RSRC, &hip->flags); - err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); - if (!err) { - err = hfsplus_find_cat(sb, dir->i_ino, &fd); - if (!err) - err = hfsplus_cat_read_inode(inode, &fd); - hfs_find_exit(&fd); - } + hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + err = hfsplus_find_cat(sb, dir->i_ino, &fd); + if (!err) + err = hfsplus_cat_read_inode(inode, &fd); + hfs_find_exit(&fd); if (err) { iput(inode); return ERR_PTR(err); diff --git a/trunk/fs/hfsplus/part_tbl.c b/trunk/fs/hfsplus/part_tbl.c 
index eb355d81e279..40ad88c12c64 100644 --- a/trunk/fs/hfsplus/part_tbl.c +++ b/trunk/fs/hfsplus/part_tbl.c @@ -88,12 +88,11 @@ static int hfs_parse_old_pmap(struct super_block *sb, struct old_pmap *pm, return -ENOENT; } -static int hfs_parse_new_pmap(struct super_block *sb, void *buf, - struct new_pmap *pm, sector_t *part_start, sector_t *part_size) +static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm, + sector_t *part_start, sector_t *part_size) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); int size = be32_to_cpu(pm->pmMapBlkCnt); - int buf_size = hfsplus_min_io_size(sb); int res; int i = 0; @@ -108,14 +107,11 @@ static int hfs_parse_new_pmap(struct super_block *sb, void *buf, if (++i >= size) return -ENOENT; - pm = (struct new_pmap *)((u8 *)pm + HFSPLUS_SECTOR_SIZE); - if ((u8 *)pm - (u8 *)buf >= buf_size) { - res = hfsplus_submit_bio(sb, - *part_start + HFS_PMAP_BLK + i, - buf, (void **)&pm, READ); - if (res) - return res; - } + res = hfsplus_submit_bio(sb->s_bdev, + *part_start + HFS_PMAP_BLK + i, + pm, READ); + if (res) + return res; } while (pm->pmSig == cpu_to_be16(HFS_NEW_PMAP_MAGIC)); return -ENOENT; @@ -128,15 +124,15 @@ static int hfs_parse_new_pmap(struct super_block *sb, void *buf, int hfs_part_find(struct super_block *sb, sector_t *part_start, sector_t *part_size) { - void *buf, *data; + void *data; int res; - buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL); - if (!buf) + data = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); + if (!data) return -ENOMEM; - res = hfsplus_submit_bio(sb, *part_start + HFS_PMAP_BLK, - buf, &data, READ); + res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK, + data, READ); if (res) goto out; @@ -145,13 +141,13 @@ int hfs_part_find(struct super_block *sb, res = hfs_parse_old_pmap(sb, data, part_start, part_size); break; case HFS_NEW_PMAP_MAGIC: - res = hfs_parse_new_pmap(sb, buf, data, part_start, part_size); + res = hfs_parse_new_pmap(sb, data, part_start, part_size); break; default: res = 
-ENOENT; break; } out: - kfree(buf); + kfree(data); return res; } diff --git a/trunk/fs/hfsplus/super.c b/trunk/fs/hfsplus/super.c index c106ca22e812..84a47b709f51 100644 --- a/trunk/fs/hfsplus/super.c +++ b/trunk/fs/hfsplus/super.c @@ -73,13 +73,11 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || inode->i_ino == HFSPLUS_ROOT_CNID) { - err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); - if (!err) { - err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); - if (!err) - err = hfsplus_cat_read_inode(inode, &fd); - hfs_find_exit(&fd); - } + hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); + err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); + if (!err) + err = hfsplus_cat_read_inode(inode, &fd); + hfs_find_exit(&fd); } else { err = hfsplus_system_read_inode(inode); } @@ -135,13 +133,9 @@ static int hfsplus_system_write_inode(struct inode *inode) static int hfsplus_write_inode(struct inode *inode, struct writeback_control *wbc) { - int err; - dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); - err = hfsplus_ext_write_extent(inode); - if (err) - return err; + hfsplus_ext_write_extent(inode); if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || inode->i_ino == HFSPLUS_ROOT_CNID) @@ -203,17 +197,17 @@ int hfsplus_sync_fs(struct super_block *sb, int wait) write_backup = 1; } - error2 = hfsplus_submit_bio(sb, + error2 = hfsplus_submit_bio(sb->s_bdev, sbi->part_start + HFSPLUS_VOLHEAD_SECTOR, - sbi->s_vhdr_buf, NULL, WRITE_SYNC); + sbi->s_vhdr, WRITE_SYNC); if (!error) error = error2; if (!write_backup) goto out; - error2 = hfsplus_submit_bio(sb, + error2 = hfsplus_submit_bio(sb->s_bdev, sbi->part_start + sbi->sect_count - 2, - sbi->s_backup_vhdr_buf, NULL, WRITE_SYNC); + sbi->s_backup_vhdr, WRITE_SYNC); if (!error) error2 = error; out: @@ -257,8 +251,8 @@ static void hfsplus_put_super(struct super_block *sb) hfs_btree_close(sbi->ext_tree); iput(sbi->alloc_file); 
iput(sbi->hidden_dir); - kfree(sbi->s_vhdr_buf); - kfree(sbi->s_backup_vhdr_buf); + kfree(sbi->s_vhdr); + kfree(sbi->s_backup_vhdr); unload_nls(sbi->nls); kfree(sb->s_fs_info); sb->s_fs_info = NULL; @@ -399,13 +393,6 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if (!sbi->rsrc_clump_blocks) sbi->rsrc_clump_blocks = 1; - err = generic_check_addressable(sbi->alloc_blksz_shift, - sbi->total_blocks); - if (err) { - printk(KERN_ERR "hfs: filesystem size too large.\n"); - goto out_free_vhdr; - } - /* Set up operations so we can load metadata */ sb->s_op = &hfsplus_sops; sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -430,8 +417,6 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) sb->s_flags |= MS_RDONLY; } - err = -EINVAL; - /* Load metadata objects (B*Trees) */ sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); if (!sbi->ext_tree) { @@ -462,9 +447,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; str.name = HFSP_HIDDENDIR_NAME; - err = hfs_find_init(sbi->cat_tree, &fd); - if (err) - goto out_put_root; + hfs_find_init(sbi->cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { hfs_find_exit(&fd); diff --git a/trunk/fs/hfsplus/unicode.c b/trunk/fs/hfsplus/unicode.c index a32998f29f0b..a3f0bfcc881e 100644 --- a/trunk/fs/hfsplus/unicode.c +++ b/trunk/fs/hfsplus/unicode.c @@ -142,11 +142,7 @@ int hfsplus_uni2asc(struct super_block *sb, /* search for single decomposed char */ if (likely(compose)) ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0); - if (ce1) - cc = ce1[0]; - else - cc = 0; - if (cc) { + if (ce1 && (cc = ce1[0])) { /* start of a possibly decomposed Hangul char */ if (cc != 0xffff) goto done; @@ -213,8 +209,7 @@ int hfsplus_uni2asc(struct super_block *sb, i++; ce2 = ce1; } - cc = ce2[0]; - if (cc) { + if ((cc = ce2[0])) { ip += i; ustrlen -= i; 
goto done; @@ -306,11 +301,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { size = asc2unichar(sb, astr, len, &c); - if (decompose) - dstr = decompose_unichar(c, &dsize); - else - dstr = NULL; - if (dstr) { + if (decompose && (dstr = decompose_unichar(c, &dsize))) { if (outlen + dsize > HFSPLUS_MAX_STRLEN) break; do { @@ -355,23 +346,15 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, astr += size; len -= size; - if (decompose) - dstr = decompose_unichar(c, &dsize); - else - dstr = NULL; - if (dstr) { + if (decompose && (dstr = decompose_unichar(c, &dsize))) { do { c2 = *dstr++; - if (casefold) - c2 = case_fold(c2); - if (!casefold || c2) + if (!casefold || (c2 = case_fold(c2))) hash = partial_name_hash(c2, hash); } while (--dsize > 0); } else { c2 = c; - if (casefold) - c2 = case_fold(c2); - if (!casefold || c2) + if (!casefold || (c2 = case_fold(c2))) hash = partial_name_hash(c2, hash); } } @@ -439,14 +422,12 @@ int hfsplus_compare_dentry(const struct dentry *parent, c1 = *dstr1; c2 = *dstr2; if (casefold) { - c1 = case_fold(c1); - if (!c1) { + if (!(c1 = case_fold(c1))) { dstr1++; dsize1--; continue; } - c2 = case_fold(c2); - if (!c2) { + if (!(c2 = case_fold(c2))) { dstr2++; dsize2--; continue; diff --git a/trunk/fs/hfsplus/wrapper.c b/trunk/fs/hfsplus/wrapper.c index 10e515a0d452..4ac88ff79aa6 100644 --- a/trunk/fs/hfsplus/wrapper.c +++ b/trunk/fs/hfsplus/wrapper.c @@ -31,67 +31,25 @@ static void hfsplus_end_io_sync(struct bio *bio, int err) complete(bio->bi_private); } -/* - * hfsplus_submit_bio - Perfrom block I/O - * @sb: super block of volume for I/O - * @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes - * @buf: buffer for I/O - * @data: output pointer for location of requested data - * @rw: direction of I/O - * - * The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than - * HFSPLUS_SECTOR_SIZE, and @buf must 
be sized accordingly. On reads - * @data will return a pointer to the start of the requested sector, - * which may not be the same location as @buf. - * - * If @sector is not aligned to the bdev logical block size it will - * be rounded down. For writes this means that @buf should contain data - * that starts at the rounded-down address. As long as the data was - * read using hfsplus_submit_bio() and the same buffer is used things - * will work correctly. - */ -int hfsplus_submit_bio(struct super_block *sb, sector_t sector, - void *buf, void **data, int rw) +int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, + void *data, int rw) { DECLARE_COMPLETION_ONSTACK(wait); struct bio *bio; int ret = 0; - unsigned int io_size; - loff_t start; - int offset; - - /* - * Align sector to hardware sector size and find offset. We - * assume that io_size is a power of two, which _should_ - * be true. - */ - io_size = hfsplus_min_io_size(sb); - start = (loff_t)sector << HFSPLUS_SECTOR_SHIFT; - offset = start & (io_size - 1); - sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1); bio = bio_alloc(GFP_NOIO, 1); bio->bi_sector = sector; - bio->bi_bdev = sb->s_bdev; + bio->bi_bdev = bdev; bio->bi_end_io = hfsplus_end_io_sync; bio->bi_private = &wait; - if (!(rw & WRITE) && data) - *data = (u8 *)buf + offset; - - while (io_size > 0) { - unsigned int page_offset = offset_in_page(buf); - unsigned int len = min_t(unsigned int, PAGE_SIZE - page_offset, - io_size); - - ret = bio_add_page(bio, virt_to_page(buf), len, page_offset); - if (ret != len) { - ret = -EIO; - goto out; - } - io_size -= len; - buf = (u8 *)buf + len; - } + /* + * We always submit one sector at a time, so bio_add_page must not fail. 
+ */ + if (bio_add_page(bio, virt_to_page(data), HFSPLUS_SECTOR_SIZE, + offset_in_page(data)) != HFSPLUS_SECTOR_SIZE) + BUG(); submit_bio(rw, bio); wait_for_completion(&wait); @@ -99,9 +57,8 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, if (!bio_flagged(bio, BIO_UPTODATE)) ret = -EIO; -out: bio_put(bio); - return ret < 0 ? ret : 0; + return ret; } static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) @@ -184,19 +141,23 @@ int hfsplus_read_wrapper(struct super_block *sb) if (hfsplus_get_last_session(sb, &part_start, &part_size)) goto out; + if ((u64)part_start + part_size > 0x100000000ULL) { + pr_err("hfs: volumes larger than 2TB are not supported yet\n"); + goto out; + } error = -ENOMEM; - sbi->s_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL); - if (!sbi->s_vhdr_buf) + sbi->s_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); + if (!sbi->s_vhdr) goto out; - sbi->s_backup_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL); - if (!sbi->s_backup_vhdr_buf) + sbi->s_backup_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); + if (!sbi->s_backup_vhdr) goto out_free_vhdr; reread: - error = hfsplus_submit_bio(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, - sbi->s_vhdr_buf, (void **)&sbi->s_vhdr, - READ); + error = hfsplus_submit_bio(sb->s_bdev, + part_start + HFSPLUS_VOLHEAD_SECTOR, + sbi->s_vhdr, READ); if (error) goto out_free_backup_vhdr; @@ -211,9 +172,8 @@ int hfsplus_read_wrapper(struct super_block *sb) if (!hfsplus_read_mdb(sbi->s_vhdr, &wd)) goto out_free_backup_vhdr; wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT; - part_start += (sector_t)wd.ablk_start + - (sector_t)wd.embed_start * wd.ablk_size; - part_size = (sector_t)wd.embed_count * wd.ablk_size; + part_start += wd.ablk_start + wd.embed_start * wd.ablk_size; + part_size = wd.embed_count * wd.ablk_size; goto reread; default: /* @@ -226,9 +186,9 @@ int hfsplus_read_wrapper(struct super_block *sb) goto reread; } - error = hfsplus_submit_bio(sb, part_start + part_size - 2, - 
sbi->s_backup_vhdr_buf, - (void **)&sbi->s_backup_vhdr, READ); + error = hfsplus_submit_bio(sb->s_bdev, + part_start + part_size - 2, + sbi->s_backup_vhdr, READ); if (error) goto out_free_backup_vhdr; diff --git a/trunk/fs/ubifs/commit.c b/trunk/fs/ubifs/commit.c index fb3b5c813a30..87cd0ead8633 100644 --- a/trunk/fs/ubifs/commit.c +++ b/trunk/fs/ubifs/commit.c @@ -78,7 +78,7 @@ static int nothing_to_commit(struct ubifs_info *c) * If the root TNC node is dirty, we definitely have something to * commit. */ - if (c->zroot.znode && ubifs_zn_dirty(c->zroot.znode)) + if (c->zroot.znode && test_bit(DIRTY_ZNODE, &c->zroot.znode->flags)) return 0; /* @@ -418,7 +418,7 @@ int ubifs_run_commit(struct ubifs_info *c) spin_lock(&c->cs_lock); if (c->cmt_state == COMMIT_BROKEN) { - err = -EROFS; + err = -EINVAL; goto out; } @@ -444,7 +444,7 @@ int ubifs_run_commit(struct ubifs_info *c) * re-check it. */ if (c->cmt_state == COMMIT_BROKEN) { - err = -EROFS; + err = -EINVAL; goto out_cmt_unlock; } @@ -576,7 +576,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) struct idx_node *i; size_t sz; - if (!dbg_is_chk_index(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) return 0; INIT_LIST_HEAD(&list); diff --git a/trunk/fs/ubifs/debug.c b/trunk/fs/ubifs/debug.c index eef109a1a927..0bb2bcef0de9 100644 --- a/trunk/fs/ubifs/debug.c +++ b/trunk/fs/ubifs/debug.c @@ -27,12 +27,13 @@ * various local functions of those subsystems. 
*/ +#define UBIFS_DBG_PRESERVE_UBI + +#include "ubifs.h" #include +#include #include #include -#include -#include -#include "ubifs.h" #ifdef CONFIG_UBIFS_FS_DEBUG @@ -41,6 +42,15 @@ DEFINE_SPINLOCK(dbg_lock); static char dbg_key_buf0[128]; static char dbg_key_buf1[128]; +unsigned int ubifs_chk_flags; +unsigned int ubifs_tst_flags; + +module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); +module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); + +MODULE_PARM_DESC(debug_chks, "Debug check flags"); +MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); + static const char *get_key_fmt(int fmt) { switch (fmt) { @@ -81,28 +91,6 @@ static const char *get_key_type(int type) } } -static const char *get_dent_type(int type) -{ - switch (type) { - case UBIFS_ITYPE_REG: - return "file"; - case UBIFS_ITYPE_DIR: - return "dir"; - case UBIFS_ITYPE_LNK: - return "symlink"; - case UBIFS_ITYPE_BLK: - return "blkdev"; - case UBIFS_ITYPE_CHR: - return "char dev"; - case UBIFS_ITYPE_FIFO: - return "fifo"; - case UBIFS_ITYPE_SOCK: - return "socket"; - default: - return "unknown/invalid type"; - } -} - static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, char *buffer) { @@ -246,13 +234,9 @@ static void dump_ch(const struct ubifs_ch *ch) printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len)); } -void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) +void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) { const struct ubifs_inode *ui = ubifs_inode(inode); - struct qstr nm = { .name = NULL }; - union ubifs_key key; - struct ubifs_dent_node *dent, *pdent = NULL; - int count = 2; printk(KERN_DEBUG "Dump in-memory inode:"); printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino); @@ -286,32 +270,6 @@ void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read); printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row); 
printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len); - - if (!S_ISDIR(inode->i_mode)) - return; - - printk(KERN_DEBUG "List of directory entries:\n"); - ubifs_assert(!mutex_is_locked(&c->tnc_mutex)); - - lowest_dent_key(c, &key, inode->i_ino); - while (1) { - dent = ubifs_tnc_next_ent(c, &key, &nm); - if (IS_ERR(dent)) { - if (PTR_ERR(dent) != -ENOENT) - printk(KERN_DEBUG "error %ld\n", PTR_ERR(dent)); - break; - } - - printk(KERN_DEBUG "\t%d: %s (%s)\n", - count++, dent->name, get_dent_type(dent->type)); - - nm.name = dent->name; - nm.len = le16_to_cpu(dent->nlen); - kfree(pdent); - pdent = dent; - key_read(c, &dent->key, &key); - } - kfree(pdent); } void dbg_dump_node(const struct ubifs_info *c, const void *node) @@ -320,7 +278,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) union ubifs_key key; const struct ubifs_ch *ch = node; - if (dbg_is_tst_rcvry(c)) + if (dbg_failure_mode) return; /* If the magic is incorrect, just hexdump the first bytes */ @@ -876,7 +834,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) struct ubifs_scan_node *snod; void *buf; - if (dbg_is_tst_rcvry(c)) + if (dbg_failure_mode) return; printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", @@ -1122,7 +1080,6 @@ int dbg_check_space_info(struct ubifs_info *c) /** * dbg_check_synced_i_size - check synchronized inode size. - * @c: UBIFS file-system description object * @inode: inode to check * * If inode is clean, synchronized inode size has to be equivalent to current @@ -1130,12 +1087,12 @@ int dbg_check_space_info(struct ubifs_info *c) * has to be locked). Returns %0 if synchronized inode size if correct, and * %-EINVAL if not. 
*/ -int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode) +int dbg_check_synced_i_size(struct inode *inode) { int err = 0; struct ubifs_inode *ui = ubifs_inode(inode); - if (!dbg_is_chk_gen(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) return 0; if (!S_ISREG(inode->i_mode)) return 0; @@ -1168,7 +1125,7 @@ int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode) * Note, it is good idea to make sure the @dir->i_mutex is locked before * calling this function. */ -int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) +int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) { unsigned int nlink = 2; union ubifs_key key; @@ -1176,7 +1133,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) struct qstr nm = { .name = NULL }; loff_t size = UBIFS_INO_NODE_SZ; - if (!dbg_is_chk_gen(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) return 0; if (!S_ISDIR(dir->i_mode)) @@ -1210,14 +1167,12 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) "but calculated size is %llu", dir->i_ino, (unsigned long long)i_size_read(dir), (unsigned long long)size); - dbg_dump_inode(c, dir); dump_stack(); return -EINVAL; } if (dir->i_nlink != nlink) { ubifs_err("directory inode %lu has nlink %u, but calculated " "nlink is %u", dir->i_ino, dir->i_nlink, nlink); - dbg_dump_inode(c, dir); dump_stack(); return -EINVAL; } @@ -1534,7 +1489,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra) long clean_cnt = 0, dirty_cnt = 0; int err, last; - if (!dbg_is_chk_index(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_TNC)) return 0; ubifs_assert(mutex_is_locked(&c->tnc_mutex)); @@ -1781,7 +1736,7 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) int err; long long calc = 0; - if (!dbg_is_chk_index(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ)) return 0; err = dbg_walk_index(c, NULL, add_size, &calc); @@ -2357,7 +2312,7 @@ int dbg_check_filesystem(struct ubifs_info *c) int err; struct fsck_data 
fsckd; - if (!dbg_is_chk_fs(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_FS)) return 0; fsckd.inodes = RB_ROOT; @@ -2392,7 +2347,7 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) struct list_head *cur; struct ubifs_scan_node *sa, *sb; - if (!dbg_is_chk_gen(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) return 0; for (cur = head->next; cur->next != head; cur = cur->next) { @@ -2459,7 +2414,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) struct list_head *cur; struct ubifs_scan_node *sa, *sb; - if (!dbg_is_chk_gen(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) return 0; for (cur = head->next; cur->next != head; cur = cur->next) { @@ -2536,141 +2491,214 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) return 0; } -static inline int chance(unsigned int n, unsigned int out_of) +int dbg_force_in_the_gaps(void) { - return !!((random32() % out_of) + 1 <= n); + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + return 0; + return !(random32() & 7); } -static int power_cut_emulated(struct ubifs_info *c, int lnum, int write) +/* Failure mode for recovery testing */ + +#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d)) + +struct failure_mode_info { + struct list_head list; + struct ubifs_info *c; +}; + +static LIST_HEAD(fmi_list); +static DEFINE_SPINLOCK(fmi_lock); + +static unsigned int next; + +static int simple_rand(void) { - struct ubifs_debug_info *d = c->dbg; + if (next == 0) + next = current->pid; + next = next * 1103515245 + 12345; + return (next >> 16) & 32767; +} + +static void failure_mode_init(struct ubifs_info *c) +{ + struct failure_mode_info *fmi; - ubifs_assert(dbg_is_tst_rcvry(c)); + fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS); + if (!fmi) { + ubifs_err("Failed to register failure mode - no memory"); + return; + } + fmi->c = c; + spin_lock(&fmi_lock); + list_add_tail(&fmi->list, &fmi_list); + spin_unlock(&fmi_lock); +} + +static void 
failure_mode_exit(struct ubifs_info *c) +{ + struct failure_mode_info *fmi, *tmp; - if (!d->pc_cnt) { - /* First call - decide delay to the power cut */ + spin_lock(&fmi_lock); + list_for_each_entry_safe(fmi, tmp, &fmi_list, list) + if (fmi->c == c) { + list_del(&fmi->list); + kfree(fmi); + } + spin_unlock(&fmi_lock); +} + +static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc) +{ + struct failure_mode_info *fmi; + + spin_lock(&fmi_lock); + list_for_each_entry(fmi, &fmi_list, list) + if (fmi->c->ubi == desc) { + struct ubifs_info *c = fmi->c; + + spin_unlock(&fmi_lock); + return c; + } + spin_unlock(&fmi_lock); + return NULL; +} + +static int in_failure_mode(struct ubi_volume_desc *desc) +{ + struct ubifs_info *c = dbg_find_info(desc); + + if (c && dbg_failure_mode) + return c->dbg->failure_mode; + return 0; +} + +static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) +{ + struct ubifs_info *c = dbg_find_info(desc); + struct ubifs_debug_info *d; + + if (!c || !dbg_failure_mode) + return 0; + d = c->dbg; + if (d->failure_mode) + return 1; + if (!d->fail_cnt) { + /* First call - decide delay to failure */ if (chance(1, 2)) { - unsigned long delay; + unsigned int delay = 1 << (simple_rand() >> 11); if (chance(1, 2)) { - d->pc_delay = 1; - /* Fail withing 1 minute */ - delay = random32() % 60000; - d->pc_timeout = jiffies; - d->pc_timeout += msecs_to_jiffies(delay); - ubifs_warn("failing after %lums", delay); + d->fail_delay = 1; + d->fail_timeout = jiffies + + msecs_to_jiffies(delay); + dbg_rcvry("failing after %ums", delay); } else { - d->pc_delay = 2; - delay = random32() % 10000; - /* Fail within 10000 operations */ - d->pc_cnt_max = delay; - ubifs_warn("failing after %lu calls", delay); + d->fail_delay = 2; + d->fail_cnt_max = delay; + dbg_rcvry("failing after %u calls", delay); } } - - d->pc_cnt += 1; + d->fail_cnt += 1; } - /* Determine if failure delay has expired */ - if (d->pc_delay == 1 && time_before(jiffies, d->pc_timeout)) + 
if (d->fail_delay == 1) { + if (time_before(jiffies, d->fail_timeout)) return 0; - if (d->pc_delay == 2 && d->pc_cnt++ < d->pc_cnt_max) + } else if (d->fail_delay == 2) + if (d->fail_cnt++ < d->fail_cnt_max) return 0; - if (lnum == UBIFS_SB_LNUM) { - if (write && chance(1, 2)) - return 0; - if (chance(19, 20)) + if (write) { + if (chance(1, 2)) + return 0; + } else if (chance(19, 20)) return 0; - ubifs_warn("failing in super block LEB %d", lnum); + dbg_rcvry("failing in super block LEB %d", lnum); } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) { if (chance(19, 20)) return 0; - ubifs_warn("failing in master LEB %d", lnum); + dbg_rcvry("failing in master LEB %d", lnum); } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) { - if (write && chance(99, 100)) + if (write) { + if (chance(99, 100)) + return 0; + } else if (chance(399, 400)) return 0; - if (chance(399, 400)) - return 0; - ubifs_warn("failing in log LEB %d", lnum); + dbg_rcvry("failing in log LEB %d", lnum); } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) { - if (write && chance(7, 8)) + if (write) { + if (chance(7, 8)) + return 0; + } else if (chance(19, 20)) return 0; - if (chance(19, 20)) - return 0; - ubifs_warn("failing in LPT LEB %d", lnum); + dbg_rcvry("failing in LPT LEB %d", lnum); } else if (lnum >= c->orph_first && lnum <= c->orph_last) { - if (write && chance(1, 2)) - return 0; - if (chance(9, 10)) + if (write) { + if (chance(1, 2)) + return 0; + } else if (chance(9, 10)) return 0; - ubifs_warn("failing in orphan LEB %d", lnum); + dbg_rcvry("failing in orphan LEB %d", lnum); } else if (lnum == c->ihead_lnum) { if (chance(99, 100)) return 0; - ubifs_warn("failing in index head LEB %d", lnum); + dbg_rcvry("failing in index head LEB %d", lnum); } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) { if (chance(9, 10)) return 0; - ubifs_warn("failing in GC head LEB %d", lnum); + dbg_rcvry("failing in GC head LEB %d", lnum); } else if (write && 
!RB_EMPTY_ROOT(&c->buds) && !ubifs_search_bud(c, lnum)) { if (chance(19, 20)) return 0; - ubifs_warn("failing in non-bud LEB %d", lnum); + dbg_rcvry("failing in non-bud LEB %d", lnum); } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND || c->cmt_state == COMMIT_RUNNING_REQUIRED) { if (chance(999, 1000)) return 0; - ubifs_warn("failing in bud LEB %d commit running", lnum); + dbg_rcvry("failing in bud LEB %d commit running", lnum); } else { if (chance(9999, 10000)) return 0; - ubifs_warn("failing in bud LEB %d commit not running", lnum); + dbg_rcvry("failing in bud LEB %d commit not running", lnum); } - - d->pc_happened = 1; - ubifs_warn("========== Power cut emulated =========="); + ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); + d->failure_mode = 1; dump_stack(); return 1; } -static void cut_data(const void *buf, unsigned int len) +static void cut_data(const void *buf, int len) { - unsigned int from, to, i, ffs = chance(1, 2); + int flen, i; unsigned char *p = (void *)buf; - from = random32() % (len + 1); - if (chance(1, 2)) - to = random32() % (len - from + 1); - else - to = len; - - if (from < to) - ubifs_warn("filled bytes %u-%u with %s", from, to - 1, - ffs ? 
"0xFFs" : "random data"); + flen = (len * (long long)simple_rand()) >> 15; + for (i = flen; i < len; i++) + p[i] = 0xff; +} - if (ffs) - for (i = from; i < to; i++) - p[i] = 0xFF; - else - for (i = from; i < to; i++) - p[i] = random32() % 0x100; +int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, + int len, int check) +{ + if (in_failure_mode(desc)) + return -EROFS; + return ubi_leb_read(desc, lnum, buf, offset, len, check); } -int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, - int offs, int len, int dtype) +int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, + int offset, int len, int dtype) { int err, failing; - if (c->dbg->pc_happened) + if (in_failure_mode(desc)) return -EROFS; - - failing = power_cut_emulated(c, lnum, 1); + failing = do_fail(desc, lnum, 1); if (failing) cut_data(buf, len); - err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); + err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); if (err) return err; if (failing) @@ -2678,207 +2706,162 @@ int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, return 0; } -int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, +int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, int len, int dtype) { int err; - if (c->dbg->pc_happened) + if (do_fail(desc, lnum, 1)) return -EROFS; - if (power_cut_emulated(c, lnum, 1)) - return -EROFS; - err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); + err = ubi_leb_change(desc, lnum, buf, len, dtype); if (err) return err; - if (power_cut_emulated(c, lnum, 1)) + if (do_fail(desc, lnum, 1)) return -EROFS; return 0; } -int dbg_leb_unmap(struct ubifs_info *c, int lnum) +int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum) { int err; - if (c->dbg->pc_happened) - return -EROFS; - if (power_cut_emulated(c, lnum, 0)) + if (do_fail(desc, lnum, 0)) return -EROFS; - err = ubi_leb_unmap(c->ubi, lnum); + err = ubi_leb_erase(desc, lnum); if (err) return 
err; - if (power_cut_emulated(c, lnum, 0)) + if (do_fail(desc, lnum, 0)) return -EROFS; return 0; } -int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype) +int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum) { int err; - if (c->dbg->pc_happened) + if (do_fail(desc, lnum, 0)) return -EROFS; - if (power_cut_emulated(c, lnum, 0)) - return -EROFS; - err = ubi_leb_map(c->ubi, lnum, dtype); + err = ubi_leb_unmap(desc, lnum); if (err) return err; - if (power_cut_emulated(c, lnum, 0)) + if (do_fail(desc, lnum, 0)) return -EROFS; return 0; } -/* - * Root directory for UBIFS stuff in debugfs. Contains sub-directories which - * contain the stuff specific to particular file-system mounts. - */ -static struct dentry *dfs_rootdir; +int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) +{ + if (in_failure_mode(desc)) + return -EROFS; + return ubi_is_mapped(desc, lnum); +} -static int dfs_file_open(struct inode *inode, struct file *file) +int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) { - file->private_data = inode->i_private; - return nonseekable_open(inode, file); + int err; + + if (do_fail(desc, lnum, 0)) + return -EROFS; + err = ubi_leb_map(desc, lnum, dtype); + if (err) + return err; + if (do_fail(desc, lnum, 0)) + return -EROFS; + return 0; } /** - * provide_user_output - provide output to the user reading a debugfs file. - * @val: boolean value for the answer - * @u: the buffer to store the answer at - * @count: size of the buffer - * @ppos: position in the @u output buffer + * ubifs_debugging_init - initialize UBIFS debugging. + * @c: UBIFS file-system description object * - * This is a simple helper function which stores @val boolean value in the user - * buffer when the user reads one of UBIFS debugfs files. Returns amount of - * bytes written to @u in case of success and a negative error code in case of + * This function initializes debugging-related data for the file system. 
+ * Returns zero in case of success and a negative error code in case of * failure. */ -static int provide_user_output(int val, char __user *u, size_t count, - loff_t *ppos) +int ubifs_debugging_init(struct ubifs_info *c) { - char buf[3]; - - if (val) - buf[0] = '1'; - else - buf[0] = '0'; - buf[1] = '\n'; - buf[2] = 0x00; + c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); + if (!c->dbg) + return -ENOMEM; - return simple_read_from_buffer(u, count, ppos, buf, 2); + failure_mode_init(c); + return 0; } -static ssize_t dfs_file_read(struct file *file, char __user *u, size_t count, - loff_t *ppos) +/** + * ubifs_debugging_exit - free debugging data. + * @c: UBIFS file-system description object + */ +void ubifs_debugging_exit(struct ubifs_info *c) { - struct dentry *dent = file->f_path.dentry; - struct ubifs_info *c = file->private_data; - struct ubifs_debug_info *d = c->dbg; - int val; - - if (dent == d->dfs_chk_gen) - val = d->chk_gen; - else if (dent == d->dfs_chk_index) - val = d->chk_index; - else if (dent == d->dfs_chk_orph) - val = d->chk_orph; - else if (dent == d->dfs_chk_lprops) - val = d->chk_lprops; - else if (dent == d->dfs_chk_fs) - val = d->chk_fs; - else if (dent == d->dfs_tst_rcvry) - val = d->tst_rcvry; - else - return -EINVAL; - - return provide_user_output(val, u, count, ppos); + failure_mode_exit(c); + kfree(c->dbg); } +/* + * Root directory for UBIFS stuff in debugfs. Contains sub-directories which + * contain the stuff specific to particular file-system mounts. + */ +static struct dentry *dfs_rootdir; + /** - * interpret_user_input - interpret user debugfs file input. - * @u: user-provided buffer with the input - * @count: buffer size + * dbg_debugfs_init - initialize debugfs file-system. * - * This is a helper function which interpret user input to a boolean UBIFS - * debugfs file. Returns %0 or %1 in case of success and a negative error code - * in case of failure. 
+ * UBIFS uses debugfs file-system to expose various debugging knobs to + * user-space. This function creates "ubifs" directory in the debugfs + * file-system. Returns zero in case of success and a negative error code in + * case of failure. */ -static int interpret_user_input(const char __user *u, size_t count) +int dbg_debugfs_init(void) { - size_t buf_size; - char buf[8]; + dfs_rootdir = debugfs_create_dir("ubifs", NULL); + if (IS_ERR(dfs_rootdir)) { + int err = PTR_ERR(dfs_rootdir); + ubifs_err("cannot create \"ubifs\" debugfs directory, " + "error %d\n", err); + return err; + } - buf_size = min_t(size_t, count, (sizeof(buf) - 1)); - if (copy_from_user(buf, u, buf_size)) - return -EFAULT; + return 0; +} - if (buf[0] == '1') - return 1; - else if (buf[0] == '0') - return 0; +/** + * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. + */ +void dbg_debugfs_exit(void) +{ + debugfs_remove(dfs_rootdir); +} - return -EINVAL; +static int open_debugfs_file(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return nonseekable_open(inode, file); } -static ssize_t dfs_file_write(struct file *file, const char __user *u, - size_t count, loff_t *ppos) +static ssize_t write_debugfs_file(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { struct ubifs_info *c = file->private_data; struct ubifs_debug_info *d = c->dbg; - struct dentry *dent = file->f_path.dentry; - int val; - /* - * TODO: this is racy - the file-system might have already been - * unmounted and we'd oops in this case. The plan is to fix it with - * help of 'iterate_supers_type()' which we should have in v3.0: when - * a debugfs opened, we rember FS's UUID in file->private_data. Then - * whenever we access the FS via a debugfs file, we iterate all UBIFS - * superblocks and fine the one with the same UUID, and take the - * locking right. 
- * - * The other way to go suggested by Al Viro is to create a separate - * 'ubifs-debug' file-system instead. - */ - if (file->f_path.dentry == d->dfs_dump_lprops) { + if (file->f_path.dentry == d->dfs_dump_lprops) dbg_dump_lprops(c); - return count; - } - if (file->f_path.dentry == d->dfs_dump_budg) { + else if (file->f_path.dentry == d->dfs_dump_budg) dbg_dump_budg(c, &c->bi); - return count; - } - if (file->f_path.dentry == d->dfs_dump_tnc) { + else if (file->f_path.dentry == d->dfs_dump_tnc) { mutex_lock(&c->tnc_mutex); dbg_dump_tnc(c); mutex_unlock(&c->tnc_mutex); - return count; - } - - val = interpret_user_input(u, count); - if (val < 0) - return val; - - if (dent == d->dfs_chk_gen) - d->chk_gen = val; - else if (dent == d->dfs_chk_index) - d->chk_index = val; - else if (dent == d->dfs_chk_orph) - d->chk_orph = val; - else if (dent == d->dfs_chk_lprops) - d->chk_lprops = val; - else if (dent == d->dfs_chk_fs) - d->chk_fs = val; - else if (dent == d->dfs_tst_rcvry) - d->tst_rcvry = val; - else + } else return -EINVAL; return count; } static const struct file_operations dfs_fops = { - .open = dfs_file_open, - .read = dfs_file_read, - .write = dfs_file_write, + .open = open_debugfs_file, + .write = write_debugfs_file, .owner = THIS_MODULE, .llseek = no_llseek, }; @@ -2897,20 +2880,12 @@ static const struct file_operations dfs_fops = { */ int dbg_debugfs_init_fs(struct ubifs_info *c) { - int err, n; + int err; const char *fname; struct dentry *dent; struct ubifs_debug_info *d = c->dbg; - n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, - c->vi.ubi_num, c->vi.vol_id); - if (n == UBIFS_DFS_DIR_LEN) { - /* The array size is too small */ - fname = UBIFS_DFS_DIR_NAME; - dent = ERR_PTR(-EINVAL); - goto out; - } - + sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); fname = d->dfs_dir_name; dent = debugfs_create_dir(fname, dfs_rootdir); if (IS_ERR_OR_NULL(dent)) @@ -2935,55 +2910,13 @@ int dbg_debugfs_init_fs(struct ubifs_info 
*c) goto out_remove; d->dfs_dump_tnc = dent; - fname = "chk_general"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_gen = dent; - - fname = "chk_index"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_index = dent; - - fname = "chk_orphans"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_orph = dent; - - fname = "chk_lprops"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_lprops = dent; - - fname = "chk_fs"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_fs = dent; - - fname = "tst_recovery"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_tst_rcvry = dent; - return 0; out_remove: debugfs_remove_recursive(d->dfs_dir); out: err = dent ? 
PTR_ERR(dent) : -ENODEV; - ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", + ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", fname, err); return err; } @@ -2997,179 +2930,4 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c) debugfs_remove_recursive(c->dbg->dfs_dir); } -struct ubifs_global_debug_info ubifs_dbg; - -static struct dentry *dfs_chk_gen; -static struct dentry *dfs_chk_index; -static struct dentry *dfs_chk_orph; -static struct dentry *dfs_chk_lprops; -static struct dentry *dfs_chk_fs; -static struct dentry *dfs_tst_rcvry; - -static ssize_t dfs_global_file_read(struct file *file, char __user *u, - size_t count, loff_t *ppos) -{ - struct dentry *dent = file->f_path.dentry; - int val; - - if (dent == dfs_chk_gen) - val = ubifs_dbg.chk_gen; - else if (dent == dfs_chk_index) - val = ubifs_dbg.chk_index; - else if (dent == dfs_chk_orph) - val = ubifs_dbg.chk_orph; - else if (dent == dfs_chk_lprops) - val = ubifs_dbg.chk_lprops; - else if (dent == dfs_chk_fs) - val = ubifs_dbg.chk_fs; - else if (dent == dfs_tst_rcvry) - val = ubifs_dbg.tst_rcvry; - else - return -EINVAL; - - return provide_user_output(val, u, count, ppos); -} - -static ssize_t dfs_global_file_write(struct file *file, const char __user *u, - size_t count, loff_t *ppos) -{ - struct dentry *dent = file->f_path.dentry; - int val; - - val = interpret_user_input(u, count); - if (val < 0) - return val; - - if (dent == dfs_chk_gen) - ubifs_dbg.chk_gen = val; - else if (dent == dfs_chk_index) - ubifs_dbg.chk_index = val; - else if (dent == dfs_chk_orph) - ubifs_dbg.chk_orph = val; - else if (dent == dfs_chk_lprops) - ubifs_dbg.chk_lprops = val; - else if (dent == dfs_chk_fs) - ubifs_dbg.chk_fs = val; - else if (dent == dfs_tst_rcvry) - ubifs_dbg.tst_rcvry = val; - else - return -EINVAL; - - return count; -} - -static const struct file_operations dfs_global_fops = { - .read = dfs_global_file_read, - .write = dfs_global_file_write, - .owner = THIS_MODULE, - .llseek = 
no_llseek, -}; - -/** - * dbg_debugfs_init - initialize debugfs file-system. - * - * UBIFS uses debugfs file-system to expose various debugging knobs to - * user-space. This function creates "ubifs" directory in the debugfs - * file-system. Returns zero in case of success and a negative error code in - * case of failure. - */ -int dbg_debugfs_init(void) -{ - int err; - const char *fname; - struct dentry *dent; - - fname = "ubifs"; - dent = debugfs_create_dir(fname, NULL); - if (IS_ERR_OR_NULL(dent)) - goto out; - dfs_rootdir = dent; - - fname = "chk_general"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_gen = dent; - - fname = "chk_index"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_index = dent; - - fname = "chk_orphans"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_orph = dent; - - fname = "chk_lprops"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_lprops = dent; - - fname = "chk_fs"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_fs = dent; - - fname = "tst_recovery"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_tst_rcvry = dent; - - return 0; - -out_remove: - debugfs_remove_recursive(dfs_rootdir); -out: - err = dent ? PTR_ERR(dent) : -ENODEV; - ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", - fname, err); - return err; -} - -/** - * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. 
- */ -void dbg_debugfs_exit(void) -{ - debugfs_remove_recursive(dfs_rootdir); -} - -/** - * ubifs_debugging_init - initialize UBIFS debugging. - * @c: UBIFS file-system description object - * - * This function initializes debugging-related data for the file system. - * Returns zero in case of success and a negative error code in case of - * failure. - */ -int ubifs_debugging_init(struct ubifs_info *c) -{ - c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); - if (!c->dbg) - return -ENOMEM; - - return 0; -} - -/** - * ubifs_debugging_exit - free debugging data. - * @c: UBIFS file-system description object - */ -void ubifs_debugging_exit(struct ubifs_info *c) -{ - kfree(c->dbg); -} - #endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/trunk/fs/ubifs/debug.h b/trunk/fs/ubifs/debug.h index 45174b534377..a811ac4a26bb 100644 --- a/trunk/fs/ubifs/debug.h +++ b/trunk/fs/ubifs/debug.h @@ -31,25 +31,18 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, #ifdef CONFIG_UBIFS_FS_DEBUG -/* - * The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi" - * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte. - */ -#define UBIFS_DFS_DIR_NAME "ubi%d_%d" -#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1) +#include /** * ubifs_debug_info - per-FS debugging information. 
* @old_zroot: old index root - used by 'dbg_check_old_index()' * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' - * - * @pc_happened: non-zero if an emulated power cut happened - * @pc_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls - * @pc_timeout: time in jiffies when delay of failure mode expires - * @pc_cnt: current number of calls to failure mode I/O functions - * @pc_cnt_max: number of calls by which to delay failure mode - * + * @failure_mode: failure mode for recovery testing + * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls + * @fail_timeout: time in jiffies when delay of failure mode expires + * @fail_cnt: current number of calls to failure mode I/O functions + * @fail_cnt_max: number of calls by which to delay failure mode * @chk_lpt_sz: used by LPT tree size checker * @chk_lpt_sz2: used by LPT tree size checker * @chk_lpt_wastage: used by LPT tree size checker @@ -63,36 +56,21 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, * @saved_free: saved amount of free space * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt * - * @chk_gen: if general extra checks are enabled - * @chk_index: if index xtra checks are enabled - * @chk_orph: if orphans extra checks are enabled - * @chk_lprops: if lprops extra checks are enabled - * @chk_fs: if UBIFS contents extra checks are enabled - * @tst_rcvry: if UBIFS recovery testing mode enabled - * * @dfs_dir_name: name of debugfs directory containing this file-system's files * @dfs_dir: direntry object of the file-system debugfs directory * @dfs_dump_lprops: "dump lprops" debugfs knob * @dfs_dump_budg: "dump budgeting information" debugfs knob * @dfs_dump_tnc: "dump TNC" debugfs knob - * @dfs_chk_gen: debugfs knob to enable UBIFS general extra checks - * @dfs_chk_index: debugfs knob to enable UBIFS index extra checks - * @dfs_chk_orph: debugfs knob to enable UBIFS 
orphans extra checks - * @dfs_chk_lprops: debugfs knob to enable UBIFS LEP properties extra checks - * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks - * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing */ struct ubifs_debug_info { struct ubifs_zbranch old_zroot; int old_zroot_level; unsigned long long old_zroot_sqnum; - - int pc_happened; - int pc_delay; - unsigned long pc_timeout; - unsigned int pc_cnt; - unsigned int pc_cnt_max; - + int failure_mode; + int fail_delay; + unsigned long fail_timeout; + unsigned int fail_cnt; + unsigned int fail_cnt_max; long long chk_lpt_sz; long long chk_lpt_sz2; long long chk_lpt_wastage; @@ -106,43 +84,11 @@ struct ubifs_debug_info { long long saved_free; int saved_idx_gc_cnt; - unsigned int chk_gen:1; - unsigned int chk_index:1; - unsigned int chk_orph:1; - unsigned int chk_lprops:1; - unsigned int chk_fs:1; - unsigned int tst_rcvry:1; - - char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1]; + char dfs_dir_name[100]; struct dentry *dfs_dir; struct dentry *dfs_dump_lprops; struct dentry *dfs_dump_budg; struct dentry *dfs_dump_tnc; - struct dentry *dfs_chk_gen; - struct dentry *dfs_chk_index; - struct dentry *dfs_chk_orph; - struct dentry *dfs_chk_lprops; - struct dentry *dfs_chk_fs; - struct dentry *dfs_tst_rcvry; -}; - -/** - * ubifs_global_debug_info - global (not per-FS) UBIFS debugging information. 
- * - * @chk_gen: if general extra checks are enabled - * @chk_index: if index xtra checks are enabled - * @chk_orph: if orphans extra checks are enabled - * @chk_lprops: if lprops extra checks are enabled - * @chk_fs: if UBIFS contents extra checks are enabled - * @tst_rcvry: if UBIFS recovery testing mode enabled - */ -struct ubifs_global_debug_info { - unsigned int chk_gen:1; - unsigned int chk_index:1; - unsigned int chk_orph:1; - unsigned int chk_lprops:1; - unsigned int chk_fs:1; - unsigned int tst_rcvry:1; }; #define ubifs_assert(expr) do { \ @@ -181,8 +127,6 @@ const char *dbg_key_str1(const struct ubifs_info *c, #define DBGKEY(key) dbg_key_str0(c, (key)) #define DBGKEY1(key) dbg_key_str1(c, (key)) -extern spinlock_t dbg_lock; - #define ubifs_dbg_msg(type, fmt, ...) do { \ spin_lock(&dbg_lock); \ pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \ @@ -218,36 +162,41 @@ extern spinlock_t dbg_lock; /* Additional recovery messages */ #define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) -extern struct ubifs_global_debug_info ubifs_dbg; +/* + * Debugging check flags. 
+ * + * UBIFS_CHK_GEN: general checks + * UBIFS_CHK_TNC: check TNC + * UBIFS_CHK_IDX_SZ: check index size + * UBIFS_CHK_ORPH: check orphans + * UBIFS_CHK_OLD_IDX: check the old index + * UBIFS_CHK_LPROPS: check lprops + * UBIFS_CHK_FS: check the file-system + */ +enum { + UBIFS_CHK_GEN = 0x1, + UBIFS_CHK_TNC = 0x2, + UBIFS_CHK_IDX_SZ = 0x4, + UBIFS_CHK_ORPH = 0x8, + UBIFS_CHK_OLD_IDX = 0x10, + UBIFS_CHK_LPROPS = 0x20, + UBIFS_CHK_FS = 0x40, +}; -static inline int dbg_is_chk_gen(const struct ubifs_info *c) -{ - return !!(ubifs_dbg.chk_gen || c->dbg->chk_gen); -} -static inline int dbg_is_chk_index(const struct ubifs_info *c) -{ - return !!(ubifs_dbg.chk_index || c->dbg->chk_index); -} -static inline int dbg_is_chk_orph(const struct ubifs_info *c) -{ - return !!(ubifs_dbg.chk_orph || c->dbg->chk_orph); -} -static inline int dbg_is_chk_lprops(const struct ubifs_info *c) -{ - return !!(ubifs_dbg.chk_lprops || c->dbg->chk_lprops); -} -static inline int dbg_is_chk_fs(const struct ubifs_info *c) -{ - return !!(ubifs_dbg.chk_fs || c->dbg->chk_fs); -} -static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) -{ - return !!(ubifs_dbg.tst_rcvry || c->dbg->tst_rcvry); -} -static inline int dbg_is_power_cut(const struct ubifs_info *c) -{ - return !!c->dbg->pc_happened; -} +/* + * Special testing flags. 
+ * + * UBIFS_TST_RCVRY: failure mode for recovery testing + */ +enum { + UBIFS_TST_RCVRY = 0x4, +}; + +extern spinlock_t dbg_lock; + +extern unsigned int ubifs_msg_flags; +extern unsigned int ubifs_chk_flags; +extern unsigned int ubifs_tst_flags; int ubifs_debugging_init(struct ubifs_info *c); void ubifs_debugging_exit(struct ubifs_info *c); @@ -258,7 +207,7 @@ const char *dbg_cstate(int cmt_state); const char *dbg_jhead(int jhead); const char *dbg_get_key_dump(const struct ubifs_info *c, const union ubifs_key *key); -void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode); +void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); void dbg_dump_node(const struct ubifs_info *c, const void *node); void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, int offs); @@ -291,8 +240,8 @@ int dbg_check_cats(struct ubifs_info *c); int dbg_check_ltab(struct ubifs_info *c); int dbg_chk_lpt_free_spc(struct ubifs_info *c); int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len); -int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode); -int dbg_check_dir(struct ubifs_info *c, const struct inode *dir); +int dbg_check_synced_i_size(struct inode *inode); +int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); int dbg_check_tnc(struct ubifs_info *c, int extra); int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); int dbg_check_filesystem(struct ubifs_info *c); @@ -305,12 +254,54 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head); int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); -int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, - int len, int dtype); -int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, - int dtype); -int dbg_leb_unmap(struct ubifs_info *c, int lnum); -int dbg_leb_map(struct 
ubifs_info *c, int lnum, int dtype); +/* Force the use of in-the-gaps method for testing */ +static inline int dbg_force_in_the_gaps_enabled(void) +{ + return ubifs_chk_flags & UBIFS_CHK_GEN; +} +int dbg_force_in_the_gaps(void); + +/* Failure mode for recovery testing */ +#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) + +#ifndef UBIFS_DBG_PRESERVE_UBI +#define ubi_leb_read dbg_leb_read +#define ubi_leb_write dbg_leb_write +#define ubi_leb_change dbg_leb_change +#define ubi_leb_erase dbg_leb_erase +#define ubi_leb_unmap dbg_leb_unmap +#define ubi_is_mapped dbg_is_mapped +#define ubi_leb_map dbg_leb_map +#endif + +int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, + int len, int check); +int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, + int offset, int len, int dtype); +int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, + int len, int dtype); +int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum); +int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum); +int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum); +int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); + +static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf, + int offset, int len) +{ + return dbg_leb_read(desc, lnum, buf, offset, len, 0); +} + +static inline int dbg_write(struct ubi_volume_desc *desc, int lnum, + const void *buf, int offset, int len) +{ + return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN); +} + +static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, + const void *buf, int len) +{ + return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN); +} /* Debugfs-related stuff */ int dbg_debugfs_init(void); @@ -322,7 +313,7 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); /* Use "if (0)" to make compiler check arguments even if debugging is off */ #define ubifs_assert(expr) do { \ - if (0) \ + if (0 && (expr)) \ printk(KERN_CRIT 
"UBIFS assert failed in %s at %u (pid %d)\n", \ __func__, __LINE__, current->pid); \ } while (0) @@ -332,9 +323,6 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); ubifs_err(fmt, ##__VA_ARGS__); \ } while (0) -#define DBGKEY(key) ((char *)(key)) -#define DBGKEY1(key) ((char *)(key)) - #define ubifs_dbg_msg(fmt, ...) do { \ if (0) \ pr_debug(fmt "\n", ##__VA_ARGS__); \ @@ -358,6 +346,9 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); #define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) #define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) +#define DBGKEY(key) ((char *)(key)) +#define DBGKEY1(key) ((char *)(key)) + static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; } static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; } static inline const char *dbg_ntype(int type) { return ""; } @@ -366,7 +357,7 @@ static inline const char *dbg_jhead(int jhead) { return ""; } static inline const char * dbg_get_key_dump(const struct ubifs_info *c, const union ubifs_key *key) { return ""; } -static inline void dbg_dump_inode(struct ubifs_info *c, +static inline void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) { return; } static inline void dbg_dump_node(const struct ubifs_info *c, const void *node) { return; } @@ -418,11 +409,9 @@ static inline int dbg_check_ltab(struct ubifs_info *c) { return 0; } static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; } static inline int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) { return 0; } -static inline int -dbg_check_synced_i_size(const struct ubifs_info *c, - struct inode *inode) { return 0; } -static inline int dbg_check_dir(struct ubifs_info *c, - const struct inode *dir) { return 0; } +static inline int dbg_check_synced_i_size(struct inode *inode) { return 0; } +static inline int dbg_check_dir_size(struct ubifs_info *c, + const struct inode *dir) { return 0; } static inline int dbg_check_tnc(struct ubifs_info *c, int 
extra) { return 0; } static inline int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) { return 0; } @@ -442,23 +431,9 @@ static inline int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) { return 0; } -static inline int dbg_leb_write(struct ubifs_info *c, int lnum, - const void *buf, int offset, - int len, int dtype) { return 0; } -static inline int dbg_leb_change(struct ubifs_info *c, int lnum, - const void *buf, int len, - int dtype) { return 0; } -static inline int dbg_leb_unmap(struct ubifs_info *c, int lnum) { return 0; } -static inline int dbg_leb_map(struct ubifs_info *c, int lnum, - int dtype) { return 0; } - -static inline int dbg_is_chk_gen(const struct ubifs_info *c) { return 0; } -static inline int dbg_is_chk_index(const struct ubifs_info *c) { return 0; } -static inline int dbg_is_chk_orph(const struct ubifs_info *c) { return 0; } -static inline int dbg_is_chk_lprops(const struct ubifs_info *c) { return 0; } -static inline int dbg_is_chk_fs(const struct ubifs_info *c) { return 0; } -static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) { return 0; } -static inline int dbg_is_power_cut(const struct ubifs_info *c) { return 0; } +static inline int dbg_force_in_the_gaps(void) { return 0; } +#define dbg_force_in_the_gaps_enabled() 0 +#define dbg_failure_mode 0 static inline int dbg_debugfs_init(void) { return 0; } static inline void dbg_debugfs_exit(void) { return; } diff --git a/trunk/fs/ubifs/dir.c b/trunk/fs/ubifs/dir.c index 683492043317..ef5abd38f0bf 100644 --- a/trunk/fs/ubifs/dir.c +++ b/trunk/fs/ubifs/dir.c @@ -102,7 +102,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, * UBIFS has to fully control "clean <-> dirty" transitions of inodes * to make budgeting work. 
*/ - inode->i_flags |= S_NOCMTIME; + inode->i_flags |= (S_NOCMTIME); inode_init_owner(inode, dir, mode); inode->i_mtime = inode->i_atime = inode->i_ctime = @@ -172,11 +172,9 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, #ifdef CONFIG_UBIFS_FS_DEBUG -static int dbg_check_name(const struct ubifs_info *c, - const struct ubifs_dent_node *dent, - const struct qstr *nm) +static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) { - if (!dbg_is_chk_gen(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) return 0; if (le16_to_cpu(dent->nlen) != nm->len) return -EINVAL; @@ -187,7 +185,7 @@ static int dbg_check_name(const struct ubifs_info *c, #else -#define dbg_check_name(c, dent, nm) 0 +#define dbg_check_name(dent, nm) 0 #endif @@ -221,7 +219,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - if (dbg_check_name(c, dent, &dentry->d_name)) { + if (dbg_check_name(dent, &dentry->d_name)) { err = -EINVAL; goto out; } @@ -524,7 +522,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); - err = dbg_check_synced_i_size(c, inode); + err = dbg_check_synced_i_size(inode); if (err) return err; @@ -579,7 +577,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) inode->i_nlink, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); - err = dbg_check_synced_i_size(c, inode); + err = dbg_check_synced_i_size(inode); if (err) return err; diff --git a/trunk/fs/ubifs/file.c b/trunk/fs/ubifs/file.c index 7cf738a4544d..5e7fccfc4b29 100644 --- a/trunk/fs/ubifs/file.c +++ b/trunk/fs/ubifs/file.c @@ -1263,7 +1263,7 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; - err = dbg_check_synced_i_size(c, inode); + err = dbg_check_synced_i_size(inode); if (err) return err; diff --git 
a/trunk/fs/ubifs/io.c b/trunk/fs/ubifs/io.c index 9228950a658f..3be645e012c9 100644 --- a/trunk/fs/ubifs/io.c +++ b/trunk/fs/ubifs/io.c @@ -86,125 +86,8 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) c->no_chk_data_crc = 0; c->vfs_sb->s_flags |= MS_RDONLY; ubifs_warn("switched to read-only mode, error %d", err); - dump_stack(); - } -} - -/* - * Below are simple wrappers over UBI I/O functions which include some - * additional checks and UBIFS debugging stuff. See corresponding UBI function - * for more information. - */ - -int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, - int len, int even_ebadmsg) -{ - int err; - - err = ubi_read(c->ubi, lnum, buf, offs, len); - /* - * In case of %-EBADMSG print the error message only if the - * @even_ebadmsg is true. - */ - if (err && (err != -EBADMSG || even_ebadmsg)) { - ubifs_err("reading %d bytes from LEB %d:%d failed, error %d", - len, lnum, offs, err); - dbg_dump_stack(); - } - return err; -} - -int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, - int len, int dtype) -{ - int err; - - ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_error) - return -EROFS; - if (!dbg_is_tst_rcvry(c)) - err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); - else - err = dbg_leb_write(c, lnum, buf, offs, len, dtype); - if (err) { - ubifs_err("writing %d bytes to LEB %d:%d failed, error %d", - len, lnum, offs, err); - ubifs_ro_mode(c, err); - dbg_dump_stack(); - } - return err; -} - -int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, - int dtype) -{ - int err; - - ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_error) - return -EROFS; - if (!dbg_is_tst_rcvry(c)) - err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); - else - err = dbg_leb_change(c, lnum, buf, len, dtype); - if (err) { - ubifs_err("changing %d bytes in LEB %d failed, error %d", - len, lnum, err); - ubifs_ro_mode(c, err); - dbg_dump_stack(); - } - return err; -} - -int 
ubifs_leb_unmap(struct ubifs_info *c, int lnum) -{ - int err; - - ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_error) - return -EROFS; - if (!dbg_is_tst_rcvry(c)) - err = ubi_leb_unmap(c->ubi, lnum); - else - err = dbg_leb_unmap(c, lnum); - if (err) { - ubifs_err("unmap LEB %d failed, error %d", lnum, err); - ubifs_ro_mode(c, err); - dbg_dump_stack(); - } - return err; -} - -int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype) -{ - int err; - - ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_error) - return -EROFS; - if (!dbg_is_tst_rcvry(c)) - err = ubi_leb_map(c->ubi, lnum, dtype); - else - err = dbg_leb_map(c, lnum, dtype); - if (err) { - ubifs_err("mapping LEB %d failed, error %d", lnum, err); - ubifs_ro_mode(c, err); - dbg_dump_stack(); - } - return err; -} - -int ubifs_is_mapped(const struct ubifs_info *c, int lnum) -{ - int err; - - err = ubi_is_mapped(c->ubi, lnum); - if (err < 0) { - ubifs_err("ubi_is_mapped failed for LEB %d, error %d", - lnum, err); dbg_dump_stack(); } - return err; } /** @@ -523,10 +406,14 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) dirt = sync_len - wbuf->used; if (dirt) ubifs_pad(c, wbuf->buf + wbuf->used, dirt); - err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, sync_len, - wbuf->dtype); - if (err) + err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, + sync_len, wbuf->dtype); + if (err) { + ubifs_err("cannot write %d bytes to LEB %d:%d", + sync_len, wbuf->lnum, wbuf->offs); + dbg_dump_stack(); return err; + } spin_lock(&wbuf->lock); wbuf->offs += sync_len; @@ -718,9 +605,9 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) if (aligned_len == wbuf->avail) { dbg_io("flush jhead %s wbuf to LEB %d:%d", dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); - err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, - wbuf->offs, wbuf->size, - wbuf->dtype); + err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, + wbuf->offs, wbuf->size, + wbuf->dtype); if (err) goto out; 
@@ -755,8 +642,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) dbg_io("flush jhead %s wbuf to LEB %d:%d", dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); - err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, - wbuf->size, wbuf->dtype); + err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, + wbuf->size, wbuf->dtype); if (err) goto out; @@ -774,8 +661,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) */ dbg_io("write %d bytes to LEB %d:%d", wbuf->size, wbuf->lnum, wbuf->offs); - err = ubifs_leb_write(c, wbuf->lnum, buf, wbuf->offs, - wbuf->size, wbuf->dtype); + err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs, + wbuf->size, wbuf->dtype); if (err) goto out; @@ -796,8 +683,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) n <<= c->max_write_shift; dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, wbuf->offs); - err = ubifs_leb_write(c, wbuf->lnum, buf + written, - wbuf->offs, n, wbuf->dtype); + err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, + wbuf->offs, n, wbuf->dtype); if (err) goto out; wbuf->offs += n; @@ -879,9 +766,13 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, return -EROFS; ubifs_prepare_node(c, buf, len, 1); - err = ubifs_leb_write(c, lnum, buf, offs, buf_len, dtype); - if (err) + err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype); + if (err) { + ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", + buf_len, lnum, offs, err); dbg_dump_node(c, buf); + dbg_dump_stack(); + } return err; } @@ -933,9 +824,13 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, if (rlen > 0) { /* Read everything that goes before write-buffer */ - err = ubifs_leb_read(c, lnum, buf, offs, rlen, 0); - if (err && err != -EBADMSG) + err = ubi_read(c->ubi, lnum, buf, offs, rlen); + if (err && err != -EBADMSG) { + ubifs_err("failed to read 
node %d from LEB %d:%d, " + "error %d", type, lnum, offs, err); + dbg_dump_stack(); return err; + } } if (type != ch->node_type) { @@ -990,9 +885,12 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, ubifs_assert(!(offs & 7) && offs < c->leb_size); ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); - err = ubifs_leb_read(c, lnum, buf, offs, len, 0); - if (err && err != -EBADMSG) + err = ubi_read(c->ubi, lnum, buf, offs, len); + if (err && err != -EBADMSG) { + ubifs_err("cannot read node %d from LEB %d:%d, error %d", + type, lnum, offs, err); return err; + } if (type != ch->node_type) { ubifs_err("bad node type (%d but expected %d)", diff --git a/trunk/fs/ubifs/log.c b/trunk/fs/ubifs/log.c index f9fd068d1ae0..affea9494ae2 100644 --- a/trunk/fs/ubifs/log.c +++ b/trunk/fs/ubifs/log.c @@ -262,7 +262,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) * an unclean reboot, because the target LEB might have been * unmapped, but not yet physically erased. 
*/ - err = ubifs_leb_map(c, bud->lnum, UBI_SHORTTERM); + err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM); if (err) goto out_unlock; } @@ -283,6 +283,8 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) return 0; out_unlock: + if (err != -EAGAIN) + ubifs_ro_mode(c, err); mutex_unlock(&c->log_mutex); kfree(ref); kfree(bud); @@ -750,7 +752,7 @@ static int dbg_check_bud_bytes(struct ubifs_info *c) struct ubifs_bud *bud; long long bud_bytes = 0; - if (!dbg_is_chk_gen(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) return 0; spin_lock(&c->buds_lock); diff --git a/trunk/fs/ubifs/lprops.c b/trunk/fs/ubifs/lprops.c index f8a181e647cc..667884f4a615 100644 --- a/trunk/fs/ubifs/lprops.c +++ b/trunk/fs/ubifs/lprops.c @@ -504,7 +504,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) pnode = (struct ubifs_pnode *)container_of(lprops - pos, struct ubifs_pnode, lprops[0]); - return !test_bit(COW_CNODE, &pnode->flags) && + return !test_bit(COW_ZNODE, &pnode->flags) && test_bit(DIRTY_CNODE, &pnode->flags); } @@ -860,7 +860,7 @@ int dbg_check_cats(struct ubifs_info *c) struct list_head *pos; int i, cat; - if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c)) + if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) return 0; list_for_each_entry(lprops, &c->empty_list, list) { @@ -958,7 +958,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, { int i = 0, j, err = 0; - if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c)) + if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) return; for (i = 0; i < heap->cnt; i++) { @@ -1262,7 +1262,7 @@ int dbg_check_lprops(struct ubifs_info *c) int i, err; struct ubifs_lp_stats lst; - if (!dbg_is_chk_lprops(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) return 0; /* diff --git a/trunk/fs/ubifs/lpt.c b/trunk/fs/ubifs/lpt.c index 6189c74d97f0..ef5155e109a2 100644 --- a/trunk/fs/ubifs/lpt.c +++ b/trunk/fs/ubifs/lpt.c @@ -701,8 +701,8 @@ int 
ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, alen = ALIGN(len, c->min_io_size); set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubifs_leb_change(c, lnum++, buf, alen, - UBI_SHORTTERM); + err = ubi_leb_change(c->ubi, lnum++, buf, alen, + UBI_SHORTTERM); if (err) goto out; p = buf; @@ -732,8 +732,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubifs_leb_change(c, lnum++, buf, alen, - UBI_SHORTTERM); + err = ubi_leb_change(c->ubi, lnum++, buf, alen, + UBI_SHORTTERM); if (err) goto out; p = buf; @@ -780,8 +780,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, alen = ALIGN(len, c->min_io_size); set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubifs_leb_change(c, lnum++, buf, alen, - UBI_SHORTTERM); + err = ubi_leb_change(c->ubi, lnum++, buf, alen, + UBI_SHORTTERM); if (err) goto out; p = buf; @@ -806,7 +806,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, alen = ALIGN(len, c->min_io_size); set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubifs_leb_change(c, lnum++, buf, alen, UBI_SHORTTERM); + err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM); if (err) goto out; p = buf; @@ -826,7 +826,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, /* Write remaining buffer */ memset(p, 0xff, alen - len); - err = ubifs_leb_change(c, lnum, buf, alen, UBI_SHORTTERM); + err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM); if (err) goto out; @@ -1222,7 +1222,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) if (c->big_lpt) nnode->num = calc_nnode_num_from_parent(c, parent, iip); } else { - err = ubifs_leb_read(c, lnum, buf, offs, c->nnode_sz, 1); + err = 
ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); if (err) goto out; err = ubifs_unpack_nnode(c, buf, nnode); @@ -1247,7 +1247,6 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) out: ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); - dbg_dump_stack(); kfree(nnode); return err; } @@ -1291,7 +1290,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) lprops->flags = ubifs_categorize_lprops(c, lprops); } } else { - err = ubifs_leb_read(c, lnum, buf, offs, c->pnode_sz, 1); + err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz); if (err) goto out; err = unpack_pnode(c, buf, pnode); @@ -1313,7 +1312,6 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) out: ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); dbg_dump_pnode(c, pnode, parent, iip); - dbg_dump_stack(); dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); kfree(pnode); return err; @@ -1333,7 +1331,7 @@ static int read_ltab(struct ubifs_info *c) buf = vmalloc(c->ltab_sz); if (!buf) return -ENOMEM; - err = ubifs_leb_read(c, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz, 1); + err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz); if (err) goto out; err = unpack_ltab(c, buf); @@ -1356,8 +1354,7 @@ static int read_lsave(struct ubifs_info *c) buf = vmalloc(c->lsave_sz); if (!buf) return -ENOMEM; - err = ubifs_leb_read(c, c->lsave_lnum, buf, c->lsave_offs, - c->lsave_sz, 1); + err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz); if (err) goto out; err = unpack_lsave(c, buf); @@ -1817,8 +1814,8 @@ static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c, if (c->big_lpt) nnode->num = calc_nnode_num_from_parent(c, parent, iip); } else { - err = ubifs_leb_read(c, branch->lnum, buf, branch->offs, - c->nnode_sz, 1); + err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, + c->nnode_sz); if (err) return ERR_PTR(err); err = ubifs_unpack_nnode(c, 
buf, nnode); @@ -1886,8 +1883,8 @@ static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c, ubifs_assert(branch->lnum >= c->lpt_first && branch->lnum <= c->lpt_last); ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size); - err = ubifs_leb_read(c, branch->lnum, buf, branch->offs, - c->pnode_sz, 1); + err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, + c->pnode_sz); if (err) return ERR_PTR(err); err = unpack_pnode(c, buf, pnode); @@ -2227,7 +2224,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, struct ubifs_cnode *cn; int num, iip = 0, err; - if (!dbg_is_chk_lprops(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) return 0; while (cnode) { diff --git a/trunk/fs/ubifs/lpt_commit.c b/trunk/fs/ubifs/lpt_commit.c index cddd6bd214f4..dfcb5748a7dc 100644 --- a/trunk/fs/ubifs/lpt_commit.c +++ b/trunk/fs/ubifs/lpt_commit.c @@ -27,7 +27,6 @@ #include #include -#include #include "ubifs.h" #ifdef CONFIG_UBIFS_FS_DEBUG @@ -117,8 +116,8 @@ static int get_cnodes_to_commit(struct ubifs_info *c) return 0; cnt += 1; while (1) { - ubifs_assert(!test_bit(COW_CNODE, &cnode->flags)); - __set_bit(COW_CNODE, &cnode->flags); + ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags)); + __set_bit(COW_ZNODE, &cnode->flags); cnext = next_dirty_cnode(cnode); if (!cnext) { cnode->cnext = c->lpt_cnext; @@ -466,7 +465,7 @@ static int write_cnodes(struct ubifs_info *c) */ clear_bit(DIRTY_CNODE, &cnode->flags); smp_mb__before_clear_bit(); - clear_bit(COW_CNODE, &cnode->flags); + clear_bit(COW_ZNODE, &cnode->flags); smp_mb__after_clear_bit(); offs += len; dbg_chk_lpt_sz(c, 1, len); @@ -1161,11 +1160,11 @@ static int lpt_gc_lnum(struct ubifs_info *c, int lnum) void *buf = c->lpt_buf; dbg_lp("LEB %d", lnum); - - err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); - if (err) + err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); + if (err) { + ubifs_err("cannot read LEB %d, error %d", lnum, err); return err; - + } while (1) { if (!is_a_node(c, buf, len)) { int 
pad_len; @@ -1641,7 +1640,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) int ret; void *buf, *p; - if (!dbg_is_chk_lprops(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) return 0; buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); @@ -1651,11 +1650,11 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) } dbg_lp("LEB %d", lnum); - - err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); - if (err) + err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); + if (err) { + dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err); goto out; - + } while (1) { if (!is_a_node(c, p, len)) { int i, pad_len; @@ -1712,7 +1711,7 @@ int dbg_check_ltab(struct ubifs_info *c) { int lnum, err, i, cnt; - if (!dbg_is_chk_lprops(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) return 0; /* Bring the entire tree into memory */ @@ -1755,7 +1754,7 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) long long free = 0; int i; - if (!dbg_is_chk_lprops(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) return 0; for (i = 0; i < c->lpt_lebs; i++) { @@ -1797,7 +1796,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) long long chk_lpt_sz, lpt_sz; int err = 0; - if (!dbg_is_chk_lprops(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) return 0; switch (action) { @@ -1902,10 +1901,11 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) return; } - err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); - if (err) + err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); + if (err) { + ubifs_err("cannot read LEB %d, error %d", lnum, err); goto out; - + } while (1) { offs = c->leb_size - len; if (!is_a_node(c, p, len)) { @@ -2019,7 +2019,7 @@ static int dbg_populate_lsave(struct ubifs_info *c) struct ubifs_lpt_heap *heap; int i; - if (!dbg_is_chk_gen(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) return 0; if (random32() & 3) return 0; diff --git a/trunk/fs/ubifs/misc.h b/trunk/fs/ubifs/misc.h index ee7cb5ebb6e8..0b5296a9a4c5 100644 --- 
a/trunk/fs/ubifs/misc.h +++ b/trunk/fs/ubifs/misc.h @@ -38,29 +38,6 @@ static inline int ubifs_zn_dirty(const struct ubifs_znode *znode) return !!test_bit(DIRTY_ZNODE, &znode->flags); } -/** - * ubifs_zn_obsolete - check if znode is obsolete. - * @znode: znode to check - * - * This helper function returns %1 if @znode is obsolete and %0 otherwise. - */ -static inline int ubifs_zn_obsolete(const struct ubifs_znode *znode) -{ - return !!test_bit(OBSOLETE_ZNODE, &znode->flags); -} - -/** - * ubifs_zn_cow - check if znode has to be copied on write. - * @znode: znode to check - * - * This helper function returns %1 if @znode is has COW flag set and %0 - * otherwise. - */ -static inline int ubifs_zn_cow(const struct ubifs_znode *znode) -{ - return !!test_bit(COW_ZNODE, &znode->flags); -} - /** * ubifs_wake_up_bgt - wake up background thread. * @c: UBIFS file-system description object @@ -144,6 +121,86 @@ static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf) return err; } +/** + * ubifs_leb_unmap - unmap an LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number to unmap + * + * This function returns %0 on success and a negative error code on failure. + */ +static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + err = ubi_leb_unmap(c->ubi, lnum); + if (err) { + ubifs_err("unmap LEB %d failed, error %d", lnum, err); + return err; + } + + return 0; +} + +/** + * ubifs_leb_write - write to a LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number to write + * @buf: buffer to write from + * @offs: offset within LEB to write to + * @len: length to write + * @dtype: data type + * + * This function returns %0 on success and a negative error code on failure. 
+ */ +static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum, + const void *buf, int offs, int len, int dtype) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); + if (err) { + ubifs_err("writing %d bytes at %d:%d, error %d", + len, lnum, offs, err); + return err; + } + + return 0; +} + +/** + * ubifs_leb_change - atomic LEB change. + * @c: UBIFS file-system description object + * @lnum: LEB number to write + * @buf: buffer to write from + * @len: length to write + * @dtype: data type + * + * This function returns %0 on success and a negative error code on failure. + */ +static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum, + const void *buf, int len, int dtype) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); + if (err) { + ubifs_err("changing %d bytes in LEB %d, error %d", + len, lnum, err); + return err; + } + + return 0; +} + /** * ubifs_encode_dev - encode device node IDs. 
* @dev: UBIFS device node information diff --git a/trunk/fs/ubifs/orphan.c b/trunk/fs/ubifs/orphan.c index c542c73cfa3c..a5422fffbd69 100644 --- a/trunk/fs/ubifs/orphan.c +++ b/trunk/fs/ubifs/orphan.c @@ -929,7 +929,7 @@ static int dbg_check_orphans(struct ubifs_info *c) struct check_info ci; int err; - if (!dbg_is_chk_orph(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_ORPH)) return 0; ci.last_ino = 0; diff --git a/trunk/fs/ubifs/recovery.c b/trunk/fs/ubifs/recovery.c index af02790d9328..783d8e0beb76 100644 --- a/trunk/fs/ubifs/recovery.c +++ b/trunk/fs/ubifs/recovery.c @@ -117,7 +117,7 @@ static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf, if (!sbuf) return -ENOMEM; - err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0); + err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size); if (err && err != -EBADMSG) goto out_free; @@ -213,10 +213,10 @@ static int write_rcvrd_mst_node(struct ubifs_info *c, mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY); ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); - err = ubifs_leb_change(c, lnum, mst, sz, UBI_SHORTTERM); + err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); if (err) goto out; - err = ubifs_leb_change(c, lnum + 1, mst, sz, UBI_SHORTTERM); + err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM); if (err) goto out; out: @@ -274,8 +274,7 @@ int ubifs_recover_master_node(struct ubifs_info *c) if (cor1) goto out_err; mst = mst1; - } else if (offs1 == 0 && - c->leb_size - offs2 - sz < sz) { + } else if (offs1 == 0 && offs2 + sz >= c->leb_size) { /* 1st LEB was unmapped and written, 2nd not */ if (cor1) goto out_err; @@ -540,8 +539,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, int len = ALIGN(endpt, c->min_io_size); if (start) { - err = ubifs_leb_read(c, lnum, sleb->buf, 0, - start, 1); + err = ubi_read(c->ubi, lnum, sleb->buf, 0, + start); if (err) return err; } @@ -555,8 +554,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, 
ubifs_pad(c, buf, pad_len); } } - err = ubifs_leb_change(c, lnum, sleb->buf, len, - UBI_UNKNOWN); + err = ubi_leb_change(c->ubi, lnum, sleb->buf, len, + UBI_UNKNOWN); if (err) return err; } @@ -820,8 +819,7 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, return -ENOMEM; if (c->leb_size - offs < UBIFS_CS_NODE_SZ) goto out_err; - err = ubifs_leb_read(c, lnum, (void *)cs_node, offs, - UBIFS_CS_NODE_SZ, 0); + err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ); if (err && err != -EBADMSG) goto out_free; ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); @@ -921,7 +919,8 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, * * This function returns %0 on success and a negative error code on failure. */ -static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf) +static int recover_head(const struct ubifs_info *c, int lnum, int offs, + void *sbuf) { int len = c->max_write_size, err; @@ -932,15 +931,15 @@ static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf) return 0; /* Read at the head location and check it is empty flash */ - err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1); + err = ubi_read(c->ubi, lnum, sbuf, offs, len); if (err || !is_empty(sbuf, len)) { dbg_rcvry("cleaning head at %d:%d", lnum, offs); if (offs == 0) return ubifs_leb_unmap(c, lnum); - err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1); + err = ubi_read(c->ubi, lnum, sbuf, 0, offs); if (err) return err; - return ubifs_leb_change(c, lnum, sbuf, offs, UBI_UNKNOWN); + return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN); } return 0; @@ -963,7 +962,7 @@ static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf) * * This function returns %0 on success and a negative error code on failure. 
*/ -int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf) +int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) { int err; @@ -994,7 +993,7 @@ int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf) * * This function returns %0 on success and a negative error code on failure. */ -static int clean_an_unclean_leb(struct ubifs_info *c, +static int clean_an_unclean_leb(const struct ubifs_info *c, struct ubifs_unclean_leb *ucleb, void *sbuf) { int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1; @@ -1010,7 +1009,7 @@ static int clean_an_unclean_leb(struct ubifs_info *c, return 0; } - err = ubifs_leb_read(c, lnum, buf, offs, len, 0); + err = ubi_read(c->ubi, lnum, buf, offs, len); if (err && err != -EBADMSG) return err; @@ -1070,7 +1069,7 @@ static int clean_an_unclean_leb(struct ubifs_info *c, } /* Write back the LEB atomically */ - err = ubifs_leb_change(c, lnum, sbuf, len, UBI_UNKNOWN); + err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN); if (err) return err; @@ -1090,7 +1089,7 @@ static int clean_an_unclean_leb(struct ubifs_info *c, * * This function returns %0 on success and a negative error code on failure. 
*/ -int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf) +int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) { dbg_rcvry("recovery"); while (!list_empty(&c->unclean_leb_list)) { @@ -1455,7 +1454,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) if (i_size >= e->d_size) return 0; /* Read the LEB */ - err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1); + err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size); if (err) goto out; /* Change the size field and recalculate the CRC */ @@ -1471,7 +1470,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) len -= 1; len = ALIGN(len + 1, c->min_io_size); /* Atomically write the fixed LEB back again */ - err = ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN); + err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); if (err) goto out; dbg_rcvry("inode %lu at %d:%d size %lld -> %lld", diff --git a/trunk/fs/ubifs/replay.c b/trunk/fs/ubifs/replay.c index ccabaf1164b3..5e97161ce4d3 100644 --- a/trunk/fs/ubifs/replay.c +++ b/trunk/fs/ubifs/replay.c @@ -523,7 +523,8 @@ static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) if (!list_is_last(&next->list, &jh->buds_list)) return 0; - err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1); + err = ubi_read(c->ubi, next->lnum, (char *)&data, + next->start, 4); if (err) return 0; diff --git a/trunk/fs/ubifs/sb.c b/trunk/fs/ubifs/sb.c index 93d938ad3d2a..c606f010e8df 100644 --- a/trunk/fs/ubifs/sb.c +++ b/trunk/fs/ubifs/sb.c @@ -674,15 +674,15 @@ static int fixup_leb(struct ubifs_info *c, int lnum, int len) if (len == 0) { dbg_mnt("unmap empty LEB %d", lnum); - return ubifs_leb_unmap(c, lnum); + return ubi_leb_unmap(c->ubi, lnum); } dbg_mnt("fixup LEB %d, data len %d", lnum, len); - err = ubifs_leb_read(c, lnum, c->sbuf, 0, len, 1); + err = ubi_read(c->ubi, lnum, c->sbuf, 0, len); if (err) return err; - return ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN); + return 
ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); } /** diff --git a/trunk/fs/ubifs/scan.c b/trunk/fs/ubifs/scan.c index 37383e8011b1..36216b46f772 100644 --- a/trunk/fs/ubifs/scan.c +++ b/trunk/fs/ubifs/scan.c @@ -148,7 +148,7 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, INIT_LIST_HEAD(&sleb->nodes); sleb->buf = sbuf; - err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0); + err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs); if (err && err != -EBADMSG) { ubifs_err("cannot read %d bytes from LEB %d:%d," " error %d", c->leb_size - offs, lnum, offs, err); @@ -240,7 +240,7 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, int len; ubifs_err("corruption at LEB %d:%d", lnum, offs); - if (dbg_is_tst_rcvry(c)) + if (dbg_failure_mode) return; len = c->leb_size - offs; if (len > 8192) diff --git a/trunk/fs/ubifs/super.c b/trunk/fs/ubifs/super.c index b28121278d46..529be0582029 100644 --- a/trunk/fs/ubifs/super.c +++ b/trunk/fs/ubifs/super.c @@ -85,7 +85,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA) return 4; - if (ui->xattr && !S_ISREG(inode->i_mode)) + if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG) return 5; if (!ubifs_compr_present(ui->compr_type)) { @@ -94,7 +94,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) ubifs_compr_name(ui->compr_type)); } - err = dbg_check_dir(c, inode); + err = dbg_check_dir_size(c, inode); return err; } @@ -914,7 +914,7 @@ static int check_volume_empty(struct ubifs_info *c) c->empty = 1; for (lnum = 0; lnum < c->leb_cnt; lnum++) { - err = ubifs_is_mapped(c, lnum); + err = ubi_is_mapped(c->ubi, lnum); if (unlikely(err < 0)) return err; if (err == 1) { diff --git a/trunk/fs/ubifs/tnc.c b/trunk/fs/ubifs/tnc.c index 066738647685..91b4213dde84 100644 --- a/trunk/fs/ubifs/tnc.c +++ b/trunk/fs/ubifs/tnc.c @@ 
-223,7 +223,7 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c, __set_bit(DIRTY_ZNODE, &zn->flags); __clear_bit(COW_ZNODE, &zn->flags); - ubifs_assert(!ubifs_zn_obsolete(znode)); + ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); __set_bit(OBSOLETE_ZNODE, &znode->flags); if (znode->level != 0) { @@ -271,7 +271,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, struct ubifs_znode *zn; int err; - if (!ubifs_zn_cow(znode)) { + if (!test_bit(COW_ZNODE, &znode->flags)) { /* znode is not being committed */ if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) { atomic_long_inc(&c->dirty_zn_cnt); @@ -462,7 +462,7 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); - err = ubifs_leb_read(c, lnum, buf, offs, len, 1); + err = ubi_read(c->ubi, lnum, buf, offs, len); if (err) { ubifs_err("cannot read node type %d from LEB %d:%d, error %d", type, lnum, offs, err); @@ -1666,7 +1666,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, if (!overlap) { /* We may safely unlock the write-buffer and read the data */ spin_unlock(&wbuf->lock); - return ubifs_leb_read(c, lnum, buf, offs, len, 0); + return ubi_read(c->ubi, lnum, buf, offs, len); } /* Don't read under wbuf */ @@ -1680,7 +1680,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, if (rlen > 0) /* Read everything that goes before write-buffer */ - return ubifs_leb_read(c, lnum, buf, offs, rlen, 0); + return ubi_read(c->ubi, lnum, buf, offs, rlen); return 0; } @@ -1767,7 +1767,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) if (wbuf) err = read_wbuf(wbuf, bu->buf, len, lnum, offs); else - err = ubifs_leb_read(c, lnum, bu->buf, offs, len, 0); + err = ubi_read(c->ubi, lnum, bu->buf, offs, len); /* Check for a race with GC */ if (maybe_leb_gced(c, lnum, bu->gc_seq)) @@ -2423,7 +2423,7 @@ static int tnc_delete(struct 
ubifs_info *c, struct ubifs_znode *znode, int n) */ do { - ubifs_assert(!ubifs_zn_obsolete(znode)); + ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); ubifs_assert(ubifs_zn_dirty(znode)); zp = znode->parent; @@ -2479,8 +2479,9 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) c->zroot.offs = zbr->offs; c->zroot.len = zbr->len; c->zroot.znode = znode; - ubifs_assert(!ubifs_zn_obsolete(zp)); - ubifs_assert(ubifs_zn_dirty(zp)); + ubifs_assert(!test_bit(OBSOLETE_ZNODE, + &zp->flags)); + ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags)); atomic_long_dec(&c->dirty_zn_cnt); if (zp->cnext) { @@ -2864,7 +2865,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c) struct ubifs_znode *znode = cnext; cnext = cnext->cnext; - if (ubifs_zn_obsolete(znode)) + if (test_bit(OBSOLETE_ZNODE, &znode->flags)) kfree(znode); } while (cnext && cnext != c->cnext); } @@ -3300,7 +3301,7 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, if (!S_ISREG(inode->i_mode)) return 0; - if (!dbg_is_chk_gen(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) return 0; block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; @@ -3336,10 +3337,9 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, ubifs_err("inode %lu has size %lld, but there are data at offset %lld " "(data key %s)", (unsigned long)inode->i_ino, size, ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key)); - mutex_unlock(&c->tnc_mutex); dbg_dump_inode(c, inode); dbg_dump_stack(); - return -EINVAL; + err = -EINVAL; out_unlock: mutex_unlock(&c->tnc_mutex); diff --git a/trunk/fs/ubifs/tnc_commit.c b/trunk/fs/ubifs/tnc_commit.c index 4c15f07a8bb2..41920f357bbf 100644 --- a/trunk/fs/ubifs/tnc_commit.c +++ b/trunk/fs/ubifs/tnc_commit.c @@ -22,7 +22,6 @@ /* This file implements TNC functions for committing */ -#include #include "ubifs.h" /** @@ -88,12 +87,8 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx, 
atomic_long_dec(&c->dirty_zn_cnt); ubifs_assert(ubifs_zn_dirty(znode)); - ubifs_assert(ubifs_zn_cow(znode)); + ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); - /* - * Note, unlike 'write_index()' we do not add memory barriers here - * because this function is called with @c->tnc_mutex locked. - */ __clear_bit(DIRTY_ZNODE, &znode->flags); __clear_bit(COW_ZNODE, &znode->flags); @@ -382,7 +377,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) c->gap_lebs = NULL; return err; } - if (!dbg_is_chk_index(c)) { + if (dbg_force_in_the_gaps_enabled()) { /* * Do not print scary warnings if the debugging * option which forces in-the-gaps is enabled. @@ -496,6 +491,25 @@ static int layout_in_empty_space(struct ubifs_info *c) else next_len = ubifs_idx_node_sz(c, cnext->child_cnt); + if (c->min_io_size == 1) { + buf_offs += ALIGN(len, 8); + if (next_len) { + if (buf_offs + next_len <= c->leb_size) + continue; + err = ubifs_update_one_lp(c, lnum, 0, + c->leb_size - buf_offs, 0, 0); + if (err) + return err; + lnum = -1; + continue; + } + err = ubifs_update_one_lp(c, lnum, + c->leb_size - buf_offs, 0, 0, 0); + if (err) + return err; + break; + } + /* Update buffer positions */ wlen = used + len; used += ALIGN(len, 8); @@ -644,7 +658,7 @@ static int get_znodes_to_commit(struct ubifs_info *c) } cnt += 1; while (1) { - ubifs_assert(!ubifs_zn_cow(znode)); + ubifs_assert(!test_bit(COW_ZNODE, &znode->flags)); __set_bit(COW_ZNODE, &znode->flags); znode->alt = 0; cnext = find_next_dirty(znode); @@ -690,7 +704,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt) c->ilebs[c->ileb_cnt++] = lnum; dbg_cmt("LEB %d", lnum); } - if (dbg_is_chk_index(c) && !(random32() & 7)) + if (dbg_force_in_the_gaps()) return -ENOSPC; return 0; } @@ -816,7 +830,7 @@ static int write_index(struct ubifs_info *c) struct ubifs_idx_node *idx; struct ubifs_znode *znode, *cnext; int i, lnum, offs, len, next_len, buf_len, buf_offs, used; - int avail, wlen, err, lnum_pos = 0, blen, nxt_offs; + int 
avail, wlen, err, lnum_pos = 0; cnext = c->enext; if (!cnext) @@ -893,7 +907,7 @@ static int write_index(struct ubifs_info *c) cnext = znode->cnext; ubifs_assert(ubifs_zn_dirty(znode)); - ubifs_assert(ubifs_zn_cow(znode)); + ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); /* * It is important that other threads should see %DIRTY_ZNODE @@ -908,28 +922,6 @@ static int write_index(struct ubifs_info *c) clear_bit(COW_ZNODE, &znode->flags); smp_mb__after_clear_bit(); - /* - * We have marked the znode as clean but have not updated the - * @c->clean_zn_cnt counter. If this znode becomes dirty again - * before 'free_obsolete_znodes()' is called, then - * @c->clean_zn_cnt will be decremented before it gets - * incremented (resulting in 2 decrements for the same znode). - * This means that @c->clean_zn_cnt may become negative for a - * while. - * - * Q: why we cannot increment @c->clean_zn_cnt? - * A: because we do not have the @c->tnc_mutex locked, and the - * following code would be racy and buggy: - * - * if (!ubifs_zn_obsolete(znode)) { - * atomic_long_inc(&c->clean_zn_cnt); - * atomic_long_inc(&ubifs_clean_zn_cnt); - * } - * - * Thus, we just delay the @c->clean_zn_cnt update until we - * have the mutex locked. 
- */ - /* Do not access znode from this point on */ /* Update buffer positions */ @@ -946,38 +938,65 @@ static int write_index(struct ubifs_info *c) else next_len = ubifs_idx_node_sz(c, cnext->child_cnt); - nxt_offs = buf_offs + used + next_len; - if (next_len && nxt_offs <= c->leb_size) { - if (avail > 0) + if (c->min_io_size == 1) { + /* + * Write the prepared index node immediately if there is + * no minimum IO size + */ + err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, + wlen, UBI_SHORTTERM); + if (err) + return err; + buf_offs += ALIGN(wlen, 8); + if (next_len) { + used = 0; + avail = buf_len; + if (buf_offs + next_len > c->leb_size) { + err = ubifs_update_one_lp(c, lnum, + LPROPS_NC, 0, 0, LPROPS_TAKEN); + if (err) + return err; + lnum = -1; + } continue; - else - blen = buf_len; + } } else { - wlen = ALIGN(wlen, 8); - blen = ALIGN(wlen, c->min_io_size); - ubifs_pad(c, c->cbuf + wlen, blen - wlen); - } - - /* The buffer is full or there are no more znodes to do */ - err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, blen, - UBI_SHORTTERM); - if (err) - return err; - buf_offs += blen; - if (next_len) { - if (nxt_offs > c->leb_size) { - err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, - 0, LPROPS_TAKEN); - if (err) - return err; - lnum = -1; + int blen, nxt_offs = buf_offs + used + next_len; + + if (next_len && nxt_offs <= c->leb_size) { + if (avail > 0) + continue; + else + blen = buf_len; + } else { + wlen = ALIGN(wlen, 8); + blen = ALIGN(wlen, c->min_io_size); + ubifs_pad(c, c->cbuf + wlen, blen - wlen); + } + /* + * The buffer is full or there are no more znodes + * to do + */ + err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, + blen, UBI_SHORTTERM); + if (err) + return err; + buf_offs += blen; + if (next_len) { + if (nxt_offs > c->leb_size) { + err = ubifs_update_one_lp(c, lnum, + LPROPS_NC, 0, 0, LPROPS_TAKEN); + if (err) + return err; + lnum = -1; + } + used -= blen; + if (used < 0) + used = 0; + avail = buf_len - used; + memmove(c->cbuf, c->cbuf + 
blen, used); + continue; } - used -= blen; - if (used < 0) - used = 0; - avail = buf_len - used; - memmove(c->cbuf, c->cbuf + blen, used); - continue; } break; } @@ -1010,7 +1029,7 @@ static void free_obsolete_znodes(struct ubifs_info *c) do { znode = cnext; cnext = znode->cnext; - if (ubifs_zn_obsolete(znode)) + if (test_bit(OBSOLETE_ZNODE, &znode->flags)) kfree(znode); else { znode->cnext = NULL; diff --git a/trunk/fs/ubifs/ubifs.h b/trunk/fs/ubifs/ubifs.h index 702b79258e30..f79983d6f860 100644 --- a/trunk/fs/ubifs/ubifs.h +++ b/trunk/fs/ubifs/ubifs.h @@ -230,14 +230,14 @@ enum { * LPT cnode flag bits. * * DIRTY_CNODE: cnode is dirty - * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), - * so it can (and must) be freed when the commit is finished * COW_CNODE: cnode is being committed and must be copied before writing + * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), + * so it can (and must) be freed when the commit is finished */ enum { DIRTY_CNODE = 0, - OBSOLETE_CNODE = 1, - COW_CNODE = 2, + COW_CNODE = 1, + OBSOLETE_CNODE = 2, }; /* @@ -1468,15 +1468,6 @@ extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; /* io.c */ void ubifs_ro_mode(struct ubifs_info *c, int err); -int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, - int len, int even_ebadmsg); -int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, - int len, int dtype); -int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, - int dtype); -int ubifs_leb_unmap(struct ubifs_info *c, int lnum); -int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype); -int ubifs_is_mapped(const struct ubifs_info *c, int lnum); int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, int dtype); @@ -1756,8 +1747,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int 
lnum, int offs, void *sbuf, int jhead); struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf); -int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf); -int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf); +int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); +int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); int ubifs_rcvry_gc_commit(struct ubifs_info *c); int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, int deletion, loff_t new_size); diff --git a/trunk/fs/xfs/Makefile b/trunk/fs/xfs/Makefile index 75bb316529dd..284a7c89697e 100644 --- a/trunk/fs/xfs/Makefile +++ b/trunk/fs/xfs/Makefile @@ -88,6 +88,8 @@ xfs-y += xfs_alloc.o \ xfs_vnodeops.o \ xfs_rw.o +xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o + # Objects in linux/ xfs-y += $(addprefix $(XFS_LINUX)/, \ kmem.o \ diff --git a/trunk/fs/xfs/linux-2.6/xfs_acl.c b/trunk/fs/xfs/linux-2.6/xfs_acl.c index 115ac6919533..39f4f809bb68 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_acl.c +++ b/trunk/fs/xfs/linux-2.6/xfs_acl.c @@ -264,7 +264,7 @@ xfs_set_mode(struct inode *inode, mode_t mode) iattr.ia_mode = mode; iattr.ia_ctime = current_fs_time(inode->i_sb); - error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL); + error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL); } return error; diff --git a/trunk/fs/xfs/linux-2.6/xfs_aops.c b/trunk/fs/xfs/linux-2.6/xfs_aops.c index 26384fe3f26d..79ce38be15a1 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_aops.c +++ b/trunk/fs/xfs/linux-2.6/xfs_aops.c @@ -181,7 +181,6 @@ xfs_setfilesize( isize = xfs_ioend_new_eof(ioend); if (isize) { - trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); ip->i_d.di_size = isize; xfs_mark_inode_dirty(ip); } @@ -895,6 +894,11 @@ xfs_aops_discard_page( * For unwritten space on the page we need to start the conversion to * regular allocated space. * For any other dirty buffer heads on the page we should flush them. 
+ * + * If we detect that a transaction would be required to flush the page, we + * have to check the process flags first, if we are already in a transaction + * or disk I/O during allocations is off, we need to fail the writepage and + * redirty the page. */ STATIC int xfs_vm_writepage( @@ -902,6 +906,7 @@ xfs_vm_writepage( struct writeback_control *wbc) { struct inode *inode = page->mapping->host; + int delalloc, unwritten; struct buffer_head *bh, *head; struct xfs_bmbt_irec imap; xfs_ioend_t *ioend = NULL, *iohead = NULL; @@ -933,10 +938,15 @@ xfs_vm_writepage( goto redirty; /* - * Given that we do not allow direct reclaim to call us, we should - * never be called while in a filesystem transaction. + * We need a transaction if there are delalloc or unwritten buffers + * on the page. + * + * If we need a transaction and the process flags say we are already + * in a transaction, or no IO is allowed then mark the page dirty + * again and leave the page as is. */ - if (WARN_ON(current->flags & PF_FSTRANS)) + xfs_count_page_state(page, &delalloc, &unwritten); + if ((current->flags & PF_FSTRANS) && (delalloc || unwritten)) goto redirty; /* Is this page beyond the end of the file? */ @@ -960,7 +970,7 @@ xfs_vm_writepage( offset = page_offset(page); type = IO_OVERWRITE; - if (wbc->sync_mode == WB_SYNC_NONE) + if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking) nonblocking = 1; do { diff --git a/trunk/fs/xfs/linux-2.6/xfs_buf.c b/trunk/fs/xfs/linux-2.6/xfs_buf.c index b2b411985591..5e68099db2a5 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_buf.c +++ b/trunk/fs/xfs/linux-2.6/xfs_buf.c @@ -499,14 +499,16 @@ _xfs_buf_find( spin_unlock(&pag->pag_buf_lock); xfs_perag_put(pag); - if (!xfs_buf_trylock(bp)) { - if (flags & XBF_TRYLOCK) { + if (xfs_buf_cond_lock(bp)) { + /* failed, so wait for the lock if requested. 
*/ + if (!(flags & XBF_TRYLOCK)) { + xfs_buf_lock(bp); + XFS_STATS_INC(xb_get_locked_waited); + } else { xfs_buf_rele(bp); XFS_STATS_INC(xb_busy_locked); return NULL; } - xfs_buf_lock(bp); - XFS_STATS_INC(xb_get_locked_waited); } /* @@ -592,8 +594,10 @@ _xfs_buf_read( ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); - bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD); - bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); + bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ + XBF_READ_AHEAD | _XBF_RUN_QUEUES); + bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \ + XBF_READ_AHEAD | _XBF_RUN_QUEUES); status = xfs_buf_iorequest(bp); if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC)) @@ -677,6 +681,7 @@ xfs_buf_read_uncached( return NULL; /* set up the buffer for a read IO */ + xfs_buf_lock(bp); XFS_BUF_SET_ADDR(bp, daddr); XFS_BUF_READ(bp); XFS_BUF_BUSY(bp); @@ -811,6 +816,8 @@ xfs_buf_get_uncached( goto fail_free_mem; } + xfs_buf_unlock(bp); + trace_xfs_buf_get_uncached(bp, _RET_IP_); return bp; @@ -889,8 +896,8 @@ xfs_buf_rele( * to push on stale inode buffers. */ int -xfs_buf_trylock( - struct xfs_buf *bp) +xfs_buf_cond_lock( + xfs_buf_t *bp) { int locked; @@ -900,8 +907,15 @@ xfs_buf_trylock( else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) xfs_log_force(bp->b_target->bt_mount, 0); - trace_xfs_buf_trylock(bp, _RET_IP_); - return locked; + trace_xfs_buf_cond_lock(bp, _RET_IP_); + return locked ? 
0 : -EBUSY; +} + +int +xfs_buf_lock_value( + xfs_buf_t *bp) +{ + return bp->b_sema.count; } /* @@ -915,7 +929,7 @@ xfs_buf_trylock( */ void xfs_buf_lock( - struct xfs_buf *bp) + xfs_buf_t *bp) { trace_xfs_buf_lock(bp, _RET_IP_); @@ -936,7 +950,7 @@ xfs_buf_lock( */ void xfs_buf_unlock( - struct xfs_buf *bp) + xfs_buf_t *bp) { if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) { atomic_inc(&bp->b_hold); @@ -1107,7 +1121,7 @@ xfs_bioerror_relse( XFS_BUF_UNDELAYWRITE(bp); XFS_BUF_DONE(bp); XFS_BUF_STALE(bp); - bp->b_iodone = NULL; + XFS_BUF_CLR_IODONE_FUNC(bp); if (!(fl & XBF_ASYNC)) { /* * Mark b_error and B_ERROR _both_. @@ -1209,21 +1223,23 @@ _xfs_buf_ioapply( total_nr_pages = bp->b_page_count; map_i = 0; - if (bp->b_flags & XBF_WRITE) { - if (bp->b_flags & XBF_SYNCIO) - rw = WRITE_SYNC; - else - rw = WRITE; - if (bp->b_flags & XBF_FUA) - rw |= REQ_FUA; - if (bp->b_flags & XBF_FLUSH) - rw |= REQ_FLUSH; - } else if (bp->b_flags & XBF_READ_AHEAD) { - rw = READA; + if (bp->b_flags & XBF_ORDERED) { + ASSERT(!(bp->b_flags & XBF_READ)); + rw = WRITE_FLUSH_FUA; + } else if (bp->b_flags & XBF_LOG_BUFFER) { + ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); + bp->b_flags &= ~_XBF_RUN_QUEUES; + rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC; + } else if (bp->b_flags & _XBF_RUN_QUEUES) { + ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); + bp->b_flags &= ~_XBF_RUN_QUEUES; + rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META; } else { - rw = READ; + rw = (bp->b_flags & XBF_WRITE) ? WRITE : + (bp->b_flags & XBF_READ_AHEAD) ? 
READA : READ; } + next_chunk: atomic_inc(&bp->b_io_remaining); nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); @@ -1678,14 +1694,15 @@ xfs_buf_delwri_split( list_for_each_entry_safe(bp, n, dwq, b_list) { ASSERT(bp->b_flags & XBF_DELWRI); - if (!XFS_BUF_ISPINNED(bp) && xfs_buf_trylock(bp)) { + if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) { if (!force && time_before(jiffies, bp->b_queuetime + age)) { xfs_buf_unlock(bp); break; } - bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q); + bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q| + _XBF_RUN_QUEUES); bp->b_flags |= XBF_WRITE; list_move_tail(&bp->b_list, list); trace_xfs_buf_delwri_split(bp, _RET_IP_); @@ -1721,6 +1738,14 @@ xfs_buf_cmp( return 0; } +void +xfs_buf_delwri_sort( + xfs_buftarg_t *target, + struct list_head *list) +{ + list_sort(NULL, list, xfs_buf_cmp); +} + STATIC int xfsbufd( void *data) diff --git a/trunk/fs/xfs/linux-2.6/xfs_buf.h b/trunk/fs/xfs/linux-2.6/xfs_buf.h index 6a83b46b4bcf..50a7d5fb3b73 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_buf.h +++ b/trunk/fs/xfs/linux-2.6/xfs_buf.h @@ -46,46 +46,43 @@ typedef enum { #define XBF_READ (1 << 0) /* buffer intended for reading from device */ #define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ -#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ -#define XBF_MAPPED (1 << 3) /* buffer mapped (b_addr valid) */ +#define XBF_MAPPED (1 << 2) /* buffer mapped (b_addr valid) */ #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ #define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ #define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ - -/* I/O hints for the BIO layer */ -#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ -#define XBF_FUA (1 << 11)/* force cache write through mode */ -#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ +#define XBF_ORDERED (1 << 11)/* use ordered writes */ 
+#define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */ +#define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */ /* flags used only as arguments to access routines */ -#define XBF_LOCK (1 << 15)/* lock requested */ -#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ -#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */ +#define XBF_LOCK (1 << 14)/* lock requested */ +#define XBF_TRYLOCK (1 << 15)/* lock requested, but do not wait */ +#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ /* flags used only internally */ -#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ -#define _XBF_KMEM (1 << 21)/* backed by heap memory */ -#define _XBF_DELWRI_Q (1 << 22)/* buffer on delwri queue */ +#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ +#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ +#define _XBF_KMEM (1 << 20)/* backed by heap memory */ +#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */ typedef unsigned int xfs_buf_flags_t; #define XFS_BUF_FLAGS \ { XBF_READ, "READ" }, \ { XBF_WRITE, "WRITE" }, \ - { XBF_READ_AHEAD, "READ_AHEAD" }, \ { XBF_MAPPED, "MAPPED" }, \ { XBF_ASYNC, "ASYNC" }, \ { XBF_DONE, "DONE" }, \ { XBF_DELWRI, "DELWRI" }, \ { XBF_STALE, "STALE" }, \ - { XBF_SYNCIO, "SYNCIO" }, \ - { XBF_FUA, "FUA" }, \ - { XBF_FLUSH, "FLUSH" }, \ + { XBF_ORDERED, "ORDERED" }, \ + { XBF_READ_AHEAD, "READ_AHEAD" }, \ { XBF_LOCK, "LOCK" }, /* should never be set */\ { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ { _XBF_PAGES, "PAGES" }, \ + { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" } @@ -94,6 +91,11 @@ typedef enum { XBT_FORCE_FLUSH = 1, } xfs_buftarg_flags_t; +typedef struct xfs_bufhash { + struct list_head bh_list; + spinlock_t bh_lock; +} xfs_bufhash_t; + typedef struct xfs_buftarg { dev_t bt_dev; struct block_device *bt_bdev; @@ -149,7 +151,7 @@ typedef struct 
xfs_buf { xfs_buf_iodone_t b_iodone; /* I/O completion function */ struct completion b_iowait; /* queue for I/O waiters */ void *b_fspriv; - struct xfs_trans *b_transp; + void *b_fspriv2; struct page **b_pages; /* array of page pointers */ struct page *b_page_array[XB_PAGES]; /* inline pages */ unsigned long b_queuetime; /* time buffer was queued */ @@ -190,11 +192,10 @@ extern void xfs_buf_free(xfs_buf_t *); extern void xfs_buf_rele(xfs_buf_t *); /* Locking and Unlocking Buffers */ -extern int xfs_buf_trylock(xfs_buf_t *); +extern int xfs_buf_cond_lock(xfs_buf_t *); +extern int xfs_buf_lock_value(xfs_buf_t *); extern void xfs_buf_lock(xfs_buf_t *); extern void xfs_buf_unlock(xfs_buf_t *); -#define xfs_buf_islocked(bp) \ - ((bp)->b_sema.count <= 0) /* Buffer Read and Write Routines */ extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); @@ -233,9 +234,8 @@ extern void xfs_buf_terminate(void); #define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) -#define XFS_BUF_ZEROFLAGS(bp) \ - ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \ - XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) +#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ + ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) void xfs_buf_stale(struct xfs_buf *bp); #define XFS_BUF_STALE(bp) xfs_buf_stale(bp); @@ -267,6 +267,10 @@ void xfs_buf_stale(struct xfs_buf *bp); #define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC) #define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC) +#define XFS_BUF_ORDERED(bp) ((bp)->b_flags |= XBF_ORDERED) +#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED) +#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED) + #define XFS_BUF_HOLD(bp) xfs_buf_hold(bp) #define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) #define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) @@ -276,6 +280,14 @@ void xfs_buf_stale(struct xfs_buf *bp); #define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) #define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) +#define 
XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone) +#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func)) +#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL) + +#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv) +#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val)) +#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) +#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) #define XFS_BUF_SET_START(bp) do { } while (0) #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) @@ -301,6 +313,10 @@ xfs_buf_set_ref( #define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) +#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp) +#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0) +#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp) +#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp) #define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); #define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target)) diff --git a/trunk/fs/xfs/linux-2.6/xfs_export.c b/trunk/fs/xfs/linux-2.6/xfs_export.c index 75e5d322e48f..f4f878fc0083 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_export.c +++ b/trunk/fs/xfs/linux-2.6/xfs_export.c @@ -151,14 +151,14 @@ xfs_nfs_get_inode( * We don't use ESTALE directly down the chain to not * confuse applications using bulkstat that expect EINVAL. 
*/ - if (error == EINVAL || error == ENOENT) + if (error == EINVAL) error = ESTALE; return ERR_PTR(-error); } if (ip->i_d.di_gen != generation) { IRELE(ip); - return ERR_PTR(-ESTALE); + return ERR_PTR(-ENOENT); } return VFS_I(ip); diff --git a/trunk/fs/xfs/linux-2.6/xfs_file.c b/trunk/fs/xfs/linux-2.6/xfs_file.c index 8073f61efb8e..7f782af286bf 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_file.c +++ b/trunk/fs/xfs/linux-2.6/xfs_file.c @@ -944,7 +944,7 @@ xfs_file_fallocate( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = new_size; - error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK); + error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); } out_unlock: diff --git a/trunk/fs/xfs/linux-2.6/xfs_iops.c b/trunk/fs/xfs/linux-2.6/xfs_iops.c index 501e4f630548..d44d92cd12b1 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_iops.c +++ b/trunk/fs/xfs/linux-2.6/xfs_iops.c @@ -39,7 +39,6 @@ #include "xfs_buf_item.h" #include "xfs_utils.h" #include "xfs_vnodeops.h" -#include "xfs_inode_item.h" #include "xfs_trace.h" #include @@ -498,442 +497,12 @@ xfs_vn_getattr( return 0; } -int -xfs_setattr_nonsize( - struct xfs_inode *ip, - struct iattr *iattr, - int flags) -{ - xfs_mount_t *mp = ip->i_mount; - struct inode *inode = VFS_I(ip); - int mask = iattr->ia_valid; - xfs_trans_t *tp; - int error; - uid_t uid = 0, iuid = 0; - gid_t gid = 0, igid = 0; - struct xfs_dquot *udqp = NULL, *gdqp = NULL; - struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL; - - trace_xfs_setattr(ip); - - if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); - - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - - error = -inode_change_ok(inode, iattr); - if (error) - return XFS_ERROR(error); - - ASSERT((mask & ATTR_SIZE) == 0); - - /* - * If disk quotas is on, we make sure that the dquots do exist on disk, - * before we start any other transactions. Trying to do this later - * is messy. We don't care to take a readlock to look at the ids - * in inode here, because we can't hold it across the trans_reserve. 
- * If the IDs do change before we take the ilock, we're covered - * because the i_*dquot fields will get updated anyway. - */ - if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) { - uint qflags = 0; - - if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { - uid = iattr->ia_uid; - qflags |= XFS_QMOPT_UQUOTA; - } else { - uid = ip->i_d.di_uid; - } - if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { - gid = iattr->ia_gid; - qflags |= XFS_QMOPT_GQUOTA; - } else { - gid = ip->i_d.di_gid; - } - - /* - * We take a reference when we initialize udqp and gdqp, - * so it is important that we never blindly double trip on - * the same variable. See xfs_create() for an example. - */ - ASSERT(udqp == NULL); - ASSERT(gdqp == NULL); - error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), - qflags, &udqp, &gdqp); - if (error) - return error; - } - - tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); - error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); - if (error) - goto out_dqrele; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - - /* - * Change file ownership. Must be the owner or privileged. - */ - if (mask & (ATTR_UID|ATTR_GID)) { - /* - * These IDs could have changed since we last looked at them. - * But, we're assured that if the ownership did change - * while we didn't have the inode locked, inode's dquot(s) - * would have changed also. - */ - iuid = ip->i_d.di_uid; - igid = ip->i_d.di_gid; - gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; - uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; - - /* - * Do a quota reservation only if uid/gid is actually - * going to change. - */ - if (XFS_IS_QUOTA_RUNNING(mp) && - ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || - (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { - ASSERT(tp); - error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, - capable(CAP_FOWNER) ? - XFS_QMOPT_FORCE_RES : 0); - if (error) /* out of quota */ - goto out_trans_cancel; - } - } - - xfs_trans_ijoin(tp, ip); - - /* - * Change file ownership. 
Must be the owner or privileged. - */ - if (mask & (ATTR_UID|ATTR_GID)) { - /* - * CAP_FSETID overrides the following restrictions: - * - * The set-user-ID and set-group-ID bits of a file will be - * cleared upon successful return from chown() - */ - if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && - !capable(CAP_FSETID)) - ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); - - /* - * Change the ownerships and register quota modifications - * in the transaction. - */ - if (iuid != uid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { - ASSERT(mask & ATTR_UID); - ASSERT(udqp); - olddquot1 = xfs_qm_vop_chown(tp, ip, - &ip->i_udquot, udqp); - } - ip->i_d.di_uid = uid; - inode->i_uid = uid; - } - if (igid != gid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { - ASSERT(!XFS_IS_PQUOTA_ON(mp)); - ASSERT(mask & ATTR_GID); - ASSERT(gdqp); - olddquot2 = xfs_qm_vop_chown(tp, ip, - &ip->i_gdquot, gdqp); - } - ip->i_d.di_gid = gid; - inode->i_gid = gid; - } - } - - /* - * Change file access modes. - */ - if (mask & ATTR_MODE) { - umode_t mode = iattr->ia_mode; - - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) - mode &= ~S_ISGID; - - ip->i_d.di_mode &= S_IFMT; - ip->i_d.di_mode |= mode & ~S_IFMT; - - inode->i_mode &= S_IFMT; - inode->i_mode |= mode & ~S_IFMT; - } - - /* - * Change file access or modified times. 
- */ - if (mask & ATTR_ATIME) { - inode->i_atime = iattr->ia_atime; - ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; - ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; - ip->i_update_core = 1; - } - if (mask & ATTR_CTIME) { - inode->i_ctime = iattr->ia_ctime; - ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; - ip->i_update_core = 1; - } - if (mask & ATTR_MTIME) { - inode->i_mtime = iattr->ia_mtime; - ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; - ip->i_update_core = 1; - } - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - XFS_STATS_INC(xs_ig_attrchg); - - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - /* - * Release any dquot(s) the inode had kept before chown. - */ - xfs_qm_dqrele(olddquot1); - xfs_qm_dqrele(olddquot2); - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); - - if (error) - return XFS_ERROR(error); - - /* - * XXX(hch): Updating the ACL entries is not atomic vs the i_mode - * update. We could avoid this with linked transactions - * and passing down the transaction pointer all the way - * to attr_set. No previous user of the generic - * Posix ACL code seems to care about this issue either. - */ - if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { - error = -xfs_acl_chmod(inode); - if (error) - return XFS_ERROR(error); - } - - return 0; - -out_trans_cancel: - xfs_trans_cancel(tp, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); -out_dqrele: - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); - return error; -} - -/* - * Truncate file. Must have write permission and not be a directory. 
- */ -int -xfs_setattr_size( - struct xfs_inode *ip, - struct iattr *iattr, - int flags) -{ - struct xfs_mount *mp = ip->i_mount; - struct inode *inode = VFS_I(ip); - int mask = iattr->ia_valid; - struct xfs_trans *tp; - int error; - uint lock_flags; - uint commit_flags = 0; - - trace_xfs_setattr(ip); - - if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); - - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - - error = -inode_change_ok(inode, iattr); - if (error) - return XFS_ERROR(error); - - ASSERT(S_ISREG(ip->i_d.di_mode)); - ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| - ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID| - ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); - - lock_flags = XFS_ILOCK_EXCL; - if (!(flags & XFS_ATTR_NOLOCK)) - lock_flags |= XFS_IOLOCK_EXCL; - xfs_ilock(ip, lock_flags); - - /* - * Short circuit the truncate case for zero length files. - */ - if (iattr->ia_size == 0 && - ip->i_size == 0 && ip->i_d.di_nextents == 0) { - if (!(mask & (ATTR_CTIME|ATTR_MTIME))) - goto out_unlock; - - /* - * Use the regular setattr path to update the timestamps. - */ - xfs_iunlock(ip, lock_flags); - iattr->ia_valid &= ~ATTR_SIZE; - return xfs_setattr_nonsize(ip, iattr, 0); - } - - /* - * Make sure that the dquots are attached to the inode. - */ - error = xfs_qm_dqattach_locked(ip, 0); - if (error) - goto out_unlock; - - /* - * Now we can make the changes. Before we join the inode to the - * transaction, take care of the part of the truncation that must be - * done without the inode lock. This needs to be done before joining - * the inode to the transaction, because the inode cannot be unlocked - * once it is a part of the transaction. - */ - if (iattr->ia_size > ip->i_size) { - /* - * Do the first part of growing a file: zero any data in the - * last block that is beyond the old EOF. We need to do this - * before the inode is joined to the transaction to modify - * i_size. 
- */ - error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); - if (error) - goto out_unlock; - } - xfs_iunlock(ip, XFS_ILOCK_EXCL); - lock_flags &= ~XFS_ILOCK_EXCL; - - /* - * We are going to log the inode size change in this transaction so - * any previous writes that are beyond the on disk EOF and the new - * EOF that have not been written out need to be written here. If we - * do not write the data out, we expose ourselves to the null files - * problem. - * - * Only flush from the on disk size to the smaller of the in memory - * file size or the new size as that's the range we really care about - * here and prevents waiting for other data not within the range we - * care about here. - */ - if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) { - error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, - XBF_ASYNC, FI_NONE); - if (error) - goto out_unlock; - } - - /* - * Wait for all I/O to complete. - */ - xfs_ioend_wait(ip); - - error = -block_truncate_page(inode->i_mapping, iattr->ia_size, - xfs_get_blocks); - if (error) - goto out_unlock; - - tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); - error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, - XFS_ITRUNCATE_LOG_COUNT); - if (error) - goto out_trans_cancel; - - truncate_setsize(inode, iattr->ia_size); - - commit_flags = XFS_TRANS_RELEASE_LOG_RES; - lock_flags |= XFS_ILOCK_EXCL; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, ip); - - /* - * Only change the c/mtime if we are changing the size or we are - * explicitly asked to change it. This handles the semantic difference - * between truncate() and ftruncate() as implemented in the VFS. - * - * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a - * special case where we need to update the times despite not having - * these flags set. For all other operations the VFS set these flags - * explicitly if it wants a timestamp update. 
- */ - if (iattr->ia_size != ip->i_size && - (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { - iattr->ia_ctime = iattr->ia_mtime = - current_fs_time(inode->i_sb); - mask |= ATTR_CTIME | ATTR_MTIME; - } - - if (iattr->ia_size > ip->i_size) { - ip->i_d.di_size = iattr->ia_size; - ip->i_size = iattr->ia_size; - } else if (iattr->ia_size <= ip->i_size || - (iattr->ia_size == 0 && ip->i_d.di_nextents)) { - error = xfs_itruncate_data(&tp, ip, iattr->ia_size); - if (error) - goto out_trans_abort; - - /* - * Truncated "down", so we're removing references to old data - * here - if we delay flushing for a long time, we expose - * ourselves unduly to the notorious NULL files problem. So, - * we mark this inode and flush it when the file is closed, - * and do not wait the usual (long) time for writeout. - */ - xfs_iflags_set(ip, XFS_ITRUNCATED); - } - - if (mask & ATTR_CTIME) { - inode->i_ctime = iattr->ia_ctime; - ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; - ip->i_update_core = 1; - } - if (mask & ATTR_MTIME) { - inode->i_mtime = iattr->ia_mtime; - ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; - ip->i_update_core = 1; - } - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - XFS_STATS_INC(xs_ig_attrchg); - - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(tp); - - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); -out_unlock: - if (lock_flags) - xfs_iunlock(ip, lock_flags); - return error; - -out_trans_abort: - commit_flags |= XFS_TRANS_ABORT; -out_trans_cancel: - xfs_trans_cancel(tp, commit_flags); - goto out_unlock; -} - STATIC int xfs_vn_setattr( struct dentry *dentry, struct iattr *iattr) { - if (iattr->ia_valid & ATTR_SIZE) - return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0); - return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0); + return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); } #define XFS_FIEMAP_FLAGS 
(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) diff --git a/trunk/fs/xfs/linux-2.6/xfs_linux.h b/trunk/fs/xfs/linux-2.6/xfs_linux.h index d42f814e4d35..8633521b3b2e 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_linux.h +++ b/trunk/fs/xfs/linux-2.6/xfs_linux.h @@ -33,6 +33,7 @@ #endif #include +#include #include #include @@ -87,12 +88,6 @@ #include #include -#ifdef __BIG_ENDIAN -#define XFS_NATIVE_HOST 1 -#else -#undef XFS_NATIVE_HOST -#endif - /* * Feature macros (disable/enable) */ diff --git a/trunk/fs/xfs/linux-2.6/xfs_super.c b/trunk/fs/xfs/linux-2.6/xfs_super.c index 25fd2cd6c8b0..a1a881e68a9a 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_super.c +++ b/trunk/fs/xfs/linux-2.6/xfs_super.c @@ -33,6 +33,7 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_bmap.h" #include "xfs_rtalloc.h" @@ -1411,35 +1412,37 @@ xfs_fs_fill_super( sb->s_time_gran = 1; set_posix_acl_flag(sb); - xfs_inode_shrinker_register(mp); - - error = xfs_mountfs(mp); + error = xfs_syncd_init(mp); if (error) goto out_filestream_unmount; - error = xfs_syncd_init(mp); + xfs_inode_shrinker_register(mp); + + error = xfs_mountfs(mp); if (error) - goto out_unmount; + goto out_syncd_stop; root = igrab(VFS_I(mp->m_rootip)); if (!root) { error = ENOENT; - goto out_syncd_stop; + goto fail_unmount; } if (is_bad_inode(root)) { error = EINVAL; - goto out_syncd_stop; + goto fail_vnrele; } sb->s_root = d_alloc_root(root); if (!sb->s_root) { error = ENOMEM; - goto out_iput; + goto fail_vnrele; } return 0; - out_filestream_unmount: + out_syncd_stop: xfs_inode_shrinker_unregister(mp); + xfs_syncd_stop(mp); + out_filestream_unmount: xfs_filestream_unmount(mp); out_free_sb: xfs_freesb(mp); @@ -1453,12 +1456,17 @@ xfs_fs_fill_super( out: return -error; - out_iput: - iput(root); - out_syncd_stop: - xfs_syncd_stop(mp); - out_unmount: + fail_vnrele: + if (sb->s_root) { + dput(sb->s_root); + sb->s_root = NULL; + } else { + iput(root); + } + + 
fail_unmount: xfs_inode_shrinker_unregister(mp); + xfs_syncd_stop(mp); /* * Blow away any referenced inode in the filestreams cache. diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.c b/trunk/fs/xfs/linux-2.6/xfs_sync.c index 5cc158e52d4c..8ecad5ff9f9b 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_sync.c +++ b/trunk/fs/xfs/linux-2.6/xfs_sync.c @@ -359,11 +359,13 @@ xfs_quiesce_data( { int error, error2 = 0; + /* push non-blocking */ + xfs_sync_data(mp, 0); xfs_qm_sync(mp, SYNC_TRYLOCK); - xfs_qm_sync(mp, SYNC_WAIT); - /* force out the newly dirtied log buffers */ - xfs_log_force(mp, XFS_LOG_SYNC); + /* push and block till complete */ + xfs_sync_data(mp, SYNC_WAIT); + xfs_qm_sync(mp, SYNC_WAIT); /* write superblock and hoover up shutdown errors */ error = xfs_sync_fsdata(mp); @@ -434,7 +436,7 @@ xfs_quiesce_attr( WARN_ON(atomic_read(&mp->m_active_trans) != 0); /* Push the superblock and write an unmount record */ - error = xfs_log_sbcount(mp); + error = xfs_log_sbcount(mp, 1); if (error) xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. 
" "Frozen image may not be consistent."); diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.h b/trunk/fs/xfs/linux-2.6/xfs_sync.h index e914fd621746..e3a6ad27415f 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_sync.h +++ b/trunk/fs/xfs/linux-2.6/xfs_sync.h @@ -21,6 +21,14 @@ struct xfs_mount; struct xfs_perag; +typedef struct xfs_sync_work { + struct list_head w_list; + struct xfs_mount *w_mount; + void *w_data; /* syncer routine argument */ + void (*w_syncer)(struct xfs_mount *, void *); + struct completion *w_completion; +} xfs_sync_work_t; + #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ diff --git a/trunk/fs/xfs/linux-2.6/xfs_trace.h b/trunk/fs/xfs/linux-2.6/xfs_trace.h index fda0708ef2ea..d48b7a579ae1 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_trace.h +++ b/trunk/fs/xfs/linux-2.6/xfs_trace.h @@ -293,7 +293,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class, __entry->buffer_length = bp->b_buffer_length; __entry->hold = atomic_read(&bp->b_hold); __entry->pincount = atomic_read(&bp->b_pin_count); - __entry->lockval = bp->b_sema.count; + __entry->lockval = xfs_buf_lock_value(bp); __entry->flags = bp->b_flags; __entry->caller_ip = caller_ip; ), @@ -323,7 +323,7 @@ DEFINE_BUF_EVENT(xfs_buf_bawrite); DEFINE_BUF_EVENT(xfs_buf_bdwrite); DEFINE_BUF_EVENT(xfs_buf_lock); DEFINE_BUF_EVENT(xfs_buf_lock_done); -DEFINE_BUF_EVENT(xfs_buf_trylock); +DEFINE_BUF_EVENT(xfs_buf_cond_lock); DEFINE_BUF_EVENT(xfs_buf_unlock); DEFINE_BUF_EVENT(xfs_buf_iowait); DEFINE_BUF_EVENT(xfs_buf_iowait_done); @@ -366,7 +366,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class, __entry->flags = flags; __entry->hold = atomic_read(&bp->b_hold); __entry->pincount = atomic_read(&bp->b_pin_count); - __entry->lockval = bp->b_sema.count; + __entry->lockval = xfs_buf_lock_value(bp); __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " @@ -409,7 +409,7 @@ TRACE_EVENT(xfs_buf_ioerror, __entry->buffer_length = bp->b_buffer_length; 
__entry->hold = atomic_read(&bp->b_hold); __entry->pincount = atomic_read(&bp->b_pin_count); - __entry->lockval = bp->b_sema.count; + __entry->lockval = xfs_buf_lock_value(bp); __entry->error = error; __entry->flags = bp->b_flags; __entry->caller_ip = caller_ip; @@ -454,7 +454,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class, __entry->buf_flags = bip->bli_buf->b_flags; __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count); - __entry->buf_lockval = bip->bli_buf->b_sema.count; + __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf); __entry->li_desc = bip->bli_item.li_desc; __entry->li_flags = bip->bli_item.li_flags; ), @@ -998,8 +998,7 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) - __field(loff_t, isize) - __field(loff_t, disize) + __field(loff_t, size) __field(loff_t, new_size) __field(loff_t, offset) __field(size_t, count) @@ -1007,18 +1006,16 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class, TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; - __entry->isize = ip->i_size; - __entry->disize = ip->i_d.di_size; + __entry->size = ip->i_d.di_size; __entry->new_size = ip->i_new_size; __entry->offset = offset; __entry->count = count; ), - TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx " + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " "offset 0x%llx count %zd", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, - __entry->isize, - __entry->disize, + __entry->size, __entry->new_size, __entry->offset, __entry->count) @@ -1031,7 +1028,40 @@ DEFINE_EVENT(xfs_simple_io_class, name, \ DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); -DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); + + +TRACE_EVENT(xfs_itruncate_start, + TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size, int flag, + xfs_off_t 
toss_start, xfs_off_t toss_finish), + TP_ARGS(ip, new_size, flag, toss_start, toss_finish), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_fsize_t, new_size) + __field(xfs_off_t, toss_start) + __field(xfs_off_t, toss_finish) + __field(int, flag) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->new_size = new_size; + __entry->toss_start = toss_start; + __entry->toss_finish = toss_finish; + __entry->flag = flag; + ), + TP_printk("dev %d:%d ino 0x%llx %s size 0x%llx new_size 0x%llx " + "toss start 0x%llx toss finish 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->flag, "|", XFS_ITRUNC_FLAGS), + __entry->size, + __entry->new_size, + __entry->toss_start, + __entry->toss_finish) +); DECLARE_EVENT_CLASS(xfs_itrunc_class, TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), @@ -1059,8 +1089,8 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class, DEFINE_EVENT(xfs_itrunc_class, name, \ TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ TP_ARGS(ip, new_size)) -DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start); -DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end); +DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start); +DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end); TRACE_EVENT(xfs_pagecache_inval, TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), diff --git a/trunk/fs/xfs/quota/xfs_dquot.c b/trunk/fs/xfs/quota/xfs_dquot.c index 837f31158d43..6fa214603819 100644 --- a/trunk/fs/xfs/quota/xfs_dquot.c +++ b/trunk/fs/xfs/quota/xfs_dquot.c @@ -220,7 +220,7 @@ xfs_qm_adjust_dqtimers( { ASSERT(d->d_id); -#ifdef DEBUG +#ifdef QUOTADEBUG if (d->d_blk_hardlimit) ASSERT(be64_to_cpu(d->d_blk_softlimit) <= be64_to_cpu(d->d_blk_hardlimit)); @@ -231,7 +231,6 @@ xfs_qm_adjust_dqtimers( ASSERT(be64_to_cpu(d->d_rtb_softlimit) <= be64_to_cpu(d->d_rtb_hardlimit)); #endif - if (!d->d_btimer) { if 
((d->d_blk_softlimit && (be64_to_cpu(d->d_bcount) >= @@ -319,7 +318,7 @@ xfs_qm_init_dquot_blk( ASSERT(tp); ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(xfs_buf_islocked(bp)); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); d = (xfs_dqblk_t *)XFS_BUF_PTR(bp); @@ -535,7 +534,7 @@ xfs_qm_dqtobp( } ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(xfs_buf_islocked(bp)); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); /* * calculate the location of the dquot inside the buffer. @@ -623,7 +622,7 @@ xfs_qm_dqread( * brelse it because we have the changes incore. */ ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(xfs_buf_islocked(bp)); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); xfs_trans_brelse(tp, bp); return (error); @@ -1424,6 +1423,45 @@ xfs_qm_dqpurge( } +#ifdef QUOTADEBUG +void +xfs_qm_dqprint(xfs_dquot_t *dqp) +{ + struct xfs_mount *mp = dqp->q_mount; + + xfs_debug(mp, "-----------KERNEL DQUOT----------------"); + xfs_debug(mp, "---- dquotID = %d", + (int)be32_to_cpu(dqp->q_core.d_id)); + xfs_debug(mp, "---- type = %s", DQFLAGTO_TYPESTR(dqp)); + xfs_debug(mp, "---- fs = 0x%p", dqp->q_mount); + xfs_debug(mp, "---- blkno = 0x%x", (int) dqp->q_blkno); + xfs_debug(mp, "---- boffset = 0x%x", (int) dqp->q_bufoffset); + xfs_debug(mp, "---- blkhlimit = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_blk_hardlimit), + (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit)); + xfs_debug(mp, "---- blkslimit = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_blk_softlimit), + (int)be64_to_cpu(dqp->q_core.d_blk_softlimit)); + xfs_debug(mp, "---- inohlimit = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_ino_hardlimit), + (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit)); + xfs_debug(mp, "---- inoslimit = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_ino_softlimit), + (int)be64_to_cpu(dqp->q_core.d_ino_softlimit)); + xfs_debug(mp, "---- bcount = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_bcount), + (int)be64_to_cpu(dqp->q_core.d_bcount)); + xfs_debug(mp, "---- icount = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_icount), + (int)be64_to_cpu(dqp->q_core.d_icount)); + xfs_debug(mp, 
"---- btimer = %d", + (int)be32_to_cpu(dqp->q_core.d_btimer)); + xfs_debug(mp, "---- itimer = %d", + (int)be32_to_cpu(dqp->q_core.d_itimer)); + xfs_debug(mp, "---------------------------"); +} +#endif + /* * Give the buffer a little push if it is incore and * wait on the flush lock. diff --git a/trunk/fs/xfs/quota/xfs_dquot.h b/trunk/fs/xfs/quota/xfs_dquot.h index 34b7e945dbfa..5da3a23b820d 100644 --- a/trunk/fs/xfs/quota/xfs_dquot.h +++ b/trunk/fs/xfs/quota/xfs_dquot.h @@ -116,6 +116,12 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp) (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ (XFS_IS_OQUOTA_ON((d)->q_mount)))) +#ifdef QUOTADEBUG +extern void xfs_qm_dqprint(xfs_dquot_t *); +#else +#define xfs_qm_dqprint(a) +#endif + extern void xfs_qm_dqdestroy(xfs_dquot_t *); extern int xfs_qm_dqflush(xfs_dquot_t *, uint); extern int xfs_qm_dqpurge(xfs_dquot_t *); diff --git a/trunk/fs/xfs/quota/xfs_qm.c b/trunk/fs/xfs/quota/xfs_qm.c index 46e54ad9a2dc..b94dace4e785 100644 --- a/trunk/fs/xfs/quota/xfs_qm.c +++ b/trunk/fs/xfs/quota/xfs_qm.c @@ -67,6 +67,32 @@ static struct shrinker xfs_qm_shaker = { .seeks = DEFAULT_SEEKS, }; +#ifdef DEBUG +extern struct mutex qcheck_lock; +#endif + +#ifdef QUOTADEBUG +static void +xfs_qm_dquot_list_print( + struct xfs_mount *mp) +{ + xfs_dquot_t *dqp; + int i = 0; + + list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) { + xfs_debug(mp, " %d. \"%d (%s)\" " + "bcnt = %lld, icnt = %lld, refs = %d", + i++, be32_to_cpu(dqp->q_core.d_id), + DQFLAGTO_TYPESTR(dqp), + (long long)be64_to_cpu(dqp->q_core.d_bcount), + (long long)be64_to_cpu(dqp->q_core.d_icount), + dqp->q_nrefs); + } +} +#else +static void xfs_qm_dquot_list_print(struct xfs_mount *mp) { } +#endif + /* * Initialize the XQM structure. * Note that there is not one quota manager per file system. 
@@ -139,6 +165,9 @@ xfs_Gqm_init(void) atomic_set(&xqm->qm_totaldquots, 0); xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO; xqm->qm_nrefs = 0; +#ifdef DEBUG + mutex_init(&qcheck_lock); +#endif return xqm; out_free_udqhash: @@ -175,6 +204,9 @@ xfs_qm_destroy( mutex_lock(&xqm->qm_dqfrlist_lock); list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { xfs_dqlock(dqp); +#ifdef QUOTADEBUG + xfs_debug(dqp->q_mount, "FREELIST destroy 0x%p", dqp); +#endif list_del_init(&dqp->q_freelist); xfs_Gqm->qm_dqfrlist_cnt--; xfs_dqunlock(dqp); @@ -182,6 +214,9 @@ xfs_qm_destroy( } mutex_unlock(&xqm->qm_dqfrlist_lock); mutex_destroy(&xqm->qm_dqfrlist_lock); +#ifdef DEBUG + mutex_destroy(&qcheck_lock); +#endif kmem_free(xqm); } @@ -374,6 +409,11 @@ xfs_qm_mount_quotas( xfs_warn(mp, "Failed to initialize disk quotas."); return; } + +#ifdef QUOTADEBUG + if (XFS_IS_QUOTA_ON(mp)) + xfs_qm_internalqcheck(mp); +#endif } /* @@ -826,8 +866,8 @@ xfs_qm_dqattach_locked( } done: -#ifdef DEBUG - if (!error) { +#ifdef QUOTADEBUG + if (! 
error) { if (XFS_IS_UQUOTA_ON(mp)) ASSERT(ip->i_udquot); if (XFS_IS_OQUOTA_ON(mp)) @@ -1693,6 +1733,8 @@ xfs_qm_quotacheck( mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); mp->m_qflags |= flags; + xfs_qm_dquot_list_print(mp); + error_return: if (error) { xfs_warn(mp, @@ -2054,6 +2096,9 @@ xfs_qm_write_sb_changes( xfs_trans_t *tp; int error; +#ifdef QUOTADEBUG + xfs_notice(mp, "Writing superblock quota changes"); +#endif tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, diff --git a/trunk/fs/xfs/quota/xfs_qm.h b/trunk/fs/xfs/quota/xfs_qm.h index 43b9abe1052c..567b29b9f1b3 100644 --- a/trunk/fs/xfs/quota/xfs_qm.h +++ b/trunk/fs/xfs/quota/xfs_qm.h @@ -163,4 +163,10 @@ extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); +#ifdef DEBUG +extern int xfs_qm_internalqcheck(xfs_mount_t *); +#else +#define xfs_qm_internalqcheck(mp) (0) +#endif + #endif /* __XFS_QM_H__ */ diff --git a/trunk/fs/xfs/quota/xfs_qm_syscalls.c b/trunk/fs/xfs/quota/xfs_qm_syscalls.c index 609246f42e6c..2dadb15d5ca9 100644 --- a/trunk/fs/xfs/quota/xfs_qm_syscalls.c +++ b/trunk/fs/xfs/quota/xfs_qm_syscalls.c @@ -263,7 +263,7 @@ xfs_qm_scall_trunc_qfile( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip); - error = xfs_itruncate_data(&tp, ip, 0); + error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, 1); if (error) { xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); @@ -622,6 +622,7 @@ xfs_qm_scall_setqlim( xfs_trans_log_dquot(tp, dqp); error = xfs_trans_commit(tp, 0); + xfs_qm_dqprint(dqp); xfs_qm_dqrele(dqp); out_unlock: @@ -656,6 +657,7 @@ xfs_qm_scall_getquota( xfs_qm_dqput(dqp); return XFS_ERROR(ENOENT); } + /* xfs_qm_dqprint(dqp); */ /* * Convert the disk dquot to the exportable format */ @@ -904,3 +906,354 @@ xfs_qm_dqrele_all_inodes( ASSERT(mp->m_quotainfo); 
xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags); } + +/*------------------------------------------------------------------------*/ +#ifdef DEBUG +/* + * This contains all the test functions for XFS disk quotas. + * Currently it does a quota accounting check. ie. it walks through + * all inodes in the file system, calculating the dquot accounting fields, + * and prints out any inconsistencies. + */ +xfs_dqhash_t *qmtest_udqtab; +xfs_dqhash_t *qmtest_gdqtab; +int qmtest_hashmask; +int qmtest_nfails; +struct mutex qcheck_lock; + +#define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \ + (__psunsigned_t)(id)) & \ + (qmtest_hashmask - 1)) + +#define DQTEST_HASH(mp, id, type) ((type & XFS_DQ_USER) ? \ + (qmtest_udqtab + \ + DQTEST_HASHVAL(mp, id)) : \ + (qmtest_gdqtab + \ + DQTEST_HASHVAL(mp, id))) + +#define DQTEST_LIST_PRINT(l, NXT, title) \ +{ \ + xfs_dqtest_t *dqp; int i = 0;\ + xfs_debug(NULL, "%s (#%d)", title, (int) (l)->qh_nelems); \ + for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \ + dqp = (xfs_dqtest_t *)dqp->NXT) { \ + xfs_debug(dqp->q_mount, \ + " %d. 
\"%d (%s)\" bcnt = %d, icnt = %d", \ + ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \ + dqp->d_bcount, dqp->d_icount); } \ +} + +typedef struct dqtest { + uint dq_flags; /* various flags (XFS_DQ_*) */ + struct list_head q_hashlist; + xfs_dqhash_t *q_hash; /* the hashchain header */ + xfs_mount_t *q_mount; /* filesystem this relates to */ + xfs_dqid_t d_id; /* user id or group id */ + xfs_qcnt_t d_bcount; /* # disk blocks owned by the user */ + xfs_qcnt_t d_icount; /* # inodes owned by the user */ +} xfs_dqtest_t; + +STATIC void +xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp) +{ + list_add(&dqp->q_hashlist, &h->qh_list); + h->qh_version++; + h->qh_nelems++; +} +STATIC void +xfs_qm_dqtest_print( + struct xfs_mount *mp, + struct dqtest *d) +{ + xfs_debug(mp, "-----------DQTEST DQUOT----------------"); + xfs_debug(mp, "---- dquot ID = %d", d->d_id); + xfs_debug(mp, "---- fs = 0x%p", d->q_mount); + xfs_debug(mp, "---- bcount = %Lu (0x%x)", + d->d_bcount, (int)d->d_bcount); + xfs_debug(mp, "---- icount = %Lu (0x%x)", + d->d_icount, (int)d->d_icount); + xfs_debug(mp, "---------------------------"); +} + +STATIC void +xfs_qm_dqtest_failed( + xfs_dqtest_t *d, + xfs_dquot_t *dqp, + char *reason, + xfs_qcnt_t a, + xfs_qcnt_t b, + int error) +{ + qmtest_nfails++; + if (error) + xfs_debug(dqp->q_mount, + "quotacheck failed id=%d, err=%d\nreason: %s", + d->d_id, error, reason); + else + xfs_debug(dqp->q_mount, + "quotacheck failed id=%d (%s) [%d != %d]", + d->d_id, reason, (int)a, (int)b); + xfs_qm_dqtest_print(dqp->q_mount, d); + if (dqp) + xfs_qm_dqprint(dqp); +} + +STATIC int +xfs_dqtest_cmp2( + xfs_dqtest_t *d, + xfs_dquot_t *dqp) +{ + int err = 0; + if (be64_to_cpu(dqp->q_core.d_icount) != d->d_icount) { + xfs_qm_dqtest_failed(d, dqp, "icount mismatch", + be64_to_cpu(dqp->q_core.d_icount), + d->d_icount, 0); + err++; + } + if (be64_to_cpu(dqp->q_core.d_bcount) != d->d_bcount) { + xfs_qm_dqtest_failed(d, dqp, "bcount mismatch", + be64_to_cpu(dqp->q_core.d_bcount), + 
d->d_bcount, 0); + err++; + } + if (dqp->q_core.d_blk_softlimit && + be64_to_cpu(dqp->q_core.d_bcount) >= + be64_to_cpu(dqp->q_core.d_blk_softlimit)) { + if (!dqp->q_core.d_btimer && dqp->q_core.d_id) { + xfs_debug(dqp->q_mount, + "%d [%s] BLK TIMER NOT STARTED", + d->d_id, DQFLAGTO_TYPESTR(d)); + err++; + } + } + if (dqp->q_core.d_ino_softlimit && + be64_to_cpu(dqp->q_core.d_icount) >= + be64_to_cpu(dqp->q_core.d_ino_softlimit)) { + if (!dqp->q_core.d_itimer && dqp->q_core.d_id) { + xfs_debug(dqp->q_mount, + "%d [%s] INO TIMER NOT STARTED", + d->d_id, DQFLAGTO_TYPESTR(d)); + err++; + } + } +#ifdef QUOTADEBUG + if (!err) { + xfs_debug(dqp->q_mount, "%d [%s] qchecked", + d->d_id, DQFLAGTO_TYPESTR(d)); + } +#endif + return (err); +} + +STATIC void +xfs_dqtest_cmp( + xfs_dqtest_t *d) +{ + xfs_dquot_t *dqp; + int error; + + /* xfs_qm_dqtest_print(d); */ + if ((error = xfs_qm_dqget(d->q_mount, NULL, d->d_id, d->dq_flags, 0, + &dqp))) { + xfs_qm_dqtest_failed(d, NULL, "dqget failed", 0, 0, error); + return; + } + xfs_dqtest_cmp2(d, dqp); + xfs_qm_dqput(dqp); +} + +STATIC int +xfs_qm_internalqcheck_dqget( + xfs_mount_t *mp, + xfs_dqid_t id, + uint type, + xfs_dqtest_t **O_dq) +{ + xfs_dqtest_t *d; + xfs_dqhash_t *h; + + h = DQTEST_HASH(mp, id, type); + list_for_each_entry(d, &h->qh_list, q_hashlist) { + if (d->d_id == id && mp == d->q_mount) { + *O_dq = d; + return (0); + } + } + d = kmem_zalloc(sizeof(xfs_dqtest_t), KM_SLEEP); + d->dq_flags = type; + d->d_id = id; + d->q_mount = mp; + d->q_hash = h; + INIT_LIST_HEAD(&d->q_hashlist); + xfs_qm_hashinsert(h, d); + *O_dq = d; + return (0); +} + +STATIC void +xfs_qm_internalqcheck_get_dquots( + xfs_mount_t *mp, + xfs_dqid_t uid, + xfs_dqid_t projid, + xfs_dqid_t gid, + xfs_dqtest_t **ud, + xfs_dqtest_t **gd) +{ + if (XFS_IS_UQUOTA_ON(mp)) + xfs_qm_internalqcheck_dqget(mp, uid, XFS_DQ_USER, ud); + if (XFS_IS_GQUOTA_ON(mp)) + xfs_qm_internalqcheck_dqget(mp, gid, XFS_DQ_GROUP, gd); + else if (XFS_IS_PQUOTA_ON(mp)) + 
xfs_qm_internalqcheck_dqget(mp, projid, XFS_DQ_PROJ, gd); +} + + +STATIC void +xfs_qm_internalqcheck_dqadjust( + xfs_inode_t *ip, + xfs_dqtest_t *d) +{ + d->d_icount++; + d->d_bcount += (xfs_qcnt_t)ip->i_d.di_nblocks; +} + +STATIC int +xfs_qm_internalqcheck_adjust( + xfs_mount_t *mp, /* mount point for filesystem */ + xfs_ino_t ino, /* inode number to get data for */ + void __user *buffer, /* not used */ + int ubsize, /* not used */ + int *ubused, /* not used */ + int *res) /* bulkstat result code */ +{ + xfs_inode_t *ip; + xfs_dqtest_t *ud, *gd; + uint lock_flags; + boolean_t ipreleased; + int error; + + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { + *res = BULKSTAT_RV_NOTHING; + xfs_debug(mp, "%s: ino=%llu, uqino=%llu, gqino=%llu\n", + __func__, (unsigned long long) ino, + (unsigned long long) mp->m_sb.sb_uquotino, + (unsigned long long) mp->m_sb.sb_gquotino); + return XFS_ERROR(EINVAL); + } + ipreleased = B_FALSE; + again: + lock_flags = XFS_ILOCK_SHARED; + if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) { + *res = BULKSTAT_RV_NOTHING; + return (error); + } + + /* + * This inode can have blocks after eof which can get released + * when we send it to inactive. Since we don't check the dquot + * until the after all our calculations are done, we must get rid + * of those now. + */ + if (! 
ipreleased) { + xfs_iunlock(ip, lock_flags); + IRELE(ip); + ipreleased = B_TRUE; + goto again; + } + xfs_qm_internalqcheck_get_dquots(mp, + (xfs_dqid_t) ip->i_d.di_uid, + (xfs_dqid_t) xfs_get_projid(ip), + (xfs_dqid_t) ip->i_d.di_gid, + &ud, &gd); + if (XFS_IS_UQUOTA_ON(mp)) { + ASSERT(ud); + xfs_qm_internalqcheck_dqadjust(ip, ud); + } + if (XFS_IS_OQUOTA_ON(mp)) { + ASSERT(gd); + xfs_qm_internalqcheck_dqadjust(ip, gd); + } + xfs_iunlock(ip, lock_flags); + IRELE(ip); + *res = BULKSTAT_RV_DIDONE; + return (0); +} + + +/* PRIVATE, debugging */ +int +xfs_qm_internalqcheck( + xfs_mount_t *mp) +{ + xfs_ino_t lastino; + int done, count; + int i; + int error; + + lastino = 0; + qmtest_hashmask = 32; + count = 5; + done = 0; + qmtest_nfails = 0; + + if (! XFS_IS_QUOTA_ON(mp)) + return XFS_ERROR(ESRCH); + + xfs_log_force(mp, XFS_LOG_SYNC); + XFS_bflush(mp->m_ddev_targp); + xfs_log_force(mp, XFS_LOG_SYNC); + XFS_bflush(mp->m_ddev_targp); + + mutex_lock(&qcheck_lock); + /* There should be absolutely no quota activity while this + is going on. 
*/ + qmtest_udqtab = kmem_zalloc(qmtest_hashmask * + sizeof(xfs_dqhash_t), KM_SLEEP); + qmtest_gdqtab = kmem_zalloc(qmtest_hashmask * + sizeof(xfs_dqhash_t), KM_SLEEP); + do { + /* + * Iterate thru all the inodes in the file system, + * adjusting the corresponding dquot counters + */ + error = xfs_bulkstat(mp, &lastino, &count, + xfs_qm_internalqcheck_adjust, + 0, NULL, &done); + if (error) { + xfs_debug(mp, "Bulkstat returned error 0x%x", error); + break; + } + } while (!done); + + xfs_debug(mp, "Checking results against system dquots"); + for (i = 0; i < qmtest_hashmask; i++) { + xfs_dqtest_t *d, *n; + xfs_dqhash_t *h; + + h = &qmtest_udqtab[i]; + list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) { + xfs_dqtest_cmp(d); + kmem_free(d); + } + h = &qmtest_gdqtab[i]; + list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) { + xfs_dqtest_cmp(d); + kmem_free(d); + } + } + + if (qmtest_nfails) { + xfs_debug(mp, "******** quotacheck failed ********"); + xfs_debug(mp, "failures = %d", qmtest_nfails); + } else { + xfs_debug(mp, "******** quotacheck successful! ********"); + } + kmem_free(qmtest_udqtab); + kmem_free(qmtest_gdqtab); + mutex_unlock(&qcheck_lock); + return (qmtest_nfails); +} + +#endif /* DEBUG */ diff --git a/trunk/fs/xfs/quota/xfs_trans_dquot.c b/trunk/fs/xfs/quota/xfs_trans_dquot.c index 4d00ee67792d..2a3648731331 100644 --- a/trunk/fs/xfs/quota/xfs_trans_dquot.c +++ b/trunk/fs/xfs/quota/xfs_trans_dquot.c @@ -59,7 +59,7 @@ xfs_trans_dqjoin( xfs_trans_add_item(tp, &dqp->q_logitem.qli_item); /* - * Initialize d_transp so we can later determine if this dquot is + * Initialize i_transp so we can later determine if this dquot is * associated with this transaction. 
*/ dqp->q_transp = tp; @@ -387,18 +387,18 @@ xfs_trans_apply_dquot_deltas( qtrx->qt_delbcnt_delta; totalrtbdelta = qtrx->qt_rtbcount_delta + qtrx->qt_delrtb_delta; -#ifdef DEBUG +#ifdef QUOTADEBUG if (totalbdelta < 0) ASSERT(be64_to_cpu(d->d_bcount) >= - -totalbdelta); + (xfs_qcnt_t) -totalbdelta); if (totalrtbdelta < 0) ASSERT(be64_to_cpu(d->d_rtbcount) >= - -totalrtbdelta); + (xfs_qcnt_t) -totalrtbdelta); if (qtrx->qt_icount_delta < 0) ASSERT(be64_to_cpu(d->d_icount) >= - -qtrx->qt_icount_delta); + (xfs_qcnt_t) -qtrx->qt_icount_delta); #endif if (totalbdelta) be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta); @@ -642,6 +642,11 @@ xfs_trans_dqresv( ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) || (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { +#ifdef QUOTADEBUG + xfs_debug(mp, + "BLK Res: nblks=%ld + resbcount=%Ld > hardlimit=%Ld?", + nblks, *resbcountp, hardlimit); +#endif if (nblks > 0) { /* * dquot is locked already. See if we'd go over the diff --git a/trunk/fs/xfs/xfs.h b/trunk/fs/xfs/xfs.h index 53ec3ea9a625..5ad8ad3a1dcd 100644 --- a/trunk/fs/xfs/xfs.h +++ b/trunk/fs/xfs/xfs.h @@ -22,6 +22,7 @@ #define STATIC #define DEBUG 1 #define XFS_BUF_LOCK_TRACKING 1 +/* #define QUOTADEBUG 1 */ #endif #include diff --git a/trunk/fs/xfs/xfs_alloc.c b/trunk/fs/xfs/xfs_alloc.c index 1e00b3ef6274..95862bbff56b 100644 --- a/trunk/fs/xfs/xfs_alloc.c +++ b/trunk/fs/xfs/xfs_alloc.c @@ -570,7 +570,9 @@ xfs_alloc_ag_vextent_exact( xfs_agblock_t tbno; /* start block of trimmed extent */ xfs_extlen_t tlen; /* length of trimmed extent */ xfs_agblock_t tend; /* end block of trimmed extent */ + xfs_agblock_t end; /* end of allocated extent */ int i; /* success/failure of operation */ + xfs_extlen_t rlen; /* length of returned extent */ ASSERT(args->alignment == 1); @@ -623,16 +625,18 @@ xfs_alloc_ag_vextent_exact( * * Fix the length according to mod and prod if given. 
*/ - args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) - - args->agbno; + end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen); + args->len = end - args->agbno; xfs_alloc_fix_len(args); if (!xfs_alloc_fix_minleft(args)) goto not_found; - ASSERT(args->agbno + args->len <= tend); + rlen = args->len; + ASSERT(args->agbno + rlen <= tend); + end = args->agbno + rlen; /* - * We are allocating agbno for args->len + * We are allocating agbno for rlen [agbno .. end] * Allocate/initialize a cursor for the by-size btree. */ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, @@ -2123,7 +2127,7 @@ xfs_read_agf( * Validate the magic number of the agf block. */ agf_ok = - agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && + be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC && XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && diff --git a/trunk/fs/xfs/xfs_alloc_btree.c b/trunk/fs/xfs/xfs_alloc_btree.c index ffb3386e45c1..2b3518826a69 100644 --- a/trunk/fs/xfs/xfs_alloc_btree.c +++ b/trunk/fs/xfs/xfs_alloc_btree.c @@ -31,6 +31,7 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -310,6 +311,72 @@ xfs_allocbt_recs_inorder( } #endif /* DEBUG */ +#ifdef XFS_BTREE_TRACE +ktrace_t *xfs_allocbt_trace_buf; + +STATIC void +xfs_allocbt_trace_enter( + struct xfs_btree_cur *cur, + const char *func, + char *s, + int type, + int line, + __psunsigned_t a0, + __psunsigned_t a1, + __psunsigned_t a2, + __psunsigned_t a3, + __psunsigned_t a4, + __psunsigned_t a5, + __psunsigned_t a6, + __psunsigned_t a7, + __psunsigned_t a8, + __psunsigned_t a9, + __psunsigned_t a10) +{ + ktrace_enter(xfs_allocbt_trace_buf, (void *)(__psint_t)type, + (void *)func, (void *)s, NULL, (void *)cur, + (void *)a0, (void *)a1, (void *)a2, (void *)a3, 
+ (void *)a4, (void *)a5, (void *)a6, (void *)a7, + (void *)a8, (void *)a9, (void *)a10); +} + +STATIC void +xfs_allocbt_trace_cursor( + struct xfs_btree_cur *cur, + __uint32_t *s0, + __uint64_t *l0, + __uint64_t *l1) +{ + *s0 = cur->bc_private.a.agno; + *l0 = cur->bc_rec.a.ar_startblock; + *l1 = cur->bc_rec.a.ar_blockcount; +} + +STATIC void +xfs_allocbt_trace_key( + struct xfs_btree_cur *cur, + union xfs_btree_key *key, + __uint64_t *l0, + __uint64_t *l1) +{ + *l0 = be32_to_cpu(key->alloc.ar_startblock); + *l1 = be32_to_cpu(key->alloc.ar_blockcount); +} + +STATIC void +xfs_allocbt_trace_record( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + __uint64_t *l0, + __uint64_t *l1, + __uint64_t *l2) +{ + *l0 = be32_to_cpu(rec->alloc.ar_startblock); + *l1 = be32_to_cpu(rec->alloc.ar_blockcount); + *l2 = 0; +} +#endif /* XFS_BTREE_TRACE */ + static const struct xfs_btree_ops xfs_allocbt_ops = { .rec_len = sizeof(xfs_alloc_rec_t), .key_len = sizeof(xfs_alloc_key_t), @@ -326,10 +393,18 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, .key_diff = xfs_allocbt_key_diff, + #ifdef DEBUG .keys_inorder = xfs_allocbt_keys_inorder, .recs_inorder = xfs_allocbt_recs_inorder, #endif + +#ifdef XFS_BTREE_TRACE + .trace_enter = xfs_allocbt_trace_enter, + .trace_cursor = xfs_allocbt_trace_cursor, + .trace_key = xfs_allocbt_trace_key, + .trace_record = xfs_allocbt_trace_record, +#endif }; /* @@ -352,16 +427,13 @@ xfs_allocbt_init_cursor( cur->bc_tp = tp; cur->bc_mp = mp; + cur->bc_nlevels = be32_to_cpu(agf->agf_levels[btnum]); cur->bc_btnum = btnum; cur->bc_blocklog = mp->m_sb.sb_blocklog; - cur->bc_ops = &xfs_allocbt_ops; - if (btnum == XFS_BTNUM_CNT) { - cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); + cur->bc_ops = &xfs_allocbt_ops; + if (btnum == XFS_BTNUM_CNT) cur->bc_flags = XFS_BTREE_LASTREC_UPDATE; - } else { - cur->bc_nlevels = 
be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); - } cur->bc_private.a.agbp = agbp; cur->bc_private.a.agno = agno; diff --git a/trunk/fs/xfs/xfs_arch.h b/trunk/fs/xfs/xfs_arch.h new file mode 100644 index 000000000000..0902249354a0 --- /dev/null +++ b/trunk/fs/xfs/xfs_arch.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_ARCH_H__ +#define __XFS_ARCH_H__ + +#ifndef XFS_BIG_INUMS +# error XFS_BIG_INUMS must be defined true or false +#endif + +#ifdef __KERNEL__ + +#include + +#ifdef __BIG_ENDIAN +#define XFS_NATIVE_HOST 1 +#else +#undef XFS_NATIVE_HOST +#endif + +#else /* __KERNEL__ */ + +#if __BYTE_ORDER == __BIG_ENDIAN +#define XFS_NATIVE_HOST 1 +#else +#undef XFS_NATIVE_HOST +#endif + +#ifdef XFS_NATIVE_HOST +#define cpu_to_be16(val) ((__force __be16)(__u16)(val)) +#define cpu_to_be32(val) ((__force __be32)(__u32)(val)) +#define cpu_to_be64(val) ((__force __be64)(__u64)(val)) +#define be16_to_cpu(val) ((__force __u16)(__be16)(val)) +#define be32_to_cpu(val) ((__force __u32)(__be32)(val)) +#define be64_to_cpu(val) ((__force __u64)(__be64)(val)) +#else +#define cpu_to_be16(val) ((__force __be16)__swab16((__u16)(val))) +#define cpu_to_be32(val) ((__force __be32)__swab32((__u32)(val))) +#define cpu_to_be64(val) ((__force __be64)__swab64((__u64)(val))) +#define 
be16_to_cpu(val) (__swab16((__force __u16)(__be16)(val))) +#define be32_to_cpu(val) (__swab32((__force __u32)(__be32)(val))) +#define be64_to_cpu(val) (__swab64((__force __u64)(__be64)(val))) +#endif + +static inline void be16_add_cpu(__be16 *a, __s16 b) +{ + *a = cpu_to_be16(be16_to_cpu(*a) + b); +} + +static inline void be32_add_cpu(__be32 *a, __s32 b) +{ + *a = cpu_to_be32(be32_to_cpu(*a) + b); +} + +static inline void be64_add_cpu(__be64 *a, __s64 b) +{ + *a = cpu_to_be64(be64_to_cpu(*a) + b); +} + +#endif /* __KERNEL__ */ + +/* + * get and set integers from potentially unaligned locations + */ + +#define INT_GET_UNALIGNED_16_BE(pointer) \ + ((__u16)((((__u8*)(pointer))[0] << 8) | (((__u8*)(pointer))[1]))) +#define INT_SET_UNALIGNED_16_BE(pointer,value) \ + { \ + ((__u8*)(pointer))[0] = (((value) >> 8) & 0xff); \ + ((__u8*)(pointer))[1] = (((value) ) & 0xff); \ + } + +/* + * In directories inode numbers are stored as unaligned arrays of unsigned + * 8bit integers on disk. + * + * For v1 directories or v2 directories that contain inode numbers that + * do not fit into 32bit the array has eight members, but the first member + * is always zero: + * + * |unused|48-55|40-47|32-39|24-31|16-23| 8-15| 0- 7| + * + * For v2 directories that only contain entries with inode numbers that fit + * into 32bits a four-member array is used: + * + * |24-31|16-23| 8-15| 0- 7| + */ + +#define XFS_GET_DIR_INO4(di) \ + (((__u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3])) + +#define XFS_PUT_DIR_INO4(from, di) \ +do { \ + (di).i[0] = (((from) & 0xff000000ULL) >> 24); \ + (di).i[1] = (((from) & 0x00ff0000ULL) >> 16); \ + (di).i[2] = (((from) & 0x0000ff00ULL) >> 8); \ + (di).i[3] = ((from) & 0x000000ffULL); \ +} while (0) + +#define XFS_DI_HI(di) \ + (((__u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3])) +#define XFS_DI_LO(di) \ + (((__u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7])) + +#define XFS_GET_DIR_INO8(di) \ + 
(((xfs_ino_t)XFS_DI_LO(di) & 0xffffffffULL) | \ + ((xfs_ino_t)XFS_DI_HI(di) << 32)) + +#define XFS_PUT_DIR_INO8(from, di) \ +do { \ + (di).i[0] = 0; \ + (di).i[1] = (((from) & 0x00ff000000000000ULL) >> 48); \ + (di).i[2] = (((from) & 0x0000ff0000000000ULL) >> 40); \ + (di).i[3] = (((from) & 0x000000ff00000000ULL) >> 32); \ + (di).i[4] = (((from) & 0x00000000ff000000ULL) >> 24); \ + (di).i[5] = (((from) & 0x0000000000ff0000ULL) >> 16); \ + (di).i[6] = (((from) & 0x000000000000ff00ULL) >> 8); \ + (di).i[7] = ((from) & 0x00000000000000ffULL); \ +} while (0) + +#endif /* __XFS_ARCH_H__ */ diff --git a/trunk/fs/xfs/xfs_attr.c b/trunk/fs/xfs/xfs_attr.c index cbae424fe1ba..01d2072fb6d4 100644 --- a/trunk/fs/xfs/xfs_attr.c +++ b/trunk/fs/xfs/xfs_attr.c @@ -822,21 +822,17 @@ xfs_attr_inactive(xfs_inode_t *dp) error = xfs_attr_root_inactive(&trans, dp); if (error) goto out; - /* - * Signal synchronous inactive transactions unless this is a - * synchronous mount filesystem in which case we know that we're here - * because we've been called out of xfs_inactive which means that the - * last reference is gone and the unlink transaction has already hit - * the disk so async inactive transactions are safe. + * signal synchronous inactive transactions unless this + * is a synchronous mount filesystem in which case we + * know that we're here because we've been called out of + * xfs_inactive which means that the last reference is gone + * and the unlink transaction has already hit the disk so + * async inactive transactions are safe. */ - if (!(mp->m_flags & XFS_MOUNT_WSYNC)) { - if (dp->i_d.di_anextents > 0) - xfs_trans_set_sync(trans); - } - - error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); - if (error) + if ((error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK, + (!(mp->m_flags & XFS_MOUNT_WSYNC) + ? 
1 : 0)))) goto out; /* @@ -1203,7 +1199,7 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context) return XFS_ERROR(error); ASSERT(bp != NULL); leaf = bp->data; - if (unlikely(leaf->hdr.info.magic != cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) { + if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) { XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW, context->dp->i_mount, leaf); xfs_da_brelse(NULL, bp); @@ -1610,8 +1606,9 @@ xfs_attr_node_removename(xfs_da_args_t *args) XFS_ATTR_FORK); if (error) goto out; - ASSERT((((xfs_attr_leafblock_t *)bp->data)->hdr.info.magic) == - cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(((xfs_attr_leafblock_t *) + bp->data)->hdr.info.magic) + == XFS_ATTR_LEAF_MAGIC); if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { xfs_bmap_init(args->flist, args->firstblock); @@ -1876,11 +1873,11 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) return(XFS_ERROR(EFSCORRUPTED)); } node = bp->data; - if (node->hdr.info.magic == - cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) + if (be16_to_cpu(node->hdr.info.magic) + == XFS_ATTR_LEAF_MAGIC) break; - if (unlikely(node->hdr.info.magic != - cpu_to_be16(XFS_DA_NODE_MAGIC))) { + if (unlikely(be16_to_cpu(node->hdr.info.magic) + != XFS_DA_NODE_MAGIC)) { XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)", XFS_ERRLEVEL_LOW, context->dp->i_mount, @@ -1915,8 +1912,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) */ for (;;) { leaf = bp->data; - if (unlikely(leaf->hdr.info.magic != - cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) { + if (unlikely(be16_to_cpu(leaf->hdr.info.magic) + != XFS_ATTR_LEAF_MAGIC)) { XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)", XFS_ERRLEVEL_LOW, context->dp->i_mount, leaf); diff --git a/trunk/fs/xfs/xfs_attr_leaf.c b/trunk/fs/xfs/xfs_attr_leaf.c index 8fad9602542b..71e90dc2aeb1 100644 --- a/trunk/fs/xfs/xfs_attr_leaf.c +++ b/trunk/fs/xfs/xfs_attr_leaf.c @@ -731,7 +731,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp) int bytes, i; leaf = bp->data; - 
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); entry = &leaf->entries[0]; bytes = sizeof(struct xfs_attr_sf_hdr); @@ -777,7 +777,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) ASSERT(bp != NULL); memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount)); leaf = (xfs_attr_leafblock_t *)tmpbuffer; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); memset(bp->data, 0, XFS_LBSIZE(dp->i_mount)); /* @@ -872,7 +872,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args) goto out; node = bp1->data; leaf = bp2->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); /* both on-disk, don't endian-flip twice */ node->btree[0].hashval = leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval; @@ -997,7 +997,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args) int tablesize, entsize, sum, tmp, i; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(leaf->hdr.count))); hdr = &leaf->hdr; @@ -1070,7 +1070,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) int tmp, i; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); hdr = &leaf->hdr; ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE)); ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(hdr->count))); @@ -1256,8 +1256,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC); leaf1 = blk1->bp->data; leaf2 = blk2->bp->data; - ASSERT(leaf1->hdr.info.magic == 
cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); - ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); args = state->args; /* @@ -1533,7 +1533,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) */ blk = &state->path.blk[ state->path.active-1 ]; info = blk->bp->data; - ASSERT(info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); leaf = (xfs_attr_leafblock_t *)info; count = be16_to_cpu(leaf->hdr.count); bytes = sizeof(xfs_attr_leaf_hdr_t) + @@ -1596,7 +1596,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) bytes = state->blocksize - (state->blocksize>>2); bytes -= be16_to_cpu(leaf->hdr.usedbytes); leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); count += be16_to_cpu(leaf->hdr.count); bytes -= be16_to_cpu(leaf->hdr.usedbytes); bytes -= count * sizeof(xfs_attr_leaf_entry_t); @@ -1650,7 +1650,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) xfs_mount_t *mp; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); hdr = &leaf->hdr; mp = args->trans->t_mountp; ASSERT((be16_to_cpu(hdr->count) > 0) @@ -1813,8 +1813,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC); drop_leaf = drop_blk->bp->data; save_leaf = save_blk->bp->data; - ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); - ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); drop_hdr = &drop_leaf->hdr; save_hdr = &save_leaf->hdr; 
@@ -1915,7 +1915,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) xfs_dahash_t hashval; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(be16_to_cpu(leaf->hdr.count) < (XFS_LBSIZE(args->dp->i_mount)/8)); @@ -2019,7 +2019,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args) xfs_attr_leaf_name_remote_t *name_rmt; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(be16_to_cpu(leaf->hdr.count) < (XFS_LBSIZE(args->dp->i_mount)/8)); ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); @@ -2087,8 +2087,8 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, /* * Set up environment. */ - ASSERT(leaf_s->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); - ASSERT(leaf_d->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); hdr_s = &leaf_s->hdr; hdr_d = &leaf_d->hdr; ASSERT((be16_to_cpu(hdr_s->count) > 0) && @@ -2222,8 +2222,8 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp) leaf1 = leaf1_bp->data; leaf2 = leaf2_bp->data; - ASSERT((leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) && - (leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC))); + ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC) && + (be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC)); if ((be16_to_cpu(leaf1->hdr.count) > 0) && (be16_to_cpu(leaf2->hdr.count) > 0) && ((be32_to_cpu(leaf2->entries[0].hashval) < @@ -2246,7 +2246,7 @@ xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count) xfs_attr_leafblock_t *leaf; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == 
XFS_ATTR_LEAF_MAGIC); if (count) *count = be16_to_cpu(leaf->hdr.count); if (!leaf->hdr.count) @@ -2265,7 +2265,7 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) xfs_attr_leaf_name_remote_t *name_rmt; int size; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); if (leaf->entries[index].flags & XFS_ATTR_LOCAL) { name_loc = xfs_attr_leaf_name_local(leaf, index); size = xfs_attr_leaf_entsize_local(name_loc->namelen, @@ -2451,7 +2451,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) ASSERT(bp != NULL); leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); ASSERT(args->index >= 0); entry = &leaf->entries[ args->index ]; @@ -2515,7 +2515,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args) ASSERT(bp != NULL); leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); ASSERT(args->index >= 0); entry = &leaf->entries[ args->index ]; @@ -2585,13 +2585,13 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) } leaf1 = bp1->data; - ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(args->index < be16_to_cpu(leaf1->hdr.count)); ASSERT(args->index >= 0); entry1 = &leaf1->entries[ args->index ]; leaf2 = bp2->data; - ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count)); ASSERT(args->index2 >= 0); entry2 = &leaf2->entries[ args->index2 ]; @@ -2689,9 +2689,9 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp) * This is a depth-first traversal! 
*/ info = bp->data; - if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { + if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) { error = xfs_attr_node_inactive(trans, dp, bp, 1); - } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) { + } else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) { error = xfs_attr_leaf_inactive(trans, dp, bp); } else { error = XFS_ERROR(EIO); @@ -2739,7 +2739,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, } node = bp->data; - ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); parent_blkno = xfs_da_blkno(bp); /* save for re-read later */ count = be16_to_cpu(node->hdr.count); if (!count) { @@ -2773,10 +2773,10 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, * Invalidate the subtree, however we have to. */ info = child_bp->data; - if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { + if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) { error = xfs_attr_node_inactive(trans, dp, child_bp, level+1); - } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) { + } else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) { error = xfs_attr_leaf_inactive(trans, dp, child_bp); } else { @@ -2836,7 +2836,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) int error, count, size, tmp, i; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); /* * Count the number of "remote" value extents. 
diff --git a/trunk/fs/xfs/xfs_bmap.c b/trunk/fs/xfs/xfs_bmap.c index c51a3f903633..e546a33214c9 100644 --- a/trunk/fs/xfs/xfs_bmap.c +++ b/trunk/fs/xfs/xfs_bmap.c @@ -29,11 +29,15 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" +#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_mount.h" #include "xfs_itable.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" #include "xfs_inode_item.h" #include "xfs_extfree_item.h" #include "xfs_alloc.h" @@ -90,7 +94,6 @@ xfs_bmap_add_attrfork_local( */ STATIC int /* error */ xfs_bmap_add_extent_delay_real( - struct xfs_trans *tp, /* transaction pointer */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ @@ -436,7 +439,6 @@ xfs_bmap_add_attrfork_local( */ STATIC int /* error */ xfs_bmap_add_extent( - struct xfs_trans *tp, /* transaction pointer */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ @@ -522,7 +524,7 @@ xfs_bmap_add_extent( if (cur) ASSERT(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL); - error = xfs_bmap_add_extent_delay_real(tp, ip, + error = xfs_bmap_add_extent_delay_real(ip, idx, &cur, new, &da_new, first, flist, &logflags); } else { @@ -559,7 +561,7 @@ xfs_bmap_add_extent( int tmp_logflags; /* partial log flag return val */ ASSERT(cur == NULL); - error = xfs_bmap_extents_to_btree(tp, ip, first, + error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first, flist, &cur, da_old > 0, &tmp_logflags, whichfork); logflags |= tmp_logflags; if (error) @@ -602,7 +604,6 @@ xfs_bmap_add_extent( */ STATIC int /* error */ xfs_bmap_add_extent_delay_real( - struct xfs_trans *tp, /* transaction pointer */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t *idx, /* extent 
number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ @@ -900,7 +901,7 @@ xfs_bmap_add_extent_delay_real( } if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && ip->i_d.di_nextents > ip->i_df.if_ext_max) { - error = xfs_bmap_extents_to_btree(tp, ip, + error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first, flist, &cur, 1, &tmp_rval, XFS_DATA_FORK); rval |= tmp_rval; @@ -983,7 +984,7 @@ xfs_bmap_add_extent_delay_real( } if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && ip->i_d.di_nextents > ip->i_df.if_ext_max) { - error = xfs_bmap_extents_to_btree(tp, ip, + error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first, flist, &cur, 1, &tmp_rval, XFS_DATA_FORK); rval |= tmp_rval; @@ -1051,7 +1052,7 @@ xfs_bmap_add_extent_delay_real( } if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && ip->i_d.di_nextents > ip->i_df.if_ext_max) { - error = xfs_bmap_extents_to_btree(tp, ip, + error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first, flist, &cur, 1, &tmp_rval, XFS_DATA_FORK); rval |= tmp_rval; @@ -2870,8 +2871,8 @@ xfs_bmap_del_extent( len = del->br_blockcount; do_div(bno, mp->m_sb.sb_rextsize); do_div(len, mp->m_sb.sb_rextsize); - error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); - if (error) + if ((error = xfs_rtfree_extent(ip->i_transp, bno, + (xfs_extlen_t)len))) goto done; do_fx = 0; nblks = len * mp->m_sb.sb_rextsize; @@ -4079,7 +4080,7 @@ xfs_bmap_sanity_check( { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) || + if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC || be16_to_cpu(block->bb_level) != level || be16_to_cpu(block->bb_numrecs) == 0 || be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) @@ -4661,7 +4662,7 @@ xfs_bmapi( if (!wasdelay && (flags & XFS_BMAPI_PREALLOC)) got.br_state = XFS_EXT_UNWRITTEN; } - error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, &got, + error = xfs_bmap_add_extent(ip, &lastx, &cur, &got, firstblock, flist, &tmp_logflags, 
whichfork); logflags |= tmp_logflags; @@ -4762,7 +4763,7 @@ xfs_bmapi( mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, mval, + error = xfs_bmap_add_extent(ip, &lastx, &cur, mval, firstblock, flist, &tmp_logflags, whichfork); logflags |= tmp_logflags; @@ -5116,7 +5117,7 @@ xfs_bunmapi( del.br_blockcount = mod; } del.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, &del, + error = xfs_bmap_add_extent(ip, &lastx, &cur, &del, firstblock, flist, &logflags, XFS_DATA_FORK); if (error) @@ -5174,18 +5175,18 @@ xfs_bunmapi( } prev.br_state = XFS_EXT_UNWRITTEN; lastx--; - error = xfs_bmap_add_extent(tp, ip, &lastx, - &cur, &prev, firstblock, flist, - &logflags, XFS_DATA_FORK); + error = xfs_bmap_add_extent(ip, &lastx, &cur, + &prev, firstblock, flist, &logflags, + XFS_DATA_FORK); if (error) goto error0; goto nodelete; } else { ASSERT(del.br_state == XFS_EXT_NORM); del.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(tp, ip, &lastx, - &cur, &del, firstblock, flist, - &logflags, XFS_DATA_FORK); + error = xfs_bmap_add_extent(ip, &lastx, &cur, + &del, firstblock, flist, &logflags, + XFS_DATA_FORK); if (error) goto error0; goto nodelete; diff --git a/trunk/fs/xfs/xfs_bmap_btree.c b/trunk/fs/xfs/xfs_bmap_btree.c index e2f5d59cbeaf..87d3c10b6954 100644 --- a/trunk/fs/xfs/xfs_bmap_btree.c +++ b/trunk/fs/xfs/xfs_bmap_btree.c @@ -33,6 +33,7 @@ #include "xfs_inode_item.h" #include "xfs_alloc.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_itable.h" #include "xfs_bmap.h" #include "xfs_error.h" @@ -424,10 +425,10 @@ xfs_bmbt_to_bmdr( xfs_bmbt_key_t *tkp; __be64 *tpp; - ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC)); - ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)); - ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)); - ASSERT(rblock->bb_level != 0); + ASSERT(be32_to_cpu(rblock->bb_magic) == 
XFS_BMAP_MAGIC); + ASSERT(be64_to_cpu(rblock->bb_u.l.bb_leftsib) == NULLDFSBNO); + ASSERT(be64_to_cpu(rblock->bb_u.l.bb_rightsib) == NULLDFSBNO); + ASSERT(be16_to_cpu(rblock->bb_level) > 0); dblock->bb_level = rblock->bb_level; dblock->bb_numrecs = rblock->bb_numrecs; dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); @@ -731,6 +732,95 @@ xfs_bmbt_recs_inorder( } #endif /* DEBUG */ +#ifdef XFS_BTREE_TRACE +ktrace_t *xfs_bmbt_trace_buf; + +STATIC void +xfs_bmbt_trace_enter( + struct xfs_btree_cur *cur, + const char *func, + char *s, + int type, + int line, + __psunsigned_t a0, + __psunsigned_t a1, + __psunsigned_t a2, + __psunsigned_t a3, + __psunsigned_t a4, + __psunsigned_t a5, + __psunsigned_t a6, + __psunsigned_t a7, + __psunsigned_t a8, + __psunsigned_t a9, + __psunsigned_t a10) +{ + struct xfs_inode *ip = cur->bc_private.b.ip; + int whichfork = cur->bc_private.b.whichfork; + + ktrace_enter(xfs_bmbt_trace_buf, + (void *)((__psint_t)type | (whichfork << 8) | (line << 16)), + (void *)func, (void *)s, (void *)ip, (void *)cur, + (void *)a0, (void *)a1, (void *)a2, (void *)a3, + (void *)a4, (void *)a5, (void *)a6, (void *)a7, + (void *)a8, (void *)a9, (void *)a10); +} + +STATIC void +xfs_bmbt_trace_cursor( + struct xfs_btree_cur *cur, + __uint32_t *s0, + __uint64_t *l0, + __uint64_t *l1) +{ + struct xfs_bmbt_rec_host r; + + xfs_bmbt_set_all(&r, &cur->bc_rec.b); + + *s0 = (cur->bc_nlevels << 24) | + (cur->bc_private.b.flags << 16) | + cur->bc_private.b.allocated; + *l0 = r.l0; + *l1 = r.l1; +} + +STATIC void +xfs_bmbt_trace_key( + struct xfs_btree_cur *cur, + union xfs_btree_key *key, + __uint64_t *l0, + __uint64_t *l1) +{ + *l0 = be64_to_cpu(key->bmbt.br_startoff); + *l1 = 0; +} + +/* Endian flipping versions of the bmbt extraction functions */ +STATIC void +xfs_bmbt_disk_get_all( + xfs_bmbt_rec_t *r, + xfs_bmbt_irec_t *s) +{ + __xfs_bmbt_get_all(get_unaligned_be64(&r->l0), + get_unaligned_be64(&r->l1), s); +} + +STATIC void +xfs_bmbt_trace_record( + struct xfs_btree_cur 
*cur, + union xfs_btree_rec *rec, + __uint64_t *l0, + __uint64_t *l1, + __uint64_t *l2) +{ + struct xfs_bmbt_irec irec; + + xfs_bmbt_disk_get_all(&rec->bmbt, &irec); + *l0 = irec.br_startoff; + *l1 = irec.br_startblock; + *l2 = irec.br_blockcount; +} +#endif /* XFS_BTREE_TRACE */ + static const struct xfs_btree_ops xfs_bmbt_ops = { .rec_len = sizeof(xfs_bmbt_rec_t), .key_len = sizeof(xfs_bmbt_key_t), @@ -747,10 +837,18 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, .key_diff = xfs_bmbt_key_diff, + #ifdef DEBUG .keys_inorder = xfs_bmbt_keys_inorder, .recs_inorder = xfs_bmbt_recs_inorder, #endif + +#ifdef XFS_BTREE_TRACE + .trace_enter = xfs_bmbt_trace_enter, + .trace_cursor = xfs_bmbt_trace_cursor, + .trace_key = xfs_bmbt_trace_key, + .trace_record = xfs_bmbt_trace_record, +#endif }; /* diff --git a/trunk/fs/xfs/xfs_btree.c b/trunk/fs/xfs/xfs_btree.c index cabf4b5604aa..2f9e97c128a0 100644 --- a/trunk/fs/xfs/xfs_btree.c +++ b/trunk/fs/xfs/xfs_btree.c @@ -32,6 +32,7 @@ #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -65,11 +66,11 @@ xfs_btree_check_lblock( be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && block->bb_u.l.bb_leftsib && - (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || + (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO || XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))) && block->bb_u.l.bb_rightsib && - (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || + (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO || XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))); if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, @@ -104,10 +105,10 @@ xfs_btree_check_sblock( be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && - 
(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || + (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK || be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) && block->bb_u.s.bb_leftsib && - (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || + (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK || be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && block->bb_u.s.bb_rightsib; if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp, @@ -510,9 +511,9 @@ xfs_btree_islastblock( block = xfs_btree_get_block(cur, level, &bp); xfs_btree_check_block(cur, block, level, bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO); + return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO; else - return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); + return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK; } /* @@ -776,14 +777,14 @@ xfs_btree_setbuf( b = XFS_BUF_TO_BLOCK(bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)) + if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; - if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)) + if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO) cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; } else { - if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) + if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK) cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; - if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK)) + if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK) cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; } } @@ -794,9 +795,9 @@ xfs_btree_ptr_is_null( union xfs_btree_ptr *ptr) { if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return ptr->l == cpu_to_be64(NULLDFSBNO); + return be64_to_cpu(ptr->l) == NULLDFSBNO; else - return ptr->s == cpu_to_be32(NULLAGBLOCK); + return be32_to_cpu(ptr->s) == NULLAGBLOCK; } STATIC void @@ -922,12 +923,12 @@ xfs_btree_ptr_to_daddr( union xfs_btree_ptr *ptr) { if 
(cur->bc_flags & XFS_BTREE_LONG_PTRS) { - ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO)); + ASSERT(be64_to_cpu(ptr->l) != NULLDFSBNO); return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); } else { ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); - ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK)); + ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK); return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, be32_to_cpu(ptr->s)); diff --git a/trunk/fs/xfs/xfs_btree.h b/trunk/fs/xfs/xfs_btree.h index 8d05a6a46ce3..82fafc66bd1f 100644 --- a/trunk/fs/xfs/xfs_btree.h +++ b/trunk/fs/xfs/xfs_btree.h @@ -199,6 +199,25 @@ struct xfs_btree_ops { union xfs_btree_rec *r1, union xfs_btree_rec *r2); #endif + + /* btree tracing */ +#ifdef XFS_BTREE_TRACE + void (*trace_enter)(struct xfs_btree_cur *, const char *, + char *, int, int, __psunsigned_t, + __psunsigned_t, __psunsigned_t, + __psunsigned_t, __psunsigned_t, + __psunsigned_t, __psunsigned_t, + __psunsigned_t, __psunsigned_t, + __psunsigned_t, __psunsigned_t); + void (*trace_cursor)(struct xfs_btree_cur *, __uint32_t *, + __uint64_t *, __uint64_t *); + void (*trace_key)(struct xfs_btree_cur *, + union xfs_btree_key *, __uint64_t *, + __uint64_t *); + void (*trace_record)(struct xfs_btree_cur *, + union xfs_btree_rec *, __uint64_t *, + __uint64_t *, __uint64_t *); +#endif }; /* @@ -433,23 +452,4 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) -/* - * Trace hooks. Currently not implemented as they need to be ported - * over to the generic tracing functionality, which is some effort. 
- * - * i,j = integer (32 bit) - * b = btree block buffer (xfs_buf_t) - * p = btree ptr - * r = btree record - * k = btree key - */ -#define XFS_BTREE_TRACE_ARGBI(c, b, i) -#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) -#define XFS_BTREE_TRACE_ARGI(c, i) -#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s) -#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) -#define XFS_BTREE_TRACE_ARGIK(c, i, k) -#define XFS_BTREE_TRACE_ARGR(c, r) -#define XFS_BTREE_TRACE_CURSOR(c, t) - #endif /* __XFS_BTREE_H__ */ diff --git a/trunk/fs/xfs/xfs_btree_trace.c b/trunk/fs/xfs/xfs_btree_trace.c new file mode 100644 index 000000000000..44ff942a0fda --- /dev/null +++ b/trunk/fs/xfs/xfs_btree_trace.c @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_btree_trace.h" + +STATIC void +xfs_btree_trace_ptr( + struct xfs_btree_cur *cur, + union xfs_btree_ptr ptr, + __psunsigned_t *high, + __psunsigned_t *low) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + __u64 val = be64_to_cpu(ptr.l); + *high = val >> 32; + *low = (int)val; + } else { + *high = 0; + *low = be32_to_cpu(ptr.s); + } +} + +/* + * Add a trace buffer entry for arguments, for a buffer & 1 integer arg. + */ +void +xfs_btree_trace_argbi( + const char *func, + struct xfs_btree_cur *cur, + struct xfs_buf *b, + int i, + int line) +{ + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBI, + line, (__psunsigned_t)b, i, 0, 0, 0, 0, 0, + 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for a buffer & 2 integer args. + */ +void +xfs_btree_trace_argbii( + const char *func, + struct xfs_btree_cur *cur, + struct xfs_buf *b, + int i0, + int i1, + int line) +{ + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBII, + line, (__psunsigned_t)b, i0, i1, 0, 0, 0, 0, + 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for 3 block-length args + * and an integer arg. + */ +void +xfs_btree_trace_argfffi( + const char *func, + struct xfs_btree_cur *cur, + xfs_dfiloff_t o, + xfs_dfsbno_t b, + xfs_dfilblks_t i, + int j, + int line) +{ + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGFFFI, + line, + o >> 32, (int)o, + b >> 32, (int)b, + i >> 32, (int)i, + (int)j, 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for one integer arg. 
+ */ +void +xfs_btree_trace_argi( + const char *func, + struct xfs_btree_cur *cur, + int i, + int line) +{ + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGI, + line, i, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for int, fsblock, key. + */ +void +xfs_btree_trace_argipk( + const char *func, + struct xfs_btree_cur *cur, + int i, + union xfs_btree_ptr ptr, + union xfs_btree_key *key, + int line) +{ + __psunsigned_t high, low; + __uint64_t l0, l1; + + xfs_btree_trace_ptr(cur, ptr, &high, &low); + cur->bc_ops->trace_key(cur, key, &l0, &l1); + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPK, + line, i, high, low, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for int, fsblock, rec. + */ +void +xfs_btree_trace_argipr( + const char *func, + struct xfs_btree_cur *cur, + int i, + union xfs_btree_ptr ptr, + union xfs_btree_rec *rec, + int line) +{ + __psunsigned_t high, low; + __uint64_t l0, l1, l2; + + xfs_btree_trace_ptr(cur, ptr, &high, &low); + cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2); + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPR, + line, i, + high, low, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + l2 >> 32, (int)l2, + 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for int, key. + */ +void +xfs_btree_trace_argik( + const char *func, + struct xfs_btree_cur *cur, + int i, + union xfs_btree_key *key, + int line) +{ + __uint64_t l0, l1; + + cur->bc_ops->trace_key(cur, key, &l0, &l1); + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIK, + line, i, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + 0, 0, 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for record. 
+ */ +void +xfs_btree_trace_argr( + const char *func, + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + int line) +{ + __uint64_t l0, l1, l2; + + cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2); + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGR, + line, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + l2 >> 32, (int)l2, + 0, 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for the cursor/operation. + */ +void +xfs_btree_trace_cursor( + const char *func, + struct xfs_btree_cur *cur, + int type, + int line) +{ + __uint32_t s0; + __uint64_t l0, l1; + char *s; + + switch (type) { + case XBT_ARGS: + s = "args"; + break; + case XBT_ENTRY: + s = "entry"; + break; + case XBT_ERROR: + s = "error"; + break; + case XBT_EXIT: + s = "exit"; + break; + default: + s = "unknown"; + break; + } + + cur->bc_ops->trace_cursor(cur, &s0, &l0, &l1); + cur->bc_ops->trace_enter(cur, func, s, XFS_BTREE_KTRACE_CUR, line, + s0, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + (__psunsigned_t)cur->bc_bufs[0], + (__psunsigned_t)cur->bc_bufs[1], + (__psunsigned_t)cur->bc_bufs[2], + (__psunsigned_t)cur->bc_bufs[3], + (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1], + (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]); +} diff --git a/trunk/fs/xfs/xfs_btree_trace.h b/trunk/fs/xfs/xfs_btree_trace.h new file mode 100644 index 000000000000..2d8a309873ea --- /dev/null +++ b/trunk/fs/xfs/xfs_btree_trace.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_BTREE_TRACE_H__ +#define __XFS_BTREE_TRACE_H__ + +struct xfs_btree_cur; +struct xfs_buf; + + +/* + * Trace hooks. + * i,j = integer (32 bit) + * b = btree block buffer (xfs_buf_t) + * p = btree ptr + * r = btree record + * k = btree key + */ + +#ifdef XFS_BTREE_TRACE + +/* + * Trace buffer entry types. + */ +#define XFS_BTREE_KTRACE_ARGBI 1 +#define XFS_BTREE_KTRACE_ARGBII 2 +#define XFS_BTREE_KTRACE_ARGFFFI 3 +#define XFS_BTREE_KTRACE_ARGI 4 +#define XFS_BTREE_KTRACE_ARGIPK 5 +#define XFS_BTREE_KTRACE_ARGIPR 6 +#define XFS_BTREE_KTRACE_ARGIK 7 +#define XFS_BTREE_KTRACE_ARGR 8 +#define XFS_BTREE_KTRACE_CUR 9 + +/* + * Sub-types for cursor traces. + */ +#define XBT_ARGS 0 +#define XBT_ENTRY 1 +#define XBT_ERROR 2 +#define XBT_EXIT 3 + +void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *, + struct xfs_buf *, int, int); +void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *, + struct xfs_buf *, int, int, int); +void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int); +void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int, + union xfs_btree_ptr, union xfs_btree_key *, int); +void xfs_btree_trace_argipr(const char *, struct xfs_btree_cur *, int, + union xfs_btree_ptr, union xfs_btree_rec *, int); +void xfs_btree_trace_argik(const char *, struct xfs_btree_cur *, int, + union xfs_btree_key *, int); +void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *, + union xfs_btree_rec *, int); +void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int); + +#define XFS_BTREE_TRACE_ARGBI(c, b, i) \ + xfs_btree_trace_argbi(__func__, c, b, i, __LINE__) +#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \ + xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__) +#define 
XFS_BTREE_TRACE_ARGI(c, i) \ + xfs_btree_trace_argi(__func__, c, i, __LINE__) +#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \ + xfs_btree_trace_argipk(__func__, c, i, p, k, __LINE__) +#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) \ + xfs_btree_trace_argipr(__func__, c, i, p, r, __LINE__) +#define XFS_BTREE_TRACE_ARGIK(c, i, k) \ + xfs_btree_trace_argik(__func__, c, i, k, __LINE__) +#define XFS_BTREE_TRACE_ARGR(c, r) \ + xfs_btree_trace_argr(__func__, c, r, __LINE__) +#define XFS_BTREE_TRACE_CURSOR(c, t) \ + xfs_btree_trace_cursor(__func__, c, t, __LINE__) +#else +#define XFS_BTREE_TRACE_ARGBI(c, b, i) +#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) +#define XFS_BTREE_TRACE_ARGI(c, i) +#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s) +#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) +#define XFS_BTREE_TRACE_ARGIK(c, i, k) +#define XFS_BTREE_TRACE_ARGR(c, r) +#define XFS_BTREE_TRACE_CURSOR(c, t) +#endif /* XFS_BTREE_TRACE */ + +#endif /* __XFS_BTREE_TRACE_H__ */ diff --git a/trunk/fs/xfs/xfs_buf_item.c b/trunk/fs/xfs/xfs_buf_item.c index 88492916c3dc..7b7e005e3dcc 100644 --- a/trunk/fs/xfs/xfs_buf_item.c +++ b/trunk/fs/xfs/xfs_buf_item.c @@ -90,11 +90,13 @@ xfs_buf_item_flush_log_debug( uint first, uint last) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; uint nbytes; - if (bip == NULL || (bip->bli_item.li_type != XFS_LI_BUF)) + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); + if ((bip == NULL) || (bip->bli_item.li_type != XFS_LI_BUF)) { return; + } ASSERT(bip->bli_logged != NULL); nbytes = last - first + 1; @@ -406,7 +408,7 @@ xfs_buf_item_unpin( int stale = bip->bli_flags & XFS_BLI_STALE; int freed; - ASSERT(bp->b_fspriv == bip); + ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip); ASSERT(atomic_read(&bip->bli_refcount) > 0); trace_xfs_buf_item_unpin(bip); @@ -418,7 +420,7 @@ xfs_buf_item_unpin( if (freed && stale) { ASSERT(bip->bli_flags & XFS_BLI_STALE); - ASSERT(xfs_buf_islocked(bp)); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 
ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); ASSERT(XFS_BUF_ISSTALE(bp)); ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); @@ -441,7 +443,7 @@ xfs_buf_item_unpin( * Since the transaction no longer refers to the buffer, * the buffer should no longer refer to the transaction. */ - bp->b_transp = NULL; + XFS_BUF_SET_FSPRIVATE2(bp, NULL); } /* @@ -452,13 +454,13 @@ xfs_buf_item_unpin( */ if (bip->bli_flags & XFS_BLI_STALE_INODE) { xfs_buf_do_callbacks(bp); - bp->b_fspriv = NULL; - bp->b_iodone = NULL; + XFS_BUF_SET_FSPRIVATE(bp, NULL); + XFS_BUF_CLR_IODONE_FUNC(bp); } else { spin_lock(&ailp->xa_lock); xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); xfs_buf_item_relse(bp); - ASSERT(bp->b_fspriv == NULL); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL); } xfs_buf_relse(bp); } @@ -481,7 +483,7 @@ xfs_buf_item_trylock( if (XFS_BUF_ISPINNED(bp)) return XFS_ITEM_PINNED; - if (!xfs_buf_trylock(bp)) + if (!XFS_BUF_CPSEMA(bp)) return XFS_ITEM_LOCKED; /* take a reference to the buffer. */ @@ -523,7 +525,7 @@ xfs_buf_item_unlock( uint hold; /* Clear the buffer's association with this transaction. */ - bp->b_transp = NULL; + XFS_BUF_SET_FSPRIVATE2(bp, NULL); /* * If this is a transaction abort, don't return early. Instead, allow @@ -682,7 +684,7 @@ xfs_buf_item_init( xfs_buf_t *bp, xfs_mount_t *mp) { - xfs_log_item_t *lip = bp->b_fspriv; + xfs_log_item_t *lip; xfs_buf_log_item_t *bip; int chunks; int map_size; @@ -694,8 +696,12 @@ xfs_buf_item_init( * nothing to do here so return. */ ASSERT(bp->b_target->bt_mount == mp); - if (lip != NULL && lip->li_type == XFS_LI_BUF) - return; + if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { + lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); + if (lip->li_type == XFS_LI_BUF) { + return; + } + } /* * chunks is the number of XFS_BLF_CHUNK size pieces @@ -734,9 +740,11 @@ xfs_buf_item_init( * Put the buf item into the list of items attached to the * buffer at the front. 
*/ - if (bp->b_fspriv) - bip->bli_item.li_bio_list = bp->b_fspriv; - bp->b_fspriv = bip; + if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { + bip->bli_item.li_bio_list = + XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); + } + XFS_BUF_SET_FSPRIVATE(bp, bip); } @@ -868,11 +876,12 @@ xfs_buf_item_relse( trace_xfs_buf_item_relse(bp, _RET_IP_); - bip = bp->b_fspriv; - bp->b_fspriv = bip->bli_item.li_bio_list; - if (bp->b_fspriv == NULL) - bp->b_iodone = NULL; - + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); + XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list); + if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) && + (XFS_BUF_IODONE_FUNC(bp) != NULL)) { + XFS_BUF_CLR_IODONE_FUNC(bp); + } xfs_buf_rele(bp); xfs_buf_item_free(bip); } @@ -896,20 +905,20 @@ xfs_buf_attach_iodone( xfs_log_item_t *head_lip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(xfs_buf_islocked(bp)); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); lip->li_cb = cb; - head_lip = bp->b_fspriv; - if (head_lip) { + if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { + head_lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); lip->li_bio_list = head_lip->li_bio_list; head_lip->li_bio_list = lip; } else { - bp->b_fspriv = lip; + XFS_BUF_SET_FSPRIVATE(bp, lip); } - ASSERT(bp->b_iodone == NULL || - bp->b_iodone == xfs_buf_iodone_callbacks); - bp->b_iodone = xfs_buf_iodone_callbacks; + ASSERT((XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks) || + (XFS_BUF_IODONE_FUNC(bp) == NULL)); + XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); } /* @@ -930,8 +939,8 @@ xfs_buf_do_callbacks( { struct xfs_log_item *lip; - while ((lip = bp->b_fspriv) != NULL) { - bp->b_fspriv = lip->li_bio_list; + while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) { + XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list); ASSERT(lip->li_cb != NULL); /* * Clear the next pointer so we don't have any @@ -998,7 +1007,7 @@ xfs_buf_iodone_callbacks( XFS_BUF_DONE(bp); XFS_BUF_SET_START(bp); } - ASSERT(bp->b_iodone != NULL); + ASSERT(XFS_BUF_IODONE_FUNC(bp)); 
trace_xfs_buf_item_iodone_async(bp, _RET_IP_); xfs_buf_relse(bp); return; @@ -1017,8 +1026,8 @@ xfs_buf_iodone_callbacks( do_callbacks: xfs_buf_do_callbacks(bp); - bp->b_fspriv = NULL; - bp->b_iodone = NULL; + XFS_BUF_SET_FSPRIVATE(bp, NULL); + XFS_BUF_CLR_IODONE_FUNC(bp); xfs_buf_ioend(bp, 0); } diff --git a/trunk/fs/xfs/xfs_da_btree.c b/trunk/fs/xfs/xfs_da_btree.c index 2925726529f8..6102ac6d1dff 100644 --- a/trunk/fs/xfs/xfs_da_btree.c +++ b/trunk/fs/xfs/xfs_da_btree.c @@ -24,12 +24,11 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_dir2.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2_priv.h" +#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" @@ -37,6 +36,10 @@ #include "xfs_bmap.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" +#include "xfs_dir2_node.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -86,7 +89,7 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state, */ STATIC uint xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count); STATIC int xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp); -STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps); +STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra); STATIC int xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, xfs_da_state_blk_t *save_blk); @@ -318,11 +321,11 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, ASSERT(bp != NULL); node = bp->data; oldroot = blk1->bp->data; - if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { + if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) { size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] - (char *)oldroot); } else { - ASSERT(oldroot->hdr.info.magic == 
cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); leaf = (xfs_dir2_leaf_t *)oldroot; size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] - (char *)leaf); @@ -349,7 +352,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, node->hdr.count = cpu_to_be16(2); #ifdef DEBUG - if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) { + if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) { ASSERT(blk1->blkno >= mp->m_dirleafblk && blk1->blkno < mp->m_dirfreeblk); ASSERT(blk2->blkno >= mp->m_dirleafblk && @@ -381,7 +384,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, int useextra; node = oldblk->bp->data; - ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); /* * With V2 dirs the extra block is data or freespace. @@ -480,8 +483,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, node1 = node2; node2 = tmpnode; } - ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); - ASSERT(node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC); count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2; if (count == 0) return; @@ -575,7 +578,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, int tmp; node = oldblk->bp->data; - ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); ASSERT(newblk->blkno != 0); if (state->args->whichfork == XFS_DATA_FORK) @@ -711,7 +714,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) ASSERT(args != NULL); ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); oldroot = 
root_blk->bp->data; - ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC); ASSERT(!oldroot->hdr.info.forw); ASSERT(!oldroot->hdr.info.back); @@ -734,10 +737,10 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) ASSERT(bp != NULL); blkinfo = bp->data; if (be16_to_cpu(oldroot->hdr.level) == 1) { - ASSERT(blkinfo->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || - blkinfo->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIR2_LEAFN_MAGIC || + be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC); } else { - ASSERT(blkinfo->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC); } ASSERT(!blkinfo->forw); ASSERT(!blkinfo->back); @@ -773,7 +776,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) */ blk = &state->path.blk[ state->path.active-1 ]; info = blk->bp->data; - ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC); node = (xfs_da_intnode_t *)info; count = be16_to_cpu(node->hdr.count); if (count > (state->node_ents >> 1)) { @@ -833,7 +836,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) count -= state->node_ents >> 2; count -= be16_to_cpu(node->hdr.count); node = bp->data; - ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); count -= be16_to_cpu(node->hdr.count); xfs_da_brelse(state->args->trans, bp); if (count >= 0) @@ -908,7 +911,7 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path) } for (blk--, level--; level >= 0; blk--, level--) { node = blk->bp->data; - ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); btree = &node->btree[ blk->index ]; if (be32_to_cpu(btree->hashval) == lasthash) break; @@ -976,8 +979,8 @@ 
xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, drop_node = drop_blk->bp->data; save_node = save_blk->bp->data; - ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); - ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(drop_node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(save_node->hdr.info.magic) == XFS_DA_NODE_MAGIC); tp = state->args->trans; /* @@ -1275,8 +1278,8 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp) node1 = node1_bp->data; node2 = node2_bp->data; - ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) && - node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) && + (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC)); if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) && ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) || @@ -1296,7 +1299,7 @@ xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count) xfs_da_intnode_t *node; node = bp->data; - ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); if (count) *count = be16_to_cpu(node->hdr.count); if (!node->hdr.count) @@ -1409,7 +1412,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, for (blk = &path->blk[level]; level >= 0; blk--, level--) { ASSERT(blk->bp != NULL); node = blk->bp->data; - ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) { blk->index++; blkno = be32_to_cpu(node->btree[blk->index].before); @@ -1448,9 +1451,9 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, return(error); ASSERT(blk->bp != NULL); info = blk->bp->data; - ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || - 
info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || - info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC || + be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC || + be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); blk->magic = be16_to_cpu(info->magic); if (blk->magic == XFS_DA_NODE_MAGIC) { node = (xfs_da_intnode_t *)info; @@ -1543,62 +1546,79 @@ const struct xfs_nameops xfs_default_nameops = { .compname = xfs_da_compname }; +/* + * Add a block to the btree ahead of the file. + * Return the new block number to the caller. + */ int -xfs_da_grow_inode_int( - struct xfs_da_args *args, - xfs_fileoff_t *bno, - int count) +xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) { - struct xfs_trans *tp = args->trans; - struct xfs_inode *dp = args->dp; - int w = args->whichfork; - xfs_drfsbno_t nblks = dp->i_d.di_nblocks; - struct xfs_bmbt_irec map, *mapp; - int nmap, error, got, i, mapi; + xfs_fileoff_t bno, b; + xfs_bmbt_irec_t map; + xfs_bmbt_irec_t *mapp; + xfs_inode_t *dp; + int nmap, error, w, count, c, got, i, mapi; + xfs_trans_t *tp; + xfs_mount_t *mp; + xfs_drfsbno_t nblks; + dp = args->dp; + mp = dp->i_mount; + w = args->whichfork; + tp = args->trans; + nblks = dp->i_d.di_nblocks; + + /* + * For new directories adjust the file offset and block count. + */ + if (w == XFS_DATA_FORK) { + bno = mp->m_dirleafblk; + count = mp->m_dirblkfsbs; + } else { + bno = 0; + count = 1; + } /* * Find a spot in the file space to put the new block. */ - error = xfs_bmap_first_unused(tp, dp, count, bno, w); - if (error) + if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w))) return error; - + if (w == XFS_DATA_FORK) + ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk); /* * Try mapping it in one filesystem block. 
*/ nmap = 1; ASSERT(args->firstblock != NULL); - error = xfs_bmapi(tp, dp, *bno, count, + if ((error = xfs_bmapi(tp, dp, bno, count, xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA| XFS_BMAPI_CONTIG, args->firstblock, args->total, &map, &nmap, - args->flist); - if (error) + args->flist))) { return error; - + } ASSERT(nmap <= 1); if (nmap == 1) { mapp = &map; mapi = 1; - } else if (nmap == 0 && count > 1) { - xfs_fileoff_t b; - int c; - - /* - * If we didn't get it and the block might work if fragmented, - * try without the CONTIG flag. Loop until we get it all. - */ + } + /* + * If we didn't get it and the block might work if fragmented, + * try without the CONTIG flag. Loop until we get it all. + */ + else if (nmap == 0 && count > 1) { mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); - for (b = *bno, mapi = 0; b < *bno + count; ) { + for (b = bno, mapi = 0; b < bno + count; ) { nmap = MIN(XFS_BMAP_MAX_NMAP, count); - c = (int)(*bno + count - b); - error = xfs_bmapi(tp, dp, b, c, + c = (int)(bno + count - b); + if ((error = xfs_bmapi(tp, dp, b, c, xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE| XFS_BMAPI_METADATA, args->firstblock, args->total, - &mapp[mapi], &nmap, args->flist); - if (error) - goto out_free_map; + &mapp[mapi], &nmap, args->flist))) { + kmem_free(mapp); + return error; + } if (nmap < 1) break; mapi += nmap; @@ -1609,53 +1629,24 @@ xfs_da_grow_inode_int( mapi = 0; mapp = NULL; } - /* * Count the blocks we got, make sure it matches the total.
*/ for (i = 0, got = 0; i < mapi; i++) got += mapp[i].br_blockcount; - if (got != count || mapp[0].br_startoff != *bno || + if (got != count || mapp[0].br_startoff != bno || mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != - *bno + count) { - error = XFS_ERROR(ENOSPC); - goto out_free_map; + bno + count) { + if (mapp != &map) + kmem_free(mapp); + return XFS_ERROR(ENOSPC); } - - /* account for newly allocated blocks in reserved blocks total */ - args->total -= dp->i_d.di_nblocks - nblks; - -out_free_map: if (mapp != &map) kmem_free(mapp); - return error; -} - -/* - * Add a block to the btree ahead of the file. - * Return the new block number to the caller. - */ -int -xfs_da_grow_inode( - struct xfs_da_args *args, - xfs_dablk_t *new_blkno) -{ - xfs_fileoff_t bno; - int count; - int error; - - if (args->whichfork == XFS_DATA_FORK) { - bno = args->dp->i_mount->m_dirleafblk; - count = args->dp->i_mount->m_dirblkfsbs; - } else { - bno = 0; - count = 1; - } - - error = xfs_da_grow_inode_int(args, &bno, count); - if (!error) - *new_blkno = (xfs_dablk_t)bno; - return error; + /* account for newly allocated blocks in reserved blocks total */ + args->total -= dp->i_d.di_nblocks - nblks; + *new_blkno = (xfs_dablk_t)bno; + return 0; } /* @@ -1713,12 +1704,12 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, /* * Get values from the moved block. 
*/ - if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) { + if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) { dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; dead_level = 0; dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval); } else { - ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC); dead_node = (xfs_da_intnode_t *)dead_info; dead_level = be16_to_cpu(dead_node->hdr.level); dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval); @@ -1777,8 +1768,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w))) goto done; par_node = par_buf->data; - if (unlikely(par_node->hdr.info.magic != - cpu_to_be16(XFS_DA_NODE_MAGIC) || + if (unlikely( + be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC || (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", XFS_ERRLEVEL_LOW, mp); @@ -1829,7 +1820,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, par_node = par_buf->data; if (unlikely( be16_to_cpu(par_node->hdr.level) != level || - par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) { + be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", XFS_ERRLEVEL_LOW, mp); error = XFS_ERROR(EFSCORRUPTED); @@ -1939,7 +1930,8 @@ xfs_da_do_buf( xfs_daddr_t *mappedbnop, xfs_dabuf_t **bpp, int whichfork, - int caller) + int caller, + inst_t *ra) { xfs_buf_t *bp = NULL; xfs_buf_t **bplist; @@ -2078,22 +2070,25 @@ xfs_da_do_buf( * Build a dabuf structure. */ if (bplist) { - rbp = xfs_da_buf_make(nbplist, bplist); + rbp = xfs_da_buf_make(nbplist, bplist, ra); } else if (bp) - rbp = xfs_da_buf_make(1, &bp); + rbp = xfs_da_buf_make(1, &bp, ra); else rbp = NULL; /* * For read_buf, check the magic number. 
*/ if (caller == 1) { - xfs_dir2_data_hdr_t *hdr = rbp->data; - xfs_dir2_free_t *free = rbp->data; - xfs_da_blkinfo_t *info = rbp->data; + xfs_dir2_data_t *data; + xfs_dir2_free_t *free; + xfs_da_blkinfo_t *info; uint magic, magic1; + info = rbp->data; + data = rbp->data; + free = rbp->data; magic = be16_to_cpu(info->magic); - magic1 = be32_to_cpu(hdr->magic); + magic1 = be32_to_cpu(data->hdr.magic); if (unlikely( XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) && (magic != XFS_ATTR_LEAF_MAGIC) && @@ -2101,7 +2096,7 @@ xfs_da_do_buf( (magic != XFS_DIR2_LEAFN_MAGIC) && (magic1 != XFS_DIR2_BLOCK_MAGIC) && (magic1 != XFS_DIR2_DATA_MAGIC) && - (free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)), + (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC), mp, XFS_ERRTAG_DA_READ_BUF, XFS_RANDOM_DA_READ_BUF))) { trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_); @@ -2148,7 +2143,8 @@ xfs_da_get_buf( xfs_dabuf_t **bpp, int whichfork) { - return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0); + return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0, + (inst_t *)__return_address); } /* @@ -2163,7 +2159,8 @@ xfs_da_read_buf( xfs_dabuf_t **bpp, int whichfork) { - return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1); + return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1, + (inst_t *)__return_address); } /* @@ -2179,7 +2176,8 @@ xfs_da_reada_buf( xfs_daddr_t rval; rval = -1; - if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3)) + if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3, + (inst_t *)__return_address)) return -1; else return rval; @@ -2237,12 +2235,17 @@ xfs_da_state_free(xfs_da_state_t *state) kmem_zone_free(xfs_da_state_zone, state); } +#ifdef XFS_DABUF_DEBUG +xfs_dabuf_t *xfs_dabuf_global_list; +static DEFINE_SPINLOCK(xfs_dabuf_global_lock); +#endif + /* * Create a dabuf. 
*/ /* ARGSUSED */ STATIC xfs_dabuf_t * -xfs_da_buf_make(int nbuf, xfs_buf_t **bps) +xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra) { xfs_buf_t *bp; xfs_dabuf_t *dabuf; @@ -2254,6 +2257,11 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps) else dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS); dabuf->dirty = 0; +#ifdef XFS_DABUF_DEBUG + dabuf->ra = ra; + dabuf->target = XFS_BUF_TARGET(bps[0]); + dabuf->blkno = XFS_BUF_ADDR(bps[0]); +#endif if (nbuf == 1) { dabuf->nbuf = 1; bp = bps[0]; @@ -2273,6 +2281,23 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps) XFS_BUF_COUNT(bp)); } } +#ifdef XFS_DABUF_DEBUG + { + xfs_dabuf_t *p; + + spin_lock(&xfs_dabuf_global_lock); + for (p = xfs_dabuf_global_list; p; p = p->next) { + ASSERT(p->blkno != dabuf->blkno || + p->target != dabuf->target); + } + dabuf->prev = NULL; + if (xfs_dabuf_global_list) + xfs_dabuf_global_list->prev = dabuf; + dabuf->next = xfs_dabuf_global_list; + xfs_dabuf_global_list = dabuf; + spin_unlock(&xfs_dabuf_global_lock); + } +#endif return dabuf; } @@ -2308,12 +2333,25 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf) ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); if (dabuf->dirty) xfs_da_buf_clean(dabuf); - if (dabuf->nbuf > 1) { + if (dabuf->nbuf > 1) kmem_free(dabuf->data); - kmem_free(dabuf); - } else { - kmem_zone_free(xfs_dabuf_zone, dabuf); +#ifdef XFS_DABUF_DEBUG + { + spin_lock(&xfs_dabuf_global_lock); + if (dabuf->prev) + dabuf->prev->next = dabuf->next; + else + xfs_dabuf_global_list = dabuf->next; + if (dabuf->next) + dabuf->next->prev = dabuf->prev; + spin_unlock(&xfs_dabuf_global_lock); } + memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf)); +#endif + if (dabuf->nbuf == 1) + kmem_zone_free(xfs_dabuf_zone, dabuf); + else + kmem_free(dabuf); } /* diff --git a/trunk/fs/xfs/xfs_da_btree.h b/trunk/fs/xfs/xfs_da_btree.h index dbf7c074ae73..fe9f5a8c1d2a 100644 --- a/trunk/fs/xfs/xfs_da_btree.h +++ b/trunk/fs/xfs/xfs_da_btree.h @@ -145,11 +145,22 @@ typedef struct xfs_dabuf { short dirty; /* 
data needs to be copied back */ short bbcount; /* how large is data in bbs */ void *data; /* pointer for buffers' data */ +#ifdef XFS_DABUF_DEBUG + inst_t *ra; /* return address of caller to make */ + struct xfs_dabuf *next; /* next in global chain */ + struct xfs_dabuf *prev; /* previous in global chain */ + struct xfs_buftarg *target; /* device for buffer */ + xfs_daddr_t blkno; /* daddr first in bps[0] */ +#endif struct xfs_buf *bps[1]; /* actually nbuf of these */ } xfs_dabuf_t; #define XFS_DA_BUF_SIZE(n) \ (sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1)) +#ifdef XFS_DABUF_DEBUG +extern xfs_dabuf_t *xfs_dabuf_global_list; +#endif + /* * Storage for holding state during Btree searches and split/join ops. * @@ -237,8 +248,6 @@ int xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, * Utility routines. */ int xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno); -int xfs_da_grow_inode_int(struct xfs_da_args *args, xfs_fileoff_t *bno, - int count); int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, xfs_dabuf_t **bp, int whichfork); diff --git a/trunk/fs/xfs/xfs_dir2.c b/trunk/fs/xfs/xfs_dir2.c index 4580ce00aeb4..dba7a71cedf3 100644 --- a/trunk/fs/xfs/xfs_dir2.c +++ b/trunk/fs/xfs/xfs_dir2.c @@ -24,17 +24,20 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" +#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" -#include "xfs_dir2.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2_priv.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" +#include "xfs_dir2_node.h" #include "xfs_error.h" #include "xfs_vnodeops.h" #include "xfs_trace.h" @@ -119,15 +122,15 @@ int xfs_dir_isempty( xfs_inode_t *dp) { - xfs_dir2_sf_hdr_t *sfp; + 
xfs_dir2_sf_t *sfp; ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); if (dp->i_d.di_size == 0) /* might happen during shutdown. */ return 1; if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp)) return 0; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - return !sfp->count; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + return !sfp->hdr.count; } /* @@ -497,34 +500,129 @@ xfs_dir_canenter( /* * Add a block to the directory. - * - * This routine is for data and free blocks, not leaf/node blocks which are - * handled by xfs_da_grow_inode. + * This routine is for data and free blocks, not leaf/node blocks + * which are handled by xfs_da_grow_inode. */ int xfs_dir2_grow_inode( - struct xfs_da_args *args, - int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ - xfs_dir2_db_t *dbp) /* out: block number added */ + xfs_da_args_t *args, + int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ + xfs_dir2_db_t *dbp) /* out: block number added */ { - struct xfs_inode *dp = args->dp; - struct xfs_mount *mp = dp->i_mount; - xfs_fileoff_t bno; /* directory offset of new block */ - int count; /* count of filesystem blocks */ - int error; + xfs_fileoff_t bno; /* directory offset of new block */ + int count; /* count of filesystem blocks */ + xfs_inode_t *dp; /* incore directory inode */ + int error; + int got; /* blocks actually mapped */ + int i; + xfs_bmbt_irec_t map; /* single structure for bmap */ + int mapi; /* mapping index */ + xfs_bmbt_irec_t *mapp; /* bmap mapping structure(s) */ + xfs_mount_t *mp; + int nmap; /* number of bmap entries */ + xfs_trans_t *tp; + xfs_drfsbno_t nblks; trace_xfs_dir2_grow_inode(args, space); + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + nblks = dp->i_d.di_nblocks; /* * Set lowest possible block in the space requested. */ bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE); count = mp->m_dirblkfsbs; - - error = xfs_da_grow_inode_int(args, &bno, count); - if (error) + /* + * Find the first hole for our block. 
+ */ + if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK))) return error; + nmap = 1; + ASSERT(args->firstblock != NULL); + /* + * Try mapping the new block contiguously (one extent). + */ + if ((error = xfs_bmapi(tp, dp, bno, count, + XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, + args->firstblock, args->total, &map, &nmap, + args->flist))) + return error; + ASSERT(nmap <= 1); + if (nmap == 1) { + mapp = &map; + mapi = 1; + } + /* + * Didn't work and this is a multiple-fsb directory block. + * Try again with contiguous flag turned on. + */ + else if (nmap == 0 && count > 1) { + xfs_fileoff_t b; /* current file offset */ + + /* + * Space for maximum number of mappings. + */ + mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); + /* + * Iterate until we get to the end of our block. + */ + for (b = bno, mapi = 0; b < bno + count; ) { + int c; /* current fsb count */ + + /* + * Can't map more than MAX_NMAP at once. + */ + nmap = MIN(XFS_BMAP_MAX_NMAP, count); + c = (int)(bno + count - b); + if ((error = xfs_bmapi(tp, dp, b, c, + XFS_BMAPI_WRITE|XFS_BMAPI_METADATA, + args->firstblock, args->total, + &mapp[mapi], &nmap, args->flist))) { + kmem_free(mapp); + return error; + } + if (nmap < 1) + break; + /* + * Add this bunch into our table, go to the next offset. + */ + mapi += nmap; + b = mapp[mapi - 1].br_startoff + + mapp[mapi - 1].br_blockcount; + } + } + /* + * Didn't work. + */ + else { + mapi = 0; + mapp = NULL; + } + /* + * See how many fsb's we got. + */ + for (i = 0, got = 0; i < mapi; i++) + got += mapp[i].br_blockcount; + /* + * Didn't get enough fsb's, or the first/last block's are wrong. + */ + if (got != count || mapp[0].br_startoff != bno || + mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != + bno + count) { + if (mapp != &map) + kmem_free(mapp); + return XFS_ERROR(ENOSPC); + } + /* + * Done with the temporary mapping table.
+ */ + if (mapp != &map) + kmem_free(mapp); + /* account for newly allocated blocks in reserved blocks total */ + args->total -= dp->i_d.di_nblocks - nblks; *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); /* @@ -536,7 +634,7 @@ xfs_dir2_grow_inode( size = XFS_FSB_TO_B(mp, bno + count); if (size > dp->i_d.di_size) { dp->i_d.di_size = size; - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); + xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); } } return 0; diff --git a/trunk/fs/xfs/xfs_dir2.h b/trunk/fs/xfs/xfs_dir2.h index e937d9991c18..74a3b1057685 100644 --- a/trunk/fs/xfs/xfs_dir2.h +++ b/trunk/fs/xfs/xfs_dir2.h @@ -16,14 +16,49 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __XFS_DIR2_H__ -#define __XFS_DIR2_H__ +#define __XFS_DIR2_H__ -struct xfs_bmap_free; +struct uio; +struct xfs_dabuf; struct xfs_da_args; +struct xfs_dir2_put_args; +struct xfs_bmap_free; struct xfs_inode; struct xfs_mount; struct xfs_trans; +/* + * Directory version 2. + * There are 4 possible formats: + * shortform + * single block - data with embedded leaf at the end + * multiple data blocks, single leaf+freeindex block + * data blocks, node&leaf blocks (btree), freeindex blocks + * + * The shortform format is in xfs_dir2_sf.h. + * The single block format is in xfs_dir2_block.h. + * The data block format is in xfs_dir2_data.h. + * The leaf and freeindex block formats are in xfs_dir2_leaf.h. + * Node blocks are the same as the other version, in xfs_da_btree.h. + */ + +/* + * Byte offset in data block and shortform entry. + */ +typedef __uint16_t xfs_dir2_data_off_t; +#define NULLDATAOFF 0xffffU +typedef uint xfs_dir2_data_aoff_t; /* argument form */ + +/* + * Directory block number (logical dirblk in file) + */ +typedef __uint32_t xfs_dir2_db_t; + +/* + * Byte offset in a directory. 
+ */ +typedef xfs_off_t xfs_dir2_off_t; + extern struct xfs_name xfs_name_dotdot; /* @@ -51,10 +86,21 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_bmap_free *flist, xfs_extlen_t tot); extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name, uint resblks); +extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); /* - * Direct call from the bmap code, bypassing the generic directory layer. + * Utility routines for v2 directories. */ -extern int xfs_dir2_sf_to_block(struct xfs_da_args *args); +extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, + xfs_dir2_db_t *dbp); +extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, + int *vp); +extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, + int *vp); +extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, + struct xfs_dabuf *bp); + +extern int xfs_dir_cilookup_result(struct xfs_da_args *args, + const unsigned char *name, int len); #endif /* __XFS_DIR2_H__ */ diff --git a/trunk/fs/xfs/xfs_dir2_block.c b/trunk/fs/xfs/xfs_dir2_block.c index 9245e029b8ea..580d99cef9e7 100644 --- a/trunk/fs/xfs/xfs_dir2_block.c +++ b/trunk/fs/xfs/xfs_dir2_block.c @@ -23,14 +23,17 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" +#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2_priv.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -64,7 +67,7 @@ xfs_dir2_block_addname( xfs_da_args_t *args) /* directory op arguments */ { xfs_dir2_data_free_t *bf; /* bestfree table in block */ - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* directory block structure */ 
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ xfs_dabuf_t *bp; /* buffer for block */ xfs_dir2_block_tail_t *btp; /* block tail */ @@ -102,13 +105,13 @@ xfs_dir2_block_addname( return error; } ASSERT(bp != NULL); - hdr = bp->data; + block = bp->data; /* * Check the magic number, corrupted if wrong. */ - if (unlikely(hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))) { + if (unlikely(be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC)) { XFS_CORRUPTION_ERROR("xfs_dir2_block_addname", - XFS_ERRLEVEL_LOW, mp, hdr); + XFS_ERRLEVEL_LOW, mp, block); xfs_da_brelse(tp, bp); return XFS_ERROR(EFSCORRUPTED); } @@ -116,8 +119,8 @@ xfs_dir2_block_addname( /* * Set up pointers to parts of the block. */ - bf = hdr->bestfree; - btp = xfs_dir2_block_tail_p(mp, hdr); + bf = block->hdr.bestfree; + btp = xfs_dir2_block_tail_p(mp, block); blp = xfs_dir2_block_leaf_p(btp); /* * No stale entries? Need space for entry and new leaf. @@ -130,7 +133,7 @@ xfs_dir2_block_addname( /* * Data object just before the first leaf entry. 
*/ - enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); + enddup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); /* * If it's not free then can't do this add without cleaning up: * the space before the first leaf entry needs to be free so it @@ -143,7 +146,7 @@ xfs_dir2_block_addname( */ else { dup = (xfs_dir2_data_unused_t *) - ((char *)hdr + be16_to_cpu(bf[0].offset)); + ((char *)block + be16_to_cpu(bf[0].offset)); if (dup == enddup) { /* * It is the biggest freespace, is it too small @@ -156,7 +159,7 @@ xfs_dir2_block_addname( */ if (be16_to_cpu(bf[1].length) >= len) dup = (xfs_dir2_data_unused_t *) - ((char *)hdr + + ((char *)block + be16_to_cpu(bf[1].offset)); else dup = NULL; @@ -179,7 +182,7 @@ xfs_dir2_block_addname( */ else if (be16_to_cpu(bf[0].length) >= len) { dup = (xfs_dir2_data_unused_t *) - ((char *)hdr + be16_to_cpu(bf[0].offset)); + ((char *)block + be16_to_cpu(bf[0].offset)); compact = 0; } /* @@ -193,7 +196,7 @@ xfs_dir2_block_addname( /* * Data object just before the first leaf entry. */ - dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); + dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); /* * If it's not free then the data will go where the * leaf data starts now, if it works at all. 
@@ -252,8 +255,7 @@ xfs_dir2_block_addname( highstale = lfloghigh = -1; fromidx >= 0; fromidx--) { - if (blp[fromidx].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) { + if (be32_to_cpu(blp[fromidx].address) == XFS_DIR2_NULL_DATAPTR) { if (highstale == -1) highstale = toidx; else { @@ -270,7 +272,7 @@ xfs_dir2_block_addname( lfloghigh -= be32_to_cpu(btp->stale) - 1; be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); xfs_dir2_data_make_free(tp, bp, - (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), + (xfs_dir2_data_aoff_t)((char *)blp - (char *)block), (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), &needlog, &needscan); blp += be32_to_cpu(btp->stale) - 1; @@ -280,7 +282,7 @@ xfs_dir2_block_addname( * This needs to happen before the next call to use_free. */ if (needscan) { - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); needscan = 0; } } @@ -316,7 +318,7 @@ xfs_dir2_block_addname( */ xfs_dir2_data_use_free(tp, bp, enddup, (xfs_dir2_data_aoff_t) - ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) - + ((char *)enddup - (char *)block + be16_to_cpu(enddup->length) - sizeof(*blp)), (xfs_dir2_data_aoff_t)sizeof(*blp), &needlog, &needscan); @@ -329,7 +331,8 @@ xfs_dir2_block_addname( * This needs to happen before the next call to use_free. 
*/ if (needscan) { - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, + &needlog); needscan = 0; } /* @@ -350,14 +353,12 @@ xfs_dir2_block_addname( else { for (lowstale = mid; lowstale >= 0 && - blp[lowstale].address != - cpu_to_be32(XFS_DIR2_NULL_DATAPTR); + be32_to_cpu(blp[lowstale].address) != XFS_DIR2_NULL_DATAPTR; lowstale--) continue; for (highstale = mid + 1; highstale < be32_to_cpu(btp->count) && - blp[highstale].address != - cpu_to_be32(XFS_DIR2_NULL_DATAPTR) && + be32_to_cpu(blp[highstale].address) != XFS_DIR2_NULL_DATAPTR && (lowstale < 0 || mid - lowstale > highstale - mid); highstale++) continue; @@ -396,13 +397,13 @@ xfs_dir2_block_addname( */ blp[mid].hashval = cpu_to_be32(args->hashval); blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, - (char *)dep - (char *)hdr)); + (char *)dep - (char *)block)); xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); /* * Mark space for the data entry used. */ xfs_dir2_data_use_free(tp, bp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), + (xfs_dir2_data_aoff_t)((char *)dup - (char *)block), (xfs_dir2_data_aoff_t)len, &needlog, &needscan); /* * Create the new data entry. @@ -411,12 +412,12 @@ xfs_dir2_block_addname( dep->namelen = args->namelen; memcpy(dep->name, args->name, args->namelen); tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); + *tagp = cpu_to_be16((char *)dep - (char *)block); /* * Clean up the bestfree array and log the header, tail, and entry. 
*/ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); if (needlog) xfs_dir2_data_log_header(tp, bp); xfs_dir2_block_log_tail(tp, bp); @@ -436,7 +437,7 @@ xfs_dir2_block_getdents( xfs_off_t *offset, filldir_t filldir) { - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* directory block structure */ xfs_dabuf_t *bp; /* buffer for block */ xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* block data entry */ @@ -469,13 +470,13 @@ xfs_dir2_block_getdents( * We'll skip entries before this. */ wantoff = xfs_dir2_dataptr_to_off(mp, *offset); - hdr = bp->data; + block = bp->data; xfs_dir2_data_check(dp, bp); /* * Set up values for the loop. */ - btp = xfs_dir2_block_tail_p(mp, hdr); - ptr = (char *)(hdr + 1); + btp = xfs_dir2_block_tail_p(mp, block); + ptr = (char *)block->u; endptr = (char *)xfs_dir2_block_leaf_p(btp); /* @@ -501,11 +502,11 @@ xfs_dir2_block_getdents( /* * The entry is before the desired starting point, skip it. */ - if ((char *)dep - (char *)hdr < wantoff) + if ((char *)dep - (char *)block < wantoff) continue; cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, - (char *)dep - (char *)hdr); + (char *)dep - (char *)block); /* * If it didn't fit, set the final offset to here & return. 
@@ -539,14 +540,17 @@ xfs_dir2_block_log_leaf( int first, /* index of first logged leaf */ int last) /* index of last logged leaf */ { - xfs_dir2_data_hdr_t *hdr = bp->data; - xfs_dir2_leaf_entry_t *blp; - xfs_dir2_block_tail_t *btp; + xfs_dir2_block_t *block; /* directory block structure */ + xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_mount_t *mp; /* filesystem mount point */ - btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr); + mp = tp->t_mountp; + block = bp->data; + btp = xfs_dir2_block_tail_p(mp, block); blp = xfs_dir2_block_leaf_p(btp); - xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr), - (uint)((char *)&blp[last + 1] - (char *)hdr - 1)); + xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block), + (uint)((char *)&blp[last + 1] - (char *)block - 1)); } /* @@ -557,12 +561,15 @@ xfs_dir2_block_log_tail( xfs_trans_t *tp, /* transaction structure */ xfs_dabuf_t *bp) /* block buffer */ { - xfs_dir2_data_hdr_t *hdr = bp->data; - xfs_dir2_block_tail_t *btp; + xfs_dir2_block_t *block; /* directory block structure */ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_mount_t *mp; /* filesystem mount point */ - btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr); - xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr), - (uint)((char *)(btp + 1) - (char *)hdr - 1)); + mp = tp->t_mountp; + block = bp->data; + btp = xfs_dir2_block_tail_p(mp, block); + xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block), + (uint)((char *)(btp + 1) - (char *)block - 1)); } /* @@ -573,7 +580,7 @@ int /* error */ xfs_dir2_block_lookup( xfs_da_args_t *args) /* dir lookup arguments */ { - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* block structure */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ xfs_dabuf_t *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ @@ -593,14 +600,14 @@ xfs_dir2_block_lookup( return error; dp = args->dp; mp = 
dp->i_mount; - hdr = bp->data; + block = bp->data; xfs_dir2_data_check(dp, bp); - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(mp, block); blp = xfs_dir2_block_leaf_p(btp); /* * Get the offset from the leaf entry, to point to the data. */ - dep = (xfs_dir2_data_entry_t *)((char *)hdr + + dep = (xfs_dir2_data_entry_t *)((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); /* * Fill in inode number, CI name if appropriate, release the block. @@ -621,7 +628,7 @@ xfs_dir2_block_lookup_int( int *entno) /* returned entry number */ { xfs_dir2_dataptr_t addr; /* data entry address */ - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* block structure */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ xfs_dabuf_t *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ @@ -647,9 +654,9 @@ xfs_dir2_block_lookup_int( return error; } ASSERT(bp != NULL); - hdr = bp->data; + block = bp->data; xfs_dir2_data_check(dp, bp); - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(mp, block); blp = xfs_dir2_block_leaf_p(btp); /* * Loop doing a binary search for our hash value. @@ -687,7 +694,7 @@ xfs_dir2_block_lookup_int( * Get pointer to the entry from the leaf. */ dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr)); + ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); /* * Compare name and if it's an exact match, return the index * and buffer. 
If it's the first case-insensitive match, store @@ -726,7 +733,7 @@ int /* error */ xfs_dir2_block_removename( xfs_da_args_t *args) /* directory operation args */ { - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* block structure */ xfs_dir2_leaf_entry_t *blp; /* block leaf pointer */ xfs_dabuf_t *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ @@ -753,20 +760,20 @@ xfs_dir2_block_removename( dp = args->dp; tp = args->trans; mp = dp->i_mount; - hdr = bp->data; - btp = xfs_dir2_block_tail_p(mp, hdr); + block = bp->data; + btp = xfs_dir2_block_tail_p(mp, block); blp = xfs_dir2_block_leaf_p(btp); /* * Point to the data entry using the leaf entry. */ dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); + ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); /* * Mark the data entry's space free. */ needlog = needscan = 0; xfs_dir2_data_make_free(tp, bp, - (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), + (xfs_dir2_data_aoff_t)((char *)dep - (char *)block), xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); /* * Fix up the block tail. @@ -782,15 +789,15 @@ xfs_dir2_block_removename( * Fix up bestfree, log the header if necessary. */ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); if (needlog) xfs_dir2_data_log_header(tp, bp); xfs_dir2_data_check(dp, bp); /* * See if the size as a shortform is good enough. 
*/ - size = xfs_dir2_block_sfsize(dp, hdr, &sfh); - if (size > XFS_IFORK_DSIZE(dp)) { + if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) > + XFS_IFORK_DSIZE(dp)) { xfs_da_buf_done(bp); return 0; } @@ -808,7 +815,7 @@ int /* error */ xfs_dir2_block_replace( xfs_da_args_t *args) /* directory operation args */ { - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* block structure */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ xfs_dabuf_t *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ @@ -829,14 +836,14 @@ xfs_dir2_block_replace( } dp = args->dp; mp = dp->i_mount; - hdr = bp->data; - btp = xfs_dir2_block_tail_p(mp, hdr); + block = bp->data; + btp = xfs_dir2_block_tail_p(mp, block); blp = xfs_dir2_block_leaf_p(btp); /* * Point to the data entry we need to change. */ dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); + ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); ASSERT(be64_to_cpu(dep->inumber) != args->inumber); /* * Change the inode number to the new value. 
@@ -875,7 +882,7 @@ xfs_dir2_leaf_to_block( xfs_dabuf_t *dbp) /* data buffer */ { __be16 *bestsp; /* leaf bests table */ - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* block structure */ xfs_dir2_block_tail_t *btp; /* block tail */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_data_unused_t *dup; /* unused data entry */ @@ -899,7 +906,7 @@ xfs_dir2_leaf_to_block( tp = args->trans; mp = dp->i_mount; leaf = lbp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); ltp = xfs_dir2_leaf_tail_p(mp, leaf); /* * If there are data blocks other than the first one, take this @@ -910,7 +917,7 @@ xfs_dir2_leaf_to_block( while (dp->i_d.di_size > mp->m_dirblksize) { bestsp = xfs_dir2_leaf_bests_p(ltp); if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == - mp->m_dirblksize - (uint)sizeof(*hdr)) { + mp->m_dirblksize - (uint)sizeof(block->hdr)) { if ((error = xfs_dir2_leaf_trim_data(args, lbp, (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1)))) @@ -928,18 +935,18 @@ xfs_dir2_leaf_to_block( XFS_DATA_FORK))) { goto out; } - hdr = dbp->data; - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); + block = dbp->data; + ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_DATA_MAGIC); /* * Size of the "leaf" area in the block. */ - size = (uint)sizeof(xfs_dir2_block_tail_t) + + size = (uint)sizeof(block->tail) + (uint)sizeof(*lep) * (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); /* * Look at the last data entry. */ - tagp = (__be16 *)((char *)hdr + mp->m_dirblksize) - 1; - dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); + tagp = (__be16 *)((char *)block + mp->m_dirblksize) - 1; + dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); /* * If it's not free or is too short we can't do it. */ @@ -951,7 +958,7 @@ xfs_dir2_leaf_to_block( /* * Start converting it to block form. 
*/ - hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); + block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); needlog = 1; needscan = 0; /* @@ -962,7 +969,7 @@ xfs_dir2_leaf_to_block( /* * Initialize the block tail. */ - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(mp, block); btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); btp->stale = 0; xfs_dir2_block_log_tail(tp, dbp); @@ -971,8 +978,7 @@ xfs_dir2_leaf_to_block( */ lep = xfs_dir2_block_leaf_p(btp); for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) { - if (leaf->ents[from].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) continue; lep[to++] = leaf->ents[from]; } @@ -982,7 +988,7 @@ xfs_dir2_leaf_to_block( * Scan the bestfree if we need it and log the data block header. */ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); if (needlog) xfs_dir2_data_log_header(tp, dbp); /* @@ -996,8 +1002,8 @@ xfs_dir2_leaf_to_block( /* * Now see if the resulting block can be shrunken to shortform. 
*/ - size = xfs_dir2_block_sfsize(dp, hdr, &sfh); - if (size > XFS_IFORK_DSIZE(dp)) { + if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) > + XFS_IFORK_DSIZE(dp)) { error = 0; goto out; } @@ -1018,10 +1024,12 @@ xfs_dir2_sf_to_block( xfs_da_args_t *args) /* operation arguments */ { xfs_dir2_db_t blkno; /* dir-relative block # (0) */ - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* block structure */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ xfs_dabuf_t *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail pointer */ + char *buf; /* sf buffer */ + int buf_len; xfs_dir2_data_entry_t *dep; /* data entry pointer */ xfs_inode_t *dp; /* incore directory inode */ int dummy; /* trash */ @@ -1035,8 +1043,7 @@ xfs_dir2_sf_to_block( int newoffset; /* offset from current entry */ int offset; /* target block offset */ xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */ - xfs_dir2_sf_hdr_t *oldsfp; /* old shortform header */ - xfs_dir2_sf_hdr_t *sfp; /* shortform header */ + xfs_dir2_sf_t *sfp; /* shortform structure */ __be16 *tagp; /* end of data entry */ xfs_trans_t *tp; /* transaction pointer */ struct xfs_name name; @@ -1054,30 +1061,32 @@ xfs_dir2_sf_to_block( ASSERT(XFS_FORCED_SHUTDOWN(mp)); return XFS_ERROR(EIO); } - - oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count)); - + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); /* - * Copy the directory into a temporary buffer. + * Copy the directory into the stack buffer. * Then pitch the incore inode data so we can make extents. 
*/ - sfp = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP); - memcpy(sfp, oldsfp, dp->i_df.if_bytes); - xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK); + buf_len = dp->i_df.if_bytes; + buf = kmem_alloc(buf_len, KM_SLEEP); + + memcpy(buf, sfp, buf_len); + xfs_idata_realloc(dp, -buf_len, XFS_DATA_FORK); dp->i_d.di_size = 0; xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); - + /* + * Reset pointer - old sfp is gone. + */ + sfp = (xfs_dir2_sf_t *)buf; /* * Add block 0 to the inode. */ error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno); if (error) { - kmem_free(sfp); + kmem_free(buf); return error; } /* @@ -1085,21 +1094,21 @@ xfs_dir2_sf_to_block( */ error = xfs_dir2_data_init(args, blkno, &bp); if (error) { - kmem_free(sfp); + kmem_free(buf); return error; } - hdr = bp->data; - hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); + block = bp->data; + block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); /* * Compute size of block "tail" area. */ i = (uint)sizeof(*btp) + - (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t); + (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t); /* * The whole thing is initialized to free by the init routine. * Say we're using the leaf and tail area. */ - dup = (xfs_dir2_data_unused_t *)(hdr + 1); + dup = (xfs_dir2_data_unused_t *)block->u; needlog = needscan = 0; xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog, &needscan); @@ -1107,51 +1116,50 @@ xfs_dir2_sf_to_block( /* * Fill in the tail. */ - btp = xfs_dir2_block_tail_p(mp, hdr); - btp->count = cpu_to_be32(sfp->count + 2); /* ., .. */ + btp = xfs_dir2_block_tail_p(mp, block); + btp->count = cpu_to_be32(sfp->hdr.count + 2); /* ., .. */ btp->stale = 0; blp = xfs_dir2_block_leaf_p(btp); - endoffset = (uint)((char *)blp - (char *)hdr); + endoffset = (uint)((char *)blp - (char *)block); /* * Remove the freespace, we'll manage it. 
*/ xfs_dir2_data_use_free(tp, bp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), + (xfs_dir2_data_aoff_t)((char *)dup - (char *)block), be16_to_cpu(dup->length), &needlog, &needscan); /* * Create entry for . */ dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + XFS_DIR2_DATA_DOT_OFFSET); + ((char *)block + XFS_DIR2_DATA_DOT_OFFSET); dep->inumber = cpu_to_be64(dp->i_ino); dep->namelen = 1; dep->name[0] = '.'; tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); + *tagp = cpu_to_be16((char *)dep - (char *)block); xfs_dir2_data_log_entry(tp, bp, dep); blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, - (char *)dep - (char *)hdr)); + (char *)dep - (char *)block)); /* * Create entry for .. */ dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + XFS_DIR2_DATA_DOTDOT_OFFSET); - dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp)); + ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET); + dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent)); dep->namelen = 2; dep->name[0] = dep->name[1] = '.'; tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); + *tagp = cpu_to_be16((char *)dep - (char *)block); xfs_dir2_data_log_entry(tp, bp, dep); blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, - (char *)dep - (char *)hdr)); + (char *)dep - (char *)block)); offset = XFS_DIR2_DATA_FIRST_OFFSET; /* * Loop over existing entries, stuff them in. */ - i = 0; - if (!sfp->count) + if ((i = 0) == sfp->hdr.count) sfep = NULL; else sfep = xfs_dir2_sf_firstentry(sfp); @@ -1171,40 +1179,43 @@ xfs_dir2_sf_to_block( * There should be a hole here, make one. 
*/ if (offset < newoffset) { - dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); + dup = (xfs_dir2_data_unused_t *) + ((char *)block + offset); dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); dup->length = cpu_to_be16(newoffset - offset); *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16( - ((char *)dup - (char *)hdr)); + ((char *)dup - (char *)block)); xfs_dir2_data_log_unused(tp, bp, dup); - xfs_dir2_data_freeinsert(hdr, dup, &dummy); + (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block, + dup, &dummy); offset += be16_to_cpu(dup->length); continue; } /* * Copy a real entry. */ - dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset); - dep->inumber = cpu_to_be64(xfs_dir2_sfe_get_ino(sfp, sfep)); + dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset); + dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, + xfs_dir2_sf_inumberp(sfep))); dep->namelen = sfep->namelen; memcpy(dep->name, sfep->name, dep->namelen); tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); + *tagp = cpu_to_be16((char *)dep - (char *)block); xfs_dir2_data_log_entry(tp, bp, dep); name.name = sfep->name; name.len = sfep->namelen; blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops-> hashname(&name)); blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, - (char *)dep - (char *)hdr)); - offset = (int)((char *)(tagp + 1) - (char *)hdr); - if (++i == sfp->count) + (char *)dep - (char *)block)); + offset = (int)((char *)(tagp + 1) - (char *)block); + if (++i == sfp->hdr.count) sfep = NULL; else sfep = xfs_dir2_sf_nextentry(sfp, sfep); } /* Done with the temporary buffer */ - kmem_free(sfp); + kmem_free(buf); /* * Sort the leaf entries by hash value. */ diff --git a/trunk/fs/xfs/xfs_dir2_block.h b/trunk/fs/xfs/xfs_dir2_block.h new file mode 100644 index 000000000000..10e689676382 --- /dev/null +++ b/trunk/fs/xfs/xfs_dir2_block.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. + * All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DIR2_BLOCK_H__ +#define __XFS_DIR2_BLOCK_H__ + +/* + * xfs_dir2_block.h + * Directory version 2, single block format structures + */ + +struct uio; +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_dir2_data_hdr; +struct xfs_dir2_leaf_entry; +struct xfs_inode; +struct xfs_mount; +struct xfs_trans; + +/* + * The single block format is as follows: + * xfs_dir2_data_hdr_t structure + * xfs_dir2_data_entry_t and xfs_dir2_data_unused_t structures + * xfs_dir2_leaf_entry_t structures + * xfs_dir2_block_tail_t structure + */ + +#define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: for one block dirs */ + +typedef struct xfs_dir2_block_tail { + __be32 count; /* count of leaf entries */ + __be32 stale; /* count of stale lf entries */ +} xfs_dir2_block_tail_t; + +/* + * Generic single-block structure, for xfs_db. 
+ */ +typedef struct xfs_dir2_block { + xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_BLOCK_MAGIC */ + xfs_dir2_data_union_t u[1]; + xfs_dir2_leaf_entry_t leaf[1]; + xfs_dir2_block_tail_t tail; +} xfs_dir2_block_t; + +/* + * Pointer to the leaf header embedded in a data block (1-block format) + */ +static inline xfs_dir2_block_tail_t * +xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block) +{ + return (((xfs_dir2_block_tail_t *) + ((char *)(block) + (mp)->m_dirblksize)) - 1); +} + +/* + * Pointer to the leaf entries embedded in a data block (1-block format) + */ +static inline struct xfs_dir2_leaf_entry * +xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp) +{ + return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count); +} + +/* + * Function declarations. + */ +extern int xfs_dir2_block_addname(struct xfs_da_args *args); +extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent, + xfs_off_t *offset, filldir_t filldir); +extern int xfs_dir2_block_lookup(struct xfs_da_args *args); +extern int xfs_dir2_block_removename(struct xfs_da_args *args); +extern int xfs_dir2_block_replace(struct xfs_da_args *args); +extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, + struct xfs_dabuf *lbp, struct xfs_dabuf *dbp); +extern int xfs_dir2_sf_to_block(struct xfs_da_args *args); + +#endif /* __XFS_DIR2_BLOCK_H__ */ diff --git a/trunk/fs/xfs/xfs_dir2_data.c b/trunk/fs/xfs/xfs_dir2_data.c index 5bbe2a8a023f..921595b84f5b 100644 --- a/trunk/fs/xfs/xfs_dir2_data.c +++ b/trunk/fs/xfs/xfs_dir2_data.c @@ -23,18 +23,18 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" +#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2_priv.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" #include "xfs_error.h" -STATIC xfs_dir2_data_free_t * 
-xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup); - #ifdef DEBUG /* * Check the consistency of the data block. @@ -50,7 +50,7 @@ xfs_dir2_data_check( xfs_dir2_data_free_t *bf; /* bestfree table */ xfs_dir2_block_tail_t *btp=NULL; /* block tail */ int count; /* count of entries found */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *d; /* data block pointer */ xfs_dir2_data_entry_t *dep; /* data entry */ xfs_dir2_data_free_t *dfp; /* bestfree entry */ xfs_dir2_data_unused_t *dup; /* unused entry */ @@ -66,19 +66,17 @@ xfs_dir2_data_check( struct xfs_name name; mp = dp->i_mount; - hdr = bp->data; - bf = hdr->bestfree; - p = (char *)(hdr + 1); - - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { - btp = xfs_dir2_block_tail_p(mp, hdr); + d = bp->data; + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + bf = d->hdr.bestfree; + p = (char *)d->u; + if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { + btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); lep = xfs_dir2_block_leaf_p(btp); endp = (char *)lep; - } else { - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); - endp = (char *)hdr + mp->m_dirblksize; - } - + } else + endp = (char *)d + mp->m_dirblksize; count = lastfree = freeseen = 0; /* * Account for zero bestfree entries. 
@@ -110,8 +108,8 @@ xfs_dir2_data_check( if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { ASSERT(lastfree == 0); ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == - (char *)dup - (char *)hdr); - dfp = xfs_dir2_data_freefind(hdr, dup); + (char *)dup - (char *)d); + dfp = xfs_dir2_data_freefind(d, dup); if (dfp) { i = (int)(dfp - bf); ASSERT((freeseen & (1 << i)) == 0); @@ -134,13 +132,13 @@ xfs_dir2_data_check( ASSERT(dep->namelen != 0); ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0); ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) == - (char *)dep - (char *)hdr); + (char *)dep - (char *)d); count++; lastfree = 0; - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { + if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, (xfs_dir2_data_aoff_t) - ((char *)dep - (char *)hdr)); + ((char *)dep - (char *)d)); name.name = dep->name; name.len = dep->namelen; hash = mp->m_dirnameops->hashname(&name); @@ -157,10 +155,9 @@ xfs_dir2_data_check( * Need to have seen all the entries and all the bestfree slots. */ ASSERT(freeseen == 7); - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { + if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { - if (lep[i].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + if (be32_to_cpu(lep[i].address) == XFS_DIR2_NULL_DATAPTR) stale++; if (i > 0) ASSERT(be32_to_cpu(lep[i].hashval) >= be32_to_cpu(lep[i - 1].hashval)); @@ -175,9 +172,9 @@ xfs_dir2_data_check( * Given a data block and an unused entry from that block, * return the bestfree entry if any that corresponds to it. 
*/ -STATIC xfs_dir2_data_free_t * +xfs_dir2_data_free_t * xfs_dir2_data_freefind( - xfs_dir2_data_hdr_t *hdr, /* data block */ + xfs_dir2_data_t *d, /* data block */ xfs_dir2_data_unused_t *dup) /* data unused entry */ { xfs_dir2_data_free_t *dfp; /* bestfree entry */ @@ -187,17 +184,17 @@ xfs_dir2_data_freefind( int seenzero; /* saw a 0 bestfree entry */ #endif - off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); + off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)d); #if defined(DEBUG) && defined(__KERNEL__) /* * Validate some consistency in the bestfree table. * Check order, non-overlapping entries, and if we find the * one we're looking for it has to be exact. */ - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); - for (dfp = &hdr->bestfree[0], seenzero = matched = 0; - dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT]; + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + for (dfp = &d->hdr.bestfree[0], seenzero = matched = 0; + dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT]; dfp++) { if (!dfp->offset) { ASSERT(!dfp->length); @@ -213,7 +210,7 @@ xfs_dir2_data_freefind( else ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off); ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length)); - if (dfp > &hdr->bestfree[0]) + if (dfp > &d->hdr.bestfree[0]) ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length)); } #endif @@ -222,13 +219,13 @@ xfs_dir2_data_freefind( * it can't be there since they're sorted. */ if (be16_to_cpu(dup->length) < - be16_to_cpu(hdr->bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length)) + be16_to_cpu(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length)) return NULL; /* * Look at the three bestfree entries for our guy. 
*/ - for (dfp = &hdr->bestfree[0]; - dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT]; + for (dfp = &d->hdr.bestfree[0]; + dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT]; dfp++) { if (!dfp->offset) return NULL; @@ -246,7 +243,7 @@ xfs_dir2_data_freefind( */ xfs_dir2_data_free_t * /* entry inserted */ xfs_dir2_data_freeinsert( - xfs_dir2_data_hdr_t *hdr, /* data block pointer */ + xfs_dir2_data_t *d, /* data block pointer */ xfs_dir2_data_unused_t *dup, /* unused space */ int *loghead) /* log the data header (out) */ { @@ -254,13 +251,12 @@ xfs_dir2_data_freeinsert( xfs_dir2_data_free_t new; /* new bestfree entry */ #ifdef __KERNEL__ - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); #endif - dfp = hdr->bestfree; + dfp = d->hdr.bestfree; new.length = dup->length; - new.offset = cpu_to_be16((char *)dup - (char *)hdr); - + new.offset = cpu_to_be16((char *)dup - (char *)d); /* * Insert at position 0, 1, or 2; or not at all. */ @@ -290,36 +286,36 @@ xfs_dir2_data_freeinsert( */ STATIC void xfs_dir2_data_freeremove( - xfs_dir2_data_hdr_t *hdr, /* data block header */ + xfs_dir2_data_t *d, /* data block pointer */ xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */ int *loghead) /* out: log data header */ { #ifdef __KERNEL__ - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); #endif /* * It's the first entry, slide the next 2 up. */ - if (dfp == &hdr->bestfree[0]) { - hdr->bestfree[0] = hdr->bestfree[1]; - hdr->bestfree[1] = hdr->bestfree[2]; + if (dfp == &d->hdr.bestfree[0]) { + d->hdr.bestfree[0] = d->hdr.bestfree[1]; + d->hdr.bestfree[1] = d->hdr.bestfree[2]; } /* * It's the second entry, slide the 3rd entry up. 
*/ - else if (dfp == &hdr->bestfree[1]) - hdr->bestfree[1] = hdr->bestfree[2]; + else if (dfp == &d->hdr.bestfree[1]) + d->hdr.bestfree[1] = d->hdr.bestfree[2]; /* * Must be the last entry. */ else - ASSERT(dfp == &hdr->bestfree[2]); + ASSERT(dfp == &d->hdr.bestfree[2]); /* * Clear the 3rd entry, must be zero now. */ - hdr->bestfree[2].length = 0; - hdr->bestfree[2].offset = 0; + d->hdr.bestfree[2].length = 0; + d->hdr.bestfree[2].offset = 0; *loghead = 1; } @@ -329,7 +325,7 @@ xfs_dir2_data_freeremove( void xfs_dir2_data_freescan( xfs_mount_t *mp, /* filesystem mount point */ - xfs_dir2_data_hdr_t *hdr, /* data block header */ + xfs_dir2_data_t *d, /* data block pointer */ int *loghead) /* out: log data header */ { xfs_dir2_block_tail_t *btp; /* block tail */ @@ -339,23 +335,23 @@ xfs_dir2_data_freescan( char *p; /* current entry pointer */ #ifdef __KERNEL__ - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); #endif /* * Start by clearing the table. */ - memset(hdr->bestfree, 0, sizeof(hdr->bestfree)); + memset(d->hdr.bestfree, 0, sizeof(d->hdr.bestfree)); *loghead = 1; /* * Set up pointers. */ - p = (char *)(hdr + 1); - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { - btp = xfs_dir2_block_tail_p(mp, hdr); + p = (char *)d->u; + if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { + btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); endp = (char *)xfs_dir2_block_leaf_p(btp); } else - endp = (char *)hdr + mp->m_dirblksize; + endp = (char *)d + mp->m_dirblksize; /* * Loop over the block's entries. */ @@ -365,9 +361,9 @@ xfs_dir2_data_freescan( * If it's a free entry, insert it. 
*/ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - ASSERT((char *)dup - (char *)hdr == + ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); - xfs_dir2_data_freeinsert(hdr, dup, loghead); + xfs_dir2_data_freeinsert(d, dup, loghead); p += be16_to_cpu(dup->length); } /* @@ -375,7 +371,7 @@ xfs_dir2_data_freescan( */ else { dep = (xfs_dir2_data_entry_t *)p; - ASSERT((char *)dep - (char *)hdr == + ASSERT((char *)dep - (char *)d == be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep))); p += xfs_dir2_data_entsize(dep->namelen); } @@ -393,7 +389,7 @@ xfs_dir2_data_init( xfs_dabuf_t **bpp) /* output block buffer */ { xfs_dabuf_t *bp; /* block buffer */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *d; /* pointer to block */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_data_unused_t *dup; /* unused entry pointer */ int error; /* error return value */ @@ -414,28 +410,26 @@ xfs_dir2_data_init( return error; } ASSERT(bp != NULL); - /* * Initialize the header. */ - hdr = bp->data; - hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); - hdr->bestfree[0].offset = cpu_to_be16(sizeof(*hdr)); + d = bp->data; + d->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); + d->hdr.bestfree[0].offset = cpu_to_be16(sizeof(d->hdr)); for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { - hdr->bestfree[i].length = 0; - hdr->bestfree[i].offset = 0; + d->hdr.bestfree[i].length = 0; + d->hdr.bestfree[i].offset = 0; } - /* * Set up an unused entry for the block's body. 
*/ - dup = (xfs_dir2_data_unused_t *)(hdr + 1); + dup = &d->u[0].unused; dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); - t = mp->m_dirblksize - (uint)sizeof(*hdr); - hdr->bestfree[0].length = cpu_to_be16(t); + t=mp->m_dirblksize - (uint)sizeof(d->hdr); + d->hdr.bestfree[0].length = cpu_to_be16(t); dup->length = cpu_to_be16(t); - *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr); + *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)d); /* * Log it and return it. */ @@ -454,14 +448,14 @@ xfs_dir2_data_log_entry( xfs_dabuf_t *bp, /* block buffer */ xfs_dir2_data_entry_t *dep) /* data entry pointer */ { - xfs_dir2_data_hdr_t *hdr = bp->data; - - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); + xfs_dir2_data_t *d; /* data block pointer */ - xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr), + d = bp->data; + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d), (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) - - (char *)hdr - 1)); + (char *)d - 1)); } /* @@ -472,12 +466,13 @@ xfs_dir2_data_log_header( xfs_trans_t *tp, /* transaction pointer */ xfs_dabuf_t *bp) /* block buffer */ { - xfs_dir2_data_hdr_t *hdr = bp->data; + xfs_dir2_data_t *d; /* data block pointer */ - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); - - xfs_da_log_buf(tp, bp, 0, sizeof(*hdr) - 1); + d = bp->data; + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + xfs_da_log_buf(tp, bp, (uint)((char *)&d->hdr - (char *)d), + (uint)(sizeof(d->hdr) - 1)); } /* @@ -489,23 +484,23 @@ xfs_dir2_data_log_unused( xfs_dabuf_t *bp, /* block buffer */ xfs_dir2_data_unused_t *dup) /* data unused pointer */ { - xfs_dir2_data_hdr_t *hdr = bp->data; - - 
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); + xfs_dir2_data_t *d; /* data block pointer */ + d = bp->data; + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); /* * Log the first part of the unused entry. */ - xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr), + xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)d), (uint)((char *)&dup->length + sizeof(dup->length) - - 1 - (char *)hdr)); + 1 - (char *)d)); /* * Log the end (tag) of the unused entry. */ xfs_da_log_buf(tp, bp, - (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr), - (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr + + (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d), + (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d + sizeof(xfs_dir2_data_off_t) - 1)); } @@ -522,7 +517,7 @@ xfs_dir2_data_make_free( int *needlogp, /* out: log header */ int *needscanp) /* out: regen bestfree */ { - xfs_dir2_data_hdr_t *hdr; /* data block pointer */ + xfs_dir2_data_t *d; /* data block pointer */ xfs_dir2_data_free_t *dfp; /* bestfree pointer */ char *endptr; /* end of data area */ xfs_mount_t *mp; /* filesystem mount point */ @@ -532,29 +527,28 @@ xfs_dir2_data_make_free( xfs_dir2_data_unused_t *prevdup; /* unused entry before us */ mp = tp->t_mountp; - hdr = bp->data; - + d = bp->data; /* * Figure out where the end of the data area is. 
*/ - if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)) - endptr = (char *)hdr + mp->m_dirblksize; + if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC) + endptr = (char *)d + mp->m_dirblksize; else { xfs_dir2_block_tail_t *btp; /* block tail */ - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); - btp = xfs_dir2_block_tail_p(mp, hdr); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); endptr = (char *)xfs_dir2_block_leaf_p(btp); } /* * If this isn't the start of the block, then back up to * the previous entry and see if it's free. */ - if (offset > sizeof(*hdr)) { + if (offset > sizeof(d->hdr)) { __be16 *tagp; /* tag just before us */ - tagp = (__be16 *)((char *)hdr + offset) - 1; - prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); + tagp = (__be16 *)((char *)d + offset) - 1; + prevdup = (xfs_dir2_data_unused_t *)((char *)d + be16_to_cpu(*tagp)); if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG) prevdup = NULL; } else @@ -563,9 +557,9 @@ xfs_dir2_data_make_free( * If this isn't the end of the block, see if the entry after * us is free. */ - if ((char *)hdr + offset + len < endptr) { + if ((char *)d + offset + len < endptr) { postdup = - (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); + (xfs_dir2_data_unused_t *)((char *)d + offset + len); if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG) postdup = NULL; } else @@ -582,21 +576,21 @@ xfs_dir2_data_make_free( /* * See if prevdup and/or postdup are in bestfree table. */ - dfp = xfs_dir2_data_freefind(hdr, prevdup); - dfp2 = xfs_dir2_data_freefind(hdr, postdup); + dfp = xfs_dir2_data_freefind(d, prevdup); + dfp2 = xfs_dir2_data_freefind(d, postdup); /* * We need a rescan unless there are exactly 2 free entries * namely our two. Then we know what's happening, otherwise * since the third bestfree is there, there might be more * entries. 
*/ - needscan = (hdr->bestfree[2].length != 0); + needscan = (d->hdr.bestfree[2].length != 0); /* * Fix up the new big freespace. */ be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(prevdup) = - cpu_to_be16((char *)prevdup - (char *)hdr); + cpu_to_be16((char *)prevdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, prevdup); if (!needscan) { /* @@ -606,18 +600,18 @@ xfs_dir2_data_make_free( * Remove entry 1 first then entry 0. */ ASSERT(dfp && dfp2); - if (dfp == &hdr->bestfree[1]) { - dfp = &hdr->bestfree[0]; + if (dfp == &d->hdr.bestfree[1]) { + dfp = &d->hdr.bestfree[0]; ASSERT(dfp2 == dfp); - dfp2 = &hdr->bestfree[1]; + dfp2 = &d->hdr.bestfree[1]; } - xfs_dir2_data_freeremove(hdr, dfp2, needlogp); - xfs_dir2_data_freeremove(hdr, dfp, needlogp); + xfs_dir2_data_freeremove(d, dfp2, needlogp); + xfs_dir2_data_freeremove(d, dfp, needlogp); /* * Now insert the new entry. */ - dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp); - ASSERT(dfp == &hdr->bestfree[0]); + dfp = xfs_dir2_data_freeinsert(d, prevdup, needlogp); + ASSERT(dfp == &d->hdr.bestfree[0]); ASSERT(dfp->length == prevdup->length); ASSERT(!dfp[1].length); ASSERT(!dfp[2].length); @@ -627,10 +621,10 @@ xfs_dir2_data_make_free( * The entry before us is free, merge with it. */ else if (prevdup) { - dfp = xfs_dir2_data_freefind(hdr, prevdup); + dfp = xfs_dir2_data_freefind(d, prevdup); be16_add_cpu(&prevdup->length, len); *xfs_dir2_data_unused_tag_p(prevdup) = - cpu_to_be16((char *)prevdup - (char *)hdr); + cpu_to_be16((char *)prevdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, prevdup); /* * If the previous entry was in the table, the new entry @@ -638,27 +632,27 @@ xfs_dir2_data_make_free( * the old one and add the new one. 
*/ if (dfp) { - xfs_dir2_data_freeremove(hdr, dfp, needlogp); - xfs_dir2_data_freeinsert(hdr, prevdup, needlogp); + xfs_dir2_data_freeremove(d, dfp, needlogp); + (void)xfs_dir2_data_freeinsert(d, prevdup, needlogp); } /* * Otherwise we need a scan if the new entry is big enough. */ else { needscan = be16_to_cpu(prevdup->length) > - be16_to_cpu(hdr->bestfree[2].length); + be16_to_cpu(d->hdr.bestfree[2].length); } } /* * The following entry is free, merge with it. */ else if (postdup) { - dfp = xfs_dir2_data_freefind(hdr, postdup); - newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); + dfp = xfs_dir2_data_freefind(d, postdup); + newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)hdr); + cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); /* * If the following entry was in the table, the new entry @@ -666,28 +660,28 @@ xfs_dir2_data_make_free( * the old one and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(hdr, dfp, needlogp); - xfs_dir2_data_freeinsert(hdr, newdup, needlogp); + xfs_dir2_data_freeremove(d, dfp, needlogp); + (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); } /* * Otherwise we need a scan if the new entry is big enough. */ else { needscan = be16_to_cpu(newdup->length) > - be16_to_cpu(hdr->bestfree[2].length); + be16_to_cpu(d->hdr.bestfree[2].length); } } /* * Neither neighbor is free. Make a new entry. 
*/ else { - newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); + newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)hdr); + cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); - xfs_dir2_data_freeinsert(hdr, newdup, needlogp); + (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); } *needscanp = needscan; } @@ -705,7 +699,7 @@ xfs_dir2_data_use_free( int *needlogp, /* out: need to log header */ int *needscanp) /* out: need regen bestfree */ { - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *d; /* data block */ xfs_dir2_data_free_t *dfp; /* bestfree pointer */ int matchback; /* matches end of freespace */ int matchfront; /* matches start of freespace */ @@ -714,24 +708,24 @@ xfs_dir2_data_use_free( xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ int oldlen; /* old unused entry's length */ - hdr = bp->data; - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); + d = bp->data; + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); - ASSERT(offset >= (char *)dup - (char *)hdr); - ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr); - ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); + ASSERT(offset >= (char *)dup - (char *)d); + ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d); + ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); /* * Look up the entry in the bestfree table. 
*/ - dfp = xfs_dir2_data_freefind(hdr, dup); + dfp = xfs_dir2_data_freefind(d, dup); oldlen = be16_to_cpu(dup->length); - ASSERT(dfp || oldlen <= be16_to_cpu(hdr->bestfree[2].length)); + ASSERT(dfp || oldlen <= be16_to_cpu(d->hdr.bestfree[2].length)); /* * Check for alignment with front and back of the entry. */ - matchfront = (char *)dup - (char *)hdr == offset; - matchback = (char *)dup + oldlen - (char *)hdr == offset + len; + matchfront = (char *)dup - (char *)d == offset; + matchback = (char *)dup + oldlen - (char *)d == offset + len; ASSERT(*needscanp == 0); needscan = 0; /* @@ -740,9 +734,9 @@ xfs_dir2_data_use_free( */ if (matchfront && matchback) { if (dfp) { - needscan = (hdr->bestfree[2].offset != 0); + needscan = (d->hdr.bestfree[2].offset != 0); if (!needscan) - xfs_dir2_data_freeremove(hdr, dfp, needlogp); + xfs_dir2_data_freeremove(d, dfp, needlogp); } } /* @@ -750,27 +744,27 @@ xfs_dir2_data_use_free( * Make a new entry with the remaining freespace. */ else if (matchfront) { - newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); + newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(oldlen - len); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)hdr); + cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); /* * If it was in the table, remove it and add the new one. 
*/ if (dfp) { - xfs_dir2_data_freeremove(hdr, dfp, needlogp); - dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp); + xfs_dir2_data_freeremove(d, dfp, needlogp); + dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); ASSERT(dfp != NULL); ASSERT(dfp->length == newdup->length); - ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); + ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d); /* * If we got inserted at the last slot, * that means we don't know if there was a better * choice for the last slot, or not. Rescan. */ - needscan = dfp == &hdr->bestfree[2]; + needscan = dfp == &d->hdr.bestfree[2]; } } /* @@ -779,25 +773,25 @@ xfs_dir2_data_use_free( */ else if (matchback) { newdup = dup; - newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); + newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)hdr); + cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); /* * If it was in the table, remove it and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(hdr, dfp, needlogp); - dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp); + xfs_dir2_data_freeremove(d, dfp, needlogp); + dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); ASSERT(dfp != NULL); ASSERT(dfp->length == newdup->length); - ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); + ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d); /* * If we got inserted at the last slot, * that means we don't know if there was a better * choice for the last slot, or not. Rescan. 
*/ - needscan = dfp == &hdr->bestfree[2]; + needscan = dfp == &d->hdr.bestfree[2]; } } /* @@ -806,15 +800,15 @@ xfs_dir2_data_use_free( */ else { newdup = dup; - newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); + newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)hdr); + cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); - newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); + newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len); newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); *xfs_dir2_data_unused_tag_p(newdup2) = - cpu_to_be16((char *)newdup2 - (char *)hdr); + cpu_to_be16((char *)newdup2 - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup2); /* * If the old entry was in the table, we need to scan @@ -825,12 +819,13 @@ xfs_dir2_data_use_free( * the 2 new will work. */ if (dfp) { - needscan = (hdr->bestfree[2].length != 0); + needscan = (d->hdr.bestfree[2].length != 0); if (!needscan) { - xfs_dir2_data_freeremove(hdr, dfp, needlogp); - xfs_dir2_data_freeinsert(hdr, newdup, needlogp); - xfs_dir2_data_freeinsert(hdr, newdup2, - needlogp); + xfs_dir2_data_freeremove(d, dfp, needlogp); + (void)xfs_dir2_data_freeinsert(d, newdup, + needlogp); + (void)xfs_dir2_data_freeinsert(d, newdup2, + needlogp); } } } diff --git a/trunk/fs/xfs/xfs_dir2_data.h b/trunk/fs/xfs/xfs_dir2_data.h new file mode 100644 index 000000000000..efbc290c7fec --- /dev/null +++ b/trunk/fs/xfs/xfs_dir2_data.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2000,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DIR2_DATA_H__ +#define __XFS_DIR2_DATA_H__ + +/* + * Directory format 2, data block structures. + */ + +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_inode; +struct xfs_trans; + +/* + * Constants. + */ +#define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: for multiblock dirs */ +#define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */ +#define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG) +#define XFS_DIR2_DATA_FREE_TAG 0xffff +#define XFS_DIR2_DATA_FD_COUNT 3 + +/* + * Directory address space divided into sections, + * spaces separated by 32GB. + */ +#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG)) +#define XFS_DIR2_DATA_SPACE 0 +#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) +#define XFS_DIR2_DATA_FIRSTDB(mp) \ + xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET) + +/* + * Offsets of . and .. in data space (always block 0) + */ +#define XFS_DIR2_DATA_DOT_OFFSET \ + ((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t)) +#define XFS_DIR2_DATA_DOTDOT_OFFSET \ + (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1)) +#define XFS_DIR2_DATA_FIRST_OFFSET \ + (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2)) + +/* + * Structures. + */ + +/* + * Describe a free area in the data block. + * The freespace will be formatted as a xfs_dir2_data_unused_t. + */ +typedef struct xfs_dir2_data_free { + __be16 offset; /* start of freespace */ + __be16 length; /* length of freespace */ +} xfs_dir2_data_free_t; + +/* + * Header for the data blocks. 
+ * Always at the beginning of a directory-sized block. + * The code knows that XFS_DIR2_DATA_FD_COUNT is 3. + */ +typedef struct xfs_dir2_data_hdr { + __be32 magic; /* XFS_DIR2_DATA_MAGIC */ + /* or XFS_DIR2_BLOCK_MAGIC */ + xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT]; +} xfs_dir2_data_hdr_t; + +/* + * Active entry in a data block. Aligned to 8 bytes. + * Tag appears as the last 2 bytes. + */ +typedef struct xfs_dir2_data_entry { + __be64 inumber; /* inode number */ + __u8 namelen; /* name length */ + __u8 name[1]; /* name bytes, no null */ + /* variable offset */ + __be16 tag; /* starting offset of us */ +} xfs_dir2_data_entry_t; + +/* + * Unused entry in a data block. Aligned to 8 bytes. + * Tag appears as the last 2 bytes. + */ +typedef struct xfs_dir2_data_unused { + __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */ + __be16 length; /* total free length */ + /* variable offset */ + __be16 tag; /* starting offset of us */ +} xfs_dir2_data_unused_t; + +typedef union { + xfs_dir2_data_entry_t entry; + xfs_dir2_data_unused_t unused; +} xfs_dir2_data_union_t; + +/* + * Generic data block structure, for xfs_db. + */ +typedef struct xfs_dir2_data { + xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_DATA_MAGIC */ + xfs_dir2_data_union_t u[1]; +} xfs_dir2_data_t; + +/* + * Macros. + */ + +/* + * Size of a data entry. + */ +static inline int xfs_dir2_data_entsize(int n) +{ + return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \ + (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN); +} + +/* + * Pointer to an entry's tag word. + */ +static inline __be16 * +xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep) +{ + return (__be16 *)((char *)dep + + xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16)); +} + +/* + * Pointer to a freespace's tag word. + */ +static inline __be16 * +xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup) +{ + return (__be16 *)((char *)dup + + be16_to_cpu(dup->length) - sizeof(__be16)); +} + +/* + * Function declarations. 
+ */ +#ifdef DEBUG +extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp); +#else +#define xfs_dir2_data_check(dp,bp) +#endif +extern xfs_dir2_data_free_t *xfs_dir2_data_freefind(xfs_dir2_data_t *d, + xfs_dir2_data_unused_t *dup); +extern xfs_dir2_data_free_t *xfs_dir2_data_freeinsert(xfs_dir2_data_t *d, + xfs_dir2_data_unused_t *dup, int *loghead); +extern void xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d, + int *loghead); +extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, + struct xfs_dabuf **bpp); +extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp, + xfs_dir2_data_entry_t *dep); +extern void xfs_dir2_data_log_header(struct xfs_trans *tp, + struct xfs_dabuf *bp); +extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp, + xfs_dir2_data_unused_t *dup); +extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp, + xfs_dir2_data_aoff_t offset, + xfs_dir2_data_aoff_t len, int *needlogp, + int *needscanp); +extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp, + xfs_dir2_data_unused_t *dup, + xfs_dir2_data_aoff_t offset, + xfs_dir2_data_aoff_t len, int *needlogp, + int *needscanp); + +#endif /* __XFS_DIR2_DATA_H__ */ diff --git a/trunk/fs/xfs/xfs_dir2_format.h b/trunk/fs/xfs/xfs_dir2_format.h deleted file mode 100644 index 07270981f48f..000000000000 --- a/trunk/fs/xfs/xfs_dir2_format.h +++ /dev/null @@ -1,597 +0,0 @@ -/* - * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR2_FORMAT_H__ -#define __XFS_DIR2_FORMAT_H__ - -/* - * Directory version 2. - * - * There are 4 possible formats: - * - shortform - embedded into the inode - * - single block - data with embedded leaf at the end - * - multiple data blocks, single leaf+freeindex block - * - data blocks, node and leaf blocks (btree), freeindex blocks - * - * Note: many node blocks structures and constants are shared with the attr - * code and defined in xfs_da_btree.h. - */ - -#define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: single block dirs */ -#define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: multiblock dirs */ -#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F: free index blocks */ - -/* - * Byte offset in data block and shortform entry. - */ -typedef __uint16_t xfs_dir2_data_off_t; -#define NULLDATAOFF 0xffffU -typedef uint xfs_dir2_data_aoff_t; /* argument form */ - -/* - * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t. - * Only need 16 bits, this is the byte offset into the single block form. - */ -typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t; - -/* - * Offset in data space of a data entry. - */ -typedef __uint32_t xfs_dir2_dataptr_t; -#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff) -#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0) - -/* - * Byte offset in a directory. - */ -typedef xfs_off_t xfs_dir2_off_t; - -/* - * Directory block number (logical dirblk in file) - */ -typedef __uint32_t xfs_dir2_db_t; - -/* - * Inode number stored as 8 8-bit values. - */ -typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t; - -/* - * Inode number stored as 4 8-bit values. 
- * Works a lot of the time, when all the inode numbers in a directory - * fit in 32 bits. - */ -typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t; - -typedef union { - xfs_dir2_ino8_t i8; - xfs_dir2_ino4_t i4; -} xfs_dir2_inou_t; -#define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL) - -/* - * Directory layout when stored internal to an inode. - * - * Small directories are packed as tightly as possible so as to fit into the - * literal area of the inode. These "shortform" directories consist of a - * single xfs_dir2_sf_hdr header followed by zero or more xfs_dir2_sf_entry - * structures. Due the different inode number storage size and the variable - * length name field in the xfs_dir2_sf_entry all these structure are - * variable length, and the accessors in this file should be used to iterate - * over them. - */ -typedef struct xfs_dir2_sf_hdr { - __uint8_t count; /* count of entries */ - __uint8_t i8count; /* count of 8-byte inode #s */ - xfs_dir2_inou_t parent; /* parent dir inode number */ -} __arch_pack xfs_dir2_sf_hdr_t; - -typedef struct xfs_dir2_sf_entry { - __u8 namelen; /* actual name length */ - xfs_dir2_sf_off_t offset; /* saved offset */ - __u8 name[]; /* name, variable size */ - /* - * A xfs_dir2_ino8_t or xfs_dir2_ino4_t follows here, at a - * variable offset after the name. 
- */ -} __arch_pack xfs_dir2_sf_entry_t; - -static inline int xfs_dir2_sf_hdr_size(int i8count) -{ - return sizeof(struct xfs_dir2_sf_hdr) - - (i8count == 0) * - (sizeof(xfs_dir2_ino8_t) - sizeof(xfs_dir2_ino4_t)); -} - -static inline xfs_dir2_data_aoff_t -xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep) -{ - return get_unaligned_be16(&sfep->offset.i); -} - -static inline void -xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off) -{ - put_unaligned_be16(off, &sfep->offset.i); -} - -static inline int -xfs_dir2_sf_entsize(struct xfs_dir2_sf_hdr *hdr, int len) -{ - return sizeof(struct xfs_dir2_sf_entry) + /* namelen + offset */ - len + /* name */ - (hdr->i8count ? /* ino */ - sizeof(xfs_dir2_ino8_t) : - sizeof(xfs_dir2_ino4_t)); -} - -static inline struct xfs_dir2_sf_entry * -xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr) -{ - return (struct xfs_dir2_sf_entry *) - ((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count)); -} - -static inline struct xfs_dir2_sf_entry * -xfs_dir2_sf_nextentry(struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return (struct xfs_dir2_sf_entry *) - ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen)); -} - - -/* - * Data block structures. - * - * A pure data block looks like the following drawing on disk: - * - * +-------------------------------------------------+ - * | xfs_dir2_data_hdr_t | - * +-------------------------------------------------+ - * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | - * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | - * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | - * | ... | - * +-------------------------------------------------+ - * | unused space | - * +-------------------------------------------------+ - * - * As all the entries are variable size structures the accessors below should - * be used to iterate over them. 
- * - * In addition to the pure data blocks for the data and node formats, - * most structures are also used for the combined data/freespace "block" - * format below. - */ - -#define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */ -#define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG) -#define XFS_DIR2_DATA_FREE_TAG 0xffff -#define XFS_DIR2_DATA_FD_COUNT 3 - -/* - * Directory address space divided into sections, - * spaces separated by 32GB. - */ -#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG)) -#define XFS_DIR2_DATA_SPACE 0 -#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_DATA_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET) - -/* - * Offsets of . and .. in data space (always block 0) - */ -#define XFS_DIR2_DATA_DOT_OFFSET \ - ((xfs_dir2_data_aoff_t)sizeof(struct xfs_dir2_data_hdr)) -#define XFS_DIR2_DATA_DOTDOT_OFFSET \ - (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1)) -#define XFS_DIR2_DATA_FIRST_OFFSET \ - (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2)) - -/* - * Describe a free area in the data block. - * - * The freespace will be formatted as a xfs_dir2_data_unused_t. - */ -typedef struct xfs_dir2_data_free { - __be16 offset; /* start of freespace */ - __be16 length; /* length of freespace */ -} xfs_dir2_data_free_t; - -/* - * Header for the data blocks. - * - * The code knows that XFS_DIR2_DATA_FD_COUNT is 3. - */ -typedef struct xfs_dir2_data_hdr { - __be32 magic; /* XFS_DIR2_DATA_MAGIC or */ - /* XFS_DIR2_BLOCK_MAGIC */ - xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT]; -} xfs_dir2_data_hdr_t; - -/* - * Active entry in a data block. - * - * Aligned to 8 bytes. After the variable length name field there is a - * 2 byte tag field, which can be accessed using xfs_dir2_data_entry_tag_p. 
- */ -typedef struct xfs_dir2_data_entry { - __be64 inumber; /* inode number */ - __u8 namelen; /* name length */ - __u8 name[]; /* name bytes, no null */ - /* __be16 tag; */ /* starting offset of us */ -} xfs_dir2_data_entry_t; - -/* - * Unused entry in a data block. - * - * Aligned to 8 bytes. Tag appears as the last 2 bytes and must be accessed - * using xfs_dir2_data_unused_tag_p. - */ -typedef struct xfs_dir2_data_unused { - __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */ - __be16 length; /* total free length */ - /* variable offset */ - __be16 tag; /* starting offset of us */ -} xfs_dir2_data_unused_t; - -/* - * Size of a data entry. - */ -static inline int xfs_dir2_data_entsize(int n) -{ - return (int)roundup(offsetof(struct xfs_dir2_data_entry, name[0]) + n + - (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN); -} - -/* - * Pointer to an entry's tag word. - */ -static inline __be16 * -xfs_dir2_data_entry_tag_p(struct xfs_dir2_data_entry *dep) -{ - return (__be16 *)((char *)dep + - xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16)); -} - -/* - * Pointer to a freespace's tag word. - */ -static inline __be16 * -xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup) -{ - return (__be16 *)((char *)dup + - be16_to_cpu(dup->length) - sizeof(__be16)); -} - -/* - * Leaf block structures. - * - * A pure leaf block looks like the following drawing on disk: - * - * +---------------------------+ - * | xfs_dir2_leaf_hdr_t | - * +---------------------------+ - * | xfs_dir2_leaf_entry_t | - * | xfs_dir2_leaf_entry_t | - * | xfs_dir2_leaf_entry_t | - * | xfs_dir2_leaf_entry_t | - * | ... | - * +---------------------------+ - * | xfs_dir2_data_off_t | - * | xfs_dir2_data_off_t | - * | xfs_dir2_data_off_t | - * | ... 
| - * +---------------------------+ - * | xfs_dir2_leaf_tail_t | - * +---------------------------+ - * - * The xfs_dir2_data_off_t members (bests) and tail are at the end of the block - * for single-leaf (magic = XFS_DIR2_LEAF1_MAGIC) blocks only, but not present - * for directories with separate leaf nodes and free space blocks - * (magic = XFS_DIR2_LEAFN_MAGIC). - * - * As all the entries are variable size structures the accessors below should - * be used to iterate over them. - */ - -/* - * Offset of the leaf/node space. First block in this space - * is the btree root. - */ -#define XFS_DIR2_LEAF_SPACE 1 -#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_LEAF_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET) - -/* - * Leaf block header. - */ -typedef struct xfs_dir2_leaf_hdr { - xfs_da_blkinfo_t info; /* header for da routines */ - __be16 count; /* count of entries */ - __be16 stale; /* count of stale entries */ -} xfs_dir2_leaf_hdr_t; - -/* - * Leaf block entry. - */ -typedef struct xfs_dir2_leaf_entry { - __be32 hashval; /* hash value of name */ - __be32 address; /* address of data entry */ -} xfs_dir2_leaf_entry_t; - -/* - * Leaf block tail. - */ -typedef struct xfs_dir2_leaf_tail { - __be32 bestcount; -} xfs_dir2_leaf_tail_t; - -/* - * Leaf block. - */ -typedef struct xfs_dir2_leaf { - xfs_dir2_leaf_hdr_t hdr; /* leaf header */ - xfs_dir2_leaf_entry_t ents[]; /* entries */ -} xfs_dir2_leaf_t; - -/* - * DB blocks here are logical directory block numbers, not filesystem blocks. - */ - -static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp) -{ - return (mp->m_dirblksize - (uint)sizeof(struct xfs_dir2_leaf_hdr)) / - (uint)sizeof(struct xfs_dir2_leaf_entry); -} - -/* - * Get address of the bestcount field in the single-leaf block. 
- */ -static inline struct xfs_dir2_leaf_tail * -xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp) -{ - return (struct xfs_dir2_leaf_tail *) - ((char *)lp + mp->m_dirblksize - - sizeof(struct xfs_dir2_leaf_tail)); -} - -/* - * Get address of the bests array in the single-leaf block. - */ -static inline __be16 * -xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp) -{ - return (__be16 *)ltp - be32_to_cpu(ltp->bestcount); -} - -/* - * Convert dataptr to byte in file space - */ -static inline xfs_dir2_off_t -xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; -} - -/* - * Convert byte in file space to dataptr. It had better be aligned. - */ -static inline xfs_dir2_dataptr_t -xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); -} - -/* - * Convert byte in space to (DB) block - */ -static inline xfs_dir2_db_t -xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_db_t) - (by >> (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)); -} - -/* - * Convert dataptr to a block number - */ -static inline xfs_dir2_db_t -xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp)); -} - -/* - * Convert byte in space to offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_data_aoff_t)(by & - ((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) - 1)); -} - -/* - * Convert dataptr to a byte offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp)); -} - -/* - * Convert block and offset to byte in space - */ -static inline xfs_dir2_off_t -xfs_dir2_db_off_to_byte(struct xfs_mount 
*mp, xfs_dir2_db_t db, - xfs_dir2_data_aoff_t o) -{ - return ((xfs_dir2_off_t)db << - (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) + o; -} - -/* - * Convert block (DB) to block (dablk) - */ -static inline xfs_dablk_t -xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return (xfs_dablk_t)(db << mp->m_sb.sb_dirblklog); -} - -/* - * Convert byte in space to (DA) block - */ -static inline xfs_dablk_t -xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by)); -} - -/* - * Convert block and offset to dataptr - */ -static inline xfs_dir2_dataptr_t -xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db, - xfs_dir2_data_aoff_t o) -{ - return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o)); -} - -/* - * Convert block (dablk) to block (DB) - */ -static inline xfs_dir2_db_t -xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da) -{ - return (xfs_dir2_db_t)(da >> mp->m_sb.sb_dirblklog); -} - -/* - * Convert block (dablk) to byte offset in space - */ -static inline xfs_dir2_off_t -xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da) -{ - return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0); -} - -/* - * Free space block defintions for the node format. - */ - -/* - * Offset of the freespace index. 
- */ -#define XFS_DIR2_FREE_SPACE 2 -#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_FREE_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET) - -typedef struct xfs_dir2_free_hdr { - __be32 magic; /* XFS_DIR2_FREE_MAGIC */ - __be32 firstdb; /* db of first entry */ - __be32 nvalid; /* count of valid entries */ - __be32 nused; /* count of used entries */ -} xfs_dir2_free_hdr_t; - -typedef struct xfs_dir2_free { - xfs_dir2_free_hdr_t hdr; /* block header */ - __be16 bests[]; /* best free counts */ - /* unused entries are -1 */ -} xfs_dir2_free_t; - -static inline int xfs_dir2_free_max_bests(struct xfs_mount *mp) -{ - return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) / - sizeof(xfs_dir2_data_off_t); -} - -/* - * Convert data space db to the corresponding free db. - */ -static inline xfs_dir2_db_t -xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp); -} - -/* - * Convert data space db to the corresponding index in a free db. - */ -static inline int -xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return db % xfs_dir2_free_max_bests(mp); -} - -/* - * Single block format. - * - * The single block format looks like the following drawing on disk: - * - * +-------------------------------------------------+ - * | xfs_dir2_data_hdr_t | - * +-------------------------------------------------+ - * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | - * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | - * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t : - * | ... | - * +-------------------------------------------------+ - * | unused space | - * +-------------------------------------------------+ - * | ... 
| - * | xfs_dir2_leaf_entry_t | - * | xfs_dir2_leaf_entry_t | - * +-------------------------------------------------+ - * | xfs_dir2_block_tail_t | - * +-------------------------------------------------+ - * - * As all the entries are variable size structures the accessors below should - * be used to iterate over them. - */ - -typedef struct xfs_dir2_block_tail { - __be32 count; /* count of leaf entries */ - __be32 stale; /* count of stale lf entries */ -} xfs_dir2_block_tail_t; - -/* - * Pointer to the leaf header embedded in a data block (1-block format) - */ -static inline struct xfs_dir2_block_tail * -xfs_dir2_block_tail_p(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr) -{ - return ((struct xfs_dir2_block_tail *) - ((char *)hdr + mp->m_dirblksize)) - 1; -} - -/* - * Pointer to the leaf entries embedded in a data block (1-block format) - */ -static inline struct xfs_dir2_leaf_entry * -xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp) -{ - return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count); -} - -#endif /* __XFS_DIR2_FORMAT_H__ */ diff --git a/trunk/fs/xfs/xfs_dir2_leaf.c b/trunk/fs/xfs/xfs_dir2_leaf.c index ca2386d82cdf..ae891223be90 100644 --- a/trunk/fs/xfs/xfs_dir2_leaf.c +++ b/trunk/fs/xfs/xfs_dir2_leaf.c @@ -24,14 +24,18 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" +#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2_priv.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" +#include "xfs_dir2_node.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -60,7 +64,7 @@ xfs_dir2_block_to_leaf( { __be16 *bestsp; /* leaf's bestsp entries */ xfs_dablk_t blkno; /* leaf block's bno */ - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* block structure */ 
xfs_dir2_leaf_entry_t *blp; /* block's leaf entries */ xfs_dir2_block_tail_t *btp; /* block's tail */ xfs_inode_t *dp; /* incore directory inode */ @@ -97,9 +101,9 @@ xfs_dir2_block_to_leaf( } ASSERT(lbp != NULL); leaf = lbp->data; - hdr = dbp->data; + block = dbp->data; xfs_dir2_data_check(dp, dbp); - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(mp, block); blp = xfs_dir2_block_leaf_p(btp); /* * Set the counts in the leaf header. @@ -119,23 +123,23 @@ xfs_dir2_block_to_leaf( * tail be free. */ xfs_dir2_data_make_free(tp, dbp, - (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), - (xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize - + (xfs_dir2_data_aoff_t)((char *)blp - (char *)block), + (xfs_dir2_data_aoff_t)((char *)block + mp->m_dirblksize - (char *)blp), &needlog, &needscan); /* * Fix up the block header, make it a data block. */ - hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); + block->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); /* * Set up leaf tail and bests table. */ ltp = xfs_dir2_leaf_tail_p(mp, leaf); ltp->bestcount = cpu_to_be32(1); bestsp = xfs_dir2_leaf_bests_p(ltp); - bestsp[0] = hdr->bestfree[0].length; + bestsp[0] = block->hdr.bestfree[0].length; /* * Log the data header and leaf bests table. */ @@ -148,131 +152,6 @@ xfs_dir2_block_to_leaf( return 0; } -STATIC void -xfs_dir2_leaf_find_stale( - struct xfs_dir2_leaf *leaf, - int index, - int *lowstale, - int *highstale) -{ - /* - * Find the first stale entry before our index, if any. - */ - for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) { - if (leaf->ents[*lowstale].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) - break; - } - - /* - * Find the first stale entry at or after our index, if any. - * Stop if the result would require moving more entries than using - * lowstale. 
- */ - for (*highstale = index; - *highstale < be16_to_cpu(leaf->hdr.count); - ++*highstale) { - if (leaf->ents[*highstale].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) - break; - if (*lowstale >= 0 && index - *lowstale <= *highstale - index) - break; - } -} - -struct xfs_dir2_leaf_entry * -xfs_dir2_leaf_find_entry( - xfs_dir2_leaf_t *leaf, /* leaf structure */ - int index, /* leaf table position */ - int compact, /* need to compact leaves */ - int lowstale, /* index of prev stale leaf */ - int highstale, /* index of next stale leaf */ - int *lfloglow, /* low leaf logging index */ - int *lfloghigh) /* high leaf logging index */ -{ - if (!leaf->hdr.stale) { - xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */ - - /* - * Now we need to make room to insert the leaf entry. - * - * If there are no stale entries, just insert a hole at index. - */ - lep = &leaf->ents[index]; - if (index < be16_to_cpu(leaf->hdr.count)) - memmove(lep + 1, lep, - (be16_to_cpu(leaf->hdr.count) - index) * - sizeof(*lep)); - - /* - * Record low and high logging indices for the leaf. - */ - *lfloglow = index; - *lfloghigh = be16_to_cpu(leaf->hdr.count); - be16_add_cpu(&leaf->hdr.count, 1); - return lep; - } - - /* - * There are stale entries. - * - * We will use one of them for the new entry. It's probably not at - * the right location, so we'll have to shift some up or down first. - * - * If we didn't compact before, we need to find the nearest stale - * entries before and after our insertion point. - */ - if (compact == 0) - xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale); - - /* - * If the low one is better, use it. - */ - if (lowstale >= 0 && - (highstale == be16_to_cpu(leaf->hdr.count) || - index - lowstale - 1 < highstale - index)) { - ASSERT(index - lowstale - 1 >= 0); - ASSERT(leaf->ents[lowstale].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)); - - /* - * Copy entries up to cover the stale entry and make room - * for the new entry. 
- */ - if (index - lowstale - 1 > 0) { - memmove(&leaf->ents[lowstale], - &leaf->ents[lowstale + 1], - (index - lowstale - 1) * - sizeof(xfs_dir2_leaf_entry_t)); - } - *lfloglow = MIN(lowstale, *lfloglow); - *lfloghigh = MAX(index - 1, *lfloghigh); - be16_add_cpu(&leaf->hdr.stale, -1); - return &leaf->ents[index - 1]; - } - - /* - * The high one is better, so use that one. - */ - ASSERT(highstale - index >= 0); - ASSERT(leaf->ents[highstale].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)); - - /* - * Copy entries down to cover the stale entry and make room for the - * new entry. - */ - if (highstale - index > 0) { - memmove(&leaf->ents[index + 1], - &leaf->ents[index], - (highstale - index) * sizeof(xfs_dir2_leaf_entry_t)); - } - *lfloglow = MIN(index, *lfloglow); - *lfloghigh = MAX(highstale, *lfloghigh); - be16_add_cpu(&leaf->hdr.stale, -1); - return &leaf->ents[index]; -} - /* * Add an entry to a leaf form directory. */ @@ -282,7 +161,7 @@ xfs_dir2_leaf_addname( { __be16 *bestsp; /* freespace table in leaf */ int compact; /* need to compact leaves */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *data; /* data block structure */ xfs_dabuf_t *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data block entry */ xfs_inode_t *dp; /* incore directory inode */ @@ -346,7 +225,7 @@ xfs_dir2_leaf_addname( continue; i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); ASSERT(i < be32_to_cpu(ltp->bestcount)); - ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF)); + ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF); if (be16_to_cpu(bestsp[i]) >= length) { use_block = i; break; @@ -360,8 +239,7 @@ xfs_dir2_leaf_addname( /* * Remember a block we see that's missing. 
*/ - if (bestsp[i] == cpu_to_be16(NULLDATAOFF) && - use_block == -1) + if (be16_to_cpu(bestsp[i]) == NULLDATAOFF && use_block == -1) use_block = i; else if (be16_to_cpu(bestsp[i]) >= length) { use_block = i; @@ -372,17 +250,14 @@ xfs_dir2_leaf_addname( /* * How many bytes do we need in the leaf block? */ - needbytes = 0; - if (!leaf->hdr.stale) - needbytes += sizeof(xfs_dir2_leaf_entry_t); - if (use_block == -1) - needbytes += sizeof(xfs_dir2_data_off_t); - + needbytes = + (leaf->hdr.stale ? 0 : (uint)sizeof(leaf->ents[0])) + + (use_block != -1 ? 0 : (uint)sizeof(leaf->bests[0])); /* * Now kill use_block if it refers to a missing block, so we * can use it as an indication of allocation needed. */ - if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF)) + if (use_block != -1 && be16_to_cpu(bestsp[use_block]) == NULLDATAOFF) use_block = -1; /* * If we don't have enough free bytes but we can make enough @@ -494,8 +369,8 @@ xfs_dir2_leaf_addname( */ else xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); - hdr = dbp->data; - bestsp[use_block] = hdr->bestfree[0].length; + data = dbp->data; + bestsp[use_block] = data->hdr.bestfree[0].length; grown = 1; } /* @@ -509,7 +384,7 @@ xfs_dir2_leaf_addname( xfs_da_brelse(tp, lbp); return error; } - hdr = dbp->data; + data = dbp->data; grown = 0; } xfs_dir2_data_check(dp, dbp); @@ -517,14 +392,14 @@ xfs_dir2_leaf_addname( * Point to the biggest freespace in our data block. */ dup = (xfs_dir2_data_unused_t *) - ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset)); + ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset)); ASSERT(be16_to_cpu(dup->length) >= length); needscan = needlog = 0; /* * Mark the initial part of our freespace in use for the new entry. */ xfs_dir2_data_use_free(tp, dbp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length, &needlog, &needscan); /* * Initialize our new entry (at last). 
@@ -534,12 +409,12 @@ xfs_dir2_leaf_addname( dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); + *tagp = cpu_to_be16((char *)dep - (char *)data); /* * Need to scan fix up the bestfree table. */ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, data, &needlog); /* * Need to log the data block's header. */ @@ -550,15 +425,107 @@ xfs_dir2_leaf_addname( * If the bests table needs to be changed, do it. * Log the change unless we've already done that. */ - if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(hdr->bestfree[0].length)) { - bestsp[use_block] = hdr->bestfree[0].length; + if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(data->hdr.bestfree[0].length)) { + bestsp[use_block] = data->hdr.bestfree[0].length; if (!grown) xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); } - - lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale, - highstale, &lfloglow, &lfloghigh); - + /* + * Now we need to make room to insert the leaf entry. + * If there are no stale entries, we just insert a hole at index. + */ + if (!leaf->hdr.stale) { + /* + * lep is still good as the index leaf entry. + */ + if (index < be16_to_cpu(leaf->hdr.count)) + memmove(lep + 1, lep, + (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep)); + /* + * Record low and high logging indices for the leaf. + */ + lfloglow = index; + lfloghigh = be16_to_cpu(leaf->hdr.count); + be16_add_cpu(&leaf->hdr.count, 1); + } + /* + * There are stale entries. + * We will use one of them for the new entry. + * It's probably not at the right location, so we'll have to + * shift some up or down first. + */ + else { + /* + * If we didn't compact before, we need to find the nearest + * stale entries before and after our insertion point. + */ + if (compact == 0) { + /* + * Find the first stale entry before the insertion + * point, if any. 
+ */ + for (lowstale = index - 1; + lowstale >= 0 && + be32_to_cpu(leaf->ents[lowstale].address) != + XFS_DIR2_NULL_DATAPTR; + lowstale--) + continue; + /* + * Find the next stale entry at or after the insertion + * point, if any. Stop if we go so far that the + * lowstale entry would be better. + */ + for (highstale = index; + highstale < be16_to_cpu(leaf->hdr.count) && + be32_to_cpu(leaf->ents[highstale].address) != + XFS_DIR2_NULL_DATAPTR && + (lowstale < 0 || + index - lowstale - 1 >= highstale - index); + highstale++) + continue; + } + /* + * If the low one is better, use it. + */ + if (lowstale >= 0 && + (highstale == be16_to_cpu(leaf->hdr.count) || + index - lowstale - 1 < highstale - index)) { + ASSERT(index - lowstale - 1 >= 0); + ASSERT(be32_to_cpu(leaf->ents[lowstale].address) == + XFS_DIR2_NULL_DATAPTR); + /* + * Copy entries up to cover the stale entry + * and make room for the new entry. + */ + if (index - lowstale - 1 > 0) + memmove(&leaf->ents[lowstale], + &leaf->ents[lowstale + 1], + (index - lowstale - 1) * sizeof(*lep)); + lep = &leaf->ents[index - 1]; + lfloglow = MIN(lowstale, lfloglow); + lfloghigh = MAX(index - 1, lfloghigh); + } + /* + * The high one is better, so use that one. + */ + else { + ASSERT(highstale - index >= 0); + ASSERT(be32_to_cpu(leaf->ents[highstale].address) == + XFS_DIR2_NULL_DATAPTR); + /* + * Copy entries down to cover the stale entry + * and make room for the new entry. + */ + if (highstale - index > 0) + memmove(&leaf->ents[index + 1], + &leaf->ents[index], + (highstale - index) * sizeof(*lep)); + lep = &leaf->ents[index]; + lfloglow = MIN(index, lfloglow); + lfloghigh = MAX(highstale, lfloghigh); + } + be16_add_cpu(&leaf->hdr.stale, -1); + } /* * Fill in the new leaf entry. 
*/ @@ -595,7 +562,7 @@ xfs_dir2_leaf_check( leaf = bp->data; mp = dp->i_mount; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); /* * This value is not restrictive enough. * Should factor in the size of the bests table as well. @@ -615,7 +582,7 @@ xfs_dir2_leaf_check( if (i + 1 < be16_to_cpu(leaf->hdr.count)) ASSERT(be32_to_cpu(leaf->ents[i].hashval) <= be32_to_cpu(leaf->ents[i + 1].hashval)); - if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR) stale++; } ASSERT(be16_to_cpu(leaf->hdr.stale) == stale); @@ -644,8 +611,7 @@ xfs_dir2_leaf_compact( * Compress out the stale entries in place. */ for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) { - if (leaf->ents[from].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) continue; /* * Only actually copy the entries that are different. @@ -697,9 +663,24 @@ xfs_dir2_leaf_compact_x1( leaf = bp->data; ASSERT(be16_to_cpu(leaf->hdr.stale) > 1); index = *indexp; - - xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale); - + /* + * Find the first stale entry before our index, if any. + */ + for (lowstale = index - 1; + lowstale >= 0 && + be32_to_cpu(leaf->ents[lowstale].address) != XFS_DIR2_NULL_DATAPTR; + lowstale--) + continue; + /* + * Find the first stale entry at or after our index, if any. + * Stop if the answer would be worse than lowstale. + */ + for (highstale = index; + highstale < be16_to_cpu(leaf->hdr.count) && + be32_to_cpu(leaf->ents[highstale].address) != XFS_DIR2_NULL_DATAPTR && + (lowstale < 0 || index - lowstale > highstale - index); + highstale++) + continue; /* * Pick the better of lowstale and highstale. 
*/ @@ -720,8 +701,7 @@ xfs_dir2_leaf_compact_x1( if (index == from) newindex = to; if (from != keepstale && - leaf->ents[from].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) { + be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) { if (from == to) *lowlogp = to; continue; @@ -780,7 +760,7 @@ xfs_dir2_leaf_getdents( int byteoff; /* offset in current block */ xfs_dir2_db_t curdb; /* db for current block */ xfs_dir2_off_t curoff; /* current overall offset */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *data; /* data block structure */ xfs_dir2_data_entry_t *dep; /* data entry */ xfs_dir2_data_unused_t *dup; /* unused entry */ int error = 0; /* error return value */ @@ -1038,23 +1018,23 @@ xfs_dir2_leaf_getdents( else if (curoff > newoff) ASSERT(xfs_dir2_byte_to_db(mp, curoff) == curdb); - hdr = bp->data; + data = bp->data; xfs_dir2_data_check(dp, bp); /* * Find our position in the block. */ - ptr = (char *)(hdr + 1); + ptr = (char *)&data->u; byteoff = xfs_dir2_byte_to_off(mp, curoff); /* * Skip past the header. */ if (byteoff == 0) - curoff += (uint)sizeof(*hdr); + curoff += (uint)sizeof(data->hdr); /* * Skip past entries until we reach our offset. 
*/ else { - while ((char *)ptr - (char *)hdr < byteoff) { + while ((char *)ptr - (char *)data < byteoff) { dup = (xfs_dir2_data_unused_t *)ptr; if (be16_to_cpu(dup->freetag) @@ -1075,8 +1055,8 @@ xfs_dir2_leaf_getdents( curoff = xfs_dir2_db_off_to_byte(mp, xfs_dir2_byte_to_db(mp, curoff), - (char *)ptr - (char *)hdr); - if (ptr >= (char *)hdr + mp->m_dirblksize) { + (char *)ptr - (char *)data); + if (ptr >= (char *)data + mp->m_dirblksize) { continue; } } @@ -1199,7 +1179,7 @@ xfs_dir2_leaf_log_bests( xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf); firstb = xfs_dir2_leaf_bests_p(ltp) + first; lastb = xfs_dir2_leaf_bests_p(ltp) + last; @@ -1222,8 +1202,8 @@ xfs_dir2_leaf_log_ents( xfs_dir2_leaf_t *leaf; /* leaf structure */ leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || - leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC || + be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); firstlep = &leaf->ents[first]; lastlep = &leaf->ents[last]; xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf), @@ -1241,8 +1221,8 @@ xfs_dir2_leaf_log_header( xfs_dir2_leaf_t *leaf; /* leaf structure */ leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || - leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC || + be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf), (uint)(sizeof(leaf->hdr) - 1)); } @@ -1261,7 +1241,7 @@ xfs_dir2_leaf_log_tail( mp = tp->t_mountp; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); + 
ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); ltp = xfs_dir2_leaf_tail_p(mp, leaf); xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), (uint)(mp->m_dirblksize - 1)); @@ -1457,7 +1437,7 @@ xfs_dir2_leaf_removename( xfs_da_args_t *args) /* operation arguments */ { __be16 *bestsp; /* leaf block best freespace */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *data; /* data block structure */ xfs_dir2_db_t db; /* data block number */ xfs_dabuf_t *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data entry structure */ @@ -1487,7 +1467,7 @@ xfs_dir2_leaf_removename( tp = args->trans; mp = dp->i_mount; leaf = lbp->data; - hdr = dbp->data; + data = dbp->data; xfs_dir2_data_check(dp, dbp); /* * Point to the leaf entry, use that to point to the data entry. @@ -1495,9 +1475,9 @@ xfs_dir2_leaf_removename( lep = &leaf->ents[index]; db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); + ((char *)data + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); needscan = needlog = 0; - oldbest = be16_to_cpu(hdr->bestfree[0].length); + oldbest = be16_to_cpu(data->hdr.bestfree[0].length); ltp = xfs_dir2_leaf_tail_p(mp, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); ASSERT(be16_to_cpu(bestsp[db]) == oldbest); @@ -1505,7 +1485,7 @@ xfs_dir2_leaf_removename( * Mark the former data entry unused. */ xfs_dir2_data_make_free(tp, dbp, - (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), + (xfs_dir2_data_aoff_t)((char *)dep - (char *)data), xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); /* * We just mark the leaf entry stale by putting a null in it. @@ -1519,23 +1499,23 @@ xfs_dir2_leaf_removename( * log the data block header if necessary. 
*/ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, data, &needlog); if (needlog) xfs_dir2_data_log_header(tp, dbp); /* * If the longest freespace in the data block has changed, * put the new value in the bests table and log that. */ - if (be16_to_cpu(hdr->bestfree[0].length) != oldbest) { - bestsp[db] = hdr->bestfree[0].length; + if (be16_to_cpu(data->hdr.bestfree[0].length) != oldbest) { + bestsp[db] = data->hdr.bestfree[0].length; xfs_dir2_leaf_log_bests(tp, lbp, db, db); } xfs_dir2_data_check(dp, dbp); /* * If the data block is now empty then get rid of the data block. */ - if (be16_to_cpu(hdr->bestfree[0].length) == - mp->m_dirblksize - (uint)sizeof(*hdr)) { + if (be16_to_cpu(data->hdr.bestfree[0].length) == + mp->m_dirblksize - (uint)sizeof(data->hdr)) { ASSERT(db != mp->m_dirdatablk); if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { /* @@ -1562,7 +1542,7 @@ xfs_dir2_leaf_removename( * Look for the last active entry (i). */ for (i = db - 1; i > 0; i--) { - if (bestsp[i] != cpu_to_be16(NULLDATAOFF)) + if (be16_to_cpu(bestsp[i]) != NULLDATAOFF) break; } /* @@ -1706,6 +1686,9 @@ xfs_dir2_leaf_trim_data( xfs_dir2_db_t db) /* data block number */ { __be16 *bestsp; /* leaf bests table */ +#ifdef DEBUG + xfs_dir2_data_t *data; /* data block structure */ +#endif xfs_dabuf_t *dbp; /* data block buffer */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return value */ @@ -1724,21 +1707,20 @@ xfs_dir2_leaf_trim_data( XFS_DATA_FORK))) { return error; } +#ifdef DEBUG + data = dbp->data; + ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC); +#endif + /* this seems to be an error + * data is only valid if DEBUG is defined? 
+ * RMC 09/08/1999 + */ leaf = lbp->data; ltp = xfs_dir2_leaf_tail_p(mp, leaf); - -#ifdef DEBUG -{ - struct xfs_dir2_data_hdr *hdr = dbp->data; - - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); - ASSERT(be16_to_cpu(hdr->bestfree[0].length) == - mp->m_dirblksize - (uint)sizeof(*hdr)); + ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) == + mp->m_dirblksize - (uint)sizeof(data->hdr)); ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); -} -#endif - /* * Get rid of the data block. */ @@ -1758,20 +1740,6 @@ xfs_dir2_leaf_trim_data( return 0; } -static inline size_t -xfs_dir2_leaf_size( - struct xfs_dir2_leaf_hdr *hdr, - int counts) -{ - int entries; - - entries = be16_to_cpu(hdr->count) - be16_to_cpu(hdr->stale); - return sizeof(xfs_dir2_leaf_hdr_t) + - entries * sizeof(xfs_dir2_leaf_entry_t) + - counts * sizeof(xfs_dir2_data_off_t) + - sizeof(xfs_dir2_leaf_tail_t); -} - /* * Convert node form directory to leaf form directory. * The root of the node form dir needs to already be a LEAFN block. @@ -1842,7 +1810,7 @@ xfs_dir2_node_to_leaf( return 0; lbp = state->path.blk[0].bp; leaf = lbp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); /* * Read the freespace block. */ @@ -1851,19 +1819,20 @@ xfs_dir2_node_to_leaf( return error; } free = fbp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); ASSERT(!free->hdr.firstdb); - /* * Now see if the leafn and free data will fit in a leaf1. * If not, release the buffer and give up. 
*/ - if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) > - mp->m_dirblksize) { + if ((uint)sizeof(leaf->hdr) + + (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)) * (uint)sizeof(leaf->ents[0]) + + be32_to_cpu(free->hdr.nvalid) * (uint)sizeof(leaf->bests[0]) + + (uint)sizeof(leaf->tail) > + mp->m_dirblksize) { xfs_da_brelse(tp, fbp); return 0; } - /* * If the leaf has any stale entries in it, compress them out. * The compact routine will log the header. @@ -1882,7 +1851,7 @@ xfs_dir2_node_to_leaf( * Set up the leaf bests table. */ memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests, - be32_to_cpu(ltp->bestcount) * sizeof(xfs_dir2_data_off_t)); + be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0])); xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); xfs_dir2_leaf_log_tail(tp, lbp); xfs_dir2_leaf_check(dp, lbp); diff --git a/trunk/fs/xfs/xfs_dir2_leaf.h b/trunk/fs/xfs/xfs_dir2_leaf.h new file mode 100644 index 000000000000..6c9539f06987 --- /dev/null +++ b/trunk/fs/xfs/xfs_dir2_leaf.h @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DIR2_LEAF_H__ +#define __XFS_DIR2_LEAF_H__ + +struct uio; +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_inode; +struct xfs_mount; +struct xfs_trans; + +/* + * Offset of the leaf/node space. First block in this space + * is the btree root. + */ +#define XFS_DIR2_LEAF_SPACE 1 +#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE) +#define XFS_DIR2_LEAF_FIRSTDB(mp) \ + xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET) + +/* + * Offset in data space of a data entry. + */ +typedef __uint32_t xfs_dir2_dataptr_t; +#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff) +#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0) + +/* + * Leaf block header. + */ +typedef struct xfs_dir2_leaf_hdr { + xfs_da_blkinfo_t info; /* header for da routines */ + __be16 count; /* count of entries */ + __be16 stale; /* count of stale entries */ +} xfs_dir2_leaf_hdr_t; + +/* + * Leaf block entry. + */ +typedef struct xfs_dir2_leaf_entry { + __be32 hashval; /* hash value of name */ + __be32 address; /* address of data entry */ +} xfs_dir2_leaf_entry_t; + +/* + * Leaf block tail. + */ +typedef struct xfs_dir2_leaf_tail { + __be32 bestcount; +} xfs_dir2_leaf_tail_t; + +/* + * Leaf block. + * bests and tail are at the end of the block for single-leaf only + * (magic = XFS_DIR2_LEAF1_MAGIC not XFS_DIR2_LEAFN_MAGIC). + */ +typedef struct xfs_dir2_leaf { + xfs_dir2_leaf_hdr_t hdr; /* leaf header */ + xfs_dir2_leaf_entry_t ents[1]; /* entries */ + /* ... */ + xfs_dir2_data_off_t bests[1]; /* best free counts */ + xfs_dir2_leaf_tail_t tail; /* leaf tail */ +} xfs_dir2_leaf_t; + +/* + * DB blocks here are logical directory block numbers, not filesystem blocks. 
+ */ + +static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp) +{ + return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) / + (uint)sizeof(xfs_dir2_leaf_entry_t)); +} + +/* + * Get address of the bestcount field in the single-leaf block. + */ +static inline xfs_dir2_leaf_tail_t * +xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp) +{ + return (xfs_dir2_leaf_tail_t *) + ((char *)(lp) + (mp)->m_dirblksize - + (uint)sizeof(xfs_dir2_leaf_tail_t)); +} + +/* + * Get address of the bests array in the single-leaf block. + */ +static inline __be16 * +xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp) +{ + return (__be16 *)ltp - be32_to_cpu(ltp->bestcount); +} + +/* + * Convert dataptr to byte in file space + */ +static inline xfs_dir2_off_t +xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) +{ + return (xfs_dir2_off_t)(dp) << XFS_DIR2_DATA_ALIGN_LOG; +} + +/* + * Convert byte in file space to dataptr. It had better be aligned. + */ +static inline xfs_dir2_dataptr_t +xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by) +{ + return (xfs_dir2_dataptr_t)((by) >> XFS_DIR2_DATA_ALIGN_LOG); +} + +/* + * Convert byte in space to (DB) block + */ +static inline xfs_dir2_db_t +xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by) +{ + return (xfs_dir2_db_t)((by) >> \ + ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)); +} + +/* + * Convert dataptr to a block number + */ +static inline xfs_dir2_db_t +xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) +{ + return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp)); +} + +/* + * Convert byte in space to offset in a block + */ +static inline xfs_dir2_data_aoff_t +xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by) +{ + return (xfs_dir2_data_aoff_t)((by) & \ + ((1 << ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) - 1)); +} + +/* + * Convert dataptr to a byte offset in a block + */ +static inline xfs_dir2_data_aoff_t 
+xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) +{ + return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp)); +} + +/* + * Convert block and offset to byte in space + */ +static inline xfs_dir2_off_t +xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o) +{ + return ((xfs_dir2_off_t)(db) << \ + ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) + (o); +} + +/* + * Convert block (DB) to block (dablk) + */ +static inline xfs_dablk_t +xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return (xfs_dablk_t)((db) << (mp)->m_sb.sb_dirblklog); +} + +/* + * Convert byte in space to (DA) block + */ +static inline xfs_dablk_t +xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by) +{ + return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by)); +} + +/* + * Convert block and offset to dataptr + */ +static inline xfs_dir2_dataptr_t +xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o) +{ + return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o)); +} + +/* + * Convert block (dablk) to block (DB) + */ +static inline xfs_dir2_db_t +xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da) +{ + return (xfs_dir2_db_t)((da) >> (mp)->m_sb.sb_dirblklog); +} + +/* + * Convert block (dablk) to byte offset in space + */ +static inline xfs_dir2_off_t +xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da) +{ + return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0); +} + +/* + * Function declarations. 
+ */ +extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, + struct xfs_dabuf *dbp); +extern int xfs_dir2_leaf_addname(struct xfs_da_args *args); +extern void xfs_dir2_leaf_compact(struct xfs_da_args *args, + struct xfs_dabuf *bp); +extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp, + int *lowstalep, int *highstalep, + int *lowlogp, int *highlogp); +extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent, + size_t bufsize, xfs_off_t *offset, + filldir_t filldir); +extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno, + struct xfs_dabuf **bpp, int magic); +extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp, + int first, int last); +extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp, + struct xfs_dabuf *bp); +extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); +extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); +extern int xfs_dir2_leaf_replace(struct xfs_da_args *args); +extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args, + struct xfs_dabuf *lbp); +extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args, + struct xfs_dabuf *lbp, xfs_dir2_db_t db); +extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); + +#endif /* __XFS_DIR2_LEAF_H__ */ diff --git a/trunk/fs/xfs/xfs_dir2_node.c b/trunk/fs/xfs/xfs_dir2_node.c index 084b3247d636..a0aab7d3294f 100644 --- a/trunk/fs/xfs/xfs_dir2_node.c +++ b/trunk/fs/xfs/xfs_dir2_node.c @@ -23,14 +23,18 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" +#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2_priv.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" +#include "xfs_dir2_node.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -69,7 +73,7 @@ 
xfs_dir2_free_log_bests( xfs_dir2_free_t *free; /* freespace structure */ free = bp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); xfs_da_log_buf(tp, bp, (uint)((char *)&free->bests[first] - (char *)free), (uint)((char *)&free->bests[last] - (char *)free + @@ -87,7 +91,7 @@ xfs_dir2_free_log_header( xfs_dir2_free_t *free; /* freespace structure */ free = bp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free), (uint)(sizeof(xfs_dir2_free_hdr_t) - 1)); } @@ -240,13 +244,89 @@ xfs_dir2_leafn_add( lfloglow = be16_to_cpu(leaf->hdr.count); lfloghigh = -1; } - + /* + * No stale entries, just insert a space for the new entry. + */ + if (!leaf->hdr.stale) { + lep = &leaf->ents[index]; + if (index < be16_to_cpu(leaf->hdr.count)) + memmove(lep + 1, lep, + (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep)); + lfloglow = index; + lfloghigh = be16_to_cpu(leaf->hdr.count); + be16_add_cpu(&leaf->hdr.count, 1); + } + /* + * There are stale entries. We'll use one for the new entry. + */ + else { + /* + * If we didn't do a compact then we need to figure out + * which stale entry will be used. + */ + if (compact == 0) { + /* + * Find first stale entry before our insertion point. + */ + for (lowstale = index - 1; + lowstale >= 0 && + be32_to_cpu(leaf->ents[lowstale].address) != + XFS_DIR2_NULL_DATAPTR; + lowstale--) + continue; + /* + * Find next stale entry after insertion point. + * Stop looking if the answer would be worse than + * lowstale already found. + */ + for (highstale = index; + highstale < be16_to_cpu(leaf->hdr.count) && + be32_to_cpu(leaf->ents[highstale].address) != + XFS_DIR2_NULL_DATAPTR && + (lowstale < 0 || + index - lowstale - 1 >= highstale - index); + highstale++) + continue; + } + /* + * Using the low stale entry. 
+ * Shift entries up toward the stale slot. + */ + if (lowstale >= 0 && + (highstale == be16_to_cpu(leaf->hdr.count) || + index - lowstale - 1 < highstale - index)) { + ASSERT(be32_to_cpu(leaf->ents[lowstale].address) == + XFS_DIR2_NULL_DATAPTR); + ASSERT(index - lowstale - 1 >= 0); + if (index - lowstale - 1 > 0) + memmove(&leaf->ents[lowstale], + &leaf->ents[lowstale + 1], + (index - lowstale - 1) * sizeof(*lep)); + lep = &leaf->ents[index - 1]; + lfloglow = MIN(lowstale, lfloglow); + lfloghigh = MAX(index - 1, lfloghigh); + } + /* + * Using the high stale entry. + * Shift entries down toward the stale slot. + */ + else { + ASSERT(be32_to_cpu(leaf->ents[highstale].address) == + XFS_DIR2_NULL_DATAPTR); + ASSERT(highstale - index >= 0); + if (highstale - index > 0) + memmove(&leaf->ents[index + 1], + &leaf->ents[index], + (highstale - index) * sizeof(*lep)); + lep = &leaf->ents[index]; + lfloglow = MIN(index, lfloglow); + lfloghigh = MAX(highstale, lfloghigh); + } + be16_add_cpu(&leaf->hdr.stale, -1); + } /* * Insert the new entry, log everything. 
*/ - lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale, - highstale, &lfloglow, &lfloghigh); - lep->hashval = cpu_to_be32(args->hashval); lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, args->blkno, args->index)); @@ -272,14 +352,14 @@ xfs_dir2_leafn_check( leaf = bp->data; mp = dp->i_mount; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp)); for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) { if (i + 1 < be16_to_cpu(leaf->hdr.count)) { ASSERT(be32_to_cpu(leaf->ents[i].hashval) <= be32_to_cpu(leaf->ents[i + 1].hashval)); } - if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR) stale++; } ASSERT(be16_to_cpu(leaf->hdr.stale) == stale); @@ -298,7 +378,7 @@ xfs_dir2_leafn_lasthash( xfs_dir2_leaf_t *leaf; /* leaf structure */ leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); if (count) *count = be16_to_cpu(leaf->hdr.count); if (!leaf->hdr.count) @@ -337,7 +417,7 @@ xfs_dir2_leafn_lookup_for_addname( tp = args->trans; mp = dp->i_mount; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); #ifdef __KERNEL__ ASSERT(be16_to_cpu(leaf->hdr.count) > 0); #endif @@ -354,7 +434,7 @@ xfs_dir2_leafn_lookup_for_addname( curbp = state->extrablk.bp; curfdb = state->extrablk.blkno; free = curbp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); } length = xfs_dir2_data_entsize(args->namelen); /* @@ -408,7 +488,7 @@ xfs_dir2_leafn_lookup_for_addname( ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 
ASSERT((be32_to_cpu(free->hdr.firstdb) % - xfs_dir2_free_max_bests(mp)) == 0); + XFS_DIR2_MAX_FREE_BESTS(mp)) == 0); ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb); ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) + be32_to_cpu(free->hdr.nvalid)); @@ -420,8 +500,7 @@ xfs_dir2_leafn_lookup_for_addname( /* * If it has room, return it. */ - if (unlikely(free->bests[fi] == - cpu_to_be16(NULLDATAOFF))) { + if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) { XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", XFS_ERRLEVEL_LOW, mp); if (curfdb != newfdb) @@ -482,7 +561,7 @@ xfs_dir2_leafn_lookup_for_entry( tp = args->trans; mp = dp->i_mount; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); #ifdef __KERNEL__ ASSERT(be16_to_cpu(leaf->hdr.count) > 0); #endif @@ -663,8 +742,7 @@ xfs_dir2_leafn_moveents( int i; /* temp leaf index */ for (i = start_s, stale = 0; i < start_s + count; i++) { - if (leaf_s->ents[i].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + if (be32_to_cpu(leaf_s->ents[i].address) == XFS_DIR2_NULL_DATAPTR) stale++; } } else @@ -711,8 +789,8 @@ xfs_dir2_leafn_order( leaf1 = leaf1_bp->data; leaf2 = leaf2_bp->data; - ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); - ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); if (be16_to_cpu(leaf1->hdr.count) > 0 && be16_to_cpu(leaf2->hdr.count) > 0 && (be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) || @@ -840,7 +918,7 @@ xfs_dir2_leafn_remove( xfs_da_state_blk_t *dblk, /* data block */ int *rval) /* resulting block needs join */ { - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *data; /* data block structure */ xfs_dir2_db_t db; /* data block number */ xfs_dabuf_t *dbp; /* data block buffer 
*/ xfs_dir2_data_entry_t *dep; /* data block entry */ @@ -860,7 +938,7 @@ xfs_dir2_leafn_remove( tp = args->trans; mp = dp->i_mount; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); /* * Point to the entry we're removing. */ @@ -885,9 +963,9 @@ xfs_dir2_leafn_remove( * in the data block in case it changes. */ dbp = dblk->bp; - hdr = dbp->data; - dep = (xfs_dir2_data_entry_t *)((char *)hdr + off); - longest = be16_to_cpu(hdr->bestfree[0].length); + data = dbp->data; + dep = (xfs_dir2_data_entry_t *)((char *)data + off); + longest = be16_to_cpu(data->hdr.bestfree[0].length); needlog = needscan = 0; xfs_dir2_data_make_free(tp, dbp, off, xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); @@ -896,7 +974,7 @@ xfs_dir2_leafn_remove( * Log the data block header if needed. */ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, data, &needlog); if (needlog) xfs_dir2_data_log_header(tp, dbp); xfs_dir2_data_check(dp, dbp); @@ -904,7 +982,7 @@ xfs_dir2_leafn_remove( * If the longest data block freespace changes, need to update * the corresponding freeblock entry. */ - if (longest < be16_to_cpu(hdr->bestfree[0].length)) { + if (longest < be16_to_cpu(data->hdr.bestfree[0].length)) { int error; /* error return value */ xfs_dabuf_t *fbp; /* freeblock buffer */ xfs_dir2_db_t fdb; /* freeblock block number */ @@ -922,27 +1000,27 @@ xfs_dir2_leafn_remove( return error; } free = fbp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); ASSERT(be32_to_cpu(free->hdr.firstdb) == - xfs_dir2_free_max_bests(mp) * + XFS_DIR2_MAX_FREE_BESTS(mp) * (fdb - XFS_DIR2_FREE_FIRSTDB(mp))); /* * Calculate which entry we need to fix. 
*/ findex = xfs_dir2_db_to_fdindex(mp, db); - longest = be16_to_cpu(hdr->bestfree[0].length); + longest = be16_to_cpu(data->hdr.bestfree[0].length); /* * If the data block is now empty we can get rid of it * (usually). */ - if (longest == mp->m_dirblksize - (uint)sizeof(*hdr)) { + if (longest == mp->m_dirblksize - (uint)sizeof(data->hdr)) { /* * Try to punch out the data block. */ error = xfs_dir2_shrink_inode(args, db, dbp); if (error == 0) { dblk->bp = NULL; - hdr = NULL; + data = NULL; } /* * We can get ENOSPC if there's no space reservation. @@ -958,7 +1036,7 @@ xfs_dir2_leafn_remove( * If we got rid of the data block, we can eliminate that entry * in the free block. */ - if (hdr == NULL) { + if (data == NULL) { /* * One less used entry in the free table. */ @@ -974,8 +1052,7 @@ xfs_dir2_leafn_remove( int i; /* free entry index */ for (i = findex - 1; - i >= 0 && - free->bests[i] == cpu_to_be16(NULLDATAOFF); + i >= 0 && be16_to_cpu(free->bests[i]) == NULLDATAOFF; i--) continue; free->hdr.nvalid = cpu_to_be32(i + 1); @@ -1132,7 +1209,7 @@ xfs_dir2_leafn_toosmall( */ blk = &state->path.blk[state->path.active - 1]; info = blk->bp->data; - ASSERT(info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC); leaf = (xfs_dir2_leaf_t *)info; count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]); @@ -1191,7 +1268,7 @@ xfs_dir2_leafn_toosmall( count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes = state->blocksize - (state->blocksize >> 2); leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes -= count * (uint)sizeof(leaf->ents[0]); /* @@ -1250,8 +1327,8 @@ xfs_dir2_leafn_unbalance( ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC); drop_leaf = 
drop_blk->bp->data; save_leaf = save_blk->bp->data; - ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); - ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); /* * If there are any stale leaf entries, take this opportunity * to purge them. @@ -1355,7 +1432,7 @@ xfs_dir2_node_addname_int( xfs_da_args_t *args, /* operation arguments */ xfs_da_state_blk_t *fblk) /* optional freespace block */ { - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *data; /* data block structure */ xfs_dir2_db_t dbno; /* data block number */ xfs_dabuf_t *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data entry pointer */ @@ -1392,7 +1469,7 @@ xfs_dir2_node_addname_int( */ ifbno = fblk->blkno; free = fbp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); findex = fblk->index; /* * This means the free entry showed that the data block had @@ -1476,7 +1553,7 @@ xfs_dir2_node_addname_int( continue; } free = fbp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); findex = 0; } /* @@ -1603,12 +1680,12 @@ xfs_dir2_node_addname_int( free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC); free->hdr.firstdb = cpu_to_be32( (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) * - xfs_dir2_free_max_bests(mp)); + XFS_DIR2_MAX_FREE_BESTS(mp)); free->hdr.nvalid = 0; free->hdr.nused = 0; } else { free = fbp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); } /* @@ -1620,7 +1697,7 @@ xfs_dir2_node_addname_int( * freespace block, extend that table. 
*/ if (findex >= be32_to_cpu(free->hdr.nvalid)) { - ASSERT(findex < xfs_dir2_free_max_bests(mp)); + ASSERT(findex < XFS_DIR2_MAX_FREE_BESTS(mp)); free->hdr.nvalid = cpu_to_be32(findex + 1); /* * Tag new entry so nused will go up. @@ -1631,7 +1708,7 @@ xfs_dir2_node_addname_int( * If this entry was for an empty data block * (this should always be true) then update the header. */ - if (free->bests[findex] == cpu_to_be16(NULLDATAOFF)) { + if (be16_to_cpu(free->bests[findex]) == NULLDATAOFF) { be32_add_cpu(&free->hdr.nused, 1); xfs_dir2_free_log_header(tp, fbp); } @@ -1640,8 +1717,8 @@ xfs_dir2_node_addname_int( * We haven't allocated the data entry yet so this will * change again. */ - hdr = dbp->data; - free->bests[findex] = hdr->bestfree[0].length; + data = dbp->data; + free->bests[findex] = data->hdr.bestfree[0].length; logfree = 1; } /* @@ -1666,21 +1743,21 @@ xfs_dir2_node_addname_int( xfs_da_buf_done(fbp); return error; } - hdr = dbp->data; + data = dbp->data; logfree = 0; } - ASSERT(be16_to_cpu(hdr->bestfree[0].length) >= length); + ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) >= length); /* * Point to the existing unused space. */ dup = (xfs_dir2_data_unused_t *) - ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset)); + ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset)); needscan = needlog = 0; /* * Mark the first part of the unused space, inuse for us. */ xfs_dir2_data_use_free(tp, dbp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length, &needlog, &needscan); /* * Fill in the new entry and log it. @@ -1690,13 +1767,13 @@ xfs_dir2_node_addname_int( dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); + *tagp = cpu_to_be16((char *)dep - (char *)data); xfs_dir2_data_log_entry(tp, dbp, dep); /* * Rescan the block for bestfree if needed. 
*/ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, data, &needlog); /* * Log the data block header if needed. */ @@ -1705,8 +1782,8 @@ xfs_dir2_node_addname_int( /* * If the freespace entry is now wrong, update it. */ - if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(hdr->bestfree[0].length)) { - free->bests[findex] = hdr->bestfree[0].length; + if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(data->hdr.bestfree[0].length)) { + free->bests[findex] = data->hdr.bestfree[0].length; logfree = 1; } /* @@ -1856,7 +1933,7 @@ xfs_dir2_node_replace( xfs_da_args_t *args) /* operation arguments */ { xfs_da_state_blk_t *blk; /* leaf block */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *data; /* data block structure */ xfs_dir2_data_entry_t *dep; /* data entry changed */ int error; /* error return value */ int i; /* btree level */ @@ -1900,10 +1977,10 @@ xfs_dir2_node_replace( /* * Point to the data entry. */ - hdr = state->extrablk.bp->data; - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); + data = state->extrablk.bp->data; + ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC); dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + + ((char *)data + xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address))); ASSERT(inum != be64_to_cpu(dep->inumber)); /* @@ -1967,7 +2044,7 @@ xfs_dir2_node_trim_free( return 0; } free = bp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); /* * If there are used entries, there's nothing to do. */ diff --git a/trunk/fs/xfs/xfs_dir2_node.h b/trunk/fs/xfs/xfs_dir2_node.h new file mode 100644 index 000000000000..82dfe7147195 --- /dev/null +++ b/trunk/fs/xfs/xfs_dir2_node.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2000,2005 Silicon Graphics, Inc. + * All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DIR2_NODE_H__ +#define __XFS_DIR2_NODE_H__ + +/* + * Directory version 2, btree node format structures + */ + +struct uio; +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_da_state; +struct xfs_da_state_blk; +struct xfs_inode; +struct xfs_trans; + +/* + * Offset of the freespace index. + */ +#define XFS_DIR2_FREE_SPACE 2 +#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE) +#define XFS_DIR2_FREE_FIRSTDB(mp) \ + xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET) + +#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F */ + +typedef struct xfs_dir2_free_hdr { + __be32 magic; /* XFS_DIR2_FREE_MAGIC */ + __be32 firstdb; /* db of first entry */ + __be32 nvalid; /* count of valid entries */ + __be32 nused; /* count of used entries */ +} xfs_dir2_free_hdr_t; + +typedef struct xfs_dir2_free { + xfs_dir2_free_hdr_t hdr; /* block header */ + __be16 bests[1]; /* best free counts */ + /* unused entries are -1 */ +} xfs_dir2_free_t; + +#define XFS_DIR2_MAX_FREE_BESTS(mp) \ + (((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_free_hdr_t)) / \ + (uint)sizeof(xfs_dir2_data_off_t)) + +/* + * Convert data space db to the corresponding free db. 
+ */ +static inline xfs_dir2_db_t +xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return (XFS_DIR2_FREE_FIRSTDB(mp) + (db) / XFS_DIR2_MAX_FREE_BESTS(mp)); +} + +/* + * Convert data space db to the corresponding index in a free db. + */ +static inline int +xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp)); +} + +extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, + struct xfs_dabuf *lbp); +extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count); +extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp, + struct xfs_da_args *args, int *indexp, + struct xfs_da_state *state); +extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp, + struct xfs_dabuf *leaf2_bp); +extern int xfs_dir2_leafn_split(struct xfs_da_state *state, + struct xfs_da_state_blk *oldblk, + struct xfs_da_state_blk *newblk); +extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action); +extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state, + struct xfs_da_state_blk *drop_blk, + struct xfs_da_state_blk *save_blk); +extern int xfs_dir2_node_addname(struct xfs_da_args *args); +extern int xfs_dir2_node_lookup(struct xfs_da_args *args); +extern int xfs_dir2_node_removename(struct xfs_da_args *args); +extern int xfs_dir2_node_replace(struct xfs_da_args *args); +extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo, + int *rvalp); + +#endif /* __XFS_DIR2_NODE_H__ */ diff --git a/trunk/fs/xfs/xfs_dir2_priv.h b/trunk/fs/xfs/xfs_dir2_priv.h deleted file mode 100644 index 067f403ecf8a..000000000000 --- a/trunk/fs/xfs/xfs_dir2_priv.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR2_PRIV_H__ -#define __XFS_DIR2_PRIV_H__ - -/* xfs_dir2.c */ -extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); -extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r); -extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r); -extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, - xfs_dir2_db_t *dbp); -extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, - struct xfs_dabuf *bp); -extern int xfs_dir_cilookup_result(struct xfs_da_args *args, - const unsigned char *name, int len); - -/* xfs_dir2_block.c */ -extern int xfs_dir2_block_addname(struct xfs_da_args *args); -extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent, - xfs_off_t *offset, filldir_t filldir); -extern int xfs_dir2_block_lookup(struct xfs_da_args *args); -extern int xfs_dir2_block_removename(struct xfs_da_args *args); -extern int xfs_dir2_block_replace(struct xfs_da_args *args); -extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, - struct xfs_dabuf *lbp, struct xfs_dabuf *dbp); - -/* xfs_dir2_data.c */ -#ifdef DEBUG -extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp); -#else -#define xfs_dir2_data_check(dp,bp) -#endif -extern struct xfs_dir2_data_free * -xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr, - struct xfs_dir2_data_unused *dup, int *loghead); -extern void xfs_dir2_data_freescan(struct xfs_mount *mp, - struct xfs_dir2_data_hdr *hdr, int *loghead); 
-extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, - struct xfs_dabuf **bpp); -extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp, - struct xfs_dir2_data_entry *dep); -extern void xfs_dir2_data_log_header(struct xfs_trans *tp, - struct xfs_dabuf *bp); -extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp, - struct xfs_dir2_data_unused *dup); -extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp, - xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, - int *needlogp, int *needscanp); -extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp, - struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset, - xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); - -/* xfs_dir2_leaf.c */ -extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, - struct xfs_dabuf *dbp); -extern int xfs_dir2_leaf_addname(struct xfs_da_args *args); -extern void xfs_dir2_leaf_compact(struct xfs_da_args *args, - struct xfs_dabuf *bp); -extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp, - int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); -extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent, - size_t bufsize, xfs_off_t *offset, filldir_t filldir); -extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno, - struct xfs_dabuf **bpp, int magic); -extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp, - int first, int last); -extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp, - struct xfs_dabuf *bp); -extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); -extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); -extern int xfs_dir2_leaf_replace(struct xfs_da_args *args); -extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args, - struct xfs_dabuf *lbp); -extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args, - struct xfs_dabuf *lbp, 
xfs_dir2_db_t db); -extern struct xfs_dir2_leaf_entry * -xfs_dir2_leaf_find_entry(struct xfs_dir2_leaf *leaf, int index, int compact, - int lowstale, int highstale, - int *lfloglow, int *lfloghigh); -extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); - -/* xfs_dir2_node.c */ -extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, - struct xfs_dabuf *lbp); -extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count); -extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp, - struct xfs_da_args *args, int *indexp, - struct xfs_da_state *state); -extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp, - struct xfs_dabuf *leaf2_bp); -extern int xfs_dir2_leafn_split(struct xfs_da_state *state, - struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk); -extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action); -extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state, - struct xfs_da_state_blk *drop_blk, - struct xfs_da_state_blk *save_blk); -extern int xfs_dir2_node_addname(struct xfs_da_args *args); -extern int xfs_dir2_node_lookup(struct xfs_da_args *args); -extern int xfs_dir2_node_removename(struct xfs_da_args *args); -extern int xfs_dir2_node_replace(struct xfs_da_args *args); -extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo, - int *rvalp); - -/* xfs_dir2_sf.c */ -extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp); -extern xfs_ino_t xfs_dir2_sfe_get_ino(struct xfs_dir2_sf_hdr *sfp, - struct xfs_dir2_sf_entry *sfep); -extern int xfs_dir2_block_sfsize(struct xfs_inode *dp, - struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp); -extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp, - int size, xfs_dir2_sf_hdr_t *sfhp); -extern int xfs_dir2_sf_addname(struct xfs_da_args *args); -extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); -extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, 
void *dirent, - xfs_off_t *offset, filldir_t filldir); -extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); -extern int xfs_dir2_sf_removename(struct xfs_da_args *args); -extern int xfs_dir2_sf_replace(struct xfs_da_args *args); - -#endif /* __XFS_DIR2_PRIV_H__ */ diff --git a/trunk/fs/xfs/xfs_dir2_sf.c b/trunk/fs/xfs/xfs_dir2_sf.c index 79d05e84e296..b1bae6b1eed9 100644 --- a/trunk/fs/xfs/xfs_dir2_sf.c +++ b/trunk/fs/xfs/xfs_dir2_sf.c @@ -23,16 +23,18 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" +#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_error.h" -#include "xfs_dir2.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2_priv.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" #include "xfs_trace.h" /* @@ -57,82 +59,6 @@ static void xfs_dir2_sf_toino4(xfs_da_args_t *args); static void xfs_dir2_sf_toino8(xfs_da_args_t *args); #endif /* XFS_BIG_INUMS */ -/* - * Inode numbers in short-form directories can come in two versions, - * either 4 bytes or 8 bytes wide. These helpers deal with the - * two forms transparently by looking at the headers i8count field. - * - * For 64-bit inode number the most significant byte must be zero. 
- */ -static xfs_ino_t -xfs_dir2_sf_get_ino( - struct xfs_dir2_sf_hdr *hdr, - xfs_dir2_inou_t *from) -{ - if (hdr->i8count) - return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL; - else - return get_unaligned_be32(&from->i4.i); -} - -static void -xfs_dir2_sf_put_ino( - struct xfs_dir2_sf_hdr *hdr, - xfs_dir2_inou_t *to, - xfs_ino_t ino) -{ - ASSERT((ino & 0xff00000000000000ULL) == 0); - - if (hdr->i8count) - put_unaligned_be64(ino, &to->i8.i); - else - put_unaligned_be32(ino, &to->i4.i); -} - -xfs_ino_t -xfs_dir2_sf_get_parent_ino( - struct xfs_dir2_sf_hdr *hdr) -{ - return xfs_dir2_sf_get_ino(hdr, &hdr->parent); -} - -static void -xfs_dir2_sf_put_parent_ino( - struct xfs_dir2_sf_hdr *hdr, - xfs_ino_t ino) -{ - xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino); -} - -/* - * In short-form directory entries the inode numbers are stored at variable - * offset behind the entry name. The inode numbers may only be accessed - * through the helpers below. - */ -static xfs_dir2_inou_t * -xfs_dir2_sfe_inop( - struct xfs_dir2_sf_entry *sfep) -{ - return (xfs_dir2_inou_t *)&sfep->name[sfep->namelen]; -} - -xfs_ino_t -xfs_dir2_sfe_get_ino( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return xfs_dir2_sf_get_ino(hdr, xfs_dir2_sfe_inop(sfep)); -} - -static void -xfs_dir2_sfe_put_ino( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep, - xfs_ino_t ino) -{ - xfs_dir2_sf_put_ino(hdr, xfs_dir2_sfe_inop(sfep), ino); -} - /* * Given a block directory (dp/block), calculate its size as a shortform (sf) * directory and a header for the sf directory, if it will fit it the @@ -142,7 +68,7 @@ xfs_dir2_sfe_put_ino( int /* size for sf form */ xfs_dir2_block_sfsize( xfs_inode_t *dp, /* incore inode pointer */ - xfs_dir2_data_hdr_t *hdr, /* block directory data */ + xfs_dir2_block_t *block, /* block directory data */ xfs_dir2_sf_hdr_t *sfhp) /* output: header for sf form */ { xfs_dir2_dataptr_t addr; /* data entry address */ @@ -162,7 +88,7 @@ 
xfs_dir2_block_sfsize( mp = dp->i_mount; count = i8count = namelen = 0; - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(mp, block); blp = xfs_dir2_block_leaf_p(btp); /* @@ -175,7 +101,7 @@ xfs_dir2_block_sfsize( * Calculate the pointer to the entry at hand. */ dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr)); + ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); /* * Detect . and .., so we can special-case them. * . is not included in sf directories. @@ -212,7 +138,7 @@ xfs_dir2_block_sfsize( */ sfhp->count = count; sfhp->i8count = i8count; - xfs_dir2_sf_put_parent_ino(sfhp, parent); + xfs_dir2_sf_put_inumber((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent); return size; } @@ -227,7 +153,7 @@ xfs_dir2_block_to_sf( int size, /* shortform directory size */ xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ { - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* block structure */ xfs_dir2_block_tail_t *btp; /* block tail pointer */ xfs_dir2_data_entry_t *dep; /* data entry pointer */ xfs_inode_t *dp; /* incore directory inode */ @@ -238,7 +164,8 @@ xfs_dir2_block_to_sf( xfs_mount_t *mp; /* filesystem mount point */ char *ptr; /* current data pointer */ xfs_dir2_sf_entry_t *sfep; /* shortform entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */ + xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_ino_t temp; trace_xfs_dir2_block_to_sf(args); @@ -249,14 +176,13 @@ xfs_dir2_block_to_sf( * Make a copy of the block data, so we can shrink the inode * and add local data. */ - hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP); - memcpy(hdr, bp->data, mp->m_dirblksize); + block = kmem_alloc(mp->m_dirblksize, KM_SLEEP); + memcpy(block, bp->data, mp->m_dirblksize); logflags = XFS_ILOG_CORE; if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) { ASSERT(error != ENOSPC); goto out; } - /* * The buffer is now unconditionally gone, whether * xfs_dir2_shrink_inode worked or not. 
@@ -272,14 +198,14 @@ xfs_dir2_block_to_sf( /* * Copy the header into the newly allocate local space. */ - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count)); dp->i_d.di_size = size; /* * Set up to loop over the block's entries. */ - btp = xfs_dir2_block_tail_p(mp, hdr); - ptr = (char *)(hdr + 1); + btp = xfs_dir2_block_tail_p(mp, block); + ptr = (char *)block->u; endptr = (char *)xfs_dir2_block_leaf_p(btp); sfep = xfs_dir2_sf_firstentry(sfp); /* @@ -307,7 +233,7 @@ xfs_dir2_block_to_sf( else if (dep->namelen == 2 && dep->name[0] == '.' && dep->name[1] == '.') ASSERT(be64_to_cpu(dep->inumber) == - xfs_dir2_sf_get_parent_ino(sfp)); + xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent)); /* * Normal entry, copy it into shortform. */ @@ -315,11 +241,11 @@ xfs_dir2_block_to_sf( sfep->namelen = dep->namelen; xfs_dir2_sf_put_offset(sfep, (xfs_dir2_data_aoff_t) - ((char *)dep - (char *)hdr)); + ((char *)dep - (char *)block)); memcpy(sfep->name, dep->name, dep->namelen); - xfs_dir2_sfe_put_ino(sfp, sfep, - be64_to_cpu(dep->inumber)); - + temp = be64_to_cpu(dep->inumber); + xfs_dir2_sf_put_inumber(sfp, &temp, + xfs_dir2_sf_inumberp(sfep)); sfep = xfs_dir2_sf_nextentry(sfp, sfep); } ptr += xfs_dir2_data_entsize(dep->namelen); @@ -328,7 +254,7 @@ xfs_dir2_block_to_sf( xfs_dir2_sf_check(args); out: xfs_trans_log_inode(args->trans, dp, logflags); - kmem_free(hdr); + kmem_free(block); return error; } @@ -351,7 +277,7 @@ xfs_dir2_sf_addname( xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */ int old_isize; /* di_size before adding name */ int pick; /* which algorithm to use */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */ trace_xfs_dir2_sf_addname(args); @@ -368,19 +294,19 @@ xfs_dir2_sf_addname( } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 
ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); /* * Compute entry (and change in) size. */ - add_entsize = xfs_dir2_sf_entsize(sfp, args->namelen); + add_entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen); incr_isize = add_entsize; objchange = 0; #if XFS_BIG_INUMS /* * Do we have to change to 8 byte inodes? */ - if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) { /* * Yes, adjust the entry size and the total size. */ @@ -388,7 +314,7 @@ xfs_dir2_sf_addname( (uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t); incr_isize += - (sfp->count + 2) * + (sfp->hdr.count + 2) * ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); objchange = 1; @@ -458,21 +384,21 @@ xfs_dir2_sf_addname_easy( { int byteoff; /* byte offset in sf dir */ xfs_inode_t *dp; /* incore directory inode */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ dp = args->dp; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; byteoff = (int)((char *)sfep - (char *)sfp); /* * Grow the in-inode space. */ - xfs_idata_realloc(dp, xfs_dir2_sf_entsize(sfp, args->namelen), + xfs_idata_realloc(dp, xfs_dir2_sf_entsize_byname(sfp, args->namelen), XFS_DATA_FORK); /* * Need to set up again due to realloc of the inode data. */ - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff); /* * Fill in the new entry. 
@@ -480,14 +406,15 @@ xfs_dir2_sf_addname_easy( sfep->namelen = args->namelen; xfs_dir2_sf_put_offset(sfep, offset); memcpy(sfep->name, args->name, sfep->namelen); - xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber); + xfs_dir2_sf_put_inumber(sfp, &args->inumber, + xfs_dir2_sf_inumberp(sfep)); /* * Update the header and inode. */ - sfp->count++; + sfp->hdr.count++; #if XFS_BIG_INUMS if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) - sfp->i8count++; + sfp->hdr.i8count++; #endif dp->i_d.di_size = new_isize; xfs_dir2_sf_check(args); @@ -517,19 +444,19 @@ xfs_dir2_sf_addname_hard( xfs_dir2_data_aoff_t offset; /* current offset value */ int old_isize; /* previous di_size */ xfs_dir2_sf_entry_t *oldsfep; /* entry in original dir */ - xfs_dir2_sf_hdr_t *oldsfp; /* original shortform dir */ + xfs_dir2_sf_t *oldsfp; /* original shortform dir */ xfs_dir2_sf_entry_t *sfep; /* entry in new dir */ - xfs_dir2_sf_hdr_t *sfp; /* new shortform dir */ + xfs_dir2_sf_t *sfp; /* new shortform dir */ /* * Copy the old directory to the stack buffer. */ dp = args->dp; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; old_isize = (int)dp->i_d.di_size; buf = kmem_alloc(old_isize, KM_SLEEP); - oldsfp = (xfs_dir2_sf_hdr_t *)buf; + oldsfp = (xfs_dir2_sf_t *)buf; memcpy(oldsfp, sfp, old_isize); /* * Loop over the old directory finding the place we're going @@ -558,7 +485,7 @@ xfs_dir2_sf_addname_hard( /* * Reset the pointer since the buffer was reallocated. */ - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; /* * Copy the first part of the directory, including the header. 
*/ @@ -571,11 +498,12 @@ xfs_dir2_sf_addname_hard( sfep->namelen = args->namelen; xfs_dir2_sf_put_offset(sfep, offset); memcpy(sfep->name, args->name, sfep->namelen); - xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber); - sfp->count++; + xfs_dir2_sf_put_inumber(sfp, &args->inumber, + xfs_dir2_sf_inumberp(sfep)); + sfp->hdr.count++; #if XFS_BIG_INUMS if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) - sfp->i8count++; + sfp->hdr.i8count++; #endif /* * If there's more left to copy, do that. @@ -609,14 +537,14 @@ xfs_dir2_sf_addname_pick( xfs_mount_t *mp; /* filesystem mount point */ xfs_dir2_data_aoff_t offset; /* data block offset */ xfs_dir2_sf_entry_t *sfep; /* shortform entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ int size; /* entry's data size */ int used; /* data bytes used */ dp = args->dp; mp = dp->i_mount; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; size = xfs_dir2_data_entsize(args->namelen); offset = XFS_DIR2_DATA_FIRST_OFFSET; sfep = xfs_dir2_sf_firstentry(sfp); @@ -626,7 +554,7 @@ xfs_dir2_sf_addname_pick( * Keep track of data offset and whether we've seen a place * to insert the new entry. */ - for (i = 0; i < sfp->count; i++) { + for (i = 0; i < sfp->hdr.count; i++) { if (!holefit) holefit = offset + size <= xfs_dir2_sf_get_offset(sfep); offset = xfs_dir2_sf_get_offset(sfep) + @@ -638,7 +566,7 @@ xfs_dir2_sf_addname_pick( * was a data block (block form directory). 
*/ used = offset + - (sfp->count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (sfp->hdr.count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) + (uint)sizeof(xfs_dir2_block_tail_t); /* * If it won't fit in a block form then we can't insert it, @@ -684,30 +612,30 @@ xfs_dir2_sf_check( xfs_ino_t ino; /* entry inode number */ int offset; /* data offset */ xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ dp = args->dp; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; offset = XFS_DIR2_DATA_FIRST_OFFSET; - ino = xfs_dir2_sf_get_parent_ino(sfp); + ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); i8count = ino > XFS_DIR2_MAX_SHORT_INUM; for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); - i < sfp->count; + i < sfp->hdr.count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset); - ino = xfs_dir2_sfe_get_ino(sfp, sfep); + ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); i8count += ino > XFS_DIR2_MAX_SHORT_INUM; offset = xfs_dir2_sf_get_offset(sfep) + xfs_dir2_data_entsize(sfep->namelen); } - ASSERT(i8count == sfp->i8count); + ASSERT(i8count == sfp->hdr.i8count); ASSERT(XFS_BIG_INUMS || i8count == 0); ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size); ASSERT(offset + - (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + (uint)sizeof(xfs_dir2_block_tail_t) <= dp->i_mount->m_dirblksize); } @@ -723,7 +651,7 @@ xfs_dir2_sf_create( { xfs_inode_t *dp; /* incore directory inode */ int i8count; /* parent inode is an 8-byte number */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ int size; /* directory size */ trace_xfs_dir2_sf_create(args); @@ -753,13 +681,13 @@ xfs_dir2_sf_create( /* * Fill in the header, */ - sfp = (xfs_dir2_sf_hdr_t 
*)dp->i_df.if_u1.if_data; - sfp->i8count = i8count; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp->hdr.i8count = i8count; /* * Now can put in the inode number, since i8count is set. */ - xfs_dir2_sf_put_parent_ino(sfp, pino); - sfp->count = 0; + xfs_dir2_sf_put_inumber(sfp, &pino, &sfp->hdr.parent); + sfp->hdr.count = 0; dp->i_d.di_size = size; xfs_dir2_sf_check(args); xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); @@ -777,7 +705,7 @@ xfs_dir2_sf_getdents( xfs_mount_t *mp; /* filesystem mount point */ xfs_dir2_dataptr_t off; /* current entry's offset */ xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ xfs_dir2_dataptr_t dot_offset; xfs_dir2_dataptr_t dotdot_offset; xfs_ino_t ino; @@ -796,9 +724,9 @@ xfs_dir2_sf_getdents( ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); /* * If the block number in the offset is out of range, we're done. @@ -831,7 +759,7 @@ xfs_dir2_sf_getdents( * Put .. entry unless we're starting past it. */ if (*offset <= dotdot_offset) { - ino = xfs_dir2_sf_get_parent_ino(sfp); + ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) { *offset = dotdot_offset & 0x7fffffff; return 0; @@ -842,7 +770,7 @@ xfs_dir2_sf_getdents( * Loop while there are more entries and put'ing works. 
*/ sfep = xfs_dir2_sf_firstentry(sfp); - for (i = 0; i < sfp->count; i++) { + for (i = 0; i < sfp->hdr.count; i++) { off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, xfs_dir2_sf_get_offset(sfep)); @@ -851,7 +779,7 @@ xfs_dir2_sf_getdents( continue; } - ino = xfs_dir2_sfe_get_ino(sfp, sfep); + ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); if (filldir(dirent, (char *)sfep->name, sfep->namelen, off & 0x7fffffff, ino, DT_UNKNOWN)) { *offset = off & 0x7fffffff; @@ -877,7 +805,7 @@ xfs_dir2_sf_lookup( int i; /* entry index */ int error; xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ enum xfs_dacmp cmp; /* comparison result */ xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */ @@ -896,8 +824,8 @@ xfs_dir2_sf_lookup( } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); /* * Special case for . */ @@ -911,7 +839,7 @@ xfs_dir2_sf_lookup( */ if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { - args->inumber = xfs_dir2_sf_get_parent_ino(sfp); + args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); args->cmpresult = XFS_CMP_EXACT; return XFS_ERROR(EEXIST); } @@ -919,7 +847,7 @@ xfs_dir2_sf_lookup( * Loop over all the entries trying to match ours. 
*/ ci_sfep = NULL; - for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { /* * Compare name and if it's an exact match, return the inode @@ -930,7 +858,8 @@ xfs_dir2_sf_lookup( sfep->namelen); if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { args->cmpresult = cmp; - args->inumber = xfs_dir2_sfe_get_ino(sfp, sfep); + args->inumber = xfs_dir2_sf_get_inumber(sfp, + xfs_dir2_sf_inumberp(sfep)); if (cmp == XFS_CMP_EXACT) return XFS_ERROR(EEXIST); ci_sfep = sfep; @@ -962,7 +891,7 @@ xfs_dir2_sf_removename( int newsize; /* new inode size */ int oldsize; /* old inode size */ xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ trace_xfs_dir2_sf_removename(args); @@ -979,31 +908,32 @@ xfs_dir2_sf_removename( } ASSERT(dp->i_df.if_bytes == oldsize); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->i8count)); + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); /* * Loop over the old directory entries. * Find the one we're deleting. */ - for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { if (xfs_da_compname(args, sfep->name, sfep->namelen) == XFS_CMP_EXACT) { - ASSERT(xfs_dir2_sfe_get_ino(sfp, sfep) == - args->inumber); + ASSERT(xfs_dir2_sf_get_inumber(sfp, + xfs_dir2_sf_inumberp(sfep)) == + args->inumber); break; } } /* * Didn't find it. */ - if (i == sfp->count) + if (i == sfp->hdr.count) return XFS_ERROR(ENOENT); /* * Calculate sizes. 
*/ byteoff = (int)((char *)sfep - (char *)sfp); - entsize = xfs_dir2_sf_entsize(sfp, args->namelen); + entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen); newsize = oldsize - entsize; /* * Copy the part if any after the removed entry, sliding it down. @@ -1014,22 +944,22 @@ xfs_dir2_sf_removename( /* * Fix up the header and file size. */ - sfp->count--; + sfp->hdr.count--; dp->i_d.di_size = newsize; /* * Reallocate, making it smaller. */ xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; #if XFS_BIG_INUMS /* * Are we changing inode number size? */ if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) { - if (sfp->i8count == 1) + if (sfp->hdr.i8count == 1) xfs_dir2_sf_toino4(args); else - sfp->i8count--; + sfp->hdr.i8count--; } #endif xfs_dir2_sf_check(args); @@ -1053,7 +983,7 @@ xfs_dir2_sf_replace( int i8elevated; /* sf_toino8 set i8count=1 */ #endif xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ trace_xfs_dir2_sf_replace(args); @@ -1069,19 +999,19 @@ xfs_dir2_sf_replace( } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); #if XFS_BIG_INUMS /* * New inode number is large, and need to convert to 8-byte inodes. 
*/ - if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) { int error; /* error return value */ int newsize; /* new inode size */ newsize = dp->i_df.if_bytes + - (sfp->count + 1) * + (sfp->hdr.count + 1) * ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); /* @@ -1099,7 +1029,7 @@ xfs_dir2_sf_replace( */ xfs_dir2_sf_toino8(args); i8elevated = 1; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; } else i8elevated = 0; #endif @@ -1110,32 +1040,34 @@ xfs_dir2_sf_replace( if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { #if XFS_BIG_INUMS || defined(DEBUG) - ino = xfs_dir2_sf_get_parent_ino(sfp); + ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); ASSERT(args->inumber != ino); #endif - xfs_dir2_sf_put_parent_ino(sfp, args->inumber); + xfs_dir2_sf_put_inumber(sfp, &args->inumber, &sfp->hdr.parent); } /* * Normal entry, look for the name. */ else { for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); - i < sfp->count; + i < sfp->hdr.count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { if (xfs_da_compname(args, sfep->name, sfep->namelen) == XFS_CMP_EXACT) { #if XFS_BIG_INUMS || defined(DEBUG) - ino = xfs_dir2_sfe_get_ino(sfp, sfep); + ino = xfs_dir2_sf_get_inumber(sfp, + xfs_dir2_sf_inumberp(sfep)); ASSERT(args->inumber != ino); #endif - xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber); + xfs_dir2_sf_put_inumber(sfp, &args->inumber, + xfs_dir2_sf_inumberp(sfep)); break; } } /* * Didn't find it. */ - if (i == sfp->count) { + if (i == sfp->hdr.count) { ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); #if XFS_BIG_INUMS if (i8elevated) @@ -1153,10 +1085,10 @@ xfs_dir2_sf_replace( /* * And the old count was one, so need to convert to small. 
*/ - if (sfp->i8count == 1) + if (sfp->hdr.i8count == 1) xfs_dir2_sf_toino4(args); else - sfp->i8count--; + sfp->hdr.i8count--; } /* * See if the old number was small, the new number is large. @@ -1167,9 +1099,9 @@ xfs_dir2_sf_replace( * add to the i8count unless we just converted to 8-byte * inodes (which does an implied i8count = 1) */ - ASSERT(sfp->i8count != 0); + ASSERT(sfp->hdr.i8count != 0); if (!i8elevated) - sfp->i8count++; + sfp->hdr.i8count++; } #endif xfs_dir2_sf_check(args); @@ -1189,12 +1121,13 @@ xfs_dir2_sf_toino4( char *buf; /* old dir's buffer */ xfs_inode_t *dp; /* incore directory inode */ int i; /* entry index */ + xfs_ino_t ino; /* entry inode number */ int newsize; /* new inode size */ xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ - xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */ + xfs_dir2_sf_t *oldsfp; /* old sf directory */ int oldsize; /* old inode size */ xfs_dir2_sf_entry_t *sfep; /* new sf entry */ - xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ + xfs_dir2_sf_t *sfp; /* new sf directory */ trace_xfs_dir2_sf_toino4(args); @@ -1207,42 +1140,44 @@ xfs_dir2_sf_toino4( */ oldsize = dp->i_df.if_bytes; buf = kmem_alloc(oldsize, KM_SLEEP); - oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(oldsfp->i8count == 1); + oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsfp->hdr.i8count == 1); memcpy(buf, oldsfp, oldsize); /* * Compute the new inode size. */ newsize = oldsize - - (oldsfp->count + 1) * + (oldsfp->hdr.count + 1) * ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); /* * Reset our pointers, the data has moved. */ - oldsfp = (xfs_dir2_sf_hdr_t *)buf; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + oldsfp = (xfs_dir2_sf_t *)buf; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; /* * Fill in the new header. 
*/ - sfp->count = oldsfp->count; - sfp->i8count = 0; - xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp)); + sfp->hdr.count = oldsfp->hdr.count; + sfp->hdr.i8count = 0; + ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent); + xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent); /* * Copy the entries field by field. */ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), oldsfep = xfs_dir2_sf_firstentry(oldsfp); - i < sfp->count; + i < sfp->hdr.count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep), oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) { sfep->namelen = oldsfep->namelen; sfep->offset = oldsfep->offset; memcpy(sfep->name, oldsfep->name, sfep->namelen); - xfs_dir2_sfe_put_ino(sfp, sfep, - xfs_dir2_sfe_get_ino(oldsfp, oldsfep)); + ino = xfs_dir2_sf_get_inumber(oldsfp, + xfs_dir2_sf_inumberp(oldsfep)); + xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep)); } /* * Clean up the inode. @@ -1264,12 +1199,13 @@ xfs_dir2_sf_toino8( char *buf; /* old dir's buffer */ xfs_inode_t *dp; /* incore directory inode */ int i; /* entry index */ + xfs_ino_t ino; /* entry inode number */ int newsize; /* new inode size */ xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ - xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */ + xfs_dir2_sf_t *oldsfp; /* old sf directory */ int oldsize; /* old inode size */ xfs_dir2_sf_entry_t *sfep; /* new sf entry */ - xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ + xfs_dir2_sf_t *sfp; /* new sf directory */ trace_xfs_dir2_sf_toino8(args); @@ -1282,42 +1218,44 @@ xfs_dir2_sf_toino8( */ oldsize = dp->i_df.if_bytes; buf = kmem_alloc(oldsize, KM_SLEEP); - oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(oldsfp->i8count == 0); + oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsfp->hdr.i8count == 0); memcpy(buf, oldsfp, oldsize); /* * Compute the new inode size. 
*/ newsize = oldsize + - (oldsfp->count + 1) * + (oldsfp->hdr.count + 1) * ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); /* * Reset our pointers, the data has moved. */ - oldsfp = (xfs_dir2_sf_hdr_t *)buf; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + oldsfp = (xfs_dir2_sf_t *)buf; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; /* * Fill in the new header. */ - sfp->count = oldsfp->count; - sfp->i8count = 1; - xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp)); + sfp->hdr.count = oldsfp->hdr.count; + sfp->hdr.i8count = 1; + ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent); + xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent); /* * Copy the entries field by field. */ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), oldsfep = xfs_dir2_sf_firstentry(oldsfp); - i < sfp->count; + i < sfp->hdr.count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep), oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) { sfep->namelen = oldsfep->namelen; sfep->offset = oldsfep->offset; memcpy(sfep->name, oldsfep->name, sfep->namelen); - xfs_dir2_sfe_put_ino(sfp, sfep, - xfs_dir2_sfe_get_ino(oldsfp, oldsfep)); + ino = xfs_dir2_sf_get_inumber(oldsfp, + xfs_dir2_sf_inumberp(oldsfep)); + xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep)); } /* * Clean up the inode. diff --git a/trunk/fs/xfs/xfs_dir2_sf.h b/trunk/fs/xfs/xfs_dir2_sf.h new file mode 100644 index 000000000000..6ac44b550d39 --- /dev/null +++ b/trunk/fs/xfs/xfs_dir2_sf.h @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DIR2_SF_H__ +#define __XFS_DIR2_SF_H__ + +/* + * Directory layout when stored internal to an inode. + * + * Small directories are packed as tightly as possible so as to + * fit into the literal area of the inode. + */ + +struct uio; +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_dir2_block; +struct xfs_inode; +struct xfs_mount; +struct xfs_trans; + +/* + * Inode number stored as 8 8-bit values. + */ +typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t; + +/* + * Inode number stored as 4 8-bit values. + * Works a lot of the time, when all the inode numbers in a directory + * fit in 32 bits. + */ +typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t; + +typedef union { + xfs_dir2_ino8_t i8; + xfs_dir2_ino4_t i4; +} xfs_dir2_inou_t; +#define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL) + +/* + * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t. + * Only need 16 bits, this is the byte offset into the single block form. + */ +typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t; + +/* + * The parent directory has a dedicated field, and the self-pointer must + * be calculated on the fly. + * + * Entries are packed toward the top as tightly as possible. The header + * and the elements must be memcpy'd out into a work area to get correct + * alignment for the inode number fields. 
+ */ +typedef struct xfs_dir2_sf_hdr { + __uint8_t count; /* count of entries */ + __uint8_t i8count; /* count of 8-byte inode #s */ + xfs_dir2_inou_t parent; /* parent dir inode number */ +} __arch_pack xfs_dir2_sf_hdr_t; + +typedef struct xfs_dir2_sf_entry { + __uint8_t namelen; /* actual name length */ + xfs_dir2_sf_off_t offset; /* saved offset */ + __uint8_t name[1]; /* name, variable size */ + xfs_dir2_inou_t inumber; /* inode number, var. offset */ +} __arch_pack xfs_dir2_sf_entry_t; + +typedef struct xfs_dir2_sf { + xfs_dir2_sf_hdr_t hdr; /* shortform header */ + xfs_dir2_sf_entry_t list[1]; /* shortform entries */ +} xfs_dir2_sf_t; + +static inline int xfs_dir2_sf_hdr_size(int i8count) +{ + return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \ + ((i8count) == 0) * \ + ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); +} + +static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep) +{ + return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen]; +} + +static inline xfs_intino_t +xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from) +{ + return ((sfp)->hdr.i8count == 0 ? 
\ + (xfs_intino_t)XFS_GET_DIR_INO4((from)->i4) : \ + (xfs_intino_t)XFS_GET_DIR_INO8((from)->i8)); +} + +static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from, + xfs_dir2_inou_t *to) +{ + if ((sfp)->hdr.i8count == 0) + XFS_PUT_DIR_INO4(*(from), (to)->i4); + else + XFS_PUT_DIR_INO8(*(from), (to)->i8); +} + +static inline xfs_dir2_data_aoff_t +xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep) +{ + return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i); +} + +static inline void +xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off) +{ + INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off); +} + +static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len) +{ + return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \ + ((sfp)->hdr.i8count == 0) * \ + ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); +} + +static inline int +xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep) +{ + return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (sfep)->namelen - \ + ((sfp)->hdr.i8count == 0) * \ + ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); +} + +static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp) +{ + return ((xfs_dir2_sf_entry_t *) \ + ((char *)(sfp) + xfs_dir2_sf_hdr_size(sfp->hdr.i8count))); +} + +static inline xfs_dir2_sf_entry_t * +xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep) +{ + return ((xfs_dir2_sf_entry_t *) \ + ((char *)(sfep) + xfs_dir2_sf_entsize_byentry(sfp,sfep))); +} + +/* + * Functions. 
+ */ +extern int xfs_dir2_block_sfsize(struct xfs_inode *dp, + struct xfs_dir2_block *block, + xfs_dir2_sf_hdr_t *sfhp); +extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp, + int size, xfs_dir2_sf_hdr_t *sfhp); +extern int xfs_dir2_sf_addname(struct xfs_da_args *args); +extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); +extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent, + xfs_off_t *offset, filldir_t filldir); +extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); +extern int xfs_dir2_sf_removename(struct xfs_da_args *args); +extern int xfs_dir2_sf_replace(struct xfs_da_args *args); + +#endif /* __XFS_DIR2_SF_H__ */ diff --git a/trunk/fs/xfs/xfs_fs.h b/trunk/fs/xfs/xfs_fs.h index c13fed8c394a..8f6fc1a96386 100644 --- a/trunk/fs/xfs/xfs_fs.h +++ b/trunk/fs/xfs/xfs_fs.h @@ -249,11 +249,6 @@ typedef struct xfs_fsop_resblks { #define XFS_MAX_LOG_BYTES \ ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) -/* Used for sanity checks on superblock */ -#define XFS_MAX_DBLOCKS(s) ((xfs_drfsbno_t)(s)->sb_agcount * (s)->sb_agblocks) -#define XFS_MIN_DBLOCKS(s) ((xfs_drfsbno_t)((s)->sb_agcount - 1) * \ - (s)->sb_agblocks + XFS_MIN_AG_BLOCKS) - /* * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT */ diff --git a/trunk/fs/xfs/xfs_ialloc.c b/trunk/fs/xfs/xfs_ialloc.c index dd5628bd8d0b..84ebeec16642 100644 --- a/trunk/fs/xfs/xfs_ialloc.c +++ b/trunk/fs/xfs/xfs_ialloc.c @@ -683,7 +683,7 @@ xfs_dialloc( return 0; } agi = XFS_BUF_TO_AGI(agbp); - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); + ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); } else { /* * Continue where we left off before. 
In this case, we @@ -691,7 +691,7 @@ xfs_dialloc( */ agbp = *IO_agbp; agi = XFS_BUF_TO_AGI(agbp); - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); + ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); ASSERT(be32_to_cpu(agi->agi_freecount) > 0); } mp = tp->t_mountp; @@ -775,7 +775,7 @@ xfs_dialloc( if (error) goto nextag; agi = XFS_BUF_TO_AGI(agbp); - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); + ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); } /* * Here with an allocation group that has a free inode. @@ -944,7 +944,7 @@ xfs_dialloc( * See if the most recently allocated block has any free. */ newino: - if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { + if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), XFS_LOOKUP_EQ, &i); if (error) @@ -1085,7 +1085,7 @@ xfs_difree( return error; } agi = XFS_BUF_TO_AGI(agbp); - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); + ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); ASSERT(agbno < be32_to_cpu(agi->agi_length)); /* * Initialize the cursor. @@ -1438,7 +1438,7 @@ xfs_ialloc_log_agi( xfs_agi_t *agi; /* allocation group header */ agi = XFS_BUF_TO_AGI(bp); - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); + ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); #endif /* * Compute byte offsets for the first and last fields. @@ -1492,7 +1492,7 @@ xfs_read_agi( /* * Validate the magic number of the agi block. 
*/ - agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) && + agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) && be32_to_cpu(agi->agi_seqno) == agno; if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, diff --git a/trunk/fs/xfs/xfs_ialloc_btree.c b/trunk/fs/xfs/xfs_ialloc_btree.c index c6a75815aea0..16921f55c542 100644 --- a/trunk/fs/xfs/xfs_ialloc_btree.c +++ b/trunk/fs/xfs/xfs_ialloc_btree.c @@ -31,6 +31,7 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" @@ -204,6 +205,72 @@ xfs_inobt_recs_inorder( } #endif /* DEBUG */ +#ifdef XFS_BTREE_TRACE +ktrace_t *xfs_inobt_trace_buf; + +STATIC void +xfs_inobt_trace_enter( + struct xfs_btree_cur *cur, + const char *func, + char *s, + int type, + int line, + __psunsigned_t a0, + __psunsigned_t a1, + __psunsigned_t a2, + __psunsigned_t a3, + __psunsigned_t a4, + __psunsigned_t a5, + __psunsigned_t a6, + __psunsigned_t a7, + __psunsigned_t a8, + __psunsigned_t a9, + __psunsigned_t a10) +{ + ktrace_enter(xfs_inobt_trace_buf, (void *)(__psint_t)type, + (void *)func, (void *)s, NULL, (void *)cur, + (void *)a0, (void *)a1, (void *)a2, (void *)a3, + (void *)a4, (void *)a5, (void *)a6, (void *)a7, + (void *)a8, (void *)a9, (void *)a10); +} + +STATIC void +xfs_inobt_trace_cursor( + struct xfs_btree_cur *cur, + __uint32_t *s0, + __uint64_t *l0, + __uint64_t *l1) +{ + *s0 = cur->bc_private.a.agno; + *l0 = cur->bc_rec.i.ir_startino; + *l1 = cur->bc_rec.i.ir_free; +} + +STATIC void +xfs_inobt_trace_key( + struct xfs_btree_cur *cur, + union xfs_btree_key *key, + __uint64_t *l0, + __uint64_t *l1) +{ + *l0 = be32_to_cpu(key->inobt.ir_startino); + *l1 = 0; +} + +STATIC void +xfs_inobt_trace_record( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + __uint64_t *l0, + __uint64_t *l1, + __uint64_t *l2) +{ + *l0 = 
be32_to_cpu(rec->inobt.ir_startino); + *l1 = be32_to_cpu(rec->inobt.ir_freecount); + *l2 = be64_to_cpu(rec->inobt.ir_free); +} +#endif /* XFS_BTREE_TRACE */ + static const struct xfs_btree_ops xfs_inobt_ops = { .rec_len = sizeof(xfs_inobt_rec_t), .key_len = sizeof(xfs_inobt_key_t), @@ -219,10 +286,18 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .init_rec_from_cur = xfs_inobt_init_rec_from_cur, .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, .key_diff = xfs_inobt_key_diff, + #ifdef DEBUG .keys_inorder = xfs_inobt_keys_inorder, .recs_inorder = xfs_inobt_recs_inorder, #endif + +#ifdef XFS_BTREE_TRACE + .trace_enter = xfs_inobt_trace_enter, + .trace_cursor = xfs_inobt_trace_cursor, + .trace_key = xfs_inobt_trace_key, + .trace_record = xfs_inobt_trace_record, +#endif }; /* diff --git a/trunk/fs/xfs/xfs_iget.c b/trunk/fs/xfs/xfs_iget.c index 7759812c1bbe..3631783b2b53 100644 --- a/trunk/fs/xfs/xfs_iget.c +++ b/trunk/fs/xfs/xfs_iget.c @@ -38,6 +38,7 @@ #include "xfs_trans_priv.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" +#include "xfs_btree_trace.h" #include "xfs_trace.h" diff --git a/trunk/fs/xfs/xfs_inode.c b/trunk/fs/xfs/xfs_inode.c index 3cc21ddf9f7e..a098a20ca63e 100644 --- a/trunk/fs/xfs/xfs_inode.c +++ b/trunk/fs/xfs/xfs_inode.c @@ -37,6 +37,7 @@ #include "xfs_buf_item.h" #include "xfs_inode_item.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_bmap.h" @@ -51,7 +52,7 @@ kmem_zone_t *xfs_ifork_zone; kmem_zone_t *xfs_inode_zone; /* - * Used in xfs_itruncate_extents(). This is the maximum number of extents + * Used in xfs_itruncate(). This is the maximum number of extents * freed from a file in a single transaction. 
*/ #define XFS_ITRUNC_MAX_EXTENTS 2 @@ -166,7 +167,7 @@ xfs_imap_to_bp( dip = (xfs_dinode_t *)xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); - di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && + di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC && XFS_DINODE_GOOD_VERSION(dip->di_version); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, @@ -801,7 +802,7 @@ xfs_iread( * If we got something that isn't an inode it means someone * (nfs or dmi) has a stale handle. */ - if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) { + if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) { #ifdef DEBUG xfs_alert(mp, "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)", @@ -1178,15 +1179,15 @@ xfs_ialloc( * at least do it for regular files. */ #ifdef DEBUG -STATIC void +void xfs_isize_check( - struct xfs_inode *ip, - xfs_fsize_t isize) + xfs_mount_t *mp, + xfs_inode_t *ip, + xfs_fsize_t isize) { - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t map_first; - int nimaps; - xfs_bmbt_irec_t imaps[2]; + xfs_fileoff_t map_first; + int nimaps; + xfs_bmbt_irec_t imaps[2]; if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) return; @@ -1213,14 +1214,168 @@ xfs_isize_check( ASSERT(nimaps == 1); ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); } -#else /* DEBUG */ -#define xfs_isize_check(ip, isize) #endif /* DEBUG */ /* - * Free up the underlying blocks past new_size. The new size must be smaller - * than the current size. This routine can be used both for the attribute and - * data fork, and does not modify the inode size, which is left to the caller. + * Calculate the last possible buffered byte in a file. This must + * include data that was buffered beyond the EOF by the write code. + * This also needs to deal with overflowing the xfs_fsize_t type + * which can happen for sizes near the limit. + * + * We also need to take into account any blocks beyond the EOF. It + * may be the case that they were buffered by a write which failed. 
+ * In that case the pages will still be in memory, but the inode size + * will never have been updated. + */ +STATIC xfs_fsize_t +xfs_file_last_byte( + xfs_inode_t *ip) +{ + xfs_mount_t *mp; + xfs_fsize_t last_byte; + xfs_fileoff_t last_block; + xfs_fileoff_t size_last_block; + int error; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)); + + mp = ip->i_mount; + /* + * Only check for blocks beyond the EOF if the extents have + * been read in. This eliminates the need for the inode lock, + * and it also saves us from looking when it really isn't + * necessary. + */ + if (ip->i_df.if_flags & XFS_IFEXTENTS) { + xfs_ilock(ip, XFS_ILOCK_SHARED); + error = xfs_bmap_last_offset(NULL, ip, &last_block, + XFS_DATA_FORK); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + if (error) { + last_block = 0; + } + } else { + last_block = 0; + } + size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size); + last_block = XFS_FILEOFF_MAX(last_block, size_last_block); + + last_byte = XFS_FSB_TO_B(mp, last_block); + if (last_byte < 0) { + return XFS_MAXIOFFSET(mp); + } + last_byte += (1 << mp->m_writeio_log); + if (last_byte < 0) { + return XFS_MAXIOFFSET(mp); + } + return last_byte; +} + +/* + * Start the truncation of the file to new_size. The new size + * must be smaller than the current size. This routine will + * clear the buffer and page caches of file data in the removed + * range, and xfs_itruncate_finish() will remove the underlying + * disk blocks. + * + * The inode must have its I/O lock locked EXCLUSIVELY, and it + * must NOT have the inode lock held at all. This is because we're + * calling into the buffer/page cache code and we can't hold the + * inode lock when we do so. + * + * We need to wait for any direct I/Os in flight to complete before we + * proceed with the truncate. 
This is needed to prevent the extents + * being read or written by the direct I/Os from being removed while the + * I/O is in flight as there is no other method of synchronising + * direct I/O with the truncate operation. Also, because we hold + * the IOLOCK in exclusive mode, we prevent new direct I/Os from being + * started until the truncate completes and drops the lock. Essentially, + * the xfs_ioend_wait() call forms an I/O barrier that provides strict + * ordering between direct I/Os and the truncate operation. + * + * The flags parameter can have either the value XFS_ITRUNC_DEFINITE + * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used + * in the case that the caller is locking things out of order and + * may not be able to call xfs_itruncate_finish() with the inode lock + * held without dropping the I/O lock. If the caller must drop the + * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start() + * must be called again with all the same restrictions as the initial + * call. + */ +int +xfs_itruncate_start( + xfs_inode_t *ip, + uint flags, + xfs_fsize_t new_size) +{ + xfs_fsize_t last_byte; + xfs_off_t toss_start; + xfs_mount_t *mp; + int error = 0; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + ASSERT((new_size == 0) || (new_size <= ip->i_size)); + ASSERT((flags == XFS_ITRUNC_DEFINITE) || + (flags == XFS_ITRUNC_MAYBE)); + + mp = ip->i_mount; + + /* wait for the completion of any pending DIOs */ + if (new_size == 0 || new_size < ip->i_size) + xfs_ioend_wait(ip); + + /* + * Call toss_pages or flushinval_pages to get rid of pages + * overlapping the region being removed. We have to use + * the less efficient flushinval_pages in the case that the + * caller may not be able to finish the truncate without + * dropping the inode's I/O lock. Make sure + * to catch any pages brought in by buffers overlapping + * the EOF by searching out beyond the isize by our + * block size. 
We round new_size up to a block boundary + * so that we don't toss things on the same block as + * new_size but before it. + * + * Before calling toss_page or flushinval_pages, make sure to + * call remapf() over the same region if the file is mapped. + * This frees up mapped file references to the pages in the + * given range and for the flushinval_pages case it ensures + * that we get the latest mapped changes flushed out. + */ + toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); + toss_start = XFS_FSB_TO_B(mp, toss_start); + if (toss_start < 0) { + /* + * The place to start tossing is beyond our maximum + * file size, so there is no way that the data extended + * out there. + */ + return 0; + } + last_byte = xfs_file_last_byte(ip); + trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte); + if (last_byte > toss_start) { + if (flags & XFS_ITRUNC_DEFINITE) { + xfs_tosspages(ip, toss_start, + -1, FI_REMAPF_LOCKED); + } else { + error = xfs_flushinval_pages(ip, toss_start, + -1, FI_REMAPF_LOCKED); + } + } + +#ifdef DEBUG + if (new_size == 0) { + ASSERT(VN_CACHED(VFS_I(ip)) == 0); + } +#endif + return error; +} + +/* + * Shrink the file to the given new_size. The new size must be smaller than + * the current size. This will free up the underlying blocks in the removed + * range after a call to xfs_itruncate_start() or xfs_atruncate_start(). * * The transaction passed to this routine must have made a permanent log * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the @@ -1232,6 +1387,31 @@ xfs_isize_check( * will be "held" within the returned transaction. This routine does NOT * require any disk space to be reserved for it within the transaction. * + * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it + * indicates the fork which is to be truncated. For the attribute fork we only + * support truncation to size 0. 
+ * + * We use the sync parameter to indicate whether or not the first transaction + * we perform might have to be synchronous. For the attr fork, it needs to be + * so if the unlink of the inode is not yet known to be permanent in the log. + * This keeps us from freeing and reusing the blocks of the attribute fork + * before the unlink of the inode becomes permanent. + * + * For the data fork, we normally have to run synchronously if we're being + * called out of the inactive path or we're being called out of the create path + * where we're truncating an existing file. Either way, the truncate needs to + * be sync so blocks don't reappear in the file with altered data in case of a + * crash. wsync filesystems can run the first case async because anything that + * shrinks the inode has to run sync so by the time we're called here from + * inactive, the inode size is permanently set to 0. + * + * Calls from the truncate path always need to be sync unless we're in a wsync + * filesystem and the file has already been unlinked. + * + * The caller is responsible for correctly setting the sync parameter. It gets + * too hard for us to guess here which path we're being called out of just + * based on inode state. + * * If we get an error, we must return with the inode locked and linked into the * current transaction. This keeps things simple for the higher level code, * because it always knows that the inode is locked and held in the transaction @@ -1239,30 +1419,124 @@ xfs_isize_check( * dirty on error so that transactions can be easily aborted if possible. 
*/ int -xfs_itruncate_extents( - struct xfs_trans **tpp, - struct xfs_inode *ip, - int whichfork, - xfs_fsize_t new_size) +xfs_itruncate_finish( + xfs_trans_t **tp, + xfs_inode_t *ip, + xfs_fsize_t new_size, + int fork, + int sync) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp = *tpp; - struct xfs_trans *ntp; - xfs_bmap_free_t free_list; - xfs_fsblock_t first_block; - xfs_fileoff_t first_unmap_block; - xfs_fileoff_t last_block; - xfs_filblks_t unmap_len; - int committed; - int error = 0; - int done = 0; + xfs_fsblock_t first_block; + xfs_fileoff_t first_unmap_block; + xfs_fileoff_t last_block; + xfs_filblks_t unmap_len=0; + xfs_mount_t *mp; + xfs_trans_t *ntp; + int done; + int committed; + xfs_bmap_free_t free_list; + int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); - ASSERT(new_size <= ip->i_size); - ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); + ASSERT((new_size == 0) || (new_size <= ip->i_size)); + ASSERT(*tp != NULL); + ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); + ASSERT(ip->i_transp == *tp); ASSERT(ip->i_itemp != NULL); ASSERT(ip->i_itemp->ili_lock_flags == 0); - ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); + + + ntp = *tp; + mp = (ntp)->t_mountp; + ASSERT(! XFS_NOT_DQATTACHED(mp, ip)); + + /* + * We only support truncating the entire attribute fork. + */ + if (fork == XFS_ATTR_FORK) { + new_size = 0LL; + } + first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); + trace_xfs_itruncate_finish_start(ip, new_size); + + /* + * The first thing we do is set the size to new_size permanently + * on disk. This way we don't have to worry about anyone ever + * being able to look at the data being freed even in the face + * of a crash. What we're getting around here is the case where + * we free a block, it is allocated to another file, it is written + * to, and then we crash. 
If the new data gets written to the + * file but the log buffers containing the free and reallocation + * don't, then we'd end up with garbage in the blocks being freed. + * As long as we make the new_size permanent before actually + * freeing any blocks it doesn't matter if they get written to. + * + * The callers must signal into us whether or not the size + * setting here must be synchronous. There are a few cases + * where it doesn't have to be synchronous. Those cases + * occur if the file is unlinked and we know the unlink is + * permanent or if the blocks being truncated are guaranteed + * to be beyond the inode eof (regardless of the link count) + * and the eof value is permanent. Both of these cases occur + * only on wsync-mounted filesystems. In those cases, we're + * guaranteed that no user will ever see the data in the blocks + * that are being truncated so the truncate can run async. + * In the free beyond eof case, the file may wind up with + * more blocks allocated to it than it needs if we crash + * and that won't get fixed until the next time the file + * is re-opened and closed but that's ok as that shouldn't + * be too many blocks. + * + * However, we can't just make all wsync xactions run async + * because there's one call out of the create path that needs + * to run sync where it's truncating an existing file to size + * 0 whose size is > 0. + * + * It's probably possible to come up with a test in this + * routine that would correctly distinguish all the above + * cases from the values of the function parameters and the + * inode state but for sanity's sake, I've decided to let the + * layers above just tell us. It's simpler to correctly figure + * out in the layer above exactly under what conditions we + * can run async and I think it's easier for others read and + * follow the logic in case something has to be changed. + * cscope is your friend -- rcc. + * + * The attribute fork is much simpler. 
+ * + * For the attribute fork we allow the caller to tell us whether + * the unlink of the inode that led to this call is yet permanent + * in the on disk log. If it is not and we will be freeing extents + * in this inode then we make the first transaction synchronous + * to make sure that the unlink is permanent by the time we free + * the blocks. + */ + if (fork == XFS_DATA_FORK) { + if (ip->i_d.di_nextents > 0) { + /* + * If we are not changing the file size then do + * not update the on-disk file size - we may be + * called from xfs_inactive_free_eofblocks(). If we + * update the on-disk file size and then the system + * crashes before the contents of the file are + * flushed to disk then the files may be full of + * holes (ie NULL files bug). + */ + if (ip->i_size != new_size) { + ip->i_d.di_size = new_size; + ip->i_size = new_size; + xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); + } + } + } else if (sync) { + ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC)); + if (ip->i_d.di_anextents > 0) + xfs_trans_set_sync(ntp); + } + ASSERT(fork == XFS_DATA_FORK || + (fork == XFS_ATTR_FORK && + ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) || + (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC))))); /* * Since it is possible for space to become allocated beyond @@ -1273,142 +1547,128 @@ xfs_itruncate_extents( * beyond the maximum file size (ie it is the same as last_block), * then there is nothing to do. */ - first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); - if (first_unmap_block == last_block) - return 0; - - ASSERT(first_unmap_block < last_block); - unmap_len = last_block - first_unmap_block + 1; + ASSERT(first_unmap_block <= last_block); + done = 0; + if (last_block == first_unmap_block) { + done = 1; + } else { + unmap_len = last_block - first_unmap_block + 1; + } while (!done) { + /* + * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi() + * will tell us whether it freed the entire range or + * not. 
If this is a synchronous mount (wsync), + * then we can tell bunmapi to keep all the + * transactions asynchronous since the unlink + * transaction that made this inode inactive has + * already hit the disk. There's no danger of + * the freed blocks being reused, there being a + * crash, and the reused blocks suddenly reappearing + * in this file with garbage in them once recovery + * runs. + */ xfs_bmap_init(&free_list, &first_block); - error = xfs_bunmapi(tp, ip, + error = xfs_bunmapi(ntp, ip, first_unmap_block, unmap_len, - xfs_bmapi_aflag(whichfork), + xfs_bmapi_aflag(fork), XFS_ITRUNC_MAX_EXTENTS, &first_block, &free_list, &done); - if (error) - goto out_bmap_cancel; + if (error) { + /* + * If the bunmapi call encounters an error, + * return to the caller where the transaction + * can be properly aborted. We just need to + * make sure we're not holding any resources + * that we were not when we came in. + */ + xfs_bmap_cancel(&free_list); + return error; + } /* * Duplicate the transaction that has the permanent * reservation and commit the old transaction. */ - error = xfs_bmap_finish(&tp, &free_list, &committed); + error = xfs_bmap_finish(tp, &free_list, &committed); + ntp = *tp; if (committed) - xfs_trans_ijoin(tp, ip); - if (error) - goto out_bmap_cancel; + xfs_trans_ijoin(ntp, ip); + + if (error) { + /* + * If the bmap finish call encounters an error, return + * to the caller where the transaction can be properly + * aborted. We just need to make sure we're not + * holding any resources that we were not when we came + * in. + * + * Aborting from this point might lose some blocks in + * the file system, but oh well. + */ + xfs_bmap_cancel(&free_list); + return error; + } if (committed) { /* * Mark the inode dirty so it will be logged and * moved forward in the log as part of every commit. 
*/ - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); } - ntp = xfs_trans_dup(tp); - error = xfs_trans_commit(tp, 0); - tp = ntp; + ntp = xfs_trans_dup(ntp); + error = xfs_trans_commit(*tp, 0); + *tp = ntp; - xfs_trans_ijoin(tp, ip); + xfs_trans_ijoin(ntp, ip); if (error) - goto out; - + return error; /* - * Transaction commit worked ok so we can drop the extra ticket + * transaction commit worked ok so we can drop the extra ticket * reference that we gained in xfs_trans_dup() */ - xfs_log_ticket_put(tp->t_ticket); - error = xfs_trans_reserve(tp, 0, + xfs_log_ticket_put(ntp->t_ticket); + error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT); if (error) - goto out; + return error; } - -out: - *tpp = tp; - return error; -out_bmap_cancel: /* - * If the bunmapi call encounters an error, return to the caller where - * the transaction can be properly aborted. We just need to make sure - * we're not holding any resources that we were not when we came in. + * Only update the size in the case of the data fork, but + * always re-log the inode so that our permanent transaction + * can keep on rolling it forward in the log. */ - xfs_bmap_cancel(&free_list); - goto out; -} - -int -xfs_itruncate_data( - struct xfs_trans **tpp, - struct xfs_inode *ip, - xfs_fsize_t new_size) -{ - int error; - - trace_xfs_itruncate_data_start(ip, new_size); - - /* - * The first thing we do is set the size to new_size permanently on - * disk. This way we don't have to worry about anyone ever being able - * to look at the data being freed even in the face of a crash. - * What we're getting around here is the case where we free a block, it - * is allocated to another file, it is written to, and then we crash. - * If the new data gets written to the file but the log buffers - * containing the free and reallocation don't, then we'd end up with - * garbage in the blocks being freed. 
As long as we make the new_size - * permanent before actually freeing any blocks it doesn't matter if - * they get written to. - */ - if (ip->i_d.di_nextents > 0) { + if (fork == XFS_DATA_FORK) { + xfs_isize_check(mp, ip, new_size); /* - * If we are not changing the file size then do not update - * the on-disk file size - we may be called from - * xfs_inactive_free_eofblocks(). If we update the on-disk - * file size and then the system crashes before the contents - * of the file are flushed to disk then the files may be - * full of holes (ie NULL files bug). + * If we are not changing the file size then do + * not update the on-disk file size - we may be + * called from xfs_inactive_free_eofblocks(). If we + * update the on-disk file size and then the system + * crashes before the contents of the file are + * flushed to disk then the files may be full of + * holes (ie NULL files bug). */ if (ip->i_size != new_size) { ip->i_d.di_size = new_size; ip->i_size = new_size; - xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); } } - - error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size); - if (error) - return error; - - /* - * If we are not changing the file size then do not update the on-disk - * file size - we may be called from xfs_inactive_free_eofblocks(). - * If we update the on-disk file size and then the system crashes - * before the contents of the file are flushed to disk then the files - * may be full of holes (ie NULL files bug). - */ - xfs_isize_check(ip, new_size); - if (ip->i_size != new_size) { - ip->i_d.di_size = new_size; - ip->i_size = new_size; - } - - ASSERT(new_size != 0 || ip->i_delayed_blks == 0); - ASSERT(new_size != 0 || ip->i_d.di_nextents == 0); - - /* - * Always re-log the inode so that our permanent transaction can keep - * on rolling it forward in the log. 
- */ - xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); - - trace_xfs_itruncate_data_end(ip, new_size); + xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); + ASSERT((new_size != 0) || + (fork == XFS_ATTR_FORK) || + (ip->i_delayed_blks == 0)); + ASSERT((new_size != 0) || + (fork == XFS_ATTR_FORK) || + (ip->i_d.di_nextents == 0)); + trace_xfs_itruncate_finish_end(ip, new_size); return 0; } @@ -1434,6 +1694,7 @@ xfs_iunlink( ASSERT(ip->i_d.di_nlink == 0); ASSERT(ip->i_d.di_mode != 0); + ASSERT(ip->i_transp == tp); mp = tp->t_mountp; @@ -1456,7 +1717,7 @@ xfs_iunlink( ASSERT(agi->agi_unlinked[bucket_index]); ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino); - if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) { + if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) { /* * There is already another inode in the bucket we need * to add ourselves to. Add us at the front of the list. @@ -1467,7 +1728,8 @@ xfs_iunlink( if (error) return error; - ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO)); + ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO); + /* both on-disk, don't endian flip twice */ dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; offset = ip->i_imap.im_boffset + offsetof(xfs_dinode_t, di_next_unlinked); @@ -1532,7 +1794,7 @@ xfs_iunlink_remove( agino = XFS_INO_TO_AGINO(mp, ip->i_ino); ASSERT(agino != 0); bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; - ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)); + ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO); ASSERT(agi->agi_unlinked[bucket_index]); if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) { @@ -1697,7 +1959,7 @@ xfs_ifree_cluster( * stale first, we will not attempt to lock them in the loop * below as the XFS_ISTALE flag will be set. 
*/ - lip = bp->b_fspriv; + lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); while (lip) { if (lip->li_type == XFS_LI_INODE) { iip = (xfs_inode_log_item_t *)lip; @@ -1824,6 +2086,7 @@ xfs_ifree( xfs_buf_t *ibp; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ip->i_transp == tp); ASSERT(ip->i_d.di_nlink == 0); ASSERT(ip->i_d.di_nextents == 0); ASSERT(ip->i_d.di_anextents == 0); @@ -2470,7 +2733,7 @@ xfs_iflush_cluster( * mark the buffer as an error and call them. Otherwise * mark it as stale and brelse. */ - if (bp->b_iodone) { + if (XFS_BUF_IODONE_FUNC(bp)) { XFS_BUF_UNDONE(bp); XFS_BUF_STALE(bp); XFS_BUF_ERROR(bp,EIO); @@ -2657,7 +2920,7 @@ xfs_iflush_int( */ xfs_synchronize_times(ip); - if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), + if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", @@ -2810,8 +3073,8 @@ xfs_iflush_int( */ xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); - ASSERT(bp->b_fspriv != NULL); - ASSERT(bp->b_iodone != NULL); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); } else { /* * We're flushing an inode which is not in the AIL and has diff --git a/trunk/fs/xfs/xfs_inode.h b/trunk/fs/xfs/xfs_inode.h index a97644ab945a..964cfea77686 100644 --- a/trunk/fs/xfs/xfs_inode.h +++ b/trunk/fs/xfs/xfs_inode.h @@ -241,6 +241,7 @@ typedef struct xfs_inode { xfs_ifork_t i_df; /* data fork */ /* Transaction and locking information. */ + struct xfs_trans *i_transp; /* ptr to owning transaction*/ struct xfs_inode_log_item *i_itemp; /* logging information */ mrlock_t i_lock; /* inode lock */ mrlock_t i_iolock; /* inode IO lock */ @@ -456,6 +457,16 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) extern struct lock_class_key xfs_iolock_reclaimable; +/* + * Flags for xfs_itruncate_start(). 
+ */ +#define XFS_ITRUNC_DEFINITE 0x1 +#define XFS_ITRUNC_MAYBE 0x2 + +#define XFS_ITRUNC_FLAGS \ + { XFS_ITRUNC_DEFINITE, "DEFINITE" }, \ + { XFS_ITRUNC_MAYBE, "MAYBE" } + /* * For multiple groups support: if S_ISGID bit is set in the parent * directory, group of new file is set to that of the parent, and @@ -490,10 +501,9 @@ uint xfs_ip2xflags(struct xfs_inode *); uint xfs_dic2xflags(struct xfs_dinode *); int xfs_ifree(struct xfs_trans *, xfs_inode_t *, struct xfs_bmap_free *); -int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *, - int, xfs_fsize_t); -int xfs_itruncate_data(struct xfs_trans **, struct xfs_inode *, - xfs_fsize_t); +int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); +int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, + xfs_fsize_t, int, int); int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); void xfs_iext_realloc(xfs_inode_t *, int, int); @@ -569,6 +579,13 @@ void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int); #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) +#ifdef DEBUG +void xfs_isize_check(struct xfs_mount *, struct xfs_inode *, + xfs_fsize_t); +#else /* DEBUG */ +#define xfs_isize_check(mp, ip, isize) +#endif /* DEBUG */ + #if defined(DEBUG) void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); #else diff --git a/trunk/fs/xfs/xfs_inode_item.c b/trunk/fs/xfs/xfs_inode_item.c index 588406dc6a35..b1e88d56069c 100644 --- a/trunk/fs/xfs/xfs_inode_item.c +++ b/trunk/fs/xfs/xfs_inode_item.c @@ -632,8 +632,13 @@ xfs_inode_item_unlock( struct xfs_inode *ip = iip->ili_inode; unsigned short lock_flags; - ASSERT(ip->i_itemp != NULL); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(iip->ili_inode->i_itemp != NULL); + ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); + + /* + * Clear the transaction pointer in the inode. 
+ */ + ip->i_transp = NULL; /* * If the inode needed a separate buffer with which to log @@ -659,8 +664,8 @@ xfs_inode_item_unlock( lock_flags = iip->ili_lock_flags; iip->ili_lock_flags = 0; if (lock_flags) { - xfs_iunlock(ip, lock_flags); - IRELE(ip); + xfs_iunlock(iip->ili_inode, lock_flags); + IRELE(iip->ili_inode); } } @@ -874,7 +879,7 @@ xfs_iflush_done( * Scan the buffer IO completions for other inodes being completed and * attach them to the current inode log item. */ - blip = bp->b_fspriv; + blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); prev = NULL; while (blip != NULL) { if (lip->li_cb != xfs_iflush_done) { @@ -886,7 +891,7 @@ xfs_iflush_done( /* remove from list */ next = blip->li_bio_list; if (!prev) { - bp->b_fspriv = next; + XFS_BUF_SET_FSPRIVATE(bp, next); } else { prev->li_bio_list = next; } diff --git a/trunk/fs/xfs/xfs_inum.h b/trunk/fs/xfs/xfs_inum.h index b253c0ea5bec..b8e4ee4e89a4 100644 --- a/trunk/fs/xfs/xfs_inum.h +++ b/trunk/fs/xfs/xfs_inum.h @@ -28,6 +28,17 @@ typedef __uint32_t xfs_agino_t; /* within allocation grp inode number */ +/* + * Useful inode bits for this kernel. + * Used in some places where having 64-bits in the 32-bit kernels + * costs too much. 
+ */ +#if XFS_BIG_INUMS +typedef xfs_ino_t xfs_intino_t; +#else +typedef __uint32_t xfs_intino_t; +#endif + #define NULLFSINO ((xfs_ino_t)-1) #define NULLAGINO ((xfs_agino_t)-1) diff --git a/trunk/fs/xfs/xfs_log.c b/trunk/fs/xfs/xfs_log.c index 06ff8437ed8e..41d5b8f2bf92 100644 --- a/trunk/fs/xfs/xfs_log.c +++ b/trunk/fs/xfs/xfs_log.c @@ -871,9 +871,15 @@ xlog_space_left( void xlog_iodone(xfs_buf_t *bp) { - xlog_in_core_t *iclog = bp->b_fspriv; - xlog_t *l = iclog->ic_log; - int aborted = 0; + xlog_in_core_t *iclog; + xlog_t *l; + int aborted; + + iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *); + ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2); + XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); + aborted = 0; + l = iclog->ic_log; /* * Race to shutdown the filesystem if we see an error. @@ -1050,9 +1056,10 @@ xlog_alloc_log(xfs_mount_t *mp, bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); if (!bp) goto out_free_log; - bp->b_iodone = xlog_iodone; + XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); + XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(xfs_buf_islocked(bp)); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); log->l_xbuf = bp; spin_lock_init(&log->l_icloglock); @@ -1083,8 +1090,10 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_iclog_size, 0); if (!bp) goto out_free_iclog; - - bp->b_iodone = xlog_iodone; + if (!XFS_BUF_CPSEMA(bp)) + ASSERT(0); + XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); + XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); iclog->ic_bp = bp; iclog->ic_data = bp->b_addr; #ifdef DEBUG @@ -1109,7 +1118,7 @@ xlog_alloc_log(xfs_mount_t *mp, iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); - ASSERT(xfs_buf_islocked(iclog->ic_bp)); + ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); init_waitqueue_head(&iclog->ic_force_wait); init_waitqueue_head(&iclog->ic_write_wait); @@ -1245,8 +1254,9 @@ STATIC int xlog_bdstrat( struct xfs_buf *bp) { - struct xlog_in_core *iclog = 
bp->b_fspriv; + struct xlog_in_core *iclog; + iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *); if (iclog->ic_state & XLOG_STATE_IOERROR) { XFS_BUF_ERROR(bp, EIO); XFS_BUF_STALE(bp); @@ -1259,6 +1269,7 @@ xlog_bdstrat( return 0; } + bp->b_flags |= _XBF_RUN_QUEUES; xfs_buf_iorequest(bp); return 0; } @@ -1340,6 +1351,8 @@ xlog_sync(xlog_t *log, } bp = iclog->ic_bp; + ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1); + XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); XFS_STATS_ADD(xs_log_blocks, BTOBB(count)); @@ -1353,28 +1366,22 @@ xlog_sync(xlog_t *log, iclog->ic_bwritecnt = 1; } XFS_BUF_SET_COUNT(bp, count); - bp->b_fspriv = iclog; + XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */ XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); - bp->b_flags |= XBF_SYNCIO; + bp->b_flags |= XBF_LOG_BUFFER; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { - bp->b_flags |= XBF_FUA; - /* - * Flush the data device before flushing the log to make - * sure all meta data written back from the AIL actually made - * it to disk before stamping the new log tail LSN into the - * log buffer. For an external log we need to issue the - * flush explicitly, and unfortunately synchronously here; - * for an internal log we can simply use the block layer - * state machine for preflushes. + * If we have an external log device, flush the data device + * before flushing the log to make sure all meta data + * written back from the AIL actually made it to disk + * before writing out the new log tail LSN in the log buffer. 
*/ if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp) xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); - else - bp->b_flags |= XBF_FLUSH; + XFS_BUF_ORDERED(bp); } ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); @@ -1397,16 +1404,19 @@ xlog_sync(xlog_t *log, } if (split) { bp = iclog->ic_log->l_xbuf; + ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == + (unsigned long)1); + XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+ (__psint_t)count), split); - bp->b_fspriv = iclog; + XFS_BUF_SET_FSPRIVATE(bp, iclog); XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); - bp->b_flags |= XBF_SYNCIO; + bp->b_flags |= XBF_LOG_BUFFER; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) - bp->b_flags |= XBF_FUA; + XFS_BUF_ORDERED(bp); dptr = XFS_BUF_PTR(bp); /* * Bump the cycle numbers at the start of each block @@ -3511,13 +3521,13 @@ xlog_verify_iclog(xlog_t *log, spin_unlock(&log->l_icloglock); /* check log magic numbers */ - if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) + if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM) xfs_emerg(log->l_mp, "%s: invalid magic num", __func__); ptr = (xfs_caddr_t) &iclog->ic_header; for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; ptr += BBSIZE) { - if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) + if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) xfs_emerg(log->l_mp, "%s: unexpected magic num", __func__); } diff --git a/trunk/fs/xfs/xfs_log_recover.c b/trunk/fs/xfs/xfs_log_recover.c index 8fe4206de057..04142caedb2b 100644 --- a/trunk/fs/xfs/xfs_log_recover.c +++ b/trunk/fs/xfs/xfs_log_recover.c @@ -91,8 +91,6 @@ xlog_get_bp( xlog_t *log, int nbblks) { - struct xfs_buf *bp; - if (!xlog_buf_bbcount_valid(log, nbblks)) { xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", nbblks); @@ -120,10 +118,8 @@ xlog_get_bp( nbblks += log->l_sectBBsize; nbblks = 
round_up(nbblks, log->l_sectBBsize); - bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, BBTOB(nbblks), 0); - if (bp) - xfs_buf_unlock(bp); - return bp; + return xfs_buf_get_uncached(log->l_mp->m_logdev_targp, + BBTOB(nbblks), 0); } STATIC void @@ -268,7 +264,7 @@ xlog_bwrite( XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); XFS_BUF_HOLD(bp); - xfs_buf_lock(bp); + XFS_BUF_PSEMA(bp, PRIBIO); XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); @@ -304,14 +300,14 @@ xlog_header_check_recover( xfs_mount_t *mp, xlog_rec_header_t *head) { - ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); + ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); /* * IRIX doesn't write the h_fmt field and leaves it zeroed * (XLOG_FMT_UNKNOWN). This stops us from trying to recover * a dirty log created in IRIX. */ - if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) { + if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { xfs_warn(mp, "dirty log written in incompatible format - can't recover"); xlog_header_check_dump(mp, head); @@ -337,7 +333,7 @@ xlog_header_check_mount( xfs_mount_t *mp, xlog_rec_header_t *head) { - ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); + ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); if (uuid_is_nil(&head->h_fs_uuid)) { /* @@ -371,7 +367,7 @@ xlog_recover_iodone( xfs_force_shutdown(bp->b_target->bt_mount, SHUTDOWN_META_IO_ERROR); } - bp->b_iodone = NULL; + XFS_BUF_CLR_IODONE_FUNC(bp); xfs_buf_ioend(bp, 0); } @@ -538,7 +534,7 @@ xlog_find_verify_log_record( head = (xlog_rec_header_t *)offset; - if (head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) + if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(head->h_magicno)) break; if (!smallmem) @@ -920,7 +916,7 @@ xlog_find_tail( if (error) goto done; - if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { + if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { found = 1; break; } @@ -937,8 +933,8 @@ xlog_find_tail( if 
(error) goto done; - if (*(__be32 *)offset == - cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { + if (XLOG_HEADER_MAGIC_NUM == + be32_to_cpu(*(__be32 *)offset)) { found = 2; break; } @@ -1951,7 +1947,7 @@ xfs_qm_dqcheck( * This is all fine; things are still consistent, and we haven't lost * any quota information. Just don't complain about bad dquot blks. */ - if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { + if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) { if (flags & XFS_QMOPT_DOWARN) xfs_alert(mp, "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", @@ -2178,7 +2174,7 @@ xlog_recover_buffer_pass2( error = xfs_bwrite(mp, bp); } else { ASSERT(bp->b_target->bt_mount == mp); - bp->b_iodone = xlog_recover_iodone; + XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); } @@ -2242,7 +2238,7 @@ xlog_recover_inode_pass2( * Make sure the place we're flushing out to really looks * like an inode! */ - if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { + if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) { xfs_buf_relse(bp); xfs_alert(mp, "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", @@ -2438,7 +2434,7 @@ xlog_recover_inode_pass2( write_inode_buffer: ASSERT(bp->b_target->bt_mount == mp); - bp->b_iodone = xlog_recover_iodone; + XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); error: if (need_free) @@ -2560,7 +2556,7 @@ xlog_recover_dquot_pass2( ASSERT(dq_f->qlf_size == 2); ASSERT(bp->b_target->bt_mount == mp); - bp->b_iodone = xlog_recover_iodone; + XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); return (0); @@ -3299,7 +3295,7 @@ xlog_valid_rec_header( { int hlen; - if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) { + if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) { XFS_ERROR_REPORT("xlog_valid_rec_header(1)", XFS_ERRLEVEL_LOW, log->l_mp); return XFS_ERROR(EFSCORRUPTED); diff --git a/trunk/fs/xfs/xfs_mount.c b/trunk/fs/xfs/xfs_mount.c 
index 7f25245da289..b49b82363d20 100644 --- a/trunk/fs/xfs/xfs_mount.c +++ b/trunk/fs/xfs/xfs_mount.c @@ -348,7 +348,7 @@ xfs_mount_validate_sb( } /* - * More sanity checking. Most of these were stolen directly from + * More sanity checking. These were stolen directly from * xfs_repair. */ if (unlikely( @@ -371,13 +371,23 @@ xfs_mount_validate_sb( (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || - (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */) || - sbp->sb_dblocks == 0 || - sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) || - sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) { + (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) { if (loud) - XFS_CORRUPTION_ERROR("SB sanity check failed", - XFS_ERRLEVEL_LOW, mp, sbp); + xfs_warn(mp, "SB sanity check 1 failed"); + return XFS_ERROR(EFSCORRUPTED); + } + + /* + * Sanity check AG count, size fields against data size field + */ + if (unlikely( + sbp->sb_dblocks == 0 || + sbp->sb_dblocks > + (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks || + sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) * + sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) { + if (loud) + xfs_warn(mp, "SB sanity check 2 failed"); return XFS_ERROR(EFSCORRUPTED); } @@ -854,8 +864,7 @@ xfs_update_alignment(xfs_mount_t *mp) if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || (BBTOB(mp->m_swidth) & mp->m_blockmask)) { if (mp->m_flags & XFS_MOUNT_RETERR) { - xfs_warn(mp, "alignment check failed: " - "(sunit/swidth vs. blocksize)"); + xfs_warn(mp, "alignment check 1 failed"); return XFS_ERROR(EINVAL); } mp->m_dalign = mp->m_swidth = 0; @@ -866,8 +875,6 @@ xfs_update_alignment(xfs_mount_t *mp) mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { if (mp->m_flags & XFS_MOUNT_RETERR) { - xfs_warn(mp, "alignment check failed: " - "(sunit/swidth vs. 
ag size)"); return XFS_ERROR(EINVAL); } xfs_warn(mp, @@ -882,8 +889,8 @@ xfs_update_alignment(xfs_mount_t *mp) mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); } else { if (mp->m_flags & XFS_MOUNT_RETERR) { - xfs_warn(mp, "alignment check failed: " - "sunit(%d) less than bsize(%d)", + xfs_warn(mp, + "stripe alignment turned off: sunit(%d) less than bsize(%d)", mp->m_dalign, mp->m_blockmask +1); return XFS_ERROR(EINVAL); @@ -1089,6 +1096,10 @@ xfs_mount_reset_sbqflags( if (mp->m_flags & XFS_MOUNT_RDONLY) return 0; +#ifdef QUOTADEBUG + xfs_notice(mp, "Writing superblock quota changes"); +#endif + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, XFS_DEFAULT_LOG_COUNT); @@ -1521,7 +1532,7 @@ xfs_unmountfs( xfs_warn(mp, "Unable to free reserved block pool. " "Freespace may not be correct on next mount."); - error = xfs_log_sbcount(mp); + error = xfs_log_sbcount(mp, 1); if (error) xfs_warn(mp, "Unable to update superblock counters. " "Freespace may not be correct on next mount."); @@ -1557,14 +1568,18 @@ xfs_fs_writable(xfs_mount_t *mp) /* * xfs_log_sbcount * - * Sync the superblock counters to disk. + * Called either periodically to keep the on disk superblock values + * roughly up to date or from unmount to make sure the values are + * correct on a clean unmount. * * Note this code can be called during the process of freezing, so - * we may need to use the transaction allocator which does not + * we may need to use the transaction allocator which does not not * block when the transaction subsystem is in its frozen state. 
*/ int -xfs_log_sbcount(xfs_mount_t *mp) +xfs_log_sbcount( + xfs_mount_t *mp, + uint sync) { xfs_trans_t *tp; int error; @@ -1590,7 +1605,8 @@ xfs_log_sbcount(xfs_mount_t *mp) } xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); - xfs_trans_set_sync(tp); + if (sync) + xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); return error; } @@ -1925,19 +1941,22 @@ xfs_mod_incore_sb_batch( * the superblock buffer if it can be locked without sleeping. * If it can't then we'll return NULL. */ -struct xfs_buf * +xfs_buf_t * xfs_getsb( - struct xfs_mount *mp, - int flags) + xfs_mount_t *mp, + int flags) { - struct xfs_buf *bp = mp->m_sb_bp; + xfs_buf_t *bp; - if (!xfs_buf_trylock(bp)) { - if (flags & XBF_TRYLOCK) + ASSERT(mp->m_sb_bp != NULL); + bp = mp->m_sb_bp; + if (flags & XBF_TRYLOCK) { + if (!XFS_BUF_CPSEMA(bp)) { return NULL; - xfs_buf_lock(bp); + } + } else { + XFS_BUF_PSEMA(bp, PRIBIO); } - XFS_BUF_HOLD(bp); ASSERT(XFS_BUF_ISDONE(bp)); return bp; diff --git a/trunk/fs/xfs/xfs_mount.h b/trunk/fs/xfs/xfs_mount.h index bb24dac42a25..3d68bb267c5f 100644 --- a/trunk/fs/xfs/xfs_mount.h +++ b/trunk/fs/xfs/xfs_mount.h @@ -371,7 +371,7 @@ typedef struct xfs_mod_sb { int64_t msb_delta; /* Change to make to specified field */ } xfs_mod_sb_t; -extern int xfs_log_sbcount(xfs_mount_t *); +extern int xfs_log_sbcount(xfs_mount_t *, uint); extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); extern int xfs_mountfs(xfs_mount_t *mp); diff --git a/trunk/fs/xfs/xfs_trans.c b/trunk/fs/xfs/xfs_trans.c index efc147f0e9b6..c83f63b33aae 100644 --- a/trunk/fs/xfs/xfs_trans.c +++ b/trunk/fs/xfs/xfs_trans.c @@ -1426,7 +1426,6 @@ xfs_trans_committed( static inline void xfs_log_item_batch_insert( struct xfs_ail *ailp, - struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t commit_lsn) @@ -1435,7 +1434,7 @@ xfs_log_item_batch_insert( spin_lock(&ailp->xa_lock); /* xfs_trans_ail_update_bulk drops ailp->xa_lock */ - xfs_trans_ail_update_bulk(ailp, 
cur, log_items, nr_items, commit_lsn); + xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn); for (i = 0; i < nr_items; i++) IOP_UNPIN(log_items[i], 0); @@ -1453,13 +1452,6 @@ xfs_log_item_batch_insert( * as an iclog write error even though we haven't started any IO yet. Hence in * this case all we need to do is IOP_COMMITTED processing, followed by an * IOP_UNPIN(aborted) call. - * - * The AIL cursor is used to optimise the insert process. If commit_lsn is not - * at the end of the AIL, the insert cursor avoids the need to walk - * the AIL to find the insertion point on every xfs_log_item_batch_insert() - * call. This saves a lot of needless list walking and is a net win, even - * though it slightly increases that amount of AIL lock traffic to set it up - * and tear it down. */ void xfs_trans_committed_bulk( @@ -1471,13 +1463,8 @@ xfs_trans_committed_bulk( #define LOG_ITEM_BATCH_SIZE 32 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; struct xfs_log_vec *lv; - struct xfs_ail_cursor cur; int i = 0; - spin_lock(&ailp->xa_lock); - xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn); - spin_unlock(&ailp->xa_lock); - /* unpin all the log items */ for (lv = log_vector; lv; lv = lv->lv_next ) { struct xfs_log_item *lip = lv->lv_item; @@ -1506,9 +1493,7 @@ xfs_trans_committed_bulk( /* * Not a bulk update option due to unusual item_lsn. * Push into AIL immediately, rechecking the lsn once - * we have the ail lock. Then unpin the item. This does - * not affect the AIL cursor the bulk insert path is - * using. + * we have the ail lock. Then unpin the item. */ spin_lock(&ailp->xa_lock); if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) @@ -1522,7 +1507,7 @@ xfs_trans_committed_bulk( /* Item is a candidate for bulk AIL insert. 
*/ log_items[i++] = lv->lv_item; if (i >= LOG_ITEM_BATCH_SIZE) { - xfs_log_item_batch_insert(ailp, &cur, log_items, + xfs_log_item_batch_insert(ailp, log_items, LOG_ITEM_BATCH_SIZE, commit_lsn); i = 0; } @@ -1530,11 +1515,7 @@ xfs_trans_committed_bulk( /* make sure we insert the remainder! */ if (i) - xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn); - - spin_lock(&ailp->xa_lock); - xfs_trans_ail_cursor_done(ailp, &cur); - spin_unlock(&ailp->xa_lock); + xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn); } /* diff --git a/trunk/fs/xfs/xfs_trans_ail.c b/trunk/fs/xfs/xfs_trans_ail.c index 43233e92f0f6..5fc2380092c8 100644 --- a/trunk/fs/xfs/xfs_trans_ail.c +++ b/trunk/fs/xfs/xfs_trans_ail.c @@ -163,11 +163,17 @@ xfs_ail_max_lsn( } /* - * The cursor keeps track of where our current traversal is up to by tracking - * the next item in the list for us. However, for this to be safe, removing an - * object from the AIL needs to invalidate any cursor that points to it. hence - * the traversal cursor needs to be linked to the struct xfs_ail so that - * deletion can search all the active cursors for invalidation. + * AIL traversal cursor initialisation. + * + * The cursor keeps track of where our current traversal is up + * to by tracking the next Æ£tem in the list for us. However, for + * this to be safe, removing an object from the AIL needs to invalidate + * any cursor that points to it. hence the traversal cursor needs to + * be linked to the struct xfs_ail so that deletion can search all the + * active cursors for invalidation. + * + * We don't link the push cursor because it is embedded in the struct + * xfs_ail and hence easily findable. 
*/ STATIC void xfs_trans_ail_cursor_init( @@ -175,12 +181,31 @@ xfs_trans_ail_cursor_init( struct xfs_ail_cursor *cur) { cur->item = NULL; - list_add_tail(&cur->list, &ailp->xa_cursors); + if (cur == &ailp->xa_cursors) + return; + + cur->next = ailp->xa_cursors.next; + ailp->xa_cursors.next = cur; +} + +/* + * Set the cursor to the next item, because when we look + * up the cursor the current item may have been freed. + */ +STATIC void +xfs_trans_ail_cursor_set( + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + struct xfs_log_item *lip) +{ + if (lip) + cur->item = xfs_ail_next(ailp, lip); } /* - * Get the next item in the traversal and advance the cursor. If the cursor - * was invalidated (indicated by a lip of 1), restart the traversal. + * Get the next item in the traversal and advance the cursor. + * If the cursor was invalidated (inidicated by a lip of 1), + * restart the traversal. */ struct xfs_log_item * xfs_trans_ail_cursor_next( @@ -191,31 +216,45 @@ xfs_trans_ail_cursor_next( if ((__psint_t)lip & 1) lip = xfs_ail_min(ailp); - if (lip) - cur->item = xfs_ail_next(ailp, lip); + xfs_trans_ail_cursor_set(ailp, cur, lip); return lip; } /* - * When the traversal is complete, we need to remove the cursor from the list - * of traversing cursors. + * Now that the traversal is complete, we need to remove the cursor + * from the list of traversing cursors. Avoid removing the embedded + * push cursor, but use the fact it is always present to make the + * list deletion simple. 
*/ void xfs_trans_ail_cursor_done( struct xfs_ail *ailp, - struct xfs_ail_cursor *cur) + struct xfs_ail_cursor *done) { - cur->item = NULL; - list_del_init(&cur->list); + struct xfs_ail_cursor *prev = NULL; + struct xfs_ail_cursor *cur; + + done->item = NULL; + if (done == &ailp->xa_cursors) + return; + prev = &ailp->xa_cursors; + for (cur = prev->next; cur; prev = cur, cur = prev->next) { + if (cur == done) { + prev->next = cur->next; + break; + } + } + ASSERT(cur); } /* - * Invalidate any cursor that is pointing to this item. This is called when an - * item is removed from the AIL. Any cursor pointing to this object is now - * invalid and the traversal needs to be terminated so it doesn't reference a - * freed object. We set the low bit of the cursor item pointer so we can - * distinguish between an invalidation and the end of the list when getting the - * next item from the cursor. + * Invalidate any cursor that is pointing to this item. This is + * called when an item is removed from the AIL. Any cursor pointing + * to this object is now invalid and the traversal needs to be + * terminated so it doesn't reference a freed object. We set the + * cursor item to a value of 1 so we can distinguish between an + * invalidation and the end of the list when getting the next item + * from the cursor. */ STATIC void xfs_trans_ail_cursor_clear( @@ -224,7 +263,8 @@ xfs_trans_ail_cursor_clear( { struct xfs_ail_cursor *cur; - list_for_each_entry(cur, &ailp->xa_cursors, list) { + /* need to search all cursors */ + for (cur = &ailp->xa_cursors; cur; cur = cur->next) { if (cur->item == lip) cur->item = (struct xfs_log_item *) ((__psint_t)cur->item | 1); @@ -232,10 +272,9 @@ xfs_trans_ail_cursor_clear( } /* - * Find the first item in the AIL with the given @lsn by searching in ascending - * LSN order and initialise the cursor to point to the next item for a - * ascending traversal. Pass a @lsn of zero to initialise the cursor to the - * first item in the AIL. 
Returns NULL if the list is empty. + * Return the item in the AIL with the current lsn. + * Return the current tree generation number for use + * in calls to xfs_trans_next_ail(). */ xfs_log_item_t * xfs_trans_ail_cursor_first( @@ -246,112 +285,46 @@ xfs_trans_ail_cursor_first( xfs_log_item_t *lip; xfs_trans_ail_cursor_init(ailp, cur); - - if (lsn == 0) { - lip = xfs_ail_min(ailp); + lip = xfs_ail_min(ailp); + if (lsn == 0) goto out; - } list_for_each_entry(lip, &ailp->xa_ail, li_ail) { if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) goto out; } - return NULL; - + lip = NULL; out: - if (lip) - cur->item = xfs_ail_next(ailp, lip); + xfs_trans_ail_cursor_set(ailp, cur, lip); return lip; } -static struct xfs_log_item * -__xfs_trans_ail_cursor_last( - struct xfs_ail *ailp, - xfs_lsn_t lsn) -{ - xfs_log_item_t *lip; - - list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) { - if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0) - return lip; - } - return NULL; -} - -/* - * Find the last item in the AIL with the given @lsn by searching in descending - * LSN order and initialise the cursor to point to that item. If there is no - * item with the value of @lsn, then it sets the cursor to the last item with an - * LSN lower than @lsn. Returns NULL if the list is empty. - */ -struct xfs_log_item * -xfs_trans_ail_cursor_last( - struct xfs_ail *ailp, - struct xfs_ail_cursor *cur, - xfs_lsn_t lsn) -{ - xfs_trans_ail_cursor_init(ailp, cur); - cur->item = __xfs_trans_ail_cursor_last(ailp, lsn); - return cur->item; -} - /* - * Splice the log item list into the AIL at the given LSN. We splice to the - * tail of the given LSN to maintain insert order for push traversals. The - * cursor is optional, allowing repeated updates to the same LSN to avoid - * repeated traversals. + * splice the log item list into the AIL at the given LSN. 
*/ static void xfs_ail_splice( - struct xfs_ail *ailp, - struct xfs_ail_cursor *cur, - struct list_head *list, - xfs_lsn_t lsn) + struct xfs_ail *ailp, + struct list_head *list, + xfs_lsn_t lsn) { - struct xfs_log_item *lip = cur ? cur->item : NULL; - struct xfs_log_item *next_lip; + xfs_log_item_t *next_lip; - /* - * Get a new cursor if we don't have a placeholder or the existing one - * has been invalidated. - */ - if (!lip || (__psint_t)lip & 1) { - lip = __xfs_trans_ail_cursor_last(ailp, lsn); - - if (!lip) { - /* The list is empty, so just splice and return. */ - if (cur) - cur->item = NULL; - list_splice(list, &ailp->xa_ail); - return; - } + /* If the list is empty, just insert the item. */ + if (list_empty(&ailp->xa_ail)) { + list_splice(list, &ailp->xa_ail); + return; } - /* - * Our cursor points to the item we want to insert _after_, so we have - * to update the cursor to point to the end of the list we are splicing - * in so that it points to the correct location for the next splice. - * i.e. before the splice - * - * lsn -> lsn -> lsn + x -> lsn + x ... - * ^ - * | cursor points here - * - * After the splice we have: - * - * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ... - * ^ ^ - * | cursor points here | needs to move here - * - * So we set the cursor to the last item in the list to be spliced - * before we execute the splice, resulting in the cursor pointing to - * the correct item after the splice occurs. 
- */ - if (cur) { - next_lip = list_entry(list->prev, struct xfs_log_item, li_ail); - cur->item = next_lip; + list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { + if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) + break; } - list_splice(list, &lip->li_ail); + + ASSERT(&next_lip->li_ail == &ailp->xa_ail || + XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0); + + list_splice_init(list, &next_lip->li_ail); } /* @@ -378,7 +351,7 @@ xfs_ail_worker( struct xfs_ail *ailp = container_of(to_delayed_work(work), struct xfs_ail, xa_work); xfs_mount_t *mp = ailp->xa_mount; - struct xfs_ail_cursor cur; + struct xfs_ail_cursor *cur = &ailp->xa_cursors; xfs_log_item_t *lip; xfs_lsn_t lsn; xfs_lsn_t target; @@ -390,12 +363,13 @@ xfs_ail_worker( spin_lock(&ailp->xa_lock); target = ailp->xa_target; - lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn); + xfs_trans_ail_cursor_init(ailp, cur); + lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn); if (!lip || XFS_FORCED_SHUTDOWN(mp)) { /* * AIL is empty or our push has reached the end. 
*/ - xfs_trans_ail_cursor_done(ailp, &cur); + xfs_trans_ail_cursor_done(ailp, cur); spin_unlock(&ailp->xa_lock); goto out_done; } @@ -483,12 +457,12 @@ xfs_ail_worker( if (stuck > 100) break; - lip = xfs_trans_ail_cursor_next(ailp, &cur); + lip = xfs_trans_ail_cursor_next(ailp, cur); if (lip == NULL) break; lsn = lip->li_lsn; } - xfs_trans_ail_cursor_done(ailp, &cur); + xfs_trans_ail_cursor_done(ailp, cur); spin_unlock(&ailp->xa_lock); if (flush_log) { @@ -671,7 +645,6 @@ xfs_trans_unlocked_item( void xfs_trans_ail_update_bulk( struct xfs_ail *ailp, - struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t lsn) __releases(ailp->xa_lock) @@ -701,7 +674,7 @@ xfs_trans_ail_update_bulk( list_add(&lip->li_ail, &tmp); } - xfs_ail_splice(ailp, cur, &tmp, lsn); + xfs_ail_splice(ailp, &tmp, lsn); if (!mlip_changed) { spin_unlock(&ailp->xa_lock); @@ -820,7 +793,6 @@ xfs_trans_ail_init( ailp->xa_mount = mp; INIT_LIST_HEAD(&ailp->xa_ail); - INIT_LIST_HEAD(&ailp->xa_cursors); spin_lock_init(&ailp->xa_lock); INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); mp->m_ail = ailp; diff --git a/trunk/fs/xfs/xfs_trans_buf.c b/trunk/fs/xfs/xfs_trans_buf.c index 15584fc3ed7d..03b3b7f85a3b 100644 --- a/trunk/fs/xfs/xfs_trans_buf.c +++ b/trunk/fs/xfs/xfs_trans_buf.c @@ -81,7 +81,7 @@ _xfs_trans_bjoin( struct xfs_buf_log_item *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL); /* * The xfs_buf_log_item pointer is stored in b_fsprivate. If @@ -89,7 +89,7 @@ _xfs_trans_bjoin( * The checks to see if one is there are in xfs_buf_item_init(). 
*/ xfs_buf_item_init(bp, tp->t_mountp); - bip = bp->b_fspriv; + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); @@ -110,7 +110,7 @@ _xfs_trans_bjoin( * Initialize b_fsprivate2 so we can find it with incore_match() * in xfs_trans_get_buf() and friends above. */ - bp->b_transp = tp; + XFS_BUF_SET_FSPRIVATE2(bp, tp); } @@ -160,7 +160,7 @@ xfs_trans_get_buf(xfs_trans_t *tp, */ bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len); if (bp != NULL) { - ASSERT(xfs_buf_islocked(bp)); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) XFS_BUF_SUPER_STALE(bp); @@ -172,8 +172,8 @@ xfs_trans_get_buf(xfs_trans_t *tp, else if (XFS_BUF_ISSTALE(bp)) ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); - ASSERT(bp->b_transp == tp); - bip = bp->b_fspriv; + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; @@ -232,8 +232,8 @@ xfs_trans_getsb(xfs_trans_t *tp, * recursion count and return the buffer to the caller. 
*/ bp = mp->m_sb_bp; - if (bp->b_transp == tp) { - bip = bp->b_fspriv; + if (XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp) { + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; @@ -327,9 +327,9 @@ xfs_trans_read_buf( */ bp = xfs_trans_buf_item_match(tp, target, blkno, len); if (bp != NULL) { - ASSERT(xfs_buf_islocked(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bp->b_fspriv != NULL); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); ASSERT((XFS_BUF_ISERROR(bp)) == 0); if (!(XFS_BUF_ISDONE(bp))) { trace_xfs_trans_read_buf_io(bp, _RET_IP_); @@ -363,7 +363,7 @@ xfs_trans_read_buf( } - bip = bp->b_fspriv; + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); bip->bli_recur++; ASSERT(atomic_read(&bip->bli_refcount) > 0); @@ -460,30 +460,32 @@ xfs_trans_brelse(xfs_trans_t *tp, xfs_buf_t *bp) { xfs_buf_log_item_t *bip; + xfs_log_item_t *lip; /* * Default to a normal brelse() call if the tp is NULL. */ if (tp == NULL) { - struct xfs_log_item *lip = bp->b_fspriv; - - ASSERT(bp->b_transp == NULL); - + ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL); /* * If there's a buf log item attached to the buffer, * then let the AIL know that the buffer is being * unlocked. 
*/ - if (lip != NULL && lip->li_type == XFS_LI_BUF) { - bip = bp->b_fspriv; - xfs_trans_unlocked_item(bip->bli_item.li_ailp, lip); + if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { + lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); + if (lip->li_type == XFS_LI_BUF) { + bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); + xfs_trans_unlocked_item(bip->bli_item.li_ailp, + lip); + } } xfs_buf_relse(bp); return; } - ASSERT(bp->b_transp == tp); - bip = bp->b_fspriv; + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(bip->bli_item.li_type == XFS_LI_BUF); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); @@ -554,7 +556,7 @@ xfs_trans_brelse(xfs_trans_t *tp, xfs_buf_item_relse(bp); bip = NULL; } - bp->b_transp = NULL; + XFS_BUF_SET_FSPRIVATE2(bp, NULL); /* * If we've still got a buf log item on the buffer, then @@ -579,15 +581,16 @@ void xfs_trans_bhold(xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bip != NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(atomic_read(&bip->bli_refcount) > 0); - bip->bli_flags |= XFS_BLI_HOLD; trace_xfs_trans_bhold(bip); } @@ -600,17 +603,19 @@ void xfs_trans_bhold_release(xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bip != NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->bli_format.blf_flags & 
XFS_BLF_CANCEL)); ASSERT(atomic_read(&bip->bli_refcount) > 0); ASSERT(bip->bli_flags & XFS_BLI_HOLD); - bip->bli_flags &= ~XFS_BLI_HOLD; + trace_xfs_trans_bhold_release(bip); } @@ -629,14 +634,14 @@ xfs_trans_log_buf(xfs_trans_t *tp, uint first, uint last) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bip != NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp))); - ASSERT(bp->b_iodone == NULL || - bp->b_iodone == xfs_buf_iodone_callbacks); + ASSERT((XFS_BUF_IODONE_FUNC(bp) == NULL) || + (XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks)); /* * Mark the buffer as needing to be written out eventually, @@ -651,8 +656,9 @@ xfs_trans_log_buf(xfs_trans_t *tp, XFS_BUF_DELAYWRITE(bp); XFS_BUF_DONE(bp); + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(atomic_read(&bip->bli_refcount) > 0); - bp->b_iodone = xfs_buf_iodone_callbacks; + XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); bip->bli_item.li_cb = xfs_buf_iodone; trace_xfs_trans_log_buf(bip); @@ -700,11 +706,13 @@ xfs_trans_binval( xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bip != NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(atomic_read(&bip->bli_refcount) > 0); trace_xfs_trans_binval(bip); @@ -772,11 +780,13 @@ xfs_trans_inode_buf( xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bip != NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + + bip = XFS_BUF_FSPRIVATE(bp, 
xfs_buf_log_item_t *); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_flags |= XFS_BLI_INODE_BUF; @@ -796,11 +806,13 @@ xfs_trans_stale_inode_buf( xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bip != NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_flags |= XFS_BLI_STALE_INODE; @@ -821,11 +833,13 @@ xfs_trans_inode_alloc_buf( xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bip != NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF; @@ -849,14 +863,16 @@ xfs_trans_dquot_buf( xfs_buf_t *bp, uint type) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bip != NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); ASSERT(type == XFS_BLF_UDQUOT_BUF || type == XFS_BLF_PDQUOT_BUF || type == XFS_BLF_GDQUOT_BUF); + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_format.blf_flags |= type; diff --git a/trunk/fs/xfs/xfs_trans_inode.c b/trunk/fs/xfs/xfs_trans_inode.c index c8dea2fd7e68..048b0c689d3e 100644 --- a/trunk/fs/xfs/xfs_trans_inode.c +++ b/trunk/fs/xfs/xfs_trans_inode.c @@ -55,6 +55,7 @@ xfs_trans_ijoin( { xfs_inode_log_item_t *iip; + ASSERT(ip->i_transp == NULL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); if (ip->i_itemp == NULL) xfs_inode_item_init(ip, 
ip->i_mount); @@ -67,6 +68,12 @@ xfs_trans_ijoin( xfs_trans_add_item(tp, &iip->ili_item); xfs_trans_inode_broot_debug(ip); + + /* + * Initialize i_transp so we can find it with xfs_inode_incore() + * in xfs_trans_iget() above. + */ + ip->i_transp = tp; } /* @@ -104,6 +111,7 @@ xfs_trans_ichgtime( ASSERT(tp); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ip->i_transp == tp); tv = current_fs_time(inode->i_sb); @@ -132,6 +140,7 @@ xfs_trans_log_inode( xfs_inode_t *ip, uint flags) { + ASSERT(ip->i_transp == tp); ASSERT(ip->i_itemp != NULL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); diff --git a/trunk/fs/xfs/xfs_trans_priv.h b/trunk/fs/xfs/xfs_trans_priv.h index 212946b97239..6b164e9e9a1f 100644 --- a/trunk/fs/xfs/xfs_trans_priv.h +++ b/trunk/fs/xfs/xfs_trans_priv.h @@ -53,7 +53,7 @@ void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, * of the list to trigger traversal restarts. */ struct xfs_ail_cursor { - struct list_head list; + struct xfs_ail_cursor *next; struct xfs_log_item *item; }; @@ -66,7 +66,7 @@ struct xfs_ail { struct xfs_mount *xa_mount; struct list_head xa_ail; xfs_lsn_t xa_target; - struct list_head xa_cursors; + struct xfs_ail_cursor xa_cursors; spinlock_t xa_lock; struct delayed_work xa_work; xfs_lsn_t xa_last_pushed_lsn; @@ -82,7 +82,6 @@ struct xfs_ail { extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, - struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t lsn) __releases(ailp->xa_lock); static inline void @@ -91,7 +90,7 @@ xfs_trans_ail_update( struct xfs_log_item *lip, xfs_lsn_t lsn) __releases(ailp->xa_lock) { - xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn); + xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn); } void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, @@ -112,13 +111,10 @@ xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); void xfs_trans_unlocked_item(struct xfs_ail *, xfs_log_item_t *); -struct 
xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp, +struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, struct xfs_ail_cursor *cur, xfs_lsn_t lsn); -struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp, - struct xfs_ail_cursor *cur, - xfs_lsn_t lsn); -struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp, +struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); diff --git a/trunk/fs/xfs/xfs_vnodeops.c b/trunk/fs/xfs/xfs_vnodeops.c index 88d121486c52..619720705bc6 100644 --- a/trunk/fs/xfs/xfs_vnodeops.c +++ b/trunk/fs/xfs/xfs_vnodeops.c @@ -50,6 +50,430 @@ #include "xfs_vnodeops.h" #include "xfs_trace.h" +int +xfs_setattr( + struct xfs_inode *ip, + struct iattr *iattr, + int flags) +{ + xfs_mount_t *mp = ip->i_mount; + struct inode *inode = VFS_I(ip); + int mask = iattr->ia_valid; + xfs_trans_t *tp; + int code; + uint lock_flags; + uint commit_flags=0; + uid_t uid=0, iuid=0; + gid_t gid=0, igid=0; + struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; + int need_iolock = 1; + + trace_xfs_setattr(ip); + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + code = -inode_change_ok(inode, iattr); + if (code) + return code; + + olddquot1 = olddquot2 = NULL; + udqp = gdqp = NULL; + + /* + * If disk quotas is on, we make sure that the dquots do exist on disk, + * before we start any other transactions. Trying to do this later + * is messy. We don't care to take a readlock to look at the ids + * in inode here, because we can't hold it across the trans_reserve. + * If the IDs do change before we take the ilock, we're covered + * because the i_*dquot fields will get updated anyway. 
+ */ + if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) { + uint qflags = 0; + + if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { + uid = iattr->ia_uid; + qflags |= XFS_QMOPT_UQUOTA; + } else { + uid = ip->i_d.di_uid; + } + if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { + gid = iattr->ia_gid; + qflags |= XFS_QMOPT_GQUOTA; + } else { + gid = ip->i_d.di_gid; + } + + /* + * We take a reference when we initialize udqp and gdqp, + * so it is important that we never blindly double trip on + * the same variable. See xfs_create() for an example. + */ + ASSERT(udqp == NULL); + ASSERT(gdqp == NULL); + code = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), + qflags, &udqp, &gdqp); + if (code) + return code; + } + + /* + * For the other attributes, we acquire the inode lock and + * first do an error checking pass. + */ + tp = NULL; + lock_flags = XFS_ILOCK_EXCL; + if (flags & XFS_ATTR_NOLOCK) + need_iolock = 0; + if (!(mask & ATTR_SIZE)) { + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); + commit_flags = 0; + code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), + 0, 0, 0); + if (code) { + lock_flags = 0; + goto error_return; + } + } else { + if (need_iolock) + lock_flags |= XFS_IOLOCK_EXCL; + } + + xfs_ilock(ip, lock_flags); + + /* + * Change file ownership. Must be the owner or privileged. + */ + if (mask & (ATTR_UID|ATTR_GID)) { + /* + * These IDs could have changed since we last looked at them. + * But, we're assured that if the ownership did change + * while we didn't have the inode locked, inode's dquot(s) + * would have changed also. + */ + iuid = ip->i_d.di_uid; + igid = ip->i_d.di_gid; + gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; + uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; + + /* + * Do a quota reservation only if uid/gid is actually + * going to change. 
+ */ + if (XFS_IS_QUOTA_RUNNING(mp) && + ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || + (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { + ASSERT(tp); + code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, + capable(CAP_FOWNER) ? + XFS_QMOPT_FORCE_RES : 0); + if (code) /* out of quota */ + goto error_return; + } + } + + /* + * Truncate file. Must have write permission and not be a directory. + */ + if (mask & ATTR_SIZE) { + /* Short circuit the truncate case for zero length files */ + if (iattr->ia_size == 0 && + ip->i_size == 0 && ip->i_d.di_nextents == 0) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + lock_flags &= ~XFS_ILOCK_EXCL; + if (mask & ATTR_CTIME) { + inode->i_mtime = inode->i_ctime = + current_fs_time(inode->i_sb); + xfs_mark_inode_dirty_sync(ip); + } + code = 0; + goto error_return; + } + + if (S_ISDIR(ip->i_d.di_mode)) { + code = XFS_ERROR(EISDIR); + goto error_return; + } else if (!S_ISREG(ip->i_d.di_mode)) { + code = XFS_ERROR(EINVAL); + goto error_return; + } + + /* + * Make sure that the dquots are attached to the inode. + */ + code = xfs_qm_dqattach_locked(ip, 0); + if (code) + goto error_return; + + /* + * Now we can make the changes. Before we join the inode + * to the transaction, if ATTR_SIZE is set then take care of + * the part of the truncation that must be done without the + * inode lock. This needs to be done before joining the inode + * to the transaction, because the inode cannot be unlocked + * once it is a part of the transaction. + */ + if (iattr->ia_size > ip->i_size) { + /* + * Do the first part of growing a file: zero any data + * in the last block that is beyond the old EOF. We + * need to do this before the inode is joined to the + * transaction to modify the i_size. 
+ */ + code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); + if (code) + goto error_return; + } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + lock_flags &= ~XFS_ILOCK_EXCL; + + /* + * We are going to log the inode size change in this + * transaction so any previous writes that are beyond the on + * disk EOF and the new EOF that have not been written out need + * to be written here. If we do not write the data out, we + * expose ourselves to the null files problem. + * + * Only flush from the on disk size to the smaller of the in + * memory file size or the new size as that's the range we + * really care about here and prevents waiting for other data + * not within the range we care about here. + */ + if (ip->i_size != ip->i_d.di_size && + iattr->ia_size > ip->i_d.di_size) { + code = xfs_flush_pages(ip, + ip->i_d.di_size, iattr->ia_size, + XBF_ASYNC, FI_NONE); + if (code) + goto error_return; + } + + /* wait for all I/O to complete */ + xfs_ioend_wait(ip); + + code = -block_truncate_page(inode->i_mapping, iattr->ia_size, + xfs_get_blocks); + if (code) + goto error_return; + + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); + code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_ITRUNCATE_LOG_COUNT); + if (code) + goto error_return; + + truncate_setsize(inode, iattr->ia_size); + + commit_flags = XFS_TRANS_RELEASE_LOG_RES; + lock_flags |= XFS_ILOCK_EXCL; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + xfs_trans_ijoin(tp, ip); + + /* + * Only change the c/mtime if we are changing the size + * or we are explicitly asked to change it. This handles + * the semantic difference between truncate() and ftruncate() + * as implemented in the VFS. + * + * The regular truncate() case without ATTR_CTIME and ATTR_MTIME + * is a special case where we need to update the times despite + * not having these flags set. For all other operations the + * VFS set these flags explicitly if it wants a timestamp + * update. 
+ */ + if (iattr->ia_size != ip->i_size && + (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { + iattr->ia_ctime = iattr->ia_mtime = + current_fs_time(inode->i_sb); + mask |= ATTR_CTIME | ATTR_MTIME; + } + + if (iattr->ia_size > ip->i_size) { + ip->i_d.di_size = iattr->ia_size; + ip->i_size = iattr->ia_size; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + } else if (iattr->ia_size <= ip->i_size || + (iattr->ia_size == 0 && ip->i_d.di_nextents)) { + /* + * signal a sync transaction unless + * we're truncating an already unlinked + * file on a wsync filesystem + */ + code = xfs_itruncate_finish(&tp, ip, iattr->ia_size, + XFS_DATA_FORK, + ((ip->i_d.di_nlink != 0 || + !(mp->m_flags & XFS_MOUNT_WSYNC)) + ? 1 : 0)); + if (code) + goto abort_return; + /* + * Truncated "down", so we're removing references + * to old data here - if we now delay flushing for + * a long time, we expose ourselves unduly to the + * notorious NULL files problem. So, we mark this + * vnode and flush it when the file is closed, and + * do not wait the usual (long) time for writeout. + */ + xfs_iflags_set(ip, XFS_ITRUNCATED); + } + } else if (tp) { + xfs_trans_ijoin(tp, ip); + } + + /* + * Change file ownership. Must be the owner or privileged. + */ + if (mask & (ATTR_UID|ATTR_GID)) { + /* + * CAP_FSETID overrides the following restrictions: + * + * The set-user-ID and set-group-ID bits of a file will be + * cleared upon successful return from chown() + */ + if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && + !capable(CAP_FSETID)) { + ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); + } + + /* + * Change the ownerships and register quota modifications + * in the transaction. 
+ */ + if (iuid != uid) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { + ASSERT(mask & ATTR_UID); + ASSERT(udqp); + olddquot1 = xfs_qm_vop_chown(tp, ip, + &ip->i_udquot, udqp); + } + ip->i_d.di_uid = uid; + inode->i_uid = uid; + } + if (igid != gid) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { + ASSERT(!XFS_IS_PQUOTA_ON(mp)); + ASSERT(mask & ATTR_GID); + ASSERT(gdqp); + olddquot2 = xfs_qm_vop_chown(tp, ip, + &ip->i_gdquot, gdqp); + } + ip->i_d.di_gid = gid; + inode->i_gid = gid; + } + } + + /* + * Change file access modes. + */ + if (mask & ATTR_MODE) { + umode_t mode = iattr->ia_mode; + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + + ip->i_d.di_mode &= S_IFMT; + ip->i_d.di_mode |= mode & ~S_IFMT; + + inode->i_mode &= S_IFMT; + inode->i_mode |= mode & ~S_IFMT; + } + + /* + * Change file access or modified times. + */ + if (mask & ATTR_ATIME) { + inode->i_atime = iattr->ia_atime; + ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; + ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; + ip->i_update_core = 1; + } + if (mask & ATTR_CTIME) { + inode->i_ctime = iattr->ia_ctime; + ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; + ip->i_update_core = 1; + } + if (mask & ATTR_MTIME) { + inode->i_mtime = iattr->ia_mtime; + ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; + ip->i_update_core = 1; + } + + /* + * And finally, log the inode core if any attribute in it + * has been changed. + */ + if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE| + ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + XFS_STATS_INC(xs_ig_attrchg); + + /* + * If this is a synchronous mount, make sure that the + * transaction goes to disk before returning to the user. + * This is slightly sub-optimal in that truncates require + * two sync transactions instead of one for wsync filesystems. 
+ * One for the truncate and one for the timestamps since we + * don't want to change the timestamps unless we're sure the + * truncate worked. Truncates are less than 1% of the laddis + * mix so this probably isn't worth the trouble to optimize. + */ + code = 0; + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); + + code = xfs_trans_commit(tp, commit_flags); + + xfs_iunlock(ip, lock_flags); + + /* + * Release any dquot(s) the inode had kept before chown. + */ + xfs_qm_dqrele(olddquot1); + xfs_qm_dqrele(olddquot2); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + + if (code) + return code; + + /* + * XXX(hch): Updating the ACL entries is not atomic vs the i_mode + * update. We could avoid this with linked transactions + * and passing down the transaction pointer all the way + * to attr_set. No previous user of the generic + * Posix ACL code seems to care about this issue either. + */ + if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { + code = -xfs_acl_chmod(inode); + if (code) + return XFS_ERROR(code); + } + + return 0; + + abort_return: + commit_flags |= XFS_TRANS_ABORT; + error_return: + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + if (tp) { + xfs_trans_cancel(tp, commit_flags); + } + if (lock_flags != 0) { + xfs_iunlock(ip, lock_flags); + } + return code; +} + /* * The maximum pathlen is 1024 bytes. Since the minimum file system * blocksize is 512 bytes, we can get a max of 2 extents back from @@ -197,6 +621,13 @@ xfs_free_eofblocks( */ tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); + /* + * Do the xfs_itruncate_start() call before + * reserving any log space because + * itruncate_start will call into the buffer + * cache and we can't + * do that within a transaction. 
+ */ if (flags & XFS_FREE_EOF_TRYLOCK) { if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { xfs_trans_cancel(tp, 0); @@ -205,6 +636,13 @@ xfs_free_eofblocks( } else { xfs_ilock(ip, XFS_IOLOCK_EXCL); } + error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, + ip->i_size); + if (error) { + xfs_trans_cancel(tp, 0); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return error; + } error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), @@ -220,12 +658,15 @@ xfs_free_eofblocks( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip); - error = xfs_itruncate_data(&tp, ip, ip->i_size); + error = xfs_itruncate_finish(&tp, ip, + ip->i_size, + XFS_DATA_FORK, + 0); + /* + * If we get an error at this point we + * simply don't bother truncating the file. + */ if (error) { - /* - * If we get an error at this point we simply don't - * bother truncating the file. - */ xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); @@ -643,9 +1084,20 @@ xfs_inactive( tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); if (truncate) { + /* + * Do the xfs_itruncate_start() call before + * reserving any log space because itruncate_start + * will call into the buffer cache and we can't + * do that within a transaction. + */ xfs_ilock(ip, XFS_IOLOCK_EXCL); - xfs_ioend_wait(ip); + error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0); + if (error) { + xfs_trans_cancel(tp, 0); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return VN_INACTIVE_CACHE; + } error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), @@ -662,7 +1114,16 @@ xfs_inactive( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip); - error = xfs_itruncate_data(&tp, ip, 0); + /* + * normally, we have to run xfs_itruncate_finish sync. + * But if filesystem is wsync and we're in the inactive + * path, then we know that nlink == 0, and that the + * xaction that made nlink == 0 is permanently committed + * since xfs_remove runs as a synchronous transaction. 
+ */ + error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, + (!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0)); + if (error) { xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); @@ -1969,8 +2430,6 @@ xfs_zero_remaining_bytes( if (!bp) return XFS_ERROR(ENOMEM); - xfs_buf_unlock(bp); - for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { offset_fsb = XFS_B_TO_FSBT(mp, offset); nimap = 1; @@ -2325,7 +2784,7 @@ xfs_change_file_space( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = startoffset; - error = xfs_setattr_size(ip, &iattr, attr_flags); + error = xfs_setattr(ip, &iattr, attr_flags); if (error) return error; diff --git a/trunk/fs/xfs/xfs_vnodeops.h b/trunk/fs/xfs/xfs_vnodeops.h index 35d3d513e1e9..3bcd23353d6c 100644 --- a/trunk/fs/xfs/xfs_vnodeops.h +++ b/trunk/fs/xfs/xfs_vnodeops.h @@ -13,8 +13,7 @@ struct xfs_inode; struct xfs_iomap; -int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags); -int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap, int flags); +int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags); #define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ diff --git a/trunk/include/linux/mfd/tmio.h b/trunk/include/linux/mfd/tmio.h index 0dc98044d8b7..5a90266c3a5a 100644 --- a/trunk/include/linux/mfd/tmio.h +++ b/trunk/include/linux/mfd/tmio.h @@ -68,11 +68,6 @@ * controller and report the event to the driver. */ #define TMIO_MMC_HAS_COLD_CD (1 << 3) -/* - * Some controllers require waiting for the SD bus to become - * idle before writing to some registers. 
- */ -#define TMIO_MMC_HAS_IDLE_WAIT (1 << 4) int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); @@ -85,8 +80,6 @@ struct tmio_mmc_dma { int alignment_shift; }; -struct tmio_mmc_host; - /* * data for the MMC controller */ @@ -101,7 +94,6 @@ struct tmio_mmc_data { void (*set_pwr)(struct platform_device *host, int state); void (*set_clk_div)(struct platform_device *host, int state); int (*get_cd)(struct platform_device *host); - int (*write16_hook)(struct tmio_mmc_host *host, int addr); }; static inline void tmio_mmc_cd_wakeup(struct tmio_mmc_data *pdata) diff --git a/trunk/include/linux/mmc/boot.h b/trunk/include/linux/mmc/boot.h index 23acc3baa07d..39d787c229cb 100644 --- a/trunk/include/linux/mmc/boot.h +++ b/trunk/include/linux/mmc/boot.h @@ -1,7 +1,7 @@ -#ifndef LINUX_MMC_BOOT_H -#define LINUX_MMC_BOOT_H +#ifndef MMC_BOOT_H +#define MMC_BOOT_H enum { MMC_PROGRESS_ENTER, MMC_PROGRESS_INIT, MMC_PROGRESS_LOAD, MMC_PROGRESS_DONE }; -#endif /* LINUX_MMC_BOOT_H */ +#endif diff --git a/trunk/include/linux/mmc/card.h b/trunk/include/linux/mmc/card.h index b460fc2af8a1..6ad43554ac05 100644 --- a/trunk/include/linux/mmc/card.h +++ b/trunk/include/linux/mmc/card.h @@ -403,4 +403,4 @@ extern void mmc_unregister_driver(struct mmc_driver *); extern void mmc_fixup_device(struct mmc_card *card, const struct mmc_fixup *table); -#endif /* LINUX_MMC_CARD_H */ +#endif diff --git a/trunk/include/linux/mmc/core.h b/trunk/include/linux/mmc/core.h index b8b1b7a311f1..b6718e549a51 100644 --- a/trunk/include/linux/mmc/core.h +++ b/trunk/include/linux/mmc/core.h @@ -117,7 +117,6 @@ struct mmc_data { unsigned int sg_len; /* size of scatter list */ struct scatterlist *sg; /* I/O scatter list */ - s32 host_cookie; /* host private data */ }; struct mmc_request { @@ -126,16 +125,13 @@ struct mmc_request { struct mmc_data *data; struct mmc_command *stop; - struct completion completion; + void 
*done_data; /* completion data */ void (*done)(struct mmc_request *);/* completion function */ }; struct mmc_host; struct mmc_card; -struct mmc_async_req; -extern struct mmc_async_req *mmc_start_req(struct mmc_host *, - struct mmc_async_req *, int *); extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *); extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int); extern int mmc_app_cmd(struct mmc_host *, struct mmc_card *); @@ -159,7 +155,6 @@ extern int mmc_can_trim(struct mmc_card *card); extern int mmc_can_secure_erase_trim(struct mmc_card *card); extern int mmc_erase_group_aligned(struct mmc_card *card, unsigned int from, unsigned int nr); -extern unsigned int mmc_calc_max_discard(struct mmc_card *card); extern int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen); @@ -184,4 +179,4 @@ static inline void mmc_claim_host(struct mmc_host *host) extern u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max); -#endif /* LINUX_MMC_CORE_H */ +#endif diff --git a/trunk/include/linux/mmc/dw_mmc.h b/trunk/include/linux/mmc/dw_mmc.h index 6b46819705d1..bdd7ceeb99e4 100644 --- a/trunk/include/linux/mmc/dw_mmc.h +++ b/trunk/include/linux/mmc/dw_mmc.h @@ -11,8 +11,8 @@ * (at your option) any later version. */ -#ifndef LINUX_MMC_DW_MMC_H -#define LINUX_MMC_DW_MMC_H +#ifndef _LINUX_MMC_DW_MMC_H_ +#define _LINUX_MMC_DW_MMC_H_ #define MAX_MCI_SLOTS 2 @@ -48,7 +48,6 @@ struct mmc_data; * @data: The data currently being transferred, or NULL if no data * transfer is in progress. * @use_dma: Whether DMA channel is initialized or not. - * @using_dma: Whether DMA is in use for the current transfer. * @sg_dma: Bus address of DMA buffer. * @sg_cpu: Virtual address of DMA buffer. * @dma_ops: Pointer to platform-specific DMA callbacks. @@ -75,11 +74,7 @@ struct mmc_data; * @pdev: Platform device associated with the MMC controller. * @pdata: Platform data associated with the MMC controller. * @slot: Slots sharing this MMC controller. 
- * @fifo_depth: depth of FIFO. * @data_shift: log2 of FIFO item size. - * @part_buf_start: Start index in part_buf. - * @part_buf_count: Bytes of partial data in part_buf. - * @part_buf: Simple buffer for partial fifo reads/writes. * @push_data: Pointer to FIFO push function. * @pull_data: Pointer to FIFO pull function. * @quirks: Set of quirks that apply to specific versions of the IP. @@ -122,7 +117,6 @@ struct dw_mci { /* DMA interface members*/ int use_dma; - int using_dma; dma_addr_t sg_dma; void *sg_cpu; @@ -137,7 +131,7 @@ struct dw_mci { u32 stop_cmdr; u32 dir_status; struct tasklet_struct tasklet; - struct work_struct card_work; + struct tasklet_struct card_tasklet; unsigned long pending_events; unsigned long completed_events; enum dw_mci_state state; @@ -152,15 +146,7 @@ struct dw_mci { struct dw_mci_slot *slot[MAX_MCI_SLOTS]; /* FIFO push and pull */ - int fifo_depth; int data_shift; - u8 part_buf_start; - u8 part_buf_count; - union { - u16 part_buf16; - u32 part_buf32; - u64 part_buf; - }; void (*push_data)(struct dw_mci *host, void *buf, int cnt); void (*pull_data)(struct dw_mci *host, void *buf, int cnt); @@ -210,12 +196,6 @@ struct dw_mci_board { unsigned int bus_hz; /* Bus speed */ unsigned int caps; /* Capabilities */ - /* - * Override fifo depth. If 0, autodetect it from the FIFOTH register, - * but note that this may not be reliable after a bootloader has used - * it. 
- */ - unsigned int fifo_depth; /* delay in mS before detecting cards after interrupt */ u32 detect_delay_ms; @@ -239,4 +219,4 @@ struct dw_mci_board { struct block_settings *blk_settings; }; -#endif /* LINUX_MMC_DW_MMC_H */ +#endif /* _LINUX_MMC_DW_MMC_H_ */ diff --git a/trunk/include/linux/mmc/host.h b/trunk/include/linux/mmc/host.h index 0f83858147a6..1ee4424462eb 100644 --- a/trunk/include/linux/mmc/host.h +++ b/trunk/include/linux/mmc/host.h @@ -106,15 +106,6 @@ struct mmc_host_ops { */ int (*enable)(struct mmc_host *host); int (*disable)(struct mmc_host *host, int lazy); - /* - * It is optional for the host to implement pre_req and post_req in - * order to support double buffering of requests (prepare one - * request while another request is active). - */ - void (*post_req)(struct mmc_host *host, struct mmc_request *req, - int err); - void (*pre_req)(struct mmc_host *host, struct mmc_request *req, - bool is_first_req); void (*request)(struct mmc_host *host, struct mmc_request *req); /* * Avoid calling these three functions too often or in a "fast path", @@ -148,22 +139,11 @@ struct mmc_host_ops { int (*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios); int (*execute_tuning)(struct mmc_host *host); void (*enable_preset_value)(struct mmc_host *host, bool enable); - int (*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv); }; struct mmc_card; struct device; -struct mmc_async_req { - /* active mmc request */ - struct mmc_request *mrq; - /* - * Check error status of completed mmc request. - * Returns 0 if success otherwise non zero. 
- */ - int (*err_check) (struct mmc_card *, struct mmc_async_req *); -}; - struct mmc_host { struct device *parent; struct device class_dev; @@ -251,7 +231,6 @@ struct mmc_host { unsigned int max_req_size; /* maximum number of bytes in one req */ unsigned int max_blk_size; /* maximum size of one mmc block */ unsigned int max_blk_count; /* maximum number of blocks in one req */ - unsigned int max_discard_to; /* max. discard timeout in ms */ /* private data */ spinlock_t lock; /* lock for claim and bus ops */ @@ -302,8 +281,6 @@ struct mmc_host { struct dentry *debugfs_root; - struct mmc_async_req *areq; /* active async req */ - unsigned long private[0] ____cacheline_aligned; }; @@ -396,4 +373,5 @@ static inline int mmc_host_cmd23(struct mmc_host *host) { return host->caps & MMC_CAP_CMD23; } -#endif /* LINUX_MMC_HOST_H */ +#endif + diff --git a/trunk/include/linux/mmc/ioctl.h b/trunk/include/linux/mmc/ioctl.h index 8fa5bc5f8059..5baf2983a12f 100644 --- a/trunk/include/linux/mmc/ioctl.h +++ b/trunk/include/linux/mmc/ioctl.h @@ -51,4 +51,4 @@ struct mmc_ioc_cmd { * block device operations. 
*/ #define MMC_IOC_MAX_BYTES (512L * 256) -#endif /* LINUX_MMC_IOCTL_H */ +#endif /* LINUX_MMC_IOCTL_H */ diff --git a/trunk/include/linux/mmc/mmc.h b/trunk/include/linux/mmc/mmc.h index 5a794cb503ea..ac26a685cca8 100644 --- a/trunk/include/linux/mmc/mmc.h +++ b/trunk/include/linux/mmc/mmc.h @@ -21,8 +21,8 @@ * 15 May 2002 */ -#ifndef LINUX_MMC_MMC_H -#define LINUX_MMC_MMC_H +#ifndef MMC_MMC_H +#define MMC_MMC_H /* Standard MMC commands (4.1) type argument response */ /* class 1 */ @@ -140,16 +140,6 @@ static inline bool mmc_op_multi(u32 opcode) #define R1_SWITCH_ERROR (1 << 7) /* sx, c */ #define R1_APP_CMD (1 << 5) /* sr, c */ -#define R1_STATE_IDLE 0 -#define R1_STATE_READY 1 -#define R1_STATE_IDENT 2 -#define R1_STATE_STBY 3 -#define R1_STATE_TRAN 4 -#define R1_STATE_DATA 5 -#define R1_STATE_RCV 6 -#define R1_STATE_PRG 7 -#define R1_STATE_DIS 8 - /* * MMC/SD in SPI mode reports R1 status always, and R2 for SEND_STATUS * R1 is the low order byte; R2 is the next highest byte, when present. @@ -337,4 +327,5 @@ struct _mmc_csd { #define MMC_SWITCH_MODE_CLEAR_BITS 0x02 /* Clear bits which are 1 in value */ #define MMC_SWITCH_MODE_WRITE_BYTE 0x03 /* Set target to value */ -#endif /* LINUX_MMC_MMC_H */ +#endif /* MMC_MMC_PROTOCOL_H */ + diff --git a/trunk/include/linux/mmc/pm.h b/trunk/include/linux/mmc/pm.h index 4a139204c20c..d37aac49cf9a 100644 --- a/trunk/include/linux/mmc/pm.h +++ b/trunk/include/linux/mmc/pm.h @@ -27,4 +27,4 @@ typedef unsigned int mmc_pm_flag_t; #define MMC_PM_KEEP_POWER (1 << 0) /* preserve card power during suspend */ #define MMC_PM_WAKE_SDIO_IRQ (1 << 1) /* wake up host system on SDIO IRQ assertion */ -#endif /* LINUX_MMC_PM_H */ +#endif diff --git a/trunk/include/linux/mmc/sd.h b/trunk/include/linux/mmc/sd.h index 1ebcf9ba1256..7d35d52c3df3 100644 --- a/trunk/include/linux/mmc/sd.h +++ b/trunk/include/linux/mmc/sd.h @@ -9,8 +9,8 @@ * your option) any later version. 
*/ -#ifndef LINUX_MMC_SD_H -#define LINUX_MMC_SD_H +#ifndef MMC_SD_H +#define MMC_SD_H /* SD commands type argument response */ /* class 0 */ @@ -91,4 +91,5 @@ #define SD_SWITCH_ACCESS_DEF 0 #define SD_SWITCH_ACCESS_HS 1 -#endif /* LINUX_MMC_SD_H */ +#endif + diff --git a/trunk/include/linux/mmc/sdhci-pltfm.h b/trunk/include/linux/mmc/sdhci-pltfm.h new file mode 100644 index 000000000000..548d59d404cb --- /dev/null +++ b/trunk/include/linux/mmc/sdhci-pltfm.h @@ -0,0 +1,35 @@ +/* + * Platform data declarations for the sdhci-pltfm driver. + * + * Copyright (c) 2010 MontaVista Software, LLC. + * + * Author: Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + */ + +#ifndef _SDHCI_PLTFM_H +#define _SDHCI_PLTFM_H + +struct sdhci_ops; +struct sdhci_host; + +/** + * struct sdhci_pltfm_data - SDHCI platform-specific information & hooks + * @ops: optional pointer to the platform-provided SDHCI ops + * @quirks: optional SDHCI quirks + * @init: optional hook that is called during device probe, before the + * driver tries to access any SDHCI registers + * @exit: optional hook that is called during device removal + */ +struct sdhci_pltfm_data { + struct sdhci_ops *ops; + unsigned int quirks; + int (*init)(struct sdhci_host *host, struct sdhci_pltfm_data *pdata); + void (*exit)(struct sdhci_host *host); +}; + +#endif /* _SDHCI_PLTFM_H */ diff --git a/trunk/include/linux/mmc/sdhci-spear.h b/trunk/include/linux/mmc/sdhci-spear.h index 5cdc96da9dd5..9188c973f3e1 100644 --- a/trunk/include/linux/mmc/sdhci-spear.h +++ b/trunk/include/linux/mmc/sdhci-spear.h @@ -11,8 +11,8 @@ * warranty of any kind, whether express or implied. 
*/ -#ifndef LINUX_MMC_SDHCI_SPEAR_H -#define LINUX_MMC_SDHCI_SPEAR_H +#ifndef MMC_SDHCI_SPEAR_H +#define MMC_SDHCI_SPEAR_H #include /* @@ -39,4 +39,4 @@ sdhci_set_plat_data(struct platform_device *pdev, struct sdhci_plat_data *data) pdev->dev.platform_data = data; } -#endif /* LINUX_MMC_SDHCI_SPEAR_H */ +#endif /* MMC_SDHCI_SPEAR_H */ diff --git a/trunk/include/linux/mmc/sdhci.h b/trunk/include/linux/mmc/sdhci.h index 5666f3abfab7..6a68c4eb4e44 100644 --- a/trunk/include/linux/mmc/sdhci.h +++ b/trunk/include/linux/mmc/sdhci.h @@ -8,8 +8,8 @@ * the Free Software Foundation; either version 2 of the License, or (at * your option) any later version. */ -#ifndef LINUX_MMC_SDHCI_H -#define LINUX_MMC_SDHCI_H +#ifndef __SDHCI_H +#define __SDHCI_H #include #include @@ -162,4 +162,4 @@ struct sdhci_host { unsigned long private[0] ____cacheline_aligned; }; -#endif /* LINUX_MMC_SDHCI_H */ +#endif /* __SDHCI_H */ diff --git a/trunk/include/linux/mmc/sdio.h b/trunk/include/linux/mmc/sdio.h index 2a2e9905a247..245cdacee544 100644 --- a/trunk/include/linux/mmc/sdio.h +++ b/trunk/include/linux/mmc/sdio.h @@ -9,8 +9,8 @@ * your option) any later version. */ -#ifndef LINUX_MMC_SDIO_H -#define LINUX_MMC_SDIO_H +#ifndef MMC_SDIO_H +#define MMC_SDIO_H /* SDIO commands type argument response */ #define SD_IO_SEND_OP_COND 5 /* bcr [23:0] OCR R4 */ @@ -161,4 +161,5 @@ #define SDIO_FBR_BLKSIZE 0x10 /* block size (2 bytes) */ -#endif /* LINUX_MMC_SDIO_H */ +#endif + diff --git a/trunk/include/linux/mmc/sdio_func.h b/trunk/include/linux/mmc/sdio_func.h index 50f0bc952328..31baaf82f458 100644 --- a/trunk/include/linux/mmc/sdio_func.h +++ b/trunk/include/linux/mmc/sdio_func.h @@ -9,8 +9,8 @@ * your option) any later version. 
*/ -#ifndef LINUX_MMC_SDIO_FUNC_H -#define LINUX_MMC_SDIO_FUNC_H +#ifndef MMC_SDIO_FUNC_H +#define MMC_SDIO_FUNC_H #include #include @@ -161,4 +161,5 @@ extern void sdio_f0_writeb(struct sdio_func *func, unsigned char b, extern mmc_pm_flag_t sdio_get_host_pm_caps(struct sdio_func *func); extern int sdio_set_host_pm_flags(struct sdio_func *func, mmc_pm_flag_t flags); -#endif /* LINUX_MMC_SDIO_FUNC_H */ +#endif + diff --git a/trunk/include/linux/mmc/sdio_ids.h b/trunk/include/linux/mmc/sdio_ids.h index 9f03feedc8e7..a36ab3bc7b03 100644 --- a/trunk/include/linux/mmc/sdio_ids.h +++ b/trunk/include/linux/mmc/sdio_ids.h @@ -2,8 +2,8 @@ * SDIO Classes, Interface Types, Manufacturer IDs, etc. */ -#ifndef LINUX_MMC_SDIO_IDS_H -#define LINUX_MMC_SDIO_IDS_H +#ifndef MMC_SDIO_IDS_H +#define MMC_SDIO_IDS_H /* * Standard SDIO Function Interfaces @@ -44,4 +44,4 @@ #define SDIO_DEVICE_ID_SIANO_NOVA_A0 0x1100 #define SDIO_DEVICE_ID_SIANO_STELLAR 0x5347 -#endif /* LINUX_MMC_SDIO_IDS_H */ +#endif diff --git a/trunk/include/linux/mmc/sh_mmcif.h b/trunk/include/linux/mmc/sh_mmcif.h index 0222cd8ebe76..9eb9b4b96f55 100644 --- a/trunk/include/linux/mmc/sh_mmcif.h +++ b/trunk/include/linux/mmc/sh_mmcif.h @@ -11,8 +11,8 @@ * */ -#ifndef LINUX_MMC_SH_MMCIF_H -#define LINUX_MMC_SH_MMCIF_H +#ifndef __SH_MMCIF_H__ +#define __SH_MMCIF_H__ #include #include @@ -220,4 +220,4 @@ static inline void sh_mmcif_boot_init(void __iomem *base) sh_mmcif_boot_cmd(base, 0x03400040, 0x00010000); } -#endif /* LINUX_MMC_SH_MMCIF_H */ +#endif /* __SH_MMCIF_H__ */ diff --git a/trunk/include/linux/mmc/sh_mobile_sdhi.h b/trunk/include/linux/mmc/sh_mobile_sdhi.h index bd50b365167f..faf32b6ec185 100644 --- a/trunk/include/linux/mmc/sh_mobile_sdhi.h +++ b/trunk/include/linux/mmc/sh_mobile_sdhi.h @@ -1,5 +1,5 @@ -#ifndef LINUX_MMC_SH_MOBILE_SDHI_H -#define LINUX_MMC_SH_MOBILE_SDHI_H +#ifndef __SH_MOBILE_SDHI_H__ +#define __SH_MOBILE_SDHI_H__ #include @@ -17,4 +17,4 @@ struct sh_mobile_sdhi_info { int (*get_cd)(struct 
platform_device *pdev); }; -#endif /* LINUX_MMC_SH_MOBILE_SDHI_H */ +#endif /* __SH_MOBILE_SDHI_H__ */ diff --git a/trunk/include/linux/mmc/tmio.h b/trunk/include/linux/mmc/tmio.h index a1c1f321e519..19490b942db0 100644 --- a/trunk/include/linux/mmc/tmio.h +++ b/trunk/include/linux/mmc/tmio.h @@ -12,8 +12,8 @@ * * TC6393XB TC6391XB TC6387XB T7L66XB ASIC3 */ -#ifndef LINUX_MMC_TMIO_H -#define LINUX_MMC_TMIO_H +#ifndef _LINUX_MMC_TMIO_H_ +#define _LINUX_MMC_TMIO_H_ #define CTL_SD_CMD 0x00 #define CTL_ARG_REG 0x04 @@ -21,7 +21,6 @@ #define CTL_XFER_BLK_COUNT 0xa #define CTL_RESPONSE 0x0c #define CTL_STATUS 0x1c -#define CTL_STATUS2 0x1e #define CTL_IRQ_MASK 0x20 #define CTL_SD_CARD_CLK_CTL 0x24 #define CTL_SD_XFER_LEN 0x26 @@ -31,7 +30,6 @@ #define CTL_TRANSACTION_CTL 0x34 #define CTL_SDIO_STATUS 0x36 #define CTL_SDIO_IRQ_MASK 0x38 -#define CTL_DMA_ENABLE 0xd8 #define CTL_RESET_SD 0xe0 #define CTL_SDIO_REGS 0x100 #define CTL_CLK_AND_WAIT_CTL 0x138 @@ -62,4 +60,4 @@ #define TMIO_BBS 512 /* Boot block size */ -#endif /* LINUX_MMC_TMIO_H */ +#endif /* _LINUX_MMC_TMIO_H_ */ diff --git a/trunk/include/linux/mtd/ubi.h b/trunk/include/linux/mtd/ubi.h index db4836bed514..15da0e99f48a 100644 --- a/trunk/include/linux/mtd/ubi.h +++ b/trunk/include/linux/mtd/ubi.h @@ -155,14 +155,12 @@ struct ubi_device_info { }; /* - * Volume notification types. - * @UBI_VOLUME_ADDED: a volume has been added (an UBI device was attached or a - * volume was created) - * @UBI_VOLUME_REMOVED: a volume has been removed (an UBI device was detached - * or a volume was removed) - * @UBI_VOLUME_RESIZED: a volume has been re-sized - * @UBI_VOLUME_RENAMED: a volume has been re-named - * @UBI_VOLUME_UPDATED: data has been written to a volume + * enum - volume notification types. 
+ * @UBI_VOLUME_ADDED: volume has been added + * @UBI_VOLUME_REMOVED: start volume volume + * @UBI_VOLUME_RESIZED: volume size has been re-sized + * @UBI_VOLUME_RENAMED: volume name has been re-named + * @UBI_VOLUME_UPDATED: volume name has been updated * * These constants define which type of event has happened when a volume * notification function is invoked. diff --git a/trunk/include/linux/platform_data/pxa_sdhci.h b/trunk/include/linux/platform_data/pxa_sdhci.h deleted file mode 100644 index 51ad0995abac..000000000000 --- a/trunk/include/linux/platform_data/pxa_sdhci.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * include/linux/platform_data/pxa_sdhci.h - * - * Copyright 2010 Marvell - * Zhangfei Gao - * - * PXA Platform - SDHCI platform data definitions - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _PXA_SDHCI_H_ -#define _PXA_SDHCI_H_ - -/* pxa specific flag */ -/* Require clock free running */ -#define PXA_FLAG_ENABLE_CLOCK_GATING (1<<0) -/* card always wired to host, like on-chip emmc */ -#define PXA_FLAG_CARD_PERMANENT (1<<1) -/* Board design supports 8-bit data on SD/SDIO BUS */ -#define PXA_FLAG_SD_8_BIT_CAPABLE_SLOT (1<<2) - -/* - * struct pxa_sdhci_platdata() - Platform device data for PXA SDHCI - * @flags: flags for platform requirement - * @clk_delay_cycles: - * mmp2: each step is roughly 100ps, 5bits width - * pxa910: each step is 1ns, 4bits width - * @clk_delay_sel: select clk_delay, used on pxa910 - * 0: choose feedback clk - * 1: choose feedback clk + delay value - * 2: choose internal clk - * @clk_delay_enable: enable clk_delay or not, used on pxa910 - * @ext_cd_gpio: gpio pin used for external CD line - * @ext_cd_gpio_invert: invert values for external CD gpio line - * @max_speed: the maximum speed supported - * @host_caps: Standard MMC host capabilities bit field. 
- * @quirks: quirks of platfrom - * @pm_caps: pm_caps of platfrom - */ -struct sdhci_pxa_platdata { - unsigned int flags; - unsigned int clk_delay_cycles; - unsigned int clk_delay_sel; - bool clk_delay_enable; - unsigned int ext_cd_gpio; - bool ext_cd_gpio_invert; - unsigned int max_speed; - unsigned int host_caps; - unsigned int quirks; - unsigned int pm_caps; -}; - -struct sdhci_pxa { - u8 clk_enable; - u8 power_mode; -}; -#endif /* _PXA_SDHCI_H_ */ diff --git a/trunk/include/linux/slab.h b/trunk/include/linux/slab.h index 573c809c33d9..ad4dd1c8d30a 100644 --- a/trunk/include/linux/slab.h +++ b/trunk/include/linux/slab.h @@ -133,26 +133,6 @@ unsigned int kmem_cache_size(struct kmem_cache *); #define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_HIGH) #define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_HIGH - PAGE_SHIFT) -/* - * Some archs want to perform DMA into kmalloc caches and need a guaranteed - * alignment larger than the alignment of a 64-bit integer. - * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that. - */ -#ifdef ARCH_DMA_MINALIGN -#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN -#else -#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) -#endif - -/* - * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. - * Intended for arches that get misalignment faults even for 64 bit integer - * aligned buffers. - */ -#ifndef ARCH_SLAB_MINALIGN -#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) -#endif - /* * Common kmalloc functions provided by all allocators */ diff --git a/trunk/include/linux/slab_def.h b/trunk/include/linux/slab_def.h index d00e0bacda93..83203ae9390b 100644 --- a/trunk/include/linux/slab_def.h +++ b/trunk/include/linux/slab_def.h @@ -17,6 +17,32 @@ #include +/* + * Enforce a minimum alignment for the kmalloc caches. + * Usually, the kmalloc caches are cache_line_size() aligned, except when + * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned. 
+ * Some archs want to perform DMA into kmalloc caches and need a guaranteed + * alignment larger than the alignment of a 64-bit integer. + * ARCH_KMALLOC_MINALIGN allows that. + * Note that increasing this value may disable some debug features. + */ +#ifdef ARCH_DMA_MINALIGN +#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN +#else +#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) +#endif + +#ifndef ARCH_SLAB_MINALIGN +/* + * Enforce a minimum alignment for all caches. + * Intended for archs that get misalignment faults even for BYTES_PER_WORD + * aligned buffers. Includes ARCH_KMALLOC_MINALIGN. + * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables + * some debug features. + */ +#define ARCH_SLAB_MINALIGN 0 +#endif + /* * struct kmem_cache * @@ -24,19 +50,21 @@ */ struct kmem_cache { -/* 1) Cache tunables. Protected by cache_chain_mutex */ +/* 1) per-cpu data, touched during every alloc/free */ + struct array_cache *array[NR_CPUS]; +/* 2) Cache tunables. 
Protected by cache_chain_mutex */ unsigned int batchcount; unsigned int limit; unsigned int shared; unsigned int buffer_size; u32 reciprocal_buffer_size; -/* 2) touched by every alloc & free from the backend */ +/* 3) touched by every alloc & free from the backend */ unsigned int flags; /* constant flags */ unsigned int num; /* # of objs per slab */ -/* 3) cache_grow/shrink */ +/* 4) cache_grow/shrink */ /* order of pgs per slab (2^n) */ unsigned int gfporder; @@ -52,11 +80,11 @@ struct kmem_cache { /* constructor func */ void (*ctor)(void *obj); -/* 4) cache creation/removal */ +/* 5) cache creation/removal */ const char *name; struct list_head next; -/* 5) statistics */ +/* 6) statistics */ #ifdef CONFIG_DEBUG_SLAB unsigned long num_active; unsigned long num_allocations; @@ -83,18 +111,16 @@ struct kmem_cache { int obj_size; #endif /* CONFIG_DEBUG_SLAB */ -/* 6) per-cpu/per-node data, touched during every alloc/free */ /* - * We put array[] at the end of kmem_cache, because we want to size - * this array to nr_cpu_ids slots instead of NR_CPUS + * We put nodelists[] at the end of kmem_cache, because we want to size + * this array to nr_node_ids slots instead of MAX_NUMNODES * (see kmem_cache_init()) - * We still use [NR_CPUS] and not [1] or [0] because cache_cache - * is statically defined, so we reserve the max number of cpus. + * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache + * is statically defined, so we reserve the max number of nodes. 
*/ - struct kmem_list3 **nodelists; - struct array_cache *array[NR_CPUS]; + struct kmem_list3 *nodelists[MAX_NUMNODES]; /* - * Do not add fields after array[] + * Do not add fields after nodelists[] */ }; diff --git a/trunk/include/linux/slob_def.h b/trunk/include/linux/slob_def.h index 0ec00b39d006..4382db09df4f 100644 --- a/trunk/include/linux/slob_def.h +++ b/trunk/include/linux/slob_def.h @@ -1,6 +1,16 @@ #ifndef __LINUX_SLOB_DEF_H #define __LINUX_SLOB_DEF_H +#ifdef ARCH_DMA_MINALIGN +#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN +#else +#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long) +#endif + +#ifndef ARCH_SLAB_MINALIGN +#define ARCH_SLAB_MINALIGN __alignof__(unsigned long) +#endif + void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep, diff --git a/trunk/include/linux/slub_def.h b/trunk/include/linux/slub_def.h index 4b35c06dfbc5..c8668d161dd8 100644 --- a/trunk/include/linux/slub_def.h +++ b/trunk/include/linux/slub_def.h @@ -113,6 +113,16 @@ struct kmem_cache { #define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE) +#ifdef ARCH_DMA_MINALIGN +#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN +#else +#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) +#endif + +#ifndef ARCH_SLAB_MINALIGN +#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) +#endif + /* * Maximum kmalloc object size handled by SLUB. Larger object allocations * are passed through to the page allocator. The page allocator "fastpath" @@ -218,19 +228,6 @@ kmalloc_order(size_t size, gfp_t flags, unsigned int order) return ret; } -/** - * Calling this on allocated memory will check that the memory - * is expected to be in use, and print warnings if not. 
- */ -#ifdef CONFIG_SLUB_DEBUG -extern bool verify_mem_not_deleted(const void *x); -#else -static inline bool verify_mem_not_deleted(const void *x) -{ - return true; -} -#endif - #ifdef CONFIG_TRACING extern void * kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size); diff --git a/trunk/include/xen/balloon.h b/trunk/include/xen/balloon.h index 4076ed72afbd..a2b22f01a51d 100644 --- a/trunk/include/xen/balloon.h +++ b/trunk/include/xen/balloon.h @@ -23,13 +23,3 @@ void balloon_set_new_target(unsigned long target); int alloc_xenballooned_pages(int nr_pages, struct page** pages); void free_xenballooned_pages(int nr_pages, struct page** pages); - -struct sys_device; -#ifdef CONFIG_XEN_SELFBALLOONING -extern int register_xen_selfballooning(struct sys_device *sysdev); -#else -static inline int register_xen_selfballooning(struct sys_device *sysdev) -{ - return -ENOSYS; -} -#endif diff --git a/trunk/include/xen/events.h b/trunk/include/xen/events.h index d287997d3eab..9af21e19545a 100644 --- a/trunk/include/xen/events.h +++ b/trunk/include/xen/events.h @@ -74,6 +74,8 @@ int xen_set_callback_via(uint64_t via); void xen_evtchn_do_upcall(struct pt_regs *regs); void xen_hvm_evtchn_do_upcall(void); +/* Allocate a pirq for a physical interrupt, given a gsi. */ +int xen_allocate_pirq_gsi(unsigned gsi); /* Bind a pirq for a physical interrupt to an irq. 
*/ int xen_bind_pirq_gsi_to_irq(unsigned gsi, unsigned pirq, int shareable, char *name); diff --git a/trunk/include/xen/hvc-console.h b/trunk/include/xen/hvc-console.h index 901724dc528d..c3adde32669b 100644 --- a/trunk/include/xen/hvc-console.h +++ b/trunk/include/xen/hvc-console.h @@ -6,13 +6,11 @@ extern struct console xenboot_console; #ifdef CONFIG_HVC_XEN void xen_console_resume(void); void xen_raw_console_write(const char *str); -__attribute__((format(printf, 1, 2))) void xen_raw_printk(const char *fmt, ...); #else static inline void xen_console_resume(void) { } static inline void xen_raw_console_write(const char *str) { } -static inline __attribute__((format(printf, 1, 2))) -void xen_raw_printk(const char *fmt, ...) { } +static inline void xen_raw_printk(const char *fmt, ...) { } #endif #endif /* XEN_HVC_CONSOLE_H */ diff --git a/trunk/include/xen/interface/xen.h b/trunk/include/xen/interface/xen.h index 6acd9cefd517..70213b4515eb 100644 --- a/trunk/include/xen/interface/xen.h +++ b/trunk/include/xen/interface/xen.h @@ -450,45 +450,6 @@ struct start_info { int8_t cmd_line[MAX_GUEST_CMDLINE]; }; -struct dom0_vga_console_info { - uint8_t video_type; -#define XEN_VGATYPE_TEXT_MODE_3 0x03 -#define XEN_VGATYPE_VESA_LFB 0x23 - - union { - struct { - /* Font height, in pixels. */ - uint16_t font_height; - /* Cursor location (column, row). */ - uint16_t cursor_x, cursor_y; - /* Number of rows and columns (dimensions in characters). */ - uint16_t rows, columns; - } text_mode_3; - - struct { - /* Width and height, in pixels. */ - uint16_t width, height; - /* Bytes per scan line. */ - uint16_t bytes_per_line; - /* Bits per pixel. */ - uint16_t bits_per_pixel; - /* LFB physical address, and size (in units of 64kB). 
*/ - uint32_t lfb_base; - uint32_t lfb_size; - /* RGB mask offsets and sizes, as defined by VBE 1.2+ */ - uint8_t red_pos, red_size; - uint8_t green_pos, green_size; - uint8_t blue_pos, blue_size; - uint8_t rsvd_pos, rsvd_size; - - /* VESA capabilities (offset 0xa, VESA command 0x4f00). */ - uint32_t gbl_caps; - /* Mode attributes (offset 0x0, VESA command 0x4f01). */ - uint16_t mode_attrs; - } vesa_lfb; - } u; -}; - /* These flags are passed in the 'flags' field of start_info_t. */ #define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ #define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ diff --git a/trunk/include/xen/tmem.h b/trunk/include/xen/tmem.h deleted file mode 100644 index 82e2c83a32f5..000000000000 --- a/trunk/include/xen/tmem.h +++ /dev/null @@ -1,5 +0,0 @@ -#ifndef _XEN_TMEM_H -#define _XEN_TMEM_H -/* defined in drivers/xen/tmem.c */ -extern int tmem_enabled; -#endif /* _XEN_TMEM_H */ diff --git a/trunk/include/xen/xenbus.h b/trunk/include/xen/xenbus.h index aceeca799fd7..5467369e0889 100644 --- a/trunk/include/xen/xenbus.h +++ b/trunk/include/xen/xenbus.h @@ -223,9 +223,7 @@ int xenbus_free_evtchn(struct xenbus_device *dev, int port); enum xenbus_state xenbus_read_driver_state(const char *path); -__attribute__((format(printf, 3, 4))) void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...); -__attribute__((format(printf, 3, 4))) void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...); const char *xenbus_strstate(enum xenbus_state state); diff --git a/trunk/mm/slab.c b/trunk/mm/slab.c index 1e523ed47c61..d96e223de775 100644 --- a/trunk/mm/slab.c +++ b/trunk/mm/slab.c @@ -574,9 +574,7 @@ static struct arraycache_init initarray_generic = { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; /* internal cache of cache description objs */ -static struct kmem_list3 *cache_cache_nodelists[MAX_NUMNODES]; static struct kmem_cache cache_cache = { - .nodelists = cache_cache_nodelists, .batchcount = 1, 
.limit = BOOT_CPUCACHE_ENTRIES, .shared = 1, @@ -1494,10 +1492,11 @@ void __init kmem_cache_init(void) cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node]; /* - * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids + * struct kmem_cache size depends on nr_node_ids, which + * can be less than MAX_NUMNODES. */ - cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) + - nr_node_ids * sizeof(struct kmem_list3 *); + cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) + + nr_node_ids * sizeof(struct kmem_list3 *); #if DEBUG cache_cache.obj_size = cache_cache.buffer_size; #endif @@ -2309,7 +2308,6 @@ kmem_cache_create (const char *name, size_t size, size_t align, if (!cachep) goto oops; - cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids]; #if DEBUG cachep->obj_size = size; @@ -3155,11 +3153,12 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, objp += obj_offset(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) cachep->ctor(objp); - if (ARCH_SLAB_MINALIGN && - ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) { +#if ARCH_SLAB_MINALIGN + if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) { printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", - objp, (int)ARCH_SLAB_MINALIGN); + objp, ARCH_SLAB_MINALIGN); } +#endif return objp; } #else diff --git a/trunk/mm/slob.c b/trunk/mm/slob.c index 0ae881831ae2..46e0aee33a23 100644 --- a/trunk/mm/slob.c +++ b/trunk/mm/slob.c @@ -482,8 +482,6 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node) int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); void *ret; - gfp &= gfp_allowed_mask; - lockdep_trace_alloc(gfp); if (size < PAGE_SIZE - align) { @@ -610,10 +608,6 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node) { void *b; - flags &= gfp_allowed_mask; - - lockdep_trace_alloc(flags); - if (c->size < PAGE_SIZE) { b = slob_alloc(c->size, flags, c->align, node); trace_kmem_cache_alloc_node(_RET_IP_, b, 
c->size, diff --git a/trunk/mm/slub.c b/trunk/mm/slub.c index ba83f3fd0757..35f351f26193 100644 --- a/trunk/mm/slub.c +++ b/trunk/mm/slub.c @@ -27,7 +27,6 @@ #include #include #include -#include #include @@ -192,12 +191,8 @@ static LIST_HEAD(slab_caches); /* * Tracking user of a slab. */ -#define TRACK_ADDRS_COUNT 16 struct track { unsigned long addr; /* Called from address */ -#ifdef CONFIG_STACKTRACE - unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */ -#endif int cpu; /* Was running on cpu */ int pid; /* Pid context */ unsigned long when; /* When did the operation occur */ @@ -425,24 +420,6 @@ static void set_track(struct kmem_cache *s, void *object, struct track *p = get_track(s, object, alloc); if (addr) { -#ifdef CONFIG_STACKTRACE - struct stack_trace trace; - int i; - - trace.nr_entries = 0; - trace.max_entries = TRACK_ADDRS_COUNT; - trace.entries = p->addrs; - trace.skip = 3; - save_stack_trace(&trace); - - /* See rant in lockdep.c */ - if (trace.nr_entries != 0 && - trace.entries[trace.nr_entries - 1] == ULONG_MAX) - trace.nr_entries--; - - for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++) - p->addrs[i] = 0; -#endif p->addr = addr; p->cpu = smp_processor_id(); p->pid = current->pid; @@ -467,16 +444,6 @@ static void print_track(const char *s, struct track *t) printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n", s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid); -#ifdef CONFIG_STACKTRACE - { - int i; - for (i = 0; i < TRACK_ADDRS_COUNT; i++) - if (t->addrs[i]) - printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]); - else - break; - } -#endif } static void print_tracking(struct kmem_cache *s, void *object) @@ -590,10 +557,10 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) memset(p + s->objsize, val, s->inuse - s->objsize); } -static u8 *check_bytes8(u8 *start, u8 value, unsigned int bytes) +static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes) { while (bytes) { - if (*start != value) + if 
(*start != (u8)value) return start; start++; bytes--; @@ -601,38 +568,6 @@ static u8 *check_bytes8(u8 *start, u8 value, unsigned int bytes) return NULL; } -static u8 *check_bytes(u8 *start, u8 value, unsigned int bytes) -{ - u64 value64; - unsigned int words, prefix; - - if (bytes <= 16) - return check_bytes8(start, value, bytes); - - value64 = value | value << 8 | value << 16 | value << 24; - value64 = value64 | value64 << 32; - prefix = 8 - ((unsigned long)start) % 8; - - if (prefix) { - u8 *r = check_bytes8(start, value, prefix); - if (r) - return r; - start += prefix; - bytes -= prefix; - } - - words = bytes / 8; - - while (words) { - if (*(u64 *)start != value64) - return check_bytes8(start, value, 8); - start += 8; - words--; - } - - return check_bytes8(start, value, bytes % 8); -} - static void restore_bytes(struct kmem_cache *s, char *message, u8 data, void *from, void *to) { @@ -2993,42 +2928,6 @@ size_t ksize(const void *object) } EXPORT_SYMBOL(ksize); -#ifdef CONFIG_SLUB_DEBUG -bool verify_mem_not_deleted(const void *x) -{ - struct page *page; - void *object = (void *)x; - unsigned long flags; - bool rv; - - if (unlikely(ZERO_OR_NULL_PTR(x))) - return false; - - local_irq_save(flags); - - page = virt_to_head_page(x); - if (unlikely(!PageSlab(page))) { - /* maybe it was from stack? */ - rv = true; - goto out_unlock; - } - - slab_lock(page); - if (on_freelist(page->slab, page, object)) { - object_err(page->slab, page, object, "Object is on free-list"); - rv = false; - } else { - rv = true; - } - slab_unlock(page); - -out_unlock: - local_irq_restore(flags); - return rv; -} -EXPORT_SYMBOL(verify_mem_not_deleted); -#endif - void kfree(const void *x) { struct page *page;