diff --git a/[refs] b/[refs] index 79801e66cb1a..4c646cb43e04 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: c1f792a5bfebcf7ee1d739c3cb9baeaede0160e7 +refs/heads/master: 85d6509dc8ca24b2b652863ef7a75622ddca17d6 diff --git a/trunk/Documentation/filesystems/ubifs.txt b/trunk/Documentation/filesystems/ubifs.txt index a0a61d2f389f..8e4fab639d9c 100644 --- a/trunk/Documentation/filesystems/ubifs.txt +++ b/trunk/Documentation/filesystems/ubifs.txt @@ -111,6 +111,34 @@ The following is an example of the kernel boot arguments to attach mtd0 to UBI and mount volume "rootfs": ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs + +Module Parameters for Debugging +=============================== + +When UBIFS has been compiled with debugging enabled, there are 2 module +parameters that are available to control aspects of testing and debugging. + +debug_chks Selects extra checks that UBIFS can do while running: + + Check Flag value + + General checks 1 + Check Tree Node Cache (TNC) 2 + Check indexing tree size 4 + Check orphan area 8 + Check old indexing tree 16 + Check LEB properties (lprops) 32 + Check leaf nodes and inodes 64 + +debug_tsts Selects a mode of testing, as follows: + + Test mode Flag value + + Failure mode for recovery testing 4 + +For example, set debug_chks to 3 to enable general and TNC checks. 
+ + References ========== diff --git a/trunk/Documentation/x86/boot.txt b/trunk/Documentation/x86/boot.txt index 7c3a8801b7ce..9b7221a86df2 100644 --- a/trunk/Documentation/x86/boot.txt +++ b/trunk/Documentation/x86/boot.txt @@ -674,7 +674,7 @@ Protocol: 2.10+ Field name: init_size Type: read -Offset/size: 0x260/4 +Offset/size: 0x25c/4 This field indicates the amount of linear contiguous memory starting at the kernel runtime start address that the kernel needs before it diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index 72b979d5076b..187282da9213 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -1,5 +1,4 @@ - List of maintainers and how to submit kernel changes Please try to follow the guidelines below. This will make things @@ -6322,7 +6321,7 @@ F: drivers/scsi/u14-34f.c UBI FILE SYSTEM (UBIFS) M: Artem Bityutskiy -M: Adrian Hunter +M: Adrian Hunter L: linux-mtd@lists.infradead.org T: git git://git.infradead.org/ubifs-2.6.git W: http://www.linux-mtd.infradead.org/doc/ubifs.html diff --git a/trunk/Makefile b/trunk/Makefile index 6a5bdad524af..60d91f76c2fd 100644 --- a/trunk/Makefile +++ b/trunk/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc7 NAME = Sneaky Weasel # *DOCUMENTATION* diff --git a/trunk/arch/arm/mach-davinci/board-dm365-evm.c b/trunk/arch/arm/mach-davinci/board-dm365-evm.c index 09a87e61ffcf..c67f684ee3e5 100644 --- a/trunk/arch/arm/mach-davinci/board-dm365-evm.c +++ b/trunk/arch/arm/mach-davinci/board-dm365-evm.c @@ -520,7 +520,7 @@ static void __init evm_init_cpld(void) */ if (have_imager()) { label = "HD imager"; - mux |= 2; + mux |= 1; /* externally mux MMC1/ENET/AIC33 to imager */ mux |= BIT(6) | BIT(5) | BIT(3); @@ -540,7 +540,7 @@ static void __init evm_init_cpld(void) resets &= ~BIT(1); if (have_tvp7002()) { - mux |= 1; + mux |= 2; resets &= ~BIT(2); label = "tvp7002 HD"; } else { diff --git a/trunk/arch/arm/mach-davinci/gpio.c b/trunk/arch/arm/mach-davinci/gpio.c index 
cafbe13a82a5..e7221398e5af 100644 --- a/trunk/arch/arm/mach-davinci/gpio.c +++ b/trunk/arch/arm/mach-davinci/gpio.c @@ -254,10 +254,8 @@ gpio_irq_handler(unsigned irq, struct irq_desc *desc) { struct davinci_gpio_regs __iomem *g; u32 mask = 0xffff; - struct davinci_gpio_controller *d; - d = (struct davinci_gpio_controller *)irq_desc_get_handler_data(desc); - g = (struct davinci_gpio_regs __iomem *)d->regs; + g = (__force struct davinci_gpio_regs __iomem *) irq_desc_get_handler_data(desc); /* we only care about one bank */ if (irq & 1) @@ -276,14 +274,11 @@ gpio_irq_handler(unsigned irq, struct irq_desc *desc) if (!status) break; __raw_writel(status, &g->intstat); - - /* now demux them to the right lowlevel handler */ - n = d->irq_base; - if (irq & 1) { - n += 16; + if (irq & 1) status >>= 16; - } + /* now demux them to the right lowlevel handler */ + n = (int)irq_get_handler_data(irq); while (status) { res = ffs(status); n += res; @@ -429,13 +424,7 @@ static int __init davinci_gpio_irq_setup(void) /* set up all irqs in this bank */ irq_set_chained_handler(bank_irq, gpio_irq_handler); - - /* - * Each chip handles 32 gpios, and each irq bank consists of 16 - * gpio irqs. Pass the irq bank's corresponding controller to - * the chained irq handler. 
- */ - irq_set_handler_data(bank_irq, &chips[gpio / 32]); + irq_set_handler_data(bank_irq, (__force void *)g); for (i = 0; i < 16 && gpio < ngpio; i++, irq++, gpio++) { irq_set_chip(irq, &gpio_irqchip); diff --git a/trunk/arch/arm/mach-davinci/irq.c b/trunk/arch/arm/mach-davinci/irq.c index 952dc126c390..d8c1af025931 100644 --- a/trunk/arch/arm/mach-davinci/irq.c +++ b/trunk/arch/arm/mach-davinci/irq.c @@ -52,12 +52,6 @@ davinci_alloc_gc(void __iomem *base, unsigned int irq_start, unsigned int num) struct irq_chip_type *ct; gc = irq_alloc_generic_chip("AINTC", 1, irq_start, base, handle_edge_irq); - if (!gc) { - pr_err("%s: irq_alloc_generic_chip for IRQ %u failed\n", - __func__, irq_start); - return; - } - ct = gc->chip_types; ct->chip.irq_ack = irq_gc_ack_set_bit; ct->chip.irq_mask = irq_gc_mask_clr_bit; diff --git a/trunk/arch/sparc/include/asm/ptrace.h b/trunk/arch/sparc/include/asm/ptrace.h index b928b31424b1..c7ad3fe2b252 100644 --- a/trunk/arch/sparc/include/asm/ptrace.h +++ b/trunk/arch/sparc/include/asm/ptrace.h @@ -205,7 +205,6 @@ do { current_thread_info()->syscall_noerror = 1; \ } while (0) #define user_mode(regs) (!((regs)->tstate & TSTATE_PRIV)) #define instruction_pointer(regs) ((regs)->tpc) -#define instruction_pointer_set(regs, val) ((regs)->tpc = (val)) #define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP]) #define regs_return_value(regs) ((regs)->u_regs[UREG_I0]) #ifdef CONFIG_SMP diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index 37357a599dca..da349723d411 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -1170,7 +1170,7 @@ comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" config AMD_NUMA def_bool y prompt "Old style AMD Opteron NUMA detection" - depends on X86_64 && NUMA && PCI + depends on NUMA && PCI ---help--- Enable AMD NUMA node topology detection. You should say Y here if you have a multi processor AMD system. 
This uses an old method to diff --git a/trunk/arch/x86/kernel/reboot.c b/trunk/arch/x86/kernel/reboot.c index 9242436e9937..4f0d46fefa7f 100644 --- a/trunk/arch/x86/kernel/reboot.c +++ b/trunk/arch/x86/kernel/reboot.c @@ -419,30 +419,6 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"), }, }, - { /* Handle problems with rebooting on the Latitude E6320. */ - .callback = set_pci_reboot, - .ident = "Dell Latitude E6320", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"), - }, - }, - { /* Handle problems with rebooting on the Latitude E5420. */ - .callback = set_pci_reboot, - .ident = "Dell Latitude E5420", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5420"), - }, - }, - { /* Handle problems with rebooting on the Latitude E6420. */ - .callback = set_pci_reboot, - .ident = "Dell Latitude E6420", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"), - }, - }, { } }; diff --git a/trunk/drivers/gpu/drm/i915/i915_drv.h b/trunk/drivers/gpu/drm/i915/i915_drv.h index ce7914c4c044..f245c588ae95 100644 --- a/trunk/drivers/gpu/drm/i915/i915_drv.h +++ b/trunk/drivers/gpu/drm/i915/i915_drv.h @@ -262,7 +262,6 @@ enum intel_pch { }; #define QUIRK_PIPEA_FORCE (1<<0) -#define QUIRK_LVDS_SSC_DISABLE (1<<1) struct intel_fbdev; @@ -1195,9 +1194,7 @@ void i915_gem_free_all_phys_object(struct drm_device *dev); void i915_gem_release(struct drm_device *dev, struct drm_file *file); uint32_t -i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev, - uint32_t size, - int tiling_mode); +i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj); /* i915_gem_gtt.c */ void i915_gem_restore_gtt_mappings(struct drm_device *dev); diff --git a/trunk/drivers/gpu/drm/i915/i915_gem.c b/trunk/drivers/gpu/drm/i915/i915_gem.c index a087e1bf0c2f..5c0d1247f453 100644 --- 
a/trunk/drivers/gpu/drm/i915/i915_gem.c +++ b/trunk/drivers/gpu/drm/i915/i915_gem.c @@ -1374,24 +1374,25 @@ i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj) } static uint32_t -i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) +i915_gem_get_gtt_size(struct drm_i915_gem_object *obj) { - uint32_t gtt_size; + struct drm_device *dev = obj->base.dev; + uint32_t size; if (INTEL_INFO(dev)->gen >= 4 || - tiling_mode == I915_TILING_NONE) - return size; + obj->tiling_mode == I915_TILING_NONE) + return obj->base.size; /* Previous chips need a power-of-two fence region when tiling */ if (INTEL_INFO(dev)->gen == 3) - gtt_size = 1024*1024; + size = 1024*1024; else - gtt_size = 512*1024; + size = 512*1024; - while (gtt_size < size) - gtt_size <<= 1; + while (size < obj->base.size) + size <<= 1; - return gtt_size; + return size; } /** @@ -1402,52 +1403,59 @@ i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) * potential fence register mapping. */ static uint32_t -i915_gem_get_gtt_alignment(struct drm_device *dev, - uint32_t size, - int tiling_mode) +i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj) { + struct drm_device *dev = obj->base.dev; + /* * Minimum alignment is 4k (GTT page size), but might be greater * if a fence register is needed for the object. */ if (INTEL_INFO(dev)->gen >= 4 || - tiling_mode == I915_TILING_NONE) + obj->tiling_mode == I915_TILING_NONE) return 4096; /* * Previous chips need to be aligned to the size of the smallest * fence register that can contain the object. 
*/ - return i915_gem_get_gtt_size(dev, size, tiling_mode); + return i915_gem_get_gtt_size(obj); } /** * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an * unfenced object - * @dev: the device - * @size: size of the object - * @tiling_mode: tiling mode of the object + * @obj: object to check * * Return the required GTT alignment for an object, only taking into account * unfenced tiled surface requirements. */ uint32_t -i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev, - uint32_t size, - int tiling_mode) +i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj) { + struct drm_device *dev = obj->base.dev; + int tile_height; + /* * Minimum alignment is 4k (GTT page size) for sane hw. */ if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) || - tiling_mode == I915_TILING_NONE) + obj->tiling_mode == I915_TILING_NONE) return 4096; - /* Previous hardware however needs to be aligned to a power-of-two - * tile height. The simplest method for determining this is to reuse - * the power-of-tile object size. + /* + * Older chips need unfenced tiled buffers to be aligned to the left + * edge of an even tile row (where tile rows are counted as if the bo is + * placed in a fenced gtt region). 
*/ - return i915_gem_get_gtt_size(dev, size, tiling_mode); + if (IS_GEN2(dev)) + tile_height = 16; + else if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) + tile_height = 32; + else + tile_height = 8; + + return tile_height * obj->stride * 2; } int @@ -2736,16 +2744,9 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, return -EINVAL; } - fence_size = i915_gem_get_gtt_size(dev, - obj->base.size, - obj->tiling_mode); - fence_alignment = i915_gem_get_gtt_alignment(dev, - obj->base.size, - obj->tiling_mode); - unfenced_alignment = - i915_gem_get_unfenced_gtt_alignment(dev, - obj->base.size, - obj->tiling_mode); + fence_size = i915_gem_get_gtt_size(obj); + fence_alignment = i915_gem_get_gtt_alignment(obj); + unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(obj); if (alignment == 0) alignment = map_and_fenceable ? fence_alignment : diff --git a/trunk/drivers/gpu/drm/i915/i915_gem_tiling.c b/trunk/drivers/gpu/drm/i915/i915_gem_tiling.c index 99c4faa59d8f..82d70fd9e933 100644 --- a/trunk/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/trunk/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -348,9 +348,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, /* Rebind if we need a change of alignment */ if (!obj->map_and_fenceable) { u32 unfenced_alignment = - i915_gem_get_unfenced_gtt_alignment(dev, - obj->base.size, - args->tiling_mode); + i915_gem_get_unfenced_gtt_alignment(obj); if (obj->gtt_offset & (unfenced_alignment - 1)) ret = i915_gem_object_unbind(obj); } diff --git a/trunk/drivers/gpu/drm/i915/intel_display.c b/trunk/drivers/gpu/drm/i915/intel_display.c index 0f1c799afea1..21b6f93fe919 100644 --- a/trunk/drivers/gpu/drm/i915/intel_display.c +++ b/trunk/drivers/gpu/drm/i915/intel_display.c @@ -4305,8 +4305,7 @@ static void intel_update_watermarks(struct drm_device *dev) static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv) { - return dev_priv->lvds_use_ssc && i915_panel_use_ssc - && !(dev_priv->quirks & 
QUIRK_LVDS_SSC_DISABLE); + return dev_priv->lvds_use_ssc && i915_panel_use_ssc; } static int i9xx_crtc_mode_set(struct drm_crtc *crtc, @@ -7811,15 +7810,6 @@ static void quirk_pipea_force (struct drm_device *dev) DRM_DEBUG_DRIVER("applying pipe a force quirk\n"); } -/* - * Some machines (Lenovo U160) do not work with SSC on LVDS for some reason - */ -static void quirk_ssc_force_disable(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - dev_priv->quirks |= QUIRK_LVDS_SSC_DISABLE; -} - struct intel_quirk { int device; int subsystem_vendor; @@ -7848,9 +7838,6 @@ struct intel_quirk intel_quirks[] = { /* 855 & before need to leave pipe A & dpll A up */ { 0x3582, PCI_ANY_ID, PCI_ANY_ID, quirk_pipea_force }, { 0x2562, PCI_ANY_ID, PCI_ANY_ID, quirk_pipea_force }, - - /* Lenovo U160 cannot use SSC on LVDS */ - { 0x0046, 0x17aa, 0x3920, quirk_ssc_force_disable }, }; static void intel_init_quirks(struct drm_device *dev) diff --git a/trunk/drivers/mmc/host/Kconfig b/trunk/drivers/mmc/host/Kconfig index 56dbf3f6ad08..d9ca2623038d 100644 --- a/trunk/drivers/mmc/host/Kconfig +++ b/trunk/drivers/mmc/host/Kconfig @@ -112,29 +112,19 @@ config MMC_SDHCI_OF_HLWD If unsure, say N. -config MMC_SDHCI_PLTFM - tristate "SDHCI support on the platform specific bus" - depends on MMC_SDHCI - help - This selects the platform specific bus support for Secure Digital Host - Controller Interface. - - If you have a controller with this interface, say Y or M here. - - If unsure, say N. - config MMC_SDHCI_CNS3XXX - bool "SDHCI support on the Cavium Networks CNS3xxx SoC" + tristate "SDHCI support on the Cavium Networks CNS3xxx SoC" depends on ARCH_CNS3XXX - depends on MMC_SDHCI_PLTFM + depends on MMC_SDHCI help This selects the SDHCI support for CNS3xxx System-on-Chip devices. If unsure, say N. 
config MMC_SDHCI_ESDHC_IMX - bool "SDHCI platform support for the Freescale eSDHC i.MX controller" - depends on MMC_SDHCI_PLTFM && (ARCH_MX25 || ARCH_MX35 || ARCH_MX5) + tristate "SDHCI platform support for the Freescale eSDHC i.MX controller" + depends on ARCH_MX25 || ARCH_MX35 || ARCH_MX5 + depends on MMC_SDHCI select MMC_SDHCI_IO_ACCESSORS help This selects the Freescale eSDHC controller support on the platform @@ -143,9 +133,9 @@ config MMC_SDHCI_ESDHC_IMX If unsure, say N. config MMC_SDHCI_DOVE - bool "SDHCI support on Marvell's Dove SoC" + tristate "SDHCI support on Marvell's Dove SoC" depends on ARCH_DOVE - depends on MMC_SDHCI_PLTFM + depends on MMC_SDHCI select MMC_SDHCI_IO_ACCESSORS help This selects the Secure Digital Host Controller Interface in @@ -154,8 +144,9 @@ config MMC_SDHCI_DOVE If unsure, say N. config MMC_SDHCI_TEGRA - bool "SDHCI platform support for the Tegra SD/MMC Controller" - depends on MMC_SDHCI_PLTFM && ARCH_TEGRA + tristate "SDHCI platform support for the Tegra SD/MMC Controller" + depends on ARCH_TEGRA + depends on MMC_SDHCI select MMC_SDHCI_IO_ACCESSORS help This selects the Tegra SD/MMC controller. 
If you have a Tegra diff --git a/trunk/drivers/mmc/host/Makefile b/trunk/drivers/mmc/host/Makefile index 58a5cf73d6e9..732ec1e2a3d0 100644 --- a/trunk/drivers/mmc/host/Makefile +++ b/trunk/drivers/mmc/host/Makefile @@ -44,12 +44,14 @@ obj-$(CONFIG_MMC_JZ4740) += jz4740_mmc.o obj-$(CONFIG_MMC_VUB300) += vub300.o obj-$(CONFIG_MMC_USHC) += ushc.o -obj-$(CONFIG_MMC_SDHCI_PLTFM) += sdhci-platform.o -sdhci-platform-y := sdhci-pltfm.o -sdhci-platform-$(CONFIG_MMC_SDHCI_CNS3XXX) += sdhci-cns3xxx.o -sdhci-platform-$(CONFIG_MMC_SDHCI_ESDHC_IMX) += sdhci-esdhc-imx.o -sdhci-platform-$(CONFIG_MMC_SDHCI_DOVE) += sdhci-dove.o -sdhci-platform-$(CONFIG_MMC_SDHCI_TEGRA) += sdhci-tegra.o +obj-$(CONFIG_MMC_SDHCI_CNS3XXX) += sdhci-cns3xxx.o +sdhci-cns3xxx-objs := sdhci-pltfm.o +obj-$(CONFIG_MMC_SDHCI_ESDHC_IMX) += sdhci-esdhc-imx.o +sdhci-esdhc-imx-objs := sdhci-pltfm.o +obj-$(CONFIG_MMC_SDHCI_DOVE) += sdhci-dove.o +sdhci-dove-objs := sdhci-pltfm.o +obj-$(CONFIG_MMC_SDHCI_TEGRA) += sdhci-tegra.o +sdhci-tegra-objs := sdhci-pltfm.o obj-$(CONFIG_MMC_SDHCI_OF) += sdhci-of.o sdhci-of-y := sdhci-of-core.o diff --git a/trunk/drivers/mmc/host/sdhci-cns3xxx.c b/trunk/drivers/mmc/host/sdhci-cns3xxx.c index 9ebd1d7759dc..ac4b26f555e6 100644 --- a/trunk/drivers/mmc/host/sdhci-cns3xxx.c +++ b/trunk/drivers/mmc/host/sdhci-cns3xxx.c @@ -86,7 +86,7 @@ static struct sdhci_ops sdhci_cns3xxx_ops = { .set_clock = sdhci_cns3xxx_set_clock, }; -struct sdhci_pltfm_data sdhci_cns3xxx_pdata = { +static struct sdhci_pltfm_data sdhci_cns3xxx_pdata = { .ops = &sdhci_cns3xxx_ops, .quirks = SDHCI_QUIRK_BROKEN_DMA | SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | @@ -95,3 +95,43 @@ struct sdhci_pltfm_data sdhci_cns3xxx_pdata = { SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | SDHCI_QUIRK_NONSTANDARD_CLOCK, }; + +static int __devinit sdhci_cns3xxx_probe(struct platform_device *pdev) +{ + return sdhci_pltfm_register(pdev, &sdhci_cns3xxx_pdata); +} + +static int __devexit sdhci_cns3xxx_remove(struct platform_device *pdev) +{ + return 
sdhci_pltfm_unregister(pdev); +} + +static struct platform_driver sdhci_cns3xxx_driver = { + .driver = { + .name = "sdhci-cns3xxx", + .owner = THIS_MODULE, + }, + .probe = sdhci_cns3xxx_probe, + .remove = __devexit_p(sdhci_cns3xxx_remove), +#ifdef CONFIG_PM + .suspend = sdhci_pltfm_suspend, + .resume = sdhci_pltfm_resume, +#endif +}; + +static int __init sdhci_cns3xxx_init(void) +{ + return platform_driver_register(&sdhci_cns3xxx_driver); +} +module_init(sdhci_cns3xxx_init); + +static void __exit sdhci_cns3xxx_exit(void) +{ + platform_driver_unregister(&sdhci_cns3xxx_driver); +} +module_exit(sdhci_cns3xxx_exit); + +MODULE_DESCRIPTION("SDHCI driver for CNS3xxx"); +MODULE_AUTHOR("Scott Shu, " + "Anton Vorontsov "); +MODULE_LICENSE("GPL v2"); diff --git a/trunk/drivers/mmc/host/sdhci-dove.c b/trunk/drivers/mmc/host/sdhci-dove.c index 2aeef4ffed8c..49aa533f23d4 100644 --- a/trunk/drivers/mmc/host/sdhci-dove.c +++ b/trunk/drivers/mmc/host/sdhci-dove.c @@ -61,10 +61,50 @@ static struct sdhci_ops sdhci_dove_ops = { .read_l = sdhci_dove_readl, }; -struct sdhci_pltfm_data sdhci_dove_pdata = { +static struct sdhci_pltfm_data sdhci_dove_pdata = { .ops = &sdhci_dove_ops, .quirks = SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER | SDHCI_QUIRK_NO_BUSY_IRQ | SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | SDHCI_QUIRK_FORCE_DMA, }; + +static int __devinit sdhci_dove_probe(struct platform_device *pdev) +{ + return sdhci_pltfm_register(pdev, &sdhci_dove_pdata); +} + +static int __devexit sdhci_dove_remove(struct platform_device *pdev) +{ + return sdhci_pltfm_unregister(pdev); +} + +static struct platform_driver sdhci_dove_driver = { + .driver = { + .name = "sdhci-dove", + .owner = THIS_MODULE, + }, + .probe = sdhci_dove_probe, + .remove = __devexit_p(sdhci_dove_remove), +#ifdef CONFIG_PM + .suspend = sdhci_pltfm_suspend, + .resume = sdhci_pltfm_resume, +#endif +}; + +static int __init sdhci_dove_init(void) +{ + return platform_driver_register(&sdhci_dove_driver); +} +module_init(sdhci_dove_init); + +static 
void __exit sdhci_dove_exit(void) +{ + platform_driver_unregister(&sdhci_dove_driver); +} +module_exit(sdhci_dove_exit); + +MODULE_DESCRIPTION("SDHCI driver for Dove"); +MODULE_AUTHOR("Saeed Bishara , " + "Mike Rapoport "); +MODULE_LICENSE("GPL v2"); diff --git a/trunk/drivers/mmc/host/sdhci-esdhc-imx.c b/trunk/drivers/mmc/host/sdhci-esdhc-imx.c index a19967d0bfc4..e27ccbb5285b 100644 --- a/trunk/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/trunk/drivers/mmc/host/sdhci-esdhc-imx.c @@ -191,16 +191,6 @@ static unsigned int esdhc_pltfm_get_min_clock(struct sdhci_host *host) return clk_get_rate(pltfm_host->clk) / 256 / 16; } -static unsigned int esdhc_pltfm_get_ro(struct sdhci_host *host) -{ - struct esdhc_platform_data *boarddata = host->mmc->parent->platform_data; - - if (boarddata && gpio_is_valid(boarddata->wp_gpio)) - return gpio_get_value(boarddata->wp_gpio); - else - return -ENOSYS; -} - static struct sdhci_ops sdhci_esdhc_ops = { .read_l = esdhc_readl_le, .read_w = esdhc_readw_le, @@ -212,6 +202,24 @@ static struct sdhci_ops sdhci_esdhc_ops = { .get_min_clock = esdhc_pltfm_get_min_clock, }; +static struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = { + .quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_ADMA + | SDHCI_QUIRK_BROKEN_CARD_DETECTION, + /* ADMA has issues. 
Might be fixable */ + .ops = &sdhci_esdhc_ops, +}; + +static unsigned int esdhc_pltfm_get_ro(struct sdhci_host *host) +{ + struct esdhc_platform_data *boarddata = + host->mmc->parent->platform_data; + + if (boarddata && gpio_is_valid(boarddata->wp_gpio)) + return gpio_get_value(boarddata->wp_gpio); + else + return -ENOSYS; +} + static irqreturn_t cd_irq(int irq, void *data) { struct sdhci_host *sdhost = (struct sdhci_host *)data; @@ -220,30 +228,35 @@ static irqreturn_t cd_irq(int irq, void *data) return IRQ_HANDLED; }; -static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pdata) +static int __devinit sdhci_esdhc_imx_probe(struct platform_device *pdev) { - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct esdhc_platform_data *boarddata = host->mmc->parent->platform_data; + struct sdhci_pltfm_host *pltfm_host; + struct sdhci_host *host; + struct esdhc_platform_data *boarddata; struct clk *clk; int err; struct pltfm_imx_data *imx_data; + host = sdhci_pltfm_init(pdev, &sdhci_esdhc_imx_pdata); + if (IS_ERR(host)) + return PTR_ERR(host); + + pltfm_host = sdhci_priv(host); + + imx_data = kzalloc(sizeof(struct pltfm_imx_data), GFP_KERNEL); + if (!imx_data) + return -ENOMEM; + pltfm_host->priv = imx_data; + clk = clk_get(mmc_dev(host->mmc), NULL); if (IS_ERR(clk)) { dev_err(mmc_dev(host->mmc), "clk err\n"); - return PTR_ERR(clk); + err = PTR_ERR(clk); + goto err_clk_get; } clk_enable(clk); pltfm_host->clk = clk; - imx_data = kzalloc(sizeof(struct pltfm_imx_data), GFP_KERNEL); - if (!imx_data) { - clk_disable(pltfm_host->clk); - clk_put(pltfm_host->clk); - return -ENOMEM; - } - pltfm_host->priv = imx_data; - if (!cpu_is_mx25()) host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; @@ -257,6 +270,7 @@ static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pd if (!(cpu_is_mx25() || cpu_is_mx35() || cpu_is_mx51())) imx_data->flags |= ESDHC_FLAG_MULTIBLK_NO_INT; + boarddata = host->mmc->parent->platform_data; if 
(boarddata) { err = gpio_request_one(boarddata->wp_gpio, GPIOF_IN, "ESDHC_WP"); if (err) { @@ -289,6 +303,10 @@ static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pd host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION; } + err = sdhci_add_host(host); + if (err) + goto err_add_host; + return 0; no_card_detect_irq: @@ -297,14 +315,23 @@ static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pd boarddata->cd_gpio = err; not_supported: kfree(imx_data); - return 0; + err_add_host: + clk_disable(pltfm_host->clk); + clk_put(pltfm_host->clk); + err_clk_get: + sdhci_pltfm_free(pdev); + return err; } -static void esdhc_pltfm_exit(struct sdhci_host *host) +static int __devexit sdhci_esdhc_imx_remove(struct platform_device *pdev) { + struct sdhci_host *host = platform_get_drvdata(pdev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct esdhc_platform_data *boarddata = host->mmc->parent->platform_data; struct pltfm_imx_data *imx_data = pltfm_host->priv; + int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff); + + sdhci_remove_host(host, dead); if (boarddata && gpio_is_valid(boarddata->wp_gpio)) gpio_free(boarddata->wp_gpio); @@ -319,13 +346,37 @@ static void esdhc_pltfm_exit(struct sdhci_host *host) clk_disable(pltfm_host->clk); clk_put(pltfm_host->clk); kfree(imx_data); + + sdhci_pltfm_free(pdev); + + return 0; } -struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = { - .quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_ADMA - | SDHCI_QUIRK_BROKEN_CARD_DETECTION, - /* ADMA has issues. 
Might be fixable */ - .ops = &sdhci_esdhc_ops, - .init = esdhc_pltfm_init, - .exit = esdhc_pltfm_exit, +static struct platform_driver sdhci_esdhc_imx_driver = { + .driver = { + .name = "sdhci-esdhc-imx", + .owner = THIS_MODULE, + }, + .probe = sdhci_esdhc_imx_probe, + .remove = __devexit_p(sdhci_esdhc_imx_remove), +#ifdef CONFIG_PM + .suspend = sdhci_pltfm_suspend, + .resume = sdhci_pltfm_resume, +#endif }; + +static int __init sdhci_esdhc_imx_init(void) +{ + return platform_driver_register(&sdhci_esdhc_imx_driver); +} +module_init(sdhci_esdhc_imx_init); + +static void __exit sdhci_esdhc_imx_exit(void) +{ + platform_driver_unregister(&sdhci_esdhc_imx_driver); +} +module_exit(sdhci_esdhc_imx_exit); + +MODULE_DESCRIPTION("SDHCI driver for Freescale i.MX eSDHC"); +MODULE_AUTHOR("Wolfram Sang "); +MODULE_LICENSE("GPL v2"); diff --git a/trunk/drivers/mmc/host/sdhci-pltfm.c b/trunk/drivers/mmc/host/sdhci-pltfm.c index dbab0407f4b6..8ccf25666201 100644 --- a/trunk/drivers/mmc/host/sdhci-pltfm.c +++ b/trunk/drivers/mmc/host/sdhci-pltfm.c @@ -22,48 +22,22 @@ * Inspired by sdhci-pci.c, by Pierre Ossman */ -#include -#include -#include -#include - -#include - -#include -#include +#include #include "sdhci.h" #include "sdhci-pltfm.h" -/*****************************************************************************\ - * * - * SDHCI core callbacks * - * * -\*****************************************************************************/ - static struct sdhci_ops sdhci_pltfm_ops = { }; -/*****************************************************************************\ - * * - * Device probing/removal * - * * -\*****************************************************************************/ - -static int __devinit sdhci_pltfm_probe(struct platform_device *pdev) +struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev, + struct sdhci_pltfm_data *pdata) { - const struct platform_device_id *platid = platform_get_device_id(pdev); - struct sdhci_pltfm_data *pdata; struct sdhci_host 
*host; struct sdhci_pltfm_host *pltfm_host; struct resource *iomem; int ret; - if (platid && platid->driver_data) - pdata = (void *)platid->driver_data; - else - pdata = pdev->dev.platform_data; - iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!iomem) { ret = -ENOMEM; @@ -71,8 +45,7 @@ static int __devinit sdhci_pltfm_probe(struct platform_device *pdev) } if (resource_size(iomem) < 0x100) - dev_err(&pdev->dev, "Invalid iomem size. You may " - "experience problems.\n"); + dev_err(&pdev->dev, "Invalid iomem size!\n"); /* Some PCI-based MFD need the parent here */ if (pdev->dev.parent != &platform_bus) @@ -87,7 +60,7 @@ static int __devinit sdhci_pltfm_probe(struct platform_device *pdev) pltfm_host = sdhci_priv(host); - host->hw_name = "platform"; + host->hw_name = dev_name(&pdev->dev); if (pdata && pdata->ops) host->ops = pdata->ops; else @@ -110,126 +83,70 @@ static int __devinit sdhci_pltfm_probe(struct platform_device *pdev) goto err_remap; } - if (pdata && pdata->init) { - ret = pdata->init(host, pdata); - if (ret) - goto err_plat_init; - } - - ret = sdhci_add_host(host); - if (ret) - goto err_add_host; - platform_set_drvdata(pdev, host); - return 0; + return host; -err_add_host: - if (pdata && pdata->exit) - pdata->exit(host); -err_plat_init: - iounmap(host->ioaddr); err_remap: release_mem_region(iomem->start, resource_size(iomem)); err_request: sdhci_free_host(host); err: - printk(KERN_ERR"Probing of sdhci-pltfm failed: %d\n", ret); - return ret; + dev_err(&pdev->dev, "%s failed %d\n", __func__, ret); + return ERR_PTR(ret); } -static int __devexit sdhci_pltfm_remove(struct platform_device *pdev) +void sdhci_pltfm_free(struct platform_device *pdev) { - struct sdhci_pltfm_data *pdata = pdev->dev.platform_data; struct sdhci_host *host = platform_get_drvdata(pdev); struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - int dead; - u32 scratch; - - dead = 0; - scratch = readl(host->ioaddr + SDHCI_INT_STATUS); - if (scratch == 
(u32)-1) - dead = 1; - sdhci_remove_host(host, dead); - if (pdata && pdata->exit) - pdata->exit(host); iounmap(host->ioaddr); release_mem_region(iomem->start, resource_size(iomem)); sdhci_free_host(host); platform_set_drvdata(pdev, NULL); +} - return 0; +int sdhci_pltfm_register(struct platform_device *pdev, + struct sdhci_pltfm_data *pdata) +{ + struct sdhci_host *host; + int ret = 0; + + host = sdhci_pltfm_init(pdev, pdata); + if (IS_ERR(host)) + return PTR_ERR(host); + + ret = sdhci_add_host(host); + if (ret) + sdhci_pltfm_free(pdev); + + return ret; } -static const struct platform_device_id sdhci_pltfm_ids[] = { - { "sdhci", }, -#ifdef CONFIG_MMC_SDHCI_CNS3XXX - { "sdhci-cns3xxx", (kernel_ulong_t)&sdhci_cns3xxx_pdata }, -#endif -#ifdef CONFIG_MMC_SDHCI_ESDHC_IMX - { "sdhci-esdhc-imx", (kernel_ulong_t)&sdhci_esdhc_imx_pdata }, -#endif -#ifdef CONFIG_MMC_SDHCI_DOVE - { "sdhci-dove", (kernel_ulong_t)&sdhci_dove_pdata }, -#endif -#ifdef CONFIG_MMC_SDHCI_TEGRA - { "sdhci-tegra", (kernel_ulong_t)&sdhci_tegra_pdata }, -#endif - { }, -}; -MODULE_DEVICE_TABLE(platform, sdhci_pltfm_ids); +int sdhci_pltfm_unregister(struct platform_device *pdev) +{ + struct sdhci_host *host = platform_get_drvdata(pdev); + int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff); + + sdhci_remove_host(host, dead); + sdhci_pltfm_free(pdev); + + return 0; +} #ifdef CONFIG_PM -static int sdhci_pltfm_suspend(struct platform_device *dev, pm_message_t state) +int sdhci_pltfm_suspend(struct platform_device *dev, pm_message_t state) { struct sdhci_host *host = platform_get_drvdata(dev); return sdhci_suspend_host(host, state); } -static int sdhci_pltfm_resume(struct platform_device *dev) +int sdhci_pltfm_resume(struct platform_device *dev) { struct sdhci_host *host = platform_get_drvdata(dev); return sdhci_resume_host(host); } -#else -#define sdhci_pltfm_suspend NULL -#define sdhci_pltfm_resume NULL #endif /* CONFIG_PM */ - -static struct platform_driver sdhci_pltfm_driver = { - .driver = 
{ - .name = "sdhci", - .owner = THIS_MODULE, - }, - .probe = sdhci_pltfm_probe, - .remove = __devexit_p(sdhci_pltfm_remove), - .id_table = sdhci_pltfm_ids, - .suspend = sdhci_pltfm_suspend, - .resume = sdhci_pltfm_resume, -}; - -/*****************************************************************************\ - * * - * Driver init/exit * - * * -\*****************************************************************************/ - -static int __init sdhci_drv_init(void) -{ - return platform_driver_register(&sdhci_pltfm_driver); -} - -static void __exit sdhci_drv_exit(void) -{ - platform_driver_unregister(&sdhci_pltfm_driver); -} - -module_init(sdhci_drv_init); -module_exit(sdhci_drv_exit); - -MODULE_DESCRIPTION("Secure Digital Host Controller Interface platform driver"); -MODULE_AUTHOR("Mocean Laboratories "); -MODULE_LICENSE("GPL v2"); diff --git a/trunk/drivers/mmc/host/sdhci-pltfm.h b/trunk/drivers/mmc/host/sdhci-pltfm.h index 2b37016ad0ac..ff4b7eb326fb 100644 --- a/trunk/drivers/mmc/host/sdhci-pltfm.h +++ b/trunk/drivers/mmc/host/sdhci-pltfm.h @@ -13,6 +13,7 @@ #include #include +#include #include struct sdhci_pltfm_host { @@ -20,9 +21,17 @@ struct sdhci_pltfm_host { void *priv; /* to handle quirks across io-accessor calls */ }; -extern struct sdhci_pltfm_data sdhci_cns3xxx_pdata; -extern struct sdhci_pltfm_data sdhci_esdhc_imx_pdata; -extern struct sdhci_pltfm_data sdhci_dove_pdata; -extern struct sdhci_pltfm_data sdhci_tegra_pdata; +extern struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev, + struct sdhci_pltfm_data *pdata); +extern void sdhci_pltfm_free(struct platform_device *pdev); + +extern int sdhci_pltfm_register(struct platform_device *pdev, + struct sdhci_pltfm_data *pdata); +extern int sdhci_pltfm_unregister(struct platform_device *pdev); + +#ifdef CONFIG_PM +extern int sdhci_pltfm_suspend(struct platform_device *dev, pm_message_t state); +extern int sdhci_pltfm_resume(struct platform_device *dev); +#endif #endif /* _DRIVERS_MMC_SDHCI_PLTFM_H 
*/ diff --git a/trunk/drivers/mmc/host/sdhci-tegra.c b/trunk/drivers/mmc/host/sdhci-tegra.c index 343c97edba32..1f66aca5f506 100644 --- a/trunk/drivers/mmc/host/sdhci-tegra.c +++ b/trunk/drivers/mmc/host/sdhci-tegra.c @@ -116,20 +116,42 @@ static int tegra_sdhci_8bit(struct sdhci_host *host, int bus_width) return 0; } +static struct sdhci_ops tegra_sdhci_ops = { + .get_ro = tegra_sdhci_get_ro, + .read_l = tegra_sdhci_readl, + .read_w = tegra_sdhci_readw, + .write_l = tegra_sdhci_writel, + .platform_8bit_width = tegra_sdhci_8bit, +}; + +static struct sdhci_pltfm_data sdhci_tegra_pdata = { + .quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | + SDHCI_QUIRK_SINGLE_POWER_WRITE | + SDHCI_QUIRK_NO_HISPD_BIT | + SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC, + .ops = &tegra_sdhci_ops, +}; -static int tegra_sdhci_pltfm_init(struct sdhci_host *host, - struct sdhci_pltfm_data *pdata) +static int __devinit sdhci_tegra_probe(struct platform_device *pdev) { - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc)); + struct sdhci_pltfm_host *pltfm_host; struct tegra_sdhci_platform_data *plat; + struct sdhci_host *host; struct clk *clk; int rc; + host = sdhci_pltfm_init(pdev, &sdhci_tegra_pdata); + if (IS_ERR(host)) + return PTR_ERR(host); + + pltfm_host = sdhci_priv(host); + plat = pdev->dev.platform_data; + if (plat == NULL) { dev_err(mmc_dev(host->mmc), "missing platform data\n"); - return -ENXIO; + rc = -ENXIO; + goto err_no_plat; } if (gpio_is_valid(plat->power_gpio)) { @@ -137,7 +159,7 @@ static int tegra_sdhci_pltfm_init(struct sdhci_host *host, if (rc) { dev_err(mmc_dev(host->mmc), "failed to allocate power gpio\n"); - goto out; + goto err_power_req; } tegra_gpio_enable(plat->power_gpio); gpio_direction_output(plat->power_gpio, 1); @@ -148,7 +170,7 @@ static int tegra_sdhci_pltfm_init(struct sdhci_host *host, if (rc) { dev_err(mmc_dev(host->mmc), "failed to allocate cd gpio\n"); - goto out_power; + goto err_cd_req; } 
tegra_gpio_enable(plat->cd_gpio); gpio_direction_input(plat->cd_gpio); @@ -159,7 +181,7 @@ static int tegra_sdhci_pltfm_init(struct sdhci_host *host, if (rc) { dev_err(mmc_dev(host->mmc), "request irq error\n"); - goto out_cd; + goto err_cd_irq_req; } } @@ -169,7 +191,7 @@ static int tegra_sdhci_pltfm_init(struct sdhci_host *host, if (rc) { dev_err(mmc_dev(host->mmc), "failed to allocate wp gpio\n"); - goto out_irq; + goto err_wp_req; } tegra_gpio_enable(plat->wp_gpio); gpio_direction_input(plat->wp_gpio); @@ -179,7 +201,7 @@ static int tegra_sdhci_pltfm_init(struct sdhci_host *host, if (IS_ERR(clk)) { dev_err(mmc_dev(host->mmc), "clk err\n"); rc = PTR_ERR(clk); - goto out_wp; + goto err_clk_get; } clk_enable(clk); pltfm_host->clk = clk; @@ -189,38 +211,47 @@ static int tegra_sdhci_pltfm_init(struct sdhci_host *host, if (plat->is_8bit) host->mmc->caps |= MMC_CAP_8_BIT_DATA; + rc = sdhci_add_host(host); + if (rc) + goto err_add_host; + return 0; -out_wp: +err_add_host: + clk_disable(pltfm_host->clk); + clk_put(pltfm_host->clk); +err_clk_get: if (gpio_is_valid(plat->wp_gpio)) { tegra_gpio_disable(plat->wp_gpio); gpio_free(plat->wp_gpio); } - -out_irq: +err_wp_req: if (gpio_is_valid(plat->cd_gpio)) free_irq(gpio_to_irq(plat->cd_gpio), host); -out_cd: +err_cd_irq_req: if (gpio_is_valid(plat->cd_gpio)) { tegra_gpio_disable(plat->cd_gpio); gpio_free(plat->cd_gpio); } - -out_power: +err_cd_req: if (gpio_is_valid(plat->power_gpio)) { tegra_gpio_disable(plat->power_gpio); gpio_free(plat->power_gpio); } - -out: +err_power_req: +err_no_plat: + sdhci_pltfm_free(pdev); return rc; } -static void tegra_sdhci_pltfm_exit(struct sdhci_host *host) +static int __devexit sdhci_tegra_remove(struct platform_device *pdev) { + struct sdhci_host *host = platform_get_drvdata(pdev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc)); struct tegra_sdhci_platform_data *plat; + int dead = (readl(host->ioaddr + 
SDHCI_INT_STATUS) == 0xffffffff); + + sdhci_remove_host(host, dead); plat = pdev->dev.platform_data; @@ -242,22 +273,37 @@ static void tegra_sdhci_pltfm_exit(struct sdhci_host *host) clk_disable(pltfm_host->clk); clk_put(pltfm_host->clk); + + sdhci_pltfm_free(pdev); + + return 0; } -static struct sdhci_ops tegra_sdhci_ops = { - .get_ro = tegra_sdhci_get_ro, - .read_l = tegra_sdhci_readl, - .read_w = tegra_sdhci_readw, - .write_l = tegra_sdhci_writel, - .platform_8bit_width = tegra_sdhci_8bit, +static struct platform_driver sdhci_tegra_driver = { + .driver = { + .name = "sdhci-tegra", + .owner = THIS_MODULE, + }, + .probe = sdhci_tegra_probe, + .remove = __devexit_p(sdhci_tegra_remove), +#ifdef CONFIG_PM + .suspend = sdhci_pltfm_suspend, + .resume = sdhci_pltfm_resume, +#endif }; -struct sdhci_pltfm_data sdhci_tegra_pdata = { - .quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | - SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_NO_HISPD_BIT | - SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC, - .ops = &tegra_sdhci_ops, - .init = tegra_sdhci_pltfm_init, - .exit = tegra_sdhci_pltfm_exit, -}; +static int __init sdhci_tegra_init(void) +{ + return platform_driver_register(&sdhci_tegra_driver); +} +module_init(sdhci_tegra_init); + +static void __exit sdhci_tegra_exit(void) +{ + platform_driver_unregister(&sdhci_tegra_driver); +} +module_exit(sdhci_tegra_exit); + +MODULE_DESCRIPTION("SDHCI driver for Tegra"); +MODULE_AUTHOR(" Google, Inc."); +MODULE_LICENSE("GPL v2"); diff --git a/trunk/drivers/mtd/ubi/build.c b/trunk/drivers/mtd/ubi/build.c index 6c3fb5ab20f5..65626c1c446d 100644 --- a/trunk/drivers/mtd/ubi/build.c +++ b/trunk/drivers/mtd/ubi/build.c @@ -953,14 +953,10 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) if (!ubi->peb_buf2) goto out_free; - err = ubi_debugging_init_dev(ubi); - if (err) - goto out_free; - err = attach_by_scanning(ubi); if (err) { dbg_err("failed to attach by scanning, error %d", err); - goto out_debugging; + goto out_free; } if 
(ubi->autoresize_vol_id != -1) { @@ -973,16 +969,12 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) if (err) goto out_detach; - err = ubi_debugfs_init_dev(ubi); - if (err) - goto out_uif; - ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name); if (IS_ERR(ubi->bgt_thread)) { err = PTR_ERR(ubi->bgt_thread); ubi_err("cannot spawn \"%s\", error %d", ubi->bgt_name, err); - goto out_debugfs; + goto out_uif; } ubi_msg("attached mtd%d to ubi%d", mtd->index, ubi_num); @@ -1016,18 +1008,12 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL); return ubi_num; -out_debugfs: - ubi_debugfs_exit_dev(ubi); out_uif: - get_device(&ubi->dev); - ubi_assert(ref); uif_close(ubi); out_detach: ubi_wl_close(ubi); free_internal_volumes(ubi); vfree(ubi->vtbl); -out_debugging: - ubi_debugging_exit_dev(ubi); out_free: vfree(ubi->peb_buf1); vfree(ubi->peb_buf2); @@ -1094,13 +1080,11 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) */ get_device(&ubi->dev); - ubi_debugfs_exit_dev(ubi); uif_close(ubi); ubi_wl_close(ubi); free_internal_volumes(ubi); vfree(ubi->vtbl); put_mtd_device(ubi->mtd); - ubi_debugging_exit_dev(ubi); vfree(ubi->peb_buf1); vfree(ubi->peb_buf2); ubi_msg("mtd%d is detached from ubi%d", ubi->mtd->index, ubi->ubi_num); @@ -1215,11 +1199,6 @@ static int __init ubi_init(void) if (!ubi_wl_entry_slab) goto out_dev_unreg; - err = ubi_debugfs_init(); - if (err) - goto out_slab; - - /* Attach MTD devices */ for (i = 0; i < mtd_devs; i++) { struct mtd_dev_param *p = &mtd_dev_param[i]; @@ -1268,8 +1247,6 @@ static int __init ubi_init(void) ubi_detach_mtd_dev(ubi_devices[k]->ubi_num, 1); mutex_unlock(&ubi_devices_mutex); } - ubi_debugfs_exit(); -out_slab: kmem_cache_destroy(ubi_wl_entry_slab); out_dev_unreg: misc_deregister(&ubi_ctrl_cdev); @@ -1293,7 +1270,6 @@ static void __exit ubi_exit(void) ubi_detach_mtd_dev(ubi_devices[i]->ubi_num, 1); 
mutex_unlock(&ubi_devices_mutex); } - ubi_debugfs_exit(); kmem_cache_destroy(ubi_wl_entry_slab); misc_deregister(&ubi_ctrl_cdev); class_remove_file(ubi_class, &ubi_version); diff --git a/trunk/drivers/mtd/ubi/debug.c b/trunk/drivers/mtd/ubi/debug.c index ab80c0debac8..2224cbe41ddf 100644 --- a/trunk/drivers/mtd/ubi/debug.c +++ b/trunk/drivers/mtd/ubi/debug.c @@ -27,9 +27,17 @@ #ifdef CONFIG_MTD_UBI_DEBUG #include "ubi.h" -#include -#include #include +#include + +unsigned int ubi_chk_flags; +unsigned int ubi_tst_flags; + +module_param_named(debug_chks, ubi_chk_flags, uint, S_IRUGO | S_IWUSR); +module_param_named(debug_tsts, ubi_chk_flags, uint, S_IRUGO | S_IWUSR); + +MODULE_PARM_DESC(debug_chks, "Debug check flags"); +MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); /** * ubi_dbg_dump_ec_hdr - dump an erase counter header. @@ -231,261 +239,4 @@ void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len) return; } -/** - * ubi_debugging_init_dev - initialize debugging for an UBI device. - * @ubi: UBI device description object - * - * This function initializes debugging-related data for UBI device @ubi. - * Returns zero in case of success and a negative error code in case of - * failure. - */ -int ubi_debugging_init_dev(struct ubi_device *ubi) -{ - ubi->dbg = kzalloc(sizeof(struct ubi_debug_info), GFP_KERNEL); - if (!ubi->dbg) - return -ENOMEM; - - return 0; -} - -/** - * ubi_debugging_exit_dev - free debugging data for an UBI device. - * @ubi: UBI device description object - */ -void ubi_debugging_exit_dev(struct ubi_device *ubi) -{ - kfree(ubi->dbg); -} - -/* - * Root directory for UBI stuff in debugfs. Contains sub-directories which - * contain the stuff specific to particular UBI devices. - */ -static struct dentry *dfs_rootdir; - -/** - * ubi_debugfs_init - create UBI debugfs directory. - * - * Create UBI debugfs directory. Returns zero in case of success and a negative - * error code in case of failure. 
- */ -int ubi_debugfs_init(void) -{ - dfs_rootdir = debugfs_create_dir("ubi", NULL); - if (IS_ERR_OR_NULL(dfs_rootdir)) { - int err = dfs_rootdir ? -ENODEV : PTR_ERR(dfs_rootdir); - - ubi_err("cannot create \"ubi\" debugfs directory, error %d\n", - err); - return err; - } - - return 0; -} - -/** - * ubi_debugfs_exit - remove UBI debugfs directory. - */ -void ubi_debugfs_exit(void) -{ - debugfs_remove(dfs_rootdir); -} - -/* Read an UBI debugfs file */ -static ssize_t dfs_file_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - unsigned long ubi_num = (unsigned long)file->private_data; - struct dentry *dent = file->f_path.dentry; - struct ubi_device *ubi; - struct ubi_debug_info *d; - char buf[3]; - int val; - - ubi = ubi_get_device(ubi_num); - if (!ubi) - return -ENODEV; - d = ubi->dbg; - - if (dent == d->dfs_chk_gen) - val = d->chk_gen; - else if (dent == d->dfs_chk_io) - val = d->chk_io; - else if (dent == d->dfs_disable_bgt) - val = d->disable_bgt; - else if (dent == d->dfs_emulate_bitflips) - val = d->emulate_bitflips; - else if (dent == d->dfs_emulate_io_failures) - val = d->emulate_io_failures; - else { - count = -EINVAL; - goto out; - } - - if (val) - buf[0] = '1'; - else - buf[0] = '0'; - buf[1] = '\n'; - buf[2] = 0x00; - - count = simple_read_from_buffer(user_buf, count, ppos, buf, 2); - -out: - ubi_put_device(ubi); - return count; -} - -/* Write an UBI debugfs file */ -static ssize_t dfs_file_write(struct file *file, const char __user *user_buf, - size_t count, loff_t *ppos) -{ - unsigned long ubi_num = (unsigned long)file->private_data; - struct dentry *dent = file->f_path.dentry; - struct ubi_device *ubi; - struct ubi_debug_info *d; - size_t buf_size; - char buf[8]; - int val; - - ubi = ubi_get_device(ubi_num); - if (!ubi) - return -ENODEV; - d = ubi->dbg; - - buf_size = min_t(size_t, count, (sizeof(buf) - 1)); - if (copy_from_user(buf, user_buf, buf_size)) { - count = -EFAULT; - goto out; - } - - if (buf[0] == '1') - val = 
1; - else if (buf[0] == '0') - val = 0; - else { - count = -EINVAL; - goto out; - } - - if (dent == d->dfs_chk_gen) - d->chk_gen = val; - else if (dent == d->dfs_chk_io) - d->chk_io = val; - else if (dent == d->dfs_disable_bgt) - d->disable_bgt = val; - else if (dent == d->dfs_emulate_bitflips) - d->emulate_bitflips = val; - else if (dent == d->dfs_emulate_io_failures) - d->emulate_io_failures = val; - else - count = -EINVAL; - -out: - ubi_put_device(ubi); - return count; -} - -static int default_open(struct inode *inode, struct file *file) -{ - if (inode->i_private) - file->private_data = inode->i_private; - - return 0; -} - -/* File operations for all UBI debugfs files */ -static const struct file_operations dfs_fops = { - .read = dfs_file_read, - .write = dfs_file_write, - .open = default_open, - .llseek = no_llseek, - .owner = THIS_MODULE, -}; - -/** - * ubi_debugfs_init_dev - initialize debugfs for an UBI device. - * @ubi: UBI device description object - * - * This function creates all debugfs files for UBI device @ubi. Returns zero in - * case of success and a negative error code in case of failure. 
- */ -int ubi_debugfs_init_dev(struct ubi_device *ubi) -{ - int err, n; - unsigned long ubi_num = ubi->ubi_num; - const char *fname; - struct dentry *dent; - struct ubi_debug_info *d = ubi->dbg; - - n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN + 1, UBI_DFS_DIR_NAME, - ubi->ubi_num); - if (n == UBI_DFS_DIR_LEN) { - /* The array size is too small */ - fname = UBI_DFS_DIR_NAME; - dent = ERR_PTR(-EINVAL); - goto out; - } - - fname = d->dfs_dir_name; - dent = debugfs_create_dir(fname, dfs_rootdir); - if (IS_ERR_OR_NULL(dent)) - goto out; - d->dfs_dir = dent; - - fname = "chk_gen"; - dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_gen = dent; - - fname = "chk_io"; - dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_io = dent; - - fname = "tst_disable_bgt"; - dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_disable_bgt = dent; - - fname = "tst_emulate_bitflips"; - dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_emulate_bitflips = dent; - - fname = "tst_emulate_io_failures"; - dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_emulate_io_failures = dent; - - return 0; - -out_remove: - debugfs_remove_recursive(d->dfs_dir); -out: - err = dent ? 
PTR_ERR(dent) : -ENODEV; - ubi_err("cannot create \"%s\" debugfs file or directory, error %d\n", - fname, err); - return err; -} - -/** - * dbg_debug_exit_dev - free all debugfs files corresponding to device @ubi - * @ubi: UBI device description object - */ -void ubi_debugfs_exit_dev(struct ubi_device *ubi) -{ - debugfs_remove_recursive(ubi->dbg->dfs_dir); -} - #endif /* CONFIG_MTD_UBI_DEBUG */ diff --git a/trunk/drivers/mtd/ubi/debug.h b/trunk/drivers/mtd/ubi/debug.h index 65b5b76cc379..3f1a09c5c438 100644 --- a/trunk/drivers/mtd/ubi/debug.h +++ b/trunk/drivers/mtd/ubi/debug.h @@ -21,6 +21,14 @@ #ifndef __UBI_DEBUG_H__ #define __UBI_DEBUG_H__ +struct ubi_ec_hdr; +struct ubi_vid_hdr; +struct ubi_volume; +struct ubi_vtbl_record; +struct ubi_scan_volume; +struct ubi_scan_leb; +struct ubi_mkvol_req; + #ifdef CONFIG_MTD_UBI_DEBUG #include @@ -63,103 +71,86 @@ void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv); void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type); void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req); void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len); + +extern unsigned int ubi_chk_flags; + +/* + * Debugging check flags. + * + * UBI_CHK_GEN: general checks + * UBI_CHK_IO: check writes and erases + */ +enum { + UBI_CHK_GEN = 0x1, + UBI_CHK_IO = 0x2, +}; + int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len); int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, int len); -int ubi_debugging_init_dev(struct ubi_device *ubi); -void ubi_debugging_exit_dev(struct ubi_device *ubi); -int ubi_debugfs_init(void); -void ubi_debugfs_exit(void); -int ubi_debugfs_init_dev(struct ubi_device *ubi); -void ubi_debugfs_exit_dev(struct ubi_device *ubi); -/* - * The UBI debugfs directory name pattern and maximum name length (3 for "ubi" - * + 2 for the number plus 1 for the trailing zero byte. 
- */ -#define UBI_DFS_DIR_NAME "ubi%d" -#define UBI_DFS_DIR_LEN (3 + 2 + 1) +extern unsigned int ubi_tst_flags; -/** - * struct ubi_debug_info - debugging information for an UBI device. +/* + * Special testing flags. * - * @chk_gen: if UBI general extra checks are enabled - * @chk_io: if UBI I/O extra checks are enabled - * @disable_bgt: disable the background task for testing purposes - * @emulate_bitflips: emulate bit-flips for testing purposes - * @emulate_io_failures: emulate write/erase failures for testing purposes - * @dfs_dir_name: name of debugfs directory containing files of this UBI device - * @dfs_dir: direntry object of the UBI device debugfs directory - * @dfs_chk_gen: debugfs knob to enable UBI general extra checks - * @dfs_chk_io: debugfs knob to enable UBI I/O extra checks - * @dfs_disable_bgt: debugfs knob to disable the background task - * @dfs_emulate_bitflips: debugfs knob to emulate bit-flips - * @dfs_emulate_io_failures: debugfs knob to emulate write/erase failures + * UBIFS_TST_DISABLE_BGT: disable the background thread + * UBI_TST_EMULATE_BITFLIPS: emulate bit-flips + * UBI_TST_EMULATE_WRITE_FAILURES: emulate write failures + * UBI_TST_EMULATE_ERASE_FAILURES: emulate erase failures */ -struct ubi_debug_info { - unsigned int chk_gen:1; - unsigned int chk_io:1; - unsigned int disable_bgt:1; - unsigned int emulate_bitflips:1; - unsigned int emulate_io_failures:1; - char dfs_dir_name[UBI_DFS_DIR_LEN + 1]; - struct dentry *dfs_dir; - struct dentry *dfs_chk_gen; - struct dentry *dfs_chk_io; - struct dentry *dfs_disable_bgt; - struct dentry *dfs_emulate_bitflips; - struct dentry *dfs_emulate_io_failures; +enum { + UBI_TST_DISABLE_BGT = 0x1, + UBI_TST_EMULATE_BITFLIPS = 0x2, + UBI_TST_EMULATE_WRITE_FAILURES = 0x4, + UBI_TST_EMULATE_ERASE_FAILURES = 0x8, }; /** * ubi_dbg_is_bgt_disabled - if the background thread is disabled. - * @ubi: UBI device description object * * Returns non-zero if the UBI background thread is disabled for testing * purposes. 
*/ -static inline int ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi) +static inline int ubi_dbg_is_bgt_disabled(void) { - return ubi->dbg->disable_bgt; + return ubi_tst_flags & UBI_TST_DISABLE_BGT; } /** * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip. - * @ubi: UBI device description object * * Returns non-zero if a bit-flip should be emulated, otherwise returns zero. */ -static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) +static inline int ubi_dbg_is_bitflip(void) { - if (ubi->dbg->emulate_bitflips) + if (ubi_tst_flags & UBI_TST_EMULATE_BITFLIPS) return !(random32() % 200); return 0; } /** * ubi_dbg_is_write_failure - if it is time to emulate a write failure. - * @ubi: UBI device description object * * Returns non-zero if a write failure should be emulated, otherwise returns * zero. */ -static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi) +static inline int ubi_dbg_is_write_failure(void) { - if (ubi->dbg->emulate_io_failures) + if (ubi_tst_flags & UBI_TST_EMULATE_WRITE_FAILURES) return !(random32() % 500); return 0; } /** * ubi_dbg_is_erase_failure - if its time to emulate an erase failure. - * @ubi: UBI device description object * * Returns non-zero if an erase failure should be emulated, otherwise returns * zero. 
*/ -static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi) +static inline int ubi_dbg_is_erase_failure(void) { - if (ubi->dbg->emulate_io_failures) + if (ubi_tst_flags & UBI_TST_EMULATE_ERASE_FAILURES) return !(random32() % 400); return 0; } @@ -210,6 +201,11 @@ static inline void ubi_dbg_dump_flash(struct ubi_device *ubi, static inline void ubi_dbg_print_hex_dump(const char *l, const char *ps, int pt, int r, int g, const void *b, size_t len, bool a) { return; } + +static inline int ubi_dbg_is_bgt_disabled(void) { return 0; } +static inline int ubi_dbg_is_bitflip(void) { return 0; } +static inline int ubi_dbg_is_write_failure(void) { return 0; } +static inline int ubi_dbg_is_erase_failure(void) { return 0; } static inline int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len) { return 0; } @@ -217,20 +213,5 @@ static inline int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, int len) { return 0; } -static inline int ubi_debugging_init_dev(struct ubi_device *ubi) { return 0; } -static inline void ubi_debugging_exit_dev(struct ubi_device *ubi) { return; } -static inline int ubi_debugfs_init(void) { return 0; } -static inline void ubi_debugfs_exit(void) { return; } -static inline int ubi_debugfs_init_dev(struct ubi_device *ubi) { return 0; } -static inline void ubi_debugfs_exit_dev(struct ubi_device *ubi) { return; } - -static inline int -ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi) { return 0; } -static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) { return 0; } -static inline int -ubi_dbg_is_write_failure(const struct ubi_device *ubi) { return 0; } -static inline int -ubi_dbg_is_erase_failure(const struct ubi_device *ubi) { return 0; } - #endif /* !CONFIG_MTD_UBI_DEBUG */ #endif /* !__UBI_DEBUG_H__ */ diff --git a/trunk/drivers/mtd/ubi/io.c b/trunk/drivers/mtd/ubi/io.c index 6ba55c235873..8c1b1c7bc4a7 100644 --- a/trunk/drivers/mtd/ubi/io.c +++ 
b/trunk/drivers/mtd/ubi/io.c @@ -212,7 +212,7 @@ int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, } else { ubi_assert(len == read); - if (ubi_dbg_is_bitflip(ubi)) { + if (ubi_dbg_is_bitflip()) { dbg_gen("bit-flip (emulated)"); err = UBI_IO_BITFLIPS; } @@ -281,7 +281,7 @@ int ubi_io_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, return err; } - if (ubi_dbg_is_write_failure(ubi)) { + if (ubi_dbg_is_write_failure()) { dbg_err("cannot write %d bytes to PEB %d:%d " "(emulated)", len, pnum, offset); ubi_dbg_dump_stack(); @@ -396,7 +396,7 @@ static int do_sync_erase(struct ubi_device *ubi, int pnum) if (err) return err; - if (ubi_dbg_is_erase_failure(ubi)) { + if (ubi_dbg_is_erase_failure()) { dbg_err("cannot erase PEB %d (emulated)", pnum); return -EIO; } @@ -1146,7 +1146,7 @@ static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum) { int err; - if (!ubi->dbg->chk_io) + if (!(ubi_chk_flags & UBI_CHK_IO)) return 0; err = ubi_io_is_bad(ubi, pnum); @@ -1173,7 +1173,7 @@ static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum, int err; uint32_t magic; - if (!ubi->dbg->chk_io) + if (!(ubi_chk_flags & UBI_CHK_IO)) return 0; magic = be32_to_cpu(ec_hdr->magic); @@ -1211,7 +1211,7 @@ static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum) uint32_t crc, hdr_crc; struct ubi_ec_hdr *ec_hdr; - if (!ubi->dbg->chk_io) + if (!(ubi_chk_flags & UBI_CHK_IO)) return 0; ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); @@ -1255,7 +1255,7 @@ static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum, int err; uint32_t magic; - if (!ubi->dbg->chk_io) + if (!(ubi_chk_flags & UBI_CHK_IO)) return 0; magic = be32_to_cpu(vid_hdr->magic); @@ -1296,7 +1296,7 @@ static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum) struct ubi_vid_hdr *vid_hdr; void *p; - if (!ubi->dbg->chk_io) + if (!(ubi_chk_flags & UBI_CHK_IO)) return 0; vid_hdr = ubi_zalloc_vid_hdr(ubi, 
GFP_NOFS); @@ -1348,7 +1348,7 @@ int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum, void *buf1; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; - if (!ubi->dbg->chk_io) + if (!(ubi_chk_flags & UBI_CHK_IO)) return 0; buf1 = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); @@ -1412,7 +1412,7 @@ int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len) void *buf; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; - if (!ubi->dbg->chk_io) + if (!(ubi_chk_flags & UBI_CHK_IO)) return 0; buf = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); diff --git a/trunk/drivers/mtd/ubi/scan.c b/trunk/drivers/mtd/ubi/scan.c index a3a198f9b98d..2135a53732ff 100644 --- a/trunk/drivers/mtd/ubi/scan.c +++ b/trunk/drivers/mtd/ubi/scan.c @@ -1347,7 +1347,7 @@ static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si) struct ubi_scan_leb *seb, *last_seb; uint8_t *buf; - if (!ubi->dbg->chk_gen) + if (!(ubi_chk_flags & UBI_CHK_GEN)) return 0; /* diff --git a/trunk/drivers/mtd/ubi/ubi.h b/trunk/drivers/mtd/ubi/ubi.h index dc64c767fd21..c6c22295898e 100644 --- a/trunk/drivers/mtd/ubi/ubi.h +++ b/trunk/drivers/mtd/ubi/ubi.h @@ -44,6 +44,7 @@ #include "ubi-media.h" #include "scan.h" +#include "debug.h" /* Maximum number of supported UBI devices */ #define UBI_MAX_DEVICES 32 @@ -389,8 +390,6 @@ struct ubi_wl_entry; * @peb_buf2: another buffer of PEB size used for different purposes * @buf_mutex: protects @peb_buf1 and @peb_buf2 * @ckvol_mutex: serializes static volume checking when opening - * - * @dbg: debugging information for this UBI device */ struct ubi_device { struct cdev cdev; @@ -473,12 +472,8 @@ struct ubi_device { void *peb_buf2; struct mutex buf_mutex; struct mutex ckvol_mutex; - - struct ubi_debug_info *dbg; }; -#include "debug.h" - extern struct kmem_cache *ubi_wl_entry_slab; extern const struct file_operations ubi_ctrl_cdev_operations; extern const struct file_operations ubi_cdev_operations; @@ -667,7 +662,6 @@ static inline void 
ubi_ro_mode(struct ubi_device *ubi) if (!ubi->ro_mode) { ubi->ro_mode = 1; ubi_warn("switch to read-only mode"); - ubi_dbg_dump_stack(); } } diff --git a/trunk/drivers/mtd/ubi/vmt.c b/trunk/drivers/mtd/ubi/vmt.c index 97e093d19672..366eb70219a6 100644 --- a/trunk/drivers/mtd/ubi/vmt.c +++ b/trunk/drivers/mtd/ubi/vmt.c @@ -871,7 +871,7 @@ static int paranoid_check_volumes(struct ubi_device *ubi) { int i, err = 0; - if (!ubi->dbg->chk_gen) + if (!(ubi_chk_flags & UBI_CHK_GEN)) return 0; for (i = 0; i < ubi->vtbl_slots; i++) { diff --git a/trunk/drivers/mtd/ubi/vtbl.c b/trunk/drivers/mtd/ubi/vtbl.c index 4b50a3029b84..fd3bf770f518 100644 --- a/trunk/drivers/mtd/ubi/vtbl.c +++ b/trunk/drivers/mtd/ubi/vtbl.c @@ -307,7 +307,8 @@ static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si, { int err, tries = 0; static struct ubi_vid_hdr *vid_hdr; - struct ubi_scan_leb *new_seb; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *new_seb, *old_seb = NULL; ubi_msg("create volume table (copy #%d)", copy + 1); @@ -315,6 +316,15 @@ static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si, if (!vid_hdr) return -ENOMEM; + /* + * Check if there is a logical eraseblock which would have to contain + * this volume table copy was found during scanning. It has to be wiped + * out. + */ + sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID); + if (sv) + old_seb = ubi_scan_find_seb(sv, copy); + retry: new_seb = ubi_scan_get_free_peb(ubi, si); if (IS_ERR(new_seb)) { @@ -341,8 +351,8 @@ static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si, goto write_error; /* - * And add it to the scanning information. Don't delete the old version - * of this LEB as it will be deleted and freed in 'ubi_scan_add_used()'. + * And add it to the scanning information. Don't delete the old + * @old_seb as it will be deleted and freed in 'ubi_scan_add_used()'. 
*/ err = ubi_scan_add_used(ubi, si, new_seb->pnum, new_seb->ec, vid_hdr, 0); @@ -866,7 +876,7 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si) */ static void paranoid_vtbl_check(const struct ubi_device *ubi) { - if (!ubi->dbg->chk_gen) + if (!(ubi_chk_flags & UBI_CHK_GEN)) return; if (vtbl_check(ubi, ubi->vtbl)) { diff --git a/trunk/drivers/mtd/ubi/wl.c b/trunk/drivers/mtd/ubi/wl.c index 42c684cf3688..ff2c4956eeff 100644 --- a/trunk/drivers/mtd/ubi/wl.c +++ b/trunk/drivers/mtd/ubi/wl.c @@ -1,5 +1,4 @@ /* - * @ubi: UBI device description object * Copyright (c) International Business Machines Corp., 2006 * * This program is free software; you can redistribute it and/or modify @@ -164,14 +163,12 @@ struct ubi_work { #ifdef CONFIG_MTD_UBI_DEBUG static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec); -static int paranoid_check_in_wl_tree(const struct ubi_device *ubi, - struct ubi_wl_entry *e, +static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root); -static int paranoid_check_in_pq(const struct ubi_device *ubi, - struct ubi_wl_entry *e); +static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e); #else #define paranoid_check_ec(ubi, pnum, ec) 0 -#define paranoid_check_in_wl_tree(ubi, e, root) +#define paranoid_check_in_wl_tree(e, root) #define paranoid_check_in_pq(ubi, e) 0 #endif @@ -452,7 +449,7 @@ int ubi_wl_get_peb(struct ubi_device *ubi, int dtype) BUG(); } - paranoid_check_in_wl_tree(ubi, e, &ubi->free); + paranoid_check_in_wl_tree(e, &ubi->free); /* * Move the physical eraseblock to the protection queue where it will @@ -616,7 +613,7 @@ static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) list_add_tail(&wrk->list, &ubi->works); ubi_assert(ubi->works_count >= 0); ubi->works_count += 1; - if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled(ubi)) + if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled()) wake_up_process(ubi->bgt_thread); 
spin_unlock(&ubi->wl_lock); } @@ -715,7 +712,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, e1->ec, e2->ec); goto out_cancel; } - paranoid_check_in_wl_tree(ubi, e1, &ubi->used); + paranoid_check_in_wl_tree(e1, &ubi->used); rb_erase(&e1->u.rb, &ubi->used); dbg_wl("move PEB %d EC %d to PEB %d EC %d", e1->pnum, e1->ec, e2->pnum, e2->ec); @@ -724,12 +721,12 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, scrubbing = 1; e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb); e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); - paranoid_check_in_wl_tree(ubi, e1, &ubi->scrub); + paranoid_check_in_wl_tree(e1, &ubi->scrub); rb_erase(&e1->u.rb, &ubi->scrub); dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); } - paranoid_check_in_wl_tree(ubi, e2, &ubi->free); + paranoid_check_in_wl_tree(e2, &ubi->free); rb_erase(&e2->u.rb, &ubi->free); ubi->move_from = e1; ubi->move_to = e2; @@ -1172,13 +1169,13 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) return 0; } else { if (in_wl_tree(e, &ubi->used)) { - paranoid_check_in_wl_tree(ubi, e, &ubi->used); + paranoid_check_in_wl_tree(e, &ubi->used); rb_erase(&e->u.rb, &ubi->used); } else if (in_wl_tree(e, &ubi->scrub)) { - paranoid_check_in_wl_tree(ubi, e, &ubi->scrub); + paranoid_check_in_wl_tree(e, &ubi->scrub); rb_erase(&e->u.rb, &ubi->scrub); } else if (in_wl_tree(e, &ubi->erroneous)) { - paranoid_check_in_wl_tree(ubi, e, &ubi->erroneous); + paranoid_check_in_wl_tree(e, &ubi->erroneous); rb_erase(&e->u.rb, &ubi->erroneous); ubi->erroneous_peb_count -= 1; ubi_assert(ubi->erroneous_peb_count >= 0); @@ -1245,7 +1242,7 @@ int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum) } if (in_wl_tree(e, &ubi->used)) { - paranoid_check_in_wl_tree(ubi, e, &ubi->used); + paranoid_check_in_wl_tree(e, &ubi->used); rb_erase(&e->u.rb, &ubi->used); } else { int err; @@ -1367,7 +1364,7 @@ int ubi_thread(void *u) spin_lock(&ubi->wl_lock); if 
(list_empty(&ubi->works) || ubi->ro_mode || - !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) { + !ubi->thread_enabled || ubi_dbg_is_bgt_disabled()) { set_current_state(TASK_INTERRUPTIBLE); spin_unlock(&ubi->wl_lock); schedule(); @@ -1582,7 +1579,7 @@ static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec) long long read_ec; struct ubi_ec_hdr *ec_hdr; - if (!ubi->dbg->chk_gen) + if (!(ubi_chk_flags & UBI_CHK_GEN)) return 0; ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); @@ -1612,18 +1609,16 @@ static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec) /** * paranoid_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree. - * @ubi: UBI device description object * @e: the wear-leveling entry to check * @root: the root of the tree * * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it * is not. */ -static int paranoid_check_in_wl_tree(const struct ubi_device *ubi, - struct ubi_wl_entry *e, +static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root) { - if (!ubi->dbg->chk_gen) + if (!(ubi_chk_flags & UBI_CHK_GEN)) return 0; if (in_wl_tree(e, root)) @@ -1643,13 +1638,12 @@ static int paranoid_check_in_wl_tree(const struct ubi_device *ubi, * * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not. 
*/ -static int paranoid_check_in_pq(const struct ubi_device *ubi, - struct ubi_wl_entry *e) +static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e) { struct ubi_wl_entry *p; int i; - if (!ubi->dbg->chk_gen) + if (!(ubi_chk_flags & UBI_CHK_GEN)) return 0; for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) diff --git a/trunk/fs/cifs/cifsfs.c b/trunk/fs/cifs/cifsfs.c index bc4b12ca537b..3e2989976297 100644 --- a/trunk/fs/cifs/cifsfs.c +++ b/trunk/fs/cifs/cifsfs.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include "cifsfs.h" #include "cifspdu.h" @@ -543,12 +542,14 @@ static const struct super_operations cifs_super_ops = { static struct dentry * cifs_get_root(struct smb_vol *vol, struct super_block *sb) { - struct dentry *dentry; + int xid, rc; + struct inode *inode; + struct qstr name; + struct dentry *dparent = NULL, *dchild = NULL, *alias; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); - char *full_path = NULL; - char *s, *p; + unsigned int i, full_len, len; + char *full_path = NULL, *pstart; char sep; - int xid; full_path = cifs_build_path_to_root(vol, cifs_sb, cifs_sb_master_tcon(cifs_sb)); @@ -559,32 +560,73 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) xid = GetXid(); sep = CIFS_DIR_SEP(cifs_sb); - dentry = dget(sb->s_root); - p = s = full_path; - - do { - struct inode *dir = dentry->d_inode; - struct dentry *child; - - /* skip separators */ - while (*s == sep) - s++; - if (!*s) - break; - p = s++; - /* next separator */ - while (*s && *s != sep) - s++; - - mutex_lock(&dir->i_mutex); - child = lookup_one_len(p, dentry, s - p); - mutex_unlock(&dir->i_mutex); - dput(dentry); - dentry = child; - } while (!IS_ERR(dentry)); + dparent = dget(sb->s_root); + full_len = strlen(full_path); + full_path[full_len] = sep; + pstart = full_path + 1; + + for (i = 1, len = 0; i <= full_len; i++) { + if (full_path[i] != sep || !len) { + len++; + continue; + } + + full_path[i] = 0; + cFYI(1, "get dentry for %s", pstart); + + 
name.name = pstart; + name.len = len; + name.hash = full_name_hash(pstart, len); + dchild = d_lookup(dparent, &name); + if (dchild == NULL) { + cFYI(1, "not exists"); + dchild = d_alloc(dparent, &name); + if (dchild == NULL) { + dput(dparent); + dparent = ERR_PTR(-ENOMEM); + goto out; + } + } + + cFYI(1, "get inode"); + if (dchild->d_inode == NULL) { + cFYI(1, "not exists"); + inode = NULL; + if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext) + rc = cifs_get_inode_info_unix(&inode, full_path, + sb, xid); + else + rc = cifs_get_inode_info(&inode, full_path, + NULL, sb, xid, NULL); + if (rc) { + dput(dchild); + dput(dparent); + dparent = ERR_PTR(rc); + goto out; + } + alias = d_materialise_unique(dchild, inode); + if (alias != NULL) { + dput(dchild); + if (IS_ERR(alias)) { + dput(dparent); + dparent = ERR_PTR(-EINVAL); /* XXX */ + goto out; + } + dchild = alias; + } + } + cFYI(1, "parent %p, child %p", dparent, dchild); + + dput(dparent); + dparent = dchild; + len = 0; + pstart = full_path + i + 1; + full_path[i] = sep; + } +out: _FreeXid(xid); kfree(full_path); - return dentry; + return dparent; } static int cifs_set_super(struct super_block *sb, void *data) diff --git a/trunk/fs/cifs/file.c b/trunk/fs/cifs/file.c index a9b4a24f2a16..bb71471a4d9d 100644 --- a/trunk/fs/cifs/file.c +++ b/trunk/fs/cifs/file.c @@ -1737,7 +1737,7 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, io_parms.pid = pid; io_parms.tcon = pTcon; io_parms.offset = *poffset; - io_parms.length = cur_len; + io_parms.length = len; rc = CIFSSMBRead(xid, &io_parms, &bytes_read, &read_data, &buf_type); pSMBr = (struct smb_com_read_rsp *)read_data; diff --git a/trunk/fs/dcache.c b/trunk/fs/dcache.c index fbdcbca40725..6e4ea6d87774 100644 --- a/trunk/fs/dcache.c +++ b/trunk/fs/dcache.c @@ -1813,6 +1813,8 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, tname = dentry->d_name.name; i = dentry->d_inode; prefetch(tname); + if (i) + prefetch(i); /* * This seqcount check is 
required to ensure name and * len are loaded atomically, so as not to walk off the diff --git a/trunk/fs/dlm/ast.c b/trunk/fs/dlm/ast.c index 90e5997262ea..abc49f292454 100644 --- a/trunk/fs/dlm/ast.c +++ b/trunk/fs/dlm/ast.c @@ -14,9 +14,17 @@ #include "dlm_internal.h" #include "lock.h" #include "user.h" +#include "ast.h" + +#define WAKE_ASTS 0 + +static uint64_t ast_seq_count; +static struct list_head ast_queue; +static spinlock_t ast_queue_lock; +static struct task_struct * astd_task; +static unsigned long astd_wakeflags; +static struct mutex astd_running; -static uint64_t dlm_cb_seq; -static spinlock_t dlm_cb_seq_spin; static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb) { @@ -49,13 +57,21 @@ static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb) } } +void dlm_del_ast(struct dlm_lkb *lkb) +{ + spin_lock(&ast_queue_lock); + if (!list_empty(&lkb->lkb_astqueue)) + list_del_init(&lkb->lkb_astqueue); + spin_unlock(&ast_queue_lock); +} + int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, uint32_t sbflags, uint64_t seq) { struct dlm_ls *ls = lkb->lkb_resource->res_ls; uint64_t prev_seq; int prev_mode; - int i, rv; + int i; for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { if (lkb->lkb_callbacks[i].seq) @@ -84,8 +100,7 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, mode, (unsigned long long)prev_seq, prev_mode); - rv = 0; - goto out; + return 0; } } @@ -94,7 +109,6 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, lkb->lkb_callbacks[i].mode = mode; lkb->lkb_callbacks[i].sb_status = status; lkb->lkb_callbacks[i].sb_flags = (sbflags & 0x000000FF); - rv = 0; break; } @@ -103,24 +117,21 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, lkb->lkb_id, (unsigned long long)seq, flags, mode, status, sbflags); dlm_dump_lkb_callbacks(lkb); - rv = -1; - goto out; + return -1; } - out: - return rv; + + return 0; } int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb 
*lkb, struct dlm_callback *cb, int *resid) { - int i, rv; + int i; *resid = 0; - if (!lkb->lkb_callbacks[0].seq) { - rv = -ENOENT; - goto out; - } + if (!lkb->lkb_callbacks[0].seq) + return -ENOENT; /* oldest undelivered cb is callbacks[0] */ @@ -152,8 +163,7 @@ int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb, cb->mode, (unsigned long long)lkb->lkb_last_cast.seq, lkb->lkb_last_cast.mode); - rv = 0; - goto out; + return 0; } } @@ -166,150 +176,171 @@ int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb, memcpy(&lkb->lkb_last_bast, cb, sizeof(struct dlm_callback)); lkb->lkb_last_bast_time = ktime_get(); } - rv = 0; - out: - return rv; + + return 0; } -void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, - uint32_t sbflags) +void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, + uint32_t sbflags) { - struct dlm_ls *ls = lkb->lkb_resource->res_ls; - uint64_t new_seq, prev_seq; + uint64_t seq; int rv; - spin_lock(&dlm_cb_seq_spin); - new_seq = ++dlm_cb_seq; - spin_unlock(&dlm_cb_seq_spin); + spin_lock(&ast_queue_lock); + + seq = ++ast_seq_count; if (lkb->lkb_flags & DLM_IFL_USER) { - dlm_user_add_ast(lkb, flags, mode, status, sbflags, new_seq); + spin_unlock(&ast_queue_lock); + dlm_user_add_ast(lkb, flags, mode, status, sbflags, seq); return; } - mutex_lock(&lkb->lkb_cb_mutex); - prev_seq = lkb->lkb_callbacks[0].seq; - - rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, new_seq); - if (rv < 0) - goto out; + rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, seq); + if (rv < 0) { + spin_unlock(&ast_queue_lock); + return; + } - if (!prev_seq) { + if (list_empty(&lkb->lkb_astqueue)) { kref_get(&lkb->lkb_ref); - - if (test_bit(LSFL_CB_DELAY, &ls->ls_flags)) { - mutex_lock(&ls->ls_cb_mutex); - list_add(&lkb->lkb_cb_list, &ls->ls_cb_delay); - mutex_unlock(&ls->ls_cb_mutex); - } else { - queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work); - } + list_add_tail(&lkb->lkb_astqueue, 
&ast_queue); } - out: - mutex_unlock(&lkb->lkb_cb_mutex); + spin_unlock(&ast_queue_lock); + + set_bit(WAKE_ASTS, &astd_wakeflags); + wake_up_process(astd_task); } -void dlm_callback_work(struct work_struct *work) +static void process_asts(void) { - struct dlm_lkb *lkb = container_of(work, struct dlm_lkb, lkb_cb_work); - struct dlm_ls *ls = lkb->lkb_resource->res_ls; + struct dlm_ls *ls = NULL; + struct dlm_rsb *r = NULL; + struct dlm_lkb *lkb; void (*castfn) (void *astparam); void (*bastfn) (void *astparam, int mode); struct dlm_callback callbacks[DLM_CALLBACKS_SIZE]; int i, rv, resid; - memset(&callbacks, 0, sizeof(callbacks)); +repeat: + spin_lock(&ast_queue_lock); + list_for_each_entry(lkb, &ast_queue, lkb_astqueue) { + r = lkb->lkb_resource; + ls = r->res_ls; - mutex_lock(&lkb->lkb_cb_mutex); - if (!lkb->lkb_callbacks[0].seq) { - /* no callback work exists, shouldn't happen */ - log_error(ls, "dlm_callback_work %x no work", lkb->lkb_id); - dlm_print_lkb(lkb); - dlm_dump_lkb_callbacks(lkb); - } + if (dlm_locking_stopped(ls)) + continue; - for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { - rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid); - if (rv < 0) - break; - } + /* we remove from astqueue list and remove everything in + lkb_callbacks before releasing the spinlock so empty + lkb_astqueue is always consistent with empty lkb_callbacks */ - if (resid) { - /* cbs remain, loop should have removed all, shouldn't happen */ - log_error(ls, "dlm_callback_work %x resid %d", lkb->lkb_id, - resid); - dlm_print_lkb(lkb); - dlm_dump_lkb_callbacks(lkb); - } - mutex_unlock(&lkb->lkb_cb_mutex); + list_del_init(&lkb->lkb_astqueue); - castfn = lkb->lkb_astfn; - bastfn = lkb->lkb_bastfn; + castfn = lkb->lkb_astfn; + bastfn = lkb->lkb_bastfn; - for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { - if (!callbacks[i].seq) - break; - if (callbacks[i].flags & DLM_CB_SKIP) { - continue; - } else if (callbacks[i].flags & DLM_CB_BAST) { - bastfn(lkb->lkb_astparam, callbacks[i].mode); - } else 
if (callbacks[i].flags & DLM_CB_CAST) { - lkb->lkb_lksb->sb_status = callbacks[i].sb_status; - lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags; - castfn(lkb->lkb_astparam); + memset(&callbacks, 0, sizeof(callbacks)); + + for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { + rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid); + if (rv < 0) + break; } + spin_unlock(&ast_queue_lock); + + if (resid) { + /* shouldn't happen, for loop should have removed all */ + log_error(ls, "callback resid %d lkb %x", + resid, lkb->lkb_id); + } + + for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { + if (!callbacks[i].seq) + break; + if (callbacks[i].flags & DLM_CB_SKIP) { + continue; + } else if (callbacks[i].flags & DLM_CB_BAST) { + bastfn(lkb->lkb_astparam, callbacks[i].mode); + } else if (callbacks[i].flags & DLM_CB_CAST) { + lkb->lkb_lksb->sb_status = callbacks[i].sb_status; + lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags; + castfn(lkb->lkb_astparam); + } + } + + /* removes ref for ast_queue, may cause lkb to be freed */ + dlm_put_lkb(lkb); + + cond_resched(); + goto repeat; } + spin_unlock(&ast_queue_lock); +} + +static inline int no_asts(void) +{ + int ret; - /* undo kref_get from dlm_add_callback, may cause lkb to be freed */ - dlm_put_lkb(lkb); + spin_lock(&ast_queue_lock); + ret = list_empty(&ast_queue); + spin_unlock(&ast_queue_lock); + return ret; } -int dlm_callback_start(struct dlm_ls *ls) +static int dlm_astd(void *data) { - ls->ls_callback_wq = alloc_workqueue("dlm_callback", - WQ_UNBOUND | - WQ_MEM_RECLAIM | - WQ_NON_REENTRANT, - 0); - if (!ls->ls_callback_wq) { - log_print("can't start dlm_callback workqueue"); - return -ENOMEM; + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + if (!test_bit(WAKE_ASTS, &astd_wakeflags)) + schedule(); + set_current_state(TASK_RUNNING); + + mutex_lock(&astd_running); + if (test_and_clear_bit(WAKE_ASTS, &astd_wakeflags)) + process_asts(); + mutex_unlock(&astd_running); } return 0; } -void dlm_callback_stop(struct 
dlm_ls *ls) +void dlm_astd_wake(void) { - if (ls->ls_callback_wq) - destroy_workqueue(ls->ls_callback_wq); + if (!no_asts()) { + set_bit(WAKE_ASTS, &astd_wakeflags); + wake_up_process(astd_task); + } } -void dlm_callback_suspend(struct dlm_ls *ls) +int dlm_astd_start(void) { - set_bit(LSFL_CB_DELAY, &ls->ls_flags); - - if (ls->ls_callback_wq) - flush_workqueue(ls->ls_callback_wq); + struct task_struct *p; + int error = 0; + + INIT_LIST_HEAD(&ast_queue); + spin_lock_init(&ast_queue_lock); + mutex_init(&astd_running); + + p = kthread_run(dlm_astd, NULL, "dlm_astd"); + if (IS_ERR(p)) + error = PTR_ERR(p); + else + astd_task = p; + return error; } -void dlm_callback_resume(struct dlm_ls *ls) +void dlm_astd_stop(void) { - struct dlm_lkb *lkb, *safe; - int count = 0; - - clear_bit(LSFL_CB_DELAY, &ls->ls_flags); - - if (!ls->ls_callback_wq) - return; + kthread_stop(astd_task); +} - mutex_lock(&ls->ls_cb_mutex); - list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) { - list_del_init(&lkb->lkb_cb_list); - queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work); - count++; - } - mutex_unlock(&ls->ls_cb_mutex); +void dlm_astd_suspend(void) +{ + mutex_lock(&astd_running); +} - log_debug(ls, "dlm_callback_resume %d", count); +void dlm_astd_resume(void) +{ + mutex_unlock(&astd_running); } diff --git a/trunk/fs/dlm/ast.h b/trunk/fs/dlm/ast.h index 757b551c6820..8aa89c9b5611 100644 --- a/trunk/fs/dlm/ast.h +++ b/trunk/fs/dlm/ast.h @@ -18,15 +18,14 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, uint32_t sbflags, uint64_t seq); int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_callback *cb, int *resid); -void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, - uint32_t sbflags); +void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, + uint32_t sbflags); -void dlm_callback_work(struct work_struct *work); -int dlm_callback_start(struct dlm_ls *ls); -void 
dlm_callback_stop(struct dlm_ls *ls); -void dlm_callback_suspend(struct dlm_ls *ls); -void dlm_callback_resume(struct dlm_ls *ls); +void dlm_astd_wake(void); +int dlm_astd_start(void); +void dlm_astd_stop(void); +void dlm_astd_suspend(void); +void dlm_astd_resume(void); #endif - diff --git a/trunk/fs/dlm/config.c b/trunk/fs/dlm/config.c index 6cf72fcc0d0c..9b026ea8baa9 100644 --- a/trunk/fs/dlm/config.c +++ b/trunk/fs/dlm/config.c @@ -28,8 +28,7 @@ * /config/dlm//spaces//nodes//weight * /config/dlm//comms//nodeid * /config/dlm//comms//local - * /config/dlm//comms//addr (write only) - * /config/dlm//comms//addr_list (read only) + * /config/dlm//comms//addr * The level is useless, but I haven't figured out how to avoid it. */ @@ -81,7 +80,6 @@ static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, size_t len); static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len); -static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf); static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf); static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, size_t len); @@ -94,6 +92,7 @@ struct dlm_cluster { unsigned int cl_tcp_port; unsigned int cl_buffer_size; unsigned int cl_rsbtbl_size; + unsigned int cl_lkbtbl_size; unsigned int cl_dirtbl_size; unsigned int cl_recover_timer; unsigned int cl_toss_secs; @@ -102,13 +101,13 @@ struct dlm_cluster { unsigned int cl_protocol; unsigned int cl_timewarn_cs; unsigned int cl_waitwarn_us; - unsigned int cl_new_rsb_count; }; enum { CLUSTER_ATTR_TCP_PORT = 0, CLUSTER_ATTR_BUFFER_SIZE, CLUSTER_ATTR_RSBTBL_SIZE, + CLUSTER_ATTR_LKBTBL_SIZE, CLUSTER_ATTR_DIRTBL_SIZE, CLUSTER_ATTR_RECOVER_TIMER, CLUSTER_ATTR_TOSS_SECS, @@ -117,7 +116,6 @@ enum { CLUSTER_ATTR_PROTOCOL, CLUSTER_ATTR_TIMEWARN_CS, CLUSTER_ATTR_WAITWARN_US, - CLUSTER_ATTR_NEW_RSB_COUNT, }; struct cluster_attribute { @@ -162,6 +160,7 @@ __CONFIGFS_ATTR(name, 0644, name##_read, name##_write) CLUSTER_ATTR(tcp_port, 1); 
CLUSTER_ATTR(buffer_size, 1); CLUSTER_ATTR(rsbtbl_size, 1); +CLUSTER_ATTR(lkbtbl_size, 1); CLUSTER_ATTR(dirtbl_size, 1); CLUSTER_ATTR(recover_timer, 1); CLUSTER_ATTR(toss_secs, 1); @@ -170,12 +169,12 @@ CLUSTER_ATTR(log_debug, 0); CLUSTER_ATTR(protocol, 0); CLUSTER_ATTR(timewarn_cs, 1); CLUSTER_ATTR(waitwarn_us, 0); -CLUSTER_ATTR(new_rsb_count, 0); static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr, [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr, + [CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr, [CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr, [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr, [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr, @@ -184,7 +183,6 @@ static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr, - [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr, NULL, }; @@ -192,7 +190,6 @@ enum { COMM_ATTR_NODEID = 0, COMM_ATTR_LOCAL, COMM_ATTR_ADDR, - COMM_ATTR_ADDR_LIST, }; struct comm_attribute { @@ -220,22 +217,14 @@ static struct comm_attribute comm_attr_local = { static struct comm_attribute comm_attr_addr = { .attr = { .ca_owner = THIS_MODULE, .ca_name = "addr", - .ca_mode = S_IWUSR }, + .ca_mode = S_IRUGO | S_IWUSR }, .store = comm_addr_write, }; -static struct comm_attribute comm_attr_addr_list = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "addr_list", - .ca_mode = S_IRUGO }, - .show = comm_addr_list_read, -}; - static struct configfs_attribute *comm_attrs[] = { [COMM_ATTR_NODEID] = &comm_attr_nodeid.attr, [COMM_ATTR_LOCAL] = &comm_attr_local.attr, [COMM_ATTR_ADDR] = &comm_attr_addr.attr, - [COMM_ATTR_ADDR_LIST] = &comm_attr_addr_list.attr, NULL, }; @@ -446,6 +435,7 @@ static 
struct config_group *make_cluster(struct config_group *g, cl->cl_tcp_port = dlm_config.ci_tcp_port; cl->cl_buffer_size = dlm_config.ci_buffer_size; cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size; + cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size; cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size; cl->cl_recover_timer = dlm_config.ci_recover_timer; cl->cl_toss_secs = dlm_config.ci_toss_secs; @@ -454,7 +444,6 @@ static struct config_group *make_cluster(struct config_group *g, cl->cl_protocol = dlm_config.ci_protocol; cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us; - cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count; space_list = &sps->ss_group; comm_list = &cms->cs_group; @@ -731,50 +720,6 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) return len; } -static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf) -{ - ssize_t s; - ssize_t allowance; - int i; - struct sockaddr_storage *addr; - struct sockaddr_in *addr_in; - struct sockaddr_in6 *addr_in6; - - /* Taken from ip6_addr_string() defined in lib/vsprintf.c */ - char buf0[sizeof("AF_INET6 xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255\n")]; - - - /* Derived from SIMPLE_ATTR_SIZE of fs/configfs/file.c */ - allowance = 4096; - buf[0] = '\0'; - - for (i = 0; i < cm->addr_count; i++) { - addr = cm->addr[i]; - - switch(addr->ss_family) { - case AF_INET: - addr_in = (struct sockaddr_in *)addr; - s = sprintf(buf0, "AF_INET %pI4\n", &addr_in->sin_addr.s_addr); - break; - case AF_INET6: - addr_in6 = (struct sockaddr_in6 *)addr; - s = sprintf(buf0, "AF_INET6 %pI6\n", &addr_in6->sin6_addr); - break; - default: - s = sprintf(buf0, "%s\n", ""); - break; - } - allowance -= s; - if (allowance >= 0) - strcat(buf, buf0); - else { - allowance += s; - break; - } - } - return 4096 - allowance; -} - static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, char *buf) { @@ -1038,6 +983,7 @@ int dlm_our_addr(struct 
sockaddr_storage *addr, int num) #define DEFAULT_TCP_PORT 21064 #define DEFAULT_BUFFER_SIZE 4096 #define DEFAULT_RSBTBL_SIZE 1024 +#define DEFAULT_LKBTBL_SIZE 1024 #define DEFAULT_DIRTBL_SIZE 1024 #define DEFAULT_RECOVER_TIMER 5 #define DEFAULT_TOSS_SECS 10 @@ -1046,12 +992,12 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) #define DEFAULT_PROTOCOL 0 #define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ #define DEFAULT_WAITWARN_US 0 -#define DEFAULT_NEW_RSB_COUNT 128 struct dlm_config_info dlm_config = { .ci_tcp_port = DEFAULT_TCP_PORT, .ci_buffer_size = DEFAULT_BUFFER_SIZE, .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE, + .ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE, .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE, .ci_recover_timer = DEFAULT_RECOVER_TIMER, .ci_toss_secs = DEFAULT_TOSS_SECS, @@ -1059,7 +1005,6 @@ struct dlm_config_info dlm_config = { .ci_log_debug = DEFAULT_LOG_DEBUG, .ci_protocol = DEFAULT_PROTOCOL, .ci_timewarn_cs = DEFAULT_TIMEWARN_CS, - .ci_waitwarn_us = DEFAULT_WAITWARN_US, - .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT + .ci_waitwarn_us = DEFAULT_WAITWARN_US }; diff --git a/trunk/fs/dlm/config.h b/trunk/fs/dlm/config.h index 3099d0dd26c0..dd0ce24d5a80 100644 --- a/trunk/fs/dlm/config.h +++ b/trunk/fs/dlm/config.h @@ -20,6 +20,7 @@ struct dlm_config_info { int ci_tcp_port; int ci_buffer_size; int ci_rsbtbl_size; + int ci_lkbtbl_size; int ci_dirtbl_size; int ci_recover_timer; int ci_toss_secs; @@ -28,7 +29,6 @@ struct dlm_config_info { int ci_protocol; int ci_timewarn_cs; int ci_waitwarn_us; - int ci_new_rsb_count; }; extern struct dlm_config_info dlm_config; diff --git a/trunk/fs/dlm/dlm_internal.h b/trunk/fs/dlm/dlm_internal.h index fe2860c02449..0262451eb9c6 100644 --- a/trunk/fs/dlm/dlm_internal.h +++ b/trunk/fs/dlm/dlm_internal.h @@ -37,7 +37,6 @@ #include #include #include -#include #include #include @@ -53,6 +52,7 @@ struct dlm_ls; struct dlm_lkb; struct dlm_rsb; struct dlm_member; +struct dlm_lkbtable; struct dlm_rsbtable; struct 
dlm_dirtable; struct dlm_direntry; @@ -108,6 +108,11 @@ struct dlm_rsbtable { spinlock_t lock; }; +struct dlm_lkbtable { + struct list_head list; + rwlock_t lock; + uint16_t counter; +}; /* * Lockspace member (per node in a ls) @@ -243,18 +248,17 @@ struct dlm_lkb { int8_t lkb_wait_count; int lkb_wait_nodeid; /* for debugging */ + struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ struct list_head lkb_statequeue; /* rsb g/c/w list */ struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */ struct list_head lkb_wait_reply; /* waiting for remote reply */ + struct list_head lkb_astqueue; /* need ast to be sent */ struct list_head lkb_ownqueue; /* list of locks for a process */ struct list_head lkb_time_list; ktime_t lkb_timestamp; ktime_t lkb_wait_time; unsigned long lkb_timeout_cs; - struct mutex lkb_cb_mutex; - struct work_struct lkb_cb_work; - struct list_head lkb_cb_list; /* for ls_cb_delay or proc->asts */ struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE]; struct dlm_callback lkb_last_cast; struct dlm_callback lkb_last_bast; @@ -295,7 +299,7 @@ struct dlm_rsb { int res_recover_locks_count; char *res_lvbptr; - char res_name[DLM_RESNAME_MAXLEN+1]; + char res_name[1]; }; /* find_rsb() flags */ @@ -461,12 +465,12 @@ struct dlm_ls { unsigned long ls_scan_time; struct kobject ls_kobj; - struct idr ls_lkbidr; - spinlock_t ls_lkbidr_spin; - struct dlm_rsbtable *ls_rsbtbl; uint32_t ls_rsbtbl_size; + struct dlm_lkbtable *ls_lkbtbl; + uint32_t ls_lkbtbl_size; + struct dlm_dirtable *ls_dirtbl; uint32_t ls_dirtbl_size; @@ -479,10 +483,6 @@ struct dlm_ls { struct mutex ls_timeout_mutex; struct list_head ls_timeout; - spinlock_t ls_new_rsb_spin; - int ls_new_rsb_count; - struct list_head ls_new_rsb; /* new rsb structs */ - struct list_head ls_nodes; /* current nodes in ls */ struct list_head ls_nodes_gone; /* dead node list, recovery */ int ls_num_nodes; /* number of nodes in ls */ @@ -506,12 +506,8 @@ struct dlm_ls { struct miscdevice ls_device; - struct 
workqueue_struct *ls_callback_wq; - /* recovery related */ - struct mutex ls_cb_mutex; - struct list_head ls_cb_delay; /* save for queue_work later */ struct timer_list ls_timer; struct task_struct *ls_recoverd_task; struct mutex ls_recoverd_active; @@ -548,7 +544,6 @@ struct dlm_ls { #define LSFL_RCOM_WAIT 4 #define LSFL_UEVENT_WAIT 5 #define LSFL_TIMEWARN 6 -#define LSFL_CB_DELAY 7 /* much of this is just saving user space pointers associated with the lock that we pass back to the user lib with an ast */ diff --git a/trunk/fs/dlm/lock.c b/trunk/fs/dlm/lock.c index 83b5e32514e1..f71d0b5abd95 100644 --- a/trunk/fs/dlm/lock.c +++ b/trunk/fs/dlm/lock.c @@ -305,7 +305,7 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) rv = -EDEADLK; } - dlm_add_cb(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags); + dlm_add_ast(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags); } static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb) @@ -319,7 +319,7 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) if (is_master_copy(lkb)) { send_bast(r, lkb, rqmode); } else { - dlm_add_cb(lkb, DLM_CB_BAST, rqmode, 0, 0); + dlm_add_ast(lkb, DLM_CB_BAST, rqmode, 0, 0); } } @@ -327,68 +327,19 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) * Basic operations on rsb's and lkb's */ -static int pre_rsb_struct(struct dlm_ls *ls) -{ - struct dlm_rsb *r1, *r2; - int count = 0; - - spin_lock(&ls->ls_new_rsb_spin); - if (ls->ls_new_rsb_count > dlm_config.ci_new_rsb_count / 2) { - spin_unlock(&ls->ls_new_rsb_spin); - return 0; - } - spin_unlock(&ls->ls_new_rsb_spin); - - r1 = dlm_allocate_rsb(ls); - r2 = dlm_allocate_rsb(ls); - - spin_lock(&ls->ls_new_rsb_spin); - if (r1) { - list_add(&r1->res_hashchain, &ls->ls_new_rsb); - ls->ls_new_rsb_count++; - } - if (r2) { - list_add(&r2->res_hashchain, &ls->ls_new_rsb); - ls->ls_new_rsb_count++; - } - count = ls->ls_new_rsb_count; - 
spin_unlock(&ls->ls_new_rsb_spin); - - if (!count) - return -ENOMEM; - return 0; -} - -/* If ls->ls_new_rsb is empty, return -EAGAIN, so the caller can - unlock any spinlocks, go back and call pre_rsb_struct again. - Otherwise, take an rsb off the list and return it. */ - -static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, - struct dlm_rsb **r_ret) +static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len) { struct dlm_rsb *r; - int count; - spin_lock(&ls->ls_new_rsb_spin); - if (list_empty(&ls->ls_new_rsb)) { - count = ls->ls_new_rsb_count; - spin_unlock(&ls->ls_new_rsb_spin); - log_debug(ls, "find_rsb retry %d %d %s", - count, dlm_config.ci_new_rsb_count, name); - return -EAGAIN; - } - - r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain); - list_del(&r->res_hashchain); - ls->ls_new_rsb_count--; - spin_unlock(&ls->ls_new_rsb_spin); + r = dlm_allocate_rsb(ls, len); + if (!r) + return NULL; r->res_ls = ls; r->res_length = len; memcpy(r->res_name, name, len); mutex_init(&r->res_mutex); - INIT_LIST_HEAD(&r->res_hashchain); INIT_LIST_HEAD(&r->res_lookup); INIT_LIST_HEAD(&r->res_grantqueue); INIT_LIST_HEAD(&r->res_convertqueue); @@ -396,8 +347,7 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, INIT_LIST_HEAD(&r->res_root_list); INIT_LIST_HEAD(&r->res_recover_list); - *r_ret = r; - return 0; + return r; } static int search_rsb_list(struct list_head *head, char *name, int len, @@ -455,6 +405,16 @@ static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, return error; } +static int search_rsb(struct dlm_ls *ls, char *name, int len, int b, + unsigned int flags, struct dlm_rsb **r_ret) +{ + int error; + spin_lock(&ls->ls_rsbtbl[b].lock); + error = _search_rsb(ls, name, len, b, flags, r_ret); + spin_unlock(&ls->ls_rsbtbl[b].lock); + return error; +} + /* * Find rsb in rsbtbl and potentially create/add one * @@ -472,48 +432,35 @@ static int _search_rsb(struct dlm_ls *ls, char *name, int len, int 
b, static int find_rsb(struct dlm_ls *ls, char *name, int namelen, unsigned int flags, struct dlm_rsb **r_ret) { - struct dlm_rsb *r = NULL; + struct dlm_rsb *r = NULL, *tmp; uint32_t hash, bucket; - int error; + int error = -EINVAL; - if (namelen > DLM_RESNAME_MAXLEN) { - error = -EINVAL; + if (namelen > DLM_RESNAME_MAXLEN) goto out; - } if (dlm_no_directory(ls)) flags |= R_CREATE; + error = 0; hash = jhash(name, namelen, 0); bucket = hash & (ls->ls_rsbtbl_size - 1); - retry: - if (flags & R_CREATE) { - error = pre_rsb_struct(ls); - if (error < 0) - goto out; - } - - spin_lock(&ls->ls_rsbtbl[bucket].lock); - - error = _search_rsb(ls, name, namelen, bucket, flags, &r); + error = search_rsb(ls, name, namelen, bucket, flags, &r); if (!error) - goto out_unlock; + goto out; if (error == -EBADR && !(flags & R_CREATE)) - goto out_unlock; + goto out; /* the rsb was found but wasn't a master copy */ if (error == -ENOTBLK) - goto out_unlock; + goto out; - error = get_rsb_struct(ls, name, namelen, &r); - if (error == -EAGAIN) { - spin_unlock(&ls->ls_rsbtbl[bucket].lock); - goto retry; - } - if (error) - goto out_unlock; + error = -ENOMEM; + r = create_rsb(ls, name, namelen); + if (!r) + goto out; r->res_hash = hash; r->res_bucket = bucket; @@ -527,10 +474,18 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen, nodeid = 0; r->res_nodeid = nodeid; } + + spin_lock(&ls->ls_rsbtbl[bucket].lock); + error = _search_rsb(ls, name, namelen, bucket, 0, &tmp); + if (!error) { + spin_unlock(&ls->ls_rsbtbl[bucket].lock); + dlm_free_rsb(r); + r = tmp; + goto out; + } list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list); - error = 0; - out_unlock: spin_unlock(&ls->ls_rsbtbl[bucket].lock); + error = 0; out: *r_ret = r; return error; @@ -625,8 +580,9 @@ static void detach_lkb(struct dlm_lkb *lkb) static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) { - struct dlm_lkb *lkb; - int rv, id; + struct dlm_lkb *lkb, *tmp; + uint32_t lkid = 0; + uint16_t bucket; lkb = 
dlm_allocate_lkb(ls); if (!lkb) @@ -638,42 +594,60 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) INIT_LIST_HEAD(&lkb->lkb_ownqueue); INIT_LIST_HEAD(&lkb->lkb_rsb_lookup); INIT_LIST_HEAD(&lkb->lkb_time_list); - INIT_LIST_HEAD(&lkb->lkb_cb_list); - mutex_init(&lkb->lkb_cb_mutex); - INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work); + INIT_LIST_HEAD(&lkb->lkb_astqueue); - retry: - rv = idr_pre_get(&ls->ls_lkbidr, GFP_NOFS); - if (!rv) - return -ENOMEM; + get_random_bytes(&bucket, sizeof(bucket)); + bucket &= (ls->ls_lkbtbl_size - 1); + + write_lock(&ls->ls_lkbtbl[bucket].lock); - spin_lock(&ls->ls_lkbidr_spin); - rv = idr_get_new_above(&ls->ls_lkbidr, lkb, 1, &id); - if (!rv) - lkb->lkb_id = id; - spin_unlock(&ls->ls_lkbidr_spin); + /* counter can roll over so we must verify lkid is not in use */ - if (rv == -EAGAIN) - goto retry; + while (lkid == 0) { + lkid = (bucket << 16) | ls->ls_lkbtbl[bucket].counter++; - if (rv < 0) { - log_error(ls, "create_lkb idr error %d", rv); - return rv; + list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list, + lkb_idtbl_list) { + if (tmp->lkb_id != lkid) + continue; + lkid = 0; + break; + } } + lkb->lkb_id = lkid; + list_add(&lkb->lkb_idtbl_list, &ls->ls_lkbtbl[bucket].list); + write_unlock(&ls->ls_lkbtbl[bucket].lock); + *lkb_ret = lkb; return 0; } +static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid) +{ + struct dlm_lkb *lkb; + uint16_t bucket = (lkid >> 16); + + list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) { + if (lkb->lkb_id == lkid) + return lkb; + } + return NULL; +} + static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret) { struct dlm_lkb *lkb; + uint16_t bucket = (lkid >> 16); + + if (bucket >= ls->ls_lkbtbl_size) + return -EBADSLT; - spin_lock(&ls->ls_lkbidr_spin); - lkb = idr_find(&ls->ls_lkbidr, lkid); + read_lock(&ls->ls_lkbtbl[bucket].lock); + lkb = __find_lkb(ls, lkid); if (lkb) kref_get(&lkb->lkb_ref); - 
spin_unlock(&ls->ls_lkbidr_spin); + read_unlock(&ls->ls_lkbtbl[bucket].lock); *lkb_ret = lkb; return lkb ? 0 : -ENOENT; @@ -694,12 +668,12 @@ static void kill_lkb(struct kref *kref) static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) { - uint32_t lkid = lkb->lkb_id; + uint16_t bucket = (lkb->lkb_id >> 16); - spin_lock(&ls->ls_lkbidr_spin); + write_lock(&ls->ls_lkbtbl[bucket].lock); if (kref_put(&lkb->lkb_ref, kill_lkb)) { - idr_remove(&ls->ls_lkbidr, lkid); - spin_unlock(&ls->ls_lkbidr_spin); + list_del(&lkb->lkb_idtbl_list); + write_unlock(&ls->ls_lkbtbl[bucket].lock); detach_lkb(lkb); @@ -709,7 +683,7 @@ static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) dlm_free_lkb(lkb); return 1; } else { - spin_unlock(&ls->ls_lkbidr_spin); + write_unlock(&ls->ls_lkbtbl[bucket].lock); return 0; } } @@ -875,7 +849,9 @@ void dlm_scan_waiters(struct dlm_ls *ls) if (!num_nodes) { num_nodes = ls->ls_num_nodes; - warned = kzalloc(num_nodes * sizeof(int), GFP_KERNEL); + warned = kmalloc(GFP_KERNEL, num_nodes * sizeof(int)); + if (warned) + memset(warned, 0, num_nodes * sizeof(int)); } if (!warned) continue; @@ -887,7 +863,9 @@ void dlm_scan_waiters(struct dlm_ls *ls) dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid); } mutex_unlock(&ls->ls_waiters_mutex); - kfree(warned); + + if (warned) + kfree(warned); if (debug_expired) log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us", @@ -2423,6 +2401,9 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) if (deadlk) { /* it's left on the granted queue */ + log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s", + lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status, + lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name); revert_lock(r, lkb); queue_cast(r, lkb, -EDEADLK); error = -EDEADLK; @@ -4012,6 +3993,8 @@ static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms) default: log_error(ls, "unknown message type %d", ms->m_type); } + + dlm_astd_wake(); } /* If the lockspace is in recovery mode 
(locking stopped), then normal @@ -4150,7 +4133,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) struct dlm_message *ms_stub; int wait_type, stub_unlock_result, stub_cancel_result; - ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL); + ms_stub = kmalloc(GFP_KERNEL, sizeof(struct dlm_message)); if (!ms_stub) { log_error(ls, "dlm_recover_waiters_pre no mem"); return; @@ -4826,7 +4809,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, goto out_put; spin_lock(&ua->proc->locks_spin); - /* dlm_user_add_cb() may have already taken lkb off the proc list */ + /* dlm_user_add_ast() may have already taken lkb off the proc list */ if (!list_empty(&lkb->lkb_ownqueue)) list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking); spin_unlock(&ua->proc->locks_spin); @@ -4963,7 +4946,7 @@ static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) /* We have to release clear_proc_locks mutex before calling unlock_proc_lock() (which does lock_rsb) due to deadlock with receiving a message that does - lock_rsb followed by dlm_user_add_cb() */ + lock_rsb followed by dlm_user_add_ast() */ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls, struct dlm_user_proc *proc) @@ -4986,7 +4969,7 @@ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls, return lkb; } -/* The ls_clear_proc_locks mutex protects against dlm_user_add_cb() which +/* The ls_clear_proc_locks mutex protects against dlm_user_add_asts() which 1) references lkb->ua which we free here and 2) adds lkbs to proc->asts, which we clear here. 
*/ @@ -5028,10 +5011,10 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) dlm_put_lkb(lkb); } - list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) { + list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { memset(&lkb->lkb_callbacks, 0, sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE); - list_del_init(&lkb->lkb_cb_list); + list_del_init(&lkb->lkb_astqueue); dlm_put_lkb(lkb); } @@ -5070,10 +5053,10 @@ static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) spin_unlock(&proc->locks_spin); spin_lock(&proc->asts_spin); - list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) { + list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { memset(&lkb->lkb_callbacks, 0, sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE); - list_del_init(&lkb->lkb_cb_list); + list_del_init(&lkb->lkb_astqueue); dlm_put_lkb(lkb); } spin_unlock(&proc->asts_spin); diff --git a/trunk/fs/dlm/lockspace.c b/trunk/fs/dlm/lockspace.c index a1d8f1af144b..14cbf4099753 100644 --- a/trunk/fs/dlm/lockspace.c +++ b/trunk/fs/dlm/lockspace.c @@ -15,6 +15,7 @@ #include "lockspace.h" #include "member.h" #include "recoverd.h" +#include "ast.h" #include "dir.h" #include "lowcomms.h" #include "config.h" @@ -23,7 +24,6 @@ #include "recover.h" #include "requestqueue.h" #include "user.h" -#include "ast.h" static int ls_count; static struct mutex ls_lock; @@ -359,10 +359,17 @@ static int threads_start(void) { int error; + /* Thread which process lock requests for all lockspace's */ + error = dlm_astd_start(); + if (error) { + log_print("cannot start dlm_astd thread %d", error); + goto fail; + } + error = dlm_scand_start(); if (error) { log_print("cannot start dlm_scand thread %d", error); - goto fail; + goto astd_fail; } /* Thread for sending/receiving messages for all lockspace's */ @@ -376,6 +383,8 @@ static int threads_start(void) scand_fail: dlm_scand_stop(); + astd_fail: + dlm_astd_stop(); fail: return error; } @@ -384,6 +393,7 @@ 
static void threads_stop(void) { dlm_scand_stop(); dlm_lowcomms_stop(); + dlm_astd_stop(); } static int new_lockspace(const char *name, int namelen, void **lockspace, @@ -453,7 +463,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, size = dlm_config.ci_rsbtbl_size; ls->ls_rsbtbl_size = size; - ls->ls_rsbtbl = vmalloc(sizeof(struct dlm_rsbtable) * size); + ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_NOFS); if (!ls->ls_rsbtbl) goto out_lsfree; for (i = 0; i < size; i++) { @@ -462,13 +472,22 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, spin_lock_init(&ls->ls_rsbtbl[i].lock); } - idr_init(&ls->ls_lkbidr); - spin_lock_init(&ls->ls_lkbidr_spin); + size = dlm_config.ci_lkbtbl_size; + ls->ls_lkbtbl_size = size; + + ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_NOFS); + if (!ls->ls_lkbtbl) + goto out_rsbfree; + for (i = 0; i < size; i++) { + INIT_LIST_HEAD(&ls->ls_lkbtbl[i].list); + rwlock_init(&ls->ls_lkbtbl[i].lock); + ls->ls_lkbtbl[i].counter = 1; + } size = dlm_config.ci_dirtbl_size; ls->ls_dirtbl_size = size; - ls->ls_dirtbl = vmalloc(sizeof(struct dlm_dirtable) * size); + ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_NOFS); if (!ls->ls_dirtbl) goto out_lkbfree; for (i = 0; i < size; i++) { @@ -483,9 +502,6 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, INIT_LIST_HEAD(&ls->ls_timeout); mutex_init(&ls->ls_timeout_mutex); - INIT_LIST_HEAD(&ls->ls_new_rsb); - spin_lock_init(&ls->ls_new_rsb_spin); - INIT_LIST_HEAD(&ls->ls_nodes); INIT_LIST_HEAD(&ls->ls_nodes_gone); ls->ls_num_nodes = 0; @@ -504,9 +520,6 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, init_completion(&ls->ls_members_done); ls->ls_members_result = -1; - mutex_init(&ls->ls_cb_mutex); - INIT_LIST_HEAD(&ls->ls_cb_delay); - ls->ls_recoverd_task = NULL; mutex_init(&ls->ls_recoverd_active); spin_lock_init(&ls->ls_recover_lock); @@ -540,26 
+553,18 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, list_add(&ls->ls_list, &lslist); spin_unlock(&lslist_lock); - if (flags & DLM_LSFL_FS) { - error = dlm_callback_start(ls); - if (error) { - log_error(ls, "can't start dlm_callback %d", error); - goto out_delist; - } - } - /* needs to find ls in lslist */ error = dlm_recoverd_start(ls); if (error) { log_error(ls, "can't start dlm_recoverd %d", error); - goto out_callback; + goto out_delist; } ls->ls_kobj.kset = dlm_kset; error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, "%s", ls->ls_name); if (error) - goto out_recoverd; + goto out_stop; kobject_uevent(&ls->ls_kobj, KOBJ_ADD); /* let kobject handle freeing of ls if there's an error */ @@ -573,7 +578,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, error = do_uevent(ls, 1); if (error) - goto out_recoverd; + goto out_stop; wait_for_completion(&ls->ls_members_done); error = ls->ls_members_result; @@ -590,20 +595,19 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, do_uevent(ls, 0); dlm_clear_members(ls); kfree(ls->ls_node_array); - out_recoverd: + out_stop: dlm_recoverd_stop(ls); - out_callback: - dlm_callback_stop(ls); out_delist: spin_lock(&lslist_lock); list_del(&ls->ls_list); spin_unlock(&lslist_lock); kfree(ls->ls_recover_buf); out_dirfree: - vfree(ls->ls_dirtbl); + kfree(ls->ls_dirtbl); out_lkbfree: - idr_destroy(&ls->ls_lkbidr); - vfree(ls->ls_rsbtbl); + kfree(ls->ls_lkbtbl); + out_rsbfree: + kfree(ls->ls_rsbtbl); out_lsfree: if (do_unreg) kobject_put(&ls->ls_kobj); @@ -637,64 +641,50 @@ int dlm_new_lockspace(const char *name, int namelen, void **lockspace, return error; } -static int lkb_idr_is_local(int id, void *p, void *data) -{ - struct dlm_lkb *lkb = p; - - if (!lkb->lkb_nodeid) - return 1; - return 0; -} - -static int lkb_idr_is_any(int id, void *p, void *data) -{ - return 1; -} - -static int lkb_idr_free(int id, void *p, void *data) -{ - struct dlm_lkb 
*lkb = p; - - if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY) - dlm_free_lvb(lkb->lkb_lvbptr); - - dlm_free_lkb(lkb); - return 0; -} - -/* NOTE: We check the lkbidr here rather than the resource table. - This is because there may be LKBs queued as ASTs that have been unlinked - from their RSBs and are pending deletion once the AST has been delivered */ - -static int lockspace_busy(struct dlm_ls *ls, int force) -{ - int rv; - - spin_lock(&ls->ls_lkbidr_spin); - if (force == 0) { - rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls); - } else if (force == 1) { - rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls); - } else { - rv = 0; +/* Return 1 if the lockspace still has active remote locks, + * 2 if the lockspace still has active local locks. + */ +static int lockspace_busy(struct dlm_ls *ls) +{ + int i, lkb_found = 0; + struct dlm_lkb *lkb; + + /* NOTE: We check the lockidtbl here rather than the resource table. + This is because there may be LKBs queued as ASTs that have been + unlinked from their RSBs and are pending deletion once the AST has + been delivered */ + + for (i = 0; i < ls->ls_lkbtbl_size; i++) { + read_lock(&ls->ls_lkbtbl[i].lock); + if (!list_empty(&ls->ls_lkbtbl[i].list)) { + lkb_found = 1; + list_for_each_entry(lkb, &ls->ls_lkbtbl[i].list, + lkb_idtbl_list) { + if (!lkb->lkb_nodeid) { + read_unlock(&ls->ls_lkbtbl[i].lock); + return 2; + } + } + } + read_unlock(&ls->ls_lkbtbl[i].lock); } - spin_unlock(&ls->ls_lkbidr_spin); - return rv; + return lkb_found; } static int release_lockspace(struct dlm_ls *ls, int force) { + struct dlm_lkb *lkb; struct dlm_rsb *rsb; struct list_head *head; int i, busy, rv; - busy = lockspace_busy(ls, force); + busy = lockspace_busy(ls); spin_lock(&lslist_lock); if (ls->ls_create_count == 1) { - if (busy) { + if (busy > force) rv = -EBUSY; - } else { + else { /* remove_lockspace takes ls off lslist */ ls->ls_create_count = 0; rv = 0; @@ -718,12 +708,12 @@ static int release_lockspace(struct dlm_ls *ls, 
int force) dlm_recoverd_stop(ls); - dlm_callback_stop(ls); - remove_lockspace(ls); dlm_delete_debug_file(ls); + dlm_astd_suspend(); + kfree(ls->ls_recover_buf); /* @@ -731,15 +721,31 @@ static int release_lockspace(struct dlm_ls *ls, int force) */ dlm_dir_clear(ls); - vfree(ls->ls_dirtbl); + kfree(ls->ls_dirtbl); /* - * Free all lkb's in idr + * Free all lkb's on lkbtbl[] lists. */ - idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls); - idr_remove_all(&ls->ls_lkbidr); - idr_destroy(&ls->ls_lkbidr); + for (i = 0; i < ls->ls_lkbtbl_size; i++) { + head = &ls->ls_lkbtbl[i].list; + while (!list_empty(head)) { + lkb = list_entry(head->next, struct dlm_lkb, + lkb_idtbl_list); + + list_del(&lkb->lkb_idtbl_list); + + dlm_del_ast(lkb); + + if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY) + dlm_free_lvb(lkb->lkb_lvbptr); + + dlm_free_lkb(lkb); + } + } + dlm_astd_resume(); + + kfree(ls->ls_lkbtbl); /* * Free all rsb's on rsbtbl[] lists @@ -764,14 +770,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) } } - vfree(ls->ls_rsbtbl); - - while (!list_empty(&ls->ls_new_rsb)) { - rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, - res_hashchain); - list_del(&rsb->res_hashchain); - dlm_free_rsb(rsb); - } + kfree(ls->ls_rsbtbl); /* * Free structures on any other lists diff --git a/trunk/fs/dlm/lowcomms.c b/trunk/fs/dlm/lowcomms.c index 990626e7da80..5e2c71f05e46 100644 --- a/trunk/fs/dlm/lowcomms.c +++ b/trunk/fs/dlm/lowcomms.c @@ -512,10 +512,12 @@ static void process_sctp_notification(struct connection *con, } make_sockaddr(&prim.ssp_addr, 0, &addr_len); if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) { + int i; unsigned char *b=(unsigned char *)&prim.ssp_addr; log_print("reject connect from unknown addr"); - print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, - b, sizeof(struct sockaddr_storage)); + for (i=0; isock_mutex); return -1; diff --git a/trunk/fs/dlm/memory.c b/trunk/fs/dlm/memory.c index da64df7576e1..8e0d00db004f 100644 --- a/trunk/fs/dlm/memory.c +++ 
b/trunk/fs/dlm/memory.c @@ -16,7 +16,6 @@ #include "memory.h" static struct kmem_cache *lkb_cache; -static struct kmem_cache *rsb_cache; int __init dlm_memory_init(void) @@ -27,14 +26,6 @@ int __init dlm_memory_init(void) __alignof__(struct dlm_lkb), 0, NULL); if (!lkb_cache) ret = -ENOMEM; - - rsb_cache = kmem_cache_create("dlm_rsb", sizeof(struct dlm_rsb), - __alignof__(struct dlm_rsb), 0, NULL); - if (!rsb_cache) { - kmem_cache_destroy(lkb_cache); - ret = -ENOMEM; - } - return ret; } @@ -42,8 +33,6 @@ void dlm_memory_exit(void) { if (lkb_cache) kmem_cache_destroy(lkb_cache); - if (rsb_cache) - kmem_cache_destroy(rsb_cache); } char *dlm_allocate_lvb(struct dlm_ls *ls) @@ -59,11 +48,16 @@ void dlm_free_lvb(char *p) kfree(p); } -struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls) +/* FIXME: have some minimal space built-in to rsb for the name and + kmalloc a separate name if needed, like dentries are done */ + +struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen) { struct dlm_rsb *r; - r = kmem_cache_zalloc(rsb_cache, GFP_NOFS); + DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,); + + r = kzalloc(sizeof(*r) + namelen, GFP_NOFS); return r; } @@ -71,7 +65,7 @@ void dlm_free_rsb(struct dlm_rsb *r) { if (r->res_lvbptr) dlm_free_lvb(r->res_lvbptr); - kmem_cache_free(rsb_cache, r); + kfree(r); } struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls) diff --git a/trunk/fs/dlm/memory.h b/trunk/fs/dlm/memory.h index 177c11cbb0a6..485fb29143bd 100644 --- a/trunk/fs/dlm/memory.h +++ b/trunk/fs/dlm/memory.h @@ -16,7 +16,7 @@ int dlm_memory_init(void); void dlm_memory_exit(void); -struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls); +struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen); void dlm_free_rsb(struct dlm_rsb *r); struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls); void dlm_free_lkb(struct dlm_lkb *l); diff --git a/trunk/fs/dlm/recoverd.c b/trunk/fs/dlm/recoverd.c index 774da3cf92c6..fd677c8c3d3b 100644 --- a/trunk/fs/dlm/recoverd.c +++ 
b/trunk/fs/dlm/recoverd.c @@ -58,7 +58,13 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) mutex_lock(&ls->ls_recoverd_active); - dlm_callback_suspend(ls); + /* + * Suspending and resuming dlm_astd ensures that no lkb's from this ls + * will be processed by dlm_astd during recovery. + */ + + dlm_astd_suspend(); + dlm_astd_resume(); /* * Free non-master tossed rsb's. Master rsb's are kept on toss @@ -196,8 +202,6 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_adjust_timeouts(ls); - dlm_callback_resume(ls); - error = enable_locking(ls, rv->seq); if (error) { log_debug(ls, "enable_locking failed %d", error); @@ -218,6 +222,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_grant_after_purge(ls); + dlm_astd_wake(); + log_debug(ls, "recover %llx done: %u ms", (unsigned long long)rv->seq, jiffies_to_msecs(jiffies - start)); diff --git a/trunk/fs/dlm/user.c b/trunk/fs/dlm/user.c index d8ea60756403..e96bf3e9be88 100644 --- a/trunk/fs/dlm/user.c +++ b/trunk/fs/dlm/user.c @@ -213,9 +213,9 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, goto out; } - if (list_empty(&lkb->lkb_cb_list)) { + if (list_empty(&lkb->lkb_astqueue)) { kref_get(&lkb->lkb_ref); - list_add_tail(&lkb->lkb_cb_list, &proc->asts); + list_add_tail(&lkb->lkb_astqueue, &proc->asts); wake_up_interruptible(&proc->wait); } spin_unlock(&proc->asts_spin); @@ -832,24 +832,24 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count, } /* if we empty lkb_callbacks, we don't want to unlock the spinlock - without removing lkb_cb_list; so empty lkb_cb_list is always + without removing lkb_astqueue; so empty lkb_astqueue is always consistent with empty lkb_callbacks */ - lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_cb_list); + lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue); rv = dlm_rem_lkb_callback(lkb->lkb_resource->res_ls, lkb, &cb, &resid); if (rv < 0) { /* this shouldn't happen; 
lkb should have been removed from list when resid was zero */ log_print("dlm_rem_lkb_callback empty %x", lkb->lkb_id); - list_del_init(&lkb->lkb_cb_list); + list_del_init(&lkb->lkb_astqueue); spin_unlock(&proc->asts_spin); /* removes ref for proc->asts, may cause lkb to be freed */ dlm_put_lkb(lkb); goto try_another; } if (!resid) - list_del_init(&lkb->lkb_cb_list); + list_del_init(&lkb->lkb_astqueue); spin_unlock(&proc->asts_spin); if (cb.flags & DLM_CB_SKIP) { diff --git a/trunk/fs/fscache/page.c b/trunk/fs/fscache/page.c index 3f7a59bfa7ad..2f343b4d7a7d 100644 --- a/trunk/fs/fscache/page.c +++ b/trunk/fs/fscache/page.c @@ -976,12 +976,16 @@ void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, pagevec_init(&pvec, 0); next = 0; - do { - if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) - break; + while (next <= (loff_t)-1 && + pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE) + ) { for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; - next = page->index; + pgoff_t page_index = page->index; + + ASSERTCMP(page_index, >=, next); + next = page_index + 1; + if (PageFsCache(page)) { __fscache_wait_on_page_write(cookie, page); __fscache_uncache_page(cookie, page); @@ -989,7 +993,7 @@ void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, } pagevec_release(&pvec); cond_resched(); - } while (++next); + } _leave(""); } diff --git a/trunk/fs/gfs2/bmap.c b/trunk/fs/gfs2/bmap.c index 42e477f31223..e65493a8ac00 100644 --- a/trunk/fs/gfs2/bmap.c +++ b/trunk/fs/gfs2/bmap.c @@ -854,7 +854,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, blen++; else { if (bstart) { - __gfs2_free_blocks(ip, bstart, blen, metadata); + if (metadata) + __gfs2_free_meta(ip, bstart, blen); + else + __gfs2_free_data(ip, bstart, blen); + btotal += blen; } @@ -866,7 +870,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, gfs2_add_inode_blocks(&ip->i_inode, -1); } if (bstart) { - 
__gfs2_free_blocks(ip, bstart, blen, metadata); + if (metadata) + __gfs2_free_meta(ip, bstart, blen); + else + __gfs2_free_data(ip, bstart, blen); + btotal += blen; } diff --git a/trunk/fs/gfs2/dir.c b/trunk/fs/gfs2/dir.c index 1cc2f8ec52a2..091ee4779538 100644 --- a/trunk/fs/gfs2/dir.c +++ b/trunk/fs/gfs2/dir.c @@ -339,67 +339,6 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset, return (copied) ? copied : error; } -/** - * gfs2_dir_get_hash_table - Get pointer to the dir hash table - * @ip: The inode in question - * - * Returns: The hash table or an error - */ - -static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip) -{ - struct inode *inode = &ip->i_inode; - int ret; - u32 hsize; - __be64 *hc; - - BUG_ON(!(ip->i_diskflags & GFS2_DIF_EXHASH)); - - hc = ip->i_hash_cache; - if (hc) - return hc; - - hsize = 1 << ip->i_depth; - hsize *= sizeof(__be64); - if (hsize != i_size_read(&ip->i_inode)) { - gfs2_consist_inode(ip); - return ERR_PTR(-EIO); - } - - hc = kmalloc(hsize, GFP_NOFS); - ret = -ENOMEM; - if (hc == NULL) - return ERR_PTR(-ENOMEM); - - ret = gfs2_dir_read_data(ip, (char *)hc, 0, hsize, 1); - if (ret < 0) { - kfree(hc); - return ERR_PTR(ret); - } - - spin_lock(&inode->i_lock); - if (ip->i_hash_cache) - kfree(hc); - else - ip->i_hash_cache = hc; - spin_unlock(&inode->i_lock); - - return ip->i_hash_cache; -} - -/** - * gfs2_dir_hash_inval - Invalidate dir hash - * @ip: The directory inode - * - * Must be called with an exclusive glock, or during glock invalidation. 
- */ -void gfs2_dir_hash_inval(struct gfs2_inode *ip) -{ - __be64 *hc = ip->i_hash_cache; - ip->i_hash_cache = NULL; - kfree(hc); -} - static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent) { return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0; @@ -747,12 +686,17 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no, static int get_leaf_nr(struct gfs2_inode *dip, u32 index, u64 *leaf_out) { - __be64 *hash; + __be64 leaf_no; + int error; + + error = gfs2_dir_read_data(dip, (char *)&leaf_no, + index * sizeof(__be64), + sizeof(__be64), 0); + if (error != sizeof(u64)) + return (error < 0) ? error : -EIO; + + *leaf_out = be64_to_cpu(leaf_no); - hash = gfs2_dir_get_hash_table(dip); - if (IS_ERR(hash)) - return PTR_ERR(hash); - *leaf_out = be64_to_cpu(*(hash + index)); return 0; } @@ -1022,8 +966,6 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) for (x = 0; x < half_len; x++) lp[x] = cpu_to_be64(bn); - gfs2_dir_hash_inval(dip); - error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64), half_len * sizeof(u64)); if (error != half_len * sizeof(u64)) { @@ -1110,54 +1052,70 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) static int dir_double_exhash(struct gfs2_inode *dip) { + struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct buffer_head *dibh; u32 hsize; - u32 hsize_bytes; - __be64 *hc; - __be64 *hc2, *h; + u64 *buf; + u64 *from, *to; + u64 block; + u64 disksize = i_size_read(&dip->i_inode); int x; int error = 0; hsize = 1 << dip->i_depth; - hsize_bytes = hsize * sizeof(__be64); + if (hsize * sizeof(u64) != disksize) { + gfs2_consist_inode(dip); + return -EIO; + } - hc = gfs2_dir_get_hash_table(dip); - if (IS_ERR(hc)) - return PTR_ERR(hc); + /* Allocate both the "from" and "to" buffers in one big chunk */ - h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS); - if (!hc2) + buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS); + if (!buf) return -ENOMEM; - error = 
gfs2_meta_inode_buffer(dip, &dibh); - if (error) - goto out_kfree; + for (block = disksize >> sdp->sd_hash_bsize_shift; block--;) { + error = gfs2_dir_read_data(dip, (char *)buf, + block * sdp->sd_hash_bsize, + sdp->sd_hash_bsize, 1); + if (error != sdp->sd_hash_bsize) { + if (error >= 0) + error = -EIO; + goto fail; + } + + from = buf; + to = (u64 *)((char *)buf + sdp->sd_hash_bsize); - for (x = 0; x < hsize; x++) { - *h++ = *hc; - *h++ = *hc; - hc++; + for (x = sdp->sd_hash_ptrs; x--; from++) { + *to++ = *from; /* No endianess worries */ + *to++ = *from; + } + + error = gfs2_dir_write_data(dip, + (char *)buf + sdp->sd_hash_bsize, + block * sdp->sd_sb.sb_bsize, + sdp->sd_sb.sb_bsize); + if (error != sdp->sd_sb.sb_bsize) { + if (error >= 0) + error = -EIO; + goto fail; + } } - error = gfs2_dir_write_data(dip, (char *)hc2, 0, hsize_bytes * 2); - if (error != (hsize_bytes * 2)) - goto fail; + kfree(buf); - gfs2_dir_hash_inval(dip); - dip->i_hash_cache = hc2; - dip->i_depth++; - gfs2_dinode_out(dip, dibh->b_data); - brelse(dibh); - return 0; + error = gfs2_meta_inode_buffer(dip, &dibh); + if (!gfs2_assert_withdraw(sdp, !error)) { + dip->i_depth++; + gfs2_dinode_out(dip, dibh->b_data); + brelse(dibh); + } + + return error; fail: - /* Replace original hash table & size */ - gfs2_dir_write_data(dip, (char *)hc, 0, hsize_bytes); - i_size_write(&dip->i_inode, hsize_bytes); - gfs2_dinode_out(dip, dibh->b_data); - brelse(dibh); -out_kfree: - kfree(hc2); + kfree(buf); return error; } @@ -1390,7 +1348,6 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, return error; } - /** * dir_e_read - Reads the entries from a directory into a filldir buffer * @dip: dinode pointer @@ -1405,7 +1362,9 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, filldir_t filldir) { struct gfs2_inode *dip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); u32 hsize, len = 0; + u32 ht_offset, lp_offset, ht_offset_cur = -1; u32 hash, index; 
__be64 *lp; int copied = 0; @@ -1413,17 +1372,37 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, unsigned depth = 0; hsize = 1 << dip->i_depth; + if (hsize * sizeof(u64) != i_size_read(inode)) { + gfs2_consist_inode(dip); + return -EIO; + } + hash = gfs2_dir_offset2hash(*offset); index = hash >> (32 - dip->i_depth); - lp = gfs2_dir_get_hash_table(dip); - if (IS_ERR(lp)) - return PTR_ERR(lp); + lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); + if (!lp) + return -ENOMEM; while (index < hsize) { + lp_offset = index & (sdp->sd_hash_ptrs - 1); + ht_offset = index - lp_offset; + + if (ht_offset_cur != ht_offset) { + error = gfs2_dir_read_data(dip, (char *)lp, + ht_offset * sizeof(__be64), + sdp->sd_hash_bsize, 1); + if (error != sdp->sd_hash_bsize) { + if (error >= 0) + error = -EIO; + goto out; + } + ht_offset_cur = ht_offset; + } + error = gfs2_dir_read_leaf(inode, offset, opaque, filldir, &copied, &depth, - be64_to_cpu(lp[index])); + be64_to_cpu(lp[lp_offset])); if (error) break; @@ -1431,6 +1410,8 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, index = (index & ~(len - 1)) + len; } +out: + kfree(lp); if (error > 0) error = 0; return error; @@ -1933,22 +1914,43 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) { + struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct buffer_head *bh; struct gfs2_leaf *leaf; u32 hsize, len; + u32 ht_offset, lp_offset, ht_offset_cur = -1; u32 index = 0, next_index; __be64 *lp; u64 leaf_no; int error = 0, last; hsize = 1 << dip->i_depth; + if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) { + gfs2_consist_inode(dip); + return -EIO; + } - lp = gfs2_dir_get_hash_table(dip); - if (IS_ERR(lp)) - return PTR_ERR(lp); + lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); + if (!lp) + return -ENOMEM; while (index < hsize) { - leaf_no = be64_to_cpu(lp[index]); + lp_offset = index & (sdp->sd_hash_ptrs - 1); + ht_offset = index - 
lp_offset; + + if (ht_offset_cur != ht_offset) { + error = gfs2_dir_read_data(dip, (char *)lp, + ht_offset * sizeof(__be64), + sdp->sd_hash_bsize, 1); + if (error != sdp->sd_hash_bsize) { + if (error >= 0) + error = -EIO; + goto out; + } + ht_offset_cur = ht_offset; + } + + leaf_no = be64_to_cpu(lp[lp_offset]); if (leaf_no) { error = get_leaf(dip, leaf_no, &bh); if (error) @@ -1974,6 +1976,7 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) } out: + kfree(lp); return error; } diff --git a/trunk/fs/gfs2/dir.h b/trunk/fs/gfs2/dir.h index ff5772fbf024..e686af11becd 100644 --- a/trunk/fs/gfs2/dir.h +++ b/trunk/fs/gfs2/dir.h @@ -35,7 +35,6 @@ extern int gfs2_diradd_alloc_required(struct inode *dir, const struct qstr *filename); extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, struct buffer_head **bhp); -extern void gfs2_dir_hash_inval(struct gfs2_inode *ip); static inline u32 gfs2_disk_hash(const char *data, int len) { diff --git a/trunk/fs/gfs2/file.c b/trunk/fs/gfs2/file.c index bc2590ef5fc1..a9f5cbe45cd9 100644 --- a/trunk/fs/gfs2/file.c +++ b/trunk/fs/gfs2/file.c @@ -174,9 +174,7 @@ void gfs2_set_inode_flags(struct inode *inode) struct gfs2_inode *ip = GFS2_I(inode); unsigned int flags = inode->i_flags; - flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC); - if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode)) - inode->i_flags |= S_NOSEC; + flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); if (ip->i_diskflags & GFS2_DIF_IMMUTABLE) flags |= S_IMMUTABLE; if (ip->i_diskflags & GFS2_DIF_APPENDONLY) diff --git a/trunk/fs/gfs2/glock.c b/trunk/fs/gfs2/glock.c index 88e8a23d0026..1c1336e7b3b2 100644 --- a/trunk/fs/gfs2/glock.c +++ b/trunk/fs/gfs2/glock.c @@ -409,10 +409,6 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) if (held1 && held2 && list_empty(&gl->gl_holders)) clear_bit(GLF_QUEUED, &gl->gl_flags); - if (new_state != gl->gl_target) - /* shorten our minimum hold time */ - gl->gl_hold_time = 
max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR, - GL_GLOCK_MIN_HOLD); gl->gl_state = new_state; gl->gl_tchange = jiffies; } @@ -672,7 +668,7 @@ static void glock_work_func(struct work_struct *work) gl->gl_demote_state != LM_ST_EXCLUSIVE) { unsigned long holdtime, now = jiffies; - holdtime = gl->gl_tchange + gl->gl_hold_time; + holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; if (time_before(now, holdtime)) delay = holdtime - now; @@ -683,14 +679,9 @@ static void glock_work_func(struct work_struct *work) } run_queue(gl, 0); spin_unlock(&gl->gl_spin); - if (!delay) + if (!delay || + queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); - else { - if (gl->gl_name.ln_type != LM_TYPE_INODE) - delay = 0; - if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) - gfs2_glock_put(gl); - } if (drop_ref) gfs2_glock_put(gl); } @@ -752,7 +743,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_tchange = jiffies; gl->gl_object = NULL; gl->gl_sbd = sdp; - gl->gl_hold_time = GL_GLOCK_DFT_HOLD; INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); INIT_WORK(&gl->gl_delete, delete_work_func); @@ -865,15 +855,8 @@ static int gfs2_glock_demote_wait(void *word) static void wait_on_holder(struct gfs2_holder *gh) { - unsigned long time1 = jiffies; - might_sleep(); wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE); - if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */ - /* Lengthen the minimum hold time. 
*/ - gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + - GL_GLOCK_HOLD_INCR, - GL_GLOCK_MAX_HOLD); } static void wait_on_demote(struct gfs2_glock *gl) @@ -1110,9 +1093,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh) gfs2_glock_hold(gl); if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && - !test_bit(GLF_DEMOTE, &gl->gl_flags) && - gl->gl_name.ln_type == LM_TYPE_INODE) - delay = gl->gl_hold_time; + !test_bit(GLF_DEMOTE, &gl->gl_flags)) + delay = gl->gl_ops->go_min_hold_time; if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); } @@ -1291,13 +1273,12 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) unsigned long now = jiffies; gfs2_glock_hold(gl); - holdtime = gl->gl_tchange + gl->gl_hold_time; - if (test_bit(GLF_QUEUED, &gl->gl_flags) && - gl->gl_name.ln_type == LM_TYPE_INODE) { + holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; + if (test_bit(GLF_QUEUED, &gl->gl_flags)) { if (time_before(now, holdtime)) delay = holdtime - now; if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) - delay = gl->gl_hold_time; + delay = gl->gl_ops->go_min_hold_time; } spin_lock(&gl->gl_spin); @@ -1686,7 +1667,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) dtime *= 1000000/HZ; /* demote time in uSec */ if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) dtime = 0; - gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d m:%ld\n", + gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n", state2str(gl->gl_state), gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, @@ -1695,7 +1676,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) state2str(gl->gl_demote_state), dtime, atomic_read(&gl->gl_ail_count), atomic_read(&gl->gl_revokes), - atomic_read(&gl->gl_ref), gl->gl_hold_time); + atomic_read(&gl->gl_ref)); list_for_each_entry(gh, &gl->gl_holders, gh_list) { error = dump_holder(seq, gh); diff --git a/trunk/fs/gfs2/glock.h 
b/trunk/fs/gfs2/glock.h index 66707118af25..6b2f757b9281 100644 --- a/trunk/fs/gfs2/glock.h +++ b/trunk/fs/gfs2/glock.h @@ -113,12 +113,6 @@ enum { #define GLR_TRYFAILED 13 -#define GL_GLOCK_MAX_HOLD (long)(HZ / 5) -#define GL_GLOCK_DFT_HOLD (long)(HZ / 5) -#define GL_GLOCK_MIN_HOLD (long)(10) -#define GL_GLOCK_HOLD_INCR (long)(HZ / 20) -#define GL_GLOCK_HOLD_DECR (long)(HZ / 40) - struct lm_lockops { const char *lm_proto_name; int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); diff --git a/trunk/fs/gfs2/glops.c b/trunk/fs/gfs2/glops.c index da21ecaafcc2..2cca29316bd6 100644 --- a/trunk/fs/gfs2/glops.c +++ b/trunk/fs/gfs2/glops.c @@ -26,7 +26,6 @@ #include "rgrp.h" #include "util.h" #include "trans.h" -#include "dir.h" /** * __gfs2_ail_flush - remove all buffers for a given lock from the AIL @@ -219,7 +218,6 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) if (ip) { set_bit(GIF_INVALID, &ip->i_flags); forget_all_cached_acls(&ip->i_inode); - gfs2_dir_hash_inval(ip); } } @@ -318,8 +316,6 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_generation = be64_to_cpu(str->di_generation); ip->i_diskflags = be32_to_cpu(str->di_flags); - ip->i_eattr = be64_to_cpu(str->di_eattr); - /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */ gfs2_set_inode_flags(&ip->i_inode); height = be16_to_cpu(str->di_height); if (unlikely(height > GFS2_MAX_META_HEIGHT)) @@ -332,6 +328,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_depth = (u8)depth; ip->i_entries = be32_to_cpu(str->di_entries); + ip->i_eattr = be64_to_cpu(str->di_eattr); if (S_ISREG(ip->i_inode.i_mode)) gfs2_set_aops(&ip->i_inode); @@ -552,6 +549,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_lock = inode_go_lock, .go_dump = inode_go_dump, .go_type = LM_TYPE_INODE, + .go_min_hold_time = HZ / 5, .go_flags = GLOF_ASPACE, }; @@ -562,6 +560,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_unlock = 
rgrp_go_unlock, .go_dump = gfs2_rgrp_dump, .go_type = LM_TYPE_RGRP, + .go_min_hold_time = HZ / 5, .go_flags = GLOF_ASPACE, }; diff --git a/trunk/fs/gfs2/incore.h b/trunk/fs/gfs2/incore.h index 892ac37de8ae..81206e70cbf6 100644 --- a/trunk/fs/gfs2/incore.h +++ b/trunk/fs/gfs2/incore.h @@ -163,6 +163,7 @@ struct gfs2_glock_operations { int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); void (*go_callback) (struct gfs2_glock *gl); const int go_type; + const unsigned long go_min_hold_time; const unsigned long go_flags; #define GLOF_ASPACE 1 }; @@ -220,7 +221,6 @@ struct gfs2_glock { unsigned int gl_hash; unsigned long gl_demote_time; /* time of first demote request */ - long gl_hold_time; struct list_head gl_holders; const struct gfs2_glock_operations *gl_ops; @@ -285,7 +285,6 @@ struct gfs2_inode { u64 i_goal; /* goal block for allocations */ struct rw_semaphore i_rw_mutex; struct list_head i_trunc_list; - __be64 *i_hash_cache; u32 i_entries; u32 i_diskflags; u8 i_height; diff --git a/trunk/fs/gfs2/main.c b/trunk/fs/gfs2/main.c index 29e1ace7953d..c2b34cd2abe0 100644 --- a/trunk/fs/gfs2/main.c +++ b/trunk/fs/gfs2/main.c @@ -41,7 +41,6 @@ static void gfs2_init_inode_once(void *foo) init_rwsem(&ip->i_rw_mutex); INIT_LIST_HEAD(&ip->i_trunc_list); ip->i_alloc = NULL; - ip->i_hash_cache = NULL; } static void gfs2_init_glock_once(void *foo) diff --git a/trunk/fs/gfs2/ops_fstype.c b/trunk/fs/gfs2/ops_fstype.c index 516516e0c2a2..2a77071fb7b6 100644 --- a/trunk/fs/gfs2/ops_fstype.c +++ b/trunk/fs/gfs2/ops_fstype.c @@ -1094,7 +1094,6 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent if (sdp->sd_args.ar_nobarrier) set_bit(SDF_NOBARRIERS, &sdp->sd_flags); - sb->s_flags |= MS_NOSEC; sb->s_magic = GFS2_MAGIC; sb->s_op = &gfs2_super_ops; sb->s_d_op = &gfs2_dops; diff --git a/trunk/fs/gfs2/rgrp.c b/trunk/fs/gfs2/rgrp.c index 7f8af1eb02de..9b780df3fd54 100644 --- a/trunk/fs/gfs2/rgrp.c +++ b/trunk/fs/gfs2/rgrp.c @@ -1607,15 +1607,14 
@@ int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation) } /** - * __gfs2_free_blocks - free a contiguous run of block(s) + * gfs2_free_data - free a contiguous run of data block(s) * @ip: the inode these blocks are being freed from * @bstart: first block of a run of contiguous blocks * @blen: the length of the block run - * @meta: 1 if the blocks represent metadata * */ -void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) +void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd; @@ -1632,10 +1631,53 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) gfs2_trans_add_rg(rgd); /* Directories keep their data in the metadata address space */ - if (meta || ip->i_depth) + if (ip->i_depth) gfs2_meta_wipe(ip, bstart, blen); } +/** + * gfs2_free_data - free a contiguous run of data block(s) + * @ip: the inode these blocks are being freed from + * @bstart: first block of a run of contiguous blocks + * @blen: the length of the block run + * + */ + +void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + + __gfs2_free_data(ip, bstart, blen); + gfs2_statfs_change(sdp, 0, +blen, 0); + gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); +} + +/** + * gfs2_free_meta - free a contiguous run of data block(s) + * @ip: the inode these blocks are being freed from + * @bstart: first block of a run of contiguous blocks + * @blen: the length of the block run + * + */ + +void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct gfs2_rgrpd *rgd; + + rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); + if (!rgd) + return; + trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); + rgd->rd_free += blen; + + gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); + gfs2_rgrp_out(rgd, 
rgd->rd_bits[0].bi_bh->b_data); + + gfs2_trans_add_rg(rgd); + gfs2_meta_wipe(ip, bstart, blen); +} + /** * gfs2_free_meta - free a contiguous run of data block(s) * @ip: the inode these blocks are being freed from @@ -1648,7 +1690,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - __gfs2_free_blocks(ip, bstart, blen, 1); + __gfs2_free_meta(ip, bstart, blen); gfs2_statfs_change(sdp, 0, +blen, 0); gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); } diff --git a/trunk/fs/gfs2/rgrp.h b/trunk/fs/gfs2/rgrp.h index d253f9a8c70e..a80e3034ac47 100644 --- a/trunk/fs/gfs2/rgrp.h +++ b/trunk/fs/gfs2/rgrp.h @@ -52,7 +52,9 @@ extern int gfs2_ri_update(struct gfs2_inode *ip); extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); -extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); +extern void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); +extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); +extern void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); extern void gfs2_unlink_di(struct inode *inode); diff --git a/trunk/fs/gfs2/super.c b/trunk/fs/gfs2/super.c index b7beadd9ba4c..fb0edf735483 100644 --- a/trunk/fs/gfs2/super.c +++ b/trunk/fs/gfs2/super.c @@ -1533,7 +1533,7 @@ static void gfs2_evict_inode(struct inode *inode) /* Case 3 starts here */ truncate_inode_pages(&inode->i_data, 0); end_writeback(inode); - gfs2_dir_hash_inval(ip); + ip->i_gl->gl_object = NULL; gfs2_glock_add_to_lru(ip->i_gl); gfs2_glock_put(ip->i_gl); diff --git a/trunk/fs/hfsplus/brec.c b/trunk/fs/hfsplus/brec.c index 2a734cfccc92..2312de34bd42 100644 --- a/trunk/fs/hfsplus/brec.c +++ 
b/trunk/fs/hfsplus/brec.c @@ -43,10 +43,6 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) node->tree->node_size - (rec + 1) * 2); if (!recoff) return 0; - if (recoff > node->tree->node_size - 2) { - printk(KERN_ERR "hfs: recoff %d too large\n", recoff); - return 0; - } retval = hfs_bnode_read_u16(node, recoff) + 2; if (retval > node->tree->max_key_len + 2) { diff --git a/trunk/fs/hfsplus/catalog.c b/trunk/fs/hfsplus/catalog.c index 4dfbfec357e8..b4ba1b319333 100644 --- a/trunk/fs/hfsplus/catalog.c +++ b/trunk/fs/hfsplus/catalog.c @@ -212,9 +212,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); - err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); - if (err) - return err; + hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); entry_size = hfsplus_fill_cat_thread(sb, &entry, @@ -271,9 +269,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? 
str->name : NULL, cnid); - err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); - if (err) - return err; + hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); if (!str) { int len; @@ -351,14 +347,12 @@ int hfsplus_rename_cat(u32 cnid, struct hfs_find_data src_fd, dst_fd; hfsplus_cat_entry entry; int entry_size, type; - int err; + int err = 0; dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, dst_dir->i_ino, dst_name->name); - err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); - if (err) - return err; + hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); dst_fd = src_fd; /* find the old dir entry and read the data */ diff --git a/trunk/fs/hfsplus/dir.c b/trunk/fs/hfsplus/dir.c index 25b2443a004c..4df5059c25da 100644 --- a/trunk/fs/hfsplus/dir.c +++ b/trunk/fs/hfsplus/dir.c @@ -38,9 +38,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, sb = dir->i_sb; dentry->d_fsdata = NULL; - err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); - if (err) - return ERR_PTR(err); + hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); again: err = hfs_brec_read(&fd, &entry, sizeof(entry)); @@ -134,9 +132,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) if (filp->f_pos >= inode->i_size) return 0; - err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); - if (err) - return err; + hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); err = hfs_brec_find(&fd); if (err) diff --git a/trunk/fs/hfsplus/extents.c b/trunk/fs/hfsplus/extents.c index 5849e3ef35cc..b1991a2a08e0 100644 --- a/trunk/fs/hfsplus/extents.c +++ b/trunk/fs/hfsplus/extents.c @@ -119,31 +119,22 @@ static void __hfsplus_ext_write_extent(struct inode *inode, set_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags); } -static int hfsplus_ext_write_extent_locked(struct inode *inode) +static void 
hfsplus_ext_write_extent_locked(struct inode *inode) { - int res; - if (HFSPLUS_I(inode)->extent_state & HFSPLUS_EXT_DIRTY) { struct hfs_find_data fd; - res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); - if (res) - return res; + hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); __hfsplus_ext_write_extent(inode, &fd); hfs_find_exit(&fd); } - return 0; } -int hfsplus_ext_write_extent(struct inode *inode) +void hfsplus_ext_write_extent(struct inode *inode) { - int res; - mutex_lock(&HFSPLUS_I(inode)->extents_lock); - res = hfsplus_ext_write_extent_locked(inode); + hfsplus_ext_write_extent_locked(inode); mutex_unlock(&HFSPLUS_I(inode)->extents_lock); - - return res; } static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, @@ -203,11 +194,9 @@ static int hfsplus_ext_read_extent(struct inode *inode, u32 block) block < hip->cached_start + hip->cached_blocks) return 0; - res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); - if (!res) { - res = __hfsplus_ext_cache_extent(&fd, inode, block); - hfs_find_exit(&fd); - } + hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); + res = __hfsplus_ext_cache_extent(&fd, inode, block); + hfs_find_exit(&fd); return res; } @@ -220,7 +209,6 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, struct hfsplus_inode_info *hip = HFSPLUS_I(inode); int res = -EIO; u32 ablock, dblock, mask; - sector_t sector; int was_dirty = 0; int shift; @@ -267,12 +255,10 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, done: dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); - mask = (1 << sbi->fs_shift) - 1; - sector = ((sector_t)dblock << sbi->fs_shift) + - sbi->blockoffset + (iblock & mask); - map_bh(bh_result, sb, sector); - + map_bh(bh_result, sb, + (dblock << sbi->fs_shift) + sbi->blockoffset + + (iblock & mask)); if (create) { set_buffer_new(bh_result); hip->phys_size += sb->s_blocksize; @@ -385,9 +371,7 @@ int hfsplus_free_fork(struct 
super_block *sb, u32 cnid, if (total_blocks == blocks) return 0; - res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); - if (res) - return res; + hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); do { res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid, total_blocks, type); @@ -485,9 +469,7 @@ int hfsplus_file_extend(struct inode *inode) insert_extent: dprint(DBG_EXTENT, "insert new extent\n"); - res = hfsplus_ext_write_extent_locked(inode); - if (res) - goto out; + hfsplus_ext_write_extent_locked(inode); memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); hip->cached_extents[0].start_block = cpu_to_be32(start); @@ -518,6 +500,7 @@ void hfsplus_file_truncate(struct inode *inode) struct page *page; void *fsdata; u32 size = inode->i_size; + int res; res = pagecache_write_begin(NULL, mapping, size, 0, AOP_FLAG_UNINTERRUPTIBLE, @@ -540,12 +523,7 @@ void hfsplus_file_truncate(struct inode *inode) goto out; mutex_lock(&hip->extents_lock); - res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); - if (res) { - mutex_unlock(&hip->extents_lock); - /* XXX: We lack error handling of hfsplus_file_truncate() */ - return; - } + hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); while (1) { if (alloc_cnt == hip->first_blocks) { hfsplus_free_extents(sb, hip->first_extents, diff --git a/trunk/fs/hfsplus/hfsplus_fs.h b/trunk/fs/hfsplus/hfsplus_fs.h index 81dfd1e495e3..d6857523336d 100644 --- a/trunk/fs/hfsplus/hfsplus_fs.h +++ b/trunk/fs/hfsplus/hfsplus_fs.h @@ -13,7 +13,6 @@ #include #include #include -#include #include "hfsplus_raw.h" #define DBG_BNODE_REFS 0x00000001 @@ -111,9 +110,7 @@ struct hfsplus_vh; struct hfs_btree; struct hfsplus_sb_info { - void *s_vhdr_buf; struct hfsplus_vh *s_vhdr; - void *s_backup_vhdr_buf; struct hfsplus_vh *s_backup_vhdr; struct hfs_btree *ext_tree; struct hfs_btree *cat_tree; @@ -261,15 +258,6 @@ struct hfsplus_readdir_data { struct hfsplus_cat_key key; }; -/* - * Find minimum acceptible I/O size for an hfsplus sb. 
- */ -static inline unsigned short hfsplus_min_io_size(struct super_block *sb) -{ - return max_t(unsigned short, bdev_logical_block_size(sb->s_bdev), - HFSPLUS_SECTOR_SIZE); -} - #define hfs_btree_open hfsplus_btree_open #define hfs_btree_close hfsplus_btree_close #define hfs_btree_write hfsplus_btree_write @@ -386,7 +374,7 @@ extern const struct file_operations hfsplus_dir_operations; /* extents.c */ int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); -int hfsplus_ext_write_extent(struct inode *); +void hfsplus_ext_write_extent(struct inode *); int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int); int hfsplus_free_fork(struct super_block *, u32, struct hfsplus_fork_raw *, int); @@ -448,8 +436,8 @@ int hfsplus_compare_dentry(const struct dentry *parent, /* wrapper.c */ int hfsplus_read_wrapper(struct super_block *); int hfs_part_find(struct super_block *, sector_t *, sector_t *); -int hfsplus_submit_bio(struct super_block *sb, sector_t sector, - void *buf, void **data, int rw); +int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, + void *data, int rw); /* time macros */ #define __hfsp_mt2ut(t) (be32_to_cpu(t) - 2082844800U) diff --git a/trunk/fs/hfsplus/inode.c b/trunk/fs/hfsplus/inode.c index 010cd363d085..b248a6cfcad9 100644 --- a/trunk/fs/hfsplus/inode.c +++ b/trunk/fs/hfsplus/inode.c @@ -195,13 +195,11 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, hip->flags = 0; set_bit(HFSPLUS_I_RSRC, &hip->flags); - err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); - if (!err) { - err = hfsplus_find_cat(sb, dir->i_ino, &fd); - if (!err) - err = hfsplus_cat_read_inode(inode, &fd); - hfs_find_exit(&fd); - } + hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + err = hfsplus_find_cat(sb, dir->i_ino, &fd); + if (!err) + err = hfsplus_cat_read_inode(inode, &fd); + hfs_find_exit(&fd); if (err) { iput(inode); return ERR_PTR(err); diff --git a/trunk/fs/hfsplus/part_tbl.c b/trunk/fs/hfsplus/part_tbl.c 
index eb355d81e279..40ad88c12c64 100644 --- a/trunk/fs/hfsplus/part_tbl.c +++ b/trunk/fs/hfsplus/part_tbl.c @@ -88,12 +88,11 @@ static int hfs_parse_old_pmap(struct super_block *sb, struct old_pmap *pm, return -ENOENT; } -static int hfs_parse_new_pmap(struct super_block *sb, void *buf, - struct new_pmap *pm, sector_t *part_start, sector_t *part_size) +static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm, + sector_t *part_start, sector_t *part_size) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); int size = be32_to_cpu(pm->pmMapBlkCnt); - int buf_size = hfsplus_min_io_size(sb); int res; int i = 0; @@ -108,14 +107,11 @@ static int hfs_parse_new_pmap(struct super_block *sb, void *buf, if (++i >= size) return -ENOENT; - pm = (struct new_pmap *)((u8 *)pm + HFSPLUS_SECTOR_SIZE); - if ((u8 *)pm - (u8 *)buf >= buf_size) { - res = hfsplus_submit_bio(sb, - *part_start + HFS_PMAP_BLK + i, - buf, (void **)&pm, READ); - if (res) - return res; - } + res = hfsplus_submit_bio(sb->s_bdev, + *part_start + HFS_PMAP_BLK + i, + pm, READ); + if (res) + return res; } while (pm->pmSig == cpu_to_be16(HFS_NEW_PMAP_MAGIC)); return -ENOENT; @@ -128,15 +124,15 @@ static int hfs_parse_new_pmap(struct super_block *sb, void *buf, int hfs_part_find(struct super_block *sb, sector_t *part_start, sector_t *part_size) { - void *buf, *data; + void *data; int res; - buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL); - if (!buf) + data = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); + if (!data) return -ENOMEM; - res = hfsplus_submit_bio(sb, *part_start + HFS_PMAP_BLK, - buf, &data, READ); + res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK, + data, READ); if (res) goto out; @@ -145,13 +141,13 @@ int hfs_part_find(struct super_block *sb, res = hfs_parse_old_pmap(sb, data, part_start, part_size); break; case HFS_NEW_PMAP_MAGIC: - res = hfs_parse_new_pmap(sb, buf, data, part_start, part_size); + res = hfs_parse_new_pmap(sb, data, part_start, part_size); break; default: res = 
-ENOENT; break; } out: - kfree(buf); + kfree(data); return res; } diff --git a/trunk/fs/hfsplus/super.c b/trunk/fs/hfsplus/super.c index c106ca22e812..84a47b709f51 100644 --- a/trunk/fs/hfsplus/super.c +++ b/trunk/fs/hfsplus/super.c @@ -73,13 +73,11 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || inode->i_ino == HFSPLUS_ROOT_CNID) { - err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); - if (!err) { - err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); - if (!err) - err = hfsplus_cat_read_inode(inode, &fd); - hfs_find_exit(&fd); - } + hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); + err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); + if (!err) + err = hfsplus_cat_read_inode(inode, &fd); + hfs_find_exit(&fd); } else { err = hfsplus_system_read_inode(inode); } @@ -135,13 +133,9 @@ static int hfsplus_system_write_inode(struct inode *inode) static int hfsplus_write_inode(struct inode *inode, struct writeback_control *wbc) { - int err; - dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); - err = hfsplus_ext_write_extent(inode); - if (err) - return err; + hfsplus_ext_write_extent(inode); if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || inode->i_ino == HFSPLUS_ROOT_CNID) @@ -203,17 +197,17 @@ int hfsplus_sync_fs(struct super_block *sb, int wait) write_backup = 1; } - error2 = hfsplus_submit_bio(sb, + error2 = hfsplus_submit_bio(sb->s_bdev, sbi->part_start + HFSPLUS_VOLHEAD_SECTOR, - sbi->s_vhdr_buf, NULL, WRITE_SYNC); + sbi->s_vhdr, WRITE_SYNC); if (!error) error = error2; if (!write_backup) goto out; - error2 = hfsplus_submit_bio(sb, + error2 = hfsplus_submit_bio(sb->s_bdev, sbi->part_start + sbi->sect_count - 2, - sbi->s_backup_vhdr_buf, NULL, WRITE_SYNC); + sbi->s_backup_vhdr, WRITE_SYNC); if (!error) error2 = error; out: @@ -257,8 +251,8 @@ static void hfsplus_put_super(struct super_block *sb) hfs_btree_close(sbi->ext_tree); iput(sbi->alloc_file); 
iput(sbi->hidden_dir); - kfree(sbi->s_vhdr_buf); - kfree(sbi->s_backup_vhdr_buf); + kfree(sbi->s_vhdr); + kfree(sbi->s_backup_vhdr); unload_nls(sbi->nls); kfree(sb->s_fs_info); sb->s_fs_info = NULL; @@ -399,13 +393,6 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if (!sbi->rsrc_clump_blocks) sbi->rsrc_clump_blocks = 1; - err = generic_check_addressable(sbi->alloc_blksz_shift, - sbi->total_blocks); - if (err) { - printk(KERN_ERR "hfs: filesystem size too large.\n"); - goto out_free_vhdr; - } - /* Set up operations so we can load metadata */ sb->s_op = &hfsplus_sops; sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -430,8 +417,6 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) sb->s_flags |= MS_RDONLY; } - err = -EINVAL; - /* Load metadata objects (B*Trees) */ sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); if (!sbi->ext_tree) { @@ -462,9 +447,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; str.name = HFSP_HIDDENDIR_NAME; - err = hfs_find_init(sbi->cat_tree, &fd); - if (err) - goto out_put_root; + hfs_find_init(sbi->cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { hfs_find_exit(&fd); diff --git a/trunk/fs/hfsplus/unicode.c b/trunk/fs/hfsplus/unicode.c index a32998f29f0b..a3f0bfcc881e 100644 --- a/trunk/fs/hfsplus/unicode.c +++ b/trunk/fs/hfsplus/unicode.c @@ -142,11 +142,7 @@ int hfsplus_uni2asc(struct super_block *sb, /* search for single decomposed char */ if (likely(compose)) ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0); - if (ce1) - cc = ce1[0]; - else - cc = 0; - if (cc) { + if (ce1 && (cc = ce1[0])) { /* start of a possibly decomposed Hangul char */ if (cc != 0xffff) goto done; @@ -213,8 +209,7 @@ int hfsplus_uni2asc(struct super_block *sb, i++; ce2 = ce1; } - cc = ce2[0]; - if (cc) { + if ((cc = ce2[0])) { ip += i; ustrlen -= i; 
goto done; @@ -306,11 +301,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { size = asc2unichar(sb, astr, len, &c); - if (decompose) - dstr = decompose_unichar(c, &dsize); - else - dstr = NULL; - if (dstr) { + if (decompose && (dstr = decompose_unichar(c, &dsize))) { if (outlen + dsize > HFSPLUS_MAX_STRLEN) break; do { @@ -355,23 +346,15 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, astr += size; len -= size; - if (decompose) - dstr = decompose_unichar(c, &dsize); - else - dstr = NULL; - if (dstr) { + if (decompose && (dstr = decompose_unichar(c, &dsize))) { do { c2 = *dstr++; - if (casefold) - c2 = case_fold(c2); - if (!casefold || c2) + if (!casefold || (c2 = case_fold(c2))) hash = partial_name_hash(c2, hash); } while (--dsize > 0); } else { c2 = c; - if (casefold) - c2 = case_fold(c2); - if (!casefold || c2) + if (!casefold || (c2 = case_fold(c2))) hash = partial_name_hash(c2, hash); } } @@ -439,14 +422,12 @@ int hfsplus_compare_dentry(const struct dentry *parent, c1 = *dstr1; c2 = *dstr2; if (casefold) { - c1 = case_fold(c1); - if (!c1) { + if (!(c1 = case_fold(c1))) { dstr1++; dsize1--; continue; } - c2 = case_fold(c2); - if (!c2) { + if (!(c2 = case_fold(c2))) { dstr2++; dsize2--; continue; diff --git a/trunk/fs/hfsplus/wrapper.c b/trunk/fs/hfsplus/wrapper.c index 10e515a0d452..4ac88ff79aa6 100644 --- a/trunk/fs/hfsplus/wrapper.c +++ b/trunk/fs/hfsplus/wrapper.c @@ -31,67 +31,25 @@ static void hfsplus_end_io_sync(struct bio *bio, int err) complete(bio->bi_private); } -/* - * hfsplus_submit_bio - Perfrom block I/O - * @sb: super block of volume for I/O - * @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes - * @buf: buffer for I/O - * @data: output pointer for location of requested data - * @rw: direction of I/O - * - * The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than - * HFSPLUS_SECTOR_SIZE, and @buf must 
be sized accordingly. On reads - * @data will return a pointer to the start of the requested sector, - * which may not be the same location as @buf. - * - * If @sector is not aligned to the bdev logical block size it will - * be rounded down. For writes this means that @buf should contain data - * that starts at the rounded-down address. As long as the data was - * read using hfsplus_submit_bio() and the same buffer is used things - * will work correctly. - */ -int hfsplus_submit_bio(struct super_block *sb, sector_t sector, - void *buf, void **data, int rw) +int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, + void *data, int rw) { DECLARE_COMPLETION_ONSTACK(wait); struct bio *bio; int ret = 0; - unsigned int io_size; - loff_t start; - int offset; - - /* - * Align sector to hardware sector size and find offset. We - * assume that io_size is a power of two, which _should_ - * be true. - */ - io_size = hfsplus_min_io_size(sb); - start = (loff_t)sector << HFSPLUS_SECTOR_SHIFT; - offset = start & (io_size - 1); - sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1); bio = bio_alloc(GFP_NOIO, 1); bio->bi_sector = sector; - bio->bi_bdev = sb->s_bdev; + bio->bi_bdev = bdev; bio->bi_end_io = hfsplus_end_io_sync; bio->bi_private = &wait; - if (!(rw & WRITE) && data) - *data = (u8 *)buf + offset; - - while (io_size > 0) { - unsigned int page_offset = offset_in_page(buf); - unsigned int len = min_t(unsigned int, PAGE_SIZE - page_offset, - io_size); - - ret = bio_add_page(bio, virt_to_page(buf), len, page_offset); - if (ret != len) { - ret = -EIO; - goto out; - } - io_size -= len; - buf = (u8 *)buf + len; - } + /* + * We always submit one sector at a time, so bio_add_page must not fail. 
+ */ + if (bio_add_page(bio, virt_to_page(data), HFSPLUS_SECTOR_SIZE, + offset_in_page(data)) != HFSPLUS_SECTOR_SIZE) + BUG(); submit_bio(rw, bio); wait_for_completion(&wait); @@ -99,9 +57,8 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, if (!bio_flagged(bio, BIO_UPTODATE)) ret = -EIO; -out: bio_put(bio); - return ret < 0 ? ret : 0; + return ret; } static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) @@ -184,19 +141,23 @@ int hfsplus_read_wrapper(struct super_block *sb) if (hfsplus_get_last_session(sb, &part_start, &part_size)) goto out; + if ((u64)part_start + part_size > 0x100000000ULL) { + pr_err("hfs: volumes larger than 2TB are not supported yet\n"); + goto out; + } error = -ENOMEM; - sbi->s_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL); - if (!sbi->s_vhdr_buf) + sbi->s_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); + if (!sbi->s_vhdr) goto out; - sbi->s_backup_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL); - if (!sbi->s_backup_vhdr_buf) + sbi->s_backup_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); + if (!sbi->s_backup_vhdr) goto out_free_vhdr; reread: - error = hfsplus_submit_bio(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, - sbi->s_vhdr_buf, (void **)&sbi->s_vhdr, - READ); + error = hfsplus_submit_bio(sb->s_bdev, + part_start + HFSPLUS_VOLHEAD_SECTOR, + sbi->s_vhdr, READ); if (error) goto out_free_backup_vhdr; @@ -211,9 +172,8 @@ int hfsplus_read_wrapper(struct super_block *sb) if (!hfsplus_read_mdb(sbi->s_vhdr, &wd)) goto out_free_backup_vhdr; wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT; - part_start += (sector_t)wd.ablk_start + - (sector_t)wd.embed_start * wd.ablk_size; - part_size = (sector_t)wd.embed_count * wd.ablk_size; + part_start += wd.ablk_start + wd.embed_start * wd.ablk_size; + part_size = wd.embed_count * wd.ablk_size; goto reread; default: /* @@ -226,9 +186,9 @@ int hfsplus_read_wrapper(struct super_block *sb) goto reread; } - error = hfsplus_submit_bio(sb, part_start + part_size - 2, - 
sbi->s_backup_vhdr_buf, - (void **)&sbi->s_backup_vhdr, READ); + error = hfsplus_submit_bio(sb->s_bdev, + part_start + part_size - 2, + sbi->s_backup_vhdr, READ); if (error) goto out_free_backup_vhdr; diff --git a/trunk/fs/libfs.c b/trunk/fs/libfs.c index 275ca4749a2e..c88eab55aec9 100644 --- a/trunk/fs/libfs.c +++ b/trunk/fs/libfs.c @@ -822,7 +822,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, goto out; attr->set_buf[size] = '\0'; - val = simple_strtoll(attr->set_buf, NULL, 0); + val = simple_strtol(attr->set_buf, NULL, 0); ret = attr->set(attr->data, val); if (ret == 0) ret = len; /* on success, claim we got the whole input */ diff --git a/trunk/fs/namei.c b/trunk/fs/namei.c index 14ab8d3f2f0c..5c867dd1c0b3 100644 --- a/trunk/fs/namei.c +++ b/trunk/fs/namei.c @@ -942,6 +942,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, * Don't forget we might have a non-mountpoint managed dentry * that wants to block transit. */ + *inode = path->dentry->d_inode; if (unlikely(managed_dentry_might_block(path->dentry))) return false; @@ -954,12 +955,6 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, path->mnt = mounted; path->dentry = mounted->mnt_root; nd->seq = read_seqcount_begin(&path->dentry->d_seq); - /* - * Update the inode too. We don't need to re-check the - * dentry sequence number here after this d_inode read, - * because a mount-point is always pinned. - */ - *inode = path->dentry->d_inode; } return true; } diff --git a/trunk/fs/ubifs/commit.c b/trunk/fs/ubifs/commit.c index fb3b5c813a30..87cd0ead8633 100644 --- a/trunk/fs/ubifs/commit.c +++ b/trunk/fs/ubifs/commit.c @@ -78,7 +78,7 @@ static int nothing_to_commit(struct ubifs_info *c) * If the root TNC node is dirty, we definitely have something to * commit. 
*/ - if (c->zroot.znode && ubifs_zn_dirty(c->zroot.znode)) + if (c->zroot.znode && test_bit(DIRTY_ZNODE, &c->zroot.znode->flags)) return 0; /* @@ -418,7 +418,7 @@ int ubifs_run_commit(struct ubifs_info *c) spin_lock(&c->cs_lock); if (c->cmt_state == COMMIT_BROKEN) { - err = -EROFS; + err = -EINVAL; goto out; } @@ -444,7 +444,7 @@ int ubifs_run_commit(struct ubifs_info *c) * re-check it. */ if (c->cmt_state == COMMIT_BROKEN) { - err = -EROFS; + err = -EINVAL; goto out_cmt_unlock; } @@ -576,7 +576,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) struct idx_node *i; size_t sz; - if (!dbg_is_chk_index(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) return 0; INIT_LIST_HEAD(&list); diff --git a/trunk/fs/ubifs/debug.c b/trunk/fs/ubifs/debug.c index eef109a1a927..0bb2bcef0de9 100644 --- a/trunk/fs/ubifs/debug.c +++ b/trunk/fs/ubifs/debug.c @@ -27,12 +27,13 @@ * various local functions of those subsystems. */ +#define UBIFS_DBG_PRESERVE_UBI + +#include "ubifs.h" #include +#include #include #include -#include -#include -#include "ubifs.h" #ifdef CONFIG_UBIFS_FS_DEBUG @@ -41,6 +42,15 @@ DEFINE_SPINLOCK(dbg_lock); static char dbg_key_buf0[128]; static char dbg_key_buf1[128]; +unsigned int ubifs_chk_flags; +unsigned int ubifs_tst_flags; + +module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); +module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); + +MODULE_PARM_DESC(debug_chks, "Debug check flags"); +MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); + static const char *get_key_fmt(int fmt) { switch (fmt) { @@ -81,28 +91,6 @@ static const char *get_key_type(int type) } } -static const char *get_dent_type(int type) -{ - switch (type) { - case UBIFS_ITYPE_REG: - return "file"; - case UBIFS_ITYPE_DIR: - return "dir"; - case UBIFS_ITYPE_LNK: - return "symlink"; - case UBIFS_ITYPE_BLK: - return "blkdev"; - case UBIFS_ITYPE_CHR: - return "char dev"; - case UBIFS_ITYPE_FIFO: - return "fifo"; - case 
UBIFS_ITYPE_SOCK: - return "socket"; - default: - return "unknown/invalid type"; - } -} - static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, char *buffer) { @@ -246,13 +234,9 @@ static void dump_ch(const struct ubifs_ch *ch) printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len)); } -void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) +void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) { const struct ubifs_inode *ui = ubifs_inode(inode); - struct qstr nm = { .name = NULL }; - union ubifs_key key; - struct ubifs_dent_node *dent, *pdent = NULL; - int count = 2; printk(KERN_DEBUG "Dump in-memory inode:"); printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino); @@ -286,32 +270,6 @@ void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read); printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row); printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len); - - if (!S_ISDIR(inode->i_mode)) - return; - - printk(KERN_DEBUG "List of directory entries:\n"); - ubifs_assert(!mutex_is_locked(&c->tnc_mutex)); - - lowest_dent_key(c, &key, inode->i_ino); - while (1) { - dent = ubifs_tnc_next_ent(c, &key, &nm); - if (IS_ERR(dent)) { - if (PTR_ERR(dent) != -ENOENT) - printk(KERN_DEBUG "error %ld\n", PTR_ERR(dent)); - break; - } - - printk(KERN_DEBUG "\t%d: %s (%s)\n", - count++, dent->name, get_dent_type(dent->type)); - - nm.name = dent->name; - nm.len = le16_to_cpu(dent->nlen); - kfree(pdent); - pdent = dent; - key_read(c, &dent->key, &key); - } - kfree(pdent); } void dbg_dump_node(const struct ubifs_info *c, const void *node) @@ -320,7 +278,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) union ubifs_key key; const struct ubifs_ch *ch = node; - if (dbg_is_tst_rcvry(c)) + if (dbg_failure_mode) return; /* If the magic is incorrect, just hexdump the first bytes */ @@ -876,7 +834,7 @@ void dbg_dump_leb(const struct ubifs_info *c, 
int lnum) struct ubifs_scan_node *snod; void *buf; - if (dbg_is_tst_rcvry(c)) + if (dbg_failure_mode) return; printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", @@ -1122,7 +1080,6 @@ int dbg_check_space_info(struct ubifs_info *c) /** * dbg_check_synced_i_size - check synchronized inode size. - * @c: UBIFS file-system description object * @inode: inode to check * * If inode is clean, synchronized inode size has to be equivalent to current @@ -1130,12 +1087,12 @@ int dbg_check_space_info(struct ubifs_info *c) * has to be locked). Returns %0 if synchronized inode size if correct, and * %-EINVAL if not. */ -int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode) +int dbg_check_synced_i_size(struct inode *inode) { int err = 0; struct ubifs_inode *ui = ubifs_inode(inode); - if (!dbg_is_chk_gen(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) return 0; if (!S_ISREG(inode->i_mode)) return 0; @@ -1168,7 +1125,7 @@ int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode) * Note, it is good idea to make sure the @dir->i_mutex is locked before * calling this function. 
*/ -int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) +int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) { unsigned int nlink = 2; union ubifs_key key; @@ -1176,7 +1133,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) struct qstr nm = { .name = NULL }; loff_t size = UBIFS_INO_NODE_SZ; - if (!dbg_is_chk_gen(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) return 0; if (!S_ISDIR(dir->i_mode)) @@ -1210,14 +1167,12 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) "but calculated size is %llu", dir->i_ino, (unsigned long long)i_size_read(dir), (unsigned long long)size); - dbg_dump_inode(c, dir); dump_stack(); return -EINVAL; } if (dir->i_nlink != nlink) { ubifs_err("directory inode %lu has nlink %u, but calculated " "nlink is %u", dir->i_ino, dir->i_nlink, nlink); - dbg_dump_inode(c, dir); dump_stack(); return -EINVAL; } @@ -1534,7 +1489,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra) long clean_cnt = 0, dirty_cnt = 0; int err, last; - if (!dbg_is_chk_index(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_TNC)) return 0; ubifs_assert(mutex_is_locked(&c->tnc_mutex)); @@ -1781,7 +1736,7 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) int err; long long calc = 0; - if (!dbg_is_chk_index(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ)) return 0; err = dbg_walk_index(c, NULL, add_size, &calc); @@ -2357,7 +2312,7 @@ int dbg_check_filesystem(struct ubifs_info *c) int err; struct fsck_data fsckd; - if (!dbg_is_chk_fs(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_FS)) return 0; fsckd.inodes = RB_ROOT; @@ -2392,7 +2347,7 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) struct list_head *cur; struct ubifs_scan_node *sa, *sb; - if (!dbg_is_chk_gen(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) return 0; for (cur = head->next; cur->next != head; cur = cur->next) { @@ -2459,7 +2414,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) 
struct list_head *cur; struct ubifs_scan_node *sa, *sb; - if (!dbg_is_chk_gen(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) return 0; for (cur = head->next; cur->next != head; cur = cur->next) { @@ -2536,141 +2491,214 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) return 0; } -static inline int chance(unsigned int n, unsigned int out_of) +int dbg_force_in_the_gaps(void) { - return !!((random32() % out_of) + 1 <= n); + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + return 0; + return !(random32() & 7); } -static int power_cut_emulated(struct ubifs_info *c, int lnum, int write) +/* Failure mode for recovery testing */ + +#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d)) + +struct failure_mode_info { + struct list_head list; + struct ubifs_info *c; +}; + +static LIST_HEAD(fmi_list); +static DEFINE_SPINLOCK(fmi_lock); + +static unsigned int next; + +static int simple_rand(void) { - struct ubifs_debug_info *d = c->dbg; + if (next == 0) + next = current->pid; + next = next * 1103515245 + 12345; + return (next >> 16) & 32767; +} + +static void failure_mode_init(struct ubifs_info *c) +{ + struct failure_mode_info *fmi; - ubifs_assert(dbg_is_tst_rcvry(c)); + fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS); + if (!fmi) { + ubifs_err("Failed to register failure mode - no memory"); + return; + } + fmi->c = c; + spin_lock(&fmi_lock); + list_add_tail(&fmi->list, &fmi_list); + spin_unlock(&fmi_lock); +} + +static void failure_mode_exit(struct ubifs_info *c) +{ + struct failure_mode_info *fmi, *tmp; - if (!d->pc_cnt) { - /* First call - decide delay to the power cut */ + spin_lock(&fmi_lock); + list_for_each_entry_safe(fmi, tmp, &fmi_list, list) + if (fmi->c == c) { + list_del(&fmi->list); + kfree(fmi); + } + spin_unlock(&fmi_lock); +} + +static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc) +{ + struct failure_mode_info *fmi; + + spin_lock(&fmi_lock); + list_for_each_entry(fmi, &fmi_list, list) + if (fmi->c->ubi 
== desc) { + struct ubifs_info *c = fmi->c; + + spin_unlock(&fmi_lock); + return c; + } + spin_unlock(&fmi_lock); + return NULL; +} + +static int in_failure_mode(struct ubi_volume_desc *desc) +{ + struct ubifs_info *c = dbg_find_info(desc); + + if (c && dbg_failure_mode) + return c->dbg->failure_mode; + return 0; +} + +static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) +{ + struct ubifs_info *c = dbg_find_info(desc); + struct ubifs_debug_info *d; + + if (!c || !dbg_failure_mode) + return 0; + d = c->dbg; + if (d->failure_mode) + return 1; + if (!d->fail_cnt) { + /* First call - decide delay to failure */ if (chance(1, 2)) { - unsigned long delay; + unsigned int delay = 1 << (simple_rand() >> 11); if (chance(1, 2)) { - d->pc_delay = 1; - /* Fail withing 1 minute */ - delay = random32() % 60000; - d->pc_timeout = jiffies; - d->pc_timeout += msecs_to_jiffies(delay); - ubifs_warn("failing after %lums", delay); + d->fail_delay = 1; + d->fail_timeout = jiffies + + msecs_to_jiffies(delay); + dbg_rcvry("failing after %ums", delay); } else { - d->pc_delay = 2; - delay = random32() % 10000; - /* Fail within 10000 operations */ - d->pc_cnt_max = delay; - ubifs_warn("failing after %lu calls", delay); + d->fail_delay = 2; + d->fail_cnt_max = delay; + dbg_rcvry("failing after %u calls", delay); } } - - d->pc_cnt += 1; + d->fail_cnt += 1; } - /* Determine if failure delay has expired */ - if (d->pc_delay == 1 && time_before(jiffies, d->pc_timeout)) + if (d->fail_delay == 1) { + if (time_before(jiffies, d->fail_timeout)) return 0; - if (d->pc_delay == 2 && d->pc_cnt++ < d->pc_cnt_max) + } else if (d->fail_delay == 2) + if (d->fail_cnt++ < d->fail_cnt_max) return 0; - if (lnum == UBIFS_SB_LNUM) { - if (write && chance(1, 2)) - return 0; - if (chance(19, 20)) + if (write) { + if (chance(1, 2)) + return 0; + } else if (chance(19, 20)) return 0; - ubifs_warn("failing in super block LEB %d", lnum); + dbg_rcvry("failing in super block LEB %d", lnum); } else if (lnum 
== UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) { if (chance(19, 20)) return 0; - ubifs_warn("failing in master LEB %d", lnum); + dbg_rcvry("failing in master LEB %d", lnum); } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) { - if (write && chance(99, 100)) + if (write) { + if (chance(99, 100)) + return 0; + } else if (chance(399, 400)) return 0; - if (chance(399, 400)) - return 0; - ubifs_warn("failing in log LEB %d", lnum); + dbg_rcvry("failing in log LEB %d", lnum); } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) { - if (write && chance(7, 8)) + if (write) { + if (chance(7, 8)) + return 0; + } else if (chance(19, 20)) return 0; - if (chance(19, 20)) - return 0; - ubifs_warn("failing in LPT LEB %d", lnum); + dbg_rcvry("failing in LPT LEB %d", lnum); } else if (lnum >= c->orph_first && lnum <= c->orph_last) { - if (write && chance(1, 2)) - return 0; - if (chance(9, 10)) + if (write) { + if (chance(1, 2)) + return 0; + } else if (chance(9, 10)) return 0; - ubifs_warn("failing in orphan LEB %d", lnum); + dbg_rcvry("failing in orphan LEB %d", lnum); } else if (lnum == c->ihead_lnum) { if (chance(99, 100)) return 0; - ubifs_warn("failing in index head LEB %d", lnum); + dbg_rcvry("failing in index head LEB %d", lnum); } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) { if (chance(9, 10)) return 0; - ubifs_warn("failing in GC head LEB %d", lnum); + dbg_rcvry("failing in GC head LEB %d", lnum); } else if (write && !RB_EMPTY_ROOT(&c->buds) && !ubifs_search_bud(c, lnum)) { if (chance(19, 20)) return 0; - ubifs_warn("failing in non-bud LEB %d", lnum); + dbg_rcvry("failing in non-bud LEB %d", lnum); } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND || c->cmt_state == COMMIT_RUNNING_REQUIRED) { if (chance(999, 1000)) return 0; - ubifs_warn("failing in bud LEB %d commit running", lnum); + dbg_rcvry("failing in bud LEB %d commit running", lnum); } else { if (chance(9999, 10000)) return 0; - ubifs_warn("failing in bud LEB %d commit not running", 
lnum); + dbg_rcvry("failing in bud LEB %d commit not running", lnum); } - - d->pc_happened = 1; - ubifs_warn("========== Power cut emulated =========="); + ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); + d->failure_mode = 1; dump_stack(); return 1; } -static void cut_data(const void *buf, unsigned int len) +static void cut_data(const void *buf, int len) { - unsigned int from, to, i, ffs = chance(1, 2); + int flen, i; unsigned char *p = (void *)buf; - from = random32() % (len + 1); - if (chance(1, 2)) - to = random32() % (len - from + 1); - else - to = len; - - if (from < to) - ubifs_warn("filled bytes %u-%u with %s", from, to - 1, - ffs ? "0xFFs" : "random data"); + flen = (len * (long long)simple_rand()) >> 15; + for (i = flen; i < len; i++) + p[i] = 0xff; +} - if (ffs) - for (i = from; i < to; i++) - p[i] = 0xFF; - else - for (i = from; i < to; i++) - p[i] = random32() % 0x100; +int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, + int len, int check) +{ + if (in_failure_mode(desc)) + return -EROFS; + return ubi_leb_read(desc, lnum, buf, offset, len, check); } -int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, - int offs, int len, int dtype) +int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, + int offset, int len, int dtype) { int err, failing; - if (c->dbg->pc_happened) + if (in_failure_mode(desc)) return -EROFS; - - failing = power_cut_emulated(c, lnum, 1); + failing = do_fail(desc, lnum, 1); if (failing) cut_data(buf, len); - err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); + err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); if (err) return err; if (failing) @@ -2678,207 +2706,162 @@ int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, return 0; } -int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, +int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, int len, int dtype) { int err; - if 
(c->dbg->pc_happened) + if (do_fail(desc, lnum, 1)) return -EROFS; - if (power_cut_emulated(c, lnum, 1)) - return -EROFS; - err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); + err = ubi_leb_change(desc, lnum, buf, len, dtype); if (err) return err; - if (power_cut_emulated(c, lnum, 1)) + if (do_fail(desc, lnum, 1)) return -EROFS; return 0; } -int dbg_leb_unmap(struct ubifs_info *c, int lnum) +int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum) { int err; - if (c->dbg->pc_happened) - return -EROFS; - if (power_cut_emulated(c, lnum, 0)) + if (do_fail(desc, lnum, 0)) return -EROFS; - err = ubi_leb_unmap(c->ubi, lnum); + err = ubi_leb_erase(desc, lnum); if (err) return err; - if (power_cut_emulated(c, lnum, 0)) + if (do_fail(desc, lnum, 0)) return -EROFS; return 0; } -int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype) +int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum) { int err; - if (c->dbg->pc_happened) + if (do_fail(desc, lnum, 0)) return -EROFS; - if (power_cut_emulated(c, lnum, 0)) - return -EROFS; - err = ubi_leb_map(c->ubi, lnum, dtype); + err = ubi_leb_unmap(desc, lnum); if (err) return err; - if (power_cut_emulated(c, lnum, 0)) + if (do_fail(desc, lnum, 0)) return -EROFS; return 0; } -/* - * Root directory for UBIFS stuff in debugfs. Contains sub-directories which - * contain the stuff specific to particular file-system mounts. 
- */ -static struct dentry *dfs_rootdir; +int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) +{ + if (in_failure_mode(desc)) + return -EROFS; + return ubi_is_mapped(desc, lnum); +} -static int dfs_file_open(struct inode *inode, struct file *file) +int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) { - file->private_data = inode->i_private; - return nonseekable_open(inode, file); + int err; + + if (do_fail(desc, lnum, 0)) + return -EROFS; + err = ubi_leb_map(desc, lnum, dtype); + if (err) + return err; + if (do_fail(desc, lnum, 0)) + return -EROFS; + return 0; } /** - * provide_user_output - provide output to the user reading a debugfs file. - * @val: boolean value for the answer - * @u: the buffer to store the answer at - * @count: size of the buffer - * @ppos: position in the @u output buffer + * ubifs_debugging_init - initialize UBIFS debugging. + * @c: UBIFS file-system description object * - * This is a simple helper function which stores @val boolean value in the user - * buffer when the user reads one of UBIFS debugfs files. Returns amount of - * bytes written to @u in case of success and a negative error code in case of + * This function initializes debugging-related data for the file system. + * Returns zero in case of success and a negative error code in case of * failure. */ -static int provide_user_output(int val, char __user *u, size_t count, - loff_t *ppos) +int ubifs_debugging_init(struct ubifs_info *c) { - char buf[3]; - - if (val) - buf[0] = '1'; - else - buf[0] = '0'; - buf[1] = '\n'; - buf[2] = 0x00; + c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); + if (!c->dbg) + return -ENOMEM; - return simple_read_from_buffer(u, count, ppos, buf, 2); + failure_mode_init(c); + return 0; } -static ssize_t dfs_file_read(struct file *file, char __user *u, size_t count, - loff_t *ppos) +/** + * ubifs_debugging_exit - free debugging data. 
+ * @c: UBIFS file-system description object + */ +void ubifs_debugging_exit(struct ubifs_info *c) { - struct dentry *dent = file->f_path.dentry; - struct ubifs_info *c = file->private_data; - struct ubifs_debug_info *d = c->dbg; - int val; - - if (dent == d->dfs_chk_gen) - val = d->chk_gen; - else if (dent == d->dfs_chk_index) - val = d->chk_index; - else if (dent == d->dfs_chk_orph) - val = d->chk_orph; - else if (dent == d->dfs_chk_lprops) - val = d->chk_lprops; - else if (dent == d->dfs_chk_fs) - val = d->chk_fs; - else if (dent == d->dfs_tst_rcvry) - val = d->tst_rcvry; - else - return -EINVAL; - - return provide_user_output(val, u, count, ppos); + failure_mode_exit(c); + kfree(c->dbg); } +/* + * Root directory for UBIFS stuff in debugfs. Contains sub-directories which + * contain the stuff specific to particular file-system mounts. + */ +static struct dentry *dfs_rootdir; + /** - * interpret_user_input - interpret user debugfs file input. - * @u: user-provided buffer with the input - * @count: buffer size + * dbg_debugfs_init - initialize debugfs file-system. * - * This is a helper function which interpret user input to a boolean UBIFS - * debugfs file. Returns %0 or %1 in case of success and a negative error code - * in case of failure. + * UBIFS uses debugfs file-system to expose various debugging knobs to + * user-space. This function creates "ubifs" directory in the debugfs + * file-system. Returns zero in case of success and a negative error code in + * case of failure. 
*/ -static int interpret_user_input(const char __user *u, size_t count) +int dbg_debugfs_init(void) { - size_t buf_size; - char buf[8]; + dfs_rootdir = debugfs_create_dir("ubifs", NULL); + if (IS_ERR(dfs_rootdir)) { + int err = PTR_ERR(dfs_rootdir); + ubifs_err("cannot create \"ubifs\" debugfs directory, " + "error %d\n", err); + return err; + } - buf_size = min_t(size_t, count, (sizeof(buf) - 1)); - if (copy_from_user(buf, u, buf_size)) - return -EFAULT; + return 0; +} - if (buf[0] == '1') - return 1; - else if (buf[0] == '0') - return 0; +/** + * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. + */ +void dbg_debugfs_exit(void) +{ + debugfs_remove(dfs_rootdir); +} - return -EINVAL; +static int open_debugfs_file(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return nonseekable_open(inode, file); } -static ssize_t dfs_file_write(struct file *file, const char __user *u, - size_t count, loff_t *ppos) +static ssize_t write_debugfs_file(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { struct ubifs_info *c = file->private_data; struct ubifs_debug_info *d = c->dbg; - struct dentry *dent = file->f_path.dentry; - int val; - /* - * TODO: this is racy - the file-system might have already been - * unmounted and we'd oops in this case. The plan is to fix it with - * help of 'iterate_supers_type()' which we should have in v3.0: when - * a debugfs opened, we rember FS's UUID in file->private_data. Then - * whenever we access the FS via a debugfs file, we iterate all UBIFS - * superblocks and fine the one with the same UUID, and take the - * locking right. - * - * The other way to go suggested by Al Viro is to create a separate - * 'ubifs-debug' file-system instead. 
- */ - if (file->f_path.dentry == d->dfs_dump_lprops) { + if (file->f_path.dentry == d->dfs_dump_lprops) dbg_dump_lprops(c); - return count; - } - if (file->f_path.dentry == d->dfs_dump_budg) { + else if (file->f_path.dentry == d->dfs_dump_budg) dbg_dump_budg(c, &c->bi); - return count; - } - if (file->f_path.dentry == d->dfs_dump_tnc) { + else if (file->f_path.dentry == d->dfs_dump_tnc) { mutex_lock(&c->tnc_mutex); dbg_dump_tnc(c); mutex_unlock(&c->tnc_mutex); - return count; - } - - val = interpret_user_input(u, count); - if (val < 0) - return val; - - if (dent == d->dfs_chk_gen) - d->chk_gen = val; - else if (dent == d->dfs_chk_index) - d->chk_index = val; - else if (dent == d->dfs_chk_orph) - d->chk_orph = val; - else if (dent == d->dfs_chk_lprops) - d->chk_lprops = val; - else if (dent == d->dfs_chk_fs) - d->chk_fs = val; - else if (dent == d->dfs_tst_rcvry) - d->tst_rcvry = val; - else + } else return -EINVAL; return count; } static const struct file_operations dfs_fops = { - .open = dfs_file_open, - .read = dfs_file_read, - .write = dfs_file_write, + .open = open_debugfs_file, + .write = write_debugfs_file, .owner = THIS_MODULE, .llseek = no_llseek, }; @@ -2897,20 +2880,12 @@ static const struct file_operations dfs_fops = { */ int dbg_debugfs_init_fs(struct ubifs_info *c) { - int err, n; + int err; const char *fname; struct dentry *dent; struct ubifs_debug_info *d = c->dbg; - n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, - c->vi.ubi_num, c->vi.vol_id); - if (n == UBIFS_DFS_DIR_LEN) { - /* The array size is too small */ - fname = UBIFS_DFS_DIR_NAME; - dent = ERR_PTR(-EINVAL); - goto out; - } - + sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); fname = d->dfs_dir_name; dent = debugfs_create_dir(fname, dfs_rootdir); if (IS_ERR_OR_NULL(dent)) @@ -2935,55 +2910,13 @@ int dbg_debugfs_init_fs(struct ubifs_info *c) goto out_remove; d->dfs_dump_tnc = dent; - fname = "chk_general"; - dent = debugfs_create_file(fname, 
S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_gen = dent; - - fname = "chk_index"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_index = dent; - - fname = "chk_orphans"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_orph = dent; - - fname = "chk_lprops"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_lprops = dent; - - fname = "chk_fs"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_fs = dent; - - fname = "tst_recovery"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_tst_rcvry = dent; - return 0; out_remove: debugfs_remove_recursive(d->dfs_dir); out: err = dent ? 
PTR_ERR(dent) : -ENODEV; - ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", + ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", fname, err); return err; } @@ -2997,179 +2930,4 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c) debugfs_remove_recursive(c->dbg->dfs_dir); } -struct ubifs_global_debug_info ubifs_dbg; - -static struct dentry *dfs_chk_gen; -static struct dentry *dfs_chk_index; -static struct dentry *dfs_chk_orph; -static struct dentry *dfs_chk_lprops; -static struct dentry *dfs_chk_fs; -static struct dentry *dfs_tst_rcvry; - -static ssize_t dfs_global_file_read(struct file *file, char __user *u, - size_t count, loff_t *ppos) -{ - struct dentry *dent = file->f_path.dentry; - int val; - - if (dent == dfs_chk_gen) - val = ubifs_dbg.chk_gen; - else if (dent == dfs_chk_index) - val = ubifs_dbg.chk_index; - else if (dent == dfs_chk_orph) - val = ubifs_dbg.chk_orph; - else if (dent == dfs_chk_lprops) - val = ubifs_dbg.chk_lprops; - else if (dent == dfs_chk_fs) - val = ubifs_dbg.chk_fs; - else if (dent == dfs_tst_rcvry) - val = ubifs_dbg.tst_rcvry; - else - return -EINVAL; - - return provide_user_output(val, u, count, ppos); -} - -static ssize_t dfs_global_file_write(struct file *file, const char __user *u, - size_t count, loff_t *ppos) -{ - struct dentry *dent = file->f_path.dentry; - int val; - - val = interpret_user_input(u, count); - if (val < 0) - return val; - - if (dent == dfs_chk_gen) - ubifs_dbg.chk_gen = val; - else if (dent == dfs_chk_index) - ubifs_dbg.chk_index = val; - else if (dent == dfs_chk_orph) - ubifs_dbg.chk_orph = val; - else if (dent == dfs_chk_lprops) - ubifs_dbg.chk_lprops = val; - else if (dent == dfs_chk_fs) - ubifs_dbg.chk_fs = val; - else if (dent == dfs_tst_rcvry) - ubifs_dbg.tst_rcvry = val; - else - return -EINVAL; - - return count; -} - -static const struct file_operations dfs_global_fops = { - .read = dfs_global_file_read, - .write = dfs_global_file_write, - .owner = THIS_MODULE, - .llseek = 
no_llseek, -}; - -/** - * dbg_debugfs_init - initialize debugfs file-system. - * - * UBIFS uses debugfs file-system to expose various debugging knobs to - * user-space. This function creates "ubifs" directory in the debugfs - * file-system. Returns zero in case of success and a negative error code in - * case of failure. - */ -int dbg_debugfs_init(void) -{ - int err; - const char *fname; - struct dentry *dent; - - fname = "ubifs"; - dent = debugfs_create_dir(fname, NULL); - if (IS_ERR_OR_NULL(dent)) - goto out; - dfs_rootdir = dent; - - fname = "chk_general"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_gen = dent; - - fname = "chk_index"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_index = dent; - - fname = "chk_orphans"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_orph = dent; - - fname = "chk_lprops"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_lprops = dent; - - fname = "chk_fs"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_fs = dent; - - fname = "tst_recovery"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_tst_rcvry = dent; - - return 0; - -out_remove: - debugfs_remove_recursive(dfs_rootdir); -out: - err = dent ? PTR_ERR(dent) : -ENODEV; - ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", - fname, err); - return err; -} - -/** - * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. 
- */ -void dbg_debugfs_exit(void) -{ - debugfs_remove_recursive(dfs_rootdir); -} - -/** - * ubifs_debugging_init - initialize UBIFS debugging. - * @c: UBIFS file-system description object - * - * This function initializes debugging-related data for the file system. - * Returns zero in case of success and a negative error code in case of - * failure. - */ -int ubifs_debugging_init(struct ubifs_info *c) -{ - c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); - if (!c->dbg) - return -ENOMEM; - - return 0; -} - -/** - * ubifs_debugging_exit - free debugging data. - * @c: UBIFS file-system description object - */ -void ubifs_debugging_exit(struct ubifs_info *c) -{ - kfree(c->dbg); -} - #endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/trunk/fs/ubifs/debug.h b/trunk/fs/ubifs/debug.h index 45174b534377..a811ac4a26bb 100644 --- a/trunk/fs/ubifs/debug.h +++ b/trunk/fs/ubifs/debug.h @@ -31,25 +31,18 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, #ifdef CONFIG_UBIFS_FS_DEBUG -/* - * The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi" - * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte. - */ -#define UBIFS_DFS_DIR_NAME "ubi%d_%d" -#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1) +#include /** * ubifs_debug_info - per-FS debugging information. 
* @old_zroot: old index root - used by 'dbg_check_old_index()' * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' - * - * @pc_happened: non-zero if an emulated power cut happened - * @pc_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls - * @pc_timeout: time in jiffies when delay of failure mode expires - * @pc_cnt: current number of calls to failure mode I/O functions - * @pc_cnt_max: number of calls by which to delay failure mode - * + * @failure_mode: failure mode for recovery testing + * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls + * @fail_timeout: time in jiffies when delay of failure mode expires + * @fail_cnt: current number of calls to failure mode I/O functions + * @fail_cnt_max: number of calls by which to delay failure mode * @chk_lpt_sz: used by LPT tree size checker * @chk_lpt_sz2: used by LPT tree size checker * @chk_lpt_wastage: used by LPT tree size checker @@ -63,36 +56,21 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, * @saved_free: saved amount of free space * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt * - * @chk_gen: if general extra checks are enabled - * @chk_index: if index xtra checks are enabled - * @chk_orph: if orphans extra checks are enabled - * @chk_lprops: if lprops extra checks are enabled - * @chk_fs: if UBIFS contents extra checks are enabled - * @tst_rcvry: if UBIFS recovery testing mode enabled - * * @dfs_dir_name: name of debugfs directory containing this file-system's files * @dfs_dir: direntry object of the file-system debugfs directory * @dfs_dump_lprops: "dump lprops" debugfs knob * @dfs_dump_budg: "dump budgeting information" debugfs knob * @dfs_dump_tnc: "dump TNC" debugfs knob - * @dfs_chk_gen: debugfs knob to enable UBIFS general extra checks - * @dfs_chk_index: debugfs knob to enable UBIFS index extra checks - * @dfs_chk_orph: debugfs knob to enable UBIFS 
orphans extra checks - * @dfs_chk_lprops: debugfs knob to enable UBIFS LEP properties extra checks - * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks - * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing */ struct ubifs_debug_info { struct ubifs_zbranch old_zroot; int old_zroot_level; unsigned long long old_zroot_sqnum; - - int pc_happened; - int pc_delay; - unsigned long pc_timeout; - unsigned int pc_cnt; - unsigned int pc_cnt_max; - + int failure_mode; + int fail_delay; + unsigned long fail_timeout; + unsigned int fail_cnt; + unsigned int fail_cnt_max; long long chk_lpt_sz; long long chk_lpt_sz2; long long chk_lpt_wastage; @@ -106,43 +84,11 @@ struct ubifs_debug_info { long long saved_free; int saved_idx_gc_cnt; - unsigned int chk_gen:1; - unsigned int chk_index:1; - unsigned int chk_orph:1; - unsigned int chk_lprops:1; - unsigned int chk_fs:1; - unsigned int tst_rcvry:1; - - char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1]; + char dfs_dir_name[100]; struct dentry *dfs_dir; struct dentry *dfs_dump_lprops; struct dentry *dfs_dump_budg; struct dentry *dfs_dump_tnc; - struct dentry *dfs_chk_gen; - struct dentry *dfs_chk_index; - struct dentry *dfs_chk_orph; - struct dentry *dfs_chk_lprops; - struct dentry *dfs_chk_fs; - struct dentry *dfs_tst_rcvry; -}; - -/** - * ubifs_global_debug_info - global (not per-FS) UBIFS debugging information. 
- * - * @chk_gen: if general extra checks are enabled - * @chk_index: if index xtra checks are enabled - * @chk_orph: if orphans extra checks are enabled - * @chk_lprops: if lprops extra checks are enabled - * @chk_fs: if UBIFS contents extra checks are enabled - * @tst_rcvry: if UBIFS recovery testing mode enabled - */ -struct ubifs_global_debug_info { - unsigned int chk_gen:1; - unsigned int chk_index:1; - unsigned int chk_orph:1; - unsigned int chk_lprops:1; - unsigned int chk_fs:1; - unsigned int tst_rcvry:1; }; #define ubifs_assert(expr) do { \ @@ -181,8 +127,6 @@ const char *dbg_key_str1(const struct ubifs_info *c, #define DBGKEY(key) dbg_key_str0(c, (key)) #define DBGKEY1(key) dbg_key_str1(c, (key)) -extern spinlock_t dbg_lock; - #define ubifs_dbg_msg(type, fmt, ...) do { \ spin_lock(&dbg_lock); \ pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \ @@ -218,36 +162,41 @@ extern spinlock_t dbg_lock; /* Additional recovery messages */ #define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) -extern struct ubifs_global_debug_info ubifs_dbg; +/* + * Debugging check flags. 
+ * + * UBIFS_CHK_GEN: general checks + * UBIFS_CHK_TNC: check TNC + * UBIFS_CHK_IDX_SZ: check index size + * UBIFS_CHK_ORPH: check orphans + * UBIFS_CHK_OLD_IDX: check the old index + * UBIFS_CHK_LPROPS: check lprops + * UBIFS_CHK_FS: check the file-system + */ +enum { + UBIFS_CHK_GEN = 0x1, + UBIFS_CHK_TNC = 0x2, + UBIFS_CHK_IDX_SZ = 0x4, + UBIFS_CHK_ORPH = 0x8, + UBIFS_CHK_OLD_IDX = 0x10, + UBIFS_CHK_LPROPS = 0x20, + UBIFS_CHK_FS = 0x40, +}; -static inline int dbg_is_chk_gen(const struct ubifs_info *c) -{ - return !!(ubifs_dbg.chk_gen || c->dbg->chk_gen); -} -static inline int dbg_is_chk_index(const struct ubifs_info *c) -{ - return !!(ubifs_dbg.chk_index || c->dbg->chk_index); -} -static inline int dbg_is_chk_orph(const struct ubifs_info *c) -{ - return !!(ubifs_dbg.chk_orph || c->dbg->chk_orph); -} -static inline int dbg_is_chk_lprops(const struct ubifs_info *c) -{ - return !!(ubifs_dbg.chk_lprops || c->dbg->chk_lprops); -} -static inline int dbg_is_chk_fs(const struct ubifs_info *c) -{ - return !!(ubifs_dbg.chk_fs || c->dbg->chk_fs); -} -static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) -{ - return !!(ubifs_dbg.tst_rcvry || c->dbg->tst_rcvry); -} -static inline int dbg_is_power_cut(const struct ubifs_info *c) -{ - return !!c->dbg->pc_happened; -} +/* + * Special testing flags. 
+ * + * UBIFS_TST_RCVRY: failure mode for recovery testing + */ +enum { + UBIFS_TST_RCVRY = 0x4, +}; + +extern spinlock_t dbg_lock; + +extern unsigned int ubifs_msg_flags; +extern unsigned int ubifs_chk_flags; +extern unsigned int ubifs_tst_flags; int ubifs_debugging_init(struct ubifs_info *c); void ubifs_debugging_exit(struct ubifs_info *c); @@ -258,7 +207,7 @@ const char *dbg_cstate(int cmt_state); const char *dbg_jhead(int jhead); const char *dbg_get_key_dump(const struct ubifs_info *c, const union ubifs_key *key); -void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode); +void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); void dbg_dump_node(const struct ubifs_info *c, const void *node); void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, int offs); @@ -291,8 +240,8 @@ int dbg_check_cats(struct ubifs_info *c); int dbg_check_ltab(struct ubifs_info *c); int dbg_chk_lpt_free_spc(struct ubifs_info *c); int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len); -int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode); -int dbg_check_dir(struct ubifs_info *c, const struct inode *dir); +int dbg_check_synced_i_size(struct inode *inode); +int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); int dbg_check_tnc(struct ubifs_info *c, int extra); int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); int dbg_check_filesystem(struct ubifs_info *c); @@ -305,12 +254,54 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head); int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); -int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, - int len, int dtype); -int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, - int dtype); -int dbg_leb_unmap(struct ubifs_info *c, int lnum); -int dbg_leb_map(struct 
ubifs_info *c, int lnum, int dtype); +/* Force the use of in-the-gaps method for testing */ +static inline int dbg_force_in_the_gaps_enabled(void) +{ + return ubifs_chk_flags & UBIFS_CHK_GEN; +} +int dbg_force_in_the_gaps(void); + +/* Failure mode for recovery testing */ +#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) + +#ifndef UBIFS_DBG_PRESERVE_UBI +#define ubi_leb_read dbg_leb_read +#define ubi_leb_write dbg_leb_write +#define ubi_leb_change dbg_leb_change +#define ubi_leb_erase dbg_leb_erase +#define ubi_leb_unmap dbg_leb_unmap +#define ubi_is_mapped dbg_is_mapped +#define ubi_leb_map dbg_leb_map +#endif + +int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, + int len, int check); +int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, + int offset, int len, int dtype); +int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, + int len, int dtype); +int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum); +int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum); +int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum); +int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); + +static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf, + int offset, int len) +{ + return dbg_leb_read(desc, lnum, buf, offset, len, 0); +} + +static inline int dbg_write(struct ubi_volume_desc *desc, int lnum, + const void *buf, int offset, int len) +{ + return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN); +} + +static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, + const void *buf, int len) +{ + return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN); +} /* Debugfs-related stuff */ int dbg_debugfs_init(void); @@ -322,7 +313,7 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); /* Use "if (0)" to make compiler check arguments even if debugging is off */ #define ubifs_assert(expr) do { \ - if (0) \ + if (0 && (expr)) \ printk(KERN_CRIT 
"UBIFS assert failed in %s at %u (pid %d)\n", \ __func__, __LINE__, current->pid); \ } while (0) @@ -332,9 +323,6 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); ubifs_err(fmt, ##__VA_ARGS__); \ } while (0) -#define DBGKEY(key) ((char *)(key)) -#define DBGKEY1(key) ((char *)(key)) - #define ubifs_dbg_msg(fmt, ...) do { \ if (0) \ pr_debug(fmt "\n", ##__VA_ARGS__); \ @@ -358,6 +346,9 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); #define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) #define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) +#define DBGKEY(key) ((char *)(key)) +#define DBGKEY1(key) ((char *)(key)) + static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; } static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; } static inline const char *dbg_ntype(int type) { return ""; } @@ -366,7 +357,7 @@ static inline const char *dbg_jhead(int jhead) { return ""; } static inline const char * dbg_get_key_dump(const struct ubifs_info *c, const union ubifs_key *key) { return ""; } -static inline void dbg_dump_inode(struct ubifs_info *c, +static inline void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) { return; } static inline void dbg_dump_node(const struct ubifs_info *c, const void *node) { return; } @@ -418,11 +409,9 @@ static inline int dbg_check_ltab(struct ubifs_info *c) { return 0; } static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; } static inline int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) { return 0; } -static inline int -dbg_check_synced_i_size(const struct ubifs_info *c, - struct inode *inode) { return 0; } -static inline int dbg_check_dir(struct ubifs_info *c, - const struct inode *dir) { return 0; } +static inline int dbg_check_synced_i_size(struct inode *inode) { return 0; } +static inline int dbg_check_dir_size(struct ubifs_info *c, + const struct inode *dir) { return 0; } static inline int dbg_check_tnc(struct ubifs_info *c, int 
extra) { return 0; } static inline int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) { return 0; } @@ -442,23 +431,9 @@ static inline int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) { return 0; } -static inline int dbg_leb_write(struct ubifs_info *c, int lnum, - const void *buf, int offset, - int len, int dtype) { return 0; } -static inline int dbg_leb_change(struct ubifs_info *c, int lnum, - const void *buf, int len, - int dtype) { return 0; } -static inline int dbg_leb_unmap(struct ubifs_info *c, int lnum) { return 0; } -static inline int dbg_leb_map(struct ubifs_info *c, int lnum, - int dtype) { return 0; } - -static inline int dbg_is_chk_gen(const struct ubifs_info *c) { return 0; } -static inline int dbg_is_chk_index(const struct ubifs_info *c) { return 0; } -static inline int dbg_is_chk_orph(const struct ubifs_info *c) { return 0; } -static inline int dbg_is_chk_lprops(const struct ubifs_info *c) { return 0; } -static inline int dbg_is_chk_fs(const struct ubifs_info *c) { return 0; } -static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) { return 0; } -static inline int dbg_is_power_cut(const struct ubifs_info *c) { return 0; } +static inline int dbg_force_in_the_gaps(void) { return 0; } +#define dbg_force_in_the_gaps_enabled() 0 +#define dbg_failure_mode 0 static inline int dbg_debugfs_init(void) { return 0; } static inline void dbg_debugfs_exit(void) { return; } diff --git a/trunk/fs/ubifs/dir.c b/trunk/fs/ubifs/dir.c index 683492043317..ef5abd38f0bf 100644 --- a/trunk/fs/ubifs/dir.c +++ b/trunk/fs/ubifs/dir.c @@ -102,7 +102,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, * UBIFS has to fully control "clean <-> dirty" transitions of inodes * to make budgeting work. 
*/ - inode->i_flags |= S_NOCMTIME; + inode->i_flags |= (S_NOCMTIME); inode_init_owner(inode, dir, mode); inode->i_mtime = inode->i_atime = inode->i_ctime = @@ -172,11 +172,9 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, #ifdef CONFIG_UBIFS_FS_DEBUG -static int dbg_check_name(const struct ubifs_info *c, - const struct ubifs_dent_node *dent, - const struct qstr *nm) +static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) { - if (!dbg_is_chk_gen(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) return 0; if (le16_to_cpu(dent->nlen) != nm->len) return -EINVAL; @@ -187,7 +185,7 @@ static int dbg_check_name(const struct ubifs_info *c, #else -#define dbg_check_name(c, dent, nm) 0 +#define dbg_check_name(dent, nm) 0 #endif @@ -221,7 +219,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - if (dbg_check_name(c, dent, &dentry->d_name)) { + if (dbg_check_name(dent, &dentry->d_name)) { err = -EINVAL; goto out; } @@ -524,7 +522,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); - err = dbg_check_synced_i_size(c, inode); + err = dbg_check_synced_i_size(inode); if (err) return err; @@ -579,7 +577,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) inode->i_nlink, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); - err = dbg_check_synced_i_size(c, inode); + err = dbg_check_synced_i_size(inode); if (err) return err; diff --git a/trunk/fs/ubifs/file.c b/trunk/fs/ubifs/file.c index 7cf738a4544d..5e7fccfc4b29 100644 --- a/trunk/fs/ubifs/file.c +++ b/trunk/fs/ubifs/file.c @@ -1263,7 +1263,7 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; - err = dbg_check_synced_i_size(c, inode); + err = dbg_check_synced_i_size(inode); if (err) return err; diff --git 
a/trunk/fs/ubifs/io.c b/trunk/fs/ubifs/io.c index 9228950a658f..3be645e012c9 100644 --- a/trunk/fs/ubifs/io.c +++ b/trunk/fs/ubifs/io.c @@ -86,125 +86,8 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) c->no_chk_data_crc = 0; c->vfs_sb->s_flags |= MS_RDONLY; ubifs_warn("switched to read-only mode, error %d", err); - dump_stack(); - } -} - -/* - * Below are simple wrappers over UBI I/O functions which include some - * additional checks and UBIFS debugging stuff. See corresponding UBI function - * for more information. - */ - -int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, - int len, int even_ebadmsg) -{ - int err; - - err = ubi_read(c->ubi, lnum, buf, offs, len); - /* - * In case of %-EBADMSG print the error message only if the - * @even_ebadmsg is true. - */ - if (err && (err != -EBADMSG || even_ebadmsg)) { - ubifs_err("reading %d bytes from LEB %d:%d failed, error %d", - len, lnum, offs, err); - dbg_dump_stack(); - } - return err; -} - -int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, - int len, int dtype) -{ - int err; - - ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_error) - return -EROFS; - if (!dbg_is_tst_rcvry(c)) - err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); - else - err = dbg_leb_write(c, lnum, buf, offs, len, dtype); - if (err) { - ubifs_err("writing %d bytes to LEB %d:%d failed, error %d", - len, lnum, offs, err); - ubifs_ro_mode(c, err); - dbg_dump_stack(); - } - return err; -} - -int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, - int dtype) -{ - int err; - - ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_error) - return -EROFS; - if (!dbg_is_tst_rcvry(c)) - err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); - else - err = dbg_leb_change(c, lnum, buf, len, dtype); - if (err) { - ubifs_err("changing %d bytes in LEB %d failed, error %d", - len, lnum, err); - ubifs_ro_mode(c, err); - dbg_dump_stack(); - } - return err; -} - -int 
ubifs_leb_unmap(struct ubifs_info *c, int lnum) -{ - int err; - - ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_error) - return -EROFS; - if (!dbg_is_tst_rcvry(c)) - err = ubi_leb_unmap(c->ubi, lnum); - else - err = dbg_leb_unmap(c, lnum); - if (err) { - ubifs_err("unmap LEB %d failed, error %d", lnum, err); - ubifs_ro_mode(c, err); - dbg_dump_stack(); - } - return err; -} - -int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype) -{ - int err; - - ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_error) - return -EROFS; - if (!dbg_is_tst_rcvry(c)) - err = ubi_leb_map(c->ubi, lnum, dtype); - else - err = dbg_leb_map(c, lnum, dtype); - if (err) { - ubifs_err("mapping LEB %d failed, error %d", lnum, err); - ubifs_ro_mode(c, err); - dbg_dump_stack(); - } - return err; -} - -int ubifs_is_mapped(const struct ubifs_info *c, int lnum) -{ - int err; - - err = ubi_is_mapped(c->ubi, lnum); - if (err < 0) { - ubifs_err("ubi_is_mapped failed for LEB %d, error %d", - lnum, err); dbg_dump_stack(); } - return err; } /** @@ -523,10 +406,14 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) dirt = sync_len - wbuf->used; if (dirt) ubifs_pad(c, wbuf->buf + wbuf->used, dirt); - err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, sync_len, - wbuf->dtype); - if (err) + err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, + sync_len, wbuf->dtype); + if (err) { + ubifs_err("cannot write %d bytes to LEB %d:%d", + sync_len, wbuf->lnum, wbuf->offs); + dbg_dump_stack(); return err; + } spin_lock(&wbuf->lock); wbuf->offs += sync_len; @@ -718,9 +605,9 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) if (aligned_len == wbuf->avail) { dbg_io("flush jhead %s wbuf to LEB %d:%d", dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); - err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, - wbuf->offs, wbuf->size, - wbuf->dtype); + err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, + wbuf->offs, wbuf->size, + wbuf->dtype); if (err) goto out; 
@@ -755,8 +642,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) dbg_io("flush jhead %s wbuf to LEB %d:%d", dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); - err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, - wbuf->size, wbuf->dtype); + err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, + wbuf->size, wbuf->dtype); if (err) goto out; @@ -774,8 +661,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) */ dbg_io("write %d bytes to LEB %d:%d", wbuf->size, wbuf->lnum, wbuf->offs); - err = ubifs_leb_write(c, wbuf->lnum, buf, wbuf->offs, - wbuf->size, wbuf->dtype); + err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs, + wbuf->size, wbuf->dtype); if (err) goto out; @@ -796,8 +683,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) n <<= c->max_write_shift; dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, wbuf->offs); - err = ubifs_leb_write(c, wbuf->lnum, buf + written, - wbuf->offs, n, wbuf->dtype); + err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, + wbuf->offs, n, wbuf->dtype); if (err) goto out; wbuf->offs += n; @@ -879,9 +766,13 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, return -EROFS; ubifs_prepare_node(c, buf, len, 1); - err = ubifs_leb_write(c, lnum, buf, offs, buf_len, dtype); - if (err) + err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype); + if (err) { + ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", + buf_len, lnum, offs, err); dbg_dump_node(c, buf); + dbg_dump_stack(); + } return err; } @@ -933,9 +824,13 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, if (rlen > 0) { /* Read everything that goes before write-buffer */ - err = ubifs_leb_read(c, lnum, buf, offs, rlen, 0); - if (err && err != -EBADMSG) + err = ubi_read(c->ubi, lnum, buf, offs, rlen); + if (err && err != -EBADMSG) { + ubifs_err("failed to read 
node %d from LEB %d:%d, " + "error %d", type, lnum, offs, err); + dbg_dump_stack(); return err; + } } if (type != ch->node_type) { @@ -990,9 +885,12 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, ubifs_assert(!(offs & 7) && offs < c->leb_size); ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); - err = ubifs_leb_read(c, lnum, buf, offs, len, 0); - if (err && err != -EBADMSG) + err = ubi_read(c->ubi, lnum, buf, offs, len); + if (err && err != -EBADMSG) { + ubifs_err("cannot read node %d from LEB %d:%d, error %d", + type, lnum, offs, err); return err; + } if (type != ch->node_type) { ubifs_err("bad node type (%d but expected %d)", diff --git a/trunk/fs/ubifs/log.c b/trunk/fs/ubifs/log.c index f9fd068d1ae0..affea9494ae2 100644 --- a/trunk/fs/ubifs/log.c +++ b/trunk/fs/ubifs/log.c @@ -262,7 +262,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) * an unclean reboot, because the target LEB might have been * unmapped, but not yet physically erased. 
*/ - err = ubifs_leb_map(c, bud->lnum, UBI_SHORTTERM); + err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM); if (err) goto out_unlock; } @@ -283,6 +283,8 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) return 0; out_unlock: + if (err != -EAGAIN) + ubifs_ro_mode(c, err); mutex_unlock(&c->log_mutex); kfree(ref); kfree(bud); @@ -750,7 +752,7 @@ static int dbg_check_bud_bytes(struct ubifs_info *c) struct ubifs_bud *bud; long long bud_bytes = 0; - if (!dbg_is_chk_gen(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) return 0; spin_lock(&c->buds_lock); diff --git a/trunk/fs/ubifs/lprops.c b/trunk/fs/ubifs/lprops.c index f8a181e647cc..667884f4a615 100644 --- a/trunk/fs/ubifs/lprops.c +++ b/trunk/fs/ubifs/lprops.c @@ -504,7 +504,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) pnode = (struct ubifs_pnode *)container_of(lprops - pos, struct ubifs_pnode, lprops[0]); - return !test_bit(COW_CNODE, &pnode->flags) && + return !test_bit(COW_ZNODE, &pnode->flags) && test_bit(DIRTY_CNODE, &pnode->flags); } @@ -860,7 +860,7 @@ int dbg_check_cats(struct ubifs_info *c) struct list_head *pos; int i, cat; - if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c)) + if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) return 0; list_for_each_entry(lprops, &c->empty_list, list) { @@ -958,7 +958,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, { int i = 0, j, err = 0; - if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c)) + if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) return; for (i = 0; i < heap->cnt; i++) { @@ -1262,7 +1262,7 @@ int dbg_check_lprops(struct ubifs_info *c) int i, err; struct ubifs_lp_stats lst; - if (!dbg_is_chk_lprops(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) return 0; /* diff --git a/trunk/fs/ubifs/lpt.c b/trunk/fs/ubifs/lpt.c index 6189c74d97f0..ef5155e109a2 100644 --- a/trunk/fs/ubifs/lpt.c +++ b/trunk/fs/ubifs/lpt.c @@ -701,8 +701,8 @@ int 
ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, alen = ALIGN(len, c->min_io_size); set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubifs_leb_change(c, lnum++, buf, alen, - UBI_SHORTTERM); + err = ubi_leb_change(c->ubi, lnum++, buf, alen, + UBI_SHORTTERM); if (err) goto out; p = buf; @@ -732,8 +732,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubifs_leb_change(c, lnum++, buf, alen, - UBI_SHORTTERM); + err = ubi_leb_change(c->ubi, lnum++, buf, alen, + UBI_SHORTTERM); if (err) goto out; p = buf; @@ -780,8 +780,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, alen = ALIGN(len, c->min_io_size); set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubifs_leb_change(c, lnum++, buf, alen, - UBI_SHORTTERM); + err = ubi_leb_change(c->ubi, lnum++, buf, alen, + UBI_SHORTTERM); if (err) goto out; p = buf; @@ -806,7 +806,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, alen = ALIGN(len, c->min_io_size); set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubifs_leb_change(c, lnum++, buf, alen, UBI_SHORTTERM); + err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM); if (err) goto out; p = buf; @@ -826,7 +826,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, /* Write remaining buffer */ memset(p, 0xff, alen - len); - err = ubifs_leb_change(c, lnum, buf, alen, UBI_SHORTTERM); + err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM); if (err) goto out; @@ -1222,7 +1222,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) if (c->big_lpt) nnode->num = calc_nnode_num_from_parent(c, parent, iip); } else { - err = ubifs_leb_read(c, lnum, buf, offs, c->nnode_sz, 1); + err = 
ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); if (err) goto out; err = ubifs_unpack_nnode(c, buf, nnode); @@ -1247,7 +1247,6 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) out: ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); - dbg_dump_stack(); kfree(nnode); return err; } @@ -1291,7 +1290,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) lprops->flags = ubifs_categorize_lprops(c, lprops); } } else { - err = ubifs_leb_read(c, lnum, buf, offs, c->pnode_sz, 1); + err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz); if (err) goto out; err = unpack_pnode(c, buf, pnode); @@ -1313,7 +1312,6 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) out: ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); dbg_dump_pnode(c, pnode, parent, iip); - dbg_dump_stack(); dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); kfree(pnode); return err; @@ -1333,7 +1331,7 @@ static int read_ltab(struct ubifs_info *c) buf = vmalloc(c->ltab_sz); if (!buf) return -ENOMEM; - err = ubifs_leb_read(c, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz, 1); + err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz); if (err) goto out; err = unpack_ltab(c, buf); @@ -1356,8 +1354,7 @@ static int read_lsave(struct ubifs_info *c) buf = vmalloc(c->lsave_sz); if (!buf) return -ENOMEM; - err = ubifs_leb_read(c, c->lsave_lnum, buf, c->lsave_offs, - c->lsave_sz, 1); + err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz); if (err) goto out; err = unpack_lsave(c, buf); @@ -1817,8 +1814,8 @@ static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c, if (c->big_lpt) nnode->num = calc_nnode_num_from_parent(c, parent, iip); } else { - err = ubifs_leb_read(c, branch->lnum, buf, branch->offs, - c->nnode_sz, 1); + err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, + c->nnode_sz); if (err) return ERR_PTR(err); err = ubifs_unpack_nnode(c, 
buf, nnode); @@ -1886,8 +1883,8 @@ static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c, ubifs_assert(branch->lnum >= c->lpt_first && branch->lnum <= c->lpt_last); ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size); - err = ubifs_leb_read(c, branch->lnum, buf, branch->offs, - c->pnode_sz, 1); + err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, + c->pnode_sz); if (err) return ERR_PTR(err); err = unpack_pnode(c, buf, pnode); @@ -2227,7 +2224,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, struct ubifs_cnode *cn; int num, iip = 0, err; - if (!dbg_is_chk_lprops(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) return 0; while (cnode) { diff --git a/trunk/fs/ubifs/lpt_commit.c b/trunk/fs/ubifs/lpt_commit.c index cddd6bd214f4..dfcb5748a7dc 100644 --- a/trunk/fs/ubifs/lpt_commit.c +++ b/trunk/fs/ubifs/lpt_commit.c @@ -27,7 +27,6 @@ #include #include -#include #include "ubifs.h" #ifdef CONFIG_UBIFS_FS_DEBUG @@ -117,8 +116,8 @@ static int get_cnodes_to_commit(struct ubifs_info *c) return 0; cnt += 1; while (1) { - ubifs_assert(!test_bit(COW_CNODE, &cnode->flags)); - __set_bit(COW_CNODE, &cnode->flags); + ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags)); + __set_bit(COW_ZNODE, &cnode->flags); cnext = next_dirty_cnode(cnode); if (!cnext) { cnode->cnext = c->lpt_cnext; @@ -466,7 +465,7 @@ static int write_cnodes(struct ubifs_info *c) */ clear_bit(DIRTY_CNODE, &cnode->flags); smp_mb__before_clear_bit(); - clear_bit(COW_CNODE, &cnode->flags); + clear_bit(COW_ZNODE, &cnode->flags); smp_mb__after_clear_bit(); offs += len; dbg_chk_lpt_sz(c, 1, len); @@ -1161,11 +1160,11 @@ static int lpt_gc_lnum(struct ubifs_info *c, int lnum) void *buf = c->lpt_buf; dbg_lp("LEB %d", lnum); - - err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); - if (err) + err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); + if (err) { + ubifs_err("cannot read LEB %d, error %d", lnum, err); return err; - + } while (1) { if (!is_a_node(c, buf, len)) { int 
pad_len; @@ -1641,7 +1640,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) int ret; void *buf, *p; - if (!dbg_is_chk_lprops(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) return 0; buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); @@ -1651,11 +1650,11 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) } dbg_lp("LEB %d", lnum); - - err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); - if (err) + err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); + if (err) { + dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err); goto out; - + } while (1) { if (!is_a_node(c, p, len)) { int i, pad_len; @@ -1712,7 +1711,7 @@ int dbg_check_ltab(struct ubifs_info *c) { int lnum, err, i, cnt; - if (!dbg_is_chk_lprops(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) return 0; /* Bring the entire tree into memory */ @@ -1755,7 +1754,7 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) long long free = 0; int i; - if (!dbg_is_chk_lprops(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) return 0; for (i = 0; i < c->lpt_lebs; i++) { @@ -1797,7 +1796,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) long long chk_lpt_sz, lpt_sz; int err = 0; - if (!dbg_is_chk_lprops(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) return 0; switch (action) { @@ -1902,10 +1901,11 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) return; } - err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); - if (err) + err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); + if (err) { + ubifs_err("cannot read LEB %d, error %d", lnum, err); goto out; - + } while (1) { offs = c->leb_size - len; if (!is_a_node(c, p, len)) { @@ -2019,7 +2019,7 @@ static int dbg_populate_lsave(struct ubifs_info *c) struct ubifs_lpt_heap *heap; int i; - if (!dbg_is_chk_gen(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) return 0; if (random32() & 3) return 0; diff --git a/trunk/fs/ubifs/misc.h b/trunk/fs/ubifs/misc.h index ee7cb5ebb6e8..0b5296a9a4c5 100644 --- 
a/trunk/fs/ubifs/misc.h +++ b/trunk/fs/ubifs/misc.h @@ -38,29 +38,6 @@ static inline int ubifs_zn_dirty(const struct ubifs_znode *znode) return !!test_bit(DIRTY_ZNODE, &znode->flags); } -/** - * ubifs_zn_obsolete - check if znode is obsolete. - * @znode: znode to check - * - * This helper function returns %1 if @znode is obsolete and %0 otherwise. - */ -static inline int ubifs_zn_obsolete(const struct ubifs_znode *znode) -{ - return !!test_bit(OBSOLETE_ZNODE, &znode->flags); -} - -/** - * ubifs_zn_cow - check if znode has to be copied on write. - * @znode: znode to check - * - * This helper function returns %1 if @znode is has COW flag set and %0 - * otherwise. - */ -static inline int ubifs_zn_cow(const struct ubifs_znode *znode) -{ - return !!test_bit(COW_ZNODE, &znode->flags); -} - /** * ubifs_wake_up_bgt - wake up background thread. * @c: UBIFS file-system description object @@ -144,6 +121,86 @@ static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf) return err; } +/** + * ubifs_leb_unmap - unmap an LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number to unmap + * + * This function returns %0 on success and a negative error code on failure. + */ +static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + err = ubi_leb_unmap(c->ubi, lnum); + if (err) { + ubifs_err("unmap LEB %d failed, error %d", lnum, err); + return err; + } + + return 0; +} + +/** + * ubifs_leb_write - write to a LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number to write + * @buf: buffer to write from + * @offs: offset within LEB to write to + * @len: length to write + * @dtype: data type + * + * This function returns %0 on success and a negative error code on failure. 
+ */ +static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum, + const void *buf, int offs, int len, int dtype) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); + if (err) { + ubifs_err("writing %d bytes at %d:%d, error %d", + len, lnum, offs, err); + return err; + } + + return 0; +} + +/** + * ubifs_leb_change - atomic LEB change. + * @c: UBIFS file-system description object + * @lnum: LEB number to write + * @buf: buffer to write from + * @len: length to write + * @dtype: data type + * + * This function returns %0 on success and a negative error code on failure. + */ +static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum, + const void *buf, int len, int dtype) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); + if (err) { + ubifs_err("changing %d bytes in LEB %d, error %d", + len, lnum, err); + return err; + } + + return 0; +} + /** * ubifs_encode_dev - encode device node IDs. 
* @dev: UBIFS device node information diff --git a/trunk/fs/ubifs/orphan.c b/trunk/fs/ubifs/orphan.c index c542c73cfa3c..a5422fffbd69 100644 --- a/trunk/fs/ubifs/orphan.c +++ b/trunk/fs/ubifs/orphan.c @@ -929,7 +929,7 @@ static int dbg_check_orphans(struct ubifs_info *c) struct check_info ci; int err; - if (!dbg_is_chk_orph(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_ORPH)) return 0; ci.last_ino = 0; diff --git a/trunk/fs/ubifs/recovery.c b/trunk/fs/ubifs/recovery.c index af02790d9328..783d8e0beb76 100644 --- a/trunk/fs/ubifs/recovery.c +++ b/trunk/fs/ubifs/recovery.c @@ -117,7 +117,7 @@ static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf, if (!sbuf) return -ENOMEM; - err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0); + err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size); if (err && err != -EBADMSG) goto out_free; @@ -213,10 +213,10 @@ static int write_rcvrd_mst_node(struct ubifs_info *c, mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY); ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); - err = ubifs_leb_change(c, lnum, mst, sz, UBI_SHORTTERM); + err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); if (err) goto out; - err = ubifs_leb_change(c, lnum + 1, mst, sz, UBI_SHORTTERM); + err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM); if (err) goto out; out: @@ -274,8 +274,7 @@ int ubifs_recover_master_node(struct ubifs_info *c) if (cor1) goto out_err; mst = mst1; - } else if (offs1 == 0 && - c->leb_size - offs2 - sz < sz) { + } else if (offs1 == 0 && offs2 + sz >= c->leb_size) { /* 1st LEB was unmapped and written, 2nd not */ if (cor1) goto out_err; @@ -540,8 +539,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, int len = ALIGN(endpt, c->min_io_size); if (start) { - err = ubifs_leb_read(c, lnum, sleb->buf, 0, - start, 1); + err = ubi_read(c->ubi, lnum, sleb->buf, 0, + start); if (err) return err; } @@ -555,8 +554,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, 
ubifs_pad(c, buf, pad_len); } } - err = ubifs_leb_change(c, lnum, sleb->buf, len, - UBI_UNKNOWN); + err = ubi_leb_change(c->ubi, lnum, sleb->buf, len, + UBI_UNKNOWN); if (err) return err; } @@ -820,8 +819,7 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, return -ENOMEM; if (c->leb_size - offs < UBIFS_CS_NODE_SZ) goto out_err; - err = ubifs_leb_read(c, lnum, (void *)cs_node, offs, - UBIFS_CS_NODE_SZ, 0); + err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ); if (err && err != -EBADMSG) goto out_free; ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); @@ -921,7 +919,8 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, * * This function returns %0 on success and a negative error code on failure. */ -static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf) +static int recover_head(const struct ubifs_info *c, int lnum, int offs, + void *sbuf) { int len = c->max_write_size, err; @@ -932,15 +931,15 @@ static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf) return 0; /* Read at the head location and check it is empty flash */ - err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1); + err = ubi_read(c->ubi, lnum, sbuf, offs, len); if (err || !is_empty(sbuf, len)) { dbg_rcvry("cleaning head at %d:%d", lnum, offs); if (offs == 0) return ubifs_leb_unmap(c, lnum); - err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1); + err = ubi_read(c->ubi, lnum, sbuf, 0, offs); if (err) return err; - return ubifs_leb_change(c, lnum, sbuf, offs, UBI_UNKNOWN); + return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN); } return 0; @@ -963,7 +962,7 @@ static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf) * * This function returns %0 on success and a negative error code on failure. 
*/ -int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf) +int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) { int err; @@ -994,7 +993,7 @@ int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf) * * This function returns %0 on success and a negative error code on failure. */ -static int clean_an_unclean_leb(struct ubifs_info *c, +static int clean_an_unclean_leb(const struct ubifs_info *c, struct ubifs_unclean_leb *ucleb, void *sbuf) { int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1; @@ -1010,7 +1009,7 @@ static int clean_an_unclean_leb(struct ubifs_info *c, return 0; } - err = ubifs_leb_read(c, lnum, buf, offs, len, 0); + err = ubi_read(c->ubi, lnum, buf, offs, len); if (err && err != -EBADMSG) return err; @@ -1070,7 +1069,7 @@ static int clean_an_unclean_leb(struct ubifs_info *c, } /* Write back the LEB atomically */ - err = ubifs_leb_change(c, lnum, sbuf, len, UBI_UNKNOWN); + err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN); if (err) return err; @@ -1090,7 +1089,7 @@ static int clean_an_unclean_leb(struct ubifs_info *c, * * This function returns %0 on success and a negative error code on failure. 
*/ -int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf) +int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) { dbg_rcvry("recovery"); while (!list_empty(&c->unclean_leb_list)) { @@ -1455,7 +1454,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) if (i_size >= e->d_size) return 0; /* Read the LEB */ - err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1); + err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size); if (err) goto out; /* Change the size field and recalculate the CRC */ @@ -1471,7 +1470,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) len -= 1; len = ALIGN(len + 1, c->min_io_size); /* Atomically write the fixed LEB back again */ - err = ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN); + err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); if (err) goto out; dbg_rcvry("inode %lu at %d:%d size %lld -> %lld", diff --git a/trunk/fs/ubifs/replay.c b/trunk/fs/ubifs/replay.c index ccabaf1164b3..5e97161ce4d3 100644 --- a/trunk/fs/ubifs/replay.c +++ b/trunk/fs/ubifs/replay.c @@ -523,7 +523,8 @@ static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) if (!list_is_last(&next->list, &jh->buds_list)) return 0; - err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1); + err = ubi_read(c->ubi, next->lnum, (char *)&data, + next->start, 4); if (err) return 0; diff --git a/trunk/fs/ubifs/sb.c b/trunk/fs/ubifs/sb.c index 93d938ad3d2a..c606f010e8df 100644 --- a/trunk/fs/ubifs/sb.c +++ b/trunk/fs/ubifs/sb.c @@ -674,15 +674,15 @@ static int fixup_leb(struct ubifs_info *c, int lnum, int len) if (len == 0) { dbg_mnt("unmap empty LEB %d", lnum); - return ubifs_leb_unmap(c, lnum); + return ubi_leb_unmap(c->ubi, lnum); } dbg_mnt("fixup LEB %d, data len %d", lnum, len); - err = ubifs_leb_read(c, lnum, c->sbuf, 0, len, 1); + err = ubi_read(c->ubi, lnum, c->sbuf, 0, len); if (err) return err; - return ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN); + return 
ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); } /** diff --git a/trunk/fs/ubifs/scan.c b/trunk/fs/ubifs/scan.c index 37383e8011b1..36216b46f772 100644 --- a/trunk/fs/ubifs/scan.c +++ b/trunk/fs/ubifs/scan.c @@ -148,7 +148,7 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, INIT_LIST_HEAD(&sleb->nodes); sleb->buf = sbuf; - err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0); + err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs); if (err && err != -EBADMSG) { ubifs_err("cannot read %d bytes from LEB %d:%d," " error %d", c->leb_size - offs, lnum, offs, err); @@ -240,7 +240,7 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, int len; ubifs_err("corruption at LEB %d:%d", lnum, offs); - if (dbg_is_tst_rcvry(c)) + if (dbg_failure_mode) return; len = c->leb_size - offs; if (len > 8192) diff --git a/trunk/fs/ubifs/super.c b/trunk/fs/ubifs/super.c index b28121278d46..529be0582029 100644 --- a/trunk/fs/ubifs/super.c +++ b/trunk/fs/ubifs/super.c @@ -85,7 +85,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA) return 4; - if (ui->xattr && !S_ISREG(inode->i_mode)) + if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG) return 5; if (!ubifs_compr_present(ui->compr_type)) { @@ -94,7 +94,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) ubifs_compr_name(ui->compr_type)); } - err = dbg_check_dir(c, inode); + err = dbg_check_dir_size(c, inode); return err; } @@ -914,7 +914,7 @@ static int check_volume_empty(struct ubifs_info *c) c->empty = 1; for (lnum = 0; lnum < c->leb_cnt; lnum++) { - err = ubifs_is_mapped(c, lnum); + err = ubi_is_mapped(c->ubi, lnum); if (unlikely(err < 0)) return err; if (err == 1) { diff --git a/trunk/fs/ubifs/tnc.c b/trunk/fs/ubifs/tnc.c index 066738647685..91b4213dde84 100644 --- a/trunk/fs/ubifs/tnc.c +++ b/trunk/fs/ubifs/tnc.c @@ 
-223,7 +223,7 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c, __set_bit(DIRTY_ZNODE, &zn->flags); __clear_bit(COW_ZNODE, &zn->flags); - ubifs_assert(!ubifs_zn_obsolete(znode)); + ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); __set_bit(OBSOLETE_ZNODE, &znode->flags); if (znode->level != 0) { @@ -271,7 +271,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, struct ubifs_znode *zn; int err; - if (!ubifs_zn_cow(znode)) { + if (!test_bit(COW_ZNODE, &znode->flags)) { /* znode is not being committed */ if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) { atomic_long_inc(&c->dirty_zn_cnt); @@ -462,7 +462,7 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); - err = ubifs_leb_read(c, lnum, buf, offs, len, 1); + err = ubi_read(c->ubi, lnum, buf, offs, len); if (err) { ubifs_err("cannot read node type %d from LEB %d:%d, error %d", type, lnum, offs, err); @@ -1666,7 +1666,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, if (!overlap) { /* We may safely unlock the write-buffer and read the data */ spin_unlock(&wbuf->lock); - return ubifs_leb_read(c, lnum, buf, offs, len, 0); + return ubi_read(c->ubi, lnum, buf, offs, len); } /* Don't read under wbuf */ @@ -1680,7 +1680,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, if (rlen > 0) /* Read everything that goes before write-buffer */ - return ubifs_leb_read(c, lnum, buf, offs, rlen, 0); + return ubi_read(c->ubi, lnum, buf, offs, rlen); return 0; } @@ -1767,7 +1767,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) if (wbuf) err = read_wbuf(wbuf, bu->buf, len, lnum, offs); else - err = ubifs_leb_read(c, lnum, bu->buf, offs, len, 0); + err = ubi_read(c->ubi, lnum, bu->buf, offs, len); /* Check for a race with GC */ if (maybe_leb_gced(c, lnum, bu->gc_seq)) @@ -2423,7 +2423,7 @@ static int tnc_delete(struct 
ubifs_info *c, struct ubifs_znode *znode, int n) */ do { - ubifs_assert(!ubifs_zn_obsolete(znode)); + ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); ubifs_assert(ubifs_zn_dirty(znode)); zp = znode->parent; @@ -2479,8 +2479,9 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) c->zroot.offs = zbr->offs; c->zroot.len = zbr->len; c->zroot.znode = znode; - ubifs_assert(!ubifs_zn_obsolete(zp)); - ubifs_assert(ubifs_zn_dirty(zp)); + ubifs_assert(!test_bit(OBSOLETE_ZNODE, + &zp->flags)); + ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags)); atomic_long_dec(&c->dirty_zn_cnt); if (zp->cnext) { @@ -2864,7 +2865,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c) struct ubifs_znode *znode = cnext; cnext = cnext->cnext; - if (ubifs_zn_obsolete(znode)) + if (test_bit(OBSOLETE_ZNODE, &znode->flags)) kfree(znode); } while (cnext && cnext != c->cnext); } @@ -3300,7 +3301,7 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, if (!S_ISREG(inode->i_mode)) return 0; - if (!dbg_is_chk_gen(c)) + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) return 0; block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; @@ -3336,10 +3337,9 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, ubifs_err("inode %lu has size %lld, but there are data at offset %lld " "(data key %s)", (unsigned long)inode->i_ino, size, ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key)); - mutex_unlock(&c->tnc_mutex); dbg_dump_inode(c, inode); dbg_dump_stack(); - return -EINVAL; + err = -EINVAL; out_unlock: mutex_unlock(&c->tnc_mutex); diff --git a/trunk/fs/ubifs/tnc_commit.c b/trunk/fs/ubifs/tnc_commit.c index 4c15f07a8bb2..41920f357bbf 100644 --- a/trunk/fs/ubifs/tnc_commit.c +++ b/trunk/fs/ubifs/tnc_commit.c @@ -22,7 +22,6 @@ /* This file implements TNC functions for committing */ -#include #include "ubifs.h" /** @@ -88,12 +87,8 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx, 
atomic_long_dec(&c->dirty_zn_cnt); ubifs_assert(ubifs_zn_dirty(znode)); - ubifs_assert(ubifs_zn_cow(znode)); + ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); - /* - * Note, unlike 'write_index()' we do not add memory barriers here - * because this function is called with @c->tnc_mutex locked. - */ __clear_bit(DIRTY_ZNODE, &znode->flags); __clear_bit(COW_ZNODE, &znode->flags); @@ -382,7 +377,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) c->gap_lebs = NULL; return err; } - if (!dbg_is_chk_index(c)) { + if (dbg_force_in_the_gaps_enabled()) { /* * Do not print scary warnings if the debugging * option which forces in-the-gaps is enabled. @@ -496,6 +491,25 @@ static int layout_in_empty_space(struct ubifs_info *c) else next_len = ubifs_idx_node_sz(c, cnext->child_cnt); + if (c->min_io_size == 1) { + buf_offs += ALIGN(len, 8); + if (next_len) { + if (buf_offs + next_len <= c->leb_size) + continue; + err = ubifs_update_one_lp(c, lnum, 0, + c->leb_size - buf_offs, 0, 0); + if (err) + return err; + lnum = -1; + continue; + } + err = ubifs_update_one_lp(c, lnum, + c->leb_size - buf_offs, 0, 0, 0); + if (err) + return err; + break; + } + /* Update buffer positions */ wlen = used + len; used += ALIGN(len, 8); @@ -644,7 +658,7 @@ static int get_znodes_to_commit(struct ubifs_info *c) } cnt += 1; while (1) { - ubifs_assert(!ubifs_zn_cow(znode)); + ubifs_assert(!test_bit(COW_ZNODE, &znode->flags)); __set_bit(COW_ZNODE, &znode->flags); znode->alt = 0; cnext = find_next_dirty(znode); @@ -690,7 +704,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt) c->ilebs[c->ileb_cnt++] = lnum; dbg_cmt("LEB %d", lnum); } - if (dbg_is_chk_index(c) && !(random32() & 7)) + if (dbg_force_in_the_gaps()) return -ENOSPC; return 0; } @@ -816,7 +830,7 @@ static int write_index(struct ubifs_info *c) struct ubifs_idx_node *idx; struct ubifs_znode *znode, *cnext; int i, lnum, offs, len, next_len, buf_len, buf_offs, used; - int avail, wlen, err, lnum_pos = 0, blen, nxt_offs; + int 
avail, wlen, err, lnum_pos = 0; cnext = c->enext; if (!cnext) @@ -893,7 +907,7 @@ static int write_index(struct ubifs_info *c) cnext = znode->cnext; ubifs_assert(ubifs_zn_dirty(znode)); - ubifs_assert(ubifs_zn_cow(znode)); + ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); /* * It is important that other threads should see %DIRTY_ZNODE @@ -908,28 +922,6 @@ static int write_index(struct ubifs_info *c) clear_bit(COW_ZNODE, &znode->flags); smp_mb__after_clear_bit(); - /* - * We have marked the znode as clean but have not updated the - * @c->clean_zn_cnt counter. If this znode becomes dirty again - * before 'free_obsolete_znodes()' is called, then - * @c->clean_zn_cnt will be decremented before it gets - * incremented (resulting in 2 decrements for the same znode). - * This means that @c->clean_zn_cnt may become negative for a - * while. - * - * Q: why we cannot increment @c->clean_zn_cnt? - * A: because we do not have the @c->tnc_mutex locked, and the - * following code would be racy and buggy: - * - * if (!ubifs_zn_obsolete(znode)) { - * atomic_long_inc(&c->clean_zn_cnt); - * atomic_long_inc(&ubifs_clean_zn_cnt); - * } - * - * Thus, we just delay the @c->clean_zn_cnt update until we - * have the mutex locked. 
- */ - /* Do not access znode from this point on */ /* Update buffer positions */ @@ -946,38 +938,65 @@ static int write_index(struct ubifs_info *c) else next_len = ubifs_idx_node_sz(c, cnext->child_cnt); - nxt_offs = buf_offs + used + next_len; - if (next_len && nxt_offs <= c->leb_size) { - if (avail > 0) + if (c->min_io_size == 1) { + /* + * Write the prepared index node immediately if there is + * no minimum IO size + */ + err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, + wlen, UBI_SHORTTERM); + if (err) + return err; + buf_offs += ALIGN(wlen, 8); + if (next_len) { + used = 0; + avail = buf_len; + if (buf_offs + next_len > c->leb_size) { + err = ubifs_update_one_lp(c, lnum, + LPROPS_NC, 0, 0, LPROPS_TAKEN); + if (err) + return err; + lnum = -1; + } continue; - else - blen = buf_len; + } } else { - wlen = ALIGN(wlen, 8); - blen = ALIGN(wlen, c->min_io_size); - ubifs_pad(c, c->cbuf + wlen, blen - wlen); - } - - /* The buffer is full or there are no more znodes to do */ - err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, blen, - UBI_SHORTTERM); - if (err) - return err; - buf_offs += blen; - if (next_len) { - if (nxt_offs > c->leb_size) { - err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, - 0, LPROPS_TAKEN); - if (err) - return err; - lnum = -1; + int blen, nxt_offs = buf_offs + used + next_len; + + if (next_len && nxt_offs <= c->leb_size) { + if (avail > 0) + continue; + else + blen = buf_len; + } else { + wlen = ALIGN(wlen, 8); + blen = ALIGN(wlen, c->min_io_size); + ubifs_pad(c, c->cbuf + wlen, blen - wlen); + } + /* + * The buffer is full or there are no more znodes + * to do + */ + err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, + blen, UBI_SHORTTERM); + if (err) + return err; + buf_offs += blen; + if (next_len) { + if (nxt_offs > c->leb_size) { + err = ubifs_update_one_lp(c, lnum, + LPROPS_NC, 0, 0, LPROPS_TAKEN); + if (err) + return err; + lnum = -1; + } + used -= blen; + if (used < 0) + used = 0; + avail = buf_len - used; + memmove(c->cbuf, c->cbuf + 
blen, used); + continue; } - used -= blen; - if (used < 0) - used = 0; - avail = buf_len - used; - memmove(c->cbuf, c->cbuf + blen, used); - continue; } break; } @@ -1010,7 +1029,7 @@ static void free_obsolete_znodes(struct ubifs_info *c) do { znode = cnext; cnext = znode->cnext; - if (ubifs_zn_obsolete(znode)) + if (test_bit(OBSOLETE_ZNODE, &znode->flags)) kfree(znode); else { znode->cnext = NULL; diff --git a/trunk/fs/ubifs/ubifs.h b/trunk/fs/ubifs/ubifs.h index 702b79258e30..f79983d6f860 100644 --- a/trunk/fs/ubifs/ubifs.h +++ b/trunk/fs/ubifs/ubifs.h @@ -230,14 +230,14 @@ enum { * LPT cnode flag bits. * * DIRTY_CNODE: cnode is dirty - * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), - * so it can (and must) be freed when the commit is finished * COW_CNODE: cnode is being committed and must be copied before writing + * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), + * so it can (and must) be freed when the commit is finished */ enum { DIRTY_CNODE = 0, - OBSOLETE_CNODE = 1, - COW_CNODE = 2, + COW_CNODE = 1, + OBSOLETE_CNODE = 2, }; /* @@ -1468,15 +1468,6 @@ extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; /* io.c */ void ubifs_ro_mode(struct ubifs_info *c, int err); -int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, - int len, int even_ebadmsg); -int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, - int len, int dtype); -int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, - int dtype); -int ubifs_leb_unmap(struct ubifs_info *c, int lnum); -int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype); -int ubifs_is_mapped(const struct ubifs_info *c, int lnum); int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, int dtype); @@ -1756,8 +1747,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int 
lnum, int offs, void *sbuf, int jhead); struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf); -int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf); -int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf); +int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); +int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); int ubifs_rcvry_gc_commit(struct ubifs_info *c); int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, int deletion, loff_t new_size); diff --git a/trunk/fs/xfs/Makefile b/trunk/fs/xfs/Makefile index 75bb316529dd..284a7c89697e 100644 --- a/trunk/fs/xfs/Makefile +++ b/trunk/fs/xfs/Makefile @@ -88,6 +88,8 @@ xfs-y += xfs_alloc.o \ xfs_vnodeops.o \ xfs_rw.o +xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o + # Objects in linux/ xfs-y += $(addprefix $(XFS_LINUX)/, \ kmem.o \ diff --git a/trunk/fs/xfs/linux-2.6/xfs_acl.c b/trunk/fs/xfs/linux-2.6/xfs_acl.c index 115ac6919533..39f4f809bb68 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_acl.c +++ b/trunk/fs/xfs/linux-2.6/xfs_acl.c @@ -264,7 +264,7 @@ xfs_set_mode(struct inode *inode, mode_t mode) iattr.ia_mode = mode; iattr.ia_ctime = current_fs_time(inode->i_sb); - error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL); + error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL); } return error; diff --git a/trunk/fs/xfs/linux-2.6/xfs_aops.c b/trunk/fs/xfs/linux-2.6/xfs_aops.c index 26384fe3f26d..79ce38be15a1 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_aops.c +++ b/trunk/fs/xfs/linux-2.6/xfs_aops.c @@ -181,7 +181,6 @@ xfs_setfilesize( isize = xfs_ioend_new_eof(ioend); if (isize) { - trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); ip->i_d.di_size = isize; xfs_mark_inode_dirty(ip); } @@ -895,6 +894,11 @@ xfs_aops_discard_page( * For unwritten space on the page we need to start the conversion to * regular allocated space. * For any other dirty buffer heads on the page we should flush them. 
+ * + * If we detect that a transaction would be required to flush the page, we + * have to check the process flags first, if we are already in a transaction + * or disk I/O during allocations is off, we need to fail the writepage and + * redirty the page. */ STATIC int xfs_vm_writepage( @@ -902,6 +906,7 @@ xfs_vm_writepage( struct writeback_control *wbc) { struct inode *inode = page->mapping->host; + int delalloc, unwritten; struct buffer_head *bh, *head; struct xfs_bmbt_irec imap; xfs_ioend_t *ioend = NULL, *iohead = NULL; @@ -933,10 +938,15 @@ xfs_vm_writepage( goto redirty; /* - * Given that we do not allow direct reclaim to call us, we should - * never be called while in a filesystem transaction. + * We need a transaction if there are delalloc or unwritten buffers + * on the page. + * + * If we need a transaction and the process flags say we are already + * in a transaction, or no IO is allowed then mark the page dirty + * again and leave the page as is. */ - if (WARN_ON(current->flags & PF_FSTRANS)) + xfs_count_page_state(page, &delalloc, &unwritten); + if ((current->flags & PF_FSTRANS) && (delalloc || unwritten)) goto redirty; /* Is this page beyond the end of the file? */ @@ -960,7 +970,7 @@ xfs_vm_writepage( offset = page_offset(page); type = IO_OVERWRITE; - if (wbc->sync_mode == WB_SYNC_NONE) + if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking) nonblocking = 1; do { diff --git a/trunk/fs/xfs/linux-2.6/xfs_buf.c b/trunk/fs/xfs/linux-2.6/xfs_buf.c index b2b411985591..5e68099db2a5 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_buf.c +++ b/trunk/fs/xfs/linux-2.6/xfs_buf.c @@ -499,14 +499,16 @@ _xfs_buf_find( spin_unlock(&pag->pag_buf_lock); xfs_perag_put(pag); - if (!xfs_buf_trylock(bp)) { - if (flags & XBF_TRYLOCK) { + if (xfs_buf_cond_lock(bp)) { + /* failed, so wait for the lock if requested. 
*/ + if (!(flags & XBF_TRYLOCK)) { + xfs_buf_lock(bp); + XFS_STATS_INC(xb_get_locked_waited); + } else { xfs_buf_rele(bp); XFS_STATS_INC(xb_busy_locked); return NULL; } - xfs_buf_lock(bp); - XFS_STATS_INC(xb_get_locked_waited); } /* @@ -592,8 +594,10 @@ _xfs_buf_read( ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); - bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD); - bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); + bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ + XBF_READ_AHEAD | _XBF_RUN_QUEUES); + bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \ + XBF_READ_AHEAD | _XBF_RUN_QUEUES); status = xfs_buf_iorequest(bp); if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC)) @@ -677,6 +681,7 @@ xfs_buf_read_uncached( return NULL; /* set up the buffer for a read IO */ + xfs_buf_lock(bp); XFS_BUF_SET_ADDR(bp, daddr); XFS_BUF_READ(bp); XFS_BUF_BUSY(bp); @@ -811,6 +816,8 @@ xfs_buf_get_uncached( goto fail_free_mem; } + xfs_buf_unlock(bp); + trace_xfs_buf_get_uncached(bp, _RET_IP_); return bp; @@ -889,8 +896,8 @@ xfs_buf_rele( * to push on stale inode buffers. */ int -xfs_buf_trylock( - struct xfs_buf *bp) +xfs_buf_cond_lock( + xfs_buf_t *bp) { int locked; @@ -900,8 +907,15 @@ xfs_buf_trylock( else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) xfs_log_force(bp->b_target->bt_mount, 0); - trace_xfs_buf_trylock(bp, _RET_IP_); - return locked; + trace_xfs_buf_cond_lock(bp, _RET_IP_); + return locked ? 
0 : -EBUSY; +} + +int +xfs_buf_lock_value( + xfs_buf_t *bp) +{ + return bp->b_sema.count; } /* @@ -915,7 +929,7 @@ xfs_buf_trylock( */ void xfs_buf_lock( - struct xfs_buf *bp) + xfs_buf_t *bp) { trace_xfs_buf_lock(bp, _RET_IP_); @@ -936,7 +950,7 @@ xfs_buf_lock( */ void xfs_buf_unlock( - struct xfs_buf *bp) + xfs_buf_t *bp) { if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) { atomic_inc(&bp->b_hold); @@ -1107,7 +1121,7 @@ xfs_bioerror_relse( XFS_BUF_UNDELAYWRITE(bp); XFS_BUF_DONE(bp); XFS_BUF_STALE(bp); - bp->b_iodone = NULL; + XFS_BUF_CLR_IODONE_FUNC(bp); if (!(fl & XBF_ASYNC)) { /* * Mark b_error and B_ERROR _both_. @@ -1209,21 +1223,23 @@ _xfs_buf_ioapply( total_nr_pages = bp->b_page_count; map_i = 0; - if (bp->b_flags & XBF_WRITE) { - if (bp->b_flags & XBF_SYNCIO) - rw = WRITE_SYNC; - else - rw = WRITE; - if (bp->b_flags & XBF_FUA) - rw |= REQ_FUA; - if (bp->b_flags & XBF_FLUSH) - rw |= REQ_FLUSH; - } else if (bp->b_flags & XBF_READ_AHEAD) { - rw = READA; + if (bp->b_flags & XBF_ORDERED) { + ASSERT(!(bp->b_flags & XBF_READ)); + rw = WRITE_FLUSH_FUA; + } else if (bp->b_flags & XBF_LOG_BUFFER) { + ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); + bp->b_flags &= ~_XBF_RUN_QUEUES; + rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC; + } else if (bp->b_flags & _XBF_RUN_QUEUES) { + ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); + bp->b_flags &= ~_XBF_RUN_QUEUES; + rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META; } else { - rw = READ; + rw = (bp->b_flags & XBF_WRITE) ? WRITE : + (bp->b_flags & XBF_READ_AHEAD) ? 
READA : READ; } + next_chunk: atomic_inc(&bp->b_io_remaining); nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); @@ -1678,14 +1694,15 @@ xfs_buf_delwri_split( list_for_each_entry_safe(bp, n, dwq, b_list) { ASSERT(bp->b_flags & XBF_DELWRI); - if (!XFS_BUF_ISPINNED(bp) && xfs_buf_trylock(bp)) { + if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) { if (!force && time_before(jiffies, bp->b_queuetime + age)) { xfs_buf_unlock(bp); break; } - bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q); + bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q| + _XBF_RUN_QUEUES); bp->b_flags |= XBF_WRITE; list_move_tail(&bp->b_list, list); trace_xfs_buf_delwri_split(bp, _RET_IP_); @@ -1721,6 +1738,14 @@ xfs_buf_cmp( return 0; } +void +xfs_buf_delwri_sort( + xfs_buftarg_t *target, + struct list_head *list) +{ + list_sort(NULL, list, xfs_buf_cmp); +} + STATIC int xfsbufd( void *data) diff --git a/trunk/fs/xfs/linux-2.6/xfs_buf.h b/trunk/fs/xfs/linux-2.6/xfs_buf.h index 6a83b46b4bcf..50a7d5fb3b73 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_buf.h +++ b/trunk/fs/xfs/linux-2.6/xfs_buf.h @@ -46,46 +46,43 @@ typedef enum { #define XBF_READ (1 << 0) /* buffer intended for reading from device */ #define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ -#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ -#define XBF_MAPPED (1 << 3) /* buffer mapped (b_addr valid) */ +#define XBF_MAPPED (1 << 2) /* buffer mapped (b_addr valid) */ #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ #define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ #define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ - -/* I/O hints for the BIO layer */ -#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ -#define XBF_FUA (1 << 11)/* force cache write through mode */ -#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ +#define XBF_ORDERED (1 << 11)/* use ordered writes */ 
+#define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */ +#define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */ /* flags used only as arguments to access routines */ -#define XBF_LOCK (1 << 15)/* lock requested */ -#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ -#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */ +#define XBF_LOCK (1 << 14)/* lock requested */ +#define XBF_TRYLOCK (1 << 15)/* lock requested, but do not wait */ +#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ /* flags used only internally */ -#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ -#define _XBF_KMEM (1 << 21)/* backed by heap memory */ -#define _XBF_DELWRI_Q (1 << 22)/* buffer on delwri queue */ +#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ +#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ +#define _XBF_KMEM (1 << 20)/* backed by heap memory */ +#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */ typedef unsigned int xfs_buf_flags_t; #define XFS_BUF_FLAGS \ { XBF_READ, "READ" }, \ { XBF_WRITE, "WRITE" }, \ - { XBF_READ_AHEAD, "READ_AHEAD" }, \ { XBF_MAPPED, "MAPPED" }, \ { XBF_ASYNC, "ASYNC" }, \ { XBF_DONE, "DONE" }, \ { XBF_DELWRI, "DELWRI" }, \ { XBF_STALE, "STALE" }, \ - { XBF_SYNCIO, "SYNCIO" }, \ - { XBF_FUA, "FUA" }, \ - { XBF_FLUSH, "FLUSH" }, \ + { XBF_ORDERED, "ORDERED" }, \ + { XBF_READ_AHEAD, "READ_AHEAD" }, \ { XBF_LOCK, "LOCK" }, /* should never be set */\ { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ { _XBF_PAGES, "PAGES" }, \ + { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" } @@ -94,6 +91,11 @@ typedef enum { XBT_FORCE_FLUSH = 1, } xfs_buftarg_flags_t; +typedef struct xfs_bufhash { + struct list_head bh_list; + spinlock_t bh_lock; +} xfs_bufhash_t; + typedef struct xfs_buftarg { dev_t bt_dev; struct block_device *bt_bdev; @@ -149,7 +151,7 @@ typedef struct 
xfs_buf { xfs_buf_iodone_t b_iodone; /* I/O completion function */ struct completion b_iowait; /* queue for I/O waiters */ void *b_fspriv; - struct xfs_trans *b_transp; + void *b_fspriv2; struct page **b_pages; /* array of page pointers */ struct page *b_page_array[XB_PAGES]; /* inline pages */ unsigned long b_queuetime; /* time buffer was queued */ @@ -190,11 +192,10 @@ extern void xfs_buf_free(xfs_buf_t *); extern void xfs_buf_rele(xfs_buf_t *); /* Locking and Unlocking Buffers */ -extern int xfs_buf_trylock(xfs_buf_t *); +extern int xfs_buf_cond_lock(xfs_buf_t *); +extern int xfs_buf_lock_value(xfs_buf_t *); extern void xfs_buf_lock(xfs_buf_t *); extern void xfs_buf_unlock(xfs_buf_t *); -#define xfs_buf_islocked(bp) \ - ((bp)->b_sema.count <= 0) /* Buffer Read and Write Routines */ extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); @@ -233,9 +234,8 @@ extern void xfs_buf_terminate(void); #define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) -#define XFS_BUF_ZEROFLAGS(bp) \ - ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \ - XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) +#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ + ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) void xfs_buf_stale(struct xfs_buf *bp); #define XFS_BUF_STALE(bp) xfs_buf_stale(bp); @@ -267,6 +267,10 @@ void xfs_buf_stale(struct xfs_buf *bp); #define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC) #define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC) +#define XFS_BUF_ORDERED(bp) ((bp)->b_flags |= XBF_ORDERED) +#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED) +#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED) + #define XFS_BUF_HOLD(bp) xfs_buf_hold(bp) #define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) #define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) @@ -276,6 +280,14 @@ void xfs_buf_stale(struct xfs_buf *bp); #define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) #define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) +#define 
XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone) +#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func)) +#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL) + +#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv) +#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val)) +#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) +#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) #define XFS_BUF_SET_START(bp) do { } while (0) #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) @@ -301,6 +313,10 @@ xfs_buf_set_ref( #define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) +#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp) +#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0) +#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp) +#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp) #define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); #define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target)) diff --git a/trunk/fs/xfs/linux-2.6/xfs_export.c b/trunk/fs/xfs/linux-2.6/xfs_export.c index 75e5d322e48f..f4f878fc0083 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_export.c +++ b/trunk/fs/xfs/linux-2.6/xfs_export.c @@ -151,14 +151,14 @@ xfs_nfs_get_inode( * We don't use ESTALE directly down the chain to not * confuse applications using bulkstat that expect EINVAL. 
*/ - if (error == EINVAL || error == ENOENT) + if (error == EINVAL) error = ESTALE; return ERR_PTR(-error); } if (ip->i_d.di_gen != generation) { IRELE(ip); - return ERR_PTR(-ESTALE); + return ERR_PTR(-ENOENT); } return VFS_I(ip); diff --git a/trunk/fs/xfs/linux-2.6/xfs_file.c b/trunk/fs/xfs/linux-2.6/xfs_file.c index 8073f61efb8e..7f782af286bf 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_file.c +++ b/trunk/fs/xfs/linux-2.6/xfs_file.c @@ -944,7 +944,7 @@ xfs_file_fallocate( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = new_size; - error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK); + error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); } out_unlock: diff --git a/trunk/fs/xfs/linux-2.6/xfs_iops.c b/trunk/fs/xfs/linux-2.6/xfs_iops.c index 501e4f630548..d44d92cd12b1 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_iops.c +++ b/trunk/fs/xfs/linux-2.6/xfs_iops.c @@ -39,7 +39,6 @@ #include "xfs_buf_item.h" #include "xfs_utils.h" #include "xfs_vnodeops.h" -#include "xfs_inode_item.h" #include "xfs_trace.h" #include @@ -498,442 +497,12 @@ xfs_vn_getattr( return 0; } -int -xfs_setattr_nonsize( - struct xfs_inode *ip, - struct iattr *iattr, - int flags) -{ - xfs_mount_t *mp = ip->i_mount; - struct inode *inode = VFS_I(ip); - int mask = iattr->ia_valid; - xfs_trans_t *tp; - int error; - uid_t uid = 0, iuid = 0; - gid_t gid = 0, igid = 0; - struct xfs_dquot *udqp = NULL, *gdqp = NULL; - struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL; - - trace_xfs_setattr(ip); - - if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); - - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - - error = -inode_change_ok(inode, iattr); - if (error) - return XFS_ERROR(error); - - ASSERT((mask & ATTR_SIZE) == 0); - - /* - * If disk quotas is on, we make sure that the dquots do exist on disk, - * before we start any other transactions. Trying to do this later - * is messy. We don't care to take a readlock to look at the ids - * in inode here, because we can't hold it across the trans_reserve. 
- * If the IDs do change before we take the ilock, we're covered - * because the i_*dquot fields will get updated anyway. - */ - if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) { - uint qflags = 0; - - if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { - uid = iattr->ia_uid; - qflags |= XFS_QMOPT_UQUOTA; - } else { - uid = ip->i_d.di_uid; - } - if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { - gid = iattr->ia_gid; - qflags |= XFS_QMOPT_GQUOTA; - } else { - gid = ip->i_d.di_gid; - } - - /* - * We take a reference when we initialize udqp and gdqp, - * so it is important that we never blindly double trip on - * the same variable. See xfs_create() for an example. - */ - ASSERT(udqp == NULL); - ASSERT(gdqp == NULL); - error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), - qflags, &udqp, &gdqp); - if (error) - return error; - } - - tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); - error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); - if (error) - goto out_dqrele; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - - /* - * Change file ownership. Must be the owner or privileged. - */ - if (mask & (ATTR_UID|ATTR_GID)) { - /* - * These IDs could have changed since we last looked at them. - * But, we're assured that if the ownership did change - * while we didn't have the inode locked, inode's dquot(s) - * would have changed also. - */ - iuid = ip->i_d.di_uid; - igid = ip->i_d.di_gid; - gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; - uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; - - /* - * Do a quota reservation only if uid/gid is actually - * going to change. - */ - if (XFS_IS_QUOTA_RUNNING(mp) && - ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || - (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { - ASSERT(tp); - error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, - capable(CAP_FOWNER) ? - XFS_QMOPT_FORCE_RES : 0); - if (error) /* out of quota */ - goto out_trans_cancel; - } - } - - xfs_trans_ijoin(tp, ip); - - /* - * Change file ownership. 
Must be the owner or privileged. - */ - if (mask & (ATTR_UID|ATTR_GID)) { - /* - * CAP_FSETID overrides the following restrictions: - * - * The set-user-ID and set-group-ID bits of a file will be - * cleared upon successful return from chown() - */ - if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && - !capable(CAP_FSETID)) - ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); - - /* - * Change the ownerships and register quota modifications - * in the transaction. - */ - if (iuid != uid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { - ASSERT(mask & ATTR_UID); - ASSERT(udqp); - olddquot1 = xfs_qm_vop_chown(tp, ip, - &ip->i_udquot, udqp); - } - ip->i_d.di_uid = uid; - inode->i_uid = uid; - } - if (igid != gid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { - ASSERT(!XFS_IS_PQUOTA_ON(mp)); - ASSERT(mask & ATTR_GID); - ASSERT(gdqp); - olddquot2 = xfs_qm_vop_chown(tp, ip, - &ip->i_gdquot, gdqp); - } - ip->i_d.di_gid = gid; - inode->i_gid = gid; - } - } - - /* - * Change file access modes. - */ - if (mask & ATTR_MODE) { - umode_t mode = iattr->ia_mode; - - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) - mode &= ~S_ISGID; - - ip->i_d.di_mode &= S_IFMT; - ip->i_d.di_mode |= mode & ~S_IFMT; - - inode->i_mode &= S_IFMT; - inode->i_mode |= mode & ~S_IFMT; - } - - /* - * Change file access or modified times. 
- */ - if (mask & ATTR_ATIME) { - inode->i_atime = iattr->ia_atime; - ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; - ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; - ip->i_update_core = 1; - } - if (mask & ATTR_CTIME) { - inode->i_ctime = iattr->ia_ctime; - ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; - ip->i_update_core = 1; - } - if (mask & ATTR_MTIME) { - inode->i_mtime = iattr->ia_mtime; - ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; - ip->i_update_core = 1; - } - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - XFS_STATS_INC(xs_ig_attrchg); - - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - /* - * Release any dquot(s) the inode had kept before chown. - */ - xfs_qm_dqrele(olddquot1); - xfs_qm_dqrele(olddquot2); - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); - - if (error) - return XFS_ERROR(error); - - /* - * XXX(hch): Updating the ACL entries is not atomic vs the i_mode - * update. We could avoid this with linked transactions - * and passing down the transaction pointer all the way - * to attr_set. No previous user of the generic - * Posix ACL code seems to care about this issue either. - */ - if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { - error = -xfs_acl_chmod(inode); - if (error) - return XFS_ERROR(error); - } - - return 0; - -out_trans_cancel: - xfs_trans_cancel(tp, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); -out_dqrele: - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); - return error; -} - -/* - * Truncate file. Must have write permission and not be a directory. 
- */ -int -xfs_setattr_size( - struct xfs_inode *ip, - struct iattr *iattr, - int flags) -{ - struct xfs_mount *mp = ip->i_mount; - struct inode *inode = VFS_I(ip); - int mask = iattr->ia_valid; - struct xfs_trans *tp; - int error; - uint lock_flags; - uint commit_flags = 0; - - trace_xfs_setattr(ip); - - if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); - - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - - error = -inode_change_ok(inode, iattr); - if (error) - return XFS_ERROR(error); - - ASSERT(S_ISREG(ip->i_d.di_mode)); - ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| - ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID| - ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); - - lock_flags = XFS_ILOCK_EXCL; - if (!(flags & XFS_ATTR_NOLOCK)) - lock_flags |= XFS_IOLOCK_EXCL; - xfs_ilock(ip, lock_flags); - - /* - * Short circuit the truncate case for zero length files. - */ - if (iattr->ia_size == 0 && - ip->i_size == 0 && ip->i_d.di_nextents == 0) { - if (!(mask & (ATTR_CTIME|ATTR_MTIME))) - goto out_unlock; - - /* - * Use the regular setattr path to update the timestamps. - */ - xfs_iunlock(ip, lock_flags); - iattr->ia_valid &= ~ATTR_SIZE; - return xfs_setattr_nonsize(ip, iattr, 0); - } - - /* - * Make sure that the dquots are attached to the inode. - */ - error = xfs_qm_dqattach_locked(ip, 0); - if (error) - goto out_unlock; - - /* - * Now we can make the changes. Before we join the inode to the - * transaction, take care of the part of the truncation that must be - * done without the inode lock. This needs to be done before joining - * the inode to the transaction, because the inode cannot be unlocked - * once it is a part of the transaction. - */ - if (iattr->ia_size > ip->i_size) { - /* - * Do the first part of growing a file: zero any data in the - * last block that is beyond the old EOF. We need to do this - * before the inode is joined to the transaction to modify - * i_size. 
- */ - error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); - if (error) - goto out_unlock; - } - xfs_iunlock(ip, XFS_ILOCK_EXCL); - lock_flags &= ~XFS_ILOCK_EXCL; - - /* - * We are going to log the inode size change in this transaction so - * any previous writes that are beyond the on disk EOF and the new - * EOF that have not been written out need to be written here. If we - * do not write the data out, we expose ourselves to the null files - * problem. - * - * Only flush from the on disk size to the smaller of the in memory - * file size or the new size as that's the range we really care about - * here and prevents waiting for other data not within the range we - * care about here. - */ - if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) { - error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, - XBF_ASYNC, FI_NONE); - if (error) - goto out_unlock; - } - - /* - * Wait for all I/O to complete. - */ - xfs_ioend_wait(ip); - - error = -block_truncate_page(inode->i_mapping, iattr->ia_size, - xfs_get_blocks); - if (error) - goto out_unlock; - - tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); - error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, - XFS_ITRUNCATE_LOG_COUNT); - if (error) - goto out_trans_cancel; - - truncate_setsize(inode, iattr->ia_size); - - commit_flags = XFS_TRANS_RELEASE_LOG_RES; - lock_flags |= XFS_ILOCK_EXCL; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, ip); - - /* - * Only change the c/mtime if we are changing the size or we are - * explicitly asked to change it. This handles the semantic difference - * between truncate() and ftruncate() as implemented in the VFS. - * - * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a - * special case where we need to update the times despite not having - * these flags set. For all other operations the VFS set these flags - * explicitly if it wants a timestamp update. 
- */ - if (iattr->ia_size != ip->i_size && - (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { - iattr->ia_ctime = iattr->ia_mtime = - current_fs_time(inode->i_sb); - mask |= ATTR_CTIME | ATTR_MTIME; - } - - if (iattr->ia_size > ip->i_size) { - ip->i_d.di_size = iattr->ia_size; - ip->i_size = iattr->ia_size; - } else if (iattr->ia_size <= ip->i_size || - (iattr->ia_size == 0 && ip->i_d.di_nextents)) { - error = xfs_itruncate_data(&tp, ip, iattr->ia_size); - if (error) - goto out_trans_abort; - - /* - * Truncated "down", so we're removing references to old data - * here - if we delay flushing for a long time, we expose - * ourselves unduly to the notorious NULL files problem. So, - * we mark this inode and flush it when the file is closed, - * and do not wait the usual (long) time for writeout. - */ - xfs_iflags_set(ip, XFS_ITRUNCATED); - } - - if (mask & ATTR_CTIME) { - inode->i_ctime = iattr->ia_ctime; - ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; - ip->i_update_core = 1; - } - if (mask & ATTR_MTIME) { - inode->i_mtime = iattr->ia_mtime; - ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; - ip->i_update_core = 1; - } - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - XFS_STATS_INC(xs_ig_attrchg); - - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(tp); - - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); -out_unlock: - if (lock_flags) - xfs_iunlock(ip, lock_flags); - return error; - -out_trans_abort: - commit_flags |= XFS_TRANS_ABORT; -out_trans_cancel: - xfs_trans_cancel(tp, commit_flags); - goto out_unlock; -} - STATIC int xfs_vn_setattr( struct dentry *dentry, struct iattr *iattr) { - if (iattr->ia_valid & ATTR_SIZE) - return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0); - return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0); + return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); } #define XFS_FIEMAP_FLAGS 
(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) diff --git a/trunk/fs/xfs/linux-2.6/xfs_linux.h b/trunk/fs/xfs/linux-2.6/xfs_linux.h index d42f814e4d35..8633521b3b2e 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_linux.h +++ b/trunk/fs/xfs/linux-2.6/xfs_linux.h @@ -33,6 +33,7 @@ #endif #include +#include #include #include @@ -87,12 +88,6 @@ #include #include -#ifdef __BIG_ENDIAN -#define XFS_NATIVE_HOST 1 -#else -#undef XFS_NATIVE_HOST -#endif - /* * Feature macros (disable/enable) */ diff --git a/trunk/fs/xfs/linux-2.6/xfs_super.c b/trunk/fs/xfs/linux-2.6/xfs_super.c index 25fd2cd6c8b0..a1a881e68a9a 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_super.c +++ b/trunk/fs/xfs/linux-2.6/xfs_super.c @@ -33,6 +33,7 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_bmap.h" #include "xfs_rtalloc.h" @@ -1411,35 +1412,37 @@ xfs_fs_fill_super( sb->s_time_gran = 1; set_posix_acl_flag(sb); - xfs_inode_shrinker_register(mp); - - error = xfs_mountfs(mp); + error = xfs_syncd_init(mp); if (error) goto out_filestream_unmount; - error = xfs_syncd_init(mp); + xfs_inode_shrinker_register(mp); + + error = xfs_mountfs(mp); if (error) - goto out_unmount; + goto out_syncd_stop; root = igrab(VFS_I(mp->m_rootip)); if (!root) { error = ENOENT; - goto out_syncd_stop; + goto fail_unmount; } if (is_bad_inode(root)) { error = EINVAL; - goto out_syncd_stop; + goto fail_vnrele; } sb->s_root = d_alloc_root(root); if (!sb->s_root) { error = ENOMEM; - goto out_iput; + goto fail_vnrele; } return 0; - out_filestream_unmount: + out_syncd_stop: xfs_inode_shrinker_unregister(mp); + xfs_syncd_stop(mp); + out_filestream_unmount: xfs_filestream_unmount(mp); out_free_sb: xfs_freesb(mp); @@ -1453,12 +1456,17 @@ xfs_fs_fill_super( out: return -error; - out_iput: - iput(root); - out_syncd_stop: - xfs_syncd_stop(mp); - out_unmount: + fail_vnrele: + if (sb->s_root) { + dput(sb->s_root); + sb->s_root = NULL; + } else { + iput(root); + } + + 
fail_unmount: xfs_inode_shrinker_unregister(mp); + xfs_syncd_stop(mp); /* * Blow away any referenced inode in the filestreams cache. diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.c b/trunk/fs/xfs/linux-2.6/xfs_sync.c index 5cc158e52d4c..8ecad5ff9f9b 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_sync.c +++ b/trunk/fs/xfs/linux-2.6/xfs_sync.c @@ -359,11 +359,13 @@ xfs_quiesce_data( { int error, error2 = 0; + /* push non-blocking */ + xfs_sync_data(mp, 0); xfs_qm_sync(mp, SYNC_TRYLOCK); - xfs_qm_sync(mp, SYNC_WAIT); - /* force out the newly dirtied log buffers */ - xfs_log_force(mp, XFS_LOG_SYNC); + /* push and block till complete */ + xfs_sync_data(mp, SYNC_WAIT); + xfs_qm_sync(mp, SYNC_WAIT); /* write superblock and hoover up shutdown errors */ error = xfs_sync_fsdata(mp); @@ -434,7 +436,7 @@ xfs_quiesce_attr( WARN_ON(atomic_read(&mp->m_active_trans) != 0); /* Push the superblock and write an unmount record */ - error = xfs_log_sbcount(mp); + error = xfs_log_sbcount(mp, 1); if (error) xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. 
" "Frozen image may not be consistent."); diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.h b/trunk/fs/xfs/linux-2.6/xfs_sync.h index e914fd621746..e3a6ad27415f 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_sync.h +++ b/trunk/fs/xfs/linux-2.6/xfs_sync.h @@ -21,6 +21,14 @@ struct xfs_mount; struct xfs_perag; +typedef struct xfs_sync_work { + struct list_head w_list; + struct xfs_mount *w_mount; + void *w_data; /* syncer routine argument */ + void (*w_syncer)(struct xfs_mount *, void *); + struct completion *w_completion; +} xfs_sync_work_t; + #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ diff --git a/trunk/fs/xfs/linux-2.6/xfs_trace.h b/trunk/fs/xfs/linux-2.6/xfs_trace.h index fda0708ef2ea..d48b7a579ae1 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_trace.h +++ b/trunk/fs/xfs/linux-2.6/xfs_trace.h @@ -293,7 +293,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class, __entry->buffer_length = bp->b_buffer_length; __entry->hold = atomic_read(&bp->b_hold); __entry->pincount = atomic_read(&bp->b_pin_count); - __entry->lockval = bp->b_sema.count; + __entry->lockval = xfs_buf_lock_value(bp); __entry->flags = bp->b_flags; __entry->caller_ip = caller_ip; ), @@ -323,7 +323,7 @@ DEFINE_BUF_EVENT(xfs_buf_bawrite); DEFINE_BUF_EVENT(xfs_buf_bdwrite); DEFINE_BUF_EVENT(xfs_buf_lock); DEFINE_BUF_EVENT(xfs_buf_lock_done); -DEFINE_BUF_EVENT(xfs_buf_trylock); +DEFINE_BUF_EVENT(xfs_buf_cond_lock); DEFINE_BUF_EVENT(xfs_buf_unlock); DEFINE_BUF_EVENT(xfs_buf_iowait); DEFINE_BUF_EVENT(xfs_buf_iowait_done); @@ -366,7 +366,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class, __entry->flags = flags; __entry->hold = atomic_read(&bp->b_hold); __entry->pincount = atomic_read(&bp->b_pin_count); - __entry->lockval = bp->b_sema.count; + __entry->lockval = xfs_buf_lock_value(bp); __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " @@ -409,7 +409,7 @@ TRACE_EVENT(xfs_buf_ioerror, __entry->buffer_length = bp->b_buffer_length; 
__entry->hold = atomic_read(&bp->b_hold); __entry->pincount = atomic_read(&bp->b_pin_count); - __entry->lockval = bp->b_sema.count; + __entry->lockval = xfs_buf_lock_value(bp); __entry->error = error; __entry->flags = bp->b_flags; __entry->caller_ip = caller_ip; @@ -454,7 +454,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class, __entry->buf_flags = bip->bli_buf->b_flags; __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count); - __entry->buf_lockval = bip->bli_buf->b_sema.count; + __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf); __entry->li_desc = bip->bli_item.li_desc; __entry->li_flags = bip->bli_item.li_flags; ), @@ -998,8 +998,7 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) - __field(loff_t, isize) - __field(loff_t, disize) + __field(loff_t, size) __field(loff_t, new_size) __field(loff_t, offset) __field(size_t, count) @@ -1007,18 +1006,16 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class, TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; - __entry->isize = ip->i_size; - __entry->disize = ip->i_d.di_size; + __entry->size = ip->i_d.di_size; __entry->new_size = ip->i_new_size; __entry->offset = offset; __entry->count = count; ), - TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx " + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " "offset 0x%llx count %zd", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, - __entry->isize, - __entry->disize, + __entry->size, __entry->new_size, __entry->offset, __entry->count) @@ -1031,7 +1028,40 @@ DEFINE_EVENT(xfs_simple_io_class, name, \ DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); -DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); + + +TRACE_EVENT(xfs_itruncate_start, + TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size, int flag, + xfs_off_t 
toss_start, xfs_off_t toss_finish), + TP_ARGS(ip, new_size, flag, toss_start, toss_finish), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_fsize_t, new_size) + __field(xfs_off_t, toss_start) + __field(xfs_off_t, toss_finish) + __field(int, flag) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->new_size = new_size; + __entry->toss_start = toss_start; + __entry->toss_finish = toss_finish; + __entry->flag = flag; + ), + TP_printk("dev %d:%d ino 0x%llx %s size 0x%llx new_size 0x%llx " + "toss start 0x%llx toss finish 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->flag, "|", XFS_ITRUNC_FLAGS), + __entry->size, + __entry->new_size, + __entry->toss_start, + __entry->toss_finish) +); DECLARE_EVENT_CLASS(xfs_itrunc_class, TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), @@ -1059,8 +1089,8 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class, DEFINE_EVENT(xfs_itrunc_class, name, \ TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ TP_ARGS(ip, new_size)) -DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start); -DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end); +DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start); +DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end); TRACE_EVENT(xfs_pagecache_inval, TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), diff --git a/trunk/fs/xfs/quota/xfs_dquot.c b/trunk/fs/xfs/quota/xfs_dquot.c index 837f31158d43..6fa214603819 100644 --- a/trunk/fs/xfs/quota/xfs_dquot.c +++ b/trunk/fs/xfs/quota/xfs_dquot.c @@ -220,7 +220,7 @@ xfs_qm_adjust_dqtimers( { ASSERT(d->d_id); -#ifdef DEBUG +#ifdef QUOTADEBUG if (d->d_blk_hardlimit) ASSERT(be64_to_cpu(d->d_blk_softlimit) <= be64_to_cpu(d->d_blk_hardlimit)); @@ -231,7 +231,6 @@ xfs_qm_adjust_dqtimers( ASSERT(be64_to_cpu(d->d_rtb_softlimit) <= be64_to_cpu(d->d_rtb_hardlimit)); #endif - if (!d->d_btimer) { if 
((d->d_blk_softlimit && (be64_to_cpu(d->d_bcount) >= @@ -319,7 +318,7 @@ xfs_qm_init_dquot_blk( ASSERT(tp); ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(xfs_buf_islocked(bp)); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); d = (xfs_dqblk_t *)XFS_BUF_PTR(bp); @@ -535,7 +534,7 @@ xfs_qm_dqtobp( } ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(xfs_buf_islocked(bp)); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); /* * calculate the location of the dquot inside the buffer. @@ -623,7 +622,7 @@ xfs_qm_dqread( * brelse it because we have the changes incore. */ ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(xfs_buf_islocked(bp)); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); xfs_trans_brelse(tp, bp); return (error); @@ -1424,6 +1423,45 @@ xfs_qm_dqpurge( } +#ifdef QUOTADEBUG +void +xfs_qm_dqprint(xfs_dquot_t *dqp) +{ + struct xfs_mount *mp = dqp->q_mount; + + xfs_debug(mp, "-----------KERNEL DQUOT----------------"); + xfs_debug(mp, "---- dquotID = %d", + (int)be32_to_cpu(dqp->q_core.d_id)); + xfs_debug(mp, "---- type = %s", DQFLAGTO_TYPESTR(dqp)); + xfs_debug(mp, "---- fs = 0x%p", dqp->q_mount); + xfs_debug(mp, "---- blkno = 0x%x", (int) dqp->q_blkno); + xfs_debug(mp, "---- boffset = 0x%x", (int) dqp->q_bufoffset); + xfs_debug(mp, "---- blkhlimit = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_blk_hardlimit), + (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit)); + xfs_debug(mp, "---- blkslimit = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_blk_softlimit), + (int)be64_to_cpu(dqp->q_core.d_blk_softlimit)); + xfs_debug(mp, "---- inohlimit = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_ino_hardlimit), + (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit)); + xfs_debug(mp, "---- inoslimit = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_ino_softlimit), + (int)be64_to_cpu(dqp->q_core.d_ino_softlimit)); + xfs_debug(mp, "---- bcount = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_bcount), + (int)be64_to_cpu(dqp->q_core.d_bcount)); + xfs_debug(mp, "---- icount = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_icount), + (int)be64_to_cpu(dqp->q_core.d_icount)); + xfs_debug(mp, 
"---- btimer = %d", + (int)be32_to_cpu(dqp->q_core.d_btimer)); + xfs_debug(mp, "---- itimer = %d", + (int)be32_to_cpu(dqp->q_core.d_itimer)); + xfs_debug(mp, "---------------------------"); +} +#endif + /* * Give the buffer a little push if it is incore and * wait on the flush lock. diff --git a/trunk/fs/xfs/quota/xfs_dquot.h b/trunk/fs/xfs/quota/xfs_dquot.h index 34b7e945dbfa..5da3a23b820d 100644 --- a/trunk/fs/xfs/quota/xfs_dquot.h +++ b/trunk/fs/xfs/quota/xfs_dquot.h @@ -116,6 +116,12 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp) (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ (XFS_IS_OQUOTA_ON((d)->q_mount)))) +#ifdef QUOTADEBUG +extern void xfs_qm_dqprint(xfs_dquot_t *); +#else +#define xfs_qm_dqprint(a) +#endif + extern void xfs_qm_dqdestroy(xfs_dquot_t *); extern int xfs_qm_dqflush(xfs_dquot_t *, uint); extern int xfs_qm_dqpurge(xfs_dquot_t *); diff --git a/trunk/fs/xfs/quota/xfs_qm.c b/trunk/fs/xfs/quota/xfs_qm.c index 46e54ad9a2dc..b94dace4e785 100644 --- a/trunk/fs/xfs/quota/xfs_qm.c +++ b/trunk/fs/xfs/quota/xfs_qm.c @@ -67,6 +67,32 @@ static struct shrinker xfs_qm_shaker = { .seeks = DEFAULT_SEEKS, }; +#ifdef DEBUG +extern struct mutex qcheck_lock; +#endif + +#ifdef QUOTADEBUG +static void +xfs_qm_dquot_list_print( + struct xfs_mount *mp) +{ + xfs_dquot_t *dqp; + int i = 0; + + list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) { + xfs_debug(mp, " %d. \"%d (%s)\" " + "bcnt = %lld, icnt = %lld, refs = %d", + i++, be32_to_cpu(dqp->q_core.d_id), + DQFLAGTO_TYPESTR(dqp), + (long long)be64_to_cpu(dqp->q_core.d_bcount), + (long long)be64_to_cpu(dqp->q_core.d_icount), + dqp->q_nrefs); + } +} +#else +static void xfs_qm_dquot_list_print(struct xfs_mount *mp) { } +#endif + /* * Initialize the XQM structure. * Note that there is not one quota manager per file system. 
@@ -139,6 +165,9 @@ xfs_Gqm_init(void) atomic_set(&xqm->qm_totaldquots, 0); xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO; xqm->qm_nrefs = 0; +#ifdef DEBUG + mutex_init(&qcheck_lock); +#endif return xqm; out_free_udqhash: @@ -175,6 +204,9 @@ xfs_qm_destroy( mutex_lock(&xqm->qm_dqfrlist_lock); list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { xfs_dqlock(dqp); +#ifdef QUOTADEBUG + xfs_debug(dqp->q_mount, "FREELIST destroy 0x%p", dqp); +#endif list_del_init(&dqp->q_freelist); xfs_Gqm->qm_dqfrlist_cnt--; xfs_dqunlock(dqp); @@ -182,6 +214,9 @@ xfs_qm_destroy( } mutex_unlock(&xqm->qm_dqfrlist_lock); mutex_destroy(&xqm->qm_dqfrlist_lock); +#ifdef DEBUG + mutex_destroy(&qcheck_lock); +#endif kmem_free(xqm); } @@ -374,6 +409,11 @@ xfs_qm_mount_quotas( xfs_warn(mp, "Failed to initialize disk quotas."); return; } + +#ifdef QUOTADEBUG + if (XFS_IS_QUOTA_ON(mp)) + xfs_qm_internalqcheck(mp); +#endif } /* @@ -826,8 +866,8 @@ xfs_qm_dqattach_locked( } done: -#ifdef DEBUG - if (!error) { +#ifdef QUOTADEBUG + if (! 
error) { if (XFS_IS_UQUOTA_ON(mp)) ASSERT(ip->i_udquot); if (XFS_IS_OQUOTA_ON(mp)) @@ -1693,6 +1733,8 @@ xfs_qm_quotacheck( mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); mp->m_qflags |= flags; + xfs_qm_dquot_list_print(mp); + error_return: if (error) { xfs_warn(mp, @@ -2054,6 +2096,9 @@ xfs_qm_write_sb_changes( xfs_trans_t *tp; int error; +#ifdef QUOTADEBUG + xfs_notice(mp, "Writing superblock quota changes"); +#endif tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, diff --git a/trunk/fs/xfs/quota/xfs_qm.h b/trunk/fs/xfs/quota/xfs_qm.h index 43b9abe1052c..567b29b9f1b3 100644 --- a/trunk/fs/xfs/quota/xfs_qm.h +++ b/trunk/fs/xfs/quota/xfs_qm.h @@ -163,4 +163,10 @@ extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); +#ifdef DEBUG +extern int xfs_qm_internalqcheck(xfs_mount_t *); +#else +#define xfs_qm_internalqcheck(mp) (0) +#endif + #endif /* __XFS_QM_H__ */ diff --git a/trunk/fs/xfs/quota/xfs_qm_syscalls.c b/trunk/fs/xfs/quota/xfs_qm_syscalls.c index 609246f42e6c..2dadb15d5ca9 100644 --- a/trunk/fs/xfs/quota/xfs_qm_syscalls.c +++ b/trunk/fs/xfs/quota/xfs_qm_syscalls.c @@ -263,7 +263,7 @@ xfs_qm_scall_trunc_qfile( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip); - error = xfs_itruncate_data(&tp, ip, 0); + error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, 1); if (error) { xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); @@ -622,6 +622,7 @@ xfs_qm_scall_setqlim( xfs_trans_log_dquot(tp, dqp); error = xfs_trans_commit(tp, 0); + xfs_qm_dqprint(dqp); xfs_qm_dqrele(dqp); out_unlock: @@ -656,6 +657,7 @@ xfs_qm_scall_getquota( xfs_qm_dqput(dqp); return XFS_ERROR(ENOENT); } + /* xfs_qm_dqprint(dqp); */ /* * Convert the disk dquot to the exportable format */ @@ -904,3 +906,354 @@ xfs_qm_dqrele_all_inodes( ASSERT(mp->m_quotainfo); 
xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags); } + +/*------------------------------------------------------------------------*/ +#ifdef DEBUG +/* + * This contains all the test functions for XFS disk quotas. + * Currently it does a quota accounting check. ie. it walks through + * all inodes in the file system, calculating the dquot accounting fields, + * and prints out any inconsistencies. + */ +xfs_dqhash_t *qmtest_udqtab; +xfs_dqhash_t *qmtest_gdqtab; +int qmtest_hashmask; +int qmtest_nfails; +struct mutex qcheck_lock; + +#define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \ + (__psunsigned_t)(id)) & \ + (qmtest_hashmask - 1)) + +#define DQTEST_HASH(mp, id, type) ((type & XFS_DQ_USER) ? \ + (qmtest_udqtab + \ + DQTEST_HASHVAL(mp, id)) : \ + (qmtest_gdqtab + \ + DQTEST_HASHVAL(mp, id))) + +#define DQTEST_LIST_PRINT(l, NXT, title) \ +{ \ + xfs_dqtest_t *dqp; int i = 0;\ + xfs_debug(NULL, "%s (#%d)", title, (int) (l)->qh_nelems); \ + for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \ + dqp = (xfs_dqtest_t *)dqp->NXT) { \ + xfs_debug(dqp->q_mount, \ + " %d. 
\"%d (%s)\" bcnt = %d, icnt = %d", \ + ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \ + dqp->d_bcount, dqp->d_icount); } \ +} + +typedef struct dqtest { + uint dq_flags; /* various flags (XFS_DQ_*) */ + struct list_head q_hashlist; + xfs_dqhash_t *q_hash; /* the hashchain header */ + xfs_mount_t *q_mount; /* filesystem this relates to */ + xfs_dqid_t d_id; /* user id or group id */ + xfs_qcnt_t d_bcount; /* # disk blocks owned by the user */ + xfs_qcnt_t d_icount; /* # inodes owned by the user */ +} xfs_dqtest_t; + +STATIC void +xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp) +{ + list_add(&dqp->q_hashlist, &h->qh_list); + h->qh_version++; + h->qh_nelems++; +} +STATIC void +xfs_qm_dqtest_print( + struct xfs_mount *mp, + struct dqtest *d) +{ + xfs_debug(mp, "-----------DQTEST DQUOT----------------"); + xfs_debug(mp, "---- dquot ID = %d", d->d_id); + xfs_debug(mp, "---- fs = 0x%p", d->q_mount); + xfs_debug(mp, "---- bcount = %Lu (0x%x)", + d->d_bcount, (int)d->d_bcount); + xfs_debug(mp, "---- icount = %Lu (0x%x)", + d->d_icount, (int)d->d_icount); + xfs_debug(mp, "---------------------------"); +} + +STATIC void +xfs_qm_dqtest_failed( + xfs_dqtest_t *d, + xfs_dquot_t *dqp, + char *reason, + xfs_qcnt_t a, + xfs_qcnt_t b, + int error) +{ + qmtest_nfails++; + if (error) + xfs_debug(dqp->q_mount, + "quotacheck failed id=%d, err=%d\nreason: %s", + d->d_id, error, reason); + else + xfs_debug(dqp->q_mount, + "quotacheck failed id=%d (%s) [%d != %d]", + d->d_id, reason, (int)a, (int)b); + xfs_qm_dqtest_print(dqp->q_mount, d); + if (dqp) + xfs_qm_dqprint(dqp); +} + +STATIC int +xfs_dqtest_cmp2( + xfs_dqtest_t *d, + xfs_dquot_t *dqp) +{ + int err = 0; + if (be64_to_cpu(dqp->q_core.d_icount) != d->d_icount) { + xfs_qm_dqtest_failed(d, dqp, "icount mismatch", + be64_to_cpu(dqp->q_core.d_icount), + d->d_icount, 0); + err++; + } + if (be64_to_cpu(dqp->q_core.d_bcount) != d->d_bcount) { + xfs_qm_dqtest_failed(d, dqp, "bcount mismatch", + be64_to_cpu(dqp->q_core.d_bcount), + 
d->d_bcount, 0); + err++; + } + if (dqp->q_core.d_blk_softlimit && + be64_to_cpu(dqp->q_core.d_bcount) >= + be64_to_cpu(dqp->q_core.d_blk_softlimit)) { + if (!dqp->q_core.d_btimer && dqp->q_core.d_id) { + xfs_debug(dqp->q_mount, + "%d [%s] BLK TIMER NOT STARTED", + d->d_id, DQFLAGTO_TYPESTR(d)); + err++; + } + } + if (dqp->q_core.d_ino_softlimit && + be64_to_cpu(dqp->q_core.d_icount) >= + be64_to_cpu(dqp->q_core.d_ino_softlimit)) { + if (!dqp->q_core.d_itimer && dqp->q_core.d_id) { + xfs_debug(dqp->q_mount, + "%d [%s] INO TIMER NOT STARTED", + d->d_id, DQFLAGTO_TYPESTR(d)); + err++; + } + } +#ifdef QUOTADEBUG + if (!err) { + xfs_debug(dqp->q_mount, "%d [%s] qchecked", + d->d_id, DQFLAGTO_TYPESTR(d)); + } +#endif + return (err); +} + +STATIC void +xfs_dqtest_cmp( + xfs_dqtest_t *d) +{ + xfs_dquot_t *dqp; + int error; + + /* xfs_qm_dqtest_print(d); */ + if ((error = xfs_qm_dqget(d->q_mount, NULL, d->d_id, d->dq_flags, 0, + &dqp))) { + xfs_qm_dqtest_failed(d, NULL, "dqget failed", 0, 0, error); + return; + } + xfs_dqtest_cmp2(d, dqp); + xfs_qm_dqput(dqp); +} + +STATIC int +xfs_qm_internalqcheck_dqget( + xfs_mount_t *mp, + xfs_dqid_t id, + uint type, + xfs_dqtest_t **O_dq) +{ + xfs_dqtest_t *d; + xfs_dqhash_t *h; + + h = DQTEST_HASH(mp, id, type); + list_for_each_entry(d, &h->qh_list, q_hashlist) { + if (d->d_id == id && mp == d->q_mount) { + *O_dq = d; + return (0); + } + } + d = kmem_zalloc(sizeof(xfs_dqtest_t), KM_SLEEP); + d->dq_flags = type; + d->d_id = id; + d->q_mount = mp; + d->q_hash = h; + INIT_LIST_HEAD(&d->q_hashlist); + xfs_qm_hashinsert(h, d); + *O_dq = d; + return (0); +} + +STATIC void +xfs_qm_internalqcheck_get_dquots( + xfs_mount_t *mp, + xfs_dqid_t uid, + xfs_dqid_t projid, + xfs_dqid_t gid, + xfs_dqtest_t **ud, + xfs_dqtest_t **gd) +{ + if (XFS_IS_UQUOTA_ON(mp)) + xfs_qm_internalqcheck_dqget(mp, uid, XFS_DQ_USER, ud); + if (XFS_IS_GQUOTA_ON(mp)) + xfs_qm_internalqcheck_dqget(mp, gid, XFS_DQ_GROUP, gd); + else if (XFS_IS_PQUOTA_ON(mp)) + 
xfs_qm_internalqcheck_dqget(mp, projid, XFS_DQ_PROJ, gd); +} + + +STATIC void +xfs_qm_internalqcheck_dqadjust( + xfs_inode_t *ip, + xfs_dqtest_t *d) +{ + d->d_icount++; + d->d_bcount += (xfs_qcnt_t)ip->i_d.di_nblocks; +} + +STATIC int +xfs_qm_internalqcheck_adjust( + xfs_mount_t *mp, /* mount point for filesystem */ + xfs_ino_t ino, /* inode number to get data for */ + void __user *buffer, /* not used */ + int ubsize, /* not used */ + int *ubused, /* not used */ + int *res) /* bulkstat result code */ +{ + xfs_inode_t *ip; + xfs_dqtest_t *ud, *gd; + uint lock_flags; + boolean_t ipreleased; + int error; + + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { + *res = BULKSTAT_RV_NOTHING; + xfs_debug(mp, "%s: ino=%llu, uqino=%llu, gqino=%llu\n", + __func__, (unsigned long long) ino, + (unsigned long long) mp->m_sb.sb_uquotino, + (unsigned long long) mp->m_sb.sb_gquotino); + return XFS_ERROR(EINVAL); + } + ipreleased = B_FALSE; + again: + lock_flags = XFS_ILOCK_SHARED; + if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) { + *res = BULKSTAT_RV_NOTHING; + return (error); + } + + /* + * This inode can have blocks after eof which can get released + * when we send it to inactive. Since we don't check the dquot + * until the after all our calculations are done, we must get rid + * of those now. + */ + if (! 
ipreleased) { + xfs_iunlock(ip, lock_flags); + IRELE(ip); + ipreleased = B_TRUE; + goto again; + } + xfs_qm_internalqcheck_get_dquots(mp, + (xfs_dqid_t) ip->i_d.di_uid, + (xfs_dqid_t) xfs_get_projid(ip), + (xfs_dqid_t) ip->i_d.di_gid, + &ud, &gd); + if (XFS_IS_UQUOTA_ON(mp)) { + ASSERT(ud); + xfs_qm_internalqcheck_dqadjust(ip, ud); + } + if (XFS_IS_OQUOTA_ON(mp)) { + ASSERT(gd); + xfs_qm_internalqcheck_dqadjust(ip, gd); + } + xfs_iunlock(ip, lock_flags); + IRELE(ip); + *res = BULKSTAT_RV_DIDONE; + return (0); +} + + +/* PRIVATE, debugging */ +int +xfs_qm_internalqcheck( + xfs_mount_t *mp) +{ + xfs_ino_t lastino; + int done, count; + int i; + int error; + + lastino = 0; + qmtest_hashmask = 32; + count = 5; + done = 0; + qmtest_nfails = 0; + + if (! XFS_IS_QUOTA_ON(mp)) + return XFS_ERROR(ESRCH); + + xfs_log_force(mp, XFS_LOG_SYNC); + XFS_bflush(mp->m_ddev_targp); + xfs_log_force(mp, XFS_LOG_SYNC); + XFS_bflush(mp->m_ddev_targp); + + mutex_lock(&qcheck_lock); + /* There should be absolutely no quota activity while this + is going on. 
*/ + qmtest_udqtab = kmem_zalloc(qmtest_hashmask * + sizeof(xfs_dqhash_t), KM_SLEEP); + qmtest_gdqtab = kmem_zalloc(qmtest_hashmask * + sizeof(xfs_dqhash_t), KM_SLEEP); + do { + /* + * Iterate thru all the inodes in the file system, + * adjusting the corresponding dquot counters + */ + error = xfs_bulkstat(mp, &lastino, &count, + xfs_qm_internalqcheck_adjust, + 0, NULL, &done); + if (error) { + xfs_debug(mp, "Bulkstat returned error 0x%x", error); + break; + } + } while (!done); + + xfs_debug(mp, "Checking results against system dquots"); + for (i = 0; i < qmtest_hashmask; i++) { + xfs_dqtest_t *d, *n; + xfs_dqhash_t *h; + + h = &qmtest_udqtab[i]; + list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) { + xfs_dqtest_cmp(d); + kmem_free(d); + } + h = &qmtest_gdqtab[i]; + list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) { + xfs_dqtest_cmp(d); + kmem_free(d); + } + } + + if (qmtest_nfails) { + xfs_debug(mp, "******** quotacheck failed ********"); + xfs_debug(mp, "failures = %d", qmtest_nfails); + } else { + xfs_debug(mp, "******** quotacheck successful! ********"); + } + kmem_free(qmtest_udqtab); + kmem_free(qmtest_gdqtab); + mutex_unlock(&qcheck_lock); + return (qmtest_nfails); +} + +#endif /* DEBUG */ diff --git a/trunk/fs/xfs/quota/xfs_trans_dquot.c b/trunk/fs/xfs/quota/xfs_trans_dquot.c index 4d00ee67792d..2a3648731331 100644 --- a/trunk/fs/xfs/quota/xfs_trans_dquot.c +++ b/trunk/fs/xfs/quota/xfs_trans_dquot.c @@ -59,7 +59,7 @@ xfs_trans_dqjoin( xfs_trans_add_item(tp, &dqp->q_logitem.qli_item); /* - * Initialize d_transp so we can later determine if this dquot is + * Initialize i_transp so we can later determine if this dquot is * associated with this transaction. 
*/ dqp->q_transp = tp; @@ -387,18 +387,18 @@ xfs_trans_apply_dquot_deltas( qtrx->qt_delbcnt_delta; totalrtbdelta = qtrx->qt_rtbcount_delta + qtrx->qt_delrtb_delta; -#ifdef DEBUG +#ifdef QUOTADEBUG if (totalbdelta < 0) ASSERT(be64_to_cpu(d->d_bcount) >= - -totalbdelta); + (xfs_qcnt_t) -totalbdelta); if (totalrtbdelta < 0) ASSERT(be64_to_cpu(d->d_rtbcount) >= - -totalrtbdelta); + (xfs_qcnt_t) -totalrtbdelta); if (qtrx->qt_icount_delta < 0) ASSERT(be64_to_cpu(d->d_icount) >= - -qtrx->qt_icount_delta); + (xfs_qcnt_t) -qtrx->qt_icount_delta); #endif if (totalbdelta) be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta); @@ -642,6 +642,11 @@ xfs_trans_dqresv( ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) || (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { +#ifdef QUOTADEBUG + xfs_debug(mp, + "BLK Res: nblks=%ld + resbcount=%Ld > hardlimit=%Ld?", + nblks, *resbcountp, hardlimit); +#endif if (nblks > 0) { /* * dquot is locked already. See if we'd go over the diff --git a/trunk/fs/xfs/xfs.h b/trunk/fs/xfs/xfs.h index 53ec3ea9a625..5ad8ad3a1dcd 100644 --- a/trunk/fs/xfs/xfs.h +++ b/trunk/fs/xfs/xfs.h @@ -22,6 +22,7 @@ #define STATIC #define DEBUG 1 #define XFS_BUF_LOCK_TRACKING 1 +/* #define QUOTADEBUG 1 */ #endif #include diff --git a/trunk/fs/xfs/xfs_alloc.c b/trunk/fs/xfs/xfs_alloc.c index 1e00b3ef6274..95862bbff56b 100644 --- a/trunk/fs/xfs/xfs_alloc.c +++ b/trunk/fs/xfs/xfs_alloc.c @@ -570,7 +570,9 @@ xfs_alloc_ag_vextent_exact( xfs_agblock_t tbno; /* start block of trimmed extent */ xfs_extlen_t tlen; /* length of trimmed extent */ xfs_agblock_t tend; /* end block of trimmed extent */ + xfs_agblock_t end; /* end of allocated extent */ int i; /* success/failure of operation */ + xfs_extlen_t rlen; /* length of returned extent */ ASSERT(args->alignment == 1); @@ -623,16 +625,18 @@ xfs_alloc_ag_vextent_exact( * * Fix the length according to mod and prod if given. 
*/ - args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) - - args->agbno; + end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen); + args->len = end - args->agbno; xfs_alloc_fix_len(args); if (!xfs_alloc_fix_minleft(args)) goto not_found; - ASSERT(args->agbno + args->len <= tend); + rlen = args->len; + ASSERT(args->agbno + rlen <= tend); + end = args->agbno + rlen; /* - * We are allocating agbno for args->len + * We are allocating agbno for rlen [agbno .. end] * Allocate/initialize a cursor for the by-size btree. */ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, @@ -2123,7 +2127,7 @@ xfs_read_agf( * Validate the magic number of the agf block. */ agf_ok = - agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && + be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC && XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && diff --git a/trunk/fs/xfs/xfs_alloc_btree.c b/trunk/fs/xfs/xfs_alloc_btree.c index ffb3386e45c1..2b3518826a69 100644 --- a/trunk/fs/xfs/xfs_alloc_btree.c +++ b/trunk/fs/xfs/xfs_alloc_btree.c @@ -31,6 +31,7 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -310,6 +311,72 @@ xfs_allocbt_recs_inorder( } #endif /* DEBUG */ +#ifdef XFS_BTREE_TRACE +ktrace_t *xfs_allocbt_trace_buf; + +STATIC void +xfs_allocbt_trace_enter( + struct xfs_btree_cur *cur, + const char *func, + char *s, + int type, + int line, + __psunsigned_t a0, + __psunsigned_t a1, + __psunsigned_t a2, + __psunsigned_t a3, + __psunsigned_t a4, + __psunsigned_t a5, + __psunsigned_t a6, + __psunsigned_t a7, + __psunsigned_t a8, + __psunsigned_t a9, + __psunsigned_t a10) +{ + ktrace_enter(xfs_allocbt_trace_buf, (void *)(__psint_t)type, + (void *)func, (void *)s, NULL, (void *)cur, + (void *)a0, (void *)a1, (void *)a2, (void *)a3, 
+ (void *)a4, (void *)a5, (void *)a6, (void *)a7, + (void *)a8, (void *)a9, (void *)a10); +} + +STATIC void +xfs_allocbt_trace_cursor( + struct xfs_btree_cur *cur, + __uint32_t *s0, + __uint64_t *l0, + __uint64_t *l1) +{ + *s0 = cur->bc_private.a.agno; + *l0 = cur->bc_rec.a.ar_startblock; + *l1 = cur->bc_rec.a.ar_blockcount; +} + +STATIC void +xfs_allocbt_trace_key( + struct xfs_btree_cur *cur, + union xfs_btree_key *key, + __uint64_t *l0, + __uint64_t *l1) +{ + *l0 = be32_to_cpu(key->alloc.ar_startblock); + *l1 = be32_to_cpu(key->alloc.ar_blockcount); +} + +STATIC void +xfs_allocbt_trace_record( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + __uint64_t *l0, + __uint64_t *l1, + __uint64_t *l2) +{ + *l0 = be32_to_cpu(rec->alloc.ar_startblock); + *l1 = be32_to_cpu(rec->alloc.ar_blockcount); + *l2 = 0; +} +#endif /* XFS_BTREE_TRACE */ + static const struct xfs_btree_ops xfs_allocbt_ops = { .rec_len = sizeof(xfs_alloc_rec_t), .key_len = sizeof(xfs_alloc_key_t), @@ -326,10 +393,18 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, .key_diff = xfs_allocbt_key_diff, + #ifdef DEBUG .keys_inorder = xfs_allocbt_keys_inorder, .recs_inorder = xfs_allocbt_recs_inorder, #endif + +#ifdef XFS_BTREE_TRACE + .trace_enter = xfs_allocbt_trace_enter, + .trace_cursor = xfs_allocbt_trace_cursor, + .trace_key = xfs_allocbt_trace_key, + .trace_record = xfs_allocbt_trace_record, +#endif }; /* @@ -352,16 +427,13 @@ xfs_allocbt_init_cursor( cur->bc_tp = tp; cur->bc_mp = mp; + cur->bc_nlevels = be32_to_cpu(agf->agf_levels[btnum]); cur->bc_btnum = btnum; cur->bc_blocklog = mp->m_sb.sb_blocklog; - cur->bc_ops = &xfs_allocbt_ops; - if (btnum == XFS_BTNUM_CNT) { - cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); + cur->bc_ops = &xfs_allocbt_ops; + if (btnum == XFS_BTNUM_CNT) cur->bc_flags = XFS_BTREE_LASTREC_UPDATE; - } else { - cur->bc_nlevels = 
be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); - } cur->bc_private.a.agbp = agbp; cur->bc_private.a.agno = agno; diff --git a/trunk/fs/xfs/xfs_arch.h b/trunk/fs/xfs/xfs_arch.h new file mode 100644 index 000000000000..0902249354a0 --- /dev/null +++ b/trunk/fs/xfs/xfs_arch.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_ARCH_H__ +#define __XFS_ARCH_H__ + +#ifndef XFS_BIG_INUMS +# error XFS_BIG_INUMS must be defined true or false +#endif + +#ifdef __KERNEL__ + +#include + +#ifdef __BIG_ENDIAN +#define XFS_NATIVE_HOST 1 +#else +#undef XFS_NATIVE_HOST +#endif + +#else /* __KERNEL__ */ + +#if __BYTE_ORDER == __BIG_ENDIAN +#define XFS_NATIVE_HOST 1 +#else +#undef XFS_NATIVE_HOST +#endif + +#ifdef XFS_NATIVE_HOST +#define cpu_to_be16(val) ((__force __be16)(__u16)(val)) +#define cpu_to_be32(val) ((__force __be32)(__u32)(val)) +#define cpu_to_be64(val) ((__force __be64)(__u64)(val)) +#define be16_to_cpu(val) ((__force __u16)(__be16)(val)) +#define be32_to_cpu(val) ((__force __u32)(__be32)(val)) +#define be64_to_cpu(val) ((__force __u64)(__be64)(val)) +#else +#define cpu_to_be16(val) ((__force __be16)__swab16((__u16)(val))) +#define cpu_to_be32(val) ((__force __be32)__swab32((__u32)(val))) +#define cpu_to_be64(val) ((__force __be64)__swab64((__u64)(val))) +#define 
be16_to_cpu(val) (__swab16((__force __u16)(__be16)(val))) +#define be32_to_cpu(val) (__swab32((__force __u32)(__be32)(val))) +#define be64_to_cpu(val) (__swab64((__force __u64)(__be64)(val))) +#endif + +static inline void be16_add_cpu(__be16 *a, __s16 b) +{ + *a = cpu_to_be16(be16_to_cpu(*a) + b); +} + +static inline void be32_add_cpu(__be32 *a, __s32 b) +{ + *a = cpu_to_be32(be32_to_cpu(*a) + b); +} + +static inline void be64_add_cpu(__be64 *a, __s64 b) +{ + *a = cpu_to_be64(be64_to_cpu(*a) + b); +} + +#endif /* __KERNEL__ */ + +/* + * get and set integers from potentially unaligned locations + */ + +#define INT_GET_UNALIGNED_16_BE(pointer) \ + ((__u16)((((__u8*)(pointer))[0] << 8) | (((__u8*)(pointer))[1]))) +#define INT_SET_UNALIGNED_16_BE(pointer,value) \ + { \ + ((__u8*)(pointer))[0] = (((value) >> 8) & 0xff); \ + ((__u8*)(pointer))[1] = (((value) ) & 0xff); \ + } + +/* + * In directories inode numbers are stored as unaligned arrays of unsigned + * 8bit integers on disk. + * + * For v1 directories or v2 directories that contain inode numbers that + * do not fit into 32bit the array has eight members, but the first member + * is always zero: + * + * |unused|48-55|40-47|32-39|24-31|16-23| 8-15| 0- 7| + * + * For v2 directories that only contain entries with inode numbers that fit + * into 32bits a four-member array is used: + * + * |24-31|16-23| 8-15| 0- 7| + */ + +#define XFS_GET_DIR_INO4(di) \ + (((__u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3])) + +#define XFS_PUT_DIR_INO4(from, di) \ +do { \ + (di).i[0] = (((from) & 0xff000000ULL) >> 24); \ + (di).i[1] = (((from) & 0x00ff0000ULL) >> 16); \ + (di).i[2] = (((from) & 0x0000ff00ULL) >> 8); \ + (di).i[3] = ((from) & 0x000000ffULL); \ +} while (0) + +#define XFS_DI_HI(di) \ + (((__u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3])) +#define XFS_DI_LO(di) \ + (((__u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7])) + +#define XFS_GET_DIR_INO8(di) \ + 
(((xfs_ino_t)XFS_DI_LO(di) & 0xffffffffULL) | \ + ((xfs_ino_t)XFS_DI_HI(di) << 32)) + +#define XFS_PUT_DIR_INO8(from, di) \ +do { \ + (di).i[0] = 0; \ + (di).i[1] = (((from) & 0x00ff000000000000ULL) >> 48); \ + (di).i[2] = (((from) & 0x0000ff0000000000ULL) >> 40); \ + (di).i[3] = (((from) & 0x000000ff00000000ULL) >> 32); \ + (di).i[4] = (((from) & 0x00000000ff000000ULL) >> 24); \ + (di).i[5] = (((from) & 0x0000000000ff0000ULL) >> 16); \ + (di).i[6] = (((from) & 0x000000000000ff00ULL) >> 8); \ + (di).i[7] = ((from) & 0x00000000000000ffULL); \ +} while (0) + +#endif /* __XFS_ARCH_H__ */ diff --git a/trunk/fs/xfs/xfs_attr.c b/trunk/fs/xfs/xfs_attr.c index cbae424fe1ba..01d2072fb6d4 100644 --- a/trunk/fs/xfs/xfs_attr.c +++ b/trunk/fs/xfs/xfs_attr.c @@ -822,21 +822,17 @@ xfs_attr_inactive(xfs_inode_t *dp) error = xfs_attr_root_inactive(&trans, dp); if (error) goto out; - /* - * Signal synchronous inactive transactions unless this is a - * synchronous mount filesystem in which case we know that we're here - * because we've been called out of xfs_inactive which means that the - * last reference is gone and the unlink transaction has already hit - * the disk so async inactive transactions are safe. + * signal synchronous inactive transactions unless this + * is a synchronous mount filesystem in which case we + * know that we're here because we've been called out of + * xfs_inactive which means that the last reference is gone + * and the unlink transaction has already hit the disk so + * async inactive transactions are safe. */ - if (!(mp->m_flags & XFS_MOUNT_WSYNC)) { - if (dp->i_d.di_anextents > 0) - xfs_trans_set_sync(trans); - } - - error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); - if (error) + if ((error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK, + (!(mp->m_flags & XFS_MOUNT_WSYNC) + ? 
1 : 0)))) goto out; /* @@ -1203,7 +1199,7 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context) return XFS_ERROR(error); ASSERT(bp != NULL); leaf = bp->data; - if (unlikely(leaf->hdr.info.magic != cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) { + if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) { XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW, context->dp->i_mount, leaf); xfs_da_brelse(NULL, bp); @@ -1610,8 +1606,9 @@ xfs_attr_node_removename(xfs_da_args_t *args) XFS_ATTR_FORK); if (error) goto out; - ASSERT((((xfs_attr_leafblock_t *)bp->data)->hdr.info.magic) == - cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(((xfs_attr_leafblock_t *) + bp->data)->hdr.info.magic) + == XFS_ATTR_LEAF_MAGIC); if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { xfs_bmap_init(args->flist, args->firstblock); @@ -1876,11 +1873,11 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) return(XFS_ERROR(EFSCORRUPTED)); } node = bp->data; - if (node->hdr.info.magic == - cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) + if (be16_to_cpu(node->hdr.info.magic) + == XFS_ATTR_LEAF_MAGIC) break; - if (unlikely(node->hdr.info.magic != - cpu_to_be16(XFS_DA_NODE_MAGIC))) { + if (unlikely(be16_to_cpu(node->hdr.info.magic) + != XFS_DA_NODE_MAGIC)) { XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)", XFS_ERRLEVEL_LOW, context->dp->i_mount, @@ -1915,8 +1912,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) */ for (;;) { leaf = bp->data; - if (unlikely(leaf->hdr.info.magic != - cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) { + if (unlikely(be16_to_cpu(leaf->hdr.info.magic) + != XFS_ATTR_LEAF_MAGIC)) { XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)", XFS_ERRLEVEL_LOW, context->dp->i_mount, leaf); diff --git a/trunk/fs/xfs/xfs_attr_leaf.c b/trunk/fs/xfs/xfs_attr_leaf.c index 8fad9602542b..71e90dc2aeb1 100644 --- a/trunk/fs/xfs/xfs_attr_leaf.c +++ b/trunk/fs/xfs/xfs_attr_leaf.c @@ -731,7 +731,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp) int bytes, i; leaf = bp->data; - 
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); entry = &leaf->entries[0]; bytes = sizeof(struct xfs_attr_sf_hdr); @@ -777,7 +777,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) ASSERT(bp != NULL); memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount)); leaf = (xfs_attr_leafblock_t *)tmpbuffer; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); memset(bp->data, 0, XFS_LBSIZE(dp->i_mount)); /* @@ -872,7 +872,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args) goto out; node = bp1->data; leaf = bp2->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); /* both on-disk, don't endian-flip twice */ node->btree[0].hashval = leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval; @@ -997,7 +997,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args) int tablesize, entsize, sum, tmp, i; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(leaf->hdr.count))); hdr = &leaf->hdr; @@ -1070,7 +1070,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) int tmp, i; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); hdr = &leaf->hdr; ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE)); ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(hdr->count))); @@ -1256,8 +1256,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC); leaf1 = blk1->bp->data; leaf2 = blk2->bp->data; - ASSERT(leaf1->hdr.info.magic == 
cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); - ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); args = state->args; /* @@ -1533,7 +1533,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) */ blk = &state->path.blk[ state->path.active-1 ]; info = blk->bp->data; - ASSERT(info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); leaf = (xfs_attr_leafblock_t *)info; count = be16_to_cpu(leaf->hdr.count); bytes = sizeof(xfs_attr_leaf_hdr_t) + @@ -1596,7 +1596,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) bytes = state->blocksize - (state->blocksize>>2); bytes -= be16_to_cpu(leaf->hdr.usedbytes); leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); count += be16_to_cpu(leaf->hdr.count); bytes -= be16_to_cpu(leaf->hdr.usedbytes); bytes -= count * sizeof(xfs_attr_leaf_entry_t); @@ -1650,7 +1650,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) xfs_mount_t *mp; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); hdr = &leaf->hdr; mp = args->trans->t_mountp; ASSERT((be16_to_cpu(hdr->count) > 0) @@ -1813,8 +1813,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC); drop_leaf = drop_blk->bp->data; save_leaf = save_blk->bp->data; - ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); - ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); drop_hdr = &drop_leaf->hdr; save_hdr = &save_leaf->hdr; 
@@ -1915,7 +1915,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) xfs_dahash_t hashval; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(be16_to_cpu(leaf->hdr.count) < (XFS_LBSIZE(args->dp->i_mount)/8)); @@ -2019,7 +2019,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args) xfs_attr_leaf_name_remote_t *name_rmt; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(be16_to_cpu(leaf->hdr.count) < (XFS_LBSIZE(args->dp->i_mount)/8)); ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); @@ -2087,8 +2087,8 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, /* * Set up environment. */ - ASSERT(leaf_s->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); - ASSERT(leaf_d->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); hdr_s = &leaf_s->hdr; hdr_d = &leaf_d->hdr; ASSERT((be16_to_cpu(hdr_s->count) > 0) && @@ -2222,8 +2222,8 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp) leaf1 = leaf1_bp->data; leaf2 = leaf2_bp->data; - ASSERT((leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) && - (leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC))); + ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC) && + (be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC)); if ((be16_to_cpu(leaf1->hdr.count) > 0) && (be16_to_cpu(leaf2->hdr.count) > 0) && ((be32_to_cpu(leaf2->entries[0].hashval) < @@ -2246,7 +2246,7 @@ xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count) xfs_attr_leafblock_t *leaf; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == 
XFS_ATTR_LEAF_MAGIC); if (count) *count = be16_to_cpu(leaf->hdr.count); if (!leaf->hdr.count) @@ -2265,7 +2265,7 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) xfs_attr_leaf_name_remote_t *name_rmt; int size; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); if (leaf->entries[index].flags & XFS_ATTR_LOCAL) { name_loc = xfs_attr_leaf_name_local(leaf, index); size = xfs_attr_leaf_entsize_local(name_loc->namelen, @@ -2451,7 +2451,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) ASSERT(bp != NULL); leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); ASSERT(args->index >= 0); entry = &leaf->entries[ args->index ]; @@ -2515,7 +2515,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args) ASSERT(bp != NULL); leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); ASSERT(args->index >= 0); entry = &leaf->entries[ args->index ]; @@ -2585,13 +2585,13 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) } leaf1 = bp1->data; - ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(args->index < be16_to_cpu(leaf1->hdr.count)); ASSERT(args->index >= 0); entry1 = &leaf1->entries[ args->index ]; leaf2 = bp2->data; - ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count)); ASSERT(args->index2 >= 0); entry2 = &leaf2->entries[ args->index2 ]; @@ -2689,9 +2689,9 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp) * This is a depth-first traversal! 
*/ info = bp->data; - if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { + if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) { error = xfs_attr_node_inactive(trans, dp, bp, 1); - } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) { + } else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) { error = xfs_attr_leaf_inactive(trans, dp, bp); } else { error = XFS_ERROR(EIO); @@ -2739,7 +2739,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, } node = bp->data; - ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); parent_blkno = xfs_da_blkno(bp); /* save for re-read later */ count = be16_to_cpu(node->hdr.count); if (!count) { @@ -2773,10 +2773,10 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, * Invalidate the subtree, however we have to. */ info = child_bp->data; - if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { + if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) { error = xfs_attr_node_inactive(trans, dp, child_bp, level+1); - } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) { + } else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) { error = xfs_attr_leaf_inactive(trans, dp, child_bp); } else { @@ -2836,7 +2836,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) int error, count, size, tmp, i; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); /* * Count the number of "remote" value extents. 
diff --git a/trunk/fs/xfs/xfs_bmap.c b/trunk/fs/xfs/xfs_bmap.c index c51a3f903633..e546a33214c9 100644 --- a/trunk/fs/xfs/xfs_bmap.c +++ b/trunk/fs/xfs/xfs_bmap.c @@ -29,11 +29,15 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" +#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_mount.h" #include "xfs_itable.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" #include "xfs_inode_item.h" #include "xfs_extfree_item.h" #include "xfs_alloc.h" @@ -90,7 +94,6 @@ xfs_bmap_add_attrfork_local( */ STATIC int /* error */ xfs_bmap_add_extent_delay_real( - struct xfs_trans *tp, /* transaction pointer */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ @@ -436,7 +439,6 @@ xfs_bmap_add_attrfork_local( */ STATIC int /* error */ xfs_bmap_add_extent( - struct xfs_trans *tp, /* transaction pointer */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ @@ -522,7 +524,7 @@ xfs_bmap_add_extent( if (cur) ASSERT(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL); - error = xfs_bmap_add_extent_delay_real(tp, ip, + error = xfs_bmap_add_extent_delay_real(ip, idx, &cur, new, &da_new, first, flist, &logflags); } else { @@ -559,7 +561,7 @@ xfs_bmap_add_extent( int tmp_logflags; /* partial log flag return val */ ASSERT(cur == NULL); - error = xfs_bmap_extents_to_btree(tp, ip, first, + error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first, flist, &cur, da_old > 0, &tmp_logflags, whichfork); logflags |= tmp_logflags; if (error) @@ -602,7 +604,6 @@ xfs_bmap_add_extent( */ STATIC int /* error */ xfs_bmap_add_extent_delay_real( - struct xfs_trans *tp, /* transaction pointer */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t *idx, /* extent 
number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ @@ -900,7 +901,7 @@ xfs_bmap_add_extent_delay_real( } if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && ip->i_d.di_nextents > ip->i_df.if_ext_max) { - error = xfs_bmap_extents_to_btree(tp, ip, + error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first, flist, &cur, 1, &tmp_rval, XFS_DATA_FORK); rval |= tmp_rval; @@ -983,7 +984,7 @@ xfs_bmap_add_extent_delay_real( } if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && ip->i_d.di_nextents > ip->i_df.if_ext_max) { - error = xfs_bmap_extents_to_btree(tp, ip, + error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first, flist, &cur, 1, &tmp_rval, XFS_DATA_FORK); rval |= tmp_rval; @@ -1051,7 +1052,7 @@ xfs_bmap_add_extent_delay_real( } if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && ip->i_d.di_nextents > ip->i_df.if_ext_max) { - error = xfs_bmap_extents_to_btree(tp, ip, + error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first, flist, &cur, 1, &tmp_rval, XFS_DATA_FORK); rval |= tmp_rval; @@ -2870,8 +2871,8 @@ xfs_bmap_del_extent( len = del->br_blockcount; do_div(bno, mp->m_sb.sb_rextsize); do_div(len, mp->m_sb.sb_rextsize); - error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); - if (error) + if ((error = xfs_rtfree_extent(ip->i_transp, bno, + (xfs_extlen_t)len))) goto done; do_fx = 0; nblks = len * mp->m_sb.sb_rextsize; @@ -4079,7 +4080,7 @@ xfs_bmap_sanity_check( { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) || + if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC || be16_to_cpu(block->bb_level) != level || be16_to_cpu(block->bb_numrecs) == 0 || be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) @@ -4661,7 +4662,7 @@ xfs_bmapi( if (!wasdelay && (flags & XFS_BMAPI_PREALLOC)) got.br_state = XFS_EXT_UNWRITTEN; } - error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, &got, + error = xfs_bmap_add_extent(ip, &lastx, &cur, &got, firstblock, flist, &tmp_logflags, 
whichfork); logflags |= tmp_logflags; @@ -4762,7 +4763,7 @@ xfs_bmapi( mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, mval, + error = xfs_bmap_add_extent(ip, &lastx, &cur, mval, firstblock, flist, &tmp_logflags, whichfork); logflags |= tmp_logflags; @@ -5116,7 +5117,7 @@ xfs_bunmapi( del.br_blockcount = mod; } del.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, &del, + error = xfs_bmap_add_extent(ip, &lastx, &cur, &del, firstblock, flist, &logflags, XFS_DATA_FORK); if (error) @@ -5174,18 +5175,18 @@ xfs_bunmapi( } prev.br_state = XFS_EXT_UNWRITTEN; lastx--; - error = xfs_bmap_add_extent(tp, ip, &lastx, - &cur, &prev, firstblock, flist, - &logflags, XFS_DATA_FORK); + error = xfs_bmap_add_extent(ip, &lastx, &cur, + &prev, firstblock, flist, &logflags, + XFS_DATA_FORK); if (error) goto error0; goto nodelete; } else { ASSERT(del.br_state == XFS_EXT_NORM); del.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(tp, ip, &lastx, - &cur, &del, firstblock, flist, - &logflags, XFS_DATA_FORK); + error = xfs_bmap_add_extent(ip, &lastx, &cur, + &del, firstblock, flist, &logflags, + XFS_DATA_FORK); if (error) goto error0; goto nodelete; diff --git a/trunk/fs/xfs/xfs_bmap_btree.c b/trunk/fs/xfs/xfs_bmap_btree.c index e2f5d59cbeaf..87d3c10b6954 100644 --- a/trunk/fs/xfs/xfs_bmap_btree.c +++ b/trunk/fs/xfs/xfs_bmap_btree.c @@ -33,6 +33,7 @@ #include "xfs_inode_item.h" #include "xfs_alloc.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_itable.h" #include "xfs_bmap.h" #include "xfs_error.h" @@ -424,10 +425,10 @@ xfs_bmbt_to_bmdr( xfs_bmbt_key_t *tkp; __be64 *tpp; - ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC)); - ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)); - ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)); - ASSERT(rblock->bb_level != 0); + ASSERT(be32_to_cpu(rblock->bb_magic) == 
XFS_BMAP_MAGIC); + ASSERT(be64_to_cpu(rblock->bb_u.l.bb_leftsib) == NULLDFSBNO); + ASSERT(be64_to_cpu(rblock->bb_u.l.bb_rightsib) == NULLDFSBNO); + ASSERT(be16_to_cpu(rblock->bb_level) > 0); dblock->bb_level = rblock->bb_level; dblock->bb_numrecs = rblock->bb_numrecs; dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); @@ -731,6 +732,95 @@ xfs_bmbt_recs_inorder( } #endif /* DEBUG */ +#ifdef XFS_BTREE_TRACE +ktrace_t *xfs_bmbt_trace_buf; + +STATIC void +xfs_bmbt_trace_enter( + struct xfs_btree_cur *cur, + const char *func, + char *s, + int type, + int line, + __psunsigned_t a0, + __psunsigned_t a1, + __psunsigned_t a2, + __psunsigned_t a3, + __psunsigned_t a4, + __psunsigned_t a5, + __psunsigned_t a6, + __psunsigned_t a7, + __psunsigned_t a8, + __psunsigned_t a9, + __psunsigned_t a10) +{ + struct xfs_inode *ip = cur->bc_private.b.ip; + int whichfork = cur->bc_private.b.whichfork; + + ktrace_enter(xfs_bmbt_trace_buf, + (void *)((__psint_t)type | (whichfork << 8) | (line << 16)), + (void *)func, (void *)s, (void *)ip, (void *)cur, + (void *)a0, (void *)a1, (void *)a2, (void *)a3, + (void *)a4, (void *)a5, (void *)a6, (void *)a7, + (void *)a8, (void *)a9, (void *)a10); +} + +STATIC void +xfs_bmbt_trace_cursor( + struct xfs_btree_cur *cur, + __uint32_t *s0, + __uint64_t *l0, + __uint64_t *l1) +{ + struct xfs_bmbt_rec_host r; + + xfs_bmbt_set_all(&r, &cur->bc_rec.b); + + *s0 = (cur->bc_nlevels << 24) | + (cur->bc_private.b.flags << 16) | + cur->bc_private.b.allocated; + *l0 = r.l0; + *l1 = r.l1; +} + +STATIC void +xfs_bmbt_trace_key( + struct xfs_btree_cur *cur, + union xfs_btree_key *key, + __uint64_t *l0, + __uint64_t *l1) +{ + *l0 = be64_to_cpu(key->bmbt.br_startoff); + *l1 = 0; +} + +/* Endian flipping versions of the bmbt extraction functions */ +STATIC void +xfs_bmbt_disk_get_all( + xfs_bmbt_rec_t *r, + xfs_bmbt_irec_t *s) +{ + __xfs_bmbt_get_all(get_unaligned_be64(&r->l0), + get_unaligned_be64(&r->l1), s); +} + +STATIC void +xfs_bmbt_trace_record( + struct xfs_btree_cur 
*cur, + union xfs_btree_rec *rec, + __uint64_t *l0, + __uint64_t *l1, + __uint64_t *l2) +{ + struct xfs_bmbt_irec irec; + + xfs_bmbt_disk_get_all(&rec->bmbt, &irec); + *l0 = irec.br_startoff; + *l1 = irec.br_startblock; + *l2 = irec.br_blockcount; +} +#endif /* XFS_BTREE_TRACE */ + static const struct xfs_btree_ops xfs_bmbt_ops = { .rec_len = sizeof(xfs_bmbt_rec_t), .key_len = sizeof(xfs_bmbt_key_t), @@ -747,10 +837,18 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, .key_diff = xfs_bmbt_key_diff, + #ifdef DEBUG .keys_inorder = xfs_bmbt_keys_inorder, .recs_inorder = xfs_bmbt_recs_inorder, #endif + +#ifdef XFS_BTREE_TRACE + .trace_enter = xfs_bmbt_trace_enter, + .trace_cursor = xfs_bmbt_trace_cursor, + .trace_key = xfs_bmbt_trace_key, + .trace_record = xfs_bmbt_trace_record, +#endif }; /* diff --git a/trunk/fs/xfs/xfs_btree.c b/trunk/fs/xfs/xfs_btree.c index cabf4b5604aa..2f9e97c128a0 100644 --- a/trunk/fs/xfs/xfs_btree.c +++ b/trunk/fs/xfs/xfs_btree.c @@ -32,6 +32,7 @@ #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -65,11 +66,11 @@ xfs_btree_check_lblock( be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && block->bb_u.l.bb_leftsib && - (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || + (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO || XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))) && block->bb_u.l.bb_rightsib && - (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || + (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO || XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))); if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, @@ -104,10 +105,10 @@ xfs_btree_check_sblock( be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && - 
(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || + (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK || be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) && block->bb_u.s.bb_leftsib && - (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || + (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK || be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && block->bb_u.s.bb_rightsib; if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp, @@ -510,9 +511,9 @@ xfs_btree_islastblock( block = xfs_btree_get_block(cur, level, &bp); xfs_btree_check_block(cur, block, level, bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO); + return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO; else - return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); + return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK; } /* @@ -776,14 +777,14 @@ xfs_btree_setbuf( b = XFS_BUF_TO_BLOCK(bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)) + if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; - if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)) + if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO) cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; } else { - if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) + if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK) cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; - if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK)) + if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK) cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; } } @@ -794,9 +795,9 @@ xfs_btree_ptr_is_null( union xfs_btree_ptr *ptr) { if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return ptr->l == cpu_to_be64(NULLDFSBNO); + return be64_to_cpu(ptr->l) == NULLDFSBNO; else - return ptr->s == cpu_to_be32(NULLAGBLOCK); + return be32_to_cpu(ptr->s) == NULLAGBLOCK; } STATIC void @@ -922,12 +923,12 @@ xfs_btree_ptr_to_daddr( union xfs_btree_ptr *ptr) { if 
(cur->bc_flags & XFS_BTREE_LONG_PTRS) { - ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO)); + ASSERT(be64_to_cpu(ptr->l) != NULLDFSBNO); return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); } else { ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); - ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK)); + ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK); return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, be32_to_cpu(ptr->s)); diff --git a/trunk/fs/xfs/xfs_btree.h b/trunk/fs/xfs/xfs_btree.h index 8d05a6a46ce3..82fafc66bd1f 100644 --- a/trunk/fs/xfs/xfs_btree.h +++ b/trunk/fs/xfs/xfs_btree.h @@ -199,6 +199,25 @@ struct xfs_btree_ops { union xfs_btree_rec *r1, union xfs_btree_rec *r2); #endif + + /* btree tracing */ +#ifdef XFS_BTREE_TRACE + void (*trace_enter)(struct xfs_btree_cur *, const char *, + char *, int, int, __psunsigned_t, + __psunsigned_t, __psunsigned_t, + __psunsigned_t, __psunsigned_t, + __psunsigned_t, __psunsigned_t, + __psunsigned_t, __psunsigned_t, + __psunsigned_t, __psunsigned_t); + void (*trace_cursor)(struct xfs_btree_cur *, __uint32_t *, + __uint64_t *, __uint64_t *); + void (*trace_key)(struct xfs_btree_cur *, + union xfs_btree_key *, __uint64_t *, + __uint64_t *); + void (*trace_record)(struct xfs_btree_cur *, + union xfs_btree_rec *, __uint64_t *, + __uint64_t *, __uint64_t *); +#endif }; /* @@ -433,23 +452,4 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) -/* - * Trace hooks. Currently not implemented as they need to be ported - * over to the generic tracing functionality, which is some effort. 
- * - * i,j = integer (32 bit) - * b = btree block buffer (xfs_buf_t) - * p = btree ptr - * r = btree record - * k = btree key - */ -#define XFS_BTREE_TRACE_ARGBI(c, b, i) -#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) -#define XFS_BTREE_TRACE_ARGI(c, i) -#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s) -#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) -#define XFS_BTREE_TRACE_ARGIK(c, i, k) -#define XFS_BTREE_TRACE_ARGR(c, r) -#define XFS_BTREE_TRACE_CURSOR(c, t) - #endif /* __XFS_BTREE_H__ */ diff --git a/trunk/fs/xfs/xfs_btree_trace.c b/trunk/fs/xfs/xfs_btree_trace.c new file mode 100644 index 000000000000..44ff942a0fda --- /dev/null +++ b/trunk/fs/xfs/xfs_btree_trace.c @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_btree_trace.h" + +STATIC void +xfs_btree_trace_ptr( + struct xfs_btree_cur *cur, + union xfs_btree_ptr ptr, + __psunsigned_t *high, + __psunsigned_t *low) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + __u64 val = be64_to_cpu(ptr.l); + *high = val >> 32; + *low = (int)val; + } else { + *high = 0; + *low = be32_to_cpu(ptr.s); + } +} + +/* + * Add a trace buffer entry for arguments, for a buffer & 1 integer arg. + */ +void +xfs_btree_trace_argbi( + const char *func, + struct xfs_btree_cur *cur, + struct xfs_buf *b, + int i, + int line) +{ + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBI, + line, (__psunsigned_t)b, i, 0, 0, 0, 0, 0, + 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for a buffer & 2 integer args. + */ +void +xfs_btree_trace_argbii( + const char *func, + struct xfs_btree_cur *cur, + struct xfs_buf *b, + int i0, + int i1, + int line) +{ + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBII, + line, (__psunsigned_t)b, i0, i1, 0, 0, 0, 0, + 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for 3 block-length args + * and an integer arg. + */ +void +xfs_btree_trace_argfffi( + const char *func, + struct xfs_btree_cur *cur, + xfs_dfiloff_t o, + xfs_dfsbno_t b, + xfs_dfilblks_t i, + int j, + int line) +{ + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGFFFI, + line, + o >> 32, (int)o, + b >> 32, (int)b, + i >> 32, (int)i, + (int)j, 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for one integer arg. 
+ */ +void +xfs_btree_trace_argi( + const char *func, + struct xfs_btree_cur *cur, + int i, + int line) +{ + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGI, + line, i, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for int, fsblock, key. + */ +void +xfs_btree_trace_argipk( + const char *func, + struct xfs_btree_cur *cur, + int i, + union xfs_btree_ptr ptr, + union xfs_btree_key *key, + int line) +{ + __psunsigned_t high, low; + __uint64_t l0, l1; + + xfs_btree_trace_ptr(cur, ptr, &high, &low); + cur->bc_ops->trace_key(cur, key, &l0, &l1); + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPK, + line, i, high, low, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for int, fsblock, rec. + */ +void +xfs_btree_trace_argipr( + const char *func, + struct xfs_btree_cur *cur, + int i, + union xfs_btree_ptr ptr, + union xfs_btree_rec *rec, + int line) +{ + __psunsigned_t high, low; + __uint64_t l0, l1, l2; + + xfs_btree_trace_ptr(cur, ptr, &high, &low); + cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2); + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPR, + line, i, + high, low, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + l2 >> 32, (int)l2, + 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for int, key. + */ +void +xfs_btree_trace_argik( + const char *func, + struct xfs_btree_cur *cur, + int i, + union xfs_btree_key *key, + int line) +{ + __uint64_t l0, l1; + + cur->bc_ops->trace_key(cur, key, &l0, &l1); + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIK, + line, i, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + 0, 0, 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for record. 
+ */ +void +xfs_btree_trace_argr( + const char *func, + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + int line) +{ + __uint64_t l0, l1, l2; + + cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2); + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGR, + line, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + l2 >> 32, (int)l2, + 0, 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for the cursor/operation. + */ +void +xfs_btree_trace_cursor( + const char *func, + struct xfs_btree_cur *cur, + int type, + int line) +{ + __uint32_t s0; + __uint64_t l0, l1; + char *s; + + switch (type) { + case XBT_ARGS: + s = "args"; + break; + case XBT_ENTRY: + s = "entry"; + break; + case XBT_ERROR: + s = "error"; + break; + case XBT_EXIT: + s = "exit"; + break; + default: + s = "unknown"; + break; + } + + cur->bc_ops->trace_cursor(cur, &s0, &l0, &l1); + cur->bc_ops->trace_enter(cur, func, s, XFS_BTREE_KTRACE_CUR, line, + s0, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + (__psunsigned_t)cur->bc_bufs[0], + (__psunsigned_t)cur->bc_bufs[1], + (__psunsigned_t)cur->bc_bufs[2], + (__psunsigned_t)cur->bc_bufs[3], + (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1], + (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]); +} diff --git a/trunk/fs/xfs/xfs_btree_trace.h b/trunk/fs/xfs/xfs_btree_trace.h new file mode 100644 index 000000000000..2d8a309873ea --- /dev/null +++ b/trunk/fs/xfs/xfs_btree_trace.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_BTREE_TRACE_H__ +#define __XFS_BTREE_TRACE_H__ + +struct xfs_btree_cur; +struct xfs_buf; + + +/* + * Trace hooks. + * i,j = integer (32 bit) + * b = btree block buffer (xfs_buf_t) + * p = btree ptr + * r = btree record + * k = btree key + */ + +#ifdef XFS_BTREE_TRACE + +/* + * Trace buffer entry types. + */ +#define XFS_BTREE_KTRACE_ARGBI 1 +#define XFS_BTREE_KTRACE_ARGBII 2 +#define XFS_BTREE_KTRACE_ARGFFFI 3 +#define XFS_BTREE_KTRACE_ARGI 4 +#define XFS_BTREE_KTRACE_ARGIPK 5 +#define XFS_BTREE_KTRACE_ARGIPR 6 +#define XFS_BTREE_KTRACE_ARGIK 7 +#define XFS_BTREE_KTRACE_ARGR 8 +#define XFS_BTREE_KTRACE_CUR 9 + +/* + * Sub-types for cursor traces. + */ +#define XBT_ARGS 0 +#define XBT_ENTRY 1 +#define XBT_ERROR 2 +#define XBT_EXIT 3 + +void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *, + struct xfs_buf *, int, int); +void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *, + struct xfs_buf *, int, int, int); +void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int); +void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int, + union xfs_btree_ptr, union xfs_btree_key *, int); +void xfs_btree_trace_argipr(const char *, struct xfs_btree_cur *, int, + union xfs_btree_ptr, union xfs_btree_rec *, int); +void xfs_btree_trace_argik(const char *, struct xfs_btree_cur *, int, + union xfs_btree_key *, int); +void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *, + union xfs_btree_rec *, int); +void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int); + +#define XFS_BTREE_TRACE_ARGBI(c, b, i) \ + xfs_btree_trace_argbi(__func__, c, b, i, __LINE__) +#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \ + xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__) +#define 
XFS_BTREE_TRACE_ARGI(c, i) \ + xfs_btree_trace_argi(__func__, c, i, __LINE__) +#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \ + xfs_btree_trace_argipk(__func__, c, i, p, k, __LINE__) +#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) \ + xfs_btree_trace_argipr(__func__, c, i, p, r, __LINE__) +#define XFS_BTREE_TRACE_ARGIK(c, i, k) \ + xfs_btree_trace_argik(__func__, c, i, k, __LINE__) +#define XFS_BTREE_TRACE_ARGR(c, r) \ + xfs_btree_trace_argr(__func__, c, r, __LINE__) +#define XFS_BTREE_TRACE_CURSOR(c, t) \ + xfs_btree_trace_cursor(__func__, c, t, __LINE__) +#else +#define XFS_BTREE_TRACE_ARGBI(c, b, i) +#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) +#define XFS_BTREE_TRACE_ARGI(c, i) +#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s) +#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) +#define XFS_BTREE_TRACE_ARGIK(c, i, k) +#define XFS_BTREE_TRACE_ARGR(c, r) +#define XFS_BTREE_TRACE_CURSOR(c, t) +#endif /* XFS_BTREE_TRACE */ + +#endif /* __XFS_BTREE_TRACE_H__ */ diff --git a/trunk/fs/xfs/xfs_buf_item.c b/trunk/fs/xfs/xfs_buf_item.c index 88492916c3dc..7b7e005e3dcc 100644 --- a/trunk/fs/xfs/xfs_buf_item.c +++ b/trunk/fs/xfs/xfs_buf_item.c @@ -90,11 +90,13 @@ xfs_buf_item_flush_log_debug( uint first, uint last) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; uint nbytes; - if (bip == NULL || (bip->bli_item.li_type != XFS_LI_BUF)) + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); + if ((bip == NULL) || (bip->bli_item.li_type != XFS_LI_BUF)) { return; + } ASSERT(bip->bli_logged != NULL); nbytes = last - first + 1; @@ -406,7 +408,7 @@ xfs_buf_item_unpin( int stale = bip->bli_flags & XFS_BLI_STALE; int freed; - ASSERT(bp->b_fspriv == bip); + ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip); ASSERT(atomic_read(&bip->bli_refcount) > 0); trace_xfs_buf_item_unpin(bip); @@ -418,7 +420,7 @@ xfs_buf_item_unpin( if (freed && stale) { ASSERT(bip->bli_flags & XFS_BLI_STALE); - ASSERT(xfs_buf_islocked(bp)); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 
ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); ASSERT(XFS_BUF_ISSTALE(bp)); ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); @@ -441,7 +443,7 @@ xfs_buf_item_unpin( * Since the transaction no longer refers to the buffer, * the buffer should no longer refer to the transaction. */ - bp->b_transp = NULL; + XFS_BUF_SET_FSPRIVATE2(bp, NULL); } /* @@ -452,13 +454,13 @@ xfs_buf_item_unpin( */ if (bip->bli_flags & XFS_BLI_STALE_INODE) { xfs_buf_do_callbacks(bp); - bp->b_fspriv = NULL; - bp->b_iodone = NULL; + XFS_BUF_SET_FSPRIVATE(bp, NULL); + XFS_BUF_CLR_IODONE_FUNC(bp); } else { spin_lock(&ailp->xa_lock); xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); xfs_buf_item_relse(bp); - ASSERT(bp->b_fspriv == NULL); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL); } xfs_buf_relse(bp); } @@ -481,7 +483,7 @@ xfs_buf_item_trylock( if (XFS_BUF_ISPINNED(bp)) return XFS_ITEM_PINNED; - if (!xfs_buf_trylock(bp)) + if (!XFS_BUF_CPSEMA(bp)) return XFS_ITEM_LOCKED; /* take a reference to the buffer. */ @@ -523,7 +525,7 @@ xfs_buf_item_unlock( uint hold; /* Clear the buffer's association with this transaction. */ - bp->b_transp = NULL; + XFS_BUF_SET_FSPRIVATE2(bp, NULL); /* * If this is a transaction abort, don't return early. Instead, allow @@ -682,7 +684,7 @@ xfs_buf_item_init( xfs_buf_t *bp, xfs_mount_t *mp) { - xfs_log_item_t *lip = bp->b_fspriv; + xfs_log_item_t *lip; xfs_buf_log_item_t *bip; int chunks; int map_size; @@ -694,8 +696,12 @@ xfs_buf_item_init( * nothing to do here so return. */ ASSERT(bp->b_target->bt_mount == mp); - if (lip != NULL && lip->li_type == XFS_LI_BUF) - return; + if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { + lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); + if (lip->li_type == XFS_LI_BUF) { + return; + } + } /* * chunks is the number of XFS_BLF_CHUNK size pieces @@ -734,9 +740,11 @@ xfs_buf_item_init( * Put the buf item into the list of items attached to the * buffer at the front. 
*/ - if (bp->b_fspriv) - bip->bli_item.li_bio_list = bp->b_fspriv; - bp->b_fspriv = bip; + if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { + bip->bli_item.li_bio_list = + XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); + } + XFS_BUF_SET_FSPRIVATE(bp, bip); } @@ -868,11 +876,12 @@ xfs_buf_item_relse( trace_xfs_buf_item_relse(bp, _RET_IP_); - bip = bp->b_fspriv; - bp->b_fspriv = bip->bli_item.li_bio_list; - if (bp->b_fspriv == NULL) - bp->b_iodone = NULL; - + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); + XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list); + if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) && + (XFS_BUF_IODONE_FUNC(bp) != NULL)) { + XFS_BUF_CLR_IODONE_FUNC(bp); + } xfs_buf_rele(bp); xfs_buf_item_free(bip); } @@ -896,20 +905,20 @@ xfs_buf_attach_iodone( xfs_log_item_t *head_lip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(xfs_buf_islocked(bp)); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); lip->li_cb = cb; - head_lip = bp->b_fspriv; - if (head_lip) { + if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { + head_lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); lip->li_bio_list = head_lip->li_bio_list; head_lip->li_bio_list = lip; } else { - bp->b_fspriv = lip; + XFS_BUF_SET_FSPRIVATE(bp, lip); } - ASSERT(bp->b_iodone == NULL || - bp->b_iodone == xfs_buf_iodone_callbacks); - bp->b_iodone = xfs_buf_iodone_callbacks; + ASSERT((XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks) || + (XFS_BUF_IODONE_FUNC(bp) == NULL)); + XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); } /* @@ -930,8 +939,8 @@ xfs_buf_do_callbacks( { struct xfs_log_item *lip; - while ((lip = bp->b_fspriv) != NULL) { - bp->b_fspriv = lip->li_bio_list; + while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) { + XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list); ASSERT(lip->li_cb != NULL); /* * Clear the next pointer so we don't have any @@ -998,7 +1007,7 @@ xfs_buf_iodone_callbacks( XFS_BUF_DONE(bp); XFS_BUF_SET_START(bp); } - ASSERT(bp->b_iodone != NULL); + ASSERT(XFS_BUF_IODONE_FUNC(bp)); 
trace_xfs_buf_item_iodone_async(bp, _RET_IP_); xfs_buf_relse(bp); return; @@ -1017,8 +1026,8 @@ xfs_buf_iodone_callbacks( do_callbacks: xfs_buf_do_callbacks(bp); - bp->b_fspriv = NULL; - bp->b_iodone = NULL; + XFS_BUF_SET_FSPRIVATE(bp, NULL); + XFS_BUF_CLR_IODONE_FUNC(bp); xfs_buf_ioend(bp, 0); } diff --git a/trunk/fs/xfs/xfs_da_btree.c b/trunk/fs/xfs/xfs_da_btree.c index 2925726529f8..6102ac6d1dff 100644 --- a/trunk/fs/xfs/xfs_da_btree.c +++ b/trunk/fs/xfs/xfs_da_btree.c @@ -24,12 +24,11 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_dir2.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2_priv.h" +#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" @@ -37,6 +36,10 @@ #include "xfs_bmap.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" +#include "xfs_dir2_node.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -86,7 +89,7 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state, */ STATIC uint xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count); STATIC int xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp); -STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps); +STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra); STATIC int xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, xfs_da_state_blk_t *save_blk); @@ -318,11 +321,11 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, ASSERT(bp != NULL); node = bp->data; oldroot = blk1->bp->data; - if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { + if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) { size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] - (char *)oldroot); } else { - ASSERT(oldroot->hdr.info.magic == 
cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); leaf = (xfs_dir2_leaf_t *)oldroot; size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] - (char *)leaf); @@ -349,7 +352,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, node->hdr.count = cpu_to_be16(2); #ifdef DEBUG - if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) { + if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) { ASSERT(blk1->blkno >= mp->m_dirleafblk && blk1->blkno < mp->m_dirfreeblk); ASSERT(blk2->blkno >= mp->m_dirleafblk && @@ -381,7 +384,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, int useextra; node = oldblk->bp->data; - ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); /* * With V2 dirs the extra block is data or freespace. @@ -480,8 +483,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, node1 = node2; node2 = tmpnode; } - ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); - ASSERT(node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC); count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2; if (count == 0) return; @@ -575,7 +578,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, int tmp; node = oldblk->bp->data; - ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); ASSERT(newblk->blkno != 0); if (state->args->whichfork == XFS_DATA_FORK) @@ -711,7 +714,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) ASSERT(args != NULL); ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); oldroot = 
root_blk->bp->data; - ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC); ASSERT(!oldroot->hdr.info.forw); ASSERT(!oldroot->hdr.info.back); @@ -734,10 +737,10 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) ASSERT(bp != NULL); blkinfo = bp->data; if (be16_to_cpu(oldroot->hdr.level) == 1) { - ASSERT(blkinfo->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || - blkinfo->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIR2_LEAFN_MAGIC || + be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC); } else { - ASSERT(blkinfo->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC); } ASSERT(!blkinfo->forw); ASSERT(!blkinfo->back); @@ -773,7 +776,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) */ blk = &state->path.blk[ state->path.active-1 ]; info = blk->bp->data; - ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC); node = (xfs_da_intnode_t *)info; count = be16_to_cpu(node->hdr.count); if (count > (state->node_ents >> 1)) { @@ -833,7 +836,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) count -= state->node_ents >> 2; count -= be16_to_cpu(node->hdr.count); node = bp->data; - ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); count -= be16_to_cpu(node->hdr.count); xfs_da_brelse(state->args->trans, bp); if (count >= 0) @@ -908,7 +911,7 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path) } for (blk--, level--; level >= 0; blk--, level--) { node = blk->bp->data; - ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); btree = &node->btree[ blk->index ]; if (be32_to_cpu(btree->hashval) == lasthash) break; @@ -976,8 +979,8 @@ 
xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, drop_node = drop_blk->bp->data; save_node = save_blk->bp->data; - ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); - ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(drop_node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(save_node->hdr.info.magic) == XFS_DA_NODE_MAGIC); tp = state->args->trans; /* @@ -1275,8 +1278,8 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp) node1 = node1_bp->data; node2 = node2_bp->data; - ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) && - node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) && + (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC)); if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) && ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) || @@ -1296,7 +1299,7 @@ xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count) xfs_da_intnode_t *node; node = bp->data; - ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); if (count) *count = be16_to_cpu(node->hdr.count); if (!node->hdr.count) @@ -1409,7 +1412,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, for (blk = &path->blk[level]; level >= 0; blk--, level--) { ASSERT(blk->bp != NULL); node = blk->bp->data; - ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) { blk->index++; blkno = be32_to_cpu(node->btree[blk->index].before); @@ -1448,9 +1451,9 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, return(error); ASSERT(blk->bp != NULL); info = blk->bp->data; - ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || - 
info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || - info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC || + be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC || + be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); blk->magic = be16_to_cpu(info->magic); if (blk->magic == XFS_DA_NODE_MAGIC) { node = (xfs_da_intnode_t *)info; @@ -1543,62 +1546,79 @@ const struct xfs_nameops xfs_default_nameops = { .compname = xfs_da_compname }; +/* + * Add a block to the btree ahead of the file. + * Return the new block number to the caller. + */ int -xfs_da_grow_inode_int( - struct xfs_da_args *args, - xfs_fileoff_t *bno, - int count) +xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) { - struct xfs_trans *tp = args->trans; - struct xfs_inode *dp = args->dp; - int w = args->whichfork; - xfs_drfsbno_t nblks = dp->i_d.di_nblocks; - struct xfs_bmbt_irec map, *mapp; - int nmap, error, got, i, mapi; + xfs_fileoff_t bno, b; + xfs_bmbt_irec_t map; + xfs_bmbt_irec_t *mapp; + xfs_inode_t *dp; + int nmap, error, w, count, c, got, i, mapi; + xfs_trans_t *tp; + xfs_mount_t *mp; + xfs_drfsbno_t nblks; + dp = args->dp; + mp = dp->i_mount; + w = args->whichfork; + tp = args->trans; + nblks = dp->i_d.di_nblocks; + + /* + * For new directories adjust the file offset and block count. + */ + if (w == XFS_DATA_FORK) { + bno = mp->m_dirleafblk; + count = mp->m_dirblkfsbs; + } else { + bno = 0; + count = 1; + } /* * Find a spot in the file space to put the new block. */ - error = xfs_bmap_first_unused(tp, dp, count, bno, w); - if (error) + if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w))) return error; - + if (w == XFS_DATA_FORK) + ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk); /* * Try mapping it in one filesystem block. 
*/ nmap = 1; ASSERT(args->firstblock != NULL); - error = xfs_bmapi(tp, dp, *bno, count, + if ((error = xfs_bmapi(tp, dp, bno, count, xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA| XFS_BMAPI_CONTIG, args->firstblock, args->total, &map, &nmap, - args->flist); - if (error) + args->flist))) { return error; - + } ASSERT(nmap <= 1); if (nmap == 1) { mapp = &map; mapi = 1; - } else if (nmap == 0 && count > 1) { - xfs_fileoff_t b; - int c; - - /* - * If we didn't get it and the block might work if fragmented, - * try without the CONTIG flag. Loop until we get it all. - */ + } + /* + * If we didn't get it and the block might work if fragmented, + * try without the CONTIG flag. Loop until we get it all. + */ + else if (nmap == 0 && count > 1) { mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); - for (b = *bno, mapi = 0; b < *bno + count; ) { + for (b = bno, mapi = 0; b < bno + count; ) { nmap = MIN(XFS_BMAP_MAX_NMAP, count); - c = (int)(*bno + count - b); - error = xfs_bmapi(tp, dp, b, c, + c = (int)(bno + count - b); + if ((error = xfs_bmapi(tp, dp, b, c, xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE| XFS_BMAPI_METADATA, args->firstblock, args->total, - &mapp[mapi], &nmap, args->flist); - if (error) - goto out_free_map; + &mapp[mapi], &nmap, args->flist))) { + kmem_free(mapp); + return error; + } if (nmap < 1) break; mapi += nmap; @@ -1609,53 +1629,24 @@ xfs_da_grow_inode_int( mapi = 0; mapp = NULL; } - /* * Count the blocks we got, make sure it matches the total. 
*/ for (i = 0, got = 0; i < mapi; i++) got += mapp[i].br_blockcount; - if (got != count || mapp[0].br_startoff != *bno || + if (got != count || mapp[0].br_startoff != bno || mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != - *bno + count) { - error = XFS_ERROR(ENOSPC); - goto out_free_map; + bno + count) { + if (mapp != &map) + kmem_free(mapp); + return XFS_ERROR(ENOSPC); } - - /* account for newly allocated blocks in reserved blocks total */ - args->total -= dp->i_d.di_nblocks - nblks; - -out_free_map: if (mapp != &map) kmem_free(mapp); - return error; -} - -/* - * Add a block to the btree ahead of the file. - * Return the new block number to the caller. - */ -int -xfs_da_grow_inode( - struct xfs_da_args *args, - xfs_dablk_t *new_blkno) -{ - xfs_fileoff_t bno; - int count; - int error; - - if (args->whichfork == XFS_DATA_FORK) { - bno = args->dp->i_mount->m_dirleafblk; - count = args->dp->i_mount->m_dirblkfsbs; - } else { - bno = 0; - count = 1; - } - - error = xfs_da_grow_inode_int(args, &bno, count); - if (!error) - *new_blkno = (xfs_dablk_t)bno; - return error; + /* account for newly allocated blocks in reserved blocks total */ + args->total -= dp->i_d.di_nblocks - nblks; + *new_blkno = (xfs_dablk_t)bno; + return 0; } /* @@ -1713,12 +1704,12 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, /* * Get values from the moved block. 
*/ - if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) { + if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) { dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; dead_level = 0; dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval); } else { - ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC); dead_node = (xfs_da_intnode_t *)dead_info; dead_level = be16_to_cpu(dead_node->hdr.level); dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval); @@ -1777,8 +1768,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w))) goto done; par_node = par_buf->data; - if (unlikely(par_node->hdr.info.magic != - cpu_to_be16(XFS_DA_NODE_MAGIC) || + if (unlikely( + be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC || (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", XFS_ERRLEVEL_LOW, mp); @@ -1829,7 +1820,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, par_node = par_buf->data; if (unlikely( be16_to_cpu(par_node->hdr.level) != level || - par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) { + be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", XFS_ERRLEVEL_LOW, mp); error = XFS_ERROR(EFSCORRUPTED); @@ -1939,7 +1930,8 @@ xfs_da_do_buf( xfs_daddr_t *mappedbnop, xfs_dabuf_t **bpp, int whichfork, - int caller) + int caller, + inst_t *ra) { xfs_buf_t *bp = NULL; xfs_buf_t **bplist; @@ -2078,22 +2070,25 @@ xfs_da_do_buf( * Build a dabuf structure. */ if (bplist) { - rbp = xfs_da_buf_make(nbplist, bplist); + rbp = xfs_da_buf_make(nbplist, bplist, ra); } else if (bp) - rbp = xfs_da_buf_make(1, &bp); + rbp = xfs_da_buf_make(1, &bp, ra); else rbp = NULL; /* * For read_buf, check the magic number. 
*/ if (caller == 1) { - xfs_dir2_data_hdr_t *hdr = rbp->data; - xfs_dir2_free_t *free = rbp->data; - xfs_da_blkinfo_t *info = rbp->data; + xfs_dir2_data_t *data; + xfs_dir2_free_t *free; + xfs_da_blkinfo_t *info; uint magic, magic1; + info = rbp->data; + data = rbp->data; + free = rbp->data; magic = be16_to_cpu(info->magic); - magic1 = be32_to_cpu(hdr->magic); + magic1 = be32_to_cpu(data->hdr.magic); if (unlikely( XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) && (magic != XFS_ATTR_LEAF_MAGIC) && @@ -2101,7 +2096,7 @@ xfs_da_do_buf( (magic != XFS_DIR2_LEAFN_MAGIC) && (magic1 != XFS_DIR2_BLOCK_MAGIC) && (magic1 != XFS_DIR2_DATA_MAGIC) && - (free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)), + (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC), mp, XFS_ERRTAG_DA_READ_BUF, XFS_RANDOM_DA_READ_BUF))) { trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_); @@ -2148,7 +2143,8 @@ xfs_da_get_buf( xfs_dabuf_t **bpp, int whichfork) { - return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0); + return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0, + (inst_t *)__return_address); } /* @@ -2163,7 +2159,8 @@ xfs_da_read_buf( xfs_dabuf_t **bpp, int whichfork) { - return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1); + return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1, + (inst_t *)__return_address); } /* @@ -2179,7 +2176,8 @@ xfs_da_reada_buf( xfs_daddr_t rval; rval = -1; - if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3)) + if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3, + (inst_t *)__return_address)) return -1; else return rval; @@ -2237,12 +2235,17 @@ xfs_da_state_free(xfs_da_state_t *state) kmem_zone_free(xfs_da_state_zone, state); } +#ifdef XFS_DABUF_DEBUG +xfs_dabuf_t *xfs_dabuf_global_list; +static DEFINE_SPINLOCK(xfs_dabuf_global_lock); +#endif + /* * Create a dabuf. 
*/ /* ARGSUSED */ STATIC xfs_dabuf_t * -xfs_da_buf_make(int nbuf, xfs_buf_t **bps) +xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra) { xfs_buf_t *bp; xfs_dabuf_t *dabuf; @@ -2254,6 +2257,11 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps) else dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS); dabuf->dirty = 0; +#ifdef XFS_DABUF_DEBUG + dabuf->ra = ra; + dabuf->target = XFS_BUF_TARGET(bps[0]); + dabuf->blkno = XFS_BUF_ADDR(bps[0]); +#endif if (nbuf == 1) { dabuf->nbuf = 1; bp = bps[0]; @@ -2273,6 +2281,23 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps) XFS_BUF_COUNT(bp)); } } +#ifdef XFS_DABUF_DEBUG + { + xfs_dabuf_t *p; + + spin_lock(&xfs_dabuf_global_lock); + for (p = xfs_dabuf_global_list; p; p = p->next) { + ASSERT(p->blkno != dabuf->blkno || + p->target != dabuf->target); + } + dabuf->prev = NULL; + if (xfs_dabuf_global_list) + xfs_dabuf_global_list->prev = dabuf; + dabuf->next = xfs_dabuf_global_list; + xfs_dabuf_global_list = dabuf; + spin_unlock(&xfs_dabuf_global_lock); + } +#endif return dabuf; } @@ -2308,12 +2333,25 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf) ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); if (dabuf->dirty) xfs_da_buf_clean(dabuf); - if (dabuf->nbuf > 1) { + if (dabuf->nbuf > 1) kmem_free(dabuf->data); - kmem_free(dabuf); - } else { - kmem_zone_free(xfs_dabuf_zone, dabuf); +#ifdef XFS_DABUF_DEBUG + { + spin_lock(&xfs_dabuf_global_lock); + if (dabuf->prev) + dabuf->prev->next = dabuf->next; + else + xfs_dabuf_global_list = dabuf->next; + if (dabuf->next) + dabuf->next->prev = dabuf->prev; + spin_unlock(&xfs_dabuf_global_lock); } + memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf)); +#endif + if (dabuf->nbuf == 1) + kmem_zone_free(xfs_dabuf_zone, dabuf); + else + kmem_free(dabuf); } /* diff --git a/trunk/fs/xfs/xfs_da_btree.h b/trunk/fs/xfs/xfs_da_btree.h index dbf7c074ae73..fe9f5a8c1d2a 100644 --- a/trunk/fs/xfs/xfs_da_btree.h +++ b/trunk/fs/xfs/xfs_da_btree.h @@ -145,11 +145,22 @@ typedef struct xfs_dabuf { short dirty; /* 
data needs to be copied back */ short bbcount; /* how large is data in bbs */ void *data; /* pointer for buffers' data */ +#ifdef XFS_DABUF_DEBUG + inst_t *ra; /* return address of caller to make */ + struct xfs_dabuf *next; /* next in global chain */ + struct xfs_dabuf *prev; /* previous in global chain */ + struct xfs_buftarg *target; /* device for buffer */ + xfs_daddr_t blkno; /* daddr first in bps[0] */ +#endif struct xfs_buf *bps[1]; /* actually nbuf of these */ } xfs_dabuf_t; #define XFS_DA_BUF_SIZE(n) \ (sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1)) +#ifdef XFS_DABUF_DEBUG +extern xfs_dabuf_t *xfs_dabuf_global_list; +#endif + /* * Storage for holding state during Btree searches and split/join ops. * @@ -237,8 +248,6 @@ int xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, * Utility routines. */ int xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno); -int xfs_da_grow_inode_int(struct xfs_da_args *args, xfs_fileoff_t *bno, - int count); int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, xfs_dabuf_t **bp, int whichfork); diff --git a/trunk/fs/xfs/xfs_dir2.c b/trunk/fs/xfs/xfs_dir2.c index 4580ce00aeb4..dba7a71cedf3 100644 --- a/trunk/fs/xfs/xfs_dir2.c +++ b/trunk/fs/xfs/xfs_dir2.c @@ -24,17 +24,20 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" +#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" -#include "xfs_dir2.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2_priv.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" +#include "xfs_dir2_node.h" #include "xfs_error.h" #include "xfs_vnodeops.h" #include "xfs_trace.h" @@ -119,15 +122,15 @@ int xfs_dir_isempty( xfs_inode_t *dp) { - xfs_dir2_sf_hdr_t *sfp; + 
xfs_dir2_sf_t *sfp; ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); if (dp->i_d.di_size == 0) /* might happen during shutdown. */ return 1; if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp)) return 0; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - return !sfp->count; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + return !sfp->hdr.count; } /* @@ -497,34 +500,129 @@ xfs_dir_canenter( /* * Add a block to the directory. - * - * This routine is for data and free blocks, not leaf/node blocks which are - * handled by xfs_da_grow_inode. + * This routine is for data and free blocks, not leaf/node blocks + * which are handled by xfs_da_grow_inode. */ int xfs_dir2_grow_inode( - struct xfs_da_args *args, - int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ - xfs_dir2_db_t *dbp) /* out: block number added */ + xfs_da_args_t *args, + int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ + xfs_dir2_db_t *dbp) /* out: block number added */ { - struct xfs_inode *dp = args->dp; - struct xfs_mount *mp = dp->i_mount; - xfs_fileoff_t bno; /* directory offset of new block */ - int count; /* count of filesystem blocks */ - int error; + xfs_fileoff_t bno; /* directory offset of new block */ + int count; /* count of filesystem blocks */ + xfs_inode_t *dp; /* incore directory inode */ + int error; + int got; /* blocks actually mapped */ + int i; + xfs_bmbt_irec_t map; /* single structure for bmap */ + int mapi; /* mapping index */ + xfs_bmbt_irec_t *mapp; /* bmap mapping structure(s) */ + xfs_mount_t *mp; + int nmap; /* number of bmap entries */ + xfs_trans_t *tp; + xfs_drfsbno_t nblks; trace_xfs_dir2_grow_inode(args, space); + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + nblks = dp->i_d.di_nblocks; /* * Set lowest possible block in the space requested. */ bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE); count = mp->m_dirblkfsbs; - - error = xfs_da_grow_inode_int(args, &bno, count); - if (error) + /* + * Find the first hole for our block. 
+ */ + if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK))) return error; + nmap = 1; + ASSERT(args->firstblock != NULL); + /* + * Try mapping the new block contiguously (one extent). + */ + if ((error = xfs_bmapi(tp, dp, bno, count, + XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, + args->firstblock, args->total, &map, &nmap, + args->flist))) + return error; + ASSERT(nmap <= 1); + if (nmap == 1) { + mapp = &map; + mapi = 1; + } + /* + * Didn't work and this is a multiple-fsb directory block. + * Try again with contiguous flag turned on. + */ + else if (nmap == 0 && count > 1) { + xfs_fileoff_t b; /* current file offset */ + + /* + * Space for maximum number of mappings. + */ + mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); + /* + * Iterate until we get to the end of our block. + */ + for (b = bno, mapi = 0; b < bno + count; ) { + int c; /* current fsb count */ + + /* + * Can't map more than MAX_NMAP at once. + */ + nmap = MIN(XFS_BMAP_MAX_NMAP, count); + c = (int)(bno + count - b); + if ((error = xfs_bmapi(tp, dp, b, c, + XFS_BMAPI_WRITE|XFS_BMAPI_METADATA, + args->firstblock, args->total, + &mapp[mapi], &nmap, args->flist))) { + kmem_free(mapp); + return error; + } + if (nmap < 1) + break; + /* + * Add this bunch into our table, go to the next offset. + */ + mapi += nmap; + b = mapp[mapi - 1].br_startoff + + mapp[mapi - 1].br_blockcount; + } + } + /* + * Didn't work. + */ + else { + mapi = 0; + mapp = NULL; + } + /* + * See how many fsb's we got. + */ + for (i = 0, got = 0; i < mapi; i++) + got += mapp[i].br_blockcount; + /* + * Didn't get enough fsb's, or the first/last block's are wrong. + */ + if (got != count || mapp[0].br_startoff != bno || + mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != + bno + count) { + if (mapp != &map) + kmem_free(mapp); + return XFS_ERROR(ENOSPC); + } + /* + * Done with the temporary mapping table.
+ */ + if (mapp != &map) + kmem_free(mapp); + /* account for newly allocated blocks in reserved blocks total */ + args->total -= dp->i_d.di_nblocks - nblks; *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); /* @@ -536,7 +634,7 @@ xfs_dir2_grow_inode( size = XFS_FSB_TO_B(mp, bno + count); if (size > dp->i_d.di_size) { dp->i_d.di_size = size; - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); + xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); } } return 0; diff --git a/trunk/fs/xfs/xfs_dir2.h b/trunk/fs/xfs/xfs_dir2.h index e937d9991c18..74a3b1057685 100644 --- a/trunk/fs/xfs/xfs_dir2.h +++ b/trunk/fs/xfs/xfs_dir2.h @@ -16,14 +16,49 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __XFS_DIR2_H__ -#define __XFS_DIR2_H__ +#define __XFS_DIR2_H__ -struct xfs_bmap_free; +struct uio; +struct xfs_dabuf; struct xfs_da_args; +struct xfs_dir2_put_args; +struct xfs_bmap_free; struct xfs_inode; struct xfs_mount; struct xfs_trans; +/* + * Directory version 2. + * There are 4 possible formats: + * shortform + * single block - data with embedded leaf at the end + * multiple data blocks, single leaf+freeindex block + * data blocks, node&leaf blocks (btree), freeindex blocks + * + * The shortform format is in xfs_dir2_sf.h. + * The single block format is in xfs_dir2_block.h. + * The data block format is in xfs_dir2_data.h. + * The leaf and freeindex block formats are in xfs_dir2_leaf.h. + * Node blocks are the same as the other version, in xfs_da_btree.h. + */ + +/* + * Byte offset in data block and shortform entry. + */ +typedef __uint16_t xfs_dir2_data_off_t; +#define NULLDATAOFF 0xffffU +typedef uint xfs_dir2_data_aoff_t; /* argument form */ + +/* + * Directory block number (logical dirblk in file) + */ +typedef __uint32_t xfs_dir2_db_t; + +/* + * Byte offset in a directory. 
+ */ +typedef xfs_off_t xfs_dir2_off_t; + extern struct xfs_name xfs_name_dotdot; /* @@ -51,10 +86,21 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_bmap_free *flist, xfs_extlen_t tot); extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name, uint resblks); +extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); /* - * Direct call from the bmap code, bypassing the generic directory layer. + * Utility routines for v2 directories. */ -extern int xfs_dir2_sf_to_block(struct xfs_da_args *args); +extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, + xfs_dir2_db_t *dbp); +extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, + int *vp); +extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, + int *vp); +extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, + struct xfs_dabuf *bp); + +extern int xfs_dir_cilookup_result(struct xfs_da_args *args, + const unsigned char *name, int len); #endif /* __XFS_DIR2_H__ */ diff --git a/trunk/fs/xfs/xfs_dir2_block.c b/trunk/fs/xfs/xfs_dir2_block.c index 9245e029b8ea..580d99cef9e7 100644 --- a/trunk/fs/xfs/xfs_dir2_block.c +++ b/trunk/fs/xfs/xfs_dir2_block.c @@ -23,14 +23,17 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" +#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2_priv.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -64,7 +67,7 @@ xfs_dir2_block_addname( xfs_da_args_t *args) /* directory op arguments */ { xfs_dir2_data_free_t *bf; /* bestfree table in block */ - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* directory block structure */ 
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ xfs_dabuf_t *bp; /* buffer for block */ xfs_dir2_block_tail_t *btp; /* block tail */ @@ -102,13 +105,13 @@ xfs_dir2_block_addname( return error; } ASSERT(bp != NULL); - hdr = bp->data; + block = bp->data; /* * Check the magic number, corrupted if wrong. */ - if (unlikely(hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))) { + if (unlikely(be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC)) { XFS_CORRUPTION_ERROR("xfs_dir2_block_addname", - XFS_ERRLEVEL_LOW, mp, hdr); + XFS_ERRLEVEL_LOW, mp, block); xfs_da_brelse(tp, bp); return XFS_ERROR(EFSCORRUPTED); } @@ -116,8 +119,8 @@ xfs_dir2_block_addname( /* * Set up pointers to parts of the block. */ - bf = hdr->bestfree; - btp = xfs_dir2_block_tail_p(mp, hdr); + bf = block->hdr.bestfree; + btp = xfs_dir2_block_tail_p(mp, block); blp = xfs_dir2_block_leaf_p(btp); /* * No stale entries? Need space for entry and new leaf. @@ -130,7 +133,7 @@ xfs_dir2_block_addname( /* * Data object just before the first leaf entry. 
*/ - enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); + enddup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); /* * If it's not free then can't do this add without cleaning up: * the space before the first leaf entry needs to be free so it @@ -143,7 +146,7 @@ xfs_dir2_block_addname( */ else { dup = (xfs_dir2_data_unused_t *) - ((char *)hdr + be16_to_cpu(bf[0].offset)); + ((char *)block + be16_to_cpu(bf[0].offset)); if (dup == enddup) { /* * It is the biggest freespace, is it too small @@ -156,7 +159,7 @@ xfs_dir2_block_addname( */ if (be16_to_cpu(bf[1].length) >= len) dup = (xfs_dir2_data_unused_t *) - ((char *)hdr + + ((char *)block + be16_to_cpu(bf[1].offset)); else dup = NULL; @@ -179,7 +182,7 @@ xfs_dir2_block_addname( */ else if (be16_to_cpu(bf[0].length) >= len) { dup = (xfs_dir2_data_unused_t *) - ((char *)hdr + be16_to_cpu(bf[0].offset)); + ((char *)block + be16_to_cpu(bf[0].offset)); compact = 0; } /* @@ -193,7 +196,7 @@ xfs_dir2_block_addname( /* * Data object just before the first leaf entry. */ - dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); + dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); /* * If it's not free then the data will go where the * leaf data starts now, if it works at all. 
@@ -252,8 +255,7 @@ xfs_dir2_block_addname( highstale = lfloghigh = -1; fromidx >= 0; fromidx--) { - if (blp[fromidx].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) { + if (be32_to_cpu(blp[fromidx].address) == XFS_DIR2_NULL_DATAPTR) { if (highstale == -1) highstale = toidx; else { @@ -270,7 +272,7 @@ xfs_dir2_block_addname( lfloghigh -= be32_to_cpu(btp->stale) - 1; be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); xfs_dir2_data_make_free(tp, bp, - (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), + (xfs_dir2_data_aoff_t)((char *)blp - (char *)block), (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), &needlog, &needscan); blp += be32_to_cpu(btp->stale) - 1; @@ -280,7 +282,7 @@ xfs_dir2_block_addname( * This needs to happen before the next call to use_free. */ if (needscan) { - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); needscan = 0; } } @@ -316,7 +318,7 @@ xfs_dir2_block_addname( */ xfs_dir2_data_use_free(tp, bp, enddup, (xfs_dir2_data_aoff_t) - ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) - + ((char *)enddup - (char *)block + be16_to_cpu(enddup->length) - sizeof(*blp)), (xfs_dir2_data_aoff_t)sizeof(*blp), &needlog, &needscan); @@ -329,7 +331,8 @@ xfs_dir2_block_addname( * This needs to happen before the next call to use_free. 
*/ if (needscan) { - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, + &needlog); needscan = 0; } /* @@ -350,14 +353,12 @@ xfs_dir2_block_addname( else { for (lowstale = mid; lowstale >= 0 && - blp[lowstale].address != - cpu_to_be32(XFS_DIR2_NULL_DATAPTR); + be32_to_cpu(blp[lowstale].address) != XFS_DIR2_NULL_DATAPTR; lowstale--) continue; for (highstale = mid + 1; highstale < be32_to_cpu(btp->count) && - blp[highstale].address != - cpu_to_be32(XFS_DIR2_NULL_DATAPTR) && + be32_to_cpu(blp[highstale].address) != XFS_DIR2_NULL_DATAPTR && (lowstale < 0 || mid - lowstale > highstale - mid); highstale++) continue; @@ -396,13 +397,13 @@ xfs_dir2_block_addname( */ blp[mid].hashval = cpu_to_be32(args->hashval); blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, - (char *)dep - (char *)hdr)); + (char *)dep - (char *)block)); xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); /* * Mark space for the data entry used. */ xfs_dir2_data_use_free(tp, bp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), + (xfs_dir2_data_aoff_t)((char *)dup - (char *)block), (xfs_dir2_data_aoff_t)len, &needlog, &needscan); /* * Create the new data entry. @@ -411,12 +412,12 @@ xfs_dir2_block_addname( dep->namelen = args->namelen; memcpy(dep->name, args->name, args->namelen); tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); + *tagp = cpu_to_be16((char *)dep - (char *)block); /* * Clean up the bestfree array and log the header, tail, and entry. 
*/ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); if (needlog) xfs_dir2_data_log_header(tp, bp); xfs_dir2_block_log_tail(tp, bp); @@ -436,7 +437,7 @@ xfs_dir2_block_getdents( xfs_off_t *offset, filldir_t filldir) { - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* directory block structure */ xfs_dabuf_t *bp; /* buffer for block */ xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* block data entry */ @@ -469,13 +470,13 @@ xfs_dir2_block_getdents( * We'll skip entries before this. */ wantoff = xfs_dir2_dataptr_to_off(mp, *offset); - hdr = bp->data; + block = bp->data; xfs_dir2_data_check(dp, bp); /* * Set up values for the loop. */ - btp = xfs_dir2_block_tail_p(mp, hdr); - ptr = (char *)(hdr + 1); + btp = xfs_dir2_block_tail_p(mp, block); + ptr = (char *)block->u; endptr = (char *)xfs_dir2_block_leaf_p(btp); /* @@ -501,11 +502,11 @@ xfs_dir2_block_getdents( /* * The entry is before the desired starting point, skip it. */ - if ((char *)dep - (char *)hdr < wantoff) + if ((char *)dep - (char *)block < wantoff) continue; cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, - (char *)dep - (char *)hdr); + (char *)dep - (char *)block); /* * If it didn't fit, set the final offset to here & return. 
@@ -539,14 +540,17 @@ xfs_dir2_block_log_leaf( int first, /* index of first logged leaf */ int last) /* index of last logged leaf */ { - xfs_dir2_data_hdr_t *hdr = bp->data; - xfs_dir2_leaf_entry_t *blp; - xfs_dir2_block_tail_t *btp; + xfs_dir2_block_t *block; /* directory block structure */ + xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_mount_t *mp; /* filesystem mount point */ - btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr); + mp = tp->t_mountp; + block = bp->data; + btp = xfs_dir2_block_tail_p(mp, block); blp = xfs_dir2_block_leaf_p(btp); - xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr), - (uint)((char *)&blp[last + 1] - (char *)hdr - 1)); + xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block), + (uint)((char *)&blp[last + 1] - (char *)block - 1)); } /* @@ -557,12 +561,15 @@ xfs_dir2_block_log_tail( xfs_trans_t *tp, /* transaction structure */ xfs_dabuf_t *bp) /* block buffer */ { - xfs_dir2_data_hdr_t *hdr = bp->data; - xfs_dir2_block_tail_t *btp; + xfs_dir2_block_t *block; /* directory block structure */ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_mount_t *mp; /* filesystem mount point */ - btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr); - xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr), - (uint)((char *)(btp + 1) - (char *)hdr - 1)); + mp = tp->t_mountp; + block = bp->data; + btp = xfs_dir2_block_tail_p(mp, block); + xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block), + (uint)((char *)(btp + 1) - (char *)block - 1)); } /* @@ -573,7 +580,7 @@ int /* error */ xfs_dir2_block_lookup( xfs_da_args_t *args) /* dir lookup arguments */ { - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* block structure */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ xfs_dabuf_t *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ @@ -593,14 +600,14 @@ xfs_dir2_block_lookup( return error; dp = args->dp; mp = 
dp->i_mount; - hdr = bp->data; + block = bp->data; xfs_dir2_data_check(dp, bp); - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(mp, block); blp = xfs_dir2_block_leaf_p(btp); /* * Get the offset from the leaf entry, to point to the data. */ - dep = (xfs_dir2_data_entry_t *)((char *)hdr + + dep = (xfs_dir2_data_entry_t *)((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); /* * Fill in inode number, CI name if appropriate, release the block. @@ -621,7 +628,7 @@ xfs_dir2_block_lookup_int( int *entno) /* returned entry number */ { xfs_dir2_dataptr_t addr; /* data entry address */ - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* block structure */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ xfs_dabuf_t *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ @@ -647,9 +654,9 @@ xfs_dir2_block_lookup_int( return error; } ASSERT(bp != NULL); - hdr = bp->data; + block = bp->data; xfs_dir2_data_check(dp, bp); - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(mp, block); blp = xfs_dir2_block_leaf_p(btp); /* * Loop doing a binary search for our hash value. @@ -687,7 +694,7 @@ xfs_dir2_block_lookup_int( * Get pointer to the entry from the leaf. */ dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr)); + ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); /* * Compare name and if it's an exact match, return the index * and buffer. 
If it's the first case-insensitive match, store @@ -726,7 +733,7 @@ int /* error */ xfs_dir2_block_removename( xfs_da_args_t *args) /* directory operation args */ { - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* block structure */ xfs_dir2_leaf_entry_t *blp; /* block leaf pointer */ xfs_dabuf_t *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ @@ -753,20 +760,20 @@ xfs_dir2_block_removename( dp = args->dp; tp = args->trans; mp = dp->i_mount; - hdr = bp->data; - btp = xfs_dir2_block_tail_p(mp, hdr); + block = bp->data; + btp = xfs_dir2_block_tail_p(mp, block); blp = xfs_dir2_block_leaf_p(btp); /* * Point to the data entry using the leaf entry. */ dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); + ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); /* * Mark the data entry's space free. */ needlog = needscan = 0; xfs_dir2_data_make_free(tp, bp, - (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), + (xfs_dir2_data_aoff_t)((char *)dep - (char *)block), xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); /* * Fix up the block tail. @@ -782,15 +789,15 @@ xfs_dir2_block_removename( * Fix up bestfree, log the header if necessary. */ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); if (needlog) xfs_dir2_data_log_header(tp, bp); xfs_dir2_data_check(dp, bp); /* * See if the size as a shortform is good enough. 
*/ - size = xfs_dir2_block_sfsize(dp, hdr, &sfh); - if (size > XFS_IFORK_DSIZE(dp)) { + if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) > + XFS_IFORK_DSIZE(dp)) { xfs_da_buf_done(bp); return 0; } @@ -808,7 +815,7 @@ int /* error */ xfs_dir2_block_replace( xfs_da_args_t *args) /* directory operation args */ { - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* block structure */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ xfs_dabuf_t *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ @@ -829,14 +836,14 @@ xfs_dir2_block_replace( } dp = args->dp; mp = dp->i_mount; - hdr = bp->data; - btp = xfs_dir2_block_tail_p(mp, hdr); + block = bp->data; + btp = xfs_dir2_block_tail_p(mp, block); blp = xfs_dir2_block_leaf_p(btp); /* * Point to the data entry we need to change. */ dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); + ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); ASSERT(be64_to_cpu(dep->inumber) != args->inumber); /* * Change the inode number to the new value. 
@@ -875,7 +882,7 @@ xfs_dir2_leaf_to_block( xfs_dabuf_t *dbp) /* data buffer */ { __be16 *bestsp; /* leaf bests table */ - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* block structure */ xfs_dir2_block_tail_t *btp; /* block tail */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_data_unused_t *dup; /* unused data entry */ @@ -899,7 +906,7 @@ xfs_dir2_leaf_to_block( tp = args->trans; mp = dp->i_mount; leaf = lbp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); ltp = xfs_dir2_leaf_tail_p(mp, leaf); /* * If there are data blocks other than the first one, take this @@ -910,7 +917,7 @@ xfs_dir2_leaf_to_block( while (dp->i_d.di_size > mp->m_dirblksize) { bestsp = xfs_dir2_leaf_bests_p(ltp); if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == - mp->m_dirblksize - (uint)sizeof(*hdr)) { + mp->m_dirblksize - (uint)sizeof(block->hdr)) { if ((error = xfs_dir2_leaf_trim_data(args, lbp, (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1)))) @@ -928,18 +935,18 @@ xfs_dir2_leaf_to_block( XFS_DATA_FORK))) { goto out; } - hdr = dbp->data; - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); + block = dbp->data; + ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_DATA_MAGIC); /* * Size of the "leaf" area in the block. */ - size = (uint)sizeof(xfs_dir2_block_tail_t) + + size = (uint)sizeof(block->tail) + (uint)sizeof(*lep) * (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); /* * Look at the last data entry. */ - tagp = (__be16 *)((char *)hdr + mp->m_dirblksize) - 1; - dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); + tagp = (__be16 *)((char *)block + mp->m_dirblksize) - 1; + dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); /* * If it's not free or is too short we can't do it. */ @@ -951,7 +958,7 @@ xfs_dir2_leaf_to_block( /* * Start converting it to block form. 
*/ - hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); + block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); needlog = 1; needscan = 0; /* @@ -962,7 +969,7 @@ xfs_dir2_leaf_to_block( /* * Initialize the block tail. */ - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(mp, block); btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); btp->stale = 0; xfs_dir2_block_log_tail(tp, dbp); @@ -971,8 +978,7 @@ xfs_dir2_leaf_to_block( */ lep = xfs_dir2_block_leaf_p(btp); for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) { - if (leaf->ents[from].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) continue; lep[to++] = leaf->ents[from]; } @@ -982,7 +988,7 @@ xfs_dir2_leaf_to_block( * Scan the bestfree if we need it and log the data block header. */ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); if (needlog) xfs_dir2_data_log_header(tp, dbp); /* @@ -996,8 +1002,8 @@ xfs_dir2_leaf_to_block( /* * Now see if the resulting block can be shrunken to shortform. 
*/ - size = xfs_dir2_block_sfsize(dp, hdr, &sfh); - if (size > XFS_IFORK_DSIZE(dp)) { + if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) > + XFS_IFORK_DSIZE(dp)) { error = 0; goto out; } @@ -1018,10 +1024,12 @@ xfs_dir2_sf_to_block( xfs_da_args_t *args) /* operation arguments */ { xfs_dir2_db_t blkno; /* dir-relative block # (0) */ - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* block structure */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ xfs_dabuf_t *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail pointer */ + char *buf; /* sf buffer */ + int buf_len; xfs_dir2_data_entry_t *dep; /* data entry pointer */ xfs_inode_t *dp; /* incore directory inode */ int dummy; /* trash */ @@ -1035,8 +1043,7 @@ xfs_dir2_sf_to_block( int newoffset; /* offset from current entry */ int offset; /* target block offset */ xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */ - xfs_dir2_sf_hdr_t *oldsfp; /* old shortform header */ - xfs_dir2_sf_hdr_t *sfp; /* shortform header */ + xfs_dir2_sf_t *sfp; /* shortform structure */ __be16 *tagp; /* end of data entry */ xfs_trans_t *tp; /* transaction pointer */ struct xfs_name name; @@ -1054,30 +1061,32 @@ xfs_dir2_sf_to_block( ASSERT(XFS_FORCED_SHUTDOWN(mp)); return XFS_ERROR(EIO); } - - oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count)); - + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); /* - * Copy the directory into a temporary buffer. + * Copy the directory into the stack buffer. * Then pitch the incore inode data so we can make extents. 
*/ - sfp = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP); - memcpy(sfp, oldsfp, dp->i_df.if_bytes); - xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK); + buf_len = dp->i_df.if_bytes; + buf = kmem_alloc(buf_len, KM_SLEEP); + + memcpy(buf, sfp, buf_len); + xfs_idata_realloc(dp, -buf_len, XFS_DATA_FORK); dp->i_d.di_size = 0; xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); - + /* + * Reset pointer - old sfp is gone. + */ + sfp = (xfs_dir2_sf_t *)buf; /* * Add block 0 to the inode. */ error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno); if (error) { - kmem_free(sfp); + kmem_free(buf); return error; } /* @@ -1085,21 +1094,21 @@ xfs_dir2_sf_to_block( */ error = xfs_dir2_data_init(args, blkno, &bp); if (error) { - kmem_free(sfp); + kmem_free(buf); return error; } - hdr = bp->data; - hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); + block = bp->data; + block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); /* * Compute size of block "tail" area. */ i = (uint)sizeof(*btp) + - (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t); + (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t); /* * The whole thing is initialized to free by the init routine. * Say we're using the leaf and tail area. */ - dup = (xfs_dir2_data_unused_t *)(hdr + 1); + dup = (xfs_dir2_data_unused_t *)block->u; needlog = needscan = 0; xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog, &needscan); @@ -1107,51 +1116,50 @@ xfs_dir2_sf_to_block( /* * Fill in the tail. */ - btp = xfs_dir2_block_tail_p(mp, hdr); - btp->count = cpu_to_be32(sfp->count + 2); /* ., .. */ + btp = xfs_dir2_block_tail_p(mp, block); + btp->count = cpu_to_be32(sfp->hdr.count + 2); /* ., .. */ btp->stale = 0; blp = xfs_dir2_block_leaf_p(btp); - endoffset = (uint)((char *)blp - (char *)hdr); + endoffset = (uint)((char *)blp - (char *)block); /* * Remove the freespace, we'll manage it. 
*/ xfs_dir2_data_use_free(tp, bp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), + (xfs_dir2_data_aoff_t)((char *)dup - (char *)block), be16_to_cpu(dup->length), &needlog, &needscan); /* * Create entry for . */ dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + XFS_DIR2_DATA_DOT_OFFSET); + ((char *)block + XFS_DIR2_DATA_DOT_OFFSET); dep->inumber = cpu_to_be64(dp->i_ino); dep->namelen = 1; dep->name[0] = '.'; tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); + *tagp = cpu_to_be16((char *)dep - (char *)block); xfs_dir2_data_log_entry(tp, bp, dep); blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, - (char *)dep - (char *)hdr)); + (char *)dep - (char *)block)); /* * Create entry for .. */ dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + XFS_DIR2_DATA_DOTDOT_OFFSET); - dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp)); + ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET); + dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent)); dep->namelen = 2; dep->name[0] = dep->name[1] = '.'; tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); + *tagp = cpu_to_be16((char *)dep - (char *)block); xfs_dir2_data_log_entry(tp, bp, dep); blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, - (char *)dep - (char *)hdr)); + (char *)dep - (char *)block)); offset = XFS_DIR2_DATA_FIRST_OFFSET; /* * Loop over existing entries, stuff them in. */ - i = 0; - if (!sfp->count) + if ((i = 0) == sfp->hdr.count) sfep = NULL; else sfep = xfs_dir2_sf_firstentry(sfp); @@ -1171,40 +1179,43 @@ xfs_dir2_sf_to_block( * There should be a hole here, make one. 
*/ if (offset < newoffset) { - dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); + dup = (xfs_dir2_data_unused_t *) + ((char *)block + offset); dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); dup->length = cpu_to_be16(newoffset - offset); *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16( - ((char *)dup - (char *)hdr)); + ((char *)dup - (char *)block)); xfs_dir2_data_log_unused(tp, bp, dup); - xfs_dir2_data_freeinsert(hdr, dup, &dummy); + (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block, + dup, &dummy); offset += be16_to_cpu(dup->length); continue; } /* * Copy a real entry. */ - dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset); - dep->inumber = cpu_to_be64(xfs_dir2_sfe_get_ino(sfp, sfep)); + dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset); + dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, + xfs_dir2_sf_inumberp(sfep))); dep->namelen = sfep->namelen; memcpy(dep->name, sfep->name, dep->namelen); tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); + *tagp = cpu_to_be16((char *)dep - (char *)block); xfs_dir2_data_log_entry(tp, bp, dep); name.name = sfep->name; name.len = sfep->namelen; blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops-> hashname(&name)); blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, - (char *)dep - (char *)hdr)); - offset = (int)((char *)(tagp + 1) - (char *)hdr); - if (++i == sfp->count) + (char *)dep - (char *)block)); + offset = (int)((char *)(tagp + 1) - (char *)block); + if (++i == sfp->hdr.count) sfep = NULL; else sfep = xfs_dir2_sf_nextentry(sfp, sfep); } /* Done with the temporary buffer */ - kmem_free(sfp); + kmem_free(buf); /* * Sort the leaf entries by hash value. */ diff --git a/trunk/fs/xfs/xfs_dir2_block.h b/trunk/fs/xfs/xfs_dir2_block.h new file mode 100644 index 000000000000..10e689676382 --- /dev/null +++ b/trunk/fs/xfs/xfs_dir2_block.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. + * All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DIR2_BLOCK_H__ +#define __XFS_DIR2_BLOCK_H__ + +/* + * xfs_dir2_block.h + * Directory version 2, single block format structures + */ + +struct uio; +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_dir2_data_hdr; +struct xfs_dir2_leaf_entry; +struct xfs_inode; +struct xfs_mount; +struct xfs_trans; + +/* + * The single block format is as follows: + * xfs_dir2_data_hdr_t structure + * xfs_dir2_data_entry_t and xfs_dir2_data_unused_t structures + * xfs_dir2_leaf_entry_t structures + * xfs_dir2_block_tail_t structure + */ + +#define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: for one block dirs */ + +typedef struct xfs_dir2_block_tail { + __be32 count; /* count of leaf entries */ + __be32 stale; /* count of stale lf entries */ +} xfs_dir2_block_tail_t; + +/* + * Generic single-block structure, for xfs_db. 
+ */ +typedef struct xfs_dir2_block { + xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_BLOCK_MAGIC */ + xfs_dir2_data_union_t u[1]; + xfs_dir2_leaf_entry_t leaf[1]; + xfs_dir2_block_tail_t tail; +} xfs_dir2_block_t; + +/* + * Pointer to the leaf header embedded in a data block (1-block format) + */ +static inline xfs_dir2_block_tail_t * +xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block) +{ + return (((xfs_dir2_block_tail_t *) + ((char *)(block) + (mp)->m_dirblksize)) - 1); +} + +/* + * Pointer to the leaf entries embedded in a data block (1-block format) + */ +static inline struct xfs_dir2_leaf_entry * +xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp) +{ + return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count); +} + +/* + * Function declarations. + */ +extern int xfs_dir2_block_addname(struct xfs_da_args *args); +extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent, + xfs_off_t *offset, filldir_t filldir); +extern int xfs_dir2_block_lookup(struct xfs_da_args *args); +extern int xfs_dir2_block_removename(struct xfs_da_args *args); +extern int xfs_dir2_block_replace(struct xfs_da_args *args); +extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, + struct xfs_dabuf *lbp, struct xfs_dabuf *dbp); +extern int xfs_dir2_sf_to_block(struct xfs_da_args *args); + +#endif /* __XFS_DIR2_BLOCK_H__ */ diff --git a/trunk/fs/xfs/xfs_dir2_data.c b/trunk/fs/xfs/xfs_dir2_data.c index 5bbe2a8a023f..921595b84f5b 100644 --- a/trunk/fs/xfs/xfs_dir2_data.c +++ b/trunk/fs/xfs/xfs_dir2_data.c @@ -23,18 +23,18 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" +#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2_priv.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" #include "xfs_error.h" -STATIC xfs_dir2_data_free_t * 
-xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup); - #ifdef DEBUG /* * Check the consistency of the data block. @@ -50,7 +50,7 @@ xfs_dir2_data_check( xfs_dir2_data_free_t *bf; /* bestfree table */ xfs_dir2_block_tail_t *btp=NULL; /* block tail */ int count; /* count of entries found */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *d; /* data block pointer */ xfs_dir2_data_entry_t *dep; /* data entry */ xfs_dir2_data_free_t *dfp; /* bestfree entry */ xfs_dir2_data_unused_t *dup; /* unused entry */ @@ -66,19 +66,17 @@ xfs_dir2_data_check( struct xfs_name name; mp = dp->i_mount; - hdr = bp->data; - bf = hdr->bestfree; - p = (char *)(hdr + 1); - - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { - btp = xfs_dir2_block_tail_p(mp, hdr); + d = bp->data; + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + bf = d->hdr.bestfree; + p = (char *)d->u; + if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { + btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); lep = xfs_dir2_block_leaf_p(btp); endp = (char *)lep; - } else { - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); - endp = (char *)hdr + mp->m_dirblksize; - } - + } else + endp = (char *)d + mp->m_dirblksize; count = lastfree = freeseen = 0; /* * Account for zero bestfree entries. 
@@ -110,8 +108,8 @@ xfs_dir2_data_check( if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { ASSERT(lastfree == 0); ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == - (char *)dup - (char *)hdr); - dfp = xfs_dir2_data_freefind(hdr, dup); + (char *)dup - (char *)d); + dfp = xfs_dir2_data_freefind(d, dup); if (dfp) { i = (int)(dfp - bf); ASSERT((freeseen & (1 << i)) == 0); @@ -134,13 +132,13 @@ xfs_dir2_data_check( ASSERT(dep->namelen != 0); ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0); ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) == - (char *)dep - (char *)hdr); + (char *)dep - (char *)d); count++; lastfree = 0; - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { + if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, (xfs_dir2_data_aoff_t) - ((char *)dep - (char *)hdr)); + ((char *)dep - (char *)d)); name.name = dep->name; name.len = dep->namelen; hash = mp->m_dirnameops->hashname(&name); @@ -157,10 +155,9 @@ xfs_dir2_data_check( * Need to have seen all the entries and all the bestfree slots. */ ASSERT(freeseen == 7); - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { + if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { - if (lep[i].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + if (be32_to_cpu(lep[i].address) == XFS_DIR2_NULL_DATAPTR) stale++; if (i > 0) ASSERT(be32_to_cpu(lep[i].hashval) >= be32_to_cpu(lep[i - 1].hashval)); @@ -175,9 +172,9 @@ xfs_dir2_data_check( * Given a data block and an unused entry from that block, * return the bestfree entry if any that corresponds to it. 
*/ -STATIC xfs_dir2_data_free_t * +xfs_dir2_data_free_t * xfs_dir2_data_freefind( - xfs_dir2_data_hdr_t *hdr, /* data block */ + xfs_dir2_data_t *d, /* data block */ xfs_dir2_data_unused_t *dup) /* data unused entry */ { xfs_dir2_data_free_t *dfp; /* bestfree entry */ @@ -187,17 +184,17 @@ xfs_dir2_data_freefind( int seenzero; /* saw a 0 bestfree entry */ #endif - off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); + off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)d); #if defined(DEBUG) && defined(__KERNEL__) /* * Validate some consistency in the bestfree table. * Check order, non-overlapping entries, and if we find the * one we're looking for it has to be exact. */ - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); - for (dfp = &hdr->bestfree[0], seenzero = matched = 0; - dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT]; + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + for (dfp = &d->hdr.bestfree[0], seenzero = matched = 0; + dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT]; dfp++) { if (!dfp->offset) { ASSERT(!dfp->length); @@ -213,7 +210,7 @@ xfs_dir2_data_freefind( else ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off); ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length)); - if (dfp > &hdr->bestfree[0]) + if (dfp > &d->hdr.bestfree[0]) ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length)); } #endif @@ -222,13 +219,13 @@ xfs_dir2_data_freefind( * it can't be there since they're sorted. */ if (be16_to_cpu(dup->length) < - be16_to_cpu(hdr->bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length)) + be16_to_cpu(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length)) return NULL; /* * Look at the three bestfree entries for our guy. 
*/ - for (dfp = &hdr->bestfree[0]; - dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT]; + for (dfp = &d->hdr.bestfree[0]; + dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT]; dfp++) { if (!dfp->offset) return NULL; @@ -246,7 +243,7 @@ xfs_dir2_data_freefind( */ xfs_dir2_data_free_t * /* entry inserted */ xfs_dir2_data_freeinsert( - xfs_dir2_data_hdr_t *hdr, /* data block pointer */ + xfs_dir2_data_t *d, /* data block pointer */ xfs_dir2_data_unused_t *dup, /* unused space */ int *loghead) /* log the data header (out) */ { @@ -254,13 +251,12 @@ xfs_dir2_data_freeinsert( xfs_dir2_data_free_t new; /* new bestfree entry */ #ifdef __KERNEL__ - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); #endif - dfp = hdr->bestfree; + dfp = d->hdr.bestfree; new.length = dup->length; - new.offset = cpu_to_be16((char *)dup - (char *)hdr); - + new.offset = cpu_to_be16((char *)dup - (char *)d); /* * Insert at position 0, 1, or 2; or not at all. */ @@ -290,36 +286,36 @@ xfs_dir2_data_freeinsert( */ STATIC void xfs_dir2_data_freeremove( - xfs_dir2_data_hdr_t *hdr, /* data block header */ + xfs_dir2_data_t *d, /* data block pointer */ xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */ int *loghead) /* out: log data header */ { #ifdef __KERNEL__ - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); #endif /* * It's the first entry, slide the next 2 up. */ - if (dfp == &hdr->bestfree[0]) { - hdr->bestfree[0] = hdr->bestfree[1]; - hdr->bestfree[1] = hdr->bestfree[2]; + if (dfp == &d->hdr.bestfree[0]) { + d->hdr.bestfree[0] = d->hdr.bestfree[1]; + d->hdr.bestfree[1] = d->hdr.bestfree[2]; } /* * It's the second entry, slide the 3rd entry up. 
*/ - else if (dfp == &hdr->bestfree[1]) - hdr->bestfree[1] = hdr->bestfree[2]; + else if (dfp == &d->hdr.bestfree[1]) + d->hdr.bestfree[1] = d->hdr.bestfree[2]; /* * Must be the last entry. */ else - ASSERT(dfp == &hdr->bestfree[2]); + ASSERT(dfp == &d->hdr.bestfree[2]); /* * Clear the 3rd entry, must be zero now. */ - hdr->bestfree[2].length = 0; - hdr->bestfree[2].offset = 0; + d->hdr.bestfree[2].length = 0; + d->hdr.bestfree[2].offset = 0; *loghead = 1; } @@ -329,7 +325,7 @@ xfs_dir2_data_freeremove( void xfs_dir2_data_freescan( xfs_mount_t *mp, /* filesystem mount point */ - xfs_dir2_data_hdr_t *hdr, /* data block header */ + xfs_dir2_data_t *d, /* data block pointer */ int *loghead) /* out: log data header */ { xfs_dir2_block_tail_t *btp; /* block tail */ @@ -339,23 +335,23 @@ xfs_dir2_data_freescan( char *p; /* current entry pointer */ #ifdef __KERNEL__ - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); #endif /* * Start by clearing the table. */ - memset(hdr->bestfree, 0, sizeof(hdr->bestfree)); + memset(d->hdr.bestfree, 0, sizeof(d->hdr.bestfree)); *loghead = 1; /* * Set up pointers. */ - p = (char *)(hdr + 1); - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { - btp = xfs_dir2_block_tail_p(mp, hdr); + p = (char *)d->u; + if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { + btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); endp = (char *)xfs_dir2_block_leaf_p(btp); } else - endp = (char *)hdr + mp->m_dirblksize; + endp = (char *)d + mp->m_dirblksize; /* * Loop over the block's entries. */ @@ -365,9 +361,9 @@ xfs_dir2_data_freescan( * If it's a free entry, insert it. 
*/ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - ASSERT((char *)dup - (char *)hdr == + ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); - xfs_dir2_data_freeinsert(hdr, dup, loghead); + xfs_dir2_data_freeinsert(d, dup, loghead); p += be16_to_cpu(dup->length); } /* @@ -375,7 +371,7 @@ xfs_dir2_data_freescan( */ else { dep = (xfs_dir2_data_entry_t *)p; - ASSERT((char *)dep - (char *)hdr == + ASSERT((char *)dep - (char *)d == be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep))); p += xfs_dir2_data_entsize(dep->namelen); } @@ -393,7 +389,7 @@ xfs_dir2_data_init( xfs_dabuf_t **bpp) /* output block buffer */ { xfs_dabuf_t *bp; /* block buffer */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *d; /* pointer to block */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_data_unused_t *dup; /* unused entry pointer */ int error; /* error return value */ @@ -414,28 +410,26 @@ xfs_dir2_data_init( return error; } ASSERT(bp != NULL); - /* * Initialize the header. */ - hdr = bp->data; - hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); - hdr->bestfree[0].offset = cpu_to_be16(sizeof(*hdr)); + d = bp->data; + d->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); + d->hdr.bestfree[0].offset = cpu_to_be16(sizeof(d->hdr)); for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { - hdr->bestfree[i].length = 0; - hdr->bestfree[i].offset = 0; + d->hdr.bestfree[i].length = 0; + d->hdr.bestfree[i].offset = 0; } - /* * Set up an unused entry for the block's body. 
*/ - dup = (xfs_dir2_data_unused_t *)(hdr + 1); + dup = &d->u[0].unused; dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); - t = mp->m_dirblksize - (uint)sizeof(*hdr); - hdr->bestfree[0].length = cpu_to_be16(t); + t=mp->m_dirblksize - (uint)sizeof(d->hdr); + d->hdr.bestfree[0].length = cpu_to_be16(t); dup->length = cpu_to_be16(t); - *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr); + *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)d); /* * Log it and return it. */ @@ -454,14 +448,14 @@ xfs_dir2_data_log_entry( xfs_dabuf_t *bp, /* block buffer */ xfs_dir2_data_entry_t *dep) /* data entry pointer */ { - xfs_dir2_data_hdr_t *hdr = bp->data; - - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); + xfs_dir2_data_t *d; /* data block pointer */ - xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr), + d = bp->data; + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d), (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) - - (char *)hdr - 1)); + (char *)d - 1)); } /* @@ -472,12 +466,13 @@ xfs_dir2_data_log_header( xfs_trans_t *tp, /* transaction pointer */ xfs_dabuf_t *bp) /* block buffer */ { - xfs_dir2_data_hdr_t *hdr = bp->data; + xfs_dir2_data_t *d; /* data block pointer */ - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); - - xfs_da_log_buf(tp, bp, 0, sizeof(*hdr) - 1); + d = bp->data; + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + xfs_da_log_buf(tp, bp, (uint)((char *)&d->hdr - (char *)d), + (uint)(sizeof(d->hdr) - 1)); } /* @@ -489,23 +484,23 @@ xfs_dir2_data_log_unused( xfs_dabuf_t *bp, /* block buffer */ xfs_dir2_data_unused_t *dup) /* data unused pointer */ { - xfs_dir2_data_hdr_t *hdr = bp->data; - - 
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); + xfs_dir2_data_t *d; /* data block pointer */ + d = bp->data; + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); /* * Log the first part of the unused entry. */ - xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr), + xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)d), (uint)((char *)&dup->length + sizeof(dup->length) - - 1 - (char *)hdr)); + 1 - (char *)d)); /* * Log the end (tag) of the unused entry. */ xfs_da_log_buf(tp, bp, - (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr), - (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr + + (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d), + (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d + sizeof(xfs_dir2_data_off_t) - 1)); } @@ -522,7 +517,7 @@ xfs_dir2_data_make_free( int *needlogp, /* out: log header */ int *needscanp) /* out: regen bestfree */ { - xfs_dir2_data_hdr_t *hdr; /* data block pointer */ + xfs_dir2_data_t *d; /* data block pointer */ xfs_dir2_data_free_t *dfp; /* bestfree pointer */ char *endptr; /* end of data area */ xfs_mount_t *mp; /* filesystem mount point */ @@ -532,29 +527,28 @@ xfs_dir2_data_make_free( xfs_dir2_data_unused_t *prevdup; /* unused entry before us */ mp = tp->t_mountp; - hdr = bp->data; - + d = bp->data; /* * Figure out where the end of the data area is. 
*/ - if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)) - endptr = (char *)hdr + mp->m_dirblksize; + if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC) + endptr = (char *)d + mp->m_dirblksize; else { xfs_dir2_block_tail_t *btp; /* block tail */ - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); - btp = xfs_dir2_block_tail_p(mp, hdr); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); endptr = (char *)xfs_dir2_block_leaf_p(btp); } /* * If this isn't the start of the block, then back up to * the previous entry and see if it's free. */ - if (offset > sizeof(*hdr)) { + if (offset > sizeof(d->hdr)) { __be16 *tagp; /* tag just before us */ - tagp = (__be16 *)((char *)hdr + offset) - 1; - prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); + tagp = (__be16 *)((char *)d + offset) - 1; + prevdup = (xfs_dir2_data_unused_t *)((char *)d + be16_to_cpu(*tagp)); if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG) prevdup = NULL; } else @@ -563,9 +557,9 @@ xfs_dir2_data_make_free( * If this isn't the end of the block, see if the entry after * us is free. */ - if ((char *)hdr + offset + len < endptr) { + if ((char *)d + offset + len < endptr) { postdup = - (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); + (xfs_dir2_data_unused_t *)((char *)d + offset + len); if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG) postdup = NULL; } else @@ -582,21 +576,21 @@ xfs_dir2_data_make_free( /* * See if prevdup and/or postdup are in bestfree table. */ - dfp = xfs_dir2_data_freefind(hdr, prevdup); - dfp2 = xfs_dir2_data_freefind(hdr, postdup); + dfp = xfs_dir2_data_freefind(d, prevdup); + dfp2 = xfs_dir2_data_freefind(d, postdup); /* * We need a rescan unless there are exactly 2 free entries * namely our two. Then we know what's happening, otherwise * since the third bestfree is there, there might be more * entries. 
*/ - needscan = (hdr->bestfree[2].length != 0); + needscan = (d->hdr.bestfree[2].length != 0); /* * Fix up the new big freespace. */ be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(prevdup) = - cpu_to_be16((char *)prevdup - (char *)hdr); + cpu_to_be16((char *)prevdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, prevdup); if (!needscan) { /* @@ -606,18 +600,18 @@ xfs_dir2_data_make_free( * Remove entry 1 first then entry 0. */ ASSERT(dfp && dfp2); - if (dfp == &hdr->bestfree[1]) { - dfp = &hdr->bestfree[0]; + if (dfp == &d->hdr.bestfree[1]) { + dfp = &d->hdr.bestfree[0]; ASSERT(dfp2 == dfp); - dfp2 = &hdr->bestfree[1]; + dfp2 = &d->hdr.bestfree[1]; } - xfs_dir2_data_freeremove(hdr, dfp2, needlogp); - xfs_dir2_data_freeremove(hdr, dfp, needlogp); + xfs_dir2_data_freeremove(d, dfp2, needlogp); + xfs_dir2_data_freeremove(d, dfp, needlogp); /* * Now insert the new entry. */ - dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp); - ASSERT(dfp == &hdr->bestfree[0]); + dfp = xfs_dir2_data_freeinsert(d, prevdup, needlogp); + ASSERT(dfp == &d->hdr.bestfree[0]); ASSERT(dfp->length == prevdup->length); ASSERT(!dfp[1].length); ASSERT(!dfp[2].length); @@ -627,10 +621,10 @@ xfs_dir2_data_make_free( * The entry before us is free, merge with it. */ else if (prevdup) { - dfp = xfs_dir2_data_freefind(hdr, prevdup); + dfp = xfs_dir2_data_freefind(d, prevdup); be16_add_cpu(&prevdup->length, len); *xfs_dir2_data_unused_tag_p(prevdup) = - cpu_to_be16((char *)prevdup - (char *)hdr); + cpu_to_be16((char *)prevdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, prevdup); /* * If the previous entry was in the table, the new entry @@ -638,27 +632,27 @@ xfs_dir2_data_make_free( * the old one and add the new one. 
*/ if (dfp) { - xfs_dir2_data_freeremove(hdr, dfp, needlogp); - xfs_dir2_data_freeinsert(hdr, prevdup, needlogp); + xfs_dir2_data_freeremove(d, dfp, needlogp); + (void)xfs_dir2_data_freeinsert(d, prevdup, needlogp); } /* * Otherwise we need a scan if the new entry is big enough. */ else { needscan = be16_to_cpu(prevdup->length) > - be16_to_cpu(hdr->bestfree[2].length); + be16_to_cpu(d->hdr.bestfree[2].length); } } /* * The following entry is free, merge with it. */ else if (postdup) { - dfp = xfs_dir2_data_freefind(hdr, postdup); - newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); + dfp = xfs_dir2_data_freefind(d, postdup); + newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)hdr); + cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); /* * If the following entry was in the table, the new entry @@ -666,28 +660,28 @@ xfs_dir2_data_make_free( * the old one and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(hdr, dfp, needlogp); - xfs_dir2_data_freeinsert(hdr, newdup, needlogp); + xfs_dir2_data_freeremove(d, dfp, needlogp); + (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); } /* * Otherwise we need a scan if the new entry is big enough. */ else { needscan = be16_to_cpu(newdup->length) > - be16_to_cpu(hdr->bestfree[2].length); + be16_to_cpu(d->hdr.bestfree[2].length); } } /* * Neither neighbor is free. Make a new entry. 
*/ else { - newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); + newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)hdr); + cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); - xfs_dir2_data_freeinsert(hdr, newdup, needlogp); + (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); } *needscanp = needscan; } @@ -705,7 +699,7 @@ xfs_dir2_data_use_free( int *needlogp, /* out: need to log header */ int *needscanp) /* out: need regen bestfree */ { - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *d; /* data block */ xfs_dir2_data_free_t *dfp; /* bestfree pointer */ int matchback; /* matches end of freespace */ int matchfront; /* matches start of freespace */ @@ -714,24 +708,24 @@ xfs_dir2_data_use_free( xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ int oldlen; /* old unused entry's length */ - hdr = bp->data; - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); + d = bp->data; + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); - ASSERT(offset >= (char *)dup - (char *)hdr); - ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr); - ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); + ASSERT(offset >= (char *)dup - (char *)d); + ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d); + ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); /* * Look up the entry in the bestfree table. 
*/ - dfp = xfs_dir2_data_freefind(hdr, dup); + dfp = xfs_dir2_data_freefind(d, dup); oldlen = be16_to_cpu(dup->length); - ASSERT(dfp || oldlen <= be16_to_cpu(hdr->bestfree[2].length)); + ASSERT(dfp || oldlen <= be16_to_cpu(d->hdr.bestfree[2].length)); /* * Check for alignment with front and back of the entry. */ - matchfront = (char *)dup - (char *)hdr == offset; - matchback = (char *)dup + oldlen - (char *)hdr == offset + len; + matchfront = (char *)dup - (char *)d == offset; + matchback = (char *)dup + oldlen - (char *)d == offset + len; ASSERT(*needscanp == 0); needscan = 0; /* @@ -740,9 +734,9 @@ xfs_dir2_data_use_free( */ if (matchfront && matchback) { if (dfp) { - needscan = (hdr->bestfree[2].offset != 0); + needscan = (d->hdr.bestfree[2].offset != 0); if (!needscan) - xfs_dir2_data_freeremove(hdr, dfp, needlogp); + xfs_dir2_data_freeremove(d, dfp, needlogp); } } /* @@ -750,27 +744,27 @@ xfs_dir2_data_use_free( * Make a new entry with the remaining freespace. */ else if (matchfront) { - newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); + newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(oldlen - len); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)hdr); + cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); /* * If it was in the table, remove it and add the new one. 
*/ if (dfp) { - xfs_dir2_data_freeremove(hdr, dfp, needlogp); - dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp); + xfs_dir2_data_freeremove(d, dfp, needlogp); + dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); ASSERT(dfp != NULL); ASSERT(dfp->length == newdup->length); - ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); + ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d); /* * If we got inserted at the last slot, * that means we don't know if there was a better * choice for the last slot, or not. Rescan. */ - needscan = dfp == &hdr->bestfree[2]; + needscan = dfp == &d->hdr.bestfree[2]; } } /* @@ -779,25 +773,25 @@ xfs_dir2_data_use_free( */ else if (matchback) { newdup = dup; - newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); + newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)hdr); + cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); /* * If it was in the table, remove it and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(hdr, dfp, needlogp); - dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp); + xfs_dir2_data_freeremove(d, dfp, needlogp); + dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); ASSERT(dfp != NULL); ASSERT(dfp->length == newdup->length); - ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); + ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d); /* * If we got inserted at the last slot, * that means we don't know if there was a better * choice for the last slot, or not. Rescan. 
*/ - needscan = dfp == &hdr->bestfree[2]; + needscan = dfp == &d->hdr.bestfree[2]; } } /* @@ -806,15 +800,15 @@ xfs_dir2_data_use_free( */ else { newdup = dup; - newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); + newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)hdr); + cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); - newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); + newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len); newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); *xfs_dir2_data_unused_tag_p(newdup2) = - cpu_to_be16((char *)newdup2 - (char *)hdr); + cpu_to_be16((char *)newdup2 - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup2); /* * If the old entry was in the table, we need to scan @@ -825,12 +819,13 @@ xfs_dir2_data_use_free( * the 2 new will work. */ if (dfp) { - needscan = (hdr->bestfree[2].length != 0); + needscan = (d->hdr.bestfree[2].length != 0); if (!needscan) { - xfs_dir2_data_freeremove(hdr, dfp, needlogp); - xfs_dir2_data_freeinsert(hdr, newdup, needlogp); - xfs_dir2_data_freeinsert(hdr, newdup2, - needlogp); + xfs_dir2_data_freeremove(d, dfp, needlogp); + (void)xfs_dir2_data_freeinsert(d, newdup, + needlogp); + (void)xfs_dir2_data_freeinsert(d, newdup2, + needlogp); } } } diff --git a/trunk/fs/xfs/xfs_dir2_data.h b/trunk/fs/xfs/xfs_dir2_data.h new file mode 100644 index 000000000000..efbc290c7fec --- /dev/null +++ b/trunk/fs/xfs/xfs_dir2_data.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2000,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DIR2_DATA_H__ +#define __XFS_DIR2_DATA_H__ + +/* + * Directory format 2, data block structures. + */ + +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_inode; +struct xfs_trans; + +/* + * Constants. + */ +#define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: for multiblock dirs */ +#define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */ +#define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG) +#define XFS_DIR2_DATA_FREE_TAG 0xffff +#define XFS_DIR2_DATA_FD_COUNT 3 + +/* + * Directory address space divided into sections, + * spaces separated by 32GB. + */ +#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG)) +#define XFS_DIR2_DATA_SPACE 0 +#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) +#define XFS_DIR2_DATA_FIRSTDB(mp) \ + xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET) + +/* + * Offsets of . and .. in data space (always block 0) + */ +#define XFS_DIR2_DATA_DOT_OFFSET \ + ((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t)) +#define XFS_DIR2_DATA_DOTDOT_OFFSET \ + (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1)) +#define XFS_DIR2_DATA_FIRST_OFFSET \ + (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2)) + +/* + * Structures. + */ + +/* + * Describe a free area in the data block. + * The freespace will be formatted as a xfs_dir2_data_unused_t. + */ +typedef struct xfs_dir2_data_free { + __be16 offset; /* start of freespace */ + __be16 length; /* length of freespace */ +} xfs_dir2_data_free_t; + +/* + * Header for the data blocks. 
+ * Always at the beginning of a directory-sized block. + * The code knows that XFS_DIR2_DATA_FD_COUNT is 3. + */ +typedef struct xfs_dir2_data_hdr { + __be32 magic; /* XFS_DIR2_DATA_MAGIC */ + /* or XFS_DIR2_BLOCK_MAGIC */ + xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT]; +} xfs_dir2_data_hdr_t; + +/* + * Active entry in a data block. Aligned to 8 bytes. + * Tag appears as the last 2 bytes. + */ +typedef struct xfs_dir2_data_entry { + __be64 inumber; /* inode number */ + __u8 namelen; /* name length */ + __u8 name[1]; /* name bytes, no null */ + /* variable offset */ + __be16 tag; /* starting offset of us */ +} xfs_dir2_data_entry_t; + +/* + * Unused entry in a data block. Aligned to 8 bytes. + * Tag appears as the last 2 bytes. + */ +typedef struct xfs_dir2_data_unused { + __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */ + __be16 length; /* total free length */ + /* variable offset */ + __be16 tag; /* starting offset of us */ +} xfs_dir2_data_unused_t; + +typedef union { + xfs_dir2_data_entry_t entry; + xfs_dir2_data_unused_t unused; +} xfs_dir2_data_union_t; + +/* + * Generic data block structure, for xfs_db. + */ +typedef struct xfs_dir2_data { + xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_DATA_MAGIC */ + xfs_dir2_data_union_t u[1]; +} xfs_dir2_data_t; + +/* + * Macros. + */ + +/* + * Size of a data entry. + */ +static inline int xfs_dir2_data_entsize(int n) +{ + return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \ + (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN); +} + +/* + * Pointer to an entry's tag word. + */ +static inline __be16 * +xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep) +{ + return (__be16 *)((char *)dep + + xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16)); +} + +/* + * Pointer to a freespace's tag word. + */ +static inline __be16 * +xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup) +{ + return (__be16 *)((char *)dup + + be16_to_cpu(dup->length) - sizeof(__be16)); +} + +/* + * Function declarations. 
+ */ +#ifdef DEBUG +extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp); +#else +#define xfs_dir2_data_check(dp,bp) +#endif +extern xfs_dir2_data_free_t *xfs_dir2_data_freefind(xfs_dir2_data_t *d, + xfs_dir2_data_unused_t *dup); +extern xfs_dir2_data_free_t *xfs_dir2_data_freeinsert(xfs_dir2_data_t *d, + xfs_dir2_data_unused_t *dup, int *loghead); +extern void xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d, + int *loghead); +extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, + struct xfs_dabuf **bpp); +extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp, + xfs_dir2_data_entry_t *dep); +extern void xfs_dir2_data_log_header(struct xfs_trans *tp, + struct xfs_dabuf *bp); +extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp, + xfs_dir2_data_unused_t *dup); +extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp, + xfs_dir2_data_aoff_t offset, + xfs_dir2_data_aoff_t len, int *needlogp, + int *needscanp); +extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp, + xfs_dir2_data_unused_t *dup, + xfs_dir2_data_aoff_t offset, + xfs_dir2_data_aoff_t len, int *needlogp, + int *needscanp); + +#endif /* __XFS_DIR2_DATA_H__ */ diff --git a/trunk/fs/xfs/xfs_dir2_format.h b/trunk/fs/xfs/xfs_dir2_format.h deleted file mode 100644 index 07270981f48f..000000000000 --- a/trunk/fs/xfs/xfs_dir2_format.h +++ /dev/null @@ -1,597 +0,0 @@ -/* - * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR2_FORMAT_H__ -#define __XFS_DIR2_FORMAT_H__ - -/* - * Directory version 2. - * - * There are 4 possible formats: - * - shortform - embedded into the inode - * - single block - data with embedded leaf at the end - * - multiple data blocks, single leaf+freeindex block - * - data blocks, node and leaf blocks (btree), freeindex blocks - * - * Note: many node blocks structures and constants are shared with the attr - * code and defined in xfs_da_btree.h. - */ - -#define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: single block dirs */ -#define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: multiblock dirs */ -#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F: free index blocks */ - -/* - * Byte offset in data block and shortform entry. - */ -typedef __uint16_t xfs_dir2_data_off_t; -#define NULLDATAOFF 0xffffU -typedef uint xfs_dir2_data_aoff_t; /* argument form */ - -/* - * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t. - * Only need 16 bits, this is the byte offset into the single block form. - */ -typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t; - -/* - * Offset in data space of a data entry. - */ -typedef __uint32_t xfs_dir2_dataptr_t; -#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff) -#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0) - -/* - * Byte offset in a directory. - */ -typedef xfs_off_t xfs_dir2_off_t; - -/* - * Directory block number (logical dirblk in file) - */ -typedef __uint32_t xfs_dir2_db_t; - -/* - * Inode number stored as 8 8-bit values. - */ -typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t; - -/* - * Inode number stored as 4 8-bit values. 
- * Works a lot of the time, when all the inode numbers in a directory - * fit in 32 bits. - */ -typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t; - -typedef union { - xfs_dir2_ino8_t i8; - xfs_dir2_ino4_t i4; -} xfs_dir2_inou_t; -#define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL) - -/* - * Directory layout when stored internal to an inode. - * - * Small directories are packed as tightly as possible so as to fit into the - * literal area of the inode. These "shortform" directories consist of a - * single xfs_dir2_sf_hdr header followed by zero or more xfs_dir2_sf_entry - * structures. Due the different inode number storage size and the variable - * length name field in the xfs_dir2_sf_entry all these structure are - * variable length, and the accessors in this file should be used to iterate - * over them. - */ -typedef struct xfs_dir2_sf_hdr { - __uint8_t count; /* count of entries */ - __uint8_t i8count; /* count of 8-byte inode #s */ - xfs_dir2_inou_t parent; /* parent dir inode number */ -} __arch_pack xfs_dir2_sf_hdr_t; - -typedef struct xfs_dir2_sf_entry { - __u8 namelen; /* actual name length */ - xfs_dir2_sf_off_t offset; /* saved offset */ - __u8 name[]; /* name, variable size */ - /* - * A xfs_dir2_ino8_t or xfs_dir2_ino4_t follows here, at a - * variable offset after the name. 
- */ -} __arch_pack xfs_dir2_sf_entry_t; - -static inline int xfs_dir2_sf_hdr_size(int i8count) -{ - return sizeof(struct xfs_dir2_sf_hdr) - - (i8count == 0) * - (sizeof(xfs_dir2_ino8_t) - sizeof(xfs_dir2_ino4_t)); -} - -static inline xfs_dir2_data_aoff_t -xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep) -{ - return get_unaligned_be16(&sfep->offset.i); -} - -static inline void -xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off) -{ - put_unaligned_be16(off, &sfep->offset.i); -} - -static inline int -xfs_dir2_sf_entsize(struct xfs_dir2_sf_hdr *hdr, int len) -{ - return sizeof(struct xfs_dir2_sf_entry) + /* namelen + offset */ - len + /* name */ - (hdr->i8count ? /* ino */ - sizeof(xfs_dir2_ino8_t) : - sizeof(xfs_dir2_ino4_t)); -} - -static inline struct xfs_dir2_sf_entry * -xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr) -{ - return (struct xfs_dir2_sf_entry *) - ((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count)); -} - -static inline struct xfs_dir2_sf_entry * -xfs_dir2_sf_nextentry(struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return (struct xfs_dir2_sf_entry *) - ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen)); -} - - -/* - * Data block structures. - * - * A pure data block looks like the following drawing on disk: - * - * +-------------------------------------------------+ - * | xfs_dir2_data_hdr_t | - * +-------------------------------------------------+ - * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | - * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | - * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | - * | ... | - * +-------------------------------------------------+ - * | unused space | - * +-------------------------------------------------+ - * - * As all the entries are variable size structures the accessors below should - * be used to iterate over them. 
- * - * In addition to the pure data blocks for the data and node formats, - * most structures are also used for the combined data/freespace "block" - * format below. - */ - -#define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */ -#define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG) -#define XFS_DIR2_DATA_FREE_TAG 0xffff -#define XFS_DIR2_DATA_FD_COUNT 3 - -/* - * Directory address space divided into sections, - * spaces separated by 32GB. - */ -#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG)) -#define XFS_DIR2_DATA_SPACE 0 -#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_DATA_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET) - -/* - * Offsets of . and .. in data space (always block 0) - */ -#define XFS_DIR2_DATA_DOT_OFFSET \ - ((xfs_dir2_data_aoff_t)sizeof(struct xfs_dir2_data_hdr)) -#define XFS_DIR2_DATA_DOTDOT_OFFSET \ - (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1)) -#define XFS_DIR2_DATA_FIRST_OFFSET \ - (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2)) - -/* - * Describe a free area in the data block. - * - * The freespace will be formatted as a xfs_dir2_data_unused_t. - */ -typedef struct xfs_dir2_data_free { - __be16 offset; /* start of freespace */ - __be16 length; /* length of freespace */ -} xfs_dir2_data_free_t; - -/* - * Header for the data blocks. - * - * The code knows that XFS_DIR2_DATA_FD_COUNT is 3. - */ -typedef struct xfs_dir2_data_hdr { - __be32 magic; /* XFS_DIR2_DATA_MAGIC or */ - /* XFS_DIR2_BLOCK_MAGIC */ - xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT]; -} xfs_dir2_data_hdr_t; - -/* - * Active entry in a data block. - * - * Aligned to 8 bytes. After the variable length name field there is a - * 2 byte tag field, which can be accessed using xfs_dir2_data_entry_tag_p. 
- */ -typedef struct xfs_dir2_data_entry { - __be64 inumber; /* inode number */ - __u8 namelen; /* name length */ - __u8 name[]; /* name bytes, no null */ - /* __be16 tag; */ /* starting offset of us */ -} xfs_dir2_data_entry_t; - -/* - * Unused entry in a data block. - * - * Aligned to 8 bytes. Tag appears as the last 2 bytes and must be accessed - * using xfs_dir2_data_unused_tag_p. - */ -typedef struct xfs_dir2_data_unused { - __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */ - __be16 length; /* total free length */ - /* variable offset */ - __be16 tag; /* starting offset of us */ -} xfs_dir2_data_unused_t; - -/* - * Size of a data entry. - */ -static inline int xfs_dir2_data_entsize(int n) -{ - return (int)roundup(offsetof(struct xfs_dir2_data_entry, name[0]) + n + - (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN); -} - -/* - * Pointer to an entry's tag word. - */ -static inline __be16 * -xfs_dir2_data_entry_tag_p(struct xfs_dir2_data_entry *dep) -{ - return (__be16 *)((char *)dep + - xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16)); -} - -/* - * Pointer to a freespace's tag word. - */ -static inline __be16 * -xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup) -{ - return (__be16 *)((char *)dup + - be16_to_cpu(dup->length) - sizeof(__be16)); -} - -/* - * Leaf block structures. - * - * A pure leaf block looks like the following drawing on disk: - * - * +---------------------------+ - * | xfs_dir2_leaf_hdr_t | - * +---------------------------+ - * | xfs_dir2_leaf_entry_t | - * | xfs_dir2_leaf_entry_t | - * | xfs_dir2_leaf_entry_t | - * | xfs_dir2_leaf_entry_t | - * | ... | - * +---------------------------+ - * | xfs_dir2_data_off_t | - * | xfs_dir2_data_off_t | - * | xfs_dir2_data_off_t | - * | ... 
| - * +---------------------------+ - * | xfs_dir2_leaf_tail_t | - * +---------------------------+ - * - * The xfs_dir2_data_off_t members (bests) and tail are at the end of the block - * for single-leaf (magic = XFS_DIR2_LEAF1_MAGIC) blocks only, but not present - * for directories with separate leaf nodes and free space blocks - * (magic = XFS_DIR2_LEAFN_MAGIC). - * - * As all the entries are variable size structures the accessors below should - * be used to iterate over them. - */ - -/* - * Offset of the leaf/node space. First block in this space - * is the btree root. - */ -#define XFS_DIR2_LEAF_SPACE 1 -#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_LEAF_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET) - -/* - * Leaf block header. - */ -typedef struct xfs_dir2_leaf_hdr { - xfs_da_blkinfo_t info; /* header for da routines */ - __be16 count; /* count of entries */ - __be16 stale; /* count of stale entries */ -} xfs_dir2_leaf_hdr_t; - -/* - * Leaf block entry. - */ -typedef struct xfs_dir2_leaf_entry { - __be32 hashval; /* hash value of name */ - __be32 address; /* address of data entry */ -} xfs_dir2_leaf_entry_t; - -/* - * Leaf block tail. - */ -typedef struct xfs_dir2_leaf_tail { - __be32 bestcount; -} xfs_dir2_leaf_tail_t; - -/* - * Leaf block. - */ -typedef struct xfs_dir2_leaf { - xfs_dir2_leaf_hdr_t hdr; /* leaf header */ - xfs_dir2_leaf_entry_t ents[]; /* entries */ -} xfs_dir2_leaf_t; - -/* - * DB blocks here are logical directory block numbers, not filesystem blocks. - */ - -static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp) -{ - return (mp->m_dirblksize - (uint)sizeof(struct xfs_dir2_leaf_hdr)) / - (uint)sizeof(struct xfs_dir2_leaf_entry); -} - -/* - * Get address of the bestcount field in the single-leaf block. 
- */ -static inline struct xfs_dir2_leaf_tail * -xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp) -{ - return (struct xfs_dir2_leaf_tail *) - ((char *)lp + mp->m_dirblksize - - sizeof(struct xfs_dir2_leaf_tail)); -} - -/* - * Get address of the bests array in the single-leaf block. - */ -static inline __be16 * -xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp) -{ - return (__be16 *)ltp - be32_to_cpu(ltp->bestcount); -} - -/* - * Convert dataptr to byte in file space - */ -static inline xfs_dir2_off_t -xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; -} - -/* - * Convert byte in file space to dataptr. It had better be aligned. - */ -static inline xfs_dir2_dataptr_t -xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); -} - -/* - * Convert byte in space to (DB) block - */ -static inline xfs_dir2_db_t -xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_db_t) - (by >> (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)); -} - -/* - * Convert dataptr to a block number - */ -static inline xfs_dir2_db_t -xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp)); -} - -/* - * Convert byte in space to offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_data_aoff_t)(by & - ((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) - 1)); -} - -/* - * Convert dataptr to a byte offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp)); -} - -/* - * Convert block and offset to byte in space - */ -static inline xfs_dir2_off_t -xfs_dir2_db_off_to_byte(struct xfs_mount 
*mp, xfs_dir2_db_t db, - xfs_dir2_data_aoff_t o) -{ - return ((xfs_dir2_off_t)db << - (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) + o; -} - -/* - * Convert block (DB) to block (dablk) - */ -static inline xfs_dablk_t -xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return (xfs_dablk_t)(db << mp->m_sb.sb_dirblklog); -} - -/* - * Convert byte in space to (DA) block - */ -static inline xfs_dablk_t -xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by)); -} - -/* - * Convert block and offset to dataptr - */ -static inline xfs_dir2_dataptr_t -xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db, - xfs_dir2_data_aoff_t o) -{ - return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o)); -} - -/* - * Convert block (dablk) to block (DB) - */ -static inline xfs_dir2_db_t -xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da) -{ - return (xfs_dir2_db_t)(da >> mp->m_sb.sb_dirblklog); -} - -/* - * Convert block (dablk) to byte offset in space - */ -static inline xfs_dir2_off_t -xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da) -{ - return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0); -} - -/* - * Free space block defintions for the node format. - */ - -/* - * Offset of the freespace index. 
- */ -#define XFS_DIR2_FREE_SPACE 2 -#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_FREE_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET) - -typedef struct xfs_dir2_free_hdr { - __be32 magic; /* XFS_DIR2_FREE_MAGIC */ - __be32 firstdb; /* db of first entry */ - __be32 nvalid; /* count of valid entries */ - __be32 nused; /* count of used entries */ -} xfs_dir2_free_hdr_t; - -typedef struct xfs_dir2_free { - xfs_dir2_free_hdr_t hdr; /* block header */ - __be16 bests[]; /* best free counts */ - /* unused entries are -1 */ -} xfs_dir2_free_t; - -static inline int xfs_dir2_free_max_bests(struct xfs_mount *mp) -{ - return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) / - sizeof(xfs_dir2_data_off_t); -} - -/* - * Convert data space db to the corresponding free db. - */ -static inline xfs_dir2_db_t -xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp); -} - -/* - * Convert data space db to the corresponding index in a free db. - */ -static inline int -xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return db % xfs_dir2_free_max_bests(mp); -} - -/* - * Single block format. - * - * The single block format looks like the following drawing on disk: - * - * +-------------------------------------------------+ - * | xfs_dir2_data_hdr_t | - * +-------------------------------------------------+ - * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | - * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | - * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t : - * | ... | - * +-------------------------------------------------+ - * | unused space | - * +-------------------------------------------------+ - * | ... 
| - * | xfs_dir2_leaf_entry_t | - * | xfs_dir2_leaf_entry_t | - * +-------------------------------------------------+ - * | xfs_dir2_block_tail_t | - * +-------------------------------------------------+ - * - * As all the entries are variable size structures the accessors below should - * be used to iterate over them. - */ - -typedef struct xfs_dir2_block_tail { - __be32 count; /* count of leaf entries */ - __be32 stale; /* count of stale lf entries */ -} xfs_dir2_block_tail_t; - -/* - * Pointer to the leaf header embedded in a data block (1-block format) - */ -static inline struct xfs_dir2_block_tail * -xfs_dir2_block_tail_p(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr) -{ - return ((struct xfs_dir2_block_tail *) - ((char *)hdr + mp->m_dirblksize)) - 1; -} - -/* - * Pointer to the leaf entries embedded in a data block (1-block format) - */ -static inline struct xfs_dir2_leaf_entry * -xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp) -{ - return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count); -} - -#endif /* __XFS_DIR2_FORMAT_H__ */ diff --git a/trunk/fs/xfs/xfs_dir2_leaf.c b/trunk/fs/xfs/xfs_dir2_leaf.c index ca2386d82cdf..ae891223be90 100644 --- a/trunk/fs/xfs/xfs_dir2_leaf.c +++ b/trunk/fs/xfs/xfs_dir2_leaf.c @@ -24,14 +24,18 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" +#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2_priv.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" +#include "xfs_dir2_node.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -60,7 +64,7 @@ xfs_dir2_block_to_leaf( { __be16 *bestsp; /* leaf's bestsp entries */ xfs_dablk_t blkno; /* leaf block's bno */ - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* block structure */ 
xfs_dir2_leaf_entry_t *blp; /* block's leaf entries */ xfs_dir2_block_tail_t *btp; /* block's tail */ xfs_inode_t *dp; /* incore directory inode */ @@ -97,9 +101,9 @@ xfs_dir2_block_to_leaf( } ASSERT(lbp != NULL); leaf = lbp->data; - hdr = dbp->data; + block = dbp->data; xfs_dir2_data_check(dp, dbp); - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(mp, block); blp = xfs_dir2_block_leaf_p(btp); /* * Set the counts in the leaf header. @@ -119,23 +123,23 @@ xfs_dir2_block_to_leaf( * tail be free. */ xfs_dir2_data_make_free(tp, dbp, - (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), - (xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize - + (xfs_dir2_data_aoff_t)((char *)blp - (char *)block), + (xfs_dir2_data_aoff_t)((char *)block + mp->m_dirblksize - (char *)blp), &needlog, &needscan); /* * Fix up the block header, make it a data block. */ - hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); + block->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); /* * Set up leaf tail and bests table. */ ltp = xfs_dir2_leaf_tail_p(mp, leaf); ltp->bestcount = cpu_to_be32(1); bestsp = xfs_dir2_leaf_bests_p(ltp); - bestsp[0] = hdr->bestfree[0].length; + bestsp[0] = block->hdr.bestfree[0].length; /* * Log the data header and leaf bests table. */ @@ -148,131 +152,6 @@ xfs_dir2_block_to_leaf( return 0; } -STATIC void -xfs_dir2_leaf_find_stale( - struct xfs_dir2_leaf *leaf, - int index, - int *lowstale, - int *highstale) -{ - /* - * Find the first stale entry before our index, if any. - */ - for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) { - if (leaf->ents[*lowstale].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) - break; - } - - /* - * Find the first stale entry at or after our index, if any. - * Stop if the result would require moving more entries than using - * lowstale. 
- */ - for (*highstale = index; - *highstale < be16_to_cpu(leaf->hdr.count); - ++*highstale) { - if (leaf->ents[*highstale].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) - break; - if (*lowstale >= 0 && index - *lowstale <= *highstale - index) - break; - } -} - -struct xfs_dir2_leaf_entry * -xfs_dir2_leaf_find_entry( - xfs_dir2_leaf_t *leaf, /* leaf structure */ - int index, /* leaf table position */ - int compact, /* need to compact leaves */ - int lowstale, /* index of prev stale leaf */ - int highstale, /* index of next stale leaf */ - int *lfloglow, /* low leaf logging index */ - int *lfloghigh) /* high leaf logging index */ -{ - if (!leaf->hdr.stale) { - xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */ - - /* - * Now we need to make room to insert the leaf entry. - * - * If there are no stale entries, just insert a hole at index. - */ - lep = &leaf->ents[index]; - if (index < be16_to_cpu(leaf->hdr.count)) - memmove(lep + 1, lep, - (be16_to_cpu(leaf->hdr.count) - index) * - sizeof(*lep)); - - /* - * Record low and high logging indices for the leaf. - */ - *lfloglow = index; - *lfloghigh = be16_to_cpu(leaf->hdr.count); - be16_add_cpu(&leaf->hdr.count, 1); - return lep; - } - - /* - * There are stale entries. - * - * We will use one of them for the new entry. It's probably not at - * the right location, so we'll have to shift some up or down first. - * - * If we didn't compact before, we need to find the nearest stale - * entries before and after our insertion point. - */ - if (compact == 0) - xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale); - - /* - * If the low one is better, use it. - */ - if (lowstale >= 0 && - (highstale == be16_to_cpu(leaf->hdr.count) || - index - lowstale - 1 < highstale - index)) { - ASSERT(index - lowstale - 1 >= 0); - ASSERT(leaf->ents[lowstale].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)); - - /* - * Copy entries up to cover the stale entry and make room - * for the new entry. 
- */ - if (index - lowstale - 1 > 0) { - memmove(&leaf->ents[lowstale], - &leaf->ents[lowstale + 1], - (index - lowstale - 1) * - sizeof(xfs_dir2_leaf_entry_t)); - } - *lfloglow = MIN(lowstale, *lfloglow); - *lfloghigh = MAX(index - 1, *lfloghigh); - be16_add_cpu(&leaf->hdr.stale, -1); - return &leaf->ents[index - 1]; - } - - /* - * The high one is better, so use that one. - */ - ASSERT(highstale - index >= 0); - ASSERT(leaf->ents[highstale].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)); - - /* - * Copy entries down to cover the stale entry and make room for the - * new entry. - */ - if (highstale - index > 0) { - memmove(&leaf->ents[index + 1], - &leaf->ents[index], - (highstale - index) * sizeof(xfs_dir2_leaf_entry_t)); - } - *lfloglow = MIN(index, *lfloglow); - *lfloghigh = MAX(highstale, *lfloghigh); - be16_add_cpu(&leaf->hdr.stale, -1); - return &leaf->ents[index]; -} - /* * Add an entry to a leaf form directory. */ @@ -282,7 +161,7 @@ xfs_dir2_leaf_addname( { __be16 *bestsp; /* freespace table in leaf */ int compact; /* need to compact leaves */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *data; /* data block structure */ xfs_dabuf_t *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data block entry */ xfs_inode_t *dp; /* incore directory inode */ @@ -346,7 +225,7 @@ xfs_dir2_leaf_addname( continue; i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); ASSERT(i < be32_to_cpu(ltp->bestcount)); - ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF)); + ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF); if (be16_to_cpu(bestsp[i]) >= length) { use_block = i; break; @@ -360,8 +239,7 @@ xfs_dir2_leaf_addname( /* * Remember a block we see that's missing. 
*/ - if (bestsp[i] == cpu_to_be16(NULLDATAOFF) && - use_block == -1) + if (be16_to_cpu(bestsp[i]) == NULLDATAOFF && use_block == -1) use_block = i; else if (be16_to_cpu(bestsp[i]) >= length) { use_block = i; @@ -372,17 +250,14 @@ xfs_dir2_leaf_addname( /* * How many bytes do we need in the leaf block? */ - needbytes = 0; - if (!leaf->hdr.stale) - needbytes += sizeof(xfs_dir2_leaf_entry_t); - if (use_block == -1) - needbytes += sizeof(xfs_dir2_data_off_t); - + needbytes = + (leaf->hdr.stale ? 0 : (uint)sizeof(leaf->ents[0])) + + (use_block != -1 ? 0 : (uint)sizeof(leaf->bests[0])); /* * Now kill use_block if it refers to a missing block, so we * can use it as an indication of allocation needed. */ - if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF)) + if (use_block != -1 && be16_to_cpu(bestsp[use_block]) == NULLDATAOFF) use_block = -1; /* * If we don't have enough free bytes but we can make enough @@ -494,8 +369,8 @@ xfs_dir2_leaf_addname( */ else xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); - hdr = dbp->data; - bestsp[use_block] = hdr->bestfree[0].length; + data = dbp->data; + bestsp[use_block] = data->hdr.bestfree[0].length; grown = 1; } /* @@ -509,7 +384,7 @@ xfs_dir2_leaf_addname( xfs_da_brelse(tp, lbp); return error; } - hdr = dbp->data; + data = dbp->data; grown = 0; } xfs_dir2_data_check(dp, dbp); @@ -517,14 +392,14 @@ xfs_dir2_leaf_addname( * Point to the biggest freespace in our data block. */ dup = (xfs_dir2_data_unused_t *) - ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset)); + ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset)); ASSERT(be16_to_cpu(dup->length) >= length); needscan = needlog = 0; /* * Mark the initial part of our freespace in use for the new entry. */ xfs_dir2_data_use_free(tp, dbp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length, &needlog, &needscan); /* * Initialize our new entry (at last). 
@@ -534,12 +409,12 @@ xfs_dir2_leaf_addname( dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); + *tagp = cpu_to_be16((char *)dep - (char *)data); /* * Need to scan fix up the bestfree table. */ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, data, &needlog); /* * Need to log the data block's header. */ @@ -550,15 +425,107 @@ xfs_dir2_leaf_addname( * If the bests table needs to be changed, do it. * Log the change unless we've already done that. */ - if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(hdr->bestfree[0].length)) { - bestsp[use_block] = hdr->bestfree[0].length; + if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(data->hdr.bestfree[0].length)) { + bestsp[use_block] = data->hdr.bestfree[0].length; if (!grown) xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); } - - lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale, - highstale, &lfloglow, &lfloghigh); - + /* + * Now we need to make room to insert the leaf entry. + * If there are no stale entries, we just insert a hole at index. + */ + if (!leaf->hdr.stale) { + /* + * lep is still good as the index leaf entry. + */ + if (index < be16_to_cpu(leaf->hdr.count)) + memmove(lep + 1, lep, + (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep)); + /* + * Record low and high logging indices for the leaf. + */ + lfloglow = index; + lfloghigh = be16_to_cpu(leaf->hdr.count); + be16_add_cpu(&leaf->hdr.count, 1); + } + /* + * There are stale entries. + * We will use one of them for the new entry. + * It's probably not at the right location, so we'll have to + * shift some up or down first. + */ + else { + /* + * If we didn't compact before, we need to find the nearest + * stale entries before and after our insertion point. + */ + if (compact == 0) { + /* + * Find the first stale entry before the insertion + * point, if any. 
+ */ + for (lowstale = index - 1; + lowstale >= 0 && + be32_to_cpu(leaf->ents[lowstale].address) != + XFS_DIR2_NULL_DATAPTR; + lowstale--) + continue; + /* + * Find the next stale entry at or after the insertion + * point, if any. Stop if we go so far that the + * lowstale entry would be better. + */ + for (highstale = index; + highstale < be16_to_cpu(leaf->hdr.count) && + be32_to_cpu(leaf->ents[highstale].address) != + XFS_DIR2_NULL_DATAPTR && + (lowstale < 0 || + index - lowstale - 1 >= highstale - index); + highstale++) + continue; + } + /* + * If the low one is better, use it. + */ + if (lowstale >= 0 && + (highstale == be16_to_cpu(leaf->hdr.count) || + index - lowstale - 1 < highstale - index)) { + ASSERT(index - lowstale - 1 >= 0); + ASSERT(be32_to_cpu(leaf->ents[lowstale].address) == + XFS_DIR2_NULL_DATAPTR); + /* + * Copy entries up to cover the stale entry + * and make room for the new entry. + */ + if (index - lowstale - 1 > 0) + memmove(&leaf->ents[lowstale], + &leaf->ents[lowstale + 1], + (index - lowstale - 1) * sizeof(*lep)); + lep = &leaf->ents[index - 1]; + lfloglow = MIN(lowstale, lfloglow); + lfloghigh = MAX(index - 1, lfloghigh); + } + /* + * The high one is better, so use that one. + */ + else { + ASSERT(highstale - index >= 0); + ASSERT(be32_to_cpu(leaf->ents[highstale].address) == + XFS_DIR2_NULL_DATAPTR); + /* + * Copy entries down to cover the stale entry + * and make room for the new entry. + */ + if (highstale - index > 0) + memmove(&leaf->ents[index + 1], + &leaf->ents[index], + (highstale - index) * sizeof(*lep)); + lep = &leaf->ents[index]; + lfloglow = MIN(index, lfloglow); + lfloghigh = MAX(highstale, lfloghigh); + } + be16_add_cpu(&leaf->hdr.stale, -1); + } /* * Fill in the new leaf entry. 
*/ @@ -595,7 +562,7 @@ xfs_dir2_leaf_check( leaf = bp->data; mp = dp->i_mount; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); /* * This value is not restrictive enough. * Should factor in the size of the bests table as well. @@ -615,7 +582,7 @@ xfs_dir2_leaf_check( if (i + 1 < be16_to_cpu(leaf->hdr.count)) ASSERT(be32_to_cpu(leaf->ents[i].hashval) <= be32_to_cpu(leaf->ents[i + 1].hashval)); - if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR) stale++; } ASSERT(be16_to_cpu(leaf->hdr.stale) == stale); @@ -644,8 +611,7 @@ xfs_dir2_leaf_compact( * Compress out the stale entries in place. */ for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) { - if (leaf->ents[from].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) continue; /* * Only actually copy the entries that are different. @@ -697,9 +663,24 @@ xfs_dir2_leaf_compact_x1( leaf = bp->data; ASSERT(be16_to_cpu(leaf->hdr.stale) > 1); index = *indexp; - - xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale); - + /* + * Find the first stale entry before our index, if any. + */ + for (lowstale = index - 1; + lowstale >= 0 && + be32_to_cpu(leaf->ents[lowstale].address) != XFS_DIR2_NULL_DATAPTR; + lowstale--) + continue; + /* + * Find the first stale entry at or after our index, if any. + * Stop if the answer would be worse than lowstale. + */ + for (highstale = index; + highstale < be16_to_cpu(leaf->hdr.count) && + be32_to_cpu(leaf->ents[highstale].address) != XFS_DIR2_NULL_DATAPTR && + (lowstale < 0 || index - lowstale > highstale - index); + highstale++) + continue; /* * Pick the better of lowstale and highstale. 
*/ @@ -720,8 +701,7 @@ xfs_dir2_leaf_compact_x1( if (index == from) newindex = to; if (from != keepstale && - leaf->ents[from].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) { + be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) { if (from == to) *lowlogp = to; continue; @@ -780,7 +760,7 @@ xfs_dir2_leaf_getdents( int byteoff; /* offset in current block */ xfs_dir2_db_t curdb; /* db for current block */ xfs_dir2_off_t curoff; /* current overall offset */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *data; /* data block structure */ xfs_dir2_data_entry_t *dep; /* data entry */ xfs_dir2_data_unused_t *dup; /* unused entry */ int error = 0; /* error return value */ @@ -1038,23 +1018,23 @@ xfs_dir2_leaf_getdents( else if (curoff > newoff) ASSERT(xfs_dir2_byte_to_db(mp, curoff) == curdb); - hdr = bp->data; + data = bp->data; xfs_dir2_data_check(dp, bp); /* * Find our position in the block. */ - ptr = (char *)(hdr + 1); + ptr = (char *)&data->u; byteoff = xfs_dir2_byte_to_off(mp, curoff); /* * Skip past the header. */ if (byteoff == 0) - curoff += (uint)sizeof(*hdr); + curoff += (uint)sizeof(data->hdr); /* * Skip past entries until we reach our offset. 
*/ else { - while ((char *)ptr - (char *)hdr < byteoff) { + while ((char *)ptr - (char *)data < byteoff) { dup = (xfs_dir2_data_unused_t *)ptr; if (be16_to_cpu(dup->freetag) @@ -1075,8 +1055,8 @@ xfs_dir2_leaf_getdents( curoff = xfs_dir2_db_off_to_byte(mp, xfs_dir2_byte_to_db(mp, curoff), - (char *)ptr - (char *)hdr); - if (ptr >= (char *)hdr + mp->m_dirblksize) { + (char *)ptr - (char *)data); + if (ptr >= (char *)data + mp->m_dirblksize) { continue; } } @@ -1199,7 +1179,7 @@ xfs_dir2_leaf_log_bests( xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf); firstb = xfs_dir2_leaf_bests_p(ltp) + first; lastb = xfs_dir2_leaf_bests_p(ltp) + last; @@ -1222,8 +1202,8 @@ xfs_dir2_leaf_log_ents( xfs_dir2_leaf_t *leaf; /* leaf structure */ leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || - leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC || + be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); firstlep = &leaf->ents[first]; lastlep = &leaf->ents[last]; xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf), @@ -1241,8 +1221,8 @@ xfs_dir2_leaf_log_header( xfs_dir2_leaf_t *leaf; /* leaf structure */ leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || - leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC || + be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf), (uint)(sizeof(leaf->hdr) - 1)); } @@ -1261,7 +1241,7 @@ xfs_dir2_leaf_log_tail( mp = tp->t_mountp; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); + 
ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); ltp = xfs_dir2_leaf_tail_p(mp, leaf); xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), (uint)(mp->m_dirblksize - 1)); @@ -1457,7 +1437,7 @@ xfs_dir2_leaf_removename( xfs_da_args_t *args) /* operation arguments */ { __be16 *bestsp; /* leaf block best freespace */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *data; /* data block structure */ xfs_dir2_db_t db; /* data block number */ xfs_dabuf_t *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data entry structure */ @@ -1487,7 +1467,7 @@ xfs_dir2_leaf_removename( tp = args->trans; mp = dp->i_mount; leaf = lbp->data; - hdr = dbp->data; + data = dbp->data; xfs_dir2_data_check(dp, dbp); /* * Point to the leaf entry, use that to point to the data entry. @@ -1495,9 +1475,9 @@ xfs_dir2_leaf_removename( lep = &leaf->ents[index]; db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); + ((char *)data + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); needscan = needlog = 0; - oldbest = be16_to_cpu(hdr->bestfree[0].length); + oldbest = be16_to_cpu(data->hdr.bestfree[0].length); ltp = xfs_dir2_leaf_tail_p(mp, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); ASSERT(be16_to_cpu(bestsp[db]) == oldbest); @@ -1505,7 +1485,7 @@ xfs_dir2_leaf_removename( * Mark the former data entry unused. */ xfs_dir2_data_make_free(tp, dbp, - (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), + (xfs_dir2_data_aoff_t)((char *)dep - (char *)data), xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); /* * We just mark the leaf entry stale by putting a null in it. @@ -1519,23 +1499,23 @@ xfs_dir2_leaf_removename( * log the data block header if necessary. 
*/ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, data, &needlog); if (needlog) xfs_dir2_data_log_header(tp, dbp); /* * If the longest freespace in the data block has changed, * put the new value in the bests table and log that. */ - if (be16_to_cpu(hdr->bestfree[0].length) != oldbest) { - bestsp[db] = hdr->bestfree[0].length; + if (be16_to_cpu(data->hdr.bestfree[0].length) != oldbest) { + bestsp[db] = data->hdr.bestfree[0].length; xfs_dir2_leaf_log_bests(tp, lbp, db, db); } xfs_dir2_data_check(dp, dbp); /* * If the data block is now empty then get rid of the data block. */ - if (be16_to_cpu(hdr->bestfree[0].length) == - mp->m_dirblksize - (uint)sizeof(*hdr)) { + if (be16_to_cpu(data->hdr.bestfree[0].length) == + mp->m_dirblksize - (uint)sizeof(data->hdr)) { ASSERT(db != mp->m_dirdatablk); if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { /* @@ -1562,7 +1542,7 @@ xfs_dir2_leaf_removename( * Look for the last active entry (i). */ for (i = db - 1; i > 0; i--) { - if (bestsp[i] != cpu_to_be16(NULLDATAOFF)) + if (be16_to_cpu(bestsp[i]) != NULLDATAOFF) break; } /* @@ -1706,6 +1686,9 @@ xfs_dir2_leaf_trim_data( xfs_dir2_db_t db) /* data block number */ { __be16 *bestsp; /* leaf bests table */ +#ifdef DEBUG + xfs_dir2_data_t *data; /* data block structure */ +#endif xfs_dabuf_t *dbp; /* data block buffer */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return value */ @@ -1724,21 +1707,20 @@ xfs_dir2_leaf_trim_data( XFS_DATA_FORK))) { return error; } +#ifdef DEBUG + data = dbp->data; + ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC); +#endif + /* this seems to be an error + * data is only valid if DEBUG is defined? 
+ * RMC 09/08/1999 + */ leaf = lbp->data; ltp = xfs_dir2_leaf_tail_p(mp, leaf); - -#ifdef DEBUG -{ - struct xfs_dir2_data_hdr *hdr = dbp->data; - - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); - ASSERT(be16_to_cpu(hdr->bestfree[0].length) == - mp->m_dirblksize - (uint)sizeof(*hdr)); + ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) == + mp->m_dirblksize - (uint)sizeof(data->hdr)); ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); -} -#endif - /* * Get rid of the data block. */ @@ -1758,20 +1740,6 @@ xfs_dir2_leaf_trim_data( return 0; } -static inline size_t -xfs_dir2_leaf_size( - struct xfs_dir2_leaf_hdr *hdr, - int counts) -{ - int entries; - - entries = be16_to_cpu(hdr->count) - be16_to_cpu(hdr->stale); - return sizeof(xfs_dir2_leaf_hdr_t) + - entries * sizeof(xfs_dir2_leaf_entry_t) + - counts * sizeof(xfs_dir2_data_off_t) + - sizeof(xfs_dir2_leaf_tail_t); -} - /* * Convert node form directory to leaf form directory. * The root of the node form dir needs to already be a LEAFN block. @@ -1842,7 +1810,7 @@ xfs_dir2_node_to_leaf( return 0; lbp = state->path.blk[0].bp; leaf = lbp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); /* * Read the freespace block. */ @@ -1851,19 +1819,20 @@ xfs_dir2_node_to_leaf( return error; } free = fbp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); ASSERT(!free->hdr.firstdb); - /* * Now see if the leafn and free data will fit in a leaf1. * If not, release the buffer and give up. 
*/ - if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) > - mp->m_dirblksize) { + if ((uint)sizeof(leaf->hdr) + + (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)) * (uint)sizeof(leaf->ents[0]) + + be32_to_cpu(free->hdr.nvalid) * (uint)sizeof(leaf->bests[0]) + + (uint)sizeof(leaf->tail) > + mp->m_dirblksize) { xfs_da_brelse(tp, fbp); return 0; } - /* * If the leaf has any stale entries in it, compress them out. * The compact routine will log the header. @@ -1882,7 +1851,7 @@ xfs_dir2_node_to_leaf( * Set up the leaf bests table. */ memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests, - be32_to_cpu(ltp->bestcount) * sizeof(xfs_dir2_data_off_t)); + be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0])); xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); xfs_dir2_leaf_log_tail(tp, lbp); xfs_dir2_leaf_check(dp, lbp); diff --git a/trunk/fs/xfs/xfs_dir2_leaf.h b/trunk/fs/xfs/xfs_dir2_leaf.h new file mode 100644 index 000000000000..6c9539f06987 --- /dev/null +++ b/trunk/fs/xfs/xfs_dir2_leaf.h @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DIR2_LEAF_H__ +#define __XFS_DIR2_LEAF_H__ + +struct uio; +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_inode; +struct xfs_mount; +struct xfs_trans; + +/* + * Offset of the leaf/node space. First block in this space + * is the btree root. + */ +#define XFS_DIR2_LEAF_SPACE 1 +#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE) +#define XFS_DIR2_LEAF_FIRSTDB(mp) \ + xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET) + +/* + * Offset in data space of a data entry. + */ +typedef __uint32_t xfs_dir2_dataptr_t; +#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff) +#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0) + +/* + * Leaf block header. + */ +typedef struct xfs_dir2_leaf_hdr { + xfs_da_blkinfo_t info; /* header for da routines */ + __be16 count; /* count of entries */ + __be16 stale; /* count of stale entries */ +} xfs_dir2_leaf_hdr_t; + +/* + * Leaf block entry. + */ +typedef struct xfs_dir2_leaf_entry { + __be32 hashval; /* hash value of name */ + __be32 address; /* address of data entry */ +} xfs_dir2_leaf_entry_t; + +/* + * Leaf block tail. + */ +typedef struct xfs_dir2_leaf_tail { + __be32 bestcount; +} xfs_dir2_leaf_tail_t; + +/* + * Leaf block. + * bests and tail are at the end of the block for single-leaf only + * (magic = XFS_DIR2_LEAF1_MAGIC not XFS_DIR2_LEAFN_MAGIC). + */ +typedef struct xfs_dir2_leaf { + xfs_dir2_leaf_hdr_t hdr; /* leaf header */ + xfs_dir2_leaf_entry_t ents[1]; /* entries */ + /* ... */ + xfs_dir2_data_off_t bests[1]; /* best free counts */ + xfs_dir2_leaf_tail_t tail; /* leaf tail */ +} xfs_dir2_leaf_t; + +/* + * DB blocks here are logical directory block numbers, not filesystem blocks. 
+ */ + +static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp) +{ + return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) / + (uint)sizeof(xfs_dir2_leaf_entry_t)); +} + +/* + * Get address of the bestcount field in the single-leaf block. + */ +static inline xfs_dir2_leaf_tail_t * +xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp) +{ + return (xfs_dir2_leaf_tail_t *) + ((char *)(lp) + (mp)->m_dirblksize - + (uint)sizeof(xfs_dir2_leaf_tail_t)); +} + +/* + * Get address of the bests array in the single-leaf block. + */ +static inline __be16 * +xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp) +{ + return (__be16 *)ltp - be32_to_cpu(ltp->bestcount); +} + +/* + * Convert dataptr to byte in file space + */ +static inline xfs_dir2_off_t +xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) +{ + return (xfs_dir2_off_t)(dp) << XFS_DIR2_DATA_ALIGN_LOG; +} + +/* + * Convert byte in file space to dataptr. It had better be aligned. + */ +static inline xfs_dir2_dataptr_t +xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by) +{ + return (xfs_dir2_dataptr_t)((by) >> XFS_DIR2_DATA_ALIGN_LOG); +} + +/* + * Convert byte in space to (DB) block + */ +static inline xfs_dir2_db_t +xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by) +{ + return (xfs_dir2_db_t)((by) >> \ + ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)); +} + +/* + * Convert dataptr to a block number + */ +static inline xfs_dir2_db_t +xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) +{ + return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp)); +} + +/* + * Convert byte in space to offset in a block + */ +static inline xfs_dir2_data_aoff_t +xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by) +{ + return (xfs_dir2_data_aoff_t)((by) & \ + ((1 << ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) - 1)); +} + +/* + * Convert dataptr to a byte offset in a block + */ +static inline xfs_dir2_data_aoff_t 
+xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) +{ + return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp)); +} + +/* + * Convert block and offset to byte in space + */ +static inline xfs_dir2_off_t +xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o) +{ + return ((xfs_dir2_off_t)(db) << \ + ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) + (o); +} + +/* + * Convert block (DB) to block (dablk) + */ +static inline xfs_dablk_t +xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return (xfs_dablk_t)((db) << (mp)->m_sb.sb_dirblklog); +} + +/* + * Convert byte in space to (DA) block + */ +static inline xfs_dablk_t +xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by) +{ + return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by)); +} + +/* + * Convert block and offset to dataptr + */ +static inline xfs_dir2_dataptr_t +xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o) +{ + return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o)); +} + +/* + * Convert block (dablk) to block (DB) + */ +static inline xfs_dir2_db_t +xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da) +{ + return (xfs_dir2_db_t)((da) >> (mp)->m_sb.sb_dirblklog); +} + +/* + * Convert block (dablk) to byte offset in space + */ +static inline xfs_dir2_off_t +xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da) +{ + return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0); +} + +/* + * Function declarations. 
+ */ +extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, + struct xfs_dabuf *dbp); +extern int xfs_dir2_leaf_addname(struct xfs_da_args *args); +extern void xfs_dir2_leaf_compact(struct xfs_da_args *args, + struct xfs_dabuf *bp); +extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp, + int *lowstalep, int *highstalep, + int *lowlogp, int *highlogp); +extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent, + size_t bufsize, xfs_off_t *offset, + filldir_t filldir); +extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno, + struct xfs_dabuf **bpp, int magic); +extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp, + int first, int last); +extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp, + struct xfs_dabuf *bp); +extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); +extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); +extern int xfs_dir2_leaf_replace(struct xfs_da_args *args); +extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args, + struct xfs_dabuf *lbp); +extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args, + struct xfs_dabuf *lbp, xfs_dir2_db_t db); +extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); + +#endif /* __XFS_DIR2_LEAF_H__ */ diff --git a/trunk/fs/xfs/xfs_dir2_node.c b/trunk/fs/xfs/xfs_dir2_node.c index 084b3247d636..a0aab7d3294f 100644 --- a/trunk/fs/xfs/xfs_dir2_node.c +++ b/trunk/fs/xfs/xfs_dir2_node.c @@ -23,14 +23,18 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" +#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2_priv.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" +#include "xfs_dir2_node.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -69,7 +73,7 @@ 
xfs_dir2_free_log_bests( xfs_dir2_free_t *free; /* freespace structure */ free = bp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); xfs_da_log_buf(tp, bp, (uint)((char *)&free->bests[first] - (char *)free), (uint)((char *)&free->bests[last] - (char *)free + @@ -87,7 +91,7 @@ xfs_dir2_free_log_header( xfs_dir2_free_t *free; /* freespace structure */ free = bp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free), (uint)(sizeof(xfs_dir2_free_hdr_t) - 1)); } @@ -240,13 +244,89 @@ xfs_dir2_leafn_add( lfloglow = be16_to_cpu(leaf->hdr.count); lfloghigh = -1; } - + /* + * No stale entries, just insert a space for the new entry. + */ + if (!leaf->hdr.stale) { + lep = &leaf->ents[index]; + if (index < be16_to_cpu(leaf->hdr.count)) + memmove(lep + 1, lep, + (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep)); + lfloglow = index; + lfloghigh = be16_to_cpu(leaf->hdr.count); + be16_add_cpu(&leaf->hdr.count, 1); + } + /* + * There are stale entries. We'll use one for the new entry. + */ + else { + /* + * If we didn't do a compact then we need to figure out + * which stale entry will be used. + */ + if (compact == 0) { + /* + * Find first stale entry before our insertion point. + */ + for (lowstale = index - 1; + lowstale >= 0 && + be32_to_cpu(leaf->ents[lowstale].address) != + XFS_DIR2_NULL_DATAPTR; + lowstale--) + continue; + /* + * Find next stale entry after insertion point. + * Stop looking if the answer would be worse than + * lowstale already found. + */ + for (highstale = index; + highstale < be16_to_cpu(leaf->hdr.count) && + be32_to_cpu(leaf->ents[highstale].address) != + XFS_DIR2_NULL_DATAPTR && + (lowstale < 0 || + index - lowstale - 1 >= highstale - index); + highstale++) + continue; + } + /* + * Using the low stale entry. 
+ * Shift entries up toward the stale slot. + */ + if (lowstale >= 0 && + (highstale == be16_to_cpu(leaf->hdr.count) || + index - lowstale - 1 < highstale - index)) { + ASSERT(be32_to_cpu(leaf->ents[lowstale].address) == + XFS_DIR2_NULL_DATAPTR); + ASSERT(index - lowstale - 1 >= 0); + if (index - lowstale - 1 > 0) + memmove(&leaf->ents[lowstale], + &leaf->ents[lowstale + 1], + (index - lowstale - 1) * sizeof(*lep)); + lep = &leaf->ents[index - 1]; + lfloglow = MIN(lowstale, lfloglow); + lfloghigh = MAX(index - 1, lfloghigh); + } + /* + * Using the high stale entry. + * Shift entries down toward the stale slot. + */ + else { + ASSERT(be32_to_cpu(leaf->ents[highstale].address) == + XFS_DIR2_NULL_DATAPTR); + ASSERT(highstale - index >= 0); + if (highstale - index > 0) + memmove(&leaf->ents[index + 1], + &leaf->ents[index], + (highstale - index) * sizeof(*lep)); + lep = &leaf->ents[index]; + lfloglow = MIN(index, lfloglow); + lfloghigh = MAX(highstale, lfloghigh); + } + be16_add_cpu(&leaf->hdr.stale, -1); + } /* * Insert the new entry, log everything. 
*/ - lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale, - highstale, &lfloglow, &lfloghigh); - lep->hashval = cpu_to_be32(args->hashval); lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, args->blkno, args->index)); @@ -272,14 +352,14 @@ xfs_dir2_leafn_check( leaf = bp->data; mp = dp->i_mount; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp)); for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) { if (i + 1 < be16_to_cpu(leaf->hdr.count)) { ASSERT(be32_to_cpu(leaf->ents[i].hashval) <= be32_to_cpu(leaf->ents[i + 1].hashval)); } - if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR) stale++; } ASSERT(be16_to_cpu(leaf->hdr.stale) == stale); @@ -298,7 +378,7 @@ xfs_dir2_leafn_lasthash( xfs_dir2_leaf_t *leaf; /* leaf structure */ leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); if (count) *count = be16_to_cpu(leaf->hdr.count); if (!leaf->hdr.count) @@ -337,7 +417,7 @@ xfs_dir2_leafn_lookup_for_addname( tp = args->trans; mp = dp->i_mount; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); #ifdef __KERNEL__ ASSERT(be16_to_cpu(leaf->hdr.count) > 0); #endif @@ -354,7 +434,7 @@ xfs_dir2_leafn_lookup_for_addname( curbp = state->extrablk.bp; curfdb = state->extrablk.blkno; free = curbp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); } length = xfs_dir2_data_entsize(args->namelen); /* @@ -408,7 +488,7 @@ xfs_dir2_leafn_lookup_for_addname( ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 
ASSERT((be32_to_cpu(free->hdr.firstdb) % - xfs_dir2_free_max_bests(mp)) == 0); + XFS_DIR2_MAX_FREE_BESTS(mp)) == 0); ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb); ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) + be32_to_cpu(free->hdr.nvalid)); @@ -420,8 +500,7 @@ xfs_dir2_leafn_lookup_for_addname( /* * If it has room, return it. */ - if (unlikely(free->bests[fi] == - cpu_to_be16(NULLDATAOFF))) { + if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) { XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", XFS_ERRLEVEL_LOW, mp); if (curfdb != newfdb) @@ -482,7 +561,7 @@ xfs_dir2_leafn_lookup_for_entry( tp = args->trans; mp = dp->i_mount; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); #ifdef __KERNEL__ ASSERT(be16_to_cpu(leaf->hdr.count) > 0); #endif @@ -663,8 +742,7 @@ xfs_dir2_leafn_moveents( int i; /* temp leaf index */ for (i = start_s, stale = 0; i < start_s + count; i++) { - if (leaf_s->ents[i].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + if (be32_to_cpu(leaf_s->ents[i].address) == XFS_DIR2_NULL_DATAPTR) stale++; } } else @@ -711,8 +789,8 @@ xfs_dir2_leafn_order( leaf1 = leaf1_bp->data; leaf2 = leaf2_bp->data; - ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); - ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); if (be16_to_cpu(leaf1->hdr.count) > 0 && be16_to_cpu(leaf2->hdr.count) > 0 && (be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) || @@ -840,7 +918,7 @@ xfs_dir2_leafn_remove( xfs_da_state_blk_t *dblk, /* data block */ int *rval) /* resulting block needs join */ { - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *data; /* data block structure */ xfs_dir2_db_t db; /* data block number */ xfs_dabuf_t *dbp; /* data block buffer 
*/ xfs_dir2_data_entry_t *dep; /* data block entry */ @@ -860,7 +938,7 @@ xfs_dir2_leafn_remove( tp = args->trans; mp = dp->i_mount; leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); /* * Point to the entry we're removing. */ @@ -885,9 +963,9 @@ xfs_dir2_leafn_remove( * in the data block in case it changes. */ dbp = dblk->bp; - hdr = dbp->data; - dep = (xfs_dir2_data_entry_t *)((char *)hdr + off); - longest = be16_to_cpu(hdr->bestfree[0].length); + data = dbp->data; + dep = (xfs_dir2_data_entry_t *)((char *)data + off); + longest = be16_to_cpu(data->hdr.bestfree[0].length); needlog = needscan = 0; xfs_dir2_data_make_free(tp, dbp, off, xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); @@ -896,7 +974,7 @@ xfs_dir2_leafn_remove( * Log the data block header if needed. */ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, data, &needlog); if (needlog) xfs_dir2_data_log_header(tp, dbp); xfs_dir2_data_check(dp, dbp); @@ -904,7 +982,7 @@ xfs_dir2_leafn_remove( * If the longest data block freespace changes, need to update * the corresponding freeblock entry. */ - if (longest < be16_to_cpu(hdr->bestfree[0].length)) { + if (longest < be16_to_cpu(data->hdr.bestfree[0].length)) { int error; /* error return value */ xfs_dabuf_t *fbp; /* freeblock buffer */ xfs_dir2_db_t fdb; /* freeblock block number */ @@ -922,27 +1000,27 @@ xfs_dir2_leafn_remove( return error; } free = fbp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); ASSERT(be32_to_cpu(free->hdr.firstdb) == - xfs_dir2_free_max_bests(mp) * + XFS_DIR2_MAX_FREE_BESTS(mp) * (fdb - XFS_DIR2_FREE_FIRSTDB(mp))); /* * Calculate which entry we need to fix. 
*/ findex = xfs_dir2_db_to_fdindex(mp, db); - longest = be16_to_cpu(hdr->bestfree[0].length); + longest = be16_to_cpu(data->hdr.bestfree[0].length); /* * If the data block is now empty we can get rid of it * (usually). */ - if (longest == mp->m_dirblksize - (uint)sizeof(*hdr)) { + if (longest == mp->m_dirblksize - (uint)sizeof(data->hdr)) { /* * Try to punch out the data block. */ error = xfs_dir2_shrink_inode(args, db, dbp); if (error == 0) { dblk->bp = NULL; - hdr = NULL; + data = NULL; } /* * We can get ENOSPC if there's no space reservation. @@ -958,7 +1036,7 @@ xfs_dir2_leafn_remove( * If we got rid of the data block, we can eliminate that entry * in the free block. */ - if (hdr == NULL) { + if (data == NULL) { /* * One less used entry in the free table. */ @@ -974,8 +1052,7 @@ xfs_dir2_leafn_remove( int i; /* free entry index */ for (i = findex - 1; - i >= 0 && - free->bests[i] == cpu_to_be16(NULLDATAOFF); + i >= 0 && be16_to_cpu(free->bests[i]) == NULLDATAOFF; i--) continue; free->hdr.nvalid = cpu_to_be32(i + 1); @@ -1132,7 +1209,7 @@ xfs_dir2_leafn_toosmall( */ blk = &state->path.blk[state->path.active - 1]; info = blk->bp->data; - ASSERT(info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC); leaf = (xfs_dir2_leaf_t *)info; count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]); @@ -1191,7 +1268,7 @@ xfs_dir2_leafn_toosmall( count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes = state->blocksize - (state->blocksize >> 2); leaf = bp->data; - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes -= count * (uint)sizeof(leaf->ents[0]); /* @@ -1250,8 +1327,8 @@ xfs_dir2_leafn_unbalance( ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC); drop_leaf = 
drop_blk->bp->data; save_leaf = save_blk->bp->data; - ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); - ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); /* * If there are any stale leaf entries, take this opportunity * to purge them. @@ -1355,7 +1432,7 @@ xfs_dir2_node_addname_int( xfs_da_args_t *args, /* operation arguments */ xfs_da_state_blk_t *fblk) /* optional freespace block */ { - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *data; /* data block structure */ xfs_dir2_db_t dbno; /* data block number */ xfs_dabuf_t *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data entry pointer */ @@ -1392,7 +1469,7 @@ xfs_dir2_node_addname_int( */ ifbno = fblk->blkno; free = fbp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); findex = fblk->index; /* * This means the free entry showed that the data block had @@ -1476,7 +1553,7 @@ xfs_dir2_node_addname_int( continue; } free = fbp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); findex = 0; } /* @@ -1603,12 +1680,12 @@ xfs_dir2_node_addname_int( free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC); free->hdr.firstdb = cpu_to_be32( (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) * - xfs_dir2_free_max_bests(mp)); + XFS_DIR2_MAX_FREE_BESTS(mp)); free->hdr.nvalid = 0; free->hdr.nused = 0; } else { free = fbp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); } /* @@ -1620,7 +1697,7 @@ xfs_dir2_node_addname_int( * freespace block, extend that table. 
*/ if (findex >= be32_to_cpu(free->hdr.nvalid)) { - ASSERT(findex < xfs_dir2_free_max_bests(mp)); + ASSERT(findex < XFS_DIR2_MAX_FREE_BESTS(mp)); free->hdr.nvalid = cpu_to_be32(findex + 1); /* * Tag new entry so nused will go up. @@ -1631,7 +1708,7 @@ xfs_dir2_node_addname_int( * If this entry was for an empty data block * (this should always be true) then update the header. */ - if (free->bests[findex] == cpu_to_be16(NULLDATAOFF)) { + if (be16_to_cpu(free->bests[findex]) == NULLDATAOFF) { be32_add_cpu(&free->hdr.nused, 1); xfs_dir2_free_log_header(tp, fbp); } @@ -1640,8 +1717,8 @@ xfs_dir2_node_addname_int( * We haven't allocated the data entry yet so this will * change again. */ - hdr = dbp->data; - free->bests[findex] = hdr->bestfree[0].length; + data = dbp->data; + free->bests[findex] = data->hdr.bestfree[0].length; logfree = 1; } /* @@ -1666,21 +1743,21 @@ xfs_dir2_node_addname_int( xfs_da_buf_done(fbp); return error; } - hdr = dbp->data; + data = dbp->data; logfree = 0; } - ASSERT(be16_to_cpu(hdr->bestfree[0].length) >= length); + ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) >= length); /* * Point to the existing unused space. */ dup = (xfs_dir2_data_unused_t *) - ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset)); + ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset)); needscan = needlog = 0; /* * Mark the first part of the unused space, inuse for us. */ xfs_dir2_data_use_free(tp, dbp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length, &needlog, &needscan); /* * Fill in the new entry and log it. @@ -1690,13 +1767,13 @@ xfs_dir2_node_addname_int( dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); + *tagp = cpu_to_be16((char *)dep - (char *)data); xfs_dir2_data_log_entry(tp, dbp, dep); /* * Rescan the block for bestfree if needed. 
*/ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(mp, data, &needlog); /* * Log the data block header if needed. */ @@ -1705,8 +1782,8 @@ xfs_dir2_node_addname_int( /* * If the freespace entry is now wrong, update it. */ - if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(hdr->bestfree[0].length)) { - free->bests[findex] = hdr->bestfree[0].length; + if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(data->hdr.bestfree[0].length)) { + free->bests[findex] = data->hdr.bestfree[0].length; logfree = 1; } /* @@ -1856,7 +1933,7 @@ xfs_dir2_node_replace( xfs_da_args_t *args) /* operation arguments */ { xfs_da_state_blk_t *blk; /* leaf block */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_t *data; /* data block structure */ xfs_dir2_data_entry_t *dep; /* data entry changed */ int error; /* error return value */ int i; /* btree level */ @@ -1900,10 +1977,10 @@ xfs_dir2_node_replace( /* * Point to the data entry. */ - hdr = state->extrablk.bp->data; - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); + data = state->extrablk.bp->data; + ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC); dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + + ((char *)data + xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address))); ASSERT(inum != be64_to_cpu(dep->inumber)); /* @@ -1967,7 +2044,7 @@ xfs_dir2_node_trim_free( return 0; } free = bp->data; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); /* * If there are used entries, there's nothing to do. */ diff --git a/trunk/fs/xfs/xfs_dir2_node.h b/trunk/fs/xfs/xfs_dir2_node.h new file mode 100644 index 000000000000..82dfe7147195 --- /dev/null +++ b/trunk/fs/xfs/xfs_dir2_node.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2000,2005 Silicon Graphics, Inc. + * All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DIR2_NODE_H__ +#define __XFS_DIR2_NODE_H__ + +/* + * Directory version 2, btree node format structures + */ + +struct uio; +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_da_state; +struct xfs_da_state_blk; +struct xfs_inode; +struct xfs_trans; + +/* + * Offset of the freespace index. + */ +#define XFS_DIR2_FREE_SPACE 2 +#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE) +#define XFS_DIR2_FREE_FIRSTDB(mp) \ + xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET) + +#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F */ + +typedef struct xfs_dir2_free_hdr { + __be32 magic; /* XFS_DIR2_FREE_MAGIC */ + __be32 firstdb; /* db of first entry */ + __be32 nvalid; /* count of valid entries */ + __be32 nused; /* count of used entries */ +} xfs_dir2_free_hdr_t; + +typedef struct xfs_dir2_free { + xfs_dir2_free_hdr_t hdr; /* block header */ + __be16 bests[1]; /* best free counts */ + /* unused entries are -1 */ +} xfs_dir2_free_t; + +#define XFS_DIR2_MAX_FREE_BESTS(mp) \ + (((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_free_hdr_t)) / \ + (uint)sizeof(xfs_dir2_data_off_t)) + +/* + * Convert data space db to the corresponding free db. 
+ */ +static inline xfs_dir2_db_t +xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return (XFS_DIR2_FREE_FIRSTDB(mp) + (db) / XFS_DIR2_MAX_FREE_BESTS(mp)); +} + +/* + * Convert data space db to the corresponding index in a free db. + */ +static inline int +xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp)); +} + +extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, + struct xfs_dabuf *lbp); +extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count); +extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp, + struct xfs_da_args *args, int *indexp, + struct xfs_da_state *state); +extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp, + struct xfs_dabuf *leaf2_bp); +extern int xfs_dir2_leafn_split(struct xfs_da_state *state, + struct xfs_da_state_blk *oldblk, + struct xfs_da_state_blk *newblk); +extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action); +extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state, + struct xfs_da_state_blk *drop_blk, + struct xfs_da_state_blk *save_blk); +extern int xfs_dir2_node_addname(struct xfs_da_args *args); +extern int xfs_dir2_node_lookup(struct xfs_da_args *args); +extern int xfs_dir2_node_removename(struct xfs_da_args *args); +extern int xfs_dir2_node_replace(struct xfs_da_args *args); +extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo, + int *rvalp); + +#endif /* __XFS_DIR2_NODE_H__ */ diff --git a/trunk/fs/xfs/xfs_dir2_priv.h b/trunk/fs/xfs/xfs_dir2_priv.h deleted file mode 100644 index 067f403ecf8a..000000000000 --- a/trunk/fs/xfs/xfs_dir2_priv.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR2_PRIV_H__ -#define __XFS_DIR2_PRIV_H__ - -/* xfs_dir2.c */ -extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); -extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r); -extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r); -extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, - xfs_dir2_db_t *dbp); -extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, - struct xfs_dabuf *bp); -extern int xfs_dir_cilookup_result(struct xfs_da_args *args, - const unsigned char *name, int len); - -/* xfs_dir2_block.c */ -extern int xfs_dir2_block_addname(struct xfs_da_args *args); -extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent, - xfs_off_t *offset, filldir_t filldir); -extern int xfs_dir2_block_lookup(struct xfs_da_args *args); -extern int xfs_dir2_block_removename(struct xfs_da_args *args); -extern int xfs_dir2_block_replace(struct xfs_da_args *args); -extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, - struct xfs_dabuf *lbp, struct xfs_dabuf *dbp); - -/* xfs_dir2_data.c */ -#ifdef DEBUG -extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp); -#else -#define xfs_dir2_data_check(dp,bp) -#endif -extern struct xfs_dir2_data_free * -xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr, - struct xfs_dir2_data_unused *dup, int *loghead); -extern void xfs_dir2_data_freescan(struct xfs_mount *mp, - struct xfs_dir2_data_hdr *hdr, int *loghead); 
-extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, - struct xfs_dabuf **bpp); -extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp, - struct xfs_dir2_data_entry *dep); -extern void xfs_dir2_data_log_header(struct xfs_trans *tp, - struct xfs_dabuf *bp); -extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp, - struct xfs_dir2_data_unused *dup); -extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp, - xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, - int *needlogp, int *needscanp); -extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp, - struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset, - xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); - -/* xfs_dir2_leaf.c */ -extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, - struct xfs_dabuf *dbp); -extern int xfs_dir2_leaf_addname(struct xfs_da_args *args); -extern void xfs_dir2_leaf_compact(struct xfs_da_args *args, - struct xfs_dabuf *bp); -extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp, - int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); -extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent, - size_t bufsize, xfs_off_t *offset, filldir_t filldir); -extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno, - struct xfs_dabuf **bpp, int magic); -extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp, - int first, int last); -extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp, - struct xfs_dabuf *bp); -extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); -extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); -extern int xfs_dir2_leaf_replace(struct xfs_da_args *args); -extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args, - struct xfs_dabuf *lbp); -extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args, - struct xfs_dabuf *lbp, 
xfs_dir2_db_t db); -extern struct xfs_dir2_leaf_entry * -xfs_dir2_leaf_find_entry(struct xfs_dir2_leaf *leaf, int index, int compact, - int lowstale, int highstale, - int *lfloglow, int *lfloghigh); -extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); - -/* xfs_dir2_node.c */ -extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, - struct xfs_dabuf *lbp); -extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count); -extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp, - struct xfs_da_args *args, int *indexp, - struct xfs_da_state *state); -extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp, - struct xfs_dabuf *leaf2_bp); -extern int xfs_dir2_leafn_split(struct xfs_da_state *state, - struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk); -extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action); -extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state, - struct xfs_da_state_blk *drop_blk, - struct xfs_da_state_blk *save_blk); -extern int xfs_dir2_node_addname(struct xfs_da_args *args); -extern int xfs_dir2_node_lookup(struct xfs_da_args *args); -extern int xfs_dir2_node_removename(struct xfs_da_args *args); -extern int xfs_dir2_node_replace(struct xfs_da_args *args); -extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo, - int *rvalp); - -/* xfs_dir2_sf.c */ -extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp); -extern xfs_ino_t xfs_dir2_sfe_get_ino(struct xfs_dir2_sf_hdr *sfp, - struct xfs_dir2_sf_entry *sfep); -extern int xfs_dir2_block_sfsize(struct xfs_inode *dp, - struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp); -extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp, - int size, xfs_dir2_sf_hdr_t *sfhp); -extern int xfs_dir2_sf_addname(struct xfs_da_args *args); -extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); -extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, 
void *dirent, - xfs_off_t *offset, filldir_t filldir); -extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); -extern int xfs_dir2_sf_removename(struct xfs_da_args *args); -extern int xfs_dir2_sf_replace(struct xfs_da_args *args); - -#endif /* __XFS_DIR2_PRIV_H__ */ diff --git a/trunk/fs/xfs/xfs_dir2_sf.c b/trunk/fs/xfs/xfs_dir2_sf.c index 79d05e84e296..b1bae6b1eed9 100644 --- a/trunk/fs/xfs/xfs_dir2_sf.c +++ b/trunk/fs/xfs/xfs_dir2_sf.c @@ -23,16 +23,18 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" +#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_error.h" -#include "xfs_dir2.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2_priv.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" #include "xfs_trace.h" /* @@ -57,82 +59,6 @@ static void xfs_dir2_sf_toino4(xfs_da_args_t *args); static void xfs_dir2_sf_toino8(xfs_da_args_t *args); #endif /* XFS_BIG_INUMS */ -/* - * Inode numbers in short-form directories can come in two versions, - * either 4 bytes or 8 bytes wide. These helpers deal with the - * two forms transparently by looking at the headers i8count field. - * - * For 64-bit inode number the most significant byte must be zero. 
- */ -static xfs_ino_t -xfs_dir2_sf_get_ino( - struct xfs_dir2_sf_hdr *hdr, - xfs_dir2_inou_t *from) -{ - if (hdr->i8count) - return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL; - else - return get_unaligned_be32(&from->i4.i); -} - -static void -xfs_dir2_sf_put_ino( - struct xfs_dir2_sf_hdr *hdr, - xfs_dir2_inou_t *to, - xfs_ino_t ino) -{ - ASSERT((ino & 0xff00000000000000ULL) == 0); - - if (hdr->i8count) - put_unaligned_be64(ino, &to->i8.i); - else - put_unaligned_be32(ino, &to->i4.i); -} - -xfs_ino_t -xfs_dir2_sf_get_parent_ino( - struct xfs_dir2_sf_hdr *hdr) -{ - return xfs_dir2_sf_get_ino(hdr, &hdr->parent); -} - -static void -xfs_dir2_sf_put_parent_ino( - struct xfs_dir2_sf_hdr *hdr, - xfs_ino_t ino) -{ - xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino); -} - -/* - * In short-form directory entries the inode numbers are stored at variable - * offset behind the entry name. The inode numbers may only be accessed - * through the helpers below. - */ -static xfs_dir2_inou_t * -xfs_dir2_sfe_inop( - struct xfs_dir2_sf_entry *sfep) -{ - return (xfs_dir2_inou_t *)&sfep->name[sfep->namelen]; -} - -xfs_ino_t -xfs_dir2_sfe_get_ino( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return xfs_dir2_sf_get_ino(hdr, xfs_dir2_sfe_inop(sfep)); -} - -static void -xfs_dir2_sfe_put_ino( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep, - xfs_ino_t ino) -{ - xfs_dir2_sf_put_ino(hdr, xfs_dir2_sfe_inop(sfep), ino); -} - /* * Given a block directory (dp/block), calculate its size as a shortform (sf) * directory and a header for the sf directory, if it will fit it the @@ -142,7 +68,7 @@ xfs_dir2_sfe_put_ino( int /* size for sf form */ xfs_dir2_block_sfsize( xfs_inode_t *dp, /* incore inode pointer */ - xfs_dir2_data_hdr_t *hdr, /* block directory data */ + xfs_dir2_block_t *block, /* block directory data */ xfs_dir2_sf_hdr_t *sfhp) /* output: header for sf form */ { xfs_dir2_dataptr_t addr; /* data entry address */ @@ -162,7 +88,7 @@ 
xfs_dir2_block_sfsize( mp = dp->i_mount; count = i8count = namelen = 0; - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(mp, block); blp = xfs_dir2_block_leaf_p(btp); /* @@ -175,7 +101,7 @@ xfs_dir2_block_sfsize( * Calculate the pointer to the entry at hand. */ dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr)); + ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); /* * Detect . and .., so we can special-case them. * . is not included in sf directories. @@ -212,7 +138,7 @@ xfs_dir2_block_sfsize( */ sfhp->count = count; sfhp->i8count = i8count; - xfs_dir2_sf_put_parent_ino(sfhp, parent); + xfs_dir2_sf_put_inumber((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent); return size; } @@ -227,7 +153,7 @@ xfs_dir2_block_to_sf( int size, /* shortform directory size */ xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ { - xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_t *block; /* block structure */ xfs_dir2_block_tail_t *btp; /* block tail pointer */ xfs_dir2_data_entry_t *dep; /* data entry pointer */ xfs_inode_t *dp; /* incore directory inode */ @@ -238,7 +164,8 @@ xfs_dir2_block_to_sf( xfs_mount_t *mp; /* filesystem mount point */ char *ptr; /* current data pointer */ xfs_dir2_sf_entry_t *sfep; /* shortform entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */ + xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_ino_t temp; trace_xfs_dir2_block_to_sf(args); @@ -249,14 +176,13 @@ xfs_dir2_block_to_sf( * Make a copy of the block data, so we can shrink the inode * and add local data. */ - hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP); - memcpy(hdr, bp->data, mp->m_dirblksize); + block = kmem_alloc(mp->m_dirblksize, KM_SLEEP); + memcpy(block, bp->data, mp->m_dirblksize); logflags = XFS_ILOG_CORE; if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) { ASSERT(error != ENOSPC); goto out; } - /* * The buffer is now unconditionally gone, whether * xfs_dir2_shrink_inode worked or not. 
@@ -272,14 +198,14 @@ xfs_dir2_block_to_sf( /* * Copy the header into the newly allocate local space. */ - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count)); dp->i_d.di_size = size; /* * Set up to loop over the block's entries. */ - btp = xfs_dir2_block_tail_p(mp, hdr); - ptr = (char *)(hdr + 1); + btp = xfs_dir2_block_tail_p(mp, block); + ptr = (char *)block->u; endptr = (char *)xfs_dir2_block_leaf_p(btp); sfep = xfs_dir2_sf_firstentry(sfp); /* @@ -307,7 +233,7 @@ xfs_dir2_block_to_sf( else if (dep->namelen == 2 && dep->name[0] == '.' && dep->name[1] == '.') ASSERT(be64_to_cpu(dep->inumber) == - xfs_dir2_sf_get_parent_ino(sfp)); + xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent)); /* * Normal entry, copy it into shortform. */ @@ -315,11 +241,11 @@ xfs_dir2_block_to_sf( sfep->namelen = dep->namelen; xfs_dir2_sf_put_offset(sfep, (xfs_dir2_data_aoff_t) - ((char *)dep - (char *)hdr)); + ((char *)dep - (char *)block)); memcpy(sfep->name, dep->name, dep->namelen); - xfs_dir2_sfe_put_ino(sfp, sfep, - be64_to_cpu(dep->inumber)); - + temp = be64_to_cpu(dep->inumber); + xfs_dir2_sf_put_inumber(sfp, &temp, + xfs_dir2_sf_inumberp(sfep)); sfep = xfs_dir2_sf_nextentry(sfp, sfep); } ptr += xfs_dir2_data_entsize(dep->namelen); @@ -328,7 +254,7 @@ xfs_dir2_block_to_sf( xfs_dir2_sf_check(args); out: xfs_trans_log_inode(args->trans, dp, logflags); - kmem_free(hdr); + kmem_free(block); return error; } @@ -351,7 +277,7 @@ xfs_dir2_sf_addname( xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */ int old_isize; /* di_size before adding name */ int pick; /* which algorithm to use */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */ trace_xfs_dir2_sf_addname(args); @@ -368,19 +294,19 @@ xfs_dir2_sf_addname( } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 
ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); /* * Compute entry (and change in) size. */ - add_entsize = xfs_dir2_sf_entsize(sfp, args->namelen); + add_entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen); incr_isize = add_entsize; objchange = 0; #if XFS_BIG_INUMS /* * Do we have to change to 8 byte inodes? */ - if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) { /* * Yes, adjust the entry size and the total size. */ @@ -388,7 +314,7 @@ xfs_dir2_sf_addname( (uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t); incr_isize += - (sfp->count + 2) * + (sfp->hdr.count + 2) * ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); objchange = 1; @@ -458,21 +384,21 @@ xfs_dir2_sf_addname_easy( { int byteoff; /* byte offset in sf dir */ xfs_inode_t *dp; /* incore directory inode */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ dp = args->dp; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; byteoff = (int)((char *)sfep - (char *)sfp); /* * Grow the in-inode space. */ - xfs_idata_realloc(dp, xfs_dir2_sf_entsize(sfp, args->namelen), + xfs_idata_realloc(dp, xfs_dir2_sf_entsize_byname(sfp, args->namelen), XFS_DATA_FORK); /* * Need to set up again due to realloc of the inode data. */ - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff); /* * Fill in the new entry. 
@@ -480,14 +406,15 @@ xfs_dir2_sf_addname_easy( sfep->namelen = args->namelen; xfs_dir2_sf_put_offset(sfep, offset); memcpy(sfep->name, args->name, sfep->namelen); - xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber); + xfs_dir2_sf_put_inumber(sfp, &args->inumber, + xfs_dir2_sf_inumberp(sfep)); /* * Update the header and inode. */ - sfp->count++; + sfp->hdr.count++; #if XFS_BIG_INUMS if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) - sfp->i8count++; + sfp->hdr.i8count++; #endif dp->i_d.di_size = new_isize; xfs_dir2_sf_check(args); @@ -517,19 +444,19 @@ xfs_dir2_sf_addname_hard( xfs_dir2_data_aoff_t offset; /* current offset value */ int old_isize; /* previous di_size */ xfs_dir2_sf_entry_t *oldsfep; /* entry in original dir */ - xfs_dir2_sf_hdr_t *oldsfp; /* original shortform dir */ + xfs_dir2_sf_t *oldsfp; /* original shortform dir */ xfs_dir2_sf_entry_t *sfep; /* entry in new dir */ - xfs_dir2_sf_hdr_t *sfp; /* new shortform dir */ + xfs_dir2_sf_t *sfp; /* new shortform dir */ /* * Copy the old directory to the stack buffer. */ dp = args->dp; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; old_isize = (int)dp->i_d.di_size; buf = kmem_alloc(old_isize, KM_SLEEP); - oldsfp = (xfs_dir2_sf_hdr_t *)buf; + oldsfp = (xfs_dir2_sf_t *)buf; memcpy(oldsfp, sfp, old_isize); /* * Loop over the old directory finding the place we're going @@ -558,7 +485,7 @@ xfs_dir2_sf_addname_hard( /* * Reset the pointer since the buffer was reallocated. */ - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; /* * Copy the first part of the directory, including the header. 
*/ @@ -571,11 +498,12 @@ xfs_dir2_sf_addname_hard( sfep->namelen = args->namelen; xfs_dir2_sf_put_offset(sfep, offset); memcpy(sfep->name, args->name, sfep->namelen); - xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber); - sfp->count++; + xfs_dir2_sf_put_inumber(sfp, &args->inumber, + xfs_dir2_sf_inumberp(sfep)); + sfp->hdr.count++; #if XFS_BIG_INUMS if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) - sfp->i8count++; + sfp->hdr.i8count++; #endif /* * If there's more left to copy, do that. @@ -609,14 +537,14 @@ xfs_dir2_sf_addname_pick( xfs_mount_t *mp; /* filesystem mount point */ xfs_dir2_data_aoff_t offset; /* data block offset */ xfs_dir2_sf_entry_t *sfep; /* shortform entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ int size; /* entry's data size */ int used; /* data bytes used */ dp = args->dp; mp = dp->i_mount; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; size = xfs_dir2_data_entsize(args->namelen); offset = XFS_DIR2_DATA_FIRST_OFFSET; sfep = xfs_dir2_sf_firstentry(sfp); @@ -626,7 +554,7 @@ xfs_dir2_sf_addname_pick( * Keep track of data offset and whether we've seen a place * to insert the new entry. */ - for (i = 0; i < sfp->count; i++) { + for (i = 0; i < sfp->hdr.count; i++) { if (!holefit) holefit = offset + size <= xfs_dir2_sf_get_offset(sfep); offset = xfs_dir2_sf_get_offset(sfep) + @@ -638,7 +566,7 @@ xfs_dir2_sf_addname_pick( * was a data block (block form directory). 
*/ used = offset + - (sfp->count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (sfp->hdr.count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) + (uint)sizeof(xfs_dir2_block_tail_t); /* * If it won't fit in a block form then we can't insert it, @@ -684,30 +612,30 @@ xfs_dir2_sf_check( xfs_ino_t ino; /* entry inode number */ int offset; /* data offset */ xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ dp = args->dp; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; offset = XFS_DIR2_DATA_FIRST_OFFSET; - ino = xfs_dir2_sf_get_parent_ino(sfp); + ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); i8count = ino > XFS_DIR2_MAX_SHORT_INUM; for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); - i < sfp->count; + i < sfp->hdr.count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset); - ino = xfs_dir2_sfe_get_ino(sfp, sfep); + ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); i8count += ino > XFS_DIR2_MAX_SHORT_INUM; offset = xfs_dir2_sf_get_offset(sfep) + xfs_dir2_data_entsize(sfep->namelen); } - ASSERT(i8count == sfp->i8count); + ASSERT(i8count == sfp->hdr.i8count); ASSERT(XFS_BIG_INUMS || i8count == 0); ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size); ASSERT(offset + - (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + (uint)sizeof(xfs_dir2_block_tail_t) <= dp->i_mount->m_dirblksize); } @@ -723,7 +651,7 @@ xfs_dir2_sf_create( { xfs_inode_t *dp; /* incore directory inode */ int i8count; /* parent inode is an 8-byte number */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ int size; /* directory size */ trace_xfs_dir2_sf_create(args); @@ -753,13 +681,13 @@ xfs_dir2_sf_create( /* * Fill in the header, */ - sfp = (xfs_dir2_sf_hdr_t 
*)dp->i_df.if_u1.if_data; - sfp->i8count = i8count; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp->hdr.i8count = i8count; /* * Now can put in the inode number, since i8count is set. */ - xfs_dir2_sf_put_parent_ino(sfp, pino); - sfp->count = 0; + xfs_dir2_sf_put_inumber(sfp, &pino, &sfp->hdr.parent); + sfp->hdr.count = 0; dp->i_d.di_size = size; xfs_dir2_sf_check(args); xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); @@ -777,7 +705,7 @@ xfs_dir2_sf_getdents( xfs_mount_t *mp; /* filesystem mount point */ xfs_dir2_dataptr_t off; /* current entry's offset */ xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ xfs_dir2_dataptr_t dot_offset; xfs_dir2_dataptr_t dotdot_offset; xfs_ino_t ino; @@ -796,9 +724,9 @@ xfs_dir2_sf_getdents( ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); /* * If the block number in the offset is out of range, we're done. @@ -831,7 +759,7 @@ xfs_dir2_sf_getdents( * Put .. entry unless we're starting past it. */ if (*offset <= dotdot_offset) { - ino = xfs_dir2_sf_get_parent_ino(sfp); + ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) { *offset = dotdot_offset & 0x7fffffff; return 0; @@ -842,7 +770,7 @@ xfs_dir2_sf_getdents( * Loop while there are more entries and put'ing works. 
*/ sfep = xfs_dir2_sf_firstentry(sfp); - for (i = 0; i < sfp->count; i++) { + for (i = 0; i < sfp->hdr.count; i++) { off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, xfs_dir2_sf_get_offset(sfep)); @@ -851,7 +779,7 @@ xfs_dir2_sf_getdents( continue; } - ino = xfs_dir2_sfe_get_ino(sfp, sfep); + ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); if (filldir(dirent, (char *)sfep->name, sfep->namelen, off & 0x7fffffff, ino, DT_UNKNOWN)) { *offset = off & 0x7fffffff; @@ -877,7 +805,7 @@ xfs_dir2_sf_lookup( int i; /* entry index */ int error; xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ enum xfs_dacmp cmp; /* comparison result */ xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */ @@ -896,8 +824,8 @@ xfs_dir2_sf_lookup( } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); /* * Special case for . */ @@ -911,7 +839,7 @@ xfs_dir2_sf_lookup( */ if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { - args->inumber = xfs_dir2_sf_get_parent_ino(sfp); + args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); args->cmpresult = XFS_CMP_EXACT; return XFS_ERROR(EEXIST); } @@ -919,7 +847,7 @@ xfs_dir2_sf_lookup( * Loop over all the entries trying to match ours. 
*/ ci_sfep = NULL; - for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { /* * Compare name and if it's an exact match, return the inode @@ -930,7 +858,8 @@ xfs_dir2_sf_lookup( sfep->namelen); if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { args->cmpresult = cmp; - args->inumber = xfs_dir2_sfe_get_ino(sfp, sfep); + args->inumber = xfs_dir2_sf_get_inumber(sfp, + xfs_dir2_sf_inumberp(sfep)); if (cmp == XFS_CMP_EXACT) return XFS_ERROR(EEXIST); ci_sfep = sfep; @@ -962,7 +891,7 @@ xfs_dir2_sf_removename( int newsize; /* new inode size */ int oldsize; /* old inode size */ xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ trace_xfs_dir2_sf_removename(args); @@ -979,31 +908,32 @@ xfs_dir2_sf_removename( } ASSERT(dp->i_df.if_bytes == oldsize); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->i8count)); + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); /* * Loop over the old directory entries. * Find the one we're deleting. */ - for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { if (xfs_da_compname(args, sfep->name, sfep->namelen) == XFS_CMP_EXACT) { - ASSERT(xfs_dir2_sfe_get_ino(sfp, sfep) == - args->inumber); + ASSERT(xfs_dir2_sf_get_inumber(sfp, + xfs_dir2_sf_inumberp(sfep)) == + args->inumber); break; } } /* * Didn't find it. */ - if (i == sfp->count) + if (i == sfp->hdr.count) return XFS_ERROR(ENOENT); /* * Calculate sizes. 
*/ byteoff = (int)((char *)sfep - (char *)sfp); - entsize = xfs_dir2_sf_entsize(sfp, args->namelen); + entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen); newsize = oldsize - entsize; /* * Copy the part if any after the removed entry, sliding it down. @@ -1014,22 +944,22 @@ xfs_dir2_sf_removename( /* * Fix up the header and file size. */ - sfp->count--; + sfp->hdr.count--; dp->i_d.di_size = newsize; /* * Reallocate, making it smaller. */ xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; #if XFS_BIG_INUMS /* * Are we changing inode number size? */ if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) { - if (sfp->i8count == 1) + if (sfp->hdr.i8count == 1) xfs_dir2_sf_toino4(args); else - sfp->i8count--; + sfp->hdr.i8count--; } #endif xfs_dir2_sf_check(args); @@ -1053,7 +983,7 @@ xfs_dir2_sf_replace( int i8elevated; /* sf_toino8 set i8count=1 */ #endif xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_t *sfp; /* shortform structure */ trace_xfs_dir2_sf_replace(args); @@ -1069,19 +999,19 @@ xfs_dir2_sf_replace( } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); #if XFS_BIG_INUMS /* * New inode number is large, and need to convert to 8-byte inodes. 
*/ - if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) { int error; /* error return value */ int newsize; /* new inode size */ newsize = dp->i_df.if_bytes + - (sfp->count + 1) * + (sfp->hdr.count + 1) * ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); /* @@ -1099,7 +1029,7 @@ xfs_dir2_sf_replace( */ xfs_dir2_sf_toino8(args); i8elevated = 1; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; } else i8elevated = 0; #endif @@ -1110,32 +1040,34 @@ xfs_dir2_sf_replace( if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { #if XFS_BIG_INUMS || defined(DEBUG) - ino = xfs_dir2_sf_get_parent_ino(sfp); + ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); ASSERT(args->inumber != ino); #endif - xfs_dir2_sf_put_parent_ino(sfp, args->inumber); + xfs_dir2_sf_put_inumber(sfp, &args->inumber, &sfp->hdr.parent); } /* * Normal entry, look for the name. */ else { for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); - i < sfp->count; + i < sfp->hdr.count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { if (xfs_da_compname(args, sfep->name, sfep->namelen) == XFS_CMP_EXACT) { #if XFS_BIG_INUMS || defined(DEBUG) - ino = xfs_dir2_sfe_get_ino(sfp, sfep); + ino = xfs_dir2_sf_get_inumber(sfp, + xfs_dir2_sf_inumberp(sfep)); ASSERT(args->inumber != ino); #endif - xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber); + xfs_dir2_sf_put_inumber(sfp, &args->inumber, + xfs_dir2_sf_inumberp(sfep)); break; } } /* * Didn't find it. */ - if (i == sfp->count) { + if (i == sfp->hdr.count) { ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); #if XFS_BIG_INUMS if (i8elevated) @@ -1153,10 +1085,10 @@ xfs_dir2_sf_replace( /* * And the old count was one, so need to convert to small. 
*/ - if (sfp->i8count == 1) + if (sfp->hdr.i8count == 1) xfs_dir2_sf_toino4(args); else - sfp->i8count--; + sfp->hdr.i8count--; } /* * See if the old number was small, the new number is large. @@ -1167,9 +1099,9 @@ xfs_dir2_sf_replace( * add to the i8count unless we just converted to 8-byte * inodes (which does an implied i8count = 1) */ - ASSERT(sfp->i8count != 0); + ASSERT(sfp->hdr.i8count != 0); if (!i8elevated) - sfp->i8count++; + sfp->hdr.i8count++; } #endif xfs_dir2_sf_check(args); @@ -1189,12 +1121,13 @@ xfs_dir2_sf_toino4( char *buf; /* old dir's buffer */ xfs_inode_t *dp; /* incore directory inode */ int i; /* entry index */ + xfs_ino_t ino; /* entry inode number */ int newsize; /* new inode size */ xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ - xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */ + xfs_dir2_sf_t *oldsfp; /* old sf directory */ int oldsize; /* old inode size */ xfs_dir2_sf_entry_t *sfep; /* new sf entry */ - xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ + xfs_dir2_sf_t *sfp; /* new sf directory */ trace_xfs_dir2_sf_toino4(args); @@ -1207,42 +1140,44 @@ xfs_dir2_sf_toino4( */ oldsize = dp->i_df.if_bytes; buf = kmem_alloc(oldsize, KM_SLEEP); - oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(oldsfp->i8count == 1); + oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsfp->hdr.i8count == 1); memcpy(buf, oldsfp, oldsize); /* * Compute the new inode size. */ newsize = oldsize - - (oldsfp->count + 1) * + (oldsfp->hdr.count + 1) * ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); /* * Reset our pointers, the data has moved. */ - oldsfp = (xfs_dir2_sf_hdr_t *)buf; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + oldsfp = (xfs_dir2_sf_t *)buf; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; /* * Fill in the new header. 
*/ - sfp->count = oldsfp->count; - sfp->i8count = 0; - xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp)); + sfp->hdr.count = oldsfp->hdr.count; + sfp->hdr.i8count = 0; + ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent); + xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent); /* * Copy the entries field by field. */ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), oldsfep = xfs_dir2_sf_firstentry(oldsfp); - i < sfp->count; + i < sfp->hdr.count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep), oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) { sfep->namelen = oldsfep->namelen; sfep->offset = oldsfep->offset; memcpy(sfep->name, oldsfep->name, sfep->namelen); - xfs_dir2_sfe_put_ino(sfp, sfep, - xfs_dir2_sfe_get_ino(oldsfp, oldsfep)); + ino = xfs_dir2_sf_get_inumber(oldsfp, + xfs_dir2_sf_inumberp(oldsfep)); + xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep)); } /* * Clean up the inode. @@ -1264,12 +1199,13 @@ xfs_dir2_sf_toino8( char *buf; /* old dir's buffer */ xfs_inode_t *dp; /* incore directory inode */ int i; /* entry index */ + xfs_ino_t ino; /* entry inode number */ int newsize; /* new inode size */ xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ - xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */ + xfs_dir2_sf_t *oldsfp; /* old sf directory */ int oldsize; /* old inode size */ xfs_dir2_sf_entry_t *sfep; /* new sf entry */ - xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ + xfs_dir2_sf_t *sfp; /* new sf directory */ trace_xfs_dir2_sf_toino8(args); @@ -1282,42 +1218,44 @@ xfs_dir2_sf_toino8( */ oldsize = dp->i_df.if_bytes; buf = kmem_alloc(oldsize, KM_SLEEP); - oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(oldsfp->i8count == 0); + oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsfp->hdr.i8count == 0); memcpy(buf, oldsfp, oldsize); /* * Compute the new inode size. 
*/ newsize = oldsize + - (oldsfp->count + 1) * + (oldsfp->hdr.count + 1) * ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); /* * Reset our pointers, the data has moved. */ - oldsfp = (xfs_dir2_sf_hdr_t *)buf; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + oldsfp = (xfs_dir2_sf_t *)buf; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; /* * Fill in the new header. */ - sfp->count = oldsfp->count; - sfp->i8count = 1; - xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp)); + sfp->hdr.count = oldsfp->hdr.count; + sfp->hdr.i8count = 1; + ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent); + xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent); /* * Copy the entries field by field. */ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), oldsfep = xfs_dir2_sf_firstentry(oldsfp); - i < sfp->count; + i < sfp->hdr.count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep), oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) { sfep->namelen = oldsfep->namelen; sfep->offset = oldsfep->offset; memcpy(sfep->name, oldsfep->name, sfep->namelen); - xfs_dir2_sfe_put_ino(sfp, sfep, - xfs_dir2_sfe_get_ino(oldsfp, oldsfep)); + ino = xfs_dir2_sf_get_inumber(oldsfp, + xfs_dir2_sf_inumberp(oldsfep)); + xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep)); } /* * Clean up the inode. diff --git a/trunk/fs/xfs/xfs_dir2_sf.h b/trunk/fs/xfs/xfs_dir2_sf.h new file mode 100644 index 000000000000..6ac44b550d39 --- /dev/null +++ b/trunk/fs/xfs/xfs_dir2_sf.h @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DIR2_SF_H__ +#define __XFS_DIR2_SF_H__ + +/* + * Directory layout when stored internal to an inode. + * + * Small directories are packed as tightly as possible so as to + * fit into the literal area of the inode. + */ + +struct uio; +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_dir2_block; +struct xfs_inode; +struct xfs_mount; +struct xfs_trans; + +/* + * Inode number stored as 8 8-bit values. + */ +typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t; + +/* + * Inode number stored as 4 8-bit values. + * Works a lot of the time, when all the inode numbers in a directory + * fit in 32 bits. + */ +typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t; + +typedef union { + xfs_dir2_ino8_t i8; + xfs_dir2_ino4_t i4; +} xfs_dir2_inou_t; +#define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL) + +/* + * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t. + * Only need 16 bits, this is the byte offset into the single block form. + */ +typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t; + +/* + * The parent directory has a dedicated field, and the self-pointer must + * be calculated on the fly. + * + * Entries are packed toward the top as tightly as possible. The header + * and the elements must be memcpy'd out into a work area to get correct + * alignment for the inode number fields. 
+ */ +typedef struct xfs_dir2_sf_hdr { + __uint8_t count; /* count of entries */ + __uint8_t i8count; /* count of 8-byte inode #s */ + xfs_dir2_inou_t parent; /* parent dir inode number */ +} __arch_pack xfs_dir2_sf_hdr_t; + +typedef struct xfs_dir2_sf_entry { + __uint8_t namelen; /* actual name length */ + xfs_dir2_sf_off_t offset; /* saved offset */ + __uint8_t name[1]; /* name, variable size */ + xfs_dir2_inou_t inumber; /* inode number, var. offset */ +} __arch_pack xfs_dir2_sf_entry_t; + +typedef struct xfs_dir2_sf { + xfs_dir2_sf_hdr_t hdr; /* shortform header */ + xfs_dir2_sf_entry_t list[1]; /* shortform entries */ +} xfs_dir2_sf_t; + +static inline int xfs_dir2_sf_hdr_size(int i8count) +{ + return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \ + ((i8count) == 0) * \ + ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); +} + +static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep) +{ + return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen]; +} + +static inline xfs_intino_t +xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from) +{ + return ((sfp)->hdr.i8count == 0 ? 
\ + (xfs_intino_t)XFS_GET_DIR_INO4((from)->i4) : \ + (xfs_intino_t)XFS_GET_DIR_INO8((from)->i8)); +} + +static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from, + xfs_dir2_inou_t *to) +{ + if ((sfp)->hdr.i8count == 0) + XFS_PUT_DIR_INO4(*(from), (to)->i4); + else + XFS_PUT_DIR_INO8(*(from), (to)->i8); +} + +static inline xfs_dir2_data_aoff_t +xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep) +{ + return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i); +} + +static inline void +xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off) +{ + INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off); +} + +static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len) +{ + return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \ + ((sfp)->hdr.i8count == 0) * \ + ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); +} + +static inline int +xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep) +{ + return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (sfep)->namelen - \ + ((sfp)->hdr.i8count == 0) * \ + ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); +} + +static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp) +{ + return ((xfs_dir2_sf_entry_t *) \ + ((char *)(sfp) + xfs_dir2_sf_hdr_size(sfp->hdr.i8count))); +} + +static inline xfs_dir2_sf_entry_t * +xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep) +{ + return ((xfs_dir2_sf_entry_t *) \ + ((char *)(sfep) + xfs_dir2_sf_entsize_byentry(sfp,sfep))); +} + +/* + * Functions. 
+ */ +extern int xfs_dir2_block_sfsize(struct xfs_inode *dp, + struct xfs_dir2_block *block, + xfs_dir2_sf_hdr_t *sfhp); +extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp, + int size, xfs_dir2_sf_hdr_t *sfhp); +extern int xfs_dir2_sf_addname(struct xfs_da_args *args); +extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); +extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent, + xfs_off_t *offset, filldir_t filldir); +extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); +extern int xfs_dir2_sf_removename(struct xfs_da_args *args); +extern int xfs_dir2_sf_replace(struct xfs_da_args *args); + +#endif /* __XFS_DIR2_SF_H__ */ diff --git a/trunk/fs/xfs/xfs_fs.h b/trunk/fs/xfs/xfs_fs.h index c13fed8c394a..8f6fc1a96386 100644 --- a/trunk/fs/xfs/xfs_fs.h +++ b/trunk/fs/xfs/xfs_fs.h @@ -249,11 +249,6 @@ typedef struct xfs_fsop_resblks { #define XFS_MAX_LOG_BYTES \ ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) -/* Used for sanity checks on superblock */ -#define XFS_MAX_DBLOCKS(s) ((xfs_drfsbno_t)(s)->sb_agcount * (s)->sb_agblocks) -#define XFS_MIN_DBLOCKS(s) ((xfs_drfsbno_t)((s)->sb_agcount - 1) * \ - (s)->sb_agblocks + XFS_MIN_AG_BLOCKS) - /* * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT */ diff --git a/trunk/fs/xfs/xfs_ialloc.c b/trunk/fs/xfs/xfs_ialloc.c index dd5628bd8d0b..84ebeec16642 100644 --- a/trunk/fs/xfs/xfs_ialloc.c +++ b/trunk/fs/xfs/xfs_ialloc.c @@ -683,7 +683,7 @@ xfs_dialloc( return 0; } agi = XFS_BUF_TO_AGI(agbp); - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); + ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); } else { /* * Continue where we left off before. 
In this case, we @@ -691,7 +691,7 @@ xfs_dialloc( */ agbp = *IO_agbp; agi = XFS_BUF_TO_AGI(agbp); - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); + ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); ASSERT(be32_to_cpu(agi->agi_freecount) > 0); } mp = tp->t_mountp; @@ -775,7 +775,7 @@ xfs_dialloc( if (error) goto nextag; agi = XFS_BUF_TO_AGI(agbp); - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); + ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); } /* * Here with an allocation group that has a free inode. @@ -944,7 +944,7 @@ xfs_dialloc( * See if the most recently allocated block has any free. */ newino: - if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { + if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), XFS_LOOKUP_EQ, &i); if (error) @@ -1085,7 +1085,7 @@ xfs_difree( return error; } agi = XFS_BUF_TO_AGI(agbp); - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); + ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); ASSERT(agbno < be32_to_cpu(agi->agi_length)); /* * Initialize the cursor. @@ -1438,7 +1438,7 @@ xfs_ialloc_log_agi( xfs_agi_t *agi; /* allocation group header */ agi = XFS_BUF_TO_AGI(bp); - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); + ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); #endif /* * Compute byte offsets for the first and last fields. @@ -1492,7 +1492,7 @@ xfs_read_agi( /* * Validate the magic number of the agi block. 
*/ - agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) && + agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) && be32_to_cpu(agi->agi_seqno) == agno; if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, diff --git a/trunk/fs/xfs/xfs_ialloc_btree.c b/trunk/fs/xfs/xfs_ialloc_btree.c index c6a75815aea0..16921f55c542 100644 --- a/trunk/fs/xfs/xfs_ialloc_btree.c +++ b/trunk/fs/xfs/xfs_ialloc_btree.c @@ -31,6 +31,7 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" @@ -204,6 +205,72 @@ xfs_inobt_recs_inorder( } #endif /* DEBUG */ +#ifdef XFS_BTREE_TRACE +ktrace_t *xfs_inobt_trace_buf; + +STATIC void +xfs_inobt_trace_enter( + struct xfs_btree_cur *cur, + const char *func, + char *s, + int type, + int line, + __psunsigned_t a0, + __psunsigned_t a1, + __psunsigned_t a2, + __psunsigned_t a3, + __psunsigned_t a4, + __psunsigned_t a5, + __psunsigned_t a6, + __psunsigned_t a7, + __psunsigned_t a8, + __psunsigned_t a9, + __psunsigned_t a10) +{ + ktrace_enter(xfs_inobt_trace_buf, (void *)(__psint_t)type, + (void *)func, (void *)s, NULL, (void *)cur, + (void *)a0, (void *)a1, (void *)a2, (void *)a3, + (void *)a4, (void *)a5, (void *)a6, (void *)a7, + (void *)a8, (void *)a9, (void *)a10); +} + +STATIC void +xfs_inobt_trace_cursor( + struct xfs_btree_cur *cur, + __uint32_t *s0, + __uint64_t *l0, + __uint64_t *l1) +{ + *s0 = cur->bc_private.a.agno; + *l0 = cur->bc_rec.i.ir_startino; + *l1 = cur->bc_rec.i.ir_free; +} + +STATIC void +xfs_inobt_trace_key( + struct xfs_btree_cur *cur, + union xfs_btree_key *key, + __uint64_t *l0, + __uint64_t *l1) +{ + *l0 = be32_to_cpu(key->inobt.ir_startino); + *l1 = 0; +} + +STATIC void +xfs_inobt_trace_record( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + __uint64_t *l0, + __uint64_t *l1, + __uint64_t *l2) +{ + *l0 = 
be32_to_cpu(rec->inobt.ir_startino); + *l1 = be32_to_cpu(rec->inobt.ir_freecount); + *l2 = be64_to_cpu(rec->inobt.ir_free); +} +#endif /* XFS_BTREE_TRACE */ + static const struct xfs_btree_ops xfs_inobt_ops = { .rec_len = sizeof(xfs_inobt_rec_t), .key_len = sizeof(xfs_inobt_key_t), @@ -219,10 +286,18 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .init_rec_from_cur = xfs_inobt_init_rec_from_cur, .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, .key_diff = xfs_inobt_key_diff, + #ifdef DEBUG .keys_inorder = xfs_inobt_keys_inorder, .recs_inorder = xfs_inobt_recs_inorder, #endif + +#ifdef XFS_BTREE_TRACE + .trace_enter = xfs_inobt_trace_enter, + .trace_cursor = xfs_inobt_trace_cursor, + .trace_key = xfs_inobt_trace_key, + .trace_record = xfs_inobt_trace_record, +#endif }; /* diff --git a/trunk/fs/xfs/xfs_iget.c b/trunk/fs/xfs/xfs_iget.c index 7759812c1bbe..3631783b2b53 100644 --- a/trunk/fs/xfs/xfs_iget.c +++ b/trunk/fs/xfs/xfs_iget.c @@ -38,6 +38,7 @@ #include "xfs_trans_priv.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" +#include "xfs_btree_trace.h" #include "xfs_trace.h" diff --git a/trunk/fs/xfs/xfs_inode.c b/trunk/fs/xfs/xfs_inode.c index 3cc21ddf9f7e..a098a20ca63e 100644 --- a/trunk/fs/xfs/xfs_inode.c +++ b/trunk/fs/xfs/xfs_inode.c @@ -37,6 +37,7 @@ #include "xfs_buf_item.h" #include "xfs_inode_item.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_bmap.h" @@ -51,7 +52,7 @@ kmem_zone_t *xfs_ifork_zone; kmem_zone_t *xfs_inode_zone; /* - * Used in xfs_itruncate_extents(). This is the maximum number of extents + * Used in xfs_itruncate(). This is the maximum number of extents * freed from a file in a single transaction. 
*/ #define XFS_ITRUNC_MAX_EXTENTS 2 @@ -166,7 +167,7 @@ xfs_imap_to_bp( dip = (xfs_dinode_t *)xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); - di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && + di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC && XFS_DINODE_GOOD_VERSION(dip->di_version); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, @@ -801,7 +802,7 @@ xfs_iread( * If we got something that isn't an inode it means someone * (nfs or dmi) has a stale handle. */ - if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) { + if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) { #ifdef DEBUG xfs_alert(mp, "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)", @@ -1178,15 +1179,15 @@ xfs_ialloc( * at least do it for regular files. */ #ifdef DEBUG -STATIC void +void xfs_isize_check( - struct xfs_inode *ip, - xfs_fsize_t isize) + xfs_mount_t *mp, + xfs_inode_t *ip, + xfs_fsize_t isize) { - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t map_first; - int nimaps; - xfs_bmbt_irec_t imaps[2]; + xfs_fileoff_t map_first; + int nimaps; + xfs_bmbt_irec_t imaps[2]; if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) return; @@ -1213,14 +1214,168 @@ xfs_isize_check( ASSERT(nimaps == 1); ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); } -#else /* DEBUG */ -#define xfs_isize_check(ip, isize) #endif /* DEBUG */ /* - * Free up the underlying blocks past new_size. The new size must be smaller - * than the current size. This routine can be used both for the attribute and - * data fork, and does not modify the inode size, which is left to the caller. + * Calculate the last possible buffered byte in a file. This must + * include data that was buffered beyond the EOF by the write code. + * This also needs to deal with overflowing the xfs_fsize_t type + * which can happen for sizes near the limit. + * + * We also need to take into account any blocks beyond the EOF. It + * may be the case that they were buffered by a write which failed. 
+ * In that case the pages will still be in memory, but the inode size + * will never have been updated. + */ +STATIC xfs_fsize_t +xfs_file_last_byte( + xfs_inode_t *ip) +{ + xfs_mount_t *mp; + xfs_fsize_t last_byte; + xfs_fileoff_t last_block; + xfs_fileoff_t size_last_block; + int error; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)); + + mp = ip->i_mount; + /* + * Only check for blocks beyond the EOF if the extents have + * been read in. This eliminates the need for the inode lock, + * and it also saves us from looking when it really isn't + * necessary. + */ + if (ip->i_df.if_flags & XFS_IFEXTENTS) { + xfs_ilock(ip, XFS_ILOCK_SHARED); + error = xfs_bmap_last_offset(NULL, ip, &last_block, + XFS_DATA_FORK); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + if (error) { + last_block = 0; + } + } else { + last_block = 0; + } + size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size); + last_block = XFS_FILEOFF_MAX(last_block, size_last_block); + + last_byte = XFS_FSB_TO_B(mp, last_block); + if (last_byte < 0) { + return XFS_MAXIOFFSET(mp); + } + last_byte += (1 << mp->m_writeio_log); + if (last_byte < 0) { + return XFS_MAXIOFFSET(mp); + } + return last_byte; +} + +/* + * Start the truncation of the file to new_size. The new size + * must be smaller than the current size. This routine will + * clear the buffer and page caches of file data in the removed + * range, and xfs_itruncate_finish() will remove the underlying + * disk blocks. + * + * The inode must have its I/O lock locked EXCLUSIVELY, and it + * must NOT have the inode lock held at all. This is because we're + * calling into the buffer/page cache code and we can't hold the + * inode lock when we do so. + * + * We need to wait for any direct I/Os in flight to complete before we + * proceed with the truncate. 
This is needed to prevent the extents + * being read or written by the direct I/Os from being removed while the + * I/O is in flight as there is no other method of synchronising + * direct I/O with the truncate operation. Also, because we hold + * the IOLOCK in exclusive mode, we prevent new direct I/Os from being + * started until the truncate completes and drops the lock. Essentially, + * the xfs_ioend_wait() call forms an I/O barrier that provides strict + * ordering between direct I/Os and the truncate operation. + * + * The flags parameter can have either the value XFS_ITRUNC_DEFINITE + * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used + * in the case that the caller is locking things out of order and + * may not be able to call xfs_itruncate_finish() with the inode lock + * held without dropping the I/O lock. If the caller must drop the + * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start() + * must be called again with all the same restrictions as the initial + * call. + */ +int +xfs_itruncate_start( + xfs_inode_t *ip, + uint flags, + xfs_fsize_t new_size) +{ + xfs_fsize_t last_byte; + xfs_off_t toss_start; + xfs_mount_t *mp; + int error = 0; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + ASSERT((new_size == 0) || (new_size <= ip->i_size)); + ASSERT((flags == XFS_ITRUNC_DEFINITE) || + (flags == XFS_ITRUNC_MAYBE)); + + mp = ip->i_mount; + + /* wait for the completion of any pending DIOs */ + if (new_size == 0 || new_size < ip->i_size) + xfs_ioend_wait(ip); + + /* + * Call toss_pages or flushinval_pages to get rid of pages + * overlapping the region being removed. We have to use + * the less efficient flushinval_pages in the case that the + * caller may not be able to finish the truncate without + * dropping the inode's I/O lock. Make sure + * to catch any pages brought in by buffers overlapping + * the EOF by searching out beyond the isize by our + * block size. 
We round new_size up to a block boundary + * so that we don't toss things on the same block as + * new_size but before it. + * + * Before calling toss_page or flushinval_pages, make sure to + * call remapf() over the same region if the file is mapped. + * This frees up mapped file references to the pages in the + * given range and for the flushinval_pages case it ensures + * that we get the latest mapped changes flushed out. + */ + toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); + toss_start = XFS_FSB_TO_B(mp, toss_start); + if (toss_start < 0) { + /* + * The place to start tossing is beyond our maximum + * file size, so there is no way that the data extended + * out there. + */ + return 0; + } + last_byte = xfs_file_last_byte(ip); + trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte); + if (last_byte > toss_start) { + if (flags & XFS_ITRUNC_DEFINITE) { + xfs_tosspages(ip, toss_start, + -1, FI_REMAPF_LOCKED); + } else { + error = xfs_flushinval_pages(ip, toss_start, + -1, FI_REMAPF_LOCKED); + } + } + +#ifdef DEBUG + if (new_size == 0) { + ASSERT(VN_CACHED(VFS_I(ip)) == 0); + } +#endif + return error; +} + +/* + * Shrink the file to the given new_size. The new size must be smaller than + * the current size. This will free up the underlying blocks in the removed + * range after a call to xfs_itruncate_start() or xfs_atruncate_start(). * * The transaction passed to this routine must have made a permanent log * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the @@ -1232,6 +1387,31 @@ xfs_isize_check( * will be "held" within the returned transaction. This routine does NOT * require any disk space to be reserved for it within the transaction. * + * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it + * indicates the fork which is to be truncated. For the attribute fork we only + * support truncation to size 0. 
+ * + * We use the sync parameter to indicate whether or not the first transaction + * we perform might have to be synchronous. For the attr fork, it needs to be + * so if the unlink of the inode is not yet known to be permanent in the log. + * This keeps us from freeing and reusing the blocks of the attribute fork + * before the unlink of the inode becomes permanent. + * + * For the data fork, we normally have to run synchronously if we're being + * called out of the inactive path or we're being called out of the create path + * where we're truncating an existing file. Either way, the truncate needs to + * be sync so blocks don't reappear in the file with altered data in case of a + * crash. wsync filesystems can run the first case async because anything that + * shrinks the inode has to run sync so by the time we're called here from + * inactive, the inode size is permanently set to 0. + * + * Calls from the truncate path always need to be sync unless we're in a wsync + * filesystem and the file has already been unlinked. + * + * The caller is responsible for correctly setting the sync parameter. It gets + * too hard for us to guess here which path we're being called out of just + * based on inode state. + * * If we get an error, we must return with the inode locked and linked into the * current transaction. This keeps things simple for the higher level code, * because it always knows that the inode is locked and held in the transaction @@ -1239,30 +1419,124 @@ xfs_isize_check( * dirty on error so that transactions can be easily aborted if possible. 
*/ int -xfs_itruncate_extents( - struct xfs_trans **tpp, - struct xfs_inode *ip, - int whichfork, - xfs_fsize_t new_size) +xfs_itruncate_finish( + xfs_trans_t **tp, + xfs_inode_t *ip, + xfs_fsize_t new_size, + int fork, + int sync) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp = *tpp; - struct xfs_trans *ntp; - xfs_bmap_free_t free_list; - xfs_fsblock_t first_block; - xfs_fileoff_t first_unmap_block; - xfs_fileoff_t last_block; - xfs_filblks_t unmap_len; - int committed; - int error = 0; - int done = 0; + xfs_fsblock_t first_block; + xfs_fileoff_t first_unmap_block; + xfs_fileoff_t last_block; + xfs_filblks_t unmap_len=0; + xfs_mount_t *mp; + xfs_trans_t *ntp; + int done; + int committed; + xfs_bmap_free_t free_list; + int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); - ASSERT(new_size <= ip->i_size); - ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); + ASSERT((new_size == 0) || (new_size <= ip->i_size)); + ASSERT(*tp != NULL); + ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); + ASSERT(ip->i_transp == *tp); ASSERT(ip->i_itemp != NULL); ASSERT(ip->i_itemp->ili_lock_flags == 0); - ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); + + + ntp = *tp; + mp = (ntp)->t_mountp; + ASSERT(! XFS_NOT_DQATTACHED(mp, ip)); + + /* + * We only support truncating the entire attribute fork. + */ + if (fork == XFS_ATTR_FORK) { + new_size = 0LL; + } + first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); + trace_xfs_itruncate_finish_start(ip, new_size); + + /* + * The first thing we do is set the size to new_size permanently + * on disk. This way we don't have to worry about anyone ever + * being able to look at the data being freed even in the face + * of a crash. What we're getting around here is the case where + * we free a block, it is allocated to another file, it is written + * to, and then we crash. 
If the new data gets written to the + * file but the log buffers containing the free and reallocation + * don't, then we'd end up with garbage in the blocks being freed. + * As long as we make the new_size permanent before actually + * freeing any blocks it doesn't matter if they get written to. + * + * The callers must signal into us whether or not the size + * setting here must be synchronous. There are a few cases + * where it doesn't have to be synchronous. Those cases + * occur if the file is unlinked and we know the unlink is + * permanent or if the blocks being truncated are guaranteed + * to be beyond the inode eof (regardless of the link count) + * and the eof value is permanent. Both of these cases occur + * only on wsync-mounted filesystems. In those cases, we're + * guaranteed that no user will ever see the data in the blocks + * that are being truncated so the truncate can run async. + * In the free beyond eof case, the file may wind up with + * more blocks allocated to it than it needs if we crash + * and that won't get fixed until the next time the file + * is re-opened and closed but that's ok as that shouldn't + * be too many blocks. + * + * However, we can't just make all wsync xactions run async + * because there's one call out of the create path that needs + * to run sync where it's truncating an existing file to size + * 0 whose size is > 0. + * + * It's probably possible to come up with a test in this + * routine that would correctly distinguish all the above + * cases from the values of the function parameters and the + * inode state but for sanity's sake, I've decided to let the + * layers above just tell us. It's simpler to correctly figure + * out in the layer above exactly under what conditions we + * can run async and I think it's easier for others read and + * follow the logic in case something has to be changed. + * cscope is your friend -- rcc. + * + * The attribute fork is much simpler. 
+ * + * For the attribute fork we allow the caller to tell us whether + * the unlink of the inode that led to this call is yet permanent + * in the on disk log. If it is not and we will be freeing extents + * in this inode then we make the first transaction synchronous + * to make sure that the unlink is permanent by the time we free + * the blocks. + */ + if (fork == XFS_DATA_FORK) { + if (ip->i_d.di_nextents > 0) { + /* + * If we are not changing the file size then do + * not update the on-disk file size - we may be + * called from xfs_inactive_free_eofblocks(). If we + * update the on-disk file size and then the system + * crashes before the contents of the file are + * flushed to disk then the files may be full of + * holes (ie NULL files bug). + */ + if (ip->i_size != new_size) { + ip->i_d.di_size = new_size; + ip->i_size = new_size; + xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); + } + } + } else if (sync) { + ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC)); + if (ip->i_d.di_anextents > 0) + xfs_trans_set_sync(ntp); + } + ASSERT(fork == XFS_DATA_FORK || + (fork == XFS_ATTR_FORK && + ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) || + (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC))))); /* * Since it is possible for space to become allocated beyond @@ -1273,142 +1547,128 @@ xfs_itruncate_extents( * beyond the maximum file size (ie it is the same as last_block), * then there is nothing to do. */ - first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); - if (first_unmap_block == last_block) - return 0; - - ASSERT(first_unmap_block < last_block); - unmap_len = last_block - first_unmap_block + 1; + ASSERT(first_unmap_block <= last_block); + done = 0; + if (last_block == first_unmap_block) { + done = 1; + } else { + unmap_len = last_block - first_unmap_block + 1; + } while (!done) { + /* + * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi() + * will tell us whether it freed the entire range or + * not. 
If this is a synchronous mount (wsync), + * then we can tell bunmapi to keep all the + * transactions asynchronous since the unlink + * transaction that made this inode inactive has + * already hit the disk. There's no danger of + * the freed blocks being reused, there being a + * crash, and the reused blocks suddenly reappearing + * in this file with garbage in them once recovery + * runs. + */ xfs_bmap_init(&free_list, &first_block); - error = xfs_bunmapi(tp, ip, + error = xfs_bunmapi(ntp, ip, first_unmap_block, unmap_len, - xfs_bmapi_aflag(whichfork), + xfs_bmapi_aflag(fork), XFS_ITRUNC_MAX_EXTENTS, &first_block, &free_list, &done); - if (error) - goto out_bmap_cancel; + if (error) { + /* + * If the bunmapi call encounters an error, + * return to the caller where the transaction + * can be properly aborted. We just need to + * make sure we're not holding any resources + * that we were not when we came in. + */ + xfs_bmap_cancel(&free_list); + return error; + } /* * Duplicate the transaction that has the permanent * reservation and commit the old transaction. */ - error = xfs_bmap_finish(&tp, &free_list, &committed); + error = xfs_bmap_finish(tp, &free_list, &committed); + ntp = *tp; if (committed) - xfs_trans_ijoin(tp, ip); - if (error) - goto out_bmap_cancel; + xfs_trans_ijoin(ntp, ip); + + if (error) { + /* + * If the bmap finish call encounters an error, return + * to the caller where the transaction can be properly + * aborted. We just need to make sure we're not + * holding any resources that we were not when we came + * in. + * + * Aborting from this point might lose some blocks in + * the file system, but oh well. + */ + xfs_bmap_cancel(&free_list); + return error; + } if (committed) { /* * Mark the inode dirty so it will be logged and * moved forward in the log as part of every commit. 
*/ - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); } - ntp = xfs_trans_dup(tp); - error = xfs_trans_commit(tp, 0); - tp = ntp; + ntp = xfs_trans_dup(ntp); + error = xfs_trans_commit(*tp, 0); + *tp = ntp; - xfs_trans_ijoin(tp, ip); + xfs_trans_ijoin(ntp, ip); if (error) - goto out; - + return error; /* - * Transaction commit worked ok so we can drop the extra ticket + * transaction commit worked ok so we can drop the extra ticket * reference that we gained in xfs_trans_dup() */ - xfs_log_ticket_put(tp->t_ticket); - error = xfs_trans_reserve(tp, 0, + xfs_log_ticket_put(ntp->t_ticket); + error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT); if (error) - goto out; + return error; } - -out: - *tpp = tp; - return error; -out_bmap_cancel: /* - * If the bunmapi call encounters an error, return to the caller where - * the transaction can be properly aborted. We just need to make sure - * we're not holding any resources that we were not when we came in. + * Only update the size in the case of the data fork, but + * always re-log the inode so that our permanent transaction + * can keep on rolling it forward in the log. */ - xfs_bmap_cancel(&free_list); - goto out; -} - -int -xfs_itruncate_data( - struct xfs_trans **tpp, - struct xfs_inode *ip, - xfs_fsize_t new_size) -{ - int error; - - trace_xfs_itruncate_data_start(ip, new_size); - - /* - * The first thing we do is set the size to new_size permanently on - * disk. This way we don't have to worry about anyone ever being able - * to look at the data being freed even in the face of a crash. - * What we're getting around here is the case where we free a block, it - * is allocated to another file, it is written to, and then we crash. - * If the new data gets written to the file but the log buffers - * containing the free and reallocation don't, then we'd end up with - * garbage in the blocks being freed. 
As long as we make the new_size - * permanent before actually freeing any blocks it doesn't matter if - * they get written to. - */ - if (ip->i_d.di_nextents > 0) { + if (fork == XFS_DATA_FORK) { + xfs_isize_check(mp, ip, new_size); /* - * If we are not changing the file size then do not update - * the on-disk file size - we may be called from - * xfs_inactive_free_eofblocks(). If we update the on-disk - * file size and then the system crashes before the contents - * of the file are flushed to disk then the files may be - * full of holes (ie NULL files bug). + * If we are not changing the file size then do + * not update the on-disk file size - we may be + * called from xfs_inactive_free_eofblocks(). If we + * update the on-disk file size and then the system + * crashes before the contents of the file are + * flushed to disk then the files may be full of + * holes (ie NULL files bug). */ if (ip->i_size != new_size) { ip->i_d.di_size = new_size; ip->i_size = new_size; - xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); } } - - error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size); - if (error) - return error; - - /* - * If we are not changing the file size then do not update the on-disk - * file size - we may be called from xfs_inactive_free_eofblocks(). - * If we update the on-disk file size and then the system crashes - * before the contents of the file are flushed to disk then the files - * may be full of holes (ie NULL files bug). - */ - xfs_isize_check(ip, new_size); - if (ip->i_size != new_size) { - ip->i_d.di_size = new_size; - ip->i_size = new_size; - } - - ASSERT(new_size != 0 || ip->i_delayed_blks == 0); - ASSERT(new_size != 0 || ip->i_d.di_nextents == 0); - - /* - * Always re-log the inode so that our permanent transaction can keep - * on rolling it forward in the log. 
- */ - xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); - - trace_xfs_itruncate_data_end(ip, new_size); + xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); + ASSERT((new_size != 0) || + (fork == XFS_ATTR_FORK) || + (ip->i_delayed_blks == 0)); + ASSERT((new_size != 0) || + (fork == XFS_ATTR_FORK) || + (ip->i_d.di_nextents == 0)); + trace_xfs_itruncate_finish_end(ip, new_size); return 0; } @@ -1434,6 +1694,7 @@ xfs_iunlink( ASSERT(ip->i_d.di_nlink == 0); ASSERT(ip->i_d.di_mode != 0); + ASSERT(ip->i_transp == tp); mp = tp->t_mountp; @@ -1456,7 +1717,7 @@ xfs_iunlink( ASSERT(agi->agi_unlinked[bucket_index]); ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino); - if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) { + if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) { /* * There is already another inode in the bucket we need * to add ourselves to. Add us at the front of the list. @@ -1467,7 +1728,8 @@ xfs_iunlink( if (error) return error; - ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO)); + ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO); + /* both on-disk, don't endian flip twice */ dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; offset = ip->i_imap.im_boffset + offsetof(xfs_dinode_t, di_next_unlinked); @@ -1532,7 +1794,7 @@ xfs_iunlink_remove( agino = XFS_INO_TO_AGINO(mp, ip->i_ino); ASSERT(agino != 0); bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; - ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)); + ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO); ASSERT(agi->agi_unlinked[bucket_index]); if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) { @@ -1697,7 +1959,7 @@ xfs_ifree_cluster( * stale first, we will not attempt to lock them in the loop * below as the XFS_ISTALE flag will be set. 
*/ - lip = bp->b_fspriv; + lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); while (lip) { if (lip->li_type == XFS_LI_INODE) { iip = (xfs_inode_log_item_t *)lip; @@ -1824,6 +2086,7 @@ xfs_ifree( xfs_buf_t *ibp; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ip->i_transp == tp); ASSERT(ip->i_d.di_nlink == 0); ASSERT(ip->i_d.di_nextents == 0); ASSERT(ip->i_d.di_anextents == 0); @@ -2470,7 +2733,7 @@ xfs_iflush_cluster( * mark the buffer as an error and call them. Otherwise * mark it as stale and brelse. */ - if (bp->b_iodone) { + if (XFS_BUF_IODONE_FUNC(bp)) { XFS_BUF_UNDONE(bp); XFS_BUF_STALE(bp); XFS_BUF_ERROR(bp,EIO); @@ -2657,7 +2920,7 @@ xfs_iflush_int( */ xfs_synchronize_times(ip); - if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), + if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", @@ -2810,8 +3073,8 @@ xfs_iflush_int( */ xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); - ASSERT(bp->b_fspriv != NULL); - ASSERT(bp->b_iodone != NULL); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); } else { /* * We're flushing an inode which is not in the AIL and has diff --git a/trunk/fs/xfs/xfs_inode.h b/trunk/fs/xfs/xfs_inode.h index a97644ab945a..964cfea77686 100644 --- a/trunk/fs/xfs/xfs_inode.h +++ b/trunk/fs/xfs/xfs_inode.h @@ -241,6 +241,7 @@ typedef struct xfs_inode { xfs_ifork_t i_df; /* data fork */ /* Transaction and locking information. */ + struct xfs_trans *i_transp; /* ptr to owning transaction*/ struct xfs_inode_log_item *i_itemp; /* logging information */ mrlock_t i_lock; /* inode lock */ mrlock_t i_iolock; /* inode IO lock */ @@ -456,6 +457,16 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) extern struct lock_class_key xfs_iolock_reclaimable; +/* + * Flags for xfs_itruncate_start(). 
+ */ +#define XFS_ITRUNC_DEFINITE 0x1 +#define XFS_ITRUNC_MAYBE 0x2 + +#define XFS_ITRUNC_FLAGS \ + { XFS_ITRUNC_DEFINITE, "DEFINITE" }, \ + { XFS_ITRUNC_MAYBE, "MAYBE" } + /* * For multiple groups support: if S_ISGID bit is set in the parent * directory, group of new file is set to that of the parent, and @@ -490,10 +501,9 @@ uint xfs_ip2xflags(struct xfs_inode *); uint xfs_dic2xflags(struct xfs_dinode *); int xfs_ifree(struct xfs_trans *, xfs_inode_t *, struct xfs_bmap_free *); -int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *, - int, xfs_fsize_t); -int xfs_itruncate_data(struct xfs_trans **, struct xfs_inode *, - xfs_fsize_t); +int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); +int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, + xfs_fsize_t, int, int); int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); void xfs_iext_realloc(xfs_inode_t *, int, int); @@ -569,6 +579,13 @@ void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int); #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) +#ifdef DEBUG +void xfs_isize_check(struct xfs_mount *, struct xfs_inode *, + xfs_fsize_t); +#else /* DEBUG */ +#define xfs_isize_check(mp, ip, isize) +#endif /* DEBUG */ + #if defined(DEBUG) void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); #else diff --git a/trunk/fs/xfs/xfs_inode_item.c b/trunk/fs/xfs/xfs_inode_item.c index 588406dc6a35..b1e88d56069c 100644 --- a/trunk/fs/xfs/xfs_inode_item.c +++ b/trunk/fs/xfs/xfs_inode_item.c @@ -632,8 +632,13 @@ xfs_inode_item_unlock( struct xfs_inode *ip = iip->ili_inode; unsigned short lock_flags; - ASSERT(ip->i_itemp != NULL); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(iip->ili_inode->i_itemp != NULL); + ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); + + /* + * Clear the transaction pointer in the inode. 
+ */ + ip->i_transp = NULL; /* * If the inode needed a separate buffer with which to log @@ -659,8 +664,8 @@ xfs_inode_item_unlock( lock_flags = iip->ili_lock_flags; iip->ili_lock_flags = 0; if (lock_flags) { - xfs_iunlock(ip, lock_flags); - IRELE(ip); + xfs_iunlock(iip->ili_inode, lock_flags); + IRELE(iip->ili_inode); } } @@ -874,7 +879,7 @@ xfs_iflush_done( * Scan the buffer IO completions for other inodes being completed and * attach them to the current inode log item. */ - blip = bp->b_fspriv; + blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); prev = NULL; while (blip != NULL) { if (lip->li_cb != xfs_iflush_done) { @@ -886,7 +891,7 @@ xfs_iflush_done( /* remove from list */ next = blip->li_bio_list; if (!prev) { - bp->b_fspriv = next; + XFS_BUF_SET_FSPRIVATE(bp, next); } else { prev->li_bio_list = next; } diff --git a/trunk/fs/xfs/xfs_inum.h b/trunk/fs/xfs/xfs_inum.h index b253c0ea5bec..b8e4ee4e89a4 100644 --- a/trunk/fs/xfs/xfs_inum.h +++ b/trunk/fs/xfs/xfs_inum.h @@ -28,6 +28,17 @@ typedef __uint32_t xfs_agino_t; /* within allocation grp inode number */ +/* + * Useful inode bits for this kernel. + * Used in some places where having 64-bits in the 32-bit kernels + * costs too much. 
+ */ +#if XFS_BIG_INUMS +typedef xfs_ino_t xfs_intino_t; +#else +typedef __uint32_t xfs_intino_t; +#endif + #define NULLFSINO ((xfs_ino_t)-1) #define NULLAGINO ((xfs_agino_t)-1) diff --git a/trunk/fs/xfs/xfs_log.c b/trunk/fs/xfs/xfs_log.c index 06ff8437ed8e..41d5b8f2bf92 100644 --- a/trunk/fs/xfs/xfs_log.c +++ b/trunk/fs/xfs/xfs_log.c @@ -871,9 +871,15 @@ xlog_space_left( void xlog_iodone(xfs_buf_t *bp) { - xlog_in_core_t *iclog = bp->b_fspriv; - xlog_t *l = iclog->ic_log; - int aborted = 0; + xlog_in_core_t *iclog; + xlog_t *l; + int aborted; + + iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *); + ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2); + XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); + aborted = 0; + l = iclog->ic_log; /* * Race to shutdown the filesystem if we see an error. @@ -1050,9 +1056,10 @@ xlog_alloc_log(xfs_mount_t *mp, bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); if (!bp) goto out_free_log; - bp->b_iodone = xlog_iodone; + XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); + XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(xfs_buf_islocked(bp)); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); log->l_xbuf = bp; spin_lock_init(&log->l_icloglock); @@ -1083,8 +1090,10 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_iclog_size, 0); if (!bp) goto out_free_iclog; - - bp->b_iodone = xlog_iodone; + if (!XFS_BUF_CPSEMA(bp)) + ASSERT(0); + XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); + XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); iclog->ic_bp = bp; iclog->ic_data = bp->b_addr; #ifdef DEBUG @@ -1109,7 +1118,7 @@ xlog_alloc_log(xfs_mount_t *mp, iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); - ASSERT(xfs_buf_islocked(iclog->ic_bp)); + ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); init_waitqueue_head(&iclog->ic_force_wait); init_waitqueue_head(&iclog->ic_write_wait); @@ -1245,8 +1254,9 @@ STATIC int xlog_bdstrat( struct xfs_buf *bp) { - struct xlog_in_core *iclog = 
bp->b_fspriv; + struct xlog_in_core *iclog; + iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *); if (iclog->ic_state & XLOG_STATE_IOERROR) { XFS_BUF_ERROR(bp, EIO); XFS_BUF_STALE(bp); @@ -1259,6 +1269,7 @@ xlog_bdstrat( return 0; } + bp->b_flags |= _XBF_RUN_QUEUES; xfs_buf_iorequest(bp); return 0; } @@ -1340,6 +1351,8 @@ xlog_sync(xlog_t *log, } bp = iclog->ic_bp; + ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1); + XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); XFS_STATS_ADD(xs_log_blocks, BTOBB(count)); @@ -1353,28 +1366,22 @@ xlog_sync(xlog_t *log, iclog->ic_bwritecnt = 1; } XFS_BUF_SET_COUNT(bp, count); - bp->b_fspriv = iclog; + XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */ XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); - bp->b_flags |= XBF_SYNCIO; + bp->b_flags |= XBF_LOG_BUFFER; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { - bp->b_flags |= XBF_FUA; - /* - * Flush the data device before flushing the log to make - * sure all meta data written back from the AIL actually made - * it to disk before stamping the new log tail LSN into the - * log buffer. For an external log we need to issue the - * flush explicitly, and unfortunately synchronously here; - * for an internal log we can simply use the block layer - * state machine for preflushes. + * If we have an external log device, flush the data device + * before flushing the log to make sure all meta data + * written back from the AIL actually made it to disk + * before writing out the new log tail LSN in the log buffer. 
*/ if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp) xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); - else - bp->b_flags |= XBF_FLUSH; + XFS_BUF_ORDERED(bp); } ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); @@ -1397,16 +1404,19 @@ xlog_sync(xlog_t *log, } if (split) { bp = iclog->ic_log->l_xbuf; + ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == + (unsigned long)1); + XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+ (__psint_t)count), split); - bp->b_fspriv = iclog; + XFS_BUF_SET_FSPRIVATE(bp, iclog); XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); - bp->b_flags |= XBF_SYNCIO; + bp->b_flags |= XBF_LOG_BUFFER; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) - bp->b_flags |= XBF_FUA; + XFS_BUF_ORDERED(bp); dptr = XFS_BUF_PTR(bp); /* * Bump the cycle numbers at the start of each block @@ -3511,13 +3521,13 @@ xlog_verify_iclog(xlog_t *log, spin_unlock(&log->l_icloglock); /* check log magic numbers */ - if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) + if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM) xfs_emerg(log->l_mp, "%s: invalid magic num", __func__); ptr = (xfs_caddr_t) &iclog->ic_header; for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; ptr += BBSIZE) { - if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) + if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) xfs_emerg(log->l_mp, "%s: unexpected magic num", __func__); } diff --git a/trunk/fs/xfs/xfs_log_recover.c b/trunk/fs/xfs/xfs_log_recover.c index 8fe4206de057..04142caedb2b 100644 --- a/trunk/fs/xfs/xfs_log_recover.c +++ b/trunk/fs/xfs/xfs_log_recover.c @@ -91,8 +91,6 @@ xlog_get_bp( xlog_t *log, int nbblks) { - struct xfs_buf *bp; - if (!xlog_buf_bbcount_valid(log, nbblks)) { xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", nbblks); @@ -120,10 +118,8 @@ xlog_get_bp( nbblks += log->l_sectBBsize; nbblks = 
round_up(nbblks, log->l_sectBBsize); - bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, BBTOB(nbblks), 0); - if (bp) - xfs_buf_unlock(bp); - return bp; + return xfs_buf_get_uncached(log->l_mp->m_logdev_targp, + BBTOB(nbblks), 0); } STATIC void @@ -268,7 +264,7 @@ xlog_bwrite( XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); XFS_BUF_HOLD(bp); - xfs_buf_lock(bp); + XFS_BUF_PSEMA(bp, PRIBIO); XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); @@ -304,14 +300,14 @@ xlog_header_check_recover( xfs_mount_t *mp, xlog_rec_header_t *head) { - ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); + ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); /* * IRIX doesn't write the h_fmt field and leaves it zeroed * (XLOG_FMT_UNKNOWN). This stops us from trying to recover * a dirty log created in IRIX. */ - if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) { + if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { xfs_warn(mp, "dirty log written in incompatible format - can't recover"); xlog_header_check_dump(mp, head); @@ -337,7 +333,7 @@ xlog_header_check_mount( xfs_mount_t *mp, xlog_rec_header_t *head) { - ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); + ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); if (uuid_is_nil(&head->h_fs_uuid)) { /* @@ -371,7 +367,7 @@ xlog_recover_iodone( xfs_force_shutdown(bp->b_target->bt_mount, SHUTDOWN_META_IO_ERROR); } - bp->b_iodone = NULL; + XFS_BUF_CLR_IODONE_FUNC(bp); xfs_buf_ioend(bp, 0); } @@ -538,7 +534,7 @@ xlog_find_verify_log_record( head = (xlog_rec_header_t *)offset; - if (head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) + if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(head->h_magicno)) break; if (!smallmem) @@ -920,7 +916,7 @@ xlog_find_tail( if (error) goto done; - if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { + if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { found = 1; break; } @@ -937,8 +933,8 @@ xlog_find_tail( if 
(error) goto done; - if (*(__be32 *)offset == - cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { + if (XLOG_HEADER_MAGIC_NUM == + be32_to_cpu(*(__be32 *)offset)) { found = 2; break; } @@ -1951,7 +1947,7 @@ xfs_qm_dqcheck( * This is all fine; things are still consistent, and we haven't lost * any quota information. Just don't complain about bad dquot blks. */ - if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { + if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) { if (flags & XFS_QMOPT_DOWARN) xfs_alert(mp, "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", @@ -2178,7 +2174,7 @@ xlog_recover_buffer_pass2( error = xfs_bwrite(mp, bp); } else { ASSERT(bp->b_target->bt_mount == mp); - bp->b_iodone = xlog_recover_iodone; + XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); } @@ -2242,7 +2238,7 @@ xlog_recover_inode_pass2( * Make sure the place we're flushing out to really looks * like an inode! */ - if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { + if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) { xfs_buf_relse(bp); xfs_alert(mp, "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", @@ -2438,7 +2434,7 @@ xlog_recover_inode_pass2( write_inode_buffer: ASSERT(bp->b_target->bt_mount == mp); - bp->b_iodone = xlog_recover_iodone; + XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); error: if (need_free) @@ -2560,7 +2556,7 @@ xlog_recover_dquot_pass2( ASSERT(dq_f->qlf_size == 2); ASSERT(bp->b_target->bt_mount == mp); - bp->b_iodone = xlog_recover_iodone; + XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); return (0); @@ -3299,7 +3295,7 @@ xlog_valid_rec_header( { int hlen; - if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) { + if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) { XFS_ERROR_REPORT("xlog_valid_rec_header(1)", XFS_ERRLEVEL_LOW, log->l_mp); return XFS_ERROR(EFSCORRUPTED); diff --git a/trunk/fs/xfs/xfs_mount.c b/trunk/fs/xfs/xfs_mount.c 
index 7f25245da289..b49b82363d20 100644 --- a/trunk/fs/xfs/xfs_mount.c +++ b/trunk/fs/xfs/xfs_mount.c @@ -348,7 +348,7 @@ xfs_mount_validate_sb( } /* - * More sanity checking. Most of these were stolen directly from + * More sanity checking. These were stolen directly from * xfs_repair. */ if (unlikely( @@ -371,13 +371,23 @@ xfs_mount_validate_sb( (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || - (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */) || - sbp->sb_dblocks == 0 || - sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) || - sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) { + (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) { if (loud) - XFS_CORRUPTION_ERROR("SB sanity check failed", - XFS_ERRLEVEL_LOW, mp, sbp); + xfs_warn(mp, "SB sanity check 1 failed"); + return XFS_ERROR(EFSCORRUPTED); + } + + /* + * Sanity check AG count, size fields against data size field + */ + if (unlikely( + sbp->sb_dblocks == 0 || + sbp->sb_dblocks > + (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks || + sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) * + sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) { + if (loud) + xfs_warn(mp, "SB sanity check 2 failed"); return XFS_ERROR(EFSCORRUPTED); } @@ -854,8 +864,7 @@ xfs_update_alignment(xfs_mount_t *mp) if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || (BBTOB(mp->m_swidth) & mp->m_blockmask)) { if (mp->m_flags & XFS_MOUNT_RETERR) { - xfs_warn(mp, "alignment check failed: " - "(sunit/swidth vs. blocksize)"); + xfs_warn(mp, "alignment check 1 failed"); return XFS_ERROR(EINVAL); } mp->m_dalign = mp->m_swidth = 0; @@ -866,8 +875,6 @@ xfs_update_alignment(xfs_mount_t *mp) mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { if (mp->m_flags & XFS_MOUNT_RETERR) { - xfs_warn(mp, "alignment check failed: " - "(sunit/swidth vs. 
ag size)"); return XFS_ERROR(EINVAL); } xfs_warn(mp, @@ -882,8 +889,8 @@ xfs_update_alignment(xfs_mount_t *mp) mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); } else { if (mp->m_flags & XFS_MOUNT_RETERR) { - xfs_warn(mp, "alignment check failed: " - "sunit(%d) less than bsize(%d)", + xfs_warn(mp, + "stripe alignment turned off: sunit(%d) less than bsize(%d)", mp->m_dalign, mp->m_blockmask +1); return XFS_ERROR(EINVAL); @@ -1089,6 +1096,10 @@ xfs_mount_reset_sbqflags( if (mp->m_flags & XFS_MOUNT_RDONLY) return 0; +#ifdef QUOTADEBUG + xfs_notice(mp, "Writing superblock quota changes"); +#endif + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, XFS_DEFAULT_LOG_COUNT); @@ -1521,7 +1532,7 @@ xfs_unmountfs( xfs_warn(mp, "Unable to free reserved block pool. " "Freespace may not be correct on next mount."); - error = xfs_log_sbcount(mp); + error = xfs_log_sbcount(mp, 1); if (error) xfs_warn(mp, "Unable to update superblock counters. " "Freespace may not be correct on next mount."); @@ -1557,14 +1568,18 @@ xfs_fs_writable(xfs_mount_t *mp) /* * xfs_log_sbcount * - * Sync the superblock counters to disk. + * Called either periodically to keep the on disk superblock values + * roughly up to date or from unmount to make sure the values are + * correct on a clean unmount. * * Note this code can be called during the process of freezing, so - * we may need to use the transaction allocator which does not + * we may need to use the transaction allocator which does not not * block when the transaction subsystem is in its frozen state. 
*/ int -xfs_log_sbcount(xfs_mount_t *mp) +xfs_log_sbcount( + xfs_mount_t *mp, + uint sync) { xfs_trans_t *tp; int error; @@ -1590,7 +1605,8 @@ xfs_log_sbcount(xfs_mount_t *mp) } xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); - xfs_trans_set_sync(tp); + if (sync) + xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); return error; } @@ -1925,19 +1941,22 @@ xfs_mod_incore_sb_batch( * the superblock buffer if it can be locked without sleeping. * If it can't then we'll return NULL. */ -struct xfs_buf * +xfs_buf_t * xfs_getsb( - struct xfs_mount *mp, - int flags) + xfs_mount_t *mp, + int flags) { - struct xfs_buf *bp = mp->m_sb_bp; + xfs_buf_t *bp; - if (!xfs_buf_trylock(bp)) { - if (flags & XBF_TRYLOCK) + ASSERT(mp->m_sb_bp != NULL); + bp = mp->m_sb_bp; + if (flags & XBF_TRYLOCK) { + if (!XFS_BUF_CPSEMA(bp)) { return NULL; - xfs_buf_lock(bp); + } + } else { + XFS_BUF_PSEMA(bp, PRIBIO); } - XFS_BUF_HOLD(bp); ASSERT(XFS_BUF_ISDONE(bp)); return bp; diff --git a/trunk/fs/xfs/xfs_mount.h b/trunk/fs/xfs/xfs_mount.h index bb24dac42a25..3d68bb267c5f 100644 --- a/trunk/fs/xfs/xfs_mount.h +++ b/trunk/fs/xfs/xfs_mount.h @@ -371,7 +371,7 @@ typedef struct xfs_mod_sb { int64_t msb_delta; /* Change to make to specified field */ } xfs_mod_sb_t; -extern int xfs_log_sbcount(xfs_mount_t *); +extern int xfs_log_sbcount(xfs_mount_t *, uint); extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); extern int xfs_mountfs(xfs_mount_t *mp); diff --git a/trunk/fs/xfs/xfs_trans.c b/trunk/fs/xfs/xfs_trans.c index efc147f0e9b6..c83f63b33aae 100644 --- a/trunk/fs/xfs/xfs_trans.c +++ b/trunk/fs/xfs/xfs_trans.c @@ -1426,7 +1426,6 @@ xfs_trans_committed( static inline void xfs_log_item_batch_insert( struct xfs_ail *ailp, - struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t commit_lsn) @@ -1435,7 +1434,7 @@ xfs_log_item_batch_insert( spin_lock(&ailp->xa_lock); /* xfs_trans_ail_update_bulk drops ailp->xa_lock */ - xfs_trans_ail_update_bulk(ailp, 
cur, log_items, nr_items, commit_lsn); + xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn); for (i = 0; i < nr_items; i++) IOP_UNPIN(log_items[i], 0); @@ -1453,13 +1452,6 @@ xfs_log_item_batch_insert( * as an iclog write error even though we haven't started any IO yet. Hence in * this case all we need to do is IOP_COMMITTED processing, followed by an * IOP_UNPIN(aborted) call. - * - * The AIL cursor is used to optimise the insert process. If commit_lsn is not - * at the end of the AIL, the insert cursor avoids the need to walk - * the AIL to find the insertion point on every xfs_log_item_batch_insert() - * call. This saves a lot of needless list walking and is a net win, even - * though it slightly increases that amount of AIL lock traffic to set it up - * and tear it down. */ void xfs_trans_committed_bulk( @@ -1471,13 +1463,8 @@ xfs_trans_committed_bulk( #define LOG_ITEM_BATCH_SIZE 32 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; struct xfs_log_vec *lv; - struct xfs_ail_cursor cur; int i = 0; - spin_lock(&ailp->xa_lock); - xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn); - spin_unlock(&ailp->xa_lock); - /* unpin all the log items */ for (lv = log_vector; lv; lv = lv->lv_next ) { struct xfs_log_item *lip = lv->lv_item; @@ -1506,9 +1493,7 @@ xfs_trans_committed_bulk( /* * Not a bulk update option due to unusual item_lsn. * Push into AIL immediately, rechecking the lsn once - * we have the ail lock. Then unpin the item. This does - * not affect the AIL cursor the bulk insert path is - * using. + * we have the ail lock. Then unpin the item. */ spin_lock(&ailp->xa_lock); if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) @@ -1522,7 +1507,7 @@ xfs_trans_committed_bulk( /* Item is a candidate for bulk AIL insert. 
*/ log_items[i++] = lv->lv_item; if (i >= LOG_ITEM_BATCH_SIZE) { - xfs_log_item_batch_insert(ailp, &cur, log_items, + xfs_log_item_batch_insert(ailp, log_items, LOG_ITEM_BATCH_SIZE, commit_lsn); i = 0; } @@ -1530,11 +1515,7 @@ xfs_trans_committed_bulk( /* make sure we insert the remainder! */ if (i) - xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn); - - spin_lock(&ailp->xa_lock); - xfs_trans_ail_cursor_done(ailp, &cur); - spin_unlock(&ailp->xa_lock); + xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn); } /* diff --git a/trunk/fs/xfs/xfs_trans_ail.c b/trunk/fs/xfs/xfs_trans_ail.c index 43233e92f0f6..5fc2380092c8 100644 --- a/trunk/fs/xfs/xfs_trans_ail.c +++ b/trunk/fs/xfs/xfs_trans_ail.c @@ -163,11 +163,17 @@ xfs_ail_max_lsn( } /* - * The cursor keeps track of where our current traversal is up to by tracking - * the next item in the list for us. However, for this to be safe, removing an - * object from the AIL needs to invalidate any cursor that points to it. hence - * the traversal cursor needs to be linked to the struct xfs_ail so that - * deletion can search all the active cursors for invalidation. + * AIL traversal cursor initialisation. + * + * The cursor keeps track of where our current traversal is up + * to by tracking the next Æ£tem in the list for us. However, for + * this to be safe, removing an object from the AIL needs to invalidate + * any cursor that points to it. hence the traversal cursor needs to + * be linked to the struct xfs_ail so that deletion can search all the + * active cursors for invalidation. + * + * We don't link the push cursor because it is embedded in the struct + * xfs_ail and hence easily findable. 
*/ STATIC void xfs_trans_ail_cursor_init( @@ -175,12 +181,31 @@ xfs_trans_ail_cursor_init( struct xfs_ail_cursor *cur) { cur->item = NULL; - list_add_tail(&cur->list, &ailp->xa_cursors); + if (cur == &ailp->xa_cursors) + return; + + cur->next = ailp->xa_cursors.next; + ailp->xa_cursors.next = cur; +} + +/* + * Set the cursor to the next item, because when we look + * up the cursor the current item may have been freed. + */ +STATIC void +xfs_trans_ail_cursor_set( + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + struct xfs_log_item *lip) +{ + if (lip) + cur->item = xfs_ail_next(ailp, lip); } /* - * Get the next item in the traversal and advance the cursor. If the cursor - * was invalidated (indicated by a lip of 1), restart the traversal. + * Get the next item in the traversal and advance the cursor. + * If the cursor was invalidated (inidicated by a lip of 1), + * restart the traversal. */ struct xfs_log_item * xfs_trans_ail_cursor_next( @@ -191,31 +216,45 @@ xfs_trans_ail_cursor_next( if ((__psint_t)lip & 1) lip = xfs_ail_min(ailp); - if (lip) - cur->item = xfs_ail_next(ailp, lip); + xfs_trans_ail_cursor_set(ailp, cur, lip); return lip; } /* - * When the traversal is complete, we need to remove the cursor from the list - * of traversing cursors. + * Now that the traversal is complete, we need to remove the cursor + * from the list of traversing cursors. Avoid removing the embedded + * push cursor, but use the fact it is always present to make the + * list deletion simple. 
*/ void xfs_trans_ail_cursor_done( struct xfs_ail *ailp, - struct xfs_ail_cursor *cur) + struct xfs_ail_cursor *done) { - cur->item = NULL; - list_del_init(&cur->list); + struct xfs_ail_cursor *prev = NULL; + struct xfs_ail_cursor *cur; + + done->item = NULL; + if (done == &ailp->xa_cursors) + return; + prev = &ailp->xa_cursors; + for (cur = prev->next; cur; prev = cur, cur = prev->next) { + if (cur == done) { + prev->next = cur->next; + break; + } + } + ASSERT(cur); } /* - * Invalidate any cursor that is pointing to this item. This is called when an - * item is removed from the AIL. Any cursor pointing to this object is now - * invalid and the traversal needs to be terminated so it doesn't reference a - * freed object. We set the low bit of the cursor item pointer so we can - * distinguish between an invalidation and the end of the list when getting the - * next item from the cursor. + * Invalidate any cursor that is pointing to this item. This is + * called when an item is removed from the AIL. Any cursor pointing + * to this object is now invalid and the traversal needs to be + * terminated so it doesn't reference a freed object. We set the + * cursor item to a value of 1 so we can distinguish between an + * invalidation and the end of the list when getting the next item + * from the cursor. */ STATIC void xfs_trans_ail_cursor_clear( @@ -224,7 +263,8 @@ xfs_trans_ail_cursor_clear( { struct xfs_ail_cursor *cur; - list_for_each_entry(cur, &ailp->xa_cursors, list) { + /* need to search all cursors */ + for (cur = &ailp->xa_cursors; cur; cur = cur->next) { if (cur->item == lip) cur->item = (struct xfs_log_item *) ((__psint_t)cur->item | 1); @@ -232,10 +272,9 @@ xfs_trans_ail_cursor_clear( } /* - * Find the first item in the AIL with the given @lsn by searching in ascending - * LSN order and initialise the cursor to point to the next item for a - * ascending traversal. Pass a @lsn of zero to initialise the cursor to the - * first item in the AIL. 
Returns NULL if the list is empty. + * Return the item in the AIL with the current lsn. + * Return the current tree generation number for use + * in calls to xfs_trans_next_ail(). */ xfs_log_item_t * xfs_trans_ail_cursor_first( @@ -246,112 +285,46 @@ xfs_trans_ail_cursor_first( xfs_log_item_t *lip; xfs_trans_ail_cursor_init(ailp, cur); - - if (lsn == 0) { - lip = xfs_ail_min(ailp); + lip = xfs_ail_min(ailp); + if (lsn == 0) goto out; - } list_for_each_entry(lip, &ailp->xa_ail, li_ail) { if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) goto out; } - return NULL; - + lip = NULL; out: - if (lip) - cur->item = xfs_ail_next(ailp, lip); + xfs_trans_ail_cursor_set(ailp, cur, lip); return lip; } -static struct xfs_log_item * -__xfs_trans_ail_cursor_last( - struct xfs_ail *ailp, - xfs_lsn_t lsn) -{ - xfs_log_item_t *lip; - - list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) { - if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0) - return lip; - } - return NULL; -} - -/* - * Find the last item in the AIL with the given @lsn by searching in descending - * LSN order and initialise the cursor to point to that item. If there is no - * item with the value of @lsn, then it sets the cursor to the last item with an - * LSN lower than @lsn. Returns NULL if the list is empty. - */ -struct xfs_log_item * -xfs_trans_ail_cursor_last( - struct xfs_ail *ailp, - struct xfs_ail_cursor *cur, - xfs_lsn_t lsn) -{ - xfs_trans_ail_cursor_init(ailp, cur); - cur->item = __xfs_trans_ail_cursor_last(ailp, lsn); - return cur->item; -} - /* - * Splice the log item list into the AIL at the given LSN. We splice to the - * tail of the given LSN to maintain insert order for push traversals. The - * cursor is optional, allowing repeated updates to the same LSN to avoid - * repeated traversals. + * splice the log item list into the AIL at the given LSN. 
*/ static void xfs_ail_splice( - struct xfs_ail *ailp, - struct xfs_ail_cursor *cur, - struct list_head *list, - xfs_lsn_t lsn) + struct xfs_ail *ailp, + struct list_head *list, + xfs_lsn_t lsn) { - struct xfs_log_item *lip = cur ? cur->item : NULL; - struct xfs_log_item *next_lip; + xfs_log_item_t *next_lip; - /* - * Get a new cursor if we don't have a placeholder or the existing one - * has been invalidated. - */ - if (!lip || (__psint_t)lip & 1) { - lip = __xfs_trans_ail_cursor_last(ailp, lsn); - - if (!lip) { - /* The list is empty, so just splice and return. */ - if (cur) - cur->item = NULL; - list_splice(list, &ailp->xa_ail); - return; - } + /* If the list is empty, just insert the item. */ + if (list_empty(&ailp->xa_ail)) { + list_splice(list, &ailp->xa_ail); + return; } - /* - * Our cursor points to the item we want to insert _after_, so we have - * to update the cursor to point to the end of the list we are splicing - * in so that it points to the correct location for the next splice. - * i.e. before the splice - * - * lsn -> lsn -> lsn + x -> lsn + x ... - * ^ - * | cursor points here - * - * After the splice we have: - * - * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ... - * ^ ^ - * | cursor points here | needs to move here - * - * So we set the cursor to the last item in the list to be spliced - * before we execute the splice, resulting in the cursor pointing to - * the correct item after the splice occurs. 
- */ - if (cur) { - next_lip = list_entry(list->prev, struct xfs_log_item, li_ail); - cur->item = next_lip; + list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { + if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) + break; } - list_splice(list, &lip->li_ail); + + ASSERT(&next_lip->li_ail == &ailp->xa_ail || + XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0); + + list_splice_init(list, &next_lip->li_ail); } /* @@ -378,7 +351,7 @@ xfs_ail_worker( struct xfs_ail *ailp = container_of(to_delayed_work(work), struct xfs_ail, xa_work); xfs_mount_t *mp = ailp->xa_mount; - struct xfs_ail_cursor cur; + struct xfs_ail_cursor *cur = &ailp->xa_cursors; xfs_log_item_t *lip; xfs_lsn_t lsn; xfs_lsn_t target; @@ -390,12 +363,13 @@ xfs_ail_worker( spin_lock(&ailp->xa_lock); target = ailp->xa_target; - lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn); + xfs_trans_ail_cursor_init(ailp, cur); + lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn); if (!lip || XFS_FORCED_SHUTDOWN(mp)) { /* * AIL is empty or our push has reached the end. 
*/ - xfs_trans_ail_cursor_done(ailp, &cur); + xfs_trans_ail_cursor_done(ailp, cur); spin_unlock(&ailp->xa_lock); goto out_done; } @@ -483,12 +457,12 @@ xfs_ail_worker( if (stuck > 100) break; - lip = xfs_trans_ail_cursor_next(ailp, &cur); + lip = xfs_trans_ail_cursor_next(ailp, cur); if (lip == NULL) break; lsn = lip->li_lsn; } - xfs_trans_ail_cursor_done(ailp, &cur); + xfs_trans_ail_cursor_done(ailp, cur); spin_unlock(&ailp->xa_lock); if (flush_log) { @@ -671,7 +645,6 @@ xfs_trans_unlocked_item( void xfs_trans_ail_update_bulk( struct xfs_ail *ailp, - struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t lsn) __releases(ailp->xa_lock) @@ -701,7 +674,7 @@ xfs_trans_ail_update_bulk( list_add(&lip->li_ail, &tmp); } - xfs_ail_splice(ailp, cur, &tmp, lsn); + xfs_ail_splice(ailp, &tmp, lsn); if (!mlip_changed) { spin_unlock(&ailp->xa_lock); @@ -820,7 +793,6 @@ xfs_trans_ail_init( ailp->xa_mount = mp; INIT_LIST_HEAD(&ailp->xa_ail); - INIT_LIST_HEAD(&ailp->xa_cursors); spin_lock_init(&ailp->xa_lock); INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); mp->m_ail = ailp; diff --git a/trunk/fs/xfs/xfs_trans_buf.c b/trunk/fs/xfs/xfs_trans_buf.c index 15584fc3ed7d..03b3b7f85a3b 100644 --- a/trunk/fs/xfs/xfs_trans_buf.c +++ b/trunk/fs/xfs/xfs_trans_buf.c @@ -81,7 +81,7 @@ _xfs_trans_bjoin( struct xfs_buf_log_item *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL); /* * The xfs_buf_log_item pointer is stored in b_fsprivate. If @@ -89,7 +89,7 @@ _xfs_trans_bjoin( * The checks to see if one is there are in xfs_buf_item_init(). 
*/ xfs_buf_item_init(bp, tp->t_mountp); - bip = bp->b_fspriv; + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); @@ -110,7 +110,7 @@ _xfs_trans_bjoin( * Initialize b_fsprivate2 so we can find it with incore_match() * in xfs_trans_get_buf() and friends above. */ - bp->b_transp = tp; + XFS_BUF_SET_FSPRIVATE2(bp, tp); } @@ -160,7 +160,7 @@ xfs_trans_get_buf(xfs_trans_t *tp, */ bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len); if (bp != NULL) { - ASSERT(xfs_buf_islocked(bp)); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) XFS_BUF_SUPER_STALE(bp); @@ -172,8 +172,8 @@ xfs_trans_get_buf(xfs_trans_t *tp, else if (XFS_BUF_ISSTALE(bp)) ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); - ASSERT(bp->b_transp == tp); - bip = bp->b_fspriv; + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; @@ -232,8 +232,8 @@ xfs_trans_getsb(xfs_trans_t *tp, * recursion count and return the buffer to the caller. 
*/ bp = mp->m_sb_bp; - if (bp->b_transp == tp) { - bip = bp->b_fspriv; + if (XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp) { + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; @@ -327,9 +327,9 @@ xfs_trans_read_buf( */ bp = xfs_trans_buf_item_match(tp, target, blkno, len); if (bp != NULL) { - ASSERT(xfs_buf_islocked(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bp->b_fspriv != NULL); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); ASSERT((XFS_BUF_ISERROR(bp)) == 0); if (!(XFS_BUF_ISDONE(bp))) { trace_xfs_trans_read_buf_io(bp, _RET_IP_); @@ -363,7 +363,7 @@ xfs_trans_read_buf( } - bip = bp->b_fspriv; + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); bip->bli_recur++; ASSERT(atomic_read(&bip->bli_refcount) > 0); @@ -460,30 +460,32 @@ xfs_trans_brelse(xfs_trans_t *tp, xfs_buf_t *bp) { xfs_buf_log_item_t *bip; + xfs_log_item_t *lip; /* * Default to a normal brelse() call if the tp is NULL. */ if (tp == NULL) { - struct xfs_log_item *lip = bp->b_fspriv; - - ASSERT(bp->b_transp == NULL); - + ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL); /* * If there's a buf log item attached to the buffer, * then let the AIL know that the buffer is being * unlocked. 
*/ - if (lip != NULL && lip->li_type == XFS_LI_BUF) { - bip = bp->b_fspriv; - xfs_trans_unlocked_item(bip->bli_item.li_ailp, lip); + if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { + lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); + if (lip->li_type == XFS_LI_BUF) { + bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); + xfs_trans_unlocked_item(bip->bli_item.li_ailp, + lip); + } } xfs_buf_relse(bp); return; } - ASSERT(bp->b_transp == tp); - bip = bp->b_fspriv; + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(bip->bli_item.li_type == XFS_LI_BUF); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); @@ -554,7 +556,7 @@ xfs_trans_brelse(xfs_trans_t *tp, xfs_buf_item_relse(bp); bip = NULL; } - bp->b_transp = NULL; + XFS_BUF_SET_FSPRIVATE2(bp, NULL); /* * If we've still got a buf log item on the buffer, then @@ -579,15 +581,16 @@ void xfs_trans_bhold(xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bip != NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(atomic_read(&bip->bli_refcount) > 0); - bip->bli_flags |= XFS_BLI_HOLD; trace_xfs_trans_bhold(bip); } @@ -600,17 +603,19 @@ void xfs_trans_bhold_release(xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bip != NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->bli_format.blf_flags & 
XFS_BLF_CANCEL)); ASSERT(atomic_read(&bip->bli_refcount) > 0); ASSERT(bip->bli_flags & XFS_BLI_HOLD); - bip->bli_flags &= ~XFS_BLI_HOLD; + trace_xfs_trans_bhold_release(bip); } @@ -629,14 +634,14 @@ xfs_trans_log_buf(xfs_trans_t *tp, uint first, uint last) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bip != NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp))); - ASSERT(bp->b_iodone == NULL || - bp->b_iodone == xfs_buf_iodone_callbacks); + ASSERT((XFS_BUF_IODONE_FUNC(bp) == NULL) || + (XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks)); /* * Mark the buffer as needing to be written out eventually, @@ -651,8 +656,9 @@ xfs_trans_log_buf(xfs_trans_t *tp, XFS_BUF_DELAYWRITE(bp); XFS_BUF_DONE(bp); + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(atomic_read(&bip->bli_refcount) > 0); - bp->b_iodone = xfs_buf_iodone_callbacks; + XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); bip->bli_item.li_cb = xfs_buf_iodone; trace_xfs_trans_log_buf(bip); @@ -700,11 +706,13 @@ xfs_trans_binval( xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bip != NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(atomic_read(&bip->bli_refcount) > 0); trace_xfs_trans_binval(bip); @@ -772,11 +780,13 @@ xfs_trans_inode_buf( xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bip != NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + + bip = XFS_BUF_FSPRIVATE(bp, 
xfs_buf_log_item_t *); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_flags |= XFS_BLI_INODE_BUF; @@ -796,11 +806,13 @@ xfs_trans_stale_inode_buf( xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bip != NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_flags |= XFS_BLI_STALE_INODE; @@ -821,11 +833,13 @@ xfs_trans_inode_alloc_buf( xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bip != NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF; @@ -849,14 +863,16 @@ xfs_trans_dquot_buf( xfs_buf_t *bp, uint type) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + xfs_buf_log_item_t *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(bp->b_transp == tp); - ASSERT(bip != NULL); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); ASSERT(type == XFS_BLF_UDQUOT_BUF || type == XFS_BLF_PDQUOT_BUF || type == XFS_BLF_GDQUOT_BUF); + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_format.blf_flags |= type; diff --git a/trunk/fs/xfs/xfs_trans_inode.c b/trunk/fs/xfs/xfs_trans_inode.c index c8dea2fd7e68..048b0c689d3e 100644 --- a/trunk/fs/xfs/xfs_trans_inode.c +++ b/trunk/fs/xfs/xfs_trans_inode.c @@ -55,6 +55,7 @@ xfs_trans_ijoin( { xfs_inode_log_item_t *iip; + ASSERT(ip->i_transp == NULL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); if (ip->i_itemp == NULL) xfs_inode_item_init(ip, 
ip->i_mount); @@ -67,6 +68,12 @@ xfs_trans_ijoin( xfs_trans_add_item(tp, &iip->ili_item); xfs_trans_inode_broot_debug(ip); + + /* + * Initialize i_transp so we can find it with xfs_inode_incore() + * in xfs_trans_iget() above. + */ + ip->i_transp = tp; } /* @@ -104,6 +111,7 @@ xfs_trans_ichgtime( ASSERT(tp); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ip->i_transp == tp); tv = current_fs_time(inode->i_sb); @@ -132,6 +140,7 @@ xfs_trans_log_inode( xfs_inode_t *ip, uint flags) { + ASSERT(ip->i_transp == tp); ASSERT(ip->i_itemp != NULL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); diff --git a/trunk/fs/xfs/xfs_trans_priv.h b/trunk/fs/xfs/xfs_trans_priv.h index 212946b97239..6b164e9e9a1f 100644 --- a/trunk/fs/xfs/xfs_trans_priv.h +++ b/trunk/fs/xfs/xfs_trans_priv.h @@ -53,7 +53,7 @@ void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, * of the list to trigger traversal restarts. */ struct xfs_ail_cursor { - struct list_head list; + struct xfs_ail_cursor *next; struct xfs_log_item *item; }; @@ -66,7 +66,7 @@ struct xfs_ail { struct xfs_mount *xa_mount; struct list_head xa_ail; xfs_lsn_t xa_target; - struct list_head xa_cursors; + struct xfs_ail_cursor xa_cursors; spinlock_t xa_lock; struct delayed_work xa_work; xfs_lsn_t xa_last_pushed_lsn; @@ -82,7 +82,6 @@ struct xfs_ail { extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, - struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t lsn) __releases(ailp->xa_lock); static inline void @@ -91,7 +90,7 @@ xfs_trans_ail_update( struct xfs_log_item *lip, xfs_lsn_t lsn) __releases(ailp->xa_lock) { - xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn); + xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn); } void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, @@ -112,13 +111,10 @@ xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); void xfs_trans_unlocked_item(struct xfs_ail *, xfs_log_item_t *); -struct 
xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp, +struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, struct xfs_ail_cursor *cur, xfs_lsn_t lsn); -struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp, - struct xfs_ail_cursor *cur, - xfs_lsn_t lsn); -struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp, +struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); diff --git a/trunk/fs/xfs/xfs_vnodeops.c b/trunk/fs/xfs/xfs_vnodeops.c index 88d121486c52..619720705bc6 100644 --- a/trunk/fs/xfs/xfs_vnodeops.c +++ b/trunk/fs/xfs/xfs_vnodeops.c @@ -50,6 +50,430 @@ #include "xfs_vnodeops.h" #include "xfs_trace.h" +int +xfs_setattr( + struct xfs_inode *ip, + struct iattr *iattr, + int flags) +{ + xfs_mount_t *mp = ip->i_mount; + struct inode *inode = VFS_I(ip); + int mask = iattr->ia_valid; + xfs_trans_t *tp; + int code; + uint lock_flags; + uint commit_flags=0; + uid_t uid=0, iuid=0; + gid_t gid=0, igid=0; + struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; + int need_iolock = 1; + + trace_xfs_setattr(ip); + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + code = -inode_change_ok(inode, iattr); + if (code) + return code; + + olddquot1 = olddquot2 = NULL; + udqp = gdqp = NULL; + + /* + * If disk quotas is on, we make sure that the dquots do exist on disk, + * before we start any other transactions. Trying to do this later + * is messy. We don't care to take a readlock to look at the ids + * in inode here, because we can't hold it across the trans_reserve. + * If the IDs do change before we take the ilock, we're covered + * because the i_*dquot fields will get updated anyway. 
+ */ + if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) { + uint qflags = 0; + + if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { + uid = iattr->ia_uid; + qflags |= XFS_QMOPT_UQUOTA; + } else { + uid = ip->i_d.di_uid; + } + if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { + gid = iattr->ia_gid; + qflags |= XFS_QMOPT_GQUOTA; + } else { + gid = ip->i_d.di_gid; + } + + /* + * We take a reference when we initialize udqp and gdqp, + * so it is important that we never blindly double trip on + * the same variable. See xfs_create() for an example. + */ + ASSERT(udqp == NULL); + ASSERT(gdqp == NULL); + code = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), + qflags, &udqp, &gdqp); + if (code) + return code; + } + + /* + * For the other attributes, we acquire the inode lock and + * first do an error checking pass. + */ + tp = NULL; + lock_flags = XFS_ILOCK_EXCL; + if (flags & XFS_ATTR_NOLOCK) + need_iolock = 0; + if (!(mask & ATTR_SIZE)) { + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); + commit_flags = 0; + code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), + 0, 0, 0); + if (code) { + lock_flags = 0; + goto error_return; + } + } else { + if (need_iolock) + lock_flags |= XFS_IOLOCK_EXCL; + } + + xfs_ilock(ip, lock_flags); + + /* + * Change file ownership. Must be the owner or privileged. + */ + if (mask & (ATTR_UID|ATTR_GID)) { + /* + * These IDs could have changed since we last looked at them. + * But, we're assured that if the ownership did change + * while we didn't have the inode locked, inode's dquot(s) + * would have changed also. + */ + iuid = ip->i_d.di_uid; + igid = ip->i_d.di_gid; + gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; + uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; + + /* + * Do a quota reservation only if uid/gid is actually + * going to change. 
+ */ + if (XFS_IS_QUOTA_RUNNING(mp) && + ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || + (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { + ASSERT(tp); + code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, + capable(CAP_FOWNER) ? + XFS_QMOPT_FORCE_RES : 0); + if (code) /* out of quota */ + goto error_return; + } + } + + /* + * Truncate file. Must have write permission and not be a directory. + */ + if (mask & ATTR_SIZE) { + /* Short circuit the truncate case for zero length files */ + if (iattr->ia_size == 0 && + ip->i_size == 0 && ip->i_d.di_nextents == 0) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + lock_flags &= ~XFS_ILOCK_EXCL; + if (mask & ATTR_CTIME) { + inode->i_mtime = inode->i_ctime = + current_fs_time(inode->i_sb); + xfs_mark_inode_dirty_sync(ip); + } + code = 0; + goto error_return; + } + + if (S_ISDIR(ip->i_d.di_mode)) { + code = XFS_ERROR(EISDIR); + goto error_return; + } else if (!S_ISREG(ip->i_d.di_mode)) { + code = XFS_ERROR(EINVAL); + goto error_return; + } + + /* + * Make sure that the dquots are attached to the inode. + */ + code = xfs_qm_dqattach_locked(ip, 0); + if (code) + goto error_return; + + /* + * Now we can make the changes. Before we join the inode + * to the transaction, if ATTR_SIZE is set then take care of + * the part of the truncation that must be done without the + * inode lock. This needs to be done before joining the inode + * to the transaction, because the inode cannot be unlocked + * once it is a part of the transaction. + */ + if (iattr->ia_size > ip->i_size) { + /* + * Do the first part of growing a file: zero any data + * in the last block that is beyond the old EOF. We + * need to do this before the inode is joined to the + * transaction to modify the i_size. 
+ */ + code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); + if (code) + goto error_return; + } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + lock_flags &= ~XFS_ILOCK_EXCL; + + /* + * We are going to log the inode size change in this + * transaction so any previous writes that are beyond the on + * disk EOF and the new EOF that have not been written out need + * to be written here. If we do not write the data out, we + * expose ourselves to the null files problem. + * + * Only flush from the on disk size to the smaller of the in + * memory file size or the new size as that's the range we + * really care about here and prevents waiting for other data + * not within the range we care about here. + */ + if (ip->i_size != ip->i_d.di_size && + iattr->ia_size > ip->i_d.di_size) { + code = xfs_flush_pages(ip, + ip->i_d.di_size, iattr->ia_size, + XBF_ASYNC, FI_NONE); + if (code) + goto error_return; + } + + /* wait for all I/O to complete */ + xfs_ioend_wait(ip); + + code = -block_truncate_page(inode->i_mapping, iattr->ia_size, + xfs_get_blocks); + if (code) + goto error_return; + + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); + code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_ITRUNCATE_LOG_COUNT); + if (code) + goto error_return; + + truncate_setsize(inode, iattr->ia_size); + + commit_flags = XFS_TRANS_RELEASE_LOG_RES; + lock_flags |= XFS_ILOCK_EXCL; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + xfs_trans_ijoin(tp, ip); + + /* + * Only change the c/mtime if we are changing the size + * or we are explicitly asked to change it. This handles + * the semantic difference between truncate() and ftruncate() + * as implemented in the VFS. + * + * The regular truncate() case without ATTR_CTIME and ATTR_MTIME + * is a special case where we need to update the times despite + * not having these flags set. For all other operations the + * VFS set these flags explicitly if it wants a timestamp + * update. 
+ */ + if (iattr->ia_size != ip->i_size && + (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { + iattr->ia_ctime = iattr->ia_mtime = + current_fs_time(inode->i_sb); + mask |= ATTR_CTIME | ATTR_MTIME; + } + + if (iattr->ia_size > ip->i_size) { + ip->i_d.di_size = iattr->ia_size; + ip->i_size = iattr->ia_size; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + } else if (iattr->ia_size <= ip->i_size || + (iattr->ia_size == 0 && ip->i_d.di_nextents)) { + /* + * signal a sync transaction unless + * we're truncating an already unlinked + * file on a wsync filesystem + */ + code = xfs_itruncate_finish(&tp, ip, iattr->ia_size, + XFS_DATA_FORK, + ((ip->i_d.di_nlink != 0 || + !(mp->m_flags & XFS_MOUNT_WSYNC)) + ? 1 : 0)); + if (code) + goto abort_return; + /* + * Truncated "down", so we're removing references + * to old data here - if we now delay flushing for + * a long time, we expose ourselves unduly to the + * notorious NULL files problem. So, we mark this + * vnode and flush it when the file is closed, and + * do not wait the usual (long) time for writeout. + */ + xfs_iflags_set(ip, XFS_ITRUNCATED); + } + } else if (tp) { + xfs_trans_ijoin(tp, ip); + } + + /* + * Change file ownership. Must be the owner or privileged. + */ + if (mask & (ATTR_UID|ATTR_GID)) { + /* + * CAP_FSETID overrides the following restrictions: + * + * The set-user-ID and set-group-ID bits of a file will be + * cleared upon successful return from chown() + */ + if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && + !capable(CAP_FSETID)) { + ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); + } + + /* + * Change the ownerships and register quota modifications + * in the transaction. 
+ */ + if (iuid != uid) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { + ASSERT(mask & ATTR_UID); + ASSERT(udqp); + olddquot1 = xfs_qm_vop_chown(tp, ip, + &ip->i_udquot, udqp); + } + ip->i_d.di_uid = uid; + inode->i_uid = uid; + } + if (igid != gid) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { + ASSERT(!XFS_IS_PQUOTA_ON(mp)); + ASSERT(mask & ATTR_GID); + ASSERT(gdqp); + olddquot2 = xfs_qm_vop_chown(tp, ip, + &ip->i_gdquot, gdqp); + } + ip->i_d.di_gid = gid; + inode->i_gid = gid; + } + } + + /* + * Change file access modes. + */ + if (mask & ATTR_MODE) { + umode_t mode = iattr->ia_mode; + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + + ip->i_d.di_mode &= S_IFMT; + ip->i_d.di_mode |= mode & ~S_IFMT; + + inode->i_mode &= S_IFMT; + inode->i_mode |= mode & ~S_IFMT; + } + + /* + * Change file access or modified times. + */ + if (mask & ATTR_ATIME) { + inode->i_atime = iattr->ia_atime; + ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; + ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; + ip->i_update_core = 1; + } + if (mask & ATTR_CTIME) { + inode->i_ctime = iattr->ia_ctime; + ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; + ip->i_update_core = 1; + } + if (mask & ATTR_MTIME) { + inode->i_mtime = iattr->ia_mtime; + ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; + ip->i_update_core = 1; + } + + /* + * And finally, log the inode core if any attribute in it + * has been changed. + */ + if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE| + ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + XFS_STATS_INC(xs_ig_attrchg); + + /* + * If this is a synchronous mount, make sure that the + * transaction goes to disk before returning to the user. + * This is slightly sub-optimal in that truncates require + * two sync transactions instead of one for wsync filesystems. 
+ * One for the truncate and one for the timestamps since we + * don't want to change the timestamps unless we're sure the + * truncate worked. Truncates are less than 1% of the laddis + * mix so this probably isn't worth the trouble to optimize. + */ + code = 0; + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); + + code = xfs_trans_commit(tp, commit_flags); + + xfs_iunlock(ip, lock_flags); + + /* + * Release any dquot(s) the inode had kept before chown. + */ + xfs_qm_dqrele(olddquot1); + xfs_qm_dqrele(olddquot2); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + + if (code) + return code; + + /* + * XXX(hch): Updating the ACL entries is not atomic vs the i_mode + * update. We could avoid this with linked transactions + * and passing down the transaction pointer all the way + * to attr_set. No previous user of the generic + * Posix ACL code seems to care about this issue either. + */ + if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { + code = -xfs_acl_chmod(inode); + if (code) + return XFS_ERROR(code); + } + + return 0; + + abort_return: + commit_flags |= XFS_TRANS_ABORT; + error_return: + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + if (tp) { + xfs_trans_cancel(tp, commit_flags); + } + if (lock_flags != 0) { + xfs_iunlock(ip, lock_flags); + } + return code; +} + /* * The maximum pathlen is 1024 bytes. Since the minimum file system * blocksize is 512 bytes, we can get a max of 2 extents back from @@ -197,6 +621,13 @@ xfs_free_eofblocks( */ tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); + /* + * Do the xfs_itruncate_start() call before + * reserving any log space because + * itruncate_start will call into the buffer + * cache and we can't + * do that within a transaction. 
+ */ if (flags & XFS_FREE_EOF_TRYLOCK) { if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { xfs_trans_cancel(tp, 0); @@ -205,6 +636,13 @@ xfs_free_eofblocks( } else { xfs_ilock(ip, XFS_IOLOCK_EXCL); } + error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, + ip->i_size); + if (error) { + xfs_trans_cancel(tp, 0); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return error; + } error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), @@ -220,12 +658,15 @@ xfs_free_eofblocks( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip); - error = xfs_itruncate_data(&tp, ip, ip->i_size); + error = xfs_itruncate_finish(&tp, ip, + ip->i_size, + XFS_DATA_FORK, + 0); + /* + * If we get an error at this point we + * simply don't bother truncating the file. + */ if (error) { - /* - * If we get an error at this point we simply don't - * bother truncating the file. - */ xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); @@ -643,9 +1084,20 @@ xfs_inactive( tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); if (truncate) { + /* + * Do the xfs_itruncate_start() call before + * reserving any log space because itruncate_start + * will call into the buffer cache and we can't + * do that within a transaction. + */ xfs_ilock(ip, XFS_IOLOCK_EXCL); - xfs_ioend_wait(ip); + error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0); + if (error) { + xfs_trans_cancel(tp, 0); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return VN_INACTIVE_CACHE; + } error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), @@ -662,7 +1114,16 @@ xfs_inactive( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip); - error = xfs_itruncate_data(&tp, ip, 0); + /* + * normally, we have to run xfs_itruncate_finish sync. + * But if filesystem is wsync and we're in the inactive + * path, then we know that nlink == 0, and that the + * xaction that made nlink == 0 is permanently committed + * since xfs_remove runs as a synchronous transaction. 
+ */ + error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, + (!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0)); + if (error) { xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); @@ -1969,8 +2430,6 @@ xfs_zero_remaining_bytes( if (!bp) return XFS_ERROR(ENOMEM); - xfs_buf_unlock(bp); - for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { offset_fsb = XFS_B_TO_FSBT(mp, offset); nimap = 1; @@ -2325,7 +2784,7 @@ xfs_change_file_space( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = startoffset; - error = xfs_setattr_size(ip, &iattr, attr_flags); + error = xfs_setattr(ip, &iattr, attr_flags); if (error) return error; diff --git a/trunk/fs/xfs/xfs_vnodeops.h b/trunk/fs/xfs/xfs_vnodeops.h index 35d3d513e1e9..3bcd23353d6c 100644 --- a/trunk/fs/xfs/xfs_vnodeops.h +++ b/trunk/fs/xfs/xfs_vnodeops.h @@ -13,8 +13,7 @@ struct xfs_inode; struct xfs_iomap; -int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags); -int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap, int flags); +int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags); #define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ diff --git a/trunk/include/linux/mmc/sdhci-pltfm.h b/trunk/include/linux/mmc/sdhci-pltfm.h index 548d59d404cb..f1c2ac3fb300 100644 --- a/trunk/include/linux/mmc/sdhci-pltfm.h +++ b/trunk/include/linux/mmc/sdhci-pltfm.h @@ -15,21 +15,15 @@ #define _SDHCI_PLTFM_H struct sdhci_ops; -struct sdhci_host; /** * struct sdhci_pltfm_data - SDHCI platform-specific information & hooks * @ops: optional pointer to the platform-provided SDHCI ops * @quirks: optional SDHCI quirks - * @init: optional hook that is called during device probe, before the - * driver tries to access any SDHCI registers - * @exit: optional hook that is called during device removal */ struct sdhci_pltfm_data { struct 
sdhci_ops *ops; unsigned int quirks; - int (*init)(struct sdhci_host *host, struct sdhci_pltfm_data *pdata); - void (*exit)(struct sdhci_host *host); }; #endif /* _SDHCI_PLTFM_H */ diff --git a/trunk/include/linux/mtd/ubi.h b/trunk/include/linux/mtd/ubi.h index db4836bed514..15da0e99f48a 100644 --- a/trunk/include/linux/mtd/ubi.h +++ b/trunk/include/linux/mtd/ubi.h @@ -155,14 +155,12 @@ struct ubi_device_info { }; /* - * Volume notification types. - * @UBI_VOLUME_ADDED: a volume has been added (an UBI device was attached or a - * volume was created) - * @UBI_VOLUME_REMOVED: a volume has been removed (an UBI device was detached - * or a volume was removed) - * @UBI_VOLUME_RESIZED: a volume has been re-sized - * @UBI_VOLUME_RENAMED: a volume has been re-named - * @UBI_VOLUME_UPDATED: data has been written to a volume + * enum - volume notification types. + * @UBI_VOLUME_ADDED: volume has been added + * @UBI_VOLUME_REMOVED: start volume volume + * @UBI_VOLUME_RESIZED: volume size has been re-sized + * @UBI_VOLUME_RENAMED: volume name has been re-named + * @UBI_VOLUME_UPDATED: volume name has been updated * * These constants define which type of event has happened when a volume * notification function is invoked. 
diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h index 14a6c7b545de..496770a96487 100644 --- a/trunk/include/linux/sched.h +++ b/trunk/include/linux/sched.h @@ -844,7 +844,6 @@ enum cpu_idle_type { #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ #define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ -#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ enum powersavings_balance_level { POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */ @@ -894,21 +893,16 @@ static inline int sd_power_saving_flags(void) return 0; } -struct sched_group_power { +struct sched_group { + struct sched_group *next; /* Must be a circular list */ atomic_t ref; + /* * CPU power of this group, SCHED_LOAD_SCALE being max power for a * single CPU. */ - unsigned int power, power_orig; -}; - -struct sched_group { - struct sched_group *next; /* Must be a circular list */ - atomic_t ref; - + unsigned int cpu_power, cpu_power_orig; unsigned int group_weight; - struct sched_group_power *sgp; /* * The CPUs this group covers. 
@@ -1260,9 +1254,6 @@ struct task_struct { #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; char rcu_read_unlock_special; -#if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) - int rcu_boosted; -#endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */ struct list_head rcu_node_entry; #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TREE_PREEMPT_RCU diff --git a/trunk/include/linux/slab.h b/trunk/include/linux/slab.h index 573c809c33d9..ad4dd1c8d30a 100644 --- a/trunk/include/linux/slab.h +++ b/trunk/include/linux/slab.h @@ -133,26 +133,6 @@ unsigned int kmem_cache_size(struct kmem_cache *); #define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_HIGH) #define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_HIGH - PAGE_SHIFT) -/* - * Some archs want to perform DMA into kmalloc caches and need a guaranteed - * alignment larger than the alignment of a 64-bit integer. - * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that. - */ -#ifdef ARCH_DMA_MINALIGN -#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN -#else -#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) -#endif - -/* - * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. - * Intended for arches that get misalignment faults even for 64 bit integer - * aligned buffers. - */ -#ifndef ARCH_SLAB_MINALIGN -#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) -#endif - /* * Common kmalloc functions provided by all allocators */ diff --git a/trunk/include/linux/slab_def.h b/trunk/include/linux/slab_def.h index d00e0bacda93..83203ae9390b 100644 --- a/trunk/include/linux/slab_def.h +++ b/trunk/include/linux/slab_def.h @@ -17,6 +17,32 @@ #include +/* + * Enforce a minimum alignment for the kmalloc caches. + * Usually, the kmalloc caches are cache_line_size() aligned, except when + * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned. 
+ * Some archs want to perform DMA into kmalloc caches and need a guaranteed + * alignment larger than the alignment of a 64-bit integer. + * ARCH_KMALLOC_MINALIGN allows that. + * Note that increasing this value may disable some debug features. + */ +#ifdef ARCH_DMA_MINALIGN +#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN +#else +#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) +#endif + +#ifndef ARCH_SLAB_MINALIGN +/* + * Enforce a minimum alignment for all caches. + * Intended for archs that get misalignment faults even for BYTES_PER_WORD + * aligned buffers. Includes ARCH_KMALLOC_MINALIGN. + * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables + * some debug features. + */ +#define ARCH_SLAB_MINALIGN 0 +#endif + /* * struct kmem_cache * @@ -24,19 +50,21 @@ */ struct kmem_cache { -/* 1) Cache tunables. Protected by cache_chain_mutex */ +/* 1) per-cpu data, touched during every alloc/free */ + struct array_cache *array[NR_CPUS]; +/* 2) Cache tunables. 
Protected by cache_chain_mutex */ unsigned int batchcount; unsigned int limit; unsigned int shared; unsigned int buffer_size; u32 reciprocal_buffer_size; -/* 2) touched by every alloc & free from the backend */ +/* 3) touched by every alloc & free from the backend */ unsigned int flags; /* constant flags */ unsigned int num; /* # of objs per slab */ -/* 3) cache_grow/shrink */ +/* 4) cache_grow/shrink */ /* order of pgs per slab (2^n) */ unsigned int gfporder; @@ -52,11 +80,11 @@ struct kmem_cache { /* constructor func */ void (*ctor)(void *obj); -/* 4) cache creation/removal */ +/* 5) cache creation/removal */ const char *name; struct list_head next; -/* 5) statistics */ +/* 6) statistics */ #ifdef CONFIG_DEBUG_SLAB unsigned long num_active; unsigned long num_allocations; @@ -83,18 +111,16 @@ struct kmem_cache { int obj_size; #endif /* CONFIG_DEBUG_SLAB */ -/* 6) per-cpu/per-node data, touched during every alloc/free */ /* - * We put array[] at the end of kmem_cache, because we want to size - * this array to nr_cpu_ids slots instead of NR_CPUS + * We put nodelists[] at the end of kmem_cache, because we want to size + * this array to nr_node_ids slots instead of MAX_NUMNODES * (see kmem_cache_init()) - * We still use [NR_CPUS] and not [1] or [0] because cache_cache - * is statically defined, so we reserve the max number of cpus. + * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache + * is statically defined, so we reserve the max number of nodes. 
*/ - struct kmem_list3 **nodelists; - struct array_cache *array[NR_CPUS]; + struct kmem_list3 *nodelists[MAX_NUMNODES]; /* - * Do not add fields after array[] + * Do not add fields after nodelists[] */ }; diff --git a/trunk/include/linux/slob_def.h b/trunk/include/linux/slob_def.h index 0ec00b39d006..4382db09df4f 100644 --- a/trunk/include/linux/slob_def.h +++ b/trunk/include/linux/slob_def.h @@ -1,6 +1,16 @@ #ifndef __LINUX_SLOB_DEF_H #define __LINUX_SLOB_DEF_H +#ifdef ARCH_DMA_MINALIGN +#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN +#else +#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long) +#endif + +#ifndef ARCH_SLAB_MINALIGN +#define ARCH_SLAB_MINALIGN __alignof__(unsigned long) +#endif + void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep, diff --git a/trunk/include/linux/slub_def.h b/trunk/include/linux/slub_def.h index 4b35c06dfbc5..c8668d161dd8 100644 --- a/trunk/include/linux/slub_def.h +++ b/trunk/include/linux/slub_def.h @@ -113,6 +113,16 @@ struct kmem_cache { #define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE) +#ifdef ARCH_DMA_MINALIGN +#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN +#else +#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) +#endif + +#ifndef ARCH_SLAB_MINALIGN +#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) +#endif + /* * Maximum kmalloc object size handled by SLUB. Larger object allocations * are passed through to the page allocator. The page allocator "fastpath" @@ -218,19 +228,6 @@ kmalloc_order(size_t size, gfp_t flags, unsigned int order) return ret; } -/** - * Calling this on allocated memory will check that the memory - * is expected to be in use, and print warnings if not. 
- */ -#ifdef CONFIG_SLUB_DEBUG -extern bool verify_mem_not_deleted(const void *x); -#else -static inline bool verify_mem_not_deleted(const void *x) -{ - return true; -} -#endif - #ifdef CONFIG_TRACING extern void * kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size); diff --git a/trunk/kernel/rcutree_plugin.h b/trunk/kernel/rcutree_plugin.h index 8aafbb80b8b0..75113cb7c4fb 100644 --- a/trunk/kernel/rcutree_plugin.h +++ b/trunk/kernel/rcutree_plugin.h @@ -68,7 +68,6 @@ struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); static struct rcu_state *rcu_state = &rcu_preempt_state; -static void rcu_read_unlock_special(struct task_struct *t); static int rcu_preempted_readers_exp(struct rcu_node *rnp); /* @@ -148,7 +147,7 @@ static void rcu_preempt_note_context_switch(int cpu) struct rcu_data *rdp; struct rcu_node *rnp; - if (t->rcu_read_lock_nesting > 0 && + if (t->rcu_read_lock_nesting && (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { /* Possibly blocking in an RCU read-side critical section. */ @@ -191,14 +190,6 @@ static void rcu_preempt_note_context_switch(int cpu) rnp->gp_tasks = &t->rcu_node_entry; } raw_spin_unlock_irqrestore(&rnp->lock, flags); - } else if (t->rcu_read_lock_nesting < 0 && - t->rcu_read_unlock_special) { - - /* - * Complete exit from RCU read-side critical section on - * behalf of preempted instance of __rcu_read_unlock(). - */ - rcu_read_unlock_special(t); } /* @@ -293,7 +284,7 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t, * notify RCU core processing or task having blocked during the RCU * read-side critical section. */ -static noinline void rcu_read_unlock_special(struct task_struct *t) +static void rcu_read_unlock_special(struct task_struct *t) { int empty; int empty_exp; @@ -318,7 +309,7 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) } /* Hardware IRQ handlers cannot block. 
*/ - if (in_irq() || in_serving_softirq()) { + if (in_irq()) { local_irq_restore(flags); return; } @@ -351,11 +342,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) #ifdef CONFIG_RCU_BOOST if (&t->rcu_node_entry == rnp->boost_tasks) rnp->boost_tasks = np; - /* Snapshot and clear ->rcu_boosted with rcu_node lock held. */ - if (t->rcu_boosted) { - special |= RCU_READ_UNLOCK_BOOSTED; - t->rcu_boosted = 0; - } #endif /* #ifdef CONFIG_RCU_BOOST */ t->rcu_blocked_node = NULL; @@ -372,6 +358,7 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) #ifdef CONFIG_RCU_BOOST /* Unboost if we were boosted. */ if (special & RCU_READ_UNLOCK_BOOSTED) { + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED; rt_mutex_unlock(t->rcu_boost_mutex); t->rcu_boost_mutex = NULL; } @@ -400,22 +387,13 @@ void __rcu_read_unlock(void) struct task_struct *t = current; barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ - if (t->rcu_read_lock_nesting != 1) - --t->rcu_read_lock_nesting; - else { - t->rcu_read_lock_nesting = INT_MIN; - barrier(); /* assign before ->rcu_read_unlock_special load */ - if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) - rcu_read_unlock_special(t); - barrier(); /* ->rcu_read_unlock_special load before assign */ - t->rcu_read_lock_nesting = 0; - } + --t->rcu_read_lock_nesting; + barrier(); /* decrement before load of ->rcu_read_unlock_special */ + if (t->rcu_read_lock_nesting == 0 && + unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) + rcu_read_unlock_special(t); #ifdef CONFIG_PROVE_LOCKING - { - int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); - - WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); - } + WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0); #endif /* #ifdef CONFIG_PROVE_LOCKING */ } EXPORT_SYMBOL_GPL(__rcu_read_unlock); @@ -611,8 +589,7 @@ static void rcu_preempt_check_callbacks(int cpu) rcu_preempt_qs(cpu); return; } - if (t->rcu_read_lock_nesting > 0 && - per_cpu(rcu_preempt_data, 
cpu).qs_pending) + if (per_cpu(rcu_preempt_data, cpu).qs_pending) t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; } @@ -718,12 +695,9 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) raw_spin_lock_irqsave(&rnp->lock, flags); for (;;) { - if (!sync_rcu_preempt_exp_done(rnp)) { - raw_spin_unlock_irqrestore(&rnp->lock, flags); + if (!sync_rcu_preempt_exp_done(rnp)) break; - } if (rnp->parent == NULL) { - raw_spin_unlock_irqrestore(&rnp->lock, flags); wake_up(&sync_rcu_preempt_exp_wq); break; } @@ -733,6 +707,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) raw_spin_lock(&rnp->lock); /* irqs already disabled */ rnp->expmask &= ~mask; } + raw_spin_unlock_irqrestore(&rnp->lock, flags); } /* @@ -1199,7 +1174,7 @@ static int rcu_boost(struct rcu_node *rnp) t = container_of(tb, struct task_struct, rcu_node_entry); rt_mutex_init_proxy_locked(&mtx, t); t->rcu_boost_mutex = &mtx; - t->rcu_boosted = 1; + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED; raw_spin_unlock_irqrestore(&rnp->lock, flags); rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ rt_mutex_unlock(&mtx); /* Keep lockdep happy. 
*/ diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c index fde6ff903525..3dc716f6d8ad 100644 --- a/trunk/kernel/sched.c +++ b/trunk/kernel/sched.c @@ -2544,9 +2544,13 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) } #ifdef CONFIG_SMP -static void sched_ttwu_do_pending(struct task_struct *list) +static void sched_ttwu_pending(void) { struct rq *rq = this_rq(); + struct task_struct *list = xchg(&rq->wake_list, NULL); + + if (!list) + return; raw_spin_lock(&rq->lock); @@ -2559,45 +2563,9 @@ static void sched_ttwu_do_pending(struct task_struct *list) raw_spin_unlock(&rq->lock); } -#ifdef CONFIG_HOTPLUG_CPU - -static void sched_ttwu_pending(void) -{ - struct rq *rq = this_rq(); - struct task_struct *list = xchg(&rq->wake_list, NULL); - - if (!list) - return; - - sched_ttwu_do_pending(list); -} - -#endif /* CONFIG_HOTPLUG_CPU */ - void scheduler_ipi(void) { - struct rq *rq = this_rq(); - struct task_struct *list = xchg(&rq->wake_list, NULL); - - if (!list) - return; - - /* - * Not all reschedule IPI handlers call irq_enter/irq_exit, since - * traditionally all their work was done from the interrupt return - * path. Now that we actually do some work, we need to make sure - * we do call them. - * - * Some archs already do call them, luckily irq_enter/exit nest - * properly. - * - * Arguably we should visit all archs and update all handlers, - * however a fair share of IPIs are still resched only so this would - * somewhat pessimize the simple resched case. 
- */ - irq_enter(); - sched_ttwu_do_pending(list); - irq_exit(); + sched_ttwu_pending(); } static void ttwu_queue_remote(struct task_struct *p, int cpu) @@ -6589,7 +6557,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, break; } - if (!group->sgp->power) { + if (!group->cpu_power) { printk(KERN_CONT "\n"); printk(KERN_ERR "ERROR: domain->cpu_power not " "set\n"); @@ -6613,9 +6581,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); printk(KERN_CONT " %s", str); - if (group->sgp->power != SCHED_POWER_SCALE) { + if (group->cpu_power != SCHED_POWER_SCALE) { printk(KERN_CONT " (cpu_power = %d)", - group->sgp->power); + group->cpu_power); } group = group->next; @@ -6806,39 +6774,11 @@ static struct root_domain *alloc_rootdomain(void) return rd; } -static void free_sched_groups(struct sched_group *sg, int free_sgp) -{ - struct sched_group *tmp, *first; - - if (!sg) - return; - - first = sg; - do { - tmp = sg->next; - - if (free_sgp && atomic_dec_and_test(&sg->sgp->ref)) - kfree(sg->sgp); - - kfree(sg); - sg = tmp; - } while (sg != first); -} - static void free_sched_domain(struct rcu_head *rcu) { struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu); - - /* - * If its an overlapping domain it has private groups, iterate and - * nuke them all. 
- */ - if (sd->flags & SD_OVERLAP) { - free_sched_groups(sd->groups, 1); - } else if (atomic_dec_and_test(&sd->groups->ref)) { - kfree(sd->groups->sgp); + if (atomic_dec_and_test(&sd->groups->ref)) kfree(sd->groups); - } kfree(sd); } @@ -7005,7 +6945,6 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0; struct sd_data { struct sched_domain **__percpu sd; struct sched_group **__percpu sg; - struct sched_group_power **__percpu sgp; }; struct s_data { @@ -7025,73 +6964,15 @@ struct sched_domain_topology_level; typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu); typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); -#define SDTL_OVERLAP 0x01 - struct sched_domain_topology_level { sched_domain_init_f init; sched_domain_mask_f mask; - int flags; struct sd_data data; }; -static int -build_overlap_sched_groups(struct sched_domain *sd, int cpu) -{ - struct sched_group *first = NULL, *last = NULL, *groups = NULL, *sg; - const struct cpumask *span = sched_domain_span(sd); - struct cpumask *covered = sched_domains_tmpmask; - struct sd_data *sdd = sd->private; - struct sched_domain *child; - int i; - - cpumask_clear(covered); - - for_each_cpu(i, span) { - struct cpumask *sg_span; - - if (cpumask_test_cpu(i, covered)) - continue; - - sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), - GFP_KERNEL, cpu_to_node(i)); - - if (!sg) - goto fail; - - sg_span = sched_group_cpus(sg); - - child = *per_cpu_ptr(sdd->sd, i); - if (child->child) { - child = child->child; - cpumask_copy(sg_span, sched_domain_span(child)); - } else - cpumask_set_cpu(i, sg_span); - - cpumask_or(covered, covered, sg_span); - - sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span)); - atomic_inc(&sg->sgp->ref); - - if (cpumask_test_cpu(cpu, sg_span)) - groups = sg; - - if (!first) - first = sg; - if (last) - last->next = sg; - last = sg; - last->next = first; - } - sd->groups = groups; - - return 0; - -fail: - 
free_sched_groups(first, 0); - - return -ENOMEM; -} - +/* + * Assumes the sched_domain tree is fully constructed + */ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) { struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); @@ -7100,24 +6981,24 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) if (child) cpu = cpumask_first(sched_domain_span(child)); - if (sg) { + if (sg) *sg = *per_cpu_ptr(sdd->sg, cpu); - (*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu); - atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */ - } return cpu; } /* + * build_sched_groups takes the cpumask we wish to span, and a pointer + * to a function which identifies what group(along with sched group) a CPU + * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids + * (due to the fact that we keep track of groups covered with a struct cpumask). + * * build_sched_groups will build a circular linked list of the groups * covered by the given span, and will set each group's ->cpumask correctly, * and ->cpu_power to 0. 
- * - * Assumes the sched_domain tree is fully constructed */ -static int -build_sched_groups(struct sched_domain *sd, int cpu) +static void +build_sched_groups(struct sched_domain *sd) { struct sched_group *first = NULL, *last = NULL; struct sd_data *sdd = sd->private; @@ -7125,12 +7006,6 @@ build_sched_groups(struct sched_domain *sd, int cpu) struct cpumask *covered; int i; - get_group(cpu, sdd, &sd->groups); - atomic_inc(&sd->groups->ref); - - if (cpu != cpumask_first(sched_domain_span(sd))) - return 0; - lockdep_assert_held(&sched_domains_mutex); covered = sched_domains_tmpmask; @@ -7145,7 +7020,7 @@ build_sched_groups(struct sched_domain *sd, int cpu) continue; cpumask_clear(sched_group_cpus(sg)); - sg->sgp->power = 0; + sg->cpu_power = 0; for_each_cpu(j, span) { if (get_group(j, sdd, NULL) != group) @@ -7162,8 +7037,6 @@ build_sched_groups(struct sched_domain *sd, int cpu) last = sg; } last->next = first; - - return 0; } /* @@ -7178,18 +7051,13 @@ build_sched_groups(struct sched_domain *sd, int cpu) */ static void init_sched_groups_power(int cpu, struct sched_domain *sd) { - struct sched_group *sg = sd->groups; + WARN_ON(!sd || !sd->groups); - WARN_ON(!sd || !sg); - - do { - sg->group_weight = cpumask_weight(sched_group_cpus(sg)); - sg = sg->next; - } while (sg != sd->groups); - - if (cpu != group_first_cpu(sg)) + if (cpu != group_first_cpu(sd->groups)) return; + sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups)); + update_group_power(sd, cpu); } @@ -7309,15 +7177,15 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, static void claim_allocations(int cpu, struct sched_domain *sd) { struct sd_data *sdd = sd->private; + struct sched_group *sg = sd->groups; WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd); *per_cpu_ptr(sdd->sd, cpu) = NULL; - if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref)) + if (cpu == cpumask_first(sched_group_cpus(sg))) { + WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg); *per_cpu_ptr(sdd->sg, cpu) = 
NULL; - - if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref)) - *per_cpu_ptr(sdd->sgp, cpu) = NULL; + } } #ifdef CONFIG_SCHED_SMT @@ -7342,7 +7210,7 @@ static struct sched_domain_topology_level default_topology[] = { #endif { sd_init_CPU, cpu_cpu_mask, }, #ifdef CONFIG_NUMA - { sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, }, + { sd_init_NODE, cpu_node_mask, }, { sd_init_ALLNODES, cpu_allnodes_mask, }, #endif { NULL, }, @@ -7366,14 +7234,9 @@ static int __sdt_alloc(const struct cpumask *cpu_map) if (!sdd->sg) return -ENOMEM; - sdd->sgp = alloc_percpu(struct sched_group_power *); - if (!sdd->sgp) - return -ENOMEM; - for_each_cpu(j, cpu_map) { struct sched_domain *sd; struct sched_group *sg; - struct sched_group_power *sgp; sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(), GFP_KERNEL, cpu_to_node(j)); @@ -7388,13 +7251,6 @@ static int __sdt_alloc(const struct cpumask *cpu_map) return -ENOMEM; *per_cpu_ptr(sdd->sg, j) = sg; - - sgp = kzalloc_node(sizeof(struct sched_group_power), - GFP_KERNEL, cpu_to_node(j)); - if (!sgp) - return -ENOMEM; - - *per_cpu_ptr(sdd->sgp, j) = sgp; } } @@ -7410,15 +7266,11 @@ static void __sdt_free(const struct cpumask *cpu_map) struct sd_data *sdd = &tl->data; for_each_cpu(j, cpu_map) { - struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j); - if (sd && (sd->flags & SD_OVERLAP)) - free_sched_groups(sd->groups, 0); + kfree(*per_cpu_ptr(sdd->sd, j)); kfree(*per_cpu_ptr(sdd->sg, j)); - kfree(*per_cpu_ptr(sdd->sgp, j)); } free_percpu(sdd->sd); free_percpu(sdd->sg); - free_percpu(sdd->sgp); } } @@ -7464,13 +7316,8 @@ static int build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_topology_level *tl; sd = NULL; - for (tl = sched_domain_topology; tl->init; tl++) { + for (tl = sched_domain_topology; tl->init; tl++) sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i); - if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP)) - sd->flags |= SD_OVERLAP; - if (cpumask_equal(cpu_map, sched_domain_span(sd))) - break; 
- } while (sd->child) sd = sd->child; @@ -7482,13 +7329,13 @@ static int build_sched_domains(const struct cpumask *cpu_map, for_each_cpu(i, cpu_map) { for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { sd->span_weight = cpumask_weight(sched_domain_span(sd)); - if (sd->flags & SD_OVERLAP) { - if (build_overlap_sched_groups(sd, i)) - goto error; - } else { - if (build_sched_groups(sd, i)) - goto error; - } + get_group(i, sd->private, &sd->groups); + atomic_inc(&sd->groups->ref); + + if (i != cpumask_first(sched_domain_span(sd))) + continue; + + build_sched_groups(sd); } } diff --git a/trunk/kernel/sched_fair.c b/trunk/kernel/sched_fair.c index c768588e180b..433491c2dc8f 100644 --- a/trunk/kernel/sched_fair.c +++ b/trunk/kernel/sched_fair.c @@ -1585,7 +1585,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, } /* Adjust by relative CPU power of the group */ - avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power; + avg_load = (avg_load * SCHED_POWER_SCALE) / group->cpu_power; if (local_group) { this_load = avg_load; @@ -2631,7 +2631,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) power >>= SCHED_POWER_SHIFT; } - sdg->sgp->power_orig = power; + sdg->cpu_power_orig = power; if (sched_feat(ARCH_POWER)) power *= arch_scale_freq_power(sd, cpu); @@ -2647,7 +2647,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) power = 1; cpu_rq(cpu)->cpu_power = power; - sdg->sgp->power = power; + sdg->cpu_power = power; } static void update_group_power(struct sched_domain *sd, int cpu) @@ -2665,11 +2665,11 @@ static void update_group_power(struct sched_domain *sd, int cpu) group = child->groups; do { - power += group->sgp->power; + power += group->cpu_power; group = group->next; } while (group != child->groups); - sdg->sgp->power = power; + sdg->cpu_power = power; } /* @@ -2691,7 +2691,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group) /* * If ~90% of the cpu_power is still there, we're good. 
*/ - if (group->sgp->power * 32 > group->sgp->power_orig * 29) + if (group->cpu_power * 32 > group->cpu_power_orig * 29) return 1; return 0; @@ -2771,7 +2771,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, } /* Adjust by relative CPU power of the group */ - sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->sgp->power; + sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->cpu_power; /* * Consider the group unbalanced when the imbalance is larger @@ -2788,7 +2788,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1) sgs->group_imb = 1; - sgs->group_capacity = DIV_ROUND_CLOSEST(group->sgp->power, + sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_POWER_SCALE); if (!sgs->group_capacity) sgs->group_capacity = fix_small_capacity(sd, group); @@ -2877,7 +2877,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, return; sds->total_load += sgs.group_load; - sds->total_pwr += sg->sgp->power; + sds->total_pwr += sg->cpu_power; /* * In case the child domain prefers tasks go to siblings @@ -2962,7 +2962,7 @@ static int check_asym_packing(struct sched_domain *sd, if (this_cpu > busiest_cpu) return 0; - *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->sgp->power, + *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power, SCHED_POWER_SCALE); return 1; } @@ -2993,7 +2993,7 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds, scaled_busy_load_per_task = sds->busiest_load_per_task * SCHED_POWER_SCALE; - scaled_busy_load_per_task /= sds->busiest->sgp->power; + scaled_busy_load_per_task /= sds->busiest->cpu_power; if (sds->max_load - sds->this_load + scaled_busy_load_per_task >= (scaled_busy_load_per_task * imbn)) { @@ -3007,28 +3007,28 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds, * moving them. 
*/ - pwr_now += sds->busiest->sgp->power * + pwr_now += sds->busiest->cpu_power * min(sds->busiest_load_per_task, sds->max_load); - pwr_now += sds->this->sgp->power * + pwr_now += sds->this->cpu_power * min(sds->this_load_per_task, sds->this_load); pwr_now /= SCHED_POWER_SCALE; /* Amount of load we'd subtract */ tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) / - sds->busiest->sgp->power; + sds->busiest->cpu_power; if (sds->max_load > tmp) - pwr_move += sds->busiest->sgp->power * + pwr_move += sds->busiest->cpu_power * min(sds->busiest_load_per_task, sds->max_load - tmp); /* Amount of load we'd add */ - if (sds->max_load * sds->busiest->sgp->power < + if (sds->max_load * sds->busiest->cpu_power < sds->busiest_load_per_task * SCHED_POWER_SCALE) - tmp = (sds->max_load * sds->busiest->sgp->power) / - sds->this->sgp->power; + tmp = (sds->max_load * sds->busiest->cpu_power) / + sds->this->cpu_power; else tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) / - sds->this->sgp->power; - pwr_move += sds->this->sgp->power * + sds->this->cpu_power; + pwr_move += sds->this->cpu_power * min(sds->this_load_per_task, sds->this_load + tmp); pwr_move /= SCHED_POWER_SCALE; @@ -3074,7 +3074,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE); - load_above_capacity /= sds->busiest->sgp->power; + load_above_capacity /= sds->busiest->cpu_power; } /* @@ -3090,8 +3090,8 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, max_pull = min(sds->max_load - sds->avg_load, load_above_capacity); /* How much load to actually move to equalise the imbalance */ - *imbalance = min(max_pull * sds->busiest->sgp->power, - (sds->avg_load - sds->this_load) * sds->this->sgp->power) + *imbalance = min(max_pull * sds->busiest->cpu_power, + (sds->avg_load - sds->this_load) * sds->this->cpu_power) / SCHED_POWER_SCALE; /* diff --git a/trunk/kernel/sched_features.h 
b/trunk/kernel/sched_features.h index 1e7066d76c26..be40f7371ee1 100644 --- a/trunk/kernel/sched_features.h +++ b/trunk/kernel/sched_features.h @@ -70,5 +70,3 @@ SCHED_FEAT(NONIRQ_POWER, 1) * using the scheduler IPI. Reduces rq->lock contention/bounces. */ SCHED_FEAT(TTWU_QUEUE, 1) - -SCHED_FEAT(FORCE_SD_OVERLAP, 0) diff --git a/trunk/kernel/signal.c b/trunk/kernel/signal.c index 415d85d6f6c6..ff7678603328 100644 --- a/trunk/kernel/signal.c +++ b/trunk/kernel/signal.c @@ -1178,25 +1178,18 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, { struct sighand_struct *sighand; + rcu_read_lock(); for (;;) { - local_irq_save(*flags); - rcu_read_lock(); sighand = rcu_dereference(tsk->sighand); - if (unlikely(sighand == NULL)) { - rcu_read_unlock(); - local_irq_restore(*flags); + if (unlikely(sighand == NULL)) break; - } - spin_lock(&sighand->siglock); - if (likely(sighand == tsk->sighand)) { - rcu_read_unlock(); + spin_lock_irqsave(&sighand->siglock, *flags); + if (likely(sighand == tsk->sighand)) break; - } - spin_unlock(&sighand->siglock); - rcu_read_unlock(); - local_irq_restore(*flags); + spin_unlock_irqrestore(&sighand->siglock, *flags); } + rcu_read_unlock(); return sighand; } diff --git a/trunk/kernel/softirq.c b/trunk/kernel/softirq.c index fca82c32042b..40cf63ddd4b3 100644 --- a/trunk/kernel/softirq.c +++ b/trunk/kernel/softirq.c @@ -315,24 +315,16 @@ static inline void invoke_softirq(void) { if (!force_irqthreads) __do_softirq(); - else { - __local_bh_disable((unsigned long)__builtin_return_address(0), - SOFTIRQ_OFFSET); + else wakeup_softirqd(); - __local_bh_enable(SOFTIRQ_OFFSET); - } } #else static inline void invoke_softirq(void) { if (!force_irqthreads) do_softirq(); - else { - __local_bh_disable((unsigned long)__builtin_return_address(0), - SOFTIRQ_OFFSET); + else wakeup_softirqd(); - __local_bh_enable(SOFTIRQ_OFFSET); - } } #endif diff --git a/trunk/mm/slab.c b/trunk/mm/slab.c index 1e523ed47c61..d96e223de775 100644 --- 
a/trunk/mm/slab.c +++ b/trunk/mm/slab.c @@ -574,9 +574,7 @@ static struct arraycache_init initarray_generic = { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; /* internal cache of cache description objs */ -static struct kmem_list3 *cache_cache_nodelists[MAX_NUMNODES]; static struct kmem_cache cache_cache = { - .nodelists = cache_cache_nodelists, .batchcount = 1, .limit = BOOT_CPUCACHE_ENTRIES, .shared = 1, @@ -1494,10 +1492,11 @@ void __init kmem_cache_init(void) cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node]; /* - * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids + * struct kmem_cache size depends on nr_node_ids, which + * can be less than MAX_NUMNODES. */ - cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) + - nr_node_ids * sizeof(struct kmem_list3 *); + cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) + + nr_node_ids * sizeof(struct kmem_list3 *); #if DEBUG cache_cache.obj_size = cache_cache.buffer_size; #endif @@ -2309,7 +2308,6 @@ kmem_cache_create (const char *name, size_t size, size_t align, if (!cachep) goto oops; - cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids]; #if DEBUG cachep->obj_size = size; @@ -3155,11 +3153,12 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, objp += obj_offset(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) cachep->ctor(objp); - if (ARCH_SLAB_MINALIGN && - ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) { +#if ARCH_SLAB_MINALIGN + if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) { printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", - objp, (int)ARCH_SLAB_MINALIGN); + objp, ARCH_SLAB_MINALIGN); } +#endif return objp; } #else diff --git a/trunk/mm/slob.c b/trunk/mm/slob.c index 0ae881831ae2..46e0aee33a23 100644 --- a/trunk/mm/slob.c +++ b/trunk/mm/slob.c @@ -482,8 +482,6 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node) int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); void *ret; - gfp &= 
gfp_allowed_mask; - lockdep_trace_alloc(gfp); if (size < PAGE_SIZE - align) { @@ -610,10 +608,6 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node) { void *b; - flags &= gfp_allowed_mask; - - lockdep_trace_alloc(flags); - if (c->size < PAGE_SIZE) { b = slob_alloc(c->size, flags, c->align, node); trace_kmem_cache_alloc_node(_RET_IP_, b, c->size, diff --git a/trunk/mm/slub.c b/trunk/mm/slub.c index ba83f3fd0757..35f351f26193 100644 --- a/trunk/mm/slub.c +++ b/trunk/mm/slub.c @@ -27,7 +27,6 @@ #include #include #include -#include #include @@ -192,12 +191,8 @@ static LIST_HEAD(slab_caches); /* * Tracking user of a slab. */ -#define TRACK_ADDRS_COUNT 16 struct track { unsigned long addr; /* Called from address */ -#ifdef CONFIG_STACKTRACE - unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */ -#endif int cpu; /* Was running on cpu */ int pid; /* Pid context */ unsigned long when; /* When did the operation occur */ @@ -425,24 +420,6 @@ static void set_track(struct kmem_cache *s, void *object, struct track *p = get_track(s, object, alloc); if (addr) { -#ifdef CONFIG_STACKTRACE - struct stack_trace trace; - int i; - - trace.nr_entries = 0; - trace.max_entries = TRACK_ADDRS_COUNT; - trace.entries = p->addrs; - trace.skip = 3; - save_stack_trace(&trace); - - /* See rant in lockdep.c */ - if (trace.nr_entries != 0 && - trace.entries[trace.nr_entries - 1] == ULONG_MAX) - trace.nr_entries--; - - for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++) - p->addrs[i] = 0; -#endif p->addr = addr; p->cpu = smp_processor_id(); p->pid = current->pid; @@ -467,16 +444,6 @@ static void print_track(const char *s, struct track *t) printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n", s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid); -#ifdef CONFIG_STACKTRACE - { - int i; - for (i = 0; i < TRACK_ADDRS_COUNT; i++) - if (t->addrs[i]) - printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]); - else - break; - } -#endif } static void print_tracking(struct 
kmem_cache *s, void *object) @@ -590,10 +557,10 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) memset(p + s->objsize, val, s->inuse - s->objsize); } -static u8 *check_bytes8(u8 *start, u8 value, unsigned int bytes) +static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes) { while (bytes) { - if (*start != value) + if (*start != (u8)value) return start; start++; bytes--; @@ -601,38 +568,6 @@ static u8 *check_bytes8(u8 *start, u8 value, unsigned int bytes) return NULL; } -static u8 *check_bytes(u8 *start, u8 value, unsigned int bytes) -{ - u64 value64; - unsigned int words, prefix; - - if (bytes <= 16) - return check_bytes8(start, value, bytes); - - value64 = value | value << 8 | value << 16 | value << 24; - value64 = value64 | value64 << 32; - prefix = 8 - ((unsigned long)start) % 8; - - if (prefix) { - u8 *r = check_bytes8(start, value, prefix); - if (r) - return r; - start += prefix; - bytes -= prefix; - } - - words = bytes / 8; - - while (words) { - if (*(u64 *)start != value64) - return check_bytes8(start, value, 8); - start += 8; - words--; - } - - return check_bytes8(start, value, bytes % 8); -} - static void restore_bytes(struct kmem_cache *s, char *message, u8 data, void *from, void *to) { @@ -2993,42 +2928,6 @@ size_t ksize(const void *object) } EXPORT_SYMBOL(ksize); -#ifdef CONFIG_SLUB_DEBUG -bool verify_mem_not_deleted(const void *x) -{ - struct page *page; - void *object = (void *)x; - unsigned long flags; - bool rv; - - if (unlikely(ZERO_OR_NULL_PTR(x))) - return false; - - local_irq_save(flags); - - page = virt_to_head_page(x); - if (unlikely(!PageSlab(page))) { - /* maybe it was from stack? 
*/ - rv = true; - goto out_unlock; - } - - slab_lock(page); - if (on_freelist(page->slab, page, object)) { - object_err(page->slab, page, object, "Object is on free-list"); - rv = false; - } else { - rv = true; - } - slab_unlock(page); - -out_unlock: - local_irq_restore(flags); - return rv; -} -EXPORT_SYMBOL(verify_mem_not_deleted); -#endif - void kfree(const void *x) { struct page *page; diff --git a/trunk/mm/vmscan.c b/trunk/mm/vmscan.c index d036e59d302b..5ed24b94c5e6 100644 --- a/trunk/mm/vmscan.c +++ b/trunk/mm/vmscan.c @@ -2310,8 +2310,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages, for (i = 0; i <= classzone_idx; i++) present_pages += pgdat->node_zones[i].present_pages; - /* A special case here: if zone has no page, we think it's balanced */ - return balanced_pages >= (present_pages >> 2); + return balanced_pages > (present_pages >> 2); } /* is kswapd sleeping prematurely? */ diff --git a/trunk/net/ceph/ceph_fs.c b/trunk/net/ceph/ceph_fs.c index 41466ccb972a..a3a3a31d3c37 100644 --- a/trunk/net/ceph/ceph_fs.c +++ b/trunk/net/ceph/ceph_fs.c @@ -36,19 +36,16 @@ int ceph_flags_to_mode(int flags) if ((flags & O_DIRECTORY) == O_DIRECTORY) return CEPH_FILE_MODE_PIN; #endif + if ((flags & O_APPEND) == O_APPEND) + flags |= O_WRONLY; - switch (flags & O_ACCMODE) { - case O_WRONLY: + if ((flags & O_ACCMODE) == O_RDWR) + mode = CEPH_FILE_MODE_RDWR; + else if ((flags & O_ACCMODE) == O_WRONLY) mode = CEPH_FILE_MODE_WR; - break; - case O_RDONLY: + else mode = CEPH_FILE_MODE_RD; - break; - case O_RDWR: - case O_ACCMODE: /* this is what the VFS does */ - mode = CEPH_FILE_MODE_RDWR; - break; - } + #ifdef O_LAZY if (flags & O_LAZY) mode |= CEPH_FILE_MODE_LAZY;