From cd54c58d8b66c0d716d59e47d9b3fa49e07f9435 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 15 Nov 2010 03:04:51 +0000 Subject: [PATCH] --- yaml --- r: 229065 b: refs/heads/master c: 009ca3897ea8313b4ed4da964a2f31ecf5a0624d h: refs/heads/master i: 229063: 89b128c1aec6331a5b9c8dff86f1e2e6c65b1b53 v: v3 --- [refs] | 2 +- trunk/MAINTAINERS | 5 +- trunk/arch/arm/mach-dove/common.c | 4 +- .../arch/arm/mach-tegra/include/mach/sdhci.h | 29 - trunk/arch/microblaze/Kconfig.debug | 4 + trunk/arch/microblaze/Makefile | 2 +- trunk/arch/microblaze/configs/mmu_defconfig | 1 + trunk/arch/microblaze/include/asm/pvr.h | 185 +- trunk/arch/microblaze/kernel/cpu/cpuinfo.c | 1 - trunk/arch/microblaze/kernel/entry.S | 46 +- trunk/arch/microblaze/kernel/exceptions.c | 3 - .../microblaze/kernel/hw_exception_handler.S | 9 - trunk/arch/microblaze/kernel/prom.c | 4 +- trunk/arch/microblaze/kernel/vmlinux.lds.S | 16 +- trunk/arch/microblaze/lib/memmove.c | 2 +- trunk/arch/microblaze/lib/muldi3.S | 121 ++ trunk/arch/microblaze/lib/muldi3.c | 60 - trunk/arch/x86/include/asm/acpi.h | 11 +- trunk/arch/x86/include/asm/amd_nb.h | 13 +- trunk/arch/x86/include/asm/fixmap.h | 4 +- trunk/arch/x86/include/asm/gpio.h | 5 +- trunk/arch/x86/include/asm/kdebug.h | 1 + trunk/arch/x86/include/asm/mach_traps.h | 12 +- trunk/arch/x86/include/asm/nmi.h | 20 - trunk/arch/x86/include/asm/numa_64.h | 2 +- trunk/arch/x86/include/asm/perf_event_p4.h | 3 - trunk/arch/x86/kernel/amd_nb.c | 7 - trunk/arch/x86/kernel/aperture_64.c | 44 +- trunk/arch/x86/kernel/apic/apic.c | 2 +- trunk/arch/x86/kernel/apic/hw_nmi.c | 3 +- trunk/arch/x86/kernel/apic/x2apic_uv_x.c | 4 +- trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c | 5 +- trunk/arch/x86/kernel/cpu/perf_event.c | 3 +- trunk/arch/x86/kernel/cpu/perf_event_p4.c | 28 +- trunk/arch/x86/kernel/dumpstack.c | 6 + trunk/arch/x86/kernel/entry_64.S | 36 +- trunk/arch/x86/kernel/kgdb.c | 7 +- trunk/arch/x86/kernel/reboot.c | 5 +- trunk/arch/x86/kernel/smpboot.c | 4 +- trunk/arch/x86/kernel/traps.c | 102 +- trunk/arch/x86/kernel/tsc.c | 2 +- trunk/arch/x86/mm/amdtopology_64.c | 86 +- trunk/arch/x86/mm/numa_64.c | 157 +- trunk/arch/x86/mm/srat_64.c | 26 +- trunk/arch/x86/oprofile/nmi_int.c | 3 +- trunk/arch/x86/oprofile/nmi_timer_int.c | 2 +- trunk/arch/x86/pci/amd_bus.c | 33 - trunk/drivers/char/ipmi/ipmi_watchdog.c | 2 +- trunk/drivers/mfd/sh_mobile_sdhi.c | 6 - trunk/drivers/mmc/card/Kconfig | 1 - trunk/drivers/mmc/core/Kconfig | 11 - trunk/drivers/mmc/core/bus.c | 8 +- trunk/drivers/mmc/core/core.c | 206 +- trunk/drivers/mmc/core/core.h | 9 +- trunk/drivers/mmc/core/debugfs.c | 5 - trunk/drivers/mmc/core/host.c | 206 +- trunk/drivers/mmc/core/host.h | 21 - trunk/drivers/mmc/core/mmc.c | 91 +- trunk/drivers/mmc/core/mmc_ops.c | 101 - trunk/drivers/mmc/core/mmc_ops.h | 1 - trunk/drivers/mmc/core/sd.c | 16 +- trunk/drivers/mmc/core/sdio.c | 36 +- trunk/drivers/mmc/core/sdio_bus.c | 32 + trunk/drivers/mmc/host/Kconfig | 37 - trunk/drivers/mmc/host/Makefile | 3 - trunk/drivers/mmc/host/davinci_mmc.c | 80 +- trunk/drivers/mmc/host/dw_mmc.c | 1796 ----------------- trunk/drivers/mmc/host/dw_mmc.h | 168 -- trunk/drivers/mmc/host/mxcmmc.c | 53 +- trunk/drivers/mmc/host/sdhci-dove.c | 70 - trunk/drivers/mmc/host/sdhci-pci.c | 161 +- trunk/drivers/mmc/host/sdhci-pltfm.c | 6 - trunk/drivers/mmc/host/sdhci-pltfm.h | 2 - trunk/drivers/mmc/host/sdhci-s3c.c | 66 - trunk/drivers/mmc/host/sdhci-tegra.c | 257 --- trunk/drivers/mmc/host/sdhci.c | 45 +- trunk/drivers/mmc/host/sdhci.h | 3 +- 
trunk/drivers/mmc/host/tmio_mmc.c | 561 +---- trunk/drivers/mmc/host/tmio_mmc.h | 228 +++ trunk/drivers/rtc/class.c | 13 - trunk/drivers/rtc/interface.c | 574 +++--- trunk/drivers/rtc/rtc-dev.c | 104 + trunk/drivers/rtc/rtc-lib.c | 28 - trunk/drivers/watchdog/hpwdt.c | 2 +- trunk/fs/9p/acl.c | 2 +- trunk/fs/9p/xattr.c | 2 +- trunk/fs/ocfs2/Kconfig | 2 +- trunk/fs/ocfs2/alloc.c | 77 +- trunk/fs/ocfs2/alloc.h | 4 + trunk/fs/ocfs2/aops.c | 59 +- trunk/fs/ocfs2/cluster/heartbeat.c | 246 +-- trunk/fs/ocfs2/cluster/netdebug.c | 286 +-- trunk/fs/ocfs2/cluster/tcp.c | 145 +- trunk/fs/ocfs2/cluster/tcp_internal.h | 33 +- trunk/fs/ocfs2/dlm/dlmast.c | 76 +- trunk/fs/ocfs2/dlm/dlmcommon.h | 86 +- trunk/fs/ocfs2/dlm/dlmdebug.c | 200 +- trunk/fs/ocfs2/dlm/dlmdebug.h | 5 + trunk/fs/ocfs2/dlm/dlmdomain.c | 10 +- trunk/fs/ocfs2/dlm/dlmlock.c | 3 - trunk/fs/ocfs2/dlm/dlmthread.c | 132 +- trunk/fs/ocfs2/namei.c | 5 +- trunk/fs/ocfs2/ocfs2.h | 5 - trunk/fs/xfs/linux-2.6/sv.h | 59 + trunk/fs/xfs/linux-2.6/xfs_aops.c | 425 ++-- trunk/fs/xfs/linux-2.6/xfs_aops.h | 16 - trunk/fs/xfs/linux-2.6/xfs_buf.c | 235 +-- trunk/fs/xfs/linux-2.6/xfs_buf.h | 22 +- trunk/fs/xfs/linux-2.6/xfs_export.c | 12 +- trunk/fs/xfs/linux-2.6/xfs_linux.h | 1 + trunk/fs/xfs/linux-2.6/xfs_super.c | 22 +- trunk/fs/xfs/linux-2.6/xfs_sync.c | 92 +- trunk/fs/xfs/linux-2.6/xfs_trace.h | 59 +- trunk/fs/xfs/quota/xfs_dquot.c | 1 + trunk/fs/xfs/xfs_ag.h | 2 +- trunk/fs/xfs/xfs_alloc.c | 351 ++-- trunk/fs/xfs/xfs_attr_leaf.c | 4 +- trunk/fs/xfs/xfs_btree.c | 9 +- trunk/fs/xfs/xfs_buf_item.c | 32 +- trunk/fs/xfs/xfs_buf_item.h | 11 + trunk/fs/xfs/xfs_extfree_item.c | 97 +- trunk/fs/xfs/xfs_extfree_item.h | 11 +- trunk/fs/xfs/xfs_fsops.c | 1 - trunk/fs/xfs/xfs_iget.c | 79 +- trunk/fs/xfs/xfs_inode.c | 54 +- trunk/fs/xfs/xfs_inode.h | 15 +- trunk/fs/xfs/xfs_inode_item.c | 90 +- trunk/fs/xfs/xfs_iomap.c | 233 ++- trunk/fs/xfs/xfs_iomap.h | 27 +- trunk/fs/xfs/xfs_log.c | 739 ++++--- trunk/fs/xfs/xfs_log_cil.c | 17 +- trunk/fs/xfs/xfs_log_priv.h | 127 +- trunk/fs/xfs/xfs_log_recover.c | 620 +++--- trunk/fs/xfs/xfs_mount.c | 23 +- trunk/fs/xfs/xfs_mount.h | 14 - trunk/fs/xfs/xfs_trans.c | 79 +- trunk/fs/xfs/xfs_trans.h | 2 +- trunk/fs/xfs/xfs_trans_ail.c | 232 +-- trunk/fs/xfs/xfs_trans_extfree.c | 8 +- trunk/fs/xfs/xfs_trans_priv.h | 35 +- trunk/fs/xfs/xfs_vnodeops.c | 61 +- trunk/include/linux/dynamic_debug.h | 18 +- trunk/include/linux/mfd/tmio.h | 5 - trunk/include/linux/mmc/dw_mmc.h | 217 -- trunk/include/linux/mmc/host.h | 19 - trunk/include/linux/mmc/mmc.h | 2 - trunk/include/linux/mmc/sdhci.h | 6 - trunk/include/linux/pci_ids.h | 8 - trunk/include/linux/rtc.h | 51 +- trunk/include/linux/tracepoint.h | 4 +- trunk/include/trace/define_trace.h | 10 - trunk/include/trace/events/skb.h | 4 +- trunk/kernel/Makefile | 1 - trunk/kernel/exit.c | 14 +- trunk/kernel/perf_event.c | 82 +- trunk/kernel/trace/Makefile | 2 +- trunk/kernel/trace/trace.c | 6 +- trunk/lib/dynamic_debug.c | 9 +- trunk/tools/perf/Makefile | 2 +- trunk/tools/perf/builtin-record.c | 3 - trunk/tools/perf/builtin-sched.c | 5 +- trunk/tools/perf/builtin-stat.c | 5 +- trunk/tools/perf/builtin-test.c | 116 +- trunk/tools/perf/builtin-top.c | 2 - trunk/tools/perf/util/evsel.c | 87 +- trunk/tools/perf/util/evsel.h | 2 +- trunk/tools/perf/util/parse-events.c | 74 +- trunk/tools/perf/util/session.c | 2 +- 168 files changed, 3883 insertions(+), 8307 deletions(-) delete mode 100644 trunk/arch/arm/mach-tegra/include/mach/sdhci.h create mode 100644 trunk/arch/microblaze/lib/muldi3.S delete mode 
100644 trunk/arch/microblaze/lib/muldi3.c delete mode 100644 trunk/drivers/mmc/host/dw_mmc.c delete mode 100644 trunk/drivers/mmc/host/dw_mmc.h delete mode 100644 trunk/drivers/mmc/host/sdhci-dove.c delete mode 100644 trunk/drivers/mmc/host/sdhci-tegra.c create mode 100644 trunk/drivers/mmc/host/tmio_mmc.h create mode 100644 trunk/fs/xfs/linux-2.6/sv.h delete mode 100644 trunk/include/linux/mmc/dw_mmc.h diff --git a/[refs] b/[refs] index 73f8edb244a1..d03925fe369c 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 7bc4a4ce68f8c6d064ea949446852e996526f692 +refs/heads/master: 009ca3897ea8313b4ed4da964a2f31ecf5a0624d diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index 42f991e5a85d..bb6c1ac85138 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -1785,8 +1785,7 @@ S: Maintained F: drivers/usb/atm/cxacru.c CONFIGFS -M: Joel Becker -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/configfs.git +M: Joel Becker S: Supported F: fs/configfs/ F: include/linux/configfs.h @@ -4550,7 +4549,7 @@ F: include/linux/oprofile.h ORACLE CLUSTER FILESYSTEM 2 (OCFS2) M: Mark Fasheh -M: Joel Becker +M: Joel Becker L: ocfs2-devel@oss.oracle.com (moderated for non-subscribers) W: http://oss.oracle.com/projects/ocfs2/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2.git diff --git a/trunk/arch/arm/mach-dove/common.c b/trunk/arch/arm/mach-dove/common.c index fe627aba6da7..f7a12586a1f5 100644 --- a/trunk/arch/arm/mach-dove/common.c +++ b/trunk/arch/arm/mach-dove/common.c @@ -770,7 +770,7 @@ static struct resource dove_sdio0_resources[] = { }; static struct platform_device dove_sdio0 = { - .name = "sdhci-dove", + .name = "sdhci-mv", .id = 0, .dev = { .dma_mask = &sdio_dmamask, @@ -798,7 +798,7 @@ static struct resource dove_sdio1_resources[] = { }; static struct platform_device dove_sdio1 = { - .name = "sdhci-dove", + .name = "sdhci-mv", .id = 1, .dev = { .dma_mask = &sdio_dmamask, diff --git a/trunk/arch/arm/mach-tegra/include/mach/sdhci.h b/trunk/arch/arm/mach-tegra/include/mach/sdhci.h deleted file mode 100644 index 3ad086e859c3..000000000000 --- a/trunk/arch/arm/mach-tegra/include/mach/sdhci.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * include/asm-arm/arch-tegra/include/mach/sdhci.h - * - * Copyright (C) 2009 Palm, Inc. - * Author: Yvonne Yip - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ -#ifndef __ASM_ARM_ARCH_TEGRA_SDHCI_H -#define __ASM_ARM_ARCH_TEGRA_SDHCI_H - -#include - -struct tegra_sdhci_platform_data { - int cd_gpio; - int wp_gpio; - int power_gpio; - int is_8bit; -}; - -#endif diff --git a/trunk/arch/microblaze/Kconfig.debug b/trunk/arch/microblaze/Kconfig.debug index 012e377330cd..e66e25c4b0b2 100644 --- a/trunk/arch/microblaze/Kconfig.debug +++ b/trunk/arch/microblaze/Kconfig.debug @@ -23,4 +23,8 @@ config HEART_BEAT This option turns on/off heart beat kernel functionality. First GPIO node is taken. 
+config DEBUG_BOOTMEM + depends on DEBUG_KERNEL + bool "Debug BOOTMEM initialization" + endmenu diff --git a/trunk/arch/microblaze/Makefile b/trunk/arch/microblaze/Makefile index 6f432e6df9af..15f1f1d1840d 100644 --- a/trunk/arch/microblaze/Makefile +++ b/trunk/arch/microblaze/Makefile @@ -17,7 +17,7 @@ export CPU_VER CPU_MAJOR CPU_MINOR CPU_REV # The various CONFIG_XILINX cpu features options are integers 0/1/2... # rather than bools y/n -# Work out HW multipler support. This is tricky. +# Work out HW multipler support. This is icky. # 1. Spartan2 has no HW multiplers. # 2. MicroBlaze v3.x always uses them, except in Spartan 2 # 3. All other FPGa/CPU ver combos, we can trust the CONFIG_ settings diff --git a/trunk/arch/microblaze/configs/mmu_defconfig b/trunk/arch/microblaze/configs/mmu_defconfig index ab8fbe7ad90b..8b422b12ef78 100644 --- a/trunk/arch/microblaze/configs/mmu_defconfig +++ b/trunk/arch/microblaze/configs/mmu_defconfig @@ -66,4 +66,5 @@ CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_INFO=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_EARLY_PRINTK=y +CONFIG_DEBUG_BOOTMEM=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/trunk/arch/microblaze/include/asm/pvr.h b/trunk/arch/microblaze/include/asm/pvr.h index a10bec62e857..37db96a15b45 100644 --- a/trunk/arch/microblaze/include/asm/pvr.h +++ b/trunk/arch/microblaze/include/asm/pvr.h @@ -1,9 +1,9 @@ /* * Support for the MicroBlaze PVR (Processor Version Register) * - * Copyright (C) 2009 - 2011 Michal Simek + * Copyright (C) 2009 Michal Simek * Copyright (C) 2007 John Williams - * Copyright (C) 2007 - 2011 PetaLogix + * Copyright (C) 2007 - 2009 PetaLogix * * This file is subject to the terms and conditions of the GNU General * Public License. See the file COPYING in the main directory of this @@ -46,11 +46,11 @@ struct pvr_s { #define PVR2_I_LMB_MASK 0x10000000 #define PVR2_INTERRUPT_IS_EDGE_MASK 0x08000000 #define PVR2_EDGE_IS_POSITIVE_MASK 0x04000000 -#define PVR2_D_PLB_MASK 0x02000000 /* new */ -#define PVR2_I_PLB_MASK 0x01000000 /* new */ -#define PVR2_INTERCONNECT 0x00800000 /* new */ -#define PVR2_USE_EXTEND_FSL 0x00080000 /* new */ -#define PVR2_USE_FSL_EXC 0x00040000 /* new */ +#define PVR2_D_PLB_MASK 0x02000000 /* new */ +#define PVR2_I_PLB_MASK 0x01000000 /* new */ +#define PVR2_INTERCONNECT 0x00800000 /* new */ +#define PVR2_USE_EXTEND_FSL 0x00080000 /* new */ +#define PVR2_USE_FSL_EXC 0x00040000 /* new */ #define PVR2_USE_MSR_INSTR 0x00020000 #define PVR2_USE_PCMP_INSTR 0x00010000 #define PVR2_AREA_OPTIMISED 0x00008000 @@ -59,7 +59,7 @@ struct pvr_s { #define PVR2_USE_HW_MUL_MASK 0x00001000 #define PVR2_USE_FPU_MASK 0x00000800 #define PVR2_USE_MUL64_MASK 0x00000400 -#define PVR2_USE_FPU2_MASK 0x00000200 /* new */ +#define PVR2_USE_FPU2_MASK 0x00000200 /* new */ #define PVR2_USE_IPLBEXC 0x00000100 #define PVR2_USE_DPLBEXC 0x00000080 #define PVR2_OPCODE_0x0_ILL_MASK 0x00000040 @@ -122,103 +122,96 @@ struct pvr_s { /* PVR access macros */ -#define PVR_IS_FULL(_pvr) (_pvr.pvr[0] & PVR0_PVR_FULL_MASK) -#define PVR_USE_BARREL(_pvr) (_pvr.pvr[0] & PVR0_USE_BARREL_MASK) -#define PVR_USE_DIV(_pvr) (_pvr.pvr[0] & PVR0_USE_DIV_MASK) -#define PVR_USE_HW_MUL(_pvr) (_pvr.pvr[0] & PVR0_USE_HW_MUL_MASK) -#define PVR_USE_FPU(_pvr) (_pvr.pvr[0] & PVR0_USE_FPU_MASK) -#define PVR_USE_FPU2(_pvr) (_pvr.pvr[2] & PVR2_USE_FPU2_MASK) -#define PVR_USE_ICACHE(_pvr) (_pvr.pvr[0] & PVR0_USE_ICACHE_MASK) -#define PVR_USE_DCACHE(_pvr) (_pvr.pvr[0] & PVR0_USE_DCACHE_MASK) -#define PVR_VERSION(_pvr) ((_pvr.pvr[0] & PVR0_VERSION_MASK) >> 8) -#define 
PVR_USER1(_pvr) (_pvr.pvr[0] & PVR0_USER1_MASK) -#define PVR_USER2(_pvr) (_pvr.pvr[1] & PVR1_USER2_MASK) - -#define PVR_D_OPB(_pvr) (_pvr.pvr[2] & PVR2_D_OPB_MASK) -#define PVR_D_LMB(_pvr) (_pvr.pvr[2] & PVR2_D_LMB_MASK) -#define PVR_I_OPB(_pvr) (_pvr.pvr[2] & PVR2_I_OPB_MASK) -#define PVR_I_LMB(_pvr) (_pvr.pvr[2] & PVR2_I_LMB_MASK) -#define PVR_INTERRUPT_IS_EDGE(_pvr) \ - (_pvr.pvr[2] & PVR2_INTERRUPT_IS_EDGE_MASK) -#define PVR_EDGE_IS_POSITIVE(_pvr) \ - (_pvr.pvr[2] & PVR2_EDGE_IS_POSITIVE_MASK) -#define PVR_USE_MSR_INSTR(_pvr) (_pvr.pvr[2] & PVR2_USE_MSR_INSTR) -#define PVR_USE_PCMP_INSTR(_pvr) (_pvr.pvr[2] & PVR2_USE_PCMP_INSTR) -#define PVR_AREA_OPTIMISED(_pvr) (_pvr.pvr[2] & PVR2_AREA_OPTIMISED) -#define PVR_USE_MUL64(_pvr) (_pvr.pvr[2] & PVR2_USE_MUL64_MASK) -#define PVR_OPCODE_0x0_ILLEGAL(_pvr) \ - (_pvr.pvr[2] & PVR2_OPCODE_0x0_ILL_MASK) -#define PVR_UNALIGNED_EXCEPTION(_pvr) \ - (_pvr.pvr[2] & PVR2_UNALIGNED_EXC_MASK) -#define PVR_ILL_OPCODE_EXCEPTION(_pvr) \ - (_pvr.pvr[2] & PVR2_ILL_OPCODE_EXC_MASK) -#define PVR_IOPB_BUS_EXCEPTION(_pvr) \ - (_pvr.pvr[2] & PVR2_IOPB_BUS_EXC_MASK) -#define PVR_DOPB_BUS_EXCEPTION(_pvr) \ - (_pvr.pvr[2] & PVR2_DOPB_BUS_EXC_MASK) -#define PVR_DIV_ZERO_EXCEPTION(_pvr) \ - (_pvr.pvr[2] & PVR2_DIV_ZERO_EXC_MASK) -#define PVR_FPU_EXCEPTION(_pvr) (_pvr.pvr[2] & PVR2_FPU_EXC_MASK) -#define PVR_FSL_EXCEPTION(_pvr) (_pvr.pvr[2] & PVR2_USE_EXTEND_FSL) - -#define PVR_DEBUG_ENABLED(_pvr) (_pvr.pvr[3] & PVR3_DEBUG_ENABLED_MASK) -#define PVR_NUMBER_OF_PC_BRK(_pvr) \ - ((_pvr.pvr[3] & PVR3_NUMBER_OF_PC_BRK_MASK) >> 25) -#define PVR_NUMBER_OF_RD_ADDR_BRK(_pvr) \ - ((_pvr.pvr[3] & PVR3_NUMBER_OF_RD_ADDR_BRK_MASK) >> 19) -#define PVR_NUMBER_OF_WR_ADDR_BRK(_pvr) \ - ((_pvr.pvr[3] & PVR3_NUMBER_OF_WR_ADDR_BRK_MASK) >> 13) -#define PVR_FSL_LINKS(_pvr) ((_pvr.pvr[3] & PVR3_FSL_LINKS_MASK) >> 7) - -#define PVR_ICACHE_ADDR_TAG_BITS(_pvr) \ - ((_pvr.pvr[4] & PVR4_ICACHE_ADDR_TAG_BITS_MASK) >> 26) -#define PVR_ICACHE_USE_FSL(_pvr) \ - (_pvr.pvr[4] & PVR4_ICACHE_USE_FSL_MASK) -#define PVR_ICACHE_ALLOW_WR(_pvr) \ - (_pvr.pvr[4] & PVR4_ICACHE_ALLOW_WR_MASK) -#define PVR_ICACHE_LINE_LEN(_pvr) \ - (1 << ((_pvr.pvr[4] & PVR4_ICACHE_LINE_LEN_MASK) >> 21)) -#define PVR_ICACHE_BYTE_SIZE(_pvr) \ - (1 << ((_pvr.pvr[4] & PVR4_ICACHE_BYTE_SIZE_MASK) >> 16)) - -#define PVR_DCACHE_ADDR_TAG_BITS(_pvr) \ - ((_pvr.pvr[5] & PVR5_DCACHE_ADDR_TAG_BITS_MASK) >> 26) -#define PVR_DCACHE_USE_FSL(_pvr) (_pvr.pvr[5] & PVR5_DCACHE_USE_FSL_MASK) -#define PVR_DCACHE_ALLOW_WR(_pvr) \ - (_pvr.pvr[5] & PVR5_DCACHE_ALLOW_WR_MASK) +#define PVR_IS_FULL(pvr) (pvr.pvr[0] & PVR0_PVR_FULL_MASK) +#define PVR_USE_BARREL(pvr) (pvr.pvr[0] & PVR0_USE_BARREL_MASK) +#define PVR_USE_DIV(pvr) (pvr.pvr[0] & PVR0_USE_DIV_MASK) +#define PVR_USE_HW_MUL(pvr) (pvr.pvr[0] & PVR0_USE_HW_MUL_MASK) +#define PVR_USE_FPU(pvr) (pvr.pvr[0] & PVR0_USE_FPU_MASK) +#define PVR_USE_FPU2(pvr) (pvr.pvr[2] & PVR2_USE_FPU2_MASK) +#define PVR_USE_ICACHE(pvr) (pvr.pvr[0] & PVR0_USE_ICACHE_MASK) +#define PVR_USE_DCACHE(pvr) (pvr.pvr[0] & PVR0_USE_DCACHE_MASK) +#define PVR_VERSION(pvr) ((pvr.pvr[0] & PVR0_VERSION_MASK) >> 8) +#define PVR_USER1(pvr) (pvr.pvr[0] & PVR0_USER1_MASK) +#define PVR_USER2(pvr) (pvr.pvr[1] & PVR1_USER2_MASK) + +#define PVR_D_OPB(pvr) (pvr.pvr[2] & PVR2_D_OPB_MASK) +#define PVR_D_LMB(pvr) (pvr.pvr[2] & PVR2_D_LMB_MASK) +#define PVR_I_OPB(pvr) (pvr.pvr[2] & PVR2_I_OPB_MASK) +#define PVR_I_LMB(pvr) (pvr.pvr[2] & PVR2_I_LMB_MASK) +#define PVR_INTERRUPT_IS_EDGE(pvr) \ + (pvr.pvr[2] & PVR2_INTERRUPT_IS_EDGE_MASK) +#define 
PVR_EDGE_IS_POSITIVE(pvr) \ + (pvr.pvr[2] & PVR2_EDGE_IS_POSITIVE_MASK) +#define PVR_USE_MSR_INSTR(pvr) (pvr.pvr[2] & PVR2_USE_MSR_INSTR) +#define PVR_USE_PCMP_INSTR(pvr) (pvr.pvr[2] & PVR2_USE_PCMP_INSTR) +#define PVR_AREA_OPTIMISED(pvr) (pvr.pvr[2] & PVR2_AREA_OPTIMISED) +#define PVR_USE_MUL64(pvr) (pvr.pvr[2] & PVR2_USE_MUL64_MASK) +#define PVR_OPCODE_0x0_ILLEGAL(pvr) \ + (pvr.pvr[2] & PVR2_OPCODE_0x0_ILL_MASK) +#define PVR_UNALIGNED_EXCEPTION(pvr) \ + (pvr.pvr[2] & PVR2_UNALIGNED_EXC_MASK) +#define PVR_ILL_OPCODE_EXCEPTION(pvr) \ + (pvr.pvr[2] & PVR2_ILL_OPCODE_EXC_MASK) +#define PVR_IOPB_BUS_EXCEPTION(pvr) \ + (pvr.pvr[2] & PVR2_IOPB_BUS_EXC_MASK) +#define PVR_DOPB_BUS_EXCEPTION(pvr) \ + (pvr.pvr[2] & PVR2_DOPB_BUS_EXC_MASK) +#define PVR_DIV_ZERO_EXCEPTION(pvr) \ + (pvr.pvr[2] & PVR2_DIV_ZERO_EXC_MASK) +#define PVR_FPU_EXCEPTION(pvr) (pvr.pvr[2] & PVR2_FPU_EXC_MASK) +#define PVR_FSL_EXCEPTION(pvr) (pvr.pvr[2] & PVR2_USE_EXTEND_FSL) + +#define PVR_DEBUG_ENABLED(pvr) (pvr.pvr[3] & PVR3_DEBUG_ENABLED_MASK) +#define PVR_NUMBER_OF_PC_BRK(pvr) \ + ((pvr.pvr[3] & PVR3_NUMBER_OF_PC_BRK_MASK) >> 25) +#define PVR_NUMBER_OF_RD_ADDR_BRK(pvr) \ + ((pvr.pvr[3] & PVR3_NUMBER_OF_RD_ADDR_BRK_MASK) >> 19) +#define PVR_NUMBER_OF_WR_ADDR_BRK(pvr) \ + ((pvr.pvr[3] & PVR3_NUMBER_OF_WR_ADDR_BRK_MASK) >> 13) +#define PVR_FSL_LINKS(pvr) ((pvr.pvr[3] & PVR3_FSL_LINKS_MASK) >> 7) + +#define PVR_ICACHE_ADDR_TAG_BITS(pvr) \ + ((pvr.pvr[4] & PVR4_ICACHE_ADDR_TAG_BITS_MASK) >> 26) +#define PVR_ICACHE_USE_FSL(pvr) (pvr.pvr[4] & PVR4_ICACHE_USE_FSL_MASK) +#define PVR_ICACHE_ALLOW_WR(pvr) (pvr.pvr[4] & PVR4_ICACHE_ALLOW_WR_MASK) +#define PVR_ICACHE_LINE_LEN(pvr) \ + (1 << ((pvr.pvr[4] & PVR4_ICACHE_LINE_LEN_MASK) >> 21)) +#define PVR_ICACHE_BYTE_SIZE(pvr) \ + (1 << ((pvr.pvr[4] & PVR4_ICACHE_BYTE_SIZE_MASK) >> 16)) + +#define PVR_DCACHE_ADDR_TAG_BITS(pvr) \ + ((pvr.pvr[5] & PVR5_DCACHE_ADDR_TAG_BITS_MASK) >> 26) +#define PVR_DCACHE_USE_FSL(pvr) (pvr.pvr[5] & PVR5_DCACHE_USE_FSL_MASK) +#define PVR_DCACHE_ALLOW_WR(pvr) (pvr.pvr[5] & PVR5_DCACHE_ALLOW_WR_MASK) /* FIXME two shifts on one line needs any comment */ -#define PVR_DCACHE_LINE_LEN(_pvr) \ - (1 << ((_pvr.pvr[5] & PVR5_DCACHE_LINE_LEN_MASK) >> 21)) -#define PVR_DCACHE_BYTE_SIZE(_pvr) \ - (1 << ((_pvr.pvr[5] & PVR5_DCACHE_BYTE_SIZE_MASK) >> 16)) +#define PVR_DCACHE_LINE_LEN(pvr) \ + (1 << ((pvr.pvr[5] & PVR5_DCACHE_LINE_LEN_MASK) >> 21)) +#define PVR_DCACHE_BYTE_SIZE(pvr) \ + (1 << ((pvr.pvr[5] & PVR5_DCACHE_BYTE_SIZE_MASK) >> 16)) -#define PVR_DCACHE_USE_WRITEBACK(_pvr) \ - ((_pvr.pvr[5] & PVR5_DCACHE_USE_WRITEBACK) >> 14) +#define PVR_DCACHE_USE_WRITEBACK(pvr) \ + ((pvr.pvr[5] & PVR5_DCACHE_USE_WRITEBACK) >> 14) -#define PVR_ICACHE_BASEADDR(_pvr) \ - (_pvr.pvr[6] & PVR6_ICACHE_BASEADDR_MASK) -#define PVR_ICACHE_HIGHADDR(_pvr) \ - (_pvr.pvr[7] & PVR7_ICACHE_HIGHADDR_MASK) -#define PVR_DCACHE_BASEADDR(_pvr) \ - (_pvr.pvr[8] & PVR8_DCACHE_BASEADDR_MASK) -#define PVR_DCACHE_HIGHADDR(_pvr) \ - (_pvr.pvr[9] & PVR9_DCACHE_HIGHADDR_MASK) +#define PVR_ICACHE_BASEADDR(pvr) (pvr.pvr[6] & PVR6_ICACHE_BASEADDR_MASK) +#define PVR_ICACHE_HIGHADDR(pvr) (pvr.pvr[7] & PVR7_ICACHE_HIGHADDR_MASK) -#define PVR_TARGET_FAMILY(_pvr) \ - ((_pvr.pvr[10] & PVR10_TARGET_FAMILY_MASK) >> 24) +#define PVR_DCACHE_BASEADDR(pvr) (pvr.pvr[8] & PVR8_DCACHE_BASEADDR_MASK) +#define PVR_DCACHE_HIGHADDR(pvr) (pvr.pvr[9] & PVR9_DCACHE_HIGHADDR_MASK) -#define PVR_MSR_RESET_VALUE(_pvr) \ - (_pvr.pvr[11] & PVR11_MSR_RESET_VALUE_MASK) +#define PVR_TARGET_FAMILY(pvr) ((pvr.pvr[10] & 
PVR10_TARGET_FAMILY_MASK) >> 24) + +#define PVR_MSR_RESET_VALUE(pvr) \ + (pvr.pvr[11] & PVR11_MSR_RESET_VALUE_MASK) /* mmu */ -#define PVR_USE_MMU(_pvr) ((_pvr.pvr[11] & PVR11_USE_MMU) >> 30) -#define PVR_MMU_ITLB_SIZE(_pvr) (_pvr.pvr[11] & PVR11_MMU_ITLB_SIZE) -#define PVR_MMU_DTLB_SIZE(_pvr) (_pvr.pvr[11] & PVR11_MMU_DTLB_SIZE) -#define PVR_MMU_TLB_ACCESS(_pvr) (_pvr.pvr[11] & PVR11_MMU_TLB_ACCESS) -#define PVR_MMU_ZONES(_pvr) (_pvr.pvr[11] & PVR11_MMU_ZONES) +#define PVR_USE_MMU(pvr) ((pvr.pvr[11] & PVR11_USE_MMU) >> 30) +#define PVR_MMU_ITLB_SIZE(pvr) (pvr.pvr[11] & PVR11_MMU_ITLB_SIZE) +#define PVR_MMU_DTLB_SIZE(pvr) (pvr.pvr[11] & PVR11_MMU_DTLB_SIZE) +#define PVR_MMU_TLB_ACCESS(pvr) (pvr.pvr[11] & PVR11_MMU_TLB_ACCESS) +#define PVR_MMU_ZONES(pvr) (pvr.pvr[11] & PVR11_MMU_ZONES) /* endian */ -#define PVR_ENDIAN(_pvr) (_pvr.pvr[0] & PVR0_ENDI) +#define PVR_ENDIAN(pvr) (pvr.pvr[0] & PVR0_ENDI) int cpu_has_pvr(void); void get_pvr(struct pvr_s *pvr); diff --git a/trunk/arch/microblaze/kernel/cpu/cpuinfo.c b/trunk/arch/microblaze/kernel/cpu/cpuinfo.c index 2c309fccf230..87c79fa275c3 100644 --- a/trunk/arch/microblaze/kernel/cpu/cpuinfo.c +++ b/trunk/arch/microblaze/kernel/cpu/cpuinfo.c @@ -32,7 +32,6 @@ const struct cpu_ver_key cpu_ver_lookup[] = { {"7.30.a", 0x10}, {"7.30.b", 0x11}, {"8.00.a", 0x12}, - {"8.00.b", 0x13}, {NULL, 0}, }; diff --git a/trunk/arch/microblaze/kernel/entry.S b/trunk/arch/microblaze/kernel/entry.S index 41c30cdb2704..819238b8a429 100644 --- a/trunk/arch/microblaze/kernel/entry.S +++ b/trunk/arch/microblaze/kernel/entry.S @@ -287,44 +287,25 @@ * are masked. This is nice, means we don't have to CLI before state save */ C_ENTRY(_user_exception): - swi r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */ addi r14, r14, 4 /* return address is 4 byte after call */ + swi r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */ - mfs r1, rmsr - nop - andi r1, r1, MSR_UMS - bnei r1, 1f - -/* Kernel-mode state save - kernel execve */ - lwi r1, r0, TOPHYS(PER_CPU(ENTRY_SP)); /* Reload kernel stack-ptr*/ - tophys(r1,r1); - - addik r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack. */ - SAVE_REGS - - swi r1, r1, PTO + PT_MODE; /* pt_regs -> kernel mode */ - brid 2f; - nop; /* Fill delay slot */ - -/* User-mode state save. */ -1: lwi r1, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */ tophys(r1,r1); lwi r1, r1, TS_THREAD_INFO; /* get stack from task_struct */ -/* calculate kernel stack pointer from task struct 8k */ - addik r1, r1, THREAD_SIZE; - tophys(r1,r1); - - addik r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack. */ + /* MS these three instructions can be added to one */ + /* addik r1, r1, THREAD_SIZE; */ + /* tophys(r1,r1); */ + /* addik r1, r1, -STATE_SAVE_SIZE; */ + addik r1, r1, THREAD_SIZE + CONFIG_KERNEL_BASE_ADDR - CONFIG_KERNEL_START - STATE_SAVE_SIZE; SAVE_REGS swi r0, r1, PTO + PT_R3 swi r0, r1, PTO + PT_R4 - swi r0, r1, PTO + PT_MODE; /* Was in user-mode. */ lwi r11, r0, TOPHYS(PER_CPU(ENTRY_SP)); swi r11, r1, PTO+PT_R1; /* Store user SP. */ clear_ums; -2: lwi CURRENT_TASK, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); + lwi CURRENT_TASK, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* Save away the syscall number. */ swi r12, r1, PTO+PT_R0; tovirt(r1,r1) @@ -394,9 +375,6 @@ C_ENTRY(ret_from_trap): swi r3, r1, PTO + PT_R3 swi r4, r1, PTO + PT_R4 - lwi r11, r1, PTO + PT_MODE; -/* See if returning to kernel mode, if so, skip resched &c. */ - bnei r11, 2f; /* We're returning to user mode, so check for various conditions that * trigger rescheduling. 
*/ /* FIXME: Restructure all these flag checks. */ @@ -439,16 +417,6 @@ C_ENTRY(ret_from_trap): RESTORE_REGS; addik r1, r1, STATE_SAVE_SIZE /* Clean up stack space. */ lwi r1, r1, PT_R1 - PT_SIZE;/* Restore user stack pointer. */ - bri 6f; - -/* Return to kernel state. */ -2: set_bip; /* Ints masked for state restore */ - VM_OFF; - tophys(r1,r1); - RESTORE_REGS; - addik r1, r1, STATE_SAVE_SIZE /* Clean up stack space. */ - tovirt(r1,r1); -6: TRAP_return: /* Make global symbol for debugging */ rtbd r14, 0; /* Instructions to return from an IRQ */ nop; diff --git a/trunk/arch/microblaze/kernel/exceptions.c b/trunk/arch/microblaze/kernel/exceptions.c index a7fa6ae76d89..478f2943ede7 100644 --- a/trunk/arch/microblaze/kernel/exceptions.c +++ b/trunk/arch/microblaze/kernel/exceptions.c @@ -25,7 +25,6 @@ #include #include #include -#include #define MICROBLAZE_ILL_OPCODE_EXCEPTION 0x02 #define MICROBLAZE_IBUS_EXCEPTION 0x03 @@ -53,8 +52,6 @@ void die(const char *str, struct pt_regs *fp, long err) void sw_exception(struct pt_regs *regs) { _exception(SIGTRAP, regs, TRAP_BRKPT, regs->r16); - flush_dcache_range(regs->r16, regs->r16 + 0x4); - flush_icache_range(regs->r16, regs->r16 + 0x4); } void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) diff --git a/trunk/arch/microblaze/kernel/hw_exception_handler.S b/trunk/arch/microblaze/kernel/hw_exception_handler.S index 25f6e07d8de8..781195438ee6 100644 --- a/trunk/arch/microblaze/kernel/hw_exception_handler.S +++ b/trunk/arch/microblaze/kernel/hw_exception_handler.S @@ -945,20 +945,11 @@ store3: sbi r3, r4, 2; store4: sbi r3, r4, 3; /* Delay slot */ ex_shw_vm: /* Store the lower half-word, byte-by-byte into destination address */ -#ifdef __MICROBLAZEEL__ - lbui r3, r5, 0; -store5: sbi r3, r4, 0; - lbui r3, r5, 1; - brid ret_from_exc; -store6: sbi r3, r4, 1; /* Delay slot */ -#else lbui r3, r5, 2; store5: sbi r3, r4, 0; lbui r3, r5, 3; brid ret_from_exc; store6: sbi r3, r4, 1; /* Delay slot */ -#endif - ex_sw_end_vm: /* Exception handling of store word, ends. */ /* We have to prevent cases that get/put_user macros get unaligned pointer diff --git a/trunk/arch/microblaze/kernel/prom.c b/trunk/arch/microblaze/kernel/prom.c index c881393f07fd..a105301e2b7f 100644 --- a/trunk/arch/microblaze/kernel/prom.c +++ b/trunk/arch/microblaze/kernel/prom.c @@ -61,12 +61,14 @@ static int __init early_init_dt_scan_serial(unsigned long node, char *p; int *addr; - pr_debug("search \"serial\", depth: %d, uname: %s\n", depth, uname); + pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname); /* find all serial nodes */ if (strncmp(uname, "serial", 6) != 0) return 0; + early_init_dt_check_for_initrd(node); + /* find compatible node with uartlite */ p = of_get_flat_dt_prop(node, "compatible", &l); if ((strncmp(p, "xlnx,xps-uartlite", 17) != 0) && diff --git a/trunk/arch/microblaze/kernel/vmlinux.lds.S b/trunk/arch/microblaze/kernel/vmlinux.lds.S index 3451bdec9f05..96a88c31fe48 100644 --- a/trunk/arch/microblaze/kernel/vmlinux.lds.S +++ b/trunk/arch/microblaze/kernel/vmlinux.lds.S @@ -123,10 +123,20 @@ SECTIONS { __init_end_before_initramfs = .; - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - INIT_RAM_FS + .init.ramfs ALIGN(PAGE_SIZE) : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + . = ALIGN(4); + LONG(0); +/* + * FIXME this can break initramfs for MMU. + * Pad init.ramfs up to page boundary, + * so that __init_end == __bss_start. 
This will make image.elf + * consistent with the image.bin + */ + /* . = ALIGN(PAGE_SIZE); */ } - __init_end = .; .bss ALIGN (PAGE_SIZE) : AT(ADDR(.bss) - LOAD_OFFSET) { diff --git a/trunk/arch/microblaze/lib/memmove.c b/trunk/arch/microblaze/lib/memmove.c index 810fd68775e3..123e3616f2dd 100644 --- a/trunk/arch/microblaze/lib/memmove.c +++ b/trunk/arch/microblaze/lib/memmove.c @@ -182,7 +182,7 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c) for (; c >= 4; c -= 4) { value = *--i_src; *--i_dst = buf_hold | ((value & 0xFF000000)>> 24); - buf_hold = (value & 0xFFFFFF) << 8; + buf_hold = (value & 0xFFFFFF) << 8;; } #endif /* Realign the source */ diff --git a/trunk/arch/microblaze/lib/muldi3.S b/trunk/arch/microblaze/lib/muldi3.S new file mode 100644 index 000000000000..ceeaa8c407f2 --- /dev/null +++ b/trunk/arch/microblaze/lib/muldi3.S @@ -0,0 +1,121 @@ +#include + +/* + * Multiply operation for 64 bit integers, for devices with hard multiply + * Input : Operand1[H] in Reg r5 + * Operand1[L] in Reg r6 + * Operand2[H] in Reg r7 + * Operand2[L] in Reg r8 + * Output: Result[H] in Reg r3 + * Result[L] in Reg r4 + * + * Explaination: + * + * Both the input numbers are divided into 16 bit number as follows + * op1 = A B C D + * op2 = E F G H + * result = D * H + * + (C * H + D * G) << 16 + * + (B * H + C * G + D * F) << 32 + * + (A * H + B * G + C * F + D * E) << 48 + * + * Only 64 bits of the output are considered + */ + + .text + .globl __muldi3 + .type __muldi3, @function + .ent __muldi3 + +__muldi3: + addi r1, r1, -40 + +/* Save the input operands on the caller's stack */ + swi r5, r1, 44 + swi r6, r1, 48 + swi r7, r1, 52 + swi r8, r1, 56 + +/* Store all the callee saved registers */ + sw r20, r1, r0 + swi r21, r1, 4 + swi r22, r1, 8 + swi r23, r1, 12 + swi r24, r1, 16 + swi r25, r1, 20 + swi r26, r1, 24 + swi r27, r1, 28 + +/* Load all the 16 bit values for A thru H */ + lhui r20, r1, 44 /* A */ + lhui r21, r1, 46 /* B */ + lhui r22, r1, 48 /* C */ + lhui r23, r1, 50 /* D */ + lhui r24, r1, 52 /* E */ + lhui r25, r1, 54 /* F */ + lhui r26, r1, 56 /* G */ + lhui r27, r1, 58 /* H */ + +/* D * H ==> LSB of the result on stack ==> Store1 */ + mul r9, r23, r27 + swi r9, r1, 36 /* Pos2 and Pos3 */ + +/* Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2 */ +/* Store the carry generated in position 2 for Pos 3 */ + lhui r11, r1, 36 /* Pos2 */ + mul r9, r22, r27 /* C * H */ + mul r10, r23, r26 /* D * G */ + add r9, r9, r10 + addc r12, r0, r0 + add r9, r9, r11 + addc r12, r12, r0 /* Store the Carry */ + shi r9, r1, 36 /* Store Pos2 */ + swi r9, r1, 32 + lhui r11, r1, 32 + shi r11, r1, 34 /* Store Pos1 */ + +/* Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1 */ + mul r9, r21, r27 /* B * H */ + mul r10, r22, r26 /* C * G */ + mul r7, r23, r25 /* D * F */ + add r9, r9, r11 + add r9, r9, r10 + add r9, r9, r7 + swi r9, r1, 32 /* Pos0 and Pos1 */ + +/* Hi (Store3) + A * H + B * G + C * F + D * E ==> Store3 ==> Pos0 */ + lhui r11, r1, 32 /* Pos0 */ + mul r9, r20, r27 /* A * H */ + mul r10, r21, r26 /* B * G */ + mul r7, r22, r25 /* C * F */ + mul r8, r23, r24 /* D * E */ + add r9, r9, r11 + add r9, r9, r10 + add r9, r9, r7 + add r9, r9, r8 + sext16 r9, r9 /* Sign extend the MSB */ + shi r9, r1, 32 + +/* Move results to r3 and r4 */ + lhui r3, r1, 32 + add r3, r3, r12 + shi r3, r1, 32 + lwi r3, r1, 32 /* Hi Part */ + lwi r4, r1, 36 /* Lo Part */ + +/* Restore Callee saved registers */ + lw r20, r1, r0 + lwi r21, r1, 4 + lwi r22, r1, 8 + lwi r23, r1, 12 + lwi r24, r1, 
16 + lwi r25, r1, 20 + lwi r26, r1, 24 + lwi r27, r1, 28 + +/* Restore Frame and return */ + rtsd r15, 8 + addi r1, r1, 40 + +.size __muldi3, . - __muldi3 +.end __muldi3 diff --git a/trunk/arch/microblaze/lib/muldi3.c b/trunk/arch/microblaze/lib/muldi3.c deleted file mode 100644 index d4860e154d29..000000000000 --- a/trunk/arch/microblaze/lib/muldi3.c +++ /dev/null @@ -1,60 +0,0 @@ -#include - -#include "libgcc.h" - -#define DWtype long long -#define UWtype unsigned long -#define UHWtype unsigned short - -#define W_TYPE_SIZE 32 - -#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) -#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) -#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) - -/* If we still don't have umul_ppmm, define it using plain C. */ -#if !defined(umul_ppmm) -#define umul_ppmm(w1, w0, u, v) \ - do { \ - UWtype __x0, __x1, __x2, __x3; \ - UHWtype __ul, __vl, __uh, __vh; \ - \ - __ul = __ll_lowpart(u); \ - __uh = __ll_highpart(u); \ - __vl = __ll_lowpart(v); \ - __vh = __ll_highpart(v); \ - \ - __x0 = (UWtype) __ul * __vl; \ - __x1 = (UWtype) __ul * __vh; \ - __x2 = (UWtype) __uh * __vl; \ - __x3 = (UWtype) __uh * __vh; \ - \ - __x1 += __ll_highpart(__x0); /* this can't give carry */\ - __x1 += __x2; /* but this indeed can */ \ - if (__x1 < __x2) /* did we get it? */ \ - __x3 += __ll_B; /* yes, add it in the proper pos */ \ - \ - (w1) = __x3 + __ll_highpart(__x1); \ - (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);\ - } while (0) -#endif - -#if !defined(__umulsidi3) -#define __umulsidi3(u, v) ({ \ - DWunion __w; \ - umul_ppmm(__w.s.high, __w.s.low, u, v); \ - __w.ll; \ - }) -#endif - -DWtype __muldi3(DWtype u, DWtype v) -{ - const DWunion uu = {.ll = u}; - const DWunion vv = {.ll = v}; - DWunion w = {.ll = __umulsidi3(uu.s.low, vv.s.low)}; - - w.s.high += ((UWtype) uu.s.low * (UWtype) vv.s.high - + (UWtype) uu.s.high * (UWtype) vv.s.low); - - return w.ll; -} diff --git a/trunk/arch/x86/include/asm/acpi.h b/trunk/arch/x86/include/asm/acpi.h index 211ca3f7fd16..55d106b5e31b 100644 --- a/trunk/arch/x86/include/asm/acpi.h +++ b/trunk/arch/x86/include/asm/acpi.h @@ -185,16 +185,17 @@ struct bootnode; #ifdef CONFIG_ACPI_NUMA extern int acpi_numa; -extern void acpi_get_nodes(struct bootnode *physnodes, unsigned long start, - unsigned long end); +extern int acpi_get_nodes(struct bootnode *physnodes); extern int acpi_scan_nodes(unsigned long start, unsigned long end); #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) - -#ifdef CONFIG_NUMA_EMU extern void acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes); +#else +static inline void acpi_fake_nodes(const struct bootnode *fake_nodes, + int num_nodes) +{ +} #endif -#endif /* CONFIG_ACPI_NUMA */ #define acpi_unlazy_tlb(x) leave_mm(x) diff --git a/trunk/arch/x86/include/asm/amd_nb.h b/trunk/arch/x86/include/asm/amd_nb.h index 64dc82ee19f0..6aee50d655d1 100644 --- a/trunk/arch/x86/include/asm/amd_nb.h +++ b/trunk/arch/x86/include/asm/amd_nb.h @@ -3,27 +3,16 @@ #include -struct amd_nb_bus_dev_range { - u8 bus; - u8 dev_base; - u8 dev_limit; -}; - extern struct pci_device_id amd_nb_misc_ids[]; -extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; struct bootnode; extern int early_is_amd_nb(u32 value); extern int amd_cache_northbridges(void); extern void amd_flush_garts(void); +extern int amd_get_nodes(struct bootnode *nodes); extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn); extern int amd_scan_nodes(void); -#ifdef CONFIG_NUMA_EMU -extern void amd_fake_nodes(const struct bootnode 
*nodes, int nr_nodes); -extern void amd_get_nodes(struct bootnode *nodes); -#endif - struct amd_northbridge { struct pci_dev *misc; }; diff --git a/trunk/arch/x86/include/asm/fixmap.h b/trunk/arch/x86/include/asm/fixmap.h index 4729b2b63117..0141b234406f 100644 --- a/trunk/arch/x86/include/asm/fixmap.h +++ b/trunk/arch/x86/include/asm/fixmap.h @@ -116,11 +116,11 @@ enum fixed_addresses { #endif FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */ FIX_TEXT_POKE0, /* first page is last, because allocation is backward */ + __end_of_permanent_fixed_addresses, + #ifdef CONFIG_X86_MRST FIX_LNW_VRTC, #endif - __end_of_permanent_fixed_addresses, - /* * 256 temporary boot-time mappings, used by early_ioremap(), * before ioremap() is functional. diff --git a/trunk/arch/x86/include/asm/gpio.h b/trunk/arch/x86/include/asm/gpio.h index 91d915a65259..49dbfdfa50f9 100644 --- a/trunk/arch/x86/include/asm/gpio.h +++ b/trunk/arch/x86/include/asm/gpio.h @@ -38,9 +38,12 @@ static inline int gpio_cansleep(unsigned int gpio) return __gpio_cansleep(gpio); } +/* + * Not implemented, yet. + */ static inline int gpio_to_irq(unsigned int gpio) { - return __gpio_to_irq(gpio); + return -ENOSYS; } static inline int irq_to_gpio(unsigned int irq) diff --git a/trunk/arch/x86/include/asm/kdebug.h b/trunk/arch/x86/include/asm/kdebug.h index ca242d35e873..f23eb2528464 100644 --- a/trunk/arch/x86/include/asm/kdebug.h +++ b/trunk/arch/x86/include/asm/kdebug.h @@ -18,6 +18,7 @@ enum die_val { DIE_TRAP, DIE_GPF, DIE_CALL, + DIE_NMI_IPI, DIE_PAGE_FAULT, DIE_NMIUNKNOWN, }; diff --git a/trunk/arch/x86/include/asm/mach_traps.h b/trunk/arch/x86/include/asm/mach_traps.h index 72a8b52e7dfd..f7920601e472 100644 --- a/trunk/arch/x86/include/asm/mach_traps.h +++ b/trunk/arch/x86/include/asm/mach_traps.h @@ -7,19 +7,9 @@ #include -#define NMI_REASON_PORT 0x61 - -#define NMI_REASON_SERR 0x80 -#define NMI_REASON_IOCHK 0x40 -#define NMI_REASON_MASK (NMI_REASON_SERR | NMI_REASON_IOCHK) - -#define NMI_REASON_CLEAR_SERR 0x04 -#define NMI_REASON_CLEAR_IOCHK 0x08 -#define NMI_REASON_CLEAR_MASK 0x0f - static inline unsigned char get_nmi_reason(void) { - return inb(NMI_REASON_PORT); + return inb(0x61); } static inline void reassert_nmi(void) diff --git a/trunk/arch/x86/include/asm/nmi.h b/trunk/arch/x86/include/asm/nmi.h index c76f5b92b840..c4021b953510 100644 --- a/trunk/arch/x86/include/asm/nmi.h +++ b/trunk/arch/x86/include/asm/nmi.h @@ -23,26 +23,6 @@ void arch_trigger_all_cpu_backtrace(void); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace #endif -/* - * Define some priorities for the nmi notifier call chain. - * - * Create a local nmi bit that has a higher priority than - * external nmis, because the local ones are more frequent. - * - * Also setup some default high/normal/low settings for - * subsystems to registers with. Using 4 bits to seperate - * the priorities. This can go alot higher if needed be. 
- */ - -#define NMI_LOCAL_SHIFT 16 /* randomly picked */ -#define NMI_LOCAL_BIT (1ULL << NMI_LOCAL_SHIFT) -#define NMI_HIGH_PRIOR (1ULL << 8) -#define NMI_NORMAL_PRIOR (1ULL << 4) -#define NMI_LOW_PRIOR (1ULL << 0) -#define NMI_LOCAL_HIGH_PRIOR (NMI_LOCAL_BIT | NMI_HIGH_PRIOR) -#define NMI_LOCAL_NORMAL_PRIOR (NMI_LOCAL_BIT | NMI_NORMAL_PRIOR) -#define NMI_LOCAL_LOW_PRIOR (NMI_LOCAL_BIT | NMI_LOW_PRIOR) - void stop_nmi(void); void restart_nmi(void); diff --git a/trunk/arch/x86/include/asm/numa_64.h b/trunk/arch/x86/include/asm/numa_64.h index 5ae87285a502..823e070e7c26 100644 --- a/trunk/arch/x86/include/asm/numa_64.h +++ b/trunk/arch/x86/include/asm/numa_64.h @@ -38,7 +38,7 @@ extern void __cpuinit numa_add_cpu(int cpu); extern void __cpuinit numa_remove_cpu(int cpu); #ifdef CONFIG_NUMA_EMU -#define FAKE_NODE_MIN_SIZE ((u64)32 << 20) +#define FAKE_NODE_MIN_SIZE ((u64)64 << 20) #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) #endif /* CONFIG_NUMA_EMU */ #else diff --git a/trunk/arch/x86/include/asm/perf_event_p4.h b/trunk/arch/x86/include/asm/perf_event_p4.h index e2f6a99f14ab..295e2ff18a6a 100644 --- a/trunk/arch/x86/include/asm/perf_event_p4.h +++ b/trunk/arch/x86/include/asm/perf_event_p4.h @@ -20,9 +20,6 @@ #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) #define ARCH_P4_MAX_CCCR (18) -#define ARCH_P4_CNTRVAL_BITS (40) -#define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1) - #define P4_ESCR_EVENT_MASK 0x7e000000U #define P4_ESCR_EVENT_SHIFT 25 #define P4_ESCR_EVENTMASK_MASK 0x01fffe00U diff --git a/trunk/arch/x86/kernel/amd_nb.c b/trunk/arch/x86/kernel/amd_nb.c index 0a99f7198bc3..affacb5e0065 100644 --- a/trunk/arch/x86/kernel/amd_nb.c +++ b/trunk/arch/x86/kernel/amd_nb.c @@ -20,13 +20,6 @@ struct pci_device_id amd_nb_misc_ids[] = { }; EXPORT_SYMBOL(amd_nb_misc_ids); -const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = { - { 0x00, 0x18, 0x20 }, - { 0xff, 0x00, 0x20 }, - { 0xfe, 0x00, 0x20 }, - { } -}; - struct amd_northbridge_info amd_northbridges; EXPORT_SYMBOL(amd_northbridges); diff --git a/trunk/arch/x86/kernel/aperture_64.c b/trunk/arch/x86/kernel/aperture_64.c index 5955a7800a96..dcd7c83e1659 100644 --- a/trunk/arch/x86/kernel/aperture_64.c +++ b/trunk/arch/x86/kernel/aperture_64.c @@ -39,6 +39,18 @@ int fallback_aper_force __initdata; int fix_aperture __initdata = 1; +struct bus_dev_range { + int bus; + int dev_base; + int dev_limit; +}; + +static struct bus_dev_range bus_dev_ranges[] __initdata = { + { 0x00, 0x18, 0x20}, + { 0xff, 0x00, 0x20}, + { 0xfe, 0x00, 0x20} +}; + static struct resource gart_resource = { .name = "GART", .flags = IORESOURCE_MEM, @@ -282,13 +294,13 @@ void __init early_gart_iommu_check(void) search_agp_bridge(&agp_aper_order, &valid_agp); fix = 0; - for (i = 0; amd_nb_bus_dev_ranges[i].dev_limit; i++) { + for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { int bus; int dev_base, dev_limit; - bus = amd_nb_bus_dev_ranges[i].bus; - dev_base = amd_nb_bus_dev_ranges[i].dev_base; - dev_limit = amd_nb_bus_dev_ranges[i].dev_limit; + bus = bus_dev_ranges[i].bus; + dev_base = bus_dev_ranges[i].dev_base; + dev_limit = bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) @@ -337,13 +349,13 @@ void __init early_gart_iommu_check(void) return; /* disable them all at first */ - for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { + for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { int bus; int dev_base, dev_limit; - 
bus = amd_nb_bus_dev_ranges[i].bus; - dev_base = amd_nb_bus_dev_ranges[i].dev_base; - dev_limit = amd_nb_bus_dev_ranges[i].dev_limit; + bus = bus_dev_ranges[i].bus; + dev_base = bus_dev_ranges[i].dev_base; + dev_limit = bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) @@ -378,14 +390,14 @@ int __init gart_iommu_hole_init(void) fix = 0; node = 0; - for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { + for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { int bus; int dev_base, dev_limit; u32 ctl; - bus = amd_nb_bus_dev_ranges[i].bus; - dev_base = amd_nb_bus_dev_ranges[i].dev_base; - dev_limit = amd_nb_bus_dev_ranges[i].dev_limit; + bus = bus_dev_ranges[i].bus; + dev_base = bus_dev_ranges[i].dev_base; + dev_limit = bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) @@ -493,7 +505,7 @@ int __init gart_iommu_hole_init(void) } /* Fix up the north bridges */ - for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { + for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { int bus, dev_base, dev_limit; /* @@ -502,9 +514,9 @@ int __init gart_iommu_hole_init(void) */ u32 ctl = DISTLBWALKPRB | aper_order << 1; - bus = amd_nb_bus_dev_ranges[i].bus; - dev_base = amd_nb_bus_dev_ranges[i].dev_base; - dev_limit = amd_nb_bus_dev_ranges[i].dev_limit; + bus = bus_dev_ranges[i].bus; + dev_base = bus_dev_ranges[i].dev_base; + dev_limit = bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) continue; diff --git a/trunk/arch/x86/kernel/apic/apic.c b/trunk/arch/x86/kernel/apic/apic.c index 06c196d7e59c..a51345ba449e 100644 --- a/trunk/arch/x86/kernel/apic/apic.c +++ b/trunk/arch/x86/kernel/apic/apic.c @@ -684,7 +684,7 @@ static int __init calibrate_APIC_clock(void) lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, lapic_clockevent.shift); lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent); + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); lapic_clockevent.min_delta_ns = clockevent_delta2ns(0xF, &lapic_clockevent); diff --git a/trunk/arch/x86/kernel/apic/hw_nmi.c b/trunk/arch/x86/kernel/apic/hw_nmi.c index 79fd43ca6f96..72ec29e1ae06 100644 --- a/trunk/arch/x86/kernel/apic/hw_nmi.c +++ b/trunk/arch/x86/kernel/apic/hw_nmi.c @@ -68,6 +68,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, switch (cmd) { case DIE_NMI: + case DIE_NMI_IPI: break; default: @@ -95,7 +96,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, static __read_mostly struct notifier_block backtrace_notifier = { .notifier_call = arch_trigger_all_cpu_backtrace_handler, .next = NULL, - .priority = NMI_LOCAL_LOW_PRIOR, + .priority = 1 }; static int __init register_trigger_all_cpu_backtrace(void) diff --git a/trunk/arch/x86/kernel/apic/x2apic_uv_x.c b/trunk/arch/x86/kernel/apic/x2apic_uv_x.c index bd16b58b8850..ecca5f41ad2c 100644 --- a/trunk/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/trunk/arch/x86/kernel/apic/x2apic_uv_x.c @@ -378,7 +378,7 @@ struct apic __refdata apic_x2apic_uv_x = { static __cpuinit void set_x2apic_extra_bits(int pnode) { - __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift); + __this_cpu_write(x2apic_extra_bits, (pnode << 6)); } /* @@ -641,7 +641,7 @@ void __cpuinit uv_cpu_init(void) */ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void 
*data) { - if (reason != DIE_NMIUNKNOWN) + if (reason != DIE_NMI_IPI) return NOTIFY_OK; if (in_crash_kexec) diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c b/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c index a77971979564..e7dbde7bfedb 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -25,7 +25,6 @@ #include #include #include -#include /* Update fake mce registers on current CPU. */ static void inject_mce(struct mce *m) @@ -84,7 +83,7 @@ static int mce_raise_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int cpu = smp_processor_id(); struct mce *m = &__get_cpu_var(injectm); - if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) + if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) return NOTIFY_DONE; cpumask_clear_cpu(cpu, mce_inject_cpumask); if (m->inject_flags & MCJ_EXCEPTION) @@ -96,7 +95,7 @@ static int mce_raise_notify(struct notifier_block *self, static struct notifier_block mce_raise_nb = { .notifier_call = mce_raise_notify, - .priority = NMI_LOCAL_NORMAL_PRIOR, + .priority = 1000, }; /* Inject mce on current CPU */ diff --git a/trunk/arch/x86/kernel/cpu/perf_event.c b/trunk/arch/x86/kernel/cpu/perf_event.c index 9d977a2ea693..04921017abe0 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event.c +++ b/trunk/arch/x86/kernel/cpu/perf_event.c @@ -1267,6 +1267,7 @@ perf_event_nmi_handler(struct notifier_block *self, switch (cmd) { case DIE_NMI: + case DIE_NMI_IPI: break; case DIE_NMIUNKNOWN: this_nmi = percpu_read(irq_stat.__nmi_count); @@ -1316,7 +1317,7 @@ perf_event_nmi_handler(struct notifier_block *self, static __read_mostly struct notifier_block perf_event_nmi_notifier = { .notifier_call = perf_event_nmi_handler, .next = NULL, - .priority = NMI_LOCAL_LOW_PRIOR, + .priority = 1 }; static struct event_constraint unconstrained; diff --git a/trunk/arch/x86/kernel/cpu/perf_event_p4.c b/trunk/arch/x86/kernel/cpu/perf_event_p4.c index e56b9bfbabd1..81400b93e694 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event_p4.c +++ b/trunk/arch/x86/kernel/cpu/perf_event_p4.c @@ -753,21 +753,19 @@ static int p4_hw_config(struct perf_event *event) static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) { - u64 v; + int overflow = 0; + u32 low, high; - /* an official way for overflow indication */ - rdmsrl(hwc->config_base + hwc->idx, v); - if (v & P4_CCCR_OVF) { - wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF); - return 1; - } + rdmsr(hwc->config_base + hwc->idx, low, high); - /* it might be unflagged overflow */ - rdmsrl(hwc->event_base + hwc->idx, v); - if (!(v & ARCH_P4_CNTRVAL_MASK)) - return 1; + /* we need to check high bit for unflagged overflows */ + if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) { + overflow = 1; + (void)checking_wrmsrl(hwc->config_base + hwc->idx, + ((u64)low) & ~P4_CCCR_OVF); + } - return 0; + return overflow; } static void p4_pmu_disable_pebs(void) @@ -1154,9 +1152,9 @@ static __initconst const struct x86_pmu p4_pmu = { */ .num_counters = ARCH_P4_MAX_CCCR, .apic = 1, - .cntval_bits = ARCH_P4_CNTRVAL_BITS, - .cntval_mask = ARCH_P4_CNTRVAL_MASK, - .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1, + .cntval_bits = 40, + .cntval_mask = (1ULL << 40) - 1, + .max_period = (1ULL << 39) - 1, .hw_config = p4_hw_config, .schedule_events = p4_pmu_schedule_events, /* diff --git a/trunk/arch/x86/kernel/dumpstack.c b/trunk/arch/x86/kernel/dumpstack.c index d6fb146c0d8b..8474c998cbd4 100644 --- 
a/trunk/arch/x86/kernel/dumpstack.c +++ b/trunk/arch/x86/kernel/dumpstack.c @@ -197,8 +197,14 @@ void show_stack(struct task_struct *task, unsigned long *sp) */ void dump_stack(void) { + unsigned long bp = 0; unsigned long stack; +#ifdef CONFIG_FRAME_POINTER + if (!bp) + get_bp(bp); +#endif + printk("Pid: %d, comm: %.20s %s %s %.*s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, diff --git a/trunk/arch/x86/kernel/entry_64.S b/trunk/arch/x86/kernel/entry_64.S index d3b895f375d3..e3ba417e8697 100644 --- a/trunk/arch/x86/kernel/entry_64.S +++ b/trunk/arch/x86/kernel/entry_64.S @@ -299,21 +299,17 @@ ENDPROC(native_usergs_sysret64) ENTRY(save_args) XCPT_FRAME cld - /* - * start from rbp in pt_regs and jump over - * return address. - */ - movq_cfi rdi, RDI+8-RBP - movq_cfi rsi, RSI+8-RBP - movq_cfi rdx, RDX+8-RBP - movq_cfi rcx, RCX+8-RBP - movq_cfi rax, RAX+8-RBP - movq_cfi r8, R8+8-RBP - movq_cfi r9, R9+8-RBP - movq_cfi r10, R10+8-RBP - movq_cfi r11, R11+8-RBP - - leaq -RBP+8(%rsp),%rdi /* arg1 for handler */ + movq_cfi rdi, RDI+16-ARGOFFSET + movq_cfi rsi, RSI+16-ARGOFFSET + movq_cfi rdx, RDX+16-ARGOFFSET + movq_cfi rcx, RCX+16-ARGOFFSET + movq_cfi rax, RAX+16-ARGOFFSET + movq_cfi r8, R8+16-ARGOFFSET + movq_cfi r9, R9+16-ARGOFFSET + movq_cfi r10, R10+16-ARGOFFSET + movq_cfi r11, R11+16-ARGOFFSET + + leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ movq_cfi rbp, 8 /* push %rbp */ leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ testl $3, CS(%rdi) @@ -786,9 +782,8 @@ END(interrupt) /* 0(%rsp): ~(interrupt number) */ .macro interrupt func - /* reserve pt_regs for scratch regs and rbp */ - subq $ORIG_RAX-RBP, %rsp - CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP + subq $ORIG_RAX-ARGOFFSET+8, %rsp + CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8 call save_args PARTIAL_FRAME 0 call \func @@ -813,14 +808,9 @@ ret_from_intr: TRACE_IRQS_OFF decl PER_CPU_VAR(irq_count) leaveq - CFI_RESTORE rbp CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET -8 - - /* we did not save rbx, restore only from ARGOFFSET */ - addq $8, %rsp - CFI_ADJUST_CFA_OFFSET -8 exit_intr: GET_THREAD_INFO(%rcx) testl $3,CS-ARGOFFSET(%rsp) diff --git a/trunk/arch/x86/kernel/kgdb.c b/trunk/arch/x86/kernel/kgdb.c index a4130005028a..cd21b654dec6 100644 --- a/trunk/arch/x86/kernel/kgdb.c +++ b/trunk/arch/x86/kernel/kgdb.c @@ -48,7 +48,6 @@ #include #include #include -#include struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { @@ -526,6 +525,10 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) } return NOTIFY_DONE; + case DIE_NMI_IPI: + /* Just ignore, we will handle the roundup on DIE_NMI. 
*/ + return NOTIFY_DONE; + case DIE_NMIUNKNOWN: if (was_in_debug_nmi[raw_smp_processor_id()]) { was_in_debug_nmi[raw_smp_processor_id()] = 0; @@ -603,7 +606,7 @@ static struct notifier_block kgdb_notifier = { /* * Lowest-prio notifier priority, we want to be notified last: */ - .priority = NMI_LOCAL_LOW_PRIOR, + .priority = -INT_MAX, }; /** diff --git a/trunk/arch/x86/kernel/reboot.c b/trunk/arch/x86/kernel/reboot.c index fc7aae1e2bc7..c495aa8d4815 100644 --- a/trunk/arch/x86/kernel/reboot.c +++ b/trunk/arch/x86/kernel/reboot.c @@ -18,7 +18,6 @@ #include #include #include -#include #ifdef CONFIG_X86_32 # include @@ -748,7 +747,7 @@ static int crash_nmi_callback(struct notifier_block *self, { int cpu; - if (val != DIE_NMI) + if (val != DIE_NMI_IPI) return NOTIFY_OK; cpu = raw_smp_processor_id(); @@ -779,8 +778,6 @@ static void smp_send_nmi_allbutself(void) static struct notifier_block crash_nmi_nb = { .notifier_call = crash_nmi_callback, - /* we want to be the first one called */ - .priority = NMI_LOCAL_HIGH_PRIOR+1, }; /* Halt all other CPUs, calling the specified function on each of them diff --git a/trunk/arch/x86/kernel/smpboot.c b/trunk/arch/x86/kernel/smpboot.c index 763df77343dd..c7149c96d079 100644 --- a/trunk/arch/x86/kernel/smpboot.c +++ b/trunk/arch/x86/kernel/smpboot.c @@ -97,12 +97,12 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); */ static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex); -void cpu_hotplug_driver_lock(void) +void cpu_hotplug_driver_lock() { mutex_lock(&x86_cpu_hotplug_driver_mutex); } -void cpu_hotplug_driver_unlock(void) +void cpu_hotplug_driver_unlock() { mutex_unlock(&x86_cpu_hotplug_driver_mutex); } diff --git a/trunk/arch/x86/kernel/traps.c b/trunk/arch/x86/kernel/traps.c index b9b67166f9de..c76aaca5694d 100644 --- a/trunk/arch/x86/kernel/traps.c +++ b/trunk/arch/x86/kernel/traps.c @@ -84,11 +84,6 @@ EXPORT_SYMBOL_GPL(used_vectors); static int ignore_nmis; int unknown_nmi_panic; -/* - * Prevent NMI reason port (0x61) being accessed simultaneously, can - * only be used in NMI handler. - */ -static DEFINE_RAW_SPINLOCK(nmi_reason_lock); static inline void conditional_sti(struct pt_regs *regs) { @@ -315,15 +310,15 @@ static int __init setup_unknown_nmi_panic(char *str) __setup("unknown_nmi_panic", setup_unknown_nmi_panic); static notrace __kprobes void -pci_serr_error(unsigned char reason, struct pt_regs *regs) +mem_parity_error(unsigned char reason, struct pt_regs *regs) { - pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", - reason, smp_processor_id()); + printk(KERN_EMERG + "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", + reason, smp_processor_id()); + + printk(KERN_EMERG + "You have some hardware problem, likely on the PCI bus.\n"); - /* - * On some machines, PCI SERR line is used to report memory - * errors. EDAC makes use of it. - */ #if defined(CONFIG_EDAC) if (edac_handler_set()) { edac_atomic_assert_error(); @@ -334,11 +329,11 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); - pr_emerg("Dazed and confused, but trying to continue\n"); + printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); - /* Clear and disable the PCI SERR error line. */ - reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; - outb(reason, NMI_REASON_PORT); + /* Clear and disable the memory parity error line. 
*/ + reason = (reason & 0xf) | 4; + outb(reason, 0x61); } static notrace __kprobes void @@ -346,17 +341,15 @@ io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; - pr_emerg( - "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", - reason, smp_processor_id()); + printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); show_registers(regs); if (panic_on_io_nmi) panic("NMI IOCK error: Not continuing"); /* Re-enable the IOCK line, wait for a few seconds */ - reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; - outb(reason, NMI_REASON_PORT); + reason = (reason & 0xf) | 8; + outb(reason, 0x61); i = 20000; while (--i) { @@ -364,8 +357,8 @@ io_check_error(unsigned char reason, struct pt_regs *regs) udelay(100); } - reason &= ~NMI_REASON_CLEAR_IOCHK; - outb(reason, NMI_REASON_PORT); + reason &= ~8; + outb(reason, 0x61); } static notrace __kprobes void @@ -384,50 +377,57 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) return; } #endif - pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); + printk(KERN_EMERG + "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", + reason, smp_processor_id()); - pr_emerg("Do you have a strange power saving mode enabled?\n"); + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); if (unknown_nmi_panic || panic_on_unrecovered_nmi) panic("NMI: Not continuing"); - pr_emerg("Dazed and confused, but trying to continue\n"); + printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); } static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; + int cpu; - /* - * CPU-specific NMI must be processed before non-CPU-specific - * NMI, otherwise we may lose it, because the CPU-specific - * NMI can not be detected/processed on other CPUs. - */ - if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; + cpu = smp_processor_id(); - /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ - raw_spin_lock(&nmi_reason_lock); - reason = get_nmi_reason(); + /* Only the BSP gets external NMIs from the system. */ + if (!cpu) + reason = get_nmi_reason(); - if (reason & NMI_REASON_MASK) { - if (reason & NMI_REASON_SERR) - pci_serr_error(reason, regs); - else if (reason & NMI_REASON_IOCHK) - io_check_error(reason, regs); -#ifdef CONFIG_X86_32 - /* - * Reassert NMI in case it became active - * meanwhile as it's edge-triggered: - */ - reassert_nmi(); + if (!(reason & 0xc0)) { + if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) + == NOTIFY_STOP) + return; + +#ifdef CONFIG_X86_LOCAL_APIC + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) + == NOTIFY_STOP) + return; #endif - raw_spin_unlock(&nmi_reason_lock); + unknown_nmi_error(reason, regs); + return; } - raw_spin_unlock(&nmi_reason_lock); + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) + return; - unknown_nmi_error(reason, regs); + /* AK: following checks seem to be broken on modern chipsets. 
FIXME */ + if (reason & 0x80) + mem_parity_error(reason, regs); + if (reason & 0x40) + io_check_error(reason, regs); +#ifdef CONFIG_X86_32 + /* + * Reassert NMI in case it became active meanwhile + * as it's edge-triggered: + */ + reassert_nmi(); +#endif } dotraplinkage notrace __kprobes void diff --git a/trunk/arch/x86/kernel/tsc.c b/trunk/arch/x86/kernel/tsc.c index 823f79a17ad1..03d2ea82f35a 100644 --- a/trunk/arch/x86/kernel/tsc.c +++ b/trunk/arch/x86/kernel/tsc.c @@ -965,7 +965,7 @@ static void tsc_refine_calibration_work(struct work_struct *work) static int __init init_tsc_clocksource(void) { - if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz) + if (!cpu_has_tsc || tsc_disabled > 0) return 0; if (tsc_clocksource_reliable) diff --git a/trunk/arch/x86/mm/amdtopology_64.c b/trunk/arch/x86/mm/amdtopology_64.c index f21962c435ed..08a0069b87a5 100644 --- a/trunk/arch/x86/mm/amdtopology_64.c +++ b/trunk/arch/x86/mm/amdtopology_64.c @@ -27,7 +27,6 @@ #include static struct bootnode __initdata nodes[8]; -static unsigned char __initdata nodeids[8]; static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE; static __init int find_northbridge(void) @@ -69,6 +68,19 @@ static __init void early_get_boot_cpu_id(void) #endif } +int __init amd_get_nodes(struct bootnode *physnodes) +{ + int i; + int ret = 0; + + for_each_node_mask(i, nodes_parsed) { + physnodes[ret].start = nodes[i].start; + physnodes[ret].end = nodes[i].end; + ret++; + } + return ret; +} + int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) { unsigned long start = PFN_PHYS(start_pfn); @@ -101,7 +113,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) base = read_pci_config(0, nb, 1, 0x40 + i*8); limit = read_pci_config(0, nb, 1, 0x44 + i*8); - nodeids[i] = nodeid = limit & 7; + nodeid = limit & 7; if ((base & 3) == 0) { if (i < numnodes) pr_info("Skipping disabled node %d\n", i); @@ -181,76 +193,6 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) return 0; } -#ifdef CONFIG_NUMA_EMU -static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = { - [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE -}; - -void __init amd_get_nodes(struct bootnode *physnodes) -{ - int i; - - for_each_node_mask(i, nodes_parsed) { - physnodes[i].start = nodes[i].start; - physnodes[i].end = nodes[i].end; - } -} - -static int __init find_node_by_addr(unsigned long addr) -{ - int ret = NUMA_NO_NODE; - int i; - - for (i = 0; i < 8; i++) - if (addr >= nodes[i].start && addr < nodes[i].end) { - ret = i; - break; - } - return ret; -} - -/* - * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be - * setup to represent the physical topology but reflect the emulated - * environment. For each emulated node, the real node which it appears on is - * found and a fake pxm to nid mapping is created which mirrors the actual - * locality. node_distance() then represents the correct distances between - * emulated nodes by using the fake acpi mappings to pxms. 
- */ -void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes) -{ - unsigned int bits; - unsigned int cores; - unsigned int apicid_base = 0; - int i; - - bits = boot_cpu_data.x86_coreid_bits; - cores = 1 << bits; - early_get_boot_cpu_id(); - if (boot_cpu_physical_apicid > 0) - apicid_base = boot_cpu_physical_apicid; - - for (i = 0; i < nr_nodes; i++) { - int index; - int nid; - int j; - - nid = find_node_by_addr(nodes[i].start); - if (nid == NUMA_NO_NODE) - continue; - - index = nodeids[nid] << bits; - if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE) - for (j = apicid_base; j < cores + apicid_base; j++) - fake_apicid_to_node[index + j] = i; -#ifdef CONFIG_ACPI_NUMA - __acpi_map_pxm_to_node(nid, i); -#endif - } - memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node)); -} -#endif /* CONFIG_NUMA_EMU */ - int __init amd_scan_nodes(void) { unsigned int bits; diff --git a/trunk/arch/x86/mm/numa_64.c b/trunk/arch/x86/mm/numa_64.c index 1e72102e80c9..7762a517d69d 100644 --- a/trunk/arch/x86/mm/numa_64.c +++ b/trunk/arch/x86/mm/numa_64.c @@ -260,30 +260,30 @@ void __init numa_init_array(void) #ifdef CONFIG_NUMA_EMU /* Numa emulation */ static struct bootnode nodes[MAX_NUMNODES] __initdata; -static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata; +static struct bootnode physnodes[MAX_NUMNODES] __initdata; static char *cmdline __initdata; static int __init setup_physnodes(unsigned long start, unsigned long end, int acpi, int amd) { + int nr_nodes = 0; int ret = 0; int i; - memset(physnodes, 0, sizeof(physnodes)); #ifdef CONFIG_ACPI_NUMA if (acpi) - acpi_get_nodes(physnodes, start, end); + nr_nodes = acpi_get_nodes(physnodes); #endif #ifdef CONFIG_AMD_NUMA if (amd) - amd_get_nodes(physnodes); + nr_nodes = amd_get_nodes(physnodes); #endif /* * Basic sanity checking on the physical node map: there may be errors * if the SRAT or AMD code incorrectly reported the topology or the mem= * kernel parameter is used. */ - for (i = 0; i < MAX_NUMNODES; i++) { + for (i = 0; i < nr_nodes; i++) { if (physnodes[i].start == physnodes[i].end) continue; if (physnodes[i].start > end) { @@ -298,6 +298,17 @@ static int __init setup_physnodes(unsigned long start, unsigned long end, physnodes[i].start = start; if (physnodes[i].end > end) physnodes[i].end = end; + } + + /* + * Remove all nodes that have no memory or were truncated because of the + * limited address range. + */ + for (i = 0; i < nr_nodes; i++) { + if (physnodes[i].start == physnodes[i].end) + continue; + physnodes[ret].start = physnodes[i].start; + physnodes[ret].end = physnodes[i].end; ret++; } @@ -313,24 +324,6 @@ static int __init setup_physnodes(unsigned long start, unsigned long end, return ret; } -static void __init fake_physnodes(int acpi, int amd, int nr_nodes) -{ - int i; - - BUG_ON(acpi && amd); -#ifdef CONFIG_ACPI_NUMA - if (acpi) - acpi_fake_nodes(nodes, nr_nodes); -#endif -#ifdef CONFIG_AMD_NUMA - if (amd) - amd_fake_nodes(nodes, nr_nodes); -#endif - if (!acpi && !amd) - for (i = 0; i < nr_cpu_ids; i++) - numa_set_node(i, 0); -} - /* * Setups up nid to range from addr to addr + size. If the end * boundary is greater than max_addr, then max_addr is used instead. @@ -359,7 +352,8 @@ static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr) * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr * to max_addr. The return value is the number of nodes allocated. 
*/ -static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes) +static int __init split_nodes_interleave(u64 addr, u64 max_addr, + int nr_phys_nodes, int nr_nodes) { nodemask_t physnode_mask = NODE_MASK_NONE; u64 size; @@ -390,7 +384,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes) return -1; } - for (i = 0; i < MAX_NUMNODES; i++) + for (i = 0; i < nr_phys_nodes; i++) if (physnodes[i].start != physnodes[i].end) node_set(i, physnode_mask); @@ -559,9 +553,11 @@ static int __init numa_emulation(unsigned long start_pfn, { u64 addr = start_pfn << PAGE_SHIFT; u64 max_addr = last_pfn << PAGE_SHIFT; + int num_phys_nodes; int num_nodes; int i; + num_phys_nodes = setup_physnodes(addr, max_addr, acpi, amd); /* * If the numa=fake command-line contains a 'M' or 'G', it represents * the fixed node size. Otherwise, if it is just a single number N, @@ -576,7 +572,7 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long n; n = simple_strtoul(cmdline, NULL, 0); - num_nodes = split_nodes_interleave(addr, max_addr, n); + num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n); } if (num_nodes < 0) @@ -599,8 +595,7 @@ static int __init numa_emulation(unsigned long start_pfn, nodes[i].end >> PAGE_SHIFT); setup_node_bootmem(i, nodes[i].start, nodes[i].end); } - setup_physnodes(addr, max_addr, acpi, amd); - fake_physnodes(acpi, amd, num_nodes); + acpi_fake_nodes(nodes, num_nodes); numa_init_array(); return 0; } @@ -615,12 +610,8 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, nodes_clear(node_online_map); #ifdef CONFIG_NUMA_EMU - setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT, - acpi, amd); if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd)) return; - setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT, - acpi, amd); nodes_clear(node_possible_map); nodes_clear(node_online_map); #endif @@ -776,7 +767,6 @@ void __cpuinit numa_clear_node(int cpu) #ifndef CONFIG_DEBUG_PER_CPU_MAPS -#ifndef CONFIG_NUMA_EMU void __cpuinit numa_add_cpu(int cpu) { cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); @@ -786,115 +776,34 @@ void __cpuinit numa_remove_cpu(int cpu) { cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); } -#else -void __cpuinit numa_add_cpu(int cpu) -{ - unsigned long addr; - u16 apicid; - int physnid; - int nid = NUMA_NO_NODE; - - apicid = early_per_cpu(x86_cpu_to_apicid, cpu); - if (apicid != BAD_APICID) - nid = apicid_to_node[apicid]; - if (nid == NUMA_NO_NODE) - nid = early_cpu_to_node(cpu); - BUG_ON(nid == NUMA_NO_NODE || !node_online(nid)); - - /* - * Use the starting address of the emulated node to find which physical - * node it is allocated on. - */ - addr = node_start_pfn(nid) << PAGE_SHIFT; - for (physnid = 0; physnid < MAX_NUMNODES; physnid++) - if (addr >= physnodes[physnid].start && - addr < physnodes[physnid].end) - break; - - /* - * Map the cpu to each emulated node that is allocated on the physical - * node of the cpu's apic id. 
- */ - for_each_online_node(nid) { - addr = node_start_pfn(nid) << PAGE_SHIFT; - if (addr >= physnodes[physnid].start && - addr < physnodes[physnid].end) - cpumask_set_cpu(cpu, node_to_cpumask_map[nid]); - } -} - -void __cpuinit numa_remove_cpu(int cpu) -{ - int i; - - for_each_online_node(i) - cpumask_clear_cpu(cpu, node_to_cpumask_map[i]); -} -#endif /* !CONFIG_NUMA_EMU */ #else /* CONFIG_DEBUG_PER_CPU_MAPS */ -static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable) -{ - int node = early_cpu_to_node(cpu); - struct cpumask *mask; - char buf[64]; - - mask = node_to_cpumask_map[node]; - if (!mask) { - pr_err("node_to_cpumask_map[%i] NULL\n", node); - dump_stack(); - return NULL; - } - - cpulist_scnprintf(buf, sizeof(buf), mask); - printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", - enable ? "numa_add_cpu" : "numa_remove_cpu", - cpu, node, buf); - return mask; -} /* * --------- debug versions of the numa functions --------- */ -#ifndef CONFIG_NUMA_EMU static void __cpuinit numa_set_cpumask(int cpu, int enable) { + int node = early_cpu_to_node(cpu); struct cpumask *mask; + char buf[64]; - mask = debug_cpumask_set_cpu(cpu, enable); - if (!mask) + mask = node_to_cpumask_map[node]; + if (mask == NULL) { + printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node); + dump_stack(); return; + } if (enable) cpumask_set_cpu(cpu, mask); else cpumask_clear_cpu(cpu, mask); -} -#else -static void __cpuinit numa_set_cpumask(int cpu, int enable) -{ - int node = early_cpu_to_node(cpu); - struct cpumask *mask; - int i; - for_each_online_node(i) { - unsigned long addr; - - addr = node_start_pfn(i) << PAGE_SHIFT; - if (addr < physnodes[node].start || - addr >= physnodes[node].end) - continue; - mask = debug_cpumask_set_cpu(cpu, enable); - if (!mask) - return; - - if (enable) - cpumask_set_cpu(cpu, mask); - else - cpumask_clear_cpu(cpu, mask); - } + cpulist_scnprintf(buf, sizeof(buf), mask); + printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", + enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf); } -#endif /* CONFIG_NUMA_EMU */ void __cpuinit numa_add_cpu(int cpu) { diff --git a/trunk/arch/x86/mm/srat_64.c b/trunk/arch/x86/mm/srat_64.c index 603d285d1daa..171a0aacb99a 100644 --- a/trunk/arch/x86/mm/srat_64.c +++ b/trunk/arch/x86/mm/srat_64.c @@ -349,19 +349,18 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) void __init acpi_numa_arch_fixup(void) {} -#ifdef CONFIG_NUMA_EMU -void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start, - unsigned long end) +int __init acpi_get_nodes(struct bootnode *physnodes) { int i; + int ret = 0; for_each_node_mask(i, nodes_parsed) { - cutoff_node(i, start, end); - physnodes[i].start = nodes[i].start; - physnodes[i].end = nodes[i].end; + physnodes[ret].start = nodes[i].start; + physnodes[ret].end = nodes[i].end; + ret++; } + return ret; } -#endif /* CONFIG_NUMA_EMU */ /* Use the information discovered above to actually set up the nodes. 
*/ int __init acpi_scan_nodes(unsigned long start, unsigned long end) @@ -506,6 +505,8 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) { int i, j; + printk(KERN_INFO "Faking PXM affinity for fake nodes on real " + "topology.\n"); for (i = 0; i < num_nodes; i++) { int nid, pxm; @@ -525,17 +526,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) fake_apicid_to_node[j] == NUMA_NO_NODE) fake_apicid_to_node[j] = i; } - - /* - * If there are apicid-to-node mappings for physical nodes that do not - * have a corresponding emulated node, it should default to a guaranteed - * value. - */ - for (i = 0; i < MAX_LOCAL_APIC; i++) - if (apicid_to_node[i] != NUMA_NO_NODE && - fake_apicid_to_node[i] == NUMA_NO_NODE) - fake_apicid_to_node[i] = 0; - for (i = 0; i < num_nodes; i++) __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i); memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node)); diff --git a/trunk/arch/x86/oprofile/nmi_int.c b/trunk/arch/x86/oprofile/nmi_int.c index e2b7b0c06cdf..f24a8533bcdf 100644 --- a/trunk/arch/x86/oprofile/nmi_int.c +++ b/trunk/arch/x86/oprofile/nmi_int.c @@ -65,6 +65,7 @@ static int profile_exceptions_notify(struct notifier_block *self, switch (val) { case DIE_NMI: + case DIE_NMI_IPI: if (ctr_running) model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); else if (!nmi_enabled) @@ -360,7 +361,7 @@ static void nmi_cpu_setup(void *dummy) static struct notifier_block profile_exceptions_nb = { .notifier_call = profile_exceptions_notify, .next = NULL, - .priority = NMI_LOCAL_LOW_PRIOR, + .priority = 2 }; static void nmi_cpu_restore_registers(struct op_msrs *msrs) diff --git a/trunk/arch/x86/oprofile/nmi_timer_int.c b/trunk/arch/x86/oprofile/nmi_timer_int.c index 720bf5a53c51..0636dd93cef8 100644 --- a/trunk/arch/x86/oprofile/nmi_timer_int.c +++ b/trunk/arch/x86/oprofile/nmi_timer_int.c @@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self, static struct notifier_block profile_timer_exceptions_nb = { .notifier_call = profile_timer_exceptions_notify, .next = NULL, - .priority = NMI_LOW_PRIOR, + .priority = 0 }; static int timer_start(void) diff --git a/trunk/arch/x86/pci/amd_bus.c b/trunk/arch/x86/pci/amd_bus.c index e27dffbbb1a7..fc1e8fe07e5c 100644 --- a/trunk/arch/x86/pci/amd_bus.c +++ b/trunk/arch/x86/pci/amd_bus.c @@ -4,7 +4,6 @@ #include #include -#include #include #include @@ -379,34 +378,6 @@ static struct notifier_block __cpuinitdata amd_cpu_notifier = { .notifier_call = amd_cpu_notify, }; -static void __init pci_enable_pci_io_ecs(void) -{ -#ifdef CONFIG_AMD_NB - unsigned int i, n; - - for (n = i = 0; !n && amd_nb_bus_dev_ranges[i].dev_limit; ++i) { - u8 bus = amd_nb_bus_dev_ranges[i].bus; - u8 slot = amd_nb_bus_dev_ranges[i].dev_base; - u8 limit = amd_nb_bus_dev_ranges[i].dev_limit; - - for (; slot < limit; ++slot) { - u32 val = read_pci_config(bus, slot, 3, 0); - - if (!early_is_amd_nb(val)) - continue; - - val = read_pci_config(bus, slot, 3, 0x8c); - if (!(val & (ENABLE_CF8_EXT_CFG >> 32))) { - val |= ENABLE_CF8_EXT_CFG >> 32; - write_pci_config(bus, slot, 3, 0x8c, val); - } - ++n; - } - } - pr_info("Extended Config Space enabled on %u nodes\n", n); -#endif -} - static int __init pci_io_ecs_init(void) { int cpu; @@ -415,10 +386,6 @@ static int __init pci_io_ecs_init(void) if (boot_cpu_data.x86 < 0x10) return 0; - /* Try the PCI method first. 
*/ - if (early_pci_allowed()) - pci_enable_pci_io_ecs(); - register_cpu_notifier(&amd_cpu_notifier); for_each_online_cpu(cpu) amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE, diff --git a/trunk/drivers/char/ipmi/ipmi_watchdog.c b/trunk/drivers/char/ipmi/ipmi_watchdog.c index 320668f4c3aa..f4d334f2536e 100644 --- a/trunk/drivers/char/ipmi/ipmi_watchdog.c +++ b/trunk/drivers/char/ipmi/ipmi_watchdog.c @@ -1081,7 +1081,7 @@ ipmi_nmi(struct notifier_block *self, unsigned long val, void *data) { struct die_args *args = data; - if (val != DIE_NMIUNKNOWN) + if (val != DIE_NMI) return NOTIFY_OK; /* Hack, if it's a memory or I/O error, ignore it. */ diff --git a/trunk/drivers/mfd/sh_mobile_sdhi.c b/trunk/drivers/mfd/sh_mobile_sdhi.c index 0a7df44a93c0..f1714f93af9d 100644 --- a/trunk/drivers/mfd/sh_mobile_sdhi.c +++ b/trunk/drivers/mfd/sh_mobile_sdhi.c @@ -131,17 +131,11 @@ static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev) */ mmc_data->flags |= TMIO_MMC_BLKSZ_2BYTES; - /* - * All SDHI blocks support SDIO IRQ signalling. - */ - mmc_data->flags |= TMIO_MMC_SDIO_IRQ; - if (p && p->dma_slave_tx >= 0 && p->dma_slave_rx >= 0) { priv->param_tx.slave_id = p->dma_slave_tx; priv->param_rx.slave_id = p->dma_slave_rx; priv->dma_priv.chan_priv_tx = &priv->param_tx; priv->dma_priv.chan_priv_rx = &priv->param_rx; - priv->dma_priv.alignment_shift = 1; /* 2-byte alignment */ mmc_data->dma = &priv->dma_priv; } diff --git a/trunk/drivers/mmc/card/Kconfig b/trunk/drivers/mmc/card/Kconfig index 2a876c4099cd..57e4416b9ef0 100644 --- a/trunk/drivers/mmc/card/Kconfig +++ b/trunk/drivers/mmc/card/Kconfig @@ -16,7 +16,6 @@ config MMC_BLOCK config MMC_BLOCK_MINORS int "Number of minors per block device" - depends on MMC_BLOCK range 4 256 default 8 help diff --git a/trunk/drivers/mmc/core/Kconfig b/trunk/drivers/mmc/core/Kconfig index ef103871517f..bb22ffd76ef8 100644 --- a/trunk/drivers/mmc/core/Kconfig +++ b/trunk/drivers/mmc/core/Kconfig @@ -16,14 +16,3 @@ config MMC_UNSAFE_RESUME This option sets a default which can be overridden by the module parameter "removable=0" or "removable=1". - -config MMC_CLKGATE - bool "MMC host clock gating (EXPERIMENTAL)" - depends on EXPERIMENTAL - help - This will attempt to aggressively gate the clock to the MMC card. - This is done to save power due to gating off the logic and bus - noise when the MMC card is not in use. Your host driver has to - support handling this in order for it to be of any use. - - If unsure, say N. 
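The hunks above back out the aggressive clock-gating support: the MMC_CLKGATE Kconfig entry goes away, the mmc_gate_clock()/mmc_ungate_clock()/mmc_set_ungated() helpers are dropped from core.c, and mmc_set_data_timeout() returns to reading host->ios.clock directly. For readers unfamiliar with the feature, the sketch below (plain user-space C, not kernel code; the struct and function names are invented for illustration) models the reference-counted idea behind it: each request holds the clock, and only when the hold count drops to zero is ios.clock forced to 0 to gate the controller clock, with the old frequency cached so it can be restored on the next request. The real driver defers the gating to a workqueue and waits a few bus-clock cycles first.

    /*
     * Minimal model of reference-counted MMC clock gating (illustrative only;
     * the kernel version uses spinlocks, a workqueue and mmc_set_ios()).
     */
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct clk_gate {
            pthread_mutex_t lock;
            unsigned int requests;   /* in-flight requests holding the clock */
            unsigned int hz;         /* current clock, 0 == gated */
            unsigned int saved_hz;   /* frequency to restore on ungate */
            bool gated;
    };

    /* Called before a request is issued: ungate if we are the first user. */
    static void clk_hold(struct clk_gate *cg)
    {
            pthread_mutex_lock(&cg->lock);
            if (cg->gated) {
                    cg->hz = cg->saved_hz;
                    cg->gated = false;
                    printf("ungated clock at %u Hz\n", cg->hz);
            }
            cg->requests++;
            pthread_mutex_unlock(&cg->lock);
    }

    /*
     * Called when a request completes: gate once idle.  The real code
     * schedules this from a worker after a short delay instead of doing
     * it inline.
     */
    static void clk_release(struct clk_gate *cg)
    {
            pthread_mutex_lock(&cg->lock);
            if (--cg->requests == 0 && !cg->gated) {
                    cg->saved_hz = cg->hz;
                    cg->hz = 0;          /* ios.clock = 0 gates the block clock */
                    cg->gated = true;
                    printf("gated clock (was %u Hz)\n", cg->saved_hz);
            }
            pthread_mutex_unlock(&cg->lock);
    }

    int main(void)
    {
            struct clk_gate cg = {
                    .lock = PTHREAD_MUTEX_INITIALIZER,
                    .hz = 400000, .saved_hz = 400000,
            };

            clk_hold(&cg);    /* first request keeps the clock running */
            clk_release(&cg); /* last request gates it again */
            clk_hold(&cg);    /* next request restores the cached frequency */
            clk_release(&cg);
            return 0;
    }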
diff --git a/trunk/drivers/mmc/core/bus.c b/trunk/drivers/mmc/core/bus.c index 63667a8f140c..af8dc6a2a317 100644 --- a/trunk/drivers/mmc/core/bus.c +++ b/trunk/drivers/mmc/core/bus.c @@ -303,14 +303,14 @@ int mmc_add_card(struct mmc_card *card) type, card->rca); } -#ifdef CONFIG_DEBUG_FS - mmc_add_card_debugfs(card); -#endif - ret = device_add(&card->dev); if (ret) return ret; +#ifdef CONFIG_DEBUG_FS + mmc_add_card_debugfs(card); +#endif + mmc_card_set_present(card); return 0; diff --git a/trunk/drivers/mmc/core/core.c b/trunk/drivers/mmc/core/core.c index 6625c057be05..a3a780faf85a 100644 --- a/trunk/drivers/mmc/core/core.c +++ b/trunk/drivers/mmc/core/core.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -131,8 +130,6 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq) if (mrq->done) mrq->done(mrq); - - mmc_host_clk_gate(host); } } @@ -193,7 +190,6 @@ mmc_start_request(struct mmc_host *host, struct mmc_request *mrq) mrq->stop->mrq = mrq; } } - mmc_host_clk_ungate(host); host->ops->request(host, mrq); } @@ -299,9 +295,8 @@ void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card) unsigned int timeout_us, limit_us; timeout_us = data->timeout_ns / 1000; - if (mmc_host_clk_rate(card->host)) - timeout_us += data->timeout_clks * 1000 / - (mmc_host_clk_rate(card->host) / 1000); + timeout_us += data->timeout_clks * 1000 / + (card->host->ios.clock / 1000); if (data->flags & MMC_DATA_WRITE) /* @@ -619,8 +614,6 @@ static inline void mmc_set_ios(struct mmc_host *host) ios->power_mode, ios->chip_select, ios->vdd, ios->bus_width, ios->timing); - if (ios->clock > 0) - mmc_set_ungated(host); host->ops->set_ios(host, ios); } @@ -648,61 +641,6 @@ void mmc_set_clock(struct mmc_host *host, unsigned int hz) mmc_set_ios(host); } -#ifdef CONFIG_MMC_CLKGATE -/* - * This gates the clock by setting it to 0 Hz. - */ -void mmc_gate_clock(struct mmc_host *host) -{ - unsigned long flags; - - spin_lock_irqsave(&host->clk_lock, flags); - host->clk_old = host->ios.clock; - host->ios.clock = 0; - host->clk_gated = true; - spin_unlock_irqrestore(&host->clk_lock, flags); - mmc_set_ios(host); -} - -/* - * This restores the clock from gating by using the cached - * clock value. - */ -void mmc_ungate_clock(struct mmc_host *host) -{ - /* - * We should previously have gated the clock, so the clock shall - * be 0 here! The clock may however be 0 during initialization, - * when some request operations are performed before setting - * the frequency. When ungate is requested in that situation - * we just ignore the call. - */ - if (host->clk_old) { - BUG_ON(host->ios.clock); - /* This call will also set host->clk_gated to false */ - mmc_set_clock(host, host->clk_old); - } -} - -void mmc_set_ungated(struct mmc_host *host) -{ - unsigned long flags; - - /* - * We've been given a new frequency while the clock is gated, - * so make sure we regard this as ungating it. - */ - spin_lock_irqsave(&host->clk_lock, flags); - host->clk_gated = false; - spin_unlock_irqrestore(&host->clk_lock, flags); -} - -#else -void mmc_set_ungated(struct mmc_host *host) -{ -} -#endif - /* * Change the bus mode (open drain/push-pull) of a host. 
*/ @@ -1486,57 +1424,35 @@ int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen) } EXPORT_SYMBOL(mmc_set_blocklen); -static int mmc_rescan_try_freq(struct mmc_host *host, unsigned freq) -{ - host->f_init = freq; - -#ifdef CONFIG_MMC_DEBUG - pr_info("%s: %s: trying to init card at %u Hz\n", - mmc_hostname(host), __func__, host->f_init); -#endif - mmc_power_up(host); - sdio_reset(host); - mmc_go_idle(host); - - mmc_send_if_cond(host, host->ocr_avail); - - /* Order's important: probe SDIO, then SD, then MMC */ - if (!mmc_attach_sdio(host)) - return 0; - if (!mmc_attach_sd(host)) - return 0; - if (!mmc_attach_mmc(host)) - return 0; - - mmc_power_off(host); - return -EIO; -} - void mmc_rescan(struct work_struct *work) { - static const unsigned freqs[] = { 400000, 300000, 200000, 100000 }; struct mmc_host *host = container_of(work, struct mmc_host, detect.work); + u32 ocr; + int err; + unsigned long flags; int i; + const unsigned freqs[] = { 400000, 300000, 200000, 100000 }; + + spin_lock_irqsave(&host->lock, flags); - if (host->rescan_disable) + if (host->rescan_disable) { + spin_unlock_irqrestore(&host->lock, flags); return; + } + + spin_unlock_irqrestore(&host->lock, flags); + mmc_bus_get(host); - /* - * if there is a _removable_ card registered, check whether it is - * still present - */ - if (host->bus_ops && host->bus_ops->detect && !host->bus_dead - && mmc_card_is_removable(host)) + /* if there is a card registered, check whether it is still present */ + if ((host->bus_ops != NULL) && host->bus_ops->detect && !host->bus_dead) host->bus_ops->detect(host); - /* - * Let mmc_bus_put() free the bus/bus_ops if we've found that - * the card is no longer present. - */ mmc_bus_put(host); + + mmc_bus_get(host); /* if there still is a card present, stop here */ @@ -1545,6 +1461,8 @@ void mmc_rescan(struct work_struct *work) goto out; } + /* detect a newly inserted card */ + /* * Only we can add a new handler, so it's safe to * release the lock here. @@ -1554,16 +1472,72 @@ void mmc_rescan(struct work_struct *work) if (host->ops->get_cd && host->ops->get_cd(host) == 0) goto out; - mmc_claim_host(host); for (i = 0; i < ARRAY_SIZE(freqs); i++) { - if (!mmc_rescan_try_freq(host, max(freqs[i], host->f_min))) - break; - if (freqs[i] < host->f_min) - break; - } - mmc_release_host(host); + mmc_claim_host(host); + + if (freqs[i] >= host->f_min) + host->f_init = freqs[i]; + else if (!i || freqs[i-1] > host->f_min) + host->f_init = host->f_min; + else { + mmc_release_host(host); + goto out; + } +#ifdef CONFIG_MMC_DEBUG + pr_info("%s: %s: trying to init card at %u Hz\n", + mmc_hostname(host), __func__, host->f_init); +#endif + mmc_power_up(host); + sdio_reset(host); + mmc_go_idle(host); + + mmc_send_if_cond(host, host->ocr_avail); - out: + /* + * First we search for SDIO... + */ + err = mmc_send_io_op_cond(host, 0, &ocr); + if (!err) { + if (mmc_attach_sdio(host, ocr)) { + mmc_claim_host(host); + /* + * Try SDMEM (but not MMC) even if SDIO + * is broken. + */ + if (mmc_send_app_op_cond(host, 0, &ocr)) + goto out_fail; + + if (mmc_attach_sd(host, ocr)) + mmc_power_off(host); + } + goto out; + } + + /* + * ...then normal SD... + */ + err = mmc_send_app_op_cond(host, 0, &ocr); + if (!err) { + if (mmc_attach_sd(host, ocr)) + mmc_power_off(host); + goto out; + } + + /* + * ...and finally MMC. 
+ */ + err = mmc_send_op_cond(host, 0, &ocr); + if (!err) { + if (mmc_attach_mmc(host, ocr)) + mmc_power_off(host); + goto out; + } + +out_fail: + mmc_release_host(host); + mmc_power_off(host); + } +out: if (host->caps & MMC_CAP_NEEDS_POLL) mmc_schedule_delayed_work(&host->detect, HZ); } @@ -1747,18 +1721,6 @@ int mmc_resume_host(struct mmc_host *host) if (!(host->pm_flags & MMC_PM_KEEP_POWER)) { mmc_power_up(host); mmc_select_voltage(host, host->ocr); - /* - * Tell runtime PM core we just powered up the card, - * since it still believes the card is powered off. - * Note that currently runtime PM is only enabled - * for SDIO cards that are MMC_CAP_POWER_OFF_CARD - */ - if (mmc_card_sdio(host->card) && - (host->caps & MMC_CAP_POWER_OFF_CARD)) { - pm_runtime_disable(&host->card->dev); - pm_runtime_set_active(&host->card->dev); - pm_runtime_enable(&host->card->dev); - } } BUG_ON(!host->bus_ops->resume); err = host->bus_ops->resume(host); diff --git a/trunk/drivers/mmc/core/core.h b/trunk/drivers/mmc/core/core.h index ca1fdde29df6..77240cd11bcf 100644 --- a/trunk/drivers/mmc/core/core.h +++ b/trunk/drivers/mmc/core/core.h @@ -33,9 +33,6 @@ void mmc_init_erase(struct mmc_card *card); void mmc_set_chip_select(struct mmc_host *host, int mode); void mmc_set_clock(struct mmc_host *host, unsigned int hz); -void mmc_gate_clock(struct mmc_host *host); -void mmc_ungate_clock(struct mmc_host *host); -void mmc_set_ungated(struct mmc_host *host); void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode); void mmc_set_bus_width(struct mmc_host *host, unsigned int width); void mmc_set_bus_width_ddr(struct mmc_host *host, unsigned int width, @@ -57,9 +54,9 @@ void mmc_rescan(struct work_struct *work); void mmc_start_host(struct mmc_host *host); void mmc_stop_host(struct mmc_host *host); -int mmc_attach_mmc(struct mmc_host *host); -int mmc_attach_sd(struct mmc_host *host); -int mmc_attach_sdio(struct mmc_host *host); +int mmc_attach_mmc(struct mmc_host *host, u32 ocr); +int mmc_attach_sd(struct mmc_host *host, u32 ocr); +int mmc_attach_sdio(struct mmc_host *host, u32 ocr); /* Module parameters */ extern int use_spi_crc; diff --git a/trunk/drivers/mmc/core/debugfs.c b/trunk/drivers/mmc/core/debugfs.c index 998797ed67a6..eed1405fd742 100644 --- a/trunk/drivers/mmc/core/debugfs.c +++ b/trunk/drivers/mmc/core/debugfs.c @@ -183,11 +183,6 @@ void mmc_add_host_debugfs(struct mmc_host *host) &mmc_clock_fops)) goto err_node; -#ifdef CONFIG_MMC_CLKGATE - if (!debugfs_create_u32("clk_delay", (S_IRUSR | S_IWUSR), - root, &host->clk_delay)) - goto err_node; -#endif return; err_node: diff --git a/trunk/drivers/mmc/core/host.c b/trunk/drivers/mmc/core/host.c index b3ac6c5bc5c6..10b8af27e03a 100644 --- a/trunk/drivers/mmc/core/host.c +++ b/trunk/drivers/mmc/core/host.c @@ -3,7 +3,6 @@ * * Copyright (C) 2003 Russell King, All Rights Reserved. * Copyright (C) 2007-2008 Pierre Ossman - * Copyright (C) 2010 Linus Walleij * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -21,7 +20,6 @@ #include #include -#include #include "core.h" #include "host.h" @@ -52,205 +50,6 @@ void mmc_unregister_host_class(void) static DEFINE_IDR(mmc_host_idr); static DEFINE_SPINLOCK(mmc_host_lock); -#ifdef CONFIG_MMC_CLKGATE - -/* - * Enabling clock gating will make the core call out to the host - * once up and once down when it performs a request or card operation - * intermingled in any fashion. 
The driver will see this through - * set_ios() operations with ios.clock field set to 0 to gate (disable) - * the block clock, and to the old frequency to enable it again. - */ -static void mmc_host_clk_gate_delayed(struct mmc_host *host) -{ - unsigned long tick_ns; - unsigned long freq = host->ios.clock; - unsigned long flags; - - if (!freq) { - pr_debug("%s: frequency set to 0 in disable function, " - "this means the clock is already disabled.\n", - mmc_hostname(host)); - return; - } - /* - * New requests may have appeared while we were scheduling, - * then there is no reason to delay the check before - * clk_disable(). - */ - spin_lock_irqsave(&host->clk_lock, flags); - - /* - * Delay n bus cycles (at least 8 from MMC spec) before attempting - * to disable the MCI block clock. The reference count may have - * gone up again after this delay due to rescheduling! - */ - if (!host->clk_requests) { - spin_unlock_irqrestore(&host->clk_lock, flags); - tick_ns = DIV_ROUND_UP(1000000000, freq); - ndelay(host->clk_delay * tick_ns); - } else { - /* New users appeared while waiting for this work */ - spin_unlock_irqrestore(&host->clk_lock, flags); - return; - } - mutex_lock(&host->clk_gate_mutex); - spin_lock_irqsave(&host->clk_lock, flags); - if (!host->clk_requests) { - spin_unlock_irqrestore(&host->clk_lock, flags); - /* This will set host->ios.clock to 0 */ - mmc_gate_clock(host); - spin_lock_irqsave(&host->clk_lock, flags); - pr_debug("%s: gated MCI clock\n", mmc_hostname(host)); - } - spin_unlock_irqrestore(&host->clk_lock, flags); - mutex_unlock(&host->clk_gate_mutex); -} - -/* - * Internal work. Work to disable the clock at some later point. - */ -static void mmc_host_clk_gate_work(struct work_struct *work) -{ - struct mmc_host *host = container_of(work, struct mmc_host, - clk_gate_work); - - mmc_host_clk_gate_delayed(host); -} - -/** - * mmc_host_clk_ungate - ungate hardware MCI clocks - * @host: host to ungate. - * - * Makes sure the host ios.clock is restored to a non-zero value - * past this call. Increase clock reference count and ungate clock - * if we're the first user. - */ -void mmc_host_clk_ungate(struct mmc_host *host) -{ - unsigned long flags; - - mutex_lock(&host->clk_gate_mutex); - spin_lock_irqsave(&host->clk_lock, flags); - if (host->clk_gated) { - spin_unlock_irqrestore(&host->clk_lock, flags); - mmc_ungate_clock(host); - spin_lock_irqsave(&host->clk_lock, flags); - pr_debug("%s: ungated MCI clock\n", mmc_hostname(host)); - } - host->clk_requests++; - spin_unlock_irqrestore(&host->clk_lock, flags); - mutex_unlock(&host->clk_gate_mutex); -} - -/** - * mmc_host_may_gate_card - check if this card may be gated - * @card: card to check. - */ -static bool mmc_host_may_gate_card(struct mmc_card *card) -{ - /* If there is no card we may gate it */ - if (!card) - return true; - /* - * Don't gate SDIO cards! These need to be clocked at all times - * since they may be independent systems generating interrupts - * and other events. The clock requests counter from the core will - * go down to zero since the core does not need it, but we will not - * gate the clock, because there is somebody out there that may still - * be using it. - */ - if (mmc_card_sdio(card)) - return false; - - return true; -} - -/** - * mmc_host_clk_gate - gate off hardware MCI clocks - * @host: host to gate. - * - * Calls the host driver with ios.clock set to zero as often as possible - * in order to gate off hardware MCI clocks. Decrease clock reference - * count and schedule disabling of clock. 
- */ -void mmc_host_clk_gate(struct mmc_host *host) -{ - unsigned long flags; - - spin_lock_irqsave(&host->clk_lock, flags); - host->clk_requests--; - if (mmc_host_may_gate_card(host->card) && - !host->clk_requests) - schedule_work(&host->clk_gate_work); - spin_unlock_irqrestore(&host->clk_lock, flags); -} - -/** - * mmc_host_clk_rate - get current clock frequency setting - * @host: host to get the clock frequency for. - * - * Returns current clock frequency regardless of gating. - */ -unsigned int mmc_host_clk_rate(struct mmc_host *host) -{ - unsigned long freq; - unsigned long flags; - - spin_lock_irqsave(&host->clk_lock, flags); - if (host->clk_gated) - freq = host->clk_old; - else - freq = host->ios.clock; - spin_unlock_irqrestore(&host->clk_lock, flags); - return freq; -} - -/** - * mmc_host_clk_init - set up clock gating code - * @host: host with potential clock to control - */ -static inline void mmc_host_clk_init(struct mmc_host *host) -{ - host->clk_requests = 0; - /* Hold MCI clock for 8 cycles by default */ - host->clk_delay = 8; - host->clk_gated = false; - INIT_WORK(&host->clk_gate_work, mmc_host_clk_gate_work); - spin_lock_init(&host->clk_lock); - mutex_init(&host->clk_gate_mutex); -} - -/** - * mmc_host_clk_exit - shut down clock gating code - * @host: host with potential clock to control - */ -static inline void mmc_host_clk_exit(struct mmc_host *host) -{ - /* - * Wait for any outstanding gate and then make sure we're - * ungated before exiting. - */ - if (cancel_work_sync(&host->clk_gate_work)) - mmc_host_clk_gate_delayed(host); - if (host->clk_gated) - mmc_host_clk_ungate(host); - /* There should be only one user now */ - WARN_ON(host->clk_requests > 1); -} - -#else - -static inline void mmc_host_clk_init(struct mmc_host *host) -{ -} - -static inline void mmc_host_clk_exit(struct mmc_host *host) -{ -} - -#endif - /** * mmc_alloc_host - initialise the per-host structure. 
* @extra: sizeof private data structure @@ -283,8 +82,6 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) host->class_dev.class = &mmc_host_class; device_initialize(&host->class_dev); - mmc_host_clk_init(host); - spin_lock_init(&host->lock); init_waitqueue_head(&host->wq); INIT_DELAYED_WORK(&host->detect, mmc_rescan); @@ -366,8 +163,6 @@ void mmc_remove_host(struct mmc_host *host) device_del(&host->class_dev); led_trigger_unregister_simple(host->led); - - mmc_host_clk_exit(host); } EXPORT_SYMBOL(mmc_remove_host); @@ -388,3 +183,4 @@ void mmc_free_host(struct mmc_host *host) } EXPORT_SYMBOL(mmc_free_host); + diff --git a/trunk/drivers/mmc/core/host.h b/trunk/drivers/mmc/core/host.h index de199f911928..8c87e1109a34 100644 --- a/trunk/drivers/mmc/core/host.h +++ b/trunk/drivers/mmc/core/host.h @@ -10,31 +10,10 @@ */ #ifndef _MMC_CORE_HOST_H #define _MMC_CORE_HOST_H -#include int mmc_register_host_class(void); void mmc_unregister_host_class(void); -#ifdef CONFIG_MMC_CLKGATE -void mmc_host_clk_ungate(struct mmc_host *host); -void mmc_host_clk_gate(struct mmc_host *host); -unsigned int mmc_host_clk_rate(struct mmc_host *host); - -#else -static inline void mmc_host_clk_ungate(struct mmc_host *host) -{ -} - -static inline void mmc_host_clk_gate(struct mmc_host *host) -{ -} - -static inline unsigned int mmc_host_clk_rate(struct mmc_host *host) -{ - return host->ios.clock; -} -#endif - void mmc_host_deeper_disable(struct work_struct *work); #endif diff --git a/trunk/drivers/mmc/core/mmc.c b/trunk/drivers/mmc/core/mmc.c index 16006ef153fe..77f93c3b8808 100644 --- a/trunk/drivers/mmc/core/mmc.c +++ b/trunk/drivers/mmc/core/mmc.c @@ -534,57 +534,39 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, */ if ((card->csd.mmca_vsn >= CSD_SPEC_VER_4) && (host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA))) { - static unsigned ext_csd_bits[][2] = { - { EXT_CSD_BUS_WIDTH_8, EXT_CSD_DDR_BUS_WIDTH_8 }, - { EXT_CSD_BUS_WIDTH_4, EXT_CSD_DDR_BUS_WIDTH_4 }, - { EXT_CSD_BUS_WIDTH_1, EXT_CSD_BUS_WIDTH_1 }, - }; - static unsigned bus_widths[] = { - MMC_BUS_WIDTH_8, - MMC_BUS_WIDTH_4, - MMC_BUS_WIDTH_1 - }; - unsigned idx, bus_width = 0; - - if (host->caps & MMC_CAP_8_BIT_DATA) - idx = 0; - else - idx = 1; - for (; idx < ARRAY_SIZE(bus_widths); idx++) { - bus_width = bus_widths[idx]; - if (bus_width == MMC_BUS_WIDTH_1) - ddr = 0; /* no DDR for 1-bit width */ - err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_BUS_WIDTH, - ext_csd_bits[idx][0]); - if (!err) { - mmc_set_bus_width_ddr(card->host, - bus_width, MMC_SDR_MODE); - /* - * If controller can't handle bus width test, - * use the highest bus width to maintain - * compatibility with previous MMC behavior. 
- */ - if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST)) - break; - err = mmc_bus_test(card, bus_width); - if (!err) - break; - } + unsigned ext_csd_bit, bus_width; + + if (host->caps & MMC_CAP_8_BIT_DATA) { + if (ddr) + ext_csd_bit = EXT_CSD_DDR_BUS_WIDTH_8; + else + ext_csd_bit = EXT_CSD_BUS_WIDTH_8; + bus_width = MMC_BUS_WIDTH_8; + } else { + if (ddr) + ext_csd_bit = EXT_CSD_DDR_BUS_WIDTH_4; + else + ext_csd_bit = EXT_CSD_BUS_WIDTH_4; + bus_width = MMC_BUS_WIDTH_4; } - if (!err && ddr) { - err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_BUS_WIDTH, - ext_csd_bits[idx][1]); - } + err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_BUS_WIDTH, ext_csd_bit); + + if (err && err != -EBADMSG) + goto free_card; + if (err) { printk(KERN_WARNING "%s: switch to bus width %d ddr %d " - "failed\n", mmc_hostname(card->host), - 1 << bus_width, ddr); - goto free_card; - } else if (ddr) { - mmc_card_set_ddr_mode(card); + "failed\n", mmc_hostname(card->host), + 1 << bus_width, ddr); + err = 0; + } else { + if (ddr) + mmc_card_set_ddr_mode(card); + else + ddr = MMC_SDR_MODE; + mmc_set_bus_width_ddr(card->host, bus_width, ddr); } } @@ -755,21 +737,14 @@ static void mmc_attach_bus_ops(struct mmc_host *host) /* * Starting point for MMC card init. */ -int mmc_attach_mmc(struct mmc_host *host) +int mmc_attach_mmc(struct mmc_host *host, u32 ocr) { int err; - u32 ocr; BUG_ON(!host); WARN_ON(!host->claimed); - err = mmc_send_op_cond(host, 0, &ocr); - if (err) - return err; - mmc_attach_bus_ops(host); - if (host->ocr_avail_mmc) - host->ocr_avail = host->ocr_avail_mmc; /* * We need to get OCR a different way for SPI. @@ -809,20 +784,20 @@ int mmc_attach_mmc(struct mmc_host *host) goto err; mmc_release_host(host); + err = mmc_add_card(host->card); - mmc_claim_host(host); if (err) goto remove_card; return 0; remove_card: - mmc_release_host(host); mmc_remove_card(host->card); - mmc_claim_host(host); host->card = NULL; + mmc_claim_host(host); err: mmc_detach_bus(host); + mmc_release_host(host); printk(KERN_ERR "%s: error %d whilst initialising MMC card\n", mmc_hostname(host), err); diff --git a/trunk/drivers/mmc/core/mmc_ops.c b/trunk/drivers/mmc/core/mmc_ops.c index 60842f878ded..326447c9ede8 100644 --- a/trunk/drivers/mmc/core/mmc_ops.c +++ b/trunk/drivers/mmc/core/mmc_ops.c @@ -462,104 +462,3 @@ int mmc_send_status(struct mmc_card *card, u32 *status) return 0; } -static int -mmc_send_bus_test(struct mmc_card *card, struct mmc_host *host, u8 opcode, - u8 len) -{ - struct mmc_request mrq; - struct mmc_command cmd; - struct mmc_data data; - struct scatterlist sg; - u8 *data_buf; - u8 *test_buf; - int i, err; - static u8 testdata_8bit[8] = { 0x55, 0xaa, 0, 0, 0, 0, 0, 0 }; - static u8 testdata_4bit[4] = { 0x5a, 0, 0, 0 }; - - /* dma onto stack is unsafe/nonportable, but callers to this - * routine normally provide temporary on-stack buffers ... 
- */ - data_buf = kmalloc(len, GFP_KERNEL); - if (!data_buf) - return -ENOMEM; - - if (len == 8) - test_buf = testdata_8bit; - else if (len == 4) - test_buf = testdata_4bit; - else { - printk(KERN_ERR "%s: Invalid bus_width %d\n", - mmc_hostname(host), len); - kfree(data_buf); - return -EINVAL; - } - - if (opcode == MMC_BUS_TEST_W) - memcpy(data_buf, test_buf, len); - - memset(&mrq, 0, sizeof(struct mmc_request)); - memset(&cmd, 0, sizeof(struct mmc_command)); - memset(&data, 0, sizeof(struct mmc_data)); - - mrq.cmd = &cmd; - mrq.data = &data; - cmd.opcode = opcode; - cmd.arg = 0; - - /* NOTE HACK: the MMC_RSP_SPI_R1 is always correct here, but we - * rely on callers to never use this with "native" calls for reading - * CSD or CID. Native versions of those commands use the R2 type, - * not R1 plus a data block. - */ - cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC; - - data.blksz = len; - data.blocks = 1; - if (opcode == MMC_BUS_TEST_R) - data.flags = MMC_DATA_READ; - else - data.flags = MMC_DATA_WRITE; - - data.sg = &sg; - data.sg_len = 1; - sg_init_one(&sg, data_buf, len); - mmc_wait_for_req(host, &mrq); - err = 0; - if (opcode == MMC_BUS_TEST_R) { - for (i = 0; i < len / 4; i++) - if ((test_buf[i] ^ data_buf[i]) != 0xff) { - err = -EIO; - break; - } - } - kfree(data_buf); - - if (cmd.error) - return cmd.error; - if (data.error) - return data.error; - - return err; -} - -int mmc_bus_test(struct mmc_card *card, u8 bus_width) -{ - int err, width; - - if (bus_width == MMC_BUS_WIDTH_8) - width = 8; - else if (bus_width == MMC_BUS_WIDTH_4) - width = 4; - else if (bus_width == MMC_BUS_WIDTH_1) - return 0; /* no need for test */ - else - return -EINVAL; - - /* - * Ignore errors from BUS_TEST_W. BUS_TEST_R will fail if there - * is a problem. This improves chances that the test will work. - */ - mmc_send_bus_test(card, card->host, MMC_BUS_TEST_W, width); - err = mmc_send_bus_test(card, card->host, MMC_BUS_TEST_R, width); - return err; -} diff --git a/trunk/drivers/mmc/core/mmc_ops.h b/trunk/drivers/mmc/core/mmc_ops.h index e6d44b8a18db..653eb8e84178 100644 --- a/trunk/drivers/mmc/core/mmc_ops.h +++ b/trunk/drivers/mmc/core/mmc_ops.h @@ -26,7 +26,6 @@ int mmc_send_cid(struct mmc_host *host, u32 *cid); int mmc_spi_read_ocr(struct mmc_host *host, int highcap, u32 *ocrp); int mmc_spi_set_crc(struct mmc_host *host, int use_crc); int mmc_card_sleepawake(struct mmc_host *host, int sleep); -int mmc_bus_test(struct mmc_card *card, u8 bus_width); #endif diff --git a/trunk/drivers/mmc/core/sd.c b/trunk/drivers/mmc/core/sd.c index d18c32bca99b..49da4dffd28e 100644 --- a/trunk/drivers/mmc/core/sd.c +++ b/trunk/drivers/mmc/core/sd.c @@ -764,21 +764,14 @@ static void mmc_sd_attach_bus_ops(struct mmc_host *host) /* * Starting point for SD card init. */ -int mmc_attach_sd(struct mmc_host *host) +int mmc_attach_sd(struct mmc_host *host, u32 ocr) { int err; - u32 ocr; BUG_ON(!host); WARN_ON(!host->claimed); - err = mmc_send_app_op_cond(host, 0, &ocr); - if (err) - return err; - mmc_sd_attach_bus_ops(host); - if (host->ocr_avail_sd) - host->ocr_avail = host->ocr_avail_sd; /* * We need to get OCR a different way for SPI. @@ -802,8 +795,7 @@ int mmc_attach_sd(struct mmc_host *host) ocr &= ~0x7F; } - if ((ocr & MMC_VDD_165_195) && - !(host->ocr_avail_sd & MMC_VDD_165_195)) { + if (ocr & MMC_VDD_165_195) { printk(KERN_WARNING "%s: SD card claims to support the " "incompletely defined 'low voltage range'. 
This " "will be ignored.\n", mmc_hostname(host)); @@ -828,20 +820,20 @@ int mmc_attach_sd(struct mmc_host *host) goto err; mmc_release_host(host); + err = mmc_add_card(host->card); - mmc_claim_host(host); if (err) goto remove_card; return 0; remove_card: - mmc_release_host(host); mmc_remove_card(host->card); host->card = NULL; mmc_claim_host(host); err: mmc_detach_bus(host); + mmc_release_host(host); printk(KERN_ERR "%s: error %d whilst initialising SD card\n", mmc_hostname(host), err); diff --git a/trunk/drivers/mmc/core/sdio.c b/trunk/drivers/mmc/core/sdio.c index 5c4a54d9b6a4..efef5f94ac42 100644 --- a/trunk/drivers/mmc/core/sdio.c +++ b/trunk/drivers/mmc/core/sdio.c @@ -627,27 +627,15 @@ static int mmc_sdio_suspend(struct mmc_host *host) static int mmc_sdio_resume(struct mmc_host *host) { - int i, err = 0; + int i, err; BUG_ON(!host); BUG_ON(!host->card); /* Basic card reinitialization. */ mmc_claim_host(host); - - /* No need to reinitialize powered-resumed nonremovable cards */ - if (mmc_card_is_removable(host) || !mmc_card_is_powered_resumed(host)) - err = mmc_sdio_init_card(host, host->ocr, host->card, + err = mmc_sdio_init_card(host, host->ocr, host->card, (host->pm_flags & MMC_PM_KEEP_POWER)); - else if (mmc_card_is_powered_resumed(host)) { - /* We may have switched to 1-bit mode during suspend */ - err = sdio_enable_4bit_bus(host->card); - if (err > 0) { - mmc_set_bus_width(host, MMC_BUS_WIDTH_4); - err = 0; - } - } - if (!err && host->sdio_irqs) mmc_signal_sdio_irq(host); mmc_release_host(host); @@ -702,22 +690,16 @@ static const struct mmc_bus_ops mmc_sdio_ops = { /* * Starting point for SDIO card init. */ -int mmc_attach_sdio(struct mmc_host *host) +int mmc_attach_sdio(struct mmc_host *host, u32 ocr) { - int err, i, funcs; - u32 ocr; + int err; + int i, funcs; struct mmc_card *card; BUG_ON(!host); WARN_ON(!host->claimed); - err = mmc_send_io_op_cond(host, 0, &ocr); - if (err) - return err; - mmc_attach_bus(host, &mmc_sdio_ops); - if (host->ocr_avail_sdio) - host->ocr_avail = host->ocr_avail_sdio; /* * Sanity check the voltages that the card claims to @@ -787,12 +769,12 @@ int mmc_attach_sdio(struct mmc_host *host) pm_runtime_enable(&card->sdio_func[i]->dev); } + mmc_release_host(host); + /* * First add the card to the driver model... */ - mmc_release_host(host); err = mmc_add_card(host->card); - mmc_claim_host(host); if (err) goto remove_added; @@ -810,17 +792,15 @@ int mmc_attach_sdio(struct mmc_host *host) remove_added: /* Remove without lock if the device has been added. */ - mmc_release_host(host); mmc_sdio_remove(host); mmc_claim_host(host); remove: /* And with lock if it hasn't been added. */ - mmc_release_host(host); if (host->card) mmc_sdio_remove(host); - mmc_claim_host(host); err: mmc_detach_bus(host); + mmc_release_host(host); printk(KERN_ERR "%s: error %d whilst initialising SDIO card\n", mmc_hostname(host), err); diff --git a/trunk/drivers/mmc/core/sdio_bus.c b/trunk/drivers/mmc/core/sdio_bus.c index d29b9c36919a..203da443e339 100644 --- a/trunk/drivers/mmc/core/sdio_bus.c +++ b/trunk/drivers/mmc/core/sdio_bus.c @@ -197,12 +197,44 @@ static int sdio_bus_remove(struct device *dev) #ifdef CONFIG_PM_RUNTIME +static int sdio_bus_pm_prepare(struct device *dev) +{ + struct sdio_func *func = dev_to_sdio_func(dev); + + /* + * Resume an SDIO device which was suspended at run time at this + * point, in order to allow standard SDIO suspend/resume paths + * to keep working as usual. 
+ * + * Ultimately, the SDIO driver itself will decide (in its + * suspend handler, or lack thereof) whether the card should be + * removed or kept, and if kept, at what power state. + * + * At this point, PM core have increased our use count, so it's + * safe to directly resume the device. After system is resumed + * again, PM core will drop back its runtime PM use count, and if + * needed device will be suspended again. + * + * The end result is guaranteed to be a power state that is + * coherent with the device's runtime PM use count. + * + * The return value of pm_runtime_resume is deliberately unchecked + * since there is little point in failing system suspend if a + * device can't be resumed. + */ + if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) + pm_runtime_resume(dev); + + return 0; +} + static const struct dev_pm_ops sdio_bus_pm_ops = { SET_RUNTIME_PM_OPS( pm_generic_runtime_suspend, pm_generic_runtime_resume, pm_generic_runtime_idle ) + .prepare = sdio_bus_pm_prepare, }; #define SDIO_PM_OPS_PTR (&sdio_bus_pm_ops) diff --git a/trunk/drivers/mmc/host/Kconfig b/trunk/drivers/mmc/host/Kconfig index c22a4c039988..e960a9300eb2 100644 --- a/trunk/drivers/mmc/host/Kconfig +++ b/trunk/drivers/mmc/host/Kconfig @@ -142,27 +142,6 @@ config MMC_SDHCI_ESDHC_IMX If unsure, say N. -config MMC_SDHCI_DOVE - bool "SDHCI support on Marvell's Dove SoC" - depends on ARCH_DOVE - depends on MMC_SDHCI_PLTFM - select MMC_SDHCI_IO_ACCESSORS - help - This selects the Secure Digital Host Controller Interface in - Marvell's Dove SoC. - - If unsure, say N. - -config MMC_SDHCI_TEGRA - tristate "SDHCI platform support for the Tegra SD/MMC Controller" - depends on MMC_SDHCI_PLTFM && ARCH_TEGRA - select MMC_SDHCI_IO_ACCESSORS - help - This selects the Tegra SD/MMC controller. If you have a Tegra - platform with SD or MMC devices, say Y or M here. - - If unsure, say N. - config MMC_SDHCI_S3C tristate "SDHCI support on Samsung S3C SoC" depends on MMC_SDHCI && PLAT_SAMSUNG @@ -481,22 +460,6 @@ config SDH_BFIN_MISSING_CMD_PULLUP_WORKAROUND help If you say yes here SD-Cards may work on the EZkit. -config MMC_DW - tristate "Synopsys DesignWare Memory Card Interface" - depends on ARM - help - This selects support for the Synopsys DesignWare Mobile Storage IP - block, this provides host support for SD and MMC interfaces, in both - PIO and external DMA modes. - -config MMC_DW_IDMAC - bool "Internal DMAC interface" - depends on MMC_DW - help - This selects support for the internal DMAC block within the Synopsys - Designware Mobile Storage IP block. This disables the external DMA - interface. 
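Earlier in this patch, the sdio_bus.c hunk adds a .prepare callback that calls pm_runtime_resume() so a runtime-suspended SDIO function is powered up again before the system-wide suspend path runs. The fragment below is a generic, illustrative version of that pattern for an arbitrary bus or driver; my_bus_pm_prepare and my_bus_pm_ops are invented names, while pm_runtime_resume(), the pm_generic_runtime_*() helpers and SET_RUNTIME_PM_OPS() are the real interfaces the hunk relies on.

    /*
     * Illustrative only: resume a runtime-suspended device in .prepare so
     * the ordinary system-suspend callbacks always see it powered, the same
     * idea as sdio_bus_pm_prepare() above.
     */
    #include <linux/device.h>
    #include <linux/pm.h>
    #include <linux/pm_runtime.h>

    static int my_bus_pm_prepare(struct device *dev)
    {
            /*
             * The PM core has already taken a runtime PM reference on dev,
             * so resuming it directly is safe; the reference is dropped
             * again after the system resumes.  The return value of
             * pm_runtime_resume() is deliberately ignored: failing system
             * suspend because one device could not be woken helps nobody.
             */
            pm_runtime_resume(dev);
            return 0;
    }

    static const struct dev_pm_ops my_bus_pm_ops = {
            SET_RUNTIME_PM_OPS(pm_generic_runtime_suspend,
                               pm_generic_runtime_resume,
                               pm_generic_runtime_idle)
            .prepare = my_bus_pm_prepare,
    };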
- config MMC_SH_MMCIF tristate "SuperH Internal MMCIF support" depends on MMC_BLOCK && (SUPERH || ARCH_SHMOBILE) diff --git a/trunk/drivers/mmc/host/Makefile b/trunk/drivers/mmc/host/Makefile index e834fb223e9a..7b645ff43b30 100644 --- a/trunk/drivers/mmc/host/Makefile +++ b/trunk/drivers/mmc/host/Makefile @@ -31,7 +31,6 @@ obj-$(CONFIG_MMC_TMIO) += tmio_mmc.o obj-$(CONFIG_MMC_CB710) += cb710-mmc.o obj-$(CONFIG_MMC_VIA_SDMMC) += via-sdmmc.o obj-$(CONFIG_SDH_BFIN) += bfin_sdh.o -obj-$(CONFIG_MMC_DW) += dw_mmc.o obj-$(CONFIG_MMC_SH_MMCIF) += sh_mmcif.o obj-$(CONFIG_MMC_JZ4740) += jz4740_mmc.o obj-$(CONFIG_MMC_USHC) += ushc.o @@ -40,8 +39,6 @@ obj-$(CONFIG_MMC_SDHCI_PLTFM) += sdhci-platform.o sdhci-platform-y := sdhci-pltfm.o sdhci-platform-$(CONFIG_MMC_SDHCI_CNS3XXX) += sdhci-cns3xxx.o sdhci-platform-$(CONFIG_MMC_SDHCI_ESDHC_IMX) += sdhci-esdhc-imx.o -sdhci-platform-$(CONFIG_MMC_SDHCI_DOVE) += sdhci-dove.o -sdhci-platform-$(CONFIG_MMC_SDHCI_TEGRA) += sdhci-tegra.o obj-$(CONFIG_MMC_SDHCI_OF) += sdhci-of.o sdhci-of-y := sdhci-of-core.o diff --git a/trunk/drivers/mmc/host/davinci_mmc.c b/trunk/drivers/mmc/host/davinci_mmc.c index 0076c7448fe6..e15547cf701f 100644 --- a/trunk/drivers/mmc/host/davinci_mmc.c +++ b/trunk/drivers/mmc/host/davinci_mmc.c @@ -66,8 +66,8 @@ #define DAVINCI_MMCBLNC 0x60 #define DAVINCI_SDIOCTL 0x64 #define DAVINCI_SDIOST0 0x68 -#define DAVINCI_SDIOIEN 0x6C -#define DAVINCI_SDIOIST 0x70 +#define DAVINCI_SDIOEN 0x6C +#define DAVINCI_SDIOST 0x70 #define DAVINCI_MMCFIFOCTL 0x74 /* FIFO Control Register */ /* DAVINCI_MMCCTL definitions */ @@ -131,14 +131,6 @@ #define MMCFIFOCTL_ACCWD_2 (2 << 3) /* access width of 2 bytes */ #define MMCFIFOCTL_ACCWD_1 (3 << 3) /* access width of 1 byte */ -/* DAVINCI_SDIOST0 definitions */ -#define SDIOST0_DAT1_HI BIT(0) - -/* DAVINCI_SDIOIEN definitions */ -#define SDIOIEN_IOINTEN BIT(0) - -/* DAVINCI_SDIOIST definitions */ -#define SDIOIST_IOINT BIT(0) /* MMCSD Init clock in Hz in opendrain mode */ #define MMCSD_INIT_CLOCK 200000 @@ -172,7 +164,7 @@ struct mmc_davinci_host { unsigned int mmc_input_clk; void __iomem *base; struct resource *mem_res; - int mmc_irq, sdio_irq; + int irq; unsigned char bus_mode; #define DAVINCI_MMC_DATADIR_NONE 0 @@ -192,7 +184,6 @@ struct mmc_davinci_host { u32 rxdma, txdma; bool use_dma; bool do_dma; - bool sdio_int; /* Scatterlist DMA uses one or more parameter RAM entries: * the main one (associated with rxdma or txdma) plus zero or @@ -489,7 +480,7 @@ static void mmc_davinci_send_dma_request(struct mmc_davinci_host *host, struct scatterlist *sg; unsigned sg_len; unsigned bytes_left = host->bytes_left; - const unsigned shift = ffs(rw_threshold) - 1; + const unsigned shift = ffs(rw_threshold) - 1;; if (host->data_dir == DAVINCI_MMC_DATADIR_WRITE) { template = &host->tx_template; @@ -875,19 +866,6 @@ mmc_davinci_xfer_done(struct mmc_davinci_host *host, struct mmc_data *data) { host->data = NULL; - if (host->mmc->caps & MMC_CAP_SDIO_IRQ) { - /* - * SDIO Interrupt Detection work-around as suggested by - * Davinci Errata (TMS320DM355 Silicon Revision 1.1 Errata - * 2.1.6): Signal SDIO interrupt only if it is enabled by core - */ - if (host->sdio_int && !(readl(host->base + DAVINCI_SDIOST0) & - SDIOST0_DAT1_HI)) { - writel(SDIOIST_IOINT, host->base + DAVINCI_SDIOIST); - mmc_signal_sdio_irq(host->mmc); - } - } - if (host->do_dma) { davinci_abort_dma(host); @@ -954,21 +932,6 @@ davinci_abort_data(struct mmc_davinci_host *host, struct mmc_data *data) mmc_davinci_reset_ctrl(host, 0); } -static irqreturn_t 
mmc_davinci_sdio_irq(int irq, void *dev_id) -{ - struct mmc_davinci_host *host = dev_id; - unsigned int status; - - status = readl(host->base + DAVINCI_SDIOIST); - if (status & SDIOIST_IOINT) { - dev_dbg(mmc_dev(host->mmc), - "SDIO interrupt status %x\n", status); - writel(status | SDIOIST_IOINT, host->base + DAVINCI_SDIOIST); - mmc_signal_sdio_irq(host->mmc); - } - return IRQ_HANDLED; -} - static irqreturn_t mmc_davinci_irq(int irq, void *dev_id) { struct mmc_davinci_host *host = (struct mmc_davinci_host *)dev_id; @@ -1113,32 +1076,11 @@ static int mmc_davinci_get_ro(struct mmc_host *mmc) return config->get_ro(pdev->id); } -static void mmc_davinci_enable_sdio_irq(struct mmc_host *mmc, int enable) -{ - struct mmc_davinci_host *host = mmc_priv(mmc); - - if (enable) { - if (!(readl(host->base + DAVINCI_SDIOST0) & SDIOST0_DAT1_HI)) { - writel(SDIOIST_IOINT, host->base + DAVINCI_SDIOIST); - mmc_signal_sdio_irq(host->mmc); - } else { - host->sdio_int = true; - writel(readl(host->base + DAVINCI_SDIOIEN) | - SDIOIEN_IOINTEN, host->base + DAVINCI_SDIOIEN); - } - } else { - host->sdio_int = false; - writel(readl(host->base + DAVINCI_SDIOIEN) & ~SDIOIEN_IOINTEN, - host->base + DAVINCI_SDIOIEN); - } -} - static struct mmc_host_ops mmc_davinci_ops = { .request = mmc_davinci_request, .set_ios = mmc_davinci_set_ios, .get_cd = mmc_davinci_get_cd, .get_ro = mmc_davinci_get_ro, - .enable_sdio_irq = mmc_davinci_enable_sdio_irq, }; /*----------------------------------------------------------------------*/ @@ -1267,8 +1209,7 @@ static int __init davinci_mmcsd_probe(struct platform_device *pdev) host->nr_sg = MAX_NR_SG; host->use_dma = use_dma; - host->mmc_irq = irq; - host->sdio_irq = platform_get_irq(pdev, 1); + host->irq = irq; if (host->use_dma && davinci_acquire_dma_channels(host) != 0) host->use_dma = 0; @@ -1329,13 +1270,6 @@ static int __init davinci_mmcsd_probe(struct platform_device *pdev) if (ret) goto out; - if (host->sdio_irq >= 0) { - ret = request_irq(host->sdio_irq, mmc_davinci_sdio_irq, 0, - mmc_hostname(mmc), host); - if (!ret) - mmc->caps |= MMC_CAP_SDIO_IRQ; - } - rename_region(mem, mmc_hostname(mmc)); dev_info(mmc_dev(host->mmc), "Using %s, %d-bit mode\n", @@ -1379,9 +1313,7 @@ static int __exit davinci_mmcsd_remove(struct platform_device *pdev) mmc_davinci_cpufreq_deregister(host); mmc_remove_host(host->mmc); - free_irq(host->mmc_irq, host); - if (host->mmc->caps & MMC_CAP_SDIO_IRQ) - free_irq(host->sdio_irq, host); + free_irq(host->irq, host); davinci_release_dma_channels(host); diff --git a/trunk/drivers/mmc/host/dw_mmc.c b/trunk/drivers/mmc/host/dw_mmc.c deleted file mode 100644 index 2fcc82577c1b..000000000000 --- a/trunk/drivers/mmc/host/dw_mmc.c +++ /dev/null @@ -1,1796 +0,0 @@ -/* - * Synopsys DesignWare Multimedia Card Interface driver - * (Based on NXP driver for lpc 31xx) - * - * Copyright (C) 2009 NXP Semiconductors - * Copyright (C) 2009, 2010 Imagination Technologies Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "dw_mmc.h" - -/* Common flag combinations */ -#define DW_MCI_DATA_ERROR_FLAGS (SDMMC_INT_DTO | SDMMC_INT_DCRC | \ - SDMMC_INT_HTO | SDMMC_INT_SBE | \ - SDMMC_INT_EBE) -#define DW_MCI_CMD_ERROR_FLAGS (SDMMC_INT_RTO | SDMMC_INT_RCRC | \ - SDMMC_INT_RESP_ERR) -#define DW_MCI_ERROR_FLAGS (DW_MCI_DATA_ERROR_FLAGS | \ - DW_MCI_CMD_ERROR_FLAGS | SDMMC_INT_HLE) -#define DW_MCI_SEND_STATUS 1 -#define DW_MCI_RECV_STATUS 2 -#define DW_MCI_DMA_THRESHOLD 16 - -#ifdef CONFIG_MMC_DW_IDMAC -struct idmac_desc { - u32 des0; /* Control Descriptor */ -#define IDMAC_DES0_DIC BIT(1) -#define IDMAC_DES0_LD BIT(2) -#define IDMAC_DES0_FD BIT(3) -#define IDMAC_DES0_CH BIT(4) -#define IDMAC_DES0_ER BIT(5) -#define IDMAC_DES0_CES BIT(30) -#define IDMAC_DES0_OWN BIT(31) - - u32 des1; /* Buffer sizes */ -#define IDMAC_SET_BUFFER1_SIZE(d, s) \ - ((d)->des1 = ((d)->des1 & 0x03ffc000) | ((s) & 0x3fff)) - - u32 des2; /* buffer 1 physical address */ - - u32 des3; /* buffer 2 physical address */ -}; -#endif /* CONFIG_MMC_DW_IDMAC */ - -/** - * struct dw_mci_slot - MMC slot state - * @mmc: The mmc_host representing this slot. - * @host: The MMC controller this slot is using. - * @ctype: Card type for this slot. - * @mrq: mmc_request currently being processed or waiting to be - * processed, or NULL when the slot is idle. - * @queue_node: List node for placing this node in the @queue list of - * &struct dw_mci. - * @clock: Clock rate configured by set_ios(). Protected by host->lock. - * @flags: Random state bits associated with the slot. - * @id: Number of this slot. - * @last_detect_state: Most recently observed card detect state. 
- */ -struct dw_mci_slot { - struct mmc_host *mmc; - struct dw_mci *host; - - u32 ctype; - - struct mmc_request *mrq; - struct list_head queue_node; - - unsigned int clock; - unsigned long flags; -#define DW_MMC_CARD_PRESENT 0 -#define DW_MMC_CARD_NEED_INIT 1 - int id; - int last_detect_state; -}; - -#if defined(CONFIG_DEBUG_FS) -static int dw_mci_req_show(struct seq_file *s, void *v) -{ - struct dw_mci_slot *slot = s->private; - struct mmc_request *mrq; - struct mmc_command *cmd; - struct mmc_command *stop; - struct mmc_data *data; - - /* Make sure we get a consistent snapshot */ - spin_lock_bh(&slot->host->lock); - mrq = slot->mrq; - - if (mrq) { - cmd = mrq->cmd; - data = mrq->data; - stop = mrq->stop; - - if (cmd) - seq_printf(s, - "CMD%u(0x%x) flg %x rsp %x %x %x %x err %d\n", - cmd->opcode, cmd->arg, cmd->flags, - cmd->resp[0], cmd->resp[1], cmd->resp[2], - cmd->resp[2], cmd->error); - if (data) - seq_printf(s, "DATA %u / %u * %u flg %x err %d\n", - data->bytes_xfered, data->blocks, - data->blksz, data->flags, data->error); - if (stop) - seq_printf(s, - "CMD%u(0x%x) flg %x rsp %x %x %x %x err %d\n", - stop->opcode, stop->arg, stop->flags, - stop->resp[0], stop->resp[1], stop->resp[2], - stop->resp[2], stop->error); - } - - spin_unlock_bh(&slot->host->lock); - - return 0; -} - -static int dw_mci_req_open(struct inode *inode, struct file *file) -{ - return single_open(file, dw_mci_req_show, inode->i_private); -} - -static const struct file_operations dw_mci_req_fops = { - .owner = THIS_MODULE, - .open = dw_mci_req_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int dw_mci_regs_show(struct seq_file *s, void *v) -{ - seq_printf(s, "STATUS:\t0x%08x\n", SDMMC_STATUS); - seq_printf(s, "RINTSTS:\t0x%08x\n", SDMMC_RINTSTS); - seq_printf(s, "CMD:\t0x%08x\n", SDMMC_CMD); - seq_printf(s, "CTRL:\t0x%08x\n", SDMMC_CTRL); - seq_printf(s, "INTMASK:\t0x%08x\n", SDMMC_INTMASK); - seq_printf(s, "CLKENA:\t0x%08x\n", SDMMC_CLKENA); - - return 0; -} - -static int dw_mci_regs_open(struct inode *inode, struct file *file) -{ - return single_open(file, dw_mci_regs_show, inode->i_private); -} - -static const struct file_operations dw_mci_regs_fops = { - .owner = THIS_MODULE, - .open = dw_mci_regs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static void dw_mci_init_debugfs(struct dw_mci_slot *slot) -{ - struct mmc_host *mmc = slot->mmc; - struct dw_mci *host = slot->host; - struct dentry *root; - struct dentry *node; - - root = mmc->debugfs_root; - if (!root) - return; - - node = debugfs_create_file("regs", S_IRUSR, root, host, - &dw_mci_regs_fops); - if (!node) - goto err; - - node = debugfs_create_file("req", S_IRUSR, root, slot, - &dw_mci_req_fops); - if (!node) - goto err; - - node = debugfs_create_u32("state", S_IRUSR, root, (u32 *)&host->state); - if (!node) - goto err; - - node = debugfs_create_x32("pending_events", S_IRUSR, root, - (u32 *)&host->pending_events); - if (!node) - goto err; - - node = debugfs_create_x32("completed_events", S_IRUSR, root, - (u32 *)&host->completed_events); - if (!node) - goto err; - - return; - -err: - dev_err(&mmc->class_dev, "failed to initialize debugfs for slot\n"); -} -#endif /* defined(CONFIG_DEBUG_FS) */ - -static void dw_mci_set_timeout(struct dw_mci *host) -{ - /* timeout (maximum) */ - mci_writel(host, TMOUT, 0xffffffff); -} - -static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd) -{ - struct mmc_data *data; - u32 cmdr; - cmd->error = -EINPROGRESS; - - 
cmdr = cmd->opcode; - - if (cmdr == MMC_STOP_TRANSMISSION) - cmdr |= SDMMC_CMD_STOP; - else - cmdr |= SDMMC_CMD_PRV_DAT_WAIT; - - if (cmd->flags & MMC_RSP_PRESENT) { - /* We expect a response, so set this bit */ - cmdr |= SDMMC_CMD_RESP_EXP; - if (cmd->flags & MMC_RSP_136) - cmdr |= SDMMC_CMD_RESP_LONG; - } - - if (cmd->flags & MMC_RSP_CRC) - cmdr |= SDMMC_CMD_RESP_CRC; - - data = cmd->data; - if (data) { - cmdr |= SDMMC_CMD_DAT_EXP; - if (data->flags & MMC_DATA_STREAM) - cmdr |= SDMMC_CMD_STRM_MODE; - if (data->flags & MMC_DATA_WRITE) - cmdr |= SDMMC_CMD_DAT_WR; - } - - return cmdr; -} - -static void dw_mci_start_command(struct dw_mci *host, - struct mmc_command *cmd, u32 cmd_flags) -{ - host->cmd = cmd; - dev_vdbg(&host->pdev->dev, - "start command: ARGR=0x%08x CMDR=0x%08x\n", - cmd->arg, cmd_flags); - - mci_writel(host, CMDARG, cmd->arg); - wmb(); - - mci_writel(host, CMD, cmd_flags | SDMMC_CMD_START); -} - -static void send_stop_cmd(struct dw_mci *host, struct mmc_data *data) -{ - dw_mci_start_command(host, data->stop, host->stop_cmdr); -} - -/* DMA interface functions */ -static void dw_mci_stop_dma(struct dw_mci *host) -{ - if (host->use_dma) { - host->dma_ops->stop(host); - host->dma_ops->cleanup(host); - } else { - /* Data transfer was stopped by the interrupt handler */ - set_bit(EVENT_XFER_COMPLETE, &host->pending_events); - } -} - -#ifdef CONFIG_MMC_DW_IDMAC -static void dw_mci_dma_cleanup(struct dw_mci *host) -{ - struct mmc_data *data = host->data; - - if (data) - dma_unmap_sg(&host->pdev->dev, data->sg, data->sg_len, - ((data->flags & MMC_DATA_WRITE) - ? DMA_TO_DEVICE : DMA_FROM_DEVICE)); -} - -static void dw_mci_idmac_stop_dma(struct dw_mci *host) -{ - u32 temp; - - /* Disable and reset the IDMAC interface */ - temp = mci_readl(host, CTRL); - temp &= ~SDMMC_CTRL_USE_IDMAC; - temp |= SDMMC_CTRL_DMA_RESET; - mci_writel(host, CTRL, temp); - - /* Stop the IDMAC running */ - temp = mci_readl(host, BMOD); - temp &= ~SDMMC_IDMAC_ENABLE; - mci_writel(host, BMOD, temp); -} - -static void dw_mci_idmac_complete_dma(struct dw_mci *host) -{ - struct mmc_data *data = host->data; - - dev_vdbg(&host->pdev->dev, "DMA complete\n"); - - host->dma_ops->cleanup(host); - - /* - * If the card was removed, data will be NULL. No point in trying to - * send the stop command or waiting for NBUSY in this case. 
- */ - if (data) { - set_bit(EVENT_XFER_COMPLETE, &host->pending_events); - tasklet_schedule(&host->tasklet); - } -} - -static void dw_mci_translate_sglist(struct dw_mci *host, struct mmc_data *data, - unsigned int sg_len) -{ - int i; - struct idmac_desc *desc = host->sg_cpu; - - for (i = 0; i < sg_len; i++, desc++) { - unsigned int length = sg_dma_len(&data->sg[i]); - u32 mem_addr = sg_dma_address(&data->sg[i]); - - /* Set the OWN bit and disable interrupts for this descriptor */ - desc->des0 = IDMAC_DES0_OWN | IDMAC_DES0_DIC | IDMAC_DES0_CH; - - /* Buffer length */ - IDMAC_SET_BUFFER1_SIZE(desc, length); - - /* Physical address to DMA to/from */ - desc->des2 = mem_addr; - } - - /* Set first descriptor */ - desc = host->sg_cpu; - desc->des0 |= IDMAC_DES0_FD; - - /* Set last descriptor */ - desc = host->sg_cpu + (i - 1) * sizeof(struct idmac_desc); - desc->des0 &= ~(IDMAC_DES0_CH | IDMAC_DES0_DIC); - desc->des0 |= IDMAC_DES0_LD; - - wmb(); -} - -static void dw_mci_idmac_start_dma(struct dw_mci *host, unsigned int sg_len) -{ - u32 temp; - - dw_mci_translate_sglist(host, host->data, sg_len); - - /* Select IDMAC interface */ - temp = mci_readl(host, CTRL); - temp |= SDMMC_CTRL_USE_IDMAC; - mci_writel(host, CTRL, temp); - - wmb(); - - /* Enable the IDMAC */ - temp = mci_readl(host, BMOD); - temp |= SDMMC_IDMAC_ENABLE; - mci_writel(host, BMOD, temp); - - /* Start it running */ - mci_writel(host, PLDMND, 1); -} - -static int dw_mci_idmac_init(struct dw_mci *host) -{ - struct idmac_desc *p; - int i; - - /* Number of descriptors in the ring buffer */ - host->ring_size = PAGE_SIZE / sizeof(struct idmac_desc); - - /* Forward link the descriptor list */ - for (i = 0, p = host->sg_cpu; i < host->ring_size - 1; i++, p++) - p->des3 = host->sg_dma + (sizeof(struct idmac_desc) * (i + 1)); - - /* Set the last descriptor as the end-of-ring descriptor */ - p->des3 = host->sg_dma; - p->des0 = IDMAC_DES0_ER; - - /* Mask out interrupts - get Tx & Rx complete only */ - mci_writel(host, IDINTEN, SDMMC_IDMAC_INT_NI | SDMMC_IDMAC_INT_RI | - SDMMC_IDMAC_INT_TI); - - /* Set the descriptor base address */ - mci_writel(host, DBADDR, host->sg_dma); - return 0; -} - -static struct dw_mci_dma_ops dw_mci_idmac_ops = { - .init = dw_mci_idmac_init, - .start = dw_mci_idmac_start_dma, - .stop = dw_mci_idmac_stop_dma, - .complete = dw_mci_idmac_complete_dma, - .cleanup = dw_mci_dma_cleanup, -}; -#endif /* CONFIG_MMC_DW_IDMAC */ - -static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data) -{ - struct scatterlist *sg; - unsigned int i, direction, sg_len; - u32 temp; - - /* If we don't have a channel, we can't do DMA */ - if (!host->use_dma) - return -ENODEV; - - /* - * We don't do DMA on "complex" transfers, i.e. with - * non-word-aligned buffers or lengths. Also, we don't bother - * with all the DMA setup overhead for short transfers. 
- */ - if (data->blocks * data->blksz < DW_MCI_DMA_THRESHOLD) - return -EINVAL; - if (data->blksz & 3) - return -EINVAL; - - for_each_sg(data->sg, sg, data->sg_len, i) { - if (sg->offset & 3 || sg->length & 3) - return -EINVAL; - } - - if (data->flags & MMC_DATA_READ) - direction = DMA_FROM_DEVICE; - else - direction = DMA_TO_DEVICE; - - sg_len = dma_map_sg(&host->pdev->dev, data->sg, data->sg_len, - direction); - - dev_vdbg(&host->pdev->dev, - "sd sg_cpu: %#lx sg_dma: %#lx sg_len: %d\n", - (unsigned long)host->sg_cpu, (unsigned long)host->sg_dma, - sg_len); - - /* Enable the DMA interface */ - temp = mci_readl(host, CTRL); - temp |= SDMMC_CTRL_DMA_ENABLE; - mci_writel(host, CTRL, temp); - - /* Disable RX/TX IRQs, let DMA handle it */ - temp = mci_readl(host, INTMASK); - temp &= ~(SDMMC_INT_RXDR | SDMMC_INT_TXDR); - mci_writel(host, INTMASK, temp); - - host->dma_ops->start(host, sg_len); - - return 0; -} - -static void dw_mci_submit_data(struct dw_mci *host, struct mmc_data *data) -{ - u32 temp; - - data->error = -EINPROGRESS; - - WARN_ON(host->data); - host->sg = NULL; - host->data = data; - - if (dw_mci_submit_data_dma(host, data)) { - host->sg = data->sg; - host->pio_offset = 0; - if (data->flags & MMC_DATA_READ) - host->dir_status = DW_MCI_RECV_STATUS; - else - host->dir_status = DW_MCI_SEND_STATUS; - - temp = mci_readl(host, INTMASK); - temp |= SDMMC_INT_TXDR | SDMMC_INT_RXDR; - mci_writel(host, INTMASK, temp); - - temp = mci_readl(host, CTRL); - temp &= ~SDMMC_CTRL_DMA_ENABLE; - mci_writel(host, CTRL, temp); - } -} - -static void mci_send_cmd(struct dw_mci_slot *slot, u32 cmd, u32 arg) -{ - struct dw_mci *host = slot->host; - unsigned long timeout = jiffies + msecs_to_jiffies(500); - unsigned int cmd_status = 0; - - mci_writel(host, CMDARG, arg); - wmb(); - mci_writel(host, CMD, SDMMC_CMD_START | cmd); - - while (time_before(jiffies, timeout)) { - cmd_status = mci_readl(host, CMD); - if (!(cmd_status & SDMMC_CMD_START)) - return; - } - dev_err(&slot->mmc->class_dev, - "Timeout sending command (cmd %#x arg %#x status %#x)\n", - cmd, arg, cmd_status); -} - -static void dw_mci_setup_bus(struct dw_mci_slot *slot) -{ - struct dw_mci *host = slot->host; - u32 div; - - if (slot->clock != host->current_speed) { - if (host->bus_hz % slot->clock) - /* - * move the + 1 after the divide to prevent - * over-clocking the card. - */ - div = ((host->bus_hz / slot->clock) >> 1) + 1; - else - div = (host->bus_hz / slot->clock) >> 1; - - dev_info(&slot->mmc->class_dev, - "Bus speed (slot %d) = %dHz (slot req %dHz, actual %dHZ" - " div = %d)\n", slot->id, host->bus_hz, slot->clock, - div ? 
((host->bus_hz / div) >> 1) : host->bus_hz, div); - - /* disable clock */ - mci_writel(host, CLKENA, 0); - mci_writel(host, CLKSRC, 0); - - /* inform CIU */ - mci_send_cmd(slot, - SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0); - - /* set clock to desired speed */ - mci_writel(host, CLKDIV, div); - - /* inform CIU */ - mci_send_cmd(slot, - SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0); - - /* enable clock */ - mci_writel(host, CLKENA, SDMMC_CLKEN_ENABLE); - - /* inform CIU */ - mci_send_cmd(slot, - SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0); - - host->current_speed = slot->clock; - } - - /* Set the current slot bus width */ - mci_writel(host, CTYPE, slot->ctype); -} - -static void dw_mci_start_request(struct dw_mci *host, - struct dw_mci_slot *slot) -{ - struct mmc_request *mrq; - struct mmc_command *cmd; - struct mmc_data *data; - u32 cmdflags; - - mrq = slot->mrq; - if (host->pdata->select_slot) - host->pdata->select_slot(slot->id); - - /* Slot specific timing and width adjustment */ - dw_mci_setup_bus(slot); - - host->cur_slot = slot; - host->mrq = mrq; - - host->pending_events = 0; - host->completed_events = 0; - host->data_status = 0; - - data = mrq->data; - if (data) { - dw_mci_set_timeout(host); - mci_writel(host, BYTCNT, data->blksz*data->blocks); - mci_writel(host, BLKSIZ, data->blksz); - } - - cmd = mrq->cmd; - cmdflags = dw_mci_prepare_command(slot->mmc, cmd); - - /* this is the first command, send the initialization clock */ - if (test_and_clear_bit(DW_MMC_CARD_NEED_INIT, &slot->flags)) - cmdflags |= SDMMC_CMD_INIT; - - if (data) { - dw_mci_submit_data(host, data); - wmb(); - } - - dw_mci_start_command(host, cmd, cmdflags); - - if (mrq->stop) - host->stop_cmdr = dw_mci_prepare_command(slot->mmc, mrq->stop); -} - -static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot, - struct mmc_request *mrq) -{ - dev_vdbg(&slot->mmc->class_dev, "queue request: state=%d\n", - host->state); - - spin_lock_bh(&host->lock); - slot->mrq = mrq; - - if (host->state == STATE_IDLE) { - host->state = STATE_SENDING_CMD; - dw_mci_start_request(host, slot); - } else { - list_add_tail(&slot->queue_node, &host->queue); - } - - spin_unlock_bh(&host->lock); -} - -static void dw_mci_request(struct mmc_host *mmc, struct mmc_request *mrq) -{ - struct dw_mci_slot *slot = mmc_priv(mmc); - struct dw_mci *host = slot->host; - - WARN_ON(slot->mrq); - - if (!test_bit(DW_MMC_CARD_PRESENT, &slot->flags)) { - mrq->cmd->error = -ENOMEDIUM; - mmc_request_done(mmc, mrq); - return; - } - - /* We don't support multiple blocks of weird lengths. */ - dw_mci_queue_request(host, slot, mrq); -} - -static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) -{ - struct dw_mci_slot *slot = mmc_priv(mmc); - - /* set default 1 bit mode */ - slot->ctype = SDMMC_CTYPE_1BIT; - - switch (ios->bus_width) { - case MMC_BUS_WIDTH_1: - slot->ctype = SDMMC_CTYPE_1BIT; - break; - case MMC_BUS_WIDTH_4: - slot->ctype = SDMMC_CTYPE_4BIT; - break; - } - - if (ios->clock) { - /* - * Use mirror of ios->clock to prevent race with mmc - * core ios update when finding the minimum. 
- */ - slot->clock = ios->clock; - } - - switch (ios->power_mode) { - case MMC_POWER_UP: - set_bit(DW_MMC_CARD_NEED_INIT, &slot->flags); - break; - default: - break; - } -} - -static int dw_mci_get_ro(struct mmc_host *mmc) -{ - int read_only; - struct dw_mci_slot *slot = mmc_priv(mmc); - struct dw_mci_board *brd = slot->host->pdata; - - /* Use platform get_ro function, else try on board write protect */ - if (brd->get_ro) - read_only = brd->get_ro(slot->id); - else - read_only = - mci_readl(slot->host, WRTPRT) & (1 << slot->id) ? 1 : 0; - - dev_dbg(&mmc->class_dev, "card is %s\n", - read_only ? "read-only" : "read-write"); - - return read_only; -} - -static int dw_mci_get_cd(struct mmc_host *mmc) -{ - int present; - struct dw_mci_slot *slot = mmc_priv(mmc); - struct dw_mci_board *brd = slot->host->pdata; - - /* Use platform get_cd function, else try onboard card detect */ - if (brd->get_cd) - present = !brd->get_cd(slot->id); - else - present = (mci_readl(slot->host, CDETECT) & (1 << slot->id)) - == 0 ? 1 : 0; - - if (present) - dev_dbg(&mmc->class_dev, "card is present\n"); - else - dev_dbg(&mmc->class_dev, "card is not present\n"); - - return present; -} - -static const struct mmc_host_ops dw_mci_ops = { - .request = dw_mci_request, - .set_ios = dw_mci_set_ios, - .get_ro = dw_mci_get_ro, - .get_cd = dw_mci_get_cd, -}; - -static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq) - __releases(&host->lock) - __acquires(&host->lock) -{ - struct dw_mci_slot *slot; - struct mmc_host *prev_mmc = host->cur_slot->mmc; - - WARN_ON(host->cmd || host->data); - - host->cur_slot->mrq = NULL; - host->mrq = NULL; - if (!list_empty(&host->queue)) { - slot = list_entry(host->queue.next, - struct dw_mci_slot, queue_node); - list_del(&slot->queue_node); - dev_vdbg(&host->pdev->dev, "list not empty: %s is next\n", - mmc_hostname(slot->mmc)); - host->state = STATE_SENDING_CMD; - dw_mci_start_request(host, slot); - } else { - dev_vdbg(&host->pdev->dev, "list empty\n"); - host->state = STATE_IDLE; - } - - spin_unlock(&host->lock); - mmc_request_done(prev_mmc, mrq); - spin_lock(&host->lock); -} - -static void dw_mci_command_complete(struct dw_mci *host, struct mmc_command *cmd) -{ - u32 status = host->cmd_status; - - host->cmd_status = 0; - - /* Read the response from the card (up to 16 bytes) */ - if (cmd->flags & MMC_RSP_PRESENT) { - if (cmd->flags & MMC_RSP_136) { - cmd->resp[3] = mci_readl(host, RESP0); - cmd->resp[2] = mci_readl(host, RESP1); - cmd->resp[1] = mci_readl(host, RESP2); - cmd->resp[0] = mci_readl(host, RESP3); - } else { - cmd->resp[0] = mci_readl(host, RESP0); - cmd->resp[1] = 0; - cmd->resp[2] = 0; - cmd->resp[3] = 0; - } - } - - if (status & SDMMC_INT_RTO) - cmd->error = -ETIMEDOUT; - else if ((cmd->flags & MMC_RSP_CRC) && (status & SDMMC_INT_RCRC)) - cmd->error = -EILSEQ; - else if (status & SDMMC_INT_RESP_ERR) - cmd->error = -EIO; - else - cmd->error = 0; - - if (cmd->error) { - /* newer ip versions need a delay between retries */ - if (host->quirks & DW_MCI_QUIRK_RETRY_DELAY) - mdelay(20); - - if (cmd->data) { - host->data = NULL; - dw_mci_stop_dma(host); - } - } -} - -static void dw_mci_tasklet_func(unsigned long priv) -{ - struct dw_mci *host = (struct dw_mci *)priv; - struct mmc_data *data; - struct mmc_command *cmd; - enum dw_mci_state state; - enum dw_mci_state prev_state; - u32 status; - - spin_lock(&host->lock); - - state = host->state; - data = host->data; - - do { - prev_state = state; - - switch (state) { - case STATE_IDLE: - break; - - case STATE_SENDING_CMD: 
- if (!test_and_clear_bit(EVENT_CMD_COMPLETE, - &host->pending_events)) - break; - - cmd = host->cmd; - host->cmd = NULL; - set_bit(EVENT_CMD_COMPLETE, &host->completed_events); - dw_mci_command_complete(host, host->mrq->cmd); - if (!host->mrq->data || cmd->error) { - dw_mci_request_end(host, host->mrq); - goto unlock; - } - - prev_state = state = STATE_SENDING_DATA; - /* fall through */ - - case STATE_SENDING_DATA: - if (test_and_clear_bit(EVENT_DATA_ERROR, - &host->pending_events)) { - dw_mci_stop_dma(host); - if (data->stop) - send_stop_cmd(host, data); - state = STATE_DATA_ERROR; - break; - } - - if (!test_and_clear_bit(EVENT_XFER_COMPLETE, - &host->pending_events)) - break; - - set_bit(EVENT_XFER_COMPLETE, &host->completed_events); - prev_state = state = STATE_DATA_BUSY; - /* fall through */ - - case STATE_DATA_BUSY: - if (!test_and_clear_bit(EVENT_DATA_COMPLETE, - &host->pending_events)) - break; - - host->data = NULL; - set_bit(EVENT_DATA_COMPLETE, &host->completed_events); - status = host->data_status; - - if (status & DW_MCI_DATA_ERROR_FLAGS) { - if (status & SDMMC_INT_DTO) { - dev_err(&host->pdev->dev, - "data timeout error\n"); - data->error = -ETIMEDOUT; - } else if (status & SDMMC_INT_DCRC) { - dev_err(&host->pdev->dev, - "data CRC error\n"); - data->error = -EILSEQ; - } else { - dev_err(&host->pdev->dev, - "data FIFO error " - "(status=%08x)\n", - status); - data->error = -EIO; - } - } else { - data->bytes_xfered = data->blocks * data->blksz; - data->error = 0; - } - - if (!data->stop) { - dw_mci_request_end(host, host->mrq); - goto unlock; - } - - prev_state = state = STATE_SENDING_STOP; - if (!data->error) - send_stop_cmd(host, data); - /* fall through */ - - case STATE_SENDING_STOP: - if (!test_and_clear_bit(EVENT_CMD_COMPLETE, - &host->pending_events)) - break; - - host->cmd = NULL; - dw_mci_command_complete(host, host->mrq->stop); - dw_mci_request_end(host, host->mrq); - goto unlock; - - case STATE_DATA_ERROR: - if (!test_and_clear_bit(EVENT_XFER_COMPLETE, - &host->pending_events)) - break; - - state = STATE_DATA_BUSY; - break; - } - } while (state != prev_state); - - host->state = state; -unlock: - spin_unlock(&host->lock); - -} - -static void dw_mci_push_data16(struct dw_mci *host, void *buf, int cnt) -{ - u16 *pdata = (u16 *)buf; - - WARN_ON(cnt % 2 != 0); - - cnt = cnt >> 1; - while (cnt > 0) { - mci_writew(host, DATA, *pdata++); - cnt--; - } -} - -static void dw_mci_pull_data16(struct dw_mci *host, void *buf, int cnt) -{ - u16 *pdata = (u16 *)buf; - - WARN_ON(cnt % 2 != 0); - - cnt = cnt >> 1; - while (cnt > 0) { - *pdata++ = mci_readw(host, DATA); - cnt--; - } -} - -static void dw_mci_push_data32(struct dw_mci *host, void *buf, int cnt) -{ - u32 *pdata = (u32 *)buf; - - WARN_ON(cnt % 4 != 0); - WARN_ON((unsigned long)pdata & 0x3); - - cnt = cnt >> 2; - while (cnt > 0) { - mci_writel(host, DATA, *pdata++); - cnt--; - } -} - -static void dw_mci_pull_data32(struct dw_mci *host, void *buf, int cnt) -{ - u32 *pdata = (u32 *)buf; - - WARN_ON(cnt % 4 != 0); - WARN_ON((unsigned long)pdata & 0x3); - - cnt = cnt >> 2; - while (cnt > 0) { - *pdata++ = mci_readl(host, DATA); - cnt--; - } -} - -static void dw_mci_push_data64(struct dw_mci *host, void *buf, int cnt) -{ - u64 *pdata = (u64 *)buf; - - WARN_ON(cnt % 8 != 0); - - cnt = cnt >> 3; - while (cnt > 0) { - mci_writeq(host, DATA, *pdata++); - cnt--; - } -} - -static void dw_mci_pull_data64(struct dw_mci *host, void *buf, int cnt) -{ - u64 *pdata = (u64 *)buf; - - WARN_ON(cnt % 8 != 0); - - cnt = cnt >> 3; - while (cnt > 0) 
{ - *pdata++ = mci_readq(host, DATA); - cnt--; - } -} - -static void dw_mci_read_data_pio(struct dw_mci *host) -{ - struct scatterlist *sg = host->sg; - void *buf = sg_virt(sg); - unsigned int offset = host->pio_offset; - struct mmc_data *data = host->data; - int shift = host->data_shift; - u32 status; - unsigned int nbytes = 0, len, old_len, count = 0; - - do { - len = SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift; - if (count == 0) - old_len = len; - - if (offset + len <= sg->length) { - host->pull_data(host, (void *)(buf + offset), len); - - offset += len; - nbytes += len; - - if (offset == sg->length) { - flush_dcache_page(sg_page(sg)); - host->sg = sg = sg_next(sg); - if (!sg) - goto done; - - offset = 0; - buf = sg_virt(sg); - } - } else { - unsigned int remaining = sg->length - offset; - host->pull_data(host, (void *)(buf + offset), - remaining); - nbytes += remaining; - - flush_dcache_page(sg_page(sg)); - host->sg = sg = sg_next(sg); - if (!sg) - goto done; - - offset = len - remaining; - buf = sg_virt(sg); - host->pull_data(host, buf, offset); - nbytes += offset; - } - - status = mci_readl(host, MINTSTS); - mci_writel(host, RINTSTS, SDMMC_INT_RXDR); - if (status & DW_MCI_DATA_ERROR_FLAGS) { - host->data_status = status; - data->bytes_xfered += nbytes; - smp_wmb(); - - set_bit(EVENT_DATA_ERROR, &host->pending_events); - - tasklet_schedule(&host->tasklet); - return; - } - count++; - } while (status & SDMMC_INT_RXDR); /*if the RXDR is ready read again*/ - len = SDMMC_GET_FCNT(mci_readl(host, STATUS)); - host->pio_offset = offset; - data->bytes_xfered += nbytes; - return; - -done: - data->bytes_xfered += nbytes; - smp_wmb(); - set_bit(EVENT_XFER_COMPLETE, &host->pending_events); -} - -static void dw_mci_write_data_pio(struct dw_mci *host) -{ - struct scatterlist *sg = host->sg; - void *buf = sg_virt(sg); - unsigned int offset = host->pio_offset; - struct mmc_data *data = host->data; - int shift = host->data_shift; - u32 status; - unsigned int nbytes = 0, len; - - do { - len = SDMMC_FIFO_SZ - - (SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift); - if (offset + len <= sg->length) { - host->push_data(host, (void *)(buf + offset), len); - - offset += len; - nbytes += len; - if (offset == sg->length) { - host->sg = sg = sg_next(sg); - if (!sg) - goto done; - - offset = 0; - buf = sg_virt(sg); - } - } else { - unsigned int remaining = sg->length - offset; - - host->push_data(host, (void *)(buf + offset), - remaining); - nbytes += remaining; - - host->sg = sg = sg_next(sg); - if (!sg) - goto done; - - offset = len - remaining; - buf = sg_virt(sg); - host->push_data(host, (void *)buf, offset); - nbytes += offset; - } - - status = mci_readl(host, MINTSTS); - mci_writel(host, RINTSTS, SDMMC_INT_TXDR); - if (status & DW_MCI_DATA_ERROR_FLAGS) { - host->data_status = status; - data->bytes_xfered += nbytes; - - smp_wmb(); - - set_bit(EVENT_DATA_ERROR, &host->pending_events); - - tasklet_schedule(&host->tasklet); - return; - } - } while (status & SDMMC_INT_TXDR); /* if TXDR write again */ - - host->pio_offset = offset; - data->bytes_xfered += nbytes; - - return; - -done: - data->bytes_xfered += nbytes; - smp_wmb(); - set_bit(EVENT_XFER_COMPLETE, &host->pending_events); -} - -static void dw_mci_cmd_interrupt(struct dw_mci *host, u32 status) -{ - if (!host->cmd_status) - host->cmd_status = status; - - smp_wmb(); - - set_bit(EVENT_CMD_COMPLETE, &host->pending_events); - tasklet_schedule(&host->tasklet); -} - -static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) -{ - struct dw_mci *host = 
dev_id; - u32 status, pending; - unsigned int pass_count = 0; - - do { - status = mci_readl(host, RINTSTS); - pending = mci_readl(host, MINTSTS); /* read-only mask reg */ - - /* - * DTO fix - version 2.10a and below, and only if internal DMA - * is configured. - */ - if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO) { - if (!pending && - ((mci_readl(host, STATUS) >> 17) & 0x1fff)) - pending |= SDMMC_INT_DATA_OVER; - } - - if (!pending) - break; - - if (pending & DW_MCI_CMD_ERROR_FLAGS) { - mci_writel(host, RINTSTS, DW_MCI_CMD_ERROR_FLAGS); - host->cmd_status = status; - smp_wmb(); - set_bit(EVENT_CMD_COMPLETE, &host->pending_events); - tasklet_schedule(&host->tasklet); - } - - if (pending & DW_MCI_DATA_ERROR_FLAGS) { - /* if there is an error report DATA_ERROR */ - mci_writel(host, RINTSTS, DW_MCI_DATA_ERROR_FLAGS); - host->data_status = status; - smp_wmb(); - set_bit(EVENT_DATA_ERROR, &host->pending_events); - tasklet_schedule(&host->tasklet); - } - - if (pending & SDMMC_INT_DATA_OVER) { - mci_writel(host, RINTSTS, SDMMC_INT_DATA_OVER); - if (!host->data_status) - host->data_status = status; - smp_wmb(); - if (host->dir_status == DW_MCI_RECV_STATUS) { - if (host->sg != NULL) - dw_mci_read_data_pio(host); - } - set_bit(EVENT_DATA_COMPLETE, &host->pending_events); - tasklet_schedule(&host->tasklet); - } - - if (pending & SDMMC_INT_RXDR) { - mci_writel(host, RINTSTS, SDMMC_INT_RXDR); - if (host->sg) - dw_mci_read_data_pio(host); - } - - if (pending & SDMMC_INT_TXDR) { - mci_writel(host, RINTSTS, SDMMC_INT_TXDR); - if (host->sg) - dw_mci_write_data_pio(host); - } - - if (pending & SDMMC_INT_CMD_DONE) { - mci_writel(host, RINTSTS, SDMMC_INT_CMD_DONE); - dw_mci_cmd_interrupt(host, status); - } - - if (pending & SDMMC_INT_CD) { - mci_writel(host, RINTSTS, SDMMC_INT_CD); - tasklet_schedule(&host->card_tasklet); - } - - } while (pass_count++ < 5); - -#ifdef CONFIG_MMC_DW_IDMAC - /* Handle DMA interrupts */ - pending = mci_readl(host, IDSTS); - if (pending & (SDMMC_IDMAC_INT_TI | SDMMC_IDMAC_INT_RI)) { - mci_writel(host, IDSTS, SDMMC_IDMAC_INT_TI | SDMMC_IDMAC_INT_RI); - mci_writel(host, IDSTS, SDMMC_IDMAC_INT_NI); - set_bit(EVENT_DATA_COMPLETE, &host->pending_events); - host->dma_ops->complete(host); - } -#endif - - return IRQ_HANDLED; -} - -static void dw_mci_tasklet_card(unsigned long data) -{ - struct dw_mci *host = (struct dw_mci *)data; - int i; - - for (i = 0; i < host->num_slots; i++) { - struct dw_mci_slot *slot = host->slot[i]; - struct mmc_host *mmc = slot->mmc; - struct mmc_request *mrq; - int present; - u32 ctrl; - - present = dw_mci_get_cd(mmc); - while (present != slot->last_detect_state) { - spin_lock(&host->lock); - - dev_dbg(&slot->mmc->class_dev, "card %s\n", - present ? 
"inserted" : "removed"); - - /* Card change detected */ - slot->last_detect_state = present; - - /* Power up slot */ - if (present != 0) { - if (host->pdata->setpower) - host->pdata->setpower(slot->id, - mmc->ocr_avail); - - set_bit(DW_MMC_CARD_PRESENT, &slot->flags); - } - - /* Clean up queue if present */ - mrq = slot->mrq; - if (mrq) { - if (mrq == host->mrq) { - host->data = NULL; - host->cmd = NULL; - - switch (host->state) { - case STATE_IDLE: - break; - case STATE_SENDING_CMD: - mrq->cmd->error = -ENOMEDIUM; - if (!mrq->data) - break; - /* fall through */ - case STATE_SENDING_DATA: - mrq->data->error = -ENOMEDIUM; - dw_mci_stop_dma(host); - break; - case STATE_DATA_BUSY: - case STATE_DATA_ERROR: - if (mrq->data->error == -EINPROGRESS) - mrq->data->error = -ENOMEDIUM; - if (!mrq->stop) - break; - /* fall through */ - case STATE_SENDING_STOP: - mrq->stop->error = -ENOMEDIUM; - break; - } - - dw_mci_request_end(host, mrq); - } else { - list_del(&slot->queue_node); - mrq->cmd->error = -ENOMEDIUM; - if (mrq->data) - mrq->data->error = -ENOMEDIUM; - if (mrq->stop) - mrq->stop->error = -ENOMEDIUM; - - spin_unlock(&host->lock); - mmc_request_done(slot->mmc, mrq); - spin_lock(&host->lock); - } - } - - /* Power down slot */ - if (present == 0) { - if (host->pdata->setpower) - host->pdata->setpower(slot->id, 0); - clear_bit(DW_MMC_CARD_PRESENT, &slot->flags); - - /* - * Clear down the FIFO - doing so generates a - * block interrupt, hence setting the - * scatter-gather pointer to NULL. - */ - host->sg = NULL; - - ctrl = mci_readl(host, CTRL); - ctrl |= SDMMC_CTRL_FIFO_RESET; - mci_writel(host, CTRL, ctrl); - -#ifdef CONFIG_MMC_DW_IDMAC - ctrl = mci_readl(host, BMOD); - ctrl |= 0x01; /* Software reset of DMA */ - mci_writel(host, BMOD, ctrl); -#endif - - } - - spin_unlock(&host->lock); - present = dw_mci_get_cd(mmc); - } - - mmc_detect_change(slot->mmc, - msecs_to_jiffies(host->pdata->detect_delay_ms)); - } -} - -static int __init dw_mci_init_slot(struct dw_mci *host, unsigned int id) -{ - struct mmc_host *mmc; - struct dw_mci_slot *slot; - - mmc = mmc_alloc_host(sizeof(struct dw_mci_slot), &host->pdev->dev); - if (!mmc) - return -ENOMEM; - - slot = mmc_priv(mmc); - slot->id = id; - slot->mmc = mmc; - slot->host = host; - - mmc->ops = &dw_mci_ops; - mmc->f_min = DIV_ROUND_UP(host->bus_hz, 510); - mmc->f_max = host->bus_hz; - - if (host->pdata->get_ocr) - mmc->ocr_avail = host->pdata->get_ocr(id); - else - mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; - - /* - * Start with slot power disabled, it will be enabled when a card - * is detected. - */ - if (host->pdata->setpower) - host->pdata->setpower(id, 0); - - mmc->caps = 0; - if (host->pdata->get_bus_wd) - if (host->pdata->get_bus_wd(slot->id) >= 4) - mmc->caps |= MMC_CAP_4_BIT_DATA; - - if (host->pdata->quirks & DW_MCI_QUIRK_HIGHSPEED) - mmc->caps |= MMC_CAP_SD_HIGHSPEED; - -#ifdef CONFIG_MMC_DW_IDMAC - mmc->max_segs = host->ring_size; - mmc->max_blk_size = 65536; - mmc->max_blk_count = host->ring_size; - mmc->max_seg_size = 0x1000; - mmc->max_req_size = mmc->max_seg_size * mmc->max_blk_count; -#else - if (host->pdata->blk_settings) { - mmc->max_segs = host->pdata->blk_settings->max_segs; - mmc->max_blk_size = host->pdata->blk_settings->max_blk_size; - mmc->max_blk_count = host->pdata->blk_settings->max_blk_count; - mmc->max_req_size = host->pdata->blk_settings->max_req_size; - mmc->max_seg_size = host->pdata->blk_settings->max_seg_size; - } else { - /* Useful defaults if platform data is unset. 
*/ - mmc->max_segs = 64; - mmc->max_blk_size = 65536; /* BLKSIZ is 16 bits */ - mmc->max_blk_count = 512; - mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count; - mmc->max_seg_size = mmc->max_req_size; - } -#endif /* CONFIG_MMC_DW_IDMAC */ - - if (dw_mci_get_cd(mmc)) - set_bit(DW_MMC_CARD_PRESENT, &slot->flags); - else - clear_bit(DW_MMC_CARD_PRESENT, &slot->flags); - - host->slot[id] = slot; - mmc_add_host(mmc); - -#if defined(CONFIG_DEBUG_FS) - dw_mci_init_debugfs(slot); -#endif - - /* Card initially undetected */ - slot->last_detect_state = 0; - - return 0; -} - -static void dw_mci_cleanup_slot(struct dw_mci_slot *slot, unsigned int id) -{ - /* Shutdown detect IRQ */ - if (slot->host->pdata->exit) - slot->host->pdata->exit(id); - - /* Debugfs stuff is cleaned up by mmc core */ - mmc_remove_host(slot->mmc); - slot->host->slot[id] = NULL; - mmc_free_host(slot->mmc); -} - -static void dw_mci_init_dma(struct dw_mci *host) -{ - /* Alloc memory for sg translation */ - host->sg_cpu = dma_alloc_coherent(&host->pdev->dev, PAGE_SIZE, - &host->sg_dma, GFP_KERNEL); - if (!host->sg_cpu) { - dev_err(&host->pdev->dev, "%s: could not alloc DMA memory\n", - __func__); - goto no_dma; - } - - /* Determine which DMA interface to use */ -#ifdef CONFIG_MMC_DW_IDMAC - host->dma_ops = &dw_mci_idmac_ops; - dev_info(&host->pdev->dev, "Using internal DMA controller.\n"); -#endif - - if (!host->dma_ops) - goto no_dma; - - if (host->dma_ops->init) { - if (host->dma_ops->init(host)) { - dev_err(&host->pdev->dev, "%s: Unable to initialize " - "DMA Controller.\n", __func__); - goto no_dma; - } - } else { - dev_err(&host->pdev->dev, "DMA initialization not found.\n"); - goto no_dma; - } - - host->use_dma = 1; - return; - -no_dma: - dev_info(&host->pdev->dev, "Using PIO mode.\n"); - host->use_dma = 0; - return; -} - -static bool mci_wait_reset(struct device *dev, struct dw_mci *host) -{ - unsigned long timeout = jiffies + msecs_to_jiffies(500); - unsigned int ctrl; - - mci_writel(host, CTRL, (SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET | - SDMMC_CTRL_DMA_RESET)); - - /* wait till resets clear */ - do { - ctrl = mci_readl(host, CTRL); - if (!(ctrl & (SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET | - SDMMC_CTRL_DMA_RESET))) - return true; - } while (time_before(jiffies, timeout)); - - dev_err(dev, "Timeout resetting block (ctrl %#x)\n", ctrl); - - return false; -} - -static int dw_mci_probe(struct platform_device *pdev) -{ - struct dw_mci *host; - struct resource *regs; - struct dw_mci_board *pdata; - int irq, ret, i, width; - u32 fifo_size; - - regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!regs) - return -ENXIO; - - irq = platform_get_irq(pdev, 0); - if (irq < 0) - return irq; - - host = kzalloc(sizeof(struct dw_mci), GFP_KERNEL); - if (!host) - return -ENOMEM; - - host->pdev = pdev; - host->pdata = pdata = pdev->dev.platform_data; - if (!pdata || !pdata->init) { - dev_err(&pdev->dev, - "Platform data must supply init function\n"); - ret = -ENODEV; - goto err_freehost; - } - - if (!pdata->select_slot && pdata->num_slots > 1) { - dev_err(&pdev->dev, - "Platform data must supply select_slot function\n"); - ret = -ENODEV; - goto err_freehost; - } - - if (!pdata->bus_hz) { - dev_err(&pdev->dev, - "Platform data must supply bus speed\n"); - ret = -ENODEV; - goto err_freehost; - } - - host->bus_hz = pdata->bus_hz; - host->quirks = pdata->quirks; - - spin_lock_init(&host->lock); - INIT_LIST_HEAD(&host->queue); - - ret = -ENOMEM; - host->regs = ioremap(regs->start, regs->end - regs->start + 1); - if (!host->regs) 
- goto err_freehost; - - host->dma_ops = pdata->dma_ops; - dw_mci_init_dma(host); - - /* - * Get the host data width - this assumes that HCON has been set with - * the correct values. - */ - i = (mci_readl(host, HCON) >> 7) & 0x7; - if (!i) { - host->push_data = dw_mci_push_data16; - host->pull_data = dw_mci_pull_data16; - width = 16; - host->data_shift = 1; - } else if (i == 2) { - host->push_data = dw_mci_push_data64; - host->pull_data = dw_mci_pull_data64; - width = 64; - host->data_shift = 3; - } else { - /* Check for a reserved value, and warn if it is */ - WARN((i != 1), - "HCON reports a reserved host data width!\n" - "Defaulting to 32-bit access.\n"); - host->push_data = dw_mci_push_data32; - host->pull_data = dw_mci_pull_data32; - width = 32; - host->data_shift = 2; - } - - /* Reset all blocks */ - if (!mci_wait_reset(&pdev->dev, host)) { - ret = -ENODEV; - goto err_dmaunmap; - } - - /* Clear the interrupts for the host controller */ - mci_writel(host, RINTSTS, 0xFFFFFFFF); - mci_writel(host, INTMASK, 0); /* disable all mmc interrupt first */ - - /* Put in max timeout */ - mci_writel(host, TMOUT, 0xFFFFFFFF); - - /* - * FIFO threshold settings RxMark = fifo_size / 2 - 1, - * Tx Mark = fifo_size / 2 DMA Size = 8 - */ - fifo_size = mci_readl(host, FIFOTH); - fifo_size = (fifo_size >> 16) & 0x7ff; - mci_writel(host, FIFOTH, ((0x2 << 28) | ((fifo_size/2 - 1) << 16) | - ((fifo_size/2) << 0))); - - /* disable clock to CIU */ - mci_writel(host, CLKENA, 0); - mci_writel(host, CLKSRC, 0); - - tasklet_init(&host->tasklet, dw_mci_tasklet_func, (unsigned long)host); - tasklet_init(&host->card_tasklet, - dw_mci_tasklet_card, (unsigned long)host); - - ret = request_irq(irq, dw_mci_interrupt, 0, "dw-mci", host); - if (ret) - goto err_dmaunmap; - - platform_set_drvdata(pdev, host); - - if (host->pdata->num_slots) - host->num_slots = host->pdata->num_slots; - else - host->num_slots = ((mci_readl(host, HCON) >> 1) & 0x1F) + 1; - - /* We need at least one slot to succeed */ - for (i = 0; i < host->num_slots; i++) { - ret = dw_mci_init_slot(host, i); - if (ret) { - ret = -ENODEV; - goto err_init_slot; - } - } - - /* - * Enable interrupts for command done, data over, data empty, card det, - * receive ready and error such as transmit, receive timeout, crc error - */ - mci_writel(host, RINTSTS, 0xFFFFFFFF); - mci_writel(host, INTMASK, SDMMC_INT_CMD_DONE | SDMMC_INT_DATA_OVER | - SDMMC_INT_TXDR | SDMMC_INT_RXDR | - DW_MCI_ERROR_FLAGS | SDMMC_INT_CD); - mci_writel(host, CTRL, SDMMC_CTRL_INT_ENABLE); /* Enable mci interrupt */ - - dev_info(&pdev->dev, "DW MMC controller at irq %d, " - "%d bit host data width\n", irq, width); - if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO) - dev_info(&pdev->dev, "Internal DMAC interrupt fix enabled.\n"); - - return 0; - -err_init_slot: - /* De-init any initialized slots */ - while (i > 0) { - if (host->slot[i]) - dw_mci_cleanup_slot(host->slot[i], i); - i--; - } - free_irq(irq, host); - -err_dmaunmap: - if (host->use_dma && host->dma_ops->exit) - host->dma_ops->exit(host); - dma_free_coherent(&host->pdev->dev, PAGE_SIZE, - host->sg_cpu, host->sg_dma); - iounmap(host->regs); - -err_freehost: - kfree(host); - return ret; -} - -static int __exit dw_mci_remove(struct platform_device *pdev) -{ - struct dw_mci *host = platform_get_drvdata(pdev); - int i; - - mci_writel(host, RINTSTS, 0xFFFFFFFF); - mci_writel(host, INTMASK, 0); /* disable all mmc interrupt first */ - - platform_set_drvdata(pdev, NULL); - - for (i = 0; i < host->num_slots; i++) { - dev_dbg(&pdev->dev, "remove slot 
%d\n", i); - if (host->slot[i]) - dw_mci_cleanup_slot(host->slot[i], i); - } - - /* disable clock to CIU */ - mci_writel(host, CLKENA, 0); - mci_writel(host, CLKSRC, 0); - - free_irq(platform_get_irq(pdev, 0), host); - dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma); - - if (host->use_dma && host->dma_ops->exit) - host->dma_ops->exit(host); - - iounmap(host->regs); - - kfree(host); - return 0; -} - -#ifdef CONFIG_PM -/* - * TODO: we should probably disable the clock to the card in the suspend path. - */ -static int dw_mci_suspend(struct platform_device *pdev, pm_message_t mesg) -{ - int i, ret; - struct dw_mci *host = platform_get_drvdata(pdev); - - for (i = 0; i < host->num_slots; i++) { - struct dw_mci_slot *slot = host->slot[i]; - if (!slot) - continue; - ret = mmc_suspend_host(slot->mmc); - if (ret < 0) { - while (--i >= 0) { - slot = host->slot[i]; - if (slot) - mmc_resume_host(host->slot[i]->mmc); - } - return ret; - } - } - - return 0; -} - -static int dw_mci_resume(struct platform_device *pdev) -{ - int i, ret; - struct dw_mci *host = platform_get_drvdata(pdev); - - for (i = 0; i < host->num_slots; i++) { - struct dw_mci_slot *slot = host->slot[i]; - if (!slot) - continue; - ret = mmc_resume_host(host->slot[i]->mmc); - if (ret < 0) - return ret; - } - - return 0; -} -#else -#define dw_mci_suspend NULL -#define dw_mci_resume NULL -#endif /* CONFIG_PM */ - -static struct platform_driver dw_mci_driver = { - .remove = __exit_p(dw_mci_remove), - .suspend = dw_mci_suspend, - .resume = dw_mci_resume, - .driver = { - .name = "dw_mmc", - }, -}; - -static int __init dw_mci_init(void) -{ - return platform_driver_probe(&dw_mci_driver, dw_mci_probe); -} - -static void __exit dw_mci_exit(void) -{ - platform_driver_unregister(&dw_mci_driver); -} - -module_init(dw_mci_init); -module_exit(dw_mci_exit); - -MODULE_DESCRIPTION("DW Multimedia Card Interface driver"); -MODULE_AUTHOR("NXP Semiconductor VietNam"); -MODULE_AUTHOR("Imagination Technologies Ltd"); -MODULE_LICENSE("GPL v2"); diff --git a/trunk/drivers/mmc/host/dw_mmc.h b/trunk/drivers/mmc/host/dw_mmc.h deleted file mode 100644 index 5dd55a75233d..000000000000 --- a/trunk/drivers/mmc/host/dw_mmc.h +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Synopsys DesignWare Multimedia Card Interface driver - * (Based on NXP driver for lpc 31xx) - * - * Copyright (C) 2009 NXP Semiconductors - * Copyright (C) 2009, 2010 Imagination Technologies Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- */ - -#ifndef _DW_MMC_H_ -#define _DW_MMC_H_ - -#define SDMMC_CTRL 0x000 -#define SDMMC_PWREN 0x004 -#define SDMMC_CLKDIV 0x008 -#define SDMMC_CLKSRC 0x00c -#define SDMMC_CLKENA 0x010 -#define SDMMC_TMOUT 0x014 -#define SDMMC_CTYPE 0x018 -#define SDMMC_BLKSIZ 0x01c -#define SDMMC_BYTCNT 0x020 -#define SDMMC_INTMASK 0x024 -#define SDMMC_CMDARG 0x028 -#define SDMMC_CMD 0x02c -#define SDMMC_RESP0 0x030 -#define SDMMC_RESP1 0x034 -#define SDMMC_RESP2 0x038 -#define SDMMC_RESP3 0x03c -#define SDMMC_MINTSTS 0x040 -#define SDMMC_RINTSTS 0x044 -#define SDMMC_STATUS 0x048 -#define SDMMC_FIFOTH 0x04c -#define SDMMC_CDETECT 0x050 -#define SDMMC_WRTPRT 0x054 -#define SDMMC_GPIO 0x058 -#define SDMMC_TCBCNT 0x05c -#define SDMMC_TBBCNT 0x060 -#define SDMMC_DEBNCE 0x064 -#define SDMMC_USRID 0x068 -#define SDMMC_VERID 0x06c -#define SDMMC_HCON 0x070 -#define SDMMC_BMOD 0x080 -#define SDMMC_PLDMND 0x084 -#define SDMMC_DBADDR 0x088 -#define SDMMC_IDSTS 0x08c -#define SDMMC_IDINTEN 0x090 -#define SDMMC_DSCADDR 0x094 -#define SDMMC_BUFADDR 0x098 -#define SDMMC_DATA 0x100 -#define SDMMC_DATA_ADR 0x100 - -/* shift bit field */ -#define _SBF(f, v) ((v) << (f)) - -/* Control register defines */ -#define SDMMC_CTRL_USE_IDMAC BIT(25) -#define SDMMC_CTRL_CEATA_INT_EN BIT(11) -#define SDMMC_CTRL_SEND_AS_CCSD BIT(10) -#define SDMMC_CTRL_SEND_CCSD BIT(9) -#define SDMMC_CTRL_ABRT_READ_DATA BIT(8) -#define SDMMC_CTRL_SEND_IRQ_RESP BIT(7) -#define SDMMC_CTRL_READ_WAIT BIT(6) -#define SDMMC_CTRL_DMA_ENABLE BIT(5) -#define SDMMC_CTRL_INT_ENABLE BIT(4) -#define SDMMC_CTRL_DMA_RESET BIT(2) -#define SDMMC_CTRL_FIFO_RESET BIT(1) -#define SDMMC_CTRL_RESET BIT(0) -/* Clock Enable register defines */ -#define SDMMC_CLKEN_LOW_PWR BIT(16) -#define SDMMC_CLKEN_ENABLE BIT(0) -/* time-out register defines */ -#define SDMMC_TMOUT_DATA(n) _SBF(8, (n)) -#define SDMMC_TMOUT_DATA_MSK 0xFFFFFF00 -#define SDMMC_TMOUT_RESP(n) ((n) & 0xFF) -#define SDMMC_TMOUT_RESP_MSK 0xFF -/* card-type register defines */ -#define SDMMC_CTYPE_8BIT BIT(16) -#define SDMMC_CTYPE_4BIT BIT(0) -#define SDMMC_CTYPE_1BIT 0 -/* Interrupt status & mask register defines */ -#define SDMMC_INT_SDIO BIT(16) -#define SDMMC_INT_EBE BIT(15) -#define SDMMC_INT_ACD BIT(14) -#define SDMMC_INT_SBE BIT(13) -#define SDMMC_INT_HLE BIT(12) -#define SDMMC_INT_FRUN BIT(11) -#define SDMMC_INT_HTO BIT(10) -#define SDMMC_INT_DTO BIT(9) -#define SDMMC_INT_RTO BIT(8) -#define SDMMC_INT_DCRC BIT(7) -#define SDMMC_INT_RCRC BIT(6) -#define SDMMC_INT_RXDR BIT(5) -#define SDMMC_INT_TXDR BIT(4) -#define SDMMC_INT_DATA_OVER BIT(3) -#define SDMMC_INT_CMD_DONE BIT(2) -#define SDMMC_INT_RESP_ERR BIT(1) -#define SDMMC_INT_CD BIT(0) -#define SDMMC_INT_ERROR 0xbfc2 -/* Command register defines */ -#define SDMMC_CMD_START BIT(31) -#define SDMMC_CMD_CCS_EXP BIT(23) -#define SDMMC_CMD_CEATA_RD BIT(22) -#define SDMMC_CMD_UPD_CLK BIT(21) -#define SDMMC_CMD_INIT BIT(15) -#define SDMMC_CMD_STOP BIT(14) -#define SDMMC_CMD_PRV_DAT_WAIT BIT(13) -#define SDMMC_CMD_SEND_STOP BIT(12) -#define SDMMC_CMD_STRM_MODE BIT(11) -#define SDMMC_CMD_DAT_WR BIT(10) -#define SDMMC_CMD_DAT_EXP BIT(9) -#define SDMMC_CMD_RESP_CRC BIT(8) -#define SDMMC_CMD_RESP_LONG BIT(7) -#define SDMMC_CMD_RESP_EXP BIT(6) -#define SDMMC_CMD_INDX(n) ((n) & 0x1F) -/* Status register defines */ -#define SDMMC_GET_FCNT(x) (((x)>>17) & 0x1FF) -#define SDMMC_FIFO_SZ 32 -/* Internal DMAC interrupt defines */ -#define SDMMC_IDMAC_INT_AI BIT(9) -#define SDMMC_IDMAC_INT_NI BIT(8) -#define SDMMC_IDMAC_INT_CES BIT(5) -#define SDMMC_IDMAC_INT_DU BIT(4) 
-#define SDMMC_IDMAC_INT_FBE BIT(2) -#define SDMMC_IDMAC_INT_RI BIT(1) -#define SDMMC_IDMAC_INT_TI BIT(0) -/* Internal DMAC bus mode bits */ -#define SDMMC_IDMAC_ENABLE BIT(7) -#define SDMMC_IDMAC_FB BIT(1) -#define SDMMC_IDMAC_SWRESET BIT(0) - -/* Register access macros */ -#define mci_readl(dev, reg) \ - __raw_readl(dev->regs + SDMMC_##reg) -#define mci_writel(dev, reg, value) \ - __raw_writel((value), dev->regs + SDMMC_##reg) - -/* 16-bit FIFO access macros */ -#define mci_readw(dev, reg) \ - __raw_readw(dev->regs + SDMMC_##reg) -#define mci_writew(dev, reg, value) \ - __raw_writew((value), dev->regs + SDMMC_##reg) - -/* 64-bit FIFO access macros */ -#ifdef readq -#define mci_readq(dev, reg) \ - __raw_readq(dev->regs + SDMMC_##reg) -#define mci_writeq(dev, reg, value) \ - __raw_writeq((value), dev->regs + SDMMC_##reg) -#else -/* - * Dummy readq implementation for architectures that don't define it. - * - * We would assume that none of these architectures would configure - * the IP block with a 64bit FIFO width, so this code will never be - * executed on those machines. Defining these macros here keeps the - * rest of the code free from ifdefs. - */ -#define mci_readq(dev, reg) \ - (*(volatile u64 __force *)(dev->regs + SDMMC_##reg)) -#define mci_writeq(dev, reg, value) \ - (*(volatile u64 __force *)(dev->regs + SDMMC_##reg) = value) -#endif - -#endif /* _DW_MMC_H_ */ diff --git a/trunk/drivers/mmc/host/mxcmmc.c b/trunk/drivers/mmc/host/mxcmmc.c index 4428594261c5..bdd2cbb87cba 100644 --- a/trunk/drivers/mmc/host/mxcmmc.c +++ b/trunk/drivers/mmc/host/mxcmmc.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include @@ -142,49 +141,10 @@ struct mxcmci_host { struct work_struct datawork; spinlock_t lock; - - struct regulator *vcc; }; static void mxcmci_set_clk_rate(struct mxcmci_host *host, unsigned int clk_ios); -static inline void mxcmci_init_ocr(struct mxcmci_host *host) -{ - host->vcc = regulator_get(mmc_dev(host->mmc), "vmmc"); - - if (IS_ERR(host->vcc)) { - host->vcc = NULL; - } else { - host->mmc->ocr_avail = mmc_regulator_get_ocrmask(host->vcc); - if (host->pdata && host->pdata->ocr_avail) - dev_warn(mmc_dev(host->mmc), - "pdata->ocr_avail will not be used\n"); - } - - if (host->vcc == NULL) { - /* fall-back to platform data */ - if (host->pdata && host->pdata->ocr_avail) - host->mmc->ocr_avail = host->pdata->ocr_avail; - else - host->mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; - } -} - -static inline void mxcmci_set_power(struct mxcmci_host *host, - unsigned char power_mode, - unsigned int vdd) -{ - if (host->vcc) { - if (power_mode == MMC_POWER_UP) - mmc_regulator_set_ocr(host->mmc, host->vcc, vdd); - else if (power_mode == MMC_POWER_OFF) - mmc_regulator_set_ocr(host->mmc, host->vcc, 0); - } - - if (host->pdata && host->pdata->setpower) - host->pdata->setpower(mmc_dev(host->mmc), vdd); -} - static inline int mxcmci_use_dma(struct mxcmci_host *host) { return host->do_dma; @@ -720,9 +680,9 @@ static void mxcmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->cmdat &= ~CMD_DAT_CONT_BUS_WIDTH_4; if (host->power_mode != ios->power_mode) { - mxcmci_set_power(host, ios->power_mode, ios->vdd); + if (host->pdata && host->pdata->setpower) + host->pdata->setpower(mmc_dev(mmc), ios->vdd); host->power_mode = ios->power_mode; - if (ios->power_mode == MMC_POWER_ON) host->cmdat |= CMD_DAT_CONT_INIT; } @@ -847,7 +807,10 @@ static int mxcmci_probe(struct platform_device *pdev) host->pdata = pdev->dev.platform_data; spin_lock_init(&host->lock); - mxcmci_init_ocr(host); 
+ if (host->pdata && host->pdata->ocr_avail) + mmc->ocr_avail = host->pdata->ocr_avail; + else + mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; if (host->pdata && host->pdata->dat3_card_detect) host->default_irq_mask = @@ -952,9 +915,6 @@ static int mxcmci_remove(struct platform_device *pdev) mmc_remove_host(mmc); - if (host->vcc) - regulator_put(host->vcc); - if (host->pdata && host->pdata->exit) host->pdata->exit(&pdev->dev, mmc); @@ -967,6 +927,7 @@ static int mxcmci_remove(struct platform_device *pdev) clk_put(host->clk); release_mem_region(host->res->start, resource_size(host->res)); + release_resource(host->res); mmc_free_host(mmc); diff --git a/trunk/drivers/mmc/host/sdhci-dove.c b/trunk/drivers/mmc/host/sdhci-dove.c deleted file mode 100644 index 2aeef4ffed8c..000000000000 --- a/trunk/drivers/mmc/host/sdhci-dove.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * sdhci-dove.c Support for SDHCI on Marvell's Dove SoC - * - * Author: Saeed Bishara - * Mike Rapoport - * Based on sdhci-cns3xxx.c - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include - -#include "sdhci.h" -#include "sdhci-pltfm.h" - -static u16 sdhci_dove_readw(struct sdhci_host *host, int reg) -{ - u16 ret; - - switch (reg) { - case SDHCI_HOST_VERSION: - case SDHCI_SLOT_INT_STATUS: - /* those registers don't exist */ - return 0; - default: - ret = readw(host->ioaddr + reg); - } - return ret; -} - -static u32 sdhci_dove_readl(struct sdhci_host *host, int reg) -{ - u32 ret; - - switch (reg) { - case SDHCI_CAPABILITIES: - ret = readl(host->ioaddr + reg); - /* Mask the support for 3.0V */ - ret &= ~SDHCI_CAN_VDD_300; - break; - default: - ret = readl(host->ioaddr + reg); - } - return ret; -} - -static struct sdhci_ops sdhci_dove_ops = { - .read_w = sdhci_dove_readw, - .read_l = sdhci_dove_readl, -}; - -struct sdhci_pltfm_data sdhci_dove_pdata = { - .ops = &sdhci_dove_ops, - .quirks = SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER | - SDHCI_QUIRK_NO_BUSY_IRQ | - SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | - SDHCI_QUIRK_FORCE_DMA, -}; diff --git a/trunk/drivers/mmc/host/sdhci-pci.c b/trunk/drivers/mmc/host/sdhci-pci.c index 0dc905b20eee..3d9c2460d437 100644 --- a/trunk/drivers/mmc/host/sdhci-pci.c +++ b/trunk/drivers/mmc/host/sdhci-pci.c @@ -176,74 +176,6 @@ static const struct sdhci_pci_fixes sdhci_intel_mfd_emmc_sdio = { .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, }; -/* O2Micro extra registers */ -#define O2_SD_LOCK_WP 0xD3 -#define O2_SD_MULTI_VCC3V 0xEE -#define O2_SD_CLKREQ 0xEC -#define O2_SD_CAPS 0xE0 -#define O2_SD_ADMA1 0xE2 -#define O2_SD_ADMA2 0xE7 -#define O2_SD_INF_MOD 0xF1 - -static int o2_probe(struct sdhci_pci_chip *chip) -{ - int ret; - u8 scratch; - - switch (chip->pdev->device) { - case PCI_DEVICE_ID_O2_8220: - case PCI_DEVICE_ID_O2_8221: - case PCI_DEVICE_ID_O2_8320: - case PCI_DEVICE_ID_O2_8321: - /* This extra setup is required due to broken ADMA. 
*/ - ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); - if (ret) - return ret; - scratch &= 0x7f; - pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); - - /* Set Multi 3 to VCC3V# */ - pci_write_config_byte(chip->pdev, O2_SD_MULTI_VCC3V, 0x08); - - /* Disable CLK_REQ# support after media DET */ - ret = pci_read_config_byte(chip->pdev, O2_SD_CLKREQ, &scratch); - if (ret) - return ret; - scratch |= 0x20; - pci_write_config_byte(chip->pdev, O2_SD_CLKREQ, scratch); - - /* Choose capabilities, enable SDMA. We have to write 0x01 - * to the capabilities register first to unlock it. - */ - ret = pci_read_config_byte(chip->pdev, O2_SD_CAPS, &scratch); - if (ret) - return ret; - scratch |= 0x01; - pci_write_config_byte(chip->pdev, O2_SD_CAPS, scratch); - pci_write_config_byte(chip->pdev, O2_SD_CAPS, 0x73); - - /* Disable ADMA1/2 */ - pci_write_config_byte(chip->pdev, O2_SD_ADMA1, 0x39); - pci_write_config_byte(chip->pdev, O2_SD_ADMA2, 0x08); - - /* Disable the infinite transfer mode */ - ret = pci_read_config_byte(chip->pdev, O2_SD_INF_MOD, &scratch); - if (ret) - return ret; - scratch |= 0x08; - pci_write_config_byte(chip->pdev, O2_SD_INF_MOD, scratch); - - /* Lock WP */ - ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); - if (ret) - return ret; - scratch |= 0x80; - pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); - } - - return 0; -} - static int jmicron_pmos(struct sdhci_pci_chip *chip, int on) { u8 scratch; @@ -272,7 +204,6 @@ static int jmicron_pmos(struct sdhci_pci_chip *chip, int on) static int jmicron_probe(struct sdhci_pci_chip *chip) { int ret; - u16 mmcdev = 0; if (chip->pdev->revision == 0) { chip->quirks |= SDHCI_QUIRK_32BIT_DMA_ADDR | @@ -294,17 +225,12 @@ static int jmicron_probe(struct sdhci_pci_chip *chip) * 2. The MMC interface has a lower subfunction number * than the SD interface. */ - if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_SD) - mmcdev = PCI_DEVICE_ID_JMICRON_JMB38X_MMC; - else if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_SD) - mmcdev = PCI_DEVICE_ID_JMICRON_JMB388_ESD; - - if (mmcdev) { + if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_SD) { struct pci_dev *sd_dev; sd_dev = NULL; while ((sd_dev = pci_get_device(PCI_VENDOR_ID_JMICRON, - mmcdev, sd_dev)) != NULL) { + PCI_DEVICE_ID_JMICRON_JMB38X_MMC, sd_dev)) != NULL) { if ((PCI_SLOT(chip->pdev->devfn) == PCI_SLOT(sd_dev->devfn)) && (chip->pdev->bus == sd_dev->bus)) @@ -364,25 +290,13 @@ static int jmicron_probe_slot(struct sdhci_pci_slot *slot) slot->host->quirks |= SDHCI_QUIRK_BROKEN_ADMA; } - /* JM388 MMC doesn't support 1.8V while SD supports it */ - if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD) { - slot->host->ocr_avail_sd = MMC_VDD_32_33 | MMC_VDD_33_34 | - MMC_VDD_29_30 | MMC_VDD_30_31 | - MMC_VDD_165_195; /* allow 1.8V */ - slot->host->ocr_avail_mmc = MMC_VDD_32_33 | MMC_VDD_33_34 | - MMC_VDD_29_30 | MMC_VDD_30_31; /* no 1.8V for MMC */ - } - /* * The secondary interface requires a bit set to get the * interrupts. 
*/ - if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC || - slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD) + if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) jmicron_enable_mmc(slot->host, 1); - slot->host->mmc->caps |= MMC_CAP_BUS_WIDTH_TEST; - return 0; } @@ -391,8 +305,7 @@ static void jmicron_remove_slot(struct sdhci_pci_slot *slot, int dead) if (dead) return; - if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC || - slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD) + if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) jmicron_enable_mmc(slot->host, 0); } @@ -400,8 +313,7 @@ static int jmicron_suspend(struct sdhci_pci_chip *chip, pm_message_t state) { int i; - if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC || - chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD) { + if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) { for (i = 0;i < chip->num_slots;i++) jmicron_enable_mmc(chip->slots[i]->host, 0); } @@ -413,8 +325,7 @@ static int jmicron_resume(struct sdhci_pci_chip *chip) { int ret, i; - if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC || - chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD) { + if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) { for (i = 0;i < chip->num_slots;i++) jmicron_enable_mmc(chip->slots[i]->host, 1); } @@ -428,10 +339,6 @@ static int jmicron_resume(struct sdhci_pci_chip *chip) return 0; } -static const struct sdhci_pci_fixes sdhci_o2 = { - .probe = o2_probe, -}; - static const struct sdhci_pci_fixes sdhci_jmicron = { .probe = jmicron_probe, @@ -602,22 +509,6 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .driver_data = (kernel_ulong_t)&sdhci_jmicron, }, - { - .vendor = PCI_VENDOR_ID_JMICRON, - .device = PCI_DEVICE_ID_JMICRON_JMB388_SD, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = (kernel_ulong_t)&sdhci_jmicron, - }, - - { - .vendor = PCI_VENDOR_ID_JMICRON, - .device = PCI_DEVICE_ID_JMICRON_JMB388_ESD, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = (kernel_ulong_t)&sdhci_jmicron, - }, - { .vendor = PCI_VENDOR_ID_SYSKONNECT, .device = 0x8000, @@ -698,46 +589,6 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio, }, - { - .vendor = PCI_VENDOR_ID_O2, - .device = PCI_DEVICE_ID_O2_8120, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = (kernel_ulong_t)&sdhci_o2, - }, - - { - .vendor = PCI_VENDOR_ID_O2, - .device = PCI_DEVICE_ID_O2_8220, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = (kernel_ulong_t)&sdhci_o2, - }, - - { - .vendor = PCI_VENDOR_ID_O2, - .device = PCI_DEVICE_ID_O2_8221, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = (kernel_ulong_t)&sdhci_o2, - }, - - { - .vendor = PCI_VENDOR_ID_O2, - .device = PCI_DEVICE_ID_O2_8320, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = (kernel_ulong_t)&sdhci_o2, - }, - - { - .vendor = PCI_VENDOR_ID_O2, - .device = PCI_DEVICE_ID_O2_8321, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = (kernel_ulong_t)&sdhci_o2, - }, - { /* Generic SD host controller */ PCI_DEVICE_CLASS((PCI_CLASS_SYSTEM_SDHCI << 8), 0xFFFF00) }, diff --git a/trunk/drivers/mmc/host/sdhci-pltfm.c b/trunk/drivers/mmc/host/sdhci-pltfm.c index dbab0407f4b6..0502f89f662b 100644 --- a/trunk/drivers/mmc/host/sdhci-pltfm.c +++ b/trunk/drivers/mmc/host/sdhci-pltfm.c @@ -169,12 +169,6 
@@ static const struct platform_device_id sdhci_pltfm_ids[] = { #endif #ifdef CONFIG_MMC_SDHCI_ESDHC_IMX { "sdhci-esdhc-imx", (kernel_ulong_t)&sdhci_esdhc_imx_pdata }, -#endif -#ifdef CONFIG_MMC_SDHCI_DOVE - { "sdhci-dove", (kernel_ulong_t)&sdhci_dove_pdata }, -#endif -#ifdef CONFIG_MMC_SDHCI_TEGRA - { "sdhci-tegra", (kernel_ulong_t)&sdhci_tegra_pdata }, #endif { }, }; diff --git a/trunk/drivers/mmc/host/sdhci-pltfm.h b/trunk/drivers/mmc/host/sdhci-pltfm.h index ea2e44d9be5e..c1bfe48af56a 100644 --- a/trunk/drivers/mmc/host/sdhci-pltfm.h +++ b/trunk/drivers/mmc/host/sdhci-pltfm.h @@ -22,7 +22,5 @@ struct sdhci_pltfm_host { extern struct sdhci_pltfm_data sdhci_cns3xxx_pdata; extern struct sdhci_pltfm_data sdhci_esdhc_imx_pdata; -extern struct sdhci_pltfm_data sdhci_dove_pdata; -extern struct sdhci_pltfm_data sdhci_tegra_pdata; #endif /* _DRIVERS_MMC_SDHCI_PLTFM_H */ diff --git a/trunk/drivers/mmc/host/sdhci-s3c.c b/trunk/drivers/mmc/host/sdhci-s3c.c index 17203586305c..aacb862ecc8a 100644 --- a/trunk/drivers/mmc/host/sdhci-s3c.c +++ b/trunk/drivers/mmc/host/sdhci-s3c.c @@ -130,15 +130,6 @@ static unsigned int sdhci_s3c_consider_clock(struct sdhci_s3c *ourhost, if (!clksrc) return UINT_MAX; - /* - * Clock divider's step is different as 1 from that of host controller - * when 'clk_type' is S3C_SDHCI_CLK_DIV_EXTERNAL. - */ - if (ourhost->pdata->clk_type) { - rate = clk_round_rate(clksrc, wanted); - return wanted - rate; - } - rate = clk_get_rate(clksrc); for (div = 1; div < 256; div *= 2) { @@ -241,42 +232,6 @@ static unsigned int sdhci_s3c_get_min_clock(struct sdhci_host *host) return min; } -/* sdhci_cmu_get_max_clk - callback to get maximum clock frequency.*/ -static unsigned int sdhci_cmu_get_max_clock(struct sdhci_host *host) -{ - struct sdhci_s3c *ourhost = to_s3c(host); - - return clk_round_rate(ourhost->clk_bus[ourhost->cur_clk], UINT_MAX); -} - -/* sdhci_cmu_get_min_clock - callback to get minimal supported clock value. */ -static unsigned int sdhci_cmu_get_min_clock(struct sdhci_host *host) -{ - struct sdhci_s3c *ourhost = to_s3c(host); - - /* - * initial clock can be in the frequency range of - * 100KHz-400KHz, so we set it as max value. - */ - return clk_round_rate(ourhost->clk_bus[ourhost->cur_clk], 400000); -} - -/* sdhci_cmu_set_clock - callback on clock change.*/ -static void sdhci_cmu_set_clock(struct sdhci_host *host, unsigned int clock) -{ - struct sdhci_s3c *ourhost = to_s3c(host); - - /* don't bother if the clock is going off */ - if (clock == 0) - return; - - sdhci_s3c_set_clock(host, clock); - - clk_set_rate(ourhost->clk_bus[ourhost->cur_clk], clock); - - host->clock = clock; -} - static struct sdhci_ops sdhci_s3c_ops = { .get_max_clock = sdhci_s3c_get_max_clk, .set_clock = sdhci_s3c_set_clock, @@ -406,13 +361,6 @@ static int __devinit sdhci_s3c_probe(struct platform_device *pdev) clks++; sc->clk_bus[ptr] = clk; - - /* - * save current clock index to know which clock bus - * is used later in overriding functions. - */ - sc->cur_clk = ptr; - clk_enable(clk); dev_info(dev, "clock source %d: %s (%ld Hz)\n", @@ -479,20 +427,6 @@ static int __devinit sdhci_s3c_probe(struct platform_device *pdev) /* HSMMC on Samsung SoCs uses SDCLK as timeout clock */ host->quirks |= SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK; - /* - * If controller does not have internal clock divider, - * we can use overriding functions instead of default. 
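The removed comment just above (and the probe-time code that follows below in this hunk) describe sdhci-s3c swapping in the sdhci_cmu_* callbacks whenever the board routes the SD clock through an external divider. Stripped of the driver specifics, that is an ops-table override decided at init time; a small self-contained sketch of the pattern (all names here are illustrative, none come from the driver):

#include <stdio.h>

struct clk_ops {
    unsigned int (*get_max_clock)(void);
    void (*set_clock)(unsigned int hz);
};

static unsigned int default_get_max_clock(void) { return 50000000; }
static void default_set_clock(unsigned int hz) { printf("internal divider: %u Hz\n", hz); }

/* Variants used when an external clock-management unit owns the divider. */
static unsigned int cmu_get_max_clock(void) { return 100000000; }
static void cmu_set_clock(unsigned int hz) { printf("external divider: %u Hz\n", hz); }

static struct clk_ops ops = {
    .get_max_clock = default_get_max_clock,
    .set_clock     = default_set_clock,
};

int main(void)
{
    int has_external_divider = 1;    /* stand-in for the driver's pdata->clk_type */

    if (has_external_divider) {      /* mirrors the removed probe-time override */
        ops.get_max_clock = cmu_get_max_clock;
        ops.set_clock     = cmu_set_clock;
    }
    ops.set_clock(ops.get_max_clock());
    return 0;
}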
- */ - if (pdata->clk_type) { - sdhci_s3c_ops.set_clock = sdhci_cmu_set_clock; - sdhci_s3c_ops.get_min_clock = sdhci_cmu_get_min_clock; - sdhci_s3c_ops.get_max_clock = sdhci_cmu_get_max_clock; - } - - /* It supports additional host capabilities if needed */ - if (pdata->host_caps) - host->mmc->caps |= pdata->host_caps; - ret = sdhci_add_host(host); if (ret) { dev_err(dev, "sdhci_add_host() failed\n"); diff --git a/trunk/drivers/mmc/host/sdhci-tegra.c b/trunk/drivers/mmc/host/sdhci-tegra.c deleted file mode 100644 index 4823ee94a63f..000000000000 --- a/trunk/drivers/mmc/host/sdhci-tegra.c +++ /dev/null @@ -1,257 +0,0 @@ -/* - * Copyright (C) 2010 Google, Inc. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "sdhci.h" -#include "sdhci-pltfm.h" - -static u32 tegra_sdhci_readl(struct sdhci_host *host, int reg) -{ - u32 val; - - if (unlikely(reg == SDHCI_PRESENT_STATE)) { - /* Use wp_gpio here instead? */ - val = readl(host->ioaddr + reg); - return val | SDHCI_WRITE_PROTECT; - } - - return readl(host->ioaddr + reg); -} - -static u16 tegra_sdhci_readw(struct sdhci_host *host, int reg) -{ - if (unlikely(reg == SDHCI_HOST_VERSION)) { - /* Erratum: Version register is invalid in HW. */ - return SDHCI_SPEC_200; - } - - return readw(host->ioaddr + reg); -} - -static void tegra_sdhci_writel(struct sdhci_host *host, u32 val, int reg) -{ - /* Seems like we're getting spurious timeout and crc errors, so - * disable signalling of them. In case of real errors software - * timers should take care of eventually detecting them. 
- */ - if (unlikely(reg == SDHCI_SIGNAL_ENABLE)) - val &= ~(SDHCI_INT_TIMEOUT|SDHCI_INT_CRC); - - writel(val, host->ioaddr + reg); - - if (unlikely(reg == SDHCI_INT_ENABLE)) { - /* Erratum: Must enable block gap interrupt detection */ - u8 gap_ctrl = readb(host->ioaddr + SDHCI_BLOCK_GAP_CONTROL); - if (val & SDHCI_INT_CARD_INT) - gap_ctrl |= 0x8; - else - gap_ctrl &= ~0x8; - writeb(gap_ctrl, host->ioaddr + SDHCI_BLOCK_GAP_CONTROL); - } -} - -static unsigned int tegra_sdhci_get_ro(struct sdhci_host *sdhci) -{ - struct platform_device *pdev = to_platform_device(mmc_dev(sdhci->mmc)); - struct tegra_sdhci_platform_data *plat; - - plat = pdev->dev.platform_data; - - if (!gpio_is_valid(plat->wp_gpio)) - return -1; - - return gpio_get_value(plat->wp_gpio); -} - -static irqreturn_t carddetect_irq(int irq, void *data) -{ - struct sdhci_host *sdhost = (struct sdhci_host *)data; - - tasklet_schedule(&sdhost->card_tasklet); - return IRQ_HANDLED; -}; - -static int tegra_sdhci_8bit(struct sdhci_host *host, int bus_width) -{ - struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc)); - struct tegra_sdhci_platform_data *plat; - u32 ctrl; - - plat = pdev->dev.platform_data; - - ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); - if (plat->is_8bit && bus_width == MMC_BUS_WIDTH_8) { - ctrl &= ~SDHCI_CTRL_4BITBUS; - ctrl |= SDHCI_CTRL_8BITBUS; - } else { - ctrl &= ~SDHCI_CTRL_8BITBUS; - if (bus_width == MMC_BUS_WIDTH_4) - ctrl |= SDHCI_CTRL_4BITBUS; - else - ctrl &= ~SDHCI_CTRL_4BITBUS; - } - sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL); - return 0; -} - - -static int tegra_sdhci_pltfm_init(struct sdhci_host *host, - struct sdhci_pltfm_data *pdata) -{ - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc)); - struct tegra_sdhci_platform_data *plat; - struct clk *clk; - int rc; - - plat = pdev->dev.platform_data; - if (plat == NULL) { - dev_err(mmc_dev(host->mmc), "missing platform data\n"); - return -ENXIO; - } - - if (gpio_is_valid(plat->power_gpio)) { - rc = gpio_request(plat->power_gpio, "sdhci_power"); - if (rc) { - dev_err(mmc_dev(host->mmc), - "failed to allocate power gpio\n"); - goto out; - } - tegra_gpio_enable(plat->power_gpio); - gpio_direction_output(plat->power_gpio, 1); - } - - if (gpio_is_valid(plat->cd_gpio)) { - rc = gpio_request(plat->cd_gpio, "sdhci_cd"); - if (rc) { - dev_err(mmc_dev(host->mmc), - "failed to allocate cd gpio\n"); - goto out_power; - } - tegra_gpio_enable(plat->cd_gpio); - gpio_direction_input(plat->cd_gpio); - - rc = request_irq(gpio_to_irq(plat->cd_gpio), carddetect_irq, - IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, - mmc_hostname(host->mmc), host); - - if (rc) { - dev_err(mmc_dev(host->mmc), "request irq error\n"); - goto out_cd; - } - - } - - if (gpio_is_valid(plat->wp_gpio)) { - rc = gpio_request(plat->wp_gpio, "sdhci_wp"); - if (rc) { - dev_err(mmc_dev(host->mmc), - "failed to allocate wp gpio\n"); - goto out_cd; - } - tegra_gpio_enable(plat->wp_gpio); - gpio_direction_input(plat->wp_gpio); - } - - clk = clk_get(mmc_dev(host->mmc), NULL); - if (IS_ERR(clk)) { - dev_err(mmc_dev(host->mmc), "clk err\n"); - rc = PTR_ERR(clk); - goto out_wp; - } - clk_enable(clk); - pltfm_host->clk = clk; - - if (plat->is_8bit) - host->mmc->caps |= MMC_CAP_8_BIT_DATA; - - return 0; - -out_wp: - if (gpio_is_valid(plat->wp_gpio)) { - tegra_gpio_disable(plat->wp_gpio); - gpio_free(plat->wp_gpio); - } - -out_cd: - if (gpio_is_valid(plat->cd_gpio)) { - tegra_gpio_disable(plat->cd_gpio); - 
gpio_free(plat->cd_gpio); - } - -out_power: - if (gpio_is_valid(plat->power_gpio)) { - tegra_gpio_disable(plat->power_gpio); - gpio_free(plat->power_gpio); - } - -out: - return rc; -} - -static void tegra_sdhci_pltfm_exit(struct sdhci_host *host) -{ - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc)); - struct tegra_sdhci_platform_data *plat; - - plat = pdev->dev.platform_data; - - if (gpio_is_valid(plat->wp_gpio)) { - tegra_gpio_disable(plat->wp_gpio); - gpio_free(plat->wp_gpio); - } - - if (gpio_is_valid(plat->cd_gpio)) { - tegra_gpio_disable(plat->cd_gpio); - gpio_free(plat->cd_gpio); - } - - if (gpio_is_valid(plat->power_gpio)) { - tegra_gpio_disable(plat->power_gpio); - gpio_free(plat->power_gpio); - } - - clk_disable(pltfm_host->clk); - clk_put(pltfm_host->clk); -} - -static struct sdhci_ops tegra_sdhci_ops = { - .get_ro = tegra_sdhci_get_ro, - .read_l = tegra_sdhci_readl, - .read_w = tegra_sdhci_readw, - .write_l = tegra_sdhci_writel, - .platform_8bit_width = tegra_sdhci_8bit, -}; - -struct sdhci_pltfm_data sdhci_tegra_pdata = { - .quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | - SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_NO_HISPD_BIT | - SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC, - .ops = &tegra_sdhci_ops, - .init = tegra_sdhci_pltfm_init, - .exit = tegra_sdhci_pltfm_exit, -}; diff --git a/trunk/drivers/mmc/host/sdhci.c b/trunk/drivers/mmc/host/sdhci.c index 9e15f41f87be..a25db426c910 100644 --- a/trunk/drivers/mmc/host/sdhci.c +++ b/trunk/drivers/mmc/host/sdhci.c @@ -23,7 +23,6 @@ #include -#include #include #include "sdhci.h" @@ -78,11 +77,8 @@ static void sdhci_dumpregs(struct sdhci_host *host) printk(KERN_DEBUG DRIVER_NAME ": AC12 err: 0x%08x | Slot int: 0x%08x\n", sdhci_readw(host, SDHCI_ACMD12_ERR), sdhci_readw(host, SDHCI_SLOT_INT_STATUS)); - printk(KERN_DEBUG DRIVER_NAME ": Caps: 0x%08x | Caps_1: 0x%08x\n", + printk(KERN_DEBUG DRIVER_NAME ": Caps: 0x%08x | Max curr: 0x%08x\n", sdhci_readl(host, SDHCI_CAPABILITIES), - sdhci_readl(host, SDHCI_CAPABILITIES_1)); - printk(KERN_DEBUG DRIVER_NAME ": Cmd: 0x%08x | Max curr: 0x%08x\n", - sdhci_readw(host, SDHCI_COMMAND), sdhci_readl(host, SDHCI_MAX_CURRENT)); if (host->flags & SDHCI_USE_ADMA) @@ -1522,11 +1518,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask) if (intmask & SDHCI_INT_DATA_TIMEOUT) host->data->error = -ETIMEDOUT; - else if (intmask & SDHCI_INT_DATA_END_BIT) - host->data->error = -EILSEQ; - else if ((intmask & SDHCI_INT_DATA_CRC) && - SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND)) - != MMC_BUS_TEST_R) + else if (intmask & (SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_END_BIT)) host->data->error = -EILSEQ; else if (intmask & SDHCI_INT_ADMA_ERROR) { printk(KERN_ERR "%s: ADMA error\n", mmc_hostname(host->mmc)); @@ -1744,7 +1736,7 @@ EXPORT_SYMBOL_GPL(sdhci_alloc_host); int sdhci_add_host(struct sdhci_host *host) { struct mmc_host *mmc; - unsigned int caps, ocr_avail; + unsigned int caps; int ret; WARN_ON(host == NULL); @@ -1898,26 +1890,13 @@ int sdhci_add_host(struct sdhci_host *host) mmc_card_is_removable(mmc)) mmc->caps |= MMC_CAP_NEEDS_POLL; - ocr_avail = 0; + mmc->ocr_avail = 0; if (caps & SDHCI_CAN_VDD_330) - ocr_avail |= MMC_VDD_32_33 | MMC_VDD_33_34; + mmc->ocr_avail |= MMC_VDD_32_33|MMC_VDD_33_34; if (caps & SDHCI_CAN_VDD_300) - ocr_avail |= MMC_VDD_29_30 | MMC_VDD_30_31; + mmc->ocr_avail |= MMC_VDD_29_30|MMC_VDD_30_31; if (caps & SDHCI_CAN_VDD_180) - ocr_avail |= MMC_VDD_165_195; - - mmc->ocr_avail = ocr_avail; - 
mmc->ocr_avail_sdio = ocr_avail; - if (host->ocr_avail_sdio) - mmc->ocr_avail_sdio &= host->ocr_avail_sdio; - mmc->ocr_avail_sd = ocr_avail; - if (host->ocr_avail_sd) - mmc->ocr_avail_sd &= host->ocr_avail_sd; - else /* normal SD controllers don't support 1.8V */ - mmc->ocr_avail_sd &= ~MMC_VDD_165_195; - mmc->ocr_avail_mmc = ocr_avail; - if (host->ocr_avail_mmc) - mmc->ocr_avail_mmc &= host->ocr_avail_mmc; + mmc->ocr_avail |= MMC_VDD_165_195; if (mmc->ocr_avail == 0) { printk(KERN_ERR "%s: Hardware doesn't report any " @@ -1949,14 +1928,10 @@ int sdhci_add_host(struct sdhci_host *host) * of bytes. When doing hardware scatter/gather, each entry cannot * be larger than 64 KiB though. */ - if (host->flags & SDHCI_USE_ADMA) { - if (host->quirks & SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC) - mmc->max_seg_size = 65535; - else - mmc->max_seg_size = 65536; - } else { + if (host->flags & SDHCI_USE_ADMA) + mmc->max_seg_size = 65536; + else mmc->max_seg_size = mmc->max_req_size; - } /* * Maximum block size. This varies from controller to controller and diff --git a/trunk/drivers/mmc/host/sdhci.h b/trunk/drivers/mmc/host/sdhci.h index 6e0969e40650..e42d7f00c060 100644 --- a/trunk/drivers/mmc/host/sdhci.h +++ b/trunk/drivers/mmc/host/sdhci.h @@ -52,7 +52,6 @@ #define SDHCI_CMD_RESP_SHORT_BUSY 0x03 #define SDHCI_MAKE_CMD(c, f) (((c & 0xff) << 8) | (f & 0xff)) -#define SDHCI_GET_CMD(c) ((c>>8) & 0x3f) #define SDHCI_RESPONSE 0x10 @@ -166,7 +165,7 @@ #define SDHCI_CAN_VDD_180 0x04000000 #define SDHCI_CAN_64BIT 0x10000000 -#define SDHCI_CAPABILITIES_1 0x44 +/* 44-47 reserved for more caps */ #define SDHCI_MAX_CURRENT 0x48 diff --git a/trunk/drivers/mmc/host/tmio_mmc.c b/trunk/drivers/mmc/host/tmio_mmc.c index e3c6ef208391..e7765a89593e 100644 --- a/trunk/drivers/mmc/host/tmio_mmc.c +++ b/trunk/drivers/mmc/host/tmio_mmc.c @@ -25,261 +25,16 @@ * double buffer support * */ - -#include +#include +#include #include +#include #include -#include -#include -#include -#include +#include #include #include -#include -#include -#include -#include -#include -#include - -#define CTL_SD_CMD 0x00 -#define CTL_ARG_REG 0x04 -#define CTL_STOP_INTERNAL_ACTION 0x08 -#define CTL_XFER_BLK_COUNT 0xa -#define CTL_RESPONSE 0x0c -#define CTL_STATUS 0x1c -#define CTL_IRQ_MASK 0x20 -#define CTL_SD_CARD_CLK_CTL 0x24 -#define CTL_SD_XFER_LEN 0x26 -#define CTL_SD_MEM_CARD_OPT 0x28 -#define CTL_SD_ERROR_DETAIL_STATUS 0x2c -#define CTL_SD_DATA_PORT 0x30 -#define CTL_TRANSACTION_CTL 0x34 -#define CTL_SDIO_STATUS 0x36 -#define CTL_SDIO_IRQ_MASK 0x38 -#define CTL_RESET_SD 0xe0 -#define CTL_SDIO_REGS 0x100 -#define CTL_CLK_AND_WAIT_CTL 0x138 -#define CTL_RESET_SDIO 0x1e0 - -/* Definitions for values the CTRL_STATUS register can take. 
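Stepping back to the sdhci.c hunk above: sdhci_add_host() now folds the controller's capability bits directly into mmc->ocr_avail rather than deriving separate masks per bus type. A self-contained restatement of that capability-to-OCR mapping (SDHCI_CAN_VDD_180 is visible in this patch; the other constants mirror the usual sdhci.h and linux/mmc/host.h values and should be treated as illustrative):

#include <stdint.h>
#include <stdio.h>

/* SDHCI capability bits (bit 26 = 1.8 V appears in the sdhci.h hunk above). */
#define SDHCI_CAN_VDD_330   0x01000000
#define SDHCI_CAN_VDD_300   0x02000000
#define SDHCI_CAN_VDD_180   0x04000000

/* OCR voltage-window bits, mirroring linux/mmc/host.h. */
#define MMC_VDD_165_195     0x00000080  /* 1.65 - 1.95 V */
#define MMC_VDD_29_30       0x00020000
#define MMC_VDD_30_31       0x00040000
#define MMC_VDD_32_33       0x00100000
#define MMC_VDD_33_34       0x00200000

/* Same mapping the patched sdhci_add_host() applies to mmc->ocr_avail. */
static uint32_t sdhci_caps_to_ocr(uint32_t caps)
{
    uint32_t ocr = 0;

    if (caps & SDHCI_CAN_VDD_330)
        ocr |= MMC_VDD_32_33 | MMC_VDD_33_34;
    if (caps & SDHCI_CAN_VDD_300)
        ocr |= MMC_VDD_29_30 | MMC_VDD_30_31;
    if (caps & SDHCI_CAN_VDD_180)
        ocr |= MMC_VDD_165_195;

    return ocr;
}

int main(void)
{
    /* A 3.3 V + 1.8 V capable controller. */
    printf("ocr_avail = 0x%08x\n",
           sdhci_caps_to_ocr(SDHCI_CAN_VDD_330 | SDHCI_CAN_VDD_180));
    return 0;
}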
*/ -#define TMIO_STAT_CMDRESPEND 0x00000001 -#define TMIO_STAT_DATAEND 0x00000004 -#define TMIO_STAT_CARD_REMOVE 0x00000008 -#define TMIO_STAT_CARD_INSERT 0x00000010 -#define TMIO_STAT_SIGSTATE 0x00000020 -#define TMIO_STAT_WRPROTECT 0x00000080 -#define TMIO_STAT_CARD_REMOVE_A 0x00000100 -#define TMIO_STAT_CARD_INSERT_A 0x00000200 -#define TMIO_STAT_SIGSTATE_A 0x00000400 -#define TMIO_STAT_CMD_IDX_ERR 0x00010000 -#define TMIO_STAT_CRCFAIL 0x00020000 -#define TMIO_STAT_STOPBIT_ERR 0x00040000 -#define TMIO_STAT_DATATIMEOUT 0x00080000 -#define TMIO_STAT_RXOVERFLOW 0x00100000 -#define TMIO_STAT_TXUNDERRUN 0x00200000 -#define TMIO_STAT_CMDTIMEOUT 0x00400000 -#define TMIO_STAT_RXRDY 0x01000000 -#define TMIO_STAT_TXRQ 0x02000000 -#define TMIO_STAT_ILL_FUNC 0x20000000 -#define TMIO_STAT_CMD_BUSY 0x40000000 -#define TMIO_STAT_ILL_ACCESS 0x80000000 - -/* Definitions for values the CTRL_SDIO_STATUS register can take. */ -#define TMIO_SDIO_STAT_IOIRQ 0x0001 -#define TMIO_SDIO_STAT_EXPUB52 0x4000 -#define TMIO_SDIO_STAT_EXWT 0x8000 -#define TMIO_SDIO_MASK_ALL 0xc007 - -/* Define some IRQ masks */ -/* This is the mask used at reset by the chip */ -#define TMIO_MASK_ALL 0x837f031d -#define TMIO_MASK_READOP (TMIO_STAT_RXRDY | TMIO_STAT_DATAEND) -#define TMIO_MASK_WRITEOP (TMIO_STAT_TXRQ | TMIO_STAT_DATAEND) -#define TMIO_MASK_CMD (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT | \ - TMIO_STAT_CARD_REMOVE | TMIO_STAT_CARD_INSERT) -#define TMIO_MASK_IRQ (TMIO_MASK_READOP | TMIO_MASK_WRITEOP | TMIO_MASK_CMD) - -#define enable_mmc_irqs(host, i) \ - do { \ - u32 mask;\ - mask = sd_ctrl_read32((host), CTL_IRQ_MASK); \ - mask &= ~((i) & TMIO_MASK_IRQ); \ - sd_ctrl_write32((host), CTL_IRQ_MASK, mask); \ - } while (0) - -#define disable_mmc_irqs(host, i) \ - do { \ - u32 mask;\ - mask = sd_ctrl_read32((host), CTL_IRQ_MASK); \ - mask |= ((i) & TMIO_MASK_IRQ); \ - sd_ctrl_write32((host), CTL_IRQ_MASK, mask); \ - } while (0) - -#define ack_mmc_irqs(host, i) \ - do { \ - sd_ctrl_write32((host), CTL_STATUS, ~(i)); \ - } while (0) - -/* This is arbitrary, just noone needed any higher alignment yet */ -#define MAX_ALIGN 4 - -struct tmio_mmc_host { - void __iomem *ctl; - unsigned long bus_shift; - struct mmc_command *cmd; - struct mmc_request *mrq; - struct mmc_data *data; - struct mmc_host *mmc; - int irq; - unsigned int sdio_irq_enabled; - - /* Callbacks for clock / power control */ - void (*set_pwr)(struct platform_device *host, int state); - void (*set_clk_div)(struct platform_device *host, int state); - - /* pio related stuff */ - struct scatterlist *sg_ptr; - struct scatterlist *sg_orig; - unsigned int sg_len; - unsigned int sg_off; - - struct platform_device *pdev; - - /* DMA support */ - struct dma_chan *chan_rx; - struct dma_chan *chan_tx; - struct tasklet_struct dma_complete; - struct tasklet_struct dma_issue; -#ifdef CONFIG_TMIO_MMC_DMA - unsigned int dma_sglen; - u8 bounce_buf[PAGE_CACHE_SIZE] __attribute__((aligned(MAX_ALIGN))); - struct scatterlist bounce_sg; -#endif - - /* Track lost interrupts */ - struct delayed_work delayed_reset_work; - spinlock_t lock; - unsigned long last_req_ts; -}; - -static void tmio_check_bounce_buffer(struct tmio_mmc_host *host); - -static u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr) -{ - return readw(host->ctl + (addr << host->bus_shift)); -} - -static void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr, - u16 *buf, int count) -{ - readsw(host->ctl + (addr << host->bus_shift), buf, count); -} -static u32 sd_ctrl_read32(struct tmio_mmc_host *host, int addr) -{ - 
return readw(host->ctl + (addr << host->bus_shift)) | - readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16; -} - -static void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, u16 val) -{ - writew(val, host->ctl + (addr << host->bus_shift)); -} - -static void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr, - u16 *buf, int count) -{ - writesw(host->ctl + (addr << host->bus_shift), buf, count); -} - -static void sd_ctrl_write32(struct tmio_mmc_host *host, int addr, u32 val) -{ - writew(val, host->ctl + (addr << host->bus_shift)); - writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift)); -} - -static void tmio_mmc_init_sg(struct tmio_mmc_host *host, struct mmc_data *data) -{ - host->sg_len = data->sg_len; - host->sg_ptr = data->sg; - host->sg_orig = data->sg; - host->sg_off = 0; -} - -static int tmio_mmc_next_sg(struct tmio_mmc_host *host) -{ - host->sg_ptr = sg_next(host->sg_ptr); - host->sg_off = 0; - return --host->sg_len; -} - -static char *tmio_mmc_kmap_atomic(struct scatterlist *sg, unsigned long *flags) -{ - local_irq_save(*flags); - return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset; -} - -static void tmio_mmc_kunmap_atomic(void *virt, unsigned long *flags) -{ - kunmap_atomic(virt, KM_BIO_SRC_IRQ); - local_irq_restore(*flags); -} - -#ifdef CONFIG_MMC_DEBUG - -#define STATUS_TO_TEXT(a) \ - do { \ - if (status & TMIO_STAT_##a) \ - printk(#a); \ - } while (0) - -void pr_debug_status(u32 status) -{ - printk(KERN_DEBUG "status: %08x = ", status); - STATUS_TO_TEXT(CARD_REMOVE); - STATUS_TO_TEXT(CARD_INSERT); - STATUS_TO_TEXT(SIGSTATE); - STATUS_TO_TEXT(WRPROTECT); - STATUS_TO_TEXT(CARD_REMOVE_A); - STATUS_TO_TEXT(CARD_INSERT_A); - STATUS_TO_TEXT(SIGSTATE_A); - STATUS_TO_TEXT(CMD_IDX_ERR); - STATUS_TO_TEXT(STOPBIT_ERR); - STATUS_TO_TEXT(ILL_FUNC); - STATUS_TO_TEXT(CMD_BUSY); - STATUS_TO_TEXT(CMDRESPEND); - STATUS_TO_TEXT(DATAEND); - STATUS_TO_TEXT(CRCFAIL); - STATUS_TO_TEXT(DATATIMEOUT); - STATUS_TO_TEXT(CMDTIMEOUT); - STATUS_TO_TEXT(RXOVERFLOW); - STATUS_TO_TEXT(TXUNDERRUN); - STATUS_TO_TEXT(RXRDY); - STATUS_TO_TEXT(TXRQ); - STATUS_TO_TEXT(ILL_ACCESS); - printk("\n"); -} - -#else -#define pr_debug_status(s) do { } while (0) -#endif - -static void tmio_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable) -{ - struct tmio_mmc_host *host = mmc_priv(mmc); - - if (enable) { - host->sdio_irq_enabled = 1; - sd_ctrl_write16(host, CTL_TRANSACTION_CTL, 0x0001); - sd_ctrl_write16(host, CTL_SDIO_IRQ_MASK, - (TMIO_SDIO_MASK_ALL & ~TMIO_SDIO_STAT_IOIRQ)); - } else { - sd_ctrl_write16(host, CTL_SDIO_IRQ_MASK, TMIO_SDIO_MASK_ALL); - sd_ctrl_write16(host, CTL_TRANSACTION_CTL, 0x0000); - host->sdio_irq_enabled = 0; - } -} +#include "tmio_mmc.h" static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock) { @@ -300,23 +55,8 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock) static void tmio_mmc_clk_stop(struct tmio_mmc_host *host) { - struct mfd_cell *cell = host->pdev->dev.platform_data; - struct tmio_mmc_data *pdata = cell->driver_data; - - /* - * Testing on sh-mobile showed that SDIO IRQs are unmasked when - * CTL_CLK_AND_WAIT_CTL gets written, so we have to disable the - * device IRQ here and restore the SDIO IRQ mask before - * re-enabling the device IRQ. 
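The sd_ctrl_* helpers being dropped from this file (and re-added as inlines in the new tmio_mmc.h later in this patch) model every controller register as one or two 16-bit words, with the byte offset scaled by bus_shift to match how the part is wired. A standalone model of that addressing scheme, with a plain array standing in for the MMIO window (hypothetical names, no kernel headers):

#include <stdint.h>
#include <stdio.h>

#define CTL_IRQ_MASK    0x20        /* register offsets are "native" byte offsets ... */

struct fake_host {
    uint16_t *ctl;                  /* ... but the bus may space them out */
    unsigned long bus_shift;        /* 0: 16-bit spacing, 1: 32-bit spacing */
};

static uint16_t rd16(struct fake_host *h, int addr)
{
    return h->ctl[(addr << h->bus_shift) / 2];
}

static void wr16(struct fake_host *h, int addr, uint16_t val)
{
    h->ctl[(addr << h->bus_shift) / 2] = val;
}

/* 32-bit registers are two consecutive 16-bit halves, low half first. */
static uint32_t rd32(struct fake_host *h, int addr)
{
    return rd16(h, addr) | (uint32_t)rd16(h, addr + 2) << 16;
}

static void wr32(struct fake_host *h, int addr, uint32_t val)
{
    wr16(h, addr, val & 0xffff);
    wr16(h, addr + 2, val >> 16);
}

int main(void)
{
    uint16_t regs[0x200] = { 0 };
    struct fake_host h = { .ctl = regs, .bus_shift = 1 };

    wr32(&h, CTL_IRQ_MASK, 0x837f031d);    /* TMIO_MASK_ALL from this patch */
    printf("0x%08x\n", rd32(&h, CTL_IRQ_MASK));
    return 0;
}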
- */ - if (pdata->flags & TMIO_MMC_SDIO_IRQ) - disable_irq(host->irq); sd_ctrl_write16(host, CTL_CLK_AND_WAIT_CTL, 0x0000); msleep(10); - if (pdata->flags & TMIO_MMC_SDIO_IRQ) { - tmio_mmc_enable_sdio_irq(host->mmc, host->sdio_irq_enabled); - enable_irq(host->irq); - } sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~0x0100 & sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL)); msleep(10); @@ -324,21 +64,11 @@ static void tmio_mmc_clk_stop(struct tmio_mmc_host *host) static void tmio_mmc_clk_start(struct tmio_mmc_host *host) { - struct mfd_cell *cell = host->pdev->dev.platform_data; - struct tmio_mmc_data *pdata = cell->driver_data; - sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, 0x0100 | sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL)); msleep(10); - /* see comment in tmio_mmc_clk_stop above */ - if (pdata->flags & TMIO_MMC_SDIO_IRQ) - disable_irq(host->irq); sd_ctrl_write16(host, CTL_CLK_AND_WAIT_CTL, 0x0100); msleep(10); - if (pdata->flags & TMIO_MMC_SDIO_IRQ) { - tmio_mmc_enable_sdio_irq(host->mmc, host->sdio_irq_enabled); - enable_irq(host->irq); - } } static void reset(struct tmio_mmc_host *host) @@ -352,60 +82,15 @@ static void reset(struct tmio_mmc_host *host) msleep(10); } -static void tmio_mmc_reset_work(struct work_struct *work) -{ - struct tmio_mmc_host *host = container_of(work, struct tmio_mmc_host, - delayed_reset_work.work); - struct mmc_request *mrq; - unsigned long flags; - - spin_lock_irqsave(&host->lock, flags); - mrq = host->mrq; - - /* request already finished */ - if (!mrq - || time_is_after_jiffies(host->last_req_ts + - msecs_to_jiffies(2000))) { - spin_unlock_irqrestore(&host->lock, flags); - return; - } - - dev_warn(&host->pdev->dev, - "timeout waiting for hardware interrupt (CMD%u)\n", - mrq->cmd->opcode); - - if (host->data) - host->data->error = -ETIMEDOUT; - else if (host->cmd) - host->cmd->error = -ETIMEDOUT; - else - mrq->cmd->error = -ETIMEDOUT; - - host->cmd = NULL; - host->data = NULL; - host->mrq = NULL; - - spin_unlock_irqrestore(&host->lock, flags); - - reset(host); - - mmc_request_done(host->mmc, mrq); -} - static void tmio_mmc_finish_request(struct tmio_mmc_host *host) { struct mmc_request *mrq = host->mrq; - if (!mrq) - return; - host->mrq = NULL; host->cmd = NULL; host->data = NULL; - cancel_delayed_work(&host->delayed_reset_work); - mmc_request_done(host->mmc, mrq); } @@ -515,7 +200,6 @@ static void tmio_mmc_pio_irq(struct tmio_mmc_host *host) return; } -/* needs to be called with host->lock held */ static void tmio_mmc_do_data_irq(struct tmio_mmc_host *host) { struct mmc_data *data = host->data; @@ -549,8 +233,6 @@ static void tmio_mmc_do_data_irq(struct tmio_mmc_host *host) if (data->flags & MMC_DATA_READ) { if (!host->chan_rx) disable_mmc_irqs(host, TMIO_MASK_READOP); - else - tmio_check_bounce_buffer(host); dev_dbg(&host->pdev->dev, "Complete Rx request %p\n", host->mrq); } else { @@ -572,12 +254,10 @@ static void tmio_mmc_do_data_irq(struct tmio_mmc_host *host) static void tmio_mmc_data_irq(struct tmio_mmc_host *host) { - struct mmc_data *data; - spin_lock(&host->lock); - data = host->data; + struct mmc_data *data = host->data; if (!data) - goto out; + return; if (host->chan_tx && (data->flags & MMC_DATA_WRITE)) { /* @@ -598,8 +278,6 @@ static void tmio_mmc_data_irq(struct tmio_mmc_host *host) } else { tmio_mmc_do_data_irq(host); } -out: - spin_unlock(&host->lock); } static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host, @@ -608,11 +286,9 @@ static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host, struct mmc_command *cmd = host->cmd; int i, addr; - 
spin_lock(&host->lock); - if (!host->cmd) { pr_debug("Spurious CMD irq\n"); - goto out; + return; } host->cmd = NULL; @@ -648,7 +324,8 @@ static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host, if (!host->chan_rx) enable_mmc_irqs(host, TMIO_MASK_READOP); } else { - if (!host->chan_tx) + struct dma_chan *chan = host->chan_tx; + if (!chan) enable_mmc_irqs(host, TMIO_MASK_WRITEOP); else tasklet_schedule(&host->dma_issue); @@ -657,19 +334,13 @@ static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host, tmio_mmc_finish_request(host); } -out: - spin_unlock(&host->lock); - return; } static irqreturn_t tmio_mmc_irq(int irq, void *devid) { struct tmio_mmc_host *host = devid; - struct mfd_cell *cell = host->pdev->dev.platform_data; - struct tmio_mmc_data *pdata = cell->driver_data; unsigned int ireg, irq_mask, status; - unsigned int sdio_ireg, sdio_irq_mask, sdio_status; pr_debug("MMC IRQ begin\n"); @@ -677,29 +348,6 @@ static irqreturn_t tmio_mmc_irq(int irq, void *devid) irq_mask = sd_ctrl_read32(host, CTL_IRQ_MASK); ireg = status & TMIO_MASK_IRQ & ~irq_mask; - sdio_ireg = 0; - if (!ireg && pdata->flags & TMIO_MMC_SDIO_IRQ) { - sdio_status = sd_ctrl_read16(host, CTL_SDIO_STATUS); - sdio_irq_mask = sd_ctrl_read16(host, CTL_SDIO_IRQ_MASK); - sdio_ireg = sdio_status & TMIO_SDIO_MASK_ALL & ~sdio_irq_mask; - - sd_ctrl_write16(host, CTL_SDIO_STATUS, sdio_status & ~TMIO_SDIO_MASK_ALL); - - if (sdio_ireg && !host->sdio_irq_enabled) { - pr_warning("tmio_mmc: Spurious SDIO IRQ, disabling! 0x%04x 0x%04x 0x%04x\n", - sdio_status, sdio_irq_mask, sdio_ireg); - tmio_mmc_enable_sdio_irq(host->mmc, 0); - goto out; - } - - if (host->mmc->caps & MMC_CAP_SDIO_IRQ && - sdio_ireg & TMIO_SDIO_STAT_IOIRQ) - mmc_signal_sdio_irq(host->mmc); - - if (sdio_ireg) - goto out; - } - pr_debug_status(status); pr_debug_status(ireg); @@ -727,10 +375,8 @@ static irqreturn_t tmio_mmc_irq(int irq, void *devid) */ /* Command completion */ - if (ireg & (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT)) { - ack_mmc_irqs(host, - TMIO_STAT_CMDRESPEND | - TMIO_STAT_CMDTIMEOUT); + if (ireg & TMIO_MASK_CMD) { + ack_mmc_irqs(host, TMIO_MASK_CMD); tmio_mmc_cmd_irq(host, status); } @@ -761,16 +407,6 @@ static irqreturn_t tmio_mmc_irq(int irq, void *devid) } #ifdef CONFIG_TMIO_MMC_DMA -static void tmio_check_bounce_buffer(struct tmio_mmc_host *host) -{ - if (host->sg_ptr == &host->bounce_sg) { - unsigned long flags; - void *sg_vaddr = tmio_mmc_kmap_atomic(host->sg_orig, &flags); - memcpy(sg_vaddr, host->bounce_buf, host->bounce_sg.length); - tmio_mmc_kunmap_atomic(sg_vaddr, &flags); - } -} - static void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable) { #if defined(CONFIG_SUPERH) || defined(CONFIG_ARCH_SHMOBILE) @@ -791,39 +427,12 @@ static void tmio_dma_complete(void *arg) enable_mmc_irqs(host, TMIO_STAT_DATAEND); } -static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host) +static int tmio_mmc_start_dma_rx(struct tmio_mmc_host *host) { - struct scatterlist *sg = host->sg_ptr, *sg_tmp; + struct scatterlist *sg = host->sg_ptr; struct dma_async_tx_descriptor *desc = NULL; struct dma_chan *chan = host->chan_rx; - struct mfd_cell *cell = host->pdev->dev.platform_data; - struct tmio_mmc_data *pdata = cell->driver_data; - dma_cookie_t cookie; - int ret, i; - bool aligned = true, multiple = true; - unsigned int align = (1 << pdata->dma->alignment_shift) - 1; - - for_each_sg(sg, sg_tmp, host->sg_len, i) { - if (sg_tmp->offset & align) - aligned = false; - if (sg_tmp->length & align) { - multiple = false; - break; - } - } - - if ((!aligned && 
(host->sg_len > 1 || sg->length > PAGE_CACHE_SIZE || - align >= MAX_ALIGN)) || !multiple) { - ret = -EINVAL; - goto pio; - } - - /* The only sg element can be unaligned, use our bounce buffer then */ - if (!aligned) { - sg_init_one(&host->bounce_sg, host->bounce_buf, sg->length); - host->sg_ptr = &host->bounce_sg; - sg = host->sg_ptr; - } + int ret; ret = dma_map_sg(&host->pdev->dev, sg, host->sg_len, DMA_FROM_DEVICE); if (ret > 0) { @@ -833,21 +442,21 @@ static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host) } if (desc) { + host->desc = desc; desc->callback = tmio_dma_complete; desc->callback_param = host; - cookie = desc->tx_submit(desc); - if (cookie < 0) { - desc = NULL; - ret = cookie; + host->cookie = desc->tx_submit(desc); + if (host->cookie < 0) { + host->desc = NULL; + ret = host->cookie; } else { chan->device->device_issue_pending(chan); } } dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n", - __func__, host->sg_len, ret, cookie, host->mrq); + __func__, host->sg_len, ret, host->cookie, host->mrq); -pio: - if (!desc) { + if (!host->desc) { /* DMA failed, fall back to PIO */ if (ret >= 0) ret = -EIO; @@ -862,49 +471,24 @@ static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host) dev_warn(&host->pdev->dev, "DMA failed: %d, falling back to PIO\n", ret); tmio_mmc_enable_dma(host, false); + reset(host); + /* Fail this request, let above layers recover */ + host->mrq->cmd->error = ret; + tmio_mmc_finish_request(host); } dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d, sg[%d]\n", __func__, - desc, cookie, host->sg_len); + desc, host->cookie, host->sg_len); + + return ret > 0 ? 0 : ret; } -static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host) +static int tmio_mmc_start_dma_tx(struct tmio_mmc_host *host) { - struct scatterlist *sg = host->sg_ptr, *sg_tmp; + struct scatterlist *sg = host->sg_ptr; struct dma_async_tx_descriptor *desc = NULL; struct dma_chan *chan = host->chan_tx; - struct mfd_cell *cell = host->pdev->dev.platform_data; - struct tmio_mmc_data *pdata = cell->driver_data; - dma_cookie_t cookie; - int ret, i; - bool aligned = true, multiple = true; - unsigned int align = (1 << pdata->dma->alignment_shift) - 1; - - for_each_sg(sg, sg_tmp, host->sg_len, i) { - if (sg_tmp->offset & align) - aligned = false; - if (sg_tmp->length & align) { - multiple = false; - break; - } - } - - if ((!aligned && (host->sg_len > 1 || sg->length > PAGE_CACHE_SIZE || - align >= MAX_ALIGN)) || !multiple) { - ret = -EINVAL; - goto pio; - } - - /* The only sg element can be unaligned, use our bounce buffer then */ - if (!aligned) { - unsigned long flags; - void *sg_vaddr = tmio_mmc_kmap_atomic(sg, &flags); - sg_init_one(&host->bounce_sg, host->bounce_buf, sg->length); - memcpy(host->bounce_buf, sg_vaddr, host->bounce_sg.length); - tmio_mmc_kunmap_atomic(sg_vaddr, &flags); - host->sg_ptr = &host->bounce_sg; - sg = host->sg_ptr; - } + int ret; ret = dma_map_sg(&host->pdev->dev, sg, host->sg_len, DMA_TO_DEVICE); if (ret > 0) { @@ -914,19 +498,19 @@ static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host) } if (desc) { + host->desc = desc; desc->callback = tmio_dma_complete; desc->callback_param = host; - cookie = desc->tx_submit(desc); - if (cookie < 0) { - desc = NULL; - ret = cookie; + host->cookie = desc->tx_submit(desc); + if (host->cookie < 0) { + host->desc = NULL; + ret = host->cookie; } } dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n", - __func__, host->sg_len, ret, cookie, host->mrq); + __func__, host->sg_len, ret, host->cookie, 
host->mrq); -pio: - if (!desc) { + if (!host->desc) { /* DMA failed, fall back to PIO */ if (ret >= 0) ret = -EIO; @@ -941,22 +525,30 @@ static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host) dev_warn(&host->pdev->dev, "DMA failed: %d, falling back to PIO\n", ret); tmio_mmc_enable_dma(host, false); + reset(host); + /* Fail this request, let above layers recover */ + host->mrq->cmd->error = ret; + tmio_mmc_finish_request(host); } dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d\n", __func__, - desc, cookie); + desc, host->cookie); + + return ret > 0 ? 0 : ret; } -static void tmio_mmc_start_dma(struct tmio_mmc_host *host, +static int tmio_mmc_start_dma(struct tmio_mmc_host *host, struct mmc_data *data) { if (data->flags & MMC_DATA_READ) { if (host->chan_rx) - tmio_mmc_start_dma_rx(host); + return tmio_mmc_start_dma_rx(host); } else { if (host->chan_tx) - tmio_mmc_start_dma_tx(host); + return tmio_mmc_start_dma_tx(host); } + + return 0; } static void tmio_issue_tasklet_fn(unsigned long priv) @@ -970,12 +562,6 @@ static void tmio_issue_tasklet_fn(unsigned long priv) static void tmio_tasklet_fn(unsigned long arg) { struct tmio_mmc_host *host = (struct tmio_mmc_host *)arg; - unsigned long flags; - - spin_lock_irqsave(&host->lock, flags); - - if (!host->data) - goto out; if (host->data->flags & MMC_DATA_READ) dma_unmap_sg(&host->pdev->dev, host->sg_ptr, host->dma_sglen, @@ -985,8 +571,6 @@ static void tmio_tasklet_fn(unsigned long arg) DMA_TO_DEVICE); tmio_mmc_do_data_irq(host); -out: - spin_unlock_irqrestore(&host->lock, flags); } /* It might be necessary to make filter MFD specific */ @@ -1000,6 +584,9 @@ static bool tmio_mmc_filter(struct dma_chan *chan, void *arg) static void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdata) { + host->cookie = -EINVAL; + host->desc = NULL; + /* We can only either use DMA for both Tx and Rx or not use it at all */ if (pdata->dma) { dma_cap_mask_t mask; @@ -1045,15 +632,15 @@ static void tmio_mmc_release_dma(struct tmio_mmc_host *host) host->chan_rx = NULL; dma_release_channel(chan); } + + host->cookie = -EINVAL; + host->desc = NULL; } #else -static void tmio_check_bounce_buffer(struct tmio_mmc_host *host) -{ -} - -static void tmio_mmc_start_dma(struct tmio_mmc_host *host, +static int tmio_mmc_start_dma(struct tmio_mmc_host *host, struct mmc_data *data) { + return 0; } static void tmio_mmc_request_dma(struct tmio_mmc_host *host, @@ -1095,9 +682,7 @@ static int tmio_mmc_start_data(struct tmio_mmc_host *host, sd_ctrl_write16(host, CTL_SD_XFER_LEN, data->blksz); sd_ctrl_write16(host, CTL_XFER_BLK_COUNT, data->blocks); - tmio_mmc_start_dma(host, data); - - return 0; + return tmio_mmc_start_dma(host, data); } /* Process requests from the MMC layer */ @@ -1109,8 +694,6 @@ static void tmio_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq) if (host->mrq) pr_debug("request not null\n"); - host->last_req_ts = jiffies; - wmb(); host->mrq = mrq; if (mrq->data) { @@ -1120,14 +703,10 @@ static void tmio_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq) } ret = tmio_mmc_start_command(host, mrq->cmd); - if (!ret) { - schedule_delayed_work(&host->delayed_reset_work, - msecs_to_jiffies(2000)); + if (!ret) return; - } fail: - host->mrq = NULL; mrq->cmd->error = ret; mmc_request_done(mmc, mrq); } @@ -1201,7 +780,6 @@ static const struct mmc_host_ops tmio_mmc_ops = { .set_ios = tmio_mmc_set_ios, .get_ro = tmio_mmc_get_ro, .get_cd = tmio_mmc_get_cd, - .enable_sdio_irq = tmio_mmc_enable_sdio_irq, }; #ifdef CONFIG_PM @@ -1286,15 
+864,10 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev) goto host_free; mmc->ops = &tmio_mmc_ops; - mmc->caps = MMC_CAP_4_BIT_DATA | pdata->capabilities; + mmc->caps = MMC_CAP_4_BIT_DATA; + mmc->caps |= pdata->capabilities; mmc->f_max = pdata->hclk; mmc->f_min = mmc->f_max / 512; - mmc->max_segs = 32; - mmc->max_blk_size = 512; - mmc->max_blk_count = (PAGE_CACHE_SIZE / mmc->max_blk_size) * - mmc->max_segs; - mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count; - mmc->max_seg_size = mmc->max_req_size; if (pdata->ocr_mask) mmc->ocr_avail = pdata->ocr_mask; else @@ -1317,19 +890,12 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev) goto cell_disable; disable_mmc_irqs(host, TMIO_MASK_ALL); - if (pdata->flags & TMIO_MMC_SDIO_IRQ) - tmio_mmc_enable_sdio_irq(mmc, 0); ret = request_irq(host->irq, tmio_mmc_irq, IRQF_DISABLED | IRQF_TRIGGER_FALLING, dev_name(&dev->dev), host); if (ret) goto cell_disable; - spin_lock_init(&host->lock); - - /* Init delayed work for request timeouts */ - INIT_DELAYED_WORK(&host->delayed_reset_work, tmio_mmc_reset_work); - /* See if we also get DMA */ tmio_mmc_request_dma(host, pdata); @@ -1368,7 +934,6 @@ static int __devexit tmio_mmc_remove(struct platform_device *dev) if (mmc) { struct tmio_mmc_host *host = mmc_priv(mmc); mmc_remove_host(mmc); - cancel_delayed_work_sync(&host->delayed_reset_work); tmio_mmc_release_dma(host); free_irq(host->irq, host); if (cell->disable) diff --git a/trunk/drivers/mmc/host/tmio_mmc.h b/trunk/drivers/mmc/host/tmio_mmc.h new file mode 100644 index 000000000000..0fedc78e3ea5 --- /dev/null +++ b/trunk/drivers/mmc/host/tmio_mmc.h @@ -0,0 +1,228 @@ +/* Definitons for use with the tmio_mmc.c + * + * (c) 2004 Ian Molton + * (c) 2007 Ian Molton + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include + +#define CTL_SD_CMD 0x00 +#define CTL_ARG_REG 0x04 +#define CTL_STOP_INTERNAL_ACTION 0x08 +#define CTL_XFER_BLK_COUNT 0xa +#define CTL_RESPONSE 0x0c +#define CTL_STATUS 0x1c +#define CTL_IRQ_MASK 0x20 +#define CTL_SD_CARD_CLK_CTL 0x24 +#define CTL_SD_XFER_LEN 0x26 +#define CTL_SD_MEM_CARD_OPT 0x28 +#define CTL_SD_ERROR_DETAIL_STATUS 0x2c +#define CTL_SD_DATA_PORT 0x30 +#define CTL_TRANSACTION_CTL 0x34 +#define CTL_RESET_SD 0xe0 +#define CTL_SDIO_REGS 0x100 +#define CTL_CLK_AND_WAIT_CTL 0x138 +#define CTL_RESET_SDIO 0x1e0 + +/* Definitions for values the CTRL_STATUS register can take. 
*/ +#define TMIO_STAT_CMDRESPEND 0x00000001 +#define TMIO_STAT_DATAEND 0x00000004 +#define TMIO_STAT_CARD_REMOVE 0x00000008 +#define TMIO_STAT_CARD_INSERT 0x00000010 +#define TMIO_STAT_SIGSTATE 0x00000020 +#define TMIO_STAT_WRPROTECT 0x00000080 +#define TMIO_STAT_CARD_REMOVE_A 0x00000100 +#define TMIO_STAT_CARD_INSERT_A 0x00000200 +#define TMIO_STAT_SIGSTATE_A 0x00000400 +#define TMIO_STAT_CMD_IDX_ERR 0x00010000 +#define TMIO_STAT_CRCFAIL 0x00020000 +#define TMIO_STAT_STOPBIT_ERR 0x00040000 +#define TMIO_STAT_DATATIMEOUT 0x00080000 +#define TMIO_STAT_RXOVERFLOW 0x00100000 +#define TMIO_STAT_TXUNDERRUN 0x00200000 +#define TMIO_STAT_CMDTIMEOUT 0x00400000 +#define TMIO_STAT_RXRDY 0x01000000 +#define TMIO_STAT_TXRQ 0x02000000 +#define TMIO_STAT_ILL_FUNC 0x20000000 +#define TMIO_STAT_CMD_BUSY 0x40000000 +#define TMIO_STAT_ILL_ACCESS 0x80000000 + +/* Define some IRQ masks */ +/* This is the mask used at reset by the chip */ +#define TMIO_MASK_ALL 0x837f031d +#define TMIO_MASK_READOP (TMIO_STAT_RXRDY | TMIO_STAT_DATAEND) +#define TMIO_MASK_WRITEOP (TMIO_STAT_TXRQ | TMIO_STAT_DATAEND) +#define TMIO_MASK_CMD (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT | \ + TMIO_STAT_CARD_REMOVE | TMIO_STAT_CARD_INSERT) +#define TMIO_MASK_IRQ (TMIO_MASK_READOP | TMIO_MASK_WRITEOP | TMIO_MASK_CMD) + + +#define enable_mmc_irqs(host, i) \ + do { \ + u32 mask;\ + mask = sd_ctrl_read32((host), CTL_IRQ_MASK); \ + mask &= ~((i) & TMIO_MASK_IRQ); \ + sd_ctrl_write32((host), CTL_IRQ_MASK, mask); \ + } while (0) + +#define disable_mmc_irqs(host, i) \ + do { \ + u32 mask;\ + mask = sd_ctrl_read32((host), CTL_IRQ_MASK); \ + mask |= ((i) & TMIO_MASK_IRQ); \ + sd_ctrl_write32((host), CTL_IRQ_MASK, mask); \ + } while (0) + +#define ack_mmc_irqs(host, i) \ + do { \ + sd_ctrl_write32((host), CTL_STATUS, ~(i)); \ + } while (0) + + +struct tmio_mmc_host { + void __iomem *ctl; + unsigned long bus_shift; + struct mmc_command *cmd; + struct mmc_request *mrq; + struct mmc_data *data; + struct mmc_host *mmc; + int irq; + + /* Callbacks for clock / power control */ + void (*set_pwr)(struct platform_device *host, int state); + void (*set_clk_div)(struct platform_device *host, int state); + + /* pio related stuff */ + struct scatterlist *sg_ptr; + unsigned int sg_len; + unsigned int sg_off; + + struct platform_device *pdev; + + /* DMA support */ + struct dma_chan *chan_rx; + struct dma_chan *chan_tx; + struct tasklet_struct dma_complete; + struct tasklet_struct dma_issue; +#ifdef CONFIG_TMIO_MMC_DMA + struct dma_async_tx_descriptor *desc; + unsigned int dma_sglen; + dma_cookie_t cookie; +#endif +}; + +#include + +static inline u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr) +{ + return readw(host->ctl + (addr << host->bus_shift)); +} + +static inline void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr, + u16 *buf, int count) +{ + readsw(host->ctl + (addr << host->bus_shift), buf, count); +} + +static inline u32 sd_ctrl_read32(struct tmio_mmc_host *host, int addr) +{ + return readw(host->ctl + (addr << host->bus_shift)) | + readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16; +} + +static inline void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, + u16 val) +{ + writew(val, host->ctl + (addr << host->bus_shift)); +} + +static inline void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr, + u16 *buf, int count) +{ + writesw(host->ctl + (addr << host->bus_shift), buf, count); +} + +static inline void sd_ctrl_write32(struct tmio_mmc_host *host, int addr, + u32 val) +{ + writew(val, host->ctl + (addr << 
host->bus_shift)); + writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift)); +} + +#include +#include + +static inline void tmio_mmc_init_sg(struct tmio_mmc_host *host, + struct mmc_data *data) +{ + host->sg_len = data->sg_len; + host->sg_ptr = data->sg; + host->sg_off = 0; +} + +static inline int tmio_mmc_next_sg(struct tmio_mmc_host *host) +{ + host->sg_ptr = sg_next(host->sg_ptr); + host->sg_off = 0; + return --host->sg_len; +} + +static inline char *tmio_mmc_kmap_atomic(struct scatterlist *sg, + unsigned long *flags) +{ + local_irq_save(*flags); + return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset; +} + +static inline void tmio_mmc_kunmap_atomic(void *virt, + unsigned long *flags) +{ + kunmap_atomic(virt, KM_BIO_SRC_IRQ); + local_irq_restore(*flags); +} + +#ifdef CONFIG_MMC_DEBUG + +#define STATUS_TO_TEXT(a) \ + do { \ + if (status & TMIO_STAT_##a) \ + printk(#a); \ + } while (0) + +void pr_debug_status(u32 status) +{ + printk(KERN_DEBUG "status: %08x = ", status); + STATUS_TO_TEXT(CARD_REMOVE); + STATUS_TO_TEXT(CARD_INSERT); + STATUS_TO_TEXT(SIGSTATE); + STATUS_TO_TEXT(WRPROTECT); + STATUS_TO_TEXT(CARD_REMOVE_A); + STATUS_TO_TEXT(CARD_INSERT_A); + STATUS_TO_TEXT(SIGSTATE_A); + STATUS_TO_TEXT(CMD_IDX_ERR); + STATUS_TO_TEXT(STOPBIT_ERR); + STATUS_TO_TEXT(ILL_FUNC); + STATUS_TO_TEXT(CMD_BUSY); + STATUS_TO_TEXT(CMDRESPEND); + STATUS_TO_TEXT(DATAEND); + STATUS_TO_TEXT(CRCFAIL); + STATUS_TO_TEXT(DATATIMEOUT); + STATUS_TO_TEXT(CMDTIMEOUT); + STATUS_TO_TEXT(RXOVERFLOW); + STATUS_TO_TEXT(TXUNDERRUN); + STATUS_TO_TEXT(RXRDY); + STATUS_TO_TEXT(TXRQ); + STATUS_TO_TEXT(ILL_ACCESS); + printk("\n"); +} + +#else +#define pr_debug_status(s) do { } while (0) +#endif diff --git a/trunk/drivers/rtc/class.c b/trunk/drivers/rtc/class.c index 9583cbcc6b79..e6539cbabb35 100644 --- a/trunk/drivers/rtc/class.c +++ b/trunk/drivers/rtc/class.c @@ -16,7 +16,6 @@ #include #include #include -#include #include "rtc-core.h" @@ -153,18 +152,6 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev, spin_lock_init(&rtc->irq_task_lock); init_waitqueue_head(&rtc->irq_queue); - /* Init timerqueue */ - timerqueue_init_head(&rtc->timerqueue); - INIT_WORK(&rtc->irqwork, rtc_timer_do_work); - /* Init aie timer */ - rtc_timer_init(&rtc->aie_timer, rtc_aie_update_irq, (void *)rtc); - /* Init uie timer */ - rtc_timer_init(&rtc->uie_rtctimer, rtc_uie_update_irq, (void *)rtc); - /* Init pie timer */ - hrtimer_init(&rtc->pie_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - rtc->pie_timer.function = rtc_pie_update_irq; - rtc->pie_enabled = 0; - strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE); dev_set_name(&rtc->dev, "rtc%d", id); diff --git a/trunk/drivers/rtc/interface.c b/trunk/drivers/rtc/interface.c index 90384b9f6b2c..a0c816238aa9 100644 --- a/trunk/drivers/rtc/interface.c +++ b/trunk/drivers/rtc/interface.c @@ -14,11 +14,15 @@ #include #include #include -#include -static int __rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm) +int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm) { int err; + + err = mutex_lock_interruptible(&rtc->ops_lock); + if (err) + return err; + if (!rtc->ops) err = -ENODEV; else if (!rtc->ops->read_time) @@ -27,18 +31,7 @@ static int __rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm) memset(tm, 0, sizeof(struct rtc_time)); err = rtc->ops->read_time(rtc->dev.parent, tm); } - return err; -} - -int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm) -{ - int err; - err = mutex_lock_interruptible(&rtc->ops_lock); - if (err) - return err; 
- - err = __rtc_read_time(rtc, tm); mutex_unlock(&rtc->ops_lock); return err; } @@ -113,54 +106,188 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs) } EXPORT_SYMBOL_GPL(rtc_set_mmss); -int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) +static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { int err; err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; - alarm->enabled = rtc->aie_timer.enabled; - if (alarm->enabled) - alarm->time = rtc_ktime_to_tm(rtc->aie_timer.node.expires); - mutex_unlock(&rtc->ops_lock); - return 0; + if (rtc->ops == NULL) + err = -ENODEV; + else if (!rtc->ops->read_alarm) + err = -EINVAL; + else { + memset(alarm, 0, sizeof(struct rtc_wkalrm)); + err = rtc->ops->read_alarm(rtc->dev.parent, alarm); + } + + mutex_unlock(&rtc->ops_lock); + return err; } -EXPORT_SYMBOL_GPL(rtc_read_alarm); -int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) +int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { - struct rtc_time tm; - long now, scheduled; int err; + struct rtc_time before, now; + int first_time = 1; + unsigned long t_now, t_alm; + enum { none, day, month, year } missing = none; + unsigned days; + + /* The lower level RTC driver may return -1 in some fields, + * creating invalid alarm->time values, for reasons like: + * + * - The hardware may not be capable of filling them in; + * many alarms match only on time-of-day fields, not + * day/month/year calendar data. + * + * - Some hardware uses illegal values as "wildcard" match + * values, which non-Linux firmware (like a BIOS) may try + * to set up as e.g. "alarm 15 minutes after each hour". + * Linux uses only oneshot alarms. + * + * When we see that here, we deal with it by using values from + * a current RTC timestamp for any missing (-1) values. The + * RTC driver prevents "periodic alarm" modes. + * + * But this can be racey, because some fields of the RTC timestamp + * may have wrapped in the interval since we read the RTC alarm, + * which would lead to us inserting inconsistent values in place + * of the -1 fields. + * + * Reading the alarm and timestamp in the reverse sequence + * would have the same race condition, and not solve the issue. + * + * So, we must first read the RTC timestamp, + * then read the RTC alarm value, + * and then read a second RTC timestamp. + * + * If any fields of the second timestamp have changed + * when compared with the first timestamp, then we know + * our timestamp may be inconsistent with that used by + * the low-level rtc_read_alarm_internal() function. + * + * So, when the two timestamps disagree, we just loop and do + * the process again to get a fully consistent set of values. + * + * This could all instead be done in the lower level driver, + * but since more than one lower level RTC implementation needs it, + * then it's probably best best to do it here instead of there.. + */ - err = rtc_valid_tm(&alarm->time); - if (err) + /* Get the "before" timestamp */ + err = rtc_read_time(rtc, &before); + if (err < 0) return err; - rtc_tm_to_time(&alarm->time, &scheduled); - - /* Make sure we're not setting alarms in the past */ - err = __rtc_read_time(rtc, &tm); - rtc_tm_to_time(&tm, &now); - if (scheduled <= now) - return -ETIME; - /* - * XXX - We just checked to make sure the alarm time is not - * in the past, but there is still a race window where if - * the is alarm set for the next second and the second ticks - * over right here, before we set the alarm. 
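The block comment above explains the approach the new rtc_read_alarm() takes: read the RTC before and after reading the alarm so wrapped fields can be detected, fill any -1 ("don't care") alarm fields from the current time, then roll the result forward until it lies in the future; the do/while loop and switch added just below implement it. As a standalone worked example of the simplest case, the 24-hour ("day") rollover, using <time.h> in place of the RTC helpers:

#include <stdio.h>
#include <time.h>

int main(void)
{
    /* "now": 10:00:00 on 15 Nov 2010 (months 0-based, years since 1900). */
    struct tm now = { .tm_year = 110, .tm_mon = 10, .tm_mday = 15,
                      .tm_hour = 10, .tm_min = 0, .tm_sec = 0 };
    /* Alarm hardware only reported a time of day: 05:00; day/month/year were -1
     * and have been filled in from "now", as the added code does. */
    struct tm alarm = now;
    alarm.tm_hour = 5;
    alarm.tm_min = 0;
    alarm.tm_sec = 0;

    time_t t_now = mktime(&now);
    time_t t_alm = mktime(&alarm);

    if (t_alm <= t_now) {           /* 05:00 today has already passed ... */
        t_alm += 24 * 60 * 60;      /* ... so the alarm means 05:00 tomorrow */
        alarm = *localtime(&t_alm);
    }
    printf("alarm fires %04d-%02d-%02d %02d:%02d\n",
           alarm.tm_year + 1900, alarm.tm_mon + 1, alarm.tm_mday,
           alarm.tm_hour, alarm.tm_min);
    return 0;
}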
+ do { + if (!first_time) + memcpy(&before, &now, sizeof(struct rtc_time)); + first_time = 0; + + /* get the RTC alarm values, which may be incomplete */ + err = rtc_read_alarm_internal(rtc, alarm); + if (err) + return err; + if (!alarm->enabled) + return 0; + + /* full-function RTCs won't have such missing fields */ + if (rtc_valid_tm(&alarm->time) == 0) + return 0; + + /* get the "after" timestamp, to detect wrapped fields */ + err = rtc_read_time(rtc, &now); + if (err < 0) + return err; + + /* note that tm_sec is a "don't care" value here: */ + } while ( before.tm_min != now.tm_min + || before.tm_hour != now.tm_hour + || before.tm_mon != now.tm_mon + || before.tm_year != now.tm_year); + + /* Fill in the missing alarm fields using the timestamp; we + * know there's at least one since alarm->time is invalid. */ + if (alarm->time.tm_sec == -1) + alarm->time.tm_sec = now.tm_sec; + if (alarm->time.tm_min == -1) + alarm->time.tm_min = now.tm_min; + if (alarm->time.tm_hour == -1) + alarm->time.tm_hour = now.tm_hour; + + /* For simplicity, only support date rollover for now */ + if (alarm->time.tm_mday == -1) { + alarm->time.tm_mday = now.tm_mday; + missing = day; + } + if (alarm->time.tm_mon == -1) { + alarm->time.tm_mon = now.tm_mon; + if (missing == none) + missing = month; + } + if (alarm->time.tm_year == -1) { + alarm->time.tm_year = now.tm_year; + if (missing == none) + missing = year; + } - if (!rtc->ops) - err = -ENODEV; - else if (!rtc->ops->set_alarm) - err = -EINVAL; - else - err = rtc->ops->set_alarm(rtc->dev.parent, alarm); + /* with luck, no rollover is needed */ + rtc_tm_to_time(&now, &t_now); + rtc_tm_to_time(&alarm->time, &t_alm); + if (t_now < t_alm) + goto done; - return err; + switch (missing) { + + /* 24 hour rollover ... if it's now 10am Monday, an alarm that + * that will trigger at 5am will do so at 5am Tuesday, which + * could also be in the next month or year. This is a common + * case, especially for PCs. + */ + case day: + dev_dbg(&rtc->dev, "alarm rollover: %s\n", "day"); + t_alm += 24 * 60 * 60; + rtc_time_to_tm(t_alm, &alarm->time); + break; + + /* Month rollover ... if it's the 31th, an alarm on the 3rd will + * be next month. An alarm matching on the 30th, 29th, or 28th + * may end up in the month after that! Many newer PCs support + * this type of alarm. + */ + case month: + dev_dbg(&rtc->dev, "alarm rollover: %s\n", "month"); + do { + if (alarm->time.tm_mon < 11) + alarm->time.tm_mon++; + else { + alarm->time.tm_mon = 0; + alarm->time.tm_year++; + } + days = rtc_month_days(alarm->time.tm_mon, + alarm->time.tm_year); + } while (days < alarm->time.tm_mday); + break; + + /* Year rollover ... easy except for leap years! 
*/ + case year: + dev_dbg(&rtc->dev, "alarm rollover: %s\n", "year"); + do { + alarm->time.tm_year++; + } while (rtc_valid_tm(&alarm->time) != 0); + break; + + default: + dev_warn(&rtc->dev, "alarm rollover not handled\n"); + } + +done: + return 0; } +EXPORT_SYMBOL_GPL(rtc_read_alarm); int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { @@ -173,18 +300,16 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; - if (rtc->aie_timer.enabled) { - rtc_timer_remove(rtc, &rtc->aie_timer); - rtc->aie_timer.enabled = 0; - } - rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time); - rtc->aie_timer.period = ktime_set(0, 0); - if (alarm->enabled) { - rtc->aie_timer.enabled = 1; - rtc_timer_enqueue(rtc, &rtc->aie_timer); - } + + if (!rtc->ops) + err = -ENODEV; + else if (!rtc->ops->set_alarm) + err = -EINVAL; + else + err = rtc->ops->set_alarm(rtc->dev.parent, alarm); + mutex_unlock(&rtc->ops_lock); - return 0; + return err; } EXPORT_SYMBOL_GPL(rtc_set_alarm); @@ -194,16 +319,6 @@ int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled) if (err) return err; - if (rtc->aie_timer.enabled != enabled) { - if (enabled) { - rtc->aie_timer.enabled = 1; - rtc_timer_enqueue(rtc, &rtc->aie_timer); - } else { - rtc_timer_remove(rtc, &rtc->aie_timer); - rtc->aie_timer.enabled = 0; - } - } - if (!rtc->ops) err = -ENODEV; else if (!rtc->ops->alarm_irq_enable) @@ -222,53 +337,52 @@ int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled) if (err) return err; - /* make sure we're changing state */ - if (rtc->uie_rtctimer.enabled == enabled) - goto out; - - if (enabled) { - struct rtc_time tm; - ktime_t now, onesec; - - __rtc_read_time(rtc, &tm); - onesec = ktime_set(1, 0); - now = rtc_tm_to_ktime(tm); - rtc->uie_rtctimer.node.expires = ktime_add(now, onesec); - rtc->uie_rtctimer.period = ktime_set(1, 0); - rtc->uie_rtctimer.enabled = 1; - rtc_timer_enqueue(rtc, &rtc->uie_rtctimer); - } else { - rtc_timer_remove(rtc, &rtc->uie_rtctimer); - rtc->uie_rtctimer.enabled = 0; +#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL + if (enabled == 0 && rtc->uie_irq_active) { + mutex_unlock(&rtc->ops_lock); + return rtc_dev_update_irq_enable_emul(rtc, enabled); } +#endif + + if (!rtc->ops) + err = -ENODEV; + else if (!rtc->ops->update_irq_enable) + err = -EINVAL; + else + err = rtc->ops->update_irq_enable(rtc->dev.parent, enabled); -out: mutex_unlock(&rtc->ops_lock); - return err; +#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL + /* + * Enable emulation if the driver did not provide + * the update_irq_enable function pointer or if returned + * -EINVAL to signal that it has been configured without + * interrupts or that are not available at the moment. + */ + if (err == -EINVAL) + err = rtc_dev_update_irq_enable_emul(rtc, enabled); +#endif + return err; } EXPORT_SYMBOL_GPL(rtc_update_irq_enable); - /** - * rtc_handle_legacy_irq - AIE, UIE and PIE event hook - * @rtc: pointer to the rtc device - * - * This function is called when an AIE, UIE or PIE mode interrupt - * has occured (or been emulated). - * - * Triggers the registered irq_task function callback. 
+ * rtc_update_irq - report RTC periodic, alarm, and/or update irqs + * @rtc: the rtc device + * @num: how many irqs are being reported (usually one) + * @events: mask of RTC_IRQF with one or more of RTC_PF, RTC_AF, RTC_UF + * Context: any */ -static void rtc_handle_legacy_irq(struct rtc_device *rtc, int num, int mode) +void rtc_update_irq(struct rtc_device *rtc, + unsigned long num, unsigned long events) { unsigned long flags; - /* mark one irq of the appropriate mode */ spin_lock_irqsave(&rtc->irq_lock, flags); - rtc->irq_data = (rtc->irq_data + (num << 8)) | (RTC_IRQF|mode); + rtc->irq_data = (rtc->irq_data + (num << 8)) | events; spin_unlock_irqrestore(&rtc->irq_lock, flags); - /* call the task func */ spin_lock_irqsave(&rtc->irq_task_lock, flags); if (rtc->irq_task) rtc->irq_task->func(rtc->irq_task->private_data); @@ -277,69 +391,6 @@ static void rtc_handle_legacy_irq(struct rtc_device *rtc, int num, int mode) wake_up_interruptible(&rtc->irq_queue); kill_fasync(&rtc->async_queue, SIGIO, POLL_IN); } - - -/** - * rtc_aie_update_irq - AIE mode rtctimer hook - * @private: pointer to the rtc_device - * - * This functions is called when the aie_timer expires. - */ -void rtc_aie_update_irq(void *private) -{ - struct rtc_device *rtc = (struct rtc_device *)private; - rtc_handle_legacy_irq(rtc, 1, RTC_AF); -} - - -/** - * rtc_uie_update_irq - UIE mode rtctimer hook - * @private: pointer to the rtc_device - * - * This functions is called when the uie_timer expires. - */ -void rtc_uie_update_irq(void *private) -{ - struct rtc_device *rtc = (struct rtc_device *)private; - rtc_handle_legacy_irq(rtc, 1, RTC_UF); -} - - -/** - * rtc_pie_update_irq - PIE mode hrtimer hook - * @timer: pointer to the pie mode hrtimer - * - * This function is used to emulate PIE mode interrupts - * using an hrtimer. This function is called when the periodic - * hrtimer expires. - */ -enum hrtimer_restart rtc_pie_update_irq(struct hrtimer *timer) -{ - struct rtc_device *rtc; - ktime_t period; - int count; - rtc = container_of(timer, struct rtc_device, pie_timer); - - period = ktime_set(0, NSEC_PER_SEC/rtc->irq_freq); - count = hrtimer_forward_now(timer, period); - - rtc_handle_legacy_irq(rtc, count, RTC_PF); - - return HRTIMER_RESTART; -} - -/** - * rtc_update_irq - Triggered when a RTC interrupt occurs. 
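rtc_update_irq(), restored just above, reports events by folding them into rtc->irq_data: the low byte accumulates RTC_IRQF plus the RTC_PF/RTC_AF/RTC_UF flags, and the number of reported interrupts is added into the bits above it. A tiny self-contained illustration of that encoding (the flag values mirror the classic rtc.h definitions, which are not part of this hunk):

#include <stdio.h>

#define RTC_IRQF 0x80   /* any of the following is active */
#define RTC_PF   0x40   /* periodic */
#define RTC_AF   0x20   /* alarm */
#define RTC_UF   0x10   /* update */

int main(void)
{
    unsigned long irq_data = 0;
    unsigned long num = 1, events = RTC_IRQF | RTC_AF;

    /* Same update rtc_update_irq() performs under irq_lock. */
    irq_data = (irq_data + (num << 8)) | events;

    printf("count=%lu flags=0x%02lx\n", irq_data >> 8, irq_data & 0xff);
    return 0;
}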
- * @rtc: the rtc device - * @num: how many irqs are being reported (usually one) - * @events: mask of RTC_IRQF with one or more of RTC_PF, RTC_AF, RTC_UF - * Context: any - */ -void rtc_update_irq(struct rtc_device *rtc, - unsigned long num, unsigned long events) -{ - schedule_work(&rtc->irqwork); -} EXPORT_SYMBOL_GPL(rtc_update_irq); static int __rtc_match(struct device *dev, void *data) @@ -426,21 +477,19 @@ int rtc_irq_set_state(struct rtc_device *rtc, struct rtc_task *task, int enabled int err = 0; unsigned long flags; + if (rtc->ops->irq_set_state == NULL) + return -ENXIO; + spin_lock_irqsave(&rtc->irq_task_lock, flags); if (rtc->irq_task != NULL && task == NULL) err = -EBUSY; if (rtc->irq_task != task) err = -EACCES; - - if (enabled) { - ktime_t period = ktime_set(0, NSEC_PER_SEC/rtc->irq_freq); - hrtimer_start(&rtc->pie_timer, period, HRTIMER_MODE_REL); - } else { - hrtimer_cancel(&rtc->pie_timer); - } - rtc->pie_enabled = enabled; spin_unlock_irqrestore(&rtc->irq_task_lock, flags); + if (err == 0) + err = rtc->ops->irq_set_state(rtc->dev.parent, enabled); + return err; } EXPORT_SYMBOL_GPL(rtc_irq_set_state); @@ -460,194 +509,21 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq) int err = 0; unsigned long flags; + if (rtc->ops->irq_set_freq == NULL) + return -ENXIO; + spin_lock_irqsave(&rtc->irq_task_lock, flags); if (rtc->irq_task != NULL && task == NULL) err = -EBUSY; if (rtc->irq_task != task) err = -EACCES; + spin_unlock_irqrestore(&rtc->irq_task_lock, flags); + if (err == 0) { - rtc->irq_freq = freq; - if (rtc->pie_enabled) { - ktime_t period; - hrtimer_cancel(&rtc->pie_timer); - period = ktime_set(0, NSEC_PER_SEC/rtc->irq_freq); - hrtimer_start(&rtc->pie_timer, period, - HRTIMER_MODE_REL); - } + err = rtc->ops->irq_set_freq(rtc->dev.parent, freq); + if (err == 0) + rtc->irq_freq = freq; } - spin_unlock_irqrestore(&rtc->irq_task_lock, flags); return err; } EXPORT_SYMBOL_GPL(rtc_irq_set_freq); - -/** - * rtc_timer_enqueue - Adds a rtc_timer to the rtc_device timerqueue - * @rtc rtc device - * @timer timer being added. - * - * Enqueues a timer onto the rtc devices timerqueue and sets - * the next alarm event appropriately. - * - * Must hold ops_lock for proper serialization of timerqueue - */ -void rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) -{ - timerqueue_add(&rtc->timerqueue, &timer->node); - if (&timer->node == timerqueue_getnext(&rtc->timerqueue)) { - struct rtc_wkalrm alarm; - int err; - alarm.time = rtc_ktime_to_tm(timer->node.expires); - alarm.enabled = 1; - err = __rtc_set_alarm(rtc, &alarm); - if (err == -ETIME) - schedule_work(&rtc->irqwork); - } -} - -/** - * rtc_timer_remove - Removes a rtc_timer from the rtc_device timerqueue - * @rtc rtc device - * @timer timer being removed. - * - * Removes a timer onto the rtc devices timerqueue and sets - * the next alarm event appropriately. 
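Another aside for context, not part of the patch: with the rtc_update_irq() variant restored a little earlier in this interface.c hunk, a hardware driver reports events straight from its interrupt handler, packing the event count into the high bits of irq_data and the RTC_IRQF/RTC_AF/RTC_UF/RTC_PF flags into the low byte. A hypothetical driver snippet (the foo_* names are invented):

#include <linux/interrupt.h>
#include <linux/rtc.h>

static irqreturn_t foo_rtc_irq(int irq, void *dev_id)
{
	struct rtc_device *rtc = dev_id;

	/* one interrupt, alarm event: count in the high bits, flags below */
	rtc_update_irq(rtc, 1, RTC_IRQF | RTC_AF);

	return IRQ_HANDLED;
}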
- * - * Must hold ops_lock for proper serialization of timerqueue - */ -void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer) -{ - struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue); - timerqueue_del(&rtc->timerqueue, &timer->node); - - if (next == &timer->node) { - struct rtc_wkalrm alarm; - int err; - next = timerqueue_getnext(&rtc->timerqueue); - if (!next) - return; - alarm.time = rtc_ktime_to_tm(next->expires); - alarm.enabled = 1; - err = __rtc_set_alarm(rtc, &alarm); - if (err == -ETIME) - schedule_work(&rtc->irqwork); - } -} - -/** - * rtc_timer_do_work - Expires rtc timers - * @rtc rtc device - * @timer timer being removed. - * - * Expires rtc timers. Reprograms next alarm event if needed. - * Called via worktask. - * - * Serializes access to timerqueue via ops_lock mutex - */ -void rtc_timer_do_work(struct work_struct *work) -{ - struct rtc_timer *timer; - struct timerqueue_node *next; - ktime_t now; - struct rtc_time tm; - - struct rtc_device *rtc = - container_of(work, struct rtc_device, irqwork); - - mutex_lock(&rtc->ops_lock); -again: - __rtc_read_time(rtc, &tm); - now = rtc_tm_to_ktime(tm); - while ((next = timerqueue_getnext(&rtc->timerqueue))) { - if (next->expires.tv64 > now.tv64) - break; - - /* expire timer */ - timer = container_of(next, struct rtc_timer, node); - timerqueue_del(&rtc->timerqueue, &timer->node); - timer->enabled = 0; - if (timer->task.func) - timer->task.func(timer->task.private_data); - - /* Re-add/fwd periodic timers */ - if (ktime_to_ns(timer->period)) { - timer->node.expires = ktime_add(timer->node.expires, - timer->period); - timer->enabled = 1; - timerqueue_add(&rtc->timerqueue, &timer->node); - } - } - - /* Set next alarm */ - if (next) { - struct rtc_wkalrm alarm; - int err; - alarm.time = rtc_ktime_to_tm(next->expires); - alarm.enabled = 1; - err = __rtc_set_alarm(rtc, &alarm); - if (err == -ETIME) - goto again; - } - - mutex_unlock(&rtc->ops_lock); -} - - -/* rtc_timer_init - Initializes an rtc_timer - * @timer: timer to be intiialized - * @f: function pointer to be called when timer fires - * @data: private data passed to function pointer - * - * Kernel interface to initializing an rtc_timer. 
- */ -void rtc_timer_init(struct rtc_timer *timer, void (*f)(void* p), void* data) -{ - timerqueue_init(&timer->node); - timer->enabled = 0; - timer->task.func = f; - timer->task.private_data = data; -} - -/* rtc_timer_start - Sets an rtc_timer to fire in the future - * @ rtc: rtc device to be used - * @ timer: timer being set - * @ expires: time at which to expire the timer - * @ period: period that the timer will recur - * - * Kernel interface to set an rtc_timer - */ -int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer* timer, - ktime_t expires, ktime_t period) -{ - int ret = 0; - mutex_lock(&rtc->ops_lock); - if (timer->enabled) - rtc_timer_remove(rtc, timer); - - timer->node.expires = expires; - timer->period = period; - - timer->enabled = 1; - rtc_timer_enqueue(rtc, timer); - - mutex_unlock(&rtc->ops_lock); - return ret; -} - -/* rtc_timer_cancel - Stops an rtc_timer - * @ rtc: rtc device to be used - * @ timer: timer being set - * - * Kernel interface to cancel an rtc_timer - */ -int rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer* timer) -{ - int ret = 0; - mutex_lock(&rtc->ops_lock); - if (timer->enabled) - rtc_timer_remove(rtc, timer); - timer->enabled = 0; - mutex_unlock(&rtc->ops_lock); - return ret; -} - - diff --git a/trunk/drivers/rtc/rtc-dev.c b/trunk/drivers/rtc/rtc-dev.c index 212b16edafc0..0cc0984d155b 100644 --- a/trunk/drivers/rtc/rtc-dev.c +++ b/trunk/drivers/rtc/rtc-dev.c @@ -46,6 +46,105 @@ static int rtc_dev_open(struct inode *inode, struct file *file) return err; } +#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL +/* + * Routine to poll RTC seconds field for change as often as possible, + * after first RTC_UIE use timer to reduce polling + */ +static void rtc_uie_task(struct work_struct *work) +{ + struct rtc_device *rtc = + container_of(work, struct rtc_device, uie_task); + struct rtc_time tm; + int num = 0; + int err; + + err = rtc_read_time(rtc, &tm); + + spin_lock_irq(&rtc->irq_lock); + if (rtc->stop_uie_polling || err) { + rtc->uie_task_active = 0; + } else if (rtc->oldsecs != tm.tm_sec) { + num = (tm.tm_sec + 60 - rtc->oldsecs) % 60; + rtc->oldsecs = tm.tm_sec; + rtc->uie_timer.expires = jiffies + HZ - (HZ/10); + rtc->uie_timer_active = 1; + rtc->uie_task_active = 0; + add_timer(&rtc->uie_timer); + } else if (schedule_work(&rtc->uie_task) == 0) { + rtc->uie_task_active = 0; + } + spin_unlock_irq(&rtc->irq_lock); + if (num) + rtc_update_irq(rtc, num, RTC_UF | RTC_IRQF); +} +static void rtc_uie_timer(unsigned long data) +{ + struct rtc_device *rtc = (struct rtc_device *)data; + unsigned long flags; + + spin_lock_irqsave(&rtc->irq_lock, flags); + rtc->uie_timer_active = 0; + rtc->uie_task_active = 1; + if ((schedule_work(&rtc->uie_task) == 0)) + rtc->uie_task_active = 0; + spin_unlock_irqrestore(&rtc->irq_lock, flags); +} + +static int clear_uie(struct rtc_device *rtc) +{ + spin_lock_irq(&rtc->irq_lock); + if (rtc->uie_irq_active) { + rtc->stop_uie_polling = 1; + if (rtc->uie_timer_active) { + spin_unlock_irq(&rtc->irq_lock); + del_timer_sync(&rtc->uie_timer); + spin_lock_irq(&rtc->irq_lock); + rtc->uie_timer_active = 0; + } + if (rtc->uie_task_active) { + spin_unlock_irq(&rtc->irq_lock); + flush_work_sync(&rtc->uie_task); + spin_lock_irq(&rtc->irq_lock); + } + rtc->uie_irq_active = 0; + } + spin_unlock_irq(&rtc->irq_lock); + return 0; +} + +static int set_uie(struct rtc_device *rtc) +{ + struct rtc_time tm; + int err; + + err = rtc_read_time(rtc, &tm); + if (err) + return err; + spin_lock_irq(&rtc->irq_lock); + if (!rtc->uie_irq_active) { + 
rtc->uie_irq_active = 1; + rtc->stop_uie_polling = 0; + rtc->oldsecs = tm.tm_sec; + rtc->uie_task_active = 1; + if (schedule_work(&rtc->uie_task) == 0) + rtc->uie_task_active = 0; + } + rtc->irq_data = 0; + spin_unlock_irq(&rtc->irq_lock); + return 0; +} + +int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc, unsigned int enabled) +{ + if (enabled) + return set_uie(rtc); + else + return clear_uie(rtc); +} +EXPORT_SYMBOL(rtc_dev_update_irq_enable_emul); + +#endif /* CONFIG_RTC_INTF_DEV_UIE_EMUL */ static ssize_t rtc_dev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) @@ -394,6 +493,11 @@ void rtc_dev_prepare(struct rtc_device *rtc) rtc->dev.devt = MKDEV(MAJOR(rtc_devt), rtc->id); +#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL + INIT_WORK(&rtc->uie_task, rtc_uie_task); + setup_timer(&rtc->uie_timer, rtc_uie_timer, (unsigned long)rtc); +#endif + cdev_init(&rtc->char_dev, &rtc_dev_fops); rtc->char_dev.owner = rtc->owner; } diff --git a/trunk/drivers/rtc/rtc-lib.c b/trunk/drivers/rtc/rtc-lib.c index 075f1708deae..773851f338b8 100644 --- a/trunk/drivers/rtc/rtc-lib.c +++ b/trunk/drivers/rtc/rtc-lib.c @@ -117,32 +117,4 @@ int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time) } EXPORT_SYMBOL(rtc_tm_to_time); -/* - * Convert rtc_time to ktime - */ -ktime_t rtc_tm_to_ktime(struct rtc_time tm) -{ - time_t time; - rtc_tm_to_time(&tm, &time); - return ktime_set(time, 0); -} -EXPORT_SYMBOL_GPL(rtc_tm_to_ktime); - -/* - * Convert ktime to rtc_time - */ -struct rtc_time rtc_ktime_to_tm(ktime_t kt) -{ - struct timespec ts; - struct rtc_time ret; - - ts = ktime_to_timespec(kt); - /* Round up any ns */ - if (ts.tv_nsec) - ts.tv_sec++; - rtc_time_to_tm(ts.tv_sec, &ret); - return ret; -} -EXPORT_SYMBOL_GPL(rtc_ktime_to_tm); - MODULE_LICENSE("GPL"); diff --git a/trunk/drivers/watchdog/hpwdt.c b/trunk/drivers/watchdog/hpwdt.c index 24b966d5061a..dea7b5bf6e2c 100644 --- a/trunk/drivers/watchdog/hpwdt.c +++ b/trunk/drivers/watchdog/hpwdt.c @@ -469,7 +469,7 @@ static int hpwdt_pretimeout(struct notifier_block *nb, unsigned long ulReason, unsigned long rom_pl; static int die_nmi_called; - if (ulReason != DIE_NMIUNKNOWN) + if (ulReason != DIE_NMI && ulReason != DIE_NMI_IPI) goto out; if (!hpwdt_nmi_decoding) diff --git a/trunk/fs/9p/acl.c b/trunk/fs/9p/acl.c index 6e58c4ca1e6e..c9da2640f6f1 100644 --- a/trunk/fs/9p/acl.c +++ b/trunk/fs/9p/acl.c @@ -28,7 +28,7 @@ static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name) { ssize_t size; void *value = NULL; - struct posix_acl *acl = NULL;; + struct posix_acl *acl = NULL; size = v9fs_fid_xattr_get(fid, name, NULL, 0); if (size > 0) { diff --git a/trunk/fs/9p/xattr.c b/trunk/fs/9p/xattr.c index 43ec7df84336..d288773871b3 100644 --- a/trunk/fs/9p/xattr.c +++ b/trunk/fs/9p/xattr.c @@ -133,7 +133,7 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name, "p9_client_xattrcreate failed %d\n", retval); goto error; } - msize = fid->clnt->msize;; + msize = fid->clnt->msize; while (value_len) { if (value_len > (msize - P9_IOHDRSZ)) write_count = msize - P9_IOHDRSZ; diff --git a/trunk/fs/ocfs2/Kconfig b/trunk/fs/ocfs2/Kconfig index ab152c00cd3a..0d840669698e 100644 --- a/trunk/fs/ocfs2/Kconfig +++ b/trunk/fs/ocfs2/Kconfig @@ -51,7 +51,7 @@ config OCFS2_FS_USERSPACE_CLUSTER config OCFS2_FS_STATS bool "OCFS2 statistics" - depends on OCFS2_FS && DEBUG_FS + depends on OCFS2_FS default y help This option allows some fs statistics to be captured. 
Enabling diff --git a/trunk/fs/ocfs2/alloc.c b/trunk/fs/ocfs2/alloc.c index e4984e259cb6..592fae5007d1 100644 --- a/trunk/fs/ocfs2/alloc.c +++ b/trunk/fs/ocfs2/alloc.c @@ -565,6 +565,7 @@ static inline int ocfs2_et_sanity_check(struct ocfs2_extent_tree *et) return ret; } +static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, struct ocfs2_extent_block *eb); static void ocfs2_adjust_rightmost_records(handle_t *handle, @@ -5857,7 +5858,6 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb, ocfs2_journal_dirty(handle, tl_bh); - osb->truncated_clusters += num_clusters; bail: mlog_exit(status); return status; @@ -5929,8 +5929,6 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, i--; } - osb->truncated_clusters = 0; - bail: mlog_exit(status); return status; @@ -7140,6 +7138,64 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, return status; } +/* + * Expects the inode to already be locked. + */ +int ocfs2_prepare_truncate(struct ocfs2_super *osb, + struct inode *inode, + struct buffer_head *fe_bh, + struct ocfs2_truncate_context **tc) +{ + int status; + unsigned int new_i_clusters; + struct ocfs2_dinode *fe; + struct ocfs2_extent_block *eb; + struct buffer_head *last_eb_bh = NULL; + + mlog_entry_void(); + + *tc = NULL; + + new_i_clusters = ocfs2_clusters_for_bytes(osb->sb, + i_size_read(inode)); + fe = (struct ocfs2_dinode *) fe_bh->b_data; + + mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size =" + "%llu\n", le32_to_cpu(fe->i_clusters), new_i_clusters, + (unsigned long long)le64_to_cpu(fe->i_size)); + + *tc = kzalloc(sizeof(struct ocfs2_truncate_context), GFP_KERNEL); + if (!(*tc)) { + status = -ENOMEM; + mlog_errno(status); + goto bail; + } + ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); + + if (fe->id2.i_list.l_tree_depth) { + status = ocfs2_read_extent_block(INODE_CACHE(inode), + le64_to_cpu(fe->i_last_eb_blk), + &last_eb_bh); + if (status < 0) { + mlog_errno(status); + goto bail; + } + eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; + } + + (*tc)->tc_last_eb_bh = last_eb_bh; + + status = 0; +bail: + if (status < 0) { + if (*tc) + ocfs2_free_truncate_context(*tc); + *tc = NULL; + } + mlog_exit_void(); + return status; +} + /* * 'start' is inclusive, 'end' is not. */ @@ -7214,3 +7270,18 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, out: return ret; } + +static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) +{ + /* + * The caller is responsible for completing deallocation + * before freeing the context. 
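Editor's aside, not part of the patch: ocfs2_prepare_truncate() above derives new_i_clusters from the inode size with ocfs2_clusters_for_bytes(), which is essentially a round-up division by the cluster size. A standalone sketch of that arithmetic (example_clusters_for_bytes() and the cluster_bits parameter are illustrative, not the ocfs2 API):

#include <linux/types.h>

static inline u32 example_clusters_for_bytes(u64 bytes, unsigned int cluster_bits)
{
	/* round up to the next whole cluster, then convert bytes to a count */
	return (u32)((bytes + (1ULL << cluster_bits) - 1) >> cluster_bits);
}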
+ */ + if (tc->tc_dealloc.c_first_suballocator != NULL) + mlog(ML_NOTICE, + "Truncate completion has non-empty dealloc context\n"); + + brelse(tc->tc_last_eb_bh); + + kfree(tc); +} diff --git a/trunk/fs/ocfs2/alloc.h b/trunk/fs/ocfs2/alloc.h index 3bd08a03251c..55762b554b99 100644 --- a/trunk/fs/ocfs2/alloc.h +++ b/trunk/fs/ocfs2/alloc.h @@ -228,6 +228,10 @@ struct ocfs2_truncate_context { int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, u64 range_start, u64 range_end); +int ocfs2_prepare_truncate(struct ocfs2_super *osb, + struct inode *inode, + struct buffer_head *fe_bh, + struct ocfs2_truncate_context **tc); int ocfs2_commit_truncate(struct ocfs2_super *osb, struct inode *inode, struct buffer_head *di_bh); diff --git a/trunk/fs/ocfs2/aops.c b/trunk/fs/ocfs2/aops.c index 1fbb0e20131b..0d7c5540ad66 100644 --- a/trunk/fs/ocfs2/aops.c +++ b/trunk/fs/ocfs2/aops.c @@ -1630,43 +1630,6 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh, return ret; } -/* - * Try to flush truncate logs if we can free enough clusters from it. - * As for return value, "< 0" means error, "0" no space and "1" means - * we have freed enough spaces and let the caller try to allocate again. - */ -static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb, - unsigned int needed) -{ - tid_t target; - int ret = 0; - unsigned int truncated_clusters; - - mutex_lock(&osb->osb_tl_inode->i_mutex); - truncated_clusters = osb->truncated_clusters; - mutex_unlock(&osb->osb_tl_inode->i_mutex); - - /* - * Check whether we can succeed in allocating if we free - * the truncate log. - */ - if (truncated_clusters < needed) - goto out; - - ret = ocfs2_flush_truncate_log(osb); - if (ret) { - mlog_errno(ret); - goto out; - } - - if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) { - jbd2_log_wait_commit(osb->journal->j_journal, target); - ret = 1; - } -out: - return ret; -} - int ocfs2_write_begin_nolock(struct file *filp, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, @@ -1674,7 +1637,7 @@ int ocfs2_write_begin_nolock(struct file *filp, struct buffer_head *di_bh, struct page *mmap_page) { int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS; - unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0; + unsigned int clusters_to_alloc, extents_to_split; struct ocfs2_write_ctxt *wc; struct inode *inode = mapping->host; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); @@ -1683,9 +1646,7 @@ int ocfs2_write_begin_nolock(struct file *filp, struct ocfs2_alloc_context *meta_ac = NULL; handle_t *handle; struct ocfs2_extent_tree et; - int try_free = 1, ret1; -try_again: ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh); if (ret) { mlog_errno(ret); @@ -1720,7 +1681,6 @@ int ocfs2_write_begin_nolock(struct file *filp, mlog_errno(ret); goto out; } else if (ret == 1) { - clusters_need = wc->w_clen; ret = ocfs2_refcount_cow(inode, filp, di_bh, wc->w_cpos, wc->w_clen, UINT_MAX); if (ret) { @@ -1735,7 +1695,6 @@ int ocfs2_write_begin_nolock(struct file *filp, mlog_errno(ret); goto out; } - clusters_need += clusters_to_alloc; di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; @@ -1858,22 +1817,6 @@ int ocfs2_write_begin_nolock(struct file *filp, ocfs2_free_alloc_context(data_ac); if (meta_ac) ocfs2_free_alloc_context(meta_ac); - - if (ret == -ENOSPC && try_free) { - /* - * Try to free some truncate log so that we can have enough - * clusters to allocate. 
- */ - try_free = 0; - - ret1 = ocfs2_try_to_free_truncate_log(osb, clusters_need); - if (ret1 == 1) - goto try_again; - - if (ret1 < 0) - mlog_errno(ret1); - } - return ret; } diff --git a/trunk/fs/ocfs2/cluster/heartbeat.c b/trunk/fs/ocfs2/cluster/heartbeat.c index a6cc05302e9f..9e3d45bcb5fd 100644 --- a/trunk/fs/ocfs2/cluster/heartbeat.c +++ b/trunk/fs/ocfs2/cluster/heartbeat.c @@ -82,7 +82,6 @@ static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; #define O2HB_DB_TYPE_REGION_LIVENODES 4 #define O2HB_DB_TYPE_REGION_NUMBER 5 #define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6 -#define O2HB_DB_TYPE_REGION_PINNED 7 struct o2hb_debug_buf { int db_type; int db_size; @@ -102,7 +101,6 @@ static struct o2hb_debug_buf *o2hb_db_failedregions; #define O2HB_DEBUG_FAILEDREGIONS "failed_regions" #define O2HB_DEBUG_REGION_NUMBER "num" #define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms" -#define O2HB_DEBUG_REGION_PINNED "pinned" static struct dentry *o2hb_debug_dir; static struct dentry *o2hb_debug_livenodes; @@ -134,33 +132,6 @@ char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = { unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; -/* - * o2hb_dependent_users tracks the number of registered callbacks that depend - * on heartbeat. o2net and o2dlm are two entities that register this callback. - * However only o2dlm depends on the heartbeat. It does not want the heartbeat - * to stop while a dlm domain is still active. - */ -unsigned int o2hb_dependent_users; - -/* - * In global heartbeat mode, all regions are pinned if there are one or more - * dependent users and the quorum region count is <= O2HB_PIN_CUT_OFF. All - * regions are unpinned if the region count exceeds the cut off or the number - * of dependent users falls to zero. - */ -#define O2HB_PIN_CUT_OFF 3 - -/* - * In local heartbeat mode, we assume the dlm domain name to be the same as - * region uuid. This is true for domains created for the file system but not - * necessarily true for userdlm domains. This is a known limitation. - * - * In global heartbeat mode, we pin/unpin all o2hb regions. This solution - * works for both file system and userdlm domains. - */ -static int o2hb_region_pin(const char *region_uuid); -static void o2hb_region_unpin(const char *region_uuid); - /* Only sets a new threshold if there are no active regions. * * No locking or otherwise interesting code is required for reading @@ -215,9 +186,7 @@ struct o2hb_region { struct config_item hr_item; struct list_head hr_all_item; - unsigned hr_unclean_stop:1, - hr_item_pinned:1, - hr_item_dropped:1; + unsigned hr_unclean_stop:1; /* protected by the hr_callback_sem */ struct task_struct *hr_task; @@ -243,11 +212,9 @@ struct o2hb_region { struct dentry *hr_debug_livenodes; struct dentry *hr_debug_regnum; struct dentry *hr_debug_elapsed_time; - struct dentry *hr_debug_pinned; struct o2hb_debug_buf *hr_db_livenodes; struct o2hb_debug_buf *hr_db_regnum; struct o2hb_debug_buf *hr_db_elapsed_time; - struct o2hb_debug_buf *hr_db_pinned; /* let the person setting up hb wait for it to return until it * has reached a 'steady' state. 
This will be fixed when we have @@ -734,14 +701,6 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg, config_item_name(®->hr_item)); set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); - - /* - * If global heartbeat active, unpin all regions if the - * region count > CUT_OFF - */ - if (o2hb_pop_count(&o2hb_quorum_region_bitmap, - O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF) - o2hb_region_unpin(NULL); } static int o2hb_check_slot(struct o2hb_region *reg, @@ -1082,9 +1041,6 @@ static int o2hb_thread(void *data) set_user_nice(current, -20); - /* Pin node */ - o2nm_depend_this_node(); - while (!kthread_should_stop() && !reg->hr_unclean_stop) { /* We track the time spent inside * o2hb_do_disk_heartbeat so that we avoid more than @@ -1134,9 +1090,6 @@ static int o2hb_thread(void *data) mlog_errno(ret); } - /* Unpin node */ - o2nm_undepend_this_node(); - mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n"); return 0; @@ -1189,12 +1142,6 @@ static int o2hb_debug_open(struct inode *inode, struct file *file) reg->hr_last_timeout_start)); goto done; - case O2HB_DB_TYPE_REGION_PINNED: - reg = (struct o2hb_region *)db->db_data; - out += snprintf(buf + out, PAGE_SIZE - out, "%u\n", - !!reg->hr_item_pinned); - goto done; - default: goto done; } @@ -1368,8 +1315,6 @@ int o2hb_init(void) memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap)); memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap)); - o2hb_dependent_users = 0; - return o2hb_debug_init(); } @@ -1439,7 +1384,6 @@ static void o2hb_region_release(struct config_item *item) debugfs_remove(reg->hr_debug_livenodes); debugfs_remove(reg->hr_debug_regnum); debugfs_remove(reg->hr_debug_elapsed_time); - debugfs_remove(reg->hr_debug_pinned); debugfs_remove(reg->hr_debug_dir); spin_lock(&o2hb_live_lock); @@ -2004,18 +1948,6 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) goto bail; } - reg->hr_debug_pinned = - o2hb_debug_create(O2HB_DEBUG_REGION_PINNED, - reg->hr_debug_dir, - &(reg->hr_db_pinned), - sizeof(*(reg->hr_db_pinned)), - O2HB_DB_TYPE_REGION_PINNED, - 0, 0, reg); - if (!reg->hr_debug_pinned) { - mlog_errno(ret); - goto bail; - } - ret = 0; bail: return ret; @@ -2070,20 +2002,15 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, { struct task_struct *hb_task; struct o2hb_region *reg = to_o2hb_region(item); - int quorum_region = 0; /* stop the thread when the user removes the region dir */ spin_lock(&o2hb_live_lock); if (o2hb_global_heartbeat_active()) { clear_bit(reg->hr_region_num, o2hb_region_bitmap); clear_bit(reg->hr_region_num, o2hb_live_region_bitmap); - if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) - quorum_region = 1; - clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); } hb_task = reg->hr_task; reg->hr_task = NULL; - reg->hr_item_dropped = 1; spin_unlock(&o2hb_live_lock); if (hb_task) @@ -2101,27 +2028,7 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, if (o2hb_global_heartbeat_active()) printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n", config_item_name(®->hr_item)); - config_item_put(item); - - if (!o2hb_global_heartbeat_active() || !quorum_region) - return; - - /* - * If global heartbeat active and there are dependent users, - * pin all regions if quorum region count <= CUT_OFF - */ - spin_lock(&o2hb_live_lock); - - if (!o2hb_dependent_users) - goto unlock; - - if (o2hb_pop_count(&o2hb_quorum_region_bitmap, - O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF) - o2hb_region_pin(NULL); - -unlock: 
- spin_unlock(&o2hb_live_lock); } struct o2hb_heartbeat_group_attribute { @@ -2307,138 +2214,63 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc, } EXPORT_SYMBOL_GPL(o2hb_setup_callback); -/* - * In local heartbeat mode, region_uuid passed matches the dlm domain name. - * In global heartbeat mode, region_uuid passed is NULL. - * - * In local, we only pin the matching region. In global we pin all the active - * regions. - */ -static int o2hb_region_pin(const char *region_uuid) +static struct o2hb_region *o2hb_find_region(const char *region_uuid) { - int ret = 0, found = 0; - struct o2hb_region *reg; - char *uuid; + struct o2hb_region *p, *reg = NULL; assert_spin_locked(&o2hb_live_lock); - list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { - uuid = config_item_name(®->hr_item); - - /* local heartbeat */ - if (region_uuid) { - if (strcmp(region_uuid, uuid)) - continue; - found = 1; - } - - if (reg->hr_item_pinned || reg->hr_item_dropped) - goto skip_pin; - - /* Ignore ENOENT only for local hb (userdlm domain) */ - ret = o2nm_depend_item(®->hr_item); - if (!ret) { - mlog(ML_CLUSTER, "Pin region %s\n", uuid); - reg->hr_item_pinned = 1; - } else { - if (ret == -ENOENT && found) - ret = 0; - else { - mlog(ML_ERROR, "Pin region %s fails with %d\n", - uuid, ret); - break; - } - } -skip_pin: - if (found) + list_for_each_entry(p, &o2hb_all_regions, hr_all_item) { + if (!strcmp(region_uuid, config_item_name(&p->hr_item))) { + reg = p; break; - } - - return ret; -} - -/* - * In local heartbeat mode, region_uuid passed matches the dlm domain name. - * In global heartbeat mode, region_uuid passed is NULL. - * - * In local, we only unpin the matching region. In global we unpin all the - * active regions. - */ -static void o2hb_region_unpin(const char *region_uuid) -{ - struct o2hb_region *reg; - char *uuid; - int found = 0; - - assert_spin_locked(&o2hb_live_lock); - - list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { - uuid = config_item_name(®->hr_item); - if (region_uuid) { - if (strcmp(region_uuid, uuid)) - continue; - found = 1; } - - if (reg->hr_item_pinned) { - mlog(ML_CLUSTER, "Unpin region %s\n", uuid); - o2nm_undepend_item(®->hr_item); - reg->hr_item_pinned = 0; - } - if (found) - break; } + + return reg; } -static int o2hb_region_inc_user(const char *region_uuid) +static int o2hb_region_get(const char *region_uuid) { int ret = 0; + struct o2hb_region *reg; spin_lock(&o2hb_live_lock); - /* local heartbeat */ - if (!o2hb_global_heartbeat_active()) { - ret = o2hb_region_pin(region_uuid); - goto unlock; - } + reg = o2hb_find_region(region_uuid); + if (!reg) + ret = -ENOENT; + spin_unlock(&o2hb_live_lock); - /* - * if global heartbeat active and this is the first dependent user, - * pin all regions if quorum region count <= CUT_OFF - */ - o2hb_dependent_users++; - if (o2hb_dependent_users > 1) - goto unlock; + if (ret) + goto out; - if (o2hb_pop_count(&o2hb_quorum_region_bitmap, - O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF) - ret = o2hb_region_pin(NULL); + ret = o2nm_depend_this_node(); + if (ret) + goto out; -unlock: - spin_unlock(&o2hb_live_lock); + ret = o2nm_depend_item(®->hr_item); + if (ret) + o2nm_undepend_this_node(); + +out: return ret; } -void o2hb_region_dec_user(const char *region_uuid) +static void o2hb_region_put(const char *region_uuid) { - spin_lock(&o2hb_live_lock); + struct o2hb_region *reg; - /* local heartbeat */ - if (!o2hb_global_heartbeat_active()) { - o2hb_region_unpin(region_uuid); - goto unlock; - } + spin_lock(&o2hb_live_lock); - /* - * if global 
heartbeat active and there are no dependent users, - * unpin all quorum regions - */ - o2hb_dependent_users--; - if (!o2hb_dependent_users) - o2hb_region_unpin(NULL); + reg = o2hb_find_region(region_uuid); -unlock: spin_unlock(&o2hb_live_lock); + + if (reg) { + o2nm_undepend_item(®->hr_item); + o2nm_undepend_this_node(); + } } int o2hb_register_callback(const char *region_uuid, @@ -2459,11 +2291,9 @@ int o2hb_register_callback(const char *region_uuid, } if (region_uuid) { - ret = o2hb_region_inc_user(region_uuid); - if (ret) { - mlog_errno(ret); + ret = o2hb_region_get(region_uuid); + if (ret) goto out; - } } down_write(&o2hb_callback_sem); @@ -2481,7 +2311,7 @@ int o2hb_register_callback(const char *region_uuid, up_write(&o2hb_callback_sem); ret = 0; out: - mlog(ML_CLUSTER, "returning %d on behalf of %p for funcs %p\n", + mlog(ML_HEARTBEAT, "returning %d on behalf of %p for funcs %p\n", ret, __builtin_return_address(0), hc); return ret; } @@ -2492,7 +2322,7 @@ void o2hb_unregister_callback(const char *region_uuid, { BUG_ON(hc->hc_magic != O2HB_CB_MAGIC); - mlog(ML_CLUSTER, "on behalf of %p for funcs %p\n", + mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n", __builtin_return_address(0), hc); /* XXX Can this happen _with_ a region reference? */ @@ -2500,7 +2330,7 @@ void o2hb_unregister_callback(const char *region_uuid, return; if (region_uuid) - o2hb_region_dec_user(region_uuid); + o2hb_region_put(region_uuid); down_write(&o2hb_callback_sem); diff --git a/trunk/fs/ocfs2/cluster/netdebug.c b/trunk/fs/ocfs2/cluster/netdebug.c index 3a5835904b3d..a3f150e52b02 100644 --- a/trunk/fs/ocfs2/cluster/netdebug.c +++ b/trunk/fs/ocfs2/cluster/netdebug.c @@ -46,15 +46,10 @@ #define O2NET_DEBUG_DIR "o2net" #define SC_DEBUG_NAME "sock_containers" #define NST_DEBUG_NAME "send_tracking" -#define STATS_DEBUG_NAME "stats" - -#define SHOW_SOCK_CONTAINERS 0 -#define SHOW_SOCK_STATS 1 static struct dentry *o2net_dentry; static struct dentry *sc_dentry; static struct dentry *nst_dentry; -static struct dentry *stats_dentry; static DEFINE_SPINLOCK(o2net_debug_lock); @@ -128,42 +123,37 @@ static void *nst_seq_next(struct seq_file *seq, void *v, loff_t *pos) static int nst_seq_show(struct seq_file *seq, void *v) { struct o2net_send_tracking *nst, *dummy_nst = seq->private; - ktime_t now; - s64 sock, send, status; spin_lock(&o2net_debug_lock); nst = next_nst(dummy_nst); - if (!nst) - goto out; - now = ktime_get(); - sock = ktime_to_us(ktime_sub(now, nst->st_sock_time)); - send = ktime_to_us(ktime_sub(now, nst->st_send_time)); - status = ktime_to_us(ktime_sub(now, nst->st_status_time)); - - /* get_task_comm isn't exported. oh well. */ - seq_printf(seq, "%p:\n" - " pid: %lu\n" - " tgid: %lu\n" - " process name: %s\n" - " node: %u\n" - " sc: %p\n" - " message id: %d\n" - " message type: %u\n" - " message key: 0x%08x\n" - " sock acquiry: %lld usecs ago\n" - " send start: %lld usecs ago\n" - " wait start: %lld usecs ago\n", - nst, (unsigned long)task_pid_nr(nst->st_task), - (unsigned long)nst->st_task->tgid, - nst->st_task->comm, nst->st_node, - nst->st_sc, nst->st_id, nst->st_msg_type, - nst->st_msg_key, - (long long)sock, - (long long)send, - (long long)status); + if (nst != NULL) { + /* get_task_comm isn't exported. oh well. 
*/ + seq_printf(seq, "%p:\n" + " pid: %lu\n" + " tgid: %lu\n" + " process name: %s\n" + " node: %u\n" + " sc: %p\n" + " message id: %d\n" + " message type: %u\n" + " message key: 0x%08x\n" + " sock acquiry: %lu.%ld\n" + " send start: %lu.%ld\n" + " wait start: %lu.%ld\n", + nst, (unsigned long)nst->st_task->pid, + (unsigned long)nst->st_task->tgid, + nst->st_task->comm, nst->st_node, + nst->st_sc, nst->st_id, nst->st_msg_type, + nst->st_msg_key, + nst->st_sock_time.tv_sec, + (long)nst->st_sock_time.tv_usec, + nst->st_send_time.tv_sec, + (long)nst->st_send_time.tv_usec, + nst->st_status_time.tv_sec, + (long)nst->st_status_time.tv_usec); + } -out: spin_unlock(&o2net_debug_lock); return 0; @@ -238,11 +228,6 @@ void o2net_debug_del_sc(struct o2net_sock_container *sc) spin_unlock(&o2net_debug_lock); } -struct o2net_sock_debug { - int dbg_ctxt; - struct o2net_sock_container *dbg_sock; -}; - static struct o2net_sock_container *next_sc(struct o2net_sock_container *sc_start) { @@ -268,8 +253,7 @@ static struct o2net_sock_container static void *sc_seq_start(struct seq_file *seq, loff_t *pos) { - struct o2net_sock_debug *sd = seq->private; - struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock; + struct o2net_sock_container *sc, *dummy_sc = seq->private; spin_lock(&o2net_debug_lock); sc = next_sc(dummy_sc); @@ -280,8 +264,7 @@ static void *sc_seq_start(struct seq_file *seq, loff_t *pos) static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct o2net_sock_debug *sd = seq->private; - struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock; + struct o2net_sock_container *sc, *dummy_sc = seq->private; spin_lock(&o2net_debug_lock); sc = next_sc(dummy_sc); @@ -293,107 +276,65 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos) return sc; /* unused, just needs to be null when done */ } -#ifdef CONFIG_OCFS2_FS_STATS -# define sc_send_count(_s) ((_s)->sc_send_count) -# define sc_recv_count(_s) ((_s)->sc_recv_count) -# define sc_tv_acquiry_total_ns(_s) (ktime_to_ns((_s)->sc_tv_acquiry_total)) -# define sc_tv_send_total_ns(_s) (ktime_to_ns((_s)->sc_tv_send_total)) -# define sc_tv_status_total_ns(_s) (ktime_to_ns((_s)->sc_tv_status_total)) -# define sc_tv_process_total_ns(_s) (ktime_to_ns((_s)->sc_tv_process_total)) -#else -# define sc_send_count(_s) (0U) -# define sc_recv_count(_s) (0U) -# define sc_tv_acquiry_total_ns(_s) (0LL) -# define sc_tv_send_total_ns(_s) (0LL) -# define sc_tv_status_total_ns(_s) (0LL) -# define sc_tv_process_total_ns(_s) (0LL) -#endif - -/* So that debugfs.ocfs2 can determine which format is being used */ -#define O2NET_STATS_STR_VERSION 1 -static void sc_show_sock_stats(struct seq_file *seq, - struct o2net_sock_container *sc) -{ - if (!sc) - return; - - seq_printf(seq, "%d,%u,%lu,%lld,%lld,%lld,%lu,%lld\n", O2NET_STATS_STR_VERSION, - sc->sc_node->nd_num, (unsigned long)sc_send_count(sc), - (long long)sc_tv_acquiry_total_ns(sc), - (long long)sc_tv_send_total_ns(sc), - (long long)sc_tv_status_total_ns(sc), - (unsigned long)sc_recv_count(sc), - (long long)sc_tv_process_total_ns(sc)); -} - -static void sc_show_sock_container(struct seq_file *seq, - struct o2net_sock_container *sc) -{ - struct inet_sock *inet = NULL; - __be32 saddr = 0, daddr = 0; - __be16 sport = 0, dport = 0; - - if (!sc) - return; - - if (sc->sc_sock) { - inet = inet_sk(sc->sc_sock->sk); - /* the stack's structs aren't sparse endian clean */ - saddr = (__force __be32)inet->inet_saddr; - daddr = (__force __be32)inet->inet_daddr; - sport = (__force __be16)inet->inet_sport; - 
dport = (__force __be16)inet->inet_dport; - } - - /* XXX sigh, inet-> doesn't have sparse annotation so any - * use of it here generates a warning with -Wbitwise */ - seq_printf(seq, "%p:\n" - " krefs: %d\n" - " sock: %pI4:%u -> " - "%pI4:%u\n" - " remote node: %s\n" - " page off: %zu\n" - " handshake ok: %u\n" - " timer: %lld usecs\n" - " data ready: %lld usecs\n" - " advance start: %lld usecs\n" - " advance stop: %lld usecs\n" - " func start: %lld usecs\n" - " func stop: %lld usecs\n" - " func key: 0x%08x\n" - " func type: %u\n", - sc, - atomic_read(&sc->sc_kref.refcount), - &saddr, inet ? ntohs(sport) : 0, - &daddr, inet ? ntohs(dport) : 0, - sc->sc_node->nd_name, - sc->sc_page_off, - sc->sc_handshake_ok, - (long long)ktime_to_us(sc->sc_tv_timer), - (long long)ktime_to_us(sc->sc_tv_data_ready), - (long long)ktime_to_us(sc->sc_tv_advance_start), - (long long)ktime_to_us(sc->sc_tv_advance_stop), - (long long)ktime_to_us(sc->sc_tv_func_start), - (long long)ktime_to_us(sc->sc_tv_func_stop), - sc->sc_msg_key, - sc->sc_msg_type); -} +#define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec static int sc_seq_show(struct seq_file *seq, void *v) { - struct o2net_sock_debug *sd = seq->private; - struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock; + struct o2net_sock_container *sc, *dummy_sc = seq->private; spin_lock(&o2net_debug_lock); sc = next_sc(dummy_sc); - if (sc) { - if (sd->dbg_ctxt == SHOW_SOCK_CONTAINERS) - sc_show_sock_container(seq, sc); - else - sc_show_sock_stats(seq, sc); + if (sc != NULL) { + struct inet_sock *inet = NULL; + + __be32 saddr = 0, daddr = 0; + __be16 sport = 0, dport = 0; + + if (sc->sc_sock) { + inet = inet_sk(sc->sc_sock->sk); + /* the stack's structs aren't sparse endian clean */ + saddr = (__force __be32)inet->inet_saddr; + daddr = (__force __be32)inet->inet_daddr; + sport = (__force __be16)inet->inet_sport; + dport = (__force __be16)inet->inet_dport; + } + + /* XXX sigh, inet-> doesn't have sparse annotation so any + * use of it here generates a warning with -Wbitwise */ + seq_printf(seq, "%p:\n" + " krefs: %d\n" + " sock: %pI4:%u -> " + "%pI4:%u\n" + " remote node: %s\n" + " page off: %zu\n" + " handshake ok: %u\n" + " timer: %lu.%ld\n" + " data ready: %lu.%ld\n" + " advance start: %lu.%ld\n" + " advance stop: %lu.%ld\n" + " func start: %lu.%ld\n" + " func stop: %lu.%ld\n" + " func key: %u\n" + " func type: %u\n", + sc, + atomic_read(&sc->sc_kref.refcount), + &saddr, inet ? ntohs(sport) : 0, + &daddr, inet ? 
ntohs(dport) : 0, + sc->sc_node->nd_name, + sc->sc_page_off, + sc->sc_handshake_ok, + TV_SEC_USEC(sc->sc_tv_timer), + TV_SEC_USEC(sc->sc_tv_data_ready), + TV_SEC_USEC(sc->sc_tv_advance_start), + TV_SEC_USEC(sc->sc_tv_advance_stop), + TV_SEC_USEC(sc->sc_tv_func_start), + TV_SEC_USEC(sc->sc_tv_func_stop), + sc->sc_msg_key, + sc->sc_msg_type); } + spin_unlock(&o2net_debug_lock); return 0; @@ -410,7 +351,7 @@ static const struct seq_operations sc_seq_ops = { .show = sc_seq_show, }; -static int sc_common_open(struct file *file, struct o2net_sock_debug *sd) +static int sc_fop_open(struct inode *inode, struct file *file) { struct o2net_sock_container *dummy_sc; struct seq_file *seq; @@ -428,8 +369,7 @@ static int sc_common_open(struct file *file, struct o2net_sock_debug *sd) goto out; seq = file->private_data; - seq->private = sd; - sd->dbg_sock = dummy_sc; + seq->private = dummy_sc; o2net_debug_add_sc(dummy_sc); dummy_sc = NULL; @@ -442,48 +382,12 @@ static int sc_common_open(struct file *file, struct o2net_sock_debug *sd) static int sc_fop_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; - struct o2net_sock_debug *sd = seq->private; - struct o2net_sock_container *dummy_sc = sd->dbg_sock; + struct o2net_sock_container *dummy_sc = seq->private; o2net_debug_del_sc(dummy_sc); return seq_release_private(inode, file); } -static int stats_fop_open(struct inode *inode, struct file *file) -{ - struct o2net_sock_debug *sd; - - sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL); - if (sd == NULL) - return -ENOMEM; - - sd->dbg_ctxt = SHOW_SOCK_STATS; - sd->dbg_sock = NULL; - - return sc_common_open(file, sd); -} - -static const struct file_operations stats_seq_fops = { - .open = stats_fop_open, - .read = seq_read, - .llseek = seq_lseek, - .release = sc_fop_release, -}; - -static int sc_fop_open(struct inode *inode, struct file *file) -{ - struct o2net_sock_debug *sd; - - sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL); - if (sd == NULL) - return -ENOMEM; - - sd->dbg_ctxt = SHOW_SOCK_CONTAINERS; - sd->dbg_sock = NULL; - - return sc_common_open(file, sd); -} - static const struct file_operations sc_seq_fops = { .open = sc_fop_open, .read = seq_read, @@ -515,29 +419,25 @@ int o2net_debugfs_init(void) goto bail; } - stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, S_IFREG|S_IRUSR, - o2net_dentry, NULL, - &stats_seq_fops); - if (!stats_dentry) { - mlog_errno(-ENOMEM); - goto bail; - } - return 0; bail: - debugfs_remove(stats_dentry); - debugfs_remove(sc_dentry); - debugfs_remove(nst_dentry); - debugfs_remove(o2net_dentry); + if (sc_dentry) + debugfs_remove(sc_dentry); + if (nst_dentry) + debugfs_remove(nst_dentry); + if (o2net_dentry) + debugfs_remove(o2net_dentry); return -ENOMEM; } void o2net_debugfs_exit(void) { - debugfs_remove(stats_dentry); - debugfs_remove(sc_dentry); - debugfs_remove(nst_dentry); - debugfs_remove(o2net_dentry); + if (sc_dentry) + debugfs_remove(sc_dentry); + if (nst_dentry) + debugfs_remove(nst_dentry); + if (o2net_dentry) + debugfs_remove(o2net_dentry); } #endif /* CONFIG_DEBUG_FS */ diff --git a/trunk/fs/ocfs2/cluster/tcp.c b/trunk/fs/ocfs2/cluster/tcp.c index 3b11cb1e38fc..9aa426e42123 100644 --- a/trunk/fs/ocfs2/cluster/tcp.c +++ b/trunk/fs/ocfs2/cluster/tcp.c @@ -153,113 +153,62 @@ static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, nst->st_node = node; } -static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) +static void o2net_set_nst_sock_time(struct o2net_send_tracking 
*nst) { - nst->st_sock_time = ktime_get(); + do_gettimeofday(&nst->st_sock_time); } -static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) +static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) { - nst->st_send_time = ktime_get(); + do_gettimeofday(&nst->st_send_time); } -static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) +static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) { - nst->st_status_time = ktime_get(); + do_gettimeofday(&nst->st_status_time); } -static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, - struct o2net_sock_container *sc) +static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, + struct o2net_sock_container *sc) { nst->st_sc = sc; } -static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, - u32 msg_id) +static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) { nst->st_id = msg_id; } -static inline void o2net_set_sock_timer(struct o2net_sock_container *sc) -{ - sc->sc_tv_timer = ktime_get(); -} - -static inline void o2net_set_data_ready_time(struct o2net_sock_container *sc) -{ - sc->sc_tv_data_ready = ktime_get(); -} +#else /* CONFIG_DEBUG_FS */ -static inline void o2net_set_advance_start_time(struct o2net_sock_container *sc) +static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, + u32 msgkey, struct task_struct *task, u8 node) { - sc->sc_tv_advance_start = ktime_get(); } -static inline void o2net_set_advance_stop_time(struct o2net_sock_container *sc) +static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) { - sc->sc_tv_advance_stop = ktime_get(); } -static inline void o2net_set_func_start_time(struct o2net_sock_container *sc) +static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) { - sc->sc_tv_func_start = ktime_get(); } -static inline void o2net_set_func_stop_time(struct o2net_sock_container *sc) +static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) { - sc->sc_tv_func_stop = ktime_get(); } -static ktime_t o2net_get_func_run_time(struct o2net_sock_container *sc) -{ - return ktime_sub(sc->sc_tv_func_stop, sc->sc_tv_func_start); -} -#else /* CONFIG_DEBUG_FS */ -# define o2net_init_nst(a, b, c, d, e) -# define o2net_set_nst_sock_time(a) -# define o2net_set_nst_send_time(a) -# define o2net_set_nst_status_time(a) -# define o2net_set_nst_sock_container(a, b) -# define o2net_set_nst_msg_id(a, b) -# define o2net_set_sock_timer(a) -# define o2net_set_data_ready_time(a) -# define o2net_set_advance_start_time(a) -# define o2net_set_advance_stop_time(a) -# define o2net_set_func_start_time(a) -# define o2net_set_func_stop_time(a) -# define o2net_get_func_run_time(a) (ktime_t)0 -#endif /* CONFIG_DEBUG_FS */ - -#ifdef CONFIG_OCFS2_FS_STATS -static void o2net_update_send_stats(struct o2net_send_tracking *nst, - struct o2net_sock_container *sc) +static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, + struct o2net_sock_container *sc) { - sc->sc_tv_status_total = ktime_add(sc->sc_tv_status_total, - ktime_sub(ktime_get(), - nst->st_status_time)); - sc->sc_tv_send_total = ktime_add(sc->sc_tv_send_total, - ktime_sub(nst->st_status_time, - nst->st_send_time)); - sc->sc_tv_acquiry_total = ktime_add(sc->sc_tv_acquiry_total, - ktime_sub(nst->st_send_time, - nst->st_sock_time)); - sc->sc_send_count++; } -static void o2net_update_recv_stats(struct o2net_sock_container *sc) +static inline void 
o2net_set_nst_msg_id(struct o2net_send_tracking *nst, + u32 msg_id) { - sc->sc_tv_process_total = ktime_add(sc->sc_tv_process_total, - o2net_get_func_run_time(sc)); - sc->sc_recv_count++; } -#else - -# define o2net_update_send_stats(a, b) - -# define o2net_update_recv_stats(sc) - -#endif /* CONFIG_OCFS2_FS_STATS */ +#endif /* CONFIG_DEBUG_FS */ static inline int o2net_reconnect_delay(void) { @@ -406,7 +355,6 @@ static void sc_kref_release(struct kref *kref) sc->sc_sock = NULL; } - o2nm_undepend_item(&sc->sc_node->nd_item); o2nm_node_put(sc->sc_node); sc->sc_node = NULL; @@ -428,7 +376,6 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node) { struct o2net_sock_container *sc, *ret = NULL; struct page *page = NULL; - int status = 0; page = alloc_page(GFP_NOFS); sc = kzalloc(sizeof(*sc), GFP_NOFS); @@ -439,13 +386,6 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node) o2nm_node_get(node); sc->sc_node = node; - /* pin the node item of the remote node */ - status = o2nm_depend_item(&node->nd_item); - if (status) { - mlog_errno(status); - o2nm_node_put(node); - goto out; - } INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed); INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty); INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc); @@ -606,7 +546,7 @@ static void o2net_data_ready(struct sock *sk, int bytes) if (sk->sk_user_data) { struct o2net_sock_container *sc = sk->sk_user_data; sclog(sc, "data_ready hit\n"); - o2net_set_data_ready_time(sc); + do_gettimeofday(&sc->sc_tv_data_ready); o2net_sc_queue_work(sc, &sc->sc_rx_work); ready = sc->sc_data_ready; } else { @@ -1130,8 +1070,6 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, o2net_set_nst_status_time(&nst); wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw)); - o2net_update_send_stats(&nst, sc); - /* Note that we avoid overwriting the callers status return * variable if a system error was reported on the other * side. Callers beware. 
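Aside, not part of the patch: the CONFIG_DEBUG_FS branch earlier in this tcp.c hunk replaces the send-tracking timestamp helpers with empty static inline stubs, so call sites stay free of #ifdefs and the compiler discards the calls when the option is off. The same idiom in miniature, with an invented CONFIG_EXAMPLE_TRACE option and example_trace_send() helper:

#include <linux/kernel.h>
#include <linux/types.h>

#ifdef CONFIG_EXAMPLE_TRACE
static void example_trace_send(u32 msg_id)
{
	pr_debug("sending message %u\n", msg_id);
}
#else
/* compiles away entirely when the option is off; callers need no #ifdefs */
static inline void example_trace_send(u32 msg_id)
{
}
#endif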
*/ @@ -1245,15 +1183,13 @@ static int o2net_process_message(struct o2net_sock_container *sc, if (syserr != O2NET_ERR_NONE) goto out_respond; - o2net_set_func_start_time(sc); + do_gettimeofday(&sc->sc_tv_func_start); sc->sc_msg_key = be32_to_cpu(hdr->key); sc->sc_msg_type = be16_to_cpu(hdr->msg_type); handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) + be16_to_cpu(hdr->data_len), nmh->nh_func_data, &ret_data); - o2net_set_func_stop_time(sc); - - o2net_update_recv_stats(sc); + do_gettimeofday(&sc->sc_tv_func_stop); out_respond: /* this destroys the hdr, so don't use it after this */ @@ -1364,7 +1300,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc) size_t datalen; sclog(sc, "receiving\n"); - o2net_set_advance_start_time(sc); + do_gettimeofday(&sc->sc_tv_advance_start); if (unlikely(sc->sc_handshake_ok == 0)) { if(sc->sc_page_off < sizeof(struct o2net_handshake)) { @@ -1439,7 +1375,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc) out: sclog(sc, "ret = %d\n", ret); - o2net_set_advance_stop_time(sc); + do_gettimeofday(&sc->sc_tv_advance_stop); return ret; } @@ -1539,28 +1475,27 @@ static void o2net_idle_timer(unsigned long data) { struct o2net_sock_container *sc = (struct o2net_sock_container *)data; struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); + struct timeval now; -#ifdef CONFIG_DEBUG_FS - ktime_t now = ktime_get(); -#endif + do_gettimeofday(&now); printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), o2net_idle_timeout() / 1000, o2net_idle_timeout() % 1000); - -#ifdef CONFIG_DEBUG_FS - mlog(ML_NOTICE, "Here are some times that might help debug the " - "situation: (Timer: %lld, Now %lld, DataReady %lld, Advance %lld-%lld, " - "Key 0x%08x, Func %u, FuncTime %lld-%lld)\n", - (long long)ktime_to_us(sc->sc_tv_timer), (long long)ktime_to_us(now), - (long long)ktime_to_us(sc->sc_tv_data_ready), - (long long)ktime_to_us(sc->sc_tv_advance_start), - (long long)ktime_to_us(sc->sc_tv_advance_stop), + mlog(ML_NOTICE, "here are some times that might help debug the " + "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " + "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", + sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec, + now.tv_sec, (long) now.tv_usec, + sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec, + sc->sc_tv_advance_start.tv_sec, + (long) sc->sc_tv_advance_start.tv_usec, + sc->sc_tv_advance_stop.tv_sec, + (long) sc->sc_tv_advance_stop.tv_usec, sc->sc_msg_key, sc->sc_msg_type, - (long long)ktime_to_us(sc->sc_tv_func_start), - (long long)ktime_to_us(sc->sc_tv_func_stop)); -#endif + sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, + sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); /* * Initialize the nn_timeout so that the next connection attempt @@ -1576,7 +1511,7 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, msecs_to_jiffies(o2net_keepalive_delay())); - o2net_set_sock_timer(sc); + do_gettimeofday(&sc->sc_tv_timer); mod_timer(&sc->sc_idle_timeout, jiffies + msecs_to_jiffies(o2net_idle_timeout())); } diff --git a/trunk/fs/ocfs2/cluster/tcp_internal.h b/trunk/fs/ocfs2/cluster/tcp_internal.h index 4cbcb65784a3..15fdbdf9eb4b 100644 --- a/trunk/fs/ocfs2/cluster/tcp_internal.h +++ b/trunk/fs/ocfs2/cluster/tcp_internal.h @@ -166,27 +166,18 @@ struct 
o2net_sock_container { /* original handlers for the sockets */ void (*sc_state_change)(struct sock *sk); void (*sc_data_ready)(struct sock *sk, int bytes); - - u32 sc_msg_key; - u16 sc_msg_type; - #ifdef CONFIG_DEBUG_FS struct list_head sc_net_debug_item; - ktime_t sc_tv_timer; - ktime_t sc_tv_data_ready; - ktime_t sc_tv_advance_start; - ktime_t sc_tv_advance_stop; - ktime_t sc_tv_func_start; - ktime_t sc_tv_func_stop; -#endif -#ifdef CONFIG_OCFS2_FS_STATS - ktime_t sc_tv_acquiry_total; - ktime_t sc_tv_send_total; - ktime_t sc_tv_status_total; - u32 sc_send_count; - u32 sc_recv_count; - ktime_t sc_tv_process_total; #endif + struct timeval sc_tv_timer; + struct timeval sc_tv_data_ready; + struct timeval sc_tv_advance_start; + struct timeval sc_tv_advance_stop; + struct timeval sc_tv_func_start; + struct timeval sc_tv_func_stop; + u32 sc_msg_key; + u16 sc_msg_type; + struct mutex sc_send_lock; }; @@ -229,9 +220,9 @@ struct o2net_send_tracking { u32 st_msg_type; u32 st_msg_key; u8 st_node; - ktime_t st_sock_time; - ktime_t st_send_time; - ktime_t st_status_time; + struct timeval st_sock_time; + struct timeval st_send_time; + struct timeval st_status_time; }; #else struct o2net_send_tracking { diff --git a/trunk/fs/ocfs2/dlm/dlmast.c b/trunk/fs/ocfs2/dlm/dlmast.c index 3a3ed4bb794b..f44999156839 100644 --- a/trunk/fs/ocfs2/dlm/dlmast.c +++ b/trunk/fs/ocfs2/dlm/dlmast.c @@ -90,29 +90,19 @@ static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock) void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) { - struct dlm_lock_resource *res; + mlog_entry_void(); BUG_ON(!dlm); BUG_ON(!lock); - res = lock->lockres; - assert_spin_locked(&dlm->ast_lock); - if (!list_empty(&lock->ast_list)) { - mlog(ML_ERROR, "%s: res %.*s, lock %u:%llu, " - "AST list not empty, pending %d, newlevel %d\n", - dlm->name, res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), + mlog(ML_ERROR, "ast list not empty!! 
pending=%d, newlevel=%d\n", lock->ast_pending, lock->ml.type); BUG(); } if (lock->ast_pending) - mlog(0, "%s: res %.*s, lock %u:%llu, AST getting flushed\n", - dlm->name, res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie))); + mlog(0, "lock has an ast getting flushed right now\n"); /* putting lock on list, add a ref */ dlm_lock_get(lock); @@ -120,10 +110,9 @@ void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) /* check to see if this ast obsoletes the bast */ if (dlm_should_cancel_bast(dlm, lock)) { - mlog(0, "%s: res %.*s, lock %u:%llu, Cancelling BAST\n", - dlm->name, res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie))); + struct dlm_lock_resource *res = lock->lockres; + mlog(0, "%s: cancelling bast for %.*s\n", + dlm->name, res->lockname.len, res->lockname.name); lock->bast_pending = 0; list_del_init(&lock->bast_list); lock->ml.highest_blocked = LKM_IVMODE; @@ -145,6 +134,8 @@ void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) { + mlog_entry_void(); + BUG_ON(!dlm); BUG_ON(!lock); @@ -156,21 +147,15 @@ void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock) { - struct dlm_lock_resource *res; + mlog_entry_void(); BUG_ON(!dlm); BUG_ON(!lock); - assert_spin_locked(&dlm->ast_lock); - res = lock->lockres; - BUG_ON(!list_empty(&lock->bast_list)); if (lock->bast_pending) - mlog(0, "%s: res %.*s, lock %u:%llu, BAST getting flushed\n", - dlm->name, res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie))); + mlog(0, "lock has a bast getting flushed right now\n"); /* putting lock on list, add a ref */ dlm_lock_get(lock); @@ -182,6 +167,8 @@ void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock) void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock) { + mlog_entry_void(); + BUG_ON(!dlm); BUG_ON(!lock); @@ -226,10 +213,7 @@ void dlm_do_local_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, dlm_astlockfunc_t *fn; struct dlm_lockstatus *lksb; - mlog(0, "%s: res %.*s, lock %u:%llu, Local AST\n", dlm->name, - res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie))); + mlog_entry_void(); lksb = lock->lksb; fn = lock->ast; @@ -247,10 +231,7 @@ int dlm_do_remote_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, struct dlm_lockstatus *lksb; int lksbflags; - mlog(0, "%s: res %.*s, lock %u:%llu, Remote AST\n", dlm->name, - res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie))); + mlog_entry_void(); lksb = lock->lksb; BUG_ON(lock->ml.node == dlm->node_num); @@ -269,14 +250,9 @@ void dlm_do_local_bast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, { dlm_bastlockfunc_t *fn = lock->bast; + mlog_entry_void(); BUG_ON(lock->ml.node != dlm->node_num); - mlog(0, "%s: res %.*s, lock %u:%llu, Local BAST, blocked %d\n", - dlm->name, res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - blocked_type); - (*fn)(lock->astdata, 
blocked_type); } @@ -356,8 +332,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, /* cannot get a proxy ast message if this node owns it */ BUG_ON(res->owner == dlm->node_num); - mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len, - res->lockname.name); + mlog(0, "lockres %.*s\n", res->lockname.len, res->lockname.name); spin_lock(&res->spinlock); if (res->state & DLM_LOCK_RES_RECOVERING) { @@ -407,12 +382,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, if (past->type == DLM_AST) { /* do not alter lock refcount. switching lists. */ list_move_tail(&lock->list, &res->granted); - mlog(0, "%s: res %.*s, lock %u:%llu, Granted type %d => %d\n", - dlm->name, res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), - lock->ml.type, lock->ml.convert_type); - + mlog(0, "ast: Adding to granted list... type=%d, " + "convert_type=%d\n", lock->ml.type, lock->ml.convert_type); if (lock->ml.convert_type != LKM_IVMODE) { lock->ml.type = lock->ml.convert_type; lock->ml.convert_type = LKM_IVMODE; @@ -455,9 +426,9 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, size_t veclen = 1; int status; - mlog(0, "%s: res %.*s, to %u, type %d, blocked_type %d\n", dlm->name, - res->lockname.len, res->lockname.name, lock->ml.node, msg_type, - blocked_type); + mlog_entry("res %.*s, to=%u, type=%d, blocked_type=%d\n", + res->lockname.len, res->lockname.name, lock->ml.node, + msg_type, blocked_type); memset(&past, 0, sizeof(struct dlm_proxy_ast)); past.node_idx = dlm->node_num; @@ -470,6 +441,7 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, vec[0].iov_len = sizeof(struct dlm_proxy_ast); vec[0].iov_base = &past; if (flags & DLM_LKSB_GET_LVB) { + mlog(0, "returning requested LVB data\n"); be32_add_cpu(&past.flags, LKM_GET_LVB); vec[1].iov_len = DLM_LVB_LEN; vec[1].iov_base = lock->lksb->lvb; @@ -479,8 +451,8 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen, lock->ml.node, &status); if (ret < 0) - mlog(ML_ERROR, "%s: res %.*s, error %d send AST to node %u\n", - dlm->name, res->lockname.len, res->lockname.name, ret, + mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " + "node %u\n", ret, DLM_PROXY_AST_MSG, dlm->key, lock->ml.node); else { if (status == DLM_RECOVERING) { diff --git a/trunk/fs/ocfs2/dlm/dlmcommon.h b/trunk/fs/ocfs2/dlm/dlmcommon.h index 4bdf7baee344..b36d0bf77a5a 100644 --- a/trunk/fs/ocfs2/dlm/dlmcommon.h +++ b/trunk/fs/ocfs2/dlm/dlmcommon.h @@ -50,10 +50,10 @@ #define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l) enum dlm_mle_type { - DLM_MLE_BLOCK = 0, - DLM_MLE_MASTER = 1, - DLM_MLE_MIGRATION = 2, - DLM_MLE_NUM_TYPES = 3, + DLM_MLE_BLOCK, + DLM_MLE_MASTER, + DLM_MLE_MIGRATION, + DLM_MLE_NUM_TYPES }; struct dlm_master_list_entry { @@ -82,8 +82,8 @@ struct dlm_master_list_entry { enum dlm_ast_type { DLM_AST = 0, - DLM_BAST = 1, - DLM_ASTUNLOCK = 2, + DLM_BAST, + DLM_ASTUNLOCK }; @@ -119,9 +119,9 @@ struct dlm_recovery_ctxt enum dlm_ctxt_state { DLM_CTXT_NEW = 0, - DLM_CTXT_JOINED = 1, - DLM_CTXT_IN_SHUTDOWN = 2, - DLM_CTXT_LEAVING = 3, + DLM_CTXT_JOINED, + DLM_CTXT_IN_SHUTDOWN, + DLM_CTXT_LEAVING, }; struct dlm_ctxt @@ -388,8 +388,8 @@ struct dlm_lock enum dlm_lockres_list { DLM_GRANTED_LIST = 0, - DLM_CONVERTING_LIST = 1, - DLM_BLOCKED_LIST = 2, + DLM_CONVERTING_LIST, + DLM_BLOCKED_LIST }; 
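One more aside, not part of the patch: the enum rewrites in this dlmcommon.h hunk (including the message-number list just below) rely on C's implicit enumerator numbering, where each unadorned enumerator is one greater than its predecessor, so dropping the explicit "= 1, = 2, ..." assignments leaves every value unchanged; the trailing comments merely record the resulting numbers. A tiny self-contained illustration (example_msg_nums is invented):

enum example_msg_nums {
	EXAMPLE_MSG_FIRST = 500,
	EXAMPLE_MSG_SECOND,	/* implicitly 501 */
	EXAMPLE_MSG_THIRD,	/* implicitly 502 */
};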
static inline int dlm_lvb_is_empty(char *lvb) @@ -427,27 +427,27 @@ struct dlm_node_iter enum { - DLM_MASTER_REQUEST_MSG = 500, - DLM_UNUSED_MSG1 = 501, - DLM_ASSERT_MASTER_MSG = 502, - DLM_CREATE_LOCK_MSG = 503, - DLM_CONVERT_LOCK_MSG = 504, - DLM_PROXY_AST_MSG = 505, - DLM_UNLOCK_LOCK_MSG = 506, - DLM_DEREF_LOCKRES_MSG = 507, - DLM_MIGRATE_REQUEST_MSG = 508, - DLM_MIG_LOCKRES_MSG = 509, - DLM_QUERY_JOIN_MSG = 510, - DLM_ASSERT_JOINED_MSG = 511, - DLM_CANCEL_JOIN_MSG = 512, - DLM_EXIT_DOMAIN_MSG = 513, - DLM_MASTER_REQUERY_MSG = 514, - DLM_LOCK_REQUEST_MSG = 515, - DLM_RECO_DATA_DONE_MSG = 516, - DLM_BEGIN_RECO_MSG = 517, - DLM_FINALIZE_RECO_MSG = 518, - DLM_QUERY_REGION = 519, - DLM_QUERY_NODEINFO = 520, + DLM_MASTER_REQUEST_MSG = 500, + DLM_UNUSED_MSG1, /* 501 */ + DLM_ASSERT_MASTER_MSG, /* 502 */ + DLM_CREATE_LOCK_MSG, /* 503 */ + DLM_CONVERT_LOCK_MSG, /* 504 */ + DLM_PROXY_AST_MSG, /* 505 */ + DLM_UNLOCK_LOCK_MSG, /* 506 */ + DLM_DEREF_LOCKRES_MSG, /* 507 */ + DLM_MIGRATE_REQUEST_MSG, /* 508 */ + DLM_MIG_LOCKRES_MSG, /* 509 */ + DLM_QUERY_JOIN_MSG, /* 510 */ + DLM_ASSERT_JOINED_MSG, /* 511 */ + DLM_CANCEL_JOIN_MSG, /* 512 */ + DLM_EXIT_DOMAIN_MSG, /* 513 */ + DLM_MASTER_REQUERY_MSG, /* 514 */ + DLM_LOCK_REQUEST_MSG, /* 515 */ + DLM_RECO_DATA_DONE_MSG, /* 516 */ + DLM_BEGIN_RECO_MSG, /* 517 */ + DLM_FINALIZE_RECO_MSG, /* 518 */ + DLM_QUERY_REGION, /* 519 */ + DLM_QUERY_NODEINFO, /* 520 */ }; struct dlm_reco_node_data @@ -460,19 +460,19 @@ struct dlm_reco_node_data enum { DLM_RECO_NODE_DATA_DEAD = -1, DLM_RECO_NODE_DATA_INIT = 0, - DLM_RECO_NODE_DATA_REQUESTING = 1, - DLM_RECO_NODE_DATA_REQUESTED = 2, - DLM_RECO_NODE_DATA_RECEIVING = 3, - DLM_RECO_NODE_DATA_DONE = 4, - DLM_RECO_NODE_DATA_FINALIZE_SENT = 5, + DLM_RECO_NODE_DATA_REQUESTING, + DLM_RECO_NODE_DATA_REQUESTED, + DLM_RECO_NODE_DATA_RECEIVING, + DLM_RECO_NODE_DATA_DONE, + DLM_RECO_NODE_DATA_FINALIZE_SENT, }; enum { DLM_MASTER_RESP_NO = 0, - DLM_MASTER_RESP_YES = 1, - DLM_MASTER_RESP_MAYBE = 2, - DLM_MASTER_RESP_ERROR = 3, + DLM_MASTER_RESP_YES, + DLM_MASTER_RESP_MAYBE, + DLM_MASTER_RESP_ERROR }; @@ -649,9 +649,9 @@ struct dlm_proxy_ast #define DLM_MOD_KEY (0x666c6172) enum dlm_query_join_response_code { JOIN_DISALLOW = 0, - JOIN_OK = 1, - JOIN_OK_NO_MAP = 2, - JOIN_PROTOCOL_MISMATCH = 3, + JOIN_OK, + JOIN_OK_NO_MAP, + JOIN_PROTOCOL_MISMATCH, }; struct dlm_query_join_packet { diff --git a/trunk/fs/ocfs2/dlm/dlmdebug.c b/trunk/fs/ocfs2/dlm/dlmdebug.c index 04a32be0aeb9..272ec8631a51 100644 --- a/trunk/fs/ocfs2/dlm/dlmdebug.c +++ b/trunk/fs/ocfs2/dlm/dlmdebug.c @@ -370,46 +370,92 @@ static void dlm_debug_get(struct dlm_debug_ctxt *dc) kref_get(&dc->debug_refcnt); } -static int debug_release(struct inode *inode, struct file *file) +static struct debug_buffer *debug_buffer_allocate(void) { - free_page((unsigned long)file->private_data); - return 0; + struct debug_buffer *db = NULL; + + db = kzalloc(sizeof(struct debug_buffer), GFP_KERNEL); + if (!db) + goto bail; + + db->len = PAGE_SIZE; + db->buf = kmalloc(db->len, GFP_KERNEL); + if (!db->buf) + goto bail; + + return db; +bail: + kfree(db); + return NULL; +} + +static ssize_t debug_buffer_read(struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos) +{ + struct debug_buffer *db = file->private_data; + + return simple_read_from_buffer(buf, nbytes, ppos, db->buf, db->len); } -static ssize_t debug_read(struct file *file, char __user *buf, - size_t nbytes, loff_t *ppos) +static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence) { - return 
simple_read_from_buffer(buf, nbytes, ppos, file->private_data, - i_size_read(file->f_mapping->host)); + struct debug_buffer *db = file->private_data; + loff_t new = -1; + + switch (whence) { + case 0: + new = off; + break; + case 1: + new = file->f_pos + off; + break; + } + + if (new < 0 || new > db->len) + return -EINVAL; + + return (file->f_pos = new); +} + +static int debug_buffer_release(struct inode *inode, struct file *file) +{ + struct debug_buffer *db = file->private_data; + + if (db) + kfree(db->buf); + kfree(db); + + return 0; } /* end - util funcs */ /* begin - purge list funcs */ -static int debug_purgelist_print(struct dlm_ctxt *dlm, char *buf, int len) +static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db) { struct dlm_lock_resource *res; int out = 0; unsigned long total = 0; - out += snprintf(buf + out, len - out, + out += snprintf(db->buf + out, db->len - out, "Dumping Purgelist for Domain: %s\n", dlm->name); spin_lock(&dlm->spinlock); list_for_each_entry(res, &dlm->purge_list, purge) { ++total; - if (len - out < 100) + if (db->len - out < 100) continue; spin_lock(&res->spinlock); out += stringify_lockname(res->lockname.name, res->lockname.len, - buf + out, len - out); - out += snprintf(buf + out, len - out, "\t%ld\n", + db->buf + out, db->len - out); + out += snprintf(db->buf + out, db->len - out, "\t%ld\n", (jiffies - res->last_used)/HZ); spin_unlock(&res->spinlock); } spin_unlock(&dlm->spinlock); - out += snprintf(buf + out, len - out, "Total on list: %ld\n", total); + out += snprintf(db->buf + out, db->len - out, + "Total on list: %ld\n", total); return out; } @@ -417,15 +463,15 @@ static int debug_purgelist_print(struct dlm_ctxt *dlm, char *buf, int len) static int debug_purgelist_open(struct inode *inode, struct file *file) { struct dlm_ctxt *dlm = inode->i_private; - char *buf = NULL; + struct debug_buffer *db; - buf = (char *) get_zeroed_page(GFP_NOFS); - if (!buf) + db = debug_buffer_allocate(); + if (!db) goto bail; - i_size_write(inode, debug_purgelist_print(dlm, buf, PAGE_SIZE - 1)); + db->len = debug_purgelist_print(dlm, db); - file->private_data = buf; + file->private_data = db; return 0; bail: @@ -434,14 +480,14 @@ static int debug_purgelist_open(struct inode *inode, struct file *file) static const struct file_operations debug_purgelist_fops = { .open = debug_purgelist_open, - .release = debug_release, - .read = debug_read, - .llseek = generic_file_llseek, + .release = debug_buffer_release, + .read = debug_buffer_read, + .llseek = debug_buffer_llseek, }; /* end - purge list funcs */ /* begin - debug mle funcs */ -static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len) +static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) { struct dlm_master_list_entry *mle; struct hlist_head *bucket; @@ -449,7 +495,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len) int i, out = 0; unsigned long total = 0, longest = 0, bucket_count = 0; - out += snprintf(buf + out, len - out, + out += snprintf(db->buf + out, db->len - out, "Dumping MLEs for Domain: %s\n", dlm->name); spin_lock(&dlm->master_lock); @@ -460,16 +506,16 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len) master_hash_node); ++total; ++bucket_count; - if (len - out < 200) + if (db->len - out < 200) continue; - out += dump_mle(mle, buf + out, len - out); + out += dump_mle(mle, db->buf + out, db->len - out); } longest = max(longest, bucket_count); bucket_count = 0; } spin_unlock(&dlm->master_lock); - out += 
snprintf(buf + out, len - out, + out += snprintf(db->buf + out, db->len - out, "Total: %ld, Longest: %ld\n", total, longest); return out; } @@ -477,15 +523,15 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len) static int debug_mle_open(struct inode *inode, struct file *file) { struct dlm_ctxt *dlm = inode->i_private; - char *buf = NULL; + struct debug_buffer *db; - buf = (char *) get_zeroed_page(GFP_NOFS); - if (!buf) + db = debug_buffer_allocate(); + if (!db) goto bail; - i_size_write(inode, debug_mle_print(dlm, buf, PAGE_SIZE - 1)); + db->len = debug_mle_print(dlm, db); - file->private_data = buf; + file->private_data = db; return 0; bail: @@ -494,9 +540,9 @@ static int debug_mle_open(struct inode *inode, struct file *file) static const struct file_operations debug_mle_fops = { .open = debug_mle_open, - .release = debug_release, - .read = debug_read, - .llseek = generic_file_llseek, + .release = debug_buffer_release, + .read = debug_buffer_read, + .llseek = debug_buffer_llseek, }; /* end - debug mle funcs */ @@ -711,7 +757,7 @@ static const struct file_operations debug_lockres_fops = { /* end - debug lockres funcs */ /* begin - debug state funcs */ -static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) +static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) { int out = 0; struct dlm_reco_node_data *node; @@ -735,35 +781,35 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) } /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ - out += snprintf(buf + out, len - out, + out += snprintf(db->buf + out, db->len - out, "Domain: %s Key: 0x%08x Protocol: %d.%d\n", dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major, dlm->dlm_locking_proto.pv_minor); /* Thread Pid: xxx Node: xxx State: xxxxx */ - out += snprintf(buf + out, len - out, + out += snprintf(db->buf + out, db->len - out, "Thread Pid: %d Node: %d State: %s\n", - task_pid_nr(dlm->dlm_thread_task), dlm->node_num, state); + dlm->dlm_thread_task->pid, dlm->node_num, state); /* Number of Joins: xxx Joining Node: xxx */ - out += snprintf(buf + out, len - out, + out += snprintf(db->buf + out, db->len - out, "Number of Joins: %d Joining Node: %d\n", dlm->num_joins, dlm->joining_node); /* Domain Map: xx xx xx */ - out += snprintf(buf + out, len - out, "Domain Map: "); + out += snprintf(db->buf + out, db->len - out, "Domain Map: "); out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES, - buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); + db->buf + out, db->len - out); + out += snprintf(db->buf + out, db->len - out, "\n"); /* Live Map: xx xx xx */ - out += snprintf(buf + out, len - out, "Live Map: "); + out += snprintf(db->buf + out, db->len - out, "Live Map: "); out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES, - buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); + db->buf + out, db->len - out); + out += snprintf(db->buf + out, db->len - out, "\n"); /* Lock Resources: xxx (xxx) */ - out += snprintf(buf + out, len - out, + out += snprintf(db->buf + out, db->len - out, "Lock Resources: %d (%d)\n", atomic_read(&dlm->res_cur_count), atomic_read(&dlm->res_tot_count)); @@ -775,29 +821,29 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) cur_mles += atomic_read(&dlm->mle_cur_count[i]); /* MLEs: xxx (xxx) */ - out += snprintf(buf + out, len - out, + out += snprintf(db->buf + out, db->len - out, "MLEs: %d (%d)\n", cur_mles, tot_mles); /* Blocking: xxx (xxx) */ - out += snprintf(buf + out, len - out, + out += 
snprintf(db->buf + out, db->len - out, " Blocking: %d (%d)\n", atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]), atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK])); /* Mastery: xxx (xxx) */ - out += snprintf(buf + out, len - out, + out += snprintf(db->buf + out, db->len - out, " Mastery: %d (%d)\n", atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]), atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER])); /* Migration: xxx (xxx) */ - out += snprintf(buf + out, len - out, + out += snprintf(db->buf + out, db->len - out, " Migration: %d (%d)\n", atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]), atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION])); /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */ - out += snprintf(buf + out, len - out, + out += snprintf(db->buf + out, db->len - out, "Lists: Dirty=%s Purge=%s PendingASTs=%s " "PendingBASTs=%s\n", (list_empty(&dlm->dirty_list) ? "Empty" : "InUse"), @@ -806,12 +852,12 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) (list_empty(&dlm->pending_basts) ? "Empty" : "InUse")); /* Purge Count: xxx Refs: xxx */ - out += snprintf(buf + out, len - out, + out += snprintf(db->buf + out, db->len - out, "Purge Count: %d Refs: %d\n", dlm->purge_count, atomic_read(&dlm->dlm_refs.refcount)); /* Dead Node: xxx */ - out += snprintf(buf + out, len - out, + out += snprintf(db->buf + out, db->len - out, "Dead Node: %d\n", dlm->reco.dead_node); /* What about DLM_RECO_STATE_FINALIZE? */ @@ -821,19 +867,19 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) state = "INACTIVE"; /* Recovery Pid: xxxx Master: xxx State: xxxx */ - out += snprintf(buf + out, len - out, + out += snprintf(db->buf + out, db->len - out, "Recovery Pid: %d Master: %d State: %s\n", - task_pid_nr(dlm->dlm_reco_thread_task), + dlm->dlm_reco_thread_task->pid, dlm->reco.new_master, state); /* Recovery Map: xx xx */ - out += snprintf(buf + out, len - out, "Recovery Map: "); + out += snprintf(db->buf + out, db->len - out, "Recovery Map: "); out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES, - buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); + db->buf + out, db->len - out); + out += snprintf(db->buf + out, db->len - out, "\n"); /* Recovery Node State: */ - out += snprintf(buf + out, len - out, "Recovery Node State:\n"); + out += snprintf(db->buf + out, db->len - out, "Recovery Node State:\n"); list_for_each_entry(node, &dlm->reco.node_data, list) { switch (node->state) { case DLM_RECO_NODE_DATA_INIT: @@ -861,7 +907,7 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) state = "BAD"; break; } - out += snprintf(buf + out, len - out, "\t%u - %s\n", + out += snprintf(db->buf + out, db->len - out, "\t%u - %s\n", node->node_num, state); } @@ -873,15 +919,15 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) static int debug_state_open(struct inode *inode, struct file *file) { struct dlm_ctxt *dlm = inode->i_private; - char *buf = NULL; + struct debug_buffer *db = NULL; - buf = (char *) get_zeroed_page(GFP_NOFS); - if (!buf) + db = debug_buffer_allocate(); + if (!db) goto bail; - i_size_write(inode, debug_state_print(dlm, buf, PAGE_SIZE - 1)); + db->len = debug_state_print(dlm, db); - file->private_data = buf; + file->private_data = db; return 0; bail: @@ -890,9 +936,9 @@ static int debug_state_open(struct inode *inode, struct file *file) static const struct file_operations debug_state_fops = { .open = debug_state_open, - .release = debug_release, - .read = debug_read, - .llseek = 
generic_file_llseek, + .release = debug_buffer_release, + .read = debug_buffer_read, + .llseek = debug_buffer_llseek, }; /* end - debug state funcs */ @@ -956,10 +1002,14 @@ void dlm_debug_shutdown(struct dlm_ctxt *dlm) struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt; if (dc) { - debugfs_remove(dc->debug_purgelist_dentry); - debugfs_remove(dc->debug_mle_dentry); - debugfs_remove(dc->debug_lockres_dentry); - debugfs_remove(dc->debug_state_dentry); + if (dc->debug_purgelist_dentry) + debugfs_remove(dc->debug_purgelist_dentry); + if (dc->debug_mle_dentry) + debugfs_remove(dc->debug_mle_dentry); + if (dc->debug_lockres_dentry) + debugfs_remove(dc->debug_lockres_dentry); + if (dc->debug_state_dentry) + debugfs_remove(dc->debug_state_dentry); dlm_debug_put(dc); } } @@ -990,7 +1040,8 @@ int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) { - debugfs_remove(dlm->dlm_debugfs_subroot); + if (dlm->dlm_debugfs_subroot) + debugfs_remove(dlm->dlm_debugfs_subroot); } /* debugfs root */ @@ -1006,6 +1057,7 @@ int dlm_create_debugfs_root(void) void dlm_destroy_debugfs_root(void) { - debugfs_remove(dlm_debugfs_root); + if (dlm_debugfs_root) + debugfs_remove(dlm_debugfs_root); } #endif /* CONFIG_DEBUG_FS */ diff --git a/trunk/fs/ocfs2/dlm/dlmdebug.h b/trunk/fs/ocfs2/dlm/dlmdebug.h index 1f27c4812d1a..8c686d22f9c7 100644 --- a/trunk/fs/ocfs2/dlm/dlmdebug.h +++ b/trunk/fs/ocfs2/dlm/dlmdebug.h @@ -37,6 +37,11 @@ struct dlm_debug_ctxt { struct dentry *debug_purgelist_dentry; }; +struct debug_buffer { + int len; + char *buf; +}; + struct debug_lockres { int dl_len; char *dl_buf; diff --git a/trunk/fs/ocfs2/dlm/dlmdomain.c b/trunk/fs/ocfs2/dlm/dlmdomain.c index 7e38a072d720..cc2aaa96cfe5 100644 --- a/trunk/fs/ocfs2/dlm/dlmdomain.c +++ b/trunk/fs/ocfs2/dlm/dlmdomain.c @@ -460,6 +460,8 @@ static int dlm_migrate_all_locks(struct dlm_ctxt *dlm) } cond_resched_lock(&dlm->spinlock); num += n; + mlog(0, "%s: touched %d lockreses in bucket %d " + "(tot=%d)\n", dlm->name, n, i, num); } spin_unlock(&dlm->spinlock); wake_up(&dlm->dlm_thread_wq); @@ -1659,8 +1661,8 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm) { - o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_up); - o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_down); + o2hb_unregister_callback(NULL, &dlm->dlm_hb_up); + o2hb_unregister_callback(NULL, &dlm->dlm_hb_down); o2net_unregister_handler_list(&dlm->dlm_domain_handlers); } @@ -1672,13 +1674,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm) o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB, dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI); - status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_down); + status = o2hb_register_callback(NULL, &dlm->dlm_hb_down); if (status) goto bail; o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB, dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI); - status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_up); + status = o2hb_register_callback(NULL, &dlm->dlm_hb_up); if (status) goto bail; diff --git a/trunk/fs/ocfs2/dlm/dlmlock.c b/trunk/fs/ocfs2/dlm/dlmlock.c index 7009292aac5a..69cf369961c4 100644 --- a/trunk/fs/ocfs2/dlm/dlmlock.c +++ b/trunk/fs/ocfs2/dlm/dlmlock.c @@ -106,9 +106,6 @@ static int dlm_can_grant_new_lock(struct dlm_lock_resource *res, if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type)) return 0; - if (!dlm_lock_compatible(tmplock->ml.convert_type, - lock->ml.type)) - return 0; } return 1; diff --git 
a/trunk/fs/ocfs2/dlm/dlmthread.c b/trunk/fs/ocfs2/dlm/dlmthread.c index 1d6d1d22c471..2211acf33d9b 100644 --- a/trunk/fs/ocfs2/dlm/dlmthread.c +++ b/trunk/fs/ocfs2/dlm/dlmthread.c @@ -122,13 +122,15 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res) void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { + mlog_entry("%.*s\n", res->lockname.len, res->lockname.name); + assert_spin_locked(&dlm->spinlock); assert_spin_locked(&res->spinlock); if (__dlm_lockres_unused(res)){ if (list_empty(&res->purge)) { - mlog(0, "%s: Adding res %.*s to purge list\n", - dlm->name, res->lockname.len, res->lockname.name); + mlog(0, "putting lockres %.*s:%p onto purge list\n", + res->lockname.len, res->lockname.name, res); res->last_used = jiffies; dlm_lockres_get(res); @@ -136,8 +138,8 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm, dlm->purge_count++; } } else if (!list_empty(&res->purge)) { - mlog(0, "%s: Removing res %.*s from purge list\n", - dlm->name, res->lockname.len, res->lockname.name); + mlog(0, "removing lockres %.*s:%p from purge list, owner=%u\n", + res->lockname.len, res->lockname.name, res, res->owner); list_del_init(&res->purge); dlm_lockres_put(res); @@ -148,6 +150,7 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm, void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { + mlog_entry("%.*s\n", res->lockname.len, res->lockname.name); spin_lock(&dlm->spinlock); spin_lock(&res->spinlock); @@ -168,8 +171,9 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm, master = (res->owner == dlm->node_num); - mlog(0, "%s: Purging res %.*s, master %d\n", dlm->name, - res->lockname.len, res->lockname.name, master); + + mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, + res->lockname.name, master); if (!master) { res->state |= DLM_LOCK_RES_DROPPING_REF; @@ -185,25 +189,27 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm, /* clear our bit from the master's refmap, ignore errors */ ret = dlm_drop_lockres_ref(dlm, res); if (ret < 0) { - mlog(ML_ERROR, "%s: deref %.*s failed %d\n", dlm->name, - res->lockname.len, res->lockname.name, ret); + mlog_errno(ret); if (!dlm_is_host_down(ret)) BUG(); } + mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n", + dlm->name, res->lockname.len, res->lockname.name, ret); spin_lock(&dlm->spinlock); spin_lock(&res->spinlock); } if (!list_empty(&res->purge)) { - mlog(0, "%s: Removing res %.*s from purgelist, master %d\n", - dlm->name, res->lockname.len, res->lockname.name, master); + mlog(0, "removing lockres %.*s:%p from purgelist, " + "master = %d\n", res->lockname.len, res->lockname.name, + res, master); list_del_init(&res->purge); dlm_lockres_put(res); dlm->purge_count--; } if (!__dlm_lockres_unused(res)) { - mlog(ML_ERROR, "%s: res %.*s in use after deref\n", + mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n", dlm->name, res->lockname.len, res->lockname.name); __dlm_print_one_lock_resource(res); BUG(); @@ -260,10 +266,10 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, unused = __dlm_lockres_unused(lockres); if (!unused || (lockres->state & DLM_LOCK_RES_MIGRATING)) { - mlog(0, "%s: res %.*s is in use or being remastered, " - "used %d, state %d\n", dlm->name, - lockres->lockname.len, lockres->lockname.name, - !unused, lockres->state); + mlog(0, "lockres %s:%.*s: is in use or " + "being remastered, used %d, state %d\n", + dlm->name, lockres->lockname.len, + lockres->lockname.name, !unused, lockres->state); list_move_tail(&dlm->purge_list, &lockres->purge); 
spin_unlock(&lockres->spinlock); continue; @@ -290,12 +296,15 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm, struct list_head *head; int can_grant = 1; - /* - * Because this function is called with the lockres + //mlog(0, "res->lockname.len=%d\n", res->lockname.len); + //mlog(0, "res->lockname.name=%p\n", res->lockname.name); + //mlog(0, "shuffle res %.*s\n", res->lockname.len, + // res->lockname.name); + + /* because this function is called with the lockres * spinlock, and because we know that it is not migrating/ * recovering/in-progress, it is fine to reserve asts and - * basts right before queueing them all throughout - */ + * basts right before queueing them all throughout */ assert_spin_locked(&dlm->ast_lock); assert_spin_locked(&res->spinlock); BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING| @@ -305,13 +314,13 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm, converting: if (list_empty(&res->converting)) goto blocked; - mlog(0, "%s: res %.*s has locks on the convert queue\n", dlm->name, - res->lockname.len, res->lockname.name); + mlog(0, "res %.*s has locks on a convert queue\n", res->lockname.len, + res->lockname.name); target = list_entry(res->converting.next, struct dlm_lock, list); if (target->ml.convert_type == LKM_IVMODE) { - mlog(ML_ERROR, "%s: res %.*s converting lock to invalid mode\n", - dlm->name, res->lockname.len, res->lockname.name); + mlog(ML_ERROR, "%.*s: converting a lock with no " + "convert_type!\n", res->lockname.len, res->lockname.name); BUG(); } head = &res->granted; @@ -356,12 +365,9 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm, spin_lock(&target->spinlock); BUG_ON(target->ml.highest_blocked != LKM_IVMODE); - mlog(0, "%s: res %.*s, AST for Converting lock %u:%llu, type " - "%d => %d, node %u\n", dlm->name, res->lockname.len, - res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)), - target->ml.type, + mlog(0, "calling ast for converting lock: %.*s, have: %d, " + "granting: %d, node: %u\n", res->lockname.len, + res->lockname.name, target->ml.type, target->ml.convert_type, target->ml.node); target->ml.type = target->ml.convert_type; @@ -422,14 +428,11 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm, spin_lock(&target->spinlock); BUG_ON(target->ml.highest_blocked != LKM_IVMODE); - mlog(0, "%s: res %.*s, AST for Blocked lock %u:%llu, type %d, " - "node %u\n", dlm->name, res->lockname.len, - res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)), + mlog(0, "calling ast for blocked lock: %.*s, granting: %d, " + "node: %u\n", res->lockname.len, res->lockname.name, target->ml.type, target->ml.node); - /* target->ml.type is already correct */ + // target->ml.type is already correct list_move_tail(&target->list, &res->granted); BUG_ON(!target->lksb); @@ -450,6 +453,7 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm, /* must have NO locks when calling this with res !=NULL * */ void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { + mlog_entry("dlm=%p, res=%p\n", dlm, res); if (res) { spin_lock(&dlm->spinlock); spin_lock(&res->spinlock); @@ -462,6 +466,8 @@ void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { + mlog_entry("dlm=%p, res=%p\n", dlm, res); + assert_spin_locked(&dlm->spinlock); assert_spin_locked(&res->spinlock); @@ -478,16 +484,13 @@ void 
__dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) res->state |= DLM_LOCK_RES_DIRTY; } } - - mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len, - res->lockname.name); } /* Launch the NM thread for the mounted volume */ int dlm_launch_thread(struct dlm_ctxt *dlm) { - mlog(0, "Starting dlm_thread...\n"); + mlog(0, "starting dlm thread...\n"); dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm_thread"); if (IS_ERR(dlm->dlm_thread_task)) { @@ -502,7 +505,7 @@ int dlm_launch_thread(struct dlm_ctxt *dlm) void dlm_complete_thread(struct dlm_ctxt *dlm) { if (dlm->dlm_thread_task) { - mlog(ML_KTHREAD, "Waiting for dlm thread to exit\n"); + mlog(ML_KTHREAD, "waiting for dlm thread to exit\n"); kthread_stop(dlm->dlm_thread_task); dlm->dlm_thread_task = NULL; } @@ -533,12 +536,7 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm) /* get an extra ref on lock */ dlm_lock_get(lock); res = lock->lockres; - mlog(0, "%s: res %.*s, Flush AST for lock %u:%llu, type %d, " - "node %u\n", dlm->name, res->lockname.len, - res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - lock->ml.type, lock->ml.node); + mlog(0, "delivering an ast for this lockres\n"); BUG_ON(!lock->ast_pending); @@ -559,9 +557,9 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm) /* possible that another ast was queued while * we were delivering the last one */ if (!list_empty(&lock->ast_list)) { - mlog(0, "%s: res %.*s, AST queued while flushing last " - "one\n", dlm->name, res->lockname.len, - res->lockname.name); + mlog(0, "aha another ast got queued while " + "we were finishing the last one. will " + "keep the ast_pending flag set.\n"); } else lock->ast_pending = 0; @@ -592,12 +590,8 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm) dlm_lock_put(lock); spin_unlock(&dlm->ast_lock); - mlog(0, "%s: res %.*s, Flush BAST for lock %u:%llu, " - "blocked %d, node %u\n", - dlm->name, res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - hi, lock->ml.node); + mlog(0, "delivering a bast for this lockres " + "(blocked = %d\n", hi); if (lock->ml.node != dlm->node_num) { ret = dlm_send_proxy_bast(dlm, res, lock, hi); @@ -611,9 +605,9 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm) /* possible that another bast was queued while * we were delivering the last one */ if (!list_empty(&lock->bast_list)) { - mlog(0, "%s: res %.*s, BAST queued while flushing last " - "one\n", dlm->name, res->lockname.len, - res->lockname.name); + mlog(0, "aha another bast got queued while " + "we were finishing the last one. will " + "keep the bast_pending flag set.\n"); } else lock->bast_pending = 0; @@ -681,12 +675,11 @@ static int dlm_thread(void *data) spin_lock(&res->spinlock); if (res->owner != dlm->node_num) { __dlm_print_one_lock_resource(res); - mlog(ML_ERROR, "%s: inprog %d, mig %d, reco %d," - " dirty %d\n", dlm->name, - !!(res->state & DLM_LOCK_RES_IN_PROGRESS), - !!(res->state & DLM_LOCK_RES_MIGRATING), - !!(res->state & DLM_LOCK_RES_RECOVERING), - !!(res->state & DLM_LOCK_RES_DIRTY)); + mlog(ML_ERROR, "inprog:%s, mig:%s, reco:%s, dirty:%s\n", + res->state & DLM_LOCK_RES_IN_PROGRESS ? "yes" : "no", + res->state & DLM_LOCK_RES_MIGRATING ? "yes" : "no", + res->state & DLM_LOCK_RES_RECOVERING ? "yes" : "no", + res->state & DLM_LOCK_RES_DIRTY ? 
"yes" : "no"); } BUG_ON(res->owner != dlm->node_num); @@ -700,8 +693,8 @@ static int dlm_thread(void *data) res->state &= ~DLM_LOCK_RES_DIRTY; spin_unlock(&res->spinlock); spin_unlock(&dlm->ast_lock); - mlog(0, "%s: res %.*s, inprogress, delay list " - "shuffle, state %d\n", dlm->name, + mlog(0, "delaying list shuffling for in-" + "progress lockres %.*s, state=%d\n", res->lockname.len, res->lockname.name, res->state); delay = 1; @@ -713,6 +706,10 @@ static int dlm_thread(void *data) * spinlock and do NOT have the dlm lock. * safe to reserve/queue asts and run the lists. */ + mlog(0, "calling dlm_shuffle_lists with dlm=%s, " + "res=%.*s\n", dlm->name, + res->lockname.len, res->lockname.name); + /* called while holding lockres lock */ dlm_shuffle_lists(dlm, res); res->state &= ~DLM_LOCK_RES_DIRTY; @@ -736,8 +733,7 @@ static int dlm_thread(void *data) /* unlikely, but we may need to give time to * other tasks */ if (!--n) { - mlog(0, "%s: Throttling dlm thread\n", - dlm->name); + mlog(0, "throttling dlm_thread\n"); break; } } diff --git a/trunk/fs/ocfs2/namei.c b/trunk/fs/ocfs2/namei.c index 30c523144452..d14cad6e2e41 100644 --- a/trunk/fs/ocfs2/namei.c +++ b/trunk/fs/ocfs2/namei.c @@ -1017,11 +1017,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, * An error return must mean that no cluster locks * were held on function exit. */ - if (oi1->ip_blkno != oi2->ip_blkno) { + if (oi1->ip_blkno != oi2->ip_blkno) ocfs2_inode_unlock(inode2, 1); - brelse(*bh2); - *bh2 = NULL; - } if (status != -ENOENT) mlog_errno(status); diff --git a/trunk/fs/ocfs2/ocfs2.h b/trunk/fs/ocfs2/ocfs2.h index 51cd6898e7f1..70dd3b1798f1 100644 --- a/trunk/fs/ocfs2/ocfs2.h +++ b/trunk/fs/ocfs2/ocfs2.h @@ -420,11 +420,6 @@ struct ocfs2_super struct inode *osb_tl_inode; struct buffer_head *osb_tl_bh; struct delayed_work osb_truncate_log_wq; - /* - * How many clusters in our truncate log. - * It must be protected by osb_tl_inode->i_mutex. - */ - unsigned int truncated_clusters; struct ocfs2_node_map osb_recovering_orphan_dirs; unsigned int *osb_orphan_wipes; diff --git a/trunk/fs/xfs/linux-2.6/sv.h b/trunk/fs/xfs/linux-2.6/sv.h new file mode 100644 index 000000000000..4dfc7c370819 --- /dev/null +++ b/trunk/fs/xfs/linux-2.6/sv.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_SUPPORT_SV_H__ +#define __XFS_SUPPORT_SV_H__ + +#include +#include +#include + +/* + * Synchronisation variables. 
+ * + * (Parameters "pri", "svf" and "rts" are not implemented) + */ + +typedef struct sv_s { + wait_queue_head_t waiters; +} sv_t; + +static inline void _sv_wait(sv_t *sv, spinlock_t *lock) +{ + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue_exclusive(&sv->waiters, &wait); + __set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock(lock); + + schedule(); + + remove_wait_queue(&sv->waiters, &wait); +} + +#define sv_init(sv,flag,name) \ + init_waitqueue_head(&(sv)->waiters) +#define sv_destroy(sv) \ + /*NOTHING*/ +#define sv_wait(sv, pri, lock, s) \ + _sv_wait(sv, lock) +#define sv_signal(sv) \ + wake_up(&(sv)->waiters) +#define sv_broadcast(sv) \ + wake_up_all(&(sv)->waiters) + +#endif /* __XFS_SUPPORT_SV_H__ */ diff --git a/trunk/fs/xfs/linux-2.6/xfs_aops.c b/trunk/fs/xfs/linux-2.6/xfs_aops.c index ec7bbb5645b6..691f61223ed6 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_aops.c +++ b/trunk/fs/xfs/linux-2.6/xfs_aops.c @@ -38,6 +38,15 @@ #include #include +/* + * Types of I/O for bmap clustering and I/O completion tracking. + */ +enum { + IO_READ, /* mapping for a read */ + IO_DELAY, /* mapping covers delalloc region */ + IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */ + IO_NEW /* just allocated */ +}; /* * Prime number of hash buckets since address is used as the key. @@ -173,6 +182,9 @@ xfs_setfilesize( xfs_inode_t *ip = XFS_I(ioend->io_inode); xfs_fsize_t isize; + ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); + ASSERT(ioend->io_type != IO_READ); + if (unlikely(ioend->io_error)) return 0; @@ -232,8 +244,10 @@ xfs_end_io( * We might have to update the on-disk file size after extending * writes. */ - error = xfs_setfilesize(ioend); - ASSERT(!error || error == EAGAIN); + if (ioend->io_type != IO_READ) { + error = xfs_setfilesize(ioend); + ASSERT(!error || error == EAGAIN); + } /* * If we didn't complete processing of the ioend, requeue it to the @@ -304,63 +318,14 @@ STATIC int xfs_map_blocks( struct inode *inode, loff_t offset, + ssize_t count, struct xfs_bmbt_irec *imap, - int type, - int nonblocking) + int flags) { - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - ssize_t count = 1 << inode->i_blkbits; - xfs_fileoff_t offset_fsb, end_fsb; - int error = 0; - int bmapi_flags = XFS_BMAPI_ENTIRE; - int nimaps = 1; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); - - if (type == IO_UNWRITTEN) - bmapi_flags |= XFS_BMAPI_IGSTATE; - - if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { - if (nonblocking) - return -XFS_ERROR(EAGAIN); - xfs_ilock(ip, XFS_ILOCK_SHARED); - } - - ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || - (ip->i_df.if_flags & XFS_IFEXTENTS)); - ASSERT(offset <= mp->m_maxioffset); - - if (offset + count > mp->m_maxioffset) - count = mp->m_maxioffset - offset; - end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); - offset_fsb = XFS_B_TO_FSBT(mp, offset); - error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, - bmapi_flags, NULL, 0, imap, &nimaps, NULL); - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - if (error) - return -XFS_ERROR(error); - - if (type == IO_DELALLOC && - (!nimaps || isnullstartblock(imap->br_startblock))) { - error = xfs_iomap_write_allocate(ip, offset, count, imap); - if (!error) - trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); - return -XFS_ERROR(error); - } + int nmaps = 1; + int new = 0; -#ifdef DEBUG - if (type == IO_UNWRITTEN) { - ASSERT(nimaps); - ASSERT(imap->br_startblock != HOLESTARTBLOCK); - ASSERT(imap->br_startblock != DELAYSTARTBLOCK); - } -#endif - if (nimaps) - 
trace_xfs_map_blocks_found(ip, offset, count, type, imap); - return 0; + return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new); } STATIC int @@ -415,18 +380,26 @@ xfs_submit_ioend_bio( submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC_PLUG : WRITE, bio); + ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP)); + bio_put(bio); } STATIC struct bio * xfs_alloc_ioend_bio( struct buffer_head *bh) { + struct bio *bio; int nvecs = bio_get_nr_vecs(bh->b_bdev); - struct bio *bio = bio_alloc(GFP_NOIO, nvecs); + + do { + bio = bio_alloc(GFP_NOIO, nvecs); + nvecs >>= 1; + } while (!bio); ASSERT(bio->bi_private == NULL); bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio->bi_bdev = bh->b_bdev; + bio_get(bio); return bio; } @@ -497,8 +470,9 @@ xfs_submit_ioend( /* Pass 1 - start writeback */ do { next = ioend->io_list; - for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) + for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { xfs_start_buffer_writeback(bh); + } } while ((ioend = next) != NULL); /* Pass 2 - submit I/O */ @@ -626,12 +600,116 @@ xfs_map_at_offset( ASSERT(imap->br_startblock != HOLESTARTBLOCK); ASSERT(imap->br_startblock != DELAYSTARTBLOCK); + lock_buffer(bh); xfs_map_buffer(inode, bh, imap, offset); + bh->b_bdev = xfs_find_bdev_for_inode(inode); set_buffer_mapped(bh); clear_buffer_delay(bh); clear_buffer_unwritten(bh); } +/* + * Look for a page at index that is suitable for clustering. + */ +STATIC unsigned int +xfs_probe_page( + struct page *page, + unsigned int pg_offset) +{ + struct buffer_head *bh, *head; + int ret = 0; + + if (PageWriteback(page)) + return 0; + if (!PageDirty(page)) + return 0; + if (!page->mapping) + return 0; + if (!page_has_buffers(page)) + return 0; + + bh = head = page_buffers(page); + do { + if (!buffer_uptodate(bh)) + break; + if (!buffer_mapped(bh)) + break; + ret += bh->b_size; + if (ret >= pg_offset) + break; + } while ((bh = bh->b_this_page) != head); + + return ret; +} + +STATIC size_t +xfs_probe_cluster( + struct inode *inode, + struct page *startpage, + struct buffer_head *bh, + struct buffer_head *head) +{ + struct pagevec pvec; + pgoff_t tindex, tlast, tloff; + size_t total = 0; + int done = 0, i; + + /* First sum forwards in this page */ + do { + if (!buffer_uptodate(bh) || !buffer_mapped(bh)) + return total; + total += bh->b_size; + } while ((bh = bh->b_this_page) != head); + + /* if we reached the end of the page, sum forwards in following pages */ + tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; + tindex = startpage->index + 1; + + /* Prune this back to avoid pathological behavior */ + tloff = min(tlast, startpage->index + 64); + + pagevec_init(&pvec, 0); + while (!done && tindex <= tloff) { + unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1); + + if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len)) + break; + + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; + size_t pg_offset, pg_len = 0; + + if (tindex == tlast) { + pg_offset = + i_size_read(inode) & (PAGE_CACHE_SIZE - 1); + if (!pg_offset) { + done = 1; + break; + } + } else + pg_offset = PAGE_CACHE_SIZE; + + if (page->index == tindex && trylock_page(page)) { + pg_len = xfs_probe_page(page, pg_offset); + unlock_page(page); + } + + if (!pg_len) { + done = 1; + break; + } + + total += pg_len; + tindex++; + } + + pagevec_release(&pvec); + cond_resched(); + } + + return total; +} + /* * Test if a given page is suitable for writing as part of an unwritten * or delayed allocate extent. 
@@ -653,9 +731,9 @@ xfs_is_delayed_page( if (buffer_unwritten(bh)) acceptable = (type == IO_UNWRITTEN); else if (buffer_delay(bh)) - acceptable = (type == IO_DELALLOC); + acceptable = (type == IO_DELAY); else if (buffer_dirty(bh) && buffer_mapped(bh)) - acceptable = (type == IO_OVERWRITE); + acceptable = (type == IO_NEW); else break; } while ((bh = bh->b_this_page) != head); @@ -680,7 +758,8 @@ xfs_convert_page( loff_t tindex, struct xfs_bmbt_irec *imap, xfs_ioend_t **ioendp, - struct writeback_control *wbc) + struct writeback_control *wbc, + int all_bh) { struct buffer_head *bh, *head; xfs_off_t end_offset; @@ -735,30 +814,37 @@ xfs_convert_page( continue; } - if (buffer_unwritten(bh) || buffer_delay(bh) || - buffer_mapped(bh)) { + if (buffer_unwritten(bh) || buffer_delay(bh)) { if (buffer_unwritten(bh)) type = IO_UNWRITTEN; - else if (buffer_delay(bh)) - type = IO_DELALLOC; else - type = IO_OVERWRITE; + type = IO_DELAY; if (!xfs_imap_valid(inode, imap, offset)) { done = 1; continue; } - lock_buffer(bh); - if (type != IO_OVERWRITE) - xfs_map_at_offset(inode, bh, imap, offset); + ASSERT(imap->br_startblock != HOLESTARTBLOCK); + ASSERT(imap->br_startblock != DELAYSTARTBLOCK); + + xfs_map_at_offset(inode, bh, imap, offset); xfs_add_to_ioend(inode, bh, offset, type, ioendp, done); page_dirty--; count++; } else { - done = 1; + type = IO_NEW; + if (buffer_mapped(bh) && all_bh) { + lock_buffer(bh); + xfs_add_to_ioend(inode, bh, offset, + type, ioendp, done); + count++; + page_dirty--; + } else { + done = 1; + } } } while (offset += len, (bh = bh->b_this_page) != head); @@ -790,6 +876,7 @@ xfs_cluster_write( struct xfs_bmbt_irec *imap, xfs_ioend_t **ioendp, struct writeback_control *wbc, + int all_bh, pgoff_t tlast) { struct pagevec pvec; @@ -804,7 +891,7 @@ xfs_cluster_write( for (i = 0; i < pagevec_count(&pvec); i++) { done = xfs_convert_page(inode, pvec.pages[i], tindex++, - imap, ioendp, wbc); + imap, ioendp, wbc, all_bh); if (done) break; } @@ -848,7 +935,7 @@ xfs_aops_discard_page( struct buffer_head *bh, *head; loff_t offset = page_offset(page); - if (!xfs_is_delayed_page(page, IO_DELALLOC)) + if (!xfs_is_delayed_page(page, IO_DELAY)) goto out_invalidate; if (XFS_FORCED_SHUTDOWN(ip->i_mount)) @@ -915,10 +1002,10 @@ xfs_vm_writepage( unsigned int type; __uint64_t end_offset; pgoff_t end_index, last_index; - ssize_t len; - int err, imap_valid = 0, uptodate = 1; + ssize_t size, len; + int flags, err, imap_valid = 0, uptodate = 1; int count = 0; - int nonblocking = 0; + int all_bh = 0; trace_xfs_writepage(inode, page, 0); @@ -969,14 +1056,10 @@ xfs_vm_writepage( bh = head = page_buffers(page); offset = page_offset(page); - type = IO_OVERWRITE; - - if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking) - nonblocking = 1; + flags = BMAPI_READ; + type = IO_NEW; do { - int new_ioend = 0; - if (offset >= end_offset) break; if (!buffer_uptodate(bh)) @@ -993,54 +1076,90 @@ xfs_vm_writepage( continue; } - if (buffer_unwritten(bh)) { - if (type != IO_UNWRITTEN) { - type = IO_UNWRITTEN; + if (imap_valid) + imap_valid = xfs_imap_valid(inode, &imap, offset); + + if (buffer_unwritten(bh) || buffer_delay(bh)) { + int new_ioend = 0; + + /* + * Make sure we don't use a read-only iomap + */ + if (flags == BMAPI_READ) imap_valid = 0; + + if (buffer_unwritten(bh)) { + type = IO_UNWRITTEN; + flags = BMAPI_WRITE | BMAPI_IGNSTATE; + } else if (buffer_delay(bh)) { + type = IO_DELAY; + flags = BMAPI_ALLOCATE; + + if (wbc->sync_mode == WB_SYNC_NONE) + flags |= BMAPI_TRYLOCK; } - } else if (buffer_delay(bh)) { - if 
(type != IO_DELALLOC) { - type = IO_DELALLOC; - imap_valid = 0; + + if (!imap_valid) { + /* + * If we didn't have a valid mapping then we + * need to ensure that we put the new mapping + * in a new ioend structure. This needs to be + * done to ensure that the ioends correctly + * reflect the block mappings at io completion + * for unwritten extent conversion. + */ + new_ioend = 1; + err = xfs_map_blocks(inode, offset, len, + &imap, flags); + if (err) + goto error; + imap_valid = xfs_imap_valid(inode, &imap, + offset); } - } else if (buffer_uptodate(bh)) { - if (type != IO_OVERWRITE) { - type = IO_OVERWRITE; - imap_valid = 0; + if (imap_valid) { + xfs_map_at_offset(inode, bh, &imap, offset); + xfs_add_to_ioend(inode, bh, offset, type, + &ioend, new_ioend); + count++; } - } else { - if (PageUptodate(page)) { - ASSERT(buffer_mapped(bh)); - imap_valid = 0; + } else if (buffer_uptodate(bh)) { + /* + * we got here because the buffer is already mapped. + * That means it must already have extents allocated + * underneath it. Map the extent by reading it. + */ + if (!imap_valid || flags != BMAPI_READ) { + flags = BMAPI_READ; + size = xfs_probe_cluster(inode, page, bh, head); + err = xfs_map_blocks(inode, offset, size, + &imap, flags); + if (err) + goto error; + imap_valid = xfs_imap_valid(inode, &imap, + offset); } - continue; - } - if (imap_valid) - imap_valid = xfs_imap_valid(inode, &imap, offset); - if (!imap_valid) { /* - * If we didn't have a valid mapping then we need to - * put the new mapping into a separate ioend structure. - * This ensures non-contiguous extents always have - * separate ioends, which is particularly important - * for unwritten extent conversion at I/O completion - * time. + * We set the type to IO_NEW in case we are doing a + * small write at EOF that is extending the file but + * without needing an allocation. We need to update the + * file size on I/O completion in this case so it is + * the same case as having just allocated a new extent + * that we are writing into for the first time. */ - new_ioend = 1; - err = xfs_map_blocks(inode, offset, &imap, type, - nonblocking); - if (err) - goto error; - imap_valid = xfs_imap_valid(inode, &imap, offset); - } - if (imap_valid) { - lock_buffer(bh); - if (type != IO_OVERWRITE) - xfs_map_at_offset(inode, bh, &imap, offset); - xfs_add_to_ioend(inode, bh, offset, type, &ioend, - new_ioend); - count++; + type = IO_NEW; + if (trylock_buffer(bh)) { + if (imap_valid) + all_bh = 1; + xfs_add_to_ioend(inode, bh, offset, type, + &ioend, !imap_valid); + count++; + } else { + imap_valid = 0; + } + } else if (PageUptodate(page)) { + ASSERT(buffer_mapped(bh)); + imap_valid = 0; } if (!iohead) @@ -1069,7 +1188,7 @@ xfs_vm_writepage( end_index = last_index; xfs_cluster_write(inode, page->index + 1, &imap, &ioend, - wbc, end_index); + wbc, all_bh, end_index); } if (iohead) @@ -1138,19 +1257,13 @@ __xfs_get_blocks( int create, int direct) { - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t offset_fsb, end_fsb; - int error = 0; - int lockmode = 0; + int flags = create ? 
BMAPI_WRITE : BMAPI_READ; struct xfs_bmbt_irec imap; - int nimaps = 1; xfs_off_t offset; ssize_t size; + int nimap = 1; int new = 0; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + int error; offset = (xfs_off_t)iblock << inode->i_blkbits; ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); @@ -1159,45 +1272,15 @@ __xfs_get_blocks( if (!create && direct && offset >= i_size_read(inode)) return 0; - if (create) { - lockmode = XFS_ILOCK_EXCL; - xfs_ilock(ip, lockmode); - } else { - lockmode = xfs_ilock_map_shared(ip); - } - - ASSERT(offset <= mp->m_maxioffset); - if (offset + size > mp->m_maxioffset) - size = mp->m_maxioffset - offset; - end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); - offset_fsb = XFS_B_TO_FSBT(mp, offset); + if (direct && create) + flags |= BMAPI_DIRECT; - error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, - XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL); + error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap, + &new); if (error) - goto out_unlock; - - if (create && - (!nimaps || - (imap.br_startblock == HOLESTARTBLOCK || - imap.br_startblock == DELAYSTARTBLOCK))) { - if (direct) { - error = xfs_iomap_write_direct(ip, offset, size, - &imap, nimaps); - } else { - error = xfs_iomap_write_delay(ip, offset, size, &imap); - } - if (error) - goto out_unlock; - - trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap); - } else if (nimaps) { - trace_xfs_get_blocks_found(ip, offset, size, 0, &imap); - } else { - trace_xfs_get_blocks_notfound(ip, offset, size); - goto out_unlock; - } - xfs_iunlock(ip, lockmode); + return -error; + if (nimap == 0) + return 0; if (imap.br_startblock != HOLESTARTBLOCK && imap.br_startblock != DELAYSTARTBLOCK) { @@ -1264,10 +1347,6 @@ __xfs_get_blocks( } return 0; - -out_unlock: - xfs_iunlock(ip, lockmode); - return -error; } int @@ -1355,7 +1434,7 @@ xfs_vm_direct_IO( ssize_t ret; if (rw & WRITE) { - iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); + iocb->private = xfs_alloc_ioend(inode, IO_NEW); ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, nr_segs, diff --git a/trunk/fs/xfs/linux-2.6/xfs_aops.h b/trunk/fs/xfs/linux-2.6/xfs_aops.h index 71f721e1a71f..c5057fb6237a 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_aops.h +++ b/trunk/fs/xfs/linux-2.6/xfs_aops.h @@ -22,22 +22,6 @@ extern struct workqueue_struct *xfsdatad_workqueue; extern struct workqueue_struct *xfsconvertd_workqueue; extern mempool_t *xfs_ioend_pool; -/* - * Types of I/O for bmap clustering and I/O completion tracking. - */ -enum { - IO_DIRECT = 0, /* special case for direct I/O ioends */ - IO_DELALLOC, /* mapping covers delalloc region */ - IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */ - IO_OVERWRITE, /* mapping covers already allocated extent */ -}; - -#define XFS_IO_TYPES \ - { 0, "" }, \ - { IO_DELALLOC, "delalloc" }, \ - { IO_UNWRITTEN, "unwritten" }, \ - { IO_OVERWRITE, "overwrite" } - /* * xfs_ioend struct manages large extent writes for XFS. * It can manage several multi-page bio's at once. 
diff --git a/trunk/fs/xfs/linux-2.6/xfs_buf.c b/trunk/fs/xfs/linux-2.6/xfs_buf.c index 92f1f2acc6ab..4c5deb6e9e31 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_buf.c +++ b/trunk/fs/xfs/linux-2.6/xfs_buf.c @@ -44,7 +44,12 @@ static kmem_zone_t *xfs_buf_zone; STATIC int xfsbufd(void *); +STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t); STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); +static struct shrinker xfs_buf_shake = { + .shrink = xfsbufd_wakeup, + .seeks = DEFAULT_SEEKS, +}; static struct workqueue_struct *xfslogd_workqueue; struct workqueue_struct *xfsdatad_workqueue; @@ -163,79 +168,8 @@ test_page_region( } /* - * xfs_buf_lru_add - add a buffer to the LRU. - * - * The LRU takes a new reference to the buffer so that it will only be freed - * once the shrinker takes the buffer off the LRU. + * Internal xfs_buf_t object manipulation */ -STATIC void -xfs_buf_lru_add( - struct xfs_buf *bp) -{ - struct xfs_buftarg *btp = bp->b_target; - - spin_lock(&btp->bt_lru_lock); - if (list_empty(&bp->b_lru)) { - atomic_inc(&bp->b_hold); - list_add_tail(&bp->b_lru, &btp->bt_lru); - btp->bt_lru_nr++; - } - spin_unlock(&btp->bt_lru_lock); -} - -/* - * xfs_buf_lru_del - remove a buffer from the LRU - * - * The unlocked check is safe here because it only occurs when there are not - * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there - * to optimise the shrinker removing the buffer from the LRU and calling - * xfs_buf_free(). i.e. it removes an unneccessary round trip on the - * bt_lru_lock. - */ -STATIC void -xfs_buf_lru_del( - struct xfs_buf *bp) -{ - struct xfs_buftarg *btp = bp->b_target; - - if (list_empty(&bp->b_lru)) - return; - - spin_lock(&btp->bt_lru_lock); - if (!list_empty(&bp->b_lru)) { - list_del_init(&bp->b_lru); - btp->bt_lru_nr--; - } - spin_unlock(&btp->bt_lru_lock); -} - -/* - * When we mark a buffer stale, we remove the buffer from the LRU and clear the - * b_lru_ref count so that the buffer is freed immediately when the buffer - * reference count falls to zero. If the buffer is already on the LRU, we need - * to remove the reference that LRU holds on the buffer. - * - * This prevents build-up of stale buffers on the LRU. 
- */ -void -xfs_buf_stale( - struct xfs_buf *bp) -{ - bp->b_flags |= XBF_STALE; - atomic_set(&(bp)->b_lru_ref, 0); - if (!list_empty(&bp->b_lru)) { - struct xfs_buftarg *btp = bp->b_target; - - spin_lock(&btp->bt_lru_lock); - if (!list_empty(&bp->b_lru)) { - list_del_init(&bp->b_lru); - btp->bt_lru_nr--; - atomic_dec(&bp->b_hold); - } - spin_unlock(&btp->bt_lru_lock); - } - ASSERT(atomic_read(&bp->b_hold) >= 1); -} STATIC void _xfs_buf_initialize( @@ -252,9 +186,7 @@ _xfs_buf_initialize( memset(bp, 0, sizeof(xfs_buf_t)); atomic_set(&bp->b_hold, 1); - atomic_set(&bp->b_lru_ref, 1); init_completion(&bp->b_iowait); - INIT_LIST_HEAD(&bp->b_lru); INIT_LIST_HEAD(&bp->b_list); RB_CLEAR_NODE(&bp->b_rbnode); sema_init(&bp->b_sema, 0); /* held, no waiters */ @@ -330,8 +262,6 @@ xfs_buf_free( { trace_xfs_buf_free(bp, _RET_IP_); - ASSERT(list_empty(&bp->b_lru)); - if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { uint i; @@ -407,6 +337,7 @@ _xfs_buf_lookup_pages( __func__, gfp_mask); XFS_STATS_INC(xb_page_retries); + xfsbufd_wakeup(NULL, 0, gfp_mask); congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; } @@ -897,7 +828,6 @@ xfs_buf_rele( if (!pag) { ASSERT(!bp->b_relse); - ASSERT(list_empty(&bp->b_lru)); ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); if (atomic_dec_and_test(&bp->b_hold)) xfs_buf_free(bp); @@ -905,19 +835,13 @@ xfs_buf_rele( } ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); - ASSERT(atomic_read(&bp->b_hold) > 0); if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { if (bp->b_relse) { atomic_inc(&bp->b_hold); spin_unlock(&pag->pag_buf_lock); bp->b_relse(bp); - } else if (!(bp->b_flags & XBF_STALE) && - atomic_read(&bp->b_lru_ref)) { - xfs_buf_lru_add(bp); - spin_unlock(&pag->pag_buf_lock); } else { - xfs_buf_lru_del(bp); ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); spin_unlock(&pag->pag_buf_lock); @@ -1514,84 +1438,51 @@ xfs_buf_iomove( */ /* - * Wait for any bufs with callbacks that have been submitted but have not yet - * returned. These buffers will have an elevated hold count, so wait on those - * while freeing all the buffers only held by the LRU. + * Wait for any bufs with callbacks that have been submitted but + * have not yet returned... walk the hash list for the target. */ void xfs_wait_buftarg( struct xfs_buftarg *btp) { - struct xfs_buf *bp; + struct xfs_perag *pag; + uint i; -restart: - spin_lock(&btp->bt_lru_lock); - while (!list_empty(&btp->bt_lru)) { - bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); - if (atomic_read(&bp->b_hold) > 1) { - spin_unlock(&btp->bt_lru_lock); + for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) { + pag = xfs_perag_get(btp->bt_mount, i); + spin_lock(&pag->pag_buf_lock); + while (rb_first(&pag->pag_buf_tree)) { + spin_unlock(&pag->pag_buf_lock); delay(100); - goto restart; + spin_lock(&pag->pag_buf_lock); } - /* - * clear the LRU reference count so the bufer doesn't get - * ignored in xfs_buf_rele(). 
- */ - atomic_set(&bp->b_lru_ref, 0); - spin_unlock(&btp->bt_lru_lock); - xfs_buf_rele(bp); - spin_lock(&btp->bt_lru_lock); + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); } - spin_unlock(&btp->bt_lru_lock); } -int -xfs_buftarg_shrink( - struct shrinker *shrink, - int nr_to_scan, - gfp_t mask) -{ - struct xfs_buftarg *btp = container_of(shrink, - struct xfs_buftarg, bt_shrinker); - struct xfs_buf *bp; - LIST_HEAD(dispose); - - if (!nr_to_scan) - return btp->bt_lru_nr; - - spin_lock(&btp->bt_lru_lock); - while (!list_empty(&btp->bt_lru)) { - if (nr_to_scan-- <= 0) - break; - - bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); - - /* - * Decrement the b_lru_ref count unless the value is already - * zero. If the value is already zero, we need to reclaim the - * buffer, otherwise it gets another trip through the LRU. - */ - if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { - list_move_tail(&bp->b_lru, &btp->bt_lru); - continue; - } - - /* - * remove the buffer from the LRU now to avoid needing another - * lock round trip inside xfs_buf_rele(). - */ - list_move(&bp->b_lru, &dispose); - btp->bt_lru_nr--; - } - spin_unlock(&btp->bt_lru_lock); +/* + * buftarg list for delwrite queue processing + */ +static LIST_HEAD(xfs_buftarg_list); +static DEFINE_SPINLOCK(xfs_buftarg_lock); - while (!list_empty(&dispose)) { - bp = list_first_entry(&dispose, struct xfs_buf, b_lru); - list_del_init(&bp->b_lru); - xfs_buf_rele(bp); - } +STATIC void +xfs_register_buftarg( + xfs_buftarg_t *btp) +{ + spin_lock(&xfs_buftarg_lock); + list_add(&btp->bt_list, &xfs_buftarg_list); + spin_unlock(&xfs_buftarg_lock); +} - return btp->bt_lru_nr; +STATIC void +xfs_unregister_buftarg( + xfs_buftarg_t *btp) +{ + spin_lock(&xfs_buftarg_lock); + list_del(&btp->bt_list); + spin_unlock(&xfs_buftarg_lock); } void @@ -1599,14 +1490,17 @@ xfs_free_buftarg( struct xfs_mount *mp, struct xfs_buftarg *btp) { - unregister_shrinker(&btp->bt_shrinker); - xfs_flush_buftarg(btp, 1); if (mp->m_flags & XFS_MOUNT_BARRIER) xfs_blkdev_issue_flush(btp); iput(btp->bt_mapping->host); + /* Unregister the buftarg first so that we don't get a + * wakeup finding a non-existent task + */ + xfs_unregister_buftarg(btp); kthread_stop(btp->bt_task); + kmem_free(btp); } @@ -1703,13 +1597,20 @@ xfs_alloc_delwrite_queue( xfs_buftarg_t *btp, const char *fsname) { + int error = 0; + + INIT_LIST_HEAD(&btp->bt_list); INIT_LIST_HEAD(&btp->bt_delwrite_queue); spin_lock_init(&btp->bt_delwrite_lock); btp->bt_flags = 0; btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); - if (IS_ERR(btp->bt_task)) - return PTR_ERR(btp->bt_task); - return 0; + if (IS_ERR(btp->bt_task)) { + error = PTR_ERR(btp->bt_task); + goto out_error; + } + xfs_register_buftarg(btp); +out_error: + return error; } xfs_buftarg_t * @@ -1726,17 +1627,12 @@ xfs_alloc_buftarg( btp->bt_mount = mp; btp->bt_dev = bdev->bd_dev; btp->bt_bdev = bdev; - INIT_LIST_HEAD(&btp->bt_lru); - spin_lock_init(&btp->bt_lru_lock); if (xfs_setsize_buftarg_early(btp, bdev)) goto error; if (xfs_mapping_buftarg(btp, bdev)) goto error; if (xfs_alloc_delwrite_queue(btp, fsname)) goto error; - btp->bt_shrinker.shrink = xfs_buftarg_shrink; - btp->bt_shrinker.seeks = DEFAULT_SEEKS; - register_shrinker(&btp->bt_shrinker); return btp; error: @@ -1841,6 +1737,27 @@ xfs_buf_runall_queues( flush_workqueue(queue); } +STATIC int +xfsbufd_wakeup( + struct shrinker *shrink, + int priority, + gfp_t mask) +{ + xfs_buftarg_t *btp; + + spin_lock(&xfs_buftarg_lock); + list_for_each_entry(btp, &xfs_buftarg_list, bt_list) { 
+ if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags)) + continue; + if (list_empty(&btp->bt_delwrite_queue)) + continue; + set_bit(XBT_FORCE_FLUSH, &btp->bt_flags); + wake_up_process(btp->bt_task); + } + spin_unlock(&xfs_buftarg_lock); + return 0; +} + /* * Move as many buffers as specified to the supplied list * idicating if we skipped any buffers to prevent deadlocks. @@ -2035,6 +1952,7 @@ xfs_buf_init(void) if (!xfsconvertd_workqueue) goto out_destroy_xfsdatad_workqueue; + register_shrinker(&xfs_buf_shake); return 0; out_destroy_xfsdatad_workqueue: @@ -2050,6 +1968,7 @@ xfs_buf_init(void) void xfs_buf_terminate(void) { + unregister_shrinker(&xfs_buf_shake); destroy_workqueue(xfsconvertd_workqueue); destroy_workqueue(xfsdatad_workqueue); destroy_workqueue(xfslogd_workqueue); diff --git a/trunk/fs/xfs/linux-2.6/xfs_buf.h b/trunk/fs/xfs/linux-2.6/xfs_buf.h index a76c2428faff..383a3f37cf98 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_buf.h +++ b/trunk/fs/xfs/linux-2.6/xfs_buf.h @@ -128,15 +128,10 @@ typedef struct xfs_buftarg { /* per device delwri queue */ struct task_struct *bt_task; + struct list_head bt_list; struct list_head bt_delwrite_queue; spinlock_t bt_delwrite_lock; unsigned long bt_flags; - - /* LRU control structures */ - struct shrinker bt_shrinker; - struct list_head bt_lru; - spinlock_t bt_lru_lock; - unsigned int bt_lru_nr; } xfs_buftarg_t; /* @@ -169,11 +164,9 @@ typedef struct xfs_buf { xfs_off_t b_file_offset; /* offset in file */ size_t b_buffer_length;/* size of buffer in bytes */ atomic_t b_hold; /* reference count */ - atomic_t b_lru_ref; /* lru reclaim ref count */ xfs_buf_flags_t b_flags; /* status flags */ struct semaphore b_sema; /* semaphore for lockables */ - struct list_head b_lru; /* lru list */ wait_queue_head_t b_waiters; /* unpin waiters */ struct list_head b_list; struct xfs_perag *b_pag; /* contains rbtree root */ @@ -271,8 +264,7 @@ extern void xfs_buf_terminate(void); #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) -void xfs_buf_stale(struct xfs_buf *bp); -#define XFS_BUF_STALE(bp) xfs_buf_stale(bp); +#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE) #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) #define XFS_BUF_SUPER_STALE(bp) do { \ @@ -336,15 +328,9 @@ void xfs_buf_stale(struct xfs_buf *bp); #define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) #define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) -static inline void -xfs_buf_set_ref( - struct xfs_buf *bp, - int lru_ref) -{ - atomic_set(&bp->b_lru_ref, lru_ref); -} -#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref) +#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0) #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) +#define XFS_BUF_SET_REF(bp, ref) do { } while (0) #define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) diff --git a/trunk/fs/xfs/linux-2.6/xfs_export.c b/trunk/fs/xfs/linux-2.6/xfs_export.c index fc0114da7fdd..3764d74790ec 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_export.c +++ b/trunk/fs/xfs/linux-2.6/xfs_export.c @@ -70,16 +70,8 @@ xfs_fs_encode_fh( else fileid_type = FILEID_INO32_GEN_PARENT; - /* - * If the the filesystem may contain 64bit inode numbers, we need - * to use larger file handles that can represent them. - * - * While we only allocate inodes that do not fit into 32 bits any - * large enough filesystem may contain them, thus the slightly - * confusing looking conditional below. 
- */ - if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) || - (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES)) + /* filesystem may contain 64bit inode numbers */ + if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS)) fileid_type |= XFS_FILEID_TYPE_64FLAG; /* diff --git a/trunk/fs/xfs/linux-2.6/xfs_linux.h b/trunk/fs/xfs/linux-2.6/xfs_linux.h index 096494997747..214ddd71ff79 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_linux.h +++ b/trunk/fs/xfs/linux-2.6/xfs_linux.h @@ -37,6 +37,7 @@ #include #include +#include #include #include diff --git a/trunk/fs/xfs/linux-2.6/xfs_super.c b/trunk/fs/xfs/linux-2.6/xfs_super.c index c51faaa5e291..064f964d4f3c 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_super.c +++ b/trunk/fs/xfs/linux-2.6/xfs_super.c @@ -834,11 +834,8 @@ xfsaild_wakeup( struct xfs_ail *ailp, xfs_lsn_t threshold_lsn) { - /* only ever move the target forwards */ - if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) { - ailp->xa_target = threshold_lsn; - wake_up_process(ailp->xa_task); - } + ailp->xa_target = threshold_lsn; + wake_up_process(ailp->xa_task); } STATIC int @@ -850,17 +847,8 @@ xfsaild( long tout = 0; /* milliseconds */ while (!kthread_should_stop()) { - /* - * for short sleeps indicating congestion, don't allow us to - * get woken early. Otherwise all we do is bang on the AIL lock - * without making progress. - */ - if (tout && tout <= 20) - __set_current_state(TASK_KILLABLE); - else - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(tout ? - msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); + schedule_timeout_interruptible(tout ? + msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); /* swsusp */ try_to_freeze(); @@ -1130,8 +1118,6 @@ xfs_fs_evict_inode( */ ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); - lockdep_set_class_and_name(&ip->i_iolock.mr_lock, - &xfs_iolock_reclaimable, "xfs_iolock_reclaimable"); xfs_inactive(ip); } diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.c b/trunk/fs/xfs/linux-2.6/xfs_sync.c index a02480de9759..afb0d7cfad1c 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_sync.c +++ b/trunk/fs/xfs/linux-2.6/xfs_sync.c @@ -53,30 +53,14 @@ xfs_inode_ag_walk_grab( { struct inode *inode = VFS_I(ip); - ASSERT(rcu_read_lock_held()); - - /* - * check for stale RCU freed inode - * - * If the inode has been reallocated, it doesn't matter if it's not in - * the AG we are walking - we are walking for writeback, so if it - * passes all the "valid inode" checks and is dirty, then we'll write - * it back anyway. If it has been reallocated and still being - * initialised, the XFS_INEW check below will catch it. - */ - spin_lock(&ip->i_flags_lock); - if (!ip->i_ino) - goto out_unlock_noent; - - /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ - if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) - goto out_unlock_noent; - spin_unlock(&ip->i_flags_lock); - /* nothing to sync during shutdown */ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return EFSCORRUPTED; + /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ + if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) + return ENOENT; + /* If we can't grab the inode, it must on it's way to reclaim. 
*/ if (!igrab(inode)) return ENOENT; @@ -88,10 +72,6 @@ xfs_inode_ag_walk_grab( /* inode is valid */ return 0; - -out_unlock_noent: - spin_unlock(&ip->i_flags_lock); - return ENOENT; } STATIC int @@ -118,12 +98,12 @@ xfs_inode_ag_walk( int error = 0; int i; - rcu_read_lock(); + read_lock(&pag->pag_ici_lock); nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void **)batch, first_index, XFS_LOOKUP_BATCH); if (!nr_found) { - rcu_read_unlock(); + read_unlock(&pag->pag_ici_lock); break; } @@ -138,26 +118,18 @@ xfs_inode_ag_walk( batch[i] = NULL; /* - * Update the index for the next lookup. Catch - * overflows into the next AG range which can occur if - * we have inodes in the last block of the AG and we - * are currently pointing to the last inode. - * - * Because we may see inodes that are from the wrong AG - * due to RCU freeing and reallocation, only update the - * index if it lies in this AG. It was a race that lead - * us to see this inode, so another lookup from the - * same index will not find it again. + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. */ - if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) - continue; first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) done = 1; } /* unlock now we've grabbed the inodes. */ - rcu_read_unlock(); + read_unlock(&pag->pag_ici_lock); for (i = 0; i < nr_found; i++) { if (!batch[i]) @@ -620,12 +592,12 @@ xfs_inode_set_reclaim_tag( struct xfs_perag *pag; pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - spin_lock(&pag->pag_ici_lock); + write_lock(&pag->pag_ici_lock); spin_lock(&ip->i_flags_lock); __xfs_inode_set_reclaim_tag(pag, ip); __xfs_iflags_set(ip, XFS_IRECLAIMABLE); spin_unlock(&ip->i_flags_lock); - spin_unlock(&pag->pag_ici_lock); + write_unlock(&pag->pag_ici_lock); xfs_perag_put(pag); } @@ -667,14 +639,9 @@ xfs_reclaim_inode_grab( struct xfs_inode *ip, int flags) { - ASSERT(rcu_read_lock_held()); - - /* quick check for stale RCU freed inode */ - if (!ip->i_ino) - return 1; /* - * do some unlocked checks first to avoid unnecessary lock traffic. + * do some unlocked checks first to avoid unnecceary lock traffic. * The first is a flush lock check, the second is a already in reclaim * check. Only do these checks if we are not going to block on locks. */ @@ -687,16 +654,11 @@ xfs_reclaim_inode_grab( * The radix tree lock here protects a thread in xfs_iget from racing * with us starting reclaim on the inode. Once we have the * XFS_IRECLAIM flag set it will not touch us. - * - * Due to RCU lookup, we may find inodes that have been freed and only - * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that - * aren't candidates for reclaim at all, so we must check the - * XFS_IRECLAIMABLE is set first before proceeding to reclaim. */ spin_lock(&ip->i_flags_lock); - if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) || - __xfs_iflags_test(ip, XFS_IRECLAIM)) { - /* not a reclaim candidate. */ + ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); + if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { + /* ignore as it is already under reclaim */ spin_unlock(&ip->i_flags_lock); return 1; } @@ -833,12 +795,12 @@ xfs_reclaim_inode( * added to the tree assert that it's been there before to catch * problems with the inode life time early on. 
*/ - spin_lock(&pag->pag_ici_lock); + write_lock(&pag->pag_ici_lock); if (!radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) ASSERT(0); __xfs_inode_clear_reclaim(pag, ip); - spin_unlock(&pag->pag_ici_lock); + write_unlock(&pag->pag_ici_lock); /* * Here we do an (almost) spurious inode lock in order to coordinate @@ -902,14 +864,14 @@ xfs_reclaim_inodes_ag( struct xfs_inode *batch[XFS_LOOKUP_BATCH]; int i; - rcu_read_lock(); + write_lock(&pag->pag_ici_lock); nr_found = radix_tree_gang_lookup_tag( &pag->pag_ici_root, (void **)batch, first_index, XFS_LOOKUP_BATCH, XFS_ICI_RECLAIM_TAG); if (!nr_found) { - rcu_read_unlock(); + write_unlock(&pag->pag_ici_lock); break; } @@ -929,24 +891,14 @@ xfs_reclaim_inodes_ag( * occur if we have inodes in the last block of * the AG and we are currently pointing to the * last inode. - * - * Because we may see inodes that are from the - * wrong AG due to RCU freeing and - * reallocation, only update the index if it - * lies in this AG. It was a race that lead us - * to see this inode, so another lookup from - * the same index will not find it again. */ - if (XFS_INO_TO_AGNO(mp, ip->i_ino) != - pag->pag_agno) - continue; first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) done = 1; } /* unlock now we've grabbed the inodes. */ - rcu_read_unlock(); + write_unlock(&pag->pag_ici_lock); for (i = 0; i < nr_found; i++) { if (!batch[i]) diff --git a/trunk/fs/xfs/linux-2.6/xfs_trace.h b/trunk/fs/xfs/linux-2.6/xfs_trace.h index 647af2a2e7aa..acef2e98c594 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_trace.h +++ b/trunk/fs/xfs/linux-2.6/xfs_trace.h @@ -766,8 +766,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, __field(int, curr_res) __field(int, unit_res) __field(unsigned int, flags) - __field(int, reserveq) - __field(int, writeq) + __field(void *, reserve_headq) + __field(void *, write_headq) __field(int, grant_reserve_cycle) __field(int, grant_reserve_bytes) __field(int, grant_write_cycle) @@ -784,21 +784,19 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, __entry->curr_res = tic->t_curr_res; __entry->unit_res = tic->t_unit_res; __entry->flags = tic->t_flags; - __entry->reserveq = list_empty(&log->l_reserveq); - __entry->writeq = list_empty(&log->l_writeq); - xlog_crack_grant_head(&log->l_grant_reserve_head, - &__entry->grant_reserve_cycle, - &__entry->grant_reserve_bytes); - xlog_crack_grant_head(&log->l_grant_write_head, - &__entry->grant_write_cycle, - &__entry->grant_write_bytes); + __entry->reserve_headq = log->l_reserve_headq; + __entry->write_headq = log->l_write_headq; + __entry->grant_reserve_cycle = log->l_grant_reserve_cycle; + __entry->grant_reserve_bytes = log->l_grant_reserve_bytes; + __entry->grant_write_cycle = log->l_grant_write_cycle; + __entry->grant_write_bytes = log->l_grant_write_bytes; __entry->curr_cycle = log->l_curr_cycle; __entry->curr_block = log->l_curr_block; - __entry->tail_lsn = atomic64_read(&log->l_tail_lsn); + __entry->tail_lsn = log->l_tail_lsn; ), TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " - "t_unit_res %u t_flags %s reserveq %s " - "writeq %s grant_reserve_cycle %d " + "t_unit_res %u t_flags %s reserve_headq 0x%p " + "write_headq 0x%p grant_reserve_cycle %d " "grant_reserve_bytes %d grant_write_cycle %d " "grant_write_bytes %d curr_cycle %d curr_block %d " "tail_cycle %d tail_block %d", @@ -809,8 +807,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, __entry->curr_res, __entry->unit_res, __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), - 
__entry->reserveq ? "empty" : "active", - __entry->writeq ? "empty" : "active", + __entry->reserve_headq, + __entry->write_headq, __entry->grant_reserve_cycle, __entry->grant_reserve_bytes, __entry->grant_write_cycle, @@ -837,7 +835,6 @@ DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1); DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); @@ -845,7 +842,6 @@ DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); @@ -939,10 +935,10 @@ DEFINE_PAGE_EVENT(xfs_writepage); DEFINE_PAGE_EVENT(xfs_releasepage); DEFINE_PAGE_EVENT(xfs_invalidatepage); -DECLARE_EVENT_CLASS(xfs_imap_class, +DECLARE_EVENT_CLASS(xfs_iomap_class, TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, - int type, struct xfs_bmbt_irec *irec), - TP_ARGS(ip, offset, count, type, irec), + int flags, struct xfs_bmbt_irec *irec), + TP_ARGS(ip, offset, count, flags, irec), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) @@ -950,7 +946,7 @@ DECLARE_EVENT_CLASS(xfs_imap_class, __field(loff_t, new_size) __field(loff_t, offset) __field(size_t, count) - __field(int, type) + __field(int, flags) __field(xfs_fileoff_t, startoff) __field(xfs_fsblock_t, startblock) __field(xfs_filblks_t, blockcount) @@ -962,13 +958,13 @@ DECLARE_EVENT_CLASS(xfs_imap_class, __entry->new_size = ip->i_new_size; __entry->offset = offset; __entry->count = count; - __entry->type = type; + __entry->flags = flags; __entry->startoff = irec ? irec->br_startoff : 0; __entry->startblock = irec ? irec->br_startblock : 0; __entry->blockcount = irec ? 
irec->br_blockcount : 0; ), TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " - "offset 0x%llx count %zd type %s " + "offset 0x%llx count %zd flags %s " "startoff 0x%llx startblock %lld blockcount 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, @@ -976,21 +972,20 @@ DECLARE_EVENT_CLASS(xfs_imap_class, __entry->new_size, __entry->offset, __entry->count, - __print_symbolic(__entry->type, XFS_IO_TYPES), + __print_flags(__entry->flags, "|", BMAPI_FLAGS), __entry->startoff, (__int64_t)__entry->startblock, __entry->blockcount) ) #define DEFINE_IOMAP_EVENT(name) \ -DEFINE_EVENT(xfs_imap_class, name, \ +DEFINE_EVENT(xfs_iomap_class, name, \ TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ - int type, struct xfs_bmbt_irec *irec), \ - TP_ARGS(ip, offset, count, type, irec)) -DEFINE_IOMAP_EVENT(xfs_map_blocks_found); -DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); -DEFINE_IOMAP_EVENT(xfs_get_blocks_found); -DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); + int flags, struct xfs_bmbt_irec *irec), \ + TP_ARGS(ip, offset, count, flags, irec)) +DEFINE_IOMAP_EVENT(xfs_iomap_enter); +DEFINE_IOMAP_EVENT(xfs_iomap_found); +DEFINE_IOMAP_EVENT(xfs_iomap_alloc); DECLARE_EVENT_CLASS(xfs_simple_io_class, TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), @@ -1027,7 +1022,6 @@ DEFINE_EVENT(xfs_simple_io_class, name, \ TP_ARGS(ip, offset, count)) DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); -DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); TRACE_EVENT(xfs_itruncate_start, @@ -1426,7 +1420,6 @@ DEFINE_EVENT(xfs_alloc_class, name, \ TP_PROTO(struct xfs_alloc_arg *args), \ TP_ARGS(args)) DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); -DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound); DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); DEFINE_ALLOC_EVENT(xfs_alloc_near_first); diff --git a/trunk/fs/xfs/quota/xfs_dquot.c b/trunk/fs/xfs/quota/xfs_dquot.c index d22aa3103106..faf8e1a83a12 100644 --- a/trunk/fs/xfs/quota/xfs_dquot.c +++ b/trunk/fs/xfs/quota/xfs_dquot.c @@ -149,6 +149,7 @@ xfs_qm_dqdestroy( ASSERT(list_empty(&dqp->q_freelist)); mutex_destroy(&dqp->q_qlock); + sv_destroy(&dqp->q_pinwait); kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); atomic_dec(&xfs_Gqm->qm_totaldquots); diff --git a/trunk/fs/xfs/xfs_ag.h b/trunk/fs/xfs/xfs_ag.h index 58632cc17f2d..63c7a1a6c022 100644 --- a/trunk/fs/xfs/xfs_ag.h +++ b/trunk/fs/xfs/xfs_ag.h @@ -227,7 +227,7 @@ typedef struct xfs_perag { atomic_t pagf_fstrms; /* # of filestreams active in this AG */ - spinlock_t pag_ici_lock; /* incore inode cache lock */ + rwlock_t pag_ici_lock; /* incore inode lock */ struct radix_tree_root pag_ici_root; /* incore inode cache root */ int pag_ici_reclaimable; /* reclaimable inodes */ struct mutex pag_ici_reclaim_lock; /* serialisation point */ diff --git a/trunk/fs/xfs/xfs_alloc.c b/trunk/fs/xfs/xfs_alloc.c index fa8723f5870a..112abc439ca5 100644 --- a/trunk/fs/xfs/xfs_alloc.c +++ b/trunk/fs/xfs/xfs_alloc.c @@ -577,58 +577,61 @@ xfs_alloc_ag_vextent_exact( xfs_extlen_t rlen; /* length of returned extent */ ASSERT(args->alignment == 1); - /* * Allocate/initialize a cursor for the by-number freespace btree. */ bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_BNO); - + args->agno, XFS_BTNUM_BNO); /* * Lookup bno and minlen in the btree (minlen is irrelevant, really). * Look for the closest free block <= bno, it must contain bno * if any free block does. 
*/ - error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i); - if (error) + if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i))) goto error0; - if (!i) - goto not_found; - + if (!i) { + /* + * Didn't find it, return null. + */ + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); + args->agbno = NULLAGBLOCK; + return 0; + } /* * Grab the freespace record. */ - error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); - if (error) + if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); ASSERT(fbno <= args->agbno); minend = args->agbno + args->minlen; maxend = args->agbno + args->maxlen; fend = fbno + flen; - /* * Give up if the freespace isn't long enough for the minimum request. */ - if (fend < minend) - goto not_found; - + if (fend < minend) { + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); + args->agbno = NULLAGBLOCK; + return 0; + } /* * End of extent will be smaller of the freespace end and the * maximal requested end. - * - * Fix the length according to mod and prod if given. */ end = XFS_AGBLOCK_MIN(fend, maxend); + /* + * Fix the length according to mod and prod if given. + */ args->len = end - args->agbno; xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) - goto not_found; - + if (!xfs_alloc_fix_minleft(args)) { + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); + return 0; + } rlen = args->len; ASSERT(args->agbno + rlen <= fend); end = args->agbno + rlen; - /* * We are allocating agbno for rlen [agbno .. end] * Allocate/initialize a cursor for the by-size btree. @@ -637,25 +640,16 @@ xfs_alloc_ag_vextent_exact( args->agno, XFS_BTNUM_CNT); ASSERT(args->agbno + args->len <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); - error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, - args->len, XFSA_FIXUP_BNO_OK); - if (error) { + if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, + args->agbno, args->len, XFSA_FIXUP_BNO_OK))) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); goto error0; } - xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - args->wasfromfl = 0; trace_xfs_alloc_exact_done(args); - return 0; - -not_found: - /* Didn't find it, return null. */ - xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); - args->agbno = NULLAGBLOCK; - trace_xfs_alloc_exact_notfound(args); + args->wasfromfl = 0; return 0; error0: @@ -664,95 +658,6 @@ xfs_alloc_ag_vextent_exact( return error; } -/* - * Search the btree in a given direction via the search cursor and compare - * the records found against the good extent we've already found. - */ -STATIC int -xfs_alloc_find_best_extent( - struct xfs_alloc_arg *args, /* allocation argument structure */ - struct xfs_btree_cur **gcur, /* good cursor */ - struct xfs_btree_cur **scur, /* searching cursor */ - xfs_agblock_t gdiff, /* difference for search comparison */ - xfs_agblock_t *sbno, /* extent found by search */ - xfs_extlen_t *slen, - xfs_extlen_t *slena, /* aligned length */ - int dir) /* 0 = search right, 1 = search left */ -{ - xfs_agblock_t bno; - xfs_agblock_t new; - xfs_agblock_t sdiff; - int error; - int i; - - /* The good extent is perfect, no need to search. */ - if (!gdiff) - goto out_use_good; - - /* - * Look until we find a better one, run out of space or run off the end. 
- */ - do { - error = xfs_alloc_get_rec(*scur, sbno, slen, &i); - if (error) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_alloc_compute_aligned(*sbno, *slen, args->alignment, - args->minlen, &bno, slena); - - /* - * The good extent is closer than this one. - */ - if (!dir) { - if (bno >= args->agbno + gdiff) - goto out_use_good; - } else { - if (bno <= args->agbno - gdiff) - goto out_use_good; - } - - /* - * Same distance, compare length and pick the best. - */ - if (*slena >= args->minlen) { - args->len = XFS_EXTLEN_MIN(*slena, args->maxlen); - xfs_alloc_fix_len(args); - - sdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, *sbno, - *slen, &new); - - /* - * Choose closer size and invalidate other cursor. - */ - if (sdiff < gdiff) - goto out_use_search; - goto out_use_good; - } - - if (!dir) - error = xfs_btree_increment(*scur, 0, &i); - else - error = xfs_btree_decrement(*scur, 0, &i); - if (error) - goto error0; - } while (i); - -out_use_good: - xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR); - *scur = NULL; - return 0; - -out_use_search: - xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR); - *gcur = NULL; - return 0; - -error0: - /* caller invalidates cursors */ - return error; -} - /* * Allocate a variable extent near bno in the allocation group agno. * Extent's length (returned in len) will be between minlen and maxlen, @@ -1020,45 +925,203 @@ xfs_alloc_ag_vextent_near( } } } while (bno_cur_lt || bno_cur_gt); - /* * Got both cursors still active, need to find better entry. */ if (bno_cur_lt && bno_cur_gt) { + /* + * Left side is long enough, look for a right side entry. + */ if (ltlena >= args->minlen) { /* - * Left side is good, look for a right side entry. + * Fix up the length. */ args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); - ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, + rlen = args->len; + ltdiff = xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, ltlen, <new); - - error = xfs_alloc_find_best_extent(args, - &bno_cur_lt, &bno_cur_gt, - ltdiff, >bno, >len, >lena, - 0 /* search right */); - } else { - ASSERT(gtlena >= args->minlen); - /* - * Right side is good, look for a left side entry. + * Not perfect. + */ + if (ltdiff) { + /* + * Look until we find a better one, run out of + * space, or run off the end. + */ + while (bno_cur_lt && bno_cur_gt) { + if ((error = xfs_alloc_get_rec( + bno_cur_gt, >bno, + >len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + xfs_alloc_compute_aligned(gtbno, gtlen, + args->alignment, args->minlen, + >bnoa, >lena); + /* + * The left one is clearly better. + */ + if (gtbnoa >= args->agbno + ltdiff) { + xfs_btree_del_cursor( + bno_cur_gt, + XFS_BTREE_NOERROR); + bno_cur_gt = NULL; + break; + } + /* + * If we reach a big enough entry, + * compare the two and pick the best. + */ + if (gtlena >= args->minlen) { + args->len = + XFS_EXTLEN_MIN(gtlena, + args->maxlen); + xfs_alloc_fix_len(args); + rlen = args->len; + gtdiff = xfs_alloc_compute_diff( + args->agbno, rlen, + args->alignment, + gtbno, gtlen, >new); + /* + * Right side is better. + */ + if (gtdiff < ltdiff) { + xfs_btree_del_cursor( + bno_cur_lt, + XFS_BTREE_NOERROR); + bno_cur_lt = NULL; + } + /* + * Left side is better. + */ + else { + xfs_btree_del_cursor( + bno_cur_gt, + XFS_BTREE_NOERROR); + bno_cur_gt = NULL; + } + break; + } + /* + * Fell off the right end. 
+ */ + if ((error = xfs_btree_increment( + bno_cur_gt, 0, &i))) + goto error0; + if (!i) { + xfs_btree_del_cursor( + bno_cur_gt, + XFS_BTREE_NOERROR); + bno_cur_gt = NULL; + break; + } + } + } + /* + * The left side is perfect, trash the right side. + */ + else { + xfs_btree_del_cursor(bno_cur_gt, + XFS_BTREE_NOERROR); + bno_cur_gt = NULL; + } + } + /* + * It's the right side that was found first, look left. + */ + else { + /* + * Fix up the length. */ args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); xfs_alloc_fix_len(args); - gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, + rlen = args->len; + gtdiff = xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, gtbno, gtlen, >new); - - error = xfs_alloc_find_best_extent(args, - &bno_cur_gt, &bno_cur_lt, - gtdiff, <bno, <len, <lena, - 1 /* search left */); + /* + * Right side entry isn't perfect. + */ + if (gtdiff) { + /* + * Look until we find a better one, run out of + * space, or run off the end. + */ + while (bno_cur_lt && bno_cur_gt) { + if ((error = xfs_alloc_get_rec( + bno_cur_lt, <bno, + <len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + xfs_alloc_compute_aligned(ltbno, ltlen, + args->alignment, args->minlen, + <bnoa, <lena); + /* + * The right one is clearly better. + */ + if (ltbnoa <= args->agbno - gtdiff) { + xfs_btree_del_cursor( + bno_cur_lt, + XFS_BTREE_NOERROR); + bno_cur_lt = NULL; + break; + } + /* + * If we reach a big enough entry, + * compare the two and pick the best. + */ + if (ltlena >= args->minlen) { + args->len = XFS_EXTLEN_MIN( + ltlena, args->maxlen); + xfs_alloc_fix_len(args); + rlen = args->len; + ltdiff = xfs_alloc_compute_diff( + args->agbno, rlen, + args->alignment, + ltbno, ltlen, <new); + /* + * Left side is better. + */ + if (ltdiff < gtdiff) { + xfs_btree_del_cursor( + bno_cur_gt, + XFS_BTREE_NOERROR); + bno_cur_gt = NULL; + } + /* + * Right side is better. + */ + else { + xfs_btree_del_cursor( + bno_cur_lt, + XFS_BTREE_NOERROR); + bno_cur_lt = NULL; + } + break; + } + /* + * Fell off the left end. + */ + if ((error = xfs_btree_decrement( + bno_cur_lt, 0, &i))) + goto error0; + if (!i) { + xfs_btree_del_cursor(bno_cur_lt, + XFS_BTREE_NOERROR); + bno_cur_lt = NULL; + break; + } + } + } + /* + * The right side is perfect, trash the left side. + */ + else { + xfs_btree_del_cursor(bno_cur_lt, + XFS_BTREE_NOERROR); + bno_cur_lt = NULL; + } } - - if (error) - goto error0; } - /* * If we couldn't get anything, give up. */ @@ -1067,7 +1130,6 @@ xfs_alloc_ag_vextent_near( args->agbno = NULLAGBLOCK; return 0; } - /* * At this point we have selected a freespace entry, either to the * left or to the right. If it's on the right, copy all the @@ -1084,7 +1146,6 @@ xfs_alloc_ag_vextent_near( j = 1; } else j = 0; - /* * Fix up the length and compute the useful address. */ diff --git a/trunk/fs/xfs/xfs_attr_leaf.c b/trunk/fs/xfs/xfs_attr_leaf.c index 71e90dc2aeb1..a6cff8edcdb6 100644 --- a/trunk/fs/xfs/xfs_attr_leaf.c +++ b/trunk/fs/xfs/xfs_attr_leaf.c @@ -637,7 +637,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) * It didn't all fit, so we have to sort everything on hashval. 
*/ sbsize = sf->hdr.count * sizeof(*sbuf); - sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP | KM_NOFS); + sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP); /* * Scan the attribute list for the rest of the entries, storing @@ -2386,7 +2386,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) args.dp = context->dp; args.whichfork = XFS_ATTR_FORK; args.valuelen = valuelen; - args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS); + args.value = kmem_alloc(valuelen, KM_SLEEP); args.rmtblkno = be32_to_cpu(name_rmt->valueblk); args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen); retval = xfs_attr_rmtval_get(&args); diff --git a/trunk/fs/xfs/xfs_btree.c b/trunk/fs/xfs/xfs_btree.c index 2f9e97c128a0..04f9cca8da7e 100644 --- a/trunk/fs/xfs/xfs_btree.c +++ b/trunk/fs/xfs/xfs_btree.c @@ -634,8 +634,9 @@ xfs_btree_read_bufl( return error; } ASSERT(!bp || !XFS_BUF_GETERROR(bp)); - if (bp) + if (bp != NULL) { XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); + } *bpp = bp; return 0; } @@ -943,13 +944,13 @@ xfs_btree_set_refs( switch (cur->bc_btnum) { case XFS_BTNUM_BNO: case XFS_BTNUM_CNT: - XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF); + XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF); break; case XFS_BTNUM_INO: - XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF); + XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF); break; case XFS_BTNUM_BMAP: - XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF); + XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF); break; default: ASSERT(0); diff --git a/trunk/fs/xfs/xfs_buf_item.c b/trunk/fs/xfs/xfs_buf_item.c index ed2b65f3f8b9..2686d0d54c5b 100644 --- a/trunk/fs/xfs/xfs_buf_item.c +++ b/trunk/fs/xfs/xfs_buf_item.c @@ -142,7 +142,7 @@ xfs_buf_item_log_check( #endif STATIC void xfs_buf_error_relse(xfs_buf_t *bp); -STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); +STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip); /* * This returns the number of log iovecs needed to log the @@ -450,7 +450,7 @@ xfs_buf_item_unpin( * xfs_trans_ail_delete() drops the AIL lock. */ if (bip->bli_flags & XFS_BLI_STALE_INODE) { - xfs_buf_do_callbacks(bp); + xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip); XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); } else { @@ -918,26 +918,15 @@ xfs_buf_attach_iodone( XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); } -/* - * We can have many callbacks on a buffer. Running the callbacks individually - * can cause a lot of contention on the AIL lock, so we allow for a single - * callback to be able to scan the remaining lip->li_bio_list for other items - * of the same type and callback to be processed in the first call. - * - * As a result, the loop walking the callback list below will also modify the - * list. it removes the first item from the list and then runs the callback. - * The loop then restarts from the new head of the list. This allows the - * callback to scan and modify the list attached to the buffer and we don't - * have to care about maintaining a next item pointer. 
- */ STATIC void xfs_buf_do_callbacks( - struct xfs_buf *bp) + xfs_buf_t *bp, + xfs_log_item_t *lip) { - struct xfs_log_item *lip; + xfs_log_item_t *nlip; - while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) { - XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list); + while (lip != NULL) { + nlip = lip->li_bio_list; ASSERT(lip->li_cb != NULL); /* * Clear the next pointer so we don't have any @@ -947,6 +936,7 @@ xfs_buf_do_callbacks( */ lip->li_bio_list = NULL; lip->li_cb(bp, lip); + lip = nlip; } } @@ -980,7 +970,7 @@ xfs_buf_iodone_callbacks( ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); XFS_BUF_SUPER_STALE(bp); trace_xfs_buf_item_iodone(bp, _RET_IP_); - xfs_buf_do_callbacks(bp); + xfs_buf_do_callbacks(bp, lip); XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); xfs_buf_ioend(bp, 0); @@ -1039,7 +1029,7 @@ xfs_buf_iodone_callbacks( return; } - xfs_buf_do_callbacks(bp); + xfs_buf_do_callbacks(bp, lip); XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); xfs_buf_ioend(bp, 0); @@ -1073,7 +1063,7 @@ xfs_buf_error_relse( * We have to unpin the pinned buffers so do the * callbacks. */ - xfs_buf_do_callbacks(bp); + xfs_buf_do_callbacks(bp, lip); XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); XFS_BUF_SET_BRELSE_FUNC(bp,NULL); diff --git a/trunk/fs/xfs/xfs_buf_item.h b/trunk/fs/xfs/xfs_buf_item.h index b6ecd2061e7c..0e2ed43f16c7 100644 --- a/trunk/fs/xfs/xfs_buf_item.h +++ b/trunk/fs/xfs/xfs_buf_item.h @@ -105,6 +105,17 @@ typedef struct xfs_buf_log_item { xfs_buf_log_format_t bli_format; /* in-log header */ } xfs_buf_log_item_t; +/* + * This structure is used during recovery to record the buf log + * items which have been canceled and should not be replayed. + */ +typedef struct xfs_buf_cancel { + xfs_daddr_t bc_blkno; + uint bc_len; + int bc_refcount; + struct xfs_buf_cancel *bc_next; +} xfs_buf_cancel_t; + void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); void xfs_buf_item_relse(struct xfs_buf *); void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); diff --git a/trunk/fs/xfs/xfs_extfree_item.c b/trunk/fs/xfs/xfs_extfree_item.c index 75f2ef60e579..a55e687bf562 100644 --- a/trunk/fs/xfs/xfs_extfree_item.c +++ b/trunk/fs/xfs/xfs_extfree_item.c @@ -47,28 +47,6 @@ xfs_efi_item_free( kmem_zone_free(xfs_efi_zone, efip); } -/* - * Freeing the efi requires that we remove it from the AIL if it has already - * been placed there. However, the EFI may not yet have been placed in the AIL - * when called by xfs_efi_release() from EFD processing due to the ordering of - * committed vs unpin operations in bulk insert operations. Hence the - * test_and_clear_bit(XFS_EFI_COMMITTED) to ensure only the last caller frees - * the EFI. - */ -STATIC void -__xfs_efi_release( - struct xfs_efi_log_item *efip) -{ - struct xfs_ail *ailp = efip->efi_item.li_ailp; - - if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) { - spin_lock(&ailp->xa_lock); - /* xfs_trans_ail_delete() drops the AIL lock. */ - xfs_trans_ail_delete(ailp, &efip->efi_item); - xfs_efi_item_free(efip); - } -} - /* * This returns the number of iovecs needed to log the given efi item. * We only need 1 iovec for an efi item. 
It just logs the efi_log_format @@ -96,8 +74,7 @@ xfs_efi_item_format( struct xfs_efi_log_item *efip = EFI_ITEM(lip); uint size; - ASSERT(atomic_read(&efip->efi_next_extent) == - efip->efi_format.efi_nextents); + ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents); efip->efi_format.efi_type = XFS_LI_EFI; @@ -122,12 +99,10 @@ xfs_efi_item_pin( } /* - * While EFIs cannot really be pinned, the unpin operation is the last place at - * which the EFI is manipulated during a transaction. If we are being asked to - * remove the EFI it's because the transaction has been cancelled and by - * definition that means the EFI cannot be in the AIL so remove it from the - * transaction and free it. Otherwise coordinate with xfs_efi_release() (via - * XFS_EFI_COMMITTED) to determine who gets to free the EFI. + * While EFIs cannot really be pinned, the unpin operation is the + * last place at which the EFI is manipulated during a transaction. + * Here we coordinate with xfs_efi_cancel() to determine who gets to + * free the EFI. */ STATIC void xfs_efi_item_unpin( @@ -135,14 +110,20 @@ xfs_efi_item_unpin( int remove) { struct xfs_efi_log_item *efip = EFI_ITEM(lip); + struct xfs_ail *ailp = lip->li_ailp; + + spin_lock(&ailp->xa_lock); + if (efip->efi_flags & XFS_EFI_CANCELED) { + if (remove) + xfs_trans_del_item(lip); - if (remove) { - ASSERT(!(lip->li_flags & XFS_LI_IN_AIL)); - xfs_trans_del_item(lip); + /* xfs_trans_ail_delete() drops the AIL lock. */ + xfs_trans_ail_delete(ailp, lip); xfs_efi_item_free(efip); - return; + } else { + efip->efi_flags |= XFS_EFI_COMMITTED; + spin_unlock(&ailp->xa_lock); } - __xfs_efi_release(efip); } /* @@ -171,20 +152,16 @@ xfs_efi_item_unlock( } /* - * The EFI is logged only once and cannot be moved in the log, so simply return - * the lsn at which it's been logged. For bulk transaction committed - * processing, the EFI may be processed but not yet unpinned prior to the EFD - * being processed. Set the XFS_EFI_COMMITTED flag so this case can be detected - * when processing the EFD. + * The EFI is logged only once and cannot be moved in the log, so + * simply return the lsn at which it's been logged. The canceled + * flag is not paid any attention here. Checking for that is delayed + * until the EFI is unpinned. */ STATIC xfs_lsn_t xfs_efi_item_committed( struct xfs_log_item *lip, xfs_lsn_t lsn) { - struct xfs_efi_log_item *efip = EFI_ITEM(lip); - - set_bit(XFS_EFI_COMMITTED, &efip->efi_flags); return lsn; } @@ -253,7 +230,6 @@ xfs_efi_init( xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); efip->efi_format.efi_nextents = nextents; efip->efi_format.efi_id = (__psint_t)(void*)efip; - atomic_set(&efip->efi_next_extent, 0); return efip; } @@ -313,18 +289,37 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) } /* - * This is called by the efd item code below to release references to the given - * efi item. Each efd calls this with the number of extents that it has - * logged, and when the sum of these reaches the total number of extents logged - * by this efi item we can free the efi item. + * This is called by the efd item code below to release references to + * the given efi item. Each efd calls this with the number of + * extents that it has logged, and when the sum of these reaches + * the total number of extents logged by this efi item we can free + * the efi item. + * + * Freeing the efi item requires that we remove it from the AIL. 
+ * We'll use the AIL lock to protect our counters as well as + * the removal from the AIL. */ void xfs_efi_release(xfs_efi_log_item_t *efip, uint nextents) { - ASSERT(atomic_read(&efip->efi_next_extent) >= nextents); - if (atomic_sub_and_test(nextents, &efip->efi_next_extent)) - __xfs_efi_release(efip); + struct xfs_ail *ailp = efip->efi_item.li_ailp; + int extents_left; + + ASSERT(efip->efi_next_extent > 0); + ASSERT(efip->efi_flags & XFS_EFI_COMMITTED); + + spin_lock(&ailp->xa_lock); + ASSERT(efip->efi_next_extent >= nextents); + efip->efi_next_extent -= nextents; + extents_left = efip->efi_next_extent; + if (extents_left == 0) { + /* xfs_trans_ail_delete() drops the AIL lock. */ + xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip); + xfs_efi_item_free(efip); + } else { + spin_unlock(&ailp->xa_lock); + } } static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip) diff --git a/trunk/fs/xfs/xfs_extfree_item.h b/trunk/fs/xfs/xfs_extfree_item.h index 375f68e42531..0d22c56fdf64 100644 --- a/trunk/fs/xfs/xfs_extfree_item.h +++ b/trunk/fs/xfs/xfs_extfree_item.h @@ -111,10 +111,11 @@ typedef struct xfs_efd_log_format_64 { #define XFS_EFI_MAX_FAST_EXTENTS 16 /* - * Define EFI flag bits. Manipulated by set/clear/test_bit operators. + * Define EFI flags. */ -#define XFS_EFI_RECOVERED 1 -#define XFS_EFI_COMMITTED 2 +#define XFS_EFI_RECOVERED 0x1 +#define XFS_EFI_COMMITTED 0x2 +#define XFS_EFI_CANCELED 0x4 /* * This is the "extent free intention" log item. It is used @@ -124,8 +125,8 @@ typedef struct xfs_efd_log_format_64 { */ typedef struct xfs_efi_log_item { xfs_log_item_t efi_item; - atomic_t efi_next_extent; - unsigned long efi_flags; /* misc flags */ + uint efi_flags; /* misc flags */ + uint efi_next_extent; xfs_efi_log_format_t efi_format; } xfs_efi_log_item_t; diff --git a/trunk/fs/xfs/xfs_fsops.c b/trunk/fs/xfs/xfs_fsops.c index f56d30e8040c..a7c116e814af 100644 --- a/trunk/fs/xfs/xfs_fsops.c +++ b/trunk/fs/xfs/xfs_fsops.c @@ -374,7 +374,6 @@ xfs_growfs_data_private( mp->m_maxicount = icount << mp->m_sb.sb_inopblog; } else mp->m_maxicount = 0; - xfs_set_low_space_thresholds(mp); /* update secondary superblocks. */ for (agno = 1; agno < nagcount; agno++) { diff --git a/trunk/fs/xfs/xfs_iget.c b/trunk/fs/xfs/xfs_iget.c index cb9b6d1469f7..d7de5a3f7867 100644 --- a/trunk/fs/xfs/xfs_iget.c +++ b/trunk/fs/xfs/xfs_iget.c @@ -42,17 +42,6 @@ #include "xfs_trace.h" -/* - * Define xfs inode iolock lockdep classes. We need to ensure that all active - * inodes are considered the same for lockdep purposes, including inodes that - * are recycled through the XFS_IRECLAIMABLE state. This is the the only way to - * guarantee the locks are considered the same when there are multiple lock - * initialisation siteÑ•. Also, define a reclaimable inode class so it is - * obvious in lockdep reports which class the report is against. - */ -static struct lock_class_key xfs_iolock_active; -struct lock_class_key xfs_iolock_reclaimable; - /* * Allocate and initialise an xfs_inode. 
*/ @@ -80,11 +69,8 @@ xfs_inode_alloc( ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); - ASSERT(ip->i_ino == 0); mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); - lockdep_set_class_and_name(&ip->i_iolock.mr_lock, - &xfs_iolock_active, "xfs_iolock_active"); /* initialise the xfs inode */ ip->i_ino = ino; @@ -99,6 +85,9 @@ xfs_inode_alloc( ip->i_size = 0; ip->i_new_size = 0; + /* prevent anyone from using this yet */ + VFS_I(ip)->i_state = I_NEW; + return ip; } @@ -156,18 +145,7 @@ xfs_inode_free( ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); - /* - * Because we use RCU freeing we need to ensure the inode always - * appears to be reclaimed with an invalid inode number when in the - * free state. The ip->i_flags_lock provides the barrier against lookup - * races. - */ - spin_lock(&ip->i_flags_lock); - ip->i_flags = XFS_IRECLAIM; - ip->i_ino = 0; - spin_unlock(&ip->i_flags_lock); - - call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); + call_rcu(&ip->i_vnode.i_rcu, xfs_inode_free_callback); } /* @@ -177,29 +155,14 @@ static int xfs_iget_cache_hit( struct xfs_perag *pag, struct xfs_inode *ip, - xfs_ino_t ino, int flags, - int lock_flags) __releases(RCU) + int lock_flags) __releases(pag->pag_ici_lock) { struct inode *inode = VFS_I(ip); struct xfs_mount *mp = ip->i_mount; int error; - /* - * check for re-use of an inode within an RCU grace period due to the - * radix tree nodes not being updated yet. We monitor for this by - * setting the inode number to zero before freeing the inode structure. - * If the inode has been reallocated and set up, then the inode number - * will not match, so check for that, too. - */ spin_lock(&ip->i_flags_lock); - if (ip->i_ino != ino) { - trace_xfs_iget_skip(ip); - XFS_STATS_INC(xs_ig_frecycle); - error = EAGAIN; - goto out_error; - } - /* * If we are racing with another cache hit that is currently @@ -242,7 +205,7 @@ xfs_iget_cache_hit( ip->i_flags |= XFS_IRECLAIM; spin_unlock(&ip->i_flags_lock); - rcu_read_unlock(); + read_unlock(&pag->pag_ici_lock); error = -inode_init_always(mp->m_super, inode); if (error) { @@ -250,7 +213,7 @@ xfs_iget_cache_hit( * Re-initializing the inode failed, and we are in deep * trouble. Try to re-add it to the reclaim list. */ - rcu_read_lock(); + read_lock(&pag->pag_ici_lock); spin_lock(&ip->i_flags_lock); ip->i_flags &= ~XFS_INEW; @@ -260,20 +223,14 @@ xfs_iget_cache_hit( goto out_error; } - spin_lock(&pag->pag_ici_lock); + write_lock(&pag->pag_ici_lock); spin_lock(&ip->i_flags_lock); ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); ip->i_flags |= XFS_INEW; __xfs_inode_clear_reclaim_tag(mp, pag, ip); inode->i_state = I_NEW; - - ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); - mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); - lockdep_set_class_and_name(&ip->i_iolock.mr_lock, - &xfs_iolock_active, "xfs_iolock_active"); - spin_unlock(&ip->i_flags_lock); - spin_unlock(&pag->pag_ici_lock); + write_unlock(&pag->pag_ici_lock); } else { /* If the VFS inode is being torn down, pause and try again. */ if (!igrab(inode)) { @@ -284,7 +241,7 @@ xfs_iget_cache_hit( /* We've got a live one. 
*/ spin_unlock(&ip->i_flags_lock); - rcu_read_unlock(); + read_unlock(&pag->pag_ici_lock); trace_xfs_iget_hit(ip); } @@ -298,7 +255,7 @@ xfs_iget_cache_hit( out_error: spin_unlock(&ip->i_flags_lock); - rcu_read_unlock(); + read_unlock(&pag->pag_ici_lock); return error; } @@ -351,7 +308,7 @@ xfs_iget_cache_miss( BUG(); } - spin_lock(&pag->pag_ici_lock); + write_lock(&pag->pag_ici_lock); /* insert the new inode */ error = radix_tree_insert(&pag->pag_ici_root, agino, ip); @@ -366,14 +323,14 @@ xfs_iget_cache_miss( ip->i_udquot = ip->i_gdquot = NULL; xfs_iflags_set(ip, XFS_INEW); - spin_unlock(&pag->pag_ici_lock); + write_unlock(&pag->pag_ici_lock); radix_tree_preload_end(); *ipp = ip; return 0; out_preload_end: - spin_unlock(&pag->pag_ici_lock); + write_unlock(&pag->pag_ici_lock); radix_tree_preload_end(); if (lock_flags) xfs_iunlock(ip, lock_flags); @@ -420,7 +377,7 @@ xfs_iget( xfs_agino_t agino; /* reject inode numbers outside existing AGs */ - if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) + if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) return EINVAL; /* get the perag structure and ensure that it's inode capable */ @@ -429,15 +386,15 @@ xfs_iget( again: error = 0; - rcu_read_lock(); + read_lock(&pag->pag_ici_lock); ip = radix_tree_lookup(&pag->pag_ici_root, agino); if (ip) { - error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags); + error = xfs_iget_cache_hit(pag, ip, flags, lock_flags); if (error) goto out_error_or_again; } else { - rcu_read_unlock(); + read_unlock(&pag->pag_ici_lock); XFS_STATS_INC(xs_ig_missed); error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, diff --git a/trunk/fs/xfs/xfs_inode.c b/trunk/fs/xfs/xfs_inode.c index be7cf625421f..108c7a085f94 100644 --- a/trunk/fs/xfs/xfs_inode.c +++ b/trunk/fs/xfs/xfs_inode.c @@ -887,7 +887,7 @@ xfs_iread( * around for a while. This helps to keep recently accessed * meta-data in-core longer. */ - xfs_buf_set_ref(bp, XFS_INO_REF); + XFS_BUF_SET_REF(bp, XFS_INO_REF); /* * Use xfs_trans_brelse() to release the buffer containing the @@ -2000,32 +2000,16 @@ xfs_ifree_cluster( */ for (i = 0; i < ninodes; i++) { retry: - rcu_read_lock(); + read_lock(&pag->pag_ici_lock); ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, (inum + i))); - /* Inode not in memory, nothing to do */ - if (!ip) { - rcu_read_unlock(); + /* Inode not in memory or stale, nothing to do */ + if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { + read_unlock(&pag->pag_ici_lock); continue; } - /* - * because this is an RCU protected lookup, we could - * find a recently freed or even reallocated inode - * during the lookup. We need to check under the - * i_flags_lock for a valid inode here. Skip it if it - * is not valid, the wrong inode or stale. 
- */ - spin_lock(&ip->i_flags_lock); - if (ip->i_ino != inum + i || - __xfs_iflags_test(ip, XFS_ISTALE)) { - spin_unlock(&ip->i_flags_lock); - rcu_read_unlock(); - continue; - } - spin_unlock(&ip->i_flags_lock); - /* * Don't try to lock/unlock the current inode, but we * _cannot_ skip the other inodes that we did not find @@ -2035,11 +2019,11 @@ xfs_ifree_cluster( */ if (ip != free_ip && !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { - rcu_read_unlock(); + read_unlock(&pag->pag_ici_lock); delay(1); goto retry; } - rcu_read_unlock(); + read_unlock(&pag->pag_ici_lock); xfs_iflock(ip); xfs_iflags_set(ip, XFS_ISTALE); @@ -2645,7 +2629,7 @@ xfs_iflush_cluster( mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; - rcu_read_lock(); + read_lock(&pag->pag_ici_lock); /* really need a gang lookup range call here */ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, first_index, inodes_per_cluster); @@ -2656,21 +2640,9 @@ xfs_iflush_cluster( iq = ilist[i]; if (iq == ip) continue; - - /* - * because this is an RCU protected lookup, we could find a - * recently freed or even reallocated inode during the lookup. - * We need to check under the i_flags_lock for a valid inode - * here. Skip it if it is not valid or the wrong inode. - */ - spin_lock(&ip->i_flags_lock); - if (!ip->i_ino || - (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) { - spin_unlock(&ip->i_flags_lock); - continue; - } - spin_unlock(&ip->i_flags_lock); - + /* if the inode lies outside this cluster, we're done. */ + if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) + break; /* * Do an un-protected check to see if the inode is dirty and * is a candidate for flushing. These checks will be repeated @@ -2720,7 +2692,7 @@ xfs_iflush_cluster( } out_free: - rcu_read_unlock(); + read_unlock(&pag->pag_ici_lock); kmem_free(ilist); out_put: xfs_perag_put(pag); @@ -2732,7 +2704,7 @@ xfs_iflush_cluster( * Corruption detected in the clustering loop. Invalidate the * inode buffer and shut down the filesystem. */ - rcu_read_unlock(); + read_unlock(&pag->pag_ici_lock); /* * Clean up the buffer. If it was B_DELWRI, just release it -- * brelse can handle it with no problems. If not, shut down the diff --git a/trunk/fs/xfs/xfs_inode.h b/trunk/fs/xfs/xfs_inode.h index 5c95fa8ec11d..fb2ca2e4cdc9 100644 --- a/trunk/fs/xfs/xfs_inode.h +++ b/trunk/fs/xfs/xfs_inode.h @@ -376,13 +376,12 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) /* * In-core inode flags. */ -#define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */ -#define XFS_ISTALE 0x0002 /* inode has been staled */ -#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ -#define XFS_INEW 0x0008 /* inode has just been allocated */ -#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ -#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ -#define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */ +#define XFS_IRECLAIM 0x0001 /* we have started reclaiming this inode */ +#define XFS_ISTALE 0x0002 /* inode has been staled */ +#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ +#define XFS_INEW 0x0008 /* inode has just been allocated */ +#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ +#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ /* * Flags for inode locking. 
@@ -439,8 +438,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) -extern struct lock_class_key xfs_iolock_reclaimable; - /* * Flags for xfs_itruncate_start(). */ diff --git a/trunk/fs/xfs/xfs_inode_item.c b/trunk/fs/xfs/xfs_inode_item.c index fd4f398bd6f1..7c8d30c453c3 100644 --- a/trunk/fs/xfs/xfs_inode_item.c +++ b/trunk/fs/xfs/xfs_inode_item.c @@ -842,64 +842,15 @@ xfs_inode_item_destroy( * flushed to disk. It is responsible for removing the inode item * from the AIL if it has not been re-logged, and unlocking the inode's * flush lock. - * - * To reduce AIL lock traffic as much as possible, we scan the buffer log item - * list for other inodes that will run this function. We remove them from the - * buffer list so we can process all the inode IO completions in one AIL lock - * traversal. */ void xfs_iflush_done( struct xfs_buf *bp, struct xfs_log_item *lip) { - struct xfs_inode_log_item *iip; - struct xfs_log_item *blip; - struct xfs_log_item *next; - struct xfs_log_item *prev; + struct xfs_inode_log_item *iip = INODE_ITEM(lip); + xfs_inode_t *ip = iip->ili_inode; struct xfs_ail *ailp = lip->li_ailp; - int need_ail = 0; - - /* - * Scan the buffer IO completions for other inodes being completed and - * attach them to the current inode log item. - */ - blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); - prev = NULL; - while (blip != NULL) { - if (lip->li_cb != xfs_iflush_done) { - prev = blip; - blip = blip->li_bio_list; - continue; - } - - /* remove from list */ - next = blip->li_bio_list; - if (!prev) { - XFS_BUF_SET_FSPRIVATE(bp, next); - } else { - prev->li_bio_list = next; - } - - /* add to current list */ - blip->li_bio_list = lip->li_bio_list; - lip->li_bio_list = blip; - - /* - * while we have the item, do the unlocked check for needing - * the AIL lock. - */ - iip = INODE_ITEM(blip); - if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) - need_ail++; - - blip = next; - } - - /* make sure we capture the state of the initial inode. */ - iip = INODE_ITEM(lip); - if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) - need_ail++; /* * We only want to pull the item from the AIL if it is @@ -910,37 +861,28 @@ xfs_iflush_done( * the lock since it's cheaper, and then we recheck while * holding the lock before removing the inode from the AIL. */ - if (need_ail) { - struct xfs_log_item *log_items[need_ail]; - int i = 0; + if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) { spin_lock(&ailp->xa_lock); - for (blip = lip; blip; blip = blip->li_bio_list) { - iip = INODE_ITEM(blip); - if (iip->ili_logged && - blip->li_lsn == iip->ili_flush_lsn) { - log_items[i++] = blip; - } - ASSERT(i <= need_ail); + if (lip->li_lsn == iip->ili_flush_lsn) { + /* xfs_trans_ail_delete() drops the AIL lock. */ + xfs_trans_ail_delete(ailp, lip); + } else { + spin_unlock(&ailp->xa_lock); } - /* xfs_trans_ail_delete_bulk() drops the AIL lock. */ - xfs_trans_ail_delete_bulk(ailp, log_items, i); } + iip->ili_logged = 0; /* - * clean up and unlock the flush lock now we are done. We can clear the - * ili_last_fields bits now that we know that the data corresponding to - * them is safely on disk. + * Clear the ili_last_fields bits now that we know that the + * data corresponding to them is safely on disk. 
*/ - for (blip = lip; blip; blip = next) { - next = blip->li_bio_list; - blip->li_bio_list = NULL; + iip->ili_last_fields = 0; - iip = INODE_ITEM(blip); - iip->ili_logged = 0; - iip->ili_last_fields = 0; - xfs_ifunlock(iip->ili_inode); - } + /* + * Release the inode's flush lock since we're done with it. + */ + xfs_ifunlock(ip); } /* diff --git a/trunk/fs/xfs/xfs_iomap.c b/trunk/fs/xfs/xfs_iomap.c index 55582bd66659..20576146369f 100644 --- a/trunk/fs/xfs/xfs_iomap.c +++ b/trunk/fs/xfs/xfs_iomap.c @@ -47,8 +47,127 @@ #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ << mp->m_writeio_log) +#define XFS_STRAT_WRITE_IMAPS 2 #define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP +STATIC int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, + int, struct xfs_bmbt_irec *, int *); +STATIC int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int, + struct xfs_bmbt_irec *, int *); +STATIC int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t, + struct xfs_bmbt_irec *, int *); + +int +xfs_iomap( + struct xfs_inode *ip, + xfs_off_t offset, + ssize_t count, + int flags, + struct xfs_bmbt_irec *imap, + int *nimaps, + int *new) +{ + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t offset_fsb, end_fsb; + int error = 0; + int lockmode = 0; + int bmapi_flags = 0; + + ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); + + *new = 0; + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + trace_xfs_iomap_enter(ip, offset, count, flags, NULL); + + switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) { + case BMAPI_READ: + lockmode = xfs_ilock_map_shared(ip); + bmapi_flags = XFS_BMAPI_ENTIRE; + break; + case BMAPI_WRITE: + lockmode = XFS_ILOCK_EXCL; + if (flags & BMAPI_IGNSTATE) + bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; + xfs_ilock(ip, lockmode); + break; + case BMAPI_ALLOCATE: + lockmode = XFS_ILOCK_SHARED; + bmapi_flags = XFS_BMAPI_ENTIRE; + + /* Attempt non-blocking lock */ + if (flags & BMAPI_TRYLOCK) { + if (!xfs_ilock_nowait(ip, lockmode)) + return XFS_ERROR(EAGAIN); + } else { + xfs_ilock(ip, lockmode); + } + break; + default: + BUG(); + } + + ASSERT(offset <= mp->m_maxioffset); + if ((xfs_fsize_t)offset + count > mp->m_maxioffset) + count = mp->m_maxioffset - offset; + end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); + offset_fsb = XFS_B_TO_FSBT(mp, offset); + + error = xfs_bmapi(NULL, ip, offset_fsb, + (xfs_filblks_t)(end_fsb - offset_fsb), + bmapi_flags, NULL, 0, imap, + nimaps, NULL); + + if (error) + goto out; + + switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) { + case BMAPI_WRITE: + /* If we found an extent, return it */ + if (*nimaps && + (imap->br_startblock != HOLESTARTBLOCK) && + (imap->br_startblock != DELAYSTARTBLOCK)) { + trace_xfs_iomap_found(ip, offset, count, flags, imap); + break; + } + + if (flags & BMAPI_DIRECT) { + error = xfs_iomap_write_direct(ip, offset, count, flags, + imap, nimaps); + } else { + error = xfs_iomap_write_delay(ip, offset, count, flags, + imap, nimaps); + } + if (!error) { + trace_xfs_iomap_alloc(ip, offset, count, flags, imap); + } + *new = 1; + break; + case BMAPI_ALLOCATE: + /* If we found an extent, return it */ + xfs_iunlock(ip, lockmode); + lockmode = 0; + + if (*nimaps && !isnullstartblock(imap->br_startblock)) { + trace_xfs_iomap_found(ip, offset, count, flags, imap); + break; + } + + error = xfs_iomap_write_allocate(ip, offset, count, + imap, nimaps); + break; + } + + ASSERT(*nimaps <= 1); + +out: + if (lockmode) + xfs_iunlock(ip, lockmode); + return XFS_ERROR(error); +} + STATIC int 
xfs_iomap_eof_align_last_fsb( xfs_mount_t *mp, @@ -117,13 +236,14 @@ xfs_cmn_err_fsblock_zero( return EFSCORRUPTED; } -int +STATIC int xfs_iomap_write_direct( xfs_inode_t *ip, xfs_off_t offset, size_t count, + int flags, xfs_bmbt_irec_t *imap, - int nmaps) + int *nmaps) { xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb; @@ -159,7 +279,7 @@ xfs_iomap_write_direct( if (error) goto error_out; } else { - if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) + if (*nmaps && (imap->br_startblock == HOLESTARTBLOCK)) last_fsb = MIN(last_fsb, (xfs_fileoff_t) imap->br_blockcount + imap->br_startoff); @@ -211,7 +331,7 @@ xfs_iomap_write_direct( xfs_trans_ijoin(tp, ip); bmapi_flag = XFS_BMAPI_WRITE; - if (offset < ip->i_size || extsz) + if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz)) bmapi_flag |= XFS_BMAPI_PREALLOC; /* @@ -250,6 +370,7 @@ xfs_iomap_write_direct( goto error_out; } + *nmaps = 1; return 0; error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ @@ -258,6 +379,7 @@ xfs_iomap_write_direct( error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); + *nmaps = 0; /* nothing set-up here */ error_out: return XFS_ERROR(error); @@ -267,9 +389,6 @@ xfs_iomap_write_direct( * If the caller is doing a write at the end of the file, then extend the * allocation out to the file system's write iosize. We clean up any extra * space left over when the file is closed in xfs_inactive(). - * - * If we find we already have delalloc preallocation beyond EOF, don't do more - * preallocation as it it not needed. */ STATIC int xfs_iomap_eof_want_preallocate( @@ -277,6 +396,7 @@ xfs_iomap_eof_want_preallocate( xfs_inode_t *ip, xfs_off_t offset, size_t count, + int ioflag, xfs_bmbt_irec_t *imap, int nimaps, int *prealloc) @@ -285,7 +405,6 @@ xfs_iomap_eof_want_preallocate( xfs_filblks_t count_fsb; xfs_fsblock_t firstblock; int n, error, imaps; - int found_delalloc = 0; *prealloc = 0; if ((offset + count) <= ip->i_size) @@ -310,66 +429,20 @@ xfs_iomap_eof_want_preallocate( return 0; start_fsb += imap[n].br_blockcount; count_fsb -= imap[n].br_blockcount; - - if (imap[n].br_startblock == DELAYSTARTBLOCK) - found_delalloc = 1; } } - if (!found_delalloc) - *prealloc = 1; + *prealloc = 1; return 0; } -/* - * If we don't have a user specified preallocation size, dynamically increase - * the preallocation size as the size of the file grows. Cap the maximum size - * at a single extent or less if the filesystem is near full. The closer the - * filesystem is to full, the smaller the maximum prealocation. 
- */ -STATIC xfs_fsblock_t -xfs_iomap_prealloc_size( - struct xfs_mount *mp, - struct xfs_inode *ip) -{ - xfs_fsblock_t alloc_blocks = 0; - - if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { - int shift = 0; - int64_t freesp; - - alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size); - alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, - rounddown_pow_of_two(alloc_blocks)); - - xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); - freesp = mp->m_sb.sb_fdblocks; - if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { - shift = 2; - if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) - shift++; - if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT]) - shift++; - if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT]) - shift++; - if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT]) - shift++; - } - if (shift) - alloc_blocks >>= shift; - } - - if (alloc_blocks < mp->m_writeio_blocks) - alloc_blocks = mp->m_writeio_blocks; - - return alloc_blocks; -} - -int +STATIC int xfs_iomap_write_delay( xfs_inode_t *ip, xfs_off_t offset, size_t count, - xfs_bmbt_irec_t *ret_imap) + int ioflag, + xfs_bmbt_irec_t *ret_imap, + int *nmaps) { xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb; @@ -396,19 +469,16 @@ xfs_iomap_write_delay( extsz = xfs_get_extsz_hint(ip); offset_fsb = XFS_B_TO_FSBT(mp, offset); - error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, - imap, XFS_WRITE_IMAPS, &prealloc); + ioflag, imap, XFS_WRITE_IMAPS, &prealloc); if (error) return error; retry: if (prealloc) { - xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip); - aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); ioalign = XFS_B_TO_FSBT(mp, aligned_offset); - last_fsb = ioalign + alloc_blocks; + last_fsb = ioalign + mp->m_writeio_blocks; } else { last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); } @@ -426,31 +496,22 @@ xfs_iomap_write_delay( XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | XFS_BMAPI_ENTIRE, &firstblock, 1, imap, &nimaps, NULL); - switch (error) { - case 0: - case ENOSPC: - case EDQUOT: - break; - default: + if (error && (error != ENOSPC)) return XFS_ERROR(error); - } /* - * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. For - * ENOSPC, * flush all other inodes with delalloc blocks to free up - * some of the excess reserved metadata space. For both cases, retry - * without EOF preallocation. + * If bmapi returned us nothing, and if we didn't get back EDQUOT, + * then we must have run out of space - flush all other inodes with + * delalloc blocks and retry without EOF preallocation. */ if (nimaps == 0) { trace_xfs_delalloc_enospc(ip, offset, count); if (flushed) - return XFS_ERROR(error ? error : ENOSPC); + return XFS_ERROR(ENOSPC); - if (error == ENOSPC) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_flush_inodes(ip); - xfs_ilock(ip, XFS_ILOCK_EXCL); - } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_flush_inodes(ip); + xfs_ilock(ip, XFS_ILOCK_EXCL); flushed = 1; error = 0; @@ -462,6 +523,8 @@ xfs_iomap_write_delay( return xfs_cmn_err_fsblock_zero(ip, &imap[0]); *ret_imap = imap[0]; + *nmaps = 1; + return 0; } @@ -475,12 +538,13 @@ xfs_iomap_write_delay( * We no longer bother to look at the incoming map - all we have to * guarantee is that whatever we allocate fills the required range. 
*/ -int +STATIC int xfs_iomap_write_allocate( xfs_inode_t *ip, xfs_off_t offset, size_t count, - xfs_bmbt_irec_t *imap) + xfs_bmbt_irec_t *imap, + int *retmap) { xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb, last_block; @@ -493,6 +557,8 @@ xfs_iomap_write_allocate( int error = 0; int nres; + *retmap = 0; + /* * Make sure that the dquots are there. */ @@ -614,6 +680,7 @@ xfs_iomap_write_allocate( if ((offset_fsb >= imap->br_startoff) && (offset_fsb < (imap->br_startoff + imap->br_blockcount))) { + *retmap = 1; XFS_STATS_INC(xs_xstrat_quick); return 0; } diff --git a/trunk/fs/xfs/xfs_iomap.h b/trunk/fs/xfs/xfs_iomap.h index 80615760959a..7748a430f50d 100644 --- a/trunk/fs/xfs/xfs_iomap.h +++ b/trunk/fs/xfs/xfs_iomap.h @@ -18,15 +18,30 @@ #ifndef __XFS_IOMAP_H__ #define __XFS_IOMAP_H__ +/* base extent manipulation calls */ +#define BMAPI_READ (1 << 0) /* read extents */ +#define BMAPI_WRITE (1 << 1) /* create extents */ +#define BMAPI_ALLOCATE (1 << 2) /* delayed allocate to real extents */ + +/* modifiers */ +#define BMAPI_IGNSTATE (1 << 4) /* ignore unwritten state on read */ +#define BMAPI_DIRECT (1 << 5) /* direct instead of buffered write */ +#define BMAPI_MMA (1 << 6) /* allocate for mmap write */ +#define BMAPI_TRYLOCK (1 << 7) /* non-blocking request */ + +#define BMAPI_FLAGS \ + { BMAPI_READ, "READ" }, \ + { BMAPI_WRITE, "WRITE" }, \ + { BMAPI_ALLOCATE, "ALLOCATE" }, \ + { BMAPI_IGNSTATE, "IGNSTATE" }, \ + { BMAPI_DIRECT, "DIRECT" }, \ + { BMAPI_TRYLOCK, "TRYLOCK" } + struct xfs_inode; struct xfs_bmbt_irec; -extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, - struct xfs_bmbt_irec *, int); -extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, - struct xfs_bmbt_irec *); -extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t, - struct xfs_bmbt_irec *); +extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int, + struct xfs_bmbt_irec *, int *, int *); extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); #endif /* __XFS_IOMAP_H__*/ diff --git a/trunk/fs/xfs/xfs_log.c b/trunk/fs/xfs/xfs_log.c index 0bf24b11d0c4..cee4ab9f8a9e 100644 --- a/trunk/fs/xfs/xfs_log.c +++ b/trunk/fs/xfs/xfs_log.c @@ -47,7 +47,7 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, xfs_buftarg_t *log_target, xfs_daddr_t blk_offset, int num_bblks); -STATIC int xlog_space_left(struct log *log, atomic64_t *head); +STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); STATIC void xlog_dealloc_log(xlog_t *log); @@ -70,7 +70,7 @@ STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); /* local functions to manipulate grant head */ STATIC int xlog_grant_log_space(xlog_t *log, xlog_ticket_t *xtic); -STATIC void xlog_grant_push_ail(struct log *log, +STATIC void xlog_grant_push_ail(xfs_mount_t *mp, int need_bytes); STATIC void xlog_regrant_reserve_log_space(xlog_t *log, xlog_ticket_t *ticket); @@ -81,73 +81,98 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, #if defined(DEBUG) STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); -STATIC void xlog_verify_grant_tail(struct log *log); +STATIC void xlog_verify_grant_head(xlog_t *log, int equals); STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, int count, boolean_t syncing); STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, xfs_lsn_t tail_lsn); #else #define xlog_verify_dest_ptr(a,b) -#define xlog_verify_grant_tail(a) +#define xlog_verify_grant_head(a,b) 
#define xlog_verify_iclog(a,b,c,d) #define xlog_verify_tail_lsn(a,b,c) #endif STATIC int xlog_iclogs_empty(xlog_t *log); + static void -xlog_grant_sub_space( - struct log *log, - atomic64_t *head, - int bytes) +xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) { - int64_t head_val = atomic64_read(head); - int64_t new, old; - - do { - int cycle, space; + if (*qp) { + tic->t_next = (*qp); + tic->t_prev = (*qp)->t_prev; + (*qp)->t_prev->t_next = tic; + (*qp)->t_prev = tic; + } else { + tic->t_prev = tic->t_next = tic; + *qp = tic; + } - xlog_crack_grant_head_val(head_val, &cycle, &space); + tic->t_flags |= XLOG_TIC_IN_Q; +} - space -= bytes; - if (space < 0) { - space += log->l_logsize; - cycle--; - } +static void +xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) +{ + if (tic == tic->t_next) { + *qp = NULL; + } else { + *qp = tic->t_next; + tic->t_next->t_prev = tic->t_prev; + tic->t_prev->t_next = tic->t_next; + } - old = head_val; - new = xlog_assign_grant_head_val(cycle, space); - head_val = atomic64_cmpxchg(head, old, new); - } while (head_val != old); + tic->t_next = tic->t_prev = NULL; + tic->t_flags &= ~XLOG_TIC_IN_Q; } static void -xlog_grant_add_space( - struct log *log, - atomic64_t *head, - int bytes) +xlog_grant_sub_space(struct log *log, int bytes) { - int64_t head_val = atomic64_read(head); - int64_t new, old; + log->l_grant_write_bytes -= bytes; + if (log->l_grant_write_bytes < 0) { + log->l_grant_write_bytes += log->l_logsize; + log->l_grant_write_cycle--; + } - do { - int tmp; - int cycle, space; + log->l_grant_reserve_bytes -= bytes; + if ((log)->l_grant_reserve_bytes < 0) { + log->l_grant_reserve_bytes += log->l_logsize; + log->l_grant_reserve_cycle--; + } - xlog_crack_grant_head_val(head_val, &cycle, &space); +} - tmp = log->l_logsize - space; - if (tmp > bytes) - space += bytes; - else { - space = bytes - tmp; - cycle++; - } +static void +xlog_grant_add_space_write(struct log *log, int bytes) +{ + int tmp = log->l_logsize - log->l_grant_write_bytes; + if (tmp > bytes) + log->l_grant_write_bytes += bytes; + else { + log->l_grant_write_cycle++; + log->l_grant_write_bytes = bytes - tmp; + } +} + +static void +xlog_grant_add_space_reserve(struct log *log, int bytes) +{ + int tmp = log->l_logsize - log->l_grant_reserve_bytes; + if (tmp > bytes) + log->l_grant_reserve_bytes += bytes; + else { + log->l_grant_reserve_cycle++; + log->l_grant_reserve_bytes = bytes - tmp; + } +} - old = head_val; - new = xlog_assign_grant_head_val(cycle, space); - head_val = atomic64_cmpxchg(head, old, new); - } while (head_val != old); +static inline void +xlog_grant_add_space(struct log *log, int bytes) +{ + xlog_grant_add_space_write(log, bytes); + xlog_grant_add_space_reserve(log, bytes); } static void @@ -330,7 +355,7 @@ xfs_log_reserve( trace_xfs_log_reserve(log, internal_ticket); - xlog_grant_push_ail(log, internal_ticket->t_unit_res); + xlog_grant_push_ail(mp, internal_ticket->t_unit_res); retval = xlog_regrant_write_log_space(log, internal_ticket); } else { /* may sleep if need to allocate more tickets */ @@ -344,7 +369,7 @@ xfs_log_reserve( trace_xfs_log_reserve(log, internal_ticket); - xlog_grant_push_ail(log, + xlog_grant_push_ail(mp, (internal_ticket->t_unit_res * internal_ticket->t_cnt)); retval = xlog_grant_log_space(log, internal_ticket); @@ -559,8 +584,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) if (!(iclog->ic_state == XLOG_STATE_ACTIVE || iclog->ic_state == XLOG_STATE_DIRTY)) { if (!XLOG_FORCED_SHUTDOWN(log)) { - xlog_wait(&iclog->ic_force_wait, 
- &log->l_icloglock); + sv_wait(&iclog->ic_force_wait, PMEM, + &log->l_icloglock, s); } else { spin_unlock(&log->l_icloglock); } @@ -600,8 +625,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) || iclog->ic_state == XLOG_STATE_DIRTY || iclog->ic_state == XLOG_STATE_IOERROR) ) { - xlog_wait(&iclog->ic_force_wait, - &log->l_icloglock); + sv_wait(&iclog->ic_force_wait, PMEM, + &log->l_icloglock, s); } else { spin_unlock(&log->l_icloglock); } @@ -678,46 +703,55 @@ xfs_log_move_tail(xfs_mount_t *mp, { xlog_ticket_t *tic; xlog_t *log = mp->m_log; - int need_bytes, free_bytes; + int need_bytes, free_bytes, cycle, bytes; if (XLOG_FORCED_SHUTDOWN(log)) return; - if (tail_lsn == 0) - tail_lsn = atomic64_read(&log->l_last_sync_lsn); + if (tail_lsn == 0) { + /* needed since sync_lsn is 64 bits */ + spin_lock(&log->l_icloglock); + tail_lsn = log->l_last_sync_lsn; + spin_unlock(&log->l_icloglock); + } + + spin_lock(&log->l_grant_lock); - /* tail_lsn == 1 implies that we weren't passed a valid value. */ - if (tail_lsn != 1) - atomic64_set(&log->l_tail_lsn, tail_lsn); + /* Also an invalid lsn. 1 implies that we aren't passing in a valid + * tail_lsn. + */ + if (tail_lsn != 1) { + log->l_tail_lsn = tail_lsn; + } - if (!list_empty_careful(&log->l_writeq)) { + if ((tic = log->l_write_headq)) { #ifdef DEBUG if (log->l_flags & XLOG_ACTIVE_RECOVERY) panic("Recovery problem"); #endif - spin_lock(&log->l_grant_write_lock); - free_bytes = xlog_space_left(log, &log->l_grant_write_head); - list_for_each_entry(tic, &log->l_writeq, t_queue) { + cycle = log->l_grant_write_cycle; + bytes = log->l_grant_write_bytes; + free_bytes = xlog_space_left(log, cycle, bytes); + do { ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); if (free_bytes < tic->t_unit_res && tail_lsn != 1) break; tail_lsn = 0; free_bytes -= tic->t_unit_res; - trace_xfs_log_regrant_write_wake_up(log, tic); - wake_up(&tic->t_wait); - } - spin_unlock(&log->l_grant_write_lock); + sv_signal(&tic->t_wait); + tic = tic->t_next; + } while (tic != log->l_write_headq); } - - if (!list_empty_careful(&log->l_reserveq)) { + if ((tic = log->l_reserve_headq)) { #ifdef DEBUG if (log->l_flags & XLOG_ACTIVE_RECOVERY) panic("Recovery problem"); #endif - spin_lock(&log->l_grant_reserve_lock); - free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); - list_for_each_entry(tic, &log->l_reserveq, t_queue) { + cycle = log->l_grant_reserve_cycle; + bytes = log->l_grant_reserve_bytes; + free_bytes = xlog_space_left(log, cycle, bytes); + do { if (tic->t_flags & XLOG_TIC_PERM_RESERV) need_bytes = tic->t_unit_res*tic->t_cnt; else @@ -726,12 +760,12 @@ xfs_log_move_tail(xfs_mount_t *mp, break; tail_lsn = 0; free_bytes -= need_bytes; - trace_xfs_log_grant_wake_up(log, tic); - wake_up(&tic->t_wait); - } - spin_unlock(&log->l_grant_reserve_lock); + sv_signal(&tic->t_wait); + tic = tic->t_next; + } while (tic != log->l_reserve_headq); } -} + spin_unlock(&log->l_grant_lock); +} /* xfs_log_move_tail */ /* * Determine if we have a transaction that has gone to disk @@ -797,19 +831,23 @@ xfs_log_need_covered(xfs_mount_t *mp) * We may be holding the log iclog lock upon entering this routine. 
*/ xfs_lsn_t -xlog_assign_tail_lsn( - struct xfs_mount *mp) +xlog_assign_tail_lsn(xfs_mount_t *mp) { - xfs_lsn_t tail_lsn; - struct log *log = mp->m_log; + xfs_lsn_t tail_lsn; + xlog_t *log = mp->m_log; tail_lsn = xfs_trans_ail_tail(mp->m_ail); - if (!tail_lsn) - tail_lsn = atomic64_read(&log->l_last_sync_lsn); + spin_lock(&log->l_grant_lock); + if (tail_lsn != 0) { + log->l_tail_lsn = tail_lsn; + } else { + tail_lsn = log->l_tail_lsn = log->l_last_sync_lsn; + } + spin_unlock(&log->l_grant_lock); - atomic64_set(&log->l_tail_lsn, tail_lsn); return tail_lsn; -} +} /* xlog_assign_tail_lsn */ + /* * Return the space in the log between the tail and the head. The head @@ -826,26 +864,21 @@ xlog_assign_tail_lsn( * result is that we return the size of the log as the amount of space left. */ STATIC int -xlog_space_left( - struct log *log, - atomic64_t *head) -{ - int free_bytes; - int tail_bytes; - int tail_cycle; - int head_cycle; - int head_bytes; - - xlog_crack_grant_head(head, &head_cycle, &head_bytes); - xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes); - tail_bytes = BBTOB(tail_bytes); - if (tail_cycle == head_cycle && head_bytes >= tail_bytes) - free_bytes = log->l_logsize - (head_bytes - tail_bytes); - else if (tail_cycle + 1 < head_cycle) +xlog_space_left(xlog_t *log, int cycle, int bytes) +{ + int free_bytes; + int tail_bytes; + int tail_cycle; + + tail_bytes = BBTOB(BLOCK_LSN(log->l_tail_lsn)); + tail_cycle = CYCLE_LSN(log->l_tail_lsn); + if ((tail_cycle == cycle) && (bytes >= tail_bytes)) { + free_bytes = log->l_logsize - (bytes - tail_bytes); + } else if ((tail_cycle + 1) < cycle) { return 0; - else if (tail_cycle < head_cycle) { - ASSERT(tail_cycle == (head_cycle - 1)); - free_bytes = tail_bytes - head_bytes; + } else if (tail_cycle < cycle) { + ASSERT(tail_cycle == (cycle - 1)); + free_bytes = tail_bytes - bytes; } else { /* * The reservation head is behind the tail. 
@@ -856,12 +889,12 @@ xlog_space_left( "xlog_space_left: head behind tail\n" " tail_cycle = %d, tail_bytes = %d\n" " GH cycle = %d, GH bytes = %d", - tail_cycle, tail_bytes, head_cycle, head_bytes); + tail_cycle, tail_bytes, cycle, bytes); ASSERT(0); free_bytes = log->l_logsize; } return free_bytes; -} +} /* xlog_space_left */ /* @@ -1014,16 +1047,12 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_flags |= XLOG_ACTIVE_RECOVERY; log->l_prev_block = -1; + log->l_tail_lsn = xlog_assign_lsn(1, 0); /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ - xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0); - xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0); + log->l_last_sync_lsn = log->l_tail_lsn; log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ - xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0); - xlog_assign_grant_head(&log->l_grant_write_head, 1, 0); - INIT_LIST_HEAD(&log->l_reserveq); - INIT_LIST_HEAD(&log->l_writeq); - spin_lock_init(&log->l_grant_reserve_lock); - spin_lock_init(&log->l_grant_write_lock); + log->l_grant_reserve_cycle = 1; + log->l_grant_write_cycle = 1; error = EFSCORRUPTED; if (xfs_sb_version_hassector(&mp->m_sb)) { @@ -1065,7 +1094,8 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_xbuf = bp; spin_lock_init(&log->l_icloglock); - init_waitqueue_head(&log->l_flush_wait); + spin_lock_init(&log->l_grant_lock); + sv_init(&log->l_flush_wait, 0, "flush_wait"); /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); @@ -1121,8 +1151,8 @@ xlog_alloc_log(xfs_mount_t *mp, ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); - init_waitqueue_head(&iclog->ic_force_wait); - init_waitqueue_head(&iclog->ic_write_wait); + sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); + sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); iclogp = &iclog->ic_next; } @@ -1137,11 +1167,15 @@ xlog_alloc_log(xfs_mount_t *mp, out_free_iclog: for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { prev_iclog = iclog->ic_next; - if (iclog->ic_bp) + if (iclog->ic_bp) { + sv_destroy(&iclog->ic_force_wait); + sv_destroy(&iclog->ic_write_wait); xfs_buf_free(iclog->ic_bp); + } kmem_free(iclog); } spinlock_destroy(&log->l_icloglock); + spinlock_destroy(&log->l_grant_lock); xfs_buf_free(log->l_xbuf); out_free_log: kmem_free(log); @@ -1189,60 +1223,61 @@ xlog_commit_record( * water mark. In this manner, we would be creating a low water mark. */ STATIC void -xlog_grant_push_ail( - struct log *log, - int need_bytes) +xlog_grant_push_ail(xfs_mount_t *mp, + int need_bytes) { - xfs_lsn_t threshold_lsn = 0; - xfs_lsn_t last_sync_lsn; - int free_blocks; - int free_bytes; - int threshold_block; - int threshold_cycle; - int free_threshold; - - ASSERT(BTOBB(need_bytes) < log->l_logBBsize); - - free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); - free_blocks = BTOBBT(free_bytes); - - /* - * Set the threshold for the minimum number of free blocks in the - * log to the maximum of what the caller needs, one quarter of the - * log, and 256 blocks. 
- */ - free_threshold = BTOBB(need_bytes); - free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2)); - free_threshold = MAX(free_threshold, 256); - if (free_blocks >= free_threshold) - return; - - xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle, - &threshold_block); - threshold_block += free_threshold; + xlog_t *log = mp->m_log; /* pointer to the log */ + xfs_lsn_t tail_lsn; /* lsn of the log tail */ + xfs_lsn_t threshold_lsn = 0; /* lsn we'd like to be at */ + int free_blocks; /* free blocks left to write to */ + int free_bytes; /* free bytes left to write to */ + int threshold_block; /* block in lsn we'd like to be at */ + int threshold_cycle; /* lsn cycle we'd like to be at */ + int free_threshold; + + ASSERT(BTOBB(need_bytes) < log->l_logBBsize); + + spin_lock(&log->l_grant_lock); + free_bytes = xlog_space_left(log, + log->l_grant_reserve_cycle, + log->l_grant_reserve_bytes); + tail_lsn = log->l_tail_lsn; + free_blocks = BTOBBT(free_bytes); + + /* + * Set the threshold for the minimum number of free blocks in the + * log to the maximum of what the caller needs, one quarter of the + * log, and 256 blocks. + */ + free_threshold = BTOBB(need_bytes); + free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2)); + free_threshold = MAX(free_threshold, 256); + if (free_blocks < free_threshold) { + threshold_block = BLOCK_LSN(tail_lsn) + free_threshold; + threshold_cycle = CYCLE_LSN(tail_lsn); if (threshold_block >= log->l_logBBsize) { - threshold_block -= log->l_logBBsize; - threshold_cycle += 1; + threshold_block -= log->l_logBBsize; + threshold_cycle += 1; } - threshold_lsn = xlog_assign_lsn(threshold_cycle, - threshold_block); - /* - * Don't pass in an lsn greater than the lsn of the last - * log record known to be on disk. Use a snapshot of the last sync lsn - * so that it doesn't change between the compare and the set. - */ - last_sync_lsn = atomic64_read(&log->l_last_sync_lsn); - if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0) - threshold_lsn = last_sync_lsn; + threshold_lsn = xlog_assign_lsn(threshold_cycle, threshold_block); - /* - * Get the transaction layer to kick the dirty buffers out to - * disk asynchronously. No point in trying to do this if - * the filesystem is shutting down. + /* Don't pass in an lsn greater than the lsn of the last + * log record known to be on disk. */ - if (!XLOG_FORCED_SHUTDOWN(log)) - xfs_trans_ail_push(log->l_ailp, threshold_lsn); -} + if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0) + threshold_lsn = log->l_last_sync_lsn; + } + spin_unlock(&log->l_grant_lock); + + /* + * Get the transaction layer to kick the dirty buffers out to + * disk asynchronously. No point in trying to do this if + * the filesystem is shutting down. + */ + if (threshold_lsn && + !XLOG_FORCED_SHUTDOWN(log)) + xfs_trans_ail_push(log->l_ailp, threshold_lsn); +} /* xlog_grant_push_ail */ /* * The bdstrat callback function for log bufs. 
This gives us a central @@ -1337,8 +1372,9 @@ xlog_sync(xlog_t *log, roundoff < BBTOB(1))); /* move grant heads by roundoff in sync */ - xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff); - xlog_grant_add_space(log, &log->l_grant_write_head, roundoff); + spin_lock(&log->l_grant_lock); + xlog_grant_add_space(log, roundoff); + spin_unlock(&log->l_grant_lock); /* put cycle number in every block */ xlog_pack_data(log, iclog, roundoff); @@ -1453,12 +1489,15 @@ xlog_dealloc_log(xlog_t *log) iclog = log->l_iclog; for (i=0; il_iclog_bufs; i++) { + sv_destroy(&iclog->ic_force_wait); + sv_destroy(&iclog->ic_write_wait); xfs_buf_free(iclog->ic_bp); next_iclog = iclog->ic_next; kmem_free(iclog); iclog = next_iclog; } spinlock_destroy(&log->l_icloglock); + spinlock_destroy(&log->l_grant_lock); xfs_buf_free(log->l_xbuf); log->l_mp->m_log = NULL; @@ -2193,7 +2232,7 @@ xlog_state_do_callback( lowest_lsn = xlog_get_lowest_lsn(log); if (lowest_lsn && XFS_LSN_CMP(lowest_lsn, - be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { + be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { iclog = iclog->ic_next; continue; /* Leave this iclog for * another thread */ @@ -2201,21 +2240,23 @@ xlog_state_do_callback( iclog->ic_state = XLOG_STATE_CALLBACK; + spin_unlock(&log->l_icloglock); - /* - * update the last_sync_lsn before we drop the - * icloglock to ensure we are the only one that - * can update it. + /* l_last_sync_lsn field protected by + * l_grant_lock. Don't worry about iclog's lsn. + * No one else can be here except us. */ - ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), - be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); - atomic64_set(&log->l_last_sync_lsn, - be64_to_cpu(iclog->ic_header.h_lsn)); + spin_lock(&log->l_grant_lock); + ASSERT(XFS_LSN_CMP(log->l_last_sync_lsn, + be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); + log->l_last_sync_lsn = + be64_to_cpu(iclog->ic_header.h_lsn); + spin_unlock(&log->l_grant_lock); - } else + } else { + spin_unlock(&log->l_icloglock); ioerrors++; - - spin_unlock(&log->l_icloglock); + } /* * Keep processing entries in the callback list until @@ -2256,7 +2297,7 @@ xlog_state_do_callback( xlog_state_clean_log(log); /* wake up threads waiting in xfs_log_force() */ - wake_up_all(&iclog->ic_force_wait); + sv_broadcast(&iclog->ic_force_wait); iclog = iclog->ic_next; } while (first_iclog != iclog); @@ -2303,7 +2344,7 @@ xlog_state_do_callback( spin_unlock(&log->l_icloglock); if (wake) - wake_up_all(&log->l_flush_wait); + sv_broadcast(&log->l_flush_wait); } @@ -2354,7 +2395,7 @@ xlog_state_done_syncing( * iclog buffer, we wake them all, one will get to do the * I/O, the others get to wait for the result. */ - wake_up_all(&iclog->ic_write_wait); + sv_broadcast(&iclog->ic_write_wait); spin_unlock(&log->l_icloglock); xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ } /* xlog_state_done_syncing */ @@ -2403,7 +2444,7 @@ xlog_state_get_iclog_space(xlog_t *log, XFS_STATS_INC(xs_log_noiclogs); /* Wait for log writes to have flushed */ - xlog_wait(&log->l_flush_wait, &log->l_icloglock); + sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0); goto restart; } @@ -2486,18 +2527,6 @@ xlog_state_get_iclog_space(xlog_t *log, * * Once a ticket gets put onto the reserveq, it will only return after * the needed reservation is satisfied. - * - * This function is structured so that it has a lock free fast path. This is - * necessary because every new transaction reservation will come through this - * path. 
Hence any lock will be globally hot if we take it unconditionally on - * every pass. - * - * As tickets are only ever moved on and off the reserveq under the - * l_grant_reserve_lock, we only need to take that lock if we are going - * to add the ticket to the queue and sleep. We can avoid taking the lock if the - * ticket was never added to the reserveq because the t_queue list head will be - * empty and we hold the only reference to it so it can safely be checked - * unlocked. */ STATIC int xlog_grant_log_space(xlog_t *log, @@ -2505,27 +2534,24 @@ xlog_grant_log_space(xlog_t *log, { int free_bytes; int need_bytes; +#ifdef DEBUG + xfs_lsn_t tail_lsn; +#endif + #ifdef DEBUG if (log->l_flags & XLOG_ACTIVE_RECOVERY) panic("grant Recovery problem"); #endif - trace_xfs_log_grant_enter(log, tic); + /* Is there space or do we need to sleep? */ + spin_lock(&log->l_grant_lock); - need_bytes = tic->t_unit_res; - if (tic->t_flags & XFS_LOG_PERM_RESERV) - need_bytes *= tic->t_ocnt; + trace_xfs_log_grant_enter(log, tic); /* something is already sleeping; insert new transaction at end */ - if (!list_empty_careful(&log->l_reserveq)) { - spin_lock(&log->l_grant_reserve_lock); - /* recheck the queue now we are locked */ - if (list_empty(&log->l_reserveq)) { - spin_unlock(&log->l_grant_reserve_lock); - goto redo; - } - list_add_tail(&tic->t_queue, &log->l_reserveq); + if (log->l_reserve_headq) { + xlog_ins_ticketq(&log->l_reserve_headq, tic); trace_xfs_log_grant_sleep1(log, tic); @@ -2537,57 +2563,72 @@ xlog_grant_log_space(xlog_t *log, goto error_return; XFS_STATS_INC(xs_sleep_logspace); - xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); - + sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); /* * If we got an error, and the filesystem is shutting down, * we'll catch it down below. So just continue... 
*/ trace_xfs_log_grant_wake1(log, tic); + spin_lock(&log->l_grant_lock); } + if (tic->t_flags & XFS_LOG_PERM_RESERV) + need_bytes = tic->t_unit_res*tic->t_ocnt; + else + need_bytes = tic->t_unit_res; redo: if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return_unlocked; + goto error_return; - free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); + free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle, + log->l_grant_reserve_bytes); if (free_bytes < need_bytes) { - spin_lock(&log->l_grant_reserve_lock); - if (list_empty(&tic->t_queue)) - list_add_tail(&tic->t_queue, &log->l_reserveq); + if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) + xlog_ins_ticketq(&log->l_reserve_headq, tic); trace_xfs_log_grant_sleep2(log, tic); - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return; - - xlog_grant_push_ail(log, need_bytes); + spin_unlock(&log->l_grant_lock); + xlog_grant_push_ail(log->l_mp, need_bytes); + spin_lock(&log->l_grant_lock); XFS_STATS_INC(xs_sleep_logspace); - xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); + sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); + + spin_lock(&log->l_grant_lock); + if (XLOG_FORCED_SHUTDOWN(log)) + goto error_return; trace_xfs_log_grant_wake2(log, tic); - goto redo; - } - if (!list_empty(&tic->t_queue)) { - spin_lock(&log->l_grant_reserve_lock); - list_del_init(&tic->t_queue); - spin_unlock(&log->l_grant_reserve_lock); - } + goto redo; + } else if (tic->t_flags & XLOG_TIC_IN_Q) + xlog_del_ticketq(&log->l_reserve_headq, tic); /* we've got enough space */ - xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes); - xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); + xlog_grant_add_space(log, need_bytes); +#ifdef DEBUG + tail_lsn = log->l_tail_lsn; + /* + * Check to make sure the grant write head didn't just over lap the + * tail. If the cycles are the same, we can't be overlapping. + * Otherwise, make sure that the cycles differ by exactly one and + * check the byte count. + */ + if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { + ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn)); + ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); + } +#endif trace_xfs_log_grant_exit(log, tic); - xlog_verify_grant_tail(log); + xlog_verify_grant_head(log, 1); + spin_unlock(&log->l_grant_lock); return 0; -error_return_unlocked: - spin_lock(&log->l_grant_reserve_lock); -error_return: - list_del_init(&tic->t_queue); - spin_unlock(&log->l_grant_reserve_lock); + error_return: + if (tic->t_flags & XLOG_TIC_IN_Q) + xlog_del_ticketq(&log->l_reserve_headq, tic); + trace_xfs_log_grant_error(log, tic); /* @@ -2597,6 +2638,7 @@ xlog_grant_log_space(xlog_t *log, */ tic->t_curr_res = 0; tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ + spin_unlock(&log->l_grant_lock); return XFS_ERROR(EIO); } /* xlog_grant_log_space */ @@ -2604,14 +2646,17 @@ xlog_grant_log_space(xlog_t *log, /* * Replenish the byte reservation required by moving the grant write head. * - * Similar to xlog_grant_log_space, the function is structured to have a lock - * free fast path. 
+ * */ STATIC int xlog_regrant_write_log_space(xlog_t *log, xlog_ticket_t *tic) { int free_bytes, need_bytes; + xlog_ticket_t *ntic; +#ifdef DEBUG + xfs_lsn_t tail_lsn; +#endif tic->t_curr_res = tic->t_unit_res; xlog_tic_reset_res(tic); @@ -2624,9 +2669,12 @@ xlog_regrant_write_log_space(xlog_t *log, panic("regrant Recovery problem"); #endif + spin_lock(&log->l_grant_lock); + trace_xfs_log_regrant_write_enter(log, tic); + if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return_unlocked; + goto error_return; /* If there are other waiters on the queue then give them a * chance at logspace before us. Wake up the first waiters, @@ -2635,76 +2683,92 @@ xlog_regrant_write_log_space(xlog_t *log, * this transaction. */ need_bytes = tic->t_unit_res; - if (!list_empty_careful(&log->l_writeq)) { - struct xlog_ticket *ntic; - - spin_lock(&log->l_grant_write_lock); - free_bytes = xlog_space_left(log, &log->l_grant_write_head); - list_for_each_entry(ntic, &log->l_writeq, t_queue) { + if ((ntic = log->l_write_headq)) { + free_bytes = xlog_space_left(log, log->l_grant_write_cycle, + log->l_grant_write_bytes); + do { ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV); if (free_bytes < ntic->t_unit_res) break; free_bytes -= ntic->t_unit_res; - wake_up(&ntic->t_wait); - } + sv_signal(&ntic->t_wait); + ntic = ntic->t_next; + } while (ntic != log->l_write_headq); + + if (ntic != log->l_write_headq) { + if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) + xlog_ins_ticketq(&log->l_write_headq, tic); - if (ntic != list_first_entry(&log->l_writeq, - struct xlog_ticket, t_queue)) { - if (list_empty(&tic->t_queue)) - list_add_tail(&tic->t_queue, &log->l_writeq); trace_xfs_log_regrant_write_sleep1(log, tic); - xlog_grant_push_ail(log, need_bytes); + spin_unlock(&log->l_grant_lock); + xlog_grant_push_ail(log->l_mp, need_bytes); + spin_lock(&log->l_grant_lock); XFS_STATS_INC(xs_sleep_logspace); - xlog_wait(&tic->t_wait, &log->l_grant_write_lock); + sv_wait(&tic->t_wait, PINOD|PLTWAIT, + &log->l_grant_lock, s); + + /* If we're shutting down, this tic is already + * off the queue */ + spin_lock(&log->l_grant_lock); + if (XLOG_FORCED_SHUTDOWN(log)) + goto error_return; + trace_xfs_log_regrant_write_wake1(log, tic); - } else - spin_unlock(&log->l_grant_write_lock); + } } redo: if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return_unlocked; + goto error_return; - free_bytes = xlog_space_left(log, &log->l_grant_write_head); + free_bytes = xlog_space_left(log, log->l_grant_write_cycle, + log->l_grant_write_bytes); if (free_bytes < need_bytes) { - spin_lock(&log->l_grant_write_lock); - if (list_empty(&tic->t_queue)) - list_add_tail(&tic->t_queue, &log->l_writeq); - - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return; - - xlog_grant_push_ail(log, need_bytes); + if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) + xlog_ins_ticketq(&log->l_write_headq, tic); + spin_unlock(&log->l_grant_lock); + xlog_grant_push_ail(log->l_mp, need_bytes); + spin_lock(&log->l_grant_lock); XFS_STATS_INC(xs_sleep_logspace); trace_xfs_log_regrant_write_sleep2(log, tic); - xlog_wait(&tic->t_wait, &log->l_grant_write_lock); + + sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); + + /* If we're shutting down, this tic is already off the queue */ + spin_lock(&log->l_grant_lock); + if (XLOG_FORCED_SHUTDOWN(log)) + goto error_return; trace_xfs_log_regrant_write_wake2(log, tic); goto redo; - } + } else if (tic->t_flags & XLOG_TIC_IN_Q) + xlog_del_ticketq(&log->l_write_headq, tic); - if (!list_empty(&tic->t_queue)) { - spin_lock(&log->l_grant_write_lock); - 
list_del_init(&tic->t_queue); - spin_unlock(&log->l_grant_write_lock); + /* we've got enough space */ + xlog_grant_add_space_write(log, need_bytes); +#ifdef DEBUG + tail_lsn = log->l_tail_lsn; + if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { + ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn)); + ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); } +#endif - /* we've got enough space */ - xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); trace_xfs_log_regrant_write_exit(log, tic); - xlog_verify_grant_tail(log); + + xlog_verify_grant_head(log, 1); + spin_unlock(&log->l_grant_lock); return 0; - error_return_unlocked: - spin_lock(&log->l_grant_write_lock); error_return: - list_del_init(&tic->t_queue); - spin_unlock(&log->l_grant_write_lock); + if (tic->t_flags & XLOG_TIC_IN_Q) + xlog_del_ticketq(&log->l_reserve_headq, tic); + trace_xfs_log_regrant_write_error(log, tic); /* @@ -2714,6 +2778,7 @@ xlog_regrant_write_log_space(xlog_t *log, */ tic->t_curr_res = 0; tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ + spin_unlock(&log->l_grant_lock); return XFS_ERROR(EIO); } /* xlog_regrant_write_log_space */ @@ -2734,24 +2799,27 @@ xlog_regrant_reserve_log_space(xlog_t *log, if (ticket->t_cnt > 0) ticket->t_cnt--; - xlog_grant_sub_space(log, &log->l_grant_reserve_head, - ticket->t_curr_res); - xlog_grant_sub_space(log, &log->l_grant_write_head, - ticket->t_curr_res); + spin_lock(&log->l_grant_lock); + xlog_grant_sub_space(log, ticket->t_curr_res); ticket->t_curr_res = ticket->t_unit_res; xlog_tic_reset_res(ticket); trace_xfs_log_regrant_reserve_sub(log, ticket); + xlog_verify_grant_head(log, 1); + /* just return if we still have some of the pre-reserved space */ - if (ticket->t_cnt > 0) + if (ticket->t_cnt > 0) { + spin_unlock(&log->l_grant_lock); return; + } - xlog_grant_add_space(log, &log->l_grant_reserve_head, - ticket->t_unit_res); + xlog_grant_add_space_reserve(log, ticket->t_unit_res); trace_xfs_log_regrant_reserve_exit(log, ticket); + xlog_verify_grant_head(log, 0); + spin_unlock(&log->l_grant_lock); ticket->t_curr_res = ticket->t_unit_res; xlog_tic_reset_res(ticket); } /* xlog_regrant_reserve_log_space */ @@ -2775,29 +2843,28 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, xlog_ticket_t *ticket) { - int bytes; - if (ticket->t_cnt > 0) ticket->t_cnt--; + spin_lock(&log->l_grant_lock); trace_xfs_log_ungrant_enter(log, ticket); + + xlog_grant_sub_space(log, ticket->t_curr_res); + trace_xfs_log_ungrant_sub(log, ticket); - /* - * If this is a permanent reservation ticket, we may be able to free + /* If this is a permanent reservation ticket, we may be able to free * up more space based on the remaining count. 
*/ - bytes = ticket->t_curr_res; if (ticket->t_cnt > 0) { ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV); - bytes += ticket->t_unit_res*ticket->t_cnt; + xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt); } - xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes); - xlog_grant_sub_space(log, &log->l_grant_write_head, bytes); - trace_xfs_log_ungrant_exit(log, ticket); + xlog_verify_grant_head(log, 1); + spin_unlock(&log->l_grant_lock); xfs_log_move_tail(log->l_mp, 1); } /* xlog_ungrant_log_space */ @@ -2834,11 +2901,11 @@ xlog_state_release_iclog( if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { /* update tail before writing to iclog */ - xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp); + xlog_assign_tail_lsn(log->l_mp); sync++; iclog->ic_state = XLOG_STATE_SYNCING; - iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); - xlog_verify_tail_lsn(log, iclog, tail_lsn); + iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); + xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); /* cycle incremented when incrementing curr_block */ } spin_unlock(&log->l_icloglock); @@ -3021,7 +3088,7 @@ _xfs_log_force( return XFS_ERROR(EIO); } XFS_STATS_INC(xs_log_force_sleep); - xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); + sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s); /* * No need to grab the log lock here since we're * only deciding whether or not to return EIO @@ -3139,8 +3206,8 @@ _xfs_log_force_lsn( XFS_STATS_INC(xs_log_force_sleep); - xlog_wait(&iclog->ic_prev->ic_write_wait, - &log->l_icloglock); + sv_wait(&iclog->ic_prev->ic_write_wait, + PSWP, &log->l_icloglock, s); if (log_flushed) *log_flushed = 1; already_slept = 1; @@ -3168,7 +3235,7 @@ _xfs_log_force_lsn( return XFS_ERROR(EIO); } XFS_STATS_INC(xs_log_force_sleep); - xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); + sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); /* * No need to grab the log lock here since we're * only deciding whether or not to return EIO @@ -3243,8 +3310,10 @@ xfs_log_ticket_put( xlog_ticket_t *ticket) { ASSERT(atomic_read(&ticket->t_ref) > 0); - if (atomic_dec_and_test(&ticket->t_ref)) + if (atomic_dec_and_test(&ticket->t_ref)) { + sv_destroy(&ticket->t_wait); kmem_zone_free(xfs_log_ticket_zone, ticket); + } } xlog_ticket_t * @@ -3366,7 +3435,6 @@ xlog_ticket_alloc( } atomic_set(&tic->t_ref, 1); - INIT_LIST_HEAD(&tic->t_queue); tic->t_unit_res = unit_bytes; tic->t_curr_res = unit_bytes; tic->t_cnt = cnt; @@ -3377,7 +3445,7 @@ xlog_ticket_alloc( tic->t_trans_type = 0; if (xflags & XFS_LOG_PERM_RESERV) tic->t_flags |= XLOG_TIC_PERM_RESERV; - init_waitqueue_head(&tic->t_wait); + sv_init(&tic->t_wait, SV_DEFAULT, "logtick"); xlog_tic_reset_res(tic); @@ -3416,25 +3484,18 @@ xlog_verify_dest_ptr( } STATIC void -xlog_verify_grant_tail( - struct log *log) +xlog_verify_grant_head(xlog_t *log, int equals) { - int tail_cycle, tail_blocks; - int cycle, space; - - /* - * Check to make sure the grant write head didn't just over lap the - * tail. If the cycles are the same, we can't be overlapping. - * Otherwise, make sure that the cycles differ by exactly one and - * check the byte count. 
- */ - xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); - xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); - if (tail_cycle != cycle) { - ASSERT(cycle - 1 == tail_cycle); - ASSERT(space <= BBTOB(tail_blocks)); - } -} + if (log->l_grant_reserve_cycle == log->l_grant_write_cycle) { + if (equals) + ASSERT(log->l_grant_reserve_bytes >= log->l_grant_write_bytes); + else + ASSERT(log->l_grant_reserve_bytes > log->l_grant_write_bytes); + } else { + ASSERT(log->l_grant_reserve_cycle-1 == log->l_grant_write_cycle); + ASSERT(log->l_grant_write_bytes >= log->l_grant_reserve_bytes); + } +} /* xlog_verify_grant_head */ /* check if it will fit */ STATIC void @@ -3655,10 +3716,12 @@ xfs_log_force_umount( xlog_cil_force(log); /* - * mark the filesystem and the as in a shutdown state and wake - * everybody up to tell them the bad news. + * We must hold both the GRANT lock and the LOG lock, + * before we mark the filesystem SHUTDOWN and wake + * everybody up to tell the bad news. */ spin_lock(&log->l_icloglock); + spin_lock(&log->l_grant_lock); mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; if (mp->m_sb_bp) XFS_BUF_DONE(mp->m_sb_bp); @@ -3679,21 +3742,27 @@ xfs_log_force_umount( spin_unlock(&log->l_icloglock); /* - * We don't want anybody waiting for log reservations after this. That - * means we have to wake up everybody queued up on reserveq as well as - * writeq. In addition, we make sure in xlog_{re}grant_log_space that - * we don't enqueue anything once the SHUTDOWN flag is set, and this - * action is protected by the grant locks. + * We don't want anybody waiting for log reservations + * after this. That means we have to wake up everybody + * queued up on reserve_headq as well as write_headq. + * In addition, we make sure in xlog_{re}grant_log_space + * that we don't enqueue anything once the SHUTDOWN flag + * is set, and this action is protected by the GRANTLOCK. */ - spin_lock(&log->l_grant_reserve_lock); - list_for_each_entry(tic, &log->l_reserveq, t_queue) - wake_up(&tic->t_wait); - spin_unlock(&log->l_grant_reserve_lock); - - spin_lock(&log->l_grant_write_lock); - list_for_each_entry(tic, &log->l_writeq, t_queue) - wake_up(&tic->t_wait); - spin_unlock(&log->l_grant_write_lock); + if ((tic = log->l_reserve_headq)) { + do { + sv_signal(&tic->t_wait); + tic = tic->t_next; + } while (tic != log->l_reserve_headq); + } + + if ((tic = log->l_write_headq)) { + do { + sv_signal(&tic->t_wait); + tic = tic->t_next; + } while (tic != log->l_write_headq); + } + spin_unlock(&log->l_grant_lock); if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { ASSERT(!logerror); diff --git a/trunk/fs/xfs/xfs_log_cil.c b/trunk/fs/xfs/xfs_log_cil.c index 9dc8125d04e5..23d6ceb5e97b 100644 --- a/trunk/fs/xfs/xfs_log_cil.c +++ b/trunk/fs/xfs/xfs_log_cil.c @@ -61,7 +61,7 @@ xlog_cil_init( INIT_LIST_HEAD(&cil->xc_committing); spin_lock_init(&cil->xc_cil_lock); init_rwsem(&cil->xc_ctx_lock); - init_waitqueue_head(&cil->xc_commit_wait); + sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait"); INIT_LIST_HEAD(&ctx->committing); INIT_LIST_HEAD(&ctx->busy_extents); @@ -361,10 +361,15 @@ xlog_cil_committed( int abort) { struct xfs_cil_ctx *ctx = args; + struct xfs_log_vec *lv; + int abortflag = abort ? 
XFS_LI_ABORTED : 0; struct xfs_busy_extent *busyp, *n; - xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, - ctx->start_lsn, abort); + /* unpin all the log items */ + for (lv = ctx->lv_chain; lv; lv = lv->lv_next ) { + xfs_trans_item_committed(lv->lv_item, ctx->start_lsn, + abortflag); + } list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); @@ -563,7 +568,7 @@ xlog_cil_push( * It is still being pushed! Wait for the push to * complete, then start again from the beginning. */ - xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock); + sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); goto restart; } } @@ -587,7 +592,7 @@ xlog_cil_push( */ spin_lock(&cil->xc_cil_lock); ctx->commit_lsn = commit_lsn; - wake_up_all(&cil->xc_commit_wait); + sv_broadcast(&cil->xc_commit_wait); spin_unlock(&cil->xc_cil_lock); /* release the hounds! */ @@ -752,7 +757,7 @@ xlog_cil_force_lsn( * It is still being pushed! Wait for the push to * complete, then start again from the beginning. */ - xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock); + sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); goto restart; } if (ctx->sequence != sequence) diff --git a/trunk/fs/xfs/xfs_log_priv.h b/trunk/fs/xfs/xfs_log_priv.h index d5f8be8f4bf6..edcdfe01617f 100644 --- a/trunk/fs/xfs/xfs_log_priv.h +++ b/trunk/fs/xfs/xfs_log_priv.h @@ -21,6 +21,7 @@ struct xfs_buf; struct log; struct xlog_ticket; +struct xfs_buf_cancel; struct xfs_mount; /* @@ -53,6 +54,7 @@ struct xfs_mount; BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \ XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) + static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block) { return ((xfs_lsn_t)cycle << 32) | block; @@ -131,10 +133,12 @@ static inline uint xlog_get_client_id(__be32 i) */ #define XLOG_TIC_INITED 0x1 /* has been initialized */ #define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */ +#define XLOG_TIC_IN_Q 0x4 #define XLOG_TIC_FLAGS \ { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \ - { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" } + { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }, \ + { XLOG_TIC_IN_Q, "XLOG_TIC_IN_Q" } #endif /* __KERNEL__ */ @@ -240,8 +244,9 @@ typedef struct xlog_res { } xlog_res_t; typedef struct xlog_ticket { - wait_queue_head_t t_wait; /* ticket wait queue */ - struct list_head t_queue; /* reserve/write queue */ + sv_t t_wait; /* ticket wait queue : 20 */ + struct xlog_ticket *t_next; /* :4|8 */ + struct xlog_ticket *t_prev; /* :4|8 */ xlog_tid_t t_tid; /* transaction identifier : 4 */ atomic_t t_ref; /* ticket reference count : 4 */ int t_curr_res; /* current reservation in bytes : 4 */ @@ -348,8 +353,8 @@ typedef union xlog_in_core2 { * and move everything else out to subsequent cachelines. 
*/ typedef struct xlog_in_core { - wait_queue_head_t ic_force_wait; - wait_queue_head_t ic_write_wait; + sv_t ic_force_wait; + sv_t ic_write_wait; struct xlog_in_core *ic_next; struct xlog_in_core *ic_prev; struct xfs_buf *ic_bp; @@ -416,7 +421,7 @@ struct xfs_cil { struct xfs_cil_ctx *xc_ctx; struct rw_semaphore xc_ctx_lock; struct list_head xc_committing; - wait_queue_head_t xc_commit_wait; + sv_t xc_commit_wait; xfs_lsn_t xc_current_sequence; }; @@ -486,7 +491,7 @@ typedef struct log { struct xfs_buftarg *l_targ; /* buftarg of log */ uint l_flags; uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ - struct list_head *l_buf_cancel_table; + struct xfs_buf_cancel **l_buf_cancel_table; int l_iclog_hsize; /* size of iclog header */ int l_iclog_heads; /* # of iclog header sectors */ uint l_sectBBsize; /* sector size in BBs (2^n) */ @@ -498,40 +503,29 @@ typedef struct log { int l_logBBsize; /* size of log in BB chunks */ /* The following block of fields are changed while holding icloglock */ - wait_queue_head_t l_flush_wait ____cacheline_aligned_in_smp; + sv_t l_flush_wait ____cacheline_aligned_in_smp; /* waiting for iclog flush */ int l_covered_state;/* state of "covering disk * log entries" */ xlog_in_core_t *l_iclog; /* head log queue */ spinlock_t l_icloglock; /* grab to change iclog state */ + xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed + * buffers */ + xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */ int l_curr_cycle; /* Cycle number of log writes */ int l_prev_cycle; /* Cycle number before last * block increment */ int l_curr_block; /* current logical log block */ int l_prev_block; /* previous logical log block */ - /* - * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and - * read without needing to hold specific locks. To avoid operations - * contending with other hot objects, place each of them on a separate - * cacheline. - */ - /* lsn of last LR on disk */ - atomic64_t l_last_sync_lsn ____cacheline_aligned_in_smp; - /* lsn of 1st LR with unflushed * buffers */ - atomic64_t l_tail_lsn ____cacheline_aligned_in_smp; - - /* - * ticket grant locks, queues and accounting have their own cachlines - * as these are quite hot and can be operated on concurrently. - */ - spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp; - struct list_head l_reserveq; - atomic64_t l_grant_reserve_head; - - spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp; - struct list_head l_writeq; - atomic64_t l_grant_write_head; + /* The following block of fields are changed while holding grant_lock */ + spinlock_t l_grant_lock ____cacheline_aligned_in_smp; + xlog_ticket_t *l_reserve_headq; + xlog_ticket_t *l_write_headq; + int l_grant_reserve_cycle; + int l_grant_reserve_bytes; + int l_grant_write_cycle; + int l_grant_write_bytes; /* The following field are used for debugging; need to hold icloglock */ #ifdef DEBUG @@ -540,9 +534,6 @@ typedef struct log { } xlog_t; -#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \ - ((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE)) - #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) /* common routines */ @@ -570,61 +561,6 @@ int xlog_write(struct log *log, struct xfs_log_vec *log_vector, struct xlog_ticket *tic, xfs_lsn_t *start_lsn, xlog_in_core_t **commit_iclog, uint flags); -/* - * When we crack an atomic LSN, we sample it first so that the value will not - * change while we are cracking it into the component values. This means we - * will always get consistent component values to work from. 
This should always - * be used to smaple and crack LSNs taht are stored and updated in atomic - * variables. - */ -static inline void -xlog_crack_atomic_lsn(atomic64_t *lsn, uint *cycle, uint *block) -{ - xfs_lsn_t val = atomic64_read(lsn); - - *cycle = CYCLE_LSN(val); - *block = BLOCK_LSN(val); -} - -/* - * Calculate and assign a value to an atomic LSN variable from component pieces. - */ -static inline void -xlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block) -{ - atomic64_set(lsn, xlog_assign_lsn(cycle, block)); -} - -/* - * When we crack the grant head, we sample it first so that the value will not - * change while we are cracking it into the component values. This means we - * will always get consistent component values to work from. - */ -static inline void -xlog_crack_grant_head_val(int64_t val, int *cycle, int *space) -{ - *cycle = val >> 32; - *space = val & 0xffffffff; -} - -static inline void -xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space) -{ - xlog_crack_grant_head_val(atomic64_read(head), cycle, space); -} - -static inline int64_t -xlog_assign_grant_head_val(int cycle, int space) -{ - return ((int64_t)cycle << 32) | space; -} - -static inline void -xlog_assign_grant_head(atomic64_t *head, int cycle, int space) -{ - atomic64_set(head, xlog_assign_grant_head_val(cycle, space)); -} - /* * Committed Item List interfaces */ @@ -649,21 +585,6 @@ xlog_cil_force(struct log *log) */ #define XLOG_UNMOUNT_REC_TYPE (-1U) -/* - * Wrapper function for waiting on a wait queue serialised against wakeups - * by a spinlock. This matches the semantics of all the wait queues used in the - * log code. - */ -static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock) -{ - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue_exclusive(wq, &wait); - __set_current_state(TASK_UNINTERRUPTIBLE); - spin_unlock(lock); - schedule(); - remove_wait_queue(wq, &wait); -} #endif /* __KERNEL__ */ #endif /* __XFS_LOG_PRIV_H__ */ diff --git a/trunk/fs/xfs/xfs_log_recover.c b/trunk/fs/xfs/xfs_log_recover.c index 204d8e5fa7fa..966d3f97458c 100644 --- a/trunk/fs/xfs/xfs_log_recover.c +++ b/trunk/fs/xfs/xfs_log_recover.c @@ -52,17 +52,6 @@ STATIC void xlog_recover_check_summary(xlog_t *); #define xlog_recover_check_summary(log) #endif -/* - * This structure is used during recovery to record the buf log items which - * have been canceled and should not be replayed. - */ -struct xfs_buf_cancel { - xfs_daddr_t bc_blkno; - uint bc_len; - int bc_refcount; - struct list_head bc_list; -}; - /* * Sector aligned buffer routines for buffer create/read/write/access */ @@ -936,12 +925,12 @@ xlog_find_tail( log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); if (found == 2) log->l_curr_cycle++; - atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); - atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); - xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle, - BBTOB(log->l_curr_block)); - xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle, - BBTOB(log->l_curr_block)); + log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn); + log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn); + log->l_grant_reserve_cycle = log->l_curr_cycle; + log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); + log->l_grant_write_cycle = log->l_curr_cycle; + log->l_grant_write_bytes = BBTOB(log->l_curr_block); /* * Look for unmount record. 
If we find it, then we know there @@ -971,7 +960,7 @@ xlog_find_tail( } after_umount_blk = (i + hblks + (int) BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; - tail_lsn = atomic64_read(&log->l_tail_lsn); + tail_lsn = log->l_tail_lsn; if (*head_blk == after_umount_blk && be32_to_cpu(rhead->h_num_logops) == 1) { umount_data_blk = (i + hblks) % log->l_logBBsize; @@ -986,10 +975,12 @@ xlog_find_tail( * log records will point recovery to after the * current unmount record. */ - xlog_assign_atomic_lsn(&log->l_tail_lsn, - log->l_curr_cycle, after_umount_blk); - xlog_assign_atomic_lsn(&log->l_last_sync_lsn, - log->l_curr_cycle, after_umount_blk); + log->l_tail_lsn = + xlog_assign_lsn(log->l_curr_cycle, + after_umount_blk); + log->l_last_sync_lsn = + xlog_assign_lsn(log->l_curr_cycle, + after_umount_blk); *tail_blk = after_umount_blk; /* @@ -1614,45 +1605,82 @@ xlog_recover_reorder_trans( * record in the table to tell us how many times we expect to see this * record during the second pass. */ -STATIC int -xlog_recover_buffer_pass1( - struct log *log, - xlog_recover_item_t *item) +STATIC void +xlog_recover_do_buffer_pass1( + xlog_t *log, + xfs_buf_log_format_t *buf_f) { - xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; - struct list_head *bucket; - struct xfs_buf_cancel *bcp; + xfs_buf_cancel_t *bcp; + xfs_buf_cancel_t *nextp; + xfs_buf_cancel_t *prevp; + xfs_buf_cancel_t **bucket; + xfs_daddr_t blkno = 0; + uint len = 0; + ushort flags = 0; + + switch (buf_f->blf_type) { + case XFS_LI_BUF: + blkno = buf_f->blf_blkno; + len = buf_f->blf_len; + flags = buf_f->blf_flags; + break; + } /* * If this isn't a cancel buffer item, then just return. */ - if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { + if (!(flags & XFS_BLF_CANCEL)) { trace_xfs_log_recover_buf_not_cancel(log, buf_f); - return 0; + return; } /* - * Insert an xfs_buf_cancel record into the hash table of them. - * If there is already an identical record, bump its reference count. + * Insert an xfs_buf_cancel record into the hash table of + * them. If there is already an identical record, bump + * its reference count. */ - bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno); - list_for_each_entry(bcp, bucket, bc_list) { - if (bcp->bc_blkno == buf_f->blf_blkno && - bcp->bc_len == buf_f->blf_len) { - bcp->bc_refcount++; - trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); - return 0; - } + bucket = &log->l_buf_cancel_table[(__uint64_t)blkno % + XLOG_BC_TABLE_SIZE]; + /* + * If the hash bucket is empty then just insert a new record into + * the bucket. + */ + if (*bucket == NULL) { + bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), + KM_SLEEP); + bcp->bc_blkno = blkno; + bcp->bc_len = len; + bcp->bc_refcount = 1; + bcp->bc_next = NULL; + *bucket = bcp; + return; } - bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP); - bcp->bc_blkno = buf_f->blf_blkno; - bcp->bc_len = buf_f->blf_len; + /* + * The hash bucket is not empty, so search for duplicates of our + * record. If we find one them just bump its refcount. If not + * then add us at the end of the list. 
+ */ + prevp = NULL; + nextp = *bucket; + while (nextp != NULL) { + if (nextp->bc_blkno == blkno && nextp->bc_len == len) { + nextp->bc_refcount++; + trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); + return; + } + prevp = nextp; + nextp = nextp->bc_next; + } + ASSERT(prevp != NULL); + bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), + KM_SLEEP); + bcp->bc_blkno = blkno; + bcp->bc_len = len; bcp->bc_refcount = 1; - list_add_tail(&bcp->bc_list, bucket); - + bcp->bc_next = NULL; + prevp->bc_next = bcp; trace_xfs_log_recover_buf_cancel_add(log, buf_f); - return 0; } /* @@ -1670,13 +1698,14 @@ xlog_recover_buffer_pass1( */ STATIC int xlog_check_buffer_cancelled( - struct log *log, + xlog_t *log, xfs_daddr_t blkno, uint len, ushort flags) { - struct list_head *bucket; - struct xfs_buf_cancel *bcp; + xfs_buf_cancel_t *bcp; + xfs_buf_cancel_t *prevp; + xfs_buf_cancel_t **bucket; if (log->l_buf_cancel_table == NULL) { /* @@ -1687,70 +1716,128 @@ xlog_check_buffer_cancelled( return 0; } + bucket = &log->l_buf_cancel_table[(__uint64_t)blkno % + XLOG_BC_TABLE_SIZE]; + bcp = *bucket; + if (bcp == NULL) { + /* + * There is no corresponding entry in the table built + * in pass one, so this buffer has not been cancelled. + */ + ASSERT(!(flags & XFS_BLF_CANCEL)); + return 0; + } + /* - * Search for an entry in the cancel table that matches our buffer. + * Search for an entry in the buffer cancel table that + * matches our buffer. */ - bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno); - list_for_each_entry(bcp, bucket, bc_list) { - if (bcp->bc_blkno == blkno && bcp->bc_len == len) - goto found; + prevp = NULL; + while (bcp != NULL) { + if (bcp->bc_blkno == blkno && bcp->bc_len == len) { + /* + * We've go a match, so return 1 so that the + * recovery of this buffer is cancelled. + * If this buffer is actually a buffer cancel + * log item, then decrement the refcount on the + * one in the table and remove it if this is the + * last reference. + */ + if (flags & XFS_BLF_CANCEL) { + bcp->bc_refcount--; + if (bcp->bc_refcount == 0) { + if (prevp == NULL) { + *bucket = bcp->bc_next; + } else { + prevp->bc_next = bcp->bc_next; + } + kmem_free(bcp); + } + } + return 1; + } + prevp = bcp; + bcp = bcp->bc_next; } - /* - * We didn't find a corresponding entry in the table, so return 0 so - * that the buffer is NOT cancelled. + * We didn't find a corresponding entry in the table, so + * return 0 so that the buffer is NOT cancelled. */ ASSERT(!(flags & XFS_BLF_CANCEL)); return 0; +} -found: - /* - * We've go a match, so return 1 so that the recovery of this buffer - * is cancelled. If this buffer is actually a buffer cancel log - * item, then decrement the refcount on the one in the table and - * remove it if this is the last reference. - */ - if (flags & XFS_BLF_CANCEL) { - if (--bcp->bc_refcount == 0) { - list_del(&bcp->bc_list); - kmem_free(bcp); - } +STATIC int +xlog_recover_do_buffer_pass2( + xlog_t *log, + xfs_buf_log_format_t *buf_f) +{ + xfs_daddr_t blkno = 0; + ushort flags = 0; + uint len = 0; + + switch (buf_f->blf_type) { + case XFS_LI_BUF: + blkno = buf_f->blf_blkno; + flags = buf_f->blf_flags; + len = buf_f->blf_len; + break; } - return 1; + + return xlog_check_buffer_cancelled(log, blkno, len, flags); } /* - * Perform recovery for a buffer full of inodes. In these buffers, the only - * data which should be recovered is that which corresponds to the - * di_next_unlinked pointers in the on disk inode structures. 
The rest of the - * data for the inodes is always logged through the inodes themselves rather - * than the inode buffer and is recovered in xlog_recover_inode_pass2(). + * Perform recovery for a buffer full of inodes. In these buffers, + * the only data which should be recovered is that which corresponds + * to the di_next_unlinked pointers in the on disk inode structures. + * The rest of the data for the inodes is always logged through the + * inodes themselves rather than the inode buffer and is recovered + * in xlog_recover_do_inode_trans(). * - * The only time when buffers full of inodes are fully recovered is when the - * buffer is full of newly allocated inodes. In this case the buffer will - * not be marked as an inode buffer and so will be sent to - * xlog_recover_do_reg_buffer() below during recovery. + * The only time when buffers full of inodes are fully recovered is + * when the buffer is full of newly allocated inodes. In this case + * the buffer will not be marked as an inode buffer and so will be + * sent to xlog_recover_do_reg_buffer() below during recovery. */ STATIC int xlog_recover_do_inode_buffer( - struct xfs_mount *mp, + xfs_mount_t *mp, xlog_recover_item_t *item, - struct xfs_buf *bp, + xfs_buf_t *bp, xfs_buf_log_format_t *buf_f) { int i; - int item_index = 0; - int bit = 0; - int nbits = 0; - int reg_buf_offset = 0; - int reg_buf_bytes = 0; + int item_index; + int bit; + int nbits; + int reg_buf_offset; + int reg_buf_bytes; int next_unlinked_offset; int inodes_per_buf; xfs_agino_t *logged_nextp; xfs_agino_t *buffer_nextp; + unsigned int *data_map = NULL; + unsigned int map_size = 0; trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); + switch (buf_f->blf_type) { + case XFS_LI_BUF: + data_map = buf_f->blf_data_map; + map_size = buf_f->blf_map_size; + break; + } + /* + * Set the variables corresponding to the current region to + * 0 so that we'll initialize them on the first pass through + * the loop. + */ + reg_buf_offset = 0; + reg_buf_bytes = 0; + bit = 0; + nbits = 0; + item_index = 0; inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; for (i = 0; i < inodes_per_buf; i++) { next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + @@ -1765,18 +1852,18 @@ xlog_recover_do_inode_buffer( * the current di_next_unlinked field. */ bit += nbits; - bit = xfs_next_bit(buf_f->blf_data_map, - buf_f->blf_map_size, bit); + bit = xfs_next_bit(data_map, map_size, bit); /* * If there are no more logged regions in the * buffer, then we're done. */ - if (bit == -1) + if (bit == -1) { return 0; + } - nbits = xfs_contig_bits(buf_f->blf_data_map, - buf_f->blf_map_size, bit); + nbits = xfs_contig_bits(data_map, map_size, + bit); ASSERT(nbits > 0); reg_buf_offset = bit << XFS_BLF_SHIFT; reg_buf_bytes = nbits << XFS_BLF_SHIFT; @@ -1788,8 +1875,9 @@ xlog_recover_do_inode_buffer( * di_next_unlinked field, then move on to the next * di_next_unlinked field. */ - if (next_unlinked_offset < reg_buf_offset) + if (next_unlinked_offset < reg_buf_offset) { continue; + } ASSERT(item->ri_buf[item_index].i_addr != NULL); ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); @@ -1825,29 +1913,36 @@ xlog_recover_do_inode_buffer( * given buffer. The bitmap in the buf log format structure indicates * where to place the logged data. 
*/ +/*ARGSUSED*/ STATIC void xlog_recover_do_reg_buffer( struct xfs_mount *mp, xlog_recover_item_t *item, - struct xfs_buf *bp, + xfs_buf_t *bp, xfs_buf_log_format_t *buf_f) { int i; int bit; int nbits; + unsigned int *data_map = NULL; + unsigned int map_size = 0; int error; trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); + switch (buf_f->blf_type) { + case XFS_LI_BUF: + data_map = buf_f->blf_data_map; + map_size = buf_f->blf_map_size; + break; + } bit = 0; i = 1; /* 0 is the buf format structure */ while (1) { - bit = xfs_next_bit(buf_f->blf_data_map, - buf_f->blf_map_size, bit); + bit = xfs_next_bit(data_map, map_size, bit); if (bit == -1) break; - nbits = xfs_contig_bits(buf_f->blf_data_map, - buf_f->blf_map_size, bit); + nbits = xfs_contig_bits(data_map, map_size, bit); ASSERT(nbits > 0); ASSERT(item->ri_buf[i].i_addr != NULL); ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); @@ -2081,46 +2176,77 @@ xlog_recover_do_dquot_buffer( * for more details on the implementation of the table of cancel records. */ STATIC int -xlog_recover_buffer_pass2( +xlog_recover_do_buffer_trans( xlog_t *log, - xlog_recover_item_t *item) + xlog_recover_item_t *item, + int pass) { xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; - xfs_mount_t *mp = log->l_mp; + xfs_mount_t *mp; xfs_buf_t *bp; int error; + int cancel; + xfs_daddr_t blkno; + int len; + ushort flags; uint buf_flags; - /* - * In this pass we only want to recover all the buffers which have - * not been cancelled and are not cancellation buffers themselves. - */ - if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno, - buf_f->blf_len, buf_f->blf_flags)) { - trace_xfs_log_recover_buf_cancel(log, buf_f); + if (pass == XLOG_RECOVER_PASS1) { + /* + * In this pass we're only looking for buf items + * with the XFS_BLF_CANCEL bit set. + */ + xlog_recover_do_buffer_pass1(log, buf_f); return 0; + } else { + /* + * In this pass we want to recover all the buffers + * which have not been cancelled and are not + * cancellation buffers themselves. The routine + * we call here will tell us whether or not to + * continue with the replay of this buffer. + */ + cancel = xlog_recover_do_buffer_pass2(log, buf_f); + if (cancel) { + trace_xfs_log_recover_buf_cancel(log, buf_f); + return 0; + } } - trace_xfs_log_recover_buf_recover(log, buf_f); + switch (buf_f->blf_type) { + case XFS_LI_BUF: + blkno = buf_f->blf_blkno; + len = buf_f->blf_len; + flags = buf_f->blf_flags; + break; + default: + xfs_fs_cmn_err(CE_ALERT, log->l_mp, + "xfs_log_recover: unknown buffer type 0x%x, logdev %s", + buf_f->blf_type, log->l_mp->m_logname ? 
+ log->l_mp->m_logname : "internal"); + XFS_ERROR_REPORT("xlog_recover_do_buffer_trans", + XFS_ERRLEVEL_LOW, log->l_mp); + return XFS_ERROR(EFSCORRUPTED); + } + mp = log->l_mp; buf_flags = XBF_LOCK; - if (!(buf_f->blf_flags & XFS_BLF_INODE_BUF)) + if (!(flags & XFS_BLF_INODE_BUF)) buf_flags |= XBF_MAPPED; - bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, - buf_flags); + bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); if (XFS_BUF_ISERROR(bp)) { - xfs_ioerror_alert("xlog_recover_do..(read#1)", mp, - bp, buf_f->blf_blkno); + xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp, + bp, blkno); error = XFS_BUF_GETERROR(bp); xfs_buf_relse(bp); return error; } error = 0; - if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { + if (flags & XFS_BLF_INODE_BUF) { error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); - } else if (buf_f->blf_flags & + } else if (flags & (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); } else { @@ -2160,14 +2286,16 @@ xlog_recover_buffer_pass2( } STATIC int -xlog_recover_inode_pass2( +xlog_recover_do_inode_trans( xlog_t *log, - xlog_recover_item_t *item) + xlog_recover_item_t *item, + int pass) { xfs_inode_log_format_t *in_f; - xfs_mount_t *mp = log->l_mp; + xfs_mount_t *mp; xfs_buf_t *bp; xfs_dinode_t *dip; + xfs_ino_t ino; int len; xfs_caddr_t src; xfs_caddr_t dest; @@ -2177,6 +2305,10 @@ xlog_recover_inode_pass2( xfs_icdinode_t *dicp; int need_free = 0; + if (pass == XLOG_RECOVER_PASS1) { + return 0; + } + if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { in_f = item->ri_buf[0].i_addr; } else { @@ -2186,6 +2318,8 @@ xlog_recover_inode_pass2( if (error) goto error; } + ino = in_f->ilf_ino; + mp = log->l_mp; /* * Inode buffers can be freed, look out for it, @@ -2220,8 +2354,8 @@ xlog_recover_inode_pass2( xfs_buf_relse(bp); xfs_fs_cmn_err(CE_ALERT, mp, "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", - dip, bp, in_f->ilf_ino); - XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", + dip, bp, ino); + XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)", XFS_ERRLEVEL_LOW, mp); error = EFSCORRUPTED; goto error; @@ -2231,8 +2365,8 @@ xlog_recover_inode_pass2( xfs_buf_relse(bp); xfs_fs_cmn_err(CE_ALERT, mp, "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld", - item, in_f->ilf_ino); - XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", + item, ino); + XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)", XFS_ERRLEVEL_LOW, mp); error = EFSCORRUPTED; goto error; @@ -2260,12 +2394,12 @@ xlog_recover_inode_pass2( if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) { if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && (dicp->di_format != XFS_DINODE_FMT_BTREE)) { - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", + XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(3)", XFS_ERRLEVEL_LOW, mp, dicp); xfs_buf_relse(bp); xfs_fs_cmn_err(CE_ALERT, mp, "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", - item, dip, bp, in_f->ilf_ino); + item, dip, bp, ino); error = EFSCORRUPTED; goto error; } @@ -2273,40 +2407,40 @@ xlog_recover_inode_pass2( if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && (dicp->di_format != XFS_DINODE_FMT_BTREE) && (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", + XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(4)", XFS_ERRLEVEL_LOW, mp, dicp); xfs_buf_relse(bp); xfs_fs_cmn_err(CE_ALERT, mp, "xfs_inode_recover: 
Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", - item, dip, bp, in_f->ilf_ino); + item, dip, bp, ino); error = EFSCORRUPTED; goto error; } } if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", + XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(5)", XFS_ERRLEVEL_LOW, mp, dicp); xfs_buf_relse(bp); xfs_fs_cmn_err(CE_ALERT, mp, "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", - item, dip, bp, in_f->ilf_ino, + item, dip, bp, ino, dicp->di_nextents + dicp->di_anextents, dicp->di_nblocks); error = EFSCORRUPTED; goto error; } if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", + XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)", XFS_ERRLEVEL_LOW, mp, dicp); xfs_buf_relse(bp); xfs_fs_cmn_err(CE_ALERT, mp, "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x", - item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); + item, dip, bp, ino, dicp->di_forkoff); error = EFSCORRUPTED; goto error; } if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", + XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)", XFS_ERRLEVEL_LOW, mp, dicp); xfs_buf_relse(bp); xfs_fs_cmn_err(CE_ALERT, mp, @@ -2398,7 +2532,7 @@ xlog_recover_inode_pass2( break; default: - xlog_warn("XFS: xlog_recover_inode_pass2: Invalid flag"); + xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag"); ASSERT(0); xfs_buf_relse(bp); error = EIO; @@ -2422,11 +2556,18 @@ xlog_recover_inode_pass2( * of that type. */ STATIC int -xlog_recover_quotaoff_pass1( +xlog_recover_do_quotaoff_trans( xlog_t *log, - xlog_recover_item_t *item) + xlog_recover_item_t *item, + int pass) { - xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr; + xfs_qoff_logformat_t *qoff_f; + + if (pass == XLOG_RECOVER_PASS2) { + return (0); + } + + qoff_f = item->ri_buf[0].i_addr; ASSERT(qoff_f); /* @@ -2447,17 +2588,22 @@ xlog_recover_quotaoff_pass1( * Recover a dquot record */ STATIC int -xlog_recover_dquot_pass2( +xlog_recover_do_dquot_trans( xlog_t *log, - xlog_recover_item_t *item) + xlog_recover_item_t *item, + int pass) { - xfs_mount_t *mp = log->l_mp; + xfs_mount_t *mp; xfs_buf_t *bp; struct xfs_disk_dquot *ddq, *recddq; int error; xfs_dq_logformat_t *dq_f; uint type; + if (pass == XLOG_RECOVER_PASS1) { + return 0; + } + mp = log->l_mp; /* * Filesystems are required to send in quota flags at mount time. @@ -2501,7 +2647,7 @@ xlog_recover_dquot_pass2( if ((error = xfs_qm_dqcheck(recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, - "xlog_recover_dquot_pass2 (log copy)"))) { + "xlog_recover_do_dquot_trans (log copy)"))) { return XFS_ERROR(EIO); } ASSERT(dq_f->qlf_len == 1); @@ -2524,7 +2670,7 @@ xlog_recover_dquot_pass2( * minimal initialization then. */ if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, - "xlog_recover_dquot_pass2")) { + "xlog_recover_do_dquot_trans")) { xfs_buf_relse(bp); return XFS_ERROR(EIO); } @@ -2547,31 +2693,38 @@ xlog_recover_dquot_pass2( * LSN. 
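Before a logged inode is copied back over the on-disk copy, the recovery code runs a series of sanity checks (magic number, format consistent with the file mode, extent counts not exceeding the block count, fork offset within the inode) and bails out with EFSCORRUPTED if any fail. A condensed, hypothetical version of that layered validation, with invented field names and EUCLEAN standing in for the kernel's EFSCORRUPTED, looks like this:

#include <errno.h>
#include <stdint.h>

#define INODE_MAGIC	0x494e		/* "IN", purely illustrative */

/* Invented, simplified on-disk inode fields for the sketch. */
struct din {
	uint16_t magic;
	uint16_t mode;			/* upper bits encode the file type */
	uint8_t  format;		/* 1 = local, 2 = extents, 3 = btree */
	uint32_t nextents;
	uint64_t nblocks;
	uint16_t forkoff;
};

static int validate_logged_inode(const struct din *d, unsigned int inodesize)
{
	if (d->magic != INODE_MAGIC)
		return -EUCLEAN;			/* bad magic number */
	if ((d->mode & 0xf000) == 0x8000 &&		/* regular file */
	    d->format != 2 && d->format != 3)
		return -EUCLEAN;			/* regular files need extents or btree */
	if (d->nextents > d->nblocks)
		return -EUCLEAN;			/* more extents than blocks is impossible */
	if (d->forkoff > inodesize)
		return -EUCLEAN;			/* attr fork offset past end of inode */
	return 0;					/* safe to replay */
}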
*/ STATIC int -xlog_recover_efi_pass2( +xlog_recover_do_efi_trans( xlog_t *log, xlog_recover_item_t *item, - xfs_lsn_t lsn) + xfs_lsn_t lsn, + int pass) { int error; - xfs_mount_t *mp = log->l_mp; + xfs_mount_t *mp; xfs_efi_log_item_t *efip; xfs_efi_log_format_t *efi_formatp; + if (pass == XLOG_RECOVER_PASS1) { + return 0; + } + efi_formatp = item->ri_buf[0].i_addr; + mp = log->l_mp; efip = xfs_efi_init(mp, efi_formatp->efi_nextents); if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), &(efip->efi_format)))) { xfs_efi_item_free(efip); return error; } - atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents); + efip->efi_next_extent = efi_formatp->efi_nextents; + efip->efi_flags |= XFS_EFI_COMMITTED; spin_lock(&log->l_ailp->xa_lock); /* * xfs_trans_ail_update() drops the AIL lock. */ - xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn); + xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn); return 0; } @@ -2584,10 +2737,11 @@ xlog_recover_efi_pass2( * efd format structure. If we find it, we remove the efi from the * AIL and free it. */ -STATIC int -xlog_recover_efd_pass2( +STATIC void +xlog_recover_do_efd_trans( xlog_t *log, - xlog_recover_item_t *item) + xlog_recover_item_t *item, + int pass) { xfs_efd_log_format_t *efd_formatp; xfs_efi_log_item_t *efip = NULL; @@ -2596,6 +2750,10 @@ xlog_recover_efd_pass2( struct xfs_ail_cursor cur; struct xfs_ail *ailp = log->l_ailp; + if (pass == XLOG_RECOVER_PASS1) { + return; + } + efd_formatp = item->ri_buf[0].i_addr; ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || @@ -2627,6 +2785,62 @@ xlog_recover_efd_pass2( } xfs_trans_ail_cursor_done(ailp, &cur); spin_unlock(&ailp->xa_lock); +} + +/* + * Perform the transaction + * + * If the transaction modifies a buffer or inode, do it now. Otherwise, + * EFIs and EFDs get queued up by adding entries into the AIL for them. 
+ */ +STATIC int +xlog_recover_do_trans( + xlog_t *log, + xlog_recover_t *trans, + int pass) +{ + int error = 0; + xlog_recover_item_t *item; + + error = xlog_recover_reorder_trans(log, trans, pass); + if (error) + return error; + + list_for_each_entry(item, &trans->r_itemq, ri_list) { + trace_xfs_log_recover_item_recover(log, trans, item, pass); + switch (ITEM_TYPE(item)) { + case XFS_LI_BUF: + error = xlog_recover_do_buffer_trans(log, item, pass); + break; + case XFS_LI_INODE: + error = xlog_recover_do_inode_trans(log, item, pass); + break; + case XFS_LI_EFI: + error = xlog_recover_do_efi_trans(log, item, + trans->r_lsn, pass); + break; + case XFS_LI_EFD: + xlog_recover_do_efd_trans(log, item, pass); + error = 0; + break; + case XFS_LI_DQUOT: + error = xlog_recover_do_dquot_trans(log, item, pass); + break; + case XFS_LI_QUOTAOFF: + error = xlog_recover_do_quotaoff_trans(log, item, + pass); + break; + default: + xlog_warn( + "XFS: invalid item type (%d) xlog_recover_do_trans", ITEM_TYPE(item)); + ASSERT(0); + error = XFS_ERROR(EIO); + break; + } + + if (error) + return error; + } return 0; } @@ -2638,7 +2852,7 @@ xlog_recover_efd_pass2( */ STATIC void xlog_recover_free_trans( - struct xlog_recover *trans) + xlog_recover_t *trans) { xlog_recover_item_t *item, *n; int i; @@ -2656,96 +2870,18 @@ xlog_recover_free_trans( kmem_free(trans); } -STATIC int -xlog_recover_commit_pass1( - struct log *log, - struct xlog_recover *trans, - xlog_recover_item_t *item) -{ - trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1); - - switch (ITEM_TYPE(item)) { - case XFS_LI_BUF: - return xlog_recover_buffer_pass1(log, item); - case XFS_LI_QUOTAOFF: - return xlog_recover_quotaoff_pass1(log, item); - case XFS_LI_INODE: - case XFS_LI_EFI: - case XFS_LI_EFD: - case XFS_LI_DQUOT: - /* nothing to do in pass 1 */ - return 0; - default: - xlog_warn( - "XFS: invalid item type (%d) xlog_recover_commit_pass1", - ITEM_TYPE(item)); - ASSERT(0); - return XFS_ERROR(EIO); - } -} - -STATIC int -xlog_recover_commit_pass2( - struct log *log, - struct xlog_recover *trans, - xlog_recover_item_t *item) -{ - trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); - - switch (ITEM_TYPE(item)) { - case XFS_LI_BUF: - return xlog_recover_buffer_pass2(log, item); - case XFS_LI_INODE: - return xlog_recover_inode_pass2(log, item); - case XFS_LI_EFI: - return xlog_recover_efi_pass2(log, item, trans->r_lsn); - case XFS_LI_EFD: - return xlog_recover_efd_pass2(log, item); - case XFS_LI_DQUOT: - return xlog_recover_dquot_pass2(log, item); - case XFS_LI_QUOTAOFF: - /* nothing to do in pass2 */ - return 0; - default: - xlog_warn( - "XFS: invalid item type (%d) xlog_recover_commit_pass2", - ITEM_TYPE(item)); - ASSERT(0); - return XFS_ERROR(EIO); - } -} - -/* - * Perform the transaction. - * - * If the transaction modifies a buffer or inode, do it now. Otherwise, - * EFIs and EFDs get queued up by adding entries into the AIL for them. 
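This hunk folds the per-pass commit helpers back into a single xlog_recover_do_trans() that takes the pass number and switches on the item type, with each handler deciding whether the current pass concerns it. A minimal sketch of that dispatch shape, assuming made-up item types and handlers, is:

#include <stdio.h>

enum pass { PASS1 = 1, PASS2 = 2 };
enum item_type { ITEM_BUF, ITEM_INODE, ITEM_QUOTAOFF };

struct item { enum item_type type; };

/* Each handler decides internally whether the current pass concerns it. */
static int recover_buf(const struct item *it, enum pass pass)
{
	if (pass == PASS1)
		printf("pass 1: note cancelled buffers\n");
	else
		printf("pass 2: replay non-cancelled buffers\n");
	return 0;
}

static int recover_inode(const struct item *it, enum pass pass)
{
	if (pass == PASS1)
		return 0;			/* inodes are only replayed in pass 2 */
	printf("pass 2: replay inode\n");
	return 0;
}

static int recover_quotaoff(const struct item *it, enum pass pass)
{
	if (pass == PASS2)
		return 0;			/* quota-off state only matters in pass 1 */
	printf("pass 1: record quota-off\n");
	return 0;
}

static int recover_item(const struct item *it, enum pass pass)
{
	switch (it->type) {
	case ITEM_BUF:		return recover_buf(it, pass);
	case ITEM_INODE:	return recover_inode(it, pass);
	case ITEM_QUOTAOFF:	return recover_quotaoff(it, pass);
	default:		return -1;	/* unknown type: treat the log as corrupt */
	}
}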
- */ STATIC int xlog_recover_commit_trans( - struct log *log, - struct xlog_recover *trans, + xlog_t *log, + xlog_recover_t *trans, int pass) { - int error = 0; - xlog_recover_item_t *item; + int error; hlist_del(&trans->r_list); - - error = xlog_recover_reorder_trans(log, trans, pass); - if (error) + if ((error = xlog_recover_do_trans(log, trans, pass))) return error; - - list_for_each_entry(item, &trans->r_itemq, ri_list) { - if (pass == XLOG_RECOVER_PASS1) - error = xlog_recover_commit_pass1(log, trans, item); - else - error = xlog_recover_commit_pass2(log, trans, item); - if (error) - return error; - } - - xlog_recover_free_trans(trans); + xlog_recover_free_trans(trans); /* no error */ return 0; } @@ -2875,7 +3011,7 @@ xlog_recover_process_efi( xfs_extent_t *extp; xfs_fsblock_t startblock_fsb; - ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)); + ASSERT(!(efip->efi_flags & XFS_EFI_RECOVERED)); /* * First check the validity of the extents described by the @@ -2914,7 +3050,7 @@ xlog_recover_process_efi( extp->ext_len); } - set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); + efip->efi_flags |= XFS_EFI_RECOVERED; error = xfs_trans_commit(tp, 0); return error; @@ -2971,7 +3107,7 @@ xlog_recover_process_efis( * Skip EFIs that we've already processed. */ efip = (xfs_efi_log_item_t *)lip; - if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) { + if (efip->efi_flags & XFS_EFI_RECOVERED) { lip = xfs_trans_ail_cursor_next(ailp, &cur); continue; } @@ -3588,7 +3724,7 @@ xlog_do_log_recovery( xfs_daddr_t head_blk, xfs_daddr_t tail_blk) { - int error, i; + int error; ASSERT(head_blk != tail_blk); @@ -3596,12 +3732,10 @@ xlog_do_log_recovery( * First do a pass to find all of the cancelled buf log items. * Store them in the buf_cancel_table for use in the second pass. */ - log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE * - sizeof(struct list_head), + log->l_buf_cancel_table = + (xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE * + sizeof(xfs_buf_cancel_t*), KM_SLEEP); - for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) - INIT_LIST_HEAD(&log->l_buf_cancel_table[i]); - error = xlog_do_recovery_pass(log, head_blk, tail_blk, XLOG_RECOVER_PASS1); if (error != 0) { @@ -3620,7 +3754,7 @@ xlog_do_log_recovery( int i; for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) - ASSERT(list_empty(&log->l_buf_cancel_table[i])); + ASSERT(log->l_buf_cancel_table[i] == NULL); } #endif /* DEBUG */ diff --git a/trunk/fs/xfs/xfs_mount.c b/trunk/fs/xfs/xfs_mount.c index d447aef84bc3..19e9dfa1c254 100644 --- a/trunk/fs/xfs/xfs_mount.c +++ b/trunk/fs/xfs/xfs_mount.c @@ -472,7 +472,7 @@ xfs_initialize_perag( goto out_unwind; pag->pag_agno = index; pag->pag_mount = mp; - spin_lock_init(&pag->pag_ici_lock); + rwlock_init(&pag->pag_ici_lock); mutex_init(&pag->pag_ici_reclaim_lock); INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); spin_lock_init(&pag->pag_buf_lock); @@ -974,24 +974,6 @@ xfs_set_rw_sizes(xfs_mount_t *mp) mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog); } -/* - * precalculate the low space thresholds for dynamic speculative preallocation. - */ -void -xfs_set_low_space_thresholds( - struct xfs_mount *mp) -{ - int i; - - for (i = 0; i < XFS_LOWSP_MAX; i++) { - __uint64_t space = mp->m_sb.sb_dblocks; - - do_div(space, 100); - mp->m_low_space[i] = space * (i + 1); - } -} - - /* * Set whether we're using inode alignment. 
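The removed xfs_set_low_space_thresholds() precalculates 1% through 5% of the filesystem's data blocks once at mount time so the hot allocation paths never have to divide. The arithmetic is just integer percentage steps; a stand-alone equivalent, with an invented example size:

#include <stdint.h>
#include <stdio.h>

#define LOWSP_MAX 5	/* thresholds at 1%, 2%, 3%, 4% and 5% of the data blocks */

static void set_low_space_thresholds(uint64_t dblocks, uint64_t low_space[LOWSP_MAX])
{
	int i;

	for (i = 0; i < LOWSP_MAX; i++)
		low_space[i] = (dblocks / 100) * (i + 1);	/* divide once, then scale */
}

int main(void)
{
	uint64_t low_space[LOWSP_MAX];
	int i;

	set_low_space_thresholds(26214400, low_space);		/* 100 GiB of 4 KiB blocks */
	for (i = 0; i < LOWSP_MAX; i++)
		printf("%d%%: %llu blocks\n", i + 1, (unsigned long long)low_space[i]);
	return 0;
}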
*/ @@ -1214,9 +1196,6 @@ xfs_mountfs( */ xfs_set_rw_sizes(mp); - /* set the low space thresholds for dynamic preallocation */ - xfs_set_low_space_thresholds(mp); - /* * Set the inode cluster size. * This may still be overridden by the file system diff --git a/trunk/fs/xfs/xfs_mount.h b/trunk/fs/xfs/xfs_mount.h index a62e8971539d..5861b4980740 100644 --- a/trunk/fs/xfs/xfs_mount.h +++ b/trunk/fs/xfs/xfs_mount.h @@ -103,16 +103,6 @@ extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t, xfs_mod_incore_sb(mp, field, delta, rsvd) #endif -/* dynamic preallocation free space thresholds, 5% down to 1% */ -enum { - XFS_LOWSP_1_PCNT = 0, - XFS_LOWSP_2_PCNT, - XFS_LOWSP_3_PCNT, - XFS_LOWSP_4_PCNT, - XFS_LOWSP_5_PCNT, - XFS_LOWSP_MAX, -}; - typedef struct xfs_mount { struct super_block *m_super; xfs_tid_t m_tid; /* next unused tid for fs */ @@ -212,8 +202,6 @@ typedef struct xfs_mount { __int64_t m_update_flags; /* sb flags we need to update on the next remount,rw */ struct shrinker m_inode_shrink; /* inode reclaim shrinker */ - int64_t m_low_space[XFS_LOWSP_MAX]; - /* low free space thresholds */ } xfs_mount_t; /* @@ -391,8 +379,6 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); extern int xfs_dev_is_read_only(struct xfs_mount *, char *); -extern void xfs_set_low_space_thresholds(struct xfs_mount *); - #endif /* __KERNEL__ */ extern void xfs_mod_sb(struct xfs_trans *, __int64_t); diff --git a/trunk/fs/xfs/xfs_trans.c b/trunk/fs/xfs/xfs_trans.c index f80a067a4658..f6d956b7711e 100644 --- a/trunk/fs/xfs/xfs_trans.c +++ b/trunk/fs/xfs/xfs_trans.c @@ -1350,7 +1350,7 @@ xfs_trans_fill_vecs( * they could be immediately flushed and we'd have to race with the flusher * trying to pull the item from the AIL as we add it. */ -static void +void xfs_trans_item_committed( struct xfs_log_item *lip, xfs_lsn_t commit_lsn, @@ -1425,83 +1425,6 @@ xfs_trans_committed( xfs_trans_free(tp); } -static inline void -xfs_log_item_batch_insert( - struct xfs_ail *ailp, - struct xfs_log_item **log_items, - int nr_items, - xfs_lsn_t commit_lsn) -{ - int i; - - spin_lock(&ailp->xa_lock); - /* xfs_trans_ail_update_bulk drops ailp->xa_lock */ - xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn); - - for (i = 0; i < nr_items; i++) - IOP_UNPIN(log_items[i], 0); -} - -/* - * Bulk operation version of xfs_trans_committed that takes a log vector of - * items to insert into the AIL. This uses bulk AIL insertion techniques to - * minimise lock traffic. - */ -void -xfs_trans_committed_bulk( - struct xfs_ail *ailp, - struct xfs_log_vec *log_vector, - xfs_lsn_t commit_lsn, - int aborted) -{ -#define LOG_ITEM_BATCH_SIZE 32 - struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; - struct xfs_log_vec *lv; - int i = 0; - - /* unpin all the log items */ - for (lv = log_vector; lv; lv = lv->lv_next ) { - struct xfs_log_item *lip = lv->lv_item; - xfs_lsn_t item_lsn; - - if (aborted) - lip->li_flags |= XFS_LI_ABORTED; - item_lsn = IOP_COMMITTED(lip, commit_lsn); - - /* item_lsn of -1 means the item was freed */ - if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) - continue; - - if (item_lsn != commit_lsn) { - - /* - * Not a bulk update option due to unusual item_lsn. - * Push into AIL immediately, rechecking the lsn once - * we have the ail lock. Then unpin the item. 
- */ - spin_lock(&ailp->xa_lock); - if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) - xfs_trans_ail_update(ailp, lip, item_lsn); - else - spin_unlock(&ailp->xa_lock); - IOP_UNPIN(lip, 0); - continue; - } - - /* Item is a candidate for bulk AIL insert. */ - log_items[i++] = lv->lv_item; - if (i >= LOG_ITEM_BATCH_SIZE) { - xfs_log_item_batch_insert(ailp, log_items, - LOG_ITEM_BATCH_SIZE, commit_lsn); - i = 0; - } - } - - /* make sure we insert the remainder! */ - if (i) - xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn); -} - /* * Called from the trans_commit code when we notice that * the filesystem is in the middle of a forced shutdown. diff --git a/trunk/fs/xfs/xfs_trans.h b/trunk/fs/xfs/xfs_trans.h index c2042b736b81..246286b77a86 100644 --- a/trunk/fs/xfs/xfs_trans.h +++ b/trunk/fs/xfs/xfs_trans.h @@ -294,8 +294,8 @@ struct xfs_log_item_desc { #define XFS_ALLOC_BTREE_REF 2 #define XFS_BMAP_BTREE_REF 2 #define XFS_DIR_BTREE_REF 2 -#define XFS_INO_REF 2 #define XFS_ATTR_BTREE_REF 1 +#define XFS_INO_REF 1 #define XFS_DQUOT_REF 1 #ifdef __KERNEL__ diff --git a/trunk/fs/xfs/xfs_trans_ail.c b/trunk/fs/xfs/xfs_trans_ail.c index c5bbbc45db91..dc9069568ff7 100644 --- a/trunk/fs/xfs/xfs_trans_ail.c +++ b/trunk/fs/xfs/xfs_trans_ail.c @@ -28,8 +28,8 @@ #include "xfs_trans_priv.h" #include "xfs_error.h" -STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t); -STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); +STATIC void xfs_ail_insert(struct xfs_ail *, xfs_log_item_t *); +STATIC xfs_log_item_t * xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *); STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *); @@ -449,153 +449,130 @@ xfs_trans_unlocked_item( xfs_log_move_tail(ailp->xa_mount, 1); } /* xfs_trans_unlocked_item */ + /* - * xfs_trans_ail_update - bulk AIL insertion operation. - * - * @xfs_trans_ail_update takes an array of log items that all need to be - * positioned at the same LSN in the AIL. If an item is not in the AIL, it will - * be added. Otherwise, it will be repositioned by removing it and re-adding - * it to the AIL. If we move the first item in the AIL, update the log tail to - * match the new minimum LSN in the AIL. + * Update the position of the item in the AIL with the new + * lsn. If it is not yet in the AIL, add it. Otherwise, move + * it to its new position by removing it and re-adding it. * - * This function takes the AIL lock once to execute the update operations on - * all the items in the array, and as such should not be called with the AIL - * lock held. As a result, once we have the AIL lock, we need to check each log - * item LSN to confirm it needs to be moved forward in the AIL. + * Wakeup anyone with an lsn less than the item's lsn. If the item + * we move in the AIL is the minimum one, update the tail lsn in the + * log manager. * - * To optimise the insert operation, we delete all the items from the AIL in - * the first pass, moving them into a temporary list, then splice the temporary - * list into the correct position in the AIL. This avoids needing to do an - * insert operation on every item. - * - * This function must be called with the AIL lock held. The lock is dropped - * before returning. + * This function must be called with the AIL lock held. The lock + * is dropped before returning. 
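The xfs_trans_committed_bulk() code being removed batches up to 32 log items into a local array and inserts the whole batch into the AIL under a single lock round-trip, falling back to an immediate insert for items with an unusual LSN. The batching itself is a generic pattern; below is a small sketch of it in which batch_insert() is an invented stand-in for the locked bulk AIL insert.

#include <stdio.h>

#define BATCH_SIZE 32

struct log_item { long lsn; };

/* Stand-in for the locked bulk AIL insert: take the lock once per batch. */
static void batch_insert(struct log_item **items, int nr, long commit_lsn)
{
	printf("insert %d items at lsn %ld under one lock hold\n", nr, commit_lsn);
}

static void committed_bulk(struct log_item **items, int nr_items, long commit_lsn)
{
	struct log_item *batch[BATCH_SIZE];
	int i, n = 0;

	for (i = 0; i < nr_items; i++) {
		if (items[i]->lsn != commit_lsn) {
			/* unusual LSN: not a candidate for the bulk path */
			batch_insert(&items[i], 1, items[i]->lsn);
			continue;
		}
		batch[n++] = items[i];
		if (n == BATCH_SIZE) {
			batch_insert(batch, n, commit_lsn);
			n = 0;
		}
	}
	if (n)					/* don't forget the remainder */
		batch_insert(batch, n, commit_lsn);
}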
*/ void -xfs_trans_ail_update_bulk( - struct xfs_ail *ailp, - struct xfs_log_item **log_items, - int nr_items, - xfs_lsn_t lsn) __releases(ailp->xa_lock) +xfs_trans_ail_update( + struct xfs_ail *ailp, + xfs_log_item_t *lip, + xfs_lsn_t lsn) __releases(ailp->xa_lock) { - xfs_log_item_t *mlip; + xfs_log_item_t *dlip = NULL; + xfs_log_item_t *mlip; /* ptr to minimum lip */ xfs_lsn_t tail_lsn; - int mlip_changed = 0; - int i; - LIST_HEAD(tmp); mlip = xfs_ail_min(ailp); - for (i = 0; i < nr_items; i++) { - struct xfs_log_item *lip = log_items[i]; - if (lip->li_flags & XFS_LI_IN_AIL) { - /* check if we really need to move the item */ - if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0) - continue; - - xfs_ail_delete(ailp, lip); - if (mlip == lip) - mlip_changed = 1; - } else { - lip->li_flags |= XFS_LI_IN_AIL; - } - lip->li_lsn = lsn; - list_add(&lip->li_ail, &tmp); + if (lip->li_flags & XFS_LI_IN_AIL) { + dlip = xfs_ail_delete(ailp, lip); + ASSERT(dlip == lip); + xfs_trans_ail_cursor_clear(ailp, dlip); + } else { + lip->li_flags |= XFS_LI_IN_AIL; } - xfs_ail_splice(ailp, &tmp, lsn); + lip->li_lsn = lsn; + xfs_ail_insert(ailp, lip); - if (!mlip_changed) { + if (mlip == dlip) { + mlip = xfs_ail_min(ailp); + /* + * It is not safe to access mlip after the AIL lock is + * dropped, so we must get a copy of li_lsn before we do + * so. This is especially important on 32-bit platforms + * where accessing and updating 64-bit values like li_lsn + * is not atomic. + */ + tail_lsn = mlip->li_lsn; + spin_unlock(&ailp->xa_lock); + xfs_log_move_tail(ailp->xa_mount, tail_lsn); + } else { spin_unlock(&ailp->xa_lock); - return; } - /* - * It is not safe to access mlip after the AIL lock is dropped, so we - * must get a copy of li_lsn before we do so. This is especially - * important on 32-bit platforms where accessing and updating 64-bit - * values like li_lsn is not atomic. - */ - mlip = xfs_ail_min(ailp); - tail_lsn = mlip->li_lsn; - spin_unlock(&ailp->xa_lock); - xfs_log_move_tail(ailp->xa_mount, tail_lsn); -} + +} /* xfs_trans_update_ail */ /* - * xfs_trans_ail_delete_bulk - remove multiple log items from the AIL + * Delete the given item from the AIL. It must already be in + * the AIL. * - * @xfs_trans_ail_delete_bulk takes an array of log items that all need to - * removed from the AIL. The caller is already holding the AIL lock, and done - * all the checks necessary to ensure the items passed in via @log_items are - * ready for deletion. This includes checking that the items are in the AIL. + * Wakeup anyone with an lsn less than item's lsn. If the item + * we delete in the AIL is the minimum one, update the tail lsn in the + * log manager. * - * For each log item to be removed, unlink it from the AIL, clear the IN_AIL - * flag from the item and reset the item's lsn to 0. If we remove the first - * item in the AIL, update the log tail to match the new minimum LSN in the - * AIL. + * Clear the IN_AIL flag from the item, reset its lsn to 0, and + * bump the AIL's generation count to indicate that the tree + * has changed. * - * This function will not drop the AIL lock until all items are removed from - * the AIL to minimise the amount of lock traffic on the AIL. This does not - * greatly increase the AIL hold time, but does significantly reduce the amount - * of traffic on the lock, especially during IO completion. - * - * This function must be called with the AIL lock held. The lock is dropped - * before returning. + * This function must be called with the AIL lock held. The lock + * is dropped before returning. 
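Both AIL paths take care to copy the minimum item's li_lsn into a local variable while still holding xa_lock, because a 64-bit load is not atomic on 32-bit machines and the item may go away once the lock drops. The same defensive pattern, reduced to a pthreads sketch with invented names:

#include <pthread.h>
#include <stdint.h>

struct ail {
	pthread_mutex_t	lock;
	uint64_t	min_lsn;	/* imagine this tracks the smallest item LSN */
};

static void move_tail(uint64_t lsn) { (void)lsn; /* stand-in for xfs_log_move_tail() */ }

static void update_tail(struct ail *ail)
{
	uint64_t tail_lsn;

	pthread_mutex_lock(&ail->lock);
	/* copy the 64-bit value while it is still stable ... */
	tail_lsn = ail->min_lsn;
	pthread_mutex_unlock(&ail->lock);
	/* ... and only use the private copy after dropping the lock */
	move_tail(tail_lsn);
}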
*/ void -xfs_trans_ail_delete_bulk( - struct xfs_ail *ailp, - struct xfs_log_item **log_items, - int nr_items) __releases(ailp->xa_lock) +xfs_trans_ail_delete( + struct xfs_ail *ailp, + xfs_log_item_t *lip) __releases(ailp->xa_lock) { + xfs_log_item_t *dlip; xfs_log_item_t *mlip; xfs_lsn_t tail_lsn; - int mlip_changed = 0; - int i; - mlip = xfs_ail_min(ailp); - - for (i = 0; i < nr_items; i++) { - struct xfs_log_item *lip = log_items[i]; - if (!(lip->li_flags & XFS_LI_IN_AIL)) { - struct xfs_mount *mp = ailp->xa_mount; + if (lip->li_flags & XFS_LI_IN_AIL) { + mlip = xfs_ail_min(ailp); + dlip = xfs_ail_delete(ailp, lip); + ASSERT(dlip == lip); + xfs_trans_ail_cursor_clear(ailp, dlip); - spin_unlock(&ailp->xa_lock); - if (!XFS_FORCED_SHUTDOWN(mp)) { - xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, - "%s: attempting to delete a log item that is not in the AIL", - __func__); - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - } - return; - } - xfs_ail_delete(ailp, lip); lip->li_flags &= ~XFS_LI_IN_AIL; lip->li_lsn = 0; - if (mlip == lip) - mlip_changed = 1; + + if (mlip == dlip) { + mlip = xfs_ail_min(ailp); + /* + * It is not safe to access mlip after the AIL lock + * is dropped, so we must get a copy of li_lsn + * before we do so. This is especially important + * on 32-bit platforms where accessing and updating + * 64-bit values like li_lsn is not atomic. + */ + tail_lsn = mlip ? mlip->li_lsn : 0; + spin_unlock(&ailp->xa_lock); + xfs_log_move_tail(ailp->xa_mount, tail_lsn); + } else { + spin_unlock(&ailp->xa_lock); + } } + else { + /* + * If the file system is not being shutdown, we are in + * serious trouble if we get to this stage. + */ + struct xfs_mount *mp = ailp->xa_mount; - if (!mlip_changed) { spin_unlock(&ailp->xa_lock); - return; + if (!XFS_FORCED_SHUTDOWN(mp)) { + xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, + "%s: attempting to delete a log item that is not in the AIL", + __func__); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + } } - - /* - * It is not safe to access mlip after the AIL lock is dropped, so we - * must get a copy of li_lsn before we do so. This is especially - * important on 32-bit platforms where accessing and updating 64-bit - * values like li_lsn is not atomic. It is possible we've emptied the - * AIL here, so if that is the case, pass an LSN of 0 to the tail move. - */ - mlip = xfs_ail_min(ailp); - tail_lsn = mlip ? mlip->li_lsn : 0; - spin_unlock(&ailp->xa_lock); - xfs_log_move_tail(ailp->xa_mount, tail_lsn); } + + /* * The active item list (AIL) is a doubly linked list of log * items sorted by ascending lsn. The base of the list is @@ -646,13 +623,16 @@ xfs_trans_ail_destroy( } /* - * splice the log item list into the AIL at the given LSN. + * Insert the given log item into the AIL. + * We almost always insert at the end of the list, so on inserts + * we search from the end of the list to find where the + * new item belongs. */ STATIC void -xfs_ail_splice( +xfs_ail_insert( struct xfs_ail *ailp, - struct list_head *list, - xfs_lsn_t lsn) + xfs_log_item_t *lip) +/* ARGSUSED */ { xfs_log_item_t *next_lip; @@ -660,33 +640,39 @@ xfs_ail_splice( * If the list is empty, just insert the item. 
*/ if (list_empty(&ailp->xa_ail)) { - list_splice(list, &ailp->xa_ail); + list_add(&lip->li_ail, &ailp->xa_ail); return; } list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { - if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) + if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0) break; } ASSERT((&next_lip->li_ail == &ailp->xa_ail) || - (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)); + (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)); + + list_add(&lip->li_ail, &next_lip->li_ail); - list_splice_init(list, &next_lip->li_ail); + xfs_ail_check(ailp, lip); return; } /* * Delete the given item from the AIL. Return a pointer to the item. */ -STATIC void +/*ARGSUSED*/ +STATIC xfs_log_item_t * xfs_ail_delete( struct xfs_ail *ailp, xfs_log_item_t *lip) +/* ARGSUSED */ { xfs_ail_check(ailp, lip); + list_del(&lip->li_ail); - xfs_trans_ail_cursor_clear(ailp, lip); + + return lip; } /* @@ -696,6 +682,7 @@ xfs_ail_delete( STATIC xfs_log_item_t * xfs_ail_min( struct xfs_ail *ailp) +/* ARGSUSED */ { if (list_empty(&ailp->xa_ail)) return NULL; @@ -712,6 +699,7 @@ STATIC xfs_log_item_t * xfs_ail_next( struct xfs_ail *ailp, xfs_log_item_t *lip) +/* ARGSUSED */ { if (lip->li_ail.next == &ailp->xa_ail) return NULL; diff --git a/trunk/fs/xfs/xfs_trans_extfree.c b/trunk/fs/xfs/xfs_trans_extfree.c index f7590f5badea..f783d5e9fa70 100644 --- a/trunk/fs/xfs/xfs_trans_extfree.c +++ b/trunk/fs/xfs/xfs_trans_extfree.c @@ -69,16 +69,12 @@ xfs_trans_log_efi_extent(xfs_trans_t *tp, tp->t_flags |= XFS_TRANS_DIRTY; efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY; - /* - * atomic_inc_return gives us the value after the increment; - * we want to use it as an array index so we need to subtract 1 from - * it. - */ - next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; + next_extent = efip->efi_next_extent; ASSERT(next_extent < efip->efi_format.efi_nextents); extp = &(efip->efi_format.efi_extents[next_extent]); extp->ext_start = start_block; extp->ext_len = ext_len; + efip->efi_next_extent++; } diff --git a/trunk/fs/xfs/xfs_trans_priv.h b/trunk/fs/xfs/xfs_trans_priv.h index 35162c238fa3..62da86c90de5 100644 --- a/trunk/fs/xfs/xfs_trans_priv.h +++ b/trunk/fs/xfs/xfs_trans_priv.h @@ -22,17 +22,15 @@ struct xfs_log_item; struct xfs_log_item_desc; struct xfs_mount; struct xfs_trans; -struct xfs_ail; -struct xfs_log_vec; void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); void xfs_trans_del_item(struct xfs_log_item *); void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, int flags); +void xfs_trans_item_committed(struct xfs_log_item *lip, + xfs_lsn_t commit_lsn, int aborted); void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); -void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, - xfs_lsn_t commit_lsn, int aborted); /* * AIL traversal cursor. 
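The restored xfs_ail_insert() relies on the observation that most insertions land at or near the end of the LSN-sorted list, so it scans backwards from the tail for the insertion point instead of forwards from the head. A doubly linked list version of that search, assuming hypothetical node and list names:

#include <stddef.h>

struct node {
	long		lsn;
	struct node	*prev, *next;
};

struct list {
	struct node head;	/* sentinel: head.next is the first item, head.prev the last */
};

static void list_init(struct list *l)
{
	l->head.next = l->head.prev = &l->head;
}

/* Insert item keeping the list sorted by ascending lsn, searching from the tail. */
static void sorted_insert_from_tail(struct list *l, struct node *item)
{
	struct node *pos = l->head.prev;

	/* walk backwards until we find a node that sorts at or before item */
	while (pos != &l->head && pos->lsn > item->lsn)
		pos = pos->prev;

	/* link item immediately after pos (pos may be the sentinel) */
	item->prev = pos;
	item->next = pos->next;
	pos->next->prev = item;
	pos->next = item;
}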
* @@ -75,29 +73,12 @@ struct xfs_ail { /* * From xfs_trans_ail.c */ -void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, - struct xfs_log_item **log_items, int nr_items, - xfs_lsn_t lsn) __releases(ailp->xa_lock); -static inline void -xfs_trans_ail_update( - struct xfs_ail *ailp, - struct xfs_log_item *lip, - xfs_lsn_t lsn) __releases(ailp->xa_lock) -{ - xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn); -} - -void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, - struct xfs_log_item **log_items, int nr_items) - __releases(ailp->xa_lock); -static inline void -xfs_trans_ail_delete( - struct xfs_ail *ailp, - xfs_log_item_t *lip) __releases(ailp->xa_lock) -{ - xfs_trans_ail_delete_bulk(ailp, &lip, 1); -} - +void xfs_trans_ail_update(struct xfs_ail *ailp, + struct xfs_log_item *lip, xfs_lsn_t lsn) + __releases(ailp->xa_lock); +void xfs_trans_ail_delete(struct xfs_ail *ailp, + struct xfs_log_item *lip) + __releases(ailp->xa_lock); void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); void xfs_trans_unlocked_item(struct xfs_ail *, xfs_log_item_t *); diff --git a/trunk/fs/xfs/xfs_vnodeops.c b/trunk/fs/xfs/xfs_vnodeops.c index d8e6f8cd6f0c..8e4a63c4151a 100644 --- a/trunk/fs/xfs/xfs_vnodeops.c +++ b/trunk/fs/xfs/xfs_vnodeops.c @@ -964,48 +964,29 @@ xfs_release( xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); } - if (ip->i_d.di_nlink == 0) - return 0; - - if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && - ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || - ip->i_delayed_blks > 0)) && - (ip->i_df.if_flags & XFS_IFEXTENTS)) && - (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { - - /* - * If we can't get the iolock just skip truncating the blocks - * past EOF because we could deadlock with the mmap_sem - * otherwise. We'll get another chance to drop them once the - * last reference to the inode is dropped, so we'll never leak - * blocks permanently. - * - * Further, check if the inode is being opened, written and - * closed frequently and we have delayed allocation blocks - * oustanding (e.g. streaming writes from the NFS server), - * truncating the blocks past EOF will cause fragmentation to - * occur. - * - * In this case don't do the truncation, either, but we have to - * be careful how we detect this case. Blocks beyond EOF show - * up as i_delayed_blks even when the inode is clean, so we - * need to truncate them away first before checking for a dirty - * release. Hence on the first dirty close we will still remove - * the speculative allocation, but after that we will leave it - * in place. - */ - if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) - return 0; - - error = xfs_free_eofblocks(mp, ip, - XFS_FREE_EOF_TRYLOCK); - if (error) - return error; + if (ip->i_d.di_nlink != 0) { + if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && + ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || + ip->i_delayed_blks > 0)) && + (ip->i_df.if_flags & XFS_IFEXTENTS)) && + (!(ip->i_d.di_flags & + (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { - /* delalloc blocks after truncation means it really is dirty */ - if (ip->i_delayed_blks) - xfs_iflags_set(ip, XFS_IDIRTY_RELEASE); + /* + * If we can't get the iolock just skip truncating + * the blocks past EOF because we could deadlock + * with the mmap_sem otherwise. We'll get another + * chance to drop them once the last reference to + * the inode is dropped, so we'll never leak blocks + * permanently. 
+ */ + error = xfs_free_eofblocks(mp, ip, + XFS_FREE_EOF_TRYLOCK); + if (error) + return error; + } } + return 0; } diff --git a/trunk/include/linux/dynamic_debug.h b/trunk/include/linux/dynamic_debug.h index 1c70028f81f9..a90b3892074a 100644 --- a/trunk/include/linux/dynamic_debug.h +++ b/trunk/include/linux/dynamic_debug.h @@ -44,24 +44,34 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, extern int ddebug_remove_module(const char *mod_name); #define dynamic_pr_debug(fmt, ...) do { \ + __label__ do_printk; \ + __label__ out; \ static struct _ddebug descriptor \ __used \ __attribute__((section("__verbose"), aligned(8))) = \ { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ _DPRINTK_FLAGS_DEFAULT }; \ - if (unlikely(descriptor.enabled)) \ - printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ + JUMP_LABEL(&descriptor.enabled, do_printk); \ + goto out; \ +do_printk: \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ +out: ; \ } while (0) #define dynamic_dev_dbg(dev, fmt, ...) do { \ + __label__ do_printk; \ + __label__ out; \ static struct _ddebug descriptor \ __used \ __attribute__((section("__verbose"), aligned(8))) = \ { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ _DPRINTK_FLAGS_DEFAULT }; \ - if (unlikely(descriptor.enabled)) \ - dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ + JUMP_LABEL(&descriptor.enabled, do_printk); \ + goto out; \ +do_printk: \ + dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ +out: ; \ } while (0) #else diff --git a/trunk/include/linux/mfd/tmio.h b/trunk/include/linux/mfd/tmio.h index 8e70310ee945..085f041197dc 100644 --- a/trunk/include/linux/mfd/tmio.h +++ b/trunk/include/linux/mfd/tmio.h @@ -57,10 +57,6 @@ * is configured in 4-bit mode. */ #define TMIO_MMC_BLKSZ_2BYTES (1 << 1) -/* - * Some controllers can support SDIO IRQ signalling. - */ -#define TMIO_MMC_SDIO_IRQ (1 << 2) int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); @@ -70,7 +66,6 @@ void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state); struct tmio_mmc_dma { void *chan_priv_tx; void *chan_priv_rx; - int alignment_shift; }; /* diff --git a/trunk/include/linux/mmc/dw_mmc.h b/trunk/include/linux/mmc/dw_mmc.h deleted file mode 100644 index 16b0261763ed..000000000000 --- a/trunk/include/linux/mmc/dw_mmc.h +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Synopsys DesignWare Multimedia Card Interface driver - * (Based on NXP driver for lpc 31xx) - * - * Copyright (C) 2009 NXP Semiconductors - * Copyright (C) 2009, 2010 Imagination Technologies Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#ifndef _LINUX_MMC_DW_MMC_H_ -#define _LINUX_MMC_DW_MMC_H_ - -#define MAX_MCI_SLOTS 2 - -enum dw_mci_state { - STATE_IDLE = 0, - STATE_SENDING_CMD, - STATE_SENDING_DATA, - STATE_DATA_BUSY, - STATE_SENDING_STOP, - STATE_DATA_ERROR, -}; - -enum { - EVENT_CMD_COMPLETE = 0, - EVENT_XFER_COMPLETE, - EVENT_DATA_COMPLETE, - EVENT_DATA_ERROR, - EVENT_XFER_ERROR -}; - -struct mmc_data; - -/** - * struct dw_mci - MMC controller state shared between all slots - * @lock: Spinlock protecting the queue and associated data. - * @regs: Pointer to MMIO registers. - * @sg: Scatterlist entry currently being processed by PIO code, if any. 
- * @pio_offset: Offset into the current scatterlist entry. - * @cur_slot: The slot which is currently using the controller. - * @mrq: The request currently being processed on @cur_slot, - * or NULL if the controller is idle. - * @cmd: The command currently being sent to the card, or NULL. - * @data: The data currently being transferred, or NULL if no data - * transfer is in progress. - * @use_dma: Whether DMA channel is initialized or not. - * @sg_dma: Bus address of DMA buffer. - * @sg_cpu: Virtual address of DMA buffer. - * @dma_ops: Pointer to platform-specific DMA callbacks. - * @cmd_status: Snapshot of SR taken upon completion of the current - * command. Only valid when EVENT_CMD_COMPLETE is pending. - * @data_status: Snapshot of SR taken upon completion of the current - * data transfer. Only valid when EVENT_DATA_COMPLETE or - * EVENT_DATA_ERROR is pending. - * @stop_cmdr: Value to be loaded into CMDR when the stop command is - * to be sent. - * @dir_status: Direction of current transfer. - * @tasklet: Tasklet running the request state machine. - * @card_tasklet: Tasklet handling card detect. - * @pending_events: Bitmask of events flagged by the interrupt handler - * to be processed by the tasklet. - * @completed_events: Bitmask of events which the state machine has - * processed. - * @state: Tasklet state. - * @queue: List of slots waiting for access to the controller. - * @bus_hz: The rate of @mck in Hz. This forms the basis for MMC bus - * rate and timeout calculations. - * @current_speed: Configured rate of the controller. - * @num_slots: Number of slots available. - * @pdev: Platform device associated with the MMC controller. - * @pdata: Platform data associated with the MMC controller. - * @slot: Slots sharing this MMC controller. - * @data_shift: log2 of FIFO item size. - * @push_data: Pointer to FIFO push function. - * @pull_data: Pointer to FIFO pull function. - * @quirks: Set of quirks that apply to specific versions of the IP. - * - * Locking - * ======= - * - * @lock is a softirq-safe spinlock protecting @queue as well as - * @cur_slot, @mrq and @state. These must always be updated - * at the same time while holding @lock. - * - * The @mrq field of struct dw_mci_slot is also protected by @lock, - * and must always be written at the same time as the slot is added to - * @queue. - * - * @pending_events and @completed_events are accessed using atomic bit - * operations, so they don't need any locking. - * - * None of the fields touched by the interrupt handler need any - * locking. However, ordering is important: Before EVENT_DATA_ERROR or - * EVENT_DATA_COMPLETE is set in @pending_events, all data-related - * interrupts must be disabled and @data_status updated with a - * snapshot of SR. Similarly, before EVENT_CMD_COMPLETE is set, the - * CMDRDY interupt must be disabled and @cmd_status updated with a - * snapshot of SR, and before EVENT_XFER_COMPLETE can be set, the - * bytes_xfered field of @data must be written. This is ensured by - * using barriers. 
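The dw_mci documentation being removed spells out an ordering rule: the status snapshot must be published before the corresponding pending-event bit becomes visible, which the driver guarantees with barriers. With C11 atomics the same publish/consume ordering can be sketched as follows; the structure and field names are illustrative only.

#include <stdatomic.h>
#include <stdint.h>

struct host {
	uint32_t	data_status;		/* snapshot of the status register */
	atomic_uint	pending_events;		/* bitmask read by the tasklet */
};

#define EVENT_DATA_COMPLETE	(1u << 0)

/* Interrupt side: write the snapshot, then publish the event with release order. */
static void irq_data_complete(struct host *h, uint32_t status)
{
	h->data_status = status;
	atomic_fetch_or_explicit(&h->pending_events, EVENT_DATA_COMPLETE,
				 memory_order_release);
}

/* Tasklet side: once the bit is seen (acquire), the snapshot is guaranteed valid. */
static int tasklet_poll(struct host *h, uint32_t *status)
{
	unsigned int ev = atomic_load_explicit(&h->pending_events, memory_order_acquire);

	if (!(ev & EVENT_DATA_COMPLETE))
		return 0;
	*status = h->data_status;
	return 1;
}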
- */ -struct dw_mci { - spinlock_t lock; - void __iomem *regs; - - struct scatterlist *sg; - unsigned int pio_offset; - - struct dw_mci_slot *cur_slot; - struct mmc_request *mrq; - struct mmc_command *cmd; - struct mmc_data *data; - - /* DMA interface members*/ - int use_dma; - - dma_addr_t sg_dma; - void *sg_cpu; - struct dw_mci_dma_ops *dma_ops; -#ifdef CONFIG_MMC_DW_IDMAC - unsigned int ring_size; -#else - struct dw_mci_dma_data *dma_data; -#endif - u32 cmd_status; - u32 data_status; - u32 stop_cmdr; - u32 dir_status; - struct tasklet_struct tasklet; - struct tasklet_struct card_tasklet; - unsigned long pending_events; - unsigned long completed_events; - enum dw_mci_state state; - struct list_head queue; - - u32 bus_hz; - u32 current_speed; - u32 num_slots; - struct platform_device *pdev; - struct dw_mci_board *pdata; - struct dw_mci_slot *slot[MAX_MCI_SLOTS]; - - /* FIFO push and pull */ - int data_shift; - void (*push_data)(struct dw_mci *host, void *buf, int cnt); - void (*pull_data)(struct dw_mci *host, void *buf, int cnt); - - /* Workaround flags */ - u32 quirks; -}; - -/* DMA ops for Internal/External DMAC interface */ -struct dw_mci_dma_ops { - /* DMA Ops */ - int (*init)(struct dw_mci *host); - void (*start)(struct dw_mci *host, unsigned int sg_len); - void (*complete)(struct dw_mci *host); - void (*stop)(struct dw_mci *host); - void (*cleanup)(struct dw_mci *host); - void (*exit)(struct dw_mci *host); -}; - -/* IP Quirks/flags. */ -/* No special quirks or flags to cater for */ -#define DW_MCI_QUIRK_NONE 0 -/* DTO fix for command transmission with IDMAC configured */ -#define DW_MCI_QUIRK_IDMAC_DTO 1 -/* delay needed between retries on some 2.11a implementations */ -#define DW_MCI_QUIRK_RETRY_DELAY 2 -/* High Speed Capable - Supports HS cards (upto 50MHz) */ -#define DW_MCI_QUIRK_HIGHSPEED 4 - - -struct dma_pdata; - -struct block_settings { - unsigned short max_segs; /* see blk_queue_max_segments */ - unsigned int max_blk_size; /* maximum size of one mmc block */ - unsigned int max_blk_count; /* maximum number of blocks in one req*/ - unsigned int max_req_size; /* maximum number of bytes in one req*/ - unsigned int max_seg_size; /* see blk_queue_max_segment_size */ -}; - -/* Board platform data */ -struct dw_mci_board { - u32 num_slots; - - u32 quirks; /* Workaround / Quirk flags */ - unsigned int bus_hz; /* Bus speed */ - - /* delay in mS before detecting cards after interrupt */ - u32 detect_delay_ms; - - int (*init)(u32 slot_id, irq_handler_t , void *); - int (*get_ro)(u32 slot_id); - int (*get_cd)(u32 slot_id); - int (*get_ocr)(u32 slot_id); - int (*get_bus_wd)(u32 slot_id); - /* - * Enable power to selected slot and set voltage to desired level. - * Voltage levels are specified using MMC_VDD_xxx defines defined - * in linux/mmc/host.h file. 
- */ - void (*setpower)(u32 slot_id, u32 volt); - void (*exit)(u32 slot_id); - void (*select_slot)(u32 slot_id); - - struct dw_mci_dma_ops *dma_ops; - struct dma_pdata *data; - struct block_settings *blk_settings; -}; - -#endif /* _LINUX_MMC_DW_MMC_H_ */ diff --git a/trunk/include/linux/mmc/host.h b/trunk/include/linux/mmc/host.h index bcb793ec7374..30f6fad99a58 100644 --- a/trunk/include/linux/mmc/host.h +++ b/trunk/include/linux/mmc/host.h @@ -131,9 +131,6 @@ struct mmc_host { unsigned int f_max; unsigned int f_init; u32 ocr_avail; - u32 ocr_avail_sdio; /* SDIO-specific OCR */ - u32 ocr_avail_sd; /* SD-specific OCR */ - u32 ocr_avail_mmc; /* MMC-specific OCR */ struct notifier_block pm_notify; #define MMC_VDD_165_195 0x00000080 /* VDD voltage 1.65 - 1.95 */ @@ -172,20 +169,9 @@ struct mmc_host { #define MMC_CAP_1_2V_DDR (1 << 12) /* can support */ /* DDR mode at 1.2V */ #define MMC_CAP_POWER_OFF_CARD (1 << 13) /* Can power off after boot */ -#define MMC_CAP_BUS_WIDTH_TEST (1 << 14) /* CMD14/CMD19 bus width ok */ mmc_pm_flag_t pm_caps; /* supported pm features */ -#ifdef CONFIG_MMC_CLKGATE - int clk_requests; /* internal reference counter */ - unsigned int clk_delay; /* number of MCI clk hold cycles */ - bool clk_gated; /* clock gated */ - struct work_struct clk_gate_work; /* delayed clock gate */ - unsigned int clk_old; /* old clock value cache */ - spinlock_t clk_lock; /* lock for clk fields */ - struct mutex clk_gate_mutex; /* mutex for clock gating */ -#endif - /* host specific block data */ unsigned int max_seg_size; /* see blk_queue_max_segment_size */ unsigned short max_segs; /* see blk_queue_max_segments */ @@ -321,10 +307,5 @@ static inline int mmc_card_is_removable(struct mmc_host *host) return !(host->caps & MMC_CAP_NONREMOVABLE) && mmc_assume_removable; } -static inline int mmc_card_is_powered_resumed(struct mmc_host *host) -{ - return host->pm_flags & MMC_PM_KEEP_POWER; -} - #endif diff --git a/trunk/include/linux/mmc/mmc.h b/trunk/include/linux/mmc/mmc.h index 612301f85d14..956fbd877692 100644 --- a/trunk/include/linux/mmc/mmc.h +++ b/trunk/include/linux/mmc/mmc.h @@ -40,9 +40,7 @@ #define MMC_READ_DAT_UNTIL_STOP 11 /* adtc [31:0] dadr R1 */ #define MMC_STOP_TRANSMISSION 12 /* ac R1b */ #define MMC_SEND_STATUS 13 /* ac [31:16] RCA R1 */ -#define MMC_BUS_TEST_R 14 /* adtc R1 */ #define MMC_GO_INACTIVE_STATE 15 /* ac [31:16] RCA */ -#define MMC_BUS_TEST_W 19 /* adtc R1 */ #define MMC_SPI_READ_OCR 58 /* spi spi_R3 */ #define MMC_SPI_CRC_ON_OFF 59 /* spi [0:0] flag spi_R1 */ diff --git a/trunk/include/linux/mmc/sdhci.h b/trunk/include/linux/mmc/sdhci.h index 83bd9f76709a..1fdc673f2396 100644 --- a/trunk/include/linux/mmc/sdhci.h +++ b/trunk/include/linux/mmc/sdhci.h @@ -83,8 +83,6 @@ struct sdhci_host { #define SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12 (1<<28) /* Controller doesn't have HISPD bit field in HI-SPEED SD card */ #define SDHCI_QUIRK_NO_HISPD_BIT (1<<29) -/* Controller treats ADMA descriptors with length 0000h incorrectly */ -#define SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC (1<<30) int irq; /* Device IRQ */ void __iomem *ioaddr; /* Mapped address */ @@ -141,10 +139,6 @@ struct sdhci_host { unsigned int caps; /* Alternative capabilities */ - unsigned int ocr_avail_sdio; /* OCR bit masks */ - unsigned int ocr_avail_sd; - unsigned int ocr_avail_mmc; - unsigned long private[0] ____cacheline_aligned; }; #endif /* __SDHCI_H */ diff --git a/trunk/include/linux/pci_ids.h b/trunk/include/linux/pci_ids.h index ab47732d81e0..cb845c16ad7d 100644 --- a/trunk/include/linux/pci_ids.h +++ 
b/trunk/include/linux/pci_ids.h
@@ -518,7 +518,6 @@
 #define PCI_DEVICE_ID_AMD_11H_NB_MISC	0x1303
 #define PCI_DEVICE_ID_AMD_11H_NB_LINK	0x1304
 #define PCI_DEVICE_ID_AMD_15H_NB_MISC	0x1603
-#define PCI_DEVICE_ID_AMD_CNB17H_F3	0x1703
 #define PCI_DEVICE_ID_AMD_LANCE		0x2000
 #define PCI_DEVICE_ID_AMD_LANCE_HOME	0x2001
 #define PCI_DEVICE_ID_AMD_SCSI		0x2020
@@ -1651,11 +1650,6 @@
 #define PCI_DEVICE_ID_O2_6836		0x6836
 #define PCI_DEVICE_ID_O2_6812		0x6872
 #define PCI_DEVICE_ID_O2_6933		0x6933
-#define PCI_DEVICE_ID_O2_8120		0x8120
-#define PCI_DEVICE_ID_O2_8220		0x8220
-#define PCI_DEVICE_ID_O2_8221		0x8221
-#define PCI_DEVICE_ID_O2_8320		0x8320
-#define PCI_DEVICE_ID_O2_8321		0x8321

 #define PCI_VENDOR_ID_3DFX		0x121a
 #define PCI_DEVICE_ID_3DFX_VOODOO	0x0001
@@ -2369,8 +2363,6 @@
 #define PCI_DEVICE_ID_JMICRON_JMB38X_SD	0x2381
 #define PCI_DEVICE_ID_JMICRON_JMB38X_MMC	0x2382
 #define PCI_DEVICE_ID_JMICRON_JMB38X_MS	0x2383
-#define PCI_DEVICE_ID_JMICRON_JMB388_SD	0x2391
-#define PCI_DEVICE_ID_JMICRON_JMB388_ESD	0x2392

 #define PCI_VENDOR_ID_KORENIX		0x1982
 #define PCI_DEVICE_ID_KORENIX_JETCARDF0	0x1600
diff --git a/trunk/include/linux/rtc.h b/trunk/include/linux/rtc.h
index 3c995b4d742c..14dbc83ded20 100644
--- a/trunk/include/linux/rtc.h
+++ b/trunk/include/linux/rtc.h
@@ -107,17 +107,12 @@ extern int rtc_year_days(unsigned int day, unsigned int month, unsigned int year
 extern int rtc_valid_tm(struct rtc_time *tm);
 extern int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time);
 extern void rtc_time_to_tm(unsigned long time, struct rtc_time *tm);
-ktime_t rtc_tm_to_ktime(struct rtc_time tm);
-struct rtc_time rtc_ktime_to_tm(ktime_t kt);
-

 #include
 #include
 #include
 #include
 #include
-#include
-#include

 extern struct class *rtc_class;
@@ -156,19 +151,7 @@ struct rtc_class_ops {
 };

 #define RTC_DEVICE_NAME_SIZE 20
-typedef struct rtc_task {
-	void (*func)(void *private_data);
-	void *private_data;
-} rtc_task_t;
-
-
-struct rtc_timer {
-	struct rtc_task task;
-	struct timerqueue_node node;
-	ktime_t period;
-	int enabled;
-};
-
+struct rtc_task;

 /* flags */
 #define RTC_DEV_BUSY 0
@@ -196,13 +179,16 @@ struct rtc_device
 	spinlock_t irq_task_lock;
 	int irq_freq;
 	int max_user_freq;
-
-	struct timerqueue_head timerqueue;
-	struct rtc_timer aie_timer;
-	struct rtc_timer uie_rtctimer;
-	struct hrtimer pie_timer; /* sub second exp, so needs hrtimer */
-	int pie_enabled;
-	struct work_struct irqwork;
+#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
+	struct work_struct uie_task;
+	struct timer_list uie_timer;
+	/* Those fields are protected by rtc->irq_lock */
+	unsigned int oldsecs;
+	unsigned int uie_irq_active:1;
+	unsigned int stop_uie_polling:1;
+	unsigned int uie_task_active:1;
+	unsigned int uie_timer_active:1;
+#endif
 };

 #define to_rtc_device(d) container_of(d, struct rtc_device, dev)
@@ -238,22 +224,15 @@ extern int rtc_alarm_irq_enable(struct rtc_device *rtc,
 						unsigned int enabled);
 extern int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc,
 						unsigned int enabled);
-void rtc_aie_update_irq(void *private);
-void rtc_uie_update_irq(void *private);
-enum hrtimer_restart rtc_pie_update_irq(struct hrtimer *timer);
+typedef struct rtc_task {
+	void (*func)(void *private_data);
+	void *private_data;
+} rtc_task_t;

 int rtc_register(rtc_task_t *task);
 int rtc_unregister(rtc_task_t *task);
 int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg);

-void rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer);
-void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer);
-void rtc_timer_init(struct rtc_timer *timer, void (*f)(void* p), void* data);
-int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer* timer,
-		    ktime_t expires, ktime_t period);
-int rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer* timer);
-void rtc_timer_do_work(struct work_struct *work);
-
 static inline bool is_leap_year(unsigned int year)
 {
 	return (!(year % 4) && (year % 100)) || !(year % 400);
diff --git a/trunk/include/linux/tracepoint.h b/trunk/include/linux/tracepoint.h
index c6814616653b..d3e4f87e95c0 100644
--- a/trunk/include/linux/tracepoint.h
+++ b/trunk/include/linux/tracepoint.h
@@ -32,7 +32,7 @@ struct tracepoint {
 	int state;			/* State. */
 	void (*regfunc)(void);
 	void (*unregfunc)(void);
-	struct tracepoint_func __rcu *funcs;
+	struct tracepoint_func *funcs;
 } __attribute__((aligned(32)));		/*
 					 * Aligned on 32 bytes because it is
 					 * globally visible and gcc happily
@@ -326,7 +326,7 @@ do_trace:						\
 *	memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
 *	__entry->next_pid	= next->pid;
 *	__entry->next_prio	= next->prio;
- *	),
+ *	)
 *
 * *
 * * Formatted output of a trace record via TP_printk().
diff --git a/trunk/include/trace/define_trace.h b/trunk/include/trace/define_trace.h
index da39b22636f7..b0b4eb24d592 100644
--- a/trunk/include/trace/define_trace.h
+++ b/trunk/include/trace/define_trace.h
@@ -21,16 +21,6 @@
 #undef CREATE_TRACE_POINTS
 #include

-/*
- * module.h includes tracepoints, and because ftrace.h
- * pulls in module.h:
- *  trace/ftrace.h -> linux/ftrace_event.h -> linux/perf_event.h ->
- *  linux/ftrace.h -> linux/module.h
- * we must include module.h here before we play with any of
- * the TRACE_EVENT() macros, otherwise the tracepoints included
- * by module.h may break the build.
- */
-#include

 #undef TRACE_EVENT
 #define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
diff --git a/trunk/include/trace/events/skb.h b/trunk/include/trace/events/skb.h
index f10293c41b1e..75ce9d500d8e 100644
--- a/trunk/include/trace/events/skb.h
+++ b/trunk/include/trace/events/skb.h
@@ -25,7 +25,9 @@ TRACE_EVENT(kfree_skb,

 	TP_fast_assign(
 		__entry->skbaddr = skb;
-		__entry->protocol = ntohs(skb->protocol);
+		if (skb) {
+			__entry->protocol = ntohs(skb->protocol);
+		}
 		__entry->location = location;
 	),

diff --git a/trunk/kernel/Makefile b/trunk/kernel/Makefile
index 5669f71dfdd5..33e0a39cf359 100644
--- a/trunk/kernel/Makefile
+++ b/trunk/kernel/Makefile
@@ -100,7 +100,6 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_X86_DS) += trace/
 obj-$(CONFIG_RING_BUFFER) += trace/
-obj-$(CONFIG_TRACEPOINTS) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event.o
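The rtc.h hunks earlier leave the is_leap_year() helper in place as surrounding context. As a quick reference, the same rule can be exercised in a small standalone program; the test values below are illustrative and not taken from the patch.

#include <assert.h>
#include <stdbool.h>

/* Same expression as the is_leap_year() helper kept in rtc.h: a year is a
 * leap year if it is divisible by 4 but not by 100, or divisible by 400. */
static bool is_leap_year(unsigned int year)
{
	return (!(year % 4) && (year % 100)) || !(year % 400);
}

int main(void)
{
	assert(is_leap_year(2000));	/* divisible by 400 */
	assert(is_leap_year(2012));	/* divisible by 4, not by 100 */
	assert(!is_leap_year(1900));	/* divisible by 100, not by 400 */
	assert(!is_leap_year(2011));
	return 0;
}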
diff --git a/trunk/kernel/exit.c b/trunk/kernel/exit.c
index f9a45ebcc7b1..89c74861a3da 100644
--- a/trunk/kernel/exit.c
+++ b/trunk/kernel/exit.c
@@ -994,15 +994,6 @@ NORET_TYPE void do_exit(long code)
 	exit_fs(tsk);
 	check_stack_usage();
 	exit_thread();
-
-	/*
-	 * Flush inherited counters to the parent - before the parent
-	 * gets woken up by child-exit notifications.
-	 *
-	 * because of cgroup mode, must be called before cgroup_exit()
-	 */
-	perf_event_exit_task(tsk);
-
 	cgroup_exit(tsk, 1);

 	if (group_dead)
@@ -1016,6 +1007,11 @@ NORET_TYPE void do_exit(long code)
 	 * FIXME: do that only when needed, using sched_exit tracepoint
 	 */
 	flush_ptrace_hw_breakpoint(tsk);
+	/*
+	 * Flush inherited counters to the parent - before the parent
+	 * gets woken up by child-exit notifications.
+	 */
+	perf_event_exit_task(tsk);

 	exit_notify(tsk, group_dead);
 #ifdef CONFIG_NUMA
diff --git a/trunk/kernel/perf_event.c b/trunk/kernel/perf_event.c
index b782b7a79f00..11847bf1e8cc 100644
--- a/trunk/kernel/perf_event.c
+++ b/trunk/kernel/perf_event.c
@@ -38,12 +38,6 @@

 #include

-enum event_type_t {
-	EVENT_FLEXIBLE = 0x1,
-	EVENT_PINNED = 0x2,
-	EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
-};
-
 atomic_t perf_task_events __read_mostly;
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -71,12 +65,6 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000;

 static atomic64_t perf_event_id;

-static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
-			      enum event_type_t event_type);
-
-static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
-			     enum event_type_t event_type);
-
 void __weak perf_event_print_debug(void)	{ }

 extern __weak const char *perf_pmu_name(void)
@@ -84,11 +72,6 @@
 {
 	return "pmu";
 }

-static inline u64 perf_clock(void)
-{
-	return local_clock();
-}
-
 void perf_pmu_disable(struct pmu *pmu)
 {
 	int *count = this_cpu_ptr(pmu->pmu_disable_count);
@@ -257,6 +240,11 @@ static void perf_unpin_context(struct perf_event_context *ctx)
 	put_ctx(ctx);
 }

+static inline u64 perf_clock(void)
+{
+	return local_clock();
+}
+
 /*
  * Update the record of the current time in a context.
  */
@@ -268,12 +256,6 @@ static void update_context_time(struct perf_event_context *ctx)
 	ctx->timestamp = now;
 }

-static u64 perf_event_time(struct perf_event *event)
-{
-	struct perf_event_context *ctx = event->ctx;
-	return ctx ? ctx->time : 0;
-}
-
 /*
  * Update the total_time_enabled and total_time_running fields for a event.
  */
@@ -287,7 +269,7 @@ static void update_event_times(struct perf_event *event)
 		return;

 	if (ctx->is_active)
-		run_end = perf_event_time(event);
+		run_end = ctx->time;
 	else
 		run_end = event->tstamp_stopped;

@@ -296,7 +278,7 @@ static void update_event_times(struct perf_event *event)
 	if (event->state == PERF_EVENT_STATE_INACTIVE)
 		run_end = event->tstamp_stopped;
 	else
-		run_end = perf_event_time(event);
+		run_end = ctx->time;

 	event->total_time_running = run_end - event->tstamp_running;
 }
@@ -552,7 +534,6 @@ event_sched_out(struct perf_event *event,
 		  struct perf_cpu_context *cpuctx,
 		  struct perf_event_context *ctx)
 {
-	u64 tstamp = perf_event_time(event);
 	u64 delta;
 	/*
 	 * An event which could not be activated because of
@@ -564,7 +545,7 @@ event_sched_out(struct perf_event *event,
 	    && !event_filter_match(event)) {
 		delta = ctx->time - event->tstamp_stopped;
 		event->tstamp_running += delta;
-		event->tstamp_stopped = tstamp;
+		event->tstamp_stopped = ctx->time;
 	}

 	if (event->state != PERF_EVENT_STATE_ACTIVE)
@@ -575,7 +556,7 @@ event_sched_out(struct perf_event *event,
 		event->pending_disable = 0;
 		event->state = PERF_EVENT_STATE_OFF;
 	}
-	event->tstamp_stopped = tstamp;
+	event->tstamp_stopped = ctx->time;
 	event->pmu->del(event, 0);
 	event->oncpu = -1;
@@ -787,8 +768,6 @@ event_sched_in(struct perf_event *event,
 		 struct perf_cpu_context *cpuctx,
 		 struct perf_event_context *ctx)
 {
-	u64 tstamp = perf_event_time(event);
-
 	if (event->state <= PERF_EVENT_STATE_OFF)
 		return 0;

@@ -805,9 +784,9 @@ event_sched_in(struct perf_event *event,
 		return -EAGAIN;
 	}

-	event->tstamp_running += tstamp - event->tstamp_stopped;
+	event->tstamp_running += ctx->time - event->tstamp_stopped;

-	event->shadow_ctx_time = tstamp - ctx->timestamp;
+	event->shadow_ctx_time = ctx->time - ctx->timestamp;

 	if (!is_software_event(event))
 		cpuctx->active_oncpu++;
@@ -919,13 +898,11 @@ static int group_can_go_on(struct perf_event *event,
 static void add_event_to_ctx(struct perf_event *event,
 			       struct perf_event_context *ctx)
 {
-	u64 tstamp = perf_event_time(event);
-
 	list_add_event(event, ctx);
 	perf_group_attach(event);
-	event->tstamp_enabled = tstamp;
-	event->tstamp_running = tstamp;
-	event->tstamp_stopped = tstamp;
+	event->tstamp_enabled = ctx->time;
+	event->tstamp_running = ctx->time;
+	event->tstamp_stopped = ctx->time;
 }

 /*
@@ -960,7 +937,7 @@ static void __perf_install_in_context(void *info)

 	add_event_to_ctx(event, ctx);

-	if (!event_filter_match(event))
+	if (event->cpu != -1 && event->cpu != smp_processor_id())
 		goto unlock;

 	/*
@@ -1065,13 +1042,14 @@ static void __perf_event_mark_enabled(struct perf_event *event,
 					struct perf_event_context *ctx)
 {
 	struct perf_event *sub;
-	u64 tstamp = perf_event_time(event);

 	event->state = PERF_EVENT_STATE_INACTIVE;
-	event->tstamp_enabled = tstamp - event->total_time_enabled;
+	event->tstamp_enabled = ctx->time - event->total_time_enabled;
 	list_for_each_entry(sub, &event->sibling_list, group_entry) {
-		if (sub->state >= PERF_EVENT_STATE_INACTIVE)
-			sub->tstamp_enabled = tstamp - sub->total_time_enabled;
+		if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
+			sub->tstamp_enabled =
+				ctx->time - sub->total_time_enabled;
+		}
 	}
 }
@@ -1104,7 +1082,7 @@ static void __perf_event_enable(void *info)
 		goto unlock;

 	__perf_event_mark_enabled(event, ctx);
-	if (!event_filter_match(event))
+	if (event->cpu != -1 && event->cpu != smp_processor_id())
 		goto unlock;

 	/*
@@ -1215,6 +1193,12 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
 	return 0;
 }

+enum event_type_t {
+	EVENT_FLEXIBLE = 0x1,
+	EVENT_PINNED = 0x2,
+	EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
+};
+
 static void ctx_sched_out(struct perf_event_context *ctx,
 			  struct perf_cpu_context *cpuctx,
 			  enum event_type_t event_type)
@@ -1451,7 +1435,7 @@ ctx_pinned_sched_in(struct perf_event_context *ctx,
 	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
 		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
-		if (!event_filter_match(event))
+		if (event->cpu != -1 && event->cpu != smp_processor_id())
 			continue;

 		if (group_can_go_on(event, cpuctx, 1))
@@ -1483,7 +1467,7 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
 		 * Listen to the 'cpu' scheduling filter constraint
 		 * of events:
 		 */
-		if (!event_filter_match(event))
+		if (event->cpu != -1 && event->cpu != smp_processor_id())
 			continue;

 		if (group_can_go_on(event, cpuctx, can_add_hw)) {
@@ -1710,7 +1694,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 		if (event->state != PERF_EVENT_STATE_ACTIVE)
 			continue;

-		if (!event_filter_match(event))
+		if (event->cpu != -1 && event->cpu != smp_processor_id())
 			continue;

 		hwc = &event->hw;
@@ -3909,7 +3893,7 @@ static int perf_event_task_match(struct perf_event *event)
 	if (event->state < PERF_EVENT_STATE_INACTIVE)
 		return 0;

-	if (!event_filter_match(event))
+	if (event->cpu != -1 && event->cpu != smp_processor_id())
 		return 0;

 	if (event->attr.comm || event->attr.mmap ||
@@ -4046,7 +4030,7 @@ static int perf_event_comm_match(struct perf_event *event)
 	if (event->state < PERF_EVENT_STATE_INACTIVE)
 		return 0;

-	if (!event_filter_match(event))
+	if (event->cpu != -1 && event->cpu != smp_processor_id())
 		return 0;

 	if (event->attr.comm)
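Several of the kernel/perf_event.c hunks above, and the mmap-match hunk that follows, replace event_filter_match() with an open-coded per-CPU test. A reduced userspace sketch of that filter is shown below; the struct and helper names are illustrative stand-ins, not kernel API.

#include <stdbool.h>
#include <stdio.h>

/* Reduced stand-in for a perf event: cpu == -1 means "count on any CPU". */
struct demo_event {
	int cpu;
};

/* Mirrors the open-coded test used in the hunks around this point:
 *	if (event->cpu != -1 && event->cpu != smp_processor_id())
 *		continue;
 * i.e. skip the event unless it is unbound or bound to the current CPU. */
static bool event_runs_on_cpu(const struct demo_event *event, int this_cpu)
{
	return event->cpu == -1 || event->cpu == this_cpu;
}

int main(void)
{
	struct demo_event any = { .cpu = -1 };
	struct demo_event cpu2 = { .cpu = 2 };

	printf("any event on cpu 0: %d\n", event_runs_on_cpu(&any, 0));   /* 1 */
	printf("cpu2 event on cpu 0: %d\n", event_runs_on_cpu(&cpu2, 0)); /* 0 */
	printf("cpu2 event on cpu 2: %d\n", event_runs_on_cpu(&cpu2, 2)); /* 1 */
	return 0;
}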
@@ -4194,7 +4178,7 @@ static int perf_event_mmap_match(struct perf_event *event,
 	if (event->state < PERF_EVENT_STATE_INACTIVE)
 		return 0;

-	if (!event_filter_match(event))
+	if (event->cpu != -1 && event->cpu != smp_processor_id())
 		return 0;

 	if ((!executable && event->attr.mmap_data) ||
diff --git a/trunk/kernel/trace/Makefile b/trunk/kernel/trace/Makefile
index 761c510a06c5..53f338190b26 100644
--- a/trunk/kernel/trace/Makefile
+++ b/trunk/kernel/trace/Makefile
@@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
 endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
-obj-$(CONFIG_TRACEPOINTS) += power-traces.o
+obj-$(CONFIG_EVENT_TRACING) += power-traces.o
 ifeq ($(CONFIG_TRACING),y)
 obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
 endif
diff --git a/trunk/kernel/trace/trace.c b/trunk/kernel/trace/trace.c
index dc53ecb80589..f8cf959bad45 100644
--- a/trunk/kernel/trace/trace.c
+++ b/trunk/kernel/trace/trace.c
@@ -1313,10 +1313,12 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)

 	__this_cpu_inc(user_stack_count);

+
+
 	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
 					  sizeof(*entry), flags, pc);
 	if (!event)
-		goto out_drop_count;
+		return;
 	entry	= ring_buffer_event_data(event);

 	entry->tgid		= current->tgid;
@@ -1331,8 +1333,8 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
 	if (!filter_check_discard(call, entry, buffer, event))
 		ring_buffer_unlock_commit(buffer, event);

- out_drop_count:
 	__this_cpu_dec(user_stack_count);
+ out:
 	preempt_enable();
 }
diff --git a/trunk/lib/dynamic_debug.c b/trunk/lib/dynamic_debug.c
index b335acb43be2..3094318bfea7 100644
--- a/trunk/lib/dynamic_debug.c
+++ b/trunk/lib/dynamic_debug.c
@@ -141,10 +141,11 @@ static void ddebug_change(const struct ddebug_query *query,
 			else if (!dp->flags)
 				dt->num_enabled++;
 			dp->flags = newflags;
-			if (newflags)
-				dp->enabled = 1;
-			else
-				dp->enabled = 0;
+			if (newflags) {
+				jump_label_enable(&dp->enabled);
+			} else {
+				jump_label_disable(&dp->enabled);
+			}
 			if (verbose)
 				printk(KERN_INFO
 					"ddebug: changed %s:%d [%s]%s %s\n",
diff --git a/trunk/tools/perf/Makefile b/trunk/tools/perf/Makefile
index 2b5387d53ba5..1b9b13ee2a72 100644
--- a/trunk/tools/perf/Makefile
+++ b/trunk/tools/perf/Makefile
@@ -227,7 +227,7 @@ ifndef PERF_DEBUG
   CFLAGS_OPTIMIZE = -O6
 endif

-CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
 EXTLIBS = -lpthread -lrt -lelf -lm
 ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
 ALL_LDFLAGS = $(LDFLAGS)
diff --git a/trunk/tools/perf/builtin-record.c b/trunk/tools/perf/builtin-record.c
index 7069bd3e90b3..7bc049035484 100644
--- a/trunk/tools/perf/builtin-record.c
+++ b/trunk/tools/perf/builtin-record.c
@@ -331,9 +331,6 @@ static void create_counter(struct perf_evsel *evsel, int cpu)
 		else if (err == ENODEV && cpu_list) {
 			die("No such device - did you specify"
 			    " an out-of-range profile CPU?\n");
-		} else if (err == ENOENT) {
-			die("%s event is not supported. ",
-			    event_name(evsel));
 		} else if (err == EINVAL && sample_id_all_avail) {
 			/*
 			 * Old kernel, no attr->sample_id_type_all field
diff --git a/trunk/tools/perf/builtin-sched.c b/trunk/tools/perf/builtin-sched.c
index abd4b8497bc4..7a4ebeb8b016 100644
--- a/trunk/tools/perf/builtin-sched.c
+++ b/trunk/tools/perf/builtin-sched.c
@@ -489,8 +489,7 @@ static void create_tasks(void)

 	err = pthread_attr_init(&attr);
 	BUG_ON(err);
-	err = pthread_attr_setstacksize(&attr,
-			(size_t) max(16 * 1024, PTHREAD_STACK_MIN));
+	err = pthread_attr_setstacksize(&attr, (size_t)(16*1024));
 	BUG_ON(err);
 	err = pthread_mutex_lock(&start_work_mutex);
 	BUG_ON(err);
@@ -1862,7 +1861,7 @@ static int __cmd_record(int argc, const char **argv)
 	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));

-	if (rec_argv == NULL)
+	if (rec_argv)
 		return -ENOMEM;

 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
diff --git a/trunk/tools/perf/builtin-stat.c b/trunk/tools/perf/builtin-stat.c
index c385a63ebfd1..02b2d8013a61 100644
--- a/trunk/tools/perf/builtin-stat.c
+++ b/trunk/tools/perf/builtin-stat.c
@@ -316,8 +316,6 @@ static int run_perf_stat(int argc __used, const char **argv)
 			      "\t Consider tweaking"
 			      " /proc/sys/kernel/perf_event_paranoid or running as root.",
 			      system_wide ? "system-wide " : "");
-		} else if (errno == ENOENT) {
-			error("%s event is not supported. ", event_name(counter));
 		} else {
 			error("open_counter returned with %d (%s). "
 			      "/bin/dmesg may provide additional information.\n",
@@ -685,7 +683,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
 		nr_counters = ARRAY_SIZE(default_attrs);

 		for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
-			pos = perf_evsel__new(&default_attrs[c],
+			pos = perf_evsel__new(default_attrs[c].type,
+					      default_attrs[c].config,
 					      nr_counters);
 			if (pos == NULL)
 				goto out;
diff --git a/trunk/tools/perf/builtin-test.c b/trunk/tools/perf/builtin-test.c
index ed5696198d3d..1c984342a579 100644
--- a/trunk/tools/perf/builtin-test.c
+++ b/trunk/tools/perf/builtin-test.c
@@ -234,7 +234,6 @@ static int test__vmlinux_matches_kallsyms(void)
 	return err;
 }

-#include "util/cpumap.h"
 #include "util/evsel.h"
 #include

@@ -265,7 +264,6 @@ static int test__open_syscall_event(void)
 	int err = -1, fd;
 	struct thread_map *threads;
 	struct perf_evsel *evsel;
-	struct perf_event_attr attr;
 	unsigned int nr_open_calls = 111, i;
 	int id = trace_event__id("sys_enter_open");

@@ -280,10 +278,7 @@ static int test__open_syscall_event(void)
 		return -1;
 	}

-	memset(&attr, 0, sizeof(attr));
-	attr.type = PERF_TYPE_TRACEPOINT;
-	attr.config = id;
-	evsel = perf_evsel__new(&attr, 0);
+	evsel = perf_evsel__new(PERF_TYPE_TRACEPOINT, id, 0);
 	if (evsel == NULL) {
 		pr_debug("perf_evsel__new\n");
 		goto out_thread_map_delete;
@@ -322,111 +317,6 @@ static int test__open_syscall_event(void)
 	return err;
 }

-#include
-
-static int test__open_syscall_event_on_all_cpus(void)
-{
-	int err = -1, fd, cpu;
-	struct thread_map *threads;
-	struct cpu_map *cpus;
-	struct perf_evsel *evsel;
-	struct perf_event_attr attr;
-	unsigned int nr_open_calls = 111, i;
-	cpu_set_t *cpu_set;
-	size_t cpu_set_size;
-	int id = trace_event__id("sys_enter_open");
-
-	if (id < 0) {
-		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
-		return -1;
-	}
-
-	threads = thread_map__new(-1, getpid());
-	if (threads == NULL) {
-		pr_debug("thread_map__new\n");
-		return -1;
-	}
-
-	cpus = cpu_map__new(NULL);
-	if (threads == NULL) {
-		pr_debug("thread_map__new\n");
-		return -1;
-	}
-
-	cpu_set = CPU_ALLOC(cpus->nr);
-
-	if (cpu_set == NULL)
-		goto out_thread_map_delete;
-
-	cpu_set_size = CPU_ALLOC_SIZE(cpus->nr);
-	CPU_ZERO_S(cpu_set_size, cpu_set);
-
-	memset(&attr, 0, sizeof(attr));
-	attr.type = PERF_TYPE_TRACEPOINT;
-	attr.config = id;
-	evsel = perf_evsel__new(&attr, 0);
-	if (evsel == NULL) {
-		pr_debug("perf_evsel__new\n");
-		goto out_cpu_free;
-	}
-
-	if (perf_evsel__open(evsel, cpus, threads) < 0) {
-		pr_debug("failed to open counter: %s, "
-			 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
-			 strerror(errno));
-		goto out_evsel_delete;
-	}
-
-	for (cpu = 0; cpu < cpus->nr; ++cpu) {
-		unsigned int ncalls = nr_open_calls + cpu;
-
-		CPU_SET(cpu, cpu_set);
-		sched_setaffinity(0, cpu_set_size, cpu_set);
-		for (i = 0; i < ncalls; ++i) {
-			fd = open("/etc/passwd", O_RDONLY);
-			close(fd);
-		}
-		CPU_CLR(cpu, cpu_set);
-	}
-
-	/*
-	 * Here we need to explicitely preallocate the counts, as if
-	 * we use the auto allocation it will allocate just for 1 cpu,
-	 * as we start by cpu 0.
-	 */
-	if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) {
-		pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
-		goto out_close_fd;
-	}
-
-	for (cpu = 0; cpu < cpus->nr; ++cpu) {
-		unsigned int expected;
-
-		if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
-			pr_debug("perf_evsel__open_read_on_cpu\n");
-			goto out_close_fd;
-		}
-
-		expected = nr_open_calls + cpu;
-		if (evsel->counts->cpu[cpu].val != expected) {
-			pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %Ld\n",
-				 expected, cpu, evsel->counts->cpu[cpu].val);
-			goto out_close_fd;
-		}
-	}
-
-	err = 0;
-out_close_fd:
-	perf_evsel__close_fd(evsel, 1, threads->nr);
-out_evsel_delete:
-	perf_evsel__delete(evsel);
-out_cpu_free:
-	CPU_FREE(cpu_set);
-out_thread_map_delete:
-	thread_map__delete(threads);
-	return err;
-}
-
 static struct test {
 	const char *desc;
 	int (*func)(void);
@@ -439,10 +329,6 @@ static struct test {
 		.desc = "detect open syscall event",
 		.func = test__open_syscall_event,
 	},
-	{
-		.desc = "detect open syscall event on all cpus",
-		.func = test__open_syscall_event_on_all_cpus,
-	},
 	{
 		.func = NULL,
 	},
diff --git a/trunk/tools/perf/builtin-top.c b/trunk/tools/perf/builtin-top.c
index 6ce4042421bd..1e67ab9c7ebc 100644
--- a/trunk/tools/perf/builtin-top.c
+++ b/trunk/tools/perf/builtin-top.c
@@ -1247,8 +1247,6 @@ static void start_counter(int i, struct perf_evsel *evsel)
 			die("Permission error - are you root?\n"
 			    "\t Consider tweaking"
 			    " /proc/sys/kernel/perf_event_paranoid.\n");
-		if (err == ENOENT)
-			die("%s event is not supported. ", event_name(evsel));
 		/*
 		 * If it's cycles then fall back to hrtimer
 		 * based cpu-clock-tick sw counter, which
diff --git a/trunk/tools/perf/util/evsel.c b/trunk/tools/perf/util/evsel.c
index f5cfed60af98..c95267e63c5b 100644
--- a/trunk/tools/perf/util/evsel.c
+++ b/trunk/tools/perf/util/evsel.c
@@ -6,13 +6,14 @@

 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))

-struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
+struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx)
 {
 	struct perf_evsel *evsel = zalloc(sizeof(*evsel));

 	if (evsel != NULL) {
 		evsel->idx	   = idx;
-		evsel->attr	   = *attr;
+		evsel->attr.type   = type;
+		evsel->attr.config = config;
 		INIT_LIST_HEAD(&evsel->node);
 	}

@@ -127,75 +128,59 @@ int __perf_evsel__read(struct perf_evsel *evsel,
 	return 0;
 }

-static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
-			      struct thread_map *threads)
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
 {
-	int cpu, thread;
+	int cpu;

-	if (evsel->fd == NULL &&
-	    perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
+	if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, cpus->nr, 1) < 0)
 		return -1;

 	for (cpu = 0; cpu < cpus->nr; cpu++) {
-		for (thread = 0; thread < threads->nr; thread++) {
-			FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
-								     threads->map[thread],
-								     cpus->map[cpu], -1, 0);
-			if (FD(evsel, cpu, thread) < 0)
-				goto out_close;
-		}
+		FD(evsel, cpu, 0) = sys_perf_event_open(&evsel->attr, -1,
+							cpus->map[cpu], -1, 0);
+		if (FD(evsel, cpu, 0) < 0)
+			goto out_close;
 	}

 	return 0;

 out_close:
-	do {
-		while (--thread >= 0) {
-			close(FD(evsel, cpu, thread));
-			FD(evsel, cpu, thread) = -1;
-		}
-		thread = threads->nr;
-	} while (--cpu >= 0);
+	while (--cpu >= 0) {
+		close(FD(evsel, cpu, 0));
+		FD(evsel, cpu, 0) = -1;
+	}
 	return -1;
 }

-static struct {
-	struct cpu_map map;
-	int cpus[1];
-} empty_cpu_map = {
-	.map.nr = 1,
-	.cpus = { -1, },
-};
-
-static struct {
-	struct thread_map map;
-	int threads[1];
-} empty_thread_map = {
-	.map.nr = 1,
-	.threads = { -1, },
-};
-
-int perf_evsel__open(struct perf_evsel *evsel,
-		     struct cpu_map *cpus, struct thread_map *threads)
+int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
 {
+	int thread;
+
+	if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, 1, threads->nr))
+		return -1;

-	if (cpus == NULL) {
-		/* Work around old compiler warnings about strict aliasing */
-		cpus = &empty_cpu_map.map;
+	for (thread = 0; thread < threads->nr; thread++) {
+		FD(evsel, 0, thread) = sys_perf_event_open(&evsel->attr,
+							   threads->map[thread], -1, -1, 0);
+		if (FD(evsel, 0, thread) < 0)
+			goto out_close;
 	}

-	if (threads == NULL)
-		threads = &empty_thread_map.map;
+	return 0;

-	return __perf_evsel__open(evsel, cpus, threads);
+out_close:
+	while (--thread >= 0) {
+		close(FD(evsel, 0, thread));
+		FD(evsel, 0, thread) = -1;
+	}
+	return -1;
 }

-int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
+int perf_evsel__open(struct perf_evsel *evsel,
+		     struct cpu_map *cpus, struct thread_map *threads)
 {
-	return __perf_evsel__open(evsel, cpus, &empty_thread_map.map);
-}
+	if (threads == NULL)
+		return perf_evsel__open_per_cpu(evsel, cpus);

-int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
-{
-	return __perf_evsel__open(evsel, &empty_cpu_map.map, threads);
+	return perf_evsel__open_per_thread(evsel, threads);
 }
diff --git a/trunk/tools/perf/util/evsel.h b/trunk/tools/perf/util/evsel.h
index b2d755fe88a5..a0ccd69c3fc2 100644
--- a/trunk/tools/perf/util/evsel.h
+++ b/trunk/tools/perf/util/evsel.h
@@ -37,7 +37,7 @@ struct perf_evsel {
 struct cpu_map;
 struct thread_map;

-struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx);
+struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx);
 void perf_evsel__delete(struct perf_evsel *evsel);

 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
diff --git a/trunk/tools/perf/util/parse-events.c b/trunk/tools/perf/util/parse-events.c
index 5cb6f4bde905..649083f27e08 100644
--- a/trunk/tools/perf/util/parse-events.c
+++ b/trunk/tools/perf/util/parse-events.c
@@ -490,31 +490,6 @@ parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
 	return EVT_HANDLED_ALL;
 }

-static int store_event_type(const char *orgname)
-{
-	char filename[PATH_MAX], *c;
-	FILE *file;
-	int id, n;
-
-	sprintf(filename, "%s/", debugfs_path);
-	strncat(filename, orgname, strlen(orgname));
-	strcat(filename, "/id");
-
-	c = strchr(filename, ':');
-	if (c)
-		*c = '/';
-
-	file = fopen(filename, "r");
-	if (!file)
-		return 0;
-	n = fscanf(file, "%i", &id);
-	fclose(file);
-	if (n < 1) {
-		pr_err("cannot store event ID\n");
-		return -EINVAL;
-	}
-	return perf_header__push_event(id, orgname);
-}

 static enum event_result
 parse_tracepoint_event(const char **strp, struct perf_event_attr *attr)
@@ -558,13 +533,9 @@ static enum event_result parse_tracepoint_event(const char **strp,
 		*strp += strlen(sys_name) + evt_length;
 		return parse_multiple_tracepoint_event(sys_name, evt_name,
 						       flags);
-	} else {
-		if (store_event_type(evt_name) < 0)
-			return EVT_FAILED;
-
+	} else
 		return parse_single_tracepoint_event(sys_name, evt_name,
 						     evt_length, attr, strp);
-	}
 }

@@ -807,11 +778,41 @@ parse_event_symbols(const char **str, struct perf_event_attr *attr)
 	return ret;
 }

+static int store_event_type(const char *orgname)
+{
+	char filename[PATH_MAX], *c;
+	FILE *file;
+	int id, n;
+
+	sprintf(filename, "%s/", debugfs_path);
+	strncat(filename, orgname, strlen(orgname));
+	strcat(filename, "/id");
+
+	c = strchr(filename, ':');
+	if (c)
+		*c = '/';
+
+	file = fopen(filename, "r");
+	if (!file)
+		return 0;
+	n = fscanf(file, "%i", &id);
+	fclose(file);
+	if (n < 1) {
+		pr_err("cannot store event ID\n");
+		return -EINVAL;
+	}
+	return perf_header__push_event(id, orgname);
+}
+
 int parse_events(const struct option *opt __used, const char *str, int unset __used)
 {
 	struct perf_event_attr attr;
 	enum event_result ret;

+	if (strchr(str, ':'))
+		if (store_event_type(str) < 0)
+			return -1;
+
 	for (;;) {
 		memset(&attr, 0, sizeof(attr));
 		ret = parse_event_symbols(&str, &attr);
@@ -823,7 +824,7 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
 		if (ret != EVT_HANDLED_ALL) {
 			struct perf_evsel *evsel;

-			evsel = perf_evsel__new(&attr,
+			evsel = perf_evsel__new(attr.type, attr.config,
 						nr_counters);
 			if (evsel == NULL)
 				return -1;
@@ -1013,15 +1014,8 @@ void print_events(void)

 int perf_evsel_list__create_default(void)
 {
-	struct perf_evsel *evsel;
-	struct perf_event_attr attr;
-
-	memset(&attr, 0, sizeof(attr));
-	attr.type = PERF_TYPE_HARDWARE;
-	attr.config = PERF_COUNT_HW_CPU_CYCLES;
-
-	evsel = perf_evsel__new(&attr, 0);
-
+	struct perf_evsel *evsel = perf_evsel__new(PERF_TYPE_HARDWARE,
+						   PERF_COUNT_HW_CPU_CYCLES, 0);
 	if (evsel == NULL)
 		return -ENOMEM;
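The evsel.h and parse-events.c hunks above move perf_evsel__new() from taking a full perf_event_attr to taking just a (type, config) pair that is copied into the embedded attr. A reduced, self-contained sketch of that constructor shape is shown below; the struct and names are illustrative stand-ins, not the perf sources, and the 0/0 arguments merely stand in for a hardware cycles event.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Cut-down stand-ins for perf_event_attr / perf_evsel. */
struct demo_attr {
	uint32_t type;
	uint64_t config;
};

struct demo_evsel {
	struct demo_attr attr;
	int idx;
};

/* Constructor in the style restored above: callers pass type and config
 * directly instead of preparing a full attr structure first. */
static struct demo_evsel *demo_evsel__new(uint32_t type, uint64_t config, int idx)
{
	struct demo_evsel *evsel = calloc(1, sizeof(*evsel));

	if (evsel != NULL) {
		evsel->idx = idx;
		evsel->attr.type = type;
		evsel->attr.config = config;
	}
	return evsel;
}

int main(void)
{
	struct demo_evsel *evsel = demo_evsel__new(0, 0, 0);

	if (evsel == NULL)
		return 1;
	printf("type=%u config=%llu idx=%d\n", evsel->attr.type,
	       (unsigned long long)evsel->attr.config, evsel->idx);
	free(evsel);
	return 0;
}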
diff --git a/trunk/tools/perf/util/session.c b/trunk/tools/perf/util/session.c
index 313dac2d94ce..6fb4694d05fa 100644
--- a/trunk/tools/perf/util/session.c
+++ b/trunk/tools/perf/util/session.c
@@ -1007,7 +1007,7 @@ int __perf_session__process_events(struct perf_session *session,
 	if (size == 0)
 		size = 8;

-	if (head + event->header.size > mmap_size) {
+	if (head + event->header.size >= mmap_size) {
 		if (mmaps[map_idx]) {
 			munmap(mmaps[map_idx], mmap_size);
 			mmaps[map_idx] = NULL;
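The session.c hunk above changes the sliding-window test from '>' to '>=' when deciding whether the next record still fits in the currently mmap'd window of perf.data. A reduced sketch of that boundary check, with hypothetical names (demo_reader, need_remap), is shown below; with '>=', a record that would end exactly at the window boundary also forces a remap.

#include <stdint.h>
#include <stdio.h>

/* Minimal model of a sliding read window over a data file. */
struct demo_reader {
	uint64_t window_size;	/* bytes currently mapped */
	uint64_t head;		/* offset of the next record inside the window */
};

/* Returns 1 when the caller must unmap and remap further into the file
 * before reading the next record. */
static int need_remap(const struct demo_reader *r, uint16_t record_size)
{
	return r->head + record_size >= r->window_size;
}

int main(void)
{
	struct demo_reader r = { .window_size = 4096, .head = 4088 };

	printf("8-byte record at 4088: remap=%d\n", need_remap(&r, 8));	/* 1 */
	printf("4-byte record at 4088: remap=%d\n", need_remap(&r, 4));	/* 0 */
	return 0;
}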