From faef0de18d858a7d09f501735afbfed61c1183ce Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 31 Mar 2009 15:49:08 +0100 Subject: [PATCH] --- yaml --- r: 143351 b: refs/heads/master c: 7fa5d20d1a5e60ef7e453993b67b26c87dc09f07 h: refs/heads/master i: 143349: 37a42114b9dc1cf51b455dec4339db507a35f96c 143347: 080c2b088e0b0514006e10a735c8e2924e4754f0 143343: 0b2023ae85340dc04c89e54907312ad47143887b v: v3 --- [refs] | 2 +- trunk/Documentation/block/biodoc.txt | 19 +- .../powerpc/dts-bindings/fsl/i2c.txt | 46 +-- trunk/arch/powerpc/Kconfig | 2 +- trunk/arch/powerpc/boot/dts/tqm8540.dts | 4 +- trunk/arch/powerpc/boot/dts/tqm8541.dts | 4 +- .../powerpc/boot/dts/tqm8548-bigflash.dts | 8 +- trunk/arch/powerpc/boot/dts/tqm8548.dts | 8 +- trunk/arch/powerpc/boot/dts/tqm8555.dts | 4 +- trunk/arch/powerpc/boot/dts/tqm8560.dts | 4 +- .../powerpc/configs/85xx/tqm8548_defconfig | 164 +++++--- trunk/arch/powerpc/include/asm/futex.h | 12 +- trunk/arch/powerpc/include/asm/mmu.h | 6 - trunk/arch/powerpc/include/asm/ppc-opcode.h | 13 +- trunk/arch/powerpc/kernel/cputable.c | 2 +- trunk/arch/powerpc/mm/tlb_nohash.c | 1 + trunk/arch/powerpc/mm/tlb_nohash_low.S | 14 +- trunk/arch/powerpc/platforms/pseries/dtl.c | 1 - .../powerpc/platforms/pseries/eeh_driver.c | 2 - .../x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 6 - trunk/block/as-iosched.c | 116 +++--- trunk/block/blk-barrier.c | 3 + trunk/block/blk-sysfs.c | 4 +- trunk/block/blk.h | 4 +- trunk/block/cfq-iosched.c | 270 +++---------- trunk/block/elevator.c | 8 +- trunk/block/ioctl.c | 2 + trunk/block/scsi_ioctl.c | 6 +- trunk/drivers/block/brd.c | 5 +- trunk/drivers/md/dm-bio-list.h | 117 ++++++ trunk/drivers/md/dm-delay.c | 2 + trunk/drivers/md/dm-mpath.c | 1 + trunk/drivers/md/dm-raid1.c | 1 + trunk/drivers/md/dm-region-hash.c | 1 + trunk/drivers/md/dm-snap.c | 1 + trunk/drivers/md/dm.c | 1 + trunk/drivers/md/raid1.c | 1 + trunk/drivers/md/raid10.c | 1 + trunk/fs/bio.c | 18 - trunk/fs/buffer.c | 11 +- trunk/fs/direct-io.c | 2 + trunk/fs/ext4/extents.c | 2 + trunk/fs/gfs2/ops_fstype.c | 5 + trunk/fs/gfs2/quota.c | 2 +- trunk/fs/inode.c | 36 ++ trunk/fs/ocfs2/file.c | 94 +---- trunk/fs/pipe.c | 42 +- trunk/fs/splice.c | 371 +++++++++--------- trunk/include/linux/bio.h | 109 ----- trunk/include/linux/fs.h | 64 +-- trunk/include/linux/fsl_devices.h | 4 + trunk/include/linux/pipe_fs_i.h | 5 - trunk/include/linux/splice.h | 12 - trunk/kernel/power/swap.c | 2 + 54 files changed, 687 insertions(+), 958 deletions(-) create mode 100644 trunk/drivers/md/dm-bio-list.h diff --git a/[refs] b/[refs] index 31cdac3c0ffc..1245111fde98 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 23da64b4714812b66ecf010e7dfb3ed1bf2eda69 +refs/heads/master: 7fa5d20d1a5e60ef7e453993b67b26c87dc09f07 diff --git a/trunk/Documentation/block/biodoc.txt b/trunk/Documentation/block/biodoc.txt index 6fab97ea7e6b..ecad6ee75705 100644 --- a/trunk/Documentation/block/biodoc.txt +++ b/trunk/Documentation/block/biodoc.txt @@ -1040,21 +1040,23 @@ Front merges are handled by the binary trees in AS and deadline schedulers. iii. Plugging the queue to batch requests in anticipation of opportunities for merge/sort optimizations +This is just the same as in 2.4 so far, though per-device unplugging +support is anticipated for 2.5. Also with a priority-based i/o scheduler, +such decisions could be based on request priorities. 
+ Plugging is an approach that the current i/o scheduling algorithm resorts to so that it collects up enough requests in the queue to be able to take advantage of the sorting/merging logic in the elevator. If the queue is empty when a request comes in, then it plugs the request queue -(sort of like plugging the bath tub of a vessel to get fluid to build up) +(sort of like plugging the bottom of a vessel to get fluid to build up) till it fills up with a few more requests, before starting to service the requests. This provides an opportunity to merge/sort the requests before passing them down to the device. There are various conditions when the queue is unplugged (to open up the flow again), either through a scheduled task or could be on demand. For example wait_on_buffer sets the unplugging going -through sync_buffer() running blk_run_address_space(mapping). Or the caller -can do it explicity through blk_unplug(bdev). So in the read case, -the queue gets explicitly unplugged as part of waiting for completion on that -buffer. For page driven IO, the address space ->sync_page() takes care of -doing the blk_run_address_space(). +(by running tq_disk) so the read gets satisfied soon. So in the read case, +the queue gets explicitly unplugged as part of waiting for completion, +in fact all queues get unplugged as a side-effect. Aside: This is kind of controversial territory, as it's not clear if plugging is @@ -1065,6 +1067,11 @@ Aside: multi-page bios being queued in one shot, we may not need to wait to merge a big request from the broken up pieces coming by. + Per-queue granularity unplugging (still a Todo) may help reduce some of the + concerns with just a single tq_disk flush approach. Something like + blk_kick_queue() to unplug a specific queue (right away ?) + or optionally, all queues, is in the plan. + 4.4 I/O contexts I/O contexts provide a dynamically allocated per process data area. They may be used in I/O schedulers, and in the block layer (could be used for IO statis, diff --git a/trunk/Documentation/powerpc/dts-bindings/fsl/i2c.txt b/trunk/Documentation/powerpc/dts-bindings/fsl/i2c.txt index b6d2e21474f9..d0ab33e21fe6 100644 --- a/trunk/Documentation/powerpc/dts-bindings/fsl/i2c.txt +++ b/trunk/Documentation/powerpc/dts-bindings/fsl/i2c.txt @@ -7,10 +7,8 @@ Required properties : Recommended properties : - - compatible : compatibility list with 2 entries, the first should - be "fsl,CHIP-i2c" where CHIP is the name of a compatible processor, - e.g. mpc8313, mpc8543, mpc8544, mpc5200 or mpc5200b. The second one - should be "fsl-i2c". + - compatible : Should be "fsl-i2c" for parts compatible with + Freescale I2C specifications. - interrupts : where a is the interrupt number and b is a field that represents an encoding of the sense and level information for the interrupt. This should be encoded based on @@ -18,31 +16,17 @@ Recommended properties : controller you have. - interrupt-parent : the phandle for the interrupt controller that services interrupts for this device. - - fsl,preserve-clocking : boolean; if defined, the clock settings - from the bootloader are preserved (not touched). - - clock-frequency : desired I2C bus clock frequency in Hz. 
- -Examples : - - i2c@3d00 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,mpc5200b-i2c","fsl,mpc5200-i2c","fsl-i2c"; - cell-index = <0>; - reg = <0x3d00 0x40>; - interrupts = <2 15 0>; - interrupt-parent = <&mpc5200_pic>; - fsl,preserve-clocking; + - dfsrr : boolean; if defined, indicates that this I2C device has + a digital filter sampling rate register + - fsl5200-clocking : boolean; if defined, indicated that this device + uses the FSL 5200 clocking mechanism. + +Example : + i2c@3000 { + interrupt-parent = <40000>; + interrupts = <1b 3>; + reg = <3000 18>; + device_type = "i2c"; + compatible = "fsl-i2c"; + dfsrr; }; - - i2c@3100 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <1>; - compatible = "fsl,mpc8544-i2c", "fsl-i2c"; - reg = <0x3100 0x100>; - interrupts = <43 2>; - interrupt-parent = <&mpic>; - clock-frequency = <400000>; - }; - diff --git a/trunk/arch/powerpc/Kconfig b/trunk/arch/powerpc/Kconfig index 4c7804551362..5b50e1ac6179 100644 --- a/trunk/arch/powerpc/Kconfig +++ b/trunk/arch/powerpc/Kconfig @@ -462,7 +462,7 @@ config PPC_64K_PAGES config PPC_256K_PAGES bool "256k page size" if 44x - depends on !STDBINUTILS + depends on !STDBINUTILS && (!SHMEM || BROKEN) help Make the page size 256k. diff --git a/trunk/arch/powerpc/boot/dts/tqm8540.dts b/trunk/arch/powerpc/boot/dts/tqm8540.dts index b6f1fc6eb960..231bae756637 100644 --- a/trunk/arch/powerpc/boot/dts/tqm8540.dts +++ b/trunk/arch/powerpc/boot/dts/tqm8540.dts @@ -84,9 +84,9 @@ interrupt-parent = <&mpic>; dfsrr; - dtt@48 { + dtt@50 { compatible = "national,lm75"; - reg = <0x48>; + reg = <0x50>; }; rtc@68 { diff --git a/trunk/arch/powerpc/boot/dts/tqm8541.dts b/trunk/arch/powerpc/boot/dts/tqm8541.dts index fa6a3d54a8a5..4356a1f08295 100644 --- a/trunk/arch/powerpc/boot/dts/tqm8541.dts +++ b/trunk/arch/powerpc/boot/dts/tqm8541.dts @@ -83,9 +83,9 @@ interrupt-parent = <&mpic>; dfsrr; - dtt@48 { + dtt@50 { compatible = "national,lm75"; - reg = <0x48>; + reg = <0x50>; }; rtc@68 { diff --git a/trunk/arch/powerpc/boot/dts/tqm8548-bigflash.dts b/trunk/arch/powerpc/boot/dts/tqm8548-bigflash.dts index 00f7ed7a2455..19aa72301c83 100644 --- a/trunk/arch/powerpc/boot/dts/tqm8548-bigflash.dts +++ b/trunk/arch/powerpc/boot/dts/tqm8548-bigflash.dts @@ -85,9 +85,9 @@ interrupt-parent = <&mpic>; dfsrr; - dtt@48 { + dtt@50 { compatible = "national,lm75"; - reg = <0x48>; + reg = <0x50>; }; rtc@68 { @@ -247,7 +247,7 @@ interrupts = <31 2 32 2 33 2>; interrupt-parent = <&mpic>; tbi-handle = <&tbi2>; - phy-handle = <&phy4>; + phy-handle = <&phy3>; mdio@520 { #address-cells = <1>; @@ -275,7 +275,7 @@ interrupts = <37 2 38 2 39 2>; interrupt-parent = <&mpic>; tbi-handle = <&tbi3>; - phy-handle = <&phy5>; + phy-handle = <&phy4>; mdio@520 { #address-cells = <1>; diff --git a/trunk/arch/powerpc/boot/dts/tqm8548.dts b/trunk/arch/powerpc/boot/dts/tqm8548.dts index 673e4a778ac8..49145a04fc6c 100644 --- a/trunk/arch/powerpc/boot/dts/tqm8548.dts +++ b/trunk/arch/powerpc/boot/dts/tqm8548.dts @@ -85,9 +85,9 @@ interrupt-parent = <&mpic>; dfsrr; - dtt@48 { + dtt@50 { compatible = "national,lm75"; - reg = <0x48>; + reg = <0x50>; }; rtc@68 { @@ -247,7 +247,7 @@ interrupts = <31 2 32 2 33 2>; interrupt-parent = <&mpic>; tbi-handle = <&tbi2>; - phy-handle = <&phy4>; + phy-handle = <&phy3>; mdio@520 { #address-cells = <1>; @@ -275,7 +275,7 @@ interrupts = <37 2 38 2 39 2>; interrupt-parent = <&mpic>; tbi-handle = <&tbi3>; - phy-handle = <&phy5>; + phy-handle = <&phy4>; mdio@520 { #address-cells = <1>; diff --git 
a/trunk/arch/powerpc/boot/dts/tqm8555.dts b/trunk/arch/powerpc/boot/dts/tqm8555.dts index 6a99f1eef7ad..06d366ebbda3 100644 --- a/trunk/arch/powerpc/boot/dts/tqm8555.dts +++ b/trunk/arch/powerpc/boot/dts/tqm8555.dts @@ -83,9 +83,9 @@ interrupt-parent = <&mpic>; dfsrr; - dtt@48 { + dtt@50 { compatible = "national,lm75"; - reg = <0x48>; + reg = <0x50>; }; rtc@68 { diff --git a/trunk/arch/powerpc/boot/dts/tqm8560.dts b/trunk/arch/powerpc/boot/dts/tqm8560.dts index b6c2d71defd3..feff915e0492 100644 --- a/trunk/arch/powerpc/boot/dts/tqm8560.dts +++ b/trunk/arch/powerpc/boot/dts/tqm8560.dts @@ -85,9 +85,9 @@ interrupt-parent = <&mpic>; dfsrr; - dtt@48 { + dtt@50 { compatible = "national,lm75"; - reg = <0x48>; + reg = <0x50>; }; rtc@68 { diff --git a/trunk/arch/powerpc/configs/85xx/tqm8548_defconfig b/trunk/arch/powerpc/configs/85xx/tqm8548_defconfig index 43030fea2eee..0bc45975911a 100644 --- a/trunk/arch/powerpc/configs/85xx/tqm8548_defconfig +++ b/trunk/arch/powerpc/configs/85xx/tqm8548_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29-rc7 -# Mon Mar 16 09:03:28 2009 +# Linux kernel version: 2.6.29-rc2 +# Mon Jan 26 15:36:20 2009 # # CONFIG_PPC64 is not set @@ -22,7 +22,6 @@ CONFIG_FSL_EMB_PERFMON=y # CONFIG_PHYS_64BIT is not set CONFIG_SPE=y CONFIG_PPC_MMU_NOHASH=y -CONFIG_PPC_BOOK3E_MMU=y # CONFIG_PPC_MM_SLICES is not set # CONFIG_SMP is not set CONFIG_PPC32=y @@ -76,15 +75,6 @@ CONFIG_SYSVIPC_SYSCTL=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set # CONFIG_AUDIT is not set - -# -# RCU Subsystem -# -CONFIG_CLASSIC_RCU=y -# CONFIG_TREE_RCU is not set -# CONFIG_PREEMPT_RCU is not set -# CONFIG_TREE_RCU_TRACE is not set -# CONFIG_PREEMPT_RCU_TRACE is not set # CONFIG_IKCONFIG is not set CONFIG_LOG_BUF_SHIFT=14 CONFIG_GROUP_SCHED=y @@ -162,6 +152,11 @@ CONFIG_DEFAULT_AS=y # CONFIG_DEFAULT_CFQ is not set # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="anticipatory" +CONFIG_CLASSIC_RCU=y +# CONFIG_TREE_RCU is not set +# CONFIG_PREEMPT_RCU is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set # CONFIG_FREEZER is not set # @@ -207,7 +202,7 @@ CONFIG_MPIC=y # # Kernel options # -CONFIG_HIGHMEM=y +# CONFIG_HIGHMEM is not set CONFIG_TICK_ONESHOT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y @@ -249,7 +244,6 @@ CONFIG_UNEVICTABLE_LRU=y CONFIG_PPC_4K_PAGES=y # CONFIG_PPC_16K_PAGES is not set # CONFIG_PPC_64K_PAGES is not set -# CONFIG_PPC_256K_PAGES is not set CONFIG_FORCE_MAX_ZONEORDER=11 CONFIG_PROC_DEVICETREE=y # CONFIG_CMDLINE_BOOL is not set @@ -265,7 +259,6 @@ CONFIG_ZONE_DMA=y CONFIG_PPC_INDIRECT_PCI=y CONFIG_FSL_SOC=y CONFIG_FSL_PCI=y -CONFIG_FSL_LBC=y CONFIG_PPC_PCI_CHOICE=y CONFIG_PCI=y CONFIG_PCI_DOMAINS=y @@ -291,11 +284,10 @@ CONFIG_ARCH_SUPPORTS_MSI=y # Default settings for advanced configuration options are used # CONFIG_LOWMEM_SIZE=0x30000000 -CONFIG_LOWMEM_CAM_NUM=3 CONFIG_PAGE_OFFSET=0xc0000000 CONFIG_KERNEL_START=0xc0000000 CONFIG_PHYSICAL_START=0x00000000 -CONFIG_PHYSICAL_ALIGN=0x04000000 +CONFIG_PHYSICAL_ALIGN=0x10000000 CONFIG_TASK_SIZE=0xc0000000 CONFIG_NET=y @@ -371,7 +363,12 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set # CONFIG_PHONET is not set -# CONFIG_WIRELESS is not set +CONFIG_WIRELESS=y +# CONFIG_CFG80211 is not set +CONFIG_WIRELESS_OLD_REGULATORY=y +# CONFIG_WIRELESS_EXT is not set +# CONFIG_LIB80211 is not set +# CONFIG_MAC80211 is not set # CONFIG_WIMAX is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not 
set @@ -474,18 +471,27 @@ CONFIG_MTD_NAND_IDS=y # CONFIG_MTD_NAND_NANDSIM is not set # CONFIG_MTD_NAND_PLATFORM is not set # CONFIG_MTD_NAND_FSL_ELBC is not set -CONFIG_MTD_NAND_FSL_UPM=y +# CONFIG_MTD_NAND_FSL_UPM is not set # CONFIG_MTD_ONENAND is not set # # LPDDR flash memory drivers # # CONFIG_MTD_LPDDR is not set +# CONFIG_MTD_QINFO_PROBE is not set # # UBI - Unsorted block images # -# CONFIG_MTD_UBI is not set +CONFIG_MTD_UBI=m +CONFIG_MTD_UBI_WL_THRESHOLD=4096 +CONFIG_MTD_UBI_BEB_RESERVE=1 +# CONFIG_MTD_UBI_GLUEBI is not set + +# +# UBI debugging options +# +# CONFIG_MTD_UBI_DEBUG is not set CONFIG_OF_DEVICE=y CONFIG_OF_I2C=y # CONFIG_PARPORT is not set @@ -509,21 +515,69 @@ CONFIG_BLK_DEV_RAM_SIZE=32768 # CONFIG_BLK_DEV_HD is not set CONFIG_MISC_DEVICES=y # CONFIG_PHANTOM is not set +# CONFIG_EEPROM_93CX6 is not set # CONFIG_SGI_IOC4 is not set # CONFIG_TIFM_CORE is not set # CONFIG_ICS932S401 is not set # CONFIG_ENCLOSURE_SERVICES is not set # CONFIG_HP_ILO is not set # CONFIG_C2PORT is not set - -# -# EEPROM support -# -# CONFIG_EEPROM_AT24 is not set -# CONFIG_EEPROM_LEGACY is not set -# CONFIG_EEPROM_93CX6 is not set CONFIG_HAVE_IDE=y -# CONFIG_IDE is not set +CONFIG_IDE=y + +# +# Please see Documentation/ide/ide.txt for help/info on IDE drives +# +CONFIG_IDE_TIMINGS=y +# CONFIG_BLK_DEV_IDE_SATA is not set +CONFIG_IDE_GD=y +CONFIG_IDE_GD_ATA=y +# CONFIG_IDE_GD_ATAPI is not set +# CONFIG_BLK_DEV_IDECD is not set +# CONFIG_BLK_DEV_IDETAPE is not set +# CONFIG_IDE_TASK_IOCTL is not set +CONFIG_IDE_PROC_FS=y + +# +# IDE chipset support/bugfixes +# +# CONFIG_BLK_DEV_PLATFORM is not set +CONFIG_BLK_DEV_IDEDMA_SFF=y + +# +# PCI IDE chipsets support +# +CONFIG_BLK_DEV_IDEPCI=y +CONFIG_IDEPCI_PCIBUS_ORDER=y +# CONFIG_BLK_DEV_OFFBOARD is not set +CONFIG_BLK_DEV_GENERIC=y +# CONFIG_BLK_DEV_OPTI621 is not set +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +# CONFIG_BLK_DEV_AMD74XX is not set +# CONFIG_BLK_DEV_CMD64X is not set +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CS5520 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_JMICRON is not set +# CONFIG_BLK_DEV_SC1200 is not set +# CONFIG_BLK_DEV_PIIX is not set +# CONFIG_BLK_DEV_IT8172 is not set +# CONFIG_BLK_DEV_IT8213 is not set +# CONFIG_BLK_DEV_IT821X is not set +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +# CONFIG_BLK_DEV_PDC202XX_NEW is not set +# CONFIG_BLK_DEV_SVWKS is not set +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SL82C105 is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +CONFIG_BLK_DEV_VIA82CXXX=y +# CONFIG_BLK_DEV_TC86C001 is not set +CONFIG_BLK_DEV_IDEDMA=y # # SCSI device support @@ -596,7 +650,7 @@ CONFIG_MII=y CONFIG_NETDEV_1000=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set -# CONFIG_E1000 is not set +CONFIG_E1000=y # CONFIG_E1000E is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set @@ -614,7 +668,6 @@ CONFIG_GIANFAR=y # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set # CONFIG_ATL1E is not set -# CONFIG_ATL1C is not set # CONFIG_JME is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set @@ -782,6 +835,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -920,7 +975,26 @@ CONFIG_HID=y # 
Special HID drivers # CONFIG_HID_COMPAT=y -# CONFIG_USB_SUPPORT is not set +CONFIG_USB_SUPPORT=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +# CONFIG_USB is not set +# CONFIG_USB_OTG_WHITELIST is not set +# CONFIG_USB_OTG_BLACKLIST_HUB is not set + +# +# Enable Host or Gadget support to see Inventra options +# + +# +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; +# +# CONFIG_USB_GADGET is not set + +# +# OTG and related infrastructure +# # CONFIG_UWB is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set @@ -990,9 +1064,16 @@ CONFIG_RTC_DRV_DS1307=y # # File systems # -# CONFIG_EXT2_FS is not set -# CONFIG_EXT3_FS is not set +CONFIG_EXT2_FS=y +# CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +# CONFIG_EXT3_FS_POSIX_ACL is not set +# CONFIG_EXT3_FS_SECURITY is not set # CONFIG_EXT4_FS is not set +CONFIG_JBD=y +CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set # CONFIG_FS_POSIX_ACL is not set @@ -1041,17 +1122,8 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set -CONFIG_JFFS2_FS=y -CONFIG_JFFS2_FS_DEBUG=0 -CONFIG_JFFS2_FS_WRITEBUFFER=y -# CONFIG_JFFS2_FS_WBUF_VERIFY is not set -# CONFIG_JFFS2_SUMMARY is not set -# CONFIG_JFFS2_FS_XATTR is not set -# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set -CONFIG_JFFS2_ZLIB=y -# CONFIG_JFFS2_LZO is not set -CONFIG_JFFS2_RTIME=y -# CONFIG_JFFS2_RUBIN is not set +# CONFIG_JFFS2_FS is not set +# CONFIG_UBIFS_FS is not set # CONFIG_CRAMFS is not set # CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set @@ -1112,8 +1184,6 @@ CONFIG_GENERIC_FIND_LAST_BIT=y CONFIG_CRC32=y # CONFIG_CRC7 is not set # CONFIG_LIBCRC32C is not set -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y CONFIG_PLIST=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y @@ -1149,7 +1219,6 @@ CONFIG_DEBUG_MUTEXES=y # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_DEBUG_KOBJECT is not set -# CONFIG_DEBUG_HIGHMEM is not set # CONFIG_DEBUG_BUGVERBOSE is not set # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set @@ -1167,7 +1236,6 @@ CONFIG_DEBUG_MUTEXES=y # CONFIG_LATENCYTOP is not set CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_HAVE_FUNCTION_TRACER=y -CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y diff --git a/trunk/arch/powerpc/include/asm/futex.h b/trunk/arch/powerpc/include/asm/futex.h index 9696cc36d2dc..6d406c5c5de4 100644 --- a/trunk/arch/powerpc/include/asm/futex.h +++ b/trunk/arch/powerpc/include/asm/futex.h @@ -27,7 +27,7 @@ PPC_LONG "1b,4b,2b,4b\n" \ ".previous" \ : "=&r" (oldval), "=&r" (ret) \ - : "b" (uaddr), "i" (-EFAULT), "r" (oparg) \ + : "b" (uaddr), "i" (-EFAULT), "1" (oparg) \ : "cr0", "memory") static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) @@ -47,19 +47,19 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("mr %1,%4\n", ret, oldval, uaddr, oparg); + __futex_atomic_op("", ret, oldval, uaddr, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op("add %1,%0,%4\n", ret, oldval, uaddr, oparg); + __futex_atomic_op("add %1,%0,%1\n", ret, oldval, uaddr, oparg); break; case FUTEX_OP_OR: - __futex_atomic_op("or %1,%0,%4\n", ret, oldval, uaddr, oparg); + __futex_atomic_op("or %1,%0,%1\n", ret, oldval, uaddr, oparg); break; case FUTEX_OP_ANDN: - __futex_atomic_op("andc 
%1,%0,%4\n", ret, oldval, uaddr, oparg); + __futex_atomic_op("andc %1,%0,%1\n", ret, oldval, uaddr, oparg); break; case FUTEX_OP_XOR: - __futex_atomic_op("xor %1,%0,%4\n", ret, oldval, uaddr, oparg); + __futex_atomic_op("xor %1,%0,%1\n", ret, oldval, uaddr, oparg); break; default: ret = -ENOSYS; diff --git a/trunk/arch/powerpc/include/asm/mmu.h b/trunk/arch/powerpc/include/asm/mmu.h index 86d2366ab6a1..cbf154387091 100644 --- a/trunk/arch/powerpc/include/asm/mmu.h +++ b/trunk/arch/powerpc/include/asm/mmu.h @@ -52,12 +52,6 @@ */ #define MMU_FTR_NEED_DTLB_SW_LRU ASM_CONST(0x00200000) -/* This indicates that the processor uses the wrong opcode for tlbilx - * instructions. During the ISA 2.06 development the opcode for tlbilx - * changed and some early implementations used to old opcode - */ -#define MMU_FTR_TLBILX_EARLY_OPCODE ASM_CONST(0x00400000) - #ifndef __ASSEMBLY__ #include diff --git a/trunk/arch/powerpc/include/asm/ppc-opcode.h b/trunk/arch/powerpc/include/asm/ppc-opcode.h index ef4da37f3c10..f4a4db8d5555 100644 --- a/trunk/arch/powerpc/include/asm/ppc-opcode.h +++ b/trunk/arch/powerpc/include/asm/ppc-opcode.h @@ -43,8 +43,7 @@ #define PPC_INST_STSWI 0x7c0005aa #define PPC_INST_STSWX 0x7c00052a -#define PPC_INST_TLBILX 0x7c000024 -#define PPC_INST_TLBILX_EARLY 0x7c000626 +#define PPC_INST_TLBILX 0x7c000626 #define PPC_INST_WAIT 0x7c00007c /* macros to insert fields into opcodes */ @@ -64,18 +63,10 @@ #define PPC_RFDI stringify_in_c(.long PPC_INST_RFDI) #define PPC_RFMCI stringify_in_c(.long PPC_INST_RFMCI) #define PPC_TLBILX(t, a, b) stringify_in_c(.long PPC_INST_TLBILX | \ - __PPC_T_TLB(t) | \ - __PPC_RA(a) | __PPC_RB(b)) + __PPC_T_TLB(t) | __PPC_RA(a) | __PPC_RB(b)) #define PPC_TLBILX_ALL(a, b) PPC_TLBILX(0, a, b) #define PPC_TLBILX_PID(a, b) PPC_TLBILX(1, a, b) #define PPC_TLBILX_VA(a, b) PPC_TLBILX(3, a, b) - -#define PPC_TLBILX_EARLY(t, a, b) stringify_in_c(.long PPC_INST_TLBILX_EARLY | \ - __PPC_T_TLB(t) | \ - __PPC_RA(a) | __PPC_RB(b)) -#define PPC_TLBILX_ALL_EARLY(a, b) PPC_TLBILX_EARLY(0, a, b) -#define PPC_TLBILX_PID_EARLY(a, b) PPC_TLBILX_EARLY(1, a, b) -#define PPC_TLBILX_VA_EARLY(a, b) PPC_TLBILX_EARLY(3, a, b) #define PPC_WAIT(w) stringify_in_c(.long PPC_INST_WAIT | \ __PPC_WC(w)) diff --git a/trunk/arch/powerpc/kernel/cputable.c b/trunk/arch/powerpc/kernel/cputable.c index 57db50f40289..cd1b687544f3 100644 --- a/trunk/arch/powerpc/kernel/cputable.c +++ b/trunk/arch/powerpc/kernel/cputable.c @@ -1766,7 +1766,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_E500MC, .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | - MMU_FTR_USE_TLBILX | MMU_FTR_TLBILX_EARLY_OPCODE, + MMU_FTR_USE_TLBILX, .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 4, diff --git a/trunk/arch/powerpc/mm/tlb_nohash.c b/trunk/arch/powerpc/mm/tlb_nohash.c index ad2eb4d34dd4..7af72970faed 100644 --- a/trunk/arch/powerpc/mm/tlb_nohash.c +++ b/trunk/arch/powerpc/mm/tlb_nohash.c @@ -125,6 +125,7 @@ static void do_flush_tlb_page_ipi(void *param) void flush_tlb_mm(struct mm_struct *mm) { + cpumask_t cpu_mask; unsigned int pid; preempt_disable(); diff --git a/trunk/arch/powerpc/mm/tlb_nohash_low.S b/trunk/arch/powerpc/mm/tlb_nohash_low.S index 45fed3698349..788b87c36f77 100644 --- a/trunk/arch/powerpc/mm/tlb_nohash_low.S +++ b/trunk/arch/powerpc/mm/tlb_nohash_low.S @@ -138,11 +138,7 @@ BEGIN_MMU_FTR_SECTION andi. 
r3,r3,MMUCSR0_TLBFI@l bne 1b MMU_FTR_SECTION_ELSE - BEGIN_MMU_FTR_SECTION_NESTED(96) - PPC_TLBILX_ALL(0,r3) - MMU_FTR_SECTION_ELSE_NESTED(96) - PPC_TLBILX_ALL_EARLY(0,r3) - ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_TLBILX_EARLY_OPCODE, 96) + PPC_TLBILX_ALL(0,0) ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX) msync isync @@ -155,11 +151,7 @@ BEGIN_MMU_FTR_SECTION wrteei 0 mfspr r4,SPRN_MAS6 /* save MAS6 */ mtspr SPRN_MAS6,r3 - BEGIN_MMU_FTR_SECTION_NESTED(96) PPC_TLBILX_PID(0,0) - MMU_FTR_SECTION_ELSE_NESTED(96) - PPC_TLBILX_PID_EARLY(0,0) - ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_TLBILX_EARLY_OPCODE, 96) mtspr SPRN_MAS6,r4 /* restore MAS6 */ wrtee r10 MMU_FTR_SECTION_ELSE @@ -193,11 +185,7 @@ BEGIN_MMU_FTR_SECTION mtspr SPRN_MAS1,r4 tlbwe MMU_FTR_SECTION_ELSE - BEGIN_MMU_FTR_SECTION_NESTED(96) PPC_TLBILX_VA(0,r3) - MMU_FTR_SECTION_ELSE_NESTED(96) - PPC_TLBILX_VA_EARLY(0,r3) - ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_TLBILX_EARLY_OPCODE, 96) ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX) msync isync diff --git a/trunk/arch/powerpc/platforms/pseries/dtl.c b/trunk/arch/powerpc/platforms/pseries/dtl.c index ab69925d579b..fafcaa0e81ef 100644 --- a/trunk/arch/powerpc/platforms/pseries/dtl.c +++ b/trunk/arch/powerpc/platforms/pseries/dtl.c @@ -25,7 +25,6 @@ #include #include #include -#include #include "plpar_wrappers.h" diff --git a/trunk/arch/powerpc/platforms/pseries/eeh_driver.c b/trunk/arch/powerpc/platforms/pseries/eeh_driver.c index 9a2a6e32f00f..380420f8c400 100644 --- a/trunk/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/trunk/arch/powerpc/platforms/pseries/eeh_driver.c @@ -182,8 +182,6 @@ static void eeh_report_reset(struct pci_dev *dev, void *userdata) if (!driver) return; - dev->error_state = pci_channel_io_normal; - eeh_enable_irq(dev); if (!driver->err_handler || diff --git a/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index ecdb682ab516..837c2c4cc203 100644 --- a/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -204,13 +204,7 @@ static void drv_read(struct drv_cmd *cmd) static void drv_write(struct drv_cmd *cmd) { - int this_cpu; - - this_cpu = get_cpu(); - if (cpumask_test_cpu(this_cpu, cmd->mask)) - do_drv_write(cmd); smp_call_function_many(cmd->mask, do_drv_write, cmd, 1); - put_cpu(); } static u32 get_cur_val(const struct cpumask *mask) diff --git a/trunk/block/as-iosched.c b/trunk/block/as-iosched.c index c48fa670d221..631f6f44460a 100644 --- a/trunk/block/as-iosched.c +++ b/trunk/block/as-iosched.c @@ -17,6 +17,9 @@ #include #include +#define REQ_SYNC 1 +#define REQ_ASYNC 0 + /* * See Documentation/block/as-iosched.txt */ @@ -90,7 +93,7 @@ struct as_data { struct list_head fifo_list[2]; struct request *next_rq[2]; /* next in sort order */ - sector_t last_sector[2]; /* last SYNC & ASYNC sectors */ + sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */ unsigned long exit_prob; /* probability a task will exit while being waited on */ @@ -106,7 +109,7 @@ struct as_data { unsigned long last_check_fifo[2]; int changed_batch; /* 1: waiting for old batch to end */ int new_batch; /* 1: waiting on first read complete */ - int batch_data_dir; /* current batch SYNC / ASYNC */ + int batch_data_dir; /* current batch REQ_SYNC / REQ_ASYNC */ int write_batch_count; /* max # of reqs in a write batch */ int current_write_count; /* how many requests left this batch */ int write_batch_idled; /* has the write batch gone idle? 
*/ @@ -551,7 +554,7 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, if (aic == NULL) return; - if (data_dir == BLK_RW_SYNC) { + if (data_dir == REQ_SYNC) { unsigned long in_flight = atomic_read(&aic->nr_queued) + atomic_read(&aic->nr_dispatched); spin_lock(&aic->lock); @@ -808,7 +811,7 @@ static void as_update_rq(struct as_data *ad, struct request *rq) */ static void update_write_batch(struct as_data *ad) { - unsigned long batch = ad->batch_expire[BLK_RW_ASYNC]; + unsigned long batch = ad->batch_expire[REQ_ASYNC]; long write_time; write_time = (jiffies - ad->current_batch_expires) + batch; @@ -852,7 +855,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq) kblockd_schedule_work(q, &ad->antic_work); ad->changed_batch = 0; - if (ad->batch_data_dir == BLK_RW_SYNC) + if (ad->batch_data_dir == REQ_SYNC) ad->new_batch = 1; } WARN_ON(ad->nr_dispatched == 0); @@ -866,7 +869,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq) if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) { update_write_batch(ad); ad->current_batch_expires = jiffies + - ad->batch_expire[BLK_RW_SYNC]; + ad->batch_expire[REQ_SYNC]; ad->new_batch = 0; } @@ -957,7 +960,7 @@ static inline int as_batch_expired(struct as_data *ad) if (ad->changed_batch || ad->new_batch) return 0; - if (ad->batch_data_dir == BLK_RW_SYNC) + if (ad->batch_data_dir == REQ_SYNC) /* TODO! add a check so a complete fifo gets written? */ return time_after(jiffies, ad->current_batch_expires); @@ -983,7 +986,7 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq) */ ad->last_sector[data_dir] = rq->sector + rq->nr_sectors; - if (data_dir == BLK_RW_SYNC) { + if (data_dir == REQ_SYNC) { struct io_context *ioc = RQ_IOC(rq); /* In case we have to anticipate after this */ copy_io_context(&ad->io_context, &ioc); @@ -1022,41 +1025,41 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq) static int as_dispatch_request(struct request_queue *q, int force) { struct as_data *ad = q->elevator->elevator_data; - const int reads = !list_empty(&ad->fifo_list[BLK_RW_SYNC]); - const int writes = !list_empty(&ad->fifo_list[BLK_RW_ASYNC]); + const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]); + const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]); struct request *rq; if (unlikely(force)) { /* * Forced dispatch, accounting is useless. Reset * accounting states and dump fifo_lists. Note that - * batch_data_dir is reset to BLK_RW_SYNC to avoid + * batch_data_dir is reset to REQ_SYNC to avoid * screwing write batch accounting as write batch * accounting occurs on W->R transition. 
*/ int dispatched = 0; - ad->batch_data_dir = BLK_RW_SYNC; + ad->batch_data_dir = REQ_SYNC; ad->changed_batch = 0; ad->new_batch = 0; - while (ad->next_rq[BLK_RW_SYNC]) { - as_move_to_dispatch(ad, ad->next_rq[BLK_RW_SYNC]); + while (ad->next_rq[REQ_SYNC]) { + as_move_to_dispatch(ad, ad->next_rq[REQ_SYNC]); dispatched++; } - ad->last_check_fifo[BLK_RW_SYNC] = jiffies; + ad->last_check_fifo[REQ_SYNC] = jiffies; - while (ad->next_rq[BLK_RW_ASYNC]) { - as_move_to_dispatch(ad, ad->next_rq[BLK_RW_ASYNC]); + while (ad->next_rq[REQ_ASYNC]) { + as_move_to_dispatch(ad, ad->next_rq[REQ_ASYNC]); dispatched++; } - ad->last_check_fifo[BLK_RW_ASYNC] = jiffies; + ad->last_check_fifo[REQ_ASYNC] = jiffies; return dispatched; } /* Signal that the write batch was uncontended, so we can't time it */ - if (ad->batch_data_dir == BLK_RW_ASYNC && !reads) { + if (ad->batch_data_dir == REQ_ASYNC && !reads) { if (ad->current_write_count == 0 || !writes) ad->write_batch_idled = 1; } @@ -1073,8 +1076,8 @@ static int as_dispatch_request(struct request_queue *q, int force) */ rq = ad->next_rq[ad->batch_data_dir]; - if (ad->batch_data_dir == BLK_RW_SYNC && ad->antic_expire) { - if (as_fifo_expired(ad, BLK_RW_SYNC)) + if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) { + if (as_fifo_expired(ad, REQ_SYNC)) goto fifo_expired; if (as_can_anticipate(ad, rq)) { @@ -1087,7 +1090,7 @@ static int as_dispatch_request(struct request_queue *q, int force) /* we have a "next request" */ if (reads && !writes) ad->current_batch_expires = - jiffies + ad->batch_expire[BLK_RW_SYNC]; + jiffies + ad->batch_expire[REQ_SYNC]; goto dispatch_request; } } @@ -1098,20 +1101,20 @@ static int as_dispatch_request(struct request_queue *q, int force) */ if (reads) { - BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[BLK_RW_SYNC])); + BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[REQ_SYNC])); - if (writes && ad->batch_data_dir == BLK_RW_SYNC) + if (writes && ad->batch_data_dir == REQ_SYNC) /* * Last batch was a read, switch to writes */ goto dispatch_writes; - if (ad->batch_data_dir == BLK_RW_ASYNC) { + if (ad->batch_data_dir == REQ_ASYNC) { WARN_ON(ad->new_batch); ad->changed_batch = 1; } - ad->batch_data_dir = BLK_RW_SYNC; - rq = rq_entry_fifo(ad->fifo_list[BLK_RW_SYNC].next); + ad->batch_data_dir = REQ_SYNC; + rq = rq_entry_fifo(ad->fifo_list[REQ_SYNC].next); ad->last_check_fifo[ad->batch_data_dir] = jiffies; goto dispatch_request; } @@ -1122,9 +1125,9 @@ static int as_dispatch_request(struct request_queue *q, int force) if (writes) { dispatch_writes: - BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[BLK_RW_ASYNC])); + BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[REQ_ASYNC])); - if (ad->batch_data_dir == BLK_RW_SYNC) { + if (ad->batch_data_dir == REQ_SYNC) { ad->changed_batch = 1; /* @@ -1134,11 +1137,11 @@ static int as_dispatch_request(struct request_queue *q, int force) */ ad->new_batch = 0; } - ad->batch_data_dir = BLK_RW_ASYNC; + ad->batch_data_dir = REQ_ASYNC; ad->current_write_count = ad->write_batch_count; ad->write_batch_idled = 0; - rq = rq_entry_fifo(ad->fifo_list[BLK_RW_ASYNC].next); - ad->last_check_fifo[BLK_RW_ASYNC] = jiffies; + rq = rq_entry_fifo(ad->fifo_list[REQ_ASYNC].next); + ad->last_check_fifo[REQ_ASYNC] = jiffies; goto dispatch_request; } @@ -1161,9 +1164,9 @@ static int as_dispatch_request(struct request_queue *q, int force) if (ad->nr_dispatched) return 0; - if (ad->batch_data_dir == BLK_RW_ASYNC) + if (ad->batch_data_dir == REQ_ASYNC) ad->current_batch_expires = jiffies + - ad->batch_expire[BLK_RW_ASYNC]; + ad->batch_expire[REQ_ASYNC]; else ad->new_batch = 1; @@ 
-1235,8 +1238,8 @@ static int as_queue_empty(struct request_queue *q) { struct as_data *ad = q->elevator->elevator_data; - return list_empty(&ad->fifo_list[BLK_RW_ASYNC]) - && list_empty(&ad->fifo_list[BLK_RW_SYNC]); + return list_empty(&ad->fifo_list[REQ_ASYNC]) + && list_empty(&ad->fifo_list[REQ_SYNC]); } static int @@ -1343,8 +1346,8 @@ static void as_exit_queue(struct elevator_queue *e) del_timer_sync(&ad->antic_timer); cancel_work_sync(&ad->antic_work); - BUG_ON(!list_empty(&ad->fifo_list[BLK_RW_SYNC])); - BUG_ON(!list_empty(&ad->fifo_list[BLK_RW_ASYNC])); + BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC])); + BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC])); put_io_context(ad->io_context); kfree(ad); @@ -1369,18 +1372,18 @@ static void *as_init_queue(struct request_queue *q) init_timer(&ad->antic_timer); INIT_WORK(&ad->antic_work, as_work_handler); - INIT_LIST_HEAD(&ad->fifo_list[BLK_RW_SYNC]); - INIT_LIST_HEAD(&ad->fifo_list[BLK_RW_ASYNC]); - ad->sort_list[BLK_RW_SYNC] = RB_ROOT; - ad->sort_list[BLK_RW_ASYNC] = RB_ROOT; - ad->fifo_expire[BLK_RW_SYNC] = default_read_expire; - ad->fifo_expire[BLK_RW_ASYNC] = default_write_expire; + INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]); + INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]); + ad->sort_list[REQ_SYNC] = RB_ROOT; + ad->sort_list[REQ_ASYNC] = RB_ROOT; + ad->fifo_expire[REQ_SYNC] = default_read_expire; + ad->fifo_expire[REQ_ASYNC] = default_write_expire; ad->antic_expire = default_antic_expire; - ad->batch_expire[BLK_RW_SYNC] = default_read_batch_expire; - ad->batch_expire[BLK_RW_ASYNC] = default_write_batch_expire; + ad->batch_expire[REQ_SYNC] = default_read_batch_expire; + ad->batch_expire[REQ_ASYNC] = default_write_batch_expire; - ad->current_batch_expires = jiffies + ad->batch_expire[BLK_RW_SYNC]; - ad->write_batch_count = ad->batch_expire[BLK_RW_ASYNC] / 10; + ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC]; + ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10; if (ad->write_batch_count < 2) ad->write_batch_count = 2; @@ -1429,11 +1432,11 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page) \ struct as_data *ad = e->elevator_data; \ return as_var_show(jiffies_to_msecs((__VAR)), (page)); \ } -SHOW_FUNCTION(as_read_expire_show, ad->fifo_expire[BLK_RW_SYNC]); -SHOW_FUNCTION(as_write_expire_show, ad->fifo_expire[BLK_RW_ASYNC]); +SHOW_FUNCTION(as_read_expire_show, ad->fifo_expire[REQ_SYNC]); +SHOW_FUNCTION(as_write_expire_show, ad->fifo_expire[REQ_ASYNC]); SHOW_FUNCTION(as_antic_expire_show, ad->antic_expire); -SHOW_FUNCTION(as_read_batch_expire_show, ad->batch_expire[BLK_RW_SYNC]); -SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[BLK_RW_ASYNC]); +SHOW_FUNCTION(as_read_batch_expire_show, ad->batch_expire[REQ_SYNC]); +SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]); #undef SHOW_FUNCTION #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ @@ -1448,14 +1451,13 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) *(__PTR) = msecs_to_jiffies(*(__PTR)); \ return ret; \ } -STORE_FUNCTION(as_read_expire_store, &ad->fifo_expire[BLK_RW_SYNC], 0, INT_MAX); -STORE_FUNCTION(as_write_expire_store, - &ad->fifo_expire[BLK_RW_ASYNC], 0, INT_MAX); +STORE_FUNCTION(as_read_expire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX); +STORE_FUNCTION(as_write_expire_store, &ad->fifo_expire[REQ_ASYNC], 0, INT_MAX); STORE_FUNCTION(as_antic_expire_store, &ad->antic_expire, 0, INT_MAX); STORE_FUNCTION(as_read_batch_expire_store, - &ad->batch_expire[BLK_RW_SYNC], 0, INT_MAX); + 
&ad->batch_expire[REQ_SYNC], 0, INT_MAX); STORE_FUNCTION(as_write_batch_expire_store, - &ad->batch_expire[BLK_RW_ASYNC], 0, INT_MAX); + &ad->batch_expire[REQ_ASYNC], 0, INT_MAX); #undef STORE_FUNCTION #define AS_ATTR(name) \ diff --git a/trunk/block/blk-barrier.c b/trunk/block/blk-barrier.c index 20b4111fa050..f7dae57e6cab 100644 --- a/trunk/block/blk-barrier.c +++ b/trunk/block/blk-barrier.c @@ -319,6 +319,9 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) return -ENXIO; bio = bio_alloc(GFP_KERNEL, 0); + if (!bio) + return -ENOMEM; + bio->bi_end_io = bio_end_empty_barrier; bio->bi_private = &wait; bio->bi_bdev = bdev; diff --git a/trunk/block/blk-sysfs.c b/trunk/block/blk-sysfs.c index cac4e9febe6a..73f36beff5cd 100644 --- a/trunk/block/blk-sysfs.c +++ b/trunk/block/blk-sysfs.c @@ -209,14 +209,14 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page, ssize_t ret = queue_var_store(&stats, page, count); spin_lock_irq(q->queue_lock); - elv_quiesce_start(q); + elv_quisce_start(q); if (stats) queue_flag_set(QUEUE_FLAG_IO_STAT, q); else queue_flag_clear(QUEUE_FLAG_IO_STAT, q); - elv_quiesce_end(q); + elv_quisce_end(q); spin_unlock_irq(q->queue_lock); return ret; diff --git a/trunk/block/blk.h b/trunk/block/blk.h index 5dfc41267a08..24fcaeeaf620 100644 --- a/trunk/block/blk.h +++ b/trunk/block/blk.h @@ -70,8 +70,8 @@ void blk_queue_congestion_threshold(struct request_queue *q); int blk_dev_init(void); -void elv_quiesce_start(struct request_queue *q); -void elv_quiesce_end(struct request_queue *q); +void elv_quisce_start(struct request_queue *q); +void elv_quisce_end(struct request_queue *q); /* diff --git a/trunk/block/cfq-iosched.c b/trunk/block/cfq-iosched.c index 0d3b70de3d80..a4809de6fea6 100644 --- a/trunk/block/cfq-iosched.c +++ b/trunk/block/cfq-iosched.c @@ -56,6 +56,9 @@ static DEFINE_SPINLOCK(ioc_gone_lock); #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) +#define ASYNC (0) +#define SYNC (1) + #define sample_valid(samples) ((samples) > 80) /* @@ -80,14 +83,6 @@ struct cfq_data { * rr list of queues with requests and the count of them */ struct cfq_rb_root service_tree; - - /* - * Each priority tree is sorted by next_request position. These - * trees are used when determining if two or more queues are - * interleaving requests (see cfq_close_cooperator). - */ - struct rb_root prio_trees[CFQ_PRIO_LISTS]; - unsigned int busy_queues; /* * Used to track any pending rt requests so we can pre-empt current @@ -152,8 +147,6 @@ struct cfq_queue { struct rb_node rb_node; /* service_tree key */ unsigned long rb_key; - /* prio tree member */ - struct rb_node p_node; /* sorted list of pending requests */ struct rb_root sort_list; /* if fifo isn't expired, next request to serve */ @@ -192,7 +185,6 @@ enum cfqq_state_flags { CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */ CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ CFQ_CFQQ_FLAG_sync, /* synchronous queue */ - CFQ_CFQQ_FLAG_coop, /* has done a coop jump of the queue */ }; #define CFQ_CFQQ_FNS(name) \ @@ -219,7 +211,6 @@ CFQ_CFQQ_FNS(idle_window); CFQ_CFQQ_FNS(prio_changed); CFQ_CFQQ_FNS(slice_new); CFQ_CFQQ_FNS(sync); -CFQ_CFQQ_FNS(coop); #undef CFQ_CFQQ_FNS #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) 
\ @@ -428,17 +419,13 @@ static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root) return NULL; } -static void rb_erase_init(struct rb_node *n, struct rb_root *root) -{ - rb_erase(n, root); - RB_CLEAR_NODE(n); -} - static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) { if (root->left == n) root->left = NULL; - rb_erase_init(n, &root->rb); + + rb_erase(n, &root->rb); + RB_CLEAR_NODE(n); } /* @@ -483,8 +470,8 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd, * requests waiting to be processed. It is sorted in the order that * we will service the queues. */ -static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, - int add_front) +static void cfq_service_tree_add(struct cfq_data *cfqd, + struct cfq_queue *cfqq, int add_front) { struct rb_node **p, *parent; struct cfq_queue *__cfqq; @@ -557,63 +544,6 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb); } -static struct cfq_queue * -cfq_prio_tree_lookup(struct cfq_data *cfqd, int ioprio, sector_t sector, - struct rb_node **ret_parent, struct rb_node ***rb_link) -{ - struct rb_root *root = &cfqd->prio_trees[ioprio]; - struct rb_node **p, *parent; - struct cfq_queue *cfqq = NULL; - - parent = NULL; - p = &root->rb_node; - while (*p) { - struct rb_node **n; - - parent = *p; - cfqq = rb_entry(parent, struct cfq_queue, p_node); - - /* - * Sort strictly based on sector. Smallest to the left, - * largest to the right. - */ - if (sector > cfqq->next_rq->sector) - n = &(*p)->rb_right; - else if (sector < cfqq->next_rq->sector) - n = &(*p)->rb_left; - else - break; - p = n; - } - - *ret_parent = parent; - if (rb_link) - *rb_link = p; - return NULL; -} - -static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq) -{ - struct rb_root *root = &cfqd->prio_trees[cfqq->ioprio]; - struct rb_node **p, *parent; - struct cfq_queue *__cfqq; - - if (!RB_EMPTY_NODE(&cfqq->p_node)) - rb_erase_init(&cfqq->p_node, root); - - if (cfq_class_idle(cfqq)) - return; - if (!cfqq->next_rq) - return; - - __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->ioprio, cfqq->next_rq->sector, - &parent, &p); - BUG_ON(__cfqq); - - rb_link_node(&cfqq->p_node, parent, p); - rb_insert_color(&cfqq->p_node, root); -} - /* * Update cfqq's position in the service tree. */ @@ -622,10 +552,8 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq) /* * Resorting requires the cfqq to be on the RR list already. 
*/ - if (cfq_cfqq_on_rr(cfqq)) { + if (cfq_cfqq_on_rr(cfqq)) cfq_service_tree_add(cfqd, cfqq, 0); - cfq_prio_tree_add(cfqd, cfqq); - } } /* @@ -656,8 +584,6 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) if (!RB_EMPTY_NODE(&cfqq->rb_node)) cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree); - if (!RB_EMPTY_NODE(&cfqq->p_node)) - rb_erase_init(&cfqq->p_node, &cfqd->prio_trees[cfqq->ioprio]); BUG_ON(!cfqd->busy_queues); cfqd->busy_queues--; @@ -687,7 +613,7 @@ static void cfq_add_rq_rb(struct request *rq) { struct cfq_queue *cfqq = RQ_CFQQ(rq); struct cfq_data *cfqd = cfqq->cfqd; - struct request *__alias, *prev; + struct request *__alias; cfqq->queued[rq_is_sync(rq)]++; @@ -704,15 +630,7 @@ static void cfq_add_rq_rb(struct request *rq) /* * check if this request is a better next-serve candidate */ - prev = cfqq->next_rq; cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq); - - /* - * adjust priority tree position, if ->next_rq changes - */ - if (prev != cfqq->next_rq) - cfq_prio_tree_add(cfqd, cfqq); - BUG_ON(!cfqq->next_rq); } @@ -925,15 +843,11 @@ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) /* * Get and set a new active queue for service. */ -static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd, - struct cfq_queue *cfqq) +static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) { - if (!cfqq) { - cfqq = cfq_get_next_queue(cfqd); - if (cfqq) - cfq_clear_cfqq_coop(cfqq); - } + struct cfq_queue *cfqq; + cfqq = cfq_get_next_queue(cfqd); __cfq_set_active_queue(cfqd, cfqq); return cfqq; } @@ -957,89 +871,17 @@ static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq) return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean; } -static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, - struct cfq_queue *cur_cfqq) -{ - struct rb_root *root = &cfqd->prio_trees[cur_cfqq->ioprio]; - struct rb_node *parent, *node; - struct cfq_queue *__cfqq; - sector_t sector = cfqd->last_position; - - if (RB_EMPTY_ROOT(root)) - return NULL; - - /* - * First, if we find a request starting at the end of the last - * request, choose it. - */ - __cfqq = cfq_prio_tree_lookup(cfqd, cur_cfqq->ioprio, - sector, &parent, NULL); - if (__cfqq) - return __cfqq; - - /* - * If the exact sector wasn't found, the parent of the NULL leaf - * will contain the closest sector. - */ - __cfqq = rb_entry(parent, struct cfq_queue, p_node); - if (cfq_rq_close(cfqd, __cfqq->next_rq)) - return __cfqq; - - if (__cfqq->next_rq->sector < sector) - node = rb_next(&__cfqq->p_node); - else - node = rb_prev(&__cfqq->p_node); - if (!node) - return NULL; - - __cfqq = rb_entry(node, struct cfq_queue, p_node); - if (cfq_rq_close(cfqd, __cfqq->next_rq)) - return __cfqq; - - return NULL; -} - -/* - * cfqd - obvious - * cur_cfqq - passed in so that we don't decide that the current queue is - * closely cooperating with itself. - * - * So, basically we're assuming that that cur_cfqq has dispatched at least - * one request, and that cfqd->last_position reflects a position on the disk - * associated with the I/O issued by cur_cfqq. I'm not sure this is a valid - * assumption. - */ -static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, - struct cfq_queue *cur_cfqq, - int probe) +static int cfq_close_cooperator(struct cfq_data *cfq_data, + struct cfq_queue *cfqq) { - struct cfq_queue *cfqq; - - /* - * A valid cfq_io_context is necessary to compare requests against - * the seek_mean of the current cfqq. 
- */ - if (!cfqd->active_cic) - return NULL; - /* * We should notice if some of the queues are cooperating, eg * working closely on the same area of the disk. In that case, * we can group them together and don't waste time idling. */ - cfqq = cfqq_close(cfqd, cur_cfqq); - if (!cfqq) - return NULL; - - if (cfq_cfqq_coop(cfqq)) - return NULL; - - if (!probe) - cfq_mark_cfqq_coop(cfqq); - return cfqq; + return 0; } - #define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024)) static void cfq_arm_slice_timer(struct cfq_data *cfqd) @@ -1078,6 +920,13 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) if (!cic || !atomic_read(&cic->ioc->nr_tasks)) return; + /* + * See if this prio level has a good candidate + */ + if (cfq_close_cooperator(cfqd, cfqq) && + (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2)) + return; + cfq_mark_cfqq_wait_request(cfqq); /* @@ -1090,7 +939,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT)); mod_timer(&cfqd->idle_slice_timer, jiffies + sl); - cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl); + cfq_log(cfqd, "arm_idle: %lu", sl); } /* @@ -1154,7 +1003,7 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) */ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) { - struct cfq_queue *cfqq, *new_cfqq = NULL; + struct cfq_queue *cfqq; cfqq = cfqd->active_queue; if (!cfqq) @@ -1187,16 +1036,6 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) if (!RB_EMPTY_ROOT(&cfqq->sort_list)) goto keep_queue; - /* - * If another queue has a request waiting within our mean seek - * distance, let it run. The expire code will check for close - * cooperators and put the close queue at the front of the service - * tree. - */ - new_cfqq = cfq_close_cooperator(cfqd, cfqq, 0); - if (new_cfqq) - goto expire; - /* * No requests pending. 
If the active queue still has requests in * flight or is idling for a new request, allow either of these @@ -1211,7 +1050,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) expire: cfq_slice_expired(cfqd, 0); new_queue: - cfqq = cfq_set_active_queue(cfqd, new_cfqq); + cfqq = cfq_set_active_queue(cfqd); keep_queue: return cfqq; } @@ -1494,14 +1333,14 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, if (ioc->ioc_data == cic) rcu_assign_pointer(ioc->ioc_data, NULL); - if (cic->cfqq[BLK_RW_ASYNC]) { - cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]); - cic->cfqq[BLK_RW_ASYNC] = NULL; + if (cic->cfqq[ASYNC]) { + cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]); + cic->cfqq[ASYNC] = NULL; } - if (cic->cfqq[BLK_RW_SYNC]) { - cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_SYNC]); - cic->cfqq[BLK_RW_SYNC] = NULL; + if (cic->cfqq[SYNC]) { + cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]); + cic->cfqq[SYNC] = NULL; } } @@ -1610,18 +1449,17 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) spin_lock_irqsave(cfqd->queue->queue_lock, flags); - cfqq = cic->cfqq[BLK_RW_ASYNC]; + cfqq = cic->cfqq[ASYNC]; if (cfqq) { struct cfq_queue *new_cfqq; - new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc, - GFP_ATOMIC); + new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc, GFP_ATOMIC); if (new_cfqq) { - cic->cfqq[BLK_RW_ASYNC] = new_cfqq; + cic->cfqq[ASYNC] = new_cfqq; cfq_put_queue(cfqq); } } - cfqq = cic->cfqq[BLK_RW_SYNC]; + cfqq = cic->cfqq[SYNC]; if (cfqq) cfq_mark_cfqq_prio_changed(cfqq); @@ -1672,7 +1510,6 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync, } RB_CLEAR_NODE(&cfqq->rb_node); - RB_CLEAR_NODE(&cfqq->p_node); INIT_LIST_HEAD(&cfqq->fifo); atomic_set(&cfqq->ref, 0); @@ -2068,20 +1905,10 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, * Remember that we saw a request from this process, but * don't start queuing just yet. Otherwise we risk seeing lots * of tiny requests, because we disrupt the normal plugging - * and merging. If the request is already larger than a single - * page, let it rip immediately. For that case we assume that - * merging is already done. Ditto for a busy system that - * has other work pending, don't risk delaying until the - * idle timer unplug to continue working. + * and merging. */ - if (cfq_cfqq_wait_request(cfqq)) { - if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE || - cfqd->busy_queues > 1) { - del_timer(&cfqd->idle_slice_timer); - blk_start_queueing(cfqd->queue); - } + if (cfq_cfqq_wait_request(cfqq)) cfq_mark_cfqq_must_dispatch(cfqq); - } } else if (cfq_should_preempt(cfqd, cfqq, rq)) { /* * not the active queue - expire current slice if it is @@ -2165,24 +1992,16 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) * or if we want to idle in case it has no pending requests. */ if (cfqd->active_queue == cfqq) { - const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list); - if (cfq_cfqq_slice_new(cfqq)) { cfq_set_prio_slice(cfqd, cfqq); cfq_clear_cfqq_slice_new(cfqq); } - /* - * If there are no requests waiting in this queue, and - * there are other queues ready to issue requests, AND - * those other queues are issuing requests within our - * mean seek distance, give them a chance to run instead - * of idling. 
- */ if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) cfq_slice_expired(cfqd, 1); - else if (cfqq_empty && !cfq_close_cooperator(cfqd, cfqq, 1) && - sync && !rq_noidle(rq)) + else if (sync && !rq_noidle(rq) && + RB_EMPTY_ROOT(&cfqq->sort_list)) { cfq_arm_slice_timer(cfqd); + } } if (!cfqd->rq_in_driver) @@ -2243,7 +2062,7 @@ static int cfq_may_queue(struct request_queue *q, int rw) if (!cic) return ELV_MQUEUE_MAY; - cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); + cfqq = cic_to_cfqq(cic, rw & REQ_RW_SYNC); if (cfqq) { cfq_init_prio_data(cfqq, cic->ioc); cfq_prio_boost(cfqq); @@ -2333,10 +2152,11 @@ static void cfq_kick_queue(struct work_struct *work) struct cfq_data *cfqd = container_of(work, struct cfq_data, unplug_work); struct request_queue *q = cfqd->queue; + unsigned long flags; - spin_lock_irq(q->queue_lock); + spin_lock_irqsave(q->queue_lock, flags); blk_start_queueing(q); - spin_unlock_irq(q->queue_lock); + spin_unlock_irqrestore(q->queue_lock, flags); } /* diff --git a/trunk/block/elevator.c b/trunk/block/elevator.c index 7073a9072577..fb81bcc14a8c 100644 --- a/trunk/block/elevator.c +++ b/trunk/block/elevator.c @@ -590,7 +590,7 @@ void elv_drain_elevator(struct request_queue *q) /* * Call with queue lock held, interrupts disabled */ -void elv_quiesce_start(struct request_queue *q) +void elv_quisce_start(struct request_queue *q) { queue_flag_set(QUEUE_FLAG_ELVSWITCH, q); @@ -607,7 +607,7 @@ void elv_quiesce_start(struct request_queue *q) } } -void elv_quiesce_end(struct request_queue *q) +void elv_quisce_end(struct request_queue *q) { queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); } @@ -1126,7 +1126,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) * Turn on BYPASS and drain all requests w/ elevator private data */ spin_lock_irq(q->queue_lock); - elv_quiesce_start(q); + elv_quisce_start(q); /* * Remember old elevator. 
@@ -1150,7 +1150,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) */ elevator_exit(old_elevator); spin_lock_irq(q->queue_lock); - elv_quiesce_end(q); + elv_quisce_end(q); spin_unlock_irq(q->queue_lock); blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); diff --git a/trunk/block/ioctl.c b/trunk/block/ioctl.c index ad474d4bbcce..0f22e629b13c 100644 --- a/trunk/block/ioctl.c +++ b/trunk/block/ioctl.c @@ -146,6 +146,8 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, struct bio *bio; bio = bio_alloc(GFP_KERNEL, 0); + if (!bio) + return -ENOMEM; bio->bi_end_io = blk_ioc_discard_endio; bio->bi_bdev = bdev; diff --git a/trunk/block/scsi_ioctl.c b/trunk/block/scsi_ioctl.c index 84b7f8709f41..626ee274c5c4 100644 --- a/trunk/block/scsi_ioctl.c +++ b/trunk/block/scsi_ioctl.c @@ -217,7 +217,7 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, struct bio *bio) { - int r, ret = 0; + int ret = 0; /* * fill in all the output members @@ -242,9 +242,7 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, ret = -EFAULT; } - r = blk_rq_unmap_user(bio); - if (!ret) - ret = r; + blk_rq_unmap_user(bio); blk_put_request(rq); return ret; diff --git a/trunk/drivers/block/brd.c b/trunk/drivers/block/brd.c index 5f7e64ba87e5..bdd4f5f45575 100644 --- a/trunk/drivers/block/brd.c +++ b/trunk/drivers/block/brd.c @@ -275,10 +275,8 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page, if (rw == READ) { copy_from_brd(mem + off, brd, sector, len); flush_dcache_page(page); - } else { - flush_dcache_page(page); + } else copy_to_brd(brd, mem + off, sector, len); - } kunmap_atomic(mem, KM_USER0); out: @@ -438,7 +436,6 @@ static struct brd_device *brd_alloc(int i) if (!brd->brd_queue) goto out_free_dev; blk_queue_make_request(brd->brd_queue, brd_make_request); - blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_TAG, NULL); blk_queue_max_sectors(brd->brd_queue, 1024); blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); diff --git a/trunk/drivers/md/dm-bio-list.h b/trunk/drivers/md/dm-bio-list.h new file mode 100644 index 000000000000..345098b4ca77 --- /dev/null +++ b/trunk/drivers/md/dm-bio-list.h @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2004 Red Hat UK Ltd. + * + * This file is released under the GPL. 
+ */ + +#ifndef DM_BIO_LIST_H +#define DM_BIO_LIST_H + +#include + +#ifdef CONFIG_BLOCK + +struct bio_list { + struct bio *head; + struct bio *tail; +}; + +static inline int bio_list_empty(const struct bio_list *bl) +{ + return bl->head == NULL; +} + +static inline void bio_list_init(struct bio_list *bl) +{ + bl->head = bl->tail = NULL; +} + +#define bio_list_for_each(bio, bl) \ + for (bio = (bl)->head; bio; bio = bio->bi_next) + +static inline unsigned bio_list_size(const struct bio_list *bl) +{ + unsigned sz = 0; + struct bio *bio; + + bio_list_for_each(bio, bl) + sz++; + + return sz; +} + +static inline void bio_list_add(struct bio_list *bl, struct bio *bio) +{ + bio->bi_next = NULL; + + if (bl->tail) + bl->tail->bi_next = bio; + else + bl->head = bio; + + bl->tail = bio; +} + +static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) +{ + bio->bi_next = bl->head; + + bl->head = bio; + + if (!bl->tail) + bl->tail = bio; +} + +static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) +{ + if (!bl2->head) + return; + + if (bl->tail) + bl->tail->bi_next = bl2->head; + else + bl->head = bl2->head; + + bl->tail = bl2->tail; +} + +static inline void bio_list_merge_head(struct bio_list *bl, + struct bio_list *bl2) +{ + if (!bl2->head) + return; + + if (bl->head) + bl2->tail->bi_next = bl->head; + else + bl->tail = bl2->tail; + + bl->head = bl2->head; +} + +static inline struct bio *bio_list_pop(struct bio_list *bl) +{ + struct bio *bio = bl->head; + + if (bio) { + bl->head = bl->head->bi_next; + if (!bl->head) + bl->tail = NULL; + + bio->bi_next = NULL; + } + + return bio; +} + +static inline struct bio *bio_list_get(struct bio_list *bl) +{ + struct bio *bio = bl->head; + + bl->head = bl->tail = NULL; + + return bio; +} + +#endif /* CONFIG_BLOCK */ +#endif diff --git a/trunk/drivers/md/dm-delay.c b/trunk/drivers/md/dm-delay.c index 559dbb52bc85..59ee1b015d2d 100644 --- a/trunk/drivers/md/dm-delay.c +++ b/trunk/drivers/md/dm-delay.c @@ -15,6 +15,8 @@ #include +#include "dm-bio-list.h" + #define DM_MSG_PREFIX "delay" struct delay_c { diff --git a/trunk/drivers/md/dm-mpath.c b/trunk/drivers/md/dm-mpath.c index 6a386ab4f7eb..095f77bf9681 100644 --- a/trunk/drivers/md/dm-mpath.c +++ b/trunk/drivers/md/dm-mpath.c @@ -8,6 +8,7 @@ #include #include "dm-path-selector.h" +#include "dm-bio-list.h" #include "dm-bio-record.h" #include "dm-uevent.h" diff --git a/trunk/drivers/md/dm-raid1.c b/trunk/drivers/md/dm-raid1.c index 076fbb4e967a..536ef0bef154 100644 --- a/trunk/drivers/md/dm-raid1.c +++ b/trunk/drivers/md/dm-raid1.c @@ -5,6 +5,7 @@ * This file is released under the GPL. 
*/ +#include "dm-bio-list.h" #include "dm-bio-record.h" #include diff --git a/trunk/drivers/md/dm-region-hash.c b/trunk/drivers/md/dm-region-hash.c index 7b899be0b087..59f8d9df9e1a 100644 --- a/trunk/drivers/md/dm-region-hash.c +++ b/trunk/drivers/md/dm-region-hash.c @@ -14,6 +14,7 @@ #include #include "dm.h" +#include "dm-bio-list.h" #define DM_MSG_PREFIX "region hash" diff --git a/trunk/drivers/md/dm-snap.c b/trunk/drivers/md/dm-snap.c index d73f17fc7778..981a0413068f 100644 --- a/trunk/drivers/md/dm-snap.c +++ b/trunk/drivers/md/dm-snap.c @@ -22,6 +22,7 @@ #include #include "dm-exception-store.h" +#include "dm-bio-list.h" #define DM_MSG_PREFIX "snapshots" diff --git a/trunk/drivers/md/dm.c b/trunk/drivers/md/dm.c index 424f7b048c30..8a994be035ba 100644 --- a/trunk/drivers/md/dm.c +++ b/trunk/drivers/md/dm.c @@ -6,6 +6,7 @@ */ #include "dm.h" +#include "dm-bio-list.h" #include "dm-uevent.h" #include diff --git a/trunk/drivers/md/raid1.c b/trunk/drivers/md/raid1.c index 36df9109cde1..274b491a11c1 100644 --- a/trunk/drivers/md/raid1.c +++ b/trunk/drivers/md/raid1.c @@ -35,6 +35,7 @@ #include #include #include "md.h" +#include "dm-bio-list.h" #include "raid1.h" #include "bitmap.h" diff --git a/trunk/drivers/md/raid10.c b/trunk/drivers/md/raid10.c index 81a54f17417e..e293d92641ac 100644 --- a/trunk/drivers/md/raid10.c +++ b/trunk/drivers/md/raid10.c @@ -22,6 +22,7 @@ #include #include #include "md.h" +#include "dm-bio-list.h" #include "raid10.h" #include "bitmap.h" diff --git a/trunk/fs/bio.c b/trunk/fs/bio.c index cd42bb882f30..e0c9e545bbfa 100644 --- a/trunk/fs/bio.c +++ b/trunk/fs/bio.c @@ -348,24 +348,6 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) return NULL; } -/** - * bio_alloc - allocate a bio for I/O - * @gfp_mask: the GFP_ mask given to the slab allocator - * @nr_iovecs: number of iovecs to pre-allocate - * - * Description: - * bio_alloc will allocate a bio and associated bio_vec array that can hold - * at least @nr_iovecs entries. Allocations will be done from the - * fs_bio_set. Also see @bio_alloc_bioset. - * - * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate - * a bio. This is due to the mempool guarantees. To make this work, callers - * must never allocate more than 1 bio at the time from this pool. Callers - * that need to allocate more than 1 bio must always submit the previously - * allocate bio for IO before attempting to allocate a new one. Failure to - * do so can cause livelocks under memory pressure. 
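Note: the fs/bio.c hunk above drops the bio_alloc() kerneldoc, whose key rule is worth restating: with __GFP_WAIT the fs_bio_set mempool guarantees forward progress only if each caller holds at most one bio from the pool at a time, submitting each bio before allocating the next. A toy model of why, with a single-element pool (names hypothetical):

    #include <stdio.h>

    /* Single-element reserve pool standing in for fs_bio_set. */
    static int pool_free = 1;

    static int pool_alloc(void)
    {
            if (!pool_free)
                    return -1;      /* the real mempool would sleep here --
                                     * forever, if we are the one holding
                                     * the element */
            pool_free = 0;
            return 0;
    }

    static void pool_submit(void)   /* I/O completion returns the element */
    {
            pool_free = 1;
    }

    int main(void)
    {
            int i;

            for (i = 0; i < 4; i++) {
                    if (pool_alloc())
                            return 1;
                    printf("request %d in flight\n", i);
                    pool_submit();  /* must precede the next pool_alloc() */
            }
            return 0;
    }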
- * - **/ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) { struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); diff --git a/trunk/fs/buffer.c b/trunk/fs/buffer.c index ff8bb1f2333a..13edf7ad3ff1 100644 --- a/trunk/fs/buffer.c +++ b/trunk/fs/buffer.c @@ -547,7 +547,7 @@ static int osync_buffers_list(spinlock_t *lock, struct list_head *list) return err; } -void do_thaw_all(struct work_struct *work) +void do_thaw_all(unsigned long unused) { struct super_block *sb; char b[BDEVNAME_SIZE]; @@ -567,7 +567,6 @@ void do_thaw_all(struct work_struct *work) goto restart; } spin_unlock(&sb_lock); - kfree(work); printk(KERN_WARNING "Emergency Thaw complete\n"); } @@ -578,13 +577,7 @@ void do_thaw_all(struct work_struct *work) */ void emergency_thaw_all(void) { - struct work_struct *work; - - work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (work) { - INIT_WORK(work, do_thaw_all); - schedule_work(work); - } + pdflush_operation(do_thaw_all, 0); } /** diff --git a/trunk/fs/direct-io.c b/trunk/fs/direct-io.c index 05763bbc2050..da258e7249cc 100644 --- a/trunk/fs/direct-io.c +++ b/trunk/fs/direct-io.c @@ -307,6 +307,8 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, struct bio *bio; bio = bio_alloc(GFP_KERNEL, nr_vecs); + if (bio == NULL) + return -ENOMEM; bio->bi_bdev = bdev; bio->bi_sector = first_sector; diff --git a/trunk/fs/ext4/extents.c b/trunk/fs/ext4/extents.c index 2a1cb0979768..6132353dcf62 100644 --- a/trunk/fs/ext4/extents.c +++ b/trunk/fs/ext4/extents.c @@ -2416,6 +2416,8 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) len = ee_len; bio = bio_alloc(GFP_NOIO, len); + if (!bio) + return -ENOMEM; bio->bi_sector = ee_pblock; bio->bi_bdev = inode->i_sb->s_bdev; diff --git a/trunk/fs/gfs2/ops_fstype.c b/trunk/fs/gfs2/ops_fstype.c index 650a730707b7..51883b3ad89c 100644 --- a/trunk/fs/gfs2/ops_fstype.c +++ b/trunk/fs/gfs2/ops_fstype.c @@ -272,6 +272,11 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) lock_page(page); bio = bio_alloc(GFP_NOFS, 1); + if (unlikely(!bio)) { + __free_page(page); + return -ENOBUFS; + } + bio->bi_sector = sector * (sb->s_blocksize >> 9); bio->bi_bdev = sb->s_bdev; bio_add_page(bio, page, PAGE_SIZE, 0); diff --git a/trunk/fs/gfs2/quota.c b/trunk/fs/gfs2/quota.c index 8d53f66b5bcc..47bc5cbba48e 100644 --- a/trunk/fs/gfs2/quota.c +++ b/trunk/fs/gfs2/quota.c @@ -1364,7 +1364,7 @@ int gfs2_quotad(void *data) refrigerator(); t = min(quotad_timeo, statfs_timeo); - prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE); spin_lock(&sdp->sd_trunc_lock); empty = list_empty(&sdp->sd_trunc_list); spin_unlock(&sdp->sd_trunc_lock); diff --git a/trunk/fs/inode.c b/trunk/fs/inode.c index 6ad14a1cd8c9..d06d6d268de9 100644 --- a/trunk/fs/inode.c +++ b/trunk/fs/inode.c @@ -1470,6 +1470,42 @@ static void __wait_on_freeing_inode(struct inode *inode) spin_lock(&inode_lock); } +/* + * We rarely want to lock two inodes that do not have a parent/child + * relationship (such as directory, child inode) simultaneously. The + * vast majority of file systems should be able to get along fine + * without this. Do not use these functions except as a last resort. 
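Note: in the gfs2_read_super() hunk above, a failed bio_alloc() has to release the page that was already set up before returning. The broader kernel idiom for such multi-step setup is unwinding in reverse order, usually with goto labels; a userspace sketch of that shape (resource names hypothetical):

    #include <errno.h>
    #include <stdlib.h>

    struct ctx { void *page; void *req; };

    static int setup(struct ctx *c)
    {
            int err = -ENOMEM;

            c->page = malloc(4096);
            if (!c->page)
                    goto out;

            c->req = malloc(128);
            if (!c->req)
                    goto out_free_page;     /* undo only what succeeded */

            return 0;

    out_free_page:
            free(c->page);
    out:
            return err;
    }

    int main(void)
    {
            struct ctx c;

            if (setup(&c))
                    return 1;
            free(c.req);
            free(c.page);
            return 0;
    }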
+ */ +void inode_double_lock(struct inode *inode1, struct inode *inode2) +{ + if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { + if (inode1) + mutex_lock(&inode1->i_mutex); + else if (inode2) + mutex_lock(&inode2->i_mutex); + return; + } + + if (inode1 < inode2) { + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); + } +} +EXPORT_SYMBOL(inode_double_lock); + +void inode_double_unlock(struct inode *inode1, struct inode *inode2) +{ + if (inode1) + mutex_unlock(&inode1->i_mutex); + + if (inode2 && inode2 != inode1) + mutex_unlock(&inode2->i_mutex); +} +EXPORT_SYMBOL(inode_double_unlock); + static __initdata unsigned long ihash_entries; static int __init set_ihash_entries(char *str) { diff --git a/trunk/fs/ocfs2/file.c b/trunk/fs/ocfs2/file.c index c2a87c885b73..8672b9536039 100644 --- a/trunk/fs/ocfs2/file.c +++ b/trunk/fs/ocfs2/file.c @@ -1912,22 +1912,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, return written ? written : ret; } -static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, - struct file *out, - struct splice_desc *sd) -{ - int ret; - - ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, - sd->total_len, 0, NULL); - if (ret < 0) { - mlog_errno(ret); - return ret; - } - - return splice_from_pipe_feed(pipe, sd, pipe_to_file); -} - static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, @@ -1935,76 +1919,38 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, unsigned int flags) { int ret; - struct address_space *mapping = out->f_mapping; - struct inode *inode = mapping->host; - struct splice_desc sd = { - .total_len = len, - .flags = flags, - .pos = *ppos, - .u.file = out, - }; + struct inode *inode = out->f_path.dentry->d_inode; mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, (unsigned int)len, out->f_path.dentry->d_name.len, out->f_path.dentry->d_name.name); - if (pipe->inode) - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); - splice_from_pipe_begin(&sd); - do { - ret = splice_from_pipe_next(pipe, &sd); - if (ret <= 0) - break; + ret = ocfs2_rw_lock(inode, 1); + if (ret < 0) { + mlog_errno(ret); + goto out; + } - mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); - ret = ocfs2_rw_lock(inode, 1); - if (ret < 0) - mlog_errno(ret); - else { - ret = ocfs2_splice_to_file(pipe, out, &sd); - ocfs2_rw_unlock(inode, 1); - } - mutex_unlock(&inode->i_mutex); - } while (ret > 0); - splice_from_pipe_end(pipe, &sd); + ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0, + NULL); + if (ret < 0) { + mlog_errno(ret); + goto out_unlock; + } + if (pipe->inode) + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); + ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); - if (sd.num_spliced) - ret = sd.num_spliced; - - if (ret > 0) { - unsigned long nr_pages; - - *ppos += ret; - nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - /* - * If file or inode is SYNC and we actually wrote some data, - * sync it. 
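Note: inode_double_lock() above avoids ABBA deadlock by taking the two mutexes in a fixed global order, chosen by comparing the inode addresses, so that two tasks locking the same pair with swapped arguments still acquire them in the same order. A userspace sketch of the same discipline with POSIX mutexes:

    #include <pthread.h>

    /* Order the pair by address: callers may pass (a, b) or (b, a) and
     * both will lock in the same sequence, so they cannot deadlock. */
    static void double_lock(pthread_mutex_t *a, pthread_mutex_t *b)
    {
            if (!a || !b || a == b) {
                    if (a)
                            pthread_mutex_lock(a);
                    else if (b)
                            pthread_mutex_lock(b);
                    return;
            }
            if (a < b) {
                    pthread_mutex_lock(a);
                    pthread_mutex_lock(b);
            } else {
                    pthread_mutex_lock(b);
                    pthread_mutex_lock(a);
            }
    }

    static void double_unlock(pthread_mutex_t *a, pthread_mutex_t *b)
    {
            if (a)
                    pthread_mutex_unlock(a);
            if (b && b != a)
                    pthread_mutex_unlock(b);
    }

    int main(void)
    {
            pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
            pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

            double_lock(&m1, &m2);  /* same order as double_lock(&m2, &m1) */
            double_unlock(&m1, &m2);
            return 0;
    }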
- */ - if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { - int err; - - mutex_lock(&inode->i_mutex); - err = ocfs2_rw_lock(inode, 1); - if (err < 0) { - mlog_errno(err); - } else { - err = generic_osync_inode(inode, mapping, - OSYNC_METADATA|OSYNC_DATA); - ocfs2_rw_unlock(inode, 1); - } - mutex_unlock(&inode->i_mutex); - - if (err) - ret = err; - } - balance_dirty_pages_ratelimited_nr(mapping, nr_pages); - } +out_unlock: + ocfs2_rw_unlock(inode, 1); +out: + mutex_unlock(&inode->i_mutex); mlog_exit(ret); return ret; diff --git a/trunk/fs/pipe.c b/trunk/fs/pipe.c index 13414ec45b8d..4af7aa521813 100644 --- a/trunk/fs/pipe.c +++ b/trunk/fs/pipe.c @@ -37,42 +37,6 @@ * -- Manfred Spraul 2002-05-09 */ -static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) -{ - if (pipe->inode) - mutex_lock_nested(&pipe->inode->i_mutex, subclass); -} - -void pipe_lock(struct pipe_inode_info *pipe) -{ - /* - * pipe_lock() nests non-pipe inode locks (for writing to a file) - */ - pipe_lock_nested(pipe, I_MUTEX_PARENT); -} -EXPORT_SYMBOL(pipe_lock); - -void pipe_unlock(struct pipe_inode_info *pipe) -{ - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); -} -EXPORT_SYMBOL(pipe_unlock); - -void pipe_double_lock(struct pipe_inode_info *pipe1, - struct pipe_inode_info *pipe2) -{ - BUG_ON(pipe1 == pipe2); - - if (pipe1 < pipe2) { - pipe_lock_nested(pipe1, I_MUTEX_PARENT); - pipe_lock_nested(pipe2, I_MUTEX_CHILD); - } else { - pipe_lock_nested(pipe2, I_MUTEX_CHILD); - pipe_lock_nested(pipe1, I_MUTEX_PARENT); - } -} - /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe) { @@ -83,10 +47,12 @@ void pipe_wait(struct pipe_inode_info *pipe) * is considered a noninteractive wait: */ prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); - pipe_unlock(pipe); + if (pipe->inode) + mutex_unlock(&pipe->inode->i_mutex); schedule(); finish_wait(&pipe->wait, &wait); - pipe_lock(pipe); + if (pipe->inode) + mutex_lock(&pipe->inode->i_mutex); } static int diff --git a/trunk/fs/splice.c b/trunk/fs/splice.c index 5384a90665d0..c18aa7e03e2b 100644 --- a/trunk/fs/splice.c +++ b/trunk/fs/splice.c @@ -182,7 +182,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, do_wakeup = 0; page_nr = 0; - pipe_lock(pipe); + if (pipe->inode) + mutex_lock(&pipe->inode->i_mutex); for (;;) { if (!pipe->readers) { @@ -244,13 +245,15 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, pipe->waiting_writers--; } - pipe_unlock(pipe); + if (pipe->inode) { + mutex_unlock(&pipe->inode->i_mutex); - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + if (do_wakeup) { + smp_mb(); + if (waitqueue_active(&pipe->wait)) + wake_up_interruptible(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + } } while (page_nr < spd_pages) @@ -552,8 +555,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create * a new page in the output file page cache and fill/dirty that. 
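Note: the pipe.c hunk above makes pipe_wait() drop pipe->inode->i_mutex around schedule() and retake it afterwards, so a task never sleeps holding the lock that writers need to make progress. In userspace, pthread_cond_wait() bundles exactly that unlock/sleep/relock sequence; a minimal sketch:

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  evt  = PTHREAD_COND_INITIALIZER;
    static bool data_ready;

    /* Model of pipe_wait(): give up the lock while sleeping, retake it
     * before re-checking the condition. */
    static void wait_for_data(void)
    {
            pthread_mutex_lock(&lock);
            while (!data_ready)
                    pthread_cond_wait(&evt, &lock); /* unlock+sleep+relock */
            pthread_mutex_unlock(&lock);
    }

    static void *writer(void *arg)
    {
            (void)arg;
            pthread_mutex_lock(&lock);
            data_ready = true;
            pthread_cond_signal(&evt);
            pthread_mutex_unlock(&lock);
            return NULL;
    }

    int main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, writer, NULL);
            wait_for_data();
            pthread_join(t, NULL);
            return 0;
    }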
*/ -int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, - struct splice_desc *sd) +static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, + struct splice_desc *sd) { struct file *file = sd->u.file; struct address_space *mapping = file->f_mapping; @@ -597,178 +600,108 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, out: return ret; } -EXPORT_SYMBOL(pipe_to_file); - -static void wakeup_pipe_writers(struct pipe_inode_info *pipe) -{ - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); -} /** - * splice_from_pipe_feed - feed available data from a pipe to a file + * __splice_from_pipe - splice data from a pipe to given actor * @pipe: pipe to splice from * @sd: information to @actor * @actor: handler that splices the data * * Description: - - * This function loops over the pipe and calls @actor to do the - * actual moving of a single struct pipe_buffer to the desired - * destination. It returns when there's no more buffers left in - * the pipe or if the requested number of bytes (@sd->total_len) - * have been copied. It returns a positive number (one) if the - * pipe needs to be filled with more data, zero if the required - * number of bytes have been copied and -errno on error. + * This function does little more than loop over the pipe and call + * @actor to do the actual moving of a single struct pipe_buffer to + * the desired destination. See pipe_to_file, pipe_to_sendpage, or + * pipe_to_user. * - * This, together with splice_from_pipe_{begin,end,next}, may be - * used to implement the functionality of __splice_from_pipe() when - * locking is required around copying the pipe buffers to the - * destination. */ -int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, - splice_actor *actor) +ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, + splice_actor *actor) { - int ret; + int ret, do_wakeup, err; - while (pipe->nrbufs) { - struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; - const struct pipe_buf_operations *ops = buf->ops; + ret = 0; + do_wakeup = 0; - sd->len = buf->len; - if (sd->len > sd->total_len) - sd->len = sd->total_len; + for (;;) { + if (pipe->nrbufs) { + struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; + const struct pipe_buf_operations *ops = buf->ops; - ret = actor(pipe, buf, sd); - if (ret <= 0) { - if (ret == -ENODATA) - ret = 0; - return ret; - } - buf->offset += ret; - buf->len -= ret; + sd->len = buf->len; + if (sd->len > sd->total_len) + sd->len = sd->total_len; - sd->num_spliced += ret; - sd->len -= ret; - sd->pos += ret; - sd->total_len -= ret; + err = actor(pipe, buf, sd); + if (err <= 0) { + if (!ret && err != -ENODATA) + ret = err; - if (!buf->len) { - buf->ops = NULL; - ops->release(pipe, buf); - pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); - pipe->nrbufs--; - if (pipe->inode) - sd->need_wakeup = true; - } + break; + } - if (!sd->total_len) - return 0; - } + ret += err; + buf->offset += err; + buf->len -= err; - return 1; -} -EXPORT_SYMBOL(splice_from_pipe_feed); + sd->len -= err; + sd->pos += err; + sd->total_len -= err; + if (sd->len) + continue; -/** - * splice_from_pipe_next - wait for some data to splice from - * @pipe: pipe to splice from - * @sd: information about the splice operation - * - * Description: - * This function will wait for some data and return a positive - * value (one) if pipe buffers are available. 
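Note: the restored __splice_from_pipe() above is a feed loop with partial-progress semantics: each pipe buffer is handed to @actor, byte counts accumulate in ret, and an actor error is surfaced only if nothing was moved first (-ENODATA meaning a clean stop). A simplified userspace model of that contract (buffer ring and names hypothetical):

    #include <errno.h>
    #include <stddef.h>

    struct buf { const char *data; size_t len; };

    typedef long (*actor_fn)(struct buf *b, size_t want);

    /* Feed buffers to @actor; report partial progress in preference to
     * an error, as the splice loop above does. */
    static long feed_all(struct buf *bufs, int nbufs, size_t total,
                         actor_fn actor)
    {
            long ret = 0;
            int i;

            for (i = 0; i < nbufs && total; i++) {
                    size_t want = bufs[i].len < total ? bufs[i].len : total;
                    long err = actor(&bufs[i], want);

                    if (err <= 0) {
                            if (!ret && err != -ENODATA)
                                    ret = err;  /* nothing moved: real error */
                            break;
                    }
                    ret += err;
                    total -= (size_t)err;
            }
            return ret;
    }

    static long count_actor(struct buf *b, size_t want)
    {
            (void)b;
            return (long)want;      /* pretend everything was consumed */
    }

    int main(void)
    {
            struct buf bufs[] = { { "abcd", 4 }, { "efgh", 4 } };

            return feed_all(bufs, 2, 6, count_actor) == 6 ? 0 : 1;
    }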
It will return zero - * or -errno if no more data needs to be spliced. - */ -int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) -{ - while (!pipe->nrbufs) { - if (!pipe->writers) - return 0; + if (!buf->len) { + buf->ops = NULL; + ops->release(pipe, buf); + pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); + pipe->nrbufs--; + if (pipe->inode) + do_wakeup = 1; + } - if (!pipe->waiting_writers && sd->num_spliced) - return 0; + if (!sd->total_len) + break; + } - if (sd->flags & SPLICE_F_NONBLOCK) - return -EAGAIN; + if (pipe->nrbufs) + continue; + if (!pipe->writers) + break; + if (!pipe->waiting_writers) { + if (ret) + break; + } - if (signal_pending(current)) - return -ERESTARTSYS; + if (sd->flags & SPLICE_F_NONBLOCK) { + if (!ret) + ret = -EAGAIN; + break; + } + + if (signal_pending(current)) { + if (!ret) + ret = -ERESTARTSYS; + break; + } - if (sd->need_wakeup) { - wakeup_pipe_writers(pipe); - sd->need_wakeup = false; + if (do_wakeup) { + smp_mb(); + if (waitqueue_active(&pipe->wait)) + wake_up_interruptible_sync(&pipe->wait); + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); + do_wakeup = 0; } pipe_wait(pipe); } - return 1; -} -EXPORT_SYMBOL(splice_from_pipe_next); - -/** - * splice_from_pipe_begin - start splicing from pipe - * @pipe: pipe to splice from - * - * Description: - * This function should be called before a loop containing - * splice_from_pipe_next() and splice_from_pipe_feed() to - * initialize the necessary fields of @sd. - */ -void splice_from_pipe_begin(struct splice_desc *sd) -{ - sd->num_spliced = 0; - sd->need_wakeup = false; -} -EXPORT_SYMBOL(splice_from_pipe_begin); - -/** - * splice_from_pipe_end - finish splicing from pipe - * @pipe: pipe to splice from - * @sd: information about the splice operation - * - * Description: - * This function will wake up pipe writers if necessary. It should - * be called after a loop containing splice_from_pipe_next() and - * splice_from_pipe_feed(). - */ -void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) -{ - if (sd->need_wakeup) - wakeup_pipe_writers(pipe); -} -EXPORT_SYMBOL(splice_from_pipe_end); - -/** - * __splice_from_pipe - splice data from a pipe to given actor - * @pipe: pipe to splice from - * @sd: information to @actor - * @actor: handler that splices the data - * - * Description: - * This function does little more than loop over the pipe and call - * @actor to do the actual moving of a single struct pipe_buffer to - * the desired destination. See pipe_to_file, pipe_to_sendpage, or - * pipe_to_user. - * - */ -ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, - splice_actor *actor) -{ - int ret; - - splice_from_pipe_begin(sd); - do { - ret = splice_from_pipe_next(pipe, sd); - if (ret > 0) - ret = splice_from_pipe_feed(pipe, sd, actor); - } while (ret > 0); - splice_from_pipe_end(pipe, sd); + if (do_wakeup) { + smp_mb(); + if (waitqueue_active(&pipe->wait)) + wake_up_interruptible(&pipe->wait); + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); + } - return sd->num_spliced ? sd->num_spliced : ret; + return ret; } EXPORT_SYMBOL(__splice_from_pipe); @@ -782,7 +715,7 @@ EXPORT_SYMBOL(__splice_from_pipe); * @actor: handler that splices the data * * Description: - * See __splice_from_pipe. This function locks the pipe inode, + * See __splice_from_pipe. This function locks the input and output inodes, * otherwise it's identical to __splice_from_pipe(). 
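Note: both the splice_from_pipe_feed()/need_wakeup machinery being removed above and the do_wakeup flag in the restored loop batch their writer wakeups: releasing a pipe buffer only records that a wakeup is owed, and a single wakeup is issued before sleeping or on exit rather than one per buffer. A toy model of that batching:

    #include <stdio.h>

    static int need_wakeup;

    static void consume_buffer(int idx)
    {
            printf("released buffer %d\n", idx);
            need_wakeup = 1;        /* don't wake writers per buffer */
    }

    static void flush_wakeup(void)
    {
            if (need_wakeup) {
                    printf("wake writers once\n");
                    need_wakeup = 0;
            }
    }

    int main(void)
    {
            int i;

            for (i = 0; i < 3; i++)
                    consume_buffer(i);
            flush_wakeup();         /* one wakeup for three buffers */
            return 0;
    }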
* */ @@ -791,6 +724,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, splice_actor *actor) { ssize_t ret; + struct inode *inode = out->f_mapping->host; struct splice_desc sd = { .total_len = len, .flags = flags, @@ -798,15 +732,30 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, .u.file = out, }; - pipe_lock(pipe); + /* + * The actor worker might be calling ->write_begin and + * ->write_end. Most of the time, these expect i_mutex to + * be held. Since this may result in an ABBA deadlock with + * pipe->inode, we have to order lock acquiry here. + * + * Outer lock must be inode->i_mutex, as pipe_wait() will + * release and reacquire pipe->inode->i_mutex, AND inode must + * never be a pipe. + */ + WARN_ON(S_ISFIFO(inode->i_mode)); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); + if (pipe->inode) + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); ret = __splice_from_pipe(pipe, &sd, actor); - pipe_unlock(pipe); + if (pipe->inode) + mutex_unlock(&pipe->inode->i_mutex); + mutex_unlock(&inode->i_mutex); return ret; } /** - * generic_file_splice_write - splice data from a pipe to a file + * generic_file_splice_write_nolock - generic_file_splice_write without mutexes * @pipe: pipe info * @out: file to write to * @ppos: position in @out @@ -815,12 +764,13 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, * * Description: * Will either move or copy pages (determined by @flags options) from - * the given pipe inode to the given file. + * the given pipe inode to the given file. The caller is responsible + * for acquiring i_mutex on both inodes. * */ ssize_t -generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags) +generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) { struct address_space *mapping = out->f_mapping; struct inode *inode = mapping->host; @@ -831,28 +781,76 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, .u.file = out, }; ssize_t ret; + int err; - pipe_lock(pipe); + err = file_remove_suid(out); + if (unlikely(err)) + return err; - splice_from_pipe_begin(&sd); - do { - ret = splice_from_pipe_next(pipe, &sd); - if (ret <= 0) - break; + ret = __splice_from_pipe(pipe, &sd, pipe_to_file); + if (ret > 0) { + unsigned long nr_pages; - mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); - ret = file_remove_suid(out); - if (!ret) - ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); - mutex_unlock(&inode->i_mutex); - } while (ret > 0); - splice_from_pipe_end(pipe, &sd); + *ppos += ret; + nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - pipe_unlock(pipe); + /* + * If file or inode is SYNC and we actually wrote some data, + * sync it. + */ + if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { + err = generic_osync_inode(inode, mapping, + OSYNC_METADATA|OSYNC_DATA); + + if (err) + ret = err; + } + balance_dirty_pages_ratelimited_nr(mapping, nr_pages); + } - if (sd.num_spliced) - ret = sd.num_spliced; + return ret; +} + +EXPORT_SYMBOL(generic_file_splice_write_nolock); + +/** + * generic_file_splice_write - splice data from a pipe to a file + * @pipe: pipe info + * @out: file to write to + * @ppos: position in @out + * @len: number of bytes to splice + * @flags: splice modifier flags + * + * Description: + * Will either move or copy pages (determined by @flags options) from + * the given pipe inode to the given file. 
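Note: generic_file_splice_write_nolock() above only pays for generic_osync_inode() when bytes were actually written and the file or inode demands synchronous semantics. A userspace analogue of that decision, using fsync() as the flush (helper name hypothetical):

    #include <fcntl.h>
    #include <unistd.h>

    /* Flush only when data was written and the caller asked for sync. */
    static ssize_t write_maybe_sync(int fd, const void *buf, size_t len,
                                    int want_sync)
    {
            ssize_t ret = write(fd, buf, len);

            if (ret > 0 && want_sync && fsync(fd) < 0)
                    ret = -1;       /* surface the sync failure */
            return ret;
    }

    int main(void)
    {
            int fd = open("/tmp/splice-sync-demo",
                          O_CREAT | O_WRONLY | O_TRUNC, 0600);
            ssize_t n;

            if (fd < 0)
                    return 1;
            n = write_maybe_sync(fd, "hello\n", 6, 1);
            close(fd);
            return n == 6 ? 0 : 1;
    }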
+ * + */ +ssize_t +generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) +{ + struct address_space *mapping = out->f_mapping; + struct inode *inode = mapping->host; + struct splice_desc sd = { + .total_len = len, + .flags = flags, + .pos = *ppos, + .u.file = out, + }; + ssize_t ret; + WARN_ON(S_ISFIFO(inode->i_mode)); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); + ret = file_remove_suid(out); + if (likely(!ret)) { + if (pipe->inode) + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); + ret = __splice_from_pipe(pipe, &sd, pipe_to_file); + if (pipe->inode) + mutex_unlock(&pipe->inode->i_mutex); + } + mutex_unlock(&inode->i_mutex); if (ret > 0) { unsigned long nr_pages; @@ -1341,7 +1339,8 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, if (!pipe) return -EBADF; - pipe_lock(pipe); + if (pipe->inode) + mutex_lock(&pipe->inode->i_mutex); error = ret = 0; while (nr_segs) { @@ -1396,7 +1395,8 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, iov++; } - pipe_unlock(pipe); + if (pipe->inode) + mutex_unlock(&pipe->inode->i_mutex); if (!ret) ret = error; @@ -1524,7 +1524,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) return 0; ret = 0; - pipe_lock(pipe); + mutex_lock(&pipe->inode->i_mutex); while (!pipe->nrbufs) { if (signal_pending(current)) { @@ -1542,7 +1542,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) pipe_wait(pipe); } - pipe_unlock(pipe); + mutex_unlock(&pipe->inode->i_mutex); return ret; } @@ -1562,7 +1562,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) return 0; ret = 0; - pipe_lock(pipe); + mutex_lock(&pipe->inode->i_mutex); while (pipe->nrbufs >= PIPE_BUFFERS) { if (!pipe->readers) { @@ -1583,7 +1583,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) pipe->waiting_writers--; } - pipe_unlock(pipe); + mutex_unlock(&pipe->inode->i_mutex); return ret; } @@ -1599,10 +1599,10 @@ static int link_pipe(struct pipe_inode_info *ipipe, /* * Potential ABBA deadlock, work around it by ordering lock - * grabbing by pipe info address. Otherwise two different processes + * grabbing by inode address. Otherwise two different processes * could deadlock (one doing tee from A -> B, the other from B -> A). */ - pipe_double_lock(ipipe, opipe); + inode_double_lock(ipipe->inode, opipe->inode); do { if (!opipe->readers) { @@ -1653,8 +1653,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) ret = -EAGAIN; - pipe_unlock(ipipe); - pipe_unlock(opipe); + inode_double_unlock(ipipe->inode, opipe->inode); /* * If we put data in the output pipe, wakeup any potential readers. diff --git a/trunk/include/linux/bio.h b/trunk/include/linux/bio.h index b89cf2d82898..b900d2c67d29 100644 --- a/trunk/include/linux/bio.h +++ b/trunk/include/linux/bio.h @@ -504,115 +504,6 @@ static inline int bio_has_data(struct bio *bio) return bio && bio->bi_io_vec != NULL; } -/* - * BIO list managment for use by remapping drivers (e.g. DM or MD). - * - * A bio_list anchors a singly-linked list of bios chained through the bi_next - * member of the bio. The bio_list also caches the last list member to allow - * fast access to the tail. 
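Note: link_opipe_prep() above waits for a free slot in the output pipe unless SPLICE_F_NONBLOCK was passed, in which case it fails fast with -EAGAIN, and it gives up when there are no readers left to ever drain the pipe. A reduced model of that wait policy (sleeping and signal checks stubbed out, names hypothetical):

    #include <errno.h>

    #define RING_SLOTS 16           /* stand-in for PIPE_BUFFERS */

    struct ring { int nrbufs; int readers; };

    static int wait_for_slot(struct ring *r, int nonblock)
    {
            while (r->nrbufs >= RING_SLOTS) {
                    if (!r->readers)
                            return -EPIPE;  /* nobody will drain it */
                    if (nonblock)
                            return -EAGAIN; /* caller asked not to wait */
                    /* here the kernel would check signals and sleep */
                    r->nrbufs--;            /* stub: a reader drained one */
            }
            return 0;
    }

    int main(void)
    {
            struct ring full = { RING_SLOTS, 1 };

            return wait_for_slot(&full, 1) == -EAGAIN ? 0 : 1;
    }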
- */ -struct bio_list { - struct bio *head; - struct bio *tail; -}; - -static inline int bio_list_empty(const struct bio_list *bl) -{ - return bl->head == NULL; -} - -static inline void bio_list_init(struct bio_list *bl) -{ - bl->head = bl->tail = NULL; -} - -#define bio_list_for_each(bio, bl) \ - for (bio = (bl)->head; bio; bio = bio->bi_next) - -static inline unsigned bio_list_size(const struct bio_list *bl) -{ - unsigned sz = 0; - struct bio *bio; - - bio_list_for_each(bio, bl) - sz++; - - return sz; -} - -static inline void bio_list_add(struct bio_list *bl, struct bio *bio) -{ - bio->bi_next = NULL; - - if (bl->tail) - bl->tail->bi_next = bio; - else - bl->head = bio; - - bl->tail = bio; -} - -static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) -{ - bio->bi_next = bl->head; - - bl->head = bio; - - if (!bl->tail) - bl->tail = bio; -} - -static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) -{ - if (!bl2->head) - return; - - if (bl->tail) - bl->tail->bi_next = bl2->head; - else - bl->head = bl2->head; - - bl->tail = bl2->tail; -} - -static inline void bio_list_merge_head(struct bio_list *bl, - struct bio_list *bl2) -{ - if (!bl2->head) - return; - - if (bl->head) - bl2->tail->bi_next = bl->head; - else - bl->tail = bl2->tail; - - bl->head = bl2->head; -} - -static inline struct bio *bio_list_pop(struct bio_list *bl) -{ - struct bio *bio = bl->head; - - if (bio) { - bl->head = bl->head->bi_next; - if (!bl->head) - bl->tail = NULL; - - bio->bi_next = NULL; - } - - return bio; -} - -static inline struct bio *bio_list_get(struct bio_list *bl) -{ - struct bio *bio = bl->head; - - bl->head = bl->tail = NULL; - - return bio; -} - #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) diff --git a/trunk/include/linux/fs.h b/trunk/include/linux/fs.h index e766be0d4329..562d2855cf30 100644 --- a/trunk/include/linux/fs.h +++ b/trunk/include/linux/fs.h @@ -87,60 +87,6 @@ struct inodes_stat_t { */ #define FMODE_NOCMTIME ((__force fmode_t)2048) -/* - * The below are the various read and write types that we support. Some of - * them include behavioral modifiers that send information down to the - * block layer and IO scheduler. Terminology: - * - * The block layer uses device plugging to defer IO a little bit, in - * the hope that we will see more IO very shortly. This increases - * coalescing of adjacent IO and thus reduces the number of IOs we - * have to send to the device. It also allows for better queuing, - * if the IO isn't mergeable. If the caller is going to be waiting - * for the IO, then he must ensure that the device is unplugged so - * that the IO is dispatched to the driver. - * - * All IO is handled async in Linux. This is fine for background - * writes, but for reads or writes that someone waits for completion - * on, we want to notify the block layer and IO scheduler so that they - * know about it. That allows them to make better scheduling - * decisions. So when the below references 'sync' and 'async', it - * is referencing this priority hint. - * - * With that in mind, the available types are: - * - * READ A normal read operation. Device will be plugged. - * READ_SYNC A synchronous read. Device is not plugged, caller can - * immediately wait on this read without caring about - * unplugging. - * READA Used for read-ahead operations. Lower priority, and the - * block layer could (in theory) choose to ignore this - * request if it runs into resource problems. - * WRITE A normal async write. 
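Note: the fs.h comment block removed above describes device plugging: the block layer briefly holds back I/O in the hope that adjacent requests arrive and can be coalesced, and anyone waiting on an I/O must make sure the queue gets unplugged so the request is dispatched. A toy batching model of that idea:

    #include <stdio.h>

    #define BATCH 4

    struct queue { int pending[BATCH]; int n; int plugged; };

    static void dispatch(struct queue *q)
    {
            int i;

            for (i = 0; i < q->n; i++)
                    printf("dispatch sector %d\n", q->pending[i]);
            q->n = 0;
            q->plugged = 0;
    }

    static void submit(struct queue *q, int sector)
    {
            if (q->n == 0)
                    q->plugged = 1; /* first request plugs the queue */
            q->pending[q->n++] = sector;
            if (q->n == BATCH)
                    dispatch(q);    /* batch full: unplug and issue */
    }

    static void unplug(struct queue *q) /* e.g. someone waits on a read */
    {
            if (q->plugged)
                    dispatch(q);
    }

    int main(void)
    {
            struct queue q = { {0}, 0, 0 };

            submit(&q, 8);
            submit(&q, 16);
            unplug(&q);             /* flush before the batch fills */
            return 0;
    }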
Device will be plugged. - * SWRITE Like WRITE, but a special case for ll_rw_block() that - * tells it to lock the buffer first. Normally a buffer - * must be locked before doing IO. - * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down - * the hint that someone will be waiting on this IO - * shortly. The device must still be unplugged explicitly, - * WRITE_SYNC_PLUG does not do this as we could be - * submitting more writes before we actually wait on any - * of them. - * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device - * immediately after submission. The write equivalent - * of READ_SYNC. - * WRITE_ODIRECT Special case write for O_DIRECT only. - * SWRITE_SYNC - * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. - * See SWRITE. - * WRITE_BARRIER Like WRITE, but tells the block layer that all - * previously submitted writes must be safely on storage - * before this one is started. Also guarantees that when - * this write is complete, it itself is also safely on - * storage. Prevents reordering of writes on both sides - * of this IO. - * - */ #define RW_MASK 1 #define RWA_MASK 2 #define READ 0 @@ -156,11 +102,6 @@ struct inodes_stat_t { (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) - -/* - * These aren't really reads or writes, they pass down information about - * parts of device that are now unused by the file system. - */ #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) @@ -797,6 +738,9 @@ enum inode_i_mutex_lock_class I_MUTEX_QUOTA }; +extern void inode_double_lock(struct inode *inode1, struct inode *inode2); +extern void inode_double_unlock(struct inode *inode1, struct inode *inode2); + /* * NOTE: in a 32bit arch with a preemptable kernel and * an UP compile the i_size_read/write must be atomic @@ -2206,6 +2150,8 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); +extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *, + struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, loff_t *, size_t len, unsigned int flags); extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, diff --git a/trunk/include/linux/fsl_devices.h b/trunk/include/linux/fsl_devices.h index 43fc95d822d5..f2a78b5e8b55 100644 --- a/trunk/include/linux/fsl_devices.h +++ b/trunk/include/linux/fsl_devices.h @@ -43,6 +43,10 @@ * */ +/* Flags related to I2C device features */ +#define FSL_I2C_DEV_SEPARATE_DFSRR 0x00000001 +#define FSL_I2C_DEV_CLOCK_5200 0x00000002 + enum fsl_usb2_operating_modes { FSL_USB2_MPH_HOST, FSL_USB2_DR_HOST, diff --git a/trunk/include/linux/pipe_fs_i.h b/trunk/include/linux/pipe_fs_i.h index c8f038554e80..8e4120285f72 100644 --- a/trunk/include/linux/pipe_fs_i.h +++ b/trunk/include/linux/pipe_fs_i.h @@ -134,11 +134,6 @@ struct pipe_buf_operations { memory allocation, whereas PIPE_BUF makes atomicity guarantees. 
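Note: the defines retained above (for example SWRITE_SYNC building on SWRITE_SYNC_PLUG plus a BIO_RW_UNPLUG bit) follow one convention: a base direction plus modifier hints OR'd in as single bits. A self-contained illustration of that style with made-up bit positions, not the kernel's:

    #include <stdio.h>

    enum { HINT_SYNC = 2, HINT_UNPLUG = 3, HINT_BARRIER = 4 };

    #define MY_READ          0u
    #define MY_WRITE         1u
    #define MY_WRITE_SYNC    (MY_WRITE | (1u << HINT_SYNC) | \
                              (1u << HINT_UNPLUG))
    #define MY_WRITE_BARRIER (MY_WRITE | (1u << HINT_BARRIER))

    int main(void)
    {
            unsigned int rw = MY_WRITE_SYNC;

            if (rw & 1u)            /* direction bit */
                    printf("write%s%s\n",
                           rw & (1u << HINT_SYNC)   ? ", sync hint"   : "",
                           rw & (1u << HINT_UNPLUG) ? ", unplug hint" : "");
            return 0;
    }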
*/ #define PIPE_SIZE PAGE_SIZE -/* Pipe lock and unlock operations */ -void pipe_lock(struct pipe_inode_info *); -void pipe_unlock(struct pipe_inode_info *); -void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); - /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe); diff --git a/trunk/include/linux/splice.h b/trunk/include/linux/splice.h index 5f3faa9d15ae..528dcb93c2f2 100644 --- a/trunk/include/linux/splice.h +++ b/trunk/include/linux/splice.h @@ -36,8 +36,6 @@ struct splice_desc { void *data; /* cookie */ } u; loff_t pos; /* file position */ - size_t num_spliced; /* number of bytes already spliced */ - bool need_wakeup; /* need to wake up writer */ }; struct partial_page { @@ -68,16 +66,6 @@ extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *, splice_actor *); extern ssize_t __splice_from_pipe(struct pipe_inode_info *, struct splice_desc *, splice_actor *); -extern int splice_from_pipe_feed(struct pipe_inode_info *, struct splice_desc *, - splice_actor *); -extern int splice_from_pipe_next(struct pipe_inode_info *, - struct splice_desc *); -extern void splice_from_pipe_begin(struct splice_desc *); -extern void splice_from_pipe_end(struct pipe_inode_info *, - struct splice_desc *); -extern int pipe_to_file(struct pipe_inode_info *, struct pipe_buffer *, - struct splice_desc *); - extern ssize_t splice_to_pipe(struct pipe_inode_info *, struct splice_pipe_desc *); extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, diff --git a/trunk/kernel/power/swap.c b/trunk/kernel/power/swap.c index 8ba052c86d48..505f319e489c 100644 --- a/trunk/kernel/power/swap.c +++ b/trunk/kernel/power/swap.c @@ -64,6 +64,8 @@ static int submit(int rw, pgoff_t page_off, struct page *page, struct bio *bio; bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); + if (!bio) + return -ENOMEM; bio->bi_sector = page_off * (PAGE_SIZE >> 9); bio->bi_bdev = resume_bdev; bio->bi_end_io = end_swap_bio_read;
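Note: the kernel/power/swap.c hunk above computes bio->bi_sector as page_off * (PAGE_SIZE >> 9): block-layer sectors are 512 bytes, so each 4 KiB page spans eight sectors. The arithmetic, checked in isolation:

    #include <assert.h>

    #define PAGE_SIZE_BYTES 4096u
    #define SECTOR_SHIFT    9u      /* block-layer sectors are 512 bytes */

    static unsigned long page_to_sector(unsigned long page_off)
    {
            return page_off * (PAGE_SIZE_BYTES >> SECTOR_SHIFT);
    }

    int main(void)
    {
            assert(page_to_sector(0) == 0);
            assert(page_to_sector(3) == 24);    /* 3 pages * 8 sectors */
            return 0;
    }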