From 3d386cb8f6dc5bb863e6d1e805e12431c35bb95b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 2 Aug 2011 11:29:24 +0200 Subject: [PATCH] --- yaml --- r: 262216 b: refs/heads/master c: 50658b6602dad6dc76c226917f5d8cec0f680eab h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/Documentation/ABI/testing/pstore | 6 - .../Documentation/device-mapper/dm-crypt.txt | 21 +- .../Documentation/device-mapper/dm-flakey.txt | 48 +- trunk/Documentation/device-mapper/dm-raid.txt | 138 +- trunk/Documentation/dmaengine.txt | 234 +-- trunk/Documentation/kernel-parameters.txt | 2 - trunk/arch/ia64/Kconfig | 4 - trunk/arch/ia64/include/asm/gpio.h | 55 - trunk/drivers/acpi/apei/erst.c | 20 +- trunk/drivers/base/devtmpfs.c | 2 +- trunk/drivers/dma/TODO | 1 + trunk/drivers/dma/amba-pl08x.c | 246 ++- trunk/drivers/dma/at_hdmac.c | 4 +- trunk/drivers/dma/coh901318.c | 19 +- trunk/drivers/dma/dmaengine.c | 4 +- trunk/drivers/dma/ep93xx_dma.c | 2 +- trunk/drivers/dma/imx-sdma.c | 4 +- trunk/drivers/dma/intel_mid_dma.c | 2 + trunk/drivers/dma/ipu/ipu_idmac.c | 6 +- trunk/drivers/dma/mv_xor.c | 3 +- trunk/drivers/dma/mxs-dma.c | 13 +- trunk/drivers/dma/pch_dma.c | 127 +- trunk/drivers/dma/pl330.c | 64 +- trunk/drivers/dma/ste_dma40.c | 270 ++- trunk/drivers/dma/ste_dma40_ll.h | 3 + trunk/drivers/firmware/efivars.c | 243 +-- trunk/drivers/md/Kconfig | 5 +- trunk/drivers/md/dm-crypt.c | 62 +- trunk/drivers/md/dm-flakey.c | 270 +-- trunk/drivers/md/dm-io.c | 29 +- trunk/drivers/md/dm-ioctl.c | 89 +- trunk/drivers/md/dm-kcopyd.c | 42 +- trunk/drivers/md/dm-log-userspace-base.c | 3 +- trunk/drivers/md/dm-log.c | 32 +- trunk/drivers/md/dm-mpath.c | 147 +- trunk/drivers/md/dm-raid.c | 621 +------ trunk/drivers/md/dm-snap-persistent.c | 80 +- trunk/drivers/md/dm-snap.c | 84 +- trunk/drivers/md/dm-table.c | 155 +- trunk/drivers/md/dm.c | 75 +- trunk/drivers/md/dm.h | 2 - trunk/drivers/regulator/core.c | 190 +- trunk/drivers/regulator/dummy.c | 32 +- trunk/drivers/regulator/tps65910-regulator.c | 63 +- trunk/drivers/regulator/twl-regulator.c | 66 +- trunk/drivers/regulator/wm831x-dcdc.c | 126 +- trunk/drivers/regulator/wm831x-ldo.c | 25 +- trunk/drivers/regulator/wm8994-regulator.c | 4 +- trunk/drivers/spi/spi-pl022.c | 11 +- trunk/drivers/watchdog/Kconfig | 3 + trunk/drivers/watchdog/nv_tco.c | 8 - trunk/drivers/watchdog/shwdt.c | 2 +- trunk/fs/9p/acl.c | 6 +- trunk/fs/9p/acl.h | 4 +- trunk/fs/9p/vfs_inode_dotl.c | 6 +- trunk/fs/block_dev.c | 1 - trunk/fs/btrfs/acl.c | 10 +- trunk/fs/btrfs/inode.c | 47 +- trunk/fs/dcache.c | 69 +- trunk/fs/ext2/acl.c | 8 +- trunk/fs/ext3/acl.c | 9 +- trunk/fs/ext4/Makefile | 2 +- trunk/fs/ext4/acl.c | 9 +- trunk/fs/ext4/balloc.c | 48 - trunk/fs/ext4/block_validity.c | 21 - trunk/fs/ext4/ext4.h | 55 +- trunk/fs/ext4/extents.c | 129 +- trunk/fs/ext4/fsync.c | 26 +- trunk/fs/ext4/ialloc.c | 2 +- trunk/fs/ext4/indirect.c | 1482 --------------- trunk/fs/ext4/inode.c | 1596 ++++++++++++++++- trunk/fs/ext4/ioctl.c | 12 +- trunk/fs/ext4/mballoc.c | 230 +-- trunk/fs/ext4/mballoc.h | 1 + trunk/fs/ext4/namei.c | 21 +- trunk/fs/ext4/page-io.c | 6 +- trunk/fs/ext4/resize.c | 199 +- trunk/fs/ext4/super.c | 88 +- trunk/fs/ext4/truncate.h | 43 - trunk/fs/generic_acl.c | 13 +- trunk/fs/gfs2/acl.c | 6 +- trunk/fs/hppfs/hppfs.c | 1 - trunk/fs/inode.c | 13 +- trunk/fs/jbd2/checkpoint.c | 5 +- trunk/fs/jbd2/journal.c | 67 + trunk/fs/jffs2/acl.c | 4 +- trunk/fs/jffs2/acl.h | 2 +- trunk/fs/jffs2/fs.c | 2 +- trunk/fs/jffs2/os-linux.h | 2 +- trunk/fs/jfs/acl.c | 4 +- trunk/fs/jfs/xattr.c | 4 +- trunk/fs/namei.c | 24 +- trunk/fs/nfs/nfs3acl.c | 2 +- trunk/fs/nfs/nfs3proc.c | 6 +- trunk/fs/ocfs2/acl.c | 4 +- trunk/fs/posix_acl.c | 16 +- trunk/fs/pstore/inode.c | 12 +- trunk/fs/pstore/internal.h | 2 +- trunk/fs/pstore/platform.c | 30 +- trunk/fs/reiserfs/xattr_acl.c | 10 +- trunk/fs/xfs/linux-2.6/xfs_acl.c | 6 +- trunk/include/linux/amba/pl08x.h | 9 +- trunk/include/linux/device-mapper.h | 43 - trunk/include/linux/dm-ioctl.h | 4 +- trunk/include/linux/dm-kcopyd.h | 15 - trunk/include/linux/efi.h | 6 - trunk/include/linux/fs.h | 9 +- trunk/include/linux/jbd2.h | 6 + trunk/include/linux/nfs_fs.h | 4 +- trunk/include/linux/posix_acl.h | 8 +- trunk/include/linux/pstore.h | 9 +- trunk/include/linux/regulator/consumer.h | 3 - trunk/include/linux/regulator/driver.h | 4 +- trunk/include/trace/events/ext4.h | 87 +- trunk/include/trace/events/jbd2.h | 36 +- trunk/kernel/debug/gdbstub.c | 22 +- trunk/kernel/debug/kdb/kdb_bt.c | 5 +- trunk/kernel/debug/kdb/kdb_cmds | 4 + trunk/kernel/debug/kdb/kdb_debugger.c | 21 +- trunk/kernel/debug/kdb/kdb_io.c | 36 +- trunk/kernel/debug/kdb/kdb_main.c | 4 +- trunk/kernel/debug/kdb/kdb_private.h | 3 +- trunk/mm/oom_kill.c | 4 +- trunk/sound/core/pcm_compat.c | 2 +- trunk/sound/core/rtctimer.c | 2 +- trunk/sound/pci/asihpi/hpidspcd.c | 9 +- trunk/sound/pci/asihpi/hpioctl.c | 19 +- trunk/sound/pci/rme9652/hdspm.c | 109 +- trunk/sound/soc/txx9/txx9aclc.c | 1 - 130 files changed, 3226 insertions(+), 5392 deletions(-) delete mode 100644 trunk/arch/ia64/include/asm/gpio.h delete mode 100644 trunk/fs/ext4/indirect.c delete mode 100644 trunk/fs/ext4/truncate.h diff --git a/[refs] b/[refs] index 6407ebb2fbad..4b14df2097e4 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: f3406816bb2486fc44558bec77179cd9bcbd4450 +refs/heads/master: 50658b6602dad6dc76c226917f5d8cec0f680eab diff --git a/trunk/Documentation/ABI/testing/pstore b/trunk/Documentation/ABI/testing/pstore index ff1df4e3b059..ddf451ee2a08 100644 --- a/trunk/Documentation/ABI/testing/pstore +++ b/trunk/Documentation/ABI/testing/pstore @@ -39,9 +39,3 @@ Description: Generic interface to platform dependent persistent storage. multiple) files based on the record size of the underlying persistent storage until at least this amount is reached. Default is 10 Kbytes. - - Pstore only supports one backend at a time. If multiple - backends are available, the preferred backend may be - set by passing the pstore.backend= argument to the kernel at - boot time. - diff --git a/trunk/Documentation/device-mapper/dm-crypt.txt b/trunk/Documentation/device-mapper/dm-crypt.txt index 2c656ae43ba7..6b5c42dbbe84 100644 --- a/trunk/Documentation/device-mapper/dm-crypt.txt +++ b/trunk/Documentation/device-mapper/dm-crypt.txt @@ -4,8 +4,7 @@ dm-crypt Device-Mapper's "crypt" target provides transparent encryption of block devices using the kernel crypto API. -Parameters: \ - [<#opt_params> ] +Parameters: Encryption cipher and an optional IV generation mode. @@ -38,24 +37,6 @@ Parameters: \ Starting sector within the device where the encrypted data begins. -<#opt_params> - Number of optional parameters. If there are no optional parameters, - the optional paramaters section can be skipped or #opt_params can be zero. - Otherwise #opt_params is the number of following arguments. - - Example of optional parameters section: - 1 allow_discards - -allow_discards - Block discard requests (a.k.a. TRIM) are passed through the crypt device. - The default is to ignore discard requests. - - WARNING: Assess the specific security risks carefully before enabling this - option. For example, allowing discards on encrypted devices may lead to - the leak of information about the ciphertext device (filesystem type, - used space etc.) if the discarded blocks can be located easily on the - device later. - Example scripts =============== LUKS (Linux Unified Key Setup) is now the preferred way to set up disk diff --git a/trunk/Documentation/device-mapper/dm-flakey.txt b/trunk/Documentation/device-mapper/dm-flakey.txt index 6ff5c2327227..c8efdfd19a65 100644 --- a/trunk/Documentation/device-mapper/dm-flakey.txt +++ b/trunk/Documentation/device-mapper/dm-flakey.txt @@ -1,53 +1,17 @@ dm-flakey ========= -This target is the same as the linear target except that it exhibits -unreliable behaviour periodically. It's been found useful in simulating -failing devices for testing purposes. +This target is the same as the linear target except that it returns I/O +errors periodically. It's been found useful in simulating failing +devices for testing purposes. Starting from the time the table is loaded, the device is available for - seconds, then exhibits unreliable behaviour for seconds, and then this cycle repeats. + seconds, then returns errors for seconds, +and then this cycle repeats. -Also, consider using this in combination with the dm-delay target too, -which can delay reads and writes and/or send them to different -underlying devices. - -Table parameters ----------------- - \ - [ []] - -Mandatory parameters: +Parameters: : Full pathname to the underlying block-device, or a "major:minor" device-number. : Starting sector within the device. : Number of seconds device is available. : Number of seconds device returns errors. - -Optional feature parameters: - If no feature parameters are present, during the periods of - unreliability, all I/O returns errors. - - drop_writes: - All write I/O is silently ignored. - Read I/O is handled correctly. - - corrupt_bio_byte : - During , replace of the data of - each matching bio with . - - : The offset of the byte to replace. - Counting starts at 1, to replace the first byte. - : Either 'r' to corrupt reads or 'w' to corrupt writes. - 'w' is incompatible with drop_writes. - : The value (from 0-255) to write. - : Perform the replacement only if bio->bi_rw has all the - selected flags set. - -Examples: - corrupt_bio_byte 32 r 1 0 - - replaces the 32nd byte of READ bios with the value 1 - - corrupt_bio_byte 224 w 0 32 - - replaces the 224th byte of REQ_META (=32) bios with the value 0 diff --git a/trunk/Documentation/device-mapper/dm-raid.txt b/trunk/Documentation/device-mapper/dm-raid.txt index 2a8c11331d2d..33b6b7071ac8 100644 --- a/trunk/Documentation/device-mapper/dm-raid.txt +++ b/trunk/Documentation/device-mapper/dm-raid.txt @@ -1,108 +1,70 @@ -dm-raid -------- +Device-mapper RAID (dm-raid) is a bridge from DM to MD. It +provides a way to use device-mapper interfaces to access the MD RAID +drivers. -The device-mapper RAID (dm-raid) target provides a bridge from DM to MD. -It allows the MD RAID drivers to be accessed using a device-mapper -interface. +As with all device-mapper targets, the nominal public interfaces are the +constructor (CTR) tables and the status outputs (both STATUSTYPE_INFO +and STATUSTYPE_TABLE). The CTR table looks like the following: -The target is named "raid" and it accepts the following parameters: - - <#raid_params> \ - <#raid_devs> [.. ] - -: - raid1 RAID1 mirroring - raid4 RAID4 dedicated parity disk - raid5_la RAID5 left asymmetric - - rotating parity 0 with data continuation - raid5_ra RAID5 right asymmetric - - rotating parity N with data continuation - raid5_ls RAID5 left symmetric - - rotating parity 0 with data restart - raid5_rs RAID5 right symmetric - - rotating parity N with data restart - raid6_zr RAID6 zero restart - - rotating parity zero (left-to-right) with data restart - raid6_nr RAID6 N restart - - rotating parity N (right-to-left) with data restart - raid6_nc RAID6 N continue - - rotating parity N (right-to-left) with data continuation - - Refererence: Chapter 4 of - http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf - -<#raid_params>: The number of parameters that follow. - - consists of - Mandatory parameters: - : Chunk size in sectors. This parameter is often known as - "stripe size". It is the only mandatory parameter and - is placed first. - - followed by optional parameters (in any order): - [sync|nosync] Force or prevent RAID initialization. - - [rebuild ] Rebuild drive number idx (first drive is 0). - - [daemon_sleep ] - Interval between runs of the bitmap daemon that - clear bits. A longer interval means less bitmap I/O but - resyncing after a failure is likely to take longer. - - [min_recovery_rate ] Throttle RAID initialization - [max_recovery_rate ] Throttle RAID initialization - [write_mostly ] Drive index is write-mostly - [max_write_behind ] See '-write-behind=' (man mdadm) - [stripe_cache ] Stripe cache size (higher RAIDs only) - [region_size ] - The region_size multiplied by the number of regions is the - logical size of the array. The bitmap records the device - synchronisation state for each region. - -<#raid_devs>: The number of devices composing the array. - Each device consists of two entries. The first is the device - containing the metadata (if any); the second is the one containing the - data. - - If a drive has failed or is missing at creation time, a '-' can be - given for both the metadata and data drives for a given position. - - -Example tables --------------- -# RAID4 - 4 data drives, 1 parity (no metadata devices) +1: raid \ +2: <#raid_params> \ +3: <#raid_devs> .. + +Line 1 contains the standard first three arguments to any device-mapper +target - the start, length, and target type fields. The target type in +this case is "raid". + +Line 2 contains the arguments that define the particular raid +type/personality/level, the required arguments for that raid type, and +any optional arguments. Possible raid types include: raid4, raid5_la, +raid5_ls, raid5_rs, raid6_zr, raid6_nr, and raid6_nc. (raid1 is +planned for the future.) The list of required and optional parameters +is the same for all the current raid types. The required parameters are +positional, while the optional parameters are given as key/value pairs. +The possible parameters are as follows: + Chunk size in sectors. + [[no]sync] Force/Prevent RAID initialization + [rebuild ] Rebuild the drive indicated by the index + [daemon_sleep ] Time between bitmap daemon work to clear bits + [min_recovery_rate ] Throttle RAID initialization + [max_recovery_rate ] Throttle RAID initialization + [max_write_behind ] See '-write-behind=' (man mdadm) + [stripe_cache ] Stripe cache size for higher RAIDs + +Line 3 contains the list of devices that compose the array in +metadata/data device pairs. If the metadata is stored separately, a '-' +is given for the metadata device position. If a drive has failed or is +missing at creation time, a '-' can be given for both the metadata and +data drives for a given position. + +NB. Currently all metadata devices must be specified as '-'. + +Examples: +# RAID4 - 4 data drives, 1 parity # No metadata devices specified to hold superblock/bitmap info # Chunk size of 1MiB # (Lines separated for easy reading) - 0 1960893648 raid \ raid4 1 2048 \ 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81 -# RAID4 - 4 data drives, 1 parity (with metadata devices) +# RAID4 - 4 data drives, 1 parity (no metadata devices) # Chunk size of 1MiB, force RAID initialization, # min recovery rate at 20 kiB/sec/disk - 0 1960893648 raid \ - raid4 4 2048 sync min_recovery_rate 20 \ - 5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82 + raid4 4 2048 min_recovery_rate 20 sync\ + 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81 -'dmsetup table' displays the table used to construct the mapping. -The optional parameters are always printed in the order listed -above with "sync" or "nosync" always output ahead of the other -arguments, regardless of the order used when originally loading the table. -Arguments that can be repeated are ordered by value. +Performing a 'dmsetup table' should display the CTR table used to +construct the mapping (with possible reordering of optional +parameters). -'dmsetup status' yields information on the state and health of the -array. -The output is as follows: +Performing a 'dmsetup status' will yield information on the state and +health of the array. The output is as follows: 1: raid \ 2: <#devices> <1 health char for each dev> -Line 1 is the standard output produced by device-mapper. -Line 2 is produced by the raid target, and best explained by example: +Line 1 is standard DM output. Line 2 is best shown by example: 0 1960893648 raid raid4 5 AAAAA 2/490221568 Here we can see the RAID type is raid4, there are 5 devices - all of which are 'A'live, and the array is 2/490221568 complete with recovery. -Faulty or missing devices are marked 'D'. Devices that are out-of-sync -are marked 'a'. diff --git a/trunk/Documentation/dmaengine.txt b/trunk/Documentation/dmaengine.txt index 94b7e0f96b38..5a0cb1ef6164 100644 --- a/trunk/Documentation/dmaengine.txt +++ b/trunk/Documentation/dmaengine.txt @@ -10,181 +10,87 @@ NOTE: For DMA Engine usage in async_tx please see: Below is a guide to device driver writers on how to use the Slave-DMA API of the DMA Engine. This is applicable only for slave DMA usage only. -The slave DMA usage consists of following steps: +The slave DMA usage consists of following steps 1. Allocate a DMA slave channel 2. Set slave and controller specific parameters 3. Get a descriptor for transaction -4. Submit the transaction -5. Issue pending requests and wait for callback notification +4. Submit the transaction and wait for callback notification 1. Allocate a DMA slave channel - - Channel allocation is slightly different in the slave DMA context, - client drivers typically need a channel from a particular DMA - controller only and even in some cases a specific channel is desired. - To request a channel dma_request_channel() API is used. - - Interface: - struct dma_chan *dma_request_channel(dma_cap_mask_t mask, - dma_filter_fn filter_fn, - void *filter_param); - where dma_filter_fn is defined as: - typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); - - The 'filter_fn' parameter is optional, but highly recommended for - slave and cyclic channels as they typically need to obtain a specific - DMA channel. - - When the optional 'filter_fn' parameter is NULL, dma_request_channel() - simply returns the first channel that satisfies the capability mask. - - Otherwise, the 'filter_fn' routine will be called once for each free - channel which has a capability in 'mask'. 'filter_fn' is expected to - return 'true' when the desired DMA channel is found. - - A channel allocated via this interface is exclusive to the caller, - until dma_release_channel() is called. +Channel allocation is slightly different in the slave DMA context, client +drivers typically need a channel from a particular DMA controller only and even +in some cases a specific channel is desired. To request a channel +dma_request_channel() API is used. + +Interface: +struct dma_chan *dma_request_channel(dma_cap_mask_t mask, + dma_filter_fn filter_fn, + void *filter_param); +where dma_filter_fn is defined as: +typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); + +When the optional 'filter_fn' parameter is set to NULL dma_request_channel +simply returns the first channel that satisfies the capability mask. Otherwise, +when the mask parameter is insufficient for specifying the necessary channel, +the filter_fn routine can be used to disposition the available channels in the +system. The filter_fn routine is called once for each free channel in the +system. Upon seeing a suitable channel filter_fn returns DMA_ACK which flags +that channel to be the return value from dma_request_channel. A channel +allocated via this interface is exclusive to the caller, until +dma_release_channel() is called. 2. Set slave and controller specific parameters - - Next step is always to pass some specific information to the DMA - driver. Most of the generic information which a slave DMA can use - is in struct dma_slave_config. This allows the clients to specify - DMA direction, DMA addresses, bus widths, DMA burst lengths etc - for the peripheral. - - If some DMA controllers have more parameters to be sent then they - should try to embed struct dma_slave_config in their controller - specific structure. That gives flexibility to client to pass more - parameters, if required. - - Interface: - int dmaengine_slave_config(struct dma_chan *chan, - struct dma_slave_config *config) - - Please see the dma_slave_config structure definition in dmaengine.h - for a detailed explaination of the struct members. Please note - that the 'direction' member will be going away as it duplicates the - direction given in the prepare call. +Next step is always to pass some specific information to the DMA driver. Most of +the generic information which a slave DMA can use is in struct dma_slave_config. +It allows the clients to specify DMA direction, DMA addresses, bus widths, DMA +burst lengths etc. If some DMA controllers have more parameters to be sent then +they should try to embed struct dma_slave_config in their controller specific +structure. That gives flexibility to client to pass more parameters, if +required. + +Interface: +int dmaengine_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) 3. Get a descriptor for transaction - - For slave usage the various modes of slave transfers supported by the - DMA-engine are: - - slave_sg - DMA a list of scatter gather buffers from/to a peripheral - dma_cyclic - Perform a cyclic DMA operation from/to a peripheral till the +For slave usage the various modes of slave transfers supported by the +DMA-engine are: +slave_sg - DMA a list of scatter gather buffers from/to a peripheral +dma_cyclic - Perform a cyclic DMA operation from/to a peripheral till the operation is explicitly stopped. - - A non-NULL return of this transfer API represents a "descriptor" for - the given transaction. - - Interface: - struct dma_async_tx_descriptor *(*chan->device->device_prep_slave_sg)( - struct dma_chan *chan, struct scatterlist *sgl, - unsigned int sg_len, enum dma_data_direction direction, +The non NULL return of this transfer API represents a "descriptor" for the given +transaction. + +Interface: +struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_sg)( + struct dma_chan *chan, + struct scatterlist *dst_sg, unsigned int dst_nents, + struct scatterlist *src_sg, unsigned int src_nents, unsigned long flags); - - struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)( +struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_data_direction direction); - The peripheral driver is expected to have mapped the scatterlist for - the DMA operation prior to calling device_prep_slave_sg, and must - keep the scatterlist mapped until the DMA operation has completed. - The scatterlist must be mapped using the DMA struct device. So, - normal setup should look like this: - - nr_sg = dma_map_sg(chan->device->dev, sgl, sg_len); - if (nr_sg == 0) - /* error */ - - desc = chan->device->device_prep_slave_sg(chan, sgl, nr_sg, - direction, flags); - - Once a descriptor has been obtained, the callback information can be - added and the descriptor must then be submitted. Some DMA engine - drivers may hold a spinlock between a successful preparation and - submission so it is important that these two operations are closely - paired. - - Note: - Although the async_tx API specifies that completion callback - routines cannot submit any new operations, this is not the - case for slave/cyclic DMA. - - For slave DMA, the subsequent transaction may not be available - for submission prior to callback function being invoked, so - slave DMA callbacks are permitted to prepare and submit a new - transaction. - - For cyclic DMA, a callback function may wish to terminate the - DMA via dmaengine_terminate_all(). - - Therefore, it is important that DMA engine drivers drop any - locks before calling the callback function which may cause a - deadlock. - - Note that callbacks will always be invoked from the DMA - engines tasklet, never from interrupt context. - -4. Submit the transaction - - Once the descriptor has been prepared and the callback information - added, it must be placed on the DMA engine drivers pending queue. - - Interface: - dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc) - - This returns a cookie can be used to check the progress of DMA engine - activity via other DMA engine calls not covered in this document. - - dmaengine_submit() will not start the DMA operation, it merely adds - it to the pending queue. For this, see step 5, dma_async_issue_pending. - -5. Issue pending DMA requests and wait for callback notification - - The transactions in the pending queue can be activated by calling the - issue_pending API. If channel is idle then the first transaction in - queue is started and subsequent ones queued up. - - On completion of each DMA operation, the next in queue is started and - a tasklet triggered. The tasklet will then call the client driver - completion callback routine for notification, if set. - - Interface: - void dma_async_issue_pending(struct dma_chan *chan); - -Further APIs: - -1. int dmaengine_terminate_all(struct dma_chan *chan) - - This causes all activity for the DMA channel to be stopped, and may - discard data in the DMA FIFO which hasn't been fully transferred. - No callback functions will be called for any incomplete transfers. - -2. int dmaengine_pause(struct dma_chan *chan) - - This pauses activity on the DMA channel without data loss. - -3. int dmaengine_resume(struct dma_chan *chan) - - Resume a previously paused DMA channel. It is invalid to resume a - channel which is not currently paused. - -4. enum dma_status dma_async_is_tx_complete(struct dma_chan *chan, - dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used) - - This can be used to check the status of the channel. Please see - the documentation in include/linux/dmaengine.h for a more complete - description of this API. - - This can be used in conjunction with dma_async_is_complete() and - the cookie returned from 'descriptor->submit()' to check for - completion of a specific DMA transaction. - - Note: - Not all DMA engine drivers can return reliable information for - a running DMA channel. It is recommended that DMA engine users - pause or stop (via dmaengine_terminate_all) the channel before - using this API. +4. Submit the transaction and wait for callback notification +To schedule the transaction to be scheduled by dma device, the "descriptor" +returned in above (3) needs to be submitted. +To tell the dma driver that a transaction is ready to be serviced, the +descriptor->submit() callback needs to be invoked. This chains the descriptor to +the pending queue. +The transactions in the pending queue can be activated by calling the +issue_pending API. If channel is idle then the first transaction in queue is +started and subsequent ones queued up. +On completion of the DMA operation the next in queue is submitted and a tasklet +triggered. The tasklet would then call the client driver completion callback +routine for notification, if set. +Interface: +void dma_async_issue_pending(struct dma_chan *chan); + +============================================================================== + +Additional usage notes for dma driver writers +1/ Although DMA engine specifies that completion callback routines cannot submit +any new operations, but typically for slave DMA subsequent transaction may not +be available for submit prior to callback routine being called. This requirement +is not a requirement for DMA-slave devices. But they should take care to drop +the spin-lock they might be holding before calling the callback routine diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt index 26a83743af19..4ca93898fbd3 100644 --- a/trunk/Documentation/kernel-parameters.txt +++ b/trunk/Documentation/kernel-parameters.txt @@ -2153,8 +2153,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. [HW,MOUSE] Controls Logitech smartscroll autorepeat. 0 = disabled, 1 = enabled (default). - pstore.backend= Specify the name of the pstore backend to use - pt. [PARIDE] See Documentation/blockdev/paride.txt. diff --git a/trunk/arch/ia64/Kconfig b/trunk/arch/ia64/Kconfig index 64c7ab7e7a81..137b277f7e56 100644 --- a/trunk/arch/ia64/Kconfig +++ b/trunk/arch/ia64/Kconfig @@ -27,7 +27,6 @@ config IA64 select GENERIC_PENDING_IRQ if SMP select IRQ_PER_CPU select GENERIC_IRQ_SHOW - select ARCH_WANT_OPTIONAL_GPIOLIB default y help The Itanium Processor Family is Intel's 64-bit successor to @@ -90,9 +89,6 @@ config GENERIC_TIME_VSYSCALL config HAVE_SETUP_PER_CPU_AREA def_bool y -config GENERIC_GPIO - def_bool y - config DMI bool default y diff --git a/trunk/arch/ia64/include/asm/gpio.h b/trunk/arch/ia64/include/asm/gpio.h deleted file mode 100644 index 590a20debc4e..000000000000 --- a/trunk/arch/ia64/include/asm/gpio.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Generic GPIO API implementation for IA-64. - * - * A stright copy of that for PowerPC which was: - * - * Copyright (c) 2007-2008 MontaVista Software, Inc. - * - * Author: Anton Vorontsov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#ifndef _ASM_IA64_GPIO_H -#define _ASM_IA64_GPIO_H - -#include -#include - -#ifdef CONFIG_GPIOLIB - -/* - * We don't (yet) implement inlined/rapid versions for on-chip gpios. - * Just call gpiolib. - */ -static inline int gpio_get_value(unsigned int gpio) -{ - return __gpio_get_value(gpio); -} - -static inline void gpio_set_value(unsigned int gpio, int value) -{ - __gpio_set_value(gpio, value); -} - -static inline int gpio_cansleep(unsigned int gpio) -{ - return __gpio_cansleep(gpio); -} - -static inline int gpio_to_irq(unsigned int gpio) -{ - return __gpio_to_irq(gpio); -} - -static inline int irq_to_gpio(unsigned int irq) -{ - return -EINVAL; -} - -#endif /* CONFIG_GPIOLIB */ - -#endif /* _ASM_IA64_GPIO_H */ diff --git a/trunk/drivers/acpi/apei/erst.c b/trunk/drivers/acpi/apei/erst.c index 6053f4780df9..e6cef8e1b534 100644 --- a/trunk/drivers/acpi/apei/erst.c +++ b/trunk/drivers/acpi/apei/erst.c @@ -932,11 +932,8 @@ static int erst_check_table(struct acpi_table_erst *erst_tab) static int erst_open_pstore(struct pstore_info *psi); static int erst_close_pstore(struct pstore_info *psi); static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, - struct timespec *time, struct pstore_info *psi); -static u64 erst_writer(enum pstore_type_id type, unsigned int part, - size_t size, struct pstore_info *psi); -static int erst_clearer(enum pstore_type_id type, u64 id, - struct pstore_info *psi); + struct timespec *time); +static u64 erst_writer(enum pstore_type_id type, size_t size); static struct pstore_info erst_info = { .owner = THIS_MODULE, @@ -945,7 +942,7 @@ static struct pstore_info erst_info = { .close = erst_close_pstore, .read = erst_reader, .write = erst_writer, - .erase = erst_clearer + .erase = erst_clear }; #define CPER_CREATOR_PSTORE \ @@ -986,7 +983,7 @@ static int erst_close_pstore(struct pstore_info *psi) } static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, - struct timespec *time, struct pstore_info *psi) + struct timespec *time) { int rc; ssize_t len = 0; @@ -1040,8 +1037,7 @@ static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, return (rc < 0) ? rc : (len - sizeof(*rcd)); } -static u64 erst_writer(enum pstore_type_id type, unsigned int part, - size_t size, struct pstore_info *psi) +static u64 erst_writer(enum pstore_type_id type, size_t size) { struct cper_pstore_record *rcd = (struct cper_pstore_record *) (erst_info.buf - sizeof(*rcd)); @@ -1084,12 +1080,6 @@ static u64 erst_writer(enum pstore_type_id type, unsigned int part, return rcd->hdr.record_id; } -static int erst_clearer(enum pstore_type_id type, u64 id, - struct pstore_info *psi) -{ - return erst_clear(id); -} - static int __init erst_init(void) { int rc = 0; diff --git a/trunk/drivers/base/devtmpfs.c b/trunk/drivers/base/devtmpfs.c index 33e1bed68fdd..b89fffc1d777 100644 --- a/trunk/drivers/base/devtmpfs.c +++ b/trunk/drivers/base/devtmpfs.c @@ -166,7 +166,7 @@ static int create_path(const char *nodepath) { char *path; char *s; - int err = 0; + int err; /* parent directories do not exist, create them */ path = kstrdup(nodepath, GFP_KERNEL); diff --git a/trunk/drivers/dma/TODO b/trunk/drivers/dma/TODO index 734ed0206cd5..a4af8589330c 100644 --- a/trunk/drivers/dma/TODO +++ b/trunk/drivers/dma/TODO @@ -9,5 +9,6 @@ TODO for slave dma - mxs-dma.c - dw_dmac - intel_mid_dma + - ste_dma40 4. Check other subsystems for dma drivers and merge/move to dmaengine 5. Remove dma_slave_config's dma direction. diff --git a/trunk/drivers/dma/amba-pl08x.c b/trunk/drivers/dma/amba-pl08x.c index 196a7378d332..e6d7228b1479 100644 --- a/trunk/drivers/dma/amba-pl08x.c +++ b/trunk/drivers/dma/amba-pl08x.c @@ -156,10 +156,14 @@ struct pl08x_driver_data { #define PL08X_BOUNDARY_SHIFT (10) /* 1KB 0x400 */ #define PL08X_BOUNDARY_SIZE (1 << PL08X_BOUNDARY_SHIFT) +/* Minimum period between work queue runs */ +#define PL08X_WQ_PERIODMIN 20 + /* Size (bytes) of each LLI buffer allocated for one transfer */ # define PL08X_LLI_TSFR_SIZE 0x2000 /* Maximum times we call dma_pool_alloc on this pool without freeing */ +#define PL08X_MAX_ALLOCS 0x40 #define MAX_NUM_TSFR_LLIS (PL08X_LLI_TSFR_SIZE/sizeof(struct pl08x_lli)) #define PL08X_ALIGN 8 @@ -491,10 +495,10 @@ static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth, struct pl08x_lli_build_data { struct pl08x_txd *txd; + struct pl08x_driver_data *pl08x; struct pl08x_bus_data srcbus; struct pl08x_bus_data dstbus; size_t remainder; - u32 lli_bus; }; /* @@ -547,7 +551,8 @@ static void pl08x_fill_lli_for_desc(struct pl08x_lli_build_data *bd, llis_va[num_llis].src = bd->srcbus.addr; llis_va[num_llis].dst = bd->dstbus.addr; llis_va[num_llis].lli = llis_bus + (num_llis + 1) * sizeof(struct pl08x_lli); - llis_va[num_llis].lli |= bd->lli_bus; + if (bd->pl08x->lli_buses & PL08X_AHB2) + llis_va[num_llis].lli |= PL080_LLI_LM_AHB2; if (cctl & PL080_CONTROL_SRC_INCR) bd->srcbus.addr += len; @@ -600,9 +605,9 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, cctl = txd->cctl; bd.txd = txd; + bd.pl08x = pl08x; bd.srcbus.addr = txd->src_addr; bd.dstbus.addr = txd->dst_addr; - bd.lli_bus = (pl08x->lli_buses & PL08X_AHB2) ? PL080_LLI_LM_AHB2 : 0; /* Find maximum width of the source bus */ bd.srcbus.maxwidth = @@ -617,15 +622,25 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, /* Set up the bus widths to the maximum */ bd.srcbus.buswidth = bd.srcbus.maxwidth; bd.dstbus.buswidth = bd.dstbus.maxwidth; + dev_vdbg(&pl08x->adev->dev, + "%s source bus is %d bytes wide, dest bus is %d bytes wide\n", + __func__, bd.srcbus.buswidth, bd.dstbus.buswidth); + /* * Bytes transferred == tsize * MIN(buswidths), not max(buswidths) */ max_bytes_per_lli = min(bd.srcbus.buswidth, bd.dstbus.buswidth) * PL080_CONTROL_TRANSFER_SIZE_MASK; + dev_vdbg(&pl08x->adev->dev, + "%s max bytes per lli = %zu\n", + __func__, max_bytes_per_lli); /* We need to count this down to zero */ bd.remainder = txd->len; + dev_vdbg(&pl08x->adev->dev, + "%s remainder = %zu\n", + __func__, bd.remainder); /* * Choose bus to align to @@ -634,16 +649,6 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, */ pl08x_choose_master_bus(&bd, &mbus, &sbus, cctl); - dev_vdbg(&pl08x->adev->dev, "src=0x%08x%s/%u dst=0x%08x%s/%u len=%zu llimax=%zu\n", - bd.srcbus.addr, cctl & PL080_CONTROL_SRC_INCR ? "+" : "", - bd.srcbus.buswidth, - bd.dstbus.addr, cctl & PL080_CONTROL_DST_INCR ? "+" : "", - bd.dstbus.buswidth, - bd.remainder, max_bytes_per_lli); - dev_vdbg(&pl08x->adev->dev, "mbus=%s sbus=%s\n", - mbus == &bd.srcbus ? "src" : "dst", - sbus == &bd.srcbus ? "src" : "dst"); - if (txd->len < mbus->buswidth) { /* Less than a bus width available - send as single bytes */ while (bd.remainder) { @@ -835,14 +840,15 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, { int i; - dev_vdbg(&pl08x->adev->dev, - "%-3s %-9s %-10s %-10s %-10s %s\n", - "lli", "", "csrc", "cdst", "clli", "cctl"); for (i = 0; i < num_llis; i++) { dev_vdbg(&pl08x->adev->dev, - "%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x\n", - i, &llis_va[i], llis_va[i].src, - llis_va[i].dst, llis_va[i].lli, llis_va[i].cctl + "lli %d @%p: csrc=0x%08x, cdst=0x%08x, cctl=0x%08x, clli=0x%08x\n", + i, + &llis_va[i], + llis_va[i].src, + llis_va[i].dst, + llis_va[i].cctl, + llis_va[i].lli ); } } @@ -1048,105 +1054,64 @@ pl08x_dma_tx_status(struct dma_chan *chan, /* PrimeCell DMA extension */ struct burst_table { - u32 burstwords; + int burstwords; u32 reg; }; static const struct burst_table burst_sizes[] = { { .burstwords = 256, - .reg = PL080_BSIZE_256, + .reg = (PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT), }, { .burstwords = 128, - .reg = PL080_BSIZE_128, + .reg = (PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT), }, { .burstwords = 64, - .reg = PL080_BSIZE_64, + .reg = (PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT), }, { .burstwords = 32, - .reg = PL080_BSIZE_32, + .reg = (PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT), }, { .burstwords = 16, - .reg = PL080_BSIZE_16, + .reg = (PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT), }, { .burstwords = 8, - .reg = PL080_BSIZE_8, + .reg = (PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT), }, { .burstwords = 4, - .reg = PL080_BSIZE_4, + .reg = (PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT), }, { - .burstwords = 0, - .reg = PL080_BSIZE_1, + .burstwords = 1, + .reg = (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT), }, }; -/* - * Given the source and destination available bus masks, select which - * will be routed to each port. We try to have source and destination - * on separate ports, but always respect the allowable settings. - */ -static u32 pl08x_select_bus(u8 src, u8 dst) -{ - u32 cctl = 0; - - if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1))) - cctl |= PL080_CONTROL_DST_AHB2; - if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2))) - cctl |= PL080_CONTROL_SRC_AHB2; - - return cctl; -} - -static u32 pl08x_cctl(u32 cctl) -{ - cctl &= ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 | - PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR | - PL080_CONTROL_PROT_MASK); - - /* Access the cell in privileged mode, non-bufferable, non-cacheable */ - return cctl | PL080_CONTROL_PROT_SYS; -} - -static u32 pl08x_width(enum dma_slave_buswidth width) -{ - switch (width) { - case DMA_SLAVE_BUSWIDTH_1_BYTE: - return PL080_WIDTH_8BIT; - case DMA_SLAVE_BUSWIDTH_2_BYTES: - return PL080_WIDTH_16BIT; - case DMA_SLAVE_BUSWIDTH_4_BYTES: - return PL080_WIDTH_32BIT; - default: - return ~0; - } -} - -static u32 pl08x_burst(u32 maxburst) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(burst_sizes); i++) - if (burst_sizes[i].burstwords <= maxburst) - break; - - return burst_sizes[i].reg; -} - static int dma_set_runtime_config(struct dma_chan *chan, struct dma_slave_config *config) { struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_channel_data *cd = plchan->cd; enum dma_slave_buswidth addr_width; - u32 width, burst, maxburst; + dma_addr_t addr; + u32 maxburst; u32 cctl = 0; + int i; if (!plchan->slave) return -EINVAL; @@ -1154,9 +1119,11 @@ static int dma_set_runtime_config(struct dma_chan *chan, /* Transfer direction */ plchan->runtime_direction = config->direction; if (config->direction == DMA_TO_DEVICE) { + addr = config->dst_addr; addr_width = config->dst_addr_width; maxburst = config->dst_maxburst; } else if (config->direction == DMA_FROM_DEVICE) { + addr = config->src_addr; addr_width = config->src_addr_width; maxburst = config->src_maxburst; } else { @@ -1165,40 +1132,46 @@ static int dma_set_runtime_config(struct dma_chan *chan, return -EINVAL; } - width = pl08x_width(addr_width); - if (width == ~0) { + switch (addr_width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + cctl |= (PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT) | + (PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT); + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + cctl |= (PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT) | + (PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT); + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + cctl |= (PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT) | + (PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT); + break; + default: dev_err(&pl08x->adev->dev, "bad runtime_config: alien address width\n"); return -EINVAL; } - cctl |= width << PL080_CONTROL_SWIDTH_SHIFT; - cctl |= width << PL080_CONTROL_DWIDTH_SHIFT; - /* + * Now decide on a maxburst: * If this channel will only request single transfers, set this * down to ONE element. Also select one element if no maxburst * is specified. */ - if (plchan->cd->single) - maxburst = 1; - - burst = pl08x_burst(maxburst); - cctl |= burst << PL080_CONTROL_SB_SIZE_SHIFT; - cctl |= burst << PL080_CONTROL_DB_SIZE_SHIFT; - - if (plchan->runtime_direction == DMA_FROM_DEVICE) { - plchan->src_addr = config->src_addr; - plchan->src_cctl = pl08x_cctl(cctl) | PL080_CONTROL_DST_INCR | - pl08x_select_bus(plchan->cd->periph_buses, - pl08x->mem_buses); + if (plchan->cd->single || maxburst == 0) { + cctl |= (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT); } else { - plchan->dst_addr = config->dst_addr; - plchan->dst_cctl = pl08x_cctl(cctl) | PL080_CONTROL_SRC_INCR | - pl08x_select_bus(pl08x->mem_buses, - plchan->cd->periph_buses); + for (i = 0; i < ARRAY_SIZE(burst_sizes); i++) + if (burst_sizes[i].burstwords <= maxburst) + break; + cctl |= burst_sizes[i].reg; } + plchan->runtime_addr = addr; + + /* Modify the default channel data to fit PrimeCell request */ + cd->cctl = cctl; + dev_dbg(&pl08x->adev->dev, "configured channel %s (%s) for %s, data width %d, " "maxburst %d words, LE, CCTL=0x%08x\n", @@ -1297,6 +1270,23 @@ static int pl08x_prep_channel_resources(struct pl08x_dma_chan *plchan, return 0; } +/* + * Given the source and destination available bus masks, select which + * will be routed to each port. We try to have source and destination + * on separate ports, but always respect the allowable settings. + */ +static u32 pl08x_select_bus(struct pl08x_driver_data *pl08x, u8 src, u8 dst) +{ + u32 cctl = 0; + + if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1))) + cctl |= PL080_CONTROL_DST_AHB2; + if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2))) + cctl |= PL080_CONTROL_SRC_AHB2; + + return cctl; +} + static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan, unsigned long flags) { @@ -1348,8 +1338,8 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy( txd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR; if (pl08x->vd->dualmaster) - txd->cctl |= pl08x_select_bus(pl08x->mem_buses, - pl08x->mem_buses); + txd->cctl |= pl08x_select_bus(pl08x, + pl08x->mem_buses, pl08x->mem_buses); ret = pl08x_prep_channel_resources(plchan, txd); if (ret) @@ -1366,6 +1356,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); struct pl08x_driver_data *pl08x = plchan->host; struct pl08x_txd *txd; + u8 src_buses, dst_buses; int ret; /* @@ -1399,22 +1390,42 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( txd->direction = direction; txd->len = sgl->length; + txd->cctl = plchan->cd->cctl & + ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 | + PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR | + PL080_CONTROL_PROT_MASK); + + /* Access the cell in privileged mode, non-bufferable, non-cacheable */ + txd->cctl |= PL080_CONTROL_PROT_SYS; + if (direction == DMA_TO_DEVICE) { txd->ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT; - txd->cctl = plchan->dst_cctl; + txd->cctl |= PL080_CONTROL_SRC_INCR; txd->src_addr = sgl->dma_address; - txd->dst_addr = plchan->dst_addr; + if (plchan->runtime_addr) + txd->dst_addr = plchan->runtime_addr; + else + txd->dst_addr = plchan->cd->addr; + src_buses = pl08x->mem_buses; + dst_buses = plchan->cd->periph_buses; } else if (direction == DMA_FROM_DEVICE) { txd->ccfg |= PL080_FLOW_PER2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT; - txd->cctl = plchan->src_cctl; - txd->src_addr = plchan->src_addr; + txd->cctl |= PL080_CONTROL_DST_INCR; + if (plchan->runtime_addr) + txd->src_addr = plchan->runtime_addr; + else + txd->src_addr = plchan->cd->addr; txd->dst_addr = sgl->dma_address; + src_buses = plchan->cd->periph_buses; + dst_buses = pl08x->mem_buses; } else { dev_err(&pl08x->adev->dev, "%s direction unsupported\n", __func__); return NULL; } + txd->cctl |= pl08x_select_bus(pl08x, src_buses, dst_buses); + ret = pl08x_prep_channel_resources(plchan, txd); if (ret) return NULL; @@ -1665,20 +1676,6 @@ static irqreturn_t pl08x_irq(int irq, void *dev) return mask ? IRQ_HANDLED : IRQ_NONE; } -static void pl08x_dma_slave_init(struct pl08x_dma_chan *chan) -{ - u32 cctl = pl08x_cctl(chan->cd->cctl); - - chan->slave = true; - chan->name = chan->cd->bus_id; - chan->src_addr = chan->cd->addr; - chan->dst_addr = chan->cd->addr; - chan->src_cctl = cctl | PL080_CONTROL_DST_INCR | - pl08x_select_bus(chan->cd->periph_buses, chan->host->mem_buses); - chan->dst_cctl = cctl | PL080_CONTROL_SRC_INCR | - pl08x_select_bus(chan->host->mem_buses, chan->cd->periph_buses); -} - /* * Initialise the DMAC memcpy/slave channels. * Make a local wrapper to hold required data @@ -1710,8 +1707,9 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, chan->state = PL08X_CHAN_IDLE; if (slave) { + chan->slave = true; + chan->name = pl08x->pd->slave_channels[i].bus_id; chan->cd = &pl08x->pd->slave_channels[i]; - pl08x_dma_slave_init(chan); } else { chan->cd = &pl08x->pd->memcpy_channel; chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i); diff --git a/trunk/drivers/dma/at_hdmac.c b/trunk/drivers/dma/at_hdmac.c index 6a483eac7b3f..36144f88d718 100644 --- a/trunk/drivers/dma/at_hdmac.c +++ b/trunk/drivers/dma/at_hdmac.c @@ -1216,7 +1216,7 @@ static int __init at_dma_probe(struct platform_device *pdev) atdma->dma_common.cap_mask = pdata->cap_mask; atdma->all_chan_mask = (1 << pdata->nr_channels) - 1; - size = resource_size(io); + size = io->end - io->start + 1; if (!request_mem_region(io->start, size, pdev->dev.driver->name)) { err = -EBUSY; goto err_kfree; @@ -1362,7 +1362,7 @@ static int __exit at_dma_remove(struct platform_device *pdev) atdma->regs = NULL; io = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(io->start, resource_size(io)); + release_mem_region(io->start, io->end - io->start + 1); kfree(atdma); diff --git a/trunk/drivers/dma/coh901318.c b/trunk/drivers/dma/coh901318.c index 4234f416ef11..a92d95eac86b 100644 --- a/trunk/drivers/dma/coh901318.c +++ b/trunk/drivers/dma/coh901318.c @@ -41,8 +41,6 @@ struct coh901318_desc { struct coh901318_lli *lli; enum dma_data_direction dir; unsigned long flags; - u32 head_config; - u32 head_ctrl; }; struct coh901318_base { @@ -663,9 +661,6 @@ static struct coh901318_desc *coh901318_queue_start(struct coh901318_chan *cohc) coh901318_desc_submit(cohc, cohd); - /* Program the transaction head */ - coh901318_set_conf(cohc, cohd->head_config); - coh901318_set_ctrl(cohc, cohd->head_ctrl); coh901318_prep_linked_list(cohc, cohd->lli); /* start dma job on this channel */ @@ -1096,6 +1091,8 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, } else goto err_direction; + coh901318_set_conf(cohc, config); + /* The dma only supports transmitting packages up to * MAX_DMA_PACKET_SIZE. Calculate to total number of * dma elemts required to send the entire sg list @@ -1132,18 +1129,16 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (ret) goto err_lli_fill; + /* + * Set the default ctrl for the channel to the one from the lli, + * things may have changed due to odd buffer alignment etc. + */ + coh901318_set_ctrl(cohc, lli->control); COH_DBG(coh901318_list_print(cohc, lli)); /* Pick a descriptor to handle this transfer */ cohd = coh901318_desc_get(cohc); - cohd->head_config = config; - /* - * Set the default head ctrl for the channel to the one from the - * lli, things may have changed due to odd buffer alignment - * etc. - */ - cohd->head_ctrl = lli->control; cohd->dir = direction; cohd->flags = flags; cohd->desc.tx_submit = coh901318_tx_submit; diff --git a/trunk/drivers/dma/dmaengine.c b/trunk/drivers/dma/dmaengine.c index 26374b2a55a2..48694c34d96b 100644 --- a/trunk/drivers/dma/dmaengine.c +++ b/trunk/drivers/dma/dmaengine.c @@ -510,8 +510,8 @@ struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, v dma_chan_name(chan)); list_del_rcu(&device->global_node); } else if (err) - pr_debug("dmaengine: failed to get %s: (%d)\n", - dma_chan_name(chan), err); + pr_err("dmaengine: failed to get %s: (%d)\n", + dma_chan_name(chan), err); else break; if (--device->privatecnt == 0) diff --git a/trunk/drivers/dma/ep93xx_dma.c b/trunk/drivers/dma/ep93xx_dma.c index 5d7a49bd7c26..0766c1e53b1d 100644 --- a/trunk/drivers/dma/ep93xx_dma.c +++ b/trunk/drivers/dma/ep93xx_dma.c @@ -902,7 +902,7 @@ static void ep93xx_dma_free_chan_resources(struct dma_chan *chan) * * Returns a valid DMA descriptor or %NULL in case of failure. */ -static struct dma_async_tx_descriptor * +struct dma_async_tx_descriptor * ep93xx_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) { diff --git a/trunk/drivers/dma/imx-sdma.c b/trunk/drivers/dma/imx-sdma.c index 7bd7e98548cd..1eb60ded2f0d 100644 --- a/trunk/drivers/dma/imx-sdma.c +++ b/trunk/drivers/dma/imx-sdma.c @@ -1305,10 +1305,8 @@ static int __init sdma_probe(struct platform_device *pdev) goto err_request_irq; sdma->script_addrs = kzalloc(sizeof(*sdma->script_addrs), GFP_KERNEL); - if (!sdma->script_addrs) { - ret = -ENOMEM; + if (!sdma->script_addrs) goto err_alloc; - } if (of_id) pdev->id_entry = of_id->data; diff --git a/trunk/drivers/dma/intel_mid_dma.c b/trunk/drivers/dma/intel_mid_dma.c index 8a3fdd87db97..f653517ef744 100644 --- a/trunk/drivers/dma/intel_mid_dma.c +++ b/trunk/drivers/dma/intel_mid_dma.c @@ -1351,6 +1351,7 @@ int dma_suspend(struct pci_dev *pci, pm_message_t state) return -EAGAIN; } device->state = SUSPENDED; + pci_set_drvdata(pci, device); pci_save_state(pci); pci_disable_device(pci); pci_set_power_state(pci, PCI_D3hot); @@ -1379,6 +1380,7 @@ int dma_resume(struct pci_dev *pci) } device->state = RUNNING; iowrite32(REG_BIT0, device->dma_base + DMA_CFG); + pci_set_drvdata(pci, device); return 0; } diff --git a/trunk/drivers/dma/ipu/ipu_idmac.c b/trunk/drivers/dma/ipu/ipu_idmac.c index 6815905a772f..fd7d2b308cf2 100644 --- a/trunk/drivers/dma/ipu/ipu_idmac.c +++ b/trunk/drivers/dma/ipu/ipu_idmac.c @@ -1706,14 +1706,16 @@ static int __init ipu_probe(struct platform_device *pdev) ipu_data.irq_fn, ipu_data.irq_err, ipu_data.irq_base); /* Remap IPU common registers */ - ipu_data.reg_ipu = ioremap(mem_ipu->start, resource_size(mem_ipu)); + ipu_data.reg_ipu = ioremap(mem_ipu->start, + mem_ipu->end - mem_ipu->start + 1); if (!ipu_data.reg_ipu) { ret = -ENOMEM; goto err_ioremap_ipu; } /* Remap Image Converter and Image DMA Controller registers */ - ipu_data.reg_ic = ioremap(mem_ic->start, resource_size(mem_ic)); + ipu_data.reg_ic = ioremap(mem_ic->start, + mem_ic->end - mem_ic->start + 1); if (!ipu_data.reg_ic) { ret = -ENOMEM; goto err_ioremap_ic; diff --git a/trunk/drivers/dma/mv_xor.c b/trunk/drivers/dma/mv_xor.c index 9a353c2216d0..06f9f27dbe7c 100644 --- a/trunk/drivers/dma/mv_xor.c +++ b/trunk/drivers/dma/mv_xor.c @@ -1304,8 +1304,7 @@ static int mv_xor_shared_probe(struct platform_device *pdev) if (!res) return -ENODEV; - msp->xor_base = devm_ioremap(&pdev->dev, res->start, - resource_size(res)); + msp->xor_base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); if (!msp->xor_base) return -EBUSY; diff --git a/trunk/drivers/dma/mxs-dma.c b/trunk/drivers/dma/mxs-dma.c index be641cbd36fc..88aad4f54002 100644 --- a/trunk/drivers/dma/mxs-dma.c +++ b/trunk/drivers/dma/mxs-dma.c @@ -327,12 +327,10 @@ static int mxs_dma_alloc_chan_resources(struct dma_chan *chan) memset(mxs_chan->ccw, 0, PAGE_SIZE); - if (mxs_chan->chan_irq != NO_IRQ) { - ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler, - 0, "mxs-dma", mxs_dma); - if (ret) - goto err_irq; - } + ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler, + 0, "mxs-dma", mxs_dma); + if (ret) + goto err_irq; ret = clk_enable(mxs_dma->clk); if (ret) @@ -537,7 +535,6 @@ static int mxs_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, switch (cmd) { case DMA_TERMINATE_ALL: mxs_dma_disable_chan(mxs_chan); - mxs_dma_reset_chan(mxs_chan); break; case DMA_PAUSE: mxs_dma_pause_chan(mxs_chan); @@ -710,8 +707,6 @@ static struct platform_device_id mxs_dma_type[] = { }, { .name = "mxs-dma-apbx", .driver_data = MXS_DMA_APBX, - }, { - /* end of list */ } }; diff --git a/trunk/drivers/dma/pch_dma.c b/trunk/drivers/dma/pch_dma.c index 1ac8d4b580b7..ff5b38f9d45b 100644 --- a/trunk/drivers/dma/pch_dma.c +++ b/trunk/drivers/dma/pch_dma.c @@ -45,8 +45,7 @@ #define DMA_STATUS_MASK_BITS 0x3 #define DMA_STATUS_SHIFT_BITS 16 #define DMA_STATUS_IRQ(x) (0x1 << (x)) -#define DMA_STATUS0_ERR(x) (0x1 << ((x) + 8)) -#define DMA_STATUS2_ERR(x) (0x1 << (x)) +#define DMA_STATUS_ERR(x) (0x1 << ((x) + 8)) #define DMA_DESC_WIDTH_SHIFT_BITS 12 #define DMA_DESC_WIDTH_1_BYTE (0x3 << DMA_DESC_WIDTH_SHIFT_BITS) @@ -62,9 +61,6 @@ #define MAX_CHAN_NR 8 -#define DMA_MASK_CTL0_MODE 0x33333333 -#define DMA_MASK_CTL2_MODE 0x00003333 - static unsigned int init_nr_desc_per_channel = 64; module_param(init_nr_desc_per_channel, uint, 0644); MODULE_PARM_DESC(init_nr_desc_per_channel, @@ -137,7 +133,6 @@ struct pch_dma { #define PCH_DMA_CTL3 0x0C #define PCH_DMA_STS0 0x10 #define PCH_DMA_STS1 0x14 -#define PCH_DMA_STS2 0x18 #define dma_readl(pd, name) \ readl((pd)->membase + PCH_DMA_##name) @@ -188,19 +183,13 @@ static void pdc_enable_irq(struct dma_chan *chan, int enable) { struct pch_dma *pd = to_pd(chan->device); u32 val; - int pos; - - if (chan->chan_id < 8) - pos = chan->chan_id; - else - pos = chan->chan_id + 8; val = dma_readl(pd, CTL2); if (enable) - val |= 0x1 << pos; + val |= 0x1 << chan->chan_id; else - val &= ~(0x1 << pos); + val &= ~(0x1 << chan->chan_id); dma_writel(pd, CTL2, val); @@ -213,17 +202,10 @@ static void pdc_set_dir(struct dma_chan *chan) struct pch_dma_chan *pd_chan = to_pd_chan(chan); struct pch_dma *pd = to_pd(chan->device); u32 val; - u32 mask_mode; - u32 mask_ctl; if (chan->chan_id < 8) { val = dma_readl(pd, CTL0); - mask_mode = DMA_CTL0_MODE_MASK_BITS << - (DMA_CTL0_BITS_PER_CH * chan->chan_id); - mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS << - (DMA_CTL0_BITS_PER_CH * chan->chan_id)); - val &= mask_mode; if (pd_chan->dir == DMA_TO_DEVICE) val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + DMA_CTL0_DIR_SHIFT_BITS); @@ -231,24 +213,18 @@ static void pdc_set_dir(struct dma_chan *chan) val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + DMA_CTL0_DIR_SHIFT_BITS)); - val |= mask_ctl; dma_writel(pd, CTL0, val); } else { int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */ val = dma_readl(pd, CTL3); - mask_mode = DMA_CTL0_MODE_MASK_BITS << - (DMA_CTL0_BITS_PER_CH * ch); - mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS << - (DMA_CTL0_BITS_PER_CH * ch)); - val &= mask_mode; if (pd_chan->dir == DMA_TO_DEVICE) val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch + DMA_CTL0_DIR_SHIFT_BITS); else val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * ch + DMA_CTL0_DIR_SHIFT_BITS)); - val |= mask_ctl; + dma_writel(pd, CTL3, val); } @@ -260,37 +236,33 @@ static void pdc_set_mode(struct dma_chan *chan, u32 mode) { struct pch_dma *pd = to_pd(chan->device); u32 val; - u32 mask_ctl; - u32 mask_dir; if (chan->chan_id < 8) { - mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS << - (DMA_CTL0_BITS_PER_CH * chan->chan_id)); - mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +\ - DMA_CTL0_DIR_SHIFT_BITS); val = dma_readl(pd, CTL0); - val &= mask_dir; + + val &= ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id)); val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id); - val |= mask_ctl; + dma_writel(pd, CTL0, val); } else { int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */ - mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS << - (DMA_CTL0_BITS_PER_CH * ch)); - mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * ch +\ - DMA_CTL0_DIR_SHIFT_BITS); + val = dma_readl(pd, CTL3); - val &= mask_dir; + + val &= ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch)); val |= mode << (DMA_CTL0_BITS_PER_CH * ch); - val |= mask_ctl; + dma_writel(pd, CTL3, val); + } dev_dbg(chan2dev(chan), "pdc_set_mode: chan %d -> %x\n", chan->chan_id, val); } -static u32 pdc_get_status0(struct pch_dma_chan *pd_chan) +static u32 pdc_get_status(struct pch_dma_chan *pd_chan) { struct pch_dma *pd = to_pd(pd_chan->chan.device); u32 val; @@ -300,27 +272,9 @@ static u32 pdc_get_status0(struct pch_dma_chan *pd_chan) DMA_STATUS_BITS_PER_CH * pd_chan->chan.chan_id)); } -static u32 pdc_get_status2(struct pch_dma_chan *pd_chan) -{ - struct pch_dma *pd = to_pd(pd_chan->chan.device); - u32 val; - - val = dma_readl(pd, STS2); - return DMA_STATUS_MASK_BITS & (val >> (DMA_STATUS_SHIFT_BITS + - DMA_STATUS_BITS_PER_CH * (pd_chan->chan.chan_id - 8))); -} - static bool pdc_is_idle(struct pch_dma_chan *pd_chan) { - u32 sts; - - if (pd_chan->chan.chan_id < 8) - sts = pdc_get_status0(pd_chan); - else - sts = pdc_get_status2(pd_chan); - - - if (sts == DMA_STATUS_IDLE) + if (pdc_get_status(pd_chan) == DMA_STATUS_IDLE) return true; else return false; @@ -541,11 +495,11 @@ static int pd_alloc_chan_resources(struct dma_chan *chan) list_add_tail(&desc->desc_node, &tmp_list); } - spin_lock_irq(&pd_chan->lock); + spin_lock_bh(&pd_chan->lock); list_splice(&tmp_list, &pd_chan->free_list); pd_chan->descs_allocated = i; pd_chan->completed_cookie = chan->cookie = 1; - spin_unlock_irq(&pd_chan->lock); + spin_unlock_bh(&pd_chan->lock); pdc_enable_irq(chan, 1); @@ -563,10 +517,10 @@ static void pd_free_chan_resources(struct dma_chan *chan) BUG_ON(!list_empty(&pd_chan->active_list)); BUG_ON(!list_empty(&pd_chan->queue)); - spin_lock_irq(&pd_chan->lock); + spin_lock_bh(&pd_chan->lock); list_splice_init(&pd_chan->free_list, &tmp_list); pd_chan->descs_allocated = 0; - spin_unlock_irq(&pd_chan->lock); + spin_unlock_bh(&pd_chan->lock); list_for_each_entry_safe(desc, _d, &tmp_list, desc_node) pci_pool_free(pd->pool, desc, desc->txd.phys); @@ -582,10 +536,10 @@ static enum dma_status pd_tx_status(struct dma_chan *chan, dma_cookie_t cookie, dma_cookie_t last_completed; int ret; - spin_lock_irq(&pd_chan->lock); + spin_lock_bh(&pd_chan->lock); last_completed = pd_chan->completed_cookie; last_used = chan->cookie; - spin_unlock_irq(&pd_chan->lock); + spin_unlock_bh(&pd_chan->lock); ret = dma_async_is_complete(cookie, last_completed, last_used); @@ -700,7 +654,7 @@ static int pd_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, if (cmd != DMA_TERMINATE_ALL) return -ENXIO; - spin_lock_irq(&pd_chan->lock); + spin_lock_bh(&pd_chan->lock); pdc_set_mode(&pd_chan->chan, DMA_CTL0_DISABLE); @@ -710,7 +664,7 @@ static int pd_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, list_for_each_entry_safe(desc, _d, &list, desc_node) pdc_chain_complete(pd_chan, desc); - spin_unlock_irq(&pd_chan->lock); + spin_unlock_bh(&pd_chan->lock); return 0; } @@ -739,45 +693,30 @@ static irqreturn_t pd_irq(int irq, void *devid) struct pch_dma *pd = (struct pch_dma *)devid; struct pch_dma_chan *pd_chan; u32 sts0; - u32 sts2; int i; - int ret0 = IRQ_NONE; - int ret2 = IRQ_NONE; + int ret = IRQ_NONE; sts0 = dma_readl(pd, STS0); - sts2 = dma_readl(pd, STS2); dev_dbg(pd->dma.dev, "pd_irq sts0: %x\n", sts0); for (i = 0; i < pd->dma.chancnt; i++) { pd_chan = &pd->channels[i]; - if (i < 8) { - if (sts0 & DMA_STATUS_IRQ(i)) { - if (sts0 & DMA_STATUS0_ERR(i)) - set_bit(0, &pd_chan->err_status); - - tasklet_schedule(&pd_chan->tasklet); - ret0 = IRQ_HANDLED; - } - } else { - if (sts2 & DMA_STATUS_IRQ(i - 8)) { - if (sts2 & DMA_STATUS2_ERR(i)) - set_bit(0, &pd_chan->err_status); + if (sts0 & DMA_STATUS_IRQ(i)) { + if (sts0 & DMA_STATUS_ERR(i)) + set_bit(0, &pd_chan->err_status); - tasklet_schedule(&pd_chan->tasklet); - ret2 = IRQ_HANDLED; - } + tasklet_schedule(&pd_chan->tasklet); + ret = IRQ_HANDLED; } + } /* clear interrupt bits in status register */ - if (ret0) - dma_writel(pd, STS0, sts0); - if (ret2) - dma_writel(pd, STS2, sts2); + dma_writel(pd, STS0, sts0); - return ret0 | ret2; + return ret; } #ifdef CONFIG_PM diff --git a/trunk/drivers/dma/pl330.c b/trunk/drivers/dma/pl330.c index 00eee59e8b33..6abe1ec1f2ce 100644 --- a/trunk/drivers/dma/pl330.c +++ b/trunk/drivers/dma/pl330.c @@ -82,7 +82,7 @@ struct dma_pl330_dmac { spinlock_t pool_lock; /* Peripheral channels connected to this DMAC */ - struct dma_pl330_chan *peripherals; /* keep at end */ + struct dma_pl330_chan peripherals[0]; /* keep at end */ }; struct dma_pl330_desc { @@ -451,13 +451,8 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch) desc->txd.cookie = 0; async_tx_ack(&desc->txd); - if (peri) { - desc->req.rqtype = peri->rqtype; - desc->req.peri = peri->peri_id; - } else { - desc->req.rqtype = MEMTOMEM; - desc->req.peri = 0; - } + desc->req.rqtype = peri->rqtype; + desc->req.peri = peri->peri_id; dma_async_tx_descriptor_init(&desc->txd, &pch->chan); @@ -534,10 +529,10 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, struct pl330_info *pi; int burst; - if (unlikely(!pch || !len)) + if (unlikely(!pch || !len || !peri)) return NULL; - if (peri && peri->rqtype != MEMTOMEM) + if (peri->rqtype != MEMTOMEM) return NULL; pi = &pch->dmac->pif; @@ -582,7 +577,7 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, int i, burst_size; dma_addr_t addr; - if (unlikely(!pch || !sgl || !sg_len || !peri)) + if (unlikely(!pch || !sgl || !sg_len)) return NULL; /* Make sure the direction is consistent */ @@ -671,12 +666,17 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) struct dma_device *pd; struct resource *res; int i, ret, irq; - int num_chan; pdat = adev->dev.platform_data; + if (!pdat || !pdat->nr_valid_peri) { + dev_err(&adev->dev, "platform data missing\n"); + return -ENODEV; + } + /* Allocate a new DMAC and its Channels */ - pdmac = kzalloc(sizeof(*pdmac), GFP_KERNEL); + pdmac = kzalloc(pdat->nr_valid_peri * sizeof(*pch) + + sizeof(*pdmac), GFP_KERNEL); if (!pdmac) { dev_err(&adev->dev, "unable to allocate mem\n"); return -ENOMEM; @@ -685,7 +685,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) pi = &pdmac->pif; pi->dev = &adev->dev; pi->pl330_data = NULL; - pi->mcbufsz = pdat ? pdat->mcbuf_sz : 0; + pi->mcbufsz = pdat->mcbuf_sz; res = &adev->res; request_mem_region(res->start, resource_size(res), "dma-pl330"); @@ -717,35 +717,27 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) INIT_LIST_HEAD(&pd->channels); /* Initialize channel parameters */ - num_chan = max(pdat ? pdat->nr_valid_peri : 0, (u8)pi->pcfg.num_chan); - pdmac->peripherals = kzalloc(num_chan * sizeof(*pch), GFP_KERNEL); - - for (i = 0; i < num_chan; i++) { + for (i = 0; i < pdat->nr_valid_peri; i++) { + struct dma_pl330_peri *peri = &pdat->peri[i]; pch = &pdmac->peripherals[i]; - if (pdat) { - struct dma_pl330_peri *peri = &pdat->peri[i]; - - switch (peri->rqtype) { - case MEMTOMEM: - dma_cap_set(DMA_MEMCPY, pd->cap_mask); - break; - case MEMTODEV: - case DEVTOMEM: - dma_cap_set(DMA_SLAVE, pd->cap_mask); - break; - default: - dev_err(&adev->dev, "DEVTODEV Not Supported\n"); - continue; - } - pch->chan.private = peri; - } else { + + switch (peri->rqtype) { + case MEMTOMEM: dma_cap_set(DMA_MEMCPY, pd->cap_mask); - pch->chan.private = NULL; + break; + case MEMTODEV: + case DEVTOMEM: + dma_cap_set(DMA_SLAVE, pd->cap_mask); + break; + default: + dev_err(&adev->dev, "DEVTODEV Not Supported\n"); + continue; } INIT_LIST_HEAD(&pch->work_list); spin_lock_init(&pch->lock); pch->pl330_chid = NULL; + pch->chan.private = peri; pch->chan.device = pd; pch->chan.chan_id = i; pch->dmac = pdmac; diff --git a/trunk/drivers/dma/ste_dma40.c b/trunk/drivers/dma/ste_dma40.c index cd3a7c726bf8..29d1addbe0cf 100644 --- a/trunk/drivers/dma/ste_dma40.c +++ b/trunk/drivers/dma/ste_dma40.c @@ -14,7 +14,6 @@ #include #include #include -#include #include @@ -46,6 +45,9 @@ #define D40_ALLOC_PHY (1 << 30) #define D40_ALLOC_LOG_FREE 0 +/* Hardware designer of the block */ +#define D40_HW_DESIGNER 0x8 + /** * enum 40_command - The different commands and/or statuses. * @@ -184,8 +186,6 @@ struct d40_base; * @log_def: Default logical channel settings. * @lcla: Space for one dst src pair for logical channel transfers. * @lcpa: Pointer to dst and src lcpa settings. - * @runtime_addr: runtime configured address. - * @runtime_direction: runtime configured direction. * * This struct can either "be" a logical or a physical channel. */ @@ -200,7 +200,6 @@ struct d40_chan { struct dma_chan chan; struct tasklet_struct tasklet; struct list_head client; - struct list_head pending_queue; struct list_head active; struct list_head queue; struct stedma40_chan_cfg dma_cfg; @@ -646,20 +645,7 @@ static struct d40_desc *d40_first_active_get(struct d40_chan *d40c) static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc) { - list_add_tail(&desc->node, &d40c->pending_queue); -} - -static struct d40_desc *d40_first_pending(struct d40_chan *d40c) -{ - struct d40_desc *d; - - if (list_empty(&d40c->pending_queue)) - return NULL; - - d = list_first_entry(&d40c->pending_queue, - struct d40_desc, - node); - return d; + list_add_tail(&desc->node, &d40c->queue); } static struct d40_desc *d40_first_queued(struct d40_chan *d40c) @@ -816,11 +802,6 @@ static void d40_term_all(struct d40_chan *d40c) d40_desc_free(d40c, d40d); } - /* Release pending descriptors */ - while ((d40d = d40_first_pending(d40c))) { - d40_desc_remove(d40d); - d40_desc_free(d40c, d40d); - } d40c->pending_tx = 0; d40c->busy = false; @@ -2111,7 +2092,7 @@ dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, struct scatterlist *sg; int i; - sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_NOWAIT); + sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_KERNEL); for (i = 0; i < periods; i++) { sg_dma_address(&sg[i]) = dma_addr; sg_dma_len(&sg[i]) = period_len; @@ -2171,87 +2152,24 @@ static void d40_issue_pending(struct dma_chan *chan) spin_lock_irqsave(&d40c->lock, flags); - list_splice_tail_init(&d40c->pending_queue, &d40c->queue); - - /* Busy means that queued jobs are already being processed */ + /* Busy means that pending jobs are already being processed */ if (!d40c->busy) (void) d40_queue_start(d40c); spin_unlock_irqrestore(&d40c->lock, flags); } -static int -dma40_config_to_halfchannel(struct d40_chan *d40c, - struct stedma40_half_channel_info *info, - enum dma_slave_buswidth width, - u32 maxburst) -{ - enum stedma40_periph_data_width addr_width; - int psize; - - switch (width) { - case DMA_SLAVE_BUSWIDTH_1_BYTE: - addr_width = STEDMA40_BYTE_WIDTH; - break; - case DMA_SLAVE_BUSWIDTH_2_BYTES: - addr_width = STEDMA40_HALFWORD_WIDTH; - break; - case DMA_SLAVE_BUSWIDTH_4_BYTES: - addr_width = STEDMA40_WORD_WIDTH; - break; - case DMA_SLAVE_BUSWIDTH_8_BYTES: - addr_width = STEDMA40_DOUBLEWORD_WIDTH; - break; - default: - dev_err(d40c->base->dev, - "illegal peripheral address width " - "requested (%d)\n", - width); - return -EINVAL; - } - - if (chan_is_logical(d40c)) { - if (maxburst >= 16) - psize = STEDMA40_PSIZE_LOG_16; - else if (maxburst >= 8) - psize = STEDMA40_PSIZE_LOG_8; - else if (maxburst >= 4) - psize = STEDMA40_PSIZE_LOG_4; - else - psize = STEDMA40_PSIZE_LOG_1; - } else { - if (maxburst >= 16) - psize = STEDMA40_PSIZE_PHY_16; - else if (maxburst >= 8) - psize = STEDMA40_PSIZE_PHY_8; - else if (maxburst >= 4) - psize = STEDMA40_PSIZE_PHY_4; - else - psize = STEDMA40_PSIZE_PHY_1; - } - - info->data_width = addr_width; - info->psize = psize; - info->flow_ctrl = STEDMA40_NO_FLOW_CTRL; - - return 0; -} - /* Runtime reconfiguration extension */ -static int d40_set_runtime_config(struct dma_chan *chan, - struct dma_slave_config *config) +static void d40_set_runtime_config(struct dma_chan *chan, + struct dma_slave_config *config) { struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); struct stedma40_chan_cfg *cfg = &d40c->dma_cfg; - enum dma_slave_buswidth src_addr_width, dst_addr_width; + enum dma_slave_buswidth config_addr_width; dma_addr_t config_addr; - u32 src_maxburst, dst_maxburst; - int ret; - - src_addr_width = config->src_addr_width; - src_maxburst = config->src_maxburst; - dst_addr_width = config->dst_addr_width; - dst_maxburst = config->dst_maxburst; + u32 config_maxburst; + enum stedma40_periph_data_width addr_width; + int psize; if (config->direction == DMA_FROM_DEVICE) { dma_addr_t dev_addr_rx = @@ -2270,11 +2188,8 @@ static int d40_set_runtime_config(struct dma_chan *chan, cfg->dir); cfg->dir = STEDMA40_PERIPH_TO_MEM; - /* Configure the memory side */ - if (dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) - dst_addr_width = src_addr_width; - if (dst_maxburst == 0) - dst_maxburst = src_maxburst; + config_addr_width = config->src_addr_width; + config_maxburst = config->src_maxburst; } else if (config->direction == DMA_TO_DEVICE) { dma_addr_t dev_addr_tx = @@ -2293,39 +2208,68 @@ static int d40_set_runtime_config(struct dma_chan *chan, cfg->dir); cfg->dir = STEDMA40_MEM_TO_PERIPH; - /* Configure the memory side */ - if (src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) - src_addr_width = dst_addr_width; - if (src_maxburst == 0) - src_maxburst = dst_maxburst; + config_addr_width = config->dst_addr_width; + config_maxburst = config->dst_maxburst; + } else { dev_err(d40c->base->dev, "unrecognized channel direction %d\n", config->direction); - return -EINVAL; + return; } - if (src_maxburst * src_addr_width != dst_maxburst * dst_addr_width) { + switch (config_addr_width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + addr_width = STEDMA40_BYTE_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + addr_width = STEDMA40_HALFWORD_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + addr_width = STEDMA40_WORD_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_8_BYTES: + addr_width = STEDMA40_DOUBLEWORD_WIDTH; + break; + default: dev_err(d40c->base->dev, - "src/dst width/maxburst mismatch: %d*%d != %d*%d\n", - src_maxburst, - src_addr_width, - dst_maxburst, - dst_addr_width); - return -EINVAL; + "illegal peripheral address width " + "requested (%d)\n", + config->src_addr_width); + return; } - ret = dma40_config_to_halfchannel(d40c, &cfg->src_info, - src_addr_width, - src_maxburst); - if (ret) - return ret; + if (chan_is_logical(d40c)) { + if (config_maxburst >= 16) + psize = STEDMA40_PSIZE_LOG_16; + else if (config_maxburst >= 8) + psize = STEDMA40_PSIZE_LOG_8; + else if (config_maxburst >= 4) + psize = STEDMA40_PSIZE_LOG_4; + else + psize = STEDMA40_PSIZE_LOG_1; + } else { + if (config_maxburst >= 16) + psize = STEDMA40_PSIZE_PHY_16; + else if (config_maxburst >= 8) + psize = STEDMA40_PSIZE_PHY_8; + else if (config_maxburst >= 4) + psize = STEDMA40_PSIZE_PHY_4; + else if (config_maxburst >= 2) + psize = STEDMA40_PSIZE_PHY_2; + else + psize = STEDMA40_PSIZE_PHY_1; + } - ret = dma40_config_to_halfchannel(d40c, &cfg->dst_info, - dst_addr_width, - dst_maxburst); - if (ret) - return ret; + /* Set up all the endpoint configs */ + cfg->src_info.data_width = addr_width; + cfg->src_info.psize = psize; + cfg->src_info.big_endian = false; + cfg->src_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL; + cfg->dst_info.data_width = addr_width; + cfg->dst_info.psize = psize; + cfg->dst_info.big_endian = false; + cfg->dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL; /* Fill in register values */ if (chan_is_logical(d40c)) @@ -2338,14 +2282,12 @@ static int d40_set_runtime_config(struct dma_chan *chan, d40c->runtime_addr = config_addr; d40c->runtime_direction = config->direction; dev_dbg(d40c->base->dev, - "configured channel %s for %s, data width %d/%d, " - "maxburst %d/%d elements, LE, no flow control\n", + "configured channel %s for %s, data width %d, " + "maxburst %d bytes, LE, no flow control\n", dma_chan_name(chan), (config->direction == DMA_FROM_DEVICE) ? "RX" : "TX", - src_addr_width, dst_addr_width, - src_maxburst, dst_maxburst); - - return 0; + config_addr_width, + config_maxburst); } static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, @@ -2366,8 +2308,9 @@ static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, case DMA_RESUME: return d40_resume(d40c); case DMA_SLAVE_CONFIG: - return d40_set_runtime_config(chan, + d40_set_runtime_config(chan, (struct dma_slave_config *) arg); + return 0; default: break; } @@ -2398,7 +2341,6 @@ static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma, INIT_LIST_HEAD(&d40c->active); INIT_LIST_HEAD(&d40c->queue); - INIT_LIST_HEAD(&d40c->pending_queue); INIT_LIST_HEAD(&d40c->client); tasklet_init(&d40c->tasklet, dma_tasklet, @@ -2560,6 +2502,25 @@ static int __init d40_phy_res_init(struct d40_base *base) static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) { + static const struct d40_reg_val dma_id_regs[] = { + /* Peripheral Id */ + { .reg = D40_DREG_PERIPHID0, .val = 0x0040}, + { .reg = D40_DREG_PERIPHID1, .val = 0x0000}, + /* + * D40_DREG_PERIPHID2 Depends on HW revision: + * DB8500ed has 0x0008, + * ? has 0x0018, + * DB8500v1 has 0x0028 + * DB8500v2 has 0x0038 + */ + { .reg = D40_DREG_PERIPHID3, .val = 0x0000}, + + /* PCell Id */ + { .reg = D40_DREG_CELLID0, .val = 0x000d}, + { .reg = D40_DREG_CELLID1, .val = 0x00f0}, + { .reg = D40_DREG_CELLID2, .val = 0x0005}, + { .reg = D40_DREG_CELLID3, .val = 0x00b1} + }; struct stedma40_platform_data *plat_data; struct clk *clk = NULL; void __iomem *virtbase = NULL; @@ -2568,9 +2529,8 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) int num_log_chans = 0; int num_phy_chans; int i; - u32 pid; - u32 cid; - u8 rev; + u32 val; + u32 rev; clk = clk_get(&pdev->dev, NULL); @@ -2594,32 +2554,32 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) if (!virtbase) goto failure; - /* This is just a regular AMBA PrimeCell ID actually */ - for (pid = 0, i = 0; i < 4; i++) - pid |= (readl(virtbase + resource_size(res) - 0x20 + 4 * i) - & 255) << (i * 8); - for (cid = 0, i = 0; i < 4; i++) - cid |= (readl(virtbase + resource_size(res) - 0x10 + 4 * i) - & 255) << (i * 8); - - if (cid != AMBA_CID) { - d40_err(&pdev->dev, "Unknown hardware! No PrimeCell ID\n"); - goto failure; + /* HW version check */ + for (i = 0; i < ARRAY_SIZE(dma_id_regs); i++) { + if (dma_id_regs[i].val != + readl(virtbase + dma_id_regs[i].reg)) { + d40_err(&pdev->dev, + "Unknown hardware! Expected 0x%x at 0x%x but got 0x%x\n", + dma_id_regs[i].val, + dma_id_regs[i].reg, + readl(virtbase + dma_id_regs[i].reg)); + goto failure; + } } - if (AMBA_MANF_BITS(pid) != AMBA_VENDOR_ST) { + + /* Get silicon revision and designer */ + val = readl(virtbase + D40_DREG_PERIPHID2); + + if ((val & D40_DREG_PERIPHID2_DESIGNER_MASK) != + D40_HW_DESIGNER) { d40_err(&pdev->dev, "Unknown designer! Got %x wanted %x\n", - AMBA_MANF_BITS(pid), - AMBA_VENDOR_ST); + val & D40_DREG_PERIPHID2_DESIGNER_MASK, + D40_HW_DESIGNER); goto failure; } - /* - * HW revision: - * DB8500ed has revision 0 - * ? has revision 1 - * DB8500v1 has revision 2 - * DB8500v2 has revision 3 - */ - rev = AMBA_REV_BITS(pid); + + rev = (val & D40_DREG_PERIPHID2_REV_MASK) >> + D40_DREG_PERIPHID2_REV_POS; /* The number of physical channels on this HW */ num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4; diff --git a/trunk/drivers/dma/ste_dma40_ll.h b/trunk/drivers/dma/ste_dma40_ll.h index b44c455158de..195ee65ee7f3 100644 --- a/trunk/drivers/dma/ste_dma40_ll.h +++ b/trunk/drivers/dma/ste_dma40_ll.h @@ -184,6 +184,9 @@ #define D40_DREG_PERIPHID0 0xFE0 #define D40_DREG_PERIPHID1 0xFE4 #define D40_DREG_PERIPHID2 0xFE8 +#define D40_DREG_PERIPHID2_REV_POS 4 +#define D40_DREG_PERIPHID2_REV_MASK (0xf << D40_DREG_PERIPHID2_REV_POS) +#define D40_DREG_PERIPHID2_DESIGNER_MASK 0xf #define D40_DREG_PERIPHID3 0xFEC #define D40_DREG_CELLID0 0xFF0 #define D40_DREG_CELLID1 0xFF4 diff --git a/trunk/drivers/firmware/efivars.c b/trunk/drivers/firmware/efivars.c index eacb05e6cfb3..5f29aafd4462 100644 --- a/trunk/drivers/firmware/efivars.c +++ b/trunk/drivers/firmware/efivars.c @@ -78,7 +78,6 @@ #include #include #include -#include #include @@ -90,8 +89,6 @@ MODULE_DESCRIPTION("sysfs interface to EFI Variables"); MODULE_LICENSE("GPL"); MODULE_VERSION(EFIVARS_VERSION); -#define DUMP_NAME_LEN 52 - /* * The maximum size of VariableName + Data = 1024 * Therefore, it's reasonable to save that much @@ -122,10 +119,6 @@ struct efivar_attribute { ssize_t (*store)(struct efivar_entry *entry, const char *buf, size_t count); }; -#define PSTORE_EFI_ATTRIBUTES \ - (EFI_VARIABLE_NON_VOLATILE | \ - EFI_VARIABLE_BOOTSERVICE_ACCESS | \ - EFI_VARIABLE_RUNTIME_ACCESS) #define EFIVAR_ATTR(_name, _mode, _show, _store) \ struct efivar_attribute efivar_attr_##_name = { \ @@ -148,72 +141,38 @@ efivar_create_sysfs_entry(struct efivars *efivars, /* Return the number of unicode characters in data */ static unsigned long -utf16_strnlen(efi_char16_t *s, size_t maxlength) +utf8_strlen(efi_char16_t *data, unsigned long maxlength) { unsigned long length = 0; - while (*s++ != 0 && length < maxlength) + while (*data++ != 0 && length < maxlength) length++; return length; } -static unsigned long -utf16_strlen(efi_char16_t *s) -{ - return utf16_strnlen(s, ~0UL); -} - /* * Return the number of bytes is the length of this string * Note: this is NOT the same as the number of unicode characters */ static inline unsigned long -utf16_strsize(efi_char16_t *data, unsigned long maxlength) +utf8_strsize(efi_char16_t *data, unsigned long maxlength) { - return utf16_strnlen(data, maxlength/sizeof(efi_char16_t)) * sizeof(efi_char16_t); -} - -static inline int -utf16_strncmp(const efi_char16_t *a, const efi_char16_t *b, size_t len) -{ - while (1) { - if (len == 0) - return 0; - if (*a < *b) - return -1; - if (*a > *b) - return 1; - if (*a == 0) /* implies *b == 0 */ - return 0; - a++; - b++; - len--; - } + return utf8_strlen(data, maxlength/sizeof(efi_char16_t)) * sizeof(efi_char16_t); } static efi_status_t -get_var_data_locked(struct efivars *efivars, struct efi_variable *var) +get_var_data(struct efivars *efivars, struct efi_variable *var) { efi_status_t status; + spin_lock(&efivars->lock); var->DataSize = 1024; status = efivars->ops->get_variable(var->VariableName, &var->VendorGuid, &var->Attributes, &var->DataSize, var->Data); - return status; -} - -static efi_status_t -get_var_data(struct efivars *efivars, struct efi_variable *var) -{ - efi_status_t status; - - spin_lock(&efivars->lock); - status = get_var_data_locked(efivars, var); spin_unlock(&efivars->lock); - if (status != EFI_SUCCESS) { printk(KERN_WARNING "efivars: get_variable() failed 0x%lx!\n", status); @@ -428,180 +387,12 @@ static struct kobj_type efivar_ktype = { .default_attrs = def_attrs, }; -static struct pstore_info efi_pstore_info; - static inline void efivar_unregister(struct efivar_entry *var) { kobject_put(&var->kobj); } -#ifdef CONFIG_PSTORE - -static int efi_pstore_open(struct pstore_info *psi) -{ - struct efivars *efivars = psi->data; - - spin_lock(&efivars->lock); - efivars->walk_entry = list_first_entry(&efivars->list, - struct efivar_entry, list); - return 0; -} - -static int efi_pstore_close(struct pstore_info *psi) -{ - struct efivars *efivars = psi->data; - - spin_unlock(&efivars->lock); - return 0; -} - -static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, - struct timespec *timespec, struct pstore_info *psi) -{ - efi_guid_t vendor = LINUX_EFI_CRASH_GUID; - struct efivars *efivars = psi->data; - char name[DUMP_NAME_LEN]; - int i; - unsigned int part, size; - unsigned long time; - - while (&efivars->walk_entry->list != &efivars->list) { - if (!efi_guidcmp(efivars->walk_entry->var.VendorGuid, - vendor)) { - for (i = 0; i < DUMP_NAME_LEN; i++) { - name[i] = efivars->walk_entry->var.VariableName[i]; - } - if (sscanf(name, "dump-type%u-%u-%lu", type, &part, &time) == 3) { - *id = part; - timespec->tv_sec = time; - timespec->tv_nsec = 0; - get_var_data_locked(efivars, &efivars->walk_entry->var); - size = efivars->walk_entry->var.DataSize; - memcpy(psi->buf, efivars->walk_entry->var.Data, size); - efivars->walk_entry = list_entry(efivars->walk_entry->list.next, - struct efivar_entry, list); - return size; - } - } - efivars->walk_entry = list_entry(efivars->walk_entry->list.next, - struct efivar_entry, list); - } - return 0; -} - -static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, - size_t size, struct pstore_info *psi) -{ - char name[DUMP_NAME_LEN]; - char stub_name[DUMP_NAME_LEN]; - efi_char16_t efi_name[DUMP_NAME_LEN]; - efi_guid_t vendor = LINUX_EFI_CRASH_GUID; - struct efivars *efivars = psi->data; - struct efivar_entry *entry, *found = NULL; - int i; - - sprintf(stub_name, "dump-type%u-%u-", type, part); - sprintf(name, "%s%lu", stub_name, get_seconds()); - - spin_lock(&efivars->lock); - - for (i = 0; i < DUMP_NAME_LEN; i++) - efi_name[i] = stub_name[i]; - - /* - * Clean up any entries with the same name - */ - - list_for_each_entry(entry, &efivars->list, list) { - get_var_data_locked(efivars, &entry->var); - - if (efi_guidcmp(entry->var.VendorGuid, vendor)) - continue; - if (utf16_strncmp(entry->var.VariableName, efi_name, - utf16_strlen(efi_name))) - continue; - /* Needs to be a prefix */ - if (entry->var.VariableName[utf16_strlen(efi_name)] == 0) - continue; - - /* found */ - found = entry; - efivars->ops->set_variable(entry->var.VariableName, - &entry->var.VendorGuid, - PSTORE_EFI_ATTRIBUTES, - 0, NULL); - } - - if (found) - list_del(&found->list); - - for (i = 0; i < DUMP_NAME_LEN; i++) - efi_name[i] = name[i]; - - efivars->ops->set_variable(efi_name, &vendor, PSTORE_EFI_ATTRIBUTES, - size, psi->buf); - - spin_unlock(&efivars->lock); - - if (found) - efivar_unregister(found); - - if (size) - efivar_create_sysfs_entry(efivars, - utf16_strsize(efi_name, - DUMP_NAME_LEN * 2), - efi_name, &vendor); - - return part; -}; - -static int efi_pstore_erase(enum pstore_type_id type, u64 id, - struct pstore_info *psi) -{ - efi_pstore_write(type, id, 0, psi); - - return 0; -} -#else -static int efi_pstore_open(struct pstore_info *psi) -{ - return 0; -} - -static int efi_pstore_close(struct pstore_info *psi) -{ - return 0; -} - -static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, - struct timespec *time, struct pstore_info *psi) -{ - return -1; -} - -static u64 efi_pstore_write(enum pstore_type_id type, int part, size_t size, - struct pstore_info *psi) -{ - return 0; -} - -static int efi_pstore_erase(enum pstore_type_id type, u64 id, - struct pstore_info *psi) -{ - return 0; -} -#endif - -static struct pstore_info efi_pstore_info = { - .owner = THIS_MODULE, - .name = "efi", - .open = efi_pstore_open, - .close = efi_pstore_close, - .read = efi_pstore_read, - .write = efi_pstore_write, - .erase = efi_pstore_erase, -}; static ssize_t efivar_create(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -623,8 +414,8 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, * Does this variable already exist? */ list_for_each_entry_safe(search_efivar, n, &efivars->list, list) { - strsize1 = utf16_strsize(search_efivar->var.VariableName, 1024); - strsize2 = utf16_strsize(new_var->VariableName, 1024); + strsize1 = utf8_strsize(search_efivar->var.VariableName, 1024); + strsize2 = utf8_strsize(new_var->VariableName, 1024); if (strsize1 == strsize2 && !memcmp(&(search_efivar->var.VariableName), new_var->VariableName, strsize1) && @@ -656,8 +447,8 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, /* Create the entry in sysfs. Locking is not required here */ status = efivar_create_sysfs_entry(efivars, - utf16_strsize(new_var->VariableName, - 1024), + utf8_strsize(new_var->VariableName, + 1024), new_var->VariableName, &new_var->VendorGuid); if (status) { @@ -686,8 +477,8 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj, * Does this variable already exist? */ list_for_each_entry_safe(search_efivar, n, &efivars->list, list) { - strsize1 = utf16_strsize(search_efivar->var.VariableName, 1024); - strsize2 = utf16_strsize(del_var->VariableName, 1024); + strsize1 = utf8_strsize(search_efivar->var.VariableName, 1024); + strsize2 = utf8_strsize(del_var->VariableName, 1024); if (strsize1 == strsize2 && !memcmp(&(search_efivar->var.VariableName), del_var->VariableName, strsize1) && @@ -972,16 +763,6 @@ int register_efivars(struct efivars *efivars, if (error) unregister_efivars(efivars); - efivars->efi_pstore_info = efi_pstore_info; - - efivars->efi_pstore_info.buf = kmalloc(4096, GFP_KERNEL); - if (efivars->efi_pstore_info.buf) { - efivars->efi_pstore_info.bufsize = 1024; - efivars->efi_pstore_info.data = efivars; - mutex_init(&efivars->efi_pstore_info.buf_mutex); - pstore_register(&efivars->efi_pstore_info); - } - out: kfree(variable_name); diff --git a/trunk/drivers/md/Kconfig b/trunk/drivers/md/Kconfig index f75a66e7d312..8420129fc5ee 100644 --- a/trunk/drivers/md/Kconfig +++ b/trunk/drivers/md/Kconfig @@ -241,13 +241,12 @@ config DM_MIRROR needed for live data migration tools such as 'pvmove'. config DM_RAID - tristate "RAID 1/4/5/6 target (EXPERIMENTAL)" + tristate "RAID 4/5/6 target (EXPERIMENTAL)" depends on BLK_DEV_DM && EXPERIMENTAL - select MD_RAID1 select MD_RAID456 select BLK_DEV_MD ---help--- - A dm target that supports RAID1, RAID4, RAID5 and RAID6 mappings + A dm target that supports RAID4, RAID5 and RAID6 mappings A RAID-5 set of N drives with a capacity of C MB per drive provides the capacity of C * (N - 1) MB, and protects against a failure diff --git a/trunk/drivers/md/dm-crypt.c b/trunk/drivers/md/dm-crypt.c index 49da55c1528a..bae6c4e23d3f 100644 --- a/trunk/drivers/md/dm-crypt.c +++ b/trunk/drivers/md/dm-crypt.c @@ -30,6 +30,7 @@ #include #define DM_MSG_PREFIX "crypt" +#define MESG_STR(x) x, sizeof(x) /* * context holding the current state of a multi-part conversion @@ -238,7 +239,7 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, struct dm_crypt_request *dmreq) { memset(iv, 0, cc->iv_size); - *(__le32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff); + *(u32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff); return 0; } @@ -247,7 +248,7 @@ static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv, struct dm_crypt_request *dmreq) { memset(iv, 0, cc->iv_size); - *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector); + *(u64 *)iv = cpu_to_le64(dmreq->iv_sector); return 0; } @@ -414,7 +415,7 @@ static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private; memset(iv, 0, cc->iv_size); - *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector); + *(u64 *)iv = cpu_to_le64(dmreq->iv_sector); crypto_cipher_encrypt_one(essiv_tfm, iv, iv); return 0; @@ -1574,17 +1575,11 @@ static int crypt_ctr_cipher(struct dm_target *ti, static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct crypt_config *cc; - unsigned int key_size, opt_params; + unsigned int key_size; unsigned long long tmpll; int ret; - struct dm_arg_set as; - const char *opt_string; - - static struct dm_arg _args[] = { - {0, 1, "Invalid number of feature args"}, - }; - if (argc < 5) { + if (argc != 5) { ti->error = "Not enough arguments"; return -EINVAL; } @@ -1653,30 +1648,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } cc->start = tmpll; - argv += 5; - argc -= 5; - - /* Optional parameters */ - if (argc) { - as.argc = argc; - as.argv = argv; - - ret = dm_read_arg_group(_args, &as, &opt_params, &ti->error); - if (ret) - goto bad; - - opt_string = dm_shift_arg(&as); - - if (opt_params == 1 && opt_string && - !strcasecmp(opt_string, "allow_discards")) - ti->num_discard_requests = 1; - else if (opt_params) { - ret = -EINVAL; - ti->error = "Invalid feature arguments"; - goto bad; - } - } - ret = -ENOMEM; cc->io_queue = alloc_workqueue("kcryptd_io", WQ_NON_REENTRANT| @@ -1711,16 +1682,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, struct dm_crypt_io *io; struct crypt_config *cc; - /* - * If bio is REQ_FLUSH or REQ_DISCARD, just bypass crypt queues. - * - for REQ_FLUSH device-mapper core ensures that no IO is in-flight - * - for REQ_DISCARD caller must use flush if IO ordering matters - */ - if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) { + if (bio->bi_rw & REQ_FLUSH) { cc = ti->private; bio->bi_bdev = cc->dev->bdev; - if (bio_sectors(bio)) - bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector); return DM_MAPIO_REMAPPED; } @@ -1763,10 +1727,6 @@ static int crypt_status(struct dm_target *ti, status_type_t type, DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, cc->dev->name, (unsigned long long)cc->start); - - if (ti->num_discard_requests) - DMEMIT(" 1 allow_discards"); - break; } return 0; @@ -1810,12 +1770,12 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) if (argc < 2) goto error; - if (!strcasecmp(argv[0], "key")) { + if (!strnicmp(argv[0], MESG_STR("key"))) { if (!test_bit(DM_CRYPT_SUSPENDED, &cc->flags)) { DMWARN("not suspended during key manipulation."); return -EINVAL; } - if (argc == 3 && !strcasecmp(argv[1], "set")) { + if (argc == 3 && !strnicmp(argv[1], MESG_STR("set"))) { ret = crypt_set_key(cc, argv[2]); if (ret) return ret; @@ -1823,7 +1783,7 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) ret = cc->iv_gen_ops->init(cc); return ret; } - if (argc == 2 && !strcasecmp(argv[1], "wipe")) { + if (argc == 2 && !strnicmp(argv[1], MESG_STR("wipe"))) { if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) { ret = cc->iv_gen_ops->wipe(cc); if (ret) @@ -1863,7 +1823,7 @@ static int crypt_iterate_devices(struct dm_target *ti, static struct target_type crypt_target = { .name = "crypt", - .version = {1, 11, 0}, + .version = {1, 10, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/trunk/drivers/md/dm-flakey.c b/trunk/drivers/md/dm-flakey.c index 89f73ca22cfa..ea790623c30b 100644 --- a/trunk/drivers/md/dm-flakey.c +++ b/trunk/drivers/md/dm-flakey.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2003 Sistina Software (UK) Limited. - * Copyright (C) 2004, 2010-2011 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004, 2010 Red Hat, Inc. All rights reserved. * * This file is released under the GPL. */ @@ -15,9 +15,6 @@ #define DM_MSG_PREFIX "flakey" -#define all_corrupt_bio_flags_match(bio, fc) \ - (((bio)->bi_rw & (fc)->corrupt_bio_flags) == (fc)->corrupt_bio_flags) - /* * Flakey: Used for testing only, simulates intermittent, * catastrophic device failure. @@ -28,189 +25,60 @@ struct flakey_c { sector_t start; unsigned up_interval; unsigned down_interval; - unsigned long flags; - unsigned corrupt_bio_byte; - unsigned corrupt_bio_rw; - unsigned corrupt_bio_value; - unsigned corrupt_bio_flags; -}; - -enum feature_flag_bits { - DROP_WRITES }; -static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, - struct dm_target *ti) -{ - int r; - unsigned argc; - const char *arg_name; - - static struct dm_arg _args[] = { - {0, 6, "Invalid number of feature args"}, - {1, UINT_MAX, "Invalid corrupt bio byte"}, - {0, 255, "Invalid corrupt value to write into bio byte (0-255)"}, - {0, UINT_MAX, "Invalid corrupt bio flags mask"}, - }; - - /* No feature arguments supplied. */ - if (!as->argc) - return 0; - - r = dm_read_arg_group(_args, as, &argc, &ti->error); - if (r) - return r; - - while (argc) { - arg_name = dm_shift_arg(as); - argc--; - - /* - * drop_writes - */ - if (!strcasecmp(arg_name, "drop_writes")) { - if (test_and_set_bit(DROP_WRITES, &fc->flags)) { - ti->error = "Feature drop_writes duplicated"; - return -EINVAL; - } - - continue; - } - - /* - * corrupt_bio_byte - */ - if (!strcasecmp(arg_name, "corrupt_bio_byte")) { - if (!argc) - ti->error = "Feature corrupt_bio_byte requires parameters"; - - r = dm_read_arg(_args + 1, as, &fc->corrupt_bio_byte, &ti->error); - if (r) - return r; - argc--; - - /* - * Direction r or w? - */ - arg_name = dm_shift_arg(as); - if (!strcasecmp(arg_name, "w")) - fc->corrupt_bio_rw = WRITE; - else if (!strcasecmp(arg_name, "r")) - fc->corrupt_bio_rw = READ; - else { - ti->error = "Invalid corrupt bio direction (r or w)"; - return -EINVAL; - } - argc--; - - /* - * Value of byte (0-255) to write in place of correct one. - */ - r = dm_read_arg(_args + 2, as, &fc->corrupt_bio_value, &ti->error); - if (r) - return r; - argc--; - - /* - * Only corrupt bios with these flags set. - */ - r = dm_read_arg(_args + 3, as, &fc->corrupt_bio_flags, &ti->error); - if (r) - return r; - argc--; - - continue; - } - - ti->error = "Unrecognised flakey feature requested"; - return -EINVAL; - } - - if (test_bit(DROP_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) { - ti->error = "drop_writes is incompatible with corrupt_bio_byte with the WRITE flag set"; - return -EINVAL; - } - - return 0; -} - /* - * Construct a flakey mapping: - * [<#feature args> []*] - * - * Feature args: - * [drop_writes] - * [corrupt_bio_byte ] - * - * Nth_byte starts from 1 for the first byte. - * Direction is r for READ or w for WRITE. - * bio_flags is ignored if 0. + * Construct a flakey mapping: */ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) { - static struct dm_arg _args[] = { - {0, UINT_MAX, "Invalid up interval"}, - {0, UINT_MAX, "Invalid down interval"}, - }; - - int r; struct flakey_c *fc; - unsigned long long tmpll; - struct dm_arg_set as; - const char *devname; + unsigned long long tmp; - as.argc = argc; - as.argv = argv; - - if (argc < 4) { - ti->error = "Invalid argument count"; + if (argc != 4) { + ti->error = "dm-flakey: Invalid argument count"; return -EINVAL; } - fc = kzalloc(sizeof(*fc), GFP_KERNEL); + fc = kmalloc(sizeof(*fc), GFP_KERNEL); if (!fc) { - ti->error = "Cannot allocate linear context"; + ti->error = "dm-flakey: Cannot allocate linear context"; return -ENOMEM; } fc->start_time = jiffies; - devname = dm_shift_arg(&as); - - if (sscanf(dm_shift_arg(&as), "%llu", &tmpll) != 1) { - ti->error = "Invalid device sector"; + if (sscanf(argv[1], "%llu", &tmp) != 1) { + ti->error = "dm-flakey: Invalid device sector"; goto bad; } - fc->start = tmpll; + fc->start = tmp; - r = dm_read_arg(_args, &as, &fc->up_interval, &ti->error); - if (r) + if (sscanf(argv[2], "%u", &fc->up_interval) != 1) { + ti->error = "dm-flakey: Invalid up interval"; goto bad; + } - r = dm_read_arg(_args, &as, &fc->down_interval, &ti->error); - if (r) + if (sscanf(argv[3], "%u", &fc->down_interval) != 1) { + ti->error = "dm-flakey: Invalid down interval"; goto bad; + } if (!(fc->up_interval + fc->down_interval)) { - ti->error = "Total (up + down) interval is zero"; + ti->error = "dm-flakey: Total (up + down) interval is zero"; goto bad; } if (fc->up_interval + fc->down_interval < fc->up_interval) { - ti->error = "Interval overflow"; + ti->error = "dm-flakey: Interval overflow"; goto bad; } - r = parse_features(&as, fc, ti); - if (r) - goto bad; - - if (dm_get_device(ti, devname, dm_table_get_mode(ti->table), &fc->dev)) { - ti->error = "Device lookup failed"; + if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &fc->dev)) { + ti->error = "dm-flakey: Device lookup failed"; goto bad; } ti->num_flush_requests = 1; - ti->num_discard_requests = 1; ti->private = fc; return 0; @@ -231,7 +99,7 @@ static sector_t flakey_map_sector(struct dm_target *ti, sector_t bi_sector) { struct flakey_c *fc = ti->private; - return fc->start + dm_target_offset(ti, bi_sector); + return fc->start + (bi_sector - ti->begin); } static void flakey_map_bio(struct dm_target *ti, struct bio *bio) @@ -243,25 +111,6 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio) bio->bi_sector = flakey_map_sector(ti, bio->bi_sector); } -static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) -{ - unsigned bio_bytes = bio_cur_bytes(bio); - char *data = bio_data(bio); - - /* - * Overwrite the Nth byte of the data returned. - */ - if (data && bio_bytes >= fc->corrupt_bio_byte) { - data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value; - - DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " - "(rw=%c bi_rw=%lu bi_sector=%llu cur_bytes=%u)\n", - bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, - (bio_data_dir(bio) == WRITE) ? 'w' : 'r', - bio->bi_rw, (unsigned long long)bio->bi_sector, bio_bytes); - } -} - static int flakey_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { @@ -270,71 +119,18 @@ static int flakey_map(struct dm_target *ti, struct bio *bio, /* Are we alive ? */ elapsed = (jiffies - fc->start_time) / HZ; - if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) { - /* - * Flag this bio as submitted while down. - */ - map_context->ll = 1; - - /* - * Map reads as normal. - */ - if (bio_data_dir(bio) == READ) - goto map_bio; - - /* - * Drop writes? - */ - if (test_bit(DROP_WRITES, &fc->flags)) { - bio_endio(bio, 0); - return DM_MAPIO_SUBMITTED; - } - - /* - * Corrupt matching writes. - */ - if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == WRITE)) { - if (all_corrupt_bio_flags_match(bio, fc)) - corrupt_bio_data(bio, fc); - goto map_bio; - } - - /* - * By default, error all I/O. - */ + if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) return -EIO; - } -map_bio: flakey_map_bio(ti, bio); return DM_MAPIO_REMAPPED; } -static int flakey_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) -{ - struct flakey_c *fc = ti->private; - unsigned bio_submitted_while_down = map_context->ll; - - /* - * Corrupt successful READs while in down state. - * If flags were specified, only corrupt those that match. - */ - if (!error && bio_submitted_while_down && - (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) && - all_corrupt_bio_flags_match(bio, fc)) - corrupt_bio_data(bio, fc); - - return error; -} - static int flakey_status(struct dm_target *ti, status_type_t type, char *result, unsigned int maxlen) { - unsigned sz = 0; struct flakey_c *fc = ti->private; - unsigned drop_writes; switch (type) { case STATUSTYPE_INFO: @@ -342,22 +138,9 @@ static int flakey_status(struct dm_target *ti, status_type_t type, break; case STATUSTYPE_TABLE: - DMEMIT("%s %llu %u %u ", fc->dev->name, - (unsigned long long)fc->start, fc->up_interval, - fc->down_interval); - - drop_writes = test_bit(DROP_WRITES, &fc->flags); - DMEMIT("%u ", drop_writes + (fc->corrupt_bio_byte > 0) * 5); - - if (drop_writes) - DMEMIT("drop_writes "); - - if (fc->corrupt_bio_byte) - DMEMIT("corrupt_bio_byte %u %c %u %u ", - fc->corrupt_bio_byte, - (fc->corrupt_bio_rw == WRITE) ? 'w' : 'r', - fc->corrupt_bio_value, fc->corrupt_bio_flags); - + snprintf(result, maxlen, "%s %llu %u %u", fc->dev->name, + (unsigned long long)fc->start, fc->up_interval, + fc->down_interval); break; } return 0; @@ -394,12 +177,11 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", - .version = {1, 2, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, .map = flakey_map, - .end_io = flakey_end_io, .status = flakey_status, .ioctl = flakey_ioctl, .merge = flakey_merge, diff --git a/trunk/drivers/md/dm-io.c b/trunk/drivers/md/dm-io.c index ad2eba40e319..2067288f61f9 100644 --- a/trunk/drivers/md/dm-io.c +++ b/trunk/drivers/md/dm-io.c @@ -38,8 +38,6 @@ struct io { struct dm_io_client *client; io_notify_fn callback; void *context; - void *vma_invalidate_address; - unsigned long vma_invalidate_size; } __attribute__((aligned(DM_IO_MAX_REGIONS))); static struct kmem_cache *_dm_io_cache; @@ -118,10 +116,6 @@ static void dec_count(struct io *io, unsigned int region, int error) set_bit(region, &io->error_bits); if (atomic_dec_and_test(&io->count)) { - if (io->vma_invalidate_size) - invalidate_kernel_vmap_range(io->vma_invalidate_address, - io->vma_invalidate_size); - if (io->sleeper) wake_up_process(io->sleeper); @@ -165,9 +159,6 @@ struct dpages { unsigned context_u; void *context_ptr; - - void *vma_invalidate_address; - unsigned long vma_invalidate_size; }; /* @@ -386,9 +377,6 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, io->sleeper = current; io->client = client; - io->vma_invalidate_address = dp->vma_invalidate_address; - io->vma_invalidate_size = dp->vma_invalidate_size; - dispatch_io(rw, num_regions, where, dp, io, 1); while (1) { @@ -427,21 +415,13 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, io->callback = fn; io->context = context; - io->vma_invalidate_address = dp->vma_invalidate_address; - io->vma_invalidate_size = dp->vma_invalidate_size; - dispatch_io(rw, num_regions, where, dp, io, 0); return 0; } -static int dp_init(struct dm_io_request *io_req, struct dpages *dp, - unsigned long size) +static int dp_init(struct dm_io_request *io_req, struct dpages *dp) { /* Set up dpages based on memory type */ - - dp->vma_invalidate_address = NULL; - dp->vma_invalidate_size = 0; - switch (io_req->mem.type) { case DM_IO_PAGE_LIST: list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset); @@ -452,11 +432,6 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp, break; case DM_IO_VMA: - flush_kernel_vmap_range(io_req->mem.ptr.vma, size); - if ((io_req->bi_rw & RW_MASK) == READ) { - dp->vma_invalidate_address = io_req->mem.ptr.vma; - dp->vma_invalidate_size = size; - } vm_dp_init(dp, io_req->mem.ptr.vma); break; @@ -485,7 +460,7 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions, int r; struct dpages dp; - r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT); + r = dp_init(io_req, &dp); if (r) return r; diff --git a/trunk/drivers/md/dm-ioctl.c b/trunk/drivers/md/dm-ioctl.c index 2e9a3ca37bdd..4cacdad2270a 100644 --- a/trunk/drivers/md/dm-ioctl.c +++ b/trunk/drivers/md/dm-ioctl.c @@ -128,24 +128,6 @@ static struct hash_cell *__get_uuid_cell(const char *str) return NULL; } -static struct hash_cell *__get_dev_cell(uint64_t dev) -{ - struct mapped_device *md; - struct hash_cell *hc; - - md = dm_get_md(huge_decode_dev(dev)); - if (!md) - return NULL; - - hc = dm_get_mdptr(md); - if (!hc) { - dm_put(md); - return NULL; - } - - return hc; -} - /*----------------------------------------------------------------- * Inserting, removing and renaming a device. *---------------------------------------------------------------*/ @@ -736,45 +718,25 @@ static int dev_create(struct dm_ioctl *param, size_t param_size) */ static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param) { - struct hash_cell *hc = NULL; + struct mapped_device *md; + void *mdptr = NULL; - if (*param->uuid) { - if (*param->name || param->dev) - return NULL; + if (*param->uuid) + return __get_uuid_cell(param->uuid); - hc = __get_uuid_cell(param->uuid); - if (!hc) - return NULL; - } else if (*param->name) { - if (param->dev) - return NULL; + if (*param->name) + return __get_name_cell(param->name); - hc = __get_name_cell(param->name); - if (!hc) - return NULL; - } else if (param->dev) { - hc = __get_dev_cell(param->dev); - if (!hc) - return NULL; - } else - return NULL; - - /* - * Sneakily write in both the name and the uuid - * while we have the cell. - */ - strlcpy(param->name, hc->name, sizeof(param->name)); - if (hc->uuid) - strlcpy(param->uuid, hc->uuid, sizeof(param->uuid)); - else - param->uuid[0] = '\0'; + md = dm_get_md(huge_decode_dev(param->dev)); + if (!md) + goto out; - if (hc->new_map) - param->flags |= DM_INACTIVE_PRESENT_FLAG; - else - param->flags &= ~DM_INACTIVE_PRESENT_FLAG; + mdptr = dm_get_mdptr(md); + if (!mdptr) + dm_put(md); - return hc; +out: + return mdptr; } static struct mapped_device *find_device(struct dm_ioctl *param) @@ -784,8 +746,24 @@ static struct mapped_device *find_device(struct dm_ioctl *param) down_read(&_hash_lock); hc = __find_device_hash_cell(param); - if (hc) + if (hc) { md = hc->md; + + /* + * Sneakily write in both the name and the uuid + * while we have the cell. + */ + strlcpy(param->name, hc->name, sizeof(param->name)); + if (hc->uuid) + strlcpy(param->uuid, hc->uuid, sizeof(param->uuid)); + else + param->uuid[0] = '\0'; + + if (hc->new_map) + param->flags |= DM_INACTIVE_PRESENT_FLAG; + else + param->flags &= ~DM_INACTIVE_PRESENT_FLAG; + } up_read(&_hash_lock); return md; @@ -1424,11 +1402,6 @@ static int target_message(struct dm_ioctl *param, size_t param_size) goto out; } - if (!argc) { - DMWARN("Empty message received."); - goto out; - } - table = dm_get_live_table(md); if (!table) goto out_argv; diff --git a/trunk/drivers/md/dm-kcopyd.c b/trunk/drivers/md/dm-kcopyd.c index f82147029636..320401dec104 100644 --- a/trunk/drivers/md/dm-kcopyd.c +++ b/trunk/drivers/md/dm-kcopyd.c @@ -224,6 +224,8 @@ struct kcopyd_job { unsigned int num_dests; struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS]; + sector_t offset; + unsigned int nr_pages; struct page_list *pages; /* @@ -378,7 +380,7 @@ static int run_io_job(struct kcopyd_job *job) .bi_rw = job->rw, .mem.type = DM_IO_PAGE_LIST, .mem.ptr.pl = job->pages, - .mem.offset = 0, + .mem.offset = job->offset, .notify.fn = complete_io, .notify.context = job, .client = job->kc->io_client, @@ -395,9 +397,10 @@ static int run_io_job(struct kcopyd_job *job) static int run_pages_job(struct kcopyd_job *job) { int r; - unsigned nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9); - r = kcopyd_get_pages(job->kc, nr_pages, &job->pages); + job->nr_pages = dm_div_up(job->dests[0].count + job->offset, + PAGE_SIZE >> 9); + r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages); if (!r) { /* this job is ready for io */ push(&job->kc->io_jobs, job); @@ -599,6 +602,8 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, job->num_dests = num_dests; memcpy(&job->dests, dests, sizeof(*dests) * num_dests); + job->offset = 0; + job->nr_pages = 0; job->pages = NULL; job->fn = fn; @@ -617,37 +622,6 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, } EXPORT_SYMBOL(dm_kcopyd_copy); -void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, - dm_kcopyd_notify_fn fn, void *context) -{ - struct kcopyd_job *job; - - job = mempool_alloc(kc->job_pool, GFP_NOIO); - - memset(job, 0, sizeof(struct kcopyd_job)); - job->kc = kc; - job->fn = fn; - job->context = context; - - atomic_inc(&kc->nr_jobs); - - return job; -} -EXPORT_SYMBOL(dm_kcopyd_prepare_callback); - -void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err) -{ - struct kcopyd_job *job = j; - struct dm_kcopyd_client *kc = job->kc; - - job->read_err = read_err; - job->write_err = write_err; - - push(&kc->complete_jobs, job); - wake(kc); -} -EXPORT_SYMBOL(dm_kcopyd_do_callback); - /* * Cancels a kcopyd job, eg. someone might be deactivating a * mirror. diff --git a/trunk/drivers/md/dm-log-userspace-base.c b/trunk/drivers/md/dm-log-userspace-base.c index 1021c8986011..aa2e0c374ab3 100644 --- a/trunk/drivers/md/dm-log-userspace-base.c +++ b/trunk/drivers/md/dm-log-userspace-base.c @@ -394,7 +394,8 @@ static int flush_by_group(struct log_c *lc, struct list_head *flush_list) group[count] = fe->region; count++; - list_move(&fe->list, &tmp_list); + list_del(&fe->list); + list_add(&fe->list, &tmp_list); type = fe->type; if (count >= MAX_FLUSH_GROUP_COUNT) diff --git a/trunk/drivers/md/dm-log.c b/trunk/drivers/md/dm-log.c index 3b52bb72bd1f..948e3f4925bf 100644 --- a/trunk/drivers/md/dm-log.c +++ b/trunk/drivers/md/dm-log.c @@ -197,21 +197,15 @@ EXPORT_SYMBOL(dm_dirty_log_destroy); #define MIRROR_DISK_VERSION 2 #define LOG_OFFSET 2 -struct log_header_disk { - __le32 magic; +struct log_header { + uint32_t magic; /* * Simple, incrementing version. no backward * compatibility. */ - __le32 version; - __le64 nr_regions; -} __packed; - -struct log_header_core { - uint32_t magic; uint32_t version; - uint64_t nr_regions; + sector_t nr_regions; }; struct log_c { @@ -245,10 +239,10 @@ struct log_c { int log_dev_failed; int log_dev_flush_failed; struct dm_dev *log_dev; - struct log_header_core header; + struct log_header header; struct dm_io_region header_location; - struct log_header_disk *disk_header; + struct log_header *disk_header; }; /* @@ -257,34 +251,34 @@ struct log_c { */ static inline int log_test_bit(uint32_t *bs, unsigned bit) { - return test_bit_le(bit, bs) ? 1 : 0; + return test_bit_le(bit, (unsigned long *) bs) ? 1 : 0; } static inline void log_set_bit(struct log_c *l, uint32_t *bs, unsigned bit) { - __set_bit_le(bit, bs); + __test_and_set_bit_le(bit, (unsigned long *) bs); l->touched_cleaned = 1; } static inline void log_clear_bit(struct log_c *l, uint32_t *bs, unsigned bit) { - __clear_bit_le(bit, bs); + __test_and_clear_bit_le(bit, (unsigned long *) bs); l->touched_dirtied = 1; } /*---------------------------------------------------------------- * Header IO *--------------------------------------------------------------*/ -static void header_to_disk(struct log_header_core *core, struct log_header_disk *disk) +static void header_to_disk(struct log_header *core, struct log_header *disk) { disk->magic = cpu_to_le32(core->magic); disk->version = cpu_to_le32(core->version); disk->nr_regions = cpu_to_le64(core->nr_regions); } -static void header_from_disk(struct log_header_core *core, struct log_header_disk *disk) +static void header_from_disk(struct log_header *core, struct log_header *disk) { core->magic = le32_to_cpu(disk->magic); core->version = le32_to_cpu(disk->version); @@ -492,7 +486,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, memset(lc->sync_bits, (sync == NOSYNC) ? -1 : 0, bitset_size); lc->sync_count = (sync == NOSYNC) ? region_count : 0; - lc->recovering_bits = vzalloc(bitset_size); + lc->recovering_bits = vmalloc(bitset_size); if (!lc->recovering_bits) { DMWARN("couldn't allocate sync bitset"); vfree(lc->sync_bits); @@ -504,6 +498,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, kfree(lc); return -ENOMEM; } + memset(lc->recovering_bits, 0, bitset_size); lc->sync_search = 0; log->context = lc; @@ -744,7 +739,8 @@ static int core_get_resync_work(struct dm_dirty_log *log, region_t *region) return 0; do { - *region = find_next_zero_bit_le(lc->sync_bits, + *region = find_next_zero_bit_le( + (unsigned long *) lc->sync_bits, lc->region_count, lc->sync_search); lc->sync_search = *region + 1; diff --git a/trunk/drivers/md/dm-mpath.c b/trunk/drivers/md/dm-mpath.c index 5e0090ef4182..c3547016f0f1 100644 --- a/trunk/drivers/md/dm-mpath.c +++ b/trunk/drivers/md/dm-mpath.c @@ -22,6 +22,7 @@ #include #define DM_MSG_PREFIX "multipath" +#define MESG_STR(x) x, sizeof(x) #define DM_PG_INIT_DELAY_MSECS 2000 #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1) @@ -504,29 +505,80 @@ static void trigger_event(struct work_struct *work) * <#paths> <#per-path selector args> * [ []* ]+ ]+ *---------------------------------------------------------------*/ -static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg, +struct param { + unsigned min; + unsigned max; + char *error; +}; + +static int read_param(struct param *param, char *str, unsigned *v, char **error) +{ + if (!str || + (sscanf(str, "%u", v) != 1) || + (*v < param->min) || + (*v > param->max)) { + *error = param->error; + return -EINVAL; + } + + return 0; +} + +struct arg_set { + unsigned argc; + char **argv; +}; + +static char *shift(struct arg_set *as) +{ + char *r; + + if (as->argc) { + as->argc--; + r = *as->argv; + as->argv++; + return r; + } + + return NULL; +} + +static void consume(struct arg_set *as, unsigned n) +{ + BUG_ON (as->argc < n); + as->argc -= n; + as->argv += n; +} + +static int parse_path_selector(struct arg_set *as, struct priority_group *pg, struct dm_target *ti) { int r; struct path_selector_type *pst; unsigned ps_argc; - static struct dm_arg _args[] = { + static struct param _params[] = { {0, 1024, "invalid number of path selector args"}, }; - pst = dm_get_path_selector(dm_shift_arg(as)); + pst = dm_get_path_selector(shift(as)); if (!pst) { ti->error = "unknown path selector type"; return -EINVAL; } - r = dm_read_arg_group(_args, as, &ps_argc, &ti->error); + r = read_param(_params, shift(as), &ps_argc, &ti->error); if (r) { dm_put_path_selector(pst); return -EINVAL; } + if (ps_argc > as->argc) { + dm_put_path_selector(pst); + ti->error = "not enough arguments for path selector"; + return -EINVAL; + } + r = pst->create(&pg->ps, ps_argc, as->argv); if (r) { dm_put_path_selector(pst); @@ -535,12 +587,12 @@ static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg, } pg->ps.type = pst; - dm_consume_args(as, ps_argc); + consume(as, ps_argc); return 0; } -static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps, +static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, struct dm_target *ti) { int r; @@ -557,7 +609,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps if (!p) return ERR_PTR(-ENOMEM); - r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table), + r = dm_get_device(ti, shift(as), dm_table_get_mode(ti->table), &p->path.dev); if (r) { ti->error = "error getting device"; @@ -608,16 +660,16 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps return ERR_PTR(r); } -static struct priority_group *parse_priority_group(struct dm_arg_set *as, +static struct priority_group *parse_priority_group(struct arg_set *as, struct multipath *m) { - static struct dm_arg _args[] = { + static struct param _params[] = { {1, 1024, "invalid number of paths"}, {0, 1024, "invalid number of selector args"} }; int r; - unsigned i, nr_selector_args, nr_args; + unsigned i, nr_selector_args, nr_params; struct priority_group *pg; struct dm_target *ti = m->ti; @@ -641,26 +693,26 @@ static struct priority_group *parse_priority_group(struct dm_arg_set *as, /* * read the paths */ - r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error); + r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error); if (r) goto bad; - r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error); + r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error); if (r) goto bad; - nr_args = 1 + nr_selector_args; + nr_params = 1 + nr_selector_args; for (i = 0; i < pg->nr_pgpaths; i++) { struct pgpath *pgpath; - struct dm_arg_set path_args; + struct arg_set path_args; - if (as->argc < nr_args) { + if (as->argc < nr_params) { ti->error = "not enough path parameters"; r = -EINVAL; goto bad; } - path_args.argc = nr_args; + path_args.argc = nr_params; path_args.argv = as->argv; pgpath = parse_path(&path_args, &pg->ps, ti); @@ -671,7 +723,7 @@ static struct priority_group *parse_priority_group(struct dm_arg_set *as, pgpath->pg = pg; list_add_tail(&pgpath->list, &pg->pgpaths); - dm_consume_args(as, nr_args); + consume(as, nr_params); } return pg; @@ -681,23 +733,28 @@ static struct priority_group *parse_priority_group(struct dm_arg_set *as, return ERR_PTR(r); } -static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) +static int parse_hw_handler(struct arg_set *as, struct multipath *m) { unsigned hw_argc; int ret; struct dm_target *ti = m->ti; - static struct dm_arg _args[] = { + static struct param _params[] = { {0, 1024, "invalid number of hardware handler args"}, }; - if (dm_read_arg_group(_args, as, &hw_argc, &ti->error)) + if (read_param(_params, shift(as), &hw_argc, &ti->error)) return -EINVAL; if (!hw_argc) return 0; - m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL); + if (hw_argc > as->argc) { + ti->error = "not enough arguments for hardware handler"; + return -EINVAL; + } + + m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL); request_module("scsi_dh_%s", m->hw_handler_name); if (scsi_dh_handler_exist(m->hw_handler_name) == 0) { ti->error = "unknown hardware handler type"; @@ -721,7 +778,7 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1) j = sprintf(p, "%s", as->argv[i]); } - dm_consume_args(as, hw_argc - 1); + consume(as, hw_argc - 1); return 0; fail: @@ -730,20 +787,20 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) return ret; } -static int parse_features(struct dm_arg_set *as, struct multipath *m) +static int parse_features(struct arg_set *as, struct multipath *m) { int r; unsigned argc; struct dm_target *ti = m->ti; - const char *arg_name; + const char *param_name; - static struct dm_arg _args[] = { + static struct param _params[] = { {0, 5, "invalid number of feature args"}, {1, 50, "pg_init_retries must be between 1 and 50"}, {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, }; - r = dm_read_arg_group(_args, as, &argc, &ti->error); + r = read_param(_params, shift(as), &argc, &ti->error); if (r) return -EINVAL; @@ -751,24 +808,26 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) return 0; do { - arg_name = dm_shift_arg(as); + param_name = shift(as); argc--; - if (!strcasecmp(arg_name, "queue_if_no_path")) { + if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) { r = queue_if_no_path(m, 1, 0); continue; } - if (!strcasecmp(arg_name, "pg_init_retries") && + if (!strnicmp(param_name, MESG_STR("pg_init_retries")) && (argc >= 1)) { - r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error); + r = read_param(_params + 1, shift(as), + &m->pg_init_retries, &ti->error); argc--; continue; } - if (!strcasecmp(arg_name, "pg_init_delay_msecs") && + if (!strnicmp(param_name, MESG_STR("pg_init_delay_msecs")) && (argc >= 1)) { - r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error); + r = read_param(_params + 2, shift(as), + &m->pg_init_delay_msecs, &ti->error); argc--; continue; } @@ -783,15 +842,15 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) static int multipath_ctr(struct dm_target *ti, unsigned int argc, char **argv) { - /* target arguments */ - static struct dm_arg _args[] = { + /* target parameters */ + static struct param _params[] = { {0, 1024, "invalid number of priority groups"}, {0, 1024, "invalid initial priority group number"}, }; int r; struct multipath *m; - struct dm_arg_set as; + struct arg_set as; unsigned pg_count = 0; unsigned next_pg_num; @@ -812,11 +871,11 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, if (r) goto bad; - r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error); + r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error); if (r) goto bad; - r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error); + r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error); if (r) goto bad; @@ -1446,10 +1505,10 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) } if (argc == 1) { - if (!strcasecmp(argv[0], "queue_if_no_path")) { + if (!strnicmp(argv[0], MESG_STR("queue_if_no_path"))) { r = queue_if_no_path(m, 1, 0); goto out; - } else if (!strcasecmp(argv[0], "fail_if_no_path")) { + } else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path"))) { r = queue_if_no_path(m, 0, 0); goto out; } @@ -1460,18 +1519,18 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) goto out; } - if (!strcasecmp(argv[0], "disable_group")) { + if (!strnicmp(argv[0], MESG_STR("disable_group"))) { r = bypass_pg_num(m, argv[1], 1); goto out; - } else if (!strcasecmp(argv[0], "enable_group")) { + } else if (!strnicmp(argv[0], MESG_STR("enable_group"))) { r = bypass_pg_num(m, argv[1], 0); goto out; - } else if (!strcasecmp(argv[0], "switch_group")) { + } else if (!strnicmp(argv[0], MESG_STR("switch_group"))) { r = switch_pg_num(m, argv[1]); goto out; - } else if (!strcasecmp(argv[0], "reinstate_path")) + } else if (!strnicmp(argv[0], MESG_STR("reinstate_path"))) action = reinstate_path; - else if (!strcasecmp(argv[0], "fail_path")) + else if (!strnicmp(argv[0], MESG_STR("fail_path"))) action = fail_path; else { DMWARN("Unrecognised multipath message received."); diff --git a/trunk/drivers/md/dm-raid.c b/trunk/drivers/md/dm-raid.c index a002dd85db1e..e5d8904fc8f6 100644 --- a/trunk/drivers/md/dm-raid.c +++ b/trunk/drivers/md/dm-raid.c @@ -8,19 +8,19 @@ #include #include "md.h" -#include "raid1.h" #include "raid5.h" +#include "dm.h" #include "bitmap.h" -#include - #define DM_MSG_PREFIX "raid" /* - * The following flags are used by dm-raid.c to set up the array state. - * They must be cleared before md_run is called. + * If the MD doesn't support MD_SYNC_STATE_FORCED yet, then + * make it so the flag doesn't set anything. */ -#define FirstUse 10 /* rdev flag */ +#ifndef MD_SYNC_STATE_FORCED +#define MD_SYNC_STATE_FORCED 0 +#endif struct raid_dev { /* @@ -43,15 +43,14 @@ struct raid_dev { /* * Flags for rs->print_flags field. */ -#define DMPF_SYNC 0x1 -#define DMPF_NOSYNC 0x2 -#define DMPF_REBUILD 0x4 -#define DMPF_DAEMON_SLEEP 0x8 -#define DMPF_MIN_RECOVERY_RATE 0x10 -#define DMPF_MAX_RECOVERY_RATE 0x20 -#define DMPF_MAX_WRITE_BEHIND 0x40 -#define DMPF_STRIPE_CACHE 0x80 -#define DMPF_REGION_SIZE 0X100 +#define DMPF_DAEMON_SLEEP 0x1 +#define DMPF_MAX_WRITE_BEHIND 0x2 +#define DMPF_SYNC 0x4 +#define DMPF_NOSYNC 0x8 +#define DMPF_STRIPE_CACHE 0x10 +#define DMPF_MIN_RECOVERY_RATE 0x20 +#define DMPF_MAX_RECOVERY_RATE 0x40 + struct raid_set { struct dm_target *ti; @@ -73,7 +72,6 @@ static struct raid_type { const unsigned level; /* RAID level. */ const unsigned algorithm; /* RAID algorithm. */ } raid_types[] = { - {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */}, {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, @@ -107,8 +105,7 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra } sectors_per_dev = ti->len; - if ((raid_type->level > 1) && - sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) { + if (sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) { ti->error = "Target length not divisible by number of data devices"; return ERR_PTR(-EINVAL); } @@ -150,16 +147,9 @@ static void context_free(struct raid_set *rs) { int i; - for (i = 0; i < rs->md.raid_disks; i++) { - if (rs->dev[i].meta_dev) - dm_put_device(rs->ti, rs->dev[i].meta_dev); - if (rs->dev[i].rdev.sb_page) - put_page(rs->dev[i].rdev.sb_page); - rs->dev[i].rdev.sb_page = NULL; - rs->dev[i].rdev.sb_loaded = 0; + for (i = 0; i < rs->md.raid_disks; i++) if (rs->dev[i].data_dev) dm_put_device(rs->ti, rs->dev[i].data_dev); - } kfree(rs); } @@ -169,16 +159,7 @@ static void context_free(struct raid_set *rs) * : meta device name or '-' if missing * : data device name or '-' if missing * - * The following are permitted: - * - - - * - - * - * - * The following is not allowed: - * - - * - * This code parses those words. If there is a failure, - * the caller must use context_free to unwind the operations. + * This code parses those words. */ static int dev_parms(struct raid_set *rs, char **argv) { @@ -201,16 +182,8 @@ static int dev_parms(struct raid_set *rs, char **argv) rs->dev[i].rdev.mddev = &rs->md; if (strcmp(argv[0], "-")) { - ret = dm_get_device(rs->ti, argv[0], - dm_table_get_mode(rs->ti->table), - &rs->dev[i].meta_dev); - rs->ti->error = "RAID metadata device lookup failure"; - if (ret) - return ret; - - rs->dev[i].rdev.sb_page = alloc_page(GFP_KERNEL); - if (!rs->dev[i].rdev.sb_page) - return -ENOMEM; + rs->ti->error = "Metadata devices not supported"; + return -EINVAL; } if (!strcmp(argv[1], "-")) { @@ -220,10 +193,6 @@ static int dev_parms(struct raid_set *rs, char **argv) return -EINVAL; } - rs->ti->error = "No data device supplied with metadata device"; - if (rs->dev[i].meta_dev) - return -EINVAL; - continue; } @@ -235,10 +204,6 @@ static int dev_parms(struct raid_set *rs, char **argv) return ret; } - if (rs->dev[i].meta_dev) { - metadata_available = 1; - rs->dev[i].rdev.meta_bdev = rs->dev[i].meta_dev->bdev; - } rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev; list_add(&rs->dev[i].rdev.same_set, &rs->md.disks); if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) @@ -269,110 +234,34 @@ static int dev_parms(struct raid_set *rs, char **argv) return 0; } -/* - * validate_region_size - * @rs - * @region_size: region size in sectors. If 0, pick a size (4MiB default). - * - * Set rs->md.bitmap_info.chunksize (which really refers to 'region size'). - * Ensure that (ti->len/region_size < 2^21) - required by MD bitmap. - * - * Returns: 0 on success, -EINVAL on failure. - */ -static int validate_region_size(struct raid_set *rs, unsigned long region_size) -{ - unsigned long min_region_size = rs->ti->len / (1 << 21); - - if (!region_size) { - /* - * Choose a reasonable default. All figures in sectors. - */ - if (min_region_size > (1 << 13)) { - DMINFO("Choosing default region size of %lu sectors", - region_size); - region_size = min_region_size; - } else { - DMINFO("Choosing default region size of 4MiB"); - region_size = 1 << 13; /* sectors */ - } - } else { - /* - * Validate user-supplied value. - */ - if (region_size > rs->ti->len) { - rs->ti->error = "Supplied region size is too large"; - return -EINVAL; - } - - if (region_size < min_region_size) { - DMERR("Supplied region_size (%lu sectors) below minimum (%lu)", - region_size, min_region_size); - rs->ti->error = "Supplied region size is too small"; - return -EINVAL; - } - - if (!is_power_of_2(region_size)) { - rs->ti->error = "Region size is not a power of 2"; - return -EINVAL; - } - - if (region_size < rs->md.chunk_sectors) { - rs->ti->error = "Region size is smaller than the chunk size"; - return -EINVAL; - } - } - - /* - * Convert sectors to bytes. - */ - rs->md.bitmap_info.chunksize = (region_size << 9); - - return 0; -} - /* * Possible arguments are... + * RAID456: * [optional_args] * - * Argument definitions - * The number of sectors per disk that - * will form the "stripe" - * [[no]sync] Force or prevent recovery of the - * entire array + * Optional args: + * [[no]sync] Force or prevent recovery of the entire array * [rebuild ] Rebuild the drive indicated by the index - * [daemon_sleep ] Time between bitmap daemon work to - * clear bits + * [daemon_sleep ] Time between bitmap daemon work to clear bits * [min_recovery_rate ] Throttle RAID initialization * [max_recovery_rate ] Throttle RAID initialization - * [write_mostly ] Indicate a write mostly drive via index * [max_write_behind ] See '-write-behind=' (man mdadm) * [stripe_cache ] Stripe cache size for higher RAIDs - * [region_size ] Defines granularity of bitmap */ static int parse_raid_params(struct raid_set *rs, char **argv, unsigned num_raid_params) { unsigned i, rebuild_cnt = 0; - unsigned long value, region_size = 0; + unsigned long value; char *key; /* * First, parse the in-order required arguments - * "chunk_size" is the only argument of this type. */ - if ((strict_strtoul(argv[0], 10, &value) < 0)) { + if ((strict_strtoul(argv[0], 10, &value) < 0) || + !is_power_of_2(value) || (value < 8)) { rs->ti->error = "Bad chunk size"; return -EINVAL; - } else if (rs->raid_type->level == 1) { - if (value) - DMERR("Ignoring chunk size parameter for RAID 1"); - value = 0; - } else if (!is_power_of_2(value)) { - rs->ti->error = "Chunk size must be a power of 2"; - return -EINVAL; - } else if (value < 8) { - rs->ti->error = "Chunk size value is too small"; - return -EINVAL; } rs->md.new_chunk_sectors = rs->md.chunk_sectors = value; @@ -380,39 +269,22 @@ static int parse_raid_params(struct raid_set *rs, char **argv, num_raid_params--; /* - * We set each individual device as In_sync with a completed - * 'recovery_offset'. If there has been a device failure or - * replacement then one of the following cases applies: - * - * 1) User specifies 'rebuild'. - * - Device is reset when param is read. - * 2) A new device is supplied. - * - No matching superblock found, resets device. - * 3) Device failure was transient and returns on reload. - * - Failure noticed, resets device for bitmap replay. - * 4) Device hadn't completed recovery after previous failure. - * - Superblock is read and overrides recovery_offset. - * - * What is found in the superblocks of the devices is always - * authoritative, unless 'rebuild' or '[no]sync' was specified. + * Second, parse the unordered optional arguments */ - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < rs->md.raid_disks; i++) set_bit(In_sync, &rs->dev[i].rdev.flags); - rs->dev[i].rdev.recovery_offset = MaxSector; - } - /* - * Second, parse the unordered optional arguments - */ for (i = 0; i < num_raid_params; i++) { - if (!strcasecmp(argv[i], "nosync")) { + if (!strcmp(argv[i], "nosync")) { rs->md.recovery_cp = MaxSector; rs->print_flags |= DMPF_NOSYNC; + rs->md.flags |= MD_SYNC_STATE_FORCED; continue; } - if (!strcasecmp(argv[i], "sync")) { + if (!strcmp(argv[i], "sync")) { rs->md.recovery_cp = 0; rs->print_flags |= DMPF_SYNC; + rs->md.flags |= MD_SYNC_STATE_FORCED; continue; } @@ -428,13 +300,9 @@ static int parse_raid_params(struct raid_set *rs, char **argv, return -EINVAL; } - if (!strcasecmp(key, "rebuild")) { - rebuild_cnt++; - if (((rs->raid_type->level != 1) && - (rebuild_cnt > rs->raid_type->parity_devs)) || - ((rs->raid_type->level == 1) && - (rebuild_cnt > (rs->md.raid_disks - 1)))) { - rs->ti->error = "Too many rebuild devices specified for given RAID type"; + if (!strcmp(key, "rebuild")) { + if (++rebuild_cnt > rs->raid_type->parity_devs) { + rs->ti->error = "Too many rebuild drives given"; return -EINVAL; } if (value > rs->md.raid_disks) { @@ -443,22 +311,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv, } clear_bit(In_sync, &rs->dev[value].rdev.flags); rs->dev[value].rdev.recovery_offset = 0; - rs->print_flags |= DMPF_REBUILD; - } else if (!strcasecmp(key, "write_mostly")) { - if (rs->raid_type->level != 1) { - rs->ti->error = "write_mostly option is only valid for RAID1"; - return -EINVAL; - } - if (value > rs->md.raid_disks) { - rs->ti->error = "Invalid write_mostly drive index given"; - return -EINVAL; - } - set_bit(WriteMostly, &rs->dev[value].rdev.flags); - } else if (!strcasecmp(key, "max_write_behind")) { - if (rs->raid_type->level != 1) { - rs->ti->error = "max_write_behind option is only valid for RAID1"; - return -EINVAL; - } + } else if (!strcmp(key, "max_write_behind")) { rs->print_flags |= DMPF_MAX_WRITE_BEHIND; /* @@ -471,14 +324,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv, return -EINVAL; } rs->md.bitmap_info.max_write_behind = value; - } else if (!strcasecmp(key, "daemon_sleep")) { + } else if (!strcmp(key, "daemon_sleep")) { rs->print_flags |= DMPF_DAEMON_SLEEP; if (!value || (value > MAX_SCHEDULE_TIMEOUT)) { rs->ti->error = "daemon sleep period out of range"; return -EINVAL; } rs->md.bitmap_info.daemon_sleep = value; - } else if (!strcasecmp(key, "stripe_cache")) { + } else if (!strcmp(key, "stripe_cache")) { rs->print_flags |= DMPF_STRIPE_CACHE; /* @@ -495,23 +348,20 @@ static int parse_raid_params(struct raid_set *rs, char **argv, rs->ti->error = "Bad stripe_cache size"; return -EINVAL; } - } else if (!strcasecmp(key, "min_recovery_rate")) { + } else if (!strcmp(key, "min_recovery_rate")) { rs->print_flags |= DMPF_MIN_RECOVERY_RATE; if (value > INT_MAX) { rs->ti->error = "min_recovery_rate out of range"; return -EINVAL; } rs->md.sync_speed_min = (int)value; - } else if (!strcasecmp(key, "max_recovery_rate")) { + } else if (!strcmp(key, "max_recovery_rate")) { rs->print_flags |= DMPF_MAX_RECOVERY_RATE; if (value > INT_MAX) { rs->ti->error = "max_recovery_rate out of range"; return -EINVAL; } rs->md.sync_speed_max = (int)value; - } else if (!strcasecmp(key, "region_size")) { - rs->print_flags |= DMPF_REGION_SIZE; - region_size = value; } else { DMERR("Unable to parse RAID parameter: %s", key); rs->ti->error = "Unable to parse RAID parameters"; @@ -519,19 +369,6 @@ static int parse_raid_params(struct raid_set *rs, char **argv, } } - if (validate_region_size(rs, region_size)) - return -EINVAL; - - if (rs->md.chunk_sectors) - rs->ti->split_io = rs->md.chunk_sectors; - else - rs->ti->split_io = region_size; - - if (rs->md.chunk_sectors) - rs->ti->split_io = rs->md.chunk_sectors; - else - rs->ti->split_io = region_size; - /* Assume there are no metadata devices until the drives are parsed */ rs->md.persistent = 0; rs->md.external = 1; @@ -550,351 +387,17 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) { struct raid_set *rs = container_of(cb, struct raid_set, callbacks); - if (rs->raid_type->level == 1) - return md_raid1_congested(&rs->md, bits); - return md_raid5_congested(&rs->md, bits); } -/* - * This structure is never routinely used by userspace, unlike md superblocks. - * Devices with this superblock should only ever be accessed via device-mapper. - */ -#define DM_RAID_MAGIC 0x64526D44 -struct dm_raid_superblock { - __le32 magic; /* "DmRd" */ - __le32 features; /* Used to indicate possible future changes */ - - __le32 num_devices; /* Number of devices in this array. (Max 64) */ - __le32 array_position; /* The position of this drive in the array */ - - __le64 events; /* Incremented by md when superblock updated */ - __le64 failed_devices; /* Bit field of devices to indicate failures */ - - /* - * This offset tracks the progress of the repair or replacement of - * an individual drive. - */ - __le64 disk_recovery_offset; - - /* - * This offset tracks the progress of the initial array - * synchronisation/parity calculation. - */ - __le64 array_resync_offset; - - /* - * RAID characteristics - */ - __le32 level; - __le32 layout; - __le32 stripe_sectors; - - __u8 pad[452]; /* Round struct to 512 bytes. */ - /* Always set to 0 when writing. */ -} __packed; - -static int read_disk_sb(mdk_rdev_t *rdev, int size) -{ - BUG_ON(!rdev->sb_page); - - if (rdev->sb_loaded) - return 0; - - if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) { - DMERR("Failed to read device superblock"); - return -EINVAL; - } - - rdev->sb_loaded = 1; - - return 0; -} - -static void super_sync(mddev_t *mddev, mdk_rdev_t *rdev) -{ - mdk_rdev_t *r, *t; - uint64_t failed_devices; - struct dm_raid_superblock *sb; - - sb = page_address(rdev->sb_page); - failed_devices = le64_to_cpu(sb->failed_devices); - - rdev_for_each(r, t, mddev) - if ((r->raid_disk >= 0) && test_bit(Faulty, &r->flags)) - failed_devices |= (1ULL << r->raid_disk); - - memset(sb, 0, sizeof(*sb)); - - sb->magic = cpu_to_le32(DM_RAID_MAGIC); - sb->features = cpu_to_le32(0); /* No features yet */ - - sb->num_devices = cpu_to_le32(mddev->raid_disks); - sb->array_position = cpu_to_le32(rdev->raid_disk); - - sb->events = cpu_to_le64(mddev->events); - sb->failed_devices = cpu_to_le64(failed_devices); - - sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset); - sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp); - - sb->level = cpu_to_le32(mddev->level); - sb->layout = cpu_to_le32(mddev->layout); - sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors); -} - -/* - * super_load - * - * This function creates a superblock if one is not found on the device - * and will decide which superblock to use if there's a choice. - * - * Return: 1 if use rdev, 0 if use refdev, -Exxx otherwise - */ -static int super_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev) -{ - int ret; - struct dm_raid_superblock *sb; - struct dm_raid_superblock *refsb; - uint64_t events_sb, events_refsb; - - rdev->sb_start = 0; - rdev->sb_size = sizeof(*sb); - - ret = read_disk_sb(rdev, rdev->sb_size); - if (ret) - return ret; - - sb = page_address(rdev->sb_page); - if (sb->magic != cpu_to_le32(DM_RAID_MAGIC)) { - super_sync(rdev->mddev, rdev); - - set_bit(FirstUse, &rdev->flags); - - /* Force writing of superblocks to disk */ - set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags); - - /* Any superblock is better than none, choose that if given */ - return refdev ? 0 : 1; - } - - if (!refdev) - return 1; - - events_sb = le64_to_cpu(sb->events); - - refsb = page_address(refdev->sb_page); - events_refsb = le64_to_cpu(refsb->events); - - return (events_sb > events_refsb) ? 1 : 0; -} - -static int super_init_validation(mddev_t *mddev, mdk_rdev_t *rdev) -{ - int role; - struct raid_set *rs = container_of(mddev, struct raid_set, md); - uint64_t events_sb; - uint64_t failed_devices; - struct dm_raid_superblock *sb; - uint32_t new_devs = 0; - uint32_t rebuilds = 0; - mdk_rdev_t *r, *t; - struct dm_raid_superblock *sb2; - - sb = page_address(rdev->sb_page); - events_sb = le64_to_cpu(sb->events); - failed_devices = le64_to_cpu(sb->failed_devices); - - /* - * Initialise to 1 if this is a new superblock. - */ - mddev->events = events_sb ? : 1; - - /* - * Reshaping is not currently allowed - */ - if ((le32_to_cpu(sb->level) != mddev->level) || - (le32_to_cpu(sb->layout) != mddev->layout) || - (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors)) { - DMERR("Reshaping arrays not yet supported."); - return -EINVAL; - } - - /* We can only change the number of devices in RAID1 right now */ - if ((rs->raid_type->level != 1) && - (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) { - DMERR("Reshaping arrays not yet supported."); - return -EINVAL; - } - - if (!(rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC))) - mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset); - - /* - * During load, we set FirstUse if a new superblock was written. - * There are two reasons we might not have a superblock: - * 1) The array is brand new - in which case, all of the - * devices must have their In_sync bit set. Also, - * recovery_cp must be 0, unless forced. - * 2) This is a new device being added to an old array - * and the new device needs to be rebuilt - in which - * case the In_sync bit will /not/ be set and - * recovery_cp must be MaxSector. - */ - rdev_for_each(r, t, mddev) { - if (!test_bit(In_sync, &r->flags)) { - if (!test_bit(FirstUse, &r->flags)) - DMERR("Superblock area of " - "rebuild device %d should have been " - "cleared.", r->raid_disk); - set_bit(FirstUse, &r->flags); - rebuilds++; - } else if (test_bit(FirstUse, &r->flags)) - new_devs++; - } - - if (!rebuilds) { - if (new_devs == mddev->raid_disks) { - DMINFO("Superblocks created for new array"); - set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); - } else if (new_devs) { - DMERR("New device injected " - "into existing array without 'rebuild' " - "parameter specified"); - return -EINVAL; - } - } else if (new_devs) { - DMERR("'rebuild' devices cannot be " - "injected into an array with other first-time devices"); - return -EINVAL; - } else if (mddev->recovery_cp != MaxSector) { - DMERR("'rebuild' specified while array is not in-sync"); - return -EINVAL; - } - - /* - * Now we set the Faulty bit for those devices that are - * recorded in the superblock as failed. - */ - rdev_for_each(r, t, mddev) { - if (!r->sb_page) - continue; - sb2 = page_address(r->sb_page); - sb2->failed_devices = 0; - - /* - * Check for any device re-ordering. - */ - if (!test_bit(FirstUse, &r->flags) && (r->raid_disk >= 0)) { - role = le32_to_cpu(sb2->array_position); - if (role != r->raid_disk) { - if (rs->raid_type->level != 1) { - rs->ti->error = "Cannot change device " - "positions in RAID array"; - return -EINVAL; - } - DMINFO("RAID1 device #%d now at position #%d", - role, r->raid_disk); - } - - /* - * Partial recovery is performed on - * returning failed devices. - */ - if (failed_devices & (1 << role)) - set_bit(Faulty, &r->flags); - } - } - - return 0; -} - -static int super_validate(mddev_t *mddev, mdk_rdev_t *rdev) -{ - struct dm_raid_superblock *sb = page_address(rdev->sb_page); - - /* - * If mddev->events is not set, we know we have not yet initialized - * the array. - */ - if (!mddev->events && super_init_validation(mddev, rdev)) - return -EINVAL; - - mddev->bitmap_info.offset = 4096 >> 9; /* Enable bitmap creation */ - rdev->mddev->bitmap_info.default_offset = 4096 >> 9; - if (!test_bit(FirstUse, &rdev->flags)) { - rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset); - if (rdev->recovery_offset != MaxSector) - clear_bit(In_sync, &rdev->flags); - } - - /* - * If a device comes back, set it as not In_sync and no longer faulty. - */ - if (test_bit(Faulty, &rdev->flags)) { - clear_bit(Faulty, &rdev->flags); - clear_bit(In_sync, &rdev->flags); - rdev->saved_raid_disk = rdev->raid_disk; - rdev->recovery_offset = 0; - } - - clear_bit(FirstUse, &rdev->flags); - - return 0; -} - -/* - * Analyse superblocks and select the freshest. - */ -static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) -{ - int ret; - mdk_rdev_t *rdev, *freshest, *tmp; - mddev_t *mddev = &rs->md; - - freshest = NULL; - rdev_for_each(rdev, tmp, mddev) { - if (!rdev->meta_bdev) - continue; - - ret = super_load(rdev, freshest); - - switch (ret) { - case 1: - freshest = rdev; - break; - case 0: - break; - default: - ti->error = "Failed to load superblock"; - return ret; - } - } - - if (!freshest) - return 0; - - /* - * Validation of the freshest device provides the source of - * validation for the remaining devices. - */ - ti->error = "Unable to assemble array: Invalid superblocks"; - if (super_validate(mddev, freshest)) - return -EINVAL; - - rdev_for_each(rdev, tmp, mddev) - if ((rdev != freshest) && super_validate(mddev, rdev)) - return -EINVAL; - - return 0; -} - /* * Construct a RAID4/5/6 mapping: * Args: * <#raid_params> \ * <#raid_devs> { .. } * + * ** metadata devices are not supported yet, use '-' instead ** + * * varies by . See 'parse_raid_params' for * details on possible . */ @@ -962,12 +465,8 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) if (ret) goto bad; - rs->md.sync_super = super_sync; - ret = analyse_superblocks(ti, rs); - if (ret) - goto bad; - INIT_WORK(&rs->md.event_work, do_table_event); + ti->split_io = rs->md.chunk_sectors; ti->private = rs; mutex_lock(&rs->md.reconfig_mutex); @@ -983,7 +482,6 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) rs->callbacks.congested_fn = raid_is_congested; dm_table_add_target_callbacks(ti->table, &rs->callbacks); - mddev_suspend(&rs->md); return 0; bad: @@ -1048,17 +546,12 @@ static int raid_status(struct dm_target *ti, status_type_t type, break; case STATUSTYPE_TABLE: /* The string you would use to construct this array */ - for (i = 0; i < rs->md.raid_disks; i++) { - if ((rs->print_flags & DMPF_REBUILD) && - rs->dev[i].data_dev && - !test_bit(In_sync, &rs->dev[i].rdev.flags)) - raid_param_cnt += 2; /* for rebuilds */ + for (i = 0; i < rs->md.raid_disks; i++) if (rs->dev[i].data_dev && - test_bit(WriteMostly, &rs->dev[i].rdev.flags)) - raid_param_cnt += 2; - } + !test_bit(In_sync, &rs->dev[i].rdev.flags)) + raid_param_cnt++; /* for rebuilds */ - raid_param_cnt += (hweight64(rs->print_flags & ~DMPF_REBUILD) * 2); + raid_param_cnt += (hweight64(rs->print_flags) * 2); if (rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC)) raid_param_cnt--; @@ -1072,8 +565,7 @@ static int raid_status(struct dm_target *ti, status_type_t type, DMEMIT(" nosync"); for (i = 0; i < rs->md.raid_disks; i++) - if ((rs->print_flags & DMPF_REBUILD) && - rs->dev[i].data_dev && + if (rs->dev[i].data_dev && !test_bit(In_sync, &rs->dev[i].rdev.flags)) DMEMIT(" rebuild %u", i); @@ -1087,11 +579,6 @@ static int raid_status(struct dm_target *ti, status_type_t type, if (rs->print_flags & DMPF_MAX_RECOVERY_RATE) DMEMIT(" max_recovery_rate %d", rs->md.sync_speed_max); - for (i = 0; i < rs->md.raid_disks; i++) - if (rs->dev[i].data_dev && - test_bit(WriteMostly, &rs->dev[i].rdev.flags)) - DMEMIT(" write_mostly %u", i); - if (rs->print_flags & DMPF_MAX_WRITE_BEHIND) DMEMIT(" max_write_behind %lu", rs->md.bitmap_info.max_write_behind); @@ -1104,16 +591,9 @@ static int raid_status(struct dm_target *ti, status_type_t type, conf ? conf->max_nr_stripes * 2 : 0); } - if (rs->print_flags & DMPF_REGION_SIZE) - DMEMIT(" region_size %lu", - rs->md.bitmap_info.chunksize >> 9); - DMEMIT(" %d", rs->md.raid_disks); for (i = 0; i < rs->md.raid_disks; i++) { - if (rs->dev[i].meta_dev) - DMEMIT(" %s", rs->dev[i].meta_dev->name); - else - DMEMIT(" -"); + DMEMIT(" -"); /* metadata device */ if (rs->dev[i].data_dev) DMEMIT(" %s", rs->dev[i].data_dev->name); @@ -1170,13 +650,12 @@ static void raid_resume(struct dm_target *ti) { struct raid_set *rs = ti->private; - bitmap_load(&rs->md); mddev_resume(&rs->md); } static struct target_type raid_target = { .name = "raid", - .version = {1, 1, 0}, + .version = {1, 0, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, diff --git a/trunk/drivers/md/dm-snap-persistent.c b/trunk/drivers/md/dm-snap-persistent.c index d1f1d7017103..135c2f1fdbfc 100644 --- a/trunk/drivers/md/dm-snap-persistent.c +++ b/trunk/drivers/md/dm-snap-persistent.c @@ -58,30 +58,25 @@ #define NUM_SNAPSHOT_HDR_CHUNKS 1 struct disk_header { - __le32 magic; + uint32_t magic; /* * Is this snapshot valid. There is no way of recovering * an invalid snapshot. */ - __le32 valid; + uint32_t valid; /* * Simple, incrementing version. no backward * compatibility. */ - __le32 version; + uint32_t version; /* In sectors */ - __le32 chunk_size; -} __packed; + uint32_t chunk_size; +}; struct disk_exception { - __le64 old_chunk; - __le64 new_chunk; -} __packed; - -struct core_exception { uint64_t old_chunk; uint64_t new_chunk; }; @@ -174,9 +169,10 @@ static int alloc_area(struct pstore *ps) if (!ps->area) goto err_area; - ps->zero_area = vzalloc(len); + ps->zero_area = vmalloc(len); if (!ps->zero_area) goto err_zero_area; + memset(ps->zero_area, 0, len); ps->header_area = vmalloc(len); if (!ps->header_area) @@ -400,32 +396,32 @@ static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) } static void read_exception(struct pstore *ps, - uint32_t index, struct core_exception *result) + uint32_t index, struct disk_exception *result) { - struct disk_exception *de = get_exception(ps, index); + struct disk_exception *e = get_exception(ps, index); /* copy it */ - result->old_chunk = le64_to_cpu(de->old_chunk); - result->new_chunk = le64_to_cpu(de->new_chunk); + result->old_chunk = le64_to_cpu(e->old_chunk); + result->new_chunk = le64_to_cpu(e->new_chunk); } static void write_exception(struct pstore *ps, - uint32_t index, struct core_exception *e) + uint32_t index, struct disk_exception *de) { - struct disk_exception *de = get_exception(ps, index); + struct disk_exception *e = get_exception(ps, index); /* copy it */ - de->old_chunk = cpu_to_le64(e->old_chunk); - de->new_chunk = cpu_to_le64(e->new_chunk); + e->old_chunk = cpu_to_le64(de->old_chunk); + e->new_chunk = cpu_to_le64(de->new_chunk); } static void clear_exception(struct pstore *ps, uint32_t index) { - struct disk_exception *de = get_exception(ps, index); + struct disk_exception *e = get_exception(ps, index); /* clear it */ - de->old_chunk = 0; - de->new_chunk = 0; + e->old_chunk = 0; + e->new_chunk = 0; } /* @@ -441,13 +437,13 @@ static int insert_exceptions(struct pstore *ps, { int r; unsigned int i; - struct core_exception e; + struct disk_exception de; /* presume the area is full */ *full = 1; for (i = 0; i < ps->exceptions_per_area; i++) { - read_exception(ps, i, &e); + read_exception(ps, i, &de); /* * If the new_chunk is pointing at the start of @@ -455,7 +451,7 @@ static int insert_exceptions(struct pstore *ps, * is we know that we've hit the end of the * exceptions. Therefore the area is not full. */ - if (e.new_chunk == 0LL) { + if (de.new_chunk == 0LL) { ps->current_committed = i; *full = 0; break; @@ -464,13 +460,13 @@ static int insert_exceptions(struct pstore *ps, /* * Keep track of the start of the free chunks. */ - if (ps->next_free <= e.new_chunk) - ps->next_free = e.new_chunk + 1; + if (ps->next_free <= de.new_chunk) + ps->next_free = de.new_chunk + 1; /* * Otherwise we add the exception to the snapshot. */ - r = callback(callback_context, e.old_chunk, e.new_chunk); + r = callback(callback_context, de.old_chunk, de.new_chunk); if (r) return r; } @@ -567,7 +563,7 @@ static int persistent_read_metadata(struct dm_exception_store *store, ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) / sizeof(struct disk_exception); ps->callbacks = dm_vcalloc(ps->exceptions_per_area, - sizeof(*ps->callbacks)); + sizeof(*ps->callbacks)); if (!ps->callbacks) return -ENOMEM; @@ -645,12 +641,12 @@ static void persistent_commit_exception(struct dm_exception_store *store, { unsigned int i; struct pstore *ps = get_info(store); - struct core_exception ce; + struct disk_exception de; struct commit_callback *cb; - ce.old_chunk = e->old_chunk; - ce.new_chunk = e->new_chunk; - write_exception(ps, ps->current_committed++, &ce); + de.old_chunk = e->old_chunk; + de.new_chunk = e->new_chunk; + write_exception(ps, ps->current_committed++, &de); /* * Add the callback to the back of the array. This code @@ -674,7 +670,7 @@ static void persistent_commit_exception(struct dm_exception_store *store, * If we completely filled the current area, then wipe the next one. */ if ((ps->current_committed == ps->exceptions_per_area) && - zero_disk_area(ps, ps->current_area + 1)) + zero_disk_area(ps, ps->current_area + 1)) ps->valid = 0; /* @@ -705,7 +701,7 @@ static int persistent_prepare_merge(struct dm_exception_store *store, chunk_t *last_new_chunk) { struct pstore *ps = get_info(store); - struct core_exception ce; + struct disk_exception de; int nr_consecutive; int r; @@ -726,9 +722,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store, ps->current_committed = ps->exceptions_per_area; } - read_exception(ps, ps->current_committed - 1, &ce); - *last_old_chunk = ce.old_chunk; - *last_new_chunk = ce.new_chunk; + read_exception(ps, ps->current_committed - 1, &de); + *last_old_chunk = de.old_chunk; + *last_new_chunk = de.new_chunk; /* * Find number of consecutive chunks within the current area, @@ -737,9 +733,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store, for (nr_consecutive = 1; nr_consecutive < ps->current_committed; nr_consecutive++) { read_exception(ps, ps->current_committed - 1 - nr_consecutive, - &ce); - if (ce.old_chunk != *last_old_chunk - nr_consecutive || - ce.new_chunk != *last_new_chunk - nr_consecutive) + &de); + if (de.old_chunk != *last_old_chunk - nr_consecutive || + de.new_chunk != *last_new_chunk - nr_consecutive) break; } @@ -757,7 +753,7 @@ static int persistent_commit_merge(struct dm_exception_store *store, for (i = 0; i < nr_merged; i++) clear_exception(ps, ps->current_committed - 1 - i); - r = area_io(ps, WRITE_FLUSH_FUA); + r = area_io(ps, WRITE); if (r < 0) return r; diff --git a/trunk/drivers/md/dm-snap.c b/trunk/drivers/md/dm-snap.c index 6f758870fc19..9ecff5f3023a 100644 --- a/trunk/drivers/md/dm-snap.c +++ b/trunk/drivers/md/dm-snap.c @@ -29,6 +29,16 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; #define dm_target_is_snapshot_merge(ti) \ ((ti)->type->name == dm_snapshot_merge_target_name) +/* + * The percentage increment we will wake up users at + */ +#define WAKE_UP_PERCENT 5 + +/* + * kcopyd priority of snapshot operations + */ +#define SNAPSHOT_COPY_PRIORITY 2 + /* * The size of the mempool used to track chunks in use. */ @@ -170,13 +180,6 @@ struct dm_snap_pending_exception { * kcopyd. */ int started; - - /* - * For writing a complete chunk, bypassing the copy. - */ - struct bio *full_bio; - bio_end_io_t *full_bio_end_io; - void *full_bio_private; }; /* @@ -1052,7 +1055,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) { - ti->error = "Cannot allocate private snapshot structure"; + ti->error = "Cannot allocate snapshot context private " + "structure"; r = -ENOMEM; goto bad; } @@ -1376,7 +1380,6 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) struct dm_snapshot *s = pe->snap; struct bio *origin_bios = NULL; struct bio *snapshot_bios = NULL; - struct bio *full_bio = NULL; int error = 0; if (!success) { @@ -1412,15 +1415,10 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) */ dm_insert_exception(&s->complete, e); -out: + out: dm_remove_exception(&pe->e); snapshot_bios = bio_list_get(&pe->snapshot_bios); origin_bios = bio_list_get(&pe->origin_bios); - full_bio = pe->full_bio; - if (full_bio) { - full_bio->bi_end_io = pe->full_bio_end_io; - full_bio->bi_private = pe->full_bio_private; - } free_pending_exception(pe); increment_pending_exceptions_done_count(); @@ -1428,15 +1426,10 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) up_write(&s->lock); /* Submit any pending write bios */ - if (error) { - if (full_bio) - bio_io_error(full_bio); + if (error) error_bios(snapshot_bios); - } else { - if (full_bio) - bio_endio(full_bio, 0); + else flush_bios(snapshot_bios); - } retry_origin_bios(s, origin_bios); } @@ -1487,33 +1480,8 @@ static void start_copy(struct dm_snap_pending_exception *pe) dest.count = src.count; /* Hand over to kcopyd */ - dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); -} - -static void full_bio_end_io(struct bio *bio, int error) -{ - void *callback_data = bio->bi_private; - - dm_kcopyd_do_callback(callback_data, 0, error ? 1 : 0); -} - -static void start_full_bio(struct dm_snap_pending_exception *pe, - struct bio *bio) -{ - struct dm_snapshot *s = pe->snap; - void *callback_data; - - pe->full_bio = bio; - pe->full_bio_end_io = bio->bi_end_io; - pe->full_bio_private = bio->bi_private; - - callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, - copy_callback, pe); - - bio->bi_end_io = full_bio_end_io; - bio->bi_private = callback_data; - - generic_make_request(bio); + dm_kcopyd_copy(s->kcopyd_client, + &src, 1, &dest, 0, copy_callback, pe); } static struct dm_snap_pending_exception * @@ -1551,7 +1519,6 @@ __find_pending_exception(struct dm_snapshot *s, bio_list_init(&pe->origin_bios); bio_list_init(&pe->snapshot_bios); pe->started = 0; - pe->full_bio = NULL; if (s->store->type->prepare_exception(s->store, &pe->e)) { free_pending_exception(pe); @@ -1645,19 +1612,10 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, } remap_exception(s, &pe->e, bio, chunk); + bio_list_add(&pe->snapshot_bios, bio); r = DM_MAPIO_SUBMITTED; - if (!pe->started && - bio->bi_size == (s->store->chunk_size << SECTOR_SHIFT)) { - pe->started = 1; - up_write(&s->lock); - start_full_bio(pe, bio); - goto out; - } - - bio_list_add(&pe->snapshot_bios, bio); - if (!pe->started) { /* this is protected by snap->lock */ pe->started = 1; @@ -1670,9 +1628,9 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, map_context->ptr = track_chunk(s, chunk); } -out_unlock: + out_unlock: up_write(&s->lock); -out: + out: return r; } @@ -2016,7 +1974,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector, pe_to_start_now = pe; } -next_snapshot: + next_snapshot: up_write(&snap->lock); if (pe_to_start_now) { diff --git a/trunk/drivers/md/dm-table.c b/trunk/drivers/md/dm-table.c index 986b8754bb08..bfe9c2333cea 100644 --- a/trunk/drivers/md/dm-table.c +++ b/trunk/drivers/md/dm-table.c @@ -54,6 +54,7 @@ struct dm_table { sector_t *highs; struct dm_target *targets; + unsigned discards_supported:1; unsigned integrity_supported:1; /* @@ -153,11 +154,12 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size) return NULL; size = nmemb * elem_size; - addr = vzalloc(size); + addr = vmalloc(size); + if (addr) + memset(addr, 0, size); return addr; } -EXPORT_SYMBOL(dm_vcalloc); /* * highs, and targets are managed as dynamic arrays during a @@ -207,6 +209,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode, INIT_LIST_HEAD(&t->devices); INIT_LIST_HEAD(&t->target_callbacks); atomic_set(&t->holders, 0); + t->discards_supported = 1; if (!num_targets) num_targets = KEYS_PER_NODE; @@ -278,7 +281,6 @@ void dm_table_get(struct dm_table *t) { atomic_inc(&t->holders); } -EXPORT_SYMBOL(dm_table_get); void dm_table_put(struct dm_table *t) { @@ -288,7 +290,6 @@ void dm_table_put(struct dm_table *t) smp_mb__before_atomic_dec(); atomic_dec(&t->holders); } -EXPORT_SYMBOL(dm_table_put); /* * Checks to see if we need to extend highs or targets. @@ -454,14 +455,13 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, * Add a device to the list, or just increment the usage count if * it's already present. */ -int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, - struct dm_dev **result) +static int __table_get_device(struct dm_table *t, struct dm_target *ti, + const char *path, fmode_t mode, struct dm_dev **result) { int r; dev_t uninitialized_var(dev); struct dm_dev_internal *dd; unsigned int major, minor; - struct dm_table *t = ti->table; BUG_ON(!t); @@ -509,7 +509,6 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, *result = &dd->dm_dev; return 0; } -EXPORT_SYMBOL(dm_get_device); int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) @@ -540,15 +539,23 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, * If not we'll force DM to use PAGE_SIZE or * smaller I/O, just to be safe. */ - if (dm_queue_merge_is_compulsory(q) && !ti->type->merge) + + if (q->merge_bvec_fn && !ti->type->merge) blk_limits_max_hw_sectors(limits, (unsigned int) (PAGE_SIZE >> 9)); return 0; } EXPORT_SYMBOL_GPL(dm_set_device_limits); +int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, + struct dm_dev **result) +{ + return __table_get_device(ti->table, ti, path, mode, result); +} + + /* - * Decrement a device's use count and remove it if necessary. + * Decrement a devices use count and remove it if necessary. */ void dm_put_device(struct dm_target *ti, struct dm_dev *d) { @@ -561,7 +568,6 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d) kfree(dd); } } -EXPORT_SYMBOL(dm_put_device); /* * Checks to see if the target joins onto the end of the table. @@ -785,9 +791,8 @@ int dm_table_add_target(struct dm_table *t, const char *type, t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; - if (!tgt->num_discard_requests && tgt->discards_supported) - DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.", - dm_device_name(t->md), type); + if (!tgt->num_discard_requests) + t->discards_supported = 0; return 0; @@ -797,63 +802,6 @@ int dm_table_add_target(struct dm_table *t, const char *type, return r; } -/* - * Target argument parsing helpers. - */ -static int validate_next_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, - unsigned *value, char **error, unsigned grouped) -{ - const char *arg_str = dm_shift_arg(arg_set); - - if (!arg_str || - (sscanf(arg_str, "%u", value) != 1) || - (*value < arg->min) || - (*value > arg->max) || - (grouped && arg_set->argc < *value)) { - *error = arg->error; - return -EINVAL; - } - - return 0; -} - -int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, - unsigned *value, char **error) -{ - return validate_next_arg(arg, arg_set, value, error, 0); -} -EXPORT_SYMBOL(dm_read_arg); - -int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set, - unsigned *value, char **error) -{ - return validate_next_arg(arg, arg_set, value, error, 1); -} -EXPORT_SYMBOL(dm_read_arg_group); - -const char *dm_shift_arg(struct dm_arg_set *as) -{ - char *r; - - if (as->argc) { - as->argc--; - r = *as->argv; - as->argv++; - return r; - } - - return NULL; -} -EXPORT_SYMBOL(dm_shift_arg); - -void dm_consume_args(struct dm_arg_set *as, unsigned num_args) -{ - BUG_ON(as->argc < num_args); - as->argc -= num_args; - as->argv += num_args; -} -EXPORT_SYMBOL(dm_consume_args); - static int dm_table_set_type(struct dm_table *t) { unsigned i; @@ -1129,13 +1077,11 @@ void dm_table_event(struct dm_table *t) t->event_fn(t->event_context); mutex_unlock(&_event_lock); } -EXPORT_SYMBOL(dm_table_event); sector_t dm_table_get_size(struct dm_table *t) { return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; } -EXPORT_SYMBOL(dm_table_get_size); struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index) { @@ -1248,45 +1194,9 @@ static void dm_table_set_integrity(struct dm_table *t) blk_get_integrity(template_disk)); } -static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - unsigned flush = (*(unsigned *)data); - struct request_queue *q = bdev_get_queue(dev->bdev); - - return q && (q->flush_flags & flush); -} - -static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) -{ - struct dm_target *ti; - unsigned i = 0; - - /* - * Require at least one underlying device to support flushes. - * t->devices includes internal dm devices such as mirror logs - * so we need to use iterate_devices here, which targets - * supporting flushes must provide. - */ - while (i < dm_table_get_num_targets(t)) { - ti = dm_table_get_target(t, i++); - - if (!ti->num_flush_requests) - continue; - - if (ti->type->iterate_devices && - ti->type->iterate_devices(ti, device_flush_capable, &flush)) - return 1; - } - - return 0; -} - void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { - unsigned flush = 0; - /* * Copy table's limits to the DM device's request_queue */ @@ -1297,13 +1207,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); - if (dm_table_supports_flush(t, REQ_FLUSH)) { - flush |= REQ_FLUSH; - if (dm_table_supports_flush(t, REQ_FUA)) - flush |= REQ_FUA; - } - blk_queue_flush(q, flush); - dm_table_set_integrity(t); /* @@ -1334,7 +1237,6 @@ fmode_t dm_table_get_mode(struct dm_table *t) { return t->mode; } -EXPORT_SYMBOL(dm_table_get_mode); static void suspend_targets(struct dm_table *t, unsigned postsuspend) { @@ -1443,7 +1345,6 @@ struct mapped_device *dm_table_get_md(struct dm_table *t) { return t->md; } -EXPORT_SYMBOL(dm_table_get_md); static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) @@ -1458,19 +1359,19 @@ bool dm_table_supports_discards(struct dm_table *t) struct dm_target *ti; unsigned i = 0; + if (!t->discards_supported) + return 0; + /* * Unless any target used by the table set discards_supported, * require at least one underlying device to support discards. * t->devices includes internal dm devices such as mirror logs * so we need to use iterate_devices here, which targets - * supporting discard selectively must provide. + * supporting discard must provide. */ while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); - if (!ti->num_discard_requests) - continue; - if (ti->discards_supported) return 1; @@ -1481,3 +1382,13 @@ bool dm_table_supports_discards(struct dm_table *t) return 0; } + +EXPORT_SYMBOL(dm_vcalloc); +EXPORT_SYMBOL(dm_get_device); +EXPORT_SYMBOL(dm_put_device); +EXPORT_SYMBOL(dm_table_event); +EXPORT_SYMBOL(dm_table_get_size); +EXPORT_SYMBOL(dm_table_get_mode); +EXPORT_SYMBOL(dm_table_get_md); +EXPORT_SYMBOL(dm_table_put); +EXPORT_SYMBOL(dm_table_get); diff --git a/trunk/drivers/md/dm.c b/trunk/drivers/md/dm.c index 52b39f335bb3..0cf68b478878 100644 --- a/trunk/drivers/md/dm.c +++ b/trunk/drivers/md/dm.c @@ -37,8 +37,6 @@ static const char *_name = DM_NAME; static unsigned int major = 0; static unsigned int _major = 0; -static DEFINE_IDR(_minor_idr); - static DEFINE_SPINLOCK(_minor_lock); /* * For bio-based dm. @@ -111,7 +109,6 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); #define DMF_FREEING 3 #define DMF_DELETING 4 #define DMF_NOFLUSH_SUSPENDING 5 -#define DMF_MERGE_IS_OPTIONAL 6 /* * Work processed by per-device workqueue. @@ -316,12 +313,6 @@ static void __exit dm_exit(void) while (i--) _exits[i](); - - /* - * Should be empty by this point. - */ - idr_remove_all(&_minor_idr); - idr_destroy(&_minor_idr); } /* @@ -1180,8 +1171,7 @@ static int __clone_and_map_discard(struct clone_info *ci) /* * Even though the device advertised discard support, - * that does not mean every target supports it, and - * reconfiguration might also have changed that since the + * reconfiguration might have changed that since the * check was performed. */ if (!ti->num_discard_requests) @@ -1715,6 +1705,8 @@ static int dm_any_congested(void *congested_data, int bdi_bits) /*----------------------------------------------------------------- * An IDR is used to keep track of allocated minor numbers. *---------------------------------------------------------------*/ +static DEFINE_IDR(_minor_idr); + static void free_minor(int minor) { spin_lock(&_minor_lock); @@ -1808,6 +1800,7 @@ static void dm_init_md_queue(struct mapped_device *md) blk_queue_make_request(md->queue, dm_request); blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); blk_queue_merge_bvec(md->queue, dm_merge_bvec); + blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA); } /* @@ -1992,59 +1985,6 @@ static void __set_size(struct mapped_device *md, sector_t size) i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); } -/* - * Return 1 if the queue has a compulsory merge_bvec_fn function. - * - * If this function returns 0, then the device is either a non-dm - * device without a merge_bvec_fn, or it is a dm device that is - * able to split any bios it receives that are too big. - */ -int dm_queue_merge_is_compulsory(struct request_queue *q) -{ - struct mapped_device *dev_md; - - if (!q->merge_bvec_fn) - return 0; - - if (q->make_request_fn == dm_request) { - dev_md = q->queuedata; - if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags)) - return 0; - } - - return 1; -} - -static int dm_device_merge_is_compulsory(struct dm_target *ti, - struct dm_dev *dev, sector_t start, - sector_t len, void *data) -{ - struct block_device *bdev = dev->bdev; - struct request_queue *q = bdev_get_queue(bdev); - - return dm_queue_merge_is_compulsory(q); -} - -/* - * Return 1 if it is acceptable to ignore merge_bvec_fn based - * on the properties of the underlying devices. - */ -static int dm_table_merge_is_optional(struct dm_table *table) -{ - unsigned i = 0; - struct dm_target *ti; - - while (i < dm_table_get_num_targets(table)) { - ti = dm_table_get_target(table, i++); - - if (ti->type->iterate_devices && - ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL)) - return 0; - } - - return 1; -} - /* * Returns old map, which caller must destroy. */ @@ -2055,7 +1995,6 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, struct request_queue *q = md->queue; sector_t size; unsigned long flags; - int merge_is_optional; size = dm_table_get_size(t); @@ -2081,16 +2020,10 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, __bind_mempools(md, t); - merge_is_optional = dm_table_merge_is_optional(t); - write_lock_irqsave(&md->map_lock, flags); old_map = md->map; md->map = t; dm_table_set_restrictions(t, q, limits); - if (merge_is_optional) - set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); - else - clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); write_unlock_irqrestore(&md->map_lock, flags); return old_map; diff --git a/trunk/drivers/md/dm.h b/trunk/drivers/md/dm.h index 6745dbd278a4..1aaf16746da8 100644 --- a/trunk/drivers/md/dm.h +++ b/trunk/drivers/md/dm.h @@ -66,8 +66,6 @@ int dm_table_alloc_md_mempools(struct dm_table *t); void dm_table_free_md_mempools(struct dm_table *t); struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); -int dm_queue_merge_is_compulsory(struct request_queue *q); - void dm_lock_md_type(struct mapped_device *md); void dm_unlock_md_type(struct mapped_device *md); void dm_set_md_type(struct mapped_device *md, unsigned type); diff --git a/trunk/drivers/regulator/core.c b/trunk/drivers/regulator/core.c index d8e6a429e8ba..d3e38790906e 100644 --- a/trunk/drivers/regulator/core.c +++ b/trunk/drivers/regulator/core.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -34,8 +33,6 @@ #include "dummy.h" -#define rdev_crit(rdev, fmt, ...) \ - pr_crit("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__) #define rdev_err(rdev, fmt, ...) \ pr_err("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__) #define rdev_warn(rdev, fmt, ...) \ @@ -81,13 +78,11 @@ struct regulator { char *supply_name; struct device_attribute dev_attr; struct regulator_dev *rdev; -#ifdef CONFIG_DEBUG_FS - struct dentry *debugfs; -#endif }; static int _regulator_is_enabled(struct regulator_dev *rdev); -static int _regulator_disable(struct regulator_dev *rdev); +static int _regulator_disable(struct regulator_dev *rdev, + struct regulator_dev **supply_rdev_ptr); static int _regulator_get_voltage(struct regulator_dev *rdev); static int _regulator_get_current_limit(struct regulator_dev *rdev); static unsigned int _regulator_get_mode(struct regulator_dev *rdev); @@ -95,9 +90,6 @@ static void _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data); static int _regulator_do_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV); -static struct regulator *create_regulator(struct regulator_dev *rdev, - struct device *dev, - const char *supply_name); static const char *rdev_get_name(struct regulator_dev *rdev) { @@ -151,11 +143,8 @@ static int regulator_check_voltage(struct regulator_dev *rdev, if (*min_uV < rdev->constraints->min_uV) *min_uV = rdev->constraints->min_uV; - if (*min_uV > *max_uV) { - rdev_err(rdev, "unsupportable voltage range: %d-%duV\n", - *min_uV, *max_uV); + if (*min_uV > *max_uV) return -EINVAL; - } return 0; } @@ -208,11 +197,8 @@ static int regulator_check_current_limit(struct regulator_dev *rdev, if (*min_uA < rdev->constraints->min_uA) *min_uA = rdev->constraints->min_uA; - if (*min_uA > *max_uA) { - rdev_err(rdev, "unsupportable current range: %d-%duA\n", - *min_uA, *max_uA); + if (*min_uA > *max_uA) return -EINVAL; - } return 0; } @@ -227,7 +213,6 @@ static int regulator_mode_constrain(struct regulator_dev *rdev, int *mode) case REGULATOR_MODE_STANDBY: break; default: - rdev_err(rdev, "invalid mode %x specified\n", *mode); return -EINVAL; } @@ -794,6 +779,7 @@ static int machine_constraints_voltage(struct regulator_dev *rdev, if (ret < 0) { rdev_err(rdev, "failed to apply %duV constraint\n", rdev->constraints->min_uV); + rdev->constraints = NULL; return ret; } } @@ -896,6 +882,7 @@ static int set_machine_constraints(struct regulator_dev *rdev, ret = suspend_prepare(rdev, rdev->constraints->initial_state); if (ret < 0) { rdev_err(rdev, "failed to set suspend state\n"); + rdev->constraints = NULL; goto out; } } @@ -922,15 +909,13 @@ static int set_machine_constraints(struct regulator_dev *rdev, ret = ops->enable(rdev); if (ret < 0) { rdev_err(rdev, "failed to enable\n"); + rdev->constraints = NULL; goto out; } } print_constraints(rdev); - return 0; out: - kfree(rdev->constraints); - rdev->constraints = NULL; return ret; } @@ -944,20 +929,21 @@ static int set_machine_constraints(struct regulator_dev *rdev, * core if it's child is enabled. */ static int set_supply(struct regulator_dev *rdev, - struct regulator_dev *supply_rdev) + struct regulator_dev *supply_rdev) { int err; - rdev_info(rdev, "supplied by %s\n", rdev_get_name(supply_rdev)); - - rdev->supply = create_regulator(supply_rdev, &rdev->dev, "SUPPLY"); - if (IS_ERR(rdev->supply)) { - err = PTR_ERR(rdev->supply); - rdev->supply = NULL; - return err; + err = sysfs_create_link(&rdev->dev.kobj, &supply_rdev->dev.kobj, + "supply"); + if (err) { + rdev_err(rdev, "could not add device link %s err %d\n", + supply_rdev->dev.kobj.name, err); + goto out; } - - return 0; + rdev->supply = supply_rdev; + list_add(&rdev->slist, &supply_rdev->supply_list); +out: + return err; } /** @@ -1046,7 +1032,7 @@ static void unset_regulator_supplies(struct regulator_dev *rdev) } } -#define REG_STR_SIZE 64 +#define REG_STR_SIZE 32 static struct regulator *create_regulator(struct regulator_dev *rdev, struct device *dev, @@ -1066,9 +1052,8 @@ static struct regulator *create_regulator(struct regulator_dev *rdev, if (dev) { /* create a 'requested_microamps_name' sysfs entry */ - size = scnprintf(buf, REG_STR_SIZE, - "microamps_requested_%s-%s", - dev_name(dev), supply_name); + size = scnprintf(buf, REG_STR_SIZE, "microamps_requested_%s", + supply_name); if (size >= REG_STR_SIZE) goto overflow_err; @@ -1103,28 +1088,7 @@ static struct regulator *create_regulator(struct regulator_dev *rdev, dev->kobj.name, err); goto link_name_err; } - } else { - regulator->supply_name = kstrdup(supply_name, GFP_KERNEL); - if (regulator->supply_name == NULL) - goto attr_err; - } - -#ifdef CONFIG_DEBUG_FS - regulator->debugfs = debugfs_create_dir(regulator->supply_name, - rdev->debugfs); - if (IS_ERR_OR_NULL(regulator->debugfs)) { - rdev_warn(rdev, "Failed to create debugfs directory\n"); - regulator->debugfs = NULL; - } else { - debugfs_create_u32("uA_load", 0444, regulator->debugfs, - ®ulator->uA_load); - debugfs_create_u32("min_uV", 0444, regulator->debugfs, - ®ulator->min_uV); - debugfs_create_u32("max_uV", 0444, regulator->debugfs, - ®ulator->max_uV); } -#endif - mutex_unlock(&rdev->mutex); return regulator; link_name_err: @@ -1303,17 +1267,13 @@ void regulator_put(struct regulator *regulator) mutex_lock(®ulator_list_mutex); rdev = regulator->rdev; -#ifdef CONFIG_DEBUG_FS - debugfs_remove_recursive(regulator->debugfs); -#endif - /* remove any sysfs entries */ if (regulator->dev) { sysfs_remove_link(&rdev->dev.kobj, regulator->supply_name); + kfree(regulator->supply_name); device_remove_file(regulator->dev, ®ulator->dev_attr); kfree(regulator->dev_attr.attr.name); } - kfree(regulator->supply_name); list_del(®ulator->list); kfree(regulator); @@ -1341,6 +1301,19 @@ static int _regulator_enable(struct regulator_dev *rdev) { int ret, delay; + if (rdev->use_count == 0) { + /* do we need to enable the supply regulator first */ + if (rdev->supply) { + mutex_lock(&rdev->supply->mutex); + ret = _regulator_enable(rdev->supply); + mutex_unlock(&rdev->supply->mutex); + if (ret < 0) { + rdev_err(rdev, "failed to enable: %d\n", ret); + return ret; + } + } + } + /* check voltage and requested load before enabling */ if (rdev->constraints && (rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_DRMS)) @@ -1415,27 +1388,19 @@ int regulator_enable(struct regulator *regulator) struct regulator_dev *rdev = regulator->rdev; int ret = 0; - if (rdev->supply) { - ret = regulator_enable(rdev->supply); - if (ret != 0) - return ret; - } - mutex_lock(&rdev->mutex); ret = _regulator_enable(rdev); mutex_unlock(&rdev->mutex); - - if (ret != 0) - regulator_disable(rdev->supply); - return ret; } EXPORT_SYMBOL_GPL(regulator_enable); /* locks held by regulator_disable() */ -static int _regulator_disable(struct regulator_dev *rdev) +static int _regulator_disable(struct regulator_dev *rdev, + struct regulator_dev **supply_rdev_ptr) { int ret = 0; + *supply_rdev_ptr = NULL; if (WARN(rdev->use_count <= 0, "unbalanced disables for %s\n", rdev_get_name(rdev))) @@ -1462,6 +1427,9 @@ static int _regulator_disable(struct regulator_dev *rdev) NULL); } + /* decrease our supplies ref count and disable if required */ + *supply_rdev_ptr = rdev->supply; + rdev->use_count = 0; } else if (rdev->use_count > 1) { @@ -1472,7 +1440,6 @@ static int _regulator_disable(struct regulator_dev *rdev) rdev->use_count--; } - return ret; } @@ -1491,21 +1458,29 @@ static int _regulator_disable(struct regulator_dev *rdev) int regulator_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; + struct regulator_dev *supply_rdev = NULL; int ret = 0; mutex_lock(&rdev->mutex); - ret = _regulator_disable(rdev); + ret = _regulator_disable(rdev, &supply_rdev); mutex_unlock(&rdev->mutex); - if (ret == 0 && rdev->supply) - regulator_disable(rdev->supply); + /* decrease our supplies ref count and disable if required */ + while (supply_rdev != NULL) { + rdev = supply_rdev; + + mutex_lock(&rdev->mutex); + _regulator_disable(rdev, &supply_rdev); + mutex_unlock(&rdev->mutex); + } return ret; } EXPORT_SYMBOL_GPL(regulator_disable); /* locks held by regulator_force_disable() */ -static int _regulator_force_disable(struct regulator_dev *rdev) +static int _regulator_force_disable(struct regulator_dev *rdev, + struct regulator_dev **supply_rdev_ptr) { int ret = 0; @@ -1522,6 +1497,10 @@ static int _regulator_force_disable(struct regulator_dev *rdev) REGULATOR_EVENT_DISABLE, NULL); } + /* decrease our supplies ref count and disable if required */ + *supply_rdev_ptr = rdev->supply; + + rdev->use_count = 0; return ret; } @@ -1537,16 +1516,16 @@ static int _regulator_force_disable(struct regulator_dev *rdev) int regulator_force_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; + struct regulator_dev *supply_rdev = NULL; int ret; mutex_lock(&rdev->mutex); regulator->uA_load = 0; - ret = _regulator_force_disable(regulator->rdev); + ret = _regulator_force_disable(rdev, &supply_rdev); mutex_unlock(&rdev->mutex); - if (rdev->supply) - while (rdev->open_count--) - regulator_disable(rdev->supply); + if (supply_rdev) + regulator_disable(get_device_regulator(rdev_get_dev(supply_rdev))); return ret; } @@ -2157,7 +2136,7 @@ int regulator_set_optimum_mode(struct regulator *regulator, int uA_load) /* get input voltage */ input_uV = 0; if (rdev->supply) - input_uV = regulator_get_voltage(rdev->supply); + input_uV = _regulator_get_voltage(rdev->supply); if (input_uV <= 0) input_uV = rdev->constraints->input_uV; if (input_uV <= 0) { @@ -2227,8 +2206,17 @@ EXPORT_SYMBOL_GPL(regulator_unregister_notifier); static void _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data) { + struct regulator_dev *_rdev; + /* call rdev chain first */ blocking_notifier_call_chain(&rdev->notifier, event, NULL); + + /* now notify regulator we supply */ + list_for_each_entry(_rdev, &rdev->supply_list, slist) { + mutex_lock(&_rdev->mutex); + _notifier_call_chain(_rdev, event, data); + mutex_unlock(&_rdev->mutex); + } } /** @@ -2276,13 +2264,6 @@ int regulator_bulk_get(struct device *dev, int num_consumers, } EXPORT_SYMBOL_GPL(regulator_bulk_get); -static void regulator_bulk_enable_async(void *data, async_cookie_t cookie) -{ - struct regulator_bulk_data *bulk = data; - - bulk->ret = regulator_enable(bulk->consumer); -} - /** * regulator_bulk_enable - enable multiple regulator consumers * @@ -2298,33 +2279,21 @@ static void regulator_bulk_enable_async(void *data, async_cookie_t cookie) int regulator_bulk_enable(int num_consumers, struct regulator_bulk_data *consumers) { - LIST_HEAD(async_domain); int i; - int ret = 0; - - for (i = 0; i < num_consumers; i++) - async_schedule_domain(regulator_bulk_enable_async, - &consumers[i], &async_domain); - - async_synchronize_full_domain(&async_domain); + int ret; - /* If any consumer failed we need to unwind any that succeeded */ for (i = 0; i < num_consumers; i++) { - if (consumers[i].ret != 0) { - ret = consumers[i].ret; + ret = regulator_enable(consumers[i].consumer); + if (ret != 0) goto err; - } } return 0; err: - for (i = 0; i < num_consumers; i++) - if (consumers[i].ret == 0) - regulator_disable(consumers[i].consumer); - else - pr_err("Failed to enable %s: %d\n", - consumers[i].supply, consumers[i].ret); + pr_err("Failed to enable %s: %d\n", consumers[i].supply, ret); + for (--i; i >= 0; --i) + regulator_disable(consumers[i].consumer); return ret; } @@ -2620,7 +2589,9 @@ struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc, rdev->owner = regulator_desc->owner; rdev->desc = regulator_desc; INIT_LIST_HEAD(&rdev->consumer_list); + INIT_LIST_HEAD(&rdev->supply_list); INIT_LIST_HEAD(&rdev->list); + INIT_LIST_HEAD(&rdev->slist); BLOCKING_INIT_NOTIFIER_HEAD(&rdev->notifier); /* preform any regulator specific init */ @@ -2701,7 +2672,6 @@ struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc, unset_regulator_supplies(rdev); scrub: - kfree(rdev->constraints); device_unregister(&rdev->dev); /* device core frees rdev */ rdev = ERR_PTR(ret); @@ -2733,7 +2703,7 @@ void regulator_unregister(struct regulator_dev *rdev) unset_regulator_supplies(rdev); list_del(&rdev->list); if (rdev->supply) - regulator_put(rdev->supply); + sysfs_remove_link(&rdev->dev.kobj, "supply"); device_unregister(&rdev->dev); kfree(rdev->constraints); mutex_unlock(®ulator_list_mutex); diff --git a/trunk/drivers/regulator/dummy.c b/trunk/drivers/regulator/dummy.c index f6ef6694ab98..c7410bde7b5d 100644 --- a/trunk/drivers/regulator/dummy.c +++ b/trunk/drivers/regulator/dummy.c @@ -36,29 +36,6 @@ static struct regulator_desc dummy_desc = { .ops = &dummy_ops, }; -static int __devinit dummy_regulator_probe(struct platform_device *pdev) -{ - int ret; - - dummy_regulator_rdev = regulator_register(&dummy_desc, NULL, - &dummy_initdata, NULL); - if (IS_ERR(dummy_regulator_rdev)) { - ret = PTR_ERR(dummy_regulator_rdev); - pr_err("Failed to register regulator: %d\n", ret); - return ret; - } - - return 0; -} - -static struct platform_driver dummy_regulator_driver = { - .probe = dummy_regulator_probe, - .driver = { - .name = "reg-dummy", - .owner = THIS_MODULE, - }, -}; - static struct platform_device *dummy_pdev; void __init regulator_dummy_init(void) @@ -78,9 +55,12 @@ void __init regulator_dummy_init(void) return; } - ret = platform_driver_register(&dummy_regulator_driver); - if (ret != 0) { - pr_err("Failed to register dummy regulator driver: %d\n", ret); + dummy_regulator_rdev = regulator_register(&dummy_desc, NULL, + &dummy_initdata, NULL); + if (IS_ERR(dummy_regulator_rdev)) { + ret = PTR_ERR(dummy_regulator_rdev); + pr_err("Failed to register regulator: %d\n", ret); platform_device_unregister(dummy_pdev); + return; } } diff --git a/trunk/drivers/regulator/tps65910-regulator.c b/trunk/drivers/regulator/tps65910-regulator.c index 66d2d60b436a..55dd4e6650db 100644 --- a/trunk/drivers/regulator/tps65910-regulator.c +++ b/trunk/drivers/regulator/tps65910-regulator.c @@ -49,6 +49,7 @@ #define TPS65911_REG_LDO7 11 #define TPS65911_REG_LDO8 12 +#define TPS65910_NUM_REGULATOR 13 #define TPS65910_SUPPLY_STATE_ENABLED 0x1 /* supported VIO voltages in milivolts */ @@ -263,12 +264,11 @@ static struct tps_info tps65911_regs[] = { }; struct tps65910_reg { - struct regulator_desc *desc; + struct regulator_desc desc[TPS65910_NUM_REGULATOR]; struct tps65910 *mfd; - struct regulator_dev **rdev; - struct tps_info **info; + struct regulator_dev *rdev[TPS65910_NUM_REGULATOR]; + struct tps_info *info[TPS65910_NUM_REGULATOR]; struct mutex mutex; - int num_regulators; int mode; int (*get_ctrl_reg)(int); }; @@ -759,13 +759,8 @@ static int tps65910_list_voltage_dcdc(struct regulator_dev *dev, mult = (selector / VDD1_2_NUM_VOLTS) + 1; volt = VDD1_2_MIN_VOLT + (selector % VDD1_2_NUM_VOLTS) * VDD1_2_OFFSET; - break; case TPS65911_REG_VDDCTRL: volt = VDDCTRL_MIN_VOLT + (selector * VDDCTRL_OFFSET); - break; - default: - BUG(); - return -EINVAL; } return volt * 100 * mult; @@ -902,42 +897,16 @@ static __devinit int tps65910_probe(struct platform_device *pdev) switch(tps65910_chip_id(tps65910)) { case TPS65910: pmic->get_ctrl_reg = &tps65910_get_ctrl_register; - pmic->num_regulators = ARRAY_SIZE(tps65910_regs); info = tps65910_regs; - break; case TPS65911: pmic->get_ctrl_reg = &tps65911_get_ctrl_register; - pmic->num_regulators = ARRAY_SIZE(tps65911_regs); info = tps65911_regs; - break; default: pr_err("Invalid tps chip version\n"); - kfree(pmic); return -ENODEV; } - pmic->desc = kcalloc(pmic->num_regulators, - sizeof(struct regulator_desc), GFP_KERNEL); - if (!pmic->desc) { - err = -ENOMEM; - goto err_free_pmic; - } - - pmic->info = kcalloc(pmic->num_regulators, - sizeof(struct tps_info *), GFP_KERNEL); - if (!pmic->info) { - err = -ENOMEM; - goto err_free_desc; - } - - pmic->rdev = kcalloc(pmic->num_regulators, - sizeof(struct regulator_dev *), GFP_KERNEL); - if (!pmic->rdev) { - err = -ENOMEM; - goto err_free_info; - } - - for (i = 0; i < pmic->num_regulators; i++, info++, reg_data++) { + for (i = 0; i < TPS65910_NUM_REGULATOR; i++, info++, reg_data++) { /* Register the regulators */ pmic->info[i] = info; @@ -969,7 +938,7 @@ static __devinit int tps65910_probe(struct platform_device *pdev) "failed to register %s regulator\n", pdev->name); err = PTR_ERR(rdev); - goto err_unregister_regulator; + goto err; } /* Save regulator for cleanup */ @@ -977,31 +946,23 @@ static __devinit int tps65910_probe(struct platform_device *pdev) } return 0; -err_unregister_regulator: +err: while (--i >= 0) regulator_unregister(pmic->rdev[i]); - kfree(pmic->rdev); -err_free_info: - kfree(pmic->info); -err_free_desc: - kfree(pmic->desc); -err_free_pmic: + kfree(pmic); return err; } static int __devexit tps65910_remove(struct platform_device *pdev) { - struct tps65910_reg *pmic = platform_get_drvdata(pdev); + struct tps65910_reg *tps65910_reg = platform_get_drvdata(pdev); int i; - for (i = 0; i < pmic->num_regulators; i++) - regulator_unregister(pmic->rdev[i]); + for (i = 0; i < TPS65910_NUM_REGULATOR; i++) + regulator_unregister(tps65910_reg->rdev[i]); - kfree(pmic->rdev); - kfree(pmic->info); - kfree(pmic->desc); - kfree(pmic); + kfree(tps65910_reg); return 0; } diff --git a/trunk/drivers/regulator/twl-regulator.c b/trunk/drivers/regulator/twl-regulator.c index ee8747f4fa08..87fe0f75a56e 100644 --- a/trunk/drivers/regulator/twl-regulator.c +++ b/trunk/drivers/regulator/twl-regulator.c @@ -835,8 +835,8 @@ static struct regulator_ops twlsmps_ops = { remap_conf) \ TWL_FIXED_LDO(label, offset, mVolts, num, turnon_delay, \ remap_conf, TWL4030, twl4030fixed_ops) -#define TWL6030_FIXED_LDO(label, offset, mVolts, turnon_delay) \ - TWL_FIXED_LDO(label, offset, mVolts, 0x0, turnon_delay, \ +#define TWL6030_FIXED_LDO(label, offset, mVolts, num, turnon_delay) \ + TWL_FIXED_LDO(label, offset, mVolts, num, turnon_delay, \ 0x0, TWL6030, twl6030fixed_ops) #define TWL4030_ADJUSTABLE_LDO(label, offset, num, turnon_delay, remap_conf) { \ @@ -856,22 +856,24 @@ static struct regulator_ops twlsmps_ops = { }, \ } -#define TWL6030_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) { \ +#define TWL6030_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts, num) { \ .base = offset, \ + .id = num, \ .min_mV = min_mVolts, \ .max_mV = max_mVolts, \ .desc = { \ .name = #label, \ .id = TWL6030_REG_##label, \ - .n_voltages = (max_mVolts - min_mVolts)/100 + 1, \ + .n_voltages = (max_mVolts - min_mVolts)/100, \ .ops = &twl6030ldo_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ }, \ } -#define TWL6025_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) { \ +#define TWL6025_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts, num) { \ .base = offset, \ + .id = num, \ .min_mV = min_mVolts, \ .max_mV = max_mVolts, \ .desc = { \ @@ -901,8 +903,9 @@ static struct regulator_ops twlsmps_ops = { }, \ } -#define TWL6030_FIXED_RESOURCE(label, offset, turnon_delay) { \ +#define TWL6030_FIXED_RESOURCE(label, offset, num, turnon_delay) { \ .base = offset, \ + .id = num, \ .delay = turnon_delay, \ .desc = { \ .name = #label, \ @@ -913,8 +916,9 @@ static struct regulator_ops twlsmps_ops = { }, \ } -#define TWL6025_ADJUSTABLE_SMPS(label, offset) { \ +#define TWL6025_ADJUSTABLE_SMPS(label, offset, num) { \ .base = offset, \ + .id = num, \ .min_mV = 600, \ .max_mV = 2100, \ .desc = { \ @@ -957,32 +961,32 @@ static struct twlreg_info twl_regs[] = { /* 6030 REG with base as PMC Slave Misc : 0x0030 */ /* Turnon-delay and remap configuration values for 6030 are not verified since the specification is not public */ - TWL6030_ADJUSTABLE_LDO(VAUX1_6030, 0x54, 1000, 3300), - TWL6030_ADJUSTABLE_LDO(VAUX2_6030, 0x58, 1000, 3300), - TWL6030_ADJUSTABLE_LDO(VAUX3_6030, 0x5c, 1000, 3300), - TWL6030_ADJUSTABLE_LDO(VMMC, 0x68, 1000, 3300), - TWL6030_ADJUSTABLE_LDO(VPP, 0x6c, 1000, 3300), - TWL6030_ADJUSTABLE_LDO(VUSIM, 0x74, 1000, 3300), - TWL6030_FIXED_LDO(VANA, 0x50, 2100, 0), - TWL6030_FIXED_LDO(VCXIO, 0x60, 1800, 0), - TWL6030_FIXED_LDO(VDAC, 0x64, 1800, 0), - TWL6030_FIXED_LDO(VUSB, 0x70, 3300, 0), - TWL6030_FIXED_RESOURCE(CLK32KG, 0x8C, 0), + TWL6030_ADJUSTABLE_LDO(VAUX1_6030, 0x54, 1000, 3300, 1), + TWL6030_ADJUSTABLE_LDO(VAUX2_6030, 0x58, 1000, 3300, 2), + TWL6030_ADJUSTABLE_LDO(VAUX3_6030, 0x5c, 1000, 3300, 3), + TWL6030_ADJUSTABLE_LDO(VMMC, 0x68, 1000, 3300, 4), + TWL6030_ADJUSTABLE_LDO(VPP, 0x6c, 1000, 3300, 5), + TWL6030_ADJUSTABLE_LDO(VUSIM, 0x74, 1000, 3300, 7), + TWL6030_FIXED_LDO(VANA, 0x50, 2100, 15, 0), + TWL6030_FIXED_LDO(VCXIO, 0x60, 1800, 16, 0), + TWL6030_FIXED_LDO(VDAC, 0x64, 1800, 17, 0), + TWL6030_FIXED_LDO(VUSB, 0x70, 3300, 18, 0), + TWL6030_FIXED_RESOURCE(CLK32KG, 0x8C, 48, 0), /* 6025 are renamed compared to 6030 versions */ - TWL6025_ADJUSTABLE_LDO(LDO2, 0x54, 1000, 3300), - TWL6025_ADJUSTABLE_LDO(LDO4, 0x58, 1000, 3300), - TWL6025_ADJUSTABLE_LDO(LDO3, 0x5c, 1000, 3300), - TWL6025_ADJUSTABLE_LDO(LDO5, 0x68, 1000, 3300), - TWL6025_ADJUSTABLE_LDO(LDO1, 0x6c, 1000, 3300), - TWL6025_ADJUSTABLE_LDO(LDO7, 0x74, 1000, 3300), - TWL6025_ADJUSTABLE_LDO(LDO6, 0x60, 1000, 3300), - TWL6025_ADJUSTABLE_LDO(LDOLN, 0x64, 1000, 3300), - TWL6025_ADJUSTABLE_LDO(LDOUSB, 0x70, 1000, 3300), - - TWL6025_ADJUSTABLE_SMPS(SMPS3, 0x34), - TWL6025_ADJUSTABLE_SMPS(SMPS4, 0x10), - TWL6025_ADJUSTABLE_SMPS(VIO, 0x16), + TWL6025_ADJUSTABLE_LDO(LDO2, 0x54, 1000, 3300, 1), + TWL6025_ADJUSTABLE_LDO(LDO4, 0x58, 1000, 3300, 2), + TWL6025_ADJUSTABLE_LDO(LDO3, 0x5c, 1000, 3300, 3), + TWL6025_ADJUSTABLE_LDO(LDO5, 0x68, 1000, 3300, 4), + TWL6025_ADJUSTABLE_LDO(LDO1, 0x6c, 1000, 3300, 5), + TWL6025_ADJUSTABLE_LDO(LDO7, 0x74, 1000, 3300, 7), + TWL6025_ADJUSTABLE_LDO(LDO6, 0x60, 1000, 3300, 16), + TWL6025_ADJUSTABLE_LDO(LDOLN, 0x64, 1000, 3300, 17), + TWL6025_ADJUSTABLE_LDO(LDOUSB, 0x70, 1000, 3300, 18), + + TWL6025_ADJUSTABLE_SMPS(SMPS3, 0x34, 1), + TWL6025_ADJUSTABLE_SMPS(SMPS4, 0x10, 2), + TWL6025_ADJUSTABLE_SMPS(VIO, 0x16, 3), }; static u8 twl_get_smps_offset(void) diff --git a/trunk/drivers/regulator/wm831x-dcdc.c b/trunk/drivers/regulator/wm831x-dcdc.c index bd3531d8b2ac..a0982e809851 100644 --- a/trunk/drivers/regulator/wm831x-dcdc.c +++ b/trunk/drivers/regulator/wm831x-dcdc.c @@ -267,6 +267,23 @@ static int wm831x_buckv_select_min_voltage(struct regulator_dev *rdev, return vsel; } +static int wm831x_buckv_select_max_voltage(struct regulator_dev *rdev, + int min_uV, int max_uV) +{ + u16 vsel; + + if (max_uV < 600000 || max_uV > 1800000) + return -EINVAL; + + vsel = ((max_uV - 600000) / 12500) + 8; + + if (wm831x_buckv_list_voltage(rdev, vsel) < min_uV || + wm831x_buckv_list_voltage(rdev, vsel) < max_uV) + return -EINVAL; + + return vsel; +} + static int wm831x_buckv_set_dvs(struct regulator_dev *rdev, int state) { struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev); @@ -321,23 +338,28 @@ static int wm831x_buckv_set_voltage(struct regulator_dev *rdev, if (ret < 0) return ret; - /* - * If this VSEL is higher than the last one we've seen then - * remember it as the DVS VSEL. This is optimised for CPUfreq - * usage where we want to get to the highest voltage very - * quickly. - */ - if (vsel > dcdc->dvs_vsel) { - ret = wm831x_set_bits(wm831x, dvs_reg, - WM831X_DC1_DVS_VSEL_MASK, - dcdc->dvs_vsel); - if (ret == 0) - dcdc->dvs_vsel = vsel; - else - dev_warn(wm831x->dev, - "Failed to set DCDC DVS VSEL: %d\n", ret); + /* Set the high voltage as the DVS voltage. This is optimised + * for CPUfreq usage, most processors will keep the maximum + * voltage constant and lower the minimum with the frequency. */ + vsel = wm831x_buckv_select_max_voltage(rdev, min_uV, max_uV); + if (vsel < 0) { + /* This should never happen - at worst the same vsel + * should be chosen */ + WARN_ON(vsel < 0); + return 0; } + /* Don't bother if it's the same VSEL we're already using */ + if (vsel == dcdc->on_vsel) + return 0; + + ret = wm831x_set_bits(wm831x, dvs_reg, WM831X_DC1_DVS_VSEL_MASK, vsel); + if (ret == 0) + dcdc->dvs_vsel = vsel; + else + dev_warn(wm831x->dev, "Failed to set DCDC DVS VSEL: %d\n", + ret); + return 0; } @@ -434,6 +456,27 @@ static __devinit void wm831x_buckv_dvs_init(struct wm831x_dcdc *dcdc, if (!pdata || !pdata->dvs_gpio) return; + switch (pdata->dvs_control_src) { + case 1: + ctrl = 2 << WM831X_DC1_DVS_SRC_SHIFT; + break; + case 2: + ctrl = 3 << WM831X_DC1_DVS_SRC_SHIFT; + break; + default: + dev_err(wm831x->dev, "Invalid DVS control source %d for %s\n", + pdata->dvs_control_src, dcdc->name); + return; + } + + ret = wm831x_set_bits(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL, + WM831X_DC1_DVS_SRC_MASK, ctrl); + if (ret < 0) { + dev_err(wm831x->dev, "Failed to set %s DVS source: %d\n", + dcdc->name, ret); + return; + } + ret = gpio_request(pdata->dvs_gpio, "DCDC DVS"); if (ret < 0) { dev_err(wm831x->dev, "Failed to get %s DVS GPIO: %d\n", @@ -455,57 +498,17 @@ static __devinit void wm831x_buckv_dvs_init(struct wm831x_dcdc *dcdc, } dcdc->dvs_gpio = pdata->dvs_gpio; - - switch (pdata->dvs_control_src) { - case 1: - ctrl = 2 << WM831X_DC1_DVS_SRC_SHIFT; - break; - case 2: - ctrl = 3 << WM831X_DC1_DVS_SRC_SHIFT; - break; - default: - dev_err(wm831x->dev, "Invalid DVS control source %d for %s\n", - pdata->dvs_control_src, dcdc->name); - return; - } - - /* If DVS_VSEL is set to the minimum value then raise it to ON_VSEL - * to make bootstrapping a bit smoother. - */ - if (!dcdc->dvs_vsel) { - ret = wm831x_set_bits(wm831x, - dcdc->base + WM831X_DCDC_DVS_CONTROL, - WM831X_DC1_DVS_VSEL_MASK, dcdc->on_vsel); - if (ret == 0) - dcdc->dvs_vsel = dcdc->on_vsel; - else - dev_warn(wm831x->dev, "Failed to set DVS_VSEL: %d\n", - ret); - } - - ret = wm831x_set_bits(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL, - WM831X_DC1_DVS_SRC_MASK, ctrl); - if (ret < 0) { - dev_err(wm831x->dev, "Failed to set %s DVS source: %d\n", - dcdc->name, ret); - } } static __devinit int wm831x_buckv_probe(struct platform_device *pdev) { struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); struct wm831x_pdata *pdata = wm831x->dev->platform_data; - int id; + int id = pdev->id % ARRAY_SIZE(pdata->dcdc); struct wm831x_dcdc *dcdc; struct resource *res; int ret, irq; - if (pdata && pdata->wm831x_num) - id = (pdata->wm831x_num * 10) + 1; - else - id = 0; - id = pdev->id - id; - dev_dbg(&pdev->dev, "Probing DCDC%d\n", id + 1); if (pdata == NULL || pdata->dcdc[id] == NULL) @@ -542,7 +545,7 @@ static __devinit int wm831x_buckv_probe(struct platform_device *pdev) } dcdc->on_vsel = ret & WM831X_DC1_ON_VSEL_MASK; - ret = wm831x_reg_read(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL); + ret = wm831x_reg_read(wm831x, dcdc->base + WM831X_DCDC_ON_CONFIG); if (ret < 0) { dev_err(wm831x->dev, "Failed to read DVS VSEL: %d\n", ret); goto err; @@ -706,17 +709,11 @@ static __devinit int wm831x_buckp_probe(struct platform_device *pdev) { struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); struct wm831x_pdata *pdata = wm831x->dev->platform_data; - int id; + int id = pdev->id % ARRAY_SIZE(pdata->dcdc); struct wm831x_dcdc *dcdc; struct resource *res; int ret, irq; - if (pdata && pdata->wm831x_num) - id = (pdata->wm831x_num * 10) + 1; - else - id = 0; - id = pdev->id - id; - dev_dbg(&pdev->dev, "Probing DCDC%d\n", id + 1); if (pdata == NULL || pdata->dcdc[id] == NULL) @@ -1049,4 +1046,3 @@ MODULE_DESCRIPTION("WM831x DC-DC convertor driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:wm831x-buckv"); MODULE_ALIAS("platform:wm831x-buckp"); -MODULE_ALIAS("platform:wm831x-epe"); diff --git a/trunk/drivers/regulator/wm831x-ldo.c b/trunk/drivers/regulator/wm831x-ldo.c index 6709710a059e..2220cf8defb1 100644 --- a/trunk/drivers/regulator/wm831x-ldo.c +++ b/trunk/drivers/regulator/wm831x-ldo.c @@ -310,17 +310,11 @@ static __devinit int wm831x_gp_ldo_probe(struct platform_device *pdev) { struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); struct wm831x_pdata *pdata = wm831x->dev->platform_data; - int id; + int id = pdev->id % ARRAY_SIZE(pdata->ldo); struct wm831x_ldo *ldo; struct resource *res; int ret, irq; - if (pdata && pdata->wm831x_num) - id = (pdata->wm831x_num * 10) + 1; - else - id = 0; - id = pdev->id - id; - dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1); if (pdata == NULL || pdata->ldo[id] == NULL) @@ -580,17 +574,11 @@ static __devinit int wm831x_aldo_probe(struct platform_device *pdev) { struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); struct wm831x_pdata *pdata = wm831x->dev->platform_data; - int id; + int id = pdev->id % ARRAY_SIZE(pdata->ldo); struct wm831x_ldo *ldo; struct resource *res; int ret, irq; - if (pdata && pdata->wm831x_num) - id = (pdata->wm831x_num * 10) + 1; - else - id = 0; - id = pdev->id - id; - dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1); if (pdata == NULL || pdata->ldo[id] == NULL) @@ -776,18 +764,11 @@ static __devinit int wm831x_alive_ldo_probe(struct platform_device *pdev) { struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); struct wm831x_pdata *pdata = wm831x->dev->platform_data; - int id; + int id = pdev->id % ARRAY_SIZE(pdata->ldo); struct wm831x_ldo *ldo; struct resource *res; int ret; - if (pdata && pdata->wm831x_num) - id = (pdata->wm831x_num * 10) + 1; - else - id = 0; - id = pdev->id - id; - - dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1); if (pdata == NULL || pdata->ldo[id] == NULL) diff --git a/trunk/drivers/regulator/wm8994-regulator.c b/trunk/drivers/regulator/wm8994-regulator.c index 1a6a690f24db..35b2958d5106 100644 --- a/trunk/drivers/regulator/wm8994-regulator.c +++ b/trunk/drivers/regulator/wm8994-regulator.c @@ -43,7 +43,7 @@ static int wm8994_ldo_enable(struct regulator_dev *rdev) if (!ldo->enable) return 0; - gpio_set_value_cansleep(ldo->enable, 1); + gpio_set_value(ldo->enable, 1); ldo->is_enabled = true; return 0; @@ -57,7 +57,7 @@ static int wm8994_ldo_disable(struct regulator_dev *rdev) if (!ldo->enable) return -EINVAL; - gpio_set_value_cansleep(ldo->enable, 0); + gpio_set_value(ldo->enable, 0); ldo->is_enabled = false; return 0; diff --git a/trunk/drivers/spi/spi-pl022.c b/trunk/drivers/spi/spi-pl022.c index eba88c749fb1..730b4a37b823 100644 --- a/trunk/drivers/spi/spi-pl022.c +++ b/trunk/drivers/spi/spi-pl022.c @@ -2267,17 +2267,13 @@ static int __devexit pl022_remove(struct amba_device *adev) { struct pl022 *pl022 = amba_get_drvdata(adev); - int status = 0; + if (!pl022) return 0; /* Remove the queue */ - status = destroy_queue(pl022); - if (status != 0) { - dev_err(&adev->dev, - "queue remove failed (%d)\n", status); - return status; - } + if (destroy_queue(pl022) != 0) + dev_err(&adev->dev, "queue remove failed\n"); load_ssp_default_config(pl022); pl022_dma_remove(pl022); free_irq(adev->irq[0], pl022); @@ -2289,7 +2285,6 @@ pl022_remove(struct amba_device *adev) spi_unregister_master(pl022->master); spi_master_put(pl022->master); amba_set_drvdata(adev, NULL); - dev_dbg(&adev->dev, "remove succeeded\n"); return 0; } diff --git a/trunk/drivers/watchdog/Kconfig b/trunk/drivers/watchdog/Kconfig index 86b0735e6aa0..f441726ddf2b 100644 --- a/trunk/drivers/watchdog/Kconfig +++ b/trunk/drivers/watchdog/Kconfig @@ -36,6 +36,9 @@ config WATCHDOG_CORE and gives them the /dev/watchdog interface (and later also the sysfs interface). + To compile this driver as a module, choose M here: the module will + be called watchdog. + config WATCHDOG_NOWAYOUT bool "Disable watchdog shutdown on close" help diff --git a/trunk/drivers/watchdog/nv_tco.c b/trunk/drivers/watchdog/nv_tco.c index 809f41c30c44..afa78a54711e 100644 --- a/trunk/drivers/watchdog/nv_tco.c +++ b/trunk/drivers/watchdog/nv_tco.c @@ -458,15 +458,7 @@ static int __devexit nv_tco_remove(struct platform_device *dev) static void nv_tco_shutdown(struct platform_device *dev) { - u32 val; - tco_timer_stop(); - - /* Some BIOSes fail the POST (once) if the NO_REBOOT flag is not - * unset during shutdown. */ - pci_read_config_dword(tco_pci, MCP51_SMBUS_SETUP_B, &val); - val &= ~MCP51_SMBUS_SETUP_B_TCO_REBOOT; - pci_write_config_dword(tco_pci, MCP51_SMBUS_SETUP_B, val); } static struct platform_driver nv_tco_driver = { diff --git a/trunk/drivers/watchdog/shwdt.c b/trunk/drivers/watchdog/shwdt.c index a267dc078daf..db84f2322d1a 100644 --- a/trunk/drivers/watchdog/shwdt.c +++ b/trunk/drivers/watchdog/shwdt.c @@ -64,7 +64,7 @@ * misses its deadline, the kernel timer will allow the WDT to overflow. */ static int clock_division_ratio = WTCSR_CKS_4096; -#define next_ping_period(cks) (jiffies + msecs_to_jiffies(cks - 4)) +#define next_ping_period(cks) msecs_to_jiffies(cks - 4) static const struct watchdog_info sh_wdt_info; static struct platform_device *sh_wdt_dev; diff --git a/trunk/fs/9p/acl.c b/trunk/fs/9p/acl.c index 9a1d42630751..e9cb57f07546 100644 --- a/trunk/fs/9p/acl.c +++ b/trunk/fs/9p/acl.c @@ -182,11 +182,11 @@ int v9fs_set_create_acl(struct dentry *dentry, return 0; } -int v9fs_acl_mode(struct inode *dir, umode_t *modep, +int v9fs_acl_mode(struct inode *dir, mode_t *modep, struct posix_acl **dpacl, struct posix_acl **pacl) { int retval = 0; - umode_t mode = *modep; + mode_t mode = *modep; struct posix_acl *acl = NULL; if (!S_ISLNK(mode)) { @@ -319,7 +319,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; if (acl) { - umode_t mode = inode->i_mode; + mode_t mode = inode->i_mode; retval = posix_acl_equiv_mode(acl, &mode); if (retval < 0) goto err_out; diff --git a/trunk/fs/9p/acl.h b/trunk/fs/9p/acl.h index 559556411965..ddb7ae19d971 100644 --- a/trunk/fs/9p/acl.h +++ b/trunk/fs/9p/acl.h @@ -20,7 +20,7 @@ extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type); extern int v9fs_acl_chmod(struct dentry *); extern int v9fs_set_create_acl(struct dentry *, struct posix_acl **, struct posix_acl **); -extern int v9fs_acl_mode(struct inode *dir, umode_t *modep, +extern int v9fs_acl_mode(struct inode *dir, mode_t *modep, struct posix_acl **dpacl, struct posix_acl **pacl); #else #define v9fs_iop_get_acl NULL @@ -38,7 +38,7 @@ static inline int v9fs_set_create_acl(struct dentry *dentry, { return 0; } -static inline int v9fs_acl_mode(struct inode *dir, umode_t *modep, +static inline int v9fs_acl_mode(struct inode *dir, mode_t *modep, struct posix_acl **dpacl, struct posix_acl **pacl) { diff --git a/trunk/fs/9p/vfs_inode_dotl.c b/trunk/fs/9p/vfs_inode_dotl.c index b6c8ed205192..9a26dce5a99f 100644 --- a/trunk/fs/9p/vfs_inode_dotl.c +++ b/trunk/fs/9p/vfs_inode_dotl.c @@ -206,7 +206,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, int err = 0; gid_t gid; int flags; - umode_t mode; + mode_t mode; char *name = NULL; struct file *filp; struct p9_qid qid; @@ -348,7 +348,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct p9_fid *fid = NULL, *dfid = NULL; gid_t gid; char *name; - umode_t mode; + mode_t mode; struct inode *inode; struct p9_qid qid; struct dentry *dir_dentry; @@ -751,7 +751,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, int err; gid_t gid; char *name; - umode_t mode; + mode_t mode; struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL, *dfid = NULL; struct inode *inode; diff --git a/trunk/fs/block_dev.c b/trunk/fs/block_dev.c index f28680553288..f55aad4d1611 100644 --- a/trunk/fs/block_dev.c +++ b/trunk/fs/block_dev.c @@ -552,7 +552,6 @@ struct block_device *bdget(dev_t dev) if (inode->i_state & I_NEW) { bdev->bd_contains = NULL; - bdev->bd_super = NULL; bdev->bd_inode = inode; bdev->bd_block_size = (1 << inode->i_blkbits); bdev->bd_part_count = 0; diff --git a/trunk/fs/btrfs/acl.c b/trunk/fs/btrfs/acl.c index 4cc5c0164ed6..65a735d8f6e4 100644 --- a/trunk/fs/btrfs/acl.c +++ b/trunk/fs/btrfs/acl.c @@ -111,6 +111,7 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, int ret, size = 0; const char *name; char *value = NULL; + mode_t mode; if (acl) { ret = posix_acl_valid(acl); @@ -121,11 +122,13 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, switch (type) { case ACL_TYPE_ACCESS: + mode = inode->i_mode; name = POSIX_ACL_XATTR_ACCESS; if (acl) { - ret = posix_acl_equiv_mode(acl, &inode->i_mode); + ret = posix_acl_equiv_mode(acl, &mode); if (ret < 0) return ret; + inode->i_mode = mode; } ret = 0; break; @@ -219,16 +222,19 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans, } if (IS_POSIXACL(dir) && acl) { + mode_t mode = inode->i_mode; + if (S_ISDIR(inode->i_mode)) { ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_DEFAULT); if (ret) goto failed; } - ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); + ret = posix_acl_create(&acl, GFP_NOFS, &mode); if (ret < 0) return ret; + inode->i_mode = mode; if (ret > 0) { /* we need an acl */ ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); diff --git a/trunk/fs/btrfs/inode.c b/trunk/fs/btrfs/inode.c index ae762dab37f8..13e6255182e3 100644 --- a/trunk/fs/btrfs/inode.c +++ b/trunk/fs/btrfs/inode.c @@ -3993,19 +3993,12 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) struct btrfs_root *sub_root = root; struct btrfs_key location; int index; - int ret = 0; + int ret; if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - if (unlikely(d_need_lookup(dentry))) { - memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key)); - kfree(dentry->d_fsdata); - dentry->d_fsdata = NULL; - d_clear_need_lookup(dentry); - } else { - ret = btrfs_inode_by_name(dir, dentry, &location); - } + ret = btrfs_inode_by_name(dir, dentry, &location); if (ret < 0) return ERR_PTR(ret); @@ -4060,12 +4053,6 @@ static int btrfs_dentry_delete(const struct dentry *dentry) return 0; } -static void btrfs_dentry_release(struct dentry *dentry) -{ - if (dentry->d_fsdata) - kfree(dentry->d_fsdata); -} - static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { @@ -4088,7 +4075,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, struct btrfs_path *path; struct list_head ins_list; struct list_head del_list; - struct qstr q; int ret; struct extent_buffer *leaf; int slot; @@ -4178,7 +4164,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, while (di_cur < di_total) { struct btrfs_key location; - struct dentry *tmp; if (verify_dir_item(root, leaf, di)) break; @@ -4199,33 +4184,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; btrfs_dir_item_key_to_cpu(leaf, di, &location); - q.name = name_ptr; - q.len = name_len; - q.hash = full_name_hash(q.name, q.len); - tmp = d_lookup(filp->f_dentry, &q); - if (!tmp) { - struct btrfs_key *newkey; - - newkey = kzalloc(sizeof(struct btrfs_key), - GFP_NOFS); - if (!newkey) - goto no_dentry; - tmp = d_alloc(filp->f_dentry, &q); - if (!tmp) { - kfree(newkey); - dput(tmp); - goto no_dentry; - } - memcpy(newkey, &location, - sizeof(struct btrfs_key)); - tmp->d_fsdata = newkey; - tmp->d_flags |= DCACHE_NEED_LOOKUP; - d_rehash(tmp); - dput(tmp); - } else { - dput(tmp); - } -no_dentry: /* is this a reference to our own snapshot? If so * skip it */ @@ -7472,5 +7430,4 @@ static const struct inode_operations btrfs_symlink_inode_operations = { const struct dentry_operations btrfs_dentry_operations = { .d_delete = btrfs_dentry_delete, - .d_release = btrfs_dentry_release, }; diff --git a/trunk/fs/dcache.c b/trunk/fs/dcache.c index 2347cdb15abb..b05aac3a8cfc 100644 --- a/trunk/fs/dcache.c +++ b/trunk/fs/dcache.c @@ -301,27 +301,6 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) return parent; } -/* - * Unhash a dentry without inserting an RCU walk barrier or checking that - * dentry->d_lock is locked. The caller must take care of that, if - * appropriate. - */ -static void __d_shrink(struct dentry *dentry) -{ - if (!d_unhashed(dentry)) { - struct hlist_bl_head *b; - if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) - b = &dentry->d_sb->s_anon; - else - b = d_hash(dentry->d_parent, dentry->d_name.hash); - - hlist_bl_lock(b); - __hlist_bl_del(&dentry->d_hash); - dentry->d_hash.pprev = NULL; - hlist_bl_unlock(b); - } -} - /** * d_drop - drop a dentry * @dentry: dentry to drop @@ -340,7 +319,17 @@ static void __d_shrink(struct dentry *dentry) void __d_drop(struct dentry *dentry) { if (!d_unhashed(dentry)) { - __d_shrink(dentry); + struct hlist_bl_head *b; + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) + b = &dentry->d_sb->s_anon; + else + b = d_hash(dentry->d_parent, dentry->d_name.hash); + + hlist_bl_lock(b); + __hlist_bl_del(&dentry->d_hash); + dentry->d_hash.pprev = NULL; + hlist_bl_unlock(b); + dentry_rcuwalk_barrier(dentry); } } @@ -839,24 +828,44 @@ EXPORT_SYMBOL(shrink_dcache_sb); static void shrink_dcache_for_umount_subtree(struct dentry *dentry) { struct dentry *parent; + unsigned detached = 0; BUG_ON(!IS_ROOT(dentry)); + /* detach this root from the system */ + spin_lock(&dentry->d_lock); + dentry_lru_del(dentry); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); + for (;;) { /* descend to the first leaf in the current subtree */ - while (!list_empty(&dentry->d_subdirs)) + while (!list_empty(&dentry->d_subdirs)) { + struct dentry *loop; + + /* this is a branch with children - detach all of them + * from the system in one go */ + spin_lock(&dentry->d_lock); + list_for_each_entry(loop, &dentry->d_subdirs, + d_u.d_child) { + spin_lock_nested(&loop->d_lock, + DENTRY_D_LOCK_NESTED); + dentry_lru_del(loop); + __d_drop(loop); + spin_unlock(&loop->d_lock); + } + spin_unlock(&dentry->d_lock); + + /* move to the first child */ dentry = list_entry(dentry->d_subdirs.next, struct dentry, d_u.d_child); + } /* consume the dentries from this leaf up through its parents * until we find one with children or run out altogether */ do { struct inode *inode; - /* detach from the system */ - dentry_lru_del(dentry); - __d_shrink(dentry); - if (dentry->d_count != 0) { printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%s}" @@ -877,10 +886,14 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) list_del(&dentry->d_u.d_child); } else { parent = dentry->d_parent; + spin_lock(&parent->d_lock); parent->d_count--; list_del(&dentry->d_u.d_child); + spin_unlock(&parent->d_lock); } + detached++; + inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; @@ -925,7 +938,9 @@ void shrink_dcache_for_umount(struct super_block *sb) dentry = sb->s_root; sb->s_root = NULL; + spin_lock(&dentry->d_lock); dentry->d_count--; + spin_unlock(&dentry->d_lock); shrink_dcache_for_umount_subtree(dentry); while (!hlist_bl_empty(&sb->s_anon)) { diff --git a/trunk/fs/ext2/acl.c b/trunk/fs/ext2/acl.c index 35d6a3cfd9ff..52c053763942 100644 --- a/trunk/fs/ext2/acl.c +++ b/trunk/fs/ext2/acl.c @@ -194,10 +194,12 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) case ACL_TYPE_ACCESS: name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); + mode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); if (error < 0) return error; else { + inode->i_mode = mode; inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); if (error == 0) @@ -251,14 +253,16 @@ ext2_init_acl(struct inode *inode, struct inode *dir) inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { + mode_t mode = inode->i_mode; if (S_ISDIR(inode->i_mode)) { error = ext2_set_acl(inode, ACL_TYPE_DEFAULT, acl); if (error) goto cleanup; } - error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); + error = posix_acl_create(&acl, GFP_KERNEL, &mode); if (error < 0) return error; + inode->i_mode = mode; if (error > 0) { /* This is an extended ACL */ error = ext2_set_acl(inode, ACL_TYPE_ACCESS, acl); diff --git a/trunk/fs/ext3/acl.c b/trunk/fs/ext3/acl.c index 3091f62e55b6..6c29bf0df04a 100644 --- a/trunk/fs/ext3/acl.c +++ b/trunk/fs/ext3/acl.c @@ -199,10 +199,12 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, case ACL_TYPE_ACCESS: name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); + mode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); if (error < 0) return error; else { + inode->i_mode = mode; inode->i_ctime = CURRENT_TIME_SEC; ext3_mark_inode_dirty(handle, inode); if (error == 0) @@ -259,16 +261,19 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { + mode_t mode = inode->i_mode; + if (S_ISDIR(inode->i_mode)) { error = ext3_set_acl(handle, inode, ACL_TYPE_DEFAULT, acl); if (error) goto cleanup; } - error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); + error = posix_acl_create(&acl, GFP_NOFS, &mode); if (error < 0) return error; + inode->i_mode = mode; if (error > 0) { /* This is an extended ACL */ error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl); diff --git a/trunk/fs/ext4/Makefile b/trunk/fs/ext4/Makefile index 56fd8f865930..04109460ba9e 100644 --- a/trunk/fs/ext4/Makefile +++ b/trunk/fs/ext4/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ - mmp.o indirect.o + mmp.o ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o diff --git a/trunk/fs/ext4/acl.c b/trunk/fs/ext4/acl.c index a5c29bb3b835..dca2d1ded931 100644 --- a/trunk/fs/ext4/acl.c +++ b/trunk/fs/ext4/acl.c @@ -198,10 +198,12 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, case ACL_TYPE_ACCESS: name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); + mode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); if (error < 0) return error; else { + inode->i_mode = mode; inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); if (error == 0) @@ -257,16 +259,19 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { + mode_t mode = inode->i_mode; + if (S_ISDIR(inode->i_mode)) { error = ext4_set_acl(handle, inode, ACL_TYPE_DEFAULT, acl); if (error) goto cleanup; } - error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); + error = posix_acl_create(&acl, GFP_NOFS, &mode); if (error < 0) return error; + inode->i_mode = mode; if (error > 0) { /* This is an extended ACL */ error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, acl); diff --git a/trunk/fs/ext4/balloc.c b/trunk/fs/ext4/balloc.c index f8224adf496e..264f6949511e 100644 --- a/trunk/fs/ext4/balloc.c +++ b/trunk/fs/ext4/balloc.c @@ -620,51 +620,3 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) } -/** - * ext4_inode_to_goal_block - return a hint for block allocation - * @inode: inode for block allocation - * - * Return the ideal location to start allocating blocks for a - * newly created inode. - */ -ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - ext4_group_t block_group; - ext4_grpblk_t colour; - int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); - ext4_fsblk_t bg_start; - ext4_fsblk_t last_block; - - block_group = ei->i_block_group; - if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { - /* - * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME - * block groups per flexgroup, reserve the first block - * group for directories and special files. Regular - * files will start at the second block group. This - * tends to speed up directory access and improves - * fsck times. - */ - block_group &= ~(flex_size-1); - if (S_ISREG(inode->i_mode)) - block_group++; - } - bg_start = ext4_group_first_block_no(inode->i_sb, block_group); - last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; - - /* - * If we are doing delayed allocation, we don't need take - * colour into account. - */ - if (test_opt(inode->i_sb, DELALLOC)) - return bg_start; - - if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) - colour = (current->pid % 16) * - (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); - else - colour = (current->pid % 16) * ((last_block - bg_start) / 16); - return bg_start + colour; -} - diff --git a/trunk/fs/ext4/block_validity.c b/trunk/fs/ext4/block_validity.c index 8efb2f0a3447..fac90f3fba80 100644 --- a/trunk/fs/ext4/block_validity.c +++ b/trunk/fs/ext4/block_validity.c @@ -246,24 +246,3 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, return 1; } -int ext4_check_blockref(const char *function, unsigned int line, - struct inode *inode, __le32 *p, unsigned int max) -{ - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; - __le32 *bref = p; - unsigned int blk; - - while (bref < p+max) { - blk = le32_to_cpu(*bref++); - if (blk && - unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), - blk, 1))) { - es->s_last_error_block = cpu_to_le64(blk); - ext4_error_inode(inode, function, line, blk, - "invalid block"); - return -EIO; - } - } - return 0; -} - diff --git a/trunk/fs/ext4/ext4.h b/trunk/fs/ext4/ext4.h index e717dfd2f2b4..fa44df879711 100644 --- a/trunk/fs/ext4/ext4.h +++ b/trunk/fs/ext4/ext4.h @@ -526,7 +526,6 @@ struct ext4_new_group_data { #define EXT4_FREE_BLOCKS_METADATA 0x0001 #define EXT4_FREE_BLOCKS_FORGET 0x0002 #define EXT4_FREE_BLOCKS_VALIDATED 0x0004 -#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 /* * ioctl commands @@ -940,8 +939,6 @@ struct ext4_inode_info { #define ext4_find_next_zero_bit find_next_zero_bit_le #define ext4_find_next_bit find_next_bit_le -extern void ext4_set_bits(void *bm, int cur, int len); - /* * Maximal mount counts between two filesystem checks */ @@ -1129,8 +1126,7 @@ struct ext4_sb_info { struct journal_s *s_journal; struct list_head s_orphan; struct mutex s_orphan_lock; - unsigned long s_resize_flags; /* Flags indicating if there - is a resizer */ + struct mutex s_resize_lock; unsigned long s_commit_interval; u32 s_max_batch_time; u32 s_min_batch_time; @@ -1218,9 +1214,6 @@ struct ext4_sb_info { /* Kernel thread for multiple mount protection */ struct task_struct *s_mmp_tsk; - - /* record the last minlen when FITRIM is called. */ - atomic_t s_last_trim_minblks; }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) @@ -1750,7 +1743,6 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb, struct ext4_group_desc *desc); #define ext4_free_blocks_after_init(sb, group, desc) \ ext4_init_block_bitmap(sb, NULL, group, desc) -ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); /* dir.c */ extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, @@ -1801,7 +1793,7 @@ extern void ext4_free_blocks(handle_t *handle, struct inode *inode, unsigned long count, int flags); extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); -extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, +extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count); extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); @@ -1842,17 +1834,6 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern void ext4_da_update_reserve_space(struct inode *inode, int used, int quota_claim); - -/* indirect.c */ -extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, - struct ext4_map_blocks *map, int flags); -extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs); -extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); -extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk); -extern void ext4_ind_truncate(struct inode *inode); - /* ioctl.c */ extern long ext4_ioctl(struct file *, unsigned int, unsigned long); extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); @@ -1874,9 +1855,6 @@ extern int ext4_group_extend(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ -extern void *ext4_kvmalloc(size_t size, gfp_t flags); -extern void *ext4_kvzalloc(size_t size, gfp_t flags); -extern void ext4_kvfree(void *ptr); extern void __ext4_error(struct super_block *, const char *, unsigned int, const char *, ...) __attribute__ ((format (printf, 4, 5))); @@ -2089,19 +2067,11 @@ struct ext4_group_info { * 5 free 8-block regions. */ }; -#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 -#define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1 +#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 #define EXT4_MB_GRP_NEED_INIT(grp) \ (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) -#define EXT4_MB_GRP_WAS_TRIMMED(grp) \ - (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) -#define EXT4_MB_GRP_SET_TRIMMED(grp) \ - (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) -#define EXT4_MB_GRP_CLEAR_TRIMMED(grp) \ - (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) - #define EXT4_MAX_CONTENTION 8 #define EXT4_CONTENTION_THRESHOLD 2 @@ -2152,19 +2122,6 @@ static inline void ext4_mark_super_dirty(struct super_block *sb) sb->s_dirt =1; } -/* - * Block validity checking - */ -#define ext4_check_indirect_blockref(inode, bh) \ - ext4_check_blockref(__func__, __LINE__, inode, \ - (__le32 *)(bh)->b_data, \ - EXT4_ADDR_PER_BLOCK((inode)->i_sb)) - -#define ext4_ind_check_inode(inode) \ - ext4_check_blockref(__func__, __LINE__, inode, \ - EXT4_I(inode)->i_data, \ - EXT4_NDIR_BLOCKS) - /* * Inodes and files operations */ @@ -2194,8 +2151,6 @@ extern void ext4_exit_system_zone(void); extern int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, unsigned int count); -extern int ext4_check_blockref(const char *, unsigned int, - struct inode *, __le32 *, unsigned int); /* extents.c */ extern int ext4_ext_tree_init(handle_t *handle, struct inode *); @@ -2275,10 +2230,6 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; -#define EXT4_RESIZING 0 -extern int ext4_resize_begin(struct super_block *sb); -extern void ext4_resize_end(struct super_block *sb); - #endif /* __KERNEL__ */ #endif /* _EXT4_H */ diff --git a/trunk/fs/ext4/extents.c b/trunk/fs/ext4/extents.c index 57cf568a98ab..f815cc81e7a2 100644 --- a/trunk/fs/ext4/extents.c +++ b/trunk/fs/ext4/extents.c @@ -114,6 +114,12 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t block) { + struct ext4_inode_info *ei = EXT4_I(inode); + ext4_fsblk_t bg_start; + ext4_fsblk_t last_block; + ext4_grpblk_t colour; + ext4_group_t block_group; + int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); int depth; if (path) { @@ -155,7 +161,36 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, } /* OK. use inode's group */ - return ext4_inode_to_goal_block(inode); + block_group = ei->i_block_group; + if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { + /* + * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME + * block groups per flexgroup, reserve the first block + * group for directories and special files. Regular + * files will start at the second block group. This + * tends to speed up directory access and improves + * fsck times. + */ + block_group &= ~(flex_size-1); + if (S_ISREG(inode->i_mode)) + block_group++; + } + bg_start = ext4_group_first_block_no(inode->i_sb, block_group); + last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; + + /* + * If we are doing delayed allocation, we don't need take + * colour into account. + */ + if (test_opt(inode->i_sb, DELALLOC)) + return bg_start; + + if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) + colour = (current->pid % 16) * + (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); + else + colour = (current->pid % 16) * ((last_block - bg_start) / 16); + return bg_start + colour + block; } /* @@ -741,16 +776,6 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, logical, le32_to_cpu(curp->p_idx->ei_block)); return -EIO; } - - if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) - >= le16_to_cpu(curp->p_hdr->eh_max))) { - EXT4_ERROR_INODE(inode, - "eh_entries %d >= eh_max %d!", - le16_to_cpu(curp->p_hdr->eh_entries), - le16_to_cpu(curp->p_hdr->eh_max)); - return -EIO; - } - len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; if (logical > le32_to_cpu(curp->p_idx->ei_block)) { /* insert after */ @@ -780,6 +805,13 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, ext4_idx_store_pblock(ix, ptr); le16_add_cpu(&curp->p_hdr->eh_entries, 1); + if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) + > le16_to_cpu(curp->p_hdr->eh_max))) { + EXT4_ERROR_INODE(inode, + "logical %d == ei_block %d!", + logical, le32_to_cpu(curp->p_idx->ei_block)); + return -EIO; + } if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!"); return -EIO; @@ -1414,7 +1446,8 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) * ext4_ext_next_leaf_block: * returns first allocated block from next leaf or EXT_MAX_BLOCKS */ -static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path) +static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, + struct ext4_ext_path *path) { int depth; @@ -1724,6 +1757,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, goto merge; } +repeat: depth = ext_depth(inode); eh = path[depth].p_hdr; if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) @@ -1731,10 +1765,9 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, /* probably next leaf has space for us? */ fex = EXT_LAST_EXTENT(eh); - next = EXT_MAX_BLOCKS; - if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) - next = ext4_ext_next_leaf_block(path); - if (next != EXT_MAX_BLOCKS) { + next = ext4_ext_next_leaf_block(inode, path); + if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) + && next != EXT_MAX_BLOCKS) { ext_debug("next leaf block - %d\n", next); BUG_ON(npath != NULL); npath = ext4_ext_find_extent(inode, next, NULL); @@ -1746,7 +1779,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, ext_debug("next leaf isn't full(%d)\n", le16_to_cpu(eh->eh_entries)); path = npath; - goto has_space; + goto repeat; } ext_debug("next leaf has no free space(%d,%d)\n", le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); @@ -1806,7 +1839,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, ext4_ext_pblock(newext), ext4_ext_is_uninitialized(newext), ext4_ext_get_actual_len(newext), - nearex, len, nearex, nearex + 1); + nearex, len, nearex + 1, nearex + 2); memmove(nearex + 1, nearex, len); path[depth].p_ext = nearex; } @@ -2019,7 +2052,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, } /* - * ext4_ext_check_cache() + * ext4_ext_in_cache() * Checks to see if the given block is in the cache. * If it is, the cached extent is stored in the given * cache extent pointer. If the cached extent is a hole, @@ -2101,6 +2134,8 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, /* * ext4_ext_rm_idx: * removes index from the index block. + * It's used in truncate case only, thus all requests are for + * last index in the block only. */ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) @@ -2118,13 +2153,6 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path); if (err) return err; - - if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) { - int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx; - len *= sizeof(struct ext4_extent_idx); - memmove(path->p_idx, path->p_idx + 1, len); - } - le16_add_cpu(&path->p_hdr->eh_entries, -1); err = ext4_ext_dirty(handle, inode, path); if (err) @@ -2506,7 +2534,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path) return 1; } -static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) +static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, + ext4_lblk_t end) { struct super_block *sb = inode->i_sb; int depth = ext_depth(inode); @@ -2546,7 +2575,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) if (i == depth) { /* this is leaf block */ err = ext4_ext_rm_leaf(handle, inode, path, - start, EXT_MAX_BLOCKS - 1); + start, end); /* root level has p_bh == NULL, brelse() eats this */ brelse(path[i].p_bh); path[i].p_bh = NULL; @@ -3078,10 +3107,12 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, struct ext4_ext_path *path) { struct ext4_extent *ex; + struct ext4_extent_header *eh; int depth; int err = 0; depth = ext_depth(inode); + eh = path[depth].p_hdr; ex = path[depth].p_ext; ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" @@ -3326,8 +3357,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); /* check in cache */ - if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) && - ext4_ext_in_cache(inode, map->m_lblk, &newex)) { + if (ext4_ext_in_cache(inode, map->m_lblk, &newex) && + ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0)) { if (!newex.ee_start_lo && !newex.ee_start_hi) { if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { /* @@ -3466,27 +3497,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ext4_ext_mark_uninitialized(ex); - ext4_ext_invalidate_cache(inode); - - err = ext4_ext_rm_leaf(handle, inode, path, - map->m_lblk, map->m_lblk + punched_out); - - if (!err && path->p_hdr->eh_entries == 0) { - /* - * Punch hole freed all of this sub tree, - * so we need to correct eh_depth - */ - err = ext4_ext_get_access(handle, inode, path); - if (err == 0) { - ext_inode_hdr(inode)->eh_depth = 0; - ext_inode_hdr(inode)->eh_max = - cpu_to_le16(ext4_ext_space_root( - inode, 0)); - - err = ext4_ext_dirty( - handle, inode, path); - } - } + err = ext4_ext_remove_space(inode, map->m_lblk, + map->m_lblk + punched_out); goto out2; } @@ -3584,18 +3596,17 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, } err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len); - if (!err) - err = ext4_ext_insert_extent(handle, inode, path, - &newex, flags); + if (err) + goto out2; + + err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (err) { - int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? - EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; /* free data blocks we just allocated */ /* not a good idea to call discard here directly, * but otherwise we'd need to call it every free() */ ext4_discard_preallocations(inode); ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), - ext4_ext_get_actual_len(&newex), fb_flags); + ext4_ext_get_actual_len(&newex), 0); goto out2; } @@ -3688,7 +3699,7 @@ void ext4_ext_truncate(struct inode *inode) last_block = (inode->i_size + sb->s_blocksize - 1) >> EXT4_BLOCK_SIZE_BITS(sb); - err = ext4_ext_remove_space(inode, last_block); + err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); /* In a multi-transaction truncate, we only make the final * transaction synchronous. @@ -3824,7 +3835,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) blkbits) >> blkbits)) new_size = offset + len; else - new_size = ((loff_t) map.m_lblk + ret) << blkbits; + new_size = (map.m_lblk + ret) << blkbits; ext4_falloc_update_inode(inode, mode, new_size, (map.m_flags & EXT4_MAP_NEW)); diff --git a/trunk/fs/ext4/fsync.c b/trunk/fs/ext4/fsync.c index 036f78f7a1ef..da3bed3e0c29 100644 --- a/trunk/fs/ext4/fsync.c +++ b/trunk/fs/ext4/fsync.c @@ -129,30 +129,15 @@ static int ext4_sync_parent(struct inode *inode) { struct writeback_control wbc; struct dentry *dentry = NULL; - struct inode *next; int ret = 0; - if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) - return 0; - inode = igrab(inode); - while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { + while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); - dentry = NULL; - spin_lock(&inode->i_lock); - if (!list_empty(&inode->i_dentry)) { - dentry = list_first_entry(&inode->i_dentry, - struct dentry, d_alias); - dget(dentry); - } - spin_unlock(&inode->i_lock); - if (!dentry) + dentry = list_entry(inode->i_dentry.next, + struct dentry, d_alias); + if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) break; - next = igrab(dentry->d_parent->d_inode); - dput(dentry); - if (!next) - break; - iput(inode); - inode = next; + inode = dentry->d_parent->d_inode; ret = sync_mapping_buffers(inode->i_mapping); if (ret) break; @@ -163,7 +148,6 @@ static int ext4_sync_parent(struct inode *inode) if (ret) break; } - iput(inode); return ret; } diff --git a/trunk/fs/ext4/ialloc.c b/trunk/fs/ext4/ialloc.c index 9c63f273b550..21bb2f61e502 100644 --- a/trunk/fs/ext4/ialloc.c +++ b/trunk/fs/ext4/ialloc.c @@ -1287,7 +1287,7 @@ extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, group, used_blks, ext4_itable_unused_count(sb, gdp)); ret = 1; - goto err_out; + goto out; } blk = ext4_inode_table(sb, gdp) + used_blks; diff --git a/trunk/fs/ext4/indirect.c b/trunk/fs/ext4/indirect.c deleted file mode 100644 index b8602cde5b5a..000000000000 --- a/trunk/fs/ext4/indirect.c +++ /dev/null @@ -1,1482 +0,0 @@ -/* - * linux/fs/ext4/indirect.c - * - * from - * - * linux/fs/ext4/inode.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/fs/minix/inode.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Goal-directed block allocation by Stephen Tweedie - * (sct@redhat.com), 1993, 1998 - */ - -#include -#include "ext4_jbd2.h" -#include "truncate.h" - -#include - -typedef struct { - __le32 *p; - __le32 key; - struct buffer_head *bh; -} Indirect; - -static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) -{ - p->key = *(p->p = v); - p->bh = bh; -} - -/** - * ext4_block_to_path - parse the block number into array of offsets - * @inode: inode in question (we are only interested in its superblock) - * @i_block: block number to be parsed - * @offsets: array to store the offsets in - * @boundary: set this non-zero if the referred-to block is likely to be - * followed (on disk) by an indirect block. - * - * To store the locations of file's data ext4 uses a data structure common - * for UNIX filesystems - tree of pointers anchored in the inode, with - * data blocks at leaves and indirect blocks in intermediate nodes. - * This function translates the block number into path in that tree - - * return value is the path length and @offsets[n] is the offset of - * pointer to (n+1)th node in the nth one. If @block is out of range - * (negative or too large) warning is printed and zero returned. - * - * Note: function doesn't find node addresses, so no IO is needed. All - * we need to know is the capacity of indirect blocks (taken from the - * inode->i_sb). - */ - -/* - * Portability note: the last comparison (check that we fit into triple - * indirect block) is spelled differently, because otherwise on an - * architecture with 32-bit longs and 8Kb pages we might get into trouble - * if our filesystem had 8Kb blocks. We might use long long, but that would - * kill us on x86. Oh, well, at least the sign propagation does not matter - - * i_block would have to be negative in the very beginning, so we would not - * get there at all. - */ - -static int ext4_block_to_path(struct inode *inode, - ext4_lblk_t i_block, - ext4_lblk_t offsets[4], int *boundary) -{ - int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); - int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); - const long direct_blocks = EXT4_NDIR_BLOCKS, - indirect_blocks = ptrs, - double_blocks = (1 << (ptrs_bits * 2)); - int n = 0; - int final = 0; - - if (i_block < direct_blocks) { - offsets[n++] = i_block; - final = direct_blocks; - } else if ((i_block -= direct_blocks) < indirect_blocks) { - offsets[n++] = EXT4_IND_BLOCK; - offsets[n++] = i_block; - final = ptrs; - } else if ((i_block -= indirect_blocks) < double_blocks) { - offsets[n++] = EXT4_DIND_BLOCK; - offsets[n++] = i_block >> ptrs_bits; - offsets[n++] = i_block & (ptrs - 1); - final = ptrs; - } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { - offsets[n++] = EXT4_TIND_BLOCK; - offsets[n++] = i_block >> (ptrs_bits * 2); - offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); - offsets[n++] = i_block & (ptrs - 1); - final = ptrs; - } else { - ext4_warning(inode->i_sb, "block %lu > max in inode %lu", - i_block + direct_blocks + - indirect_blocks + double_blocks, inode->i_ino); - } - if (boundary) - *boundary = final - 1 - (i_block & (ptrs - 1)); - return n; -} - -/** - * ext4_get_branch - read the chain of indirect blocks leading to data - * @inode: inode in question - * @depth: depth of the chain (1 - direct pointer, etc.) - * @offsets: offsets of pointers in inode/indirect blocks - * @chain: place to store the result - * @err: here we store the error value - * - * Function fills the array of triples and returns %NULL - * if everything went OK or the pointer to the last filled triple - * (incomplete one) otherwise. Upon the return chain[i].key contains - * the number of (i+1)-th block in the chain (as it is stored in memory, - * i.e. little-endian 32-bit), chain[i].p contains the address of that - * number (it points into struct inode for i==0 and into the bh->b_data - * for i>0) and chain[i].bh points to the buffer_head of i-th indirect - * block for i>0 and NULL for i==0. In other words, it holds the block - * numbers of the chain, addresses they were taken from (and where we can - * verify that chain did not change) and buffer_heads hosting these - * numbers. - * - * Function stops when it stumbles upon zero pointer (absent block) - * (pointer to last triple returned, *@err == 0) - * or when it gets an IO error reading an indirect block - * (ditto, *@err == -EIO) - * or when it reads all @depth-1 indirect blocks successfully and finds - * the whole chain, all way to the data (returns %NULL, *err == 0). - * - * Need to be called with - * down_read(&EXT4_I(inode)->i_data_sem) - */ -static Indirect *ext4_get_branch(struct inode *inode, int depth, - ext4_lblk_t *offsets, - Indirect chain[4], int *err) -{ - struct super_block *sb = inode->i_sb; - Indirect *p = chain; - struct buffer_head *bh; - - *err = 0; - /* i_data is not going away, no lock needed */ - add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); - if (!p->key) - goto no_block; - while (--depth) { - bh = sb_getblk(sb, le32_to_cpu(p->key)); - if (unlikely(!bh)) - goto failure; - - if (!bh_uptodate_or_lock(bh)) { - if (bh_submit_read(bh) < 0) { - put_bh(bh); - goto failure; - } - /* validate block references */ - if (ext4_check_indirect_blockref(inode, bh)) { - put_bh(bh); - goto failure; - } - } - - add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); - /* Reader: end */ - if (!p->key) - goto no_block; - } - return NULL; - -failure: - *err = -EIO; -no_block: - return p; -} - -/** - * ext4_find_near - find a place for allocation with sufficient locality - * @inode: owner - * @ind: descriptor of indirect block. - * - * This function returns the preferred place for block allocation. - * It is used when heuristic for sequential allocation fails. - * Rules are: - * + if there is a block to the left of our position - allocate near it. - * + if pointer will live in indirect block - allocate near that block. - * + if pointer will live in inode - allocate in the same - * cylinder group. - * - * In the latter case we colour the starting block by the callers PID to - * prevent it from clashing with concurrent allocations for a different inode - * in the same block group. The PID is used here so that functionally related - * files will be close-by on-disk. - * - * Caller must make sure that @ind is valid and will stay that way. - */ -static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; - __le32 *p; - - /* Try to find previous block */ - for (p = ind->p - 1; p >= start; p--) { - if (*p) - return le32_to_cpu(*p); - } - - /* No such thing, so let's try location of indirect block */ - if (ind->bh) - return ind->bh->b_blocknr; - - /* - * It is going to be referred to from the inode itself? OK, just put it - * into the same cylinder group then. - */ - return ext4_inode_to_goal_block(inode); -} - -/** - * ext4_find_goal - find a preferred place for allocation. - * @inode: owner - * @block: block we want - * @partial: pointer to the last triple within a chain - * - * Normally this function find the preferred place for block allocation, - * returns it. - * Because this is only used for non-extent files, we limit the block nr - * to 32 bits. - */ -static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, - Indirect *partial) -{ - ext4_fsblk_t goal; - - /* - * XXX need to get goal block from mballoc's data structures - */ - - goal = ext4_find_near(inode, partial); - goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; - return goal; -} - -/** - * ext4_blks_to_allocate - Look up the block map and count the number - * of direct blocks need to be allocated for the given branch. - * - * @branch: chain of indirect blocks - * @k: number of blocks need for indirect blocks - * @blks: number of data blocks to be mapped. - * @blocks_to_boundary: the offset in the indirect block - * - * return the total number of blocks to be allocate, including the - * direct and indirect blocks. - */ -static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, - int blocks_to_boundary) -{ - unsigned int count = 0; - - /* - * Simple case, [t,d]Indirect block(s) has not allocated yet - * then it's clear blocks on that path have not allocated - */ - if (k > 0) { - /* right now we don't handle cross boundary allocation */ - if (blks < blocks_to_boundary + 1) - count += blks; - else - count += blocks_to_boundary + 1; - return count; - } - - count++; - while (count < blks && count <= blocks_to_boundary && - le32_to_cpu(*(branch[0].p + count)) == 0) { - count++; - } - return count; -} - -/** - * ext4_alloc_blocks: multiple allocate blocks needed for a branch - * @handle: handle for this transaction - * @inode: inode which needs allocated blocks - * @iblock: the logical block to start allocated at - * @goal: preferred physical block of allocation - * @indirect_blks: the number of blocks need to allocate for indirect - * blocks - * @blks: number of desired blocks - * @new_blocks: on return it will store the new block numbers for - * the indirect blocks(if needed) and the first direct block, - * @err: on return it will store the error code - * - * This function will return the number of blocks allocated as - * requested by the passed-in parameters. - */ -static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, ext4_fsblk_t goal, - int indirect_blks, int blks, - ext4_fsblk_t new_blocks[4], int *err) -{ - struct ext4_allocation_request ar; - int target, i; - unsigned long count = 0, blk_allocated = 0; - int index = 0; - ext4_fsblk_t current_block = 0; - int ret = 0; - - /* - * Here we try to allocate the requested multiple blocks at once, - * on a best-effort basis. - * To build a branch, we should allocate blocks for - * the indirect blocks(if not allocated yet), and at least - * the first direct block of this branch. That's the - * minimum number of blocks need to allocate(required) - */ - /* first we try to allocate the indirect blocks */ - target = indirect_blks; - while (target > 0) { - count = target; - /* allocating blocks for indirect blocks and direct blocks */ - current_block = ext4_new_meta_blocks(handle, inode, goal, - 0, &count, err); - if (*err) - goto failed_out; - - if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) { - EXT4_ERROR_INODE(inode, - "current_block %llu + count %lu > %d!", - current_block, count, - EXT4_MAX_BLOCK_FILE_PHYS); - *err = -EIO; - goto failed_out; - } - - target -= count; - /* allocate blocks for indirect blocks */ - while (index < indirect_blks && count) { - new_blocks[index++] = current_block++; - count--; - } - if (count > 0) { - /* - * save the new block number - * for the first direct block - */ - new_blocks[index] = current_block; - printk(KERN_INFO "%s returned more blocks than " - "requested\n", __func__); - WARN_ON(1); - break; - } - } - - target = blks - count ; - blk_allocated = count; - if (!target) - goto allocated; - /* Now allocate data blocks */ - memset(&ar, 0, sizeof(ar)); - ar.inode = inode; - ar.goal = goal; - ar.len = target; - ar.logical = iblock; - if (S_ISREG(inode->i_mode)) - /* enable in-core preallocation only for regular files */ - ar.flags = EXT4_MB_HINT_DATA; - - current_block = ext4_mb_new_blocks(handle, &ar, err); - if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) { - EXT4_ERROR_INODE(inode, - "current_block %llu + ar.len %d > %d!", - current_block, ar.len, - EXT4_MAX_BLOCK_FILE_PHYS); - *err = -EIO; - goto failed_out; - } - - if (*err && (target == blks)) { - /* - * if the allocation failed and we didn't allocate - * any blocks before - */ - goto failed_out; - } - if (!*err) { - if (target == blks) { - /* - * save the new block number - * for the first direct block - */ - new_blocks[index] = current_block; - } - blk_allocated += ar.len; - } -allocated: - /* total number of blocks allocated for direct blocks */ - ret = blk_allocated; - *err = 0; - return ret; -failed_out: - for (i = 0; i < index; i++) - ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); - return ret; -} - -/** - * ext4_alloc_branch - allocate and set up a chain of blocks. - * @handle: handle for this transaction - * @inode: owner - * @indirect_blks: number of allocated indirect blocks - * @blks: number of allocated direct blocks - * @goal: preferred place for allocation - * @offsets: offsets (in the blocks) to store the pointers to next. - * @branch: place to store the chain in. - * - * This function allocates blocks, zeroes out all but the last one, - * links them into chain and (if we are synchronous) writes them to disk. - * In other words, it prepares a branch that can be spliced onto the - * inode. It stores the information about that chain in the branch[], in - * the same format as ext4_get_branch() would do. We are calling it after - * we had read the existing part of chain and partial points to the last - * triple of that (one with zero ->key). Upon the exit we have the same - * picture as after the successful ext4_get_block(), except that in one - * place chain is disconnected - *branch->p is still zero (we did not - * set the last link), but branch->key contains the number that should - * be placed into *branch->p to fill that gap. - * - * If allocation fails we free all blocks we've allocated (and forget - * their buffer_heads) and return the error value the from failed - * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain - * as described above and return 0. - */ -static int ext4_alloc_branch(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, int indirect_blks, - int *blks, ext4_fsblk_t goal, - ext4_lblk_t *offsets, Indirect *branch) -{ - int blocksize = inode->i_sb->s_blocksize; - int i, n = 0; - int err = 0; - struct buffer_head *bh; - int num; - ext4_fsblk_t new_blocks[4]; - ext4_fsblk_t current_block; - - num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks, - *blks, new_blocks, &err); - if (err) - return err; - - branch[0].key = cpu_to_le32(new_blocks[0]); - /* - * metadata blocks and data blocks are allocated. - */ - for (n = 1; n <= indirect_blks; n++) { - /* - * Get buffer_head for parent block, zero it out - * and set the pointer to new one, then send - * parent to disk. - */ - bh = sb_getblk(inode->i_sb, new_blocks[n-1]); - if (unlikely(!bh)) { - err = -EIO; - goto failed; - } - - branch[n].bh = bh; - lock_buffer(bh); - BUFFER_TRACE(bh, "call get_create_access"); - err = ext4_journal_get_create_access(handle, bh); - if (err) { - /* Don't brelse(bh) here; it's done in - * ext4_journal_forget() below */ - unlock_buffer(bh); - goto failed; - } - - memset(bh->b_data, 0, blocksize); - branch[n].p = (__le32 *) bh->b_data + offsets[n]; - branch[n].key = cpu_to_le32(new_blocks[n]); - *branch[n].p = branch[n].key; - if (n == indirect_blks) { - current_block = new_blocks[n]; - /* - * End of chain, update the last new metablock of - * the chain to point to the new allocated - * data blocks numbers - */ - for (i = 1; i < num; i++) - *(branch[n].p + i) = cpu_to_le32(++current_block); - } - BUFFER_TRACE(bh, "marking uptodate"); - set_buffer_uptodate(bh); - unlock_buffer(bh); - - BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, inode, bh); - if (err) - goto failed; - } - *blks = num; - return err; -failed: - /* Allocation failed, free what we already allocated */ - ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0); - for (i = 1; i <= n ; i++) { - /* - * branch[i].bh is newly allocated, so there is no - * need to revoke the block, which is why we don't - * need to set EXT4_FREE_BLOCKS_METADATA. - */ - ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, - EXT4_FREE_BLOCKS_FORGET); - } - for (i = n+1; i < indirect_blks; i++) - ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); - - ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0); - - return err; -} - -/** - * ext4_splice_branch - splice the allocated branch onto inode. - * @handle: handle for this transaction - * @inode: owner - * @block: (logical) number of block we are adding - * @chain: chain of indirect blocks (with a missing link - see - * ext4_alloc_branch) - * @where: location of missing link - * @num: number of indirect blocks we are adding - * @blks: number of direct blocks we are adding - * - * This function fills the missing link and does all housekeeping needed in - * inode (->i_blocks, etc.). In case of success we end up with the full - * chain to new block and return 0. - */ -static int ext4_splice_branch(handle_t *handle, struct inode *inode, - ext4_lblk_t block, Indirect *where, int num, - int blks) -{ - int i; - int err = 0; - ext4_fsblk_t current_block; - - /* - * If we're splicing into a [td]indirect block (as opposed to the - * inode) then we need to get write access to the [td]indirect block - * before the splice. - */ - if (where->bh) { - BUFFER_TRACE(where->bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, where->bh); - if (err) - goto err_out; - } - /* That's it */ - - *where->p = where->key; - - /* - * Update the host buffer_head or inode to point to more just allocated - * direct blocks blocks - */ - if (num == 0 && blks > 1) { - current_block = le32_to_cpu(where->key) + 1; - for (i = 1; i < blks; i++) - *(where->p + i) = cpu_to_le32(current_block++); - } - - /* We are done with atomic stuff, now do the rest of housekeeping */ - /* had we spliced it onto indirect block? */ - if (where->bh) { - /* - * If we spliced it onto an indirect block, we haven't - * altered the inode. Note however that if it is being spliced - * onto an indirect block at the very end of the file (the - * file is growing) then we *will* alter the inode to reflect - * the new i_size. But that is not done here - it is done in - * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. - */ - jbd_debug(5, "splicing indirect only\n"); - BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, inode, where->bh); - if (err) - goto err_out; - } else { - /* - * OK, we spliced it into the inode itself on a direct block. - */ - ext4_mark_inode_dirty(handle, inode); - jbd_debug(5, "splicing direct\n"); - } - return err; - -err_out: - for (i = 1; i <= num; i++) { - /* - * branch[i].bh is newly allocated, so there is no - * need to revoke the block, which is why we don't - * need to set EXT4_FREE_BLOCKS_METADATA. - */ - ext4_free_blocks(handle, inode, where[i].bh, 0, 1, - EXT4_FREE_BLOCKS_FORGET); - } - ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), - blks, 0); - - return err; -} - -/* - * The ext4_ind_map_blocks() function handles non-extents inodes - * (i.e., using the traditional indirect/double-indirect i_blocks - * scheme) for ext4_map_blocks(). - * - * Allocation strategy is simple: if we have to allocate something, we will - * have to go the whole way to leaf. So let's do it before attaching anything - * to tree, set linkage between the newborn blocks, write them if sync is - * required, recheck the path, free and repeat if check fails, otherwise - * set the last missing link (that will protect us from any truncate-generated - * removals - all blocks on the path are immune now) and possibly force the - * write on the parent block. - * That has a nice additional property: no special recovery from the failed - * allocations is needed - we simply release blocks and do not touch anything - * reachable from inode. - * - * `handle' can be NULL if create == 0. - * - * return > 0, # of blocks mapped or allocated. - * return = 0, if plain lookup failed. - * return < 0, error case. - * - * The ext4_ind_get_blocks() function should be called with - * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem - * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or - * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system - * blocks. - */ -int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, - struct ext4_map_blocks *map, - int flags) -{ - int err = -EIO; - ext4_lblk_t offsets[4]; - Indirect chain[4]; - Indirect *partial; - ext4_fsblk_t goal; - int indirect_blks; - int blocks_to_boundary = 0; - int depth; - int count = 0; - ext4_fsblk_t first_block = 0; - - trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); - J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); - J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); - depth = ext4_block_to_path(inode, map->m_lblk, offsets, - &blocks_to_boundary); - - if (depth == 0) - goto out; - - partial = ext4_get_branch(inode, depth, offsets, chain, &err); - - /* Simplest case - block found, no allocation needed */ - if (!partial) { - first_block = le32_to_cpu(chain[depth - 1].key); - count++; - /*map more blocks*/ - while (count < map->m_len && count <= blocks_to_boundary) { - ext4_fsblk_t blk; - - blk = le32_to_cpu(*(chain[depth-1].p + count)); - - if (blk == first_block + count) - count++; - else - break; - } - goto got_it; - } - - /* Next simple case - plain lookup or failed read of indirect block */ - if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) - goto cleanup; - - /* - * Okay, we need to do block allocation. - */ - goal = ext4_find_goal(inode, map->m_lblk, partial); - - /* the number of blocks need to allocate for [d,t]indirect blocks */ - indirect_blks = (chain + depth) - partial - 1; - - /* - * Next look up the indirect map to count the totoal number of - * direct blocks to allocate for this branch. - */ - count = ext4_blks_to_allocate(partial, indirect_blks, - map->m_len, blocks_to_boundary); - /* - * Block out ext4_truncate while we alter the tree - */ - err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, - &count, goal, - offsets + (partial - chain), partial); - - /* - * The ext4_splice_branch call will free and forget any buffers - * on the new chain if there is a failure, but that risks using - * up transaction credits, especially for bitmaps where the - * credits cannot be returned. Can we handle this somehow? We - * may need to return -EAGAIN upwards in the worst case. --sct - */ - if (!err) - err = ext4_splice_branch(handle, inode, map->m_lblk, - partial, indirect_blks, count); - if (err) - goto cleanup; - - map->m_flags |= EXT4_MAP_NEW; - - ext4_update_inode_fsync_trans(handle, inode, 1); -got_it: - map->m_flags |= EXT4_MAP_MAPPED; - map->m_pblk = le32_to_cpu(chain[depth-1].key); - map->m_len = count; - if (count > blocks_to_boundary) - map->m_flags |= EXT4_MAP_BOUNDARY; - err = count; - /* Clean up and exit */ - partial = chain + depth - 1; /* the whole chain */ -cleanup: - while (partial > chain) { - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); - partial--; - } -out: - trace_ext4_ind_map_blocks_exit(inode, map->m_lblk, - map->m_pblk, map->m_len, err); - return err; -} - -/* - * O_DIRECT for ext3 (or indirect map) based files - * - * If the O_DIRECT write will extend the file then add this inode to the - * orphan list. So recovery will truncate it back to the original size - * if the machine crashes during the write. - * - * If the O_DIRECT write is intantiating holes inside i_size and the machine - * crashes then stale disk data _may_ be exposed inside the file. But current - * VFS code falls back into buffered path in that case so we are safe. - */ -ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct ext4_inode_info *ei = EXT4_I(inode); - handle_t *handle; - ssize_t ret; - int orphan = 0; - size_t count = iov_length(iov, nr_segs); - int retries = 0; - - if (rw == WRITE) { - loff_t final_size = offset + count; - - if (final_size > inode->i_size) { - /* Credits for sb + inode write */ - handle = ext4_journal_start(inode, 2); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - ret = ext4_orphan_add(handle, inode); - if (ret) { - ext4_journal_stop(handle); - goto out; - } - orphan = 1; - ei->i_disksize = inode->i_size; - ext4_journal_stop(handle); - } - } - -retry: - if (rw == READ && ext4_should_dioread_nolock(inode)) - ret = __blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, iov, - offset, nr_segs, - ext4_get_block, NULL, NULL, 0); - else { - ret = blockdev_direct_IO(rw, iocb, inode, iov, - offset, nr_segs, ext4_get_block); - - if (unlikely((rw & WRITE) && ret < 0)) { - loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); - - if (end > isize) - ext4_truncate_failed_write(inode); - } - } - if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) - goto retry; - - if (orphan) { - int err; - - /* Credits for sb + inode write */ - handle = ext4_journal_start(inode, 2); - if (IS_ERR(handle)) { - /* This is really bad luck. We've written the data - * but cannot extend i_size. Bail out and pretend - * the write failed... */ - ret = PTR_ERR(handle); - if (inode->i_nlink) - ext4_orphan_del(NULL, inode); - - goto out; - } - if (inode->i_nlink) - ext4_orphan_del(handle, inode); - if (ret > 0) { - loff_t end = offset + ret; - if (end > inode->i_size) { - ei->i_disksize = end; - i_size_write(inode, end); - /* - * We're going to return a positive `ret' - * here due to non-zero-length I/O, so there's - * no way of reporting error returns from - * ext4_mark_inode_dirty() to userspace. So - * ignore it. - */ - ext4_mark_inode_dirty(handle, inode); - } - } - err = ext4_journal_stop(handle); - if (ret == 0) - ret = err; - } -out: - return ret; -} - -/* - * Calculate the number of metadata blocks need to reserve - * to allocate a new block at @lblocks for non extent file based file - */ -int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); - int blk_bits; - - if (lblock < EXT4_NDIR_BLOCKS) - return 0; - - lblock -= EXT4_NDIR_BLOCKS; - - if (ei->i_da_metadata_calc_len && - (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { - ei->i_da_metadata_calc_len++; - return 0; - } - ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; - ei->i_da_metadata_calc_len = 1; - blk_bits = order_base_2(lblock); - return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; -} - -int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk) -{ - int indirects; - - /* if nrblocks are contiguous */ - if (chunk) { - /* - * With N contiguous data blocks, we need at most - * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, - * 2 dindirect blocks, and 1 tindirect block - */ - return DIV_ROUND_UP(nrblocks, - EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; - } - /* - * if nrblocks are not contiguous, worse case, each block touch - * a indirect block, and each indirect block touch a double indirect - * block, plus a triple indirect block - */ - indirects = nrblocks * 2 + 1; - return indirects; -} - -/* - * Truncate transactions can be complex and absolutely huge. So we need to - * be able to restart the transaction at a conventient checkpoint to make - * sure we don't overflow the journal. - * - * start_transaction gets us a new handle for a truncate transaction, - * and extend_transaction tries to extend the existing one a bit. If - * extend fails, we need to propagate the failure up and restart the - * transaction in the top-level truncate loop. --sct - */ -static handle_t *start_transaction(struct inode *inode) -{ - handle_t *result; - - result = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)); - if (!IS_ERR(result)) - return result; - - ext4_std_error(inode->i_sb, PTR_ERR(result)); - return result; -} - -/* - * Try to extend this transaction for the purposes of truncation. - * - * Returns 0 if we managed to create more room. If we can't create more - * room, and the transaction must be restarted we return 1. - */ -static int try_to_extend_transaction(handle_t *handle, struct inode *inode) -{ - if (!ext4_handle_valid(handle)) - return 0; - if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) - return 0; - if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode))) - return 0; - return 1; -} - -/* - * Probably it should be a library function... search for first non-zero word - * or memcmp with zero_page, whatever is better for particular architecture. - * Linus? - */ -static inline int all_zeroes(__le32 *p, __le32 *q) -{ - while (p < q) - if (*p++) - return 0; - return 1; -} - -/** - * ext4_find_shared - find the indirect blocks for partial truncation. - * @inode: inode in question - * @depth: depth of the affected branch - * @offsets: offsets of pointers in that branch (see ext4_block_to_path) - * @chain: place to store the pointers to partial indirect blocks - * @top: place to the (detached) top of branch - * - * This is a helper function used by ext4_truncate(). - * - * When we do truncate() we may have to clean the ends of several - * indirect blocks but leave the blocks themselves alive. Block is - * partially truncated if some data below the new i_size is referred - * from it (and it is on the path to the first completely truncated - * data block, indeed). We have to free the top of that path along - * with everything to the right of the path. Since no allocation - * past the truncation point is possible until ext4_truncate() - * finishes, we may safely do the latter, but top of branch may - * require special attention - pageout below the truncation point - * might try to populate it. - * - * We atomically detach the top of branch from the tree, store the - * block number of its root in *@top, pointers to buffer_heads of - * partially truncated blocks - in @chain[].bh and pointers to - * their last elements that should not be removed - in - * @chain[].p. Return value is the pointer to last filled element - * of @chain. - * - * The work left to caller to do the actual freeing of subtrees: - * a) free the subtree starting from *@top - * b) free the subtrees whose roots are stored in - * (@chain[i].p+1 .. end of @chain[i].bh->b_data) - * c) free the subtrees growing from the inode past the @chain[0]. - * (no partially truncated stuff there). */ - -static Indirect *ext4_find_shared(struct inode *inode, int depth, - ext4_lblk_t offsets[4], Indirect chain[4], - __le32 *top) -{ - Indirect *partial, *p; - int k, err; - - *top = 0; - /* Make k index the deepest non-null offset + 1 */ - for (k = depth; k > 1 && !offsets[k-1]; k--) - ; - partial = ext4_get_branch(inode, k, offsets, chain, &err); - /* Writer: pointers */ - if (!partial) - partial = chain + k-1; - /* - * If the branch acquired continuation since we've looked at it - - * fine, it should all survive and (new) top doesn't belong to us. - */ - if (!partial->key && *partial->p) - /* Writer: end */ - goto no_top; - for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--) - ; - /* - * OK, we've found the last block that must survive. The rest of our - * branch should be detached before unlocking. However, if that rest - * of branch is all ours and does not grow immediately from the inode - * it's easier to cheat and just decrement partial->p. - */ - if (p == chain + k - 1 && p > chain) { - p->p--; - } else { - *top = *p->p; - /* Nope, don't do this in ext4. Must leave the tree intact */ -#if 0 - *p->p = 0; -#endif - } - /* Writer: end */ - - while (partial > p) { - brelse(partial->bh); - partial--; - } -no_top: - return partial; -} - -/* - * Zero a number of block pointers in either an inode or an indirect block. - * If we restart the transaction we must again get write access to the - * indirect block for further modification. - * - * We release `count' blocks on disk, but (last - first) may be greater - * than `count' because there can be holes in there. - * - * Return 0 on success, 1 on invalid block range - * and < 0 on fatal error. - */ -static int ext4_clear_blocks(handle_t *handle, struct inode *inode, - struct buffer_head *bh, - ext4_fsblk_t block_to_free, - unsigned long count, __le32 *first, - __le32 *last) -{ - __le32 *p; - int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; - int err; - - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - flags |= EXT4_FREE_BLOCKS_METADATA; - - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, - count)) { - EXT4_ERROR_INODE(inode, "attempt to clear invalid " - "blocks %llu len %lu", - (unsigned long long) block_to_free, count); - return 1; - } - - if (try_to_extend_transaction(handle, inode)) { - if (bh) { - BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, inode, bh); - if (unlikely(err)) - goto out_err; - } - err = ext4_mark_inode_dirty(handle, inode); - if (unlikely(err)) - goto out_err; - err = ext4_truncate_restart_trans(handle, inode, - ext4_blocks_for_truncate(inode)); - if (unlikely(err)) - goto out_err; - if (bh) { - BUFFER_TRACE(bh, "retaking write access"); - err = ext4_journal_get_write_access(handle, bh); - if (unlikely(err)) - goto out_err; - } - } - - for (p = first; p < last; p++) - *p = 0; - - ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags); - return 0; -out_err: - ext4_std_error(inode->i_sb, err); - return err; -} - -/** - * ext4_free_data - free a list of data blocks - * @handle: handle for this transaction - * @inode: inode we are dealing with - * @this_bh: indirect buffer_head which contains *@first and *@last - * @first: array of block numbers - * @last: points immediately past the end of array - * - * We are freeing all blocks referred from that array (numbers are stored as - * little-endian 32-bit) and updating @inode->i_blocks appropriately. - * - * We accumulate contiguous runs of blocks to free. Conveniently, if these - * blocks are contiguous then releasing them at one time will only affect one - * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't - * actually use a lot of journal space. - * - * @this_bh will be %NULL if @first and @last point into the inode's direct - * block pointers. - */ -static void ext4_free_data(handle_t *handle, struct inode *inode, - struct buffer_head *this_bh, - __le32 *first, __le32 *last) -{ - ext4_fsblk_t block_to_free = 0; /* Starting block # of a run */ - unsigned long count = 0; /* Number of blocks in the run */ - __le32 *block_to_free_p = NULL; /* Pointer into inode/ind - corresponding to - block_to_free */ - ext4_fsblk_t nr; /* Current block # */ - __le32 *p; /* Pointer into inode/ind - for current block */ - int err = 0; - - if (this_bh) { /* For indirect block */ - BUFFER_TRACE(this_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, this_bh); - /* Important: if we can't update the indirect pointers - * to the blocks, we can't free them. */ - if (err) - return; - } - - for (p = first; p < last; p++) { - nr = le32_to_cpu(*p); - if (nr) { - /* accumulate blocks to free if they're contiguous */ - if (count == 0) { - block_to_free = nr; - block_to_free_p = p; - count = 1; - } else if (nr == block_to_free + count) { - count++; - } else { - err = ext4_clear_blocks(handle, inode, this_bh, - block_to_free, count, - block_to_free_p, p); - if (err) - break; - block_to_free = nr; - block_to_free_p = p; - count = 1; - } - } - } - - if (!err && count > 0) - err = ext4_clear_blocks(handle, inode, this_bh, block_to_free, - count, block_to_free_p, p); - if (err < 0) - /* fatal error */ - return; - - if (this_bh) { - BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); - - /* - * The buffer head should have an attached journal head at this - * point. However, if the data is corrupted and an indirect - * block pointed to itself, it would have been detached when - * the block was cleared. Check for this instead of OOPSing. - */ - if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) - ext4_handle_dirty_metadata(handle, inode, this_bh); - else - EXT4_ERROR_INODE(inode, - "circular indirect block detected at " - "block %llu", - (unsigned long long) this_bh->b_blocknr); - } -} - -/** - * ext4_free_branches - free an array of branches - * @handle: JBD handle for this transaction - * @inode: inode we are dealing with - * @parent_bh: the buffer_head which contains *@first and *@last - * @first: array of block numbers - * @last: pointer immediately past the end of array - * @depth: depth of the branches to free - * - * We are freeing all blocks referred from these branches (numbers are - * stored as little-endian 32-bit) and updating @inode->i_blocks - * appropriately. - */ -static void ext4_free_branches(handle_t *handle, struct inode *inode, - struct buffer_head *parent_bh, - __le32 *first, __le32 *last, int depth) -{ - ext4_fsblk_t nr; - __le32 *p; - - if (ext4_handle_is_aborted(handle)) - return; - - if (depth--) { - struct buffer_head *bh; - int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); - p = last; - while (--p >= first) { - nr = le32_to_cpu(*p); - if (!nr) - continue; /* A hole */ - - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), - nr, 1)) { - EXT4_ERROR_INODE(inode, - "invalid indirect mapped " - "block %lu (level %d)", - (unsigned long) nr, depth); - break; - } - - /* Go read the buffer for the next level down */ - bh = sb_bread(inode->i_sb, nr); - - /* - * A read failure? Report error and clear slot - * (should be rare). - */ - if (!bh) { - EXT4_ERROR_INODE_BLOCK(inode, nr, - "Read failure"); - continue; - } - - /* This zaps the entire block. Bottom up. */ - BUFFER_TRACE(bh, "free child branches"); - ext4_free_branches(handle, inode, bh, - (__le32 *) bh->b_data, - (__le32 *) bh->b_data + addr_per_block, - depth); - brelse(bh); - - /* - * Everything below this this pointer has been - * released. Now let this top-of-subtree go. - * - * We want the freeing of this indirect block to be - * atomic in the journal with the updating of the - * bitmap block which owns it. So make some room in - * the journal. - * - * We zero the parent pointer *after* freeing its - * pointee in the bitmaps, so if extend_transaction() - * for some reason fails to put the bitmap changes and - * the release into the same transaction, recovery - * will merely complain about releasing a free block, - * rather than leaking blocks. - */ - if (ext4_handle_is_aborted(handle)) - return; - if (try_to_extend_transaction(handle, inode)) { - ext4_mark_inode_dirty(handle, inode); - ext4_truncate_restart_trans(handle, inode, - ext4_blocks_for_truncate(inode)); - } - - /* - * The forget flag here is critical because if - * we are journaling (and not doing data - * journaling), we have to make sure a revoke - * record is written to prevent the journal - * replay from overwriting the (former) - * indirect block if it gets reallocated as a - * data block. This must happen in the same - * transaction where the data blocks are - * actually freed. - */ - ext4_free_blocks(handle, inode, NULL, nr, 1, - EXT4_FREE_BLOCKS_METADATA| - EXT4_FREE_BLOCKS_FORGET); - - if (parent_bh) { - /* - * The block which we have just freed is - * pointed to by an indirect block: journal it - */ - BUFFER_TRACE(parent_bh, "get_write_access"); - if (!ext4_journal_get_write_access(handle, - parent_bh)){ - *p = 0; - BUFFER_TRACE(parent_bh, - "call ext4_handle_dirty_metadata"); - ext4_handle_dirty_metadata(handle, - inode, - parent_bh); - } - } - } - } else { - /* We have reached the bottom of the tree. */ - BUFFER_TRACE(parent_bh, "free data blocks"); - ext4_free_data(handle, inode, parent_bh, first, last); - } -} - -void ext4_ind_truncate(struct inode *inode) -{ - handle_t *handle; - struct ext4_inode_info *ei = EXT4_I(inode); - __le32 *i_data = ei->i_data; - int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); - struct address_space *mapping = inode->i_mapping; - ext4_lblk_t offsets[4]; - Indirect chain[4]; - Indirect *partial; - __le32 nr = 0; - int n = 0; - ext4_lblk_t last_block, max_block; - unsigned blocksize = inode->i_sb->s_blocksize; - - handle = start_transaction(inode); - if (IS_ERR(handle)) - return; /* AKPM: return what? */ - - last_block = (inode->i_size + blocksize-1) - >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); - max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) - >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); - - if (inode->i_size & (blocksize - 1)) - if (ext4_block_truncate_page(handle, mapping, inode->i_size)) - goto out_stop; - - if (last_block != max_block) { - n = ext4_block_to_path(inode, last_block, offsets, NULL); - if (n == 0) - goto out_stop; /* error */ - } - - /* - * OK. This truncate is going to happen. We add the inode to the - * orphan list, so that if this truncate spans multiple transactions, - * and we crash, we will resume the truncate when the filesystem - * recovers. It also marks the inode dirty, to catch the new size. - * - * Implication: the file must always be in a sane, consistent - * truncatable state while each transaction commits. - */ - if (ext4_orphan_add(handle, inode)) - goto out_stop; - - /* - * From here we block out all ext4_get_block() callers who want to - * modify the block allocation tree. - */ - down_write(&ei->i_data_sem); - - ext4_discard_preallocations(inode); - - /* - * The orphan list entry will now protect us from any crash which - * occurs before the truncate completes, so it is now safe to propagate - * the new, shorter inode size (held for now in i_size) into the - * on-disk inode. We do this via i_disksize, which is the value which - * ext4 *really* writes onto the disk inode. - */ - ei->i_disksize = inode->i_size; - - if (last_block == max_block) { - /* - * It is unnecessary to free any data blocks if last_block is - * equal to the indirect block limit. - */ - goto out_unlock; - } else if (n == 1) { /* direct blocks */ - ext4_free_data(handle, inode, NULL, i_data+offsets[0], - i_data + EXT4_NDIR_BLOCKS); - goto do_indirects; - } - - partial = ext4_find_shared(inode, n, offsets, chain, &nr); - /* Kill the top of shared branch (not detached) */ - if (nr) { - if (partial == chain) { - /* Shared branch grows from the inode */ - ext4_free_branches(handle, inode, NULL, - &nr, &nr+1, (chain+n-1) - partial); - *partial->p = 0; - /* - * We mark the inode dirty prior to restart, - * and prior to stop. No need for it here. - */ - } else { - /* Shared branch grows from an indirect block */ - BUFFER_TRACE(partial->bh, "get_write_access"); - ext4_free_branches(handle, inode, partial->bh, - partial->p, - partial->p+1, (chain+n-1) - partial); - } - } - /* Clear the ends of indirect blocks on the shared branch */ - while (partial > chain) { - ext4_free_branches(handle, inode, partial->bh, partial->p + 1, - (__le32*)partial->bh->b_data+addr_per_block, - (chain+n-1) - partial); - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); - partial--; - } -do_indirects: - /* Kill the remaining (whole) subtrees */ - switch (offsets[0]) { - default: - nr = i_data[EXT4_IND_BLOCK]; - if (nr) { - ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); - i_data[EXT4_IND_BLOCK] = 0; - } - case EXT4_IND_BLOCK: - nr = i_data[EXT4_DIND_BLOCK]; - if (nr) { - ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); - i_data[EXT4_DIND_BLOCK] = 0; - } - case EXT4_DIND_BLOCK: - nr = i_data[EXT4_TIND_BLOCK]; - if (nr) { - ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); - i_data[EXT4_TIND_BLOCK] = 0; - } - case EXT4_TIND_BLOCK: - ; - } - -out_unlock: - up_write(&ei->i_data_sem); - inode->i_mtime = inode->i_ctime = ext4_current_time(inode); - ext4_mark_inode_dirty(handle, inode); - - /* - * In a multi-transaction truncate, we only make the final transaction - * synchronous - */ - if (IS_SYNC(inode)) - ext4_handle_sync(handle); -out_stop: - /* - * If this was a simple ftruncate(), and the file will remain alive - * then we need to clear up the orphan record which we created above. - * However, if this was a real unlink then we were called by - * ext4_delete_inode(), and we allow that function to clean up the - * orphan info for us. - */ - if (inode->i_nlink) - ext4_orphan_del(handle, inode); - - ext4_journal_stop(handle); - trace_ext4_truncate_exit(inode); -} - diff --git a/trunk/fs/ext4/inode.c b/trunk/fs/ext4/inode.c index d47264cafee0..3e5191f9f398 100644 --- a/trunk/fs/ext4/inode.c +++ b/trunk/fs/ext4/inode.c @@ -12,6 +12,10 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * + * Goal-directed block allocation by Stephen Tweedie + * (sct@redhat.com), 1993, 1998 + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 * 64-bit file support on 64-bit platforms by Jakub Jelinek * (jj@sunsite.ms.mff.cuni.cz) * @@ -43,7 +47,6 @@ #include "xattr.h" #include "acl.h" #include "ext4_extents.h" -#include "truncate.h" #include @@ -85,6 +88,72 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); } +/* + * Work out how many blocks we need to proceed with the next chunk of a + * truncate transaction. + */ +static unsigned long blocks_for_truncate(struct inode *inode) +{ + ext4_lblk_t needed; + + needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); + + /* Give ourselves just enough room to cope with inodes in which + * i_blocks is corrupt: we've seen disk corruptions in the past + * which resulted in random data in an inode which looked enough + * like a regular file for ext4 to try to delete it. Things + * will go a bit crazy if that happens, but at least we should + * try not to panic the whole kernel. */ + if (needed < 2) + needed = 2; + + /* But we need to bound the transaction so we don't overflow the + * journal. */ + if (needed > EXT4_MAX_TRANS_DATA) + needed = EXT4_MAX_TRANS_DATA; + + return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed; +} + +/* + * Truncate transactions can be complex and absolutely huge. So we need to + * be able to restart the transaction at a conventient checkpoint to make + * sure we don't overflow the journal. + * + * start_transaction gets us a new handle for a truncate transaction, + * and extend_transaction tries to extend the existing one a bit. If + * extend fails, we need to propagate the failure up and restart the + * transaction in the top-level truncate loop. --sct + */ +static handle_t *start_transaction(struct inode *inode) +{ + handle_t *result; + + result = ext4_journal_start(inode, blocks_for_truncate(inode)); + if (!IS_ERR(result)) + return result; + + ext4_std_error(inode->i_sb, PTR_ERR(result)); + return result; +} + +/* + * Try to extend this transaction for the purposes of truncation. + * + * Returns 0 if we managed to create more room. If we can't create more + * room, and the transaction must be restarted we return 1. + */ +static int try_to_extend_transaction(handle_t *handle, struct inode *inode) +{ + if (!ext4_handle_valid(handle)) + return 0; + if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) + return 0; + if (!ext4_journal_extend(handle, blocks_for_truncate(inode))) + return 0; + return 1; +} + /* * Restart the transaction associated with *handle. This does a commit, * so before we call here everything must be consistently dirtied against @@ -121,33 +190,6 @@ void ext4_evict_inode(struct inode *inode) trace_ext4_evict_inode(inode); if (inode->i_nlink) { - /* - * When journalling data dirty buffers are tracked only in the - * journal. So although mm thinks everything is clean and - * ready for reaping the inode might still have some pages to - * write in the running transaction or waiting to be - * checkpointed. Thus calling jbd2_journal_invalidatepage() - * (via truncate_inode_pages()) to discard these buffers can - * cause data loss. Also even if we did not discard these - * buffers, we would have no way to find them after the inode - * is reaped and thus user could see stale data if he tries to - * read them before the transaction is checkpointed. So be - * careful and force everything to disk here... We use - * ei->i_datasync_tid to store the newest transaction - * containing inode's data. - * - * Note that directories do not have this problem because they - * don't use page cache. - */ - if (ext4_should_journal_data(inode) && - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { - journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; - tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; - - jbd2_log_start_commit(journal, commit_tid); - jbd2_log_wait_commit(journal, commit_tid); - filemap_write_and_wait(&inode->i_data); - } truncate_inode_pages(&inode->i_data, 0); goto no_delete; } @@ -162,7 +204,7 @@ void ext4_evict_inode(struct inode *inode) if (is_bad_inode(inode)) goto no_delete; - handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3); + handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3); if (IS_ERR(handle)) { ext4_std_error(inode->i_sb, PTR_ERR(handle)); /* @@ -235,6 +277,793 @@ void ext4_evict_inode(struct inode *inode) ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ } +typedef struct { + __le32 *p; + __le32 key; + struct buffer_head *bh; +} Indirect; + +static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) +{ + p->key = *(p->p = v); + p->bh = bh; +} + +/** + * ext4_block_to_path - parse the block number into array of offsets + * @inode: inode in question (we are only interested in its superblock) + * @i_block: block number to be parsed + * @offsets: array to store the offsets in + * @boundary: set this non-zero if the referred-to block is likely to be + * followed (on disk) by an indirect block. + * + * To store the locations of file's data ext4 uses a data structure common + * for UNIX filesystems - tree of pointers anchored in the inode, with + * data blocks at leaves and indirect blocks in intermediate nodes. + * This function translates the block number into path in that tree - + * return value is the path length and @offsets[n] is the offset of + * pointer to (n+1)th node in the nth one. If @block is out of range + * (negative or too large) warning is printed and zero returned. + * + * Note: function doesn't find node addresses, so no IO is needed. All + * we need to know is the capacity of indirect blocks (taken from the + * inode->i_sb). + */ + +/* + * Portability note: the last comparison (check that we fit into triple + * indirect block) is spelled differently, because otherwise on an + * architecture with 32-bit longs and 8Kb pages we might get into trouble + * if our filesystem had 8Kb blocks. We might use long long, but that would + * kill us on x86. Oh, well, at least the sign propagation does not matter - + * i_block would have to be negative in the very beginning, so we would not + * get there at all. + */ + +static int ext4_block_to_path(struct inode *inode, + ext4_lblk_t i_block, + ext4_lblk_t offsets[4], int *boundary) +{ + int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); + int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); + const long direct_blocks = EXT4_NDIR_BLOCKS, + indirect_blocks = ptrs, + double_blocks = (1 << (ptrs_bits * 2)); + int n = 0; + int final = 0; + + if (i_block < direct_blocks) { + offsets[n++] = i_block; + final = direct_blocks; + } else if ((i_block -= direct_blocks) < indirect_blocks) { + offsets[n++] = EXT4_IND_BLOCK; + offsets[n++] = i_block; + final = ptrs; + } else if ((i_block -= indirect_blocks) < double_blocks) { + offsets[n++] = EXT4_DIND_BLOCK; + offsets[n++] = i_block >> ptrs_bits; + offsets[n++] = i_block & (ptrs - 1); + final = ptrs; + } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { + offsets[n++] = EXT4_TIND_BLOCK; + offsets[n++] = i_block >> (ptrs_bits * 2); + offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); + offsets[n++] = i_block & (ptrs - 1); + final = ptrs; + } else { + ext4_warning(inode->i_sb, "block %lu > max in inode %lu", + i_block + direct_blocks + + indirect_blocks + double_blocks, inode->i_ino); + } + if (boundary) + *boundary = final - 1 - (i_block & (ptrs - 1)); + return n; +} + +static int __ext4_check_blockref(const char *function, unsigned int line, + struct inode *inode, + __le32 *p, unsigned int max) +{ + struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + __le32 *bref = p; + unsigned int blk; + + while (bref < p+max) { + blk = le32_to_cpu(*bref++); + if (blk && + unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), + blk, 1))) { + es->s_last_error_block = cpu_to_le64(blk); + ext4_error_inode(inode, function, line, blk, + "invalid block"); + return -EIO; + } + } + return 0; +} + + +#define ext4_check_indirect_blockref(inode, bh) \ + __ext4_check_blockref(__func__, __LINE__, inode, \ + (__le32 *)(bh)->b_data, \ + EXT4_ADDR_PER_BLOCK((inode)->i_sb)) + +#define ext4_check_inode_blockref(inode) \ + __ext4_check_blockref(__func__, __LINE__, inode, \ + EXT4_I(inode)->i_data, \ + EXT4_NDIR_BLOCKS) + +/** + * ext4_get_branch - read the chain of indirect blocks leading to data + * @inode: inode in question + * @depth: depth of the chain (1 - direct pointer, etc.) + * @offsets: offsets of pointers in inode/indirect blocks + * @chain: place to store the result + * @err: here we store the error value + * + * Function fills the array of triples and returns %NULL + * if everything went OK or the pointer to the last filled triple + * (incomplete one) otherwise. Upon the return chain[i].key contains + * the number of (i+1)-th block in the chain (as it is stored in memory, + * i.e. little-endian 32-bit), chain[i].p contains the address of that + * number (it points into struct inode for i==0 and into the bh->b_data + * for i>0) and chain[i].bh points to the buffer_head of i-th indirect + * block for i>0 and NULL for i==0. In other words, it holds the block + * numbers of the chain, addresses they were taken from (and where we can + * verify that chain did not change) and buffer_heads hosting these + * numbers. + * + * Function stops when it stumbles upon zero pointer (absent block) + * (pointer to last triple returned, *@err == 0) + * or when it gets an IO error reading an indirect block + * (ditto, *@err == -EIO) + * or when it reads all @depth-1 indirect blocks successfully and finds + * the whole chain, all way to the data (returns %NULL, *err == 0). + * + * Need to be called with + * down_read(&EXT4_I(inode)->i_data_sem) + */ +static Indirect *ext4_get_branch(struct inode *inode, int depth, + ext4_lblk_t *offsets, + Indirect chain[4], int *err) +{ + struct super_block *sb = inode->i_sb; + Indirect *p = chain; + struct buffer_head *bh; + + *err = 0; + /* i_data is not going away, no lock needed */ + add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); + if (!p->key) + goto no_block; + while (--depth) { + bh = sb_getblk(sb, le32_to_cpu(p->key)); + if (unlikely(!bh)) + goto failure; + + if (!bh_uptodate_or_lock(bh)) { + if (bh_submit_read(bh) < 0) { + put_bh(bh); + goto failure; + } + /* validate block references */ + if (ext4_check_indirect_blockref(inode, bh)) { + put_bh(bh); + goto failure; + } + } + + add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); + /* Reader: end */ + if (!p->key) + goto no_block; + } + return NULL; + +failure: + *err = -EIO; +no_block: + return p; +} + +/** + * ext4_find_near - find a place for allocation with sufficient locality + * @inode: owner + * @ind: descriptor of indirect block. + * + * This function returns the preferred place for block allocation. + * It is used when heuristic for sequential allocation fails. + * Rules are: + * + if there is a block to the left of our position - allocate near it. + * + if pointer will live in indirect block - allocate near that block. + * + if pointer will live in inode - allocate in the same + * cylinder group. + * + * In the latter case we colour the starting block by the callers PID to + * prevent it from clashing with concurrent allocations for a different inode + * in the same block group. The PID is used here so that functionally related + * files will be close-by on-disk. + * + * Caller must make sure that @ind is valid and will stay that way. + */ +static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; + __le32 *p; + ext4_fsblk_t bg_start; + ext4_fsblk_t last_block; + ext4_grpblk_t colour; + ext4_group_t block_group; + int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); + + /* Try to find previous block */ + for (p = ind->p - 1; p >= start; p--) { + if (*p) + return le32_to_cpu(*p); + } + + /* No such thing, so let's try location of indirect block */ + if (ind->bh) + return ind->bh->b_blocknr; + + /* + * It is going to be referred to from the inode itself? OK, just put it + * into the same cylinder group then. + */ + block_group = ei->i_block_group; + if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { + block_group &= ~(flex_size-1); + if (S_ISREG(inode->i_mode)) + block_group++; + } + bg_start = ext4_group_first_block_no(inode->i_sb, block_group); + last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; + + /* + * If we are doing delayed allocation, we don't need take + * colour into account. + */ + if (test_opt(inode->i_sb, DELALLOC)) + return bg_start; + + if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) + colour = (current->pid % 16) * + (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); + else + colour = (current->pid % 16) * ((last_block - bg_start) / 16); + return bg_start + colour; +} + +/** + * ext4_find_goal - find a preferred place for allocation. + * @inode: owner + * @block: block we want + * @partial: pointer to the last triple within a chain + * + * Normally this function find the preferred place for block allocation, + * returns it. + * Because this is only used for non-extent files, we limit the block nr + * to 32 bits. + */ +static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, + Indirect *partial) +{ + ext4_fsblk_t goal; + + /* + * XXX need to get goal block from mballoc's data structures + */ + + goal = ext4_find_near(inode, partial); + goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; + return goal; +} + +/** + * ext4_blks_to_allocate - Look up the block map and count the number + * of direct blocks need to be allocated for the given branch. + * + * @branch: chain of indirect blocks + * @k: number of blocks need for indirect blocks + * @blks: number of data blocks to be mapped. + * @blocks_to_boundary: the offset in the indirect block + * + * return the total number of blocks to be allocate, including the + * direct and indirect blocks. + */ +static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, + int blocks_to_boundary) +{ + unsigned int count = 0; + + /* + * Simple case, [t,d]Indirect block(s) has not allocated yet + * then it's clear blocks on that path have not allocated + */ + if (k > 0) { + /* right now we don't handle cross boundary allocation */ + if (blks < blocks_to_boundary + 1) + count += blks; + else + count += blocks_to_boundary + 1; + return count; + } + + count++; + while (count < blks && count <= blocks_to_boundary && + le32_to_cpu(*(branch[0].p + count)) == 0) { + count++; + } + return count; +} + +/** + * ext4_alloc_blocks: multiple allocate blocks needed for a branch + * @handle: handle for this transaction + * @inode: inode which needs allocated blocks + * @iblock: the logical block to start allocated at + * @goal: preferred physical block of allocation + * @indirect_blks: the number of blocks need to allocate for indirect + * blocks + * @blks: number of desired blocks + * @new_blocks: on return it will store the new block numbers for + * the indirect blocks(if needed) and the first direct block, + * @err: on return it will store the error code + * + * This function will return the number of blocks allocated as + * requested by the passed-in parameters. + */ +static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, + ext4_lblk_t iblock, ext4_fsblk_t goal, + int indirect_blks, int blks, + ext4_fsblk_t new_blocks[4], int *err) +{ + struct ext4_allocation_request ar; + int target, i; + unsigned long count = 0, blk_allocated = 0; + int index = 0; + ext4_fsblk_t current_block = 0; + int ret = 0; + + /* + * Here we try to allocate the requested multiple blocks at once, + * on a best-effort basis. + * To build a branch, we should allocate blocks for + * the indirect blocks(if not allocated yet), and at least + * the first direct block of this branch. That's the + * minimum number of blocks need to allocate(required) + */ + /* first we try to allocate the indirect blocks */ + target = indirect_blks; + while (target > 0) { + count = target; + /* allocating blocks for indirect blocks and direct blocks */ + current_block = ext4_new_meta_blocks(handle, inode, goal, + 0, &count, err); + if (*err) + goto failed_out; + + if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) { + EXT4_ERROR_INODE(inode, + "current_block %llu + count %lu > %d!", + current_block, count, + EXT4_MAX_BLOCK_FILE_PHYS); + *err = -EIO; + goto failed_out; + } + + target -= count; + /* allocate blocks for indirect blocks */ + while (index < indirect_blks && count) { + new_blocks[index++] = current_block++; + count--; + } + if (count > 0) { + /* + * save the new block number + * for the first direct block + */ + new_blocks[index] = current_block; + printk(KERN_INFO "%s returned more blocks than " + "requested\n", __func__); + WARN_ON(1); + break; + } + } + + target = blks - count ; + blk_allocated = count; + if (!target) + goto allocated; + /* Now allocate data blocks */ + memset(&ar, 0, sizeof(ar)); + ar.inode = inode; + ar.goal = goal; + ar.len = target; + ar.logical = iblock; + if (S_ISREG(inode->i_mode)) + /* enable in-core preallocation only for regular files */ + ar.flags = EXT4_MB_HINT_DATA; + + current_block = ext4_mb_new_blocks(handle, &ar, err); + if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) { + EXT4_ERROR_INODE(inode, + "current_block %llu + ar.len %d > %d!", + current_block, ar.len, + EXT4_MAX_BLOCK_FILE_PHYS); + *err = -EIO; + goto failed_out; + } + + if (*err && (target == blks)) { + /* + * if the allocation failed and we didn't allocate + * any blocks before + */ + goto failed_out; + } + if (!*err) { + if (target == blks) { + /* + * save the new block number + * for the first direct block + */ + new_blocks[index] = current_block; + } + blk_allocated += ar.len; + } +allocated: + /* total number of blocks allocated for direct blocks */ + ret = blk_allocated; + *err = 0; + return ret; +failed_out: + for (i = 0; i < index; i++) + ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); + return ret; +} + +/** + * ext4_alloc_branch - allocate and set up a chain of blocks. + * @handle: handle for this transaction + * @inode: owner + * @indirect_blks: number of allocated indirect blocks + * @blks: number of allocated direct blocks + * @goal: preferred place for allocation + * @offsets: offsets (in the blocks) to store the pointers to next. + * @branch: place to store the chain in. + * + * This function allocates blocks, zeroes out all but the last one, + * links them into chain and (if we are synchronous) writes them to disk. + * In other words, it prepares a branch that can be spliced onto the + * inode. It stores the information about that chain in the branch[], in + * the same format as ext4_get_branch() would do. We are calling it after + * we had read the existing part of chain and partial points to the last + * triple of that (one with zero ->key). Upon the exit we have the same + * picture as after the successful ext4_get_block(), except that in one + * place chain is disconnected - *branch->p is still zero (we did not + * set the last link), but branch->key contains the number that should + * be placed into *branch->p to fill that gap. + * + * If allocation fails we free all blocks we've allocated (and forget + * their buffer_heads) and return the error value the from failed + * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain + * as described above and return 0. + */ +static int ext4_alloc_branch(handle_t *handle, struct inode *inode, + ext4_lblk_t iblock, int indirect_blks, + int *blks, ext4_fsblk_t goal, + ext4_lblk_t *offsets, Indirect *branch) +{ + int blocksize = inode->i_sb->s_blocksize; + int i, n = 0; + int err = 0; + struct buffer_head *bh; + int num; + ext4_fsblk_t new_blocks[4]; + ext4_fsblk_t current_block; + + num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks, + *blks, new_blocks, &err); + if (err) + return err; + + branch[0].key = cpu_to_le32(new_blocks[0]); + /* + * metadata blocks and data blocks are allocated. + */ + for (n = 1; n <= indirect_blks; n++) { + /* + * Get buffer_head for parent block, zero it out + * and set the pointer to new one, then send + * parent to disk. + */ + bh = sb_getblk(inode->i_sb, new_blocks[n-1]); + if (unlikely(!bh)) { + err = -EIO; + goto failed; + } + + branch[n].bh = bh; + lock_buffer(bh); + BUFFER_TRACE(bh, "call get_create_access"); + err = ext4_journal_get_create_access(handle, bh); + if (err) { + /* Don't brelse(bh) here; it's done in + * ext4_journal_forget() below */ + unlock_buffer(bh); + goto failed; + } + + memset(bh->b_data, 0, blocksize); + branch[n].p = (__le32 *) bh->b_data + offsets[n]; + branch[n].key = cpu_to_le32(new_blocks[n]); + *branch[n].p = branch[n].key; + if (n == indirect_blks) { + current_block = new_blocks[n]; + /* + * End of chain, update the last new metablock of + * the chain to point to the new allocated + * data blocks numbers + */ + for (i = 1; i < num; i++) + *(branch[n].p + i) = cpu_to_le32(++current_block); + } + BUFFER_TRACE(bh, "marking uptodate"); + set_buffer_uptodate(bh); + unlock_buffer(bh); + + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, inode, bh); + if (err) + goto failed; + } + *blks = num; + return err; +failed: + /* Allocation failed, free what we already allocated */ + ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0); + for (i = 1; i <= n ; i++) { + /* + * branch[i].bh is newly allocated, so there is no + * need to revoke the block, which is why we don't + * need to set EXT4_FREE_BLOCKS_METADATA. + */ + ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, + EXT4_FREE_BLOCKS_FORGET); + } + for (i = n+1; i < indirect_blks; i++) + ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); + + ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0); + + return err; +} + +/** + * ext4_splice_branch - splice the allocated branch onto inode. + * @handle: handle for this transaction + * @inode: owner + * @block: (logical) number of block we are adding + * @chain: chain of indirect blocks (with a missing link - see + * ext4_alloc_branch) + * @where: location of missing link + * @num: number of indirect blocks we are adding + * @blks: number of direct blocks we are adding + * + * This function fills the missing link and does all housekeeping needed in + * inode (->i_blocks, etc.). In case of success we end up with the full + * chain to new block and return 0. + */ +static int ext4_splice_branch(handle_t *handle, struct inode *inode, + ext4_lblk_t block, Indirect *where, int num, + int blks) +{ + int i; + int err = 0; + ext4_fsblk_t current_block; + + /* + * If we're splicing into a [td]indirect block (as opposed to the + * inode) then we need to get write access to the [td]indirect block + * before the splice. + */ + if (where->bh) { + BUFFER_TRACE(where->bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, where->bh); + if (err) + goto err_out; + } + /* That's it */ + + *where->p = where->key; + + /* + * Update the host buffer_head or inode to point to more just allocated + * direct blocks blocks + */ + if (num == 0 && blks > 1) { + current_block = le32_to_cpu(where->key) + 1; + for (i = 1; i < blks; i++) + *(where->p + i) = cpu_to_le32(current_block++); + } + + /* We are done with atomic stuff, now do the rest of housekeeping */ + /* had we spliced it onto indirect block? */ + if (where->bh) { + /* + * If we spliced it onto an indirect block, we haven't + * altered the inode. Note however that if it is being spliced + * onto an indirect block at the very end of the file (the + * file is growing) then we *will* alter the inode to reflect + * the new i_size. But that is not done here - it is done in + * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. + */ + jbd_debug(5, "splicing indirect only\n"); + BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, inode, where->bh); + if (err) + goto err_out; + } else { + /* + * OK, we spliced it into the inode itself on a direct block. + */ + ext4_mark_inode_dirty(handle, inode); + jbd_debug(5, "splicing direct\n"); + } + return err; + +err_out: + for (i = 1; i <= num; i++) { + /* + * branch[i].bh is newly allocated, so there is no + * need to revoke the block, which is why we don't + * need to set EXT4_FREE_BLOCKS_METADATA. + */ + ext4_free_blocks(handle, inode, where[i].bh, 0, 1, + EXT4_FREE_BLOCKS_FORGET); + } + ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), + blks, 0); + + return err; +} + +/* + * The ext4_ind_map_blocks() function handles non-extents inodes + * (i.e., using the traditional indirect/double-indirect i_blocks + * scheme) for ext4_map_blocks(). + * + * Allocation strategy is simple: if we have to allocate something, we will + * have to go the whole way to leaf. So let's do it before attaching anything + * to tree, set linkage between the newborn blocks, write them if sync is + * required, recheck the path, free and repeat if check fails, otherwise + * set the last missing link (that will protect us from any truncate-generated + * removals - all blocks on the path are immune now) and possibly force the + * write on the parent block. + * That has a nice additional property: no special recovery from the failed + * allocations is needed - we simply release blocks and do not touch anything + * reachable from inode. + * + * `handle' can be NULL if create == 0. + * + * return > 0, # of blocks mapped or allocated. + * return = 0, if plain lookup failed. + * return < 0, error case. + * + * The ext4_ind_get_blocks() function should be called with + * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem + * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or + * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system + * blocks. + */ +static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, + int flags) +{ + int err = -EIO; + ext4_lblk_t offsets[4]; + Indirect chain[4]; + Indirect *partial; + ext4_fsblk_t goal; + int indirect_blks; + int blocks_to_boundary = 0; + int depth; + int count = 0; + ext4_fsblk_t first_block = 0; + + trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); + J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); + J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); + depth = ext4_block_to_path(inode, map->m_lblk, offsets, + &blocks_to_boundary); + + if (depth == 0) + goto out; + + partial = ext4_get_branch(inode, depth, offsets, chain, &err); + + /* Simplest case - block found, no allocation needed */ + if (!partial) { + first_block = le32_to_cpu(chain[depth - 1].key); + count++; + /*map more blocks*/ + while (count < map->m_len && count <= blocks_to_boundary) { + ext4_fsblk_t blk; + + blk = le32_to_cpu(*(chain[depth-1].p + count)); + + if (blk == first_block + count) + count++; + else + break; + } + goto got_it; + } + + /* Next simple case - plain lookup or failed read of indirect block */ + if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) + goto cleanup; + + /* + * Okay, we need to do block allocation. + */ + goal = ext4_find_goal(inode, map->m_lblk, partial); + + /* the number of blocks need to allocate for [d,t]indirect blocks */ + indirect_blks = (chain + depth) - partial - 1; + + /* + * Next look up the indirect map to count the totoal number of + * direct blocks to allocate for this branch. + */ + count = ext4_blks_to_allocate(partial, indirect_blks, + map->m_len, blocks_to_boundary); + /* + * Block out ext4_truncate while we alter the tree + */ + err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, + &count, goal, + offsets + (partial - chain), partial); + + /* + * The ext4_splice_branch call will free and forget any buffers + * on the new chain if there is a failure, but that risks using + * up transaction credits, especially for bitmaps where the + * credits cannot be returned. Can we handle this somehow? We + * may need to return -EAGAIN upwards in the worst case. --sct + */ + if (!err) + err = ext4_splice_branch(handle, inode, map->m_lblk, + partial, indirect_blks, count); + if (err) + goto cleanup; + + map->m_flags |= EXT4_MAP_NEW; + + ext4_update_inode_fsync_trans(handle, inode, 1); +got_it: + map->m_flags |= EXT4_MAP_MAPPED; + map->m_pblk = le32_to_cpu(chain[depth-1].key); + map->m_len = count; + if (count > blocks_to_boundary) + map->m_flags |= EXT4_MAP_BOUNDARY; + err = count; + /* Clean up and exit */ + partial = chain + depth - 1; /* the whole chain */ +cleanup: + while (partial > chain) { + BUFFER_TRACE(partial->bh, "call brelse"); + brelse(partial->bh); + partial--; + } +out: + trace_ext4_ind_map_blocks_exit(inode, map->m_lblk, + map->m_pblk, map->m_len, err); + return err; +} + #ifdef CONFIG_QUOTA qsize_t *ext4_get_reserved_space(struct inode *inode) { @@ -242,6 +1071,33 @@ qsize_t *ext4_get_reserved_space(struct inode *inode) } #endif +/* + * Calculate the number of metadata blocks need to reserve + * to allocate a new block at @lblocks for non extent file based file + */ +static int ext4_indirect_calc_metadata_amount(struct inode *inode, + sector_t lblock) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); + int blk_bits; + + if (lblock < EXT4_NDIR_BLOCKS) + return 0; + + lblock -= EXT4_NDIR_BLOCKS; + + if (ei->i_da_metadata_calc_len && + (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { + ei->i_da_metadata_calc_len++; + return 0; + } + ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; + ei->i_da_metadata_calc_len = 1; + blk_bits = order_base_2(lblock); + return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; +} + /* * Calculate the number of metadata blocks need to reserve * to allocate a block located at @lblock @@ -251,7 +1107,7 @@ static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) return ext4_ext_calc_metadata_amount(inode, lblock); - return ext4_ind_calc_metadata_amount(inode, lblock); + return ext4_indirect_calc_metadata_amount(inode, lblock); } /* @@ -733,6 +1589,16 @@ static int do_journal_get_write_access(handle_t *handle, return ret; } +/* + * Truncate blocks that were not used by write. We have to truncate the + * pagecache as well so that corresponding buffers get properly unmapped. + */ +static void ext4_truncate_failed_write(struct inode *inode) +{ + truncate_inode_pages(inode->i_mapping, inode->i_size); + ext4_truncate(inode); +} + static int ext4_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); static int ext4_write_begin(struct file *file, struct address_space *mapping, @@ -997,7 +1863,6 @@ static int ext4_journalled_write_end(struct file *file, if (new_i_size > inode->i_size) i_size_write(inode, pos+copied); ext4_set_inode_state(inode, EXT4_STATE_JDATA); - EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; if (new_i_size > EXT4_I(inode)->i_disksize) { ext4_update_i_disksize(inode, new_i_size); ret2 = ext4_mark_inode_dirty(handle, inode); @@ -1706,7 +2571,6 @@ static int __ext4_journalled_writepage(struct page *page, write_end_fn); if (ret == 0) ret = err; - EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; err = ext4_journal_stop(handle); if (!ret) ret = err; @@ -2585,6 +3449,112 @@ static int ext4_releasepage(struct page *page, gfp_t wait) return try_to_free_buffers(page); } +/* + * O_DIRECT for ext3 (or indirect map) based files + * + * If the O_DIRECT write will extend the file then add this inode to the + * orphan list. So recovery will truncate it back to the original size + * if the machine crashes during the write. + * + * If the O_DIRECT write is intantiating holes inside i_size and the machine + * crashes then stale disk data _may_ be exposed inside the file. But current + * VFS code falls back into buffered path in that case so we are safe. + */ +static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, + unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct ext4_inode_info *ei = EXT4_I(inode); + handle_t *handle; + ssize_t ret; + int orphan = 0; + size_t count = iov_length(iov, nr_segs); + int retries = 0; + + if (rw == WRITE) { + loff_t final_size = offset + count; + + if (final_size > inode->i_size) { + /* Credits for sb + inode write */ + handle = ext4_journal_start(inode, 2); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } + ret = ext4_orphan_add(handle, inode); + if (ret) { + ext4_journal_stop(handle); + goto out; + } + orphan = 1; + ei->i_disksize = inode->i_size; + ext4_journal_stop(handle); + } + } + +retry: + if (rw == READ && ext4_should_dioread_nolock(inode)) + ret = __blockdev_direct_IO(rw, iocb, inode, + inode->i_sb->s_bdev, iov, + offset, nr_segs, + ext4_get_block, NULL, NULL, 0); + else { + ret = blockdev_direct_IO(rw, iocb, inode, iov, + offset, nr_segs, ext4_get_block); + + if (unlikely((rw & WRITE) && ret < 0)) { + loff_t isize = i_size_read(inode); + loff_t end = offset + iov_length(iov, nr_segs); + + if (end > isize) + ext4_truncate_failed_write(inode); + } + } + if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry; + + if (orphan) { + int err; + + /* Credits for sb + inode write */ + handle = ext4_journal_start(inode, 2); + if (IS_ERR(handle)) { + /* This is really bad luck. We've written the data + * but cannot extend i_size. Bail out and pretend + * the write failed... */ + ret = PTR_ERR(handle); + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + + goto out; + } + if (inode->i_nlink) + ext4_orphan_del(handle, inode); + if (ret > 0) { + loff_t end = offset + ret; + if (end > inode->i_size) { + ei->i_disksize = end; + i_size_write(inode, end); + /* + * We're going to return a positive `ret' + * here due to non-zero-length I/O, so there's + * no way of reporting error returns from + * ext4_mark_inode_dirty() to userspace. So + * ignore it. + */ + ext4_mark_inode_dirty(handle, inode); + } + } + err = ext4_journal_stop(handle); + if (ret == 0) + ret = err; + } +out: + return ret; +} + /* * ext4_get_block used when preparing for a DIO write or buffer write. * We allocate an uinitialized extent if blocks haven't been allocated. @@ -3063,6 +4033,383 @@ int ext4_block_zero_page_range(handle_t *handle, return err; } +/* + * Probably it should be a library function... search for first non-zero word + * or memcmp with zero_page, whatever is better for particular architecture. + * Linus? + */ +static inline int all_zeroes(__le32 *p, __le32 *q) +{ + while (p < q) + if (*p++) + return 0; + return 1; +} + +/** + * ext4_find_shared - find the indirect blocks for partial truncation. + * @inode: inode in question + * @depth: depth of the affected branch + * @offsets: offsets of pointers in that branch (see ext4_block_to_path) + * @chain: place to store the pointers to partial indirect blocks + * @top: place to the (detached) top of branch + * + * This is a helper function used by ext4_truncate(). + * + * When we do truncate() we may have to clean the ends of several + * indirect blocks but leave the blocks themselves alive. Block is + * partially truncated if some data below the new i_size is referred + * from it (and it is on the path to the first completely truncated + * data block, indeed). We have to free the top of that path along + * with everything to the right of the path. Since no allocation + * past the truncation point is possible until ext4_truncate() + * finishes, we may safely do the latter, but top of branch may + * require special attention - pageout below the truncation point + * might try to populate it. + * + * We atomically detach the top of branch from the tree, store the + * block number of its root in *@top, pointers to buffer_heads of + * partially truncated blocks - in @chain[].bh and pointers to + * their last elements that should not be removed - in + * @chain[].p. Return value is the pointer to last filled element + * of @chain. + * + * The work left to caller to do the actual freeing of subtrees: + * a) free the subtree starting from *@top + * b) free the subtrees whose roots are stored in + * (@chain[i].p+1 .. end of @chain[i].bh->b_data) + * c) free the subtrees growing from the inode past the @chain[0]. + * (no partially truncated stuff there). */ + +static Indirect *ext4_find_shared(struct inode *inode, int depth, + ext4_lblk_t offsets[4], Indirect chain[4], + __le32 *top) +{ + Indirect *partial, *p; + int k, err; + + *top = 0; + /* Make k index the deepest non-null offset + 1 */ + for (k = depth; k > 1 && !offsets[k-1]; k--) + ; + partial = ext4_get_branch(inode, k, offsets, chain, &err); + /* Writer: pointers */ + if (!partial) + partial = chain + k-1; + /* + * If the branch acquired continuation since we've looked at it - + * fine, it should all survive and (new) top doesn't belong to us. + */ + if (!partial->key && *partial->p) + /* Writer: end */ + goto no_top; + for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--) + ; + /* + * OK, we've found the last block that must survive. The rest of our + * branch should be detached before unlocking. However, if that rest + * of branch is all ours and does not grow immediately from the inode + * it's easier to cheat and just decrement partial->p. + */ + if (p == chain + k - 1 && p > chain) { + p->p--; + } else { + *top = *p->p; + /* Nope, don't do this in ext4. Must leave the tree intact */ +#if 0 + *p->p = 0; +#endif + } + /* Writer: end */ + + while (partial > p) { + brelse(partial->bh); + partial--; + } +no_top: + return partial; +} + +/* + * Zero a number of block pointers in either an inode or an indirect block. + * If we restart the transaction we must again get write access to the + * indirect block for further modification. + * + * We release `count' blocks on disk, but (last - first) may be greater + * than `count' because there can be holes in there. + * + * Return 0 on success, 1 on invalid block range + * and < 0 on fatal error. + */ +static int ext4_clear_blocks(handle_t *handle, struct inode *inode, + struct buffer_head *bh, + ext4_fsblk_t block_to_free, + unsigned long count, __le32 *first, + __le32 *last) +{ + __le32 *p; + int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; + int err; + + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + flags |= EXT4_FREE_BLOCKS_METADATA; + + if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, + count)) { + EXT4_ERROR_INODE(inode, "attempt to clear invalid " + "blocks %llu len %lu", + (unsigned long long) block_to_free, count); + return 1; + } + + if (try_to_extend_transaction(handle, inode)) { + if (bh) { + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, inode, bh); + if (unlikely(err)) + goto out_err; + } + err = ext4_mark_inode_dirty(handle, inode); + if (unlikely(err)) + goto out_err; + err = ext4_truncate_restart_trans(handle, inode, + blocks_for_truncate(inode)); + if (unlikely(err)) + goto out_err; + if (bh) { + BUFFER_TRACE(bh, "retaking write access"); + err = ext4_journal_get_write_access(handle, bh); + if (unlikely(err)) + goto out_err; + } + } + + for (p = first; p < last; p++) + *p = 0; + + ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags); + return 0; +out_err: + ext4_std_error(inode->i_sb, err); + return err; +} + +/** + * ext4_free_data - free a list of data blocks + * @handle: handle for this transaction + * @inode: inode we are dealing with + * @this_bh: indirect buffer_head which contains *@first and *@last + * @first: array of block numbers + * @last: points immediately past the end of array + * + * We are freeing all blocks referred from that array (numbers are stored as + * little-endian 32-bit) and updating @inode->i_blocks appropriately. + * + * We accumulate contiguous runs of blocks to free. Conveniently, if these + * blocks are contiguous then releasing them at one time will only affect one + * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't + * actually use a lot of journal space. + * + * @this_bh will be %NULL if @first and @last point into the inode's direct + * block pointers. + */ +static void ext4_free_data(handle_t *handle, struct inode *inode, + struct buffer_head *this_bh, + __le32 *first, __le32 *last) +{ + ext4_fsblk_t block_to_free = 0; /* Starting block # of a run */ + unsigned long count = 0; /* Number of blocks in the run */ + __le32 *block_to_free_p = NULL; /* Pointer into inode/ind + corresponding to + block_to_free */ + ext4_fsblk_t nr; /* Current block # */ + __le32 *p; /* Pointer into inode/ind + for current block */ + int err = 0; + + if (this_bh) { /* For indirect block */ + BUFFER_TRACE(this_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, this_bh); + /* Important: if we can't update the indirect pointers + * to the blocks, we can't free them. */ + if (err) + return; + } + + for (p = first; p < last; p++) { + nr = le32_to_cpu(*p); + if (nr) { + /* accumulate blocks to free if they're contiguous */ + if (count == 0) { + block_to_free = nr; + block_to_free_p = p; + count = 1; + } else if (nr == block_to_free + count) { + count++; + } else { + err = ext4_clear_blocks(handle, inode, this_bh, + block_to_free, count, + block_to_free_p, p); + if (err) + break; + block_to_free = nr; + block_to_free_p = p; + count = 1; + } + } + } + + if (!err && count > 0) + err = ext4_clear_blocks(handle, inode, this_bh, block_to_free, + count, block_to_free_p, p); + if (err < 0) + /* fatal error */ + return; + + if (this_bh) { + BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); + + /* + * The buffer head should have an attached journal head at this + * point. However, if the data is corrupted and an indirect + * block pointed to itself, it would have been detached when + * the block was cleared. Check for this instead of OOPSing. + */ + if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) + ext4_handle_dirty_metadata(handle, inode, this_bh); + else + EXT4_ERROR_INODE(inode, + "circular indirect block detected at " + "block %llu", + (unsigned long long) this_bh->b_blocknr); + } +} + +/** + * ext4_free_branches - free an array of branches + * @handle: JBD handle for this transaction + * @inode: inode we are dealing with + * @parent_bh: the buffer_head which contains *@first and *@last + * @first: array of block numbers + * @last: pointer immediately past the end of array + * @depth: depth of the branches to free + * + * We are freeing all blocks referred from these branches (numbers are + * stored as little-endian 32-bit) and updating @inode->i_blocks + * appropriately. + */ +static void ext4_free_branches(handle_t *handle, struct inode *inode, + struct buffer_head *parent_bh, + __le32 *first, __le32 *last, int depth) +{ + ext4_fsblk_t nr; + __le32 *p; + + if (ext4_handle_is_aborted(handle)) + return; + + if (depth--) { + struct buffer_head *bh; + int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); + p = last; + while (--p >= first) { + nr = le32_to_cpu(*p); + if (!nr) + continue; /* A hole */ + + if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), + nr, 1)) { + EXT4_ERROR_INODE(inode, + "invalid indirect mapped " + "block %lu (level %d)", + (unsigned long) nr, depth); + break; + } + + /* Go read the buffer for the next level down */ + bh = sb_bread(inode->i_sb, nr); + + /* + * A read failure? Report error and clear slot + * (should be rare). + */ + if (!bh) { + EXT4_ERROR_INODE_BLOCK(inode, nr, + "Read failure"); + continue; + } + + /* This zaps the entire block. Bottom up. */ + BUFFER_TRACE(bh, "free child branches"); + ext4_free_branches(handle, inode, bh, + (__le32 *) bh->b_data, + (__le32 *) bh->b_data + addr_per_block, + depth); + brelse(bh); + + /* + * Everything below this this pointer has been + * released. Now let this top-of-subtree go. + * + * We want the freeing of this indirect block to be + * atomic in the journal with the updating of the + * bitmap block which owns it. So make some room in + * the journal. + * + * We zero the parent pointer *after* freeing its + * pointee in the bitmaps, so if extend_transaction() + * for some reason fails to put the bitmap changes and + * the release into the same transaction, recovery + * will merely complain about releasing a free block, + * rather than leaking blocks. + */ + if (ext4_handle_is_aborted(handle)) + return; + if (try_to_extend_transaction(handle, inode)) { + ext4_mark_inode_dirty(handle, inode); + ext4_truncate_restart_trans(handle, inode, + blocks_for_truncate(inode)); + } + + /* + * The forget flag here is critical because if + * we are journaling (and not doing data + * journaling), we have to make sure a revoke + * record is written to prevent the journal + * replay from overwriting the (former) + * indirect block if it gets reallocated as a + * data block. This must happen in the same + * transaction where the data blocks are + * actually freed. + */ + ext4_free_blocks(handle, inode, NULL, nr, 1, + EXT4_FREE_BLOCKS_METADATA| + EXT4_FREE_BLOCKS_FORGET); + + if (parent_bh) { + /* + * The block which we have just freed is + * pointed to by an indirect block: journal it + */ + BUFFER_TRACE(parent_bh, "get_write_access"); + if (!ext4_journal_get_write_access(handle, + parent_bh)){ + *p = 0; + BUFFER_TRACE(parent_bh, + "call ext4_handle_dirty_metadata"); + ext4_handle_dirty_metadata(handle, + inode, + parent_bh); + } + } + } + } else { + /* We have reached the bottom of the tree. */ + BUFFER_TRACE(parent_bh, "free data blocks"); + ext4_free_data(handle, inode, parent_bh, first, last); + } +} + int ext4_can_truncate(struct inode *inode) { if (S_ISREG(inode->i_mode)) @@ -3129,6 +4476,19 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) */ void ext4_truncate(struct inode *inode) { + handle_t *handle; + struct ext4_inode_info *ei = EXT4_I(inode); + __le32 *i_data = ei->i_data; + int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + ext4_lblk_t offsets[4]; + Indirect chain[4]; + Indirect *partial; + __le32 nr = 0; + int n = 0; + ext4_lblk_t last_block, max_block; + unsigned blocksize = inode->i_sb->s_blocksize; + trace_ext4_truncate_enter(inode); if (!ext4_can_truncate(inode)) @@ -3139,11 +4499,149 @@ void ext4_truncate(struct inode *inode) if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { ext4_ext_truncate(inode); - else - ext4_ind_truncate(inode); + trace_ext4_truncate_exit(inode); + return; + } + + handle = start_transaction(inode); + if (IS_ERR(handle)) + return; /* AKPM: return what? */ + + last_block = (inode->i_size + blocksize-1) + >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); + max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) + >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); + + if (inode->i_size & (blocksize - 1)) + if (ext4_block_truncate_page(handle, mapping, inode->i_size)) + goto out_stop; + + if (last_block != max_block) { + n = ext4_block_to_path(inode, last_block, offsets, NULL); + if (n == 0) + goto out_stop; /* error */ + } + + /* + * OK. This truncate is going to happen. We add the inode to the + * orphan list, so that if this truncate spans multiple transactions, + * and we crash, we will resume the truncate when the filesystem + * recovers. It also marks the inode dirty, to catch the new size. + * + * Implication: the file must always be in a sane, consistent + * truncatable state while each transaction commits. + */ + if (ext4_orphan_add(handle, inode)) + goto out_stop; + + /* + * From here we block out all ext4_get_block() callers who want to + * modify the block allocation tree. + */ + down_write(&ei->i_data_sem); + + ext4_discard_preallocations(inode); + + /* + * The orphan list entry will now protect us from any crash which + * occurs before the truncate completes, so it is now safe to propagate + * the new, shorter inode size (held for now in i_size) into the + * on-disk inode. We do this via i_disksize, which is the value which + * ext4 *really* writes onto the disk inode. + */ + ei->i_disksize = inode->i_size; + + if (last_block == max_block) { + /* + * It is unnecessary to free any data blocks if last_block is + * equal to the indirect block limit. + */ + goto out_unlock; + } else if (n == 1) { /* direct blocks */ + ext4_free_data(handle, inode, NULL, i_data+offsets[0], + i_data + EXT4_NDIR_BLOCKS); + goto do_indirects; + } + + partial = ext4_find_shared(inode, n, offsets, chain, &nr); + /* Kill the top of shared branch (not detached) */ + if (nr) { + if (partial == chain) { + /* Shared branch grows from the inode */ + ext4_free_branches(handle, inode, NULL, + &nr, &nr+1, (chain+n-1) - partial); + *partial->p = 0; + /* + * We mark the inode dirty prior to restart, + * and prior to stop. No need for it here. + */ + } else { + /* Shared branch grows from an indirect block */ + BUFFER_TRACE(partial->bh, "get_write_access"); + ext4_free_branches(handle, inode, partial->bh, + partial->p, + partial->p+1, (chain+n-1) - partial); + } + } + /* Clear the ends of indirect blocks on the shared branch */ + while (partial > chain) { + ext4_free_branches(handle, inode, partial->bh, partial->p + 1, + (__le32*)partial->bh->b_data+addr_per_block, + (chain+n-1) - partial); + BUFFER_TRACE(partial->bh, "call brelse"); + brelse(partial->bh); + partial--; + } +do_indirects: + /* Kill the remaining (whole) subtrees */ + switch (offsets[0]) { + default: + nr = i_data[EXT4_IND_BLOCK]; + if (nr) { + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); + i_data[EXT4_IND_BLOCK] = 0; + } + case EXT4_IND_BLOCK: + nr = i_data[EXT4_DIND_BLOCK]; + if (nr) { + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); + i_data[EXT4_DIND_BLOCK] = 0; + } + case EXT4_DIND_BLOCK: + nr = i_data[EXT4_TIND_BLOCK]; + if (nr) { + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); + i_data[EXT4_TIND_BLOCK] = 0; + } + case EXT4_TIND_BLOCK: + ; + } +out_unlock: + up_write(&ei->i_data_sem); + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); + ext4_mark_inode_dirty(handle, inode); + + /* + * In a multi-transaction truncate, we only make the final transaction + * synchronous + */ + if (IS_SYNC(inode)) + ext4_handle_sync(handle); +out_stop: + /* + * If this was a simple ftruncate(), and the file will remain alive + * then we need to clear up the orphan record which we created above. + * However, if this was a real unlink then we were called by + * ext4_delete_inode(), and we allow that function to clean up the + * orphan info for us. + */ + if (inode->i_nlink) + ext4_orphan_del(handle, inode); + + ext4_journal_stop(handle); trace_ext4_truncate_exit(inode); } @@ -3514,7 +5012,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) (S_ISLNK(inode->i_mode) && !ext4_inode_is_fast_symlink(inode))) { /* Validate block references which are part of inode */ - ret = ext4_ind_check_inode(inode); + ret = ext4_check_inode_blockref(inode); } if (ret) goto bad_inode; @@ -3961,10 +5459,34 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, return 0; } +static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, + int chunk) +{ + int indirects; + + /* if nrblocks are contiguous */ + if (chunk) { + /* + * With N contiguous data blocks, we need at most + * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, + * 2 dindirect blocks, and 1 tindirect block + */ + return DIV_ROUND_UP(nrblocks, + EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; + } + /* + * if nrblocks are not contiguous, worse case, each block touch + * a indirect block, and each indirect block touch a double indirect + * block, plus a triple indirect block + */ + indirects = nrblocks * 2 + 1; + return indirects; +} + static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) { if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) - return ext4_ind_trans_blocks(inode, nrblocks, chunk); + return ext4_indirect_trans_blocks(inode, nrblocks, chunk); return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); } diff --git a/trunk/fs/ext4/ioctl.c b/trunk/fs/ext4/ioctl.c index f18bfe37aff8..808c554e773f 100644 --- a/trunk/fs/ext4/ioctl.c +++ b/trunk/fs/ext4/ioctl.c @@ -202,9 +202,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct super_block *sb = inode->i_sb; int err, err2=0; - err = ext4_resize_begin(sb); - if (err) - return err; + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; if (get_user(n_blocks_count, (__u32 __user *)arg)) return -EFAULT; @@ -222,7 +221,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (err == 0) err = err2; mnt_drop_write(filp->f_path.mnt); - ext4_resize_end(sb); return err; } @@ -273,9 +271,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct super_block *sb = inode->i_sb; int err, err2=0; - err = ext4_resize_begin(sb); - if (err) - return err; + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, sizeof(input))) @@ -294,7 +291,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (err == 0) err = err2; mnt_drop_write(filp->f_path.mnt); - ext4_resize_end(sb); return err; } diff --git a/trunk/fs/ext4/mballoc.c b/trunk/fs/ext4/mballoc.c index 17a5a57c415a..6ed859d56850 100644 --- a/trunk/fs/ext4/mballoc.c +++ b/trunk/fs/ext4/mballoc.c @@ -75,8 +75,8 @@ * * The inode preallocation space is used looking at the _logical_ start * block. If only the logical file block falls within the range of prealloc - * space we will consume the particular prealloc space. This makes sure that - * we have contiguous physical blocks representing the file blocks + * space we will consume the particular prealloc space. This make sure that + * that the we have contiguous physical blocks representing the file blocks * * The important thing to be noted in case of inode prealloc space is that * we don't modify the values associated to inode prealloc space except @@ -84,7 +84,7 @@ * * If we are not able to find blocks in the inode prealloc space and if we * have the group allocation flag set then we look at the locality group - * prealloc space. These are per CPU prealloc list represented as + * prealloc space. These are per CPU prealloc list repreasented as * * ext4_sb_info.s_locality_groups[smp_processor_id()] * @@ -128,13 +128,12 @@ * we are doing a group prealloc we try to normalize the request to * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is * 512 blocks. This can be tuned via - * /sys/fs/ext4//mb_group_prealloc. The value is represented in + * /sys/fs/ext4/ option the group prealloc request is normalized to the - * the smallest multiple of the stripe value (sbi->s_stripe) which is - * greater than the default mb_group_prealloc. + * stripe value (sbi->s_stripe) * - * The regular allocator (using the buddy cache) supports a few tunables. + * The regular allocator(using the buddy cache) supports few tunables. * * /sys/fs/ext4//mb_min_to_scan * /sys/fs/ext4//mb_max_to_scan @@ -153,7 +152,7 @@ * best extent in the found extents. Searching for the blocks starts with * the group specified as the goal value in allocation context via * ac_g_ex. Each group is first checked based on the criteria whether it - * can be used for allocation. ext4_mb_good_group explains how the groups are + * can used for allocation. ext4_mb_good_group explains how the groups are * checked. * * Both the prealloc space are getting populated as above. So for the first @@ -493,11 +492,10 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) b2 = (unsigned char *) bitmap; for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { if (b1[i] != b2[i]) { - ext4_msg(e4b->bd_sb, KERN_ERR, - "corruption in group %u " - "at byte %u(%u): %x in copy != %x " - "on disk/prealloc", - e4b->bd_group, i, i * 8, b1[i], b2[i]); + printk(KERN_ERR "corruption in group %u " + "at byte %u(%u): %x in copy != %x " + "on disk/prealloc\n", + e4b->bd_group, i, i * 8, b1[i], b2[i]); BUG(); } } @@ -1127,7 +1125,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, grp = ext4_get_group_info(sb, group); e4b->bd_blkbits = sb->s_blocksize_bits; - e4b->bd_info = grp; + e4b->bd_info = ext4_get_group_info(sb, group); e4b->bd_sb = sb; e4b->bd_group = group; e4b->bd_buddy_page = NULL; @@ -1283,7 +1281,7 @@ static void mb_clear_bits(void *bm, int cur, int len) } } -void ext4_set_bits(void *bm, int cur, int len) +static void mb_set_bits(void *bm, int cur, int len) { __u32 *addr; @@ -1512,7 +1510,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) } mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); - ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); + mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); mb_check_buddy(e4b); return ret; @@ -2225,8 +2223,8 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, EXT4_DESC_PER_BLOCK_BITS(sb); meta_group_info = kmalloc(metalen, GFP_KERNEL); if (meta_group_info == NULL) { - ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem " - "for a buddy group"); + printk(KERN_ERR "EXT4-fs: can't allocate mem for a " + "buddy group\n"); goto exit_meta_group_info; } sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = @@ -2239,7 +2237,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); if (meta_group_info[i] == NULL) { - ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem"); + printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); goto exit_group_info; } memset(meta_group_info[i], 0, kmem_cache_size(cachep)); @@ -2281,10 +2279,8 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, exit_group_info: /* If a meta_group_info table has been allocated, release it now */ - if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { + if (group % EXT4_DESC_PER_BLOCK(sb) == 0) kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]); - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL; - } exit_meta_group_info: return -ENOMEM; } /* ext4_mb_add_groupinfo */ @@ -2332,26 +2328,23 @@ static int ext4_mb_init_backend(struct super_block *sb) /* An 8TB filesystem with 64-bit pointers requires a 4096 byte * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. * So a two level scheme suffices for now. */ - sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL); + sbi->s_group_info = kzalloc(array_size, GFP_KERNEL); if (sbi->s_group_info == NULL) { - ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); + printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); return -ENOMEM; } sbi->s_buddy_cache = new_inode(sb); if (sbi->s_buddy_cache == NULL) { - ext4_msg(sb, KERN_ERR, "can't get new inode"); + printk(KERN_ERR "EXT4-fs: can't get new inode\n"); goto err_freesgi; } - /* To avoid potentially colliding with an valid on-disk inode number, - * use EXT4_BAD_INO for the buddy cache inode number. This inode is - * not in the inode hash, so it should never be found by iget(), but - * this will avoid confusion if it ever shows up during debugging. */ - sbi->s_buddy_cache->i_ino = EXT4_BAD_INO; + sbi->s_buddy_cache->i_ino = get_next_ino(); EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; for (i = 0; i < ngroups; i++) { desc = ext4_get_group_desc(sb, i, NULL); if (desc == NULL) { - ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i); + printk(KERN_ERR + "EXT4-fs: can't read descriptor %u\n", i); goto err_freebuddy; } if (ext4_mb_add_groupinfo(sb, i, desc) != 0) @@ -2369,7 +2362,7 @@ static int ext4_mb_init_backend(struct super_block *sb) kfree(sbi->s_group_info[i]); iput(sbi->s_buddy_cache); err_freesgi: - ext4_kvfree(sbi->s_group_info); + kfree(sbi->s_group_info); return -ENOMEM; } @@ -2411,15 +2404,14 @@ static int ext4_groupinfo_create_slab(size_t size) slab_size, 0, SLAB_RECLAIM_ACCOUNT, NULL); - ext4_groupinfo_caches[cache_index] = cachep; - mutex_unlock(&ext4_grpinfo_slab_create_mutex); if (!cachep) { - printk(KERN_EMERG - "EXT4-fs: no memory for groupinfo slab cache\n"); + printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n"); return -ENOMEM; } + ext4_groupinfo_caches[cache_index] = cachep; + return 0; } @@ -2465,6 +2457,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) i++; } while (i <= sb->s_blocksize_bits + 1); + /* init file for buddy data */ + ret = ext4_mb_init_backend(sb); + if (ret != 0) { + goto out; + } + spin_lock_init(&sbi->s_md_lock); spin_lock_init(&sbi->s_bal_lock); @@ -2474,18 +2472,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; - /* - * If there is a s_stripe > 1, then we set the s_mb_group_prealloc - * to the lowest multiple of s_stripe which is bigger than - * the s_mb_group_prealloc as determined above. We want - * the preallocation size to be an exact multiple of the - * RAID stripe size so that preallocations don't fragment - * the stripes. - */ - if (sbi->s_stripe > 1) { - sbi->s_mb_group_prealloc = roundup( - sbi->s_mb_group_prealloc, sbi->s_stripe); - } sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); if (sbi->s_locality_groups == NULL) { @@ -2501,12 +2487,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) spin_lock_init(&lg->lg_prealloc_lock); } - /* init file for buddy data */ - ret = ext4_mb_init_backend(sb); - if (ret != 0) { - goto out; - } - if (sbi->s_proc) proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, &ext4_mb_seq_groups_fops, sb); @@ -2564,32 +2544,32 @@ int ext4_mb_release(struct super_block *sb) EXT4_DESC_PER_BLOCK_BITS(sb); for (i = 0; i < num_meta_group_infos; i++) kfree(sbi->s_group_info[i]); - ext4_kvfree(sbi->s_group_info); + kfree(sbi->s_group_info); } kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_maxs); if (sbi->s_buddy_cache) iput(sbi->s_buddy_cache); if (sbi->s_mb_stats) { - ext4_msg(sb, KERN_INFO, - "mballoc: %u blocks %u reqs (%u success)", + printk(KERN_INFO + "EXT4-fs: mballoc: %u blocks %u reqs (%u success)\n", atomic_read(&sbi->s_bal_allocated), atomic_read(&sbi->s_bal_reqs), atomic_read(&sbi->s_bal_success)); - ext4_msg(sb, KERN_INFO, - "mballoc: %u extents scanned, %u goal hits, " - "%u 2^N hits, %u breaks, %u lost", + printk(KERN_INFO + "EXT4-fs: mballoc: %u extents scanned, %u goal hits, " + "%u 2^N hits, %u breaks, %u lost\n", atomic_read(&sbi->s_bal_ex_scanned), atomic_read(&sbi->s_bal_goals), atomic_read(&sbi->s_bal_2orders), atomic_read(&sbi->s_bal_breaks), atomic_read(&sbi->s_mb_lost_chunks)); - ext4_msg(sb, KERN_INFO, - "mballoc: %lu generated and it took %Lu", - sbi->s_mb_buddies_generated, + printk(KERN_INFO + "EXT4-fs: mballoc: %lu generated and it took %Lu\n", + sbi->s_mb_buddies_generated++, sbi->s_mb_generation_time); - ext4_msg(sb, KERN_INFO, - "mballoc: %u preallocated, %u discarded", + printk(KERN_INFO + "EXT4-fs: mballoc: %u preallocated, %u discarded\n", atomic_read(&sbi->s_mb_preallocated), atomic_read(&sbi->s_mb_discarded)); } @@ -2648,15 +2628,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) rb_erase(&entry->node, &(db->bb_free_root)); mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); - /* - * Clear the trimmed flag for the group so that the next - * ext4_trim_fs can trim it. - * If the volume is mounted with -o discard, online discard - * is supported and the free blocks will be trimmed online. - */ - if (!test_opt(sb, DISCARD)) - EXT4_MB_GRP_CLEAR_TRIMMED(db); - if (!db->bb_free_root.rb_node) { /* No more items in the per group rb tree * balance refcounts from ext4_mb_free_metadata() @@ -2800,8 +2771,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, * We leak some of the blocks here. */ ext4_lock_group(sb, ac->ac_b_ex.fe_group); - ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, - ac->ac_b_ex.fe_len); + mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, + ac->ac_b_ex.fe_len); ext4_unlock_group(sb, ac->ac_b_ex.fe_group); err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); if (!err) @@ -2819,8 +2790,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, } } #endif - ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, - ac->ac_b_ex.fe_len); + mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len); if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_blks_set(sb, gdp, @@ -2860,9 +2830,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, /* * here we normalize request for locality group - * Group request are normalized to s_mb_group_prealloc, which goes to - * s_strip if we set the same via mount option. - * s_mb_group_prealloc can be configured via + * Group request are normalized to s_strip size if we set the same via mount + * option. If not we set it to s_mb_group_prealloc which can be configured via * /sys/fs/ext4//mb_group_prealloc * * XXX: should we try to preallocate more than the group has now? @@ -2873,7 +2842,10 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac) struct ext4_locality_group *lg = ac->ac_lg; BUG_ON(lg == NULL); - ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; + if (EXT4_SB(sb)->s_stripe) + ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; + else + ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; mb_debug(1, "#%u: goal %u blocks for locality group\n", current->pid, ac->ac_g_ex.fe_len); } @@ -3029,10 +3001,9 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, if (start + size <= ac->ac_o_ex.fe_logical && start > ac->ac_o_ex.fe_logical) { - ext4_msg(ac->ac_sb, KERN_ERR, - "start %lu, size %lu, fe_logical %lu", - (unsigned long) start, (unsigned long) size, - (unsigned long) ac->ac_o_ex.fe_logical); + printk(KERN_ERR "start %lu, size %lu, fe_logical %lu\n", + (unsigned long) start, (unsigned long) size, + (unsigned long) ac->ac_o_ex.fe_logical); } BUG_ON(start + size <= ac->ac_o_ex.fe_logical && start > ac->ac_o_ex.fe_logical); @@ -3291,7 +3262,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, while (n) { entry = rb_entry(n, struct ext4_free_data, node); - ext4_set_bits(bitmap, entry->start_blk, entry->count); + mb_set_bits(bitmap, entry->start_blk, entry->count); n = rb_next(n); } return; @@ -3333,7 +3304,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, if (unlikely(len == 0)) continue; BUG_ON(groupnr != group); - ext4_set_bits(bitmap, start, len); + mb_set_bits(bitmap, start, len); preallocated += len; count++; } @@ -3613,11 +3584,10 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, bit = next + 1; } if (free != pa->pa_free) { - ext4_msg(e4b->bd_sb, KERN_CRIT, - "pa %p: logic %lu, phys. %lu, len %lu", - pa, (unsigned long) pa->pa_lstart, - (unsigned long) pa->pa_pstart, - (unsigned long) pa->pa_len); + printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n", + pa, (unsigned long) pa->pa_lstart, + (unsigned long) pa->pa_pstart, + (unsigned long) pa->pa_len); ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u", free, pa->pa_free); /* @@ -3805,8 +3775,7 @@ void ext4_discard_preallocations(struct inode *inode) * use preallocation while we're discarding it */ spin_unlock(&pa->pa_lock); spin_unlock(&ei->i_prealloc_lock); - ext4_msg(sb, KERN_ERR, - "uh-oh! used pa while discarding"); + printk(KERN_ERR "uh-oh! used pa while discarding\n"); WARN_ON(1); schedule_timeout_uninterruptible(HZ); goto repeat; @@ -3883,13 +3852,12 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) return; - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:" - " Allocation context details:"); - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d", + printk(KERN_ERR "EXT4-fs: Can't allocate:" + " Allocation context details:\n"); + printk(KERN_ERR "EXT4-fs: status %d flags %d\n", ac->ac_status, ac->ac_flags); - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, " - "goal %lu/%lu/%lu@%lu, " - "best %lu/%lu/%lu@%lu cr %d", + printk(KERN_ERR "EXT4-fs: orig %lu/%lu/%lu@%lu, goal %lu/%lu/%lu@%lu, " + "best %lu/%lu/%lu@%lu cr %d\n", (unsigned long)ac->ac_o_ex.fe_group, (unsigned long)ac->ac_o_ex.fe_start, (unsigned long)ac->ac_o_ex.fe_len, @@ -3903,9 +3871,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) (unsigned long)ac->ac_b_ex.fe_len, (unsigned long)ac->ac_b_ex.fe_logical, (int)ac->ac_criteria); - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found", - ac->ac_ex_scanned, ac->ac_found); - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: "); + printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, + ac->ac_found); + printk(KERN_ERR "EXT4-fs: groups: \n"); ngroups = ext4_get_groups_count(sb); for (i = 0; i < ngroups; i++) { struct ext4_group_info *grp = ext4_get_group_info(sb, i); @@ -4669,7 +4637,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, } ext4_mark_super_dirty(sb); error_return: - if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) + if (freed) dquot_free_block(inode, freed); brelse(bitmap_bh); ext4_std_error(sb, err); @@ -4677,7 +4645,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, } /** - * ext4_group_add_blocks() -- Add given blocks to an existing group + * ext4_add_groupblocks() -- Add given blocks to an existing group * @handle: handle to this transaction * @sb: super block * @block: start physcial block to add to the block group @@ -4685,7 +4653,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, * * This marks the blocks as free in the bitmap and buddy. */ -int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, +void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count) { struct buffer_head *bitmap_bh = NULL; @@ -4698,35 +4666,25 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, struct ext4_buddy e4b; int err = 0, ret, blk_free_count; ext4_grpblk_t blocks_freed; + struct ext4_group_info *grp; ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); - if (count == 0) - return 0; - ext4_get_group_no_and_offset(sb, block, &block_group, &bit); + grp = ext4_get_group_info(sb, block_group); /* * Check to see if we are freeing blocks across a group * boundary. */ - if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { - ext4_warning(sb, "too much blocks added to group %u\n", - block_group); - err = -EINVAL; + if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) goto error_return; - } bitmap_bh = ext4_read_block_bitmap(sb, block_group); - if (!bitmap_bh) { - err = -EIO; + if (!bitmap_bh) goto error_return; - } - desc = ext4_get_group_desc(sb, block_group, &gd_bh); - if (!desc) { - err = -EIO; + if (!desc) goto error_return; - } if (in_range(ext4_block_bitmap(sb, desc), block, count) || in_range(ext4_inode_bitmap(sb, desc), block, count) || @@ -4736,7 +4694,6 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_error(sb, "Adding blocks in system zones - " "Block = %llu, count = %lu", block, count); - err = -EINVAL; goto error_return; } @@ -4805,7 +4762,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, error_return: brelse(bitmap_bh); ext4_std_error(sb, err); - return err; + return; } /** @@ -4825,8 +4782,6 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count, { struct ext4_free_extent ex; - trace_ext4_trim_extent(sb, group, start, count); - assert_spin_locked(ext4_group_lock_ptr(sb, group)); ex.fe_start = start; @@ -4847,7 +4802,7 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count, /** * ext4_trim_all_free -- function to trim all free space in alloc. group * @sb: super block for file system - * @group: group to be trimmed + * @e4b: ext4 buddy * @start: first group block to examine * @max: last group block to examine * @minblocks: minimum extent block count @@ -4868,12 +4823,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_grpblk_t minblocks) { void *bitmap; - ext4_grpblk_t next, count = 0, free_count = 0; + ext4_grpblk_t next, count = 0; struct ext4_buddy e4b; int ret; - trace_ext4_trim_all_free(sb, group, start, max); - ret = ext4_mb_load_buddy(sb, group, &e4b); if (ret) { ext4_error(sb, "Error in loading buddy " @@ -4883,10 +4836,6 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, bitmap = e4b.bd_bitmap; ext4_lock_group(sb, group); - if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) && - minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) - goto out; - start = (e4b.bd_info->bb_first_free > start) ? e4b.bd_info->bb_first_free : start; @@ -4901,7 +4850,6 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, next - start, group, &e4b); count += next - start; } - free_count += next - start; start = next + 1; if (fatal_signal_pending(current)) { @@ -4915,13 +4863,9 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_lock_group(sb, group); } - if ((e4b.bd_info->bb_free - free_count) < minblocks) + if ((e4b.bd_info->bb_free - count) < minblocks) break; } - - if (!ret) - EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); -out: ext4_unlock_group(sb, group); ext4_mb_unload_buddy(&e4b); @@ -4960,8 +4904,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) return -EINVAL; - if (start + len <= first_data_blk) - goto out; if (start < first_data_blk) { len -= first_data_blk - start; start = first_data_blk; @@ -5010,9 +4952,5 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) } range->len = trimmed * sb->s_blocksize; - if (!ret) - atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); - -out: return ret; } diff --git a/trunk/fs/ext4/mballoc.h b/trunk/fs/ext4/mballoc.h index 9d4a636b546c..20b5e7bfebd1 100644 --- a/trunk/fs/ext4/mballoc.h +++ b/trunk/fs/ext4/mballoc.h @@ -187,6 +187,7 @@ struct ext4_allocation_context { __u16 ac_flags; /* allocation hints */ __u8 ac_status; __u8 ac_criteria; + __u8 ac_repeats; __u8 ac_2order; /* if request is to allocate 2^N blocks and * N > 0, the field stores N, otherwise 0 */ __u8 ac_op; /* operation, for history only */ diff --git a/trunk/fs/ext4/namei.c b/trunk/fs/ext4/namei.c index 565a154e22d4..8c9babac43dc 100644 --- a/trunk/fs/ext4/namei.c +++ b/trunk/fs/ext4/namei.c @@ -289,7 +289,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent while (len--) printk("%c", *name++); ext4fs_dirhash(de->name, de->name_len, &h); printk(":%x.%u ", h.hash, - (unsigned) ((char *) de - base)); + ((char *) de - base)); } space += EXT4_DIR_REC_LEN(de->name_len); names++; @@ -1013,7 +1013,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q *err = -ENOENT; errout: - dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name)); + dxtrace(printk(KERN_DEBUG "%s not found\n", name)); dx_release (frames); return NULL; } @@ -1985,11 +1985,18 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) if (!list_empty(&EXT4_I(inode)->i_orphan)) goto out_unlock; - /* - * Orphan handling is only valid for files with data blocks - * being truncated, or files being unlinked. Note that we either - * hold i_mutex, or the inode can not be referenced from outside, - * so i_nlink should not be bumped due to race + /* Orphan handling is only valid for files with data blocks + * being truncated, or files being unlinked. */ + + /* @@@ FIXME: Observation from aviro: + * I think I can trigger J_ASSERT in ext4_orphan_add(). We block + * here (on s_orphan_lock), so race with ext4_link() which might bump + * ->i_nlink. For, say it, character device. Not a regular file, + * not a directory, not a symlink and ->i_nlink > 0. + * + * tytso, 4/25/2009: I'm not sure how that could happen; + * shouldn't the fs core protect us from these sort of + * unlink()/link() races? */ J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); diff --git a/trunk/fs/ext4/page-io.c b/trunk/fs/ext4/page-io.c index 430c401d0895..7bb8f76d470a 100644 --- a/trunk/fs/ext4/page-io.c +++ b/trunk/fs/ext4/page-io.c @@ -285,7 +285,11 @@ static int io_submit_init(struct ext4_io_submit *io, io_end = ext4_init_io_end(inode, GFP_NOFS); if (!io_end) return -ENOMEM; - bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); + do { + bio = bio_alloc(GFP_NOIO, nvecs); + nvecs >>= 1; + } while (bio == NULL); + bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio->bi_bdev = bh->b_bdev; bio->bi_private = io->io_end = io_end; diff --git a/trunk/fs/ext4/resize.c b/trunk/fs/ext4/resize.c index 707d3f16f7ce..80bbc9c60c24 100644 --- a/trunk/fs/ext4/resize.c +++ b/trunk/fs/ext4/resize.c @@ -16,35 +16,6 @@ #include "ext4_jbd2.h" -int ext4_resize_begin(struct super_block *sb) -{ - int ret = 0; - - if (!capable(CAP_SYS_RESOURCE)) - return -EPERM; - - /* - * We are not allowed to do online-resizing on a filesystem mounted - * with error, because it can destroy the filesystem easily. - */ - if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { - ext4_warning(sb, "There are errors in the filesystem, " - "so online resizing is not allowed\n"); - return -EPERM; - } - - if (test_and_set_bit_lock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags)) - ret = -EBUSY; - - return ret; -} - -void ext4_resize_end(struct super_block *sb) -{ - clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags); - smp_mb__after_clear_bit(); -} - #define outside(b, first, last) ((b) < (first) || (b) >= (last)) #define inside(b, first, last) ((b) >= (first) && (b) < (last)) @@ -147,8 +118,10 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, brelse(bh); bh = ERR_PTR(err); } else { + lock_buffer(bh); memset(bh->b_data, 0, sb->s_blocksize); set_buffer_uptodate(bh); + unlock_buffer(bh); } return bh; @@ -159,7 +132,8 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, * If that fails, restart the transaction & regain write access for the * buffer head which is used for block_bitmap modifications. */ -static int extend_or_restart_transaction(handle_t *handle, int thresh) +static int extend_or_restart_transaction(handle_t *handle, int thresh, + struct buffer_head *bh) { int err; @@ -170,8 +144,9 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh) if (err < 0) return err; if (err) { - err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA); - if (err) + if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) + return err; + if ((err = ext4_journal_get_write_access(handle, bh))) return err; } @@ -206,7 +181,21 @@ static int setup_new_group_blocks(struct super_block *sb, if (IS_ERR(handle)) return PTR_ERR(handle); - BUG_ON(input->group != sbi->s_groups_count); + mutex_lock(&sbi->s_resize_lock); + if (input->group != sbi->s_groups_count) { + err = -EBUSY; + goto exit_journal; + } + + if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { + err = PTR_ERR(bh); + goto exit_journal; + } + + if (ext4_bg_has_super(sb, input->group)) { + ext4_debug("mark backup superblock %#04llx (+0)\n", start); + ext4_set_bit(0, bh->b_data); + } /* Copy all of the GDT blocks into the backup in this group */ for (i = 0, bit = 1, block = start + 1; @@ -214,26 +203,29 @@ static int setup_new_group_blocks(struct super_block *sb, struct buffer_head *gdb; ext4_debug("update backup group %#04llx (+%d)\n", block, bit); - err = extend_or_restart_transaction(handle, 1); - if (err) - goto exit_journal; + + if ((err = extend_or_restart_transaction(handle, 1, bh))) + goto exit_bh; gdb = sb_getblk(sb, block); if (!gdb) { err = -EIO; - goto exit_journal; + goto exit_bh; } if ((err = ext4_journal_get_write_access(handle, gdb))) { brelse(gdb); - goto exit_journal; + goto exit_bh; } + lock_buffer(gdb); memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); set_buffer_uptodate(gdb); + unlock_buffer(gdb); err = ext4_handle_dirty_metadata(handle, NULL, gdb); if (unlikely(err)) { brelse(gdb); - goto exit_journal; + goto exit_bh; } + ext4_set_bit(bit, bh->b_data); brelse(gdb); } @@ -243,22 +235,9 @@ static int setup_new_group_blocks(struct super_block *sb, err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, GFP_NOFS); if (err) - goto exit_journal; - - err = extend_or_restart_transaction(handle, 2); - if (err) - goto exit_journal; - - bh = bclean(handle, sb, input->block_bitmap); - if (IS_ERR(bh)) { - err = PTR_ERR(bh); - goto exit_journal; - } - - if (ext4_bg_has_super(sb, input->group)) { - ext4_debug("mark backup group tables %#04llx (+0)\n", start); - ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1); - } + goto exit_bh; + for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++) + ext4_set_bit(bit, bh->b_data); ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, input->block_bitmap - start); @@ -274,9 +253,12 @@ static int setup_new_group_blocks(struct super_block *sb, err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); if (err) goto exit_bh; - ext4_set_bits(bh->b_data, input->inode_table - start, - sbi->s_itb_per_group); + for (i = 0, bit = input->inode_table - start; + i < sbi->s_itb_per_group; i++, bit++) + ext4_set_bit(bit, bh->b_data); + if ((err = extend_or_restart_transaction(handle, 2, bh))) + goto exit_bh; ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data); @@ -303,6 +285,7 @@ static int setup_new_group_blocks(struct super_block *sb, brelse(bh); exit_journal: + mutex_unlock(&sbi->s_resize_lock); if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; @@ -394,15 +377,15 @@ static int verify_reserved_gdb(struct super_block *sb, * fail once we start modifying the data on disk, because JBD has no rollback. */ static int add_new_gdb(handle_t *handle, struct inode *inode, - ext4_group_t group) + struct ext4_new_group_data *input, + struct buffer_head **primary) { struct super_block *sb = inode->i_sb; struct ext4_super_block *es = EXT4_SB(sb)->s_es; - unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb); + unsigned long gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; struct buffer_head **o_group_desc, **n_group_desc; struct buffer_head *dind; - struct buffer_head *gdb_bh; int gdbackups; struct ext4_iloc iloc; __le32 *data; @@ -425,12 +408,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, return -EPERM; } - gdb_bh = sb_bread(sb, gdblock); - if (!gdb_bh) + *primary = sb_bread(sb, gdblock); + if (!*primary) return -EIO; - gdbackups = verify_reserved_gdb(sb, gdb_bh); - if (gdbackups < 0) { + if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) { err = gdbackups; goto exit_bh; } @@ -445,7 +427,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, data = (__le32 *)dind->b_data; if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { ext4_warning(sb, "new group %u GDT block %llu not reserved", - group, gdblock); + input->group, gdblock); err = -EINVAL; goto exit_dind; } @@ -454,7 +436,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, if (unlikely(err)) goto exit_dind; - err = ext4_journal_get_write_access(handle, gdb_bh); + err = ext4_journal_get_write_access(handle, *primary); if (unlikely(err)) goto exit_sbh; @@ -467,13 +449,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, if (unlikely(err)) goto exit_dindj; - n_group_desc = ext4_kvmalloc((gdb_num + 1) * - sizeof(struct buffer_head *), - GFP_NOFS); + n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), + GFP_NOFS); if (!n_group_desc) { err = -ENOMEM; - ext4_warning(sb, "not enough memory for %lu groups", - gdb_num + 1); + ext4_warning(sb, + "not enough memory for %lu groups", gdb_num + 1); goto exit_inode; } @@ -494,8 +475,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, } inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; ext4_mark_iloc_dirty(handle, inode, &iloc); - memset(gdb_bh->b_data, 0, sb->s_blocksize); - err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); + memset((*primary)->b_data, 0, sb->s_blocksize); + err = ext4_handle_dirty_metadata(handle, NULL, *primary); if (unlikely(err)) { ext4_std_error(sb, err); goto exit_inode; @@ -505,10 +486,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, o_group_desc = EXT4_SB(sb)->s_group_desc; memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); - n_group_desc[gdb_num] = gdb_bh; + n_group_desc[gdb_num] = *primary; EXT4_SB(sb)->s_group_desc = n_group_desc; EXT4_SB(sb)->s_gdb_count++; - ext4_kvfree(o_group_desc); + kfree(o_group_desc); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); @@ -518,7 +499,6 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, return err; exit_inode: - ext4_kvfree(n_group_desc); /* ext4_handle_release_buffer(handle, iloc.bh); */ brelse(iloc.bh); exit_dindj: @@ -528,7 +508,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, exit_dind: brelse(dind); exit_bh: - brelse(gdb_bh); + brelse(*primary); ext4_debug("leaving with error %d\n", err); return err; @@ -548,7 +528,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, * backup GDT blocks are stored in their reserved primary GDT block. */ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, - ext4_group_t group) + struct ext4_new_group_data *input) { struct super_block *sb = inode->i_sb; int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); @@ -619,7 +599,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, * Finally we can add each of the reserved backup GDT blocks from * the new group to its reserved primary GDT block. */ - blk = group * EXT4_BLOCKS_PER_GROUP(sb); + blk = input->group * EXT4_BLOCKS_PER_GROUP(sb); for (i = 0; i < reserved_gdb; i++) { int err2; data = (__le32 *)primary[i]->b_data; @@ -819,6 +799,13 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) goto exit_put; } + mutex_lock(&sbi->s_resize_lock); + if (input->group != sbi->s_groups_count) { + ext4_warning(sb, "multiple resizers run on filesystem!"); + err = -EBUSY; + goto exit_journal; + } + if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) goto exit_journal; @@ -833,25 +820,16 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if ((err = ext4_journal_get_write_access(handle, primary))) goto exit_journal; - if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) { - err = reserve_backup_gdb(handle, inode, input->group); - if (err) - goto exit_journal; - } - } else { - /* - * Note that we can access new group descriptor block safely - * only if add_new_gdb() succeeds. - */ - err = add_new_gdb(handle, inode, input->group); - if (err) + if (reserved_gdb && ext4_bg_num_gdb(sb, input->group) && + (err = reserve_backup_gdb(handle, inode, input))) goto exit_journal; - primary = sbi->s_group_desc[gdb_num]; - } + } else if ((err = add_new_gdb(handle, inode, input, &primary))) + goto exit_journal; /* * OK, now we've set up the new group. Time to make it active. * + * We do not lock all allocations via s_resize_lock * so we have to be safe wrt. concurrent accesses the group * data. So we need to be careful to set all of the relevant * group descriptor data etc. *before* we enable the group. @@ -908,9 +886,13 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) * * The precise rules we use are: * + * * Writers of s_groups_count *must* hold s_resize_lock + * AND * * Writers must perform a smp_wmb() after updating all dependent * data and before modifying the groups count * + * * Readers must hold s_resize_lock over the access + * OR * * Readers must perform an smp_rmb() after reading the groups count * and before reading any dependent data. * @@ -955,9 +937,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) ext4_handle_dirty_super(handle, sb); exit_journal: + mutex_unlock(&sbi->s_resize_lock); if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; - if (!err && primary) { + if (!err) { update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, sizeof(struct ext4_super_block)); update_backups(sb, primary->b_blocknr, primary->b_data, @@ -986,13 +969,16 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_grpblk_t add; struct buffer_head *bh; handle_t *handle; - int err, err2; + int err; ext4_group_t group; + /* We don't need to worry about locking wrt other resizers just + * yet: we're going to revalidate es->s_blocks_count after + * taking the s_resize_lock below. */ o_blocks_count = ext4_blocks_count(es); if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT4-fs: extending last group from %llu to %llu blocks\n", + printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n", o_blocks_count, n_blocks_count); if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) @@ -1009,7 +995,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, if (n_blocks_count < o_blocks_count) { ext4_warning(sb, "can't shrink FS - resize aborted"); - return -EINVAL; + return -EBUSY; } /* Handle the remaining blocks in the last group only. */ @@ -1052,25 +1038,32 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, goto exit_put; } + mutex_lock(&EXT4_SB(sb)->s_resize_lock); + if (o_blocks_count != ext4_blocks_count(es)) { + ext4_warning(sb, "multiple resizers run on filesystem!"); + mutex_unlock(&EXT4_SB(sb)->s_resize_lock); + ext4_journal_stop(handle); + err = -EBUSY; + goto exit_put; + } + if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh))) { ext4_warning(sb, "error %d on journal write access", err); + mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_journal_stop(handle); goto exit_put; } ext4_blocks_count_set(es, o_blocks_count + add); + mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); /* We add the blocks to the bitmap and set the group need init bit */ - err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); + ext4_add_groupblocks(handle, sb, o_blocks_count, add); ext4_handle_dirty_super(handle, sb); ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); - err2 = ext4_journal_stop(handle); - if (!err && err2) - err = err2; - - if (err) + if ((err = ext4_journal_stop(handle))) goto exit_put; if (test_opt(sb, DEBUG)) diff --git a/trunk/fs/ext4/super.c b/trunk/fs/ext4/super.c index e2d88baf91d3..9ea71aa864b3 100644 --- a/trunk/fs/ext4/super.c +++ b/trunk/fs/ext4/super.c @@ -110,35 +110,6 @@ static struct file_system_type ext3_fs_type = { #define IS_EXT3_SB(sb) (0) #endif -void *ext4_kvmalloc(size_t size, gfp_t flags) -{ - void *ret; - - ret = kmalloc(size, flags); - if (!ret) - ret = __vmalloc(size, flags, PAGE_KERNEL); - return ret; -} - -void *ext4_kvzalloc(size_t size, gfp_t flags) -{ - void *ret; - - ret = kmalloc(size, flags); - if (!ret) - ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); - return ret; -} - -void ext4_kvfree(void *ptr) -{ - if (is_vmalloc_addr(ptr)) - vfree(ptr); - else - kfree(ptr); - -} - ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, struct ext4_group_desc *bg) { @@ -298,7 +269,6 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) journal_t *journal; handle_t *handle; - trace_ext4_journal_start(sb, nblocks, _RET_IP_); if (sb->s_flags & MS_RDONLY) return ERR_PTR(-EROFS); @@ -819,8 +789,11 @@ static void ext4_put_super(struct super_block *sb) for (i = 0; i < sbi->s_gdb_count; i++) brelse(sbi->s_group_desc[i]); - ext4_kvfree(sbi->s_group_desc); - ext4_kvfree(sbi->s_flex_groups); + kfree(sbi->s_group_desc); + if (is_vmalloc_addr(sbi->s_flex_groups)) + vfree(sbi->s_flex_groups); + else + kfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); @@ -2003,11 +1976,15 @@ static int ext4_fill_flex_info(struct super_block *sb) ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; size = flex_group_count * sizeof(struct flex_groups); - sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL); + sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); if (sbi->s_flex_groups == NULL) { - ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups", - flex_group_count); - goto failed; + sbi->s_flex_groups = vzalloc(size); + if (sbi->s_flex_groups == NULL) { + ext4_msg(sb, KERN_ERR, + "not enough memory for %u flex groups", + flex_group_count); + goto failed; + } } for (i = 0; i < sbi->s_groups_count; i++) { @@ -2406,25 +2383,17 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); unsigned long stripe_width = le32_to_cpu(sbi->s_es->s_raid_stripe_width); - int ret; if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) - ret = sbi->s_stripe; - else if (stripe_width <= sbi->s_blocks_per_group) - ret = stripe_width; - else if (stride <= sbi->s_blocks_per_group) - ret = stride; - else - ret = 0; + return sbi->s_stripe; - /* - * If the stripe width is 1, this makes no sense and - * we set it to 0 to turn off stripe handling code. - */ - if (ret <= 1) - ret = 0; + if (stripe_width <= sbi->s_blocks_per_group) + return stripe_width; - return ret; + if (stride <= sbi->s_blocks_per_group) + return stride; + + return 0; } /* sysfs supprt */ @@ -3439,9 +3408,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); - sbi->s_group_desc = ext4_kvmalloc(db_count * - sizeof(struct buffer_head *), - GFP_KERNEL); + sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), + GFP_KERNEL); if (sbi->s_group_desc == NULL) { ext4_msg(sb, KERN_ERR, "not enough memory"); goto failed_mount; @@ -3523,7 +3491,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ mutex_init(&sbi->s_orphan_lock); - sbi->s_resize_flags = 0; + mutex_init(&sbi->s_resize_lock); sb->s_root = NULL; @@ -3773,8 +3741,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } failed_mount3: del_timer(&sbi->s_err_report); - if (sbi->s_flex_groups) - ext4_kvfree(sbi->s_flex_groups); + if (sbi->s_flex_groups) { + if (is_vmalloc_addr(sbi->s_flex_groups)) + vfree(sbi->s_flex_groups); + else + kfree(sbi->s_flex_groups); + } percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); @@ -3784,7 +3756,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) failed_mount2: for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); - ext4_kvfree(sbi->s_group_desc); + kfree(sbi->s_group_desc); failed_mount: if (sbi->s_proc) { remove_proc_entry(sb->s_id, ext4_proc_root); diff --git a/trunk/fs/ext4/truncate.h b/trunk/fs/ext4/truncate.h deleted file mode 100644 index 011ba6670d99..000000000000 --- a/trunk/fs/ext4/truncate.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * linux/fs/ext4/truncate.h - * - * Common inline functions needed for truncate support - */ - -/* - * Truncate blocks that were not used by write. We have to truncate the - * pagecache as well so that corresponding buffers get properly unmapped. - */ -static inline void ext4_truncate_failed_write(struct inode *inode) -{ - truncate_inode_pages(inode->i_mapping, inode->i_size); - ext4_truncate(inode); -} - -/* - * Work out how many blocks we need to proceed with the next chunk of a - * truncate transaction. - */ -static inline unsigned long ext4_blocks_for_truncate(struct inode *inode) -{ - ext4_lblk_t needed; - - needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); - - /* Give ourselves just enough room to cope with inodes in which - * i_blocks is corrupt: we've seen disk corruptions in the past - * which resulted in random data in an inode which looked enough - * like a regular file for ext4 to try to delete it. Things - * will go a bit crazy if that happens, but at least we should - * try not to panic the whole kernel. */ - if (needed < 2) - needed = 2; - - /* But we need to bound the transaction so we don't overflow the - * journal. */ - if (needed > EXT4_MAX_TRANS_DATA) - needed = EXT4_MAX_TRANS_DATA; - - return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed; -} - diff --git a/trunk/fs/generic_acl.c b/trunk/fs/generic_acl.c index d0dddaceac59..d5e33a077a67 100644 --- a/trunk/fs/generic_acl.c +++ b/trunk/fs/generic_acl.c @@ -82,14 +82,18 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value, return PTR_ERR(acl); } if (acl) { + mode_t mode; + error = posix_acl_valid(acl); if (error) goto failed; switch (type) { case ACL_TYPE_ACCESS: - error = posix_acl_equiv_mode(acl, &inode->i_mode); + mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); if (error < 0) goto failed; + inode->i_mode = mode; inode->i_ctime = CURRENT_TIME; if (error == 0) { posix_acl_release(acl); @@ -121,20 +125,21 @@ int generic_acl_init(struct inode *inode, struct inode *dir) { struct posix_acl *acl = NULL; + mode_t mode = inode->i_mode; int error; + inode->i_mode = mode & ~current_umask(); if (!S_ISLNK(inode->i_mode)) acl = get_cached_acl(dir, ACL_TYPE_DEFAULT); if (acl) { if (S_ISDIR(inode->i_mode)) set_cached_acl(inode, ACL_TYPE_DEFAULT, acl); - error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); + error = posix_acl_create(&acl, GFP_KERNEL, &mode); if (error < 0) return error; + inode->i_mode = mode; if (error > 0) set_cached_acl(inode, ACL_TYPE_ACCESS, acl); - } else { - inode->i_mode &= ~current_umask(); } error = 0; diff --git a/trunk/fs/gfs2/acl.c b/trunk/fs/gfs2/acl.c index 34501b64bc47..884c9af0542f 100644 --- a/trunk/fs/gfs2/acl.c +++ b/trunk/fs/gfs2/acl.c @@ -72,7 +72,7 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type) return gfs2_acl_get(GFS2_I(inode), type); } -static int gfs2_set_mode(struct inode *inode, umode_t mode) +static int gfs2_set_mode(struct inode *inode, mode_t mode) { int error = 0; @@ -117,7 +117,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct posix_acl *acl; - umode_t mode = inode->i_mode; + mode_t mode = inode->i_mode; int error = 0; if (!sdp->sd_args.ar_posix_acl) @@ -276,7 +276,7 @@ static int gfs2_xattr_system_set(struct dentry *dentry, const char *name, goto out_release; if (type == ACL_TYPE_ACCESS) { - umode_t mode = inode->i_mode; + mode_t mode = inode->i_mode; error = posix_acl_equiv_mode(acl, &mode); if (error <= 0) { diff --git a/trunk/fs/hppfs/hppfs.c b/trunk/fs/hppfs/hppfs.c index 970ea987b3f6..8635be5ffd97 100644 --- a/trunk/fs/hppfs/hppfs.c +++ b/trunk/fs/hppfs/hppfs.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include "os.h" diff --git a/trunk/fs/inode.c b/trunk/fs/inode.c index 5aab80dc008c..d0c72ff6b30e 100644 --- a/trunk/fs/inode.c +++ b/trunk/fs/inode.c @@ -399,12 +399,12 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval) EXPORT_SYMBOL(__insert_inode_hash); /** - * __remove_inode_hash - remove an inode from the hash + * remove_inode_hash - remove an inode from the hash * @inode: inode to unhash * * Remove an inode from the superblock. */ -void __remove_inode_hash(struct inode *inode) +void remove_inode_hash(struct inode *inode) { spin_lock(&inode_hash_lock); spin_lock(&inode->i_lock); @@ -412,7 +412,7 @@ void __remove_inode_hash(struct inode *inode) spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); } -EXPORT_SYMBOL(__remove_inode_hash); +EXPORT_SYMBOL(remove_inode_hash); void end_writeback(struct inode *inode) { @@ -454,9 +454,7 @@ static void evict(struct inode *inode) BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(!list_empty(&inode->i_lru)); - if (!list_empty(&inode->i_wb_list)) - inode_wb_list_del(inode); - + inode_wb_list_del(inode); inode_sb_list_del(inode); if (op->evict_inode) { @@ -1330,8 +1328,7 @@ static void iput_final(struct inode *inode) } inode->i_state |= I_FREEING; - if (!list_empty(&inode->i_lru)) - inode_lru_list_del(inode); + inode_lru_list_del(inode); spin_unlock(&inode->i_lock); evict(inode); diff --git a/trunk/fs/jbd2/checkpoint.c b/trunk/fs/jbd2/checkpoint.c index 16a698bd906d..2c62c5aae82f 100644 --- a/trunk/fs/jbd2/checkpoint.c +++ b/trunk/fs/jbd2/checkpoint.c @@ -257,12 +257,9 @@ static void __flush_batch(journal_t *journal, int *batch_count) { int i; - struct blk_plug plug; - blk_start_plug(&plug); for (i = 0; i < *batch_count; i++) - write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE_SYNC); - blk_finish_plug(&plug); + write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE); for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = journal->j_chkpt_bhs[i]; diff --git a/trunk/fs/jbd2/journal.c b/trunk/fs/jbd2/journal.c index f24df13adc4e..0dfa5b598e68 100644 --- a/trunk/fs/jbd2/journal.c +++ b/trunk/fs/jbd2/journal.c @@ -2390,6 +2390,73 @@ static void __exit journal_exit(void) jbd2_journal_destroy_caches(); } +/* + * jbd2_dev_to_name is a utility function used by the jbd2 and ext4 + * tracing infrastructure to map a dev_t to a device name. + * + * The caller should use rcu_read_lock() in order to make sure the + * device name stays valid until its done with it. We use + * rcu_read_lock() as well to make sure we're safe in case the caller + * gets sloppy, and because rcu_read_lock() is cheap and can be safely + * nested. + */ +struct devname_cache { + struct rcu_head rcu; + dev_t device; + char devname[BDEVNAME_SIZE]; +}; +#define CACHE_SIZE_BITS 6 +static struct devname_cache *devcache[1 << CACHE_SIZE_BITS]; +static DEFINE_SPINLOCK(devname_cache_lock); + +static void free_devcache(struct rcu_head *rcu) +{ + kfree(rcu); +} + +const char *jbd2_dev_to_name(dev_t device) +{ + int i = hash_32(device, CACHE_SIZE_BITS); + char *ret; + struct block_device *bd; + static struct devname_cache *new_dev; + + rcu_read_lock(); + if (devcache[i] && devcache[i]->device == device) { + ret = devcache[i]->devname; + rcu_read_unlock(); + return ret; + } + rcu_read_unlock(); + + new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); + if (!new_dev) + return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ + bd = bdget(device); + spin_lock(&devname_cache_lock); + if (devcache[i]) { + if (devcache[i]->device == device) { + kfree(new_dev); + bdput(bd); + ret = devcache[i]->devname; + spin_unlock(&devname_cache_lock); + return ret; + } + call_rcu(&devcache[i]->rcu, free_devcache); + } + devcache[i] = new_dev; + devcache[i]->device = device; + if (bd) { + bdevname(bd, devcache[i]->devname); + bdput(bd); + } else + __bdevname(device, devcache[i]->devname); + ret = devcache[i]->devname; + spin_unlock(&devname_cache_lock); + return ret; +} +EXPORT_SYMBOL(jbd2_dev_to_name); + MODULE_LICENSE("GPL"); module_init(journal_init); module_exit(journal_exit); diff --git a/trunk/fs/jffs2/acl.c b/trunk/fs/jffs2/acl.c index 926d02068a14..27c511a1cf05 100644 --- a/trunk/fs/jffs2/acl.c +++ b/trunk/fs/jffs2/acl.c @@ -227,7 +227,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) case ACL_TYPE_ACCESS: xprefix = JFFS2_XPREFIX_ACL_ACCESS; if (acl) { - umode_t mode = inode->i_mode; + mode_t mode = inode->i_mode; rc = posix_acl_equiv_mode(acl, &mode); if (rc < 0) return rc; @@ -259,7 +259,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) return rc; } -int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, umode_t *i_mode) +int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, mode_t *i_mode) { struct posix_acl *acl; int rc; diff --git a/trunk/fs/jffs2/acl.h b/trunk/fs/jffs2/acl.h index 9b477246f2a6..b3421c78d9f8 100644 --- a/trunk/fs/jffs2/acl.h +++ b/trunk/fs/jffs2/acl.h @@ -28,7 +28,7 @@ struct jffs2_acl_header { struct posix_acl *jffs2_get_acl(struct inode *inode, int type); extern int jffs2_acl_chmod(struct inode *); -extern int jffs2_init_acl_pre(struct inode *, struct inode *, umode_t *); +extern int jffs2_init_acl_pre(struct inode *, struct inode *, mode_t *); extern int jffs2_init_acl_post(struct inode *); extern const struct xattr_handler jffs2_acl_access_xattr_handler; diff --git a/trunk/fs/jffs2/fs.c b/trunk/fs/jffs2/fs.c index bbcb9755dd2b..b81b35ddf4e4 100644 --- a/trunk/fs/jffs2/fs.c +++ b/trunk/fs/jffs2/fs.c @@ -406,7 +406,7 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data) /* jffs2_new_inode: allocate a new inode and inocache, add it to the hash, fill in the raw_inode while you're at it. */ -struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_raw_inode *ri) +struct inode *jffs2_new_inode (struct inode *dir_i, mode_t mode, struct jffs2_raw_inode *ri) { struct inode *inode; struct super_block *sb = dir_i->i_sb; diff --git a/trunk/fs/jffs2/os-linux.h b/trunk/fs/jffs2/os-linux.h index 6c1755c59c0f..526979c607b6 100644 --- a/trunk/fs/jffs2/os-linux.h +++ b/trunk/fs/jffs2/os-linux.h @@ -173,7 +173,7 @@ int jffs2_do_setattr (struct inode *, struct iattr *); struct inode *jffs2_iget(struct super_block *, unsigned long); void jffs2_evict_inode (struct inode *); void jffs2_dirty_inode(struct inode *inode, int flags); -struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, +struct inode *jffs2_new_inode (struct inode *dir_i, mode_t mode, struct jffs2_raw_inode *ri); int jffs2_statfs (struct dentry *, struct kstatfs *); int jffs2_remount_fs (struct super_block *, int *, char *); diff --git a/trunk/fs/jfs/acl.c b/trunk/fs/jfs/acl.c index 45559dc3ea2f..b3a32caf2b45 100644 --- a/trunk/fs/jfs/acl.c +++ b/trunk/fs/jfs/acl.c @@ -127,14 +127,16 @@ int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) return PTR_ERR(acl); if (acl) { + mode_t mode = inode->i_mode; if (S_ISDIR(inode->i_mode)) { rc = jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, acl); if (rc) goto cleanup; } - rc = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); + rc = posix_acl_create(&acl, GFP_KERNEL, &mode); if (rc < 0) goto cleanup; /* posix_acl_release(NULL) is no-op */ + inode->i_mode = mode; if (rc > 0) rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, acl); cleanup: diff --git a/trunk/fs/jfs/xattr.c b/trunk/fs/jfs/xattr.c index e87fedef23db..24838f1eeee5 100644 --- a/trunk/fs/jfs/xattr.c +++ b/trunk/fs/jfs/xattr.c @@ -693,7 +693,8 @@ static int can_set_system_xattr(struct inode *inode, const char *name, return rc; } if (acl) { - rc = posix_acl_equiv_mode(acl, &inode->i_mode); + mode_t mode = inode->i_mode; + rc = posix_acl_equiv_mode(acl, &mode); posix_acl_release(acl); if (rc < 0) { printk(KERN_ERR @@ -701,6 +702,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name, rc); return rc; } + inode->i_mode = mode; mark_inode_dirty(inode); } /* diff --git a/trunk/fs/namei.c b/trunk/fs/namei.c index 445fd5da11fa..f8c69d373793 100644 --- a/trunk/fs/namei.c +++ b/trunk/fs/namei.c @@ -716,25 +716,19 @@ static int follow_automount(struct path *path, unsigned flags, if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT)) return -EISDIR; /* we actually want to stop here */ - /* + /* We want to mount if someone is trying to open/create a file of any + * type under the mountpoint, wants to traverse through the mountpoint + * or wants to open the mounted directory. + * * We don't want to mount if someone's just doing a stat and they've * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and * appended a '/' to the name. */ - if (!(flags & LOOKUP_FOLLOW)) { - /* We do, however, want to mount if someone wants to open or - * create a file of any type under the mountpoint, wants to - * traverse through the mountpoint or wants to open the mounted - * directory. - * Also, autofs may mark negative dentries as being automount - * points. These will need the attentions of the daemon to - * instantiate them before they can be used. - */ - if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | - LOOKUP_OPEN | LOOKUP_CREATE)) && - path->dentry->d_inode) - return -EISDIR; - } + if (!(flags & LOOKUP_FOLLOW) && + !(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | + LOOKUP_OPEN | LOOKUP_CREATE))) + return -EISDIR; + current->total_link_count++; if (current->total_link_count >= 40) return -ELOOP; diff --git a/trunk/fs/nfs/nfs3acl.c b/trunk/fs/nfs/nfs3acl.c index 7ef23979896d..e49e73107e62 100644 --- a/trunk/fs/nfs/nfs3acl.c +++ b/trunk/fs/nfs/nfs3acl.c @@ -415,7 +415,7 @@ int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl) } int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, - umode_t mode) + mode_t mode) { struct posix_acl *dfacl, *acl; int error = 0; diff --git a/trunk/fs/nfs/nfs3proc.c b/trunk/fs/nfs/nfs3proc.c index 85f1690ca08c..38053d823eb0 100644 --- a/trunk/fs/nfs/nfs3proc.c +++ b/trunk/fs/nfs/nfs3proc.c @@ -316,7 +316,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags, struct nfs_open_context *ctx) { struct nfs3_createdata *data; - umode_t mode = sattr->ia_mode; + mode_t mode = sattr->ia_mode; int status = -ENOMEM; dprintk("NFS call create %s\n", dentry->d_name.name); @@ -562,7 +562,7 @@ static int nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) { struct nfs3_createdata *data; - umode_t mode = sattr->ia_mode; + int mode = sattr->ia_mode; int status = -ENOMEM; dprintk("NFS call mkdir %s\n", dentry->d_name.name); @@ -681,7 +681,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dev_t rdev) { struct nfs3_createdata *data; - umode_t mode = sattr->ia_mode; + mode_t mode = sattr->ia_mode; int status = -ENOMEM; dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, diff --git a/trunk/fs/ocfs2/acl.c b/trunk/fs/ocfs2/acl.c index a7219075b4de..783c58d9daf1 100644 --- a/trunk/fs/ocfs2/acl.c +++ b/trunk/fs/ocfs2/acl.c @@ -247,7 +247,7 @@ static int ocfs2_set_acl(handle_t *handle, case ACL_TYPE_ACCESS: name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - umode_t mode = inode->i_mode; + mode_t mode = inode->i_mode; ret = posix_acl_equiv_mode(acl, &mode); if (ret < 0) return ret; @@ -351,7 +351,7 @@ int ocfs2_init_acl(handle_t *handle, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct posix_acl *acl = NULL; int ret = 0, ret2; - umode_t mode; + mode_t mode; if (!S_ISLNK(inode->i_mode)) { if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { diff --git a/trunk/fs/posix_acl.c b/trunk/fs/posix_acl.c index 10027b42b7e2..d43729a760e2 100644 --- a/trunk/fs/posix_acl.c +++ b/trunk/fs/posix_acl.c @@ -149,10 +149,10 @@ posix_acl_valid(const struct posix_acl *acl) * file mode permission bits, or else 1. Returns -E... on error. */ int -posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p) +posix_acl_equiv_mode(const struct posix_acl *acl, mode_t *mode_p) { const struct posix_acl_entry *pa, *pe; - umode_t mode = 0; + mode_t mode = 0; int not_equiv = 0; FOREACH_ACL_ENTRY(pa, acl, pe) { @@ -188,7 +188,7 @@ posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p) * Create an ACL representing the file mode permission bits of an inode. */ struct posix_acl * -posix_acl_from_mode(umode_t mode, gfp_t flags) +posix_acl_from_mode(mode_t mode, gfp_t flags) { struct posix_acl *acl = posix_acl_alloc(3, flags); if (!acl) @@ -279,11 +279,11 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want) * system calls. All permissions that are not granted by the acl are removed. * The permissions in the acl are changed to reflect the mode_p parameter. */ -static int posix_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) +static int posix_acl_create_masq(struct posix_acl *acl, mode_t *mode_p) { struct posix_acl_entry *pa, *pe; struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; - umode_t mode = *mode_p; + mode_t mode = *mode_p; int not_equiv = 0; /* assert(atomic_read(acl->a_refcount) == 1); */ @@ -336,7 +336,7 @@ static int posix_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) /* * Modify the ACL for the chmod syscall. */ -static int posix_acl_chmod_masq(struct posix_acl *acl, umode_t mode) +static int posix_acl_chmod_masq(struct posix_acl *acl, mode_t mode) { struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; struct posix_acl_entry *pa, *pe; @@ -382,7 +382,7 @@ static int posix_acl_chmod_masq(struct posix_acl *acl, umode_t mode) } int -posix_acl_create(struct posix_acl **acl, gfp_t gfp, umode_t *mode_p) +posix_acl_create(struct posix_acl **acl, gfp_t gfp, mode_t *mode_p) { struct posix_acl *clone = posix_acl_clone(*acl, gfp); int err = -ENOMEM; @@ -400,7 +400,7 @@ posix_acl_create(struct posix_acl **acl, gfp_t gfp, umode_t *mode_p) EXPORT_SYMBOL(posix_acl_create); int -posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, umode_t mode) +posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, mode_t mode) { struct posix_acl *clone = posix_acl_clone(*acl, gfp); int err = -ENOMEM; diff --git a/trunk/fs/pstore/inode.c b/trunk/fs/pstore/inode.c index 893b961dcfd8..977ed2723845 100644 --- a/trunk/fs/pstore/inode.c +++ b/trunk/fs/pstore/inode.c @@ -39,9 +39,8 @@ #define PSTORE_NAMELEN 64 struct pstore_private { - struct pstore_info *psi; - enum pstore_type_id type; u64 id; + int (*erase)(u64); ssize_t size; char data[]; }; @@ -74,7 +73,7 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) { struct pstore_private *p = dentry->d_inode->i_private; - p->psi->erase(p->type, p->id, p->psi); + p->erase(p->id); return simple_unlink(dir, dentry); } @@ -176,8 +175,8 @@ int pstore_is_mounted(void) * Set the mtime & ctime to the date that this record was originally stored. */ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, - char *data, size_t size, struct timespec time, - struct pstore_info *psi) + char *data, size_t size, + struct timespec time, int (*erase)(u64)) { struct dentry *root = pstore_sb->s_root; struct dentry *dentry; @@ -193,9 +192,8 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, private = kmalloc(sizeof *private + size, GFP_KERNEL); if (!private) goto fail_alloc; - private->type = type; private->id = id; - private->psi = psi; + private->erase = erase; switch (type) { case PSTORE_TYPE_DMESG: diff --git a/trunk/fs/pstore/internal.h b/trunk/fs/pstore/internal.h index 611c1b3c46fa..8c9f23eb1645 100644 --- a/trunk/fs/pstore/internal.h +++ b/trunk/fs/pstore/internal.h @@ -2,5 +2,5 @@ extern void pstore_set_kmsg_bytes(int); extern void pstore_get_records(void); extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, char *data, size_t size, - struct timespec time, struct pstore_info *psi); + struct timespec time, int (*erase)(u64)); extern int pstore_is_mounted(void); diff --git a/trunk/fs/pstore/platform.c b/trunk/fs/pstore/platform.c index c5300ec31696..f2c3ff20ea68 100644 --- a/trunk/fs/pstore/platform.c +++ b/trunk/fs/pstore/platform.c @@ -37,8 +37,6 @@ static DEFINE_SPINLOCK(pstore_lock); static struct pstore_info *psinfo; -static char *backend; - /* How much of the console log to snapshot */ static unsigned long kmsg_bytes = 10240; @@ -69,8 +67,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, unsigned long size, total = 0; char *dst, *why; u64 id; - int hsize; - unsigned int part = 1; + int hsize, part = 1; if (reason < ARRAY_SIZE(reason_str)) why = reason_str[reason]; @@ -81,7 +78,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, oopscount++; while (total < kmsg_bytes) { dst = psinfo->buf; - hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part); + hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part++); size = psinfo->bufsize - hsize; dst += hsize; @@ -97,16 +94,14 @@ static void pstore_dump(struct kmsg_dumper *dumper, memcpy(dst, s1 + s1_start, l1_cpy); memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); - id = psinfo->write(PSTORE_TYPE_DMESG, part, - hsize + l1_cpy + l2_cpy, psinfo); + id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy); if (reason == KMSG_DUMP_OOPS && pstore_is_mounted()) pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, hsize + l1_cpy + l2_cpy, - CURRENT_TIME, psinfo); + CURRENT_TIME, psinfo->erase); l1 -= l1_cpy; l2 -= l2_cpy; total += l1_cpy + l2_cpy; - part++; } mutex_unlock(&psinfo->buf_mutex); } @@ -133,12 +128,6 @@ int pstore_register(struct pstore_info *psi) spin_unlock(&pstore_lock); return -EBUSY; } - - if (backend && strcmp(backend, psi->name)) { - spin_unlock(&pstore_lock); - return -EINVAL; - } - psinfo = psi; spin_unlock(&pstore_lock); @@ -177,9 +166,9 @@ void pstore_get_records(void) if (rc) goto out; - while ((size = psi->read(&id, &type, &time, psi)) > 0) { + while ((size = psi->read(&id, &type, &time)) > 0) { if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size, - time, psi)) + time, psi->erase)) failed++; } psi->close(psi); @@ -207,15 +196,12 @@ int pstore_write(enum pstore_type_id type, char *buf, size_t size) mutex_lock(&psinfo->buf_mutex); memcpy(psinfo->buf, buf, size); - id = psinfo->write(type, 0, size, psinfo); + id = psinfo->write(type, size); if (pstore_is_mounted()) pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, - size, CURRENT_TIME, psinfo); + size, CURRENT_TIME, psinfo->erase); mutex_unlock(&psinfo->buf_mutex); return 0; } EXPORT_SYMBOL_GPL(pstore_write); - -module_param(backend, charp, 0444); -MODULE_PARM_DESC(backend, "Pstore backend to use"); diff --git a/trunk/fs/reiserfs/xattr_acl.c b/trunk/fs/reiserfs/xattr_acl.c index 6da0396e5052..7362cf4c946a 100644 --- a/trunk/fs/reiserfs/xattr_acl.c +++ b/trunk/fs/reiserfs/xattr_acl.c @@ -272,10 +272,12 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; if (acl) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); + mode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); if (error < 0) return error; else { + inode->i_mode = mode; if (error == 0) acl = NULL; } @@ -352,6 +354,8 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, return PTR_ERR(acl); if (acl) { + mode_t mode = inode->i_mode; + /* Copy the default ACL to the default ACL of a new directory */ if (S_ISDIR(inode->i_mode)) { err = reiserfs_set_acl(th, inode, ACL_TYPE_DEFAULT, @@ -362,10 +366,12 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, /* Now we reconcile the new ACL and the mode, potentially modifying both */ - err = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); + err = posix_acl_create(&acl, GFP_NOFS, &mode); if (err < 0) return err; + inode->i_mode = mode; + /* If we need an ACL.. */ if (err > 0) err = reiserfs_set_acl(th, inode, ACL_TYPE_ACCESS, acl); diff --git a/trunk/fs/xfs/linux-2.6/xfs_acl.c b/trunk/fs/xfs/linux-2.6/xfs_acl.c index b6c4b3795c4a..44ce51656804 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_acl.c +++ b/trunk/fs/xfs/linux-2.6/xfs_acl.c @@ -221,7 +221,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) } static int -xfs_set_mode(struct inode *inode, umode_t mode) +xfs_set_mode(struct inode *inode, mode_t mode) { int error = 0; @@ -267,7 +267,7 @@ posix_acl_default_exists(struct inode *inode) int xfs_inherit_acl(struct inode *inode, struct posix_acl *acl) { - umode_t mode = inode->i_mode; + mode_t mode = inode->i_mode; int error = 0, inherit = 0; if (S_ISDIR(inode->i_mode)) { @@ -381,7 +381,7 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name, goto out_release; if (type == ACL_TYPE_ACCESS) { - umode_t mode = inode->i_mode; + mode_t mode = inode->i_mode; error = posix_acl_equiv_mode(acl, &mode); if (error <= 0) { diff --git a/trunk/include/linux/amba/pl08x.h b/trunk/include/linux/amba/pl08x.h index e6e28f37d8ec..3111385b8ca7 100644 --- a/trunk/include/linux/amba/pl08x.h +++ b/trunk/include/linux/amba/pl08x.h @@ -172,11 +172,8 @@ struct pl08x_dma_chan { int phychan_hold; struct tasklet_struct tasklet; char *name; - const struct pl08x_channel_data *cd; - dma_addr_t src_addr; - dma_addr_t dst_addr; - u32 src_cctl; - u32 dst_cctl; + struct pl08x_channel_data *cd; + dma_addr_t runtime_addr; enum dma_data_direction runtime_direction; dma_cookie_t lc; struct list_head pend_list; @@ -205,7 +202,7 @@ struct pl08x_dma_chan { * @mem_buses: buses which memory can be accessed from: PL08X_AHB1 | PL08X_AHB2 */ struct pl08x_platform_data { - const struct pl08x_channel_data *slave_channels; + struct pl08x_channel_data *slave_channels; unsigned int num_slave_channels; struct pl08x_channel_data memcpy_channel; int (*get_signal)(struct pl08x_dma_chan *); diff --git a/trunk/include/linux/device-mapper.h b/trunk/include/linux/device-mapper.h index 3fa1f3d90ce0..4427e0454051 100644 --- a/trunk/include/linux/device-mapper.h +++ b/trunk/include/linux/device-mapper.h @@ -208,49 +208,6 @@ struct dm_target_callbacks { int dm_register_target(struct target_type *t); void dm_unregister_target(struct target_type *t); -/* - * Target argument parsing. - */ -struct dm_arg_set { - unsigned argc; - char **argv; -}; - -/* - * The minimum and maximum value of a numeric argument, together with - * the error message to use if the number is found to be outside that range. - */ -struct dm_arg { - unsigned min; - unsigned max; - char *error; -}; - -/* - * Validate the next argument, either returning it as *value or, if invalid, - * returning -EINVAL and setting *error. - */ -int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, - unsigned *value, char **error); - -/* - * Process the next argument as the start of a group containing between - * arg->min and arg->max further arguments. Either return the size as - * *num_args or, if invalid, return -EINVAL and set *error. - */ -int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set, - unsigned *num_args, char **error); - -/* - * Return the current argument and shift to the next. - */ -const char *dm_shift_arg(struct dm_arg_set *as); - -/* - * Move through num_args arguments. - */ -void dm_consume_args(struct dm_arg_set *as, unsigned num_args); - /*----------------------------------------------------------------- * Functions for creating and manipulating mapped devices. * Drop the reference with dm_put when you finish with the object. diff --git a/trunk/include/linux/dm-ioctl.h b/trunk/include/linux/dm-ioctl.h index 0cb8eff76bd6..3708455ee6c3 100644 --- a/trunk/include/linux/dm-ioctl.h +++ b/trunk/include/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 21 +#define DM_VERSION_MINOR 20 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2011-07-06)" +#define DM_VERSION_EXTRA "-ioctl (2011-02-02)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ diff --git a/trunk/include/linux/dm-kcopyd.h b/trunk/include/linux/dm-kcopyd.h index 5e54458e920f..298d587e349b 100644 --- a/trunk/include/linux/dm-kcopyd.h +++ b/trunk/include/linux/dm-kcopyd.h @@ -42,20 +42,5 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, unsigned num_dests, struct dm_io_region *dests, unsigned flags, dm_kcopyd_notify_fn fn, void *context); -/* - * Prepare a callback and submit it via the kcopyd thread. - * - * dm_kcopyd_prepare_callback allocates a callback structure and returns it. - * It must not be called from interrupt context. - * The returned value should be passed into dm_kcopyd_do_callback. - * - * dm_kcopyd_do_callback submits the callback. - * It may be called from interrupt context. - * The callback is issued from the kcopyd thread. - */ -void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, - dm_kcopyd_notify_fn fn, void *context); -void dm_kcopyd_do_callback(void *job, int read_err, unsigned long write_err); - #endif /* __KERNEL__ */ #endif /* _LINUX_DM_KCOPYD_H */ diff --git a/trunk/include/linux/efi.h b/trunk/include/linux/efi.h index 2362a0bc7f0d..ec2572693925 100644 --- a/trunk/include/linux/efi.h +++ b/trunk/include/linux/efi.h @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -233,9 +232,6 @@ typedef efi_status_t efi_query_capsule_caps_t(efi_capsule_header_t **capsules, #define UV_SYSTEM_TABLE_GUID \ EFI_GUID( 0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93 ) -#define LINUX_EFI_CRASH_GUID \ - EFI_GUID( 0xcfc8fc79, 0xbe2e, 0x4ddc, 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0 ) - typedef struct { efi_guid_t guid; unsigned long table; @@ -462,8 +458,6 @@ struct efivars { struct kset *kset; struct bin_attribute *new_var, *del_var; const struct efivar_operations *ops; - struct efivar_entry *walk_entry; - struct pstore_info efi_pstore_info; }; int register_efivars(struct efivars *efivars, diff --git a/trunk/include/linux/fs.h b/trunk/include/linux/fs.h index 786b3b1113cf..f23bcb77260c 100644 --- a/trunk/include/linux/fs.h +++ b/trunk/include/linux/fs.h @@ -2317,18 +2317,11 @@ extern int should_remove_suid(struct dentry *); extern int file_remove_suid(struct file *); extern void __insert_inode_hash(struct inode *, unsigned long hashval); +extern void remove_inode_hash(struct inode *); static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } - -extern void __remove_inode_hash(struct inode *); -static inline void remove_inode_hash(struct inode *inode) -{ - if (!inode_unhashed(inode)) - __remove_inode_hash(inode); -} - extern void inode_sb_list_add(struct inode *inode); #ifdef CONFIG_BLOCK diff --git a/trunk/include/linux/jbd2.h b/trunk/include/linux/jbd2.h index 38f307b8c334..d087c2e7b2aa 100644 --- a/trunk/include/linux/jbd2.h +++ b/trunk/include/linux/jbd2.h @@ -1329,6 +1329,12 @@ extern int jbd_blocks_per_page(struct inode *inode); #define BUFFER_TRACE2(bh, bh2, info) do {} while (0) #define JBUFFER_TRACE(jh, info) do {} while (0) +/* + * jbd2_dev_to_name is a utility function used by the jbd2 and ext4 + * tracing infrastructure to map a dev_t to a device name. + */ +extern const char *jbd2_dev_to_name(dev_t device); + #endif /* __KERNEL__ */ #endif /* _LINUX_JBD2_H */ diff --git a/trunk/include/linux/nfs_fs.h b/trunk/include/linux/nfs_fs.h index eaac770f886e..b96fb99072ff 100644 --- a/trunk/include/linux/nfs_fs.h +++ b/trunk/include/linux/nfs_fs.h @@ -569,12 +569,12 @@ extern struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type); extern int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl); extern int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, - umode_t mode); + mode_t mode); extern void nfs3_forget_cached_acls(struct inode *inode); #else static inline int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, - umode_t mode) + mode_t mode) { return 0; } diff --git a/trunk/include/linux/posix_acl.h b/trunk/include/linux/posix_acl.h index 951bba82d50d..9a53b99818e2 100644 --- a/trunk/include/linux/posix_acl.h +++ b/trunk/include/linux/posix_acl.h @@ -75,10 +75,10 @@ extern void posix_acl_init(struct posix_acl *, int); extern struct posix_acl *posix_acl_alloc(int, gfp_t); extern int posix_acl_valid(const struct posix_acl *); extern int posix_acl_permission(struct inode *, const struct posix_acl *, int); -extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t); -extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *); -extern int posix_acl_create(struct posix_acl **, gfp_t, umode_t *); -extern int posix_acl_chmod(struct posix_acl **, gfp_t, umode_t); +extern struct posix_acl *posix_acl_from_mode(mode_t, gfp_t); +extern int posix_acl_equiv_mode(const struct posix_acl *, mode_t *); +extern int posix_acl_create(struct posix_acl **, gfp_t, mode_t *); +extern int posix_acl_chmod(struct posix_acl **, gfp_t, mode_t); extern struct posix_acl *get_posix_acl(struct inode *, int); extern int set_posix_acl(struct inode *, int, struct posix_acl *); diff --git a/trunk/include/linux/pstore.h b/trunk/include/linux/pstore.h index cc03bbf5c4b8..2455ef2683f0 100644 --- a/trunk/include/linux/pstore.h +++ b/trunk/include/linux/pstore.h @@ -38,12 +38,9 @@ struct pstore_info { int (*open)(struct pstore_info *psi); int (*close)(struct pstore_info *psi); ssize_t (*read)(u64 *id, enum pstore_type_id *type, - struct timespec *time, struct pstore_info *psi); - u64 (*write)(enum pstore_type_id type, unsigned int part, - size_t size, struct pstore_info *psi); - int (*erase)(enum pstore_type_id type, u64 id, - struct pstore_info *psi); - void *data; + struct timespec *time); + u64 (*write)(enum pstore_type_id type, size_t size); + int (*erase)(u64 id); }; #ifdef CONFIG_PSTORE diff --git a/trunk/include/linux/regulator/consumer.h b/trunk/include/linux/regulator/consumer.h index 26f6ea4444e3..9e87c1cb7270 100644 --- a/trunk/include/linux/regulator/consumer.h +++ b/trunk/include/linux/regulator/consumer.h @@ -122,9 +122,6 @@ struct regulator; struct regulator_bulk_data { const char *supply; struct regulator *consumer; - - /* Internal use */ - int ret; }; #if defined(CONFIG_REGULATOR) diff --git a/trunk/include/linux/regulator/driver.h b/trunk/include/linux/regulator/driver.h index 1a80bc77517d..6c433b89c80d 100644 --- a/trunk/include/linux/regulator/driver.h +++ b/trunk/include/linux/regulator/driver.h @@ -188,16 +188,18 @@ struct regulator_dev { /* lists we belong to */ struct list_head list; /* list of all regulators */ + struct list_head slist; /* list of supplied regulators */ /* lists we own */ struct list_head consumer_list; /* consumers we supply */ + struct list_head supply_list; /* regulators we supply */ struct blocking_notifier_head notifier; struct mutex mutex; /* consumer lock */ struct module *owner; struct device dev; struct regulation_constraints *constraints; - struct regulator *supply; /* for tree */ + struct regulator_dev *supply; /* for tree */ void *reg_data; /* regulator_dev data */ diff --git a/trunk/include/trace/events/ext4.h b/trunk/include/trace/events/ext4.h index b50a54736242..6363193a3418 100644 --- a/trunk/include/trace/events/ext4.h +++ b/trunk/include/trace/events/ext4.h @@ -23,7 +23,7 @@ TRACE_EVENT(ext4_free_inode, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( __u16, mode ) + __field( umode_t, mode ) __field( uid_t, uid ) __field( gid_t, gid ) __field( __u64, blocks ) @@ -52,7 +52,7 @@ TRACE_EVENT(ext4_request_inode, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, dir ) - __field( __u16, mode ) + __field( umode_t, mode ) ), TP_fast_assign( @@ -75,7 +75,7 @@ TRACE_EVENT(ext4_allocate_inode, __field( dev_t, dev ) __field( ino_t, ino ) __field( ino_t, dir ) - __field( __u16, mode ) + __field( umode_t, mode ) ), TP_fast_assign( @@ -725,7 +725,7 @@ TRACE_EVENT(ext4_free_blocks, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( __u16, mode ) + __field( umode_t, mode ) __field( __u64, block ) __field( unsigned long, count ) __field( int, flags ) @@ -1012,7 +1012,7 @@ TRACE_EVENT(ext4_forget, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( __u16, mode ) + __field( umode_t, mode ) __field( int, is_metadata ) __field( __u64, block ) ), @@ -1039,7 +1039,7 @@ TRACE_EVENT(ext4_da_update_reserve_space, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( __u16, mode ) + __field( umode_t, mode ) __field( __u64, i_blocks ) __field( int, used_blocks ) __field( int, reserved_data_blocks ) @@ -1076,7 +1076,7 @@ TRACE_EVENT(ext4_da_reserve_space, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( __u16, mode ) + __field( umode_t, mode ) __field( __u64, i_blocks ) __field( int, md_needed ) __field( int, reserved_data_blocks ) @@ -1110,7 +1110,7 @@ TRACE_EVENT(ext4_da_release_space, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( __u16, mode ) + __field( umode_t, mode ) __field( __u64, i_blocks ) __field( int, freed_blocks ) __field( int, reserved_data_blocks ) @@ -1518,77 +1518,6 @@ TRACE_EVENT(ext4_load_inode, (unsigned long) __entry->ino) ); -TRACE_EVENT(ext4_journal_start, - TP_PROTO(struct super_block *sb, int nblocks, unsigned long IP), - - TP_ARGS(sb, nblocks, IP), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( int, nblocks ) - __field(unsigned long, ip ) - ), - - TP_fast_assign( - __entry->dev = sb->s_dev; - __entry->nblocks = nblocks; - __entry->ip = IP; - ), - - TP_printk("dev %d,%d nblocks %d caller %pF", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->nblocks, (void *)__entry->ip) -); - -DECLARE_EVENT_CLASS(ext4__trim, - TP_PROTO(struct super_block *sb, - ext4_group_t group, - ext4_grpblk_t start, - ext4_grpblk_t len), - - TP_ARGS(sb, group, start, len), - - TP_STRUCT__entry( - __field( int, dev_major ) - __field( int, dev_minor ) - __field( __u32, group ) - __field( int, start ) - __field( int, len ) - ), - - TP_fast_assign( - __entry->dev_major = MAJOR(sb->s_dev); - __entry->dev_minor = MINOR(sb->s_dev); - __entry->group = group; - __entry->start = start; - __entry->len = len; - ), - - TP_printk("dev %d,%d group %u, start %d, len %d", - __entry->dev_major, __entry->dev_minor, - __entry->group, __entry->start, __entry->len) -); - -DEFINE_EVENT(ext4__trim, ext4_trim_extent, - - TP_PROTO(struct super_block *sb, - ext4_group_t group, - ext4_grpblk_t start, - ext4_grpblk_t len), - - TP_ARGS(sb, group, start, len) -); - -DEFINE_EVENT(ext4__trim, ext4_trim_all_free, - - TP_PROTO(struct super_block *sb, - ext4_group_t group, - ext4_grpblk_t start, - ext4_grpblk_t len), - - TP_ARGS(sb, group, start, len) -); - #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ diff --git a/trunk/include/trace/events/jbd2.h b/trunk/include/trace/events/jbd2.h index 75964412ddbb..bf16545cc977 100644 --- a/trunk/include/trace/events/jbd2.h +++ b/trunk/include/trace/events/jbd2.h @@ -26,8 +26,8 @@ TRACE_EVENT(jbd2_checkpoint, __entry->result = result; ), - TP_printk("dev %d,%d result %d", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->result) + TP_printk("dev %s result %d", + jbd2_dev_to_name(__entry->dev), __entry->result) ); DECLARE_EVENT_CLASS(jbd2_commit, @@ -48,9 +48,9 @@ DECLARE_EVENT_CLASS(jbd2_commit, __entry->transaction = commit_transaction->t_tid; ), - TP_printk("dev %d,%d transaction %d sync %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->transaction, __entry->sync_commit) + TP_printk("dev %s transaction %d sync %d", + jbd2_dev_to_name(__entry->dev), __entry->transaction, + __entry->sync_commit) ); DEFINE_EVENT(jbd2_commit, jbd2_start_commit, @@ -100,9 +100,9 @@ TRACE_EVENT(jbd2_end_commit, __entry->head = journal->j_tail_sequence; ), - TP_printk("dev %d,%d transaction %d sync %d head %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->transaction, __entry->sync_commit, __entry->head) + TP_printk("dev %s transaction %d sync %d head %d", + jbd2_dev_to_name(__entry->dev), __entry->transaction, + __entry->sync_commit, __entry->head) ); TRACE_EVENT(jbd2_submit_inode_data, @@ -120,9 +120,8 @@ TRACE_EVENT(jbd2_submit_inode_data, __entry->ino = inode->i_ino; ), - TP_printk("dev %d,%d ino %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino) + TP_printk("dev %s ino %lu", + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino) ); TRACE_EVENT(jbd2_run_stats, @@ -157,9 +156,9 @@ TRACE_EVENT(jbd2_run_stats, __entry->blocks_logged = stats->rs_blocks_logged; ), - TP_printk("dev %d,%d tid %lu wait %u running %u locked %u flushing %u " + TP_printk("dev %s tid %lu wait %u running %u locked %u flushing %u " "logging %u handle_count %u blocks %u blocks_logged %u", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, + jbd2_dev_to_name(__entry->dev), __entry->tid, jiffies_to_msecs(__entry->wait), jiffies_to_msecs(__entry->running), jiffies_to_msecs(__entry->locked), @@ -193,9 +192,9 @@ TRACE_EVENT(jbd2_checkpoint_stats, __entry->dropped = stats->cs_dropped; ), - TP_printk("dev %d,%d tid %lu chp_time %u forced_to_close %u " + TP_printk("dev %s tid %lu chp_time %u forced_to_close %u " "written %u dropped %u", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, + jbd2_dev_to_name(__entry->dev), __entry->tid, jiffies_to_msecs(__entry->chp_time), __entry->forced_to_close, __entry->written, __entry->dropped) ); @@ -223,10 +222,9 @@ TRACE_EVENT(jbd2_cleanup_journal_tail, __entry->freed = freed; ), - TP_printk("dev %d,%d from %u to %u offset %lu freed %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->tail_sequence, __entry->first_tid, - __entry->block_nr, __entry->freed) + TP_printk("dev %s from %u to %u offset %lu freed %lu", + jbd2_dev_to_name(__entry->dev), __entry->tail_sequence, + __entry->first_tid, __entry->block_nr, __entry->freed) ); #endif /* _TRACE_JBD2_H */ diff --git a/trunk/kernel/debug/gdbstub.c b/trunk/kernel/debug/gdbstub.c index 34872482315e..a11db956dd62 100644 --- a/trunk/kernel/debug/gdbstub.c +++ b/trunk/kernel/debug/gdbstub.c @@ -42,8 +42,6 @@ /* Our I/O buffers. */ static char remcom_in_buffer[BUFMAX]; static char remcom_out_buffer[BUFMAX]; -static int gdbstub_use_prev_in_buf; -static int gdbstub_prev_in_buf_pos; /* Storage for the registers, in GDB format. */ static unsigned long gdb_regs[(NUMREGBYTES + @@ -60,13 +58,6 @@ static int gdbstub_read_wait(void) int ret = -1; int i; - if (unlikely(gdbstub_use_prev_in_buf)) { - if (gdbstub_prev_in_buf_pos < gdbstub_use_prev_in_buf) - return remcom_in_buffer[gdbstub_prev_in_buf_pos++]; - else - gdbstub_use_prev_in_buf = 0; - } - /* poll any additional I/O interfaces that are defined */ while (ret < 0) for (i = 0; kdb_poll_funcs[i] != NULL; i++) { @@ -118,6 +109,7 @@ static void get_packet(char *buffer) buffer[count] = ch; count = count + 1; } + buffer[count] = 0; if (ch == '#') { xmitcsum = hex_to_bin(gdbstub_read_wait()) << 4; @@ -132,7 +124,6 @@ static void get_packet(char *buffer) if (dbg_io_ops->flush) dbg_io_ops->flush(); } - buffer[count] = 0; } while (checksum != xmitcsum); } @@ -1091,11 +1082,12 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd) case 'c': strcpy(remcom_in_buffer, cmd); return 0; - case '$': - strcpy(remcom_in_buffer, cmd); - gdbstub_use_prev_in_buf = strlen(remcom_in_buffer); - gdbstub_prev_in_buf_pos = 0; - return 0; + case '?': + gdb_cmd_status(ks); + break; + case '\0': + strcpy(remcom_out_buffer, ""); + break; } dbg_io_ops->write_char('+'); put_packet(remcom_out_buffer); diff --git a/trunk/kernel/debug/kdb/kdb_bt.c b/trunk/kernel/debug/kdb/kdb_bt.c index 7179eac7b41c..2f62fe85f16a 100644 --- a/trunk/kernel/debug/kdb/kdb_bt.c +++ b/trunk/kernel/debug/kdb/kdb_bt.c @@ -112,8 +112,9 @@ kdb_bt(int argc, const char **argv) unsigned long addr; long offset; - /* Prompt after each proc in bta */ - kdbgetintenv("BTAPROMPT", &btaprompt); + kdbgetintenv("BTARGS", &argcount); /* Arguments to print */ + kdbgetintenv("BTAPROMPT", &btaprompt); /* Prompt after each + * proc in bta */ if (strcmp(argv[0], "bta") == 0) { struct task_struct *g, *p; diff --git a/trunk/kernel/debug/kdb/kdb_cmds b/trunk/kernel/debug/kdb/kdb_cmds index 9834ad303ab6..56c88e4db309 100644 --- a/trunk/kernel/debug/kdb/kdb_cmds +++ b/trunk/kernel/debug/kdb/kdb_cmds @@ -18,12 +18,16 @@ defcmd dumpcommon "" "Common kdb debugging" endefcmd defcmd dumpall "" "First line debugging" + set BTSYMARG 1 + set BTARGS 9 pid R -dumpcommon -bta endefcmd defcmd dumpcpu "" "Same as dumpall but only tasks on cpus" + set BTSYMARG 1 + set BTARGS 9 pid R -dumpcommon -btc diff --git a/trunk/kernel/debug/kdb/kdb_debugger.c b/trunk/kernel/debug/kdb/kdb_debugger.c index d9ca9aa481ec..dd0b1b7dd02c 100644 --- a/trunk/kernel/debug/kdb/kdb_debugger.c +++ b/trunk/kernel/debug/kdb/kdb_debugger.c @@ -30,8 +30,6 @@ EXPORT_SYMBOL_GPL(kdb_poll_funcs); int kdb_poll_idx = 1; EXPORT_SYMBOL_GPL(kdb_poll_idx); -static struct kgdb_state *kdb_ks; - int kdb_stub(struct kgdb_state *ks) { int error = 0; @@ -41,7 +39,6 @@ int kdb_stub(struct kgdb_state *ks) kdb_dbtrap_t db_result = KDB_DB_NOBPT; int i; - kdb_ks = ks; if (KDB_STATE(REENTRY)) { reason = KDB_REASON_SWITCH; KDB_STATE_CLEAR(REENTRY); @@ -126,8 +123,20 @@ int kdb_stub(struct kgdb_state *ks) KDB_STATE_CLEAR(PAGER); kdbnearsym_cleanup(); if (error == KDB_CMD_KGDB) { - if (KDB_STATE(DOING_KGDB)) + if (KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)) { + /* + * This inteface glue which allows kdb to transition in into + * the gdb stub. In order to do this the '?' or '' gdb serial + * packet response is processed here. And then control is + * passed to the gdbstub. + */ + if (KDB_STATE(DOING_KGDB)) + gdbstub_state(ks, "?"); + else + gdbstub_state(ks, ""); KDB_STATE_CLEAR(DOING_KGDB); + KDB_STATE_CLEAR(DOING_KGDB2); + } return DBG_PASS_EVENT; } kdb_bp_install(ks->linux_regs); @@ -157,7 +166,3 @@ int kdb_stub(struct kgdb_state *ks) return kgdb_info[ks->cpu].ret_state; } -void kdb_gdb_state_pass(char *buf) -{ - gdbstub_state(kdb_ks, buf); -} diff --git a/trunk/kernel/debug/kdb/kdb_io.c b/trunk/kernel/debug/kdb/kdb_io.c index 4802eb5840e1..96fdaac46a80 100644 --- a/trunk/kernel/debug/kdb/kdb_io.c +++ b/trunk/kernel/debug/kdb/kdb_io.c @@ -31,21 +31,15 @@ char kdb_prompt_str[CMD_BUFLEN]; int kdb_trap_printk; -static int kgdb_transition_check(char *buffer) +static void kgdb_transition_check(char *buffer) { - if (buffer[0] != '+' && buffer[0] != '$') { + int slen = strlen(buffer); + if (strncmp(buffer, "$?#3f", slen) != 0 && + strncmp(buffer, "$qSupported#37", slen) != 0 && + strncmp(buffer, "+$qSupported#37", slen) != 0) { KDB_STATE_SET(KGDB_TRANS); kdb_printf("%s", buffer); - } else { - int slen = strlen(buffer); - if (slen > 3 && buffer[slen - 3] == '#') { - kdb_gdb_state_pass(buffer); - strcpy(buffer, "kgdb"); - KDB_STATE_SET(DOING_KGDB); - return 1; - } } - return 0; } static int kdb_read_get_key(char *buffer, size_t bufsize) @@ -257,10 +251,6 @@ static char *kdb_read(char *buffer, size_t bufsize) case 13: /* enter */ *lastchar++ = '\n'; *lastchar++ = '\0'; - if (!KDB_STATE(KGDB_TRANS)) { - KDB_STATE_SET(KGDB_TRANS); - kdb_printf("%s", buffer); - } kdb_printf("\n"); return buffer; case 4: /* Del */ @@ -392,26 +382,22 @@ static char *kdb_read(char *buffer, size_t bufsize) * printed characters if we think that * kgdb is connecting, until the check * fails */ - if (!KDB_STATE(KGDB_TRANS)) { - if (kgdb_transition_check(buffer)) - return buffer; - } else { + if (!KDB_STATE(KGDB_TRANS)) + kgdb_transition_check(buffer); + else kdb_printf("%c", key); - } } /* Special escape to kgdb */ if (lastchar - buffer >= 5 && strcmp(lastchar - 5, "$?#3f") == 0) { - kdb_gdb_state_pass(lastchar - 5); strcpy(buffer, "kgdb"); KDB_STATE_SET(DOING_KGDB); return buffer; } - if (lastchar - buffer >= 11 && - strcmp(lastchar - 11, "$qSupported") == 0) { - kdb_gdb_state_pass(lastchar - 11); + if (lastchar - buffer >= 14 && + strcmp(lastchar - 14, "$qSupported#37") == 0) { strcpy(buffer, "kgdb"); - KDB_STATE_SET(DOING_KGDB); + KDB_STATE_SET(DOING_KGDB2); return buffer; } } diff --git a/trunk/kernel/debug/kdb/kdb_main.c b/trunk/kernel/debug/kdb/kdb_main.c index 63786e71a3cd..be14779bcef6 100644 --- a/trunk/kernel/debug/kdb/kdb_main.c +++ b/trunk/kernel/debug/kdb/kdb_main.c @@ -145,6 +145,7 @@ static char *__env[] = { #endif "RADIX=16", "MDCOUNT=8", /* lines of md output */ + "BTARGS=9", /* 9 possible args in bt */ KDB_PLATFORM_ENV, "DTABCOUNT=30", "NOSECT=1", @@ -171,7 +172,6 @@ static char *__env[] = { (char *)0, (char *)0, (char *)0, - (char *)0, }; static const int __nenv = (sizeof(__env) / sizeof(char *)); @@ -1386,7 +1386,7 @@ int kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error, } if (result == KDB_CMD_KGDB) { - if (!KDB_STATE(DOING_KGDB)) + if (!(KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2))) kdb_printf("Entering please attach debugger " "or use $D#44+ or $3#33\n"); break; diff --git a/trunk/kernel/debug/kdb/kdb_private.h b/trunk/kernel/debug/kdb/kdb_private.h index e381d105b40b..35d69ed1dfb5 100644 --- a/trunk/kernel/debug/kdb/kdb_private.h +++ b/trunk/kernel/debug/kdb/kdb_private.h @@ -21,6 +21,7 @@ #define KDB_CMD_SS (-1003) #define KDB_CMD_SSB (-1004) #define KDB_CMD_KGDB (-1005) +#define KDB_CMD_KGDB2 (-1006) /* Internal debug flags */ #define KDB_DEBUG_FLAG_BP 0x0002 /* Breakpoint subsystem debug */ @@ -145,6 +146,7 @@ extern int kdb_state; * keyboard on this cpu */ #define KDB_STATE_KEXEC 0x00040000 /* kexec issued */ #define KDB_STATE_DOING_KGDB 0x00080000 /* kgdb enter now issued */ +#define KDB_STATE_DOING_KGDB2 0x00100000 /* kgdb enter now issued */ #define KDB_STATE_KGDB_TRANS 0x00200000 /* Transition to kgdb */ #define KDB_STATE_ARCH 0xff000000 /* Reserved for arch * specific use */ @@ -216,7 +218,6 @@ extern void kdb_print_nameval(const char *name, unsigned long val); extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); extern void kdb_meminfo_proc_show(void); extern char *kdb_getstr(char *, size_t, char *); -extern void kdb_gdb_state_pass(char *buf); /* Defines for kdb_symbol_print */ #define KDB_SP_SPACEB 0x0001 /* Space before string */ diff --git a/trunk/mm/oom_kill.c b/trunk/mm/oom_kill.c index 626303b52f3c..eafff89b3dd6 100644 --- a/trunk/mm/oom_kill.c +++ b/trunk/mm/oom_kill.c @@ -303,7 +303,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, do_each_thread(g, p) { unsigned int points; - if (p->exit_state) + if (!p->mm) continue; if (oom_unkillable_task(p, mem, nodemask)) continue; @@ -319,8 +319,6 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, */ if (test_tsk_thread_flag(p, TIF_MEMDIE)) return ERR_PTR(-1UL); - if (!p->mm) - continue; if (p->flags & PF_EXITING) { /* diff --git a/trunk/sound/core/pcm_compat.c b/trunk/sound/core/pcm_compat.c index 91cdf9435fec..5fb2e28e796f 100644 --- a/trunk/sound/core/pcm_compat.c +++ b/trunk/sound/core/pcm_compat.c @@ -342,7 +342,7 @@ static int snd_pcm_ioctl_xfern_compat(struct snd_pcm_substream *substream, kfree(bufs); return -EFAULT; } - bufs[i] = compat_ptr(ptr); + bufs[ch] = compat_ptr(ptr); bufptr++; } if (dir == SNDRV_PCM_STREAM_PLAYBACK) diff --git a/trunk/sound/core/rtctimer.c b/trunk/sound/core/rtctimer.c index e85e72baff9e..0851cd13e303 100644 --- a/trunk/sound/core/rtctimer.c +++ b/trunk/sound/core/rtctimer.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include diff --git a/trunk/sound/pci/asihpi/hpidspcd.c b/trunk/sound/pci/asihpi/hpidspcd.c index 71d32c868c92..3a7afa31c1d8 100644 --- a/trunk/sound/pci/asihpi/hpidspcd.c +++ b/trunk/sound/pci/asihpi/hpidspcd.c @@ -43,7 +43,6 @@ short hpi_dsp_code_open(u32 adapter, void *os_data, struct dsp_code *dsp_code, struct pci_dev *dev = os_data; struct code_header header; char fw_name[20]; - short err_ret = HPI_ERROR_DSP_FILE_NOT_FOUND; int err; sprintf(fw_name, "asihpi/dsp%04x.bin", adapter); @@ -86,10 +85,8 @@ short hpi_dsp_code_open(u32 adapter, void *os_data, struct dsp_code *dsp_code, HPI_DEBUG_LOG(DEBUG, "dsp code %s opened\n", fw_name); dsp_code->pvt = kmalloc(sizeof(*dsp_code->pvt), GFP_KERNEL); - if (!dsp_code->pvt) { - err_ret = HPI_ERROR_MEMORY_ALLOC; - goto error2; - } + if (!dsp_code->pvt) + return HPI_ERROR_MEMORY_ALLOC; dsp_code->pvt->dev = dev; dsp_code->pvt->firmware = firmware; @@ -102,7 +99,7 @@ short hpi_dsp_code_open(u32 adapter, void *os_data, struct dsp_code *dsp_code, release_firmware(firmware); error1: dsp_code->block_length = 0; - return err_ret; + return HPI_ERROR_DSP_FILE_NOT_FOUND; } /*-------------------------------------------------------------------*/ diff --git a/trunk/sound/pci/asihpi/hpioctl.c b/trunk/sound/pci/asihpi/hpioctl.c index a32502e796de..9683f84ecdc8 100644 --- a/trunk/sound/pci/asihpi/hpioctl.c +++ b/trunk/sound/pci/asihpi/hpioctl.c @@ -177,21 +177,16 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } else { u16 __user *ptr = NULL; u32 size = 0; - u32 adapter_present; + /* -1=no data 0=read from user mem, 1=write to user mem */ int wrflag = -1; - struct hpi_adapter *pa; - - if (hm->h.adapter_index < HPI_MAX_ADAPTERS) { - pa = &adapters[hm->h.adapter_index]; - adapter_present = pa->type; - } else { - adapter_present = 0; - } + u32 adapter = hm->h.adapter_index; + struct hpi_adapter *pa = &adapters[adapter]; - if (!adapter_present) { - hpi_init_response(&hr->r0, hm->h.object, - hm->h.function, HPI_ERROR_BAD_ADAPTER_NUMBER); + if ((adapter >= HPI_MAX_ADAPTERS) || (!pa->type)) { + hpi_init_response(&hr->r0, HPI_OBJ_ADAPTER, + HPI_ADAPTER_OPEN, + HPI_ERROR_BAD_ADAPTER_NUMBER); uncopied_bytes = copy_to_user(puhr, hr, sizeof(hr->h)); diff --git a/trunk/sound/pci/rme9652/hdspm.c b/trunk/sound/pci/rme9652/hdspm.c index 6edc67ced905..af130ee0c45d 100644 --- a/trunk/sound/pci/rme9652/hdspm.c +++ b/trunk/sound/pci/rme9652/hdspm.c @@ -521,7 +521,6 @@ MODULE_SUPPORTED_DEVICE("{{RME HDSPM-MADI}}"); #define HDSPM_DMA_AREA_KILOBYTES (HDSPM_DMA_AREA_BYTES/1024) /* revisions >= 230 indicate AES32 card */ -#define HDSPM_MADI_ANCIENT_REV 204 #define HDSPM_MADI_OLD_REV 207 #define HDSPM_MADI_REV 210 #define HDSPM_RAYDAT_REV 211 @@ -1218,22 +1217,6 @@ static int hdspm_external_sample_rate(struct hdspm *hdspm) rate = 0; break; } - - /* QS and DS rates normally can not be detected - * automatically by the card. Only exception is MADI - * in 96k frame mode. - * - * So if we read SS values (32 .. 48k), check for - * user-provided DS/QS bits in the control register - * and multiply the base frequency accordingly. - */ - if (rate <= 48000) { - if (hdspm->control_register & HDSPM_QuadSpeed) - rate *= 4; - else if (hdspm->control_register & - HDSPM_DoubleSpeed) - rate *= 2; - } } break; } @@ -3432,91 +3415,6 @@ static int snd_hdspm_put_qs_wire(struct snd_kcontrol *kcontrol, return change; } -#define HDSPM_MADI_SPEEDMODE(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ - .name = xname, \ - .index = xindex, \ - .info = snd_hdspm_info_madi_speedmode, \ - .get = snd_hdspm_get_madi_speedmode, \ - .put = snd_hdspm_put_madi_speedmode \ -} - -static int hdspm_madi_speedmode(struct hdspm *hdspm) -{ - if (hdspm->control_register & HDSPM_QuadSpeed) - return 2; - if (hdspm->control_register & HDSPM_DoubleSpeed) - return 1; - return 0; -} - -static int hdspm_set_madi_speedmode(struct hdspm *hdspm, int mode) -{ - hdspm->control_register &= ~(HDSPM_DoubleSpeed | HDSPM_QuadSpeed); - switch (mode) { - case 0: - break; - case 1: - hdspm->control_register |= HDSPM_DoubleSpeed; - break; - case 2: - hdspm->control_register |= HDSPM_QuadSpeed; - break; - } - hdspm_write(hdspm, HDSPM_controlRegister, hdspm->control_register); - - return 0; -} - -static int snd_hdspm_info_madi_speedmode(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_info *uinfo) -{ - static char *texts[] = { "Single", "Double", "Quad" }; - - uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; - uinfo->count = 1; - uinfo->value.enumerated.items = 3; - - if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items) - uinfo->value.enumerated.item = - uinfo->value.enumerated.items - 1; - strcpy(uinfo->value.enumerated.name, - texts[uinfo->value.enumerated.item]); - - return 0; -} - -static int snd_hdspm_get_madi_speedmode(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); - - spin_lock_irq(&hdspm->lock); - ucontrol->value.enumerated.item[0] = hdspm_madi_speedmode(hdspm); - spin_unlock_irq(&hdspm->lock); - return 0; -} - -static int snd_hdspm_put_madi_speedmode(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); - int change; - int val; - - if (!snd_hdspm_use_is_exclusive(hdspm)) - return -EBUSY; - val = ucontrol->value.integer.value[0]; - if (val < 0) - val = 0; - if (val > 2) - val = 2; - spin_lock_irq(&hdspm->lock); - change = val != hdspm_madi_speedmode(hdspm); - hdspm_set_madi_speedmode(hdspm, val); - spin_unlock_irq(&hdspm->lock); - return change; -} #define HDSPM_MIXER(xname, xindex) \ { .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ @@ -4391,8 +4289,7 @@ static struct snd_kcontrol_new snd_hdspm_controls_madi[] = { HDSPM_TX_64("TX 64 channels mode", 0), HDSPM_C_TMS("Clear Track Marker", 0), HDSPM_SAFE_MODE("Safe Mode", 0), - HDSPM_INPUT_SELECT("Input Select", 0), - HDSPM_MADI_SPEEDMODE("MADI Speed Mode", 0) + HDSPM_INPUT_SELECT("Input Select", 0) }; @@ -4405,8 +4302,7 @@ static struct snd_kcontrol_new snd_hdspm_controls_madiface[] = { HDSPM_SYNC_CHECK("MADI SyncCheck", 0), HDSPM_TX_64("TX 64 channels mode", 0), HDSPM_C_TMS("Clear Track Marker", 0), - HDSPM_SAFE_MODE("Safe Mode", 0), - HDSPM_MADI_SPEEDMODE("MADI Speed Mode", 0) + HDSPM_SAFE_MODE("Safe Mode", 0) }; static struct snd_kcontrol_new snd_hdspm_controls_aio[] = { @@ -6485,7 +6381,6 @@ static int __devinit snd_hdspm_create(struct snd_card *card, switch (hdspm->firmware_rev) { case HDSPM_MADI_REV: case HDSPM_MADI_OLD_REV: - case HDSPM_MADI_ANCIENT_REV: hdspm->io_type = MADI; hdspm->card_name = "RME MADI"; hdspm->midiPorts = 3; diff --git a/trunk/sound/soc/txx9/txx9aclc.c b/trunk/sound/soc/txx9/txx9aclc.c index 3de99af8cb82..34aa972669ed 100644 --- a/trunk/sound/soc/txx9/txx9aclc.c +++ b/trunk/sound/soc/txx9/txx9aclc.c @@ -290,7 +290,6 @@ static void txx9aclc_pcm_free_dma_buffers(struct snd_pcm *pcm) static int txx9aclc_pcm_new(struct snd_soc_pcm_runtime *rtd) { - struct snd_card *card = rtd->card->snd_card; struct snd_soc_dai *dai = rtd->cpu_dai; struct snd_pcm *pcm = rtd->pcm; struct platform_device *pdev = to_platform_device(dai->platform->dev);